Re: PaddleOCR - probably the best OCR tool available
Posted: 13 Apr 2022, 20:21
Unfortunately, I got this error after including the btt lib, though:
Let's help each other out
https://www.autohotkey.com/boards/
Code: Select all
; Example: set the mouse cursor to the AutoHotkey logo image at the URL below.
; ImagePutCursor is not defined in this snippet — presumably it comes from the ImagePut library; verify.
ImagePutCursor("https://www.autohotkey.com/static/ahk_logo.png")
Oh, that is unfortunate.
iseahound wrote: ↑28 Sep 2022, 19:19
I think tuzi included that for fun. Unless your mouse cursor happens to be an image with text in it:
you will never get a result with OCR(A_Cursor)
Code: Select all
; Set your cursor to the AutoHotkey logo
ImagePutCursor("https://www.autohotkey.com/static/ahk_logo.png")
Code: Select all
; PaddleOCR(Image, Configs := "")
; Runs OCR on Image by calling PaddleOCR.dll, which is loaded from the "Dll" folder next to this script file.
;
; Parameters:
;   Image   - passed unchanged to ImagePutStream() (from Lib\ImagePut.ahk, not shown here);
;             presumably any image source that library accepts - verify against ImagePut.
;   Configs - optional object whose keys override the defaults listed below (model, get_all_info, use_gpu, ...).
;             Passing an object on a later call regenerates the config text; the DLL is re-initialised
;             only if the generated text differs from the one used last time.
;
; Returns:
;   The string returned by the DLL's ocr_from_binary, or - when get_all_info is true - the result of
;   JSON.Load() on that string (an empty string is returned as-is).
;
; State: hModule, model, get_all_info and LastConfigs are static, so they persist between calls.
; The process exits (ExitApp) if the script is not running as 64-bit.
PaddleOCR(Image, Configs:="")
{
static hModule, model, get_all_info, LastConfigs, DllPath := A_LineFile "\..\Dll"
; Verify running version
; A_PtrSize is 8 only in a 64-bit AutoHotkey process; anything else is rejected.
if (A_PtrSize!=8)
{
MsgBox, 0x40010, , PaddleOCR must run on x64.
ExitApp
}
; The configuration text is (re)generated on the first run (hModule not set yet) or whenever an object is passed as Configs
if (!hModule or IsObject(Configs))
{
; Supported Configs options
; NonNull_Ret comes from Lib\NonNull.ahk (not shown); presumably it returns the first argument
; unless it is empty, otherwise the second (the default) - verify.
; model defaults to the value remembered from the previous call, or "server" if there is none.
model := NonNull_Ret(Configs.model , model="" ? "server" : model)
get_all_info := NonNull_Ret(Configs.get_all_info , 0)
use_gpu := NonNull_Ret(Configs.use_gpu , 0) ; Using the GPU requires installing CUDA toolkit: (2.6+GB) https://developer.nvidia.com/cuda-10.2-download-archive?target_os=Windows&target_arch=x86_64&target_version=10&target_type=exelocal
gpu_id := NonNull_Ret(Configs.gpu_id , 0)
gpu_mem := NonNull_Ret(Configs.gpu_mem , 4000)
cpu_math_library_num_threads := NonNull_Ret(Configs.cpu_math_library_num_threads, 10)
use_mkldnn := NonNull_Ret(Configs.use_mkldnn , 0) ; CPU acceleration with AVX2 instructions. this requires AVX2 supporting CPU (intel 4th gen and/or higher?)
max_side_len := NonNull_Ret(Configs.max_side_len , 960)
det_db_thresh := NonNull_Ret(Configs.det_db_thresh , 0.5)
det_db_box_thresh := NonNull_Ret(Configs.det_db_box_thresh , 0.5)
det_db_unclip_ratio := NonNull_Ret(Configs.det_db_unclip_ratio , 2.2)
use_polygon_score := NonNull_Ret(Configs.use_polygon_score , 1)
use_angle_cls := NonNull_Ret(Configs.use_angle_cls , 0)
cls_thresh := NonNull_Ret(Configs.cls_thresh , 0.9)
visualize := NonNull_Ret(Configs.visualize , 0)
use_tensorrt := NonNull_Ret(Configs.use_tensorrt , 0)
use_fp16 := NonNull_Ret(Configs.use_fp16 , 0)
; Use faster or more accurate models
; "fast" and "mobile" both select the mobile models; every other value selects the server models.
model := (model="fast" or model="mobile") ? "mobile" : "server"
; Each path is wrapped in literal double quotes ("""" is a one-character string holding a quote).
; The classifier model is always the mobile one; detection and recognition follow the chosen model.
cls_model_dir := % """" DllPath "\inference\mobile_cls\" """"
det_model_dir := % """" DllPath "\inference\" model "_det\" """"
rec_model_dir := % """" DllPath "\inference\" model "_rec\" """"
char_list_file := % """" DllPath "\inference\dict.txt" """"
; config.txt template
; Legacy continuation section: the %name% references are expanded to the values chosen above and
; LTrim strips leading whitespace from each line. Everything in it, including the text after '#',
; becomes part of the string handed to the DLL, so do not add script comments inside the parentheses.
template=
(LTrim
use_gpu %use_gpu% # Whether to use GPU. 1 means use, 0 means not use. I think this requires an nvidia GPu and installing the CUDA toolkit (2.6+GB): https://developer.nvidia.com/cuda-10.2-download-archive?target_os=Windows&target_arch=x86_64&target_version=10&target_type=exelocal
gpu_id %gpu_id% # GPU id. Effective when using GPU.
gpu_mem %gpu_mem% # The requested GPU memory.
cpu_math_library_num_threads %cpu_math_library_num_threads% # The number of threads at the time of CPU prediction. When the number of machine cores is sufficient, the larger the value, the faster the prediction speed.
use_mkldnn %use_mkldnn% # Whether to use mkldnn library (for CPU acceleration). 1 means use, 0 means not use. requires AVX2 compatible CPU
max_side_len %max_side_len% # If the length and width of the input image are greater than n, the image is scaled proportionally so that the longest side of the image is n.
det_db_thresh %det_db_thresh% # Used to filter the binarized image of DB prediction. Setting it to 0.-0.3 has no obvious effect on the result.
det_db_box_thresh %det_db_box_thresh% # DB post-processing filter box threshold. If there is a missing frame in the detection, it can be reduced as appropriate.
det_db_unclip_ratio %det_db_unclip_ratio% # Indicates how tight the text box
use_polygon_score %use_polygon_score% # Whether to use the polygon box to calculate the bbox score. 0 means using rectangular box calculation. The calculation speed of the rectangular box is faster, and the calculation of the polygonal box is more accurate for the curved text area.
det_model_dir %det_model_dir% # Check the location of the model.
use_angle_cls %use_angle_cls% # Whether to use the direction
cls_model_dir %cls_model_dir% # The location of the direction
cls_thresh %cls_thresh% # The score threshold of the direction classifier.
rec_model_dir %rec_model_dir% # Identify the location of the model
char_list_file %char_list_file% # The location of the dictionary
visualize %visualize% # Whether to visualize the results. When it is 1, the visual prediction result with the file name ocr_vis.png will be saved in the main code folder.
use_tensorrt %use_tensorrt% # Whether to use tensorrt.
use_fp16 %use_fp16% # Whether to use fp16.
)
; Re-initialise only when the generated text differs from the one used last time.
; NeedToInit is not static, so it is empty on any call where it is not set here.
if (template!=LastConfigs)
{
LastConfigs := template
NeedToInit := 1
}
}
; By default, the DLLs that PaddleOCR.dll itself depends on are searched for in the directory the main script
; was started from (changing the working directory with SetWorkingDir does not affect this).
; So if the main script and the DLL (with its dependencies) are not in the same directory, the location must be
; specified explicitly, otherwise loading fails because the dependent DLLs cannot be found.
; There are 3 ways to do that:
; 1. SetDllDirectory.
; 2. LoadLibraryEx with an absolute path and the LOAD_WITH_ALTERED_SEARCH_PATH option.
; 3. Pre-loading every dependent DLL with LoadLibrary.
; LoadLibrary avoids loading a module twice based on its file name:
; for example, LoadLibrary('c:\a.dll') followed by LoadLibrary('d:\somedir\a.dll') yields the DLL from drive c.
; So method 3 is excluded here and method 1 is used.
; NOTE(review): the LoadLibrary result is not checked. If it fails, hModule stays 0 and the DllCalls
; below are still attempted - consider reporting the failure.
if (!hModule)
{
DllCall("SetDllDirectory", "str", DllPath)
hModule := DllCall("LoadLibrary", "str", DllPath "\PaddleOCR.dll")
}
; Setting changes require reinitialization
if (NeedToInit)
{
; Discard the previous DLL state, then pass the new configuration text.
DllCall("PaddleOCR\destroy")
; Encode the text as "cp0" (the system default ANSI code page) into the buffer 'config'.
; Called without a target address, StrPut returns the required size including the terminator.
VarSetCapacity(config, StrPut(template, "cp0"))
StrPut(template, &config, "cp0")
DllCall("PaddleOCR\load_config", "str", config)
}
; Load image into memory
; ImagePutStream is defined in Lib\ImagePut.ahk (not shown); presumably it returns an IStream pointer
; holding the encoded image - verify. The HGLOBAL behind the stream is locked to obtain a raw pointer,
; and GlobalSize gives its size in bytes.
pStream := ImagePutStream(Image)
DllCall("ole32\GetHGlobalFromStream", "ptr", pStream, "ptr*", hMemory)
pMemory := DllCall("GlobalLock", "ptr", hMemory, "ptr")
pSize := DllCall("GlobalSize", "ptr", hMemory, "uptr")
; Whether to return all information including the recognized content, confidence and coordinates (JSON format)
; Arguments: image bytes, byte count, get_all_info flag. The return value is read as a string.
str := DllCall("PaddleOCR\ocr_from_binary", "ptr", pMemory, "int", pSize, "int", get_all_info, "str")
; Release memory resources
; NOTE(review): hMemory is freed explicitly and the stream is released afterwards. If ImagePutStream
; creates the stream with fDeleteOnRelease = TRUE, releasing it frees the same HGLOBAL again
; (double free) - confirm against ImagePut.
DllCall("GlobalUnlock", "ptr", hMemory)
DllCall("GlobalFree", "ptr", hMemory)
ObjRelease(pStream)
; Fix the problem that JSON cannot be parsed due to wrong score
; A score emitted as -nan(ind) is not valid JSON, so it is replaced with -1 before parsing.
if (get_all_info)
{
wrongChars = ,"score":-nan(ind),"range"
rightChars = ,"score":-1,"range"
str := StrReplace(str, wrongChars, rightChars)
; Fix the problem that str is empty and reports an error
; An empty result is returned as-is instead of being passed to JSON.Load.
return, str="" ? "" : JSON.Load(str)
}
; Plain mode: return the DLL's string unchanged.
return, str
}
#Include %A_LineFile%\..\Lib\ImagePut.ahk
#Include %A_LineFile%\..\Lib\NonNull.ahk
#Include %A_LineFile%\..\Lib\JSON.ahk