Optical character recognition (OCR) with UWP API

Re: Optical character recognition (OCR) with UWP API

28 Dec 2020, 10:21

Hello everyone,

I am trying to merge the first code below ('Text Recognition From Screen') with the second code ('Face Detection From File') to get automatic face detection on the screen, and then move the mouse to the detected face that is closest to the screen center.
First one is the code for 'text recognition from screen':

malcev wrote:
19 Feb 2020, 20:58
API recognizes text in 2 ways.

Script for recognizing screenshots by teadrinker:

Code: Select all

; Entry point: let the user drag a rectangle on screen, OCR its contents
; (Russian language pack) and show the recognized text.
SetBatchLines, -1

hBitmap := HBitmapFromScreen(GetArea()*)   ; area's x/y/w/h keys are passed as named parameters
pIRandomAccessStream := HBitmapToRandomAccessStream(hBitmap)
DllCall("DeleteObject", "Ptr", hBitmap)    ; the stream holds its own encoded copy
text := ocr(pIRandomAccessStream, "ru")
MsgBox, % text
Return

; The hotkey must come AFTER the code above: a hotkey label terminates the
; auto-execute section, so anything placed below it would never run at startup.
Esc:: ExitApp

; Blocks until the user has dragged a selection rectangle and returns it as an
; object with the keys x, y, w and h (screen coordinates / size).
GetArea() {
   area := []
   StartSelection(area)     ; arms the LButton hotkey that eventually fills `area`
   while !area.w            ; Select() writes area.w once a non-empty rectangle exists
      Sleep, 100
   Return area
}
; Arms the selection: binds LButton to Select(area) and switches every system
; cursor to a cross so the user can see that selection mode is active.
;   area - object that Select() will fill with x, y, w, h.
StartSelection(area) {
   handler := Func("Select").Bind(area)
   Hotkey, LButton, % handler, On
   ReplaceSystemCursors("IDC_CROSS")
}

; LButton hotkey handler: tracks the drag through a low-level mouse hook that
; resizes the selection GUI, then stores the final rectangle in `area`.
;   area - object receiving the keys x, y, w, h.
Select(area) {
   static hGui := CreateSelectionGui()
   ; The GUI handle is handed to the hook callback through A_EventInfo.
   Hook := new WindowsHook(WH_MOUSE_LL := 14, "LowLevelMouseProc", hGui)
   Loop {
      KeyWait, LButton                          ; wait for the button to be released
      WinGetPos, X, Y, W, H, ahk_id %hGui%
   } until w > 0                                ; a plain click yields no width: keep waiting
   ReplaceSystemCursors("")                     ; restore the default system cursors
   Hotkey, LButton, Off
   Hook := ""                                   ; releases the object -> __Delete unhooks
   Gui, %hGui%:Show, Hide
   for k, v in ["x", "y", "w", "h"]
      area[v] := %v%                            ; dynamic reference to the variables X, Y, W, H
}

; Replaces every system cursor listed in SysCursors with the cursor named by IDC,
; or restores the system defaults when IDC is empty.
;   IDC - key of SysCursors (e.g. "IDC_CROSS"), or "" to restore the defaults.
; While cursors are replaced, an OnExit callback is registered so they are
; restored if the script exits; the callback is unregistered after restoring.
ReplaceSystemCursors(IDC = "")
{
   static IMAGE_CURSOR := 2, SPI_SETCURSORS := 0x57
        , exitFunc := Func("ReplaceSystemCursors").Bind("")
        , SysCursors := { IDC_APPSTARTING: 32650
                        , IDC_ARROW      : 32512
                        , IDC_CROSS      : 32515
                        , IDC_HAND       : 32649
                        , IDC_HELP       : 32651
                        , IDC_IBEAM      : 32513
                        , IDC_NO         : 32648
                        , IDC_SIZEALL    : 32646
                        , IDC_SIZENESW   : 32643
                        , IDC_SIZENWSE   : 32642
                        , IDC_SIZEWE     : 32644
                        , IDC_SIZENS     : 32645
                        , IDC_UPARROW    : 32516
                        , IDC_WAIT       : 32514 }
   if !IDC {
      ; SPI_SETCURSORS reloads the system cursors.
      DllCall("SystemParametersInfo", "UInt", SPI_SETCURSORS, "UInt", 0, "UInt", 0, "UInt", 0)
      OnExit(exitFunc, 0)      ; 0 = unregister the restore callback
   }
   else {
      hCursor := DllCall("LoadCursor", "Ptr", 0, "UInt", SysCursors[IDC], "Ptr")
      for k, v in SysCursors {
         ; A fresh copy is passed for every slot because SetSystemCursor consumes the handle.
         hCopy := DllCall("CopyImage", "Ptr", hCursor, "UInt", IMAGE_CURSOR, "Int", 0, "Int", 0, "UInt", 0, "Ptr")
         DllCall("SetSystemCursor", "Ptr", hCopy, "UInt", v)
      }
      OnExit(exitFunc)         ; make sure the defaults come back if the script exits
   }
}

; Creates the (initially hidden) selection rectangle: a caption-less, always-on-top,
; click-through (+E0x20), semi-transparent orange window. Returns its HWND.
CreateSelectionGui() {
   Gui, New, +hwndhGui +Alwaysontop -Caption +LastFound +ToolWindow +E0x20 -DPIScale
   WinSet, Transparent, 130     ; acts on the "last found" window, i.e. the new GUI
   Gui, Color, FFC800
   Return hGui
}

; WH_MOUSE_LL hook callback that doubles as its own timer routine.
;   Hook mode : (nCode, wParam, lParam) as delivered by Windows; queues the mouse
;               position and schedules the timer. Returns CallNextHookEx's result.
;   Timer mode: nCode = "timer"; drains the queue and resizes the selection GUI.
; The GUI handle arrives via A_EventInfo (set when the callback was registered).
LowLevelMouseProc(nCode, wParam, lParam) {
   static WM_MOUSEMOVE := 0x200, WM_LBUTTONUP := 0x202
        , coords := [], startMouseX, startMouseY, hGui
        , timer := Func("LowLevelMouseProc").Bind("timer", "", "")

   if (nCode = "timer") {
      while coords[1] {
         point := coords.RemoveAt(1)
         mouseX := point[1], mouseY := point[2]
         ; The rectangle spans from the drag origin to the current point, in either direction.
         x := startMouseX < mouseX ? startMouseX : mouseX
         y := startMouseY < mouseY ? startMouseY : mouseY
         w := Abs(mouseX - startMouseX)
         h := Abs(mouseY - startMouseY)
         try Gui, %hGUi%: Show, x%x% y%y% w%w% h%h% NA
      }
   }
   else {
      (!hGui && hGui := A_EventInfo)
      if (wParam = WM_LBUTTONUP)
         startMouseX := startMouseY := ""       ; drag finished: forget the origin
      if (wParam = WM_MOUSEMOVE) {
         ; lParam points at a structure whose first two Int fields are the x and y position.
         mouseX := NumGet(lParam + 0, "Int")
         mouseY := NumGet(lParam + 4, "Int")
         if (startMouseX = "") {
            startMouseX := mouseX
            startMouseY := mouseY
         }
         coords.Push([mouseX, mouseY])
         ; Drawing is deferred to a timer so the hook itself returns quickly.
         SetTimer, % timer, -10
      }
      Return DllCall("CallNextHookEx", "Ptr", 0, "Int", nCode, "UInt", wParam, "Ptr", lParam)
   }
}

; RAII-style wrapper around SetWindowsHookEx: the hook lives as long as the object.
class WindowsHook {
   ; type      - hook id (e.g. 14 for WH_MOUSE_LL)
   ; callback  - name of the script function receiving (nCode, wParam, lParam)
   ; eventInfo - value exposed to the callback as A_EventInfo
   ; isGlobal  - true: hook with this module's handle and thread id 0;
   ;             false: hook only the current thread
   __New(type, callback, eventInfo := "", isGlobal := true) {
      this.callbackPtr := RegisterCallback(callback, "Fast", 3, eventInfo)
      this.hHook := DllCall("SetWindowsHookEx", "Int", type, "Ptr", this.callbackPtr
                                              , "Ptr", !isGlobal ? 0 : DllCall("GetModuleHandle", "Ptr", 0, "Ptr")
                                              , "UInt", isGlobal ? 0 : DllCall("GetCurrentThreadId"), "Ptr")
   }

   ; Removes the hook and frees the callback thunk created by RegisterCallback.
   __Delete() {
      DllCall("UnhookWindowsHookEx", "Ptr", this.hHook)
      DllCall("GlobalFree", "Ptr", this.callbackPtr, "Ptr")
   }
}

; Copies the screen rectangle (X, Y, W, H) into a new bitmap.
; Returns an HBITMAP; the caller owns it and must free it with DeleteObject.
HBitmapFromScreen(X, Y, W, H) {
   HDC := DllCall("GetDC", "Ptr", 0, "UPtr")                                   ; screen DC
   HBM := DllCall("CreateCompatibleBitmap", "Ptr", HDC, "Int", W, "Int", H, "UPtr")
   PDC := DllCall("CreateCompatibleDC", "Ptr", HDC, "UPtr")
   OBM := DllCall("SelectObject", "Ptr", PDC, "Ptr", HBM, "UPtr")
   DllCall("BitBlt", "Ptr", PDC, "Int", 0, "Int", 0, "Int", W, "Int", H
                   , "Ptr", HDC, "Int", X, "Int", Y, "UInt", 0x00CC0020)       ; SRCCOPY
   ; Put the DC's original bitmap back so HBM is not selected into a DC being deleted.
   DllCall("SelectObject", "Ptr", PDC, "Ptr", OBM)
   DllCall("DeleteDC", "Ptr", PDC)
   DllCall("ReleaseDC", "Ptr", 0, "Ptr", HDC)
   Return HBM
}

; Serializes an HBITMAP into an in-memory stream and wraps it as a WinRT
; IRandomAccessStream (the input type used by ocr()).
;   hBitmap - GDI bitmap handle; it is not freed here.
; Returns the IRandomAccessStream pointer; the caller owns that reference.
HBitmapToRandomAccessStream(hBitmap) {
   static IID_IRandomAccessStream := "{905A0FE1-BC53-11DF-8C49-001E4FC686DA}"
        , IID_IPicture            := "{7BF80980-BF32-101A-8BBB-00AA00300CAB}"
        , PICTYPE_BITMAP := 1
        , BSOS_DEFAULT   := 0

   DllCall("Ole32\CreateStreamOnHGlobal", "Ptr", 0, "UInt", true, "PtrP", pIStream, "UInt")

   ; PICTDESC: UInt cbSizeofstruct @0, UInt picType @4, HBITMAP @8, HPALETTE after it.
   VarSetCapacity(PICTDESC, sz := 8 + A_PtrSize*2, 0)
   NumPut(sz, PICTDESC, 0, "UInt")
   NumPut(PICTYPE_BITMAP, PICTDESC, 4, "UInt")   ; without this picType stays 0
   NumPut(hBitmap, PICTDESC, 8, "Ptr")

   riid := CLSIDFromString(IID_IPicture, GUID1)
   DllCall("OleAut32\OleCreatePictureIndirect", "Ptr", &PICTDESC, "Ptr", riid, "UInt", false, "PtrP", pIPicture, "UInt")
   ; IPicture::SaveAsFile
   DllCall(NumGet(NumGet(pIPicture+0) + A_PtrSize*15), "Ptr", pIPicture, "Ptr", pIStream, "UInt", true, "UIntP", size, "UInt")

   riid := CLSIDFromString(IID_IRandomAccessStream, GUID2)
   DllCall("ShCore\CreateRandomAccessStreamOverStream", "Ptr", pIStream, "UInt", BSOS_DEFAULT, "Ptr", riid, "PtrP", pIRandomAccessStream, "UInt")

   ; Drop the local references; the returned stream is the only pointer handed out.
   ObjRelease(pIPicture)
   ObjRelease(pIStream)
   Return pIRandomAccessStream
}

; Converts a "{xxxxxxxx-...}" GUID string into its 16-byte binary form.
;   IID   - GUID string.
;   CLSID - ByRef buffer that receives the binary GUID.
; Returns the address of CLSID. Throws an Exception if the conversion fails.
CLSIDFromString(IID, ByRef CLSID) {
   VarSetCapacity(CLSID, 16, 0)
   if res := DllCall("ole32\CLSIDFromString", "WStr", IID, "Ptr", &CLSID, "UInt")
      throw Exception("CLSIDFromString failed. Error: " . Format("{:#x}", res))
   Return &CLSID
}

; Runs the UWP OCR engine on an image stream.
;   file - IRandomAccessStream pointer holding an encoded image, or the literal
;          string "ShowAvailableLanguages" to list the OCR-capable language tags.
;          The stream is closed and released by this function.
;   lang - BCP-47 language tag (e.g. "ru"), or "FirstFromAvailableLanguages" to
;          let Windows pick from the user profile languages.
; Returns the recognized text, one recognized line per "`n"-terminated row
; (or the list of language tags). Fatal problems show a message box and exit the script.
ocr(file, lang := "FirstFromAvailableLanguages")
{
   static OcrEngineStatics, OcrEngine, MaxDimension, LanguageFactory, Language, CurrentLanguage, BitmapDecoderStatics, GlobalizationPreferencesStatics
   if (OcrEngineStatics = "")
   {
      ; One-time creation of the activation factories.
      CreateClass("Windows.Globalization.Language", ILanguageFactory := "{9B0252AC-0C27-44F8-B792-9793FB66C63E}", LanguageFactory)
      CreateClass("Windows.Graphics.Imaging.BitmapDecoder", IBitmapDecoderStatics := "{438CCB26-BCEF-4E95-BAD6-23A822E58D01}", BitmapDecoderStatics)
      CreateClass("Windows.Media.Ocr.OcrEngine", IOcrEngineStatics := "{5BFFA85A-3384-3540-9940-699120D428A8}", OcrEngineStatics)
      DllCall(NumGet(NumGet(OcrEngineStatics+0)+6*A_PtrSize), "ptr", OcrEngineStatics, "uint*", MaxDimension)   ; MaxImageDimension
   }
   if (file = "ShowAvailableLanguages")
   {
      if (GlobalizationPreferencesStatics = "")
         CreateClass("Windows.System.UserProfile.GlobalizationPreferences", IGlobalizationPreferencesStatics := "{01BF4326-ED37-4E96-B0E9-C1340D1EA158}", GlobalizationPreferencesStatics)
      DllCall(NumGet(NumGet(GlobalizationPreferencesStatics+0)+9*A_PtrSize), "ptr", GlobalizationPreferencesStatics, "ptr*", LanguageList)   ; get_Languages
      DllCall(NumGet(NumGet(LanguageList+0)+7*A_PtrSize), "ptr", LanguageList, "int*", count)   ; count
      loop % count
      {
         DllCall(NumGet(NumGet(LanguageList+0)+6*A_PtrSize), "ptr", LanguageList, "int", A_Index-1, "ptr*", hString)   ; get_Item
         DllCall(NumGet(NumGet(LanguageFactory+0)+6*A_PtrSize), "ptr", LanguageFactory, "ptr", hString, "ptr*", LanguageTest)   ; CreateLanguage
         DllCall("Combase.dll\WindowsDeleteString", "ptr", hString)
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+8*A_PtrSize), "ptr", OcrEngineStatics, "ptr", LanguageTest, "int*", bool)   ; IsLanguageSupported
         if (bool = 1)
         {
            DllCall(NumGet(NumGet(LanguageTest+0)+6*A_PtrSize), "ptr", LanguageTest, "ptr*", hText)
            buffer := DllCall("Combase.dll\WindowsGetStringRawBuffer", "ptr", hText, "uint*", length, "ptr")
            text .= StrGet(buffer, "UTF-16") "`n"
            DllCall("Combase.dll\WindowsDeleteString", "ptr", hText)
         }
         ObjRelease(LanguageTest)
      }
      ObjRelease(LanguageList)
      return text
   }
   if (lang != CurrentLanguage) or (lang = "FirstFromAvailableLanguages")
   {
      ; (Re)create the engine; drop the previous engine/language objects first.
      if (OcrEngine != "")
      {
         ObjRelease(OcrEngine)
         if (CurrentLanguage != "FirstFromAvailableLanguages")
            ObjRelease(Language)
      }
      if (lang = "FirstFromAvailableLanguages")
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+10*A_PtrSize), "ptr", OcrEngineStatics, "ptr*", OcrEngine)   ; TryCreateFromUserProfileLanguages
      else
      {
         CreateHString(lang, hString)
         DllCall(NumGet(NumGet(LanguageFactory+0)+6*A_PtrSize), "ptr", LanguageFactory, "ptr", hString, "ptr*", Language)   ; CreateLanguage
         DllCall("Combase.dll\WindowsDeleteString", "ptr", hString)
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+9*A_PtrSize), "ptr", OcrEngineStatics, "ptr", Language, "ptr*", OcrEngine)   ; TryCreateFromLanguage
      }
      if (OcrEngine = 0)
      {
         msgbox Can not use language "%lang%" for OCR, please install language pack.
         ExitApp   ; continuing would dereference a null engine pointer
      }
      CurrentLanguage := lang
   }
   IRandomAccessStream := file
   ; Each *Async call returns an operation object; WaitForAsync blocks until it
   ; completes and swaps the variable for the operation's result.
   DllCall(NumGet(NumGet(BitmapDecoderStatics+0)+14*A_PtrSize), "ptr", BitmapDecoderStatics, "ptr", IRandomAccessStream, "ptr*", BitmapDecoder)   ; CreateAsync
   WaitForAsync(BitmapDecoder)
   BitmapFrame := ComObjQuery(BitmapDecoder, IBitmapFrame := "{72A49A1C-8081-438D-91BC-94ECFC8185C6}")
   DllCall(NumGet(NumGet(BitmapFrame+0)+12*A_PtrSize), "ptr", BitmapFrame, "uint*", width)   ; get_PixelWidth
   DllCall(NumGet(NumGet(BitmapFrame+0)+13*A_PtrSize), "ptr", BitmapFrame, "uint*", height)   ; get_PixelHeight
   if (width > MaxDimension) or (height > MaxDimension)
   {
      msgbox Image is to big - %width%x%height%.`nIt should be maximum - %MaxDimension% pixels
      ExitApp
   }
   BitmapFrameWithSoftwareBitmap := ComObjQuery(BitmapDecoder, IBitmapFrameWithSoftwareBitmap := "{FE287C9A-420C-4963-87AD-691436E08383}")
   DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+6*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapAsync
   WaitForAsync(SoftwareBitmap)
   DllCall(NumGet(NumGet(OcrEngine+0)+6*A_PtrSize), "ptr", OcrEngine, "ptr", SoftwareBitmap, "ptr*", OcrResult)   ; RecognizeAsync
   WaitForAsync(OcrResult)
   DllCall(NumGet(NumGet(OcrResult+0)+6*A_PtrSize), "ptr", OcrResult, "ptr*", LinesList)   ; get_Lines
   DllCall(NumGet(NumGet(LinesList+0)+7*A_PtrSize), "ptr", LinesList, "int*", count)   ; count
   loop % count
   {
      DllCall(NumGet(NumGet(LinesList+0)+6*A_PtrSize), "ptr", LinesList, "int", A_Index-1, "ptr*", OcrLine)   ; get_Item
      DllCall(NumGet(NumGet(OcrLine+0)+7*A_PtrSize), "ptr", OcrLine, "ptr*", hText)   ; line text as HSTRING
      buffer := DllCall("Combase.dll\WindowsGetStringRawBuffer", "ptr", hText, "uint*", length, "ptr")
      text .= StrGet(buffer, "UTF-16") "`n"
      DllCall("Combase.dll\WindowsDeleteString", "ptr", hText)
      ObjRelease(OcrLine)
   }
   Close := ComObjQuery(IRandomAccessStream, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   Close := ComObjQuery(SoftwareBitmap, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   ; Release every per-call COM reference (the static factories/engine are kept).
   ObjRelease(IRandomAccessStream)
   ObjRelease(BitmapDecoder)
   ObjRelease(BitmapFrame)
   ObjRelease(BitmapFrameWithSoftwareBitmap)
   ObjRelease(SoftwareBitmap)
   ObjRelease(OcrResult)
   ObjRelease(LinesList)
   return text
}

; Obtains a WinRT activation factory.
;   string    - runtime class name, e.g. "Windows.Media.Ocr.OcrEngine".
;   interface - IID string of the factory/statics interface wanted.
;   Class     - ByRef; receives the interface pointer.
; On failure a message box is shown and the script exits.
CreateClass(string, interface, ByRef Class)
{
   CreateHString(string, hString)
   VarSetCapacity(GUID, 16)
   DllCall("ole32\CLSIDFromString", "wstr", interface, "ptr", &GUID)
   ; "uint" return type: the HRESULT must be unsigned for the 0x8... comparisons below to match.
   result := DllCall("Combase.dll\RoGetActivationFactory", "ptr", hString, "ptr", &GUID, "ptr*", Class, "uint")
   DllCall("Combase.dll\WindowsDeleteString", "ptr", hString)
   if (result != 0)
   {
      if (result = 0x80004002)
         msgbox No such interface supported
      else if (result = 0x80040154)
         msgbox Class not registered
      else
         msgbox error: %result%
      ExitApp
   }
}

; Creates a WinRT HSTRING from a script string.
;   string  - text to wrap.
;   hString - ByRef; receives the HSTRING handle (free it with DeleteHString).
CreateHString(string, ByRef hString)
{
   DllCall("Combase.dll\WindowsCreateString", "wstr", string, "uint", StrLen(string), "ptr*", hString)
}

; Frees an HSTRING created by CreateHString.
DeleteHString(hString)
{
   DllCall("Combase.dll\WindowsDeleteString", "ptr", hString)
}

; Blocks until a WinRT async operation finishes, then replaces it with its result.
;   Object - ByRef; on entry the async operation pointer, on return the pointer
;            obtained from GetResults (the operation itself is released).
; Status 0 means "started"; 1 means "completed"; anything else is reported with the
; operation's error code and terminates the script.
; Note: the wait relies on Sleep; the thread above reports that a thread made
; Critical keeps the status at 0 indefinitely.
WaitForAsync(ByRef Object)
{
   AsyncInfo := ComObjQuery(Object, IAsyncInfo := "{00000036-0000-0000-C000-000000000046}")
   loop
   {
      DllCall(NumGet(NumGet(AsyncInfo+0)+7*A_PtrSize), "ptr", AsyncInfo, "uint*", status)   ; IAsyncInfo.Status
      if (status != 0)
      {
         if (status != 1)
         {
            DllCall(NumGet(NumGet(AsyncInfo+0)+8*A_PtrSize), "ptr", AsyncInfo, "uint*", ErrorCode)   ; IAsyncInfo.ErrorCode
            msgbox AsyncInfo status error: %ErrorCode%
            ExitApp
         }
         ObjRelease(AsyncInfo)
         break
      }
      sleep 10
   }
   DllCall(NumGet(NumGet(Object+0)+8*A_PtrSize), "ptr", Object, "ptr*", ObjectResult)   ; GetResults
   ObjRelease(Object)
   Object := ObjectResult
}

This is the second script, 'Face Detection From File'. It does not scan the screen content; instead it scans a specific file and returns the coordinates of the faces found:

Code: Select all

; Detect faces in face.jpg (resolved relative to the script folder) and show their bounds.
MsgBox, % facedetect("face.jpg")

; Detects faces in an image file with Windows.Media.FaceAnalysis.FaceDetector.
;   file      - image path; a path without a drive letter is taken relative to A_ScriptDir.
;   maxheight - images taller than this are scaled down to this height before detection.
; Returns one line per face: "faceN: x=.., y=.., width=.., height=..`n".
; The coordinates refer to the bitmap handed to the detector, i.e. the scaled
; bitmap when the image was taller than maxheight.
; Fatal problems show a message box and exit the script.
facedetect(file, maxheight := 2000)
{
   static BitmapDecoderStatics, BitmapEncoderStatics, SoftwareBitmapStatics, FaceDetector, SupportedBitmapPixelFormats
   if (FaceDetector = "")
   {
      ; One-time setup: factories, the detector, and the list of pixel formats it accepts.
      CreateClass("Windows.Graphics.Imaging.BitmapDecoder", IBitmapDecoderStatics := "{438CCB26-BCEF-4E95-BAD6-23A822E58D01}", BitmapDecoderStatics)
      CreateClass("Windows.Graphics.Imaging.BitmapEncoder", IBitmapEncoderStatics := "{A74356A7-A4E4-4EB9-8E40-564DE7E1CCB2}", BitmapEncoderStatics)
      CreateClass("Windows.Graphics.Imaging.SoftwareBitmap", ISoftwareBitmapStatics := "{DF0385DB-672F-4A9D-806E-C2442F343E86}", SoftwareBitmapStatics)
      CreateClass("Windows.Media.FaceAnalysis.FaceDetector", IFaceDetectorStatics := "{BC042D67-9047-33F6-881B-6746C1B218B8}", FaceDetectorStatics)
      DllCall(NumGet(NumGet(FaceDetectorStatics+0)+6*A_PtrSize), "ptr", FaceDetectorStatics, "ptr*", FaceDetector)   ; CreateAsync
      WaitForAsync(FaceDetector)
      DllCall(NumGet(NumGet(FaceDetectorStatics+0)+7*A_PtrSize), "ptr", FaceDetectorStatics, "ptr*", ReadOnlyList)   ; GetSupportedBitmapPixelFormats
      DllCall(NumGet(NumGet(ReadOnlyList+0)+7*A_PtrSize), "ptr", ReadOnlyList, "int*", count)   ; count
      loop % count
      {
         DllCall(NumGet(NumGet(ReadOnlyList+0)+6*A_PtrSize), "ptr", ReadOnlyList, "int", A_Index-1, "uint*", BitmapPixelFormat)   ; get_Item
         SupportedBitmapPixelFormats .= "|" BitmapPixelFormat "|"
      }
      ObjRelease(FaceDetectorStatics)
      ObjRelease(ReadOnlyList)
   }
   if (SubStr(file, 2, 1) != ":")
      file := A_ScriptDir "\" file
   if !FileExist(file) or InStr(FileExist(file), "D")
   {
      msgbox File "%file%" does not exist
      ExitApp
   }
   VarSetCapacity(GUID, 16)
   DllCall("ole32\CLSIDFromString", "wstr", IID_RandomAccessStream := "{905A0FE1-BC53-11DF-8C49-001E4FC686DA}", "ptr", &GUID)
   DllCall("ShCore\CreateRandomAccessStreamOnFile", "wstr", file, "uint", Read := 0, "ptr", &GUID, "ptr*", IRandomAccessStream)
   ; Each *Async call yields an operation object; WaitForAsync swaps it for the result.
   DllCall(NumGet(NumGet(BitmapDecoderStatics+0)+14*A_PtrSize), "ptr", BitmapDecoderStatics, "ptr", IRandomAccessStream, "ptr*", BitmapDecoder)   ; CreateAsync
   WaitForAsync(BitmapDecoder)
   BitmapFrame := ComObjQuery(BitmapDecoder, IBitmapFrame := "{72A49A1C-8081-438D-91BC-94ECFC8185C6}")
   DllCall(NumGet(NumGet(BitmapFrame+0)+12*A_PtrSize), "ptr", BitmapFrame, "uint*", width)   ; get_PixelWidth
   DllCall(NumGet(NumGet(BitmapFrame+0)+13*A_PtrSize), "ptr", BitmapFrame, "uint*", height)   ; get_PixelHeight
   DllCall(NumGet(NumGet(BitmapFrame+0)+8*A_PtrSize), "ptr", BitmapFrame, "uint*", BitmapPixelFormat)   ; get_BitmapPixelFormat
   BitmapFrameWithSoftwareBitmap := ComObjQuery(BitmapDecoder, IBitmapFrameWithSoftwareBitmap := "{FE287C9A-420C-4963-87AD-691436E08383}")
   ; Remember the decision now: it also controls which objects are released at the end.
   scaled := (height > maxheight)
   if (scaled)
   {
      DllCall(NumGet(NumGet(BitmapEncoderStatics+0)+15*A_PtrSize), "ptr", BitmapEncoderStatics, "ptr", IRandomAccessStream, "ptr", BitmapDecoder, "ptr*", BitmapEncoder)   ; CreateForTranscodingAsync
      WaitForAsync(BitmapEncoder)
      DllCall(NumGet(NumGet(BitmapEncoder+0)+15*A_PtrSize), "ptr", BitmapEncoder, "ptr*", BitmapTransform)   ; BitmapTransform
      DllCall(NumGet(NumGet(BitmapTransform+0)+7*A_PtrSize), "ptr", BitmapTransform, "int", floor(maxheight/height*width))   ; put_ScaledWidth
      DllCall(NumGet(NumGet(BitmapTransform+0)+9*A_PtrSize), "ptr", BitmapTransform, "int", maxheight)   ; put_ScaledHeight
      DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+8*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "uint", BitmapPixelFormat, "uint", Premultiplied := 0, "ptr", BitmapTransform, "uint", IgnoreExifOrientation := 0, "uint", DoNotColorManage := 0, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapTransformedAsync
   }
   else
      DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+6*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapAsync
   WaitForAsync(SoftwareBitmap)
   if !InStr(SupportedBitmapPixelFormats, "|" BitmapPixelFormat "|")
   {
      ; The detector does not accept this pixel format: convert to Gray8 and drop the original.
      DllCall(NumGet(NumGet(SoftwareBitmapStatics+0)+7*A_PtrSize), "ptr", SoftwareBitmapStatics, "ptr", SoftwareBitmap, "uint", Gray8 := 62, "ptr*", SoftwareBitmapTemp)   ; Convert
      Close := ComObjQuery(SoftwareBitmap, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
      DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
      ObjRelease(Close)
      ObjRelease(SoftwareBitmap)
      SoftwareBitmap := SoftwareBitmapTemp
   }
   DllCall(NumGet(NumGet(FaceDetector+0)+6*A_PtrSize), "ptr", FaceDetector, "ptr", SoftwareBitmap, "ptr*", DetectedFaceList)   ; DetectFacesAsync
   WaitForAsync(DetectedFaceList)
   DllCall(NumGet(NumGet(DetectedFaceList+0)+7*A_PtrSize), "ptr", DetectedFaceList, "int*", count)   ; count
   loop % count
   {
      VarSetCapacity(bounds, 16, 0)
      DllCall(NumGet(NumGet(DetectedFaceList+0)+6*A_PtrSize), "ptr", DetectedFaceList, "int", A_Index-1, "ptr*", DetectedFace)   ; get_Item
      DllCall(NumGet(NumGet(DetectedFace+0)+6*A_PtrSize), "ptr", DetectedFace, "ptr", &bounds)   ; BitmapBounds
      ObjRelease(DetectedFace)
      ; Separate names: the image's own width/height must not be overwritten by face bounds.
      faceX := NumGet(bounds, 0, "uint")
      faceY := NumGet(bounds, 4, "uint")
      faceW := NumGet(bounds, 8, "uint")
      faceH := NumGet(bounds, 12, "uint")
      result .= "face" A_Index ": x=" faceX ", y=" faceY ", width=" faceW ", height=" faceH "`n"
   }
   Close := ComObjQuery(IRandomAccessStream, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   Close := ComObjQuery(SoftwareBitmap, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   if (scaled)
   {
      ObjRelease(BitmapEncoder)
      ObjRelease(BitmapTransform)
   }
   ObjRelease(IRandomAccessStream)
   ObjRelease(BitmapDecoder)
   ObjRelease(BitmapFrame)
   ObjRelease(BitmapFrameWithSoftwareBitmap)
   ObjRelease(SoftwareBitmap)
   ObjRelease(DetectedFaceList)
   return result
}

; Obtains a WinRT activation factory.
;   string    - runtime class name, e.g. "Windows.Media.FaceAnalysis.FaceDetector".
;   interface - IID string of the factory/statics interface wanted.
;   Class     - ByRef; receives the interface pointer.
; On failure a message box is shown and the script exits.
CreateClass(string, interface, ByRef Class)
{
   CreateHString(string, hString)
   VarSetCapacity(GUID, 16)
   DllCall("ole32\CLSIDFromString", "wstr", interface, "ptr", &GUID)
   result := DllCall("Combase.dll\RoGetActivationFactory", "ptr", hString, "ptr", &GUID, "ptr*", Class, "uint")
   DllCall("Combase.dll\WindowsDeleteString", "ptr", hString)   ; the class-name HSTRING is no longer needed
   if (result != 0)
   {
      if (result = 0x80004002)
         msgbox No such interface supported
      else if (result = 0x80040154)
         msgbox Class not registered
      else
         msgbox error: %result%
      ExitApp
   }
}

; Creates a WinRT HSTRING from a script string.
;   string  - text to wrap.
;   hString - ByRef; receives the HSTRING handle (free it with DeleteHString).
CreateHString(string, ByRef hString)
{
   DllCall("Combase.dll\WindowsCreateString", "wstr", string, "uint", StrLen(string), "ptr*", hString)
}

; Frees an HSTRING created by CreateHString.
DeleteHString(hString)
{
   DllCall("Combase.dll\WindowsDeleteString", "ptr", hString)
}

; Blocks until a WinRT async operation finishes, then replaces it with its result.
;   Object - ByRef; on entry the async operation pointer, on return the pointer
;            obtained from GetResults (the operation itself is released).
; Status 0 means "started"; 1 means "completed"; anything else is reported with the
; operation's error code and terminates the script.
WaitForAsync(ByRef Object)
{
   AsyncInfo := ComObjQuery(Object, IAsyncInfo := "{00000036-0000-0000-C000-000000000046}")
   loop
   {
      DllCall(NumGet(NumGet(AsyncInfo+0)+7*A_PtrSize), "ptr", AsyncInfo, "uint*", status)   ; IAsyncInfo.Status
      if (status != 0)
      {
         if (status != 1)
         {
            DllCall(NumGet(NumGet(AsyncInfo+0)+8*A_PtrSize), "ptr", AsyncInfo, "uint*", ErrorCode)   ; IAsyncInfo.ErrorCode
            msgbox AsyncInfo status error: %ErrorCode%
            ExitApp
         }
         ObjRelease(AsyncInfo)
         break
      }
      sleep 10
   }
   DllCall(NumGet(NumGet(Object+0)+8*A_PtrSize), "ptr", Object, "ptr*", ObjectResult)   ; GetResults
   ObjRelease(Object)
   Object := ObjectResult
}

So I am trying to do 3 things:

1. Apply 'Face Recognition From File' script to the 'Text Recognition From Screen' script so that new script is finding faces on the screen
2. Remove the area selector from the 'Text Recognition From Screen' script, so that the script scans the whole screen instead of a selected area
3. Make new script Mouse move to the face found that is closest to the screen center

Dear autohotkey forum members, any help would be strongly appreciated and surely I will share my codes on the road to the goal script. :crazy:
Regards and all the best for You :wave:
Re: Optical character recognition (OCR) with UWP API

22 Jan 2021, 04:45


Is it possible to read field with coordinates inside .png file?

We have a function here for reading from the screen, but what if I want to read the picture without putting it on screen?

Re: Optical character recognition (OCR) with UWP API

22 Jan 2021, 05:17

The first code in this topic loads image.
If you want to read only particular coordinates, you have to crop your image (with GDI+, for example) and then convert the resulting HBitmap to a RandomAccessStream (see the second example in the first post).
Re: Optical character recognition (OCR) with UWP API

22 Jan 2021, 06:46

Hi @malcev,

thank you for suggestion I will try like that.

Re: Optical character recognition (OCR) with UWP API

29 Jan 2021, 12:10

Works fine for me!!!

Solid job @malcev @teadrinker @adegard !!! Thank you!
Re: Optical character recognition (OCR) with UWP API

07 Feb 2021, 12:49

Hello folks! First of all thanks for this awesome OCR script!

I had quite some success but after a Windows update I now get a No valid COM object! error when I run the teadrinker code included in my runtime.
From this line here in the WaitForAsync function:

Code: Select all

; Query the async operation object for the interface with IID {00000036-...} (named IAsyncInfo here).
AsyncInfo := ComObjQuery(Object, IAsyncInfo := "{00000036-0000-0000-C000-000000000046}")
It works twice, on BitmapDecoder and SoftwareBitmap, but on the third call it breaks with the error mentioned above. That's from this call here:

Code: Select all

    DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+6*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapAsync
    DllCall(NumGet(NumGet(OcrEngine+0)+6*A_PtrSize), "ptr", OcrEngine, ptr, SoftwareBitmap, "ptr*", OcrResult)   ; RecognizeAsync
>>> WaitForAsync(OcrResult)
    DllCall(NumGet(NumGet(OcrResult+0)+6*A_PtrSize), "ptr", OcrResult, "ptr*", LinesList)   ; get_Lines
    DllCall(NumGet(NumGet(LinesList+0)+7*A_PtrSize), "ptr", LinesList, "int*", count)   ; count
    loop % count
    { ...
As a workaround I call it with a separate Autohotkey.exe process via ComObjCreate("WScript.Shell") which also works but its rather sad that it breaks in my runtime now.
Any Ideas?
Re: Optical character recognition (OCR) with UWP API

07 Feb 2021, 13:27

Can't reproduce the issue. All latest updates are installed. Windows 10 Pro 20H2.
Re: Optical character recognition (OCR) with UWP API

07 Feb 2021, 18:08

Yeah I'm on the same. Hmm .. now it works again. With both methods :/ weird. 🤷‍♀️
Re: Optical character recognition (OCR) with UWP API

06 Apr 2021, 16:49

Hey guys, having some troubles here. If I run this on a separate script with nothing else in it, it gets all the text I want from the screen area I chose. Once I place this code in my main script (which has thousands of lines) then it fails.

Running a tooltip in the WaitForAsync seems to always just return 0 for Status which never resolves. The call just before this is: WaitForAsync(BitmapDecoder)

Code: Select all

; Reads the operation's status through vtable slot 7 of AsyncInfo into `status`
; (the value reported above as staying at 0).
DllCall(NumGet(NumGet(AsyncInfo+0)+7*A_PtrSize), "ptr", AsyncInfo, "uint*", status)   ; IAsyncInfo.Status
Any ideas how to debug this? Just want to know if there's any known conflicts or if theres any way I can start to work through what might be preventing the status from resolving.

Code: Select all

; Fixed-region test: capture an 800x599 rectangle at (15, 15), OCR it with the
; default language selection and show the recognized text.
x := 15, y := 15
w := 800, h := 599
hBitmap := HBitmapFromScreen(x, y, w, h)
pIRandomAccessStream := HBitmapToRandomAccessStream(hBitmap)
DllCall("DeleteObject", "Ptr", hBitmap)
text := ocr(pIRandomAccessStream)
MsgBox, % text

edit: I've tried removing all timers, hooks, simplifying my auto-execute all to no avail. I checked to make sure the bitmap was valid by converting to pbitmap and saving to file and all looks good there.
Re: Optical character recognition (OCR) with UWP API

06 Apr 2021, 16:58

First, make sure hBitmap is actually returned:

Code: Select all

; Diagnostic: stores the value returned by HBitmapFromScreen in hBitmap and displays it.
MsgBox, % hBitmap := HBitmapFromScreen(x, y, w, h)
Re: Optical character recognition (OCR) with UWP API

06 Apr 2021, 17:08

teadrinker wrote:
06 Apr 2021, 16:58
First, make sure hBitmap is actually returned:

Code: Select all

; Diagnostic: stores the value returned by HBitmapFromScreen in hBitmap and displays it.
MsgBox, % hBitmap := HBitmapFromScreen(x, y, w, h)
It is returned. As some advice from a friend, he said maybe a critical thread is blocking it. I have disabled all critical threads and now it is working. I guess i have to work through each to find the culprit now.
Re: Optical character recognition (OCR) with UWP API

07 Apr 2021, 12:13

So i am using this error suppressor so my users don't get strange looking errors:

Code: Select all

; Install the replacement error text (message abbreviated by the poster).
SuppressRuntimeErrors("An error has occurred....")

; Registers NewErrorFormat as the text shown instead of the built-in runtime
; error dialog. A "{#}" placeholder in it is replaced by the failing line number.
SuppressRuntimeErrors(NewErrorFormat)
{
    ; Call self-contained helper/message-monitor function:
    return SuppressRuntimeErrors_(NewErrorFormat, 0, 0, 0)
}

; Dual-purpose helper:
;   * Called by the script (hwnd = 0): wParam is the replacement message format;
;     registers this function as the WM_COMMNOTIFY monitor.
;   * Called as the WM_COMMNOTIFY handler (hwnd != 0): if a built-in error dialog
;     is being shown, closes it and shows the custom message instead, then chains
;     to any handler the script had registered before.
; When called by the script, the caller's Critical and DetectHiddenWindows
; settings are restored before returning; leaving Critical on in the caller's
; thread is what kept WaitForAsync's status at 0 in the report above.
SuppressRuntimeErrors_(wParam, lParam, msg, hwnd)
{
    ; Constants:
    static WM_COMMNOTIFY := 0x0044, AHK_DIALOG := 1027
    ; Persistent variables:
    static sScriptWnd := 0, sScriptPID, sOnCommNotify, sMessage

    prevCritical := A_IsCritical   ; 0 when off, otherwise the caller's message-check interval
    Critical 1000

    dhw := A_DetectHiddenWindows
    DetectHiddenWindows On

    if hwnd     ; Called internally to handle a WM_COMMNOTIFY message.
    {
        if (hwnd = sScriptWnd  ; Script's main window is the recipient.
            && wParam = AHK_DIALOG  ; We're showing a dialog of some sort.
            && WinExist("ahk_class #32770 ahk_pid " sScriptPID))
        {
            ControlGetText msg, Static1
            ; The following relies on the fact that all built-in error
            ; dialogs use this format to point out the current line:
            if RegExMatch(msg, "m`a)^--->`t0*\K\d+(?=:)", line)
            {
                ; If we change the text, the dialog will still be sized
                ; based on the previous text.  So instead, close this
                ; dialog and show a new one.
                WinClose
                ; grab the details of the message as well
                RegexMatch(msg, "Specifically:.*Line#",Matched)
                stringreplace, Matched, Matched, Specifically:,,
                stringreplace, Matched, Matched, Line#,,
                StringReplace, Matched, Matched, `r,, All
                StringReplace, Matched, Matched, `n,, All

                StringReplace msg, sMessage, {#}, %line%
                MsgBox 48,, % msg "`nDetails: " Matched
            }
        }
        ; Restore the setting to its thread-default.
        DetectHiddenWindows %dhw%
        ; The following calls the script's WM_COMMNOTIFY handler if it
        ; has one, otherwise it silently fails and returns nothing:
        return %sOnCommNotify%(wParam, lParam, msg, hwnd)
    }
    else        ; Called by script.
    {
        sMessage := wParam

        ; If we're already registered, just return.
        if (OnMessage(WM_COMMNOTIFY) = A_ThisFunc)
        {
            DetectHiddenWindows %dhw%
            Critical %prevCritical%
            return
        }

        ; Retrieve previous message handler, if the script has one.
        sOnCommNotify := OnMessage(WM_COMMNOTIFY)

        ; Retrieve hwnd of main window (usually hidden) and process ID.
        Process Exist
        sScriptPID := ErrorLevel
        sScriptWnd := WinExist("ahk_class AutoHotkey ahk_pid " sScriptPID)

        ; Register message handler.  Since hotkeys and other things can
        ; launch new threads while we're displaying our error dialog,
        ; pass 10 for MaxThreads so that we can catch any error dialogs
        ; that these other threads might display:
        OnMessage(WM_COMMNOTIFY, A_ThisFunc, 10)

        ; Since we were called by script, restore our caller's settings.
        DetectHiddenWindows %dhw%
        Critical %prevCritical%
    }
}

As you can see this contains a Critical, 1000 at the top of the function. It seems that the critical here blocks the OCR WaitForAsync from resolving. There are a few other functions that contain "Critical" that also break WaitForAsync and keep "status" at 0 endlessly. It seems that calling "Critical, off" at the end of these functions fixes it but what is really happening here?
Re: Optical character recognition (OCR) with UWP API

15 Apr 2021, 03:41

How can I make this give accurate results from a specific font? I also tried Vis2 as well but the accuracy is similar to this one for the font I want it to read (i.e. not great) and runs much slower, I see that it's possible to train tesseract to work with new fonts but since it is so much slower I would prefer to use this ocr if possible. I have the .ttf of the font I want it to read. Even if I could just get it to read numbers and dashes accurately I would be very happy
Re: Optical character recognition (OCR) with UWP API

15 Apr 2021, 07:18

No, I think you can't train this OCR or something like this.
Re: Optical character recognition (OCR) with UWP API

21 Apr 2021, 06:52

teadrinker wrote:
15 Apr 2021, 07:18
No, I think you can't train this OCR or something like this.
Even if I give it scooby snacks? (ha ha) Yeah, I agree, I don't think this "learns"
Re: Optical character recognition (OCR) with UWP API

21 Apr 2021, 07:43

Joe Glines wrote: Even if I give it scooby snacks?
As a last resort, you can try to talk to it in a stern voice. :)
Re: Optical character recognition (OCR) with UWP API

22 Apr 2021, 16:16

Hey there! I made an account just to thank you for this wonderful tool.

I do have a question though: the left click dragging is REALLY choppy/laggy/slow. When you hold left click down and drag, it is sometimes half a second to a full second behind. Is there a better way to render this rectangle for better performance?

Thank you!
Re: Optical character recognition (OCR) with UWP API

23 Apr 2021, 03:34

It depends heavily on the performance/load of your computer.

Re: Optical character recognition (OCR) with UWP API

23 Apr 2021, 08:41

Hey teadrinker!

I have quite a powerful machine (i9-7XXX) with no active load on the PC. I stared at task manager while I was moving the rectangle around and load stayed low.

There was a clipboard tool linked in this thread that I downloaded, and its click+drag is really smooth (almost like doing it natively on the Windows desktop). If you move the mouse too quickly around the desktop with this script, it's really slow. I'm happy to provide a gif of it happening, but it's certainly caused by the method being used here.
Re: Optical character recognition (OCR) with UWP API

23 Apr 2021, 10:24

mebo wrote: If you move the the mouse too quick around the desktop with this script, it's really slow.
Can't reproduce. Anyway, you can try adding SetWinDelay, 0 at the beginning of the script and replacing LowLevelMouseProc() with this one:

Code: Select all

; WH_MOUSE_LL hook callback that doubles as its own timer routine (low-latency variant).
;   Hook mode : (nCode, wParam, lParam) as delivered by Windows; remembers only the
;               LATEST mouse position and schedules the timer. Returns CallNextHookEx's result.
;   Timer mode: nCode = "timer"; resizes the selection GUI once, to the latest position.
; Unlike a version that queues every point, intermediate positions are dropped, so the
; rectangle cannot fall behind the mouse.
LowLevelMouseProc(nCode, wParam, lParam) {
   static WM_MOUSEMOVE := 0x200, WM_LBUTTONUP := 0x202
        , coords := [], startMouseX, startMouseY, hGui
        , timer := Func("LowLevelMouseProc").Bind("timer", "", "")

   if (nCode = "timer") {
      point := coords[1]
      mouseX := point[1], mouseY := point[2]
      ; The rectangle spans from the drag origin to the current point, in either direction.
      x := startMouseX < mouseX ? startMouseX : mouseX
      y := startMouseY < mouseY ? startMouseY : mouseY
      w := Abs(mouseX - startMouseX)
      h := Abs(mouseY - startMouseY)
      try Gui, %hGUi%: Show, x%x% y%y% w%w% h%h% NA
   }
   else {
      (!hGui && hGui := A_EventInfo)             ; GUI handle supplied when the callback was registered
      if (wParam = WM_LBUTTONUP)
         startMouseX := startMouseY := ""        ; drag finished: forget the origin
      if (wParam = WM_MOUSEMOVE) {
         ; lParam points at a structure whose first two Int fields are the x and y position.
         mouseX := NumGet(lParam + 0, "Int")
         mouseY := NumGet(lParam + 4, "Int")
         if (startMouseX = "") {
            startMouseX := mouseX
            startMouseY := mouseY
         }
         coords.1 := [mouseX, mouseY]            ; overwrite: only the newest point matters
         SetTimer, % timer, -10
      }
      Return DllCall("CallNextHookEx", "Ptr", 0, "Int", nCode, "UInt", wParam, "Ptr", lParam)
   }
}

