Optical character recognition (OCR) with UWP API

Post your working scripts, libraries and tools for AHK v1.1 and older
AndyRal123
Posts: 21
Joined: 10 Oct 2020, 12:32

Re: Optical character recognition (OCR) with UWP API

Post by AndyRal123 » 28 Dec 2020, 10:21

:headwall: :headwall: :roll:

Hello everyone,

I am trying to merge the first script below ('Text Recognition From Screen') with the second script ('Face Detection From File'), to detect faces on the screen automatically and then move the mouse to the face that is closest to the screen center.
First one is the code for 'text recognition from screen':



malcev wrote:
19 Feb 2020, 20:58
https://docs.microsoft.com/en-us/uwp/api/windows.media.ocr
API recognizes text in 2 ways.

Script for recognizing screenshots by teadrinker:

Code: Select all

; Script-wide settings, then the end of the auto-execute section.
#NoEnv
SetBatchLines, -1
; Nothing below this Return runs at startup; the rest is reached only through hotkeys.
Return

; Esc quits the script.
Esc:: ExitApp

; Ctrl+X: let the user drag a rectangle, capture that screen area and show the recognized text.
^X::
; GetArea() returns an object with x/y/w/h keys; the trailing * expands it into the call's parameters.
hBitmap := HBitmapFromScreen(GetArea()*)
pIRandomAccessStream := HBitmapToRandomAccessStream(hBitmap)
; The bitmap has already been saved into the stream, so the GDI handle is no longer needed.
DllCall("DeleteObject", "Ptr", hBitmap)
; "ru" is the OCR language tag. ocr() closes and releases the stream itself.
text := ocr(pIRandomAccessStream, "ru")
MsgBox, % text
Return

; Starts an interactive rectangle selection and blocks until the user has
; finished it. Returns an object with the keys x, y, w and h.
GetArea() {
   selection := []
   StartSelection(selection)
   ; Select() fills in the object from the LButton hotkey thread; poll until a width is present.
   Loop {
      if (selection.w)
         break
      Sleep, 100
   }
   Return selection
}
   
; Arms the selection: the next left-button press runs Select(area), and the
; system cursors are replaced by a cross until the selection is finished.
StartSelection(area) {
   onLeftButton := Func("Select").Bind(area)
   Hotkey, LButton, % onLeftButton, On
   ReplaceSystemCursors("IDC_CROSS")
}

; LButton hotkey handler. Installs a low-level mouse hook that sizes the
; selection GUI while the button is held, waits for the button to be released,
; then stores the GUI's final rectangle in area.x / area.y / area.w / area.h.
Select(area) {
   static hSelGui := CreateSelectionGui()
   ; 14 = WH_MOUSE_LL. The GUI handle is handed to the callback as its event info.
   mouseHook := new WindowsHook(14, "LowLevelMouseProc", hSelGui)
   Loop {
      KeyWait, LButton
      WinGetPos, left, top, width, height, ahk_id %hSelGui%
      ; A click without a drag leaves no usable rectangle: wait for another release.
      if (width > 0)
         break
   }
   ReplaceSystemCursors("")
   Hotkey, LButton, Off
   ; Dropping the only reference runs WindowsHook.__Delete, which removes the hook.
   mouseHook := ""
   Gui, %hSelGui%:Show, Hide
   area.x := left
   area.y := top
   area.w := width
   area.h := height
}

; Replaces every system cursor listed below with the cursor named by IDC
; (for example "IDC_CROSS"). Called with an empty IDC it restores the system
; defaults. While a replacement is active, an OnExit callback is registered
; so the cursors are restored when the script exits.
ReplaceSystemCursors(IDC = "")
{
   static IMAGE_CURSOR := 2
   static SPI_SETCURSORS := 0x57
   static restoreOnExit := Func("ReplaceSystemCursors").Bind("")
   static cursorIds := { IDC_APPSTARTING: 32650, IDC_ARROW: 32512, IDC_CROSS: 32515
                       , IDC_HAND: 32649, IDC_HELP: 32651, IDC_IBEAM: 32513
                       , IDC_NO: 32648, IDC_SIZEALL: 32646, IDC_SIZENESW: 32643
                       , IDC_SIZENWSE: 32642, IDC_SIZEWE: 32644, IDC_SIZENS: 32645
                       , IDC_UPARROW: 32516, IDC_WAIT: 32514 }

   if (IDC) {
      hSource := DllCall("LoadCursor", "Ptr", 0, "UInt", cursorIds[IDC], "Ptr")
      for cursorName, cursorId in cursorIds  {
         ; Each SetSystemCursor call is given its own copy of the source cursor.
         hDuplicate := DllCall("CopyImage", "Ptr", hSource, "UInt", IMAGE_CURSOR, "Int", 0, "Int", 0, "UInt", 0, "Ptr")
         DllCall("SetSystemCursor", "Ptr", hDuplicate, "UInt", cursorId)
      }
      OnExit(restoreOnExit)
      return
   }
   ; Empty IDC: reload the default system cursors and drop the exit callback.
   DllCall("SystemParametersInfo", "UInt", SPI_SETCURSORS, "UInt", 0, "UInt", 0, "UInt", 0)
   OnExit(restoreOnExit, 0)
}

; Creates the (initially hidden) selection overlay: a borderless, always-on-top,
; click-through (+E0x20) tool window, semi-transparent and filled with FFC800.
; Returns the window handle.
CreateSelectionGui() {
   Gui, New, +HwndhOverlay +Alwaysontop -Caption +LastFound +ToolWindow +E0x20 -DPIScale
   ; +LastFound above makes the new GUI the target of WinSet.
   WinSet, Transparent, 130
   Gui, Color, FFC800
   Return hOverlay
}

; Low-level mouse hook callback that doubles as its own timer routine.
; As a hook (registered through WindowsHook) it receives nCode/wParam/lParam,
; queues mouse positions and passes the event on with CallNextHookEx.
; When called by the bound timer (nCode = "timer") it drains the queue and
; moves/resizes the selection GUI to span from the drag start to each point.
LowLevelMouseProc(nCode, wParam, lParam) {
   static WM_MOUSEMOVE := 0x200, WM_LBUTTONUP := 0x202
        , coords := [], startMouseX, startMouseY, hGui
        , timer := Func("LowLevelMouseProc").Bind("timer", "", "")
   
   if (nCode = "timer") {
      ; Timer path: process every queued point, oldest first.
      while coords[1] {
         point := coords.RemoveAt(1)
         mouseX := point[1], mouseY := point[2]
         ; The rectangle's top-left corner is the smaller of start and current position on each axis.
         x := startMouseX < mouseX ? startMouseX : mouseX
         y := startMouseY < mouseY ? startMouseY : mouseY
         w := Abs(mouseX - startMouseX)
         h := Abs(mouseY - startMouseY)
         ; try: errors from the Gui command are ignored.
         try Gui, %hGUi%: Show, x%x% y%y% w%w% h%h% NA
      }
   }
   else {
      ; Hook path: on the first call remember the GUI handle passed as event info.
      (!hGui && hGui := A_EventInfo)
      ; Button released: forget the start point so the next drag begins fresh.
      if (wParam = WM_LBUTTONUP)
         startMouseX := startMouseY := ""
      if (wParam = WM_MOUSEMOVE)  {
         ; Two Int values at offsets 0 and 4 of the structure lParam points to: the mouse x and y.
         mouseX := NumGet(lParam + 0, "Int")
         mouseY := NumGet(lParam + 4, "Int")
         ; The first move after a reset becomes the drag's start point.
         if (startMouseX = "") {
            startMouseX := mouseX
            startMouseY := mouseY
         }
         coords.Push([mouseX, mouseY])
         ; Defer the GUI update to a one-shot timer so the hook itself returns quickly.
         SetTimer, % timer, -10
      }
      Return DllCall("CallNextHookEx", Ptr, 0, Int, nCode, UInt, wParam, Ptr, lParam)
   }
}

; RAII-style wrapper around SetWindowsHookEx: the hook is installed when the
; object is created and removed when the last reference to it is released.
;   type      - hook type (e.g. 14 for WH_MOUSE_LL)
;   callback  - name of the script function to call (3 parameters)
;   eventInfo - value made available to the callback through A_EventInfo
;   isGlobal  - true: system-wide hook bound to this module;
;               false: hook limited to the current thread
class WindowsHook {
   __New(type, callback, eventInfo := "", isGlobal := true) {
      this.callbackPtr := RegisterCallback(callback, "Fast", 3, eventInfo)
      ; lpModuleName is a pointer parameter, so NULL is passed as "Ptr" (was "UInt").
      this.hHook := DllCall("SetWindowsHookEx", "Int", type, "Ptr", this.callbackPtr
                                              , "Ptr", !isGlobal ? 0 : DllCall("GetModuleHandle", "Ptr", 0, "Ptr")
                                              , "UInt", isGlobal ? 0 : DllCall("GetCurrentThreadId"), "Ptr")
   }
   __Delete() {
      ; Only undo what was actually set up; either step may have failed in __New.
      if (this.hHook)
         DllCall("UnhookWindowsHookEx", "Ptr", this.hHook)
      ; Memory returned by RegisterCallback is freed with GlobalFree.
      if (this.callbackPtr)
         DllCall("GlobalFree", "Ptr", this.callbackPtr, "Ptr")
   }
}

; Copies the screen rectangle (X, Y, W, H) into a new bitmap and returns its
; HBITMAP. The caller owns the handle and must free it with DeleteObject.
; The parameter names matter: the caller expands an object with x/y/w/h keys into them.
HBitmapFromScreen(X, Y, W, H) {
   static SRCCOPY_CAPTUREBLT := 0x00CC0020
   hScreenDC := DllCall("GetDC", "Ptr", 0, "UPtr")
   hBmp      := DllCall("CreateCompatibleBitmap", "Ptr", hScreenDC, "Int", W, "Int", H, "UPtr")
   hMemDC    := DllCall("CreateCompatibleDC", "Ptr", hScreenDC, "UPtr")
   DllCall("SelectObject", "Ptr", hMemDC, "Ptr", hBmp)
   ; Blit from the screen DC at (X, Y) into the memory DC at (0, 0).
   DllCall("BitBlt", "Ptr", hMemDC, "Int", 0, "Int", 0, "Int", W, "Int", H
                   , "Ptr", hScreenDC, "Int", X, "Int", Y, "UInt", SRCCOPY_CAPTUREBLT)
   DllCall("DeleteDC", "Ptr", hMemDC)
   DllCall("ReleaseDC", "Ptr", 0, "Ptr", hScreenDC)
   Return hBmp
}

; Saves a GDI bitmap into an in-memory stream and wraps that stream as a
; WinRT IRandomAccessStream. Returns the IRandomAccessStream pointer; the
; caller owns that reference. hBitmap itself is not freed here.
HBitmapToRandomAccessStream(hBitmap) {
   static IID_IRandomAccessStream := "{905A0FE1-BC53-11DF-8C49-001E4FC686DA}"
   static IID_IPicture := "{7BF80980-BF32-101A-8BBB-00AA00300CAB}"
   static PICTYPE_BITMAP := 1, BSOS_DEFAULT := 0

   ; Memory-backed IStream; the "true" argument is the delete-on-release flag.
   DllCall("Ole32\CreateStreamOnHGlobal", "Ptr", 0, "UInt", true, "PtrP", pStream, "UInt")

   ; PICTDESC: UINT size, UINT type, then the bitmap handle (the rest stays zero).
   descSize := 8 + A_PtrSize*2
   VarSetCapacity(pictDesc, descSize, 0)
   NumPut(descSize, pictDesc, 0, "UInt")
   NumPut(PICTYPE_BITMAP, pictDesc, 4, "UInt")
   NumPut(hBitmap, pictDesc, 8, "Ptr")

   pIID := CLSIDFromString(IID_IPicture, iidPicture)
   DllCall("OleAut32\OleCreatePictureIndirect", "Ptr", &pictDesc, "Ptr", pIID, "UInt", false, "PtrP", pPicture, "UInt")

   ; IPicture::SaveAsFile is vtable entry 15; it writes the picture into the stream.
   pfnSaveAsFile := NumGet(NumGet(pPicture+0) + A_PtrSize*15)
   DllCall(pfnSaveAsFile, "Ptr", pPicture, "Ptr", pStream, "UInt", true, "UIntP", bytesWritten, "UInt")

   pIID := CLSIDFromString(IID_IRandomAccessStream, iidStream)
   DllCall("ShCore\CreateRandomAccessStreamOverStream", "Ptr", pStream, "UInt", BSOS_DEFAULT, "Ptr", pIID, "PtrP", pRandomAccessStream, "UInt")

   ; Only the returned IRandomAccessStream reference is handed back; the intermediate objects are released.
   ObjRelease(pPicture)
   ObjRelease(pStream)
   Return pRandomAccessStream
}

; Converts a GUID string such as "{...}" into its 16-byte binary form, stored
; in the ByRef variable CLSID. Returns the address of that variable.
; Throws an exception (with the HRESULT in hex) if the conversion fails.
CLSIDFromString(IID, ByRef CLSID) {
   VarSetCapacity(CLSID, 16, 0)
   hr := DllCall("ole32\CLSIDFromString", "WStr", IID, "Ptr", &CLSID, "UInt")
   if (hr) {
      throw Exception("CLSIDFromString failed. Error: " . Format("{:#x}", hr))
   }
   Return &CLSID
}


; Recognizes text in an image using the Windows.Media.Ocr (UWP) API.
;   file - pointer to an IRandomAccessStream holding the encoded image, or the
;          literal string "ShowAvailableLanguages" to get the list of user
;          profile languages that the OCR engine supports.
;   lang - language tag such as "ru", or "FirstFromAvailableLanguages" to let
;          Windows pick an engine from the user profile languages.
; Returns the recognized text, one line per recognized line (each ending in `n),
; or the newline-separated language tags for "ShowAvailableLanguages".
; The stream passed in is closed and released by this function.
; On failure (unsupported language, image too large) a message is shown and
; the script exits.
ocr(file, lang := "FirstFromAvailableLanguages")
{
   static OcrEngineStatics, OcrEngine, MaxDimension, LanguageFactory, Language, CurrentLanguage, BitmapDecoderStatics, GlobalizationPreferencesStatics
   ; One-time setup: activation factories and the maximum image dimension.
   if (OcrEngineStatics = "")
   {
      CreateClass("Windows.Globalization.Language", ILanguageFactory := "{9B0252AC-0C27-44F8-B792-9793FB66C63E}", LanguageFactory)
      CreateClass("Windows.Graphics.Imaging.BitmapDecoder", IBitmapDecoderStatics := "{438CCB26-BCEF-4E95-BAD6-23A822E58D01}", BitmapDecoderStatics)
      CreateClass("Windows.Media.Ocr.OcrEngine", IOcrEngineStatics := "{5BFFA85A-3384-3540-9940-699120D428A8}", OcrEngineStatics)
      DllCall(NumGet(NumGet(OcrEngineStatics+0)+6*A_PtrSize), "ptr", OcrEngineStatics, "uint*", MaxDimension)   ; MaxImageDimension
   }
   if (file = "ShowAvailableLanguages")
   {
      if (GlobalizationPreferencesStatics = "")
         CreateClass("Windows.System.UserProfile.GlobalizationPreferences", IGlobalizationPreferencesStatics := "{01BF4326-ED37-4E96-B0E9-C1340D1EA158}", GlobalizationPreferencesStatics)
      DllCall(NumGet(NumGet(GlobalizationPreferencesStatics+0)+9*A_PtrSize), "ptr", GlobalizationPreferencesStatics, "ptr*", LanguageList)   ; get_Languages
      DllCall(NumGet(NumGet(LanguageList+0)+7*A_PtrSize), "ptr", LanguageList, "int*", count)   ; count
      loop % count
      {
         DllCall(NumGet(NumGet(LanguageList+0)+6*A_PtrSize), "ptr", LanguageList, "int", A_Index-1, "ptr*", hString)   ; get_Item
         DllCall(NumGet(NumGet(LanguageFactory+0)+6*A_PtrSize), "ptr", LanguageFactory, "ptr", hString, "ptr*", LanguageTest)   ; CreateLanguage
         ; The HSTRING returned by get_Item belongs to the caller; free it once the language object exists.
         DeleteHString(hString)
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+8*A_PtrSize), "ptr", OcrEngineStatics, "ptr", LanguageTest, "int*", bool)   ; IsLanguageSupported
         if (bool = 1)
         {
            DllCall(NumGet(NumGet(LanguageTest+0)+6*A_PtrSize), "ptr", LanguageTest, "ptr*", hText)   ; presumably get_LanguageTag (first ILanguage method)
            buffer := DllCall("Combase.dll\WindowsGetStringRawBuffer", "ptr", hText, "uint*", length, "ptr")
            text .= StrGet(buffer, "UTF-16") "`n"
            ; StrGet copied the characters, so the HSTRING can be freed.
            DeleteHString(hText)
         }
         ObjRelease(LanguageTest)
      }
      ObjRelease(LanguageList)
      return text
   }
   ; (Re)create the engine when the language changes. In "FirstFromAvailableLanguages" mode it is recreated on every call.
   if (lang != CurrentLanguage) or (lang = "FirstFromAvailableLanguages")
   {
      if (OcrEngine != "")
      {
         ObjRelease(OcrEngine)
         ; A Language object only exists when the previous engine was created from an explicit tag.
         if (CurrentLanguage != "FirstFromAvailableLanguages")
            ObjRelease(Language)
      }
      if (lang = "FirstFromAvailableLanguages")
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+10*A_PtrSize), "ptr", OcrEngineStatics, "ptr*", OcrEngine)   ; TryCreateFromUserProfileLanguages
      else
      {
         CreateHString(lang, hString)
         DllCall(NumGet(NumGet(LanguageFactory+0)+6*A_PtrSize), "ptr", LanguageFactory, "ptr", hString, "ptr*", Language)   ; CreateLanguage
         DeleteHString(hString)
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+9*A_PtrSize), "ptr", OcrEngineStatics, "ptr", Language, "ptr*", OcrEngine)   ; TryCreateFromLanguage
      }
      if (OcrEngine = 0)
      {
         msgbox Can not use language "%lang%" for OCR, please install language pack.
         ExitApp
      }
      CurrentLanguage := lang
   }
   ; Decode the image from the stream.
   IRandomAccessStream := file
   DllCall(NumGet(NumGet(BitmapDecoderStatics+0)+14*A_PtrSize), "ptr", BitmapDecoderStatics, "ptr", IRandomAccessStream, "ptr*", BitmapDecoder)   ; CreateAsync
   WaitForAsync(BitmapDecoder)
   BitmapFrame := ComObjQuery(BitmapDecoder, IBitmapFrame := "{72A49A1C-8081-438D-91BC-94ECFC8185C6}")
   DllCall(NumGet(NumGet(BitmapFrame+0)+12*A_PtrSize), "ptr", BitmapFrame, "uint*", width)   ; get_PixelWidth
   DllCall(NumGet(NumGet(BitmapFrame+0)+13*A_PtrSize), "ptr", BitmapFrame, "uint*", height)   ; get_PixelHeight
   if (width > MaxDimension) or (height > MaxDimension)
   {
      msgbox Image is to big - %width%x%height%.`nIt should be maximum - %MaxDimension% pixels
      ExitApp
   }
   BitmapFrameWithSoftwareBitmap := ComObjQuery(BitmapDecoder, IBitmapFrameWithSoftwareBitmap := "{FE287C9A-420C-4963-87AD-691436E08383}")
   DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+6*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapAsync
   WaitForAsync(SoftwareBitmap)
   ; Run the recognition and collect the text of every line.
   DllCall(NumGet(NumGet(OcrEngine+0)+6*A_PtrSize), "ptr", OcrEngine, "ptr", SoftwareBitmap, "ptr*", OcrResult)   ; RecognizeAsync
   WaitForAsync(OcrResult)
   DllCall(NumGet(NumGet(OcrResult+0)+6*A_PtrSize), "ptr", OcrResult, "ptr*", LinesList)   ; get_Lines
   DllCall(NumGet(NumGet(LinesList+0)+7*A_PtrSize), "ptr", LinesList, "int*", count)   ; count
   loop % count
   {
      DllCall(NumGet(NumGet(LinesList+0)+6*A_PtrSize), "ptr", LinesList, "int", A_Index-1, "ptr*", OcrLine)   ; get_Item
      DllCall(NumGet(NumGet(OcrLine+0)+7*A_PtrSize), "ptr", OcrLine, "ptr*", hText)   ; get_Text
      buffer := DllCall("Combase.dll\WindowsGetStringRawBuffer", "ptr", hText, "uint*", length, "ptr")
      text .= StrGet(buffer, "UTF-16") "`n"
      ; The HSTRING from get_Text is owned by the caller; without this every line leaks one string.
      DeleteHString(hText)
      ObjRelease(OcrLine)
   }
   ; Close the stream and the bitmap explicitly (IClosable), then drop every reference taken above.
   Close := ComObjQuery(IRandomAccessStream, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   Close := ComObjQuery(SoftwareBitmap, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   ObjRelease(IRandomAccessStream)
   ObjRelease(BitmapDecoder)
   ObjRelease(BitmapFrame)
   ObjRelease(BitmapFrameWithSoftwareBitmap)
   ObjRelease(SoftwareBitmap)
   ObjRelease(OcrResult)
   ObjRelease(LinesList)
   return text
}



; Obtains the WinRT activation factory for a runtime class.
;   string    - runtime class name, e.g. "Windows.Media.Ocr.OcrEngine"
;   interface - IID (GUID string) of the factory/statics interface wanted
;   Class     - ByRef; receives the interface pointer
; On failure a message is shown and the script exits.
CreateClass(string, interface, ByRef Class)
{
   CreateHString(string, hString)
   VarSetCapacity(GUID, 16)
   DllCall("ole32\CLSIDFromString", "wstr", interface, "ptr", &GUID)
   ; The HRESULT must be read as "uint": with the default signed Int return type,
   ; failure codes come back negative and never equal 0x80004002 / 0x80040154.
   result := DllCall("Combase.dll\RoGetActivationFactory", "ptr", hString, "ptr", &GUID, "ptr*", Class, "uint")
   if (result != 0)
   {
      if (result = 0x80004002)
         msgbox No such interface supported
      else if (result = 0x80040154)
         msgbox Class not registered
      else
         msgbox error: %result%
      ExitApp
   }
   DeleteHString(hString)
}

; Creates a WinRT HSTRING from a script string. The handle is stored in the
; ByRef variable hString and must later be freed with DeleteHString.
CreateHString(string, ByRef hString)
{
    charCount := StrLen(string)
    DllCall("Combase.dll\WindowsCreateString", "wstr", string, "uint", charCount, "ptr*", hString)
}

; Frees an HSTRING handle, such as one created by CreateHString.
DeleteHString(hString)
{
   DllCall("Combase.dll\WindowsDeleteString"
         , "ptr", hString)
}

; Blocks until a WinRT asynchronous operation has finished, then replaces the
; ByRef variable Object (the operation) with the operation's result.
; The operation itself is released. If the operation ends in any state other
; than "completed", its error code is shown and the script exits.
WaitForAsync(ByRef Object)
{
   static IID_IAsyncInfo := "{00000036-0000-0000-C000-000000000046}"
   pInfo := ComObjQuery(Object, IID_IAsyncInfo)
   pfnGetStatus := NumGet(NumGet(pInfo+0)+7*A_PtrSize)   ; IAsyncInfo.Status
   loop
   {
      DllCall(pfnGetStatus, "ptr", pInfo, "uint*", status)
      ; 0 = still running: poll again shortly.
      if (status = 0)
      {
         sleep 10
         continue
      }
      ; Anything other than 1 (completed) means the operation was cancelled or failed.
      if (status != 1)
      {
         DllCall(NumGet(NumGet(pInfo+0)+8*A_PtrSize), "ptr", pInfo, "uint*", ErrorCode)   ; IAsyncInfo.ErrorCode
         msgbox AsyncInfo status error: %ErrorCode%
         ExitApp
      }
      ObjRelease(pInfo)
      break
   }
   DllCall(NumGet(NumGet(Object+0)+8*A_PtrSize), "ptr", Object, "ptr*", pResult)   ; GetResults
   ObjRelease(Object)
   Object := pResult
}




This is the second script, 'Face Detection From File'. It does not scan the screen content; instead, it scans a specific file and returns the coordinates of the faces it finds:





Code: Select all

; Demo: detect faces in "face.jpg" (a relative path is resolved against the script directory
; by facedetect), show the resulting list of face rectangles, then quit.
msgbox % facedetect("face.jpg")
ExitApp



; Detects faces in an image file using Windows.Media.FaceAnalysis.FaceDetector.
;   file      - path of the image; a path without a drive letter is taken
;               relative to the script directory.
;   maxheight - images taller than this are scaled down to this height (keeping
;               the aspect ratio) before detection.
; Returns one line per face: "faceN: x=.., y=.., width=.., height=..".
; NOTE(review): when the image was scaled down, the coordinates appear to refer
; to the scaled image, not the original file - confirm before mapping them back.
; If the file does not exist a message is shown and the script exits.
facedetect(file, maxheight := 2000)
{
   static BitmapDecoderStatics, BitmapEncoderStatics, SoftwareBitmapStatics, FaceDetector, SupportedBitmapPixelFormats
   ; One-time setup: factories, the detector itself and the pixel formats it accepts.
   if (FaceDetector = "")
   {
      CreateClass("Windows.Graphics.Imaging.BitmapDecoder", IBitmapDecoderStatics := "{438CCB26-BCEF-4E95-BAD6-23A822E58D01}", BitmapDecoderStatics)
      CreateClass("Windows.Graphics.Imaging.BitmapEncoder", IBitmapEncoderStatics := "{A74356A7-A4E4-4EB9-8E40-564DE7E1CCB2}", BitmapEncoderStatics)
      CreateClass("Windows.Graphics.Imaging.SoftwareBitmap", ISoftwareBitmapStatics := "{DF0385DB-672F-4A9D-806E-C2442F343E86}", SoftwareBitmapStatics)
      CreateClass("Windows.Media.FaceAnalysis.FaceDetector", IFaceDetectorStatics := "{BC042D67-9047-33F6-881B-6746C1B218B8}", FaceDetectorStatics)
      DllCall(NumGet(NumGet(FaceDetectorStatics+0)+6*A_PtrSize), "ptr", FaceDetectorStatics, "ptr*", FaceDetector)   ; CreateAsync
      WaitForAsync(FaceDetector)
      DllCall(NumGet(NumGet(FaceDetectorStatics+0)+7*A_PtrSize), "ptr", FaceDetectorStatics, "ptr*", ReadOnlyList)   ; GetSupportedBitmapPixelFormats
      DllCall(NumGet(NumGet(ReadOnlyList+0)+7*A_PtrSize), "ptr", ReadOnlyList, "int*", count)   ; count
      loop % count
      {
         DllCall(NumGet(NumGet(ReadOnlyList+0)+6*A_PtrSize), "ptr", ReadOnlyList, "int", A_Index-1, "uint*", BitmapPixelFormat)   ; get_Item
         ; Stored as "|n|" tokens so a later InStr lookup cannot match part of a number.
         SupportedBitmapPixelFormats .= "|" BitmapPixelFormat "|"
      }
      ObjRelease(FaceDetectorStatics)
      ObjRelease(ReadOnlyList)
   }
   if (SubStr(file, 2, 1) != ":")
      file := A_ScriptDir "\" file
   if !FileExist(file) or InStr(FileExist(file), "D")
   {
      msgbox File "%file%" does not exist
      ExitApp
   }
   ; Open the file as a stream and decode it.
   VarSetCapacity(GUID, 16)
   DllCall("ole32\CLSIDFromString", "wstr", IID_RandomAccessStream := "{905A0FE1-BC53-11DF-8C49-001E4FC686DA}", "ptr", &GUID)
   DllCall("ShCore\CreateRandomAccessStreamOnFile", "wstr", file, "uint", Read := 0, "ptr", &GUID, "ptr*", IRandomAccessStream)
   DllCall(NumGet(NumGet(BitmapDecoderStatics+0)+14*A_PtrSize), "ptr", BitmapDecoderStatics, "ptr", IRandomAccessStream, "ptr*", BitmapDecoder)   ; CreateAsync
   WaitForAsync(BitmapDecoder)
   BitmapFrame := ComObjQuery(BitmapDecoder, IBitmapFrame := "{72A49A1C-8081-438D-91BC-94ECFC8185C6}")
   DllCall(NumGet(NumGet(BitmapFrame+0)+12*A_PtrSize), "ptr", BitmapFrame, "uint*", width)   ; get_PixelWidth
   DllCall(NumGet(NumGet(BitmapFrame+0)+13*A_PtrSize), "ptr", BitmapFrame, "uint*", height)   ; get_PixelHeight
   DllCall(NumGet(NumGet(BitmapFrame+0)+8*A_PtrSize), "ptr", BitmapFrame, "uint*", BitmapPixelFormat)   ; get_BitmapPixelFormat
   BitmapFrameWithSoftwareBitmap := ComObjQuery(BitmapDecoder, IBitmapFrameWithSoftwareBitmap := "{FE287C9A-420C-4963-87AD-691436E08383}")
   ; Remember whether the scaling path was taken, so the cleanup below releases
   ; exactly the objects that were created.
   wasScaled := (height > maxheight)
   if (wasScaled)
   {
      DllCall(NumGet(NumGet(BitmapEncoderStatics+0)+15*A_PtrSize), "ptr", BitmapEncoderStatics, "ptr", IRandomAccessStream, "ptr", BitmapDecoder, "ptr*", BitmapEncoder)   ; CreateForTranscodingAsync
      WaitForAsync(BitmapEncoder)
      DllCall(NumGet(NumGet(BitmapEncoder+0)+15*A_PtrSize), "ptr", BitmapEncoder, "ptr*", BitmapTransform)   ; BitmapTransform
      DllCall(NumGet(NumGet(BitmapTransform+0)+7*A_PtrSize), "ptr", BitmapTransform, "int", floor(maxheight/height*width))   ; put_ScaledWidth
      DllCall(NumGet(NumGet(BitmapTransform+0)+9*A_PtrSize), "ptr", BitmapTransform, "int", maxheight)   ; put_ScaledHeight
      DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+8*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "uint", BitmapPixelFormat, "uint", Premultiplied := 0, "ptr", BitmapTransform, "uint", IgnoreExifOrientation := 0, "uint", DoNotColorManage := 0, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapTransformedAsync
   }
   else
      DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+6*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapAsync
   WaitForAsync(SoftwareBitmap)
   ; The detector only accepts certain pixel formats; convert to Gray8 when the image's format is not one of them.
   if !InStr(SupportedBitmapPixelFormats, "|" BitmapPixelFormat "|")
   {
      DllCall(NumGet(NumGet(SoftwareBitmapStatics+0)+7*A_PtrSize), "ptr", SoftwareBitmapStatics, "ptr", SoftwareBitmap, "uint", Gray8 := 62, "ptr*", SoftwareBitmapTemp)   ; Convert
      Close := ComObjQuery(SoftwareBitmap, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
      DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
      ObjRelease(Close)
      ObjRelease(SoftwareBitmap)
      SoftwareBitmap := SoftwareBitmapTemp
   }
   DllCall(NumGet(NumGet(FaceDetector+0)+6*A_PtrSize), "ptr", FaceDetector, "ptr", SoftwareBitmap, "ptr*", DetectedFaceList)   ; DetectFacesAsync
   WaitForAsync(DetectedFaceList)
   DllCall(NumGet(NumGet(DetectedFaceList+0)+7*A_PtrSize), "ptr", DetectedFaceList, "int*", count)   ; count
   loop % count
   {
      varsetcapacity(bounds, 16, 0)
      DllCall(NumGet(NumGet(DetectedFaceList+0)+6*A_PtrSize), "ptr", DetectedFaceList, "int", A_Index-1, "ptr*", DetectedFace)   ; get_Item
      DllCall(NumGet(NumGet(DetectedFace+0)+6*A_PtrSize), "ptr", DetectedFace, "ptr", &bounds)   ; BitmapBounds
      ; Four consecutive UInt values: x, y, width, height. Separate names are
      ; used so the image's own width/height are not overwritten.
      faceX := numget(bounds, 0, "uint")
      faceY := numget(bounds, 4, "uint")
      faceWidth := numget(bounds, 8, "uint")
      faceHeight := numget(bounds, 12, "uint")
      result .= "face" A_Index ": x=" faceX ", y=" faceY ", width=" faceWidth ", height=" faceHeight "`n"
      ObjRelease(DetectedFace)
   }
   ; Close the stream and the bitmap explicitly (IClosable), then release everything taken above.
   Close := ComObjQuery(IRandomAccessStream, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   Close := ComObjQuery(SoftwareBitmap, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   ObjRelease(IRandomAccessStream)
   ObjRelease(BitmapDecoder)
   ObjRelease(BitmapFrame)
   if (wasScaled)
   {
      ObjRelease(BitmapEncoder)
      ObjRelease(BitmapTransform)
   }
   ObjRelease(BitmapFrameWithSoftwareBitmap)
   ObjRelease(SoftwareBitmap)
   ObjRelease(DetectedFaceList)
   return result
}



; Fetches the WinRT activation factory for the runtime class named by `string`,
; asking for the interface whose IID is `interface`. The interface pointer is
; stored in the ByRef variable Class. On failure a message is shown and the
; script exits.
CreateClass(string, interface, ByRef Class)
{
   CreateHString(string, hClassName)
   VarSetCapacity(iid, 16)
   DllCall("ole32\CLSIDFromString", "wstr", interface, "ptr", &iid)
   hr := DllCall("Combase.dll\RoGetActivationFactory", "ptr", hClassName, "ptr", &iid, "ptr*", Class, "uint")
   if (hr = 0)
   {
      ; Success: the class-name string is no longer needed.
      DeleteHString(hClassName)
      return
   }
   if (hr = 0x80004002)
      msgbox No such interface supported
   else if (hr = 0x80040154)
      msgbox Class not registered
   else
      msgbox error: %hr%
   ExitApp
}

; Wraps a script string in a WinRT HSTRING, returned through the ByRef
; variable hString. Free it with DeleteHString when done.
CreateHString(string, ByRef hString)
{
    length := StrLen(string)
    DllCall("Combase.dll\WindowsCreateString"
          , "wstr", string, "uint", length, "ptr*", hString)
}

; Releases an HSTRING handle obtained from CreateHString.
DeleteHString(hString)
{
   DllCall("Combase.dll\WindowsDeleteString"
         , "ptr", hString)
}

; Polls a WinRT asynchronous operation until it leaves the "started" state,
; then swaps the ByRef variable Object (the operation) for its result and
; releases the operation. Any final state other than "completed" shows the
; operation's error code and exits the script.
WaitForAsync(ByRef Object)
{
   info := ComObjQuery(Object, "{00000036-0000-0000-C000-000000000046}")   ; IAsyncInfo
   loop
   {
      DllCall(NumGet(NumGet(info+0)+7*A_PtrSize), "ptr", info, "uint*", state)   ; IAsyncInfo.Status
      if (state = 1)
      {
         ; Completed.
         ObjRelease(info)
         break
      }
      if (state != 0)
      {
         ; Neither running nor completed: report the error and quit.
         DllCall(NumGet(NumGet(info+0)+8*A_PtrSize), "ptr", info, "uint*", ErrorCode)   ; IAsyncInfo.ErrorCode
         msgbox AsyncInfo status error: %ErrorCode%
         ExitApp
      }
      sleep 10
   }
   DllCall(NumGet(NumGet(Object+0)+8*A_PtrSize), "ptr", Object, "ptr*", outcome)   ; GetResults
   ObjRelease(Object)
   Object := outcome
}


So I am trying to do 3 things:

1. Apply the 'Face Detection From File' script to the 'Text Recognition From Screen' script, so that the new script finds faces on the screen
2. Remove the area selector from the 'Text Recognition From Screen' script, so that the script scans the whole screen instead of a selected area
3. Make the new script move the mouse to the detected face that is closest to the screen center



Dear autohotkey forum members, any help would be strongly appreciated and surely I will share my codes on the road to the goal script. :crazy:
Regards and all the best for You :wave:
Andy

blue83
Posts: 157
Joined: 11 Apr 2018, 06:38

Re: Optical character recognition (OCR) with UWP API

Post by blue83 » 22 Jan 2021, 04:45

Hi,

Is it possible to read field with coordinates inside .png file?

We have a function for reading from the screen here, but what if I want to read the picture without putting it on the screen?

Thanks,
blue

malcev
Posts: 1769
Joined: 12 Aug 2014, 12:37

Re: Optical character recognition (OCR) with UWP API

Post by malcev » 22 Jan 2021, 05:17

The first code in this topic loads image.
If you want to read only a particular region, you have to crop your image (with GDI+, for example) and then convert the HBitmap to a RandomAccessStream (see the second example in the first post).

blue83
Posts: 157
Joined: 11 Apr 2018, 06:38

Re: Optical character recognition (OCR) with UWP API

Post by blue83 » 22 Jan 2021, 06:46

Hi @malcev,

thank you for suggestion I will try like that.

Blue

User avatar
blackjoker
Posts: 18
Joined: 01 Dec 2020, 02:57

Re: Optical character recognition (OCR) with UWP API

Post by blackjoker » 29 Jan 2021, 12:10

Works fine for me!!!

Solid job @malcev @teadrinker @adegard !!! Thank you!

ewerybody
Posts: 16
Joined: 04 Nov 2016, 09:16
Contact:

Re: Optical character recognition (OCR) with UWP API

Post by ewerybody » 07 Feb 2021, 12:49

Hello folks! First of all thanks for this awesome OCR script!

I had quite some success but after a Windows update I now get a No valid COM object! error when I run the teadrinker code included in my runtime.
From this line here in the WaitForAsync function:

Code: Select all

AsyncInfo := ComObjQuery(Object, IAsyncInfo := "{00000036-0000-0000-C000-000000000046}")
It works twice, on BitmapDecoder and SoftwareBitmap, but on the third call it breaks with the error mentioned above. That's from this call here:

Code: Select all

    ...
    DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+6*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapAsync
    WaitForAsync(SoftwareBitmap)
    DllCall(NumGet(NumGet(OcrEngine+0)+6*A_PtrSize), "ptr", OcrEngine, ptr, SoftwareBitmap, "ptr*", OcrResult)   ; RecognizeAsync
>>> WaitForAsync(OcrResult)
    DllCall(NumGet(NumGet(OcrResult+0)+6*A_PtrSize), "ptr", OcrResult, "ptr*", LinesList)   ; get_Lines
    DllCall(NumGet(NumGet(LinesList+0)+7*A_PtrSize), "ptr", LinesList, "int*", count)   ; count
    loop % count
    { ...
As a workaround I call it with a separate Autohotkey.exe process via ComObjCreate("WScript.Shell") which also works but its rather sad that it breaks in my runtime now.
Any Ideas?

teadrinker
Posts: 4309
Joined: 29 Mar 2015, 09:41
Contact:

Re: Optical character recognition (OCR) with UWP API

Post by teadrinker » 07 Feb 2021, 13:27

@ewerybody
Can't reproduce the issue. All latest updates are installed. Windows 10 Pro 20H2.

ewerybody
Posts: 16
Joined: 04 Nov 2016, 09:16
Contact:

Re: Optical character recognition (OCR) with UWP API

Post by ewerybody » 07 Feb 2021, 18:08

Yeah I'm on the same. Hmm .. now it works again. With both methods :/ weird. 🤷‍♀️

doubledave22
Posts: 343
Joined: 08 Jun 2019, 17:36

Re: Optical character recognition (OCR) with UWP API

Post by doubledave22 » 06 Apr 2021, 16:49

Hey guys, having some troubles here. If I run this on a separate script with nothing else in it, it gets all the text I want from the screen area I chose. Once I place this code in my main script (which has thousands of lines) then it fails.

Running a tooltip in the WaitForAsync seems to always just return 0 for Status which never resolves. The call just before this is: WaitForAsync(BitmapDecoder)

Code: Select all

DllCall(NumGet(NumGet(AsyncInfo+0)+7*A_PtrSize), "ptr", AsyncInfo, "uint*", status)   ; IAsyncInfo.Status
Any ideas how to debug this? Just want to know if there's any known conflicts or if theres any way I can start to work through what might be preventing the status from resolving.

Code: Select all


; Ctrl+T: OCR a fixed 800x599 screen rectangle at (15, 15) and show the text.
^t::
   x := 15, y := 15
   w := 800, h := 599
   hBitmap := HBitmapFromScreen(x, y, w, h)
   pIRandomAccessStream := HBitmapToRandomAccessStream(hBitmap)
   ; The bitmap has been saved into the stream; free the GDI handle.
   DllCall("DeleteObject", "Ptr", hBitmap)
   ; No language given: ocr() uses its default language selection.
   text := ocr(pIRandomAccessStream)
   msgbox, % text
return

edit: I've tried removing all timers, hooks, simplifying my auto-execute all to no avail. I checked to make sure the bitmap was valid by converting to pbitmap and saving to file and all looks good there.

teadrinker
Posts: 4309
Joined: 29 Mar 2015, 09:41
Contact:

Re: Optical character recognition (OCR) with UWP API

Post by teadrinker » 06 Apr 2021, 16:58

Hi
First, make sure hBitmap is actually returned:

Code: Select all

MsgBox, % hBitmap := HBitmapFromScreen(x, y, w, h)

doubledave22
Posts: 343
Joined: 08 Jun 2019, 17:36

Re: Optical character recognition (OCR) with UWP API

Post by doubledave22 » 06 Apr 2021, 17:08

teadrinker wrote:
06 Apr 2021, 16:58
Hi
First, make sure hBitmap is actually returned:

Code: Select all

MsgBox, % hBitmap := HBitmapFromScreen(x, y, w, h)
It is returned. As some advice from a friend, he said maybe a critical thread is blocking it. I have disabled all critical threads and now it is working. I guess i have to work through each to find the culprit now.

doubledave22
Posts: 343
Joined: 08 Jun 2019, 17:36

Re: Optical character recognition (OCR) with UWP API

Post by doubledave22 » 07 Apr 2021, 12:13

So i am using this error suppressor so my users don't get strange looking errors:
https://autohotkey.com/board/topic/65672-suppressing-run-time-error-messages/

Code: Select all

; Install the replacement for AutoHotkey's built-in run-time error dialogs.
; "{#}" in the text, if present, is replaced by the failing line number.
; (The closing quote of the string literal was missing.)
SuppressRuntimeErrors("An error has occurred....")
return

; Public entry point: sets the text shown in place of built-in error dialogs.
; All the work is done by SuppressRuntimeErrors_, which also serves as the
; message handler; passing hwnd = 0 marks this as a call from the script.
SuppressRuntimeErrors(NewErrorFormat)
{
    outcome := SuppressRuntimeErrors_(NewErrorFormat, 0, 0, 0)
    return outcome
}

; Combined setup routine and WM_COMMNOTIFY message handler.
;   Called by the script (hwnd = 0): wParam is the replacement message text;
;     stores it and registers this function as the WM_COMMNOTIFY handler.
;   Called as a message handler (hwnd != 0): if the script is showing one of
;     its built-in error dialogs, closes it and shows the replacement text
;     plus the "Specifically:" detail, then forwards the message to any
;     handler that was registered before this one.
; Changes from the posted version:
;   - The caller's Critical setting is restored on the script-called path.
;     Otherwise "Critical 1000" stays in effect for the calling thread, and
;     when that thread is the auto-execute section, every newly launched
;     thread starts off critical.
;   - The dialog text is kept in its own variable, so the chained handler still
;     receives the real message number in msg.
;   - DetectHiddenWindows is also restored when the handler was already registered.
SuppressRuntimeErrors_(wParam, lParam, msg, hwnd)
{
    ; Constants:
    static WM_COMMNOTIFY := 0x0044, AHK_DIALOG := 1027
    ; Persistent variables:
    static sScriptWnd := 0, sScriptPID, sOnCommNotify, sMessage

    ; Remember the caller's setting so it can be put back below.
    prevCritical := A_IsCritical
    Critical 1000

    dhw := A_DetectHiddenWindows
    DetectHiddenWindows On

    if hwnd     ; Called internally to handle a WM_COMMNOTIFY message.
    {
        if (hwnd = sScriptWnd  ; Script's main window is the recipient.
            && wParam = AHK_DIALOG  ; We're showing a dialog of some sort.
            && WinExist("ahk_class #32770 ahk_pid " sScriptPID))
        {
            ControlGetText dlgText, Static1
            ; The following relies on the fact that all built-in error
            ; dialogs use this format to point out the current line:
            if RegExMatch(dlgText, "m`a)^--->`t0*\K\d+(?=:)", line)
            {
                ; If we change the text, the dialog will still be sized
                ; based on the previous text.  So instead, close this
                ; dialog and show a new one.
                WinClose
                ; grab the details of the message as well
                RegexMatch(dlgText, "Specifically:.*Line#",Matched)
                stringreplace, Matched, Matched, Specifically:,,
                stringreplace, Matched, Matched, Line#,,
                StringReplace, Matched, Matched, `r,, All
                StringReplace, Matched, Matched, `n,, All

                StringReplace dlgText, sMessage, {#}, %line%
                MsgBox 48,, % dlgText "`nDetails: " Matched
            }
        }

        ; Restore the setting to its thread-default.
        DetectHiddenWindows %dhw%

        ; The following calls the script's WM_COMMNOTIFY handler if it
        ; has one, otherwise it silently fails and returns nothing.
        ; (This thread ends here, so its Critical setting needs no restoring.)
        return %sOnCommNotify%(wParam, lParam, msg, hwnd)
    }

    ; Called by script.
    sMessage := wParam

    ; Register only once; later calls just update the message text.
    if (OnMessage(WM_COMMNOTIFY) != A_ThisFunc)
    {
        ; Retrieve previous message handler, if the script has one.
        sOnCommNotify := OnMessage(WM_COMMNOTIFY)

        ; Retrieve hwnd of main window (usually hidden) and process ID.
        Process Exist
        sScriptPID := ErrorLevel
        sScriptWnd := WinExist("ahk_class AutoHotkey ahk_pid " sScriptPID)

        ; Register message handler with MaxThreads = 1.
        ; NOTE(review): the comment in the posted code spoke of passing 10 so
        ; that error dialogs raised by other threads while our dialog is open
        ; are also caught, but the code passes 1 - confirm which is intended.
        OnMessage(WM_COMMNOTIFY, A_ThisFunc, 1)
    }

    ; Since we were called by script, restore our caller's settings.
    DetectHiddenWindows %dhw%
    Critical %prevCritical%
}

As you can see this contains a Critical, 1000 at the top of the function. It seems that the critical here blocks the OCR WaitForAsync from resolving. There are a few other functions that contain "Critical" that also break WaitForAsync and keep "status" at 0 endlessly. It seems that calling "Critical, off" at the end of these functions fixes it but what is really happening here?

8992
Posts: 1
Joined: 15 Apr 2021, 03:28

Re: Optical character recognition (OCR) with UWP API

Post by 8992 » 15 Apr 2021, 03:41

How can I make this give accurate results from a specific font? I also tried Vis2 as well but the accuracy is similar to this one for the font I want it to read (i.e. not great) and runs much slower, I see that it's possible to train tesseract to work with new fonts but since it is so much slower I would prefer to use this ocr if possible. I have the .ttf of the font I want it to read. Even if I could just get it to read numbers and dashes accurately I would be very happy

teadrinker
Posts: 4309
Joined: 29 Mar 2015, 09:41
Contact:

Re: Optical character recognition (OCR) with UWP API

Post by teadrinker » 15 Apr 2021, 07:18

No, I think you can't train this OCR or something like this.
Last edited by teadrinker on 21 Apr 2021, 07:36, edited 1 time in total.

User avatar
Joe Glines
Posts: 770
Joined: 30 Sep 2013, 20:49
Location: Dallas
Contact:

Re: Optical character recognition (OCR) with UWP API

Post by Joe Glines » 21 Apr 2021, 06:52

teadrinker wrote:
15 Apr 2021, 07:18
No, I think you can't train this OCR or somethink like this.
Even if I give it scooby snacks? (ha ha) Yeah, I agree, I don't think this "learns"
Sign-up for the 🅰️HK Newsletter

ImageImageImageImage:clap:
AHK Tutorials:Web Scraping | | Webservice APIs | AHK and Excel | Chrome | RegEx | Functions
Training: AHK Webinars Courses on AutoHotkey :ugeek:
YouTube

:thumbup: Quick Access Popup, the powerful Windows folders, apps and documents launcher!

teadrinker
Posts: 4309
Joined: 29 Mar 2015, 09:41
Contact:

Re: Optical character recognition (OCR) with UWP API

Post by teadrinker » 21 Apr 2021, 07:43

Joe Glines wrote: Even if I give it scooby snacks?
As a last resort, you can try to talk to it in a stern voice. :)

mebo
Posts: 14
Joined: 22 Apr 2021, 16:08

Re: Optical character recognition (OCR) with UWP API

Post by mebo » 22 Apr 2021, 16:16

Hey there! I made an account just to thank you for this wonderful tool.

I do have a question though: the left click dragging is REALLY choppy/laggy/slow. When you hold left click down and drag, it is sometimes half a second to a full second behind. Is there a better way to render this rectangle for better performance?

Thank you!

teadrinker
Posts: 4309
Joined: 29 Mar 2015, 09:41
Contact:

Re: Optical character recognition (OCR) with UWP API

Post by teadrinker » 23 Apr 2021, 03:34

@mebo
It depends heavily on the performance/load of your computer.

Image

mebo
Posts: 14
Joined: 22 Apr 2021, 16:08

Re: Optical character recognition (OCR) with UWP API

Post by mebo » 23 Apr 2021, 08:41

Hey teadrinker!

I have quite a powerful machine (i9-7XXX) with no active load on the PC. I stared at task manager while I was moving the rectangle around and load stayed low.

There was a clipboard tool linked in this thread that I downloaded, and its click+drag is really smooth (almost native to doing it right on the Windows desktop). If you move the mouse too quickly around the desktop with this script, it's really slow. I'm happy to provide a gif of it happening, but the problem is certainly with the method being used here.

teadrinker
Posts: 4309
Joined: 29 Mar 2015, 09:41
Contact:

Re: Optical character recognition (OCR) with UWP API

Post by teadrinker » 23 Apr 2021, 10:24

mebo wrote: If you move the mouse too quickly around the desktop with this script, it's really slow.
Can't reproduce. Anyway, you can try adding SetWinDelay, 0 at the beginning of the script and replacing LowLevelMouseProc() with this one:

Code: Select all

; Low-level mouse hook callback that doubles as its own redraw timer for the
; selection rectangle GUI.
;
; Two call modes:
;   * Hook mode  - called for the WH_MOUSE_LL hook with (nCode, wParam, lParam).
;                  Records the latest cursor position and (re)arms a one-shot
;                  10 ms timer, so the GUI is redrawn once per burst of mouse
;                  moves instead of on every single hook message.
;   * Timer mode - called through the bound `timer` object with nCode = "timer".
;                  Resizes/moves the GUI to span the drag start point and the
;                  most recent cursor position.
;
; Parameters (hook mode):
;   nCode  - hook code, forwarded unchanged to CallNextHookEx.
;   wParam - mouse message identifier (WM_MOUSEMOVE, WM_LBUTTONUP, ...).
;   lParam - pointer to the hook's data structure; the cursor x/y are read as
;            two Ints at offsets 0 and 4.
;
; Returns: the result of CallNextHookEx in hook mode; nothing in timer mode.
LowLevelMouseProc(nCode, wParam, lParam) {
   static WM_MOUSEMOVE := 0x200, WM_LBUTTONUP := 0x202
        , coords := [], startMouseX, startMouseY, hGui
        , timer := Func("LowLevelMouseProc").Bind("timer", "", "")

   if (nCode = "timer") {
      point := coords[1]
      ; A tick armed by the last mouse move can fire after WM_LBUTTONUP has
      ; already cleared the start point. Redrawing then would use blank
      ; coordinates and could disturb the finished selection rectangle,
      ; so skip the redraw when no drag is active or no point was recorded.
      if (startMouseX = "" || !IsObject(point))
         Return
      mouseX := point[1], mouseY := point[2]
      ; The top-left corner is the smaller coordinate on each axis, so the
      ; rectangle is valid whichever direction the user drags.
      x := startMouseX < mouseX ? startMouseX : mouseX
      y := startMouseY < mouseY ? startMouseY : mouseY
      w := Abs(mouseX - startMouseX)
      h := Abs(mouseY - startMouseY)
      ; Best effort: the GUI may not be available at the moment the timer fires.
      try Gui, %hGui%: Show, x%x% y%y% w%w% h%h% NA
   }
   else {
      ; Remember the GUI handle on the first hook call.
      ; NOTE(review): presumably A_EventInfo carries the hGui value handed to
      ; WindowsHook when the callback was registered - confirm against that class.
      (!hGui && hGui := A_EventInfo)
      if (wParam = WM_LBUTTONUP)
         startMouseX := startMouseY := ""   ; drag finished; next move starts a new one
      if (wParam = WM_MOUSEMOVE)  {
         mouseX := NumGet(lParam + 0, "Int")
         mouseY := NumGet(lParam + 4, "Int")
         if (startMouseX = "") {
            ; First move of a new drag defines the anchor corner.
            startMouseX := mouseX
            startMouseY := mouseY
         }
         coords.1 := [mouseX, mouseY]
         ; Negative period = run once; re-arming on every move restarts the timer.
         SetTimer, % timer, -10
      }
      ; Type names are quoted so they cannot be mistaken for variables.
      Return DllCall("CallNextHookEx", "Ptr", 0, "Int", nCode, "UInt", wParam, "Ptr", lParam)
   }
}

Post Reply

Return to “Scripts and Functions (v1)”