Page 4 of 8

Re: Optical character recognition (OCR) with UWP API

Posted: 28 Dec 2020, 10:21
by AndyRal123
:headwall: :headwall: :roll:

Hello everyone,

I am trying to merge the first script below ('Text Recognition From Screen') with the second script ('Face Detection From File') to get automatic face detection on the screen, and then move the mouse to the detected face (the one closest to the screen center).
First one is the code for 'text recognition from screen':



malcev wrote:
19 Feb 2020, 20:58
https://docs.microsoft.com/en-us/uwp/api/windows.media.ocr
API recognizes text in 2 ways.

Script for recognizing screenshots by teadrinker:

Code: Select all

; Screen-region OCR demo: Ctrl+X lets the user drag out a rectangle,
; captures it and shows the text returned by ocr().
#NoEnv
SetBatchLines, -1
Return

; Esc terminates the script.
Esc:: ExitApp

; Ctrl+X: select an area, capture it, run OCR and display the result.
^X::
; GetArea() returns an object with the keys x, y, w, h; the trailing *
; expands it into the parameters of HBitmapFromScreen.
hBitmap := HBitmapFromScreen(GetArea()*)
pIRandomAccessStream := HBitmapToRandomAccessStream(hBitmap)
; The stream has been produced, so the GDI bitmap handle is deleted here.
DllCall("DeleteObject", "Ptr", hBitmap)
; "ru" is the language tag handed to ocr() as its lang argument.
text := ocr(pIRandomAccessStream, "ru")
MsgBox, % text
Return

GetArea()
{
   ; Starts an interactive selection and waits (polling every 100 ms)
   ; until the selection object has a non-empty "w" key, then returns it.
   selection := []
   StartSelection(selection)
   Loop
   {
      if (selection.w)
         break
      Sleep, 100
   }
   Return selection
}
   
StartSelection(area)
{
   ; Binds Select(area) to the left mouse button and swaps every system
   ; cursor for the crosshair while the selection is pending.
   onLeftButton := Func("Select").Bind(area)
   Hotkey, LButton, % onLeftButton, On
   ReplaceSystemCursors("IDC_CROSS")
}

Select(area)
{
   ; LButton hotkey handler. Installs a low-level mouse hook that drives
   ; the selection GUI, waits for the button to be released with a
   ; non-zero-width rectangle, then tears everything down and stores the
   ; rectangle in area under the keys x, y, w, h.
   static hGui := CreateSelectionGui()
   WH_MOUSE_LL := 14
   mouseHook := new WindowsHook(WH_MOUSE_LL, "LowLevelMouseProc", hGui)
   Loop
   {
      KeyWait, LButton
      WinGetPos, X, Y, W, H, ahk_id %hGui%
      if (W > 0)
         break
   }
   ReplaceSystemCursors("")
   Hotkey, LButton, Off
   ; Dropping the last reference runs WindowsHook.__Delete (unhook).
   mouseHook := ""
   Gui, %hGui%:Show, Hide
   area.x := X
   area.y := Y
   area.w := W
   area.h := H
}

ReplaceSystemCursors(IDC = "")
{
   ; Replaces every system cursor listed in SysCursors with a copy of the
   ; cursor named by IDC (e.g. "IDC_CROSS"), or restores the defaults when
   ; IDC is empty.
   ;
   ; IDC - name of one of the keys of SysCursors, or "" to restore.
   ;
   ; While cursors are replaced, an OnExit callback is registered so the
   ; defaults are restored if the script exits; restoring unregisters it.
   ; An unknown cursor name is ignored (nothing is changed) instead of
   ; handing NULL handles to SetSystemCursor.
   static IMAGE_CURSOR := 2, SPI_SETCURSORS := 0x57
        , exitFunc := Func("ReplaceSystemCursors").Bind("")
        , SysCursors := { IDC_APPSTARTING: 32650
                        , IDC_ARROW      : 32512
                        , IDC_CROSS      : 32515
                        , IDC_HAND       : 32649
                        , IDC_HELP       : 32651
                        , IDC_IBEAM      : 32513
                        , IDC_NO         : 32648
                        , IDC_SIZEALL    : 32646
                        , IDC_SIZENESW   : 32643
                        , IDC_SIZENWSE   : 32642
                        , IDC_SIZEWE     : 32644
                        , IDC_SIZENS     : 32645
                        , IDC_UPARROW    : 32516
                        , IDC_WAIT       : 32514 }
   if !IDC {
      ; pvParam is pointer-sized, hence "Ptr" rather than "UInt".
      DllCall("SystemParametersInfo", "UInt", SPI_SETCURSORS, "UInt", 0, "Ptr", 0, "UInt", 0)
      OnExit(exitFunc, 0)
      return
   }
   if !SysCursors.HasKey(IDC)
      return
   ; The cursor name parameter is a MAKEINTRESOURCE value, i.e. pointer-sized.
   hCursor := DllCall("LoadCursor", "Ptr", 0, "Ptr", SysCursors[IDC], "Ptr")
   if !hCursor
      return
   for k, v in SysCursors  {
      ; SetSystemCursor takes ownership of the handle, so each slot gets its own copy.
      hCopy := DllCall("CopyImage", "Ptr", hCursor, "UInt", IMAGE_CURSOR, "Int", 0, "Int", 0, "UInt", 0, "Ptr")
      if hCopy
         DllCall("SetSystemCursor", "Ptr", hCopy, "UInt", v)
   }
   OnExit(exitFunc)
}

CreateSelectionGui()
{
   ; Creates the borderless, always-on-top, click-through (E0x20) window
   ; used as the selection rectangle and returns its HWND. The window is
   ; not shown here.
   Gui, New, +HwndoverlayId +Alwaysontop -Caption +LastFound +ToolWindow +E0x20 -DPIScale
   Gui, Color, FFC800
   WinSet, Transparent, 130
   Return overlayId
}

; Low-level mouse hook procedure that also serves as its own timer routine.
;
; Hook mode:  called with the hook's nCode, wParam (mouse message) and
;             lParam (address of the hook data, whose first two Int fields
;             are read as the mouse x and y). Queues mouse positions and
;             schedules the timer; always forwards to CallNextHookEx.
; Timer mode: called through the bound function "timer" with nCode = "timer";
;             drains the queued positions and resizes the selection GUI.
LowLevelMouseProc(nCode, wParam, lParam) {
   static WM_MOUSEMOVE := 0x200, WM_LBUTTONUP := 0x202
        , coords := [], startMouseX, startMouseY, hGui
        , timer := Func("LowLevelMouseProc").Bind("timer", "", "")
   
   if (nCode = "timer") {
      ; Process every queued point in arrival order.
      while coords[1] {
         point := coords.RemoveAt(1)
         mouseX := point[1], mouseY := point[2]
         ; Top-left corner is the smaller coordinate on each axis, so the
         ; rectangle can be dragged in any direction.
         x := startMouseX < mouseX ? startMouseX : mouseX
         y := startMouseY < mouseY ? startMouseY : mouseY
         w := Abs(mouseX - startMouseX)
         h := Abs(mouseY - startMouseY)
         ; NA = show without activating; try swallows any error from the Gui command.
         try Gui, %hGUi%: Show, x%x% y%y% w%w% h%h% NA
      }
   }
   else {
      ; On the first hook call, remember A_EventInfo as the GUI handle
      ; (WindowsHook.__New passes its eventInfo argument to RegisterCallback).
      (!hGui && hGui := A_EventInfo)
      ; Button released: forget the anchor so the next drag starts fresh.
      if (wParam = WM_LBUTTONUP)
         startMouseX := startMouseY := ""
      if (wParam = WM_MOUSEMOVE)  {
         mouseX := NumGet(lParam + 0, "Int")
         mouseY := NumGet(lParam + 4, "Int")
         ; The first move after a reset defines the anchor corner.
         if (startMouseX = "") {
            startMouseX := mouseX
            startMouseY := mouseY
         }
         coords.Push([mouseX, mouseY])
         ; One-shot timer (negative period): the GUI update happens
         ; outside the hook call itself.
         SetTimer, % timer, -10
      }
      Return DllCall("CallNextHookEx", Ptr, 0, Int, nCode, UInt, wParam, Ptr, lParam)
   }
}

class WindowsHook
{
   ; Small RAII-style wrapper around SetWindowsHookEx: the hook lives as
   ; long as the object does and is removed in __Delete.
   ;
   ; type      - hook type constant (e.g. 14 for WH_MOUSE_LL)
   ; callback  - name of the script function to register as hook procedure
   ; eventInfo - value made available to the callback through A_EventInfo
   ; isGlobal  - true: module handle + thread id 0; false: no module, current thread
   __New(type, callback, eventInfo := "", isGlobal := true)
   {
      this.callbackPtr := RegisterCallback(callback, "Fast", 3, eventInfo)
      if (isGlobal)
      {
         hModule := DllCall("GetModuleHandle", "UInt", 0, "Ptr")
         threadId := 0
      }
      else
      {
         hModule := 0
         threadId := DllCall("GetCurrentThreadId")
      }
      this.hHook := DllCall("SetWindowsHookEx", "Int", type, "Ptr", this.callbackPtr, "Ptr", hModule, "UInt", threadId, "Ptr")
   }

   __Delete()
   {
      ; Remove the hook first, then free the callback thunk.
      DllCall("UnhookWindowsHookEx", "Ptr", this.hHook)
      DllCall("GlobalFree", "Ptr", this.callBackPtr, "Ptr")
   }
}

HBitmapFromScreen(X, Y, W, H) {
   ; Captures the screen rectangle (X, Y, W, H) into a new GDI bitmap and
   ; returns its handle. The caller owns the handle and must DeleteObject it.
   ;
   ; NOTE: the parameter names are part of the interface. The ^X hotkey calls
   ; this function with GetArea()*, which maps the object's x/y/w/h keys
   ; onto the parameters by name.
   HDC := DllCall("GetDC", "Ptr", 0, "UPtr")
   HBM := DllCall("CreateCompatibleBitmap", "Ptr", HDC, "Int", W, "Int", H, "UPtr")
   PDC := DllCall("CreateCompatibleDC", "Ptr", HDC, "UPtr")
   ; Remember the DC's default bitmap so it can be selected back in.
   hOld := DllCall("SelectObject", "Ptr", PDC, "Ptr", HBM, "UPtr")
   ; 0x00CC0020 = SRCCOPY
   DllCall("BitBlt", "Ptr", PDC, "Int", 0, "Int", 0, "Int", W, "Int", H
                   , "Ptr", HDC, "Int", X, "Int", Y, "UInt", 0x00CC0020)
   ; Deselect our bitmap before the DC goes away, so the returned handle
   ; is not selected into any DC.
   DllCall("SelectObject", "Ptr", PDC, "Ptr", hOld)
   DllCall("DeleteDC", "Ptr", PDC)
   DllCall("ReleaseDC", "Ptr", 0, "Ptr", HDC)
   Return HBM
}

HBitmapToRandomAccessStream(hBitmap)
{
   ; Serializes a GDI bitmap into an in-memory IStream (via IPicture) and
   ; wraps that stream as an IRandomAccessStream. Returns the interface
   ; pointer; the intermediate IPicture and IStream references are released
   ; here. hBitmap itself is not deleted.
   static PICTYPE_BITMAP := 1
        , BSOS_DEFAULT   := 0
        , IID_IPicture            := "{7BF80980-BF32-101A-8BBB-00AA00300CAB}"
        , IID_IRandomAccessStream := "{905A0FE1-BC53-11DF-8C49-001E4FC686DA}"

   ; Growable memory stream; the memory is freed together with the stream.
   DllCall("Ole32\CreateStreamOnHGlobal", "Ptr", 0, "UInt", true, "PtrP", pStream, "UInt")

   ; PICTDESC: size, picture type, then the bitmap handle at offset 8.
   descSize := 8 + A_PtrSize*2
   VarSetCapacity(pictDesc, descSize, 0)
   NumPut(descSize, pictDesc)
   NumPut(PICTYPE_BITMAP, pictDesc, 4)
   NumPut(hBitmap, pictDesc, 8)
   pIID := CLSIDFromString(IID_IPicture, iidPicture)
   DllCall("OleAut32\OleCreatePictureIndirect", "Ptr", &pictDesc, "Ptr", pIID, "UInt", false, "PtrP", pPicture, "UInt")

   ; IPicture::SaveAsFile is vtable slot 15.
   pfnSaveAsFile := NumGet(NumGet(pPicture+0) + A_PtrSize*15)
   DllCall(pfnSaveAsFile, "Ptr", pPicture, "Ptr", pStream, "UInt", true, "UIntP", bytesWritten, "UInt")

   pIID := CLSIDFromString(IID_IRandomAccessStream, iidStream)
   DllCall("ShCore\CreateRandomAccessStreamOverStream", "Ptr", pStream, "UInt", BSOS_DEFAULT, "Ptr", pIID, "PtrP", pRandomAccessStream, "UInt")
   ObjRelease(pPicture)
   ObjRelease(pStream)
   Return pRandomAccessStream
}

CLSIDFromString(IID, ByRef CLSID)
{
   ; Parses the GUID string IID into the 16-byte buffer CLSID (ByRef) and
   ; returns the buffer's address. Throws an Exception carrying the
   ; HRESULT in hex when the string cannot be parsed.
   VarSetCapacity(CLSID, 16, 0)
   hr := DllCall("ole32\CLSIDFromString", "WStr", IID, "Ptr", &CLSID, "UInt")
   if (hr != 0)
      throw Exception("CLSIDFromString failed. Error: " . Format("{:#x}", hr))
   Return &CLSID
}


ocr(file, lang := "FirstFromAvailableLanguages")
{
   ; Runs Windows.Media.Ocr text recognition on an image stream.
   ;
   ; file - an IRandomAccessStream pointer holding an encoded image, or the
   ;        literal string "ShowAvailableLanguages" to list the language
   ;        tags from the user's profile that the OCR engine supports.
   ; lang - language tag for the recognizer (e.g. "ru"), or the default
   ;        "FirstFromAvailableLanguages" to create the engine from the
   ;        user profile languages.
   ;
   ; Returns the recognized text, one line per OCR line, each terminated by
   ; `n (or the newline-separated list of language tags).
   ; The stream passed in is closed and released by this function.
   ; Shows a message box and exits the script when the language cannot be
   ; used or the image exceeds the engine's maximum dimension.
   ;
   ; Every HSTRING received from WinRT is released with DeleteHString once
   ; its text has been copied; they were previously leaked.
   static OcrEngineStatics, OcrEngine, MaxDimension, LanguageFactory, Language, CurrentLanguage, BitmapDecoderStatics, GlobalizationPreferencesStatics
   if (OcrEngineStatics = "")
   {
      ; One-time creation of the activation factories used below.
      CreateClass("Windows.Globalization.Language", ILanguageFactory := "{9B0252AC-0C27-44F8-B792-9793FB66C63E}", LanguageFactory)
      CreateClass("Windows.Graphics.Imaging.BitmapDecoder", IBitmapDecoderStatics := "{438CCB26-BCEF-4E95-BAD6-23A822E58D01}", BitmapDecoderStatics)
      CreateClass("Windows.Media.Ocr.OcrEngine", IOcrEngineStatics := "{5BFFA85A-3384-3540-9940-699120D428A8}", OcrEngineStatics)
      DllCall(NumGet(NumGet(OcrEngineStatics+0)+6*A_PtrSize), "ptr", OcrEngineStatics, "uint*", MaxDimension)   ; MaxImageDimension
   }
   if (file = "ShowAvailableLanguages")
   {
      if (GlobalizationPreferencesStatics = "")
         CreateClass("Windows.System.UserProfile.GlobalizationPreferences", IGlobalizationPreferencesStatics := "{01BF4326-ED37-4E96-B0E9-C1340D1EA158}", GlobalizationPreferencesStatics)
      DllCall(NumGet(NumGet(GlobalizationPreferencesStatics+0)+9*A_PtrSize), "ptr", GlobalizationPreferencesStatics, "ptr*", LanguageList)   ; get_Languages
      DllCall(NumGet(NumGet(LanguageList+0)+7*A_PtrSize), "ptr", LanguageList, "int*", count)   ; count
      loop % count
      {
         DllCall(NumGet(NumGet(LanguageList+0)+6*A_PtrSize), "ptr", LanguageList, "int", A_Index-1, "ptr*", hString)   ; get_Item
         DllCall(NumGet(NumGet(LanguageFactory+0)+6*A_PtrSize), "ptr", LanguageFactory, "ptr", hString, "ptr*", LanguageTest)   ; CreateLanguage
         ; The language tag string returned by get_Item is owned by us.
         DeleteHString(hString)
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+8*A_PtrSize), "ptr", OcrEngineStatics, "ptr", LanguageTest, "int*", bool)   ; IsLanguageSupported
         if (bool = 1)
         {
            DllCall(NumGet(NumGet(LanguageTest+0)+6*A_PtrSize), "ptr", LanguageTest, "ptr*", hText)
            buffer := DllCall("Combase.dll\WindowsGetStringRawBuffer", "ptr", hText, "uint*", length, "ptr")
            text .= StrGet(buffer, "UTF-16") "`n"
            ; Text has been copied into the script string; free the HSTRING.
            DeleteHString(hText)
         }
         ObjRelease(LanguageTest)
      }
      ObjRelease(LanguageList)
      return text
   }
   if (lang != CurrentLanguage) or (lang = "FirstFromAvailableLanguages")
   {
      ; (Re)create the engine; drop the previous engine and language first.
      if (OcrEngine != "")
      {
         ObjRelease(OcrEngine)
         if (CurrentLanguage != "FirstFromAvailableLanguages")
            ObjRelease(Language)
      }
      if (lang = "FirstFromAvailableLanguages")
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+10*A_PtrSize), "ptr", OcrEngineStatics, "ptr*", OcrEngine)   ; TryCreateFromUserProfileLanguages
      else
      {
         CreateHString(lang, hString)
         DllCall(NumGet(NumGet(LanguageFactory+0)+6*A_PtrSize), "ptr", LanguageFactory, "ptr", hString, "ptr*", Language)   ; CreateLanguage
         DeleteHString(hString)
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+9*A_PtrSize), "ptr", OcrEngineStatics, "ptr", Language, "ptr*", OcrEngine)   ; TryCreateFromLanguage
      }
      if (OcrEngine = 0)
      {
         msgbox Can not use language "%lang%" for OCR, please install language pack.
         ExitApp
      }
      CurrentLanguage := lang
   }
   IRandomAccessStream := file
   DllCall(NumGet(NumGet(BitmapDecoderStatics+0)+14*A_PtrSize), "ptr", BitmapDecoderStatics, "ptr", IRandomAccessStream, "ptr*", BitmapDecoder)   ; CreateAsync
   WaitForAsync(BitmapDecoder)
   BitmapFrame := ComObjQuery(BitmapDecoder, IBitmapFrame := "{72A49A1C-8081-438D-91BC-94ECFC8185C6}")
   DllCall(NumGet(NumGet(BitmapFrame+0)+12*A_PtrSize), "ptr", BitmapFrame, "uint*", width)   ; get_PixelWidth
   DllCall(NumGet(NumGet(BitmapFrame+0)+13*A_PtrSize), "ptr", BitmapFrame, "uint*", height)   ; get_PixelHeight
   if (width > MaxDimension) or (height > MaxDimension)
   {
      msgbox Image is to big - %width%x%height%.`nIt should be maximum - %MaxDimension% pixels
      ExitApp
   }
   BitmapFrameWithSoftwareBitmap := ComObjQuery(BitmapDecoder, IBitmapFrameWithSoftwareBitmap := "{FE287C9A-420C-4963-87AD-691436E08383}")
   DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+6*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapAsync
   WaitForAsync(SoftwareBitmap)
   DllCall(NumGet(NumGet(OcrEngine+0)+6*A_PtrSize), "ptr", OcrEngine, "ptr", SoftwareBitmap, "ptr*", OcrResult)   ; RecognizeAsync
   WaitForAsync(OcrResult)
   DllCall(NumGet(NumGet(OcrResult+0)+6*A_PtrSize), "ptr", OcrResult, "ptr*", LinesList)   ; get_Lines
   DllCall(NumGet(NumGet(LinesList+0)+7*A_PtrSize), "ptr", LinesList, "int*", count)   ; count
   loop % count
   {
      DllCall(NumGet(NumGet(LinesList+0)+6*A_PtrSize), "ptr", LinesList, "int", A_Index-1, "ptr*", OcrLine)
      DllCall(NumGet(NumGet(OcrLine+0)+7*A_PtrSize), "ptr", OcrLine, "ptr*", hText)
      buffer := DllCall("Combase.dll\WindowsGetStringRawBuffer", "ptr", hText, "uint*", length, "ptr")
      text .= StrGet(buffer, "UTF-16") "`n"
      ; Line text has been copied; free the HSTRING before the next line.
      DeleteHString(hText)
      ObjRelease(OcrLine)
   }
   ; Explicitly Close() the stream and the bitmap before dropping the references.
   Close := ComObjQuery(IRandomAccessStream, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   Close := ComObjQuery(SoftwareBitmap, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   ObjRelease(IRandomAccessStream)
   ObjRelease(BitmapDecoder)
   ObjRelease(BitmapFrame)
   ObjRelease(BitmapFrameWithSoftwareBitmap)
   ObjRelease(SoftwareBitmap)
   ObjRelease(OcrResult)
   ObjRelease(LinesList)
   return text
}



CreateClass(string, interface, ByRef Class)
{
   ; Obtains the WinRT activation factory for the runtime class named by
   ; string and stores the requested interface pointer in Class (ByRef).
   ;
   ; string    - runtime class name, e.g. "Windows.Media.Ocr.OcrEngine"
   ; interface - IID of the wanted factory/statics interface, as a GUID string
   ;
   ; On failure a message box is shown and the script exits.
   CreateHString(string, hString)
   VarSetCapacity(GUID, 16)
   DllCall("ole32\CLSIDFromString", "wstr", interface, "ptr", &GUID)
   ; "uint" return type: without it the HRESULT comes back as a signed Int
   ; and never equals the 0x8000xxxx constants compared below.
   result := DllCall("Combase.dll\RoGetActivationFactory", "ptr", hString, "ptr", &GUID, "ptr*", Class, "uint")
   ; The class-name string is no longer needed on either path.
   DeleteHString(hString)
   if (result != 0)
   {
      if (result = 0x80004002)
         msgbox No such interface supported
      else if (result = 0x80040154)
         msgbox Class not registered
      else
         msgbox error: %result%
      ExitApp
   }
}

CreateHString(string, ByRef hString) {
   ; Creates a WinRT HSTRING holding a copy of string and stores the handle
   ; in hString (ByRef). Release it with DeleteHString.
   charCount := StrLen(string)
   DllCall("Combase.dll\WindowsCreateString", "wstr", string, "uint", charCount, "ptr*", hString)
}

DeleteHString(hString) {
   ; Releases an HSTRING handle by passing it to WindowsDeleteString.
   DllCall("Combase.dll\WindowsDeleteString", "ptr", hString)
}

WaitForAsync(ByRef Object)
{
   ; Polls a WinRT async operation every 10 ms until it leaves status 0,
   ; then replaces Object (ByRef) with the operation's result and releases
   ; the operation itself. Any final status other than 1 shows the
   ; operation's error code and exits the script.
   pInfo := ComObjQuery(Object, "{00000036-0000-0000-C000-000000000046}")   ; IAsyncInfo
   loop
   {
      DllCall(NumGet(NumGet(pInfo+0)+7*A_PtrSize), "ptr", pInfo, "uint*", state)   ; IAsyncInfo.Status
      if (state = 0)
      {
         ; Still running: wait a little and ask again.
         sleep 10
         continue
      }
      if (state != 1)
      {
         DllCall(NumGet(NumGet(pInfo+0)+8*A_PtrSize), "ptr", pInfo, "uint*", failureCode)   ; IAsyncInfo.ErrorCode
         msgbox AsyncInfo status error: %failureCode%
         ExitApp
      }
      ObjRelease(pInfo)
      break
   }
   DllCall(NumGet(NumGet(Object+0)+8*A_PtrSize), "ptr", Object, "ptr*", outcome)   ; GetResults
   ObjRelease(Object)
   Object := outcome
}




This is the second script, 'Face Detection From File'. It does not scan the screen content; instead it scans a specific image file and returns the coordinates of the faces it finds:





Code: Select all

; Demo: run face detection on face.jpg and show the textual result, then
; quit. facedetect() prefixes A_ScriptDir when the second character of the
; path is not ":".
msgbox % facedetect("face.jpg")
ExitApp



facedetect(file, maxheight := 2000)
{
   ; Detects faces in an image file with Windows.Media.FaceAnalysis.
   ;
   ; file      - image path; when its second character is not ":" it is
   ;             taken relative to A_ScriptDir.
   ; maxheight - images taller than this are scaled down to this height
   ;             (width scaled proportionally) before detection.
   ;
   ; Returns one line per face: "faceN: x=.., y=.., width=.., height=..".
   ; NOTE(review): the bounds are reported by the detector for the bitmap
   ; it was given, so for a down-scaled image they presumably refer to the
   ; scaled image rather than the original file - confirm before mapping
   ; them back.
   ; Shows a message box and exits the script when the file does not exist.
   ;
   ; The image dimensions are kept separate from the per-face bounds:
   ; reusing width/height for the faces made the final cleanup check
   ; compare a face height against maxheight and skip releasing the
   ; encoder and transform.
   static BitmapDecoderStatics, BitmapEncoderStatics, SoftwareBitmapStatics, FaceDetector, SupportedBitmapPixelFormats
   if (FaceDetector = "")
   {
      ; One-time setup: factories, the detector itself and the list of
      ; pixel formats it accepts (stored as "|fmt||fmt|..." for InStr lookup).
      CreateClass("Windows.Graphics.Imaging.BitmapDecoder", IBitmapDecoderStatics := "{438CCB26-BCEF-4E95-BAD6-23A822E58D01}", BitmapDecoderStatics)
      CreateClass("Windows.Graphics.Imaging.BitmapEncoder", IBitmapEncoderStatics := "{A74356A7-A4E4-4EB9-8E40-564DE7E1CCB2}", BitmapEncoderStatics)
      CreateClass("Windows.Graphics.Imaging.SoftwareBitmap", ISoftwareBitmapStatics := "{DF0385DB-672F-4A9D-806E-C2442F343E86}", SoftwareBitmapStatics)
      CreateClass("Windows.Media.FaceAnalysis.FaceDetector", IFaceDetectorStatics := "{BC042D67-9047-33F6-881B-6746C1B218B8}", FaceDetectorStatics)
      DllCall(NumGet(NumGet(FaceDetectorStatics+0)+6*A_PtrSize), "ptr", FaceDetectorStatics, "ptr*", FaceDetector)   ; CreateAsync
      WaitForAsync(FaceDetector)
      DllCall(NumGet(NumGet(FaceDetectorStatics+0)+7*A_PtrSize), "ptr", FaceDetectorStatics, "ptr*", ReadOnlyList)   ; GetSupportedBitmapPixelFormats
      DllCall(NumGet(NumGet(ReadOnlyList+0)+7*A_PtrSize), "ptr", ReadOnlyList, "int*", count)   ; count
      loop % count
      {
         DllCall(NumGet(NumGet(ReadOnlyList+0)+6*A_PtrSize), "ptr", ReadOnlyList, "int", A_Index-1, "uint*", BitmapPixelFormat)   ; get_Item
         SupportedBitmapPixelFormats .= "|" BitmapPixelFormat "|"
      }
      ObjRelease(FaceDetectorStatics)
      ObjRelease(ReadOnlyList)
   }
   if (SubStr(file, 2, 1) != ":")
      file := A_ScriptDir "\" file
   if !FileExist(file) or InStr(FileExist(file), "D")
   {
      msgbox File "%file%" does not exist
      ExitApp
   }
   VarSetCapacity(GUID, 16)
   DllCall("ole32\CLSIDFromString", "wstr", IID_RandomAccessStream := "{905A0FE1-BC53-11DF-8C49-001E4FC686DA}", "ptr", &GUID)
   DllCall("ShCore\CreateRandomAccessStreamOnFile", "wstr", file, "uint", Read := 0, "ptr", &GUID, "ptr*", IRandomAccessStream)
   DllCall(NumGet(NumGet(BitmapDecoderStatics+0)+14*A_PtrSize), "ptr", BitmapDecoderStatics, "ptr", IRandomAccessStream, "ptr*", BitmapDecoder)   ; CreateAsync
   WaitForAsync(BitmapDecoder)
   BitmapFrame := ComObjQuery(BitmapDecoder, IBitmapFrame := "{72A49A1C-8081-438D-91BC-94ECFC8185C6}")
   DllCall(NumGet(NumGet(BitmapFrame+0)+12*A_PtrSize), "ptr", BitmapFrame, "uint*", width)   ; get_PixelWidth
   DllCall(NumGet(NumGet(BitmapFrame+0)+13*A_PtrSize), "ptr", BitmapFrame, "uint*", height)   ; get_PixelHeight
   DllCall(NumGet(NumGet(BitmapFrame+0)+8*A_PtrSize), "ptr", BitmapFrame, "uint*", BitmapPixelFormat)   ; get_BitmapPixelFormat
   BitmapFrameWithSoftwareBitmap := ComObjQuery(BitmapDecoder, IBitmapFrameWithSoftwareBitmap := "{FE287C9A-420C-4963-87AD-691436E08383}")
   ; Decide once whether the image is scaled; the cleanup below relies on it.
   scaled := (height > maxheight)
   if (scaled)
   {
      DllCall(NumGet(NumGet(BitmapEncoderStatics+0)+15*A_PtrSize), "ptr", BitmapEncoderStatics, "ptr", IRandomAccessStream, "ptr", BitmapDecoder, "ptr*", BitmapEncoder)   ; CreateForTranscodingAsync
      WaitForAsync(BitmapEncoder)
      DllCall(NumGet(NumGet(BitmapEncoder+0)+15*A_PtrSize), "ptr", BitmapEncoder, "ptr*", BitmapTransform)   ; BitmapTransform
      DllCall(NumGet(NumGet(BitmapTransform+0)+7*A_PtrSize), "ptr", BitmapTransform, "int", floor(maxheight/height*width))   ; put_ScaledWidth
      DllCall(NumGet(NumGet(BitmapTransform+0)+9*A_PtrSize), "ptr", BitmapTransform, "int", maxheight)   ; put_ScaledHeight
      DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+8*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "uint", BitmapPixelFormat, "uint", Premultiplied := 0, "ptr", BitmapTransform, "uint", IgnoreExifOrientation := 0, "uint", DoNotColorManage := 0, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapTransformedAsync
   }
   else
      DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+6*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapAsync
   WaitForAsync(SoftwareBitmap)
   if !InStr(SupportedBitmapPixelFormats, "|" BitmapPixelFormat "|")
   {
      ; The detector does not accept this pixel format: convert to Gray8
      ; and continue with the converted bitmap.
      DllCall(NumGet(NumGet(SoftwareBitmapStatics+0)+7*A_PtrSize), "ptr", SoftwareBitmapStatics, "ptr", SoftwareBitmap, "uint", Gray8 := 62, "ptr*", SoftwareBitmapTemp)   ; Convert
      Close := ComObjQuery(SoftwareBitmap, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
      DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
      ObjRelease(Close)
      ObjRelease(SoftwareBitmap)
      SoftwareBitmap := SoftwareBitmapTemp
   }
   DllCall(NumGet(NumGet(FaceDetector+0)+6*A_PtrSize), "ptr", FaceDetector, "ptr", SoftwareBitmap, "ptr*", DetectedFaceList)   ; DetectFacesAsync
   WaitForAsync(DetectedFaceList)
   DllCall(NumGet(NumGet(DetectedFaceList+0)+7*A_PtrSize), "ptr", DetectedFaceList, "int*", count)   ; count
   loop % count
   {
      ; bounds receives four UInt fields: x, y, width, height.
      varsetcapacity(bounds, 16, 0)
      DllCall(NumGet(NumGet(DetectedFaceList+0)+6*A_PtrSize), "ptr", DetectedFaceList, "int", A_Index-1, "ptr*", DetectedFace)   ; get_Item
      DllCall(NumGet(NumGet(DetectedFace+0)+6*A_PtrSize), "ptr", DetectedFace, "ptr", &bounds)   ; BitmapBounds
      faceX := numget(bounds, 0, "uint")
      faceY := numget(bounds, 4, "uint")
      faceW := numget(bounds, 8, "uint")
      faceH := numget(bounds, 12, "uint")
      result .= "face" A_Index ": x=" faceX ", y=" faceY ", width=" faceW ", height=" faceH "`n"
      ObjRelease(DetectedFace)
   }
   Close := ComObjQuery(IRandomAccessStream, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   Close := ComObjQuery(SoftwareBitmap, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   ObjRelease(IRandomAccessStream)
   ObjRelease(BitmapDecoder)
   ObjRelease(BitmapFrame)
   if (scaled)
   {
      ObjRelease(BitmapEncoder)
      ObjRelease(BitmapTransform)
   }
   ObjRelease(BitmapFrameWithSoftwareBitmap)
   ObjRelease(SoftwareBitmap)
   ObjRelease(DetectedFaceList)
   return result
}



CreateClass(string, interface, ByRef Class)
{
   ; Fetches the activation factory of the WinRT class named by string and
   ; stores the interface identified by the GUID string interface in
   ; Class (ByRef). Any failure is reported in a message box and ends the
   ; script.
   CreateHString(string, hClassName)
   VarSetCapacity(iid, 16)
   DllCall("ole32\CLSIDFromString", "wstr", interface, "ptr", &iid)
   result := DllCall("Combase.dll\RoGetActivationFactory", "ptr", hClassName, "ptr", &iid, "ptr*", Class, "uint")
   if (result = 0)
   {
      ; Success: only the temporary class-name string is left to free.
      DeleteHString(hClassName)
      return
   }
   if (result = 0x80004002)
      msgbox No such interface supported
   else if (result = 0x80040154)
      msgbox Class not registered
   else
      msgbox error: %result%
   ExitApp
}

CreateHString(string, ByRef hString) {
   ; Wraps WindowsCreateString: hString (ByRef) receives a new HSTRING
   ; containing the StrLen(string) characters of string.
   length := StrLen(string)
   DllCall("Combase.dll\WindowsCreateString", "wstr", string, "uint", length, "ptr*", hString)
}

DeleteHString(hString) {
   ; Hands the HSTRING handle to WindowsDeleteString.
   DllCall("Combase.dll\WindowsDeleteString", "ptr", hString)
}

WaitForAsync(ByRef Object)
{
   ; Waits for the WinRT async operation in Object to finish, polling its
   ; status every 10 ms, then swaps Object (ByRef) for the operation's
   ; result and releases the operation. A final status other than 1 is
   ; reported with the operation's error code and ends the script.
   asyncInfo := ComObjQuery(Object, "{00000036-0000-0000-C000-000000000046}")   ; IAsyncInfo
   state := 0
   while (state = 0)
   {
      DllCall(NumGet(NumGet(asyncInfo+0)+7*A_PtrSize), "ptr", asyncInfo, "uint*", state)   ; IAsyncInfo.Status
      if (state = 0)
         sleep 10
   }
   if (state != 1)
   {
      DllCall(NumGet(NumGet(asyncInfo+0)+8*A_PtrSize), "ptr", asyncInfo, "uint*", errorValue)   ; IAsyncInfo.ErrorCode
      msgbox AsyncInfo status error: %errorValue%
      ExitApp
   }
   ObjRelease(asyncInfo)
   DllCall(NumGet(NumGet(Object+0)+8*A_PtrSize), "ptr", Object, "ptr*", finished)   ; GetResults
   ObjRelease(Object)
   Object := finished
}


So I am trying to do 3 things:

1. Apply 'Face Recognition From File' script to the 'Text Recognition From Screen' script so that new script is finding faces on the screen
2. Remove the Area Selector from the 'Text Recognition From Screen' script, so that the script scans the whole screen instead of a selected area
3. Make new script Mouse move to the face found that is closest to the screen center



Dear autohotkey forum members, any help would be strongly appreciated and surely I will share my codes on the road to the goal script. :crazy:
Regards and all the best for You :wave:
Andy

Re: Optical character recognition (OCR) with UWP API

Posted: 22 Jan 2021, 04:45
by blue83
Hi,

Is it possible to read field with coordinates inside .png file?

We have a function here for reading from the screen, but what if I want to read the picture without putting it on the screen?

Thanks,
blue

Re: Optical character recognition (OCR) with UWP API

Posted: 22 Jan 2021, 05:17
by malcev
The first code in this topic loads image.
If you want to read only particular coordinates, then you have to crop your image (with GDI+, for example) and then convert the HBitmap to a RandomAccessStream (look at the second example in the first post).

Re: Optical character recognition (OCR) with UWP API

Posted: 22 Jan 2021, 06:46
by blue83
Hi @malcev,

thank you for suggestion I will try like that.

Blue

Re: Optical character recognition (OCR) with UWP API

Posted: 29 Jan 2021, 12:10
by blackjoker
Works fine for me!!!

Solid job @malcev @teadrinker @adegard !!! Thank you!

Re: Optical character recognition (OCR) with UWP API

Posted: 07 Feb 2021, 12:49
by ewerybody
Hello folks! First of all thanks for this awesome OCR script!

I had quite some success but after a Windows update I now get a No valid COM object! error when I run the teadrinker code included in my runtime.
From this line here in the WaitForAsync function:

Code: Select all

AsyncInfo := ComObjQuery(Object, IAsyncInfo := "{00000036-0000-0000-C000-000000000046}")
It works 2 times on BitmapDecoder and SoftwareBitmap but on the third call it breaks with the said error^. Thats from this call here:

Code: Select all

    ...
    DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+6*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapAsync
    WaitForAsync(SoftwareBitmap)
    DllCall(NumGet(NumGet(OcrEngine+0)+6*A_PtrSize), "ptr", OcrEngine, ptr, SoftwareBitmap, "ptr*", OcrResult)   ; RecognizeAsync
>>> WaitForAsync(OcrResult)
    DllCall(NumGet(NumGet(OcrResult+0)+6*A_PtrSize), "ptr", OcrResult, "ptr*", LinesList)   ; get_Lines
    DllCall(NumGet(NumGet(LinesList+0)+7*A_PtrSize), "ptr", LinesList, "int*", count)   ; count
    loop % count
    { ...
As a workaround I call it with a separate Autohotkey.exe process via ComObjCreate("WScript.Shell") which also works but its rather sad that it breaks in my runtime now.
Any Ideas?

Re: Optical character recognition (OCR) with UWP API

Posted: 07 Feb 2021, 13:27
by teadrinker
@ewerybody
Can't reproduce the issue. All latest updates are installed. Windows 10 Pro 20H2.

Re: Optical character recognition (OCR) with UWP API

Posted: 07 Feb 2021, 18:08
by ewerybody
Yeah I'm on the same. Hmm .. now it works again. With both methods :/ weird. 🤷‍♀️

Re: Optical character recognition (OCR) with UWP API

Posted: 06 Apr 2021, 16:49
by doubledave22
Hey guys, having some troubles here. If I run this on a separate script with nothing else in it, it gets all the text I want from the screen area I chose. Once I place this code in my main script (which has thousands of lines) then it fails.

Running a tooltip in the WaitForAsync seems to always just return 0 for Status which never resolves. The call just before this is: WaitForAsync(BitmapDecoder)

Code: Select all

DllCall(NumGet(NumGet(AsyncInfo+0)+7*A_PtrSize), "ptr", AsyncInfo, "uint*", status)   ; IAsyncInfo.Status
Any ideas how to debug this? Just want to know if there's any known conflicts or if theres any way I can start to work through what might be preventing the status from resolving.

Code: Select all


; Ctrl+T: OCR the fixed screen rectangle at (15, 15), 800 x 599 pixels,
; using ocr()'s default language, and show the text.
^t::
x := 15
y := 15
w := 800
h := 599
hBitmap := HBitmapFromScreen(x, y, w, h)
pIRandomAccessStream := HBitmapToRandomAccessStream(hBitmap)
; The stream has been created from the bitmap; the GDI handle is deleted here.
DllCall("DeleteObject", "Ptr", hBitmap)
text := ocr(pIRandomAccessStream)
MsgBox, % text
Return

edit: I've tried removing all timers, hooks, simplifying my auto-execute all to no avail. I checked to make sure the bitmap was valid by converting to pbitmap and saving to file and all looks good there.

Re: Optical character recognition (OCR) with UWP API

Posted: 06 Apr 2021, 16:58
by teadrinker
Hi
First, make sure hBitmap is actually returned:

Code: Select all

MsgBox, % hBitmap := HBitmapFromScreen(x, y, w, h)

Re: Optical character recognition (OCR) with UWP API

Posted: 06 Apr 2021, 17:08
by doubledave22
teadrinker wrote:
06 Apr 2021, 16:58
Hi
First, make sure hBitmap is actually returned:

Code: Select all

MsgBox, % hBitmap := HBitmapFromScreen(x, y, w, h)
It is returned. As some advice from a friend, he said maybe a critical thread is blocking it. I have disabled all critical threads and now it is working. I guess i have to work through each to find the culprit now.

Re: Optical character recognition (OCR) with UWP API

Posted: 07 Apr 2021, 12:13
by doubledave22
So i am using this error suppressor so my users don't get strange looking errors:
https://autohotkey.com/board/topic/65672-suppressing-run-time-error-messages/

Code: Select all

; Install the replacement for built-in run-time error dialogs. Any "{#}"
; in the message text is replaced with the failing line number by
; SuppressRuntimeErrors_. (The string literal was missing its closing quote.)
SuppressRuntimeErrors("An error has occurred....")
return

SuppressRuntimeErrors(NewErrorFormat) {
    ; Public entry point. Forwards the message format to the combined
    ; helper / message-monitor function; hwnd = 0 marks the call as
    ; "called by script" rather than a window message.
    outcome := SuppressRuntimeErrors_(NewErrorFormat, 0, 0, 0)
    return outcome
}

SuppressRuntimeErrors_(wParam, lParam, msg, hwnd)
{
    ; Dual-purpose function.
    ;
    ; Called by script (hwnd = 0): wParam carries the new error message
    ;   format; the function registers itself as the WM_COMMNOTIFY handler
    ;   (once) and remembers any handler that was registered before it.
    ; Called as message handler (hwnd != 0): when the script's main window
    ;   is told a dialog is being shown and that dialog is a built-in error
    ;   dialog, it is closed and replaced by a MsgBox built from the stored
    ;   format ("{#}" -> line number) plus the "Specifically:" details.
    ;   The message is then forwarded to the previously registered handler.
    ;
    ; Fixes over the original:
    ;   * Critical and DetectHiddenWindows are restored on every
    ;     "called by script" exit, so calling this from the auto-execute
    ;     section no longer leaves the caller (and the defaults of threads
    ;     launched later) in Critical 1000.
    ;   * The dialog text is kept in its own variable so the msg parameter
    ;     reaches the chained handler unchanged.

    ; Constants:
    static WM_COMMNOTIFY := 0x0044, AHK_DIALOG := 1027
    ; Persistent variables:
    static sScriptWnd := 0, sScriptPID, sOnCommNotify, sMessage

    ; A_IsCritical is 0 when off, otherwise the message-check interval.
    prevCritical := A_IsCritical
    Critical 1000

    dhw := A_DetectHiddenWindows
    DetectHiddenWindows On

    if (hwnd)   ; Called internally to handle a WM_COMMNOTIFY message.
    {
        if (hwnd = sScriptWnd  ; Script's main window is the recipient.
            && wParam = AHK_DIALOG  ; We're showing a dialog of some sort.
            && WinExist("ahk_class #32770 ahk_pid " sScriptPID))
        {
            ControlGetText dlgText, Static1
            ; The following relies on the fact that all built-in error
            ; dialogs use this format to point out the current line:
            if RegExMatch(dlgText, "m`a)^--->`t0*\K\d+(?=:)", line)
            {
                ; If we change the text, the dialog will still be sized
                ; based on the previous text.  So instead, close this
                ; dialog and show a new one.
                WinClose
                ; Grab the details of the message as well.
                RegexMatch(dlgText, "Specifically:.*Line#",Matched)
                stringreplace, Matched, Matched, Specifically:,,
                stringreplace, Matched, Matched, Line#,,
                StringReplace, Matched, Matched, `r,, All
                StringReplace, Matched, Matched, `n,, All

                StringReplace newText, sMessage, {#}, %line%
                MsgBox 48,, % newText "`nDetails: " Matched
            }
        }

        ; Restore the setting to its thread-default.
        DetectHiddenWindows %dhw%

        ; The following calls the script's WM_COMMNOTIFY handler if it
        ; has one, otherwise it silently fails and returns nothing.
        ; Critical stays on here; this message-handler thread ends on return.
        return %sOnCommNotify%(wParam, lParam, msg, hwnd)
    }

    ; Called by script.
    sMessage := wParam

    ; Register only once; if we're already registered just fall through
    ; to the cleanup below.
    if (OnMessage(WM_COMMNOTIFY) != A_ThisFunc)
    {
        ; Retrieve previous message handler, if the script has one.
        sOnCommNotify := OnMessage(WM_COMMNOTIFY)

        ; Retrieve hwnd of main window (usually hidden) and process ID.
        Process Exist
        sScriptPID := ErrorLevel
        sScriptWnd := WinExist("ahk_class AutoHotkey ahk_pid " sScriptPID)

        ; Register message handler.
        ; NOTE(review): the original comment said to pass 10 for MaxThreads
        ; so that error dialogs shown by other threads while ours is up are
        ; also caught, but the code passes 1 - confirm which is intended.
        OnMessage(WM_COMMNOTIFY, A_ThisFunc, 1)
    }

    ; Since we were called by script, restore our caller's settings.
    DetectHiddenWindows %dhw%
    Critical %prevCritical%
}

As you can see this contains a Critical, 1000 at the top of the function. It seems that the critical here blocks the OCR WaitForAsync from resolving. There are a few other functions that contain "Critical" that also break WaitForAsync and keep "status" at 0 endlessly. It seems that calling "Critical, off" at the end of these functions fixes it but what is really happening here?

Re: Optical character recognition (OCR) with UWP API

Posted: 15 Apr 2021, 03:41
by 8992
How can I make this give accurate results from a specific font? I also tried Vis2 as well but the accuracy is similar to this one for the font I want it to read (i.e. not great) and runs much slower, I see that it's possible to train tesseract to work with new fonts but since it is so much slower I would prefer to use this ocr if possible. I have the .ttf of the font I want it to read. Even if I could just get it to read numbers and dashes accurately I would be very happy

Re: Optical character recognition (OCR) with UWP API

Posted: 15 Apr 2021, 07:18
by teadrinker
No, I think you can't train this OCR or something like this.

Re: Optical character recognition (OCR) with UWP API

Posted: 21 Apr 2021, 06:52
by Joe Glines
teadrinker wrote:
15 Apr 2021, 07:18
No, I think you can't train this OCR or somethink like this.
Even if I give it scooby snacks? (ha ha) Yeah, I agree, I don't think this "learns"

Re: Optical character recognition (OCR) with UWP API

Posted: 21 Apr 2021, 07:43
by teadrinker
Joe Glines wrote: Even if I give it scooby snacks?
As a last resort, you can try to talk to it in a stern voice. :)

Re: Optical character recognition (OCR) with UWP API

Posted: 22 Apr 2021, 16:16
by mebo
Hey there! I made an account just to thank you for this wonderful tool.

I do have a question though: the left click dragging is REALLY choppy/laggy/slow. When you hold left click down and drag, it is sometimes half a second to a full second behind. Is there a better way to render this rectangle for better performance?

Thank you!

Re: Optical character recognition (OCR) with UWP API

Posted: 23 Apr 2021, 03:34
by teadrinker
@mebo
It depends heavily on the performance/load of your computer.

Image

Re: Optical character recognition (OCR) with UWP API

Posted: 23 Apr 2021, 08:41
by mebo
Hey teadrinker!

I have quite a powerful machine (i9-7XXX) with no active load on the PC. I stared at task manager while I was moving the rectangle around and load stayed low.

There was a clipboard tool linked in this thread that I downloaded and the click+drag is really smooth (almost native to doing it right on the Windows desktop). If you move the the mouse too quick around the desktop with this script, it's really slow. I'm happy to provide a gif of it happening, but it's certainly with the method being used here.

Re: Optical character recognition (OCR) with UWP API

Posted: 23 Apr 2021, 10:24
by teadrinker
mebo wrote: If you move the the mouse too quick around the desktop with this script, it's really slow.
Can't reproduce. Anyway, you can try adding SetWinDelay, 0 at the beginning of the script and replacing LowLevelMouseProc() with this one:

Code: Select all

; Low-level mouse hook procedure that doubles as its own timer routine.
; This variant keeps only the most recent mouse position (coords.1) instead
; of a queue, so each timer run performs a single GUI update.
;
; Hook mode:  nCode/wParam/lParam come from the hook; lParam is the address
;             of the hook data whose first two Int fields are read as x and y.
; Timer mode: invoked through the bound function with nCode = "timer".
LowLevelMouseProc(nCode, wParam, lParam) {
   static WM_MOUSEMOVE := 0x200, WM_LBUTTONUP := 0x202
        , coords := [], startMouseX, startMouseY, hGui
        , timer := Func("LowLevelMouseProc").Bind("timer", "", "")
   
   if (nCode = "timer") {
      ; Use the latest recorded point only.
      point := coords[1]
      mouseX := point[1], mouseY := point[2]
      ; Top-left corner is the smaller coordinate on each axis, so the
      ; rectangle can be dragged in any direction.
      x := startMouseX < mouseX ? startMouseX : mouseX
      y := startMouseY < mouseY ? startMouseY : mouseY
      w := Abs(mouseX - startMouseX)
      h := Abs(mouseY - startMouseY)
      ; NA = show without activating; try swallows any error from the Gui command.
      try Gui, %hGUi%: Show, x%x% y%y% w%w% h%h% NA
   }
   else {
      ; On the first hook call, remember A_EventInfo as the GUI handle
      ; (presumably the eventInfo value given when the callback was
      ; registered - verify against the hook setup code).
      (!hGui && hGui := A_EventInfo)
      ; Button released: forget the anchor so the next drag starts fresh.
      if (wParam = WM_LBUTTONUP)
         startMouseX := startMouseY := ""
      if (wParam = WM_MOUSEMOVE)  {
         mouseX := NumGet(lParam + 0, "Int")
         mouseY := NumGet(lParam + 4, "Int")
         ; The first move after a reset defines the anchor corner.
         if (startMouseX = "") {
            startMouseX := mouseX
            startMouseY := mouseY
         }
         ; Overwrite the single slot with the newest position.
         coords.1 := [mouseX, mouseY]
         ; One-shot timer (negative period) performs the GUI update
         ; outside the hook call itself.
         SetTimer, % timer, -10
      }
      Return DllCall("CallNextHookEx", Ptr, 0, Int, nCode, UInt, wParam, Ptr, lParam)
   }
}