Optical character recognition (OCR) with UWP API

Post your working scripts, libraries and tools
r2997790
Posts: 56
Joined: 02 Feb 2017, 02:46

Re: Optical character recognition (OCR) with UWP API

24 Feb 2020, 16:17

Thanks Malcev, my mistake, for some reason I needed to put the full path to my test image... works super fast and is fantastic.
... now to hook it up to the screenclipper script that's floating around.

Love it! Fast and not flaky like the tesseract library.

Thank you very much.
malcev
Posts: 531
Joined: 12 Aug 2014, 12:37

Re: Optical character recognition (OCR) with UWP API

24 Feb 2020, 16:46

I updated the 1st post.
Now, I think you don't need to put the full path.
Also it checks for file existence.
Also I added code for recognizing screenshots by teadrinker.
User avatar
adegard
Posts: 87
Joined: 24 Nov 2017, 05:58
GitHub: adegard

Re: Optical character recognition (OCR) with UWP API

22 Mar 2020, 08:59

Hi @malcev , this script is very cool! :dance:

I would like to create a 'click on text' using your ocr.
How can I obtain a recursive function to search the full screen for some words, dividing it like a puzzle, then point the mouse at the middle of the successful area (which contains the strings) and click or double-click on it?
malcev
Posts: 531
Joined: 12 Aug 2014, 12:37

Re: Optical character recognition (OCR) with UWP API

22 Mar 2020, 10:30

Code: Select all

; Demo entry point: grab the whole primary screen, OCR it, then report completion.
SetBatchLines, -1
hBitmap := HBitmapFromScreen(0, 0, A_ScreenWidth, A_ScreenHeight)
pIRandomAccessStream := HBitmapToRandomAccessStream(hBitmap)
; The picture has already been serialized into the stream, so the GDI bitmap can
; be freed now; without this a full-screen bitmap leaks on every capture.
DllCall("DeleteObject", "Ptr", hBitmap)
ocr(pIRandomAccessStream, "en")   ; ocr() closes and releases the stream itself
MsgBox, done
Return


HBitmapFromScreen(X, Y, W, H) {
   ; Copies the W x H screen rectangle whose top-left corner is (X, Y) into a
   ; newly created GDI bitmap and returns its handle. The handle is not freed
   ; here; the caller is expected to pass it to DeleteObject when done.
   screenDC := DllCall("GetDC", "Ptr", 0, "UPtr")
   memDC    := DllCall("CreateCompatibleDC", "Ptr", screenDC, "UPtr")
   bitmap   := DllCall("CreateCompatibleBitmap", "Ptr", screenDC, "Int", W, "Int", H, "UPtr")
   DllCall("SelectObject", "Ptr", memDC, "Ptr", bitmap)
   ; 0x00CC0020 is the SRCCOPY raster operation (plain copy).
   DllCall("BitBlt", "Ptr", memDC, "Int", 0, "Int", 0, "Int", W, "Int", H, "Ptr", screenDC, "Int", X, "Int", Y, "UInt", 0x00CC0020)
   DllCall("DeleteDC", "Ptr", memDC)
   DllCall("ReleaseDC", "Ptr", 0, "Ptr", screenDC)
   Return bitmap
}

HBitmapToRandomAccessStream(hBitmap) {
   ; Serializes a GDI bitmap into an in-memory stream (via IPicture::SaveAsFile)
   ; and returns a pointer to a WinRT IRandomAccessStream over that data.
   ;   hBitmap - HBITMAP to serialize; it is not owned or deleted by this function.
   ; Returns the IRandomAccessStream pointer; the caller owns that reference.
   ; Throws an Exception when one of the COM calls reports a failure HRESULT.
   static IID_IRandomAccessStream := "{905A0FE1-BC53-11DF-8C49-001E4FC686DA}"
        , IID_IPicture            := "{7BF80980-BF32-101A-8BBB-00AA00300CAB}"
        , PICTYPE_BITMAP := 1
        , BSOS_DEFAULT   := 0

   hr := DllCall("Ole32\CreateStreamOnHGlobal", "Ptr", 0, "UInt", true, "PtrP", pIStream, "UInt")
   if (hr & 0x80000000)
      throw Exception("CreateStreamOnHGlobal failed. Error: " . Format("{:#x}", hr))

   ; PICTDESC layout: UINT cbSizeofstruct, UINT picType, then (pointer-aligned)
   ; HBITMAP and HPALETTE. Explicit field types keep each write inside its own
   ; field instead of relying on later writes to repair pointer-sized overlaps.
   VarSetCapacity(PICTDESC, sz := 8 + A_PtrSize*2, 0)
   NumPut(sz, PICTDESC, 0, "UInt")
   NumPut(PICTYPE_BITMAP, PICTDESC, 4, "UInt")
   NumPut(hBitmap, PICTDESC, 8, "Ptr")
   riid := CLSIDFromString(IID_IPicture, GUID1)
   ; fOwn = false: the picture object must not destroy the caller's bitmap.
   hr := DllCall("OleAut32\OleCreatePictureIndirect", "Ptr", &PICTDESC, "Ptr", riid, "UInt", false, "PtrP", pIPicture, "UInt")
   if (hr & 0x80000000) {
      ObjRelease(pIStream)
      throw Exception("OleCreatePictureIndirect failed. Error: " . Format("{:#x}", hr))
   }
   ; IPicture::SaveAsFile
   hr := DllCall(NumGet(NumGet(pIPicture+0) + A_PtrSize*15), "Ptr", pIPicture, "Ptr", pIStream, "UInt", true, "UIntP", size, "UInt")
   if !(hr & 0x80000000) {
      riid := CLSIDFromString(IID_IRandomAccessStream, GUID2)
      hr := DllCall("ShCore\CreateRandomAccessStreamOverStream", "Ptr", pIStream, "UInt", BSOS_DEFAULT, "Ptr", riid, "PtrP", pIRandomAccessStream, "UInt")
   }
   ; The temporary COM objects are released on both the success and failure path.
   ObjRelease(pIPicture)
   ObjRelease(pIStream)
   if (hr & 0x80000000)
      throw Exception("Converting the bitmap to a stream failed. Error: " . Format("{:#x}", hr))
   Return pIRandomAccessStream
}

CLSIDFromString(IID, ByRef CLSID) {
   ; Parses the GUID string IID into the 16-byte buffer CLSID (ByRef) and
   ; returns the address of that buffer. Throws an Exception carrying the
   ; HRESULT in hex when the conversion reports an error.
   VarSetCapacity(CLSID, 16, 0)
   hr := DllCall("ole32\CLSIDFromString", "WStr", IID, "Ptr", &CLSID, "UInt")
   if (hr)
      throw Exception("CLSIDFromString failed. Error: " . Format("{:#x}", hr))
   Return &CLSID
}


ocr(file, lang := "FirstFromAvailableLanguages")
{
   ; Runs Windows.Media.Ocr on an image stream and shows every recognized word
   ; together with its bounding rectangle in a message box.
   ;   file - pointer to an IRandomAccessStream holding an encoded image. The
   ;          stream is closed and released by this function. If file is the
   ;          string "ShowAvailableLanguages", no OCR is done and a
   ;          newline-separated list of the user-profile languages supported by
   ;          the OCR engine is returned instead.
   ;   lang - language tag handed to CreateLanguage (e.g. "en"), or
   ;          "FirstFromAvailableLanguages" to build the engine from the user
   ;          profile languages.
   ; Returns the text of the last recognized word (blank when nothing was found).
   ; Shows a message box and exits the script when the language is unavailable
   ; or the image exceeds the engine's maximum dimension.
   ; The activation factories and the engine are cached in static variables.
   static OcrEngineStatics, OcrEngine, MaxDimension, LanguageFactory, Language, CurrentLanguage, BitmapDecoderStatics, GlobalizationPreferencesStatics
   if (OcrEngineStatics = "")
   {
      ; First call only: fetch the activation factories and the size limit.
      CreateClass("Windows.Globalization.Language", ILanguageFactory := "{9B0252AC-0C27-44F8-B792-9793FB66C63E}", LanguageFactory)
      CreateClass("Windows.Graphics.Imaging.BitmapDecoder", IBitmapDecoderStatics := "{438CCB26-BCEF-4E95-BAD6-23A822E58D01}", BitmapDecoderStatics)
      CreateClass("Windows.Media.Ocr.OcrEngine", IOcrEngineStatics := "{5BFFA85A-3384-3540-9940-699120D428A8}", OcrEngineStatics)
      DllCall(NumGet(NumGet(OcrEngineStatics+0)+6*A_PtrSize), "ptr", OcrEngineStatics, "uint*", MaxDimension)   ; MaxImageDimension
   }
   if (file = "ShowAvailableLanguages")
   {
      if (GlobalizationPreferencesStatics = "")
         CreateClass("Windows.System.UserProfile.GlobalizationPreferences", IGlobalizationPreferencesStatics := "{01BF4326-ED37-4E96-B0E9-C1340D1EA158}", GlobalizationPreferencesStatics)
      DllCall(NumGet(NumGet(GlobalizationPreferencesStatics+0)+9*A_PtrSize), "ptr", GlobalizationPreferencesStatics, "ptr*", LanguageList)   ; get_Languages
      DllCall(NumGet(NumGet(LanguageList+0)+7*A_PtrSize), "ptr", LanguageList, "int*", count)   ; count
      loop % count
      {
         DllCall(NumGet(NumGet(LanguageList+0)+6*A_PtrSize), "ptr", LanguageList, "int", A_Index-1, "ptr*", hString)   ; get_Item
         DllCall(NumGet(NumGet(LanguageFactory+0)+6*A_PtrSize), "ptr", LanguageFactory, "ptr", hString, "ptr*", LanguageTest)   ; CreateLanguage
         DeleteHString(hString)   ; the HSTRING returned by get_Item is owned by the caller
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+8*A_PtrSize), "ptr", OcrEngineStatics, "ptr", LanguageTest, "int*", bool)   ; IsLanguageSupported
         if (bool = 1)
         {
            DllCall(NumGet(NumGet(LanguageTest+0)+6*A_PtrSize), "ptr", LanguageTest, "ptr*", hText)   ; presumably get_LanguageTag - TODO confirm
            buffer := DllCall("Combase.dll\WindowsGetStringRawBuffer", "ptr", hText, "uint*", length, "ptr")
            text .= StrGet(buffer, "UTF-16") "`n"
            DeleteHString(hText)   ; text was copied by StrGet, so the HSTRING can go
         }
         ObjRelease(LanguageTest)
      }
      ObjRelease(LanguageList)
      return text
   }
   if (lang != CurrentLanguage) or (lang = "FirstFromAvailableLanguages")
   {
      ; (Re)create the engine; drop the previous engine and language first.
      if (OcrEngine != "")
      {
         ObjRelease(OcrEngine)
         if (CurrentLanguage != "FirstFromAvailableLanguages")
            ObjRelease(Language)
      }
      if (lang = "FirstFromAvailableLanguages")
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+10*A_PtrSize), "ptr", OcrEngineStatics, "ptr*", OcrEngine)   ; TryCreateFromUserProfileLanguages
      else
      {
         CreateHString(lang, hString)
         DllCall(NumGet(NumGet(LanguageFactory+0)+6*A_PtrSize), "ptr", LanguageFactory, "ptr", hString, "ptr*", Language)   ; CreateLanguage
         DeleteHString(hString)
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+9*A_PtrSize), "ptr", OcrEngineStatics, "ptr", Language, "ptr*", OcrEngine)   ; TryCreateFromLanguage
      }
      if (OcrEngine = 0)
      {
         msgbox Can not use language "%lang%" for OCR, please install language pack.
         ExitApp
      }
      CurrentLanguage := lang
   }
   IRandomAccessStream := file
   DllCall(NumGet(NumGet(BitmapDecoderStatics+0)+14*A_PtrSize), "ptr", BitmapDecoderStatics, "ptr", IRandomAccessStream, "ptr*", BitmapDecoder)   ; CreateAsync
   WaitForAsync(BitmapDecoder)
   BitmapFrame := ComObjQuery(BitmapDecoder, IBitmapFrame := "{72A49A1C-8081-438D-91BC-94ECFC8185C6}")
   DllCall(NumGet(NumGet(BitmapFrame+0)+12*A_PtrSize), "ptr", BitmapFrame, "uint*", width)   ; get_PixelWidth
   DllCall(NumGet(NumGet(BitmapFrame+0)+13*A_PtrSize), "ptr", BitmapFrame, "uint*", height)   ; get_PixelHeight
   if (width > MaxDimension) or (height > MaxDimension)
   {
      msgbox Image is to big - %width%x%height%.`nIt should be maximum - %MaxDimension% pixels
      ExitApp
   }
   BitmapFrameWithSoftwareBitmap := ComObjQuery(BitmapDecoder, IBitmapFrameWithSoftwareBitmap := "{FE287C9A-420C-4963-87AD-691436E08383}")
   DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+6*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapAsync
   WaitForAsync(SoftwareBitmap)
   DllCall(NumGet(NumGet(OcrEngine+0)+6*A_PtrSize), "ptr", OcrEngine, "ptr", SoftwareBitmap, "ptr*", OcrResult)   ; RecognizeAsync
   WaitForAsync(OcrResult)
   DllCall(NumGet(NumGet(OcrResult+0)+6*A_PtrSize), "ptr", OcrResult, "ptr*", LinesList)   ; get_Lines
   DllCall(NumGet(NumGet(LinesList+0)+7*A_PtrSize), "ptr", LinesList, "int*", count)   ; count
   loop % count
   {
      DllCall(NumGet(NumGet(LinesList+0)+6*A_PtrSize), "ptr", LinesList, "int", A_Index-1, "ptr*", OcrLine)
      DllCall(NumGet(NumGet(OcrLine+0)+6*A_PtrSize), "ptr", OcrLine, "ptr*", WordsList)   ; get_Words
      DllCall(NumGet(NumGet(WordsList+0)+7*A_PtrSize), "ptr", WordsList, "int*", WordsCount)   ; Words count
      loop % WordsCount
      {
         DllCall(NumGet(NumGet(WordsList+0)+6*A_PtrSize), "ptr", WordsList, "int", A_Index-1, "ptr*", OcrWord)
         ; The bounding rectangle is read back as four 32-bit floats: x, y, width, height.
         VarSetCapacity(RECT, 16, 0)
         DllCall(NumGet(NumGet(OcrWord+0)+6*A_PtrSize), "ptr", OcrWord, "ptr", &RECT)   ; get_BoundingRect
         DllCall(NumGet(NumGet(OcrWord+0)+7*A_PtrSize), "ptr", OcrWord, "ptr*", hText)   ; get_Text
         buffer := DllCall("Combase.dll\WindowsGetStringRawBuffer", "ptr", hText, "uint*", length, "ptr")
         x := NumGet(&RECT, 0, "float")
         y := NumGet(&RECT, 4, "float")
         w := NumGet(&RECT, 8, "float")
         h := NumGet(&RECT, 12, "float")
         text := StrGet(buffer, "UTF-16")
         DeleteHString(hText)   ; release the HSTRING once its text has been copied
         msgbox %text%`nx: %x%, y: %y%, w: %w%, h: %h%
         ObjRelease(OcrWord)
      }
      ObjRelease(WordsList)
      ObjRelease(OcrLine)
   }
   ; Close the stream and the software bitmap explicitly via IClosable, then
   ; drop every COM reference obtained above.
   Close := ComObjQuery(IRandomAccessStream, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   Close := ComObjQuery(SoftwareBitmap, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   ObjRelease(IRandomAccessStream)
   ObjRelease(BitmapDecoder)
   ObjRelease(BitmapFrame)
   ObjRelease(BitmapFrameWithSoftwareBitmap)
   ObjRelease(SoftwareBitmap)
   ObjRelease(OcrResult)
   ObjRelease(LinesList)
   return text
}



CreateClass(string, interface, ByRef Class)
{
   ; Obtains the activation factory of the WinRT class named by `string`,
   ; asking for the interface whose GUID string is `interface`, and stores the
   ; resulting pointer in Class (ByRef). On failure a message box describes the
   ; HRESULT and the script exits.
   VarSetCapacity(GUID, 16)
   DllCall("ole32\CLSIDFromString", "wstr", interface, "ptr", &GUID)
   CreateHString(string, hString)
   hr := DllCall("Combase.dll\RoGetActivationFactory", "ptr", hString, "ptr", &GUID, "ptr*", Class, "uint")
   if (hr = 0)
   {
      DeleteHString(hString)
      return
   }
   if (hr = 0x80004002)
      MsgBox % "No such interface supported"
   else if (hr = 0x80040154)
      MsgBox % "Class not registered"
   else
      MsgBox % "error: " hr
   ExitApp
}

CreateHString(string, ByRef hString)
{
    ; Creates a WinRT HSTRING holding a copy of `string` and stores its handle
    ; in hString (ByRef). Pair every call with DeleteHString.
    charCount := StrLen(string)
    DllCall("Combase.dll\WindowsCreateString", "wstr", string, "uint", charCount, "ptr*", hString)
}

DeleteHString(hString) {
   ; Hands the HSTRING handle hString to WindowsDeleteString so it is released.
   DllCall("Combase.dll\WindowsDeleteString"
         , "ptr", hString)
}

WaitForAsync(ByRef Object)
{
   ; Blocks (polling every 10 ms) until the WinRT async operation in Object has
   ; left status 0, then replaces Object (ByRef) with the operation's result
   ; and releases the operation. Any final status other than 1 is reported
   ; with its error code in a message box and the script exits.
   asyncInfoPtr := ComObjQuery(Object, "{00000036-0000-0000-C000-000000000046}")   ; IAsyncInfo
   loop
   {
      DllCall(NumGet(NumGet(asyncInfoPtr+0)+7*A_PtrSize), "ptr", asyncInfoPtr, "uint*", state)   ; IAsyncInfo.Status
      if (state != 0)
         break
      sleep 10
   }
   if (state != 1)
   {
      DllCall(NumGet(NumGet(asyncInfoPtr+0)+8*A_PtrSize), "ptr", asyncInfoPtr, "uint*", ErrorCode)   ; IAsyncInfo.ErrorCode
      msgbox AsyncInfo status error: %ErrorCode%
      ExitApp
   }
   ObjRelease(asyncInfoPtr)
   DllCall(NumGet(NumGet(Object+0)+8*A_PtrSize), "ptr", Object, "ptr*", operationResult)   ; GetResults
   ObjRelease(Object)
   Object := operationResult
}
rommmcek
Posts: 910
Joined: 15 Aug 2014, 15:18

Re: Optical character recognition (OCR) with UWP API

23 Mar 2020, 09:22

Great idea by adegard and excellent execution by malcev!
Dpi independent, zoom independent, resolution independent... Many thanks!

P.s. off topic: How to get position of Magnifier (in full screen mode - Win10)
daywalker
Posts: 4
Joined: 18 Jun 2019, 01:37

Re: Optical character recognition (OCR) with UWP API

24 Mar 2020, 06:49

Very nice!!

Does anyone have an idea why the mouse cursor and the yellow box are at different positions?
example.jpg
example.jpg (18.31 KiB) Viewed 1178 times
rommmcek
Posts: 910
Joined: 15 Aug 2014, 15:18

Re: Optical character recognition (OCR) with UWP API

24 Mar 2020, 07:13

Probably you should use CoordMode, Mouse, Screen or
...
WinGetPos, wx, wy, ww, wh, A
hBitmap := HBitmapFromScreen(wx, wy, ww, wh)
...
User avatar
adegard
Posts: 87
Joined: 24 Nov 2017, 05:58
GitHub: adegard

Re: Optical character recognition (OCR) with UWP API

24 Mar 2020, 15:42

Excellent @malcev,
I insert here a little test in your OCR function with a third parameter "Find_Text" : ocr(pIRandomAccessStream, "en", Find_Text)
Now I check for a word, and click on it!
Spoiler
But what can I do to check more words in my string (sequence of words like "AutoHotkey Community")
rommmcek
Posts: 910
Joined: 15 Aug 2014, 15:18

Re: Optical character recognition (OCR) with UWP API

24 Mar 2020, 18:14

You can do something like:

Code: Select all

        ...
        text .= StrGet(buffer, "UTF-16") " "
        if InStr(text, Str:= "AutoHotkey Community", 1) {
            last:=StrLen("Community"), total:= StrLen(Str), wt:=total*w/last, Found:= 1
            MouseClick,, x+w-wt//2, y+h//2 ; approximately middle of the string
            SoundBeep, 1400 ; audio feed back
        } 
        ObjRelease(OcrWord)
        if Found {
            ObjRelease(WordsList)
            ObjRelease(OcrLine)
            Break 2
        }
      ObjRelease(WordsList)
      ObjRelease(OcrLine)
   }
Addendum: Further function change: More user friendly + multiple text/string detection.

Code: Select all

    ...
    m:=ocr(pIRandomAccessStream, "en", "AutoHotkey Community")
    for i, j in m
        MouseClick,, m[i].1, m[i].2
    ...
Return

ocr(file, lang := "FirstFromAvailableLanguages", Str:= "")
{
   ...
   loop % count
   {
      DllCall(NumGet(NumGet(LinesList+0)+6*A_PtrSize), "ptr", LinesList, "int", A_Index-1, "ptr*", OcrLine)
      DllCall(NumGet(NumGet(OcrLine+0)+6*A_PtrSize), "ptr", OcrLine, "ptr*", WordsList)   ; get_Words
      DllCall(NumGet(NumGet(WordsList+0)+7*A_PtrSize), "ptr", WordsList, "int*", WordsCount)   ; Words count
      A_Index<2? (coord:= [], f:= "", wl:= (Arr:=StrSplit(Str, " ")).Length()): ""
      loop % WordsCount
      {
         DllCall(NumGet(NumGet(WordsList+0)+6*A_PtrSize), "ptr", WordsList, "int", A_Index-1, "ptr*", OcrWord)
         VarSetCapacity(RECT, 16, 0)
         DllCall(NumGet(NumGet(OcrWord+0)+6*A_PtrSize), "ptr", OcrWord, "ptr", &RECT)   ; get_BoundingRect
         DllCall(NumGet(NumGet(OcrWord+0)+7*A_PtrSize), "ptr", OcrWord, "ptr*", hText)   ; get_Text
         buffer := DllCall("Combase.dll\WindowsGetStringRawBuffer", "ptr", hText, "uint*", length, "ptr")
         x := NumGet(&RECT, 0, "float")
         y := NumGet(&RECT, 4, "float")
         w := NumGet(&RECT, 8, "float")
         h := NumGet(&RECT, 12, "float")
         
         if ((tl:=StrSplit(text, " ").Length()) > wl) {
            txt:= StrSplit(text, " "), text:= ""
            loop % tl-1 
                A_Index>1? text.= txt[A_Index] " ": ""
         } text.= StrGet(buffer, "UTF-16")
         
         if (text = Str) {
            last:=StrLen(Arr[wl]), total:= StrLen(Str), wt:=total*w/last
            Coord.Push([x+w-wt//2, y+h//2])
         } else text.= " "
         
         ObjRelease(OcrWord)
      }
      ObjRelease(WordsList)
      ObjRelease(OcrLine)
   }
   ...
   return Coord
}
Note: For strings consisting of multiple words: OCR sometimes finds columns in the text so applied concatenation might not always match passed string!
User avatar
flyingDman
Posts: 670
Joined: 29 Sep 2013, 19:01

Re: Optical character recognition (OCR) with UWP API

29 Apr 2020, 12:01

This is an excellent OCR tool. Thank you @malcev , @teadrinker and, for the rectangle drawing tool, @suoabb and @Rohwedder (see here https://www.autohotkey.com/boards/viewtopic.php?t=57919).

I often need to read a large number of PDF pages and collect specific information from these pages. Often these PDF's are not searchable and so copy/paste is out of the question. I slightly adapted this OCR script using a simplified (but just as effective) clipping tool. It adds the copy / paste functionality to non-searchable PDF's.
Shift Left-click and drag the orange rectangle.

Code: Select all

; Screen-clipping OCR: Shift+drag with the left mouse button to frame an area;
; on release the framed area is captured, recognized and shown. Esc quits.
#NoEnv
#singleinstance, force   ; force, ignore, off
SetBatchLines, -1        ; For speed in general
SetWinDelay, -1          ; For speed of WinMove
CoordMode, Mouse, Screen

BW := 2                  ; Border width (and height) in pixels
BC := "FF8800"           ; Border color
FirstCall := True        ; True until the frame GUI has been shown for the current drag
Gui, -Caption +ToolWindow +LastFound +AlwaysOnTop
Gui, Color, %BC%
Return

Esc::
ExitApp

; Drag start: remember the origin and begin redrawing the frame every 20 ms.
+LButton::
MouseGetPos, OriginX, OriginY
WinGetActiveStats, Title, WindowWidth, WindowHeight, WindowX, WindowY
; Forget the previous selection so that a click without dragging cannot
; re-capture the rectangle left over from an earlier drag.
X1 := Y1 := W1 := H1 := 0
SetTimer, DrawRectangle, 20
Return

; Drag end: hide the frame, capture the selected rectangle and OCR it.
+LButton Up::
SetTimer, DrawRectangle, Off
FirstCall := True
Gui, Cancel
; Nothing was dragged out (or the timer never ran): there is no area to
; recognize, and a zero-sized bitmap would only make the OCR call fail.
if (W1 < 1 || H1 < 1)
	Return
hBitmap := HBitmapFromScreen(X1,Y1,W1,H1)
pIRandomAccessStream := HBitmapToRandomAccessStream(hBitmap)
DllCall("DeleteObject", "Ptr", hBitmap)
text := ocr(pIRandomAccessStream, "en")
text :=  StrReplace(text, "`n", " ")
msgbox % text                                                       ; and / or clipboard := text and / or  Sapi.Speak(text)
Return

; Timer routine: resizes the frame GUI so it spans from the drag origin to the
; current mouse position. Does nothing while the mouse has not moved.
DrawRectangle:
MouseGetPos, X2, Y2
If (XO != X2 || YO != Y2)
{
	Gui, +LastFound
	; Remember the raw mouse position for the "has it moved?" check above.
	XO := X2
	YO := Y2
	; Normalize so that (X1, Y1) is the top-left and (X2, Y2) the bottom-right corner.
	X1 := (X2 < OriginX) ? X2 : OriginX
	X2 := (X2 < OriginX) ? OriginX : X2
	Y1 := (Y2 < OriginY) ? Y2 : OriginY
	Y2 := (Y2 < OriginY) ? OriginY : Y2
	W1 := X2 - X1
	H1 := Y2 - Y1
	W2 := W1 - BW
	H2 := H1 - BW
	; Outer rectangle plus inner rectangle: only a BW-wide border stays visible.
	WinSet, Region, 0-0 %W1%-0 %W1%-%H1% 0-%H1% 0-0 %BW%-%BW% %W2%-%BW% %W2%-%H2% %BW%-%H2% %BW%-%BW%
	If (FirstCall)
	{
		Gui, Show, NA x%X1% y%Y1% w%W1% h%H1%
		FirstCall := False
	}
	WinMove, , , X1, Y1, W1, H1
}
Return

HBitmapFromScreen(X, Y, W, H) {
   ; Copies the W x H screen rectangle whose top-left corner is (X, Y) into a
   ; newly created GDI bitmap and returns its handle. The handle is not freed
   ; here; the caller is expected to pass it to DeleteObject when done.
   screenDC := DllCall("GetDC", "Ptr", 0, "UPtr")
   memDC    := DllCall("CreateCompatibleDC", "Ptr", screenDC, "UPtr")
   bitmap   := DllCall("CreateCompatibleBitmap", "Ptr", screenDC, "Int", W, "Int", H, "UPtr")
   DllCall("SelectObject", "Ptr", memDC, "Ptr", bitmap)
   ; 0x00CC0020 is the SRCCOPY raster operation (plain copy).
   DllCall("BitBlt", "Ptr", memDC, "Int", 0, "Int", 0, "Int", W, "Int", H, "Ptr", screenDC, "Int", X, "Int", Y, "UInt", 0x00CC0020)
   DllCall("DeleteDC", "Ptr", memDC)
   DllCall("ReleaseDC", "Ptr", 0, "Ptr", screenDC)
   Return bitmap
}

HBitmapToRandomAccessStream(hBitmap) {
   ; Serializes a GDI bitmap into an in-memory stream (via IPicture::SaveAsFile)
   ; and returns a pointer to a WinRT IRandomAccessStream over that data.
   ;   hBitmap - HBITMAP to serialize; it is not owned or deleted by this function.
   ; Returns the IRandomAccessStream pointer; the caller owns that reference.
   ; Throws an Exception when one of the COM calls reports a failure HRESULT.
   static IID_IRandomAccessStream := "{905A0FE1-BC53-11DF-8C49-001E4FC686DA}"
        , IID_IPicture            := "{7BF80980-BF32-101A-8BBB-00AA00300CAB}"
        , PICTYPE_BITMAP := 1
        , BSOS_DEFAULT   := 0

   hr := DllCall("Ole32\CreateStreamOnHGlobal", "Ptr", 0, "UInt", true, "PtrP", pIStream, "UInt")
   if (hr & 0x80000000)
      throw Exception("CreateStreamOnHGlobal failed. Error: " . Format("{:#x}", hr))

   ; PICTDESC layout: UINT cbSizeofstruct, UINT picType, then (pointer-aligned)
   ; HBITMAP and HPALETTE. Explicit field types keep each write inside its own
   ; field instead of relying on later writes to repair pointer-sized overlaps.
   VarSetCapacity(PICTDESC, sz := 8 + A_PtrSize*2, 0)
   NumPut(sz, PICTDESC, 0, "UInt")
   NumPut(PICTYPE_BITMAP, PICTDESC, 4, "UInt")
   NumPut(hBitmap, PICTDESC, 8, "Ptr")
   riid := CLSIDFromString(IID_IPicture, GUID1)
   ; fOwn = false: the picture object must not destroy the caller's bitmap.
   hr := DllCall("OleAut32\OleCreatePictureIndirect", "Ptr", &PICTDESC, "Ptr", riid, "UInt", false, "PtrP", pIPicture, "UInt")
   if (hr & 0x80000000) {
      ObjRelease(pIStream)
      throw Exception("OleCreatePictureIndirect failed. Error: " . Format("{:#x}", hr))
   }
   ; IPicture::SaveAsFile
   hr := DllCall(NumGet(NumGet(pIPicture+0) + A_PtrSize*15), "Ptr", pIPicture, "Ptr", pIStream, "UInt", true, "UIntP", size, "UInt")
   if !(hr & 0x80000000) {
      riid := CLSIDFromString(IID_IRandomAccessStream, GUID2)
      hr := DllCall("ShCore\CreateRandomAccessStreamOverStream", "Ptr", pIStream, "UInt", BSOS_DEFAULT, "Ptr", riid, "PtrP", pIRandomAccessStream, "UInt")
   }
   ; The temporary COM objects are released on both the success and failure path.
   ObjRelease(pIPicture)
   ObjRelease(pIStream)
   if (hr & 0x80000000)
      throw Exception("Converting the bitmap to a stream failed. Error: " . Format("{:#x}", hr))
   Return pIRandomAccessStream
}

CLSIDFromString(IID, ByRef CLSID) {
   ; Parses the GUID string IID into the 16-byte buffer CLSID (ByRef) and
   ; returns the address of that buffer. Throws an Exception carrying the
   ; HRESULT in hex when the conversion reports an error.
   VarSetCapacity(CLSID, 16, 0)
   hr := DllCall("ole32\CLSIDFromString", "WStr", IID, "Ptr", &CLSID, "UInt")
   if (hr)
      throw Exception("CLSIDFromString failed. Error: " . Format("{:#x}", hr))
   Return &CLSID
}

ocr(file, lang := "FirstFromAvailableLanguages")
{
   ; Runs Windows.Media.Ocr on an image stream and returns the recognized text,
   ; one recognized line per "`n"-terminated row.
   ;   file - pointer to an IRandomAccessStream holding an encoded image. The
   ;          stream is closed and released by this function. If file is the
   ;          string "ShowAvailableLanguages", no OCR is done and a
   ;          newline-separated list of the user-profile languages supported by
   ;          the OCR engine is returned instead.
   ;   lang - language tag handed to CreateLanguage (e.g. "en"), or
   ;          "FirstFromAvailableLanguages" to build the engine from the user
   ;          profile languages.
   ; Shows a message box and exits the script when the language is unavailable
   ; or the image exceeds the engine's maximum dimension.
   ; The activation factories and the engine are cached in static variables.
   static OcrEngineStatics, OcrEngine, MaxDimension, LanguageFactory, Language, CurrentLanguage, BitmapDecoderStatics, GlobalizationPreferencesStatics
   if (OcrEngineStatics = "")
   {
      ; First call only: fetch the activation factories and the size limit.
      CreateClass("Windows.Globalization.Language", ILanguageFactory := "{9B0252AC-0C27-44F8-B792-9793FB66C63E}", LanguageFactory)
      CreateClass("Windows.Graphics.Imaging.BitmapDecoder", IBitmapDecoderStatics := "{438CCB26-BCEF-4E95-BAD6-23A822E58D01}", BitmapDecoderStatics)
      CreateClass("Windows.Media.Ocr.OcrEngine", IOcrEngineStatics := "{5BFFA85A-3384-3540-9940-699120D428A8}", OcrEngineStatics)
      DllCall(NumGet(NumGet(OcrEngineStatics+0)+6*A_PtrSize), "ptr", OcrEngineStatics, "uint*", MaxDimension)   ; MaxImageDimension
   }
   if (file = "ShowAvailableLanguages")
   {
      if (GlobalizationPreferencesStatics = "")
         CreateClass("Windows.System.UserProfile.GlobalizationPreferences", IGlobalizationPreferencesStatics := "{01BF4326-ED37-4E96-B0E9-C1340D1EA158}", GlobalizationPreferencesStatics)
      DllCall(NumGet(NumGet(GlobalizationPreferencesStatics+0)+9*A_PtrSize), "ptr", GlobalizationPreferencesStatics, "ptr*", LanguageList)   ; get_Languages
      DllCall(NumGet(NumGet(LanguageList+0)+7*A_PtrSize), "ptr", LanguageList, "int*", count)   ; count
      loop % count
      {
         DllCall(NumGet(NumGet(LanguageList+0)+6*A_PtrSize), "ptr", LanguageList, "int", A_Index-1, "ptr*", hString)   ; get_Item
         DllCall(NumGet(NumGet(LanguageFactory+0)+6*A_PtrSize), "ptr", LanguageFactory, "ptr", hString, "ptr*", LanguageTest)   ; CreateLanguage
         DeleteHString(hString)   ; the HSTRING returned by get_Item is owned by the caller
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+8*A_PtrSize), "ptr", OcrEngineStatics, "ptr", LanguageTest, "int*", bool)   ; IsLanguageSupported
         if (bool = 1)
         {
            DllCall(NumGet(NumGet(LanguageTest+0)+6*A_PtrSize), "ptr", LanguageTest, "ptr*", hText)   ; presumably get_LanguageTag - TODO confirm
            buffer := DllCall("Combase.dll\WindowsGetStringRawBuffer", "ptr", hText, "uint*", length, "ptr")
            text .= StrGet(buffer, "UTF-16") "`n"
            DeleteHString(hText)   ; text was copied by StrGet, so the HSTRING can go
         }
         ObjRelease(LanguageTest)
      }
      ObjRelease(LanguageList)
      return text
   }
   if (lang != CurrentLanguage) or (lang = "FirstFromAvailableLanguages")
   {
      ; (Re)create the engine; drop the previous engine and language first.
      if (OcrEngine != "")
      {
         ObjRelease(OcrEngine)
         if (CurrentLanguage != "FirstFromAvailableLanguages")
            ObjRelease(Language)
      }
      if (lang = "FirstFromAvailableLanguages")
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+10*A_PtrSize), "ptr", OcrEngineStatics, "ptr*", OcrEngine)   ; TryCreateFromUserProfileLanguages
      else
      {
         CreateHString(lang, hString)
         DllCall(NumGet(NumGet(LanguageFactory+0)+6*A_PtrSize), "ptr", LanguageFactory, "ptr", hString, "ptr*", Language)   ; CreateLanguage
         DeleteHString(hString)
         DllCall(NumGet(NumGet(OcrEngineStatics+0)+9*A_PtrSize), "ptr", OcrEngineStatics, "ptr", Language, "ptr*", OcrEngine)   ; TryCreateFromLanguage
      }
      if (OcrEngine = 0)
      {
         msgbox Can not use language "%lang%" for OCR, please install language pack.
         ExitApp
      }
      CurrentLanguage := lang
   }
   IRandomAccessStream := file
   DllCall(NumGet(NumGet(BitmapDecoderStatics+0)+14*A_PtrSize), "ptr", BitmapDecoderStatics, "ptr", IRandomAccessStream, "ptr*", BitmapDecoder)   ; CreateAsync
   WaitForAsync(BitmapDecoder)
   BitmapFrame := ComObjQuery(BitmapDecoder, IBitmapFrame := "{72A49A1C-8081-438D-91BC-94ECFC8185C6}")
   DllCall(NumGet(NumGet(BitmapFrame+0)+12*A_PtrSize), "ptr", BitmapFrame, "uint*", width)   ; get_PixelWidth
   DllCall(NumGet(NumGet(BitmapFrame+0)+13*A_PtrSize), "ptr", BitmapFrame, "uint*", height)   ; get_PixelHeight
   if (width > MaxDimension) or (height > MaxDimension)
   {
      msgbox Image is to big - %width%x%height%.`nIt should be maximum - %MaxDimension% pixels
      ExitApp
   }
   BitmapFrameWithSoftwareBitmap := ComObjQuery(BitmapDecoder, IBitmapFrameWithSoftwareBitmap := "{FE287C9A-420C-4963-87AD-691436E08383}")
   DllCall(NumGet(NumGet(BitmapFrameWithSoftwareBitmap+0)+6*A_PtrSize), "ptr", BitmapFrameWithSoftwareBitmap, "ptr*", SoftwareBitmap)   ; GetSoftwareBitmapAsync
   WaitForAsync(SoftwareBitmap)
   DllCall(NumGet(NumGet(OcrEngine+0)+6*A_PtrSize), "ptr", OcrEngine, "ptr", SoftwareBitmap, "ptr*", OcrResult)   ; RecognizeAsync
   WaitForAsync(OcrResult)
   DllCall(NumGet(NumGet(OcrResult+0)+6*A_PtrSize), "ptr", OcrResult, "ptr*", LinesList)   ; get_Lines
   DllCall(NumGet(NumGet(LinesList+0)+7*A_PtrSize), "ptr", LinesList, "int*", count)   ; count
   loop % count
   {
      DllCall(NumGet(NumGet(LinesList+0)+6*A_PtrSize), "ptr", LinesList, "int", A_Index-1, "ptr*", OcrLine)
      DllCall(NumGet(NumGet(OcrLine+0)+7*A_PtrSize), "ptr", OcrLine, "ptr*", hText)   ; get_Text of the whole line
      buffer := DllCall("Combase.dll\WindowsGetStringRawBuffer", "ptr", hText, "uint*", length, "ptr")
      text .= StrGet(buffer, "UTF-16") "`n"
      DeleteHString(hText)   ; release the HSTRING once its text has been copied
      ObjRelease(OcrLine)
   }
   ; Close the stream and the software bitmap explicitly via IClosable, then
   ; drop every COM reference obtained above.
   Close := ComObjQuery(IRandomAccessStream, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   Close := ComObjQuery(SoftwareBitmap, IClosable := "{30D5A829-7FA4-4026-83BB-D75BAE4EA99E}")
   DllCall(NumGet(NumGet(Close+0)+6*A_PtrSize), "ptr", Close)   ; Close
   ObjRelease(Close)
   ObjRelease(IRandomAccessStream)
   ObjRelease(BitmapDecoder)
   ObjRelease(BitmapFrame)
   ObjRelease(BitmapFrameWithSoftwareBitmap)
   ObjRelease(SoftwareBitmap)
   ObjRelease(OcrResult)
   ObjRelease(LinesList)
   return text
}

CreateClass(string, interface, ByRef Class)
{
   ; Obtains the activation factory of the WinRT class named by `string`,
   ; asking for the interface whose GUID string is `interface`, and stores the
   ; resulting pointer in Class (ByRef). On failure a message box describes the
   ; HRESULT and the script exits.
   CreateHString(string, hString)
   VarSetCapacity(GUID, 16)
   DllCall("ole32\CLSIDFromString", "wstr", interface, "ptr", &GUID)
   ; The "uint" return type is required: with the default Int return type a
   ; failing HRESULT comes back negative and never equals 0x80004002 or
   ; 0x80040154, so the specific messages below could not be reached.
   result := DllCall("Combase.dll\RoGetActivationFactory", "ptr", hString, "ptr", &GUID, "ptr*", Class, "uint")
   DeleteHString(hString)   ; the class-name HSTRING is no longer needed either way
   if (result != 0)
   {
      if (result = 0x80004002)
         msgbox No such interface supported
      else if (result = 0x80040154)
         msgbox Class not registered
      else
         msgbox error: %result%
      ExitApp
   }
}

CreateHString(string, ByRef hString)
{
    ; Creates a WinRT HSTRING holding a copy of `string` and stores its handle
    ; in hString (ByRef). Pair every call with DeleteHString.
    charCount := StrLen(string)
    DllCall("Combase.dll\WindowsCreateString", "wstr", string, "uint", charCount, "ptr*", hString)
}

DeleteHString(hString) {
   ; Hands the HSTRING handle hString to WindowsDeleteString so it is released.
   DllCall("Combase.dll\WindowsDeleteString"
         , "ptr", hString)
}

WaitForAsync(ByRef Object)
{
   ; Blocks (polling every 10 ms) until the WinRT async operation in Object has
   ; left status 0, then replaces Object (ByRef) with the operation's result
   ; and releases the operation. Any final status other than 1 is reported
   ; with its error code in a message box and the script exits.
   asyncInfoPtr := ComObjQuery(Object, "{00000036-0000-0000-C000-000000000046}")   ; IAsyncInfo
   loop
   {
      DllCall(NumGet(NumGet(asyncInfoPtr+0)+7*A_PtrSize), "ptr", asyncInfoPtr, "uint*", state)   ; IAsyncInfo.Status
      if (state != 0)
         break
      sleep 10
   }
   if (state != 1)
   {
      DllCall(NumGet(NumGet(asyncInfoPtr+0)+8*A_PtrSize), "ptr", asyncInfoPtr, "uint*", ErrorCode)   ; IAsyncInfo.ErrorCode
      msgbox AsyncInfo status error: %ErrorCode%
      ExitApp
   }
   ObjRelease(asyncInfoPtr)
   DllCall(NumGet(NumGet(Object+0)+8*A_PtrSize), "ptr", Object, "ptr*", operationResult)   ; GetResults
   ObjRelease(Object)
   Object := operationResult
}
This is also an excellent tool for the visually impaired. If text is too small or too blurry to read, add sapi := ComObjCreate("SAPI.SpVoice") at top of the script and Sapi.Speak(text) instead of msgbox % text and the clipped text is read out loud.
rommmcek
Posts: 910
Joined: 15 Aug 2014, 15:18

Re: Optical character recognition (OCR) with UWP API

02 May 2020, 07:53

I like your selecting frame! I added initial cross-hair:
Spoiler
User avatar
flyingDman
Posts: 670
Joined: 29 Sep 2013, 19:01

Re: Optical character recognition (OCR) with UWP API

02 May 2020, 11:03

I was thinking about a Xhair feature as well and have one that I liked but wanted to keep it simple for demo. Your Xhair is much "lighter" than what I had: (requires gdip)

Code: Select all

; Crosshair selection demo (requires the Gdip library): the first left click
; sets one corner of the selection, the second click sets the opposite corner
; and captures the framed screen area.
#Singleinstance force
#NoEnv
CoordMode, Mouse, screen
mouse_blocked := false   ; false until pdt_drwng has built its drawing GUI
count := 1               ; index of the corner the next click will define (1 or 2)
SetTimer, pdt_drwng, 25

LButton::
MouseGetPos, oVarX%count%, oVarY%count%
if count = 2
	{
	SetTimer, pdt_drwng, off
	hotkey, LButton,,off
	gui, Drawing:destroy
	; GDI+ is normally already started by pdt_drwng; starting it again would
	; overwrite pToken and leave the first token without a matching shutdown.
	if !pToken
		pToken := Gdip_Startup()
	; Normalize the two corners so dragging up or left still yields a positive
	; width/height; oVarX1..oVarY2 themselves are left as clicked.
	sel_x := (oVarX1 < oVarX2) ? oVarX1 : oVarX2
	sel_y := (oVarY1 < oVarY2) ? oVarY1 : oVarY2
	sel_w := Abs(oVarX2 - oVarX1)
	sel_h := Abs(oVarY2 - oVarY1)
	hdc_frame_full := GetDC("")
	hdc_buffer_full := CreateCompatibleDC(hdc_frame_full)
	hbm_buffer_full := CreateCompatibleBitmap(hdc_frame_full, sel_w, sel_h)
	SelectObject(hdc_buffer_full, hbm_buffer_full)
	BitBlt(hdc_buffer_full, 0, 0, sel_w, sel_h, hdc_frame_full, sel_x, sel_y, 0x00CC0020)
	bitmap_full := Gdip_CreateBitmapFromHBITMAP(hbm_buffer_full, 0)
	DeleteDC(hdc_buffer_full)
	DeleteObject(hbm_buffer_full)
	ReleaseDC(hdc_frame_full)   ; the screen DC obtained by GetDC must be given back
	goto, capture
	}
++count
return

; Timer routine. The first run builds the layered, click-through drawing GUI
; and the GDI+ objects; every later run redraws the crosshair (and, once the
; first corner is set, the selection rectangle) at the current mouse position.
pdt_drwng:
gui,Drawing:submit, nohide
if (mouse_blocked)
	{
	; Update crosshair according to mouse position
	MouseGetPos, now_x, now_y
	Gdip_GraphicsClear(G)                                                  ; Delete old graphics
	if (count = 1 || count = 2)
		{
		Gdip_DrawLine(G, pPen1, now_x, 0, now_x, A_ScreenHeight)          ; Vertical Line of Crosshair
		Gdip_DrawLine(G, pPen1, 0, now_y, A_ScreenWidth, now_y)           ; Horizontal Line of Crosshair
		}
	if (count = 2)
		{
		Gdip_DrawLine(G, pPen2, now_x, oVarY1, now_x, now_y)              ; Vertical Line of the selection area
		Gdip_DrawLine(G, pPen2, oVarX1, now_y, now_x, now_y)              ; Horizontal Line of the selection area
		Gdip_DrawLine(G, pPen2, oVarX1, oVarY1, oVarX1, now_y)            ; Vertical Line of the selection area
		Gdip_DrawLine(G, pPen2, oVarX1, oVarY1, now_x, oVarY1)            ; Horizontal Line of the selection area
		}
	UpdateLayeredWindow(hwnd1, hdc, 0, 0, A_ScreenWidth, A_ScreenHeight)  ; Draw everything
	return
	}
; This is the first time pdt_drwng is run: prepare the Drawing GUI for the crosshair.
mouse_blocked := true
gui, Drawing:+AlwaysOnTop +E0x20 -Caption +E0x80000 -Border +ToolWindow +OwnDialogs +Owner +LastFound
gui, Drawing: Show, , Drawing                                             ; gui, Drawing: Show, NA, Drawing
pToken := Gdip_Startup()
hwnd1 := WinExist("Drawing")
hbm := CreateDIBSection(A_ScreenWidth, A_ScreenHeight)
hdc := CreateCompatibleDC()
obm := SelectObject(hdc, hbm)
g := Gdip_GraphicsFromHDC(hdc)
Gdip_SetSmoothingMode(g, 4)
pPen1 := Gdip_CreatePen("0xFFFF0000", 1)                                  ; set the line color thickness (thin is 1); red
pPen2 := Gdip_CreatePen("0xFFFF0000", 2)
return

; Shows the two clicked corners, then (because the return stays commented
; out) falls through into the Esc cleanup below and exits.
capture:
msgbox % ovarX1 "|" ovarY1 "|" ovarX2 "|" ovarY2 
;return

; Esc (or fall-through from capture): free the GDI/GDI+ resources and quit.
esc::
; The script creates two pens and a graphics object but never a brush, so
; those are what must be freed here. Each handle is checked because Esc can
; be pressed before pdt_drwng has created them.
if pPen1
	Gdip_DeletePen(pPen1)
if pPen2
	Gdip_DeletePen(pPen2)
if g
	Gdip_DeleteGraphics(g)
if bitmap_full
	Gdip_DisposeImage(bitmap_full)   ; GDI+ bitmap made from the captured area
SelectObject(hdc, obm)
DeleteObject(hbm)
DeleteDC(hdc)
Gdip_Shutdown(pToken)
exitapp
burque505
Posts: 1239
Joined: 22 Jan 2017, 19:37

Re: Optical character recognition (OCR) with UWP API

02 May 2020, 11:41

Very nice, @flyingDman, thank you!
R0nya
Posts: 4
Joined: 05 Aug 2016, 09:30

Re: Optical character recognition (OCR) with UWP API

07 May 2020, 15:36

Nice OCR very fast! And it's not killing my CPU like tesseract! Thanks, guys.
How to implement hwnd here to get a screenshot from the window not the whole screen?


Edit

Seems to work:

Code: Select all

; Request the device context of the window identified by hwnd rather than
; passing 0 as HBitmapFromScreen does for the whole screen.
; NOTE(review): the matching ReleaseDC call presumably has to pass the same
; hwnd instead of 0 - confirm against the ReleaseDC documentation.
HDC := DllCall("GetDC", "Ptr", hwnd, "UPtr")

Return to “Scripts and Functions”

Who is online

Users browsing this forum: Bral, robodesign and 45 guests