I've implemented a search algorithm that improves performance by changing the order that the template pixels are checked. It's not quite the same as the topic of this thread because it changes the order that the pixels
within the template are checked, not the order in which candidate locations are checked. But the C code at the end could be modified or extended so it might be a useful starting point for getting your center-out search.
The reason I made this in the first place is that I had noticed poor performance with some templates, when the template has blank areas that are the same color as blank areas in the window.
As an example, consider searching for this template (a piece of the autohotkey logo):
If the window has a lot of white area, and the search algorithm checks template pixels left to right, top to bottom, it has to scan many template pixels before finding a nonmatching one. This can be very bad if you're not careful to crop out as much background as possible.
My algorithm loads a template and rearranges it into a sort of 'out of order
RLE' (OOORLE) format, with the least frequent template colors occuring first. Each run is encoded as six bytes: red, green, blue, x position (run start), y position, and run length. The template size is limited to 255x255 pixels for simplicity. Transparent template pixels are simply absent from the out-of-order RLE format and require neither space nor time. The OOORLE format also has five bytes at the very start to indicate the width and height and number of runs.
These are functions for loading a bmp image and transforming it into OOORLE format:
Code:
BMPWidth(ByRef bmpdata) {
return NumGet(bmpdata, 18, "UInt")
}
BMPHeight(ByRef bmpdata) {
return NumGet(bmpdata, 22 "UInt")
}
BMPBpp(ByRef bmpdata) {
return NumGet(bmpdata, 28, "UShort")
}
; load bitmap and do some basic sanity checking
BMPLoad(fname, ByRef bmpdata) {
FileRead, bmpdata, %fname%
bytes := VarSetCapacity(bmpdata)
; check magick number
if (NumGet(bmpdata, 0, "UShort") != 0x4D42) {
return "bad bmp file format"
}
; check expected file size in header
if (NumGet(bmpdata, 2, "UInt") != VarSetCapacity(bmpdata)) {
return "file size in header doesn't match actual file size"
}
; check bits per pixel is something we can handle (24 or 32)
bpp := BMPBpp(bmpdata)
if (bpp != 24 && bpp != 32) {
return "only 24 and 32 bit bitmaps are supported at the moment"
}
; check compression
if (NumGet(bmpdata, 30, "UInt") != 0) {
return "only uncompressed (compression=0 for 'BI_RGB') is supported"
}
; check height is positive (can't handle negative heights at the moment)
if (BMPHeight(bmpdata) < 0) {
return "negative height"
}
; all passed, return empty string for success
return ""
}
BMPGetPixel(ByRef bmpdata, x, y) {
bpp := BMPBpp(bmpdata)
width := BMPWidth(bmpdata)
height := BMPHeight(bmpdata)
stride := (bpp*width + 7)//8 ; number of bytes needed to encode the pixel data
stride := (stride + 3) // 4 * 4 ; round up to the next multiple of 4 bytes
headersize := 54 ; standard header is 54 bytes when there is no palette
pixelbytes := bpp//8
rowoffs := (height-1-y)*stride
pixoffs := x*pixelbytes
; bytes are BB GG RR XX, but when read into a UInt on a little-endian machine
; it turns into 0xXXRRGGBB, where least significant bits is the blue value.
xRGB := NumGet(bmpdata, headersize+rowoffs+pixoffs, "UInt") ; read 4 bytes which may include junk
return (xRGB & 0xFFFFFF) ; discard the high byte
}
; transpc is the transparent color or -1 which won't match any bmp pixels
BMPTransform(ByRef bmpdata, ByRef output, transpc=-1) {
height := BMPHeight(bmpdata)
width := BMPWidth(bmpdata)
colorlist := ""
runcount := 0
; count the number of occurrences of each color, and build a list of all colors that appear
Loop, %height% {
y := A_Index - 1
lastcolor := -1
lastcount := 0
Loop, %width% {
x := A_Index - 1
pixrgb := BMPGetPixel(bmpdata, x, y)
if (n%pixrgb%) {
n%pixrgb% += 1
}
else { ; first occurrence of this color
if (pixrgb != transpc) {
colorlist .= pixrgb . "|"
}
n%pixrgb% := 1
r%pixrgb% := ""
}
if (pixrgb == lastcolor) { ; continue existing RLE
lastcount++
}
else {
if (lastcount > 0) { ; end of RLE
if (lastcolor != transpc) {
runstart := x - lastcount
r%lastcolor% .= runstart . " " . y . " " . lastcount . "|"
runcount++
}
}
lastcolor := pixrgb
lastcount := 1
}
}
if (lastcolor != transpc) {
runstart := width - lastcount
r%lastcolor% .= runstart . " " . y . " " . lastcount . "|"
runcount++
}
}
StringTrimRight, colorlist, colorlist, 1 ; chop off trailing pipe character
; build a newline-delimited list of colors with their respective counts
ncolorlist := ""
Loop, Parse, colorlist, |
{
ncolorlist .= n%A_LoopField% . " " . A_LoopField . "`n"
}
StringTrimRight, ncolorlist, ncolorlist, 1 ; chop off trailing newline
; sort the list of colors from rarest to most common
Sort, ncolorlist, N
VarSetCapacity(output, 5 + runcount*6) ; 5 'header' bytes and each run consumes 6 bytes
outbyte := 0
NumPut(1, output, outbyte++, "UChar") ; magic number 1
NumPut(width, output, outbyte++, "UChar") ; width
NumPut(height, output, outbyte++, "UChar") ; height
NumPut(runcount, output, outbyte, "UShort") ; number of runs (little endian format)
outbyte += 2
Loop, Parse, ncolorlist, `n
{
StringSplit, part, A_LoopField, %A_Space% ; part1 is the count, part2 is the color
; output the runs for pixels matching the color
matchcolor := part2
StringTrimRight, r%matchcolor%, r%matchcolor%, 1
Loop, Parse, r%matchcolor%, |
{
StringSplit, xyc, A_LoopField, %A_Space%
NumPut((matchcolor >> 16) & 255, output, outbyte++, "UChar") ; red
NumPut((matchcolor >> 8) & 255, output, outbyte++, "UChar") ; green
NumPut(matchcolor & 255, output, outbyte++, "UChar") ; blue
NumPut(xyc1, output, outbyte++, "UChar") ; x coordinate of run start
NumPut(xyc2, output, outbyte++, "UChar") ; y coordinate of run start
NumPut(xyc3, output, outbyte++, "UChar") ; run length
}
}
return outbyte ; should be 5+runcount*6
}
Then here are functions to grab a snapshot of a window to a DIB and then search that DIB for the OOORLE template:
Code:
OffscreenSnap(wid) {
global snappixbuf, snapwidth, snapheight
bpp := 24
WinGetPos, , , snapwidth, snapheight, ahk_id %wid%
sdc := GetDC(wid)
dc2 := CreateCompatibleDC(sdc)
hbmp := CreateCompatibleBitmap(sdc, snapwidth, snapheight)
oldobj := SelectObject(dc2, hbmp)
bufsize := AllocateDIBBuf(snappixbuf, snapwidth, snapheight, bpp)
PrintWindow(wid, dc2) ; bitblt doesn't work for obscured or offscreen windows
MakeBitmapInfoHeader(bmi, snapwidth, snapheight, bpp)
GetDIBits(sdc, hbmp, 0, snapheight, &snappixbuf, bmi)
SelectObject(dc2, oldobj)
DeleteObject(hbmp)
DeleteDC(dc2)
ReleaseDC(wid, sdc)
}
SearchLastSnap(ByRef foundx, ByRef foundy, x1, y1, x2, y2, templaddr) {
global snappixbuf, snapwidth, snapheight
static isearch24, first:=1
if (first) {
first := 0
Base64Decode(isearch24, "i1QkIIPsJIA6AXQLuAIAAACDxCTCKAAPt0IDD7ZKAlOJRCQYVVZXi3wkSIvHK8"
. "GLTCRQjVwIAQ+2QgGLTCREi/Er8ItEJEyNRAYBiUQkLItEJDyNdEADD7ZCBYhEJEwPtkIGiEQkPA"
. "+2QgeIRCQSD7ZCCMHuAgP2iEQkUA+2QgkD9olcJDCIRCRIO/sPg48BAACLbCQ46wiNpCQAAAAAkI"
. "lMJBg7TCQsD4NqAQAAD7ZEJEiLXCRAK9gPtkQkUCvfSw+v3gPBjQxAA92NRBkBiUQkKIpMJEw6SA"
. "EPhRIBAACKTCQ8OggPhQYBAACKTCQSOkj/D4X5AAAAM8C5AQAAAIlMJCDHRCQcAAAAAGY7RCQkci"
. "GLRCQYi1QkWItMJFyJAok5X15dM8Bbg8QkwigAkItUJFSFyQ+EtgAAAA+3RCQcjQRAilxCB40EQg"
. "+2UAWIVCQWD7ZQBohUJBUPtlAIiFQkFA+2UAmKQAqIVCQTMtKIRCQXhMB2WoXJdFYPtkwkE4tEJE"
. "ArwQ+2TCQUK8dID6/GD7bqA2wkGAPNi2wkOI0MSQPIOBwpjQQpdRKKTCQVOEgBdQmKTCQWOEgCdA"
. "gzyYlMJCDrBItMJCD+wjpUJBdypotEJBxAiUQkHGY7RCQkD4JK////hckPhSX///+LVCRUi0wkGI"
. "tEJChBg8ADiUwkGIlEJCg7TCQsD4LD/v//i0wkRItcJDBHO/sPgn/+//9fXl24AQAAAFuDxCTCKAA=")
}
x1 := (x1 < 0) ? snapwidth+x1 : x1
y1 := (y1 < 0) ? snapheight+y1 : y1
x2 := (x2 < 0) ? snapwidth+x2 : x2
y2 := (y2 < 0) ? snapheight+y2 : y2
searchwidth := x2-x1+1
searchheight := y2-y1+1
if (searchwidth < 0 || searchheight < 0) {
return 0
}
rc := DllCall(&isearch24, "UInt", &snappixbuf, "UInt", snapwidth, "UInt", snapheight, "UInt", x1, "UInt", y1
, "UInt", searchwidth, "UInt", searchheight, "UInt", templaddr, "UInt*", foundx, "UInt*", foundy)
return rc
}
This is the C code for the "mcode" function stored in isearch24 (but base64 encoded to be more compact):
Code:
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
#define PLIST_HSIZE 5
uint __stdcall imgsearch24(uchar *DIB24, uint dibwidth, uint dibheight, uint searchx0, uint searchy0,
uint searchwidth, uint searchheight, uchar *pixlist, uint *foundx, uint *foundy)
{
const uchar format = pixlist[0];
if (format != 1) { // this function can only handle the 1-byte templates (dimensions up to 255)
return 2;
}
const uchar templwidth = pixlist[1];
const uchar templheight = pixlist[2];
const ushort pixcount = *(ushort *)(pixlist + 3);
const uint ylim = searchy0 + searchheight - templheight + 1;
const uint xlim = searchx0 + searchwidth - templwidth + 1;
const uint dibstride = (dibwidth*3+3)/4*4;
const uchar firstr = pixlist[PLIST_HSIZE+0];
const uchar firstg = pixlist[PLIST_HSIZE+1];
const uchar firstb = pixlist[PLIST_HSIZE+2];
const uchar firstx = pixlist[PLIST_HSIZE+3];
const uchar firsty = pixlist[PLIST_HSIZE+4];
for (uint y=searchy0; y < ylim; y++) {
for (uint x=searchx0; x < xlim; x++) {
uint match = 0;
// check the first pixel outside the loop, might be faster
if (firstr==DIB24[(dibheight-1-y-firsty)*dibstride+(x+firstx)*3+2] &&
firstg==DIB24[(dibheight-1-y-firsty)*dibstride+(x+firstx)*3+1] &&
firstb == DIB24[(dibheight-1-y-firsty)*dibstride+(x+firstx)*3])
{
match = 1;
// look for a nonmatching pixel in the list
for (ushort pixno=0; pixno < pixcount && match; pixno++) {
const uchar pr = pixlist[pixno*6+PLIST_HSIZE+0];
const uchar pg = pixlist[pixno*6+PLIST_HSIZE+1];
const uchar pb = pixlist[pixno*6+PLIST_HSIZE+2];
const uchar px = pixlist[pixno*6+PLIST_HSIZE+3];
const uchar py = pixlist[pixno*6+PLIST_HSIZE+4];
const uchar pc = pixlist[pixno*6+PLIST_HSIZE+5];
for (uchar j=0; j < pc && match; j++) {
const uint yoffs = (dibheight-1-y-py)*dibstride;
const uint xoffs = (x+px+j)*3;
const uchar b = DIB24[yoffs+xoffs];
const uchar g = DIB24[yoffs+xoffs+1];
const uchar r = DIB24[yoffs+xoffs+2];
if (b != pb || g != pg || r != pr) {
match = 0; // found a nonmatching pixel
}
}
}
}
if (match) { // didn't find any nonmatching pixels, we're good!
*foundx = x;
*foundy = y;
return 0;
}
}
}
return 1;
}
This is showing very good performance for me, esp since I can grab the DIB once and then search it for numerous templates.
I hope this helps!