Jump to content

Sky Slate Blueberry Blackcurrant Watermelon Strawberry Orange Banana Apple Emerald Chocolate
Photo

Machine code binary buffer searching regardless of NULL


  • Please log in to reply
52 replies to this topic
wOxxOm
  • Members
  • 371 posts
  • Last active: Feb 20 2015 12:10 PM
  • Joined: 09 Feb 2006
Blazing fast machine-code CASE-SENSITIVE searching in a (binary) buffer for a sequence of bytes, that may include NULL characters. Returns either position of 'sought' inside 'haystack' or -1 if not found.

Time to search is 0 ms generally :-), and in case of a very hostile buffer contents (all bytes are the same and *equal Needle's first* byte) on my pc - 60megabytes in 0.1 - 0.5 sec.

InBuf - look for binary Needle in binary Buffer.
0-based (-1 = not found), case-sensitive.

InBuf(haystackAddr, needleAddr, haystackSize, needleSize, StartOffset=0)
{   Static fun
   IfEqual,fun,
   {
      h=
      ( LTrim join
         5589E583EC0C53515256579C8B5D1483FB000F8EC20000008B4D108B451829C129D9410F8E
         B10000008B7D0801C78B750C31C0FCAC4B742A4B742D4B74364B74144B753F93AD93F2AE0F
         858B000000391F75F4EB754EADF2AE757F3947FF75F7EB68F2AE7574EB628A26F2AE756C38
         2775F8EB569366AD93F2AE755E66391F75F7EB474E43AD8975FC89DAC1EB02895DF483E203
         8955F887DF87D187FB87CAF2AE75373947FF75F789FB89CA83C7038B75FC8B4DF485C97404
         F3A775DE8B4DF885C97404F3A675D389DF4F89F82B45089D5F5E5A595BC9C2140031C0F7D0EBF0
      )
      VarSetCapacity(fun,StrLen(h)//2)
      Loop % StrLen(h)//2
         NumPut("0x" . SubStr(h,2*A_Index-1,2), fun, A_Index-1, "Char")
   }
   Return DllCall(&fun
      , "uint",haystackAddr, "uint",needleAddr
      , "uint",haystackSize, "uint",needleSize
      , "uint",StartOffset)
}

InBufRev - reverse look for binary Needle in binary Buffer.
0-based (-1 = not found), case-sensitive.
StartOffsetOfLastNeedleByte - maximum hayStack offset to contain Needle's bytes (-1=whole haystackSize)

InBufRev(haystackAddr, needleAddr, haystackSize, needleSize, StartOffsetOfLastNeedleByte=-1)
{   Static fun
   IfEqual,fun,
   {
      h=
      ( LTrim join
         5589E583EC0C53515256579C8B5D1483FB000F8EDE0000008B4510488B4D1883F9FF0F44
         C839C80F4CC829D989CF410F8EC1000000037D088B750C83E000FCAC4B74224B742A4B74
         354B74434B754E93AD93FDF2AE0F859B000000395F0275F3E981000000FDF2AE0F858800
         0000EB76FD8A26F2AE757F38670275F7EB689366AD93FDF2AE756F66395F0275F6EB574E
         ADFDF2AE756039470175F7EB494E43AD8975FC89DAC1EB02895DF483E2038955F887DF87
         D1FD87FB87CAF2AE753839470175F7FC89FB89CA83C7058B75FC8B4DF485C97404F3A775
         DC8B4DF885C97404F3A675D189DF4789F82B45089D5F5E5A595BC9C2140031C0F7D0EBF0
      )
      VarSetCapacity(fun,StrLen(h)//2)
      Loop % StrLen(h)//2
      NumPut("0x" . SubStr(h,2*A_Index-1,2), fun, A_Index-1, "Char")
   }
   return DllCall(&fun
      , "uint",haystackAddr, "uint",needleAddr
      , "uint",haystackSize, "uint",needleSize
      , "uint",StartOffsetOfLastNeedleByte)
}

Wrappers for searching a string in a binary buffer
;InBufStr - look for string Needle in binary Buffer
;         0-based (-1 = not found), case-sensitive.
InBufStr(haystackAddr, needleStr, haystackSize, StartOffset=0)
{   return InBuf(haystackAddr, &needleStr, haystackSize, strlen(needleStr))
}

;InBufStrRev - reverse look for string Needle in binary Buffer
;         0-based (-1 = not found), case-sensitive.
;         StartOffsetOfLastNeedleByte - maximum hayStack offset to contain Needle's bytes (-1=whole haystackSize)
InBufStrRev(haystackAddr, needleStr, haystackSize, StartOffsetOfLastNeedleByte=-1)
{   return InBufRev(haystackAddr, &needleStr, haystackSize, strlen(needleStr), StartOffsetOfLastNeedleByte)
}

Wrappers for extracting a string from a binary buffer
;substrBuf - extract a string of specified length from a binary buffer
;         usage: stringVar:=substrBuf( &buf+Offset, 100 )
;         the Length is optional, if none specified then NULL-terminated string is extracted
;         be accurate in order to not exceed the buf bounds, if no NULL is there
substrBuf(bufAddr, Length="") 
{  IfEqual,Length,
      Length:=dllCall("lstrlen","uint",bufAddr) 
   VarSetCapacity(result,Length) 
   DllCall("RtlMoveMemory", "str",result, "uint", bufAddr, "uint",Length) 
   return result 
}

Usage:
Offset := InBuf( &Buffer, &sought, 10000, 100)
Offset := InBufRev( &Buffer, &sought, 10000, 100, 5000)

Offset := InBuf[b]Str[/b]Rev( &Buffer, "ImmediateString", 10000, -1)
Offset := InBuf[b]Str[/b]( &Buffer, StringVar, 10000, StartOffset)
Offset := InBuf[b]Str[/b]( &Buffer, StringVar, 10000000)
ifNotEqual, Offset, -1
   Text := substrBuf( &buffer+Offset)
ifNotEqual, Offset, -1
   Text := substrBuf( &buffer+Offset1, Offset2-Offset1)

InFile - InBuf based case-sensitive searching in file's contents of any* size
*: even larger than 4GB, StartOffset may also be larger than 4GB.
InFile( fileName, needleAddr, needleLen, StartOffset=0 )
{
	lRet=-1
	IfEqual,needleLen,0, return lRet
	IfEqual,needleAddr,0, return lRet
	hFile:=DllCall("CreateFile", "str", fileName,"uint",0x80000000 ;GENERIC_READ
				,"uint", 1 ;FILE_SHARE_READ
				,"uint", 0, "uint",3 ;OPEN_EXISTING
				,"uint",0x2000000 ;FILE_FLAG_BACKUP_SEMANTICS
				,"uint", 0)
	ifEqual,hFile,-1, return lRet

	VarSetCapacity( lBufLen, 8, 0 )
	NumPut( DllCall("GetFileSize","uint",hFile,"uint",&lBufLen+4), lBufLen )
	DllCall( "RtlMoveMemory", "int64 *",lBufLen64, "uint",&lBufLen, "uint",8 )
	lBufLen64 -= StartOffset
	If( lBufLen64>=0 )
	{	hMap:=DllCall("CreateFileMapping", "uint",hFile, "uint",0, "uint",2 ;PAGE_READONLY
					,"uint",0,"uint",0,"uint",0)
		if( hMap )
		{	lMax32b=0xFFFFFFFF
			lMaxView=0x40000000 ;1GB
			VarSetCapacity( SI, 36, 0 )
			DllCall("GetSystemInfo","uint",&SI)
			memAllocGranularity:=NumGet( SI, 28 )
			loop
			{	FileOffs:=(StartOffset//memAllocGranularity)*memAllocGranularity
				delta:=StartOffset-FileOffs
				lBufLenLo:=(lBufLen64+delta > lMaxView) ? lMaxView : lBufLen64+delta
				hView:=DllCall("MapViewOfFile", "uint",hMap, "uint", 4 ;FILE_MAP_READ
							,"uint",FileOffs>>32,"uint",FileOffs & lMax32b,"uint",lBufLenLo)
				ifEqual,hView,0, break
				lRet:=InBuf( hView, needleAddr, lBufLenLo, needleLen, delta )
				DllCall("UnmapViewOfFile","uint",hView)
				if( lRet!=-1 )
				{	lRet += FileOffs
					break
				}
				StartOffset += lBufLenLo-needleLen
				lBufLen64 -= lBufLenLo-needleLen
				IfLessOrEqual,lBufLen64,0, break
			}
			DllCall("CloseHandle","uint",hMap)
		}
	}
	DllCall("CloseHandle","uint",hFile)
	return lRet
}
Usage:
fileOffs:=InFile( "d:\filename", &needle, needleLen, StartOffset )

The advantage of using CreateFileMapping - no need to maintain chunked reading of file in case your file is under 1GB, no need to read the whole contents of file into memory, the OS does it for you automatically when actual memory access occurs (in InBuf code). The disadvantage may also be huge - given current implementation of InFile and InBuf there is no way to stop the process or show feedback... but this can also be circumvented if needed by multiple loop calls to InBuf with BufLen parameter set to a small value

Laszlo
  • Moderators
  • 4713 posts
  • Last active: Mar 31 2012 03:17 AM
  • Joined: 14 Feb 2005
This can come in handy! Thanks for sharing it.

Could you post the full ASM code and detailed explanations? In some cases, if someone is really after speed, he might want to tweak the code. E.g. if the search string is 2, 4 or 8 byte long, you could use integer comparisons, or, if the search pattern is very long or has some regularities, some more complex algorithm.

Does anyone have experiences with MMX (SSE or 3DNow!)? They could make the code even faster, if the user has the right processor.

wOxxOm
  • Members
  • 371 posts
  • Last active: Feb 20 2015 12:10 PM
  • Joined: 09 Feb 2006
Built with FlatASM.
There are optimizations for 1,2,3,4,5 byte sequences.
Note: only PROC code matters, everything else is dummy of course, only to allow fasm compile it to exe to rip the code.
format PE GUI 4.0
entry start

include 'win32a.inc'

section '.data' data readable writeable

  hayStack db '1111111122222111111'
  Needle db '22222'

section '.code' code readable executable

  start:

	push	0 5 19 Needle hayStack
	call	InBuf
	push	-1 5 19 Needle hayStack
	call	InBufRev
	invoke	ExitProcess,0

proc InBuf stdcall uses ebx ecx edx esi edi, hayStack,Needle,hayStackSize,NeedleSize,StartOffset
	local	lNeedleRemDwords:DWORD	;(NeedleSize-4)>>2
	local	lNeedleRemTail:DWORD	;Needle remainder byte count (NeedleSize-4) mod 4 -> (0..3)
	local	lNeedleRemPtr4:DWORD	;&Needle[4]

	pushfd

	mov	ebx,[NeedleSize]
	cmp	ebx,0
	jle	.NotFound
	mov	ecx,[hayStackSize]
	mov	eax,[StartOffset]
	sub	ecx,eax
	sub	ecx,ebx
	inc	ecx	;repetitions=hayStackSize-StartOffset-NeedleSize+1
	jle	.NotFound

	mov	edi,[hayStack]
	add	edi,eax ;edi=&(hayStack[StartOffset])

	;load Needle FirstByte
	mov	esi,[Needle]
	xor	eax,eax
	cld
	lodsb	; AL=Needle[0], keep EAX now!

	;decide on needle length
	dec	ebx
	jz	.NeedleLenIs1
	dec	ebx
	jz	.NeedleLenIs2
	dec	ebx
	jz	.NeedleLenIs3
	dec	ebx
	jz	.NeedleLenIs4
	dec	ebx
	jnz	.NeedleLenIsLong

;.NeedleLenIs5:
	xchg	eax,ebx
	lodsd		;AL=Needle[0]
	xchg	eax,ebx ;EBX=bytes 1..5 of Needle

   .ScanNeedleLenIs5:
	repne	scasb
	jne	.NotFound
	cmp	[edi],ebx
	jne	.ScanNeedleLenIs5
	jmp	.Found

.NeedleLenIs4:
	dec	esi
	lodsd	;EAX=first 4 bytes of Needle
   .ScanNeedleLenIs4:
	repne	scasb
	jne	.NotFound
	cmp	[edi-1],eax
	jne	.ScanNeedleLenIs4
	jmp	.Found

.NeedleLenIs1:
	repne	scasb
	jne	.NotFound
	jmp	.Found

.NeedleLenIs2:
	mov	ah,[esi]
   .ScanNeedleLenIs2:
	repne	scasb
	jne	.NotFound
	cmp	[edi],ah
	jne	.ScanNeedleLenIs2
	jmp	.Found

.NeedleLenIs3:
	xchg	ebx,eax
	lodsw
	xchg	ebx,eax
   .ScanNeedleLenIs3:
	repne	scasb
	jne	.NotFound
	cmp	[edi],bx
	jne	.ScanNeedleLenIs3
	jmp	.Found

.NeedleLenIsLong:
	; get (needleSize-1)//4, (needleSize-1) mod 4
	dec	esi	;ESI=&(Needle[0])
	inc	ebx	;EBX=NeedleSize-4
	lodsd	;EAX=first 4 bytes of Needle
	mov	[lNeedleRemPtr4],esi
	mov	edx,ebx
	shr	ebx,2
	mov	[lNeedleRemDwords],ebx
	and	edx,3
	mov	[lNeedleRemTail],edx

	xchg	ebx,edi ;EBX=save EDI buf ptr for scasb
	xchg	edx,ecx ;EDX=save ECX counter for scasb

   .ScanNeedleLenIsLong:
	xchg	edi,ebx ;load saved buf ptr
	xchg	ecx,edx ;load saved counter
   .ScanNeedleLenIsLongJustScan:
	repne	scasb
	jne	.NotFound

	;check all 4 bytes
	cmp	[edi-1],eax
	jne	.ScanNeedleLenIsLongJustScan

	;check up to Needle's tail
	mov	ebx,edi
	mov	edx,ecx
	add	edi,3
	mov	esi,[lNeedleRemPtr4]
	mov	ecx,[lNeedleRemDwords]
	test	ecx,ecx
	jz	.ScanNeedleLenIsLongTail
	repe	cmpsd
	jne	.ScanNeedleLenIsLong
   .ScanNeedleLenIsLongTail:
	mov	ecx,[lNeedleRemTail]
	test	ecx,ecx
	jz	.ScanNeedleLenIsLongFound
	repe	cmpsb
	jne	.ScanNeedleLenIsLong
   .ScanNeedleLenIsLongFound:
	mov	edi,ebx ;FOUND!

.Found:
	dec	edi
	mov	eax,edi
	sub	eax,[hayStack]
.popOut:
	popfd
	ret
.NotFound:
	xor	eax,eax
	not	eax
	jmp	.popOut
endp

;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

proc InBufRev stdcall uses ebx ecx edx esi edi, hayStack,Needle,hayStackSize,NeedleSize,StartOffsetOfLastByte
	local	lNeedleRemDwords:DWORD	;(NeedleSize-4)>>2
	local	lNeedleRemTail:DWORD	;Needle remainder byte count (NeedleSize-4) mod 4 -> (0..3)
	local	lNeedleRemPtr4:DWORD	;&Needle[4]

	pushfd

	mov	ebx,[NeedleSize]
	cmp	ebx,0
	jle	.NotFound
	mov	eax,[hayStackSize]
	dec	eax
	mov	ecx,[StartOffsetOfLastByte]
	cmp	ecx,-1
	cmovE	ecx,eax
	cmp	eax,ecx
	cmovL	ecx,eax
	sub	ecx,ebx
	mov	edi,ecx
	inc	ecx	;repetitions=min(hayStackSize-1,StartOffsetOfLastByte)-NeedleSize+2
	jle	.NotFound

	add	edi,[hayStack]	;edi=&(hayStack[min(hayStackSize-1,StartOffsetOfLastByte)-NeedleSize+1])

	;load Needle FirstByte
	mov	esi,[Needle]
	and	eax,0
	cld
	lodsb	; AL=Needle[0], keep EAX now!

	;decide on needle length
	dec	ebx
	jz	.NeedleLenIs1
	dec	ebx
	jz	.NeedleLenIs2
	dec	ebx
	jz	.NeedleLenIs3
	dec	ebx
	jz	.NeedleLenIs4
	dec	ebx
	jnz	.NeedleLenIsLong

;.NeedleLenIs5:
	xchg	eax,ebx
	lodsd		;AL=Needle[0]
	xchg	eax,ebx ;EBX=bytes 1..4 of Needle (0-based)
	std

   .ScanNeedleLenIs5:
	repne	scasb
	jne	.NotFound
	cmp	[edi+2],ebx
	jne	.ScanNeedleLenIs5
	jmp	.Found

.NeedleLenIs1:
	std
	repne	scasb
	jne	.NotFound
	jmp	.Found

.NeedleLenIs2:
	std
	mov	ah,[esi]	;AH=Needle[1]
   .ScanNeedleLenIs2:
	repne	scasb
	jne	.NotFound
	cmp	[edi+2],ah
	jne	.ScanNeedleLenIs2
	jmp	.Found

.NeedleLenIs3:
	xchg	ebx,eax
	lodsw
	xchg	ebx,eax
	std
   .ScanNeedleLenIs3:
	repne	scasb
	jne	.NotFound
	cmp	[edi+2],bx
	jne	.ScanNeedleLenIs3
	jmp	.Found

.NeedleLenIs4:
	dec	esi
	lodsd	;EAX=first 4 bytes of Needle
	std
   .ScanNeedleLenIs4:
	repne	scasb
	jne	.NotFound
	cmp	[edi+1],eax
	jne	.ScanNeedleLenIs4
	jmp	.Found

.NeedleLenIsLong:
	; get (needleSize-1)//4, (needleSize-1) mod 4
	dec	esi	;ESI=&(Needle[0])
	inc	ebx	;EBX=NeedleSize-4
	lodsd	;EAX=first 4 bytes of Needle
	mov	[lNeedleRemPtr4],esi
	mov	edx,ebx
	shr	ebx,2
	mov	[lNeedleRemDwords],ebx
	and	edx,3
	mov	[lNeedleRemTail],edx

	xchg	ebx,edi ;EBX=save EDI buf ptr for scasb
	xchg	edx,ecx ;EDX=save ECX counter for scasb

   .ScanNeedleLenIsLong:
	std
	xchg	edi,ebx ;load saved buf ptr
	xchg	ecx,edx ;load saved counter
   .ScanNeedleLenIsLongJustScan:
	repne	scasb
	jne	.NotFound

	;check all 4 bytes
	cmp	[edi+1],eax
	jne	.ScanNeedleLenIsLongJustScan

	;check up to Needle's tail
	cld
	mov	ebx,edi
	mov	edx,ecx
	add	edi,5
	mov	esi,[lNeedleRemPtr4]
	mov	ecx,[lNeedleRemDwords]
	test	ecx,ecx
	jz	.ScanNeedleLenIsLongTail
	repe	cmpsd
	jne	.ScanNeedleLenIsLong
   .ScanNeedleLenIsLongTail:
	mov	ecx,[lNeedleRemTail]
	test	ecx,ecx
	jz	.ScanNeedleLenIsLongFound
	repe	cmpsb
	jne	.ScanNeedleLenIsLong
   .ScanNeedleLenIsLongFound:
	mov	edi,ebx ;FOUND!

.Found:
	inc	edi
	mov	eax,edi
	sub	eax,[hayStack]
.popOut:
	popfd
	ret
.NotFound:
	xor	eax,eax
	not	eax
	jmp	.popOut
endp


data import

library kernel32,'KERNEL32.DLL'
import kernel32,ExitProcess,'ExitProcess'

end data


Laszlo
  • Moderators
  • 4713 posts
  • Last active: Mar 31 2012 03:17 AM
  • Joined: 14 Feb 2005
This seems to be the first pure assembler function written for AHK. Of course we don’t laugh at you, but learn from it and at some point try to improve or to adapt it to some other task. Not many programmers want to write perfect code, just one, which does the job.

wOxxOm
  • Members
  • 371 posts
  • Last active: Feb 20 2015 12:10 PM
  • Joined: 09 Feb 2006
InBuf (consequently, InBufStr wrapper as well) has undergone a very serious optimization, I've updated my earlier posts (also with asm code).

Time to search a very hostile buffer contents (all bytes are the same and equal Needle's first byte) on my pc - 60megabytes for 0.1sec minimum in case of 1 byte search, 0.5 sec - other lengths. That's time being measured before and after DllCall, for a simple case of under 1MB buffer the time was less than 1ms sometimes :-) which indicates actually anything less than 18ms - not bad either way :-D

Initial version yielded 1.2seconds in most cases for hostile 60MB buffer, so the maximum difference is 12 times!

InBufRev will be the next to get optimized.

majkinetor
  • Moderators
  • 4512 posts
  • Last active: May 20 2019 07:41 AM
  • Joined: 24 May 2006
Thx for your work. Highly appriciated.

I don't have use for it right now, so I will not test it ATM.


Have fun.
Posted Image

derRaphael
  • Members
  • 872 posts
  • Last active: Mar 19 2013 04:42 PM
  • Joined: 23 Nov 2007
hi
i wonder, if this routine might be used to update/modify the array the autohotkey.exe files generates on runtime for its controls.

if it'll be accessible, so this tiny script can search for a particular name assigned to a control by a script and modyfies it somehow (without corrupting the array structure - like replacing the varName through 012N123 without changinit its value) it should be possible to remove a control, by sending WM_CANCEL or WM_DESTROY, modify the autohotkey array and reassign a particular varName again to a new generated control.

im sure, unless the array wont be updated correctly - means the variable and the value deleted out of its structure - this would hit the 11,000 Control Limit / Gui when used frequently.

Found on:
http://www.autohotke...ght=array#18313

but it might be a workaround, if someone really needs desperate to detroy a control item and reassign that previously used varName to a new Control

Greets

DerRaphael

wOxxOm
  • Members
  • 371 posts
  • Last active: Feb 20 2015 12:10 PM
  • Joined: 09 Feb 2006
New: InBufRev (reverse lookup) is now also optimized (the way as InBuf is).

Fixed: a dumb bug, now -1 is returned if nothing is found (was '0' :-D )

bmcclure
  • Members
  • 774 posts
  • Last active: Jan 04 2014 10:44 PM
  • Joined: 24 Nov 2007
Great functions!

;substrBuf - extract a string of specified length from a binary buffer
;         usage: stringVar:=substrBuf( &buf+Offset, 100 )
substrBuf(bufAddr, Length)
{  VarSetCapacity(result,Length)
   DllCall("RtlMoveMemory", "str",result, "uint", bufAddr, "uint",Length)
   return result
}
;substrZBuf - extract a NULL terminated string from a binary buffer
;         usage: stringVar:=substrZBuf( &buf+Offset )
substrZBuf(bufAddr)
{  L:=dllCall("lstrlen","uint",bufAddr)
   VarSetCapacity(result,L)
   DllCall("RtlMoveMemory", "str",result, "uint", bufAddr, "uint",L)
   return result
}


Couldn't this be combined?
;substrBuf - extract a string of specified length from a binary buffer
;         usage: stringVar:=substrBuf( &buf+Offset, 100 )
; Omit Length to extract null-terminated string
substrBuf(bufAddr, Length="")
{  
   If (Length = "")
   	Length:=dllCall("lstrlen","uint",bufAddr)
   VarSetCapacity(result,Length)
   DllCall("RtlMoveMemory", "str",result, "uint", bufAddr, "uint",Length)
   return result
}


wOxxOm
  • Members
  • 371 posts
  • Last active: Feb 20 2015 12:10 PM
  • Joined: 09 Feb 2006
NICE, the first post is updated

polyethene
  • Members
  • 5519 posts
  • Last active: May 17 2015 06:39 AM
  • Joined: 26 Oct 2012
You know RegEx can search past null chars too.

autohotkey.com/net Site Manager

 

Contact me by email (polyethene at autohotkey.net) or message tidbit


Laszlo
  • Moderators
  • 4713 posts
  • Last active: Mar 31 2012 03:17 AM
  • Joined: 14 Feb 2005

You know RegEx can search past null chars too.

If Haystack contains nulls, it looks easy, but could you give some example, how to handle search- and replacement strings, which contain null chars?

polyethene
  • Members
  • 5519 posts
  • Last active: May 17 2015 06:39 AM
  • Joined: 26 Oct 2012

If Haystack contains nulls, it looks easy, but could you give some example, how to handle search- and replacement strings, which contain null chars?

Don't see why you couldn't have experimented yourself, but for the sake of it here's a lousy demonstration:

var = abcdef123ghi
NumPut(0, var, 3, "Char") ; replace 4th char with null byte

MsgBox, % "Position of null: " . RegExMatch(var, "\0") ; find
	. "`nPosition of last char: " . RegExMatch(var, ".$") ; search past

new := RegExReplace(var, "\d", "-") ; replace digits which are past the null char
NumPut(46, new, 3, "Char") ; 'unmask' var from ahk display
MsgBox, %new%

Time to search a very hostile buffer contents (all bytes are the same and equal Needle's first byte) on my pc - 60megabytes for 0.1sec minimum in case of 1 byte search, 0.5 sec - other Needle's lengths (full 60MB span)

Using the following script I got speeds as fast as 0.0069785545369593 ms (on P4 3ghz) to search for a char past a null byte in a 60MB stack:

i = 10
s := VarSetCapacity(var, 60 * 1000 * 1000, 0xff) ; 60 MB
Random, r, 1, s - 1
NumPut(0, var, r, "UChar")
NumPut(0x61, var, r + 1, "UChar")

t1 := t2 := 0x00000000 + 0
RegExMatch(var, "\x61")

DllCall("QueryPerformanceCounter", "Int64P", t1)

Loop, %i%
	RegExMatch(var, "\x61")

DllCall("QueryPerformanceCounter", "Int64P", t2)
DllCall("QueryPerformanceFrequency", "Int64P", f)

SetFormat, Float, 0.16
MsgBox, % (t2 - t1) / i / f . " ms"

autohotkey.com/net Site Manager

 

Contact me by email (polyethene at autohotkey.net) or message tidbit


wOxxOm
  • Members
  • 371 posts
  • Last active: Feb 20 2015 12:10 PM
  • Joined: 09 Feb 2006
hm, your example is the most simple of all the possible ones, of course my function will be even faster in such case, because it will check all the 60MB in ONE asm command: REPNE SCASB, and there hardly could be anything faster.

A good call though anyway.

P.S. RegExMatch has the advantage of being built-in in AHK core code, whereas my InBuf is being called via extremely slow in itself DllCall mechanism.
On the other hand there is no sane way to use regExMatch to search adlib binary data inside binary buffer, lest you create an escapement function, which is well also possible :-D

Laszlo
  • Moderators
  • 4713 posts
  • Last active: Mar 31 2012 03:17 AM
  • Joined: 14 Feb 2005

Don't see why you couldn't have experimented yourself

I could, but you sounded as someone, who already did the work. Still, your example does not answer the question.

It looks like RegExMatch/Replace uses counted strings for haystack, not NULL terminated ones. I did not find it documented, but it is a pleasant surprise. However, I asked for an example to search for a string containing nulls. Your example indicates that in the search string we have to replace each special character ([]()"\'.*?<>^$|null…) with a hex (or octal, as in your example) escape sequence (or precede them with ""), which is not a very elegant solution.

AHK passes the length of HayStack to the regular expression function. The difficulty is setting the right StrLen if we get a binary buffer to RAM. When characters of an existing string are overwritten with NumPut (as in your example), StrLen stays unchanged.

When a binary file is to be read into RAM, we have to use the *c option, which sets StrLen the file size, but the data is stored in a special variable, not usable for RegEx. We have to copy it into another variable (or use dllcalls to open/read/close the file).
FileRead a, *c %A_AhkPath%
VarSetCapacity(b,StrLen(a),1)
DllCall("RtlMoveMemory", UInt,&b, UInt,&a, Uint,StrLen(a))
MsgBox % "It is found: " RegExMatch(b, "\0\03")