InBuf function currently 32-bit only (machine code binary buffer searching)

Get help with using AutoHotkey (v1.1 and older) and its commands and hotkeys
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

InBuf function currently 32-bit only (machine code binary buffer searching)

20 Feb 2017, 21:17

tl;dr
- when I convert InBuf asm file to exe, how do I know where the bytes I need start/end in the exe
- how to convert InBuf x32 asm file to x64 asm file
- (also, how to convert bytes in the exe back to an asm file)

see also:
best AutoHotkey machine code functions - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=5&t=32242

tl;dr
A function that uses machine code, only works with AHK 32-bit.
Some info about it, in case someone is able to make a 64-bit version.
(Btw any info on writing asm, for asm to exe, for use in AHK functions would be interesting.)

See InBuf by wOxxOm here together with code for and links to other interesting machine code functions:
Machine code binary buffer searching regardless of NULL - Scripts and Functions - AutoHotkey Community
https://autohotkey.com/board/topic/23627-machine-code-binary-buffer-searching-regardless-of-null/

The InBuf function:

Code: Select all

InBuf(haystackAddr, needleAddr, haystackSize, needleSize, StartOffset=0)
{
   ; Searches a binary buffer for a byte sequence using wOxxOm's x86 machine code.
   ;   haystackAddr / haystackSize - address and size (bytes) of the buffer to search
   ;   needleAddr / needleSize     - address and size (bytes) of the bytes to look for
   ;   StartOffset                 - zero-based offset in the haystack to start from
   ; Returns the zero-based offset of the first match, or -1 if there is none.
   ; The embedded code is 32-bit stdcall, so this only works in 32-bit AutoHotkey.
   Static fun
   IfEqual,fun,
   {
      ; Hex dump of the assembled InBuf procedure (224 bytes).
      h=
      ( LTrim join
         5589E583EC0C53515256579C8B5D1483FB000F8EC20000008B4D108B451829C129D9410F8E
         B10000008B7D0801C78B750C31C0FCAC4B742A4B742D4B74364B74144B753F93AD93F2AE0F
         858B000000391F75F4EB754EADF2AE757F3947FF75F7EB68F2AE7574EB628A26F2AE756C38
         2775F8EB569366AD93F2AE755E66391F75F7EB474E43AD8975FC89DAC1EB02895DF483E203
         8955F887DF87D187FB87CAF2AE75373947FF75F789FB89CA83C7038B75FC8B4DF485C97404
         F3A775DE8B4DF885C97404F3A675D389DF4F89F82B45089D5F5E5A595BC9C2140031C0F7D0EBF0
      )
      codeSize := StrLen(h)//2
      VarSetCapacity(fun,codeSize)
      Loop % codeSize
         NumPut("0x" . SubStr(h,2*A_Index-1,2), fun, A_Index-1, "Char")
      ; Variable memory is not executable by default; without this the call
      ; below raises an access violation when DEP is enforced for the process.
      ; 0x40 = PAGE_EXECUTE_READWRITE
      DllCall("VirtualProtect", "uint",&fun, "uint",codeSize, "uint",0x40, "uint*",oldProtect)
   }
   Return DllCall(&fun
      , "uint",haystackAddr, "uint",needleAddr
      , "uint",haystackSize, "uint",needleSize
      , "uint",StartOffset)
}
The link also provides the source code for an asm file:

Code: Select all

; FASM source, 32-bit Windows PE executable (GUI subsystem 4.0).
; The program only exists as a host for the two search procedures: 'start'
; calls each of them once on the sample data below and then exits.
format PE GUI 4.0
entry start

include 'win32a.inc'

section '.data' data readable writeable

  ; Sample data: 19-byte haystack with the 5-byte needle at offset 8.
  hayStack db '1111111122222111111'
  Needle db '22222'

section '.code' code readable executable

  start:

	; Multi-operand push stores the operands left to right, so hayStack ends up
	; on top of the stack as the first stdcall argument:
	; InBuf(hayStack, Needle, 19, 5, StartOffset=0)
	push	0 5 19 Needle hayStack
	call	InBuf
	; InBufRev(hayStack, Needle, 19, 5, StartOffsetOfLastByte=-1)
	push	-1 5 19 Needle hayStack
	call	InBufRev
	invoke	ExitProcess,0

;-----------------------------------------------------------------------
; InBuf - forward search for a byte sequence inside a buffer.
; Convention: 32-bit stdcall (arguments on the stack, callee cleans up).
; In:   hayStack      address of the buffer to search
;       Needle        address of the bytes to look for
;       hayStackSize  size of hayStack in bytes
;       NeedleSize    size of Needle in bytes
;       StartOffset   zero-based offset in hayStack where the search starts
; Out:  EAX = zero-based offset of the first match (relative to hayStack),
;             or -1 when there is no match or NeedleSize <= 0.
; Saved: ebx ecx edx esi edi (via 'uses') and EFLAGS (pushfd/popfd).
;
; Every scan loop relies on this: when ECX is already 0, 'repne scasb'
; executes no iterations and leaves the flags untouched, so the ZF=0 left
; by the preceding failed cmp/cmps makes the following 'jne .NotFound' fire.
;-----------------------------------------------------------------------
proc InBuf stdcall uses ebx ecx edx esi edi, hayStack,Needle,hayStackSize,NeedleSize,StartOffset
	local	lNeedleRemDwords:DWORD	;(NeedleSize-4)>>2
	local	lNeedleRemTail:DWORD	;Needle remainder byte count (NeedleSize-4) mod 4 -> (0..3)
	local	lNeedleRemPtr4:DWORD	;&Needle[4]

	pushfd			;saved because cld below changes the direction flag

	mov	ebx,[NeedleSize]
	cmp	ebx,0
	jle	.NotFound	;empty or negative needle size -> -1
	mov	ecx,[hayStackSize]
	mov	eax,[StartOffset]
	sub	ecx,eax
	sub	ecx,ebx
	inc	ecx	;repetitions=hayStackSize-StartOffset-NeedleSize+1
	jle	.NotFound	;needle does not fit behind StartOffset (signed test on the inc result)

	mov	edi,[hayStack]
	add	edi,eax ;edi=&(hayStack[StartOffset])

	;load Needle FirstByte
	mov	esi,[Needle]
	xor	eax,eax
	cld
	lodsb	; AL=Needle[0], keep EAX now! (ESI=&Needle[1] afterwards)

	;decide on needle length: EBX is counted down, the first zero picks the variant
	dec	ebx
	jz	.NeedleLenIs1
	dec	ebx
	jz	.NeedleLenIs2
	dec	ebx
	jz	.NeedleLenIs3
	dec	ebx
	jz	.NeedleLenIs4
	dec	ebx
	jnz	.NeedleLenIsLong	;EBX=NeedleSize-5 here, non-zero means 6 bytes or more

;.NeedleLenIs5:
	xchg	eax,ebx		;park Needle[0] in EBX (EBX was 0, so EAX becomes 0)
	lodsd		;EAX=Needle[1..4]
	xchg	eax,ebx ;EBX=bytes 1..4 of Needle (0-based), EAX=Needle[0] again

   .ScanNeedleLenIs5:
	repne	scasb		;find Needle[0]; EDI ends one past the hit
	jne	.NotFound
	cmp	[edi],ebx	;the 4 bytes after the hit must equal Needle[1..4]
	jne	.ScanNeedleLenIs5
	jmp	.Found

.NeedleLenIs4:
	dec	esi		;back to &Needle[0]
	lodsd	;EAX=first 4 bytes of Needle
   .ScanNeedleLenIs4:
	repne	scasb		;AL is still Needle[0]
	jne	.NotFound
	cmp	[edi-1],eax	;compare all 4 bytes starting at the hit
	jne	.ScanNeedleLenIs4
	jmp	.Found

.NeedleLenIs1:
	repne	scasb
	jne	.NotFound
	jmp	.Found

.NeedleLenIs2:
	mov	ah,[esi]	;AH=Needle[1]
   .ScanNeedleLenIs2:
	repne	scasb
	jne	.NotFound
	cmp	[edi],ah	;byte after the hit must equal Needle[1]
	jne	.ScanNeedleLenIs2
	jmp	.Found

.NeedleLenIs3:
	xchg	ebx,eax		;park Needle[0] in EBX
	lodsw			;AX=Needle[1..2]
	xchg	ebx,eax		;BX=Needle[1..2], EAX=Needle[0] again
   .ScanNeedleLenIs3:
	repne	scasb
	jne	.NotFound
	cmp	[edi],bx	;2 bytes after the hit must equal Needle[1..2]
	jne	.ScanNeedleLenIs3
	jmp	.Found

.NeedleLenIsLong:
	; split the part after the first 4 bytes: (NeedleSize-4)>>2 dwords, (NeedleSize-4) mod 4 tail bytes
	dec	esi	;ESI=&(Needle[0])
	inc	ebx	;EBX=NeedleSize-4
	lodsd	;EAX=first 4 bytes of Needle
	mov	[lNeedleRemPtr4],esi
	mov	edx,ebx
	shr	ebx,2
	mov	[lNeedleRemDwords],ebx
	and	edx,3
	mov	[lNeedleRemTail],edx

	;pre-swap so that the swap at the top of the loop restores EDI/ECX
	xchg	ebx,edi ;EBX=save EDI buf ptr for scasb
	xchg	edx,ecx ;EDX=save ECX counter for scasb

   .ScanNeedleLenIsLong:
	;xchg does not touch flags, so a ZF=0 from a failed cmps survives to the scan below
	xchg	edi,ebx ;load saved buf ptr
	xchg	ecx,edx ;load saved counter
   .ScanNeedleLenIsLongJustScan:
	repne	scasb
	jne	.NotFound

	;check all 4 bytes
	cmp	[edi-1],eax
	jne	.ScanNeedleLenIsLongJustScan

	;check up to Needle's tail
	mov	ebx,edi		;remember scan position (one past the hit)
	mov	edx,ecx		;remember remaining repetitions
	add	edi,3		;EDI=&hit[4]
	mov	esi,[lNeedleRemPtr4]
	mov	ecx,[lNeedleRemDwords]
	test	ecx,ecx
	jz	.ScanNeedleLenIsLongTail	;no whole dwords left to compare
	repe	cmpsd
	jne	.ScanNeedleLenIsLong
   .ScanNeedleLenIsLongTail:
	mov	ecx,[lNeedleRemTail]
	test	ecx,ecx
	jz	.ScanNeedleLenIsLongFound
	repe	cmpsb
	jne	.ScanNeedleLenIsLong
   .ScanNeedleLenIsLongFound:
	mov	edi,ebx ;FOUND!

.Found:
	dec	edi		;EDI was one past the first matching byte
	mov	eax,edi
	sub	eax,[hayStack]	;convert address to zero-based offset
.popOut:
	popfd
	ret
.NotFound:
	xor	eax,eax
	not	eax		;EAX=-1
	jmp	.popOut
endp

;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

;-----------------------------------------------------------------------
; InBufRev - backward search for a byte sequence inside a buffer.
; Convention: 32-bit stdcall (arguments on the stack, callee cleans up).
; In:   hayStack               address of the buffer to search
;       Needle                 address of the bytes to look for
;       hayStackSize           size of hayStack in bytes
;       NeedleSize             size of Needle in bytes
;       StartOffsetOfLastByte  offset of the last haystack byte a match may
;                              cover; -1 (or anything past the end) means
;                              the last byte of the haystack
; Out:  EAX = zero-based offset of the last match that ends at or before the
;             limit, or -1 when there is none or NeedleSize <= 0.
; Saved: ebx ecx edx esi edi (via 'uses') and EFLAGS incl. DF (pushfd/popfd).
;
; Every scan loop relies on this: when ECX is already 0, 'repne scasb'
; executes no iterations and leaves the flags untouched, so the ZF=0 left
; by the preceding failed cmp/cmps makes the following 'jne .NotFound' fire.
;-----------------------------------------------------------------------
proc InBufRev stdcall uses ebx ecx edx esi edi, hayStack,Needle,hayStackSize,NeedleSize,StartOffsetOfLastByte
	local	lNeedleRemDwords:DWORD	;(NeedleSize-4)>>2
	local	lNeedleRemTail:DWORD	;Needle remainder byte count (NeedleSize-4) mod 4 -> (0..3)
	local	lNeedleRemPtr4:DWORD	;&Needle[4]

	pushfd			;saved because std/cld below change the direction flag

	mov	ebx,[NeedleSize]
	cmp	ebx,0
	jle	.NotFound
	mov	eax,[hayStackSize]
	dec	eax		;EAX=offset of the last haystack byte
	mov	ecx,[StartOffsetOfLastByte]
	cmp	ecx,-1
	cmovE	ecx,eax		;-1 selects the end of the haystack
	cmp	eax,ecx
	cmovL	ecx,eax		;clamp to the end of the haystack
	sub	ecx,ebx
	; FIX: the start position used to be taken before this increment, which
	; made both the start pointer and the repetition count one too small, so
	; the last candidate position (needle at the very end) was never checked.
	inc	ecx		;ECX=min(hayStackSize-1,StartOffsetOfLastByte)-NeedleSize+1 = last candidate start
	mov	edi,ecx
	inc	ecx	;repetitions=min(hayStackSize-1,StartOffsetOfLastByte)-NeedleSize+2
	jle	.NotFound	;needle does not fit (signed test on the inc result)

	add	edi,[hayStack]	;edi=&(hayStack[min(hayStackSize-1,StartOffsetOfLastByte)-NeedleSize+1])

	;load Needle FirstByte
	mov	esi,[Needle]
	and	eax,0
	cld
	lodsb	; AL=Needle[0], keep EAX now! (ESI=&Needle[1] afterwards)

	;decide on needle length: EBX is counted down, the first zero picks the variant
	dec	ebx
	jz	.NeedleLenIs1
	dec	ebx
	jz	.NeedleLenIs2
	dec	ebx
	jz	.NeedleLenIs3
	dec	ebx
	jz	.NeedleLenIs4
	dec	ebx
	jnz	.NeedleLenIsLong	;EBX=NeedleSize-5 here, non-zero means 6 bytes or more

;.NeedleLenIs5:
	xchg	eax,ebx		;park Needle[0] in EBX (EBX was 0, so EAX becomes 0)
	lodsd		;EAX=Needle[1..4]
	xchg	eax,ebx ;EBX=bytes 1..4 of Needle (0-based), EAX=Needle[0] again
	std			;scan backwards from here on

   .ScanNeedleLenIs5:
	repne	scasb		;find Needle[0]; EDI ends one before the hit
	jne	.NotFound
	cmp	[edi+2],ebx	;the 4 bytes after the hit must equal Needle[1..4]
	jne	.ScanNeedleLenIs5
	jmp	.Found

.NeedleLenIs1:
	std
	repne	scasb
	jne	.NotFound
	jmp	.Found

.NeedleLenIs2:
	std
	mov	ah,[esi]	;AH=Needle[1]
   .ScanNeedleLenIs2:
	repne	scasb
	jne	.NotFound
	cmp	[edi+2],ah	;byte after the hit must equal Needle[1]
	jne	.ScanNeedleLenIs2
	jmp	.Found

.NeedleLenIs3:
	xchg	ebx,eax		;park Needle[0] in EBX
	lodsw			;AX=Needle[1..2]
	xchg	ebx,eax		;BX=Needle[1..2], EAX=Needle[0] again
	std
   .ScanNeedleLenIs3:
	repne	scasb
	jne	.NotFound
	cmp	[edi+2],bx	;2 bytes after the hit must equal Needle[1..2]
	jne	.ScanNeedleLenIs3
	jmp	.Found

.NeedleLenIs4:
	dec	esi		;back to &Needle[0]
	lodsd	;EAX=first 4 bytes of Needle
	std
   .ScanNeedleLenIs4:
	repne	scasb		;AL is still Needle[0]
	jne	.NotFound
	cmp	[edi+1],eax	;compare all 4 bytes starting at the hit
	jne	.ScanNeedleLenIs4
	jmp	.Found

.NeedleLenIsLong:
	; split the part after the first 4 bytes: (NeedleSize-4)>>2 dwords, (NeedleSize-4) mod 4 tail bytes
	dec	esi	;ESI=&(Needle[0])
	inc	ebx	;EBX=NeedleSize-4
	lodsd	;EAX=first 4 bytes of Needle
	mov	[lNeedleRemPtr4],esi
	mov	edx,ebx
	shr	ebx,2
	mov	[lNeedleRemDwords],ebx
	and	edx,3
	mov	[lNeedleRemTail],edx

	;pre-swap so that the swap at the top of the loop restores EDI/ECX
	xchg	ebx,edi ;EBX=save EDI buf ptr for scasb
	xchg	edx,ecx ;EDX=save ECX counter for scasb

   .ScanNeedleLenIsLong:
	;std and xchg do not touch ZF, so a ZF=0 from a failed cmps survives to the scan below
	std
	xchg	edi,ebx ;load saved buf ptr
	xchg	ecx,edx ;load saved counter
   .ScanNeedleLenIsLongJustScan:
	repne	scasb
	jne	.NotFound

	;check all 4 bytes
	cmp	[edi+1],eax
	jne	.ScanNeedleLenIsLongJustScan

	;check up to Needle's tail (compared forwards, hence cld)
	cld
	mov	ebx,edi		;remember scan position (one before the hit)
	mov	edx,ecx		;remember remaining repetitions
	add	edi,5		;EDI=&hit[4]
	mov	esi,[lNeedleRemPtr4]
	mov	ecx,[lNeedleRemDwords]
	test	ecx,ecx
	jz	.ScanNeedleLenIsLongTail	;no whole dwords left to compare
	repe	cmpsd
	jne	.ScanNeedleLenIsLong
   .ScanNeedleLenIsLongTail:
	mov	ecx,[lNeedleRemTail]
	test	ecx,ecx
	jz	.ScanNeedleLenIsLongFound
	repe	cmpsb
	jne	.ScanNeedleLenIsLong
   .ScanNeedleLenIsLongFound:
	mov	edi,ebx ;FOUND!

.Found:
	inc	edi		;EDI was one before the first matching byte
	mov	eax,edi
	sub	eax,[hayStack]	;convert address to zero-based offset
.popOut:
	popfd
	ret
.NotFound:
	xor	eax,eax
	not	eax		;EAX=-1
	jmp	.popOut
endp

data import

library kernel32,'KERNEL32.DLL'
import kernel32,ExitProcess,'ExitProcess'

end data
I used flat assembler as wOxxOm did, to compile the asm file to an exe.
flat assembler
http://flatassembler.net/

(When I first tried to compile the asm to exe, it said that it was missing include files. I moved FASM.EXE to the INCLUDE folder and it worked.)
I compiled simply by doing: Run, "%vPathFASM%" "%vPathAsm%".

I read the contents of the exe file as a hex string.
The required hex string used in the function was in the exe file, however I would not have known which bytes to copy and paste into the function.

The resulting exe was 2048 bytes in size: 1074+224+750, the required code of 224 bytes began at 1074 bytes.

5589E583EC0C53515256579C8B5D1483FB000F8EC20000008B4D108B451829C129D9410F8EB10000008B7D0801C78B750C31C0FCAC4B742A4B742D4B74364B74144B753F93AD93F2AE0F858B000000391F75F4EB754EADF2AE757F3947FF75F7EB68F2AE7574EB628A26F2AE756C382775F8EB569366AD93F2AE755E66391F75F7EB474E43AD8975FC89DAC1EB02895DF483E2038955F887DF87D187FB87CAF2AE75373947FF75F789FB89CA83C7038B75FC8B4DF485C97404F3A775DE8B4DF885C97404F3A675D389DF4F89F82B45089D5F5E5A595BC9C2140031C0F7D0EBF0

Btw does anyone have any recommended tools for asm to exe, or exe to asm.

Also I haven't found too much on AHK and machine code except:
MCode Tutorial (Compiled Code in AHK) - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=7&t=32

[EDIT:]
I have often used InBuf to search through binary data in the address space.
RegExMatch can only search for an even number of bytes at even byte intervals, so you could run a RegExMatch search twice, altering/cropping the search needle as required. (This can be a problem though, especially on a needle of under 10 bytes.)
RegEx, however, has the advantage of being able to search case insensitive.

Btw also, what is the best method, for opening binary data, and replacing null bytes with spaces.

Btw I have a file searcher, and it is complete apart from that I would be interested in the best methods for searching for text/data.
E.g. binary data / ANSI/UTF-8/UTF-16 LE case sensitive/insensitive.
A function like this could do binary data, and case insensitive text, 2 runs of RegEx could do UTF-16 LE (text offset at even/odd bytes), but I'm not sure about ANSI/UTF-8 case insensitive.

Thanks.

[EDIT:] See the BINARY DATA section, here, for more binary-related links:
jeeswg's homepage - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=17&t=30931&p=144437#p144437
Last edited by jeeswg on 24 Aug 2019, 14:24, edited 4 times in total.
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

09 Apr 2017, 19:45

This link gives me some clues:
flat assembler - View topic - Hello world on x64
https://board.flatassembler.net/topic.php?p=141331

Like changing:

Code: Select all

format PE GUI 4.0
entry start

include 'win32a.inc'
to:

Code: Select all

format PE64 GUI
entry start

include 'win64ax.inc'
I could do with some assistance, if anybody knows how to convert an asm file (x86 or x64, see the x86 asm file above) to machine code as hex, for use with AHK (x86 or x64). Thanks.

Some links:

[use machine code as hex in AHK functions]
MCode Tutorial (Compiled Code in AHK) - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=7&t=32

[convert C/C++ code to machine code]
MCode4GCC -- C/C++ to MCode Generator - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=6&t=4642
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

26 May 2017, 17:42

OllyDbg can be used as a disassembler for x32 exe files.
- I.e. assembler (asm to exe) (assembly language to machine language) ('friendly' shorthand to binary).
- I.e. disassembler (exe to asm) (machine language to assembly language) (binary to 'friendly' shorthand).

To me it wasn't exactly clear how asm would translate into exe, so I wanted to translate back again from the exe into asm to try and understand the asm better.

OllyDbg v1.10
http://www.ollydbg.de/

http://www.ollydbg.de/odbg201.zip

tools - Is there any disassembler to rival IDA Pro? - Reverse Engineering Stack Exchange
https://reverseengineering.stackexchang ... al-ida-pro

x86 Disassembly/Disassemblers and Decompilers - Wikibooks, open books for an open world
https://en.wikibooks.org/wiki/X86_Disas ... ecompilers
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
Helgef
Posts: 4709
Joined: 17 Jul 2016, 01:02
Contact:

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

27 May 2017, 19:53

Hi jeeswg. I saw this link in the other thread.
Try this, briefly tested,

Code: Select all

inBuf(hay,ned,haylen,nedlen,bufPos:=true,init:=false){
	; Searches a binary buffer for the first occurrence of a byte sequence, using
	; embedded machine code (32- or 64-bit, chosen by A_PtrSize).
	; Input: 
	;	- hay, address of the haystack buffer to be searched for the first occurrence of ned.
	;	- ned, address of the needle buffer to search for in hay.
	;	- haylen, length of hay, in bytes
	;	- nedlen, length of ned, in bytes
	;	- bufpos, return buffer position (zero-based byte offset), set to false to return string position.
	;	- init, internal use only, don't set. If set, only the address of the machine code is returned.
	; Return:
	;	- position of ned in hay according to bufPos parameter.
	;	- -1 if nothing was searched because nedlen<1 or nedlen>haylen.
	;	- -2 if the search ran and found no match.
	; Url:
	;	- https://msdn.microsoft.com/en-us/library/windows/desktop/aa366887(v=vs.85).aspx 	(VirtualAlloc function)
	;	- https://msdn.microsoft.com/en-us/library/windows/desktop/aa366786(v=vs.85).aspx 	(Memory Protection Constants)
	;
	; Due to:
	;	- https://autohotkey.com/boards/viewtopic.php?f=5&t=28393
	local k, i, raw
	static flProtect:=0x40, flAllocationType:=0x1000 ; PAGE_EXECUTE_READWRITE ,	; MEM_COMMIT	
	static raw32:=[1398167381,2332355715,2334401612,2334139500,689710196,2336323789,2367169628,80215929,36,2127136000,112594723,477430584,829751173,233558577,369407503,1964180536,1962555659,29524768,3967142457,19137667,956416899,3447596076,3087320195,4294967294,1566531163,604277699,1527039107,3277676382]
	static raw64:=[3054457686,1170223410,2202126377,2302738921,876102338,1160344849,829737349,3956420933,479020310,3548988418,438089231,421279810,960826997,1091990730,1157743235,3782658617,1140965507,3329671225,4294966968,3277741055]
	; The code is installed on first use instead of through a static initialiser
	; that calls this function by name, so the function can be renamed freely.
	static bin:=0
	if !bin {
		raw:=A_PtrSize==4?raw32:raw64
		bin:=DllCall("Kernel32.dll\VirtualAlloc", "Uptr",0, "Ptr", raw.length()*4, "Uint", flAllocationType, "Uint", flProtect, "Ptr")
		if !bin
			throw Exception("VirtualAlloc failed, cannot install machine code.", -1, A_LastError)
		; Each array element is one little-endian dword of machine code.
		for k, i in raw
			NumPut(i,bin+(k-1)*4,"Int")
		raw32:="",raw64:=""	; the arrays are no longer needed once copied
	}
	if init
		return bin
	; The machine code loops while j <= nedlen-1 using unsigned arithmetic, so a
	; needle length of 0 would wrap around and read past both buffers.
	if (nedlen<1)
		return -1
	if (nedlen>haylen)
		return -1
	p:=DllCall(bin, "Ptr", hay, "Ptr", ned, "Uint", haylen, "Uint", nedlen, "cdecl")
	; String position: 1-based character index (2 bytes per character in Unicode builds).
	return p>=0 ? (bufPos ? p : (p+(A_IsUnicode?2:1)) // (A_IsUnicode?2:1)) : p
}
Here is the source code:

Code: Select all

# Compiler output (see .ident below) for a C function "inbuf", GAS AT&T syntax,
# x86-64 with Windows SEH unwind directives.
# Register arguments as spilled below: rcx = haystack pointer, rdx = needle
# pointer, r8d = haystack length, r9d = needle length (both 32-bit).
# Returns in eax the offset of the first match, or -2 when there is none.
# Locals: -4(%rbp) = outer index into the haystack, -8(%rbp) = inner index
# into the needle.
	.file	"abc.c"
	.text
	.globl	inbuf
	.def	inbuf;	.scl	2;	.type	32;	.endef
	.seh_proc	inbuf
inbuf:
	pushq	%rbp
	.seh_pushreg	%rbp
	movq	%rsp, %rbp
	.seh_setframe	%rbp, 0
	subq	$16, %rsp
	.seh_stackalloc	16
	.seh_endprologue
	# spill the four register arguments to the stack slots above the return address
	movq	%rcx, 16(%rbp)
	movq	%rdx, 24(%rbp)
	movl	%r8d, 32(%rbp)
	movl	%r9d, 40(%rbp)
	movl	$0, -4(%rbp)
	jmp	.L2
.L9:
	# outer loop body: restart the needle at index 0
	movl	$0, -8(%rbp)
	jmp	.L3
.L8:
	# edx = haystack[outer + inner]
	movl	-4(%rbp), %edx
	movl	-8(%rbp), %eax
	addl	%edx, %eax
	movl	%eax, %edx
	movq	16(%rbp), %rax
	addq	%rdx, %rax
	movzbl	(%rax), %edx
	# eax = needle[inner]
	movl	-8(%rbp), %ecx
	movq	24(%rbp), %rax
	addq	%rcx, %rax
	movzbl	(%rax), %eax
	cmpb	%al, %dl
	jne	.L10
	# bytes equal: if inner == needle length - 1 the whole needle matched
	movl	40(%rbp), %eax
	subl	$1, %eax
	cmpl	-8(%rbp), %eax
	jne	.L6
	movl	-4(%rbp), %eax	# return the outer index
	jmp	.L7
.L6:
	addl	$1, -8(%rbp)
.L3:
	# inner loop condition: needle length - 1 >= inner (unsigned compare)
	movl	40(%rbp), %eax
	subl	$1, %eax
	cmpl	-8(%rbp), %eax
	jnb	.L8
	jmp	.L5
.L10:
	nop
.L5:
	addl	$1, -4(%rbp)
.L2:
	# outer loop condition: haystack length - needle length >= outer (unsigned
	# compare; the subtraction wraps if the needle is longer than the haystack)
	movl	32(%rbp), %eax
	subl	40(%rbp), %eax
	cmpl	-4(%rbp), %eax
	jnb	.L9
	movl	$-2, %eax	# no match
.L7:
	addq	$16, %rsp
	popq	%rbp
	ret
	.seh_endproc
	.ident	"GCC: (GNU) 5.3.0"
Spoiler
Cheers.
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

27 May 2017, 20:32

Omg it's only taken 10 years. Someone even asked for an x64 version on the original thread 6 years ago.

It's only been as I've been investigating this problem, that I've realised that direct asm to exe is less common than C++ to exe, and that actually Laszlo's bit wizardry functions were based on the C++ approach. For the longest time though, wOxxOm's function was the only machine code function I was (knowingly) using, and his approach seemed like the default approach to me, as it was the first time I saw and used machine code.

Anyway I had heard of assembly language, and hoped to investigate it to some extent. If you're going to learn C++ one day, you might as well learn assembly language. Or conversely, AutoHotkey is just as removed from 'real' programming as C++ is.

Compiling in C++ is like stepping into a river, no two exes are ever the same. With FASM (flat assembler) I compiled the asm 10 years later and got exactly the same result.

The key test will be speed, if InBuf is faster, that is a good argument for pure asm. However, the code you have produced does something that was previously impossible in AutoHotkey x64 unless you used repeated NumGet and/or dodgy RegExMatch workarounds (since in Unicode versions of AutoHotkey, RegExMatch treats everything in byte pairs).

For C++, you need Visual Studio for example, which unless I'm able to customise it using something like AutoHotkey, is not a nice experience to use. Although at least it doesn't use the Ribbon. Plus there are so many options everywhere, and it takes 7 gigabytes to install.

Many many thanks for this Helgef. Also, if anyone is able to answer my questions at the top of this thread, and/or able to create an x64 version of the x32 asm, I would be extremely grateful.

Btw wOxxOm spoke about the function being optimised for needles of certain lengths. If there are speed benefits, I might be interested in separate functions that are written to search only for a specific needle (string), or search only for needles that are 1 or 2 or 4 or 8 bytes in length.

Btw potential uses are: searching through many megabytes of data in the address space, replacing nulls with spaces, and complicated searches for multiple data/text needles, across a big folder or an entire drive.

Btw I've said before that I think we need some kind of RegExMatch 'ANSI' version for AutoHotkey Unicode. In general I'm interested in doing case sensitive/case insensitive searches for ANSI/UTF-8/UTF-16 data. I don't know if there is a fast way to search for case insensitive UTF-8 data, other than some very flexible RegExMatch searches, or converting the text to UTF-16 first.
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

28 May 2017, 13:24

@Helgef: Thanks so much I tested your inBuf function it worked in AHK x64. Btw it currently contains a reference to itself, which could be a problem if the user renames it!

wOxxOm's function is pretty speedy, so maybe pure assembler isn't so bad, 'semb it like wOxxOm' (BILB (2002)).

Machine code binary buffer searching regardless of NULL - Scripts and Functions - AutoHotkey Community
https://autohotkey.com/board/topic/2362 ... s-of-null/
This seems to be the first pure assembler function written for AHK.
I'm surprised that Laszlo with all his clever optimised bit wizardry functions, himself, wasn't more interested in pure assembler functions.

I've done some benchmark tests:

Code: Select all

; Benchmark: compares InBuf (wOxxOm, machine code), inBuf2 (Helgef, machine code)
; and RegExMatch when searching a binary buffer for "AutoHotkeyGUI".
; First the results of the approaches are shown for the AutoHotkey exe itself,
; then each approach is timed on a ~10 MB buffer with the needle near the end.
#SingleInstance force
ListLines, Off
#KeyHistory 0
Menu, Tray, Click, 1
#NoEnv
AutoTrim, Off
#UseHook

SplitPath, A_ScriptName,,,, vScriptNameNoExt
Menu, Tray, Tip, % vScriptNameNoExt

;==================================================

;q:: ;test Helgef's inBuf function (renamed here to inBuf2)
; The second assignment wins; swap the two lines to test with an ANSI needle.
vEnc := "CP0"
vEnc := "UTF-16"

; bytes per character of the needle encoding
vWidth := (vEnc = "UTF-16") ? 2 : 1
Clipboard := A_AhkPath
; read the AutoHotkey exe as raw bytes (*c) and copy it into vData
FileGetSize, vSize, % A_AhkPath
VarSetCapacity(vData, vSize, 1)
FileRead, vData2, % "*c " A_AhkPath
DllCall("kernel32\RtlMoveMemory", Ptr,&vData, Ptr,&vData2, UPtr,vSize)

; build the needle bytes in the chosen encoding (no terminator is written
; because the length passed to StrPut equals the string length)
vNeedleText := "AutoHotkeyGUI"
vLenNeedle := StrLen(vNeedleText)
vSizeNeedle := vLenNeedle*vWidth
VarSetCapacity(vNeedle, vLenNeedle*vWidth)
StrPut(vNeedleText, &vNeedle, vLenNeedle, vEnc)
; build a RegEx needle of \x{NNNN} items, one per 2 bytes of the needle
vNeedleRegEx := ""
Loop, % Floor(vSizeNeedle/2)
{
	vNum := NumGet(&vNeedle+0, A_Index*2-2, "UShort")
	vNeedleRegEx .= "\x{" Format("{:04X}", vNum) "}"
}
; show the result of each approach on the exe data
MsgBox, % vNeedleRegEx
MsgBox, % InBuf(&vData, &vNeedle, vSize, vSizeNeedle)
MsgBox, % inBuf2(&vData, &vNeedle, vSize, vSizeNeedle)
; RegExMatch returns a 1-based character position; *2-2 converts it to a byte offset
MsgBox, % RegExMatch(vData, vNeedleRegEx)*2-2
MsgBox, % InBufViaNumGet(&vData, &vNeedle, vSize, vSizeNeedle)

; timing data: buffer filled with byte 1, needle written at offset 10000000
vOffset := 10000000
vSize := vOffset + 1000
VarSetCapacity(vData, vSize, 1)
; NOTE(review): the length passed here is the needle size in bytes, whereas the
; StrPut call above passes the length in characters - confirm this is intended.
StrPut(vNeedleText, &vData+vOffset, vSizeNeedle, vEnc)
MsgBox, % InBuf(&vData, &vNeedle, vSize, vSizeNeedle)
MsgBox, % inBuf2(&vData, &vNeedle, vSize, vSizeNeedle)

; time vNum iterations of each approach, in A_TickCount milliseconds
vNum := 100
vTickCount1 := A_TickCount
Loop, % vNum
	InBuf(&vData, &vNeedle, vSize, vSizeNeedle)
vTickCount2 := A_TickCount
Loop, % vNum
	inBuf2(&vData, &vNeedle, vSize, vSizeNeedle)
vTickCount3 := A_TickCount
Loop, % vNum
	RegExMatch(vData, vNeedleRegEx)
vTickCount4 := A_TickCount
;InBufViaNumGet: 20810 msec to do 1 iteration
;Loop, % vNum
;	InBufViaNumGet(&vData, &vNeedle, vSize, vSizeNeedle)
;vTickCount5 := A_TickCount

; output order: InBuf, inBuf2, RegExMatch
vOutput := (vTickCount2-vTickCount1) " " (vTickCount3-vTickCount2) " " (vTickCount4-vTickCount3) ;" " (vTickCount5-vTickCount4)
Clipboard := vOutput
MsgBox, % vOutput
return

;==================================================

;results AHK x32:
;1217 3120 562
;1233 3120 530
;1248 3136 561

;note: in Unicode AHK, if the needle could appear
;at an odd offset, you would have to run RegExMatch twice,
;thus the time for RegExMatch would be doubled
;making it roughly the same speed as InBuf

;note: in Unicode AHK, with RegExMatch you cannot
;specify a needle with an odd number of bytes

;==================================================

InBuf(haystackAddr, needleAddr, haystackSize, needleSize, StartOffset=0)
{
   ; Searches a binary buffer for a byte sequence using wOxxOm's x86 machine code.
   ;   haystackAddr / haystackSize - address and size (bytes) of the buffer to search
   ;   needleAddr / needleSize     - address and size (bytes) of the bytes to look for
   ;   StartOffset                 - zero-based offset in the haystack to start from
   ; Returns the zero-based offset of the first match, or -1 if there is none.
   ; The embedded code is 32-bit stdcall, so this only works in 32-bit AutoHotkey.
   Static fun
   IfEqual,fun,
   {
      ; Hex dump of the assembled InBuf procedure (224 bytes).
      h=
      ( LTrim join
         5589E583EC0C53515256579C8B5D1483FB000F8EC20000008B4D108B451829C129D9410F8E
         B10000008B7D0801C78B750C31C0FCAC4B742A4B742D4B74364B74144B753F93AD93F2AE0F
         858B000000391F75F4EB754EADF2AE757F3947FF75F7EB68F2AE7574EB628A26F2AE756C38
         2775F8EB569366AD93F2AE755E66391F75F7EB474E43AD8975FC89DAC1EB02895DF483E203
         8955F887DF87D187FB87CAF2AE75373947FF75F789FB89CA83C7038B75FC8B4DF485C97404
         F3A775DE8B4DF885C97404F3A675D389DF4F89F82B45089D5F5E5A595BC9C2140031C0F7D0EBF0
      )
      codeSize := StrLen(h)//2
      VarSetCapacity(fun,codeSize)
      Loop % codeSize
         NumPut("0x" . SubStr(h,2*A_Index-1,2), fun, A_Index-1, "Char")
      ; Variable memory is not executable by default; without this the call
      ; below raises an access violation when DEP is enforced for the process.
      ; 0x40 = PAGE_EXECUTE_READWRITE
      DllCall("VirtualProtect", "uint",&fun, "uint",codeSize, "uint",0x40, "uint*",oldProtect)
   }
   Return DllCall(&fun
      , "uint",haystackAddr, "uint",needleAddr
      , "uint",haystackSize, "uint",needleSize
      , "uint",StartOffset)
}

;==================================================

;note: inBuf2 currently has a reference to itself, which also has to be renamed if you rename the function
inBuf2(hay,ned,haylen,nedlen,bufPos:=true,init:=false){
	; Searches a binary buffer for the first occurrence of a byte sequence, using
	; embedded machine code (32- or 64-bit, chosen by A_PtrSize).
	; Input:
	;	- hay, address of the haystack buffer to be searched for the first occurrence of ned.
	;	- ned, address of the needle buffer to search for in hay.
	;	- haylen, length of hay, in bytes
	;	- nedlen, length of ned, in bytes
	;	- bufpos, return buffer position (zero-based byte offset), set to false to return string position.
	;	- init, internal use only, don't set. If set, only the address of the machine code is returned.
	; Return:
	;	- position of ned in hay according to bufPos parameter.
	;	- -1 if nothing was searched because nedlen<1 or nedlen>haylen.
	;	- -2 if the search ran and found no match.
	; Url:
	;	- https://msdn.microsoft.com/en-us/library/windows/desktop/aa366887(v=vs.85).aspx 	(VirtualAlloc function)
	;	- https://msdn.microsoft.com/en-us/library/windows/desktop/aa366786(v=vs.85).aspx 	(Memory Protection Constants)
	;
	; Due to:
	;	- https://autohotkey.com/boards/viewtopic.php?f=5&t=28393
	local k, i, raw
	static flProtect:=0x40, flAllocationType:=0x1000 ; PAGE_EXECUTE_READWRITE ,	; MEM_COMMIT
	static raw32:=[1398167381,2332355715,2334401612,2334139500,689710196,2336323789,2367169628,80215929,36,2127136000,112594723,477430584,829751173,233558577,369407503,1964180536,1962555659,29524768,3967142457,19137667,956416899,3447596076,3087320195,4294967294,1566531163,604277699,1527039107,3277676382]
	static raw64:=[3054457686,1170223410,2202126377,2302738921,876102338,1160344849,829737349,3956420933,479020310,3548988418,438089231,421279810,960826997,1091990730,1157743235,3782658617,1140965507,3329671225,4294966968,3277741055]
	; The code is installed on first use instead of through a static initialiser
	; that calls this function by name, so the function can be renamed freely.
	static bin:=0
	if !bin {
		raw:=A_PtrSize==4?raw32:raw64
		bin:=DllCall("Kernel32.dll\VirtualAlloc", "Uptr",0, "Ptr", raw.length()*4, "Uint", flAllocationType, "Uint", flProtect, "Ptr")
		if !bin
			throw Exception("VirtualAlloc failed, cannot install machine code.", -1, A_LastError)
		; Each array element is one little-endian dword of machine code.
		for k, i in raw
			NumPut(i,bin+(k-1)*4,"Int")
		raw32:="",raw64:=""	; the arrays are no longer needed once copied
	}
	if init
		return bin
	; The machine code loops while j <= nedlen-1 using unsigned arithmetic, so a
	; needle length of 0 would wrap around and read past both buffers.
	if (nedlen<1)
		return -1
	if (nedlen>haylen)
		return -1
	p:=DllCall(bin, "Ptr", hay, "Ptr", ned, "Uint", haylen, "Uint", nedlen, "cdecl")
	; String position: 1-based character index (2 bytes per character in Unicode builds).
	return p>=0 ? (bufPos ? p : (p+(A_IsUnicode?2:1)) // (A_IsUnicode?2:1)) : p
}

;==================================================

InBufViaNumGet(vAddrDataH, vAddrDataN, vSizeH, vSizeN)
{
	; Reference implementation of a binary search written with NumGet only:
	; tries every start offset in the haystack and compares byte by byte.
	; vAddrDataH/vSizeH: haystack address and size in bytes.
	; vAddrDataN/vSizeN: needle address and size in bytes.
	; Returns the zero-based offset of the first match, or -1 if there is none.
	if (vSizeH < vSizeN)
		return -1
	vLastStart := vSizeH - vSizeN
	vStart := 0
	while (vStart <= vLastStart)
	{
		; count how many leading needle bytes agree at this start offset
		vMatched := 0
		while (vMatched < vSizeN && NumGet(vAddrDataH+vStart, vMatched, "UChar") = NumGet(vAddrDataN+0, vMatched, "UChar"))
			vMatched++
		if (vMatched >= vSizeN)
			return vStart
		vStart++
	}
	return -1
}

;==================================================
Btw when you put a hot beverage character, what drink is that?
You don't program in Java do you?
Thanks again for your 'InGef' function.

Cheers. ☕
Last edited by jeeswg on 16 Oct 2018, 15:24, edited 1 time in total.
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
Helgef
Posts: 4709
Joined: 17 Jul 2016, 01:02
Contact:

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

28 May 2017, 16:25

Interesting jeeswg.
There will probably be some speed gain on 64 bit.
I tested my function on this (one call),

Code: Select all

; search for (figurative) 00000001 in 000...000000001, where ... is ~1 billion 0. (takes ca 12 seconds on my pc.)
; Setup only: a 1 GiB haystack and an 8-byte needle, both zero-filled, each with
; its last byte set to 1, so the only match is at the very end of the haystack.
haylen:=1024**3
nedlen:=8
VarSetCapacity(hay,haylen,0)
VarSetCapacity(ned,nedlen,0)
Numput(1,ned,nedlen-1,"char")
Numput(1,hay,haylen-1,"char")
I wonder if you use these kinds of searches in a context where it matters if a search takes 10 or 30 ms? Isn't the regex method good enough, or even best?

☕ is certainly not to celebrate my Java programming skills. It symbolises coffee, black.

Cheers ☕
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

28 May 2017, 19:15

If you want to search for 5 bytes e.g. 'abcde', with RegExMatch you have to:
- search for 'abcd' and check the 5th byte manually
- search for 'bcde' and check the 1st byte manually

This is worse for 4 bytes e.g. 'abcd':
- search for 'abcd'
- search for 'bc' and check the 1st and 4th bytes manually

This is because in Unicode versions of AHK, RegExMatch treats things in units of 2 bytes.

*** There had been no good solution for this on AHK x64 until this thread. TY HG. ***

(That's the best way to get a response, if you say something wrong, people will correct you immediately, otherwise you get no response. I would prefer if I was wrong on this, it's surprising when certain key issues don't get much attention.)

Taking into account things like this can make the code very messy, also you would then have to combine two sets of results into one list.

It would be much easier to share and write code using your functions than it would be to share something that used the 'RegExMatch twice' approach.

If one could somehow always know that the results would appear at even offsets, then that could be a good argument for RegEx ... a slippery slope. However, searching and replacing nulls with spaces or pipes in binary data, does require handling odd offsets. Furthermore I don't know if RegExMatch on AHK Unicode versions can realistically handle searches for individual bytes.

In terms of how much does the speed really matter, I'm not sure, however, if you're checking for things in the address space, that can be 100s of megabytes. Process Hacker seems to be very good at handling this.

One use for this is for a NirSoft SearchMyFiles alternative, for certain complex needle/results criteria.

Btw haha I didn't think to use a big font, and I wasn't sure if you used a full stop or not.

Cheers ☕☕☕☕☕ [Ooh 200 is the maximum size, I would have just used one cup and made it 400. So here's a little pattern instead.]

[EDIT:] Btw now that I have a working x64 binary search function (subject to any optimisations, separate search n bytes functions, and replace nulls with space/pipe/CR/LF functions), my only remaining major AHK problem now is ...
Explorer context menu shell extensions - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=5&t=32152
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
Helgef
Posts: 4709
Joined: 17 Jul 2016, 01:02
Contact:

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

29 May 2017, 11:52

Regarding searching for unicode strings, I think the first char is always at an address for which mod(adr,A_Ptrsize)==0, so if searching for a proper English string, you only need to check every other byte,

Code: Select all

; Replacement 64-bit machine code for the inBuf function; differs from the
; originally posted raw64 in elements 4, 15 and 17. Per the accompanying post it
; is meant to check only every other byte of the haystack.
raw64:=[3054457686,1170223410,2202126377,2302739177,876102338,1160344849,829737349,3956420933,479020310,3548988418,438089231,421279810,960826997,1091990730,1157808771,3782658617,1141031043,3329671225,4294966968,3277741055] ; For unicode string search.
I also compared my test (from my last post) hay/ned vs the original InBuf, inBuf2 was twice as fast in that case. I'm not convinced about the asm approach, and I'm not even sure that the author really did write the function in asm, it wasn't explicitly stated. Compiler optimisation is pretty good you know. Finally, if you really need fast searches for private use, why don't you make your own built-in function in ahk? The one I posted probably compiles in c++ (I consider it c).
Spoiler
Edit: Duh, we could of course take A_Ptrsize sized steps, I'll try it later. :crazy:
Helgef
Posts: 4709
Joined: 17 Jul 2016, 01:02
Contact:

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

29 May 2017, 13:21

Step size 8 for you to test, if you search a buffer of a 32 bit process, you'll need step size 4. Note, this is somewhat speculative, but might be usable in some cases. In any case, it might be nice to have a parameter for the step size. I might consider this later.

Code: Select all

; Replacement 64-bit machine code for the inBuf function; differs from the
; originally posted raw64 in elements 4, 15 and 17. Per the accompanying post
; this variant uses a step size of 8 bytes.
raw64:=[3054457686,1170223410,2202126377,2302739177,876102338,1160344849,829737349,3956420933,479020310,3548988418,438089231,421279810,960826997,1091990730,1157808771,3782658617,1141424259,3329671225,4294966968,3277741055]
Cheers.
Helgef
Posts: 4709
Joined: 17 Jul 2016, 01:02
Contact:

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

31 May 2017, 16:07

Added step size for both haystack and needle, parameter description is poor, look at the source code if it is not clear.

Code: Select all

inBufStep(hay,ned,haylen,nedlen,haystep:=1,nedstep:=1,bufPos:=true,init:=false){
	; Searches the buffer hay for the buffer ned by calling an embedded machine-code routine.
	; Input: 
	;	- hay, haystack buffer to be searched for first occurrence of ned.
	;	- ned, needle buffer to search for first occurrence in hay.
	;	- haylen, length of hay, in bytes
	;	- nedlen, length of ned, in bytes
	;	- haystep, size of steps to take in hay
	;	- nedstep, size of steps to take in ned
	;	- bufpos, return buffer position, set to false to return string position.
	;	- init, internal use only, don't set.
	; Return:
	;	- position of ned in hay according to bufPos parameter, if no match, return -1 if nedlen>haylen, else -2.	
	; Url:
	;	- https://msdn.microsoft.com/en-us/library/windows/desktop/aa366887(v=vs.85).aspx 	(VirtualAlloc function)
	;	- https://msdn.microsoft.com/en-us/library/windows/desktop/aa366786(v=vs.85).aspx 	(Memory Protection Constants)
	;
	; Due to:
	;	- https://autohotkey.com/boards/viewtopic.php?f=5&t=28393
	; p is declared as well: in AutoHotkey v1 a leading local declaration switches the function to
	; assume-global mode, so an undeclared p would overwrite a global variable named p.
	local k, i, raw, p
	static flProtect:=0x40, flAllocationType:=0x1000 ; PAGE_EXECUTE_READWRITE ,	; MEM_COMMIT	
	static raw32:=[1398167381,2332159107,2333680748,2334205004,2334729340,253043828,687883702,689775692,604276985,3054485553,506995740,3380944757,4203294324,3054441451,939529564,225777180,479054393,2300146746,1992964570,608437225,611582757,608451365,3100341789,4294967294,1526842499,3277676382]
	static raw64:=[1398167381,1344566411,824882703,612142016,3358147912,1106323777,943899273,846532908,1959363909,4069081401,258352107,17439,270306630,265521476,1109007542,1964579896,3510191377,370969926,2303005812,3510191578,4160872051,1992309060,4294883518,1583087615,12803423]
	; One-time setup: the static initializer re-enters this function with init=true to build the code buffer.
	static bin:=inBufStep("","","","","","","",true)
	if init {
		; Allocate executable memory sized for the payload matching the running bitness (4 bytes per array element).
		bin:=DllCall("Kernel32.dll\VirtualAlloc", "Uptr",0, "Ptr", (raw:=A_PtrSize==4?raw32:raw64).length()*4, "Uint", flAllocationType, "Uint", flProtect, "Ptr")
		for k, i in raw
			NumPut(i,bin+(k-1)*4,"Int")
		raw32:="",raw64:=""	; the arrays are no longer needed once copied
		return bin
	}
	if (nedlen>haylen)
		return -1
	; The 32-bit payload returns with a plain ret (caller cleans the stack), hence "Cdecl", as in inBufRev.
	; The option has no effect on 64-bit builds.
	p:=DllCall(bin, "Ptr", hay, "Ptr", ned, "Uint", haylen, "Uint", nedlen, "Uint", haystep, "Uint", nedstep, "Cdecl")
	; Negative results are passed through; otherwise optionally convert the byte offset to a 1-based character position.
	return p>=0 ? (bufPos ? p : (p+(A_IsUnicode?2:1)) // (A_IsUnicode?2:1)) : p
}
source

Also, reverse search

Code: Select all

inBufRev(hay,ned,haylen,nedlen,haystep:=1,nedstep:=1,bufPos:=true,init:=false){
	; Reverse search, with step size, performed by an embedded machine-code routine.
	; Input: 
	;	- hay, haystack buffer to be searched for an occurrence of ned.
	;	- ned, needle buffer to search for in hay.
	;	  NOTE(review): being a reverse search, this presumably reports the last occurrence - confirm against the C source.
	;	- haylen, length of hay, in bytes
	;	- nedlen, length of ned, in bytes
	;	- haystep, size of steps to take in hay
	;	- nedstep, size of steps to take in ned
	;	- bufpos, return buffer position, set to false to return string position.
	;	- init, internal use only, don't set.
	; Return:
	;	- position of ned in hay according to bufPos parameter, if no match, return -1 if nedlen>haylen, else -2.	
	; Url:
	;	- https://msdn.microsoft.com/en-us/library/windows/desktop/aa366887(v=vs.85).aspx 	(VirtualAlloc function)
	;	- https://msdn.microsoft.com/en-us/library/windows/desktop/aa366786(v=vs.85).aspx 	(Memory Protection Constants)
	;
	; Due to:
	;	- https://autohotkey.com/boards/viewtopic.php?f=5&t=28393
	; p is declared as well: in AutoHotkey v1 a leading local declaration switches the function to
	; assume-global mode, so an undeclared p would overwrite a global variable named p.
	local k, i, raw, p
	static flProtect:=0x40, flAllocationType:=0x1000 ; PAGE_EXECUTE_READWRITE ,	; MEM_COMMIT	
	static raw32:=[1398167381,2332617859,2334401604,2334663788,959456340,2338026216,689710204,2312569320,2332304508,18883708,1926576599,608996155,611617568,431361820,1963334712,1960130859,611617590,837388548,604277211,267109889,520402447,1964901432,970129676,690058442,1992964561,604277739,740574251,3993093769,4273518451,2214592511,1583024324,2428722527,2200175755,1583024324,12803423]
	static raw64:=[2337494615,1145054332,1210340491,1153468740,960874537,1162375880,980603193,264276289,943856310,779419932,1959868741,3364439351,3956508997,479019545,3733537816,842315279,422852672,21303413,3258533331,692393588,3258533328,4163493750,30312772,3099096006,4294967294,3277807195]
	; One-time setup: the static initializer re-enters this function with init=true to build the code buffer.
	static bin:=inBufRev("","","","","","","",true)
	if init {
		; Allocate executable memory sized for the payload matching the running bitness (4 bytes per array element).
		bin:=DllCall("Kernel32.dll\VirtualAlloc", "Uptr",0, "Ptr", (raw:=A_PtrSize==4?raw32:raw64).length()*4, "Uint", flAllocationType, "Uint", flProtect, "Ptr")
		for k, i in raw
			NumPut(i,bin+(k-1)*4,"Int")
		raw32:="",raw64:=""	; the arrays are no longer needed once copied
		return bin
	}
	if (nedlen>haylen)
		return -1
	p:=DllCall(bin, "Ptr", hay, "Ptr", ned, "Uint", haylen, "Uint", nedlen, "Uint", haystep, "Uint", nedstep, "Cdecl")
	; Negative results are passed through; otherwise optionally convert the byte offset to a 1-based character position.
	return p>=0 ? (bufPos ? p : (p+(A_IsUnicode?2:1)) // (A_IsUnicode?2:1)) : p
}
source
Very limited testing.

Cheers.
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

01 Jun 2017, 08:37

@Helgef: Thanks for your work on this, I'm not quite sure what the significance of the step size is, is that to say check at 0th byte, 4th byte, 8th byte etc? (E.g. RegExMatch on AHK Unicode versions checks at 0 / 2 / 4 etc.)

Btw I'm not sure how easy it is to do this, but I would like to avoid using lines like this:
static bin:=inBufStep("","","","","","","",true)
in a function, in case I or another user changes the name of the function.

Wow, re. a reverse search function.

Depending on how easy they are to write it might be worth looking at:
[ReplaceByte]
Machine code binary buffer searching regardless of NULL - Page 4 - Scripts and Functions - AutoHotkey Community
https://autohotkey.com/board/topic/2362 ... ull/page-4
[InFile]
Machine code binary buffer searching regardless of NULL - Scripts and Functions - AutoHotkey Community
https://autohotkey.com/board/topic/2362 ... ull/page-1

Btw are you working on any AHK/IT projects at the moment? Cheers.

==================================================

[EDIT:][There's also C++ code for Bin2Hex/Hex2Bin, although perhaps these should be called BinData2Hex/Hex2BinData. As Bin would be used for generating numbers in binary.]
Machine code functions: Bit Wizardry - Page 2 - Scripts and Functions - AutoHotkey Community
https://autohotkey.com/board/topic/1948 ... ntry128202
Machine code functions: Bit Wizardry - Page 8 - Scripts and Functions - AutoHotkey Community
https://autohotkey.com/board/topic/1948 ... ntry153340

Btw do you want to give a bit more info about how you do C++ to hex? Cheers.

Machine code functions: Bit Wizardry - Page 8 - Scripts and Functions - AutoHotkey Community
https://autohotkey.com/board/topic/1948 ... dry/page-8
The simplest process I know to get machine code from C:
==================================================

[EDIT:] InFile might also be worth looking at:
Machine code binary buffer searching regardless of NULL - Scripts and Functions - AutoHotkey Community
https://autohotkey.com/board/topic/2362 ... s-of-null/

Prompted by:
Find string in a very large file - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=5&t=32612
Last edited by jeeswg on 03 Jun 2017, 09:28, edited 1 time in total.
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
Helgef
Posts: 4709
Joined: 17 Jul 2016, 01:02
Contact:

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

01 Jun 2017, 09:37

jeeswg wrote: is that to say check at 0th byte, 4th byte, 8th byte etc?
Yes. Edit: haystep:=4 says check at the 0th byte, 4th, ... Use this if you suspect that what you are looking for resides at a position that is a multiple of that step size. nedstep:=2 says compare only needle bytes 0, 2, 4, ... against the haystack. Use it if you are searching for a Unicode string where every other byte is 0 anyway. (Any step size other than 1 always risks false positives and/or misses, although, depending on what you are doing, the risk may be very small and the benefit great.)
jeeswg wrote: Btw I'm not sure how easy it is to do this, but I would like to avoid using lines like this:
static bin:=inBufStep("","","","","","","",true)
in a function, in case I or another user changes the name of the function.
you can wrap the call in any way you please, for example, just do if !bin instead of if init, and have static bin instead of static bin:=.... Edit: Do not return bin in this case :terms:
jeeswg wrote:Wow, re. a reverse search function.
Very limited testing :?
jeeswg wrote:Links, btws and edits...
I might take a look at some of this later. Thanks. ;)

Cheers.
Helgef
Posts: 4709
Joined: 17 Jul 2016, 01:02
Contact:

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

01 Oct 2017, 06:53

Hello jeeswg :wave:
I recently wanted a stringCount() function for null-delimited strings. The problem overlaps with this topic because the problem of counting includes the problem of finding. You might be interested in some of the functions that are available in buf.ahk.

Note 1: The buf_byte_find() function is the corresponding function for the inBuf() function, while buf_short_find() would be the corresponding function for what you use regexmatch for in this thread, but without the need for building the awkward needle.
Note 2: Very little testing has been done.

Cheers
Spoiler
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

01 Oct 2017, 07:11

Hello Helgef, yes, these sorts of machine code functions are classic, you'll always need one eventually. Interesting stuff, thanks for sharing.

Here I mentioned about searching UTF-8 case insensitive, which is the main function I would currently be interested in:
Move to Utf-8 - AutoHotkey Community
https://autohotkey.com/boards/viewtopic ... 52#p172952

The way I see it is that I will try to do various functions in machine code via C++, and possibly FASM (I might join the forum). If anybody more familiar with C++ to machine code does some of them in the meantime that will be useful as a basis for me to compare with/learn from/make faster progress.

Overall these are the sorts of functions I have in mind:

encoding / direction / case / don't stop at nulls / UChar or UShort units / search in part or whole of haystack / replace / (x64 and x32 compatible)

- ANSI/UTF-8/UTF-16 search
- from start/end of string
- case sensitive (easier)
- case insensitive (harder)
- doesn't stop at first null character
- specify to search only within bytes A to B of haystack
- search anywhere (ANSI/UTF-8/UTF-16)
- search at 2-byte unit intervals (i.e. UTF-16)

also:
- fastest null byte/null 2-byte to space (e.g. load a binary file as Notepad does, as ANSI, with nulls replaced with spaces)
- fastest null byte/null 2-byte to specified character
- general find and replace
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

01 Oct 2017, 07:52

Btw here is a function for counting a string, including overlaps, in regular AHK:

Code: Select all

; Demo hotkey: pressing q shows how many times vNeedle occurs in vText, counted by JEE_StrCountOverlap.
q::
vText := "aaaaa"
vNeedle := "aa"
vCaseSen := 0 ; forwarded to InStr as its case-sensitivity argument
vPos := -3 ; negative start position: begin at the 3rd-to-last character
MsgBox, % JEE_StrCountOverlap(vText, vNeedle, vCaseSen, vPos)
return

==================================================

;counts the occurrences of vNeedle in vText that begin at or after vStartPos; overlapping occurrences are counted
;vCaseSen: passed to InStr as its case-sensitivity argument
;vStartPos: -n is the nth-to-last character; 0 returns 0
JEE_StrCountOverlap(ByRef vText, vNeedle, vCaseSen:=0, vStartPos:=1)
{
	local vCount := 0
	local vPos
	if (vStartPos = 0)
		return 0
	else if (vStartPos < 0)
		vStartPos := StrLen(vText) + 1 + vStartPos
	if (vStartPos < 1) ;a negative offset longer than the text: start at the first character
		vStartPos := 1
	;continue one character after each match: overlaps are still found, but every match is counted exactly once
	;(advancing the start position by 1 regardless of where the match was found counted the same match repeatedly)
	while (vPos := InStr(vText, vNeedle, vCaseSen, vStartPos))
		vCount += 1, vStartPos := vPos + 1
	return vCount
}
Although did you need something faster than this? Or that could specially handle binary/ANSI? Cheers.
Last edited by jeeswg on 01 Oct 2017, 13:59, edited 1 time in total.
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
Helgef
Posts: 4709
Joined: 17 Jul 2016, 01:02
Contact:

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

01 Oct 2017, 13:06

Hello, interesting ideas :thumbup:
- search at 2-byte unit intervals
buf_short_find does this. (Exact match only, no case options)
also:
the buf_XXX_write functions find the needle in the haystack, and write a specified sequence of bytes/byte-pairs (char/short) to the found location + optional offset. If the length of what you want to write is equal to the needle length, the write functions act like strReplace, that is, the needle is just replaced. So you can replace nulls with spaces, e.g.,

Code: Select all

#include buf.ahk
; Demo: put a null character inside a string, then call nullReplace() on it with its default replacement (a space).
buf:="hello world"
buflen:=strlen(buf)
; Overwrite the space with a 16-bit zero; the *2 in the offset assumes 2-byte characters.
numput(0,&buf,(instr(buf,A_Space)-1)*2,"ushort")
msgbox(buf)	; buf as it is with the embedded null
msgbox(nullReplace(&buf,buflen))	; shows nullReplace's return value
msgbox(buf)	; buf after the replacement
nullReplace(buf,buflen,rep:=" ", count:=-1, nullWidth:=2){
	; Hands buf to buf_byte_write (nullWidth 1) or buf_short_write (any other nullWidth),
	; with a zero-filled 2-byte variable as the needle and rep as the data to write.
	; Returns whatever the selected buf.ahk function returns.
	static nll, init:=varSetCapacity(nll,2,0)
	if (nullWidth==1)
		return buf_byte_write(buf, &nll, &rep, buflen, 1, 1, count)
	return buf_short_write(buf, &nll, &rep, buflen, 1, 1, count)
}

Code: Select all

JEE_StrCountOverlap
while instr(.) is what I would have done too. :thumbup:
JEE_StrCountOverlap wrote:

Code: Select all

static vIsV1
Tells me you made it two-way compatible, but I get different results in v1 and v2.
I didn't need the overlap feature really, but I included it since it was trivial and doesn't affect performance. It might come in handy someday.
Last edited by Helgef on 01 Oct 2017, 17:27, edited 1 time in total.
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

01 Oct 2017, 14:05

I've fixed the function, it didn't actually need to work out IsV1 in this case.

I've done (written) so many string functions and never needed it, I don't know if I'll ever use it. If anyone can prove that it's useful please do. If there's a better name than StrCountOverlap / StrCountAllowOverlap that would be nice to know also.

A bit like that Rexx string split function, Parse, string splitting is useful, but the quirks of its implementation just didn't seem very helpful.

Splitting Strings - AutoHotkey Community
https://autohotkey.com/boards/viewtopic ... 29#p170229
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
User avatar
jeeswg
Posts: 6902
Joined: 19 Dec 2016, 01:58
Location: UK

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

23 May 2018, 16:11

- Hello Helgef. I'm thinking of coming back to this topic, working on and compiling various simple examples. I might do a tutorial.
- I was interested to know which version of Visual Studio you use (hopefully it would make no difference to the hex for basic machine code examples, but one wonders), and which tools you use to convert C++ code to machine code.
[EDIT:] OK, looks like you use TDM-GCC.
MCode4GCC -- C/C++ to MCode Generator - Page 2 - AutoHotkey Community
https://autohotkey.com/boards/viewtopic ... 84#p164684
- I believe that wOxxOm may have actually written his code in assembly language (and compiled it using FASM, I got the same machine code when I compiled it), and I've been reading up on assembler and on the exe file format also. So I might try to do a small bit of programming in assembly after achieving any success with C++.

==================================================

Links:
[C++ to machine code]
MCode Tutorial (Compiled Code in AHK) - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=7&t=32
MCode tutorial - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=11&t=116
MCode4GCC -- C/C++ to MCode Generator - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=6&t=4642
MCodeGen - Easily transform C/C++ code into mcode - Scripts and Functions - AutoHotkey Community
https://autohotkey.com/board/topic/5516 ... nto-mcode/
Machine code functions: Bit Wizardry - Scripts and Functions - AutoHotkey Community
https://autohotkey.com/board/topic/1948 ... -wizardry/
Machine code - Rosetta Code
https://rosettacode.org/wiki/Machine_code

[ASM to machine code]
[e.g. take wOxxOm's ASM code, save as an ASM file, compile to create an exe file, read the exe file as hex, crop the hex, use the hex in a machine code function]
[I still don't know at which points to crop the hex, although I have been investigating]
[once I find out about the crop points, I can investigate converting the x32 ASM file to an x64 ASM file]
flat assembler
https://flatassembler.net/
Machine code binary buffer searching regardless of NULL - Scripts and Functions - AutoHotkey Community
https://autohotkey.com/board/topic/2362 ... s-of-null/

[Derek Banas ASM/C++ tutorials]
[one-off tutorial]
C++ Programming - YouTube
https://www.youtube.com/watch?v=Rub-JsjMhWY
[series of (17 so far) videos]
C++ Tutorial - YouTube
https://www.youtube.com/watch?v=N5HgK1bTLOg
[series of 4 videos]
Assembly Language Tutorial - YouTube
https://www.youtube.com/watch?v=ViNnfoE56V8

[Usborne book: Machine Code For Beginners]
[basically just take a look at the index of opcodes to get a feel for the simple operations]
[pdf on assembly language]
Usborne releases free PDFs of its classic 1980s computer programming books / Boing Boing
https://boingboing.net/2016/02/07/usbor ... fs-of.html
Computer and coding books from Usborne
https://usborne.com/browse-books/featur ... ing-books/
machine-code-for-beginners.pdf - Google Drive
https://drive.google.com/file/d/0Bxv0Ss ... VtU00/view

[OllyDbg: exe to assembly language]
[open exe with OllyDbg]
[right-click, View, module 'exe name']
[right-click, View, Executable file]
Download
http://www.ollydbg.de/download.htm

[exe file format]
EXE Format
http://www.delorie.com/djgpp/doc/exe/
x86 Disassembly/Windows Executable Files - Wikibooks, open books for an open world
https://en.wikibooks.org/wiki/X86_Disas ... tion_Table
determine if a program is 32-bit/64-bit without running it - AutoHotkey Community
https://autohotkey.com/boards/viewtopic.php?f=5&t=26713
Last edited by jeeswg on 24 May 2018, 13:13, edited 1 time in total.
homepage | tutorials | wish list | fun threads | donate
WARNING: copy your posts/messages before hitting Submit as you may lose them due to CAPTCHA
Helgef
Posts: 4709
Joined: 17 Jul 2016, 01:02
Contact:

Re: InBuf function currently 32-bit only (machine code binary buffer searching)

24 May 2018, 05:49

- Hello jeeswg.

I use a private script for these things, it is not suitable for sharing. I use gcc, mostly.

Regarding writing your functions in ASM, I'd advise against it; the C compilers will produce just as good machine code (from the asm code they produce from your C source ;) ). Even my simple double loop above was faster than the ASM code in some cases. Writing it in ASM isn't magically going to make it faster, and you'll need to write one version for 32-bit and one for 64-bit, unless there are converters available. ASM is great for learning about low-level programming; otherwise, it is mostly used for special cases and then probably only as inline assembly, or for fun I guess.

Cheers.

Return to “Ask for Help (v1)”

Who is online

Users browsing this forum: No registered users and 135 guests