 |
AutoHotkey Community Let's help each other out
|
| View previous topic :: View next topic |
| Author |
Message |
DerRaphael
Joined: 23 Nov 2007 Posts: 462 Location: Heidelberg, Germany
|
Posted: Wed Jul 02, 2008 12:42 pm Post subject: Autohotkey Huffman Compression - a proof of concept |
|
|
For all those of you who dont know what Huffman Compression is, i'll explain it in a nutshell.
Huffman Encoding is a so called entropy encoding. Its based upon the quantity of chars and represents those with a higher quantity with a shorter binary value, those who have a lesser quantity with a longer one. This usually results in a smaller bitcount of the result. - It has no effect on already compressed binary values, since these have an optimized use of available chars. This means, when you're trying to compress an already compressed binary it results in a larger bitcount than before.
Credits: Laszlo for his hash (md5 hashing), Titan for toBin (integer to binary)
aHC_051b.ahk
| Code: | ; huffman compression in ahk / v0.5.1b
; (c|w) 01.07.2008 derRaphael@oleco.net / zLib style released
; *************************************************************************
; compression
; *************************************************************************
aHC_Compress(ByRef Data, ByRef compressedData, Size = 0, aHC_InfoStyle = 1)
{
global aHC_Info, aHC_Current
aHC_Current := "Compression"
aHC_Info := ""
OldFloat := A_FormatFloat
SetFormat,Float,3.1
HuffHead := "HuffmanCompressed 051", valuecount := 0
if (size=0) ; If possible pass Size parameter - since Autohotkey
size := VarSetCapacity(Data,-1) ; wont properly handle binarydata correct due to \0
;~ tooltip, part one - get the count of unique chars the data
Loop % size
{
value := NumGet(Data,A_Index-1,"Uchar")
if (data_%value%=0) {
valuecount++
data_%value% := 0
} else {
data_%value%++
}
}
;~ tooltip, part two - byte usage statistic
Loop,256
{
n := A_Index-1
if (StrLen(data_%n%)!=0)
out .= data_%n% " " n "`n"
sd_%n% := ""
}
;~ tooltip, part three - build huffman tree and encoding table
Loop,
{
sort,out,N
cc := ccount("`n",out)
if (cc>1) {
Loop,Parse,out,`n
if (A_Index<3)
n%A_index% := A_LoopField
Else
break
v1a := RegExReplace(n1,"S)\s.*"), v1b := RegExReplace(n1,"S)\d+\s")
v2a := RegExReplace(n2,"S)\s.*"), v2b := RegExReplace(n2,"S)\d+\s")
v3a := v1a + v2a, v3b := "(#" v1b ",#" v2b ")" ; 0 - 1
nout := v3a " " v3b "`n"
Loop,Parse,out,`n
if (A_Index>2)
nout .= A_loopField "`n"
StringReplace,out,nout,`n`n,`n,All
l1 := RegExReplace(v1b,"S)[^#\d]"), l1 := RegExReplace(l1,"S)##","#")
l2 := RegExReplace(v2b,"S)[^#\d]"), l2 := RegExReplace(l2,"S)##","#")
if (substr(l1,1,1)="#")
l1 := SubStr(l1,2)
if (substr(l2,1,1)="#")
l2 := SubStr(l2,2)
Loop,Parse,l1,#
sd_%A_LoopField% := "0" sd_%A_LoopField%
Loop,Parse,l2,#
sd_%A_LoopField% := "1" sd_%A_LoopField%
} else
Break
}
;~ tooltip, part four - building decompression table
offset := 0, tokenCount := 0
VarSetCapacity(compressedData,size*2,0)
hhc := StrLen(HuffHead)
Loop,% hhc
NumPut(*(&HuffHead-1+A_index),compressedData,A_index-1,"UChar")
Loop,256
{
n := A_Index-1
l := StrLen(RegExReplace(sd_%n%,"S)[^01]"))
if (l>0) {
offset := ++tokencount*6+hhc+50
NumPut(n,compressedData,offset-2,"Uchar")
NumPut(l,compressedData,offset-1,"Uchar")
NumPut(b2d(sd_%n%),compressedData,offset,"Uint")
}
}
NumPut(tokenCount&255,compressedData,hhc+2,"UChar")
NumPut(size,compressedData,hhc+5,"Uint")
offset +=4
;~ tooltip, part five - compress origin data %size%
pressed := ""
Loop % size
{
value := NumGet(Data,A_Index-1,"Uchar")
pressed .= sd_%value%
Loop,
if (Strlen(pressed)>8) {
NumPut(b2d(substr(pressed,1,8)),compressedData,offset++,"uchar")
pressed := substr(pressed,9)
} else
Break
percent := (a_index/Size)*100
If (aHC_InfoStyle&1=1)
aHC_Info := percent "%"
else if (aHC_InfoStyle&3=3)
aHC_Info := round(percent) "%"
}
if (Strlen(Pressed)!=0)
Numput(b2d(pressed SubStr("00000000",1,8-strlen(pressed))),compressedData,offset,"uchar")
NumPut(offset,compressedData,hhc+10,"Uint")
SetFormat,Float,%OldFloat%
return offset+1
}
; *************************************************************************
; decompression
; *************************************************************************
aHC_Decompress(ByRef compressedData, ByRef Data, aHC_InfoStyle = 1)
{
global aHC_Info, aHC_Current
aHC_Current := "Decompression"
aHC_Info := ""
OldFloat := A_FormatFloat
SetFormat,Float,3.1
;~ tooltip part one - check head
HuffHead := "HuffmanCompressed 051", hhc := StrLen(HuffHead), hCheck := ""
Loop, % hhc
hCheck .= chr(*(&compressedData-1+a_index))
if (hCheck!=HuffHead) ; err no compressed data
Return -1 ; head found
;~ tooltip part two - build decompression table
tokenCount := NumGet(compressedData,hhc+2,"UChar")
if (tokenCount=0)
tokenCount := 256
dSize := NumGet(compressedData,hhc+5,"Uint")
VarSetCapacity(Data,dSize,0)
MaxTokenLength := 0
Loop,% tokenCount
{
offset := A_Index*6+hhc+50, s := ""
b := d2b(NumGet(compressedData,offset,"Uint"))
Loop, % l := NumGet(compressedData,offset-1,"Uchar")
s .= "0"
b := SubStr(s,1,l-(strlen(b))) b
_%b% := NumGet(compressedData,offset-2,"Uchar")
if (strlen(b)>maxTokenLength)
MaxTokenLength := b
}
;~ tooltip part three - decompress!
; take amount of bits from stream and find match in decompression table
; thus resolving it into its origin binary value
offset += 4, oc := 0
Loop,
{
bits := d2b(NumGet(compressedData,offset++,"uchar"))
_tmp .= substr("00000000",1,8-strlen(bits)) bits
match := ""
Loop,Parse,_tmp
{
match .= A_LoopField, value := _%match%
if (StrLen(value)!=0) {
NumPut(value,Data,oc++,"uchar")
match := ""
}
}
if (StrLen(match)>0) {
_tmp := match
} Else
_tmp := ""
percent := (oc/dSize)*100
If (aHC_InfoStyle&1=1)
aHC_Info := percent "%"
else if (aHC_InfoStyle&3=3)
aHC_Info := round(percent) "%"
if (strlen(_tmp)>maxTokenLength) {
MsgBox ERR
return -2 ; internal Data Error - Shouldn't happen at all
}
if (oc=dSize)
break
}
SetFormat,Float,%OldFloat%
return dSize
}
; *************************************************************************
; Helperfunctions
; *************************************************************************
ccount(char,data)
{
OldFormat := A_FormatInteger
SetFormat,Integer,H
needle := "S)\" substr(asc(char),2)
c := (RegExReplace(data,needle,char,counter)) ? counter : 0
SetFormat,Integer,%OldFormat%
c += 0
Return c
}
b2d(str, dec=0)
{
Loop,% x := StrLen(str)
dec += SubStr(str,x-a_index+1,1)*(2**(A_index-1))
return dec
}
d2b(i, s = 0, c = 0)
{ ; Thx Titan, http://www.autohotkey.com/forum/viewtopic.php?t=13522
l := StrLen(i := Abs(i + u := i < 0))
Loop, % Abs(s) + !s * l << 2
b := u ^ 1 & i // (1 << c++) . b
Return RegExReplace(b,"^0+")
} |
The here demonstrated script compresses texts and binary data very nicely. This is a too early development stage, to use this in a productive environment, also the header data needs more optimization. (not to mention the speed).
Here is a lil script to toy around with above's functions and test 'em.
| Code: | ;~ Huffman-0.5.x testSuite
#include aHC_051b.ahk
FileSelectFile,testFile
FileRead,test,%testFile%
FileGetSize,size,%testfile%
oCD := ""
SetTimer, ttInfo, 50
newsize := aHC_Compress(test,oCD,size)
SetTimer, ttInfo, OFF
MsgBox % "uncompressed: " size " bytes / compressed: " newsize " bytes`n"
. "Compression Ratio: " (newsize/size*100) "% of original size`n`n"
newData := ""
SetTimer, ttInfo, 50
newsize := aHC_Decompress(oCD,newData)
if (newsize<0) {
MsgBox ERROR in Data
ExitApp
}
SetTimer, ttInfo, OFF
ToolTip, % " Errorcheck in Progress "
md5a := hash(test,size)
md5b := hash(newData,newsize)
tooltip
MsgBox % md5a " (origin md5)`n" md5b " (new decompressed md5)"
Return
ttInfo:
ToolTip, % "[" aHC_Info "] Huffman " aHC_Current " in Progress "
return
HASH(ByRef sData, nLen, SID = 3) ; SID = 3: MD5, 4: SHA1
{ ; Thx Laszlo: http://www.autohotkey.com/forum/viewtopic.php?p=113252#113252
DllCall("advapi32\CryptAcquireContextA", UIntP,hProv, UInt,0, UInt,0, UInt,1, UInt,0xF0000000)
DllCall("advapi32\CryptCreateHash", UInt,hProv, UInt,0x8000|0|SID, UInt,0, UInt,0, UIntP, hHash)
DllCall("advapi32\CryptHashData", UInt,hHash, UInt,&sData, UInt,nLen, UInt,0)
DllCall("advapi32\CryptGetHashParam", UInt,hHash, UInt,2, UInt,0, UIntP,nSize, UInt,0)
VarSetCapacity(HashVal, nSize, 0)
DllCall("advapi32\CryptGetHashParam", UInt,hHash, UInt,2, UInt,&HashVal, UIntP,nSize, UInt,0)
DllCall("advapi32\CryptDestroyHash", UInt,hHash)
DllCall("advapi32\CryptReleaseContext", UInt,hProv, UInt,0)
IFormat := A_FormatInteger
SetFormat Integer, H
Loop %nSize%
sHash .= SubStr(*(&HashVal+A_Index-1)+0x100,-1)
SetFormat Integer, %IFormat%
Return sHash
} |
v0.5.1b Known bugs:
it cannot decompress itself once it has been compressed - the decompression keeps going far beyond 100% _________________
|
|
| Back to top |
|
 |
rani
Joined: 18 Mar 2008 Posts: 59
|
Posted: Mon Jul 14, 2008 5:23 am Post subject: |
|
|
numput, numget
no connection to the compress you show,
by using the bit mechanizm in AHK:
is it possible to set and then also get
a bit value=0 or bit=1
in specific location in a var ?
means
I set bit=1 as binary set in offset=20001234 (bit location very large)
in varcode var,let say
and then I want to retrieve (get) the bit value from above offset
can it be done ?
rgds
ell |
|
| Back to top |
|
 |
Guest
|
Posted: Mon Jul 14, 2008 5:36 am Post subject: |
|
|
| omg the hash function was not written by laszlo was written by sean |
|
| Back to top |
|
 |
Laszlo
Joined: 14 Feb 2005 Posts: 4016 Location: Pittsburgh
|
Posted: Mon Jul 14, 2008 6:23 am Post subject: |
|
|
| For MD5 see here and there. For the binary conversion, see. e.g. here. We can do it with dll calls, too. |
|
| Back to top |
|
 |
PixelVision
Joined: 28 Jun 2008 Posts: 4 Location: NIECE, FRANCE
|
Posted: Sun Jul 20, 2008 8:46 am Post subject: |
|
|
This is looking like a very good utility in the making.
DerRaphael, will you be releasing a version which
1- Allows the selected file to be saved?
2- Allows reliable un-compression of such file?
Looking good so far and this will be a big help to the community
This would be really neat to see advanced. |
|
| Back to top |
|
 |
|
|
You can post new topics in this forum You can reply to topics in this forum
|
Powered by phpBB © 2001, 2005 phpBB Group
|