Jump to content

Sky Slate Blueberry Blackcurrant Watermelon Strawberry Orange Banana Apple Emerald Chocolate
Photo

Ascii85 codec for binary buffers


  • Please log in to reply
2 replies to this topic
Laszlo
  • Moderators
  • 4713 posts
  • Last active: Mar 31 2012 03:17 AM
  • Joined: 14 Feb 2005
Here are two simple functions to ASCII85 encode/decode binary buffers or strings. (For the latter case Titan provided a script earlier, but it does not fully conform to the standard.) A script might only need to encode or decode data. In this case the other function need not be included, so two separate functions are better.

ASCII85 encodes the input data four bytes at a time.
Each block of four input bytes is treated as a single 32 bit number (1st byte = MS).
It is encoded to create a block of five printable characters, as base 85 representation
with adding 33, and taking the resulting ASCII character (in the range ! (33) to u (117)).

If all 4 original bytes are zero, the result is coded as a single character z
instead of the five character string !!!!!.

The final block is padded with zeros to make it length 4. If it had a length of k bytes,
the encoded characters consist of C0..Ck, followed by the 2 characters ~>.
The encoded data may start with <~, but this is not required.

Decoding is a bit tricky, when there are fewer than 5 code characters in the last block.
The original data ended with a few 0 bits, so, we can treat the discarded code characters
as over the maximum of 85(+33). A possible carry is absorbed by these 0's.
Drop(string,list,delim="") {
   ; Return string with every occurrence of each entry of list removed.
   ; list is split on delim; an empty delim means the entries are
   ; separated by commas.
   If (delim = "")
      delim := ","
   Loop, Parse, list, %delim%
   {
      StringReplace, string, string, %A_LoopField%, , All
   }
   Return string
}

Ascii85Decode(ByRef data, code, ByRef len) {
   ; Decode the ASCII85 text in code into the binary buffer data and
   ; store the number of decoded bytes in len.
   code := Drop(code, "<~, ,`f,`n,`r,`t,`v,~>")   ; Strip delimiters and white space

   prevCase := A_StringCaseSense                  ; Remember the caller's setting
   StringCaseSense, On                            ; Only lower case z is the zero shortcut
   StringReplace, code, code, z, !!!!!, All       ; Expand z to five zero digits
   StringCaseSense, %prevCase%                    ; Put the caller's setting back

   chars := StrLen(code)
   m := Mod(chars, 5)                             ; Code chars in the last, partial group
   If (m > 0)
      m := m - 1                                  ; k+1 code chars carry k data bytes
   len := (chars//5)*4 + m
   VarSetCapacity(data, len, 0)
   p := &data                                     ; Write position inside data
   x := 0
   Loop, Parse, code
   {
      x := x*85 + Asc(A_LoopField) - 33           ; Append one base 85 digit
      If (Mod(A_Index, 5) != 0)
         Continue                                 ; Group of 5 not complete yet
      shift := 24
      Loop, 4
      {                                           ; Store the group, most significant byte first
         DllCall("RtlFillMemory", UInt,p, UInt,1, UChar,x>>shift)
         p++
         shift := shift - 8
      }
      x := 0
   }
   ; Partial group: act as if the missing digits were above the maximum;
   ; the carry this causes cannot reach the m bytes that are kept.
   x := (x+1) * 85**(4-m)
   shift := 24
   Loop, %m%
   {
      DllCall("RtlFillMemory", UInt,p, UInt,1, UChar,x>>shift)
      p++
      shift := shift - 8
   }
}
Ascii85Encode(ByRef code, ByRef data, n=0) {
   ; Encode the first n bytes of data as ASCII85 text and store the result,
   ; wrapped in <~ and ~>, in code. When n is not between 1 and the capacity
   ; of data, the string length of data is used instead.
   If n not between 1 and % VarSetCapacity(data)
      n := StrLen(data)
   code := "<~"                                   ; Opening delimiter
   x := 0
   Loop, %n%
   {
      pos := (A_Index-1) & 3                      ; Position of this byte in its group of 4
      byte := *(&data + A_Index - 1)
      x := x + (byte << (24 - 8*pos))             ; First byte is the most significant
      If (pos != 3)
         Continue                                 ; Group not complete yet
      If (x = 0)
         code := code . "z"                       ; Four zero bytes -> single z
      Else
      {
         Loop, 5                                  ; Five base 85 digits, most significant first
            code := code . Chr(33 + Mod(x//85**(5-A_Index), 85))
      }
      x := 0
   }
   tail := n & 3                                  ; Bytes in the final, partial group
   If (tail)
   {
      Loop, % tail+1                              ; k bytes are written as k+1 digits
         code := code . Chr(33 + Mod(x//85**(5-A_Index), 85))
   }
   code := code . "~>"                            ; Closing delimiter
}
I could not find much trustworthy test data, so there might be some bugs left undetected. Please post any wrong result, together with the expected data, to help fix the code. Below are some test cases; some of them are just made up.
; Round-trip self test: for each of the 7 test vectors, encode the raw data,
; decode the result again and show both next to the reference encoding.
; The trailing "OK:" field shows two flags: decoded data equals the input,
; and the produced text equals the reference text ("11" = both passed).
Loop 7 {
   data(d,A_Index,len)     ; set: binary d, len in bytes
   Ascii85Encode(c,d,len)  ; set: string c
   d := Bin2hex(d,len)     ; hex dump of the input
   Ascii85Decode(b,c,len)  ; set: binary b, len in bytes
   b := Bin2hex(b,len)     ; hex dump of the decoded data
   expected := code(A_Index)
   ; ASCII85 text is case sensitive, so compare with == instead of =.
   ; The "x" prefix keeps digit-only hex dumps from being compared as
   ; numbers, where e.g. 0000 and 000000 would count as equal.
   sameData := (("x" . b) == ("x" . d))
   sameCode := (("x" . c) == ("x" . expected))
   MsgBox % "data:`n" d "`n`ndecoded:`n" b "`n`nencoded:`n" c "`n`nstandard:`n" expected "`n`n OK: " sameData sameCode
}
ExitApp

data(ByRef d, i, ByRef l) {
   ; Test input number i (1..7): store the raw data in d and its length in
   ; bytes in l. Any other i leaves d and l untouched.
   If (i = 1) {
      l := 1
      d := "."
   }
   Else If (i = 2) {
      l := 2
      d := ".."
   }
   Else If (i = 3) {
      l := 3
      Hex2Bin(d,"ffffff")
   }
   Else If (i = 4) {
      l := 4
      d := "...."
   }
   Else If (i = 5) {
      l := 5
      Hex2Bin(d,"123456789A")
   }
   Else If (i = 6) {
      l := 6
      Hex2Bin(d,"000000000001")
   }
   Else If (i = 7) {
      l := 269
      ; The lines below are joined without separator; the leading space of
      ; the 2nd and 3rd line is part of the text.
      d =
( Join
Man is distinguished, not only by his reason, but by this singular passion from other animals,
 which is a lust of the mind, that by a perseverance of delight in the continued and
 indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.
)
   }
}

code(i) {
   ; Return the reference ASCII85 text (including the <~ and ~> delimiters)
   ; for test input number i (1..7). Nothing is returned for any other i.
   ; Entry 7 is given as a continuation section: its lines are joined and
   ; left-trimmed, so the line breaks and leading spaces are not part of
   ; the string. The full string is 341 characters long; the digits
   ; Cj@.4 and Gp$d7 in its first line encode the bytes "ly b" and "y hi".
   IfEqual i,1, Return      "<~/c~>"
   Else IfEqual i,2, Return "<~/hR~>"
   Else IfEqual i,3, Return "<~s8W*~>"
   Else IfEqual i,4, Return "<~/hSb/~>"
   Else IfEqual i,5, Return "<~&I<X6RK~>"
   Else IfEqual i,6, Return "<~z!!*~>"
   Else IfEqual i,7, Return,           ; " --> "", ` --> ``
( Join LTrim
"<~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,
 O<DJ+*.@<*K0@<6L(Df-\0Ec5e;DffZ(EZee.Bl.9pF""AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKY
 i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa
 l(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs``8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G
 >uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>"
)
}

Bin2Hex(ByRef b, n=0)            ; Return the first n bytes of b as a string of 2-digit hex values
{                                ; n outside 1..capacity of b: dump the whole capacity
   savedFormat := A_FormatInteger         ; Integer format to put back at the end
   SetFormat, Integer, Hex                ; Numbers now turn into 0x... strings

   capacity := VarSetCapacity(b)
   If n not between 1 and %capacity%
      n := capacity
   Loop, %n%
   {
      byte := *(&b + A_Index - 1)
      digits := digits . (256 + byte)     ; Adding 256 forces the form 0x1xx
   }
   StringReplace, digits, digits, 0x1, , All   ; Strip each 0x1, two digits per byte remain

   SetFormat, Integer, %savedFormat%
   Return digits
}

Hex2Bin(ByRef bin, hex) {
   ; Store the bytes given by the hex digit pairs of hex in bin.
   ; A trailing unpaired digit is not written.
   VarSetCapacity(bin, StrLen(hex)//2)
   Loop, Parse, hex
   {
      If (A_Index & 1) {
         hi = 0x%A_LoopField%             ; First digit of a pair, kept until the second arrives
         Continue
      }
      ; Second digit: hi followed by this digit forms 0xNN, written as one byte
      DllCall("RtlFillMemory", UInt,&bin+A_Index//2-1, UInt,1, UChar,hi . A_LoopField)
   }
}
Edit 20060713: White space in code is ignored, minor simplifications
Edit 20060715: Added comments, handle more white space, restore StringCaseSense

Laszlo
  • Moderators
  • 4713 posts
  • Last active: Mar 31 2012 03:17 AM
  • Joined: 14 Feb 2005
Updated script: Added comments, handle more white space, restore StringCaseSense

Azerty
  • Members
  • 72 posts
  • Last active: Jan 16 2009 10:08 AM
  • Joined: 19 Dec 2006
For those interested: I published a library written in ASM, following Laszlo's suggestion.

You may find it here.