prefix compressor
Community Forums/Showcase/prefix compressor
| ||
| this is a prefix compressor i've been working on. It works by switching characters in a file with prefix codes of varying lengths, some shorter than the character and some longer. If you've come across things like "fibinacci encoders/compressors", this one is a little different because it optimizes the prefix codes a bit (or 2, *slaps knee*). Use it for whatever you want. If it's in a project you don't need to include any credits, but if you want to share the source code somewhere else try to include my name ;) -edit: forgot to say: This should only use ~1-2mb of memory (never bothered to check) regardless of the file. It is also fast (though i haven't benchmarked that either)
;******************************************************************************************************************************
;************************************************* Prefix compression *********************************************************
;******************************************************************************************************************************
;by Alec Dee
;this compression method swaps bytes with prefix codes of lengths 2 Or greater. The prefix codes are generated by
;fibinacci numbers with some improvement To the original codes, For example:
;Before After
;11 11
;011 011
;0011 001
;1011 101
;00011 000
;10011 100
;01011 010
;the Last "block" of prefixes can have the Last two 1's taken away And the second To Last block can have the Last
;1 taken away, For shorter prefixes.
;this compressor also uses order-1 modeling. This changes the frequency of characters based on the last character
;read, english text usually has big compression improvements with this method
Dim counts(0,0)
Dim places(0,0)
Dim replaces(0,0)
Dim fibs(0)
Dim prefix_codes(0)
Dim prefix_lengths(0)
Function prefix(file$,newfile$)
in=ReadFile(file)
If in=0
RuntimeError file+" cannot be read"
EndIf
out=WriteFile(newfile)
If out=0
RuntimeError newfile+" cannot be written"
EndIf
size=FileSize(file)
Dim counts(255,255)
Dim places(255,255)
Dim replaces(255,255)
Dim fibs(13)
;holds the binary string of prefix codes
Dim prefix_codes(255)
;holds the lengths of the prefix codes
Dim prefix_lengths(255)
;these fibinacci numbers are going to be used to compute the prefix codes
fibs(0)=1
fibs(1)=2
fibs(2)=3
fibs(3)=5
fibs(4)=8
fibs(5)=13
fibs(6)=21
fibs(7)=34
fibs(8)=55
fibs(9)=89
fibs(10)=144
fibs(11)=233
fibs(12)=377
oldpos=0
prepos=0
bit=0
;compute the prefix's
For i=0 To 255
bit=i+1
If fibs(oldpos+1)=bit
oldpos=oldpos+1
EndIf
prefix_lengths(i)=oldpos+1
prepos=oldpos
prefix_codes(i)=1
While prepos>-1
If fibs(prepos)<=bit
prefix_codes(i)=prefix_codes(i)+(1 Shl (prefix_lengths(i)-prepos))
bit=bit-fibs(prepos)
EndIf
prepos=prepos-1
Wend
Next
;remove unecessary bits from the end prefix's
For i=0 To 255
move=prefix_lengths(i)-oldpos+1
If move>0
prefix_lengths(i)=prefix_lengths(i)-move
prefix_codes(i)=prefix_codes(i) Shr move
EndIf
Next
last_byte=0
buffer=0
bits_left=7
max_frequency=16382 ;when one of the counts(x,x) crosses this it's values get halved, if you change this value make sure to
;change it for unprefix
;set the default values for count, and lookup tables
For i=0 To 255
For t=0 To 255
counts(i,t)=0
places(i,t)=t
replaces(i,t)=t
Next
Next
;read each bit of the file
For i=1 To size
c=ReadByte(in)
counts(last_byte,c)=counts(last_byte,c)+32
If counts(last_byte,c)>max_frequency
For t=0 To 255
counts(last_byte,t)=counts(last_byte,t) Shr 1
Next
EndIf
;find the character's place (rank according to other counts)
place=replaces(last_byte,c)
;output the prefix code for that place, which is hopefully has a length < 8
For t=prefix_lengths(place) To 0 Step -1
buffer=buffer Or (((prefix_codes(place) Shr t) And 1) Shl bits_left)
bits_left=bits_left-1
;if the output buffer gets filled, output the buffer and reset it
If bits_left=-1
WriteByte out,buffer
bits_left=7
buffer=0
EndIf
Next
;sort the character based on it's neighbors
If place>0
While counts(last_byte,places(last_byte,place-1))<counts(last_byte,places(last_byte,place))
oldplace=places(last_byte,place-1)
places(last_byte,place-1)=places(last_byte,place)
places(last_byte,place)=oldplace
replaces(last_byte,c)=replaces(last_byte,c)-1
replaces(last_byte,oldplace)=replaces(last_byte,oldplace)+1
place=place-1
If place=0
Exit
EndIf
Wend
EndIf
;record this byte, this is what's needed for the order-1 effect
last_byte=c
Next
;the buffer wasn't completely filled, but has imformation anyway, so output it
If bits_left<>7
WriteByte out,buffer
EndIf
Dim counts(0,0)
Dim places(0,0)
Dim replaces(0,0)
Dim fibs(0)
Dim prefix_codes(0)
Dim prefix_lengths(0)
CloseFile(in)
CloseFile(out)
End Function
;this decompresses the prefix encoded files
;the prefix codes aren't used in this one, because they aren't needed. Prefix values can be calculated instead just using
;the fibinacci numbers
Function unprefix(file$,newfile$)
in=ReadFile(file)
If in=0
RuntimeError file+" cannot be read"
EndIf
out=WriteFile(newfile)
If out=0
RuntimeError newfile+" cannot be written"
EndIf
size=FileSize(file)
Dim counts(255,255)
Dim places(255,255)
Dim replaces(255,255)
Dim fibs(12)
fibs(0)=1
fibs(1)=2
fibs(2)=3
fibs(3)=5
fibs(4)=8
fibs(5)=13
fibs(6)=21
fibs(7)=34
fibs(8)=55
fibs(9)=89
fibs(10)=144
fibs(11)=233
fibs(12)=377
last_byte=0
bitval=0
prepos=0
thisbit=0
lastbit=0
madesymbol=0
max_frequency=16382
For i=0 To 255
For t=0 To 255
counts(i,t)=0
places(i,t)=t
replaces(i,t)=t
Next
Next
For i=1 To size
c=ReadByte(in)
For mask=7 To 0 Step -1
thisbit=(c Shr mask) And 1
If lastbit And thisbit
madesymbol=1
ElseIf prepos=10
If thisbit
bitval=bitval+144
Else
bitval=bitval+233
EndIf
madesymbol=1
Else
bitval=bitval+thisbit*fibs(prepos)
prepos=prepos+1
lastbit=thisbit
EndIf
If madesymbol
bitval=bitval-1
symbol=places(last_byte,bitval)
WriteByte out,symbol
counts(last_byte,symbol)=counts(last_byte,symbol)+32
If counts(last_byte,symbol)>max_frequency
For t=0 To 255
counts(last_byte,t)=counts(last_byte,t) Shr 1
Next
EndIf
place=bitval
If place>0
While counts(last_byte,places(last_byte,place-1))<counts(last_byte,places(last_byte,place))
oldplace=places(last_byte,place-1)
places(last_byte,place-1)=places(last_byte,place)
places(last_byte,place)=oldplace
replaces(last_byte,symbol)=replaces(last_byte,c)-1
replaces(last_byte,oldplace)=replaces(last_byte,oldplace)+1
place=place-1
If place=0
Exit
EndIf
Wend
EndIf
last_byte=symbol
bitval=0
prepos=0
madesymbol=0
lastbit=0
EndIf
Next
Next
Dim counts(0,0)
Dim places(0,0)
Dim replaces(0,0)
Dim fibs(0)
CloseFile(in)
CloseFile(out)
End Function
;prefix("agog.bmp","alec.txt")
;unprefix("alec.txt","agog2.bmp")
;sometimes this compressor doesn't compress everything completely, try compressing more than once
;prefix("agog.bmp","alec1.txt")
;prefix("alec1.txt","alec2.txt")
;unprefix("alec2.txt","alec3.txt")
;unprefix("alec3.txt","agog2.bmp")
|