# Files
# coven/modules/ufbx/misc/test_zlib_debug_compressor.py
#
# 434 lines
# 13 KiB
# Python

import zlib_debug_compressor as zz
import zlib
import sys
import itertools
import random
def test_dynamic():
    """Deflate a short input with a repeated word as a dynamic Huffman block.

    Returns the (expected bytes, compressed buffer) pair.
    """
    payload = b"Hello Hello!"
    # force_block_types=[2] pins the block type used by the compressor
    return payload, zz.deflate(payload, zz.Options(force_block_types=[2]))
def test_dynamic_no_match():
    """Deflate a short input without matches as a dynamic Huffman block.

    Returns the (expected bytes, compressed buffer) pair.
    """
    payload = b"Hello World!"
    return payload, zz.deflate(payload, zz.Options(force_block_types=[2]))
def test_dynamic_empty():
    """Deflate empty input as a dynamic Huffman block (only the end symbol).

    Returns the (expected bytes, compressed buffer) pair.
    """
    payload = b""
    return payload, zz.deflate(payload, zz.Options(force_block_types=[2]))
def test_dynamic_rle():
    """Dynamic Huffman block made of one literal and one repeating match.

    Returns the (expected bytes, compressed buffer) pair.
    """
    expected = b"AAAAAAAAAAAAAAAAA"
    # One literal "A" followed by a length-16 match at distance 1
    tokens = [zz.Literal(b"A"), zz.Match(16, 1)]
    return expected, zz.compress_message(tokens, zz.Options(force_block_types=[2]))
def test_dynamic_rle_boundary():
    """Dynamic Huffman block with one repeating match that crosses a 16 byte boundary.

    Returns the (expected bytes, compressed buffer) pair.
    """
    expected = b"AAAAAAAAAAAAAAAAAAAAAAAAA"
    # One literal "A" followed by a length-24 match at distance 1
    tokens = [zz.Literal(b"A"), zz.Match(24, 1)]
    return expected, zz.compress_message(tokens, zz.Options(force_block_types=[2]))
def test_repeat_length():
    """Deflate A..Z followed by Z..A with default options (repeat lengths case).

    Returns the (expected bytes, compressed buffer) pair.
    """
    ascending = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    # The alphabet followed by its mirror image
    payload = ascending + ascending[::-1]
    return payload, zz.deflate(payload)
def test_huff_lengths():
    """Exercise all possible lit/len code lengths.

    Each successive input byte gets twice the overridden count of the
    previous one (1, 2, 4, ...).

    Returns the (expected bytes, compressed buffer) pair.
    """
    payload = b"0123456789ABCDE"
    weights = {symbol: 1 << rank for rank, symbol in enumerate(payload)}
    opts = zz.Options(force_block_types=[2], override_litlen_counts=weights)
    return payload, zz.deflate(payload, opts)
def test_multi_part_matches():
    """Matches that reach back into earlier compression blocks.

    Uses a block size of 4 and a fixed sequence of forced block types.

    Returns the (expected bytes, compressed buffer) pair.
    """
    payload = b"Test Part Data Data Test Data Part New Test Data"
    block_types = [0, 1, 2, 0, 1, 2]
    opts = zz.Options(block_size=4, force_block_types=block_types)
    return payload, zz.deflate(payload, opts)
def create_match_distances_and_lengths_message():
    """Build a literal/match message stepping through tables of lengths and distances.

    Every step emits a two-byte literal followed by a match. Match lengths
    come from the length table (its last entry repeats once the table is
    exhausted); literal bytes cycle through 0..255.

    Returns the list of zz.Literal / zz.Match items.
    """
    match_lengths = [
        3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
        23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 39, 42, 43, 48, 50, 51,
        55, 58, 59, 63, 66, 67, 70, 82, 83, 90, 98, 99, 105, 114, 115, 120, 130,
        131, 140, 150, 162, 163, 170, 180, 194, 195, 200, 210, 226, 227, 230,
        240, 250, 257, 258,
    ]
    match_distances = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 24, 25, 28,
        32, 33, 40, 48, 49, 50, 64, 65, 75, 96, 97, 110, 128, 129, 160, 192,
        193, 230, 256, 257, 330, 384, 385, 400, 512, 513, 600, 768, 769,
        900, 1024, 1025, 1250, 1536, 1537, 1800, 2048, 2049, 2500, 3072,
        3073, 3500, 4096, 4097, 5000, 6144, 6145, 7000, 8192, 8193, 10000,
        12288, 12289, 14000, 16384, 16385, 20000, 24576, 24577, 25000,
        26000, 27000, 28000, 29000, 30000, 31000, 32768, 32768 + 300,
    ]
    length_source = itertools.chain(match_lengths, itertools.repeat(match_lengths[-1]))
    literal_source = itertools.cycle(range(0, 256))
    tokens = []
    # Running total of emitted match lengths (literal bytes are not counted)
    covered = 0
    # Distance used by emitted matches; it trails the table by one entry, so
    # the final table entry only acts as a threshold and is never passed to
    # zz.Match
    pending_distance = 1
    for distance in match_distances:
        while covered < distance:
            length = next(length_source)
            covered += length
            tokens.append(zz.Literal(bytes(itertools.islice(literal_source, 2))))
            tokens.append(zz.Match(length, pending_distance))
        pending_distance = distance
    return tokens
def test_static_distances_and_lengths():
    """Exercise all match length and distance buckets (static block type).

    Returns the (expected bytes, compressed buffer) pair.
    """
    tokens = create_match_distances_and_lengths_message()
    opts = zz.Options(block_size=4294967296, force_block_types=[1])
    # Expected output is obtained by decoding the message itself
    expected = zz.decode(tokens)
    compressed = zz.compress_message(tokens, opts)
    return expected, compressed
def test_dynamic_distances_and_lengths():
    """Exercise all match length and distance buckets (dynamic block type).

    Returns the (expected bytes, compressed buffer) pair.
    """
    tokens = create_match_distances_and_lengths_message()
    opts = zz.Options(block_size=4294967296, force_block_types=[2])
    # Expected output is obtained by decoding the message itself
    expected = zz.decode(tokens)
    compressed = zz.compress_message(tokens, opts)
    return expected, compressed
def test_long_codes():
    """Exercise the longest possible bit-codes for symbols.

    Builds a long run of length-258 matches, inserts groups of far matches
    when the running position crosses selected offsets, and overrides the
    lit/len and distance counts with halving weights.

    Returns the (expected bytes, compressed buffer) pair.
    """
    def halving_weights(symbols):
        # First symbol gets 1000000000, every following one half of the previous
        weights = {}
        weight = 1000000000
        for symbol in symbols:
            weights[symbol] = weight
            weight /= 2
        return weights

    tokens = [zz.Literal(b"test")]
    # (base match length, match distance / trigger offset)
    far_matches = [(140, 10000), (180, 14000), (210, 20000), (230, 30000)]
    cursor = 0
    while cursor < 30000:
        tokens.append(zz.Match(258, 4))
        advanced = cursor + 258
        for base_length, offset in far_matches:
            # Only act when this step crosses the offset
            if not (cursor < offset <= advanced):
                continue
            for step in range(5):
                for letter in range(step - 1):
                    tokens.append(zz.Literal(bytes([ord("A") + letter])))
                length = base_length + step
                tokens.append(zz.Match(length, offset))
                advanced += length
        cursor = advanced

    litlen_weights = halving_weights(itertools.chain([285], b"Test", range(260, 284)))
    dist_weights = halving_weights(itertools.chain([3], range(10, 28)))
    opts = zz.Options(block_size=4294967296, force_block_types=[2],
                      override_litlen_counts=litlen_weights,
                      override_dist_counts=dist_weights)
    expected = zz.decode(tokens)
    return expected, zz.compress_message(tokens, opts)
def test_long_code_sequences():
    """Exercise sequences of long codes with N bit symbols.

    A seeded random prefix is generated first, then one block per
    (lit/len bits, distance bits) combination, each with its own count
    overrides. The order of calls into `random` is significant because the
    generator is seeded for reproducibility.

    Returns the (expected bytes, compressed buffer) pair.
    """
    digit_codes = range(ord("0"), ord("4"))

    def random_digits(count):
        return bytes(random.choices(digit_codes, k=count))

    def random_long_match():
        # Length is drawn before distance (arguments evaluate left to right)
        return zz.Match(random.randrange(230, 250), random.randrange(17000, 24000))

    compress_args = []
    all_tokens = []

    # Generate random prefix
    random.seed(1)
    prefix = [zz.Literal(random_digits(300))]
    prefix_len = 300
    while prefix_len <= 24000:
        distance = min(random.randrange(256, 1024), prefix_len - 200)
        prefix.append(zz.Match(200, distance))
        prefix.append(zz.Literal(random_digits(10)))
        prefix_len += 210
    compress_args.extend((prefix, zz.Options(force_block_types=[2])))
    all_tokens.extend(prefix)

    # Generate matches with increasing bit counts
    fixed_litlen_symbols = [256, 284, ord("A"), ord("B"), ord("C"), ord("D"), ord("E"), ord("F")]
    for ll_bits in range(2, 15 + 1):
        for dist_bits in (ll_bits, 15):
            litlen_weights = {n: 2 ** (32 - n) for n in range(ll_bits - 3)}
            dist_weights = {n: 2 ** (32 - n) for n in range(dist_bits - 1)}
            for symbol in fixed_litlen_symbols:
                litlen_weights[symbol] = 2 ** 8
            dist_weights[29] = 2 ** 8

            block = [random_long_match()]
            for literal_count in range(0, 8):
                if literal_count:
                    block.append(zz.Literal(bytes(random.choices(b"ABCDEF", k=literal_count))))
                block.append(random_long_match())

            block_opts = zz.Options(force_block_types=[2],
                                    override_litlen_counts=litlen_weights,
                                    override_dist_counts=dist_weights)
            compress_args.extend((block, block_opts))
            all_tokens.extend(block)

    expected = zz.decode(all_tokens)
    return expected, zz.compress_message(*compress_args)
def test_two_symbol_bits():
    """Exercise some combinations of bit lengths for two symbols.

    For every (lo, hi) pair a separate block containing the literal "AB" is
    produced with lit/len counts overridden.

    Returns the (expected bytes, compressed buffer) pair.
    """
    compress_args = []
    expected = b""
    for lo in range(2, 16):
        for hi in range(lo, min(lo + 6, 16)):
            filler_count = 2 ** (hi - lo)
            assert filler_count <= 64
            weights = {256: 64 ** 16}
            weights.update((96 + n, 8 ** (16 - n)) for n in range(lo))
            weights[ord("A")] = 8 ** (16 - lo)
            weights.update((n, 8 ** (16 - hi)) for n in range(filler_count))
            weights[ord("B")] = 8 ** (16 - hi)
            block_opts = zz.Options(force_block_types=[2],
                                    override_litlen_counts=weights)
            compress_args.extend(([zz.Literal(b"AB")], block_opts))
            expected += b"AB"
    return expected, zz.compress_message(*compress_args)
def test_fail_codelen_16_overflow():
    """Corrupted stream: overflow of codelen symbol 16.

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b"\xfd\xfe\xff"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))
    # Patch Litlen 254-256 repeat extra N to 4
    stream.patch(0x66, 1, 2)
    return payload, stream
def test_fail_codelen_17_overflow():
    """Corrupted stream: overflow of codelen symbol 17.

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b"\xfc"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))
    # Patch Litlen 254-256 zero extra N to 5
    stream.patch(0x6c, 2, 3)
    return payload, stream
def test_fail_codelen_18_overflow():
    """Corrupted stream: overflow of codelen symbol 18.

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b"\xf4"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))
    # Patch Litlen 254-256 extra N to 13
    stream.patch(0x6a, 2, 7)
    return payload, stream
def test_fail_codelen_overfull():
    """Corrupted stream: codelen Huffman tree with too many symbols.

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b"Codelen"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))
    # Over-filled Huffman tree
    stream.patch(0x30, 1, 3)
    return payload, stream
def test_fail_codelen_underfull():
    """Corrupted stream: codelen Huffman tree with too few symbols.

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b"Codelen"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))
    # Under-filled Huffman tree
    stream.patch(0x4e, 5, 3)
    return payload, stream
def test_fail_litlen_bad_huffman():
    """Corrupted stream: bad lit/len Huffman tree.

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b"Literal/Length codes"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))
    # Under-filled Huffman tree
    stream.patch(0x6d, 1, 2)
    return payload, stream
def test_fail_distance_bad_huffman():
    """Corrupted stream: bad distance Huffman tree.

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b"Dist Dist .. Dist"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))
    # Under-filled Huffman tree
    stream.patch(0xb1, 0b1111, 4)
    return payload, stream
def test_fail_bad_distance():
    """Corrupted stream: bad distance symbol (30..31).

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b"Dist Dist"
    stream = zz.deflate(payload, zz.Options(force_block_types=[1]))
    # Distance symbol 30
    stream.patch(0x42, 0b01111, 5)
    return payload, stream
def test_fail_bad_static_litlen():
    """Corrupted stream: bad static lit/length symbol (286..287).

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b"A"
    stream = zz.deflate(payload, zz.Options(force_block_types=[1]))
    stream.patch(19, 0b01100011, 8, "Invalid symbol 285")
    return payload, stream
def test_fail_distance_too_far():
    """Corrupted stream: match distance reaching before the start of the output.

    Returns an empty bytes object paired with the compressed buffer.
    """
    # A distance-2 match right after a single literal byte
    tokens = [zz.Literal(b"A"), zz.Match(4, 2)]
    stream = zz.compress_message(
        tokens, zz.Options(force_block_types=[1], no_decode=True))
    return b"", stream
def test_fail_bad_distance_bit():
    """Corrupted stream: bad distance symbol in a one symbol alphabet.

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b"asd asd"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))
    # Distance code 1
    stream.patch(0xaa, 0b1, 1)
    return payload, stream
def test_fail_bad_distance_empty():
    """Corrupted stream: use of a distance code from an empty tree.

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b"asd asd"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))
    # Add another distance code and replace distance 3 code for 1 (0111)
    # with the code for 0 (00) for distances 3 and 4
    stream.patch(0x18, 4, 5)
    stream.patch(0x98, 0b0000, 4)
    return payload, stream
def test_fail_bad_lit_length():
    """Corrupted stream: bad lit/length symbol.

    Returns the (original bytes, patched buffer) pair.
    """
    payload = b""
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))
    # Patch end-of-block 0 to 1
    stream.patch(0x6b, 0b1, 1)
    return payload, stream
def test_fail_no_litlen_codes():
    """Corrupted stream: lit/len table with no codes.

    All 286 lit/len counts are overridden to zero.

    Returns the (original bytes, compressed buffer) pair.
    """
    payload = b""
    zero_counts = dict.fromkeys(range(286), 0)
    opts = zz.Options(
        force_block_types=[2],
        override_litlen_counts=zero_counts,
        invalid_sym=zz.Code(0, 1),
    )
    return payload, zz.deflate(payload, opts)
def test_fail_no_dist_codes():
    """Test distance table with no codes.

    All 30 distance counts are overridden to zero while the message still
    contains a match.

    Returns the (original bytes, compressed buffer) pair.
    """
    # What the message below spells out: one literal "A" plus a length-4
    # match at distance 1. Previously `data` was never assigned, so the
    # return statement raised NameError.
    data = b"AAAAA"
    probs = { n: 0 for n in range(30) }
    opts = zz.Options(force_block_types=[2], override_dist_counts=probs, invalid_sym=zz.Code(0, 1))
    message = [zz.Literal(b"A"), zz.Match(4, 1)]
    buf = zz.compress_message(message, opts)
    return data, buf
def fmt_bytes(data, cols=20):
    """Format `data` as double-quoted rows of \\xNN escapes.

    Each row covers up to `cols` items of `data`; rows are joined with
    newlines. Empty input gives an empty string.
    """
    def quoted_row(start):
        escaped = "".join("\\x%02x" % value for value in data[start:start + cols])
        return "\"" + escaped + "\""

    return "\n".join(quoted_row(start) for start in range(0, len(data), cols))
def fnv1a(data):
    """Return the 32-bit FNV-1a hash of an iterable of integers.

    Only the low 8 bits of each item are mixed in.
    """
    digest = 0x811c9dc5
    for item in data:
        # XOR in the low byte, then multiply and truncate to 32 bits
        digest ^= item & 0xff
        digest = (digest * 0x01000193) & 0xffffffff
    return digest
# Round-trip cases: each callable returns (expected_bytes, compressed_buffer).
# The test_fail_* builders are not part of this list.
test_cases = [
    test_dynamic,
    test_dynamic_no_match,
    test_dynamic_empty,
    test_dynamic_rle,
    # Previously test_dynamic_rle was listed twice and this case never ran
    test_dynamic_rle_boundary,
    test_repeat_length,
    test_huff_lengths,
    test_multi_part_matches,
    test_static_distances_and_lengths,
    test_dynamic_distances_and_lengths,
    test_long_codes,
    test_long_code_sequences,
    test_two_symbol_bits,
]

good = True
for case in test_cases:
    try:
        data, buf = case()
        # Decompress with the reference zlib and compare with the expected bytes
        result = zlib.decompress(buf.to_bytes())
        if data != result:
            raise ValueError("Round trip failed")
        print("{}: OK".format(case.__name__))
    except Exception as e:
        # Report the failure and keep going so every case gets a verdict
        print("{}: FAIL ({})".format(case.__name__, e))
        good = False

# Exit status is non-zero when any case failed
sys.exit(0 if good else 1)