"""Round-trip checks for DEFLATE streams built with zlib_debug_compressor.

Each ``test_*`` function returns ``(data, buf)``: the expected decompressed
bytes and the compressed buffer object produced by the debug compressor.
The cases listed in ``test_cases`` are decompressed with ``zlib.decompress``
and compared against ``data``; the process exit code is 0 only if every
listed case round-trips.

The ``test_fail_*`` builders patch bits of a stream after compression. They
are defined here but are not part of ``test_cases``.
"""

import itertools
import random
import sys
import zlib

import zlib_debug_compressor as zz


def test_dynamic():
    """Simple dynamic Huffman tree compressed block"""
    opts = zz.Options(force_block_types=[2])
    data = b"Hello Hello!"
    return data, zz.deflate(data, opts)


def test_dynamic_no_match():
    """Simple dynamic Huffman tree without matches"""
    opts = zz.Options(force_block_types=[2])
    data = b"Hello World!"
    return data, zz.deflate(data, opts)


def test_dynamic_empty():
    """Dynamic Huffman block with a single symbol (end)"""
    opts = zz.Options(force_block_types=[2])
    data = b""
    return data, zz.deflate(data, opts)


def test_dynamic_rle():
    """Simple dynamic Huffman with a single repeating match"""
    opts = zz.Options(force_block_types=[2])
    # 1 literal + a 16 byte match at distance 1 == 17 bytes of "A"
    data = b"AAAAAAAAAAAAAAAAA"
    message = [zz.Literal(b"A"), zz.Match(16, 1)]
    return data, zz.compress_message(message, opts)


def test_dynamic_rle_boundary():
    """Simple dynamic Huffman with a single repeating match, adjusted to cross a 16 byte boundary"""
    opts = zz.Options(force_block_types=[2])
    # 1 literal + a 24 byte match at distance 1 == 25 bytes of "A"
    data = b"AAAAAAAAAAAAAAAAAAAAAAAAA"
    message = [zz.Literal(b"A"), zz.Match(24, 1)]
    return data, zz.compress_message(message, opts)


def test_repeat_length():
    """Dynamic Huffman compressed block with repeat lengths"""
    data = b"ABCDEFGHIJKLMNOPQRSTUVWXYZZYXWVUTSRQPONMLKJIHGFEDCBA"
    return data, zz.deflate(data)


def test_huff_lengths():
    """Test all possible lit/len code lengths"""
    data = b"0123456789ABCDE"
    # Give every byte value twice the count of the previous one
    freq = 1
    probs = {}
    for c in data:
        probs[c] = freq
        freq *= 2
    opts = zz.Options(force_block_types=[2], override_litlen_counts=probs)
    return data, zz.deflate(data, opts)


def test_multi_part_matches():
    """Matches that refer to earlier compression blocks"""
    data = b"Test Part Data Data Test Data Part New Test Data"
    opts = zz.Options(block_size=4, force_block_types=[0, 1, 2, 0, 1, 2])
    return data, zz.deflate(data, opts)


def create_match_distances_and_lengths_message():
    """Build a Literal/Match message cycling through many lengths and distances.

    Returns a list alternating ``zz.Literal`` (two bytes taken from a cycling
    0..255 counter) and ``zz.Match`` entries. Match lengths are taken from
    ``lens`` in order (the last length repeats once the list is exhausted) and
    match distances lag one step behind the ``dists`` entry being approached.
    """
    lens = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
            23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 39, 42, 43, 48, 50, 51,
            55, 58, 59, 63, 66, 67, 70, 82, 83, 90, 98, 99, 105, 114, 115, 120, 130,
            131, 140, 150, 162, 163, 170, 180, 194, 195, 200, 210, 226, 227, 230,
            240, 250, 257, 258]
    dists = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 24, 25, 28,
             32, 33, 40, 48, 49, 50, 64, 65, 75, 96, 97, 110, 128, 129, 160, 192,
             193, 230, 256, 257, 330, 384, 385, 400, 512, 513, 600, 768, 769,
             900, 1024, 1025, 1250, 1536, 1537, 1800, 2048, 2049, 2500, 3072,
             3073, 3500, 4096, 4097, 5000, 6144, 6145, 7000, 8192, 8193, 10000,
             12288, 12289, 14000, 16384, 16385, 20000, 24576, 24577, 25000,
             26000, 27000, 28000, 29000, 30000, 31000, 32768, 32768 + 300]

    message = []

    l_iter = itertools.chain(lens, itertools.repeat(lens[-1]))
    lit_iter = itertools.cycle(range(0, 256))
    pos = 0
    prev_d = 1
    for d in dists:
        # `pos` only accumulates match lengths; the previous distance is used
        # for the match because `pos` has already reached it.
        while pos < d:
            length = next(l_iter)
            pos += length
            message.append(zz.Literal(bytes([next(lit_iter), next(lit_iter)])))
            message.append(zz.Match(length, prev_d))
        prev_d = d

    return message


def test_static_distances_and_lengths():
    """Test all possible match length and distance buckets (Static)"""
    message = create_match_distances_and_lengths_message()
    opts = zz.Options(block_size=4294967296, force_block_types=[1])
    data = zz.decode(message)
    return data, zz.compress_message(message, opts)


def test_dynamic_distances_and_lengths():
    """Test all possible match length and distance buckets (Dynamic)"""
    message = create_match_distances_and_lengths_message()
    opts = zz.Options(block_size=4294967296, force_block_types=[2])
    data = zz.decode(message)
    return data, zz.compress_message(message, opts)


def test_long_codes():
    """Test longest possible bit-codes for symbols"""
    message = [zz.Literal(b"test")]
    pos = 0
    matches = [(140, 10000), (180, 14000), (210, 20000), (230, 30000)]
    while pos < 30000:
        message.append(zz.Match(258, 4))
        next_pos = pos + 258
        for length, offset in matches:
            # Insert the extra matches once, when this step crosses `offset`
            if pos < offset and next_pos >= offset:
                for n in range(5):
                    for m in range(n - 1):
                        message.append(zz.Literal(bytes([ord("A") + m])))
                    message.append(zz.Match(length, offset))
                    next_pos += length
                    length += 1
        pos = next_pos

    # Halve the overridden count for each successive symbol (true division,
    # so the counts become floats).
    ll_override = {}
    count = 1000000000
    for ll in itertools.chain([285], b"Test", range(260, 284)):
        ll_override[ll] = count
        count /= 2

    dist_override = {}
    count = 1000000000
    for dist in itertools.chain([3], range(10, 28)):
        dist_override[dist] = count
        count /= 2

    opts = zz.Options(block_size=4294967296, force_block_types=[2],
                      override_litlen_counts=ll_override,
                      override_dist_counts=dist_override)
    data = zz.decode(message)
    return data, zz.compress_message(message, opts)


def test_long_code_sequences():
    """Test sequences of long codes with N bit symbols"""
    messages = []

    # Generate random prefix
    random.seed(1)
    total_message = []
    message = []
    data = bytes(random.choices(range(ord("0"), ord("4")), k=300))
    message.append(zz.Literal(data))
    message_len = 300
    while message_len <= 24000:
        dist = min(random.randrange(256, 1024), message_len - 200)
        message.append(zz.Match(200, dist))
        data = bytes(random.choices(range(ord("0"), ord("4")), k=10))
        message.append(zz.Literal(data))
        message_len += 210
    opts = zz.Options(force_block_types=[2])
    messages += [message, opts]
    total_message += message

    # Generate matches with increasing bit counts
    for ll_bits in range(2, 15 + 1):
        for dist_bits in [ll_bits, 15]:
            message = []
            ll_override = {}
            dist_override = {}
            for n in range(ll_bits - 3):
                ll_override[n] = 2**(32 - n)
            for n in range(dist_bits - 1):
                dist_override[n] = 2**(32 - n)
            for ll in [256, 284, ord("A"), ord("B"), ord("C"), ord("D"), ord("E"), ord("F")]:
                ll_override[ll] = 2**8
            dist_override[29] = 2**8

            match_len = random.randrange(230, 250)
            match_dist = random.randrange(17000, 24000)
            message.append(zz.Match(match_len, match_dist))
            # 0..7 literals between consecutive matches
            for lits in range(0, 8):
                if lits:
                    message.append(zz.Literal(bytes(random.choices(b"ABCDEF", k=lits))))
                match_len = random.randrange(230, 250)
                match_dist = random.randrange(17000, 24000)
                message.append(zz.Match(match_len, match_dist))

            opts = zz.Options(force_block_types=[2],
                              override_litlen_counts=ll_override,
                              override_dist_counts=dist_override)
            messages += [message, opts]
            total_message += message

    data = zz.decode(total_message)
    return data, zz.compress_message(*messages)


def test_two_symbol_bits():
    """Test some combinations of bit lengths for two symbols"""
    messages = []
    data = b""

    for lo in range(2, 16):
        for hi in range(lo, min(lo + 6, 16)):
            delta = hi - lo

            ll_override = {}
            ll_override[256] = 64**16
            for n in range(lo):
                ll_override[96 + n] = 8**(16 - n)
            ll_override[ord("A")] = 8**(16 - lo)
            for n in range(2**delta):
                assert n < 64
                ll_override[n] = 8**(16 - hi)
            ll_override[ord("B")] = 8**(16 - hi)

            message = [zz.Literal(b"AB")]
            data += b"AB"
            opts = zz.Options(force_block_types=[2],
                              override_litlen_counts=ll_override)
            messages += [message, opts]

    return data, zz.compress_message(*messages)


def test_fail_codelen_16_overflow():
    """Test overflow of codelen symbol 16"""
    data = b"\xfd\xfe\xff"
    opts = zz.Options(force_block_types=[2])
    buf = zz.deflate(data, opts)

    # Patch Litlen 254-256 repeat extra N to 4
    buf.patch(0x66, 1, 2)

    return data, buf


def test_fail_codelen_17_overflow():
    """Test overflow of codelen symbol 17"""
    data = b"\xfc"
    opts = zz.Options(force_block_types=[2])
    buf = zz.deflate(data, opts)

    # Patch Litlen 254-256 zero extra N to 5
    buf.patch(0x6c, 2, 3)

    return data, buf


def test_fail_codelen_18_overflow():
    """Test overflow of codelen symbol 18"""
    data = b"\xf4"
    opts = zz.Options(force_block_types=[2])
    buf = zz.deflate(data, opts)

    # Patch Litlen 254-256 extra N to 13
    buf.patch(0x6a, 2, 7)

    return data, buf


def test_fail_codelen_overfull():
    """Test bad codelen Huffman tree with too many symbols"""
    data = b"Codelen"
    opts = zz.Options(force_block_types=[2])
    buf = zz.deflate(data, opts)

    # Over-filled Huffman tree
    buf.patch(0x30, 1, 3)

    return data, buf


def test_fail_codelen_underfull():
    """Test bad codelen Huffman tree too few symbols"""
    data = b"Codelen"
    opts = zz.Options(force_block_types=[2])
    buf = zz.deflate(data, opts)

    # Under-filled Huffman tree
    buf.patch(0x4e, 5, 3)

    return data, buf


def test_fail_litlen_bad_huffman():
    """Test bad lit/len Huffman tree"""
    data = b"Literal/Length codes"
    opts = zz.Options(force_block_types=[2])
    buf = zz.deflate(data, opts)

    # Under-filled Huffman tree
    buf.patch(0x6d, 1, 2)

    return data, buf


def test_fail_distance_bad_huffman():
    """Test bad distance Huffman tree"""
    data = b"Dist Dist .. Dist"
    opts = zz.Options(force_block_types=[2])
    buf = zz.deflate(data, opts)

    # Under-filled Huffman tree
    buf.patch(0xb1, 0b1111, 4)

    return data, buf


def test_fail_bad_distance():
    """Test bad distance symbol (30..31)"""
    data = b"Dist Dist"
    opts = zz.Options(force_block_types=[1])
    buf = zz.deflate(data, opts)

    # Distance symbol 30
    buf.patch(0x42, 0b01111, 5)

    return data, buf


def test_fail_bad_static_litlen():
    """Test bad static lit/length (286..287)"""
    data = b"A"
    opts = zz.Options(force_block_types=[1])
    buf = zz.deflate(data, opts)

    buf.patch(19, 0b01100011, 8, "Invalid symbol 285")

    return data, buf


def test_fail_distance_too_far():
    """Test with distance too far to the output"""
    opts = zz.Options(force_block_types=[1], no_decode=True)
    # The match reaches back 2 bytes when only 1 byte has been produced
    message = [zz.Literal(b"A"), zz.Match(4, 2)]
    buf = zz.compress_message(message, opts)
    return b"", buf


def test_fail_bad_distance_bit():
    """Test bad distance symbol in one symbol alphabet"""
    data = b"asd asd"
    opts = zz.Options(force_block_types=[2])
    buf = zz.deflate(data, opts)

    # Distance code 1
    buf.patch(0xaa, 0b1, 1)

    return data, buf


def test_fail_bad_distance_empty():
    """Test using distance code from an empty tree"""
    data = b"asd asd"
    opts = zz.Options(force_block_types=[2])
    buf = zz.deflate(data, opts)

    # Add another distance code and replace distance 3 code for 1 (0111)
    # with the code for 0 (00) for distances 3 and 4
    buf.patch(0x18, 4, 5)
    buf.patch(0x98, 0b0000, 4)

    return data, buf


def test_fail_bad_lit_length():
    """Test bad lit/length symbol"""
    data = b""
    opts = zz.Options(force_block_types=[2])
    buf = zz.deflate(data, opts)

    # Patch end-of-block 0 to 1
    buf.patch(0x6b, 0b1, 1)

    return data, buf


def test_fail_no_litlen_codes():
    """Test lit/len table with no codes"""
    data = b""
    probs = {n: 0 for n in range(286)}
    opts = zz.Options(force_block_types=[2],
                      override_litlen_counts=probs,
                      invalid_sym=zz.Code(0, 1))
    buf = zz.deflate(data, opts)
    return data, buf


def test_fail_no_dist_codes():
    """Test distance table with no codes"""
    probs = {n: 0 for n in range(30)}
    opts = zz.Options(force_block_types=[2],
                      override_dist_counts=probs,
                      invalid_sym=zz.Code(0, 1))
    message = [zz.Literal(b"A"), zz.Match(4, 1)]
    # The payload the message describes; `data` must be bound before the
    # return below or the function raises NameError.
    data = zz.decode(message)
    buf = zz.compress_message(message, opts)
    return data, buf


def fmt_bytes(data, cols=20):
    """Format `data` as double-quoted lines of ``\\xNN`` escapes.

    Each output line holds at most `cols` bytes; lines are joined with
    newlines. Empty input yields an empty string.
    """
    lines = []
    for begin in range(0, len(data), cols):
        chunk = data[begin:begin + cols]
        lines.append("\"" + "".join("\\x%02x" % c for c in chunk) + "\"")
    return "\n".join(lines)


def fnv1a(data):
    """Return the 32-bit FNV-1a hash of an iterable of byte values."""
    h = 0x811c9dc5
    for d in data:
        # XOR in the low 8 bits, multiply by the prime, truncate to 32 bits
        h = ((h ^ (d & 0xff)) * 0x01000193) & 0xffffffff
    return h


# Cases that must round-trip through zlib.decompress()
test_cases = [
    test_dynamic,
    test_dynamic_no_match,
    test_dynamic_empty,
    test_dynamic_rle,
    test_dynamic_rle_boundary,
    test_repeat_length,
    test_huff_lengths,
    test_multi_part_matches,
    test_static_distances_and_lengths,
    test_dynamic_distances_and_lengths,
    test_long_codes,
    test_long_code_sequences,
    test_two_symbol_bits,
]

good = True
for case in test_cases:
    try:
        data, buf = case()
        result = zlib.decompress(buf.to_bytes())
        if data != result:
            raise ValueError("Round trip failed")
        print("{}: OK".format(case.__name__))
    except Exception as e:
        # Report the failure and keep going so every case gets a verdict
        print("{}: FAIL ({})".format(case.__name__, e))
        good = False

sys.exit(0 if good else 1)