# coven/modules/ufbx/misc/test_zlib_debug_compressor.py

import zlib_debug_compressor as zz
import zlib
import sys
import itertools
import random

def test_dynamic():
    """Dynamic Huffman block (forced block type 2) over a short input with a repeat"""
    payload = b"Hello Hello!"
    options = zz.Options(force_block_types=[2])
    compressed = zz.deflate(payload, options)
    return payload, compressed

def test_dynamic_no_match():
    """Dynamic Huffman block (forced block type 2) over input without matches"""
    payload = b"Hello World!"
    options = zz.Options(force_block_types=[2])
    compressed = zz.deflate(payload, options)
    return payload, compressed

def test_dynamic_empty():
    """Dynamic Huffman block for empty input, leaving only the end symbol"""
    payload = b""
    options = zz.Options(force_block_types=[2])
    compressed = zz.deflate(payload, options)
    return payload, compressed

def test_dynamic_rle():
    """Dynamic Huffman block made of one literal and one self-overlapping match"""
    # One literal "A" followed by a 16 byte match at distance 1 -> 17 bytes of "A"
    expected = b"A" * 17
    parts = [zz.Literal(b"A"), zz.Match(16, 1)]
    options = zz.Options(force_block_types=[2])
    return expected, zz.compress_message(parts, options)

def test_dynamic_rle_boundary():
    """Same as the single repeating match case, but long enough to cross a 16 byte boundary"""
    # One literal "A" followed by a 24 byte match at distance 1 -> 25 bytes of "A"
    expected = b"A" * 25
    parts = [zz.Literal(b"A"), zz.Match(24, 1)]
    options = zz.Options(force_block_types=[2])
    return expected, zz.compress_message(parts, options)

def test_repeat_length():
    """Dynamic Huffman compressed block with repeat lengths"""
    alphabet = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    # A..Z followed by Z..A, compressed with default options
    payload = alphabet + alphabet[::-1]
    return payload, zz.deflate(payload)

def test_huff_lengths():
    """Test all possible lit/len code lengths"""
    payload = b"0123456789ABCDE"
    # Each successive byte gets twice the count of the previous one (1, 2, 4, ...)
    counts = {symbol: 1 << index for index, symbol in enumerate(payload)}
    options = zz.Options(force_block_types=[2], override_litlen_counts=counts)
    return payload, zz.deflate(payload, options)

def test_multi_part_matches():
    """Matches that refer to earlier compression blocks"""
    payload = b"Test Part Data Data Test Data Part New Test Data"
    # Tiny blocks with a forced list of block types (0, 1 and 2, twice over)
    options = zz.Options(block_size=4, force_block_types=[0, 1, 2, 0, 1, 2])
    compressed = zz.deflate(payload, options)
    return payload, compressed

def create_match_distances_and_lengths_message():
    """Build a literal/match message that walks through tables of match lengths and distances.

    Every step appends a two byte literal followed by a match. Match lengths
    are taken from the length table in order (the last entry is repeated once
    the table runs out). A distance from the distance table is only used once
    the accumulated match lengths have reached it.
    """
    lens = list(range(3, 36)) + [
        39, 42, 43, 48, 50, 51, 55, 58, 59, 63, 66, 67, 70, 82, 83, 90,
        98, 99, 105, 114, 115, 120, 130, 131, 140, 150, 162, 163, 170,
        180, 194, 195, 200, 210, 226, 227, 230, 240, 250, 257, 258,
    ]
    dists = list(range(1, 18)) + [
        20, 24, 25, 28, 32, 33, 40, 48, 49, 50, 64, 65, 75, 96, 97, 110,
        128, 129, 160, 192, 193, 230, 256, 257, 330, 384, 385, 400, 512,
        513, 600, 768, 769, 900, 1024, 1025, 1250, 1536, 1537, 1800, 2048,
        2049, 2500, 3072, 3073, 3500, 4096, 4097, 5000, 6144, 6145, 7000,
        8192, 8193, 10000, 12288, 12289, 14000, 16384, 16385, 20000,
        24576, 24577, 25000, 26000, 27000, 28000, 29000, 30000, 31000,
        32768, 32768+300,
    ]

    message = []

    # Lengths in table order, then the final length forever
    length_source = itertools.chain(lens, itertools.repeat(lens[-1]))
    # Literal byte values cycle through 0..255
    literal_source = itertools.cycle(range(256))

    covered = 0      # sum of the match lengths emitted so far
    match_dist = 1   # distance used for matches until `covered` reaches the next table entry
    for target in dists:
        while covered < target:
            length = next(length_source)
            covered += length
            first = next(literal_source)
            second = next(literal_source)
            message.append(zz.Literal(bytes([first, second])))
            message.append(zz.Match(length, match_dist))
        match_dist = target
    return message

def test_static_distances_and_lengths():
    """Test all possible match length and distance buckets (Static)"""
    parts = create_match_distances_and_lengths_message()
    # Block size of 2**32 with the block type forced to 1
    options = zz.Options(block_size=2**32, force_block_types=[1])
    expected = zz.decode(parts)
    compressed = zz.compress_message(parts, options)
    return expected, compressed

def test_dynamic_distances_and_lengths():
    """Test all possible match length and distance buckets (Dynamic)"""
    parts = create_match_distances_and_lengths_message()
    # Block size of 2**32 with the block type forced to 2
    options = zz.Options(block_size=2**32, force_block_types=[2])
    expected = zz.decode(parts)
    compressed = zz.compress_message(parts, options)
    return expected, compressed

def test_long_codes():
    """Test longest possible bit-codes for symbols"""
    message = [zz.Literal(b"test")]
    # (initial length, distance) pairs inserted once the position crosses the distance
    far_matches = [(140, 10000), (180, 14000), (210, 20000), (230, 30000)]

    pos = 0
    while pos < 30000:
        message.append(zz.Match(258, 4))
        end = pos + 258
        for length, offset in far_matches:
            if not (pos < offset <= end):
                continue
            # Five matches of increasing length, separated by growing runs of literals
            for n in range(5):
                message.extend(zz.Literal(bytes([ord("A") + m])) for m in range(n - 1))
                message.append(zz.Match(length, offset))
                end += length
                length += 1
        pos = end

    # Override counts halve from one listed symbol to the next
    litlen_counts = { }
    weight = 1000000000
    for symbol in itertools.chain([285], b"Test", range(260, 284)):
        litlen_counts[symbol] = weight
        weight /= 2

    dist_counts = { }
    weight = 1000000000
    for symbol in itertools.chain([3], range(10, 28)):
        dist_counts[symbol] = weight
        weight /= 2

    options = zz.Options(block_size=2**32, force_block_types=[2],
                         override_litlen_counts=litlen_counts,
                         override_dist_counts=dist_counts)
    expected = zz.decode(message)
    return expected, zz.compress_message(message, options)

def test_long_code_sequences():
    """Test sequences of long codes with N bit symbols"""
    # Fixed seed so the generated stream is the same on every run
    random.seed(1)

    segments = []   # alternating (message, options) arguments for compress_message
    combined = []   # every message part in order, used to decode the expected output
    digits = range(ord("0"), ord("4"))

    def random_match():
        # Length is drawn before distance; the order of random calls matters
        length = random.randrange(230, 250)
        distance = random.randrange(17000, 24000)
        return zz.Match(length, distance)

    # Random prefix: 300 literal bytes, then repeated 200 byte matches + 10 literals
    prefix = [zz.Literal(bytes(random.choices(digits, k=300)))]
    prefix_len = 300
    while prefix_len <= 24000:
        distance = min(random.randrange(256, 1024), prefix_len - 200)
        prefix.append(zz.Match(200, distance))
        prefix.append(zz.Literal(bytes(random.choices(digits, k=10))))
        prefix_len += 210

    segments.extend((prefix, zz.Options(force_block_types=[2])))
    combined.extend(prefix)

    # Matches with increasing bit counts
    for ll_bits in range(2, 16):
        for dist_bits in (ll_bits, 15):
            litlen_counts = {n: 2**(32-n) for n in range(ll_bits - 3)}
            dist_counts = {n: 2**(32-n) for n in range(dist_bits - 1)}

            # The symbols actually used in this part all share the same small count
            for symbol in (256, 284, *b"ABCDEF"):
                litlen_counts[symbol] = 2**8
            dist_counts[29] = 2**8

            part = [random_match()]
            for lits in range(8):
                if lits:
                    part.append(zz.Literal(bytes(random.choices(b"ABCDEF", k=lits))))
                part.append(random_match())

            options = zz.Options(force_block_types=[2],
                                 override_litlen_counts=litlen_counts,
                                 override_dist_counts=dist_counts)
            segments.extend((part, options))
            combined.extend(part)

    expected = zz.decode(combined)
    return expected, zz.compress_message(*segments)

def test_two_symbol_bits():
    """Test some combinations of bit lengths for two symbols"""
    segments = []   # alternating (message, options) arguments for compress_message
    expected = b""

    for lo in range(2, 16):
        for hi in range(lo, min(lo + 6, 16)):
            lo_weight = 8**(16-lo)
            hi_weight = 8**(16-hi)

            # Symbol 256 gets by far the largest count
            counts = {256: 64**16}

            # Filler symbols 96.. with decreasing counts, then "A" weighted by `lo`
            for n in range(lo):
                counts[96 + n] = 8**(16-n)
            counts[ord("A")] = lo_weight

            # Filler symbols 0.. sharing the count that "B" gets, weighted by `hi`
            for n in range(2**(hi - lo)):
                assert n < 64
                counts[n] = hi_weight
            counts[ord("B")] = hi_weight

            options = zz.Options(force_block_types=[2],
                                 override_litlen_counts=counts)
            segments.extend(([zz.Literal(b"AB")], options))
            expected += b"AB"

    return expected, zz.compress_message(*segments)

def test_fail_codelen_16_overflow():
    """Test overflow of codelen symbol 16"""
    payload = b"\xfd\xfe\xff"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))

    # Patch Litlen 254-256 repeat extra N to 4
    stream.patch(0x66, 1, 2)

    return payload, stream

def test_fail_codelen_17_overflow():
    """Test overflow of codelen symbol 17"""
    payload = b"\xfc"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))

    # Patch Litlen 254-256 zero extra N to 5
    stream.patch(0x6c, 2, 3)

    return payload, stream

def test_fail_codelen_18_overflow():
    """Test overflow of codelen symbol 18"""
    payload = b"\xf4"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))

    # Patch Litlen 254-256 extra N to 13
    stream.patch(0x6a, 2, 7)

    return payload, stream

def test_fail_codelen_overfull():
    """Test bad codelen Huffman tree with too many symbols"""
    payload = b"Codelen"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))

    # Corrupt the stream so the Huffman tree becomes over-filled
    stream.patch(0x30, 1, 3)

    return payload, stream

def test_fail_codelen_underfull():
    """Test bad codelen Huffman tree with too few symbols"""
    payload = b"Codelen"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))

    # Corrupt the stream so the Huffman tree becomes under-filled
    stream.patch(0x4e, 5, 3)

    return payload, stream

def test_fail_litlen_bad_huffman():
    """Test bad lit/len Huffman tree"""
    payload = b"Literal/Length codes"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))

    # Corrupt the stream so the Huffman tree becomes under-filled
    stream.patch(0x6d, 1, 2)

    return payload, stream

def test_fail_distance_bad_huffman():
    """Test bad distance Huffman tree"""
    payload = b"Dist Dist .. Dist"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))

    # Corrupt the stream so the Huffman tree becomes under-filled
    stream.patch(0xb1, 0b1111, 4)

    return payload, stream

def test_fail_bad_distance():
    """Test bad distance symbol (30..31)"""
    payload = b"Dist Dist"
    stream = zz.deflate(payload, zz.Options(force_block_types=[1]))

    # Overwrite the distance code with distance symbol 30
    stream.patch(0x42, 0b01111, 5)

    return payload, stream

def test_fail_bad_static_litlen():
    """Test bad static lit/length (286..287)"""
    data = b"A"
    opts = zz.Options(force_block_types=[1])
    buf = zz.deflate(data, opts)

    # The static code for lit/len symbols 280..287 is 11000000..11000111, so
    # symbol 286 is 11000110. The patched value 0b01100011 is that code with
    # its bits reversed (presumably because patch() writes LSB first, as the
    # distance symbol patches in this file do -- TODO confirm). The label
    # therefore names symbol 286; 285 would be a valid symbol.
    buf.patch(19, 0b01100011, 8, "Invalid symbol 286")

    return data, buf

def test_fail_distance_too_far():
    """Test with distance too far to the output"""
    # Only one byte has been produced when the match asks for distance 2;
    # no_decode=True is passed because the message is deliberately invalid
    parts = [zz.Literal(b"A"), zz.Match(4, 2)]
    options = zz.Options(force_block_types=[1], no_decode=True)
    stream = zz.compress_message(parts, options)

    return b"", stream

def test_fail_bad_distance_bit():
    """Test bad distance symbol in one symbol alphabet"""
    payload = b"asd asd"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))

    # Overwrite the distance code with code 1
    stream.patch(0xaa, 0b1, 1)

    return payload, stream

def test_fail_bad_distance_empty():
    """Test using distance code from an empty tree"""
    payload = b"asd asd"
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))

    # Two patches: add another distance code, then replace the distance 3
    # code for 1 (0111) with the code for 0 (00) for distances 3 and 4
    stream.patch(0x18, 4, 5)
    stream.patch(0x98, 0b0000, 4)

    return payload, stream

def test_fail_bad_lit_length():
    """Test bad lit/length symbol"""
    payload = b""
    stream = zz.deflate(payload, zz.Options(force_block_types=[2]))

    # Flip the end-of-block code from 0 to 1
    stream.patch(0x6b, 0b1, 1)

    return payload, stream

def test_fail_no_litlen_codes():
    """Test lit/len table with no codes"""
    payload = b""
    # Every lit/len symbol 0..285 gets an overridden count of zero
    zero_counts = dict.fromkeys(range(286), 0)
    options = zz.Options(force_block_types=[2],
                         override_litlen_counts=zero_counts,
                         invalid_sym=zz.Code(0, 1))
    stream = zz.deflate(payload, options)

    return payload, stream

def test_fail_no_dist_codes():
    """Test distance table with no codes"""
    # Payload the message below describes: literal "A" followed by a 4 byte
    # match at distance 1. Previously `data` was never defined in this
    # function, so the return statement raised NameError (or silently picked
    # up an unrelated module-level `data`).
    data = b"AAAAA"
    # Every distance symbol 0..29 gets an overridden count of zero
    probs = { n: 0 for n in range(30) }
    opts = zz.Options(force_block_types=[2], override_dist_counts=probs, invalid_sym=zz.Code(0, 1))
    message = [zz.Literal(b"A"), zz.Match(4, 1)]
    buf = zz.compress_message(message, opts)

    return data, buf

def fmt_bytes(data, cols=20):
    """Format data as double-quoted lines of \\xNN escapes, at most `cols` bytes per line.

    Returns the lines joined with newlines; empty input gives an empty string.
    """
    def quoted(chunk):
        escapes = "".join("\\x{:02x}".format(value) for value in chunk)
        return "\"{}\"".format(escapes)

    starts = range(0, len(data), cols)
    return "\n".join(quoted(data[start:start + cols]) for start in starts)

def fnv1a(data):
    """Return the 32-bit FNV-1a hash of an iterable of byte values."""
    prime = 0x01000193
    mask = 0xffffffff
    acc = 0x811c9dc5  # offset basis
    for value in data:
        # XOR in the low 8 bits first, then multiply and wrap to 32 bits
        acc ^= value & 0xff
        acc = (acc * prime) & mask
    return acc

# Round-trip cases run by this script. Each entry is a function returning
# (expected_data, compressed_buffer). The test_fail_* builders are not part
# of this list.
test_cases = [
    test_dynamic,
    test_dynamic_no_match,
    test_dynamic_empty,
    test_dynamic_rle,
    # Previously test_dynamic_rle was listed twice and the boundary variant
    # was never run.
    test_dynamic_rle_boundary,
    test_repeat_length,
    test_huff_lengths,
    test_multi_part_matches,
    test_static_distances_and_lengths,
    test_dynamic_distances_and_lengths,
    test_long_codes,
    test_long_code_sequences,
    test_two_symbol_bits,
]

# Compress with the debug compressor, decompress with the reference zlib and
# compare against the expected payload. Any exception counts as a failure of
# that case; the remaining cases still run.
good = True
for case in test_cases:
    try:
        data, buf = case()
        result = zlib.decompress(buf.to_bytes())
        if data != result:
            raise ValueError("Round trip failed")
        print("{}: OK".format(case.__name__))
    except Exception as e:
        print("{}: FAIL ({})".format(case.__name__, e))
        good = False

# Exit status is non-zero if any case failed
sys.exit(0 if good else 1)