File size: 4,554 Bytes
24c19d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3
"""
Test cases that match specific examples from Han et al. (2008) paper.
Tests only end-to-end encoding/decoding and verifies results match paper.
"""

import numpy as np
from enumerative_coding import EnumerativeEncoder
from collections import Counter


def test_example_from_paper():
    """
    Test with a specific example that should match the paper's results.
    This verifies the encoding produces the expected output.
    """
    print("Testing Example from Paper")
    print("=" * 50)
    
    # Use a simple sequence that we can verify by hand
    # This sequence has: 2 zeros, 2 ones, 1 two
    sequence = [0, 1, 0, 1, 2]
    
    print(f"Input sequence: {sequence}")
    print(f"Symbol counts: {[sequence.count(i) for i in range(3)]}")
    
    encoder = EnumerativeEncoder()
    encoded = encoder.encode(sequence)
    decoded = encoder.decode(encoded)
    
    # Test end-to-end correctness
    assert decoded == sequence, f"Decoding failed: expected {sequence}, got {decoded}"
    print("✓ End-to-end encoding/decoding successful")
    
    # Show compression metrics
    original_size = len(sequence)
    compressed_size = len(encoded)
    print(f"Original: {original_size} symbols -> Compressed: {compressed_size} bytes")
    print(f"Compression ratio: {original_size / compressed_size:.2f}")


def test_paper_sequence_properties():
    """
    Test with sequences that have the properties discussed in the paper.
    Verify the algorithm handles different symbol distributions correctly.
    """
    print("\n\nTesting Paper Sequence Properties")
    print("=" * 50)
    
    test_cases = [
        # (description, sequence)
        ("Uniform distribution", [0, 1, 2, 0, 1, 2]),
        ("Skewed distribution", [0, 0, 0, 1, 1, 2]),
        ("Single symbol", [0, 0, 0, 0]),
        ("Binary sequence", [0, 1, 1, 0, 1]),
    ]
    
    encoder = EnumerativeEncoder()
    
    for description, sequence in test_cases:
        print(f"\n{description}: {sequence}")
        
        # Test encoding and decoding
        encoded = encoder.encode(sequence)
        decoded = encoder.decode(encoded)
        
        # Verify correctness
        assert decoded == sequence, f"Failed for {description}"
        
        # Show results
        compression_ratio = len(sequence) / len(encoded) if len(encoded) > 0 else float('inf')
        print(f"  Compressed: {len(sequence)} -> {len(encoded)} bytes (ratio: {compression_ratio:.2f})")
        print("  ✓ Correctly encoded and decoded")


def test_lexicographic_ordering():
    """
    Test that sequences with the same symbol counts but different orders
    produce different encodings (different lexicographic indices).
    """
    print("\n\nTesting Lexicographic Ordering")
    print("=" * 50)
    
    # All permutations of [0, 0, 1, 1] should have different lexicographic indices
    permutations = [
        [0, 0, 1, 1],
        [0, 1, 0, 1], 
        [0, 1, 1, 0],
        [1, 0, 0, 1],
        [1, 0, 1, 0],
        [1, 1, 0, 0]
    ]
    
    encoder = EnumerativeEncoder()
    indices = []
    
    for perm in permutations:
        encoded = encoder.encode(perm)
        decoded = encoder.decode(encoded)
        
        # Verify end-to-end correctness
        assert decoded == perm, f"Decoding failed for {perm}"
        
        # Collect compressed size as a proxy for different encodings
        compressed_size = len(encoded)
        indices.append(compressed_size)
        print(f"  {perm} -> compressed size: {compressed_size} bytes")
    
    # Verify all encodings are different (different permutations should produce different encodings)
    encodings = []
    for perm in permutations:
        encoded = encoder.encode(perm)
        encodings.append(encoded)
    
    # Check that different permutations produce different encodings
    unique_encodings = len(set(encodings))
    total_permutations = len(permutations)
    print(f"  Unique encodings: {unique_encodings} out of {total_permutations} permutations")
    
    assert unique_encodings == total_permutations, f"CRITICAL BUG: Different permutations produced identical encodings! This means the algorithm is lossy and cannot uniquely decode sequences."
    print("  ✓ All permutations have unique encodings")


def main():
    """Run all paper example tests."""
    test_example_from_paper()
    test_paper_sequence_properties()
    test_lexicographic_ordering()
    
    print("\n" + "=" * 50)
    print("All paper example tests passed! ✓")


if __name__ == "__main__":
    main()