File size: 4,554 Bytes
24c19d8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | #!/usr/bin/env python3
"""
Test cases that match specific examples from Han et al. (2008) paper.
Tests only end-to-end encoding/decoding and verifies results match paper.
"""
import numpy as np
from enumerative_coding import EnumerativeEncoder
from collections import Counter
def test_example_from_paper():
"""
Test with a specific example that should match the paper's results.
This verifies the encoding produces the expected output.
"""
print("Testing Example from Paper")
print("=" * 50)
# Use a simple sequence that we can verify by hand
# This sequence has: 2 zeros, 2 ones, 1 two
sequence = [0, 1, 0, 1, 2]
print(f"Input sequence: {sequence}")
print(f"Symbol counts: {[sequence.count(i) for i in range(3)]}")
encoder = EnumerativeEncoder()
encoded = encoder.encode(sequence)
decoded = encoder.decode(encoded)
# Test end-to-end correctness
assert decoded == sequence, f"Decoding failed: expected {sequence}, got {decoded}"
print("✓ End-to-end encoding/decoding successful")
# Show compression metrics
original_size = len(sequence)
compressed_size = len(encoded)
print(f"Original: {original_size} symbols -> Compressed: {compressed_size} bytes")
print(f"Compression ratio: {original_size / compressed_size:.2f}")
def test_paper_sequence_properties():
"""
Test with sequences that have the properties discussed in the paper.
Verify the algorithm handles different symbol distributions correctly.
"""
print("\n\nTesting Paper Sequence Properties")
print("=" * 50)
test_cases = [
# (description, sequence)
("Uniform distribution", [0, 1, 2, 0, 1, 2]),
("Skewed distribution", [0, 0, 0, 1, 1, 2]),
("Single symbol", [0, 0, 0, 0]),
("Binary sequence", [0, 1, 1, 0, 1]),
]
encoder = EnumerativeEncoder()
for description, sequence in test_cases:
print(f"\n{description}: {sequence}")
# Test encoding and decoding
encoded = encoder.encode(sequence)
decoded = encoder.decode(encoded)
# Verify correctness
assert decoded == sequence, f"Failed for {description}"
# Show results
compression_ratio = len(sequence) / len(encoded) if len(encoded) > 0 else float('inf')
print(f" Compressed: {len(sequence)} -> {len(encoded)} bytes (ratio: {compression_ratio:.2f})")
print(" ✓ Correctly encoded and decoded")
def test_lexicographic_ordering():
"""
Test that sequences with the same symbol counts but different orders
produce different encodings (different lexicographic indices).
"""
print("\n\nTesting Lexicographic Ordering")
print("=" * 50)
# All permutations of [0, 0, 1, 1] should have different lexicographic indices
permutations = [
[0, 0, 1, 1],
[0, 1, 0, 1],
[0, 1, 1, 0],
[1, 0, 0, 1],
[1, 0, 1, 0],
[1, 1, 0, 0]
]
encoder = EnumerativeEncoder()
indices = []
for perm in permutations:
encoded = encoder.encode(perm)
decoded = encoder.decode(encoded)
# Verify end-to-end correctness
assert decoded == perm, f"Decoding failed for {perm}"
# Collect compressed size as a proxy for different encodings
compressed_size = len(encoded)
indices.append(compressed_size)
print(f" {perm} -> compressed size: {compressed_size} bytes")
# Verify all encodings are different (different permutations should produce different encodings)
encodings = []
for perm in permutations:
encoded = encoder.encode(perm)
encodings.append(encoded)
# Check that different permutations produce different encodings
unique_encodings = len(set(encodings))
total_permutations = len(permutations)
print(f" Unique encodings: {unique_encodings} out of {total_permutations} permutations")
assert unique_encodings == total_permutations, f"CRITICAL BUG: Different permutations produced identical encodings! This means the algorithm is lossy and cannot uniquely decode sequences."
print(" ✓ All permutations have unique encodings")
def main():
"""Run all paper example tests."""
test_example_from_paper()
test_paper_sequence_properties()
test_lexicographic_ordering()
print("\n" + "=" * 50)
print("All paper example tests passed! ✓")
if __name__ == "__main__":
main() |