#!/usr/bin/env python3
"""
Test cases that match specific examples from Han et al. (2008) paper.
Tests only end-to-end encoding/decoding and verifies results match paper.
"""

import numpy as np
from enumerative_coding import EnumerativeEncoder
from collections import Counter


def test_example_from_paper():
    """
    Test with a specific example that should match the paper's results.
    This verifies the encoding produces the expected output.
    """
    print("Testing Example from Paper")
    print("=" * 50)

    # Use a simple sequence that we can verify by hand
    # This sequence has: 2 zeros, 2 ones, 1 two
    sequence = [0, 1, 0, 1, 2]
    print(f"Input sequence: {sequence}")
    print(f"Symbol counts: {[sequence.count(i) for i in range(3)]}")

    encoder = EnumerativeEncoder()
    encoded = encoder.encode(sequence)
    decoded = encoder.decode(encoded)

    # Test end-to-end correctness
    assert decoded == sequence, f"Decoding failed: expected {sequence}, got {decoded}"
    print("✓ End-to-end encoding/decoding successful")

    # Show compression metrics.  Guard against a zero-length encoding so the
    # ratio cannot raise ZeroDivisionError (same guard style as
    # test_paper_sequence_properties).
    original_size = len(sequence)
    compressed_size = len(encoded)
    print(f"Original: {original_size} symbols -> Compressed: {compressed_size} bytes")
    ratio = original_size / compressed_size if compressed_size > 0 else float('inf')
    print(f"Compression ratio: {ratio:.2f}")


def test_paper_sequence_properties():
    """
    Test with sequences that have the properties discussed in the paper.
    Verify the algorithm handles different symbol distributions correctly.
    """
    print("\n\nTesting Paper Sequence Properties")
    print("=" * 50)

    test_cases = [
        # (description, sequence)
        ("Uniform distribution", [0, 1, 2, 0, 1, 2]),
        ("Skewed distribution", [0, 0, 0, 1, 1, 2]),
        ("Single symbol", [0, 0, 0, 0]),
        ("Binary sequence", [0, 1, 1, 0, 1]),
    ]

    encoder = EnumerativeEncoder()

    for description, sequence in test_cases:
        print(f"\n{description}: {sequence}")

        # Test encoding and decoding
        encoded = encoder.encode(sequence)
        decoded = encoder.decode(encoded)

        # Verify correctness
        assert decoded == sequence, f"Failed for {description}"

        # Show results (inf ratio if the encoder ever emits zero bytes)
        compression_ratio = len(sequence) / len(encoded) if len(encoded) > 0 else float('inf')
        print(f"  Compressed: {len(sequence)} -> {len(encoded)} bytes (ratio: {compression_ratio:.2f})")
        print("  ✓ Correctly encoded and decoded")


def test_lexicographic_ordering():
    """
    Test that sequences with the same symbol counts but different orders
    produce different encodings (different lexicographic indices).
    """
    print("\n\nTesting Lexicographic Ordering")
    print("=" * 50)

    # All permutations of [0, 0, 1, 1] should have different lexicographic indices
    permutations = [
        [0, 0, 1, 1],
        [0, 1, 0, 1],
        [0, 1, 1, 0],
        [1, 0, 0, 1],
        [1, 0, 1, 0],
        [1, 1, 0, 0]
    ]

    encoder = EnumerativeEncoder()

    # Encode each permutation exactly once; reuse the collected encodings for
    # both the per-permutation report and the uniqueness check below (the
    # previous version re-encoded every permutation a second time).
    encodings = []

    for perm in permutations:
        encoded = encoder.encode(perm)
        decoded = encoder.decode(encoded)

        # Verify end-to-end correctness
        assert decoded == perm, f"Decoding failed for {perm}"

        encodings.append(encoded)
        print(f"  {perm} -> compressed size: {len(encoded)} bytes")

    # Check that different permutations produce different encodings
    unique_encodings = len(set(encodings))
    total_permutations = len(permutations)

    print(f"  Unique encodings: {unique_encodings} out of {total_permutations} permutations")
    assert unique_encodings == total_permutations, f"CRITICAL BUG: Different permutations produced identical encodings! This means the algorithm is lossy and cannot uniquely decode sequences."

    print("  ✓ All permutations have unique encodings")


def main():
    """Run all paper example tests."""
    test_example_from_paper()
    test_paper_sequence_properties()
    test_lexicographic_ordering()

    print("\n" + "=" * 50)
    print("All paper example tests passed! ✓")


if __name__ == "__main__":
    main()