#!/usr/bin/env python3
"""
Test cases that match specific examples from Han et al. (2008) paper.
Tests only end-to-end encoding/decoding and verifies results match paper.
"""

import numpy as np
from enumerative_coding import EnumerativeEncoder
from collections import Counter


def test_example_from_paper():
    """
    Test with a specific example that should match the paper's results.
    This verifies the encoding produces the expected output.
    """
    print("Testing Example from Paper")
    print("=" * 50)

    # Use a simple sequence that we can verify by hand
    # This sequence has: 2 zeros, 2 ones, 1 two
    sequence = [0, 1, 0, 1, 2]
    print(f"Input sequence: {sequence}")
    print(f"Symbol counts: {[sequence.count(i) for i in range(3)]}")

    encoder = EnumerativeEncoder()
    encoded = encoder.encode(sequence)
    decoded = encoder.decode(encoded)

    # Test end-to-end correctness
    assert decoded == sequence, f"Decoding failed: expected {sequence}, got {decoded}"
    print("✓ End-to-end encoding/decoding successful")

    # Show compression metrics.  Guard against a zero-length encoding so the
    # ratio cannot raise ZeroDivisionError (same guard style as
    # test_paper_sequence_properties).
    original_size = len(sequence)
    compressed_size = len(encoded)
    print(f"Original: {original_size} symbols -> Compressed: {compressed_size} bytes")
    ratio = original_size / compressed_size if compressed_size > 0 else float('inf')
    print(f"Compression ratio: {ratio:.2f}")


def test_paper_sequence_properties():
    """
    Test with sequences that have the properties discussed in the paper.
    Verify the algorithm handles different symbol distributions correctly.
    """
    print("\n\nTesting Paper Sequence Properties")
    print("=" * 50)

    test_cases = [
        # (description, sequence)
        ("Uniform distribution", [0, 1, 2, 0, 1, 2]),
        ("Skewed distribution", [0, 0, 0, 1, 1, 2]),
        ("Single symbol", [0, 0, 0, 0]),
        ("Binary sequence", [0, 1, 1, 0, 1]),
    ]

    encoder = EnumerativeEncoder()

    for description, sequence in test_cases:
        print(f"\n{description}: {sequence}")

        # Test encoding and decoding
        encoded = encoder.encode(sequence)
        decoded = encoder.decode(encoded)

        # Verify correctness
        assert decoded == sequence, f"Failed for {description}"

        # Show results (inf ratio if the encoder ever emits zero bytes)
        compression_ratio = len(sequence) / len(encoded) if len(encoded) > 0 else float('inf')
        print(f"  Compressed: {len(sequence)} -> {len(encoded)} bytes (ratio: {compression_ratio:.2f})")
        print("  ✓ Correctly encoded and decoded")


def test_lexicographic_ordering():
    """
    Test that sequences with the same symbol counts but different orders
    produce different encodings (different lexicographic indices).
    """
    print("\n\nTesting Lexicographic Ordering")
    print("=" * 50)

    # All permutations of [0, 0, 1, 1] should have different lexicographic indices
    permutations = [
        [0, 0, 1, 1],
        [0, 1, 0, 1],
        [0, 1, 1, 0],
        [1, 0, 0, 1],
        [1, 0, 1, 0],
        [1, 1, 0, 0]
    ]

    encoder = EnumerativeEncoder()

    # Encode each permutation exactly once; reuse the collected encodings for
    # both the per-permutation report and the uniqueness check below (the
    # previous version re-encoded every permutation a second time).
    encodings = []

    for perm in permutations:
        encoded = encoder.encode(perm)
        decoded = encoder.decode(encoded)

        # Verify end-to-end correctness
        assert decoded == perm, f"Decoding failed for {perm}"

        encodings.append(encoded)
        print(f"  {perm} -> compressed size: {len(encoded)} bytes")

    # Check that different permutations produce different encodings
    unique_encodings = len(set(encodings))
    total_permutations = len(permutations)

    print(f"  Unique encodings: {unique_encodings} out of {total_permutations} permutations")
    assert unique_encodings == total_permutations, f"CRITICAL BUG: Different permutations produced identical encodings! This means the algorithm is lossy and cannot uniquely decode sequences."

    print("  ✓ All permutations have unique encodings")


def main():
    """Run all paper example tests."""
    test_example_from_paper()
    test_paper_sequence_properties()
    test_lexicographic_ordering()

    print("\n" + "=" * 50)
    print("All paper example tests passed! ✓")


if __name__ == "__main__":
    main()