morphological-transformer

Runtime error

File size: 6,621 Bytes

1f39ae1

#!/usr/bin/env python3
"""
Test script to verify training optimizations work correctly
"""

import torch
import os
import sys
from pathlib import Path

def test_mixed_precision():
    """Check that the mixed precision (AMP) primitives can be set up and used.

    Creates a ``GradScaler`` and runs a small matrix multiplication inside an
    ``autocast`` context. AMP is enabled only when CUDA is available: on a
    CPU-only machine both objects are created in their disabled state, which
    avoids the warnings PyTorch emits when CUDA AMP is requested without a GPU.
    When CUDA is available the operands live on the GPU so that autocast is
    actually applied, and the dtype of the result is verified.

    Returns:
        bool: True if every step succeeded, False if any step raised.
    """
    print("Testing Mixed Precision Training...")

    try:
        from torch.cuda.amp import GradScaler, autocast

        use_cuda = torch.cuda.is_available()
        if not use_cuda:
            print("⚠ CUDA not available, running with mixed precision disabled")
        device = torch.device("cuda" if use_cuda else "cpu")

        # Test scaler creation; its enabled state must follow CUDA availability.
        scaler = GradScaler(enabled=use_cuda)
        if scaler.is_enabled() != use_cuda:
            raise RuntimeError(
                "GradScaler enabled state does not match CUDA availability"
            )
        print("✓ GradScaler created successfully")

        # Test autocast context on the device it is meant to accelerate.
        with autocast(enabled=use_cuda):
            x = torch.randn(10, 10, device=device)
            y = torch.randn(10, 10, device=device)
            z = x @ y

        # Under CUDA autocast a matmul should be computed in reduced precision;
        # a float32 result would mean autocast was silently not applied.
        if use_cuda and z.dtype not in (torch.float16, torch.bfloat16):
            raise RuntimeError(
                f"autocast did not lower matmul precision (got {z.dtype})"
            )
        print("✓ Autocast context works")

        return True
    except Exception as e:
        print(f"✗ Mixed precision test failed: {e}")
        return False

def test_optimized_dataset():
    """Smoke-test vocabulary building and ``MorphologicalDataset`` on tiny data.

    Writes a two-line parallel corpus into a private temporary directory,
    builds source and target vocabularies from it, constructs the dataset and
    fetches its first item. The temporary directory is removed automatically,
    whether the checks succeed or fail.

    Returns:
        bool: True if every step succeeded, False if any step raised
        (including a failed import of ``morphological_dataset``).
    """
    print("\nTesting Optimized Dataset...")

    try:
        import tempfile

        from morphological_dataset import MorphologicalDataset, build_vocabulary

        # A throwaway directory replaces a fixed "test_data" folder so that an
        # existing directory of that name in the working directory is never
        # overwritten or deleted, and so cleanup also happens on failure.
        with tempfile.TemporaryDirectory(prefix="test_data_") as data_dir:
            src_path = os.path.join(data_dir, "test.src")
            tgt_path = os.path.join(data_dir, "test.tgt")

            # Create dummy data files
            with open(src_path, "w", encoding="utf-8") as f:
                f.write("hello world\n")
                f.write("test sequence\n")

            with open(tgt_path, "w", encoding="utf-8") as f:
                f.write("hola mundo\n")
                f.write("secuencia prueba\n")

            # Test vocabulary building
            src_vocab = build_vocabulary([src_path])
            tgt_vocab = build_vocabulary([tgt_path])
            print("✓ Vocabulary building works")

            # Test dataset creation
            dataset = MorphologicalDataset(
                src_path, tgt_path, src_vocab, tgt_vocab, max_length=10
            )
            print("✓ Dataset creation works")

            # Test data loading
            item = dataset[0]
            print(f"✓ Dataset item shape: {len(item)}")

        return True
    except Exception as e:
        print(f"✗ Dataset test failed: {e}")
        return False

def test_optimized_dataloader():
    """Smoke-test a ``DataLoader`` built over ``MorphologicalDataset``.

    Writes a two-line parallel corpus into a private temporary directory,
    builds the vocabularies and dataset, wraps them in a ``DataLoader`` that
    uses the project's ``collate_fn``, and pulls one batch from it. The
    temporary directory is removed automatically, whether the checks succeed
    or fail.

    Returns:
        bool: True if a batch was produced and unpacked successfully, False if
        any step raised (including a failed import or an empty loader).
    """
    print("\nTesting Optimized DataLoader...")

    try:
        import tempfile

        from torch.utils.data import DataLoader
        from morphological_dataset import MorphologicalDataset, build_vocabulary, collate_fn

        # A throwaway directory replaces a fixed "test_data" folder so that an
        # existing directory of that name in the working directory is never
        # overwritten or deleted, and so cleanup also happens on failure.
        with tempfile.TemporaryDirectory(prefix="test_data_") as data_dir:
            src_path = os.path.join(data_dir, "test.src")
            tgt_path = os.path.join(data_dir, "test.tgt")

            # Create test dataset
            with open(src_path, "w", encoding="utf-8") as f:
                f.write("hello world\n")
                f.write("test sequence\n")

            with open(tgt_path, "w", encoding="utf-8") as f:
                f.write("hola mundo\n")
                f.write("secuencia prueba\n")

            src_vocab = build_vocabulary([src_path])
            tgt_vocab = build_vocabulary([tgt_path])
            dataset = MorphologicalDataset(
                src_path, tgt_path, src_vocab, tgt_vocab, max_length=10
            )

            # Test optimized DataLoader
            dataloader = DataLoader(
                dataset,
                batch_size=2,
                shuffle=True,
                collate_fn=lambda batch: collate_fn(batch, src_vocab, tgt_vocab, 10),
                num_workers=0,  # Use 0 for testing
                pin_memory=False,  # Disable for testing
                persistent_workers=False,  # Disable for testing
                drop_last=True,
            )

            # Test iteration: an empty loader must count as a failure instead
            # of silently passing without ever inspecting a batch.
            first_batch = next(iter(dataloader), None)
            if first_batch is None:
                raise RuntimeError("DataLoader produced no batches")

            # Unpacking also checks that the batch has exactly four elements.
            src, _src_mask, tgt, _tgt_mask = first_batch
            print(f"✓ Batch shapes - src: {src.shape}, tgt: {tgt.shape}")

        return True
    except Exception as e:
        print(f"✗ DataLoader test failed: {e}")
        return False

def test_cuda_optimizations():
    """Verify that the cuDNN flags can be set and a non-blocking GPU copy runs.

    The check is skipped, and reported as passing, when CUDA is unavailable.
    Note that the cuDNN flags set here are not restored afterwards.

    Returns:
        bool: True when CUDA is unavailable or every step succeeds, False if
        any step raises.
    """
    print("\nTesting CUDA Optimizations...")

    # Nothing to verify without a GPU; treat as a pass.
    if not torch.cuda.is_available():
        print("⚠ CUDA not available, skipping CUDA tests")
        return True

    try:
        # Switch cuDNN to benchmark mode and turn deterministic mode off.
        torch.backends.cudnn.benchmark = True
        torch.backends.cudnn.deterministic = False
        print("✓ CUDA optimizations enabled")

        # Move a host tensor to the GPU with the non_blocking flag set.
        host_tensor = torch.randn(100, 100)
        device_tensor = host_tensor.cuda(non_blocking=True)
        print("✓ Non-blocking CUDA transfer works")
    except Exception as err:
        print(f"✗ CUDA test failed: {err}")
        return False
    else:
        return True

def test_model_creation():
    """Instantiate a small ``TagTransformer`` and report its parameter count.

    Returns:
        bool: True if the model was built and counted, False if any step
        raised (including a failed import of ``transformer``).
    """
    print("\nTesting Model Creation...")

    try:
        from transformer import TagTransformer

        # Hyper-parameters for a deliberately small model.
        model_config = {
            "src_vocab_size": 1000,
            "trg_vocab_size": 1000,
            "embed_dim": 256,
            "nb_heads": 4,
            "src_hid_size": 1024,
            "src_nb_layers": 2,
            "trg_hid_size": 1024,
            "trg_nb_layers": 2,
            "dropout_p": 0.1,
            "tie_trg_embed": True,
            "label_smooth": 0.1,
            "nb_attr": 5,
            "src_c2i": {},
            "trg_c2i": {},
            "attr_c2i": {},
        }
        model = TagTransformer(**model_config)
        print("✓ Model creation works")

        # Ask the model itself for its parameter total.
        param_count = model.count_nb_params()
        print(f"✓ Model has {param_count:,} parameters")
    except Exception as err:
        print(f"✗ Model test failed: {err}")
        return False
    else:
        return True

def run_all_tests(tests=None):
    """Run the optimization checks and print a pass/fail summary.

    Each check is called with no arguments and counts as passed when it
    returns a truthy value. A check that raises is reported and counted as
    failed; the remaining checks still run.

    Args:
        tests: Optional iterable of zero-argument callables to run. When
            omitted, the five checks defined in this module are run in their
            original order.

    Returns:
        bool: True if every check passed, False otherwise.
    """
    print("=== Testing Training Optimizations ===\n")

    if tests is None:
        tests = [
            test_mixed_precision,
            test_optimized_dataset,
            test_optimized_dataloader,
            test_cuda_optimizations,
            test_model_creation,
        ]
    else:
        # Accept any iterable (e.g. a generator); len() is needed for the summary.
        tests = list(tests)

    passed = 0
    total = len(tests)

    for test in tests:
        try:
            if test():
                passed += 1
        except Exception as e:
            # Not every callable has __name__ (e.g. functools.partial objects).
            test_name = getattr(test, "__name__", repr(test))
            print(f"✗ Test {test_name} failed with exception: {e}")

    print("\n=== Test Results ===")
    print(f"Passed: {passed}/{total}")

    if passed == total:
        print("🎉 All tests passed! Optimizations are working correctly.")
        return True

    print("❌ Some tests failed. Check the errors above.")
    return False

# Script entry point: run the suite and turn its outcome into the process
# exit status (0 when everything passed, 1 otherwise).
if __name__ == '__main__':
    exit_code = 0 if run_all_tests() else 1
    sys.exit(exit_code)