File size: 10,845 Bytes
4d2898f
 
a12eec8
 
4d2898f
 
 
a12eec8
 
 
 
 
4d2898f
a12eec8
 
 
 
 
4d2898f
a12eec8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d2898f
a12eec8
 
 
4d2898f
a12eec8
 
 
 
4d2898f
 
a12eec8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d2898f
a12eec8
 
 
4d2898f
a12eec8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d2898f
a12eec8
 
 
 
4d2898f
 
a12eec8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d2898f
 
a12eec8
 
 
 
 
 
 
 
 
 
 
 
4d2898f
a12eec8
 
 
 
 
 
 
 
 
4d2898f
 
a12eec8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d2898f
a12eec8
4d2898f
 
a12eec8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
#!/usr/bin/env python3
"""
Comprehensive GPU integration test for the text analyzer.
Tests the entire GPU pipeline from configuration to model usage.
"""

import sys
import time
import torch
import spacy
from text_analyzer.base_analyzer import BaseAnalyzer
from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer

def print_header(title):
    """Print *title* framed above and below by a 60-character rule."""
    bar = "=" * 60
    print(f"\n{bar}\n {title} \n{bar}")

def test_gpu_environment():
    """Probe the PyTorch/CUDA environment and report what was found.

    Returns a dict with: pytorch_available, cuda_available, gpu_count,
    gpu_name (last device enumerated), cuda_version.
    """
    print_header("1. GPU Environment Test")

    info = {
        "pytorch_available": False,
        "cuda_available": False,
        "gpu_count": 0,
        "gpu_name": None,
        "cuda_version": None,
    }

    try:
        # Local import on purpose: lets the ImportError branch report a
        # missing PyTorch installation even though the module also imports it.
        import torch
        info["pytorch_available"] = True
        print(f"βœ“ PyTorch installed: {torch.__version__}")

        if not torch.cuda.is_available():
            print("βœ— CUDA not available")
        else:
            info["cuda_available"] = True
            info["gpu_count"] = torch.cuda.device_count()
            info["cuda_version"] = torch.version.cuda

            print(f"βœ“ CUDA available: {info['cuda_version']}")
            print(f"βœ“ GPU count: {info['gpu_count']}")

            for idx in range(info["gpu_count"]):
                device_name = torch.cuda.get_device_name(idx)
                info["gpu_name"] = device_name
                print(f"βœ“ GPU {idx}: {device_name}")

                # Per-device memory / capability details.
                props = torch.cuda.get_device_properties(idx)
                total_gb = props.total_memory / (1024**3)
                print(f"  - Total memory: {total_gb:.1f} GB")
                print(f"  - Compute capability: {props.major}.{props.minor}")

    except ImportError:
        print("βœ— PyTorch not installed")
    except Exception as e:
        print(f"βœ— Error: {e}")

    return info

def test_spacy_gpu_configuration():
    """Check whether SpaCy can be steered onto the GPU and which transformer
    add-on packages are importable.

    Returns a dict with: spacy_gpu_enabled, transformer_packages.
    """
    print_header("2. SpaCy GPU Configuration Test")

    results = {
        "spacy_gpu_enabled": False,
        "transformer_packages": [],
    }

    try:
        # Pin CUDA device 0 first so SpaCy's preference lands on it.
        import torch
        if torch.cuda.is_available():
            torch.cuda.set_device(0)
            print(f"βœ“ Set CUDA device to 0")

        gpu_id = spacy.prefer_gpu(0)
        if gpu_id is False:
            print("βœ— SpaCy GPU not enabled")
        else:
            results["spacy_gpu_enabled"] = True
            print(f"βœ“ SpaCy GPU enabled on device {gpu_id}")

        # Record whichever transformer bridge packages can be imported.
        for module_name, package_label in (
            ("spacy_transformers", "spacy-transformers"),
            ("spacy_curated_transformers", "spacy-curated-transformers"),
        ):
            try:
                __import__(module_name)
            except ImportError:
                continue
            results["transformer_packages"].append(package_label)

        if results["transformer_packages"]:
            print(f"βœ“ Transformer packages: {', '.join(results['transformer_packages'])}")
        else:
            print("βœ— No transformer packages found")

    except Exception as e:
        print(f"βœ— Error: {e}")

    return results

def test_model_gpu_loading():
    """Load the transformer analyzer and verify its components run on the GPU.

    Returns a dict with: model_loaded, gpu_verified (True once any pipeline
    component's parameters are on CUDA), components_on_gpu (their names),
    processing_works (a short text round-trips through the pipeline).
    """
    print_header("3. Model GPU Loading Test")

    results = {
        "model_loaded": False,
        "gpu_verified": False,
        "components_on_gpu": [],
        "processing_works": False
    }

    try:
        # Initialize analyzer with the transformer ("trf") model variant.
        print("Loading English transformer model...")
        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        results["model_loaded"] = True

        # Check model info as reported by the analyzer itself.
        model_info = analyzer.get_model_info()
        print(f"βœ“ Model loaded: {model_info['name']}")
        print(f"  Device: {model_info['device']}")
        print(f"  GPU enabled: {model_info['gpu_enabled']}")

        # Verify GPU usage at component level: a component counts as
        # GPU-resident if any of its parameters live on a CUDA device.
        if hasattr(analyzer, 'nlp') and analyzer.nlp:
            for pipe_name, pipe in analyzer.nlp.pipeline:
                if hasattr(pipe, 'model'):
                    is_on_gpu = False

                    if hasattr(pipe.model, 'parameters'):
                        try:
                            # Fixed: was a bare `except:` that would swallow
                            # KeyboardInterrupt/SystemExit; narrowed to Exception.
                            is_on_gpu = any(
                                param.is_cuda for param in pipe.model.parameters()
                            )
                        except Exception:
                            # Some wrappers expose `parameters` without the
                            # torch iterator protocol; treat those as CPU.
                            pass

                    if is_on_gpu:
                        results["components_on_gpu"].append(pipe_name)
                        print(f"βœ“ Component '{pipe_name}' is on GPU")
                    else:
                        print(f"βœ— Component '{pipe_name}' is on CPU")

            if results["components_on_gpu"]:
                results["gpu_verified"] = True

        # Smoke-test the full pipeline on a short sentence.
        print("\nTesting text processing...")
        test_text = "The quick brown fox jumps over the lazy dog."
        doc = analyzer.process_document(test_text)
        results["processing_works"] = True
        print(f"βœ“ Processed {len(doc)} tokens successfully")

    except Exception as e:
        print(f"βœ— Error: {e}")
        import traceback
        traceback.print_exc()

    return results

def test_gpu_performance():
    """Time transformer processing on GPU vs. CPU and report the speedup.

    Returns a dict with: gpu_time, cpu_time (seconds), speedup (cpu/gpu ratio).
    """
    print_header("4. GPU Performance Test")

    # Five copies of a moderately long repeated sentence.
    test_texts = ["The quick brown fox jumps over the lazy dog. " * 20] * 5

    results = {
        "gpu_time": None,
        "cpu_time": None,
        "speedup": None,
    }

    def timed_pass(analyzer):
        # One warm-up pass, then time a single sweep over every text.
        _ = analyzer.process_document(test_texts[0])
        started = time.time()
        for text in test_texts:
            _ = analyzer.process_document(text)
        return time.time() - started

    try:
        print("Testing GPU performance...")
        results["gpu_time"] = timed_pass(
            LexicalSophisticationAnalyzer(language="en", model_size="trf")
        )
        print(f"βœ“ GPU processing time: {results['gpu_time']:.2f} seconds")

        print("\nTesting CPU performance...")
        # gpu_device=-1 forces the analyzer onto the CPU.
        results["cpu_time"] = timed_pass(
            LexicalSophisticationAnalyzer(language="en", model_size="trf", gpu_device=-1)
        )
        print(f"βœ“ CPU processing time: {results['cpu_time']:.2f} seconds")

        # Speedup only when both timings are present and non-zero.
        if results["gpu_time"] and results["cpu_time"]:
            results["speedup"] = results["cpu_time"] / results["gpu_time"]
            print(f"\nβœ“ GPU speedup: {results['speedup']:.2f}x faster")

    except Exception as e:
        print(f"βœ— Performance test error: {e}")

    return results

def test_memory_usage():
    """Track CUDA memory (GiB on device 0) before/after model load and processing.

    Returns a dict with: before_load, after_load, after_process — or an empty
    dict when CUDA is unavailable.
    """
    print_header("5. GPU Memory Usage Test")

    if not torch.cuda.is_available():
        print("βœ— CUDA not available, skipping memory test")
        return {}

    results = {
        "before_load": None,
        "after_load": None,
        "after_process": None,
    }

    def allocated_gb():
        # Currently-allocated CUDA memory on device 0, in GiB.
        return torch.cuda.memory_allocated(0) / (1024**3)

    try:
        # Start from a clean allocator cache.
        torch.cuda.empty_cache()

        results["before_load"] = allocated_gb()
        print(f"Memory before model load: {results['before_load']:.2f} GB")

        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        results["after_load"] = allocated_gb()
        print(f"Memory after model load: {results['after_load']:.2f} GB")
        print(f"Model uses: {results['after_load'] - results['before_load']:.2f} GB")

        long_text = " ".join(["This is a test sentence." for _ in range(100)])
        _ = analyzer.process_document(long_text)
        results["after_process"] = allocated_gb()
        print(f"Memory after processing: {results['after_process']:.2f} GB")

        # Release the model and return cached blocks to the driver.
        del analyzer
        torch.cuda.empty_cache()

    except Exception as e:
        print(f"βœ— Memory test error: {e}")

    return results

def main():
    """Run every GPU integration test and print a pass/fail summary."""
    banner = "=" * 60
    print(banner)
    print(" GPU Integration Test Suite ")
    print(banner)

    # Dict literals evaluate left-to-right, so the tests run in order.
    all_results = {
        "environment": test_gpu_environment(),
        "spacy_config": test_spacy_gpu_configuration(),
        "model_loading": test_model_gpu_loading(),
    }

    # Performance and memory checks only make sense with a CUDA device.
    if all_results["environment"]["cuda_available"]:
        all_results["performance"] = test_gpu_performance()
        all_results["memory"] = test_memory_usage()

    print_header("Test Summary")

    env = all_results["environment"]
    loading = all_results["model_loading"]

    # GPU integration counts as working only when every layer agrees:
    # CUDA present, SpaCy on GPU, and model components verified on GPU.
    gpu_working = (
        env["cuda_available"]
        and all_results["spacy_config"]["spacy_gpu_enabled"]
        and loading["gpu_verified"]
    )

    if gpu_working:
        print("βœ… GPU INTEGRATION SUCCESSFUL")
        print(f"  - PyTorch CUDA: {env['cuda_version']}")
        print(f"  - GPU: {env['gpu_name']}")
        print(f"  - Components on GPU: {', '.join(loading['components_on_gpu'])}")

        perf = all_results.get("performance")
        if perf and perf["speedup"]:
            print(f"  - Performance speedup: {perf['speedup']:.2f}x")
    else:
        print("❌ GPU INTEGRATION FAILED")
        print("\nIssues detected:")

        if not env["cuda_available"]:
            print("  - CUDA not available (check PyTorch installation)")

        if not all_results["spacy_config"]["spacy_gpu_enabled"]:
            print("  - SpaCy GPU not enabled")

        if not loading["gpu_verified"]:
            print("  - Model components not on GPU")

    print("\n" + banner)

if __name__ == "__main__":
    main()