# Spaces: egumasa / simple-text-analyzer (Building)
#
# File size: 6,458 Bytes
#
# 4d2898f

#!/usr/bin/env python3
"""
Test script to verify GPU/CUDA support for spaCy processing.
Run this to check if GPU acceleration is working correctly.
"""

import sys
import torch
import spacy
from text_analyzer.base_analyzer import BaseAnalyzer
from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer
from text_analyzer.pos_parser import POSParser

def check_cuda_availability():
    """Report whether PyTorch can see a CUDA device, with per-GPU details.

    Prints the PyTorch and CUDA versions, the number of GPUs and, for each
    GPU, its name plus the memory currently allocated and reserved (in MB).
    Everything goes to stdout; nothing is returned.
    """
    print("=== CUDA/GPU Information ===")

    bytes_per_mb = 1024**2

    try:
        if not torch.cuda.is_available():
            print("✗ CUDA is not available")
            print("  - PyTorch is installed but no GPU detected")
        else:
            print("✓ CUDA is available")
            print(f"  - PyTorch version: {torch.__version__}")
            print(f"  - CUDA version: {torch.version.cuda}")
            gpu_count = torch.cuda.device_count()
            print(f"  - Number of GPUs: {gpu_count}")

            for index in range(gpu_count):
                device_name = torch.cuda.get_device_name(index)
                print(f"  - GPU {index}: {device_name}")
                # torch reports bytes; convert to megabytes for display.
                allocated_mb = torch.cuda.memory_allocated(index) / bytes_per_mb
                reserved_mb = torch.cuda.memory_reserved(index) / bytes_per_mb
                print(f"    Memory allocated: {allocated_mb:.2f} MB")
                print(f"    Memory reserved: {reserved_mb:.2f} MB")
    except ImportError:
        print("✗ PyTorch is not installed")
        print("  - GPU support requires PyTorch installation")

    # Blank line separating this section from the next one.
    print()

def test_spacy_gpu():
    """Report whether spaCy can use the GPU and whether spacy-transformers imports.

    Results are printed to stdout; nothing is returned. Any exception raised
    while probing is caught and reported instead of being propagated.
    """
    print("=== SpaCy GPU Configuration ===")

    # spacy.prefer_gpu() returns a bool (GPU activated or not), not a device
    # id. Name the requested device explicitly so the success message reports
    # a real device index rather than "True".
    device_id = 0

    try:
        # Try to enable GPU
        gpu_enabled = spacy.prefer_gpu(gpu_id=device_id)
        if gpu_enabled:
            print(f"✓ SpaCy GPU enabled on device {device_id}")
        else:
            print("✗ SpaCy could not enable GPU")

        # Check if spacy-transformers is installed; the import is only an
        # availability probe, the module itself is not used here.
        try:
            import spacy_transformers  # noqa: F401
            print("✓ spacy-transformers is installed")
        except ImportError:
            print("✗ spacy-transformers not installed (required for transformer models)")

    except Exception as e:
        print(f"✗ Error testing spaCy GPU: {e}")

    print()

def test_analyzer_gpu(language="en", model_size="trf"):
    """Exercise LexicalSophisticationAnalyzer under several device settings.

    Runs four steps for the given ``language`` / ``model_size``: default
    construction, processing a short sample text, explicit ``gpu_device=0``
    (only when ``torch.cuda.is_available()``), and ``gpu_device=-1``.
    Model/device details are printed to stdout. Any exception aborts the
    remaining steps and is reported on stdout instead of being raised.
    """
    print(f"=== Testing {language.upper()} {model_size.upper()} Model ===")

    def show_device(info):
        # Shared two-line device report used by every step.
        print(f"   Device: {info['device']}")
        print(f"   GPU Enabled: {info['gpu_enabled']}")

    try:
        # Step 1: let the analyzer pick its device on its own.
        print("1. Testing automatic GPU detection...")
        auto_analyzer = LexicalSophisticationAnalyzer(
            language=language, model_size=model_size
        )
        auto_info = auto_analyzer.get_model_info()
        print(f"   Model: {auto_info['name']}")
        show_device(auto_info)

        # Step 2: run a sample sentence (Japanese text for any non-"en" language).
        if language == "en":
            sample = "The quick brown fox jumps over the lazy dog."
        else:
            sample = "これはテストです。"
        print("\n2. Testing text processing...")
        parsed = auto_analyzer.process_document(sample)
        print(f"   ✓ Successfully processed {len(parsed)} tokens")

        # Step 3: explicit GPU device, only attempted when CUDA is present.
        if torch.cuda.is_available():
            print("\n3. Testing explicit GPU device selection...")
            explicit_gpu_analyzer = LexicalSophisticationAnalyzer(
                language=language, model_size=model_size, gpu_device=0
            )
            explicit_gpu_info = explicit_gpu_analyzer.get_model_info()
            show_device(explicit_gpu_info)

        # Step 4: CPU-only mode.
        print("\n4. Testing CPU-only mode...")
        cpu_only_analyzer = LexicalSophisticationAnalyzer(
            language=language, model_size=model_size, gpu_device=-1
        )
        cpu_only_info = cpu_only_analyzer.get_model_info()
        show_device(cpu_only_info)

    except Exception as err:
        print(f"✗ Error testing analyzer: {err}")

    print()

def test_batch_processing_performance():
    """Compare document-processing time on GPU (when available) against CPU.

    Builds ten identical sample texts and processes them one at a time with a
    ``LexicalSophisticationAnalyzer`` using ``gpu_device=0`` (only when
    ``torch.cuda.is_available()``) and then ``gpu_device=-1``. Prints the
    total time and per-text average for each run, plus the CPU/GPU speedup
    when a GPU run was measured. Any exception is caught and reported on
    stdout; nothing is returned.
    """
    print("=== Batch Processing Performance Test ===")

    import time

    def timed_run(analyzer, texts):
        """Return the seconds taken to process every text in *texts*."""
        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # which can jump if the system clock is adjusted mid-measurement.
        started = time.perf_counter()
        for text in texts:
            analyzer.process_document(text)
        return time.perf_counter() - started

    # Generate test texts
    test_texts = [
        "The quick brown fox jumps over the lazy dog. " * 10
        for _ in range(10)
    ]

    # Stays None unless a GPU run was actually timed; the speedup report
    # below keys off this instead of re-querying CUDA availability.
    gpu_time = None

    try:
        # Test with GPU (if available)
        if torch.cuda.is_available():
            print("1. Testing GPU batch processing...")
            analyzer_gpu = LexicalSophisticationAnalyzer(language="en", model_size="trf", gpu_device=0)

            gpu_time = timed_run(analyzer_gpu, test_texts)
            print(f"   GPU processing time: {gpu_time:.2f} seconds")
            print(f"   Average per text: {gpu_time/len(test_texts):.3f} seconds")

        # Test with CPU
        print("\n2. Testing CPU batch processing...")
        analyzer_cpu = LexicalSophisticationAnalyzer(language="en", model_size="trf", gpu_device=-1)

        cpu_time = timed_run(analyzer_cpu, test_texts)
        print(f"   CPU processing time: {cpu_time:.2f} seconds")
        print(f"   Average per text: {cpu_time/len(test_texts):.3f} seconds")

        if gpu_time is not None:
            if gpu_time > 0:
                speedup = cpu_time / gpu_time
                print(f"\n   Speedup: {speedup:.2f}x")
            else:
                # Guard the division: a zero elapsed time would otherwise
                # surface as a confusing "division by zero" error below.
                print("\n   Speedup: n/a (GPU time too small to measure)")

    except Exception as e:
        print(f"✗ Error in performance test: {e}")

    print()

def main():
    """Run all GPU tests in order and print a banner around them.

    Sequence: CUDA availability report, spaCy GPU check, analyzer checks for
    English and Japanese transformer models, then the batch performance
    comparison. All output goes to stdout; nothing is returned.
    """
    banner = "=" * 50

    print(banner)
    print("SpaCy GPU Support Test")
    print(banner)
    print()

    # Check CUDA availability
    check_cuda_availability()

    # Test spaCy GPU
    test_spacy_gpu()

    # Test analyzers with different configurations
    test_analyzer_gpu("en", "trf")

    # Only test Japanese if the model is installed.
    # test_analyzer_gpu already reports ordinary errors itself, so this is a
    # last-resort guard. It catches Exception rather than using a bare
    # ``except`` so that Ctrl-C (KeyboardInterrupt) and SystemExit are not
    # swallowed and misreported as a missing model.
    try:
        test_analyzer_gpu("ja", "trf")
    except Exception:
        print("Japanese transformer model not installed, skipping...")

    # Performance test
    test_batch_processing_performance()

    print("\n" + banner)
    print("Test completed!")
    print(banner)

# Run the full set of GPU checks only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()