#!/usr/bin/env python3
"""
Comprehensive GPU integration test for the text analyzer.

Tests the entire GPU pipeline from configuration to model usage:
environment detection, SpaCy GPU setup, model loading, performance,
and GPU memory accounting. Each test prints a human-readable report
and returns a dict of results that ``main`` aggregates into a summary.
"""

import sys
import time

import torch
import spacy

from text_analyzer.base_analyzer import BaseAnalyzer
from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer


def print_header(title):
    """Print a formatted section header."""
    print("\n" + "=" * 60)
    print(f" {title} ")
    print("=" * 60)


def test_gpu_environment():
    """Test GPU environment setup.

    Returns:
        dict: PyTorch/CUDA availability, GPU count, last GPU name, and
        CUDA version (``None``/``False`` defaults when unavailable).
    """
    print_header("1. GPU Environment Test")
    results = {
        "pytorch_available": False,
        "cuda_available": False,
        "gpu_count": 0,
        "gpu_name": None,
        "cuda_version": None,
    }
    try:
        # Local import is deliberate: this probe reports "not installed"
        # via ImportError even though torch is also imported module-level.
        import torch
        results["pytorch_available"] = True
        print(f"✓ PyTorch installed: {torch.__version__}")
        if torch.cuda.is_available():
            results["cuda_available"] = True
            results["gpu_count"] = torch.cuda.device_count()
            results["cuda_version"] = torch.version.cuda
            print(f"✓ CUDA available: {results['cuda_version']}")
            print(f"✓ GPU count: {results['gpu_count']}")
            for i in range(results["gpu_count"]):
                gpu_name = torch.cuda.get_device_name(i)
                # NOTE: with multiple GPUs only the last device's name is kept.
                results["gpu_name"] = gpu_name
                print(f"✓ GPU {i}: {gpu_name}")
                # Memory info
                props = torch.cuda.get_device_properties(i)
                total_memory = props.total_memory / (1024**3)
                print(f" - Total memory: {total_memory:.1f} GB")
                print(f" - Compute capability: {props.major}.{props.minor}")
        else:
            print("✗ CUDA not available")
    except ImportError:
        print("✗ PyTorch not installed")
    except Exception as e:
        print(f"✗ Error: {e}")
    return results


def test_spacy_gpu_configuration():
    """Test SpaCy GPU configuration.

    Returns:
        dict: whether SpaCy accepted the GPU, and which transformer
        integration packages are importable.
    """
    print_header("2. SpaCy GPU Configuration Test")
    results = {
        "spacy_gpu_enabled": False,
        "transformer_packages": [],
    }
    try:
        # Test GPU preference (torch is already imported at module level).
        if torch.cuda.is_available():
            torch.cuda.set_device(0)
            print("✓ Set CUDA device to 0")
        # NOTE(review): prefer_gpu is called unconditionally here; when CUDA
        # is absent it returns False and the ✗ branch reports it. Confirm the
        # original intent was not to skip this call entirely without CUDA.
        gpu_id = spacy.prefer_gpu(0)
        if gpu_id is not False:
            results["spacy_gpu_enabled"] = True
            print(f"✓ SpaCy GPU enabled on device {gpu_id}")
        else:
            print("✗ SpaCy GPU not enabled")
        # Check packages
        try:
            import spacy_transformers
            results["transformer_packages"].append("spacy-transformers")
        except ImportError:
            pass
        try:
            import spacy_curated_transformers
            results["transformer_packages"].append("spacy-curated-transformers")
        except ImportError:
            pass
        if results["transformer_packages"]:
            print(f"✓ Transformer packages: {', '.join(results['transformer_packages'])}")
        else:
            print("✗ No transformer packages found")
    except Exception as e:
        print(f"✗ Error: {e}")
    return results


def test_model_gpu_loading():
    """Test loading models with GPU support.

    Loads the English transformer analyzer, checks which pipeline
    components actually hold CUDA parameters, and runs a smoke-test
    document through the pipeline.

    Returns:
        dict: load/verify/process flags plus the list of pipeline
        component names whose parameters live on the GPU.
    """
    print_header("3. Model GPU Loading Test")
    results = {
        "model_loaded": False,
        "gpu_verified": False,
        "components_on_gpu": [],
        "processing_works": False,
    }
    try:
        # Initialize analyzer with transformer model
        print("Loading English transformer model...")
        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        results["model_loaded"] = True
        # Check model info
        model_info = analyzer.get_model_info()
        print(f"✓ Model loaded: {model_info['name']}")
        print(f" Device: {model_info['device']}")
        print(f" GPU enabled: {model_info['gpu_enabled']}")
        # Verify GPU usage at component level
        if hasattr(analyzer, 'nlp') and analyzer.nlp:
            for pipe_name, pipe in analyzer.nlp.pipeline:
                if hasattr(pipe, 'model'):
                    is_on_gpu = False
                    # Check if model has parameters on GPU. Thinc models don't
                    # expose .parameters() the way torch modules do, hence the
                    # guarded, best-effort probe.
                    if hasattr(pipe.model, 'parameters'):
                        try:
                            for param in pipe.model.parameters():
                                if param.is_cuda:
                                    is_on_gpu = True
                                    break
                        except Exception:  # was a bare except; keep best-effort but don't swallow KeyboardInterrupt
                            pass
                    if is_on_gpu:
                        results["components_on_gpu"].append(pipe_name)
                        print(f"✓ Component '{pipe_name}' is on GPU")
                    else:
                        print(f"✗ Component '{pipe_name}' is on CPU")
        if results["components_on_gpu"]:
            results["gpu_verified"] = True
        # Test processing
        print("\nTesting text processing...")
        test_text = "The quick brown fox jumps over the lazy dog."
        doc = analyzer.process_document(test_text)
        results["processing_works"] = True
        print(f"✓ Processed {len(doc)} tokens successfully")
    except Exception as e:
        print(f"✗ Error: {e}")
        import traceback
        traceback.print_exc()
    return results


def test_gpu_performance():
    """Test GPU performance improvement.

    Times the same document batch through a GPU-backed and a CPU-forced
    analyzer (``gpu_device=-1``) and reports the speedup ratio.

    Returns:
        dict: ``gpu_time``, ``cpu_time`` (seconds) and ``speedup``
        (CPU/GPU ratio), any of which may be ``None`` on failure.
    """
    print_header("4. GPU Performance Test")
    # Generate test data
    test_texts = [
        "The quick brown fox jumps over the lazy dog. " * 20
        for _ in range(5)
    ]
    results = {
        "gpu_time": None,
        "cpu_time": None,
        "speedup": None,
    }
    try:
        # Test with GPU
        print("Testing GPU performance...")
        analyzer_gpu = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        # Warm up (JIT/model caches) so timing reflects steady state
        _ = analyzer_gpu.process_document(test_texts[0])
        # Measure — perf_counter is monotonic, unlike time.time()
        start_time = time.perf_counter()
        for text in test_texts:
            _ = analyzer_gpu.process_document(text)
        results["gpu_time"] = time.perf_counter() - start_time
        print(f"✓ GPU processing time: {results['gpu_time']:.2f} seconds")
        # Test with CPU
        print("\nTesting CPU performance...")
        analyzer_cpu = LexicalSophisticationAnalyzer(
            language="en", model_size="trf", gpu_device=-1
        )
        # Warm up
        _ = analyzer_cpu.process_document(test_texts[0])
        # Measure
        start_time = time.perf_counter()
        for text in test_texts:
            _ = analyzer_cpu.process_document(text)
        results["cpu_time"] = time.perf_counter() - start_time
        print(f"✓ CPU processing time: {results['cpu_time']:.2f} seconds")
        # Calculate speedup
        if results["gpu_time"] and results["cpu_time"]:
            results["speedup"] = results["cpu_time"] / results["gpu_time"]
            print(f"\n✓ GPU speedup: {results['speedup']:.2f}x faster")
    except Exception as e:
        print(f"✗ Performance test error: {e}")
    return results


def test_memory_usage():
    """Test GPU memory usage.

    Samples ``torch.cuda.memory_allocated`` before model load, after
    load, and after processing a long document, reporting each in GB.

    Returns:
        dict: the three memory samples in GB, or ``{}`` when CUDA is
        unavailable.
    """
    print_header("5. GPU Memory Usage Test")
    if not torch.cuda.is_available():
        print("✗ CUDA not available, skipping memory test")
        return {}
    results = {
        "before_load": None,
        "after_load": None,
        "after_process": None,
    }
    try:
        # Clear cache so the baseline isn't inflated by prior tests
        torch.cuda.empty_cache()
        # Measure before loading
        results["before_load"] = torch.cuda.memory_allocated(0) / (1024**3)
        print(f"Memory before model load: {results['before_load']:.2f} GB")
        # Load model
        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        results["after_load"] = torch.cuda.memory_allocated(0) / (1024**3)
        print(f"Memory after model load: {results['after_load']:.2f} GB")
        print(f"Model uses: {results['after_load'] - results['before_load']:.2f} GB")
        # Process text
        long_text = " ".join(["This is a test sentence." for _ in range(100)])
        _ = analyzer.process_document(long_text)
        results["after_process"] = torch.cuda.memory_allocated(0) / (1024**3)
        print(f"Memory after processing: {results['after_process']:.2f} GB")
        # Clean up so later code sees a freed pool
        del analyzer
        torch.cuda.empty_cache()
    except Exception as e:
        print(f"✗ Memory test error: {e}")
    return results


def main():
    """Run all GPU integration tests and print a pass/fail summary."""
    print("=" * 60)
    print(" GPU Integration Test Suite ")
    print("=" * 60)
    all_results = {}
    # Run tests
    all_results["environment"] = test_gpu_environment()
    all_results["spacy_config"] = test_spacy_gpu_configuration()
    all_results["model_loading"] = test_model_gpu_loading()
    # Only run performance tests if GPU is available
    if all_results["environment"]["cuda_available"]:
        all_results["performance"] = test_gpu_performance()
        all_results["memory"] = test_memory_usage()
    # Summary
    print_header("Test Summary")
    # Check if GPU is working end to end
    gpu_working = (
        all_results["environment"]["cuda_available"]
        and all_results["spacy_config"]["spacy_gpu_enabled"]
        and all_results["model_loading"]["gpu_verified"]
    )
    if gpu_working:
        print("✅ GPU INTEGRATION SUCCESSFUL")
        print(f" - PyTorch CUDA: {all_results['environment']['cuda_version']}")
        print(f" - GPU: {all_results['environment']['gpu_name']}")
        print(f" - Components on GPU: {', '.join(all_results['model_loading']['components_on_gpu'])}")
        if "performance" in all_results and all_results["performance"]["speedup"]:
            print(f" - Performance speedup: {all_results['performance']['speedup']:.2f}x")
    else:
        print("❌ GPU INTEGRATION FAILED")
        print("\nIssues detected:")
        if not all_results["environment"]["cuda_available"]:
            print(" - CUDA not available (check PyTorch installation)")
        if not all_results["spacy_config"]["spacy_gpu_enabled"]:
            print(" - SpaCy GPU not enabled")
        if not all_results["model_loading"]["gpu_verified"]:
            print(" - Model components not on GPU")
    print("\n" + "=" * 60)


if __name__ == "__main__":
    main()