Spaces:
Building
Building
| #!/usr/bin/env python3 | |
| """ | |
| Comprehensive GPU integration test for the text analyzer. | |
| Tests the entire GPU pipeline from configuration to model usage. | |
| """ | |
| import sys | |
| import time | |
| import torch | |
| import spacy | |
| from text_analyzer.base_analyzer import BaseAnalyzer | |
| from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer | |
def print_header(title):
    """Display a section banner: the title framed by 60-char '=' rules."""
    rule = "=" * 60
    print("\n" + rule)
    print(f" {title} ")
    print(rule)
def test_gpu_environment():
    """Probe PyTorch/CUDA availability and return a summary dict.

    Keys: pytorch_available, cuda_available, gpu_count, gpu_name (last
    device enumerated), cuda_version. All default to falsy values and are
    filled in only for what the environment actually provides.
    """
    print_header("1. GPU Environment Test")
    info = {
        "pytorch_available": False,
        "cuda_available": False,
        "gpu_count": 0,
        "gpu_name": None,
        "cuda_version": None,
    }
    try:
        import torch

        info["pytorch_available"] = True
        print(f"β PyTorch installed: {torch.__version__}")
        if not torch.cuda.is_available():
            print("β CUDA not available")
        else:
            info["cuda_available"] = True
            info["gpu_count"] = torch.cuda.device_count()
            info["cuda_version"] = torch.version.cuda
            print(f"β CUDA available: {info['cuda_version']}")
            print(f"β GPU count: {info['gpu_count']}")
            for idx in range(info["gpu_count"]):
                name = torch.cuda.get_device_name(idx)
                info["gpu_name"] = name  # last device wins, as before
                print(f"β GPU {idx}: {name}")
                # Per-device memory / compute-capability report.
                props = torch.cuda.get_device_properties(idx)
                total_memory = props.total_memory / (1024**3)
                print(f" - Total memory: {total_memory:.1f} GB")
                print(f" - Compute capability: {props.major}.{props.minor}")
    except ImportError:
        print("β PyTorch not installed")
    except Exception as exc:
        print(f"β Error: {exc}")
    return info
def test_spacy_gpu_configuration():
    """Check whether spaCy accepts the GPU and which transformer add-ons exist.

    Returns a dict with `spacy_gpu_enabled` (bool) and
    `transformer_packages` (list of installed add-on package names).
    """
    print_header("2. SpaCy GPU Configuration Test")
    outcome = {
        "spacy_gpu_enabled": False,
        "transformer_packages": [],
    }
    try:
        import torch

        if torch.cuda.is_available():
            torch.cuda.set_device(0)
            print("β Set CUDA device to 0")
        device = spacy.prefer_gpu(0)
        if device is False:
            print("β SpaCy GPU not enabled")
        else:
            outcome["spacy_gpu_enabled"] = True
            print(f"β SpaCy GPU enabled on device {device}")
        # Detect optional transformer integrations by attempting the import.
        for module_name, package_name in (
            ("spacy_transformers", "spacy-transformers"),
            ("spacy_curated_transformers", "spacy-curated-transformers"),
        ):
            try:
                __import__(module_name)
            except ImportError:
                continue
            outcome["transformer_packages"].append(package_name)
        if outcome["transformer_packages"]:
            print(f"β Transformer packages: {', '.join(outcome['transformer_packages'])}")
        else:
            print("β No transformer packages found")
    except Exception as exc:
        print(f"β Error: {exc}")
    return outcome
def test_model_gpu_loading():
    """Load the transformer analyzer and verify its components run on the GPU.

    Returns a dict with flags `model_loaded`, `gpu_verified`,
    `processing_works`, and `components_on_gpu` (pipeline component names
    whose parameters were found on a CUDA device).
    """
    print_header("3. Model GPU Loading Test")
    results = {
        "model_loaded": False,
        "gpu_verified": False,
        "components_on_gpu": [],
        "processing_works": False,
    }
    try:
        # Initialize analyzer with the transformer ("trf") model size.
        print("Loading English transformer model...")
        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        results["model_loaded"] = True
        # Report what the analyzer says about its own device placement.
        model_info = analyzer.get_model_info()
        print(f"β Model loaded: {model_info['name']}")
        print(f" Device: {model_info['device']}")
        print(f" GPU enabled: {model_info['gpu_enabled']}")
        # Verify GPU usage at the pipeline-component level.
        if hasattr(analyzer, 'nlp') and analyzer.nlp:
            for pipe_name, pipe in analyzer.nlp.pipeline:
                if hasattr(pipe, 'model'):
                    is_on_gpu = False
                    # A component counts as "on GPU" if any parameter is CUDA.
                    if hasattr(pipe.model, 'parameters'):
                        try:
                            is_on_gpu = any(p.is_cuda for p in pipe.model.parameters())
                        except Exception:
                            # BUGFIX: was a bare `except:`; narrowed so
                            # KeyboardInterrupt/SystemExit still propagate.
                            pass
                    if is_on_gpu:
                        results["components_on_gpu"].append(pipe_name)
                        print(f"β Component '{pipe_name}' is on GPU")
                    else:
                        print(f"β Component '{pipe_name}' is on CPU")
        if results["components_on_gpu"]:
            results["gpu_verified"] = True
        # Smoke-test end-to-end document processing.
        print("\nTesting text processing...")
        test_text = "The quick brown fox jumps over the lazy dog."
        doc = analyzer.process_document(test_text)
        results["processing_works"] = True
        print(f"β Processed {len(doc)} tokens successfully")
    except Exception as e:
        print(f"β Error: {e}")
        import traceback
        traceback.print_exc()
    return results
def test_gpu_performance():
    """Time transformer processing on GPU vs CPU and compute the speedup.

    Returns a dict with `gpu_time`, `cpu_time` (seconds) and `speedup`
    (cpu_time / gpu_time); entries stay None if a stage fails.
    """
    print_header("4. GPU Performance Test")
    # Five identical medium-length documents as the benchmark corpus.
    corpus = ["The quick brown fox jumps over the lazy dog. " * 20] * 5
    timings = {
        "gpu_time": None,
        "cpu_time": None,
        "speedup": None,
    }
    try:
        print("Testing GPU performance...")
        gpu_analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        gpu_analyzer.process_document(corpus[0])  # warm-up pass
        started = time.time()
        for doc_text in corpus:
            gpu_analyzer.process_document(doc_text)
        timings["gpu_time"] = time.time() - started
        print(f"β GPU processing time: {timings['gpu_time']:.2f} seconds")

        print("\nTesting CPU performance...")
        cpu_analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf", gpu_device=-1)
        cpu_analyzer.process_document(corpus[0])  # warm-up pass
        started = time.time()
        for doc_text in corpus:
            cpu_analyzer.process_document(doc_text)
        timings["cpu_time"] = time.time() - started
        print(f"β CPU processing time: {timings['cpu_time']:.2f} seconds")

        if timings["gpu_time"] and timings["cpu_time"]:
            timings["speedup"] = timings["cpu_time"] / timings["gpu_time"]
            print(f"\nβ GPU speedup: {timings['speedup']:.2f}x faster")
    except Exception as exc:
        print(f"β Performance test error: {exc}")
    return timings
def test_memory_usage():
    """Snapshot CUDA memory before/after model load and after processing.

    Returns {} immediately when CUDA is unavailable; otherwise a dict of
    `before_load`, `after_load`, `after_process` allocations in GiB.
    """
    print_header("5. GPU Memory Usage Test")
    if not torch.cuda.is_available():
        print("β CUDA not available, skipping memory test")
        return {}
    snapshots = {
        "before_load": None,
        "after_load": None,
        "after_process": None,
    }
    gib = 1024 ** 3
    try:
        torch.cuda.empty_cache()  # start from a clean allocator state
        snapshots["before_load"] = torch.cuda.memory_allocated(0) / gib
        print(f"Memory before model load: {snapshots['before_load']:.2f} GB")

        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        snapshots["after_load"] = torch.cuda.memory_allocated(0) / gib
        print(f"Memory after model load: {snapshots['after_load']:.2f} GB")
        print(f"Model uses: {snapshots['after_load'] - snapshots['before_load']:.2f} GB")

        long_text = " ".join("This is a test sentence." for _ in range(100))
        analyzer.process_document(long_text)
        snapshots["after_process"] = torch.cuda.memory_allocated(0) / gib
        print(f"Memory after processing: {snapshots['after_process']:.2f} GB")

        # Release the model and return cached blocks to the driver.
        del analyzer
        torch.cuda.empty_cache()
    except Exception as exc:
        print(f"β Memory test error: {exc}")
    return snapshots
def main():
    """Run every GPU integration test and print a pass/fail summary."""
    banner = "=" * 60
    print(banner)
    print(" GPU Integration Test Suite ")
    print(banner)
    # Dict-literal evaluation order preserves the original test sequence.
    all_results = {
        "environment": test_gpu_environment(),
        "spacy_config": test_spacy_gpu_configuration(),
        "model_loading": test_model_gpu_loading(),
    }
    # Performance and memory tests only make sense with a working GPU.
    if all_results["environment"]["cuda_available"]:
        all_results["performance"] = test_gpu_performance()
        all_results["memory"] = test_memory_usage()

    print_header("Test Summary")
    env = all_results["environment"]
    loading = all_results["model_loading"]
    gpu_working = (
        env["cuda_available"]
        and all_results["spacy_config"]["spacy_gpu_enabled"]
        and loading["gpu_verified"]
    )
    if gpu_working:
        print("β GPU INTEGRATION SUCCESSFUL")
        print(f" - PyTorch CUDA: {env['cuda_version']}")
        print(f" - GPU: {env['gpu_name']}")
        print(f" - Components on GPU: {', '.join(loading['components_on_gpu'])}")
        perf = all_results.get("performance")
        if perf and perf["speedup"]:
            print(f" - Performance speedup: {perf['speedup']:.2f}x")
    else:
        print("β GPU INTEGRATION FAILED")
        print("\nIssues detected:")
        if not env["cuda_available"]:
            print(" - CUDA not available (check PyTorch installation)")
        if not all_results["spacy_config"]["spacy_gpu_enabled"]:
            print(" - SpaCy GPU not enabled")
        if not loading["gpu_verified"]:
            print(" - Model components not on GPU")
    print("\n" + banner)


if __name__ == "__main__":
    main()