# simple-text-analyzer / test_gpu_integration.py
# Commit a12eec8 (egumasa): "Fix GPU support for SpaCy transformer models"
#!/usr/bin/env python3
"""
Comprehensive GPU integration test for the text analyzer.
Tests the entire GPU pipeline from configuration to model usage.
"""
import sys
import time
import torch
import spacy
from text_analyzer.base_analyzer import BaseAnalyzer
from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer
def print_header(title):
    """Display *title* between two 60-character separator rules."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(f" {title} ")
    print(rule)
def test_gpu_environment():
    """Probe the local PyTorch/CUDA environment and report what was found.

    Returns:
        dict: ``pytorch_available``, ``cuda_available`` (bools), ``gpu_count``
        (int), ``gpu_name`` (name of the last enumerated GPU or None), and
        ``cuda_version`` (str or None).
    """
    print_header("1. GPU Environment Test")
    info = {
        "pytorch_available": False,
        "cuda_available": False,
        "gpu_count": 0,
        "gpu_name": None,
        "cuda_version": None,
    }
    try:
        # Local import on purpose: a missing install is reported here, not at
        # module load time.
        import torch
        info["pytorch_available"] = True
        print(f"βœ“ PyTorch installed: {torch.__version__}")
        if not torch.cuda.is_available():
            print("βœ— CUDA not available")
        else:
            info["cuda_available"] = True
            info["gpu_count"] = torch.cuda.device_count()
            info["cuda_version"] = torch.version.cuda
            print(f"βœ“ CUDA available: {info['cuda_version']}")
            print(f"βœ“ GPU count: {info['gpu_count']}")
            for device_idx in range(info["gpu_count"]):
                name = torch.cuda.get_device_name(device_idx)
                info["gpu_name"] = name
                print(f"βœ“ GPU {device_idx}: {name}")
                # Memory info
                device_props = torch.cuda.get_device_properties(device_idx)
                total_gb = device_props.total_memory / (1024**3)
                print(f" - Total memory: {total_gb:.1f} GB")
                print(f" - Compute capability: {device_props.major}.{device_props.minor}")
    except ImportError:
        print("βœ— PyTorch not installed")
    except Exception as e:
        print(f"βœ— Error: {e}")
    return info
def test_spacy_gpu_configuration():
    """Check that SpaCy can be pointed at the GPU and list transformer add-ons.

    Returns:
        dict: ``spacy_gpu_enabled`` (bool) and ``transformer_packages``
        (list of importable transformer package names).
    """
    print_header("2. SpaCy GPU Configuration Test")
    report = {
        "spacy_gpu_enabled": False,
        "transformer_packages": [],
    }
    try:
        # Test GPU preference
        import torch
        if torch.cuda.is_available():
            torch.cuda.set_device(0)
            print(f"βœ“ Set CUDA device to 0")
        gpu_id = spacy.prefer_gpu(0)
        if gpu_id is False:
            print("βœ— SpaCy GPU not enabled")
        else:
            report["spacy_gpu_enabled"] = True
            print(f"βœ“ SpaCy GPU enabled on device {gpu_id}")
        # Probe for the optional transformer back-end packages.
        for module_name, package_label in (
            ("spacy_transformers", "spacy-transformers"),
            ("spacy_curated_transformers", "spacy-curated-transformers"),
        ):
            try:
                __import__(module_name)
            except ImportError:
                continue
            report["transformer_packages"].append(package_label)
        if report["transformer_packages"]:
            print(f"βœ“ Transformer packages: {', '.join(report['transformer_packages'])}")
        else:
            print("βœ— No transformer packages found")
    except Exception as e:
        print(f"βœ— Error: {e}")
    return report
def test_model_gpu_loading():
    """Load the transformer analyzer and verify its pipeline runs on the GPU.

    Loads ``LexicalSophisticationAnalyzer`` with the English transformer
    model, inspects each pipeline component for CUDA-resident parameters,
    and runs one document through the pipeline as a smoke test.

    Returns:
        dict: ``model_loaded``, ``gpu_verified``, ``processing_works``
        (bools) and ``components_on_gpu`` (list of pipe names).
    """
    print_header("3. Model GPU Loading Test")
    results = {
        "model_loaded": False,
        "gpu_verified": False,
        "components_on_gpu": [],
        "processing_works": False,
    }
    try:
        # Initialize analyzer with transformer model
        print("Loading English transformer model...")
        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        results["model_loaded"] = True
        # Check model info
        model_info = analyzer.get_model_info()
        print(f"βœ“ Model loaded: {model_info['name']}")
        print(f" Device: {model_info['device']}")
        print(f" GPU enabled: {model_info['gpu_enabled']}")
        # Verify GPU usage at component level
        if hasattr(analyzer, 'nlp') and analyzer.nlp:
            for pipe_name, pipe in analyzer.nlp.pipeline:
                if not hasattr(pipe, 'model'):
                    continue
                is_on_gpu = False
                # A component counts as "on GPU" if any parameter is CUDA-resident.
                if hasattr(pipe.model, 'parameters'):
                    try:
                        is_on_gpu = any(param.is_cuda for param in pipe.model.parameters())
                    except Exception:
                        # BUGFIX: was a bare `except:`, which also swallowed
                        # SystemExit/KeyboardInterrupt. Non-torch (thinc) models
                        # may not expose torch-style parameters; treat any
                        # failure as "not verifiably on GPU".
                        pass
                if is_on_gpu:
                    results["components_on_gpu"].append(pipe_name)
                    print(f"βœ“ Component '{pipe_name}' is on GPU")
                else:
                    print(f"βœ— Component '{pipe_name}' is on CPU")
        if results["components_on_gpu"]:
            results["gpu_verified"] = True
        # Test processing
        print("\nTesting text processing...")
        test_text = "The quick brown fox jumps over the lazy dog."
        doc = analyzer.process_document(test_text)
        results["processing_works"] = True
        print(f"βœ“ Processed {len(doc)} tokens successfully")
    except Exception as e:
        print(f"βœ— Error: {e}")
        import traceback
        traceback.print_exc()
    return results
def test_gpu_performance():
    """Compare transformer processing time on GPU versus forced CPU.

    Returns:
        dict: ``gpu_time`` and ``cpu_time`` in seconds (or None on failure)
        and ``speedup`` (cpu_time / gpu_time, or None).
    """
    print_header("4. GPU Performance Test")
    # Five identical medium-length documents as the workload.
    test_texts = [
        "The quick brown fox jumps over the lazy dog. " * 20
        for _ in range(5)
    ]
    timings = {
        "gpu_time": None,
        "cpu_time": None,
        "speedup": None,
    }

    def _measure(analyzer):
        # One warm-up pass so lazy initialization is excluded from the timing.
        _ = analyzer.process_document(test_texts[0])
        started = time.time()
        for text in test_texts:
            _ = analyzer.process_document(text)
        return time.time() - started

    try:
        print("Testing GPU performance...")
        analyzer_gpu = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        timings["gpu_time"] = _measure(analyzer_gpu)
        print(f"βœ“ GPU processing time: {timings['gpu_time']:.2f} seconds")
        print("\nTesting CPU performance...")
        # gpu_device=-1 forces the CPU path for the comparison run.
        analyzer_cpu = LexicalSophisticationAnalyzer(language="en", model_size="trf", gpu_device=-1)
        timings["cpu_time"] = _measure(analyzer_cpu)
        print(f"βœ“ CPU processing time: {timings['cpu_time']:.2f} seconds")
        # Speedup only when both timings exist and are non-zero.
        if timings["gpu_time"] and timings["cpu_time"]:
            timings["speedup"] = timings["cpu_time"] / timings["gpu_time"]
            print(f"\nβœ“ GPU speedup: {timings['speedup']:.2f}x faster")
    except Exception as e:
        print(f"βœ— Performance test error: {e}")
    return timings
def test_memory_usage():
    """Track CUDA memory allocation before/after model load and processing.

    Returns:
        dict: ``before_load``, ``after_load``, ``after_process`` in GiB,
        or an empty dict when CUDA is unavailable.
    """
    print_header("5. GPU Memory Usage Test")
    if not torch.cuda.is_available():
        print("βœ— CUDA not available, skipping memory test")
        return {}

    def _allocated_gb():
        # Bytes currently allocated on device 0, expressed in GiB.
        return torch.cuda.memory_allocated(0) / (1024**3)

    snapshots = {
        "before_load": None,
        "after_load": None,
        "after_process": None,
    }
    try:
        # Start from a clean allocator state.
        torch.cuda.empty_cache()
        snapshots["before_load"] = _allocated_gb()
        print(f"Memory before model load: {snapshots['before_load']:.2f} GB")
        # Load model
        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        snapshots["after_load"] = _allocated_gb()
        print(f"Memory after model load: {snapshots['after_load']:.2f} GB")
        print(f"Model uses: {snapshots['after_load'] - snapshots['before_load']:.2f} GB")
        # Run a longer document through the pipeline and re-measure.
        long_text = " ".join(["This is a test sentence." for _ in range(100)])
        _ = analyzer.process_document(long_text)
        snapshots["after_process"] = _allocated_gb()
        print(f"Memory after processing: {snapshots['after_process']:.2f} GB")
        # Release the model and return cached blocks to the driver.
        del analyzer
        torch.cuda.empty_cache()
    except Exception as e:
        print(f"βœ— Memory test error: {e}")
    return snapshots
def main():
    """Run every GPU integration test and print a pass/fail summary."""
    banner = "=" * 60
    print(banner)
    print(" GPU Integration Test Suite ")
    print(banner)
    # Dict literals evaluate values in order, so the tests run in sequence.
    all_results = {
        "environment": test_gpu_environment(),
        "spacy_config": test_spacy_gpu_configuration(),
        "model_loading": test_model_gpu_loading(),
    }
    # Performance/memory tests only make sense with a CUDA device present.
    if all_results["environment"]["cuda_available"]:
        all_results["performance"] = test_gpu_performance()
        all_results["memory"] = test_memory_usage()
    print_header("Test Summary")
    env = all_results["environment"]
    spacy_cfg = all_results["spacy_config"]
    loading = all_results["model_loading"]
    # GPU counts as working only when every layer of the stack checks out.
    gpu_working = (
        env["cuda_available"]
        and spacy_cfg["spacy_gpu_enabled"]
        and loading["gpu_verified"]
    )
    if gpu_working:
        print("βœ… GPU INTEGRATION SUCCESSFUL")
        print(f" - PyTorch CUDA: {env['cuda_version']}")
        print(f" - GPU: {env['gpu_name']}")
        print(f" - Components on GPU: {', '.join(loading['components_on_gpu'])}")
        if "performance" in all_results and all_results["performance"]["speedup"]:
            print(f" - Performance speedup: {all_results['performance']['speedup']:.2f}x")
    else:
        print("❌ GPU INTEGRATION FAILED")
        print("\nIssues detected:")
        if not env["cuda_available"]:
            print(" - CUDA not available (check PyTorch installation)")
        if not spacy_cfg["spacy_gpu_enabled"]:
            print(" - SpaCy GPU not enabled")
        if not loading["gpu_verified"]:
            print(" - Model components not on GPU")
    print("\n" + "="*60)
# Script entry point: run the full test suite when executed directly.
if __name__ == "__main__":
    main()