# simple-text-analyzer / test_gpu_integration.py
# Commit a12eec8 (egumasa): "Fix GPU support for SpaCy transformer models"
#!/usr/bin/env python3
"""
Comprehensive GPU integration test for the text analyzer.
Tests the entire GPU pipeline from configuration to model usage.
"""
import sys
import time
import torch
import spacy
from text_analyzer.base_analyzer import BaseAnalyzer
from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer
def print_header(title):
    """Display *title* between two 60-character separator rules."""
    rule = "=" * 60
    print(f"\n{rule}")
    print(f" {title} ")
    print(rule)
def test_gpu_environment():
    """Probe the local PyTorch/CUDA environment and report what was found.

    Returns:
        dict: ``pytorch_available``, ``cuda_available`` (bools), ``gpu_count``
        (int), ``gpu_name`` (name of the last enumerated GPU or None), and
        ``cuda_version`` (str or None).
    """
    print_header("1. GPU Environment Test")
    info = {
        "pytorch_available": False,
        "cuda_available": False,
        "gpu_count": 0,
        "gpu_name": None,
        "cuda_version": None,
    }
    try:
        # Local import on purpose: a missing install is reported here, not at
        # module load time.
        import torch
        info["pytorch_available"] = True
        print(f"βœ“ PyTorch installed: {torch.__version__}")
        if not torch.cuda.is_available():
            print("βœ— CUDA not available")
        else:
            info["cuda_available"] = True
            info["gpu_count"] = torch.cuda.device_count()
            info["cuda_version"] = torch.version.cuda
            print(f"βœ“ CUDA available: {info['cuda_version']}")
            print(f"βœ“ GPU count: {info['gpu_count']}")
            for device_idx in range(info["gpu_count"]):
                name = torch.cuda.get_device_name(device_idx)
                info["gpu_name"] = name
                print(f"βœ“ GPU {device_idx}: {name}")
                # Memory info
                device_props = torch.cuda.get_device_properties(device_idx)
                total_gb = device_props.total_memory / (1024**3)
                print(f" - Total memory: {total_gb:.1f} GB")
                print(f" - Compute capability: {device_props.major}.{device_props.minor}")
    except ImportError:
        print("βœ— PyTorch not installed")
    except Exception as e:
        print(f"βœ— Error: {e}")
    return info
def test_spacy_gpu_configuration():
    """Check that SpaCy can be pointed at the GPU and list transformer add-ons.

    Returns:
        dict: ``spacy_gpu_enabled`` (bool) and ``transformer_packages``
        (list of importable transformer package names).
    """
    print_header("2. SpaCy GPU Configuration Test")
    report = {
        "spacy_gpu_enabled": False,
        "transformer_packages": [],
    }
    try:
        # Test GPU preference
        import torch
        if torch.cuda.is_available():
            torch.cuda.set_device(0)
            print(f"βœ“ Set CUDA device to 0")
        gpu_id = spacy.prefer_gpu(0)
        if gpu_id is False:
            print("βœ— SpaCy GPU not enabled")
        else:
            report["spacy_gpu_enabled"] = True
            print(f"βœ“ SpaCy GPU enabled on device {gpu_id}")
        # Probe for the optional transformer back-end packages.
        for module_name, package_label in (
            ("spacy_transformers", "spacy-transformers"),
            ("spacy_curated_transformers", "spacy-curated-transformers"),
        ):
            try:
                __import__(module_name)
            except ImportError:
                continue
            report["transformer_packages"].append(package_label)
        if report["transformer_packages"]:
            print(f"βœ“ Transformer packages: {', '.join(report['transformer_packages'])}")
        else:
            print("βœ— No transformer packages found")
    except Exception as e:
        print(f"βœ— Error: {e}")
    return report
def test_model_gpu_loading():
    """Load the transformer analyzer and verify its pipeline runs on the GPU.

    Loads ``LexicalSophisticationAnalyzer`` with the English transformer
    model, inspects each pipeline component for CUDA-resident parameters,
    and runs one document through the pipeline as a smoke test.

    Returns:
        dict: ``model_loaded``, ``gpu_verified``, ``processing_works``
        (bools) and ``components_on_gpu`` (list of pipe names).
    """
    print_header("3. Model GPU Loading Test")
    results = {
        "model_loaded": False,
        "gpu_verified": False,
        "components_on_gpu": [],
        "processing_works": False,
    }
    try:
        # Initialize analyzer with transformer model
        print("Loading English transformer model...")
        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        results["model_loaded"] = True
        # Check model info
        model_info = analyzer.get_model_info()
        print(f"βœ“ Model loaded: {model_info['name']}")
        print(f" Device: {model_info['device']}")
        print(f" GPU enabled: {model_info['gpu_enabled']}")
        # Verify GPU usage at component level
        if hasattr(analyzer, 'nlp') and analyzer.nlp:
            for pipe_name, pipe in analyzer.nlp.pipeline:
                if not hasattr(pipe, 'model'):
                    continue
                is_on_gpu = False
                # A component counts as "on GPU" if any parameter is CUDA-resident.
                if hasattr(pipe.model, 'parameters'):
                    try:
                        is_on_gpu = any(param.is_cuda for param in pipe.model.parameters())
                    except Exception:
                        # BUGFIX: was a bare `except:`, which also swallowed
                        # SystemExit/KeyboardInterrupt. Non-torch (thinc) models
                        # may not expose torch-style parameters; treat any
                        # failure as "not verifiably on GPU".
                        pass
                if is_on_gpu:
                    results["components_on_gpu"].append(pipe_name)
                    print(f"βœ“ Component '{pipe_name}' is on GPU")
                else:
                    print(f"βœ— Component '{pipe_name}' is on CPU")
        if results["components_on_gpu"]:
            results["gpu_verified"] = True
        # Test processing
        print("\nTesting text processing...")
        test_text = "The quick brown fox jumps over the lazy dog."
        doc = analyzer.process_document(test_text)
        results["processing_works"] = True
        print(f"βœ“ Processed {len(doc)} tokens successfully")
    except Exception as e:
        print(f"βœ— Error: {e}")
        import traceback
        traceback.print_exc()
    return results
def test_gpu_performance():
    """Compare transformer processing time on GPU versus forced CPU.

    Returns:
        dict: ``gpu_time`` and ``cpu_time`` in seconds (or None on failure)
        and ``speedup`` (cpu_time / gpu_time, or None).
    """
    print_header("4. GPU Performance Test")
    # Five identical medium-length documents as the workload.
    test_texts = [
        "The quick brown fox jumps over the lazy dog. " * 20
        for _ in range(5)
    ]
    timings = {
        "gpu_time": None,
        "cpu_time": None,
        "speedup": None,
    }

    def _measure(analyzer):
        # One warm-up pass so lazy initialization is excluded from the timing.
        _ = analyzer.process_document(test_texts[0])
        started = time.time()
        for text in test_texts:
            _ = analyzer.process_document(text)
        return time.time() - started

    try:
        print("Testing GPU performance...")
        analyzer_gpu = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        timings["gpu_time"] = _measure(analyzer_gpu)
        print(f"βœ“ GPU processing time: {timings['gpu_time']:.2f} seconds")
        print("\nTesting CPU performance...")
        # gpu_device=-1 forces the CPU path for the comparison run.
        analyzer_cpu = LexicalSophisticationAnalyzer(language="en", model_size="trf", gpu_device=-1)
        timings["cpu_time"] = _measure(analyzer_cpu)
        print(f"βœ“ CPU processing time: {timings['cpu_time']:.2f} seconds")
        # Speedup only when both timings exist and are non-zero.
        if timings["gpu_time"] and timings["cpu_time"]:
            timings["speedup"] = timings["cpu_time"] / timings["gpu_time"]
            print(f"\nβœ“ GPU speedup: {timings['speedup']:.2f}x faster")
    except Exception as e:
        print(f"βœ— Performance test error: {e}")
    return timings
def test_memory_usage():
    """Track CUDA memory allocation before/after model load and processing.

    Returns:
        dict: ``before_load``, ``after_load``, ``after_process`` in GiB,
        or an empty dict when CUDA is unavailable.
    """
    print_header("5. GPU Memory Usage Test")
    if not torch.cuda.is_available():
        print("βœ— CUDA not available, skipping memory test")
        return {}

    def _allocated_gb():
        # Bytes currently allocated on device 0, expressed in GiB.
        return torch.cuda.memory_allocated(0) / (1024**3)

    snapshots = {
        "before_load": None,
        "after_load": None,
        "after_process": None,
    }
    try:
        # Start from a clean allocator state.
        torch.cuda.empty_cache()
        snapshots["before_load"] = _allocated_gb()
        print(f"Memory before model load: {snapshots['before_load']:.2f} GB")
        # Load model
        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        snapshots["after_load"] = _allocated_gb()
        print(f"Memory after model load: {snapshots['after_load']:.2f} GB")
        print(f"Model uses: {snapshots['after_load'] - snapshots['before_load']:.2f} GB")
        # Run a longer document through the pipeline and re-measure.
        long_text = " ".join(["This is a test sentence." for _ in range(100)])
        _ = analyzer.process_document(long_text)
        snapshots["after_process"] = _allocated_gb()
        print(f"Memory after processing: {snapshots['after_process']:.2f} GB")
        # Release the model and return cached blocks to the driver.
        del analyzer
        torch.cuda.empty_cache()
    except Exception as e:
        print(f"βœ— Memory test error: {e}")
    return snapshots
def main():
    """Run every GPU integration test and print a pass/fail summary."""
    banner = "=" * 60
    print(banner)
    print(" GPU Integration Test Suite ")
    print(banner)
    # Dict literals evaluate values in order, so the tests run in sequence.
    all_results = {
        "environment": test_gpu_environment(),
        "spacy_config": test_spacy_gpu_configuration(),
        "model_loading": test_model_gpu_loading(),
    }
    # Performance/memory tests only make sense with a CUDA device present.
    if all_results["environment"]["cuda_available"]:
        all_results["performance"] = test_gpu_performance()
        all_results["memory"] = test_memory_usage()
    print_header("Test Summary")
    env = all_results["environment"]
    spacy_cfg = all_results["spacy_config"]
    loading = all_results["model_loading"]
    # GPU counts as working only when every layer of the stack checks out.
    gpu_working = (
        env["cuda_available"]
        and spacy_cfg["spacy_gpu_enabled"]
        and loading["gpu_verified"]
    )
    if gpu_working:
        print("βœ… GPU INTEGRATION SUCCESSFUL")
        print(f" - PyTorch CUDA: {env['cuda_version']}")
        print(f" - GPU: {env['gpu_name']}")
        print(f" - Components on GPU: {', '.join(loading['components_on_gpu'])}")
        if "performance" in all_results and all_results["performance"]["speedup"]:
            print(f" - Performance speedup: {all_results['performance']['speedup']:.2f}x")
    else:
        print("❌ GPU INTEGRATION FAILED")
        print("\nIssues detected:")
        if not env["cuda_available"]:
            print(" - CUDA not available (check PyTorch installation)")
        if not spacy_cfg["spacy_gpu_enabled"]:
            print(" - SpaCy GPU not enabled")
        if not loading["gpu_verified"]:
            print(" - Model components not on GPU")
    print("\n" + "="*60)
# Script entry point: run the full test suite when executed directly.
if __name__ == "__main__":
    main()