File size: 10,845 Bytes
4d2898f
 
a12eec8
 
4d2898f
 
 
a12eec8
 
 
 
 
4d2898f
a12eec8
 
 
 
 
4d2898f
a12eec8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d2898f
a12eec8
 
 
4d2898f
a12eec8
 
 
 
4d2898f
 
a12eec8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d2898f
a12eec8
 
 
4d2898f
a12eec8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d2898f
a12eec8
 
 
 
4d2898f
 
a12eec8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d2898f
 
a12eec8
 
 
 
 
 
 
 
 
 
 
 
4d2898f
a12eec8
 
 
 
 
 
 
 
 
4d2898f
 
a12eec8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d2898f
a12eec8
4d2898f
 
a12eec8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
#!/usr/bin/env python3
"""
Comprehensive GPU integration test for the text analyzer.
Tests the entire GPU pipeline from configuration to model usage.
"""

import sys
import time
import torch
import spacy
from text_analyzer.base_analyzer import BaseAnalyzer
from text_analyzer.lexical_sophistication import LexicalSophisticationAnalyzer

def print_header(title):
    """Print *title* framed above and below by a 60-character rule."""
    bar = "=" * 60
    print(f"\n{bar}\n {title} \n{bar}")

def test_gpu_environment():
    """Probe the PyTorch/CUDA environment and report what was found.

    Returns a dict with: pytorch_available, cuda_available, gpu_count,
    gpu_name (last device enumerated), cuda_version.
    """
    print_header("1. GPU Environment Test")

    info = {
        "pytorch_available": False,
        "cuda_available": False,
        "gpu_count": 0,
        "gpu_name": None,
        "cuda_version": None,
    }

    try:
        # Local import on purpose: lets the ImportError branch report a
        # missing PyTorch installation even though the module also imports it.
        import torch
        info["pytorch_available"] = True
        print(f"βœ“ PyTorch installed: {torch.__version__}")

        if not torch.cuda.is_available():
            print("βœ— CUDA not available")
        else:
            info["cuda_available"] = True
            info["gpu_count"] = torch.cuda.device_count()
            info["cuda_version"] = torch.version.cuda

            print(f"βœ“ CUDA available: {info['cuda_version']}")
            print(f"βœ“ GPU count: {info['gpu_count']}")

            for idx in range(info["gpu_count"]):
                device_name = torch.cuda.get_device_name(idx)
                info["gpu_name"] = device_name
                print(f"βœ“ GPU {idx}: {device_name}")

                # Per-device memory / capability details.
                props = torch.cuda.get_device_properties(idx)
                total_gb = props.total_memory / (1024**3)
                print(f"  - Total memory: {total_gb:.1f} GB")
                print(f"  - Compute capability: {props.major}.{props.minor}")

    except ImportError:
        print("βœ— PyTorch not installed")
    except Exception as e:
        print(f"βœ— Error: {e}")

    return info

def test_spacy_gpu_configuration():
    """Check whether SpaCy can be steered onto the GPU and which transformer
    add-on packages are importable.

    Returns a dict with: spacy_gpu_enabled, transformer_packages.
    """
    print_header("2. SpaCy GPU Configuration Test")

    results = {
        "spacy_gpu_enabled": False,
        "transformer_packages": [],
    }

    try:
        # Pin CUDA device 0 first so SpaCy's preference lands on it.
        import torch
        if torch.cuda.is_available():
            torch.cuda.set_device(0)
            print(f"βœ“ Set CUDA device to 0")

        gpu_id = spacy.prefer_gpu(0)
        if gpu_id is False:
            print("βœ— SpaCy GPU not enabled")
        else:
            results["spacy_gpu_enabled"] = True
            print(f"βœ“ SpaCy GPU enabled on device {gpu_id}")

        # Record whichever transformer bridge packages can be imported.
        for module_name, package_label in (
            ("spacy_transformers", "spacy-transformers"),
            ("spacy_curated_transformers", "spacy-curated-transformers"),
        ):
            try:
                __import__(module_name)
            except ImportError:
                continue
            results["transformer_packages"].append(package_label)

        if results["transformer_packages"]:
            print(f"βœ“ Transformer packages: {', '.join(results['transformer_packages'])}")
        else:
            print("βœ— No transformer packages found")

    except Exception as e:
        print(f"βœ— Error: {e}")

    return results

def test_model_gpu_loading():
    """Load the transformer analyzer and verify its components run on the GPU.

    Returns a dict with: model_loaded, gpu_verified (True once any pipeline
    component's parameters are on CUDA), components_on_gpu (their names),
    processing_works (a short text round-trips through the pipeline).
    """
    print_header("3. Model GPU Loading Test")

    results = {
        "model_loaded": False,
        "gpu_verified": False,
        "components_on_gpu": [],
        "processing_works": False
    }

    try:
        # Initialize analyzer with the transformer ("trf") model variant.
        print("Loading English transformer model...")
        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        results["model_loaded"] = True

        # Check model info as reported by the analyzer itself.
        model_info = analyzer.get_model_info()
        print(f"βœ“ Model loaded: {model_info['name']}")
        print(f"  Device: {model_info['device']}")
        print(f"  GPU enabled: {model_info['gpu_enabled']}")

        # Verify GPU usage at component level: a component counts as
        # GPU-resident if any of its parameters live on a CUDA device.
        if hasattr(analyzer, 'nlp') and analyzer.nlp:
            for pipe_name, pipe in analyzer.nlp.pipeline:
                if hasattr(pipe, 'model'):
                    is_on_gpu = False

                    if hasattr(pipe.model, 'parameters'):
                        try:
                            # Fixed: was a bare `except:` that would swallow
                            # KeyboardInterrupt/SystemExit; narrowed to Exception.
                            is_on_gpu = any(
                                param.is_cuda for param in pipe.model.parameters()
                            )
                        except Exception:
                            # Some wrappers expose `parameters` without the
                            # torch iterator protocol; treat those as CPU.
                            pass

                    if is_on_gpu:
                        results["components_on_gpu"].append(pipe_name)
                        print(f"βœ“ Component '{pipe_name}' is on GPU")
                    else:
                        print(f"βœ— Component '{pipe_name}' is on CPU")

            if results["components_on_gpu"]:
                results["gpu_verified"] = True

        # Smoke-test the full pipeline on a short sentence.
        print("\nTesting text processing...")
        test_text = "The quick brown fox jumps over the lazy dog."
        doc = analyzer.process_document(test_text)
        results["processing_works"] = True
        print(f"βœ“ Processed {len(doc)} tokens successfully")

    except Exception as e:
        print(f"βœ— Error: {e}")
        import traceback
        traceback.print_exc()

    return results

def test_gpu_performance():
    """Time transformer processing on GPU vs. CPU and report the speedup.

    Returns a dict with: gpu_time, cpu_time (seconds), speedup (cpu/gpu ratio).
    """
    print_header("4. GPU Performance Test")

    # Five copies of a moderately long repeated sentence.
    test_texts = ["The quick brown fox jumps over the lazy dog. " * 20] * 5

    results = {
        "gpu_time": None,
        "cpu_time": None,
        "speedup": None,
    }

    def timed_pass(analyzer):
        # One warm-up pass, then time a single sweep over every text.
        _ = analyzer.process_document(test_texts[0])
        started = time.time()
        for text in test_texts:
            _ = analyzer.process_document(text)
        return time.time() - started

    try:
        print("Testing GPU performance...")
        results["gpu_time"] = timed_pass(
            LexicalSophisticationAnalyzer(language="en", model_size="trf")
        )
        print(f"βœ“ GPU processing time: {results['gpu_time']:.2f} seconds")

        print("\nTesting CPU performance...")
        # gpu_device=-1 forces the analyzer onto the CPU.
        results["cpu_time"] = timed_pass(
            LexicalSophisticationAnalyzer(language="en", model_size="trf", gpu_device=-1)
        )
        print(f"βœ“ CPU processing time: {results['cpu_time']:.2f} seconds")

        # Speedup only when both timings are present and non-zero.
        if results["gpu_time"] and results["cpu_time"]:
            results["speedup"] = results["cpu_time"] / results["gpu_time"]
            print(f"\nβœ“ GPU speedup: {results['speedup']:.2f}x faster")

    except Exception as e:
        print(f"βœ— Performance test error: {e}")

    return results

def test_memory_usage():
    """Track CUDA memory (GiB on device 0) before/after model load and processing.

    Returns a dict with: before_load, after_load, after_process — or an empty
    dict when CUDA is unavailable.
    """
    print_header("5. GPU Memory Usage Test")

    if not torch.cuda.is_available():
        print("βœ— CUDA not available, skipping memory test")
        return {}

    results = {
        "before_load": None,
        "after_load": None,
        "after_process": None,
    }

    def allocated_gb():
        # Currently-allocated CUDA memory on device 0, in GiB.
        return torch.cuda.memory_allocated(0) / (1024**3)

    try:
        # Start from a clean allocator cache.
        torch.cuda.empty_cache()

        results["before_load"] = allocated_gb()
        print(f"Memory before model load: {results['before_load']:.2f} GB")

        analyzer = LexicalSophisticationAnalyzer(language="en", model_size="trf")
        results["after_load"] = allocated_gb()
        print(f"Memory after model load: {results['after_load']:.2f} GB")
        print(f"Model uses: {results['after_load'] - results['before_load']:.2f} GB")

        long_text = " ".join(["This is a test sentence." for _ in range(100)])
        _ = analyzer.process_document(long_text)
        results["after_process"] = allocated_gb()
        print(f"Memory after processing: {results['after_process']:.2f} GB")

        # Release the model and return cached blocks to the driver.
        del analyzer
        torch.cuda.empty_cache()

    except Exception as e:
        print(f"βœ— Memory test error: {e}")

    return results

def main():
    """Run every GPU integration test and print a pass/fail summary."""
    banner = "=" * 60
    print(banner)
    print(" GPU Integration Test Suite ")
    print(banner)

    # Dict literals evaluate left-to-right, so the tests run in order.
    all_results = {
        "environment": test_gpu_environment(),
        "spacy_config": test_spacy_gpu_configuration(),
        "model_loading": test_model_gpu_loading(),
    }

    # Performance and memory checks only make sense with a CUDA device.
    if all_results["environment"]["cuda_available"]:
        all_results["performance"] = test_gpu_performance()
        all_results["memory"] = test_memory_usage()

    print_header("Test Summary")

    env = all_results["environment"]
    loading = all_results["model_loading"]

    # GPU integration counts as working only when every layer agrees:
    # CUDA present, SpaCy on GPU, and model components verified on GPU.
    gpu_working = (
        env["cuda_available"]
        and all_results["spacy_config"]["spacy_gpu_enabled"]
        and loading["gpu_verified"]
    )

    if gpu_working:
        print("βœ… GPU INTEGRATION SUCCESSFUL")
        print(f"  - PyTorch CUDA: {env['cuda_version']}")
        print(f"  - GPU: {env['gpu_name']}")
        print(f"  - Components on GPU: {', '.join(loading['components_on_gpu'])}")

        perf = all_results.get("performance")
        if perf and perf["speedup"]:
            print(f"  - Performance speedup: {perf['speedup']:.2f}x")
    else:
        print("❌ GPU INTEGRATION FAILED")
        print("\nIssues detected:")

        if not env["cuda_available"]:
            print("  - CUDA not available (check PyTorch installation)")

        if not all_results["spacy_config"]["spacy_gpu_enabled"]:
            print("  - SpaCy GPU not enabled")

        if not loading["gpu_verified"]:
            print("  - Model components not on GPU")

    print("\n" + banner)

if __name__ == "__main__":
    main()