File size: 9,845 Bytes
7b7db64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
#!/usr/bin/env python3
"""
CUDA Test Script for Speech Transcription App

This script helps users verify their CUDA setup and test performance
between CPU and GPU configurations.

Usage:
    python test_cuda.py
"""

import os
import sys
import time
import torch
import numpy as np
from dotenv import load_dotenv

def print_header(title):
    """Emit *title* as a banner framed by two 60-character '=' rules.

    A blank line precedes the top rule so the banner stands apart from any
    earlier output; the title itself is indented by one space.
    """
    rule = "=" * 60
    for line in (f"\n{rule}", f" {title}", rule):
        print(line)

def print_section(title):
    """Print a section heading for one group of checks.

    The output is a blank line, a magnifying-glass marker followed by
    *title*, and then a 40-character '-' rule.
    """
    # The marker literal had been stored as mojibake (the emoji's UTF-8 bytes
    # shown through a single-byte code page); use the intended character.
    print(f"\n🔍 {title}")
    print("-" * 40)

def test_pytorch_cuda():
    """Report the PyTorch build and, when CUDA is usable, each visible GPU.

    Prints the PyTorch version and CUDA availability. With CUDA available it
    also prints the CUDA and cuDNN versions, the device count, and the name,
    total memory (in GB) and compute capability of every device.

    Returns:
        bool: True when ``torch.cuda.is_available()`` is true, otherwise False.
    """
    print_section("PyTorch CUDA Test")

    print(f"PyTorch version: {torch.__version__}")
    cuda_ok = torch.cuda.is_available()
    print(f"CUDA available: {cuda_ok}")

    # Guard clause: nothing more to report without a usable CUDA runtime.
    if not cuda_ok:
        print("❌ CUDA not available")
        return False

    print(f"CUDA version: {torch.version.cuda}")
    print(f"cuDNN version: {torch.backends.cudnn.version()}")
    device_total = torch.cuda.device_count()
    print(f"Number of CUDA devices: {device_total}")

    for index in range(device_total):
        info = torch.cuda.get_device_properties(index)
        memory_gb = info.total_memory / 1e9
        print(f"Device {index}: {info.name}")
        print(f"  Memory: {memory_gb:.1f} GB")
        print(f"  Compute capability: {info.major}.{info.minor}")

    return True

def test_transformers_device():
    """Smoke-test a Hugging Face ``transformers`` pipeline on CPU and on CUDA.

    A small text-classification pipeline is built and run once on the CPU
    (``device=-1``) and, when ``torch.cuda.is_available()`` is true, once more
    on the first GPU (``device=0``). The wall-clock time of each run and the
    CPU/CUDA ratio are printed.

    Returns:
        bool: True when every attempted pipeline ran; False when
        ``transformers`` cannot be imported or any step raised.
    """
    print_section("Transformers Device Test")

    # Report a missing package explicitly, matching test_whisper_models.
    try:
        from transformers import pipeline
    except ImportError as e:
        print(f"❌ transformers not installed ({e})")
        return False

    model_name = "distilbert-base-uncased-finetuned-sst-2-english"
    sample_text = "This is a test sentence"

    def timed_run(device):
        """Build the pipeline on *device*, classify the sample, return (seconds, result)."""
        # perf_counter is monotonic and high-resolution, unlike time.time().
        started = time.perf_counter()
        classifier = pipeline("text-classification", model=model_name, device=device)
        outcome = classifier(sample_text)
        return time.perf_counter() - started, outcome

    try:
        # NOTE: each timing covers pipeline construction *and* one inference.
        # The CPU run goes first, so it presumably also absorbs any first-time
        # model download - treat the speedup below as indicative only.
        print("Testing CPU pipeline...")
        cpu_time, result_cpu = timed_run(-1)
        print(f"✅ CPU pipeline loaded in {cpu_time:.2f}s")
        print(f"Result: {result_cpu}")

        if torch.cuda.is_available():
            print("\nTesting CUDA pipeline...")
            cuda_time, result_cuda = timed_run(0)
            print(f"✅ CUDA pipeline loaded in {cuda_time:.2f}s")
            print(f"Result: {result_cuda}")

            speedup = cpu_time / cuda_time if cuda_time > 0 else 0
            print(f"\n🚀 Speedup: {speedup:.2f}x faster with CUDA")

        return True

    except Exception as e:
        print(f"❌ Error testing transformers: {e}")
        return False

def test_whisper_models():
    """Check that ``faster_whisper`` can load the ``tiny.en`` model.

    The model is loaded on the CPU with ``compute_type="int8"`` and, when
    ``torch.cuda.is_available()`` is true, again on CUDA with
    ``compute_type="float16"``. Load times and their ratio are printed.

    Returns:
        bool: True when every attempted load succeeded; False when
        ``faster_whisper`` cannot be imported or a load raised.
    """
    print_section("Whisper Model Test")

    try:
        from faster_whisper import WhisperModel
    except ImportError:
        print("❌ faster-whisper not installed")
        return False

    def load_seconds(device, compute_type):
        """Load ``tiny.en`` on *device* and return the elapsed seconds."""
        started = time.perf_counter()
        # The instance is deliberately not kept: only the load time matters,
        # and dropping it releases the model before the next load starts.
        WhisperModel("tiny.en", device=device, compute_type=compute_type)
        return time.perf_counter() - started

    print("Testing Whisper on CPU...")
    try:
        cpu_load_time = load_seconds("cpu", "int8")
    except Exception as e:
        print(f"❌ Error testing Whisper: {e}")
        return False
    print(f"✅ CPU model loaded in {cpu_load_time:.2f}s")

    if not torch.cuda.is_available():
        return True

    print("\nTesting Whisper on CUDA...")
    try:
        cuda_load_time = load_seconds("cuda", "float16")
    except Exception as e:
        print(f"❌ Error loading CUDA model: {e}")
        return False
    print(f"✅ CUDA model loaded in {cuda_load_time:.2f}s")

    speedup = cpu_load_time / cuda_load_time if cuda_load_time > 0 else 0
    print(f"🚀 Load speedup: {speedup:.2f}x faster with CUDA")
    return True

def test_memory_usage():
    """Allocate a small tensor on the GPU and report memory figures.

    Prints the total memory of the current CUDA device, the memory PyTorch
    has allocated before and after creating a 1000x1000 tensor, and the
    difference between total and allocated memory.

    Returns:
        bool: True when the allocation round-trip succeeded; False when CUDA
        is unavailable or any CUDA call raised.
    """
    print_section("GPU Memory Test")

    if not torch.cuda.is_available():
        print("❌ CUDA not available for memory test")
        return False

    try:
        # Query one and the same device throughout. The original mixed the
        # current device (memory_allocated) with hard-coded device 0
        # (get_device_properties), which disagree when device 0 is not current.
        device = torch.cuda.current_device()

        torch.cuda.empty_cache()
        initial_memory = torch.cuda.memory_allocated(device)
        total_memory = torch.cuda.get_device_properties(device).total_memory

        print(f"Total GPU memory: {total_memory / 1e9:.1f} GB")
        print(f"Initial memory usage: {initial_memory / 1e6:.1f} MB")

        # A 1000x1000 tensor is enough to show the allocator responding.
        test_tensor = torch.randn(1000, 1000, device="cuda")
        allocated_memory = torch.cuda.memory_allocated(device)
        print(f"Memory after tensor allocation: {allocated_memory / 1e6:.1f} MB")
        # This is total minus what torch.cuda.memory_allocated reports for this
        # process; usage by other processes is not reflected in the figure.
        print(f"Available memory: {(total_memory - allocated_memory) / 1e9:.1f} GB")

        # Clean up
        del test_tensor
        torch.cuda.empty_cache()
        print("✅ Memory test completed")
        return True

    except Exception as e:
        print(f"❌ Memory test failed: {e}")
        return False

def test_environment_config():
    """Check the ``.env`` file, the ``USE_CUDA`` variable and the config module.

    Loads a ``.env`` file located next to this script when one exists, prints
    the raw and parsed ``USE_CUDA`` value, then imports ``config`` from the
    script directory and prints the device and compute type it reports.

    Returns:
        bool: True when ``config`` imported and ``get_device_info()`` returned
        the expected keys; False otherwise.
    """
    print_section("Environment Configuration Test")

    # Resolve to an absolute directory so the paths printed and added to
    # sys.path are unambiguous even if __file__ is relative.
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Load .env file if it exists
    env_file = os.path.join(script_dir, '.env')
    if os.path.isfile(env_file):
        load_dotenv(env_file)
        print(f"✅ Found .env file: {env_file}")
    else:
        print(f"ℹ️  No .env file found at: {env_file}")
        print("   Create one from .env.example to configure CUDA usage")

    # Check USE_CUDA setting (read once so both lines describe the same value)
    raw_use_cuda = os.getenv('USE_CUDA', 'false')
    use_cuda = raw_use_cuda.lower() == 'true'
    print(f"USE_CUDA environment variable: {raw_use_cuda}")
    print(f"Parsed USE_CUDA value: {use_cuda}")

    # Test config import
    try:
        # Avoid stacking duplicate entries if this check runs more than once.
        if script_dir not in sys.path:
            sys.path.append(script_dir)
        from config import config
    except Exception as e:
        print(f"❌ Error importing config: {e}")
        return False
    print("✅ Config module imported successfully")

    # Reported separately so a failure here is not mislabelled as an import error.
    try:
        device_info = config.get_device_info()
        print(f"Selected device: {device_info['device']}")
        print(f"Compute type: {device_info['compute_type']}")
    except Exception as e:
        print(f"❌ Error reading device info from config: {e}")
        return False

    return True

def run_performance_benchmark(size=2000, iterations=5):
    """Time square matrix multiplications on the CPU and on CUDA.

    Args:
        size: Edge length of the square matrices multiplied (default 2000).
        iterations: Number of timed multiplications per device (default 5).

    Returns:
        None. Per-iteration times, per-device averages and the CPU/CUDA ratio
        are printed. When CUDA is unavailable a notice is printed instead.

    Raises:
        ValueError: If ``size`` or ``iterations`` is smaller than 1.
    """
    if size < 1 or iterations < 1:
        raise ValueError("size and iterations must be positive integers")

    print_section("Performance Benchmark")

    if not torch.cuda.is_available():
        print("❌ CUDA not available for benchmark")
        return

    print(f"Running {iterations} matrix multiplications ({size}x{size})...")

    def average_matmul_seconds(device, label):
        """Run the timed loop on *device* and return the mean seconds per multiply."""
        on_gpu = device == "cuda"

        # One untimed warm-up multiply, so one-off initialisation cost does not
        # land in the first measured iteration and skew the average.
        warm = torch.randn(size, size, device=device)
        torch.mm(warm, warm)
        if on_gpu:
            torch.cuda.synchronize()

        timings = []
        for i in range(iterations):
            a = torch.randn(size, size, device=device)
            b = torch.randn(size, size, device=device)

            # CUDA kernels launch asynchronously: synchronise before starting
            # and before stopping the clock so the interval covers the work.
            if on_gpu:
                torch.cuda.synchronize()
            started = time.perf_counter()
            torch.mm(a, b)
            if on_gpu:
                torch.cuda.synchronize()
            elapsed = time.perf_counter() - started

            timings.append(elapsed)
            print(f"  Iteration {i+1}: {elapsed:.3f}s")

        average = sum(timings) / len(timings)
        print(f"Average {label} time: {average:.3f}s")
        return average

    print("\nCPU benchmark:")
    avg_cpu_time = average_matmul_seconds("cpu", "CPU")

    print("\nCUDA benchmark:")
    avg_cuda_time = average_matmul_seconds("cuda", "CUDA")

    # Same zero guard the other speedup calculations in this script use.
    speedup = avg_cpu_time / avg_cuda_time if avg_cuda_time > 0 else 0
    print(f"\n🚀 Overall speedup: {speedup:.2f}x faster with CUDA")

def main():
    """Run every diagnostic check, the optional benchmark, and print a summary.

    Each check function returns a truthy value on success. The summary
    reports how many succeeded and prints setup guidance that depends on
    whether CUDA is available.

    Returns:
        None.
    """
    print_header("CUDA Configuration Test for Speech Transcription App")

    print("This script will test your CUDA setup and help you configure")
    print("the speech transcription app for optimal performance.")

    # The total is derived from this tuple, so adding or removing a check
    # cannot leave the summary count out of step.
    checks = (
        test_pytorch_cuda,
        test_transformers_device,
        test_whisper_models,
        test_memory_usage,
        test_environment_config,
    )
    total_tests = len(checks)
    tests_passed = 0

    for check in checks:
        # A check that raises counts as failed rather than aborting the whole
        # run, so the remaining checks and the summary are still produced.
        try:
            if check():
                tests_passed += 1
        except Exception as e:
            print(f"❌ {check.__name__} raised an unexpected error: {e}")

    cuda_available = torch.cuda.is_available()

    # Performance benchmark (optional)
    if cuda_available:
        try:
            run_performance_benchmark()
        except Exception as e:
            print(f"❌ Benchmark failed: {e}")

    # Summary
    print_header("Test Summary")
    print(f"Tests passed: {tests_passed}/{total_tests}")

    if tests_passed == total_tests and cuda_available:
        print("🎉 All tests passed! Your CUDA setup is working correctly.")
        print("\nTo enable CUDA acceleration:")
        print("1. Create a .env file (copy from .env.example)")
        print("2. Set USE_CUDA=true in the .env file")
        print("3. Run the speech transcription app")
    elif cuda_available:
        print("⚠️  Some tests failed. Check the error messages above.")
        print("You may still be able to use CUDA, but with potential issues.")
    else:
        print("ℹ️  CUDA not available. The app will run on CPU.")
        print("This is perfectly fine for most use cases!")

    print("\nFor CPU usage (always works):")
    print("1. Create a .env file (copy from .env.example)")
    print("2. Set USE_CUDA=false in the .env file")
    print("3. Run the speech transcription app")

# Run the diagnostics only when this file is executed as a script, so that
# importing the module does not trigger any checks.
if __name__ == "__main__":
    main()