Spaces:
Sleeping
Sleeping
File size: 9,845 Bytes
7b7db64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 |
#!/usr/bin/env python3
"""
CUDA Test Script for Speech Transcription App
This script helps users verify their CUDA setup and test performance
between CPU and GPU configurations.
Usage:
python test_cuda.py
"""
import os
import sys
import time
import torch
import numpy as np
from dotenv import load_dotenv
def print_header(title):
    """Print *title* framed above and below by a 60-character ``=`` rule.

    A blank line is emitted before the top rule so the header stands apart
    from whatever was printed previously.
    """
    rule = "=" * 60
    print("\n" + rule)
    print(f" {title}")
    print(rule)
def print_section(title):
    """Print a section heading: a blank line, a marker plus *title*, then a
    40-character ``-`` rule.
    """
    # NOTE(review): the original marker emoji was lost to mis-encoding (it
    # showed up as a stray Greek letter); the magnifying glass is a stand-in
    # -- confirm the intended glyph.
    print(f"\n🔍 {title}")
    print("-" * 40)
def test_pytorch_cuda():
    """Report what PyTorch can see of the CUDA installation.

    Prints the PyTorch version and whether CUDA is available. When it is,
    also prints the CUDA and cuDNN versions, the device count and, for each
    device, its name, total memory (in GB) and compute capability.

    Returns:
        bool: True when ``torch.cuda.is_available()`` is true, else False.
    """
    print_section("PyTorch CUDA Test")

    # Query once so the printed value and the branch below cannot disagree.
    cuda_available = torch.cuda.is_available()
    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA available: {cuda_available}")

    if not cuda_available:
        print("❌ CUDA not available")
        return False

    device_count = torch.cuda.device_count()
    print(f"CUDA version: {torch.version.cuda}")
    print(f"cuDNN version: {torch.backends.cudnn.version()}")
    print(f"Number of CUDA devices: {device_count}")

    for index in range(device_count):
        props = torch.cuda.get_device_properties(index)
        print(f"Device {index}: {props.name}")
        print(f" Memory: {props.total_memory / 1e9:.1f} GB")
        print(f" Compute capability: {props.major}.{props.minor}")

    return True
def test_transformers_device():
    """Run a small ``transformers`` pipeline on CPU and, if available, CUDA.

    Builds a ``text-classification`` pipeline for
    ``distilbert-base-uncased-finetuned-sst-2-english``, classifies a single
    sentence, and prints the elapsed time and the result. When CUDA is
    available the same is repeated on device 0 and the CPU/CUDA time ratio
    is printed.

    Returns:
        bool: True when every attempted pipeline ran without raising; False
        when anything raised (including ``transformers`` failing to import).
    """
    print_section("Transformers Device Test")

    model_name = "distilbert-base-uncased-finetuned-sst-2-english"
    sentence = "This is a test sentence"

    try:
        from transformers import pipeline

        # Test with CPU (device=-1).
        # NOTE(review): the timed span covers pipeline construction as well
        # as inference, so a first-run model download would presumably be
        # counted here and inflate the reported speedup -- confirm.
        print("Testing CPU pipeline...")
        start_time = time.perf_counter()
        pipe_cpu = pipeline("text-classification", model=model_name, device=-1)
        result_cpu = pipe_cpu(sentence)
        cpu_time = time.perf_counter() - start_time
        print(f"✅ CPU pipeline loaded in {cpu_time:.2f}s")
        print(f"Result: {result_cpu}")

        # Test with CUDA if available (device=0).
        if torch.cuda.is_available():
            print("\nTesting CUDA pipeline...")
            start_time = time.perf_counter()
            pipe_cuda = pipeline("text-classification", model=model_name, device=0)
            result_cuda = pipe_cuda(sentence)
            cuda_time = time.perf_counter() - start_time
            print(f"✅ CUDA pipeline loaded in {cuda_time:.2f}s")
            print(f"Result: {result_cuda}")

            # Guard the ratio so a zero-length measurement cannot raise.
            speedup = cpu_time / cuda_time if cuda_time > 0 else 0
            # NOTE(review): original emoji unrecoverable; rocket is a stand-in.
            print(f"\n🚀 Speedup: {speedup:.2f}x faster with CUDA")

        return True
    except Exception as e:
        # Diagnostic boundary: report the failure instead of aborting the run.
        print(f"❌ Error testing transformers: {e}")
        return False
def test_whisper_models():
    """Load the faster-whisper ``tiny.en`` model on CPU and, if available, CUDA.

    The CPU load uses ``compute_type="int8"``; the CUDA load uses
    ``compute_type="float16"``. Load times and their ratio are printed.

    Returns:
        bool: True when every attempted load succeeded; False when
        ``faster_whisper`` cannot be imported or a load raised.
    """
    print_section("Whisper Model Test")

    # Keep the ImportError handler scoped to the import itself so an
    # ImportError raised during model loading is not misreported as
    # "not installed".
    try:
        from faster_whisper import WhisperModel
    except ImportError:
        print("❌ faster-whisper not installed")
        return False

    try:
        # Test CPU model
        print("Testing Whisper on CPU...")
        start_time = time.perf_counter()
        model_cpu = WhisperModel("tiny.en", device="cpu", compute_type="int8")
        cpu_load_time = time.perf_counter() - start_time
        print(f"✅ CPU model loaded in {cpu_load_time:.2f}s")
        # Only the load time matters; drop the model before loading again.
        del model_cpu

        # Test CUDA model if available
        if torch.cuda.is_available():
            print("\nTesting Whisper on CUDA...")
            start_time = time.perf_counter()
            try:
                model_cuda = WhisperModel("tiny.en", device="cuda", compute_type="float16")
                cuda_load_time = time.perf_counter() - start_time
                print(f"✅ CUDA model loaded in {cuda_load_time:.2f}s")
                del model_cuda

                speedup = cpu_load_time / cuda_load_time if cuda_load_time > 0 else 0
                # NOTE(review): original emoji unrecoverable; rocket is a stand-in.
                print(f"🚀 Load speedup: {speedup:.2f}x faster with CUDA")
            except Exception as e:
                print(f"❌ Error loading CUDA model: {e}")
                return False

        return True
    except Exception as e:
        # Diagnostic boundary: report the failure instead of aborting the run.
        print(f"❌ Error testing Whisper: {e}")
        return False
def test_memory_usage():
    """Allocate a small tensor on the GPU and report memory figures.

    Prints the device's total memory, the memory allocated by PyTorch before
    and after creating a 1000x1000 tensor on ``"cuda"``, and the difference
    between total memory and that allocation.

    Returns:
        bool: True when the allocation round-trip succeeded; False when CUDA
        is unavailable or the allocation raised.
    """
    print_section("GPU Memory Test")

    if not torch.cuda.is_available():
        print("❌ CUDA not available for memory test")
        return False

    # Get initial memory. Read properties from the *current* device so the
    # total refers to the same GPU that memory_allocated() and
    # device="cuda" use.
    torch.cuda.empty_cache()
    device_index = torch.cuda.current_device()
    initial_memory = torch.cuda.memory_allocated()
    total_memory = torch.cuda.get_device_properties(device_index).total_memory

    print(f"Total GPU memory: {total_memory / 1e9:.1f} GB")
    print(f"Initial memory usage: {initial_memory / 1e6:.1f} MB")

    # Create a test tensor (1000 x 1000 elements) to exercise allocation.
    try:
        test_tensor = torch.randn(1000, 1000, device="cuda")
        allocated_memory = torch.cuda.memory_allocated()

        print(f"Memory after tensor allocation: {allocated_memory / 1e6:.1f} MB")
        # This is total minus what PyTorch has allocated in this process.
        print(f"Available memory: {(total_memory - allocated_memory) / 1e9:.1f} GB")

        # Clean up
        del test_tensor
        torch.cuda.empty_cache()

        print("✅ Memory test completed")
        return True
    except Exception as e:
        print(f"❌ Memory test failed: {e}")
        return False
def test_environment_config():
    """Check the ``.env`` file, the ``USE_CUDA`` variable and the config module.

    Loads ``.env`` from this script's directory when present, prints the raw
    and parsed ``USE_CUDA`` value (true only when the value lower-cases to
    ``"true"``), then imports ``config`` from the script's directory and
    prints the ``device`` and ``compute_type`` entries returned by
    ``config.get_device_info()``.

    Returns:
        bool: True when the config module imported and reported its device
        info; False when that raised.
    """
    print_section("Environment Configuration Test")

    # Resolve to an absolute path so the lookup does not depend on how the
    # script was invoked or on the current working directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Load .env file if it exists
    env_file = os.path.join(script_dir, '.env')
    if os.path.exists(env_file):
        load_dotenv(env_file)
        print(f"✅ Found .env file: {env_file}")
    else:
        print(f"ℹ️ No .env file found at: {env_file}")
        print(" Create one from .env.example to configure CUDA usage")

    # Check USE_CUDA setting (read once so both lines report the same value)
    raw_use_cuda = os.getenv('USE_CUDA', 'false')
    use_cuda = raw_use_cuda.lower() == 'true'
    print(f"USE_CUDA environment variable: {raw_use_cuda}")
    print(f"Parsed USE_CUDA value: {use_cuda}")

    # Test config import
    try:
        # Avoid piling duplicate entries onto sys.path on repeated calls.
        if script_dir not in sys.path:
            sys.path.append(script_dir)
        from config import config

        print("✅ Config module imported successfully")

        device_info = config.get_device_info()
        print(f"Selected device: {device_info['device']}")
        print(f"Compute type: {device_info['compute_type']}")

        return True
    except Exception as e:
        # Diagnostic boundary: report the failure instead of aborting the run.
        print(f"❌ Error importing config: {e}")
        return False
def _time_matmuls(size, iterations, device):
    """Time ``iterations`` square matrix multiplications on ``device``.

    Prints one line per iteration and returns the list of elapsed seconds.
    One untimed multiplication runs first so one-off initialisation cost is
    not attributed to the first measured iteration.
    """
    on_cuda = device == "cuda"

    def _sync():
        # CUDA kernels launch asynchronously; wait so the clock brackets
        # the actual computation.
        if on_cuda:
            torch.cuda.synchronize()

    # Warm-up (not timed).
    torch.mm(
        torch.randn(size, size, device=device),
        torch.randn(size, size, device=device),
    )
    _sync()

    timings = []
    for i in range(iterations):
        a = torch.randn(size, size, device=device)
        b = torch.randn(size, size, device=device)
        _sync()
        start_time = time.perf_counter()
        torch.mm(a, b)
        _sync()
        elapsed = time.perf_counter() - start_time
        timings.append(elapsed)
        print(f" Iteration {i+1}: {elapsed:.3f}s")
    return timings


def run_performance_benchmark():
    """Compare CPU and CUDA matrix-multiplication speed.

    Runs 5 multiplications of 2000x2000 random matrices on the CPU and on
    CUDA, printing each iteration's time, both averages and their ratio.
    Prints a notice and returns without benchmarking when CUDA is
    unavailable.

    Returns:
        None
    """
    print_section("Performance Benchmark")

    if not torch.cuda.is_available():
        print("❌ CUDA not available for benchmark")
        return

    # Matrix multiplication benchmark
    size = 2000
    iterations = 5

    print(f"Running {iterations} matrix multiplications ({size}x{size})...")

    # CPU benchmark
    print("\nCPU benchmark:")
    cpu_times = _time_matmuls(size, iterations, "cpu")
    avg_cpu_time = sum(cpu_times) / len(cpu_times)
    print(f"Average CPU time: {avg_cpu_time:.3f}s")

    # CUDA benchmark
    print("\nCUDA benchmark:")
    cuda_times = _time_matmuls(size, iterations, "cuda")
    avg_cuda_time = sum(cuda_times) / len(cuda_times)
    print(f"Average CUDA time: {avg_cuda_time:.3f}s")

    # Guard the ratio, matching the other speedup calculations in this file.
    speedup = avg_cpu_time / avg_cuda_time if avg_cuda_time > 0 else 0
    # NOTE(review): original emoji unrecoverable; rocket is a stand-in.
    print(f"\n🚀 Overall speedup: {speedup:.2f}x faster with CUDA")
def main():
    """Run every diagnostic, the optional benchmark, and print a summary.

    Each diagnostic returns a truthy value on success; the summary reports
    how many succeeded and prints setup instructions that depend on whether
    CUDA is available and whether all diagnostics passed.

    Returns:
        None
    """
    print_header("CUDA Configuration Test for Speech Transcription App")
    print("This script will test your CUDA setup and help you configure")
    print("the speech transcription app for optimal performance.")

    # Run tests. The total is derived from this tuple so the two cannot
    # drift apart when a test is added or removed.
    tests = (
        test_pytorch_cuda,
        test_transformers_device,
        test_whisper_models,
        test_memory_usage,
        test_environment_config,
    )
    total_tests = len(tests)
    tests_passed = sum(1 for test in tests if test())

    cuda_available = torch.cuda.is_available()

    # Performance benchmark (optional): a failure here is reported but does
    # not affect the pass count.
    if cuda_available:
        try:
            run_performance_benchmark()
        except Exception as e:
            print(f"❌ Benchmark failed: {e}")

    # Summary
    print_header("Test Summary")
    print(f"Tests passed: {tests_passed}/{total_tests}")

    if tests_passed == total_tests and cuda_available:
        # NOTE(review): original emoji unrecoverable; party popper is a stand-in.
        print("🎉 All tests passed! Your CUDA setup is working correctly.")
        print("\nTo enable CUDA acceleration:")
        print("1. Create a .env file (copy from .env.example)")
        print("2. Set USE_CUDA=true in the .env file")
        print("3. Run the speech transcription app")
    elif cuda_available:
        print("⚠️ Some tests failed. Check the error messages above.")
        print("You may still be able to use CUDA, but with potential issues.")
    else:
        print("ℹ️ CUDA not available. The app will run on CPU.")
        print("This is perfectly fine for most use cases!")

    # NOTE(review): the source's indentation was lost; these CPU
    # instructions are assumed to be printed in every case ("always works")
    # rather than only in the no-CUDA branch -- confirm.
    print("\nFor CPU usage (always works):")
    print("1. Create a .env file (copy from .env.example)")
    print("2. Set USE_CUDA=false in the .env file")
    print("3. Run the speech transcription app")
# Run the diagnostics only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|