Brightcodelab committed on
Commit
c1a9cc1
·
verified ·
1 Parent(s): 57574f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -147
app.py CHANGED
@@ -1,153 +1,22 @@
1
  import os
 
2
  import torch
3
- import time
4
- import json
5
  import subprocess
6
 
7
- def test_gpu():
8
- """Run a comprehensive GPU test and return detailed results"""
9
- results = {
10
- "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
11
- "gpu_available": False,
12
- "gpu_count": 0,
13
- "gpus": [],
14
- "cuda_version": None,
15
- "torch_version": torch.__version__,
16
- "tests_passed": False,
17
- "errors": [],
18
- "performance": None
19
- }
20
-
21
- # Check if CUDA is available
22
- try:
23
- results["gpu_available"] = torch.cuda.is_available()
24
- if not results["gpu_available"]:
25
- results["errors"].append("CUDA is not available")
26
- return results
27
-
28
- # Get GPU count and info
29
- results["gpu_count"] = torch.cuda.device_count()
30
- results["cuda_version"] = torch.version.cuda
31
-
32
- for i in range(results["gpu_count"]):
33
- props = torch.cuda.get_device_properties(i)
34
- gpu_info = {
35
- "index": i,
36
- "name": props.name,
37
- "total_memory_gb": round(props.total_memory / (1024**3), 2),
38
- "compute_capability": f"{props.major}.{props.minor}"
39
- }
40
- results["gpus"].append(gpu_info)
41
-
42
- # Try to get VRAM usage with nvidia-smi
43
- try:
44
- output = subprocess.check_output(['nvidia-smi', '--query-gpu=index,memory.used,memory.total,utilization.gpu', '--format=csv,noheader,nounits'], text=True)
45
- for line in output.strip().split('\n'):
46
- if line.strip():
47
- parts = line.split(',')
48
- if len(parts) >= 3:
49
- idx = int(parts[0])
50
- mem_used = float(parts[1].strip())
51
- mem_total = float(parts[2].strip())
52
- util = float(parts[3].strip()) if len(parts) > 3 else 0
53
-
54
- # Update the corresponding entry in gpu_info
55
- for gpu in results["gpus"]:
56
- if gpu["index"] == idx:
57
- gpu["memory_used_gb"] = round(mem_used / 1024, 2)
58
- gpu["utilization"] = util
59
- break
60
- except (subprocess.SubprocessError, FileNotFoundError):
61
- # nvidia-smi not available, we'll continue without this info
62
- pass
63
-
64
- # Run a simple computation test
65
- device = torch.device("cuda")
66
-
67
- # Matrix multiplication test
68
- start_time = time.time()
69
- matrix_size = 5000
70
- a = torch.randn(matrix_size, matrix_size, device=device)
71
- b = torch.randn(matrix_size, matrix_size, device=device)
72
- torch.cuda.synchronize() # Wait for GPU operation to complete
73
-
74
- # Perform matrix multiplication
75
- start_compute = time.time()
76
- c = torch.matmul(a, b)
77
- torch.cuda.synchronize()
78
- end_compute = time.time()
79
-
80
- # Access a value to ensure computation completed
81
- _ = c[0, 0].item()
82
-
83
- end_time = time.time()
84
-
85
- # Record performance metrics
86
- results["performance"] = {
87
- "matrix_size": matrix_size,
88
- "total_time_ms": round((end_time - start_time) * 1000, 2),
89
- "computation_time_ms": round((end_compute - start_compute) * 1000, 2)
90
- }
91
-
92
- # Simple CUDA kernel launch test
93
- try:
94
- x = torch.ones(10, device=device)
95
- y = x + 1
96
- assert y.cpu().numpy().all() == 2
97
- except Exception as e:
98
- results["errors"].append(f"CUDA kernel test failed: {str(e)}")
99
- return results
100
-
101
- # All tests passed
102
- results["tests_passed"] = True
103
-
104
- except Exception as e:
105
- results["errors"].append(f"Test failed: {str(e)}")
106
-
107
- return results
108
 
109
- def main():
110
- print("======== GPU TEST STARTING ========")
111
- results = test_gpu()
112
-
113
- # Print results
114
- print(f"\nTimestamp: {results['timestamp']}")
115
- print(f"PyTorch version: {results['torch_version']}")
116
- print(f"CUDA version: {results['cuda_version']}")
117
- print(f"GPU available: {results['gpu_available']}")
118
-
119
- if results['gpu_available']:
120
- print(f"Found {results['gpu_count']} GPU(s)")
121
- for gpu in results['gpus']:
122
- print(f" GPU {gpu['index']}: {gpu['name']} ({gpu['total_memory_gb']}GB)")
123
- if 'memory_used_gb' in gpu:
124
- print(f" Memory used: {gpu['memory_used_gb']}GB")
125
- if 'utilization' in gpu:
126
- print(f" Utilization: {gpu['utilization']}%")
127
-
128
- if results['performance']:
129
- perf = results['performance']
130
- print(f"\nPerformance test ({perf['matrix_size']}x{perf['matrix_size']} matrix multiplication):")
131
- print(f" Total time: {perf['total_time_ms']}ms")
132
- print(f" Computation time: {perf['computation_time_ms']}ms")
133
-
134
- if results['errors']:
135
- print("\nErrors:")
136
- for error in results['errors']:
137
- print(f" - {error}")
138
-
139
- print(f"\nTests passed: {results['tests_passed']}")
140
- print("\n======== GPU TEST COMPLETE ========")
141
-
142
- # Save results to file
143
- with open("gpu_test_results.json", "w") as f:
144
- json.dump(results, f, indent=2)
145
-
146
- print("\nResults saved to gpu_test_results.json")
147
-
148
- # Return exit code based on test results
149
- return 0 if results["tests_passed"] else 1
150
 
151
- if __name__ == "__main__":
152
- exit_code = main()
153
- exit(exit_code)
 
1
  import os
2
+ import gradio as gr
3
  import torch
 
 
4
  import subprocess
5
 
6
+ print("===== Space Hardware Check =====")
7
+ print(f"CUDA available: {torch.cuda.is_available()}")
8
+ if torch.cuda.is_available():
9
+ print(f"GPU count: {torch.cuda.device_count()}")
10
+ for i in range(torch.cuda.device_count()):
11
+ print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
12
+ else:
13
+ print("No GPU detected by PyTorch")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ try:
16
+ nvidia_output = subprocess.check_output("nvidia-smi", shell=True).decode()
17
+ print("\nNVIDIA-SMI output:")
18
+ print(nvidia_output)
19
+ except Exception as e:
20
+ print(f"nvidia-smi error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ # Then your regular Gradio app code...