Brightcodelab committed
Commit 7c7ed5d · verified · 1 Parent(s): c1a9cc1

Update app.py

Files changed (1): app.py +91 -16
app.py CHANGED
@@ -1,22 +1,97 @@
-import os
 import gradio as gr
 import torch
 import subprocess
+import os
+import time
 
-print("===== Space Hardware Check =====")
-print(f"CUDA available: {torch.cuda.is_available()}")
-if torch.cuda.is_available():
-    print(f"GPU count: {torch.cuda.device_count()}")
-    for i in range(torch.cuda.device_count()):
-        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
-else:
-    print("No GPU detected by PyTorch")
+def check_gpu():
+    results = []
+
+    # Add timestamp
+    results.append(f"Test run at: {time.strftime('%Y-%m-%d %H:%M:%S')}")
+
+    # Check PyTorch CUDA availability
+    results.append(f"PyTorch version: {torch.__version__}")
+    results.append(f"CUDA available: {torch.cuda.is_available()}")
+
+    if torch.cuda.is_available():
+        results.append(f"CUDA version: {torch.version.cuda}")
+        results.append(f"GPU count: {torch.cuda.device_count()}")
+        for i in range(torch.cuda.device_count()):
+            props = torch.cuda.get_device_properties(i)
+            results.append(f"GPU {i}: {props.name}")
+            results.append(f" - Total memory: {props.total_memory / 1024**3:.2f} GB")
+            results.append(f" - Compute capability: {props.major}.{props.minor}")
+
+        # Test a simple CUDA operation
+        try:
+            x = torch.rand(1000, 1000, device="cuda")
+            y = torch.rand(1000, 1000, device="cuda")
+            start_time = time.time()
+            z = torch.matmul(x, y)
+            torch.cuda.synchronize()  # Wait for operation to complete
+            end_time = time.time()
+            results.append(f"Matrix multiplication test: {(end_time - start_time)*1000:.2f} ms")
+            results.append("CUDA operations working correctly ✅")
+        except Exception as e:
+            results.append(f"CUDA operation failed: {e}")
 
-try:
-    nvidia_output = subprocess.check_output("nvidia-smi", shell=True).decode()
-    print("\nNVIDIA-SMI output:")
-    print(nvidia_output)
-except Exception as e:
-    print(f"nvidia-smi error: {e}")
+    # Try nvidia-smi
+    try:
+        nvidia_smi = subprocess.check_output("nvidia-smi", shell=True).decode()
+        results.append("\nNVIDIA-SMI output:")
+        results.append(nvidia_smi)
+    except Exception as e:
+        results.append(f"nvidia-smi error: {e}")
 
-# Then your regular Gradio app code...
+    # Check environment variables
+    cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "Not set")
+    results.append(f"\nCUDA_VISIBLE_DEVICES: {cuda_visible_devices}")
+
+    return "\n".join(results)
+
+def test_memory_allocation():
+    try:
+        # See how much GPU memory we can allocate
+        max_memory = 0
+        tensors = []
+
+        for size in [100, 500, 1000, 2000, 4000, 8000]:
+            try:
+                # Try to allocate a tensor of increasing size
+                tensor = torch.rand(size, size, device="cuda")
+                tensors.append(tensor)
+                memory_allocated = torch.cuda.memory_allocated() / (1024**3)  # Convert to GB
+                max_memory = memory_allocated
+                result = f"Successfully allocated {size}x{size} tensor. Total memory: {memory_allocated:.2f} GB"
+            except Exception as e:
+                result = f"Failed to allocate {size}x{size} tensor: {e}"
+                break
+
+        # Clean up
+        tensors = None
+        torch.cuda.empty_cache()
+
+        return f"Maximum GPU memory allocated: {max_memory:.2f} GB\n{result}"
+    except Exception as e:
+        return f"Memory test failed: {e}"
+
+# Create the Gradio interface
+with gr.Blocks(title="GPU Test") as demo:
+    gr.Markdown("# GPU Availability Test")
+    gr.Markdown("This app checks if GPU/CUDA is available and working in this Hugging Face Space")
+
+    with gr.Tab("Basic GPU Test"):
+        check_btn = gr.Button("Check GPU Status", variant="primary")
+        output = gr.Textbox(label="Results", lines=20)
+        check_btn.click(fn=check_gpu, outputs=output)
+
+    with gr.Tab("Memory Test"):
+        memory_btn = gr.Button("Test GPU Memory Allocation", variant="primary")
+        memory_output = gr.Textbox(label="Memory Test Results", lines=5)
+        memory_btn.click(fn=test_memory_allocation, outputs=memory_output)
+
+    # Auto-run the check on page load
+    demo.load(fn=check_gpu, outputs=output)
+
+demo.launch()
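
A side note on the memory probe: recent PyTorch versions can report device memory directly via torch.cuda.mem_get_info (a wrapper around cudaMemGetInfo), which avoids allocating throwaway tensors the way test_memory_allocation does. A minimal sketch, assuming PyTorch >= 1.10 and a visible CUDA device:

import torch

if torch.cuda.is_available():
    # (free, total) in bytes for device 0, queried from the driver
    # without allocating any memory
    free_bytes, total_bytes = torch.cuda.mem_get_info(0)
    print(f"Free:  {free_bytes / 1024**3:.2f} GB")
    print(f"Total: {total_bytes / 1024**3:.2f} GB")
else:
    print("No CUDA device visible")

Unlike torch.cuda.memory_allocated(), this reports driver-level free/total memory for the whole device, not just what PyTorch's caching allocator is holding.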