Spaces:

Brightcodelab
/

THUDM-CogVideoX-5b

Paused

App Files Community

Brightcodelab committed on Apr 17

Commit

7c7ed5d

verified ·

1 Parent(s): c1a9cc1

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -16

app.py CHANGED Viewed

@@ -1,22 +1,97 @@
-import os
 import gradio as gr
 import torch
 import subprocess
-print("===== Space Hardware Check =====")
-print(f"CUDA available: {torch.cuda.is_available()}")
-if torch.cuda.is_available():
-    print(f"GPU count: {torch.cuda.device_count()}")
-    for i in range(torch.cuda.device_count()):
-        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
-else:
-    print("No GPU detected by PyTorch")
-try:
-    nvidia_output = subprocess.check_output("nvidia-smi", shell=True).decode()
-    print("\nNVIDIA-SMI output:")
-    print(nvidia_output)
-except Exception as e:
-    print(f"nvidia-smi error: {e}")
-# Then your regular Gradio app code...

 import gradio as gr
 import torch
 import subprocess
+import os
+import time
+def check_gpu():
+    results = []
+    # Add timestamp
+    results.append(f"Test run at: {time.strftime('%Y-%m-%d %H:%M:%S')}")
+    # Check PyTorch CUDA availability
+    results.append(f"PyTorch version: {torch.__version__}")
+    results.append(f"CUDA available: {torch.cuda.is_available()}")
+    if torch.cuda.is_available():
+        results.append(f"CUDA version: {torch.version.cuda}")
+        results.append(f"GPU count: {torch.cuda.device_count()}")
+        for i in range(torch.cuda.device_count()):
+            props = torch.cuda.get_device_properties(i)
+            results.append(f"GPU {i}: {props.name}")
+            results.append(f"  - Total memory: {props.total_memory / 1024**3:.2f} GB")
+            results.append(f"  - Compute capability: {props.major}.{props.minor}")
+        # Test a simple CUDA operation
+        try:
+            x = torch.rand(1000, 1000, device="cuda")
+            y = torch.rand(1000, 1000, device="cuda")
+            start_time = time.time()
+            z = torch.matmul(x, y)
+            torch.cuda.synchronize()  # Wait for operation to complete
+            end_time = time.time()
+            results.append(f"Matrix multiplication test: {(end_time - start_time)*1000:.2f} ms")
+            results.append("CUDA operations working correctly ✅")
+        except Exception as e:
+            results.append(f"CUDA operation failed: {e}")
+    # Try nvidia-smi
+    try:
+        nvidia_smi = subprocess.check_output("nvidia-smi", shell=True).decode()
+        results.append("\nNVIDIA-SMI output:")
+        results.append(nvidia_smi)
+    except Exception as e:
+        results.append(f"nvidia-smi error: {e}")
+    # Check environment variables
+    cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "Not set")
+    results.append(f"\nCUDA_VISIBLE_DEVICES: {cuda_visible_devices}")
+    return "\n".join(results)
+def test_memory_allocation():
+    try:
+        # See how much GPU memory we can allocate
+        max_memory = 0
+        tensors = []
+        for size in [100, 500, 1000, 2000, 4000, 8000]:
+            try:
+                # Try to allocate a tensor of increasing size
+                tensor = torch.rand(size, size, device="cuda")
+                tensors.append(tensor)
+                memory_allocated = torch.cuda.memory_allocated() / (1024**3)  # Convert to GB
+                max_memory = memory_allocated
+                result = f"Successfully allocated {size}x{size} tensor. Total memory: {memory_allocated:.2f} GB"
+            except Exception as e:
+                result = f"Failed to allocate {size}x{size} tensor: {e}"
+                break
+        # Clean up
+        tensors = None
+        torch.cuda.empty_cache()
+        return f"Maximum GPU memory allocated: {max_memory:.2f} GB\n{result}"
+    except Exception as e:
+        return f"Memory test failed: {e}"
+# Create the Gradio interface
+with gr.Blocks(title="GPU Test") as demo:
+    gr.Markdown("# GPU Availability Test")
+    gr.Markdown("This app checks if GPU/CUDA is available and working in this Hugging Face Space")
+    with gr.Tab("Basic GPU Test"):
+        check_btn = gr.Button("Check GPU Status", variant="primary")
+        output = gr.Textbox(label="Results", lines=20)
+        check_btn.click(fn=check_gpu, outputs=output)
+    with gr.Tab("Memory Test"):
+        memory_btn = gr.Button("Test GPU Memory Allocation", variant="primary")
+        memory_output = gr.Textbox(label="Memory Test Results", lines=5)
+        memory_btn.click(fn=test_memory_allocation, outputs=memory_output)
+    # Auto-run the check on page load
+    demo.load(fn=check_gpu, outputs=output)
+demo.launch()