Update app.py
app.py CHANGED
@@ -1,153 +1,22 @@
 import os
+import gradio as gr
 import torch
-import time
-import json
 import subprocess
 
-
-
-def run_gpu_test():
-    results = {
-        "gpu_available": False,
-        "gpu_count": 0,
-        "gpus": [],
-        "cuda_version": None,
-        "torch_version": torch.__version__,
-        "tests_passed": False,
-        "errors": [],
-        "performance": None
-    }
-
-    # Check if CUDA is available
-    try:
-        results["gpu_available"] = torch.cuda.is_available()
-        if not results["gpu_available"]:
-            results["errors"].append("CUDA is not available")
-            return results
-
-        # Get GPU count and info
-        results["gpu_count"] = torch.cuda.device_count()
-        results["cuda_version"] = torch.version.cuda
-
-        for i in range(results["gpu_count"]):
-            props = torch.cuda.get_device_properties(i)
-            gpu_info = {
-                "index": i,
-                "name": props.name,
-                "total_memory_gb": round(props.total_memory / (1024**3), 2),
-                "compute_capability": f"{props.major}.{props.minor}"
-            }
-            results["gpus"].append(gpu_info)
-
-        # Try to get VRAM usage with nvidia-smi
-        try:
-            output = subprocess.check_output(['nvidia-smi', '--query-gpu=index,memory.used,memory.total,utilization.gpu', '--format=csv,noheader,nounits'], text=True)
-            for line in output.strip().split('\n'):
-                if line.strip():
-                    parts = line.split(',')
-                    if len(parts) >= 3:
-                        idx = int(parts[0])
-                        mem_used = float(parts[1].strip())
-                        mem_total = float(parts[2].strip())
-                        util = float(parts[3].strip()) if len(parts) > 3 else 0
-
-                        # Update the corresponding entry in gpu_info
-                        for gpu in results["gpus"]:
-                            if gpu["index"] == idx:
-                                gpu["memory_used_gb"] = round(mem_used / 1024, 2)
-                                gpu["utilization"] = util
-                                break
-        except (subprocess.SubprocessError, FileNotFoundError):
-            # nvidia-smi not available, we'll continue without this info
-            pass
-
-        # Run a simple computation test
-        device = torch.device("cuda")
-
-        # Matrix multiplication test
-        start_time = time.time()
-        matrix_size = 5000
-        a = torch.randn(matrix_size, matrix_size, device=device)
-        b = torch.randn(matrix_size, matrix_size, device=device)
-        torch.cuda.synchronize()  # Wait for GPU operation to complete
-
-        # Perform matrix multiplication
-        start_compute = time.time()
-        c = torch.matmul(a, b)
-        torch.cuda.synchronize()
-        end_compute = time.time()
-
-        # Access a value to ensure computation completed
-        _ = c[0, 0].item()
-
-        end_time = time.time()
-
-        # Record performance metrics
-        results["performance"] = {
-            "matrix_size": matrix_size,
-            "total_time_ms": round((end_time - start_time) * 1000, 2),
-            "computation_time_ms": round((end_compute - start_compute) * 1000, 2)
-        }
-
-        # Simple CUDA kernel launch test
-        try:
-            x = torch.ones(10, device=device)
-            y = x + 1
-            assert y.cpu().numpy().all() == 2
-        except Exception as e:
-            results["errors"].append(f"CUDA kernel test failed: {str(e)}")
-            return results
-
-        # All tests passed
-        results["tests_passed"] = True
-
-    except Exception as e:
-        results["errors"].append(f"Test failed: {str(e)}")
-
-    return results
+print("===== Space Hardware Check =====")
+print(f"CUDA available: {torch.cuda.is_available()}")
+if torch.cuda.is_available():
+    print(f"GPU count: {torch.cuda.device_count()}")
+    for i in range(torch.cuda.device_count()):
+        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
+else:
+    print("No GPU detected by PyTorch")
 
-
-
-def main():
-    results = run_gpu_test()
-
-    print(f"\n======== GPU TEST RESULTS ========")
-    print(f"PyTorch version: {results['torch_version']}")
-    print(f"CUDA version: {results['cuda_version']}")
-    print(f"GPU available: {results['gpu_available']}")
-
-    if results['gpu_available']:
-        print(f"Found {results['gpu_count']} GPU(s)")
-        for gpu in results['gpus']:
-            print(f"  GPU {gpu['index']}: {gpu['name']} ({gpu['total_memory_gb']}GB)")
-            if 'memory_used_gb' in gpu:
-                print(f"    Memory used: {gpu['memory_used_gb']}GB")
-            if 'utilization' in gpu:
-                print(f"    Utilization: {gpu['utilization']}%")
-
-    if results['performance']:
-        perf = results['performance']
-        print(f"\nPerformance test ({perf['matrix_size']}x{perf['matrix_size']} matrix multiplication):")
-        print(f"  Total time: {perf['total_time_ms']}ms")
-        print(f"  Computation time: {perf['computation_time_ms']}ms")
-
-    if results['errors']:
-        print("\nErrors:")
-        for error in results['errors']:
-            print(f"  - {error}")
-
-    print(f"\nTests passed: {results['tests_passed']}")
-    print("\n======== GPU TEST COMPLETE ========")
-
-    # Save results to file
-    with open("gpu_test_results.json", "w") as f:
-        json.dump(results, f, indent=2)
-
-    print("\nResults saved to gpu_test_results.json")
-
-    # Return exit code based on test results
-    return 0 if results["tests_passed"] else 1
+try:
+    nvidia_output = subprocess.check_output("nvidia-smi", shell=True).decode()
+    print("\nNVIDIA-SMI output:")
+    print(nvidia_output)
+except Exception as e:
+    print(f"nvidia-smi error: {e}")
 
-
-exit_code = main()
-exit(exit_code)
+# Then your regular Gradio app code...
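The new file stops at the placeholder comment above. For reference, a minimal sketch of what that Gradio block could look like, using the standard gr.Interface API; the gpu_summary helper (its name and the single-text-output layout) is an illustrative assumption, not part of this commit:

import torch
import gradio as gr

def gpu_summary():
    # Illustrative helper (not in the commit): report what PyTorch sees at request time.
    if not torch.cuda.is_available():
        return "No GPU detected by PyTorch"
    lines = [f"GPU count: {torch.cuda.device_count()}"]
    for i in range(torch.cuda.device_count()):
        lines.append(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    return "\n".join(lines)

# Expose the check as a simple text output; submitting re-runs it on the Space hardware.
demo = gr.Interface(fn=gpu_summary, inputs=None, outputs="text", title="Space Hardware Check")
demo.launch()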