|
|
import gradio as gr |
|
|
import torch |
|
|
import subprocess |
|
|
import os |
|
|
import time |
|
|
from huggingface_hub import SpaceStage |
|
|
from huggingface_hub.utils import RepositoryNotFoundError |
|
|
from huggingface_hub.spaces import get_space_stage |
|
|
import spaces |
|
|
|
|
|
@spaces.GPU
def check_gpu():
    """Build a human-readable GPU/CUDA diagnostic report.

    Gathers PyTorch/CUDA versions, per-device properties, a timed
    matrix-multiplication smoke test, ``nvidia-smi`` output, and the
    CUDA_VISIBLE_DEVICES setting.

    Returns:
        str: the report, one finding per line.
    """
    results = []

    results.append(f"Test run at: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    results.append(f"PyTorch version: {torch.__version__}")
    results.append(f"CUDA available: {torch.cuda.is_available()}")

    if torch.cuda.is_available():
        results.append(f"CUDA version: {torch.version.cuda}")
        results.append(f"GPU count: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            props = torch.cuda.get_device_properties(i)
            results.append(f"GPU {i}: {props.name}")
            results.append(f" - Total memory: {props.total_memory / 1024**3:.2f} GB")
            results.append(f" - Compute capability: {props.major}.{props.minor}")

    # Smoke-test actual CUDA compute; failures (including "no CUDA") are
    # reported in the output rather than raised.
    try:
        x = torch.rand(1000, 1000, device="cuda")
        y = torch.rand(1000, 1000, device="cuda")
        # Warm-up pass: the first CUDA op pays one-time context/kernel
        # initialization costs that would otherwise distort the timing.
        torch.matmul(x, y)
        torch.cuda.synchronize()
        start_time = time.time()
        torch.matmul(x, y)
        # CUDA launches are asynchronous; synchronize before reading the clock.
        torch.cuda.synchronize()
        end_time = time.time()
        results.append(f"Matrix multiplication test: {(end_time - start_time)*1000:.2f} ms")
        results.append("CUDA operations working correctly ✅")
    except Exception as e:
        results.append(f"CUDA operation failed: {e}")

    try:
        # Argument list + default shell=False avoids spawning a shell —
        # safer and equivalent here, since nothing needs shell interpolation.
        nvidia_smi = subprocess.check_output(["nvidia-smi"]).decode()
        results.append("\nNVIDIA-SMI output:")
        results.append(nvidia_smi)
    except Exception as e:
        results.append(f"nvidia-smi error: {e}")

    cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "Not set")
    results.append(f"\nCUDA_VISIBLE_DEVICES: {cuda_visible_devices}")

    return "\n".join(results)
|
|
|
|
|
@spaces.GPU
def test_memory_allocation():
    """Probe GPU memory by allocating progressively larger square tensors.

    Keeps every successfully allocated tensor alive so allocations are
    cumulative, stops at the first failure, then releases everything and
    empties the CUDA cache.

    Returns:
        str: peak allocated memory plus the last success/failure message,
        or an error string if the test itself blew up.
    """
    try:
        peak_gb = 0
        held = []  # keep references so memory stays allocated across sizes
        outcome = ""
        for side in (100, 500, 1000, 2000, 4000, 8000):
            try:
                held.append(torch.rand(side, side, device="cuda"))
                allocated_gb = torch.cuda.memory_allocated() / (1024**3)
                peak_gb = allocated_gb
                outcome = f"Successfully allocated {side}x{side} tensor. Total memory: {allocated_gb:.2f} GB"
            except Exception as e:
                outcome = f"Failed to allocate {side}x{side} tensor: {e}"
                break
        # Drop all references, then return cached blocks to the driver.
        held = None
        torch.cuda.empty_cache()
        return f"Maximum GPU memory allocated: {peak_gb:.2f} GB\n{outcome}"
    except Exception as e:
        return f"Memory test failed: {e}"
|
|
|
|
|
def get_space_status():
    """Describe the current Hugging Face Space runtime stage.

    Returns:
        str: a human-readable status line; degrades gracefully when not
        running inside a Space or when the Hub query fails.
    """
    # NOTE(review): the top-level `from huggingface_hub.spaces import
    # get_space_stage` import does not exist in huggingface_hub — the
    # supported API is HfApi.get_space_runtime(). Query it directly here
    # using the SPACE_ID env var that Space containers set.
    try:
        from huggingface_hub import HfApi

        space_id = os.environ.get("SPACE_ID")
        if not space_id:
            # SPACE_ID is only defined inside a Space container.
            return "Not running in a Space"
        stage = HfApi().get_space_runtime(space_id).stage
        if stage == SpaceStage.RUNNING:
            status = "Space is running"
        elif stage == SpaceStage.BUILDING:
            status = "Space is building"
        else:
            status = f"Space is in stage: {stage}"
    except RepositoryNotFoundError:
        status = "Not running in a Space"
    except Exception as e:
        status = f"Error getting Space stage: {e}"

    return status
|
|
|
|
|
|
|
|
# --- UI: two tabs (basic diagnostics, memory probe) plus a status banner ---
with gr.Blocks(title="GPU Test") as demo:
    gr.Markdown("# GPU Availability Test")
    gr.Markdown("This app checks if GPU/CUDA is available and working in this Hugging Face Space")

    # Space stage is resolved once, at app construction time.
    status_banner = gr.Markdown(f"**Space Status**: {get_space_status()}")

    with gr.Tab("Basic GPU Test"):
        run_check_button = gr.Button("Check GPU Status", variant="primary")
        check_results_box = gr.Textbox(label="Results", lines=20)
        run_check_button.click(fn=check_gpu, outputs=check_results_box)

    with gr.Tab("Memory Test"):
        run_memory_button = gr.Button("Test GPU Memory Allocation", variant="primary")
        memory_results_box = gr.Textbox(label="Memory Test Results", lines=5)
        run_memory_button.click(fn=test_memory_allocation, outputs=memory_results_box)

    # Populate the basic-test results automatically on page load.
    demo.load(fn=check_gpu, outputs=check_results_box)

demo.launch()