"""Gradio app that verifies ZeroGPU allocation and reports the detected GPU."""

import gradio as gr
import spaces
import torch

# Bytes-to-GiB divisor used when reporting total device memory.
_BYTES_PER_GIB = 1024**3

# Created at import time, outside any @spaces.GPU function.
zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' on ZeroGPU init


@spaces.GPU
def greet(n):
    """Return a greeting that includes the tensor sum and GPU hardware details.

    Args:
        n: Number supplied by the "Input Number" field; it is added to the
            module-level ``zero`` tensor.

    Returns:
        A string containing ``zero + n``, the name of CUDA device 0, and that
        device's total memory divided by 1024**3, formatted to one decimal.
    """
    print(zero.device)  # <-- 'cuda:0' when GPU allocated
    gpu_name = torch.cuda.get_device_name(0)
    # The device-properties attribute is `total_memory`; `total_mem` does not
    # exist and raised AttributeError on every call.
    vram = torch.cuda.get_device_properties(0).total_memory / _BYTES_PER_GIB
    return f"Hello {zero + n} Tensor | GPU: {gpu_name} | VRAM: {vram:.1f} GB"


demo = gr.Interface(
    fn=greet,
    inputs=gr.Number(label="Input Number"),
    outputs=gr.Text(label="Result"),
    title="Prefix Cache Analyzer (Setup Verification)",
    description="Verifying ZeroGPU allocation and hardware detection.",
)
demo.launch()