import gradio as gr
import spaces
import torch

# Ask for a CUDA copy of a one-element tensor at import time, i.e. outside
# any @spaces.GPU-decorated call. `greet` below reads this module-level tensor.
zero = torch.Tensor([0]).cuda()
# Startup diagnostic: log where the tensor lives before any GPU request runs.
print(zero.device)  # <-- 'cpu' on ZeroGPU init

@spaces.GPU
def greet(n) -> str:
    """Return a status string reporting the tensor sum and GPU hardware.

    Args:
        n: Value added to the module-level ``zero`` tensor. It comes from the
            Gradio number input wired up below.

    Returns:
        A string containing ``zero + n``, the name of CUDA device 0, and
        that device's total memory divided by 1024**3, to one decimal place.
    """
    print(zero.device)  # <-- 'cuda:0' when GPU allocated
    gpu_name = torch.cuda.get_device_name(0)
    # The device-properties attribute is `total_memory` (in bytes). The
    # previous `total_mem` does not exist and raised AttributeError on every
    # call, so the handler never returned a result.
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    vram = total_bytes / (1024**3)
    return f"Hello {zero + n} Tensor | GPU: {gpu_name} | VRAM: {vram:.1f} GB"

# Single-function UI: one numeric input is passed to `greet`, and the string
# it returns is shown in a text output.
demo = gr.Interface(
    greet,
    gr.Number(label="Input Number"),
    gr.Text(label="Result"),
    description="Verifying ZeroGPU allocation and hardware detection.",
    title="Prefix Cache Analyzer (Setup Verification)",
)

# Start serving the interface when this script runs.
demo.launch()