import os

import gradio as gr
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video

# Load your token from environment (set it in Hugging Face Space -> Secrets).
# Needed only for gated/private models; None is fine for public ones.
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

# NOTE(review): the zeroscope v2 checkpoints are published under names like
# "cerspense/zeroscope_v2_576w" — the bare "cerspense/zeroscope-v2" repo does
# not exist on the Hub and from_pretrained would fail to resolve it.
model_id = "cerspense/zeroscope_v2_576w"

# fp16 only makes sense on GPU; fall back to fp32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = DiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    # `use_auth_token` is deprecated in recent diffusers/huggingface_hub;
    # the accepted keyword is `token`.
    token=HF_TOKEN,
)
pipe.to(device)


def generate_video(prompt: str) -> str:
    """Generate a short video from a text prompt.

    Args:
        prompt: Free-form text description of the desired clip.

    Returns:
        Path to an .mp4 file on disk, suitable for gr.Video.

    Raises:
        gr.Error: If the prompt is empty or whitespace-only.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a non-empty prompt.")
    output = pipe(prompt, num_inference_steps=25)
    # Text-to-video pipelines return per-prompt frame sequences in `.frames`,
    # not a ready-made file path; export the first sequence to an mp4.
    frames = output.frames[0]
    video_path = export_to_video(frames)
    return video_path


# Build Gradio interface
demo = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Enter a text prompt", placeholder="e.g. trees in wind"),
    outputs=gr.Video(label="Generated Video"),
    title="Text-to-Video Generator 🎥",
    description="Enter any prompt to generate a short video using a diffusion model.",
)

if __name__ == "__main__":
    demo.launch()