import gradio as gr
import torch
import os
import tempfile
import numpy as np
from models import Model  # Modify based on your actual model class
from dataset import extract_features  # Or however you handle input
from eval import predict  # Assume this runs inference and returns timestamps

# Load model
def load_model(checkpoint_path='checkpoint/ckp_best.pth.tar'):
    """Build the model from a saved checkpoint and switch it to eval mode.

    The checkpoint is deserialized onto the CPU and is expected to be a
    mapping with two entries: ``'config'`` (keyword arguments forwarded to
    ``Model``) and ``'state_dict'`` (the weights to load into it).

    Args:
        checkpoint_path: Location of the checkpoint file on disk.

    Returns:
        The ``Model`` instance with weights loaded, in evaluation mode.
    """
    ckpt = torch.load(checkpoint_path, map_location='cpu')

    # The constructor arguments are stored alongside the weights.
    net = Model(**ckpt['config'])
    net.load_state_dict(ckpt['state_dict'])

    # Inference only: put the network into evaluation mode before returning.
    net.eval()
    return net

# Load the model once at import time from the default checkpoint path;
# process_video() reads this module-level instance on every request.
model = load_model()

def process_video(video_file):
    """Run action localization on a video and format the predictions as text.

    Args:
        video_file: The video to analyse. Either a filesystem path (``str``
            or ``os.PathLike``) -- which is what Gradio's ``Video`` input
            component hands to the callback -- or a binary file-like object
            exposing ``read()``. ``None`` means no video was supplied.

    Returns:
        A string with one line per prediction, formatted as
        ``"<label>: <start>s - <end>s"`` (two decimals), joined by newlines.
        When ``video_file`` is ``None`` a short notice is returned instead.
    """
    if video_file is None:
        return "No video provided."

    # Only files created here are tracked for deletion; a path handed in by
    # the caller is never removed.
    scratch_paths = []
    try:
        if hasattr(video_file, "read"):
            # File-like input: spool it to disk so feature extraction can
            # open it by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
                scratch_paths.append(tmp_video.name)
                tmp_video.write(video_file.read())
            video_path = tmp_video.name
        else:
            video_path = os.fspath(video_file)

        features = extract_features(video_path)

        # predict() is given a path, so the features go through an .npz file.
        # Reserve the name with a closed temp file, then let numpy write it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".npz") as tmp_npz:
            npz_path = tmp_npz.name
            scratch_paths.append(npz_path)
        np.savez(npz_path, features=features)

        predictions = predict(model, npz_path)

        # Format before cleanup, in case predictions are produced lazily.
        return "\n".join(
            f"{label}: {start:.2f}s - {end:.2f}s"
            for label, start, end in predictions
        )
    finally:
        # Remove scratch files even when extraction or inference fails.
        for path in scratch_paths:
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

# Gradio UI definition: a single video input wired to process_video, with
# the formatted predictions shown in a text box.
demo = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload a video"),
    outputs=gr.Textbox(label="Detected Actions"),
    title="Temporal Action Localization"
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()