# app.py — created by sai-Rohan in commit 283bd4f ("Create app.py"); 894 bytes.
import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import (
    VideoMAEFeatureExtractor,
    VideoMAEForVideoClassification,
    VideoMAEImageProcessor,
)
# Load the fine-tuned checkpoint and its matching preprocessor once at import
# time so every request reuses them.
model_name = "OPear/videomae-large-finetuned-UCF-Crime"
model = VideoMAEForVideoClassification.from_pretrained(model_name)
# VideoMAEImageProcessor replaces the deprecated VideoMAEFeatureExtractor.
processor = VideoMAEImageProcessor.from_pretrained(model_name)
def classify_video(video):
    """Predict the class label for a clip of decoded video frames.

    Args:
        video: Array-like of decoded frames with shape (frames, H, W, C).

    Returns:
        The label that ``model.config.id2label`` maps the highest-scoring
        logit index to.

    Raises:
        gr.Error: If no video was supplied or it contains no frames.
        TypeError: If ``video`` is a file path rather than decoded frames.
        ValueError: If ``video`` is not a 4-D stack of frames.
    """
    if video is None:
        raise gr.Error("Please provide a video to classify.")

    # NOTE(review): gr.Video() hands the callback a file path, not pixel
    # data, and nothing imported in this file can decode video. Fail with a
    # clear message instead of an obscure error from inside the processor;
    # a decoding step still has to be added before this point — TODO.
    if isinstance(video, str):
        raise TypeError(
            "classify_video expects decoded frames of shape "
            f"(frames, H, W, C), but received a file path: {video!r}"
        )

    frames = np.asarray(video)
    if frames.ndim != 4:
        raise ValueError(
            f"expected frames of shape (frames, H, W, C), got {frames.shape}"
        )
    if frames.shape[0] == 0:
        raise gr.Error("The video does not contain any frames.")

    # The model is configured for a fixed clip length (config.num_frames), so
    # pick exactly that many frames, evenly spaced over the whole clip.
    # Clips shorter than that repeat frames instead of failing.
    num_frames = model.config.num_frames
    picks = np.linspace(0, frames.shape[0] - 1, num=num_frames)
    picks = picks.round().astype(int)

    # The processor's documented video input is a list of frames.
    clip = list(frames[picks])

    inputs = processor(clip, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class_idx = logits.argmax(-1).item()
    return model.config.id2label[predicted_class_idx]
# Single-function web UI: one video input, the predicted label as text output.
iface = gr.Interface(
    classify_video,
    gr.Video(),
    "text",
    title="Video Classifier using VideoMAE",
)

# Start the Gradio server for the interface built above.
iface.launch()