app.py (ADDED)
import os
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoProcessor

# Clone the model if not already present
if not os.path.exists("VideoLLaMA3-7B"):
    os.system("apt-get update && apt-get install -y git git-lfs && git lfs install")
    os.system("git clone https://huggingface.co/DAMO-NLP-SG/VideoLLaMA3-7B")
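
# NOTE (sketch, not in the original file): if apt-get is unavailable in the
# container, huggingface_hub can fetch the weights without git-lfs:
#     from huggingface_hub import snapshot_download
#     snapshot_download("DAMO-NLP-SG/VideoLLaMA3-7B", local_dir="VideoLLaMA3-7B")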

# Load model and processor from the local clone
model_path = "./VideoLLaMA3-7B"

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,      # VideoLLaMA3 ships custom modeling code
    device_map="auto",           # place layers on the available GPU(s)
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
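
# NOTE (assumption, not in the original file): flash_attention_2 requires the
# flash-attn package and an Ampere-or-newer GPU. On unsupported hardware, a
# stock transformers fallback is attn_implementation="sdpa":
#     model = AutoModelForCausalLM.from_pretrained(
#         model_path, trust_remote_code=True, device_map="auto",
#         torch_dtype=torch.bfloat16, attn_implementation="sdpa",
#     )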

def describe_video(video, question):
    # Build a VideoLLaMA3-style conversation: a system turn plus a user turn
    # carrying the video (sampled at 1 fps, capped at 128 frames) and the question.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": [
                {"type": "video", "video": {"video_path": video, "fps": 1, "max_frames": 128}},
                {"type": "text", "text": question},
            ],
        },
    ]
    inputs = processor(conversation=conversation, return_tensors="pt")
    # Move tensor inputs to the GPU; leave non-tensor entries untouched.
    inputs = {k: v.cuda() if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
    # Match the video frames' dtype to the bfloat16 model weights.
    if "pixel_values" in inputs:
        inputs["pixel_values"] = inputs["pixel_values"].to(torch.bfloat16)
    output_ids = model.generate(**inputs, max_new_tokens=128)
    return processor.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
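
# NOTE (assumption, not in the original file): if the decoded response ever
# echoes the prompt text, decoding only the newly generated ids is a common
# transformers pattern:
#     new_ids = output_ids[:, inputs["input_ids"].shape[1]:]
#     response = processor.batch_decode(new_ids, skip_special_tokens=True)[0].strip()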

# Gradio UI
demo = gr.Interface(
    fn=describe_video,
    inputs=[
        gr.Video(label="Upload a video"),
        gr.Textbox(label="Question", value="Describe this video in detail."),
    ],
    outputs=gr.Textbox(label="Response"),
)

demo.launch()
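
A minimal sketch of querying the running demo from Python, assuming the default
local URL from demo.launch() and gradio's autogenerated "/predict" route for a
gr.Interface (the file name sample.mp4 is a hypothetical placeholder):

from gradio_client import Client, handle_file

client = Client("http://127.0.0.1:7860")  # assumed default demo.launch() address
answer = client.predict(
    handle_file("sample.mp4"),            # hypothetical local video file
    "Describe this video in detail.",
    api_name="/predict",                  # default route for a gr.Interface
)
print(answer)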