Merge branch 'master'
- app.py +111 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,111 @@
+import os
+import cv2
+import base64
+import gradio as gr
+from openai import OpenAI
+
+# 1. Frame Extraction
+def extract_frames(video_path: str, num_frames: int = 8, max_resolution: int = 720):
+    frames_base64 = []
+    cap = cv2.VideoCapture(video_path)
+
+    if not cap.isOpened():
+        raise RuntimeError(f"Cannot open video file: {video_path}")
+
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    step = max(total_frames // num_frames, 1)
+    frame_indices = [min(i * step, total_frames - 1) for i in range(num_frames)]
+
+    for index in frame_indices:
+        cap.set(cv2.CAP_PROP_POS_FRAMES, index)
+        ret, frame = cap.read()
+        if not ret or frame is None:
+            continue
+
+        h, w, _ = frame.shape
+        if max(h, w) > max_resolution:
+            scale = max_resolution / float(max(h, w))
+            frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
+
+        success, buffer = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 90])
+        if success:
+            b64 = base64.b64encode(buffer).decode("utf-8")
+            data_uri = f"data:image/jpeg;base64,{b64}"
+            frames_base64.append(data_uri)
+
+    cap.release()
+    return frames_base64
+
+# 2. Prompt Construction
+def build_prompt(frames, question):
+    content = [{"type": "text", "text": question}]
+    for image_data_uri in frames:
+        content.append({
+            "type": "image_url",
+            "image_url": {"url": image_data_uri}
+        })
+    return content
+
+# 3. Nebius Inference Call
+def query_qwen(prompt_content):
+    api_key = os.getenv("NEBIUS_API_KEY")
+    print("NEBIUS_API_KEY set:", bool(api_key))  # debug check; never print the key itself
+    if not api_key:
+        raise ValueError("NEBIUS_API_KEY not found in environment variables.")
+
+    client = OpenAI(api_key=api_key, base_url="https://api.studio.nebius.ai/v1/")
+    try:
+        response = client.chat.completions.create(
+            model="Qwen/Qwen2.5-VL-72B-Instruct",
+            messages=[{"role": "user", "content": prompt_content}],
+            temperature=0.2,
+            max_tokens=512
+        )
+        return response
+    except Exception as e:
+        return {"error": str(e)}
+
+# 4. Parse Response
+def parse_response(response):
+    if isinstance(response, dict) and "error" in response:
+        return f"Error: {response['error']}"
+
+    try:
+        choice = response.choices[0]
+        if hasattr(choice, "message"):
+            return choice.message.content.strip()
+        else:
+            return choice.get("message", {}).get("content", "No message received.")
+    except Exception as e:
+        return f"Failed to parse response: {str(e)}"
+
+# MCP Core Function
+def answer_question(video_path: str, question: str) -> str:
+    try:
+        frames = extract_frames(video_path)
+        prompt = build_prompt(frames, question)
+        response = query_qwen(prompt)
+        return parse_response(response)
+    except Exception as e:
+        return f"Something went wrong: {str(e)}"
+
+# Gradio App UI
+def gradio_interface(video, question):
+    return answer_question(video, question)
+
+with gr.Blocks(title="🎥 Video QA with Qwen2.5-VL") as demo:
+    gr.Markdown("## 🎥 Interactive Video Question Answering\nUpload a video and ask a question about it.")
+
+    with gr.Row():
+        video_input = gr.Video(label="Upload Video")
+        question_input = gr.Textbox(label="Your Question", placeholder="e.g., What color was the car in the first scene?")
+
+    answer_output = gr.Textbox(label="Model Answer", lines=3)
+
+    submit_btn = gr.Button("Get Answer")
+    submit_btn.click(fn=gradio_interface, inputs=[video_input, question_input], outputs=answer_output)
+
+
+# Launch the interface and MCP server
+if __name__ == "__main__":
+    demo.launch(mcp_server=True)
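
As a quick sanity check outside the UI, the frame-extraction step can be exercised on its own. A minimal sketch, assuming a hypothetical clip at "sample_clip.mp4"; no API key is needed for this stage:

# hypothetical local check of extract_frames(); "sample_clip.mp4" is a placeholder
from app import extract_frames

frames = extract_frames("sample_clip.mp4", num_frames=4)
print(f"Extracted {len(frames)} frame(s)")  # expect up to 4 JPEG data URIs
if frames:
    print(frames[0][:40])  # should start with "data:image/jpeg;base64,"
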
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+gradio[mcp]
+textblob
+openai
+opencv-python
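
With these dependencies installed (pip install -r requirements.txt) and NEBIUS_API_KEY exported, the pipeline can be smoke-tested end to end without launching the interface. A minimal sketch, again with a placeholder clip:

# hypothetical end-to-end check; assumes NEBIUS_API_KEY is set in the environment
from app import answer_question

print(answer_question("sample_clip.mp4", "What is happening in this video?"))
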