yinde committed on
Commit
a0f484b
·
verified ·
1 Parent(s): 64f265e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -110
app.py CHANGED
@@ -1,111 +1,111 @@
1
- import os
2
- import cv2
3
- import base64
4
- import gradio as gr
5
- from openai import OpenAI
6
-
7
- # 1. Frame Extraction
8
- def extract_frames(video_path: str, num_frames: int = 8, max_resolution: int = 720):
9
- frames_base64 = []
10
- cap = cv2.VideoCapture(video_path)
11
-
12
- if not cap.isOpened():
13
- raise RuntimeError(f"Cannot open video file: {video_path}")
14
-
15
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
16
- step = max(total_frames // num_frames, 1)
17
- frame_indices = [min(i * step, total_frames - 1) for i in range(num_frames)]
18
-
19
- for index in frame_indices:
20
- cap.set(cv2.CAP_PROP_POS_FRAMES, index)
21
- ret, frame = cap.read()
22
- if not ret or frame is None:
23
- continue
24
-
25
- h, w, _ = frame.shape
26
- if max(h, w) > max_resolution:
27
- scale = max_resolution / float(max(h, w))
28
- frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
29
-
30
- success, buffer = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 90])
31
- if success:
32
- b64 = base64.b64encode(buffer).decode("utf-8")
33
- data_uri = f"data:image/jpeg;base64,{b64}"
34
- frames_base64.append(data_uri)
35
-
36
- cap.release()
37
- return frames_base64
38
-
39
- # 2. Prompt Construction
40
- def build_prompt(frames, question):
41
- content = [{"type": "text", "text": question}]
42
- for image_data_uri in frames:
43
- content.append({
44
- "type": "image_url",
45
- "image_url": {"url": image_data_uri}
46
- })
47
- return content
48
-
49
- # 3. Nebius Inference Call
50
- def query_qwen(prompt_content):
51
- api_key = os.getenv("NEBIUS_API_KEY")
52
- print(api_key) # Debugging line to check if API key is loaded correctly
53
- if not api_key:
54
- raise ValueError("NEBIUS_API_KEY not found in environment variables.")
55
-
56
- client = OpenAI(api_key=api_key, base_url="https://api.studio.nebius.ai/v1/")
57
- try:
58
- response = client.chat.completions.create(
59
- model="Qwen/Qwen2.5-VL-72B-Instruct",
60
- messages=[{"role": "user", "content": prompt_content}],
61
- temperature=0.2,
62
- max_tokens=512
63
- )
64
- return response
65
- except Exception as e:
66
- return {"error": str(e)}
67
-
68
- # 4. Parse Response
69
- def parse_response(response):
70
- if isinstance(response, dict) and "error" in response:
71
- return f"Error: {response['error']}"
72
-
73
- try:
74
- choice = response.choices[0]
75
- if hasattr(choice, "message"):
76
- return choice.message.content.strip()
77
- else:
78
- return choice.get("message", {}).get("content", "No message received.")
79
- except Exception as e:
80
- return f"Failed to parse response: {str(e)}"
81
-
82
- # MCP Core Function
83
- def answer_question(video_path: str, question: str) -> str:
84
- try:
85
- frames = extract_frames(video_path)
86
- prompt = build_prompt(frames, question)
87
- response = query_qwen(prompt)
88
- return parse_response(response)
89
- except Exception as e:
90
- return f"Something went wrong: {str(e)}"
91
-
92
- # Gradio App UI
93
- def gradio_interface(video, question):
94
- return answer_question(video, question)
95
-
96
- with gr.Blocks(title="🎥 Video QA with Qwen2.5-VL") as demo:
97
- gr.Markdown("## 🎥 Interactive Video Question Answering\nUpload a video and ask a question about it.")
98
-
99
- with gr.Row():
100
- video_input = gr.Video(label="Upload Video")
101
- question_input = gr.Textbox(label="Your Question", placeholder="e.g., What color was the car in the first scene?")
102
-
103
- answer_output = gr.Textbox(label="Model Answer", lines=3)
104
-
105
- submit_btn = gr.Button("Get Answer")
106
- submit_btn.click(fn=gradio_interface, inputs=[video_input, question_input], outputs=answer_output)
107
-
108
-
109
- # Launch the interface and MCP server
110
- if __name__ == "__main__":
111
  demo.launch(mcp_server=True)
 
1
+ import os
2
+ import cv2
3
+ import base64
4
+ import gradio as gr
5
+ from openai import OpenAI
6
+
7
# 1. Frame Extraction
def extract_frames(video_path: str, num_frames: int = 1, max_resolution: int = 720):
    """Sample up to ``num_frames`` evenly spaced frames from a video and
    return them as base64-encoded JPEG data URIs.

    Args:
        video_path: Path to a video file readable by OpenCV.
        num_frames: Number of frames to sample (evenly spaced over the video).
        max_resolution: Cap on the longest side in pixels; larger frames are
            downscaled proportionally before encoding.

    Returns:
        List of ``data:image/jpeg;base64,...`` strings. May be shorter than
        ``num_frames`` if some reads fail, or empty if no frame count is
        available.

    Raises:
        RuntimeError: If the video file cannot be opened.
    """
    frames_base64 = []
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        raise RuntimeError(f"Cannot open video file: {video_path}")

    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Guard: a broken container can report 0 frames; the original
        # min(i*step, total_frames-1) would then seek to index -1.
        if total_frames <= 0 or num_frames <= 0:
            return frames_base64

        step = max(total_frames // num_frames, 1)
        frame_indices = [min(i * step, total_frames - 1) for i in range(num_frames)]

        for index in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, index)
            ret, frame = cap.read()
            if not ret or frame is None:
                continue  # Skip unreadable frames rather than aborting.

            h, w, _ = frame.shape
            if max(h, w) > max_resolution:
                scale = max_resolution / float(max(h, w))
                frame = cv2.resize(frame, (int(w * scale), int(h * scale)))

            success, buffer = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 90])
            if success:
                b64 = base64.b64encode(buffer).decode("utf-8")
                frames_base64.append(f"data:image/jpeg;base64,{b64}")
    finally:
        # Release the capture handle even if decoding/encoding raises.
        cap.release()

    return frames_base64
38
+
39
# 2. Prompt Construction
def build_prompt(frames, question):
    """Assemble an OpenAI-style multimodal content list.

    The question text comes first, followed by one ``image_url`` part per
    base64 frame data URI, preserving frame order.
    """
    image_parts = [
        {"type": "image_url", "image_url": {"url": uri}}
        for uri in frames
    ]
    return [{"type": "text", "text": question}] + image_parts
48
+
49
# 3. Nebius Inference Call
def query_qwen(prompt_content):
    """Send a multimodal prompt to Qwen2.5-VL-72B-Instruct via Nebius.

    Args:
        prompt_content: OpenAI-style content list (text + image_url parts),
            as produced by build_prompt.

    Returns:
        The chat-completion response object on success, or a
        ``{"error": <message>}`` dict if the API call fails.

    Raises:
        ValueError: If NEBIUS_API_KEY is not set in the environment.
    """
    api_key = os.getenv("NEBIUS_API_KEY")
    # SECURITY: never print or log the key itself (the previous debug
    # print leaked the secret to stdout); only check its presence.
    if not api_key:
        raise ValueError("NEBIUS_API_KEY not found in environment variables.")

    client = OpenAI(api_key=api_key, base_url="https://api.studio.nebius.ai/v1/")
    try:
        response = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-72B-Instruct",
            messages=[{"role": "user", "content": prompt_content}],
            temperature=0.2,
            max_tokens=512,
        )
        return response
    except Exception as e:
        # Surface API failures as a dict so parse_response can report them
        # instead of crashing the Gradio callback.
        return {"error": str(e)}
67
+
68
# 4. Parse Response
def parse_response(response):
    """Convert a raw completion response (or error dict) into display text."""
    # query_qwen wraps API failures as {"error": ...}; report them directly.
    if isinstance(response, dict) and "error" in response:
        return f"Error: {response['error']}"

    try:
        first = response.choices[0]
        # Object-style choice (normal SDK response) vs. dict-style fallback.
        if hasattr(first, "message"):
            return first.message.content.strip()
        return first.get("message", {}).get("content", "No message received.")
    except Exception as e:
        return f"Failed to parse response: {str(e)}"
81
+
82
# MCP Core Function
def answer_question(video_path: str, question: str) -> str:
    """End-to-end video QA pipeline.

    Samples frames from the video, builds a multimodal prompt, queries the
    model, and returns the parsed answer text — or an error string if any
    stage fails.
    """
    try:
        sampled_frames = extract_frames(video_path)
        prompt_content = build_prompt(sampled_frames, question)
        raw_response = query_qwen(prompt_content)
        return parse_response(raw_response)
    except Exception as e:
        return f"Something went wrong: {str(e)}"
91
+
92
# Gradio App UI
def gradio_interface(video, question):
    """Gradio click handler: delegate straight to answer_question.

    Args:
        video: The uploaded video from gr.Video (answer_question treats it
            as a file path — presumably gr.Video yields a filepath; confirm).
        question: Free-text question from the textbox.

    Returns:
        The model's answer text, or an error message string.
    """
    return answer_question(video, question)
95
+
96
# Gradio UI layout: video + question inputs, one answer textbox, one button.
with gr.Blocks(title="🎥 Video QA with Qwen2.5-VL") as demo:
    gr.Markdown("## 🎥 Interactive Video Question Answering\nUpload a video and ask a question about it.")

    with gr.Row():
        # Inputs side by side: the video upload and the free-text question.
        video_input = gr.Video(label="Upload Video")
        question_input = gr.Textbox(label="Your Question", placeholder="e.g., What color was the car in the first scene?")

    answer_output = gr.Textbox(label="Model Answer", lines=3)

    submit_btn = gr.Button("Get Answer")
    # Route (video, question) through gradio_interface into the model pipeline.
    submit_btn.click(fn=gradio_interface, inputs=[video_input, question_input], outputs=answer_output)


# Launch the interface and MCP server
if __name__ == "__main__":
    # mcp_server=True also exposes the app's functions over MCP.
    demo.launch(mcp_server=True)