yinde committed on
Commit
041032b
·
2 Parent(s): 6b0dd20 61a09f9

Merge branch 'master'

Browse files
Files changed (2) hide show
  1. app.py +111 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import base64
4
+ import gradio as gr
5
+ from openai import OpenAI
6
+
7
+ # 1. Frame Extraction
8
def extract_frames(video_path: str, num_frames: int = 8, max_resolution: int = 720):
    """Sample frames from a video and return them as base64 JPEG data URIs.

    Frames are read at positions ``0, step, 2 * step, ...`` where ``step`` is
    ``total_frames // num_frames`` (at least 1). Frames whose longer side
    exceeds ``max_resolution`` pixels are scaled down proportionally, then
    every frame is JPEG-encoded at quality 90.

    Args:
        video_path: Path of the video file to open with OpenCV.
        num_frames: Maximum number of frames to sample. Zero or a negative
            value yields an empty list without opening the video.
        max_resolution: Upper bound, in pixels, for the longer side of each
            encoded frame. Expected to be positive.

    Returns:
        A list of ``data:image/jpeg;base64,...`` strings, one per frame that
        was read and encoded successfully. It may hold fewer than
        ``num_frames`` entries.

    Raises:
        RuntimeError: If the video file cannot be opened.
    """
    # Nothing was requested; this also avoids dividing by zero below.
    if num_frames <= 0:
        return []

    frames_base64 = []
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Cannot open video file: {video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames > 0:
            step = max(total_frames // num_frames, 1)
            # range() never exceeds the last frame, so short videos yield
            # each frame once instead of repeating the final one.
            frame_indices = list(range(0, total_frames, step))[:num_frames]
        else:
            # The container reports no frame count: never seek to a negative
            # index, but still try to read the first frame.
            frame_indices = [0]

        for index in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, index)
            ret, frame = cap.read()
            if not ret or frame is None:
                continue

            # shape[:2] also works for 2-D (single-channel) frames.
            h, w = frame.shape[:2]
            longest_side = max(h, w)
            if longest_side > max_resolution:
                scale = max_resolution / float(longest_side)
                # Clamp to 1 px so extreme aspect ratios never produce a
                # zero-sized target, which cv2.resize rejects.
                new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
                frame = cv2.resize(frame, new_size)

            success, buffer = cv2.imencode(
                ".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 90]
            )
            if success:
                b64 = base64.b64encode(buffer).decode("utf-8")
                frames_base64.append(f"data:image/jpeg;base64,{b64}")
    finally:
        # Release the capture handle even when reading or encoding fails.
        cap.release()

    return frames_base64
38
+
39
+ # 2. Prompt Construction
40
def build_prompt(frames, question):
    """Build the multimodal ``content`` list for a chat-completion message.

    Args:
        frames: Iterable of image data URIs (or URLs) to attach, in order.
            ``None`` is treated as "no images".
        question: The user's question; it becomes the leading text part.

    Returns:
        A list of content parts: one ``{"type": "text", ...}`` entry followed
        by one ``{"type": "image_url", ...}`` entry per frame.
    """
    content = [{"type": "text", "text": question}]
    # "frames or ()" lets a missing frame list produce a text-only prompt
    # instead of raising TypeError when iterating None.
    content.extend(
        {"type": "image_url", "image_url": {"url": image_data_uri}}
        for image_data_uri in (frames or ())
    )
    return content
48
+
49
+ # 3. Nebius Inference Call
50
def query_qwen(prompt_content):
    """Send the prompt to Qwen2.5-VL through the Nebius OpenAI-compatible API.

    Args:
        prompt_content: The ``content`` value of the single user message,
            e.g. the list of text and image parts built by ``build_prompt``.

    Returns:
        The chat-completion response object on success, or a dict of the form
        ``{"error": "<message>"}`` if the request raised an exception.

    Raises:
        ValueError: If the ``NEBIUS_API_KEY`` environment variable is unset
            or empty.
    """
    api_key = os.getenv("NEBIUS_API_KEY")
    # The key is deliberately never printed or logged: stdout ends up in
    # hosting logs, which would leak the secret.
    if not api_key:
        raise ValueError("NEBIUS_API_KEY not found in environment variables.")

    client = OpenAI(api_key=api_key, base_url="https://api.studio.nebius.ai/v1/")
    try:
        response = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-72B-Instruct",
            messages=[{"role": "user", "content": prompt_content}],
            temperature=0.2,
            max_tokens=512,
        )
        return response
    except Exception as e:
        # Callers expect request failures as an error dict rather than an
        # exception, so the message is passed back in that shape.
        return {"error": str(e)}
67
+
68
+ # 4. Parse Response
69
def parse_response(response):
    """Extract the answer text from a chat-completion response.

    Args:
        response: Either an error dict of the form ``{"error": "<message>"}``
            or a response object exposing ``choices``. The first choice may
            be an object with a ``message`` attribute or a plain dict with a
            ``"message"`` key.

    Returns:
        The stripped message text; ``"Error: <message>"`` for an error dict;
        ``"No message received."`` when the first choice carries no content;
        or ``"Failed to parse response: <reason>"`` when the response does
        not have the expected structure.
    """
    if isinstance(response, dict) and "error" in response:
        return f"Error: {response['error']}"

    try:
        choice = response.choices[0]
        if hasattr(choice, "message"):
            content = choice.message.content
        else:
            content = choice.get("message", {}).get("content")
    except (AttributeError, IndexError, KeyError, TypeError) as e:
        return f"Failed to parse response: {str(e)}"

    # A reply can legitimately have no text (content is None); report that
    # explicitly instead of failing on None.strip().
    if content is None:
        return "No message received."
    return str(content).strip()
81
+
82
+ # MCP Core Function
83
def answer_question(video_path: str, question: str) -> str:
    """Answer a natural-language question about a video.

    Runs the full pipeline: extract frames, build the multimodal prompt,
    query the model and parse its reply. Problems are reported as a
    human-readable string rather than raised, so the UI always has text to
    display.

    Args:
        video_path: Path of the video file to analyse.
        question: The question to ask about the video.

    Returns:
        The model's answer, or a message describing what went wrong.
    """
    # Reject unusable input up front with a clear message instead of letting
    # it surface as an opaque error from deeper in the pipeline.
    if not video_path:
        return "Please upload a video before asking a question."
    if not question or not question.strip():
        return "Please enter a question about the video."

    try:
        frames = extract_frames(video_path)
        if not frames:
            # Without frames the model would answer from the text alone.
            return "Could not extract any frames from the video."
        prompt = build_prompt(frames, question)
        response = query_qwen(prompt)
        return parse_response(response)
    except Exception as e:
        return f"Something went wrong: {str(e)}"
91
+
92
+ # Gradio App UI
93
def gradio_interface(video: str, question: str) -> str:
    """Answer a question about the content of an uploaded video.

    This is the callback wired to the "Get Answer" button; it delegates to
    ``answer_question``.

    Args:
        video: Path of the uploaded video file.
        question: The question to ask about the video.

    Returns:
        The model's answer, or a message describing what went wrong.
    """
    return answer_question(video, question)
95
+
96
with gr.Blocks(title="🎥 Video QA with Qwen2.5-VL") as demo:
    # Page header shown above the inputs.
    gr.Markdown(
        "## 🎥 Interactive Video Question Answering\n"
        "Upload a video and ask a question about it."
    )

    # Inputs: the video and the question about it.
    with gr.Row():
        video_input = gr.Video(label="Upload Video")
        question_input = gr.Textbox(
            label="Your Question",
            placeholder="e.g., What color was the car in the first scene?",
        )

    # Output area for the model's reply.
    answer_output = gr.Textbox(label="Model Answer", lines=3)

    submit_btn = gr.Button("Get Answer")
    submit_btn.click(
        fn=gradio_interface,
        inputs=[video_input, question_input],
        outputs=answer_output,
    )


# Start the web UI together with the MCP server when run as a script.
if __name__ == "__main__":
    demo.launch(mcp_server=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio[mcp]
2
+ textblob
3
+ openai
4
+ opencv-python