yinde committed on
Commit
a0f484b
·
verified ·
1 Parent(s): 64f265e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -110
app.py CHANGED
@@ -1,111 +1,111 @@
1
- import os
2
- import cv2
3
- import base64
4
- import gradio as gr
5
- from openai import OpenAI
6
-
7
- # 1. Frame Extraction
8
- def extract_frames(video_path: str, num_frames: int = 8, max_resolution: int = 720):
9
- frames_base64 = []
10
- cap = cv2.VideoCapture(video_path)
11
-
12
- if not cap.isOpened():
13
- raise RuntimeError(f"Cannot open video file: {video_path}")
14
-
15
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
16
- step = max(total_frames // num_frames, 1)
17
- frame_indices = [min(i * step, total_frames - 1) for i in range(num_frames)]
18
-
19
- for index in frame_indices:
20
- cap.set(cv2.CAP_PROP_POS_FRAMES, index)
21
- ret, frame = cap.read()
22
- if not ret or frame is None:
23
- continue
24
-
25
- h, w, _ = frame.shape
26
- if max(h, w) > max_resolution:
27
- scale = max_resolution / float(max(h, w))
28
- frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
29
-
30
- success, buffer = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 90])
31
- if success:
32
- b64 = base64.b64encode(buffer).decode("utf-8")
33
- data_uri = f"data:image/jpeg;base64,{b64}"
34
- frames_base64.append(data_uri)
35
-
36
- cap.release()
37
- return frames_base64
38
-
39
- # 2. Prompt Construction
40
- def build_prompt(frames, question):
41
- content = [{"type": "text", "text": question}]
42
- for image_data_uri in frames:
43
- content.append({
44
- "type": "image_url",
45
- "image_url": {"url": image_data_uri}
46
- })
47
- return content
48
-
49
- # 3. Nebius Inference Call
50
- def query_qwen(prompt_content):
51
- api_key = os.getenv("NEBIUS_API_KEY")
52
- print(api_key) # Debugging line to check if API key is loaded correctly
53
- if not api_key:
54
- raise ValueError("NEBIUS_API_KEY not found in environment variables.")
55
-
56
- client = OpenAI(api_key=api_key, base_url="https://api.studio.nebius.ai/v1/")
57
- try:
58
- response = client.chat.completions.create(
59
- model="Qwen/Qwen2.5-VL-72B-Instruct",
60
- messages=[{"role": "user", "content": prompt_content}],
61
- temperature=0.2,
62
- max_tokens=512
63
- )
64
- return response
65
- except Exception as e:
66
- return {"error": str(e)}
67
-
68
- # 4. Parse Response
69
- def parse_response(response):
70
- if isinstance(response, dict) and "error" in response:
71
- return f"Error: {response['error']}"
72
-
73
- try:
74
- choice = response.choices[0]
75
- if hasattr(choice, "message"):
76
- return choice.message.content.strip()
77
- else:
78
- return choice.get("message", {}).get("content", "No message received.")
79
- except Exception as e:
80
- return f"Failed to parse response: {str(e)}"
81
-
82
- # MCP Core Function
83
- def answer_question(video_path: str, question: str) -> str:
84
- try:
85
- frames = extract_frames(video_path)
86
- prompt = build_prompt(frames, question)
87
- response = query_qwen(prompt)
88
- return parse_response(response)
89
- except Exception as e:
90
- return f"Something went wrong: {str(e)}"
91
-
92
- # Gradio App UI
93
- def gradio_interface(video, question):
94
- return answer_question(video, question)
95
-
96
- with gr.Blocks(title="🎥 Video QA with Qwen2.5-VL") as demo:
97
- gr.Markdown("## 🎥 Interactive Video Question Answering\nUpload a video and ask a question about it.")
98
-
99
- with gr.Row():
100
- video_input = gr.Video(label="Upload Video")
101
- question_input = gr.Textbox(label="Your Question", placeholder="e.g., What color was the car in the first scene?")
102
-
103
- answer_output = gr.Textbox(label="Model Answer", lines=3)
104
-
105
- submit_btn = gr.Button("Get Answer")
106
- submit_btn.click(fn=gradio_interface, inputs=[video_input, question_input], outputs=answer_output)
107
-
108
-
109
- # Launch the interface and MCP server
110
- if __name__ == "__main__":
111
  demo.launch(mcp_server=True)
 
1
+ import os
2
+ import cv2
3
+ import base64
4
+ import gradio as gr
5
+ from openai import OpenAI
6
+
7
# 1. Frame Extraction
def extract_frames(video_path: str, num_frames: int = 1, max_resolution: int = 720):
    """Sample up to ``num_frames`` evenly spaced frames from a video and
    return them as base64-encoded JPEG data URIs.

    Args:
        video_path: Path to a video file readable by OpenCV.
        num_frames: Number of frames to sample (evenly spaced over the video).
        max_resolution: Cap on the longest side in pixels; larger frames are
            downscaled proportionally before encoding.

    Returns:
        List of ``data:image/jpeg;base64,...`` strings. May be shorter than
        ``num_frames`` if some reads fail, or empty if no frame count is
        available.

    Raises:
        RuntimeError: If the video file cannot be opened.
    """
    frames_base64 = []
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        raise RuntimeError(f"Cannot open video file: {video_path}")

    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Guard: a broken container can report 0 frames; the original
        # min(i*step, total_frames-1) would then seek to index -1.
        if total_frames <= 0 or num_frames <= 0:
            return frames_base64

        step = max(total_frames // num_frames, 1)
        frame_indices = [min(i * step, total_frames - 1) for i in range(num_frames)]

        for index in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, index)
            ret, frame = cap.read()
            if not ret or frame is None:
                continue  # Skip unreadable frames rather than aborting.

            h, w, _ = frame.shape
            if max(h, w) > max_resolution:
                scale = max_resolution / float(max(h, w))
                frame = cv2.resize(frame, (int(w * scale), int(h * scale)))

            success, buffer = cv2.imencode(".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 90])
            if success:
                b64 = base64.b64encode(buffer).decode("utf-8")
                frames_base64.append(f"data:image/jpeg;base64,{b64}")
    finally:
        # Release the capture handle even if decoding/encoding raises.
        cap.release()

    return frames_base64
38
+
39
# 2. Prompt Construction
def build_prompt(frames, question):
    """Assemble an OpenAI-style multimodal content list.

    The question text comes first, followed by one ``image_url`` part per
    base64 frame data URI, preserving frame order.
    """
    image_parts = [
        {"type": "image_url", "image_url": {"url": uri}}
        for uri in frames
    ]
    return [{"type": "text", "text": question}] + image_parts
48
+
49
# 3. Nebius Inference Call
def query_qwen(prompt_content):
    """Send a multimodal prompt to Qwen2.5-VL-72B-Instruct via Nebius.

    Args:
        prompt_content: OpenAI-style content list (text + image_url parts),
            as produced by build_prompt.

    Returns:
        The chat-completion response object on success, or a
        ``{"error": <message>}`` dict if the API call fails.

    Raises:
        ValueError: If NEBIUS_API_KEY is not set in the environment.
    """
    api_key = os.getenv("NEBIUS_API_KEY")
    # SECURITY: never print or log the key itself (the previous debug
    # print leaked the secret to stdout); only check its presence.
    if not api_key:
        raise ValueError("NEBIUS_API_KEY not found in environment variables.")

    client = OpenAI(api_key=api_key, base_url="https://api.studio.nebius.ai/v1/")
    try:
        response = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-72B-Instruct",
            messages=[{"role": "user", "content": prompt_content}],
            temperature=0.2,
            max_tokens=512,
        )
        return response
    except Exception as e:
        # Surface API failures as a dict so parse_response can report them
        # instead of crashing the Gradio callback.
        return {"error": str(e)}
67
+
68
# 4. Parse Response
def parse_response(response):
    """Convert a raw completion response (or error dict) into display text."""
    # query_qwen wraps API failures as {"error": ...}; report them directly.
    if isinstance(response, dict) and "error" in response:
        return f"Error: {response['error']}"

    try:
        first = response.choices[0]
        # Object-style choice (normal SDK response) vs. dict-style fallback.
        if hasattr(first, "message"):
            return first.message.content.strip()
        return first.get("message", {}).get("content", "No message received.")
    except Exception as e:
        return f"Failed to parse response: {str(e)}"
81
+
82
# MCP Core Function
def answer_question(video_path: str, question: str) -> str:
    """End-to-end video QA pipeline.

    Samples frames from the video, builds a multimodal prompt, queries the
    model, and returns the parsed answer text — or an error string if any
    stage fails.
    """
    try:
        sampled_frames = extract_frames(video_path)
        prompt_content = build_prompt(sampled_frames, question)
        raw_response = query_qwen(prompt_content)
        return parse_response(raw_response)
    except Exception as e:
        return f"Something went wrong: {str(e)}"
91
+
92
# Gradio App UI
def gradio_interface(video, question):
    """Gradio click handler: delegate straight to answer_question.

    Args:
        video: The uploaded video from gr.Video (answer_question treats it
            as a file path — presumably gr.Video yields a filepath; confirm).
        question: Free-text question from the textbox.

    Returns:
        The model's answer text, or an error message string.
    """
    return answer_question(video, question)
95
+
96
# Gradio UI layout: video + question inputs, one answer textbox, one button.
with gr.Blocks(title="🎥 Video QA with Qwen2.5-VL") as demo:
    gr.Markdown("## 🎥 Interactive Video Question Answering\nUpload a video and ask a question about it.")

    with gr.Row():
        # Inputs side by side: the video upload and the free-text question.
        video_input = gr.Video(label="Upload Video")
        question_input = gr.Textbox(label="Your Question", placeholder="e.g., What color was the car in the first scene?")

    answer_output = gr.Textbox(label="Model Answer", lines=3)

    submit_btn = gr.Button("Get Answer")
    # Route (video, question) through gradio_interface into the model pipeline.
    submit_btn.click(fn=gradio_interface, inputs=[video_input, question_input], outputs=answer_output)


# Launch the interface and MCP server
if __name__ == "__main__":
    # mcp_server=True also exposes the app's functions over MCP.
    demo.launch(mcp_server=True)