jena-shreyas committed on
Commit
b70cd0c
·
1 Parent(s): bcf2256

Add UI features

Browse files
Files changed (2) hide show
  1. README.md +0 -1
  2. app.py +125 -18
README.md CHANGED
@@ -9,7 +9,6 @@ app_file: app.py
9
  python_version: "3.10"
10
  pinned: false
11
  license: apache-2.0
12
- preinstall: "bash pre-install.sh"
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
9
  python_version: "3.10"
10
  pinned: false
11
  license: apache-2.0
 
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -52,7 +52,16 @@ def get_video_path(video_id: str):
52
  # ----------------------
53
  # Inference function
54
  # ----------------------
55
- def video_qa(video_id: str, prompt: str) -> str:
 
 
 
 
 
 
 
 
 
56
  if not video_id:
57
  return "❌ Please select a video ID."
58
 
@@ -61,16 +70,32 @@ def video_qa(video_id: str, prompt: str) -> str:
61
 
62
  video_path = get_video_path(video_id)
63
  if video_path is None:
64
- return f"❌ Video not found: {video_id}.webm"
65
 
66
  try:
67
- response = model.chat(
68
- prompt=prompt,
69
- video_path=video_path,
70
- fps=FPS,
71
- max_new_tokens=MAX_NEW_TOKENS,
72
- temperature=TEMPERATURE,
73
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  return response
75
 
76
  except Exception as e:
@@ -79,37 +104,110 @@ def video_qa(video_id: str, prompt: str) -> str:
79
  # ----------------------
80
  # Gradio UI
81
  # ----------------------
82
- with gr.Blocks(title="Video QA – LLaVa-Video-7B-Qwen2") as demo:
83
  gr.Markdown("## 🎥 Video Question Answering (LLaVa-Video-7B-Qwen2)")
84
 
85
  with gr.Row():
86
  # LEFT COLUMN
87
  with gr.Column(scale=1):
 
 
88
  video_id = gr.Dropdown(
89
  choices=VIDEO_IDS,
90
  label="Video ID",
91
  filterable=True,
92
- interactive=True
 
93
  )
94
 
95
  video_player = gr.Video(
96
  label="Selected Video",
97
- autoplay=True,
98
- height=240
 
 
 
 
 
 
 
 
 
 
 
99
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  # RIGHT COLUMN
102
  with gr.Column(scale=2):
 
 
103
  prompt = gr.Textbox(
104
  label="Prompt",
105
- placeholder="Ask a question about the selected video",
106
- lines=4
 
107
  )
 
108
  answer = gr.Textbox(
109
  label="Model Answer",
110
- lines=8
 
111
  )
112
- run = gr.Button("Run Inference 🚀")
 
 
 
 
 
 
 
 
 
 
113
 
114
  # Update video player when dropdown changes
115
  video_id.change(
@@ -121,7 +219,16 @@ with gr.Blocks(title="Video QA – LLaVa-Video-7B-Qwen2") as demo:
121
  # Run inference
122
  run.click(
123
  fn=video_qa,
124
- inputs=[video_id, prompt],
 
 
 
 
 
 
 
 
 
125
  outputs=answer
126
  )
127
 
 
52
  # ----------------------
53
  # Inference function
54
  # ----------------------
55
+ def video_qa(
56
+ video_id: str,
57
+ prompt: str,
58
+ fps: float,
59
+ max_tokens: int,
60
+ temperature: float,
61
+ top_k: int,
62
+ top_p: float,
63
+ video_mode: str,
64
+ ) -> str:
65
  if not video_id:
66
  return "❌ Please select a video ID."
67
 
 
70
 
71
  video_path = get_video_path(video_id)
72
  if video_path is None:
73
+ return f"❌ Video not found: {video_id}.mp4"
74
 
75
  try:
76
+ # Prepare generation config
77
+ generation_config = {
78
+ "max_new_tokens": max_tokens,
79
+ "temperature": temperature,
80
+ "top_k": top_k,
81
+ "top_p": top_p,
82
+ }
83
+
84
+ # Add video_mode if supported by the model
85
+ kwargs = {
86
+ "prompt": prompt,
87
+ "video_path": video_path,
88
+ "fps": fps,
89
+ "generation_config": generation_config,
90
+ }
91
+
92
+ # Try to add video_mode (for Qwen models)
93
+ try:
94
+ response = model.chat(**kwargs, video_mode=video_mode)
95
+ except TypeError:
96
+ # If video_mode is not supported, fall back to without it
97
+ response = model.chat(**kwargs)
98
+
99
  return response
100
 
101
  except Exception as e:
 
104
  # ----------------------
105
  # Gradio UI
106
  # ----------------------
107
+ with gr.Blocks(title="Video QA – LLaVa-Video-7B-Qwen2", theme=gr.themes.Soft()) as demo:
108
  gr.Markdown("## 🎥 Video Question Answering (LLaVa-Video-7B-Qwen2)")
109
 
110
  with gr.Row():
111
  # LEFT COLUMN
112
  with gr.Column(scale=1):
113
+ gr.Markdown("### 📁 Video Selection")
114
+
115
  video_id = gr.Dropdown(
116
  choices=VIDEO_IDS,
117
  label="Video ID",
118
  filterable=True,
119
+ interactive=True,
120
+ value=VIDEO_IDS[0] if VIDEO_IDS else None
121
  )
122
 
123
  video_player = gr.Video(
124
  label="Selected Video",
125
+ autoplay=False,
126
+ height=300
127
+ )
128
+
129
+ gr.Markdown("### ⚙️ Model Parameters")
130
+
131
+ fps_slider = gr.Slider(
132
+ minimum=0.5,
133
+ maximum=5.0,
134
+ step=0.5,
135
+ value=FPS,
136
+ label="🎞️ Frames Per Second (FPS)",
137
+ info="Sample rate for video frames"
138
  )
139
+
140
+ video_mode_radio = gr.Radio(
141
+ choices=["video", "frames"],
142
+ value="video",
143
+ label="📹 Video Mode",
144
+ info="'video' for FPS-based, 'frames' for fixed count"
145
+ )
146
+
147
+ with gr.Accordion("🔧 Advanced Settings", open=False):
148
+ max_tokens_slider = gr.Slider(
149
+ minimum=128,
150
+ maximum=2048,
151
+ step=128,
152
+ value=MAX_NEW_TOKENS,
153
+ label="Max New Tokens",
154
+ info="Maximum length of generated response"
155
+ )
156
+
157
+ temperature_slider = gr.Slider(
158
+ minimum=0.01,
159
+ maximum=2.0,
160
+ step=0.01,
161
+ value=TEMPERATURE,
162
+ label="🌡️ Temperature",
163
+ info="Higher = more creative, lower = more focused"
164
+ )
165
+
166
+ top_k_slider = gr.Slider(
167
+ minimum=1,
168
+ maximum=100,
169
+ step=1,
170
+ value=50,
171
+ label="πŸ” Top-K",
172
+ info="Sample from top K tokens"
173
+ )
174
+
175
+ top_p_slider = gr.Slider(
176
+ minimum=0.0,
177
+ maximum=1.0,
178
+ step=0.05,
179
+ value=0.95,
180
+ label="🎯 Top-P (Nucleus)",
181
+ info="Cumulative probability threshold"
182
+ )
183
 
184
  # RIGHT COLUMN
185
  with gr.Column(scale=2):
186
+ gr.Markdown("### 💬 Question & Answer")
187
+
188
  prompt = gr.Textbox(
189
  label="Prompt",
190
+ placeholder="Ask a question about the selected video...",
191
+ lines=4,
192
+ value="Describe what is happening in this video."
193
  )
194
+
195
  answer = gr.Textbox(
196
  label="Model Answer",
197
+ lines=20,
198
+ interactive=False
199
  )
200
+
201
+ run = gr.Button("🚀 Run Inference", variant="primary", size="lg")
202
+
203
+ gr.Markdown("""
204
+ ---
205
+ **ℹ️ Tips:**
206
+ - Adjust FPS to control video sampling rate (higher = more frames, slower inference)
207
+ - Use video_mode='frames' for fixed frame count (useful for very long videos)
208
+ - Temperature: Lower (0.01-0.5) for factual, higher (0.7-1.5) for creative responses
209
+ - Top-K and Top-P control output diversity
210
+ """)
211
 
212
  # Update video player when dropdown changes
213
  video_id.change(
 
219
  # Run inference
220
  run.click(
221
  fn=video_qa,
222
+ inputs=[
223
+ video_id,
224
+ prompt,
225
+ fps_slider,
226
+ max_tokens_slider,
227
+ temperature_slider,
228
+ top_k_slider,
229
+ top_p_slider,
230
+ video_mode_radio,
231
+ ],
232
  outputs=answer
233
  )
234