cweigendev committed
Commit 8c79956 · verified · 1 Parent(s): 1c7a7e5

Update app.py

Files changed (1):
  app.py +88 -8
app.py CHANGED
@@ -24,7 +24,6 @@ def load_videollama_model():
     print("🔄 Loading VideoLLaMA model...")
 
     # Try to load a working multimodal model
-    # Note: Replace with actual VideoLLaMA3 model when available
     model_name = "DAMO-NLP-SG/Video-LLaMA"
 
     # Configure quantization for memory efficiency
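The quantization setup itself sits outside this hunk. As a rough sketch of what a 4-bit load for this checkpoint might look like with transformers + bitsandbytes — the model class and arguments here are assumptions, not the app's actual code:

```python
# Hedged sketch only: Video-LLaMA's real loading path may differ, and this
# checkpoint may not support AutoModelForCausalLM out of the box.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # store weights in 4 bits
    bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
)

model = AutoModelForCausalLM.from_pretrained(
    "DAMO-NLP-SG/Video-LLaMA",
    quantization_config=quant_config,
    device_map="auto",                     # spread layers across available devices
)
```

4-bit weights need roughly a quarter of the fp16 footprint, which is presumably what the "memory efficiency" comment is after.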
@@ -144,7 +143,6 @@ def generate_basic_analysis(video_info, question, frames):
 
     analysis_parts.append(f"- Average brightness: {'Bright' if avg_brightness > 127 else 'Dark'}")
     analysis_parts.append(f"- Color variance: {'High contrast' if color_variance > 1000 else 'Low contrast'}")
-    analysis_parts.append(f"- Dominant colors: Analyzing RGB distribution...")
 
     # Simple color analysis
     r_avg = np.mean(first_frame[:,:,0])
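The hunk truncates immediately after `r_avg`. A hypothetical continuation of the "simple color analysis", assuming `first_frame` is an RGB array; note that frames read with OpenCV's `cv2.VideoCapture` come back as BGR, so channel 0 is blue unless converted first:

```python
import numpy as np

def dominant_channel(first_frame: np.ndarray) -> str:
    """Guess the dominant color channel of one RGB frame (hypothetical helper)."""
    r_avg = np.mean(first_frame[:, :, 0])
    g_avg = np.mean(first_frame[:, :, 1])
    b_avg = np.mean(first_frame[:, :, 2])
    # Whichever channel has the highest mean intensity "wins"
    channels = {"Red": r_avg, "Green": g_avg, "Blue": b_avg}
    return max(channels, key=channels.get)
```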
@@ -206,10 +204,92 @@ def analyze_video_with_ai(video_file, question, progress=gr.Progress()):
         try:
             progress(0.7, desc="Running AI analysis...")
 
-            # Prepare prompt for VideoLLaMA
-            prompt = f"""Human: I have a video with the following details:
-            - Duration: {video_info.get('duration', 0):.1f} seconds
-            - {len(frames)} key frames extracted
-            - Question: {question}
-
-            Please analyze this video and provide a detailed response.
+            # For now, we'll use basic analysis since VideoLLaMA3 integration needs more work
+            result = generate_basic_analysis(video_info, question, frames)
+            result += "\n\n🔄 **Status:** Currently using basic analysis. VideoLLaMA3 integration in progress."
+
+            progress(1.0, desc="Complete!")
+            return result
+
+        except Exception as model_error:
+            print(f"Model error: {model_error}")
+            # Fall back to basic analysis
+            pass
+
+        # Use basic analysis
+        progress(0.8, desc="Generating analysis...")
+        result = generate_basic_analysis(video_info, question, frames)
+        progress(1.0, desc="Complete!")
+
+        return result
+
+    except Exception as e:
+        return f"❌ Error analyzing video: {str(e)}"
+
+def create_interface():
+    """Create the Gradio interface"""
+
+    # Try to load model on startup (non-blocking)
+    try:
+        load_videollama_model()
+    except Exception:
+        print("Model loading failed, using basic analysis mode")
+
+    with gr.Blocks(title="VideoLLama3 Analyzer", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# 🎥 VideoLLama3 Video Analysis Tool")
+        gr.Markdown("Upload a video and ask questions about its content!")
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                video_input = gr.Video(
+                    label="Upload Video (MP4, AVI, MOV)",
+                    height=300
+                )
+                question_input = gr.Textbox(
+                    label="Ask a question about the video",
+                    placeholder="What is happening in this video?",
+                    lines=3
+                )
+                analyze_btn = gr.Button("🔍 Analyze Video", variant="primary", size="lg")
+
+            with gr.Column(scale=1):
+                output = gr.Textbox(
+                    label="Analysis Results",
+                    lines=20,
+                    max_lines=25
+                )
+
+        gr.Markdown("### 💡 Example Questions:")
+        examples = [
+            "What activities are happening in this video?",
+            "Describe the people or objects you see.",
+            "What is the setting or location?",
+            "Summarize the main events.",
+            "What emotions or mood does this convey?"
+        ]
+
+        with gr.Row():
+            for example in examples[:3]:
+                btn = gr.Button(example, size="sm")
+                btn.click(lambda x=example: x, outputs=question_input)
+
+        with gr.Row():
+            for example in examples[3:]:
+                btn = gr.Button(example, size="sm")
+                btn.click(lambda x=example: x, outputs=question_input)
+
+        analyze_btn.click(
+            analyze_video_with_ai,
+            inputs=[video_input, question_input],
+            outputs=output,
+            show_progress=True
+        )
+
+        gr.Markdown("---")
+        gr.Markdown("🚀 **Status**: Video processing active - Upload a video to test!")
+
+    return demo
 
+if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch()
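One detail worth calling out in the new interface code: the example buttons are wired with `lambda x=example: x` rather than `lambda: example`. Python closures capture loop variables late, so without the default argument every button would fill in the last example in the list. A minimal standalone demonstration of the difference:

```python
# Why the diff uses a default argument in the button callbacks:
examples = ["a", "b", "c"]

late = [lambda: ex for ex in examples]       # all close over the same `ex`
bound = [lambda x=ex: x for ex in examples]  # default arg binds per iteration

print([f() for f in late])   # ['c', 'c', 'c'] - all see the final value
print([f() for f in bound])  # ['a', 'b', 'c'] - each bound at creation time
```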