Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -24,7 +24,6 @@ def load_videollama_model():
|
|
| 24 |
print("π Loading VideoLLaMA model...")
|
| 25 |
|
| 26 |
# Try to load a working multimodal model
|
| 27 |
-
# Note: Replace with actual VideoLLaMA3 model when available
|
| 28 |
model_name = "DAMO-NLP-SG/Video-LLaMA"
|
| 29 |
|
| 30 |
# Configure quantization for memory efficiency
|
|
@@ -144,7 +143,6 @@ def generate_basic_analysis(video_info, question, frames):
|
|
| 144 |
|
| 145 |
analysis_parts.append(f"- Average brightness: {'Bright' if avg_brightness > 127 else 'Dark'}")
|
| 146 |
analysis_parts.append(f"- Color variance: {'High contrast' if color_variance > 1000 else 'Low contrast'}")
|
| 147 |
-
analysis_parts.append(f"- Dominant colors: Analyzing RGB distribution...")
|
| 148 |
|
| 149 |
# Simple color analysis
|
| 150 |
r_avg = np.mean(first_frame[:,:,0])
|
|
@@ -206,10 +204,92 @@ def analyze_video_with_ai(video_file, question, progress=gr.Progress()):
|
|
| 206 |
try:
|
| 207 |
progress(0.7, desc="Running AI analysis...")
|
| 208 |
|
| 209 |
-
#
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
|
| 215 |
-
|
|
|
|
|
|
|
|
|
| 24 |
print("π Loading VideoLLaMA model...")
|
| 25 |
|
| 26 |
# Try to load a working multimodal model
|
|
|
|
| 27 |
model_name = "DAMO-NLP-SG/Video-LLaMA"
|
| 28 |
|
| 29 |
# Configure quantization for memory efficiency
|
|
|
|
| 143 |
|
| 144 |
analysis_parts.append(f"- Average brightness: {'Bright' if avg_brightness > 127 else 'Dark'}")
|
| 145 |
analysis_parts.append(f"- Color variance: {'High contrast' if color_variance > 1000 else 'Low contrast'}")
|
|
|
|
| 146 |
|
| 147 |
# Simple color analysis
|
| 148 |
r_avg = np.mean(first_frame[:,:,0])
|
|
|
|
| 204 |
try:
|
| 205 |
progress(0.7, desc="Running AI analysis...")
|
| 206 |
|
| 207 |
+
# For now, we'll use basic analysis since VideoLLaMA3 integration needs more work
|
| 208 |
+
result = generate_basic_analysis(video_info, question, frames)
|
| 209 |
+
result += "\n\nπ **Status:** Currently using basic analysis. VideoLLaMA3 integration in progress."
|
| 210 |
+
|
| 211 |
+
progress(1.0, desc="Complete!")
|
| 212 |
+
return result
|
| 213 |
+
|
| 214 |
+
except Exception as model_error:
|
| 215 |
+
print(f"Model error: {model_error}")
|
| 216 |
+
# Fall back to basic analysis
|
| 217 |
+
pass
|
| 218 |
+
|
| 219 |
+
# Use basic analysis
|
| 220 |
+
progress(0.8, desc="Generating analysis...")
|
| 221 |
+
result = generate_basic_analysis(video_info, question, frames)
|
| 222 |
+
progress(1.0, desc="Complete!")
|
| 223 |
+
|
| 224 |
+
return result
|
| 225 |
+
|
| 226 |
+
except Exception as e:
|
| 227 |
+
return f"β Error analyzing video: {str(e)}"
|
| 228 |
+
|
| 229 |
+
def create_interface():
    """Build the Gradio Blocks UI for the video analyzer.

    The model load is attempted first. If it raises, the error is printed
    and the interface is still built, so the app can start without a model.

    Returns:
        The assembled ``gr.Blocks`` app. The caller is responsible for
        calling ``.launch()`` on it.
    """
    # Attempt the model load up front. This call is synchronous: the UI is
    # only built after it returns or raises.
    try:
        load_videollama_model()
    except Exception as load_error:
        # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
        # propagate, and report why loading failed instead of hiding it.
        print(f"Model loading failed, using basic analysis mode: {load_error}")

    # NOTE(review): the "π…" prefixes in the labels below look like
    # mis-decoded emoji; they are kept verbatim here -- confirm against the
    # original app.py before changing them.
    with gr.Blocks(title="VideoLLama3 Analyzer", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# π₯ VideoLLama3 Video Analysis Tool")
        gr.Markdown("Upload a video and ask questions about its content!")

        with gr.Row():
            # Left column: inputs and the trigger button.
            with gr.Column(scale=1):
                video_input = gr.Video(
                    label="Upload Video (MP4, AVI, MOV)",
                    height=300,
                )
                question_input = gr.Textbox(
                    label="Ask a question about the video",
                    placeholder="What is happening in this video?",
                    lines=3,
                )
                analyze_btn = gr.Button("π Analyze Video", variant="primary", size="lg")

            # Right column: analysis output.
            with gr.Column(scale=1):
                output = gr.Textbox(
                    label="Analysis Results",
                    lines=20,
                    max_lines=25,
                )

        gr.Markdown("### π‘ Example Questions:")
        examples = [
            "What activities are happening in this video?",
            "Describe the people or objects you see.",
            "What is the setting or location?",
            "Summarize the main events.",
            "What emotions or mood does this convey?",
        ]

        # Two rows of shortcut buttons: the first three examples, then the rest.
        for row_examples in (examples[:3], examples[3:]):
            with gr.Row():
                for example in row_examples:
                    btn = gr.Button(example, size="sm")
                    # Bind the current example as a default argument so each
                    # button fills in its own text (avoids late binding).
                    btn.click(lambda x=example: x, outputs=question_input)

        analyze_btn.click(
            analyze_video_with_ai,
            inputs=[video_input, question_input],
            outputs=output,
            show_progress=True,
        )

        gr.Markdown("---")
        gr.Markdown("π **Status**: Video processing active - Upload a video to test!")

    return demo
|
| 292 |
|
| 293 |
+
# Script entry point: build the Blocks app, then start serving it.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
|