Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -123,23 +123,16 @@ def process_video(
|
|
| 123 |
"""Process video and generate captions"""
|
| 124 |
|
| 125 |
if not video_path:
|
| 126 |
-
return "Please upload a video first.", None
|
| 127 |
|
| 128 |
progress(0, desc="Extracting frames...")
|
| 129 |
frames = extract_frames(video_path, num_frames, sampling_method)
|
| 130 |
|
| 131 |
if not frames:
|
| 132 |
-
return "Failed to extract frames from video.", None
|
| 133 |
|
| 134 |
-
#
|
| 135 |
-
|
| 136 |
-
prompt = "Describe this image in detail, including all visible objects, actions, and the overall scene."
|
| 137 |
-
elif caption_mode == "Brief Summary":
|
| 138 |
-
prompt = "Provide a brief one-sentence description of what's happening in this image."
|
| 139 |
-
elif caption_mode == "Action Recognition":
|
| 140 |
-
prompt = "What action or activity is taking place in this image? Focus on the main action."
|
| 141 |
-
else: # Custom
|
| 142 |
-
prompt = custom_prompt if custom_prompt else "Describe this image."
|
| 143 |
|
| 144 |
captions = []
|
| 145 |
frame_previews = []
|
|
@@ -157,13 +150,11 @@ def process_video(
|
|
| 157 |
|
| 158 |
# Generate overall summary if multiple frames
|
| 159 |
if len(frames) > 1:
|
| 160 |
-
summary_prompt = f"Based on these frame descriptions, provide a coherent summary of the video:\n{full_caption}\n\nSummary:"
|
| 161 |
-
# For simplicity, we'll just combine the captions
|
| 162 |
video_summary = f"## Video Analysis ({len(frames)} frames analyzed)\n\n{full_caption}"
|
| 163 |
else:
|
| 164 |
video_summary = f"## Video Analysis\n\n{full_caption}"
|
| 165 |
|
| 166 |
-
return video_summary, frame_previews
|
| 167 |
|
| 168 |
# Create the Gradio interface
|
| 169 |
with gr.Blocks() as demo:
|
|
@@ -259,7 +250,7 @@ with gr.Blocks() as demo:
|
|
| 259 |
process_btn.click(
|
| 260 |
process_video,
|
| 261 |
inputs=[video_display, num_frames, sampling_method, caption_mode, custom_prompt],
|
| 262 |
-
outputs=[output_text, frame_gallery
|
| 263 |
)
|
| 264 |
|
| 265 |
demo.launch()
|
|
|
|
| 123 |
"""Process video and generate captions"""
|
| 124 |
|
| 125 |
if not video_path:
|
| 126 |
+
return "Please upload a video first.", None
|
| 127 |
|
| 128 |
progress(0, desc="Extracting frames...")
|
| 129 |
frames = extract_frames(video_path, num_frames, sampling_method)
|
| 130 |
|
| 131 |
if not frames:
|
| 132 |
+
return "Failed to extract frames from video.", None
|
| 133 |
|
| 134 |
+
# Use brief one-sentence prompt for faster processing
|
| 135 |
+
prompt = "Provide a brief one-sentence description of what's happening in this image."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
captions = []
|
| 138 |
frame_previews = []
|
|
|
|
| 150 |
|
| 151 |
# Generate overall summary if multiple frames
|
| 152 |
if len(frames) > 1:
|
|
|
|
|
|
|
| 153 |
video_summary = f"## Video Analysis ({len(frames)} frames analyzed)\n\n{full_caption}"
|
| 154 |
else:
|
| 155 |
video_summary = f"## Video Analysis\n\n{full_caption}"
|
| 156 |
|
| 157 |
+
return video_summary, frame_previews
|
| 158 |
|
| 159 |
# Create the Gradio interface
|
| 160 |
with gr.Blocks() as demo:
|
|
|
|
| 250 |
process_btn.click(
|
| 251 |
process_video,
|
| 252 |
inputs=[video_display, num_frames, sampling_method, caption_mode, custom_prompt],
|
| 253 |
+
outputs=[output_text, frame_gallery]
|
| 254 |
)
|
| 255 |
|
| 256 |
demo.launch()
|