Spaces:

akhaliq
/

FastVLM-7B

Runtime error

App Files Files Community

akhaliq HF Staff committed on Sep 2

Commit

71ca584

verified ·

1 Parent(s): 224eae3

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -15

app.py CHANGED Viewed

@@ -123,23 +123,16 @@ def process_video(
     """Process video and generate captions"""
     if not video_path:
-        return "Please upload a video first.", None, None
     progress(0, desc="Extracting frames...")
     frames = extract_frames(video_path, num_frames, sampling_method)
     if not frames:
-        return "Failed to extract frames from video.", None, None
-    # Prepare prompt based on mode
-    if caption_mode == "Detailed Description":
-        prompt = "Describe this image in detail, including all visible objects, actions, and the overall scene."
-    elif caption_mode == "Brief Summary":
-        prompt = "Provide a brief one-sentence description of what's happening in this image."
-    elif caption_mode == "Action Recognition":
-        prompt = "What action or activity is taking place in this image? Focus on the main action."
-    else:  # Custom
-        prompt = custom_prompt if custom_prompt else "Describe this image."
     captions = []
     frame_previews = []
@@ -157,13 +150,11 @@ def process_video(
     # Generate overall summary if multiple frames
     if len(frames) > 1:
-        summary_prompt = f"Based on these frame descriptions, provide a coherent summary of the video:\n{full_caption}\n\nSummary:"
-        # For simplicity, we'll just combine the captions
         video_summary = f"## Video Analysis ({len(frames)} frames analyzed)\n\n{full_caption}"
     else:
         video_summary = f"## Video Analysis\n\n{full_caption}"
-    return video_summary, frame_previews, video_path
 # Create the Gradio interface
 with gr.Blocks() as demo:
@@ -259,7 +250,7 @@ with gr.Blocks() as demo:
     process_btn.click(
         process_video,
         inputs=[video_display, num_frames, sampling_method, caption_mode, custom_prompt],
-        outputs=[output_text, frame_gallery, video_display]
     )
     demo.launch()

     """Process video and generate captions"""
     if not video_path:
+        return "Please upload a video first.", None
     progress(0, desc="Extracting frames...")
     frames = extract_frames(video_path, num_frames, sampling_method)
     if not frames:
+        return "Failed to extract frames from video.", None
+    # Use brief one-sentence prompt for faster processing
+    prompt = "Provide a brief one-sentence description of what's happening in this image."
     captions = []
     frame_previews = []
     # Generate overall summary if multiple frames
     if len(frames) > 1:
         video_summary = f"## Video Analysis ({len(frames)} frames analyzed)\n\n{full_caption}"
     else:
         video_summary = f"## Video Analysis\n\n{full_caption}"
+    return video_summary, frame_previews
 # Create the Gradio interface
 with gr.Blocks() as demo:
     process_btn.click(
         process_video,
         inputs=[video_display, num_frames, sampling_method, caption_mode, custom_prompt],
+        outputs=[output_text, frame_gallery]
     )
     demo.launch()