Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,11 +30,11 @@ def frame_capture(video_path, num_frames=5):
|
|
| 30 |
return frames
|
| 31 |
|
| 32 |
# Function to generate text descriptions for frames
|
| 33 |
-
def generate_descriptions_for_frames(video_path):
|
| 34 |
frames = frame_capture(video_path)
|
| 35 |
images = [PIL.Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in frames]
|
| 36 |
|
| 37 |
-
prompt = "Describe what is happening in each of these frames in this video sequentially."
|
| 38 |
images_with_prompt = [prompt] + images
|
| 39 |
|
| 40 |
responses = model.generate_content(images_with_prompt)
|
|
@@ -49,11 +49,13 @@ def format_descriptions(descriptions):
|
|
| 49 |
|
| 50 |
# Define Gradio interface
|
| 51 |
video_input = gr.Video(label="Upload Video", autoplay=True)
|
|
|
|
| 52 |
output_text = gr.Textbox(label="What's in this video")
|
| 53 |
|
| 54 |
# Create Gradio app
|
| 55 |
-
gr.Interface(
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
| 30 |
return frames
|
| 31 |
|
| 32 |
# Function to generate text descriptions for frames
|
| 33 |
+
def generate_descriptions_for_frames(video_path, user_prompt):
|
| 34 |
frames = frame_capture(video_path)
|
| 35 |
images = [PIL.Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in frames]
|
| 36 |
|
| 37 |
+
prompt = f"Describe what is happening in each of these frames in this video sequentially. {user_prompt}"
|
| 38 |
images_with_prompt = [prompt] + images
|
| 39 |
|
| 40 |
responses = model.generate_content(images_with_prompt)
|
|
|
|
| 49 |
|
| 50 |
# Define Gradio interface
|
| 51 |
video_input = gr.Video(label="Upload Video", autoplay=True)
|
| 52 |
+
user_input = gr.Textbox(label="Ask something specific about the video", placeholder="E.g., Are there any cars in this video?")
|
| 53 |
output_text = gr.Textbox(label="What's in this video")
|
| 54 |
|
| 55 |
# Create Gradio app
|
| 56 |
+
gr.Interface(
|
| 57 |
+
fn=generate_descriptions_for_frames,
|
| 58 |
+
inputs=[video_input, user_input],
|
| 59 |
+
outputs=output_text,
|
| 60 |
+
title="Interactive Video Analysis System"
|
| 61 |
+
).launch()
|