Spaces:
Sleeping
Sleeping
File size: 3,347 Bytes
10c3a07 0e1249d 3d7f238 25080b2 3d7f238 25080b2 3d7f238 0e1249d 3d7f238 0e1249d 10c3a07 0e1249d 10c3a07 25080b2 10c3a07 0e1249d 10c3a07 0e1249d e05482a 0e1249d 6517f50 0e1249d 3d7f238 0e1249d 3d7f238 0e1249d 3d7f238 0e1249d 3d7f238 0e1249d e05482a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import cv2
import gradio as gr
import google.generativeai as genai
import os
import PIL.Image
# Authenticate the Google Generative AI client with the key taken from the
# GOOGLE_API_KEY environment variable (None is passed through when it is unset).
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
# Module-level Gemini model handle shared by the request handlers in this file.
model = genai.GenerativeModel("gemini-1.5-flash")
# Function to capture frames from a video
def frame_capture(video_path, num_frames=5):
    """Sample up to ``num_frames`` evenly spaced frames from a video.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        num_frames: Maximum number of frames to return. Zero or a negative
            value yields an empty list.

    Returns:
        A list of frames exactly as returned by ``VideoCapture.read`` (the
        callers in this file convert them from BGR to RGB). The list is
        shorter than ``num_frames`` when the video has fewer frames or a
        read fails, and empty when the video cannot be opened.
    """
    # Guard first: a zero count would otherwise divide by zero when the
    # sampling step is computed below.
    if num_frames <= 0:
        return []

    capture = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not capture.isOpened():
            return frames

        total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        # A step of at least 1 keeps short videos (or containers reporting a
        # bogus frame count) moving forward instead of re-reading frame 0.
        frame_step = max(1, total_frames // num_frames)

        for position in range(0, frame_step * num_frames, frame_step):
            capture.set(cv2.CAP_PROP_POS_FRAMES, position)
            success, image = capture.read()
            if not success:
                # Ran past the end of the video or hit a decode error.
                break
            frames.append(image)
    finally:
        # Release the underlying decoder even if a call above raises.
        capture.release()
    return frames
# Function to generate text descriptions for frames
def generate_descriptions_for_frames(video_path):
    """Describe sampled frames of a video with the fixed railway-defect prompt.

    Args:
        video_path: Path of the uploaded video, or a falsy value when the
            user has not uploaded anything yet.

    Returns:
        The model's description with surrounding whitespace removed, or a
        short explanatory message when there is no video, no frame could be
        decoded, or the model reply carries no text.
    """
    # Without this guard a missing upload would be passed straight to OpenCV.
    if not video_path:
        return "Please upload a video first."

    frames = frame_capture(video_path)
    if not frames:
        # Sending the prompt with no images would make the model describe
        # frames it never saw.
        return "Could not read any frames from the uploaded video."

    # OpenCV decodes to BGR; PIL expects RGB.
    images = [
        PIL.Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        for frame in frames
    ]
    prompt = "Describe what is happening in each of these frames. Identify any potential railway defect or risk."
    response = model.generate_content([prompt] + images)

    try:
        # A non-streaming call returns one response whose text is exposed
        # through the `.text` accessor.
        description = response.text
    except ValueError:
        # `.text` raises ValueError when the reply has no usable text part
        # (for example when the request was blocked).
        return "The model did not return a text description for this video."

    return format_descriptions([description])
# Function to handle chat interaction
def chat_interaction(video_path, chatbot, user_message):
    """Answer a free-form question about the uploaded video.

    Args:
        video_path: Path of the uploaded video, or a falsy value when
            nothing has been uploaded.
        chatbot: Chat history as a list of ``(user, bot)`` tuples; it is
            extended in place. ``None`` is treated as an empty history.
        user_message: The question typed by the user.

    Returns:
        A ``("", history)`` tuple: the empty string clears the input box and
        ``history`` is the updated chat history. Blank questions leave the
        history unchanged; a missing or unreadable video is reported as the
        bot's reply instead of raising.
    """
    history = chatbot if chatbot is not None else []

    # Ignore blank submissions instead of sending an empty question.
    if not (user_message or "").strip():
        return "", history

    if not video_path:
        history.append((user_message, "Please upload a video first."))
        return "", history

    frames = frame_capture(video_path)
    if not frames:
        history.append(
            (user_message, "Could not read any frames from the uploaded video.")
        )
        return "", history

    # OpenCV decodes to BGR; PIL expects RGB.
    images = [
        PIL.Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        for frame in frames
    ]
    prompt = f"Based on these video frames, {user_message}"
    response = model.generate_content([prompt] + images)

    try:
        response_text = response.text
    except ValueError:
        # `.text` raises ValueError when the reply has no usable text part
        # (for example when the request was blocked).
        response_text = "The model did not return a text answer."

    history.append((user_message, response_text))
    return "", history
# Helper function to format descriptions
def format_descriptions(descriptions):
    """Merge description strings into one space-separated, trimmed string."""
    combined = " ".join(descriptions)
    # Only the outer whitespace is removed; spacing inside the text is kept.
    return combined.strip()
# Define the Gradio interfaces for each tab
# Tab 1: Video Analysis System with Set Prompt
# The user uploads a video and presses the button; the fixed prompt in
# generate_descriptions_for_frames is run and the reply is shown in a textbox.
with gr.Blocks() as tab1:
    with gr.Column():
        gr.Markdown("### Video Analysis System")
        video_input_1 = gr.Video(label="Upload Video", autoplay=True)
        output_text = gr.Textbox(label="What's this video")
        analyze_button = gr.Button("Analyze Video")
        # The handler receives the video component's value; its return value
        # is written to the textbox.
        analyze_button.click(fn=generate_descriptions_for_frames, inputs=video_input_1, outputs=output_text)
# Tab 2: Interactive Chat Mode
# The user uploads a video and asks free-form questions about it; each
# question/answer pair is appended to the chatbot history by chat_interaction.
with gr.Blocks() as tab2:
    with gr.Column():
        gr.Markdown("### Interactive Chat Mode")
        video_input_2 = gr.Video(label="Upload Video", autoplay=True)
        chatbot = gr.Chatbot(label="Video Analysis Chatbot")
        user_input = gr.Textbox(label="Ask something specific about the video", placeholder="E.g., Are there any cars in this video?")
        # Submitting the textbox passes (video, history, message) to the handler.
        # The handler returns ("", history): the first value clears the textbox,
        # the second refreshes the chatbot.
        user_input.submit(fn=chat_interaction, inputs=[video_input_2, chatbot, user_input], outputs=[user_input, chatbot])
# Combine the two tabs into a single interface
with gr.Blocks() as demo:
    # The second list gives the tab titles, matched to tab1/tab2 by position.
    gr.TabbedInterface([tab1, tab2], ["Video Analysis", "Interactive Chat"])
# Start the Gradio server. There is no __main__ guard, so this runs whenever
# the module is executed or imported.
demo.launch()
|