# Multimodal_Gemini / gemini_video_analysis.py
# (renamed from app.py; author: Rahatara)
import cv2
import gradio as gr
import google.generativeai as genai
import os
import PIL.Image
# SECURITY FIX: the original hard-coded a Google API key in source (and bound
# it to an unused YOUR_API_KEY variable). That key is exposed in the repo
# history and should be revoked. Read the key from the environment instead.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))

# Multimodal Gemini model used to describe video frames.
model = genai.GenerativeModel('gemini-pro-vision')
# Function to capture frames from a video
def frame_capture(video_path, num_frames=5):
    """Sample up to ``num_frames`` evenly spaced frames from a video.

    Args:
        video_path: Path to a video file readable by OpenCV.
        num_frames: Maximum number of frames to sample (default 5).

    Returns:
        A list of BGR frames (numpy arrays); empty if the video yields
        no readable frames.
    """
    capture = cv2.VideoCapture(video_path)
    try:
        total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        # Guard against short or unreadable videos: the original divided by
        # (total // num_frames), which raises ZeroDivisionError whenever the
        # video has fewer frames than num_frames (or the count is unknown).
        step = max(total // num_frames, 1)
        frames = []
        index = 0
        # Stop once we have num_frames samples; the original could overshoot.
        while len(frames) < num_frames:
            success, frame = capture.read()
            if not success:
                break
            if index % step == 0:
                frames.append(frame)
            index += 1
        return frames
    finally:
        # Release the capture handle — the original leaked it.
        capture.release()
# Format description text for display.
def format_descriptions(descriptions):
    """Collapse a list of description strings into one cleaned string.

    Joins the pieces with single spaces, trims surrounding whitespace,
    then substitutes a space for every character that is neither
    alphanumeric nor whitespace.
    """
    combined = " ".join(descriptions).strip()
    cleaned_chars = []
    for ch in combined:
        if ch.isalnum() or ch.isspace():
            cleaned_chars.append(ch)
        else:
            # Special characters become spaces rather than being dropped.
            cleaned_chars.append(" ")
    return "".join(cleaned_chars)
# Function to generate text descriptions for frames
def generate_descriptions_for_frames(video_path):
    """Describe sampled frames of a video with the Gemini vision model.

    Args:
        video_path: Path to the uploaded video file.

    Returns:
        A single cleaned string describing the sampled frames.
    """
    # Capture frames from the video
    frames = frame_capture(video_path)
    # OpenCV returns BGR arrays; the model expects RGB PIL images.
    images = [PIL.Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in frames]
    # Prepare the prompt with images and instructions
    instructions = "Instructions: Consider the following frames from the video:"
    prompt = "What is shown in each of the frames?"
    images_with_prompt = [prompt] + [instructions] + images
    # BUG FIX: generate_content (non-streaming) returns a single
    # GenerateContentResponse, not an iterable of per-frame responses.
    # The original iterated over it and read .text on the pieces, which is
    # only valid for streaming calls. Use the aggregated .text directly.
    response = model.generate_content(images_with_prompt)
    descriptions = [response.text]
    return format_descriptions(descriptions)
# --- Gradio UI -----------------------------------------------------------
# Input: an uploaded video (autoplays in the preview).
uploaded_video = gr.Video(label="Upload Video", autoplay=True)
# Output: a textbox holding the generated frame descriptions.
descriptions_box = gr.Textbox(label="Frame Descriptions")

# Wire the description generator into an app and start serving it.
demo = gr.Interface(
    fn=generate_descriptions_for_frames,
    inputs=uploaded_video,
    outputs=descriptions_box,
    title="Frame Description Generator",
)
demo.launch()