"""Gradio web UI for chatting with Gemini 1.5 Pro with multimodal input.

Users can send text plus image/audio/video attachments; replies come from
a single server-side Gemini chat session.
"""

import io
import os

import google.generativeai as genai
import gradio as gr
from gradio_multimodalchatbot import MultimodalChatbot
from PIL import Image

# Configure the Gemini client. A missing GEMINI_API_KEY raises KeyError
# here, failing fast at startup instead of on the first request.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Sampling parameters applied to every generation request.
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
}

model = genai.GenerativeModel(
    model_name="gemini-1.5-pro-latest",
    generation_config=generation_config,
)

# One module-level chat session shared by every request.
# NOTE(review): concurrent visitors therefore share a single conversation
# history — confirm whether per-user sessions are intended.
chat_session = model.start_chat(history=[])


def process_file(file):
    """Convert an uploaded file into a form the Gemini API accepts.

    Images are opened as PIL images; audio and video files are passed
    through as filesystem paths; anything else yields None (dropped).

    Parameters
    ----------
    file : uploaded-file object exposing ``.type`` (MIME string or None)
        and ``.path`` attributes, as provided by the chatbot component.

    Returns
    -------
    PIL.Image.Image | str | None
    """
    # The browser may supply no MIME type, leaving ``file.type`` as None;
    # coerce to "" so startswith() cannot raise AttributeError.
    mime = file.type or ""
    if mime.startswith('image'):
        return Image.open(file.path)
    if mime.startswith('audio') or mime.startswith('video'):
        return file.path
    return None


def respond(message, history):
    """Send one user turn (text plus optional attachments) to Gemini.

    Parameters
    ----------
    message : dict
        Chatbot-component message with a ``'text'`` key and an optional
        ``'files'`` list whose entries hold the upload under ``'file'``.
    history : list
        Prior turns from the UI. Unused here because ``chat_session``
        already tracks the conversation server-side.

    Returns
    -------
    dict
        Reply in the component's ``{'text': ..., 'files': []}`` shape.
    """
    attachments = []
    for entry in message.get('files', []):
        processed = process_file(entry['file'])
        if processed is not None:
            attachments.append(processed)

    prompt = message['text']
    if attachments:
        response = chat_session.send_message([prompt, *attachments])
    else:
        response = chat_session.send_message(prompt)
    return {"text": response.text, "files": []}


with gr.Blocks() as demo:
    gr.Markdown("# Gemini Multimodal Chatbot")
    gr.Markdown("Chat with the Gemini 1.5 Pro model. You can send text, images, audio, and video!")
    chatbot = MultimodalChatbot(
        height=600,
        bubble_full_width=False,
        avatar_images=(None, "https://lh3.googleusercontent.com/d/1pIo02xepBgqt9gMdFkJHSocJfH_A2dqj"),
        render_markdown=True,
    )
    # NOTE(review): MultimodalChatbot is a custom component; confirm it
    # exposes a ``.chat(fn, fill_height=...)`` helper with this signature.
    chatbot.chat(respond, fill_height=False)

if __name__ == "__main__":
    demo.launch()