Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| from typing import List, Tuple, Optional | |
| import google.generativeai as genai | |
| import gradio as gr | |
| from PIL import Image | |
| import tempfile | |
| import os | |
# API key taken from the environment; may be None, in which case the
# key must be supplied through the UI textbox instead.
GOOGLE_API_KEY = os.environ.get("GEMINI_API_KEY")

# Target width, in pixels, used when downscaling an uploaded image.
IMAGE_WIDTH = 512

# System prompt attached to every model instance created by the app.
system_instruction_analysis = "You are an expert of the given topic. Analyze the provided text with a focus on the topic, identifying recent issues, recent insights, or improvements relevant to academic standards and effectiveness. Offer actionable advice for enhancing knowledge and suggest real-life examples."

# NOTE(review): confirm this model id is actually served by the Gemini API
# for the key in use; the model is constructed from this name on each request.
model_name = "gemini-2.5-flash-exp"
| # Helper Functions | |
def preprocess_stop_sequences(stop_sequences: str) -> Optional[List[str]]:
    """Split a comma-separated string into a list of stop sequences.

    Each piece is stripped of surrounding whitespace. Pieces that are empty
    after stripping (from trailing or doubled commas, or whitespace-only
    input) are dropped so that no empty stop sequence is produced.

    Args:
        stop_sequences: Comma-separated stop sequences, e.g. "STOP, END".
            May be empty or None.

    Returns:
        The list of non-empty stop sequences, or None when there are none.
    """
    if not stop_sequences:
        return None
    stripped = (piece.strip() for piece in stop_sequences.split(","))
    sequences = [piece for piece in stripped if piece]
    # Return None rather than [] so "no stop sequences" has a single form.
    return sequences or None
def preprocess_image(image: Image.Image) -> Image.Image:
    """Return a copy of *image* resized to IMAGE_WIDTH pixels wide.

    The height is scaled by the same factor as the width (truncated to an
    integer) so the aspect ratio is approximately preserved.

    Args:
        image: The PIL image to resize.

    Returns:
        The resized image, IMAGE_WIDTH pixels wide and at least 1 pixel tall.
    """
    # Clamp to 1: for very wide images the truncated height would be 0,
    # and resizing to a zero-sized dimension is rejected.
    image_height = max(1, int(image.height * IMAGE_WIDTH / image.width))
    return image.resize((IMAGE_WIDTH, image_height))
def user(text_prompt: str, chatbot: List[Tuple[str, str]]):
    """Append the submitted prompt to the chat history as an unanswered turn.

    Args:
        text_prompt: The text the user submitted.
        chatbot: The current chat history; it is not modified in place.

    Returns:
        A pair of (empty string, new history). The new history is the old
        one followed by ``[text_prompt, None]``, where None marks the reply
        that has not been produced yet.
    """
    pending_turn = [text_prompt, None]
    updated_history = chatbot + [pending_turn]
    cleared_textbox = ""
    return cleared_textbox, updated_history
def bot(
    google_key: str,
    image_prompt: Optional[Image.Image],
    temperature: float,
    max_output_tokens: int,
    stop_sequences: str,
    top_k: int,
    top_p: float,
    chatbot: List[Tuple[str, str]]
):
    """Generate a model reply for the newest chat turn and stream it back.

    This is a generator: it yields the chat history repeatedly as the reply
    in the last turn grows, and yields once with an explanatory message when
    the request cannot be made or fails.

    Args:
        google_key: API key entered in the UI; when empty, GOOGLE_API_KEY
            is used instead.
        image_prompt: Optional image to send with the prompt.
        temperature: Sampling temperature passed to the generation config.
        max_output_tokens: Token limit passed to the generation config.
        stop_sequences: Comma-separated stop sequences (may be empty).
        top_k: Top-k value passed to the generation config.
        top_p: Top-p value passed to the generation config.
        chatbot: Chat history whose last entry is ``[prompt, None]``; the
            second element of that entry is overwritten in place.

    Yields:
        The chat history with the last turn's reply filled in so far.
    """
    used_api_key = google_key if google_key else GOOGLE_API_KEY
    if not used_api_key:
        chatbot[-1][1] = "❌ No API Key found! Please provide it."
        yield chatbot
        return

    text_prompt = chatbot[-1][0].strip() if chatbot[-1][0] else None

    # Work out the effective prompt for text-only, image-only and combined input.
    if not text_prompt and image_prompt is None:
        chatbot[-1][1] = "Prompt cannot be empty. Please provide input text or an image."
        yield chatbot
        return
    if image_prompt is not None:
        if text_prompt:
            # Both text and image were provided: ask for both to be handled.
            text_prompt = f"{text_prompt}. Also, analyze the provided image."
        else:
            # Only an image was provided: fall back to a default request.
            text_prompt = "Describe the image"

    # Configure the client and the per-request generation settings.
    genai.configure(api_key=used_api_key)
    generation_config = genai.types.GenerationConfig(
        temperature=temperature,
        max_output_tokens=max_output_tokens,
        stop_sequences=preprocess_stop_sequences(stop_sequences),
        top_k=top_k,
        top_p=top_p,
    )
    model = genai.GenerativeModel(model_name, system_instruction=system_instruction_analysis)

    # Use the effective prompt computed above. Re-reading it from the history
    # here would discard the image-only default and the combined prompt.
    inputs = [text_prompt] if image_prompt is None else [text_prompt, image_prompt]

    # Iterate the stream directly so text appears as chunks arrive, and keep
    # the iteration inside the try so failures while reading a chunk (for
    # example a chunk with no usable text) are reported in the chat.
    chatbot[-1][1] = ""
    try:
        response = model.generate_content(inputs, stream=True, generation_config=generation_config)
        for chunk in response:
            chunk_text = chunk.text
            # Emit the chunk in 10-character slices with a short pause so the
            # reply appears gradually.
            for start in range(0, len(chunk_text), 10):
                chatbot[-1][1] += chunk_text[start:start + 10]
                time.sleep(0.01)
                yield chatbot
    except Exception as e:
        chatbot[-1][1] = f"Error occurred: {str(e)}"
        yield chatbot
        return
| # Components | |
# Password-style textbox for the API key; shown only when no key was found
# in the environment.
google_key_component = gr.Textbox(
    type="password",
    label="Google API Key",
    placeholder="Enter your Google API Key",
    visible=(GOOGLE_API_KEY is None),
)

# Optional image input, delivered to callbacks as a PIL image.
image_prompt_component = gr.Image(label="Input Image (Optional: Figure/Graph)", type="pil")

# Conversation display.
chatbot_component = gr.Chatbot(bubble_full_width=False, label="Chatbot")

# Multi-line question box and its submit button.
text_prompt_component = gr.Textbox(label="Ask", lines=3, placeholder="Type your question here...")
run_button_component = gr.Button("Submit")

# Generation settings exposed to the user.
temperature_component = gr.Slider(
    label="Creativity (Temperature)",
    info="Controls the randomness of the response. Higher values result in more creative answers.",
    minimum=0,
    maximum=1.0,
    step=0.05,
    value=0.4,
)
max_output_tokens_component = gr.Slider(
    label="Response Length (Token Limit)",
    info="Sets the maximum number of tokens in the output response.",
    minimum=1,
    maximum=2048,
    step=1,
    value=1024,
)
stop_sequences_component = gr.Textbox(
    label="Stop Sequences (Optional)",
    info="Specify sequences to stop the generation.",
    placeholder="Enter stop sequences, e.g., STOP, END",
)
top_k_component = gr.Slider(
    label="Top-K Sampling",
    info="Limits token selection to the top K most probable tokens. Lower values produce conservative outputs.",
    minimum=1,
    maximum=40,
    step=1,
    value=32,
)
top_p_component = gr.Slider(
    label="Top-P Sampling",
    info="Limits token selection to tokens with a cumulative probability up to P. Lower values produce conservative outputs.",
    minimum=0,
    maximum=1,
    step=0.01,
    value=1,
)
# Example questions offered in the UI; the selected one is copied into the
# question box.
example_scenarios = [
    "Describe Multimodal AI",
    "What is the difference between a multi-agent LLM and a multi-agent system?",
    "Why is it difficult to integrate multimodality in a prompt?",
]

# Example image files, one single-element row per example.
example_images = [["ex1.png"], ["ex2.png"]]
| # Gradio Interface | |
# Inputs for `user`, in the order of its parameters: the question box and
# the chat history.
user_inputs = [
    text_prompt_component,
    chatbot_component,
]

# Inputs for `bot`; the order must match its positional parameters.
bot_inputs = [
    google_key_component,
    image_prompt_component,
    temperature_component,
    max_output_tokens_component,
    stop_sequences_component,
    top_k_component,
    top_p_component,
    chatbot_component,
]
# Page layout and event wiring.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost; confirm it matches the intended layout.
with gr.Blocks(theme="earneleh/paris") as demo:
    gr.Markdown("<h1 style='font-size: 36px; font-weight: bold; font-family: Arial;'>Gemini Multimodal Chatbot</h1>")

    with gr.Row():
        google_key_component.render()

    with gr.Row():
        chatbot_component.render()

    # Gradio expects integer `scale` values. Equal weights of 1 give the
    # three columns the same relative widths as equal weights of 0.5 did.
    with gr.Row():
        with gr.Column(scale=1):
            text_prompt_component.render()
        with gr.Column(scale=1):
            image_prompt_component.render()
        with gr.Column(scale=1):
            run_button_component.render()

    with gr.Accordion("🧪Example Text 💬", open=False):
        example_radio = gr.Radio(
            choices=example_scenarios,
            label="Example Queries",
            info="Select an example query.")
        # Copy the selected example into the question box.
        example_radio.change(
            fn=lambda query: query if query else "No query selected.",
            inputs=[example_radio],
            outputs=[text_prompt_component])

    # Clickable example images that populate the image input.
    with gr.Accordion("🧪Example Image 🩻", open=False):
        gr.Examples(
            examples=example_images,
            inputs=[image_prompt_component],
            label="Example Figures",
        )

    with gr.Accordion("🛠️Customize", open=False):
        temperature_component.render()
        max_output_tokens_component.render()
        stop_sequences_component.render()
        top_k_component.render()
        top_p_component.render()

    # On submit: first record the question and clear the box, then stream
    # the model's reply into the chat.
    run_button_component.click(
        fn=user, inputs=user_inputs, outputs=[text_prompt_component, chatbot_component]
    ).then(
        fn=bot, inputs=bot_inputs, outputs=[chatbot_component]
    )

demo.launch()