|
|
import os |
|
|
import time |
|
|
from typing import List, Tuple, Optional |
|
|
import google.generativeai as genai |
|
|
import gradio as gr |
|
|
from PIL import Image |
|
|
import tempfile |
|
|
import os |
|
|
|
|
|
# API key taken from the GEMINI_API_KEY environment variable.
# An unset *or empty* variable is normalised to None so that "no key
# configured" has exactly one representation: code that tests
# `GOOGLE_API_KEY is None` and code that tests its truthiness then agree.
GOOGLE_API_KEY = os.environ.get("GEMINI_API_KEY") or None
|
|
|
|
|
# Target width, in pixels, used when resizing an input image.
# (The constant was previously assigned twice; one definition is enough.)
IMAGE_WIDTH = 512
|
|
|
|
|
# System prompt applied to every request made through `model`.
# Written as adjacent literals purely for line length; the resulting
# string is a single line of text.
system_instruction_analysis = (
    "You are an expert of the given topic. "
    "Analyze the provided text with a focus on the topic, identifying recent issues, "
    "recent insights, or improvements relevant to academic standards and effectiveness. "
    "Offer actionable advice for enhancing knowledge and suggest real-life examples."
)

# Identifier of the Gemini model backing the chatbot.
model_name = "gemini-2.0-flash-exp"

# Shared model handle used by the response generator below.
model = genai.GenerativeModel(
    model_name=model_name,
    system_instruction=system_instruction_analysis,
)
|
|
|
|
|
|
|
|
|
|
|
def preprocess_stop_sequences(stop_sequences: str) -> Optional[List[str]]:
    """Parse a comma-separated string into a list of stop sequences.

    Each entry is stripped of surrounding whitespace. Entries that are empty
    after stripping (caused by trailing commas, doubled commas, or
    whitespace-only input) are discarded, because an empty string is not a
    meaningful stop sequence.

    Args:
        stop_sequences: Raw text such as ``"STOP, END"``; may be empty or None.

    Returns:
        The non-empty, stripped sequences in their original order, or ``None``
        when the input is empty or contains no usable entry.
    """
    if not stop_sequences:
        return None

    stripped = (seq.strip() for seq in stop_sequences.split(","))
    cleaned = [seq for seq in stripped if seq]
    # An all-empty result is reported the same way as "no input at all".
    return cleaned or None
|
|
|
|
|
def preprocess_image(image: Image.Image) -> Image.Image:
    """Resize an image to ``IMAGE_WIDTH`` pixels wide, keeping its aspect ratio.

    Args:
        image: The image to resize.

    Returns:
        A resized copy that is ``IMAGE_WIDTH`` wide and proportionally tall.
    """
    scaled_height = int(image.height * IMAGE_WIDTH / image.width)
    # For extremely wide images the proportional height truncates to 0,
    # which is not a valid size for resize(); keep at least one pixel row.
    image_height = max(1, scaled_height)
    return image.resize((IMAGE_WIDTH, image_height))
|
|
|
|
|
def user(text_prompt: str, chatbot: List[Tuple[str, str]]):
    """Append the submitted prompt to the chat history as an unanswered turn.

    Args:
        text_prompt: Text the user just submitted.
        chatbot: Chat history so far; it is not modified in place.

    Returns:
        A pair ``("", new_history)``: the empty string is the new value for
        the prompt box, and ``new_history`` is the old history followed by
        ``[text_prompt, None]`` (``None`` marks the reply as still pending).
    """
    pending_turn = [text_prompt, None]
    updated_history = chatbot + [pending_turn]
    return "", updated_history
|
|
|
|
|
def bot(
    google_key: str,
    image_prompt: Optional[Image.Image],
    temperature: float,
    max_output_tokens: int,
    stop_sequences: str,
    top_k: int,
    top_p: float,
    chatbot: List[Tuple[str, str]]
):
    """Answer the newest chat turn and stream the reply into the history.

    This is a generator: every ``yield`` hands back the (mutated) ``chatbot``
    history so the caller can redraw it while the reply grows.

    Args:
        google_key: API key entered by the user; falls back to
            ``GOOGLE_API_KEY`` when empty.
        image_prompt: Optional image to send along with the text.
        temperature: Passed to ``GenerationConfig(temperature=...)``.
        max_output_tokens: Passed to ``GenerationConfig(max_output_tokens=...)``.
        stop_sequences: Comma-separated stop sequences, parsed with
            ``preprocess_stop_sequences``.
        top_k: Passed to ``GenerationConfig(top_k=...)``.
        top_p: Passed to ``GenerationConfig(top_p=...)``.
        chatbot: Chat history; the last entry is the turn to answer and its
            second element is overwritten with the reply or an error message.

    Yields:
        The updated chat history.

    Raises:
        ValueError: If neither ``google_key`` nor ``GOOGLE_API_KEY`` is set.
    """
    google_key = google_key or GOOGLE_API_KEY
    if not google_key:
        raise ValueError("GOOGLE_API_KEY is not set. Please set it up.")

    # Without a pending turn there is nothing to answer; indexing
    # chatbot[-1] below would otherwise fail on an empty history.
    if not chatbot:
        yield chatbot
        return

    raw_text = chatbot[-1][0]
    text_prompt = raw_text.strip() if raw_text else None
    has_image = image_prompt is not None

    # Work out the effective prompt from whichever inputs were supplied.
    if not text_prompt and not has_image:
        chatbot[-1][1] = "Prompt cannot be empty. Please provide input text or an image."
        yield chatbot
        return
    if has_image and not text_prompt:
        text_prompt = "Describe the image"
    elif has_image:
        text_prompt = f"{text_prompt}. Also, analyze the provided image."

    # NOTE(review): `model` is created at import time, before this
    # configure() call - confirm that a per-request key is actually picked
    # up by a model instance that has already served a request.
    genai.configure(api_key=google_key)
    generation_config = genai.types.GenerationConfig(
        temperature=temperature,
        max_output_tokens=max_output_tokens,
        stop_sequences=preprocess_stop_sequences(stop_sequences),
        top_k=top_k,
        top_p=top_p,
    )

    inputs = [text_prompt]
    if has_image:
        inputs.append(preprocess_image(image_prompt))

    try:
        response = model.generate_content(inputs, stream=True, generation_config=generation_config)
        response.resolve()
    except Exception as e:
        chatbot[-1][1] = f"Error occurred: {str(e)}"
        yield chatbot
        return

    chatbot[-1][1] = ""
    for chunk in response:
        # Reading `.text` can itself fail for a chunk that carries no text
        # part; report that in the chat instead of letting the generator
        # die with an unhandled exception halfway through the reply.
        try:
            chunk_text = chunk.text
        except Exception as e:
            separator = "\n\n" if chatbot[-1][1] else ""
            chatbot[-1][1] += f"{separator}Error occurred: {str(e)}"
            yield chatbot
            return

        # Reveal the chunk ten characters at a time for a typing effect.
        for i in range(0, len(chunk_text), 10):
            chatbot[-1][1] += chunk_text[i:i + 10]
            time.sleep(0.01)
            yield chatbot

    # An empty reply produced no yield above; still publish the history so
    # the pending turn is shown with an (empty) answer.
    if not chatbot[-1][1]:
        yield chatbot
|
|
|
|
|
# --- Input / display components -------------------------------------------
# These are created up front and placed into the layout later via .render().

# Password-style box for the API key; only shown when no key was found in
# the environment.
google_key_component = gr.Textbox(
    type="password",
    label="Google API Key",
    placeholder="Enter your Google API Key",
    visible=GOOGLE_API_KEY is None,
)

# Optional image input, delivered to the handler as a PIL image.
image_prompt_component = gr.Image(
    label="Input Image (Optional: Figure/Graph)",
    type="pil",
)

# Conversation display.
chatbot_component = gr.Chatbot(
    bubble_full_width=False,
    label="Chatbot",
)

# Multi-line box for the user's question.
text_prompt_component = gr.Textbox(
    label="Ask",
    placeholder="Type your question here...",
    lines=3,
)

# Button that submits the current prompt.
run_button_component = gr.Button("Submit")
|
|
# --- Generation settings ---------------------------------------------------
# Each control feeds the generation parameter of the same name.

temperature_component = gr.Slider(
    label="Creativity (Temperature)",
    info="Controls the randomness of the response. Higher values result in more creative answers.",
    minimum=0,
    maximum=1.0,
    step=0.05,
    value=0.4,
)

max_output_tokens_component = gr.Slider(
    label="Response Length (Token Limit)",
    info="Sets the maximum number of tokens in the output response.",
    minimum=1,
    maximum=2048,
    step=1,
    value=1024,
)

# Free-text field; the value is a comma-separated list of stop sequences.
stop_sequences_component = gr.Textbox(
    label="Stop Sequences (Optional)",
    info="Specify sequences to stop the generation.",
    placeholder="Enter stop sequences, e.g., STOP, END",
)

top_k_component = gr.Slider(
    label="Top-K Sampling",
    info="Limits token selection to the top K most probable tokens. Lower values produce conservative outputs.",
    minimum=1,
    maximum=40,
    step=1,
    value=32,
)

top_p_component = gr.Slider(
    label="Top-P Sampling",
    info="Limits token selection to tokens with a cumulative probability up to P. Lower values produce conservative outputs.",
    minimum=0,
    maximum=1,
    step=0.01,
    value=1,
)
|
|
# Sample questions offered to the user as one-click prompts.
# Spelling fixed ("muliagent" -> "multiagent", "intgrate" -> "integrate",
# "difference" -> "differences"): these strings are shown to the user and
# are sent to the model as the prompt when selected.
example_scenarios = [
    "Describe Multimodal AI",
    "What are the differences between multiagent llm and multiagent system",
    "Why it's difficult to integrate multimodality in prompt",
]

# Sample images, one row per example (a single image input per row).
example_images = [["ex1.png"], ["ex2.png"]]
|
|
|
|
|
|
|
|
|
|
|
# Components whose values are passed to `user`, in its parameter order:
# (text_prompt, chatbot).
user_inputs = [
    text_prompt_component,
    chatbot_component,
]

# Components whose values are passed to `bot`. The order here must match the
# order of `bot`'s parameters, because the values are supplied positionally.
bot_inputs = [
    google_key_component,         # google_key
    image_prompt_component,       # image_prompt
    temperature_component,        # temperature
    max_output_tokens_component,  # max_output_tokens
    stop_sequences_component,     # stop_sequences
    top_k_component,              # top_k
    top_p_component,              # top_p
    chatbot_component,            # chatbot
]
|
|
|
|
|
|
|
|
# Page layout and event wiring. The components are created earlier in the
# file and placed here with .render().
with gr.Blocks(theme="earneleh/paris") as demo:
    gr.Markdown("<h1 style='font-size: 36px; font-weight: bold; font-family: Arial;'>Gemini 2.0 Multimodal Chatbot</h1>")

    with gr.Row():
        google_key_component.render()

    with gr.Row():
        chatbot_component.render()

    # Prompt text, optional image and submit button side by side.
    # Column `scale` is meant to be an integer; three equal integer scales
    # keep the same 1:1:1 proportions the former 0.5 values expressed.
    with gr.Row():
        with gr.Column(scale=1):
            text_prompt_component.render()
        with gr.Column(scale=1):
            image_prompt_component.render()
        with gr.Column(scale=1):
            run_button_component.render()

    with gr.Accordion("🧪Example Text 💬", open=False):
        example_radio = gr.Radio(
            choices=example_scenarios,
            label="Example Queries",
            info="Select an example query.")

        # Copy the chosen example into the prompt box.
        example_radio.change(
            fn=lambda query: query if query else "No query selected.",
            inputs=[example_radio],
            outputs=[text_prompt_component])

    with gr.Accordion("🧪Example Image 🩻", open=False):
        gr.Examples(
            examples=example_images,
            inputs=[image_prompt_component],
            label="Example Figures",
        )

    with gr.Accordion("🛠️Customize", open=False):
        temperature_component.render()
        max_output_tokens_component.render()
        stop_sequences_component.render()
        top_k_component.render()
        top_p_component.render()

    # Two-step submit: `user` first records the prompt in the history and
    # clears the text box, then `bot` fills in the reply.
    run_button_component.click(
        fn=user, inputs=user_inputs, outputs=[text_prompt_component, chatbot_component]
    ).then(
        fn=bot, inputs=bot_inputs, outputs=[chatbot_component]
    )

demo.launch()