ImageGen / text2image.py
3v324v23's picture
content type
67cd09d
raw
history blame
1.88 kB
import os
import logging
from io import BytesIO
from PIL import Image
import gradio as gr
from google import genai
from google.genai import types
# Configure logging: INFO and above are written to app.log with a
# timestamp, the level name and the message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename="app.log",
)
# Initialize the Gemini API client; the key is read from the
# GEMINI_API_KEY environment variable (None when it is not set).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
client = genai.Client(api_key=GEMINI_API_KEY)
# Image-to-text feature: describe an uploaded picture with Gemini.
def explain_image(image: Image.Image) -> str:
    """Return a Traditional-Chinese description of ``image`` from Gemini.

    The image is serialized to PNG, sent inline together with a fixed
    Traditional-Chinese prompt, and the text of the model's reply is
    returned.

    Args:
        image: The picture to describe, as delivered by the Gradio image
            component (``None`` when nothing has been uploaded).

    Returns:
        The description text produced by the model.

    Raises:
        gr.Error: If no image was supplied, or the model returned no text.
        Exception: Any error raised by the Gemini client is logged and
            re-raised unchanged.
    """
    # Gradio passes None when the button is clicked before an upload;
    # fail with a readable message instead of an AttributeError.
    if image is None:
        logging.warning("未收到圖片,無法產生說明。")
        raise gr.Error("請先上傳圖片。")

    # Serialize the PIL image to PNG bytes for inline transfer.
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    image_bytes = buffered.getvalue()

    # One user turn holding the image followed by the instruction text.
    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part(
                    inline_data=types.Blob(
                        mime_type="image/png",
                        data=image_bytes,
                    )
                ),
                types.Part(
                    text="請用繁體中文詳細說明這張圖片的內容。"
                ),
            ],
        )
    ]

    # In the google-genai SDK, generation lives under ``client.models`` and
    # the options are passed as ``config=GenerateContentConfig``.
    try:
        response = client.models.generate_content(
            model="gemini-1.5-flash",
            contents=contents,
            config=types.GenerateContentConfig(
                response_mime_type="text/plain"
            ),
        )
    except Exception:
        # The log file is the only diagnostic channel; record the traceback
        # and let the original exception propagate.
        logging.exception("呼叫 Gemini API 失敗。")
        raise

    # ``response.text`` is empty/None when there are no candidates or no
    # text parts (e.g. a blocked response), so no blind indexing is needed.
    explanation = response.text
    if not explanation:
        logging.warning("Gemini 未回傳任何文字說明。")
        raise gr.Error("Gemini 未回傳任何文字說明,請稍後再試。")

    logging.info("圖片說明成功取得。")
    return explanation
# Gradio interface: an image upload, a trigger button and a text box
# that shows the generated description.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Gemini 圖片解釋器(圖 ➜ 文)")

    image_input = gr.Image(label="上傳圖片", type="pil")
    explain_button = gr.Button("解釋圖片")
    output_text = gr.Textbox(lines=5, label="圖片說明")

    # Clicking the button sends the uploaded image to explain_image and
    # writes its return value into the text box.
    explain_button.click(
        fn=explain_image,
        inputs=image_input,
        outputs=output_text,
    )

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()