"""Gradio demo: upload an image and have Gemini describe it in Traditional Chinese."""

import logging
import os
from io import BytesIO

import gradio as gr
from google import genai
from google.genai import types
from PIL import Image

# Configure logging: records are appended to app.log in the working directory.
logging.basicConfig(
    filename='app.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Initialise the Gemini API client from the GEMINI_API_KEY environment variable.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
client = genai.Client(api_key=GEMINI_API_KEY)


def explain_image(image: Image.Image) -> str:
    """Return a Traditional Chinese description of *image* produced by Gemini.

    Args:
        image: The PIL image supplied by the Gradio image component. Gradio
            passes ``None`` when the button is clicked with no image uploaded.

    Returns:
        The text of the model's answer.

    Raises:
        gr.Error: If no image was supplied, the Gemini request fails, or the
            model returns no text. Gradio shows the message to the user.
    """
    # Guard clause: without it, a click with no upload dies on `None.save`.
    if image is None:
        raise gr.Error("請先上傳圖片。")

    # Serialise the PIL image to PNG bytes, the payload sent inline to Gemini.
    with BytesIO() as buffered:
        image.save(buffered, format="PNG")
        image_bytes = buffered.getvalue()

    # Build the request content directly: the image part followed by the prompt.
    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part(
                    inline_data=types.Blob(
                        mime_type="image/png",
                        data=image_bytes
                    )
                ),
                types.Part(
                    text="請用繁體中文詳細說明這張圖片的內容。"
                )
            ]
        )
    ]

    # Call the Gemini model. In the google-genai SDK the method lives on
    # `client.models` and its settings go in `config=GenerateContentConfig(...)`.
    # NOTE(review): confirm "gemini-1.5-flash" is still served for this API key.
    try:
        response = client.models.generate_content(
            model="gemini-1.5-flash",
            contents=contents,
            config=types.GenerateContentConfig(response_mime_type="text/plain")
        )
    except Exception as err:  # UI boundary: log the traceback, show a clean error.
        logging.exception("Gemini request failed.")
        raise gr.Error("圖片說明取得失敗,請稍後再試。") from err

    # `response.text` joins the text parts of the first candidate; it is empty
    # or None when the model returned no usable text (e.g. a blocked response).
    explanation = response.text
    if not explanation:
        logging.warning("Gemini returned no text for the image.")
        raise gr.Error("模型沒有回傳任何說明。")

    logging.info("圖片說明成功取得。")
    return explanation


# Gradio interface: image upload -> button -> textbox with the explanation.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Gemini 圖片解釋器(圖 ➜ 文)")
    image_input = gr.Image(type="pil", label="上傳圖片")
    explain_button = gr.Button("解釋圖片")
    output_text = gr.Textbox(label="圖片說明", lines=5)

    explain_button.click(fn=explain_image, inputs=image_input, outputs=output_text)

if __name__ == "__main__":
    demo.launch()