Linebotpic

Sleeping

App Files Files Community

alanchen1115 committed on Oct 20, 2025

Commit

1da4fc3

verified ·

1 Parent(s): 316365e

Update main.py

Browse files

Files changed (1) hide show

main.py +113 -49

main.py CHANGED Viewed

@@ -1,65 +1,77 @@
-import os
-import io
-from collections import defaultdict
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi import FastAPI, Request, Header, BackgroundTasks, HTTPException
-from fastapi.staticfiles import StaticFiles
-from google import genai
-from google.genai import types
-from linebot import LineBotApi, WebhookHandler
-from linebot.exceptions import InvalidSignatureError
-from linebot.models import (
     MessageEvent,
     TextMessage,
     TextSendMessage,
     ImageSendMessage,
     ImageMessage,
 )
-import PIL.Image
-import uvicorn
 # LangChain 相關匯入
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.tools import tool
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain.agents import AgentExecutor, create_tool_calling_agent
 # ==========================
 #  環境設定與工具函式
 # ==========================
-# 設置 Google AI API 金鑰
 google_api = os.environ["GOOGLE_API_KEY"]
 genai_client = genai.Client(api_key=google_api)
-# 設置 Line Bot 的 API 金鑰和秘密金鑰
 line_bot_api = LineBotApi(os.environ["CHANNEL_ACCESS_TOKEN"])
 line_handler = WebhookHandler(os.environ["CHANNEL_SECRET"])
-# 使用字典模擬用戶訊息歷史存儲
 user_message_history = defaultdict(list)
-# 建立 FastAPI 應用程式
 app = FastAPI()
 app.mount("/static", StaticFiles(directory="static"), name="static")
-# 設定 CORS
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
 )
 def get_image_url_from_line(message_id):
     """
     從 Line 訊息 ID 獲取圖片內容並儲存到暫存檔案。
     """
     try:
         message_content = line_bot_api.get_message_content(message_id)
         file_path = f"/tmp/{message_id}.png"
         with open(file_path, "wb") as f:
             for chunk in message_content.iter_content():
                 f.write(chunk)
@@ -71,7 +83,12 @@ def get_image_url_from_line(message_id):
 def store_user_message(user_id, message_type, message_content):
     """
-    儲存用戶的訊息。
     """
     user_message_history[user_id].append(
         {"type": message_type, "content": message_content}
@@ -80,9 +97,17 @@ def store_user_message(user_id, message_type, message_content):
 def get_previous_message(user_id):
     """
     獲取用戶的上一則訊息。
     """
     if user_id in user_message_history and len(user_message_history[user_id]) > 0:
         return user_message_history[user_id][-1]
     return {"type": "text", "content": "No message!"}
@@ -102,24 +127,29 @@ def generate_and_upload_image(prompt: str) -> str:
         回傳生成圖片的 URL。
     """
     try:
         response = genai_client.models.generate_content(
-            model="gemini-2.0-flash-preview-image-generation",#"gemini-2.5-flash-image",
-            contents=prompt,
-            config=types.GenerateContentConfig(response_modalities=['Text', 'Image'])
         )
         image_binary = None
         for part in response.candidates[0].content.parts:
             if part.inline_data is not None:
                 image_binary = part.inline_data.data
                 break
         if image_binary:
             image = PIL.Image.open(io.BytesIO(image_binary))
-            # 隨機生成一個檔案名以避免衝突
             file_name = f"static/{os.urandom(16).hex()}.png"
             image.save(file_name, format="PNG")
             image_url = os.path.join(os.getenv("HF_SPACE"), file_name) # Embed this Space
             return image_url
@@ -130,7 +160,7 @@ def generate_and_upload_image(prompt: str) -> str:
 @tool
 def analyze_image_with_text(image_path: str, user_text: str) -> str:
     """
-    這個工具可以根據圖片和文字提示來回答問題。
     Args:
         image_path: 圖片在本地端儲存的路徑。
@@ -140,13 +170,16 @@ def analyze_image_with_text(image_path: str, user_text: str) -> str:
         模型針對圖片和文字提示給出的回應。
     """
     try:
         if not os.path.exists(image_path):
             return "圖片路徑無效，無法進行分析。"
         img_user = PIL.Image.open(image_path)
         response = genai_client.models.generate_content(
                     model="gemini-2.5-flash",
-                    contents=[img_user, user_text]
         )
         if (response.text != None):
             out = response.text
@@ -163,23 +196,24 @@ def analyze_image_with_text(image_path: str, user_text: str) -> str:
 #  LangChain 代理人設定
 # ==========================
-# 結合所有工具
 tools = [generate_and_upload_image, analyze_image_with_text]
-# 建立 LLM 模型實例
 llm = ChatGoogleGenerativeAI(google_api_key=google_api, model="gemini-2.5-flash", temperature=0.2)
 # 建立提示模板
 prompt_template = ChatPromptTemplate([
     ("system", "你是一個強大的圖像生成與問答助理，可以根據用戶的請求使用提供的工具。當你執行 generate_and_upload_image 工具\
-    成功後會獲得一個 URL，然後你回答的 output 要包含有這個 URL 的完整資訊。如果工具有產生錯誤訊息請解讀並回應。"),
-    ("user", "{input}"),
-    ("placeholder", "{agent_scratchpad}"),
 ])
-# 建立代理人
 agent = create_tool_calling_agent(llm, tools, prompt_template)
-agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
 # ==========================
 #  FastAPI 路由
@@ -187,32 +221,49 @@ agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
 @app.get("/")
 def root():
     return {"title": "Line Bot"}
 @app.post("/webhook")
 async def webhook(
     request: Request,
     background_tasks: BackgroundTasks,
-    x_line_signature=Header(None),
 ):
     body = await request.body()
     try:
         background_tasks.add_task(
             line_handler.handle, body.decode("utf-8"), x_line_signature
         )
     except InvalidSignatureError:
         raise HTTPException(status_code=400, detail="Invalid signature")
     return "ok"
 @line_handler.add(MessageEvent, message=(ImageMessage, TextMessage))
 def handle_message(event):
     user_id = event.source.user_id
-    # 處理圖片上傳
     if event.message.type == "image":
         image_path = get_image_url_from_line(event.message.id)
         if image_path:
             store_user_message(user_id, "image", image_path)
             line_bot_api.reply_message(
                 event.reply_token, TextSendMessage(text="圖片已接收成功囉，幫我輸入你想詢問的問題喔~")
             )
@@ -221,29 +272,39 @@ def handle_message(event):
                 event.reply_token, TextSendMessage(text="沒有接收到圖片~")
             )
-    # 處理文字訊息
     elif event.message.type == "text":
-        user_text = event.message.text
         previous_message = get_previous_message(user_id)
-        print(previous_message)
-        # 根據上一則訊息類型，動態傳遞給代理人
         if previous_message["type"] == "image":
             image_path = previous_message["content"]
             agent_input = {
                 "input": f"請根據這張圖片回答問題。圖片的路徑是 {image_path}，我的問題是：{user_text}"
             }
-            # 清除上一則圖片訊息，避免重複觸發
             user_message_history[user_id].pop()
         else:
             agent_input = {"input": user_text}
         try:
-            # 運行代理人
             response = agent_executor.invoke(agent_input)
             out = response["output"]
             if 'https' in out:
                 img_tmp = 'https'+out.split('https')[1]
                 image_url = img_tmp.split('png')[0]+'png'
                 line_bot_api.push_message(
                     event.source.user_id,
                     [
@@ -252,11 +313,14 @@ def handle_message(event):
                     ]
                 )
             else:
                 line_bot_api.reply_message(event.reply_token, TextSendMessage(text=out))
         except Exception as e:
             print(f"代理人執行出錯: {e}")
             out = f"代理人執行出錯!錯誤訊息：{e}"
             line_bot_api.reply_message(event.reply_token, TextSendMessage(text=out))
 if __name__ == "__main__":
-    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)

+import os  # 匯入 os 模組，用於讀取環境變數
+import io  # 匯入 io 模組，用於處理二進位數據流
+from collections import defaultdict  # 匯入 defaultdict，用於建立預設值的字典
+from fastapi.middleware.cors import CORSMiddleware  # 匯入 FastAPI 的 CORS 中介軟體
+from fastapi import FastAPI, Request, Header, BackgroundTasks, HTTPException  # 匯入 FastAPI 相關元件
+from fastapi.staticfiles import StaticFiles  # 匯入 StaticFiles，用於提供靜態檔案（如圖片）
+from google import genai  # 匯入 Google GenAI 函式庫
+from google.genai import types  # 匯入 GenAI 的類型定義
+from linebot import LineBotApi, WebhookHandler  # 匯入 Line Bot SDK
+from linebot.exceptions import InvalidSignatureError  # 匯入 Line 簽章無效的例外
+from linebot.models import (  # 匯入 Line Bot 的各種訊息模型
     MessageEvent,
     TextMessage,
     TextSendMessage,
     ImageSendMessage,
     ImageMessage,
 )
+import PIL.Image  # 匯入 PIL (Pillow) 函式庫，用於處理圖片
+import uvicorn  # 匯入 uvicorn，用於運行 FastAPI 應用程式
 # LangChain 相關匯入
+from langchain_core.prompts import ChatPromptTemplate  # 匯入 LangChain 的聊天提示模板
+from langchain_core.tools import tool  # 匯入 LangChain 的工具裝飾器
+from langchain_google_genai import ChatGoogleGenerativeAI  # 匯入 LangChain 的 Google GenAI 聊天模型
+from langchain.agents import AgentExecutor, create_tool_calling_agent  # 匯入 LangChain 的代理人執行器和建立工具
 # ==========================
 #  環境設定與工具函式
 # ==========================
+# Read the Google AI API key from the environment
 google_api = os.environ["GOOGLE_API_KEY"]
+# Initialize the Google GenAI client with that key
 genai_client = genai.Client(api_key=google_api)
+# Line Bot channel access token and channel secret, also read from the environment
 line_bot_api = LineBotApi(os.environ["CHANNEL_ACCESS_TOKEN"])
 line_handler = WebhookHandler(os.environ["CHANNEL_SECRET"])
+# In-memory stand-in for per-user message history, backed by a defaultdict
+# key: user_id, value: list of the messages stored for that user
 user_message_history = defaultdict(list)
+# Create the FastAPI application instance
 app = FastAPI()
+# Mount the "static" directory at /static; generated images are saved there and served from it
 app.mount("/static", StaticFiles(directory="static"), name="static")
+# Configure CORS (Cross-Origin Resource Sharing)
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],  # allow every origin
+    allow_credentials=True,  # allow credentials
+    allow_methods=["*"],  # allow every HTTP method
+    allow_headers=["*"],  # allow every HTTP header
 )
 def get_image_url_from_line(message_id):
     """
     從 Line 訊息 ID 獲取圖片內容並儲存到暫存檔案。
+    Args:
+        message_id: Line 訊息的 ID。
+    Returns:
+        成功時回傳圖片儲存的本地路徑，失敗時回傳 None。
     """
     try:
+        # 透過 Line Bot API 獲取訊息內容
         message_content = line_bot_api.get_message_content(message_id)
+        # 定義暫存檔案路徑
         file_path = f"/tmp/{message_id}.png"
+        # 將圖片內容以二進位寫入模式寫入檔案
         with open(file_path, "wb") as f:
             for chunk in message_content.iter_content():
                 f.write(chunk)
 def store_user_message(user_id, message_type, message_content):
     """
+    儲存用戶的訊息到 user_message_history 字典中。
+    Args:
+        user_id: 用戶的 ID。
+        message_type: 訊息類型 (例如 "image" 或 "text")。
+        message_content: 訊息內容 (例如圖片路徑或文字)。
     """
     user_message_history[user_id].append(
         {"type": message_type, "content": message_content}
 def get_previous_message(user_id):
     """
     Return the most recent message stored for a user.
+    Args:
+        user_id: The user's ID, used as the key into user_message_history.
+    Returns:
+        The last stored message dict when the user has any history; otherwise a default text message dict.
     """
     if user_id in user_message_history and len(user_message_history[user_id]) > 0:
+        # Return the last message in the user's list
         return user_message_history[user_id][-1]
+    # No history for this user: fall back to a default value
     return {"type": "text", "content": "No message!"}
         回傳生成圖片的 URL。
     """
     try:
+        # 呼叫 Google GenAI 模型生成內容
         response = genai_client.models.generate_content(
+            model="gemini-2.0-flash-preview-image-generation",#"gemini-2.5-flash-image", # 指定圖片生成模型
+            contents=prompt, # 傳入文字提示
+            config=types.GenerateContentConfig(response_modalities=['Text', 'Image']) # 指定回應類型
         )
         image_binary = None
+        # 遍歷回應的 parts，找到圖片的二進位數據
         for part in response.candidates[0].content.parts:
             if part.inline_data is not None:
                 image_binary = part.inline_data.data
                 break
         if image_binary:
+            # 使用 PIL 將二進位數據轉換為圖片物件
             image = PIL.Image.open(io.BytesIO(image_binary))
+            # 隨機生成一個檔案名以避免衝突，並儲存在 static 資料夾
             file_name = f"static/{os.urandom(16).hex()}.png"
             image.save(file_name, format="PNG")
+            # 從環境變數獲取 Hugging Face Space 的 URL (或你的伺服器 URL)
+            # 並組合完整的圖片 URL
             image_url = os.path.join(os.getenv("HF_SPACE"), file_name) # Embed this Space
             return image_url
 @tool
 def analyze_image_with_text(image_path: str, user_text: str) -> str:
     """
+    這個工具可以根據圖片和文字提示來回答問題 (多模態分析)。
     Args:
         image_path: 圖片在本地端儲存的路徑。
         模型針對圖片和文字提示給出的回應。
     """
     try:
+        # 檢查圖片路徑是否存在
         if not os.path.exists(image_path):
             return "圖片路徑無效，無法進行分析。"
+        # 使用 PIL 開啟圖片
         img_user = PIL.Image.open(image_path)
+        # 呼叫 Google GenAI 模型 (gemini-2.5-flash) 進行多模態分析
         response = genai_client.models.generate_content(
                     model="gemini-2.5-flash",
+                    contents=[img_user, user_text] # 同時傳入圖片物件和文字
         )
         if (response.text != None):
             out = response.text
 #  LangChain 代理人設定
 # ==========================
+# Collect every tool the agent may call
 tools = [generate_and_upload_image, analyze_image_with_text]
+# Create the LLM instance (LangChain's ChatGoogleGenerativeAI wrapper)
 llm = ChatGoogleGenerativeAI(google_api_key=google_api, model="gemini-2.5-flash", temperature=0.2)
 # Build the prompt template
 prompt_template = ChatPromptTemplate([
     ("system", "你是一個強大的圖像生成與問答助理，可以根據用戶的請求使用提供的工具。當你執行 generate_and_upload_image 工具\
+    成功後會獲得一個 URL，然後你回答的 output 要包含有這個 URL 的完整資訊。如果工具有產生錯誤訊息請解讀並回應。"), # system prompt
+    ("user", "{input}"), # placeholder for the user's input
+    ("placeholder", "{agent_scratchpad}"), # placeholder for the agent's intermediate steps
 ])
+# Create the tool-calling agent
 agent = create_tool_calling_agent(llm, tools, prompt_template)
+# Create the agent executor
+agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True) # verbose=True prints the agent's intermediate steps to the terminal
 # ==========================
 #  FastAPI 路由
 @app.get("/")
 def root():
+    """
+    Root route; returns a fixed title payload, intended for basic testing.
+    """
     return {"title": "Line Bot"}
 @app.post("/webhook")
 async def webhook(
     request: Request,
     background_tasks: BackgroundTasks,
+    x_line_signature=Header(None), # LINE signature taken from the request header; defaults to None
 ):
+    """
+    Webhook route for the Line Bot.
+
+    Reads the raw request body, queues line_handler.handle with the decoded
+    body and the signature as a background task, and returns "ok".
+    """
+    # Read the raw request body
     body = await request.body()
     try:
+        # Hand the webhook to a background task so a 200 OK can be returned to the LINE server immediately
+        # NOTE(review): add_task only queues line_handler.handle; if InvalidSignatureError
+        # is raised later inside that task, this try/except presumably never sees it, so the
+        # 400 branch below looks unreachable — confirm, and consider validating before queuing.
         background_tasks.add_task(
             line_handler.handle, body.decode("utf-8"), x_line_signature
         )
     except InvalidSignatureError:
+        # Invalid signature: respond with HTTP 400
         raise HTTPException(status_code=400, detail="Invalid signature")
     return "ok"
+# 註冊訊息處理器，處理「圖片訊息」和「文字訊息」
 @line_handler.add(MessageEvent, message=(ImageMessage, TextMessage))
 def handle_message(event):
+    """
+    主要的訊息處理邏輯。
+    """
+    # 獲取用戶 ID
     user_id = event.source.user_id
+    # 情況一：處理圖片上傳
     if event.message.type == "image":
+        # 獲取 Line 傳來的圖片，並儲存到本地
         image_path = get_image_url_from_line(event.message.id)
         if image_path:
+            # 將圖片路徑儲存到用戶的訊息歷史中
             store_user_message(user_id, "image", image_path)
+            # 回覆用戶，告知圖片已收到，並請他輸入問題
             line_bot_api.reply_message(
                 event.reply_token, TextSendMessage(text="圖片已接收成功囉，幫我輸入你想詢問的問題喔~")
             )
                 event.reply_token, TextSendMessage(text="沒有接收到圖片~")
             )
+    # 情況二：處理文字訊息
     elif event.message.type == "text":
+        user_text = event.message.text # 獲取用戶傳來的文字
+        # 獲取該用戶的「上一則」訊息
         previous_message = get_previous_message(user_id)
+        print(f"上一則訊息: {previous_message}") # 在後台印出除錯訊息
+        # 根據上一則訊息類型，動態組合給代理人的輸入
         if previous_message["type"] == "image":
+            # 如果上一則是圖片，代表用戶現在的文字是「針對圖片的提問」
             image_path = previous_message["content"]
             agent_input = {
                 "input": f"請根據這張圖片回答問題。圖片的路徑是 {image_path}，我的問題是：{user_text}"
             }
+            # 清除上一則圖片訊息，避免下一次文字訊息還被當作是圖片問答
             user_message_history[user_id].pop()
         else:
+            # 如果上一則不是圖片 (或沒有上一則)，代表這是一般的文字提問 (可能是要求生成圖片)
             agent_input = {"input": user_text}
         try:
+            # 運行 LangChain 代理人
             response = agent_executor.invoke(agent_input)
+            # 獲取代理人最終的輸出
             out = response["output"]
+            # 檢查輸出中是否包含 'https' (判斷是否為生成的圖片 URL)
             if 'https' in out:
+                # 解析 URL (這裡的解析方式比較簡易，可能需要更穩健的正規表達式)
                 img_tmp = 'https'+out.split('https')[1]
                 image_url = img_tmp.split('png')[0]+'png'
+                # 使用 push_message 同時推送文字和圖片
                 line_bot_api.push_message(
                     event.source.user_id,
                     [
                     ]
                 )
             else:
+                # 如果輸出不是 URL，則直接回覆文字
                 line_bot_api.reply_message(event.reply_token, TextSendMessage(text=out))
         except Exception as e:
+            # 處理代理人執行時的錯誤
             print(f"代理人執行出錯: {e}")
             out = f"代理人執行出錯!錯誤訊息：{e}"
             line_bot_api.reply_message(event.reply_token, TextSendMessage(text=out))
 if __name__ == "__main__":
+    # Script entry point: start the FastAPI server with uvicorn
+    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)