Song committed on
Commit
f2f2687
·
1 Parent(s): b3c381d

long return

Browse files
Files changed (1) hide show
  1. app.py +16 -14
app.py CHANGED
@@ -2,6 +2,7 @@
2
  # -*- coding: utf-8 -*-
3
  # ---------- 環境與快取設定 (應置於最前) ----------
4
  import os
 
5
  from typing import List, Dict
6
  from contextlib import asynccontextmanager
7
  from fastapi import FastAPI, Request, HTTPException
@@ -18,7 +19,7 @@ from linebot.v3.messaging import (
18
  from linebot.v3.webhook import WebhookParser
19
  from linebot.v3.exceptions import InvalidSignatureError
20
 
21
- from openai import OpenAI
22
  from tavily import TavilyClient
23
  from sentence_transformers import SentenceTransformer, util
24
  from tenacity import retry, stop_after_attempt, wait_exponential
@@ -41,7 +42,7 @@ LLM_API_CONFIG = {
41
 
42
  LLM_MODEL_CONFIG = {
43
  "model": os.getenv("LLM_MODEL", "gemini-3-pro"),
44
- "max_tokens": int(os.getenv("MAX_TOKENS", 2000)),
45
  "temperature": float(os.getenv("TEMPERATURE", 0.3)),
46
  "seed": int(os.getenv("LLM_SEED", 42)),
47
  }
@@ -125,7 +126,7 @@ def perform_web_search(query: str, max_results: int = 5) -> str:
125
  class ChatPipeline:
126
  def __init__(self):
127
  self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
128
- self.llm_client = OpenAI(
129
  api_key=LLM_API_CONFIG["api_key"],
130
  base_url=LLM_API_CONFIG["base_url"],
131
  default_headers={
@@ -135,22 +136,22 @@ class ChatPipeline:
135
  )
136
 
137
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
138
- def _llm_call(self, messages: List[Dict[str, str]], max_tokens: int = None) -> str:
139
  token_est = estimate_tokens(messages)
140
  if token_est > 50000:
141
  raise ValueError("輸入過長")
142
 
143
- response = self.llm_client.chat.completions.create(
144
  model=LLM_MODEL_CONFIG["model"],
145
  messages=messages,
146
  max_tokens=max_tokens or LLM_MODEL_CONFIG["max_tokens"],
147
  temperature=LLM_MODEL_CONFIG["temperature"],
148
  seed=LLM_MODEL_CONFIG["seed"],
149
- timeout=30.0,
150
  )
151
  return response.choices[0].message.content or ""
152
 
153
- def _needs_search(self, user_text: str, history: List[Dict[str, str]]) -> bool:
154
  """輕量判斷是否需要網路搜尋"""
155
  router_prompt = [
156
  {"role": "system", "content": "你只需要判斷用戶問題是否需要最新的網路資訊來回答。"
@@ -161,7 +162,7 @@ class ChatPipeline:
161
  {"role": "user", "content": user_text}
162
  ]
163
  try:
164
- decision = self._llm_call(router_prompt, max_tokens=10).strip().lower()
165
  print(f"搜尋需求判斷:{decision}(問題:{user_text})")
166
  return decision == "yes"
167
  except Exception as e:
@@ -178,7 +179,7 @@ class ChatPipeline:
178
  conversations.pop(user_id, None)
179
  pending_chunks.pop(user_id, None)
180
 
181
- def answer_question(self, user_id: str, user_text: str) -> str:
182
  if user_text.strip().lower() == "/clear":
183
  self.clear_conversation_history(user_id)
184
  return "對話紀錄已清除!現在開始新的對話。"
@@ -186,11 +187,12 @@ class ChatPipeline:
186
  history = self.get_conversation_history(user_id)
187
 
188
  # ---- 新增:判斷是否需要搜尋 ----
189
- needs_search = self._needs_search(user_text, history)
190
 
191
  search_results = None
192
  if needs_search:
193
- search_results = perform_web_search(user_text)
 
194
 
195
  # ---- 建構最終 prompt ----
196
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
@@ -200,7 +202,7 @@ class ChatPipeline:
200
  if search_results and "沒有找到" not in search_results and "錯誤" not in search_results:
201
  messages.append({"role": "system", "content": f"網路搜尋結果(僅在高度相關時使用):{search_results}"})
202
 
203
- response = self._llm_call(messages)
204
  response = response.replace('*', '')
205
 
206
  # 更新歷史(包含最終回應)
@@ -215,7 +217,7 @@ class ChatPipeline:
215
  {"role": "system", "content": "請將以下內容生成一個簡潔但完整的中文摘要,保留關鍵事實和細節,長度控制在2000字元內。"},
216
  {"role": "user", "content": response}
217
  ]
218
- summary = self._llm_call(summary_prompt).replace('*', '')
219
  return summary + "\n\n(完整回應過長,已提供摘要。如需細節,請分次詢問或回覆「繼續」)"
220
 
221
  return response
@@ -277,7 +279,7 @@ async def line_webhook(request: Request):
277
  continue
278
 
279
  # 正常回應
280
- ai_response = chat_pipeline.answer_question(user_id, user_text)
281
  chunks = split_text_for_line(ai_response)
282
 
283
  if len(chunks) <= 5:
 
2
  # -*- coding: utf-8 -*-
3
  # ---------- 環境與快取設定 (應置於最前) ----------
4
  import os
5
+ import asyncio
6
  from typing import List, Dict
7
  from contextlib import asynccontextmanager
8
  from fastapi import FastAPI, Request, HTTPException
 
19
  from linebot.v3.webhook import WebhookParser
20
  from linebot.v3.exceptions import InvalidSignatureError
21
 
22
+ from openai import AsyncOpenAI
23
  from tavily import TavilyClient
24
  from sentence_transformers import SentenceTransformer, util
25
  from tenacity import retry, stop_after_attempt, wait_exponential
 
42
 
43
  LLM_MODEL_CONFIG = {
44
  "model": os.getenv("LLM_MODEL", "gemini-3-pro"),
45
+ "max_tokens": int(os.getenv("MAX_TOKENS", 4000)),
46
  "temperature": float(os.getenv("TEMPERATURE", 0.3)),
47
  "seed": int(os.getenv("LLM_SEED", 42)),
48
  }
 
126
  class ChatPipeline:
127
  def __init__(self):
128
  self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
129
+ self.llm_client = AsyncOpenAI(
130
  api_key=LLM_API_CONFIG["api_key"],
131
  base_url=LLM_API_CONFIG["base_url"],
132
  default_headers={
 
136
  )
137
 
138
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
139
+ async def _llm_call(self, messages: List[Dict[str, str]], max_tokens: int = None) -> str:
140
  token_est = estimate_tokens(messages)
141
  if token_est > 50000:
142
  raise ValueError("輸入過長")
143
 
144
+ response = await self.llm_client.chat.completions.create(
145
  model=LLM_MODEL_CONFIG["model"],
146
  messages=messages,
147
  max_tokens=max_tokens or LLM_MODEL_CONFIG["max_tokens"],
148
  temperature=LLM_MODEL_CONFIG["temperature"],
149
  seed=LLM_MODEL_CONFIG["seed"],
150
+ timeout=120.0,
151
  )
152
  return response.choices[0].message.content or ""
153
 
154
+ async def _needs_search(self, user_text: str, history: List[Dict[str, str]]) -> bool:
155
  """輕量判斷是否需要網路搜尋"""
156
  router_prompt = [
157
  {"role": "system", "content": "你只需要判斷用戶問題是否需要最新的網路資訊來回答。"
 
162
  {"role": "user", "content": user_text}
163
  ]
164
  try:
165
+ decision = (await self._llm_call(router_prompt, max_tokens=10)).strip().lower()
166
  print(f"搜尋需求判斷:{decision}(問題:{user_text})")
167
  return decision == "yes"
168
  except Exception as e:
 
179
  conversations.pop(user_id, None)
180
  pending_chunks.pop(user_id, None)
181
 
182
+ async def answer_question(self, user_id: str, user_text: str) -> str:
183
  if user_text.strip().lower() == "/clear":
184
  self.clear_conversation_history(user_id)
185
  return "對話紀錄已清除!現在開始新的對話。"
 
187
  history = self.get_conversation_history(user_id)
188
 
189
  # ---- 新增:判斷是否需要搜尋 ----
190
+ needs_search = await self._needs_search(user_text, history)
191
 
192
  search_results = None
193
  if needs_search:
194
+ # search is sync, but fast. Consider wrapping in to_thread if blocking is an issue.
195
+ search_results = await asyncio.to_thread(perform_web_search, user_text)
196
 
197
  # ---- 建構最終 prompt ----
198
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
 
202
  if search_results and "沒有找到" not in search_results and "錯誤" not in search_results:
203
  messages.append({"role": "system", "content": f"網路搜尋結果(僅在高度相關時使用):{search_results}"})
204
 
205
+ response = await self._llm_call(messages)
206
  response = response.replace('*', '')
207
 
208
  # 更新歷史(包含最終回應)
 
217
  {"role": "system", "content": "請將以下內容生成一個簡潔但完整的中文摘要,保留關鍵事實和細節,長度控制在2000字元內。"},
218
  {"role": "user", "content": response}
219
  ]
220
+ summary = (await self._llm_call(summary_prompt)).replace('*', '')
221
  return summary + "\n\n(完整回應過長,已提供摘要。如需細節,請分次詢問或回覆「繼續」)"
222
 
223
  return response
 
279
  continue
280
 
281
  # 正常回應
282
+ ai_response = await chat_pipeline.answer_question(user_id, user_text)
283
  chunks = split_text_for_line(ai_response)
284
 
285
  if len(chunks) <= 5: