Commit fde8226 · Song committed
Parent(s): f2f2687

openrouter
.DS_Store ADDED
Binary file (6.15 kB)
app.py CHANGED
@@ -19,7 +19,7 @@ from linebot.v3.messaging import (
 from linebot.v3.webhook import WebhookParser
 from linebot.v3.exceptions import InvalidSignatureError
 
-from openai import AsyncOpenAI
+from openai import AsyncOpenAI, OpenAIError
 from tavily import TavilyClient
 from sentence_transformers import SentenceTransformer, util
 from tenacity import retry, stop_after_attempt, wait_exponential
@@ -34,14 +34,25 @@ def _require_env(var: str) -> str:
 CHANNEL_SECRET = _require_env("CHANNEL_SECRET")
 CHANNEL_ACCESS_TOKEN = _require_env("CHANNEL_ACCESS_TOKEN")
 TAVILY_API_KEY = _require_env("TAVILY_API_KEY")
-
-
-
-
-
+OPENROUTER_API_KEY = _require_env("OPENROUTER_API_KEY")
+
+# OpenRouter's official endpoint
+LLM_BASE_URL = "https://openrouter.ai/api/v1"
+
+# Model fallback order (free models first)
+FALLBACK_MODELS = [
+    "nvidia/nemotron-3-nano-30b-a3b:free",
+    "tngtech/tng-r1t-chimera:free",
+    "openai/gpt-oss-120b:free",
+    "meta-llama/llama-3.3-70b-instruct:free",
+    "tngtech/deepseek-r1t2-chimera:free",
+    "arcee-ai/trinity-large-preview:free",
+    "z-ai/glm-4.5-air:free",
+    "tngtech/deepseek-r1t-chimera:free",
+    "deepseek/deepseek-r1-0528:free",
+]
 
 LLM_MODEL_CONFIG = {
-    "model": os.getenv("LLM_MODEL", "gemini-3-pro"),
     "max_tokens": int(os.getenv("MAX_TOKENS", 4000)),
     "temperature": float(os.getenv("TEMPERATURE", 0.3)),
    "seed": int(os.getenv("LLM_SEED", 42)),
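A note on the list above: OpenRouter's free-tier model IDs rotate over time, so a stale entry in FALLBACK_MODELS silently costs one failed attempt per request. A quick way to audit the list — a sketch only, assuming the public GET /api/v1/models endpoint and the requests package, neither of which this commit itself uses:

import requests

# Fetch the model IDs OpenRouter currently serves and flag stale fallback entries.
catalog = requests.get("https://openrouter.ai/api/v1/models", timeout=30).json()
available = {m["id"] for m in catalog["data"]}
for model in FALLBACK_MODELS:  # the list defined in the hunk above
    print(("ok      " if model in available else "missing ") + model)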
@@ -127,32 +138,61 @@ class ChatPipeline:
     def __init__(self):
         self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
         self.llm_client = AsyncOpenAI(
-            api_key=
-            base_url=
+            api_key=OPENROUTER_API_KEY,
+            base_url=LLM_BASE_URL,
             default_headers={
                 "HTTP-Referer": os.getenv("SITE_URL", "https://your-line-bot.example.com"),
                 "X-Title": os.getenv("SITE_NAME", "My LINE Bot"),
             }
         )
 
-
-
-
-
-
+    async def _try_model(self, model: str, messages: List[Dict[str, str]], max_tokens: int = None) -> str:
+        """Try a single model call."""
+        try:
+            token_est = estimate_tokens(messages)
+            if token_est > 50000:
+                raise ValueError("輸入過長")
+
+            response = await self.llm_client.chat.completions.create(
+                model=model,
+                messages=messages,
+                max_tokens=max_tokens or LLM_MODEL_CONFIG["max_tokens"],
+                temperature=LLM_MODEL_CONFIG["temperature"],
+                seed=LLM_MODEL_CONFIG["seed"],
+                timeout=120.0,
+            )
+            content = response.choices[0].message.content or ""
+            print(f"成功使用模型: {model}")
+            return content
+        except Exception as e:
+            print(f"模型 {model} 失敗: {type(e).__name__} - {str(e)}")
+            raise  # let the outer retry / fallback layer handle it
+
+    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=15))
+    async def _llm_call_with_fallback(self, messages: List[Dict[str, str]], max_tokens: int = None) -> str:
+        last_exception = None
 
-
-
-
-
-
-
-
-
-
+        for idx, model in enumerate(FALLBACK_MODELS, 1):
+            print(f"嘗試模型 {idx}/{len(FALLBACK_MODELS)}: {model}")
+            try:
+                return await self._try_model(model, messages, max_tokens)
+            except OpenAIError as e:
+                last_exception = e
+                if "rate limit" in str(e).lower() or "429" in str(e):
+                    print("遇到 rate limit,改用下一個模型...")
+                    continue  # a rate-limited model is skipped in favour of the next one
+                # other errors (e.g. 400, 401, 403, 404) -> also move to the next model
+                continue
+            except Exception as e:
+                last_exception = e
+                continue
+
+        # All models failed
+        error_msg = f"所有模型皆失敗,最後錯誤:{type(last_exception).__name__} - {str(last_exception)}"
+        print(error_msg)
+        return f"抱歉,目前無法連接到 AI 模型,請稍後再試。\n(錯誤:{error_msg[:200]})"
 
     async def _needs_search(self, user_text: str, history: List[Dict[str, str]]) -> bool:
-        """Lightweight check for whether a web search is needed"""
         router_prompt = [
             {"role": "system", "content": "你只需要判斷用戶問題是否需要最新的網路資訊來回答。"
                                           "如果是永恆知識(如聖經、數學原理、哲學、歷史經典等),回答 no。"
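A note on the fallback layer above: a rate-limited model is skipped rather than retried in place, and because _llm_call_with_fallback returns an apology string instead of raising when every model fails, the tenacity @retry decorator only fires if an exception escapes the loop — which the current body never allows. A minimal driver to exercise the chain end to end (an illustrative sketch; smoke_test is hypothetical and not part of this commit):

import asyncio

async def smoke_test():
    # Walks FALLBACK_MODELS in order and prints the first successful reply,
    # or the apology string if every model fails.
    pipeline = ChatPipeline()
    reply = await pipeline._llm_call_with_fallback(
        [{"role": "user", "content": "ping"}], max_tokens=8
    )
    print(reply)

asyncio.run(smoke_test())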
@@ -162,7 +202,9 @@
             {"role": "user", "content": user_text}
         ]
         try:
-
+            # Use the lightest model to decide whether a web search is needed
+            decision = await self._try_model(FALLBACK_MODELS[0], router_prompt, max_tokens=10)
+            decision = decision.strip().lower()
             print(f"搜尋需求判斷:{decision}(問題:{user_text})")
             return decision == "yes"
         except Exception as e:
@@ -186,15 +228,13 @@
 
         history = self.get_conversation_history(user_id)
 
-        # ---- New: decide whether a search is needed ----
         needs_search = await self._needs_search(user_text, history)
 
         search_results = None
         if needs_search:
-            # search is sync, but fast. Consider wrapping in to_thread if blocking is an issue.
             search_results = await asyncio.to_thread(perform_web_search, user_text)
 
-        #
+        # Build the final prompt
         messages = [{"role": "system", "content": SYSTEM_PROMPT}]
         messages.extend(history)
         messages.append({"role": "user", "content": user_text})
@@ -202,10 +242,10 @@
         if search_results and "沒有找到" not in search_results and "錯誤" not in search_results:
             messages.append({"role": "system", "content": f"網路搜尋結果(僅在高度相關時使用):{search_results}"})
 
-        response = await self.
+        response = await self._llm_call_with_fallback(messages)
         response = response.replace('*', '')
 
-        #
+        # Update conversation history
         history.append({"role": "user", "content": user_text})
         history.append({"role": "assistant", "content": response})
         self.update_conversation_history(user_id, history)
@@ -217,8 +257,12 @@
             {"role": "system", "content": "請將以下內容生成一個簡潔但完整的中文摘要,保留關鍵事實和細節,長度控制在2000字元內。"},
             {"role": "user", "content": response}
         ]
-
-
+        try:
+            summary = await self._llm_call_with_fallback(summary_prompt)
+            summary = summary.replace('*', '')
+            return summary + "\n\n(完整回應過長,已提供摘要。如需細節,請分次詢問或回覆「繼續」)"
+        except Exception:
+            return response  # if summarization fails, fall back to the full reply
 
         return response
 
@@ -259,7 +303,6 @@ async def line_webhook(request: Request):
             continue
 
         try:
-            # Handle the "繼續" (continue) command
             if user_text.lower() == "繼續" and user_id in pending_chunks:
                 remaining = pending_chunks[user_id]
                 if not remaining:
@@ -278,7 +321,6 @@
                 await line_bot_api.reply_message(ReplyMessageRequest(reply_token=reply_token, messages=messages_to_send))
                 continue
 
-            # Normal response
             ai_response = await chat_pipeline.answer_question(user_id, user_text)
             chunks = split_text_for_line(ai_response)
 
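For context on the chunked replies handled above: split_text_for_line and pending_chunks are defined elsewhere in app.py and are untouched by this commit. LINE caps a single text message at 5,000 characters, so a minimal implementation consistent with these call sites might look like the following sketch (illustrative only, not the code actually in the file):

def split_text_for_line(text: str, limit: int = 5000) -> list[str]:
    # Split a long reply into chunks that fit LINE's per-message text limit.
    return [text[i:i + limit] for i in range(0, len(text), limit)]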