Song committed on
Commit
fde8226
·
1 Parent(s): f2f2687

openrouter

Browse files
Files changed (2) hide show
  1. .DS_Store +0 -0
  2. app.py +76 -34
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py CHANGED
@@ -19,7 +19,7 @@ from linebot.v3.messaging import (
19
  from linebot.v3.webhook import WebhookParser
20
  from linebot.v3.exceptions import InvalidSignatureError
21
 
22
- from openai import AsyncOpenAI
23
  from tavily import TavilyClient
24
  from sentence_transformers import SentenceTransformer, util
25
  from tenacity import retry, stop_after_attempt, wait_exponential
@@ -34,14 +34,25 @@ def _require_env(var: str) -> str:
34
  CHANNEL_SECRET = _require_env("CHANNEL_SECRET")
35
  CHANNEL_ACCESS_TOKEN = _require_env("CHANNEL_ACCESS_TOKEN")
36
  TAVILY_API_KEY = _require_env("TAVILY_API_KEY")
37
-
38
- LLM_API_CONFIG = {
39
- "base_url": os.getenv("LLM_BASE_URL", "https://litellm-ekkks8gsocw.dgx-coolify.apmic.ai/"),
40
- "api_key": _require_env("OPENROUTER_API_KEY"),
41
- }
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  LLM_MODEL_CONFIG = {
44
- "model": os.getenv("LLM_MODEL", "gemini-3-pro"),
45
  "max_tokens": int(os.getenv("MAX_TOKENS", 4000)),
46
  "temperature": float(os.getenv("TEMPERATURE", 0.3)),
47
  "seed": int(os.getenv("LLM_SEED", 42)),
@@ -127,32 +138,61 @@ class ChatPipeline:
127
  def __init__(self):
128
  self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
129
  self.llm_client = AsyncOpenAI(
130
- api_key=LLM_API_CONFIG["api_key"],
131
- base_url=LLM_API_CONFIG["base_url"],
132
  default_headers={
133
  "HTTP-Referer": os.getenv("SITE_URL", "https://your-line-bot.example.com"),
134
  "X-Title": os.getenv("SITE_NAME", "My LINE Bot"),
135
  }
136
  )
137
 
138
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
139
- async def _llm_call(self, messages: List[Dict[str, str]], max_tokens: int = None) -> str:
140
- token_est = estimate_tokens(messages)
141
- if token_est > 50000:
142
- raise ValueError("輸入過長")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
- response = await self.llm_client.chat.completions.create(
145
- model=LLM_MODEL_CONFIG["model"],
146
- messages=messages,
147
- max_tokens=max_tokens or LLM_MODEL_CONFIG["max_tokens"],
148
- temperature=LLM_MODEL_CONFIG["temperature"],
149
- seed=LLM_MODEL_CONFIG["seed"],
150
- timeout=120.0,
151
- )
152
- return response.choices[0].message.content or ""
 
 
 
 
 
 
 
 
 
 
153
 
154
  async def _needs_search(self, user_text: str, history: List[Dict[str, str]]) -> bool:
155
- """輕量判斷是否需要網路搜尋"""
156
  router_prompt = [
157
  {"role": "system", "content": "你只需要判斷用戶問題是否需要最新的網路資訊來回答。"
158
  "如果是永恆知識(如聖經、數學原理、哲學、歷史經典等),回答 no。"
@@ -162,7 +202,9 @@ class ChatPipeline:
162
  {"role": "user", "content": user_text}
163
  ]
164
  try:
165
- decision = (await self._llm_call(router_prompt, max_tokens=10)).strip().lower()
 
 
166
  print(f"搜尋需求判斷:{decision}(問題:{user_text})")
167
  return decision == "yes"
168
  except Exception as e:
@@ -186,15 +228,13 @@ class ChatPipeline:
186
 
187
  history = self.get_conversation_history(user_id)
188
 
189
- # ---- 新增:判斷是否需要搜尋 ----
190
  needs_search = await self._needs_search(user_text, history)
191
 
192
  search_results = None
193
  if needs_search:
194
- # search is sync, but fast. Consider wrapping in to_thread if blocking is an issue.
195
  search_results = await asyncio.to_thread(perform_web_search, user_text)
196
 
197
- # ---- 建構最終 prompt ----
198
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
199
  messages.extend(history)
200
  messages.append({"role": "user", "content": user_text})
@@ -202,10 +242,10 @@ class ChatPipeline:
202
  if search_results and "沒有找到" not in search_results and "錯誤" not in search_results:
203
  messages.append({"role": "system", "content": f"網路搜尋結果(僅在高度相關時使用):{search_results}"})
204
 
205
- response = await self._llm_call(messages)
206
  response = response.replace('*', '')
207
 
208
- # 更新歷史(包含最終回應)
209
  history.append({"role": "user", "content": user_text})
210
  history.append({"role": "assistant", "content": response})
211
  self.update_conversation_history(user_id, history)
@@ -217,8 +257,12 @@ class ChatPipeline:
217
  {"role": "system", "content": "請將以下內容生成一個簡潔但完整的中文摘要,保留關鍵事實和細節,長度控制在2000字元內。"},
218
  {"role": "user", "content": response}
219
  ]
220
- summary = (await self._llm_call(summary_prompt)).replace('*', '')
221
- return summary + "\n\n(完整回應過長,已提供摘要。如需細節,請分次詢問或回覆「繼續」)"
 
 
 
 
222
 
223
  return response
224
 
@@ -259,7 +303,6 @@ async def line_webhook(request: Request):
259
  continue
260
 
261
  try:
262
- # 處理「繼續」
263
  if user_text.lower() == "繼續" and user_id in pending_chunks:
264
  remaining = pending_chunks[user_id]
265
  if not remaining:
@@ -278,7 +321,6 @@ async def line_webhook(request: Request):
278
  await line_bot_api.reply_message(ReplyMessageRequest(reply_token=reply_token, messages=messages_to_send))
279
  continue
280
 
281
- # 正常回應
282
  ai_response = await chat_pipeline.answer_question(user_id, user_text)
283
  chunks = split_text_for_line(ai_response)
284
 
 
19
  from linebot.v3.webhook import WebhookParser
20
  from linebot.v3.exceptions import InvalidSignatureError
21
 
22
+ from openai import AsyncOpenAI, OpenAIError
23
  from tavily import TavilyClient
24
  from sentence_transformers import SentenceTransformer, util
25
  from tenacity import retry, stop_after_attempt, wait_exponential
 
34
  CHANNEL_SECRET = _require_env("CHANNEL_SECRET")
35
  CHANNEL_ACCESS_TOKEN = _require_env("CHANNEL_ACCESS_TOKEN")
36
  TAVILY_API_KEY = _require_env("TAVILY_API_KEY")
37
+ OPENROUTER_API_KEY = _require_env("OPENROUTER_API_KEY")
38
+
39
+ # OpenRouter 官方 endpoint
40
+ LLM_BASE_URL = "https://openrouter.ai/api/v1"
41
+
42
+ # 模型 fallback 順序(免費模型優先)
43
+ FALLBACK_MODELS = [
44
+ "nvidia/nemotron-3-nano-30b-a3b:free",
45
+ "tngtech/tng-r1t-chimera:free",
46
+ "openai/gpt-oss-120b:free",
47
+ "meta-llama/llama-3.3-70b-instruct:free",
48
+ "tngtech/deepseek-r1t2-chimera:free",
49
+ "arcee-ai/trinity-large-preview:free",
50
+ "z-ai/glm-4.5-air:free",
51
+ "tngtech/deepseek-r1t-chimera:free",
52
+ "deepseek/deepseek-r1-0528:free",
53
+ ]
54
 
55
  LLM_MODEL_CONFIG = {
 
56
  "max_tokens": int(os.getenv("MAX_TOKENS", 4000)),
57
  "temperature": float(os.getenv("TEMPERATURE", 0.3)),
58
  "seed": int(os.getenv("LLM_SEED", 42)),
 
138
  def __init__(self):
139
  self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
140
  self.llm_client = AsyncOpenAI(
141
+ api_key=OPENROUTER_API_KEY,
142
+ base_url=LLM_BASE_URL,
143
  default_headers={
144
  "HTTP-Referer": os.getenv("SITE_URL", "https://your-line-bot.example.com"),
145
  "X-Title": os.getenv("SITE_NAME", "My LINE Bot"),
146
  }
147
  )
148
 
149
async def _try_model(self, model: str, messages: List[Dict[str, str]], max_tokens: int = None) -> str:
    """Send one chat-completion request to a single model and return its text.

    Args:
        model: Model identifier passed to the chat-completions endpoint.
        messages: Chat messages in ``{"role": ..., "content": ...}`` form.
        max_tokens: Completion limit; a falsy value falls back to
            ``LLM_MODEL_CONFIG["max_tokens"]``.

    Returns:
        The content of the first choice, or ``""`` when that content is empty.

    Raises:
        ValueError: When the estimated token count of ``messages`` exceeds 50000.
        Exception: Any error raised by the client call is logged and re-raised
            unchanged so the caller can decide how to react.
    """
    try:
        # The size guard sits inside the try so that it is logged like any
        # other failure before propagating.
        if estimate_tokens(messages) > 50000:
            raise ValueError("輸入過長")

        request_kwargs = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens or LLM_MODEL_CONFIG["max_tokens"],
            "temperature": LLM_MODEL_CONFIG["temperature"],
            "seed": LLM_MODEL_CONFIG["seed"],
            "timeout": 120.0,
        }
        completion = await self.llm_client.chat.completions.create(**request_kwargs)

        reply_text = completion.choices[0].message.content
        if not reply_text:
            reply_text = ""
        print(f"成功使用模型: {model}")
        return reply_text
    except Exception as exc:
        print(f"模型 {model} 失敗: {type(exc).__name__} - {exc!s}")
        raise  # propagate so the caller's retry / fallback logic can handle it
170
+
171
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=15))
async def _llm_call_with_fallback(self, messages: List[Dict[str, str]], max_tokens: int = None) -> str:
    """Return a completion from the first model in ``FALLBACK_MODELS`` that succeeds.

    Models are tried in list order. A model that reports a rate limit is
    retried after a short pause (a bounded number of times) before moving on;
    any other failure moves straight to the next model.

    Args:
        messages: Chat messages, forwarded unchanged to ``_try_model``.
        max_tokens: Optional completion limit, forwarded to ``_try_model``.

    Returns:
        The reply text of the first successful model. When every model fails,
        a user-facing apology string that embeds a truncated description of
        the last error. Failures are reported through the return value; this
        method does not raise for them.
    """
    # Local import so the pause below does not depend on file-level imports.
    import asyncio

    # NOTE(review): every Exception is handled inside the loop, so the
    # tenacity decorator never sees a failure to retry. It is kept only so
    # the method's decoration stays unchanged.

    # Extra attempts on the same model after a rate-limit response, and the
    # base pause in seconds (multiplied by the attempt number). Kept small so
    # a full pass over the model list stays bounded.
    rate_limit_retries = 1
    rate_limit_pause = 2.0

    last_exception = None
    total_models = len(FALLBACK_MODELS)

    for idx, model in enumerate(FALLBACK_MODELS, 1):
        print(f"嘗試模型 {idx}/{total_models}: {model}")
        for attempt in range(rate_limit_retries + 1):
            try:
                return await self._try_model(model, messages, max_tokens)
            except OpenAIError as e:
                last_exception = e
                error_text = str(e)
                # Prefer the HTTP status when the exception carries one; the
                # substring checks are the original heuristics, kept as a
                # fallback for errors without a status code.
                rate_limited = (
                    getattr(e, "status_code", None) == 429
                    or "rate limit" in error_text.lower()
                    or "429" in error_text
                )
                if not rate_limited or attempt == rate_limit_retries:
                    # Other API errors (400, 401, 403, 404, ...) or an
                    # exhausted rate-limit budget: move to the next model.
                    break
                print("遇到 rate limit,等待後重試同一模型...")
                await asyncio.sleep(rate_limit_pause * (attempt + 1))
            except Exception as e:
                last_exception = e
                break

    # Every model failed (or the list was empty).
    error_msg = f"所有模型皆失敗,最後錯誤:{type(last_exception).__name__} - {str(last_exception)}"
    print(error_msg)
    return f"抱歉,目前無法連接到 AI 模型,請稍後再試。\n(錯誤:{error_msg[:200]})"
194
 
195
  async def _needs_search(self, user_text: str, history: List[Dict[str, str]]) -> bool:
 
196
  router_prompt = [
197
  {"role": "system", "content": "你只需要判斷用戶問題是否需要最新的網路資訊來回答。"
198
  "如果是永恆知識(如聖經、數學原理、哲學、歷史經典等),回答 no。"
 
202
  {"role": "user", "content": user_text}
203
  ]
204
  try:
205
+ # 這裡用最輕量的模型來判斷是否需要搜尋
206
+ decision = await self._try_model(FALLBACK_MODELS[0], router_prompt, max_tokens=10)
207
+ decision = decision.strip().lower()
208
  print(f"搜尋需求判斷:{decision}(問題:{user_text})")
209
  return decision == "yes"
210
  except Exception as e:
 
228
 
229
  history = self.get_conversation_history(user_id)
230
 
 
231
  needs_search = await self._needs_search(user_text, history)
232
 
233
  search_results = None
234
  if needs_search:
 
235
  search_results = await asyncio.to_thread(perform_web_search, user_text)
236
 
237
+ # 建構最終 prompt
238
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
239
  messages.extend(history)
240
  messages.append({"role": "user", "content": user_text})
 
242
  if search_results and "沒有找到" not in search_results and "錯誤" not in search_results:
243
  messages.append({"role": "system", "content": f"網路搜尋結果(僅在高度相關時使用):{search_results}"})
244
 
245
+ response = await self._llm_call_with_fallback(messages)
246
  response = response.replace('*', '')
247
 
248
+ # 更新歷史
249
  history.append({"role": "user", "content": user_text})
250
  history.append({"role": "assistant", "content": response})
251
  self.update_conversation_history(user_id, history)
 
257
  {"role": "system", "content": "請將以下內容生成一個簡潔但完整的中文摘要,保留關鍵事實和細節,長度控制在2000字元內。"},
258
  {"role": "user", "content": response}
259
  ]
260
+ try:
261
+ summary = await self._llm_call_with_fallback(summary_prompt)
262
+ summary = summary.replace('*', '')
263
+ return summary + "\n\n(完整回應過長,已提供摘要。如需細節,請分次詢問或回覆「繼續」)"
264
+ except:
265
+ return response # 摘要失敗就直接給完整內容
266
 
267
  return response
268
 
 
303
  continue
304
 
305
  try:
 
306
  if user_text.lower() == "繼續" and user_id in pending_chunks:
307
  remaining = pending_chunks[user_id]
308
  if not remaining:
 
321
  await line_bot_api.reply_message(ReplyMessageRequest(reply_token=reply_token, messages=messages_to_send))
322
  continue
323
 
 
324
  ai_response = await chat_pipeline.answer_question(user_id, user_text)
325
  chunks = split_text_for_line(ai_response)
326