s1144662 committed on
Commit
a7bf5a9
·
verified ·
1 Parent(s): 4ea8885

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -35
app.py CHANGED
@@ -19,8 +19,8 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
  GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
20
 
21
  def perform_search(query: str) -> str:
22
- """搜尋工具:高精準度版"""
23
- # 邏輯題過濾:這類題目給模型自己想,不要搜尋
24
  skip_keywords = ["reverse", "tfel", "python", "backwards", "spells", "spell", "letter"]
25
  if any(k in query.lower() for k in skip_keywords):
26
  print(f"🧠 Logic task detected, skipping search: {query[:30]}...")
@@ -30,15 +30,17 @@ def perform_search(query: str) -> str:
30
 
31
  for attempt in range(3):
32
  try:
33
- time.sleep(random.uniform(3.0, 6.0))
34
  with DDGS() as ddgs:
35
- results = list(ddgs.text(query, max_results=5)) # 增加搜尋廣度
 
36
 
37
  if not results:
38
  return ""
39
-
 
40
  context = [f"- {r.get('body', '')}" for r in results]
41
- return "\n".join(context)[:1500] # 提供更多資訊給模型
42
 
43
  except Exception as e:
44
  print(f"⚠️ Search error (Attempt {attempt+1}): {e}")
@@ -59,24 +61,15 @@ class GroqClient:
59
  "Content-Type": "application/json"
60
  }
61
 
62
- # ======================================================
63
- # 【核心升級】:Chain of Thought (CoT) Prompt
64
- # 要求模型先思考 (Reasoning),再把答案包在 <answer> 標籤裡
65
- # 這樣可以大幅提升複雜題目的準確度
66
- # ======================================================
67
  system_instruction = {
68
  "role": "system",
69
- "content": """You are an expert taking a high-stakes exam.
70
- 1. First, think step-by-step and verify facts.
71
- 2. Then, provide the FINAL exact answer inside <answer> tags.
72
- 3. The content inside <answer> tags must be SHORT (e.g., number, name, or short phrase). No punctuation.
73
 
74
- Example 1:
75
- Reasoning: The capital of France is Paris.
76
- Output: <answer>Paris</answer>
77
-
78
- Example 2:
79
- Reasoning: 5 + 5 is 10.
80
  Output: <answer>10</answer>
81
  """
82
  }
@@ -86,8 +79,8 @@ Output: <answer>10</answer>
86
  payload = {
87
  "model": model,
88
  "messages": final_messages,
89
- "temperature": 0.2, #稍微增加一點創造力讓它推理,但保持低溫
90
- "max_tokens": 1024 # 增加 token 數讓它有空間寫推理過程
91
  }
92
 
93
  for attempt in range(max_retries):
@@ -97,18 +90,17 @@ Output: <answer>10</answer>
97
  if response.status_code == 200:
98
  content = response.json()['choices'][0]['message']['content'].strip()
99
 
100
- # 【後處理】:使用正規表達式抓取 <answer> 裡面的內容
101
  match = re.search(r"<answer>(.*?)</answer>", content, re.DOTALL)
102
  if match:
103
  final_answer = match.group(1).strip()
104
  print(f"👻 (Reasoning Hidden) -> Final: {final_answer}")
105
  return final_answer
106
  else:
107
- # 如果模型忘記加標籤,就回傳原本的內容 (但通常有 Prompt 應該不會)
108
  return content
109
 
110
  if response.status_code == 429:
111
- wait_time = (attempt + 1) * 20
 
112
  print(f"⚠️ Groq Rate limit (429). Waiting {wait_time}s...")
113
  time.sleep(wait_time)
114
  continue
@@ -132,7 +124,7 @@ def solve_question(question, client):
132
  {
133
  "role": "user",
134
  "content": [
135
- {"type": "text", "text": f"Analyze the image and answer the question: {question}. Think step by step. Put final answer in <answer> tags."},
136
  {"type": "image_url", "image_url": {"url": image_url}}
137
  ]
138
  }
@@ -144,9 +136,9 @@ def solve_question(question, client):
144
  context = perform_search(question)
145
 
146
  if context:
147
- user_msg = f"Context found on the web:\n{context}\n\nQuestion: {question}\n\nRemember: Think first, then output <answer>YOUR_ANSWER</answer>."
148
  else:
149
- user_msg = f"Question: {question}\n\nRemember: Think first, then output <answer>YOUR_ANSWER</answer>."
150
 
151
  messages = [{"role": "user", "content": user_msg}]
152
  return client.query(messages, model="llama-3.3-70b-versatile")
@@ -181,10 +173,14 @@ def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None):
181
  answers.append({"task_id": tid, "submitted_answer": ans})
182
  logs.append({"Task": tid, "Answer": str(ans)[:100]})
183
 
184
- # 保持 30~70 秒的安全延遲
185
- sleep_time = random.uniform(30, 70)
186
- print(f"💤 Sleeping {sleep_time:.2f}s (Slow & Smart mode)...")
 
 
 
187
  time.sleep(sleep_time)
 
188
 
189
  try:
190
  print("Submitting...")
@@ -203,9 +199,9 @@ def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None):
203
  except Exception as e:
204
  return f"Submit error: {str(e)}", pd.DataFrame(logs)
205
 
206
- with gr.Blocks(title="Final Agent (v5 Smart CoT)") as demo:
207
- gr.Markdown("# 🚀 Final Agent (v5 Smart CoT Mode)")
208
- gr.Markdown("此版本啟用「思維鏈 (Chain of Thought)」,AI 會先推理再回答準確度大幅提升。")
209
  with gr.Row():
210
  gr.LoginButton()
211
  btn = gr.Button("Run Evaluation", variant="primary")
 
19
  GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
20
 
21
  def perform_search(query: str) -> str:
22
+ """搜尋工具:節省 Token 版"""
23
+ # 邏輯題過濾
24
  skip_keywords = ["reverse", "tfel", "python", "backwards", "spells", "spell", "letter"]
25
  if any(k in query.lower() for k in skip_keywords):
26
  print(f"🧠 Logic task detected, skipping search: {query[:30]}...")
 
30
 
31
  for attempt in range(3):
32
  try:
33
+ time.sleep(random.uniform(3.0, 5.0))
34
  with DDGS() as ddgs:
35
+ # 【修改 1】減少搜尋數量至 3,避免 Token 爆量
36
+ results = list(ddgs.text(query, max_results=3))
37
 
38
  if not results:
39
  return ""
40
+
41
+ # 【修改 2】限制上下文長度在 800 字以內
42
  context = [f"- {r.get('body', '')}" for r in results]
43
+ return "\n".join(context)[:800]
44
 
45
  except Exception as e:
46
  print(f"⚠️ Search error (Attempt {attempt+1}): {e}")
 
61
  "Content-Type": "application/json"
62
  }
63
 
 
 
 
 
 
64
  system_instruction = {
65
  "role": "system",
66
+ "content": """You are an expert.
67
+ 1. Think step-by-step briefly.
68
+ 2. Provide the FINAL exact answer inside <answer> tags.
69
+ 3. Content inside <answer> must be SHORT.
70
 
71
+ Example:
72
+ Reasoning: 5+5=10.
 
 
 
 
73
  Output: <answer>10</answer>
74
  """
75
  }
 
79
  payload = {
80
  "model": model,
81
  "messages": final_messages,
82
+ "temperature": 0.2,
83
+ "max_tokens": 512 # 【修改 3】限制輸出長度,避免 AI 廢話太多
84
  }
85
 
86
  for attempt in range(max_retries):
 
90
  if response.status_code == 200:
91
  content = response.json()['choices'][0]['message']['content'].strip()
92
 
 
93
  match = re.search(r"<answer>(.*?)</answer>", content, re.DOTALL)
94
  if match:
95
  final_answer = match.group(1).strip()
96
  print(f"👻 (Reasoning Hidden) -> Final: {final_answer}")
97
  return final_answer
98
  else:
 
99
  return content
100
 
101
  if response.status_code == 429:
102
+ # 【修改 4】遇到 429 休息時間加倍 (指數退避)
103
+ wait_time = (2 ** attempt) * 20 # 20, 40, 80, 160...
104
  print(f"⚠️ Groq Rate limit (429). Waiting {wait_time}s...")
105
  time.sleep(wait_time)
106
  continue
 
124
  {
125
  "role": "user",
126
  "content": [
127
+ {"type": "text", "text": f"Identify the answer. Think step by step. Put final answer in <answer> tags. Question: {question}"},
128
  {"type": "image_url", "image_url": {"url": image_url}}
129
  ]
130
  }
 
136
  context = perform_search(question)
137
 
138
  if context:
139
+ user_msg = f"Context:\n{context}\n\nQuestion: {question}\n\nReflect then output <answer>YOUR_ANSWER</answer>."
140
  else:
141
+ user_msg = f"Question: {question}\n\nReflect then output <answer>YOUR_ANSWER</answer>."
142
 
143
  messages = [{"role": "user", "content": user_msg}]
144
  return client.query(messages, model="llama-3.3-70b-versatile")
 
173
  answers.append({"task_id": tid, "submitted_answer": ans})
174
  logs.append({"Task": tid, "Answer": str(ans)[:100]})
175
 
176
+ # ======================================================
177
+ # 【修改 5】為了適應 CoT 的高消耗,將休息時間大幅拉長
178
+ # 60 ~ 90 秒才能確保 TPM (Tokens Per Minute) 歸零
179
+ # ======================================================
180
+ sleep_time = random.uniform(60, 90)
181
+ print(f"💤 Sleeping {sleep_time:.2f}s (Recharging Tokens)...")
182
  time.sleep(sleep_time)
183
+ # ======================================================
184
 
185
  try:
186
  print("Submitting...")
 
199
  except Exception as e:
200
  return f"Submit error: {str(e)}", pd.DataFrame(logs)
201
 
202
+ with gr.Blocks(title="Final Agent (v6 Lite CoT)") as demo:
203
+ gr.Markdown("# 🚀 Final Agent (v6 Lite CoT)")
204
+ gr.Markdown("此版本保留了推理能力但減少了搜尋量與輸出長,並大幅拉長休息時間以適應 Groq 免費限制。")
205
  with gr.Row():
206
  gr.LoginButton()
207
  btn = gr.Button("Run Evaluation", variant="primary")