Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,8 +19,8 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
| 19 |
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
|
| 20 |
|
| 21 |
def perform_search(query: str) -> str:
|
| 22 |
-
"""搜尋工具:
|
| 23 |
-
# 邏輯題過濾
|
| 24 |
skip_keywords = ["reverse", "tfel", "python", "backwards", "spells", "spell", "letter"]
|
| 25 |
if any(k in query.lower() for k in skip_keywords):
|
| 26 |
print(f"🧠 Logic task detected, skipping search: {query[:30]}...")
|
|
@@ -30,15 +30,17 @@ def perform_search(query: str) -> str:
|
|
| 30 |
|
| 31 |
for attempt in range(3):
|
| 32 |
try:
|
| 33 |
-
time.sleep(random.uniform(3.0,
|
| 34 |
with DDGS() as ddgs:
|
| 35 |
-
|
|
|
|
| 36 |
|
| 37 |
if not results:
|
| 38 |
return ""
|
| 39 |
-
|
|
|
|
| 40 |
context = [f"- {r.get('body', '')}" for r in results]
|
| 41 |
-
return "\n".join(context)[:
|
| 42 |
|
| 43 |
except Exception as e:
|
| 44 |
print(f"⚠️ Search error (Attempt {attempt+1}): {e}")
|
|
@@ -59,24 +61,15 @@ class GroqClient:
|
|
| 59 |
"Content-Type": "application/json"
|
| 60 |
}
|
| 61 |
|
| 62 |
-
# ======================================================
|
| 63 |
-
# 【核心升級】:Chain of Thought (CoT) Prompt
|
| 64 |
-
# 要求模型先思考 (Reasoning),再把答案包在 <answer> 標籤裡
|
| 65 |
-
# 這樣可以大幅提升複雜題目的準確度
|
| 66 |
-
# ======================================================
|
| 67 |
system_instruction = {
|
| 68 |
"role": "system",
|
| 69 |
-
"content": """You are an expert
|
| 70 |
-
1.
|
| 71 |
-
2.
|
| 72 |
-
3.
|
| 73 |
|
| 74 |
-
Example
|
| 75 |
-
Reasoning:
|
| 76 |
-
Output: <answer>Paris</answer>
|
| 77 |
-
|
| 78 |
-
Example 2:
|
| 79 |
-
Reasoning: 5 + 5 is 10.
|
| 80 |
Output: <answer>10</answer>
|
| 81 |
"""
|
| 82 |
}
|
|
@@ -86,8 +79,8 @@ Output: <answer>10</answer>
|
|
| 86 |
payload = {
|
| 87 |
"model": model,
|
| 88 |
"messages": final_messages,
|
| 89 |
-
"temperature": 0.2,
|
| 90 |
-
"max_tokens":
|
| 91 |
}
|
| 92 |
|
| 93 |
for attempt in range(max_retries):
|
|
@@ -97,18 +90,17 @@ Output: <answer>10</answer>
|
|
| 97 |
if response.status_code == 200:
|
| 98 |
content = response.json()['choices'][0]['message']['content'].strip()
|
| 99 |
|
| 100 |
-
# 【後處理】:使用正規表達式抓取 <answer> 裡面的內容
|
| 101 |
match = re.search(r"<answer>(.*?)</answer>", content, re.DOTALL)
|
| 102 |
if match:
|
| 103 |
final_answer = match.group(1).strip()
|
| 104 |
print(f"👻 (Reasoning Hidden) -> Final: {final_answer}")
|
| 105 |
return final_answer
|
| 106 |
else:
|
| 107 |
-
# 如果模型忘記加標籤,就回傳原本的內容 (但通常有 Prompt 應該不會)
|
| 108 |
return content
|
| 109 |
|
| 110 |
if response.status_code == 429:
|
| 111 |
-
|
|
|
|
| 112 |
print(f"⚠️ Groq Rate limit (429). Waiting {wait_time}s...")
|
| 113 |
time.sleep(wait_time)
|
| 114 |
continue
|
|
@@ -132,7 +124,7 @@ def solve_question(question, client):
|
|
| 132 |
{
|
| 133 |
"role": "user",
|
| 134 |
"content": [
|
| 135 |
-
{"type": "text", "text": f"
|
| 136 |
{"type": "image_url", "image_url": {"url": image_url}}
|
| 137 |
]
|
| 138 |
}
|
|
@@ -144,9 +136,9 @@ def solve_question(question, client):
|
|
| 144 |
context = perform_search(question)
|
| 145 |
|
| 146 |
if context:
|
| 147 |
-
user_msg = f"Context
|
| 148 |
else:
|
| 149 |
-
user_msg = f"Question: {question}\n\
|
| 150 |
|
| 151 |
messages = [{"role": "user", "content": user_msg}]
|
| 152 |
return client.query(messages, model="llama-3.3-70b-versatile")
|
|
@@ -181,10 +173,14 @@ def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None):
|
|
| 181 |
answers.append({"task_id": tid, "submitted_answer": ans})
|
| 182 |
logs.append({"Task": tid, "Answer": str(ans)[:100]})
|
| 183 |
|
| 184 |
-
#
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
| 187 |
time.sleep(sleep_time)
|
|
|
|
| 188 |
|
| 189 |
try:
|
| 190 |
print("Submitting...")
|
|
@@ -203,9 +199,9 @@ def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None):
|
|
| 203 |
except Exception as e:
|
| 204 |
return f"Submit error: {str(e)}", pd.DataFrame(logs)
|
| 205 |
|
| 206 |
-
with gr.Blocks(title="Final Agent (
|
| 207 |
-
gr.Markdown("# 🚀 Final Agent (
|
| 208 |
-
gr.Markdown("此版本
|
| 209 |
with gr.Row():
|
| 210 |
gr.LoginButton()
|
| 211 |
btn = gr.Button("Run Evaluation", variant="primary")
|
|
|
|
| 19 |
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
|
| 20 |
|
| 21 |
def perform_search(query: str) -> str:
|
| 22 |
+
"""搜尋工具:節省 Token 版"""
|
| 23 |
+
# 邏輯題過濾
|
| 24 |
skip_keywords = ["reverse", "tfel", "python", "backwards", "spells", "spell", "letter"]
|
| 25 |
if any(k in query.lower() for k in skip_keywords):
|
| 26 |
print(f"🧠 Logic task detected, skipping search: {query[:30]}...")
|
|
|
|
| 30 |
|
| 31 |
for attempt in range(3):
|
| 32 |
try:
|
| 33 |
+
time.sleep(random.uniform(3.0, 5.0))
|
| 34 |
with DDGS() as ddgs:
|
| 35 |
+
# 【修改 1】減少搜尋數量至 3,避免 Token 爆量
|
| 36 |
+
results = list(ddgs.text(query, max_results=3))
|
| 37 |
|
| 38 |
if not results:
|
| 39 |
return ""
|
| 40 |
+
|
| 41 |
+
# 【修改 2】限制上下文長度在 800 字以內
|
| 42 |
context = [f"- {r.get('body', '')}" for r in results]
|
| 43 |
+
return "\n".join(context)[:800]
|
| 44 |
|
| 45 |
except Exception as e:
|
| 46 |
print(f"⚠️ Search error (Attempt {attempt+1}): {e}")
|
|
|
|
| 61 |
"Content-Type": "application/json"
|
| 62 |
}
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
system_instruction = {
|
| 65 |
"role": "system",
|
| 66 |
+
"content": """You are an expert.
|
| 67 |
+
1. Think step-by-step briefly.
|
| 68 |
+
2. Provide the FINAL exact answer inside <answer> tags.
|
| 69 |
+
3. Content inside <answer> must be SHORT.
|
| 70 |
|
| 71 |
+
Example:
|
| 72 |
+
Reasoning: 5+5=10.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
Output: <answer>10</answer>
|
| 74 |
"""
|
| 75 |
}
|
|
|
|
| 79 |
payload = {
|
| 80 |
"model": model,
|
| 81 |
"messages": final_messages,
|
| 82 |
+
"temperature": 0.2,
|
| 83 |
+
"max_tokens": 512 # 【修改 3】限制輸出長度,避免 AI 廢話太多
|
| 84 |
}
|
| 85 |
|
| 86 |
for attempt in range(max_retries):
|
|
|
|
| 90 |
if response.status_code == 200:
|
| 91 |
content = response.json()['choices'][0]['message']['content'].strip()
|
| 92 |
|
|
|
|
| 93 |
match = re.search(r"<answer>(.*?)</answer>", content, re.DOTALL)
|
| 94 |
if match:
|
| 95 |
final_answer = match.group(1).strip()
|
| 96 |
print(f"👻 (Reasoning Hidden) -> Final: {final_answer}")
|
| 97 |
return final_answer
|
| 98 |
else:
|
|
|
|
| 99 |
return content
|
| 100 |
|
| 101 |
if response.status_code == 429:
|
| 102 |
+
# 【修改 4】遇到 429 休息時間加倍 (指數退避)
|
| 103 |
+
wait_time = (2 ** attempt) * 20 # 20, 40, 80, 160...
|
| 104 |
print(f"⚠️ Groq Rate limit (429). Waiting {wait_time}s...")
|
| 105 |
time.sleep(wait_time)
|
| 106 |
continue
|
|
|
|
| 124 |
{
|
| 125 |
"role": "user",
|
| 126 |
"content": [
|
| 127 |
+
{"type": "text", "text": f"Identify the answer. Think step by step. Put final answer in <answer> tags. Question: {question}"},
|
| 128 |
{"type": "image_url", "image_url": {"url": image_url}}
|
| 129 |
]
|
| 130 |
}
|
|
|
|
| 136 |
context = perform_search(question)
|
| 137 |
|
| 138 |
if context:
|
| 139 |
+
user_msg = f"Context:\n{context}\n\nQuestion: {question}\n\nReflect then output <answer>YOUR_ANSWER</answer>."
|
| 140 |
else:
|
| 141 |
+
user_msg = f"Question: {question}\n\nReflect then output <answer>YOUR_ANSWER</answer>."
|
| 142 |
|
| 143 |
messages = [{"role": "user", "content": user_msg}]
|
| 144 |
return client.query(messages, model="llama-3.3-70b-versatile")
|
|
|
|
| 173 |
answers.append({"task_id": tid, "submitted_answer": ans})
|
| 174 |
logs.append({"Task": tid, "Answer": str(ans)[:100]})
|
| 175 |
|
| 176 |
+
# ======================================================
|
| 177 |
+
# 【修改 5】為了適應 CoT 的高消耗,將休息時間大幅拉長
|
| 178 |
+
# 60 ~ 90 秒才能確保 TPM (Tokens Per Minute) 歸零
|
| 179 |
+
# ======================================================
|
| 180 |
+
sleep_time = random.uniform(60, 90)
|
| 181 |
+
print(f"💤 Sleeping {sleep_time:.2f}s (Recharging Tokens)...")
|
| 182 |
time.sleep(sleep_time)
|
| 183 |
+
# ======================================================
|
| 184 |
|
| 185 |
try:
|
| 186 |
print("Submitting...")
|
|
|
|
| 199 |
except Exception as e:
|
| 200 |
return f"Submit error: {str(e)}", pd.DataFrame(logs)
|
| 201 |
|
| 202 |
+
with gr.Blocks(title="Final Agent (v6 Lite CoT)") as demo:
|
| 203 |
+
gr.Markdown("# 🚀 Final Agent (v6 Lite CoT)")
|
| 204 |
+
gr.Markdown("此版本保留了推理能力,但減少了搜尋量與輸出長度,並大幅拉長休息時間以適應 Groq 免費限制。")
|
| 205 |
with gr.Row():
|
| 206 |
gr.LoginButton()
|
| 207 |
btn = gr.Button("Run Evaluation", variant="primary")
|