Final_Assignment_Template

Sleeping

App Files Community

s1144662 committed on Jan 1

Commit

a7bf5a9

verified ·

1 Parent(s): 4ea8885

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -35

app.py CHANGED Viewed

@@ -19,8 +19,8 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
 def perform_search(query: str) -> str:
-    """搜尋工具：高精準度版"""
-    # 邏輯題過濾：這類題目給模型自己想，不要搜尋
     skip_keywords = ["reverse", "tfel", "python", "backwards", "spells", "spell", "letter"]
     if any(k in query.lower() for k in skip_keywords):
         print(f"🧠 Logic task detected, skipping search: {query[:30]}...")
@@ -30,15 +30,17 @@ def perform_search(query: str) -> str:
     for attempt in range(3):
         try:
-            time.sleep(random.uniform(3.0, 6.0))
             with DDGS() as ddgs:
-                results = list(ddgs.text(query, max_results=5)) # 增加搜尋廣度
             if not results:
                 return ""
             context = [f"- {r.get('body', '')}" for r in results]
-            return "\n".join(context)[:1500] # 提供更多資訊給模型
         except Exception as e:
             print(f"⚠️ Search error (Attempt {attempt+1}): {e}")
@@ -59,24 +61,15 @@ class GroqClient:
             "Content-Type": "application/json"
         }
-        # ======================================================
-        # 【核心升級】：Chain of Thought (CoT) Prompt
-        # 要求模型先思考 (Reasoning)，再把答案包在 <answer> 標籤裡
-        # 這樣可以大幅提升複雜題目的準確度
-        # ======================================================
         system_instruction = {
             "role": "system",
-            "content": """You are an expert taking a high-stakes exam.
-1. First, think step-by-step and verify facts.
-2. Then, provide the FINAL exact answer inside <answer> tags.
-3. The content inside <answer> tags must be SHORT (e.g., number, name, or short phrase). No punctuation.
-Example 1:
-Reasoning: The capital of France is Paris.
-Output: <answer>Paris</answer>
-Example 2:
-Reasoning: 5 + 5 is 10.
 Output: <answer>10</answer>
 """
         }
@@ -86,8 +79,8 @@ Output: <answer>10</answer>
         payload = {
             "model": model,
             "messages": final_messages,
-            "temperature": 0.2, #稍微增加一點創造力讓它推理，但保持低溫
-            "max_tokens": 1024  # 增加 token 數讓它有空間寫推理過程
         }
         for attempt in range(max_retries):
@@ -97,18 +90,17 @@ Output: <answer>10</answer>
                 if response.status_code == 200:
                     content = response.json()['choices'][0]['message']['content'].strip()
-                    # 【後處理】：使用正規表達式抓取 <answer> 裡面的內容
                     match = re.search(r"<answer>(.*?)</answer>", content, re.DOTALL)
                     if match:
                         final_answer = match.group(1).strip()
                         print(f"👻 (Reasoning Hidden) -> Final: {final_answer}")
                         return final_answer
                     else:
-                        # 如果模型忘記加標籤，就回傳原本的內容 (但通常有 Prompt 應該不會)
                         return content
                 if response.status_code == 429:
-                    wait_time = (attempt + 1) * 20
                     print(f"⚠️ Groq Rate limit (429). Waiting {wait_time}s...")
                     time.sleep(wait_time)
                     continue
@@ -132,7 +124,7 @@ def solve_question(question, client):
             {
                 "role": "user",
                 "content": [
-                    {"type": "text", "text": f"Analyze the image and answer the question: {question}. Think step by step. Put final answer in <answer> tags."},
                     {"type": "image_url", "image_url": {"url": image_url}}
                 ]
             }
@@ -144,9 +136,9 @@ def solve_question(question, client):
         context = perform_search(question)
         if context:
-            user_msg = f"Context found on the web:\n{context}\n\nQuestion: {question}\n\nRemember: Think first, then output <answer>YOUR_ANSWER</answer>."
         else:
-            user_msg = f"Question: {question}\n\nRemember: Think first, then output <answer>YOUR_ANSWER</answer>."
         messages = [{"role": "user", "content": user_msg}]
         return client.query(messages, model="llama-3.3-70b-versatile")
@@ -181,10 +173,14 @@ def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None):
         answers.append({"task_id": tid, "submitted_answer": ans})
         logs.append({"Task": tid, "Answer": str(ans)[:100]})
-        # 保持 30~70 秒的安全延遲
-        sleep_time = random.uniform(30, 70)
-        print(f"💤 Sleeping {sleep_time:.2f}s (Slow & Smart mode)...")
         time.sleep(sleep_time)
     try:
         print("Submitting...")
@@ -203,9 +199,9 @@ def run_and_submit_all(profile: Optional[gr.OAuthProfile] = None):
     except Exception as e:
         return f"Submit error: {str(e)}", pd.DataFrame(logs)
-with gr.Blocks(title="Final Agent (v5 Smart CoT)") as demo:
-    gr.Markdown("# 🚀 Final Agent (v5 Smart CoT Mode)")
-    gr.Markdown("此版本啟用了「思維鏈 (Chain of Thought)」，AI 會先推理再回答，準確度大幅提升。")
     with gr.Row():
         gr.LoginButton()
         btn = gr.Button("Run Evaluation", variant="primary")

 GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
 def perform_search(query: str) -> str:
+    """搜尋工具：節省 Token 版"""
+    # 邏輯題過濾
     skip_keywords = ["reverse", "tfel", "python", "backwards", "spells", "spell", "letter"]
     if any(k in query.lower() for k in skip_keywords):
         print(f"🧠 Logic task detected, skipping search: {query[:30]}...")
     for attempt in range(3):
         try:
+            time.sleep(random.uniform(3.0, 5.0))
             with DDGS() as ddgs:
+                # 【修改 1】減少搜尋數量至 3，避免 Token 爆量
+                results = list(ddgs.text(query, max_results=3))
             if not results:
                 return ""
+            # 【修改 2】限制上下文長度在 800 字以內
             context = [f"- {r.get('body', '')}" for r in results]
+            return "\n".join(context)[:800]
         except Exception as e:
             print(f"⚠️ Search error (Attempt {attempt+1}): {e}")
             "Content-Type": "application/json"
         }
         system_instruction = {
             "role": "system",
+            "content": """You are an expert.
+1. Think step-by-step briefly.
+2. Provide the FINAL exact answer inside <answer> tags.
+3. Content inside <answer> must be SHORT.
+Example:
+Reasoning: 5+5=10.
 Output: <answer>10</answer>
 """
         }
         payload = {
             "model": model,
             "messages": final_messages,
+            "temperature": 0.2,
+            "max_tokens": 512 # 【修改 3】限制輸出長度，避免 AI 廢話太多
         }
         for attempt in range(max_retries):
                 if response.status_code == 200:
                     content = response.json()['choices'][0]['message']['content'].strip()
                     match = re.search(r"<answer>(.*?)</answer>", content, re.DOTALL)
                     if match:
                         final_answer = match.group(1).strip()
                         print(f"👻 (Reasoning Hidden) -> Final: {final_answer}")
                         return final_answer
                     else:
                         return content
                 if response.status_code == 429:
+                    # 【修改 4】遇到 429 休息時間加倍 (指數退避)
+                    wait_time = (2 ** attempt) * 20 # 20, 40, 80, 160...
                     print(f"⚠️ Groq Rate limit (429). Waiting {wait_time}s...")
                     time.sleep(wait_time)
                     continue
             {
                 "role": "user",
                 "content": [
+                    {"type": "text", "text": f"Identify the answer. Think step by step. Put final answer in <answer> tags. Question: {question}"},
                     {"type": "image_url", "image_url": {"url": image_url}}
                 ]
             }
         context = perform_search(question)
         if context:
+            user_msg = f"Context:\n{context}\n\nQuestion: {question}\n\nReflect then output <answer>YOUR_ANSWER</answer>."
         else:
+            user_msg = f"Question: {question}\n\nReflect then output <answer>YOUR_ANSWER</answer>."
         messages = [{"role": "user", "content": user_msg}]
         return client.query(messages, model="llama-3.3-70b-versatile")
         answers.append({"task_id": tid, "submitted_answer": ans})
         logs.append({"Task": tid, "Answer": str(ans)[:100]})
+        # ======================================================
+        # 【修改 5】為了適應 CoT 的高消耗，將休息時間大幅拉長
+        #  60 ~ 90 秒才能確保 TPM (Tokens Per Minute) 歸零
+        # ======================================================
+        sleep_time = random.uniform(60, 90)
+        print(f"💤 Sleeping {sleep_time:.2f}s (Recharging Tokens)...")
         time.sleep(sleep_time)
+        # ======================================================
     try:
         print("Submitting...")
     except Exception as e:
         return f"Submit error: {str(e)}", pd.DataFrame(logs)
+with gr.Blocks(title="Final Agent (v6 Lite CoT)") as demo:
+    gr.Markdown("# 🚀 Final Agent (v6 Lite CoT)")
+    gr.Markdown("此版本保留了推理能力，但減少了搜尋量與輸出長度，並大幅拉長休息時間以適應 Groq 免費限制。")
     with gr.Row():
         gr.LoginButton()
         btn = gr.Button("Run Evaluation", variant="primary")