Final_Assignment_Template

Sleeping

App Files Files Community

johnnychiang committed on Jan 9

Commit

4491259

verified ·

1 Parent(s): bfea800

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -145

app.py CHANGED Viewed

@@ -1,233 +1,190 @@
 import os
-import re
 import gradio as gr
 import requests
 import pandas as pd
 from huggingface_hub import InferenceClient
-# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
 class BasicAgent:
     """
-    Minimal LLM-based agent for GAIA level-1 style questions.
-    Target: >=30% (>=6/20 exact match) by keeping output clean.
     """
     def __init__(self):
-        print("BasicAgent initialized (LLM mode).")
-        # ✅ Required: set HF_TOKEN as a Secret in the Space settings
         self.hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
         if not self.hf_token:
-            raise RuntimeError("Missing HF_TOKEN. Set it in Space Settings → Variables and secrets → New secret.")
-        # ✅ Start with the 7B model for a stable full run; if the score is too low, add MODEL_ID in Settings to move up to 14B/32B
         self.model_id = os.getenv("MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
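-        # ✅ Do not pass base_url (it conflicts with the model argument and caused the error seen earlier)
-        # Newer huggingface_hub versions use the new inference routing; passing model + token is enough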
         self.client = InferenceClient(
             model=self.model_id,
             token=self.hf_token,
             timeout=120,
         )
-        # Force clean output (exact match)
         self.system = (
-            "You are a precise question-answering assistant.\n"
-            "Return ONLY the final answer, nothing else.\n"
-            "No explanation. No extra words.\n"
-            "No surrounding quotes unless the answer itself includes them.\n"
         )
     def _sanitize(self, text: str) -> str:
         if not text:
             return ""
         t = str(text).strip()
-        # Strip common wrappers such as "FINAL ANSWER" / "Answer:"
-        t = re.sub(r"(?i)\bFINAL ANSWER\b\s*[:\-]*\s*", "", t).strip()
-        t = re.sub(r"(?i)^\s*answer\s*[:\-]\s*", "", t).strip()
-        # Keep the last non-empty line (if the model emits several lines, the answer is usually last)
         lines = [ln.strip() for ln in t.splitlines() if ln.strip()]
         if lines:
             t = lines[-1]
-        # Remove surrounding quotes
-        t = t.strip().strip('"').strip("'").strip()
         return t
     def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 80 chars): {question[:80]}...")
         prompt = f"{self.system}\nQuestion: {question}\nAnswer:"
-        # Prefer text_generation (fast); fall back to chat_completion on failure
         try:
             out = self.client.text_generation(
                 prompt,
-                max_new_tokens=128,
                 temperature=0.0,
                 do_sample=False,
                 return_full_text=False,
             )
-        except Exception as e:
-            print("text_generation failed, fallback to chat_completion:", e)
             out = self.client.chat_completion(
                 messages=[
                     {"role": "system", "content": self.system},
                     {"role": "user", "content": question},
                 ],
-                max_tokens=128,
                 temperature=0.0,
             ).choices[0].message.content
         ans = self._sanitize(out)
-        print(f"Agent answer: {ans}")
         return ans
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the agent on them, submits all answers,
-    and displays the results.
-    """
-    space_id = os.getenv("SPACE_ID")  # used for code link
-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
-    api_url = DEFAULT_API_URL
-    questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
-    # 1) Instantiate agent
-    try:
-        agent = BasicAgent()
-    except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        return f"Error initializing agent: {e}", None
-    # code link for verification
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
-    print("agent_code:", agent_code)
-    # 2) Fetch questions
-    print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=30)
-        response.raise_for_status()
-        questions_data = response.json()
-        if not questions_data:
-            return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
-        print(f"Error fetching questions: {e}")
-        return f"Error fetching questions: {e}", None
-    # 3) Run agent
-    results_log = []
-    answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
-            continue
         try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append(
-                {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
-            )
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append(
-                {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
-            )
-    if not answers_payload:
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4) Prepare submission
-    submission_data = {
-        "username": username.strip(),
         "agent_code": agent_code,
-        "answers": answers_payload,
     }
-    # 5) Submit
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
-    try:
-        response = requests.post(submit_url, json=submission_data, timeout=120)
-        response.raise_for_status()
-        result_data = response.json()
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
-        )
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        detail = f"Server responded with status {e.response.status_code}."
-        try:
-            detail_json = e.response.json()
-            detail += f" Detail: {detail_json.get('detail', e.response.text)}"
-        except Exception:
-            detail += f" Response: {e.response.text[:500]}"
-        results_df = pd.DataFrame(results_log)
-        return f"Submission Failed: {detail}", results_df
-    except requests.exceptions.Timeout:
-        results_df = pd.DataFrame(results_log)
-        return "Submission Failed: The request timed out.", results_df
-    except Exception as e:
-        results_df = pd.DataFrame(results_log)
-        return f"An unexpected error occurred during submission: {e}", results_df
-# --- Gradio UI ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
-    gr.Markdown(
-        """
-**Instructions:**
-1. Log in to your Hugging Face account using the button below.
-2. Click **Run Evaluation & Submit All Answers**.
-**Notes:**
-- This will run 20 questions and submit answers for scoring.
-- Exact match: the agent must output ONLY the answer (no extra words).
-"""
-    )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table],
-    )
-""
 if __name__ == "__main__":
-    demo.launch(debug=True, share=False)

 import os
 import gradio as gr
 import requests
 import pandas as pd
+import re
 from huggingface_hub import InferenceClient
+# ===============================
+# Constants (do not modify)
+# ===============================
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# ===============================
# Basic Agent (PASS VERSION)
# ===============================
class BasicAgent:
    """
    Minimal GAIA Level-1 agent.
    Goal: >=30% (>=6/20 exact match)

    The agent sends each question to a hosted LLM together with a strict
    "answer only" system prompt, then post-processes the reply so that it
    contains nothing but the answer text.

    Configuration is read from environment variables:
      * HF_TOKEN (or HUGGINGFACEHUB_API_TOKEN) - required API token.
      * MODEL_ID - optional model override, defaults to
        "Qwen/Qwen2.5-7B-Instruct".
    """

    # A leading "Final answer" / "Answer:" / "Answer - " label. It is anchored
    # at the start of the line so that the word "answer" inside a legitimate
    # answer (e.g. "The Answer Man", "answer-sheet") is never removed.
    _ANSWER_LABEL = re.compile(
        r"^(?:final\s+answer\b\s*[:\-]*|answer\s*(?::|-(?=\s)))\s*",
        re.IGNORECASE,
    )
    # A single trailing punctuation mark (exact-match scoring fails on it).
    _TRAILING_PUNCT = re.compile(r"[.,;:!?]$")
    # Quote characters removed from both ends of the answer.
    _QUOTES = "\"'"

    def __init__(self):
        """Read credentials/model from the environment and build the client.

        Raises:
            RuntimeError: if neither HF_TOKEN nor HUGGINGFACEHUB_API_TOKEN
                is set.
        """
        print("BasicAgent initialized (PASS MODE).")

        # HF_TOKEN must be configured under Space -> Settings -> Secrets.
        self.hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
        if not self.hf_token:
            raise RuntimeError("HF_TOKEN missing. Set it in Space Settings → Secrets.")

        # The model can be changed through a Space variable without editing code.
        self.model_id = os.getenv("MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")

        # Pass only `model` + `token`. `base_url` must NOT be supplied together
        # with `model`: the two arguments conflict and client construction
        # fails. Endpoint routing is left to huggingface_hub.
        self.client = InferenceClient(
            model=self.model_id,
            token=self.hf_token,
            timeout=120,
        )

        # Very strict system prompt: the core of getting EXACT MATCH answers.
        self.system = (
            "You answer questions with EXACT MATCH.\n"
            "Return ONLY the final answer.\n"
            "No explanation.\n"
            "No extra words.\n"
            "No punctuation unless required.\n"
            "No quotes.\n"
            "If the answer is a name, output the name only.\n"
            "If the answer is a number or date, output it exactly.\n"
        )

    def _sanitize(self, text: str) -> str:
        """Reduce a raw model reply to the bare answer string.

        Steps: keep the last non-empty line, drop a leading
        "Final answer"/"Answer:" label, strip surrounding quotes, and remove
        one trailing punctuation mark.

        Args:
            text: Raw model output; may be None or empty.

        Returns:
            The cleaned answer, or "" when there is nothing usable.
        """
        if not text:
            return ""

        # Keep only the last non-empty line: if the model emits several
        # lines, the answer is expected on the final one.
        lines = [ln.strip() for ln in str(text).splitlines() if ln.strip()]
        if not lines:
            return ""

        t = self._ANSWER_LABEL.sub("", lines[-1])

        # Quotes are stripped both before and after the punctuation pass so
        # that inputs like '"Paris".' do not leave a stray quote behind.
        t = t.strip().strip(self._QUOTES).strip()
        t = self._TRAILING_PUNCT.sub("", t)
        return t.strip().strip(self._QUOTES).strip()

    def __call__(self, question: str) -> str:
        """Answer a single question.

        Tries `text_generation` first and falls back to `chat_completion`
        when that call raises. Errors from the fallback propagate to the
        caller.

        Args:
            question: The question text.

        Returns:
            The sanitized answer string.
        """
        print(f"Q: {question[:60]}")
        prompt = f"{self.system}\nQuestion: {question}\nAnswer:"

        try:
            out = self.client.text_generation(
                prompt,
                max_new_tokens=64,
                temperature=0.0,
                do_sample=False,
                return_full_text=False,
            )
        except Exception as e:
            # Fallback (safety net). Log the reason instead of hiding it so
            # configuration problems are visible in the Space logs.
            print("text_generation failed, fallback to chat_completion:", e)
            out = self.client.chat_completion(
                messages=[
                    {"role": "system", "content": self.system},
                    {"role": "user", "content": question},
                ],
                max_tokens=64,
                temperature=0.0,
            ).choices[0].message.content

        ans = self._sanitize(out)
        print(f"A: {ans}")
        return ans
# ===============================
# Run & Submit
# ===============================
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, run the agent on each, and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or None when the user
            is not logged in.

    Returns:
        A `(status_message, results)` tuple. `results` is a pandas DataFrame
        with one row per answered question ("Task ID", "Question",
        "Submitted Answer"), or None when the run stopped before any
        question was processed. Failures are reported through
        `status_message` rather than raised.
    """
    if not profile:
        return "Please login with Hugging Face.", None
    username = profile.username
    print(f"User: {username}")

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Agent init error: {e}", None

    # Link to the Space's code; left empty when SPACE_ID is not set so that
    # a bogus ".../spaces/None/tree/main" URL is never submitted.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    # Fetch questions
    try:
        resp = requests.get(questions_url, timeout=20)
        resp.raise_for_status()
        questions = resp.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    if not isinstance(questions, list) or not questions:
        return "Fetched questions list is empty or invalid format.", None

    answers_payload = []
    log_rows = []
    for item in questions:
        # Skip malformed entries instead of aborting the whole run.
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or question is None:
            continue

        try:
            ans = agent(question)
        except Exception as e:
            # Best effort: a failed question is submitted as an empty answer
            # so the remaining questions can still be scored.
            ans = ""
            print(f"Agent error on task {task_id}: {e}")

        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": ans,
        })
        log_rows.append({
            "Task ID": task_id,
            "Question": question,
            "Submitted Answer": ans,
        })

    results_df = pd.DataFrame(log_rows)
    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_df

    submission = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        resp = requests.post(submit_url, json=submission, timeout=60)
        resp.raise_for_status()
        result = resp.json()
    except requests.exceptions.HTTPError as e:
        # Include the status code and part of the server's response body.
        if e.response is not None:
            detail = f"Server responded with status {e.response.status_code}. {e.response.text[:500]}"
        else:
            detail = str(e)
        return f"Submission Failed: {detail}", results_df
    except (requests.exceptions.RequestException, ValueError) as e:
        return f"Submission Failed: {e}", results_df

    status = (
        f"Submission Successful!\n"
        f"User: {result.get('username')}\n"
        f"Score: {result.get('score')}% "
        f"({result.get('correct_count')}/{result.get('total_attempted')})\n"
        f"{result.get('message')}"
    )
    return status, results_df
# ===============================
# Gradio UI
# ===============================
# Page layout: title, login button, run button, then the two outputs that
# run_and_submit_all fills in (status text and the answers table).
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner (PASS MODE)")
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_box = gr.Textbox(label="Result", lines=6)
    answers_table = gr.DataFrame(label="Answers", wrap=True)

    # The handler returns (status message, results table), in that order.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_box, answers_table],
    )

if __name__ == "__main__":
    demo.launch()