Final_Assignment_Template

Sleeping

App Files Community

johnnychiang committed on Jan 9

Commit

11b435a

verified ·

1 Parent(s): 6051f37

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -468

app.py CHANGED Viewed

@@ -1,492 +1,100 @@
 import os
-import re
-import json
-import math
 import requests
 import pandas as pd
-import gradio as gr
-from bs4 import BeautifulSoup
-from sympy import sympify
-from pint import UnitRegistry
-try:
-    from huggingface_hub import InferenceClient
-except Exception:
-    InferenceClient = None
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-WIKIDATA_SPARQL = "https://query.wikidata.org/sparql"
-HF_API_BASE = "https://huggingface.co/api"
-OPEN_METEO = "https://api.open-meteo.com/v1/forecast"
-ureg = UnitRegistry()
-Q = ureg.Quantity
-def http_get(url, timeout=20, headers=None, params=None):
-    headers = headers or {
-        "User-Agent": "Mozilla/5.0 (compatible; GAIA-Agent/1.0; +https://huggingface.co)"
-    }
-    r = requests.get(url, timeout=timeout, headers=headers, params=params)
-    r.raise_for_status()
-    return r
-def wikidata_query(sparql: str):
-    r = http_get(
-        WIKIDATA_SPARQL,
-        params={"format": "json", "query": sparql},
-        headers={"Accept": "application/sparql-results+json"}
-    )
-    return r.json()
-def clean_answer(s: str) -> str:
-    if s is None:
-        return ""
-    s = str(s).strip()
-    # remove FINAL ANSWER patterns
-    s = re.sub(r"(?i)\bFINAL\s*ANSWER\b\s*[:\-]*\s*", "", s).strip()
-    # remove markdown/code fences
-    s = re.sub(r"```.*?```", "", s, flags=re.S).strip()
-    # keep last non-empty line (common for model outputs)
-    lines = [ln.strip() for ln in s.splitlines() if ln.strip()]
-    if lines:
-        s = lines[-1]
-    # strip quotes
-    s = s.strip().strip('"').strip("'").strip()
-    # collapse spaces
-    s = re.sub(r"\s+", " ", s).strip()
-    return s
-def looks_like_math(q: str) -> bool:
-    # crude heuristic: contains digits and operators
-    return bool(re.search(r"\d", q)) and bool(re.search(r"[+\-*/^=()]", q))
-def try_solve_math(q: str):
-    """
-    Try to extract a math expression and evaluate.
-    """
-    # grab something that looks like an expression
-    m = re.search(r"([-+*/^().\d\s]+)", q)
-    if not m:
-        return None
-    expr = m.group(1).strip()
-    if len(expr) < 3:
-        return None
-    expr = expr.replace("^", "**")
-    try:
-        val = sympify(expr).evalf()
-        # if near int, output int
-        if abs(val - int(val)) < 1e-10:
-            return str(int(val))
-        return str(val)
-    except Exception:
-        return None
-def try_unit_convert(q: str):
-    """
-    Very basic unit conversion:
-    e.g., "Convert 5 miles to km"
-    """
-    # match "convert <num> <unit> to <unit>"
-    m = re.search(r"(?i)\bconvert\s+([-+]?\d+(?:\.\d+)?)\s*([a-zA-Z°]+)\s+to\s+([a-zA-Z°]+)\b", q)
-    if not m:
-        return None
-    num = float(m.group(1))
-    u1 = m.group(2)
-    u2 = m.group(3)
-    try:
-        out = (Q(num, u1)).to(u2)
-        # output without unit text unless question requires it; GAIA exact match often wants number only
-        # we'll return just magnitude, trimmed
-        mag = out.magnitude
-        if abs(mag - int(mag)) < 1e-10:
-            return str(int(mag))
-        return str(mag)
-    except Exception:
-        return None
-def ddg_search_snippet(query: str, max_results=5):
-    """
-    DuckDuckGo HTML scraping (no paid key).
-    Returns list of (title, url, snippet)
-    """
-    url = "https://duckduckgo.com/html/"
-    r = http_get(url, params={"q": query}, timeout=20)
-    soup = BeautifulSoup(r.text, "lxml")
-    results = []
-    for res in soup.select(".result")[:max_results]:
-        a = res.select_one(".result__a")
-        sn = res.select_one(".result__snippet")
-        if a:
-            title = a.get_text(" ", strip=True)
-            link = a.get("href")
-            snippet = sn.get_text(" ", strip=True) if sn else ""
-            results.append((title, link, snippet))
-    return results
-def hf_model_info(model_id: str):
-    r = http_get(f"{HF_API_BASE}/models/{model_id}", timeout=20)
-    return r.json()
-def hf_search_models(query: str, limit=5):
-    r = http_get(f"{HF_API_BASE}/models", params={"search": query, "limit": limit}, timeout=20)
-    return r.json()
-def open_meteo_weather(city: str):
-    # naive: use geocoding via Open-Meteo geocoding
-    geo = http_get(
-        "https://geocoding-api.open-meteo.com/v1/search",
-        params={"name": city, "count": 1, "language": "en", "format": "json"},
-        timeout=20
-    ).json()
-    if not geo.get("results"):
-        return None
-    lat = geo["results"][0]["latitude"]
-    lon = geo["results"][0]["longitude"]
-    data = http_get(
-        OPEN_METEO,
-        params={
-            "latitude": lat,
-            "longitude": lon,
-            "current": "temperature_2m,weather_code,wind_speed_10m",
-        },
-        timeout=20
-    ).json()
-    cur = data.get("current", {})
-    # return temperature only (often GAIA asks a single value)
-    if "temperature_2m" in cur:
-        t = cur["temperature_2m"]
-        if abs(t - int(t)) < 1e-10:
-            return str(int(t))
-        return str(t)
-    return None
-def wikidata_simple_lookup(entity: str, prop: str):
-    """
-    Use Wikidata to fetch a single property for a named entity.
-    prop: one of 'capital', 'population', 'area', 'birth', 'death', 'country', 'founder', etc.
-    We'll map prop -> Wikidata property IDs and return a clean string.
-    """
-    prop_map = {
-        "capital": "P36",
-        "population": "P1082",
-        "area": "P2046",
-        "birth": "P569",
-        "death": "P570",
-        "country": "P17",
-        "founder": "P112",
-        "headquarters": "P159",
-    }
-    pid = prop_map.get(prop)
-    if not pid:
-        return None
-    # Try entity as label search then property
-    sparql = f"""
-    SELECT ?valueLabel WHERE {{
-      ?item rdfs:label "{entity}"@en .
-      OPTIONAL {{ ?item wdt:{pid} ?value . }}
-      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
-    }}
-    LIMIT 1
-    """
-    try:
-        data = wikidata_query(sparql)
-        bindings = data.get("results", {}).get("bindings", [])
-        if not bindings:
-            return None
-        v = bindings[0].get("valueLabel", {}).get("value")
-        return clean_answer(v)
-    except Exception:
-        return None
-def download_task_file(task_id: str, save_dir="/tmp"):
-    url = f"{DEFAULT_API_URL}/files/{task_id}"
-    try:
-        r = http_get(url, timeout=30)
-        # try detect filename from headers
-        fname = f"{task_id}.bin"
-        cd = r.headers.get("content-disposition", "")
-        m = re.search(r'filename="?([^"]+)"?', cd)
-        if m:
-            fname = m.group(1)
-        path = os.path.join(save_dir, fname)
-        with open(path, "wb") as f:
-            f.write(r.content)
-        return path
-    except Exception:
-        return None
-class ToolFirstAgent:
-    """
-    Tool-first agent for GAIA Level-1 exact-match scoring.
-    Designed to work WITHOUT paid models.
-    Optional fallback to a free small model if HF_TOKEN is set.
-    """
     def __init__(self):
-        self.hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
-        self.model_id = os.getenv("MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
-        self.llm = None
-        if self.hf_token and InferenceClient is not None:
-            # IMPORTANT: do NOT pass both model and base_url in constructor.
-            # We'll use router and pass model at call-time (supported by huggingface_hub client).
-            try:
-                self.llm = InferenceClient(token=self.hf_token, base_url="https://router.huggingface.co", timeout=120)
-                print("✅ LLM fallback enabled via HF router.")
-            except Exception as e:
-                print("⚠️ LLM fallback init failed, continue tool-only:", e)
-                self.llm = None
-        else:
-            print("ℹ️ Running in tool-only mode (no HF_TOKEN or huggingface_hub missing).")
-    def llm_answer(self, question: str) -> str:
-        if not self.llm:
-            return ""
-        system = (
-            "Return ONLY the final answer for this question.\n"
-            "No explanation. No extra words.\n"
-            "If it is a name/number/date, output it exactly.\n"
         )
-        prompt = f"{system}\nQuestion: {question}\nAnswer:"
-        try:
-            out = self.llm.text_generation(
-                prompt,
-                model=self.model_id,
-                max_new_tokens=96,
-                temperature=0.0,
-                do_sample=False,
-                return_full_text=False,
-            )
-            return clean_answer(out)
-        except Exception as e:
-            print("LLM text_generation failed:", e)
-            return ""
-    def answer(self, question: str, task_id: str = None) -> str:
-        q = question.strip()
-        # 0) if task has a file, try download (some GAIA Qs rely on it)
-        if task_id:
-            fpath = download_task_file(task_id)
-            # For now, just note: without knowing file types, we won't parse deeply.
-            # But downloading sometimes is required; you can extend later.
-            if fpath:
-                print(f"Downloaded file for task {task_id}: {fpath}")
-        # 1) math
-        if looks_like_math(q):
-            m = try_solve_math(q)
-            if m:
-                return clean_answer(m)
-        # 2) unit conversion
-        u = try_unit_convert(q)
-        if u:
-            return clean_answer(u)
-        # 3) weather questions: "weather in <city>"
-        m = re.search(r"(?i)\bweather in ([A-Za-z \-]+)\b", q)
-        if m:
-            city = m.group(1).strip()
-            w = open_meteo_weather(city)
-            if w:
-                return clean_answer(w)
-        # 4) Hugging Face / model popularity questions
-        # e.g. "most downloaded model", "downloads of Qwen/..."
-        if "hugging face" in q.lower() or "download" in q.lower() or "downloads" in q.lower():
-            mm = re.search(r"([A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+)", q)
-            if mm:
-                mid = mm.group(1)
-                try:
-                    info = hf_model_info(mid)
-                    # common: downloads field
-                    if "downloads" in info:
-                        return clean_answer(str(info["downloads"]))
-                except Exception:
-                    pass
-        # 5) Wikidata lookups (capitals, birth, etc.)
-        # Capital of X
-        m = re.search(r"(?i)\bcapital of ([A-Za-z \-]+)\b", q)
-        if m:
-            ent = m.group(1).strip()
-            v = wikidata_simple_lookup(ent, "capital")
-            if v:
-                return clean_answer(v)
-        # Birth date of X
-        m = re.search(r"(?i)\bwhen was ([A-Za-z .\-]+) born\b", q)
-        if m:
-            ent = m.group(1).strip()
-            v = wikidata_simple_lookup(ent, "birth")
-            if v:
-                # often wikidata returns ISO datetime; keep only date part
-                v = v.split("T")[0]
-                return clean_answer(v)
-        # Population of X
-        m = re.search(r"(?i)\bpopulation of ([A-Za-z \-]+)\b", q)
-        if m:
-            ent = m.group(1).strip()
-            v = wikidata_simple_lookup(ent, "population")
-            if v:
-                # sometimes returns "1,234,567" vs "1234567"; exact match varies.
-                # keep as-is; but remove commas if question likely expects plain digits
-                if re.search(r"(?i)\bhow many\b|\bpopulation\b", q):
-                    v2 = v.replace(",", "")
-                    return clean_answer(v2)
-                return clean_answer(v)
-        # 6) lightweight web search fallback (snippets)
-        # Works for factoid questions with clear short answers
         try:
-            results = ddg_search_snippet(q, max_results=3)
-            if results:
-                # Heuristic: if question asks for a year, grab 4-digit year from snippet
-                if re.search(r"\b(19|20)\d{2}\b", q):
-                    for _, __, sn in results:
-                        yy = re.search(r"\b(19|20)\d{2}\b", sn)
-                        if yy:
-                            return clean_answer(yy.group(0))
-                # If asks "Who is ..." try first snippet capitalized name chunk
-                if q.lower().startswith("who is") or "who was" in q.lower():
-                    # naive: take first result title before "-" or "|"
-                    title = results[0][0]
-                    title = re.split(r"[-|–]", title)[0].strip()
-                    if title:
-                        return clean_answer(title)
         except Exception as e:
-            print("DDG fallback failed:", e)
-        # 7) optional LLM fallback (free small model) — last resort
-        llm = self.llm_answer(q)
-        if llm:
-            # If too long, ask again implicitly by trimming to last line already done.
-            # Also strip trailing punctuation
-            llm = re.sub(r"[.。!！]+$", "", llm).strip()
-            return clean_answer(llm)
-        # 8) final fallback
-        return "I don't know"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    space_id = os.getenv("SPACE_ID")
-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        return "Please Login to Hugging Face with the button.", None
-    api_url = DEFAULT_API_URL
-    questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
-    try:
-        agent = ToolFirstAgent()
-    except Exception as e:
-        return f"Error initializing agent: {e}", None
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    # Fetch Questions
-    try:
-        response = requests.get(questions_url, timeout=20)
-        response.raise_for_status()
-        questions_data = response.json()
-        if not questions_data:
-            return "Fetched questions list is empty.", None
-    except Exception as e:
-        return f"Error fetching questions: {e}", None
-    results_log = []
-    answers_payload = []
-    for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
-            continue
-        try:
-            submitted_answer = agent.answer(question_text, task_id=task_id)
-            submitted_answer = clean_answer(submitted_answer)
-        except Exception as e:
-            submitted_answer = f"AGENT ERROR: {e}"
-        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-        results_log.append(
-            {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
-        )
-    submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
-        "answers": answers_payload,
     }
-    try:
-        response = requests.post(submit_url, json=submission_data, timeout=90)
-        response.raise_for_status()
-        result_data = response.json()
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
-        )
-        return final_status, pd.DataFrame(results_log)
-    except Exception as e:
-        return f"Submission Failed: {e}", pd.DataFrame(results_log)
-with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner (Tool-first, no paid model)")
-    gr.Markdown(
-        """
-        **Instructions**
-        1. Login with the button.
-        2. Click Run to fetch questions, answer them, submit, and get score.
-        **Notes**
-        - Works without paid models.
-        - Optional HF_TOKEN enables small-model fallback (free tier permitting).
-        """
     )
-    gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
-if __name__ == "__main__":
-    demo.launch(debug=True, share=False)

 import os
+import gradio as gr
 import requests
 import pandas as pd
+import re
+from huggingface_hub import InferenceClient
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+class BasicAgent:
     def __init__(self):
+        print("Agent init")
+        token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
+        if not token:
+            raise RuntimeError("HF_TOKEN not set")
+        # 免費可用，穩定
+        self.client = InferenceClient(
+            "Qwen/Qwen2.5-7B-Instruct",
+            token=token,
         )
+    def clean(self, text: str) -> str:
+        text = text.strip()
+        text = re.sub(r"(?i)final answer[:\-]*", "", text)
+        lines = [l.strip() for l in text.splitlines() if l.strip()]
+        return lines[-1] if lines else text
+    def __call__(self, question: str) -> str:
+        system = (
+            "You are a precise QA agent.\n"
+            "Return ONLY the final answer.\n"
+            "No explanation.\n"
+            "No extra words.\n"
+        )
         try:
+            out = self.client.chat_completion(
+                messages=[
+                    {"role": "system", "content": system},
+                    {"role": "user", "content": question},
+                ],
+                temperature=0,
+                max_tokens=256,
+            ).choices[0].message.content
+            return self.clean(out)
         except Exception as e:
+            print("LLM error:", e)
+            return ""
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+    if not profile:
+        return "Please login", None
+    username = profile.username
+    agent = BasicAgent()
+    questions = requests.get(f"{DEFAULT_API_URL}/questions").json()
+    answers = []
+    log = []
+    for q in questions:
+        ans = agent(q["question"])
+        answers.append({
+            "task_id": q["task_id"],
+            "submitted_answer": ans
+        })
+        log.append({
+            "task_id": q["task_id"],
+            "question": q["question"],
+            "answer": ans
+        })
+    payload = {
+        "username": username,
+        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
+        "answers": answers
     }
+    r = requests.post(f"{DEFAULT_API_URL}/submit", json=payload).json()
+    status = (
+        f"User: {r.get('username')}\n"
+        f"Score: {r.get('score')}%\n"
+        f"{r.get('correct_count')}/{r.get('total_attempted')} correct"
     )
+    return status, pd.DataFrame(log)
+with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent Runner")
+    gr.LoginButton()
+    btn = gr.Button("Run Evaluation & Submit All Answers")
+    out = gr.Textbox(lines=4)
+    table = gr.DataFrame()
+    btn.click(run_and_submit_all, outputs=[out, table])
+demo.launch()