Final_Assignment_Template

Paused

App Files Files Community

wahibtim commited on 21 days ago

Commit

466c18b

verified ·

1 Parent(s): b6de177

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -42

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import pandas as pd
 import time
 import io
 import re
-from smolagents import HfApiModel, tool, CodeAgent
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -21,7 +21,7 @@ def web_search(query: str) -> str:
     try:
         from duckduckgo_search import DDGS
         with DDGS() as ddgs:
-            results = list(ddgs.text(query, max_results=5)) # Increased results for better context
             if not results:
                 return "No results found."
             return "\n".join([f"{r.get('title')}: {r.get('body')}" for r in results])
@@ -40,14 +40,17 @@ def download_and_read_file(task_id: str) -> str:
         r = requests.get(url, timeout=30)
         r.raise_for_status()
         content_type = r.headers.get("content-type", "").lower()
         if "csv" in content_type or task_id.lower().endswith(".csv"):
             df = pd.read_csv(io.BytesIO(r.content))
             return f"CSV Content (First 15 rows):\n{df.head(15).to_string()}\n\nColumns: {df.columns.tolist()}"
         elif "text" in content_type or task_id.lower().endswith(".txt"):
             return f"Text Content (Snippet):\n{r.text[:2000]}"
         else:
-            return f"File downloaded. Size: {len(r.content)} bytes. If this is an image/pdf, use web_search to find related facts about task {task_id}."
     except Exception as e:
         return f"Download failed: {str(e)}"
@@ -55,105 +58,136 @@ def download_and_read_file(task_id: str) -> str:
 class GaiaAgent:
     def __init__(self):
-        # Qwen2.5-Coder is the best choice for reasoning and tool use
-        self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
         self.agent = CodeAgent(
             tools=[web_search, download_and_read_file],
             model=self.model,
             add_base_tools=True,
-            max_steps=12 # Increased for complex multi-step reasoning
         )
     def clean_answer(self, raw_result: str) -> str:
         """Removes conversational filler that fails the GAIA grader."""
         text = str(raw_result).strip()
-        # Remove common prefixes
-        text = re.sub(r'^(the answer is|final answer|result is)[:\s]*', '', text, flags=re.IGNORECASE)
-        # If it's a long sentence ending in a period, just take the last word/number if it looks like a value
-        if len(text.split()) > 10:
-            return text # Keep it if it's complex, but usually, GAIA wants short strings
-        return text.strip(".")
     def __call__(self, question: str, task_id: str) -> str:
         prompt = f"""Task ID: {task_id}
 Question: {question}
 INSTRUCTIONS:
-1. Use your tools to find the exact factual answer.
-2. If the question involves a file, download it first.
-3. YOUR FINAL ANSWER MUST BE EXTREMELY BRIEF.
-   - Example: '1923' or 'Marie Curie' or '4.52'.
-   - Do NOT use sentences. Do NOT explain your reasoning in the final answer.
 """
         try:
             result = self.agent.run(prompt)
-            return self.clean_answer(result)
-        except Exception:
             return "Unknown"
 # ====================== MAIN LOGIC ======================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
-        return "❌ Error: Please Login with Hugging Face first!", None
     username = profile.username
-    agent = GaiaAgent()
     try:
         resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
         questions = resp.json()
     except Exception as e:
-        return f"Failed to fetch questions: {e}", None
     answers_payload = []
     results_log = []
     for i, item in enumerate(questions):
         t_id = item.get("task_id")
         q_text = item.get("question")
-        print(f"--- Processing {i+1}/20: {t_id} ---")
         answer = agent(q_text, t_id)
         answers_payload.append({"task_id": t_id, "submitted_answer": str(answer)})
-        results_log.append({"Task": t_id, "Answer": str(answer)})
-        # 35s is safe, 40s is bulletproof for rate limits
-        time.sleep(38)
-    # FINAL SUBMISSION
     space_id = os.getenv("SPACE_ID", "unknown")
     submission_data = {
         "username": username,
         "agent_code": f"https://huggingface.co/spaces/{space_id}",
-        "answers": answers_payload
     }
     try:
         r = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=300)
         if r.status_code == 200:
             res = r.json()
-            return f"✅ SCORE: {res.get('score', 0)}% | {res.get('message', '')}", pd.DataFrame(results_log)
         else:
-            return f"❌ Error {r.status_code}: {r.text}", pd.DataFrame(results_log)
     except Exception as e:
         return f"❌ Submission Failed: {str(e)}", pd.DataFrame(results_log)
 # ====================== UI ======================
 with gr.Blocks(theme=gr.themes.Default()) as demo:
-    gr.Markdown("# 🏆 GAIA Certificate Auto-Submitter (Unit 4)")
-    gr.Markdown("Click Login, then Start. Wait 15 mins. Target: 30%+")
     with gr.Row():
         gr.LoginButton()
         run_btn = gr.Button("🚀 Start Evaluation", variant="primary")
     status_output = gr.Textbox(label="Final Result", lines=3)
-    table_output = gr.DataFrame(label="Attempt Details")
-    run_btn.click(run_and_submit_all, outputs=[status_output, table_output])
 if __name__ == "__main__":
     demo.launch()

 import time
 import io
 import re
+from smolagents import LiteLLMModel, tool, CodeAgent
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
     try:
         from duckduckgo_search import DDGS
         with DDGS() as ddgs:
+            results = list(ddgs.text(query, max_results=5))
             if not results:
                 return "No results found."
             return "\n".join([f"{r.get('title')}: {r.get('body')}" for r in results])
         r = requests.get(url, timeout=30)
         r.raise_for_status()
         content_type = r.headers.get("content-type", "").lower()
         if "csv" in content_type or task_id.lower().endswith(".csv"):
             df = pd.read_csv(io.BytesIO(r.content))
             return f"CSV Content (First 15 rows):\n{df.head(15).to_string()}\n\nColumns: {df.columns.tolist()}"
         elif "text" in content_type or task_id.lower().endswith(".txt"):
             return f"Text Content (Snippet):\n{r.text[:2000]}"
         else:
+            return (
+                f"File downloaded. Size: {len(r.content)} bytes. "
+                f"If this is an image/pdf, use web_search to find related facts about task {task_id}."
+            )
     except Exception as e:
         return f"Download failed: {str(e)}"
 class GaiaAgent:
     def __init__(self):
+        groq_api_key = os.getenv("GROQ_API_KEY")
+        if not groq_api_key:
+            raise ValueError("❌ GROQ_API_KEY secret is not set! Add it in HF Spaces → Settings → Secrets.")
+        # llama-3.3-70b-versatile is the best free model on Groq for reasoning
+        self.model = LiteLLMModel(
+            model_id="groq/llama-3.3-70b-versatile",
+            api_key=groq_api_key,
+        )
         self.agent = CodeAgent(
             tools=[web_search, download_and_read_file],
             model=self.model,
             add_base_tools=True,
+            max_steps=12,
         )
     def clean_answer(self, raw_result: str) -> str:
         """Removes conversational filler that fails the GAIA grader."""
         text = str(raw_result).strip()
+        # Remove common prefixes like "The answer is:"
+        text = re.sub(
+            r'^(the answer is|final answer|result is|answer)[:\s]*',
+            '', text, flags=re.IGNORECASE
+        )
+        # Strip trailing punctuation
+        text = text.strip(".").strip()
+        return text
     def __call__(self, question: str, task_id: str) -> str:
         prompt = f"""Task ID: {task_id}
 Question: {question}
 INSTRUCTIONS:
+- Use your tools to find the exact factual answer.
+- If the question mentions a file or attachment, call download_and_read_file("{task_id}") first.
+- If you need up-to-date facts, use web_search.
+- YOUR FINAL ANSWER MUST BE EXTREMELY BRIEF AND EXACT:
+  * Numbers: just the number, e.g. '42' or '4.52'
+  * Names: just the name, e.g. 'Marie Curie'
+  * Dates: just the date, e.g. '1923' or 'July 4, 1776'
+  * Lists: comma-separated, e.g. 'apple, banana, cherry'
+- Do NOT write sentences. Do NOT explain. Just the answer.
 """
         try:
             result = self.agent.run(prompt)
+            return self.clean_answer(str(result))
+        except Exception as e:
+            print(f"Agent error on task {task_id}: {e}")
             return "Unknown"
 # ====================== MAIN LOGIC ======================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
+        return "❌ Please Login with Hugging Face first!", None
     username = profile.username
+    print(f"✅ Logged in as: {username}")
+    try:
+        agent = GaiaAgent()
+    except ValueError as e:
+        return str(e), None
     try:
         resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
+        resp.raise_for_status()
         questions = resp.json()
     except Exception as e:
+        return f"❌ Failed to fetch questions: {e}", None
+    print(f"📋 Fetched {len(questions)} questions.")
     answers_payload = []
     results_log = []
     for i, item in enumerate(questions):
         t_id = item.get("task_id")
         q_text = item.get("question")
+        print(f"\n--- [{i+1}/{len(questions)}] Task: {t_id} ---")
+        print(f"Q: {q_text[:120]}...")
         answer = agent(q_text, t_id)
+        print(f"A: {answer}")
         answers_payload.append({"task_id": t_id, "submitted_answer": str(answer)})
+        results_log.append({"Task ID": t_id, "Question": q_text[:80], "Answer": str(answer)})
+        # Small sleep — Groq free tier allows ~30 req/min, no need for 38s waits
+        time.sleep(3)
+    # ===== SUBMIT =====
     space_id = os.getenv("SPACE_ID", "unknown")
     submission_data = {
         "username": username,
         "agent_code": f"https://huggingface.co/spaces/{space_id}",
+        "answers": answers_payload,
     }
     try:
         r = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=300)
         if r.status_code == 200:
             res = r.json()
+            score = res.get("score", 0)
+            message = res.get("message", "")
+            return f"✅ SCORE: {score}% | {message}", pd.DataFrame(results_log)
         else:
+            return f"❌ Submission Error {r.status_code}: {r.text}", pd.DataFrame(results_log)
     except Exception as e:
         return f"❌ Submission Failed: {str(e)}", pd.DataFrame(results_log)
 # ====================== UI ======================
 with gr.Blocks(theme=gr.themes.Default()) as demo:
+    gr.Markdown("# 🏆 GAIA Certificate Agent (Unit 4 Final)")
+    gr.Markdown(
+        "**Steps:** 1) Login with HF below → 2) Click Start → 3) Wait ~5 mins → 4) Check your score!\n\n"
+        "> Make sure `GROQ_API_KEY` is set in your Space **Settings → Secrets**."
+    )
     with gr.Row():
         gr.LoginButton()
         run_btn = gr.Button("🚀 Start Evaluation", variant="primary")
     status_output = gr.Textbox(label="Final Result", lines=3)
+    table_output = gr.DataFrame(label="Answer Log")
+    run_btn.click(fn=run_and_submit_all, outputs=[status_output, table_output])
 if __name__ == "__main__":
     demo.launch()