Final_Assignment_Template

Paused

App Files Files Community

wahibtim committed 22 days ago

Commit

b6de177

verified ·

1 Parent(s): 41c61c7

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -65

app.py CHANGED Viewed

@@ -4,8 +4,8 @@ import requests
 import pandas as pd
 import time
 import io
 from smolagents import HfApiModel, tool, CodeAgent
-from PIL import Image
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -21,7 +21,7 @@ def web_search(query: str) -> str:
     try:
         from duckduckgo_search import DDGS
         with DDGS() as ddgs:
-            results = list(ddgs.text(query, max_results=3))
             if not results:
                 return "No results found."
             return "\n".join([f"{r.get('title')}: {r.get('body')}" for r in results])
@@ -31,114 +31,129 @@ def web_search(query: str) -> str:
 @tool
 def download_and_read_file(task_id: str) -> str:
     """
-    Downloads the file associated with a specific task_id and returns its content as text.
     Args:
-        task_id: The unique ID for the task to fetch the file for.
     """
     url = f"{DEFAULT_API_URL}/files/{task_id}"
     try:
-        r = requests.get(url, timeout=20)
         r.raise_for_status()
         content_type = r.headers.get("content-type", "").lower()
-        # Logic to handle different file types so the Agent can "read" them
-        if "text/csv" in content_type or task_id.endswith(".csv"):
             df = pd.read_csv(io.BytesIO(r.content))
-            return f"CSV Content (first 5 rows):\n{df.head().to_string()}"
-        elif "text/plain" in content_type:
-            return f"Text File Content:\n{r.text[:2000]}" # Limit size
-        elif "image" in content_type:
-            img = Image.open(io.BytesIO(r.content))
-            return f"Image downloaded. Dimensions: {img.size}. (Note: Use web search if you need to identify contents of specific historical images)."
         else:
-            return f"File downloaded ({len(r.content)} bytes), but format is binary/unsupported for direct text reading."
     except Exception as e:
-        return f"File download failed: {str(e)}"
-# ====================== AGENT LOGIC ======================
 class GaiaAgent:
     def __init__(self):
-        # We use Qwen because it's excellent at writing the code needed for CodeAgent
         self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
         self.agent = CodeAgent(
             tools=[web_search, download_and_read_file],
             model=self.model,
-            add_base_tools=True # Gives the agent access to things like 'line_viewer'
         )
     def __call__(self, question: str, task_id: str) -> str:
-        # We explicitly tell the agent the Task ID so it can use the download tool
-        full_query = f"Task ID: {task_id}\nQuestion: {question}\n\nPlease solve this and provide only the final answer."
         try:
-            # The .run() method starts the ReAct loop
-            result = self.agent.run(full_query)
-            return str(result).strip()
-        except Exception as e:
-            return f"Error: {str(e)}"
-# ====================== GRADIO INTERFACE & SUBMISSION ======================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
-        return "Please login with Hugging Face first.", None
     username = profile.username
-    questions_url = f"{DEFAULT_API_URL}/questions"
-    submit_url = f"{DEFAULT_API_URL}/submit"
     agent = GaiaAgent()
-    space_id = os.getenv("SPACE_ID")
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
-    # 1. Get Questions
     try:
-        resp = requests.get(questions_url, timeout=20)
-        questions_data = resp.json()
     except Exception as e:
         return f"Failed to fetch questions: {e}", None
     answers_payload = []
     results_log = []
-    # 2. Process Questions
-    for item in questions_data:
-        task_id = item.get("task_id")
-        question = item.get("question")
-        # Run the agent!
-        answer = agent(question, task_id)
-        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
-        results_log.append({"Task ID": task_id, "Answer": answer})
-        # CRITICAL: Sleep to stay within Hugging Face free API limits
-        time.sleep(35)
-    # 3. Submit Results
     submission_data = {
         "username": username,
-        "agent_code": agent_code,
         "answers": answers_payload
     }
     try:
-        r = requests.post(submit_url, json=submission_data, timeout=120)
-        r.raise_for_status()
-        result = r.json()
-        status = f"✅ Score: {result.get('score')}% | Correct: {result.get('correct_count')}/20"
-        return status, pd.DataFrame(results_log)
     except Exception as e:
-        return f"Submission failed: {e}", pd.DataFrame(results_log)
-# UI setup remains the same
# Build the page: title, login button, run button, and the two output widgets.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Unit 4 GAIA Agent")
    gr.LoginButton()
    submit_button = gr.Button("🚀 Run & Submit", variant="primary")
    status_box = gr.Textbox(label="Status")
    results_table = gr.DataFrame(label="Results")

    # No explicit inputs are wired; the handler's two return values fill the
    # status box and the results table, in that order.
    submit_button.click(fn=run_and_submit_all, outputs=[status_box, results_table])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()

 import pandas as pd
 import time
 import io
+import re
 from smolagents import HfApiModel, tool, CodeAgent
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
     try:
         from duckduckgo_search import DDGS
         with DDGS() as ddgs:
+            results = list(ddgs.text(query, max_results=5)) # Increased results for better context
             if not results:
                 return "No results found."
             return "\n".join([f"{r.get('title')}: {r.get('body')}" for r in results])
 @tool
 def download_and_read_file(task_id: str) -> str:
     """
+    Downloads the file for a task and returns its content.
     Args:
+        task_id: The unique ID for the task file.
     """
     url = f"{DEFAULT_API_URL}/files/{task_id}"
     try:
+        r = requests.get(url, timeout=30)
         r.raise_for_status()
         content_type = r.headers.get("content-type", "").lower()
+        if "csv" in content_type or task_id.lower().endswith(".csv"):
             df = pd.read_csv(io.BytesIO(r.content))
+            return f"CSV Content (First 15 rows):\n{df.head(15).to_string()}\n\nColumns: {df.columns.tolist()}"
+        elif "text" in content_type or task_id.lower().endswith(".txt"):
+            return f"Text Content (Snippet):\n{r.text[:2000]}"
         else:
+            return f"File downloaded. Size: {len(r.content)} bytes. If this is an image/pdf, use web_search to find related facts about task {task_id}."
     except Exception as e:
+        return f"Download failed: {str(e)}"
+# ====================== AGENT ======================
 class GaiaAgent:
     def __init__(self):
+        # Qwen2.5-Coder is the best choice for reasoning and tool use
         self.model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
         self.agent = CodeAgent(
             tools=[web_search, download_and_read_file],
             model=self.model,
+            add_base_tools=True,
+            max_steps=12 # Increased for complex multi-step reasoning
         )
+    def clean_answer(self, raw_result: str) -> str:
+        """Removes conversational filler that fails the GAIA grader."""
+        text = str(raw_result).strip()
+        # Remove common prefixes
+        text = re.sub(r'^(the answer is|final answer|result is)[:\s]*', '', text, flags=re.IGNORECASE)
+        # If it's a long sentence ending in a period, just take the last word/number if it looks like a value
+        if len(text.split()) > 10:
+            return text # Keep it if it's complex, but usually, GAIA wants short strings
+        return text.strip(".")
     def __call__(self, question: str, task_id: str) -> str:
+        prompt = f"""Task ID: {task_id}
+Question: {question}
+INSTRUCTIONS:
+1. Use your tools to find the exact factual answer.
+2. If the question involves a file, download it first.
+3. YOUR FINAL ANSWER MUST BE EXTREMELY BRIEF.
+   - Example: '1923' or 'Marie Curie' or '4.52'.
+   - Do NOT use sentences. Do NOT explain your reasoning in the final answer.
+"""
         try:
+            result = self.agent.run(prompt)
+            return self.clean_answer(result)
+        except Exception:
+            return "Unknown"
+# ====================== MAIN LOGIC ======================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
+        return "❌ Error: Please Login with Hugging Face first!", None
     username = profile.username
     agent = GaiaAgent()
     try:
+        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
+        questions = resp.json()
     except Exception as e:
         return f"Failed to fetch questions: {e}", None
     answers_payload = []
     results_log = []
+    for i, item in enumerate(questions):
+        t_id = item.get("task_id")
+        q_text = item.get("question")
+        print(f"--- Processing {i+1}/20: {t_id} ---")
+        answer = agent(q_text, t_id)
+        answers_payload.append({"task_id": t_id, "submitted_answer": str(answer)})
+        results_log.append({"Task": t_id, "Answer": str(answer)})
+        # 35s is safe, 40s is bulletproof for rate limits
+        time.sleep(38)
+    # FINAL SUBMISSION
+    space_id = os.getenv("SPACE_ID", "unknown")
     submission_data = {
         "username": username,
+        "agent_code": f"https://huggingface.co/spaces/{space_id}",
         "answers": answers_payload
     }
     try:
+        r = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=300)
+        if r.status_code == 200:
+            res = r.json()
+            return f"✅ SCORE: {res.get('score', 0)}% | {res.get('message', '')}", pd.DataFrame(results_log)
+        else:
+            return f"❌ Error {r.status_code}: {r.text}", pd.DataFrame(results_log)
     except Exception as e:
+        return f"❌ Submission Failed: {str(e)}", pd.DataFrame(results_log)
+# ====================== UI ======================
# Build the page: headings, a row with the login and start buttons, then the
# status box and the per-task results table.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# 🏆 GAIA Certificate Auto-Submitter (Unit 4)")
    gr.Markdown("Click Login, then Start. Wait 15 mins. Target: 30%+")

    with gr.Row():
        gr.LoginButton()
        start_button = gr.Button("🚀 Start Evaluation", variant="primary")

    result_box = gr.Textbox(label="Final Result", lines=3)
    details_table = gr.DataFrame(label="Attempt Details")

    # No explicit inputs are wired; the handler's two return values fill the
    # result box and the details table, in that order.
    start_button.click(fn=run_and_submit_all, outputs=[result_box, details_table])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()