Final_Assignment_Template

Sleeping

App Files Files Community

Mouhamedamar committed 5 days ago

Commit

eba54c1

verified ·

1 Parent(s): 4d09119

Update app.py

Browse files

Files changed (1) hide show

app.py +182 -278

app.py CHANGED Viewed

@@ -1,330 +1,234 @@
 import os
-import re
-import time
 import requests
 import pandas as pd
-import gradio as gr
# Base URL of the course scoring service; the file-download, question-list and
# submit requests made further down in this file are all built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# ── Imports officiels smolagents ──────────────────────────────────────
-from smolagents import (
-    CodeAgent,
-    InferenceClientModel,
-    DuckDuckGoSearchTool,
-    VisitWebpageTool,
-    tool,
-)
# ── Complete prompt templates (required for CodeAgent) ────────────────
def get_prompt_templates():
    """Return the prompt-template strings used to steer the agent.

    The result is a dict with exactly four keys, in this order:
    ``system_prompt``, ``planning``, ``managed_agent`` and ``final_answer``.
    Placeholders such as ``{{authorized_imports}}``, ``{{task}}`` and
    ``<<plan>>`` are returned verbatim; nothing is substituted here.

    NOTE(review): no call in the visible code passes this dict to CodeAgent;
    confirm whether it is still needed.
    """
    # Each template is spelled out line by line; an empty first/last entry
    # yields the leading/trailing newline of the final string.
    system_lines = (
        "You are an expert AI assistant solving GAIA benchmark tasks.",
        "You have access to tools and must use them to find accurate answers.",
        "RULES:",
        "- Always use Thought: then Code: sequences",
        "- Return ONLY the exact answer - no explanation",
        "- For reversed text: reverse it back then answer",
        "- For math/logic: write Python code to compute",
        "- For files: use the download tools",
        "- Answers are exact-match graded",
        "{{authorized_imports}}",
        "",
    )
    planning_lines = (
        "",
        "Facts given in the task:",
        "<<facts_given_in_task>>",
        "Facts needed:",
        "<<facts_needed>>",
        "Plan:",
        "<<plan>>",
        "<end_plan>",
        "",
    )
    managed_lines = (
        "",
        "You are a managed agent. Return your result via final_answer().",
        "Task: {{task}}",
        "",
    )
    final_lines = (
        "",
        "Return ONLY the final answer. No explanation. No punctuation unless required.",
        "- Numbers: digits only (e.g. 42)",
        "- Lists: comma-separated (e.g. apple, banana)",
        "- Names: as-is",
        "",
    )
    sections = (
        ("system_prompt", system_lines),
        ("planning", planning_lines),
        ("managed_agent", managed_lines),
        ("final_answer", final_lines),
    )
    return {key: "\n".join(lines) for key, lines in sections}
-# ── Tools custom ──────────────────────────────────────────────────────
 @tool
def wikipedia_search(query: str) -> str:
    """Search Wikipedia and return the intro of the top article.

    Two requests are made against the English Wikipedia API: a search for the
    best-matching title, then a plain-text extract of that article's intro.
    The result is formatted as ``# <title>`` followed by at most 4000
    characters of text. Failures are returned as a message string rather than
    raised, so the calling agent can read them.

    Args:
        query: The search terms to look up on Wikipedia.
    """
    base = "https://en.wikipedia.org/w/api.php"
    try:
        search = requests.get(base, params={
            "action": "query", "list": "search",
            "srsearch": query, "format": "json", "srlimit": 1,
        }, timeout=15)
        # Surface HTTP failures as such instead of as a confusing JSON/KeyError.
        search.raise_for_status()
        hits = search.json().get("query", {}).get("search", [])
        if not hits:
            # Previously this case ended up as "Wikipedia error: list index out of range".
            return f"No Wikipedia article found for: {query}"
        title = hits[0]["title"]

        extract = requests.get(base, params={
            "action": "query", "prop": "extracts",
            "exintro": True, "explaintext": True,
            "titles": title, "format": "json",
        }, timeout=15)
        extract.raise_for_status()
        pages = extract.json()["query"]["pages"]
        # Only one title was requested, so the first page entry is the article.
        text = next(iter(pages.values())).get("extract", "")[:4000]
        return f"# {title}\n{text}"
    except Exception as e:
        return f"Wikipedia error: {e}"
-@tool
def download_file_for_task(task_id: str) -> str:
    """Download and read any file attached to a GAIA task (PDF, Excel, audio, image, code).

    The payload type is detected from the response content-type and from its
    leading bytes, then turned into text: PDF text extraction, audio
    transcription and image description through hosted inference endpoints,
    spreadsheet rendering with pandas, and a UTF-8 decode for anything else.
    Every failure is reported as a short message string instead of being
    raised.

    Args:
        task_id: The GAIA task UUID string.
    """
    import base64
    import io

    def _post_with_retry(url, attempts=3, wait_seconds=20, **request_kwargs):
        """POST to an inference endpoint; return the 200 response or None.

        Only 429 and 5xx answers are retried, and the pause happens between
        attempts only. Other statuses (for example a rejected token) cannot
        succeed on an identical re-send, so they fail fast.
        """
        for attempt in range(attempts):
            resp = requests.post(url, timeout=120, **request_kwargs)
            if resp.status_code == 200:
                return resp
            retryable = resp.status_code == 429 or resp.status_code >= 500
            if not retryable:
                return None
            if attempt < attempts - 1:
                time.sleep(wait_seconds)
        return None

    try:
        r = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
        if r.status_code != 200:
            return "No file attached to this task."
        data = r.content
        # Header values are case-insensitive; normalise before substring tests.
        ct = r.headers.get("content-type", "").lower()
        auth = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}

        # PDF
        if data[:4] == b"%PDF" or "pdf" in ct:
            try:
                from pypdf import PdfReader
                reader = PdfReader(io.BytesIO(data))
                text = "\n".join(p.extract_text() or "" for p in reader.pages)
                return text[:6000]
            except Exception as e:
                return f"PDF error: {e}"

        # Audio → Whisper
        if any(x in ct for x in ("audio", "mpeg", "mp3", "wav")) or data[:3] == b"ID3":
            url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
            resp = _post_with_retry(url, headers=auth, data=data)
            if resp is None:
                return "Audio transcription failed."
            return resp.json().get("text", "")

        # Excel / CSV (b"PK" is the ZIP signature that .xlsx files start with)
        if any(x in ct for x in ("spreadsheet", "excel", "csv")) or data[:2] == b"PK":
            # Try the parser that matches the declared type first, then the other.
            if "csv" in ct:
                readers = (pd.read_csv, pd.read_excel)
            else:
                readers = (pd.read_excel, pd.read_csv)
            last_error = None
            for read in readers:
                try:
                    return read(io.BytesIO(data)).to_string(index=False)[:4000]
                except Exception as e:
                    last_error = e
            return f"Spreadsheet error: {last_error}"

        # Image → Llama Vision (magic bytes are checked too, in case the
        # server sends a generic content-type)
        is_png = data[:4] == b"\x89PNG"
        is_jpeg = data[:3] == b"\xff\xd8\xff"
        if is_png or is_jpeg or any(x in ct for x in ("image", "png", "jpg", "jpeg")):
            mime = "image/png" if is_png else "image/jpeg"
            b64 = base64.b64encode(data).decode()
            url = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions"
            payload = {
                "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
                "messages": [{"role": "user", "content": [
                    {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}},
                    {"type": "text", "text": "Describe everything in detail. If chess: name every piece and square. Transcribe any text/numbers exactly."},
                ]}],
                "max_tokens": 1024,
            }
            headers = dict(auth)
            headers["Content-Type"] = "application/json"
            resp = _post_with_retry(url, headers=headers, json=payload)
            if resp is None:
                return "Image analysis failed."
            return resp.json()["choices"][0]["message"]["content"]

        # Text / code fallback
        return data.decode("utf-8", errors="replace")[:4000]
    except Exception as e:
        return f"File download error: {e}"
 @tool
def get_youtube_transcript(video_url: str) -> str:
    """Fetch the transcript/captions from a YouTube video URL.

    The 11-character video id is extracted from watch, youtu.be, shorts,
    embed and live style links. The URL is validated before the optional
    ``youtube_transcript_api`` dependency is imported, so a malformed URL is
    always reported as such. The joined caption text is capped at 5000
    characters. Failures are returned as a message string rather than raised.

    NOTE(review): ``YouTubeTranscriptApi.get_transcript`` is kept from the
    original; confirm it exists in the installed youtube-transcript-api release.

    Args:
        video_url: The full YouTube URL e.g. https://www.youtube.com/watch?v=XXXXX
    """
    try:
        m = re.search(
            r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([A-Za-z0-9_-]{11})",
            video_url,
        )
        if not m:
            return "Could not extract video ID from URL."
        # Imported only once there is a video id to look up.
        from youtube_transcript_api import YouTubeTranscriptApi
        transcript = YouTubeTranscriptApi.get_transcript(
            m.group(1), languages=["en", "en-US", "en-GB"]
        )
        return " ".join(t["text"] for t in transcript)[:5000]
    except Exception as e:
        return f"Transcript error: {e}"
-@tool
def run_python_code(code: str) -> str:
    """Execute Python code and return stdout. Use for math, logic, string ops, data processing.

    The code runs in a fresh interpreter (``sys.executable -c``) with a
    30-second timeout. On success the stripped stdout is returned. If the
    process exits with a non-zero status after printing, the stderr text is
    appended so a crash is not mistaken for a clean result. With no stdout the
    result is ``stderr: ...`` or ``(no output)``.

    SECURITY NOTE: this executes arbitrary code with the privileges of the
    hosting process; by default the child also inherits its environment.
    Review before exposing it to untrusted prompts.

    Args:
        code: Valid Python code to execute.
    """
    import subprocess
    import sys

    try:
        proc = subprocess.run(
            [sys.executable, "-c", code],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        return "Execution timed out."
    except Exception as e:
        return f"Execution error: {e}"

    out = proc.stdout.strip()
    err = proc.stderr.strip()
    if out and err and proc.returncode != 0:
        # Partial output followed by a failure: report both.
        return f"{out}\nstderr: {err}"
    if out:
        return out
    return f"stderr: {err}" if err else "(no output)"
-# ── Agent ─────────────────────────────────────────────────────────────
class GAIAAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` for GAIA questions.

    Construction builds the model and the tool list. Calling the instance
    wraps a question in a fixed instruction preamble, runs the agent and
    returns a cleaned-up answer string.
    """

    # Lead-ins removed from the start of an answer, tested in this order.
    _ANSWER_PREFIXES = ("the answer is", "answer:", "final answer:", "result:")

    def __init__(self):
        """Create the model and the CodeAgent, then print a ready message."""
        llm = InferenceClientModel(
            model_id="meta-llama/Llama-3.3-70B-Instruct",
            token=os.environ.get("HF_TOKEN", ""),
        )
        toolbox = [
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            wikipedia_search,
            download_file_for_task,
            get_youtube_transcript,
            run_python_code,
        ]
        extra_imports = [
            "re", "json", "math", "unicodedata",
            "datetime", "collections", "itertools",
            "pandas", "requests", "os", "time",
        ]
        self.agent = CodeAgent(
            tools=toolbox,
            model=llm,
            add_base_tools=True,
            max_steps=10,
            verbosity_level=1,
            additional_authorized_imports=extra_imports,
        )
        print("GAIAAgent ready ✅")

    def __call__(self, question: str, task_id: str = "") -> str:
        """Answer one question.

        Args:
            question: The question text.
            task_id: Optional task id; when given, a hint naming it is
                appended to the prompt so the agent can fetch the attachment.

        Returns:
            The agent's answer with known lead-in phrases removed, or
            ``"Unable to determine answer."`` if anything raised.
        """
        print(f"\n{'='*60}\nQ: {question[:120]}")

        if task_id:
            task_hint = f"\n\n[task_id='{task_id}' — call download_file_for_task('{task_id}') if a file/image/audio is needed]"
        else:
            task_hint = ""

        preamble = "".join((
            "Solve this GAIA benchmark question precisely.\n",
            "- Use tools to verify facts. Do NOT guess.\n",
            "- YouTube URL → call get_youtube_transcript\n",
            "- File/image/audio/excel/pdf → call download_file_for_task\n",
            "- Math/logic/strings → call run_python_code\n",
            "- Facts → wikipedia_search or DuckDuckGoSearchTool\n",
            "- Reversed text → decode first, then answer\n",
            "- Return ONLY the exact answer. No explanation.\n\n",
        ))
        prompt = preamble + f"Question: {question}{task_hint}"

        try:
            answer = str(self.agent.run(prompt)).strip()
            # Prefixes are applied one after another, so stacked lead-ins
            # ("The answer is answer: 5") are peeled off in a single pass.
            for lead_in in self._ANSWER_PREFIXES:
                if not answer.lower().startswith(lead_in):
                    continue
                remainder = answer[len(lead_in):].strip()
                answer = remainder.lstrip(":").strip()
            print(f"→ Answer: {answer}")
            return answer
        except Exception as e:
            print(f"Agent error: {e}")
            return "Unable to determine answer."
-# ── Gradio UI ─────────────────────────────────────────────────────────
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent over every fetched question and submit the answers.

    Args:
        profile: The logged-in user's OAuth profile, or None when nobody is
            logged in.

    Returns:
        A ``(status_text, results)`` pair. ``results`` is a DataFrame of the
        per-question log, or None when the run stopped before any question
        was attempted.
    """
    if not profile:
        return "Please login to Hugging Face first.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    api_url = DEFAULT_API_URL
    # Link to this Space's code when the id is known, otherwise mark it local.
    agent_code = "local"
    if space_id:
        agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    try:
        agent = GAIAAgent()
    except Exception as init_err:
        return f"Error initializing agent: {init_err}", None

    try:
        questions = requests.get(f"{api_url}/questions", timeout=15).json()
        print(f"Fetched {len(questions)} questions.")
    except Exception as fetch_err:
        return f"Error fetching questions: {fetch_err}", None

    answers_payload = []
    results_log = []
    for entry in questions:
        task_id = entry.get("task_id", "")
        question_text = entry.get("question", "")
        # Entries missing either field are skipped, not submitted.
        if task_id and question_text:
            answer = agent(question_text, task_id=task_id)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text[:80],
                "Submitted Answer": answer,
            })
            time.sleep(2)  # pause between questions

    if not answers_payload:
        return "No answers produced.", pd.DataFrame(results_log)

    submission = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    try:
        resp = requests.post(f"{api_url}/submit", json=submission, timeout=120)
        resp.raise_for_status()
        d = resp.json()
        status = "\n".join([
            "✅ Submission Successful!",
            f"User: {d.get('username')}",
            f"Score: {d.get('score', 'N/A')}% ({d.get('correct_count', '?')}/{d.get('total_attempted', '?')} correct)",
            f"Message: {d.get('message', '')}",
        ])
    except Exception as submit_err:
        status = f"Submission error: {submit_err}"
    return status, pd.DataFrame(results_log)
# Page layout: title, description, login button, run button and two output
# widgets. Components are created in display order.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA Agent — smolagents + HF Inference")
    gr.Markdown(
        "\n"
        "**Models:** Llama-3.3-70B · Llama-3.2-11B-Vision · Whisper large-v3\n"
        "**Tools:** DuckDuckGo · Wikipedia · VisitWebpage · YouTube transcript · Python · File reader\n"
        "**Setup:** Ajoute `HF_TOKEN` dans les secrets de ton Space HF.\n"
        "    "
    )
    gr.LoginButton()
    run_btn = gr.Button(
        "🚀 Run Evaluation & Submit All Answers",
        variant="primary",
    )
    status_out = gr.Textbox(
        label="Status / Score",
        lines=6,
        interactive=False,
    )
    results_tbl = gr.DataFrame(label="Questions & Answers", wrap=True)
    # The handler declares no inputs here and returns (status text, results table).
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_out, results_tbl],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)

 import os
+import gradio as gr
 import requests
 import pandas as pd
+from smolagents import CodeAgent, DuckDuckGoSearchTool, tool, HfApiModel
+from smolagents.tools import WikipediaTool, VisitWebpageTool
+import re
+# ── Constants ──────────────────────────────────────────────────────────────────
# Base URL of the course scoring API; the /files, /questions and /submit
# requests made below are all built from it.
API_BASE   = "https://agents-course-unit4-scoring.hf.space"
# Second name bound to the same URL; nothing in the visible code reads it.
DEFAULT_API_URL = API_BASE
+# ── Custom tools ───────────────────────────────────────────────────────────────
 @tool
def download_task_file(task_id: str) -> str:
    """
    Download a file associated with a GAIA task and return its content as text.
    For images, returns a description note. For CSVs/Excel, returns the raw text.

    Every textual result is capped at 8000 characters. Office Open XML
    documents are recognised before the generic text test, because their
    content-type contains the substring "xml" and would otherwise be returned
    as undecoded binary.

    Args:
        task_id: The GAIA task ID string.
    Returns:
        File content as a string, or an error message.
    """
    import io

    max_chars = 8000
    url = f"{API_BASE}/files/{task_id}"
    try:
        response = requests.get(url, timeout=30)
        if response.status_code == 404:
            return "No file attached to this task."
        response.raise_for_status()
        # Header values are case-insensitive; normalise before substring tests.
        content_type = response.headers.get("content-type", "").lower()
        payload = response.content

        # Excel comes first: the .xlsx type is
        # "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet".
        if "spreadsheet" in content_type or "excel" in content_type:
            df = pd.read_excel(io.BytesIO(payload))
            return df.to_string()[:max_chars]

        # Plain text / CSV / JSON / XML / code, but not zipped office
        # documents, which only look like XML by name.
        is_office_archive = "officedocument" in content_type
        looks_textual = any(ct in content_type for ct in ("text", "json", "xml", "csv"))
        if looks_textual and not is_office_archive:
            return response.text[:max_chars]

        # PDF – extract text with pdfplumber if available
        if "pdf" in content_type:
            try:
                import pdfplumber
                with pdfplumber.open(io.BytesIO(payload)) as pdf:
                    text = "\n".join(p.extract_text() or "" for p in pdf.pages)
                return text[:max_chars] if text.strip() else "PDF has no extractable text."
            except ImportError:
                return f"PDF file received ({len(payload)} bytes) but pdfplumber not installed."

        # Image
        if "image" in content_type:
            return (
                f"Image file received (type: {content_type}, size: {len(payload)} bytes). "
                "Use visual reasoning to answer the question."
            )

        # Fallback: try decoding as UTF-8
        try:
            return payload.decode("utf-8")[:max_chars]
        except UnicodeDecodeError:
            return f"Binary file received ({content_type}, {len(payload)} bytes). Cannot display."
    except Exception as e:
        return f"Error downloading file for task {task_id}: {e}"
 @tool
def calculator(expression: str) -> str:
    """
    Safely evaluate a mathematical expression and return the result.

    The expression is parsed with the ast module and only a whitelist of
    constructs is evaluated: numeric literals, the arithmetic operators
    (+ - * / // % **), unary plus and minus, comparisons, tuples and lists,
    public names from the math module, and calls to those math functions.
    Anything else, including attribute access, subscripts, lambdas and
    strings, is rejected. eval() is deliberately not used: clearing
    __builtins__ does not stop attribute-based escapes.

    Args:
        expression: A Python-compatible math expression, e.g. '3.14 * 10**2'.
    Returns:
        The computed result as a string.
    """
    import ast
    import math
    import operator

    # Best-effort bound on integer powers so "9**9**9" cannot hang the app.
    max_result_bits = 1_000_000

    def _checked_pow(base, exponent):
        """Exponentiation that refuses absurdly large integer results."""
        if isinstance(base, int) and isinstance(exponent, int) and exponent > 0:
            if base.bit_length() * exponent > max_result_bits:
                raise ValueError("result of ** is too large")
        return operator.pow(base, exponent)

    math_names = {k: v for k, v in vars(math).items() if not k.startswith("_")}
    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: _checked_pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    compare_ops = {
        ast.Eq: operator.eq,
        ast.NotEq: operator.ne,
        ast.Lt: operator.lt,
        ast.LtE: operator.le,
        ast.Gt: operator.gt,
        ast.GtE: operator.ge,
    }

    def _evaluate(node):
        """Recursively evaluate one whitelisted AST node."""
        if isinstance(node, ast.Expression):
            return _evaluate(node.body)
        if isinstance(node, ast.Constant):
            if isinstance(node.value, (int, float, complex)):
                return node.value
            raise ValueError(f"unsupported literal: {node.value!r}")
        if isinstance(node, ast.Name):
            if node.id in math_names:
                return math_names[node.id]
            raise NameError(f"name '{node.id}' is not defined")
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](_evaluate(node.left), _evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_evaluate(node.operand))
        if isinstance(node, ast.Compare):
            left = _evaluate(node.left)
            for op, comparator in zip(node.ops, node.comparators):
                if type(op) not in compare_ops:
                    raise ValueError(f"unsupported comparison: {type(op).__name__}")
                right = _evaluate(comparator)
                if not compare_ops[type(op)](left, right):
                    return False
                left = right
            return True
        if isinstance(node, (ast.Tuple, ast.List)):
            items = [_evaluate(element) for element in node.elts]
            return tuple(items) if isinstance(node, ast.Tuple) else items
        if isinstance(node, ast.Call):
            # Only direct calls to a math function by name are allowed.
            if not isinstance(node.func, ast.Name):
                raise ValueError("only math functions may be called")
            func = _evaluate(node.func)
            if not callable(func):
                raise TypeError(f"'{node.func.id}' is not callable")
            if any(keyword.arg is None for keyword in node.keywords):
                raise ValueError("**kwargs expansion is not supported")
            args = [_evaluate(arg) for arg in node.args]
            kwargs = {keyword.arg: _evaluate(keyword.value) for keyword in node.keywords}
            return func(*args, **kwargs)
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        # Strip first: unlike eval(), ast.parse rejects leading whitespace.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(_evaluate(tree))
    except Exception as e:
        return f"Calculation error: {e}"
+# ── Agent factory ──────────────────────────────────────────────────────────────
def build_agent():
    """Build and return a CodeAgent with all necessary tools.

    The agent uses the Qwen2.5-72B-Instruct model (token read from the
    ``HF_TOKEN`` environment variable), three stock tools plus the two custom
    tools from this file, a 10-step limit and a short list of extra imports
    that generated code may use.

    NOTE(review): confirm that the installed smolagents release still exports
    ``HfApiModel`` and ``WikipediaTool`` under these names; the file-level
    imports fail otherwise.
    """
    # The model is created before the tools, as in the original ordering.
    return CodeAgent(
        model=HfApiModel(
            model_id="Qwen/Qwen2.5-72B-Instruct",   # free HF Inference API – fast & capable
            token=os.environ.get("HF_TOKEN"),
        ),
        tools=[
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            WikipediaTool(),
            download_task_file,
            calculator,
        ],
        max_steps=10,
        additional_authorized_imports=["pandas", "re", "json", "math", "datetime"],
    )
# Instruction preamble that run_agent_on_question() puts in front of every
# question before handing the combined prompt to the agent.
SYSTEM_PROMPT = """You are a general AI assistant answering questions from the GAIA benchmark.
Your goal is to provide a single, precise, final answer — nothing else.
Rules:
- Use tools (web search, Wikipedia, file download, calculator) as needed.
- Think step-by-step before answering.
- Your FINAL answer must be:
  • As short as possible (a number, a name, a date, a list, etc.)
  • Exactly matching the expected format described in the question.
  • WITHOUT any prefix like "The answer is" or "FINAL ANSWER:".
- Never hallucinate. If unsure, search again.
"""
def run_agent_on_question(agent: "CodeAgent", question: str, task_id: str) -> str:
    """Run the agent on a single GAIA question.

    When the task has an attachment, up to 3000 characters of its text are
    appended to the prompt. The attachment is skipped only when the download
    helper reports that no file exists or that the download failed. Those two
    messages are matched by their leading words, so a legitimate file that
    merely contains the word "Error" is still passed on.

    Args:
        agent: Object exposing ``run(prompt)``; its result is converted with
            ``str``.
        question: The question text.
        task_id: Task identifier used to look up an attachment; no download
            is attempted when it is empty.

    Returns:
        The answer with any leading "final answer" label removed, or
        ``"AGENT_ERROR: ..."`` if the agent raised.
    """
    # If a file is attached, include its content in the prompt
    file_hint = ""
    if task_id:
        attachment = str(download_task_file(task_id) or "")
        no_usable_file = (
            not attachment
            or attachment.startswith("No file attached")
            or attachment.startswith("Error downloading file")
        )
        if not no_usable_file:
            file_hint = f"\n\n[Attached file content for task {task_id}]:\n{attachment[:3000]}"

    full_prompt = SYSTEM_PROMPT + f"\n\nQuestion: {question}{file_hint}"
    try:
        raw_answer = str(agent.run(full_prompt)).strip()
    except Exception as e:
        return f"AGENT_ERROR: {e}"
    # Strip any accidental "FINAL ANSWER:" prefix the model might add. The text
    # is stripped first so leading whitespace cannot hide the prefix.
    return re.sub(r"^final answer[:\s]*", "", raw_answer, flags=re.IGNORECASE).strip()
+# ── Gradio UI ──────────────────────────────────────────────────────────────────
def run_and_submit(profile: gr.OAuthProfile | None):
    """Fetch questions, run agent, submit answers, return results table + score.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a DataFrame with
        one row per attempted question, or None when the run stopped before
        any question was attempted.
    """
    if profile is None:
        return "⚠️ Please log in with your HuggingFace account first.", None
    username = profile.username

    # Prefer the real Space id when the environment provides one. The
    # hard-coded guess is kept only as a fallback.
    space_id = os.getenv("SPACE_ID")
    if space_id:
        space_url = f"https://huggingface.co/spaces/{space_id}/tree/main"
    else:
        space_url = f"https://huggingface.co/spaces/{username}/Final_Assignment_Template/tree/main"

    # 1. Fetch questions
    try:
        resp = requests.get(f"{API_BASE}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"❌ Failed to fetch questions: {e}", None
    if not isinstance(questions, list) or not questions:
        return "❌ Failed to fetch questions: the API returned no questions.", None

    # 2. Build agent (a failure here is reported, not raised into the UI)
    try:
        agent = build_agent()
    except Exception as e:
        return f"❌ Failed to build agent: {e}", None

    # 3. Run agent on each question
    results_log = []
    answers_payload = []
    for item in questions:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id", "")
        question = item.get("question", "")
        if not task_id or not question:
            # Nothing meaningful can be submitted for an incomplete entry.
            print(f"[{task_id or '?'}] Skipping entry without task id or question.")
            continue
        print(f"[{task_id}] Running agent…")
        submitted_answer = run_agent_on_question(agent, question, task_id)
        print(f"[{task_id}] Answer: {submitted_answer}")
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({
            "Task ID": task_id,
            "Question": (question[:80] + "…") if len(question) > 80 else question,
            "Submitted Answer": submitted_answer,
        })

    df = pd.DataFrame(results_log)
    if not answers_payload:
        return "⚠️ Agent ran but produced no answers to submit.", df

    # 4. Submit to scoring API
    submission = {
        "username": username,
        "agent_code": space_url,
        "answers": answers_payload,
    }
    try:
        submit_resp = requests.post(f"{API_BASE}/submit", json=submission, timeout=60)
        submit_resp.raise_for_status()
        result = submit_resp.json()
        # The earlier version of this app read 'total_attempted'; accept
        # either key so the total does not always show '?'.
        total = result.get("total_attempted", result.get("total_questions", "?"))
        score_msg = (
            f"✅ Submission successful!\n"
            f"**Score:** {result.get('score', 'N/A')}%  "
            f"({result.get('correct_count', '?')}/{total} correct)\n"
            f"**Message:** {result.get('message', '')}"
        )
    except Exception as e:
        score_msg = f"⚠️ Agent ran but submission failed: {e}"
    return score_msg, df
+# ── App layout ─────────────────────────────────────────────────────────────────
# Page layout: intro text, login button, run button, status area and results
# table. Components are created in display order.
with gr.Blocks(title="GAIA Agent – HF Certification") as demo:
    intro_markdown = (
        "\n"
        "        # 🤖 GAIA Agent — HuggingFace Agents Course Final Assignment\n"
        "        Log in with your HuggingFace account, then click **Run Agent & Submit** to evaluate your agent on the 20 GAIA Level-1 questions.\n"
        "        "
    )
    gr.Markdown(intro_markdown)
    login_btn = gr.LoginButton()
    run_btn = gr.Button("🚀 Run Agent & Submit", variant="primary")
    status = gr.Markdown("Status will appear here after submission.")
    results_table = gr.DataFrame(label="Per-question results", wrap=True)
    # The handler declares no inputs here and returns (status message, results table).
    run_btn.click(
        fn=run_and_submit,
        outputs=[status, results_table],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()