Final_Assignment_Template

Sleeping

App Files Files Community

bhotta committed 25 days ago

Commit

1b067ff

verified ·

1 Parent(s): f369d85

Update app.py

Browse files

Files changed (1) hide show

app.py +168 -181

app.py CHANGED Viewed

@@ -7,10 +7,13 @@ import tempfile
 import requests
 import pandas as pd
 import gradio as gr
-from openai import OpenAI
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # ── helpers ───────────────────────────────────────────────────────────────────
@@ -45,12 +48,17 @@ def _strip_html(html: str) -> str:
 class BasicAgent:
     def __init__(self):
-        api_key = os.getenv("OPENAI_API_KEY")
-        if not api_key:
-            raise ValueError("OPENAI_API_KEY missing.")
-        self.client = OpenAI(api_key=api_key)
         self.api_url = DEFAULT_API_URL
-        print("✅ Agent initialised.")
     # ── raw file fetch ────────────────────────────────────────────────────────
@@ -64,110 +72,106 @@ class BasicAgent:
             pass
         return None, ""
-    # ── tools (called by the loop) ────────────────────────────────────────────
     def tool_check_file(self, task_id: str) -> str:
-        """Tell the model whether a file exists and what type it is."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "NO_FILE"
         ct_clean = ct.split(";")[0].strip().lower()
         return (
             f"FILE_EXISTS type={ct_clean} size={len(fb)}_bytes. "
-            f"Use the appropriate tool to read it: "
-            f"image→analyse_image, python→run_python_file, "
             f"excel/xlsx→read_excel_file, audio→transcribe_audio, "
             f"text/pdf→read_text_file."
         )
     def tool_analyse_image(self, task_id: str, question: str) -> str:
-        """Pass the image to GPT-4o vision and return its answer."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No image found."
-        ct_clean = ct.split(";")[0].strip()
         if "image" not in ct_clean:
             return f"File is not an image (type={ct_clean})."
         b64 = base64.b64encode(fb).decode()
-        resp = self.client.chat.completions.create(
-            model="gpt-4o",
-            messages=[{
-                "role": "user",
-                "content": [
-                    {"type": "image_url",
-                     "image_url": {"url": f"data:{ct_clean};base64,{b64}",
-                                   "detail": "high"}},
-                    {"type": "text", "text": question},
-                ],
-            }],
-            max_tokens=800,
-            temperature=0,
         )
-        return resp.choices[0].message.content or "No response."
     def tool_run_python_file(self, task_id: str) -> str:
-        """Download the Python file, execute it, return stdout/stderr."""
-        fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No file found."
         code = fb.decode("utf-8", errors="ignore")
         try:
-            with tempfile.NamedTemporaryFile(suffix=".py", delete=False,
-                                             mode="w") as f:
                 f.write(code)
                 fname = f.name
             result = subprocess.run(
                 ["python3", fname],
-                capture_output=True, text=True, timeout=30
             )
             out = result.stdout.strip()
             err = result.stderr.strip()
-            if out:
-                return f"STDOUT:\n{out}"
-            if err:
-                return f"STDERR:\n{err}"
-            return "No output."
         except Exception as e:
             return f"Execution error: {e}"
     def tool_read_excel_file(self, task_id: str, question: str) -> str:
-        """Download xlsx/csv, load with pandas, let GPT-4o answer about it."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No file found."
         try:
             import io
             ct_clean = ct.split(";")[0].strip().lower()
-            if "csv" in ct_clean or "text" in ct_clean:
-                df = pd.read_csv(io.BytesIO(fb))
-            else:
-                df = pd.read_excel(io.BytesIO(fb))
-            preview = df.to_string(max_rows=60, max_cols=20)
-            # Ask GPT-4o to answer the question from the data
-            resp = self.client.chat.completions.create(
-                model="gpt-4o",
-                messages=[{
-                    "role": "user",
-                    "content": (
-                        f"Here is a spreadsheet (first 60 rows):\n\n{preview}\n\n"
-                        f"Question: {question}\n"
-                        f"Answer with ONLY the final value, no explanation."
-                    ),
-                }],
-                max_tokens=200,
-                temperature=0,
             )
-            return resp.choices[0].message.content or "No answer."
         except Exception as e:
             return f"Excel read error: {e}"
     def tool_transcribe_audio(self, task_id: str) -> str:
-        """Download audio and transcribe with Whisper."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No file found."
         try:
-            # Guess extension
             ct_clean = ct.split(";")[0].strip().lower()
             ext_map = {
                 "audio/mpeg": ".mp3", "audio/mp3": ".mp3",
@@ -179,28 +183,28 @@ class BasicAgent:
             with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
                 f.write(fb)
                 fname = f.name
             with open(fname, "rb") as audio_f:
-                transcript = self.client.audio.transcriptions.create(
-                    model="whisper-1", file=audio_f
-                )
-            return transcript.text
         except Exception as e:
             return f"Transcription error: {e}"
     def tool_read_text_file(self, task_id: str) -> str:
-        """Read text/PDF file content."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No file found."
         try:
             ct_clean = ct.split(";")[0].strip().lower()
             if "pdf" in ct_clean:
-                # Try pdfminer or just decode bytes
                 try:
                     import pdfminer.high_level
                     import io
-                    text = pdfminer.high_level.extract_text(io.BytesIO(fb))
-                    return text[:6000]
                 except ImportError:
                     pass
             return fb.decode("utf-8", errors="ignore")[:6000]
@@ -208,13 +212,11 @@ class BasicAgent:
             return f"Read error: {e}"
     def tool_search_web(self, query: str) -> str:
-        """DuckDuckGo HTML search – stable from cloud IPs."""
         try:
             hdrs = {
                 "User-Agent": (
                     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-                    "AppleWebKit/537.36 (KHTML, like Gecko) "
-                    "Chrome/124.0 Safari/537.36"
                 )
             }
             r = requests.get(
@@ -263,7 +265,6 @@ class BasicAgent:
             return f"Fetch error: {e}"
     def tool_fetch_wikipedia(self, title: str) -> str:
-        """Use Wikipedia REST API (no 403 issues)."""
         try:
             slug = requests.utils.quote(title.replace(" ", "_"))
             r = requests.get(
@@ -271,9 +272,7 @@ class BasicAgent:
                 timeout=12,
             )
             if r.status_code == 200:
-                data = r.json()
-                return data.get("extract", "Not found.")
-            # Fallback: full extract via w/api.php
             r2 = requests.get(
                 "https://en.wikipedia.org/w/api.php",
                 params={
@@ -305,8 +304,7 @@ class BasicAgent:
                    ("blocked", "ip", "cloud", "requestblocked", "ipblocked")):
                 return (
                     "BLOCKED: YouTube blocks cloud IPs. "
-                    "Use search_web to find transcript/description of this video. "
-                    "Search for the video title + key phrase from the question."
                 )
             return f"Transcript error: {err}"
@@ -319,7 +317,7 @@ class BasicAgent:
                 "name": "check_file",
                 "description": (
                     "ALWAYS call this first. Checks if a file is attached to the task. "
-                    "Returns 'NO_FILE' or info about the file type and how to read it."
                 ),
                 "parameters": {
                     "type": "object",
@@ -333,15 +331,17 @@ class BasicAgent:
             "function": {
                 "name": "analyse_image",
                 "description": (
-                    "Analyse an image file attached to the task using GPT-4o vision. "
                     "Use for chess boards, diagrams, photos, screenshots."
                 ),
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "task_id": {"type": "string"},
-                        "question": {"type": "string",
-                                     "description": "What to find/answer from the image."},
                     },
                     "required": ["task_id", "question"],
                 },
@@ -353,7 +353,7 @@ class BasicAgent:
                 "name": "run_python_file",
                 "description": (
                     "Execute the Python file attached to the task and return its output. "
-                    "Use when the task asks for the output of Python code."
                 ),
                 "parameters": {
                     "type": "object",
@@ -366,10 +366,7 @@ class BasicAgent:
             "type": "function",
             "function": {
                 "name": "read_excel_file",
-                "description": (
-                    "Read an Excel or CSV file attached to the task and answer "
-                    "a question about its data."
-                ),
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -385,7 +382,7 @@ class BasicAgent:
             "function": {
                 "name": "transcribe_audio",
                 "description": (
-                    "Transcribe an audio file attached to the task using Whisper. "
                     "Use for voice memos, recordings, audio questions."
                 ),
                 "parameters": {
@@ -412,8 +409,8 @@ class BasicAgent:
             "function": {
                 "name": "youtube_transcript",
                 "description": (
-                    "Fetch YouTube video transcript. If cloud-blocked, "
-                    "returns instructions to use search_web instead."
                 ),
                 "parameters": {
                     "type": "object",
@@ -426,7 +423,7 @@ class BasicAgent:
             "type": "function",
             "function": {
                 "name": "search_web",
-                "description": "Search the web via DuckDuckGo. Returns top snippets.",
                 "parameters": {
                     "type": "object",
                     "properties": {"query": {"type": "string"}},
@@ -438,7 +435,7 @@ class BasicAgent:
             "type": "function",
             "function": {
                 "name": "fetch_webpage",
-                "description": "Fetch and read the full text content of any URL.",
                 "parameters": {
                     "type": "object",
                     "properties": {"url": {"type": "string"}},
@@ -451,8 +448,8 @@ class BasicAgent:
             "function": {
                 "name": "fetch_wikipedia",
                 "description": (
-                    "Fetch a Wikipedia article by exact title. "
-                    "Always use this instead of fetch_webpage for Wikipedia."
                 ),
                 "parameters": {
                     "type": "object",
@@ -492,152 +489,141 @@ class BasicAgent:
     SYSTEM = """You are a precise research agent solving GAIA benchmark tasks.
-MANDATORY WORKFLOW — follow every step, no exceptions:
-STEP 1 — Always call check_file(task_id) first, regardless of the question.
-  • If NO_FILE → go to STEP 2.
-  • If FILE_EXISTS image → call analyse_image(task_id, full_question).
-  • If FILE_EXISTS python → call run_python_file(task_id). The output IS the answer.
-  • If FILE_EXISTS excel/xlsx/csv → call read_excel_file(task_id, question).
-  • If FILE_EXISTS audio → call transcribe_audio(task_id), then answer from transcript.
-  • If FILE_EXISTS text/pdf → call read_text_file(task_id), then answer from content.
-  CRITICAL: NEVER return "NO_FILE" or any tool status string as your final answer.
-STEP 2 — Gather information using tools.
-  • YouTube URL in question → call youtube_transcript(url) first.
-    If BLOCKED → use search_web("video title + key phrase") to find the answer.
-  • Wikipedia question → call fetch_wikipedia("Exact Article Title").
-    For discography → look at Studio albums table. Count ONLY solo studio albums.
-    Do NOT count: collaborations, live albums, compilations, EPs.
-  • LibreTexts 1.E Exercises → fetch_webpage with EXACT URL:
     https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(LibreTexts)/02%3A_Measurement_and_Problem_Solving/2.E%3A_Measurement_and_Problem_Solving_(Exercises)
-  • Wikipedia Featured Articles → fetch_webpage:
-    https://en.wikipedia.org/wiki/Wikipedia:Featured_articles_promoted_in_2016
-    Then search for the specific article's nomination page.
-  • Sports stats → search_web("player name stat year site:baseball-reference.com")
-    then fetch_webpage the result URL for exact numbers.
-  • For ANY other factual question → search_web, then fetch_webpage top result.
-STEP 3 — If first search fails, try different search terms. Try at least 2-3
-  different approaches before giving up. Never say "I was unable to find."
-STEP 4 — Answer format:
-  • Return ONLY the final value. No explanation. No "The answer is".
-  • Numbers: just the number (e.g. "3" not "3 albums").
-  • Names: just the name.
-  • Yes/No: just "yes" or "no".
-  • Lists: comma-separated values."""
     # ── main call ─────────────────────────────────────────────────────────────
     def __call__(self, question: str, task_id: str = "") -> str:
         print(f"▶ Task {task_id[:8]}: {question[:80]}")
-        # Pre-attach image to messages if task has an image file
-        fb, ct = self._fetch_file(task_id)
-        ct_clean = (ct or "").split(";")[0].strip().lower()
-        user_content = []
-        if fb and "image" in ct_clean:
-            b64 = base64.b64encode(fb).decode()
-            user_content.append({
-                "type": "image_url",
-                "image_url": {"url": f"data:{ct_clean};base64,{b64}",
-                              "detail": "high"},
-            })
-        user_content.append({
-            "type": "text",
-            "text": f"task_id: {task_id}\n\nTask: {question}",
-        })
         messages = [
             {"role": "system", "content": self.SYSTEM},
-            {"role": "user", "content": user_content},
         ]
         for _round in range(10):
             try:
-                resp = self.client.chat.completions.create(
-                    model="gpt-4o",
                     messages=messages,
                     tools=self.TOOLS,
                     tool_choice="auto",
-                    temperature=0,
                     max_tokens=1500,
                 )
             except Exception as e:
-                print(f"  OpenAI error: {e}")
-                return "Error."
             msg = resp.choices[0].message
-            # No tool calls → we have the answer
-            if not msg.tool_calls:
                 answer = (msg.content or "").strip()
-                # Reject bad answers
-                bad = ("no_file", "file_exists", "i was unable",
-                       "i couldn't", "i can't access", "please provide",
-                       "you might want", "i'm unable")
-                if any(b in answer.lower() for b in bad):
-                    # Force a retry with a harder nudge
-                    messages.append({
-                        "role": "assistant",
-                        "content": answer,
-                    })
                     messages.append({
                         "role": "user",
                         "content": (
-                            "That answer is not acceptable. "
-                            "Use your tools to find the real answer. "
-                            "Try search_web or fetch_wikipedia. "
-                            "Return ONLY the final value."
                         ),
                     })
                     continue
                 return answer
-            # Append assistant turn
             messages.append({
                 "role": "assistant",
-                "content": msg.content,
                 "tool_calls": [
                     {
                         "id": tc.id,
                         "type": "function",
                         "function": {
                             "name": tc.function.name,
-                            "arguments": tc.function.arguments,
                         },
                     }
-                    for tc in msg.tool_calls
                 ],
             })
             # Execute tools
-            for tc in msg.tool_calls:
                 fn = tc.function.name
                 try:
-                    args = json.loads(tc.function.arguments)
                 except Exception:
                     args = {}
                 result = self._dispatch(fn, args, task_id, question)
-                print(f"   {fn}({list(args.values())[:1]}) → {str(result)[:80]}")
                 messages.append({
                     "role": "tool",
                     "tool_call_id": tc.id,
                     "content": result or "Empty result.",
                 })
-        # Force final answer
         try:
             messages.append({
                 "role": "user",
-                "content": "Final answer only – just the value, no explanation.",
             })
-            resp = self.client.chat.completions.create(
-                model="gpt-4o", messages=messages,
-                temperature=0, max_tokens=100,
             )
             return (resp.choices[0].message.content or "").strip()
         except Exception:
@@ -675,7 +661,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             answer = agent(question_text, task_id=task_id)
         except Exception as e:
             answer = f"Error: {e}"
-        print(f"  → Answer: {answer[:60]}")
         answers_payload.append({"task_id": task_id, "submitted_answer": answer})
         results_log.append({
@@ -709,10 +695,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🤖 GAIA Agent Evaluation")
     gr.Markdown(
-        "Handles: images · Python execution · Excel · audio transcription · "
-        "Wikipedia · YouTube · web search"
     )
     gr.LoginButton()
     run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary")

 import requests
 import pandas as pd
 import gradio as gr
+from huggingface_hub import InferenceClient
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# Free HF model — best available for tool-calling
+HF_MODEL = "Qwen/Qwen2.5-72B-Instruct"
 # ── helpers ───────────────────────────────────────────────────────────────────
 class BasicAgent:
     def __init__(self):
+        hf_token = os.getenv("HF_TOKEN")
+        if not hf_token:
+            raise ValueError(
+                "HF_TOKEN missing. Add your Hugging Face token to Space Secrets."
+            )
+        self.client = InferenceClient(
+            model=HF_MODEL,
+            token=hf_token,
+        )
         self.api_url = DEFAULT_API_URL
+        print(f"✅ Agent initialised with model: {HF_MODEL}")
     # ── raw file fetch ────────────────────────────────────────────────────────
             pass
         return None, ""
+    # ── tool implementations ──────────────────────────────────────────────────
     def tool_check_file(self, task_id: str) -> str:
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "NO_FILE"
         ct_clean = ct.split(";")[0].strip().lower()
         return (
             f"FILE_EXISTS type={ct_clean} size={len(fb)}_bytes. "
+            f"Use the right tool: image→analyse_image, python→run_python_file, "
             f"excel/xlsx→read_excel_file, audio→transcribe_audio, "
             f"text/pdf→read_text_file."
         )
     def tool_analyse_image(self, task_id: str, question: str) -> str:
+        """Describe/analyse image using HF vision model."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No image found."
+        ct_clean = ct.split(";")[0].strip().lower()
         if "image" not in ct_clean:
             return f"File is not an image (type={ct_clean})."
         b64 = base64.b64encode(fb).decode()
+        # Use a vision-capable model via InferenceClient
+        vision_client = InferenceClient(
+            model="Qwen/Qwen2.5-VL-72B-Instruct",
+            token=os.getenv("HF_TOKEN"),
         )
+        try:
+            result = vision_client.chat_completion(
+                messages=[{
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:{ct_clean};base64,{b64}"
+                            },
+                        },
+                        {"type": "text", "text": question},
+                    ],
+                }],
+                max_tokens=800,
+            )
+            return result.choices[0].message.content or "No response."
+        except Exception as e:
+            # No actual fallback is performed: the error is returned as the tool result
+            return f"Vision error: {e}. Try describing from context."
     def tool_run_python_file(self, task_id: str) -> str:
+        """Download and execute Python file, return stdout."""
+        fb, _ = self._fetch_file(task_id)
         if not fb:
             return "No file found."
         code = fb.decode("utf-8", errors="ignore")
         try:
+            with tempfile.NamedTemporaryFile(
+                suffix=".py", delete=False, mode="w"
+            ) as f:
                 f.write(code)
                 fname = f.name
             result = subprocess.run(
                 ["python3", fname],
+                capture_output=True, text=True, timeout=30,
             )
             out = result.stdout.strip()
             err = result.stderr.strip()
+            return f"STDOUT:\n{out}" if out else f"STDERR:\n{err}" if err else "No output."
         except Exception as e:
             return f"Execution error: {e}"
     def tool_read_excel_file(self, task_id: str, question: str) -> str:
+        """Load Excel/CSV and answer a question about it."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No file found."
         try:
             import io
             ct_clean = ct.split(";")[0].strip().lower()
+            df = (
+                pd.read_csv(io.BytesIO(fb))
+                if ("csv" in ct_clean or "text" in ct_clean)
+                else pd.read_excel(io.BytesIO(fb))
+            )
+            preview = df.to_string(max_rows=80, max_cols=20)
+            # No extra LLM call here: return the table preview plus the question so the agent loop answers it
+            return (
+                f"SPREADSHEET DATA:\n{preview}\n\n"
+                f"Answer the following about this data: {question}"
             )
         except Exception as e:
             return f"Excel read error: {e}"
     def tool_transcribe_audio(self, task_id: str) -> str:
+        """Transcribe audio using HF Whisper."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No file found."
         try:
             ct_clean = ct.split(";")[0].strip().lower()
             ext_map = {
                 "audio/mpeg": ".mp3", "audio/mp3": ".mp3",
             with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
                 f.write(fb)
                 fname = f.name
+            asr_client = InferenceClient(
+                model="openai/whisper-large-v3",
+                token=os.getenv("HF_TOKEN"),
+            )
             with open(fname, "rb") as audio_f:
+                result = asr_client.automatic_speech_recognition(audio_f)
+            return result.text if hasattr(result, "text") else str(result)
         except Exception as e:
             return f"Transcription error: {e}"
     def tool_read_text_file(self, task_id: str) -> str:
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No file found."
         try:
             ct_clean = ct.split(";")[0].strip().lower()
             if "pdf" in ct_clean:
                 try:
                     import pdfminer.high_level
                     import io
+                    return pdfminer.high_level.extract_text(io.BytesIO(fb))[:6000]
                 except ImportError:
                     pass
             return fb.decode("utf-8", errors="ignore")[:6000]
             return f"Read error: {e}"
     def tool_search_web(self, query: str) -> str:
         try:
             hdrs = {
                 "User-Agent": (
                     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                    "AppleWebKit/537.36 Chrome/124.0 Safari/537.36"
                 )
             }
             r = requests.get(
             return f"Fetch error: {e}"
     def tool_fetch_wikipedia(self, title: str) -> str:
         try:
             slug = requests.utils.quote(title.replace(" ", "_"))
             r = requests.get(
                 timeout=12,
             )
             if r.status_code == 200:
+                return r.json().get("extract", "Not found.")
             r2 = requests.get(
                 "https://en.wikipedia.org/w/api.php",
                 params={
                    ("blocked", "ip", "cloud", "requestblocked", "ipblocked")):
                 return (
                     "BLOCKED: YouTube blocks cloud IPs. "
+                    "Use search_web to find transcript or description of this video."
                 )
             return f"Transcript error: {err}"
                 "name": "check_file",
                 "description": (
                     "ALWAYS call this first. Checks if a file is attached to the task. "
+                    "Returns NO_FILE or the file type and which tool to use next."
                 ),
                 "parameters": {
                     "type": "object",
             "function": {
                 "name": "analyse_image",
                 "description": (
+                    "Analyse an image file attached to the task using a vision model. "
                     "Use for chess boards, diagrams, photos, screenshots."
                 ),
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "task_id": {"type": "string"},
+                        "question": {
+                            "type": "string",
+                            "description": "What to find or answer from the image.",
+                        },
                     },
                     "required": ["task_id", "question"],
                 },
                 "name": "run_python_file",
                 "description": (
                     "Execute the Python file attached to the task and return its output. "
+                    "The stdout IS the answer."
                 ),
                 "parameters": {
                     "type": "object",
             "type": "function",
             "function": {
                 "name": "read_excel_file",
+                "description": "Read an Excel or CSV file and answer a question about its data.",
                 "parameters": {
                     "type": "object",
                     "properties": {
             "function": {
                 "name": "transcribe_audio",
                 "description": (
+                    "Transcribe an audio file using Whisper. "
                     "Use for voice memos, recordings, audio questions."
                 ),
                 "parameters": {
             "function": {
                 "name": "youtube_transcript",
                 "description": (
+                    "Fetch YouTube video transcript. "
+                    "If cloud-blocked, use search_web instead."
                 ),
                 "parameters": {
                     "type": "object",
             "type": "function",
             "function": {
                 "name": "search_web",
+                "description": "Search the web via DuckDuckGo. Returns top result snippets.",
                 "parameters": {
                     "type": "object",
                     "properties": {"query": {"type": "string"}},
             "type": "function",
             "function": {
                 "name": "fetch_webpage",
+                "description": "Fetch and read the full text of any URL.",
                 "parameters": {
                     "type": "object",
                     "properties": {"url": {"type": "string"}},
             "function": {
                 "name": "fetch_wikipedia",
                 "description": (
+                    "Fetch a Wikipedia article by exact title via REST API. "
+                    "Always prefer this over fetch_webpage for Wikipedia."
                 ),
                 "parameters": {
                     "type": "object",
     SYSTEM = """You are a precise research agent solving GAIA benchmark tasks.
+MANDATORY WORKFLOW:
+STEP 1 — Call check_file(task_id) first for every task.
+  • NO_FILE → go to STEP 2.
+  • image file → call analyse_image(task_id, question).
+  • python file → call run_python_file(task_id). Its output IS the answer.
+  • excel/csv file → call read_excel_file(task_id, question).
+  • audio file → call transcribe_audio(task_id), then answer from transcript.
+  • text/pdf file → call read_text_file(task_id), then answer from content.
+  NEVER return "NO_FILE" or tool status strings as your final answer.
+STEP 2 — Gather information.
+  • YouTube URL → call youtube_transcript(url). If BLOCKED → search_web.
+  • Wikipedia question → fetch_wikipedia("Exact Article Title").
+    Discography → count ONLY solo studio albums (not collaborations/live/EP).
+  • LibreTexts 1.E → fetch_webpage:
     https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(LibreTexts)/02%3A_Measurement_and_Problem_Solving/2.E%3A_Measurement_and_Problem_Solving_(Exercises)
+  • Sports stats → search_web then fetch_webpage for exact numbers.
+  • Any other question → search_web, then fetch_webpage for details.
+STEP 3 — Try at least 2-3 different search queries before concluding.
+  Never say "I was unable to find." Always use tools to find the answer.
+STEP 4 — Final answer: ONLY the value. No explanation. No preamble.
+  Numbers: just digits. Names: just the name. Lists: comma-separated."""
     # ── main call ─────────────────────────────────────────────────────────────
     def __call__(self, question: str, task_id: str = "") -> str:
         # Run the tool-calling loop for one task and return the answer text.
         #   question: task text; embedded in the user message and forwarded
         #             to _dispatch with every tool call.
         #   task_id:  embedded in the user message and forwarded to
         #             _dispatch; only its first 8 characters are logged.
         # Returns the stripped content of the first tool-free reply that
         # passes the bad_phrases check, the stripped content of the
         # no-tools fallback / forced-final completion, or "Error." when the
         # fallback completion also raises.
         print(f"▶ Task {task_id[:8]}: {question[:80]}")
         messages = [
             {"role": "system", "content": self.SYSTEM},
+            {
+                "role": "user",
+                "content": f"task_id: {task_id}\n\nTask: {question}",
+            },
         ]
         # Lower-case substrings that cause a tool-free reply to be rejected;
         # they are matched against answer.lower() further down.
+        bad_phrases = (
+            "no_file", "file_exists", "i was unable", "i couldn't",
+            "i can't access", "please provide", "you might want",
+            "i'm unable", "i cannot", "i am unable",
+        )
         # At most 10 model rounds. Each round either returns an answer,
         # rejects the reply and asks again, or executes the requested tools.
         for _round in range(10):
             try:
+                resp = self.client.chat_completion(
                     messages=messages,
                     tools=self.TOOLS,
                     tool_choice="auto",
                     max_tokens=1500,
+                    temperature=0.1,
                 )
             except Exception as e:
                 # Any exception from the tool-enabled call lands here (not
                 # only an unsupported tool_choice). Both outcomes below
                 # return, so the loop is never resumed after a failure.
+                print(f"  HF API error: {e}")
+                # Retry without tools if tool_choice unsupported
+                try:
+                    resp = self.client.chat_completion(
+                        messages=messages,
+                        max_tokens=500,
+                        temperature=0.1,
+                    )
+                    return (resp.choices[0].message.content or "").strip()
+                except Exception as e2:
+                    print(f"  Fallback error: {e2}")
+                    return "Error."
             msg = resp.choices[0].message
             # getattr with a default tolerates message objects that carry no
             # tool_calls attribute at all.
+            tool_calls = getattr(msg, "tool_calls", None)
+            # No tool calls → final answer
+            if not tool_calls:
                 answer = (msg.content or "").strip()
                 # A reply containing any bad phrase is kept in the history,
                 # followed by a corrective user message; the retry consumes
                 # one of the 10 rounds.
+                if any(b in answer.lower() for b in bad_phrases):
+                    messages.append({"role": "assistant", "content": answer})
                     messages.append({
                         "role": "user",
                         "content": (
+                            "That is not acceptable. Use your tools to find the "
+                            "real answer. Return ONLY the final value."
                         ),
                     })
                     continue
                 return answer
+            # Append assistant message with tool calls
             # "arguments" is always stored as a string: str values pass
             # through unchanged, anything else is serialised with json.dumps.
             messages.append({
                 "role": "assistant",
+                "content": msg.content or "",
                 "tool_calls": [
                     {
                         "id": tc.id,
                         "type": "function",
                         "function": {
                             "name": tc.function.name,
+                            "arguments": tc.function.arguments
+                            if isinstance(tc.function.arguments, str)
+                            else json.dumps(tc.function.arguments),
                         },
                     }
+                    for tc in tool_calls
                 ],
             })
             # Execute tools
             # One "tool" message is appended per call, keyed by tc.id.
+            for tc in tool_calls:
                 fn = tc.function.name
                 try:
+                    raw_args = tc.function.arguments
+                    args = (
+                        json.loads(raw_args)
+                        if isinstance(raw_args, str)
+                        else raw_args
+                    )
                 except Exception:
                     # Arguments that cannot be obtained or parsed fall back
                     # to an empty dict; _dispatch is still called.
                     args = {}
                 result = self._dispatch(fn, args, task_id, question)
+                print(f"   {fn} → {str(result)[:80]}")
                 messages.append({
                     "role": "tool",
                     "tool_call_id": tc.id,
                     # A falsy result is replaced by a placeholder string.
                     "content": result or "Empty result.",
                 })
+        # Force final answer after max rounds
         # Reached only when all 10 rounds finished without returning. One
         # more completion is requested without tools and capped at 100 tokens.
         try:
             messages.append({
                 "role": "user",
+                "content": "Final answer only — just the value, no explanation.",
             })
+            resp = self.client.chat_completion(
+                messages=messages, max_tokens=100, temperature=0.1,
             )
             return (resp.choices[0].message.content or "").strip()
         except Exception:
             answer = agent(question_text, task_id=task_id)
         except Exception as e:
             answer = f"Error: {e}"
+        print(f"  → {answer[:60]}")
         answers_payload.append({"task_id": task_id, "submitted_answer": answer})
         results_log.append({
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🤖 GAIA Agent — Free HuggingFace Models")
     gr.Markdown(
+        f"**LLM:** `{HF_MODEL}` (free via HF Inference API)  \n"
+        "**Vision:** `Qwen/Qwen2.5-VL-72B-Instruct`  \n"
+        "**ASR:** `openai/whisper-large-v3`"
     )
     gr.LoginButton()
     run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary")