Final_Assignment_Template

Sleeping

App Files Files Community

bhotta committed 11 days ago

Commit

d91971a

verified ·

1 Parent(s): 9c7a095

Update app.py

Browse files

Files changed (1) hide show

app.py +178 -233

app.py CHANGED Viewed

@@ -8,25 +8,11 @@ import requests
 import pandas as pd
 import gradio as gr
 from huggingface_hub import InferenceClient
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# Free HF model — best available for tool-calling
-HF_MODEL = "Qwen/Qwen2.5-72B-Instruct"
 # ── helpers ───────────────────────────────────────────────────────────────────
-def _get_hf_token():
-    """
-    HF Spaces automatically injects the token under several variable names.
-    We try all of them. No manual secret needed.
-    """
-    for var in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN"):
-        token = os.getenv(var, "").strip()
-        if token:
-            return token
-    return None
 def _strip_html(html: str) -> str:
     from html.parser import HTMLParser
@@ -58,14 +44,26 @@ def _strip_html(html: str) -> str:
 class BasicAgent:
     def __init__(self):
-        hf_token = _get_hf_token()
-        self.hf_token = hf_token
-        self.client = InferenceClient(
-            model=HF_MODEL,
-            token=hf_token,
         )
         self.api_url = DEFAULT_API_URL
-        print(f"✅ Agent initialised with model: {HF_MODEL}")
     # ── raw file fetch ────────────────────────────────────────────────────────
@@ -94,7 +92,7 @@ class BasicAgent:
         )
     def tool_analyse_image(self, task_id: str, question: str) -> str:
-        """Describe/analyse image using HF vision model."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No image found."
@@ -103,31 +101,38 @@ class BasicAgent:
             return f"File is not an image (type={ct_clean})."
         b64 = base64.b64encode(fb).decode()
-        # Use a vision-capable model via InferenceClient
-        vision_client = InferenceClient(
-            model="Qwen/Qwen2.5-VL-72B-Instruct",
-            token=self.hf_token,
-        )
         try:
-            result = vision_client.chat_completion(
                 messages=[{
                     "role": "user",
                     "content": [
                         {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"data:{ct_clean};base64,{b64}"
                             },
                         },
                         {"type": "text", "text": question},
                     ],
                 }],
-                max_tokens=800,
             )
-            return result.choices[0].message.content or "No response."
         except Exception as e:
-            # Fallback to text-only description attempt
-            return f"Vision error: {e}. Try describing from context."
     def tool_run_python_file(self, task_id: str) -> str:
         """Download and execute Python file, return stdout."""
@@ -165,7 +170,6 @@ class BasicAgent:
                 else pd.read_excel(io.BytesIO(fb))
             )
             preview = df.to_string(max_rows=80, max_cols=20)
-            # Ask the LLM inline (no extra API call – just return data+question)
             return (
                 f"SPREADSHEET DATA:\n{preview}\n\n"
                 f"Answer the following about this data: {question}"
@@ -174,7 +178,7 @@ class BasicAgent:
             return f"Excel read error: {e}"
     def tool_transcribe_audio(self, task_id: str) -> str:
-        """Transcribe audio using HF Whisper."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No file found."
@@ -191,13 +195,16 @@ class BasicAgent:
                 f.write(fb)
                 fname = f.name
-            asr_client = InferenceClient(
-                model="openai/whisper-large-v3",
-                token=self.hf_token,
-            )
-            with open(fname, "rb") as audio_f:
-                result = asr_client.automatic_speech_recognition(audio_f)
-            return result.text if hasattr(result, "text") else str(result)
         except Exception as e:
             return f"Transcription error: {e}"
@@ -315,154 +322,124 @@ class BasicAgent:
                 )
             return f"Transcript error: {err}"
-    # ── tool dispatch ─────────────────────────────────────────────────────────
     TOOLS = [
         {
-            "type": "function",
-            "function": {
-                "name": "check_file",
-                "description": (
-                    "ALWAYS call this first. Checks if a file is attached to the task. "
-                    "Returns NO_FILE or the file type and which tool to use next."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {"task_id": {"type": "string"}},
-                    "required": ["task_id"],
-                },
             },
         },
         {
-            "type": "function",
-            "function": {
-                "name": "analyse_image",
-                "description": (
-                    "Analyse an image file attached to the task using a vision model. "
-                    "Use for chess boards, diagrams, photos, screenshots."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "task_id": {"type": "string"},
-                        "question": {
-                            "type": "string",
-                            "description": "What to find or answer from the image.",
-                        },
                     },
-                    "required": ["task_id", "question"],
                 },
             },
         },
         {
-            "type": "function",
-            "function": {
-                "name": "run_python_file",
-                "description": (
-                    "Execute the Python file attached to the task and return its output. "
-                    "The stdout IS the answer."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {"task_id": {"type": "string"}},
-                    "required": ["task_id"],
-                },
             },
         },
         {
-            "type": "function",
-            "function": {
-                "name": "read_excel_file",
-                "description": "Read an Excel or CSV file and answer a question about its data.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "task_id": {"type": "string"},
-                        "question": {"type": "string"},
-                    },
-                    "required": ["task_id", "question"],
                 },
             },
         },
         {
-            "type": "function",
-            "function": {
-                "name": "transcribe_audio",
-                "description": (
-                    "Transcribe an audio file using Whisper. "
-                    "Use for voice memos, recordings, audio questions."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {"task_id": {"type": "string"}},
-                    "required": ["task_id"],
-                },
             },
         },
         {
-            "type": "function",
-            "function": {
-                "name": "read_text_file",
-                "description": "Read a text or PDF file attached to the task.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"task_id": {"type": "string"}},
-                    "required": ["task_id"],
-                },
             },
         },
         {
-            "type": "function",
-            "function": {
-                "name": "youtube_transcript",
-                "description": (
-                    "Fetch YouTube video transcript. "
-                    "If cloud-blocked, use search_web instead."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {"video_url": {"type": "string"}},
-                    "required": ["video_url"],
-                },
             },
         },
         {
-            "type": "function",
-            "function": {
-                "name": "search_web",
-                "description": "Search the web via DuckDuckGo. Returns top result snippets.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"query": {"type": "string"}},
-                    "required": ["query"],
-                },
             },
         },
         {
-            "type": "function",
-            "function": {
-                "name": "fetch_webpage",
-                "description": "Fetch and read the full text of any URL.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"url": {"type": "string"}},
-                    "required": ["url"],
-                },
             },
         },
         {
-            "type": "function",
-            "function": {
-                "name": "fetch_wikipedia",
-                "description": (
-                    "Fetch a Wikipedia article by exact title via REST API. "
-                    "Always prefer this over fetch_webpage for Wikipedia."
-                ),
-                "parameters": {
-                    "type": "object",
-                    "properties": {"title": {"type": "string"}},
-                    "required": ["title"],
-                },
             },
         },
     ]
@@ -495,9 +472,7 @@ class BasicAgent:
     # ── system prompt ─────────────────────────────────────────────────────────
     SYSTEM = """You are a precise research agent solving GAIA benchmark tasks.
 MANDATORY WORKFLOW:
 STEP 1 — Call check_file(task_id) first for every task.
   • NO_FILE → go to STEP 2.
   • image file → call analyse_image(task_id, question).
@@ -506,7 +481,6 @@ STEP 1 — Call check_file(task_id) first for every task.
   • audio file → call transcribe_audio(task_id), then answer from transcript.
   • text/pdf file → call read_text_file(task_id), then answer from content.
   NEVER return "NO_FILE" or tool status strings as your final answer.
 STEP 2 — Gather information.
   • YouTube URL → call youtube_transcript(url). If BLOCKED → search_web.
   • Wikipedia question → fetch_wikipedia("Exact Article Title").
@@ -515,10 +489,8 @@ STEP 2 — Gather information.
     https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(LibreTexts)/02%3A_Measurement_and_Problem_Solving/2.E%3A_Measurement_and_Problem_Solving_(Exercises)
   • Sports stats → search_web then fetch_webpage for exact numbers.
   • Any other question → search_web, then fetch_webpage for details.
 STEP 3 — Try at least 2-3 different search queries before concluding.
   Never say "I was unable to find." Always use tools to find the answer.
 STEP 4 — Final answer: ONLY the value. No explanation. No preamble.
   Numbers: just digits. Names: just the name. Lists: comma-separated."""
@@ -528,7 +500,6 @@ STEP 4 — Final answer: ONLY the value. No explanation. No preamble.
         print(f"▶ Task {task_id[:8]}: {question[:80]}")
         messages = [
-            {"role": "system", "content": self.SYSTEM},
             {
                 "role": "user",
                 "content": f"task_id: {task_id}\n\nTask: {question}",
@@ -543,35 +514,31 @@ STEP 4 — Final answer: ONLY the value. No explanation. No preamble.
         for _round in range(10):
             try:
-                resp = self.client.chat_completion(
-                    messages=messages,
-                    tools=self.TOOLS,
-                    tool_choice="auto",
                     max_tokens=1500,
-                    temperature=0.1,
                 )
             except Exception as e:
-                print(f"  HF API error: {e}")
-                # Retry without tools if tool_choice unsupported
-                try:
-                    resp = self.client.chat_completion(
-                        messages=messages,
-                        max_tokens=500,
-                        temperature=0.1,
-                    )
-                    return (resp.choices[0].message.content or "").strip()
-                except Exception as e2:
-                    print(f"  Fallback error: {e2}")
-                    return "Error."
-            msg = resp.choices[0].message
-            tool_calls = getattr(msg, "tool_calls", None)
-            # No tool calls → final answer
-            if not tool_calls:
-                answer = (msg.content or "").strip()
                 if any(b in answer.lower() for b in bad_phrases):
-                    messages.append({"role": "assistant", "content": answer})
                     messages.append({
                         "role": "user",
                         "content": (
@@ -582,57 +549,35 @@ STEP 4 — Final answer: ONLY the value. No explanation. No preamble.
                     continue
                 return answer
-            # Append assistant message with tool calls
-            messages.append({
-                "role": "assistant",
-                "content": msg.content or "",
-                "tool_calls": [
-                    {
-                        "id": tc.id,
-                        "type": "function",
-                        "function": {
-                            "name": tc.function.name,
-                            "arguments": tc.function.arguments
-                            if isinstance(tc.function.arguments, str)
-                            else json.dumps(tc.function.arguments),
-                        },
-                    }
-                    for tc in tool_calls
-                ],
-            })
-            # Execute tools
-            for tc in tool_calls:
-                fn = tc.function.name
-                try:
-                    raw_args = tc.function.arguments
-                    args = (
-                        json.loads(raw_args)
-                        if isinstance(raw_args, str)
-                        else raw_args
-                    )
-                except Exception:
-                    args = {}
                 result = self._dispatch(fn, args, task_id, question)
                 print(f"   {fn} → {str(result)[:80]}")
-                messages.append({
-                    "role": "tool",
-                    "tool_call_id": tc.id,
                     "content": result or "Empty result.",
                 })
         # Force final answer after max rounds
         try:
             messages.append({
                 "role": "user",
                 "content": "Final answer only — just the value, no explanation.",
             })
-            resp = self.client.chat_completion(
-                messages=messages, max_tokens=100, temperature=0.1,
             )
-            return (resp.choices[0].message.content or "").strip()
         except Exception:
             return "Error."
@@ -702,11 +647,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🤖 GAIA Agent — Free HuggingFace Models")
     gr.Markdown(
-        f"**LLM:** `{HF_MODEL}` (free via HF Inference API)  \n"
-        "**Vision:** `Qwen/Qwen2.5-VL-72B-Instruct`  \n"
-        "**ASR:** `openai/whisper-large-v3`"
     )
     gr.LoginButton()
     run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary")

 import pandas as pd
 import gradio as gr
 from huggingface_hub import InferenceClient
+import anthropic
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # ── helpers ───────────────────────────────────────────────────────────────────
 def _strip_html(html: str) -> str:
     from html.parser import HTMLParser
 class BasicAgent:
     def __init__(self):
+        # Use Anthropic API — no HF credits needed
+        self.anthropic_client = anthropic.Anthropic(
+            api_key=os.environ.get("ANTHROPIC_API_KEY", "")
         )
+        self.model = "claude-sonnet-4-20250514"
+        # Keep HF client only for Whisper ASR (free, no Inference Provider needed)
+        hf_token = self._get_hf_token()
+        self.hf_token = hf_token
+        self.hf_client = InferenceClient(token=hf_token) if hf_token else None
         self.api_url = DEFAULT_API_URL
+        print(f"✅ Agent initialised with model: {self.model}")
+    def _get_hf_token(self):
+        for var in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN"):
+            token = os.getenv(var, "").strip()
+            if token:
+                return token
+        return None
     # ── raw file fetch ────────────────────────────────────────────────────────
         )
     def tool_analyse_image(self, task_id: str, question: str) -> str:
+        """Analyse image using Claude's vision."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No image found."
             return f"File is not an image (type={ct_clean})."
         b64 = base64.b64encode(fb).decode()
+        # Map content type to Anthropic media type
+        media_map = {
+            "image/jpeg": "image/jpeg",
+            "image/jpg": "image/jpeg",
+            "image/png": "image/png",
+            "image/gif": "image/gif",
+            "image/webp": "image/webp",
+        }
+        media_type = media_map.get(ct_clean, "image/jpeg")
         try:
+            response = self.anthropic_client.messages.create(
+                model=self.model,
+                max_tokens=800,
                 messages=[{
                     "role": "user",
                     "content": [
                         {
+                            "type": "image",
+                            "source": {
+                                "type": "base64",
+                                "media_type": media_type,
+                                "data": b64,
                             },
                         },
                         {"type": "text", "text": question},
                     ],
                 }],
             )
+            return response.content[0].text
         except Exception as e:
+            return f"Vision error: {e}"
     def tool_run_python_file(self, task_id: str) -> str:
         """Download and execute Python file, return stdout."""
                 else pd.read_excel(io.BytesIO(fb))
             )
             preview = df.to_string(max_rows=80, max_cols=20)
             return (
                 f"SPREADSHEET DATA:\n{preview}\n\n"
                 f"Answer the following about this data: {question}"
             return f"Excel read error: {e}"
     def tool_transcribe_audio(self, task_id: str) -> str:
+        """Transcribe audio using HF Whisper (free ASR endpoint)."""
         fb, ct = self._fetch_file(task_id)
         if not fb:
             return "No file found."
                 f.write(fb)
                 fname = f.name
+            if self.hf_client:
+                asr_client = InferenceClient(
+                    model="openai/whisper-large-v3",
+                    token=self.hf_token,
+                )
+                with open(fname, "rb") as audio_f:
+                    result = asr_client.automatic_speech_recognition(audio_f)
+                return result.text if hasattr(result, "text") else str(result)
+            else:
+                return "No HF token available for audio transcription."
         except Exception as e:
             return f"Transcription error: {e}"
                 )
             return f"Transcript error: {err}"
+    # ── Anthropic tool definitions ────────────────────────────────────────────
     TOOLS = [
         {
+            "name": "check_file",
+            "description": (
+                "ALWAYS call this first. Checks if a file is attached to the task. "
+                "Returns NO_FILE or the file type and which tool to use next."
+            ),
+            "input_schema": {
+                "type": "object",
+                "properties": {"task_id": {"type": "string"}},
+                "required": ["task_id"],
             },
         },
         {
+            "name": "analyse_image",
+            "description": (
+                "Analyse an image file attached to the task using vision. "
+                "Use for chess boards, diagrams, photos, screenshots."
+            ),
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "task_id": {"type": "string"},
+                    "question": {
+                        "type": "string",
+                        "description": "What to find or answer from the image.",
                     },
                 },
+                "required": ["task_id", "question"],
             },
         },
         {
+            "name": "run_python_file",
+            "description": (
+                "Execute the Python file attached to the task and return its output. "
+                "The stdout IS the answer."
+            ),
+            "input_schema": {
+                "type": "object",
+                "properties": {"task_id": {"type": "string"}},
+                "required": ["task_id"],
             },
         },
         {
+            "name": "read_excel_file",
+            "description": "Read an Excel or CSV file and answer a question about its data.",
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "task_id": {"type": "string"},
+                    "question": {"type": "string"},
                 },
+                "required": ["task_id", "question"],
             },
         },
         {
+            "name": "transcribe_audio",
+            "description": (
+                "Transcribe an audio file using Whisper. "
+                "Use for voice memos, recordings, audio questions."
+            ),
+            "input_schema": {
+                "type": "object",
+                "properties": {"task_id": {"type": "string"}},
+                "required": ["task_id"],
             },
         },
         {
+            "name": "read_text_file",
+            "description": "Read a text or PDF file attached to the task.",
+            "input_schema": {
+                "type": "object",
+                "properties": {"task_id": {"type": "string"}},
+                "required": ["task_id"],
             },
         },
         {
+            "name": "youtube_transcript",
+            "description": (
+                "Fetch YouTube video transcript. "
+                "If cloud-blocked, use search_web instead."
+            ),
+            "input_schema": {
+                "type": "object",
+                "properties": {"video_url": {"type": "string"}},
+                "required": ["video_url"],
             },
         },
         {
+            "name": "search_web",
+            "description": "Search the web via DuckDuckGo. Returns top result snippets.",
+            "input_schema": {
+                "type": "object",
+                "properties": {"query": {"type": "string"}},
+                "required": ["query"],
             },
         },
         {
+            "name": "fetch_webpage",
+            "description": "Fetch and read the full text of any URL.",
+            "input_schema": {
+                "type": "object",
+                "properties": {"url": {"type": "string"}},
+                "required": ["url"],
             },
         },
         {
+            "name": "fetch_wikipedia",
+            "description": (
+                "Fetch a Wikipedia article by exact title via REST API. "
+                "Always prefer this over fetch_webpage for Wikipedia."
+            ),
+            "input_schema": {
+                "type": "object",
+                "properties": {"title": {"type": "string"}},
+                "required": ["title"],
             },
         },
     ]
     # ── system prompt ─────────────────────────────────────────────────────────
     SYSTEM = """You are a precise research agent solving GAIA benchmark tasks.
 MANDATORY WORKFLOW:
 STEP 1 — Call check_file(task_id) first for every task.
   • NO_FILE → go to STEP 2.
   • image file → call analyse_image(task_id, question).
   • audio file → call transcribe_audio(task_id), then answer from transcript.
   • text/pdf file → call read_text_file(task_id), then answer from content.
   NEVER return "NO_FILE" or tool status strings as your final answer.
 STEP 2 — Gather information.
   • YouTube URL → call youtube_transcript(url). If BLOCKED → search_web.
   • Wikipedia question → fetch_wikipedia("Exact Article Title").
     https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(LibreTexts)/02%3A_Measurement_and_Problem_Solving/2.E%3A_Measurement_and_Problem_Solving_(Exercises)
   • Sports stats → search_web then fetch_webpage for exact numbers.
   • Any other question → search_web, then fetch_webpage for details.
 STEP 3 — Try at least 2-3 different search queries before concluding.
   Never say "I was unable to find." Always use tools to find the answer.
 STEP 4 — Final answer: ONLY the value. No explanation. No preamble.
   Numbers: just digits. Names: just the name. Lists: comma-separated."""
         print(f"▶ Task {task_id[:8]}: {question[:80]}")
         messages = [
             {
                 "role": "user",
                 "content": f"task_id: {task_id}\n\nTask: {question}",
         for _round in range(10):
             try:
+                resp = self.anthropic_client.messages.create(
+                    model=self.model,
                     max_tokens=1500,
+                    system=self.SYSTEM,
+                    tools=self.TOOLS,
+                    messages=messages,
                 )
             except Exception as e:
+                print(f"  Anthropic API error: {e}")
+                return "Error."
+            # Check stop reason
+            stop_reason = resp.stop_reason
+            # Collect text and tool use blocks
+            tool_uses = [b for b in resp.content if b.type == "tool_use"]
+            text_blocks = [b for b in resp.content if b.type == "text"]
+            # Append assistant message
+            messages.append({"role": "assistant", "content": resp.content})
+            if stop_reason == "end_turn" or not tool_uses:
+                # Final answer
+                answer = text_blocks[0].text.strip() if text_blocks else ""
                 if any(b in answer.lower() for b in bad_phrases):
                     messages.append({
                         "role": "user",
                         "content": (
                     continue
                 return answer
+            # Execute tool calls and collect results
+            tool_results = []
+            for tb in tool_uses:
+                fn = tb.name
+                args = tb.input if isinstance(tb.input, dict) else {}
                 result = self._dispatch(fn, args, task_id, question)
                 print(f"   {fn} → {str(result)[:80]}")
+                tool_results.append({
+                    "type": "tool_result",
+                    "tool_use_id": tb.id,
                     "content": result or "Empty result.",
                 })
+            messages.append({"role": "user", "content": tool_results})
         # Force final answer after max rounds
         try:
             messages.append({
                 "role": "user",
                 "content": "Final answer only — just the value, no explanation.",
             })
+            resp = self.anthropic_client.messages.create(
+                model=self.model,
+                max_tokens=100,
+                system=self.SYSTEM,
+                messages=messages,
             )
+            text_blocks = [b for b in resp.content if b.type == "text"]
+            return text_blocks[0].text.strip() if text_blocks else "Error."
         except Exception:
             return "Error."
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🤖 GAIA Agent — Claude Sonnet")
     gr.Markdown(
+        f"**LLM:** `claude-sonnet-4-20250514` (Anthropic API)  \n"
+        "**Vision:** Claude native vision  \n"
+        "**ASR:** `openai/whisper-large-v3` (HF)"
     )
     gr.LoginButton()
     run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary")