Mouhamedamar committed on
Commit
0b1a31b
·
verified ·
1 Parent(s): 3339f5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -105
app.py CHANGED
@@ -7,57 +7,52 @@ import gradio as gr
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
- # ──────────────────────────────────────────────
11
- # smolagents β€” InferenceClientModel est dans smolagents directement
12
- # ──────────────────────────────────────────────
13
  from smolagents import (
14
- tool,
15
  CodeAgent,
16
  InferenceClientModel,
17
  DuckDuckGoSearchTool,
18
  VisitWebpageTool,
 
19
  )
20
 
21
- # ──────────────────────────────────────────────
22
- # Tools
23
- # ──────────────────────────────────────────────
24
 
25
  @tool
26
  def wikipedia_search(query: str) -> str:
27
  """Search Wikipedia and return the intro of the top article.
28
  Args:
29
- query: search terms
30
  """
31
  try:
32
- url = "https://en.wikipedia.org/w/api.php"
33
- r = requests.get(url, params={
34
  "action": "query", "list": "search",
35
  "srsearch": query, "format": "json", "srlimit": 1,
36
  }, timeout=15).json()
37
- title = r["query"]["search"][0]["title"]
38
- ex = requests.get(url, params={
39
  "action": "query", "prop": "extracts",
40
  "exintro": True, "explaintext": True,
41
  "titles": title, "format": "json",
42
  }, timeout=15).json()
43
- pages = ex["query"]["pages"]
44
- extract = next(iter(pages.values())).get("extract", "")[:4000]
45
- return f"# {title}\n{extract}"
46
  except Exception as e:
47
  return f"Wikipedia error: {e}"
48
 
49
 
50
  @tool
51
  def download_file_for_task(task_id: str) -> str:
52
- """Download and read a file attached to a GAIA task. Returns text content.
53
  Args:
54
- task_id: the GAIA task UUID
55
  """
56
  try:
57
- url = f"{DEFAULT_API_URL}/files/{task_id}"
58
- r = requests.get(url, timeout=30)
59
  if r.status_code != 200:
60
- return "No file found for this task."
61
  data = r.content
62
  ct = r.headers.get("content-type", "")
63
 
@@ -66,20 +61,20 @@ def download_file_for_task(task_id: str) -> str:
66
  try:
67
  import io
68
  from pypdf import PdfReader
69
- reader = PdfReader(io.BytesIO(data))
70
- text = "\n".join(p.extract_text() or "" for p in reader.pages)
71
  return text[:6000]
72
  except Exception as e:
73
  return f"PDF error: {e}"
74
 
75
- # Audio
76
  if any(x in ct for x in ["audio", "mpeg", "mp3", "wav"]) or data[:3] == b"ID3":
77
- hf_url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
78
- headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN','')}"}
79
  for _ in range(3):
80
- resp = requests.post(hf_url, headers=headers, data=data, timeout=120)
81
  if resp.status_code == 503:
82
- time.sleep(20); continue
 
83
  if resp.status_code == 200:
84
  return resp.json().get("text", "")
85
  return "Audio transcription failed."
@@ -88,66 +83,57 @@ def download_file_for_task(task_id: str) -> str:
88
  if any(x in ct for x in ["spreadsheet", "excel", "csv"]) or data[:2] == b"PK":
89
  try:
90
  import io
91
- df = pd.read_excel(io.BytesIO(data))
92
- return df.to_string(index=False)[:4000]
93
  except Exception:
94
  try:
95
  import io
96
- df = pd.read_csv(io.BytesIO(data))
97
- return df.to_string(index=False)[:4000]
98
  except Exception as e:
99
  return f"Spreadsheet error: {e}"
100
 
101
- # Image β†’ vision model
102
  if any(x in ct for x in ["image", "png", "jpg", "jpeg"]):
103
  import base64
104
- b64 = base64.b64encode(data).decode()
105
  mime = "image/png" if data[:4] == b"\x89PNG" else "image/jpeg"
106
- hf_url = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions"
107
- headers = {
108
- "Authorization": f"Bearer {os.environ.get('HF_TOKEN','')}",
109
- "Content-Type": "application/json",
110
- }
111
  payload = {
112
  "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
113
  "messages": [{"role": "user", "content": [
114
  {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}},
115
- {"type": "text", "text": "Describe this image in full detail. If chess: name every piece and square. If text/numbers: transcribe exactly."},
116
  ]}],
117
  "max_tokens": 1024,
118
  }
119
  for _ in range(3):
120
- resp = requests.post(hf_url, headers=headers, json=payload, timeout=120)
121
  if resp.status_code == 503:
122
- time.sleep(20); continue
 
123
  if resp.status_code == 200:
124
  return resp.json()["choices"][0]["message"]["content"]
125
  return "Image analysis failed."
126
 
127
  # Text / code fallback
128
- try:
129
- return data.decode("utf-8", errors="replace")[:4000]
130
- except Exception:
131
- return "Binary file, cannot read."
132
  except Exception as e:
133
  return f"File download error: {e}"
134
 
135
 
136
  @tool
137
  def get_youtube_transcript(video_url: str) -> str:
138
- """Get the transcript/captions from a YouTube video.
139
  Args:
140
- video_url: full YouTube URL like https://www.youtube.com/watch?v=XXXXX
141
  """
142
  try:
143
  from youtube_transcript_api import YouTubeTranscriptApi
144
- vid_match = re.search(r"v=([A-Za-z0-9_-]{11})", video_url)
145
- if not vid_match:
146
- vid_match = re.search(r"youtu\.be/([A-Za-z0-9_-]{11})", video_url)
147
- if not vid_match:
148
- return "Could not extract video ID."
149
- vid_id = vid_match.group(1)
150
- transcript = YouTubeTranscriptApi.get_transcript(vid_id, languages=["en", "en-US", "en-GB"])
151
  return " ".join(t["text"] for t in transcript)[:5000]
152
  except Exception as e:
153
  return f"Transcript error: {e}"
@@ -155,35 +141,28 @@ def get_youtube_transcript(video_url: str) -> str:
155
 
156
  @tool
157
  def run_python_code(code: str) -> str:
158
- """Execute Python code and return the output. Useful for math, logic, data processing.
159
  Args:
160
- code: Python code to execute
161
  """
162
  import subprocess, sys
163
  try:
164
- result = subprocess.run(
165
- [sys.executable, "-c", code],
166
- capture_output=True, text=True, timeout=30
167
- )
168
- out = result.stdout.strip()
169
- err = result.stderr.strip()
170
- return out if out else (f"Error: {err}" if err else "(no output)")
171
  except subprocess.TimeoutExpired:
172
- return "Timed out."
173
  except Exception as e:
174
- return f"Error: {e}"
175
 
176
 
177
- # ──────────────────────────────────────────────
178
- # Agent
179
- # ──────────────────────────────────────────────
180
 
181
  class GAIAAgent:
182
  def __init__(self):
 
183
  model = InferenceClientModel(
184
  model_id="meta-llama/Llama-3.3-70B-Instruct",
185
  token=os.environ.get("HF_TOKEN", ""),
186
- timeout=120,
187
  )
188
  self.agent = CodeAgent(
189
  tools=[
@@ -198,52 +177,59 @@ class GAIAAgent:
198
  max_steps=10,
199
  verbosity_level=1,
200
  additional_authorized_imports=[
201
- "re", "json", "math", "datetime",
202
- "collections", "itertools", "unicodedata",
203
  ],
204
  )
205
  print("GAIAAgent ready βœ…")
206
 
207
  def __call__(self, question: str, task_id: str = "") -> str:
208
- print(f"\n{'='*55}\nQ: {question[:120]}")
209
- full_question = question
210
- if task_id:
211
- full_question = (
212
- f"{question}\n\n"
213
- f"[If this question needs a file, call download_file_for_task with task_id='{task_id}']"
214
- )
 
 
 
 
 
215
  prompt = (
216
- "You are solving a GAIA benchmark question. Rules:\n"
217
- "- Think step by step and use tools when needed.\n"
218
- "- For YouTube links: call get_youtube_transcript.\n"
219
- "- For attached files (image/pdf/audio/excel): call download_file_for_task.\n"
220
- "- For math/logic: call run_python_code.\n"
221
- "- For facts: call wikipedia_search or DuckDuckGoSearchTool.\n"
222
- "- Give ONLY the final answer, nothing else. Exact match required.\n"
223
- "- For lists: comma-separated. For numbers: digits only.\n\n"
224
- f"Question: {full_question}"
 
 
225
  )
 
226
  try:
227
  result = self.agent.run(prompt)
228
  answer = str(result).strip()
229
- # Strip common LLM prefixes
230
- for prefix in ["The answer is", "Answer:", "ANSWER:", "Final answer:", "Result:"]:
231
- if answer.lower().startswith(prefix.lower()):
232
  answer = answer[len(prefix):].strip().lstrip(":").strip()
233
- print(f"β†’ {answer}")
234
  return answer
235
  except Exception as e:
236
  print(f"Agent error: {e}")
237
  return "Unable to determine answer."
238
 
239
 
240
- # ──────────────────────────────────────────────
241
- # Gradio UI
242
- # ──────────────────────────────────────────────
243
 
244
  def run_and_submit_all(profile: gr.OAuthProfile | None):
245
  if not profile:
246
  return "Please login to Hugging Face first.", None
 
247
  username = profile.username
248
  space_id = os.getenv("SPACE_ID", "")
249
  api_url = DEFAULT_API_URL
@@ -255,9 +241,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
255
  return f"Error initializing agent: {e}", None
256
 
257
  try:
258
- r = requests.get(f"{api_url}/questions", timeout=15)
259
- r.raise_for_status()
260
- questions = r.json()
261
  print(f"Fetched {len(questions)} questions.")
262
  except Exception as e:
263
  return f"Error fetching questions: {e}", None
@@ -272,11 +256,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
272
  continue
273
  answer = agent(question_text, task_id=task_id)
274
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
275
- results_log.append({
276
- "Task ID": task_id,
277
- "Question": question_text[:80],
278
- "Submitted Answer": answer,
279
- })
280
  time.sleep(1)
281
 
282
  if not answers_payload:
@@ -306,9 +286,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
306
  with gr.Blocks() as demo:
307
  gr.Markdown("# πŸ€– GAIA Agent β€” smolagents + HF Inference")
308
  gr.Markdown("""
309
- **Models:** Llama-3.3-70B (reasoning) Β· Llama-3.2-11B-Vision (images) Β· Whisper large-v3 (audio)
310
- **Tools:** DuckDuckGo Β· Wikipedia Β· Visit webpage Β· YouTube transcript Β· Python execution Β· File reader
311
- **Requires:** `HF_TOKEN` secret in your Space settings.
312
  """)
313
  gr.LoginButton()
314
  run_btn = gr.Button("πŸš€ Run Evaluation & Submit All Answers", variant="primary")
 
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
+ # ── Imports exacts depuis la doc officielle smolagents ──────────────────
 
 
11
  from smolagents import (
 
12
  CodeAgent,
13
  InferenceClientModel,
14
  DuckDuckGoSearchTool,
15
  VisitWebpageTool,
16
+ tool,
17
  )
18
 
19
+ # ── Tools custom ────────────────────────────────────────────────────────
 
 
20
 
21
  @tool
22
  def wikipedia_search(query: str) -> str:
23
  """Search Wikipedia and return the intro of the top article.
24
  Args:
25
+ query: The search terms to look up on Wikipedia.
26
  """
27
  try:
28
+ base = "https://en.wikipedia.org/w/api.php"
29
+ search = requests.get(base, params={
30
  "action": "query", "list": "search",
31
  "srsearch": query, "format": "json", "srlimit": 1,
32
  }, timeout=15).json()
33
+ title = search["query"]["search"][0]["title"]
34
+ extract = requests.get(base, params={
35
  "action": "query", "prop": "extracts",
36
  "exintro": True, "explaintext": True,
37
  "titles": title, "format": "json",
38
  }, timeout=15).json()
39
+ pages = extract["query"]["pages"]
40
+ text = next(iter(pages.values())).get("extract", "")[:4000]
41
+ return f"# {title}\n{text}"
42
  except Exception as e:
43
  return f"Wikipedia error: {e}"
44
 
45
 
46
  @tool
47
  def download_file_for_task(task_id: str) -> str:
48
+ """Download and read any file attached to a GAIA task (PDF, Excel, audio, image, code).
49
  Args:
50
+ task_id: The GAIA task UUID string.
51
  """
52
  try:
53
+ r = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
 
54
  if r.status_code != 200:
55
+ return "No file attached to this task."
56
  data = r.content
57
  ct = r.headers.get("content-type", "")
58
 
 
61
  try:
62
  import io
63
  from pypdf import PdfReader
64
+ text = "\n".join(p.extract_text() or "" for p in PdfReader(io.BytesIO(data)).pages)
 
65
  return text[:6000]
66
  except Exception as e:
67
  return f"PDF error: {e}"
68
 
69
+ # Audio β†’ Whisper
70
  if any(x in ct for x in ["audio", "mpeg", "mp3", "wav"]) or data[:3] == b"ID3":
71
+ token = os.environ.get("HF_TOKEN", "")
72
+ url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
73
  for _ in range(3):
74
+ resp = requests.post(url, headers={"Authorization": f"Bearer {token}"}, data=data, timeout=120)
75
  if resp.status_code == 503:
76
+ time.sleep(20)
77
+ continue
78
  if resp.status_code == 200:
79
  return resp.json().get("text", "")
80
  return "Audio transcription failed."
 
83
  if any(x in ct for x in ["spreadsheet", "excel", "csv"]) or data[:2] == b"PK":
84
  try:
85
  import io
86
+ return pd.read_excel(io.BytesIO(data)).to_string(index=False)[:4000]
 
87
  except Exception:
88
  try:
89
  import io
90
+ return pd.read_csv(io.BytesIO(data)).to_string(index=False)[:4000]
 
91
  except Exception as e:
92
  return f"Spreadsheet error: {e}"
93
 
94
+ # Image β†’ Llama Vision
95
  if any(x in ct for x in ["image", "png", "jpg", "jpeg"]):
96
  import base64
 
97
  mime = "image/png" if data[:4] == b"\x89PNG" else "image/jpeg"
98
+ b64 = base64.b64encode(data).decode()
99
+ token = os.environ.get("HF_TOKEN", "")
100
+ url = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions"
 
 
101
  payload = {
102
  "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
103
  "messages": [{"role": "user", "content": [
104
  {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}},
105
+ {"type": "text", "text": "Describe everything in this image in detail. If it's a chess board, name every piece and its exact square. If there is text or numbers, transcribe them exactly."},
106
  ]}],
107
  "max_tokens": 1024,
108
  }
109
  for _ in range(3):
110
+ resp = requests.post(url, headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"}, json=payload, timeout=120)
111
  if resp.status_code == 503:
112
+ time.sleep(20)
113
+ continue
114
  if resp.status_code == 200:
115
  return resp.json()["choices"][0]["message"]["content"]
116
  return "Image analysis failed."
117
 
118
  # Text / code fallback
119
+ return data.decode("utf-8", errors="replace")[:4000]
120
+
 
 
121
  except Exception as e:
122
  return f"File download error: {e}"
123
 
124
 
125
  @tool
126
  def get_youtube_transcript(video_url: str) -> str:
127
+ """Fetch the transcript/captions from a YouTube video URL.
128
  Args:
129
+ video_url: The full YouTube URL, e.g. https://www.youtube.com/watch?v=XXXXX
130
  """
131
  try:
132
  from youtube_transcript_api import YouTubeTranscriptApi
133
+ m = re.search(r"(?:v=|youtu\.be/)([A-Za-z0-9_-]{11})", video_url)
134
+ if not m:
135
+ return "Could not extract video ID from URL."
136
+ transcript = YouTubeTranscriptApi.get_transcript(m.group(1), languages=["en", "en-US", "en-GB"])
 
 
 
137
  return " ".join(t["text"] for t in transcript)[:5000]
138
  except Exception as e:
139
  return f"Transcript error: {e}"
 
141
 
142
  @tool
143
  def run_python_code(code: str) -> str:
144
+ """Execute Python code and return stdout. Use for math, logic, string manipulation, data processing.
145
  Args:
146
+ code: Valid Python code to execute.
147
  """
148
  import subprocess, sys
149
  try:
150
+ r = subprocess.run([sys.executable, "-c", code], capture_output=True, text=True, timeout=30)
151
+ return r.stdout.strip() or (f"stderr: {r.stderr.strip()}" if r.stderr.strip() else "(no output)")
 
 
 
 
 
152
  except subprocess.TimeoutExpired:
153
+ return "Execution timed out."
154
  except Exception as e:
155
+ return f"Execution error: {e}"
156
 
157
 
158
+ # ── Agent ───────────────────────────────────────────────────────────────
 
 
159
 
160
  class GAIAAgent:
161
  def __init__(self):
162
+ # Exactement comme dans la doc officielle smolagents
163
  model = InferenceClientModel(
164
  model_id="meta-llama/Llama-3.3-70B-Instruct",
165
  token=os.environ.get("HF_TOKEN", ""),
 
166
  )
167
  self.agent = CodeAgent(
168
  tools=[
 
177
  max_steps=10,
178
  verbosity_level=1,
179
  additional_authorized_imports=[
180
+ "re", "json", "math", "unicodedata",
181
+ "datetime", "collections", "itertools",
182
  ],
183
  )
184
  print("GAIAAgent ready βœ…")
185
 
186
  def __call__(self, question: str, task_id: str = "") -> str:
187
+ print(f"\n{'='*60}\nQ: {question[:120]}")
188
+
189
+ # DΓ©tection de fichier joint ou YouTube dans la question
190
+ has_yt = bool(re.search(r"youtube\.com|youtu\.be", question))
191
+ has_file_hint = any(w in question.lower() for w in ["attached", "file", "image", "audio", "excel", "spreadsheet", "pdf", "code"])
192
+
193
+ task_hint = ""
194
+ if task_id and (has_file_hint or has_yt):
195
+ task_hint = f"\n\nNote: task_id='{task_id}' β€” use download_file_for_task('{task_id}') if a file is needed."
196
+ elif task_id:
197
+ task_hint = f"\n\n[task_id: '{task_id}' β€” use download_file_for_task if a file is mentioned]"
198
+
199
  prompt = (
200
+ "Solve this GAIA benchmark question. Important rules:\n"
201
+ "- Use tools to find/verify information. Do NOT guess.\n"
202
+ "- For YouTube URLs β†’ call get_youtube_transcript.\n"
203
+ "- For attached files (pdf/image/audio/excel/code) β†’ call download_file_for_task.\n"
204
+ "- For math/logic/string manipulation β†’ call run_python_code.\n"
205
+ "- For factual lookups β†’ call wikipedia_search or DuckDuckGoSearchTool.\n"
206
+ "- Your final answer must be SHORT and EXACT (exact string match is used for grading).\n"
207
+ "- For reversed text: decode it first, then answer.\n"
208
+ "- For counts: give only the number.\n"
209
+ "- For lists: comma-separated values only.\n\n"
210
+ f"Question: {question}{task_hint}"
211
  )
212
+
213
  try:
214
  result = self.agent.run(prompt)
215
  answer = str(result).strip()
216
+ # Nettoyer les prΓ©fixes verbeux du LLM
217
+ for prefix in ["the answer is", "answer:", "final answer:", "result:"]:
218
+ if answer.lower().startswith(prefix):
219
  answer = answer[len(prefix):].strip().lstrip(":").strip()
220
+ print(f"β†’ Answer: {answer}")
221
  return answer
222
  except Exception as e:
223
  print(f"Agent error: {e}")
224
  return "Unable to determine answer."
225
 
226
 
227
+ # ── Gradio UI ────────────────────────────────────────────────────────────
 
 
228
 
229
  def run_and_submit_all(profile: gr.OAuthProfile | None):
230
  if not profile:
231
  return "Please login to Hugging Face first.", None
232
+
233
  username = profile.username
234
  space_id = os.getenv("SPACE_ID", "")
235
  api_url = DEFAULT_API_URL
 
241
  return f"Error initializing agent: {e}", None
242
 
243
  try:
244
+ questions = requests.get(f"{api_url}/questions", timeout=15).json()
 
 
245
  print(f"Fetched {len(questions)} questions.")
246
  except Exception as e:
247
  return f"Error fetching questions: {e}", None
 
256
  continue
257
  answer = agent(question_text, task_id=task_id)
258
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
259
+ results_log.append({"Task ID": task_id, "Question": question_text[:80], "Submitted Answer": answer})
 
 
 
 
260
  time.sleep(1)
261
 
262
  if not answers_payload:
 
286
  with gr.Blocks() as demo:
287
  gr.Markdown("# πŸ€– GAIA Agent β€” smolagents + HF Inference")
288
  gr.Markdown("""
289
+ **Models:** Llama-3.3-70B Β· Llama-3.2-11B-Vision Β· Whisper large-v3
290
+ **Tools:** DuckDuckGo Β· Wikipedia Β· VisitWebpage Β· YouTube transcript Β· Python Β· File reader (PDF/Excel/Audio/Image)
291
+ **Setup:** Ajoute `HF_TOKEN` dans les secrets de ton Space.
292
  """)
293
  gr.LoginButton()
294
  run_btn = gr.Button("πŸš€ Run Evaluation & Submit All Answers", variant="primary")