lodist committed on
Commit
3de1995
·
verified ·
1 Parent(s): fac64b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -217
app.py CHANGED
@@ -3,230 +3,60 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- import io
7
- import re
8
-
 
9
 
10
  # (Keep Constants as is)
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
- # --- Basic Agent Definition ---
15
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
 
 
 
 
 
 
16
  class BasicAgent:
17
- def __init__(self, tools: dict | None = None):
18
- self.tools = tools or {}
19
- try:
20
- from duckduckgo_search import DDGS
21
- self.tools["web_search"] = DDGS()
22
- except Exception:
23
- self.tools["web_search"] = None
24
- try:
25
- import wikipedia
26
- self.tools["wikipedia"] = wikipedia
27
- wikipedia.set_lang("en")
28
- except Exception:
29
- self.tools["wikipedia"] = None
30
- try:
31
- from PIL import Image
32
- self.tools["pil"] = Image
33
- except Exception:
34
- self.tools["pil"] = None
35
- try:
36
- import pytesseract
37
- self.tools["ocr"] = pytesseract
38
- except Exception:
39
- self.tools["ocr"] = None
40
- print("BasicAgent initialized with tools:", {k: bool(v) for k, v in self.tools.items()})
41
-
42
- def __call__(self, question: str, files=None) -> str:
43
- q = (question or "").strip()
44
- print(f"[Agent] Q: {q[:120]}{'...' if len(q) > 120 else ''}")
45
- file_ctx = self._ingest_files(files or [])
46
-
47
- handlers = [
48
- self._handle_arithmetic,
49
- self._handle_inline_aggregate_numbers,
50
- self._handle_reversed_text,
51
- self._handle_csv_xls_ops,
52
- self._handle_ocr_if_requested,
53
- self._handle_wikipedia_if_requested,
54
- self._handle_web_search_if_requested,
55
- ]
56
- for h in handlers:
57
- try:
58
- ans = h(q, file_ctx)
59
- if ans is not None:
60
- return self._fmt(ans)
61
- except Exception as e:
62
- print(f"[Agent] Handler {h.__name__} error: {e}")
63
-
64
- return "This is a default answer."
65
-
66
- def _ingest_files(self, files):
67
- ctx = {"tabular": [], "images": [], "text": []}
68
- for fname, blob in files:
69
- low = (fname or "").lower()
70
- if low.endswith((".csv", ".tsv")):
71
- sep = "\t" if low.endswith(".tsv") else ","
72
- try:
73
- df = pd.read_csv(io.BytesIO(blob), sep=sep)
74
- ctx["tabular"].append((fname, df))
75
- except Exception as e:
76
- print(f"[Agent] CSV read failed for {fname}: {e}")
77
- elif low.endswith((".xlsx", ".xls")):
78
- try:
79
- df = pd.read_excel(io.BytesIO(blob), sheet_name=0, engine=None)
80
- ctx["tabular"].append((fname, df))
81
- except Exception as e:
82
- print(f"[Agent] Excel read failed for {fname}: {e}")
83
- elif low.endswith((".png", ".jpg", ".jpeg", ".bmp", ".webp", ".gif")):
84
- ctx["images"].append((fname, blob))
85
- elif low.endswith((".txt", ".md", ".json")):
86
- try:
87
- text = io.BytesIO(blob).read().decode("utf-8", errors="replace")
88
- ctx["text"].append((fname, text))
89
- except Exception as e:
90
- print(f"[Agent] Text read failed for {fname}: {e}")
91
- return ctx
92
-
93
- def _handle_arithmetic(self, q, _):
94
- m = re.search(r"(?i)\bwhat\s+is\s+(\d+)\s*([+\-*/x])\s*(\d+)\b", q) or re.search(r"\b(\d+)\s*([+\-*/x])\s*(\d+)\b", q)
95
- if not m: return None
96
- a, opch, b = int(m.group(1)), m.group(2).lower(), int(m.group(3))
97
- if opch in ("x", "*"): return a * b
98
- if opch == "+": return a + b
99
- if opch == "-": return a - b
100
- if opch == "/": return "undefined" if b == 0 else (int(a/b) if (a/b).is_integer() else a/b)
101
-
102
- def _handle_inline_aggregate_numbers(self, q, _):
103
- nums = [float(x) for x in re.findall(r"\b\d+(?:\.\d+)?\b", q)]
104
- if not nums: return None
105
- if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return min(nums)
106
- if re.search(r"(?i)\b(max|maximum|largest|highest)\b", q): return max(nums)
107
- if re.search(r"(?i)\b(sum|total|added up)\b", q): return sum(nums)
108
- if re.search(r"(?i)\b(avg|average|mean)\b", q):
109
- avg = sum(nums)/len(nums)
110
- return int(avg) if float(avg).is_integer() else round(avg, 2)
111
-
112
- def _handle_reversed_text(self, q, _):
113
- if re.search(r"(?i)\b(tfel)\b|^[^a-zA-Z]*[\w\W]{3,}$", q) and self._looks_reversed(q):
114
- return self._reverse_text(q)
115
- return None
116
-
117
- def _looks_reversed(self, s: str) -> bool:
118
- words = re.findall(r"[A-Za-z]{3,}", s)
119
- if not words: return False
120
- rev_like = sum(1 for w in words if w[::-1].lower() in s.lower())
121
- return rev_like >= max(1, len(words)//4)
122
-
123
- def _reverse_text(self, s: str) -> str:
124
- return "".join(list(s))[::-1]
125
-
126
- def _handle_csv_xls_ops(self, q, ctx):
127
- if not ctx["tabular"]: return None
128
- op = self._guess_op(q)
129
- col = self._guess_col(q)
130
- if not op or not col: return None
131
- for fname, df in ctx["tabular"]:
132
- target = col if col in df.columns else self._fuzzy_find_col(col, df.columns)
133
- if not target: continue
134
- s = pd.to_numeric(df[target], errors="coerce").dropna()
135
- if s.empty: continue
136
- if op == "sum": return s.sum()
137
- if op == "min": return s.min()
138
- if op == "max": return s.max()
139
- if op == "avg": return s.mean()
140
- return None
141
-
142
- def _handle_ocr_if_requested(self, q, ctx):
143
- if not re.search(r"(?i)\b(ocr|read|extract)\b.*\b(text|from image)\b", q): return None
144
- if not ctx["images"] or not self.tools.get("pil") or not self.tools.get("ocr"): return None
145
- Image = self.tools["pil"]; pytesseract = self.tools["ocr"]
146
- texts = []
147
- for fname, blob in ctx["images"]:
148
- try:
149
- img = Image.open(io.BytesIO(blob))
150
- txt = pytesseract.image_to_string(img)
151
- if txt.strip():
152
- texts.append(f"[{fname}]\n{txt.strip()}")
153
- except Exception as e:
154
- print(f"[Agent] OCR failed {fname}: {e}")
155
- return "\n\n".join(texts) if texts else None
156
 
157
- def _handle_wikipedia_if_requested(self, q, _):
158
- if not self.tools.get("wikipedia"): return None
159
- if not re.search(r"(?i)\b(wikipedia|on wikipedia)\b", q) and not re.search(r"(?i)\b(who|what|when|how many|where)\b", q):
160
- return None
161
- try:
162
- wikipedia = self.tools["wikipedia"]
163
- topic = q
164
- m = re.search(r"(?i)\babout\s+(.+)$", q)
165
- if m: topic = m.group(1)
166
- m2 = re.search(r"(?i)\bof\s+(.+?)\??$", q)
167
- if m2: topic = m2.group(1)
168
- topic = re.sub(r"(?i)\bon wikipedia\b", "", topic).strip()
169
- page = wikipedia.page(topic, auto_suggest=True, redirect=True)
170
- summary = wikipedia.summary(page.title, sentences=3)
171
- if re.search(r"(?i)\bhow many\b.*\b(\d{4}).*(\d{4})", q):
172
- years = list(map(int, re.findall(r"\b\d{4}\b", q)))
173
- years.sort()
174
- text = page.content
175
- hits = re.findall(r"\b(19|20)\d{2}\b", text)
176
- nums = [int(h[0] + text[i+1:i+3]) for i, h in enumerate(re.finditer(r"\b(19|20)\d{2}\b", text))]
177
- count = sum(1 for y in nums if years[0] <= y <= years[-1])
178
- return f"{count}"
179
- return summary
180
- except Exception as e:
181
- print(f"[Agent] Wikipedia error: {e}")
182
- return None
183
 
184
- def _handle_web_search_if_requested(self, q, _):
185
- if not self.tools.get("web_search"): return None
186
- if not re.search(r"(?i)\b(search|latest|news|find|look up)\b", q): return None
187
- try:
188
- ddgs = self.tools["web_search"]
189
- results = list(ddgs.text(q, max_results=5))
190
- if not results: return None
191
- best = results[0]
192
- pieces = [p for p in [best.get("title"), best.get("body"), best.get("href")] if p]
193
- return " ".join(pieces)
194
- except Exception as e:
195
- print(f"[Agent] web search error: {e}")
196
- return None
197
-
198
- def _guess_op(self, q: str):
199
- if re.search(r"(?i)\b(sum|total|added up)\b", q): return "sum"
200
- if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return "min"
201
- if re.search(r"(?i)\b(max|maximum|largest|highest)\b", q): return "max"
202
- if re.search(r"(?i)\b(avg|average|mean)\b", q): return "avg"
203
- return None
204
-
205
- def _guess_col(self, q: str):
206
- m = re.search(r"(?i)\bof\s+([A-Za-z0-9_]+)\b", q)
207
- if m: return m.group(1)
208
- m = re.search(r"[\"']([A-Za-z0-9_ ]+)[\"']", q)
209
- if m: return m.group(1).strip()
210
- return None
211
-
212
- def _fuzzy_find_col(self, wanted: str, cols) -> str | None:
213
- wl = (wanted or "").lower()
214
- for c in cols:
215
- s = str(c)
216
- if s.lower() == wl: return s
217
- for c in cols:
218
- s = str(c)
219
- if wl in s.lower(): return s
220
- return None
221
-
222
- def _fmt(self, x):
223
- if isinstance(x, (int,)):
224
- return str(x)
225
- try:
226
- fx = float(x)
227
- return str(int(fx)) if fx.is_integer() else str(fx)
228
- except Exception:
229
- return str(x)
230
 
231
 
232
 
@@ -310,7 +140,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
310
 
311
  try:
312
  # pass files to the agent
313
- submitted_answer = agent(question_text, files=file_blobs)
314
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
315
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
316
  except Exception as e:
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, PythonInterpreterTool, OpenAIServerModel
7
+ from agentsTools.toolVisitWebpage import visit_webpage
8
+ from agentsTools.tool_fetch_task_file import fetch_task_file
9
+ from agentsTools.tool_read_excel_as_json import read_excel_as_json
10
 
11
  # (Keep Constants as is)
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
def get_gemini_model():
    """Return an OpenAIServerModel pointed at Google's OpenAI-compatible
    Gemini endpoint.

    The API key is read from the ``GEMINI_API_KEY_1`` environment variable.
    The placeholder fallback is kept for backward compatibility, but we now
    warn loudly: silently sending the placeholder as a key only surfaces the
    misconfiguration later as an opaque auth failure.
    """
    import os  # local import: `os` is not visibly imported at module top — TODO confirm

    api_key = os.getenv("GEMINI_API_KEY_1", "your-gemini-api-key-here")
    if api_key == "your-gemini-api-key-here":
        print("[get_gemini_model] WARNING: GEMINI_API_KEY_1 is not set; "
              "API calls will fail authentication.")
    return OpenAIServerModel(
        model_id="gemini-2.5-pro-preview-03-25",
        api_base="https://generativelanguage.googleapis.com/v1beta/",
        api_key=api_key,
    )


# Build the shared model once at import time; all BasicAgent instances reuse it.
MODEL = get_gemini_model()
23
+
24
class BasicAgent:
    """GAIA-style agent backed by smolagents' ToolCallingAgent.

    Wraps a tool-calling loop (web search, Python interpreter, webpage
    visiting, task-file fetching, Excel parsing) around the shared MODEL and
    prompts it to emit a strictly formatted "FINAL ANSWER: ..." line.
    """

    def __init__(self):
        self.agent = ToolCallingAgent(
            tools=[
                DuckDuckGoSearchTool(),
                PythonInterpreterTool(),
                visit_webpage,
                fetch_task_file,     # fetches byte content via task_id + filename
                read_excel_as_json,  # parses excel content into JSON
            ],
            model=MODEL,
            max_steps=10,
        )
        print("BasicAgent initialized (smolagents).")

    def __call__(self, question: str, taskid: str) -> str:
        """Run the tool-calling agent on *question*; *taskid* lets the file
        tools fetch any attachments belonging to the task."""
        prompt = f"""
You are a general AI assistant.

You may use up to 8 tool calls. Prefer Wikipedia when multiple links are available.
If a page blocks access, skip it.

You MUST end your output with:
FINAL ANSWER: <concise result here>

Rules for FINAL ANSWER:
- If number: no commas for thousands, no units unless explicitly requested.
- If string: no articles, no abbreviations (write city/country names fully), digits in plain text unless specified.
- If comma-separated list: apply the above rules for each element.

TaskId to use for any file fetching: {taskid}
Question: {question}
"""
        result = self.agent.run(prompt)
        # BUG FIX: agent.run() may return a non-str answer object; slicing it
        # for the log (and the declared -> str contract) requires coercion.
        answer = str(result)
        print(f"[BasicAgent] FINAL ANSWER {answer[:250]}")
        return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
 
62
 
 
140
 
141
  try:
142
  # pass files to the agent
143
+ submitted_answer = agent(question_text, task_id)
144
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
145
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
146
  except Exception as e: