Final_Assignment_Template_Gaia

Sleeping

App Files Files Community

lodist committed on Nov 2, 2025

Commit

fac64b9

verified ·

1 Parent(s): 5d48a13

Update app.py

Browse files

Files changed (1) hide show

app.py +184 -139

app.py CHANGED Viewed

@@ -14,87 +14,187 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
     def __call__(self, question: str, files=None) -> str:
-        """
-        files: list of (filename, bytes) tuples. May be None/empty.
-        """
         q = (question or "").strip()
-        print(f"Agent received question (first 50 chars): {q[:50]}...")
-        if files:
-            print("Received files:", [f for f, _ in files])
-        op  = self._guess_op(q)    # "sum" | "min" | "max" | "avg" | None
-        col = self._guess_col(q)   # tries to extract a column name
-        # ---------- FILES: Excel / CSV / TSV ----------
-        if files:
-            for fname, blob in files:
-                low = fname.lower()
-                # 1) Excel
-                if low.endswith((".xlsx", ".xls")):
-                    try:
-                        df = pd.read_excel(io.BytesIO(blob), sheet_name=0, engine=None)
-                        # Special: "total sales from food (not including drinks)"
-                        if self._asks_food_not_drinks(q):
-                            total = self._sum_food_excluding_drinks(df)
-                            if total is not None:
-                                return f"{total:.2f}"
-                        # Generic ops on a named column
-                        if op and col:
-                            val = self._op_on_column(df, col, op)
-                            if val is not None:
-                                return self._fmt(val)
-                    except Exception as e:
-                        print(f"Excel read failed for {fname}: {e}")
-                # 2) CSV/TSV
-                elif low.endswith(".csv") or low.endswith(".tsv"):
-                    sep = "\t" if low.endswith(".tsv") else ","
-                    try:
-                        df = pd.read_csv(io.BytesIO(blob), sep=sep)
-                        # Special: "total sales from food (not including drinks)"
-                        if self._asks_food_not_drinks(q):
-                            total = self._sum_food_excluding_drinks(df)
-                            if total is not None:
-                                return f"{total:.2f}"
-                        # Generic ops on a named column
-                        if op and col:
-                            val = self._op_on_column(df, col, op)
-                            if val is not None:
-                                return self._fmt(val)
-                    except Exception as e:
-                        print(f"CSV/TSV read failed for {fname}: {e}")
-        # ---------- TEXT-ONLY FALLBACKS ----------
-        # trivial arithmetic like "what is 12 + 7"
-        m = re.search(r"(?i)\bwhat\s+is\s+(\d+)\s*([+\-*/x])\s*(\d+)\b", q)
-        if not m:
-            m = re.search(r"\b(\d+)\s*([+\-*/x])\s*(\d+)\b", q)
-        if m:
-            a, opchar, b = int(m.group(1)), m.group(2).lower(), int(m.group(3))
-            if opchar in ("x", "*"): return str(a * b)
-            if opchar == "+":        return str(a + b)
-            if opchar == "-":        return str(a - b)
-            if opchar == "/":        return "undefined" if b == 0 else (str(int(a/b)) if (a/b).is_integer() else str(a/b))
-        # min/max/sum/avg over numbers present in the question itself
-        nums = [int(x) for x in re.findall(r"\b\d+\b", q)]
-        if nums:
-            if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return str(min(nums))
-            if re.search(r"(?i)\b(max|maximum|largest|highest)\b", q): return str(max(nums))
-            if re.search(r"(?i)\b(sum|total|added up)\b", q):          return str(sum(nums))
-            if re.search(r"(?i)\b(avg|average|mean)\b", q):
-                avg = sum(nums) / len(nums)
-                return str(int(avg)) if float(avg).is_integer() else f"{avg:.2f}"
-        # Final fallback
         return "This is a default answer."
-    # ----------------- Helpers -----------------
     def _guess_op(self, q: str):
         if re.search(r"(?i)\b(sum|total|added up)\b", q): return "sum"
         if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return "min"
@@ -113,74 +213,15 @@ class BasicAgent:
         wl = (wanted or "").lower()
         for c in cols:
             s = str(c)
-            if s.lower() == wl:
-                return s
         for c in cols:
             s = str(c)
-            if wl in s.lower():
-                return s
-        return None
-    def _op_on_column(self, df: pd.DataFrame, col: str, op: str):
-        target = col if col in df.columns else self._fuzzy_find_col(col, df.columns)
-        if not target:
-            return None
-        s = pd.to_numeric(df[target], errors="coerce").dropna()
-        if s.empty:
-            return None
-        if op == "sum": return s.sum()
-        if op == "min": return s.min()
-        if op == "max": return s.max()
-        if op == "avg": return s.mean()
         return None
-    def _asks_food_not_drinks(self, q: str) -> bool:
-        # matches questions like: "total sales from food (not including drinks)"
-        return bool(
-            re.search(r"(?i)total\s+sales.*food.*not.*drink", q) or
-            re.search(r"(?i)food.*not.*drink", q)
-        )
-    def _sum_food_excluding_drinks(self, df: pd.DataFrame):
-        # 1) Try find a categorical column
-        cat_col = None
-        for c in df.columns:
-            cl = str(c).lower()
-            if any(k in cl for k in ["category", "type", "group", "item", "product", "menu", "name"]):
-                cat_col = c
-                break
-        # 2) Try find a numeric money column
-        money_col = None
-        for c in df.columns:
-            cl = str(c).lower()
-            if any(k in cl for k in ["sales", "revenue", "amount", "total", "usd", "price", "value"]):
-                money_col = c
-                break
-        if money_col is None:
-            for c in df.columns:
-                s = pd.to_numeric(df[c], errors="coerce")
-                if s.notna().sum() >= max(3, int(0.5 * len(df))):
-                    money_col = c
-                    break
-        if money_col is None:
-            return None
-        mask = pd.Series([True] * len(df))
-        if cat_col is not None:
-            cats = df[cat_col].astype(str).str.lower()
-            exclude_words = ["drink", "beverage", "soda", "juice", "coffee", "tea", "cola", "water"]
-            include_words = ["food", "burger", "sandwich", "fries", "salad", "wrap", "meal",
-                             "nugget", "chicken", "beef", "fish", "pizza", "dessert"]
-            ex_mask = cats.str.contains("|".join(exclude_words), na=False)
-            in_mask = cats.str.contains("|".join(include_words), na=False)
-            mask = (~ex_mask) & (in_mask | True)  # keep non-drinks even if not explicitly labeled as food
-        s_money = pd.to_numeric(df[money_col], errors="coerce")
-        total = s_money[mask].dropna().sum()
-        return float(total) if pd.notna(total) else None
     def _fmt(self, x):
         try:
             fx = float(x)
             return str(int(fx)) if fx.is_integer() else str(fx)
@@ -188,6 +229,10 @@ class BasicAgent:
             return str(x)
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,

 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
+    def __init__(self, tools: dict | None = None):
+        """Set up the agent's tool registry.
+
+        Args:
+            tools: Optional mapping of tool name to a ready-to-use tool
+                object. Entries supplied here take precedence over the
+                auto-detected defaults, so callers can inject stubs or
+                pre-configured clients.
+
+        The optional third-party tools (``duckduckgo_search``, ``wikipedia``,
+        ``PIL``, ``pytesseract``) are loaded best-effort: a tool that cannot
+        be imported or constructed is registered as ``None`` so handlers can
+        test for availability with ``self.tools.get(name)``.
+        """
+        # Copy so the caller's dict is never mutated behind its back.
+        self.tools = dict(tools or {})
+        # Only auto-detect tools the caller did not provide explicitly.
+        if "web_search" not in self.tools:
+            try:
+                from duckduckgo_search import DDGS
+                self.tools["web_search"] = DDGS()
+            except Exception:
+                self.tools["web_search"] = None
+        if "wikipedia" not in self.tools:
+            try:
+                import wikipedia
+                wikipedia.set_lang("en")
+                self.tools["wikipedia"] = wikipedia
+            except Exception:
+                self.tools["wikipedia"] = None
+        if "pil" not in self.tools:
+            try:
+                from PIL import Image
+                self.tools["pil"] = Image
+            except Exception:
+                self.tools["pil"] = None
+        if "ocr" not in self.tools:
+            try:
+                import pytesseract
+                self.tools["ocr"] = pytesseract
+            except Exception:
+                self.tools["ocr"] = None
+        print("BasicAgent initialized with tools:", {k: bool(v) for k, v in self.tools.items()})
     def __call__(self, question: str, files=None) -> str:
+        """Answer ``question`` by trying each handler in priority order.
+
+        ``files`` is an optional iterable of ``(filename, bytes)`` pairs that
+        is pre-sorted by ``_ingest_files``. The first handler returning a
+        non-``None`` value wins and its result is passed through ``_fmt``; a
+        handler that raises is logged and the next one is tried.
+        """
         q = (question or "").strip()
+        suffix = "..." if len(q) > 120 else ""
+        print(f"[Agent] Q: {q[:120]}{suffix}")
+        file_ctx = self._ingest_files(files or [])
+        # Ordered from cheapest/most specific to network-backed lookups.
+        pipeline = (
+            self._handle_arithmetic,
+            self._handle_inline_aggregate_numbers,
+            self._handle_reversed_text,
+            self._handle_csv_xls_ops,
+            self._handle_ocr_if_requested,
+            self._handle_wikipedia_if_requested,
+            self._handle_web_search_if_requested,
+        )
+        for handler in pipeline:
+            try:
+                answer = handler(q, file_ctx)
+                if answer is not None:
+                    return self._fmt(answer)
+            except Exception as exc:
+                print(f"[Agent] Handler {handler.__name__} error: {exc}")
         return "This is a default answer."
+    def _ingest_files(self, files):
+        """Sort uploaded files into tabular, image and text buckets.
+
+        ``files`` is an iterable of ``(filename, bytes)`` pairs. The result is
+        a dict with three lists: ``"tabular"`` holds ``(filename, DataFrame)``
+        for CSV/TSV/Excel files, ``"images"`` holds ``(filename, bytes)``
+        untouched, and ``"text"`` holds ``(filename, str)`` decoded as UTF-8
+        with undecodable bytes replaced. Files that fail to parse are logged
+        and dropped; unrecognised extensions are ignored.
+        """
+        buckets = {"tabular": [], "images": [], "text": []}
+        for fname, blob in files:
+            name = (fname or "").lower()
+            if name.endswith((".csv", ".tsv")):
+                delimiter = "\t" if name.endswith(".tsv") else ","
+                try:
+                    frame = pd.read_csv(io.BytesIO(blob), sep=delimiter)
+                except Exception as e:
+                    print(f"[Agent] CSV read failed for {fname}: {e}")
+                else:
+                    buckets["tabular"].append((fname, frame))
+                continue
+            if name.endswith((".xlsx", ".xls")):
+                try:
+                    # Only the first sheet is loaded.
+                    frame = pd.read_excel(io.BytesIO(blob), sheet_name=0, engine=None)
+                except Exception as e:
+                    print(f"[Agent] Excel read failed for {fname}: {e}")
+                else:
+                    buckets["tabular"].append((fname, frame))
+                continue
+            if name.endswith((".png", ".jpg", ".jpeg", ".bmp", ".webp", ".gif")):
+                # Images stay as raw bytes; decoding is deferred to the OCR handler.
+                buckets["images"].append((fname, blob))
+                continue
+            if name.endswith((".txt", ".md", ".json")):
+                try:
+                    decoded = io.BytesIO(blob).read().decode("utf-8", errors="replace")
+                except Exception as e:
+                    print(f"[Agent] Text read failed for {fname}: {e}")
+                else:
+                    buckets["text"].append((fname, decoded))
+        return buckets
+    def _handle_arithmetic(self, q, _):
+        """Evaluate the first ``<int> <op> <int>`` expression found in ``q``.
+
+        Supported operators are ``+``, ``-``, ``*``/``x`` and ``/``. A
+        ``what is ...`` phrasing is preferred; otherwise the first bare
+        expression anywhere in the question is used.
+
+        Returns:
+            The integer result, a float for inexact division, the string
+            ``"undefined"`` for division by zero, or ``None`` when the
+            question contains no such expression.
+        """
+        m = (
+            re.search(r"(?i)\bwhat\s+is\s+(\d+)\s*([+\-*/x])\s*(\d+)\b", q)
+            or re.search(r"\b(\d+)\s*([+\-*/x])\s*(\d+)\b", q)
+        )
+        if not m:
+            return None
+        a, opch, b = int(m.group(1)), m.group(2).lower(), int(m.group(3))
+        if opch in ("x", "*"):
+            return a * b
+        if opch == "+":
+            return a + b
+        if opch == "-":
+            return a - b
+        # The character class only admits "/" beyond this point.
+        if b == 0:
+            return "undefined"
+        # Test divisibility with integer arithmetic: routing through float
+        # (a / b) silently loses precision for operands above 2**53.
+        quotient, remainder = divmod(a, b)
+        return quotient if remainder == 0 else a / b
+    def _handle_inline_aggregate_numbers(self, q, _):
+        """Aggregate the numbers written inside the question itself.
+
+        All integer/decimal literals in ``q`` are parsed as floats, then the
+        first matching keyword family decides the reduction: min, max, sum or
+        average (checked in that order). The average is returned as an int
+        when whole, otherwise rounded to two decimals. Returns ``None`` when
+        there are no numbers or no aggregate keyword.
+        """
+        values = [float(tok) for tok in re.findall(r"\b\d+(?:\.\d+)?\b", q)]
+        if not values:
+            return None
+
+        def _mean(xs):
+            mean = sum(xs) / len(xs)
+            return int(mean) if float(mean).is_integer() else round(mean, 2)
+
+        # Order matters: the first keyword family present in the question wins.
+        rules = (
+            (r"(?i)\b(min|minimum|smallest|lowest)\b", min),
+            (r"(?i)\b(max|maximum|largest|highest)\b", max),
+            (r"(?i)\b(sum|total|added up)\b", sum),
+            (r"(?i)\b(avg|average|mean)\b", _mean),
+        )
+        for pattern, reducer in rules:
+            if re.search(pattern, q):
+                return reducer(values)
+        return None
+    def _handle_reversed_text(self, q, _):
+        """Return ``q`` reversed when it appears to be written backwards.
+
+        A cheap regex gate (the token ``tfel``, or any text of at least three
+        characters) runs first; the actual decision is delegated to
+        ``_looks_reversed``. Returns ``None`` when either check fails.
+        """
+        gate = re.search(r"(?i)\b(tfel)\b|^[^a-zA-Z]*[\w\W]{3,}$", q)
+        if not gate:
+            return None
+        if not self._looks_reversed(q):
+            return None
+        return self._reverse_text(q)
+    def _looks_reversed(self, s: str) -> bool:
+        """Heuristically decide whether ``s`` is English written backwards.
+
+        Words of three or more letters are compared against a small list of
+        common English words, both as written and reversed. The text counts
+        as reversed when more words are recognised backwards than forwards
+        and at least a quarter of the words (minimum one) are recognised
+        backwards.
+
+        Checking for a word's reversal inside the same string instead would
+        flag ordinary sentences containing palindromes ("did", "mom",
+        "level") and miss sentences that are reversed throughout.
+        """
+        common = frozenset({
+            "the", "and", "you", "for", "are", "was", "with", "that", "this",
+            "what", "which", "when", "where", "who", "how", "from", "have",
+            "has", "not", "but", "all", "can", "does", "did", "into", "your",
+            "they", "their", "there", "one", "two", "first", "last", "many",
+            "much", "about", "between", "than", "then", "were", "been",
+            "will", "would", "answer", "write", "word", "sentence",
+            "understand", "opposite", "left", "right", "name", "number",
+            "year", "please", "following", "question",
+        })
+        words = [w.lower() for w in re.findall(r"[A-Za-z]{3,}", s)]
+        if not words:
+            return False
+        forward = sum(1 for w in words if w in common)
+        backward = sum(1 for w in words if w[::-1] in common)
+        # Palindromes score on both sides and therefore cancel out.
+        return backward > forward and backward >= max(1, len(words) // 4)
+    def _reverse_text(self, s: str) -> str:
+        """Return ``s`` with its characters in reverse order."""
+        return s[::-1]
+    def _handle_csv_xls_ops(self, q, ctx):
+        """Apply a sum/min/max/avg requested in ``q`` to a column of an attached table.
+
+        The operation and column name are guessed from the question via
+        ``_guess_op`` and ``_guess_col``. Tables in ``ctx["tabular"]`` are
+        tried in order; the first one with a matching column (exact name,
+        else ``_fuzzy_find_col``) holding at least one numeric value yields
+        the result. Non-numeric cells are ignored. Returns ``None`` when
+        nothing applies.
+        """
+        tables = ctx["tabular"]
+        if not tables:
+            return None
+        op, col = self._guess_op(q), self._guess_col(q)
+        if not (op and col):
+            return None
+        # Maps the guessed operation onto the pandas Series reducer to call.
+        reducers = {"sum": "sum", "min": "min", "max": "max", "avg": "mean"}
+        for _name, df in tables:
+            target = col if col in df.columns else self._fuzzy_find_col(col, df.columns)
+            if not target:
+                continue
+            series = pd.to_numeric(df[target], errors="coerce").dropna()
+            if series.empty:
+                continue
+            method = reducers.get(op)
+            if method is not None:
+                return getattr(series, method)()
+        return None
+    def _handle_ocr_if_requested(self, q, ctx):
+        """Run OCR over attached images when the question asks to read text.
+
+        Requires the question to match the read/extract-text pattern, at
+        least one entry in ``ctx["images"]``, and both the ``"pil"`` and
+        ``"ocr"`` tools to be available. Each image's non-empty text is
+        emitted as ``[filename]`` followed by the text; sections are joined
+        with blank lines. Images that fail are logged and skipped. Returns
+        ``None`` when nothing was extracted.
+        """
+        asked = re.search(r"(?i)\b(ocr|read|extract)\b.*\b(text|from image)\b", q)
+        if not asked:
+            return None
+        if not ctx["images"]:
+            return None
+        image_lib, ocr_lib = self.tools.get("pil"), self.tools.get("ocr")
+        if not (image_lib and ocr_lib):
+            return None
+        chunks = []
+        for fname, blob in ctx["images"]:
+            try:
+                picture = image_lib.open(io.BytesIO(blob))
+                extracted = ocr_lib.image_to_string(picture).strip()
+            except Exception as e:
+                print(f"[Agent] OCR failed {fname}: {e}")
+                continue
+            if extracted:
+                chunks.append(f"[{fname}]\n{extracted}")
+        return "\n\n".join(chunks) if chunks else None
+    def _handle_wikipedia_if_requested(self, q, _):
+        """Answer from Wikipedia when the question looks like a factual lookup.
+
+        Triggers when ``q`` mentions Wikipedia or contains a who/what/when/
+        where/how-many word, and the ``"wikipedia"`` tool is available. The
+        topic is the text after "of ..." if present, else after "about ...",
+        else the whole question, with any "on wikipedia" phrase removed.
+
+        Returns:
+            For "how many ... <year> ... <year>" questions, the number of
+            19xx/20xx year mentions in the page content that fall inside the
+            inclusive year range, as a string. Otherwise a three-sentence
+            summary of the page. ``None`` if the handler does not apply or
+            the lookup fails.
+        """
+        wikipedia = self.tools.get("wikipedia")
+        if not wikipedia:
+            return None
+        mentions_wiki = re.search(r"(?i)\b(wikipedia|on wikipedia)\b", q)
+        is_question = re.search(r"(?i)\b(who|what|when|how many|where)\b", q)
+        if not mentions_wiki and not is_question:
+            return None
+        try:
+            topic = q
+            m = re.search(r"(?i)\babout\s+(.+)$", q)
+            if m:
+                topic = m.group(1)
+            # An "of <topic>" phrase takes precedence over "about <topic>".
+            m2 = re.search(r"(?i)\bof\s+(.+?)\??$", q)
+            if m2:
+                topic = m2.group(1)
+            topic = re.sub(r"(?i)\bon wikipedia\b", "", topic).strip()
+            page = wikipedia.page(topic, auto_suggest=True, redirect=True)
+            if re.search(r"(?i)\bhow many\b.*\b(\d{4}).*(\d{4})", q):
+                bounds = sorted(int(y) for y in re.findall(r"\b\d{4}\b", q))
+                if bounds:
+                    lo, hi = bounds[0], bounds[-1]
+                    # group(0) is the complete four-digit year; the group is
+                    # non-capturing so the century prefix is not split off.
+                    mentioned = (
+                        int(hit.group(0))
+                        for hit in re.finditer(r"\b(?:19|20)\d{2}\b", page.content)
+                    )
+                    return str(sum(1 for year in mentioned if lo <= year <= hi))
+            # Fetch the summary only when it is actually the answer.
+            return wikipedia.summary(page.title, sentences=3)
+        except Exception as e:
+            print(f"[Agent] Wikipedia error: {e}")
+            return None
+    def _handle_web_search_if_requested(self, q, _):
+        """Return the top web-search hit when the question asks for a search.
+
+        Applies only if the ``"web_search"`` tool is available and ``q``
+        contains a search-style keyword. Up to five results are requested and
+        the first one is rendered as its non-empty title, body and URL joined
+        by an em dash. Returns ``None`` when not applicable, when there are
+        no results, or when the search fails.
+        """
+        engine = self.tools.get("web_search")
+        if not engine:
+            return None
+        if re.search(r"(?i)\b(search|latest|news|find|look up)\b", q) is None:
+            return None
+        try:
+            hits = list(engine.text(q, max_results=5))
+            if not hits:
+                return None
+            top = hits[0]
+            fields = (top.get(key) for key in ("title", "body", "href"))
+            return " — ".join(part for part in fields if part)
+        except Exception as e:
+            print(f"[Agent] web search error: {e}")
+            return None
     def _guess_op(self, q: str):
         if re.search(r"(?i)\b(sum|total|added up)\b", q): return "sum"
         if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return "min"
         wl = (wanted or "").lower()
         for c in cols:
             s = str(c)
+            if s.lower() == wl: return s
         for c in cols:
             s = str(c)
+            if wl in s.lower(): return s
         return None
     def _fmt(self, x):
+        if isinstance(x, (int,)):
+            return str(x)
         try:
             fx = float(x)
             return str(int(fx)) if fx.is_integer() else str(fx)
             return str(x)
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,