lodist committed on
Commit
fac64b9
·
verified ·
1 Parent(s): 5d48a13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -139
app.py CHANGED
@@ -14,87 +14,187 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
  # --- Basic Agent Definition ---
15
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
16
  class BasicAgent:
17
- def __init__(self):
18
- print("BasicAgent initialized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def __call__(self, question: str, files=None) -> str:
21
- """
22
- files: list of (filename, bytes) tuples. May be None/empty.
23
- """
24
  q = (question or "").strip()
25
- print(f"Agent received question (first 50 chars): {q[:50]}...")
26
- if files:
27
- print("Received files:", [f for f, _ in files])
28
-
29
- op = self._guess_op(q) # "sum" | "min" | "max" | "avg" | None
30
- col = self._guess_col(q) # tries to extract a column name
31
-
32
- # ---------- FILES: Excel / CSV / TSV ----------
33
- if files:
34
- for fname, blob in files:
35
- low = fname.lower()
36
-
37
- # 1) Excel
38
- if low.endswith((".xlsx", ".xls")):
39
- try:
40
- df = pd.read_excel(io.BytesIO(blob), sheet_name=0, engine=None)
41
- # Special: "total sales from food (not including drinks)"
42
- if self._asks_food_not_drinks(q):
43
- total = self._sum_food_excluding_drinks(df)
44
- if total is not None:
45
- return f"{total:.2f}"
46
- # Generic ops on a named column
47
- if op and col:
48
- val = self._op_on_column(df, col, op)
49
- if val is not None:
50
- return self._fmt(val)
51
- except Exception as e:
52
- print(f"Excel read failed for {fname}: {e}")
53
-
54
- # 2) CSV/TSV
55
- elif low.endswith(".csv") or low.endswith(".tsv"):
56
- sep = "\t" if low.endswith(".tsv") else ","
57
- try:
58
- df = pd.read_csv(io.BytesIO(blob), sep=sep)
59
- # Special: "total sales from food (not including drinks)"
60
- if self._asks_food_not_drinks(q):
61
- total = self._sum_food_excluding_drinks(df)
62
- if total is not None:
63
- return f"{total:.2f}"
64
- # Generic ops on a named column
65
- if op and col:
66
- val = self._op_on_column(df, col, op)
67
- if val is not None:
68
- return self._fmt(val)
69
- except Exception as e:
70
- print(f"CSV/TSV read failed for {fname}: {e}")
71
-
72
- # ---------- TEXT-ONLY FALLBACKS ----------
73
- # trivial arithmetic like "what is 12 + 7"
74
- m = re.search(r"(?i)\bwhat\s+is\s+(\d+)\s*([+\-*/x])\s*(\d+)\b", q)
75
- if not m:
76
- m = re.search(r"\b(\d+)\s*([+\-*/x])\s*(\d+)\b", q)
77
- if m:
78
- a, opchar, b = int(m.group(1)), m.group(2).lower(), int(m.group(3))
79
- if opchar in ("x", "*"): return str(a * b)
80
- if opchar == "+": return str(a + b)
81
- if opchar == "-": return str(a - b)
82
- if opchar == "/": return "undefined" if b == 0 else (str(int(a/b)) if (a/b).is_integer() else str(a/b))
83
-
84
- # min/max/sum/avg over numbers present in the question itself
85
- nums = [int(x) for x in re.findall(r"\b\d+\b", q)]
86
- if nums:
87
- if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return str(min(nums))
88
- if re.search(r"(?i)\b(max|maximum|largest|highest)\b", q): return str(max(nums))
89
- if re.search(r"(?i)\b(sum|total|added up)\b", q): return str(sum(nums))
90
- if re.search(r"(?i)\b(avg|average|mean)\b", q):
91
- avg = sum(nums) / len(nums)
92
- return str(int(avg)) if float(avg).is_integer() else f"{avg:.2f}"
93
-
94
- # Final fallback
95
  return "This is a default answer."
96
 
97
- # ----------------- Helpers -----------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  def _guess_op(self, q: str):
99
  if re.search(r"(?i)\b(sum|total|added up)\b", q): return "sum"
100
  if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return "min"
@@ -113,74 +213,15 @@ class BasicAgent:
113
  wl = (wanted or "").lower()
114
  for c in cols:
115
  s = str(c)
116
- if s.lower() == wl:
117
- return s
118
  for c in cols:
119
  s = str(c)
120
- if wl in s.lower():
121
- return s
122
- return None
123
-
124
- def _op_on_column(self, df: pd.DataFrame, col: str, op: str):
125
- target = col if col in df.columns else self._fuzzy_find_col(col, df.columns)
126
- if not target:
127
- return None
128
- s = pd.to_numeric(df[target], errors="coerce").dropna()
129
- if s.empty:
130
- return None
131
- if op == "sum": return s.sum()
132
- if op == "min": return s.min()
133
- if op == "max": return s.max()
134
- if op == "avg": return s.mean()
135
  return None
136
 
137
- def _asks_food_not_drinks(self, q: str) -> bool:
138
- # matches questions like: "total sales from food (not including drinks)"
139
- return bool(
140
- re.search(r"(?i)total\s+sales.*food.*not.*drink", q) or
141
- re.search(r"(?i)food.*not.*drink", q)
142
- )
143
-
144
- def _sum_food_excluding_drinks(self, df: pd.DataFrame):
145
- # 1) Try find a categorical column
146
- cat_col = None
147
- for c in df.columns:
148
- cl = str(c).lower()
149
- if any(k in cl for k in ["category", "type", "group", "item", "product", "menu", "name"]):
150
- cat_col = c
151
- break
152
-
153
- # 2) Try find a numeric money column
154
- money_col = None
155
- for c in df.columns:
156
- cl = str(c).lower()
157
- if any(k in cl for k in ["sales", "revenue", "amount", "total", "usd", "price", "value"]):
158
- money_col = c
159
- break
160
- if money_col is None:
161
- for c in df.columns:
162
- s = pd.to_numeric(df[c], errors="coerce")
163
- if s.notna().sum() >= max(3, int(0.5 * len(df))):
164
- money_col = c
165
- break
166
- if money_col is None:
167
- return None
168
-
169
- mask = pd.Series([True] * len(df))
170
- if cat_col is not None:
171
- cats = df[cat_col].astype(str).str.lower()
172
- exclude_words = ["drink", "beverage", "soda", "juice", "coffee", "tea", "cola", "water"]
173
- include_words = ["food", "burger", "sandwich", "fries", "salad", "wrap", "meal",
174
- "nugget", "chicken", "beef", "fish", "pizza", "dessert"]
175
- ex_mask = cats.str.contains("|".join(exclude_words), na=False)
176
- in_mask = cats.str.contains("|".join(include_words), na=False)
177
- mask = (~ex_mask) & (in_mask | True) # keep non-drinks even if not explicitly labeled as food
178
-
179
- s_money = pd.to_numeric(df[money_col], errors="coerce")
180
- total = s_money[mask].dropna().sum()
181
- return float(total) if pd.notna(total) else None
182
-
183
  def _fmt(self, x):
 
 
184
  try:
185
  fx = float(x)
186
  return str(int(fx)) if fx.is_integer() else str(fx)
@@ -188,6 +229,10 @@ class BasicAgent:
188
  return str(x)
189
 
190
 
 
 
 
 
191
  def run_and_submit_all( profile: gr.OAuthProfile | None):
192
  """
193
  Fetches all questions, runs the BasicAgent on them, submits all answers,
 
14
  # --- Basic Agent Definition ---
15
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
16
  class BasicAgent:
17
+ def __init__(self, tools: dict | None = None):
18
+ self.tools = tools or {}
19
+ try:
20
+ from duckduckgo_search import DDGS
21
+ self.tools["web_search"] = DDGS()
22
+ except Exception:
23
+ self.tools["web_search"] = None
24
+ try:
25
+ import wikipedia
26
+ self.tools["wikipedia"] = wikipedia
27
+ wikipedia.set_lang("en")
28
+ except Exception:
29
+ self.tools["wikipedia"] = None
30
+ try:
31
+ from PIL import Image
32
+ self.tools["pil"] = Image
33
+ except Exception:
34
+ self.tools["pil"] = None
35
+ try:
36
+ import pytesseract
37
+ self.tools["ocr"] = pytesseract
38
+ except Exception:
39
+ self.tools["ocr"] = None
40
+ print("BasicAgent initialized with tools:", {k: bool(v) for k, v in self.tools.items()})
41
 
42
  def __call__(self, question: str, files=None) -> str:
 
 
 
43
  q = (question or "").strip()
44
+ print(f"[Agent] Q: {q[:120]}{'...' if len(q) > 120 else ''}")
45
+ file_ctx = self._ingest_files(files or [])
46
+
47
+ handlers = [
48
+ self._handle_arithmetic,
49
+ self._handle_inline_aggregate_numbers,
50
+ self._handle_reversed_text,
51
+ self._handle_csv_xls_ops,
52
+ self._handle_ocr_if_requested,
53
+ self._handle_wikipedia_if_requested,
54
+ self._handle_web_search_if_requested,
55
+ ]
56
+ for h in handlers:
57
+ try:
58
+ ans = h(q, file_ctx)
59
+ if ans is not None:
60
+ return self._fmt(ans)
61
+ except Exception as e:
62
+ print(f"[Agent] Handler {h.__name__} error: {e}")
63
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  return "This is a default answer."
65
 
66
+ def _ingest_files(self, files):
67
+ ctx = {"tabular": [], "images": [], "text": []}
68
+ for fname, blob in files:
69
+ low = (fname or "").lower()
70
+ if low.endswith((".csv", ".tsv")):
71
+ sep = "\t" if low.endswith(".tsv") else ","
72
+ try:
73
+ df = pd.read_csv(io.BytesIO(blob), sep=sep)
74
+ ctx["tabular"].append((fname, df))
75
+ except Exception as e:
76
+ print(f"[Agent] CSV read failed for {fname}: {e}")
77
+ elif low.endswith((".xlsx", ".xls")):
78
+ try:
79
+ df = pd.read_excel(io.BytesIO(blob), sheet_name=0, engine=None)
80
+ ctx["tabular"].append((fname, df))
81
+ except Exception as e:
82
+ print(f"[Agent] Excel read failed for {fname}: {e}")
83
+ elif low.endswith((".png", ".jpg", ".jpeg", ".bmp", ".webp", ".gif")):
84
+ ctx["images"].append((fname, blob))
85
+ elif low.endswith((".txt", ".md", ".json")):
86
+ try:
87
+ text = io.BytesIO(blob).read().decode("utf-8", errors="replace")
88
+ ctx["text"].append((fname, text))
89
+ except Exception as e:
90
+ print(f"[Agent] Text read failed for {fname}: {e}")
91
+ return ctx
92
+
93
+ def _handle_arithmetic(self, q, _):
94
+ m = re.search(r"(?i)\bwhat\s+is\s+(\d+)\s*([+\-*/x])\s*(\d+)\b", q) or re.search(r"\b(\d+)\s*([+\-*/x])\s*(\d+)\b", q)
95
+ if not m: return None
96
+ a, opch, b = int(m.group(1)), m.group(2).lower(), int(m.group(3))
97
+ if opch in ("x", "*"): return a * b
98
+ if opch == "+": return a + b
99
+ if opch == "-": return a - b
100
+ if opch == "/": return "undefined" if b == 0 else (int(a/b) if (a/b).is_integer() else a/b)
101
+
102
+ def _handle_inline_aggregate_numbers(self, q, _):
103
+ nums = [float(x) for x in re.findall(r"\b\d+(?:\.\d+)?\b", q)]
104
+ if not nums: return None
105
+ if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return min(nums)
106
+ if re.search(r"(?i)\b(max|maximum|largest|highest)\b", q): return max(nums)
107
+ if re.search(r"(?i)\b(sum|total|added up)\b", q): return sum(nums)
108
+ if re.search(r"(?i)\b(avg|average|mean)\b", q):
109
+ avg = sum(nums)/len(nums)
110
+ return int(avg) if float(avg).is_integer() else round(avg, 2)
111
+
112
+ def _handle_reversed_text(self, q, _):
113
+ if re.search(r"(?i)\b(tfel)\b|^[^a-zA-Z]*[\w\W]{3,}$", q) and self._looks_reversed(q):
114
+ return self._reverse_text(q)
115
+ return None
116
+
117
+ def _looks_reversed(self, s: str) -> bool:
118
+ words = re.findall(r"[A-Za-z]{3,}", s)
119
+ if not words: return False
120
+ rev_like = sum(1 for w in words if w[::-1].lower() in s.lower())
121
+ return rev_like >= max(1, len(words)//4)
122
+
123
+ def _reverse_text(self, s: str) -> str:
124
+ return "".join(list(s))[::-1]
125
+
126
+ def _handle_csv_xls_ops(self, q, ctx):
127
+ if not ctx["tabular"]: return None
128
+ op = self._guess_op(q)
129
+ col = self._guess_col(q)
130
+ if not op or not col: return None
131
+ for fname, df in ctx["tabular"]:
132
+ target = col if col in df.columns else self._fuzzy_find_col(col, df.columns)
133
+ if not target: continue
134
+ s = pd.to_numeric(df[target], errors="coerce").dropna()
135
+ if s.empty: continue
136
+ if op == "sum": return s.sum()
137
+ if op == "min": return s.min()
138
+ if op == "max": return s.max()
139
+ if op == "avg": return s.mean()
140
+ return None
141
+
142
+ def _handle_ocr_if_requested(self, q, ctx):
143
+ if not re.search(r"(?i)\b(ocr|read|extract)\b.*\b(text|from image)\b", q): return None
144
+ if not ctx["images"] or not self.tools.get("pil") or not self.tools.get("ocr"): return None
145
+ Image = self.tools["pil"]; pytesseract = self.tools["ocr"]
146
+ texts = []
147
+ for fname, blob in ctx["images"]:
148
+ try:
149
+ img = Image.open(io.BytesIO(blob))
150
+ txt = pytesseract.image_to_string(img)
151
+ if txt.strip():
152
+ texts.append(f"[{fname}]\n{txt.strip()}")
153
+ except Exception as e:
154
+ print(f"[Agent] OCR failed {fname}: {e}")
155
+ return "\n\n".join(texts) if texts else None
156
+
157
+ def _handle_wikipedia_if_requested(self, q, _):
158
+ if not self.tools.get("wikipedia"): return None
159
+ if not re.search(r"(?i)\b(wikipedia|on wikipedia)\b", q) and not re.search(r"(?i)\b(who|what|when|how many|where)\b", q):
160
+ return None
161
+ try:
162
+ wikipedia = self.tools["wikipedia"]
163
+ topic = q
164
+ m = re.search(r"(?i)\babout\s+(.+)$", q)
165
+ if m: topic = m.group(1)
166
+ m2 = re.search(r"(?i)\bof\s+(.+?)\??$", q)
167
+ if m2: topic = m2.group(1)
168
+ topic = re.sub(r"(?i)\bon wikipedia\b", "", topic).strip()
169
+ page = wikipedia.page(topic, auto_suggest=True, redirect=True)
170
+ summary = wikipedia.summary(page.title, sentences=3)
171
+ if re.search(r"(?i)\bhow many\b.*\b(\d{4}).*(\d{4})", q):
172
+ years = list(map(int, re.findall(r"\b\d{4}\b", q)))
173
+ years.sort()
174
+ text = page.content
175
+ hits = re.findall(r"\b(19|20)\d{2}\b", text)
176
+ nums = [int(h[0] + text[i+1:i+3]) for i, h in enumerate(re.finditer(r"\b(19|20)\d{2}\b", text))]
177
+ count = sum(1 for y in nums if years[0] <= y <= years[-1])
178
+ return f"{count}"
179
+ return summary
180
+ except Exception as e:
181
+ print(f"[Agent] Wikipedia error: {e}")
182
+ return None
183
+
184
+ def _handle_web_search_if_requested(self, q, _):
185
+ if not self.tools.get("web_search"): return None
186
+ if not re.search(r"(?i)\b(search|latest|news|find|look up)\b", q): return None
187
+ try:
188
+ ddgs = self.tools["web_search"]
189
+ results = list(ddgs.text(q, max_results=5))
190
+ if not results: return None
191
+ best = results[0]
192
+ pieces = [p for p in [best.get("title"), best.get("body"), best.get("href")] if p]
193
+ return " — ".join(pieces)
194
+ except Exception as e:
195
+ print(f"[Agent] web search error: {e}")
196
+ return None
197
+
198
  def _guess_op(self, q: str):
199
  if re.search(r"(?i)\b(sum|total|added up)\b", q): return "sum"
200
  if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return "min"
 
213
  wl = (wanted or "").lower()
214
  for c in cols:
215
  s = str(c)
216
+ if s.lower() == wl: return s
 
217
  for c in cols:
218
  s = str(c)
219
+ if wl in s.lower(): return s
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  return None
221
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  def _fmt(self, x):
223
+ if isinstance(x, (int,)):
224
+ return str(x)
225
  try:
226
  fx = float(x)
227
  return str(int(fx)) if fx.is_integer() else str(fx)
 
229
  return str(x)
230
 
231
 
232
+
233
+
234
+
235
+
236
  def run_and_submit_all( profile: gr.OAuthProfile | None):
237
  """
238
  Fetches all questions, runs the BasicAgent on them, submits all answers,