lodist committed on
Commit
5d48a13
·
verified ·
1 Parent(s): a962d82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -28
app.py CHANGED
@@ -16,7 +16,7 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
  class BasicAgent:
17
  def __init__(self):
18
  print("BasicAgent initialized.")
19
-
20
  def __call__(self, question: str, files=None) -> str:
21
  """
22
  files: list of (filename, bytes) tuples. May be None/empty.
@@ -25,33 +25,76 @@ class BasicAgent:
25
  print(f"Agent received question (first 50 chars): {q[:50]}...")
26
  if files:
27
  print("Received files:", [f for f, _ in files])
28
- op = self._guess_op(q)
29
- col = self._guess_col(q)
30
- if files and op and col:
 
 
 
31
  for fname, blob in files:
32
  low = fname.lower()
33
- if low.endswith(".csv") or low.endswith(".tsv"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  sep = "\t" if low.endswith(".tsv") else ","
35
  try:
36
  df = pd.read_csv(io.BytesIO(blob), sep=sep)
37
- if col in df.columns:
38
- s = pd.to_numeric(df[col], errors="coerce").dropna()
39
- if len(s):
40
- return self._apply_op(op, s)
41
- # loose match if exact not found
42
- match = self._fuzzy_find_col(col, df.columns)
43
- if match:
44
- s = pd.to_numeric(df[match], errors="coerce").dropna()
45
- if len(s):
46
- return self._apply_op(op, s)
47
  except Exception as e:
48
- print(f"CSV read failed for {fname}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- # Fallback: default answer (replace with your real logic next)
51
- fixed_answer = "This is a default answer."
52
- print(f"Agent returning fixed answer: {fixed_answer}")
53
- return fixed_answer
 
 
 
 
 
54
 
 
 
 
 
55
  def _guess_op(self, q: str):
56
  if re.search(r"(?i)\b(sum|total|added up)\b", q): return "sum"
57
  if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return "min"
@@ -70,18 +113,72 @@ class BasicAgent:
70
  wl = (wanted or "").lower()
71
  for c in cols:
72
  s = str(c)
73
- if s.lower() == wl: return s
 
74
  for c in cols:
75
  s = str(c)
76
- if wl in s.lower(): return s
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  return None
78
 
79
- def _apply_op(self, op, series):
80
- if op == "sum": return self._fmt(series.sum())
81
- if op == "min": return self._fmt(series.min())
82
- if op == "max": return self._fmt(series.max())
83
- if op == "avg": return self._fmt(series.mean())
84
- return "unknown"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  def _fmt(self, x):
87
  try:
@@ -90,6 +187,7 @@ class BasicAgent:
90
  except Exception:
91
  return str(x)
92
 
 
93
  def run_and_submit_all( profile: gr.OAuthProfile | None):
94
  """
95
  Fetches all questions, runs the BasicAgent on them, submits all answers,
 
16
  class BasicAgent:
17
  def __init__(self):
18
  print("BasicAgent initialized.")
19
+
20
  def __call__(self, question: str, files=None) -> str:
21
  """
22
  files: list of (filename, bytes) tuples. May be None/empty.
 
25
  print(f"Agent received question (first 50 chars): {q[:50]}...")
26
  if files:
27
  print("Received files:", [f for f, _ in files])
28
+
29
+ op = self._guess_op(q) # "sum" | "min" | "max" | "avg" | None
30
+ col = self._guess_col(q) # tries to extract a column name
31
+
32
+ # ---------- FILES: Excel / CSV / TSV ----------
33
+ if files:
34
  for fname, blob in files:
35
  low = fname.lower()
36
+
37
+ # 1) Excel
38
+ if low.endswith((".xlsx", ".xls")):
39
+ try:
40
+ df = pd.read_excel(io.BytesIO(blob), sheet_name=0, engine=None)
41
+ # Special: "total sales from food (not including drinks)"
42
+ if self._asks_food_not_drinks(q):
43
+ total = self._sum_food_excluding_drinks(df)
44
+ if total is not None:
45
+ return f"{total:.2f}"
46
+ # Generic ops on a named column
47
+ if op and col:
48
+ val = self._op_on_column(df, col, op)
49
+ if val is not None:
50
+ return self._fmt(val)
51
+ except Exception as e:
52
+ print(f"Excel read failed for {fname}: {e}")
53
+
54
+ # 2) CSV/TSV
55
+ elif low.endswith(".csv") or low.endswith(".tsv"):
56
  sep = "\t" if low.endswith(".tsv") else ","
57
  try:
58
  df = pd.read_csv(io.BytesIO(blob), sep=sep)
59
+ # Special: "total sales from food (not including drinks)"
60
+ if self._asks_food_not_drinks(q):
61
+ total = self._sum_food_excluding_drinks(df)
62
+ if total is not None:
63
+ return f"{total:.2f}"
64
+ # Generic ops on a named column
65
+ if op and col:
66
+ val = self._op_on_column(df, col, op)
67
+ if val is not None:
68
+ return self._fmt(val)
69
  except Exception as e:
70
+ print(f"CSV/TSV read failed for {fname}: {e}")
71
+
72
+ # ---------- TEXT-ONLY FALLBACKS ----------
73
+ # trivial arithmetic like "what is 12 + 7"
74
+ m = re.search(r"(?i)\bwhat\s+is\s+(\d+)\s*([+\-*/x])\s*(\d+)\b", q)
75
+ if not m:
76
+ m = re.search(r"\b(\d+)\s*([+\-*/x])\s*(\d+)\b", q)
77
+ if m:
78
+ a, opchar, b = int(m.group(1)), m.group(2).lower(), int(m.group(3))
79
+ if opchar in ("x", "*"): return str(a * b)
80
+ if opchar == "+": return str(a + b)
81
+ if opchar == "-": return str(a - b)
82
+ if opchar == "/": return "undefined" if b == 0 else (str(int(a/b)) if (a/b).is_integer() else str(a/b))
83
 
84
+ # min/max/sum/avg over numbers present in the question itself
85
+ nums = [int(x) for x in re.findall(r"\b\d+\b", q)]
86
+ if nums:
87
+ if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return str(min(nums))
88
+ if re.search(r"(?i)\b(max|maximum|largest|highest)\b", q): return str(max(nums))
89
+ if re.search(r"(?i)\b(sum|total|added up)\b", q): return str(sum(nums))
90
+ if re.search(r"(?i)\b(avg|average|mean)\b", q):
91
+ avg = sum(nums) / len(nums)
92
+ return str(int(avg)) if float(avg).is_integer() else f"{avg:.2f}"
93
 
94
+ # Final fallback
95
+ return "This is a default answer."
96
+
97
+ # ----------------- Helpers -----------------
98
  def _guess_op(self, q: str):
99
  if re.search(r"(?i)\b(sum|total|added up)\b", q): return "sum"
100
  if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return "min"
 
113
  wl = (wanted or "").lower()
114
  for c in cols:
115
  s = str(c)
116
+ if s.lower() == wl:
117
+ return s
118
  for c in cols:
119
  s = str(c)
120
+ if wl in s.lower():
121
+ return s
122
+ return None
123
+
124
+ def _op_on_column(self, df: pd.DataFrame, col: str, op: str):
125
+ target = col if col in df.columns else self._fuzzy_find_col(col, df.columns)
126
+ if not target:
127
+ return None
128
+ s = pd.to_numeric(df[target], errors="coerce").dropna()
129
+ if s.empty:
130
+ return None
131
+ if op == "sum": return s.sum()
132
+ if op == "min": return s.min()
133
+ if op == "max": return s.max()
134
+ if op == "avg": return s.mean()
135
  return None
136
 
137
+ def _asks_food_not_drinks(self, q: str) -> bool:
138
+ # matches questions like: "total sales from food (not including drinks)"
139
+ return bool(
140
+ re.search(r"(?i)total\s+sales.*food.*not.*drink", q) or
141
+ re.search(r"(?i)food.*not.*drink", q)
142
+ )
143
+
144
+ def _sum_food_excluding_drinks(self, df: pd.DataFrame):
145
+ # 1) Try find a categorical column
146
+ cat_col = None
147
+ for c in df.columns:
148
+ cl = str(c).lower()
149
+ if any(k in cl for k in ["category", "type", "group", "item", "product", "menu", "name"]):
150
+ cat_col = c
151
+ break
152
+
153
+ # 2) Try find a numeric money column
154
+ money_col = None
155
+ for c in df.columns:
156
+ cl = str(c).lower()
157
+ if any(k in cl for k in ["sales", "revenue", "amount", "total", "usd", "price", "value"]):
158
+ money_col = c
159
+ break
160
+ if money_col is None:
161
+ for c in df.columns:
162
+ s = pd.to_numeric(df[c], errors="coerce")
163
+ if s.notna().sum() >= max(3, int(0.5 * len(df))):
164
+ money_col = c
165
+ break
166
+ if money_col is None:
167
+ return None
168
+
169
+ mask = pd.Series([True] * len(df))
170
+ if cat_col is not None:
171
+ cats = df[cat_col].astype(str).str.lower()
172
+ exclude_words = ["drink", "beverage", "soda", "juice", "coffee", "tea", "cola", "water"]
173
+ include_words = ["food", "burger", "sandwich", "fries", "salad", "wrap", "meal",
174
+ "nugget", "chicken", "beef", "fish", "pizza", "dessert"]
175
+ ex_mask = cats.str.contains("|".join(exclude_words), na=False)
176
+ in_mask = cats.str.contains("|".join(include_words), na=False)
177
+ mask = (~ex_mask) & (in_mask | True) # keep non-drinks even if not explicitly labeled as food
178
+
179
+ s_money = pd.to_numeric(df[money_col], errors="coerce")
180
+ total = s_money[mask].dropna().sum()
181
+ return float(total) if pd.notna(total) else None
182
 
183
  def _fmt(self, x):
184
  try:
 
187
  except Exception:
188
  return str(x)
189
 
190
+
191
  def run_and_submit_all( profile: gr.OAuthProfile | None):
192
  """
193
  Fetches all questions, runs the BasicAgent on them, submits all answers,