lodist committed on
Commit
3de1995
·
verified ·
1 Parent(s): fac64b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -217
app.py CHANGED
@@ -3,230 +3,60 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- import io
7
- import re
8
-
 
9
 
10
  # (Keep Constants as is)
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
- # --- Basic Agent Definition ---
15
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
 
 
 
 
 
 
16
  class BasicAgent:
17
- def __init__(self, tools: dict | None = None):
18
- self.tools = tools or {}
19
- try:
20
- from duckduckgo_search import DDGS
21
- self.tools["web_search"] = DDGS()
22
- except Exception:
23
- self.tools["web_search"] = None
24
- try:
25
- import wikipedia
26
- self.tools["wikipedia"] = wikipedia
27
- wikipedia.set_lang("en")
28
- except Exception:
29
- self.tools["wikipedia"] = None
30
- try:
31
- from PIL import Image
32
- self.tools["pil"] = Image
33
- except Exception:
34
- self.tools["pil"] = None
35
- try:
36
- import pytesseract
37
- self.tools["ocr"] = pytesseract
38
- except Exception:
39
- self.tools["ocr"] = None
40
- print("BasicAgent initialized with tools:", {k: bool(v) for k, v in self.tools.items()})
41
-
42
- def __call__(self, question: str, files=None) -> str:
43
- q = (question or "").strip()
44
- print(f"[Agent] Q: {q[:120]}{'...' if len(q) > 120 else ''}")
45
- file_ctx = self._ingest_files(files or [])
46
-
47
- handlers = [
48
- self._handle_arithmetic,
49
- self._handle_inline_aggregate_numbers,
50
- self._handle_reversed_text,
51
- self._handle_csv_xls_ops,
52
- self._handle_ocr_if_requested,
53
- self._handle_wikipedia_if_requested,
54
- self._handle_web_search_if_requested,
55
- ]
56
- for h in handlers:
57
- try:
58
- ans = h(q, file_ctx)
59
- if ans is not None:
60
- return self._fmt(ans)
61
- except Exception as e:
62
- print(f"[Agent] Handler {h.__name__} error: {e}")
63
-
64
- return "This is a default answer."
65
-
66
- def _ingest_files(self, files):
67
- ctx = {"tabular": [], "images": [], "text": []}
68
- for fname, blob in files:
69
- low = (fname or "").lower()
70
- if low.endswith((".csv", ".tsv")):
71
- sep = "\t" if low.endswith(".tsv") else ","
72
- try:
73
- df = pd.read_csv(io.BytesIO(blob), sep=sep)
74
- ctx["tabular"].append((fname, df))
75
- except Exception as e:
76
- print(f"[Agent] CSV read failed for {fname}: {e}")
77
- elif low.endswith((".xlsx", ".xls")):
78
- try:
79
- df = pd.read_excel(io.BytesIO(blob), sheet_name=0, engine=None)
80
- ctx["tabular"].append((fname, df))
81
- except Exception as e:
82
- print(f"[Agent] Excel read failed for {fname}: {e}")
83
- elif low.endswith((".png", ".jpg", ".jpeg", ".bmp", ".webp", ".gif")):
84
- ctx["images"].append((fname, blob))
85
- elif low.endswith((".txt", ".md", ".json")):
86
- try:
87
- text = io.BytesIO(blob).read().decode("utf-8", errors="replace")
88
- ctx["text"].append((fname, text))
89
- except Exception as e:
90
- print(f"[Agent] Text read failed for {fname}: {e}")
91
- return ctx
92
-
93
- def _handle_arithmetic(self, q, _):
94
- m = re.search(r"(?i)\bwhat\s+is\s+(\d+)\s*([+\-*/x])\s*(\d+)\b", q) or re.search(r"\b(\d+)\s*([+\-*/x])\s*(\d+)\b", q)
95
- if not m: return None
96
- a, opch, b = int(m.group(1)), m.group(2).lower(), int(m.group(3))
97
- if opch in ("x", "*"): return a * b
98
- if opch == "+": return a + b
99
- if opch == "-": return a - b
100
- if opch == "/": return "undefined" if b == 0 else (int(a/b) if (a/b).is_integer() else a/b)
101
-
102
- def _handle_inline_aggregate_numbers(self, q, _):
103
- nums = [float(x) for x in re.findall(r"\b\d+(?:\.\d+)?\b", q)]
104
- if not nums: return None
105
- if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return min(nums)
106
- if re.search(r"(?i)\b(max|maximum|largest|highest)\b", q): return max(nums)
107
- if re.search(r"(?i)\b(sum|total|added up)\b", q): return sum(nums)
108
- if re.search(r"(?i)\b(avg|average|mean)\b", q):
109
- avg = sum(nums)/len(nums)
110
- return int(avg) if float(avg).is_integer() else round(avg, 2)
111
-
112
- def _handle_reversed_text(self, q, _):
113
- if re.search(r"(?i)\b(tfel)\b|^[^a-zA-Z]*[\w\W]{3,}$", q) and self._looks_reversed(q):
114
- return self._reverse_text(q)
115
- return None
116
-
117
- def _looks_reversed(self, s: str) -> bool:
118
- words = re.findall(r"[A-Za-z]{3,}", s)
119
- if not words: return False
120
- rev_like = sum(1 for w in words if w[::-1].lower() in s.lower())
121
- return rev_like >= max(1, len(words)//4)
122
-
123
- def _reverse_text(self, s: str) -> str:
124
- return "".join(list(s))[::-1]
125
-
126
- def _handle_csv_xls_ops(self, q, ctx):
127
- if not ctx["tabular"]: return None
128
- op = self._guess_op(q)
129
- col = self._guess_col(q)
130
- if not op or not col: return None
131
- for fname, df in ctx["tabular"]:
132
- target = col if col in df.columns else self._fuzzy_find_col(col, df.columns)
133
- if not target: continue
134
- s = pd.to_numeric(df[target], errors="coerce").dropna()
135
- if s.empty: continue
136
- if op == "sum": return s.sum()
137
- if op == "min": return s.min()
138
- if op == "max": return s.max()
139
- if op == "avg": return s.mean()
140
- return None
141
-
142
- def _handle_ocr_if_requested(self, q, ctx):
143
- if not re.search(r"(?i)\b(ocr|read|extract)\b.*\b(text|from image)\b", q): return None
144
- if not ctx["images"] or not self.tools.get("pil") or not self.tools.get("ocr"): return None
145
- Image = self.tools["pil"]; pytesseract = self.tools["ocr"]
146
- texts = []
147
- for fname, blob in ctx["images"]:
148
- try:
149
- img = Image.open(io.BytesIO(blob))
150
- txt = pytesseract.image_to_string(img)
151
- if txt.strip():
152
- texts.append(f"[{fname}]\n{txt.strip()}")
153
- except Exception as e:
154
- print(f"[Agent] OCR failed {fname}: {e}")
155
- return "\n\n".join(texts) if texts else None
156
 
157
- def _handle_wikipedia_if_requested(self, q, _):
158
- if not self.tools.get("wikipedia"): return None
159
- if not re.search(r"(?i)\b(wikipedia|on wikipedia)\b", q) and not re.search(r"(?i)\b(who|what|when|how many|where)\b", q):
160
- return None
161
- try:
162
- wikipedia = self.tools["wikipedia"]
163
- topic = q
164
- m = re.search(r"(?i)\babout\s+(.+)$", q)
165
- if m: topic = m.group(1)
166
- m2 = re.search(r"(?i)\bof\s+(.+?)\??$", q)
167
- if m2: topic = m2.group(1)
168
- topic = re.sub(r"(?i)\bon wikipedia\b", "", topic).strip()
169
- page = wikipedia.page(topic, auto_suggest=True, redirect=True)
170
- summary = wikipedia.summary(page.title, sentences=3)
171
- if re.search(r"(?i)\bhow many\b.*\b(\d{4}).*(\d{4})", q):
172
- years = list(map(int, re.findall(r"\b\d{4}\b", q)))
173
- years.sort()
174
- text = page.content
175
- hits = re.findall(r"\b(19|20)\d{2}\b", text)
176
- nums = [int(h[0] + text[i+1:i+3]) for i, h in enumerate(re.finditer(r"\b(19|20)\d{2}\b", text))]
177
- count = sum(1 for y in nums if years[0] <= y <= years[-1])
178
- return f"{count}"
179
- return summary
180
- except Exception as e:
181
- print(f"[Agent] Wikipedia error: {e}")
182
- return None
183
 
184
- def _handle_web_search_if_requested(self, q, _):
185
- if not self.tools.get("web_search"): return None
186
- if not re.search(r"(?i)\b(search|latest|news|find|look up)\b", q): return None
187
- try:
188
- ddgs = self.tools["web_search"]
189
- results = list(ddgs.text(q, max_results=5))
190
- if not results: return None
191
- best = results[0]
192
- pieces = [p for p in [best.get("title"), best.get("body"), best.get("href")] if p]
193
- return " ".join(pieces)
194
- except Exception as e:
195
- print(f"[Agent] web search error: {e}")
196
- return None
197
-
198
- def _guess_op(self, q: str):
199
- if re.search(r"(?i)\b(sum|total|added up)\b", q): return "sum"
200
- if re.search(r"(?i)\b(min|minimum|smallest|lowest)\b", q): return "min"
201
- if re.search(r"(?i)\b(max|maximum|largest|highest)\b", q): return "max"
202
- if re.search(r"(?i)\b(avg|average|mean)\b", q): return "avg"
203
- return None
204
-
205
- def _guess_col(self, q: str):
206
- m = re.search(r"(?i)\bof\s+([A-Za-z0-9_]+)\b", q)
207
- if m: return m.group(1)
208
- m = re.search(r"[\"']([A-Za-z0-9_ ]+)[\"']", q)
209
- if m: return m.group(1).strip()
210
- return None
211
-
212
- def _fuzzy_find_col(self, wanted: str, cols) -> str | None:
213
- wl = (wanted or "").lower()
214
- for c in cols:
215
- s = str(c)
216
- if s.lower() == wl: return s
217
- for c in cols:
218
- s = str(c)
219
- if wl in s.lower(): return s
220
- return None
221
-
222
- def _fmt(self, x):
223
- if isinstance(x, (int,)):
224
- return str(x)
225
- try:
226
- fx = float(x)
227
- return str(int(fx)) if fx.is_integer() else str(fx)
228
- except Exception:
229
- return str(x)
230
 
231
 
232
 
@@ -310,7 +140,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
310
 
311
  try:
312
  # pass files to the agent
313
- submitted_answer = agent(question_text, files=file_blobs)
314
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
315
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
316
  except Exception as e:
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, PythonInterpreterTool, OpenAIServerModel
7
+ from agentsTools.toolVisitWebpage import visit_webpage
8
+ from agentsTools.tool_fetch_task_file import fetch_task_file
9
+ from agentsTools.tool_read_excel_as_json import read_excel_as_json
10
 
11
  # (Keep Constants as is)
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
def get_gemini_model():
    """Return an OpenAIServerModel pointed at Google's OpenAI-compatible
    Gemini endpoint.

    The API key is read from the ``GEMINI_API_KEY_1`` environment variable.
    The placeholder fallback is kept for backward compatibility, but we now
    warn loudly: silently sending the placeholder as a key only surfaces the
    misconfiguration later as an opaque auth failure.
    """
    import os  # local import: `os` is not visibly imported at module top — TODO confirm

    api_key = os.getenv("GEMINI_API_KEY_1", "your-gemini-api-key-here")
    if api_key == "your-gemini-api-key-here":
        print("[get_gemini_model] WARNING: GEMINI_API_KEY_1 is not set; "
              "API calls will fail authentication.")
    return OpenAIServerModel(
        model_id="gemini-2.5-pro-preview-03-25",
        api_base="https://generativelanguage.googleapis.com/v1beta/",
        api_key=api_key,
    )


# Build the shared model once at import time; all BasicAgent instances reuse it.
MODEL = get_gemini_model()
23
+
24
class BasicAgent:
    """GAIA-style agent backed by smolagents' ToolCallingAgent.

    Wraps a tool-calling loop (web search, Python interpreter, webpage
    visiting, task-file fetching, Excel parsing) around the shared MODEL and
    prompts it to emit a strictly formatted "FINAL ANSWER: ..." line.
    """

    def __init__(self):
        self.agent = ToolCallingAgent(
            tools=[
                DuckDuckGoSearchTool(),
                PythonInterpreterTool(),
                visit_webpage,
                fetch_task_file,     # fetches byte content via task_id + filename
                read_excel_as_json,  # parses excel content into JSON
            ],
            model=MODEL,
            max_steps=10,
        )
        print("BasicAgent initialized (smolagents).")

    def __call__(self, question: str, taskid: str) -> str:
        """Run the tool-calling agent on *question*; *taskid* lets the file
        tools fetch any attachments belonging to the task."""
        prompt = f"""
You are a general AI assistant.

You may use up to 8 tool calls. Prefer Wikipedia when multiple links are available.
If a page blocks access, skip it.

You MUST end your output with:
FINAL ANSWER: <concise result here>

Rules for FINAL ANSWER:
- If number: no commas for thousands, no units unless explicitly requested.
- If string: no articles, no abbreviations (write city/country names fully), digits in plain text unless specified.
- If comma-separated list: apply the above rules for each element.

TaskId to use for any file fetching: {taskid}
Question: {question}
"""
        result = self.agent.run(prompt)
        # BUG FIX: agent.run() may return a non-str answer object; slicing it
        # for the log (and the declared -> str contract) requires coercion.
        answer = str(result)
        print(f"[BasicAgent] FINAL ANSWER {answer[:250]}")
        return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
 
62
 
 
140
 
141
  try:
142
  # pass files to the agent
143
+ submitted_answer = agent(question_text, task_id)
144
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
145
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
146
  except Exception as e: