Vinsmart06 committed on
Commit
7fda886
·
verified ·
1 Parent(s): c17808b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -49
app.py CHANGED
@@ -121,23 +121,32 @@ class BasicAgent:
121
  return None
122
 
123
  # --- Robust Wikipedia Tool ---
124
- def wiki_search(self, query):
125
  try:
126
- # Clean common AI hallucinations from the query
127
  query = query.strip(' "').replace('TOOL:', '').replace('INPUT:', '')
128
- url = f"https://en.wikipedia.org{query}&format=json"
129
- r = requests.get(url, timeout=10).json()
 
 
 
130
  if not r.get("query", {}).get("search"):
131
- return "No results found. Try a broader search term."
 
 
 
 
 
132
 
133
- title = r["query"]["search"][0]["title"]
134
- # Fetch the actual content summary
135
  summary_url = f"https://en.wikipedia.org{title.replace(' ', '_')}"
136
- page = requests.get(summary_url, timeout=10).json()
137
- return page.get("extract", "No summary available.")
138
- except:
139
- return "Wikipedia access error."
140
-
 
 
141
  def youtube_captions(self, url):
142
  try:
143
  ydl_opts = {"skip_download": True, "writesubtitles": True, "writeautomaticsub": True}
@@ -149,24 +158,31 @@ class BasicAgent:
149
  return f"YouTube error: {e}"
150
 
151
  def execute_tool(self, tool, input_data, file_url):
152
- # Clean input_data (remove extra quotes or labels)
153
  input_data = input_data.strip(' "')
154
-
155
- if tool == "read_image" or tool == "read_excel":
156
- # GAIA Fix: If agent says "none", use the file_url provided by the system
157
- target = file_url if (not input_data or input_data.lower() == "none") else input_data
158
- if not target: return "Error: No file provided."
159
-
160
  try:
161
  r = requests.get(target, timeout=15)
162
  with open("temp_file", "wb") as f: f.write(r.content)
163
-
164
  if tool == "read_image":
165
  return pytesseract.image_to_string(Image.open("temp_file"))
166
- else:
167
- return pd.read_excel("temp_file").to_string()[:5000]
168
  except Exception as e:
169
- return f"File tool error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  if tool == "wiki_search": return self.wiki_search(input_data)
172
 
@@ -200,45 +216,49 @@ class BasicAgent:
200
 
201
  def agent_loop(self, question, file_url):
202
  memory = ""
203
- # Explicitly tell the agent about the file
204
- context = f"A file for this task is located at: {file_url}" if file_url else "No file attached."
205
 
206
  for step in range(5):
207
- prompt = f"""You are a GAIA solver.
208
- Available tools: read_excel, read_image, wiki_search, calculator.
209
  {context}
210
 
211
  Question: {question}
212
  History: {memory}
213
 
214
- If you have the answer, respond with FINAL: [answer].
215
- If you need to use a tool, respond with:
216
- TOOL: [tool_name]
217
- INPUT: [input_data]"""
 
218
 
219
  response = self.client.chat.completions.create(
220
  model="gpt-4o-mini",
221
- temperature=0, # Crucial for accuracy
222
- messages=[{"role": "system", "content": "You are a precise agent. For 'opposite' questions, think carefully. For files, use the provided URL."},
223
  {"role": "user", "content": prompt}]
224
  )
225
 
226
- resp_text = response.choices[0].message.content.strip()
227
- print(f"Step {step}: {resp_text}")
228
-
229
- if "FINAL:" in resp_text:
230
- return resp_text.split("FINAL:")[-1].strip()
231
-
232
- if "TOOL:" in resp_text and "INPUT:" in resp_text:
233
- try:
234
- tool_name = re.search(r"TOOL:\s*(.*)", resp_text).group(1).split('\n')[0].strip()
235
- tool_input = re.search(r"INPUT:\s*(.*)", resp_text).group(1).strip()
236
- result = self.execute_tool(tool_name, tool_input, file_url)
237
- memory += f"\n- {tool_name} output: {result[:1000]}"
238
- except:
239
- memory += "\n- Failed to parse tool call."
240
- else:
241
- memory += f"\n- {resp_text}"
 
 
 
 
242
 
243
  return "No answer found."
244
 
 
121
  return None
122
 
123
  # --- Robust Wikipedia Tool ---
124
+ def wiki_search(self, query):
125
  try:
126
+ # Clean the query
127
  query = query.strip(' "').replace('TOOL:', '').replace('INPUT:', '')
128
+ # Step 1: Search for the page
129
+ search_url = "https://en.wikipedia.org"
130
+ params = {"action": "query", "list": "search", "srsearch": query, "format": "json"}
131
+ r = requests.get(search_url, params=params, timeout=10).json()
132
+
133
  if not r.get("query", {}).get("search"):
134
+ return "No results. Try simpler keywords."
135
+
136
+ # Step 2: Get the top result's snippet and title
137
+ best_result = r["query"]["search"][0]
138
+ title = best_result["title"]
139
+ snippet = BeautifulSoup(best_result["snippet"], "html.parser").get_text()
140
 
141
+ # Step 3: Get the page summary content
 
142
  summary_url = f"https://en.wikipedia.org{title.replace(' ', '_')}"
143
+ sum_r = requests.get(summary_url, timeout=10).json()
144
+ extract = sum_r.get("extract", snippet)
145
+
146
+ return f"Source: {title}\nContent: {extract}"
147
+ except Exception as e:
148
+ return f"Wiki error: {str(e)}"
149
+
150
  def youtube_captions(self, url):
151
  try:
152
  ydl_opts = {"skip_download": True, "writesubtitles": True, "writeautomaticsub": True}
 
158
  return f"YouTube error: {e}"
159
 
160
  def execute_tool(self, tool, input_data, file_url):
 
161
  input_data = input_data.strip(' "')
162
+
163
+ # Tool: Image or Excel
164
+ if tool in ["read_image", "read_excel"]:
165
+ target = file_url if (not input_data or "http" not in input_data) else input_data
166
+ if not target: return "Error: No file available for this task."
 
167
  try:
168
  r = requests.get(target, timeout=15)
169
  with open("temp_file", "wb") as f: f.write(r.content)
 
170
  if tool == "read_image":
171
  return pytesseract.image_to_string(Image.open("temp_file"))
172
+ return pd.read_excel("temp_file").to_string()[:5000]
 
173
  except Exception as e:
174
+ return f"File error: {str(e)}"
175
+
176
+ if tool == "wiki_search":
177
+ return self.wiki_search(input_data)
178
+
179
+ if tool == "calculator":
180
+ try:
181
+ expr = re.sub(r'[^\d\+\-\*\/\(\)\.]', '', input_data)
182
+ return str(eval(expr, {"__builtins__": {}}))
183
+ except: return "Math error."
184
+
185
+ return f"Tool {tool} not recognized."
186
 
187
  if tool == "wiki_search": return self.wiki_search(input_data)
188
 
 
216
 
217
  def agent_loop(self, question, file_url):
218
  memory = ""
219
+ context = f"File URL: {file_url}" if file_url else "No file provided."
 
220
 
221
  for step in range(5):
222
+ prompt = f"""You are a GAIA solver. Use tools only when necessary.
223
+ Available tools: wiki_search, read_image, read_excel, calculator.
224
  {context}
225
 
226
  Question: {question}
227
  History: {memory}
228
 
229
+ Output Format:
230
+ TOOL: tool_name
231
+ INPUT: tool_input
232
+ OR
233
+ FINAL: your_answer"""
234
 
235
  response = self.client.chat.completions.create(
236
  model="gpt-4o-mini",
237
+ temperature=0,
238
+ messages=[{"role": "system", "content": "Be concise. If you see a file URL in context, use it for 'read' tools."},
239
  {"role": "user", "content": prompt}]
240
  )
241
 
242
+ resp = response.choices[0].message.content.strip()
243
+ print(f"Step {step}: {resp}")
244
+
245
+ if "FINAL:" in resp:
246
+ return resp.split("FINAL:")[-1].strip()
247
+
248
+ # Robust Tool Parsing
249
+ try:
250
+ t_match = re.search(r"TOOL:\s*(.*)", resp, re.I)
251
+ i_match = re.search(r"INPUT:\s*(.*)", resp, re.I)
252
+
253
+ if t_match and i_match:
254
+ t_name = t_match.group(1).strip().lower()
255
+ t_input = i_match.group(1).strip()
256
+ result = self.execute_tool(t_name, t_input, file_url)
257
+ memory += f"\nStep {step} {t_name} output: {result[:800]}"
258
+ else:
259
+ memory += f"\nStep {step} info: {resp}"
260
+ except:
261
+ memory += f"\nStep {step}: Parsing error."
262
 
263
  return "No answer found."
264