thinhbtt commited on
Commit
c3887a9
·
verified ·
1 Parent(s): e3e2f42

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +353 -126
app.py CHANGED
@@ -1,107 +1,353 @@
 
1
  import os
2
- import gradio as gr
 
 
 
3
  import requests
4
  import pandas as pd
5
- import re
6
 
7
- # -----------------------------------------------
8
- # CONSTANTS
9
- # -----------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
-
12
-
13
- # -----------------------------------------------
14
- # BASIC RULE-BASED AGENT (Không dùng OpenAI)
15
- # -----------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  class BasicAgent:
17
  def __init__(self):
18
- print("Rule-Based Agent initialized.")
 
19
 
20
- # ---------- 1. Solve math expressions ----------
21
  def solve_math(self, text):
22
- # detect simple arithmetic 1+2, 5*7, 10/2...
23
  expr = re.findall(r"[-+]?\d+\.?\d*|\+|\-|\*|\/", text)
 
24
  if len(expr) >= 3:
25
  try:
26
- result = eval("".join(expr))
27
- if isinstance(result, float) and result.is_integer():
28
- result = int(result)
29
- return str(result)
30
- except:
 
 
 
 
31
  return None
32
  return None
33
 
34
- # ---------- 2. Count characters inside quotes ----------
35
  def solve_counting(self, text):
36
- m = re.search(r'"(.*?)"', text)
 
37
  if m:
38
  return str(len(m.group(1)))
 
 
 
 
 
 
 
39
  return None
40
 
41
- # ---------- 3. If question asks for “how many words” ----------
42
- def solve_word_count(self, text):
43
- m = re.search(r'count the words in "(.*?)"', text.lower())
44
- if m:
45
- return str(len(m.group(1).split()))
46
- return None
47
-
48
- # ---------- 4. Simple factual patterns ----------
49
- def solve_simple_fact(self, text):
50
- text_lower = text.lower()
51
-
52
- if "capital of france" in text_lower:
53
  return "Paris"
54
- if "capital of japan" in text_lower:
55
  return "Tokyo"
56
- if "pi to 2 decimals" in text_lower:
57
  return "3.14"
58
-
 
 
 
 
59
  return None
60
 
61
- # ---------- MAIN CALL ----------
62
- def __call__(self, question: str) -> str:
63
- print(f"Agent solving: {question[:50]}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
 
 
 
65
  # 1. math
66
- ans = self.solve_math(question)
67
  if ans:
68
- print("→ Math solved:", ans)
69
  return ans
70
-
71
- # 2. char counting
72
- ans = self.solve_counting(question)
73
  if ans:
74
- print("→ Counting solved:", ans)
75
  return ans
76
-
77
- # 3. word counting
78
- ans = self.solve_word_count(question)
79
  if ans:
80
- print("→ Word count solved:", ans)
81
  return ans
82
-
83
- # 4. simple fact patterns
84
- ans = self.solve_simple_fact(question)
85
  if ans:
86
- print("→ Fact solved:", ans)
87
  return ans
88
-
89
- # default fallback
90
- print("→ No rule matched → returning fallback")
91
  return "unknown"
92
 
93
-
94
- # ---------------------------------------------------------
95
- # SUBMISSION + UI CODE (giữ nguyên, không chỉnh sửa)
96
- # ---------------------------------------------------------
97
  def run_and_submit_all(profile: gr.OAuthProfile | None):
98
- space_id = os.getenv("SPACE_ID")
99
-
 
 
 
100
  if profile:
101
  username = f"{profile.username}"
102
- print(f"User logged in: {username}")
103
  else:
104
- print("User not logged in.")
105
  return "Please Login to Hugging Face with the button.", None
106
 
107
  api_url = DEFAULT_API_URL
@@ -115,93 +361,74 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
115
  return f"Error initializing agent: {e}", None
116
 
117
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
118
- print(agent_code)
119
 
120
  # Fetch Questions
121
  try:
122
- response = requests.get(questions_url, timeout=15)
123
- response.raise_for_status()
124
- questions_data = response.json()
125
- print(f"Fetched {len(questions_data)} questions.")
 
126
  except Exception as e:
127
  return f"Error fetching questions: {e}", None
128
 
129
- # Run Agent
130
  results_log = []
131
  answers_payload = []
132
-
133
  for item in questions_data:
134
  task_id = item.get("task_id")
135
- qtext = item.get("question")
136
-
137
- if not task_id or qtext is None:
138
  continue
139
-
140
  try:
141
- submitted_answer = agent(qtext)
142
- answers_payload.append({
143
- "task_id": task_id,
144
- "submitted_answer": submitted_answer
145
- })
146
-
147
- results_log.append({
148
- "Task ID": task_id,
149
- "Question": qtext,
150
- "Submitted Answer": submitted_answer
151
- })
152
  except Exception as e:
153
- results_log.append({
154
- "Task ID": task_id,
155
- "Question": qtext,
156
- "Submitted Answer": f"ERROR: {e}"
157
- })
158
-
159
- # Submit
160
- submission_data = {
161
- "username": username,
162
- "agent_code": agent_code,
163
- "answers": answers_payload
164
- }
165
 
166
- try:
167
- response = requests.post(submit_url, json=submission_data, timeout=60)
168
- response.raise_for_status()
169
- result_data = response.json()
170
 
 
 
 
 
171
  final_status = (
172
- f"Submission Successful!\n"
173
- f"User: {result_data.get('username')}\n"
174
- f"Score: {result_data.get('score')}% "
 
175
  )
176
-
177
  return final_status, pd.DataFrame(results_log)
178
-
 
 
 
 
 
 
179
  except Exception as e:
180
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
181
 
182
-
183
- # ---------------------------
184
- # GRADIO UI
185
- # ---------------------------
186
  with gr.Blocks() as demo:
187
- gr.Markdown("# Basic Agent Evaluation Runner")
188
-
189
- gr.Markdown("""
190
- **Instructions:**
191
- 1. Duplicate this space.
192
- 2. Modify your agent's logic in the BasicAgent class only.
193
- 3. Login to HuggingFace.
194
- 4. Press Run Evaluation & Submit.
195
- """)
196
-
197
  gr.LoginButton()
198
-
199
  run_button = gr.Button("Run Evaluation & Submit All Answers")
200
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5)
201
- results_table = gr.DataFrame(label="Questions and Agent Answers")
202
-
203
- run_button.click(run_and_submit_all, outputs=[status_output, results_table])
204
 
 
205
 
206
  if __name__ == "__main__":
207
- demo.launch()
 
1
+ # app.py (FULL - Rule-based Level 2 Agent using Wikipedia + file reading + heuristics)
2
  import os
3
+ import re
4
+ import io
5
+ import time
6
+ import json
7
  import requests
8
  import pandas as pd
9
+ import gradio as gr
10
 
11
+ # optional imports; agent works without them but will use if available
12
+ try:
13
+ from bs4 import BeautifulSoup
14
+ except Exception:
15
+ BeautifulSoup = None
16
+
17
+ try:
18
+ import PyPDF2
19
+ except Exception:
20
+ PyPDF2 = None
21
+
22
+ try:
23
+ from PIL import Image
24
+ import pytesseract
25
+ except Exception:
26
+ Image = None
27
+ pytesseract = None
28
+
29
+ # ------------------------------
30
+ # Constants
31
+ # ------------------------------
32
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
33
+ WIKIPEDIA_API = "https://en.wikipedia.org/w/api.php"
34
+ USER_AGENT = {"User-Agent": "HF-GAIA-Agent/1.0 (contact: you@example.com)"}
35
+
36
+ # ------------------------------
37
+ # Utility functions
38
+ # ------------------------------
39
+ def extract_numbers(text):
40
+ """Return list of numeric strings found in text (integers or floats)."""
41
+ if not text:
42
+ return []
43
+ nums = re.findall(r"\d{1,4}(?:,\d{3})*(?:\.\d+)?|\d+\.\d+|\d+", text.replace("\xa0", " "))
44
+ # normalize commas
45
+ clean = [n.replace(",", "") for n in nums]
46
+ return clean
47
+
48
+ def simple_normalize(s):
49
+ return re.sub(r"\s+", " ", (s or "").strip()).lower()
50
+
51
+ def wikipedia_search_first_page(query):
52
+ """Search wikipedia and return first page title or None."""
53
+ params = {
54
+ "action": "query",
55
+ "list": "search",
56
+ "srsearch": query,
57
+ "format": "json",
58
+ "srlimit": 3,
59
+ }
60
+ try:
61
+ r = requests.get(WIKIPEDIA_API, params=params, headers=USER_AGENT, timeout=10)
62
+ r.raise_for_status()
63
+ data = r.json()
64
+ hits = data.get("query", {}).get("search", [])
65
+ if hits:
66
+ return hits[0].get("title")
67
+ except Exception:
68
+ return None
69
+ return None
70
+
71
+ def wikipedia_get_extract(title):
72
+ """Return extract (plain text) for a page title."""
73
+ params = {
74
+ "action": "query",
75
+ "prop": "extracts",
76
+ "explaintext": 1,
77
+ "titles": title,
78
+ "format": "json",
79
+ "redirects": 1,
80
+ }
81
+ try:
82
+ r = requests.get(WIKIPEDIA_API, params=params, headers=USER_AGENT, timeout=10)
83
+ r.raise_for_status()
84
+ data = r.json()
85
+ pages = data.get("query", {}).get("pages", {})
86
+ for pid, page in pages.items():
87
+ return page.get("extract", "")
88
+ except Exception:
89
+ return ""
90
+ return ""
91
+
92
+ def wiki_try_find_number(question):
93
+ """
94
+ Heuristic: attempt to craft a search query from question and find numeric answers in page extracts.
95
+ Returns a candidate numeric string or None.
96
+ """
97
+ q = question
98
+ # remove leading "How many" etc to get search hint
99
+ search_hint = q
100
+ search_hint = re.sub(r"(?i)how many|between.*|from.*to.*|included.*|in the video.*", "", search_hint)
101
+ # fallback use whole q
102
+ title = wikipedia_search_first_page(search_hint)
103
+ if not title:
104
+ # try full question
105
+ title = wikipedia_search_first_page(q)
106
+ if not title:
107
+ return None
108
+ extract = wikipedia_get_extract(title)
109
+ if not extract:
110
+ return None
111
+ # first try: context windows where words from question appear
112
+ words = re.findall(r"[A-Za-z]{3,}", q)
113
+ words = [w.lower() for w in words][:6]
114
+ best_context = extract
115
+ # find sentences containing relevant keywords
116
+ sentences = re.split(r'(?<=[\.\?\!])\s+', extract)
117
+ candidate_nums = []
118
+ for s in sentences:
119
+ s_low = s.lower()
120
+ # prefer sentences that contain several words from question or the phrase 'studio album(s)' etc
121
+ score = sum(1 for w in words if w in s_low)
122
+ if score >= 1 or any(k in s_low for k in ["studio album", "album", "species", "population", "released", "released in"]):
123
+ nums = extract_numbers(s)
124
+ for n in nums:
125
+ candidate_nums.append((n, score, s.strip()))
126
+ if candidate_nums:
127
+ # sort by score and choose top numeric
128
+ candidate_nums.sort(key=lambda x: (x[1], len(x[2])), reverse=True)
129
+ return candidate_nums[0][0]
130
+ # fallback: any number in extract
131
+ all_nums = extract_numbers(extract)
132
+ if all_nums:
133
+ return all_nums[0]
134
+ return None
135
+
136
+ def fetch_file_text(api_url, task_id):
137
+ """Call GET /files/{task_id} to fetch file content if present.
138
+ Returns text or None.
139
+ """
140
+ try:
141
+ files_url = f"{api_url}/files/{task_id}"
142
+ r = requests.get(files_url, headers=USER_AGENT, timeout=15)
143
+ if r.status_code == 200:
144
+ content_type = r.headers.get("Content-Type", "")
145
+ # some endpoints may return raw text or JSON with 'content' and 'filename'
146
+ if "application/json" in content_type:
147
+ j = r.json()
148
+ # expecting {'filename': ..., 'content': '...'} maybe
149
+ if isinstance(j, dict):
150
+ if j.get("content"):
151
+ return j.get("content")
152
+ # else maybe direct text in 'text' field
153
+ if j.get("text"):
154
+ return j.get("text")
155
+ # else if it's list, return aggregated
156
+ if isinstance(j, list):
157
+ texts = []
158
+ for it in j:
159
+ if isinstance(it, dict) and "content" in it:
160
+ texts.append(it.get("content", ""))
161
+ return "\n".join(texts) if texts else None
162
+ # if raw PDF or binary
163
+ raw = r.content
164
+ # try to interpret as text
165
+ try:
166
+ text = raw.decode("utf-8")
167
+ # if readable, return
168
+ if len(text.strip()) > 20:
169
+ return text
170
+ except Exception:
171
+ pass
172
+ # try pdf via PyPDF2 if available
173
+ if PyPDF2 is not None:
174
+ try:
175
+ reader = PyPDF2.PdfReader(io.BytesIO(raw))
176
+ pages = []
177
+ for p in reader.pages:
178
+ try:
179
+ pages.append(p.extract_text() or "")
180
+ except Exception:
181
+ continue
182
+ return "\n".join(pages).strip() or None
183
+ except Exception:
184
+ pass
185
+ # lastly if image and pytesseract available
186
+ if Image is not None and pytesseract is not None:
187
+ try:
188
+ img = Image.open(io.BytesIO(raw))
189
+ txt = pytesseract.image_to_string(img)
190
+ return txt
191
+ except Exception:
192
+ pass
193
+ except Exception:
194
+ pass
195
+ return None
196
+
197
+ def youtube_oembed_title_desc(url):
198
+ """Try to get title/description using oembed """
199
+ try:
200
+ oembed_url = "https://www.youtube.com/oembed"
201
+ r = requests.get(oembed_url, params={"url": url, "format": "json"}, headers=USER_AGENT, timeout=10)
202
+ if r.status_code == 200:
203
+ j = r.json()
204
+ title = j.get("title", "")
205
+ # description often not present in oembed; return title
206
+ return title
207
+ except Exception:
208
+ pass
209
+ # try noembed
210
+ try:
211
+ r = requests.get("https://noembed.com/embed", params={"url": url}, headers=USER_AGENT, timeout=10)
212
+ if r.status_code == 200:
213
+ j = r.json()
214
+ return j.get("title", "") + " " + (j.get("description") or "")
215
+ except Exception:
216
+ pass
217
+ return ""
218
+
219
+ # ------------------------------
220
+ # Agent
221
+ # ------------------------------
222
  class BasicAgent:
223
  def __init__(self):
224
+ print("Level-2 Rule Agent initialized (wiki + file tools).")
225
+ self.api_url = DEFAULT_API_URL
226
 
 
227
  def solve_math(self, text):
 
228
  expr = re.findall(r"[-+]?\d+\.?\d*|\+|\-|\*|\/", text)
229
+ # if pattern like "What is 12 + 5?" or "12 + 5 = ?"
230
  if len(expr) >= 3:
231
  try:
232
+ # join tokens but ensure it's a safe expression (only digits and ops)
233
+ safe = "".join(expr)
234
+ # limit length
235
+ if len(safe) < 100:
236
+ res = eval(safe)
237
+ if isinstance(res, float) and res.is_integer():
238
+ res = int(res)
239
+ return str(res)
240
+ except Exception:
241
  return None
242
  return None
243
 
 
244
  def solve_counting(self, text):
245
+ # detect patterns like 'how many characters in "..."' or 'How many words in "..."'
246
+ m = re.search(r'how many characters in\s*"(.*?)"', text, re.I)
247
  if m:
248
  return str(len(m.group(1)))
249
+ m2 = re.search(r'how many words in\s*"(.*?)"', text, re.I)
250
+ if m2:
251
+ return str(len(m2.group(1).split()))
252
+ # generic quoted count
253
+ m3 = re.search(r'"(.*?)"', text)
254
+ if m3 and ("characters" in text.lower() or "how many" in text.lower()):
255
+ return str(len(m3.group(1)))
256
  return None
257
 
258
+ def solve_simple_facts(self, text):
259
+ t = text.lower()
260
+ # trivial mappings
261
+ if "capital of france" in t:
 
 
 
 
 
 
 
 
262
  return "Paris"
263
+ if "capital of japan" in t:
264
  return "Tokyo"
265
+ if "pi to 2 decimals" in t or "pi to 2 decimal" in t:
266
  return "3.14"
267
+ # reversed sentence detection (some GAIA items)
268
+ if re.search(r'\bfi\b$', text.strip(), re.I) or ("reverse" in t and "text" in t):
269
+ # try a simple reverse of words if that seems to be asked
270
+ words = text.split()
271
+ return " ".join(w[::-1] for w in words)
272
  return None
273
 
274
+ def solve_with_wikipedia(self, question, task_id=None):
275
+ """Try to use wiki heuristics to find numeric answers."""
276
+ try:
277
+ # If the question contains a specific URL to analyze (youtube), handle that
278
+ m = re.search(r'https?://[^\s]+', question)
279
+ if m:
280
+ url = m.group(0)
281
+ # youtube special handling
282
+ if "youtube.com" in url or "youtu.be" in url:
283
+ txt = youtube_oembed_title_desc(url)
284
+ if txt:
285
+ nums = extract_numbers(txt)
286
+ if nums:
287
+ return nums[0]
288
+ # if other URL, try fetching content
289
+ try:
290
+ r = requests.get(url, headers=USER_AGENT, timeout=10)
291
+ r.raise_for_status()
292
+ page_text = r.text
293
+ nums = extract_numbers(page_text)
294
+ if nums:
295
+ return nums[0]
296
+ except Exception:
297
+ pass
298
+
299
+ # try to fetch possible file text via dataset files route (some tasks have attachments)
300
+ if task_id:
301
+ file_text = fetch_file_text(self.api_url, task_id)
302
+ if file_text:
303
+ n = extract_numbers(file_text)
304
+ if n:
305
+ return n[0]
306
+ # else attempt basic string match
307
+ return None
308
+
309
+ # else try wikipedia search heuristics
310
+ candidate = wiki_try_find_number(question)
311
+ if candidate:
312
+ return candidate
313
+ except Exception:
314
+ return None
315
+ return None
316
 
317
+ def __call__(self, question: str, task_id: str = None) -> str:
318
+ q = question or ""
319
+ print("Solving question:", q[:80].replace("\n", " ") + "...")
320
  # 1. math
321
+ ans = self.solve_math(q)
322
  if ans:
 
323
  return ans
324
+ # 2. counting
325
+ ans = self.solve_counting(q)
 
326
  if ans:
 
327
  return ans
328
+ # 3. trivial facts / simple patterns
329
+ ans = self.solve_simple_facts(q)
 
330
  if ans:
 
331
  return ans
332
+ # 4. wikipedia / files / url heuristics
333
+ ans = self.solve_with_wikipedia(q, task_id=task_id)
 
334
  if ans:
 
335
  return ans
336
+ # fallback
 
 
337
  return "unknown"
338
 
339
+ # ------------------------------
340
+ # Submission runner
341
+ # ------------------------------
 
342
  def run_and_submit_all(profile: gr.OAuthProfile | None):
343
+ """
344
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
345
+ and displays the results.
346
+ """
347
+ space_id = os.getenv("SPACE_ID") or "unknown-space"
348
  if profile:
349
  username = f"{profile.username}"
 
350
  else:
 
351
  return "Please Login to Hugging Face with the button.", None
352
 
353
  api_url = DEFAULT_API_URL
 
361
  return f"Error initializing agent: {e}", None
362
 
363
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
364
 
365
  # Fetch Questions
366
  try:
367
+ r = requests.get(questions_url, headers=USER_AGENT, timeout=15)
368
+ r.raise_for_status()
369
+ questions_data = r.json()
370
+ if not isinstance(questions_data, list):
371
+ return "Questions endpoint returned invalid format.", None
372
  except Exception as e:
373
  return f"Error fetching questions: {e}", None
374
 
 
375
  results_log = []
376
  answers_payload = []
 
377
  for item in questions_data:
378
  task_id = item.get("task_id")
379
+ question_text = item.get("question")
380
+ if not task_id or question_text is None:
 
381
  continue
 
382
  try:
383
+ ans = agent(question_text, task_id=task_id)
384
+ # ensure answers are strings
385
+ submitted_answer = str(ans) if ans is not None else "unknown"
386
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
387
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
388
+ time.sleep(0.2) # polite pause to avoid hammering external services
 
 
 
 
 
389
  except Exception as e:
390
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
 
 
 
 
 
 
 
 
 
 
 
391
 
392
+ if not answers_payload:
393
+ return "Agent did not produce any answers.", pd.DataFrame(results_log)
394
+
395
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
396
 
397
+ try:
398
+ resp = requests.post(submit_url, json=submission_data, headers=USER_AGENT, timeout=60)
399
+ resp.raise_for_status()
400
+ result = resp.json()
401
  final_status = (
402
+ f"Submission Successful!\nUser: {result.get('username')}\n"
403
+ f"Overall Score: {result.get('score', 'N/A')}% "
404
+ f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
405
+ f"Message: {result.get('message', '')}"
406
  )
 
407
  return final_status, pd.DataFrame(results_log)
408
+ except requests.exceptions.HTTPError as e:
409
+ try:
410
+ body = e.response.json()
411
+ detail = body.get("detail") or json.dumps(body)[:400]
412
+ except Exception:
413
+ detail = e.response.text[:400]
414
+ return f"Submission Failed: HTTP {e.response.status_code} - {detail}", pd.DataFrame(results_log)
415
  except Exception as e:
416
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
417
 
418
+ # ------------------------------
419
+ # Gradio UI
420
+ # ------------------------------
 
421
  with gr.Blocks() as demo:
422
+ gr.Markdown("# Level-2 Agent (Rule-based + Wiki/File Tools)")
423
+ gr.Markdown(
424
+ "Duplicate this space, make it public, then login and press **Run Evaluation & Submit All Answers**."
425
+ )
 
 
 
 
 
 
426
  gr.LoginButton()
 
427
  run_button = gr.Button("Run Evaluation & Submit All Answers")
428
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
429
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
 
430
 
431
+ run_button.click(fn=run_and_submit_all, inputs=[], outputs=[status_output, results_table])
432
 
433
  if __name__ == "__main__":
434
+ demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))