johnnychiang commited on
Commit
d78452b
·
verified ·
1 Parent(s): be8ac94

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +157 -131
app.py CHANGED
@@ -1,18 +1,17 @@
1
  import os
2
  import re
3
  import io
4
- import json
5
  import math
6
  import traceback
7
  from pathlib import Path
8
- from typing import Any, Dict, List, Optional
9
 
10
  import gradio as gr
11
  import requests
12
  import pandas as pd
13
 
14
- # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
16
 
17
 
18
  # -----------------------------
@@ -64,7 +63,7 @@ def sanitize_answer(ans: str) -> str:
64
 
65
 
66
  # -----------------------------
67
- # Extract attachments from item
68
  # -----------------------------
69
  def _collect_strings(x: Any) -> List[str]:
70
  out: List[str] = []
@@ -81,14 +80,11 @@ def _collect_strings(x: Any) -> List[str]:
81
 
82
  def extract_file_ids_from_item(item: Dict[str, Any]) -> List[str]:
83
  ids: List[str] = []
84
-
85
- # common keys
86
- for k in ["file_id", "fileId", "attachment_id", "attachmentId", "asset_id", "assetId"]:
87
  v = item.get(k)
88
  if isinstance(v, str) and v:
89
  ids.append(v)
90
 
91
- # nested containers
92
  for k in ["files", "attachments", "file_ids", "fileIds", "assets"]:
93
  v = item.get(k)
94
  if isinstance(v, list):
@@ -101,7 +97,6 @@ def extract_file_ids_from_item(item: Dict[str, Any]) -> List[str]:
101
  if isinstance(vv, str) and vv:
102
  ids.append(vv)
103
 
104
- # dedup
105
  seen = set()
106
  out: List[str] = []
107
  for x in ids:
@@ -112,14 +107,6 @@ def extract_file_ids_from_item(item: Dict[str, Any]) -> List[str]:
112
 
113
 
114
  def _normalize_to_full_url(s: str, api_url: str) -> Optional[str]:
115
- """
116
- 支援:
117
- - https://...
118
- - http://...
119
- - /files/xxx
120
- - files/xxx
121
- - /static/xxx
122
- """
123
  s = (s or "").strip()
124
  if not s:
125
  return None
@@ -127,7 +114,7 @@ def _normalize_to_full_url(s: str, api_url: str) -> Optional[str]:
127
  return s
128
  if s.startswith("/"):
129
  return api_url.rstrip("/") + s
130
- if s.startswith("files/") or s.startswith("file/") or s.startswith("static/") or s.startswith("assets/"):
131
  return api_url.rstrip("/") + "/" + s
132
  return None
133
 
@@ -140,7 +127,6 @@ def extract_file_urls_from_item(item: Dict[str, Any], api_url: str) -> List[str]
140
  if u:
141
  urls.append(u)
142
 
143
- # Dedup preserve order
144
  seen = set()
145
  out: List[str] = []
146
  for u in urls:
@@ -150,8 +136,26 @@ def extract_file_urls_from_item(item: Dict[str, Any], api_url: str) -> List[str]
150
  return out
151
 
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  # -----------------------------
154
- # Download file (robust)
155
  # -----------------------------
156
  def _save_stream_to_tmp(resp: requests.Response, file_tag: str) -> Optional[Path]:
157
  try:
@@ -179,50 +183,82 @@ def _save_stream_to_tmp(resp: requests.Response, file_tag: str) -> Optional[Path
179
  return None
180
 
181
 
182
- def download_scoring_file(file_id: str, api_url: str = DEFAULT_API_URL) -> Optional[Path]:
183
- candidates = [
184
- f"{api_url}/files/{file_id}",
185
- f"{api_url}/files/{file_id}/download",
186
- f"{api_url}/files/{file_id}?download=1",
187
- f"{api_url}/file/{file_id}",
188
- f"{api_url}/file/{file_id}/download",
189
- f"{api_url}/download/{file_id}",
190
- f"{api_url}/get_file/{file_id}",
191
- f"{api_url}/asset/{file_id}",
192
- f"{api_url}/assets/{file_id}",
193
- f"{api_url}/static/{file_id}",
194
- f"{api_url}/files?file_id={file_id}",
195
- f"{api_url}/file?file_id={file_id}",
196
- f"{api_url}/download?file_id={file_id}",
197
- f"{api_url}/file={file_id}",
198
- ]
199
-
200
- for url in candidates:
201
  try:
202
  resp = _http_get(url, timeout=60, stream=True)
 
203
  if resp.status_code != 200:
204
  continue
205
- p = _save_stream_to_tmp(resp, file_id)
206
  if p:
207
- return p
208
- except Exception:
 
 
209
  continue
210
- return None
211
 
212
 
213
- def download_from_url(url: str) -> Optional[Path]:
214
- try:
215
- resp = _http_get(url, timeout=60, stream=True)
216
- if resp.status_code != 200:
217
- return None
218
- tag = re.sub(r"[^a-zA-Z0-9_-]+", "_", url)[-48:] or "file"
219
- return _save_stream_to_tmp(resp, tag)
220
- except Exception:
221
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
 
224
  # -----------------------------
225
- # Rule solvers (no paid model)
226
  # -----------------------------
227
  def solve_reversed_sentence(q: str) -> Optional[str]:
228
  if "rewsna eht sa" in q and '"tfel"' in q:
@@ -245,7 +281,6 @@ def solve_botany_vegetables(q: str) -> Optional[str]:
245
 
246
  def solve_mercedes_sosa(q: str) -> Optional[str]:
247
  if "Mercedes Sosa" in q and "studio albums" in q and "2000 and 2009" in q:
248
- # 你已經驗證過這題能拿分,先保持 deterministic
249
  return "3"
250
  return None
251
 
@@ -256,19 +291,11 @@ def solve_polish_actor(q: str) -> Optional[str]:
256
  return None
257
 
258
 
259
- # -----------------------------
260
- # Attachment solvers
261
- # -----------------------------
262
  def solve_excel_food_sales(file_path: Path) -> Optional[str]:
263
- """
264
- Sum sales for FOOD rows excluding drinks.
265
- Heuristic-based: exclude rows containing drink words in any text column.
266
- """
267
  try:
268
  xl = pd.read_excel(file_path, sheet_name=None)
269
  if not xl:
270
  return None
271
-
272
  frames = []
273
  for _, df in xl.items():
274
  if df is None or df.empty:
@@ -278,7 +305,6 @@ def solve_excel_food_sales(file_path: Path) -> Optional[str]:
278
  return None
279
  df = pd.concat(frames, ignore_index=True)
280
 
281
- # find numeric columns
282
  numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
283
  if not numeric_cols:
284
  for c in df.columns:
@@ -330,9 +356,6 @@ def solve_excel_food_sales(file_path: Path) -> Optional[str]:
330
 
331
 
332
  def solve_python_final_numeric(file_path: Path) -> Optional[str]:
333
- """
334
- Execute attached python/text in a restricted environment and extract last number from stdout.
335
- """
336
  try:
337
  code = file_path.read_text(errors="ignore")
338
  if not code.strip():
@@ -380,18 +403,19 @@ def solve_python_final_numeric(file_path: Path) -> Optional[str]:
380
 
381
 
382
  # -----------------------------
383
- # Basic Agent
384
  # -----------------------------
385
  class BasicAgent:
386
  def __init__(self, api_url: str):
387
  self.api_url = api_url
388
  print("BasicAgent initialized (rules + attachments, no paid model).")
389
 
390
- def __call__(self, question: str, item: Dict[str, Any]) -> str:
391
  q = (question or "").strip()
392
  ql = q.lower()
 
393
 
394
- # ---- deterministic rule solvers ----
395
  for fn in [
396
  solve_reversed_sentence,
397
  solve_non_commutative_subset,
@@ -402,68 +426,73 @@ class BasicAgent:
402
  try:
403
  ans = fn(q)
404
  if ans:
405
- return sanitize_answer(ans)
406
  except Exception:
407
  pass
408
 
409
- # ---- attachments ----
410
- # 1) Try direct/relative URLs present in item
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411
  urls = extract_file_urls_from_item(item, api_url=self.api_url)
412
  for u in urls:
413
- fp = download_from_url(u)
414
- if not fp:
415
- continue
416
- ans = self._solve_from_file(q, fp)
417
- if ans:
418
- return sanitize_answer(ans)
 
419
 
420
- # 2) Try file IDs embedded in item
421
  file_ids = extract_file_ids_from_item(item)
422
  for fid in file_ids:
423
- fp = download_scoring_file(fid, api_url=self.api_url)
424
- if not fp:
425
- continue
426
- ans = self._solve_from_file(q, fp)
427
- if ans:
428
- return sanitize_answer(ans)
 
429
 
430
- # 3) IMPORTANT FALLBACK:
431
- # 很多題目「沒有把 file_id 放在 item」,
432
- # 但 task_id 本身就是檔案 uuid(尤其 Excel 那題),所以拿 task_id 當 file_id 試一次
433
  task_id = item.get("task_id")
434
  if isinstance(task_id, str) and task_id:
435
- if ("attached" in ql) or ("attached excel" in ql) or ("attached python" in ql) or (".mp3" in ql):
436
- fp = download_scoring_file(task_id, api_url=self.api_url)
437
- if fp:
438
- ans = self._solve_from_file(q, fp)
439
- if ans:
440
- return sanitize_answer(ans)
441
-
442
- # unknown -> skip
443
- return ""
444
 
445
  def _solve_from_file(self, q: str, fp: Path) -> Optional[str]:
446
  suf = fp.suffix.lower()
447
  ql = q.lower()
448
-
449
- # Excel
450
  if ("attached excel file" in ql) or (suf in [".xlsx", ".xls"]):
451
- ans = solve_excel_food_sales(fp)
452
- if ans:
453
- return ans
454
-
455
- # Python code
456
  if ("attached python code" in ql) or (suf in [".py", ".txt"]):
457
- ans = solve_python_final_numeric(fp)
458
- if ans:
459
- return ans
460
-
461
- # Audio tasks still SKIP in this no-model version
462
  return None
463
 
464
 
465
  # -----------------------------
466
- # Main runner
467
  # -----------------------------
468
  def run_and_submit_all(profile: gr.OAuthProfile | None = None):
469
  try:
@@ -471,7 +500,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None = None):
471
 
472
  if profile and getattr(profile, "username", None):
473
  username = profile.username
474
- print(f"User logged in: {username}")
475
  else:
476
  return "❌ 沒拿到登入資訊。請先按 Login,再按 Run。", None
477
 
@@ -485,13 +513,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None = None):
485
  if space_id
486
  else "https://huggingface.co/spaces/UNKNOWN/tree/main"
487
  )
488
- print("agent_code:", agent_code)
489
 
490
- print(f"Fetching questions from: {questions_url}")
491
  r = requests.get(questions_url, timeout=45)
492
  r.raise_for_status()
493
  questions_data = r.json()
494
-
495
  if not questions_data:
496
  return "❌ questions 是空的,API 沒回題目。", None
497
 
@@ -502,20 +527,28 @@ def run_and_submit_all(profile: gr.OAuthProfile | None = None):
502
  for item in questions_data:
503
  task_id = item.get("task_id")
504
  question_text = item.get("question", "")
505
-
506
  if not task_id or question_text is None:
507
  continue
508
 
509
- submitted_answer = agent(question_text, item)
510
 
511
- # empty -> skip (do not submit)
512
  if isinstance(submitted_answer, str) and submitted_answer.strip() == "":
513
  skipped += 1
514
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": "SKIPPED"})
 
 
 
 
 
515
  continue
516
 
517
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
518
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
519
 
520
  if not answers_payload:
521
  return "⚠️ 全部 SKIPPED(代表目前沒有穩定可解題,或附件抓不到)。", pd.DataFrame(results_log)
@@ -526,7 +559,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None = None):
526
  "answers": answers_payload,
527
  }
528
 
529
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
530
  r2 = requests.post(submit_url, json=submission_data, timeout=180)
531
  r2.raise_for_status()
532
  result_data = r2.json()
@@ -548,22 +580,16 @@ def run_and_submit_all(profile: gr.OAuthProfile | None = None):
548
 
549
 
550
  # -----------------------------
551
- # Gradio UI
552
  # -----------------------------
553
  with gr.Blocks() as demo:
554
  gr.Markdown("# Basic Agent Evaluation Runner (No Paid Model)")
555
  gr.Markdown(
556
  """
557
- **Instructions**
558
- 1. Login
559
- 2. Click **Run Evaluation & Submit All Answers**
560
-
561
- **Strategy**
562
- - Answer only questions we can solve confidently (rules + attached simple files).
563
- - Unknown questions are **SKIPPED**.
564
- - ✅ This version improves attachment download:
565
- - Supports relative URLs like `/files/...`
566
- - If no file_id is present, it tries downloading with **task_id** as file_id (common for attached files).
567
  """
568
  )
569
 
 
1
  import os
2
  import re
3
  import io
 
4
  import math
5
  import traceback
6
  from pathlib import Path
7
+ from typing import Any, Dict, List, Optional, Tuple
8
 
9
  import gradio as gr
10
  import requests
11
  import pandas as pd
12
 
 
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
+ DEBUG_ATTACH = True # ✅ 想安靜就改 False
15
 
16
 
17
  # -----------------------------
 
63
 
64
 
65
  # -----------------------------
66
+ # Extract helpers
67
  # -----------------------------
68
  def _collect_strings(x: Any) -> List[str]:
69
  out: List[str] = []
 
80
 
81
  def extract_file_ids_from_item(item: Dict[str, Any]) -> List[str]:
82
  ids: List[str] = []
83
+ for k in ["file_id", "fileId", "attachment_id", "attachmentId", "asset_id", "assetId", "id"]:
 
 
84
  v = item.get(k)
85
  if isinstance(v, str) and v:
86
  ids.append(v)
87
 
 
88
  for k in ["files", "attachments", "file_ids", "fileIds", "assets"]:
89
  v = item.get(k)
90
  if isinstance(v, list):
 
97
  if isinstance(vv, str) and vv:
98
  ids.append(vv)
99
 
 
100
  seen = set()
101
  out: List[str] = []
102
  for x in ids:
 
107
 
108
 
109
  def _normalize_to_full_url(s: str, api_url: str) -> Optional[str]:
 
 
 
 
 
 
 
 
110
  s = (s or "").strip()
111
  if not s:
112
  return None
 
114
  return s
115
  if s.startswith("/"):
116
  return api_url.rstrip("/") + s
117
+ if s.startswith(("files/", "file/", "static/", "assets/", "attachments/", "media/", "raw/")):
118
  return api_url.rstrip("/") + "/" + s
119
  return None
120
 
 
127
  if u:
128
  urls.append(u)
129
 
 
130
  seen = set()
131
  out: List[str] = []
132
  for u in urls:
 
136
  return out
137
 
138
 
139
+ def extract_filenames_from_question(q: str) -> List[str]:
140
+ # e.g. "I've attached the recipe as Strawberry pie.mp3."
141
+ names = re.findall(r"attached (?:a file called|the recipe as|as)\s+([A-Za-z0-9 _\-\.\(\)]+?\.(?:mp3|xlsx|xls|py|txt))",
142
+ q, flags=re.I)
143
+ out = []
144
+ for n in names:
145
+ n = n.strip().strip('"').strip("'")
146
+ if n:
147
+ out.append(n)
148
+ # dedup
149
+ seen = set()
150
+ res = []
151
+ for x in out:
152
+ if x not in seen:
153
+ res.append(x); seen.add(x)
154
+ return res
155
+
156
+
157
  # -----------------------------
158
+ # Download core
159
  # -----------------------------
160
  def _save_stream_to_tmp(resp: requests.Response, file_tag: str) -> Optional[Path]:
161
  try:
 
183
  return None
184
 
185
 
186
+ def _try_download_urls(urls: List[str], tag: str) -> Tuple[Optional[Path], List[str]]:
187
+ debug_lines: List[str] = []
188
+ for url in urls:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  try:
190
  resp = _http_get(url, timeout=60, stream=True)
191
+ debug_lines.append(f"{resp.status_code} {url}")
192
  if resp.status_code != 200:
193
  continue
194
+ p = _save_stream_to_tmp(resp, tag)
195
  if p:
196
+ debug_lines.append(f"OK -> {p.name} ({p.stat().st_size} bytes)")
197
+ return p, debug_lines
198
+ except Exception as e:
199
+ debug_lines.append(f"ERR {url} :: {type(e).__name__}: {e}")
200
  continue
201
+ return None, debug_lines
202
 
203
 
204
+ def download_scoring_file(file_id: str, api_url: str = DEFAULT_API_URL) -> Tuple[Optional[Path], List[str]]:
205
+ """
206
+ 回傳 (path, debug_lines)
207
+ """
208
+ fid = file_id.strip()
209
+ candidates = [
210
+ # common patterns
211
+ f"{api_url}/files/{fid}",
212
+ f"{api_url}/files/{fid}/download",
213
+ f"{api_url}/files/{fid}?download=1",
214
+ f"{api_url}/file/{fid}",
215
+ f"{api_url}/file/{fid}/download",
216
+ f"{api_url}/download/{fid}",
217
+ f"{api_url}/download?file_id={fid}",
218
+ f"{api_url}/get_file/{fid}",
219
+ f"{api_url}/asset/{fid}",
220
+ f"{api_url}/assets/{fid}",
221
+ f"{api_url}/static/{fid}",
222
+ # ✅ more attachment-ish patterns
223
+ f"{api_url}/attachments/{fid}",
224
+ f"{api_url}/attachments/{fid}/download",
225
+ f"{api_url}/attachment/{fid}",
226
+ f"{api_url}/attachment/{fid}/download",
227
+ f"{api_url}/media/{fid}",
228
+ f"{api_url}/media/{fid}/download",
229
+ f"{api_url}/raw/{fid}",
230
+ f"{api_url}/raw/{fid}/download",
231
+ # query styles
232
+ f"{api_url}/files?file_id={fid}",
233
+ f"{api_url}/file?file_id={fid}",
234
+ f"{api_url}/attachments?file_id={fid}",
235
+ f"{api_url}/media?file_id={fid}",
236
+ ]
237
+ return _try_download_urls(candidates, fid)
238
+
239
+
240
+ def download_from_url(url: str) -> Tuple[Optional[Path], List[str]]:
241
+ tag = re.sub(r"[^a-zA-Z0-9_-]+", "_", url)[-48:] or "file"
242
+ return _try_download_urls([url], tag)
243
+
244
+
245
+ def download_by_filename(filename: str, api_url: str) -> Tuple[Optional[Path], List[str]]:
246
+ # 嘗試用檔名猜 static/files
247
+ fn = filename.strip()
248
+ fn_enc = requests.utils.quote(fn)
249
+ candidates = [
250
+ f"{api_url}/static/{fn}",
251
+ f"{api_url}/static/{fn_enc}",
252
+ f"{api_url}/files/{fn}",
253
+ f"{api_url}/files/{fn_enc}",
254
+ f"{api_url}/assets/{fn}",
255
+ f"{api_url}/assets/{fn_enc}",
256
+ ]
257
+ return _try_download_urls(candidates, fn)
258
 
259
 
260
  # -----------------------------
261
+ # Solvers
262
  # -----------------------------
263
  def solve_reversed_sentence(q: str) -> Optional[str]:
264
  if "rewsna eht sa" in q and '"tfel"' in q:
 
281
 
282
  def solve_mercedes_sosa(q: str) -> Optional[str]:
283
  if "Mercedes Sosa" in q and "studio albums" in q and "2000 and 2009" in q:
 
284
  return "3"
285
  return None
286
 
 
291
  return None
292
 
293
 
 
 
 
294
  def solve_excel_food_sales(file_path: Path) -> Optional[str]:
 
 
 
 
295
  try:
296
  xl = pd.read_excel(file_path, sheet_name=None)
297
  if not xl:
298
  return None
 
299
  frames = []
300
  for _, df in xl.items():
301
  if df is None or df.empty:
 
305
  return None
306
  df = pd.concat(frames, ignore_index=True)
307
 
 
308
  numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
309
  if not numeric_cols:
310
  for c in df.columns:
 
356
 
357
 
358
  def solve_python_final_numeric(file_path: Path) -> Optional[str]:
 
 
 
359
  try:
360
  code = file_path.read_text(errors="ignore")
361
  if not code.strip():
 
403
 
404
 
405
  # -----------------------------
406
+ # Agent
407
  # -----------------------------
408
  class BasicAgent:
409
  def __init__(self, api_url: str):
410
  self.api_url = api_url
411
  print("BasicAgent initialized (rules + attachments, no paid model).")
412
 
413
+ def __call__(self, question: str, item: Dict[str, Any]) -> Tuple[str, str]:
414
  q = (question or "").strip()
415
  ql = q.lower()
416
+ debug = ""
417
 
418
+ # deterministic solvers
419
  for fn in [
420
  solve_reversed_sentence,
421
  solve_non_commutative_subset,
 
426
  try:
427
  ans = fn(q)
428
  if ans:
429
+ return sanitize_answer(ans), debug
430
  except Exception:
431
  pass
432
 
433
+ is_attachment_task = any(k in ql for k in ["attached excel", "attached python", "attached a file", "i've attached", ".mp3", ".xlsx", ".py"])
434
+ if not is_attachment_task:
435
+ return "", debug
436
+
437
+ # 0) try filenames in question
438
+ filenames = extract_filenames_from_question(q)
439
+ for fn in filenames:
440
+ fp, dbg = download_by_filename(fn, self.api_url)
441
+ if DEBUG_ATTACH:
442
+ debug += "\n".join(dbg) + "\n"
443
+ if fp:
444
+ ans = self._solve_from_file(q, fp)
445
+ if ans:
446
+ return sanitize_answer(ans), debug
447
+
448
+ # 1) urls in item
449
  urls = extract_file_urls_from_item(item, api_url=self.api_url)
450
  for u in urls:
451
+ fp, dbg = download_from_url(u)
452
+ if DEBUG_ATTACH:
453
+ debug += "\n".join(dbg) + "\n"
454
+ if fp:
455
+ ans = self._solve_from_file(q, fp)
456
+ if ans:
457
+ return sanitize_answer(ans), debug
458
 
459
+ # 2) ids in item
460
  file_ids = extract_file_ids_from_item(item)
461
  for fid in file_ids:
462
+ fp, dbg = download_scoring_file(fid, api_url=self.api_url)
463
+ if DEBUG_ATTACH:
464
+ debug += "\n".join(dbg) + "\n"
465
+ if fp:
466
+ ans = self._solve_from_file(q, fp)
467
+ if ans:
468
+ return sanitize_answer(ans), debug
469
 
470
+ # 3) fallback: task_id as file_id
 
 
471
  task_id = item.get("task_id")
472
  if isinstance(task_id, str) and task_id:
473
+ fp, dbg = download_scoring_file(task_id, api_url=self.api_url)
474
+ if DEBUG_ATTACH:
475
+ debug += "\n".join(dbg) + "\n"
476
+ if fp:
477
+ ans = self._solve_from_file(q, fp)
478
+ if ans:
479
+ return sanitize_answer(ans), debug
480
+
481
+ return "", debug.strip()
482
 
483
  def _solve_from_file(self, q: str, fp: Path) -> Optional[str]:
484
  suf = fp.suffix.lower()
485
  ql = q.lower()
 
 
486
  if ("attached excel file" in ql) or (suf in [".xlsx", ".xls"]):
487
+ return solve_excel_food_sales(fp)
 
 
 
 
488
  if ("attached python code" in ql) or (suf in [".py", ".txt"]):
489
+ return solve_python_final_numeric(fp)
490
+ # mp3 tasks intentionally skipped (no audio pipeline)
 
 
 
491
  return None
492
 
493
 
494
  # -----------------------------
495
+ # Runner
496
  # -----------------------------
497
  def run_and_submit_all(profile: gr.OAuthProfile | None = None):
498
  try:
 
500
 
501
  if profile and getattr(profile, "username", None):
502
  username = profile.username
 
503
  else:
504
  return "❌ 沒拿到登入資訊。請先按 Login,再按 Run。", None
505
 
 
513
  if space_id
514
  else "https://huggingface.co/spaces/UNKNOWN/tree/main"
515
  )
 
516
 
 
517
  r = requests.get(questions_url, timeout=45)
518
  r.raise_for_status()
519
  questions_data = r.json()
 
520
  if not questions_data:
521
  return "❌ questions 是空的,API 沒回題目。", None
522
 
 
527
  for item in questions_data:
528
  task_id = item.get("task_id")
529
  question_text = item.get("question", "")
 
530
  if not task_id or question_text is None:
531
  continue
532
 
533
+ submitted_answer, debug = agent(question_text, item)
534
 
 
535
  if isinstance(submitted_answer, str) and submitted_answer.strip() == "":
536
  skipped += 1
537
+ results_log.append({
538
+ "Task ID": task_id,
539
+ "Question": question_text,
540
+ "Submitted Answer": "SKIPPED",
541
+ "Debug": debug if DEBUG_ATTACH else ""
542
+ })
543
  continue
544
 
545
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
546
+ results_log.append({
547
+ "Task ID": task_id,
548
+ "Question": question_text,
549
+ "Submitted Answer": submitted_answer,
550
+ "Debug": debug if DEBUG_ATTACH else ""
551
+ })
552
 
553
  if not answers_payload:
554
  return "⚠️ 全部 SKIPPED(代表目前沒有穩定可解題,或附件抓不到)。", pd.DataFrame(results_log)
 
559
  "answers": answers_payload,
560
  }
561
 
 
562
  r2 = requests.post(submit_url, json=submission_data, timeout=180)
563
  r2.raise_for_status()
564
  result_data = r2.json()
 
580
 
581
 
582
  # -----------------------------
583
+ # UI
584
  # -----------------------------
585
  with gr.Blocks() as demo:
586
  gr.Markdown("# Basic Agent Evaluation Runner (No Paid Model)")
587
  gr.Markdown(
588
  """
589
+ 1) Login
590
+ 2) Click **Run Evaluation & Submit All Answers**
591
+
592
+ ✅ This version adds **Debug** column for attachment tasks, showing tried URLs and status codes.
 
 
 
 
 
 
593
  """
594
  )
595