johnnychiang committed on
Commit
70ff4fd
·
verified ·
1 Parent(s): 5b1f03f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -99
app.py CHANGED
@@ -5,7 +5,6 @@ import json
5
  import math
6
  import base64
7
  import traceback
8
- from dataclasses import dataclass
9
  from pathlib import Path
10
  from typing import Any, Dict, List, Optional, Tuple
11
 
@@ -30,21 +29,13 @@ def _http_get(url: str, timeout: int = 30, stream: bool = False) -> requests.Res
30
 
31
 
32
  def _looks_like_html(b: bytes) -> bool:
33
- head = (b or b"")[:400].lower()
34
  return (b"<!doctype html" in head) or (b"<html" in head) or (b"<head" in head) or (b"<body" in head)
35
 
36
 
37
  def _safe_filename_from_headers(resp: requests.Response, fallback: str) -> str:
38
  cd = resp.headers.get("content-disposition", "")
39
- # filename*=UTF-8''xxx or filename="xxx"
40
- m = re.search(r"filename\*=(?:UTF-8'')?([^;]+)", cd, flags=re.I)
41
- if m:
42
- name = m.group(1).strip().strip('"').strip("'")
43
- name = name.split("/")[-1].split("\\")[-1]
44
- if name:
45
- return name
46
-
47
- m = re.search(r'filename="?([^";]+)"?', cd, flags=re.I)
48
  if m:
49
  name = m.group(1).strip().strip('"').strip("'")
50
  name = name.split("/")[-1].split("\\")[-1]
@@ -56,9 +47,7 @@ def _safe_filename_from_headers(resp: requests.Response, fallback: str) -> str:
56
  return fallback + ".xlsx"
57
  if "audio" in ct or "mpeg" in ct or "mp3" in ct:
58
  return fallback + ".mp3"
59
- if "python" in ct:
60
- return fallback + ".py"
61
- if "text" in ct:
62
  return fallback + ".txt"
63
  return fallback
64
 
@@ -106,6 +95,7 @@ def extract_file_ids_from_item(item: Dict[str, Any]) -> List[str]:
106
  if isinstance(vv, str) and vv:
107
  ids.append(vv)
108
 
 
109
  seen = set()
110
  out: List[str] = []
111
  for x in ids:
@@ -164,23 +154,16 @@ def extract_filenames_from_question(q: str) -> List[str]:
164
 
165
 
166
  def url_quote_filename(name: str) -> str:
167
- # minimal url-encoding for spaces only
168
- return (name or "").replace(" ", "%20")
169
 
170
 
171
  # -----------------------------
172
- # Download helpers (FIXED streaming)
173
  # -----------------------------
174
  def _save_stream_to_tmp(resp: requests.Response, file_tag: str) -> Optional[Path]:
175
- """
176
- Correct way:
177
- - read first chunk from iter_content (not resp.raw.read)
178
- - if HTML -> abort
179
- - else write first chunk + rest
180
- """
181
  try:
182
- it = resp.iter_content(chunk_size=64 * 1024)
183
- first = next(it, b"")
184
  if not first:
185
  return None
186
  if _looks_like_html(first):
@@ -193,7 +176,7 @@ def _save_stream_to_tmp(resp: requests.Response, file_tag: str) -> Optional[Path
193
 
194
  with open(out_path, "wb") as f:
195
  f.write(first)
196
- for chunk in it:
197
  if chunk:
198
  f.write(chunk)
199
 
@@ -222,13 +205,14 @@ def _try_download_urls(urls: List[str], tag: str) -> Tuple[Optional[Path], List[
222
 
223
 
224
  # -----------------------------
225
- # Base64-in-item extraction (backup)
226
  # -----------------------------
227
  _B64_KEYS = {
228
  "data", "content", "blob", "bytes", "file_bytes", "filebytes", "b64", "base64",
229
  "attachment", "file", "payload"
230
  }
231
 
 
232
  def looks_like_base64(s: str) -> bool:
233
  if not isinstance(s, str):
234
  return False
@@ -300,7 +284,7 @@ def extract_base64_files_from_item(item: Any, filename_hint: str) -> Tuple[List[
300
 
301
 
302
  # -----------------------------
303
- # Deterministic solvers (your correct ones)
304
  # -----------------------------
305
  def solve_reversed_sentence(q: str) -> Optional[str]:
306
  if "rewsna eht sa" in q and '"tfel"' in q:
@@ -341,7 +325,6 @@ def solve_excel_food_sales(file_path: Path) -> Optional[str]:
341
  xl = pd.read_excel(file_path, sheet_name=None)
342
  if not xl:
343
  return None
344
-
345
  frames = []
346
  for _, df in xl.items():
347
  if df is None or df.empty:
@@ -351,14 +334,8 @@ def solve_excel_food_sales(file_path: Path) -> Optional[str]:
351
  return None
352
  df = pd.concat(frames, ignore_index=True)
353
 
354
- # pick numeric sales column
355
- for c in df.columns:
356
- if df[c].dtype == object:
357
- continue
358
- # allow numeric
359
  numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
360
  if not numeric_cols:
361
- # try coercion
362
  for c in df.columns:
363
  df[c] = pd.to_numeric(df[c], errors="ignore")
364
  numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
@@ -466,18 +443,16 @@ class BasicAgent:
466
  except Exception:
467
  pass
468
 
469
- # attachment tasks?
470
- is_attachment_task = any(k in ql for k in ["attached", ".mp3", ".xlsx", ".xls", ".py"])
471
  if not is_attachment_task:
472
  return "", ""
473
 
474
  task_id = str(item.get("task_id", "")).strip()
475
- file_name = str(item.get("file_name", "")).strip()
476
  filenames = extract_filenames_from_question(q)
477
  filename_hint = filenames[0] if filenames else (file_name or "attachment")
478
- fn_q = url_quote_filename(filename_hint)
479
 
480
- # 0) detail endpoints
481
  detail_candidates = [
482
  f"{self.api_url}/question/{task_id}",
483
  f"{self.api_url}/questions/{task_id}",
@@ -508,7 +483,7 @@ class BasicAgent:
508
  if ans:
509
  return sanitize_answer(ans), "\n".join(debug_lines) if DEBUG_ATTACH else ""
510
 
511
- # 2) urls in json
512
  for src_name, src in [("DETAIL", detail_json), ("ITEM", item)]:
513
  if src:
514
  urls = extract_file_urls_from_item(src, api_url=self.api_url)
@@ -520,48 +495,40 @@ class BasicAgent:
520
  if ans:
521
  return sanitize_answer(ans), "\n".join(debug_lines) if DEBUG_ATTACH else ""
522
 
523
- # 3) filename patterns (MOST IMPORTANT)
524
- # try with item file_name first, else filename_hint
525
- fn_core = url_quote_filename(file_name or filename_hint or "attachment")
526
-
527
- candidates = [
528
- # direct
529
- f"{self.api_url}/static/{fn_core}",
530
- f"{self.api_url}/files/{fn_core}",
531
- f"{self.api_url}/assets/{fn_core}",
532
- f"{self.api_url}/media/{fn_core}",
533
- f"{self.api_url}/raw/{fn_core}",
534
- f"{self.api_url}/api/static/{fn_core}",
535
- f"{self.api_url}/api/files/{fn_core}",
536
- f"{self.api_url}/api/assets/{fn_core}",
537
- f"{self.api_url}/api/media/{fn_core}",
538
-
539
- # task_id + filename (very common)
540
- f"{self.api_url}/files/{task_id}/{fn_core}",
541
- f"{self.api_url}/files/{task_id}/download/{fn_core}",
542
- f"{self.api_url}/download/{task_id}/{fn_core}",
543
- f"{self.api_url}/api/files/{task_id}/{fn_core}",
544
- f"{self.api_url}/api/download/{task_id}/{fn_core}",
545
-
546
- # query style
547
- f"{self.api_url}/download?task_id={task_id}&file_name={fn_core}",
548
- f"{self.api_url}/download?task_id={task_id}&filename={fn_core}",
549
- f"{self.api_url}/api/download?task_id={task_id}&file_name={fn_core}",
550
- f"{self.api_url}/api/download?task_id={task_id}&filename={fn_core}",
551
- ]
552
-
553
- fp, dbg3 = _try_download_urls(candidates, tag=(file_name or filename_hint))
554
- debug_lines.extend(dbg3)
555
- if fp:
556
- ans = self._solve_from_file(q, fp)
557
- if ans:
558
- return sanitize_answer(ans), "\n".join(debug_lines) if DEBUG_ATTACH else ""
559
 
560
  # 4) id-based fallback
561
  file_ids = extract_file_ids_from_item(item)
562
  if task_id:
563
  file_ids.append(task_id)
564
-
565
  seen = set()
566
  file_ids2 = []
567
  for x in file_ids:
@@ -570,7 +537,7 @@ class BasicAgent:
570
  seen.add(x)
571
 
572
  for fid in file_ids2:
573
- candidates2 = [
574
  f"{self.api_url}/files/{fid}",
575
  f"{self.api_url}/files/{fid}/download",
576
  f"{self.api_url}/file/{fid}",
@@ -590,10 +557,10 @@ class BasicAgent:
590
  f"{self.api_url}/download?file_id={fid}",
591
  f"{self.api_url}/api/download?file_id={fid}",
592
  ]
593
- fp2, dbg4 = _try_download_urls(candidates2, tag=filename_hint)
594
  debug_lines.extend(dbg4)
595
- if fp2:
596
- ans = self._solve_from_file(q, fp2)
597
  if ans:
598
  return sanitize_answer(ans), "\n".join(debug_lines) if DEBUG_ATTACH else ""
599
 
@@ -612,28 +579,26 @@ class BasicAgent:
612
  suf = fp.suffix.lower()
613
  ql = q.lower()
614
 
615
- if ("excel" in ql) or (suf in [".xlsx", ".xls"]):
616
  return solve_excel_food_sales(fp)
617
 
618
- if ("python" in ql) or (suf in [".py", ".txt"]):
619
  return solve_python_final_numeric(fp)
620
 
621
- # mp3 你還沒做語音辨識,就先 None
622
  return None
623
 
624
 
625
  # -----------------------------
626
  # Runner
627
  # -----------------------------
628
- def run_and_submit_all(profile: Any = None):
629
  try:
630
  space_id = os.getenv("SPACE_ID", "").strip()
631
 
632
- username = None
633
- if profile is not None:
634
- username = getattr(profile, "username", None)
635
-
636
- if not username:
637
  return "❌ 沒拿到登入資訊。請先按 Login,再按 Run。", None
638
 
639
  api_url = DEFAULT_API_URL
@@ -663,11 +628,15 @@ def run_and_submit_all(profile: Any = None):
663
 
664
  if isinstance(submitted_answer, str) and submitted_answer.strip() == "":
665
  skipped += 1
666
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": "SKIPPED", "Debug": debug})
 
 
667
  continue
668
 
669
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
670
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer, "Debug": debug})
 
 
671
 
672
  if not answers_payload:
673
  return "⚠️ 全部 SKIPPED(目前沒有穩定可解題,或附件仍抓不到)。", pd.DataFrame(results_log)
@@ -679,7 +648,7 @@ def run_and_submit_all(profile: Any = None):
679
  result_data = r2.json()
680
 
681
  final_status = (
682
- f"✅ Submission Successful!\n"
683
  f"User: {result_data.get('username')}\n"
684
  f"Overall Score: {result_data.get('score', 'N/A')}% "
685
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
@@ -693,17 +662,26 @@ def run_and_submit_all(profile: Any = None):
693
  tb = traceback.format_exc()
694
  return f"❌ Runtime Error:\n{e}\n\n--- Traceback ---\n{tb}", None
695
 
 
696
  # -----------------------------
697
- # UI
698
  # -----------------------------
699
  with gr.Blocks() as demo:
700
  gr.Markdown("# Basic Agent Evaluation Runner (No Paid Model)")
701
- gr.Markdown("✅ Try: **detail endpoints** + **file_name path patterns** + url/base64 scan.\n\nDebug 欄會顯示嘗試過哪些網址。")
702
- gr.LoginButton()
 
 
 
 
 
 
703
  run_button = gr.Button("Run Evaluation & Submit All Answers")
704
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=14, interactive=False)
705
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
706
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
707
 
708
  if __name__ == "__main__":
709
- demo.launch(debug=True, share=False, show_error=True)
 
 
5
  import math
6
  import base64
7
  import traceback
 
8
  from pathlib import Path
9
  from typing import Any, Dict, List, Optional, Tuple
10
 
 
29
 
30
 
31
  def _looks_like_html(b: bytes) -> bool:
32
+ head = b[:400].lower()
33
  return (b"<!doctype html" in head) or (b"<html" in head) or (b"<head" in head) or (b"<body" in head)
34
 
35
 
36
  def _safe_filename_from_headers(resp: requests.Response, fallback: str) -> str:
37
  cd = resp.headers.get("content-disposition", "")
38
+ m = re.search(r'filename\*?="?([^";]+)"?', cd, flags=re.I)
 
 
 
 
 
 
 
 
39
  if m:
40
  name = m.group(1).strip().strip('"').strip("'")
41
  name = name.split("/")[-1].split("\\")[-1]
 
47
  return fallback + ".xlsx"
48
  if "audio" in ct or "mpeg" in ct or "mp3" in ct:
49
  return fallback + ".mp3"
50
+ if "text" in ct or "python" in ct:
 
 
51
  return fallback + ".txt"
52
  return fallback
53
 
 
95
  if isinstance(vv, str) and vv:
96
  ids.append(vv)
97
 
98
+ # dedup
99
  seen = set()
100
  out: List[str] = []
101
  for x in ids:
 
154
 
155
 
156
  def url_quote_filename(name: str) -> str:
157
+ # minimal url-encoding for spaces
158
+ return name.replace(" ", "%20")
159
 
160
 
161
  # -----------------------------
162
+ # Download helpers
163
  # -----------------------------
164
  def _save_stream_to_tmp(resp: requests.Response, file_tag: str) -> Optional[Path]:
 
 
 
 
 
 
165
  try:
166
+ first = resp.raw.read(4096)
 
167
  if not first:
168
  return None
169
  if _looks_like_html(first):
 
176
 
177
  with open(out_path, "wb") as f:
178
  f.write(first)
179
+ for chunk in resp.iter_content(chunk_size=1024 * 64):
180
  if chunk:
181
  f.write(chunk)
182
 
 
205
 
206
 
207
  # -----------------------------
208
+ # Base64-in-item extraction (備用)
209
  # -----------------------------
210
  _B64_KEYS = {
211
  "data", "content", "blob", "bytes", "file_bytes", "filebytes", "b64", "base64",
212
  "attachment", "file", "payload"
213
  }
214
 
215
+
216
  def looks_like_base64(s: str) -> bool:
217
  if not isinstance(s, str):
218
  return False
 
284
 
285
 
286
  # -----------------------------
287
+ # Deterministic solvers (你已經答對的)
288
  # -----------------------------
289
  def solve_reversed_sentence(q: str) -> Optional[str]:
290
  if "rewsna eht sa" in q and '"tfel"' in q:
 
325
  xl = pd.read_excel(file_path, sheet_name=None)
326
  if not xl:
327
  return None
 
328
  frames = []
329
  for _, df in xl.items():
330
  if df is None or df.empty:
 
334
  return None
335
  df = pd.concat(frames, ignore_index=True)
336
 
 
 
 
 
 
337
  numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
338
  if not numeric_cols:
 
339
  for c in df.columns:
340
  df[c] = pd.to_numeric(df[c], errors="ignore")
341
  numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
 
443
  except Exception:
444
  pass
445
 
446
+ is_attachment_task = any(k in ql for k in ["attached excel", "attached python", "i've attached", ".mp3", ".xlsx", ".py"])
 
447
  if not is_attachment_task:
448
  return "", ""
449
 
450
  task_id = str(item.get("task_id", "")).strip()
451
+ file_name = str(item.get("file_name", "")).strip() # 重要:用它拼下載路徑
452
  filenames = extract_filenames_from_question(q)
453
  filename_hint = filenames[0] if filenames else (file_name or "attachment")
 
454
 
455
+ # 0) 嘗試抓題目詳情 JSON
456
  detail_candidates = [
457
  f"{self.api_url}/question/{task_id}",
458
  f"{self.api_url}/questions/{task_id}",
 
483
  if ans:
484
  return sanitize_answer(ans), "\n".join(debug_lines) if DEBUG_ATTACH else ""
485
 
486
+ # 2) url strings
487
  for src_name, src in [("DETAIL", detail_json), ("ITEM", item)]:
488
  if src:
489
  urls = extract_file_urls_from_item(src, api_url=self.api_url)
 
495
  if ans:
496
  return sanitize_answer(ans), "\n".join(debug_lines) if DEBUG_ATTACH else ""
497
 
498
+ # 3) file_name 組路徑(重點)
499
+ if file_name:
500
+ fn_q = url_quote_filename(file_name)
501
+ fn_candidates = [
502
+ f"{self.api_url}/static/{fn_q}",
503
+ f"{self.api_url}/files/{fn_q}",
504
+ f"{self.api_url}/assets/{fn_q}",
505
+ f"{self.api_url}/media/{fn_q}",
506
+ f"{self.api_url}/raw/{fn_q}",
507
+ f"{self.api_url}/api/static/{fn_q}",
508
+ f"{self.api_url}/api/files/{fn_q}",
509
+ f"{self.api_url}/api/assets/{fn_q}",
510
+ f"{self.api_url}/api/media/{fn_q}",
511
+ f"{self.api_url}/files/{task_id}/{fn_q}",
512
+ f"{self.api_url}/files/{task_id}/download/{fn_q}",
513
+ f"{self.api_url}/download/{task_id}/{fn_q}",
514
+ f"{self.api_url}/api/files/{task_id}/{fn_q}",
515
+ f"{self.api_url}/api/download/{task_id}/{fn_q}",
516
+ f"{self.api_url}/download?task_id={task_id}&file_name={fn_q}",
517
+ f"{self.api_url}/download?task_id={task_id}&filename={fn_q}",
518
+ f"{self.api_url}/api/download?task_id={task_id}&file_name={fn_q}",
519
+ f"{self.api_url}/api/download?task_id={task_id}&filename={fn_q}",
520
+ ]
521
+ fp, dbg3 = _try_download_urls(fn_candidates, tag=file_name)
522
+ debug_lines.extend(dbg3)
523
+ if fp:
524
+ ans = self._solve_from_file(q, fp)
525
+ if ans:
526
+ return sanitize_answer(ans), "\n".join(debug_lines) if DEBUG_ATTACH else ""
 
 
 
 
 
 
 
527
 
528
  # 4) id-based fallback
529
  file_ids = extract_file_ids_from_item(item)
530
  if task_id:
531
  file_ids.append(task_id)
 
532
  seen = set()
533
  file_ids2 = []
534
  for x in file_ids:
 
537
  seen.add(x)
538
 
539
  for fid in file_ids2:
540
+ candidates = [
541
  f"{self.api_url}/files/{fid}",
542
  f"{self.api_url}/files/{fid}/download",
543
  f"{self.api_url}/file/{fid}",
 
557
  f"{self.api_url}/download?file_id={fid}",
558
  f"{self.api_url}/api/download?file_id={fid}",
559
  ]
560
+ fp, dbg4 = _try_download_urls(candidates, tag=filename_hint)
561
  debug_lines.extend(dbg4)
562
+ if fp:
563
+ ans = self._solve_from_file(q, fp)
564
  if ans:
565
  return sanitize_answer(ans), "\n".join(debug_lines) if DEBUG_ATTACH else ""
566
 
 
579
  suf = fp.suffix.lower()
580
  ql = q.lower()
581
 
582
+ if ("attached excel file" in ql) or (suf in [".xlsx", ".xls"]):
583
  return solve_excel_food_sales(fp)
584
 
585
+ if ("attached python code" in ql) or (suf in [".py", ".txt"]):
586
  return solve_python_final_numeric(fp)
587
 
588
+ # mp3 / youtube 類題目前先不做(要加 whisper/yt-dlp 才能真的解)
589
  return None
590
 
591
 
592
  # -----------------------------
593
  # Runner
594
  # -----------------------------
595
+ def run_and_submit_all(profile: Optional[gr.OAuthProfile]):
596
  try:
597
  space_id = os.getenv("SPACE_ID", "").strip()
598
 
599
+ if profile and getattr(profile, "username", None):
600
+ username = profile.username
601
+ else:
 
 
602
  return "❌ 沒拿到登入資訊。請先按 Login,再按 Run。", None
603
 
604
  api_url = DEFAULT_API_URL
 
628
 
629
  if isinstance(submitted_answer, str) and submitted_answer.strip() == "":
630
  skipped += 1
631
+ results_log.append(
632
+ {"Task ID": task_id, "Question": question_text, "Submitted Answer": "SKIPPED", "Debug": debug}
633
+ )
634
  continue
635
 
636
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
637
+ results_log.append(
638
+ {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer, "Debug": debug}
639
+ )
640
 
641
  if not answers_payload:
642
  return "⚠️ 全部 SKIPPED(目前沒有穩定可解題,或附件仍抓不到)。", pd.DataFrame(results_log)
 
648
  result_data = r2.json()
649
 
650
  final_status = (
651
+ "✅ Submission Successful!\n"
652
  f"User: {result_data.get('username')}\n"
653
  f"Overall Score: {result_data.get('score', 'N/A')}% "
654
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
 
662
  tb = traceback.format_exc()
663
  return f"❌ Runtime Error:\n{e}\n\n--- Traceback ---\n{tb}", None
664
 
665
+
666
  # -----------------------------
667
+ # UI (OAuth profile 正確傳遞版)
668
  # -----------------------------
669
  with gr.Blocks() as demo:
670
  gr.Markdown("# Basic Agent Evaluation Runner (No Paid Model)")
671
+ gr.Markdown("✅ This version tries: **detail endpoints** + **file_name path patterns** + url/base64 scan.\n\nDebug欄會顯示嘗試過哪些網址。")
672
+
673
+ # ✅ 用 State 存 OAuthProfile,避免「畫面登入但 callback 收不到 profile」
674
+ profile_state = gr.State(value=None)
675
+
676
+ login_btn = gr.LoginButton()
677
+ login_btn.click(fn=lambda p: p, outputs=profile_state)
678
+
679
  run_button = gr.Button("Run Evaluation & Submit All Answers")
680
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=14, interactive=False)
681
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
682
+
683
+ run_button.click(fn=run_and_submit_all, inputs=profile_state, outputs=[status_output, results_table])
684
 
685
  if __name__ == "__main__":
686
+ demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
687
+