jinjiajie committed on
Commit
e3f8016
·
verified ·
1 Parent(s): 2b2c05f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +229 -229
app.py CHANGED
@@ -1,9 +1,9 @@
1
- import datetime
2
- import html as html_lib
3
- import json
4
- import os
5
- import re
6
- from typing import Dict, List, Optional
7
 
8
  import gradio as gr
9
  from datasets import Dataset, load_dataset
@@ -15,12 +15,12 @@ from content import (
15
  format_log,
16
  format_warning,
17
  )
18
- from evaluator import SimpleEvaluator
19
-
20
- TOKEN = os.getenv("HF_TOKEN") or os.getenv("TOKEN")
21
- DATASET_REPO = "RUC-NLPIR/GISA"
22
- RESULTS_REPO = "RUC-NLPIR/GISA-leaderboard"
23
- META_FILE = "meta.jsonl"
24
  ANSWER_DIR = "answer"
25
  CACHE_DIR = "cache/answers"
26
  SEED_FILE = os.path.join(os.path.dirname(__file__), "seed.json")
@@ -34,61 +34,61 @@ ALLOWED_TYPES = {"item", "set", "list", "table"}
34
 
35
  os.makedirs(CACHE_DIR, exist_ok=True)
36
 
37
- api = HfApi()
38
- evaluator = SimpleEvaluator()
39
-
40
-
41
- def _extract_username(profile, request: Optional[gr.Request]) -> Optional[str]:
42
- """
43
- Best-effort extraction of the HF username across Gradio versions.
44
-
45
- On Hugging Face Spaces with hf_oauth enabled, Gradio can inject an OAuth profile object
46
- (usually exposing `.username`). Some versions also provide `request.username`.
47
- """
48
-
49
- if profile is not None:
50
- username = getattr(profile, "username", None)
51
- if username:
52
- return str(username)
53
- # Some versions may pass a dict-like profile
54
- if isinstance(profile, dict):
55
- for key in ("username", "preferred_username", "name"):
56
- val = profile.get(key)
57
- if val:
58
- return str(val)
59
-
60
- if request is None:
61
- return None
62
-
63
- username = getattr(request, "username", None)
64
- if username:
65
- return str(username)
66
-
67
- headers = getattr(request, "headers", None)
68
- if not headers:
69
- return None
70
-
71
- # Starlette Headers is case-insensitive; also tolerate plain dicts.
72
- for key in (
73
- "x-forwarded-user",
74
- "x-hf-user",
75
- "x-huggingface-user",
76
- "x-user",
77
- ):
78
- try:
79
- val = headers.get(key)
80
- except Exception:
81
- val = None
82
- if val:
83
- return str(val)
84
-
85
- return None
86
-
87
-
88
- def _safe_float(val):
89
- try:
90
- if val is None:
91
- return None
92
  if isinstance(val, str) and not val.strip():
93
  return None
94
  return float(val)
@@ -187,106 +187,106 @@ def _rows_from_source(source) -> List[dict]:
187
  return rows
188
 
189
 
190
- def load_seed_rows() -> List[dict]:
191
- if not os.path.exists(SEED_FILE):
192
- return _load_seed_from_root_script()
193
- try:
194
- with open(SEED_FILE, "r", encoding="utf-8") as f:
195
- data = json.load(f)
196
- return data if isinstance(data, list) else []
197
- except Exception:
198
- return _load_seed_from_root_script()
199
-
200
-
201
- def _load_seed_from_root_script() -> List[dict]:
202
- root_script = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "script.js"))
203
- if not os.path.exists(root_script):
204
- return []
205
- try:
206
- text = _load_text(root_script)
207
- match = re.search(r"const\\s+data\\s*=\\s*(\\[.*?\\]);", text, re.S)
208
- if not match:
209
- return []
210
- arr_text = match.group(1)
211
- arr_text = re.sub(r"(\\w+)\\s*:", r'\"\\1\":', arr_text)
212
- arr_text = re.sub(r",\\s*([}\\]])", r"\\1", arr_text)
213
- data = json.loads(arr_text)
214
- return data if isinstance(data, list) else []
215
- except Exception:
216
- return []
217
-
218
-
219
- def render_page() -> str:
220
- page = _load_text(INDEX_HTML)
221
- page = page.replace("__LEADERBOARD_DATA__", "")
222
- page = page.replace("__SCRIPT__", "")
223
- return page
224
-
225
-
226
- def _format_score(val: Optional[float]) -> str:
227
- if val is None:
228
- return "-"
229
- try:
230
- return f"{float(val):.2f}"
231
- except Exception:
232
- return "-"
233
-
234
-
235
- def _render_leaderboard_rows(data: List[dict]) -> str:
236
- # Render a static table body so the leaderboard is not empty even if client JS
237
- # runs before Gradio mounts the HTML.
238
- if not data:
239
- return '<tr><td colspan="14">No submissions yet.</td></tr>'
240
-
241
- # Default sort: Overall desc, then date desc (best-effort).
242
- def _date_key(s: str) -> int:
243
- m = re.match(r"^(\\d{4})-(\\d{2})-(\\d{2})$", str(s or ""))
244
- if not m:
245
- return 0
246
- return int(m.group(1)) * 10000 + int(m.group(2)) * 100 + int(m.group(3))
247
-
248
- sorted_rows = sorted(
249
- data,
250
- key=lambda r: (
251
- _safe_float(r.get("overall")) or 0.0,
252
- _date_key(r.get("date")),
253
- ),
254
- reverse=True,
255
- )
256
-
257
- out = []
258
- for idx, row in enumerate(sorted_rows, start=1):
259
- model = html_lib.escape(str(row.get("model") or "-"))
260
- org = html_lib.escape(str(row.get("org") or "-"))
261
- framework = html_lib.escape(str(row.get("framework") or "-"))
262
- date = html_lib.escape(str(row.get("date") or "-"))
263
-
264
- out.append(
265
- "\n".join(
266
- [
267
- "<tr>",
268
- f" <td>{idx}</td>",
269
- ' <td class="model-cell">',
270
- f' <div class="model-name">{model}</div>',
271
- f' <div class="model-org">{org}</div>',
272
- " </td>",
273
- f" <td>{framework}</td>",
274
- f" <td>{date}</td>",
275
- f' <td class="highlight-em">{_format_score(_safe_float(row.get("overall")))}</td>',
276
- f" <td>{_format_score(_safe_float(row.get('item_em')))}</td>",
277
- f" <td>{_format_score(_safe_float(row.get('set_em')))}</td>",
278
- f" <td>{_format_score(_safe_float(row.get('set_f1')))}</td>",
279
- f" <td>{_format_score(_safe_float(row.get('list_em')))}</td>",
280
- f" <td>{_format_score(_safe_float(row.get('list_f1')))}</td>",
281
- f" <td>{_format_score(_safe_float(row.get('list_order')))}</td>",
282
- f" <td>{_format_score(_safe_float(row.get('table_em')))}</td>",
283
- f" <td>{_format_score(_safe_float(row.get('table_row_f1')))}</td>",
284
- f" <td>{_format_score(_safe_float(row.get('table_item_f1')))}</td>",
285
- "</tr>",
286
- ]
287
- )
288
- )
289
- return "\n".join(out)
290
 
291
 
292
  def build_js(data: List[dict]) -> str:
@@ -404,24 +404,24 @@ def parse_jsonl(file_path: str) -> Dict[str, str]:
404
  return preds
405
 
406
 
407
- def add_new_eval(
408
- model: str,
409
- org: str,
410
- framework: str,
411
- url: str,
412
- email: str,
413
- file_obj,
414
- profile: Optional[gr.OAuthProfile] = None,
415
- request: gr.Request = None,
416
- ):
417
- if not TOKEN:
418
- return format_error("Server misconfigured: HF_TOKEN is missing.")
419
-
420
- username = _extract_username(profile, request)
421
- if not username:
422
- return format_warning("Please log in with HuggingFace to submit.")
423
- if not model or not org:
424
- return format_warning("Please provide model name and organization.")
425
  if file_obj is None:
426
  return format_warning("Please upload a JSONL file.")
427
 
@@ -495,50 +495,50 @@ def add_new_eval(
495
  return format_log("Submission received! Please refresh the leaderboard to see your score.")
496
 
497
 
498
- seed_results_if_needed()
499
- leaderboard_data = build_leaderboard_rows()
500
- css = _load_text(STYLES_CSS)
501
- page_html = render_page()
502
- rows_html = _render_leaderboard_rows(leaderboard_data)
503
- page_html = page_html.replace(
504
- '<tbody id="leaderboard-body"></tbody>',
505
- f'<tbody id="leaderboard-body">{rows_html}</tbody>',
506
- )
507
- js = build_js(leaderboard_data)
508
-
509
- with gr.Blocks() as demo:
510
- gr.HTML(page_html)
511
-
512
- with gr.Accordion("Submit your results", open=True):
513
- gr.Markdown(SUBMISSION_TEXT)
514
- with gr.Row():
515
- with gr.Column():
516
- model_text = gr.Textbox(label="Model / System")
517
- org_text = gr.Textbox(label="Organization")
518
- framework_text = gr.Textbox(label="Framework", value="ReAct")
519
- url_text = gr.Textbox(label="Model URL", placeholder="Optional")
520
- with gr.Column():
521
- email_text = gr.Textbox(label="Contact email (public)")
522
- file_input = gr.File(label="Upload JSONL")
523
-
524
- with gr.Row():
525
- login_btn = gr.LoginButton()
526
- submit_btn = gr.Button("Submit")
527
-
528
- result_md = gr.Markdown()
529
-
530
- submit_btn.click(
531
- add_new_eval,
532
- inputs=[
533
- model_text,
534
- org_text,
535
- framework_text,
536
- url_text,
537
- email_text,
538
- file_input,
539
- ],
540
- outputs=result_md,
541
- )
542
 
543
  def _launch():
544
  demo.queue()
 
1
+ import datetime
2
+ import html as html_lib
3
+ import json
4
+ import os
5
+ import re
6
+ from typing import Dict, List, Optional
7
 
8
  import gradio as gr
9
  from datasets import Dataset, load_dataset
 
15
  format_log,
16
  format_warning,
17
  )
18
+ from evaluator import SimpleEvaluator
19
+
20
+ TOKEN = os.getenv("HF_TOKEN") or os.getenv("TOKEN")
21
+ DATASET_REPO = "RUC-NLPIR/GISA"
22
+ RESULTS_REPO = "RUC-NLPIR/GISA-leaderboard"
23
+ META_FILE = "encrypted_question.jsonl"
24
  ANSWER_DIR = "answer"
25
  CACHE_DIR = "cache/answers"
26
  SEED_FILE = os.path.join(os.path.dirname(__file__), "seed.json")
 
34
 
35
  os.makedirs(CACHE_DIR, exist_ok=True)
36
 
37
+ api = HfApi()
38
+ evaluator = SimpleEvaluator()
39
+
40
+
41
def _extract_username(profile, request: "Optional[gr.Request]") -> Optional[str]:
    """
    Best-effort extraction of the HF username across Gradio versions.

    On Hugging Face Spaces with hf_oauth enabled, Gradio can inject an OAuth profile object
    (usually exposing `.username`). Some versions also provide `request.username`.

    Lookup order (the first non-blank value wins):
      1. the ``username`` attribute of ``profile``;
      2. if ``profile`` is a dict, its ``username`` / ``preferred_username`` / ``name`` keys;
      3. the ``username`` attribute of ``request``;
      4. the request headers ``x-forwarded-user``, ``x-hf-user``,
         ``x-huggingface-user`` and ``x-user``.

    Every candidate is converted with ``str`` and stripped, so a value made only
    of whitespace is treated as "not logged in" instead of being accepted.

    Args:
        profile: OAuth profile object, dict-like profile, or None.
        request: Incoming request object, or None.

    Returns:
        The username string, or None when no source yields a non-blank value.
    """

    def _clean(value) -> Optional[str]:
        # Falsy values (None, "", 0, ...) never count as a username.
        if not value:
            return None
        return str(value).strip() or None

    if profile is not None:
        found = _clean(getattr(profile, "username", None))
        if found:
            return found
        # Some versions may pass a dict-like profile
        if isinstance(profile, dict):
            for key in ("username", "preferred_username", "name"):
                found = _clean(profile.get(key))
                if found:
                    return found

    if request is None:
        return None

    found = _clean(getattr(request, "username", None))
    if found:
        return found

    headers = getattr(request, "headers", None)
    if not headers:
        return None

    # Starlette Headers is case-insensitive; also tolerate plain dicts.
    # NOTE(review): these headers can be set by any client unless a trusted
    # proxy overwrites them -- confirm the deployment strips them before
    # relying on this fallback for authentication.
    for key in (
        "x-forwarded-user",
        "x-hf-user",
        "x-huggingface-user",
        "x-user",
    ):
        try:
            raw = headers.get(key)
        except Exception:
            # The concrete headers type is unknown here; a failing lookup is
            # deliberately treated as "header absent".
            raw = None
        found = _clean(raw)
        if found:
            return found

    return None
86
+
87
+
88
+ def _safe_float(val):
89
+ try:
90
+ if val is None:
91
+ return None
92
  if isinstance(val, str) and not val.strip():
93
  return None
94
  return float(val)
 
187
  return rows
188
 
189
 
190
def load_seed_rows() -> List[dict]:
    """Return the seed leaderboard rows stored in ``SEED_FILE``.

    The file is read as UTF-8 JSON. If it is missing, unreadable, or not valid
    JSON, the rows are taken from ``_load_seed_from_root_script()`` instead.
    A file that parses to anything other than a list yields an empty list.
    """
    try:
        # A missing file raises here and takes the same fallback path as any
        # other read or parse failure.
        with open(SEED_FILE, encoding="utf-8") as handle:
            payload = json.load(handle)
    except Exception:
        return _load_seed_from_root_script()
    if isinstance(payload, list):
        return payload
    return []
199
+
200
+
201
def _parse_js_data_array(text: str) -> List[dict]:
    """Extract the ``const data = [...];`` literal from JS source and parse it.

    The array literal is made JSON-compatible by quoting bare object keys and
    removing trailing commas, then handed to ``json.loads``. This is a
    best-effort conversion: single-quoted strings, comments, or string values
    that themselves contain ``, name:`` are not handled.

    Returns:
        The parsed list, or ``[]`` when no array is found, the converted text
        is not valid JSON, or the parsed value is not a list.
    """
    match = re.search(r"const\s+data\s*=\s*(\[.*?\]);", text, re.S)
    if not match:
        return []
    arr_text = match.group(1)
    # Quote bare identifiers used as keys. Requiring a preceding "{" or ","
    # keeps colons inside string values (e.g. "https://...") and keys that
    # are already quoted untouched.
    arr_text = re.sub(r'([{,]\s*)([A-Za-z_$][\w$]*)\s*:', r'\1"\2":', arr_text)
    # JSON forbids trailing commas before a closing brace/bracket.
    arr_text = re.sub(r",\s*([}\]])", r"\1", arr_text)
    try:
        data = json.loads(arr_text)
    except ValueError:
        return []
    return data if isinstance(data, list) else []


def _load_seed_from_root_script() -> List[dict]:
    """Load seed rows from ``script.js`` in the parent directory of this file.

    Returns ``[]`` when the script does not exist, cannot be read, or does not
    contain a parseable ``const data = [...];`` array.
    """
    root_script = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "script.js"))
    if not os.path.exists(root_script):
        return []
    try:
        return _parse_js_data_array(_load_text(root_script))
    except Exception:
        # Seeding is optional: any read failure simply means "no seed rows".
        return []
217
+
218
+
219
def render_page() -> str:
    """Return the ``INDEX_HTML`` template with its placeholders blanked out.

    The template text is obtained through ``_load_text`` and the
    ``__LEADERBOARD_DATA__`` and ``__SCRIPT__`` markers are replaced by empty
    strings.
    """
    markup = _load_text(INDEX_HTML)
    for placeholder in ("__LEADERBOARD_DATA__", "__SCRIPT__"):
        markup = markup.replace(placeholder, "")
    return markup
224
+
225
+
226
def _format_score(val: Optional[float]) -> str:
    """Format a score for display with two decimal places.

    Args:
        val: A number, a numeric string, or None.

    Returns:
        The value formatted as ``"%.2f"``, or ``"-"`` when ``val`` is None,
        cannot be converted to a float, or is NaN / infinite (so the table
        never shows "nan" or "inf").
    """
    import math  # local import: only this helper needs it

    if val is None:
        return "-"
    try:
        number = float(val)
    except (TypeError, ValueError, OverflowError):
        return "-"
    if not math.isfinite(number):
        return "-"
    return f"{number:.2f}"
233
+
234
+
235
# Strict YYYY-MM-DD; single backslashes so ``\d`` means "digit" in the raw string.
_ISO_DATE_RE = re.compile(r"(\d{4})-(\d{2})-(\d{2})")

# Metric columns rendered after the highlighted "overall" cell, in table order.
_METRIC_KEYS = (
    "item_em",
    "set_em",
    "set_f1",
    "list_em",
    "list_f1",
    "list_order",
    "table_em",
    "table_row_f1",
    "table_item_f1",
)


def _leaderboard_date_key(value) -> int:
    """Map a ``YYYY-MM-DD`` string to the sortable integer YYYYMMDD.

    Anything that is not exactly in that form (including None) maps to 0.
    """
    match = _ISO_DATE_RE.fullmatch(str(value or ""))
    if not match:
        return 0
    year, month, day = (int(part) for part in match.groups())
    return year * 10000 + month * 100 + day


def _render_leaderboard_rows(data: List[dict]) -> str:
    """Render leaderboard rows as the inner HTML of the table body.

    A static table body is rendered so the leaderboard is not empty even if
    client JS runs before Gradio mounts the HTML.

    Rows are sorted by ``overall`` descending (missing or unparsable scores
    count as 0.0), with ties broken by ``date`` descending. Text fields are
    HTML-escaped; scores go through ``_safe_float`` and ``_format_score``.

    Args:
        data: Row dicts; the keys read are ``model``, ``org``, ``framework``,
            ``date``, ``overall`` and the names in ``_METRIC_KEYS``.

    Returns:
        One ``<tr>`` per row joined by newlines, or a single placeholder row
        spanning all 14 columns when ``data`` is empty.
    """
    if not data:
        return '<tr><td colspan="14">No submissions yet.</td></tr>'

    # Default sort: Overall desc, then date desc (best-effort).
    sorted_rows = sorted(
        data,
        key=lambda r: (
            _safe_float(r.get("overall")) or 0.0,
            _leaderboard_date_key(r.get("date")),
        ),
        reverse=True,
    )

    out = []
    for idx, row in enumerate(sorted_rows, start=1):
        model = html_lib.escape(str(row.get("model") or "-"))
        org = html_lib.escape(str(row.get("org") or "-"))
        framework = html_lib.escape(str(row.get("framework") or "-"))
        date = html_lib.escape(str(row.get("date") or "-"))

        cells = [
            "<tr>",
            f" <td>{idx}</td>",
            ' <td class="model-cell">',
            f' <div class="model-name">{model}</div>',
            f' <div class="model-org">{org}</div>',
            " </td>",
            f" <td>{framework}</td>",
            f" <td>{date}</td>",
            f' <td class="highlight-em">{_format_score(_safe_float(row.get("overall")))}</td>',
        ]
        cells.extend(
            f" <td>{_format_score(_safe_float(row.get(key)))}</td>"
            for key in _METRIC_KEYS
        )
        cells.append("</tr>")
        out.append("\n".join(cells))
    return "\n".join(out)
290
 
291
 
292
  def build_js(data: List[dict]) -> str:
 
404
  return preds
405
 
406
 
407
+ def add_new_eval(
408
+ model: str,
409
+ org: str,
410
+ framework: str,
411
+ url: str,
412
+ email: str,
413
+ file_obj,
414
+ profile: Optional[gr.OAuthProfile] = None,
415
+ request: gr.Request = None,
416
+ ):
417
+ if not TOKEN:
418
+ return format_error("Server misconfigured: HF_TOKEN is missing.")
419
+
420
+ username = _extract_username(profile, request)
421
+ if not username:
422
+ return format_warning("Please log in with HuggingFace to submit.")
423
+ if not model or not org:
424
+ return format_warning("Please provide model name and organization.")
425
  if file_obj is None:
426
  return format_warning("Please upload a JSONL file.")
427
 
 
495
  return format_log("Submission received! Please refresh the leaderboard to see your score.")
496
 
497
 
498
+ seed_results_if_needed()
499
+ leaderboard_data = build_leaderboard_rows()
500
+ css = _load_text(STYLES_CSS)
501
+ page_html = render_page()
502
+ rows_html = _render_leaderboard_rows(leaderboard_data)
503
+ page_html = page_html.replace(
504
+ '<tbody id="leaderboard-body"></tbody>',
505
+ f'<tbody id="leaderboard-body">{rows_html}</tbody>',
506
+ )
507
+ js = build_js(leaderboard_data)
508
+
509
+ with gr.Blocks() as demo:
510
+ gr.HTML(page_html)
511
+
512
+ with gr.Accordion("Submit your results", open=True):
513
+ gr.Markdown(SUBMISSION_TEXT)
514
+ with gr.Row():
515
+ with gr.Column():
516
+ model_text = gr.Textbox(label="Model / System")
517
+ org_text = gr.Textbox(label="Organization")
518
+ framework_text = gr.Textbox(label="Framework", value="ReAct")
519
+ url_text = gr.Textbox(label="Model URL", placeholder="Optional")
520
+ with gr.Column():
521
+ email_text = gr.Textbox(label="Contact email (public)")
522
+ file_input = gr.File(label="Upload JSONL")
523
+
524
+ with gr.Row():
525
+ login_btn = gr.LoginButton()
526
+ submit_btn = gr.Button("Submit")
527
+
528
+ result_md = gr.Markdown()
529
+
530
+ submit_btn.click(
531
+ add_new_eval,
532
+ inputs=[
533
+ model_text,
534
+ org_text,
535
+ framework_text,
536
+ url_text,
537
+ email_text,
538
+ file_input,
539
+ ],
540
+ outputs=result_md,
541
+ )
542
 
543
  def _launch():
544
  demo.queue()