博闻 committed on
Commit
13f06e2
·
1 Parent(s): 6a12be7

add emoji

Browse files
Files changed (2) hide show
  1. app.py +2 -1
  2. chinatravel/ui/content.py +6 -6
app.py CHANGED
@@ -14,8 +14,9 @@ with gr.Blocks(title="ChinaTravel Benchmark Evaluation") as demo:
14
  gr.Markdown(content.INTRO_MARKDOWN)
15
  gr.Markdown(content.SUBMISSION_GUIDE)
16
 
17
- gr.Markdown("### Leaderboard")
18
  gr.Markdown("Methods marked with \* leverage Oracle DSL or an Oracle Verifier.")
 
19
  if SPLITS_LIST:
20
  with gr.Tabs():
21
  for split in SPLITS_LIST:
 
14
  gr.Markdown(content.INTRO_MARKDOWN)
15
  gr.Markdown(content.SUBMISSION_GUIDE)
16
 
17
+ gr.Markdown("### 🏆 Leaderboard")
18
  gr.Markdown("Methods marked with \* leverage Oracle DSL or an Oracle Verifier.")
19
+ gr.Markdown("✨ Methods marked with * leverage Oracle DSL or an Oracle Verifier.")
20
  if SPLITS_LIST:
21
  with gr.Tabs():
22
  for split in SPLITS_LIST:
chinatravel/ui/content.py CHANGED
@@ -1,24 +1,24 @@
1
  TITLE_HTML = """
2
- <h1 style=\"text-align:center; margin-bottom: 0.25rem;\">ChinaTravel Benchmark Evaluation</h1>
3
  """
4
 
5
  INTRO_MARKDOWN = """
6
- ChinaTravel is an open-ended travel planning benchmark with compositional constraint validation for language agents. (See our [paper](https://arxiv.org/abs/2412.13682) for more details.)
7
  """
8
 
9
  SUBMISSION_GUIDE = """
10
- **How to submit**
11
  - Pick a split. The split determines which query UIDs are expected.
12
  - Upload a `.zip` that contains JSON files named by query UIDs.
13
  - Each JSON must follow the target schema: see [chinatravel/evaluation/output_schema.json](chinatravel/evaluation/output_schema.json).
14
  - You can dry-run locally via `python eval_exp.py --splits <split> --method <your_method>` to mirror the hosted evaluation.
15
 
16
- **Output**
17
  - We compute DR (schema pass rate), EPR_micro/EPR_macro (commonsense), LPR_micro/LPR_macro/C-LPR (logic), and FPR (all-pass rate).
18
  - A detailed JSON report is produced for download after evaluation.
19
 
20
- **Contact**
21
  - If you are interested in showing your results on our leaderboard or have any questions, please contact [Jie-Jing Shao](shaojj@lamda.nju.edu.cn), [Bo-Wen Zhang](221900200@smail.nju.edu.cn), [Xiao-Wen Yang](yangxw@lamda.nju.edu.cn)
22
  """
23
 
24
- CONTACT = "Contact: zbw@smail.nju.edu.cn, shaojj@lamda.nju.edu.cn"
 
1
  TITLE_HTML = """
2
+ <h1 style=\"text-align:center; margin-bottom: 0.25rem;\">🧭 ChinaTravel Benchmark Evaluation</h1>
3
  """
4
 
5
  INTRO_MARKDOWN = """
6
+ ✈️ ChinaTravel is an open-ended travel planning benchmark with compositional constraint validation for language agents. (See our [paper](https://arxiv.org/abs/2412.13682) for more details.)
7
  """
8
 
9
  SUBMISSION_GUIDE = """
10
+ 📥 **How to submit**
11
  - Pick a split. The split determines which query UIDs are expected.
12
  - Upload a `.zip` that contains JSON files named by query UIDs.
13
  - Each JSON must follow the target schema: see [chinatravel/evaluation/output_schema.json](chinatravel/evaluation/output_schema.json).
14
  - You can dry-run locally via `python eval_exp.py --splits <split> --method <your_method>` to mirror the hosted evaluation.
15
 
16
+ 📊 **Output**
17
  - We compute DR (schema pass rate), EPR_micro/EPR_macro (commonsense), LPR_micro/LPR_macro/C-LPR (logic), and FPR (all-pass rate).
18
  - A detailed JSON report is produced for download after evaluation.
19
 
20
+ 📨 **Contact**
21
  - If you are interested in showing your results on our leaderboard or have any questions, please contact [Jie-Jing Shao](shaojj@lamda.nju.edu.cn), [Bo-Wen Zhang](221900200@smail.nju.edu.cn), [Xiao-Wen Yang](yangxw@lamda.nju.edu.cn)
22
  """
23
 
24
+ CONTACT = "Contact: ✉️ zbw@smail.nju.edu.cn, ✉️ shaojj@lamda.nju.edu.cn"