FredericFan committed on
Commit
ca6af8e
·
1 Parent(s): c96266d

Add GitHub and arXiv links to leaderboard page

Browse files

- Add styled button bar (GitHub + arXiv) under the title banner
- Link to paper in introduction text
- Direct users to GitHub repo for evaluation data and code
- Add arXiv reference in About tab reproducibility section

Files changed (3) hide show
  1. app.py +12 -0
  2. src/about.py +6 -2
  3. src/display/css_html_js.py +43 -0
app.py CHANGED
@@ -167,6 +167,18 @@ demo = gr.Blocks(css=custom_css)
167
  with demo:
168
  gr.HTML(TITLE)
169
  gr.HTML('<p id="space-subtitle">The First Comprehensive Benchmark for LLMs in Molecular Dynamics</p>')
 
 
 
 
 
 
 
 
 
 
 
 
170
  gr.HTML(build_metric_cards())
171
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
172
 
 
167
  with demo:
168
  gr.HTML(TITLE)
169
  gr.HTML('<p id="space-subtitle">The First Comprehensive Benchmark for LLMs in Molecular Dynamics</p>')
170
+ gr.HTML("""
171
+ <div class="link-bar">
172
+ <a class="link-github" href="https://github.com/FredericVAN/PKU_MDAgent2" target="_blank">
173
+ <svg viewBox="0 0 16 16"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/></svg>
174
+ GitHub
175
+ </a>
176
+ <a class="link-arxiv" href="https://arxiv.org/abs/2601.02075" target="_blank">
177
+ <svg viewBox="0 0 16 16"><path d="M2 1h12a1 1 0 011 1v12a1 1 0 01-1 1H2a1 1 0 01-1-1V2a1 1 0 011-1zm1.5 2v10h2V9.5L7.5 12h1.2L6.5 9l2-2.5H7.3L5.5 9V3h-2zm5 0v10h2V3h-2zm3.5 0v2h1V3h-1z"/></svg>
178
+ arXiv Paper
179
+ </a>
180
+ </div>
181
+ """)
182
  gr.HTML(build_metric_cards())
183
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
184
 
src/about.py CHANGED
@@ -29,7 +29,7 @@ TITLE = """<h1 align="center" id="space-title">🧪 MD-EvalBench Leaderboard</h1
29
 
30
  INTRODUCTION_TEXT = """
31
  **MD-EvalBench** is the first comprehensive benchmark for evaluating Large Language Models in the Molecular Dynamics (MD) domain,
32
- proposed in the paper *"MDAgent2: Large Language Model for Code Generation and Knowledge Q&A in Molecular Dynamics"*.
33
 
34
  The benchmark consists of three evaluation datasets:
35
  - **MD-KnowledgeEval** (336 questions): Theoretical knowledge assessment covering interatomic potentials, integration algorithms, equilibrium conditions, and statistical ensembles.
@@ -37,6 +37,8 @@ The benchmark consists of three evaluation datasets:
37
  - **LAMMPS-CodeGenEval** (566 tasks): Automatic code generation quality assessment for executable LAMMPS scripts.
38
 
39
  Models are evaluated on both **Question Answering** (knowledge + syntax) and **Code Generation** (execution success + human scoring) capabilities.
 
 
40
  """
41
 
42
  LLM_BENCHMARKS_TEXT = """
@@ -78,7 +80,9 @@ All experiments are repeated three times and the average results are reported.
78
 
79
  ## Reproducibility
80
 
81
- Models are evaluated using the MD-EvalBench benchmark suite. For detailed methodology, please refer to the paper.
 
 
82
  """
83
 
84
  EVALUATION_QUEUE_TEXT = """
 
29
 
30
  INTRODUCTION_TEXT = """
31
  **MD-EvalBench** is the first comprehensive benchmark for evaluating Large Language Models in the Molecular Dynamics (MD) domain,
32
+ proposed in the paper [*"MDAgent2: Large Language Model for Code Generation and Knowledge Q&A in Molecular Dynamics"*](https://arxiv.org/abs/2601.02075).
33
 
34
  The benchmark consists of three evaluation datasets:
35
  - **MD-KnowledgeEval** (336 questions): Theoretical knowledge assessment covering interatomic potentials, integration algorithms, equilibrium conditions, and statistical ensembles.
 
37
  - **LAMMPS-CodeGenEval** (566 tasks): Automatic code generation quality assessment for executable LAMMPS scripts.
38
 
39
  Models are evaluated on both **Question Answering** (knowledge + syntax) and **Code Generation** (execution success + human scoring) capabilities.
40
+
41
+ To access the evaluation datasets, code, and submission guidelines, please visit our [GitHub repository](https://github.com/FredericVAN/PKU_MDAgent2).
42
  """
43
 
44
  LLM_BENCHMARKS_TEXT = """
 
80
 
81
  ## Reproducibility
82
 
83
+ Models are evaluated using the MD-EvalBench benchmark suite.
84
+ For evaluation data, code, and detailed methodology, please visit our [GitHub repository](https://github.com/FredericVAN/PKU_MDAgent2).
85
+ For the full paper, see [arXiv:2601.02075](https://arxiv.org/abs/2601.02075).
86
  """
87
 
88
  EVALUATION_QUEUE_TEXT = """
src/display/css_html_js.py CHANGED
@@ -27,6 +27,49 @@ custom_css = """
27
  font-weight: 400;
28
  }
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  /* ===== Tabs ===== */
31
  .tab-buttons button {
32
  font-size: 17px !important;
 
27
  font-weight: 400;
28
  }
29
 
30
+ /* ===== Link Buttons Bar ===== */
31
+ .link-bar {
32
+ display: flex;
33
+ justify-content: center;
34
+ gap: 12px;
35
+ margin: 4px 0 18px 0;
36
+ flex-wrap: wrap;
37
+ }
38
+
39
+ .link-bar a {
40
+ display: inline-flex;
41
+ align-items: center;
42
+ gap: 6px;
43
+ padding: 7px 18px;
44
+ border-radius: 8px;
45
+ font-size: 14px;
46
+ font-weight: 600;
47
+ text-decoration: none;
48
+ transition: transform 0.12s, box-shadow 0.15s;
49
+ }
50
+
51
+ .link-bar a:hover {
52
+ transform: translateY(-1px);
53
+ box-shadow: 0 3px 10px rgba(0, 0, 0, 0.12);
54
+ }
55
+
56
+ .link-bar a.link-github {
57
+ background: #24292f;
58
+ color: #fff;
59
+ }
60
+
61
+ .link-bar a.link-arxiv {
62
+ background: #b31b1b;
63
+ color: #fff;
64
+ }
65
+
66
+ .link-bar a svg {
67
+ width: 16px;
68
+ height: 16px;
69
+ fill: currentColor;
70
+ flex-shrink: 0;
71
+ }
72
+
73
  /* ===== Tabs ===== */
74
  .tab-buttons button {
75
  font-size: 17px !important;