tangtang
committed on
Commit · 24675aa
1 Parent(s): 6403908
Update space1
- app.py +2 -2
- src/about.py +2 -2
app.py
CHANGED
@@ -98,10 +98,10 @@ with demo:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             leaderboard = init_leaderboard(LEADERBOARD_DF)
 
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=
+        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=
+        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=3):
             with gr.Column():
                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
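Note: the fix above gives each tab an explicit, distinct id, repairing the truncated id= lines that were removed. Below is a minimal self-contained sketch of the resulting tab layout; it assumes only that gradio is installed, and it swaps the Space's init_leaderboard(LEADERBOARD_DF) call and text constants for hypothetical stand-ins.

# Minimal sketch of the tab structure after this commit; the text constants
# and the leaderboard placeholder are stand-ins, not the Space's real code.
import gradio as gr

LLM_BENCHMARKS_TEXT = "About text (stand-in)"
EVALUATION_QUEUE_TEXT = "Submission instructions (stand-in)"

with gr.Blocks() as demo:
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            gr.Markdown("Leaderboard table goes here (stand-in).")
        # Explicit, unique `id` values let a tab be selected programmatically
        # and keep the selection stable if tabs are later reordered.
        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

if __name__ == "__main__":
    demo.launch()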
src/about.py
CHANGED
@@ -33,14 +33,14 @@ class Tasks(Enum):
     Task3_ROUGE_L = Task("(Task3) ROUGE-L", "(Task3) ROUGE-L", "(T3) ROUGE-L (%)")
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">
+TITLE = """<h1 align="center" id="space-title">LLM-Based Automated Literature Review Evaluation Benchmark</h1>"""
 
 # What does your leaderboard evaluate?
 # Which evaluations are you running? how can people reproduce what you have?
 
 INTRODUCTION_TEXT = """This leaderboard evaluates Large Language Models (LLMs) on their ability to perform automated literature review tasks, including reference generation, abstract writing, and review composition.<br>
 It is based on the study: **Large Language Models for Automated Literature Review: An Evaluation of Reference Generation, Abstract Writing, and Review Composition.**<br>
-The leaderboard measures how well different models perform in
+The leaderboard measures how well different models perform at generating references and producing factually consistent, stylistically appropriate academic texts.
 """
 EVALUATION_QUEUE_TEXT = """"""
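For context on the Task3_ROUGE_L line at the top of this hunk: Spaces built from the standard Hugging Face leaderboard template (an assumption about this Space; only that one entry is confirmed by the diff) define Task as a small dataclass and enumerate the scored columns in a Tasks enum. A sketch of that pattern, with field names taken from the template rather than from this commit:

# Sketch of the Task/Tasks pattern behind the Task3_ROUGE_L entry, following
# the standard Hugging Face leaderboard template (assumed, not shown in diff).
from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # key under which the task's results are stored
    metric: str     # metric name to read from the results file
    col_name: str   # column header displayed on the leaderboard

class Tasks(Enum):
    # The entry visible in this diff: Task 3's ROUGE-L score, shown in percent.
    Task3_ROUGE_L = Task("(Task3) ROUGE-L", "(Task3) ROUGE-L", "(T3) ROUGE-L (%)")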