tangtang
committed on
Commit · 24675aa
1 Parent(s): 6403908
Update space1
- app.py +2 -2
- src/about.py +2 -2
app.py
CHANGED
@@ -98,10 +98,10 @@ with demo:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             leaderboard = init_leaderboard(LEADERBOARD_DF)
 
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=
+        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=
+        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=3):
             with gr.Column():
                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
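Note: the fix above gives each tab an explicit, distinct id, repairing the truncated id= lines that were removed. Below is a minimal self-contained sketch of the resulting tab layout; it assumes only that gradio is installed, and it swaps the Space's init_leaderboard(LEADERBOARD_DF) call and text constants for hypothetical stand-ins.

# Minimal sketch of the tab structure after this commit; the text constants
# and the leaderboard placeholder are stand-ins, not the Space's real code.
import gradio as gr

LLM_BENCHMARKS_TEXT = "About text (stand-in)"
EVALUATION_QUEUE_TEXT = "Submission instructions (stand-in)"

with gr.Blocks() as demo:
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            gr.Markdown("Leaderboard table goes here (stand-in).")
        # Explicit, unique `id` values let a tab be selected programmatically
        # and keep the selection stable if tabs are later reordered.
        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

if __name__ == "__main__":
    demo.launch()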
src/about.py
CHANGED
@@ -33,14 +33,14 @@ class Tasks(Enum):
     Task3_ROUGE_L = Task("(Task3) ROUGE-L", "(Task3) ROUGE-L", "(T3) ROUGE-L (%)")
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">
+TITLE = """<h1 align="center" id="space-title">LLM-Based Automated Literature Review Evaluation Benchmark</h1>"""
 
 # What does your leaderboard evaluate?
 # Which evaluations are you running? how can people reproduce what you have?
 
 INTRODUCTION_TEXT = """This leaderboard evaluates Large Language Models (LLMs) on their ability to perform automated literature review tasks, including reference generation, abstract writing, and review composition.<br>
 It is based on the study: **Large Language Models for Automated Literature Review: An Evaluation of Reference Generation, Abstract Writing, and Review Composition.**<br>
-The leaderboard measures how well different models perform in
+The leaderboard measures how well different models perform at generating references and producing factually consistent, stylistically appropriate academic texts.
 """
 EVALUATION_QUEUE_TEXT = """"""
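For context on the Task3_ROUGE_L line at the top of this hunk: Spaces built from the standard Hugging Face leaderboard template (an assumption about this Space; only that one entry is confirmed by the diff) define Task as a small dataclass and enumerate the scored columns in a Tasks enum. A sketch of that pattern, with field names taken from the template rather than from this commit:

# Sketch of the Task/Tasks pattern behind the Task3_ROUGE_L entry, following
# the standard Hugging Face leaderboard template (assumed, not shown in diff).
from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # key under which the task's results are stored
    metric: str     # metric name to read from the results file
    col_name: str   # column header displayed on the leaderboard

class Tasks(Enum):
    # The entry visible in this diff: Task 3's ROUGE-L score, shown in percent.
    Task3_ROUGE_L = Task("(Task3) ROUGE-L", "(Task3) ROUGE-L", "(T3) ROUGE-L (%)")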