j_yoon.song committed on
Commit
12995c6
·
1 Parent(s): 85323bc
Files changed (2) hide show
  1. src/about.py +1 -4
  2. src/envs.py +1 -1
src/about.py CHANGED
@@ -31,10 +31,7 @@ ProductivityBench is designed to evaluate LLMs for Productivity Assistants which
31
  # Which evaluations are you running? how can people reproduce what you have?
32
  LLM_BENCHMARKS_TEXT = f"""
33
  ## How it works
34
-
35
- ## Reproducibility
36
- To reproduce our results, here are the commands you can run:
37
-
38
  """
39
 
40
  EVALUATION_QUEUE_TEXT = """
 
31
  # Which evaluations are you running? how can people reproduce what you have?
32
  LLM_BENCHMARKS_TEXT = f"""
33
  ## How it works
34
+ We utilize an LLM judge with human-crafted criteria to assess AI responses.
 
 
 
35
  """
36
 
37
  EVALUATION_QUEUE_TEXT = """
src/envs.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import HfApi
6
  # ----------------------------------
7
  TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
8
 
9
- OWNER = "demo-leaderboard-backend" # Change to your org - don't forget to create a results and request dataset, with the correct format!
10
  # ----------------------------------
11
 
12
  REPO_ID = f"{OWNER}/leaderboard"
 
6
  # ----------------------------------
7
  TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
8
 
9
+ OWNER = "coms1580" # Change to your org - don't forget to create a results and request dataset, with the correct format!
10
  # ----------------------------------
11
 
12
  REPO_ID = f"{OWNER}/leaderboard"