j_yoon.song committed on
Commit
12995c6
·
1 Parent(s): 85323bc
Files changed (2) hide show
  1. src/about.py +1 -4
  2. src/envs.py +1 -1
src/about.py CHANGED
@@ -31,10 +31,7 @@ ProductivityBench is designed to evaluate LLMs for Productivity Assistants which
31
  # Which evaluations are you running? how can people reproduce what you have?
32
  LLM_BENCHMARKS_TEXT = f"""
33
  ## How it works
34
-
35
- ## Reproducibility
36
- To reproduce our results, here are the commands you can run:
37
-
38
  """
39
 
40
  EVALUATION_QUEUE_TEXT = """
 
31
  # Which evaluations are you running? how can people reproduce what you have?
32
  LLM_BENCHMARKS_TEXT = f"""
33
  ## How it works
34
+ We utilize an LLM judge with human-crafted criteria to assess AI responses.
 
 
 
35
  """
36
 
37
  EVALUATION_QUEUE_TEXT = """
src/envs.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import HfApi
6
  # ----------------------------------
7
  TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
8
 
9
- OWNER = "demo-leaderboard-backend" # Change to your org - don't forget to create a results and request dataset, with the correct format!
10
  # ----------------------------------
11
 
12
  REPO_ID = f"{OWNER}/leaderboard"
 
6
  # ----------------------------------
7
  TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
8
 
9
+ OWNER = "coms1580" # Change to your org - don't forget to create a results and request dataset, with the correct format!
10
  # ----------------------------------
11
 
12
  REPO_ID = f"{OWNER}/leaderboard"