Borchmann committed on
Commit
87993b5
·
verified ·
1 Parent(s): 7658988

Upload folder using huggingface_hub

Browse files
app.py CHANGED
@@ -255,6 +255,15 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
255
  def init_leaderboard(dataframe):
256
  if dataframe is None or dataframe.empty:
257
  raise ValueError("Leaderboard DataFrame is empty or None.")
 
 
 
 
 
 
 
 
 
258
  return Leaderboard(
259
  value=dataframe,
260
  datatype=[c.type for c in fields(AutoEvalColumn)],
@@ -263,8 +272,26 @@ def init_leaderboard(dataframe):
263
  cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
264
  label="Select Columns to Display:",
265
  ),
266
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.submitted_by.name],
267
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  bool_checkboxgroup_label="Hide models",
269
  interactive=False,
270
  )
@@ -399,8 +426,8 @@ with demo:
399
  model_name_textbox = gr.Textbox(
400
  label="Model Name", placeholder="e.g., GPT-4-Turbo-Agent, Claude-3-Opus-Agent"
401
  )
402
- submitted_by_textbox = gr.Textbox(
403
- label="Submitted By", placeholder="e.g., your name, organization, or team name"
404
  )
405
  model_type = gr.Dropdown(
406
  choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
@@ -409,6 +436,11 @@ with demo:
409
  value=None,
410
  interactive=True,
411
  )
 
 
 
 
 
412
 
413
  with gr.Column():
414
  predictions_file = gr.File(label="Predictions JSONL File", file_types=[".jsonl"], type="filepath")
@@ -434,9 +466,10 @@ with demo:
434
  add_new_eval,
435
  [
436
  model_name_textbox,
437
- submitted_by_textbox,
438
  model_type,
439
  predictions_file,
 
440
  ],
441
  submission_result,
442
  )
 
255
  def init_leaderboard(dataframe):
256
  if dataframe is None or dataframe.empty:
257
  raise ValueError("Leaderboard DataFrame is empty or None.")
258
+
259
+ # Calculate dynamic filter ranges from actual data
260
+ max_agent_steps = int(dataframe[AutoEvalColumn.agent_steps.name].max()) if len(dataframe) > 0 else 1000
261
+ max_cost = float(dataframe[AutoEvalColumn.cost_usd.name].max()) if len(dataframe) > 0 else 10.0
262
+
263
+ # Add some headroom to max values
264
+ max_agent_steps = max(max_agent_steps + 100, 1000)
265
+ max_cost = max(max_cost + 1.0, 10.0)
266
+
267
  return Leaderboard(
268
  value=dataframe,
269
  datatype=[c.type for c in fields(AutoEvalColumn)],
 
272
  cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
273
  label="Select Columns to Display:",
274
  ),
275
+ search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.organization.name],
276
+ hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden] + ["Type"],
277
+ filter_columns=[
278
+ ColumnFilter(
279
+ AutoEvalColumn.agent_steps.name,
280
+ type="slider",
281
+ min=0,
282
+ max=max_agent_steps,
283
+ default=[0, max_agent_steps],
284
+ label="Agent Steps",
285
+ ),
286
+ ColumnFilter(
287
+ AutoEvalColumn.cost_usd.name,
288
+ type="slider",
289
+ min=0.0,
290
+ max=max_cost,
291
+ default=[0.0, max_cost],
292
+ label="Cost (USD)",
293
+ ),
294
+ ],
295
  bool_checkboxgroup_label="Hide models",
296
  interactive=False,
297
  )
 
426
  model_name_textbox = gr.Textbox(
427
  label="Model Name", placeholder="e.g., GPT-4-Turbo-Agent, Claude-3-Opus-Agent"
428
  )
429
+ organization_textbox = gr.Textbox(
430
+ label="Organization", placeholder="e.g., OpenAI, Anthropic, Meta, or your organization name"
431
  )
432
  model_type = gr.Dropdown(
433
  choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
 
436
  value=None,
437
  interactive=True,
438
  )
439
+ link_textbox = gr.Textbox(
440
+ label="Link (Optional)",
441
+ placeholder="e.g., https://arxiv.org/abs/... or https://github.com/...",
442
+ info="Link to paper, code repository, or model card (optional)"
443
+ )
444
 
445
  with gr.Column():
446
  predictions_file = gr.File(label="Predictions JSONL File", file_types=[".jsonl"], type="filepath")
 
466
  add_new_eval,
467
  [
468
  model_name_textbox,
469
+ organization_textbox,
470
  model_type,
471
  predictions_file,
472
+ link_textbox,
473
  ],
474
  submission_result,
475
  )
requirements.txt CHANGED
@@ -4,10 +4,10 @@ datasets
4
  gradio
5
  gradio[oauth]
6
  gradio_client
7
- gradio_leaderboard>=0.0.13
8
  huggingface-hub>=0.18.0
9
  matplotlib
10
- numpy
11
  pandas
12
  plotly
13
  python-dateutil
 
4
  gradio
5
  gradio[oauth]
6
  gradio_client
7
+ gradio_leaderboard==0.0.13
8
  huggingface-hub>=0.18.0
9
  matplotlib
10
+ numpy<2.0
11
  pandas
12
  plotly
13
  python-dateutil
src/display/css_html_js.py CHANGED
@@ -54,12 +54,32 @@ table a:hover {
54
  padding: 0px;
55
  }
56
 
57
- /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
58
- #leaderboard-table td:nth-child(2),
59
- #leaderboard-table th:nth-child(2) {
60
- max-width: 400px;
61
- overflow: auto;
62
- white-space: nowrap;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  }
64
 
65
  /* Style for inline SVG icons in table */
 
54
  padding: 0px;
55
  }
56
 
57
+ /* Set width for the Model column (now first column after reordering) */
58
+ #leaderboard-table td:nth-child(1),
59
+ #leaderboard-table th:nth-child(1),
60
+ .leaderboard table td:first-child,
61
+ .leaderboard table th:first-child,
62
+ table td:first-child,
63
+ table th:first-child {
64
+ min-width: 350px !important;
65
+ width: 400px !important;
66
+ max-width: 500px !important;
67
+ overflow: visible !important;
68
+ white-space: normal !important;
69
+ word-break: break-word !important;
70
+ }
71
+
72
+ /* Also target the gradio leaderboard specifically with highest priority */
73
+ .gradio-container .gradio-leaderboard table td:first-child,
74
+ .gradio-container .gradio-leaderboard table th:first-child,
75
+ [class*="leaderboard"] table td:first-child,
76
+ [class*="leaderboard"] table th:first-child {
77
+ min-width: 350px !important;
78
+ width: 400px !important;
79
+ max-width: 500px !important;
80
+ white-space: normal !important;
81
+ word-break: break-word !important;
82
+ overflow-wrap: break-word !important;
83
  }
84
 
85
  /* Style for inline SVG icons in table */
src/display/formatting.py CHANGED
@@ -2,9 +2,13 @@ def model_hyperlink(link, model_name):
2
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
3
 
4
 
5
- def make_clickable_model(model_name):
6
- link = f"https://huggingface.co/{model_name}"
7
- return model_hyperlink(link, model_name)
 
 
 
 
8
 
9
 
10
  def styled_error(error):
 
2
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
3
 
4
 
5
+ def make_clickable_model(model_name, link=None):
6
+ """Create a clickable model name with optional custom link"""
7
+ if link and link.strip():
8
+ # Use provided link (paper/code)
9
+ return model_hyperlink(link, model_name)
10
+ # No link provided, just return plain text
11
+ return model_name
12
 
13
 
14
  def styled_error(error):
src/display/utils.py CHANGED
@@ -22,11 +22,15 @@ class ColumnContent:
22
 
23
  ## Leaderboard columns
24
  auto_eval_column_dict = []
25
- # Init
 
 
26
  auto_eval_column_dict.append(
27
- ("model_type_symbol", ColumnContent, ColumnContent("T", "markdown", True, never_hidden=True))
28
  )
29
- auto_eval_column_dict.append(("model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)))
 
 
30
  # Scores
31
  for idx, task in enumerate(Tasks):
32
  # Only show overall ANLS (first task) by default
@@ -34,13 +38,10 @@ for idx, task in enumerate(Tasks):
34
  auto_eval_column_dict.append(
35
  (task.name, ColumnContent, ColumnContent(task.value.col_name, "number", displayed_by_default))
36
  )
37
- # Cost/Efficiency metrics
38
- auto_eval_column_dict.append(("agent_steps", ColumnContent, ColumnContent("Agent Steps", "number", True)))
39
- auto_eval_column_dict.append(("cost_usd", ColumnContent, ColumnContent("Cost (USD)", "number", True)))
40
- # Model information
41
- auto_eval_column_dict.append(("model_type", ColumnContent, ColumnContent("Model Type", "str", True)))
42
- auto_eval_column_dict.append(("submitted_by", ColumnContent, ColumnContent("Submitted By", "str", False)))
43
  auto_eval_column_dict.append(("submission_date", ColumnContent, ColumnContent("Submission Date", "str", False)))
 
44
 
45
  # We use make dataclass to dynamically fill the scores from Tasks
46
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
@@ -50,7 +51,7 @@ AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=
50
  class EvalQueueColumn: # Queue column
51
  model = ColumnContent("model", "markdown", True)
52
  model_type = ColumnContent("model_type", "str", True)
53
- submitted_by = ColumnContent("submitted_by", "str", True)
54
  status = ColumnContent("status", "str", True)
55
 
56
 
 
22
 
23
  ## Leaderboard columns
24
  auto_eval_column_dict = []
25
+ # Main columns (displayed by default, in order)
26
+ auto_eval_column_dict.append(("model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)))
27
+ auto_eval_column_dict.append(("organization", ColumnContent, ColumnContent("Organization", "str", True)))
28
  auto_eval_column_dict.append(
29
+ ("model_type_symbol", ColumnContent, ColumnContent("Model Type", "markdown", True, never_hidden=True))
30
  )
31
+ # Cost/Efficiency metrics
32
+ auto_eval_column_dict.append(("agent_steps", ColumnContent, ColumnContent("Agent Steps", "number", True)))
33
+ auto_eval_column_dict.append(("cost_usd", ColumnContent, ColumnContent("Cost (USD)", "number", True)))
34
  # Scores
35
  for idx, task in enumerate(Tasks):
36
  # Only show overall ANLS (first task) by default
 
38
  auto_eval_column_dict.append(
39
  (task.name, ColumnContent, ColumnContent(task.value.col_name, "number", displayed_by_default))
40
  )
41
+ # Model information (will be hidden in display but needed for filtering)
42
+ auto_eval_column_dict.append(("model_type", ColumnContent, ColumnContent("Type", "str", False, hidden=False)))
 
 
 
 
43
  auto_eval_column_dict.append(("submission_date", ColumnContent, ColumnContent("Submission Date", "str", False)))
44
+ auto_eval_column_dict.append(("link", ColumnContent, ColumnContent("Link", "str", False, hidden=True)))
45
 
46
  # We use make dataclass to dynamically fill the scores from Tasks
47
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
51
  class EvalQueueColumn: # Queue column
52
  model = ColumnContent("model", "markdown", True)
53
  model_type = ColumnContent("model_type", "str", True)
54
+ organization = ColumnContent("organization", "str", True)
55
  status = ColumnContent("status", "str", True)
56
 
57
 
src/leaderboard/read_evals.py CHANGED
@@ -20,8 +20,9 @@ class EvalResult:
20
  agent_steps: int = 0
21
  cost_usd: float = 0.0
22
  model_type: ModelType = ModelType.Unknown # API or open-weight
23
- submitted_by: str = ""
24
  submission_date: str = ""
 
25
 
26
  @classmethod
27
  def init_from_json_file(self, json_filepath):
@@ -52,8 +53,9 @@ class EvalResult:
52
  model_type = ModelType.from_str(model_type_str)
53
 
54
  # Extract submission info (will be updated from request file)
55
- submitted_by = data.get("submitted_by", "")
56
  submission_date = data.get("submission_date", "")
 
57
 
58
  # Create unique eval name
59
  eval_name = model_name.replace("/", "_").replace(" ", "_")
@@ -65,8 +67,9 @@ class EvalResult:
65
  agent_steps=agent_steps,
66
  cost_usd=cost_usd,
67
  model_type=model_type,
68
- submitted_by=submitted_by,
69
  submission_date=submission_date,
 
70
  )
71
 
72
  def update_with_request_file(self, requests_path):
@@ -77,8 +80,9 @@ class EvalResult:
77
  with open(request_file, "r") as f:
78
  request = json.load(f)
79
  self.model_type = ModelType.from_str(request.get("model_type", ""))
80
- self.submitted_by = request.get("submitted_by", "")
81
  self.submission_date = request.get("submitted_time", "")
 
82
  except Exception as e:
83
  print(f"Could not find request file for {self.model_name}: {e}")
84
 
@@ -87,12 +91,13 @@ class EvalResult:
87
  data_dict = {
88
  "eval_name": self.eval_name, # not a column, just a save name
89
  AutoEvalColumn.model_type_symbol.name: get_model_type_icon(self.model_type),
90
- AutoEvalColumn.model.name: make_clickable_model(self.model_name),
91
  AutoEvalColumn.agent_steps.name: self.agent_steps,
92
  AutoEvalColumn.cost_usd.name: self.cost_usd,
93
  AutoEvalColumn.model_type.name: self.model_type.value.name,
94
- AutoEvalColumn.submitted_by.name: self.submitted_by,
95
  AutoEvalColumn.submission_date.name: self.submission_date,
 
96
  }
97
 
98
  # Add individual task scores
@@ -155,11 +160,22 @@ MODEL_TYPE_ICON_MAP = {
155
 
156
 
157
  def get_model_type_icon(model_type: ModelType) -> str:
 
158
  data_uri = MODEL_TYPE_ICON_MAP.get(model_type)
 
 
 
159
  if data_uri:
160
- alt_text = model_type.value.display_name or model_type.value.name or "model"
161
- return f'<img src="{data_uri}" alt="{alt_text} icon" class="table-icon-img" />'
162
- return model_type.value.symbol
 
 
 
 
 
 
 
163
 
164
 
165
  def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
 
20
  agent_steps: int = 0
21
  cost_usd: float = 0.0
22
  model_type: ModelType = ModelType.Unknown # API or open-weight
23
+ organization: str = ""
24
  submission_date: str = ""
25
+ link: str = "" # Optional link to paper or code
26
 
27
  @classmethod
28
  def init_from_json_file(self, json_filepath):
 
53
  model_type = ModelType.from_str(model_type_str)
54
 
55
  # Extract submission info (will be updated from request file)
56
+ organization = data.get("organization", data.get("submitted_by", "")) # Backward compatibility
57
  submission_date = data.get("submission_date", "")
58
+ link = data.get("link", "")
59
 
60
  # Create unique eval name
61
  eval_name = model_name.replace("/", "_").replace(" ", "_")
 
67
  agent_steps=agent_steps,
68
  cost_usd=cost_usd,
69
  model_type=model_type,
70
+ organization=organization,
71
  submission_date=submission_date,
72
+ link=link,
73
  )
74
 
75
  def update_with_request_file(self, requests_path):
 
80
  with open(request_file, "r") as f:
81
  request = json.load(f)
82
  self.model_type = ModelType.from_str(request.get("model_type", ""))
83
+ self.organization = request.get("organization", request.get("submitted_by", "")) # Backward compatibility
84
  self.submission_date = request.get("submitted_time", "")
85
+ self.link = request.get("link", "")
86
  except Exception as e:
87
  print(f"Could not find request file for {self.model_name}: {e}")
88
 
 
91
  data_dict = {
92
  "eval_name": self.eval_name, # not a column, just a save name
93
  AutoEvalColumn.model_type_symbol.name: get_model_type_icon(self.model_type),
94
+ AutoEvalColumn.model.name: make_clickable_model(self.model_name, self.link),
95
  AutoEvalColumn.agent_steps.name: self.agent_steps,
96
  AutoEvalColumn.cost_usd.name: self.cost_usd,
97
  AutoEvalColumn.model_type.name: self.model_type.value.name,
98
+ AutoEvalColumn.organization.name: self.organization,
99
  AutoEvalColumn.submission_date.name: self.submission_date,
100
+ AutoEvalColumn.link.name: self.link,
101
  }
102
 
103
  # Add individual task scores
 
160
 
161
 
162
  def get_model_type_icon(model_type: ModelType) -> str:
163
+ """Returns icon + colored text for model type"""
164
  data_uri = MODEL_TYPE_ICON_MAP.get(model_type)
165
+ type_name = model_type.value.name
166
+ type_color = model_type.value.color
167
+
168
  if data_uri:
169
+ # Icon + colored text in a flex container to keep them inline
170
+ alt_text = model_type.value.display_name or type_name or "model"
171
+ icon_html = f'<img src="{data_uri}" alt="{alt_text} icon" class="table-icon-img" style="vertical-align: middle;" />'
172
+ text_html = f'<span style="color: {type_color}; font-weight: 500; margin-left: 6px; vertical-align: middle;">{type_name}</span>'
173
+ return f'<div style="display: inline-flex; align-items: center; white-space: nowrap;">{icon_html}{text_html}</div>'
174
+
175
+ # Fallback: emoji + colored text
176
+ symbol = model_type.value.symbol
177
+ text_html = f'<span style="color: {type_color}; font-weight: 500; margin-left: 4px;">{type_name}</span>'
178
+ return f'<div style="display: inline-flex; align-items: center; white-space: nowrap;">{symbol}{text_html}</div>'
179
 
180
 
181
  def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
src/populate.py CHANGED
@@ -35,12 +35,15 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
35
  with open(file_path) as fp:
36
  data = json.load(fp)
37
 
38
- data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
 
 
 
 
39
  # Ensure all required columns are present
40
  if EvalQueueColumn.model_type.name not in data:
41
  data[EvalQueueColumn.model_type.name] = "unknown"
42
- if EvalQueueColumn.submitted_by.name not in data:
43
- data[EvalQueueColumn.submitted_by.name] = "unknown"
44
 
45
  all_evals.append(data)
46
  elif ".md" not in entry:
@@ -53,12 +56,15 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
53
  with open(file_path) as fp:
54
  data = json.load(fp)
55
 
56
- data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
 
 
 
 
57
  # Ensure all required columns are present
58
  if EvalQueueColumn.model_type.name not in data:
59
  data[EvalQueueColumn.model_type.name] = "unknown"
60
- if EvalQueueColumn.submitted_by.name not in data:
61
- data[EvalQueueColumn.submitted_by.name] = "unknown"
62
  all_evals.append(data)
63
 
64
  pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
 
35
  with open(file_path) as fp:
36
  data = json.load(fp)
37
 
38
+ # Handle organization (backward compatible with submitted_by)
39
+ org = data.get("organization", data.get("submitted_by", "unknown"))
40
+ link = data.get("link", "")
41
+
42
+ data[EvalQueueColumn.model.name] = make_clickable_model(data["model"], link)
43
  # Ensure all required columns are present
44
  if EvalQueueColumn.model_type.name not in data:
45
  data[EvalQueueColumn.model_type.name] = "unknown"
46
+ data[EvalQueueColumn.organization.name] = org
 
47
 
48
  all_evals.append(data)
49
  elif ".md" not in entry:
 
56
  with open(file_path) as fp:
57
  data = json.load(fp)
58
 
59
+ # Handle organization (backward compatible with submitted_by)
60
+ org = data.get("organization", data.get("submitted_by", "unknown"))
61
+ link = data.get("link", "")
62
+
63
+ data[EvalQueueColumn.model.name] = make_clickable_model(data["model"], link)
64
  # Ensure all required columns are present
65
  if EvalQueueColumn.model_type.name not in data:
66
  data[EvalQueueColumn.model_type.name] = "unknown"
67
+ data[EvalQueueColumn.organization.name] = org
 
68
  all_evals.append(data)
69
 
70
  pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
src/submission/submit.py CHANGED
@@ -66,9 +66,10 @@ def validate_jsonl_submission(file_path):
66
 
67
  def add_new_eval(
68
  model_name: str,
69
- submitted_by: str,
70
  model_type: str,
71
  predictions_file,
 
72
  ):
73
  global REQUESTED_MODELS
74
  global USERS_TO_SUBMISSION_DATES
@@ -81,8 +82,8 @@ def add_new_eval(
81
  if not model_name or model_name.strip() == "":
82
  return styled_error("Please provide a model name.")
83
 
84
- if not submitted_by or submitted_by.strip() == "":
85
- return styled_error("Please provide your name/organization.")
86
 
87
  if model_type is None or model_type == "":
88
  return styled_error("Please select a model type (API or Open-weight).")
@@ -107,8 +108,8 @@ def add_new_eval(
107
  print("Adding new eval")
108
 
109
  # Prepare directories
110
- OUT_DIR = f"{EVAL_REQUESTS_PATH}/{submitted_by}"
111
- PREDICTIONS_DIR = f"{EVAL_RESULTS_PATH}/{submitted_by}"
112
  os.makedirs(OUT_DIR, exist_ok=True)
113
  os.makedirs(PREDICTIONS_DIR, exist_ok=True)
114
 
@@ -139,9 +140,10 @@ def add_new_eval(
139
  "cost_usd": 0.0, # Placeholder
140
  "model_type": model_type.lower(),
141
  },
142
- "submitted_by": submitted_by,
143
  "submission_date": current_time,
144
  "num_predictions": num_predictions,
 
145
  }
146
 
147
  # Save results file
@@ -152,10 +154,11 @@ def add_new_eval(
152
  # Create request entry for queue
153
  eval_request = {
154
  "model": model_name,
155
- "submitted_by": submitted_by,
156
  "model_type": model_type,
157
  "status": "PENDING", # Will be set to FINISHED after evaluation
158
  "submitted_time": current_time,
 
159
  }
160
 
161
  # Save request file
 
66
 
67
  def add_new_eval(
68
  model_name: str,
69
+ organization: str,
70
  model_type: str,
71
  predictions_file,
72
+ link: str = "",
73
  ):
74
  global REQUESTED_MODELS
75
  global USERS_TO_SUBMISSION_DATES
 
82
  if not model_name or model_name.strip() == "":
83
  return styled_error("Please provide a model name.")
84
 
85
+ if not organization or organization.strip() == "":
86
+ return styled_error("Please provide your organization name.")
87
 
88
  if model_type is None or model_type == "":
89
  return styled_error("Please select a model type (API or Open-weight).")
 
108
  print("Adding new eval")
109
 
110
  # Prepare directories
111
+ OUT_DIR = f"{EVAL_REQUESTS_PATH}/{organization}"
112
+ PREDICTIONS_DIR = f"{EVAL_RESULTS_PATH}/{organization}"
113
  os.makedirs(OUT_DIR, exist_ok=True)
114
  os.makedirs(PREDICTIONS_DIR, exist_ok=True)
115
 
 
140
  "cost_usd": 0.0, # Placeholder
141
  "model_type": model_type.lower(),
142
  },
143
+ "organization": organization,
144
  "submission_date": current_time,
145
  "num_predictions": num_predictions,
146
+ "link": link.strip() if link else "",
147
  }
148
 
149
  # Save results file
 
154
  # Create request entry for queue
155
  eval_request = {
156
  "model": model_name,
157
+ "organization": organization,
158
  "model_type": model_type,
159
  "status": "PENDING", # Will be set to FINISHED after evaluation
160
  "submitted_time": current_time,
161
+ "link": link.strip() if link else "",
162
  }
163
 
164
  # Save request file