Fhrozen committed on
Commit
8eaaf2d
·
1 Parent(s): 5bba573

add Dataset Class for management of requests

Browse files
.gitignore CHANGED
@@ -7,6 +7,7 @@ __pycache__/
7
  *ipynb
8
  .vscode/
9
 
 
10
  eval-queue/
11
  eval-results/
12
  eval-queue-bk/
 
7
  *ipynb
8
  .vscode/
9
 
10
+ hf_cache/
11
  eval-queue/
12
  eval-results/
13
  eval-queue-bk/
app.py CHANGED
@@ -3,7 +3,6 @@ ESPnet Leaderboard - A Gradio-based leaderboard with multiple tabs and paginatio
3
  """
4
  from dotenv import load_dotenv
5
  import gradio as gr
6
- import os
7
 
8
  from espn_ldbd.display.css_html_js import CUSTOM_CSS
9
  from espn_ldbd.display.tabs import (
@@ -40,11 +39,17 @@ def create_app():
40
  # Create tabs for different tasks
41
  with gr.Tabs():
42
  for task in leaderboard_ds.get_tasks:
 
43
  with gr.Tab(task["task_title"]):
44
- create_leaderboard_tab(task["task_title"], leaderboard_data, rows_per_page=30)
 
 
 
 
 
45
 
46
  with gr.Tab("Request a model"):
47
- create_submit_tab()
48
 
49
  # Footer
50
  gr.HTML("""
@@ -62,6 +67,7 @@ if __name__ == "__main__":
62
  # Initialize leaderboard data manager
63
  leaderboard_data = LeaderboardData()
64
  leaderboard_ds = LeaderboardDataset()
 
65
 
66
  # Launch app
67
  app = create_app()
 
3
  """
4
  from dotenv import load_dotenv
5
  import gradio as gr
 
6
 
7
  from espn_ldbd.display.css_html_js import CUSTOM_CSS
8
  from espn_ldbd.display.tabs import (
 
39
  # Create tabs for different tasks
40
  with gr.Tabs():
41
  for task in leaderboard_ds.get_tasks:
42
+ print(f"[App] Making Tab for task: {task["task_id"]}")
43
  with gr.Tab(task["task_title"]):
44
+ create_leaderboard_tab(
45
+ task["task_title"],
46
+ leaderboard_ds.get_subtasks(task["task_id"]),
47
+ leaderboard_data,
48
+ rows_per_page=30
49
+ )
50
 
51
  with gr.Tab("Request a model"):
52
+ create_submit_tab(leaderboard_ds)
53
 
54
  # Footer
55
  gr.HTML("""
 
67
  # Initialize leaderboard data manager
68
  leaderboard_data = LeaderboardData()
69
  leaderboard_ds = LeaderboardDataset()
70
+ leaderboard_ds.start_register_submission(minutes=30)
71
 
72
  # Launch app
73
  app = create_app()
espn_ldbd/display/tabs.py CHANGED
@@ -2,12 +2,16 @@ import gradio as gr
2
  import math
3
  import re
4
 
5
- from espn_ldbd.leaderboard.data import LeaderboardData
 
 
 
6
  from espn_ldbd.submission.submit import submit_repo
7
 
8
 
9
  def create_leaderboard_tab(
10
  task_name: str,
 
11
  leaderboard_data: LeaderboardData,
12
  rows_per_page: int = 30
13
  ):
@@ -15,22 +19,34 @@ def create_leaderboard_tab(
15
 
16
  # State to track current page
17
  page_state = gr.State(value=1)
 
18
 
19
  with gr.Column():
20
  # Info section
21
  gr.Markdown(f"## {task_name} Leaderboard")
22
  gr.Markdown(f"Showing top performing models for {task_name.lower()} task")
23
-
24
- # Rows per page selector
25
  with gr.Row():
26
- rows_dropdown = gr.Dropdown(
27
- choices=[10, 20, 30, 50],
28
- value=rows_per_page,
29
- label="Rows per page",
30
- scale=1
31
- )
32
- refresh_btn = gr.Button("🔄 Refresh Data", scale=1)
33
-
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Dataframe display
35
  dataframe = gr.Dataframe(
36
  value=leaderboard_data.get_paginated_data(task_name, 1, rows_per_page)[0],
@@ -109,7 +125,8 @@ def create_leaderboard_tab(
109
  return dataframe, page_info, page_state
110
 
111
 
112
- def create_submit_tab():
 
113
  def _submit_model(model_text):
114
  if (model_text is None) or len(model_text) < 1:
115
  return model_text
@@ -131,11 +148,21 @@ def create_submit_tab():
131
  )
132
  return model_text
133
 
134
- gr.Info(
135
- f"Model id <b>{model_text}</b> submitted.<br/>"
136
- "Thank you for your submission."
 
 
 
 
 
 
 
 
 
137
  )
138
- return ""
 
139
 
140
  with gr.Column():
141
  gr.Markdown("## Request a model here!")
 
2
  import math
3
  import re
4
 
5
+ from espn_ldbd.leaderboard.data import (
6
+ LeaderboardData,
7
+ LeaderboardDataset,
8
+ )
9
  from espn_ldbd.submission.submit import submit_repo
10
 
11
 
12
  def create_leaderboard_tab(
13
  task_name: str,
14
+ sub_tasks: dict,
15
  leaderboard_data: LeaderboardData,
16
  rows_per_page: int = 30
17
  ):
 
19
 
20
  # State to track current page
21
  page_state = gr.State(value=1)
22
+ selected_subtask = gr.State(value=0)
23
 
24
  with gr.Column():
25
  # Info section
26
  gr.Markdown(f"## {task_name} Leaderboard")
27
  gr.Markdown(f"Showing top performing models for {task_name.lower()} task")
28
+
 
29
  with gr.Row():
30
+ with gr.Column():
31
+ subtask_dropdown = gr.Dropdown(
32
+ label="Sub Tasks",
33
+ choices=[(sub_tasks[x]["sub_task_title"], x) for x in range(len(sub_tasks))],
34
+ value=0
35
+ )
36
+ with gr.Column():
37
+ pass
38
+
39
+ with gr.Row():
40
+ with gr.Column():
41
+ rows_dropdown = gr.Dropdown(
42
+ choices=[10, 20, 30, 50],
43
+ value=rows_per_page,
44
+ label="Rows per page",
45
+ scale=1
46
+ )
47
+ with gr.Column():
48
+ refresh_btn = gr.Button("🔄 Refresh Data", scale=1)
49
+
50
  # Dataframe display
51
  dataframe = gr.Dataframe(
52
  value=leaderboard_data.get_paginated_data(task_name, 1, rows_per_page)[0],
 
125
  return dataframe, page_info, page_state
126
 
127
 
128
+ def create_submit_tab(leaderboard_ds: LeaderboardDataset):
129
+
130
  def _submit_model(model_text):
131
  if (model_text is None) or len(model_text) < 1:
132
  return model_text
 
148
  )
149
  return model_text
150
 
151
+ ok_submit, message = leaderboard_ds.submit_repoid(model_text)
152
+
153
+ if ok_submit:
154
+ gr.Info(
155
+ f"Model id <b>{model_text}</b> submitted.<br/>"
156
+ "Thank you for your submission."
157
+ )
158
+ return ""
159
+
160
+ gr.Warning(
161
+ f"Model id <b>{model_text}</b> cannot be submitted.<br/>"
162
+ f"{message}."
163
  )
164
+ return model_text
165
+
166
 
167
  with gr.Column():
168
  gr.Markdown("## Request a model here!")
espn_ldbd/leaderboard/data.py CHANGED
@@ -1,12 +1,14 @@
1
  import os
2
- import time
3
  import threading
4
  import math
5
  from typing import Dict, Tuple
6
 
7
  import pandas as pd
8
 
9
- from datasets import Dataset, load_dataset
 
 
 
10
 
11
 
12
  class LeaderboardData:
@@ -117,26 +119,89 @@ class LeaderboardData:
117
 
118
 
119
  class LeaderboardDataset:
120
- def __init__(self):
121
  repo_id = os.environ.get("ESPNET_DB", None)
122
  assert repo_id is not None
123
-
124
  self._tasks = load_dataset(repo_id, "task_db", split="train")
 
 
125
  self._new_submits = []
126
 
127
  self._stop_event = threading.Event()
128
  self._background_submission = None
 
 
 
 
129
  return
130
 
131
  @property
132
  def get_tasks(self):
133
  return self._tasks
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  def _periodic_submissions_update(self):
136
  if len(self._new_submits) == 0:
 
137
  return
 
 
 
138
  new_ds = Dataset.from_list(self._new_submits)
139
-
 
 
140
  self._new_submits = []
141
  return
142
 
 
1
  import os
 
2
  import threading
3
  import math
4
  from typing import Dict, Tuple
5
 
6
  import pandas as pd
7
 
8
+ from datasets import Dataset, load_dataset, concatenate_datasets
9
+
10
+ from huggingface_hub import HfApi
11
+ from huggingface_hub.utils import HfHubHTTPError
12
 
13
 
14
  class LeaderboardData:
 
119
 
120
 
121
  class LeaderboardDataset:
122
+ def __init__(self, min_time_submit: float = 10.0):
123
  repo_id = os.environ.get("ESPNET_DB", None)
124
  assert repo_id is not None
125
+ self._repo_id = repo_id
126
  self._tasks = load_dataset(repo_id, "task_db", split="train")
127
+ self._subtasks_db = load_dataset(repo_id, "sub_task_db", split="train")
128
+ self._subtasks_cache = {}
129
  self._new_submits = []
130
 
131
  self._stop_event = threading.Event()
132
  self._background_submission = None
133
+ self._hf_api = HfApi()
134
+ self._submit_lock = threading.Lock()
135
+ self._submit_timer = None
136
+ self._submit_period = min_time_submit
137
  return
138
 
139
  @property
140
  def get_tasks(self):
141
  return self._tasks
142
 
143
+ def get_subtasks(self, task_id: str):
144
+ if task_id not in self._subtasks_cache:
145
+ subtasks = self._subtasks_db.filter(
146
+ lambda sample: sample["task_id"] == task_id
147
+ )
148
+ self._subtasks_cache[task_id] = subtasks
149
+ return self._subtasks_cache[task_id]
150
+
151
+ def submit_repoid(self, repo_id: str) -> bool:
152
+ # Try to acquire lock without blocking
153
+ if not self._submit_lock.acquire(blocking=False):
154
+ return False, "Submission already in progress."
155
+
156
+ message = f"Error in submitting {repo_id}."
157
+ try:
158
+ # Cancel any existing timer
159
+ if self._submit_timer is not None:
160
+ self._submit_timer.cancel()
161
+
162
+ # Set a timer to release the lock after execution
163
+ def _release_lock():
164
+ self._submit_lock.release()
165
+ # print(f"[Dataset] Submit lock released after timeout")
166
+
167
+ # Validate existance of repository
168
+ try:
169
+ self._hf_api.model_info(repo_id)
170
+ except HfHubHTTPError as e:
171
+ return False, "The submitted repository does not exist."
172
+
173
+ # TODO(Fhrozen): add validation of repo_id for ESPnet format
174
+ self._new_submits.append({
175
+ "model_id": repo_id,
176
+ "reviewed": False,
177
+ "date_review": "",
178
+ "commit_version": "",
179
+ "valid_repo": True,
180
+ "need_review": True,
181
+ })
182
+ # Set timer to auto-release lock after 5 seconds
183
+ self._submit_timer = threading.Timer(self._submit_period, _release_lock)
184
+ self._submit_timer.start()
185
+
186
+ return True, ""
187
+ except Exception as e:
188
+ # Release lock on any error
189
+ self._submit_lock.release()
190
+ # print(f"[Dataset] Error in submit_repoid: {e}")
191
+ message += f" {e}"
192
+ return False, message
193
+
194
  def _periodic_submissions_update(self):
195
  if len(self._new_submits) == 0:
196
+ print("[Dataset] No additional submits were found.")
197
  return
198
+
199
+ request_ds = load_dataset(self._repo_id, "request_db", split="train")
200
+ num_rows = len(self._new_submits)
201
  new_ds = Dataset.from_list(self._new_submits)
202
+ request_ds = concatenate_datasets([request_ds, new_ds])
203
+ print(f"[Dataset] Adding {num_rows} new rows to request db.")
204
+ request_ds.push_to_hub(self._repo_id, config_name="request_db")
205
  self._new_submits = []
206
  return
207
 
requirements.txt CHANGED
@@ -6,13 +6,13 @@ gradio
6
  gradio[oauth]
7
  gradio_client
8
  plotly
9
- huggingface-hub>=0.18.0
10
  matplotlib
11
  numpy
12
  pandas
13
  python-dateutil
14
  tqdm
15
- transformers
16
  tokenizers>=0.15.0
17
  sentencepiece
18
  python-dotenv
 
6
  gradio[oauth]
7
  gradio_client
8
  plotly
9
+ huggingface-hub>=1.1.0
10
  matplotlib
11
  numpy
12
  pandas
13
  python-dateutil
14
  tqdm
15
+ transformers @ git+https://github.com/huggingface/transformers.git@2a61590
16
  tokenizers>=0.15.0
17
  sentencepiece
18
  python-dotenv