SuveenE committed on
Commit
7747e34
·
1 Parent(s): cdb0177
Files changed (1) hide show
  1. app.py +41 -102
app.py CHANGED
@@ -57,57 +57,53 @@ def search_datasets_fn(query: str) -> List[str]:
57
  return []
58
 
59
 
60
- def download_and_list_episodes(repo_id: str, progress=gr.Progress()):
61
  """Download dataset and list available episodes"""
62
  if not repo_id:
63
- return "Please provide a dataset repo ID.", []
64
 
65
  token = os.environ.get("HF_TOKEN")
66
  temp_dir = tempfile.mkdtemp(prefix="episode_delete_")
67
 
68
  try:
69
- progress(0.3, desc="Downloading dataset...")
70
  download_dataset(repo_id, temp_dir, hf_token=token)
71
-
72
- progress(0.7, desc="Listing episodes...")
73
  episodes = list_episodes(temp_dir)
74
 
75
- if not episodes:
76
- return f"No episodes found in {repo_id}", []
77
 
78
- # Format episodes as choices
79
- episode_choices = [f"Episode {ep}" for ep in episodes]
80
 
81
- return (
82
- f"Downloaded {repo_id}. Found {len(episodes)} episodes.",
83
- gr.update(choices=episode_choices, value=[]),
84
- )
85
  except Exception as e:
86
- return f"Error: {str(e)}", gr.update(choices=[], value=[])
 
87
 
88
 
89
- def delete_episodes_stream(repo_id: str, selected_episodes: List[str], dest_repo_id: str):
90
  """Delete selected episodes and upload to destination repo"""
91
  if not repo_id:
92
  yield "Please provide a source dataset repo ID."
93
  return
94
 
95
- if not selected_episodes:
96
- yield "Please select at least one episode to delete."
97
  return
98
 
99
- if not dest_repo_id:
100
- yield "Please provide a destination repo ID."
101
- return
102
 
103
- # Parse episode numbers from selection (format: "Episode 0", "Episode 1", etc.)
104
  episode_indexes = []
105
- for ep_str in selected_episodes:
106
  try:
107
- ep_num = int(ep_str.replace("Episode ", ""))
108
  episode_indexes.append(ep_num)
109
  except ValueError:
110
- yield f"Invalid episode format: {ep_str}"
111
  return
112
 
113
  token = os.environ.get("HF_TOKEN")
@@ -182,105 +178,48 @@ def delete_episodes_stream(repo_id: str, selected_episodes: List[str], dest_repo
182
 
183
 
184
  # Build the Gradio interface
185
- with gr.Blocks(title="LeRobot Episode Deleter", theme=gr.themes.Soft()) as demo:
186
- gr.Markdown("# 🗑️ LeRobot Dataset Episode Deleter")
187
- gr.Markdown(
188
- "Delete specific episodes from a HuggingFace LeRobot dataset and upload the cleaned version."
189
- )
190
 
191
  # Load initial datasets
192
  _initial_choices = search_datasets_fn("griffinlabs-cortex")
193
 
194
- with gr.Row():
195
- search_box = gr.Textbox(
196
- label="Search Datasets (optional)",
197
- placeholder="Enter keyword or organization name (e.g., 'lerobot', 'griffinlabs-cortex')",
198
- value="griffinlabs-cortex"
199
- )
200
- search_btn = gr.Button("🔍 Search")
201
-
202
- with gr.Row():
203
- dataset_dropdown = gr.Dropdown(
204
- label="Select Dataset",
205
- choices=_initial_choices,
206
- interactive=True,
207
- allow_custom_value=True,
208
- )
209
-
210
- with gr.Row():
211
- download_btn = gr.Button("📥 Download & List Episodes", variant="secondary", size="lg")
212
 
213
- download_status = gr.Textbox(
214
- label="Status",
215
- lines=2,
216
  interactive=False,
 
217
  )
218
 
219
- gr.Markdown("---")
220
- gr.Markdown("### Select Episodes to Delete")
221
-
222
- episodes_selector = gr.CheckboxGroup(
223
- label="Episodes (select to delete)",
224
- choices=[],
225
- interactive=True,
226
  )
227
 
228
- selected_count = gr.Markdown("*No episodes selected*")
229
-
230
- gr.Markdown("---")
231
- gr.Markdown("### Configure Destination & Execute")
232
-
233
  dest_repo_input = gr.Textbox(
234
- label="Destination Repository ID",
235
- placeholder="your-org/cleaned-dataset",
236
- info="The HuggingFace repo where the cleaned dataset will be uploaded"
237
  )
238
 
239
- with gr.Row():
240
- execute_btn = gr.Button(
241
- "🚀 Delete Episodes & Upload",
242
- variant="primary",
243
- size="lg",
244
- )
245
 
246
- progress_log = gr.Textbox(
247
- label="Progress Log",
248
- lines=20,
249
- interactive=False,
250
- max_lines=25,
251
- )
252
 
253
  # Event handlers
254
- def update_search_results(query):
255
- results = search_datasets_fn(query)
256
- return gr.update(choices=results, value=None)
257
-
258
- search_btn.click(
259
- update_search_results,
260
- inputs=search_box,
261
- outputs=dataset_dropdown,
262
- )
263
-
264
- download_btn.click(
265
- download_and_list_episodes,
266
  inputs=dataset_dropdown,
267
- outputs=[download_status, episodes_selector],
268
- )
269
-
270
- def update_selected_count(selected):
271
- if not selected:
272
- return "*No episodes selected*"
273
- return f"**{len(selected)} episode(s) selected for deletion**"
274
-
275
- episodes_selector.change(
276
- update_selected_count,
277
- inputs=episodes_selector,
278
- outputs=selected_count,
279
  )
280
 
281
  execute_btn.click(
282
  delete_episodes_stream,
283
- inputs=[dataset_dropdown, episodes_selector, dest_repo_input],
284
  outputs=progress_log,
285
  )
286
 
 
57
  return []
58
 
59
 
60
+ def load_episodes_for_dataset(repo_id: str):
61
  """Download dataset and list available episodes"""
62
  if not repo_id:
63
+ return ""
64
 
65
  token = os.environ.get("HF_TOKEN")
66
  temp_dir = tempfile.mkdtemp(prefix="episode_delete_")
67
 
68
  try:
 
69
  download_dataset(repo_id, temp_dir, hf_token=token)
 
 
70
  episodes = list_episodes(temp_dir)
71
 
72
+ # Cleanup temp directory
73
+ shutil.rmtree(temp_dir, ignore_errors=True)
74
 
75
+ if not episodes:
76
+ return "No episodes found"
77
 
78
+ # Return info about available episodes
79
+ return f"Found {len(episodes)} episodes: {', '.join(map(str, episodes))}"
 
 
80
  except Exception as e:
81
+ print(f"Error loading episodes: {e}")
82
+ return f"Error: {e}"
83
 
84
 
85
+ def delete_episodes_stream(repo_id: str, episode_indexes_str: str, dest_repo_id: str):
86
  """Delete selected episodes and upload to destination repo"""
87
  if not repo_id:
88
  yield "Please provide a source dataset repo ID."
89
  return
90
 
91
+ if not episode_indexes_str or not episode_indexes_str.strip():
92
+ yield "Please provide at least one episode index to delete."
93
  return
94
 
95
+ # If no destination provided, use the same repo name
96
+ if not dest_repo_id or not dest_repo_id.strip():
97
+ dest_repo_id = repo_id
98
 
99
+ # Parse comma-separated episode indexes
100
  episode_indexes = []
101
+ for ep_str in episode_indexes_str.split(","):
102
  try:
103
+ ep_num = int(ep_str.strip())
104
  episode_indexes.append(ep_num)
105
  except ValueError:
106
+ yield f"Invalid episode index: {ep_str.strip()}"
107
  return
108
 
109
  token = os.environ.get("HF_TOKEN")
 
178
 
179
 
180
  # Build the Gradio interface
181
+ with gr.Blocks(title="LeRobot Episode Deleter") as demo:
182
+ gr.Markdown("**Delete specific episodes from a Hugging Face dataset (LeRobot format).**")
 
 
 
183
 
184
  # Load initial datasets
185
  _initial_choices = search_datasets_fn("griffinlabs-cortex")
186
 
187
+ dataset_dropdown = gr.Dropdown(
188
+ label="Select dataset",
189
+ choices=_initial_choices,
190
+ interactive=True,
191
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
+ episodes_info = gr.Textbox(
194
+ label="Available episodes",
 
195
  interactive=False,
196
+ lines=2
197
  )
198
 
199
+ episodes_input = gr.Textbox(
200
+ label="Episode indexes to delete (comma-separated)",
201
+ placeholder="0, 1, 2"
 
 
 
 
202
  )
203
 
 
 
 
 
 
204
  dest_repo_input = gr.Textbox(
205
+ label="Destination repo id (leave empty to use same repo)",
206
+ placeholder="org/cleaned_dataset"
 
207
  )
208
 
209
+ execute_btn = gr.Button("Delete Episodes and Upload")
 
 
 
 
 
210
 
211
+ progress_log = gr.Textbox(label="Progress log", lines=20)
 
 
 
 
 
212
 
213
  # Event handlers
214
+ dataset_dropdown.change(
215
+ load_episodes_for_dataset,
 
 
 
 
 
 
 
 
 
 
216
  inputs=dataset_dropdown,
217
+ outputs=episodes_info,
 
 
 
 
 
 
 
 
 
 
 
218
  )
219
 
220
  execute_btn.click(
221
  delete_episodes_stream,
222
+ inputs=[dataset_dropdown, episodes_input, dest_repo_input],
223
  outputs=progress_log,
224
  )
225