Rachel Ding committed on
Commit
afef5d4
·
1 Parent(s): 5609e0c

Reduce audio row height (compact-audio CSS); sync dataset_loader

Browse files
Files changed (2) hide show
  1. app.py +19 -15
  2. dataset_loader.py +41 -9
app.py CHANGED
@@ -68,6 +68,10 @@ with gr.Blocks(
68
  .gradio-container { max-width: 1400px; }
69
  /* Results view: force all 4 spec images (Gaussian, Youtube, Ours, NN baseline) to same size */
70
  #results-column img { width: 700px !important; height: 280px !important; object-fit: contain; }
 
 
 
 
71
  """,
72
  ) as app:
73
  gr.Markdown("# NearestNeighbor Audio Demo")
@@ -103,9 +107,9 @@ with gr.Blocks(
103
  nn_outputs.append(nn_p_md)
104
  nn_img = gr.Image(label=f"NN{i+1}", show_label=True, height=480)
105
  nn_outputs.append(nn_img)
106
- nn_bg = gr.Audio(label="BG", show_label=True)
107
- nn_fg = gr.Audio(label="FG", show_label=True)
108
- nn_m = gr.Audio(label="Mix", show_label=True)
109
  nn_outputs.extend([nn_bg, nn_fg, nn_m])
110
 
111
  # ---- Results View: 3 prompts, each with 2 rows (Gaussian|Youtube, Ours|NN baseline) ----
@@ -122,26 +126,26 @@ with gr.Blocks(
122
  with gr.Row():
123
  with gr.Column():
124
  res_outputs.append(gr.Image(label="Gaussian", **spec_size))
125
- res_outputs.append(gr.Audio(label="BG"))
126
- res_outputs.append(gr.Audio(label="FG"))
127
- res_outputs.append(gr.Audio(label="Mix"))
128
  with gr.Column():
129
  res_outputs.append(gr.Image(label="Youtube", **spec_size))
130
- res_outputs.append(gr.Audio(label="BG"))
131
- res_outputs.append(gr.Audio(label="FG"))
132
- res_outputs.append(gr.Audio(label="Mix"))
133
  # Row 2: Ours | NN baseline (spec + BG/FG/Mix under each)
134
  with gr.Row():
135
  with gr.Column():
136
  res_outputs.append(gr.Image(label="Ours", **spec_size))
137
- res_outputs.append(gr.Audio(label="BG"))
138
- res_outputs.append(gr.Audio(label="FG"))
139
- res_outputs.append(gr.Audio(label="Mix"))
140
  with gr.Column():
141
  res_outputs.append(gr.Image(label="NN baseline", **spec_size))
142
- res_outputs.append(gr.Audio(label="BG"))
143
- res_outputs.append(gr.Audio(label="FG"))
144
- res_outputs.append(gr.Audio(label="Mix"))
145
 
146
  def on_change(sid, view):
147
  use_dasheng = view in ("Nearest Neighbor (Dasheng)", "Results (Dasheng)")
 
68
  .gradio-container { max-width: 1400px; }
69
  /* Results view: force all 4 spec images (Gaussian, Youtube, Ours, NN baseline) to same size */
70
  #results-column img { width: 700px !important; height: 280px !important; object-fit: contain; }
71
+ /* Reduce audio player row height (BG/FG/Mix) */
72
+ .compact-audio .gr-form { min-height: 0 !important; }
73
+ .compact-audio > div { min-height: 0 !important; max-height: 72px !important; }
74
+ .compact-audio audio { max-height: 48px !important; }
75
  """,
76
  ) as app:
77
  gr.Markdown("# NearestNeighbor Audio Demo")
 
107
  nn_outputs.append(nn_p_md)
108
  nn_img = gr.Image(label=f"NN{i+1}", show_label=True, height=480)
109
  nn_outputs.append(nn_img)
110
+ nn_bg = gr.Audio(label="BG", show_label=True, elem_classes=["compact-audio"])
111
+ nn_fg = gr.Audio(label="FG", show_label=True, elem_classes=["compact-audio"])
112
+ nn_m = gr.Audio(label="Mix", show_label=True, elem_classes=["compact-audio"])
113
  nn_outputs.extend([nn_bg, nn_fg, nn_m])
114
 
115
  # ---- Results View: 3 prompts, each with 2 rows (Gaussian|Youtube, Ours|NN baseline) ----
 
126
  with gr.Row():
127
  with gr.Column():
128
  res_outputs.append(gr.Image(label="Gaussian", **spec_size))
129
+ res_outputs.append(gr.Audio(label="BG", elem_classes=["compact-audio"]))
130
+ res_outputs.append(gr.Audio(label="FG", elem_classes=["compact-audio"]))
131
+ res_outputs.append(gr.Audio(label="Mix", elem_classes=["compact-audio"]))
132
  with gr.Column():
133
  res_outputs.append(gr.Image(label="Youtube", **spec_size))
134
+ res_outputs.append(gr.Audio(label="BG", elem_classes=["compact-audio"]))
135
+ res_outputs.append(gr.Audio(label="FG", elem_classes=["compact-audio"]))
136
+ res_outputs.append(gr.Audio(label="Mix", elem_classes=["compact-audio"]))
137
  # Row 2: Ours | NN baseline (spec + BG/FG/Mix under each)
138
  with gr.Row():
139
  with gr.Column():
140
  res_outputs.append(gr.Image(label="Ours", **spec_size))
141
+ res_outputs.append(gr.Audio(label="BG", elem_classes=["compact-audio"]))
142
+ res_outputs.append(gr.Audio(label="FG", elem_classes=["compact-audio"]))
143
+ res_outputs.append(gr.Audio(label="Mix", elem_classes=["compact-audio"]))
144
  with gr.Column():
145
  res_outputs.append(gr.Image(label="NN baseline", **spec_size))
146
+ res_outputs.append(gr.Audio(label="BG", elem_classes=["compact-audio"]))
147
+ res_outputs.append(gr.Audio(label="FG", elem_classes=["compact-audio"]))
148
+ res_outputs.append(gr.Audio(label="Mix", elem_classes=["compact-audio"]))
149
 
150
  def on_change(sid, view):
151
  use_dasheng = view in ("Nearest Neighbor (Dasheng)", "Results (Dasheng)")
dataset_loader.py CHANGED
@@ -248,6 +248,24 @@ def get_results_demo_paths(bid: str, root_prefix: Optional[str] = None) -> dict:
248
  folders.sort(key=lambda x: x[0])
249
  return folders
250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  baseline_folders = get_baseline_folders(baseline_inner, baseline_files)
252
  youtube_folders = get_youtube_folders()
253
 
@@ -267,12 +285,11 @@ def get_results_demo_paths(bid: str, root_prefix: Optional[str] = None) -> dict:
267
 
268
  bl_youtube = {"spec": None, "bg_wav": None, "fg_wav": None, "m_wav": None}
269
  if use_dasheng:
270
- # Dasheng: match by prompt -> folder name (spaces to underscores)
271
  folder_name = prompt_text.replace(" ", "_") if prompt_text else ""
272
- for fn, fp in youtube_folders:
273
- if fn == folder_name:
274
- bl_youtube = _collect_block(youtube_files, fp)
275
- break
276
  else:
277
  for fn, fp in youtube_folders:
278
  if fn.startswith(rel_prefix):
@@ -280,11 +297,26 @@ def get_results_demo_paths(bid: str, root_prefix: Optional[str] = None) -> dict:
280
  break
281
 
282
  if use_dasheng:
283
- # Dasheng: "ours" = prompt-named folder under inner
284
  folder_name = prompt_text.replace(" ", "_") if prompt_text else ""
285
- ours_prefix = f"{inner}/{folder_name}"
286
- nn_files = [f for f in files if f.startswith(ours_prefix + "/")]
287
- ours_block = _collect_block(nn_files, ours_prefix)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  else:
289
  nn_files = [f for f in files if f.replace(inner + "/", "").startswith(rel_prefix)]
290
  ours_block = _collect_block(nn_files, inner + "/" + rel_prefix)
 
248
  folders.sort(key=lambda x: x[0])
249
  return folders
250
 
251
+ def _match_dasheng_folder(folder_name: str, folders: list[tuple[str, str]]) -> Optional[tuple[str, str]]:
252
+ """Match prompt-derived folder_name to actual folder; allow truncated names (e.g. ..._the_autumn -> ..._the)."""
253
+ if not folder_name or not folders:
254
+ return None
255
+ # Exact match
256
+ for fn, fp in folders:
257
+ if fn == folder_name:
258
+ return (fn, fp)
259
+ # Folder may be truncated: actual fn is prefix of folder_name (e.g. fn="..._the", folder_name="..._the_autumn")
260
+ candidates = [(fn, fp) for fn, fp in folders if folder_name.startswith(fn)]
261
+ if candidates:
262
+ return max(candidates, key=lambda x: len(x[0]))
263
+ # Or folder_name is prefix of fn
264
+ candidates = [(fn, fp) for fn, fp in folders if fn.startswith(folder_name)]
265
+ if candidates:
266
+ return min(candidates, key=lambda x: len(x[0]))
267
+ return None
268
+
269
  baseline_folders = get_baseline_folders(baseline_inner, baseline_files)
270
  youtube_folders = get_youtube_folders()
271
 
 
285
 
286
  bl_youtube = {"spec": None, "bg_wav": None, "fg_wav": None, "m_wav": None}
287
  if use_dasheng:
 
288
  folder_name = prompt_text.replace(" ", "_") if prompt_text else ""
289
+ matched = _match_dasheng_folder(folder_name, youtube_folders)
290
+ if matched:
291
+ fn, fp = matched
292
+ bl_youtube = _collect_block(youtube_files, fp)
293
  else:
294
  for fn, fp in youtube_folders:
295
  if fn.startswith(rel_prefix):
 
297
  break
298
 
299
  if use_dasheng:
 
300
  folder_name = prompt_text.replace(" ", "_") if prompt_text else ""
301
+ # Ours: list prompt-named dirs under inner (exclude baseline, gaussian_baseline, youtube_noise_baseline)
302
+ skip = {"baseline", "youtube_noise_baseline", "gaussian_baseline"}
303
+ inner_dirs = set()
304
+ for f in files:
305
+ if not f.startswith(inner + "/"):
306
+ continue
307
+ rest = f.replace(inner + "/", "", 1)
308
+ if "/" in rest:
309
+ top = rest.split("/")[0]
310
+ if top not in skip and not top.startswith("generated_baseline"):
311
+ inner_dirs.add(top)
312
+ inner_folders = [(d, inner + "/" + d) for d in sorted(inner_dirs)]
313
+ ours_fn_fp = _match_dasheng_folder(folder_name, inner_folders)
314
+ if ours_fn_fp:
315
+ fn, fp = ours_fn_fp
316
+ nn_files = [f for f in files if f.startswith(fp + "/")]
317
+ ours_block = _collect_block(nn_files, fp)
318
+ else:
319
+ ours_block = {"spec": None, "bg_wav": None, "fg_wav": None, "m_wav": None}
320
  else:
321
  nn_files = [f for f in files if f.replace(inner + "/", "").startswith(rel_prefix)]
322
  ours_block = _collect_block(nn_files, inner + "/" + rel_prefix)