Spaces:
Running
Running
Rachel Ding committed on
Commit ·
afef5d4
1
Parent(s): 5609e0c
Reduce audio row height (compact-audio CSS); sync dataset_loader
Browse files- app.py +19 -15
- dataset_loader.py +41 -9
app.py
CHANGED
|
@@ -68,6 +68,10 @@ with gr.Blocks(
|
|
| 68 |
.gradio-container { max-width: 1400px; }
|
| 69 |
/* Results view: force all 4 spec images (Gaussian, Youtube, Ours, NN baseline) to same size */
|
| 70 |
#results-column img { width: 700px !important; height: 280px !important; object-fit: contain; }
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
""",
|
| 72 |
) as app:
|
| 73 |
gr.Markdown("# NearestNeighbor Audio Demo")
|
|
@@ -103,9 +107,9 @@ with gr.Blocks(
|
|
| 103 |
nn_outputs.append(nn_p_md)
|
| 104 |
nn_img = gr.Image(label=f"NN{i+1}", show_label=True, height=480)
|
| 105 |
nn_outputs.append(nn_img)
|
| 106 |
-
nn_bg = gr.Audio(label="BG", show_label=True)
|
| 107 |
-
nn_fg = gr.Audio(label="FG", show_label=True)
|
| 108 |
-
nn_m = gr.Audio(label="Mix", show_label=True)
|
| 109 |
nn_outputs.extend([nn_bg, nn_fg, nn_m])
|
| 110 |
|
| 111 |
# ---- Results View: 3 prompts, each with 2 rows (Gaussian|Youtube, Ours|NN baseline) ----
|
|
@@ -122,26 +126,26 @@ with gr.Blocks(
|
|
| 122 |
with gr.Row():
|
| 123 |
with gr.Column():
|
| 124 |
res_outputs.append(gr.Image(label="Gaussian", **spec_size))
|
| 125 |
-
res_outputs.append(gr.Audio(label="BG"))
|
| 126 |
-
res_outputs.append(gr.Audio(label="FG"))
|
| 127 |
-
res_outputs.append(gr.Audio(label="Mix"))
|
| 128 |
with gr.Column():
|
| 129 |
res_outputs.append(gr.Image(label="Youtube", **spec_size))
|
| 130 |
-
res_outputs.append(gr.Audio(label="BG"))
|
| 131 |
-
res_outputs.append(gr.Audio(label="FG"))
|
| 132 |
-
res_outputs.append(gr.Audio(label="Mix"))
|
| 133 |
# Row 2: Ours | NN baseline (spec + BG/FG/Mix under each)
|
| 134 |
with gr.Row():
|
| 135 |
with gr.Column():
|
| 136 |
res_outputs.append(gr.Image(label="Ours", **spec_size))
|
| 137 |
-
res_outputs.append(gr.Audio(label="BG"))
|
| 138 |
-
res_outputs.append(gr.Audio(label="FG"))
|
| 139 |
-
res_outputs.append(gr.Audio(label="Mix"))
|
| 140 |
with gr.Column():
|
| 141 |
res_outputs.append(gr.Image(label="NN baseline", **spec_size))
|
| 142 |
-
res_outputs.append(gr.Audio(label="BG"))
|
| 143 |
-
res_outputs.append(gr.Audio(label="FG"))
|
| 144 |
-
res_outputs.append(gr.Audio(label="Mix"))
|
| 145 |
|
| 146 |
def on_change(sid, view):
|
| 147 |
use_dasheng = view in ("Nearest Neighbor (Dasheng)", "Results (Dasheng)")
|
|
|
|
| 68 |
.gradio-container { max-width: 1400px; }
|
| 69 |
/* Results view: force all 4 spec images (Gaussian, Youtube, Ours, NN baseline) to same size */
|
| 70 |
#results-column img { width: 700px !important; height: 280px !important; object-fit: contain; }
|
| 71 |
+
/* Reduce audio player row height (BG/FG/Mix) */
|
| 72 |
+
.compact-audio .gr-form { min-height: 0 !important; }
|
| 73 |
+
.compact-audio > div { min-height: 0 !important; max-height: 72px !important; }
|
| 74 |
+
.compact-audio audio { max-height: 48px !important; }
|
| 75 |
""",
|
| 76 |
) as app:
|
| 77 |
gr.Markdown("# NearestNeighbor Audio Demo")
|
|
|
|
| 107 |
nn_outputs.append(nn_p_md)
|
| 108 |
nn_img = gr.Image(label=f"NN{i+1}", show_label=True, height=480)
|
| 109 |
nn_outputs.append(nn_img)
|
| 110 |
+
nn_bg = gr.Audio(label="BG", show_label=True, elem_classes=["compact-audio"])
|
| 111 |
+
nn_fg = gr.Audio(label="FG", show_label=True, elem_classes=["compact-audio"])
|
| 112 |
+
nn_m = gr.Audio(label="Mix", show_label=True, elem_classes=["compact-audio"])
|
| 113 |
nn_outputs.extend([nn_bg, nn_fg, nn_m])
|
| 114 |
|
| 115 |
# ---- Results View: 3 prompts, each with 2 rows (Gaussian|Youtube, Ours|NN baseline) ----
|
|
|
|
| 126 |
with gr.Row():
|
| 127 |
with gr.Column():
|
| 128 |
res_outputs.append(gr.Image(label="Gaussian", **spec_size))
|
| 129 |
+
res_outputs.append(gr.Audio(label="BG", elem_classes=["compact-audio"]))
|
| 130 |
+
res_outputs.append(gr.Audio(label="FG", elem_classes=["compact-audio"]))
|
| 131 |
+
res_outputs.append(gr.Audio(label="Mix", elem_classes=["compact-audio"]))
|
| 132 |
with gr.Column():
|
| 133 |
res_outputs.append(gr.Image(label="Youtube", **spec_size))
|
| 134 |
+
res_outputs.append(gr.Audio(label="BG", elem_classes=["compact-audio"]))
|
| 135 |
+
res_outputs.append(gr.Audio(label="FG", elem_classes=["compact-audio"]))
|
| 136 |
+
res_outputs.append(gr.Audio(label="Mix", elem_classes=["compact-audio"]))
|
| 137 |
# Row 2: Ours | NN baseline (spec + BG/FG/Mix under each)
|
| 138 |
with gr.Row():
|
| 139 |
with gr.Column():
|
| 140 |
res_outputs.append(gr.Image(label="Ours", **spec_size))
|
| 141 |
+
res_outputs.append(gr.Audio(label="BG", elem_classes=["compact-audio"]))
|
| 142 |
+
res_outputs.append(gr.Audio(label="FG", elem_classes=["compact-audio"]))
|
| 143 |
+
res_outputs.append(gr.Audio(label="Mix", elem_classes=["compact-audio"]))
|
| 144 |
with gr.Column():
|
| 145 |
res_outputs.append(gr.Image(label="NN baseline", **spec_size))
|
| 146 |
+
res_outputs.append(gr.Audio(label="BG", elem_classes=["compact-audio"]))
|
| 147 |
+
res_outputs.append(gr.Audio(label="FG", elem_classes=["compact-audio"]))
|
| 148 |
+
res_outputs.append(gr.Audio(label="Mix", elem_classes=["compact-audio"]))
|
| 149 |
|
| 150 |
def on_change(sid, view):
|
| 151 |
use_dasheng = view in ("Nearest Neighbor (Dasheng)", "Results (Dasheng)")
|
dataset_loader.py
CHANGED
|
@@ -248,6 +248,24 @@ def get_results_demo_paths(bid: str, root_prefix: Optional[str] = None) -> dict:
|
|
| 248 |
folders.sort(key=lambda x: x[0])
|
| 249 |
return folders
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
baseline_folders = get_baseline_folders(baseline_inner, baseline_files)
|
| 252 |
youtube_folders = get_youtube_folders()
|
| 253 |
|
|
@@ -267,12 +285,11 @@ def get_results_demo_paths(bid: str, root_prefix: Optional[str] = None) -> dict:
|
|
| 267 |
|
| 268 |
bl_youtube = {"spec": None, "bg_wav": None, "fg_wav": None, "m_wav": None}
|
| 269 |
if use_dasheng:
|
| 270 |
-
# Dasheng: match by prompt -> folder name (spaces to underscores)
|
| 271 |
folder_name = prompt_text.replace(" ", "_") if prompt_text else ""
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
else:
|
| 277 |
for fn, fp in youtube_folders:
|
| 278 |
if fn.startswith(rel_prefix):
|
|
@@ -280,11 +297,26 @@ def get_results_demo_paths(bid: str, root_prefix: Optional[str] = None) -> dict:
|
|
| 280 |
break
|
| 281 |
|
| 282 |
if use_dasheng:
|
| 283 |
-
# Dasheng: "ours" = prompt-named folder under inner
|
| 284 |
folder_name = prompt_text.replace(" ", "_") if prompt_text else ""
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
else:
|
| 289 |
nn_files = [f for f in files if f.replace(inner + "/", "").startswith(rel_prefix)]
|
| 290 |
ours_block = _collect_block(nn_files, inner + "/" + rel_prefix)
|
|
|
|
| 248 |
folders.sort(key=lambda x: x[0])
|
| 249 |
return folders
|
| 250 |
|
| 251 |
+
def _match_dasheng_folder(folder_name: str, folders: list[tuple[str, str]]) -> Optional[tuple[str, str]]:
    """Match a prompt-derived ``folder_name`` to one of the actual folders.

    Matching is tried in three stages; the first stage that finds something wins:

    1. Exact name match.
    2. The actual folder name is a truncated form (a prefix) of ``folder_name``,
       e.g. actual ``..._the`` for requested ``..._the_autumn``. The longest
       such folder name is chosen.
    3. ``folder_name`` is a prefix of the actual folder name. The shortest
       such folder name is chosen.

    Args:
        folder_name: Folder name derived from the prompt text.
        folders: ``(name, path)`` pairs to search.

    Returns:
        The matching ``(name, path)`` pair, or ``None`` when ``folder_name`` is
        empty, ``folders`` is empty, or nothing matches.
    """
    if not folder_name or not folders:
        return None

    # An empty folder name is a prefix of every string, so it would otherwise
    # count as a "truncated" match for any prompt. Ignore such entries.
    named = [(fn, fp) for fn, fp in folders if fn]

    # Stage 1: exact match.
    for fn, fp in named:
        if fn == folder_name:
            return (fn, fp)

    # Stage 2: actual name is a truncation of the requested name; prefer the
    # longest one because it shares the most characters with the request.
    truncated = [(fn, fp) for fn, fp in named if folder_name.startswith(fn)]
    if truncated:
        return max(truncated, key=lambda item: len(item[0]))

    # Stage 3: requested name is a prefix of the actual name; prefer the
    # shortest one because it adds the fewest extra characters.
    extended = [(fn, fp) for fn, fp in named if fn.startswith(folder_name)]
    if extended:
        return min(extended, key=lambda item: len(item[0]))

    return None
|
| 268 |
+
|
| 269 |
baseline_folders = get_baseline_folders(baseline_inner, baseline_files)
|
| 270 |
youtube_folders = get_youtube_folders()
|
| 271 |
|
|
|
|
| 285 |
|
| 286 |
bl_youtube = {"spec": None, "bg_wav": None, "fg_wav": None, "m_wav": None}
|
| 287 |
if use_dasheng:
|
|
|
|
| 288 |
folder_name = prompt_text.replace(" ", "_") if prompt_text else ""
|
| 289 |
+
matched = _match_dasheng_folder(folder_name, youtube_folders)
|
| 290 |
+
if matched:
|
| 291 |
+
fn, fp = matched
|
| 292 |
+
bl_youtube = _collect_block(youtube_files, fp)
|
| 293 |
else:
|
| 294 |
for fn, fp in youtube_folders:
|
| 295 |
if fn.startswith(rel_prefix):
|
|
|
|
| 297 |
break
|
| 298 |
|
| 299 |
if use_dasheng:
|
|
|
|
| 300 |
folder_name = prompt_text.replace(" ", "_") if prompt_text else ""
|
| 301 |
+
# Ours: list prompt-named dirs under inner (exclude baseline, gaussian_baseline, youtube_noise_baseline)
|
| 302 |
+
skip = {"baseline", "youtube_noise_baseline", "gaussian_baseline"}
|
| 303 |
+
inner_dirs = set()
|
| 304 |
+
for f in files:
|
| 305 |
+
if not f.startswith(inner + "/"):
|
| 306 |
+
continue
|
| 307 |
+
rest = f.replace(inner + "/", "", 1)
|
| 308 |
+
if "/" in rest:
|
| 309 |
+
top = rest.split("/")[0]
|
| 310 |
+
if top not in skip and not top.startswith("generated_baseline"):
|
| 311 |
+
inner_dirs.add(top)
|
| 312 |
+
inner_folders = [(d, inner + "/" + d) for d in sorted(inner_dirs)]
|
| 313 |
+
ours_fn_fp = _match_dasheng_folder(folder_name, inner_folders)
|
| 314 |
+
if ours_fn_fp:
|
| 315 |
+
fn, fp = ours_fn_fp
|
| 316 |
+
nn_files = [f for f in files if f.startswith(fp + "/")]
|
| 317 |
+
ours_block = _collect_block(nn_files, fp)
|
| 318 |
+
else:
|
| 319 |
+
ours_block = {"spec": None, "bg_wav": None, "fg_wav": None, "m_wav": None}
|
| 320 |
else:
|
| 321 |
nn_files = [f for f in files if f.replace(inner + "/", "").startswith(rel_prefix)]
|
| 322 |
ours_block = _collect_block(nn_files, inner + "/" + rel_prefix)
|