JackIsNotInTheBox committed on
Commit
53f384c
·
1 Parent(s): 8a267b7

Show slot 0 by default; reveal extra slots on Generations slider drag

Browse files
Files changed (1) hide show
  1. app.py +35 -39
app.py CHANGED
@@ -380,21 +380,25 @@ with gr.Blocks(title="TARO: Video-to-Audio Synthesis") as demo:
380
  run_btn = gr.Button("Generate", variant="primary")
381
 
382
  with gr.Column():
383
- # Pre-build MAX_SLOTS output slots; hide all initially
 
 
384
  slot_videos = []
385
  slot_audios = []
 
386
  for i in range(MAX_SLOTS):
387
- with gr.Group(visible=False) as grp:
388
- sv = gr.Video(label=f"Sample {i+1} — Video")
389
- sa = gr.Audio(label=f"Sample {i+1} — Audio")
390
- slot_videos.append((grp, sv))
391
- slot_audios.append((grp, sa))
392
-
393
- # ------------------------------------------------------------------ #
394
- # Events #
395
- # ------------------------------------------------------------------ #
396
-
397
- # Update samples slider max when video uploaded or relevant sliders change
 
398
  def _update_samples_slider(video_file, num_steps, crossfade_s):
399
  return on_video_upload(video_file, num_steps, crossfade_s)
400
 
@@ -405,41 +409,33 @@ with gr.Blocks(title="TARO: Video-to-Audio Synthesis") as demo:
405
  outputs=[samples_input],
406
  )
407
 
408
- # Collect all output components (flat: grp_visible, video, audio per slot)
409
- all_outputs = []
410
- for grp, sv in slot_videos:
411
- all_outputs.append(grp)
412
- for _, sa in slot_audios:
413
- all_outputs.append(sa)
414
- # Actually build properly: interleaved group + video + audio
415
- all_outputs = []
416
- slot_video_comps = [sv for _, sv in slot_videos]
417
- slot_audio_comps = [sa for _, sa in slot_audios]
418
- slot_grp_comps = [grp for grp, _ in slot_videos]
419
 
 
420
  def _generate_and_update(video_file, seed_val, cfg_scale, num_steps, mode,
421
- crossfade_s, crossfade_db, num_samples):
422
  flat = generate_audio(video_file, seed_val, cfg_scale, num_steps, mode,
423
- crossfade_s, crossfade_db, num_samples)
424
- num_samples = int(num_samples)
425
- # flat = [vid0, aud0, vid1, aud1, ...]
426
- grp_updates = []
427
- video_updates = []
428
- audio_updates = []
429
- for i in range(MAX_SLOTS):
430
- visible = i < num_samples
431
- vid = flat[i * 2]
432
- aud = flat[i * 2 + 1]
433
- grp_updates.append(gr.update(visible=visible))
434
- video_updates.append(gr.update(value=vid))
435
- audio_updates.append(gr.update(value=aud))
436
  return grp_updates + video_updates + audio_updates
437
 
438
  run_btn.click(
439
  fn=_generate_and_update,
440
  inputs=[video_input, seed_input, cfg_input, steps_input, mode_input,
441
  cf_dur_input, cf_db_input, samples_input],
442
- outputs=slot_grp_comps + slot_video_comps + slot_audio_comps,
443
  )
444
 
445
- demo.queue().launch()
 
380
  run_btn = gr.Button("Generate", variant="primary")
381
 
382
  with gr.Column():
383
+ # All MAX_SLOTS slots pre-built.
384
+ # Slot 0 is always visible (shows loading progress during inference).
385
+ # Slots 1-N become visible when user drags the Generations slider.
386
  slot_videos = []
387
  slot_audios = []
388
+ slot_grps = []
389
  for i in range(MAX_SLOTS):
390
+ with gr.Group(visible=(i == 0)) as grp:
391
+ sv = gr.Video(label=f"Generation {i+1} — Video")
392
+ sa = gr.Audio(label=f"Generation {i+1} — Audio")
393
+ slot_grps.append(grp)
394
+ slot_videos.append(sv)
395
+ slot_audios.append(sa)
396
+
397
+ # -------------------------------------------------------------- #
398
+ # Events #
399
+ # -------------------------------------------------------------- #
400
+
401
+ # Update Generations slider max on video upload / steps / crossfade change
402
  def _update_samples_slider(video_file, num_steps, crossfade_s):
403
  return on_video_upload(video_file, num_steps, crossfade_s)
404
 
 
409
  outputs=[samples_input],
410
  )
411
 
412
+ # Show/hide output slots instantly when Generations slider is dragged
413
+ def _update_slot_visibility(num_samples):
414
+ n = int(num_samples)
415
+ return [gr.update(visible=(i < n)) for i in range(MAX_SLOTS)]
416
+
417
+ samples_input.change(
418
+ fn=_update_slot_visibility,
419
+ inputs=[samples_input],
420
+ outputs=slot_grps,
421
+ )
 
422
 
423
+ # Main generate: calls inference then populates slots
424
  def _generate_and_update(video_file, seed_val, cfg_scale, num_steps, mode,
425
+ crossfade_s, crossfade_db, num_samples):
426
  flat = generate_audio(video_file, seed_val, cfg_scale, num_steps, mode,
427
+ crossfade_s, crossfade_db, num_samples)
428
+ n = int(num_samples)
429
+ grp_updates = [gr.update(visible=(i < n)) for i in range(MAX_SLOTS)]
430
+ video_updates = [gr.update(value=flat[i * 2]) for i in range(MAX_SLOTS)]
431
+ audio_updates = [gr.update(value=flat[i * 2 + 1]) for i in range(MAX_SLOTS)]
 
 
 
 
 
 
 
 
432
  return grp_updates + video_updates + audio_updates
433
 
434
  run_btn.click(
435
  fn=_generate_and_update,
436
  inputs=[video_input, seed_input, cfg_input, steps_input, mode_input,
437
  cf_dur_input, cf_db_input, samples_input],
438
+ outputs=slot_grps + slot_videos + slot_audios,
439
  )
440
 
441
+ demo.queue().launch()