Gong Junmin committed on
Commit
da41c7b
·
unverified ·
2 Parent(s): 13537d2 7404af9

Merge pull request #3 from ace-step/fix_lrc_bugs

Browse files
acestep/constants.py CHANGED
@@ -70,6 +70,8 @@ TASK_TYPES_BASE = ["text2music", "repaint", "cover", "extract", "lego", "complet
70
  DEFAULT_DIT_INSTRUCTION = "Fill the audio semantic mask based on the given conditions:"
71
  DEFAULT_LM_INSTRUCTION = "Generate audio semantic tokens based on the given conditions:"
72
  DEFAULT_LM_UNDERSTAND_INSTRUCTION = "Understand the given musical conditions and describe the audio semantics accordingly:"
 
 
73
 
74
  # Instruction templates for each task type
75
  # Note: Some instructions use placeholders like {TRACK_NAME} or {TRACK_CLASSES}
 
70
  DEFAULT_DIT_INSTRUCTION = "Fill the audio semantic mask based on the given conditions:"
71
  DEFAULT_LM_INSTRUCTION = "Generate audio semantic tokens based on the given conditions:"
72
  DEFAULT_LM_UNDERSTAND_INSTRUCTION = "Understand the given musical conditions and describe the audio semantics accordingly:"
73
+ DEFAULT_LM_INSPIRED_INSTRUCTION = "Expand the user's input into a more detailed and specific musical description:"
74
+ DEFAULT_LM_REWRITE_INSTRUCTION = "Format the user's input into a more detailed and specific musical description:"
75
 
76
  # Instruction templates for each task type
77
  # Note: Some instructions use placeholders like {TRACK_NAME} or {TRACK_CLASSES}
acestep/gradio_ui/events/__init__.py CHANGED
@@ -373,7 +373,8 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
373
  outputs=[
374
  results_section[f"lrc_display_{btn_idx}"],
375
  results_section[f"details_accordion_{btn_idx}"],
376
- # Audio subtitles now auto-updated via lrc_display.change()
 
377
  results_section["batch_queue"]
378
  ]
379
  )
@@ -723,14 +724,21 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
723
  )
724
 
725
  # ========== LRC Display Change Handlers ==========
726
- # When lrc_display textbox changes, update the corresponding audio component's subtitles
727
- for i in range(1, 9):
728
- results_section[f"lrc_display_{i}"].change(
 
 
 
 
 
 
729
  fn=res_h.update_audio_subtitles_from_lrc,
730
  inputs=[
731
- results_section[f"lrc_display_{i}"],
732
- results_section[f"generated_audio_{i}"],
733
- generation_section["audio_duration"],
734
  ],
735
- outputs=[results_section[f"generated_audio_{i}"]]
 
 
736
  )
 
373
  outputs=[
374
  results_section[f"lrc_display_{btn_idx}"],
375
  results_section[f"details_accordion_{btn_idx}"],
376
+ # NOTE: Removed generated_audio output!
377
+ # Audio subtitles are now updated via lrc_display.change() event.
378
  results_section["batch_queue"]
379
  ]
380
  )
 
724
  )
725
 
726
  # ========== LRC Display Change Handlers ==========
727
+ # NEW APPROACH: Use lrc_display.change() to update audio subtitles
728
+ # This decouples audio value updates from subtitle updates, avoiding flickering.
729
+ #
730
+ # When lrc_display text changes (from generate, LRC button, or manual edit):
731
+ # 1. lrc_display.change() is triggered
732
+ # 2. update_audio_subtitles_from_lrc() parses LRC and updates audio subtitles
733
+ # 3. Audio value is NEVER updated here - only subtitles
734
+ for lrc_idx in range(1, 9):
735
+ results_section[f"lrc_display_{lrc_idx}"].change(
736
  fn=res_h.update_audio_subtitles_from_lrc,
737
  inputs=[
738
+ results_section[f"lrc_display_{lrc_idx}"],
739
+ # audio_duration not needed - parse_lrc_to_subtitles calculates end time from timestamps
 
740
  ],
741
+ outputs=[
742
+ results_section[f"generated_audio_{lrc_idx}"], # Only updates subtitles, not value
743
+ ]
744
  )
acestep/gradio_ui/events/results_handlers.py CHANGED
@@ -40,9 +40,10 @@ def parse_lrc_to_subtitles(lrc_text: str, total_duration: Optional[float] = None
40
  lines = lrc_text.strip().split('\n')
41
 
42
  # Regex patterns for LRC timestamps
43
- # Pattern 1: [MM:SS.ss] - standard LRC with start time only
44
  # Pattern 2: [MM:SS.ss][MM:SS.ss] - LRC with both start and end time
45
- timestamp_pattern = r'\[(\d{2}):(\d{2})\.(\d{2})\]'
 
46
 
47
  parsed_lines = []
48
 
@@ -62,14 +63,17 @@ def parse_lrc_to_subtitles(lrc_text: str, total_duration: Optional[float] = None
62
  continue
63
 
64
  # Parse first timestamp as start time
 
65
  start_minutes, start_seconds, start_centiseconds = timestamps[0]
66
- start_time = int(start_minutes) * 60 + int(start_seconds) + int(start_centiseconds) / 100.0
 
67
 
68
  # If there's a second timestamp, use it as end time
69
  end_time = None
70
  if len(timestamps) >= 2:
71
  end_minutes, end_seconds, end_centiseconds = timestamps[1]
72
- end_time = int(end_minutes) * 60 + int(end_seconds) + int(end_centiseconds) / 100.0
 
73
 
74
  parsed_lines.append({
75
  'start': start_time,
@@ -101,6 +105,83 @@ def parse_lrc_to_subtitles(lrc_text: str, total_duration: Optional[float] = None
101
  return subtitles
102
 
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  def _build_generation_info(
105
  lm_metadata: Optional[Dict[str, Any]],
106
  time_costs: Dict[str, float],
@@ -477,15 +558,25 @@ def generate_with_progress(
477
  audios = result.audios
478
  progress(0.99, "Converting audio to mp3...")
479
 
480
- # Clear all scores, codes, and lrc displays at the start of generation
481
  # Note: Create independent gr.update objects (not references to the same object)
482
- clear_scores = [gr.update(value="", visible=False) for _ in range(8)]
483
- clear_codes = [gr.update(value="", visible=False) for _ in range(8)]
484
- clear_lrcs = [gr.update(value="", visible=False) for _ in range(8)]
485
- clear_accordions = [gr.update(visible=False) for _ in range(8)]
 
 
 
 
 
 
 
 
 
486
  yield (
487
- # Audio outputs (keep as skip, will be updated in loop)
488
- gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(),
 
489
  None, # all_audio_paths (clear batch files)
490
  generation_info,
491
  "Clearing previous results...",
@@ -507,6 +598,7 @@ def generate_with_progress(
507
  None, # extra_outputs placeholder
508
  None, # raw_codes placeholder
509
  )
 
510
 
511
  for i in range(8):
512
  if i < len(audios):
@@ -613,9 +705,9 @@ def generate_with_progress(
613
  lrc_text = lrc_result.get("lrc_text", "")
614
  final_lrcs_list[i] = lrc_text
615
  logger.info(f"[auto_lrc] LRC text length for sample {i + 1}: {len(lrc_text)}")
616
- # Parse LRC to subtitles format
617
- subtitles_data = parse_lrc_to_subtitles(lrc_text, total_duration=float(actual_duration))
618
- final_subtitles_list[i] = subtitles_data
619
  else:
620
  logger.warning(f"[auto_lrc] Missing required extra_outputs for sample {i + 1}")
621
  except Exception as e:
@@ -624,53 +716,88 @@ def generate_with_progress(
624
  total_auto_lrc_time += (auto_lrc_end - auto_lrc_start)
625
 
626
  status_message = f"Encoding & Ready: {i+1}/{len(audios)}"
 
 
 
 
 
 
 
627
  current_audio_updates = [gr.skip() for _ in range(8)]
628
- # Always set audio path first, subtitles will be applied via Audio component's subtitles parameter
629
  current_audio_updates[i] = audio_path
630
-
631
- # Codes display updates (for results section)
632
- codes_display_updates = [gr.skip() for _ in range(8)]
633
- codes_display_updates[i] = gr.update(value=code_str, visible=bool(code_str))
634
 
635
- # LRC display updates
636
- lrc_display_updates = [gr.skip() for _ in range(8)]
637
- has_lrc = bool(final_lrcs_list[i])
638
- if auto_lrc and has_lrc:
639
- lrc_display_updates[i] = gr.update(value=final_lrcs_list[i], visible=True)
640
 
641
- # Details accordion updates (show if code OR lrc OR score exists)
642
  details_accordion_updates = [gr.skip() for _ in range(8)]
643
- has_score = bool(score_str) and score_str != "Done!"
644
- has_content = bool(code_str) or has_lrc or has_score
645
- details_accordion_updates[i] = gr.update(visible=has_content)
 
 
 
646
 
647
  yield (
648
  current_audio_updates[0], current_audio_updates[1], current_audio_updates[2], current_audio_updates[3],
649
  current_audio_updates[4], current_audio_updates[5], current_audio_updates[6], current_audio_updates[7],
650
- all_audio_paths, # Real-time update of Batch File list
651
  generation_info,
652
  status_message,
653
  seed_value_for_ui,
654
- # Scores
655
  scores_ui_updates[0], scores_ui_updates[1], scores_ui_updates[2], scores_ui_updates[3], scores_ui_updates[4], scores_ui_updates[5], scores_ui_updates[6], scores_ui_updates[7],
656
- # Codes display in results section
657
  codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
658
  codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
659
- # Details accordion visibility
660
  details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
661
  details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
662
- # LRC display
663
- lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
664
- lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
665
  lm_generated_metadata,
666
  is_format_caption,
667
- None, # Placeholder for extra_outputs (only filled in final yield)
668
- None, # Placeholder for raw_codes_list (only filled in final yield)
669
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
670
  else:
671
  # If i exceeds the generated count (e.g., batch=2, i=2..7), do not yield
672
  pass
673
- time_module.sleep(0.1)
674
 
675
  # Record audio conversion time
676
  audio_conversion_end_time = time_module.time()
@@ -697,26 +824,19 @@ def generate_with_progress(
697
  num_audios=len(result.audios),
698
  )
699
 
700
- # Build final codes display, LRC display, and accordion visibility updates
701
- final_codes_display_updates = []
702
- final_lrc_display_updates = []
703
- final_accordion_updates = []
704
- for i in range(8):
705
- code_str = final_codes_list[i]
706
- lrc_text = final_lrcs_list[i]
707
- score_str = final_scores_list[i]
708
- has_code = bool(code_str)
709
- has_lrc = bool(lrc_text)
710
- has_score = bool(score_str) and score_str != "Done!"
711
- # Show accordion if code OR LRC OR score exists
712
- has_content = has_code or has_lrc or has_score
713
- final_codes_display_updates.append(gr.update(value=code_str, visible=has_code))
714
- final_lrc_display_updates.append(gr.update(value=lrc_text, visible=has_lrc))
715
- final_accordion_updates.append(gr.update(visible=has_content))
716
 
 
 
 
 
 
717
  yield (
718
- gr.skip(), gr.skip(), gr.skip(), gr.skip(), # Audio 1-4: SKIP
719
- gr.skip(), gr.skip(), gr.skip(), gr.skip(), # Audio 5-8: SKIP
720
  all_audio_paths,
721
  generation_info,
722
  "Generation Complete",
@@ -730,8 +850,8 @@ def generate_with_progress(
730
  final_accordion_updates[0], final_accordion_updates[1], final_accordion_updates[2], final_accordion_updates[3],
731
  final_accordion_updates[4], final_accordion_updates[5], final_accordion_updates[6], final_accordion_updates[7],
732
  # LRC display
733
- final_lrc_display_updates[0], final_lrc_display_updates[1], final_lrc_display_updates[2], final_lrc_display_updates[3],
734
- final_lrc_display_updates[4], final_lrc_display_updates[5], final_lrc_display_updates[6], final_lrc_display_updates[7],
735
  lm_generated_metadata,
736
  is_format_caption,
737
  {
@@ -1006,10 +1126,10 @@ def calculate_score_handler_with_selection(
1006
  batch_queue[current_batch_index]["scores"] = [""] * 8
1007
  batch_queue[current_batch_index]["scores"][sample_idx - 1] = score_display
1008
 
1009
- # Return: score_display (content + visible), accordion visible, batch_queue
1010
  return (
1011
- gr.update(value=score_display, visible=True), # score_display with content
1012
- gr.update(visible=True), # details_accordion
1013
  batch_queue
1014
  )
1015
 
@@ -1017,11 +1137,14 @@ def calculate_score_handler_with_selection(
1017
  def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_queue, vocal_language, inference_steps):
1018
  """
1019
  Generate LRC timestamps for a specific audio sample.
1020
-
1021
  This function retrieves cached generation data from batch_queue and calls
1022
  the handler's get_lyric_timestamp method to generate LRC format lyrics.
1023
- Audio subtitles are automatically updated via lrc_display.change() event.
1024
 
 
 
 
 
1025
  Args:
1026
  dit_handler: DiT handler instance with get_lyric_timestamp method
1027
  sample_idx: Which sample to generate LRC for (1-8)
@@ -1029,21 +1152,23 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
1029
  batch_queue: Dictionary storing all batch generation data
1030
  vocal_language: Language code for lyrics
1031
  inference_steps: Number of inference steps used in generation
1032
-
1033
  Returns:
1034
  Tuple of (lrc_display_update, details_accordion_update, batch_queue)
 
1035
  """
1036
  import torch
1037
 
1038
  if current_batch_index not in batch_queue:
1039
  return gr.skip(), gr.skip(), batch_queue
1040
-
1041
  batch_data = batch_queue[current_batch_index]
1042
  extra_outputs = batch_data.get("extra_outputs", {})
1043
-
1044
  # Check if required data is available
 
1045
  if not extra_outputs:
1046
- return gr.update(value=t("messages.lrc_no_extra_outputs"), visible=True), gr.update(visible=True), batch_queue
1047
 
1048
  pred_latents = extra_outputs.get("pred_latents")
1049
  encoder_hidden_states = extra_outputs.get("encoder_hidden_states")
@@ -1052,7 +1177,7 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
1052
  lyric_token_idss = extra_outputs.get("lyric_token_idss")
1053
 
1054
  if any(x is None for x in [pred_latents, encoder_hidden_states, encoder_attention_mask, context_latents, lyric_token_idss]):
1055
- return gr.update(value=t("messages.lrc_missing_tensors"), visible=True), gr.update(visible=True), batch_queue
1056
 
1057
  # Adjust sample_idx to 0-based
1058
  sample_idx_0based = sample_idx - 1
@@ -1060,7 +1185,7 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
1060
  # Check if sample exists in batch
1061
  batch_size = pred_latents.shape[0]
1062
  if sample_idx_0based >= batch_size:
1063
- return gr.update(value=t("messages.lrc_sample_not_exist"), visible=True), gr.update(visible=True), batch_queue
1064
 
1065
  # Extract the specific sample's data
1066
  try:
@@ -1098,72 +1223,62 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
1098
  if result.get("success"):
1099
  lrc_text = result.get("lrc_text", "")
1100
  if not lrc_text:
1101
- return gr.update(value=t("messages.lrc_empty_result"), visible=True), gr.update(visible=True), batch_queue
1102
 
1103
  # Store LRC in batch_queue for later retrieval when switching batches
1104
  if "lrcs" not in batch_queue[current_batch_index]:
1105
  batch_queue[current_batch_index]["lrcs"] = [""] * 8
1106
  batch_queue[current_batch_index]["lrcs"][sample_idx_0based] = lrc_text
1107
 
1108
- # Parse LRC to subtitles format for storage (audio subtitles will be updated via lrc_display.change())
1109
- subtitles_data = parse_lrc_to_subtitles(lrc_text, total_duration=float(audio_duration))
1110
-
1111
- # Store subtitles in batch_queue for batch navigation
1112
  if "subtitles" not in batch_queue[current_batch_index]:
1113
  batch_queue[current_batch_index]["subtitles"] = [None] * 8
1114
- batch_queue[current_batch_index]["subtitles"][sample_idx_0based] = subtitles_data
1115
 
1116
  # Return: lrc_display, details_accordion, batch_queue
1117
- # Audio subtitles are automatically updated via lrc_display.change() event
 
 
1118
  return (
1119
  gr.update(value=lrc_text, visible=True),
1120
- gr.update(visible=True),
1121
  batch_queue
1122
  )
1123
  else:
1124
  error_msg = result.get("error", "Unknown error")
1125
- return gr.update(value=f"❌ {error_msg}", visible=True), gr.update(visible=True), batch_queue
1126
 
1127
  except Exception as e:
1128
  logger.exception("[generate_lrc_handler] Error generating LRC")
1129
- return gr.update(value=f"❌ Error: {str(e)}", visible=True), gr.update(visible=True), batch_queue
1130
 
1131
 
1132
- def update_audio_subtitles_from_lrc(lrc_text: str, audio_component_value, audio_duration: float = None):
1133
  """
1134
  Update Audio component's subtitles based on LRC text content.
1135
 
1136
- This function is triggered when lrc_display textbox changes.
1137
- It parses the LRC text and updates the corresponding Audio component's subtitles.
 
1138
 
1139
  Args:
1140
  lrc_text: LRC format lyrics string from lrc_display textbox
1141
- audio_component_value: Current value of the audio component (path or dict)
1142
  audio_duration: Optional audio duration for calculating last line's end time
1143
 
1144
  Returns:
1145
- gr.update for the Audio component with subtitles
1146
  """
1147
- # If no LRC text, skip update (don't clear subtitles to avoid flickering)
1148
  if not lrc_text or not lrc_text.strip():
1149
- return gr.skip()
1150
-
1151
- # Get audio path from component value
1152
- audio_path = None
1153
- if audio_component_value:
1154
- if isinstance(audio_component_value, dict):
1155
- audio_path = audio_component_value.get("path") or audio_component_value.get("value")
1156
- else:
1157
- audio_path = audio_component_value
1158
 
1159
- if not audio_path:
1160
- return gr.skip()
1161
 
1162
- # Parse LRC to subtitles format
1163
- subtitles_data = parse_lrc_to_subtitles(lrc_text, total_duration=audio_duration)
1164
-
1165
- # Return updated audio with subtitles
1166
- return gr.update(value=audio_path, subtitles=subtitles_data if subtitles_data else None)
1167
 
1168
 
1169
  def capture_current_params(
@@ -1374,7 +1489,8 @@ def generate_with_batch_management(
1374
 
1375
  # Extract extra_outputs from result tuple (index 46 after adding lrc_display)
1376
  # Note: index 47 is raw_codes_list which we already extracted above
1377
- extra_outputs_from_result = result[46] if len(result) > 46 else {}
 
1378
 
1379
  # Store current batch in queue
1380
  batch_queue = store_batch_in_queue(
@@ -1420,9 +1536,18 @@ def generate_with_batch_management(
1420
  # 0-7: audio_outputs, 8: all_audio_paths, 9: generation_info, 10: status, 11: seed
1421
  # 12-19: scores, 20-27: codes_display, 28-35: details_accordion, 36-43: lrc_display
1422
  # 44: lm_metadata, 45: is_format_caption, 46: extra_outputs, 47: raw_codes_list
1423
- # Note: Audio subtitles are already included in the intermediate yields from generate_with_progress
 
 
 
1424
  ui_result = result[:-2] if len(result) > 47 else (result[:-1] if len(result) > 46 else result)
1425
 
 
 
 
 
 
 
1426
  yield ui_result + (
1427
  current_batch_index,
1428
  total_batches,
@@ -1610,7 +1735,24 @@ def generate_next_batch_background(
1610
  generated_codes_single = generated_codes_batch[0] if generated_codes_batch else ""
1611
 
1612
  # Extract extra_outputs for LRC generation (index 46)
1613
- extra_outputs_from_bg = final_result[46] if len(final_result) > 46 else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1614
 
1615
  # Determine which codes to store
1616
  batch_size = params.get("batch_size_input", 2)
@@ -1626,6 +1768,7 @@ def generate_next_batch_background(
1626
  logger.info(f" - batch_size: {batch_size}")
1627
  logger.info(f" - generated_codes_single exists: {bool(generated_codes_single)}")
1628
  logger.info(f" - extra_outputs_from_bg exists: {extra_outputs_from_bg is not None}")
 
1629
  if isinstance(codes_to_store, list):
1630
  logger.info(f" - codes_to_store: LIST with {len(codes_to_store)} items")
1631
  for idx, code in enumerate(codes_to_store):
@@ -1633,7 +1776,7 @@ def generate_next_batch_background(
1633
  else:
1634
  logger.info(f" - codes_to_store: STRING with {len(codes_to_store) if codes_to_store else 0} chars")
1635
 
1636
- # Store next batch in queue with codes, batch settings, and ALL generation params
1637
  batch_queue = store_batch_in_queue(
1638
  batch_queue,
1639
  next_batch_idx,
@@ -1641,6 +1784,7 @@ def generate_next_batch_background(
1641
  generation_info,
1642
  seed_value_for_ui,
1643
  codes=codes_to_store,
 
1644
  allow_lm_batch=allow_lm_batch,
1645
  batch_size=int(batch_size),
1646
  generation_params=params,
@@ -1649,6 +1793,16 @@ def generate_next_batch_background(
1649
  status="completed"
1650
  )
1651
 
 
 
 
 
 
 
 
 
 
 
1652
  logger.info(f"Batch {next_batch_idx + 1} stored in queue successfully")
1653
 
1654
  # Success message
@@ -1683,10 +1837,17 @@ def generate_next_batch_background(
1683
 
1684
 
1685
  def navigate_to_previous_batch(current_batch_index, batch_queue):
1686
- """Navigate to previous batch (Result View Only - Never touches Input UI)"""
 
 
 
 
 
 
1687
  if current_batch_index <= 0:
1688
  gr.Warning(t("messages.at_first_batch"))
1689
- return [gr.update()] * 48 # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
 
1690
 
1691
  # Move to previous batch
1692
  new_batch_index = current_batch_index - 1
@@ -1694,25 +1855,23 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
1694
  # Load batch data from queue
1695
  if new_batch_index not in batch_queue:
1696
  gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
1697
- return [gr.update()] * 48
 
1698
 
1699
  batch_data = batch_queue[new_batch_index]
1700
  audio_paths = batch_data.get("audio_paths", [])
1701
  generation_info_text = batch_data.get("generation_info", "")
1702
 
1703
- # Prepare audio outputs (up to 8) with subtitles
1704
  real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
1705
- stored_subtitles = batch_data.get("subtitles", [None] * 8)
1706
 
1707
  audio_updates = []
1708
  for idx in range(8):
1709
  if idx < len(real_audio_paths):
1710
  audio_path = real_audio_paths[idx]
1711
- subtitles_data = stored_subtitles[idx] if idx < len(stored_subtitles) else None
1712
- # Use gr.update to set both value and subtitles
1713
- audio_updates.append(gr.update(value=audio_path, subtitles=subtitles_data))
1714
  else:
1715
- audio_updates.append(gr.update(value=None, subtitles=None))
1716
 
1717
  # Update batch indicator
1718
  total_batches = len(batch_queue)
@@ -1736,6 +1895,7 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
1736
 
1737
  codes_display_updates = []
1738
  lrc_display_updates = []
 
1739
  details_accordion_updates = []
1740
  for i in range(8):
1741
  if stored_allow_lm_batch and isinstance(stored_codes, list):
@@ -1746,18 +1906,14 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
1746
  lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
1747
  score_str = score_displays[i] if i < len(score_displays) else ""
1748
 
1749
- has_code = bool(code_str) and i < batch_size
1750
- has_lrc = bool(lrc_str)
1751
- has_score = bool(score_str)
1752
-
1753
- # Show accordion if any content exists
1754
- has_content = has_code or has_lrc or has_score
1755
-
1756
- codes_display_updates.append(gr.update(value=code_str, visible=has_code))
1757
- lrc_display_updates.append(gr.update(value=lrc_str, visible=has_lrc))
1758
- details_accordion_updates.append(gr.update(visible=has_content))
1759
 
1760
- return (
 
1761
  audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
1762
  audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
1763
  audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
@@ -1767,19 +1923,54 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
1767
  score_displays[4], score_displays[5], score_displays[6], score_displays[7],
1768
  codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
1769
  codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
1770
- lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
1771
- lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
 
1772
  details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
1773
  details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
1774
  gr.update(interactive=True),
1775
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1776
 
1777
 
1778
  def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches, batch_queue):
1779
- """Navigate to next batch (Result View Only - Never touches Input UI)"""
 
 
 
 
 
 
1780
  if current_batch_index >= total_batches - 1:
1781
  gr.Warning(t("messages.at_last_batch"))
1782
- return [gr.update()] * 49 # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 1 next_status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
 
1783
 
1784
  # Move to next batch
1785
  new_batch_index = current_batch_index + 1
@@ -1787,25 +1978,23 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
1787
  # Load batch data from queue
1788
  if new_batch_index not in batch_queue:
1789
  gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
1790
- return [gr.update()] * 49
 
1791
 
1792
  batch_data = batch_queue[new_batch_index]
1793
  audio_paths = batch_data.get("audio_paths", [])
1794
  generation_info_text = batch_data.get("generation_info", "")
1795
 
1796
- # Prepare audio outputs (up to 8) with subtitles
1797
  real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
1798
- stored_subtitles = batch_data.get("subtitles", [None] * 8)
1799
 
1800
  audio_updates = []
1801
  for idx in range(8):
1802
  if idx < len(real_audio_paths):
1803
  audio_path = real_audio_paths[idx]
1804
- subtitles_data = stored_subtitles[idx] if idx < len(stored_subtitles) else None
1805
- # Use gr.update to set both value and subtitles
1806
- audio_updates.append(gr.update(value=audio_path, subtitles=subtitles_data))
1807
  else:
1808
- audio_updates.append(gr.update(value=None, subtitles=None))
1809
 
1810
  # Update batch indicator
1811
  batch_indicator_text = update_batch_indicator(new_batch_index, total_batches)
@@ -1834,6 +2023,7 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
1834
 
1835
  codes_display_updates = []
1836
  lrc_display_updates = []
 
1837
  details_accordion_updates = []
1838
  for i in range(8):
1839
  if stored_allow_lm_batch and isinstance(stored_codes, list):
@@ -1842,20 +2032,15 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
1842
  code_str = stored_codes if isinstance(stored_codes, str) and i == 0 else ""
1843
 
1844
  lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
1845
- score_str = score_displays[i] if i < len(score_displays) else ""
1846
 
1847
- has_code = bool(code_str) and i < batch_size
1848
- has_lrc = bool(lrc_str)
1849
- has_score = bool(score_str)
1850
-
1851
- # Show accordion if any content exists
1852
- has_content = has_code or has_lrc or has_score
1853
-
1854
- codes_display_updates.append(gr.update(value=code_str, visible=has_code))
1855
- lrc_display_updates.append(gr.update(value=lrc_str, visible=has_lrc))
1856
- details_accordion_updates.append(gr.update(visible=has_content))
1857
 
1858
- return (
 
1859
  audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
1860
  audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
1861
  audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
@@ -1865,12 +2050,40 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
1865
  score_displays[4], score_displays[5], score_displays[6], score_displays[7],
1866
  codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
1867
  codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
1868
- lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
1869
- lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
 
1870
  details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
1871
  details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
1872
  gr.update(interactive=True),
1873
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1874
 
1875
 
1876
  def restore_batch_parameters(current_batch_index, batch_queue):
 
40
  lines = lrc_text.strip().split('\n')
41
 
42
  # Regex patterns for LRC timestamps
43
+ # Pattern 1: [MM:SS.ss] or [MM:SS.sss] - standard LRC with start time only
44
  # Pattern 2: [MM:SS.ss][MM:SS.ss] - LRC with both start and end time
45
+ # Support both 2-digit (centiseconds) and 3-digit (milliseconds) formats
46
+ timestamp_pattern = r'\[(\d{2}):(\d{2})\.(\d{2,3})\]'
47
 
48
  parsed_lines = []
49
 
 
63
  continue
64
 
65
  # Parse first timestamp as start time
66
+ # Handle both 2-digit (centiseconds, /100) and 3-digit (milliseconds, /1000) formats
67
  start_minutes, start_seconds, start_centiseconds = timestamps[0]
68
+ cs = int(start_centiseconds)
69
+ start_time = int(start_minutes) * 60 + int(start_seconds) + (cs / 100.0 if len(start_centiseconds) == 2 else cs / 1000.0)
70
 
71
  # If there's a second timestamp, use it as end time
72
  end_time = None
73
  if len(timestamps) >= 2:
74
  end_minutes, end_seconds, end_centiseconds = timestamps[1]
75
+ cs_end = int(end_centiseconds)
76
+ end_time = int(end_minutes) * 60 + int(end_seconds) + (cs_end / 100.0 if len(end_centiseconds) == 2 else cs_end / 1000.0)
77
 
78
  parsed_lines.append({
79
  'start': start_time,
 
105
  return subtitles
106
 
107
 
108
+ def _format_vtt_timestamp(seconds: float) -> str:
109
+ """
110
+ Format seconds to VTT timestamp format: HH:MM:SS.mmm
111
+
112
+ Args:
113
+ seconds: Time in seconds
114
+
115
+ Returns:
116
+ Formatted timestamp string
117
+ """
118
+ hours = int(seconds // 3600)
119
+ minutes = int((seconds % 3600) // 60)
120
+ secs = int(seconds % 60)
121
+ millis = int((seconds % 1) * 1000)
122
+ return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
123
+
124
+
125
def lrc_to_vtt_file(lrc_text: str, total_duration: Optional[float] = None) -> Optional[str]:
    """
    Convert LRC text to a VTT file and return the file path.

    This creates a WebVTT subtitle file that Gradio can use as a native
    <track src="..."> element, which is more stable than JS-based subtitle injection.

    VTT format example:
        WEBVTT

        1
        00:00:00.000 --> 00:00:05.000
        First subtitle line

        2
        00:00:05.000 --> 00:00:10.000
        Second subtitle line

    Each successful call creates a new temporary directory (prefix
    ``acestep_vtt_``) containing a single ``subtitles.vtt`` file; this function
    does not delete it afterwards. If writing the file fails, the directory
    created for that attempt is removed so a failed call leaves nothing behind.

    Args:
        lrc_text: LRC format lyrics string
        total_duration: Total audio duration in seconds (used for last line's end time)

    Returns:
        Path to the generated VTT file, or None if conversion fails
    """
    # Local import: only the failure-cleanup path needs it.
    import shutil

    if not lrc_text or not lrc_text.strip():
        return None

    # Parse LRC to subtitles data
    subtitles = parse_lrc_to_subtitles(lrc_text, total_duration=total_duration)
    if not subtitles:
        return None

    # Build VTT content: header, blank line, then one numbered cue per subtitle
    vtt_lines = ["WEBVTT", ""]
    for cue_number, subtitle in enumerate(subtitles, start=1):
        start_time = subtitle['timestamp'][0]
        end_time = subtitle['timestamp'][1]

        # Cue identifier (optional in WebVTT but helpful for debugging)
        vtt_lines.append(str(cue_number))
        vtt_lines.append(f"{_format_vtt_timestamp(start_time)} --> {_format_vtt_timestamp(end_time)}")
        vtt_lines.append(subtitle['text'])
        vtt_lines.append("")  # Blank line terminates the cue

    vtt_content = "\n".join(vtt_lines)

    # Create temp directory and save VTT file
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp(prefix="acestep_vtt_")
        vtt_path = os.path.join(temp_dir, "subtitles.vtt")
        with open(vtt_path, "w", encoding="utf-8") as f:
            f.write(vtt_content)
        return vtt_path
    except Exception as e:
        # Deliberately best-effort: a subtitle failure must not break the UI callback.
        logger.error(f"[lrc_to_vtt_file] Failed to create VTT file: {e}")
        if temp_dir is not None:
            # Do not leave an empty or partially written directory behind.
            shutil.rmtree(temp_dir, ignore_errors=True)
        return None
183
+
184
+
185
  def _build_generation_info(
186
  lm_metadata: Optional[Dict[str, Any]],
187
  time_costs: Dict[str, float],
 
558
  audios = result.audios
559
  progress(0.99, "Converting audio to mp3...")
560
 
561
+ # Clear all scores, codes, lrc displays at the start of generation
562
  # Note: Create independent gr.update objects (not references to the same object)
563
+ #
564
+ # NEW APPROACH: Don't update audio subtitles directly!
565
+ # Clearing lrc_display will trigger lrc_display.change() which clears subtitles automatically.
566
+ # This decouples audio value updates from subtitle updates, avoiding flickering.
567
+ #
568
+ # IMPORTANT: Keep visible=True to ensure .change() event is properly triggered by Gradio.
569
+ # These should always remain visible=True so users can expand accordion anytime.
570
+ clear_scores = [gr.update(value="", visible=True) for _ in range(8)]
571
+ clear_codes = [gr.update(value="", visible=True) for _ in range(8)]
572
+ # Clear lrc_display with empty string - this triggers .change() to clear subtitles
573
+ clear_lrcs = [gr.update(value="", visible=True) for _ in range(8)]
574
+ clear_accordions = [gr.skip() for _ in range(8)] # Don't change accordion visibility
575
+ dump_audio = [None for _ in range(8)]
576
  yield (
577
+ # Audio outputs - just skip, value will be updated in loop
578
+ # Subtitles will be cleared via lrc_display.change()
579
+ dump_audio[0], dump_audio[1], dump_audio[2], dump_audio[3], dump_audio[4], dump_audio[5], dump_audio[6], dump_audio[7],
580
  None, # all_audio_paths (clear batch files)
581
  generation_info,
582
  "Clearing previous results...",
 
598
  None, # extra_outputs placeholder
599
  None, # raw_codes placeholder
600
  )
601
+ time_module.sleep(0.1)
602
 
603
  for i in range(8):
604
  if i < len(audios):
 
705
  lrc_text = lrc_result.get("lrc_text", "")
706
  final_lrcs_list[i] = lrc_text
707
  logger.info(f"[auto_lrc] LRC text length for sample {i + 1}: {len(lrc_text)}")
708
+ # Convert LRC to VTT file for storage (consistent with new VTT-based approach)
709
+ vtt_path = lrc_to_vtt_file(lrc_text, total_duration=float(actual_duration))
710
+ final_subtitles_list[i] = vtt_path
711
  else:
712
  logger.warning(f"[auto_lrc] Missing required extra_outputs for sample {i + 1}")
713
  except Exception as e:
 
716
  total_auto_lrc_time += (auto_lrc_end - auto_lrc_start)
717
 
718
  status_message = f"Encoding & Ready: {i+1}/{len(audios)}"
719
+ has_lrc = bool(final_lrcs_list[i])
720
+ has_score = bool(score_str) and score_str != "Done!"
721
+ has_content = bool(code_str) or has_lrc or has_score
722
+
723
+ # ============== STEP 1: Yield audio + CLEAR LRC ==============
724
+ # First, update audio and clear LRC to avoid race condition
725
+ # (audio needs to load before subtitles are set via .change() event)
726
  current_audio_updates = [gr.skip() for _ in range(8)]
 
727
  current_audio_updates[i] = audio_path
 
 
 
 
728
 
729
+ codes_display_updates = [gr.skip() for _ in range(8)]
730
+ codes_display_updates[i] = gr.update(value=code_str, visible=True) # Keep visible=True
 
 
 
731
 
 
732
  details_accordion_updates = [gr.skip() for _ in range(8)]
733
+ # Don't change accordion visibility - keep it always expandable
734
+
735
+ # Clear LRC first (this triggers .change() to clear subtitles)
736
+ # Keep visible=True to ensure .change() event is properly triggered
737
+ lrc_clear_updates = [gr.skip() for _ in range(8)]
738
+ lrc_clear_updates[i] = gr.update(value="", visible=True)
739
 
740
  yield (
741
  current_audio_updates[0], current_audio_updates[1], current_audio_updates[2], current_audio_updates[3],
742
  current_audio_updates[4], current_audio_updates[5], current_audio_updates[6], current_audio_updates[7],
743
+ all_audio_paths,
744
  generation_info,
745
  status_message,
746
  seed_value_for_ui,
 
747
  scores_ui_updates[0], scores_ui_updates[1], scores_ui_updates[2], scores_ui_updates[3], scores_ui_updates[4], scores_ui_updates[5], scores_ui_updates[6], scores_ui_updates[7],
 
748
  codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
749
  codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
 
750
  details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
751
  details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
752
+ # LRC display - CLEAR first
753
+ lrc_clear_updates[0], lrc_clear_updates[1], lrc_clear_updates[2], lrc_clear_updates[3],
754
+ lrc_clear_updates[4], lrc_clear_updates[5], lrc_clear_updates[6], lrc_clear_updates[7],
755
  lm_generated_metadata,
756
  is_format_caption,
757
+ None,
758
+ None,
759
  )
760
+
761
+ # Wait for audio to load before setting subtitles
762
+ time_module.sleep(0.05)
763
+
764
+ # ============== STEP 2: Skip audio + SET actual LRC ==============
765
+ # Now set the actual LRC content, which triggers .change() to set subtitles
766
+ # This two-step approach (same as navigate_to_batch) ensures audio is loaded first
767
+ if has_lrc:
768
+ skip_audio = [gr.skip() for _ in range(8)]
769
+ skip_scores = [gr.skip() for _ in range(8)]
770
+ skip_codes = [gr.skip() for _ in range(8)]
771
+ skip_accordions = [gr.skip() for _ in range(8)]
772
+
773
+ lrc_actual_updates = [gr.skip() for _ in range(8)]
774
+ lrc_actual_updates[i] = gr.update(value=final_lrcs_list[i], visible=True) # Keep visible=True
775
+
776
+ yield (
777
+ skip_audio[0], skip_audio[1], skip_audio[2], skip_audio[3],
778
+ skip_audio[4], skip_audio[5], skip_audio[6], skip_audio[7],
779
+ gr.skip(), # all_audio_paths
780
+ gr.skip(), # generation_info
781
+ gr.skip(), # status_message
782
+ gr.skip(), # seed
783
+ skip_scores[0], skip_scores[1], skip_scores[2], skip_scores[3],
784
+ skip_scores[4], skip_scores[5], skip_scores[6], skip_scores[7],
785
+ skip_codes[0], skip_codes[1], skip_codes[2], skip_codes[3],
786
+ skip_codes[4], skip_codes[5], skip_codes[6], skip_codes[7],
787
+ skip_accordions[0], skip_accordions[1], skip_accordions[2], skip_accordions[3],
788
+ skip_accordions[4], skip_accordions[5], skip_accordions[6], skip_accordions[7],
789
+ # LRC display - SET actual content (triggers .change() to set subtitles)
790
+ lrc_actual_updates[0], lrc_actual_updates[1], lrc_actual_updates[2], lrc_actual_updates[3],
791
+ lrc_actual_updates[4], lrc_actual_updates[5], lrc_actual_updates[6], lrc_actual_updates[7],
792
+ gr.skip(), # lm_generated_metadata
793
+ gr.skip(), # is_format_caption
794
+ None,
795
+ None,
796
+ )
797
  else:
798
  # If i exceeds the generated count (e.g., batch=2, i=2..7), do not yield
799
  pass
800
+ time_module.sleep(0.05)
801
 
802
  # Record audio conversion time
803
  audio_conversion_end_time = time_module.time()
 
824
  num_audios=len(result.audios),
825
  )
826
 
827
+ # Build final codes display, LRC display, accordion visibility updates
828
+ final_codes_display_updates = [gr.skip() for _ in range(8)]
829
+ # final_lrc_display_updates = [gr.skip() for _ in range(8)]
830
+ final_accordion_updates = [gr.skip() for _ in range(8)]
 
 
 
 
 
 
 
 
 
 
 
 
831
 
832
+ # NEW APPROACH: Don't update audio subtitles directly in final yield!
833
+ # The lrc_display was already updated in the loop yields above.
834
+ # lrc_display.change() event will automatically update the audio subtitles.
835
+ # This decouples audio value updates from subtitle updates, avoiding flickering.
836
+
837
  yield (
838
+ # Audio - just skip, subtitles are updated via lrc_display.change()
839
+ gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(),
840
  all_audio_paths,
841
  generation_info,
842
  "Generation Complete",
 
850
  final_accordion_updates[0], final_accordion_updates[1], final_accordion_updates[2], final_accordion_updates[3],
851
  final_accordion_updates[4], final_accordion_updates[5], final_accordion_updates[6], final_accordion_updates[7],
852
  # LRC display
853
+ final_lrcs_list[0], final_lrcs_list[1], final_lrcs_list[2], final_lrcs_list[3],
854
+ final_lrcs_list[4], final_lrcs_list[5], final_lrcs_list[6], final_lrcs_list[7],
855
  lm_generated_metadata,
856
  is_format_caption,
857
  {
 
1126
  batch_queue[current_batch_index]["scores"] = [""] * 8
1127
  batch_queue[current_batch_index]["scores"][sample_idx - 1] = score_display
1128
 
1129
+ # Return: score_display (with visible=True), accordion skip, batch_queue
1130
  return (
1131
+ gr.update(value=score_display, visible=True), # score_display with content, keep visible=True
1132
+ gr.skip(), # details_accordion - don't change visibility
1133
  batch_queue
1134
  )
1135
 
 
1137
  def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_queue, vocal_language, inference_steps):
1138
  """
1139
  Generate LRC timestamps for a specific audio sample.
1140
+
1141
  This function retrieves cached generation data from batch_queue and calls
1142
  the handler's get_lyric_timestamp method to generate LRC format lyrics.
 
1143
 
1144
+ NEW APPROACH: Only update lrc_display, NOT audio subtitles directly!
1145
+ Audio subtitles will be updated via lrc_display.change() event.
1146
+ This decouples audio value updates from subtitle updates, avoiding flickering.
1147
+
1148
  Args:
1149
  dit_handler: DiT handler instance with get_lyric_timestamp method
1150
  sample_idx: Which sample to generate LRC for (1-8)
 
1152
  batch_queue: Dictionary storing all batch generation data
1153
  vocal_language: Language code for lyrics
1154
  inference_steps: Number of inference steps used in generation
1155
+
1156
  Returns:
1157
  Tuple of (lrc_display_update, details_accordion_update, batch_queue)
1158
+ Note: No audio_update - subtitles updated via lrc_display.change()
1159
  """
1160
  import torch
1161
 
1162
  if current_batch_index not in batch_queue:
1163
  return gr.skip(), gr.skip(), batch_queue
1164
+
1165
  batch_data = batch_queue[current_batch_index]
1166
  extra_outputs = batch_data.get("extra_outputs", {})
1167
+
1168
  # Check if required data is available
1169
+ # Keep visible=True to ensure .change() event is properly triggered
1170
  if not extra_outputs:
1171
+ return gr.update(value=t("messages.lrc_no_extra_outputs"), visible=True), gr.skip(), batch_queue
1172
 
1173
  pred_latents = extra_outputs.get("pred_latents")
1174
  encoder_hidden_states = extra_outputs.get("encoder_hidden_states")
 
1177
  lyric_token_idss = extra_outputs.get("lyric_token_idss")
1178
 
1179
  if any(x is None for x in [pred_latents, encoder_hidden_states, encoder_attention_mask, context_latents, lyric_token_idss]):
1180
+ return gr.update(value=t("messages.lrc_missing_tensors"), visible=True), gr.skip(), batch_queue
1181
 
1182
  # Adjust sample_idx to 0-based
1183
  sample_idx_0based = sample_idx - 1
 
1185
  # Check if sample exists in batch
1186
  batch_size = pred_latents.shape[0]
1187
  if sample_idx_0based >= batch_size:
1188
+ return gr.update(value=t("messages.lrc_sample_not_exist"), visible=True), gr.skip(), batch_queue
1189
 
1190
  # Extract the specific sample's data
1191
  try:
 
1223
  if result.get("success"):
1224
  lrc_text = result.get("lrc_text", "")
1225
  if not lrc_text:
1226
+ return gr.update(value=t("messages.lrc_empty_result"), visible=True), gr.skip(), batch_queue
1227
 
1228
  # Store LRC in batch_queue for later retrieval when switching batches
1229
  if "lrcs" not in batch_queue[current_batch_index]:
1230
  batch_queue[current_batch_index]["lrcs"] = [""] * 8
1231
  batch_queue[current_batch_index]["lrcs"][sample_idx_0based] = lrc_text
1232
 
1233
+ # Convert LRC to VTT file and store path for batch navigation (consistent with VTT-based approach)
1234
+ vtt_path = lrc_to_vtt_file(lrc_text, total_duration=float(audio_duration))
 
 
1235
  if "subtitles" not in batch_queue[current_batch_index]:
1236
  batch_queue[current_batch_index]["subtitles"] = [None] * 8
1237
+ batch_queue[current_batch_index]["subtitles"][sample_idx_0based] = vtt_path
1238
 
1239
  # Return: lrc_display, details_accordion, batch_queue
1240
+ # NEW APPROACH: Only update lrc_display, NOT audio subtitles!
1241
+ # Audio subtitles will be updated via lrc_display.change() event.
1242
+ # Keep visible=True to ensure .change() event is properly triggered
1243
  return (
1244
  gr.update(value=lrc_text, visible=True),
1245
+ gr.skip(),
1246
  batch_queue
1247
  )
1248
  else:
1249
  error_msg = result.get("error", "Unknown error")
1250
+ return gr.update(value=f"❌ {error_msg}", visible=True), gr.skip(), batch_queue
1251
 
1252
  except Exception as e:
1253
  logger.exception("[generate_lrc_handler] Error generating LRC")
1254
+ return gr.update(value=f"❌ Error: {str(e)}", visible=True), gr.skip(), batch_queue
1255
 
1256
 
1257
def update_audio_subtitles_from_lrc(lrc_text: str, audio_duration: float = None):
    """
    Build the subtitles update for an Audio component from LRC text.

    Non-blank LRC text is converted to a VTT file via lrc_to_vtt_file and the
    resulting path is handed to the Audio component's ``subtitles`` property.
    Blank or missing text, or a failed conversion (lrc_to_vtt_file returning
    None), results in ``subtitles=None``.

    Args:
        lrc_text: LRC format lyrics string from lrc_display textbox
        audio_duration: Optional audio duration, forwarded as the total duration
            used for the last line's end time

    Returns:
        gr.update for the Audio component carrying the VTT file path or None
    """
    has_lyrics = bool(lrc_text) and bool(lrc_text.strip())

    # Only attempt the conversion when there is something to convert.
    subtitle_file = (
        lrc_to_vtt_file(lrc_text, total_duration=audio_duration)
        if has_lyrics
        else None
    )
    return gr.update(subtitles=subtitle_file)
 
 
1282
 
1283
 
1284
  def capture_current_params(
 
1489
 
1490
  # Extract extra_outputs from result tuple (index 46 after adding lrc_display)
1491
  # Note: index 47 is raw_codes_list which we already extracted above
1492
+ # Must check both length AND that the value is not None (intermediate yields use None as placeholder)
1493
+ extra_outputs_from_result = result[46] if len(result) > 46 and result[46] is not None else {}
1494
 
1495
  # Store current batch in queue
1496
  batch_queue = store_batch_in_queue(
 
1536
  # 0-7: audio_outputs, 8: all_audio_paths, 9: generation_info, 10: status, 11: seed
1537
  # 12-19: scores, 20-27: codes_display, 28-35: details_accordion, 36-43: lrc_display
1538
  # 44: lm_metadata, 45: is_format_caption, 46: extra_outputs, 47: raw_codes_list
1539
+ #
1540
+ # IMPORTANT: Audio updates (including subtitles) were already sent in the for-loop above.
1541
+ # We must NOT send them again here, otherwise the audio component receives duplicate updates
1542
+ # which can cause subtitle flickering. Replace audio updates (indices 0-7) with gr.skip().
1543
  ui_result = result[:-2] if len(result) > 47 else (result[:-1] if len(result) > 46 else result)
1544
 
1545
+ # Replace audio outputs (0-7) with gr.skip() to avoid duplicate updates
1546
+ ui_result_list = list(ui_result)
1547
+ for i in range(8):
1548
+ ui_result_list[i] = gr.skip()
1549
+ ui_result = tuple(ui_result_list)
1550
+
1551
  yield ui_result + (
1552
  current_batch_index,
1553
  total_batches,
 
1735
  generated_codes_single = generated_codes_batch[0] if generated_codes_batch else ""
1736
 
1737
  # Extract extra_outputs for LRC generation (index 46)
1738
+ # Must check both length AND that the value is not None (intermediate yields use None as placeholder)
1739
+ extra_outputs_from_bg = final_result[46] if len(final_result) > 46 and final_result[46] is not None else {}
1740
+
1741
+ # Extract scores from final_result (indices 12-19)
1742
+ # This is critical for auto_score to work when navigating to background-generated batches
1743
+ scores_from_bg = []
1744
+ for score_idx in range(12, 20):
1745
+ if score_idx < len(final_result):
1746
+ score_val = final_result[score_idx]
1747
+ # Handle gr.update objects - extract value if present, otherwise use empty string
1748
+ if hasattr(score_val, 'value'):
1749
+ scores_from_bg.append(score_val.value if score_val.value else "")
1750
+ elif isinstance(score_val, str):
1751
+ scores_from_bg.append(score_val)
1752
+ else:
1753
+ scores_from_bg.append("")
1754
+ else:
1755
+ scores_from_bg.append("")
1756
 
1757
  # Determine which codes to store
1758
  batch_size = params.get("batch_size_input", 2)
 
1768
  logger.info(f" - batch_size: {batch_size}")
1769
  logger.info(f" - generated_codes_single exists: {bool(generated_codes_single)}")
1770
  logger.info(f" - extra_outputs_from_bg exists: {extra_outputs_from_bg is not None}")
1771
+ logger.info(f" - scores_from_bg: {[bool(s) for s in scores_from_bg]}")
1772
  if isinstance(codes_to_store, list):
1773
  logger.info(f" - codes_to_store: LIST with {len(codes_to_store)} items")
1774
  for idx, code in enumerate(codes_to_store):
 
1776
  else:
1777
  logger.info(f" - codes_to_store: STRING with {len(codes_to_store) if codes_to_store else 0} chars")
1778
 
1779
+ # Store next batch in queue with codes, batch settings, scores, and ALL generation params
1780
  batch_queue = store_batch_in_queue(
1781
  batch_queue,
1782
  next_batch_idx,
 
1784
  generation_info,
1785
  seed_value_for_ui,
1786
  codes=codes_to_store,
1787
+ scores=scores_from_bg, # FIX: Now passing scores from background generation
1788
  allow_lm_batch=allow_lm_batch,
1789
  batch_size=int(batch_size),
1790
  generation_params=params,
 
1793
  status="completed"
1794
  )
1795
 
1796
+ # FIX: Extract auto_lrc results from extra_outputs (same as generate_with_batch_management)
1797
+ # This ensures LRC and subtitles are properly stored for batch navigation
1798
+ auto_lrc = params.get("auto_lrc", False)
1799
+ if auto_lrc and extra_outputs_from_bg:
1800
+ lrcs_from_extra = extra_outputs_from_bg.get("lrcs", [""] * 8)
1801
+ subtitles_from_extra = extra_outputs_from_bg.get("subtitles", [None] * 8)
1802
+ batch_queue[next_batch_idx]["lrcs"] = lrcs_from_extra
1803
+ batch_queue[next_batch_idx]["subtitles"] = subtitles_from_extra
1804
+ logger.info(f" - auto_lrc results stored: {[bool(l) for l in lrcs_from_extra]}")
1805
+
1806
  logger.info(f"Batch {next_batch_idx + 1} stored in queue successfully")
1807
 
1808
  # Success message
 
1837
 
1838
 
1839
  def navigate_to_previous_batch(current_batch_index, batch_queue):
1840
+ """Navigate to previous batch (Result View Only - Never touches Input UI)
1841
+
1842
+ Uses two-step yield to avoid subtitle flickering:
1843
+ 1. First yield: audio + clear LRC (triggers .change() to clear subtitles)
1844
+ 2. Sleep 50ms (let audio load)
1845
+ 3. Second yield: skip audio + set actual LRC (triggers .change() to set subtitles)
1846
+ """
1847
  if current_batch_index <= 0:
1848
  gr.Warning(t("messages.at_first_batch"))
1849
+ yield tuple([gr.update()] * 48) # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
1850
+ return
1851
 
1852
  # Move to previous batch
1853
  new_batch_index = current_batch_index - 1
 
1855
  # Load batch data from queue
1856
  if new_batch_index not in batch_queue:
1857
  gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
1858
+ yield tuple([gr.update()] * 48)
1859
+ return
1860
 
1861
  batch_data = batch_queue[new_batch_index]
1862
  audio_paths = batch_data.get("audio_paths", [])
1863
  generation_info_text = batch_data.get("generation_info", "")
1864
 
1865
+ # Prepare audio outputs (up to 8)
1866
  real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
 
1867
 
1868
  audio_updates = []
1869
  for idx in range(8):
1870
  if idx < len(real_audio_paths):
1871
  audio_path = real_audio_paths[idx]
1872
+ audio_updates.append(gr.update(value=audio_path))
 
 
1873
  else:
1874
+ audio_updates.append(gr.update(value=None))
1875
 
1876
  # Update batch indicator
1877
  total_batches = len(batch_queue)
 
1895
 
1896
  codes_display_updates = []
1897
  lrc_display_updates = []
1898
+ lrc_clear_updates = [] # For first yield - clear LRC
1899
  details_accordion_updates = []
1900
  for i in range(8):
1901
  if stored_allow_lm_batch and isinstance(stored_codes, list):
 
1906
  lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
1907
  score_str = score_displays[i] if i < len(score_displays) else ""
1908
 
1909
+ # Keep visible=True to ensure .change() event is properly triggered
1910
+ codes_display_updates.append(gr.update(value=code_str, visible=True))
1911
+ lrc_display_updates.append(gr.update(value=lrc_str, visible=True))
1912
+ lrc_clear_updates.append(gr.update(value="", visible=True)) # Clear first
1913
+ details_accordion_updates.append(gr.skip()) # Don't change accordion visibility
 
 
 
 
 
1914
 
1915
+ # ============== STEP 1: Yield audio + CLEAR LRC ==============
1916
+ yield (
1917
  audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
1918
  audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
1919
  audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
 
1923
  score_displays[4], score_displays[5], score_displays[6], score_displays[7],
1924
  codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
1925
  codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
1926
+ # LRC display - CLEAR first (triggers .change() to clear subtitles)
1927
+ lrc_clear_updates[0], lrc_clear_updates[1], lrc_clear_updates[2], lrc_clear_updates[3],
1928
+ lrc_clear_updates[4], lrc_clear_updates[5], lrc_clear_updates[6], lrc_clear_updates[7],
1929
  details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
1930
  details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
1931
  gr.update(interactive=True),
1932
  )
1933
+
1934
+ # Wait for audio to load before setting subtitles
1935
+ time_module.sleep(0.05)
1936
+
1937
+ # ============== STEP 2: Yield skip audio + SET actual LRC ==============
1938
+ skip_audio = [gr.skip() for _ in range(8)]
1939
+ skip_scores = [gr.skip() for _ in range(8)]
1940
+ skip_codes = [gr.skip() for _ in range(8)]
1941
+ skip_accordions = [gr.skip() for _ in range(8)]
1942
+
1943
+ yield (
1944
+ skip_audio[0], skip_audio[1], skip_audio[2], skip_audio[3],
1945
+ skip_audio[4], skip_audio[5], skip_audio[6], skip_audio[7],
1946
+ gr.skip(), gr.skip(), gr.skip(), gr.skip(), # audio_paths, generation_info, batch_index, indicator
1947
+ gr.skip(), gr.skip(), # prev/next buttons
1948
+ gr.skip(), # status
1949
+ skip_scores[0], skip_scores[1], skip_scores[2], skip_scores[3],
1950
+ skip_scores[4], skip_scores[5], skip_scores[6], skip_scores[7],
1951
+ skip_codes[0], skip_codes[1], skip_codes[2], skip_codes[3],
1952
+ skip_codes[4], skip_codes[5], skip_codes[6], skip_codes[7],
1953
+ # LRC display - SET actual content (triggers .change() to set subtitles)
1954
+ lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
1955
+ lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
1956
+ skip_accordions[0], skip_accordions[1], skip_accordions[2], skip_accordions[3],
1957
+ skip_accordions[4], skip_accordions[5], skip_accordions[6], skip_accordions[7],
1958
+ gr.skip(), # restore button
1959
+ )
1960
 
1961
 
1962
  def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches, batch_queue):
1963
+ """Navigate to next batch (Result View Only - Never touches Input UI)
1964
+
1965
+ Uses two-step yield to avoid subtitle flickering:
1966
+ 1. First yield: audio + clear LRC (triggers .change() to clear subtitles)
1967
+ 2. Sleep 50ms (let audio load)
1968
+ 3. Second yield: skip audio + set actual LRC (triggers .change() to set subtitles)
1969
+ """
1970
  if current_batch_index >= total_batches - 1:
1971
  gr.Warning(t("messages.at_last_batch"))
1972
+ yield tuple([gr.update()] * 49) # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 1 next_status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
1973
+ return
1974
 
1975
  # Move to next batch
1976
  new_batch_index = current_batch_index + 1
 
1978
  # Load batch data from queue
1979
  if new_batch_index not in batch_queue:
1980
  gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
1981
+ yield tuple([gr.update()] * 49)
1982
+ return
1983
 
1984
  batch_data = batch_queue[new_batch_index]
1985
  audio_paths = batch_data.get("audio_paths", [])
1986
  generation_info_text = batch_data.get("generation_info", "")
1987
 
1988
+ # Prepare audio outputs (up to 8)
1989
  real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
 
1990
 
1991
  audio_updates = []
1992
  for idx in range(8):
1993
  if idx < len(real_audio_paths):
1994
  audio_path = real_audio_paths[idx]
1995
+ audio_updates.append(gr.update(value=audio_path))
 
 
1996
  else:
1997
+ audio_updates.append(gr.update(value=None))
1998
 
1999
  # Update batch indicator
2000
  batch_indicator_text = update_batch_indicator(new_batch_index, total_batches)
 
2023
 
2024
  codes_display_updates = []
2025
  lrc_display_updates = []
2026
+ lrc_clear_updates = [] # For first yield - clear LRC
2027
  details_accordion_updates = []
2028
  for i in range(8):
2029
  if stored_allow_lm_batch and isinstance(stored_codes, list):
 
2032
  code_str = stored_codes if isinstance(stored_codes, str) and i == 0 else ""
2033
 
2034
  lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
 
2035
 
2036
+ # Keep visible=True to ensure .change() event is properly triggered
2037
+ codes_display_updates.append(gr.update(value=code_str, visible=True))
2038
+ lrc_display_updates.append(gr.update(value=lrc_str, visible=True))
2039
+ lrc_clear_updates.append(gr.update(value="", visible=True)) # Clear first
2040
+ details_accordion_updates.append(gr.skip()) # Don't change accordion visibility
 
 
 
 
 
2041
 
2042
+ # ============== STEP 1: Yield audio + CLEAR LRC ==============
2043
+ yield (
2044
  audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
2045
  audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
2046
  audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
 
2050
  score_displays[4], score_displays[5], score_displays[6], score_displays[7],
2051
  codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
2052
  codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
2053
+ # LRC display - CLEAR first (triggers .change() to clear subtitles)
2054
+ lrc_clear_updates[0], lrc_clear_updates[1], lrc_clear_updates[2], lrc_clear_updates[3],
2055
+ lrc_clear_updates[4], lrc_clear_updates[5], lrc_clear_updates[6], lrc_clear_updates[7],
2056
  details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
2057
  details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
2058
  gr.update(interactive=True),
2059
  )
2060
+
2061
+ # Wait for audio to load before setting subtitles
2062
+ time_module.sleep(0.05)
2063
+
2064
+ # ============== STEP 2: Yield skip audio + SET actual LRC ==============
2065
+ skip_audio = [gr.skip() for _ in range(8)]
2066
+ skip_scores = [gr.skip() for _ in range(8)]
2067
+ skip_codes = [gr.skip() for _ in range(8)]
2068
+ skip_accordions = [gr.skip() for _ in range(8)]
2069
+
2070
+ yield (
2071
+ skip_audio[0], skip_audio[1], skip_audio[2], skip_audio[3],
2072
+ skip_audio[4], skip_audio[5], skip_audio[6], skip_audio[7],
2073
+ gr.skip(), gr.skip(), gr.skip(), gr.skip(), # audio_paths, generation_info, batch_index, indicator
2074
+ gr.skip(), gr.skip(), # prev/next buttons
2075
+ gr.skip(), gr.skip(), # status, next_batch_status
2076
+ skip_scores[0], skip_scores[1], skip_scores[2], skip_scores[3],
2077
+ skip_scores[4], skip_scores[5], skip_scores[6], skip_scores[7],
2078
+ skip_codes[0], skip_codes[1], skip_codes[2], skip_codes[3],
2079
+ skip_codes[4], skip_codes[5], skip_codes[6], skip_codes[7],
2080
+ # LRC display - SET actual content (triggers .change() to set subtitles)
2081
+ lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
2082
+ lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
2083
+ skip_accordions[0], skip_accordions[1], skip_accordions[2], skip_accordions[3],
2084
+ skip_accordions[4], skip_accordions[5], skip_accordions[6], skip_accordions[7],
2085
+ gr.skip(), # restore button
2086
+ )
2087
 
2088
 
2089
  def restore_batch_parameters(current_batch_index, batch_queue):
acestep/gradio_ui/interfaces/result.py CHANGED
@@ -56,27 +56,30 @@ def create_results_section(dit_handler) -> dict:
56
  size="sm",
57
  scale=1
58
  )
59
- with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_1:
60
  codes_display_1 = gr.Textbox(
61
  label=t("results.codes_label", n=1),
62
  interactive=False,
63
  buttons=["copy"],
 
64
  max_lines=4,
65
- visible=False
66
  )
67
  score_display_1 = gr.Textbox(
68
  label=t("results.quality_score_label", n=1),
69
  interactive=False,
70
  buttons=["copy"],
 
71
  max_lines=6,
72
- visible=False
73
  )
74
  lrc_display_1 = gr.Textbox(
75
  label=t("results.lrc_label", n=1),
76
  interactive=True,
77
  buttons=["copy"],
 
78
  max_lines=8,
79
- visible=False
80
  )
81
  with gr.Column(visible=True) as audio_col_2:
82
  generated_audio_2 = gr.Audio(
@@ -110,27 +113,30 @@ def create_results_section(dit_handler) -> dict:
110
  size="sm",
111
  scale=1
112
  )
113
- with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_2:
114
  codes_display_2 = gr.Textbox(
115
  label=t("results.codes_label", n=2),
116
  interactive=False,
117
  buttons=["copy"],
 
118
  max_lines=4,
119
- visible=False
120
  )
121
  score_display_2 = gr.Textbox(
122
  label=t("results.quality_score_label", n=2),
123
  interactive=False,
124
  buttons=["copy"],
 
125
  max_lines=6,
126
- visible=False
127
  )
128
  lrc_display_2 = gr.Textbox(
129
  label=t("results.lrc_label", n=2),
130
  interactive=True,
131
  buttons=["copy"],
 
132
  max_lines=8,
133
- visible=False
134
  )
135
  with gr.Column(visible=False) as audio_col_3:
136
  generated_audio_3 = gr.Audio(
@@ -164,27 +170,30 @@ def create_results_section(dit_handler) -> dict:
164
  size="sm",
165
  scale=1
166
  )
167
- with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_3:
168
  codes_display_3 = gr.Textbox(
169
  label=t("results.codes_label", n=3),
170
  interactive=False,
171
  buttons=["copy"],
 
172
  max_lines=4,
173
- visible=False
174
  )
175
  score_display_3 = gr.Textbox(
176
  label=t("results.quality_score_label", n=3),
177
  interactive=False,
178
  buttons=["copy"],
 
179
  max_lines=6,
180
- visible=False
181
  )
182
  lrc_display_3 = gr.Textbox(
183
  label=t("results.lrc_label", n=3),
184
  interactive=True,
185
  buttons=["copy"],
 
186
  max_lines=8,
187
- visible=False
188
  )
189
  with gr.Column(visible=False) as audio_col_4:
190
  generated_audio_4 = gr.Audio(
@@ -218,27 +227,30 @@ def create_results_section(dit_handler) -> dict:
218
  size="sm",
219
  scale=1
220
  )
221
- with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_4:
222
  codes_display_4 = gr.Textbox(
223
  label=t("results.codes_label", n=4),
224
  interactive=False,
225
  buttons=["copy"],
 
226
  max_lines=4,
227
- visible=False
228
  )
229
  score_display_4 = gr.Textbox(
230
  label=t("results.quality_score_label", n=4),
231
  interactive=False,
232
  buttons=["copy"],
 
233
  max_lines=6,
234
- visible=False
235
  )
236
  lrc_display_4 = gr.Textbox(
237
  label=t("results.lrc_label", n=4),
238
  interactive=True,
239
  buttons=["copy"],
 
240
  max_lines=8,
241
- visible=False
242
  )
243
 
244
  # Second row for batch size 5-8 (initially hidden)
@@ -255,27 +267,30 @@ def create_results_section(dit_handler) -> dict:
255
  save_btn_5 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
256
  score_btn_5 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
257
  lrc_btn_5 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
258
- with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_5:
259
  codes_display_5 = gr.Textbox(
260
  label=t("results.codes_label", n=5),
261
  interactive=False,
262
  buttons=["copy"],
 
263
  max_lines=4,
264
- visible=False
265
  )
266
  score_display_5 = gr.Textbox(
267
  label=t("results.quality_score_label", n=5),
268
  interactive=False,
269
  buttons=["copy"],
 
270
  max_lines=6,
271
- visible=False
272
  )
273
  lrc_display_5 = gr.Textbox(
274
  label=t("results.lrc_label", n=5),
275
  interactive=True,
276
  buttons=["copy"],
 
277
  max_lines=8,
278
- visible=False
279
  )
280
  with gr.Column() as audio_col_6:
281
  generated_audio_6 = gr.Audio(
@@ -289,27 +304,30 @@ def create_results_section(dit_handler) -> dict:
289
  save_btn_6 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
290
  score_btn_6 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
291
  lrc_btn_6 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
292
- with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_6:
293
  codes_display_6 = gr.Textbox(
294
  label=t("results.codes_label", n=6),
295
  interactive=False,
296
  buttons=["copy"],
 
297
  max_lines=4,
298
- visible=False
299
  )
300
  score_display_6 = gr.Textbox(
301
  label=t("results.quality_score_label", n=6),
302
  interactive=False,
303
  buttons=["copy"],
 
304
  max_lines=6,
305
- visible=False
306
  )
307
  lrc_display_6 = gr.Textbox(
308
  label=t("results.lrc_label", n=6),
309
  interactive=True,
310
  buttons=["copy"],
 
311
  max_lines=8,
312
- visible=False
313
  )
314
  with gr.Column() as audio_col_7:
315
  generated_audio_7 = gr.Audio(
@@ -323,27 +341,30 @@ def create_results_section(dit_handler) -> dict:
323
  save_btn_7 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
324
  score_btn_7 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
325
  lrc_btn_7 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
326
- with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_7:
327
  codes_display_7 = gr.Textbox(
328
  label=t("results.codes_label", n=7),
329
  interactive=False,
330
  buttons=["copy"],
 
331
  max_lines=4,
332
- visible=False
333
  )
334
  score_display_7 = gr.Textbox(
335
  label=t("results.quality_score_label", n=7),
336
  interactive=False,
337
  buttons=["copy"],
 
338
  max_lines=6,
339
- visible=False
340
  )
341
  lrc_display_7 = gr.Textbox(
342
  label=t("results.lrc_label", n=7),
343
  interactive=True,
344
  buttons=["copy"],
 
345
  max_lines=8,
346
- visible=False
347
  )
348
  with gr.Column() as audio_col_8:
349
  generated_audio_8 = gr.Audio(
@@ -357,27 +378,30 @@ def create_results_section(dit_handler) -> dict:
357
  save_btn_8 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
358
  score_btn_8 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
359
  lrc_btn_8 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
360
- with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_8:
361
  codes_display_8 = gr.Textbox(
362
  label=t("results.codes_label", n=8),
363
  interactive=False,
364
  buttons=["copy"],
 
365
  max_lines=4,
366
- visible=False
367
  )
368
  score_display_8 = gr.Textbox(
369
  label=t("results.quality_score_label", n=8),
370
  interactive=False,
371
  buttons=["copy"],
 
372
  max_lines=6,
373
- visible=False
374
  )
375
  lrc_display_8 = gr.Textbox(
376
  label=t("results.lrc_label", n=8),
377
  interactive=True,
378
  buttons=["copy"],
 
379
  max_lines=8,
380
- visible=False
381
  )
382
 
383
  status_output = gr.Textbox(label=t("results.generation_status"), interactive=False)
 
56
  size="sm",
57
  scale=1
58
  )
59
+ with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_1:
60
  codes_display_1 = gr.Textbox(
61
  label=t("results.codes_label", n=1),
62
  interactive=False,
63
  buttons=["copy"],
64
+ lines=4,
65
  max_lines=4,
66
+ visible=True
67
  )
68
  score_display_1 = gr.Textbox(
69
  label=t("results.quality_score_label", n=1),
70
  interactive=False,
71
  buttons=["copy"],
72
+ lines=6,
73
  max_lines=6,
74
+ visible=True
75
  )
76
  lrc_display_1 = gr.Textbox(
77
  label=t("results.lrc_label", n=1),
78
  interactive=True,
79
  buttons=["copy"],
80
+ lines=8,
81
  max_lines=8,
82
+ visible=True
83
  )
84
  with gr.Column(visible=True) as audio_col_2:
85
  generated_audio_2 = gr.Audio(
 
113
  size="sm",
114
  scale=1
115
  )
116
+ with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_2:
117
  codes_display_2 = gr.Textbox(
118
  label=t("results.codes_label", n=2),
119
  interactive=False,
120
  buttons=["copy"],
121
+ lines=4,
122
  max_lines=4,
123
+ visible=True
124
  )
125
  score_display_2 = gr.Textbox(
126
  label=t("results.quality_score_label", n=2),
127
  interactive=False,
128
  buttons=["copy"],
129
+ lines=6,
130
  max_lines=6,
131
+ visible=True
132
  )
133
  lrc_display_2 = gr.Textbox(
134
  label=t("results.lrc_label", n=2),
135
  interactive=True,
136
  buttons=["copy"],
137
+ lines=8,
138
  max_lines=8,
139
+ visible=True
140
  )
141
  with gr.Column(visible=False) as audio_col_3:
142
  generated_audio_3 = gr.Audio(
 
170
  size="sm",
171
  scale=1
172
  )
173
+ with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_3:
174
  codes_display_3 = gr.Textbox(
175
  label=t("results.codes_label", n=3),
176
  interactive=False,
177
  buttons=["copy"],
178
+ lines=4,
179
  max_lines=4,
180
+ visible=True
181
  )
182
  score_display_3 = gr.Textbox(
183
  label=t("results.quality_score_label", n=3),
184
  interactive=False,
185
  buttons=["copy"],
186
+ lines=6,
187
  max_lines=6,
188
+ visible=True
189
  )
190
  lrc_display_3 = gr.Textbox(
191
  label=t("results.lrc_label", n=3),
192
  interactive=True,
193
  buttons=["copy"],
194
+ lines=8,
195
  max_lines=8,
196
+ visible=True
197
  )
198
  with gr.Column(visible=False) as audio_col_4:
199
  generated_audio_4 = gr.Audio(
 
227
  size="sm",
228
  scale=1
229
  )
230
+ with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_4:
231
  codes_display_4 = gr.Textbox(
232
  label=t("results.codes_label", n=4),
233
  interactive=False,
234
  buttons=["copy"],
235
+ lines=4,
236
  max_lines=4,
237
+ visible=True
238
  )
239
  score_display_4 = gr.Textbox(
240
  label=t("results.quality_score_label", n=4),
241
  interactive=False,
242
  buttons=["copy"],
243
+ lines=6,
244
  max_lines=6,
245
+ visible=True
246
  )
247
  lrc_display_4 = gr.Textbox(
248
  label=t("results.lrc_label", n=4),
249
  interactive=True,
250
  buttons=["copy"],
251
+ lines=8,
252
  max_lines=8,
253
+ visible=True
254
  )
255
 
256
  # Second row for batch size 5-8 (initially hidden)
 
267
  save_btn_5 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
268
  score_btn_5 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
269
  lrc_btn_5 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
270
+ with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_5:
271
  codes_display_5 = gr.Textbox(
272
  label=t("results.codes_label", n=5),
273
  interactive=False,
274
  buttons=["copy"],
275
+ lines=4,
276
  max_lines=4,
277
+ visible=True
278
  )
279
  score_display_5 = gr.Textbox(
280
  label=t("results.quality_score_label", n=5),
281
  interactive=False,
282
  buttons=["copy"],
283
+ lines=6,
284
  max_lines=6,
285
+ visible=True
286
  )
287
  lrc_display_5 = gr.Textbox(
288
  label=t("results.lrc_label", n=5),
289
  interactive=True,
290
  buttons=["copy"],
291
+ lines=8,
292
  max_lines=8,
293
+ visible=True
294
  )
295
  with gr.Column() as audio_col_6:
296
  generated_audio_6 = gr.Audio(
 
304
  save_btn_6 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
305
  score_btn_6 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
306
  lrc_btn_6 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
307
+ with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_6:
308
  codes_display_6 = gr.Textbox(
309
  label=t("results.codes_label", n=6),
310
  interactive=False,
311
  buttons=["copy"],
312
+ lines=4,
313
  max_lines=4,
314
+ visible=True
315
  )
316
  score_display_6 = gr.Textbox(
317
  label=t("results.quality_score_label", n=6),
318
  interactive=False,
319
  buttons=["copy"],
320
+ lines=6,
321
  max_lines=6,
322
+ visible=True
323
  )
324
  lrc_display_6 = gr.Textbox(
325
  label=t("results.lrc_label", n=6),
326
  interactive=True,
327
  buttons=["copy"],
328
+ lines=8,
329
  max_lines=8,
330
+ visible=True
331
  )
332
  with gr.Column() as audio_col_7:
333
  generated_audio_7 = gr.Audio(
 
341
  save_btn_7 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
342
  score_btn_7 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
343
  lrc_btn_7 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
344
+ with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_7:
345
  codes_display_7 = gr.Textbox(
346
  label=t("results.codes_label", n=7),
347
  interactive=False,
348
  buttons=["copy"],
349
+ lines=4,
350
  max_lines=4,
351
+ visible=True
352
  )
353
  score_display_7 = gr.Textbox(
354
  label=t("results.quality_score_label", n=7),
355
  interactive=False,
356
  buttons=["copy"],
357
+ lines=6,
358
  max_lines=6,
359
+ visible=True
360
  )
361
  lrc_display_7 = gr.Textbox(
362
  label=t("results.lrc_label", n=7),
363
  interactive=True,
364
  buttons=["copy"],
365
+ lines=8,
366
  max_lines=8,
367
+ visible=True
368
  )
369
  with gr.Column() as audio_col_8:
370
  generated_audio_8 = gr.Audio(
 
378
  save_btn_8 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
379
  score_btn_8 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
380
  lrc_btn_8 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
381
+ with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_8:
382
  codes_display_8 = gr.Textbox(
383
  label=t("results.codes_label", n=8),
384
  interactive=False,
385
  buttons=["copy"],
386
+ lines=4,
387
  max_lines=4,
388
+ visible=True
389
  )
390
  score_display_8 = gr.Textbox(
391
  label=t("results.quality_score_label", n=8),
392
  interactive=False,
393
  buttons=["copy"],
394
+ lines=6,
395
  max_lines=6,
396
+ visible=True
397
  )
398
  lrc_display_8 = gr.Textbox(
399
  label=t("results.lrc_label", n=8),
400
  interactive=True,
401
  buttons=["copy"],
402
+ lines=8,
403
  max_lines=8,
404
+ visible=True
405
  )
406
 
407
  status_output = gr.Textbox(label=t("results.generation_status"), interactive=False)