Spaces:

ACE-Step
/

Ace-Step-v1.5

Running on Zero

App Files Files Community

Gong Junmin committed on Jan 14

Commit

da41c7b

unverified ·

2 Parent(s): 13537d2 7404af9

Merge pull request #3 from ace-step/fix_lrc_bugs

Browse files

Files changed (4) hide show

acestep/constants.py +2 -0
acestep/gradio_ui/events/__init__.py +16 -8
acestep/gradio_ui/events/results_handlers.py +363 -150
acestep/gradio_ui/interfaces/result.py +56 -32

acestep/constants.py CHANGED Viewed

@@ -70,6 +70,8 @@ TASK_TYPES_BASE = ["text2music", "repaint", "cover", "extract", "lego", "complet
 DEFAULT_DIT_INSTRUCTION = "Fill the audio semantic mask based on the given conditions:"
 DEFAULT_LM_INSTRUCTION = "Generate audio semantic tokens based on the given conditions:"
 DEFAULT_LM_UNDERSTAND_INSTRUCTION = "Understand the given musical conditions and describe the audio semantics accordingly:"
 # Instruction templates for each task type
 # Note: Some instructions use placeholders like {TRACK_NAME} or {TRACK_CLASSES}

 DEFAULT_DIT_INSTRUCTION = "Fill the audio semantic mask based on the given conditions:"
 DEFAULT_LM_INSTRUCTION = "Generate audio semantic tokens based on the given conditions:"
 DEFAULT_LM_UNDERSTAND_INSTRUCTION = "Understand the given musical conditions and describe the audio semantics accordingly:"
+DEFAULT_LM_INSPIRED_INSTRUCTION = "Expand the user's input into a more detailed and specific musical description:"
+DEFAULT_LM_REWRITE_INSTRUCTION = "Format the user's input into a more detailed and specific musical description:"
 # Instruction templates for each task type
 # Note: Some instructions use placeholders like {TRACK_NAME} or {TRACK_CLASSES}

acestep/gradio_ui/events/__init__.py CHANGED Viewed

@@ -373,7 +373,8 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
             outputs=[
                 results_section[f"lrc_display_{btn_idx}"],
                 results_section[f"details_accordion_{btn_idx}"],
-                # Audio subtitles now auto-updated via lrc_display.change()
                 results_section["batch_queue"]
             ]
         )
@@ -723,14 +724,21 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
     )
     # ========== LRC Display Change Handlers ==========
-    # When lrc_display textbox changes, update the corresponding audio component's subtitles
-    for i in range(1, 9):
-        results_section[f"lrc_display_{i}"].change(
             fn=res_h.update_audio_subtitles_from_lrc,
             inputs=[
-                results_section[f"lrc_display_{i}"],
-                results_section[f"generated_audio_{i}"],
-                generation_section["audio_duration"],
             ],
-            outputs=[results_section[f"generated_audio_{i}"]]
         )

             outputs=[
                 results_section[f"lrc_display_{btn_idx}"],
                 results_section[f"details_accordion_{btn_idx}"],
+                # NOTE: Removed generated_audio output!
+                # Audio subtitles are now updated via lrc_display.change() event.
                 results_section["batch_queue"]
             ]
         )
     )
     # ========== LRC Display Change Handlers ==========
+    # NEW APPROACH: Use lrc_display.change() to update audio subtitles
+    # This decouples audio value updates from subtitle updates, avoiding flickering.
+    #
+    # When lrc_display text changes (from generate, LRC button, or manual edit):
+    # 1. lrc_display.change() is triggered
+    # 2. update_audio_subtitles_from_lrc() parses LRC and updates audio subtitles
+    # 3. Audio value is NEVER updated here - only subtitles
+    for lrc_idx in range(1, 9):
+        results_section[f"lrc_display_{lrc_idx}"].change(
             fn=res_h.update_audio_subtitles_from_lrc,
             inputs=[
+                results_section[f"lrc_display_{lrc_idx}"],
+                # audio_duration not needed - parse_lrc_to_subtitles calculates end time from timestamps
             ],
+            outputs=[
+                results_section[f"generated_audio_{lrc_idx}"],  # Only updates subtitles, not value
+            ]
         )

acestep/gradio_ui/events/results_handlers.py CHANGED Viewed

@@ -40,9 +40,10 @@ def parse_lrc_to_subtitles(lrc_text: str, total_duration: Optional[float] = None
     lines = lrc_text.strip().split('\n')
     # Regex patterns for LRC timestamps
-    # Pattern 1: [MM:SS.ss] - standard LRC with start time only
     # Pattern 2: [MM:SS.ss][MM:SS.ss] - LRC with both start and end time
-    timestamp_pattern = r'\[(\d{2}):(\d{2})\.(\d{2})\]'
     parsed_lines = []
@@ -62,14 +63,17 @@ def parse_lrc_to_subtitles(lrc_text: str, total_duration: Optional[float] = None
             continue
         # Parse first timestamp as start time
         start_minutes, start_seconds, start_centiseconds = timestamps[0]
-        start_time = int(start_minutes) * 60 + int(start_seconds) + int(start_centiseconds) / 100.0
         # If there's a second timestamp, use it as end time
         end_time = None
         if len(timestamps) >= 2:
             end_minutes, end_seconds, end_centiseconds = timestamps[1]
-            end_time = int(end_minutes) * 60 + int(end_seconds) + int(end_centiseconds) / 100.0
         parsed_lines.append({
             'start': start_time,
@@ -101,6 +105,83 @@ def parse_lrc_to_subtitles(lrc_text: str, total_duration: Optional[float] = None
     return subtitles
 def _build_generation_info(
     lm_metadata: Optional[Dict[str, Any]],
     time_costs: Dict[str, float],
@@ -477,15 +558,25 @@ def generate_with_progress(
     audios = result.audios
     progress(0.99, "Converting audio to mp3...")
-    # Clear all scores, codes, and lrc displays at the start of generation
     # Note: Create independent gr.update objects (not references to the same object)
-    clear_scores = [gr.update(value="", visible=False) for _ in range(8)]
-    clear_codes = [gr.update(value="", visible=False) for _ in range(8)]
-    clear_lrcs = [gr.update(value="", visible=False) for _ in range(8)]
-    clear_accordions = [gr.update(visible=False) for _ in range(8)]
     yield (
-        # Audio outputs (keep as skip, will be updated in loop)
-        gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(),
         None,  # all_audio_paths (clear batch files)
         generation_info,
         "Clearing previous results...",
@@ -507,6 +598,7 @@ def generate_with_progress(
         None,  # extra_outputs placeholder
         None,  # raw_codes placeholder
     )
     for i in range(8):
         if i < len(audios):
@@ -613,9 +705,9 @@ def generate_with_progress(
                             lrc_text = lrc_result.get("lrc_text", "")
                             final_lrcs_list[i] = lrc_text
                             logger.info(f"[auto_lrc] LRC text length for sample {i + 1}: {len(lrc_text)}")
-                            # Parse LRC to subtitles format
-                            subtitles_data = parse_lrc_to_subtitles(lrc_text, total_duration=float(actual_duration))
-                            final_subtitles_list[i] = subtitles_data
                     else:
                         logger.warning(f"[auto_lrc] Missing required extra_outputs for sample {i + 1}")
                 except Exception as e:
@@ -624,53 +716,88 @@ def generate_with_progress(
                 total_auto_lrc_time += (auto_lrc_end - auto_lrc_start)
             status_message = f"Encoding & Ready: {i+1}/{len(audios)}"
             current_audio_updates = [gr.skip() for _ in range(8)]
-            # Always set audio path first, subtitles will be applied via Audio component's subtitles parameter
             current_audio_updates[i] = audio_path
-            # Codes display updates (for results section)
-            codes_display_updates = [gr.skip() for _ in range(8)]
-            codes_display_updates[i] = gr.update(value=code_str, visible=bool(code_str))
-            # LRC display updates
-            lrc_display_updates = [gr.skip() for _ in range(8)]
-            has_lrc = bool(final_lrcs_list[i])
-            if auto_lrc and has_lrc:
-                lrc_display_updates[i] = gr.update(value=final_lrcs_list[i], visible=True)
-            # Details accordion updates (show if code OR lrc OR score exists)
             details_accordion_updates = [gr.skip() for _ in range(8)]
-            has_score = bool(score_str) and score_str != "Done!"
-            has_content = bool(code_str) or has_lrc or has_score
-            details_accordion_updates[i] = gr.update(visible=has_content)
             yield (
                 current_audio_updates[0], current_audio_updates[1], current_audio_updates[2], current_audio_updates[3],
                 current_audio_updates[4], current_audio_updates[5], current_audio_updates[6], current_audio_updates[7],
-                all_audio_paths,   # Real-time update of Batch File list
                 generation_info,
                 status_message,
                 seed_value_for_ui,
-                # Scores
                 scores_ui_updates[0], scores_ui_updates[1], scores_ui_updates[2], scores_ui_updates[3], scores_ui_updates[4], scores_ui_updates[5], scores_ui_updates[6], scores_ui_updates[7],
-                # Codes display in results section
                 codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
                 codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
-                # Details accordion visibility
                 details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
                 details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
-                # LRC display
-                lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
-                lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
                 lm_generated_metadata,
                 is_format_caption,
-                None,  # Placeholder for extra_outputs (only filled in final yield)
-                None,  # Placeholder for raw_codes_list (only filled in final yield)
             )
         else:
             # If i exceeds the generated count (e.g., batch=2, i=2..7), do not yield
             pass
-        time_module.sleep(0.1)
     # Record audio conversion time
     audio_conversion_end_time = time_module.time()
@@ -697,26 +824,19 @@ def generate_with_progress(
         num_audios=len(result.audios),
     )
-    # Build final codes display, LRC display, and accordion visibility updates
-    final_codes_display_updates = []
-    final_lrc_display_updates = []
-    final_accordion_updates = []
-    for i in range(8):
-        code_str = final_codes_list[i]
-        lrc_text = final_lrcs_list[i]
-        score_str = final_scores_list[i]
-        has_code = bool(code_str)
-        has_lrc = bool(lrc_text)
-        has_score = bool(score_str) and score_str != "Done!"
-        # Show accordion if code OR LRC OR score exists
-        has_content = has_code or has_lrc or has_score
-        final_codes_display_updates.append(gr.update(value=code_str, visible=has_code))
-        final_lrc_display_updates.append(gr.update(value=lrc_text, visible=has_lrc))
-        final_accordion_updates.append(gr.update(visible=has_content))
     yield (
-        gr.skip(), gr.skip(), gr.skip(), gr.skip(), # Audio 1-4: SKIP
-        gr.skip(), gr.skip(), gr.skip(), gr.skip(), # Audio 5-8: SKIP
         all_audio_paths,
         generation_info,
         "Generation Complete",
@@ -730,8 +850,8 @@ def generate_with_progress(
         final_accordion_updates[0], final_accordion_updates[1], final_accordion_updates[2], final_accordion_updates[3],
         final_accordion_updates[4], final_accordion_updates[5], final_accordion_updates[6], final_accordion_updates[7],
         # LRC display
-        final_lrc_display_updates[0], final_lrc_display_updates[1], final_lrc_display_updates[2], final_lrc_display_updates[3],
-        final_lrc_display_updates[4], final_lrc_display_updates[5], final_lrc_display_updates[6], final_lrc_display_updates[7],
         lm_generated_metadata,
         is_format_caption,
         {
@@ -1006,10 +1126,10 @@ def calculate_score_handler_with_selection(
             batch_queue[current_batch_index]["scores"] = [""] * 8
         batch_queue[current_batch_index]["scores"][sample_idx - 1] = score_display
-    # Return: score_display (content + visible), accordion visible, batch_queue
     return (
-        gr.update(value=score_display, visible=True),  # score_display with content
-        gr.update(visible=True),  # details_accordion
         batch_queue
     )
@@ -1017,11 +1137,14 @@ def calculate_score_handler_with_selection(
 def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_queue, vocal_language, inference_steps):
     """
     Generate LRC timestamps for a specific audio sample.
     This function retrieves cached generation data from batch_queue and calls
     the handler's get_lyric_timestamp method to generate LRC format lyrics.
-    Audio subtitles are automatically updated via lrc_display.change() event.
     Args:
         dit_handler: DiT handler instance with get_lyric_timestamp method
         sample_idx: Which sample to generate LRC for (1-8)
@@ -1029,21 +1152,23 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
         batch_queue: Dictionary storing all batch generation data
         vocal_language: Language code for lyrics
         inference_steps: Number of inference steps used in generation
     Returns:
         Tuple of (lrc_display_update, details_accordion_update, batch_queue)
     """
     import torch
     if current_batch_index not in batch_queue:
         return gr.skip(), gr.skip(), batch_queue
     batch_data = batch_queue[current_batch_index]
     extra_outputs = batch_data.get("extra_outputs", {})
     # Check if required data is available
     if not extra_outputs:
-        return gr.update(value=t("messages.lrc_no_extra_outputs"), visible=True), gr.update(visible=True), batch_queue
     pred_latents = extra_outputs.get("pred_latents")
     encoder_hidden_states = extra_outputs.get("encoder_hidden_states")
@@ -1052,7 +1177,7 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
     lyric_token_idss = extra_outputs.get("lyric_token_idss")
     if any(x is None for x in [pred_latents, encoder_hidden_states, encoder_attention_mask, context_latents, lyric_token_idss]):
-        return gr.update(value=t("messages.lrc_missing_tensors"), visible=True), gr.update(visible=True), batch_queue
     # Adjust sample_idx to 0-based
     sample_idx_0based = sample_idx - 1
@@ -1060,7 +1185,7 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
     # Check if sample exists in batch
     batch_size = pred_latents.shape[0]
     if sample_idx_0based >= batch_size:
-        return gr.update(value=t("messages.lrc_sample_not_exist"), visible=True), gr.update(visible=True), batch_queue
     # Extract the specific sample's data
     try:
@@ -1098,72 +1223,62 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
         if result.get("success"):
             lrc_text = result.get("lrc_text", "")
             if not lrc_text:
-                return gr.update(value=t("messages.lrc_empty_result"), visible=True), gr.update(visible=True), batch_queue
             # Store LRC in batch_queue for later retrieval when switching batches
             if "lrcs" not in batch_queue[current_batch_index]:
                 batch_queue[current_batch_index]["lrcs"] = [""] * 8
             batch_queue[current_batch_index]["lrcs"][sample_idx_0based] = lrc_text
-            # Parse LRC to subtitles format for storage (audio subtitles will be updated via lrc_display.change())
-            subtitles_data = parse_lrc_to_subtitles(lrc_text, total_duration=float(audio_duration))
-            # Store subtitles in batch_queue for batch navigation
             if "subtitles" not in batch_queue[current_batch_index]:
                 batch_queue[current_batch_index]["subtitles"] = [None] * 8
-            batch_queue[current_batch_index]["subtitles"][sample_idx_0based] = subtitles_data
             # Return: lrc_display, details_accordion, batch_queue
-            # Audio subtitles are automatically updated via lrc_display.change() event
             return (
                 gr.update(value=lrc_text, visible=True),
-                gr.update(visible=True),
                 batch_queue
             )
         else:
             error_msg = result.get("error", "Unknown error")
-            return gr.update(value=f"❌ {error_msg}", visible=True), gr.update(visible=True), batch_queue
     except Exception as e:
         logger.exception("[generate_lrc_handler] Error generating LRC")
-        return gr.update(value=f"❌ Error: {str(e)}", visible=True), gr.update(visible=True), batch_queue
-def update_audio_subtitles_from_lrc(lrc_text: str, audio_component_value, audio_duration: float = None):
     """
     Update Audio component's subtitles based on LRC text content.
-    This function is triggered when lrc_display textbox changes.
-    It parses the LRC text and updates the corresponding Audio component's subtitles.
     Args:
         lrc_text: LRC format lyrics string from lrc_display textbox
-        audio_component_value: Current value of the audio component (path or dict)
         audio_duration: Optional audio duration for calculating last line's end time
     Returns:
-        gr.update for the Audio component with subtitles
     """
-    # If no LRC text, skip update (don't clear subtitles to avoid flickering)
     if not lrc_text or not lrc_text.strip():
-        return gr.skip()
-    # Get audio path from component value
-    audio_path = None
-    if audio_component_value:
-        if isinstance(audio_component_value, dict):
-            audio_path = audio_component_value.get("path") or audio_component_value.get("value")
-        else:
-            audio_path = audio_component_value
-    if not audio_path:
-        return gr.skip()
-    # Parse LRC to subtitles format
-    subtitles_data = parse_lrc_to_subtitles(lrc_text, total_duration=audio_duration)
-    # Return updated audio with subtitles
-    return gr.update(value=audio_path, subtitles=subtitles_data if subtitles_data else None)
 def capture_current_params(
@@ -1374,7 +1489,8 @@ def generate_with_batch_management(
     # Extract extra_outputs from result tuple (index 46 after adding lrc_display)
     # Note: index 47 is raw_codes_list which we already extracted above
-    extra_outputs_from_result = result[46] if len(result) > 46 else {}
     # Store current batch in queue
     batch_queue = store_batch_in_queue(
@@ -1420,9 +1536,18 @@ def generate_with_batch_management(
     # 0-7: audio_outputs, 8: all_audio_paths, 9: generation_info, 10: status, 11: seed
     # 12-19: scores, 20-27: codes_display, 28-35: details_accordion, 36-43: lrc_display
     # 44: lm_metadata, 45: is_format_caption, 46: extra_outputs, 47: raw_codes_list
-    # Note: Audio subtitles are already included in the intermediate yields from generate_with_progress
     ui_result = result[:-2] if len(result) > 47 else (result[:-1] if len(result) > 46 else result)
     yield ui_result + (
         current_batch_index,
         total_batches,
@@ -1610,7 +1735,24 @@ def generate_next_batch_background(
         generated_codes_single = generated_codes_batch[0] if generated_codes_batch else ""
         # Extract extra_outputs for LRC generation (index 46)
-        extra_outputs_from_bg = final_result[46] if len(final_result) > 46 else None
         # Determine which codes to store
         batch_size = params.get("batch_size_input", 2)
@@ -1626,6 +1768,7 @@ def generate_next_batch_background(
         logger.info(f"  - batch_size: {batch_size}")
         logger.info(f"  - generated_codes_single exists: {bool(generated_codes_single)}")
         logger.info(f"  - extra_outputs_from_bg exists: {extra_outputs_from_bg is not None}")
         if isinstance(codes_to_store, list):
             logger.info(f"  - codes_to_store: LIST with {len(codes_to_store)} items")
             for idx, code in enumerate(codes_to_store):
@@ -1633,7 +1776,7 @@ def generate_next_batch_background(
         else:
             logger.info(f"  - codes_to_store: STRING with {len(codes_to_store) if codes_to_store else 0} chars")
-        # Store next batch in queue with codes, batch settings, and ALL generation params
         batch_queue = store_batch_in_queue(
             batch_queue,
             next_batch_idx,
@@ -1641,6 +1784,7 @@ def generate_next_batch_background(
             generation_info,
             seed_value_for_ui,
             codes=codes_to_store,
             allow_lm_batch=allow_lm_batch,
             batch_size=int(batch_size),
             generation_params=params,
@@ -1649,6 +1793,16 @@ def generate_next_batch_background(
             status="completed"
         )
         logger.info(f"Batch {next_batch_idx + 1} stored in queue successfully")
         # Success message
@@ -1683,10 +1837,17 @@ def generate_next_batch_background(
 def navigate_to_previous_batch(current_batch_index, batch_queue):
-    """Navigate to previous batch (Result View Only - Never touches Input UI)"""
     if current_batch_index <= 0:
         gr.Warning(t("messages.at_first_batch"))
-        return [gr.update()] * 48  # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
     # Move to previous batch
     new_batch_index = current_batch_index - 1
@@ -1694,25 +1855,23 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
     # Load batch data from queue
     if new_batch_index not in batch_queue:
         gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
-        return [gr.update()] * 48
     batch_data = batch_queue[new_batch_index]
     audio_paths = batch_data.get("audio_paths", [])
     generation_info_text = batch_data.get("generation_info", "")
-    # Prepare audio outputs (up to 8) with subtitles
     real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
-    stored_subtitles = batch_data.get("subtitles", [None] * 8)
     audio_updates = []
     for idx in range(8):
         if idx < len(real_audio_paths):
             audio_path = real_audio_paths[idx]
-            subtitles_data = stored_subtitles[idx] if idx < len(stored_subtitles) else None
-            # Use gr.update to set both value and subtitles
-            audio_updates.append(gr.update(value=audio_path, subtitles=subtitles_data))
         else:
-            audio_updates.append(gr.update(value=None, subtitles=None))
     # Update batch indicator
     total_batches = len(batch_queue)
@@ -1736,6 +1895,7 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
     codes_display_updates = []
     lrc_display_updates = []
     details_accordion_updates = []
     for i in range(8):
         if stored_allow_lm_batch and isinstance(stored_codes, list):
@@ -1746,18 +1906,14 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
         lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
         score_str = score_displays[i] if i < len(score_displays) else ""
-        has_code = bool(code_str) and i < batch_size
-        has_lrc = bool(lrc_str)
-        has_score = bool(score_str)
-        # Show accordion if any content exists
-        has_content = has_code or has_lrc or has_score
-        codes_display_updates.append(gr.update(value=code_str, visible=has_code))
-        lrc_display_updates.append(gr.update(value=lrc_str, visible=has_lrc))
-        details_accordion_updates.append(gr.update(visible=has_content))
-    return (
         audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
         audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
         audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
@@ -1767,19 +1923,54 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
         score_displays[4], score_displays[5], score_displays[6], score_displays[7],
         codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
         codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
-        lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
-        lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
         details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
         details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
         gr.update(interactive=True),
     )
 def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches, batch_queue):
-    """Navigate to next batch (Result View Only - Never touches Input UI)"""
     if current_batch_index >= total_batches - 1:
         gr.Warning(t("messages.at_last_batch"))
-        return [gr.update()] * 49  # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 1 next_status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
     # Move to next batch
     new_batch_index = current_batch_index + 1
@@ -1787,25 +1978,23 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
     # Load batch data from queue
     if new_batch_index not in batch_queue:
         gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
-        return [gr.update()] * 49
     batch_data = batch_queue[new_batch_index]
     audio_paths = batch_data.get("audio_paths", [])
     generation_info_text = batch_data.get("generation_info", "")
-    # Prepare audio outputs (up to 8) with subtitles
     real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
-    stored_subtitles = batch_data.get("subtitles", [None] * 8)
     audio_updates = []
     for idx in range(8):
         if idx < len(real_audio_paths):
             audio_path = real_audio_paths[idx]
-            subtitles_data = stored_subtitles[idx] if idx < len(stored_subtitles) else None
-            # Use gr.update to set both value and subtitles
-            audio_updates.append(gr.update(value=audio_path, subtitles=subtitles_data))
         else:
-            audio_updates.append(gr.update(value=None, subtitles=None))
     # Update batch indicator
     batch_indicator_text = update_batch_indicator(new_batch_index, total_batches)
@@ -1834,6 +2023,7 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
     codes_display_updates = []
     lrc_display_updates = []
     details_accordion_updates = []
     for i in range(8):
         if stored_allow_lm_batch and isinstance(stored_codes, list):
@@ -1842,20 +2032,15 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
             code_str = stored_codes if isinstance(stored_codes, str) and i == 0 else ""
         lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
-        score_str = score_displays[i] if i < len(score_displays) else ""
-        has_code = bool(code_str) and i < batch_size
-        has_lrc = bool(lrc_str)
-        has_score = bool(score_str)
-        # Show accordion if any content exists
-        has_content = has_code or has_lrc or has_score
-        codes_display_updates.append(gr.update(value=code_str, visible=has_code))
-        lrc_display_updates.append(gr.update(value=lrc_str, visible=has_lrc))
-        details_accordion_updates.append(gr.update(visible=has_content))
-    return (
         audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
         audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
         audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
@@ -1865,12 +2050,40 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
         score_displays[4], score_displays[5], score_displays[6], score_displays[7],
         codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
         codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
-        lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
-        lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
         details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
         details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
         gr.update(interactive=True),
     )
 def restore_batch_parameters(current_batch_index, batch_queue):

     lines = lrc_text.strip().split('\n')
     # Regex patterns for LRC timestamps
+    # Pattern 1: [MM:SS.ss] or [MM:SS.sss] - standard LRC with start time only
     # Pattern 2: [MM:SS.ss][MM:SS.ss] - LRC with both start and end time
+    # Support both 2-digit (centiseconds) and 3-digit (milliseconds) formats
+    timestamp_pattern = r'\[(\d{2}):(\d{2})\.(\d{2,3})\]'
     parsed_lines = []
             continue
         # Parse first timestamp as start time
+        # Handle both 2-digit (centiseconds, /100) and 3-digit (milliseconds, /1000) formats
         start_minutes, start_seconds, start_centiseconds = timestamps[0]
+        cs = int(start_centiseconds)
+        start_time = int(start_minutes) * 60 + int(start_seconds) + (cs / 100.0 if len(start_centiseconds) == 2 else cs / 1000.0)
         # If there's a second timestamp, use it as end time
         end_time = None
         if len(timestamps) >= 2:
             end_minutes, end_seconds, end_centiseconds = timestamps[1]
+            cs_end = int(end_centiseconds)
+            end_time = int(end_minutes) * 60 + int(end_seconds) + (cs_end / 100.0 if len(end_centiseconds) == 2 else cs_end / 1000.0)
         parsed_lines.append({
             'start': start_time,
     return subtitles
+def _format_vtt_timestamp(seconds: float) -> str:
+    """
+    Format a time offset in seconds as a VTT timestamp: HH:MM:SS.mmm
+    The value is rounded to the nearest millisecond once and then split with
+    integer arithmetic, so binary float error cannot drop a millisecond
+    (e.g. 12.29 -> "00:00:12.290") and rounding carries correctly into the
+    seconds/minutes/hours fields. Negative inputs are clamped to zero.
+    Args:
+        seconds: Time in seconds
+    Returns:
+        Formatted timestamp string
+    """
+    # Work in whole milliseconds; max() keeps a negative offset from producing
+    # malformed fields such as "-1:59:59.000".
+    total_millis = max(0, int(round(seconds * 1000)))
+    total_secs, millis = divmod(total_millis, 1000)
+    total_minutes, secs = divmod(total_secs, 60)
+    hours, minutes = divmod(total_minutes, 60)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
+def lrc_to_vtt_file(lrc_text: str, total_duration: float = None) -> Optional[str]:
+    """
+    Write the lyrics in ``lrc_text`` to a WebVTT subtitle file and return its path.
+    The LRC text is parsed with parse_lrc_to_subtitles(); every parsed entry
+    becomes one numbered cue. The file is written as "subtitles.vtt" inside a
+    newly created temporary directory (prefix "acestep_vtt_"), so the returned
+    path can be handed to Gradio as a native <track src="..."> source.
+    Layout of the generated file:
+    WEBVTT
+    1
+    00:00:00.000 --> 00:00:05.000
+    First subtitle line
+    2
+    00:00:05.000 --> 00:00:10.000
+    Second subtitle line
+    Args:
+        lrc_text: LRC format lyrics string
+        total_duration: Total audio duration in seconds, forwarded to the LRC parser
+    Returns:
+        Path to the generated VTT file, or None when the input is blank,
+        nothing could be parsed, or the file could not be written
+    """
+    # Blank input: nothing to convert
+    if not (lrc_text and lrc_text.strip()):
+        return None
+    cues = parse_lrc_to_subtitles(lrc_text, total_duration=total_duration)
+    if not cues:
+        return None
+    # Header line followed by the mandatory blank separator
+    rows = ["WEBVTT", ""]
+    for number, cue in enumerate(cues, start=1):
+        begin = cue['timestamp'][0]
+        finish = cue['timestamp'][1]
+        caption = cue['text']
+        # Cue identifier, timing line, payload, then a blank line ending the cue
+        rows.extend((
+            str(number),
+            f"{_format_vtt_timestamp(begin)} --> {_format_vtt_timestamp(finish)}",
+            caption,
+            "",
+        ))
+    payload = "\n".join(rows)
+    # Each call gets its own temp directory; failures are logged, not raised
+    try:
+        out_dir = tempfile.mkdtemp(prefix="acestep_vtt_")
+        target = os.path.join(out_dir, "subtitles.vtt")
+        with open(target, "w", encoding="utf-8") as handle:
+            handle.write(payload)
+    except Exception as e:
+        logger.error(f"[lrc_to_vtt_file] Failed to create VTT file: {e}")
+        return None
+    return target
 def _build_generation_info(
     lm_metadata: Optional[Dict[str, Any]],
     time_costs: Dict[str, float],
     audios = result.audios
     progress(0.99, "Converting audio to mp3...")
+    # Clear all scores, codes, lrc displays at the start of generation
     # Note: Create independent gr.update objects (not references to the same object)
+    #
+    # NEW APPROACH: Don't update audio subtitles directly!
+    # Clearing lrc_display will trigger lrc_display.change() which clears subtitles automatically.
+    # This decouples audio value updates from subtitle updates, avoiding flickering.
+    #
+    # IMPORTANT: Keep visible=True to ensure .change() event is properly triggered by Gradio.
+    # These should always remain visible=True so users can expand accordion anytime.
+    clear_scores = [gr.update(value="", visible=True) for _ in range(8)]
+    clear_codes = [gr.update(value="", visible=True) for _ in range(8)]
+    # Clear lrc_display with empty string - this triggers .change() to clear subtitles
+    clear_lrcs = [gr.update(value="", visible=True) for _ in range(8)]
+    clear_accordions = [gr.skip() for _ in range(8)]  # Don't change accordion visibility
+    dump_audio = [None for _ in range(8)]
     yield (
+        # Audio outputs - reset to None here (not skipped); each sample's value is set in the loop below
+        # Subtitles are cleared via lrc_display.change() when lrc_display is emptied
+        dump_audio[0], dump_audio[1], dump_audio[2], dump_audio[3], dump_audio[4], dump_audio[5], dump_audio[6], dump_audio[7],
         None,  # all_audio_paths (clear batch files)
         generation_info,
         "Clearing previous results...",
         None,  # extra_outputs placeholder
         None,  # raw_codes placeholder
     )
+    time_module.sleep(0.1)
     for i in range(8):
         if i < len(audios):
                             lrc_text = lrc_result.get("lrc_text", "")
                             final_lrcs_list[i] = lrc_text
                             logger.info(f"[auto_lrc] LRC text length for sample {i + 1}: {len(lrc_text)}")
+                            # Convert LRC to VTT file for storage (consistent with new VTT-based approach)
+                            vtt_path = lrc_to_vtt_file(lrc_text, total_duration=float(actual_duration))
+                            final_subtitles_list[i] = vtt_path
                     else:
                         logger.warning(f"[auto_lrc] Missing required extra_outputs for sample {i + 1}")
                 except Exception as e:
                 total_auto_lrc_time += (auto_lrc_end - auto_lrc_start)
             status_message = f"Encoding & Ready: {i+1}/{len(audios)}"
+            has_lrc = bool(final_lrcs_list[i])
+            has_score = bool(score_str) and score_str != "Done!"
+            has_content = bool(code_str) or has_lrc or has_score
+            # ============== STEP 1: Yield audio + CLEAR LRC ==============
+            # First, update audio and clear LRC to avoid race condition
+            # (audio needs to load before subtitles are set via .change() event)
             current_audio_updates = [gr.skip() for _ in range(8)]
             current_audio_updates[i] = audio_path
+            codes_display_updates = [gr.skip() for _ in range(8)]
+            codes_display_updates[i] = gr.update(value=code_str, visible=True)  # Keep visible=True
             details_accordion_updates = [gr.skip() for _ in range(8)]
+            # Don't change accordion visibility - keep it always expandable
+            # Clear LRC first (this triggers .change() to clear subtitles)
+            # Keep visible=True to ensure .change() event is properly triggered
+            lrc_clear_updates = [gr.skip() for _ in range(8)]
+            lrc_clear_updates[i] = gr.update(value="", visible=True)
             yield (
                 current_audio_updates[0], current_audio_updates[1], current_audio_updates[2], current_audio_updates[3],
                 current_audio_updates[4], current_audio_updates[5], current_audio_updates[6], current_audio_updates[7],
+                all_audio_paths,
                 generation_info,
                 status_message,
                 seed_value_for_ui,
                 scores_ui_updates[0], scores_ui_updates[1], scores_ui_updates[2], scores_ui_updates[3], scores_ui_updates[4], scores_ui_updates[5], scores_ui_updates[6], scores_ui_updates[7],
                 codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
                 codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
                 details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
                 details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
+                # LRC display - CLEAR first
+                lrc_clear_updates[0], lrc_clear_updates[1], lrc_clear_updates[2], lrc_clear_updates[3],
+                lrc_clear_updates[4], lrc_clear_updates[5], lrc_clear_updates[6], lrc_clear_updates[7],
                 lm_generated_metadata,
                 is_format_caption,
+                None,
+                None,
             )
+            # Wait for audio to load before setting subtitles
+            time_module.sleep(0.05)
+            # ============== STEP 2: Skip audio + SET actual LRC ==============
+            # Now set the actual LRC content, which triggers .change() to set subtitles
+            # This two-step approach (same as navigate_to_batch) ensures audio is loaded first
+            if has_lrc:
+                skip_audio = [gr.skip() for _ in range(8)]
+                skip_scores = [gr.skip() for _ in range(8)]
+                skip_codes = [gr.skip() for _ in range(8)]
+                skip_accordions = [gr.skip() for _ in range(8)]
+                lrc_actual_updates = [gr.skip() for _ in range(8)]
+                lrc_actual_updates[i] = gr.update(value=final_lrcs_list[i], visible=True)  # Keep visible=True
+                yield (
+                    skip_audio[0], skip_audio[1], skip_audio[2], skip_audio[3],
+                    skip_audio[4], skip_audio[5], skip_audio[6], skip_audio[7],
+                    gr.skip(),  # all_audio_paths
+                    gr.skip(),  # generation_info
+                    gr.skip(),  # status_message
+                    gr.skip(),  # seed
+                    skip_scores[0], skip_scores[1], skip_scores[2], skip_scores[3],
+                    skip_scores[4], skip_scores[5], skip_scores[6], skip_scores[7],
+                    skip_codes[0], skip_codes[1], skip_codes[2], skip_codes[3],
+                    skip_codes[4], skip_codes[5], skip_codes[6], skip_codes[7],
+                    skip_accordions[0], skip_accordions[1], skip_accordions[2], skip_accordions[3],
+                    skip_accordions[4], skip_accordions[5], skip_accordions[6], skip_accordions[7],
+                    # LRC display - SET actual content (triggers .change() to set subtitles)
+                    lrc_actual_updates[0], lrc_actual_updates[1], lrc_actual_updates[2], lrc_actual_updates[3],
+                    lrc_actual_updates[4], lrc_actual_updates[5], lrc_actual_updates[6], lrc_actual_updates[7],
+                    gr.skip(),  # lm_generated_metadata
+                    gr.skip(),  # is_format_caption
+                    None,
+                    None,
+                )
         else:
             # If i exceeds the generated count (e.g., batch=2, i=2..7), do not yield
             pass
+        time_module.sleep(0.05)
     # Record audio conversion time
     audio_conversion_end_time = time_module.time()
         num_audios=len(result.audios),
     )
+    # Build final codes display, LRC display, accordion visibility updates
+    final_codes_display_updates = [gr.skip() for _ in range(8)]
+    # final_lrc_display_updates = [gr.skip() for _ in range(8)]
+    final_accordion_updates = [gr.skip() for _ in range(8)]
+    # NEW APPROACH: Don't update audio subtitles directly in final yield!
+    # The lrc_display was already updated in the loop yields above.
+    # lrc_display.change() event will automatically update the audio subtitles.
+    # This decouples audio value updates from subtitle updates, avoiding flickering.
     yield (
+        # Audio - just skip, subtitles are updated via lrc_display.change()
+        gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(),
         all_audio_paths,
         generation_info,
         "Generation Complete",
         final_accordion_updates[0], final_accordion_updates[1], final_accordion_updates[2], final_accordion_updates[3],
         final_accordion_updates[4], final_accordion_updates[5], final_accordion_updates[6], final_accordion_updates[7],
         # LRC display
+        final_lrcs_list[0], final_lrcs_list[1], final_lrcs_list[2], final_lrcs_list[3],
+        final_lrcs_list[4], final_lrcs_list[5], final_lrcs_list[6], final_lrcs_list[7],
         lm_generated_metadata,
         is_format_caption,
         {
             batch_queue[current_batch_index]["scores"] = [""] * 8
         batch_queue[current_batch_index]["scores"][sample_idx - 1] = score_display
+    # Return: score_display (with visible=True), accordion skip, batch_queue
     return (
+        gr.update(value=score_display, visible=True),  # score_display with content, keep visible=True
+        gr.skip(),  # details_accordion - don't change visibility
         batch_queue
     )
 def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_queue, vocal_language, inference_steps):
     """
     Generate LRC timestamps for a specific audio sample.
     This function retrieves cached generation data from batch_queue and calls
     the handler's get_lyric_timestamp method to generate LRC format lyrics.
+    NEW APPROACH: Only update lrc_display, NOT audio subtitles directly!
+    Audio subtitles will be updated via lrc_display.change() event.
+    This decouples audio value updates from subtitle updates, avoiding flickering.
     Args:
         dit_handler: DiT handler instance with get_lyric_timestamp method
         sample_idx: Which sample to generate LRC for (1-8)
         batch_queue: Dictionary storing all batch generation data
         vocal_language: Language code for lyrics
         inference_steps: Number of inference steps used in generation
     Returns:
         Tuple of (lrc_display_update, details_accordion_update, batch_queue)
+        Note: No audio_update - subtitles updated via lrc_display.change()
     """
     import torch
     if current_batch_index not in batch_queue:
         return gr.skip(), gr.skip(), batch_queue
     batch_data = batch_queue[current_batch_index]
     extra_outputs = batch_data.get("extra_outputs", {})
     # Check if required data is available
+    # Keep visible=True to ensure .change() event is properly triggered
     if not extra_outputs:
+        return gr.update(value=t("messages.lrc_no_extra_outputs"), visible=True), gr.skip(), batch_queue
     pred_latents = extra_outputs.get("pred_latents")
     encoder_hidden_states = extra_outputs.get("encoder_hidden_states")
     lyric_token_idss = extra_outputs.get("lyric_token_idss")
     if any(x is None for x in [pred_latents, encoder_hidden_states, encoder_attention_mask, context_latents, lyric_token_idss]):
+        return gr.update(value=t("messages.lrc_missing_tensors"), visible=True), gr.skip(), batch_queue
     # Adjust sample_idx to 0-based
     sample_idx_0based = sample_idx - 1
     # Check if sample exists in batch
     batch_size = pred_latents.shape[0]
     if sample_idx_0based >= batch_size:
+        return gr.update(value=t("messages.lrc_sample_not_exist"), visible=True), gr.skip(), batch_queue
     # Extract the specific sample's data
     try:
         if result.get("success"):
             lrc_text = result.get("lrc_text", "")
             if not lrc_text:
+                return gr.update(value=t("messages.lrc_empty_result"), visible=True), gr.skip(), batch_queue
             # Store LRC in batch_queue for later retrieval when switching batches
             if "lrcs" not in batch_queue[current_batch_index]:
                 batch_queue[current_batch_index]["lrcs"] = [""] * 8
             batch_queue[current_batch_index]["lrcs"][sample_idx_0based] = lrc_text
+            # Convert LRC to VTT file and store path for batch navigation (consistent with VTT-based approach)
+            vtt_path = lrc_to_vtt_file(lrc_text, total_duration=float(audio_duration))
             if "subtitles" not in batch_queue[current_batch_index]:
                 batch_queue[current_batch_index]["subtitles"] = [None] * 8
+            batch_queue[current_batch_index]["subtitles"][sample_idx_0based] = vtt_path
             # Return: lrc_display, details_accordion, batch_queue
+            # NEW APPROACH: Only update lrc_display, NOT audio subtitles!
+            # Audio subtitles will be updated via lrc_display.change() event.
+            # Keep visible=True to ensure .change() event is properly triggered
             return (
                 gr.update(value=lrc_text, visible=True),
+                gr.skip(),
                 batch_queue
             )
         else:
             error_msg = result.get("error", "Unknown error")
+            return gr.update(value=f"❌ {error_msg}", visible=True), gr.skip(), batch_queue
     except Exception as e:
         logger.exception("[generate_lrc_handler] Error generating LRC")
+        return gr.update(value=f"❌ Error: {str(e)}", visible=True), gr.skip(), batch_queue
+def update_audio_subtitles_from_lrc(lrc_text: str, audio_duration: float = None):
+    """
+    Build the Audio component update that mirrors the current LRC text as subtitles.
+    Non-blank LRC text is converted to a VTT file via lrc_to_vtt_file() and the
+    resulting path is passed as the ``subtitles`` value; blank text, or a failed
+    conversion, sets ``subtitles`` to None so existing subtitles are cleared.
+    Args:
+        lrc_text: LRC format lyrics string from lrc_display textbox
+        audio_duration: Optional audio duration, forwarded as total_duration
+    Returns:
+        gr.update for the Audio component carrying the subtitles file path (or None)
+    """
+    has_lyrics = bool(lrc_text) and bool(lrc_text.strip())
+    # Only run the conversion when there is something to convert
+    subtitle_file = lrc_to_vtt_file(lrc_text, total_duration=audio_duration) if has_lyrics else None
+    return gr.update(subtitles=subtitle_file)
 def capture_current_params(
     # Extract extra_outputs from result tuple (index 46 after adding lrc_display)
     # Note: index 47 is raw_codes_list which we already extracted above
+    # Must check both length AND that the value is not None (intermediate yields use None as placeholder)
+    extra_outputs_from_result = result[46] if len(result) > 46 and result[46] is not None else {}
     # Store current batch in queue
     batch_queue = store_batch_in_queue(
     # 0-7: audio_outputs, 8: all_audio_paths, 9: generation_info, 10: status, 11: seed
     # 12-19: scores, 20-27: codes_display, 28-35: details_accordion, 36-43: lrc_display
     # 44: lm_metadata, 45: is_format_caption, 46: extra_outputs, 47: raw_codes_list
+    #
+    # IMPORTANT: Audio updates (including subtitles) were already sent in the for-loop above.
+    # We must NOT send them again here, otherwise the audio component receives duplicate updates
+    # which can cause subtitle flickering. Replace audio updates (indices 0-7) with gr.skip().
     ui_result = result[:-2] if len(result) > 47 else (result[:-1] if len(result) > 46 else result)
+    # Replace audio outputs (0-7) with gr.skip() to avoid duplicate updates
+    ui_result_list = list(ui_result)
+    for i in range(8):
+        ui_result_list[i] = gr.skip()
+    ui_result = tuple(ui_result_list)
     yield ui_result + (
         current_batch_index,
         total_batches,
         generated_codes_single = generated_codes_batch[0] if generated_codes_batch else ""
         # Extract extra_outputs for LRC generation (index 46)
+        # Must check both length AND that the value is not None (intermediate yields use None as placeholder)
+        extra_outputs_from_bg = final_result[46] if len(final_result) > 46 and final_result[46] is not None else {}
+        # Extract scores from final_result (indices 12-19)
+        # This is critical for auto_score to work when navigating to background-generated batches
+        scores_from_bg = []
+        for score_idx in range(12, 20):
+            if score_idx < len(final_result):
+                score_val = final_result[score_idx]
+                # Handle gr.update objects - extract value if present, otherwise use empty string
+                if hasattr(score_val, 'value'):
+                    scores_from_bg.append(score_val.value if score_val.value else "")
+                elif isinstance(score_val, str):
+                    scores_from_bg.append(score_val)
+                else:
+                    scores_from_bg.append("")
+            else:
+                scores_from_bg.append("")
         # Determine which codes to store
         batch_size = params.get("batch_size_input", 2)
         logger.info(f"  - batch_size: {batch_size}")
         logger.info(f"  - generated_codes_single exists: {bool(generated_codes_single)}")
         logger.info(f"  - extra_outputs_from_bg exists: {extra_outputs_from_bg is not None}")
+        logger.info(f"  - scores_from_bg: {[bool(s) for s in scores_from_bg]}")
         if isinstance(codes_to_store, list):
             logger.info(f"  - codes_to_store: LIST with {len(codes_to_store)} items")
             for idx, code in enumerate(codes_to_store):
         else:
             logger.info(f"  - codes_to_store: STRING with {len(codes_to_store) if codes_to_store else 0} chars")
+        # Store next batch in queue with codes, batch settings, scores, and ALL generation params
         batch_queue = store_batch_in_queue(
             batch_queue,
             next_batch_idx,
             generation_info,
             seed_value_for_ui,
             codes=codes_to_store,
+            scores=scores_from_bg,  # FIX: Now passing scores from background generation
             allow_lm_batch=allow_lm_batch,
             batch_size=int(batch_size),
             generation_params=params,
             status="completed"
         )
+        # FIX: Extract auto_lrc results from extra_outputs (same as generate_with_batch_management)
+        # This ensures LRC and subtitles are properly stored for batch navigation
+        auto_lrc = params.get("auto_lrc", False)
+        if auto_lrc and extra_outputs_from_bg:
+            lrcs_from_extra = extra_outputs_from_bg.get("lrcs", [""] * 8)
+            subtitles_from_extra = extra_outputs_from_bg.get("subtitles", [None] * 8)
+            batch_queue[next_batch_idx]["lrcs"] = lrcs_from_extra
+            batch_queue[next_batch_idx]["subtitles"] = subtitles_from_extra
+            logger.info(f"  - auto_lrc results stored: {[bool(l) for l in lrcs_from_extra]}")
         logger.info(f"Batch {next_batch_idx + 1} stored in queue successfully")
         # Success message
 def navigate_to_previous_batch(current_batch_index, batch_queue):
+    """Navigate to previous batch (Result View Only - Never touches Input UI)
+    Uses two-step yield to avoid subtitle flickering:
+    1. First yield: audio + clear LRC (triggers .change() to clear subtitles)
+    2. Sleep 50ms (let audio load)
+    3. Second yield: skip audio + set actual LRC (triggers .change() to set subtitles)
+    """
     if current_batch_index <= 0:
         gr.Warning(t("messages.at_first_batch"))
+        yield tuple([gr.update()] * 48)  # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
+        return
     # Move to previous batch
     new_batch_index = current_batch_index - 1
     # Load batch data from queue
     if new_batch_index not in batch_queue:
         gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
+        yield tuple([gr.update()] * 48)
+        return
     batch_data = batch_queue[new_batch_index]
     audio_paths = batch_data.get("audio_paths", [])
     generation_info_text = batch_data.get("generation_info", "")
+    # Prepare audio outputs (up to 8)
     real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
     audio_updates = []
     for idx in range(8):
         if idx < len(real_audio_paths):
             audio_path = real_audio_paths[idx]
+            audio_updates.append(gr.update(value=audio_path))
         else:
+            audio_updates.append(gr.update(value=None))
     # Update batch indicator
     total_batches = len(batch_queue)
     codes_display_updates = []
     lrc_display_updates = []
+    lrc_clear_updates = []  # For first yield - clear LRC
     details_accordion_updates = []
     for i in range(8):
         if stored_allow_lm_batch and isinstance(stored_codes, list):
         lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
         score_str = score_displays[i] if i < len(score_displays) else ""
+        # Keep visible=True to ensure .change() event is properly triggered
+        codes_display_updates.append(gr.update(value=code_str, visible=True))
+        lrc_display_updates.append(gr.update(value=lrc_str, visible=True))
+        lrc_clear_updates.append(gr.update(value="", visible=True))  # Clear first
+        details_accordion_updates.append(gr.skip())  # Don't change accordion visibility
+    # ============== STEP 1: Yield audio + CLEAR LRC ==============
+    yield (
         audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
         audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
         audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
         score_displays[4], score_displays[5], score_displays[6], score_displays[7],
         codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
         codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
+        # LRC display - CLEAR first (triggers .change() to clear subtitles)
+        lrc_clear_updates[0], lrc_clear_updates[1], lrc_clear_updates[2], lrc_clear_updates[3],
+        lrc_clear_updates[4], lrc_clear_updates[5], lrc_clear_updates[6], lrc_clear_updates[7],
         details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
         details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
         gr.update(interactive=True),
     )
+    # Wait for audio to load before setting subtitles
+    time_module.sleep(0.05)
+    # ============== STEP 2: Yield skip audio + SET actual LRC ==============
+    skip_audio = [gr.skip() for _ in range(8)]
+    skip_scores = [gr.skip() for _ in range(8)]
+    skip_codes = [gr.skip() for _ in range(8)]
+    skip_accordions = [gr.skip() for _ in range(8)]
+    yield (
+        skip_audio[0], skip_audio[1], skip_audio[2], skip_audio[3],
+        skip_audio[4], skip_audio[5], skip_audio[6], skip_audio[7],
+        gr.skip(), gr.skip(), gr.skip(), gr.skip(),  # audio_paths, generation_info, batch_index, indicator
+        gr.skip(), gr.skip(),  # prev/next buttons
+        gr.skip(),  # status
+        skip_scores[0], skip_scores[1], skip_scores[2], skip_scores[3],
+        skip_scores[4], skip_scores[5], skip_scores[6], skip_scores[7],
+        skip_codes[0], skip_codes[1], skip_codes[2], skip_codes[3],
+        skip_codes[4], skip_codes[5], skip_codes[6], skip_codes[7],
+        # LRC display - SET actual content (triggers .change() to set subtitles)
+        lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
+        lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
+        skip_accordions[0], skip_accordions[1], skip_accordions[2], skip_accordions[3],
+        skip_accordions[4], skip_accordions[5], skip_accordions[6], skip_accordions[7],
+        gr.skip(),  # restore button
+    )
 def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches, batch_queue):
+    """Navigate to next batch (Result View Only - Never touches Input UI)
+    Uses two-step yield to avoid subtitle flickering:
+    1. First yield: audio + clear LRC (triggers .change() to clear subtitles)
+    2. Sleep 50ms (let audio load)
+    3. Second yield: skip audio + set actual LRC (triggers .change() to set subtitles)
+    """
     if current_batch_index >= total_batches - 1:
         gr.Warning(t("messages.at_last_batch"))
+        yield tuple([gr.update()] * 49)  # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 1 next_status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
+        return
     # Move to next batch
     new_batch_index = current_batch_index + 1
     # Load batch data from queue
     if new_batch_index not in batch_queue:
         gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
+        yield tuple([gr.update()] * 49)
+        return
     batch_data = batch_queue[new_batch_index]
     audio_paths = batch_data.get("audio_paths", [])
     generation_info_text = batch_data.get("generation_info", "")
+    # Prepare audio outputs (up to 8)
     real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
     audio_updates = []
     for idx in range(8):
         if idx < len(real_audio_paths):
             audio_path = real_audio_paths[idx]
+            audio_updates.append(gr.update(value=audio_path))
         else:
+            audio_updates.append(gr.update(value=None))
     # Update batch indicator
     batch_indicator_text = update_batch_indicator(new_batch_index, total_batches)
     codes_display_updates = []
     lrc_display_updates = []
+    lrc_clear_updates = []  # For first yield - clear LRC
     details_accordion_updates = []
     for i in range(8):
         if stored_allow_lm_batch and isinstance(stored_codes, list):
             code_str = stored_codes if isinstance(stored_codes, str) and i == 0 else ""
         lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
+        # Keep visible=True to ensure .change() event is properly triggered
+        codes_display_updates.append(gr.update(value=code_str, visible=True))
+        lrc_display_updates.append(gr.update(value=lrc_str, visible=True))
+        lrc_clear_updates.append(gr.update(value="", visible=True))  # Clear first
+        details_accordion_updates.append(gr.skip())  # Don't change accordion visibility
+    # ============== STEP 1: Yield audio + CLEAR LRC ==============
+    yield (
         audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
         audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
         audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
         score_displays[4], score_displays[5], score_displays[6], score_displays[7],
         codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
         codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
+        # LRC display - CLEAR first (triggers .change() to clear subtitles)
+        lrc_clear_updates[0], lrc_clear_updates[1], lrc_clear_updates[2], lrc_clear_updates[3],
+        lrc_clear_updates[4], lrc_clear_updates[5], lrc_clear_updates[6], lrc_clear_updates[7],
         details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
         details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
         gr.update(interactive=True),
     )
+    # Wait for audio to load before setting subtitles
+    time_module.sleep(0.05)
+    # ============== STEP 2: Yield skip audio + SET actual LRC ==============
+    skip_audio = [gr.skip() for _ in range(8)]
+    skip_scores = [gr.skip() for _ in range(8)]
+    skip_codes = [gr.skip() for _ in range(8)]
+    skip_accordions = [gr.skip() for _ in range(8)]
+    yield (
+        skip_audio[0], skip_audio[1], skip_audio[2], skip_audio[3],
+        skip_audio[4], skip_audio[5], skip_audio[6], skip_audio[7],
+        gr.skip(), gr.skip(), gr.skip(), gr.skip(),  # audio_paths, generation_info, batch_index, indicator
+        gr.skip(), gr.skip(),  # prev/next buttons
+        gr.skip(), gr.skip(),  # status, next_batch_status
+        skip_scores[0], skip_scores[1], skip_scores[2], skip_scores[3],
+        skip_scores[4], skip_scores[5], skip_scores[6], skip_scores[7],
+        skip_codes[0], skip_codes[1], skip_codes[2], skip_codes[3],
+        skip_codes[4], skip_codes[5], skip_codes[6], skip_codes[7],
+        # LRC display - SET actual content (triggers .change() to set subtitles)
+        lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
+        lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
+        skip_accordions[0], skip_accordions[1], skip_accordions[2], skip_accordions[3],
+        skip_accordions[4], skip_accordions[5], skip_accordions[6], skip_accordions[7],
+        gr.skip(),  # restore button
+    )
 def restore_batch_parameters(current_batch_index, batch_queue):

acestep/gradio_ui/interfaces/result.py CHANGED Viewed

@@ -56,27 +56,30 @@ def create_results_section(dit_handler) -> dict:
                         size="sm",
                         scale=1
                     )
-                with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_1:
                     codes_display_1 = gr.Textbox(
                         label=t("results.codes_label", n=1),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=4,
-                        visible=False
                     )
                     score_display_1 = gr.Textbox(
                         label=t("results.quality_score_label", n=1),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=6,
-                        visible=False
                     )
                     lrc_display_1 = gr.Textbox(
                         label=t("results.lrc_label", n=1),
                         interactive=True,
                         buttons=["copy"],
                         max_lines=8,
-                        visible=False
                     )
             with gr.Column(visible=True) as audio_col_2:
                 generated_audio_2 = gr.Audio(
@@ -110,27 +113,30 @@ def create_results_section(dit_handler) -> dict:
                         size="sm",
                         scale=1
                     )
-                with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_2:
                     codes_display_2 = gr.Textbox(
                         label=t("results.codes_label", n=2),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=4,
-                        visible=False
                     )
                     score_display_2 = gr.Textbox(
                         label=t("results.quality_score_label", n=2),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=6,
-                        visible=False
                     )
                     lrc_display_2 = gr.Textbox(
                         label=t("results.lrc_label", n=2),
                         interactive=True,
                         buttons=["copy"],
                         max_lines=8,
-                        visible=False
                     )
             with gr.Column(visible=False) as audio_col_3:
                 generated_audio_3 = gr.Audio(
@@ -164,27 +170,30 @@ def create_results_section(dit_handler) -> dict:
                         size="sm",
                         scale=1
                     )
-                with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_3:
                     codes_display_3 = gr.Textbox(
                         label=t("results.codes_label", n=3),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=4,
-                        visible=False
                     )
                     score_display_3 = gr.Textbox(
                         label=t("results.quality_score_label", n=3),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=6,
-                        visible=False
                     )
                     lrc_display_3 = gr.Textbox(
                         label=t("results.lrc_label", n=3),
                         interactive=True,
                         buttons=["copy"],
                         max_lines=8,
-                        visible=False
                     )
             with gr.Column(visible=False) as audio_col_4:
                 generated_audio_4 = gr.Audio(
@@ -218,27 +227,30 @@ def create_results_section(dit_handler) -> dict:
                         size="sm",
                         scale=1
                     )
-                with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_4:
                     codes_display_4 = gr.Textbox(
                         label=t("results.codes_label", n=4),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=4,
-                        visible=False
                     )
                     score_display_4 = gr.Textbox(
                         label=t("results.quality_score_label", n=4),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=6,
-                        visible=False
                     )
                     lrc_display_4 = gr.Textbox(
                         label=t("results.lrc_label", n=4),
                         interactive=True,
                         buttons=["copy"],
                         max_lines=8,
-                        visible=False
                     )
         # Second row for batch size 5-8 (initially hidden)
@@ -255,27 +267,30 @@ def create_results_section(dit_handler) -> dict:
                     save_btn_5 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
                     score_btn_5 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
                     lrc_btn_5 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
-                with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_5:
                     codes_display_5 = gr.Textbox(
                         label=t("results.codes_label", n=5),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=4,
-                        visible=False
                     )
                     score_display_5 = gr.Textbox(
                         label=t("results.quality_score_label", n=5),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=6,
-                        visible=False
                     )
                     lrc_display_5 = gr.Textbox(
                         label=t("results.lrc_label", n=5),
                         interactive=True,
                         buttons=["copy"],
                         max_lines=8,
-                        visible=False
                     )
             with gr.Column() as audio_col_6:
                 generated_audio_6 = gr.Audio(
@@ -289,27 +304,30 @@ def create_results_section(dit_handler) -> dict:
                     save_btn_6 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
                     score_btn_6 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
                     lrc_btn_6 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
-                with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_6:
                     codes_display_6 = gr.Textbox(
                         label=t("results.codes_label", n=6),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=4,
-                        visible=False
                     )
                     score_display_6 = gr.Textbox(
                         label=t("results.quality_score_label", n=6),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=6,
-                        visible=False
                     )
                     lrc_display_6 = gr.Textbox(
                         label=t("results.lrc_label", n=6),
                         interactive=True,
                         buttons=["copy"],
                         max_lines=8,
-                        visible=False
                     )
             with gr.Column() as audio_col_7:
                 generated_audio_7 = gr.Audio(
@@ -323,27 +341,30 @@ def create_results_section(dit_handler) -> dict:
                     save_btn_7 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
                     score_btn_7 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
                     lrc_btn_7 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
-                with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_7:
                     codes_display_7 = gr.Textbox(
                         label=t("results.codes_label", n=7),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=4,
-                        visible=False
                     )
                     score_display_7 = gr.Textbox(
                         label=t("results.quality_score_label", n=7),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=6,
-                        visible=False
                     )
                     lrc_display_7 = gr.Textbox(
                         label=t("results.lrc_label", n=7),
                         interactive=True,
                         buttons=["copy"],
                         max_lines=8,
-                        visible=False
                     )
             with gr.Column() as audio_col_8:
                 generated_audio_8 = gr.Audio(
@@ -357,27 +378,30 @@ def create_results_section(dit_handler) -> dict:
                     save_btn_8 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
                     score_btn_8 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
                     lrc_btn_8 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
-                with gr.Accordion(t("results.details_accordion"), open=False, visible=False) as details_accordion_8:
                     codes_display_8 = gr.Textbox(
                         label=t("results.codes_label", n=8),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=4,
-                        visible=False
                     )
                     score_display_8 = gr.Textbox(
                         label=t("results.quality_score_label", n=8),
                         interactive=False,
                         buttons=["copy"],
                         max_lines=6,
-                        visible=False
                     )
                     lrc_display_8 = gr.Textbox(
                         label=t("results.lrc_label", n=8),
                         interactive=True,
                         buttons=["copy"],
                         max_lines=8,
-                        visible=False
                     )
         status_output = gr.Textbox(label=t("results.generation_status"), interactive=False)

                         size="sm",
                         scale=1
                     )
+                with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_1:
                     codes_display_1 = gr.Textbox(
                         label=t("results.codes_label", n=1),
                         interactive=False,
                         buttons=["copy"],
+                        lines=4,
                         max_lines=4,
+                        visible=True
                     )
                     score_display_1 = gr.Textbox(
                         label=t("results.quality_score_label", n=1),
                         interactive=False,
                         buttons=["copy"],
+                        lines=6,
                         max_lines=6,
+                        visible=True
                     )
                     lrc_display_1 = gr.Textbox(
                         label=t("results.lrc_label", n=1),
                         interactive=True,
                         buttons=["copy"],
+                        lines=8,
                         max_lines=8,
+                        visible=True
                     )
             with gr.Column(visible=True) as audio_col_2:
                 generated_audio_2 = gr.Audio(
                         size="sm",
                         scale=1
                     )
+                with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_2:
                     codes_display_2 = gr.Textbox(
                         label=t("results.codes_label", n=2),
                         interactive=False,
                         buttons=["copy"],
+                        lines=4,
                         max_lines=4,
+                        visible=True
                     )
                     score_display_2 = gr.Textbox(
                         label=t("results.quality_score_label", n=2),
                         interactive=False,
                         buttons=["copy"],
+                        lines=6,
                         max_lines=6,
+                        visible=True
                     )
                     lrc_display_2 = gr.Textbox(
                         label=t("results.lrc_label", n=2),
                         interactive=True,
                         buttons=["copy"],
+                        lines=8,
                         max_lines=8,
+                        visible=True
                     )
             with gr.Column(visible=False) as audio_col_3:
                 generated_audio_3 = gr.Audio(
                         size="sm",
                         scale=1
                     )
+                with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_3:
                     codes_display_3 = gr.Textbox(
                         label=t("results.codes_label", n=3),
                         interactive=False,
                         buttons=["copy"],
+                        lines=4,
                         max_lines=4,
+                        visible=True
                     )
                     score_display_3 = gr.Textbox(
                         label=t("results.quality_score_label", n=3),
                         interactive=False,
                         buttons=["copy"],
+                        lines=6,
                         max_lines=6,
+                        visible=True
                     )
                     lrc_display_3 = gr.Textbox(
                         label=t("results.lrc_label", n=3),
                         interactive=True,
                         buttons=["copy"],
+                        lines=8,
                         max_lines=8,
+                        visible=True
                     )
             with gr.Column(visible=False) as audio_col_4:
                 generated_audio_4 = gr.Audio(
                         size="sm",
                         scale=1
                     )
+                with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_4:
                     codes_display_4 = gr.Textbox(
                         label=t("results.codes_label", n=4),
                         interactive=False,
                         buttons=["copy"],
+                        lines=4,
                         max_lines=4,
+                        visible=True
                     )
                     score_display_4 = gr.Textbox(
                         label=t("results.quality_score_label", n=4),
                         interactive=False,
                         buttons=["copy"],
+                        lines=6,
                         max_lines=6,
+                        visible=True
                     )
                     lrc_display_4 = gr.Textbox(
                         label=t("results.lrc_label", n=4),
                         interactive=True,
                         buttons=["copy"],
+                        lines=8,
                         max_lines=8,
+                        visible=True
                     )
         # Second row for batch size 5-8 (initially hidden)
                     save_btn_5 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
                     score_btn_5 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
                     lrc_btn_5 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
+                with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_5:
                     codes_display_5 = gr.Textbox(
                         label=t("results.codes_label", n=5),
                         interactive=False,
                         buttons=["copy"],
+                        lines=4,
                         max_lines=4,
+                        visible=True
                     )
                     score_display_5 = gr.Textbox(
                         label=t("results.quality_score_label", n=5),
                         interactive=False,
                         buttons=["copy"],
+                        lines=6,
                         max_lines=6,
+                        visible=True
                     )
                     lrc_display_5 = gr.Textbox(
                         label=t("results.lrc_label", n=5),
                         interactive=True,
                         buttons=["copy"],
+                        lines=8,
                         max_lines=8,
+                        visible=True
                     )
             with gr.Column() as audio_col_6:
                 generated_audio_6 = gr.Audio(
                     save_btn_6 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
                     score_btn_6 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
                     lrc_btn_6 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
+                with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_6:
                     codes_display_6 = gr.Textbox(
                         label=t("results.codes_label", n=6),
                         interactive=False,
                         buttons=["copy"],
+                        lines=4,
                         max_lines=4,
+                        visible=True
                     )
                     score_display_6 = gr.Textbox(
                         label=t("results.quality_score_label", n=6),
                         interactive=False,
                         buttons=["copy"],
+                        lines=6,
                         max_lines=6,
+                        visible=True
                     )
                     lrc_display_6 = gr.Textbox(
                         label=t("results.lrc_label", n=6),
                         interactive=True,
                         buttons=["copy"],
+                        lines=8,
                         max_lines=8,
+                        visible=True
                     )
             with gr.Column() as audio_col_7:
                 generated_audio_7 = gr.Audio(
                     save_btn_7 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
                     score_btn_7 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
                     lrc_btn_7 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
+                with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_7:
                     codes_display_7 = gr.Textbox(
                         label=t("results.codes_label", n=7),
                         interactive=False,
                         buttons=["copy"],
+                        lines=4,
                         max_lines=4,
+                        visible=True
                     )
                     score_display_7 = gr.Textbox(
                         label=t("results.quality_score_label", n=7),
                         interactive=False,
                         buttons=["copy"],
+                        lines=6,
                         max_lines=6,
+                        visible=True
                     )
                     lrc_display_7 = gr.Textbox(
                         label=t("results.lrc_label", n=7),
                         interactive=True,
                         buttons=["copy"],
+                        lines=8,
                         max_lines=8,
+                        visible=True
                     )
             with gr.Column() as audio_col_8:
                 generated_audio_8 = gr.Audio(
                     save_btn_8 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
                     score_btn_8 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
                     lrc_btn_8 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
+                with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_8:
                     codes_display_8 = gr.Textbox(
                         label=t("results.codes_label", n=8),
                         interactive=False,
                         buttons=["copy"],
+                        lines=4,
                         max_lines=4,
+                        visible=True
                     )
                     score_display_8 = gr.Textbox(
                         label=t("results.quality_score_label", n=8),
                         interactive=False,
                         buttons=["copy"],
+                        lines=6,
                         max_lines=6,
+                        visible=True
                     )
                     lrc_display_8 = gr.Textbox(
                         label=t("results.lrc_label", n=8),
                         interactive=True,
                         buttons=["copy"],
+                        lines=8,
                         max_lines=8,
+                        visible=True
                     )
         status_output = gr.Textbox(label=t("results.generation_status"), interactive=False)