Spaces:
Running on Zero
Running on Zero
Merge pull request #3 from ace-step/fix_lrc_bugs
Browse files
acestep/constants.py
CHANGED
|
@@ -70,6 +70,8 @@ TASK_TYPES_BASE = ["text2music", "repaint", "cover", "extract", "lego", "complet
|
|
| 70 |
DEFAULT_DIT_INSTRUCTION = "Fill the audio semantic mask based on the given conditions:"
|
| 71 |
DEFAULT_LM_INSTRUCTION = "Generate audio semantic tokens based on the given conditions:"
|
| 72 |
DEFAULT_LM_UNDERSTAND_INSTRUCTION = "Understand the given musical conditions and describe the audio semantics accordingly:"
|
|
|
|
|
|
|
| 73 |
|
| 74 |
# Instruction templates for each task type
|
| 75 |
# Note: Some instructions use placeholders like {TRACK_NAME} or {TRACK_CLASSES}
|
|
|
|
| 70 |
DEFAULT_DIT_INSTRUCTION = "Fill the audio semantic mask based on the given conditions:"
|
| 71 |
DEFAULT_LM_INSTRUCTION = "Generate audio semantic tokens based on the given conditions:"
|
| 72 |
DEFAULT_LM_UNDERSTAND_INSTRUCTION = "Understand the given musical conditions and describe the audio semantics accordingly:"
|
| 73 |
+
DEFAULT_LM_INSPIRED_INSTRUCTION = "Expand the user's input into a more detailed and specific musical description:"
|
| 74 |
+
DEFAULT_LM_REWRITE_INSTRUCTION = "Format the user's input into a more detailed and specific musical description:"
|
| 75 |
|
| 76 |
# Instruction templates for each task type
|
| 77 |
# Note: Some instructions use placeholders like {TRACK_NAME} or {TRACK_CLASSES}
|
acestep/gradio_ui/events/__init__.py
CHANGED
|
@@ -373,7 +373,8 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
|
|
| 373 |
outputs=[
|
| 374 |
results_section[f"lrc_display_{btn_idx}"],
|
| 375 |
results_section[f"details_accordion_{btn_idx}"],
|
| 376 |
-
#
|
|
|
|
| 377 |
results_section["batch_queue"]
|
| 378 |
]
|
| 379 |
)
|
|
@@ -723,14 +724,21 @@ def setup_event_handlers(demo, dit_handler, llm_handler, dataset_handler, datase
|
|
| 723 |
)
|
| 724 |
|
| 725 |
# ========== LRC Display Change Handlers ==========
|
| 726 |
-
#
|
| 727 |
-
|
| 728 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 729 |
fn=res_h.update_audio_subtitles_from_lrc,
|
| 730 |
inputs=[
|
| 731 |
-
results_section[f"lrc_display_{
|
| 732 |
-
|
| 733 |
-
generation_section["audio_duration"],
|
| 734 |
],
|
| 735 |
-
outputs=[
|
|
|
|
|
|
|
| 736 |
)
|
|
|
|
| 373 |
outputs=[
|
| 374 |
results_section[f"lrc_display_{btn_idx}"],
|
| 375 |
results_section[f"details_accordion_{btn_idx}"],
|
| 376 |
+
# NOTE: Removed generated_audio output!
|
| 377 |
+
# Audio subtitles are now updated via lrc_display.change() event.
|
| 378 |
results_section["batch_queue"]
|
| 379 |
]
|
| 380 |
)
|
|
|
|
| 724 |
)
|
| 725 |
|
| 726 |
# ========== LRC Display Change Handlers ==========
|
| 727 |
+
# NEW APPROACH: Use lrc_display.change() to update audio subtitles
|
| 728 |
+
# This decouples audio value updates from subtitle updates, avoiding flickering.
|
| 729 |
+
#
|
| 730 |
+
# When lrc_display text changes (from generate, LRC button, or manual edit):
|
| 731 |
+
# 1. lrc_display.change() is triggered
|
| 732 |
+
# 2. update_audio_subtitles_from_lrc() parses LRC and updates audio subtitles
|
| 733 |
+
# 3. Audio value is NEVER updated here - only subtitles
|
| 734 |
+
for lrc_idx in range(1, 9):
|
| 735 |
+
results_section[f"lrc_display_{lrc_idx}"].change(
|
| 736 |
fn=res_h.update_audio_subtitles_from_lrc,
|
| 737 |
inputs=[
|
| 738 |
+
results_section[f"lrc_display_{lrc_idx}"],
|
| 739 |
+
# audio_duration not needed - parse_lrc_to_subtitles calculates end time from timestamps
|
|
|
|
| 740 |
],
|
| 741 |
+
outputs=[
|
| 742 |
+
results_section[f"generated_audio_{lrc_idx}"], # Only updates subtitles, not value
|
| 743 |
+
]
|
| 744 |
)
|
acestep/gradio_ui/events/results_handlers.py
CHANGED
|
@@ -40,9 +40,10 @@ def parse_lrc_to_subtitles(lrc_text: str, total_duration: Optional[float] = None
|
|
| 40 |
lines = lrc_text.strip().split('\n')
|
| 41 |
|
| 42 |
# Regex patterns for LRC timestamps
|
| 43 |
-
# Pattern 1: [MM:SS.ss] - standard LRC with start time only
|
| 44 |
# Pattern 2: [MM:SS.ss][MM:SS.ss] - LRC with both start and end time
|
| 45 |
-
|
|
|
|
| 46 |
|
| 47 |
parsed_lines = []
|
| 48 |
|
|
@@ -62,14 +63,17 @@ def parse_lrc_to_subtitles(lrc_text: str, total_duration: Optional[float] = None
|
|
| 62 |
continue
|
| 63 |
|
| 64 |
# Parse first timestamp as start time
|
|
|
|
| 65 |
start_minutes, start_seconds, start_centiseconds = timestamps[0]
|
| 66 |
-
|
|
|
|
| 67 |
|
| 68 |
# If there's a second timestamp, use it as end time
|
| 69 |
end_time = None
|
| 70 |
if len(timestamps) >= 2:
|
| 71 |
end_minutes, end_seconds, end_centiseconds = timestamps[1]
|
| 72 |
-
|
|
|
|
| 73 |
|
| 74 |
parsed_lines.append({
|
| 75 |
'start': start_time,
|
|
@@ -101,6 +105,83 @@ def parse_lrc_to_subtitles(lrc_text: str, total_duration: Optional[float] = None
|
|
| 101 |
return subtitles
|
| 102 |
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
def _build_generation_info(
|
| 105 |
lm_metadata: Optional[Dict[str, Any]],
|
| 106 |
time_costs: Dict[str, float],
|
|
@@ -477,15 +558,25 @@ def generate_with_progress(
|
|
| 477 |
audios = result.audios
|
| 478 |
progress(0.99, "Converting audio to mp3...")
|
| 479 |
|
| 480 |
-
# Clear all scores, codes,
|
| 481 |
# Note: Create independent gr.update objects (not references to the same object)
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
yield (
|
| 487 |
-
# Audio outputs
|
| 488 |
-
|
|
|
|
| 489 |
None, # all_audio_paths (clear batch files)
|
| 490 |
generation_info,
|
| 491 |
"Clearing previous results...",
|
|
@@ -507,6 +598,7 @@ def generate_with_progress(
|
|
| 507 |
None, # extra_outputs placeholder
|
| 508 |
None, # raw_codes placeholder
|
| 509 |
)
|
|
|
|
| 510 |
|
| 511 |
for i in range(8):
|
| 512 |
if i < len(audios):
|
|
@@ -613,9 +705,9 @@ def generate_with_progress(
|
|
| 613 |
lrc_text = lrc_result.get("lrc_text", "")
|
| 614 |
final_lrcs_list[i] = lrc_text
|
| 615 |
logger.info(f"[auto_lrc] LRC text length for sample {i + 1}: {len(lrc_text)}")
|
| 616 |
-
#
|
| 617 |
-
|
| 618 |
-
final_subtitles_list[i] =
|
| 619 |
else:
|
| 620 |
logger.warning(f"[auto_lrc] Missing required extra_outputs for sample {i + 1}")
|
| 621 |
except Exception as e:
|
|
@@ -624,53 +716,88 @@ def generate_with_progress(
|
|
| 624 |
total_auto_lrc_time += (auto_lrc_end - auto_lrc_start)
|
| 625 |
|
| 626 |
status_message = f"Encoding & Ready: {i+1}/{len(audios)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 627 |
current_audio_updates = [gr.skip() for _ in range(8)]
|
| 628 |
-
# Always set audio path first, subtitles will be applied via Audio component's subtitles parameter
|
| 629 |
current_audio_updates[i] = audio_path
|
| 630 |
-
|
| 631 |
-
# Codes display updates (for results section)
|
| 632 |
-
codes_display_updates = [gr.skip() for _ in range(8)]
|
| 633 |
-
codes_display_updates[i] = gr.update(value=code_str, visible=bool(code_str))
|
| 634 |
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
has_lrc = bool(final_lrcs_list[i])
|
| 638 |
-
if auto_lrc and has_lrc:
|
| 639 |
-
lrc_display_updates[i] = gr.update(value=final_lrcs_list[i], visible=True)
|
| 640 |
|
| 641 |
-
# Details accordion updates (show if code OR lrc OR score exists)
|
| 642 |
details_accordion_updates = [gr.skip() for _ in range(8)]
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
|
|
|
|
|
|
|
|
|
| 646 |
|
| 647 |
yield (
|
| 648 |
current_audio_updates[0], current_audio_updates[1], current_audio_updates[2], current_audio_updates[3],
|
| 649 |
current_audio_updates[4], current_audio_updates[5], current_audio_updates[6], current_audio_updates[7],
|
| 650 |
-
all_audio_paths,
|
| 651 |
generation_info,
|
| 652 |
status_message,
|
| 653 |
seed_value_for_ui,
|
| 654 |
-
# Scores
|
| 655 |
scores_ui_updates[0], scores_ui_updates[1], scores_ui_updates[2], scores_ui_updates[3], scores_ui_updates[4], scores_ui_updates[5], scores_ui_updates[6], scores_ui_updates[7],
|
| 656 |
-
# Codes display in results section
|
| 657 |
codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
|
| 658 |
codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
|
| 659 |
-
# Details accordion visibility
|
| 660 |
details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
|
| 661 |
details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
|
| 662 |
-
# LRC display
|
| 663 |
-
|
| 664 |
-
|
| 665 |
lm_generated_metadata,
|
| 666 |
is_format_caption,
|
| 667 |
-
None,
|
| 668 |
-
None,
|
| 669 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 670 |
else:
|
| 671 |
# If i exceeds the generated count (e.g., batch=2, i=2..7), do not yield
|
| 672 |
pass
|
| 673 |
-
time_module.sleep(0.
|
| 674 |
|
| 675 |
# Record audio conversion time
|
| 676 |
audio_conversion_end_time = time_module.time()
|
|
@@ -697,26 +824,19 @@ def generate_with_progress(
|
|
| 697 |
num_audios=len(result.audios),
|
| 698 |
)
|
| 699 |
|
| 700 |
-
# Build final codes display, LRC display,
|
| 701 |
-
final_codes_display_updates = []
|
| 702 |
-
final_lrc_display_updates = []
|
| 703 |
-
final_accordion_updates = []
|
| 704 |
-
for i in range(8):
|
| 705 |
-
code_str = final_codes_list[i]
|
| 706 |
-
lrc_text = final_lrcs_list[i]
|
| 707 |
-
score_str = final_scores_list[i]
|
| 708 |
-
has_code = bool(code_str)
|
| 709 |
-
has_lrc = bool(lrc_text)
|
| 710 |
-
has_score = bool(score_str) and score_str != "Done!"
|
| 711 |
-
# Show accordion if code OR LRC OR score exists
|
| 712 |
-
has_content = has_code or has_lrc or has_score
|
| 713 |
-
final_codes_display_updates.append(gr.update(value=code_str, visible=has_code))
|
| 714 |
-
final_lrc_display_updates.append(gr.update(value=lrc_text, visible=has_lrc))
|
| 715 |
-
final_accordion_updates.append(gr.update(visible=has_content))
|
| 716 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 717 |
yield (
|
| 718 |
-
|
| 719 |
-
gr.skip(), gr.skip(), gr.skip(), gr.skip(),
|
| 720 |
all_audio_paths,
|
| 721 |
generation_info,
|
| 722 |
"Generation Complete",
|
|
@@ -730,8 +850,8 @@ def generate_with_progress(
|
|
| 730 |
final_accordion_updates[0], final_accordion_updates[1], final_accordion_updates[2], final_accordion_updates[3],
|
| 731 |
final_accordion_updates[4], final_accordion_updates[5], final_accordion_updates[6], final_accordion_updates[7],
|
| 732 |
# LRC display
|
| 733 |
-
|
| 734 |
-
|
| 735 |
lm_generated_metadata,
|
| 736 |
is_format_caption,
|
| 737 |
{
|
|
@@ -1006,10 +1126,10 @@ def calculate_score_handler_with_selection(
|
|
| 1006 |
batch_queue[current_batch_index]["scores"] = [""] * 8
|
| 1007 |
batch_queue[current_batch_index]["scores"][sample_idx - 1] = score_display
|
| 1008 |
|
| 1009 |
-
# Return: score_display (
|
| 1010 |
return (
|
| 1011 |
-
gr.update(value=score_display, visible=True), # score_display with content
|
| 1012 |
-
gr.
|
| 1013 |
batch_queue
|
| 1014 |
)
|
| 1015 |
|
|
@@ -1017,11 +1137,14 @@ def calculate_score_handler_with_selection(
|
|
| 1017 |
def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_queue, vocal_language, inference_steps):
|
| 1018 |
"""
|
| 1019 |
Generate LRC timestamps for a specific audio sample.
|
| 1020 |
-
|
| 1021 |
This function retrieves cached generation data from batch_queue and calls
|
| 1022 |
the handler's get_lyric_timestamp method to generate LRC format lyrics.
|
| 1023 |
-
Audio subtitles are automatically updated via lrc_display.change() event.
|
| 1024 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1025 |
Args:
|
| 1026 |
dit_handler: DiT handler instance with get_lyric_timestamp method
|
| 1027 |
sample_idx: Which sample to generate LRC for (1-8)
|
|
@@ -1029,21 +1152,23 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
|
|
| 1029 |
batch_queue: Dictionary storing all batch generation data
|
| 1030 |
vocal_language: Language code for lyrics
|
| 1031 |
inference_steps: Number of inference steps used in generation
|
| 1032 |
-
|
| 1033 |
Returns:
|
| 1034 |
Tuple of (lrc_display_update, details_accordion_update, batch_queue)
|
|
|
|
| 1035 |
"""
|
| 1036 |
import torch
|
| 1037 |
|
| 1038 |
if current_batch_index not in batch_queue:
|
| 1039 |
return gr.skip(), gr.skip(), batch_queue
|
| 1040 |
-
|
| 1041 |
batch_data = batch_queue[current_batch_index]
|
| 1042 |
extra_outputs = batch_data.get("extra_outputs", {})
|
| 1043 |
-
|
| 1044 |
# Check if required data is available
|
|
|
|
| 1045 |
if not extra_outputs:
|
| 1046 |
-
return gr.update(value=t("messages.lrc_no_extra_outputs"), visible=True), gr.
|
| 1047 |
|
| 1048 |
pred_latents = extra_outputs.get("pred_latents")
|
| 1049 |
encoder_hidden_states = extra_outputs.get("encoder_hidden_states")
|
|
@@ -1052,7 +1177,7 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
|
|
| 1052 |
lyric_token_idss = extra_outputs.get("lyric_token_idss")
|
| 1053 |
|
| 1054 |
if any(x is None for x in [pred_latents, encoder_hidden_states, encoder_attention_mask, context_latents, lyric_token_idss]):
|
| 1055 |
-
return gr.update(value=t("messages.lrc_missing_tensors"), visible=True), gr.
|
| 1056 |
|
| 1057 |
# Adjust sample_idx to 0-based
|
| 1058 |
sample_idx_0based = sample_idx - 1
|
|
@@ -1060,7 +1185,7 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
|
|
| 1060 |
# Check if sample exists in batch
|
| 1061 |
batch_size = pred_latents.shape[0]
|
| 1062 |
if sample_idx_0based >= batch_size:
|
| 1063 |
-
return gr.update(value=t("messages.lrc_sample_not_exist"), visible=True), gr.
|
| 1064 |
|
| 1065 |
# Extract the specific sample's data
|
| 1066 |
try:
|
|
@@ -1098,72 +1223,62 @@ def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_que
|
|
| 1098 |
if result.get("success"):
|
| 1099 |
lrc_text = result.get("lrc_text", "")
|
| 1100 |
if not lrc_text:
|
| 1101 |
-
return gr.update(value=t("messages.lrc_empty_result"), visible=True), gr.
|
| 1102 |
|
| 1103 |
# Store LRC in batch_queue for later retrieval when switching batches
|
| 1104 |
if "lrcs" not in batch_queue[current_batch_index]:
|
| 1105 |
batch_queue[current_batch_index]["lrcs"] = [""] * 8
|
| 1106 |
batch_queue[current_batch_index]["lrcs"][sample_idx_0based] = lrc_text
|
| 1107 |
|
| 1108 |
-
#
|
| 1109 |
-
|
| 1110 |
-
|
| 1111 |
-
# Store subtitles in batch_queue for batch navigation
|
| 1112 |
if "subtitles" not in batch_queue[current_batch_index]:
|
| 1113 |
batch_queue[current_batch_index]["subtitles"] = [None] * 8
|
| 1114 |
-
batch_queue[current_batch_index]["subtitles"][sample_idx_0based] =
|
| 1115 |
|
| 1116 |
# Return: lrc_display, details_accordion, batch_queue
|
| 1117 |
-
#
|
|
|
|
|
|
|
| 1118 |
return (
|
| 1119 |
gr.update(value=lrc_text, visible=True),
|
| 1120 |
-
gr.
|
| 1121 |
batch_queue
|
| 1122 |
)
|
| 1123 |
else:
|
| 1124 |
error_msg = result.get("error", "Unknown error")
|
| 1125 |
-
return gr.update(value=f"❌ {error_msg}", visible=True), gr.
|
| 1126 |
|
| 1127 |
except Exception as e:
|
| 1128 |
logger.exception("[generate_lrc_handler] Error generating LRC")
|
| 1129 |
-
return gr.update(value=f"❌ Error: {str(e)}", visible=True), gr.
|
| 1130 |
|
| 1131 |
|
| 1132 |
-
def update_audio_subtitles_from_lrc(lrc_text: str,
|
| 1133 |
"""
|
| 1134 |
Update Audio component's subtitles based on LRC text content.
|
| 1135 |
|
| 1136 |
-
This function
|
| 1137 |
-
|
|
|
|
| 1138 |
|
| 1139 |
Args:
|
| 1140 |
lrc_text: LRC format lyrics string from lrc_display textbox
|
| 1141 |
-
audio_component_value: Current value of the audio component (path or dict)
|
| 1142 |
audio_duration: Optional audio duration for calculating last line's end time
|
| 1143 |
|
| 1144 |
Returns:
|
| 1145 |
-
gr.update for the Audio component with subtitles
|
| 1146 |
"""
|
| 1147 |
-
# If
|
| 1148 |
if not lrc_text or not lrc_text.strip():
|
| 1149 |
-
return gr.
|
| 1150 |
-
|
| 1151 |
-
# Get audio path from component value
|
| 1152 |
-
audio_path = None
|
| 1153 |
-
if audio_component_value:
|
| 1154 |
-
if isinstance(audio_component_value, dict):
|
| 1155 |
-
audio_path = audio_component_value.get("path") or audio_component_value.get("value")
|
| 1156 |
-
else:
|
| 1157 |
-
audio_path = audio_component_value
|
| 1158 |
|
| 1159 |
-
|
| 1160 |
-
|
| 1161 |
|
| 1162 |
-
#
|
| 1163 |
-
|
| 1164 |
-
|
| 1165 |
-
# Return updated audio with subtitles
|
| 1166 |
-
return gr.update(value=audio_path, subtitles=subtitles_data if subtitles_data else None)
|
| 1167 |
|
| 1168 |
|
| 1169 |
def capture_current_params(
|
|
@@ -1374,7 +1489,8 @@ def generate_with_batch_management(
|
|
| 1374 |
|
| 1375 |
# Extract extra_outputs from result tuple (index 46 after adding lrc_display)
|
| 1376 |
# Note: index 47 is raw_codes_list which we already extracted above
|
| 1377 |
-
|
|
|
|
| 1378 |
|
| 1379 |
# Store current batch in queue
|
| 1380 |
batch_queue = store_batch_in_queue(
|
|
@@ -1420,9 +1536,18 @@ def generate_with_batch_management(
|
|
| 1420 |
# 0-7: audio_outputs, 8: all_audio_paths, 9: generation_info, 10: status, 11: seed
|
| 1421 |
# 12-19: scores, 20-27: codes_display, 28-35: details_accordion, 36-43: lrc_display
|
| 1422 |
# 44: lm_metadata, 45: is_format_caption, 46: extra_outputs, 47: raw_codes_list
|
| 1423 |
-
#
|
|
|
|
|
|
|
|
|
|
| 1424 |
ui_result = result[:-2] if len(result) > 47 else (result[:-1] if len(result) > 46 else result)
|
| 1425 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1426 |
yield ui_result + (
|
| 1427 |
current_batch_index,
|
| 1428 |
total_batches,
|
|
@@ -1610,7 +1735,24 @@ def generate_next_batch_background(
|
|
| 1610 |
generated_codes_single = generated_codes_batch[0] if generated_codes_batch else ""
|
| 1611 |
|
| 1612 |
# Extract extra_outputs for LRC generation (index 46)
|
| 1613 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1614 |
|
| 1615 |
# Determine which codes to store
|
| 1616 |
batch_size = params.get("batch_size_input", 2)
|
|
@@ -1626,6 +1768,7 @@ def generate_next_batch_background(
|
|
| 1626 |
logger.info(f" - batch_size: {batch_size}")
|
| 1627 |
logger.info(f" - generated_codes_single exists: {bool(generated_codes_single)}")
|
| 1628 |
logger.info(f" - extra_outputs_from_bg exists: {extra_outputs_from_bg is not None}")
|
|
|
|
| 1629 |
if isinstance(codes_to_store, list):
|
| 1630 |
logger.info(f" - codes_to_store: LIST with {len(codes_to_store)} items")
|
| 1631 |
for idx, code in enumerate(codes_to_store):
|
|
@@ -1633,7 +1776,7 @@ def generate_next_batch_background(
|
|
| 1633 |
else:
|
| 1634 |
logger.info(f" - codes_to_store: STRING with {len(codes_to_store) if codes_to_store else 0} chars")
|
| 1635 |
|
| 1636 |
-
# Store next batch in queue with codes, batch settings, and ALL generation params
|
| 1637 |
batch_queue = store_batch_in_queue(
|
| 1638 |
batch_queue,
|
| 1639 |
next_batch_idx,
|
|
@@ -1641,6 +1784,7 @@ def generate_next_batch_background(
|
|
| 1641 |
generation_info,
|
| 1642 |
seed_value_for_ui,
|
| 1643 |
codes=codes_to_store,
|
|
|
|
| 1644 |
allow_lm_batch=allow_lm_batch,
|
| 1645 |
batch_size=int(batch_size),
|
| 1646 |
generation_params=params,
|
|
@@ -1649,6 +1793,16 @@ def generate_next_batch_background(
|
|
| 1649 |
status="completed"
|
| 1650 |
)
|
| 1651 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1652 |
logger.info(f"Batch {next_batch_idx + 1} stored in queue successfully")
|
| 1653 |
|
| 1654 |
# Success message
|
|
@@ -1683,10 +1837,17 @@ def generate_next_batch_background(
|
|
| 1683 |
|
| 1684 |
|
| 1685 |
def navigate_to_previous_batch(current_batch_index, batch_queue):
|
| 1686 |
-
"""Navigate to previous batch (Result View Only - Never touches Input UI)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1687 |
if current_batch_index <= 0:
|
| 1688 |
gr.Warning(t("messages.at_first_batch"))
|
| 1689 |
-
|
|
|
|
| 1690 |
|
| 1691 |
# Move to previous batch
|
| 1692 |
new_batch_index = current_batch_index - 1
|
|
@@ -1694,25 +1855,23 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
|
|
| 1694 |
# Load batch data from queue
|
| 1695 |
if new_batch_index not in batch_queue:
|
| 1696 |
gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
|
| 1697 |
-
|
|
|
|
| 1698 |
|
| 1699 |
batch_data = batch_queue[new_batch_index]
|
| 1700 |
audio_paths = batch_data.get("audio_paths", [])
|
| 1701 |
generation_info_text = batch_data.get("generation_info", "")
|
| 1702 |
|
| 1703 |
-
# Prepare audio outputs (up to 8)
|
| 1704 |
real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
|
| 1705 |
-
stored_subtitles = batch_data.get("subtitles", [None] * 8)
|
| 1706 |
|
| 1707 |
audio_updates = []
|
| 1708 |
for idx in range(8):
|
| 1709 |
if idx < len(real_audio_paths):
|
| 1710 |
audio_path = real_audio_paths[idx]
|
| 1711 |
-
|
| 1712 |
-
# Use gr.update to set both value and subtitles
|
| 1713 |
-
audio_updates.append(gr.update(value=audio_path, subtitles=subtitles_data))
|
| 1714 |
else:
|
| 1715 |
-
audio_updates.append(gr.update(value=None
|
| 1716 |
|
| 1717 |
# Update batch indicator
|
| 1718 |
total_batches = len(batch_queue)
|
|
@@ -1736,6 +1895,7 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
|
|
| 1736 |
|
| 1737 |
codes_display_updates = []
|
| 1738 |
lrc_display_updates = []
|
|
|
|
| 1739 |
details_accordion_updates = []
|
| 1740 |
for i in range(8):
|
| 1741 |
if stored_allow_lm_batch and isinstance(stored_codes, list):
|
|
@@ -1746,18 +1906,14 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
|
|
| 1746 |
lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
|
| 1747 |
score_str = score_displays[i] if i < len(score_displays) else ""
|
| 1748 |
|
| 1749 |
-
|
| 1750 |
-
|
| 1751 |
-
|
| 1752 |
-
|
| 1753 |
-
#
|
| 1754 |
-
has_content = has_code or has_lrc or has_score
|
| 1755 |
-
|
| 1756 |
-
codes_display_updates.append(gr.update(value=code_str, visible=has_code))
|
| 1757 |
-
lrc_display_updates.append(gr.update(value=lrc_str, visible=has_lrc))
|
| 1758 |
-
details_accordion_updates.append(gr.update(visible=has_content))
|
| 1759 |
|
| 1760 |
-
|
|
|
|
| 1761 |
audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
|
| 1762 |
audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
|
| 1763 |
audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
|
|
@@ -1767,19 +1923,54 @@ def navigate_to_previous_batch(current_batch_index, batch_queue):
|
|
| 1767 |
score_displays[4], score_displays[5], score_displays[6], score_displays[7],
|
| 1768 |
codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
|
| 1769 |
codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
|
| 1770 |
-
|
| 1771 |
-
|
|
|
|
| 1772 |
details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
|
| 1773 |
details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
|
| 1774 |
gr.update(interactive=True),
|
| 1775 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1776 |
|
| 1777 |
|
| 1778 |
def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches, batch_queue):
|
| 1779 |
-
"""Navigate to next batch (Result View Only - Never touches Input UI)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1780 |
if current_batch_index >= total_batches - 1:
|
| 1781 |
gr.Warning(t("messages.at_last_batch"))
|
| 1782 |
-
|
|
|
|
| 1783 |
|
| 1784 |
# Move to next batch
|
| 1785 |
new_batch_index = current_batch_index + 1
|
|
@@ -1787,25 +1978,23 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
|
|
| 1787 |
# Load batch data from queue
|
| 1788 |
if new_batch_index not in batch_queue:
|
| 1789 |
gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
|
| 1790 |
-
|
|
|
|
| 1791 |
|
| 1792 |
batch_data = batch_queue[new_batch_index]
|
| 1793 |
audio_paths = batch_data.get("audio_paths", [])
|
| 1794 |
generation_info_text = batch_data.get("generation_info", "")
|
| 1795 |
|
| 1796 |
-
# Prepare audio outputs (up to 8)
|
| 1797 |
real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
|
| 1798 |
-
stored_subtitles = batch_data.get("subtitles", [None] * 8)
|
| 1799 |
|
| 1800 |
audio_updates = []
|
| 1801 |
for idx in range(8):
|
| 1802 |
if idx < len(real_audio_paths):
|
| 1803 |
audio_path = real_audio_paths[idx]
|
| 1804 |
-
|
| 1805 |
-
# Use gr.update to set both value and subtitles
|
| 1806 |
-
audio_updates.append(gr.update(value=audio_path, subtitles=subtitles_data))
|
| 1807 |
else:
|
| 1808 |
-
audio_updates.append(gr.update(value=None
|
| 1809 |
|
| 1810 |
# Update batch indicator
|
| 1811 |
batch_indicator_text = update_batch_indicator(new_batch_index, total_batches)
|
|
@@ -1834,6 +2023,7 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
|
|
| 1834 |
|
| 1835 |
codes_display_updates = []
|
| 1836 |
lrc_display_updates = []
|
|
|
|
| 1837 |
details_accordion_updates = []
|
| 1838 |
for i in range(8):
|
| 1839 |
if stored_allow_lm_batch and isinstance(stored_codes, list):
|
|
@@ -1842,20 +2032,15 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
|
|
| 1842 |
code_str = stored_codes if isinstance(stored_codes, str) and i == 0 else ""
|
| 1843 |
|
| 1844 |
lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
|
| 1845 |
-
score_str = score_displays[i] if i < len(score_displays) else ""
|
| 1846 |
|
| 1847 |
-
|
| 1848 |
-
|
| 1849 |
-
|
| 1850 |
-
|
| 1851 |
-
#
|
| 1852 |
-
has_content = has_code or has_lrc or has_score
|
| 1853 |
-
|
| 1854 |
-
codes_display_updates.append(gr.update(value=code_str, visible=has_code))
|
| 1855 |
-
lrc_display_updates.append(gr.update(value=lrc_str, visible=has_lrc))
|
| 1856 |
-
details_accordion_updates.append(gr.update(visible=has_content))
|
| 1857 |
|
| 1858 |
-
|
|
|
|
| 1859 |
audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
|
| 1860 |
audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
|
| 1861 |
audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
|
|
@@ -1865,12 +2050,40 @@ def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches,
|
|
| 1865 |
score_displays[4], score_displays[5], score_displays[6], score_displays[7],
|
| 1866 |
codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
|
| 1867 |
codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
|
| 1868 |
-
|
| 1869 |
-
|
|
|
|
| 1870 |
details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
|
| 1871 |
details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
|
| 1872 |
gr.update(interactive=True),
|
| 1873 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1874 |
|
| 1875 |
|
| 1876 |
def restore_batch_parameters(current_batch_index, batch_queue):
|
|
|
|
| 40 |
lines = lrc_text.strip().split('\n')
|
| 41 |
|
| 42 |
# Regex patterns for LRC timestamps
|
| 43 |
+
# Pattern 1: [MM:SS.ss] or [MM:SS.sss] - standard LRC with start time only
|
| 44 |
# Pattern 2: [MM:SS.ss][MM:SS.ss] - LRC with both start and end time
|
| 45 |
+
# Support both 2-digit (centiseconds) and 3-digit (milliseconds) formats
|
| 46 |
+
timestamp_pattern = r'\[(\d{2}):(\d{2})\.(\d{2,3})\]'
|
| 47 |
|
| 48 |
parsed_lines = []
|
| 49 |
|
|
|
|
| 63 |
continue
|
| 64 |
|
| 65 |
# Parse first timestamp as start time
|
| 66 |
+
# Handle both 2-digit (centiseconds, /100) and 3-digit (milliseconds, /1000) formats
|
| 67 |
start_minutes, start_seconds, start_centiseconds = timestamps[0]
|
| 68 |
+
cs = int(start_centiseconds)
|
| 69 |
+
start_time = int(start_minutes) * 60 + int(start_seconds) + (cs / 100.0 if len(start_centiseconds) == 2 else cs / 1000.0)
|
| 70 |
|
| 71 |
# If there's a second timestamp, use it as end time
|
| 72 |
end_time = None
|
| 73 |
if len(timestamps) >= 2:
|
| 74 |
end_minutes, end_seconds, end_centiseconds = timestamps[1]
|
| 75 |
+
cs_end = int(end_centiseconds)
|
| 76 |
+
end_time = int(end_minutes) * 60 + int(end_seconds) + (cs_end / 100.0 if len(end_centiseconds) == 2 else cs_end / 1000.0)
|
| 77 |
|
| 78 |
parsed_lines.append({
|
| 79 |
'start': start_time,
|
|
|
|
| 105 |
return subtitles
|
| 106 |
|
| 107 |
|
| 108 |
+
def _format_vtt_timestamp(seconds: float) -> str:
|
| 109 |
+
"""
|
| 110 |
+
Format seconds to VTT timestamp format: HH:MM:SS.mmm
|
| 111 |
+
|
| 112 |
+
Args:
|
| 113 |
+
seconds: Time in seconds
|
| 114 |
+
|
| 115 |
+
Returns:
|
| 116 |
+
Formatted timestamp string
|
| 117 |
+
"""
|
| 118 |
+
hours = int(seconds // 3600)
|
| 119 |
+
minutes = int((seconds % 3600) // 60)
|
| 120 |
+
secs = int(seconds % 60)
|
| 121 |
+
millis = int((seconds % 1) * 1000)
|
| 122 |
+
return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def lrc_to_vtt_file(lrc_text: str, total_duration: Optional[float] = None) -> Optional[str]:
    """
    Convert LRC text to a VTT file and return the file path.

    This creates a WebVTT subtitle file that Gradio can use as a native
    <track src="..."> element, which is more stable than JS-based subtitle injection.

    VTT format example:
        WEBVTT

        00:00:00.000 --> 00:00:05.000
        First subtitle line

        00:00:05.000 --> 00:00:10.000
        Second subtitle line

    Args:
        lrc_text: LRC format lyrics string
        total_duration: Total audio duration in seconds (used for last line's end time)

    Returns:
        Path to the generated VTT file, or None if the input is empty, no
        subtitles could be parsed from it, or the file could not be written
    """
    import shutil

    if not lrc_text or not lrc_text.strip():
        return None

    # Parse LRC to subtitles data
    subtitles = parse_lrc_to_subtitles(lrc_text, total_duration=total_duration)

    if not subtitles:
        return None

    # Build VTT content: header, blank line, then one numbered cue per subtitle
    vtt_lines = ["WEBVTT", ""]

    for cue_number, subtitle in enumerate(subtitles, start=1):
        start_time = subtitle['timestamp'][0]
        end_time = subtitle['timestamp'][1]

        # Cue identifier (optional in VTT, but helpful for debugging)
        vtt_lines.append(str(cue_number))
        vtt_lines.append(f"{_format_vtt_timestamp(start_time)} --> {_format_vtt_timestamp(end_time)}")
        vtt_lines.append(subtitle['text'])
        vtt_lines.append("")  # Blank line between cues

    vtt_content = "\n".join(vtt_lines)

    # Create temp directory and save VTT file
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp(prefix="acestep_vtt_")
        vtt_path = os.path.join(temp_dir, "subtitles.vtt")
        with open(vtt_path, "w", encoding="utf-8") as f:
            f.write(vtt_content)
        return vtt_path
    except Exception as e:
        logger.error(f"[lrc_to_vtt_file] Failed to create VTT file: {e}")
        # Don't leave an orphaned (empty or partially written) temp directory
        # behind when the write fails.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return None
|
| 183 |
+
|
| 184 |
+
|
| 185 |
def _build_generation_info(
|
| 186 |
lm_metadata: Optional[Dict[str, Any]],
|
| 187 |
time_costs: Dict[str, float],
|
|
|
|
| 558 |
audios = result.audios
|
| 559 |
progress(0.99, "Converting audio to mp3...")
|
| 560 |
|
| 561 |
+
# Clear all scores, codes, lrc displays at the start of generation
|
| 562 |
# Note: Create independent gr.update objects (not references to the same object)
|
| 563 |
+
#
|
| 564 |
+
# NEW APPROACH: Don't update audio subtitles directly!
|
| 565 |
+
# Clearing lrc_display will trigger lrc_display.change() which clears subtitles automatically.
|
| 566 |
+
# This decouples audio value updates from subtitle updates, avoiding flickering.
|
| 567 |
+
#
|
| 568 |
+
# IMPORTANT: Keep visible=True to ensure .change() event is properly triggered by Gradio.
|
| 569 |
+
# These should always remain visible=True so users can expand accordion anytime.
|
| 570 |
+
clear_scores = [gr.update(value="", visible=True) for _ in range(8)]
|
| 571 |
+
clear_codes = [gr.update(value="", visible=True) for _ in range(8)]
|
| 572 |
+
# Clear lrc_display with empty string - this triggers .change() to clear subtitles
|
| 573 |
+
clear_lrcs = [gr.update(value="", visible=True) for _ in range(8)]
|
| 574 |
+
clear_accordions = [gr.skip() for _ in range(8)] # Don't change accordion visibility
|
| 575 |
+
dump_audio = [None for _ in range(8)]
|
| 576 |
yield (
|
| 577 |
+
# Audio outputs - just skip, value will be updated in loop
|
| 578 |
+
# Subtitles will be cleared via lrc_display.change()
|
| 579 |
+
dump_audio[0], dump_audio[1], dump_audio[2], dump_audio[3], dump_audio[4], dump_audio[5], dump_audio[6], dump_audio[7],
|
| 580 |
None, # all_audio_paths (clear batch files)
|
| 581 |
generation_info,
|
| 582 |
"Clearing previous results...",
|
|
|
|
| 598 |
None, # extra_outputs placeholder
|
| 599 |
None, # raw_codes placeholder
|
| 600 |
)
|
| 601 |
+
time_module.sleep(0.1)
|
| 602 |
|
| 603 |
for i in range(8):
|
| 604 |
if i < len(audios):
|
|
|
|
| 705 |
lrc_text = lrc_result.get("lrc_text", "")
|
| 706 |
final_lrcs_list[i] = lrc_text
|
| 707 |
logger.info(f"[auto_lrc] LRC text length for sample {i + 1}: {len(lrc_text)}")
|
| 708 |
+
# Convert LRC to VTT file for storage (consistent with new VTT-based approach)
|
| 709 |
+
vtt_path = lrc_to_vtt_file(lrc_text, total_duration=float(actual_duration))
|
| 710 |
+
final_subtitles_list[i] = vtt_path
|
| 711 |
else:
|
| 712 |
logger.warning(f"[auto_lrc] Missing required extra_outputs for sample {i + 1}")
|
| 713 |
except Exception as e:
|
|
|
|
| 716 |
total_auto_lrc_time += (auto_lrc_end - auto_lrc_start)
|
| 717 |
|
| 718 |
status_message = f"Encoding & Ready: {i+1}/{len(audios)}"
|
| 719 |
+
has_lrc = bool(final_lrcs_list[i])
|
| 720 |
+
has_score = bool(score_str) and score_str != "Done!"
|
| 721 |
+
has_content = bool(code_str) or has_lrc or has_score
|
| 722 |
+
|
| 723 |
+
# ============== STEP 1: Yield audio + CLEAR LRC ==============
|
| 724 |
+
# First, update audio and clear LRC to avoid race condition
|
| 725 |
+
# (audio needs to load before subtitles are set via .change() event)
|
| 726 |
current_audio_updates = [gr.skip() for _ in range(8)]
|
|
|
|
| 727 |
current_audio_updates[i] = audio_path
|
|
|
|
|
|
|
|
|
|
|
|
|
| 728 |
|
| 729 |
+
codes_display_updates = [gr.skip() for _ in range(8)]
|
| 730 |
+
codes_display_updates[i] = gr.update(value=code_str, visible=True) # Keep visible=True
|
|
|
|
|
|
|
|
|
|
| 731 |
|
|
|
|
| 732 |
details_accordion_updates = [gr.skip() for _ in range(8)]
|
| 733 |
+
# Don't change accordion visibility - keep it always expandable
|
| 734 |
+
|
| 735 |
+
# Clear LRC first (this triggers .change() to clear subtitles)
|
| 736 |
+
# Keep visible=True to ensure .change() event is properly triggered
|
| 737 |
+
lrc_clear_updates = [gr.skip() for _ in range(8)]
|
| 738 |
+
lrc_clear_updates[i] = gr.update(value="", visible=True)
|
| 739 |
|
| 740 |
yield (
|
| 741 |
current_audio_updates[0], current_audio_updates[1], current_audio_updates[2], current_audio_updates[3],
|
| 742 |
current_audio_updates[4], current_audio_updates[5], current_audio_updates[6], current_audio_updates[7],
|
| 743 |
+
all_audio_paths,
|
| 744 |
generation_info,
|
| 745 |
status_message,
|
| 746 |
seed_value_for_ui,
|
|
|
|
| 747 |
scores_ui_updates[0], scores_ui_updates[1], scores_ui_updates[2], scores_ui_updates[3], scores_ui_updates[4], scores_ui_updates[5], scores_ui_updates[6], scores_ui_updates[7],
|
|
|
|
| 748 |
codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
|
| 749 |
codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
|
|
|
|
| 750 |
details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
|
| 751 |
details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
|
| 752 |
+
# LRC display - CLEAR first
|
| 753 |
+
lrc_clear_updates[0], lrc_clear_updates[1], lrc_clear_updates[2], lrc_clear_updates[3],
|
| 754 |
+
lrc_clear_updates[4], lrc_clear_updates[5], lrc_clear_updates[6], lrc_clear_updates[7],
|
| 755 |
lm_generated_metadata,
|
| 756 |
is_format_caption,
|
| 757 |
+
None,
|
| 758 |
+
None,
|
| 759 |
)
|
| 760 |
+
|
| 761 |
+
# Wait for audio to load before setting subtitles
|
| 762 |
+
time_module.sleep(0.05)
|
| 763 |
+
|
| 764 |
+
# ============== STEP 2: Skip audio + SET actual LRC ==============
|
| 765 |
+
# Now set the actual LRC content, which triggers .change() to set subtitles
|
| 766 |
+
# This two-step approach (same as navigate_to_batch) ensures audio is loaded first
|
| 767 |
+
if has_lrc:
|
| 768 |
+
skip_audio = [gr.skip() for _ in range(8)]
|
| 769 |
+
skip_scores = [gr.skip() for _ in range(8)]
|
| 770 |
+
skip_codes = [gr.skip() for _ in range(8)]
|
| 771 |
+
skip_accordions = [gr.skip() for _ in range(8)]
|
| 772 |
+
|
| 773 |
+
lrc_actual_updates = [gr.skip() for _ in range(8)]
|
| 774 |
+
lrc_actual_updates[i] = gr.update(value=final_lrcs_list[i], visible=True) # Keep visible=True
|
| 775 |
+
|
| 776 |
+
yield (
|
| 777 |
+
skip_audio[0], skip_audio[1], skip_audio[2], skip_audio[3],
|
| 778 |
+
skip_audio[4], skip_audio[5], skip_audio[6], skip_audio[7],
|
| 779 |
+
gr.skip(), # all_audio_paths
|
| 780 |
+
gr.skip(), # generation_info
|
| 781 |
+
gr.skip(), # status_message
|
| 782 |
+
gr.skip(), # seed
|
| 783 |
+
skip_scores[0], skip_scores[1], skip_scores[2], skip_scores[3],
|
| 784 |
+
skip_scores[4], skip_scores[5], skip_scores[6], skip_scores[7],
|
| 785 |
+
skip_codes[0], skip_codes[1], skip_codes[2], skip_codes[3],
|
| 786 |
+
skip_codes[4], skip_codes[5], skip_codes[6], skip_codes[7],
|
| 787 |
+
skip_accordions[0], skip_accordions[1], skip_accordions[2], skip_accordions[3],
|
| 788 |
+
skip_accordions[4], skip_accordions[5], skip_accordions[6], skip_accordions[7],
|
| 789 |
+
# LRC display - SET actual content (triggers .change() to set subtitles)
|
| 790 |
+
lrc_actual_updates[0], lrc_actual_updates[1], lrc_actual_updates[2], lrc_actual_updates[3],
|
| 791 |
+
lrc_actual_updates[4], lrc_actual_updates[5], lrc_actual_updates[6], lrc_actual_updates[7],
|
| 792 |
+
gr.skip(), # lm_generated_metadata
|
| 793 |
+
gr.skip(), # is_format_caption
|
| 794 |
+
None,
|
| 795 |
+
None,
|
| 796 |
+
)
|
| 797 |
else:
|
| 798 |
# If i exceeds the generated count (e.g., batch=2, i=2..7), do not yield
|
| 799 |
pass
|
| 800 |
+
time_module.sleep(0.05)
|
| 801 |
|
| 802 |
# Record audio conversion time
|
| 803 |
audio_conversion_end_time = time_module.time()
|
|
|
|
| 824 |
num_audios=len(result.audios),
|
| 825 |
)
|
| 826 |
|
| 827 |
+
# Build final codes display, LRC display, accordion visibility updates
|
| 828 |
+
final_codes_display_updates = [gr.skip() for _ in range(8)]
|
| 829 |
+
# final_lrc_display_updates = [gr.skip() for _ in range(8)]
|
| 830 |
+
final_accordion_updates = [gr.skip() for _ in range(8)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 831 |
|
| 832 |
+
# NEW APPROACH: Don't update audio subtitles directly in final yield!
|
| 833 |
+
# The lrc_display was already updated in the loop yields above.
|
| 834 |
+
# lrc_display.change() event will automatically update the audio subtitles.
|
| 835 |
+
# This decouples audio value updates from subtitle updates, avoiding flickering.
|
| 836 |
+
|
| 837 |
yield (
|
| 838 |
+
# Audio - just skip, subtitles are updated via lrc_display.change()
|
| 839 |
+
gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(), gr.skip(),
|
| 840 |
all_audio_paths,
|
| 841 |
generation_info,
|
| 842 |
"Generation Complete",
|
|
|
|
| 850 |
final_accordion_updates[0], final_accordion_updates[1], final_accordion_updates[2], final_accordion_updates[3],
|
| 851 |
final_accordion_updates[4], final_accordion_updates[5], final_accordion_updates[6], final_accordion_updates[7],
|
| 852 |
# LRC display
|
| 853 |
+
final_lrcs_list[0], final_lrcs_list[1], final_lrcs_list[2], final_lrcs_list[3],
|
| 854 |
+
final_lrcs_list[4], final_lrcs_list[5], final_lrcs_list[6], final_lrcs_list[7],
|
| 855 |
lm_generated_metadata,
|
| 856 |
is_format_caption,
|
| 857 |
{
|
|
|
|
| 1126 |
batch_queue[current_batch_index]["scores"] = [""] * 8
|
| 1127 |
batch_queue[current_batch_index]["scores"][sample_idx - 1] = score_display
|
| 1128 |
|
| 1129 |
+
# Return: score_display (with visible=True), accordion skip, batch_queue
|
| 1130 |
return (
|
| 1131 |
+
gr.update(value=score_display, visible=True), # score_display with content, keep visible=True
|
| 1132 |
+
gr.skip(), # details_accordion - don't change visibility
|
| 1133 |
batch_queue
|
| 1134 |
)
|
| 1135 |
|
|
|
|
| 1137 |
def generate_lrc_handler(dit_handler, sample_idx, current_batch_index, batch_queue, vocal_language, inference_steps):
|
| 1138 |
"""
|
| 1139 |
Generate LRC timestamps for a specific audio sample.
|
| 1140 |
+
|
| 1141 |
This function retrieves cached generation data from batch_queue and calls
|
| 1142 |
the handler's get_lyric_timestamp method to generate LRC format lyrics.
|
|
|
|
| 1143 |
|
| 1144 |
+
NEW APPROACH: Only update lrc_display, NOT audio subtitles directly!
|
| 1145 |
+
Audio subtitles will be updated via lrc_display.change() event.
|
| 1146 |
+
This decouples audio value updates from subtitle updates, avoiding flickering.
|
| 1147 |
+
|
| 1148 |
Args:
|
| 1149 |
dit_handler: DiT handler instance with get_lyric_timestamp method
|
| 1150 |
sample_idx: Which sample to generate LRC for (1-8)
|
|
|
|
| 1152 |
batch_queue: Dictionary storing all batch generation data
|
| 1153 |
vocal_language: Language code for lyrics
|
| 1154 |
inference_steps: Number of inference steps used in generation
|
| 1155 |
+
|
| 1156 |
Returns:
|
| 1157 |
Tuple of (lrc_display_update, details_accordion_update, batch_queue)
|
| 1158 |
+
Note: No audio_update - subtitles updated via lrc_display.change()
|
| 1159 |
"""
|
| 1160 |
import torch
|
| 1161 |
|
| 1162 |
if current_batch_index not in batch_queue:
|
| 1163 |
return gr.skip(), gr.skip(), batch_queue
|
| 1164 |
+
|
| 1165 |
batch_data = batch_queue[current_batch_index]
|
| 1166 |
extra_outputs = batch_data.get("extra_outputs", {})
|
| 1167 |
+
|
| 1168 |
# Check if required data is available
|
| 1169 |
+
# Keep visible=True to ensure .change() event is properly triggered
|
| 1170 |
if not extra_outputs:
|
| 1171 |
+
return gr.update(value=t("messages.lrc_no_extra_outputs"), visible=True), gr.skip(), batch_queue
|
| 1172 |
|
| 1173 |
pred_latents = extra_outputs.get("pred_latents")
|
| 1174 |
encoder_hidden_states = extra_outputs.get("encoder_hidden_states")
|
|
|
|
| 1177 |
lyric_token_idss = extra_outputs.get("lyric_token_idss")
|
| 1178 |
|
| 1179 |
if any(x is None for x in [pred_latents, encoder_hidden_states, encoder_attention_mask, context_latents, lyric_token_idss]):
|
| 1180 |
+
return gr.update(value=t("messages.lrc_missing_tensors"), visible=True), gr.skip(), batch_queue
|
| 1181 |
|
| 1182 |
# Adjust sample_idx to 0-based
|
| 1183 |
sample_idx_0based = sample_idx - 1
|
|
|
|
| 1185 |
# Check if sample exists in batch
|
| 1186 |
batch_size = pred_latents.shape[0]
|
| 1187 |
if sample_idx_0based >= batch_size:
|
| 1188 |
+
return gr.update(value=t("messages.lrc_sample_not_exist"), visible=True), gr.skip(), batch_queue
|
| 1189 |
|
| 1190 |
# Extract the specific sample's data
|
| 1191 |
try:
|
|
|
|
| 1223 |
if result.get("success"):
|
| 1224 |
lrc_text = result.get("lrc_text", "")
|
| 1225 |
if not lrc_text:
|
| 1226 |
+
return gr.update(value=t("messages.lrc_empty_result"), visible=True), gr.skip(), batch_queue
|
| 1227 |
|
| 1228 |
# Store LRC in batch_queue for later retrieval when switching batches
|
| 1229 |
if "lrcs" not in batch_queue[current_batch_index]:
|
| 1230 |
batch_queue[current_batch_index]["lrcs"] = [""] * 8
|
| 1231 |
batch_queue[current_batch_index]["lrcs"][sample_idx_0based] = lrc_text
|
| 1232 |
|
| 1233 |
+
# Convert LRC to VTT file and store path for batch navigation (consistent with VTT-based approach)
|
| 1234 |
+
vtt_path = lrc_to_vtt_file(lrc_text, total_duration=float(audio_duration))
|
|
|
|
|
|
|
| 1235 |
if "subtitles" not in batch_queue[current_batch_index]:
|
| 1236 |
batch_queue[current_batch_index]["subtitles"] = [None] * 8
|
| 1237 |
+
batch_queue[current_batch_index]["subtitles"][sample_idx_0based] = vtt_path
|
| 1238 |
|
| 1239 |
# Return: lrc_display, details_accordion, batch_queue
|
| 1240 |
+
# NEW APPROACH: Only update lrc_display, NOT audio subtitles!
|
| 1241 |
+
# Audio subtitles will be updated via lrc_display.change() event.
|
| 1242 |
+
# Keep visible=True to ensure .change() event is properly triggered
|
| 1243 |
return (
|
| 1244 |
gr.update(value=lrc_text, visible=True),
|
| 1245 |
+
gr.skip(),
|
| 1246 |
batch_queue
|
| 1247 |
)
|
| 1248 |
else:
|
| 1249 |
error_msg = result.get("error", "Unknown error")
|
| 1250 |
+
return gr.update(value=f"❌ {error_msg}", visible=True), gr.skip(), batch_queue
|
| 1251 |
|
| 1252 |
except Exception as e:
|
| 1253 |
logger.exception("[generate_lrc_handler] Error generating LRC")
|
| 1254 |
+
return gr.update(value=f"❌ Error: {str(e)}", visible=True), gr.skip(), batch_queue
|
| 1255 |
|
| 1256 |
|
| 1257 |
+
def update_audio_subtitles_from_lrc(lrc_text: str, audio_duration: float = None):
    """
    Build the subtitles update for an Audio component from LRC lyrics.

    The LRC text is written out as a VTT file through ``lrc_to_vtt_file`` and
    only the resulting file path is handed to Gradio, rather than injecting
    subtitle data from the client side.

    Args:
        lrc_text: LRC format lyrics string (the content of an lrc_display textbox).
        audio_duration: Optional audio duration, forwarded as ``total_duration``
            so the end time of the last lyric line can be computed.

    Returns:
        ``gr.update(subtitles=<vtt path>)`` when a VTT file was produced, or
        ``gr.update(subtitles=None)`` when the text is empty/whitespace-only or
        the conversion returned no path.
    """
    # Blank or whitespace-only lyrics never reach the converter: subtitles are
    # cleared directly, so no VTT file is created for them.
    has_lyrics = bool(lrc_text) and bool(lrc_text.strip())

    # A failed conversion yields None, which clears the subtitles in the same
    # way as empty lyrics.
    subtitle_file = (
        lrc_to_vtt_file(lrc_text, total_duration=audio_duration)
        if has_lyrics
        else None
    )
    return gr.update(subtitles=subtitle_file)
|
|
|
|
|
|
|
| 1282 |
|
| 1283 |
|
| 1284 |
def capture_current_params(
|
|
|
|
| 1489 |
|
| 1490 |
# Extract extra_outputs from result tuple (index 46 after adding lrc_display)
|
| 1491 |
# Note: index 47 is raw_codes_list which we already extracted above
|
| 1492 |
+
# Must check both length AND that the value is not None (intermediate yields use None as placeholder)
|
| 1493 |
+
extra_outputs_from_result = result[46] if len(result) > 46 and result[46] is not None else {}
|
| 1494 |
|
| 1495 |
# Store current batch in queue
|
| 1496 |
batch_queue = store_batch_in_queue(
|
|
|
|
| 1536 |
# 0-7: audio_outputs, 8: all_audio_paths, 9: generation_info, 10: status, 11: seed
|
| 1537 |
# 12-19: scores, 20-27: codes_display, 28-35: details_accordion, 36-43: lrc_display
|
| 1538 |
# 44: lm_metadata, 45: is_format_caption, 46: extra_outputs, 47: raw_codes_list
|
| 1539 |
+
#
|
| 1540 |
+
# IMPORTANT: Audio updates (including subtitles) were already sent in the for-loop above.
|
| 1541 |
+
# We must NOT send them again here, otherwise the audio component receives duplicate updates
|
| 1542 |
+
# which can cause subtitle flickering. Replace audio updates (indices 0-7) with gr.skip().
|
| 1543 |
ui_result = result[:-2] if len(result) > 47 else (result[:-1] if len(result) > 46 else result)
|
| 1544 |
|
| 1545 |
+
# Replace audio outputs (0-7) with gr.skip() to avoid duplicate updates
|
| 1546 |
+
ui_result_list = list(ui_result)
|
| 1547 |
+
for i in range(8):
|
| 1548 |
+
ui_result_list[i] = gr.skip()
|
| 1549 |
+
ui_result = tuple(ui_result_list)
|
| 1550 |
+
|
| 1551 |
yield ui_result + (
|
| 1552 |
current_batch_index,
|
| 1553 |
total_batches,
|
|
|
|
| 1735 |
generated_codes_single = generated_codes_batch[0] if generated_codes_batch else ""
|
| 1736 |
|
| 1737 |
# Extract extra_outputs for LRC generation (index 46)
|
| 1738 |
+
# Must check both length AND that the value is not None (intermediate yields use None as placeholder)
|
| 1739 |
+
extra_outputs_from_bg = final_result[46] if len(final_result) > 46 and final_result[46] is not None else {}
|
| 1740 |
+
|
| 1741 |
+
# Extract scores from final_result (indices 12-19)
|
| 1742 |
+
# This is critical for auto_score to work when navigating to background-generated batches
|
| 1743 |
+
scores_from_bg = []
|
| 1744 |
+
for score_idx in range(12, 20):
|
| 1745 |
+
if score_idx < len(final_result):
|
| 1746 |
+
score_val = final_result[score_idx]
|
| 1747 |
+
# Handle gr.update objects - extract value if present, otherwise use empty string
|
| 1748 |
+
if hasattr(score_val, 'value'):
|
| 1749 |
+
scores_from_bg.append(score_val.value if score_val.value else "")
|
| 1750 |
+
elif isinstance(score_val, str):
|
| 1751 |
+
scores_from_bg.append(score_val)
|
| 1752 |
+
else:
|
| 1753 |
+
scores_from_bg.append("")
|
| 1754 |
+
else:
|
| 1755 |
+
scores_from_bg.append("")
|
| 1756 |
|
| 1757 |
# Determine which codes to store
|
| 1758 |
batch_size = params.get("batch_size_input", 2)
|
|
|
|
| 1768 |
logger.info(f" - batch_size: {batch_size}")
|
| 1769 |
logger.info(f" - generated_codes_single exists: {bool(generated_codes_single)}")
|
| 1770 |
logger.info(f" - extra_outputs_from_bg exists: {extra_outputs_from_bg is not None}")
|
| 1771 |
+
logger.info(f" - scores_from_bg: {[bool(s) for s in scores_from_bg]}")
|
| 1772 |
if isinstance(codes_to_store, list):
|
| 1773 |
logger.info(f" - codes_to_store: LIST with {len(codes_to_store)} items")
|
| 1774 |
for idx, code in enumerate(codes_to_store):
|
|
|
|
| 1776 |
else:
|
| 1777 |
logger.info(f" - codes_to_store: STRING with {len(codes_to_store) if codes_to_store else 0} chars")
|
| 1778 |
|
| 1779 |
+
# Store next batch in queue with codes, batch settings, scores, and ALL generation params
|
| 1780 |
batch_queue = store_batch_in_queue(
|
| 1781 |
batch_queue,
|
| 1782 |
next_batch_idx,
|
|
|
|
| 1784 |
generation_info,
|
| 1785 |
seed_value_for_ui,
|
| 1786 |
codes=codes_to_store,
|
| 1787 |
+
scores=scores_from_bg, # FIX: Now passing scores from background generation
|
| 1788 |
allow_lm_batch=allow_lm_batch,
|
| 1789 |
batch_size=int(batch_size),
|
| 1790 |
generation_params=params,
|
|
|
|
| 1793 |
status="completed"
|
| 1794 |
)
|
| 1795 |
|
| 1796 |
+
# FIX: Extract auto_lrc results from extra_outputs (same as generate_with_batch_management)
|
| 1797 |
+
# This ensures LRC and subtitles are properly stored for batch navigation
|
| 1798 |
+
auto_lrc = params.get("auto_lrc", False)
|
| 1799 |
+
if auto_lrc and extra_outputs_from_bg:
|
| 1800 |
+
lrcs_from_extra = extra_outputs_from_bg.get("lrcs", [""] * 8)
|
| 1801 |
+
subtitles_from_extra = extra_outputs_from_bg.get("subtitles", [None] * 8)
|
| 1802 |
+
batch_queue[next_batch_idx]["lrcs"] = lrcs_from_extra
|
| 1803 |
+
batch_queue[next_batch_idx]["subtitles"] = subtitles_from_extra
|
| 1804 |
+
logger.info(f" - auto_lrc results stored: {[bool(l) for l in lrcs_from_extra]}")
|
| 1805 |
+
|
| 1806 |
logger.info(f"Batch {next_batch_idx + 1} stored in queue successfully")
|
| 1807 |
|
| 1808 |
# Success message
|
|
|
|
| 1837 |
|
| 1838 |
|
| 1839 |
def navigate_to_previous_batch(current_batch_index, batch_queue):
|
| 1840 |
+
"""Navigate to previous batch (Result View Only - Never touches Input UI)
|
| 1841 |
+
|
| 1842 |
+
Uses two-step yield to avoid subtitle flickering:
|
| 1843 |
+
1. First yield: audio + clear LRC (triggers .change() to clear subtitles)
|
| 1844 |
+
2. Sleep 50ms (let audio load)
|
| 1845 |
+
3. Second yield: skip audio + set actual LRC (triggers .change() to set subtitles)
|
| 1846 |
+
"""
|
| 1847 |
if current_batch_index <= 0:
|
| 1848 |
gr.Warning(t("messages.at_first_batch"))
|
| 1849 |
+
yield tuple([gr.update()] * 48) # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
|
| 1850 |
+
return
|
| 1851 |
|
| 1852 |
# Move to previous batch
|
| 1853 |
new_batch_index = current_batch_index - 1
|
|
|
|
| 1855 |
# Load batch data from queue
|
| 1856 |
if new_batch_index not in batch_queue:
|
| 1857 |
gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
|
| 1858 |
+
yield tuple([gr.update()] * 48)
|
| 1859 |
+
return
|
| 1860 |
|
| 1861 |
batch_data = batch_queue[new_batch_index]
|
| 1862 |
audio_paths = batch_data.get("audio_paths", [])
|
| 1863 |
generation_info_text = batch_data.get("generation_info", "")
|
| 1864 |
|
| 1865 |
+
# Prepare audio outputs (up to 8)
|
| 1866 |
real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
|
|
|
|
| 1867 |
|
| 1868 |
audio_updates = []
|
| 1869 |
for idx in range(8):
|
| 1870 |
if idx < len(real_audio_paths):
|
| 1871 |
audio_path = real_audio_paths[idx]
|
| 1872 |
+
audio_updates.append(gr.update(value=audio_path))
|
|
|
|
|
|
|
| 1873 |
else:
|
| 1874 |
+
audio_updates.append(gr.update(value=None))
|
| 1875 |
|
| 1876 |
# Update batch indicator
|
| 1877 |
total_batches = len(batch_queue)
|
|
|
|
| 1895 |
|
| 1896 |
codes_display_updates = []
|
| 1897 |
lrc_display_updates = []
|
| 1898 |
+
lrc_clear_updates = [] # For first yield - clear LRC
|
| 1899 |
details_accordion_updates = []
|
| 1900 |
for i in range(8):
|
| 1901 |
if stored_allow_lm_batch and isinstance(stored_codes, list):
|
|
|
|
| 1906 |
lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
|
| 1907 |
score_str = score_displays[i] if i < len(score_displays) else ""
|
| 1908 |
|
| 1909 |
+
# Keep visible=True to ensure .change() event is properly triggered
|
| 1910 |
+
codes_display_updates.append(gr.update(value=code_str, visible=True))
|
| 1911 |
+
lrc_display_updates.append(gr.update(value=lrc_str, visible=True))
|
| 1912 |
+
lrc_clear_updates.append(gr.update(value="", visible=True)) # Clear first
|
| 1913 |
+
details_accordion_updates.append(gr.skip()) # Don't change accordion visibility
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1914 |
|
| 1915 |
+
# ============== STEP 1: Yield audio + CLEAR LRC ==============
|
| 1916 |
+
yield (
|
| 1917 |
audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
|
| 1918 |
audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
|
| 1919 |
audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
|
|
|
|
| 1923 |
score_displays[4], score_displays[5], score_displays[6], score_displays[7],
|
| 1924 |
codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
|
| 1925 |
codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
|
| 1926 |
+
# LRC display - CLEAR first (triggers .change() to clear subtitles)
|
| 1927 |
+
lrc_clear_updates[0], lrc_clear_updates[1], lrc_clear_updates[2], lrc_clear_updates[3],
|
| 1928 |
+
lrc_clear_updates[4], lrc_clear_updates[5], lrc_clear_updates[6], lrc_clear_updates[7],
|
| 1929 |
details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
|
| 1930 |
details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
|
| 1931 |
gr.update(interactive=True),
|
| 1932 |
)
|
| 1933 |
+
|
| 1934 |
+
# Wait for audio to load before setting subtitles
|
| 1935 |
+
time_module.sleep(0.05)
|
| 1936 |
+
|
| 1937 |
+
# ============== STEP 2: Yield skip audio + SET actual LRC ==============
|
| 1938 |
+
skip_audio = [gr.skip() for _ in range(8)]
|
| 1939 |
+
skip_scores = [gr.skip() for _ in range(8)]
|
| 1940 |
+
skip_codes = [gr.skip() for _ in range(8)]
|
| 1941 |
+
skip_accordions = [gr.skip() for _ in range(8)]
|
| 1942 |
+
|
| 1943 |
+
yield (
|
| 1944 |
+
skip_audio[0], skip_audio[1], skip_audio[2], skip_audio[3],
|
| 1945 |
+
skip_audio[4], skip_audio[5], skip_audio[6], skip_audio[7],
|
| 1946 |
+
gr.skip(), gr.skip(), gr.skip(), gr.skip(), # audio_paths, generation_info, batch_index, indicator
|
| 1947 |
+
gr.skip(), gr.skip(), # prev/next buttons
|
| 1948 |
+
gr.skip(), # status
|
| 1949 |
+
skip_scores[0], skip_scores[1], skip_scores[2], skip_scores[3],
|
| 1950 |
+
skip_scores[4], skip_scores[5], skip_scores[6], skip_scores[7],
|
| 1951 |
+
skip_codes[0], skip_codes[1], skip_codes[2], skip_codes[3],
|
| 1952 |
+
skip_codes[4], skip_codes[5], skip_codes[6], skip_codes[7],
|
| 1953 |
+
# LRC display - SET actual content (triggers .change() to set subtitles)
|
| 1954 |
+
lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
|
| 1955 |
+
lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
|
| 1956 |
+
skip_accordions[0], skip_accordions[1], skip_accordions[2], skip_accordions[3],
|
| 1957 |
+
skip_accordions[4], skip_accordions[5], skip_accordions[6], skip_accordions[7],
|
| 1958 |
+
gr.skip(), # restore button
|
| 1959 |
+
)
|
| 1960 |
|
| 1961 |
|
| 1962 |
def navigate_to_next_batch(autogen_enabled, current_batch_index, total_batches, batch_queue):
|
| 1963 |
+
"""Navigate to next batch (Result View Only - Never touches Input UI)
|
| 1964 |
+
|
| 1965 |
+
Uses two-step yield to avoid subtitle flickering:
|
| 1966 |
+
1. First yield: audio + clear LRC (triggers .change() to clear subtitles)
|
| 1967 |
+
2. Sleep 50ms (let audio load)
|
| 1968 |
+
3. Second yield: skip audio + set actual LRC (triggers .change() to set subtitles)
|
| 1969 |
+
"""
|
| 1970 |
if current_batch_index >= total_batches - 1:
|
| 1971 |
gr.Warning(t("messages.at_last_batch"))
|
| 1972 |
+
yield tuple([gr.update()] * 49) # 8 audio + 2 batch files/info + 1 index + 1 indicator + 2 btns + 1 status + 1 next_status + 8 scores + 8 codes + 8 lrc + 8 accordions + 1 restore
|
| 1973 |
+
return
|
| 1974 |
|
| 1975 |
# Move to next batch
|
| 1976 |
new_batch_index = current_batch_index + 1
|
|
|
|
| 1978 |
# Load batch data from queue
|
| 1979 |
if new_batch_index not in batch_queue:
|
| 1980 |
gr.Warning(t("messages.batch_not_found", n=new_batch_index + 1))
|
| 1981 |
+
yield tuple([gr.update()] * 49)
|
| 1982 |
+
return
|
| 1983 |
|
| 1984 |
batch_data = batch_queue[new_batch_index]
|
| 1985 |
audio_paths = batch_data.get("audio_paths", [])
|
| 1986 |
generation_info_text = batch_data.get("generation_info", "")
|
| 1987 |
|
| 1988 |
+
# Prepare audio outputs (up to 8)
|
| 1989 |
real_audio_paths = [p for p in audio_paths if not p.lower().endswith('.json')]
|
|
|
|
| 1990 |
|
| 1991 |
audio_updates = []
|
| 1992 |
for idx in range(8):
|
| 1993 |
if idx < len(real_audio_paths):
|
| 1994 |
audio_path = real_audio_paths[idx]
|
| 1995 |
+
audio_updates.append(gr.update(value=audio_path))
|
|
|
|
|
|
|
| 1996 |
else:
|
| 1997 |
+
audio_updates.append(gr.update(value=None))
|
| 1998 |
|
| 1999 |
# Update batch indicator
|
| 2000 |
batch_indicator_text = update_batch_indicator(new_batch_index, total_batches)
|
|
|
|
| 2023 |
|
| 2024 |
codes_display_updates = []
|
| 2025 |
lrc_display_updates = []
|
| 2026 |
+
lrc_clear_updates = [] # For first yield - clear LRC
|
| 2027 |
details_accordion_updates = []
|
| 2028 |
for i in range(8):
|
| 2029 |
if stored_allow_lm_batch and isinstance(stored_codes, list):
|
|
|
|
| 2032 |
code_str = stored_codes if isinstance(stored_codes, str) and i == 0 else ""
|
| 2033 |
|
| 2034 |
lrc_str = lrc_displays[i] if i < len(lrc_displays) else ""
|
|
|
|
| 2035 |
|
| 2036 |
+
# Keep visible=True to ensure .change() event is properly triggered
|
| 2037 |
+
codes_display_updates.append(gr.update(value=code_str, visible=True))
|
| 2038 |
+
lrc_display_updates.append(gr.update(value=lrc_str, visible=True))
|
| 2039 |
+
lrc_clear_updates.append(gr.update(value="", visible=True)) # Clear first
|
| 2040 |
+
details_accordion_updates.append(gr.skip()) # Don't change accordion visibility
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2041 |
|
| 2042 |
+
# ============== STEP 1: Yield audio + CLEAR LRC ==============
|
| 2043 |
+
yield (
|
| 2044 |
audio_updates[0], audio_updates[1], audio_updates[2], audio_updates[3],
|
| 2045 |
audio_updates[4], audio_updates[5], audio_updates[6], audio_updates[7],
|
| 2046 |
audio_paths, generation_info_text, new_batch_index, batch_indicator_text,
|
|
|
|
| 2050 |
score_displays[4], score_displays[5], score_displays[6], score_displays[7],
|
| 2051 |
codes_display_updates[0], codes_display_updates[1], codes_display_updates[2], codes_display_updates[3],
|
| 2052 |
codes_display_updates[4], codes_display_updates[5], codes_display_updates[6], codes_display_updates[7],
|
| 2053 |
+
# LRC display - CLEAR first (triggers .change() to clear subtitles)
|
| 2054 |
+
lrc_clear_updates[0], lrc_clear_updates[1], lrc_clear_updates[2], lrc_clear_updates[3],
|
| 2055 |
+
lrc_clear_updates[4], lrc_clear_updates[5], lrc_clear_updates[6], lrc_clear_updates[7],
|
| 2056 |
details_accordion_updates[0], details_accordion_updates[1], details_accordion_updates[2], details_accordion_updates[3],
|
| 2057 |
details_accordion_updates[4], details_accordion_updates[5], details_accordion_updates[6], details_accordion_updates[7],
|
| 2058 |
gr.update(interactive=True),
|
| 2059 |
)
|
| 2060 |
+
|
| 2061 |
+
# Wait for audio to load before setting subtitles
|
| 2062 |
+
time_module.sleep(0.05)
|
| 2063 |
+
|
| 2064 |
+
# ============== STEP 2: Yield skip audio + SET actual LRC ==============
|
| 2065 |
+
skip_audio = [gr.skip() for _ in range(8)]
|
| 2066 |
+
skip_scores = [gr.skip() for _ in range(8)]
|
| 2067 |
+
skip_codes = [gr.skip() for _ in range(8)]
|
| 2068 |
+
skip_accordions = [gr.skip() for _ in range(8)]
|
| 2069 |
+
|
| 2070 |
+
yield (
|
| 2071 |
+
skip_audio[0], skip_audio[1], skip_audio[2], skip_audio[3],
|
| 2072 |
+
skip_audio[4], skip_audio[5], skip_audio[6], skip_audio[7],
|
| 2073 |
+
gr.skip(), gr.skip(), gr.skip(), gr.skip(), # audio_paths, generation_info, batch_index, indicator
|
| 2074 |
+
gr.skip(), gr.skip(), # prev/next buttons
|
| 2075 |
+
gr.skip(), gr.skip(), # status, next_batch_status
|
| 2076 |
+
skip_scores[0], skip_scores[1], skip_scores[2], skip_scores[3],
|
| 2077 |
+
skip_scores[4], skip_scores[5], skip_scores[6], skip_scores[7],
|
| 2078 |
+
skip_codes[0], skip_codes[1], skip_codes[2], skip_codes[3],
|
| 2079 |
+
skip_codes[4], skip_codes[5], skip_codes[6], skip_codes[7],
|
| 2080 |
+
# LRC display - SET actual content (triggers .change() to set subtitles)
|
| 2081 |
+
lrc_display_updates[0], lrc_display_updates[1], lrc_display_updates[2], lrc_display_updates[3],
|
| 2082 |
+
lrc_display_updates[4], lrc_display_updates[5], lrc_display_updates[6], lrc_display_updates[7],
|
| 2083 |
+
skip_accordions[0], skip_accordions[1], skip_accordions[2], skip_accordions[3],
|
| 2084 |
+
skip_accordions[4], skip_accordions[5], skip_accordions[6], skip_accordions[7],
|
| 2085 |
+
gr.skip(), # restore button
|
| 2086 |
+
)
|
| 2087 |
|
| 2088 |
|
| 2089 |
def restore_batch_parameters(current_batch_index, batch_queue):
|
acestep/gradio_ui/interfaces/result.py
CHANGED
|
@@ -56,27 +56,30 @@ def create_results_section(dit_handler) -> dict:
|
|
| 56 |
size="sm",
|
| 57 |
scale=1
|
| 58 |
)
|
| 59 |
-
with gr.Accordion(t("results.details_accordion"), open=False, visible=
|
| 60 |
codes_display_1 = gr.Textbox(
|
| 61 |
label=t("results.codes_label", n=1),
|
| 62 |
interactive=False,
|
| 63 |
buttons=["copy"],
|
|
|
|
| 64 |
max_lines=4,
|
| 65 |
-
visible=
|
| 66 |
)
|
| 67 |
score_display_1 = gr.Textbox(
|
| 68 |
label=t("results.quality_score_label", n=1),
|
| 69 |
interactive=False,
|
| 70 |
buttons=["copy"],
|
|
|
|
| 71 |
max_lines=6,
|
| 72 |
-
visible=
|
| 73 |
)
|
| 74 |
lrc_display_1 = gr.Textbox(
|
| 75 |
label=t("results.lrc_label", n=1),
|
| 76 |
interactive=True,
|
| 77 |
buttons=["copy"],
|
|
|
|
| 78 |
max_lines=8,
|
| 79 |
-
visible=
|
| 80 |
)
|
| 81 |
with gr.Column(visible=True) as audio_col_2:
|
| 82 |
generated_audio_2 = gr.Audio(
|
|
@@ -110,27 +113,30 @@ def create_results_section(dit_handler) -> dict:
|
|
| 110 |
size="sm",
|
| 111 |
scale=1
|
| 112 |
)
|
| 113 |
-
with gr.Accordion(t("results.details_accordion"), open=False, visible=
|
| 114 |
codes_display_2 = gr.Textbox(
|
| 115 |
label=t("results.codes_label", n=2),
|
| 116 |
interactive=False,
|
| 117 |
buttons=["copy"],
|
|
|
|
| 118 |
max_lines=4,
|
| 119 |
-
visible=
|
| 120 |
)
|
| 121 |
score_display_2 = gr.Textbox(
|
| 122 |
label=t("results.quality_score_label", n=2),
|
| 123 |
interactive=False,
|
| 124 |
buttons=["copy"],
|
|
|
|
| 125 |
max_lines=6,
|
| 126 |
-
visible=
|
| 127 |
)
|
| 128 |
lrc_display_2 = gr.Textbox(
|
| 129 |
label=t("results.lrc_label", n=2),
|
| 130 |
interactive=True,
|
| 131 |
buttons=["copy"],
|
|
|
|
| 132 |
max_lines=8,
|
| 133 |
-
visible=
|
| 134 |
)
|
| 135 |
with gr.Column(visible=False) as audio_col_3:
|
| 136 |
generated_audio_3 = gr.Audio(
|
|
@@ -164,27 +170,30 @@ def create_results_section(dit_handler) -> dict:
|
|
| 164 |
size="sm",
|
| 165 |
scale=1
|
| 166 |
)
|
| 167 |
-
with gr.Accordion(t("results.details_accordion"), open=False, visible=
|
| 168 |
codes_display_3 = gr.Textbox(
|
| 169 |
label=t("results.codes_label", n=3),
|
| 170 |
interactive=False,
|
| 171 |
buttons=["copy"],
|
|
|
|
| 172 |
max_lines=4,
|
| 173 |
-
visible=
|
| 174 |
)
|
| 175 |
score_display_3 = gr.Textbox(
|
| 176 |
label=t("results.quality_score_label", n=3),
|
| 177 |
interactive=False,
|
| 178 |
buttons=["copy"],
|
|
|
|
| 179 |
max_lines=6,
|
| 180 |
-
visible=
|
| 181 |
)
|
| 182 |
lrc_display_3 = gr.Textbox(
|
| 183 |
label=t("results.lrc_label", n=3),
|
| 184 |
interactive=True,
|
| 185 |
buttons=["copy"],
|
|
|
|
| 186 |
max_lines=8,
|
| 187 |
-
visible=
|
| 188 |
)
|
| 189 |
with gr.Column(visible=False) as audio_col_4:
|
| 190 |
generated_audio_4 = gr.Audio(
|
|
@@ -218,27 +227,30 @@ def create_results_section(dit_handler) -> dict:
|
|
| 218 |
size="sm",
|
| 219 |
scale=1
|
| 220 |
)
|
| 221 |
-
with gr.Accordion(t("results.details_accordion"), open=False, visible=
|
| 222 |
codes_display_4 = gr.Textbox(
|
| 223 |
label=t("results.codes_label", n=4),
|
| 224 |
interactive=False,
|
| 225 |
buttons=["copy"],
|
|
|
|
| 226 |
max_lines=4,
|
| 227 |
-
visible=
|
| 228 |
)
|
| 229 |
score_display_4 = gr.Textbox(
|
| 230 |
label=t("results.quality_score_label", n=4),
|
| 231 |
interactive=False,
|
| 232 |
buttons=["copy"],
|
|
|
|
| 233 |
max_lines=6,
|
| 234 |
-
visible=
|
| 235 |
)
|
| 236 |
lrc_display_4 = gr.Textbox(
|
| 237 |
label=t("results.lrc_label", n=4),
|
| 238 |
interactive=True,
|
| 239 |
buttons=["copy"],
|
|
|
|
| 240 |
max_lines=8,
|
| 241 |
-
visible=
|
| 242 |
)
|
| 243 |
|
| 244 |
# Second row for batch size 5-8 (initially hidden)
|
|
@@ -255,27 +267,30 @@ def create_results_section(dit_handler) -> dict:
|
|
| 255 |
save_btn_5 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
|
| 256 |
score_btn_5 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
|
| 257 |
lrc_btn_5 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
|
| 258 |
-
with gr.Accordion(t("results.details_accordion"), open=False, visible=
|
| 259 |
codes_display_5 = gr.Textbox(
|
| 260 |
label=t("results.codes_label", n=5),
|
| 261 |
interactive=False,
|
| 262 |
buttons=["copy"],
|
|
|
|
| 263 |
max_lines=4,
|
| 264 |
-
visible=
|
| 265 |
)
|
| 266 |
score_display_5 = gr.Textbox(
|
| 267 |
label=t("results.quality_score_label", n=5),
|
| 268 |
interactive=False,
|
| 269 |
buttons=["copy"],
|
|
|
|
| 270 |
max_lines=6,
|
| 271 |
-
visible=
|
| 272 |
)
|
| 273 |
lrc_display_5 = gr.Textbox(
|
| 274 |
label=t("results.lrc_label", n=5),
|
| 275 |
interactive=True,
|
| 276 |
buttons=["copy"],
|
|
|
|
| 277 |
max_lines=8,
|
| 278 |
-
visible=
|
| 279 |
)
|
| 280 |
with gr.Column() as audio_col_6:
|
| 281 |
generated_audio_6 = gr.Audio(
|
|
@@ -289,27 +304,30 @@ def create_results_section(dit_handler) -> dict:
|
|
| 289 |
save_btn_6 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
|
| 290 |
score_btn_6 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
|
| 291 |
lrc_btn_6 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
|
| 292 |
-
with gr.Accordion(t("results.details_accordion"), open=False, visible=
|
| 293 |
codes_display_6 = gr.Textbox(
|
| 294 |
label=t("results.codes_label", n=6),
|
| 295 |
interactive=False,
|
| 296 |
buttons=["copy"],
|
|
|
|
| 297 |
max_lines=4,
|
| 298 |
-
visible=
|
| 299 |
)
|
| 300 |
score_display_6 = gr.Textbox(
|
| 301 |
label=t("results.quality_score_label", n=6),
|
| 302 |
interactive=False,
|
| 303 |
buttons=["copy"],
|
|
|
|
| 304 |
max_lines=6,
|
| 305 |
-
visible=
|
| 306 |
)
|
| 307 |
lrc_display_6 = gr.Textbox(
|
| 308 |
label=t("results.lrc_label", n=6),
|
| 309 |
interactive=True,
|
| 310 |
buttons=["copy"],
|
|
|
|
| 311 |
max_lines=8,
|
| 312 |
-
visible=
|
| 313 |
)
|
| 314 |
with gr.Column() as audio_col_7:
|
| 315 |
generated_audio_7 = gr.Audio(
|
|
@@ -323,27 +341,30 @@ def create_results_section(dit_handler) -> dict:
|
|
| 323 |
save_btn_7 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
|
| 324 |
score_btn_7 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
|
| 325 |
lrc_btn_7 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
|
| 326 |
-
with gr.Accordion(t("results.details_accordion"), open=False, visible=
|
| 327 |
codes_display_7 = gr.Textbox(
|
| 328 |
label=t("results.codes_label", n=7),
|
| 329 |
interactive=False,
|
| 330 |
buttons=["copy"],
|
|
|
|
| 331 |
max_lines=4,
|
| 332 |
-
visible=
|
| 333 |
)
|
| 334 |
score_display_7 = gr.Textbox(
|
| 335 |
label=t("results.quality_score_label", n=7),
|
| 336 |
interactive=False,
|
| 337 |
buttons=["copy"],
|
|
|
|
| 338 |
max_lines=6,
|
| 339 |
-
visible=
|
| 340 |
)
|
| 341 |
lrc_display_7 = gr.Textbox(
|
| 342 |
label=t("results.lrc_label", n=7),
|
| 343 |
interactive=True,
|
| 344 |
buttons=["copy"],
|
|
|
|
| 345 |
max_lines=8,
|
| 346 |
-
visible=
|
| 347 |
)
|
| 348 |
with gr.Column() as audio_col_8:
|
| 349 |
generated_audio_8 = gr.Audio(
|
|
@@ -357,27 +378,30 @@ def create_results_section(dit_handler) -> dict:
|
|
| 357 |
save_btn_8 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
|
| 358 |
score_btn_8 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
|
| 359 |
lrc_btn_8 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
|
| 360 |
-
with gr.Accordion(t("results.details_accordion"), open=False, visible=
|
| 361 |
codes_display_8 = gr.Textbox(
|
| 362 |
label=t("results.codes_label", n=8),
|
| 363 |
interactive=False,
|
| 364 |
buttons=["copy"],
|
|
|
|
| 365 |
max_lines=4,
|
| 366 |
-
visible=
|
| 367 |
)
|
| 368 |
score_display_8 = gr.Textbox(
|
| 369 |
label=t("results.quality_score_label", n=8),
|
| 370 |
interactive=False,
|
| 371 |
buttons=["copy"],
|
|
|
|
| 372 |
max_lines=6,
|
| 373 |
-
visible=
|
| 374 |
)
|
| 375 |
lrc_display_8 = gr.Textbox(
|
| 376 |
label=t("results.lrc_label", n=8),
|
| 377 |
interactive=True,
|
| 378 |
buttons=["copy"],
|
|
|
|
| 379 |
max_lines=8,
|
| 380 |
-
visible=
|
| 381 |
)
|
| 382 |
|
| 383 |
status_output = gr.Textbox(label=t("results.generation_status"), interactive=False)
|
|
|
|
| 56 |
size="sm",
|
| 57 |
scale=1
|
| 58 |
)
|
| 59 |
+
with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_1:
|
| 60 |
codes_display_1 = gr.Textbox(
|
| 61 |
label=t("results.codes_label", n=1),
|
| 62 |
interactive=False,
|
| 63 |
buttons=["copy"],
|
| 64 |
+
lines=4,
|
| 65 |
max_lines=4,
|
| 66 |
+
visible=True
|
| 67 |
)
|
| 68 |
score_display_1 = gr.Textbox(
|
| 69 |
label=t("results.quality_score_label", n=1),
|
| 70 |
interactive=False,
|
| 71 |
buttons=["copy"],
|
| 72 |
+
lines=6,
|
| 73 |
max_lines=6,
|
| 74 |
+
visible=True
|
| 75 |
)
|
| 76 |
lrc_display_1 = gr.Textbox(
|
| 77 |
label=t("results.lrc_label", n=1),
|
| 78 |
interactive=True,
|
| 79 |
buttons=["copy"],
|
| 80 |
+
lines=8,
|
| 81 |
max_lines=8,
|
| 82 |
+
visible=True
|
| 83 |
)
|
| 84 |
with gr.Column(visible=True) as audio_col_2:
|
| 85 |
generated_audio_2 = gr.Audio(
|
|
|
|
| 113 |
size="sm",
|
| 114 |
scale=1
|
| 115 |
)
|
| 116 |
+
with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_2:
|
| 117 |
codes_display_2 = gr.Textbox(
|
| 118 |
label=t("results.codes_label", n=2),
|
| 119 |
interactive=False,
|
| 120 |
buttons=["copy"],
|
| 121 |
+
lines=4,
|
| 122 |
max_lines=4,
|
| 123 |
+
visible=True
|
| 124 |
)
|
| 125 |
score_display_2 = gr.Textbox(
|
| 126 |
label=t("results.quality_score_label", n=2),
|
| 127 |
interactive=False,
|
| 128 |
buttons=["copy"],
|
| 129 |
+
lines=6,
|
| 130 |
max_lines=6,
|
| 131 |
+
visible=True
|
| 132 |
)
|
| 133 |
lrc_display_2 = gr.Textbox(
|
| 134 |
label=t("results.lrc_label", n=2),
|
| 135 |
interactive=True,
|
| 136 |
buttons=["copy"],
|
| 137 |
+
lines=8,
|
| 138 |
max_lines=8,
|
| 139 |
+
visible=True
|
| 140 |
)
|
| 141 |
with gr.Column(visible=False) as audio_col_3:
|
| 142 |
generated_audio_3 = gr.Audio(
|
|
|
|
| 170 |
size="sm",
|
| 171 |
scale=1
|
| 172 |
)
|
| 173 |
+
with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_3:
|
| 174 |
codes_display_3 = gr.Textbox(
|
| 175 |
label=t("results.codes_label", n=3),
|
| 176 |
interactive=False,
|
| 177 |
buttons=["copy"],
|
| 178 |
+
lines=4,
|
| 179 |
max_lines=4,
|
| 180 |
+
visible=True
|
| 181 |
)
|
| 182 |
score_display_3 = gr.Textbox(
|
| 183 |
label=t("results.quality_score_label", n=3),
|
| 184 |
interactive=False,
|
| 185 |
buttons=["copy"],
|
| 186 |
+
lines=6,
|
| 187 |
max_lines=6,
|
| 188 |
+
visible=True
|
| 189 |
)
|
| 190 |
lrc_display_3 = gr.Textbox(
|
| 191 |
label=t("results.lrc_label", n=3),
|
| 192 |
interactive=True,
|
| 193 |
buttons=["copy"],
|
| 194 |
+
lines=8,
|
| 195 |
max_lines=8,
|
| 196 |
+
visible=True
|
| 197 |
)
|
| 198 |
with gr.Column(visible=False) as audio_col_4:
|
| 199 |
generated_audio_4 = gr.Audio(
|
|
|
|
| 227 |
size="sm",
|
| 228 |
scale=1
|
| 229 |
)
|
| 230 |
+
with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_4:
|
| 231 |
codes_display_4 = gr.Textbox(
|
| 232 |
label=t("results.codes_label", n=4),
|
| 233 |
interactive=False,
|
| 234 |
buttons=["copy"],
|
| 235 |
+
lines=4,
|
| 236 |
max_lines=4,
|
| 237 |
+
visible=True
|
| 238 |
)
|
| 239 |
score_display_4 = gr.Textbox(
|
| 240 |
label=t("results.quality_score_label", n=4),
|
| 241 |
interactive=False,
|
| 242 |
buttons=["copy"],
|
| 243 |
+
lines=6,
|
| 244 |
max_lines=6,
|
| 245 |
+
visible=True
|
| 246 |
)
|
| 247 |
lrc_display_4 = gr.Textbox(
|
| 248 |
label=t("results.lrc_label", n=4),
|
| 249 |
interactive=True,
|
| 250 |
buttons=["copy"],
|
| 251 |
+
lines=8,
|
| 252 |
max_lines=8,
|
| 253 |
+
visible=True
|
| 254 |
)
|
| 255 |
|
| 256 |
# Second row for batch size 5-8 (initially hidden)
|
|
|
|
| 267 |
save_btn_5 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
|
| 268 |
score_btn_5 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
|
| 269 |
lrc_btn_5 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
|
| 270 |
+
with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_5:
|
| 271 |
codes_display_5 = gr.Textbox(
|
| 272 |
label=t("results.codes_label", n=5),
|
| 273 |
interactive=False,
|
| 274 |
buttons=["copy"],
|
| 275 |
+
lines=4,
|
| 276 |
max_lines=4,
|
| 277 |
+
visible=True
|
| 278 |
)
|
| 279 |
score_display_5 = gr.Textbox(
|
| 280 |
label=t("results.quality_score_label", n=5),
|
| 281 |
interactive=False,
|
| 282 |
buttons=["copy"],
|
| 283 |
+
lines=6,
|
| 284 |
max_lines=6,
|
| 285 |
+
visible=True
|
| 286 |
)
|
| 287 |
lrc_display_5 = gr.Textbox(
|
| 288 |
label=t("results.lrc_label", n=5),
|
| 289 |
interactive=True,
|
| 290 |
buttons=["copy"],
|
| 291 |
+
lines=8,
|
| 292 |
max_lines=8,
|
| 293 |
+
visible=True
|
| 294 |
)
|
| 295 |
with gr.Column() as audio_col_6:
|
| 296 |
generated_audio_6 = gr.Audio(
|
|
|
|
| 304 |
save_btn_6 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
|
| 305 |
score_btn_6 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
|
| 306 |
lrc_btn_6 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
|
| 307 |
+
with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_6:
|
| 308 |
codes_display_6 = gr.Textbox(
|
| 309 |
label=t("results.codes_label", n=6),
|
| 310 |
interactive=False,
|
| 311 |
buttons=["copy"],
|
| 312 |
+
lines=4,
|
| 313 |
max_lines=4,
|
| 314 |
+
visible=True
|
| 315 |
)
|
| 316 |
score_display_6 = gr.Textbox(
|
| 317 |
label=t("results.quality_score_label", n=6),
|
| 318 |
interactive=False,
|
| 319 |
buttons=["copy"],
|
| 320 |
+
lines=6,
|
| 321 |
max_lines=6,
|
| 322 |
+
visible=True
|
| 323 |
)
|
| 324 |
lrc_display_6 = gr.Textbox(
|
| 325 |
label=t("results.lrc_label", n=6),
|
| 326 |
interactive=True,
|
| 327 |
buttons=["copy"],
|
| 328 |
+
lines=8,
|
| 329 |
max_lines=8,
|
| 330 |
+
visible=True
|
| 331 |
)
|
| 332 |
with gr.Column() as audio_col_7:
|
| 333 |
generated_audio_7 = gr.Audio(
|
|
|
|
| 341 |
save_btn_7 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
|
| 342 |
score_btn_7 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
|
| 343 |
lrc_btn_7 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
|
| 344 |
+
with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_7:
|
| 345 |
codes_display_7 = gr.Textbox(
|
| 346 |
label=t("results.codes_label", n=7),
|
| 347 |
interactive=False,
|
| 348 |
buttons=["copy"],
|
| 349 |
+
lines=4,
|
| 350 |
max_lines=4,
|
| 351 |
+
visible=True
|
| 352 |
)
|
| 353 |
score_display_7 = gr.Textbox(
|
| 354 |
label=t("results.quality_score_label", n=7),
|
| 355 |
interactive=False,
|
| 356 |
buttons=["copy"],
|
| 357 |
+
lines=6,
|
| 358 |
max_lines=6,
|
| 359 |
+
visible=True
|
| 360 |
)
|
| 361 |
lrc_display_7 = gr.Textbox(
|
| 362 |
label=t("results.lrc_label", n=7),
|
| 363 |
interactive=True,
|
| 364 |
buttons=["copy"],
|
| 365 |
+
lines=8,
|
| 366 |
max_lines=8,
|
| 367 |
+
visible=True
|
| 368 |
)
|
| 369 |
with gr.Column() as audio_col_8:
|
| 370 |
generated_audio_8 = gr.Audio(
|
|
|
|
| 378 |
save_btn_8 = gr.Button(t("results.save_btn"), variant="primary", size="sm", scale=1)
|
| 379 |
score_btn_8 = gr.Button(t("results.score_btn"), variant="secondary", size="sm", scale=1)
|
| 380 |
lrc_btn_8 = gr.Button(t("results.lrc_btn"), variant="secondary", size="sm", scale=1)
|
| 381 |
+
with gr.Accordion(t("results.details_accordion"), open=False, visible=True) as details_accordion_8:
|
| 382 |
codes_display_8 = gr.Textbox(
|
| 383 |
label=t("results.codes_label", n=8),
|
| 384 |
interactive=False,
|
| 385 |
buttons=["copy"],
|
| 386 |
+
lines=4,
|
| 387 |
max_lines=4,
|
| 388 |
+
visible=True
|
| 389 |
)
|
| 390 |
score_display_8 = gr.Textbox(
|
| 391 |
label=t("results.quality_score_label", n=8),
|
| 392 |
interactive=False,
|
| 393 |
buttons=["copy"],
|
| 394 |
+
lines=6,
|
| 395 |
max_lines=6,
|
| 396 |
+
visible=True
|
| 397 |
)
|
| 398 |
lrc_display_8 = gr.Textbox(
|
| 399 |
label=t("results.lrc_label", n=8),
|
| 400 |
interactive=True,
|
| 401 |
buttons=["copy"],
|
| 402 |
+
lines=8,
|
| 403 |
max_lines=8,
|
| 404 |
+
visible=True
|
| 405 |
)
|
| 406 |
|
| 407 |
status_output = gr.Textbox(label=t("results.generation_status"), interactive=False)
|