Approximetal committed on
Commit
1cec52c
·
verified ·
1 Parent(s): b68fd46

Update gradio_mix.py

Browse files
Files changed (1) hide show
  1. gradio_mix.py +43 -0
gradio_mix.py CHANGED
@@ -1073,6 +1073,49 @@ def get_app():
1073
  output_audio = gr.Audio(label="Output Audio", type="numpy")
1074
  with gr.Accordion("Inference transcript", open=True):
1075
  inference_transcript = gr.Textbox(label="Inference transcript", lines=5, interactive=False, info="Inference was performed on this transcript.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1076
  with gr.Group(visible=False) as long_tts_sentence_editor:
1077
  sentence_selector = gr.Dropdown(label="Sentence", value=None,
1078
  info="Select sentence you want to regenerate")
 
1073
  output_audio = gr.Audio(label="Output Audio", type="numpy")
1074
  with gr.Accordion("Inference transcript", open=True):
1075
  inference_transcript = gr.Textbox(label="Inference transcript", lines=5, interactive=False, info="Inference was performed on this transcript.")
1076
+ # Simple in-app README to guide users through the editing workflow.
1077
+ # Use HTML so we can cap the height (~12 lines) and enable scrolling.
1078
+ readme_help = gr.HTML(
1079
+ value=(
1080
+ '<div style="max-height: 12em; overflow-y: auto; white-space: pre-wrap;">'
1081
+ "<h4>README: How to Use This Tool</h4>"
1082
+ "<p><b>1. Load models</b><br>"
1083
+ "Click <b>&ldquo;Load Models&rdquo;</b> and wait for all models to finish loading. "
1084
+ "Note that <b>WhisperX</b> takes the longest to initialize, so please be patient.</p>"
1085
+ "<p><b>2. Upload input audio</b><br>"
1086
+ "Click <b>&ldquo;Input Audio&rdquo;</b> and upload the audio file you want to edit.</p>"
1087
+ "<p><b>3. Transcribe and correct text</b><br>"
1088
+ "Click <b>&ldquo;Transcribe&rdquo;</b> to perform speech recognition. If the transcription is inaccurate, "
1089
+ "edit the text in <b>&ldquo;Original transcript&rdquo;</b>, then click <b>&ldquo;ReAlign&rdquo;</b> to recompute "
1090
+ "word-level timestamps.</p>"
1091
+ "<p><b>4. (Optional) Denoise noisy audio</b><br>"
1092
+ "If the input audio is noisy and affects recognition or synthesis quality, click "
1093
+ "<b>&ldquo;Denoise&rdquo;</b> to apply noise reduction. If you are not satisfied with the denoised result, "
1094
+ "click <b>&ldquo;Cancel Denoise&rdquo;</b> to restore the original audio, or switch to a different denoiser "
1095
+ "under <b>&ldquo;Select models&rdquo;</b> and reload.</p>"
1096
+ "<p><b>5. Select the edit span</b><br>"
1097
+ "Use <b>&ldquo;First word to edit&rdquo;</b> and <b>&ldquo;Last word to edit&rdquo;</b> to specify the region to modify, "
1098
+ "then click <b>&ldquo;Check edit words&rdquo;</b> to preview the selection. For finer control, you may also adjust "
1099
+ "<b>&ldquo;Edit from time&rdquo;</b> and <b>&ldquo;Edit to time&rdquo;</b>.</p>"
1100
+ "<p><b>6. Enter the new text</b><br>"
1101
+ "In the <b>&ldquo;Text&rdquo;</b> box, enter the text that should replace the selected segment.</p>"
1102
+ "<p><b>7. Run the edit</b><br>"
1103
+ "Click <b>&ldquo;Run&rdquo;</b> and wait for the model to generate the edited audio.</p>"
1104
+ "<p><b>8. Inspect the result</b><br>"
1105
+ "The edited waveform will appear in <b>&ldquo;Output Audio&rdquo;</b>, and the corresponding edited text will be "
1106
+ "shown under <b>&ldquo;Inference transcript&rdquo;</b>.</p>"
1107
+ "<p><b>9. Refine or change models</b><br>"
1108
+ "If the result is not satisfactory, try adjusting the <b>&ldquo;Generation Parameters&rdquo;</b> or selecting a "
1109
+ "different <b>&ldquo;Edit Model&rdquo;</b> under <b>&ldquo;Select models&rdquo;</b>, then run again.</p>"
1110
+ "<p><b>10. Feedback</b><br>"
1111
+ "For bug reports or feature requests, feel free to:<br>"
1112
+ "1) Open a GitHub issue<br>"
1113
+ "2) Post on the Hugging Face community page<br>"
1114
+ "3) Contact us via email at <code>approximetal@gmail.com</code></p>"
1115
+ "</div>"
1116
+ )
1117
+ )
1118
+
1119
  with gr.Group(visible=False) as long_tts_sentence_editor:
1120
  sentence_selector = gr.Dropdown(label="Sentence", value=None,
1121
  info="Select sentence you want to regenerate")