Spaces: Running on Zero
Update gradio_mix.py
Browse files
- gradio_mix.py +43 -0
gradio_mix.py
CHANGED
|
@@ -1073,6 +1073,49 @@ def get_app():
|
|
| 1073 |
output_audio = gr.Audio(label="Output Audio", type="numpy")
|
| 1074 |
with gr.Accordion("Inference transcript", open=True):
|
| 1075 |
inference_transcript = gr.Textbox(label="Inference transcript", lines=5, interactive=False, info="Inference was performed on this transcript.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1076 |
with gr.Group(visible=False) as long_tts_sentence_editor:
|
| 1077 |
sentence_selector = gr.Dropdown(label="Sentence", value=None,
|
| 1078 |
info="Select sentence you want to regenerate")
|
|
|
|
| 1073 |
output_audio = gr.Audio(label="Output Audio", type="numpy")
|
| 1074 |
with gr.Accordion("Inference transcript", open=True):
|
| 1075 |
inference_transcript = gr.Textbox(label="Inference transcript", lines=5, interactive=False, info="Inference was performed on this transcript.")
|
| 1076 |
+
# Simple in-app README to guide users through the editing workflow.
|
| 1077 |
+
# Use HTML so we can cap the height (~12 lines) and enable scrolling.
|
| 1078 |
+
readme_help = gr.HTML(
|
| 1079 |
+
value=(
|
| 1080 |
+
'<div style="max-height: 12em; overflow-y: auto; white-space: pre-wrap;">'
|
| 1081 |
+
"<h4>README: How to Use This Tool</h4>"
|
| 1082 |
+
"<p><b>1. Load models</b><br>"
|
| 1083 |
+
"Click <b>“Load Models”</b> and wait for all models to finish loading. "
|
| 1084 |
+
"Note that <b>WhisperX</b> takes the longest to initialize, so please be patient.</p>"
|
| 1085 |
+
"<p><b>2. Upload input audio</b><br>"
|
| 1086 |
+
"Click <b>“Input Audio”</b> and upload the audio file you want to edit.</p>"
|
| 1087 |
+
"<p><b>3. Transcribe and correct text</b><br>"
|
| 1088 |
+
"Click <b>“Transcribe”</b> to perform speech recognition. If the transcription is inaccurate, "
|
| 1089 |
+
"edit the text in <b>“Original transcript”</b>, then click <b>“ReAlign”</b> to recompute "
|
| 1090 |
+
"word-level timestamps.</p>"
|
| 1091 |
+
"<p><b>4. (Optional) Denoise noisy audio</b><br>"
|
| 1092 |
+
"If the input audio is noisy and affects recognition or synthesis quality, click "
|
| 1093 |
+
"<b>“Denoise”</b> to apply noise reduction. If you are not satisfied with the denoised result, "
|
| 1094 |
+
"click <b>“Cancel Denoise”</b> to restore the original audio, or switch to a different denoiser "
|
| 1095 |
+
"under <b>“Select models”</b> and reload.</p>"
|
| 1096 |
+
"<p><b>5. Select the edit span</b><br>"
|
| 1097 |
+
"Use <b>“First word to edit”</b> and <b>“Last word to edit”</b> to specify the region to modify, "
|
| 1098 |
+
"then click <b>“Check edit words”</b> to preview the selection. For finer control, you may also adjust "
|
| 1099 |
+
"<b>“Edit from time”</b> and <b>“Edit to time”</b>.</p>"
|
| 1100 |
+
"<p><b>6. Enter the new text</b><br>"
|
| 1101 |
+
"In the <b>“Text”</b> box, enter the text that should replace the selected segment.</p>"
|
| 1102 |
+
"<p><b>7. Run the edit</b><br>"
|
| 1103 |
+
"Click <b>“Run”</b> and wait for the model to generate the edited audio.</p>"
|
| 1104 |
+
"<p><b>8. Inspect the result</b><br>"
|
| 1105 |
+
"The edited waveform will appear in <b>“Output Audio”</b>, and the corresponding edited text will be "
|
| 1106 |
+
"shown under <b>“Inference transcript”</b>.</p>"
|
| 1107 |
+
"<p><b>9. Refine or change models</b><br>"
|
| 1108 |
+
"If the result is not satisfactory, try adjusting the <b>“Generation Parameters”</b> or selecting a "
|
| 1109 |
+
"different <b>“Edit Model”</b> under <b>“Select models”</b>, then run again.</p>"
|
| 1110 |
+
"<p><b>10. Feedback</b><br>"
|
| 1111 |
+
"For bug reports or feature requests, feel free to:<br>"
|
| 1112 |
+
"1) Open a GitHub issue<br>"
|
| 1113 |
+
"2) Post on the Hugging Face community page<br>"
|
| 1114 |
+
"3) Contact us via email at <code>approximetal@gmail.com</code></p>"
|
| 1115 |
+
"</div>"
|
| 1116 |
+
)
|
| 1117 |
+
)
|
| 1118 |
+
|
| 1119 |
with gr.Group(visible=False) as long_tts_sentence_editor:
|
| 1120 |
sentence_selector = gr.Dropdown(label="Sentence", value=None,
|
| 1121 |
info="Select sentence you want to regenerate")
|