Spaces:

SohomToom
/

DocToAudioConverted

Sleeping

App Files Files Community

SohomToom committed on May 6, 2025

Commit

96c6f9a

verified ·

1 Parent(s): f4a6ae0

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -32

app.py CHANGED Viewed

@@ -137,36 +137,45 @@ def generate_sample_audio(sample_text, speaker_label):
 def generate_audio(docx_file, speaker_label):
     """Convert a .docx document to speech and return the path of a zip archive.

     Each paragraph returned by ``extract_paragraphs_from_docx`` is synthesized
     with the Coqui VCTK/VITS model and appended to one combined recording.
     If synthesis fails part-way through, the audio produced so far is still
     exported (best-effort partial output).

     Args:
         docx_file: Document handle/path, passed to ``extract_paragraphs_from_docx``.
         speaker_label: UI label of the voice; resolved to a speaker id via
             ``get_speaker_id_from_label``.

     Returns:
         Path to ``output.zip``, which contains ``final_output.wav``.
     """
     speaker_id = get_speaker_id_from_label(speaker_label)
     model = TTS("tts_models/en/vctk/vits")
     paragraphs = extract_paragraphs_from_docx(docx_file)
     combined_audio = AudioSegment.empty()
     try:
         for para in paragraphs:
             tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
             # Only the unique path is needed; close the handle so the TTS
             # engine can (re)open the file for writing on every platform.
             tmp.close()
             try:
                 model.tts_to_file(text=para, speaker="p" + speaker_id, file_path=tmp.name)
                 combined_audio += AudioSegment.from_wav(tmp.name)
             finally:
                 # The chunk is already decoded into memory (or failed), so the
                 # scratch file can go now; this also covers the failure path.
                 os.remove(tmp.name)
     except Exception as e:
         # Deliberate best-effort: keep whatever was synthesized so far.
         print("Generation interrupted. Saving partial output.", e)

     output_dir = tempfile.mkdtemp()
     final_output_path = os.path.join(output_dir, "final_output.wav")
     combined_audio.export(final_output_path, format="wav")
     zip_path = os.path.join(output_dir, "output.zip")
     with zipfile.ZipFile(zip_path, 'w') as zipf:
         zipf.write(final_output_path, arcname="final_output.wav")
     return zip_path
 # --- UI ---
 speaker_choices = list_speaker_choices()
@@ -177,14 +186,22 @@ with gr.Blocks() as demo:
     with gr.Row():
         speaker_dropdown = gr.Dropdown(label="Select Voice", choices=speaker_choices)
     with gr.Row():
         sample_textbox = gr.Textbox(label="Enter Sample Text (Max 500 characters)", max_lines=5)
         sample_button = gr.Button("Generate Sample")
         clear_button = gr.Button("Clear Sample")
     sample_audio = gr.Audio(label="Sample Output", type="filepath")
-    sample_button.click(fn=generate_sample_audio, inputs=[sample_textbox, speaker_dropdown], outputs=[sample_audio])
     clear_button.click(fn=lambda: None, inputs=[], outputs=[sample_audio])
     with gr.Row():
@@ -192,7 +209,11 @@ with gr.Blocks() as demo:
         generate_button = gr.Button("Generate Full Audio")
         download_output = gr.File(label="Download Output Zip")
-    generate_button.click(fn=generate_audio, inputs=[docx_input, speaker_dropdown], outputs=[download_output])
 if __name__ == "__main__":
     demo.launch()

 def _make_bark_synthesizer():
     """Return a ``text -> AudioSegment`` callable backed by Bark."""
     # Imported lazily so Bark is only loaded when that engine is selected;
     # the alias avoids shadowing this module's own ``generate_audio``.
     import numpy as np
     from bark import SAMPLE_RATE, preload_models
     from bark import generate_audio as bark_generate_audio

     preload_models()

     def synthesize(text):
         samples = bark_generate_audio(text)
         # Bark returns floating-point samples; pydub is told sample_width=2,
         # so convert to 16-bit PCM before handing over the raw bytes.
         pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
         return AudioSegment(pcm.tobytes(), frame_rate=SAMPLE_RATE, sample_width=2, channels=1)

     return synthesize

 def _make_coqui_synthesizer(speaker_id):
     """Return a ``text -> AudioSegment`` callable backed by Coqui VCTK/VITS."""
     model = TTS("tts_models/en/vctk/vits")

     def synthesize(text):
         tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
         # Only the unique path is needed; close the handle so the TTS engine
         # can open the file for writing on every platform.
         tmp.close()
         try:
             model.tts_to_file(text=text, speaker="p" + speaker_id, file_path=tmp.name)
             return AudioSegment.from_wav(tmp.name)
         finally:
             # Audio is decoded into memory (or synthesis failed): drop the file.
             os.remove(tmp.name)

     return synthesize

 def generate_audio(docx_file, speaker_label, engine_choice="Coqui (XTTS)"):
     """Convert a .docx document to speech and return the path of a zip archive.

     Every paragraph of the document is synthesized with the selected engine
     and appended to one combined recording. If synthesis fails part-way
     through, the audio produced so far is still exported (best-effort).

     Args:
         docx_file: Document handle/path, passed to ``extract_paragraphs_from_docx``.
         speaker_label: UI label of the voice; resolved via
             ``get_speaker_id_from_label``. Only the Coqui engine uses it.
         engine_choice: ``"Bark"`` selects Bark; any other value (including the
             default, which matches the UI dropdown default) selects Coqui.

     Returns:
         Path to ``output.zip``, which contains ``final_output.wav``.
     """
     speaker_id = get_speaker_id_from_label(speaker_label)
     paragraphs = extract_paragraphs_from_docx(docx_file)
     combined_audio = AudioSegment.empty()
     try:
         if engine_choice == "Bark":
             synthesize = _make_bark_synthesizer()
         else:
             synthesize = _make_coqui_synthesizer(speaker_id)
         for para in paragraphs:
             combined_audio += synthesize(para)
     except Exception as e:
         # Deliberate best-effort: keep whatever was synthesized so far.
         print("Generation interrupted. Saving partial output.", e)

     # Export on both the success and the partial-failure path so that
     # ``zip_path`` is always defined when we return.
     output_dir = tempfile.mkdtemp()
     final_output_path = os.path.join(output_dir, "final_output.wav")
     combined_audio.export(final_output_path, format="wav")
     zip_path = os.path.join(output_dir, "output.zip")
     with zipfile.ZipFile(zip_path, 'w') as zipf:
         zipf.write(final_output_path, arcname="final_output.wav")
     return zip_path
 # --- UI ---
 speaker_choices = list_speaker_choices()
     with gr.Row():
         speaker_dropdown = gr.Dropdown(label="Select Voice", choices=speaker_choices)
     with gr.Row():
         sample_textbox = gr.Textbox(label="Enter Sample Text (Max 500 characters)", max_lines=5)
         sample_button = gr.Button("Generate Sample")
         clear_button = gr.Button("Clear Sample")
+    tts_engine_dropdown = gr.Dropdown(label="TTS Engine", choices=["Coqui (XTTS)", "Bark"], value="Coqui (XTTS)")
     sample_audio = gr.Audio(label="Sample Output", type="filepath")
+    sample_button.click(
+    fn=generate_sample_audio,
+    inputs=[sample_textbox, speaker_dropdown, tts_engine_dropdown],
+    outputs=[sample_audio]
+)
     clear_button.click(fn=lambda: None, inputs=[], outputs=[sample_audio])
     with gr.Row():
         generate_button = gr.Button("Generate Full Audio")
         download_output = gr.File(label="Download Output Zip")
+    generate_button.click(
+    fn=generate_audio,
+    inputs=[docx_input, speaker_dropdown, tts_engine_dropdown],
+    outputs=[download_output]
+)
 if __name__ == "__main__":
     demo.launch()