insta-maker

Sleeping

App Files Community

hivecorp committed on Nov 3, 2024

Commit

bccb8c6

verified ·

1 Parent(s): 6c892fc

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -20

app.py CHANGED Viewed

@@ -42,12 +42,12 @@ def split_text_into_segments(text):
     return segments
-# Function to generate SRT with accurate timing per batch
-async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate):
     audio_file = f"batch_{batch_num}_audio.wav"
     # Generate the audio using edge-tts with pitch and rate adjustment
-    tts = edge_tts.Communicate(batch_text, "en-US-AndrewNeural", rate=f"{rate}%", pitch=f"{pitch}Hz")
     await tts.save(audio_file)
     actual_length = get_audio_length(audio_file)
@@ -58,6 +58,7 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate
     srt_content = ""
     for index, segment in enumerate(segments):
         end_time = start_time + segment_duration
         if end_time > start_offset + actual_length:
             end_time = start_offset + actual_length
@@ -69,14 +70,15 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate
     return srt_content, audio_file, start_time
-async def batch_process_srt_and_audio(script_text, pitch, rate, progress=gr.Progress()):
     batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
     all_srt_content = ""
     combined_audio = AudioSegment.empty()
     start_offset = 0.0
     for batch_num, batch_text in enumerate(batches):
-        srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate)
         all_srt_content += srt_content
         batch_audio = AudioSegment.from_file(audio_file)
@@ -99,33 +101,40 @@ async def batch_process_srt_and_audio(script_text, pitch, rate, progress=gr.Prog
         validated_srt_content += line + "\n"
     unique_id = uuid.uuid4()
-    final_audio_path = f"final_audio_{unique_id}.mp3"  # Export as mp3
     final_srt_path = f"final_subtitles_{unique_id}.srt"
-    combined_audio.export(final_audio_path, format="mp3", bitrate="320k")  # Export as MP3 at 320kbps
     with open(final_srt_path, "w") as srt_file:
         srt_file.write(validated_srt_content)
     return final_srt_path, final_audio_path
-async def process_script(script_text, pitch, rate):
-    srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch, rate)
     return srt_path, audio_path, audio_path
-# Function to get the available US English voices
-async def get_available_voices():
-    voices = await edge_tts.list_voices()
-    return [voice for voice in voices if 'en-US' in voice['name']]
-# Main execution to fetch voices
-available_voices = asyncio.run(get_available_voices())
-# Gradio interface setup
 app = gr.Interface(
     fn=process_script,
     inputs=[
         gr.Textbox(label="Enter Script Text", lines=10),
-        gr.Dropdown(label="Select Voice", choices=[voice['name'] for voice in available_voices], value=available_voices[0]['name']),
         gr.Slider(label="Speech Rate Adjustment (%)", minimum=0, maximum=2, step=0.1, value=1),
         gr.Slider(label="Pitch Adjustment (Hz)", minimum=-100, maximum=100, step=1, value=1)
     ],
@@ -134,7 +143,7 @@ app = gr.Interface(
         gr.File(label="Download Audio File"),
         gr.Audio(label="Play Audio")
     ],
-    description="HIVEcorp TTS Generator with adjustable speech rate and pitch."
 )
 app.launch()

     return segments
+# Function to generate SRT with accurate timing per batch and cross-check timing
+async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, voice, rate):
     audio_file = f"batch_{batch_num}_audio.wav"
     # Generate the audio using edge-tts with pitch and rate adjustment
+    tts = edge_tts.Communicate(batch_text, voice, rate=f"{rate}%", pitch=f"{pitch}Hz")
     await tts.save(audio_file)
     actual_length = get_audio_length(audio_file)
     srt_content = ""
     for index, segment in enumerate(segments):
         end_time = start_time + segment_duration
         if end_time > start_offset + actual_length:
             end_time = start_offset + actual_length
     return srt_content, audio_file, start_time
+# Batch processing function with cumulative timing, progress indicator, and final SRT validation
+async def batch_process_srt_and_audio(script_text, pitch, voice, rate, progress=gr.Progress()):
     batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
     all_srt_content = ""
     combined_audio = AudioSegment.empty()
     start_offset = 0.0
     for batch_num, batch_text in enumerate(batches):
+        srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset, pitch, voice, rate)
         all_srt_content += srt_content
         batch_audio = AudioSegment.from_file(audio_file)
         validated_srt_content += line + "\n"
     unique_id = uuid.uuid4()
+    final_audio_path = f"final_audio_{unique_id}.mp3"
     final_srt_path = f"final_subtitles_{unique_id}.srt"
+    combined_audio.export(final_audio_path, format="mp3", bitrate="320k")
     with open(final_srt_path, "w") as srt_file:
         srt_file.write(validated_srt_content)
     return final_srt_path, final_audio_path
+# Gradio interface function
+async def process_script(script_text, pitch, voice, rate):
+    srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch, voice, rate)
     return srt_path, audio_path, audio_path
+# List of available voices
+voices = {
+    "Jenny": "en-US-JennyNeural",
+    "Guy": "en-US-GuyNeural",
+    "Ana": "en-US-AnaNeural",
+    "Aria": "en-US-AriaNeural",
+    "Brian": "en-US-BrianNeural",
+    "Christopher": "en-US-ChristopherNeural",
+    "Eric": "en-US-EricNeural",
+    "Michelle": "en-US-MichelleNeural",
+    "Roger": "en-US-RogerNeural",
+}
+# Gradio interface setup with voice selection and speech rate adjustment
 app = gr.Interface(
     fn=process_script,
     inputs=[
         gr.Textbox(label="Enter Script Text", lines=10),
+        gr.Dropdown(label="Select Voice", choices=list(voices.keys()), value="Jenny"),
         gr.Slider(label="Speech Rate Adjustment (%)", minimum=0, maximum=2, step=0.1, value=1),
         gr.Slider(label="Pitch Adjustment (Hz)", minimum=-100, maximum=100, step=1, value=1)
     ],
         gr.File(label="Download Audio File"),
         gr.Audio(label="Play Audio")
     ],
+    description="HIVEcorp TTS Generator with customizable voice, speech rate, and pitch adjustments."
 )
 app.launch()