Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -42,12 +42,12 @@ def split_text_into_segments(text):
|
|
| 42 |
|
| 43 |
return segments
|
| 44 |
|
| 45 |
-
# Function to generate SRT with accurate timing per batch
|
| 46 |
-
async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate):
|
| 47 |
audio_file = f"batch_{batch_num}_audio.wav"
|
| 48 |
|
| 49 |
# Generate the audio using edge-tts with pitch and rate adjustment
|
| 50 |
-
tts = edge_tts.Communicate(batch_text,
|
| 51 |
await tts.save(audio_file)
|
| 52 |
|
| 53 |
actual_length = get_audio_length(audio_file)
|
|
@@ -58,6 +58,7 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate
|
|
| 58 |
srt_content = ""
|
| 59 |
for index, segment in enumerate(segments):
|
| 60 |
end_time = start_time + segment_duration
|
|
|
|
| 61 |
if end_time > start_offset + actual_length:
|
| 62 |
end_time = start_offset + actual_length
|
| 63 |
|
|
@@ -69,14 +70,15 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate
|
|
| 69 |
|
| 70 |
return srt_content, audio_file, start_time
|
| 71 |
|
| 72 |
-
|
|
|
|
| 73 |
batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
|
| 74 |
all_srt_content = ""
|
| 75 |
combined_audio = AudioSegment.empty()
|
| 76 |
start_offset = 0.0
|
| 77 |
|
| 78 |
for batch_num, batch_text in enumerate(batches):
|
| 79 |
-
srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate)
|
| 80 |
all_srt_content += srt_content
|
| 81 |
|
| 82 |
batch_audio = AudioSegment.from_file(audio_file)
|
|
@@ -99,33 +101,40 @@ async def batch_process_srt_and_audio(script_text, pitch, rate, progress=gr.Prog
|
|
| 99 |
validated_srt_content += line + "\n"
|
| 100 |
|
| 101 |
unique_id = uuid.uuid4()
|
| 102 |
-
final_audio_path = f"final_audio_{unique_id}.mp3"
|
| 103 |
final_srt_path = f"final_subtitles_{unique_id}.srt"
|
| 104 |
|
| 105 |
-
combined_audio.export(final_audio_path, format="mp3", bitrate="320k")
|
|
|
|
| 106 |
with open(final_srt_path, "w") as srt_file:
|
| 107 |
srt_file.write(validated_srt_content)
|
| 108 |
|
| 109 |
return final_srt_path, final_audio_path
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
|
|
|
| 113 |
return srt_path, audio_path, audio_path
|
| 114 |
|
| 115 |
-
#
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
app = gr.Interface(
|
| 125 |
fn=process_script,
|
| 126 |
inputs=[
|
| 127 |
gr.Textbox(label="Enter Script Text", lines=10),
|
| 128 |
-
gr.Dropdown(label="Select Voice", choices=
|
| 129 |
gr.Slider(label="Speech Rate Adjustment (%)", minimum=0, maximum=2, step=0.1, value=1),
|
| 130 |
gr.Slider(label="Pitch Adjustment (Hz)", minimum=-100, maximum=100, step=1, value=1)
|
| 131 |
],
|
|
@@ -134,7 +143,7 @@ app = gr.Interface(
|
|
| 134 |
gr.File(label="Download Audio File"),
|
| 135 |
gr.Audio(label="Play Audio")
|
| 136 |
],
|
| 137 |
-
description="HIVEcorp TTS Generator with
|
| 138 |
)
|
| 139 |
|
| 140 |
app.launch()
|
|
|
|
| 42 |
|
| 43 |
return segments
|
| 44 |
|
| 45 |
+
# Function to generate SRT with accurate timing per batch and cross-check timing
|
| 46 |
+
async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, voice, rate):
|
| 47 |
audio_file = f"batch_{batch_num}_audio.wav"
|
| 48 |
|
| 49 |
# Generate the audio using edge-tts with pitch and rate adjustment
|
| 50 |
+
tts = edge_tts.Communicate(batch_text, voice, rate=f"{rate}%", pitch=f"{pitch}Hz")
|
| 51 |
await tts.save(audio_file)
|
| 52 |
|
| 53 |
actual_length = get_audio_length(audio_file)
|
|
|
|
| 58 |
srt_content = ""
|
| 59 |
for index, segment in enumerate(segments):
|
| 60 |
end_time = start_time + segment_duration
|
| 61 |
+
|
| 62 |
if end_time > start_offset + actual_length:
|
| 63 |
end_time = start_offset + actual_length
|
| 64 |
|
|
|
|
| 70 |
|
| 71 |
return srt_content, audio_file, start_time
|
| 72 |
|
| 73 |
+
# Batch processing function with cumulative timing, progress indicator, and final SRT validation
|
| 74 |
+
async def batch_process_srt_and_audio(script_text, pitch, voice, rate, progress=gr.Progress()):
|
| 75 |
batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
|
| 76 |
all_srt_content = ""
|
| 77 |
combined_audio = AudioSegment.empty()
|
| 78 |
start_offset = 0.0
|
| 79 |
|
| 80 |
for batch_num, batch_text in enumerate(batches):
|
| 81 |
+
srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset, pitch, voice, rate)
|
| 82 |
all_srt_content += srt_content
|
| 83 |
|
| 84 |
batch_audio = AudioSegment.from_file(audio_file)
|
|
|
|
| 101 |
validated_srt_content += line + "\n"
|
| 102 |
|
| 103 |
unique_id = uuid.uuid4()
|
| 104 |
+
final_audio_path = f"final_audio_{unique_id}.mp3"
|
| 105 |
final_srt_path = f"final_subtitles_{unique_id}.srt"
|
| 106 |
|
| 107 |
+
combined_audio.export(final_audio_path, format="mp3", bitrate="320k")
|
| 108 |
+
|
| 109 |
with open(final_srt_path, "w") as srt_file:
|
| 110 |
srt_file.write(validated_srt_content)
|
| 111 |
|
| 112 |
return final_srt_path, final_audio_path
|
| 113 |
|
| 114 |
+
# Gradio interface callback
async def process_script(script_text, pitch, voice, rate):
    """Generate subtitles and narration for a script and return their paths.

    Args:
        script_text: Text to synthesize.
        pitch: Pitch adjustment, forwarded unchanged.
        voice: Either a display label that is a key of ``voices`` (for
            example "Jenny") or a voice identifier. Labels are translated
            to their identifier; any other value is forwarded as given.
        rate: Speech-rate adjustment, forwarded unchanged.

    Returns:
        A ``(srt_path, audio_path, audio_path)`` tuple. The audio path is
        repeated; the interface's visible outputs include both an audio
        file download and an audio player.
    """
    # NOTE(review): the interface declares its inputs as text, voice, rate,
    # pitch, while this signature is text, pitch, voice, rate. Gradio hands
    # input values to the callback positionally -- confirm the two orders
    # line up.

    # The dropdown is populated from the keys of ``voices``, so translate the
    # selected label into its identifier before synthesis.
    voice_id = voices.get(voice, voice)
    srt_path, audio_path = await batch_process_srt_and_audio(
        script_text, pitch, voice_id, rate
    )
    return srt_path, audio_path, audio_path
|
| 118 |
|
| 119 |
+
# Selectable voices: display label -> en-US neural voice identifier.
# The labels (keys) populate the "Select Voice" dropdown; the identifiers are
# presumably the names edge-tts expects -- confirm against the edge-tts voice list.
_VOICE_TABLE = (
    ("Jenny", "en-US-JennyNeural"),
    ("Guy", "en-US-GuyNeural"),
    ("Ana", "en-US-AnaNeural"),
    ("Aria", "en-US-AriaNeural"),
    ("Brian", "en-US-BrianNeural"),
    ("Christopher", "en-US-ChristopherNeural"),
    ("Eric", "en-US-EricNeural"),
    ("Michelle", "en-US-MichelleNeural"),
    ("Roger", "en-US-RogerNeural"),
)
# dict() keeps the table's order, so the dropdown lists voices as written above.
voices = dict(_VOICE_TABLE)
|
| 131 |
+
|
| 132 |
+
# Gradio interface setup with voice selection and speech rate adjustment
|
| 133 |
app = gr.Interface(
|
| 134 |
fn=process_script,
|
| 135 |
inputs=[
|
| 136 |
gr.Textbox(label="Enter Script Text", lines=10),
|
| 137 |
+
gr.Dropdown(label="Select Voice", choices=list(voices.keys()), value="Jenny"),
|
| 138 |
gr.Slider(label="Speech Rate Adjustment (%)", minimum=0, maximum=2, step=0.1, value=1),
|
| 139 |
gr.Slider(label="Pitch Adjustment (Hz)", minimum=-100, maximum=100, step=1, value=1)
|
| 140 |
],
|
|
|
|
| 143 |
gr.File(label="Download Audio File"),
|
| 144 |
gr.Audio(label="Play Audio")
|
| 145 |
],
|
| 146 |
+
description="HIVEcorp TTS Generator with customizable voice, speech rate, and pitch adjustments."
|
| 147 |
)
|
| 148 |
|
| 149 |
app.launch()
|