Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -54,7 +54,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
| 54 |
current_rate = rate
|
| 55 |
current_pitch = pitch
|
| 56 |
processed_text = text_segment.strip()
|
| 57 |
-
print(f"Processing this text segment: {processed_text}") # Debug
|
| 58 |
voice_map = {
|
| 59 |
"1F": "en-GB-SoniaNeural",
|
| 60 |
"2M": "en-GB-RyanNeural",
|
|
@@ -102,12 +102,23 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
| 102 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
| 103 |
audio_path = tmp_file.name
|
| 104 |
await communicate.save(audio_path)
|
| 105 |
-
|
|
|
|
| 106 |
audio = AudioSegment.from_mp3(audio_path)
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
-
return audio_path
|
| 111 |
except Exception as e:
|
| 112 |
print(f"Edge TTS error processing '{processed_text}': {e}")
|
| 113 |
return None
|
|
@@ -124,7 +135,6 @@ async def process_transcript_line(line, next_line_start_time, default_voice, rat
|
|
| 124 |
int(start_s) * 1000 +
|
| 125 |
int(start_ms)
|
| 126 |
)
|
| 127 |
-
|
| 128 |
audio_segments = []
|
| 129 |
split_parts = re.split(r'[“”"]', text_parts)
|
| 130 |
process_next = False
|
|
@@ -140,7 +150,28 @@ async def process_transcript_line(line, next_line_start_time, default_voice, rat
|
|
| 140 |
audio_path = await generate_audio_with_voice_prefix(part, default_voice, rate, pitch, overall_duration_ms, speed_adjustment_factor)
|
| 141 |
if audio_path:
|
| 142 |
audio_segments.append(audio_path)
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
return None, None, None
|
| 145 |
|
| 146 |
async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjustment_factor):
|
|
|
|
| 54 |
current_rate = rate
|
| 55 |
current_pitch = pitch
|
| 56 |
processed_text = text_segment.strip()
|
| 57 |
+
print(f"Processing this text segment: '{processed_text}'") # Debug
|
| 58 |
voice_map = {
|
| 59 |
"1F": "en-GB-SoniaNeural",
|
| 60 |
"2M": "en-GB-RyanNeural",
|
|
|
|
| 102 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
| 103 |
audio_path = tmp_file.name
|
| 104 |
await communicate.save(audio_path)
|
| 105 |
+
|
| 106 |
+
if os.path.exists(audio_path):
|
| 107 |
audio = AudioSegment.from_mp3(audio_path)
|
| 108 |
+
# Trim leading and trailing silence
|
| 109 |
+
def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=10):
|
| 110 |
+
trim_ms = 0
|
| 111 |
+
assert chunk_size > 0 # to avoid infinite loop
|
| 112 |
+
while sound[trim_ms:trim_ms+chunk_size].dBFS < silence_threshold and trim_ms < len(sound):
|
| 113 |
+
trim_ms += chunk_size
|
| 114 |
+
return trim_ms
|
| 115 |
+
|
| 116 |
+
start_trim = detect_leading_silence(audio)
|
| 117 |
+
end_trim = detect_leading_silence(audio.reverse())
|
| 118 |
+
trimmed_audio = audio[start_trim:len(audio)-end_trim]
|
| 119 |
+
trimmed_audio.export(audio_path, format="mp3") # Overwrite with trimmed version
|
| 120 |
+
return audio_path
|
| 121 |
|
|
|
|
| 122 |
except Exception as e:
|
| 123 |
print(f"Edge TTS error processing '{processed_text}': {e}")
|
| 124 |
return None
|
|
|
|
| 135 |
int(start_s) * 1000 +
|
| 136 |
int(start_ms)
|
| 137 |
)
|
|
|
|
| 138 |
audio_segments = []
|
| 139 |
split_parts = re.split(r'[“”"]', text_parts)
|
| 140 |
process_next = False
|
|
|
|
| 150 |
audio_path = await generate_audio_with_voice_prefix(part, default_voice, rate, pitch, overall_duration_ms, speed_adjustment_factor)
|
| 151 |
if audio_path:
|
| 152 |
audio_segments.append(audio_path)
|
| 153 |
+
|
| 154 |
+
if audio_segments:
|
| 155 |
+
combined_audio = AudioSegment.empty()
|
| 156 |
+
for segment_path in audio_segments:
|
| 157 |
+
try:
|
| 158 |
+
segment = AudioSegment.from_mp3(segment_path)
|
| 159 |
+
combined_audio += segment
|
| 160 |
+
os.remove(segment_path) # Clean up individual segment files
|
| 161 |
+
except Exception as e:
|
| 162 |
+
print(f"Error loading or combining audio segment {segment_path}: {e}")
|
| 163 |
+
return None, None, None
|
| 164 |
+
|
| 165 |
+
combined_audio_path = f"combined_audio_{start_time_ms}.mp3"
|
| 166 |
+
try:
|
| 167 |
+
combined_audio.export(combined_audio_path, format="mp3")
|
| 168 |
+
return start_time_ms, [combined_audio_path], overall_duration_ms
|
| 169 |
+
except Exception as e:
|
| 170 |
+
print(f"Error exporting combined audio: {e}")
|
| 171 |
+
return None, None, None
|
| 172 |
+
|
| 173 |
+
return start_time_ms, [], overall_duration_ms # Return empty list if no audio generated
|
| 174 |
+
|
| 175 |
return None, None, None
|
| 176 |
|
| 177 |
async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjustment_factor):
|