Update app.py
Browse files
Multi send to Edge to speed up
app.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
## Low pass filtering applied to final Audio
|
| 2 |
-
|
| 3 |
import spaces
|
| 4 |
import gradio as gr
|
| 5 |
import edge_tts
|
|
@@ -36,9 +34,9 @@ def apply_low_pass_filter(audio_segment, cutoff_freq, sample_rate, order=5):
|
|
| 36 |
|
| 37 |
filtered_data_int16 = (filtered_data * (2**15 - 1)).astype(np.int16)
|
| 38 |
filtered_audio = AudioSegment(filtered_data_int16.tobytes(),
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
return filtered_audio
|
| 43 |
|
| 44 |
|
|
@@ -113,7 +111,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
| 113 |
processed_text = processed_text[len(prefix):].strip() #this removes the Prefix and leave only number or text after it.
|
| 114 |
break
|
| 115 |
#match = re.search(r'([A-Za-z]+)-?(\d+)', processed_text)
|
| 116 |
-
match = re.search(r"^(-?\d+)\s*(.*)", processed_text)
|
| 117 |
if match:
|
| 118 |
#prefix_pitch = match.group(1)
|
| 119 |
number = match.group(1)
|
|
@@ -123,7 +121,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
| 123 |
#processed_text = re.sub(r'([A-Za-z]+)([-]?\d*)', '', processed_text, count=1).strip()
|
| 124 |
processed_text = match.group(2)
|
| 125 |
#elif detect:
|
| 126 |
-
#
|
| 127 |
|
| 128 |
if processed_text:
|
| 129 |
rate_str = f"{current_rate:+d}%"
|
|
@@ -281,10 +279,9 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
|
|
| 281 |
final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
|
| 282 |
for segment in timed_audio_segments:
|
| 283 |
final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
|
| 284 |
-
|
| 285 |
# Apply the low-pass filter here
|
| 286 |
cutoff_frequency = 3500 # 3.5 kHz (you can make this a user-configurable parameter later)
|
| 287 |
-
print(f"Applying Low pass filter, cut off frequency: {cutoff_frequency}")
|
| 288 |
filtered_final_audio = apply_low_pass_filter(final_audio, cutoff_frequency, final_audio.frame_rate)
|
| 289 |
|
| 290 |
combined_audio_path = tempfile.mktemp(suffix=".mp3")
|
|
@@ -292,7 +289,6 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
|
|
| 292 |
filtered_final_audio.export(combined_audio_path, format="mp3")
|
| 293 |
return combined_audio_path, None
|
| 294 |
|
| 295 |
-
|
| 296 |
@spaces.GPU
|
| 297 |
def tts_interface(transcript, voice, rate, pitch, speed_adjustment_factor):
|
| 298 |
audio, warning = asyncio.run(transcript_to_speech(transcript, voice, rate, pitch, speed_adjustment_factor))
|
|
@@ -352,4 +348,4 @@ async def create_demo():
|
|
| 352 |
|
| 353 |
if __name__ == "__main__":
|
| 354 |
demo = asyncio.run(create_demo())
|
| 355 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
| 1 |
import spaces
|
| 2 |
import gradio as gr
|
| 3 |
import edge_tts
|
|
|
|
| 34 |
|
| 35 |
filtered_data_int16 = (filtered_data * (2**15 - 1)).astype(np.int16)
|
| 36 |
filtered_audio = AudioSegment(filtered_data_int16.tobytes(),
|
| 37 |
+
frame_rate=sample_rate,
|
| 38 |
+
sample_width=audio_segment.sample_width,
|
| 39 |
+
channels=audio_segment.channels)
|
| 40 |
return filtered_audio
|
| 41 |
|
| 42 |
|
|
|
|
| 111 |
processed_text = processed_text[len(prefix):].strip() #this removes the Prefix and leave only number or text after it.
|
| 112 |
break
|
| 113 |
#match = re.search(r'([A-Za-z]+)-?(\d+)', processed_text)
|
| 114 |
+
match = re.search(r"^(-?\d+)\s*(.*)", processed_text)
|
| 115 |
if match:
|
| 116 |
#prefix_pitch = match.group(1)
|
| 117 |
number = match.group(1)
|
|
|
|
| 121 |
#processed_text = re.sub(r'([A-Za-z]+)([-]?\d*)', '', processed_text, count=1).strip()
|
| 122 |
processed_text = match.group(2)
|
| 123 |
#elif detect:
|
| 124 |
+
# processed_text = processed_text.lstrip('-0123456789').strip() # Remove potential leftover numbers
|
| 125 |
|
| 126 |
if processed_text:
|
| 127 |
rate_str = f"{current_rate:+d}%"
|
|
|
|
| 279 |
final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
|
| 280 |
for segment in timed_audio_segments:
|
| 281 |
final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
|
| 282 |
+
|
| 283 |
# Apply the low-pass filter here
|
| 284 |
cutoff_frequency = 3500 # 3.5 kHz (you can make this a user-configurable parameter later)
|
|
|
|
| 285 |
filtered_final_audio = apply_low_pass_filter(final_audio, cutoff_frequency, final_audio.frame_rate)
|
| 286 |
|
| 287 |
combined_audio_path = tempfile.mktemp(suffix=".mp3")
|
|
|
|
| 289 |
filtered_final_audio.export(combined_audio_path, format="mp3")
|
| 290 |
return combined_audio_path, None
|
| 291 |
|
|
|
|
| 292 |
@spaces.GPU
|
| 293 |
def tts_interface(transcript, voice, rate, pitch, speed_adjustment_factor):
|
| 294 |
audio, warning = asyncio.run(transcript_to_speech(transcript, voice, rate, pitch, speed_adjustment_factor))
|
|
|
|
| 348 |
|
| 349 |
if __name__ == "__main__":
|
| 350 |
demo = asyncio.run(create_demo())
|
| 351 |
+
demo.launch()
|