Update app.py
Browse files — Restore working version without post filter
app.py
CHANGED
|
@@ -12,45 +12,6 @@ import soundfile as sf
|
|
| 12 |
import numpy as np
|
| 13 |
from pydub import AudioSegment
|
| 14 |
from pydub.playback import play
|
| 15 |
-
import math
|
| 16 |
-
from scipy.signal import butter, sosfiltfilt
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def apply_low_pass_filter(audio_segment, cutoff_freq, order=6):
    """Return a low-pass-filtered copy of an AudioSegment.

    A zero-phase Butterworth filter (``butter`` + ``sosfiltfilt``) is applied
    to every channel independently; the result is rounded, clipped to the
    integer range of the segment's sample width and handed back to
    ``audio_segment._spawn`` as raw bytes.

    Args:
        audio_segment: The AudioSegment to filter. Only ``frame_rate``,
            ``sample_width``, ``channels``, ``get_array_of_samples()`` and
            ``_spawn()`` are used.
        cutoff_freq: The cutoff frequency in Hz. Must lie strictly between
            0 and the Nyquist frequency (half of the frame rate).
        order: The order of the Butterworth filter.

    Returns:
        A new AudioSegment with the filtered audio, or ``audio_segment``
        itself when it contains no samples.

    Raises:
        ValueError: If the sample width is not 1, 2 or 4 bytes, or if
            ``cutoff_freq`` is outside the open interval (0, Nyquist).
    """
    # Validate up front so unsupported input fails before any filtering work.
    # A 3-byte width is rejected: no NumPy integer type is 3 bytes wide, so
    # the produced byte stream could not match the segment's frame width.
    sample_width = audio_segment.sample_width
    sample_dtype = {1: np.int8, 2: np.int16, 4: np.int32}.get(sample_width)
    if sample_dtype is None:
        raise ValueError(f"Unsupported sample width: {sample_width}")

    nyquist_freq = 0.5 * audio_segment.frame_rate
    if not 0 < cutoff_freq < nyquist_freq:
        raise ValueError(
            f"cutoff_freq must be between 0 and the Nyquist frequency "
            f"({nyquist_freq} Hz), got {cutoff_freq}"
        )

    # float64 keeps 32-bit samples exact; float32 has only a 24-bit mantissa.
    samples = np.asarray(audio_segment.get_array_of_samples(), dtype=np.float64)
    if samples.size == 0:
        return audio_segment  # Nothing to filter.

    # pydub documents multi-channel samples as interleaved (L, R, L, R, ...).
    # Reshape to (frames, channels) so each channel is filtered on its own
    # instead of being smeared into its neighbours.
    frames = samples.reshape(-1, audio_segment.channels)

    sos = butter(order, cutoff_freq / nyquist_freq, btype='low', output='sos')

    # Reproduce scipy's documented default edge padding, but cap it so very
    # short segments are filtered instead of raising inside sosfiltfilt.
    ntaps = 2 * sos.shape[0] + 1 - min(
        int((sos[:, 2] == 0).sum()), int((sos[:, 5] == 0).sum())
    )
    padlen = min(3 * ntaps, frames.shape[0] - 1)
    filtered = sosfiltfilt(sos, frames, axis=0, padlen=padlen)

    # Filter ringing can overshoot full scale; clip before the integer cast
    # so peaks saturate rather than wrap around to the opposite sign.
    limits = np.iinfo(sample_dtype)
    quantized = np.clip(np.rint(filtered), limits.min, limits.max).astype(sample_dtype)

    # Row-major bytes of (frames, channels) restore the interleaved layout.
    # Raw bytes are passed so the spawned segment holds byte data, not an
    # ndarray whose len() would count samples instead of bytes.
    return audio_segment._spawn(quantized.tobytes())
|
| 53 |
-
|
| 54 |
|
| 55 |
|
| 56 |
def get_silence(duration_ms=1000):
|
|
@@ -128,19 +89,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
| 128 |
match = re.search(r'([A-Za-z]+)([-]?\d*)', processed_text)
|
| 129 |
if match:
|
| 130 |
prefix_pitch = match.group(1)
|
| 131 |
-
|
| 132 |
-
if number_str: # Check if the second group (number part) is not empty
|
| 133 |
-
try:
|
| 134 |
-
number = int(number_str)
|
| 135 |
-
# Now you can use the 'number' variable
|
| 136 |
-
print(f"Prefix: {prefix_pitch}, Number: {number}") # Example usage
|
| 137 |
-
except ValueError as e:
|
| 138 |
-
print(f"Error converting number string to int: {e}")
|
| 139 |
-
number = 0 # Or some other default value
|
| 140 |
-
else:
|
| 141 |
-
number = 0 # Or some other default value if no number is found
|
| 142 |
-
print(f"Prefix: {prefix_pitch}, No number found.") # Example handling
|
| 143 |
-
|
| 144 |
if prefix_pitch in voice_map:
|
| 145 |
current_pitch += number
|
| 146 |
#processed_text = re.sub(r'[A-Za-z]+-?\d+', '', processed_text, count=1).strip()
|
|
@@ -278,32 +227,18 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
|
|
| 278 |
os.remove(path)
|
| 279 |
except FileNotFoundError:
|
| 280 |
print(f"Warning: Audio file not found: {path}")
|
| 281 |
-
Rem1='''
|
| 282 |
-
if combined_line_audio and overall_duration_ms is not None and overall_duration_ms > 0 and total_generated_duration_ms > overall_duration_ms:
|
| 283 |
-
speed_factor = (total_generated_duration_ms / overall_duration_ms) * speed_adjustment_factor
|
| 284 |
-
if speed_factor > 0:
|
| 285 |
-
if speed_factor < 1.0:
|
| 286 |
-
speed_factor = 1.0
|
| 287 |
-
combined_line_audio = combined_line_audio.speedup(playback_speed=speed_factor)
|
| 288 |
|
| 289 |
-
if combined_line_audio:
|
| 290 |
-
timed_audio_segments.append({'start': start_time, 'audio': combined_line_audio})
|
| 291 |
-
max_end_time_ms = max(max_end_time_ms, start_time + len(combined_line_audio))
|
| 292 |
-
'''
|
| 293 |
if combined_line_audio and overall_duration_ms is not None and overall_duration_ms > 0 and total_generated_duration_ms > overall_duration_ms:
|
| 294 |
speed_factor = (total_generated_duration_ms / overall_duration_ms) * speed_adjustment_factor
|
| 295 |
if speed_factor > 0:
|
| 296 |
if speed_factor < 1.0:
|
| 297 |
speed_factor = 1.0
|
| 298 |
combined_line_audio = combined_line_audio.speedup(playback_speed=speed_factor)
|
| 299 |
-
# Apply low-pass filter AFTER speed adjustment
|
| 300 |
-
cutoff_freq = 7000.0 # Adjust as needed
|
| 301 |
-
combined_line_audio = apply_low_pass_filter(combined_line_audio, cutoff_freq)
|
| 302 |
|
| 303 |
if combined_line_audio:
|
| 304 |
timed_audio_segments.append({'start': start_time, 'audio': combined_line_audio})
|
| 305 |
max_end_time_ms = max(max_end_time_ms, start_time + len(combined_line_audio))
|
| 306 |
-
|
| 307 |
elif audio_paths:
|
| 308 |
for path in audio_paths:
|
| 309 |
if path:
|
|
@@ -311,38 +246,14 @@ async def transcript_to_speech(transcript_text, voice, rate, pitch, speed_adjust
|
|
| 311 |
os.remove(path)
|
| 312 |
except FileNotFoundError:
|
| 313 |
pass # Clean up even if no timestamp
|
| 314 |
-
|
| 315 |
-
|
| 316 |
|
| 317 |
if not timed_audio_segments:
|
| 318 |
return None, "No processable audio segments found."
|
| 319 |
|
| 320 |
-
oldx= '''
|
| 321 |
final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
|
| 322 |
for segment in timed_audio_segments:
|
| 323 |
final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
|
| 324 |
-
'''
|
| 325 |
-
final_audio = AudioSegment.silent(duration=int(max_end_time_ms * 1000 + 500), frame_rate=24000)
|
| 326 |
-
|
| 327 |
-
for segment in timed_audio_segments:
|
| 328 |
-
start_position_ms = int(segment['start'] * 1000)
|
| 329 |
-
audio_to_overlay = segment['audio']
|
| 330 |
-
|
| 331 |
-
if start_position_ms + len(audio_to_overlay) > len(final_audio):
|
| 332 |
-
padding_needed = (start_position_ms + len(audio_to_overlay)) - len(final_audio)
|
| 333 |
-
final_audio += AudioSegment.silent(duration=padding_needed + 100, frame_rate=final_audio.frame_rate)
|
| 334 |
-
|
| 335 |
-
try:
|
| 336 |
-
final_audio = final_audio.overlay(audio_to_overlay, position=start_position_ms)
|
| 337 |
-
except Exception as e:
|
| 338 |
-
print(f"Error during overlay: {e}")
|
| 339 |
-
print(f" - Start position (ms): {start_position_ms}")
|
| 340 |
-
print(f" - Length of audio to overlay (ms): {len(audio_to_overlay)}")
|
| 341 |
-
print(f" - Length of final_audio (ms): {len(final_audio)}")
|
| 342 |
-
# Consider adding logic here to handle the error, e.g., truncating audio_to_overlay
|
| 343 |
-
# or skipping the overlay if it consistently fails.
|
| 344 |
|
| 345 |
-
|
| 346 |
combined_audio_path = tempfile.mktemp(suffix=".mp3")
|
| 347 |
final_audio.export(combined_audio_path, format="mp3")
|
| 348 |
return combined_audio_path, None
|
|
|
|
| 12 |
import numpy as np
|
| 13 |
from pydub import AudioSegment
|
| 14 |
from pydub.playback import play
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
def get_silence(duration_ms=1000):
|
|
|
|
| 89 |
match = re.search(r'([A-Za-z]+)([-]?\d*)', processed_text)
|
| 90 |
if match:
|
| 91 |
prefix_pitch = match.group(1)
|
| 92 |
+
number = int(match.group(2))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
if prefix_pitch in voice_map:
|
| 94 |
current_pitch += number
|
| 95 |
#processed_text = re.sub(r'[A-Za-z]+-?\d+', '', processed_text, count=1).strip()
|
|
|
|
| 227 |
os.remove(path)
|
| 228 |
except FileNotFoundError:
|
| 229 |
print(f"Warning: Audio file not found: {path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
if combined_line_audio and overall_duration_ms is not None and overall_duration_ms > 0 and total_generated_duration_ms > overall_duration_ms:
|
| 232 |
speed_factor = (total_generated_duration_ms / overall_duration_ms) * speed_adjustment_factor
|
| 233 |
if speed_factor > 0:
|
| 234 |
if speed_factor < 1.0:
|
| 235 |
speed_factor = 1.0
|
| 236 |
combined_line_audio = combined_line_audio.speedup(playback_speed=speed_factor)
|
|
|
|
|
|
|
|
|
|
| 237 |
|
| 238 |
if combined_line_audio:
|
| 239 |
timed_audio_segments.append({'start': start_time, 'audio': combined_line_audio})
|
| 240 |
max_end_time_ms = max(max_end_time_ms, start_time + len(combined_line_audio))
|
| 241 |
+
|
| 242 |
elif audio_paths:
|
| 243 |
for path in audio_paths:
|
| 244 |
if path:
|
|
|
|
| 246 |
os.remove(path)
|
| 247 |
except FileNotFoundError:
|
| 248 |
pass # Clean up even if no timestamp
|
|
|
|
|
|
|
| 249 |
|
| 250 |
if not timed_audio_segments:
|
| 251 |
return None, "No processable audio segments found."
|
| 252 |
|
|
|
|
| 253 |
final_audio = AudioSegment.silent(duration=max_end_time_ms, frame_rate=24000)
|
| 254 |
for segment in timed_audio_segments:
|
| 255 |
final_audio = final_audio.overlay(segment['audio'], position=segment['start'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
|
|
|
| 257 |
combined_audio_path = tempfile.mktemp(suffix=".mp3")
|
| 258 |
final_audio.export(combined_audio_path, format="mp3")
|
| 259 |
return combined_audio_path, None
|