Update app.py
Browse files
Attempt to fix voice distortion when stretch is applied
app.py
CHANGED
|
@@ -102,7 +102,7 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
| 102 |
if target_duration_ms is not None and os.path.exists(audio_path):
|
| 103 |
audio = AudioSegment.from_mp3(audio_path)
|
| 104 |
audio_duration_ms = len(audio)
|
| 105 |
-
|
| 106 |
if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
|
| 107 |
speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
|
| 108 |
#print(f"Speed factor (after user adjustment): {speed_factor}") # Debug
|
|
@@ -111,7 +111,24 @@ async def generate_audio_with_voice_prefix(text_segment, default_voice, rate, pi
|
|
| 111 |
speed_factor = 1.0
|
| 112 |
y, sr = librosa.load(audio_path, sr=None)
|
| 113 |
y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
sf.write(audio_path, y_stretched, sr)
|
|
|
|
| 115 |
else:
|
| 116 |
print("Generated audio is not longer than target duration, no speed adjustment.") # Debug
|
| 117 |
return audio_path
|
|
|
|
| 102 |
if target_duration_ms is not None and os.path.exists(audio_path):
|
| 103 |
audio = AudioSegment.from_mp3(audio_path)
|
| 104 |
audio_duration_ms = len(audio)
|
| 105 |
+
""" #print(f"Generated audio duration: {audio_duration_ms}ms, Target duration: {target_duration_ms}ms") # Debug
|
| 106 |
if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
|
| 107 |
speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
|
| 108 |
#print(f"Speed factor (after user adjustment): {speed_factor}") # Debug
|
|
|
|
| 111 |
speed_factor = 1.0
|
| 112 |
y, sr = librosa.load(audio_path, sr=None)
|
| 113 |
y_stretched = librosa.effects.time_stretch(y, rate=speed_factor)
|
| 114 |
+
sf.write(audio_path, y_stretched, sr) """
|
| 115 |
+
|
| 116 |
+
if audio_duration_ms > target_duration_ms and target_duration_ms > 0:
|
| 117 |
+
speed_factor = (audio_duration_ms / target_duration_ms) * speed_adjustment_factor
|
| 118 |
+
if speed_factor > 0:
|
| 119 |
+
if speed_factor < 1.0:
|
| 120 |
+
speed_factor = 1.0
|
| 121 |
+
y, sr = librosa.load(audio_path, sr=None)
|
| 122 |
+
|
| 123 |
+
# Use the phase vocoder for time stretching without pitch change
|
| 124 |
+
hop_length = 512 # You can adjust this parameter
|
| 125 |
+
phase_vocoder_output = librosa.phase_vocoder(y, rate=speed_factor, hop_length=hop_length)
|
| 126 |
+
|
| 127 |
+
# Reconstruct the audio signal from the phase vocoder output
|
| 128 |
+
y_stretched = librosa.istft(phase_vocoder_output, hop_length=hop_length, length=len(y) if speed_factor < 1 else None)
|
| 129 |
+
|
| 130 |
sf.write(audio_path, y_stretched, sr)
|
| 131 |
+
|
| 132 |
else:
|
| 133 |
print("Generated audio is not longer than target duration, no speed adjustment.") # Debug
|
| 134 |
return audio_path
|