Spaces:
Sleeping
Sleeping
| import sys | |
| from pydub import AudioSegment | |
| import soundfile as sf | |
| import pyrubberband as pyrb | |
| import numpy as np | |
| from io import BytesIO | |
def audiosegment_to_librosawav(audiosegment):
    """Convert a pydub segment into a flat float32 sample array.

    The segment is split into mono channels, the integer samples are divided
    by the maximum value of their integer type (so full scale maps to about
    [-1, 1]), and the result is flattened to one dimension.

    For multichannel input the returned array is interleaved: all channels
    of the first sample position come first, then all channels of the
    second, and so on.

    Args:
        audiosegment: Object providing ``split_to_mono()``, whose items in
            turn provide ``get_array_of_samples()``.

    Returns:
        A 1-D ``np.float32`` array of scaled samples.
    """
    per_channel = [
        mono.get_array_of_samples() for mono in audiosegment.split_to_mono()
    ]
    # The array typecode identifies the integer sample width; its maximum
    # is the full-scale value used for normalisation.
    full_scale = np.iinfo(per_channel[0].typecode).max
    # One row per channel -> transpose to one row per sample position.
    frames = np.array(per_channel).T.astype(np.float32)
    frames /= full_scale
    return frames.reshape(-1)
def ndarray_to_segment(ndarray, frame_rate):
    """Build a pydub ``AudioSegment`` from an array of samples.

    The samples are encoded as WAV into an in-memory buffer with
    ``soundfile`` and that buffer is then parsed by pydub.

    Args:
        ndarray: Sample data handed to ``sf.write``.
        frame_rate: Sample rate written into the WAV data.

    Returns:
        The ``AudioSegment`` read back from the WAV buffer.
    """
    wav_buffer = BytesIO()
    sf.write(wav_buffer, ndarray, frame_rate, format="wav")
    # Rewind so pydub reads from the start of what was just written.
    wav_buffer.seek(0)
    return AudioSegment.from_wav(wav_buffer)
def time_stretch(input_segment: AudioSegment, time_factor: float) -> AudioSegment:
    """Change the tempo of ``input_segment`` using pyrubberband.

    Args:
        input_segment: Audio to process.
        time_factor: Rate handed to ``pyrb.time_stretch``. Values outside
            [0.2, 10] are clamped to that range.

    Returns:
        A new ``AudioSegment`` built from the stretched samples at the
        input's frame rate.
    """
    # Plain float so the value handed to pyrubberband is not a NumPy scalar.
    time_factor = float(np.clip(time_factor, 0.2, 10))
    sr = input_segment.frame_rate
    y = audiosegment_to_librosawav(input_segment)
    # audiosegment_to_librosawav returns the channels interleaved in one flat
    # array. Restore the (frames, channels) layout so multichannel audio is
    # not processed and written back as a single, longer mono stream.
    channels = input_segment.channels
    if channels > 1:
        y = y.reshape(-1, channels)
    y_stretch = pyrb.time_stretch(y, sr, time_factor)
    return ndarray_to_segment(y_stretch, frame_rate=sr)
def pitch_shift(
    input_segment: AudioSegment,
    pitch_shift_factor: float,
) -> AudioSegment:
    """Shift the pitch of ``input_segment`` using pyrubberband.

    Args:
        input_segment: Audio to process.
        pitch_shift_factor: Step count handed to ``pyrb.pitch_shift``.
            Values outside [-12, 12] are clamped to that range.

    Returns:
        A new ``AudioSegment`` built from the shifted samples at the
        input's frame rate.
    """
    # Plain float so the value handed to pyrubberband is not a NumPy scalar.
    pitch_shift_factor = float(np.clip(pitch_shift_factor, -12, 12))
    sr = input_segment.frame_rate
    y = audiosegment_to_librosawav(input_segment)
    # audiosegment_to_librosawav returns the channels interleaved in one flat
    # array. Restore the (frames, channels) layout so multichannel audio is
    # not processed and written back as a single, longer mono stream.
    channels = input_segment.channels
    if channels > 1:
        y = y.reshape(-1, channels)
    y_shift = pyrb.pitch_shift(y, sr, pitch_shift_factor)
    return ndarray_to_segment(y_shift, frame_rate=sr)
def apply_prosody_to_audio_data(
    audio_data: np.ndarray, rate: float, volume: float, pitch: float, sr: int
) -> np.ndarray:
    """Apply rate, volume and pitch adjustments to samples, in that order.

    Each step is skipped when its parameter holds its "off" value:
    ``rate == 1``, ``volume == 0`` and ``pitch == 0``. When every step is
    skipped the input array itself is returned.

    Args:
        audio_data: Samples to process.
        rate: Passed as ``rate`` to ``pyrb.time_stretch``.
        volume: Factor the samples are multiplied by.
        pitch: Passed as ``n_steps`` to ``pyrb.pitch_shift``.
        sr: Sample rate forwarded to pyrubberband.

    Returns:
        The processed samples.
    """
    result = audio_data

    if rate != 1:
        result = pyrb.time_stretch(result, sr=sr, rate=rate)

    # NOTE(review): volume is used as a multiplier, yet 0 (not 1) is the
    # value that leaves the audio untouched -- confirm this is intended.
    if volume != 0:
        result = result * volume

    if pitch != 0:
        result = pyrb.pitch_shift(result, sr=sr, n_steps=pitch)

    return result
if __name__ == "__main__":
    # Demo driver: render time-stretched and pitch-shifted copies of one MP3
    # file as WAV files, written to relative paths.
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <input.mp3>")
    input_file = sys.argv[1]

    time_stretch_factors = [0.5, 0.75, 1.5, 1.0]
    pitch_shift_factors = [-12, -5, 0, 5, 12]

    input_sound = AudioSegment.from_mp3(input_file)

    for time_factor in time_stretch_factors:
        # The factor is encoded in the file name as a percentage.
        output_wav = f"time_stretched_{int(time_factor * 100)}.wav"
        sound = time_stretch(input_sound, time_factor)
        sound.export(output_wav, format="wav")

    for pitch_factor in pitch_shift_factors:
        # The step count is encoded in the file name multiplied by 100.
        output_wav = f"pitch_shifted_{int(pitch_factor * 100)}.wav"
        sound = pitch_shift(input_sound, pitch_factor)
        sound.export(output_wav, format="wav")