import argparse
import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import soundfile as sf

# librosa's pitch_shift measures n_steps in fractions of an octave; with 12
# bins per octave one step is one equal-tempered semitone.
SEMITONES_PER_OCTAVE = 12


def auto_tune(audio_path, target_pitch=440.0):
    """Shift a recording so that its median detected pitch equals *target_pitch*.

    A single constant shift is applied to the whole signal; notes are not
    corrected individually.

    Args:
        audio_path: Path of the audio file to load with ``librosa.load``.
        target_pitch: Frequency in Hz that the median pitch is moved to.

    Returns:
        A ``(y_tuned, sr)`` tuple: the pitch-shifted samples and the sample
        rate reported by ``librosa.load``.

    Raises:
        ValueError: If *target_pitch* is missing, non-finite or not positive,
            or if no pitch can be detected in the audio.
    """
    if target_pitch is None or not np.isfinite(target_pitch) or target_pitch <= 0:
        raise ValueError("target_pitch must be a positive, finite frequency in Hz.")

    y, sr = librosa.load(audio_path)

    # Per-bin, per-frame pitch candidates and their magnitudes.
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)

    # Keep only the candidates that are louder than the median magnitude; the
    # boolean mask also flattens the 2D arrays to 1D.
    pitches = pitches[magnitudes > np.median(magnitudes)]

    # piptrack reports 0 where it found no pitch, so drop those entries.
    pitches = pitches[pitches > 0]

    if pitches.size == 0:
        raise ValueError("No pitch detected in the audio. Ensure the input audio contains a detectable pitch.")

    median_pitch = np.median(pitches)
    tuning_ratio = target_pitch / median_pitch

    # log2(ratio) is the interval in octaves; scale it to semitones because
    # that is the unit pitch_shift expects for n_steps.
    n_steps = SEMITONES_PER_OCTAVE * np.log2(tuning_ratio)

    # Keyword arguments are required by recent librosa releases and are also
    # accepted by older ones.
    y_tuned = librosa.effects.pitch_shift(
        y=y,
        sr=sr,
        n_steps=n_steps,
        bins_per_octave=SEMITONES_PER_OCTAVE,
    )

    return y_tuned, sr


def run_gradio():
    """Build and launch the Gradio interface for the auto-tune function."""

    def gradio_interface(audio, target_pitch):
        """Auto-tune the ``(sample_rate, samples)`` tuple supplied by Gradio.

        Returns a ``(sample_rate, samples)`` tuple for the Audio output.
        Failures are raised as ``gr.Error`` because the output component can
        only render audio, not an error string.
        """
        if audio is None:
            raise gr.Error("Error: No audio input provided.")

        sample_rate, samples = audio

        try:
            # auto_tune works on a file path, so round-trip through a WAV file
            # in a private scratch directory that is removed afterwards. This
            # keeps concurrent requests from overwriting each other's input.
            with tempfile.TemporaryDirectory() as scratch_dir:
                audio_path = os.path.join(scratch_dir, "input_audio.wav")
                sf.write(audio_path, samples, sample_rate)
                y_tuned, sr = auto_tune(audio_path, target_pitch=target_pitch)
        except Exception as e:
            # UI boundary: surface any processing failure to the user.
            raise gr.Error(f"Error occurred: {str(e)}") from e

        return (sr, y_tuned)

    iface = gr.Interface(
        fn=gradio_interface,
        inputs=[
            gr.Audio(type="numpy"),
            gr.Number(value=440.0, label="Target Pitch"),
        ],
        outputs=gr.Audio(type="numpy", label="Tuned Audio"),
        title="Auto-Tune Voice",
        description="Upload or record your voice and apply auto-tune.",
    )

    iface.launch()


if __name__ == "__main__":
    run_gradio()