Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import librosa | |
| import soundfile as sf | |
| from scipy import signal | |
| import io | |
def change_voice(audio, voice_preset, custom_pitch, custom_speed):
    """
    Transform voice with selected preset or custom settings.

    Args:
        audio: ``(sample_rate, samples)`` tuple, or ``None`` when nothing has
            been recorded or uploaded. ``samples`` may be 1-D, or 2-D in which
            case it is averaged over axis 1 to a single channel.
        voice_preset: Name of one of the built-in presets, or ``"Custom"`` to
            use ``custom_pitch`` / ``custom_speed``.
        custom_pitch: Pitch multiplier applied when the preset is ``"Custom"``.
        custom_speed: Speed multiplier applied when the preset is ``"Custom"``.

    Returns:
        ``((sample_rate, int16_samples), status_message)`` on success, or
        ``(None, message)`` when there is no usable input, the preset name is
        unknown, or processing fails.
    """
    if audio is None:
        return None, "Please record or upload audio first!"

    presets = {
        "Sophia (Soft)": {"pitch": 1.5, "speed": 1.0},
        "Emma (Professional)": {"pitch": 1.4, "speed": 1.1},
        "Olivia (Young)": {"pitch": 1.7, "speed": 1.15},
        "Ava (Mature)": {"pitch": 1.3, "speed": 0.95},
        "Isabella (Sweet)": {"pitch": 1.6, "speed": 1.05},
        "Mia (Dramatic)": {"pitch": 1.55, "speed": 0.9},
        "Custom": {"pitch": custom_pitch, "speed": custom_speed},
    }

    # Report an unknown preset through the status channel instead of letting a
    # KeyError escape the handler.
    settings = presets.get(voice_preset)
    if settings is None:
        return None, f"Unknown voice preset: {voice_preset!r}"
    pitch_factor = settings["pitch"]
    speed_factor = settings["speed"]

    try:
        sr, y = audio
        y = np.asarray(y)

        # An empty recording is equivalent to "no audio"; bail out before the
        # DSP calls, which cannot handle zero-length input.
        if y.size == 0:
            return None, "Please record or upload audio first!"

        # Scale signed-integer PCM to floats in [-1, 1). The divisor is the
        # magnitude of the dtype's minimum (32768 for int16, 2**31 for int32).
        if np.issubdtype(y.dtype, np.signedinteger):
            full_scale = -float(np.iinfo(y.dtype).min)
            y = y.astype(np.float32) / full_scale

        # Down-mix multi-channel input by averaging over axis 1.
        if y.ndim > 1:
            y = np.mean(y, axis=1)

        # A frequency ratio r corresponds to 12 * log2(r) semitones.
        y_shifted = librosa.effects.pitch_shift(
            y, sr=sr, n_steps=12 * np.log2(pitch_factor)
        )

        if speed_factor != 1.0:
            y_shifted = librosa.effects.time_stretch(y_shifted, rate=speed_factor)

        if pitch_factor > 1.2:
            # Mix back 10% of a narrow (+/-100 Hz) band-passed copy around each
            # of the fixed "formant" frequencies that fit below Nyquist.
            nyquist = sr / 2
            formant_freqs = [800, 1150, 2900, 3900]
            for freq in formant_freqs:
                if freq < nyquist:
                    low = max(freq - 100, 20) / nyquist
                    high = min(freq + 100, nyquist - 1) / nyquist
                    b, a = signal.butter(2, [low, high], btype='band')
                    filtered = signal.filtfilt(b, a, y_shifted)
                    y_shifted = y_shifted + filtered * 0.1

        # Normalise the peak to 0.9. Silent audio has a peak of 0; skip the
        # division there so the output stays all-zero instead of becoming NaN.
        peak = np.max(np.abs(y_shifted))
        if peak > 0:
            y_shifted = y_shifted / peak * 0.9

        y_shifted = (y_shifted * 32768).astype(np.int16)

        # NOTE(review): the leading "β" in the two status strings below looks
        # like a mis-encoded emoji; kept verbatim - confirm against the
        # original file before changing it.
        message = f"β Voice transformed to {voice_preset}!\nPitch: {pitch_factor:.2f}x | Speed: {speed_factor:.2f}x"
        return (sr, y_shifted), message
    except Exception as e:
        # UI boundary: surface any processing failure as a status message.
        return None, f"β Error processing audio: {str(e)}"
# Gradio UI: a two-column layout (input and settings on the left, result on the
# right), followed by the click wiring and a tips section.
# NOTE(review): the leading symbols in several labels below (e.g. "π€", "β¨")
# look like mis-encoded emoji. They are reproduced verbatim here - confirm the
# intended glyphs against the original file.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="pink")) as demo:
    gr.Markdown("""
    # π€ Professional Voice Changer
    ### Transform your voice with AI-powered female voice presets
    """)

    with gr.Row():
        # Left column: audio source, preset choice and custom sliders.
        with gr.Column(scale=1):
            gr.Markdown("### ποΈ Input Audio")
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="numpy",
                label="Record or Upload Audio",
            )

            gr.Markdown("### π΅ Voice Settings")
            voice_preset = gr.Radio(
                choices=[
                    "Sophia (Soft)",
                    "Emma (Professional)",
                    "Olivia (Young)",
                    "Ava (Mature)",
                    "Isabella (Sweet)",
                    "Mia (Dramatic)",
                    "Custom",
                ],
                value="Sophia (Soft)",
                label="Choose Voice Profile",
                info="Select a preset or use Custom for manual control",
            )

            # These sliders are passed to change_voice on every click; it only
            # uses them when the "Custom" profile is selected.
            with gr.Accordion("ποΈ Custom Settings", open=False):
                custom_pitch = gr.Slider(
                    minimum=1.0,
                    maximum=2.0,
                    value=1.5,
                    step=0.1,
                    label="Pitch Multiplier",
                    info="Higher = More feminine",
                )
                custom_speed = gr.Slider(
                    minimum=0.5,
                    maximum=1.5,
                    value=1.0,
                    step=0.05,
                    label="Speed Multiplier",
                    info="Adjust speaking speed",
                )

            transform_btn = gr.Button(
                "β¨ Transform Voice",
                variant="primary",
                size="lg",
            )

        # Right column: transformed audio, status text and preset descriptions.
        with gr.Column(scale=1):
            gr.Markdown("### π Output Audio")
            audio_output = gr.Audio(label="Transformed Voice", type="numpy")
            status_output = gr.Textbox(label="Status", lines=3, interactive=False)
            gr.Markdown("""
            ### π Voice Profile Details
            - **Sophia (Soft)**: Gentle and warm tone
            - **Emma (Professional)**: Clear and confident
            - **Olivia (Young)**: Energetic and bright
            - **Ava (Mature)**: Deep and authoritative
            - **Isabella (Sweet)**: Friendly and cheerful
            - **Mia (Dramatic)**: Expressive and bold
            - **Custom**: Set your own pitch and speed
            """)

    # The button feeds the four inputs to change_voice and routes its
    # (audio, status) result to the two output components.
    transform_btn.click(
        fn=change_voice,
        inputs=[audio_input, voice_preset, custom_pitch, custom_speed],
        outputs=[audio_output, status_output],
    )

    gr.Markdown("""
    ---
    ### π‘ Tips:
    - π€ **Recording**: Speak clearly and at normal volume
    - π **Upload**: Supports WAV, MP3, and other audio formats
    - ποΈ **Pitch**: Range 1.0-2.0 (higher = more feminine)
    - β‘ **Speed**: Range 0.5-1.5 (adjust speaking pace)
    - πΎ **Download**: Click the download button on the output audio player
    """)

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()