# voice_egyptian / app.py
# FILMITO's picture
# Create app.py
# bef0e2d verified
import gradio as gr
import numpy as np
import librosa
import soundfile as sf
from scipy import signal
import io
# Pitch/speed multipliers for each built-in voice profile. "Custom" is not
# listed because its values come from the caller at request time.
_VOICE_PRESETS = {
    "Sophia (Soft)": {"pitch": 1.5, "speed": 1.0},
    "Emma (Professional)": {"pitch": 1.4, "speed": 1.1},
    "Olivia (Young)": {"pitch": 1.7, "speed": 1.15},
    "Ava (Mature)": {"pitch": 1.3, "speed": 0.95},
    "Isabella (Sweet)": {"pitch": 1.6, "speed": 1.05},
    "Mia (Dramatic)": {"pitch": 1.55, "speed": 0.9},
}

# Centre frequencies (Hz) of the narrow bands that get a small boost after a
# large upward pitch shift.
_FORMANT_FREQS_HZ = (800, 1150, 2900, 3900)
_FORMANT_HALF_WIDTH_HZ = 100
_FORMANT_GAIN = 0.1
# Pitch multiplier above which the band boost is applied.
_FORMANT_PITCH_THRESHOLD = 1.2
# Peak amplitude (full scale = 1.0) of the normalised output.
_OUTPUT_PEAK = 0.9


def _to_mono_float(y):
    """Return *y* as a one-dimensional float32 signal.

    Signed integer PCM is scaled by its full-scale value, unsigned integer
    PCM is re-centred around zero first, and anything else is cast to
    float32 unchanged. Arrays with more than one dimension are averaged over
    axis 1 (assumed to be the channel axis, i.e. shape (samples, channels)).
    """
    y = np.asarray(y)
    if y.dtype.kind == "i":
        full_scale = float(-np.iinfo(y.dtype).min)  # 32768 for int16
        y = y.astype(np.float32) / full_scale
    elif y.dtype.kind == "u":
        midpoint = (np.iinfo(y.dtype).max + 1) / 2
        y = (y.astype(np.float32) - midpoint) / midpoint
    else:
        y = y.astype(np.float32)
    if y.ndim > 1:
        y = y.mean(axis=1)
    return y


def _boost_formants(y, sr):
    """Add a small band-passed copy of *y* around each formant frequency."""
    nyquist = sr / 2
    for freq in _FORMANT_FREQS_HZ:
        if freq >= nyquist:
            continue
        low = max(freq - _FORMANT_HALF_WIDTH_HZ, 20) / nyquist
        high = min(freq + _FORMANT_HALF_WIDTH_HZ, nyquist - 1) / nyquist
        b, a = signal.butter(2, [low, high], btype="band")
        # Each band is filtered from the running signal, so boosts accumulate.
        y = y + signal.filtfilt(b, a, y) * _FORMANT_GAIN
    return y


def _to_int16(y):
    """Normalise *y* to the output peak and convert it to int16 PCM."""
    peak = np.max(np.abs(y)) if y.size else 0.0
    # A silent signal has peak 0; dividing by it would fill the output with
    # NaN, so silence is passed through unscaled instead.
    if peak > 0:
        y = y / peak * _OUTPUT_PEAK
    return (y * 32768).astype(np.int16)


def change_voice(audio, voice_preset, custom_pitch, custom_speed):
    """Transform a voice recording with a preset or with custom settings.

    Args:
        audio: ``(sample_rate, samples)`` tuple, or ``None`` when nothing has
            been recorded or uploaded.
        voice_preset: Name of a built-in profile, or ``"Custom"`` to use
            ``custom_pitch`` and ``custom_speed``.
        custom_pitch: Pitch multiplier used when the profile is ``"Custom"``.
        custom_speed: Speed multiplier used when the profile is ``"Custom"``.

    Returns:
        ``((sample_rate, int16_samples), status_message)`` on success, or
        ``(None, status_message)`` when the input is missing or invalid or
        processing fails. Errors are reported through the message rather than
        raised.
    """
    if audio is None:
        return None, "Please record or upload audio first!"

    if voice_preset == "Custom":
        settings = {"pitch": custom_pitch, "speed": custom_speed}
    else:
        settings = _VOICE_PRESETS.get(voice_preset)
        if settings is None:
            return None, f"❌ Unknown voice profile: {voice_preset}"

    try:
        pitch_factor = float(settings["pitch"])
        speed_factor = float(settings["speed"])
    except (TypeError, ValueError):
        return None, "❌ Pitch and speed multipliers must be numbers."
    # log2() of the pitch and the stretch rate both need a finite, positive
    # value; the comparison also rejects NaN.
    if not (0 < pitch_factor < float("inf") and 0 < speed_factor < float("inf")):
        return None, "❌ Pitch and speed multipliers must be greater than zero."

    try:
        sr, y = audio
        y = _to_mono_float(y)
        if y.size == 0:
            return None, "Please record or upload audio first!"

        y_shifted = y
        # A multiplier of exactly 1.0 is a no-op, so skip the processing pass.
        if pitch_factor != 1.0:
            y_shifted = librosa.effects.pitch_shift(
                y_shifted, sr=sr, n_steps=12 * np.log2(pitch_factor)
            )
        if speed_factor != 1.0:
            y_shifted = librosa.effects.time_stretch(y_shifted, rate=speed_factor)
        if pitch_factor > _FORMANT_PITCH_THRESHOLD:
            y_shifted = _boost_formants(y_shifted, sr)

        y_out = _to_int16(y_shifted)
        message = (
            f"✅ Voice transformed to {voice_preset}!\n"
            f"Pitch: {pitch_factor:.2f}x | Speed: {speed_factor:.2f}x"
        )
        return (sr, y_out), message
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, f"❌ Error processing audio: {str(e)}"
# Gradio UI: input and settings on the left, transformed audio and status on
# the right. Emoji in the user-facing strings were mis-encoded (UTF-8 bytes
# shown as Greek/Latin characters) and are restored here.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="pink")) as demo:
    gr.Markdown("""
    # 🎤 Professional Voice Changer
    ### Transform your voice with AI-powered female voice presets
    """)

    with gr.Row():
        # Left column: audio source and voice settings.
        with gr.Column(scale=1):
            gr.Markdown("### 🎙️ Input Audio")
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="numpy",
                label="Record or Upload Audio",
            )

            gr.Markdown("### 🎵 Voice Settings")
            voice_preset = gr.Radio(
                choices=[
                    "Sophia (Soft)",
                    "Emma (Professional)",
                    "Olivia (Young)",
                    "Ava (Mature)",
                    "Isabella (Sweet)",
                    "Mia (Dramatic)",
                    "Custom",
                ],
                value="Sophia (Soft)",
                label="Choose Voice Profile",
                info="Select a preset or use Custom for manual control",
            )

            # change_voice only uses these sliders for the "Custom" profile.
            with gr.Accordion("🎛️ Custom Settings", open=False):
                custom_pitch = gr.Slider(
                    minimum=1.0,
                    maximum=2.0,
                    value=1.5,
                    step=0.1,
                    label="Pitch Multiplier",
                    info="Higher = More feminine",
                )
                custom_speed = gr.Slider(
                    minimum=0.5,
                    maximum=1.5,
                    value=1.0,
                    step=0.05,
                    label="Speed Multiplier",
                    info="Adjust speaking speed",
                )

            transform_btn = gr.Button("✨ Transform Voice", variant="primary", size="lg")

        # Right column: result and status message.
        with gr.Column(scale=1):
            gr.Markdown("### 🔊 Output Audio")
            audio_output = gr.Audio(label="Transformed Voice", type="numpy")
            status_output = gr.Textbox(label="Status", lines=3, interactive=False)
            gr.Markdown("""
            ### 📝 Voice Profile Details
            - **Sophia (Soft)**: Gentle and warm tone
            - **Emma (Professional)**: Clear and confident
            - **Olivia (Young)**: Energetic and bright
            - **Ava (Mature)**: Deep and authoritative
            - **Isabella (Sweet)**: Friendly and cheerful
            - **Mia (Dramatic)**: Expressive and bold
            - **Custom**: Set your own pitch and speed
            """)

    # change_voice returns (audio, status message), matching the two outputs.
    transform_btn.click(
        fn=change_voice,
        inputs=[audio_input, voice_preset, custom_pitch, custom_speed],
        outputs=[audio_output, status_output],
    )

    gr.Markdown("""
    ---
    ### 💡 Tips:
    - 🎤 **Recording**: Speak clearly and at normal volume
    - 📁 **Upload**: Supports WAV, MP3, and other audio formats
    - 🎚️ **Pitch**: Range 1.0-2.0 (higher = more feminine)
    - ⚡ **Speed**: Range 0.5-1.5 (adjust speaking pace)
    - 💾 **Download**: Click the download button on the output audio player
    """)

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()