"""Gradio app: synthesize speech with edge-tts and post-process it with pydub."""

import asyncio
import contextlib
import os
import random
import re
import tempfile

import edge_tts
import gradio as gr
from pydub import AudioSegment
from pydub.effects import normalize

# 🎙️ Best voices: UI label -> edge-tts voice identifier
VOICES = {
    "US Female (Best)": "en-US-JennyNeural",
    "Indian Female": "en-IN-NeerjaNeural",
    "Indian Male": "en-IN-PrabhatNeural",
}

# Mood -> (cue used for intensity <= 6, cue used for intensity > 6)
_EMOTION_CUES = {
    "Happy": ("(smiling)", "(excited)"),
    "Sad": ("(softly)", "(low tone)"),
    "Angry": ("(firm)", "(serious)"),
}

# Matches one of . , ? ! that is directly followed by a non-space,
# non-punctuation character.  A '.' or ',' sitting between two digits is
# excluded so numbers such as "3.14" or "1,000" stay intact.
_MISSING_PAUSE = re.compile(
    r"(?:(?<!\d)[.,?!]|(?<=\d)[?!]|(?<=\d)[.,](?!\d))(?=[^\s.,?!])"
)


# 🧠 Emotion + intensity (natural)
def add_emotion(text, mood, intensity):
    """Prefix *text* with a parenthesised cue chosen from *mood* and *intensity*.

    Args:
        text: The sentence to be spoken.
        mood: One of "Happy", "Sad", "Angry"; any other value adds no cue.
        intensity: Number compared against 6; values above 6 pick the
            stronger cue.

    Returns:
        The cue and the text joined by one space, stripped of surrounding
        whitespace.
    """
    # NOTE(review): the cue is sent to the synthesizer as plain text; confirm
    # whether the selected voice reads it aloud.
    cues = _EMOTION_CUES.get(mood)
    if cues is None:
        return text.strip()
    mild, strong = cues
    prefix = mild if intensity <= 6 else strong
    return f"{prefix} {text}".strip()


# 🧠 Smart pauses (clean, not overdone)
def smart_pause(text):
    """Insert a missing space after '.', ',', '?' and '!'.

    A space is added only when the punctuation mark is immediately followed
    by a non-space character, so existing spacing is not doubled.  Runs of
    punctuation ("...", "?!") are kept together, and '.' / ',' between two
    digits is left alone.
    """
    return _MISSING_PAUSE.sub(r"\g<0> ", text)


# 🧠 Humanizer (light, not forced)
def humanize(text):
    """Normalise pause spacing and occasionally prepend a spoken filler.

    The filler "hmm... " is added when ``random.random()`` exceeds 0.6.
    """
    text = smart_pause(text)

    # occasional natural filler
    if random.random() > 0.6:
        text = "hmm... " + text

    return text


# 🔊 Audio enhancement (clean & natural)
def enhance_audio(file_path):
    """Post-process the audio at *file_path* and write it out as WAV.

    Args:
        file_path: Path to the source audio file.

    Returns:
        Path of the exported file: *file_path* with its extension replaced
        by ``_final.wav``.
    """
    audio = AudioSegment.from_file(file_path)

    # normalize
    audio = normalize(audio)

    # clarity boost: band-limit to 100 Hz - 4000 Hz
    audio = audio.high_pass_filter(100)
    audio = audio.low_pass_filter(4000)

    # mix in a copy of the signal attenuated by 35 dB
    bg = audio - 35
    audio = audio.overlay(bg)

    # slight gain
    audio = audio + 1

    # Swap only the extension; str.replace would touch every ".mp3" in the
    # path and would return the input path itself when none is present.
    root, _ext = os.path.splitext(file_path)
    out = f"{root}_final.wav"

    # export() hands back an open file handle; close it so it is not leaked.
    audio.export(out, format="wav").close()
    return out


# 🔁 TTS generator
async def tts(text, voice):
    """Synthesize *text* with edge-tts and save it to a temporary MP3.

    Args:
        text: Text to speak.
        voice: edge-tts voice identifier (a value of ``VOICES``).

    Returns:
        Path of the MP3 file.  The caller is responsible for deleting it.
    """
    # Only the unique name is needed; close the handle right away so the
    # path can be reopened for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        file_path = tmp.name

    # controlled variation (natural range only)
    rate = random.choice(["-5%", "-6%", "-7%"])
    pitch = random.choice(["+1Hz", "+2Hz"])

    communicate = edge_tts.Communicate(
        text=text,
        voice=voice,
        rate=rate,
        pitch=pitch,
    )

    try:
        await communicate.save(file_path)
    except Exception:
        # Do not leave an empty/partial temp file behind on failure.
        with contextlib.suppress(OSError):
            os.remove(file_path)
        raise

    return file_path


# 🎯 Main function
def generate(text, mood, intensity, voice_name):
    """Gradio click handler: turn the form inputs into an audio file.

    Args:
        text: Text entered by the user.
        mood: Selected emotion label.
        intensity: Selected emotion intensity.
        voice_name: Key of ``VOICES``.

    Returns:
        Path of the enhanced WAV file, or ``None`` when *text* is empty or
        whitespace only.

    Raises:
        KeyError: If *voice_name* is not a key of ``VOICES``.
    """
    if not text or not text.strip():
        return None

    voice = VOICES[voice_name]

    text = add_emotion(text, mood, intensity)
    text = humanize(text)

    mp3_file = asyncio.run(tts(text, voice))
    try:
        return enhance_audio(mp3_file)
    finally:
        # The intermediate MP3 is no longer needed once the WAV exists.
        with contextlib.suppress(OSError):
            os.remove(mp3_file)


# 🎨 UI
with gr.Blocks() as demo:
    gr.Markdown("## 🔥 Pro Human-like AI Voice (Free)")

    text_input = gr.Textbox(
        label="Enter Text",
        placeholder="Example: kya tum theek ho?",
    )

    mood = gr.Dropdown(
        ["Normal", "Happy", "Sad", "Angry"],
        value="Normal",
        label="Emotion",
    )

    intensity = gr.Slider(1, 10, value=5, label="Emotion Intensity")

    voice_select = gr.Dropdown(
        choices=list(VOICES.keys()),
        value="US Female (Best)",
        label="Voice",
    )

    output_audio = gr.Audio(label="Generated Voice")

    btn = gr.Button("Generate")

    btn.click(
        generate,
        inputs=[text_input, mood, intensity, voice_select],
        outputs=output_audio,
    )

# Launch only when run as a script so importing the module does not start
# the server.
if __name__ == "__main__":
    demo.launch()