# Provenance (Hugging Face Spaces artifact, commented out so the file parses):
# nice / app.py
# abhiXai's picture
# Update app.py
# dcca7d6 verified
import gradio as gr
import edge_tts
import asyncio
import tempfile
import random
import re
from pydub import AudioSegment
from pydub.effects import normalize
# πŸŽ™οΈ Best voices
VOICES = {
"US Female (Best)": "en-US-JennyNeural",
"Indian Female": "en-IN-NeerjaNeural",
"Indian Male": "en-IN-PrabhatNeural"
}
# 🧠 Emotion + intensity (natural)
def add_emotion(text, mood, intensity):
    """Prepend a stage-direction cue matching mood and intensity.

    mood: one of "Happy", "Sad", "Angry"; anything else adds no cue.
    intensity: 1-10; values above 6 pick the stronger cue of the pair.
    Returns the (possibly) prefixed text, with no leading space when
    no cue applies.
    """
    # (mild cue, strong cue) per supported mood
    cues = {
        "Happy": ("(smiling)", "(excited)"),
        "Sad": ("(softly)", "(low tone)"),
        "Angry": ("(firm)", "(serious)"),
    }
    mild, strong = cues.get(mood, ("", ""))
    prefix = mild if intensity <= 6 else strong
    return f"{prefix} {text}".strip()
# 🧠 Smart pauses (clean, not overdone)
def smart_pause(text):
    """Normalize spacing after sentence punctuation (. , ? !).

    Fixes over the previous version: a punctuation mark already followed by
    a space no longer gains a second space, no trailing space is left at the
    end of the text, and punctuation inside numbers ("3.14", "1,000") is
    left untouched via a digit lookahead.
    """
    # ([.,?!]) captures the mark; (?!\d) skips decimals/thousands separators;
    # \s* swallows any existing whitespace so exactly one space is emitted.
    return re.sub(r'([.,?!])(?!\d)\s*', r'\1 ', text).strip()
# 🧠 Humanizer (light, not forced)
def humanize(text):
    """Normalize pauses and, roughly 40% of the time, add a leading filler.

    The filler ("hmm... ") is prepended at random so repeated generations
    of the same text sound slightly different.
    """
    spaced = smart_pause(text)
    if random.random() > 0.6:
        return "hmm... " + spaced
    return spaced
# 🔊 Audio enhancement (clean & natural)
def enhance_audio(file_path):
    """Post-process a TTS mp3 into a cleaner wav; return the new path.

    Steps: loudness normalization, a 100 Hz - 4 kHz band-pass for voice
    clarity, a very quiet self-overlay as light ambience, and +1 dB gain.
    Output is written next to the input as "<name>_final.wav".
    """
    track = AudioSegment.from_file(file_path)
    track = normalize(track)
    # band-pass keeps the speech range, cutting rumble and hiss
    track = track.high_pass_filter(100).low_pass_filter(4000)
    # -35 dB copy overlaid on itself gives subtle ambience, not echo
    ambience = track - 35
    track = track.overlay(ambience) + 1
    out_path = file_path.replace(".mp3", "_final.wav")
    track.export(out_path, format="wav")
    return out_path
# πŸ” TTS generator
async def tts(text, voice):
file_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
# controlled variation (natural range only)
rate = random.choice(["-5%", "-6%", "-7%"])
pitch = random.choice(["+1Hz", "+2Hz"])
communicate = edge_tts.Communicate(
text=text,
voice=voice,
rate=rate,
pitch=pitch
)
await communicate.save(file_path)
return file_path
# 🎯 Main function
def generate(text, mood, intensity, voice_name):
    """End-to-end pipeline: emotion cue -> humanize -> TTS -> enhancement.

    Returns the path of the enhanced wav, or None for blank input
    (Gradio renders None as an empty audio widget).
    """
    if not text.strip():
        return None
    processed = humanize(add_emotion(text, mood, intensity))
    # edge-tts is async; run it to completion from this sync handler
    mp3_path = asyncio.run(tts(processed, VOICES[voice_name]))
    return enhance_audio(mp3_path)
# 🎨 UI — wire the Gradio widgets to the generate() pipeline
with gr.Blocks() as demo:
    gr.Markdown("## πŸ”₯ Pro Human-like AI Voice (Free)")
    # input controls
    text_box = gr.Textbox(
        label="Enter Text",
        placeholder="Example: kya tum theek ho?",
    )
    mood_dropdown = gr.Dropdown(
        ["Normal", "Happy", "Sad", "Angry"],
        value="Normal",
        label="Emotion",
    )
    intensity_slider = gr.Slider(1, 10, value=5, label="Emotion Intensity")
    voice_dropdown = gr.Dropdown(
        choices=list(VOICES.keys()),
        value="US Female (Best)",
        label="Voice",
    )
    # output + trigger
    audio_out = gr.Audio(label="Generated Voice")
    generate_btn = gr.Button("Generate")
    generate_btn.click(
        generate,
        inputs=[text_box, mood_dropdown, intensity_slider, voice_dropdown],
        outputs=audio_out,
    )
demo.launch()