| import gradio as gr |
| import edge_tts |
| import asyncio |
| import tempfile |
| import random |
| import re |
| from pydub import AudioSegment |
| from pydub.effects import normalize |
|
|
| |
# Display label -> Microsoft Edge neural-voice identifier.
# Keys are shown in the UI dropdown; values are passed to edge_tts.Communicate.
VOICES = {
    "US Female (Best)": "en-US-JennyNeural",
    "Indian Female": "en-IN-NeerjaNeural",
    "Indian Male": "en-IN-PrabhatNeural"
}
|
|
| |
def add_emotion(text, mood, intensity):
    """Prepend an emotion cue word to *text*.

    Args:
        text: The sentence to be spoken.
        mood: One of "Happy", "Sad", "Angry"; any other value adds no cue.
        intensity: 1-10 slider value; values above 6 pick the stronger cue.

    Returns:
        The text with a parenthesised cue in front, or unchanged for an
        unknown mood.
    """
    # (mild cue, strong cue) per supported mood.
    cue_table = {
        "Happy": ("(smiling)", "(excited)"),
        "Sad": ("(softly)", "(low tone)"),
        "Angry": ("(firm)", "(serious)"),
    }
    cues = cue_table.get(mood)
    if cues is None:
        prefix = ""
    else:
        prefix = cues[0] if intensity <= 6 else cues[1]
    return f"{prefix} {text}".strip()
|
|
| |
def smart_pause(text):
    """Insert a breathing space after sentence punctuation.

    Adds exactly one space after '.', ',', '?' and '!' when the mark is
    directly followed by a word character.  The previous implementation
    blindly substituted each mark with "mark + space", which doubled
    spaces in already well-spaced text ("a. b" -> "a.  b") and shredded
    ellipses ("..." -> ". . . "); the lookahead below skips whitespace
    and runs of punctuation, so those inputs pass through unchanged.

    Args:
        text: Raw input text.

    Returns:
        The text with single spaces guaranteed after punctuation marks.
    """
    return re.sub(r'([.,?!])(?=[^\s.,?!])', r'\1 ', text)
|
|
| |
def humanize(text):
    """Make *text* sound less robotic before synthesis.

    Normalises pause spacing via smart_pause(), then — with roughly 40%
    probability — prepends a "hmm... " filler so repeated generations do
    not all start identically.

    Args:
        text: The (possibly emotion-prefixed) text to speak.

    Returns:
        The adjusted text.
    """
    spaced = smart_pause(text)
    # random.random() > 0.6 keeps the filler occasional, not constant.
    if random.random() > 0.6:
        return "hmm... " + spaced
    return spaced
|
|
| |
def enhance_audio(file_path):
    """Post-process a synthesized clip into a cleaner WAV file.

    Pipeline (order matters): loudness-normalize, band-pass to a
    voice-friendly range, layer a quiet copy of the track under itself
    for subtle depth, then apply a small final gain.

    Args:
        file_path: Path to the source audio (expects a ".mp3" suffix,
            which is swapped for "_final.wav" in the output name).

    Returns:
        Path of the exported WAV file.
    """
    track = AudioSegment.from_file(file_path)

    # Normalize first so the filters operate on a consistent level.
    track = normalize(track)

    # Cut rumble below 100 Hz and hiss above 4 kHz.
    track = track.high_pass_filter(100).low_pass_filter(4000)

    # Overlay a -35 dB copy of the track on itself for mild thickness.
    track = track.overlay(track - 35)

    # Final +1 dB lift.
    track = track + 1

    out_path = file_path.replace(".mp3", "_final.wav")
    track.export(out_path, format="wav")
    return out_path
|
|
| |
async def tts(text, voice):
    """Synthesize *text* to an MP3 file with Microsoft Edge TTS.

    Args:
        text: The text to speak.
        voice: An edge-tts voice identifier (e.g. "en-US-JennyNeural").

    Returns:
        Path of the generated MP3 temp file (caller owns the file).
    """
    # Create the temp path via a context manager so the open OS handle is
    # closed before edge-tts writes to it.  The previous
    # NamedTemporaryFile(...).name one-liner leaked the descriptor and,
    # on Windows, kept the path locked against the subsequent write.
    # delete=False keeps the file on disk after the handle closes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        file_path = tmp.name

    # Slightly randomized prosody so repeated generations don't sound
    # robotically identical.
    rate = random.choice(["-5%", "-6%", "-7%"])
    pitch = random.choice(["+1Hz", "+2Hz"])

    communicate = edge_tts.Communicate(
        text=text,
        voice=voice,
        rate=rate,
        pitch=pitch
    )

    await communicate.save(file_path)
    return file_path
|
|
| |
def generate(text, mood, intensity, voice_name):
    """End-to-end pipeline driven by the Gradio "Generate" button.

    Args:
        text: User-entered text; blank/whitespace input short-circuits.
        mood: Emotion dropdown value ("Normal"/"Happy"/"Sad"/"Angry").
        intensity: Emotion intensity slider value (1-10).
        voice_name: Display key into the VOICES mapping.

    Returns:
        Path of the enhanced WAV file, or None for empty input.
    """
    # Guard clause: nothing to synthesize.
    if not text.strip():
        return None

    voice = VOICES[voice_name]

    # Text shaping: emotion cue, then pause/filler humanization.
    styled = humanize(add_emotion(text, mood, intensity))

    # Synthesis is async; run it to completion, then post-process.
    raw_mp3 = asyncio.run(tts(styled, voice))
    return enhance_audio(raw_mp3)
|
|
| |
# --- Gradio UI -------------------------------------------------------------
# Fix: the heading previously read "## π₯ ..." — a mojibake artifact of the
# 🔥 emoji being decoded with the wrong charset; restored to the emoji.
with gr.Blocks() as demo:
    gr.Markdown("## 🔥 Pro Human-like AI Voice (Free)")

    # Free-form text to synthesize.
    text_input = gr.Textbox(
        label="Enter Text",
        placeholder="Example: kya tum theek ho?"
    )

    # Emotion controls feed add_emotion() via generate().
    mood = gr.Dropdown(
        ["Normal", "Happy", "Sad", "Angry"],
        value="Normal",
        label="Emotion"
    )

    intensity = gr.Slider(1, 10, value=5, label="Emotion Intensity")

    # Keys of VOICES are the user-facing labels.
    voice_select = gr.Dropdown(
        choices=list(VOICES.keys()),
        value="US Female (Best)",
        label="Voice"
    )

    output_audio = gr.Audio(label="Generated Voice")

    btn = gr.Button("Generate")
    btn.click(generate, inputs=[text_input, mood, intensity, voice_select], outputs=output_audio)

demo.launch()