ContentVoiceGen / app.py
Musabbirkm's picture
Update app.py
093a373 verified
raw
history blame
6.94 kB
import gradio as gr
import asyncio
import tempfile
import logging
import requests
from VOCALIS import Agent, ContentGenerator
from edgeTTsLang import languages
# Configure logging once at import time: INFO level, with timestamp, level
# name and message in every record.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Maps each "Output Style" label to the temperature handed to the Agent.
_TEMPERATURE_BY_STYLE = {
    "Precise (Deterministic)": 0.1,
    "Very Focused (Low Randomness)": 0.3,
    "Moderately Focused (Slight Randomness)": 0.4,
    "Balanced (Moderate Creativity)": 0.5,
    "Slightly Creative (Moderate Randomness)": 0.6,
    "Creative (High Randomness)": 0.7,
    "Highly Creative (Very High Randomness)": 0.8,
    "Experimental (Maximum Randomness)": 0.95,
}
# Temperature used when the style label is not in the table above.
_DEFAULT_TEMPERATURE = 0.6


def generate_the_content(content_type, language,output_style,content_length, theme, expectations):
    """Generate text content through ``ContentGenerator``.

    An ``Agent`` is created with model ``"gemini-2.0-flash"`` and a temperature
    looked up from ``output_style`` (0.6 when the label is unknown), then
    ``ContentGenerator.generate_content()`` is called with the remaining
    arguments.

    Args:
        content_type: Kind of content to produce (e.g. ``"story"``).
        language: Language the content should be written in.
        output_style: One of the style labels in ``_TEMPERATURE_BY_STYLE``.
        content_length: Requested length, forwarded to ``ContentGenerator``.
        theme: Theme/nature of the content, forwarded to ``ContentGenerator``.
        expectations: Extra customization text, forwarded to ``ContentGenerator``.

    Returns:
        Whatever ``generate_content()`` returns on success. On failure a
        message starting with ``"Input Error:"``, ``"Network Error:"`` or
        ``"General Error:"`` is returned instead of raising; the failure is
        also written to the module logger so it is visible server-side.
    """
    try:
        temperature = _TEMPERATURE_BY_STYLE.get(output_style, _DEFAULT_TEMPERATURE)
        agent = Agent(model="gemini-2.0-flash", temperature=temperature)
        generator = ContentGenerator(agent, content_type, language, content_length, theme, expectations)
        return generator.generate_content()
    except ValueError as ve:
        logger.warning("Content generation rejected the input: %s", ve)
        return f"Input Error: {ve}"
    except requests.exceptions.ConnectionError:
        logger.exception("Could not connect to the content generation API")
        return "Network Error: Could not connect to API. Please check your internet connection."
    except Exception as e:
        # Top-level boundary for the UI: report the problem as text, but keep
        # the traceback in the log.
        logger.exception("Content generation failed")
        return f"General Error: {e}"
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize ``text`` into an MP3 file using edge-tts.

    Args:
        text: Text to speak. ``None``, empty or whitespace-only text is rejected.
        voice: Voice identifier passed to ``edge_tts.Communicate``.
        rate: Speech-rate adjustment in percent; rounded to a whole number.
        pitch: Pitch adjustment in Hz; rounded to a whole number.

    Returns:
        ``(path, None)`` with the path of the written ``.mp3`` temp file, or
        ``(None, message)`` when the input is rejected.

    Raises:
        Whatever ``edge_tts.Communicate.save`` raises; the temporary file is
        removed before the exception propagates.
    """
    if not text or not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    # Imported only once the input is known to be valid, so validation does
    # not depend on the TTS backend being importable.
    import os

    import edge_tts

    # UI sliders may deliver floats (e.g. 10.0); the "+d" format accepts ints only.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"
    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)

    # Reserve a unique path, then close the handle before edge-tts writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    try:
        await communicate.save(tmp_path)
    except BaseException:
        # Also covers task cancellation: do not leave an orphaned temp file.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path, None
# Prefixes that mark a string returned by generate_the_content as an error
# report rather than generated content.
_GENERATION_ERROR_PREFIXES = ("Error:", "Input Error:", "Network Error:", "General Error:")


async def tts_interface(content_type, language, voice, output_style, content_length, theme, Customization, rate, pitch):
    """Generate content and convert it to speech for the Gradio interface.

    Args:
        content_type: Kind of content to generate.
        language: Key into ``languages`` selecting the language.
        voice: Key into ``languages[language]`` selecting the voice.
        output_style: Style label controlling generation temperature.
        content_length: Requested content length.
        theme: Theme/nature of the content.
        Customization: Extra free-text instructions for the generator.
        rate: Speech-rate adjustment in percent.
        pitch: Pitch adjustment in Hz.

    Returns:
        A 3-tuple ``(generated_text, audio_path, message)`` matching the three
        interface outputs. ``message`` is a ``gr.Markdown`` carrying an error
        or warning, or ``None`` on success.
    """
    text_output = generate_the_content(content_type, language, output_style, content_length, theme, Customization)

    if not isinstance(text_output, str) or not text_output.strip():
        return None, None, gr.Markdown("Error: Content generation returned no text.")
    # generate_the_content reports failures as prefixed strings; these must
    # be shown to the user, not read aloud.
    if text_output.startswith(_GENERATION_ERROR_PREFIXES):
        return None, None, gr.Markdown(text_output)

    try:
        voice_id = languages[language][voice]
    except KeyError:
        return text_output, None, gr.Markdown(f"No {voice} voice is available for {language}.")

    audio_file, warning = await text_to_speech(text_output, voice_id, rate, pitch)
    if warning:
        # Always return one value per output component.
        return text_output, None, gr.Markdown(warning)
    return text_output, audio_file, None
# Choices for the "Content Type" dropdown.
_CONTENT_TYPES = [
    "story", "social", "news", "motivational", "explainer", "advertisement", "interview", "podcast",
    "testimonial", "comedy", "audiobook", "documentary", "meditation", "education", "poem", "recipe",
    "script", "summary", "email", "blog",
]

# Choices for the "Output Style" dropdown.
_OUTPUT_STYLES = [
    "Precise (Deterministic)", "Very Focused (Low Randomness)", "Moderately Focused (Slight Randomness)",
    "Balanced (Moderate Creativity)", "Slightly Creative (Moderate Randomness)",
    "Creative (High Randomness)", "Highly Creative (Very High Randomness)",
    "Experimental (Maximum Randomness)",
]

# Choices for the "Theme/Nature (Optional)" dropdown.
_THEMES = [
    "General/None", "Narrative/Storytelling", "Informative/Educational", "Descriptive/Atmospheric",
    "Persuasive/Argumentative", "Humorous/Comedic", "Emotional/Inspirational", "Technical/Scientific",
    "Historical/Cultural", "Modern/Contemporary", "Futuristic/Sci-Fi", "Fantasy/Mythical",
    "Mystery/Suspense", "Adventure/Exploration", "Realistic/Documentary", "Philosophical/Reflective",
    "Social/Relational", "Environmental/Nature", "Personal/Anecdotal",
]

_DEMO_TITLE = "✨ AI VoiceCraft: Text-to-Speech Studio 🎙️"

# Markdown shown under the title. Kept flush-left so no line is rendered as
# an indented code block.
_DEMO_DESCRIPTION = """
**Crafted by MusabirKm**
🚀 Transform your text into captivating audio! 🚀
This tool generates AI-powered content and converts it into lifelike speech using Microsoft Edge TTS.
🔹 **Features at a Glance:**
🌍 Supports multiple languages and voices
🎚️ Adjust speech rate and pitch for natural delivery
📝 Generate dynamic content: stories, news, podcasts & more
🎭 Customize tone, length, and style to fit your needs
"""

# Markdown shown below the interface.
_DEMO_ARTICLE = """
# 🌟 Welcome to AI VoiceCraft! 🌟
**Unleash the power of AI-driven text-to-speech.**
This advanced application blends **cutting-edge AI content generation** with high-quality speech synthesis to create immersive audio experiences.
## 🎀 Key Highlights:
🔊 Natural and expressive voice output
📖 AI-powered script generation tailored for speech
⚙️ Fine-tune pitch, rate, and delivery style
🔗 [Discover more AI tools@github/musabbirkm](https://github.com/musabbirkm)
🔗 [Follow MusabirKm on Hugging Face](https://huggingface.com/musabbirkm)
"""


def create_demo():
    """Build the Gradio ``Interface`` for the content-to-speech app.

    The interface calls ``tts_interface`` with nine inputs (content type,
    language, voice, output style, content length, theme, customization,
    speech rate, pitch) and shows three outputs (generated text, generated
    audio, error/warning markdown).

    Returns:
        The configured ``gr.Interface``; it is not launched here.
    """
    language_choices = list(languages.keys())
    custom_theme = gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="blue",
        neutral_hue="slate",
        radius_size=gr.themes.sizes.radius_sm,
        font=[gr.themes.GoogleFont("Montserrat"), "Arial", "sans-serif"],
    )
    demo = gr.Interface(
        fn=tts_interface,
        theme=custom_theme,
        # Order must match the positional parameters of tts_interface.
        inputs=[
            gr.Dropdown(label="Content Type", choices=_CONTENT_TYPES, value="story"),
            gr.Dropdown(label="Language", choices=language_choices, value=language_choices[0] if language_choices else ""),
            gr.Dropdown(label="Voice", choices=["Female", "Male"], value="Female"),
            gr.Dropdown(label="Output Style", choices=_OUTPUT_STYLES, value="Balanced (Moderate Creativity)"),
            gr.Slider(label="Content Length (Words)", minimum=100, maximum=1000, value=200, step=10),
            gr.Dropdown(label="Theme/Nature (Optional)", choices=_THEMES, value="General/None"),
            gr.Textbox(label="Customization", placeholder="Add any extra information to help customize the generated content"),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
        ],
        # Order must match the tuple returned by tts_interface.
        outputs=[
            gr.Textbox(label="Generated Text"),
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Markdown(label="Error/Warning", visible=True),
        ],
        title=_DEMO_TITLE,
        description=_DEMO_DESCRIPTION,
        article=_DEMO_ARTICLE,
        allow_flagging="never",
        api_name=None,
    )
    return demo
async def main():
    """Create the interface, enable its request queue and launch the server.

    The queue is configured with a default concurrency limit of 5 and the
    app is launched with the API page hidden.
    """
    app = create_demo()
    app.queue(default_concurrency_limit=5)
    app.launch(show_api=False)
if __name__ == "__main__":
    # Decide up front whether an event loop is already running (e.g. inside a
    # notebook). Catching RuntimeError around asyncio.run(main()) would also
    # catch unrelated RuntimeErrors raised by the app and launch it a second
    # time, and would leave an un-awaited coroutine behind.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running: the normal script case.
        asyncio.run(main())
    else:
        # A loop is already running; nest_asyncio allows a nested asyncio.run.
        import nest_asyncio

        nest_asyncio.apply()
        asyncio.run(main())