Spaces:

maria355
/

VoiceVision-Creative-AI

Sleeping

App Files Community

maria355 committed on Sep 14, 2025

Commit

7746fc0

verified ·

1 Parent(s): b820505

Update app.py

Browse files

Files changed (1) hide show

app.py +340 -663

app.py CHANGED Viewed

@@ -2,726 +2,403 @@ import streamlit as st
 import speech_recognition as sr
 import requests
 import io
-import base64
 from PIL import Image
 import google.generativeai as genai
 import time
-import json
 import os
 from datetime import datetime
-# Page configuration
 st.set_page_config(
-    page_title="VociArt - Voice-Controlled AI Creator",
-    page_icon="🎤",
-    layout="wide"
 )
-# Custom CSS for better UI
-st.markdown("""
-<style>
-    .main-header {
-        text-align: center;
-        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-        -webkit-background-clip: text;
-        -webkit-text-fill-color: transparent;
-        font-size: 3rem;
-        font-weight: bold;
-        margin-bottom: 2rem;
-    }
-    .feature-box {
-        background: #f0f2f6;
-        padding: 20px;
-        border-radius: 10px;
-        margin: 10px 0;
-        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-    }
-    .output-box {
-        background: #ffffff;
-        border: 2px solid #e0e0e0;
-        padding: 20px;
-        border-radius: 10px;
-        margin: 10px 0;
-        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-    }
-    .status-box {
-        background: #e8f5e8;
-        border: 1px solid #4caf50;
-        padding: 10px;
-        border-radius: 5px;
-        margin: 10px 0;
-    }
-    .warning-box {
-        background: #fff3cd;
-        border: 1px solid #ffc107;
-        padding: 10px;
-        border-radius: 5px;
-        margin: 10px 0;
-    }
-</style>
-""", unsafe_allow_html=True)
 # Initialize session state
-if 'text_output' not in st.session_state:
-    st.session_state.text_output = ""
-if 'image_output' not in st.session_state:
-    st.session_state.image_output = None
-if 'voice_input' not in st.session_state:
-    st.session_state.voice_input = ""
-if 'processed_prompt' not in st.session_state:
-    st.session_state.processed_prompt = ""
-if 'hf_token' not in st.session_state:
-    st.session_state.hf_token = ""
-# Initialize Gemini API
-def initialize_gemini():
-    """Initialize Gemini API with secret key"""
     try:
-        gemini_key = st.secrets.get("GEMINI_API_KEY")
-        if gemini_key:
-            genai.configure(api_key=gemini_key)
-            return True
         else:
-            st.error("⚠️ Gemini API key not found in secrets. Please configure GEMINI_API_KEY in your Streamlit secrets.")
-            return False
     except Exception as e:
-        st.error(f"❌ Error initializing Gemini API: {str(e)}")
-        return False
-# Hugging Face API configuration
-HF_API_URL_TEXT = "https://api-inference.huggingface.co/models/microsoft/DialoGPT-medium"
-HF_API_URL_IMAGE = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"
-def query_huggingface(api_url, payload, headers):
-    """Query Hugging Face API with retry logic"""
-    max_retries = 3
-    for attempt in range(max_retries):
-        try:
-            response = requests.post(api_url, headers=headers, json=payload, timeout=30)
-            if response.status_code == 503:
-                # Model is loading, wait and retry
-                st.warning(f"🔄 Model is loading... Attempt {attempt + 1}/{max_retries}")
-                time.sleep(20)  # Wait for model to load
-                continue
-            elif response.status_code == 200:
-                return response
-            else:
-                st.error(f"❌ API Error: {response.status_code} - {response.text}")
-                return None
-        except requests.exceptions.Timeout:
-            st.warning(f"⏰ Request timeout. Attempt {attempt + 1}/{max_retries}")
-            time.sleep(5)
-        except Exception as e:
-            st.error(f"❌ Request failed: {str(e)}")
             return None
-    return None
-def generate_text_content(prompt, content_type="general", hf_token=""):
-    """Generate text content using Hugging Face free models"""
-    if not hf_token:
-        st.warning("⚠️ Hugging Face token required for text generation")
-        return generate_fallback_content(prompt, content_type)
-    # Enhanced prompts based on content type
-    prompt_templates = {
-        "blog": f"Write a comprehensive blog post about '{prompt}'. Include an engaging introduction, detailed main points with examples, and a strong conclusion. Make it informative and well-structured.",
-        "social": f"Create an engaging social media post about '{prompt}'. Make it catchy, shareable, and include relevant hashtags. Keep it concise but impactful.",
-        "story": f"Write a creative short story about '{prompt}'. Include vivid descriptions, interesting characters, and an engaging narrative with a clear beginning, middle, and end.",
-        "caption": f"Write a compelling and creative caption about '{prompt}'. Make it engaging, relevant, and suitable for social media sharing.",
-        "general": f"Create detailed, informative content about '{prompt}'. Provide comprehensive information with clear explanations and useful insights."
-    }
-    enhanced_prompt = prompt_templates.get(content_type, prompt_templates["general"])
-    # Try different models for better text generation
-    text_models = [
-        "microsoft/DialoGPT-large",
-        "facebook/blenderbot-400M-distill",
-        "microsoft/DialoGPT-medium",
-        "gpt2"
-    ]
-    headers = {"Authorization": f"Bearer {hf_token}"}
-    for model in text_models:
-        try:
-            api_url = f"https://api-inference.huggingface.co/models/{model}"
-            payload = {
-                "inputs": enhanced_prompt,
-                "parameters": {
-                    "max_length": 800,
-                    "temperature": 0.8,
-                    "do_sample": True,
-                    "top_p": 0.9,
-                    "repetition_penalty": 1.2
-                }
-            }
-            with st.spinner(f"🤖 Generating {content_type} content using {model}..."):
-                response = query_huggingface(api_url, payload, headers)
-                if response and response.status_code == 200:
-                    result = response.json()
-                    if isinstance(result, list) and len(result) > 0:
-                        generated_text = result[0].get('generated_text', '')
-                        # Clean up the generated text
-                        if generated_text and len(generated_text.strip()) > 50:
-                            return clean_generated_text(generated_text, enhanced_prompt)
-                    elif isinstance(result, dict):
-                        generated_text = result.get('generated_text', result.get('response', ''))
-                        if generated_text and len(generated_text.strip()) > 50:
-                            return clean_generated_text(generated_text, enhanced_prompt)
-        except Exception as e:
-            st.warning(f"⚠️ Model {model} failed: {str(e)}")
-            continue
-    # Fallback: Generate structured content
-    st.info("🔄 Using fallback content generation...")
-    return generate_fallback_content(prompt, content_type)
-def clean_generated_text(text, original_prompt):
-    """Clean and format generated text"""
-    # Remove the original prompt from the beginning if it's repeated
-    if text.startswith(original_prompt):
-        text = text[len(original_prompt):].strip()
-    # Remove excessive repetition and clean up
-    lines = text.split('\n')
-    cleaned_lines = []
-    for line in lines:
-        line = line.strip()
-        if line and line not in cleaned_lines[-3:]:  # Avoid immediate repetition
-            cleaned_lines.append(line)
-    return '\n'.join(cleaned_lines)
-def generate_fallback_content(prompt, content_type):
-    """Generate fallback content when API fails"""
-    templates = {
-        "blog": f"""# {prompt.title()}
-## Introduction
-{prompt} is a fascinating topic that deserves comprehensive exploration. In today's rapidly evolving world, understanding {prompt} has become increasingly important for various reasons.
-## Key Insights
-• **Foundation**: The fundamental aspects of {prompt} provide essential knowledge that forms the basis for deeper understanding.
-• **Applications**: Real-world applications of {prompt} demonstrate its practical value and relevance in different contexts.
-• **Impact**: The influence of {prompt} extends beyond its immediate scope, affecting related areas and creating ripple effects.
-• **Future Perspectives**: Looking ahead, {prompt} continues to evolve and adapt, presenting new opportunities and challenges.
-## Practical Considerations
-When dealing with {prompt}, it's important to consider multiple perspectives and approaches. Each situation may require different strategies and solutions.
-## Conclusion
-{prompt} remains a significant and evolving topic. By understanding its various dimensions, we can better appreciate its importance and make informed decisions related to it.
-*This content serves as a starting point for deeper exploration of {prompt}.*""",
-        "social": f"""🌟 Exploring the fascinating world of {prompt}! ✨
-There's something truly captivating about {prompt} that deserves our attention. Whether you're just discovering it or deepening your understanding, there's always more to learn! 🚀
-💡 Key takeaway: {prompt} offers unique perspectives that can transform how we think about related topics.
-What's your experience with {prompt}? Share your thoughts below! 👇
-#Innovation #Learning #Growth #Discovery #Knowledge #AI #Content #Trending""",
-        "story": f"""**The Discovery of {prompt}**
-Sarah had always been curious about {prompt}, but she never imagined how deeply it would impact her life. It started on a Tuesday morning, when she stumbled upon something extraordinary.
-The morning light filtered through her window as she began to explore {prompt} more deeply. What she discovered challenged everything she thought she knew. Each new piece of information was like solving a puzzle, revealing a bigger picture that was both beautiful and complex.
-As days turned into weeks, Sarah's understanding grew. She realized that {prompt} wasn't just a concept—it was a gateway to new possibilities. The connections she made, the insights she gained, all led to a profound transformation in her perspective.
-Looking back, Sarah smiled. Her journey with {prompt} had taught her that the most meaningful discoveries often come when we least expect them. Sometimes, the best adventures begin with simple curiosity.
-*The end of one discovery is often the beginning of another.*""",
-        "caption": f"""✨ Discovering the magic of {prompt} ✨
-Sometimes the most beautiful moments come from exploring new ideas and perspectives. {prompt} reminds us that there's always something wonderful waiting to be discovered.
-🌟 What inspires you today?
-#Inspiration #Discovery #Creativity #Growth #Innovation #AI #Content""",
-        "general": f"""**Understanding {prompt}**
-{prompt} represents an important concept that merits thoughtful consideration. In our interconnected world, topics like {prompt} play significant roles in shaping our understanding and experiences.
-**Key Aspects:**
-- **Definition**: {prompt} encompasses various elements that contribute to its overall significance
-- **Relevance**: The importance of {prompt} becomes apparent when we examine its applications and implications
-- **Perspectives**: Different viewpoints on {prompt} offer valuable insights and enhance our comprehension
-**Practical Applications:**
-{prompt} finds relevance in multiple contexts, from theoretical discussions to practical implementations. Understanding these applications helps us appreciate its broader significance.
-**Conclusion:**
-{prompt} continues to be a topic worth exploring. As we deepen our understanding, we discover new dimensions and possibilities that enrich our knowledge and perspective.
-*This overview provides a foundation for further exploration of {prompt}.*"""
-    }
-    return templates.get(content_type, templates["general"])
-def generate_image(prompt, hf_token=""):
-    """Generate image using Hugging Face free models"""
-    if not hf_token:
-        st.warning("⚠️ Hugging Face token required for image generation")
         return None
-    headers = {"Authorization": f"Bearer {hf_token}"}
-    # Enhanced prompt for better image generation
-    enhanced_prompts = [
-        f"high quality, detailed, beautiful {prompt}, digital art, trending on artstation, 4k, professional",
-        f"stunning {prompt}, masterpiece, highly detailed, photorealistic, premium quality",
-        f"artistic {prompt}, beautiful composition, vibrant colors, professional photography style"
-    ]
-    # Try different models and prompts
-    image_models = [
-        "stabilityai/stable-diffusion-2-1",
-        "runwayml/stable-diffusion-v1-5",
-        "CompVis/stable-diffusion-v1-4"
-    ]
-    for model in image_models:
-        for enhanced_prompt in enhanced_prompts:
-            try:
-                api_url = f"https://api-inference.huggingface.co/models/{model}"
-                payload = {"inputs": enhanced_prompt}
-                with st.spinner(f"🎨 Creating image using {model}..."):
-                    response = query_huggingface(api_url, payload, headers)
-                    if response and response.status_code == 200:
-                        image_bytes = response.content
-                        if len(image_bytes) > 1000:  # Ensure we got actual image data
-                            image = Image.open(io.BytesIO(image_bytes))
-                            return image
-            except Exception as e:
-                st.warning(f"⚠️ Image generation with {model} failed: {str(e)}")
-                continue
-    st.warning("🎨 Image generation failed. This might be due to model loading, API limits, or high demand. Please try again later.")
-    return None
-def voice_to_text():
-    """Convert voice input to text"""
     recognizer = sr.Recognizer()
     try:
         with sr.Microphone() as source:
-            st.info("🎤 Listening... Speak clearly for 10-15 seconds!")
             recognizer.adjust_for_ambient_noise(source, duration=1)
-            # Increased timeout and phrase time limit for better capture
-            audio = recognizer.listen(source, timeout=15, phrase_time_limit=20)
-        with st.spinner("🔄 Converting speech to text..."):
-            # Try Google Speech Recognition first
-            try:
-                text = recognizer.recognize_google(audio)
-                return text
-            except:
-                # Fallback to other recognition services if available
-                try:
-                    text = recognizer.recognize_sphinx(audio)
-                    return text
-                except:
-                    raise sr.UnknownValueError("Could not understand audio")
-    except sr.RequestError as e:
-        st.error(f"❌ Could not request results from speech recognition service: {e}")
-        return None
-    except sr.UnknownValueError:
-        st.warning("⚠️ Could not understand the audio clearly. Please try speaking more clearly and ensure your microphone is working.")
-        return None
     except sr.WaitTimeoutError:
-        st.warning("⏰ No speech detected within the time limit. Please try again and speak immediately after clicking the button.")
-        return None
     except Exception as e:
-        st.error(f"❌ An error occurred during voice recognition: {e}")
-        return None
-def translate_and_enhance_text(text, target_language="en"):
-    """Translate text and enhance it for better AI generation using Gemini API"""
     try:
-        if not initialize_gemini():
-            return text  # Return original if Gemini is not available
-        model = genai.GenerativeModel('gemini-pro')
-        if target_language == "en":
-            # Just enhance the prompt for better AI generation
-            prompt = f"""Enhance the following prompt for better AI content and image generation. Make it more descriptive and detailed while keeping the core meaning:
-Original prompt: "{text}"
-Enhanced prompt:"""
-        else:
-            # Translate and enhance
-            language_names = {
-                "es": "Spanish", "fr": "French", "de": "German",
-                "it": "Italian", "pt": "Portuguese", "hi": "Hindi", "ur": "Urdu"
-            }
-            lang_name = language_names.get(target_language, target_language)
-            prompt = f"""Translate the following text to {lang_name} and then enhance it for better AI content and image generation. Make it more descriptive and detailed:
-Original text: "{text}"
-Translated and enhanced prompt:"""
-        response = model.generate_content(prompt)
-        enhanced_text = response.text.strip()
-        # Clean up the response
-        if enhanced_text.lower().startswith("enhanced prompt:"):
-            enhanced_text = enhanced_text[16:].strip()
-        elif enhanced_text.lower().startswith("translated and enhanced prompt:"):
-            enhanced_text = enhanced_text[31:].strip()
-        return enhanced_text if enhanced_text else text
     except Exception as e:
-        st.warning(f"⚠️ Translation/Enhancement failed: {e}")
         return text
-# Main App Interface
-st.markdown('<h1 class="main-header">🎤 VociArt</h1>', unsafe_allow_html=True)
-st.markdown('<p style="text-align: center; font-size: 1.2rem; color: #666;">Voice-Controlled AI Content Creator & Image Generator</p>', unsafe_allow_html=True)
-# Check Gemini API status
-gemini_status = initialize_gemini()
-if gemini_status:
-    st.markdown('<div class="status-box">✅ Gemini API: Connected</div>', unsafe_allow_html=True)
-else:
-    st.markdown('<div class="warning-box">⚠️ Gemini API: Not configured (translation features disabled)</div>', unsafe_allow_html=True)
-# Sidebar for configuration
-with st.sidebar:
-    st.header("⚙️ Configuration")
-    # HF Token input
-    st.subheader("🔑 Hugging Face Token")
-    hf_token = st.text_input("Enter your HF token", type="password",
-                            help="Get your free token from huggingface.co/settings/tokens")
-    if hf_token:
-        st.session_state.hf_token = hf_token
-        st.success("✅ Token saved!")
-    st.markdown("---")
-    st.subheader("🌍 Language Settings")
-    target_language = st.selectbox("Output Language",
-                                  ["en", "es", "fr", "de", "it", "pt", "hi", "ur"],
-                                  help="Select target language for translation")
-    st.subheader("📝 Content Type")
-    content_type = st.selectbox("Choose content type",
-                               ["general", "blog", "social", "story", "caption"],
-                               help="Select the type of content to generate")
-    st.markdown("---")
-    # API Status
-    st.subheader("📊 API Status")
-    if st.session_state.hf_token:
-        st.success("🤗 Hugging Face: Ready")
     else:
-        st.warning("🤗 Hugging Face: Token needed")
-    if gemini_status:
-        st.success("🔮 Gemini: Ready")
-    else:
-        st.error("🔮 Gemini: Not configured")
-    st.markdown("---")
-    # Instructions
-    st.subheader("📖 Quick Guide")
-    st.markdown("""
-    1. **Add HF Token** above
-    2. **Choose settings** (language & content type)
-    3. **Record voice** using audio widget or type prompt
-    4. **Generate** text/image content
-    5. **Download** your creations
-    **Note:** Voice recording uses your browser's built-in audio recorder for better compatibility with cloud deployments.
-    """)
-# Main content area
-col1, col2 = st.columns([1, 1])
-with col1:
-    st.markdown('<div class="feature-box">', unsafe_allow_html=True)
-    st.subheader("🎤 Voice Input")
-    if st.button("🎙️ Start Voice Recording", type="primary", use_container_width=True):
-        voice_text = voice_to_text()
-        if voice_text:
-            st.session_state.voice_input = voice_text
-            # Translate and enhance if needed
-            enhanced = translate_and_enhance_text(voice_text, target_language)
-            st.session_state.processed_prompt = enhanced
-    # Manual text input as fallback
-    st.subheader("✍️ Manual Text Input")
-    manual_input = st.text_area("Type your prompt here:", height=100,
-                               placeholder="Enter your creative prompt...")
-    if st.button("🚀 Process Text", use_container_width=True):
-        if manual_input:
-            st.session_state.voice_input = manual_input
-            # Translate and enhance if needed
-            enhanced = translate_and_enhance_text(manual_input, target_language)
-            st.session_state.processed_prompt = enhanced
-    # Display current inputs
-    if st.session_state.voice_input:
-        st.success(f"**Original Input:** {st.session_state.voice_input}")
-    if st.session_state.processed_prompt and st.session_state.processed_prompt != st.session_state.voice_input:
-        st.info(f"**Enhanced Prompt:** {st.session_state.processed_prompt}")
-    st.markdown('</div>', unsafe_allow_html=True)
-with col2:
-    st.markdown('<div class="feature-box">', unsafe_allow_html=True)
-    st.subheader("🤖 AI Generation")
-    generation_col1, generation_col2 = st.columns([1, 1])
-    with generation_col1:
-        generate_text_disabled = not st.session_state.processed_prompt or not st.session_state.hf_token
-        if st.button("📝 Generate Text",
-                    disabled=generate_text_disabled,
-                    use_container_width=True):
-            if st.session_state.processed_prompt and st.session_state.hf_token:
-                st.session_state.text_output = generate_text_content(
-                    st.session_state.processed_prompt,
-                    content_type,
-                    st.session_state.hf_token
-                )
-    with generation_col2:
-        generate_image_disabled = not st.session_state.processed_prompt or not st.session_state.hf_token
-        if st.button("🎨 Generate Image",
-                    disabled=generate_image_disabled,
-                    use_container_width=True):
-            if st.session_state.processed_prompt and st.session_state.hf_token:
-                st.session_state.image_output = generate_image(
-                    st.session_state.processed_prompt,
-                    st.session_state.hf_token
-                )
-    # Status messages
-    if not st.session_state.processed_prompt:
-        st.info("💡 Add a prompt to start generating!")
-    elif not st.session_state.hf_token:
-        st.warning("⚠️ Add Hugging Face token to enable generation!")
-    st.markdown('</div>', unsafe_allow_html=True)
-# Output Display
-st.markdown("## 📤 Generated Content")
-output_col1, output_col2 = st.columns([1, 1])
-with output_col1:
-    st.markdown('<div class="output-box">', unsafe_allow_html=True)
-    st.subheader("📝 Generated Text")
-    if st.session_state.text_output:
-        # Display text in a nice container
-        formatted_text = st.session_state.text_output.replace('\n', '<br>')
-        st.markdown(f"""
-        <div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #007bff;">
-        {formatted_text}
-        </div>
-        """, unsafe_allow_html=True)
-        # Download and copy buttons
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        download_col1, download_col2 = st.columns([1, 1])
-        with download_col1:
-            st.download_button(
-                label="💾 Download Text",
-                data=st.session_state.text_output,
-                file_name=f"vociart_text_{timestamp}.txt",
-                mime="text/plain",
-                use_container_width=True
-            )
-        with download_col2:
-            if st.button("📋 Copy Text", use_container_width=True):
-                st.success("✅ Text copied to clipboard!")
-        # Word count
-        word_count = len(st.session_state.text_output.split())
-        st.caption(f"📊 Word count: {word_count}")
-    else:
-        st.info("📝 Generated text will appear here...")
-        st.markdown("""
-        <div style="text-align: center; padding: 20px; color: #666;">
-        <i>Your AI-generated content will be displayed here</i>
-        </div>
-        """, unsafe_allow_html=True)
-    st.markdown('</div>', unsafe_allow_html=True)
-with output_col2:
-    st.markdown('<div class="output-box">', unsafe_allow_html=True)
-    st.subheader("🎨 Generated Image")
-    if st.session_state.image_output:
-        st.image(st.session_state.image_output,
-                caption="AI Generated Image",
-                use_column_width=True)
-        # Download button for image
-        img_buffer = io.BytesIO()
-        st.session_state.image_output.save(img_buffer, format='PNG')
-        img_buffer.seek(0)
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        image_download_col1, image_download_col2 = st.columns([1, 1])
-        with image_download_col1:
-            st.download_button(
-                label="💾 Download Image",
-                data=img_buffer.getvalue(),
-                file_name=f"vociart_image_{timestamp}.png",
-                mime="image/png",
-                use_container_width=True
-            )
-        with image_download_col2:
-            # Display image info
-            width, height = st.session_state.image_output.size
-            st.caption(f"📊 Size: {width}×{height}")
-    else:
-        st.info("🎨 Generated image will appear here...")
-        st.markdown("""
-        <div style="text-align: center; padding: 20px; color: #666;">
-        <i>Your AI-generated image will be displayed here</i>
-        </div>
-        """, unsafe_allow_html=True)
-    st.markdown('</div>', unsafe_allow_html=True)
-# Action buttons at the bottom
-st.markdown("---")
-action_col1, action_col2, action_col3 = st.columns([1, 1, 1])
-with action_col1:
-    if st.button("🗑️ Clear All Content", type="secondary", use_container_width=True):
-        st.session_state.text_output = ""
-        st.session_state.image_output = None
-        st.session_state.voice_input = ""
-        st.session_state.processed_prompt = ""
-        st.success("✅ All content cleared!")
-        st.rerun()
-with action_col2:
-    if st.button("🔄 Reset Session", use_container_width=True):
-        for key in st.session_state.keys():
-            del st.session_state[key]
-        st.success("✅ Session reset!")
-        st.rerun()
-with action_col3:
-    if st.button("💡 Show Tips", use_container_width=True):
-        st.info("""
-        **💡 Pro Tips:**
-        - Speak clearly and close to microphone
-        - Use descriptive prompts for better results
-        - Try different content types for variety
-        - Wait for models to load (first time may be slow)
-        """)
-# Footer with detailed instructions
-st.markdown("---")
-with st.expander("📖 Detailed Instructions & Setup Guide"):
-    st.markdown("""
-    ### 🚀 Complete Setup Guide:
-    #### **1. Get Your Free API Keys:**
-    - 🤗 **Hugging Face Token**:
-      - Go to [huggingface.co](https://huggingface.co)
-      - Sign up for free account
-      - Go to Settings → Access Tokens
-      - Create new token with "Read" permissions
-      - Copy and paste in the sidebar
-    #### **2. Configure Gemini API (Admin):**
-    - 🔮 **For App Administrator**: Add `GEMINI_API_KEY` to Streamlit secrets
-    - Get free key from [Google AI Studio](https://aistudio.google.com)
-    #### **3. Using VociArt:**
-    **Voice Method:**
-    1. Click "🎙️ Start Voice Recording"
-    2. Speak clearly when you see "Listening..."
-    3. Wait for speech-to-text conversion
-    4. Your prompt will be enhanced automatically
-    **Manual Method:**
-    1. Type your prompt in the text area
-    2. Click "🚀 Process Text"
-    3. Your prompt will be enhanced for better AI generation
-    **Generation:**
-    1. Choose your content type (blog, social, story, etc.)
-    2. Select target language for translation
-    3. Click "📝 Generate Text" or "🎨 Generate Image"
-    4. Wait for AI to create your content
-    5. Download or copy your results
-    ### 🎯 Best Practices:
-    - **Voice Input**: Speak slowly and clearly, avoid background noise
-    - **Prompts**: Be descriptive and specific for better results
-    - **Content Types**: Choose appropriate type for your needs
-    - **Languages**: Translation works best with clear, simple prompts
-    - **Patience**: First-time model loading may take 20-30 seconds
-    ### 🔧 Troubleshooting:
-    - **No audio detected**: Check microphone permissions and try again
-    - **API errors**: Verify your Hugging Face token is valid
-    - **Model loading**: Wait patiently, models load on first use
-    - **Poor results**: Try rephrasing your prompt or changing content type
-    ### 📱 Features:
-    - ✅ **100% Free**: Uses only free APIs and models
-    - ✅ **No Installation**: Runs in browser
-    - ✅ **Multi-language**: Supports 8 languages with translation
-    - ✅ **Voice Control**: Complete voice-to-content workflow
-    - ✅ **Multiple Formats**: Blog posts, social media, stories, captions
-    - ✅ **Download**: Save text and images locally
-    - ✅ **Responsive**: Works on desktop and mobile
-    """)
-# Version and credits
-st.markdown("""
----
-<div style="text-align: center; color: #666; font-size: 0.9rem;">
-<strong>VociArt v1.0</strong> - Voice-Controlled AI Content Creator<br>
-Powered by Hugging Face 🤗 | Google Gemini 🔮 | Streamlit ⚡<br>
-<em>Create amazing content with just your voice!</em>
-</div>
-""", unsafe_allow_html=True)

 import speech_recognition as sr
 import requests
 import io
 from PIL import Image
+import base64
 import google.generativeai as genai
 import time
 import os
 from datetime import datetime
+import json
+# Configure page
 st.set_page_config(
+    page_title="VociArt - Voice AI Creator",
+    page_icon="🎨",
+    layout="wide",
+    initial_sidebar_state="expanded"
 )
 # Initialize session state
+if 'generated_content' not in st.session_state:
+    st.session_state.generated_content = ""
+if 'generated_image' not in st.session_state:
+    st.session_state.generated_image = None
+if 'voice_text' not in st.session_state:
+    st.session_state.voice_text = ""
+if 'history' not in st.session_state:
+    st.session_state.history = []
+# Configure Gemini API
+@st.cache_resource
+def configure_gemini():
+    try:
+        # Get API key from Streamlit secrets
+        api_key = st.secrets["GEMINI_API_KEY"]
+        genai.configure(api_key=api_key)
+        return genai.GenerativeModel('gemini-pro')
+    except Exception as e:
+        st.error(f"Error configuring Gemini API: {str(e)}")
+        return None
+# Hugging Face API URLs
+HF_TEXT_API_URL = "https://api-inference.huggingface.co/models/microsoft/DialoGPT-large"
+HF_IMAGE_API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"
+def query_huggingface_text(payload):
+    """Generate text content using Hugging Face API"""
     try:
+        response = requests.post(HF_TEXT_API_URL, json=payload)
+        if response.status_code == 200:
+            return response.json()
         else:
+            return {"error": f"API returned status code {response.status_code}"}
     except Exception as e:
+        return {"error": str(e)}
def query_huggingface_image(payload, timeout=120):
    """Generate image using Hugging Face API.

    Args:
        payload: JSON-serialisable request body posted to ``HF_IMAGE_API_URL``.
        timeout: Seconds to wait for the server before giving up, so a stalled
            endpoint cannot block the session forever.

    Returns:
        The raw response bytes on HTTP 200, otherwise ``None``. Every failure
        path reports the reason through ``st.error`` so the user is not left
        with a silently missing image.
    """
    try:
        response = requests.post(HF_IMAGE_API_URL, json=payload, timeout=timeout)
        if response.status_code == 200:
            return response.content
    except Exception as e:
        st.error(f"Image generation error: {str(e)}")
        return None
    # Non-200 answer: surface the status instead of failing silently.
    st.error(f"Image generation failed: API returned status code {response.status_code}")
    return None
def speech_to_text():
    """Capture one phrase from the microphone and return its transcription.

    Listens through ``sr.Microphone`` and transcribes the audio with
    ``recognize_google``. Every failure is shown with ``st.error`` and yields
    an empty string, so the caller only has to test the result for truthiness.
    """
    def _fail(message):
        # Show the problem to the user and hand back the "nothing heard" value.
        st.error(message)
        return ""

    recognizer = sr.Recognizer()
    try:
        with sr.Microphone() as mic:
            st.info("🎤 Listening... Speak now!")
            # Sample the background for one second before listening.
            recognizer.adjust_for_ambient_noise(mic, duration=1)
            captured_audio = recognizer.listen(mic, timeout=10, phrase_time_limit=30)
        st.info("🔄 Processing speech...")
        return recognizer.recognize_google(captured_audio)
    except sr.WaitTimeoutError:
        return _fail("⏰ Listening timeout. Please try again.")
    except sr.UnknownValueError:
        return _fail("🔇 Could not understand audio. Please speak clearly.")
    except sr.RequestError as e:
        return _fail(f"❌ Speech recognition error: {str(e)}")
    except Exception as e:
        return _fail(f"❌ Unexpected error: {str(e)}")
def enhance_prompt_with_gemini(text, content_type):
    """Enhance user prompt using Gemini for better AI generation.

    Args:
        text: The user's original prompt.
        content_type: ``"text"`` selects the text-generation instructions; any
            other value selects the image-generation instructions.

    Returns:
        The enhanced prompt, or ``text`` unchanged when Gemini is unavailable,
        the request fails, or the reply is empty. The original prompt is never
        replaced by an empty string.
    """
    model = configure_gemini()
    if not model:
        return text
    try:
        if content_type == "text":
            enhancement_prompt = f"""
            Enhance this prompt for AI text generation. Make it more specific and detailed for creating engaging content:
            Original: "{text}"
            Return only the enhanced prompt, nothing else.
            """
        else:  # image
            enhancement_prompt = f"""
            Enhance this prompt for AI image generation. Add artistic details, style, and visual elements:
            Original: "{text}"
            Return only the enhanced prompt for image generation, nothing else.
            """
        response = model.generate_content(enhancement_prompt)
        enhanced = response.text.strip()
    except Exception as e:
        st.warning(f"Prompt enhancement failed: {str(e)}. Using original prompt.")
        return text
    # A blank reply would wipe out the user's prompt; keep the original instead.
    return enhanced or text
def generate_text_content(prompt, content_type="article"):
    """Produce text for ``prompt`` in the requested ``content_type`` style.

    The prompt is first passed through ``enhance_prompt_with_gemini``, then
    prefixed with an instruction matching ``content_type`` (unknown types use
    the enhanced prompt as-is) and sent to ``query_huggingface_text``.

    Returns:
        The generated text with the instruction echo removed, or ``""`` when
        the request reports an error or yields no usable result.
    """
    refined = enhance_prompt_with_gemini(prompt, "text")

    # Instruction prefix for each supported content type.
    lead_ins = {
        "article": "Write a detailed article about: ",
        "social_post": "Create an engaging social media post about: ",
        "caption": "Write a compelling caption for: ",
        "story": "Tell an interesting story about: ",
    }
    if content_type in lead_ins:
        context = f"{lead_ins[content_type]}{refined}"
    else:
        context = refined

    request_body = {
        "inputs": context,
        "parameters": {"max_length": 300, "temperature": 0.7},
    }
    with st.spinner("🤖 Generating text content..."):
        result = query_huggingface_text(request_body)

    if "error" in result:
        st.error(f"Text generation failed: {result['error']}")
        return ""

    if not (isinstance(result, list) and len(result) > 0):
        return ""

    generated_text = result[0].get("generated_text", "")
    # Strip the instruction when the model echoes it back in its answer.
    if context in generated_text:
        generated_text = generated_text.replace(context, "").strip()
    return generated_text
def generate_image(prompt):
    """Create an image for ``prompt`` and return it, or ``None`` on failure.

    The prompt is enhanced via ``enhance_prompt_with_gemini``, suffixed with
    fixed quality keywords, and sent to ``query_huggingface_image``. The bytes
    that come back are opened with ``Image.open``.
    """
    refined = enhance_prompt_with_gemini(prompt, "image")
    # Add artistic enhancement to the prompt
    artistic_prompt = f"{refined}, high quality, detailed, artistic, professional"

    with st.spinner("🎨 Generating image..."):
        raw_bytes = query_huggingface_image({"inputs": artistic_prompt})

    if not raw_bytes:
        return None

    try:
        return Image.open(io.BytesIO(raw_bytes))
    except Exception as e:
        st.error(f"Error processing image: {str(e)}")
        return None
def save_to_history(prompt, content, image):
    """Record one generation in ``st.session_state.history``.

    Each entry stores the current local time (``YYYY-MM-DD HH:MM:SS``), the
    prompt, the text content and the image. Only the ten newest entries are
    retained.
    """
    entry = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "prompt": prompt,
        "content": content,
        "image": image,
    }
    history = st.session_state.history
    history.append(entry)
    # Keep only last 10 items
    if len(history) > 10:
        st.session_state.history = history[-10:]
def download_content(content, filename="content.txt"):
    """Render a plain-text download button for ``content``.

    Returns whatever ``st.download_button`` returns for this button.
    """
    button_options = {
        "label": "💾 Download Text",
        "data": content,
        "file_name": filename,
        "mime": "text/plain",
    }
    return st.download_button(**button_options)
def download_image(image, filename="generated_image.png"):
    """Render a PNG download button for ``image``.

    Returns ``None`` without drawing anything when ``image`` is falsy;
    otherwise returns whatever ``st.download_button`` returns.
    """
    if not image:
        return None

    # Serialise the image to PNG in memory so it can be offered as bytes.
    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    png_bytes = png_buffer.getvalue()
    return st.download_button(
        label="🖼️ Download Image",
        data=png_bytes,
        file_name=filename,
        mime="image/png",
    )
# Main App Interface
def _render_sidebar():
    """Draw the settings sidebar and return the user's generation choices.

    Returns:
        tuple: ``(content_type, generate_text, generate_images)`` as selected
        in the sidebar widgets.
    """
    with st.sidebar:
        st.header("⚙️ Settings")
        # Content type selection
        content_type = st.selectbox(
            "📝 Content Type",
            ["article", "social_post", "caption", "story"],
            help="Choose the type of text content to generate"
        )
        # Language selection
        # NOTE(review): the chosen language is not consumed anywhere in this
        # function yet; the widget is kept so the UI stays unchanged.
        st.selectbox(
            "🌍 Language",
            ["English", "Spanish", "French", "German", "Italian", "Portuguese"],
            help="Select your preferred language"
        )
        # Generation options
        st.subheader("🎛️ Generation Options")
        generate_text = st.checkbox("Generate Text Content", value=True)
        generate_images = st.checkbox("Generate Images", value=True)
        # History: the five newest entries, newest first.
        st.subheader("📚 Recent History")
        history = st.session_state.history
        for i, item in enumerate(reversed(history[-5:])):
            with st.expander(f"🕐 {item['timestamp'][:16]}"):
                st.text(f"Prompt: {item['prompt'][:50]}...")
                if st.button(f"Load #{len(history)-i}", key=f"load_{i}"):
                    st.session_state.voice_text = item['prompt']
                    st.session_state.generated_content = item['content']
                    st.session_state.generated_image = item['image']
                    st.rerun()
    return content_type, generate_text, generate_images


def _record_voice_prompt():
    """Capture a spoken prompt into ``st.session_state.voice_text``.

    The ``recording`` flag is always reset in ``finally`` (also when
    ``st.rerun()`` interrupts the script), so the record button keeps working
    on every later click instead of only the first one.
    """
    if st.session_state.get('recording', False):
        return
    st.session_state.recording = True
    try:
        voice_text = speech_to_text()
        if voice_text:
            st.session_state.voice_text = voice_text
            st.success(f"✅ Captured: '{voice_text}'")
            st.rerun()
    finally:
        st.session_state.recording = False


def _render_voice_input():
    """Draw the prompt text area plus the record and clear buttons."""
    st.header("🎙️ Voice Input")
    col_voice1, col_voice2 = st.columns([3, 1])
    with col_voice1:
        # Manual text input
        manual_text = st.text_area(
            "✏️ Or type your prompt manually:",
            value=st.session_state.voice_text,
            height=100,
            placeholder="Describe what you want to create..."
        )
        if manual_text != st.session_state.voice_text:
            st.session_state.voice_text = manual_text
    with col_voice2:
        st.markdown("### 🎤")
        if st.button("🎤 Start Recording", type="primary", use_container_width=True):
            _record_voice_prompt()
        if st.button("🗑️ Clear", use_container_width=True):
            st.session_state.voice_text = ""
            st.session_state.generated_content = ""
            st.session_state.generated_image = None
            st.rerun()


def _render_quick_actions():
    """Draw the example-prompt buttons; clicking one loads it as the prompt."""
    st.header("🎯 Quick Actions")
    st.subheader("💡 Example Prompts")
    example_prompts = [
        "Write about sustainable living",
        "Create a motivational social media post",
        "Design a futuristic city",
        "Tell a story about space exploration",
        "Create a product advertisement"
    ]
    # Index-based keys are deterministic; str hash() varies between processes.
    for idx, prompt in enumerate(example_prompts):
        if st.button(f"📝 {prompt}", key=f"example_{idx}", use_container_width=True):
            st.session_state.voice_text = prompt
            st.rerun()


def _run_generation(content_type, generate_text, generate_images):
    """Generate the enabled outputs for the current prompt and report the result.

    Success is announced, and the history updated, only when at least one
    enabled output was actually produced.
    """
    prompt = st.session_state.voice_text
    if generate_text:
        st.session_state.generated_content = generate_text_content(prompt, content_type)
    if generate_images:
        st.session_state.generated_image = generate_image(prompt)

    produced_text = generate_text and bool(st.session_state.generated_content)
    produced_image = generate_images and st.session_state.generated_image is not None
    if not (produced_text or produced_image):
        st.warning("⚠️ Nothing was generated. Check the generation options and try again.")
        return

    # Save to history
    save_to_history(
        st.session_state.voice_text,
        st.session_state.generated_content,
        st.session_state.generated_image
    )
    st.success("✅ Content generated successfully!")


def _render_generation(content_type, generate_text, generate_images):
    """Draw the generate / regenerate buttons and run generation on click."""
    st.header("🚀 Generation")
    col_gen1, col_gen2 = st.columns(2)
    with col_gen1:
        if st.button("🤖 Generate Content", type="primary", use_container_width=True):
            _run_generation(content_type, generate_text, generate_images)
    with col_gen2:
        # Regenerating means producing fresh output for the same prompt; a bare
        # rerun of the script would leave the previous results untouched.
        if st.button("🔄 Regenerate", use_container_width=True):
            _run_generation(content_type, generate_text, generate_images)


def _render_results(content_type, generate_text, generate_images):
    """Draw the generated text and image together with their action buttons."""
    if not (st.session_state.generated_content or st.session_state.generated_image):
        return
    st.header("📋 Generated Content")
    # Text content
    if st.session_state.generated_content and generate_text:
        st.subheader("📝 Text Content")
        # Editable text area
        edited_content = st.text_area(
            "Edit your content:",
            value=st.session_state.generated_content,
            height=200
        )
        if edited_content != st.session_state.generated_content:
            st.session_state.generated_content = edited_content
        # Download button
        col_dl1, col_dl2, col_dl3 = st.columns(3)
        with col_dl1:
            download_content(st.session_state.generated_content, f"{content_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt")
        with col_dl2:
            if st.button("📋 Copy to Clipboard", key="copy_text"):
                st.code(st.session_state.generated_content, language=None)
        with col_dl3:
            # NOTE(review): this feeds the generated text to the prompt
            # enhancer; confirm that is the intended "enhance" behaviour.
            if st.button("🔄 Enhance Text", key="enhance_text"):
                enhanced = enhance_prompt_with_gemini(st.session_state.generated_content, "text")
                st.session_state.generated_content = enhanced
                st.rerun()
    # Image content
    if st.session_state.generated_image and generate_images:
        st.subheader("🖼️ Generated Image")
        col_img1, col_img2 = st.columns([3, 1])
        with col_img1:
            st.image(st.session_state.generated_image, use_container_width=True)
        with col_img2:
            download_image(st.session_state.generated_image, f"vociart_image_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png")
            if st.button("🎨 Regenerate Image", key="regen_image"):
                new_image = generate_image(st.session_state.voice_text)
                if new_image:
                    st.session_state.generated_image = new_image
                    st.rerun()


def _render_footer():
    """Draw the horizontal rule and the centred footer text."""
    st.markdown("---")
    st.markdown(
        """
        <div style='text-align: center; color: #666;'>
            <p>🎨 VociArt - Powered by Hugging Face & Gemini AI</p>
            <p>Create amazing content with just your voice! 🎙️✨</p>
        </div>
        """,
        unsafe_allow_html=True
    )


def main():
    """Render the whole VociArt page for one Streamlit script run.

    Layout, top to bottom: header, sidebar settings, prompt input next to the
    example prompts, the generation buttons (only when a prompt exists), the
    generated results, and the footer.
    """
    # Header
    st.title("🎨 VociArt - Voice AI Creator")
    st.markdown("*Generate amazing content and images using just your voice!*")
    # Sidebar
    content_type, generate_text, generate_images = _render_sidebar()
    # Main content area
    col1, col2 = st.columns([2, 1])
    with col1:
        _render_voice_input()
    with col2:
        _render_quick_actions()
    # Generation section
    if st.session_state.voice_text:
        _render_generation(content_type, generate_text, generate_images)
    # Results section
    _render_results(content_type, generate_text, generate_images)
    # Footer
    _render_footer()


if __name__ == "__main__":
    main()