Spaces:

maria355
/

VoiceVision-Creative-AI

Sleeping

App Files Community

maria355 committed on Sep 14, 2025

Commit

4b37a3b

verified ·

1 Parent(s): 12e0416

Update app.py

Browse files

Files changed (1) hide show

app.py +546 -660

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import streamlit as st
 import torch
 import numpy as np
 import io
-import base64
 import os
 import tempfile
 from PIL import Image
@@ -10,734 +9,621 @@ import requests
 import json
 from datetime import datetime
 import time
-import threading
-import asyncio
-# Hugging Face imports
-from transformers import (
-    pipeline,
-    WhisperProcessor,
-    WhisperForConditionalGeneration,
-    AutoTokenizer,
-    AutoModelForCausalLM
-)
-from diffusers import StableDiffusionPipeline
-import torchaudio
-from scipy.io import wavfile
-import google.generativeai as genai
-from st_audiorec import st_audiorec
 # Configure page
 st.set_page_config(
     page_title="VoiceCanvas - AI Content Studio",
     page_icon="🎨",
     layout="wide",
-    initial_sidebar_state="collapsed"
 )
-# Custom CSS for better UI
-st.markdown("""
-<style>
-    .stApp {
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-    }
-    .main-header {
-        text-align: center;
-        color: white;
-        padding: 1rem;
-        margin-bottom: 2rem;
-        background: rgba(255,255,255,0.1);
-        border-radius: 10px;
-        backdrop-filter: blur(10px);
-    }
-    .feature-box {
-        background: rgba(255,255,255,0.95);
-        padding: 1.5rem;
-        border-radius: 15px;
-        margin: 1rem 0;
-        box-shadow: 0 8px 32px rgba(31, 38, 135, 0.37);
-        border: 1px solid rgba(255, 255, 255, 0.18);
-    }
-    .success-box {
-        background: linear-gradient(90deg, #00C9FF 0%, #92FE9D 100%);
-        padding: 1rem;
-        border-radius: 10px;
-        color: white;
-        font-weight: bold;
-        text-align: center;
-        margin: 1rem 0;
-    }
-    .stButton > button {
-        width: 100%;
-        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-        color: white;
-        border: none;
-        padding: 0.5rem 1rem;
-        border-radius: 25px;
-        font-weight: bold;
-        transition: all 0.3s ease;
-    }
-    .stButton > button:hover {
-        transform: translateY(-2px);
-        box-shadow: 0 5px 15px rgba(0,0,0,0.2);
-    }
-</style>
-""", unsafe_allow_html=True)
-# Initialize session state with better management
-def init_session_state():
-    defaults = {
-        'generated_images': [],
-        'generated_text': [],
-        'transcription': "",
-        'selected_image': None,
-        'audio_file': None,
-        'models_loaded': False,
-        'whisper_ready': False,
-        'sd_ready': False,
-        'tts_ready': False,
-        'processing': False,
-        'show_advanced_text': False,
-        'text_analysis': {},
-        'content_variations': [],
-        'seo_keywords': [],
-        'social_posts': []
-    }
-    for key, value in defaults.items():
-        if key not in st.session_state:
-            st.session_state[key] = value
-# Lightweight model loading with progress tracking
-@st.cache_resource(show_spinner=False)
-def load_models_lightweight():
-    """Load only essential models quickly"""
-    models = {
-        'whisper': None,
-        'whisper_processor': None,
-        'text_generator': None,
-        'gemini_ready': False
-    }
-    try:
-        # Setup Gemini (fastest)
-        api_key = os.getenv("GEMINI_API_KEY")
-        if api_key:
-            genai.configure(api_key=api_key)
-            models['gemini_ready'] = True
-    except:
-        pass
-    try:
-        # Light text generator for fallback
-        models['text_generator'] = pipeline(
-            "text-generation",
-            model="gpt2",  # Much smaller and faster
-            max_length=100,
-            do_sample=True,
-            temperature=0.8,
-            pad_token_id=50256
-        )
-    except:
-        pass
-    return models
-@st.cache_resource(show_spinner=False)
-def load_whisper_lazy():
-    """Load Whisper only when needed"""
     try:
-        processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
-        model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
-        return processor, model
     except Exception as e:
-        st.error(f"Whisper loading failed: {e}")
-        return None, None
-@st.cache_resource(show_spinner=False)
-def load_sd_lazy():
-    """Load Stable Diffusion only when needed"""
     try:
-        pipe = StableDiffusionPipeline.from_pretrained(
-            "runwayml/stable-diffusion-v1-5",  # Smaller model
-            torch_dtype=torch.float32,
-            safety_checker=None,
-            requires_safety_checker=False,
-            use_safetensors=True
-        )
-        pipe.enable_attention_slicing()
-        pipe.enable_model_cpu_offload()
-        return pipe
     except Exception as e:
-        st.error(f"SD loading failed: {e}")
-        return None
-def quick_transcribe(audio_data):
-    """Quick transcription without heavy models"""
     try:
-        # Simulate quick processing
-        if len(audio_data) > 0:
-            return "Quick transcription ready - click 'Enhance Transcription' for better accuracy"
-    except:
-        pass
-    return "Audio processed - ready for transcription"
-def enhanced_text_analysis(text):
-    """Generate comprehensive text analysis and variations"""
     try:
-        if st.session_state.get('models', {}).get('gemini_ready'):
-            model = genai.GenerativeModel('gemini-pro')
-            analysis_prompt = f"""
-            Analyze this content: "{text}"
-            Provide a comprehensive analysis in this exact JSON format:
-            {{
-                "summary": "Brief summary",
-                "tone": "Professional/Casual/Creative/etc",
-                "target_audience": "Primary audience",
-                "key_themes": ["theme1", "theme2", "theme3"],
-                "seo_keywords": ["keyword1", "keyword2", "keyword3", "keyword4", "keyword5"],
-                "hashtags": ["#tag1", "#tag2", "#tag3", "#tag4", "#tag5"],
-                "content_variations": {{
-                    "headlines": ["Headline 1", "Headline 2", "Headline 3"],
-                    "taglines": ["Tagline 1", "Tagline 2", "Tagline 3"],
-                    "descriptions": ["Description 1", "Description 2", "Description 3"]
-                }},
-                "social_posts": {{
-                    "twitter": ["Tweet 1", "Tweet 2"],
-                    "linkedin": ["LinkedIn post 1", "LinkedIn post 2"],
-                    "instagram": ["Insta caption 1", "Insta caption 2"]
-                }},
-                "image_prompts": ["Detailed prompt 1", "Detailed prompt 2", "Detailed prompt 3"]
-            }}
-            """
-            response = model.generate_content(analysis_prompt)
-            try:
-                return json.loads(response.text)
-            except:
-                # Fallback parsing
-                return parse_analysis_fallback(text)
-        else:
-            return generate_quick_analysis(text)
     except Exception as e:
-        st.error(f"Analysis error: {e}")
-        return generate_quick_analysis(text)
-def parse_analysis_fallback(text):
-    """Fallback analysis when JSON parsing fails"""
-    return {
-        "summary": f"Content about {text[:50]}...",
-        "tone": "Professional",
-        "target_audience": "General audience",
-        "key_themes": ["Innovation", "Creativity", "Technology"],
-        "seo_keywords": [f"{text.split()[0] if text.split() else 'content'}", "creative", "innovative", "digital", "solution"],
-        "hashtags": ["#creative", "#innovation", "#digital", "#content", "#marketing"],
-        "content_variations": {
-            "headlines": [
-                f"Discover {text[:30]}...",
-                f"Revolutionary {text[:30]}...",
-                f"Experience {text[:30]}..."
-            ],
-            "taglines": [
-                f"Transforming {text[:20]}...",
-                f"Innovation in {text[:20]}...",
-                f"The future of {text[:20]}..."
-            ],
-            "descriptions": [
-                f"Comprehensive solution for {text[:40]}...",
-                f"Advanced approach to {text[:40]}...",
-                f"Next-generation {text[:40]}..."
-            ]
-        },
-        "social_posts": {
-            "twitter": [
-                f"🚀 Excited to share: {text[:100]}... #innovation",
-                f"💡 Game-changer: {text[:100]}... #creative"
-            ],
-            "linkedin": [
-                f"Professional insight: {text[:150]}...",
-                f"Industry perspective: {text[:150]}..."
-            ],
-            "instagram": [
-                f"✨ {text[:100]}... #creative #innovation",
-                f"🎨 {text[:100]}... #design #digital"
-            ]
-        },
-        "image_prompts": [
-            f"Professional illustration of {text[:50]}, modern design, high quality",
-            f"Creative visualization of {text[:50]}, vibrant colors, artistic style",
-            f"Digital artwork representing {text[:50]}, futuristic, detailed"
-        ]
-    }
-def generate_quick_analysis(text):
-    """Quick analysis without API calls"""
-    words = text.split()[:5]
-    base_word = words[0] if words else "content"
-    return {
-        "summary": f"Creative content focusing on {base_word} and related concepts",
-        "tone": "Creative",
-        "target_audience": "Creative professionals and enthusiasts",
-        "key_themes": [base_word.title(), "Creativity", "Innovation"],
-        "seo_keywords": [base_word, "creative", "design", "innovative", "digital"],
-        "hashtags": [f"#{base_word.lower()}", "#creative", "#design", "#innovation", "#digital"],
-        "content_variations": {
-            "headlines": [
-                f"Unlock the Power of {base_word.title()}",
-                f"Revolutionary {base_word.title()} Solutions",
-                f"The Future of {base_word.title()}"
-            ],
-            "taglines": [
-                f"Where {base_word.title()} Meets Innovation",
-                f"Redefining {base_word.title()} Excellence",
-                f"Your {base_word.title()} Journey Starts Here"
-            ],
-            "descriptions": [
-                f"Comprehensive {base_word} solutions designed for modern needs",
-                f"Innovative {base_word} services that transform your vision",
-                f"Professional {base_word} expertise for exceptional results"
-            ]
-        },
-        "social_posts": {
-            "twitter": [
-                f"🚀 Exciting {base_word} developments ahead! #innovation #creative",
-                f"💡 {base_word.title()} insights that matter #digital #future"
-            ],
-            "linkedin": [
-                f"Professional {base_word} strategies for business growth and innovation in today's market",
-                f"Industry expertise in {base_word} solutions that drive meaningful results"
-            ],
-            "instagram": [
-                f"✨ Beautiful {base_word} creations ✨ #creative #design #inspiration",
-                f"🎨 {base_word.title()} magic in progress 🎨 #artistic #innovation"
-            ]
-        },
         "image_prompts": [
-            f"Modern professional illustration of {base_word}, clean design, corporate style",
-            f"Creative artistic representation of {base_word}, vibrant colors, dynamic composition",
-            f"Futuristic visualization of {base_word}, digital art, high-tech aesthetic"
         ]
     }
-def main():
-    init_session_state()
-    # Header
-    st.markdown("""
-    <div class="main-header">
-        <h1>🎨 VoiceCanvas - AI Content Studio</h1>
-        <p>Transform ideas into professional content instantly</p>
-    </div>
-    """, unsafe_allow_html=True)
-    # Quick loading of essential models
-    if not st.session_state.models_loaded:
-        with st.spinner("⚡ Loading AI models..."):
-            st.session_state.models = load_models_lightweight()
-            st.session_state.models_loaded = True
-        # Show model status
-        col1, col2, col3, col4 = st.columns(4)
-        with col1:
-            st.success("🤖 Core AI: Ready")
-        with col2:
-            st.info("🎤 Audio: On-demand")
-        with col3:
-            st.info("🎨 Images: On-demand")
-        with col4:
-            gemini_status = "✅ Active" if st.session_state.models.get('gemini_ready') else "⚠️ Fallback"
-            st.success(f"🧠 Smart AI: {gemini_status}")
-    # Main interface with tabs
-    tab1, tab2, tab3, tab4 = st.tabs(["🎤 Voice Input", "✍️ Text Studio", "🎨 Visual Creator", "📦 Export Hub"])
-    with tab1:
-        st.markdown('<div class="feature-box">', unsafe_allow_html=True)
-        st.subheader("🎙️ Audio Processing")
-        col1, col2 = st.columns([1, 1])
         with col1:
-            # Audio input methods
-            input_method = st.selectbox(
-                "Input Method:",
-                ["🎙️ Record Live", "📁 Upload File", "⌨️ Type Direct"],
-                key="input_method"
-            )
-            if input_method == "🎙️ Record Live":
-                st.info("🎤 Click record button:")
-                wav_data = st_audiorec()
-                if wav_data is not None:
-                    st.success("🎉 Recorded!")
-                    st.audio(wav_data)
-                    # Quick preview
-                    if st.button("⚡ Quick Process"):
-                        st.session_state.transcription = "Audio recorded - processing..."
-                        st.rerun()
-            elif input_method == "📁 Upload File":
-                uploaded_file = st.file_uploader(
-                    "Choose audio file:",
-                    type=['wav', 'mp3', 'm4a'],
-                    key="audio_upload"
-                )
-                if uploaded_file:
-                    st.audio(uploaded_file)
-                    if st.button("⚡ Process Audio"):
-                        st.session_state.transcription = f"Processing {uploaded_file.name}..."
-                        st.rerun()
-            else:  # Type Direct
-                text_input = st.text_area(
-                    "Enter your content:",
-                    placeholder="Type your creative prompt here...",
-                    height=150,
-                    key="direct_text_input"
-                )
-                if text_input and st.button("✨ Process Text"):
-                    st.session_state.transcription = text_input
-                    st.rerun()
-        with col2:
-            if st.session_state.transcription:
-                st.subheader("📝 Current Content")
-                # Editable transcription
-                edited_text = st.text_area(
-                    "Edit content:",
-                    value=st.session_state.transcription,
-                    height=150,
-                    key="edit_transcription"
-                )
-                if st.button("🔄 Update Content"):
-                    st.session_state.transcription = edited_text
-                    st.success("Content updated!")
-                # Enhanced transcription for audio
-                if "processing" in st.session_state.transcription.lower():
-                    if st.button("🎯 Enhanced Transcription"):
-                        with st.spinner("Loading Whisper..."):
-                            processor, model = load_whisper_lazy()
-                            if processor and model:
-                                st.session_state.transcription = "Enhanced transcription with Whisper model"
-                                st.success("Enhanced transcription ready!")
-                                st.rerun()
-        st.markdown('</div>', unsafe_allow_html=True)
-    with tab2:
-        st.markdown('<div class="feature-box">', unsafe_allow_html=True)
-        st.subheader("📊 Advanced Text Analysis")
-        if st.session_state.transcription:
-            col1, col2 = st.columns([1, 1])
-            with col1:
-                if st.button("🔍 Analyze Content", type="primary"):
-                    with st.spinner("🧠 Analyzing content..."):
-                        analysis = enhanced_text_analysis(st.session_state.transcription)
-                        st.session_state.text_analysis = analysis
-                        st.success("Analysis complete!")
-            with col2:
-                st.toggle("Show Advanced Options", key="show_advanced_text")
-            # Display analysis results
-            if st.session_state.text_analysis:
-                analysis = st.session_state.text_analysis
-                # Summary and basics
-                col1, col2, col3 = st.columns(3)
                 with col1:
-                    st.metric("Tone", analysis.get('tone', 'N/A'))
-                with col2:
-                    st.metric("Audience", analysis.get('target_audience', 'N/A')[:15] + "...")
-                with col3:
-                    st.metric("Themes", f"{len(analysis.get('key_themes', []))} detected")
-                # Content variations
-                st.subheader("📝 Content Variations")
-                var_col1, var_col2, var_col3 = st.columns(3)
-                with var_col1:
-                    st.write("**🏷️ Headlines:**")
-                    for i, headline in enumerate(analysis.get('content_variations', {}).get('headlines', []), 1):
-                        st.write(f"{i}. {headline}")
-                with var_col2:
-                    st.write("**✨ Taglines:**")
-                    for i, tagline in enumerate(analysis.get('content_variations', {}).get('taglines', []), 1):
-                        st.write(f"{i}. {tagline}")
-                with var_col3:
-                    st.write("**📖 Descriptions:**")
-                    for i, desc in enumerate(analysis.get('content_variations', {}).get('descriptions', []), 1):
-                        st.write(f"{i}. {desc[:50]}...")
-                # SEO and Social
-                if st.session_state.show_advanced_text:
-                    col1, col2 = st.columns(2)
-                    with col1:
-                        st.subheader("🔍 SEO Keywords")
-                        keywords = analysis.get('seo_keywords', [])
-                        st.write(" • ".join([f"`{kw}`" for kw in keywords]))
-                        st.subheader("🏷️ Hashtags")
-                        hashtags = analysis.get('hashtags', [])
-                        st.write(" ".join(hashtags))
-                    with col2:
-                        st.subheader("📱 Social Media Posts")
-                        social_data = analysis.get('social_posts', {})
-                        if st.button("🐦 Twitter"):
-                            for tweet in social_data.get('twitter', []):
-                                st.info(f"🐦 {tweet}")
-                        if st.button("💼 LinkedIn"):
-                            for post in social_data.get('linkedin', []):
-                                st.info(f"💼 {post}")
-                        if st.button("📸 Instagram"):
-                            for post in social_data.get('instagram', []):
-                                st.info(f"📸 {post}")
-        else:
-            st.info("👈 Please add content in the Voice Input tab first")
-        st.markdown('</div>', unsafe_allow_html=True)
-    with tab3:
-        st.markdown('<div class="feature-box">', unsafe_allow_html=True)
-        st.subheader("🎨 Image Generation")
-        if st.session_state.transcription:
-            col1, col2 = st.columns([1, 2])
             with col1:
-                st.write("**Image Settings:**")
-                # Use analysis prompts if available
-                if st.session_state.text_analysis:
-                    image_prompts = st.session_state.text_analysis.get('image_prompts', [])
-                    selected_prompt = st.selectbox("Choose style:",
-                                                 ["Custom"] + [f"Style {i+1}" for i in range(len(image_prompts))])
-                    if selected_prompt != "Custom":
-                        idx = int(selected_prompt.split()[-1]) - 1
-                        base_prompt = image_prompts[idx]
                     else:
-                        base_prompt = f"Professional illustration of {st.session_state.transcription}"
-                else:
-                    base_prompt = f"High quality illustration of {st.session_state.transcription}"
-                # Prompt customization
-                final_prompt = st.text_area(
-                    "Image prompt:",
-                    value=base_prompt,
-                    height=100
-                )
-                # Style modifiers
-                style = st.selectbox("Art Style:", [
-                    "Professional", "Creative", "Minimalist",
-                    "Vintage", "Modern", "Abstract", "Realistic"
-                ])
-                quality = st.selectbox("Quality:", ["Standard", "High", "Ultra"])
-                if st.button("🎨 Generate Images", type="primary"):
-                    with st.spinner("🎨 Creating images..."):
-                        # Load SD model when needed
-                        pipe = load_sd_lazy()
-                        if pipe:
-                            try:
-                                enhanced_prompt = f"{final_prompt}, {style.lower()} style, {quality.lower()} quality, detailed"
-                                # Generate multiple images
-                                images = []
-                                for i in range(3):
-                                    st.write(f"Generating image {i+1}/3...")
-                                    result = pipe(
-                                        enhanced_prompt,
-                                        num_inference_steps=20,
-                                        guidance_scale=7.5,
-                                        height=512,
-                                        width=512
-                                    )
-                                    images.append(result.images[0])
-                                st.session_state.generated_images = images
-                                st.success("🎉 Images generated!")
-                                st.rerun()
-                            except Exception as e:
-                                st.error(f"Generation failed: {e}")
-                        else:
-                            st.error("Image model not available")
             with col2:
-                if st.session_state.generated_images:
-                    st.write("**Generated Images:**")
-                    for i, img in enumerate(st.session_state.generated_images):
-                        st.image(img, caption=f"Variation {i+1}", use_column_width=True)
-                        col_a, col_b = st.columns(2)
-                        with col_a:
-                            if st.button(f"✅ Select Image {i+1}", key=f"select_img_{i}"):
-                                st.session_state.selected_image = img
-                                st.success(f"Selected Image {i+1}")
-                        with col_b:
-                            # Quick download
-                            img_buffer = io.BytesIO()
-                            img.save(img_buffer, format="PNG")
-                            st.download_button(
-                                f"💾 Download {i+1}",
-                                img_buffer.getvalue(),
-                                file_name=f"image_{i+1}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png",
-                                mime="image/png",
-                                key=f"download_img_{i}"
-                            )
                 else:
-                    st.info("👈 Configure settings and generate images")
-        else:
-            st.info("👈 Please add content first")
-        st.markdown('</div>', unsafe_allow_html=True)
-    with tab4:
-        st.markdown('<div class="feature-box">', unsafe_allow_html=True)
-        st.subheader("📦 Export & Download")
-        if st.session_state.transcription or st.session_state.generated_images:
-            export_col1, export_col2, export_col3 = st.columns(3)
-            with export_col1:
-                st.write("**📝 Text Content**")
-                if st.session_state.text_analysis:
-                    # Comprehensive text export
-                    analysis = st.session_state.text_analysis
-                    export_content = f"""VOICECANVAS CONTENT EXPORT
 Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
-ORIGINAL CONTENT:
-{st.session_state.transcription}
-ANALYSIS:
-• Tone: {analysis.get('tone', 'N/A')}
-• Audience: {analysis.get('target_audience', 'N/A')}
-• Key Themes: {', '.join(analysis.get('key_themes', []))}
-HEADLINES:
-{chr(10).join([f"{i+1}. {h}" for i, h in enumerate(analysis.get('content_variations', {}).get('headlines', []))])}
-TAGLINES:
-{chr(10).join([f"{i+1}. {t}" for i, t in enumerate(analysis.get('content_variations', {}).get('taglines', []))])}
-SEO KEYWORDS:
-{', '.join(analysis.get('seo_keywords', []))}
-HASHTAGS:
-{' '.join(analysis.get('hashtags', []))}
-SOCIAL MEDIA POSTS:
-Twitter:
-{chr(10).join([f"• {t}" for t in analysis.get('social_posts', {}).get('twitter', [])])}
-LinkedIn:
-{chr(10).join([f"• {l}" for l in analysis.get('social_posts', {}).get('linkedin', [])])}
-Instagram:
-{chr(10).join([f"• {i}" for i in analysis.get('social_posts', {}).get('instagram', [])])}
 """
-                    st.download_button(
-                        "📄 Complete Text Package",
-                        export_content,
-                        file_name=f"voicecanvas_content_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
-                        mime="text/plain"
-                    )
-                elif st.session_state.transcription:
-                    st.download_button(
-                        "📄 Basic Text",
-                        st.session_state.transcription,
-                        file_name=f"content_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
-                        mime="text/plain"
-                    )
-            with export_col2:
-                st.write("**🖼️ Visual Content**")
-                if st.session_state.selected_image:
-                    st.image(st.session_state.selected_image, width=150)
-                    img_buffer = io.BytesIO()
-                    st.session_state.selected_image.save(img_buffer, format="PNG")
-                    st.download_button(
-                        "🖼️ Selected Image",
-                        img_buffer.getvalue(),
-                        file_name=f"voicecanvas_image_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png",
-                        mime="image/png"
-                    )
-                elif st.session_state.generated_images:
-                    st.info("👆 Select an image first")
-                else:
-                    st.info("No images generated yet")
-            with export_col3:
-                st.write("**🔊 Audio Content**")
-                if st.session_state.transcription:
-                    if st.button("🎵 Generate Voice"):
-                        st.info("🔊 Voice generation coming soon!")
-                        st.write("For now, download text content")
-                st.write("**📊 Analytics**")
-                if st.session_state.text_analysis:
-                    st.metric("Content Items", len(st.session_state.text_analysis.get('content_variations', {}).get('headlines', [])))
-                    st.metric("Keywords", len(st.session_state.text_analysis.get('seo_keywords', [])))
-                    st.metric("Social Posts", len(st.session_state.text_analysis.get('social_posts', {}).get('twitter', [])))
-            # Complete package
-            if st.session_state.selected_image and st.session_state.text_analysis:
-                st.markdown('<div class="success-box">🎉 Complete Package Ready for Download! 🎉</div>', unsafe_allow_html=True)
-        else:
-            st.info("Generate some content first to enable exports")
-        st.markdown('</div>', unsafe_allow_html=True)
     # Footer
     st.markdown("---")
-    st.markdown("""
-    <div style='text-align: center; color: white; padding: 1rem;'>
-        🚀 <b>VoiceCanvas</b> - Professional AI Content Creation |
-        Made with ❤️ using Streamlit & Hugging Face
-    </div>
-    """, unsafe_allow_html=True)
 if __name__ == "__main__":
     main()

 import torch
 import numpy as np
 import io
 import os
 import tempfile
 from PIL import Image
 import json
 from datetime import datetime
 import time
+# Import with error handling
+try:
+    from transformers import pipeline
+    TRANSFORMERS_AVAILABLE = True
+except ImportError:
+    TRANSFORMERS_AVAILABLE = False
+try:
+    import google.generativeai as genai
+    GENAI_AVAILABLE = True
+except ImportError:
+    GENAI_AVAILABLE = False
+try:
+    from st_audiorec import st_audiorec
+    AUDIO_REC_AVAILABLE = True
+except ImportError:
+    AUDIO_REC_AVAILABLE = False
 # Configure page
 st.set_page_config(
     page_title="VoiceCanvas - AI Content Studio",
     page_icon="🎨",
     layout="wide",
+    initial_sidebar_state="expanded"
 )
+# Initialize session state
+if 'generated_content' not in st.session_state:
+    st.session_state.generated_content = {}
+if 'transcription' not in st.session_state:
+    st.session_state.transcription = ""
+if 'processing' not in st.session_state:
+    st.session_state.processing = False
+if 'current_task' not in st.session_state:
+    st.session_state.current_task = ""
+if 'models_loaded' not in st.session_state:
+    st.session_state.models_loaded = False
+# Global variables for models
+whisper_model = None
+text_generator = None
+def load_models():
+    """Load models efficiently with progress tracking"""
+    global whisper_model, text_generator
+    if st.session_state.models_loaded:
+        return True
+    if not TRANSFORMERS_AVAILABLE:
+        st.error("AI models not available")
+        return False
+    progress_bar = st.progress(0)
+    status_text = st.empty()
     try:
+        # Load Whisper model
+        status_text.text("Loading speech recognition model...")
+        progress_bar.progress(25)
+        whisper_model = pipeline(
+            "automatic-speech-recognition",
+            model="openai/whisper-tiny",
+            device=-1,
+            torch_dtype=torch.float32
+        )
+        progress_bar.progress(75)
+        status_text.text("Models loaded successfully!")
+        progress_bar.progress(100)
+        st.session_state.models_loaded = True
+        # Clear progress indicators after a moment
+        time.sleep(1)
+        progress_bar.empty()
+        status_text.empty()
+        return True
     except Exception as e:
+        st.error(f"Error loading models: {e}")
+        progress_bar.empty()
+        status_text.empty()
+        return False
+def setup_gemini():
+    """Setup Gemini API if available"""
+    if not GENAI_AVAILABLE:
+        return False
     try:
+        api_key = os.getenv("GEMINI_API_KEY")
+        if not api_key and hasattr(st, 'secrets'):
+            api_key = st.secrets.get("GEMINI_API_KEY", "")
+        if api_key:
+            genai.configure(api_key=api_key)
+            return True
+        return False
     except Exception as e:
+        return False
def transcribe_audio_simple(audio_file):
    """Transcribe audio with the loaded speech-recognition pipeline.

    Args:
        audio_file: A path, raw audio bytes, or a file-like object (for
            example a Streamlit upload).  File-like objects are converted to
            bytes first because the pipeline does not accept stream objects.

    Returns:
        str: The stripped transcription, or a message starting with
        ``"Error:"`` when the pipeline is not loaded or transcription fails.
    """
    if whisper_model is None:
        return "Error: Speech recognition not available"
    st.session_state.current_task = "Converting speech to text..."
    try:
        audio_input = audio_file
        if hasattr(audio_file, "getvalue"):
            # BytesIO-style objects: take the whole buffer regardless of the
            # current read position (the upload may already have been read).
            audio_input = audio_file.getvalue()
        elif hasattr(audio_file, "read"):
            audio_input = audio_file.read()
        # Transcribe using pipeline
        result = whisper_model(audio_input)
        return result["text"].strip()
    except Exception as e:
        return f"Error: {str(e)}"
    finally:
        # Always clear the sidebar task indicator
        st.session_state.current_task = ""
def generate_content_with_gemini(prompt):
    """Ask Gemini for marketing content based on *prompt*.

    Falls back to ``generate_content_offline`` when the Gemini library is
    unavailable or the request fails (a warning is shown in the latter case).

    Returns:
        str: Markdown text produced by Gemini or by the offline fallback.
    """
    if not GENAI_AVAILABLE:
        return generate_content_offline(prompt)
    try:
        st.session_state.current_task = "Generating enhanced content with Gemini AI..."
        gemini = genai.GenerativeModel('gemini-pro')
        request_text = f"""
        Based on this input: "{prompt}"
        Create comprehensive marketing content with:
        ## Marketing Taglines
        Generate 3 catchy, memorable taglines (max 12 words each)
        ## Social Media Posts
        Create 3 engaging social media posts (max 280 characters each)
        ## Product Description
        Write 1 compelling product description (100-150 words)
        ## Image Generation Prompts
        Provide 3 detailed prompts for AI image generation
        ## Call-to-Action Ideas
        Suggest 3 effective call-to-action phrases
        Format with clear markdown headers and numbered lists.
        """
        reply = gemini.generate_content(request_text)
        st.session_state.current_task = ""
        # Reading .text stays inside the try so a failure here also falls back
        return reply.text
    except Exception as e:
        st.warning(f"Gemini error: {e}. Using offline generation.")
        st.session_state.current_task = ""
        return generate_content_offline(prompt)
def generate_content_offline(prompt):
    """Build marketing content from fixed templates, without any API call.

    Side effects:
        Stores the structured result under
        ``st.session_state.generated_content['structured']`` and updates
        ``st.session_state.current_task`` while working.

    Returns:
        str: The structured content rendered by ``format_content_display``.
    """
    st.session_state.current_task = "Generating content with offline templates..."
    taglines = [
        f"Experience {prompt} like never before",
        f"Transform your world with {prompt}",
        f"Discover the power of {prompt}",
    ]
    social_posts = [
        f"🌟 Ready to explore {prompt}? Join thousands who've already discovered the difference! #Innovation",
        f"💫 {prompt} is changing the game! Don't miss out on this incredible opportunity. #GameChanger",
        f"🚀 The future of {prompt} is here! Experience what everyone's talking about. #FutureTech",
    ]
    description = f"Discover the revolutionary world of {prompt}. Our innovative approach combines cutting-edge technology with user-friendly design to deliver an unmatched experience. Perfect for both beginners and experts, this solution transforms how you interact with {prompt}. Join thousands of satisfied users today!"
    image_prompts = [
        f"Professional product photo of {prompt}, clean white background, studio lighting",
        f"Modern minimalist illustration of {prompt}, flat design, vibrant colors",
        f"Futuristic concept art of {prompt}, digital art, high quality, detailed",
    ]
    # Key order is kept stable because this dict is also exported as JSON
    structured = {
        "taglines": taglines,
        "social_posts": social_posts,
        "description": description,
        "image_prompts": image_prompts,
    }
    rendered = format_content_display(structured)
    # Keep the structured form alongside the rendered text
    st.session_state.generated_content['structured'] = structured
    st.session_state.current_task = ""
    return rendered
def generate_image_with_api(prompt):
    """Request an image for *prompt* from the Hugging Face inference endpoint.

    Requires the ``HF_TOKEN`` environment variable; without it a warning is
    shown and nothing is requested.

    Returns:
        The opened PIL image on HTTP 200, otherwise ``None`` (missing token,
        non-200 status, or any exception, each reported to the user).
    """
    try:
        st.session_state.current_task = "Creating image with AI..."
        endpoint = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"
        token = os.getenv('HF_TOKEN', '')
        if not token:
            st.warning("Add HF_TOKEN environment variable for image generation")
            return None
        reply = requests.post(
            endpoint,
            headers={"Authorization": f"Bearer {token}"},
            json={"inputs": prompt},
            timeout=60,
        )
        if reply.status_code != 200:
            st.warning(f"Image API returned status {reply.status_code}")
            return None
        return Image.open(io.BytesIO(reply.content))
    except Exception as e:
        st.error(f"Image generation error: {e}")
        return None
    finally:
        # Every exit path clears the sidebar task indicator
        st.session_state.current_task = ""
def format_content_display(content):
    """Render structured marketing content as Markdown.

    Args:
        content: A dict that may contain ``"taglines"``, ``"social_posts"``
            and ``"image_prompts"`` (iterables of strings) and
            ``"description"`` (a string).  Missing keys are skipped.  Any
            non-dict value is returned as ``str(content)``.

    Returns:
        str: Markdown with one ``##`` section per key present, in the fixed
        order taglines, social posts, description, image prompts.
    """
    if not isinstance(content, dict):
        return str(content)

    # Collect fragments and join once instead of growing a string with +=
    parts = []
    if "taglines" in content:
        parts.append("## 🏷️ Marketing Taglines\n")
        parts.extend(
            f"{i}. **{tagline}**\n"
            for i, tagline in enumerate(content["taglines"], 1)
        )
        parts.append("\n")
    if "social_posts" in content:
        parts.append("## 📱 Social Media Posts\n")
        parts.extend(
            f"**Post {i}:**\n{post}\n\n"
            for i, post in enumerate(content["social_posts"], 1)
        )
    if "description" in content:
        parts.append("## 📝 Product Description\n")
        parts.append(f"{content['description']}\n\n")
    if "image_prompts" in content:
        parts.append("## 🎨 Image Generation Prompts\n")
        parts.extend(
            f"{i}. {image_prompt}\n"
            for i, image_prompt in enumerate(content["image_prompts"], 1)
        )
    return "".join(parts)
def _render_sidebar(gemini_available):
    """Draw the sidebar: system status, component availability and usage help."""
    with st.sidebar:
        st.header("🎨 VoiceCanvas")
        st.markdown("*AI Content Studio*")
        # Status section
        st.subheader("📊 System Status")
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Mode", "Enhanced" if gemini_available else "Basic")
        with col2:
            st.metric("Status", "Ready" if not st.session_state.processing else "Working")
        # Component status
        st.write("🤖 **Components:**")
        st.write(f"• Speech Recognition: {'✅' if TRANSFORMERS_AVAILABLE else '❌'}")
        st.write(f"• Audio Recording: {'✅' if AUDIO_REC_AVAILABLE else '❌'}")
        st.write(f"• Enhanced AI: {'✅' if gemini_available else '❌'}")
        # Current task indicator
        if st.session_state.current_task:
            st.info(f"🔄 {st.session_state.current_task}")
        st.markdown("---")
        # Tips and help
        st.subheader("💡 How to Use")
        with st.expander("🚀 Quick Start", expanded=True):
            st.markdown("""
            1. **Input**: Use voice, upload audio, or type text
            2. **Edit**: Review and refine your input
            3. **Generate**: Create marketing content
            4. **Export**: Download your materials
            """)
        with st.expander("🎯 Best Practices"):
            st.markdown("""
            **For Voice/Audio:**
            - Speak clearly at normal pace
            - Use quiet environment
            - Describe your product/service
            - Mention target audience
            **For Text:**
            - Be specific about features
            - Include benefits and use cases
            - Mention what makes it unique
            - Use 50+ words for detail
            """)
        with st.expander("⚙️ Setup (Optional)"):
            st.markdown("""
            **Enhanced Features:**
            Add environment variables:
            - `GEMINI_API_KEY`: Advanced text generation
            - `HF_TOKEN`: AI image generation
            **Get API Keys:**
            - [Google AI Studio](https://makersuite.google.com/app/apikey) (Free)
            - [Hugging Face](https://huggingface.co/settings/tokens) (Free)
            """)
        with st.expander("🛠️ Troubleshooting"):
            st.markdown("""
            **Common Issues:**
            - Audio not recording → Try different browser
            - Slow processing → Models loading for first time
            - No image generation → Add HF_TOKEN
            - Basic content only → Add GEMINI_API_KEY
            """)


def _request_transcription(source):
    """Mark a transcription of *source* ("record" or "upload") as pending and rerun.

    Nothing is flagged when the speech model cannot be loaded; load_models()
    has already reported the error in that case.
    """
    if not load_models():
        return
    # Remember which button was pressed so the right audio is transcribed
    # when both a recording and an upload are present.
    st.session_state["audio_source"] = source
    st.session_state.processing = True
    st.rerun()


def _render_input_tabs():
    """Draw the record / upload / type tabs.

    Returns:
        tuple: ``(recorded_audio, uploaded_file)``; either element is None
        when that input is absent or its tab is unavailable.
    """
    tab_labels = []
    if AUDIO_REC_AVAILABLE:
        tab_labels.append("🎙️ Record")
    tab_labels.extend(["📁 Upload", "✍️ Type"])
    tabs = st.tabs(tab_labels)
    # The two fixed tabs are always last; the record tab, if any, is first
    upload_tab, text_tab = tabs[-2], tabs[-1]

    recorded_audio = None
    if AUDIO_REC_AVAILABLE:
        with tabs[0]:
            st.info("🎤 Click the microphone button to start recording")
            # Audio recorder
            recorded_audio = st_audiorec()
            if recorded_audio is not None:
                st.success("🎉 Audio recorded successfully!")
                st.audio(recorded_audio, format='audio/wav')
                col1, col2 = st.columns([1, 2])
                with col1:
                    if st.button("🔄 Transcribe Audio", key="transcribe_btn", type="primary"):
                        _request_transcription("record")
                with col2:
                    if st.session_state.processing:
                        st.info("🔄 Processing your audio...")

    with upload_tab:
        st.info("📁 Upload an audio file containing your idea")
        uploaded_file = st.file_uploader(
            "Choose audio file",
            type=['wav', 'mp3', 'm4a'],
            help="Supported: WAV, MP3, M4A • Max 10MB • Best: 30 seconds or less"
        )
        if uploaded_file:
            st.success("📄 File uploaded successfully!")
            st.audio(uploaded_file)
            col1, col2 = st.columns([1, 2])
            with col1:
                if st.button("🔄 Process Audio", key="upload_transcribe", type="primary"):
                    _request_transcription("upload")
            with col2:
                if st.session_state.processing:
                    st.info("🔄 Converting speech to text...")

    with text_tab:
        st.info("✍️ Type or paste your product/service description")
        user_input = st.text_area(
            "Describe your idea:",
            placeholder="Example: A smart fitness tracker that monitors sleep patterns, heart rate, and stress levels. It provides personalized workout recommendations for busy professionals who want to maintain their health despite hectic schedules.",
            height=150,
            help="Be detailed! Include features, benefits, and target audience for best results."
        )
        if user_input:
            st.session_state.transcription = user_input
            word_count = len(user_input.split())
            if word_count < 10:
                st.warning("💡 Add more details for better results (at least 10 words)")
            elif word_count < 30:
                st.info("📝 Good start! Add more features/benefits for richer content")
            else:
                st.success(f"✅ Great detail! ({word_count} words)")

    return recorded_audio, uploaded_file


def _transcribe_wav_bytes(wav_bytes):
    """Write *wav_bytes* to a temporary .wav file, transcribe it, then delete it."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_file.write(wav_bytes)
        tmp_path = tmp_file.name
    # The file is closed (and therefore flushed) before it is read by name
    try:
        return transcribe_audio_simple(tmp_path)
    finally:
        os.unlink(tmp_path)


def _process_pending_audio(recorded_audio, uploaded_file):
    """Run a transcription previously requested via ``st.session_state.processing``."""
    if not st.session_state.processing:
        return
    source = st.session_state.get("audio_source")
    use_upload = uploaded_file is not None and (
        recorded_audio is None or source == "upload"
    )
    if use_upload:
        # Process uploaded file
        with st.spinner("🎯 Processing your audio file..."):
            if TRANSFORMERS_AVAILABLE:
                # Hand over raw bytes; the upload object itself is a stream
                st.session_state.transcription = transcribe_audio_simple(
                    uploaded_file.getvalue()
                )
            else:
                st.session_state.transcription = "Speech-to-text not available. Please use text input."
    elif recorded_audio is not None:
        # Process recorded audio
        with st.spinner("🎯 Converting your speech to text..."):
            st.session_state.transcription = _transcribe_wav_bytes(recorded_audio)
    else:
        # Nothing to transcribe: clear the flag so the app does not stay "Working"
        st.session_state.processing = False
        return
    st.session_state.processing = False
    st.rerun()


def _render_review_and_generate(gemini_available):
    """Let the user edit the current input and trigger content generation."""
    if not st.session_state.transcription:
        return
    st.markdown("---")
    st.header("📝 Review Your Input")
    edited_text = st.text_area(
        "Edit or refine your input:",
        value=st.session_state.transcription,
        height=120,
        key="edit_transcription",
        help="Make any corrections or add more details"
    )
    st.session_state.transcription = edited_text
    # Generate content section
    st.markdown("---")
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        if st.button("🚀 Generate Marketing Content", type="primary", use_container_width=True):
            with st.spinner("✨ Creating comprehensive marketing content..."):
                generated = st.session_state.generated_content
                # Drop structured data from an earlier run so it cannot be
                # shown or exported next to text it does not belong to; the
                # offline generator stores a fresh copy when it runs.
                generated.pop('structured', None)
                if gemini_available:
                    content_text = generate_content_with_gemini(st.session_state.transcription)
                else:
                    content_text = generate_content_offline(st.session_state.transcription)
                generated['text'] = content_text
            st.success("✅ Content generated successfully!")
            st.rerun()


def _render_image_section():
    """Draw the image prompt picker, the generate button and the current image."""
    generated = st.session_state.generated_content
    st.markdown("---")
    st.subheader("🎨 Visual Content")
    col1, col2 = st.columns([2, 1])
    with col1:
        prompts = generated.get('structured', {}).get('image_prompts', [])
        if prompts:
            # Show pre-made prompts
            selected_prompt = st.selectbox(
                "Choose image style:",
                prompts,
                help="Select from AI-generated image prompts"
            )
        else:
            # Custom prompt input (also used when no pre-made prompts exist)
            selected_prompt = st.text_input(
                "Describe the image you want:",
                placeholder="Professional product photo with clean white background",
                help="Be specific about style, colors, composition"
            )
    with col2:
        st.write("")  # Spacing
        st.write("")  # Spacing
        if st.button("🖼️ Generate Image", use_container_width=True):
            if selected_prompt:
                img = generate_image_with_api(selected_prompt)
                if img:
                    generated['image'] = img
                    st.success("🎨 Image created!")
                    st.rerun()
                else:
                    st.error("Image generation failed. Check HF_TOKEN.")
            else:
                st.warning("Please enter/select an image description")
    # Display generated image
    if 'image' in generated:
        st.image(
            generated['image'],
            caption="AI Generated Image",
            use_column_width=True
        )


def _render_export_section(gemini_available):
    """Draw download buttons for the text, structured JSON and image outputs."""
    generated = st.session_state.generated_content
    st.markdown("---")
    st.header("📥 Export Your Content")
    col1, col2, col3 = st.columns(3)
    with col1:
        # Text export
        if 'text' in generated:
            content_export = (
                "VOICECANVAS MARKETING CONTENT\n"
                f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
                f"Source: {st.session_state.transcription[:100]}...\n"
                f"{generated['text']}\n"
                "---\n"
                "Created with VoiceCanvas AI Content Studio\n"
            )
            st.download_button(
                "📄 Download Text",
                content_export,
                file_name=f"marketing_content_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
                mime="text/plain",
                use_container_width=True,
                help="Download complete text content"
            )
    with col2:
        # JSON export
        if 'structured' in generated:
            json_data = {
                "metadata": {
                    "timestamp": datetime.now().isoformat(),
                    "generator": "VoiceCanvas AI Studio",
                    "mode": "Enhanced" if gemini_available else "Basic"
                },
                "input": st.session_state.transcription,
                "content": generated['structured']
            }
            st.download_button(
                "📊 Download Data",
                json.dumps(json_data, indent=2),
                file_name=f"content_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
                mime="application/json",
                use_container_width=True,
                help="Download structured data (JSON)"
            )
    with col3:
        # Image export
        if 'image' in generated:
            img_buffer = io.BytesIO()
            generated['image'].save(img_buffer, format="PNG")
            st.download_button(
                "🖼️ Download Image",
                img_buffer.getvalue(),
                file_name=f"ai_image_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png",
                mime="image/png",
                use_container_width=True,
                help="Download generated image"
            )
        else:
            st.info("Generate an image first", icon="ℹ️")


def _render_results(gemini_available):
    """Show generated text, the image tools and the export buttons, if any content exists."""
    generated = st.session_state.generated_content
    if not generated:
        return
    st.markdown("---")
    st.header("✨ Your Marketing Content")
    # Text content
    if 'text' in generated:
        st.markdown(generated['text'])
    _render_image_section()
    _render_export_section(gemini_available)


def main():
    """Render the VoiceCanvas page.

    The page is drawn top to bottom on every run: sidebar, input tabs, any
    pending transcription, the review/generate step, generated results with
    export buttons, and the footer.
    """
    gemini_available = setup_gemini()
    _render_sidebar(gemini_available)

    # Main content
    st.title("🎨 VoiceCanvas - AI Content Studio")
    st.markdown("*Transform your ideas into comprehensive marketing content*")
    # Main input area
    st.header("💡 Share Your Idea")
    recorded_audio, uploaded_file = _render_input_tabs()

    # Process audio transcription
    _process_pending_audio(recorded_audio, uploaded_file)
    # Show transcription and editing
    _render_review_and_generate(gemini_available)
    # Display generated content
    _render_results(gemini_available)

    # Footer
    st.markdown("---")
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        st.markdown("🎨 **VoiceCanvas AI Content Studio**")
        st.caption("Transform ideas into marketing magic • Built with Streamlit")
# Run the app when this file is executed as a script.
if __name__ == "__main__":
    main()