Spaces:

maria355
/

VoiceVision-Creative-AI

Sleeping

App Files Community

maria355 committed on Sep 14, 2025

Commit

4e5890a

verified ·

1 Parent(s): de2f39c

Update app.py

Browse files

Files changed (1) hide show

app.py +128 -580

app.py CHANGED Viewed

@@ -1,28 +1,15 @@
 import streamlit as st
-import torch
-import numpy as np
-import io
-import os
 import tempfile
-from PIL import Image
-import requests
-import json
 from datetime import datetime
-import time
-# Import with error handling
 try:
     from transformers import pipeline
     TRANSFORMERS_AVAILABLE = True
 except ImportError:
     TRANSFORMERS_AVAILABLE = False
-try:
-    import google.generativeai as genai
-    GENAI_AVAILABLE = True
-except ImportError:
-    GENAI_AVAILABLE = False
 try:
     from st_audiorec import st_audiorec
     AUDIO_REC_AVAILABLE = True
@@ -31,634 +18,195 @@ except ImportError:
 # Configure page
 st.set_page_config(
-    page_title="VoiceCanvas - AI Content Studio",
     page_icon="🎨",
-    layout="wide",
-    initial_sidebar_state="expanded"
 )
-# Initialize session state
-if 'generated_content' not in st.session_state:
-    st.session_state.generated_content = {}
 if 'transcription' not in st.session_state:
     st.session_state.transcription = ""
-if 'processing' not in st.session_state:
-    st.session_state.processing = False
-if 'current_task' not in st.session_state:
-    st.session_state.current_task = ""
-if 'models_loaded' not in st.session_state:
-    st.session_state.models_loaded = False
 if 'whisper_model' not in st.session_state:
     st.session_state.whisper_model = None
-def load_models():
-    """Load models efficiently with progress tracking"""
-    if st.session_state.models_loaded and st.session_state.whisper_model is not None:
-        return True
-    if not TRANSFORMERS_AVAILABLE:
-        st.error("❌ Transformers library not available. Please install: pip install transformers")
-        return False
-    progress_bar = st.progress(0)
-    status_text = st.empty()
-    try:
-        # Load Whisper model
-        status_text.text("Loading speech recognition model...")
-        progress_bar.progress(25)
-        # Use session state to store the model
-        st.session_state.whisper_model = pipeline(
             "automatic-speech-recognition",
             model="openai/whisper-tiny",
-            device=-1,  # Use CPU
-            torch_dtype=torch.float32,
-            return_timestamps=False
         )
-        progress_bar.progress(75)
-        status_text.text("Models loaded successfully!")
-        progress_bar.progress(100)
-        st.session_state.models_loaded = True
-        # Clear progress indicators after a moment
-        time.sleep(1)
-        progress_bar.empty()
-        status_text.empty()
-        return True
-    except Exception as e:
-        st.error(f"❌ Error loading models: {str(e)}")
-        st.error("Try installing additional dependencies: pip install librosa soundfile")
-        progress_bar.empty()
-        status_text.empty()
-        return False
-def setup_gemini():
-    """Setup Gemini API if available"""
-    if not GENAI_AVAILABLE:
-        return False
     try:
-        api_key = os.getenv("GEMINI_API_KEY")
-        if not api_key and hasattr(st, 'secrets'):
-            api_key = st.secrets.get("GEMINI_API_KEY", "")
-        if api_key:
-            genai.configure(api_key=api_key)
-            return True
-        return False
     except Exception as e:
-        return False
-def transcribe_audio_simple(audio_file):
-    """Simple audio transcription with progress tracking"""
-    try:
-        # Check if model is loaded
-        if st.session_state.whisper_model is None:
-            st.error("❌ Speech recognition model not loaded. Please try loading models first.")
-            return "Error: Speech recognition model not available"
-        st.session_state.current_task = "Converting speech to text..."
-        # Handle different input types
-        if isinstance(audio_file, str):
-            # File path
-            audio_input = audio_file
-        else:
-            # File-like object
-            audio_input = audio_file
-        # Transcribe using pipeline
-        result = st.session_state.whisper_model(audio_input)
-        st.session_state.current_task = ""
-        # Handle different result formats
-        if isinstance(result, dict) and "text" in result:
-            return result["text"].strip()
-        elif isinstance(result, str):
-            return result.strip()
-        else:
-            return str(result).strip()
-    except Exception as e:
-        st.session_state.current_task = ""
-        error_msg = f"Transcription error: {str(e)}"
-        st.error(error_msg)
-        # Provide troubleshooting suggestions
-        if "librosa" in str(e).lower() or "soundfile" in str(e).lower():
-            st.error("🔧 Missing audio processing libraries. Install with:")
-            st.code("pip install librosa soundfile")
         return f"Error: {str(e)}"
-def generate_content_with_gemini(prompt):
-    """Generate content using Gemini"""
-    if not GENAI_AVAILABLE:
-        return generate_content_offline(prompt)
-    try:
-        st.session_state.current_task = "Generating enhanced content with Gemini AI..."
-        model = genai.GenerativeModel('gemini-pro')
-        response = model.generate_content(f"""
-        Based on this input: "{prompt}"
-        Create comprehensive marketing content with:
-        ## Marketing Taglines
-        Generate 3 catchy, memorable taglines (max 12 words each)
-        ## Social Media Posts
-        Create 3 engaging social media posts (max 280 characters each)
-        ## Product Description
-        Write 1 compelling product description (100-150 words)
-        ## Image Generation Prompts
-        Provide 3 detailed prompts for AI image generation
-        ## Call-to-Action Ideas
-        Suggest 3 effective call-to-action phrases
-        Format with clear markdown headers and numbered lists.
-        """)
-        st.session_state.current_task = ""
-        return response.text
-    except Exception as e:
-        st.warning(f"Gemini error: {e}. Using offline generation.")
-        st.session_state.current_task = ""
-        return generate_content_offline(prompt)
-def generate_content_offline(prompt):
-    """Generate content using offline methods"""
-    st.session_state.current_task = "Generating content with offline templates..."
-    # Create structured content
-    content = {
-        "taglines": [
-            f"Experience {prompt} like never before",
-            f"Transform your world with {prompt}",
-            f"Discover the power of {prompt}"
-        ],
-        "social_posts": [
-            f"🌟 Ready to explore {prompt}? Join thousands who've already discovered the difference! #Innovation",
-            f"💫 {prompt} is changing the game! Don't miss out on this incredible opportunity. #GameChanger",
-            f"🚀 The future of {prompt} is here! Experience what everyone's talking about. #FutureTech"
-        ],
-        "description": f"Discover the revolutionary world of {prompt}. Our innovative approach combines cutting-edge technology with user-friendly design to deliver an unmatched experience. Perfect for both beginners and experts, this solution transforms how you interact with {prompt}. Join thousands of satisfied users today!",
-        "image_prompts": [
-            f"Professional product photo of {prompt}, clean white background, studio lighting",
-            f"Modern minimalist illustration of {prompt}, flat design, vibrant colors",
-            f"Futuristic concept art of {prompt}, digital art, high quality, detailed"
-        ]
-    }
-    # Format for display
-    formatted = format_content_display(content)
-    # Store both versions
-    st.session_state.generated_content['structured'] = content
-    st.session_state.current_task = ""
-    return formatted
-def generate_image_with_api(prompt):
-    """Generate image using free API"""
-    try:
-        st.session_state.current_task = "Creating image with AI..."
-        api_url = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1"
-        headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN', '')}"}
-        if not os.getenv('HF_TOKEN'):
-            st.warning("Add HF_TOKEN environment variable for image generation")
-            st.session_state.current_task = ""
-            return None
-        response = requests.post(api_url, headers=headers, json={"inputs": prompt}, timeout=60)
-        if response.status_code == 200:
-            image = Image.open(io.BytesIO(response.content))
-            st.session_state.current_task = ""
-            return image
-        else:
-            st.warning(f"Image API returned status {response.status_code}")
-            st.session_state.current_task = ""
-            return None
-    except Exception as e:
-        st.error(f"Image generation error: {e}")
-        st.session_state.current_task = ""
-        return None
-def format_content_display(content):
-    """Format content for nice display"""
-    if isinstance(content, dict):
-        formatted = ""
-        if "taglines" in content:
-            formatted += "## 🏷️ Marketing Taglines\n"
-            for i, tagline in enumerate(content["taglines"], 1):
-                formatted += f"{i}. **{tagline}**\n"
-            formatted += "\n"
-        if "social_posts" in content:
-            formatted += "## 📱 Social Media Posts\n"
-            for i, post in enumerate(content["social_posts"], 1):
-                formatted += f"**Post {i}:**\n{post}\n\n"
-        if "description" in content:
-            formatted += "## 📝 Product Description\n"
-            formatted += f"{content['description']}\n\n"
-        if "image_prompts" in content:
-            formatted += "## 🎨 Image Generation Prompts\n"
-            for i, prompt in enumerate(content["image_prompts"], 1):
-                formatted += f"{i}. {prompt}\n"
-        return formatted
-    return str(content)
-def main():
-    # Sidebar with tips and status
-    with st.sidebar:
-        st.header("🎨 VoiceCanvas")
-        st.markdown("*AI Content Studio*")
-        # Load models button
-        if not st.session_state.models_loaded:
-            if st.button("🚀 Load AI Models", type="primary", use_container_width=True):
-                load_models()
-        # Status section
-        st.subheader("📊 System Status")
-        gemini_available = setup_gemini()
-        col1, col2 = st.columns(2)
-        with col1:
-            st.metric("Mode", "Enhanced" if gemini_available else "Basic")
-        with col2:
-            st.metric("Status", "Ready" if not st.session_state.processing else "Working")
-        # Component status
-        st.write("🤖 **Components:**")
-        st.write(f"• Speech Recognition: {'✅' if st.session_state.models_loaded else '❌'}")
-        st.write(f"• Audio Recording: {'✅' if AUDIO_REC_AVAILABLE else '❌'}")
-        st.write(f"• Enhanced AI: {'✅' if gemini_available else '❌'}")
-        # Current task indicator
-        if st.session_state.current_task:
-            st.info(f"🔄 {st.session_state.current_task}")
-        st.markdown("---")
-        # Tips and help
-        st.subheader("💡 How to Use")
-        with st.expander("🚀 Quick Start", expanded=True):
-            st.markdown("""
-            1. **Load Models**: Click "Load AI Models" button first
-            2. **Input**: Use voice, upload audio, or type text
-            3. **Edit**: Review and refine your input
-            4. **Generate**: Create marketing content
-            5. **Export**: Download your materials
-            """)
-        with st.expander("🎯 Best Practices"):
-            st.markdown("""
-            **For Voice/Audio:**
-            - Speak clearly at normal pace
-            - Use quiet environment
-            - Describe your product/service
-            - Mention target audience
-            **For Text:**
-            - Be specific about features
-            - Include benefits and use cases
-            - Mention what makes it unique
-            - Use 50+ words for detail
-            """)
-        with st.expander("⚙️ Setup (Optional)"):
-            st.markdown("""
-            **Enhanced Features:**
-            Add environment variables:
-            - `GEMINI_API_KEY`: Advanced text generation
-            - `HF_TOKEN`: AI image generation
-            **Get API Keys:**
-            - [Google AI Studio](https://makersuite.google.com/app/apikey) (Free)
-            - [Hugging Face](https://huggingface.co/settings/tokens) (Free)
-            """)
-        with st.expander("🛠️ Troubleshooting"):
-            st.markdown("""
-            **Common Issues:**
-            - "Speech recognition not available" → Click "Load AI Models"
-            - Audio processing errors → Install: `pip install librosa soundfile`
-            - Slow processing → Models loading for first time
-            - No image generation → Add HF_TOKEN
-            - Basic content only → Add GEMINI_API_KEY
-            """)
-    # Main content
-    st.title("🎨 VoiceCanvas - AI Content Studio")
-    st.markdown("*Transform your ideas into comprehensive marketing content*")
-    # Show model loading status
-    if not st.session_state.models_loaded:
-        st.warning("⚠️ AI models not loaded yet. Click 'Load AI Models' in the sidebar to enable speech recognition.")
-    # Main input area
-    st.header("💡 Share Your Idea")
-    # Dynamic tabs based on available features
-    available_tabs = []
-    if AUDIO_REC_AVAILABLE:
-        available_tabs.append("🎙️ Record")
-    available_tabs.extend(["📁 Upload", "✍️ Type"])
-    tabs = st.tabs(available_tabs)
-    tab_index = 0
-    # Recording tab (if available)
-    if AUDIO_REC_AVAILABLE:
-        with tabs[tab_index]:
-            st.info("🎤 Click the microphone button to start recording")
-            # Audio recorder
             wav_audio_data = st_audiorec()
             if wav_audio_data is not None:
-                st.success("🎉 Audio recorded successfully!")
                 st.audio(wav_audio_data, format='audio/wav')
-                col1, col2 = st.columns([1, 2])
-                with col1:
-                    if st.button("🔄 Transcribe Audio", key="transcribe_btn", type="primary"):
-                        if not st.session_state.models_loaded:
-                            st.error("Please load AI models first using the sidebar button.")
-                        else:
-                            st.session_state.processing = True
-                            st.rerun()
-                with col2:
-                    if st.session_state.processing:
-                        st.info("🔄 Processing your audio...")
-        tab_index += 1
     # Upload tab
-    with tabs[tab_index]:
-        st.info("📁 Upload an audio file containing your idea")
-        uploaded_file = st.file_uploader(
-            "Choose audio file",
-            type=['wav', 'mp3', 'm4a'],
-            help="Supported: WAV, MP3, M4A • Max 10MB • Best: 30 seconds or less"
-        )
         if uploaded_file:
-            st.success("📄 File uploaded successfully!")
             st.audio(uploaded_file)
-            col1, col2 = st.columns([1, 2])
-            with col1:
-                if st.button("🔄 Process Audio", key="upload_transcribe", type="primary"):
-                    if not st.session_state.models_loaded:
-                        st.error("Please load AI models first using the sidebar button.")
-                    else:
-                        st.session_state.processing = True
-                        st.rerun()
-            with col2:
-                if st.session_state.processing:
-                    st.info("🔄 Converting speech to text...")
-    tab_index += 1
     # Text tab
-    with tabs[tab_index]:
-        st.info("✍️ Type or paste your product/service description")
         user_input = st.text_area(
-            "Describe your idea:",
-            placeholder="Example: A smart fitness tracker that monitors sleep patterns, heart rate, and stress levels. It provides personalized workout recommendations for busy professionals who want to maintain their health despite hectic schedules.",
-            height=150,
-            help="Be detailed! Include features, benefits, and target audience for best results."
         )
         if user_input:
             st.session_state.transcription = user_input
-            word_count = len(user_input.split())
-            if word_count < 10:
-                st.warning("💡 Add more details for better results (at least 10 words)")
-            elif word_count < 30:
-                st.info("📝 Good start! Add more features/benefits for richer content")
-            else:
-                st.success(f"✅ Great detail! ({word_count} words)")
-    # Process audio transcription
-    if st.session_state.processing:
-        if AUDIO_REC_AVAILABLE and 'wav_audio_data' in locals() and wav_audio_data is not None:
-            # Process recorded audio
-            with st.spinner("🎯 Converting your speech to text..."):
-                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-                    tmp_file.write(wav_audio_data)
-                    transcription = transcribe_audio_simple(tmp_file.name)
-                    st.session_state.transcription = transcription
-                    os.unlink(tmp_file.name)
-            st.session_state.processing = False
-            st.rerun()
-        elif 'uploaded_file' in locals() and uploaded_file is not None:
-            # Process uploaded file
-            with st.spinner("🎯 Processing your audio file..."):
-                transcription = transcribe_audio_simple(uploaded_file)
-                st.session_state.transcription = transcription
-            st.session_state.processing = False
-            st.rerun()
-    # Show transcription and editing
     if st.session_state.transcription:
         st.markdown("---")
-        st.header("📝 Review Your Input")
         edited_text = st.text_area(
-            "Edit or refine your input:",
             value=st.session_state.transcription,
-            height=120,
-            key="edit_transcription",
-            help="Make any corrections or add more details"
         )
         st.session_state.transcription = edited_text
-        # Generate content section
         st.markdown("---")
-        col1, col2, col3 = st.columns([1, 2, 1])
-        with col2:
-            if st.button("🚀 Generate Marketing Content", type="primary", use_container_width=True):
-                with st.spinner("✨ Creating comprehensive marketing content..."):
-                    if setup_gemini():
-                        content_text = generate_content_with_gemini(st.session_state.transcription)
-                        st.session_state.generated_content['text'] = content_text
-                    else:
-                        content_text = generate_content_offline(st.session_state.transcription)
-                        st.session_state.generated_content['text'] = content_text
-                st.success("✅ Content generated successfully!")
-                st.rerun()
-    # Display generated content
     if st.session_state.generated_content:
         st.markdown("---")
         st.header("✨ Your Marketing Content")
-        # Text content
-        if 'text' in st.session_state.generated_content:
-            st.markdown(st.session_state.generated_content['text'])
-        # Image generation section
         st.markdown("---")
-        st.subheader("🎨 Visual Content")
-        col1, col2 = st.columns([2, 1])
-        with col1:
-            if 'structured' in st.session_state.generated_content:
-                # Show pre-made prompts
-                prompts = st.session_state.generated_content['structured'].get('image_prompts', [])
-                if prompts:
-                    selected_prompt = st.selectbox(
-                        "Choose image style:",
-                        prompts,
-                        help="Select from AI-generated image prompts"
-                    )
-                else:
-                    selected_prompt = st.text_input(
-                        "Describe the image you want:",
-                        placeholder="Professional product photo with clean white background",
-                        help="Be specific about style, colors, composition"
-                    )
-            else:
-                # Custom prompt input
-                selected_prompt = st.text_input(
-                    "Describe the image you want:",
-                    placeholder="Professional product photo with clean white background",
-                    help="Be specific about style, colors, composition"
-                )
-        with col2:
-            st.write("")  # Spacing
-            st.write("")  # Spacing
-            if st.button("🖼️ Generate Image", use_container_width=True):
-                if selected_prompt:
-                    img = generate_image_with_api(selected_prompt)
-                    if img:
-                        st.session_state.generated_content['image'] = img
-                        st.success("🎨 Image created!")
-                        st.rerun()
-                    else:
-                        st.error("Image generation failed. Check HF_TOKEN.")
-                else:
-                    st.warning("Please enter/select an image description")
-        # Display generated image
-        if 'image' in st.session_state.generated_content:
-            st.image(
-                st.session_state.generated_content['image'],
-                caption="AI Generated Image",
-                use_column_width=True
-            )
-        # Export section
-        st.markdown("---")
-        st.header("📥 Export Your Content")
-        col1, col2, col3 = st.columns(3)
-        with col1:
-            # Text export
-            if 'text' in st.session_state.generated_content:
-                content_export = f"""VOICECANVAS MARKETING CONTENT
-Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
-Source: {st.session_state.transcription[:100]}...
-{st.session_state.generated_content['text']}
----
-Created with VoiceCanvas AI Content Studio
-"""
-                st.download_button(
-                    "📄 Download Text",
-                    content_export,
-                    file_name=f"marketing_content_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
-                    mime="text/plain",
-                    use_container_width=True,
-                    help="Download complete text content"
-                )
-        with col2:
-            # JSON export
-            if 'structured' in st.session_state.generated_content:
-                json_data = {
-                    "metadata": {
-                        "timestamp": datetime.now().isoformat(),
-                        "generator": "VoiceCanvas AI Studio",
-                        "mode": "Enhanced" if setup_gemini() else "Basic"
-                    },
-                    "input": st.session_state.transcription,
-                    "content": st.session_state.generated_content['structured']
-                }
-                st.download_button(
-                    "📊 Download Data",
-                    json.dumps(json_data, indent=2),
-                    file_name=f"content_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
-                    mime="application/json",
-                    use_container_width=True,
-                    help="Download structured data (JSON)"
-                )
-        with col3:
-            # Image export
-            if 'image' in st.session_state.generated_content:
-                img_buffer = io.BytesIO()
-                st.session_state.generated_content['image'].save(img_buffer, format="PNG")
-                st.download_button(
-                    "🖼️ Download Image",
-                    img_buffer.getvalue(),
-                    file_name=f"ai_image_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png",
-                    mime="image/png",
-                    use_container_width=True,
-                    help="Download generated image"
-                )
-            else:
-                st.info("Generate an image first", icon="ℹ️")
-    # Footer
     st.markdown("---")
-    col1, col2, col3 = st.columns([1, 2, 1])
-    with col2:
-        st.markdown("🎨 **VoiceCanvas AI Content Studio**")
-        st.caption("Transform ideas into marketing magic • Built with Streamlit")
 if __name__ == "__main__":
     main()

 import streamlit as st
 import tempfile
+import os
 from datetime import datetime
+# Simple imports only
 try:
     from transformers import pipeline
     TRANSFORMERS_AVAILABLE = True
 except ImportError:
     TRANSFORMERS_AVAILABLE = False
 try:
     from st_audiorec import st_audiorec
     AUDIO_REC_AVAILABLE = True
 # Configure page
 st.set_page_config(
+    page_title="VoiceCanvas - Simple AI Studio",
     page_icon="🎨",
+    layout="centered"
 )
+# Initialize session state - SIMPLIFIED
 if 'transcription' not in st.session_state:
     st.session_state.transcription = ""
+if 'generated_content' not in st.session_state:
+    st.session_state.generated_content = ""
 if 'whisper_model' not in st.session_state:
     st.session_state.whisper_model = None
+@st.cache_resource
+def load_whisper_model():
+    """Load Whisper model once and cache it"""
+    if TRANSFORMERS_AVAILABLE:
+        return pipeline(
             "automatic-speech-recognition",
             model="openai/whisper-tiny",
+            device=-1
         )
+    return None
+def transcribe_audio(audio_file):
+    """Simple audio transcription"""
     try:
+        model = load_whisper_model()
+        if model is None:
+            return "Error: Speech recognition not available"
+        result = model(audio_file)
+        return result["text"].strip()
     except Exception as e:
         return f"Error: {str(e)}"
+def generate_simple_content(prompt):
+    """Generate simple marketing content without external APIs"""
+    # Extract key words from prompt
+    words = prompt.lower().split()
+    key_features = [word for word in words if len(word) > 4][:3]
+    content = f"""# 🎯 Marketing Content for: {prompt[:50]}...
+## 🏷️ Taglines
+1. **Experience {key_features[0] if key_features else 'innovation'} like never before**
+2. **Transform your world with our solution**
+3. **Discover the power of smart technology**
+## 📱 Social Media Posts
+**Post 1:**
+🌟 Ready to experience something amazing? Our innovative solution is changing lives every day! #Innovation #Technology
+**Post 2:**
+💫 Join thousands who've already discovered the difference. Don't miss out on this incredible opportunity! #GameChanger
+**Post 3:**
+🚀 The future is here! Experience what everyone's talking about and transform your daily routine. #Future
+## 📝 Product Description
+{prompt}
+Our innovative approach combines cutting-edge technology with user-friendly design. Perfect for both beginners and experts, this solution delivers results that exceed expectations.
+## 🎯 Call-to-Action Ideas
+1. **Get Started Today!**
+2. **Transform Your Experience Now**
+3. **Join the Revolution**
+---
+*Generated by VoiceCanvas AI Studio*
+"""
+    return content
+def main():
+    # Header
+    st.title("🎨 VoiceCanvas - Simple AI Studio")
+    st.markdown("*Transform your ideas into marketing content quickly*")
+    # Simple status
+    col1, col2 = st.columns(2)
+    with col1:
+        st.metric("Speech Recognition", "✅ Ready" if TRANSFORMERS_AVAILABLE else "❌ Not Available")
+    with col2:
+        st.metric("Audio Recording", "✅ Ready" if AUDIO_REC_AVAILABLE else "❌ Not Available")
+    st.markdown("---")
+    # Input Section
+    st.header("💡 Your Idea")
+    # Simple tabs
+    tab1, tab2, tab3 = st.tabs(["🎙️ Record" if AUDIO_REC_AVAILABLE else "❌ Record", "📁 Upload", "✍️ Type"])
+    # Recording tab
+    with tab1:
+        if AUDIO_REC_AVAILABLE:
+            st.info("🎤 Record your idea")
             wav_audio_data = st_audiorec()
             if wav_audio_data is not None:
+                st.success("✅ Audio recorded!")
                 st.audio(wav_audio_data, format='audio/wav')
+                # Single button with immediate processing
+                if st.button("🔄 Convert to Text", key="record_btn", type="primary"):
+                    with st.spinner("Converting speech to text..."):
+                        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+                            tmp_file.write(wav_audio_data)
+                            transcription = transcribe_audio(tmp_file.name)
+                            os.unlink(tmp_file.name)
+                            st.session_state.transcription = transcription
+                    st.success("✅ Done!")
+                    st.rerun()
+        else:
+            st.warning("Audio recording not available")
     # Upload tab
+    with tab2:
+        st.info("📁 Upload audio file")
+        uploaded_file = st.file_uploader("Choose file", type=['wav', 'mp3', 'm4a'])
         if uploaded_file:
+            st.success("✅ File uploaded!")
             st.audio(uploaded_file)
+            # Single button with immediate processing
+            if st.button("🔄 Convert to Text", key="upload_btn", type="primary"):
+                with st.spinner("Converting speech to text..."):
+                    transcription = transcribe_audio(uploaded_file)
+                    st.session_state.transcription = transcription
+                st.success("✅ Done!")
+                st.rerun()
     # Text tab
+    with tab3:
         user_input = st.text_area(
+            "Describe your product/service:",
+            placeholder="A smart fitness tracker that helps busy professionals stay healthy...",
+            height=150
         )
         if user_input:
             st.session_state.transcription = user_input
+            st.success(f"✅ {len(user_input.split())} words entered")
+    # Show current input
     if st.session_state.transcription:
         st.markdown("---")
+        st.header("📝 Your Input")
+        # Editable text
         edited_text = st.text_area(
+            "Edit if needed:",
             value=st.session_state.transcription,
+            height=100
         )
         st.session_state.transcription = edited_text
+        # Generate content button
         st.markdown("---")
+        if st.button("🚀 Generate Marketing Content", type="primary", use_container_width=True):
+            with st.spinner("✨ Creating marketing content..."):
+                content = generate_simple_content(st.session_state.transcription)
+                st.session_state.generated_content = content
+            st.success("✅ Content generated!")
+            st.rerun()
+    # Show generated content
     if st.session_state.generated_content:
         st.markdown("---")
         st.header("✨ Your Marketing Content")
+        st.markdown(st.session_state.generated_content)
+        # Simple download
         st.markdown("---")
+        st.download_button(
+            "📥 Download Content",
+            st.session_state.generated_content,
+            file_name=f"marketing_content_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
+            mime="text/markdown",
+            use_container_width=True
+        )
+    # Simple footer
     st.markdown("---")
+    st.caption("🎨 VoiceCanvas - Simple & Fast")
 if __name__ == "__main__":
     main()