# Spaces: maria355 / VoiceVision-Creative-AI
# Status: Sleeping
# File size: 28,970 Bytes


import streamlit as st
import torch
import numpy as np
import io
import os
import tempfile
from PIL import Image, ImageDraw, ImageFont
import requests
import json
from datetime import datetime
import time

# Import with error handling
try:
    from transformers import pipeline
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False

try:
    import google.generativeai as genai
    GENAI_AVAILABLE = True
except ImportError:
    GENAI_AVAILABLE = False

try:
    from st_audiorec import st_audiorec
    AUDIO_REC_AVAILABLE = True
except ImportError:
    AUDIO_REC_AVAILABLE = False

# Page-level Streamlit settings: title, icon, wide layout, sidebar open
st.set_page_config(
    page_title="VoiceCanvas - AI Content Studio",
    page_icon="🎨",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Seed every session-state slot the app reads; values already present
# (i.e. from an earlier run of the script in this session) are left untouched.
_SESSION_DEFAULTS = (
    ('generated_content', {}),
    ('transcription', ""),
    ('processing', False),
    ('current_task', ""),
    ('models_loaded', False),
    ('whisper_model', None),
    ('button_clicked', False),
)
for _slot, _initial in _SESSION_DEFAULTS:
    if _slot not in st.session_state:
        st.session_state[_slot] = _initial

def load_models():
    """Make sure the Whisper speech-recognition pipeline lives in session state.

    Returns:
        True when the pipeline was already stored or has just been created;
        False when `transformers` is unavailable or creating the pipeline
        raised. A temporary progress bar and status line report progress and
        are cleared before returning from the loading path.
    """
    state = st.session_state

    # A previous run already stored the pipeline: nothing to do
    if state.models_loaded and state.whisper_model is not None:
        return True

    if not TRANSFORMERS_AVAILABLE:
        st.error("❌ Transformers library not available. Please install: pip install transformers")
        return False

    bar = st.progress(0)
    status_line = st.empty()

    try:
        status_line.text("Loading speech recognition model...")
        bar.progress(25)

        # Keep the pipeline in session state so later reruns can reuse it
        asr_options = {
            "model": "openai/whisper-tiny",
            "device": -1,  # Use CPU
            "torch_dtype": torch.float32,
            "return_timestamps": False,
        }
        state.whisper_model = pipeline("automatic-speech-recognition", **asr_options)

        bar.progress(75)
        status_line.text("Models loaded successfully!")
        bar.progress(100)

        state.models_loaded = True

        # Leave the "loaded" message visible for a moment, then tidy up
        time.sleep(1)
        for placeholder in (bar, status_line):
            placeholder.empty()

        return True

    except Exception as exc:
        st.error(f"❌ Error loading models: {str(exc)}")
        st.error("Try installing additional dependencies: pip install librosa soundfile")
        for placeholder in (bar, status_line):
            placeholder.empty()
        return False

def setup_gemini():
    """Configure the Gemini client when the library and an API key are present.

    The key is read from the GEMINI_API_KEY environment variable first and,
    if that is empty, from Streamlit secrets.

    Returns:
        True if `genai.configure` was called with a key, otherwise False
        (library missing, no key found, or any exception along the way).
    """
    if not GENAI_AVAILABLE:
        return False

    try:
        key = os.getenv("GEMINI_API_KEY")
        if not key and hasattr(st, 'secrets'):
            key = st.secrets.get("GEMINI_API_KEY", "")

        if not key:
            return False

        genai.configure(api_key=key)
        return True
    except Exception:
        # Any failure (e.g. while reading secrets) simply means "not available"
        return False

def transcribe_audio_simple(audio_file):
    """Transcribe audio with the Whisper pipeline stored in session state.

    Args:
        audio_file: A filesystem path (str), raw audio bytes, or a file-like
            object (for example the object returned by `st.file_uploader`).
            File-like objects are converted to bytes before being handed to
            the pipeline. Any other value is passed through unchanged.

    Returns:
        The stripped transcription text on success. On failure an error is
        shown in the UI and a string starting with "Error: " is returned
        (no exception is raised to the caller).
    """
    try:
        # Check if model is loaded
        if st.session_state.whisper_model is None:
            st.error("❌ Speech recognition model not loaded. Please try loading models first.")
            return "Error: Speech recognition model not available"

        st.session_state.current_task = "Converting speech to text..."

        # Normalise the input. The pipeline is given either a path or raw
        # bytes; a file-like object is never passed to it directly.
        if isinstance(audio_file, (str, bytes)):
            audio_input = audio_file
        elif hasattr(audio_file, "getvalue"):
            # BytesIO-style objects: getvalue() returns the whole buffer
            # regardless of the current read position.
            audio_input = audio_file.getvalue()
        elif hasattr(audio_file, "read"):
            audio_input = audio_file.read()
        else:
            # Other input kinds are left for the pipeline to validate
            audio_input = audio_file

        # Transcribe using pipeline
        result = st.session_state.whisper_model(audio_input)

        st.session_state.current_task = ""

        # Handle different result formats
        if isinstance(result, dict) and "text" in result:
            return result["text"].strip()
        if isinstance(result, str):
            return result.strip()
        return str(result).strip()

    except Exception as e:
        st.session_state.current_task = ""
        error_msg = f"Transcription error: {str(e)}"
        st.error(error_msg)

        # Provide troubleshooting suggestions
        lowered = str(e).lower()
        if "librosa" in lowered or "soundfile" in lowered:
            st.error("🔧 Missing audio processing libraries. Install with:")
            st.code("pip install librosa soundfile")

        return f"Error: {str(e)}"

def generate_content_with_gemini(prompt):
    """Ask Gemini for marketing copy about `prompt`, with an offline fallback.

    Returns the text of the Gemini response. When the Gemini library is not
    importable, or the request raises, the result of
    `generate_content_offline(prompt)` is returned instead (a warning is shown
    in the latter case).
    """
    if not GENAI_AVAILABLE:
        return generate_content_offline(prompt)

    try:
        st.session_state.current_task = "Generating enhanced content with Gemini AI..."

        gemini = genai.GenerativeModel('gemini-pro')
        request_text = f"""
        Based on this input: "{prompt}"
        
        Create comprehensive marketing content with:
        
        ## Marketing Taglines
        Generate 3 catchy, memorable taglines (max 12 words each)
        
        ## Social Media Posts  
        Create 3 engaging social media posts (max 280 characters each)
        
        ## Product Description
        Write 1 compelling product description (100-150 words)
        
        ## Image Generation Prompts
        Provide 3 detailed prompts for AI image generation
        
        ## Call-to-Action Ideas
        Suggest 3 effective call-to-action phrases
        
        Format with clear markdown headers and numbered lists.
        """
        reply = gemini.generate_content(request_text)

        st.session_state.current_task = ""
        return reply.text

    except Exception as err:
        st.warning(f"Gemini error: {err}. Using offline generation.")
        st.session_state.current_task = ""
        return generate_content_offline(prompt)

def generate_content_offline(prompt):
    """Build marketing copy from fixed templates, without any remote service.

    The structured dictionary (taglines, social posts, description, image
    prompts, CTA ideas) is stored under
    `st.session_state.generated_content['structured']`; the markdown rendering
    produced by `format_content_display` is returned.
    """
    st.session_state.current_task = "Generating content with offline templates..."

    topic = prompt

    taglines = [
        f"Experience {topic} like never before",
        f"Transform your world with {topic}",
        f"Discover the power of {topic}",
    ]
    social_posts = [
        f"🌟 Ready to explore {topic}? Join thousands who've already discovered the difference! #Innovation",
        f"💫 {topic} is changing the game! Don't miss out on this incredible opportunity. #GameChanger",
        f"🚀 The future of {topic} is here! Experience what everyone's talking about. #FutureTech",
    ]
    description = f"Discover the revolutionary world of {topic}. Our innovative approach combines cutting-edge technology with user-friendly design to deliver an unmatched experience. Perfect for both beginners and experts, this solution transforms how you interact with {topic}. Join thousands of satisfied users today!"
    image_prompts = [
        f"Professional product photo of {topic}, clean white background, studio lighting",
        f"Modern minimalist illustration of {topic}, flat design, vibrant colors",
        f"Futuristic concept art of {topic}, digital art, high quality, detailed",
    ]
    cta_ideas = [
        f"Get Started with {topic} Today!",
        "Transform Your Experience Now",
        f"Join the {topic} Revolution",
    ]

    # Key order matters only for the JSON export; keep it as before
    structured = {
        "taglines": taglines,
        "social_posts": social_posts,
        "description": description,
        "image_prompts": image_prompts,
        "cta_ideas": cta_ideas,
    }

    # Render first, then publish the structured version and clear the task
    markdown_text = format_content_display(structured)
    st.session_state.generated_content['structured'] = structured
    st.session_state.current_task = ""

    return markdown_text

def _load_flowchart_fonts():
    """Return (title, text, small) fonts, falling back to Pillow's default font."""
    # Arial is tried first (as before); DejaVu Sans is a second candidate for
    # hosts without Arial. If neither loads, use Pillow's built-in font.
    for font_file in ("arial.ttf", "DejaVuSans.ttf"):
        try:
            return (
                ImageFont.truetype(font_file, 20),
                ImageFont.truetype(font_file, 14),
                ImageFont.truetype(font_file, 12),
            )
        except (OSError, ImportError):
            # OSError: font file could not be read; ImportError: Pillow was
            # built without FreeType support.
            continue
    return ImageFont.load_default(), ImageFont.load_default(), ImageFont.load_default()


def _first_entry(content_data, key, fallback):
    """Return the first item stored under `key`, or `fallback` if missing/empty."""
    entries = content_data.get(key)
    return entries[0] if entries else fallback


def create_flowchart_image(content_data):
    """Draw a simple 800x600 flowchart summarising the generated content.

    Args:
        content_data: Mapping that may contain 'taglines', 'social_posts' and
            'cta_ideas' (sequences of strings) and 'description' (string).
            Missing or empty entries are replaced by placeholder text.

    Returns:
        A PIL image on success, or None after reporting the error via
        `st.error` when drawing fails.
    """
    try:
        # Create image
        width, height = 800, 600
        image = Image.new('RGB', (width, height), 'white')
        draw = ImageDraw.Draw(image)

        font_title, font_text, font_small = _load_flowchart_fonts()
        # NOTE(review): the labels below contain emoji and bullet characters;
        # whether the fallback font can draw them depends on the installed
        # Pillow version - confirm on the deployment target.

        # Colors
        primary_color = "#2E86AB"
        secondary_color = "#A23B72"
        accent_color = "#F18F01"
        text_color = "#333333"

        # Title
        draw.text((width//2 - 150, 20), "Marketing Content Strategy", fill=text_color, font=font_title)

        # Box geometry shared by all rows
        y_offset = 80
        box_height = 80
        box_width = 180
        left_x = 50
        right_x = width//2 + 50

        # Row 1: Taglines and Social Media
        tagline = _first_entry(content_data, 'taglines', 'Sample tagline')
        draw.rectangle([left_x, y_offset, left_x + box_width, y_offset + box_height], outline=primary_color, width=2)
        draw.text((left_x + 10, y_offset + 10), "🏷️ Taglines", fill=primary_color, font=font_text)
        draw.text((left_x + 10, y_offset + 35), f"• {tagline[:25]}...", fill=text_color, font=font_small)

        social_post = _first_entry(content_data, 'social_posts', 'Sample post')
        draw.rectangle([right_x, y_offset, right_x + box_width, y_offset + box_height], outline=secondary_color, width=2)
        draw.text((right_x + 10, y_offset + 10), "📱 Social Media", fill=secondary_color, font=font_text)
        draw.text((right_x + 10, y_offset + 35), f"• {social_post[:25]}...", fill=text_color, font=font_small)

        # Row 2: Description
        y_offset += 120
        draw.rectangle([width//4, y_offset, width*3//4, y_offset + box_height], outline=accent_color, width=2)
        draw.text((width//4 + 10, y_offset + 10), "📝 Product Description", fill=accent_color, font=font_text)
        desc_text = content_data.get('description', 'Product description goes here')[:50] + "..."
        draw.text((width//4 + 10, y_offset + 35), desc_text, fill=text_color, font=font_small)

        # Row 3: CTAs and Image Ideas
        y_offset += 120
        cta = _first_entry(content_data, 'cta_ideas', 'Sample CTA')
        draw.rectangle([left_x, y_offset, left_x + box_width, y_offset + box_height], outline=primary_color, width=2)
        draw.text((left_x + 10, y_offset + 10), "🎯 Call-to-Actions", fill=primary_color, font=font_text)
        draw.text((left_x + 10, y_offset + 35), f"• {cta}", fill=text_color, font=font_small)

        draw.rectangle([right_x, y_offset, right_x + box_width, y_offset + box_height], outline=secondary_color, width=2)
        draw.text((right_x + 10, y_offset + 10), "🎨 Visual Ideas", fill=secondary_color, font=font_text)
        draw.text((right_x + 10, y_offset + 35), "• Professional photos", fill=text_color, font=font_small)
        draw.text((right_x + 10, y_offset + 50), "• Minimalist design", fill=text_color, font=font_small)

        # Draw connecting lines (coordinates are fixed, independent of y_offset)
        draw.line([(width//2, 80 + box_height), (width//2, 200)], fill=text_color, width=2)
        draw.line([(width//4 + box_width//2, 200 + box_height), (width//2, 320)], fill=text_color, width=2)
        draw.line([(width*3//4 - box_width//2, 200 + box_height), (width//2, 320)], fill=text_color, width=2)

        # Add footer
        draw.text((width//2 - 100, height - 30), "Generated by VoiceCanvas AI Studio", fill=text_color, font=font_small)

        return image

    except Exception as e:
        st.error(f"Error creating flowchart: {e}")
        return None

def format_content_display(content):
    """Render a structured content dictionary as markdown.

    Sections are emitted in a fixed order (taglines, social posts, product
    description, call-to-action ideas, image prompts) and only for keys that
    are present. Anything that is not a dict is returned as `str(content)`.
    """
    if not isinstance(content, dict):
        return str(content)

    pieces = []

    if "taglines" in content:
        pieces.append("## 🏷️ Marketing Taglines\n")
        pieces.extend(
            f"{number}. **{line}**\n"
            for number, line in enumerate(content["taglines"], 1)
        )
        pieces.append("\n")

    if "social_posts" in content:
        pieces.append("## 📱 Social Media Posts\n")
        pieces.extend(
            f"**Post {number}:**\n{body}\n\n"
            for number, body in enumerate(content["social_posts"], 1)
        )

    if "description" in content:
        pieces.append("## 📝 Product Description\n")
        pieces.append(f"{content['description']}\n\n")

    if "cta_ideas" in content:
        pieces.append("## 🎯 Call-to-Action Ideas\n")
        pieces.extend(
            f"{number}. {phrase}\n"
            for number, phrase in enumerate(content["cta_ideas"], 1)
        )
        pieces.append("\n")

    if "image_prompts" in content:
        pieces.append("## 🎨 Image Generation Prompts\n")
        pieces.extend(
            f"{number}. {idea}\n"
            for number, idea in enumerate(content["image_prompts"], 1)
        )

    return "".join(pieces)

def handle_button_click(button_key):
    """Claim the click flag for `button_key`.

    Returns True (and sets `<button_key>_clicked` in session state) when the
    flag was not already set; returns False when it was, so a second click
    while the first is still being handled is ignored.
    """
    flag_name = f'{button_key}_clicked'
    if st.session_state.get(flag_name, False):
        return False
    st.session_state[flag_name] = True
    return True

def reset_button_state(button_key):
    """Clear the `<button_key>_clicked` flag if it exists in session state."""
    flag_name = f'{button_key}_clicked'
    if flag_name not in st.session_state:
        return
    st.session_state[flag_name] = False

def _render_sidebar():
    """Render the sidebar: model loader, status list and help expanders."""
    with st.sidebar:
        st.header("🎨 VoiceCanvas")
        st.markdown("*AI Content Studio*")
        
        # Offer the loader only while the models are not in session state
        if not st.session_state.models_loaded:
            if st.button("🚀 Load AI Models", type="primary", use_container_width=True):
                if handle_button_click("load_models"):
                    with st.spinner("Loading AI models..."):
                        success = load_models()
                        reset_button_state("load_models")
                        if success:
                            # Rerun so the button disappears and the warning clears
                            st.rerun()
        
        # Status section
        st.subheader("📊 System Status")
        
        # The return value is not displayed here; the call is kept because it
        # configures the Gemini client when a key is available.
        setup_gemini()
        
        # NOTE(review): these columns are created but nothing is placed in
        # them; kept so the rendered layout is unchanged.
        st.columns(2)
        # Component status
        st.write("🤖 **Components:**")
        st.write("• Speech Recognition")
        st.write("• Audio Recording")
        st.write("• Enhanced AI")
        
        # Current task indicator
        if st.session_state.current_task:
            st.info(f"🔄 {st.session_state.current_task}")
        
        st.markdown("---")
        
        # Tips and help
        st.subheader("💡 How to Use")
        
        with st.expander("🚀 Quick Start", expanded=True):
            st.markdown("""
            1. **Load Models**: Click "Load AI Models" button first
            2. **Input**: Use voice, upload audio, or type text
            3. **Edit**: Review and refine your input
            4. **Generate**: Create marketing content
            5. **Visualize**: Generate flowchart of your strategy
            6. **Export**: Download your materials
            """)
        
        with st.expander("🎯 Best Practices"):
            st.markdown("""
            **For Voice/Audio:**
            - Speak clearly at normal pace
            - Use quiet environment
            - Describe your product/service
            - Mention target audience
            
            **For Text:**
            - Be specific about features
            - Include benefits and use cases
            - Mention what makes it unique
            - Use 50+ words for detail
            """)
        
        with st.expander("⚙️ Setup (Optional)"):
            st.markdown("""
            **Enhanced Features:**
            
            Add environment variables:
            - `GEMINI_API_KEY`: Advanced text generation
            
            **Get API Key:**
            - [Google AI Studio](https://makersuite.google.com/app/apikey) (Free)
            """)
        
        with st.expander("🛠️ Troubleshooting"):
            st.markdown("""
            **Common Issues:**
            - "Speech recognition not available" → Click "Load AI Models"
            - Audio processing errors → Install: `pip install librosa soundfile`
            - Button not responding → Wait for processing to complete
            - Slow processing → Models loading for first time
            - Basic content only → Add GEMINI_API_KEY
            """)


def _transcribe_recording(wav_audio_data):
    """Write recorded WAV bytes to a temp file, transcribe it, then delete it."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_file.write(wav_audio_data)
        tmp_path = tmp_file.name
    # The file is closed (and therefore fully flushed) before it is read back
    # by name; reading it while still open could miss buffered data.
    try:
        return transcribe_audio_simple(tmp_path)
    finally:
        try:
            os.unlink(tmp_path)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not break the UI
            pass


def _render_record_tab(tab):
    """Render the microphone-recording tab and transcribe on request."""
    with tab:
        st.info("🎤 Click the microphone button to start recording")
        
        # Audio recorder
        wav_audio_data = st_audiorec()
        
        if wav_audio_data is not None:
            st.success("🎉 Audio recorded successfully!")
            st.audio(wav_audio_data, format='audio/wav')
            
            col1, col2 = st.columns([1, 2])
            with col1:
                if st.button("🔄 Transcribe Audio", key="transcribe_btn", type="primary"):
                    if not st.session_state.models_loaded:
                        st.error("Please load AI models first using the sidebar button.")
                    elif handle_button_click("transcribe"):
                        st.session_state.processing = True
                        # Process immediately
                        with st.spinner("🎯 Converting your speech to text..."):
                            st.session_state.transcription = _transcribe_recording(wav_audio_data)
                        st.session_state.processing = False
                        reset_button_state("transcribe")
                        st.rerun()
            
            with col2:
                if st.session_state.processing:
                    st.info("🔄 Processing your audio...")


def _render_upload_tab(tab):
    """Render the audio-upload tab and transcribe the uploaded file on request."""
    with tab:
        st.info("📁 Upload an audio file containing your idea")
        
        uploaded_file = st.file_uploader(
            "Choose audio file", 
            type=['wav', 'mp3', 'm4a'],
            help="Supported: WAV, MP3, M4A • Max 10MB • Best: 30 seconds or less"
        )
        
        if uploaded_file:
            st.success("📄 File uploaded successfully!")
            st.audio(uploaded_file)
            
            col1, col2 = st.columns([1, 2])
            with col1:
                if st.button("🔄 Process Audio", key="upload_transcribe", type="primary"):
                    if not st.session_state.models_loaded:
                        st.error("Please load AI models first using the sidebar button.")
                    elif handle_button_click("upload_process"):
                        st.session_state.processing = True
                        # Process immediately
                        with st.spinner("🎯 Processing your audio file..."):
                            transcription = transcribe_audio_simple(uploaded_file)
                            st.session_state.transcription = transcription
                        st.session_state.processing = False
                        reset_button_state("upload_process")
                        st.rerun()
            
            with col2:
                if st.session_state.processing:
                    st.info("🔄 Converting speech to text...")


def _render_text_tab(tab):
    """Render the free-text tab; typed text becomes the current transcription."""
    with tab:
        st.info("✍️ Type or paste your product/service description")
        
        user_input = st.text_area(
            "Describe your idea:",
            placeholder="Example: A smart fitness tracker that monitors sleep patterns, heart rate, and stress levels. It provides personalized workout recommendations for busy professionals who want to maintain their health despite hectic schedules.",
            height=150,
            help="Be detailed! Include features, benefits, and target audience for best results."
        )
        
        if user_input:
            st.session_state.transcription = user_input
            word_count = len(user_input.split())
            
            # Feedback on how much detail was provided
            if word_count < 10:
                st.warning("💡 Add more details for better results (at least 10 words)")
            elif word_count < 30:
                st.info("📝 Good start! Add more features/benefits for richer content")
            else:
                st.success(f"✅ Great detail! ({word_count} words)")


def _render_review_section():
    """Let the user edit the current input and trigger content generation."""
    if not st.session_state.transcription:
        return

    st.markdown("---")
    st.header("📝 Review Your Input")
    
    edited_text = st.text_area(
        "Edit or refine your input:",
        value=st.session_state.transcription,
        height=120,
        key="edit_transcription",
        help="Make any corrections or add more details"
    )
    st.session_state.transcription = edited_text
    
    # Generate content section; the outer columns only centre the button
    st.markdown("---")
    _, centre, _ = st.columns([1, 2, 1])
    
    with centre:
        if st.button("🚀 Generate Marketing Content", type="primary", use_container_width=True):
            if handle_button_click("generate_content"):
                with st.spinner("✨ Creating comprehensive marketing content..."):
                    if setup_gemini():
                        content_text = generate_content_with_gemini(st.session_state.transcription)
                    else:
                        content_text = generate_content_offline(st.session_state.transcription)
                    st.session_state.generated_content['text'] = content_text
                reset_button_state("generate_content")
                st.success("✅ Content generated successfully!")
                st.rerun()


def _render_flowchart_controls(generated):
    """Render the flowchart button and, if present, the generated flowchart."""
    st.markdown("---")
    st.subheader("🎨 Visual Content")
    
    col1, col2 = st.columns([1, 1])
    
    with col1:
        if st.button("📊 Generate Strategy Flowchart", use_container_width=True, type="secondary"):
            if handle_button_click("generate_flowchart"):
                with st.spinner("🎨 Creating strategy flowchart..."):
                    if 'structured' in generated:
                        chart_data = generated['structured']
                    else:
                        # No structured data stored: build a basic flowchart
                        # from placeholders plus the start of the user's input.
                        chart_data = {
                            'taglines': ['Key message from your content'],
                            'social_posts': ['Social media strategy'],
                            'description': st.session_state.transcription[:100],
                            'cta_ideas': ['Call to action'],
                            'image_prompts': ['Visual elements']
                        }
                    flowchart_img = create_flowchart_image(chart_data)
                    if flowchart_img:
                        generated['flowchart'] = flowchart_img
                reset_button_state("generate_flowchart")
                st.success("📊 Flowchart created!")
                st.rerun()
    
    with col2:
        st.info("💡 Generate a visual flowchart of your marketing strategy to better understand content relationships and flow.")
    
    # Display generated flowchart
    if 'flowchart' in generated:
        st.image(
            generated['flowchart'], 
            caption="Marketing Strategy Flowchart", 
            use_column_width=True
        )


def _render_export_section(generated):
    """Render download buttons for the text, JSON and flowchart outputs."""
    st.markdown("---")
    st.header("📥 Export Your Content")
    
    col1, col2, col3 = st.columns(3)
    
    with col1:
        # Text export
        if 'text' in generated:
            content_export = f"""VOICECANVAS MARKETING CONTENT
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
Source: {st.session_state.transcription[:100]}...

{generated['text']}

---
Created with VoiceCanvas AI Content Studio
"""
            
            st.download_button(
                "📄 Download Text",
                content_export,
                file_name=f"marketing_content_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
                mime="text/plain",
                use_container_width=True,
                help="Download complete text content"
            )
    
    with col2:
        # JSON export (only available when structured data was stored)
        if 'structured' in generated:
            json_data = {
                "metadata": {
                    "timestamp": datetime.now().isoformat(),
                    "generator": "VoiceCanvas AI Studio",
                    "mode": "Enhanced" if setup_gemini() else "Basic"
                },
                "input": st.session_state.transcription,
                "content": generated['structured']
            }
            
            st.download_button(
                "📊 Download Data",
                json.dumps(json_data, indent=2),
                file_name=f"content_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
                mime="application/json",
                use_container_width=True,
                help="Download structured data (JSON)"
            )
    
    with col3:
        # Flowchart export
        if 'flowchart' in generated:
            img_buffer = io.BytesIO()
            generated['flowchart'].save(img_buffer, format="PNG")
            
            st.download_button(
                "📊 Download Flowchart",
                img_buffer.getvalue(),
                file_name=f"strategy_flowchart_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png",
                mime="image/png",
                use_container_width=True,
                help="Download strategy flowchart"
            )
        else:
            st.info("Generate flowchart first", icon="ℹ️")


def _render_results_section():
    """Show generated content, flowchart controls and export buttons."""
    generated = st.session_state.generated_content
    if not generated:
        return

    st.markdown("---")
    st.header("✨ Your Marketing Content")
    
    # Text content
    if 'text' in generated:
        st.markdown(generated['text'])
    
    _render_flowchart_controls(generated)
    _render_export_section(generated)


def _render_footer():
    """Render the centred page footer."""
    st.markdown("---")
    _, centre, _ = st.columns([1, 2, 1])
    with centre:
        st.markdown("🎨 **VoiceCanvas AI Content Studio**")
        st.caption("Transform ideas into marketing magic • Built with Streamlit")


def main():
    """Render the whole VoiceCanvas page.

    Order of rendering: sidebar, title and model warning, the input tabs
    (record if the recorder component is importable, upload, type), the
    review/generate section, the generated content with flowchart and export
    controls, and finally the footer. All state is kept in
    `st.session_state`; nothing is returned.
    """
    _render_sidebar()

    # Main content
    st.title("🎨 VoiceCanvas - AI Content Studio")
    st.markdown("*Transform your ideas into comprehensive marketing content*")
    
    # Show model loading status
    if not st.session_state.models_loaded:
        st.warning("⚠️ AI models not loaded yet. Click 'Load AI Models' in the sidebar to enable speech recognition.")
    
    # Main input area
    st.header("💡 Share Your Idea")
    
    # The record tab exists only when the recorder component could be imported
    available_tabs = []
    if AUDIO_REC_AVAILABLE:
        available_tabs.append("🎙️ Record")
    available_tabs.extend(["📁 Upload", "✍️ Type"])
    
    tabs = st.tabs(available_tabs)
    tab_index = 0
    
    if AUDIO_REC_AVAILABLE:
        _render_record_tab(tabs[tab_index])
        tab_index += 1
    
    _render_upload_tab(tabs[tab_index])
    tab_index += 1
    
    _render_text_tab(tabs[tab_index])
    
    _render_review_section()
    _render_results_section()
    _render_footer()

if __name__ == "__main__":
    main()