"""VoiceCanvas - a Streamlit app that turns spoken or typed ideas into marketing content.

Flow: the user records audio, uploads an audio file, or types a description;
the text can be reviewed/edited; marketing copy is generated either with
Gemini (when an API key is configured) or with offline templates; a simple
"strategy flowchart" image can be rendered; and everything can be exported.
"""

import io
import json
import os
import tempfile
import time
from datetime import datetime

import numpy as np
import requests  # kept: may be relied on elsewhere in the project
import streamlit as st
import torch
from PIL import Image, ImageDraw, ImageFont

# Optional dependencies: the app degrades gracefully when one is missing.
try:
    from transformers import pipeline
    TRANSFORMERS_AVAILABLE = True
except ImportError:
    TRANSFORMERS_AVAILABLE = False

try:
    import google.generativeai as genai
    GENAI_AVAILABLE = True
except ImportError:
    GENAI_AVAILABLE = False

try:
    from st_audiorec import st_audiorec
    AUDIO_REC_AVAILABLE = True
except ImportError:
    AUDIO_REC_AVAILABLE = False

# Configure page (must run before any other Streamlit command).
st.set_page_config(
    page_title="VoiceCanvas - AI Content Studio",
    page_icon="🎨",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Initialize session state. The script is re-executed on every interaction,
# so only keys that are still missing receive their default.
_SESSION_DEFAULTS = {
    'generated_content': {},
    'transcription': "",
    'processing': False,
    'current_task': "",
    'models_loaded': False,
    'whisper_model': None,
    'button_clicked': False,
    # Last text copied from the "Type" tab; lets us detect real edits there.
    'last_typed_input': "",
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

_GEMINI_PROMPT_TEMPLATE = """
Based on this input: "{prompt}"

Create comprehensive marketing content with:

## Marketing Taglines
Generate 3 catchy, memorable taglines (max 12 words each)

## Social Media Posts
Create 3 engaging social media posts (max 280 characters each)

## Product Description
Write 1 compelling product description (100-150 words)

## Image Generation Prompts
Provide 3 detailed prompts for AI image generation

## Call-to-Action Ideas
Suggest 3 effective call-to-action phrases

Format with clear markdown headers and numbered lists.
"""

_QUICK_START_MD = """
1. **Load Models**: Click "Load AI Models" button first
2. **Input**: Use voice, upload audio, or type text
3. **Edit**: Review and refine your input
4. **Generate**: Create marketing content
5. **Visualize**: Generate flowchart of your strategy
6. **Export**: Download your materials
"""

_BEST_PRACTICES_MD = """
**For Voice/Audio:**
- Speak clearly at normal pace
- Use quiet environment
- Describe your product/service
- Mention target audience

**For Text:**
- Be specific about features
- Include benefits and use cases
- Mention what makes it unique
- Use 50+ words for detail
"""

_SETUP_MD = """
**Enhanced Features:**
Add environment variables:
- `GEMINI_API_KEY`: Advanced text generation

**Get API Key:**
- [Google AI Studio](https://makersuite.google.com/app/apikey) (Free)
"""

_TROUBLESHOOTING_MD = """
**Common Issues:**
- "Speech recognition not available" → Click "Load AI Models"
- Audio processing errors → Install: `pip install librosa soundfile`
- Button not responding → Wait for processing to complete
- Slow processing → Models loading for first time
- Basic content only → Add GEMINI_API_KEY
"""


def load_models():
    """Load the Whisper speech-recognition pipeline into session state.

    Shows a progress bar while loading. The pipeline is stored in
    ``st.session_state.whisper_model`` so it survives Streamlit reruns.

    Returns:
        bool: True when the model is (already) loaded, False on any failure.
    """
    if st.session_state.models_loaded and st.session_state.whisper_model is not None:
        return True

    if not TRANSFORMERS_AVAILABLE:
        st.error("❌ Transformers library not available. Please install: pip install transformers")
        return False

    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        status_text.text("Loading speech recognition model...")
        progress_bar.progress(25)

        st.session_state.whisper_model = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=-1,  # Use CPU
            torch_dtype=torch.float32,
            return_timestamps=False,
        )

        progress_bar.progress(75)
        status_text.text("Models loaded successfully!")
        progress_bar.progress(100)
        st.session_state.models_loaded = True

        # Leave the success message visible briefly before clearing it.
        time.sleep(1)
        return True
    except Exception as e:
        st.error(f"❌ Error loading models: {str(e)}")
        st.error("Try installing additional dependencies: pip install librosa soundfile")
        return False
    finally:
        progress_bar.empty()
        status_text.empty()


def setup_gemini():
    """Configure the Gemini client from ``GEMINI_API_KEY`` if possible.

    The key is read from the environment first, then from Streamlit secrets.

    Returns:
        bool: True when a key was found and ``genai.configure`` succeeded.
    """
    if not GENAI_AVAILABLE:
        return False

    try:
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key and hasattr(st, 'secrets'):
            api_key = st.secrets.get("GEMINI_API_KEY", "")
        if api_key:
            genai.configure(api_key=api_key)
            return True
        return False
    except Exception:
        # Any failure here (including reading secrets) just means the
        # enhanced mode is unavailable; the app falls back to templates.
        return False


def _to_pipeline_input(audio_file):
    """Convert *audio_file* into something the ASR pipeline can consume.

    Paths, raw bytes, arrays and dicts are passed through unchanged.
    File-like objects (such as the object returned by ``st.file_uploader``)
    are read into bytes, because the pipeline is called with the value
    directly and a file handle is not one of its input types.
    """
    if isinstance(audio_file, (str, bytes, dict, np.ndarray)):
        return audio_file
    if isinstance(audio_file, bytearray):
        return bytes(audio_file)

    getvalue = getattr(audio_file, "getvalue", None)
    if callable(getvalue):
        return getvalue()

    read = getattr(audio_file, "read", None)
    if callable(read):
        seek = getattr(audio_file, "seek", None)
        if callable(seek):
            seek(0)  # the handle may already have been consumed (e.g. by st.audio)
        return read()

    return audio_file


def transcribe_audio_simple(audio_file):
    """Transcribe audio with the loaded Whisper pipeline.

    Args:
        audio_file: A file path, raw audio bytes, or a file-like object.

    Returns:
        str: The stripped transcription, or a string starting with
        ``"Error: "`` when the model is missing or transcription fails.
        Errors are also reported through ``st.error``.
    """
    try:
        if st.session_state.whisper_model is None:
            st.error("❌ Speech recognition model not loaded. Please try loading models first.")
            return "Error: Speech recognition model not available"

        st.session_state.current_task = "Converting speech to text..."
        result = st.session_state.whisper_model(_to_pipeline_input(audio_file))
        st.session_state.current_task = ""

        # Handle different result formats
        if isinstance(result, dict) and "text" in result:
            return result["text"].strip()
        if isinstance(result, str):
            return result.strip()
        return str(result).strip()

    except Exception as e:
        st.session_state.current_task = ""
        st.error(f"Transcription error: {str(e)}")

        # Provide troubleshooting suggestions
        lowered = str(e).lower()
        if "librosa" in lowered or "soundfile" in lowered:
            st.error("🔧 Missing audio processing libraries. Install with:")
            st.code("pip install librosa soundfile")

        return f"Error: {str(e)}"


def generate_content_with_gemini(prompt):
    """Generate marketing content with Gemini, falling back to templates.

    Args:
        prompt: The user's product/idea description.

    Returns:
        str: Markdown text from Gemini, or the offline-template output when
        the Gemini library is unavailable or the request fails.
    """
    if not GENAI_AVAILABLE:
        return generate_content_offline(prompt)

    try:
        st.session_state.current_task = "Generating enhanced content with Gemini AI..."
        model = genai.GenerativeModel('gemini-pro')
        response = model.generate_content(_GEMINI_PROMPT_TEMPLATE.format(prompt=prompt))
        st.session_state.current_task = ""
        return response.text
    except Exception as e:
        st.warning(f"Gemini error: {e}. Using offline generation.")
        st.session_state.current_task = ""
        return generate_content_offline(prompt)


def generate_content_offline(prompt):
    """Generate marketing content from fixed templates.

    The structured dict is also stored under
    ``st.session_state.generated_content['structured']`` for the flowchart
    and the JSON export.

    Args:
        prompt: The user's product/idea description.

    Returns:
        str: The content formatted as markdown.
    """
    st.session_state.current_task = "Generating content with offline templates..."

    content = {
        "taglines": [
            f"Experience {prompt} like never before",
            f"Transform your world with {prompt}",
            f"Discover the power of {prompt}",
        ],
        "social_posts": [
            f"🌟 Ready to explore {prompt}? Join thousands who've already discovered the difference! #Innovation",
            f"💫 {prompt} is changing the game! Don't miss out on this incredible opportunity. #GameChanger",
            f"🚀 The future of {prompt} is here! Experience what everyone's talking about. #FutureTech",
        ],
        "description": (
            f"Discover the revolutionary world of {prompt}. Our innovative approach combines "
            f"cutting-edge technology with user-friendly design to deliver an unmatched experience. "
            f"Perfect for both beginners and experts, this solution transforms how you interact "
            f"with {prompt}. Join thousands of satisfied users today!"
        ),
        "image_prompts": [
            f"Professional product photo of {prompt}, clean white background, studio lighting",
            f"Modern minimalist illustration of {prompt}, flat design, vibrant colors",
            f"Futuristic concept art of {prompt}, digital art, high quality, detailed",
        ],
        "cta_ideas": [
            f"Get Started with {prompt} Today!",
            "Transform Your Experience Now",
            f"Join the {prompt} Revolution",
        ],
    }

    formatted = format_content_display(content)

    st.session_state.generated_content['structured'] = content
    st.session_state.current_task = ""

    return formatted


def _load_flowchart_fonts():
    """Return (title, text, small) fonts, falling back to Pillow's default."""
    for font_name in ("arial.ttf", "DejaVuSans.ttf"):
        try:
            return tuple(ImageFont.truetype(font_name, size) for size in (20, 14, 12))
        except OSError:
            # Font file not found / unreadable on this system; try the next one.
            continue
    fallback = ImageFont.load_default()
    return fallback, fallback, fallback


def _draw_text(draw, xy, text, *, fill, font):
    """Draw *text*, retrying with ASCII only if the font cannot encode it."""
    try:
        draw.text(xy, text, fill=fill, font=font)
    except UnicodeEncodeError:
        # NOTE(review): expected with Pillow's bitmap default font, which
        # appears limited to Latin-1; emoji/bullets are dropped in that case.
        ascii_text = text.encode("ascii", "ignore").decode("ascii").strip()
        draw.text(xy, ascii_text, fill=fill, font=font)


def _first_item(content_data, key, fallback):
    """Return the first entry of ``content_data[key]`` or *fallback* if absent/empty."""
    items = content_data.get(key)
    return items[0] if items else fallback


def create_flowchart_image(content_data):
    """Render a simple flowchart-style overview of the marketing content.

    Args:
        content_data: Dict with optional keys ``taglines``, ``social_posts``,
            ``description`` and ``cta_ideas``; missing or empty values are
            replaced with placeholder text.

    Returns:
        PIL.Image.Image | None: An 800x600 RGB image, or None when drawing
        fails (the error is reported through ``st.error``).
    """
    try:
        width, height = 800, 600
        image = Image.new('RGB', (width, height), 'white')
        draw = ImageDraw.Draw(image)
        font_title, font_text, font_small = _load_flowchart_fonts()

        # Colors
        primary_color = "#2E86AB"
        secondary_color = "#A23B72"
        accent_color = "#F18F01"
        text_color = "#333333"

        box_height = 80
        box_width = 180
        left_x = 50
        right_x = width // 2 + 50
        center_x = width // 4

        _draw_text(draw, (width // 2 - 150, 20), "Marketing Content Strategy",
                   fill=text_color, font=font_title)

        # Row 1: Taglines and Social Media
        y_offset = 80
        draw.rectangle([left_x, y_offset, left_x + box_width, y_offset + box_height],
                       outline=primary_color, width=2)
        _draw_text(draw, (left_x + 10, y_offset + 10), "🏷️ Taglines",
                   fill=primary_color, font=font_text)
        tagline = _first_item(content_data, 'taglines', 'Sample tagline')
        _draw_text(draw, (left_x + 10, y_offset + 35), f"• {tagline[:25]}...",
                   fill=text_color, font=font_small)

        draw.rectangle([right_x, y_offset, right_x + box_width, y_offset + box_height],
                       outline=secondary_color, width=2)
        _draw_text(draw, (right_x + 10, y_offset + 10), "📱 Social Media",
                   fill=secondary_color, font=font_text)
        post = _first_item(content_data, 'social_posts', 'Sample post')
        _draw_text(draw, (right_x + 10, y_offset + 35), f"• {post[:25]}...",
                   fill=text_color, font=font_small)

        # Row 2: Description
        y_offset += 120
        draw.rectangle([center_x, y_offset, width * 3 // 4, y_offset + box_height],
                       outline=accent_color, width=2)
        _draw_text(draw, (center_x + 10, y_offset + 10), "📝 Product Description",
                   fill=accent_color, font=font_text)
        description = content_data.get('description') or 'Product description goes here'
        _draw_text(draw, (center_x + 10, y_offset + 35), description[:50] + "...",
                   fill=text_color, font=font_small)

        # Row 3: CTAs and Image Ideas
        y_offset += 120
        draw.rectangle([left_x, y_offset, left_x + box_width, y_offset + box_height],
                       outline=primary_color, width=2)
        _draw_text(draw, (left_x + 10, y_offset + 10), "🎯 Call-to-Actions",
                   fill=primary_color, font=font_text)
        cta = _first_item(content_data, 'cta_ideas', 'Sample CTA')
        _draw_text(draw, (left_x + 10, y_offset + 35), f"• {cta}",
                   fill=text_color, font=font_small)

        draw.rectangle([right_x, y_offset, right_x + box_width, y_offset + box_height],
                       outline=secondary_color, width=2)
        _draw_text(draw, (right_x + 10, y_offset + 10), "🎨 Visual Ideas",
                   fill=secondary_color, font=font_text)
        _draw_text(draw, (right_x + 10, y_offset + 35), "• Professional photos",
                   fill=text_color, font=font_small)
        _draw_text(draw, (right_x + 10, y_offset + 50), "• Minimalist design",
                   fill=text_color, font=font_small)

        # Draw connecting lines
        draw.line([(width // 2, 80 + box_height), (width // 2, 200)],
                  fill=text_color, width=2)
        draw.line([(width // 4 + box_width // 2, 200 + box_height), (width // 2, 320)],
                  fill=text_color, width=2)
        draw.line([(width * 3 // 4 - box_width // 2, 200 + box_height), (width // 2, 320)],
                  fill=text_color, width=2)

        # Add footer
        _draw_text(draw, (width // 2 - 100, height - 30), "Generated by VoiceCanvas AI Studio",
                   fill=text_color, font=font_small)

        return image

    except Exception as e:
        st.error(f"Error creating flowchart: {e}")
        return None


def format_content_display(content):
    """Format structured content as markdown.

    Args:
        content: A dict with any of the keys ``taglines``, ``social_posts``,
            ``description``, ``cta_ideas``, ``image_prompts``; any other
            value is returned via ``str(content)``.

    Returns:
        str: Markdown with one section per key that is present.
    """
    if not isinstance(content, dict):
        return str(content)

    parts = []

    if "taglines" in content:
        parts.append("## 🏷️ Marketing Taglines\n")
        parts.extend(f"{i}. **{tagline}**\n" for i, tagline in enumerate(content["taglines"], 1))
        parts.append("\n")

    if "social_posts" in content:
        parts.append("## 📱 Social Media Posts\n")
        parts.extend(f"**Post {i}:**\n{post}\n\n" for i, post in enumerate(content["social_posts"], 1))

    if "description" in content:
        parts.append("## 📝 Product Description\n")
        parts.append(f"{content['description']}\n\n")

    if "cta_ideas" in content:
        parts.append("## 🎯 Call-to-Action Ideas\n")
        parts.extend(f"{i}. {cta}\n" for i, cta in enumerate(content["cta_ideas"], 1))
        parts.append("\n")

    if "image_prompts" in content:
        parts.append("## 🎨 Image Generation Prompts\n")
        parts.extend(f"{i}. {prompt}\n" for i, prompt in enumerate(content["image_prompts"], 1))

    return "".join(parts)


def handle_button_click(button_key):
    """Mark *button_key* as in progress.

    Returns:
        bool: True if the click should be processed, False if the same
        button is already flagged as being handled.
    """
    flag = f'{button_key}_clicked'
    if st.session_state.get(flag, False):
        return False
    st.session_state[flag] = True
    return True


def reset_button_state(button_key):
    """Clear the in-progress flag set by ``handle_button_click``."""
    flag = f'{button_key}_clicked'
    if flag in st.session_state:
        st.session_state[flag] = False


def _status_line(label, ok):
    """Return one bullet of the sidebar status list."""
    return f"• {label}: {'✅' if ok else '❌'}"


def _render_sidebar():
    """Render the sidebar: model loader, system status and help texts."""
    with st.sidebar:
        st.header("🎨 VoiceCanvas")
        st.markdown("*AI Content Studio*")

        # Load models button
        if not st.session_state.models_loaded:
            if st.button("🚀 Load AI Models", type="primary", use_container_width=True):
                if handle_button_click("load_models"):
                    try:
                        with st.spinner("Loading AI models..."):
                            success = load_models()
                    finally:
                        # Always release the guard so the button stays usable.
                        reset_button_state("load_models")
                    if success:
                        st.rerun()

        # Status section
        st.subheader("📊 System Status")
        gemini_available = setup_gemini()

        st.write("🤖 **Components:**")
        st.write(_status_line("Speech Recognition", st.session_state.models_loaded))
        st.write(_status_line("Audio Recording", AUDIO_REC_AVAILABLE))
        st.write(_status_line("Enhanced AI", gemini_available))

        # Current task indicator
        if st.session_state.current_task:
            st.info(f"🔄 {st.session_state.current_task}")

        st.markdown("---")

        # Tips and help
        st.subheader("💡 How to Use")

        with st.expander("🚀 Quick Start", expanded=True):
            st.markdown(_QUICK_START_MD)

        with st.expander("🎯 Best Practices"):
            st.markdown(_BEST_PRACTICES_MD)

        with st.expander("⚙️ Setup (Optional)"):
            st.markdown(_SETUP_MD)

        with st.expander("🛠️ Troubleshooting"):
            st.markdown(_TROUBLESHOOTING_MD)


def _transcribe_wav_bytes(wav_audio_data):
    """Write recorded WAV bytes to a temp file, transcribe it, then delete it."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_file.write(wav_audio_data)
        tmp_path = tmp_file.name
    # The file is closed (and therefore flushed) before it is read back.
    try:
        return transcribe_audio_simple(tmp_path)
    finally:
        os.unlink(tmp_path)


def _render_record_tab():
    """Render the microphone-recording tab."""
    st.info("🎤 Click the microphone button to start recording")

    wav_audio_data = st_audiorec()
    if wav_audio_data is None:
        return

    st.success("🎉 Audio recorded successfully!")
    st.audio(wav_audio_data, format='audio/wav')

    action_col, status_col = st.columns([1, 2])
    with action_col:
        if st.button("🔄 Transcribe Audio", key="transcribe_btn", type="primary"):
            if not st.session_state.models_loaded:
                st.error("Please load AI models first using the sidebar button.")
            elif handle_button_click("transcribe"):
                st.session_state.processing = True
                try:
                    with st.spinner("🎯 Converting your speech to text..."):
                        st.session_state.transcription = _transcribe_wav_bytes(wav_audio_data)
                finally:
                    st.session_state.processing = False
                    reset_button_state("transcribe")
                st.rerun()

    with status_col:
        if st.session_state.processing:
            st.info("🔄 Processing your audio...")


def _render_upload_tab():
    """Render the audio-file upload tab."""
    st.info("📁 Upload an audio file containing your idea")

    uploaded_file = st.file_uploader(
        "Choose audio file",
        type=['wav', 'mp3', 'm4a'],
        help="Supported: WAV, MP3, M4A • Max 10MB • Best: 30 seconds or less",
    )
    if not uploaded_file:
        return

    st.success("📄 File uploaded successfully!")
    st.audio(uploaded_file)

    action_col, status_col = st.columns([1, 2])
    with action_col:
        if st.button("🔄 Process Audio", key="upload_transcribe", type="primary"):
            if not st.session_state.models_loaded:
                st.error("Please load AI models first using the sidebar button.")
            elif handle_button_click("upload_process"):
                st.session_state.processing = True
                try:
                    with st.spinner("🎯 Processing your audio file..."):
                        st.session_state.transcription = transcribe_audio_simple(uploaded_file)
                finally:
                    st.session_state.processing = False
                    reset_button_state("upload_process")
                st.rerun()

    with status_col:
        if st.session_state.processing:
            st.info("🔄 Converting speech to text...")


def _render_text_tab():
    """Render the free-text input tab."""
    st.info("✏️ Type or paste your product/service description")

    user_input = st.text_area(
        "Describe your idea:",
        placeholder="Example: A smart fitness tracker that monitors sleep patterns, heart rate, and stress levels. It provides personalized workout recommendations for busy professionals who want to maintain their health despite hectic schedules.",
        height=150,
        help="Be detailed! Include features, benefits, and target audience for best results.",
    )
    if not user_input:
        return

    # Only adopt the typed text when it actually changed; otherwise every
    # rerun would overwrite a newer audio transcription with stale text.
    if user_input != st.session_state.last_typed_input:
        st.session_state.last_typed_input = user_input
        st.session_state.transcription = user_input

    word_count = len(user_input.split())
    if word_count < 10:
        st.warning("💡 Add more details for better results (at least 10 words)")
    elif word_count < 30:
        st.info("📝 Good start! Add more features/benefits for richer content")
    else:
        st.success(f"✅ Great detail! ({word_count} words)")


def _render_review_and_generate():
    """Render the editable transcription and the content-generation button."""
    st.markdown("---")
    st.header("📝 Review Your Input")

    edited_text = st.text_area(
        "Edit or refine your input:",
        value=st.session_state.transcription,
        height=120,
        key="edit_transcription",
        help="Make any corrections or add more details",
    )
    st.session_state.transcription = edited_text

    # Generate content section
    st.markdown("---")

    _, center_col, _ = st.columns([1, 2, 1])
    with center_col:
        if st.button("🚀 Generate Marketing Content", type="primary", use_container_width=True):
            if handle_button_click("generate_content"):
                try:
                    with st.spinner("✨ Creating comprehensive marketing content..."):
                        # Drop artifacts of a previous run (structured data,
                        # flowchart) so exports always match the new text.
                        st.session_state.generated_content = {}
                        source_text = st.session_state.transcription
                        if setup_gemini():
                            content_text = generate_content_with_gemini(source_text)
                        else:
                            content_text = generate_content_offline(source_text)
                        st.session_state.generated_content['text'] = content_text
                finally:
                    reset_button_state("generate_content")
                st.success("✅ Content generated successfully!")
                st.rerun()


def _build_flowchart():
    """Create a flowchart from structured content, or from placeholders."""
    generated = st.session_state.generated_content
    if 'structured' in generated:
        return create_flowchart_image(generated['structured'])

    # No structured data (e.g. Gemini output): build a basic flowchart.
    basic_data = {
        'taglines': ['Key message from your content'],
        'social_posts': ['Social media strategy'],
        'description': st.session_state.transcription[:100],
        'cta_ideas': ['Call to action'],
        'image_prompts': ['Visual elements'],
    }
    return create_flowchart_image(basic_data)


def _render_generated_content():
    """Render the generated text and the flowchart section."""
    st.markdown("---")
    st.header("✨ Your Marketing Content")

    generated = st.session_state.generated_content

    # Text content
    if 'text' in generated:
        st.markdown(generated['text'])

    # Visual content section
    st.markdown("---")
    st.subheader("🎨 Visual Content")

    action_col, hint_col = st.columns([1, 1])
    with action_col:
        if st.button("📊 Generate Strategy Flowchart", use_container_width=True, type="secondary"):
            if handle_button_click("generate_flowchart"):
                try:
                    with st.spinner("🎨 Creating strategy flowchart..."):
                        flowchart_img = _build_flowchart()
                finally:
                    reset_button_state("generate_flowchart")
                # Rerun only on success so a drawing error stays visible.
                if flowchart_img:
                    generated['flowchart'] = flowchart_img
                    st.success("📊 Flowchart created!")
                    st.rerun()

    with hint_col:
        st.info("💡 Generate a visual flowchart of your marketing strategy to better understand content relationships and flow.")

    # Display generated flowchart
    if 'flowchart' in generated:
        st.image(
            generated['flowchart'],
            caption="Marketing Strategy Flowchart",
            use_column_width=True,
        )


def _render_export():
    """Render the download buttons for text, JSON data and the flowchart."""
    st.markdown("---")
    st.header("📥 Export Your Content")

    generated = st.session_state.generated_content
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    text_col, data_col, image_col = st.columns(3)

    with text_col:
        # Text export
        if 'text' in generated:
            content_export = (
                "VOICECANVAS MARKETING CONTENT\n"
                f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
                f"Source: {st.session_state.transcription[:100]}...\n"
                "\n"
                f"{generated['text']}\n"
                "\n"
                "---\n"
                "Created with VoiceCanvas AI Content Studio\n"
            )
            st.download_button(
                "📄 Download Text",
                content_export,
                file_name=f"marketing_content_{timestamp}.txt",
                mime="text/plain",
                use_container_width=True,
                help="Download complete text content",
            )

    with data_col:
        # JSON export
        if 'structured' in generated:
            json_data = {
                "metadata": {
                    "timestamp": datetime.now().isoformat(),
                    "generator": "VoiceCanvas AI Studio",
                    "mode": "Enhanced" if setup_gemini() else "Basic",
                },
                "input": st.session_state.transcription,
                "content": generated['structured'],
            }
            st.download_button(
                "📊 Download Data",
                json.dumps(json_data, indent=2),
                file_name=f"content_data_{timestamp}.json",
                mime="application/json",
                use_container_width=True,
                help="Download structured data (JSON)",
            )

    with image_col:
        # Flowchart export
        if 'flowchart' in generated:
            img_buffer = io.BytesIO()
            generated['flowchart'].save(img_buffer, format="PNG")
            st.download_button(
                "📊 Download Flowchart",
                img_buffer.getvalue(),
                file_name=f"strategy_flowchart_{timestamp}.png",
                mime="image/png",
                use_container_width=True,
                help="Download strategy flowchart",
            )
        else:
            st.info("Generate flowchart first", icon="ℹ️")


def main():
    """Build the VoiceCanvas page: sidebar, input tabs, results and export."""
    _render_sidebar()

    # Main content
    st.title("🎨 VoiceCanvas - AI Content Studio")
    st.markdown("*Transform your ideas into comprehensive marketing content*")

    # Show model loading status
    if not st.session_state.models_loaded:
        st.warning("⚠️ AI models not loaded yet. Click 'Load AI Models' in the sidebar to enable speech recognition.")

    # Main input area
    st.header("💡 Share Your Idea")

    # Dynamic tabs based on available features: the recording tab only
    # exists when the optional recorder component could be imported.
    renderers = []
    if AUDIO_REC_AVAILABLE:
        renderers.append(("🎙️ Record", _render_record_tab))
    renderers.append(("📁 Upload", _render_upload_tab))
    renderers.append(("✏️ Type", _render_text_tab))

    tabs = st.tabs([label for label, _ in renderers])
    for tab, (_, render) in zip(tabs, renderers):
        with tab:
            render()

    # Show transcription and editing
    if st.session_state.transcription:
        _render_review_and_generate()

    # Display generated content
    if st.session_state.generated_content:
        _render_generated_content()
        _render_export()

    # Footer
    st.markdown("---")
    _, footer_col, _ = st.columns([1, 2, 1])
    with footer_col:
        st.markdown("🎨 **VoiceCanvas AI Content Studio**")
        st.caption("Transform ideas into marketing magic • Built with Streamlit")


if __name__ == "__main__":
    main()