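# Web-page header captured together with this file (not part of the program):
# uploaded by maria355; latest commit "Update app.py" (deece74, verified).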
import streamlit as st
import torch
import numpy as np
import io
import os
import tempfile
from PIL import Image, ImageDraw, ImageFont
import requests
import json
from datetime import datetime
import time
# Optional dependencies: each *_AVAILABLE flag records whether the matching
# import succeeded, so the rest of the app can degrade instead of crashing.
try:
    from transformers import pipeline
except ImportError:
    TRANSFORMERS_AVAILABLE = False
else:
    TRANSFORMERS_AVAILABLE = True

try:
    import google.generativeai as genai
except ImportError:
    GENAI_AVAILABLE = False
else:
    GENAI_AVAILABLE = True

try:
    from st_audiorec import st_audiorec
except ImportError:
    AUDIO_REC_AVAILABLE = False
else:
    AUDIO_REC_AVAILABLE = True
# Page-level Streamlit settings, gathered in one mapping and applied once.
_PAGE_SETTINGS = {
    "page_title": "VoiceCanvas - AI Content Studio",
    "page_icon": "🎨",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# Seed every session-state key the app reads, without overwriting values
# that already exist from an earlier script run in the same session.
for _state_key, _initial_value in (
    ('generated_content', {}),
    ('transcription', ""),
    ('processing', False),
    ('current_task', ""),
    ('models_loaded', False),
    ('whisper_model', None),
    ('button_clicked', False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
def load_models():
    """Load the Whisper speech-recognition pipeline into session state.

    Shows a progress bar and status line while loading and removes them
    afterwards.

    Returns:
        True when the model is (or already was) loaded, False when the
        transformers library is missing or loading raised an error.
    """
    state = st.session_state

    # Nothing to do when a previous run already loaded the model.
    if state.models_loaded and state.whisper_model is not None:
        return True

    if not TRANSFORMERS_AVAILABLE:
        st.error("❌ Transformers library not available. Please install: pip install transformers")
        return False

    bar = st.progress(0)
    status = st.empty()

    def clear_indicators():
        # Remove the temporary progress widgets from the page.
        bar.empty()
        status.empty()

    try:
        status.text("Loading speech recognition model...")
        bar.progress(25)

        # The pipeline lives in session state so it survives script reruns.
        state.whisper_model = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",
            device=-1,  # Use CPU
            torch_dtype=torch.float32,
            return_timestamps=False
        )

        bar.progress(75)
        status.text("Models loaded successfully!")
        bar.progress(100)
        state.models_loaded = True

        # Leave the success message visible briefly before clearing it.
        time.sleep(1)
        clear_indicators()
        return True
    except Exception as e:
        st.error(f"❌ Error loading models: {str(e)}")
        st.error("Try installing additional dependencies: pip install librosa soundfile")
        clear_indicators()
        return False
def setup_gemini():
    """Configure the Gemini client when the library and an API key exist.

    The key is read from the GEMINI_API_KEY environment variable first and
    from Streamlit secrets second.

    Returns:
        True when `genai.configure` was called with a key, otherwise False
        (library missing, no key found, or any error while looking it up).
    """
    if not GENAI_AVAILABLE:
        return False

    try:
        key = os.getenv("GEMINI_API_KEY")
        if not key and hasattr(st, 'secrets'):
            key = st.secrets.get("GEMINI_API_KEY", "")
        if not key:
            return False
        genai.configure(api_key=key)
        return True
    except Exception:
        # Any failure here simply means the enhanced mode is unavailable.
        return False
def _as_pipeline_input(audio_file):
    """Convert *audio_file* into something the speech pipeline accepts.

    Paths (str) and raw bytes are returned unchanged. Binary file-like
    objects (for example a Streamlit upload) are read into bytes, because
    the pipeline does not accept file objects. Anything else is passed
    through untouched.
    """
    if isinstance(audio_file, (str, bytes)):
        return audio_file
    if isinstance(audio_file, bytearray):
        return bytes(audio_file)

    # BytesIO-style objects expose the whole buffer regardless of position.
    getvalue = getattr(audio_file, "getvalue", None)
    if callable(getvalue):
        return getvalue()

    read = getattr(audio_file, "read", None)
    if callable(read):
        seek = getattr(audio_file, "seek", None)
        if callable(seek):
            seek(0)  # the caller may already have consumed the stream
        return read()

    return audio_file


def transcribe_audio_simple(audio_file):
    """Transcribe audio with the Whisper pipeline stored in session state.

    Args:
        audio_file: Path to an audio file, raw audio bytes, or a binary
            file-like object such as a Streamlit upload.

    Returns:
        The transcribed text with surrounding whitespace removed. On
        failure the error is shown in the UI and a string starting with
        "Error: " is returned instead of raising.
    """
    try:
        # The model is loaded on demand by load_models(); refuse to run without it.
        if st.session_state.whisper_model is None:
            st.error("❌ Speech recognition model not loaded. Please try loading models first.")
            return "Error: Speech recognition model not available"

        st.session_state.current_task = "Converting speech to text..."

        audio_input = _as_pipeline_input(audio_file)
        result = st.session_state.whisper_model(audio_input)

        st.session_state.current_task = ""

        # The result is normally {"text": ...}; other shapes are tolerated.
        if isinstance(result, dict) and "text" in result:
            return result["text"].strip()
        if isinstance(result, str):
            return result.strip()
        return str(result).strip()
    except Exception as e:
        st.session_state.current_task = ""
        details = str(e)
        st.error(f"Transcription error: {details}")

        # Point the user at the usual missing-dependency fix.
        lowered = details.lower()
        if "librosa" in lowered or "soundfile" in lowered:
            st.error("🔧 Missing audio processing libraries. Install with:")
            st.code("pip install librosa soundfile")

        return f"Error: {details}"
def generate_content_with_gemini(prompt):
    """Ask Gemini for marketing content built around *prompt*.

    Falls back to generate_content_offline() when the Gemini library is
    not installed or when the request raises.

    Returns:
        The generated markdown text.
    """
    if not GENAI_AVAILABLE:
        return generate_content_offline(prompt)

    # Instruction text sent to the model; the user's input is quoted inside.
    request = f"""
Based on this input: "{prompt}"
Create comprehensive marketing content with:
## Marketing Taglines
Generate 3 catchy, memorable taglines (max 12 words each)
## Social Media Posts
Create 3 engaging social media posts (max 280 characters each)
## Product Description
Write 1 compelling product description (100-150 words)
## Image Generation Prompts
Provide 3 detailed prompts for AI image generation
## Call-to-Action Ideas
Suggest 3 effective call-to-action phrases
Format with clear markdown headers and numbered lists.
"""

    try:
        st.session_state.current_task = "Generating enhanced content with Gemini AI..."
        gemini = genai.GenerativeModel('gemini-pro')
        reply = gemini.generate_content(request)
        st.session_state.current_task = ""
        return reply.text
    except Exception as e:
        # Any Gemini failure degrades to the template-based generator.
        st.warning(f"Gemini error: {e}. Using offline generation.")
        st.session_state.current_task = ""
        return generate_content_offline(prompt)
def generate_content_offline(prompt):
    """Build marketing content from fixed templates, without any API call.

    The structured result (a dict of taglines, social posts, description,
    image prompts and call-to-action ideas) is stored in
    st.session_state.generated_content['structured'].

    Args:
        prompt: The user's product or service description; it is inserted
            verbatim into every template.

    Returns:
        The same content rendered as markdown by format_content_display().
    """
    st.session_state.current_task = "Generating content with offline templates..."

    content = {
        "taglines": [
            f"Experience {prompt} like never before",
            f"Transform your world with {prompt}",
            f"Discover the power of {prompt}",
        ],
        "social_posts": [
            f"🌟 Ready to explore {prompt}? Join thousands who've already discovered the difference! #Innovation",
            f"💫 {prompt} is changing the game! Don't miss out on this incredible opportunity. #GameChanger",
            f"🚀 The future of {prompt} is here! Experience what everyone's talking about. #FutureTech",
        ],
        "description": f"Discover the revolutionary world of {prompt}. Our innovative approach combines cutting-edge technology with user-friendly design to deliver an unmatched experience. Perfect for both beginners and experts, this solution transforms how you interact with {prompt}. Join thousands of satisfied users today!",
        "image_prompts": [
            f"Professional product photo of {prompt}, clean white background, studio lighting",
            f"Modern minimalist illustration of {prompt}, flat design, vibrant colors",
            f"Futuristic concept art of {prompt}, digital art, high quality, detailed",
        ],
        "cta_ideas": [
            f"Get Started with {prompt} Today!",
            "Transform Your Experience Now",
            f"Join the {prompt} Revolution",
        ],
    }

    formatted = format_content_display(content)

    # Keep the structured form so the JSON export and flowchart can use it.
    st.session_state.generated_content['structured'] = content
    st.session_state.current_task = ""
    return formatted
def _first_or_default(items, default):
    """Return the first element of *items*, or *default* if it is missing or empty."""
    return items[0] if items else default


def create_flowchart_image(content_data):
    """Draw a simple boxes-and-lines overview of the marketing content.

    Args:
        content_data: Mapping that may contain 'taglines', 'social_posts',
            'cta_ideas' (lists of strings) and 'description' (string).
            Missing or empty entries fall back to sample text.

    Returns:
        An 800x600 PIL image, or None when drawing failed (the error is
        reported through st.error).
    """
    try:
        width, height = 800, 600
        image = Image.new('RGB', (width, height), 'white')
        draw = ImageDraw.Draw(image)

        # Prefer Arial; it is absent on many Linux hosts, so fall back to
        # Pillow's built-in font instead of failing.
        try:
            font_title = ImageFont.truetype("arial.ttf", 20)
            font_text = ImageFont.truetype("arial.ttf", 14)
            font_small = ImageFont.truetype("arial.ttf", 12)
            has_truetype = True
        except (OSError, ImportError):
            font_title = font_text = font_small = ImageFont.load_default()
            has_truetype = False

        # The built-in bitmap font can only encode Latin-1 text, so use an
        # ASCII bullet and replace unsupported characters in that case.
        bullet = "•" if has_truetype else "-"

        def put_text(position, text, fill, font):
            if not has_truetype:
                text = text.encode("latin-1", "replace").decode("latin-1")
            draw.text(position, text, fill=fill, font=font)

        primary_color = "#2E86AB"
        secondary_color = "#A23B72"
        accent_color = "#F18F01"
        text_color = "#333333"

        put_text((width//2 - 150, 20), "Marketing Content Strategy", text_color, font_title)

        y_offset = 80
        box_height = 80
        box_width = 180
        right_x = width//2 + 50

        # Box titles are plain text: neither font has emoji glyphs.
        tagline = str(_first_or_default(content_data.get('taglines'), 'Sample tagline'))
        social_post = str(_first_or_default(content_data.get('social_posts'), 'Sample post'))
        cta = str(_first_or_default(content_data.get('cta_ideas'), 'Sample CTA'))
        description = str(content_data.get('description') or 'Product description goes here')

        # Row 1: taglines (left) and social media (right)
        draw.rectangle([50, y_offset, 50 + box_width, y_offset + box_height], outline=primary_color, width=2)
        put_text((60, y_offset + 10), "Taglines", primary_color, font_text)
        put_text((60, y_offset + 35), f"{bullet} {tagline[:25]}...", text_color, font_small)

        draw.rectangle([right_x, y_offset, right_x + box_width, y_offset + box_height], outline=secondary_color, width=2)
        put_text((right_x + 10, y_offset + 10), "Social Media", secondary_color, font_text)
        put_text((right_x + 10, y_offset + 35), f"{bullet} {social_post[:25]}...", text_color, font_small)

        # Row 2: product description (centered)
        y_offset += 120
        draw.rectangle([width//4, y_offset, width*3//4, y_offset + box_height], outline=accent_color, width=2)
        put_text((width//4 + 10, y_offset + 10), "Product Description", accent_color, font_text)
        put_text((width//4 + 10, y_offset + 35), description[:50] + "...", text_color, font_small)

        # Row 3: calls to action (left) and visual ideas (right)
        y_offset += 120
        draw.rectangle([50, y_offset, 50 + box_width, y_offset + box_height], outline=primary_color, width=2)
        put_text((60, y_offset + 10), "Call-to-Actions", primary_color, font_text)
        put_text((60, y_offset + 35), f"{bullet} {cta}", text_color, font_small)

        draw.rectangle([right_x, y_offset, right_x + box_width, y_offset + box_height], outline=secondary_color, width=2)
        put_text((right_x + 10, y_offset + 10), "Visual Ideas", secondary_color, font_text)
        put_text((right_x + 10, y_offset + 35), f"{bullet} Professional photos", text_color, font_small)
        put_text((right_x + 10, y_offset + 50), f"{bullet} Minimalist design", text_color, font_small)

        # Connecting lines between the rows
        draw.line([(width//2, 80 + box_height), (width//2, 200)], fill=text_color, width=2)
        draw.line([(width//4 + box_width//2, 200 + box_height), (width//2, 320)], fill=text_color, width=2)
        draw.line([(width*3//4 - box_width//2, 200 + box_height), (width//2, 320)], fill=text_color, width=2)

        put_text((width//2 - 100, height - 30), "Generated by VoiceCanvas AI Studio", text_color, font_small)
        return image
    except Exception as e:
        st.error(f"Error creating flowchart: {e}")
        return None
def format_content_display(content):
    """Render structured marketing content as markdown.

    Args:
        content: A dict with any of the keys 'taglines', 'social_posts',
            'description', 'cta_ideas' and 'image_prompts'. Any other
            value is converted with str().

    Returns:
        Markdown text with one section per key that is present, in the
        fixed order taglines, social posts, description, calls to action,
        image prompts. An empty dict yields an empty string.
    """
    if not isinstance(content, dict):
        return str(content)

    parts = []

    if "taglines" in content:
        parts.append("## 🏷️ Marketing Taglines\n")
        parts.extend(f"{i}. **{tagline}**\n" for i, tagline in enumerate(content["taglines"], 1))
        parts.append("\n")

    if "social_posts" in content:
        parts.append("## 📱 Social Media Posts\n")
        parts.extend(f"**Post {i}:**\n{post}\n\n" for i, post in enumerate(content["social_posts"], 1))

    if "description" in content:
        parts.append("## 📝 Product Description\n")
        parts.append(f"{content['description']}\n\n")

    if "cta_ideas" in content:
        parts.append("## 🎯 Call-to-Action Ideas\n")
        parts.extend(f"{i}. {cta}\n" for i, cta in enumerate(content["cta_ideas"], 1))
        parts.append("\n")

    if "image_prompts" in content:
        parts.append("## 🎨 Image Generation Prompts\n")
        parts.extend(f"{i}. {prompt}\n" for i, prompt in enumerate(content["image_prompts"], 1))

    return "".join(parts)
def handle_button_click(button_key):
    """Claim the click flag for *button_key*.

    Returns:
        True when the flag was not set (it is set now), False when the
        flag was already set, i.e. a click is still being handled.
    """
    flag = f'{button_key}_clicked'
    if st.session_state.get(flag, False):
        return False
    st.session_state[flag] = True
    return True
def reset_button_state(button_key):
    """Clear the click flag for *button_key* if that flag exists."""
    flag = f'{button_key}_clicked'
    if flag not in st.session_state:
        return
    st.session_state[flag] = False
def _render_sidebar():
    """Draw the sidebar: model loader, component list, and usage help."""
    with st.sidebar:
        st.header("🎨 VoiceCanvas")
        st.markdown("*AI Content Studio*")

        # Offer the loader only until the models are in session state.
        if not st.session_state.models_loaded:
            if st.button("🚀 Load AI Models", type="primary", use_container_width=True):
                if handle_button_click("load_models"):
                    try:
                        with st.spinner("Loading AI models..."):
                            success = load_models()
                    finally:
                        # Release the click flag even if loading raised.
                        reset_button_state("load_models")
                    if success:
                        st.rerun()

        st.subheader("📊 System Status")
        setup_gemini()  # configures the Gemini client when a key is present

        st.write("🤖 **Components:**")
        st.write("• Speech Recognition")
        st.write("• Audio Recording")
        st.write("• Enhanced AI")

        if st.session_state.current_task:
            st.info(f"🔄 {st.session_state.current_task}")

        st.markdown("---")

        st.subheader("💡 How to Use")
        with st.expander("🚀 Quick Start", expanded=True):
            st.markdown(
                '1. **Load Models**: Click "Load AI Models" button first\n'
                '2. **Input**: Use voice, upload audio, or type text\n'
                '3. **Edit**: Review and refine your input\n'
                '4. **Generate**: Create marketing content\n'
                '5. **Visualize**: Generate flowchart of your strategy\n'
                '6. **Export**: Download your materials\n'
            )
        with st.expander("🎯 Best Practices"):
            # The blank line keeps the second heading out of the first list.
            st.markdown(
                "**For Voice/Audio:**\n"
                "- Speak clearly at normal pace\n"
                "- Use quiet environment\n"
                "- Describe your product/service\n"
                "- Mention target audience\n"
                "\n"
                "**For Text:**\n"
                "- Be specific about features\n"
                "- Include benefits and use cases\n"
                "- Mention what makes it unique\n"
                "- Use 50+ words for detail\n"
            )
        with st.expander("⚙️ Setup (Optional)"):
            st.markdown(
                "**Enhanced Features:**\n"
                "Add environment variables:\n"
                "- `GEMINI_API_KEY`: Advanced text generation\n"
                "\n"
                "**Get API Key:**\n"
                "- [Google AI Studio](https://makersuite.google.com/app/apikey) (Free)\n"
            )
        with st.expander("🛠️ Troubleshooting"):
            st.markdown(
                '**Common Issues:**\n'
                '- "Speech recognition not available" → Click "Load AI Models"\n'
                '- Audio processing errors → Install: `pip install librosa soundfile`\n'
                '- Button not responding → Wait for processing to complete\n'
                '- Slow processing → Models loading for first time\n'
                '- Basic content only → Add GEMINI_API_KEY\n'
            )


def _transcribe_recording(wav_bytes):
    """Transcribe recorded WAV bytes via a temporary file that is always removed."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_file.write(wav_bytes)
    # The file is closed (hence fully flushed) before it is reopened by name.
    try:
        return transcribe_audio_simple(tmp_file.name)
    finally:
        os.unlink(tmp_file.name)


def _run_transcription(button_key, spinner_text, transcribe):
    """Run *transcribe*, store its result as the transcription, then rerun the page."""
    st.session_state.processing = True
    try:
        with st.spinner(spinner_text):
            st.session_state.transcription = transcribe()
    finally:
        # Never leave the processing/click flags set after a failure.
        st.session_state.processing = False
        reset_button_state(button_key)
    st.rerun()


def _render_record_tab():
    """Draw the microphone-recording tab."""
    st.info("🎀 Click the microphone button to start recording")
    wav_audio_data = st_audiorec()
    if wav_audio_data is None:
        return

    st.success("🎉 Audio recorded successfully!")
    st.audio(wav_audio_data, format='audio/wav')

    col1, col2 = st.columns([1, 2])
    with col1:
        if st.button("🔄 Transcribe Audio", key="transcribe_btn", type="primary"):
            if not st.session_state.models_loaded:
                st.error("Please load AI models first using the sidebar button.")
            elif handle_button_click("transcribe"):
                _run_transcription(
                    "transcribe",
                    "🎯 Converting your speech to text...",
                    lambda: _transcribe_recording(wav_audio_data),
                )
    with col2:
        if st.session_state.processing:
            st.info("🔄 Processing your audio...")


def _render_upload_tab():
    """Draw the audio-file upload tab."""
    st.info("📁 Upload an audio file containing your idea")
    uploaded_file = st.file_uploader(
        "Choose audio file",
        type=['wav', 'mp3', 'm4a'],
        help="Supported: WAV, MP3, M4A • Max 10MB • Best: 30 seconds or less"
    )
    if not uploaded_file:
        return

    st.success("📄 File uploaded successfully!")
    st.audio(uploaded_file)

    col1, col2 = st.columns([1, 2])
    with col1:
        if st.button("🔄 Process Audio", key="upload_transcribe", type="primary"):
            if not st.session_state.models_loaded:
                st.error("Please load AI models first using the sidebar button.")
            elif handle_button_click("upload_process"):
                # Hand over raw bytes: the speech pipeline does not accept
                # file objects.
                _run_transcription(
                    "upload_process",
                    "🎯 Processing your audio file...",
                    lambda: transcribe_audio_simple(uploaded_file.getvalue()),
                )
    with col2:
        if st.session_state.processing:
            st.info("🔄 Converting speech to text...")


def _render_text_tab():
    """Draw the free-text input tab and give feedback on the level of detail."""
    st.info("✍️ Type or paste your product/service description")
    user_input = st.text_area(
        "Describe your idea:",
        placeholder="Example: A smart fitness tracker that monitors sleep patterns, heart rate, and stress levels. It provides personalized workout recommendations for busy professionals who want to maintain their health despite hectic schedules.",
        height=150,
        help="Be detailed! Include features, benefits, and target audience for best results."
    )
    if not user_input:
        return

    # NOTE(review): typed text replaces the stored transcription on every
    # rerun while this box is non-empty — confirm that is intended.
    st.session_state.transcription = user_input
    word_count = len(user_input.split())
    if word_count < 10:
        st.warning("💡 Add more details for better results (at least 10 words)")
    elif word_count < 30:
        st.info("📝 Good start! Add more features/benefits for richer content")
    else:
        st.success(f"✅ Great detail! ({word_count} words)")


def _render_review_and_generate():
    """Let the user edit the input text and trigger content generation."""
    if not st.session_state.transcription:
        return

    st.markdown("---")
    st.header("📝 Review Your Input")
    edited_text = st.text_area(
        "Edit or refine your input:",
        value=st.session_state.transcription,
        height=120,
        key="edit_transcription",
        help="Make any corrections or add more details"
    )
    st.session_state.transcription = edited_text

    st.markdown("---")
    _, center, _ = st.columns([1, 2, 1])
    with center:
        clicked = st.button(
            "🚀 Generate Marketing Content",
            type="primary",
            use_container_width=True,
            disabled=not edited_text.strip(),  # nothing to generate from
        )
        if clicked and handle_button_click("generate_content"):
            generated = st.session_state.generated_content
            try:
                with st.spinner("✨ Creating comprehensive marketing content..."):
                    # Drop results of an earlier generation so the JSON
                    # export and flowchart never describe old input.
                    generated.pop('structured', None)
                    generated.pop('flowchart', None)
                    if setup_gemini():
                        content_text = generate_content_with_gemini(st.session_state.transcription)
                    else:
                        content_text = generate_content_offline(st.session_state.transcription)
                    generated['text'] = content_text
            finally:
                reset_button_state("generate_content")
            st.success("✅ Content generated successfully!")
            st.rerun()


def _flowchart_source():
    """Return the data the flowchart is drawn from."""
    generated = st.session_state.generated_content
    if 'structured' in generated:
        return generated['structured']
    # Only free-form text is available: use generic placeholders.
    return {
        'taglines': ['Key message from your content'],
        'social_posts': ['Social media strategy'],
        'description': st.session_state.transcription[:100],
        'cta_ideas': ['Call to action'],
        'image_prompts': ['Visual elements']
    }


def _render_export():
    """Draw the download buttons for text, JSON data, and the flowchart."""
    generated = st.session_state.generated_content
    now = datetime.now()
    stamp = now.strftime('%Y%m%d_%H%M%S')

    st.markdown("---")
    st.header("📥 Export Your Content")
    col1, col2, col3 = st.columns(3)

    with col1:
        if 'text' in generated:
            content_export = (
                "VOICECANVAS MARKETING CONTENT\n"
                f"Generated: {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
                f"Source: {st.session_state.transcription[:100]}...\n"
                f"{generated['text']}\n"
                "---\n"
                "Created with VoiceCanvas AI Content Studio\n"
            )
            st.download_button(
                "📄 Download Text",
                content_export,
                file_name=f"marketing_content_{stamp}.txt",
                mime="text/plain",
                use_container_width=True,
                help="Download complete text content"
            )

    with col2:
        if 'structured' in generated:
            json_data = {
                "metadata": {
                    "timestamp": now.isoformat(),
                    "generator": "VoiceCanvas AI Studio",
                    "mode": "Enhanced" if setup_gemini() else "Basic"
                },
                "input": st.session_state.transcription,
                "content": generated['structured']
            }
            st.download_button(
                "📊 Download Data",
                json.dumps(json_data, indent=2),
                file_name=f"content_data_{stamp}.json",
                mime="application/json",
                use_container_width=True,
                help="Download structured data (JSON)"
            )

    with col3:
        if 'flowchart' in generated:
            img_buffer = io.BytesIO()
            generated['flowchart'].save(img_buffer, format="PNG")
            st.download_button(
                "📊 Download Flowchart",
                img_buffer.getvalue(),
                file_name=f"strategy_flowchart_{stamp}.png",
                mime="image/png",
                use_container_width=True,
                help="Download strategy flowchart"
            )
        else:
            st.info("Generate flowchart first", icon="ℹ️")


def _render_generated_content():
    """Show the generated text, the flowchart controls, and the export area."""
    generated = st.session_state.generated_content
    if not generated:
        return

    st.markdown("---")
    st.header("✨ Your Marketing Content")
    if 'text' in generated:
        st.markdown(generated['text'])

    st.markdown("---")
    st.subheader("🎨 Visual Content")
    col1, col2 = st.columns([1, 1])
    with col1:
        if st.button("📊 Generate Strategy Flowchart", use_container_width=True, type="secondary"):
            if handle_button_click("generate_flowchart"):
                try:
                    with st.spinner("🎨 Creating strategy flowchart..."):
                        flowchart_img = create_flowchart_image(_flowchart_source())
                finally:
                    reset_button_state("generate_flowchart")
                # Rerun only on success; a rerun after a failure would
                # erase the error message before the user can read it.
                if flowchart_img is not None:
                    generated['flowchart'] = flowchart_img
                    st.success("📊 Flowchart created!")
                    st.rerun()
    with col2:
        st.info("💡 Generate a visual flowchart of your marketing strategy to better understand content relationships and flow.")

    if 'flowchart' in generated:
        st.image(
            generated['flowchart'],
            caption="Marketing Strategy Flowchart",
            use_column_width=True
        )

    _render_export()


def _render_footer():
    """Draw the page footer."""
    st.markdown("---")
    _, center, _ = st.columns([1, 2, 1])
    with center:
        st.markdown("🎨 **VoiceCanvas AI Content Studio**")
        st.caption("Transform ideas into marketing magic • Built with Streamlit")


def main():
    """Render the whole VoiceCanvas page for one Streamlit script run.

    Order: sidebar, title, input tabs (record when the recorder component
    is installed, upload, type), input review and generation, generated
    content with flowchart and export, footer.
    """
    _render_sidebar()

    st.title("🎨 VoiceCanvas - AI Content Studio")
    st.markdown("*Transform your ideas into comprehensive marketing content*")

    if not st.session_state.models_loaded:
        st.warning("⚠️ AI models not loaded yet. Click 'Load AI Models' in the sidebar to enable speech recognition.")

    st.header("💡 Share Your Idea")

    # The record tab exists only when the recorder component is installed.
    tab_labels = []
    if AUDIO_REC_AVAILABLE:
        tab_labels.append("🎙️ Record")
    tab_labels.extend(["📁 Upload", "✍️ Type"])
    tabs = iter(st.tabs(tab_labels))

    if AUDIO_REC_AVAILABLE:
        with next(tabs):
            _render_record_tab()
    with next(tabs):
        _render_upload_tab()
    with next(tabs):
        _render_text_tab()

    _render_review_and_generate()
    _render_generated_content()
    _render_footer()
# Run the app when this file is executed as the script entry point.
if __name__ == "__main__":
    main()