# Source: Hugging Face Space "maria355/VoiceVision-Creative-AI", file app.py
# Revision: deece74 ("Update app.py", by maria355, verified; listed as 17 days old when captured)
# File size: 29 kB. Space status when captured: Sleeping.


	import streamlit as st
	import torch
	import numpy as np
	import io
	import os
	import tempfile
	from PIL import Image, ImageDraw, ImageFont
	import requests
	import json
	from datetime import datetime
	import time

	# Optional dependencies. Each import is attempted on its own so that a missing
	# package only switches off the feature that needs it; the *_AVAILABLE flags
	# record which imports succeeded.
	try:
	    from transformers import pipeline
	    TRANSFORMERS_AVAILABLE = True
	except ImportError:
	    TRANSFORMERS_AVAILABLE = False

	try:
	    import google.generativeai as genai
	    GENAI_AVAILABLE = True
	except ImportError:
	    GENAI_AVAILABLE = False

	try:
	    from st_audiorec import st_audiorec
	    AUDIO_REC_AVAILABLE = True
	except ImportError:
	    AUDIO_REC_AVAILABLE = False

	# Configure page
	st.set_page_config(
	    page_title="VoiceCanvas - AI Content Studio",
	    page_icon="🎨",
	    layout="wide",
	    initial_sidebar_state="expanded",
	)

	# Initialize session state: every key the app reads gets a starting value the
	# first time a session runs the script; existing values are left untouched.
	_SESSION_DEFAULTS = {
	    'generated_content': {},
	    'transcription': "",
	    'processing': False,
	    'current_task': "",
	    'models_loaded': False,
	    'whisper_model': None,
	    'button_clicked': False,
	}
	for _state_key, _initial_value in _SESSION_DEFAULTS.items():
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _initial_value

	def load_models():
	    """Load the Whisper speech-recognition pipeline into session state.

	    A progress bar and a status line are shown while loading; both are removed
	    again when the function finishes, whether loading succeeded or failed.

	    Returns:
	        bool: True when ``st.session_state.whisper_model`` holds a pipeline
	        (loaded earlier or loaded by this call); False when ``transformers``
	        is not installed or creating the pipeline raised.
	    """
	    if st.session_state.models_loaded and st.session_state.whisper_model is not None:
	        return True

	    if not TRANSFORMERS_AVAILABLE:
	        st.error("❌ Transformers library not available. Please install: pip install transformers")
	        return False

	    progress_bar = st.progress(0)
	    status_text = st.empty()

	    try:
	        status_text.text("Loading speech recognition model...")
	        progress_bar.progress(25)

	        # The pipeline is kept in session state, i.e. once per browser session.
	        # NOTE(review): a process-wide cache would avoid one copy per session.
	        st.session_state.whisper_model = pipeline(
	            "automatic-speech-recognition",
	            model="openai/whisper-tiny",
	            device=-1,  # Use CPU
	            torch_dtype=torch.float32,
	            return_timestamps=False,
	        )

	        progress_bar.progress(75)
	        status_text.text("Models loaded successfully!")
	        progress_bar.progress(100)

	        st.session_state.models_loaded = True

	        # Leave the success message visible briefly before it is cleared.
	        time.sleep(1)
	        return True

	    except Exception as e:
	        st.error(f"❌ Error loading models: {e}")
	        st.error("Try installing additional dependencies: pip install librosa soundfile")
	        return False

	    finally:
	        # Single cleanup point for both the success and the failure path.
	        progress_bar.empty()
	        status_text.empty()

	def setup_gemini():
	    """Configure the Gemini client when an API key can be found.

	    The key is taken from the ``GEMINI_API_KEY`` environment variable and, if
	    that is empty, from Streamlit secrets under the same name.

	    Returns:
	        bool: True when a key was found and ``genai.configure`` was called
	        without raising; False when the library is missing, no key is set,
	        or anything in the lookup/configuration raised.
	    """
	    if not GENAI_AVAILABLE:
	        return False

	    try:
	        api_key = os.getenv("GEMINI_API_KEY")
	        if not api_key and hasattr(st, 'secrets'):
	            api_key = st.secrets.get("GEMINI_API_KEY", "")

	        if not api_key:
	            return False

	        genai.configure(api_key=api_key)
	        return True
	    except Exception:
	        # Deliberately best-effort: any failure here just means the app falls
	        # back to offline generation.
	        return False

	def transcribe_audio_simple(audio_file):
	    """Transcribe audio with the Whisper pipeline held in session state.

	    Args:
	        audio_file: A file path (``str``), raw audio ``bytes``, or a binary
	            file-like object (anything with ``read``), such as an uploaded file.

	    Returns:
	        str: The stripped transcription. On failure the error is shown with
	        ``st.error`` and a string starting with ``"Error:"`` is returned
	        instead of raising.
	    """
	    try:
	        if st.session_state.whisper_model is None:
	            st.error("❌ Speech recognition model not loaded. Please try loading models first.")
	            return "Error: Speech recognition model not available"

	        st.session_state.current_task = "Converting speech to text..."

	        if hasattr(audio_file, "read"):
	            # The pipeline is documented to take a path, bytes, or an array,
	            # not an open file object, so file-likes are read into bytes.
	            if hasattr(audio_file, "seek"):
	                # An earlier consumer may have left the cursor at the end.
	                audio_file.seek(0)
	            audio_input = audio_file.read()
	        else:
	            audio_input = audio_file

	        result = st.session_state.whisper_model(audio_input)

	        # The pipeline normally returns {"text": ...}; anything else is
	        # stringified as a fallback.
	        if isinstance(result, dict) and "text" in result:
	            return result["text"].strip()
	        return str(result).strip()

	    except Exception as e:
	        st.error(f"Transcription error: {e}")

	        # Point at the usual cause when the message names an audio library.
	        details = str(e).lower()
	        if "librosa" in details or "soundfile" in details:
	            st.error("🔧 Missing audio processing libraries. Install with:")
	            st.code("pip install librosa soundfile")

	        return f"Error: {e}"

	    finally:
	        st.session_state.current_task = ""

	def generate_content_with_gemini(prompt):
	    """Generate marketing content for ``prompt`` with Gemini.

	    Falls back to ``generate_content_offline`` when the Gemini library is not
	    installed or when the request raises (a warning is shown in that case).

	    Args:
	        prompt: The user's product or idea description.

	    Returns:
	        str: ``response.text`` from Gemini, or the offline template output.
	    """
	    if not GENAI_AVAILABLE:
	        return generate_content_offline(prompt)

	    # Assembled line by line so the text sent to the model never depends on
	    # how this source file happens to be indented.
	    request = "\n".join([
	        f'Based on this input: "{prompt}"',
	        "",
	        "Create comprehensive marketing content with:",
	        "",
	        "## Marketing Taglines",
	        "Generate 3 catchy, memorable taglines (max 12 words each)",
	        "",
	        "## Social Media Posts",
	        "Create 3 engaging social media posts (max 280 characters each)",
	        "",
	        "## Product Description",
	        "Write 1 compelling product description (100-150 words)",
	        "",
	        "## Image Generation Prompts",
	        "Provide 3 detailed prompts for AI image generation",
	        "",
	        "## Call-to-Action Ideas",
	        "Suggest 3 effective call-to-action phrases",
	        "",
	        "Format with clear markdown headers and numbered lists.",
	    ])

	    st.session_state.current_task = "Generating enhanced content with Gemini AI..."
	    try:
	        # NOTE(review): the model id is hard-coded; confirm it is still served.
	        model = genai.GenerativeModel('gemini-pro')
	        response = model.generate_content(request)
	        return response.text

	    except Exception as e:
	        st.warning(f"Gemini error: {e}. Using offline generation.")
	        return generate_content_offline(prompt)

	    finally:
	        st.session_state.current_task = ""

	def generate_content_offline(prompt):
	    """Build marketing content from fixed templates, without any API call.

	    The structured result (taglines, social posts, description, image prompts,
	    CTA ideas) is stored in ``st.session_state.generated_content['structured']``.

	    Args:
	        prompt: Text inserted verbatim into each template.

	    Returns:
	        str: The structured content rendered by ``format_content_display``.
	    """
	    st.session_state.current_task = "Generating content with offline templates..."

	    content = {
	        "taglines": [
	            f"Experience {prompt} like never before",
	            f"Transform your world with {prompt}",
	            f"Discover the power of {prompt}",
	        ],
	        "social_posts": [
	            f"🌟 Ready to explore {prompt}? Join thousands who've already discovered the difference! #Innovation",
	            f"💫 {prompt} is changing the game! Don't miss out on this incredible opportunity. #GameChanger",
	            f"🚀 The future of {prompt} is here! Experience what everyone's talking about. #FutureTech",
	        ],
	        "description": f"Discover the revolutionary world of {prompt}. Our innovative approach combines cutting-edge technology with user-friendly design to deliver an unmatched experience. Perfect for both beginners and experts, this solution transforms how you interact with {prompt}. Join thousands of satisfied users today!",
	        "image_prompts": [
	            f"Professional product photo of {prompt}, clean white background, studio lighting",
	            f"Modern minimalist illustration of {prompt}, flat design, vibrant colors",
	            f"Futuristic concept art of {prompt}, digital art, high quality, detailed",
	        ],
	        "cta_ideas": [
	            f"Get Started with {prompt} Today!",
	            "Transform Your Experience Now",
	            f"Join the {prompt} Revolution",
	        ],
	    }

	    formatted = format_content_display(content)

	    # Keep the structured form too; the flowchart and JSON export read it.
	    st.session_state.generated_content['structured'] = content
	    st.session_state.current_task = ""

	    return formatted

	def create_flowchart_image(content_data):
	    """Draw an 800x600 overview image of the generated marketing content.

	    Layout: a title, a top row with "Taglines" and "Social Media" boxes, a
	    centred "Product Description" box, a bottom row with "Call-to-Actions" and
	    "Visual Ideas" boxes, connector lines, and a footer.

	    Args:
	        content_data: Mapping that may contain ``taglines``, ``social_posts``
	            and ``cta_ideas`` (lists, first entry is shown) and ``description``
	            (string). Missing or empty entries are replaced by sample text.

	    Returns:
	        PIL.Image.Image | None: The drawn image, or None after reporting the
	        error with ``st.error`` if drawing failed.
	    """
	    try:
	        width, height = 800, 600
	        box_width, box_height = 180, 80
	        left_x = 50
	        right_x = width // 2 + 50
	        row1_y, row2_y, row3_y = 80, 200, 320

	        image = Image.new('RGB', (width, height), 'white')
	        draw = ImageDraw.Draw(image)
	        font_title, font_text, font_small = _load_flowchart_fonts()

	        # Colors
	        primary_color = "#2E86AB"
	        secondary_color = "#A23B72"
	        accent_color = "#F18F01"
	        text_color = "#333333"

	        # Title
	        draw.text((width // 2 - 150, 20), "Marketing Content Strategy", fill=text_color, font=font_title)

	        # Row 1: Taglines and Social Media
	        _draw_flowchart_box(draw, left_x, row1_y, box_width, box_height, "🏷️ Taglines", primary_color, font_text)
	        tagline = _shorten(_first_entry(content_data, 'taglines', 'Sample tagline'), 25)
	        draw.text((left_x + 10, row1_y + 35), f"• {tagline}", fill=text_color, font=font_small)

	        _draw_flowchart_box(draw, right_x, row1_y, box_width, box_height, "📱 Social Media", secondary_color, font_text)
	        post = _shorten(_first_entry(content_data, 'social_posts', 'Sample post'), 25)
	        draw.text((right_x + 10, row1_y + 35), f"• {post}", fill=text_color, font=font_small)

	        # Row 2: Description (spans the middle half of the image)
	        desc_x = width // 4
	        _draw_flowchart_box(draw, desc_x, row2_y, width * 3 // 4 - desc_x, box_height, "📝 Product Description", accent_color, font_text)
	        description = content_data.get('description') or 'Product description goes here'
	        draw.text((desc_x + 10, row2_y + 35), _shorten(description, 50), fill=text_color, font=font_small)

	        # Row 3: CTAs and Image Ideas
	        _draw_flowchart_box(draw, left_x, row3_y, box_width, box_height, "🎯 Call-to-Actions", primary_color, font_text)
	        cta = _shorten(_first_entry(content_data, 'cta_ideas', 'Sample CTA'), 25)
	        draw.text((left_x + 10, row3_y + 35), f"• {cta}", fill=text_color, font=font_small)

	        _draw_flowchart_box(draw, right_x, row3_y, box_width, box_height, "🎨 Visual Ideas", secondary_color, font_text)
	        draw.text((right_x + 10, row3_y + 35), "• Professional photos", fill=text_color, font=font_small)
	        draw.text((right_x + 10, row3_y + 50), "• Minimalist design", fill=text_color, font=font_small)

	        # Connecting lines: row 1 down to row 2, then row 2 down to row 3.
	        centre_x = width // 2
	        draw.line([(centre_x, row1_y + box_height), (centre_x, row2_y)], fill=text_color, width=2)
	        draw.line([(width // 4 + box_width // 2, row2_y + box_height), (centre_x, row3_y)], fill=text_color, width=2)
	        draw.line([(width * 3 // 4 - box_width // 2, row2_y + box_height), (centre_x, row3_y)], fill=text_color, width=2)

	        # Footer
	        draw.text((width // 2 - 100, height - 30), "Generated by VoiceCanvas AI Studio", fill=text_color, font=font_small)

	        return image

	    except Exception as e:
	        st.error(f"Error creating flowchart: {e}")
	        return None


	def _load_flowchart_fonts():
	    """Return (title, text, small) fonts, using Pillow's default as last resort."""
	    # NOTE(review): DejaVuSans is tried second on the assumption that it is
	    # commonly present where Arial is not — confirm for the deployment image.
	    for font_file in ("arial.ttf", "DejaVuSans.ttf"):
	        try:
	            return (
	                ImageFont.truetype(font_file, 20),
	                ImageFont.truetype(font_file, 14),
	                ImageFont.truetype(font_file, 12),
	            )
	        except (OSError, ImportError):
	            # Font file not found/readable, or FreeType support unavailable.
	            continue
	    fallback = ImageFont.load_default()
	    return fallback, fallback, fallback


	def _first_entry(content_data, key, fallback):
	    """Return the first item stored under ``key``, or ``fallback`` if none."""
	    entries = content_data.get(key) or [fallback]
	    return str(entries[0])


	def _shorten(text, limit):
	    """Cut ``text`` to ``limit`` characters, adding "..." only when cut."""
	    text = str(text)
	    if len(text) <= limit:
	        return text
	    return text[:limit] + "..."


	def _draw_flowchart_box(draw, left, top, box_w, box_h, heading, color, font):
	    """Draw an outlined box with its heading 10px inside the top-left corner."""
	    draw.rectangle([left, top, left + box_w, top + box_h], outline=color, width=2)
	    draw.text((left + 10, top + 10), heading, fill=color, font=font)

	def format_content_display(content):
	    """Render structured marketing content as a markdown string.

	    Args:
	        content: A dict that may hold ``taglines``, ``social_posts``,
	            ``cta_ideas`` and ``image_prompts`` (iterables of strings) and
	            ``description`` (string). Any other value is passed to ``str``.

	    Returns:
	        str: One ``##`` section per key that is present, always in the order
	        taglines, social posts, description, call-to-actions, image prompts.
	        An empty dict gives an empty string.
	    """
	    if not isinstance(content, dict):
	        return str(content)

	    # Pieces are collected and joined once instead of growing a string.
	    parts = []

	    if "taglines" in content:
	        parts.append("## 🏷️ Marketing Taglines\n")
	        parts.extend(f"{i}. {tagline}\n" for i, tagline in enumerate(content["taglines"], 1))
	        parts.append("\n")

	    if "social_posts" in content:
	        parts.append("## 📱 Social Media Posts\n")
	        parts.extend(f"Post {i}:\n{post}\n\n" for i, post in enumerate(content["social_posts"], 1))

	    if "description" in content:
	        parts.append("## 📝 Product Description\n")
	        parts.append(f"{content['description']}\n\n")

	    if "cta_ideas" in content:
	        parts.append("## 🎯 Call-to-Action Ideas\n")
	        parts.extend(f"{i}. {cta}\n" for i, cta in enumerate(content["cta_ideas"], 1))
	        parts.append("\n")

	    if "image_prompts" in content:
	        parts.append("## 🎨 Image Generation Prompts\n")
	        parts.extend(f"{i}. {prompt}\n" for i, prompt in enumerate(content["image_prompts"], 1))

	    return "".join(parts)

	def handle_button_click(button_key):
	    """Claim a click for ``button_key`` unless one is already being handled.

	    Sets ``st.session_state['<button_key>_clicked']`` to True on success; the
	    caller is expected to clear it with ``reset_button_state`` afterwards.

	    Returns:
	        bool: True if the flag was not set and is now claimed; False if the
	        flag was already set.
	    """
	    flag = f'{button_key}_clicked'
	    if st.session_state.get(flag, False):
	        return False
	    st.session_state[flag] = True
	    return True

	def reset_button_state(button_key):
	    """Clear the click flag for ``button_key`` if that flag exists.

	    Counterpart of ``handle_button_click``; a key that was never claimed is
	    left absent rather than created.
	    """
	    flag = f'{button_key}_clicked'
	    if flag in st.session_state:
	        st.session_state[flag] = False

	def main():
	    """Render the whole page: sidebar, input tabs, review, results, footer."""
	    _render_sidebar()

	    # Main content
	    st.title("🎨 VoiceCanvas - AI Content Studio")
	    st.markdown("Transform your ideas into comprehensive marketing content")

	    if not st.session_state.models_loaded:
	        st.warning("⚠️ AI models not loaded yet. Click 'Load AI Models' in the sidebar to enable speech recognition.")

	    st.header("💡 Share Your Idea")
	    _render_input_tabs()
	    _render_review_and_generate()
	    _render_generated_content()
	    _render_footer()


	def _render_sidebar():
	    """Draw the sidebar: model loader, component status and usage help."""
	    with st.sidebar:
	        st.header("🎨 VoiceCanvas")
	        st.markdown("AI Content Studio")

	        # Load models button
	        if not st.session_state.models_loaded:
	            if st.button("🚀 Load AI Models", type="primary", use_container_width=True):
	                if handle_button_click("load_models"):
	                    try:
	                        with st.spinner("Loading AI models..."):
	                            success = load_models()
	                    finally:
	                        # Never leave the click flag set, even if loading raised.
	                        reset_button_state("load_models")
	                    if success:
	                        st.rerun()

	        # Status section
	        st.subheader("📊 System Status")

	        gemini_available = setup_gemini()

	        st.write("🤖 Components:")
	        st.write(f"• Speech Recognition {'✅' if st.session_state.models_loaded else '❌'}")
	        st.write(f"• Audio Recording {'✅' if AUDIO_REC_AVAILABLE else '❌'}")
	        st.write(f"• Enhanced AI {'✅' if gemini_available else '❌'}")

	        # Current task indicator
	        if st.session_state.current_task:
	            st.info(f"🔄 {st.session_state.current_task}")

	        st.markdown("---")

	        # Tips and help
	        st.subheader("💡 How to Use")

	        with st.expander("🚀 Quick Start", expanded=True):
	            st.markdown("""
	            1. Load Models: Click "Load AI Models" button first
	            2. Input: Use voice, upload audio, or type text
	            3. Edit: Review and refine your input
	            4. Generate: Create marketing content
	            5. Visualize: Generate flowchart of your strategy
	            6. Export: Download your materials
	            """)

	        with st.expander("🎯 Best Practices"):
	            st.markdown("""
	            For Voice/Audio:
	            - Speak clearly at normal pace
	            - Use quiet environment
	            - Describe your product/service
	            - Mention target audience

	            For Text:
	            - Be specific about features
	            - Include benefits and use cases
	            - Mention what makes it unique
	            - Use 50+ words for detail
	            """)

	        with st.expander("⚙️ Setup (Optional)"):
	            st.markdown("""
	            Enhanced Features:

	            Add environment variables:
	            - `GEMINI_API_KEY`: Advanced text generation

	            Get API Key:
	            - [Google AI Studio](https://makersuite.google.com/app/apikey) (Free)
	            """)

	        with st.expander("🛠️ Troubleshooting"):
	            st.markdown("""
	            Common Issues:
	            - "Speech recognition not available" → Click "Load AI Models"
	            - Audio processing errors → Install: `pip install librosa soundfile`
	            - Button not responding → Wait for processing to complete
	            - Slow processing → Models loading for first time
	            - Basic content only → Add GEMINI_API_KEY
	            """)


	def _render_input_tabs():
	    """Create the input tabs; the Record tab exists only with the recorder."""
	    available_tabs = []
	    if AUDIO_REC_AVAILABLE:
	        available_tabs.append("🎙️ Record")
	    available_tabs.extend(["📁 Upload", "✍️ Type"])

	    # Tabs are consumed in the same order their labels were added above.
	    tabs = iter(st.tabs(available_tabs))

	    if AUDIO_REC_AVAILABLE:
	        with next(tabs):
	            _render_record_tab()
	    with next(tabs):
	        _render_upload_tab()
	    with next(tabs):
	        _render_text_tab()


	def _transcribe_wav_bytes(wav_bytes):
	    """Write recorded bytes to a temporary .wav file and transcribe that file."""
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
	        tmp_file.write(wav_bytes)
	    # The file is closed (and therefore fully written) before it is read.
	    try:
	        return transcribe_audio_simple(tmp_file.name)
	    finally:
	        os.unlink(tmp_file.name)


	def _render_record_tab():
	    """Microphone recording plus a button that transcribes the recording."""
	    st.info("🎤 Click the microphone button to start recording")

	    wav_audio_data = st_audiorec()
	    if wav_audio_data is None:
	        return

	    st.success("🎉 Audio recorded successfully!")
	    st.audio(wav_audio_data, format='audio/wav')

	    col1, col2 = st.columns([1, 2])
	    with col1:
	        if st.button("🔄 Transcribe Audio", key="transcribe_btn", type="primary"):
	            if not st.session_state.models_loaded:
	                st.error("Please load AI models first using the sidebar button.")
	            elif handle_button_click("transcribe"):
	                st.session_state.processing = True
	                try:
	                    with st.spinner("🎯 Converting your speech to text..."):
	                        st.session_state.transcription = _transcribe_wav_bytes(wav_audio_data)
	                finally:
	                    st.session_state.processing = False
	                    reset_button_state("transcribe")
	                st.rerun()

	    with col2:
	        if st.session_state.processing:
	            st.info("🔄 Processing your audio...")


	def _render_upload_tab():
	    """Audio file upload plus a button that transcribes the uploaded file."""
	    st.info("📁 Upload an audio file containing your idea")

	    uploaded_file = st.file_uploader(
	        "Choose audio file",
	        type=['wav', 'mp3', 'm4a'],
	        help="Supported: WAV, MP3, M4A • Max 10MB • Best: 30 seconds or less",
	    )
	    if not uploaded_file:
	        return

	    st.success("📄 File uploaded successfully!")
	    st.audio(uploaded_file)

	    col1, col2 = st.columns([1, 2])
	    with col1:
	        if st.button("🔄 Process Audio", key="upload_transcribe", type="primary"):
	            if not st.session_state.models_loaded:
	                st.error("Please load AI models first using the sidebar button.")
	            elif handle_button_click("upload_process"):
	                st.session_state.processing = True
	                try:
	                    with st.spinner("🎯 Processing your audio file..."):
	                        st.session_state.transcription = transcribe_audio_simple(uploaded_file)
	                finally:
	                    st.session_state.processing = False
	                    reset_button_state("upload_process")
	                st.rerun()

	    with col2:
	        if st.session_state.processing:
	            st.info("🔄 Converting speech to text...")


	def _render_text_tab():
	    """Free-text input; non-empty text becomes the current transcription."""
	    st.info("✍️ Type or paste your product/service description")

	    user_input = st.text_area(
	        "Describe your idea:",
	        placeholder="Example: A smart fitness tracker that monitors sleep patterns, heart rate, and stress levels. It provides personalized workout recommendations for busy professionals who want to maintain their health despite hectic schedules.",
	        height=150,
	        help="Be detailed! Include features, benefits, and target audience for best results.",
	    )
	    if not user_input:
	        return

	    st.session_state.transcription = user_input
	    word_count = len(user_input.split())

	    if word_count < 10:
	        st.warning("💡 Add more details for better results (at least 10 words)")
	    elif word_count < 30:
	        st.info("📝 Good start! Add more features/benefits for richer content")
	    else:
	        st.success(f"✅ Great detail! ({word_count} words)")


	def _render_review_and_generate():
	    """Show the editable input and the button that generates the content."""
	    if not st.session_state.transcription:
	        return

	    st.markdown("---")
	    st.header("📝 Review Your Input")

	    edited_text = st.text_area(
	        "Edit or refine your input:",
	        value=st.session_state.transcription,
	        height=120,
	        key="edit_transcription",
	        help="Make any corrections or add more details",
	    )
	    st.session_state.transcription = edited_text

	    # Generate content section
	    st.markdown("---")
	    _, centre, _ = st.columns([1, 2, 1])

	    with centre:
	        if st.button("🚀 Generate Marketing Content", type="primary", use_container_width=True):
	            if handle_button_click("generate_content"):
	                try:
	                    with st.spinner("✨ Creating comprehensive marketing content..."):
	                        generated = st.session_state.generated_content
	                        # Drop results of an earlier run so the JSON export and
	                        # the flowchart never describe a previous input.
	                        generated.pop('structured', None)
	                        generated.pop('flowchart', None)

	                        if setup_gemini():
	                            content_text = generate_content_with_gemini(st.session_state.transcription)
	                        else:
	                            content_text = generate_content_offline(st.session_state.transcription)
	                        generated['text'] = content_text
	                finally:
	                    reset_button_state("generate_content")
	                st.success("✅ Content generated successfully!")
	                st.rerun()


	def _render_generated_content():
	    """Show generated text, the flowchart controls and the export section."""
	    generated = st.session_state.generated_content
	    if not generated:
	        return

	    st.markdown("---")
	    st.header("✨ Your Marketing Content")

	    if 'text' in generated:
	        st.markdown(generated['text'])

	    # Visual content section
	    st.markdown("---")
	    st.subheader("🎨 Visual Content")

	    col1, col2 = st.columns([1, 1])

	    with col1:
	        if st.button("📊 Generate Strategy Flowchart", use_container_width=True, type="secondary"):
	            if handle_button_click("generate_flowchart"):
	                try:
	                    with st.spinner("🎨 Creating strategy flowchart..."):
	                        if 'structured' in generated:
	                            flowchart_data = generated['structured']
	                        else:
	                            # No structured data: fall back to generic labels
	                            # plus the start of the user's input.
	                            flowchart_data = {
	                                'taglines': ['Key message from your content'],
	                                'social_posts': ['Social media strategy'],
	                                'description': st.session_state.transcription[:100],
	                                'cta_ideas': ['Call to action'],
	                                'image_prompts': ['Visual elements'],
	                            }
	                        flowchart_img = create_flowchart_image(flowchart_data)
	                finally:
	                    reset_button_state("generate_flowchart")

	                # Rerun only on success; on failure the error reported by
	                # create_flowchart_image stays on screen.
	                if flowchart_img is not None:
	                    generated['flowchart'] = flowchart_img
	                    st.success("📊 Flowchart created!")
	                    st.rerun()

	    with col2:
	        st.info("💡 Generate a visual flowchart of your marketing strategy to better understand content relationships and flow.")

	    if 'flowchart' in generated:
	        # NOTE(review): newer Streamlit releases may prefer use_container_width
	        # for st.image — confirm against the pinned version before changing.
	        st.image(
	            generated['flowchart'],
	            caption="Marketing Strategy Flowchart",
	            use_column_width=True,
	        )

	    _render_export(generated)


	def _render_export(generated):
	    """Offer downloads for the text, the structured JSON and the flowchart."""
	    st.markdown("---")
	    st.header("📥 Export Your Content")

	    stamp = datetime.now()
	    file_stamp = stamp.strftime('%Y%m%d_%H%M%S')

	    col1, col2, col3 = st.columns(3)

	    with col1:
	        # Text export
	        if 'text' in generated:
	            # Joined explicitly so the file never picks up source indentation.
	            content_export = "\n".join([
	                "VOICECANVAS MARKETING CONTENT",
	                f"Generated: {stamp.strftime('%Y-%m-%d %H:%M:%S')}",
	                f"Source: {st.session_state.transcription[:100]}...",
	                "",
	                f"{generated['text']}",
	                "",
	                "---",
	                "Created with VoiceCanvas AI Content Studio",
	                "",
	            ])

	            st.download_button(
	                "📄 Download Text",
	                content_export,
	                file_name=f"marketing_content_{file_stamp}.txt",
	                mime="text/plain",
	                use_container_width=True,
	                help="Download complete text content",
	            )

	    with col2:
	        # JSON export
	        if 'structured' in generated:
	            json_data = {
	                "metadata": {
	                    "timestamp": stamp.isoformat(),
	                    "generator": "VoiceCanvas AI Studio",
	                    "mode": "Enhanced" if setup_gemini() else "Basic",
	                },
	                "input": st.session_state.transcription,
	                "content": generated['structured'],
	            }

	            st.download_button(
	                "📊 Download Data",
	                json.dumps(json_data, indent=2),
	                file_name=f"content_data_{file_stamp}.json",
	                mime="application/json",
	                use_container_width=True,
	                help="Download structured data (JSON)",
	            )

	    with col3:
	        # Flowchart export
	        if 'flowchart' in generated:
	            img_buffer = io.BytesIO()
	            generated['flowchart'].save(img_buffer, format="PNG")

	            st.download_button(
	                "📊 Download Flowchart",
	                img_buffer.getvalue(),
	                file_name=f"strategy_flowchart_{file_stamp}.png",
	                mime="image/png",
	                use_container_width=True,
	                help="Download strategy flowchart",
	            )
	        else:
	            st.info("Generate flowchart first", icon="ℹ️")


	def _render_footer():
	    """Draw the centred page footer."""
	    st.markdown("---")
	    _, centre, _ = st.columns([1, 2, 1])
	    with centre:
	        st.markdown("🎨 VoiceCanvas AI Content Studio")
	        st.caption("Transform ideas into marketing magic • Built with Streamlit")

	# Build the page only when the file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()