# Page-scrape residue kept for provenance (not part of the program):
# maria355's picture / Update app.py
# 17840c3 verified
import streamlit as st
import google.generativeai as genai
from huggingface_hub import InferenceClient
import requests
from PIL import Image
import io
import json
import time
import zipfile
import tempfile
import os
from gtts import gTTS
# Configure page: browser-tab title, tab icon, and a full-width layout.
_PAGE_SETTINGS = {
    "page_title": "AI Video Script & Storyboard Generator",
    "page_icon": "🎬",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)
# Initialize session state: seed each slot only when it is not already
# present, so values stored by an earlier rerun are left untouched.
for _slot, _initial in (
    ("generated_script", None),
    ("storyboard_images", []),
    ("gif_preview", None),
):
    if _slot not in st.session_state:
        st.session_state[_slot] = _initial
# API Configuration
def load_api_keys():
    """Load the Gemini and Hugging Face credentials.

    Each key is looked up in ``st.secrets`` first and in the process
    environment second.

    Returns:
        A ``(gemini_api_key, hf_token)`` tuple.

    If either key cannot be found, an error is shown and the script run is
    halted with ``st.stop()``.
    """

    def _lookup(name):
        # Reading st.secrets raises when no secrets file is configured.
        # That must not prevent the environment-variable fallback, so the
        # failure is treated as "not present in secrets".
        try:
            value = st.secrets.get(name)
        except Exception:
            value = None
        return value or os.getenv(name)

    gemini_api_key = _lookup("GEMINI_API_KEY")
    hf_token = _lookup("HF_TOKEN")
    if not gemini_api_key or not hf_token:
        st.error("❌ API Keys not found. Please configure GEMINI_API_KEY and HF_TOKEN")
        st.stop()
    return gemini_api_key, hf_token
# Resolve both credentials once, then hand each to the SDK that needs it.
gemini_api_key, hf_token = load_api_keys()

# Hugging Face inference client used for storyboard images.
client = InferenceClient(token=hf_token)

# The Gemini SDK is configured globally rather than per client object.
genai.configure(api_key=gemini_api_key)
# Main title
st.title("🎬 AI Video Script & Storyboard Generator")
st.markdown("Create professional video scripts and visual storyboards with AI assistance")

# Input section
st.header("📝 Video Specifications")

# Dropdown choices, declared up front so the widget calls below stay short.
length_choices = ["30 seconds", "1 minute", "2 minutes", "3 minutes", "5 minutes", "Custom"]
style_choices = ["Explainer", "Cinematic", "Tutorial", "Vlog", "Animation", "Documentary", "Commercial"]
tone_choices = ["Professional", "Funny", "Serious", "Dramatic", "Inspirational", "Casual", "Educational"]
platform_choices = ["YouTube", "TikTok", "Instagram Reels", "LinkedIn", "Presentation", "General"]
art_style_choices = ["Realistic", "Cartoon", "Cinematic", "Minimalistic", "Sketch", "Digital Art"]

left_col, right_col = st.columns(2)

# Left column: subject, running time and format of the video.
with left_col:
    video_topic = st.text_area(
        "Video Topic",
        placeholder="Enter your video topic or detailed description...",
        height=100,
    )
    video_length = st.selectbox("Video Length", length_choices)
    if video_length == "Custom":
        # "Custom" replaces the preset with a user-entered number of seconds.
        custom_length = st.number_input("Custom length (seconds)", min_value=10, max_value=600, value=60)
        video_length = f"{custom_length} seconds"
    style = st.selectbox("Video Style", style_choices)

# Right column: mood, destination platform and storyboard look.
with right_col:
    tone = st.selectbox("Tone/Emotion", tone_choices)
    platform = st.selectbox("Target Platform", platform_choices)
    art_style = st.selectbox("Storyboard Art Style", art_style_choices)
# Functions for AI generation
def _extract_json_object(raw_text):
    """Return the part of *raw_text* spanning its outermost ``{...}`` pair.

    This tolerates Markdown code fences (with or without a closing fence) and
    any explanatory text the model puts before or after the JSON object.

    Raises:
        ValueError: if *raw_text* contains no ``{...}`` span.
    """
    start = raw_text.find("{")
    end = raw_text.rfind("}")
    if start == -1 or end < start:
        raise ValueError("response does not contain a JSON object")
    return raw_text[start:end + 1]


def generate_script_with_gemini(topic, length, style, tone, platform):
    """Generate a video script using the Gemini API.

    Args:
        topic: Subject or description of the video.
        length: Target duration as text, e.g. ``"30 seconds"``.
        style: Video style label (e.g. ``"Explainer"``).
        tone: Tone/emotion label.
        platform: Target platform label.

    Returns:
        The dict parsed from the model's JSON reply, guaranteed to hold a
        non-empty ``"scenes"`` list of dicts. On any failure (API error,
        unparsable reply, missing scenes) the error is shown with
        ``st.error`` and the result of ``generate_fallback_script`` is
        returned instead.
    """
    try:
        model = genai.GenerativeModel('gemini-1.5-flash')
        # The prompt body is kept at column 0 so the text sent to the model
        # carries no indentation from this function.
        prompt = f"""
Create a detailed video script for the following specifications:
Topic: {topic}
Length: {length}
Style: {style}
Tone: {tone}
Platform: {platform}
Format the output as JSON with the following structure:
{{
"title": "Video Title",
"total_duration": "{length}",
"scenes": [
{{
"scene_number": 1,
"duration": "10 seconds",
"description": "Visual description for storyboard",
"dialogue": "Script/narration text",
"camera_angle": "Wide shot/Close-up/etc",
"visual_elements": "Key visual elements to include"
}}
]
}}
Make sure the scenes add up to the total duration and are engaging for {platform}.
Include specific visual descriptions that can be used to generate storyboard images.
Return only valid JSON, no additional text.
"""
        response = model.generate_content(prompt)
        script_data = json.loads(_extract_json_object(response.text))

        # Later code iterates script_data['scenes'] and calls .get() on each
        # scene, so reject replies that do not have that shape.
        scenes = script_data.get("scenes") if isinstance(script_data, dict) else None
        if (
            not isinstance(scenes, list)
            or not scenes
            or not all(isinstance(scene, dict) for scene in scenes)
        ):
            raise ValueError("response JSON has no usable 'scenes' list")
        return script_data
    except Exception as e:
        st.error(f"Error generating script: {str(e)}")
        return generate_fallback_script(topic, length, style, tone, platform)
def _length_to_seconds(length):
    """Convert text such as ``"30 seconds"`` or ``"2 minutes"`` to seconds.

    Returns 60 when the text has no leading integer or names neither unit.
    """
    text = str(length).lower()
    try:
        amount = int(text.split()[0])
    except (IndexError, ValueError):
        return 60
    if "second" in text:
        return amount
    if "minute" in text:
        return amount * 60
    return 60


def generate_fallback_script(topic, length, style, tone, platform, art_style=None):
    """Build a simple template script without calling any AI service.

    Args:
        topic: Subject of the video; inserted into every scene's text.
        length: Target duration as text, e.g. ``"30 seconds"``.
        style: Video style label; lower-cased into the scene text.
        tone: Tone label; lower-cased into the narration text.
        platform: Target platform label (accepted for signature parity with
            ``generate_script_with_gemini``; not used in the template).
        art_style: Storyboard art style named in each description. When
            omitted, the module-level ``art_style`` value is used if one
            exists, otherwise ``"Realistic"``.

    Returns:
        A dict with ``title``, ``total_duration`` and ``scenes`` (3 to 8
        entries whose durations add up to the parsed total), or ``None``
        after showing an error if the script could not be built.
    """
    try:
        if art_style is None:
            # Existing callers do not pass the art style; fall back to the
            # module-level selection they relied on implicitly.
            art_style = globals().get("art_style") or "Realistic"

        total_seconds = _length_to_seconds(length)

        # Roughly one scene per 10 seconds, clamped to 3-8 scenes.
        num_scenes = max(3, min(8, total_seconds // 10))
        # Spread the division remainder over the first scenes so the
        # durations sum exactly to total_seconds.
        base_duration, extra_seconds = divmod(total_seconds, num_scenes)

        camera_angles = ["Wide shot", "Medium shot", "Close-up", "Over shoulder"]
        middle_types = ["main content", "detail"]

        scenes = []
        for i in range(num_scenes):
            # First scene opens, last scene concludes, the rest alternate.
            if i == 0:
                scene_type = "opening"
            elif i == num_scenes - 1:
                scene_type = "conclusion"
            else:
                scene_type = middle_types[(i - 1) % len(middle_types)]
            scene_duration = base_duration + (1 if i < extra_seconds else 0)
            scenes.append({
                "scene_number": i + 1,
                "duration": f"{scene_duration} seconds",
                "description": f"A {style.lower()} {scene_type} scene about {topic}, showing professional visuals in {art_style.lower()} style",
                "dialogue": f"Engaging {tone.lower()} narration about {topic} for scene {i+1}",
                "camera_angle": camera_angles[i % len(camera_angles)],
                "visual_elements": f"Professional visuals related to {topic}, {style.lower()} cinematography",
            })

        return {
            "title": f"{topic} - {style} Video",
            "total_duration": length,
            "scenes": scenes,
        }
    except Exception as e:
        st.error(f"Error creating fallback script: {str(e)}")
        return None
def generate_storyboard_image_stable(scene_description, art_style, max_retries=3):
    """Generate one storyboard frame, retrying with alternative prompts.

    Args:
        scene_description: Text describing what the frame should show.
        art_style: Art-style label; unknown labels fall back to a generic
            "professional, high quality" prompt suffix.
        max_retries: Maximum number of generation attempts. Attempts cycle
            through three prompt phrasings; the default of 3 tries each
            phrasing once.

    Returns:
        The image returned by the inference client, or the result of
        ``create_placeholder_image`` when every attempt fails.
    """
    style_prompts = {
        "Realistic": "photorealistic, professional, high quality, detailed",
        "Cartoon": "cartoon style, animated, colorful, illustration, Disney-like",
        "Cinematic": "cinematic, dramatic lighting, film still, movie scene",
        "Minimalistic": "minimalist, clean, simple, modern design",
        "Sketch": "pencil sketch, hand-drawn, artistic, line art",
        "Digital Art": "digital art, concept art, vibrant colors, detailed"
    }
    base_prompt = f"{scene_description}"
    style_enhancement = style_prompts.get(art_style, "professional, high quality")

    # Progressively simpler phrasings of the same scene.
    approaches = [
        f"{base_prompt}, {style_enhancement}, storyboard frame",
        f"storyboard illustration: {base_prompt}, {style_enhancement}",
        f"{base_prompt}, simple illustration, clean design"
    ]

    for attempt in range(max_retries):
        prompt = approaches[attempt % len(approaches)]
        try:
            image = client.text_to_image(
                prompt,
                model="runwayml/stable-diffusion-v1-5"
            )
        except Exception:
            # Best effort: a failed request just moves on to the next attempt.
            image = None
        if image and hasattr(image, 'size'):
            return image
        if attempt < max_retries - 1:
            time.sleep(2)  # Wait before retry

    # Every attempt failed (or none was allowed): use a placeholder frame.
    return create_placeholder_image(f"Scene: {base_prompt[:50]}...")
def create_placeholder_image(text):
    """Create a grey 512x384 placeholder image with *text* drawn on it.

    The text is laid out four words per line starting at y=150 and stops
    once the next line would start below y=300.

    Args:
        text: Caption to draw on the placeholder.

    Returns:
        The placeholder image, or ``None`` if it could not be created
        (for example when Pillow is unavailable or *text* is not a string).
    """
    try:
        from PIL import Image, ImageDraw, ImageFont

        img = Image.new('RGB', (512, 384), color=(200, 200, 200))
        draw = ImageDraw.Draw(img)

        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed here.
        try:
            font = ImageFont.load_default()
        except Exception:
            font = None

        # split() without an argument collapses repeated whitespace, so no
        # empty "words" end up padding a line.
        words = text.split()
        words_per_line = 4
        line_height = 30
        y_pos = 150
        for start in range(0, len(words), words_per_line):
            line = ' '.join(words[start:start + words_per_line])
            draw.text((50, y_pos), line, fill=(50, 50, 50), font=font)
            y_pos += line_height
            if y_pos > 300:  # Don't overflow
                break
        return img
    except Exception:
        return None
def create_gif_preview(images, script_data):
    """Create an animated GIF preview from the storyboard images.

    Args:
        images: Storyboard images; ``None`` entries are skipped.
        script_data: The script dict (accepted for interface compatibility;
            not used when building the GIF).

    Returns:
        An ``io.BytesIO`` positioned at the start of the GIF data (400x300
        frames, 2.5 seconds each, looping forever), or ``None`` when there
        is no usable image or the GIF could not be built.
    """
    try:
        valid_images = [img for img in images if img is not None]
        if not valid_images:
            return None

        # Pillow releases before 9.1 have no Image.Resampling namespace; the
        # filter constant lives on the Image module itself there. Without
        # this fallback every resize raised and no frame survived.
        lanczos = getattr(Image, "Resampling", Image).LANCZOS
        target_size = (400, 300)

        resized_images = []
        for image in valid_images:
            try:
                resized_images.append(image.resize(target_size, lanczos))
            except Exception:
                # Best effort: drop a frame that cannot be resized.
                continue
        if not resized_images:
            return None

        gif_buffer = io.BytesIO()
        first_frame, *other_frames = resized_images
        first_frame.save(
            gif_buffer,
            format='GIF',
            save_all=True,
            append_images=other_frames,
            duration=2500,  # 2.5 seconds per frame
            loop=0
        )
        gif_buffer.seek(0)
        return gif_buffer
    except Exception as e:
        st.error(f"Error creating GIF: {str(e)}")
        return None
def text_to_speech(text, language='en'):
    """Convert text to speech using gTTS.

    Args:
        text: Narration to speak. Text longer than 500 characters is cut
            back to the last space before that limit and suffixed with
            "...".
        language: gTTS language code.

    Returns:
        An ``io.BytesIO`` positioned at the start of the audio data, or
        ``None`` when *text* is blank or not a string, or when speech
        generation fails (the failure is shown with ``st.error``).
    """
    # Nothing to say is not an error; skip the request entirely.
    if not isinstance(text, str) or not text.strip():
        return None
    try:
        # Limit text length to avoid issues
        if len(text) > 500:
            clipped = text[:500]
            # Cut at a word boundary so the narration does not end mid-word.
            last_space = clipped.rfind(" ")
            if last_space > 0:
                clipped = clipped[:last_space]
            text = clipped.rstrip() + "..."
        tts = gTTS(text=text, lang=language, slow=False)
        audio_buffer = io.BytesIO()
        tts.write_to_fp(audio_buffer)
        audio_buffer.seek(0)
        return audio_buffer
    except Exception as e:
        st.error(f"Error generating speech: {str(e)}")
        return None
def create_download_zip(images, script_data):
    """Create a ZIP archive with the script and storyboard images.

    The archive contains ``script.json`` (the script dict as indented JSON),
    ``script.txt`` (a readable scene-by-scene rendering) and one
    ``scene_NN.png`` per image that is not ``None`` and can be saved.

    Args:
        images: Storyboard images in scene order; ``None`` entries are
            skipped but still count towards the scene numbering.
        script_data: The script dict.

    Returns:
        An ``io.BytesIO`` positioned at the start of the ZIP data, or
        ``None`` after showing an error if the archive could not be built.
    """
    try:
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            # Add script as JSON
            zip_file.writestr("script.json", json.dumps(script_data, indent=2))

            # Add script as readable text, assembled as lines and joined once.
            lines = [
                f"Title: {script_data.get('title', '')}",
                f"Duration: {script_data.get('total_duration', '')}",
                "",
            ]
            for i, scene in enumerate(script_data.get('scenes', []), 1):
                lines.extend([
                    f"=== SCENE {i} ===",
                    f"Duration: {scene.get('duration', '')}",
                    f"Camera: {scene.get('camera_angle', '')}",
                    f"Description: {scene.get('description', '')}",
                    f"Dialogue: {scene.get('dialogue', '')}",
                    f"Visual Elements: {scene.get('visual_elements', '')}",
                    "",
                ])
            zip_file.writestr("script.txt", "\n".join(lines) + "\n")

            # Add images
            for i, image in enumerate(images):
                if image is None:
                    continue
                img_buffer = io.BytesIO()
                try:
                    image.save(img_buffer, format='PNG')
                except Exception:
                    # Best effort: leave out an image that cannot be encoded.
                    continue
                zip_file.writestr(f"scene_{i+1:02d}.png", img_buffer.getvalue())
        zip_buffer.seek(0)
        return zip_buffer
    except Exception as e:
        st.error(f"Error creating ZIP file: {str(e)}")
        return None
# Main generation button: build the script, then one storyboard image per
# scene, and store both in session state for the results section.
if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
    if not video_topic.strip():
        st.error("Please enter a video topic")
    else:
        # Generate script
        with st.spinner("🤖 Generating script with AI..."):
            script_data = generate_script_with_gemini(video_topic, video_length, style, tone, platform)
        if script_data:
            st.session_state.generated_script = script_data
            # A new script invalidates the previous run's images and GIF;
            # clear them so stale content is never shown or downloaded.
            st.session_state.storyboard_images = []
            st.session_state.gif_preview = None
            st.success("✅ Script generated successfully!")

            # Generate storyboard images
            st.info("🎨 Generating storyboard images (this may take a few minutes)...")
            images = []

            # Create progress tracking
            progress_container = st.container()
            with progress_container:
                progress_bar = st.progress(0)
                status_text = st.empty()

            # Tolerate a script without a 'scenes' list instead of raising.
            scenes = script_data.get('scenes') or []
            total_scenes = len(scenes)
            for i, scene in enumerate(scenes):
                status_text.text(f"Generating image {i+1}/{total_scenes}: Scene {i+1}")
                try:
                    image = generate_storyboard_image_stable(
                        scene['description'],
                        art_style
                    )
                    images.append(image)
                    if image:
                        st.success(f"✅ Scene {i+1} generated successfully")
                    else:
                        st.warning(f"⚠️ Scene {i+1} failed, using placeholder")
                except Exception as e:
                    st.error(f"❌ Error generating scene {i+1}: {str(e)}")
                    images.append(None)
                progress_bar.progress((i + 1) / total_scenes)
                # Rate limiting
                if i < total_scenes - 1:  # Don't wait after last image
                    time.sleep(3)  # Wait 3 seconds between requests

            status_text.text("✅ Storyboard generation complete!")
            st.session_state.storyboard_images = images
            generated_count = sum(1 for img in images if img is not None)
            st.success(f"✅ Generated {generated_count} out of {len(images)} storyboard images!")
        else:
            st.error("Failed to generate script. Please try again.")
# Display results: script details, storyboard grid with GIF preview, and
# download options, shown whenever a script is stored in session state.
if st.session_state.generated_script:
    script_data = st.session_state.generated_script
    # Read the scene list once; a script without 'scenes' shows as empty
    # instead of raising KeyError in the storyboard tab.
    scenes = script_data.get('scenes', [])

    st.header("📜 Generated Script")
    st.subheader(f"🎬 {script_data.get('title', 'Video Title')}")
    st.write(f"**Duration:** {script_data.get('total_duration', 'N/A')}")

    # Display script in tabs
    tab1, tab2, tab3 = st.tabs(["📝 Script Details", "🖼️ Storyboard", "📥 Export"])

    with tab1:
        for i, scene in enumerate(scenes, 1):
            with st.expander(f"Scene {i} - {scene.get('duration', 'N/A')}", expanded=False):
                col1, col2 = st.columns(2)
                with col1:
                    st.write("**Visual Description:**")
                    st.write(scene.get('description', 'N/A'))
                    st.write("**Camera Angle:**")
                    st.write(scene.get('camera_angle', 'N/A'))
                with col2:
                    st.write("**Dialogue/Narration:**")
                    st.write(scene.get('dialogue', 'N/A'))
                    st.write("**Visual Elements:**")
                    st.write(scene.get('visual_elements', 'N/A'))
                # Text-to-speech
                dialogue = scene.get('dialogue', '')
                if dialogue and st.button(f"🔊 Play Audio - Scene {i}", key=f"audio_{i}"):
                    with st.spinner("Generating audio..."):
                        audio_buffer = text_to_speech(dialogue)
                    if audio_buffer:
                        st.audio(audio_buffer.getvalue(), format='audio/mp3')

    with tab2:
        if st.session_state.storyboard_images:
            st.subheader("🎨 Storyboard Images")
            # Show images in a grid
            cols_per_row = 2
            for i in range(0, len(st.session_state.storyboard_images), cols_per_row):
                cols = st.columns(cols_per_row)
                for j, col in enumerate(cols):
                    idx = i + j
                    if idx < len(st.session_state.storyboard_images):
                        image = st.session_state.storyboard_images[idx]
                        scene = scenes[idx] if idx < len(scenes) else {}
                        with col:
                            if image:
                                st.image(image, caption=f"Scene {idx+1}", use_container_width=True)
                            else:
                                st.write(f"❌ Scene {idx+1} - Image failed to generate")
                            st.write(f"**Duration:** {scene.get('duration', 'N/A')}")
                            # Add the ellipsis only when text was actually
                            # cut; str() guards against non-string values.
                            description = str(scene.get('description', 'N/A'))
                            if len(description) > 100:
                                description = description[:100] + "..."
                            st.write(f"**Description:** {description}")

            # GIF Preview section
            st.subheader("📱 Animated Preview")
            col1, col2 = st.columns([1, 2])
            with col1:
                if st.button("🎬 Create GIF Preview"):
                    with st.spinner("Creating animated preview..."):
                        gif_buffer = create_gif_preview(
                            st.session_state.storyboard_images,
                            script_data
                        )
                    if gif_buffer:
                        st.session_state.gif_preview = gif_buffer
                        st.success("GIF preview created!")
                    else:
                        st.error("Failed to create GIF preview")
            with col2:
                if st.session_state.gif_preview:
                    st.image(st.session_state.gif_preview.getvalue(), caption="Storyboard Preview")
        else:
            st.info("Generate storyboard images first using the button above.")

    with tab3:
        st.subheader("📥 Download Options")
        col1, col2, col3 = st.columns(3)
        with col1:
            # Script download
            script_json = json.dumps(script_data, indent=2)
            st.download_button(
                label="📄 Download Script (JSON)",
                data=script_json,
                file_name=f"script_{int(time.time())}.json",
                mime="application/json"
            )
        with col2:
            # ZIP download (the archive is rebuilt each time this section runs)
            if st.session_state.storyboard_images:
                zip_data = create_download_zip(st.session_state.storyboard_images, script_data)
                if zip_data:
                    st.download_button(
                        label="📦 Download Complete Package",
                        data=zip_data.getvalue(),
                        file_name=f"storyboard_package_{int(time.time())}.zip",
                        mime="application/zip"
                    )
        with col3:
            # GIF download
            if st.session_state.gif_preview:
                st.download_button(
                    label="🎬 Download GIF Preview",
                    data=st.session_state.gif_preview.getvalue(),
                    file_name=f"storyboard_preview_{int(time.time())}.gif",
                    mime="image/gif"
                )
# Sidebar: usage notes, feature list, an image-generation smoke test and a
# summary of the required keys and models. The Markdown bodies are defined
# first so the layout code below reads as a plain sequence of sections.
sidebar_usage_md = """
1. **Enter Details**: Describe your video topic and preferences
2. **Generate**: Click the generate button and wait
3. **Review**: Check the script and storyboard images
4. **Export**: Download your files
"""
sidebar_features_md = """
- ✅ AI-powered script generation
- ✅ Visual storyboard creation
- ✅ Text-to-speech narration
- ✅ GIF preview generation
- ✅ Complete package download
"""
sidebar_settings_md = """
**Required API Keys:**
- GEMINI_API_KEY
- HF_TOKEN (Hugging Face)
**Models Used:**
- Script: Gemini 1.5 Flash
- Images: Stable Diffusion v1.5
- Speech: Google TTS
"""

with st.sidebar:
    st.markdown("### 📚 How to Use")
    st.markdown(sidebar_usage_md)

    st.markdown("### 🔧 Features")
    st.markdown(sidebar_features_md)

    st.markdown("### 🐛 Debug Info")
    run_image_test = st.button("🧪 Test Image Generation")
    if run_image_test:
        with st.spinner("Testing image generation..."):
            # One fixed prompt and style, just to exercise the image path.
            test_image = generate_storyboard_image_stable(
                "A simple test scene with a person",
                "Cartoon",
            )
            if not test_image:
                st.error("❌ Image generation failed!")
            else:
                st.image(test_image, caption="Test Image", width=200)
                st.success("✅ Image generation working!")

    st.markdown("### ⚙️ Settings")
    st.markdown(sidebar_settings_md)
# Footer: a horizontal rule followed by the credits and a usage tip, each
# rendered as its own Markdown element.
for footer_line in (
    "---",
    "🤖 **Powered by**: Gemini AI • Hugging Face • Google TTS",
    "💡 **Tips**: Be specific in your descriptions for better results!",
):
    st.markdown(footer_line)