Spaces:

maria355
/

AI-Video-Script-and-Storyboard-Generator

Sleeping

AI-Video-Script-and-Storyboard-Generator

File size: 22,005 Bytes

ae2847b
 
 
 
 
 
 
 
82bcdd5
ae2847b
 
 
17840c3
ae2847b
 
 
 
 
 
 
 
 
 
 
82bcdd5
 
ae2847b
82bcdd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae2847b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b37f161
ae2847b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82bcdd5
ae2847b
 
 
 
82bcdd5
 
ae2847b
 
 
 
 
 
 
 
 
 
82bcdd5
ae2847b
82bcdd5
 
ae2847b
82bcdd5
 
 
 
 
 
 
 
ae2847b
82bcdd5
 
 
ae2847b
82bcdd5
 
b37f161
82bcdd5
 
 
 
 
 
 
 
 
 
 
ae2847b
82bcdd5
 
 
 
 
ae2847b
82bcdd5
ae2847b
 
82bcdd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae2847b
82bcdd5
 
 
 
ae2847b
82bcdd5
ae2847b
82bcdd5
 
 
ae2847b
82bcdd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae2847b
 
259f3c7
82bcdd5
259f3c7
82bcdd5
259f3c7
 
 
 
82bcdd5
 
259f3c7
 
 
82bcdd5
 
 
 
 
 
 
 
259f3c7
 
 
 
 
 
 
82bcdd5
 
259f3c7
 
 
 
 
 
82bcdd5
259f3c7
 
ae2847b
 
 
82bcdd5
 
 
 
ae2847b
 
 
 
 
 
 
 
 
82bcdd5
 
b37f161
82bcdd5
b37f161
82bcdd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b37f161
 
ae2847b
 
82bcdd5
ae2847b
 
82bcdd5
ae2847b
 
 
 
 
 
 
 
82bcdd5
 
 
 
 
 
ae2847b
82bcdd5
 
 
ae2847b
 
82bcdd5
 
 
 
b37f161
 
 
 
 
82bcdd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae2847b
82bcdd5
 
ae2847b
b37f161
82bcdd5
ae2847b
 
 
 
 
 
 
 
 
 
82bcdd5
ae2847b
 
 
82bcdd5
ae2847b
 
 
 
 
 
 
 
 
 
 
 
 
 
82bcdd5
 
 
 
 
 
 
ae2847b
 
 
 
 
82bcdd5
 
 
 
b37f161
82bcdd5
 
 
 
 
ae2847b
82bcdd5
 
fb63116
82bcdd5
 
259f3c7
82bcdd5
 
 
 
 
 
 
 
 
 
259f3c7
 
 
 
 
82bcdd5
259f3c7
82bcdd5
 
 
 
 
 
 
 
 
 
 
ae2847b
 
 
 
82bcdd5
 
 
 
 
 
 
 
ae2847b
 
82bcdd5
 
 
 
ae2847b
82bcdd5
 
 
ae2847b
 
 
 
82bcdd5
 
 
 
 
 
 
 
ae2847b
82bcdd5
ae2847b
 
 
82bcdd5
 
 
 
ae2847b
 
bb04d06
 
82bcdd5
 
 
 
 
b37f161
 
91bf393
 
 
 
 
 
 
 
 
 
 
 
 
82bcdd5
b37f161
82bcdd5
 
 
259f3c7
82bcdd5
 
 
 
 
b37f161
82bcdd5

import streamlit as st
import google.generativeai as genai
from huggingface_hub import InferenceClient
import requests
from PIL import Image
import io
import json
import time
import zipfile
import tempfile
import os
from gtts import gTTS

# Page title, icon and wide layout for the app
st.set_page_config(page_title="AI Video Script & Storyboard Generator", page_icon="🎬", layout="wide")

# Seed each session-state slot only when it is not already present
for _state_key, _initial_value in (
    ('generated_script', None),
    ('storyboard_images', []),
    ('gif_preview', None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# API Configuration
def load_api_keys():
    """Return the ``(gemini_api_key, hf_token)`` pair required by the app.

    Each key is looked up in Streamlit secrets first and, failing that, in
    the process environment. When either key is still missing, an error is
    shown in the UI and the script run is halted with ``st.stop()``.

    Returns:
        tuple: ``(gemini_api_key, hf_token)``, both non-empty.
    """

    def _lookup(name):
        # Accessing st.secrets can raise instead of returning None when no
        # secrets file is configured (the exception type differs between
        # Streamlit versions). A failed secrets lookup must not prevent the
        # environment-variable fallback from being tried.
        try:
            secret_value = st.secrets.get(name)
        except Exception:
            secret_value = None
        return secret_value or os.getenv(name)

    gemini_api_key = _lookup("GEMINI_API_KEY")
    hf_token = _lookup("HF_TOKEN")

    if not gemini_api_key or not hf_token:
        st.error("❌ API Keys not found. Please configure GEMINI_API_KEY and HF_TOKEN")
        st.stop()

    return gemini_api_key, hf_token

# Credentials for both AI services
gemini_api_key, hf_token = load_api_keys()

# Hand the credentials to the Gemini SDK and the Hugging Face client
genai.configure(api_key=gemini_api_key)
client = InferenceClient(token=hf_token)

# Page heading
st.title("🎬 AI Video Script & Storyboard Generator")
st.markdown("Create professional video scripts and visual storyboards with AI assistance")

# Choices offered by the selection widgets below
length_choices = ["30 seconds", "1 minute", "2 minutes", "3 minutes", "5 minutes", "Custom"]
style_choices = ["Explainer", "Cinematic", "Tutorial", "Vlog", "Animation", "Documentary", "Commercial"]
tone_choices = ["Professional", "Funny", "Serious", "Dramatic", "Inspirational", "Casual", "Educational"]
platform_choices = ["YouTube", "TikTok", "Instagram Reels", "LinkedIn", "Presentation", "General"]
art_style_choices = ["Realistic", "Cartoon", "Cinematic", "Minimalistic", "Sketch", "Digital Art"]

# Input form, split over two columns
st.header("📝 Video Specifications")

col1, col2 = st.columns(2)

with col1:
    video_topic = st.text_area(
        label="Video Topic",
        placeholder="Enter your video topic or detailed description...",
        height=100,
    )

    video_length = st.selectbox(label="Video Length", options=length_choices)

    # "Custom" reveals a numeric input whose value replaces the selection
    if video_length == "Custom":
        custom_length = st.number_input(
            label="Custom length (seconds)",
            min_value=10,
            max_value=600,
            value=60,
        )
        video_length = f"{custom_length} seconds"

    style = st.selectbox(label="Video Style", options=style_choices)

with col2:
    tone = st.selectbox(label="Tone/Emotion", options=tone_choices)

    platform = st.selectbox(label="Target Platform", options=platform_choices)

    art_style = st.selectbox(label="Storyboard Art Style", options=art_style_choices)

# Functions for AI generation
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if present."""
    text = text.strip()
    if not text.startswith("```"):
        return text

    text = text[3:]
    # An optional language tag ("json", in any casing) may follow the fence.
    if text[:4].lower() == "json":
        text = text[4:]
    text = text.strip()

    # Only remove a closing fence that is really there; unconditionally
    # slicing off the last three characters would truncate a reply whose
    # fence was never closed.
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def generate_script_with_gemini(topic, length, style, tone, platform):
    """Generate a video script with the Gemini API.

    The model is asked for a JSON document containing ``title``,
    ``total_duration`` and a ``scenes`` list. Any failure (API error,
    unparsable JSON, or a payload without a ``scenes`` list) is reported via
    ``st.error`` and the templated fallback script is returned instead.

    Args:
        topic: Subject of the video.
        length: Requested duration, e.g. "30 seconds".
        style: Video style.
        tone: Tone/emotion of the narration.
        platform: Target platform.

    Returns:
        dict: The parsed script, or whatever ``generate_fallback_script``
        returns when generation fails.
    """
    try:
        model = genai.GenerativeModel('gemini-1.5-flash')
        
        prompt = f"""
        Create a detailed video script for the following specifications:
        
        Topic: {topic}
        Length: {length}
        Style: {style}
        Tone: {tone}
        Platform: {platform}
        
        Format the output as JSON with the following structure:
        {{
            "title": "Video Title",
            "total_duration": "{length}",
            "scenes": [
                {{
                    "scene_number": 1,
                    "duration": "10 seconds",
                    "description": "Visual description for storyboard",
                    "dialogue": "Script/narration text",
                    "camera_angle": "Wide shot/Close-up/etc",
                    "visual_elements": "Key visual elements to include"
                }}
            ]
        }}
        
        Make sure the scenes add up to the total duration and are engaging for {platform}.
        Include specific visual descriptions that can be used to generate storyboard images.
        Return only valid JSON, no additional text.
        """

        response = model.generate_content(prompt)
        script_data = json.loads(_strip_code_fence(response.text))

        # The rest of the app reads the 'scenes' list, so a payload without
        # one is treated as a failed generation and routed to the fallback.
        if not isinstance(script_data, dict) or not isinstance(script_data.get("scenes"), list):
            raise ValueError("response JSON does not contain a 'scenes' list")

        return script_data

    except Exception as e:
        st.error(f"Error generating script: {str(e)}")
        return generate_fallback_script(topic, length, style, tone, platform)

def generate_fallback_script(topic, length, style, tone, platform, art_style=None):
    """Build a templated script without calling any AI service.

    Args:
        topic: Subject of the video; interpolated into every scene.
        length: Duration such as "30 seconds" or "2 minutes". A value whose
            leading number cannot be parsed, or whose unit is neither
            seconds nor minutes, is treated as 60 seconds.
        style: Video style, used in scene descriptions and in the title.
        tone: Narration tone, used in the dialogue text.
        platform: Target platform (accepted but not used by the template).
        art_style: Art style named in scene descriptions. When omitted, the
            module-level ``art_style`` selection is used if one is defined,
            otherwise "Realistic".

    Returns:
        dict with ``title``, ``total_duration`` and ``scenes`` (3 to 8
        scenes whose durations sum to the total), or ``None`` if an
        unexpected error occurs (reported via ``st.error``).
    """
    try:
        if art_style is None:
            # Keep working for callers that rely on the module-level widget
            # value, without raising NameError when it is not defined.
            art_style = globals().get("art_style") or "Realistic"

        # Parse length
        total_seconds = 60
        try:
            amount = int(length.split()[0])
        except (ValueError, IndexError):
            amount = None
        if amount is not None:
            unit_text = length.lower()
            if "second" in unit_text:
                total_seconds = amount
            elif "minute" in unit_text:
                total_seconds = amount * 60

        # Roughly one scene per 10 seconds, clamped to 3-8 scenes
        num_scenes = max(3, min(8, total_seconds // 10))
        # Spread the remainder over the first scenes so the durations add up
        # to the requested total instead of silently dropping seconds.
        base_duration, extra_seconds = divmod(total_seconds, num_scenes)

        camera_angles = ["Wide shot", "Medium shot", "Close-up", "Over shoulder"]
        middle_types = ["main content", "detail"]

        scenes = []
        for i in range(num_scenes):
            # Always open with an opening and close with a conclusion;
            # scenes in between alternate between the two middle types.
            if i == 0:
                scene_type = "opening"
            elif i == num_scenes - 1:
                scene_type = "conclusion"
            else:
                scene_type = middle_types[(i - 1) % len(middle_types)]

            scene_duration = base_duration + (1 if i < extra_seconds else 0)
            scenes.append({
                "scene_number": i + 1,
                "duration": f"{scene_duration} seconds",
                "description": f"A {style.lower()} {scene_type} scene about {topic}, showing professional visuals in {art_style.lower()} style",
                "dialogue": f"Engaging {tone.lower()} narration about {topic} for scene {i+1}",
                "camera_angle": camera_angles[i % len(camera_angles)],
                "visual_elements": f"Professional visuals related to {topic}, {style.lower()} cinematography",
            })

        return {
            "title": f"{topic} - {style} Video",
            "total_duration": length,
            "scenes": scenes,
        }
    except Exception as e:
        st.error(f"Error creating fallback script: {str(e)}")
        return None

def generate_storyboard_image_stable(scene_description, art_style, max_retries=3):
    """Generate one storyboard frame, falling back to a placeholder image.

    Up to ``max_retries`` requests are sent to ``client.text_to_image``,
    cycling through three progressively simpler phrasings of the prompt and
    pausing two seconds between attempts. If no attempt yields an image, a
    placeholder from ``create_placeholder_image`` is returned instead.

    Args:
        scene_description: Text describing the scene to draw.
        art_style: Key into the style table below; unknown styles use a
            generic "professional, high quality" enhancement.
        max_retries: Maximum number of generation attempts. The default of 3
            tries each prompt phrasing once.

    Returns:
        The generated image, or the placeholder image (which may itself be
        ``None`` if placeholder creation fails).
    """
    style_prompts = {
        "Realistic": "photorealistic, professional, high quality, detailed",
        "Cartoon": "cartoon style, animated, colorful, illustration, Disney-like",
        "Cinematic": "cinematic, dramatic lighting, film still, movie scene",
        "Minimalistic": "minimalist, clean, simple, modern design",
        "Sketch": "pencil sketch, hand-drawn, artistic, line art",
        "Digital Art": "digital art, concept art, vibrant colors, detailed",
    }
    style_enhancement = style_prompts.get(art_style, "professional, high quality")

    # Alternative phrasings, tried in order
    prompt_variants = [
        f"{scene_description}, {style_enhancement}, storyboard frame",
        f"storyboard illustration: {scene_description}, {style_enhancement}",
        f"{scene_description}, simple illustration, clean design",
    ]

    for attempt in range(max_retries):
        if attempt:
            time.sleep(2)  # Wait before retry

        prompt = prompt_variants[attempt % len(prompt_variants)]
        try:
            image = client.text_to_image(
                prompt,
                model="runwayml/stable-diffusion-v1-5",
            )
        except Exception:
            # Best effort: any failure of this attempt just moves on to the
            # next one; the placeholder below covers total failure.
            continue

        # Only accept results that look like an image object
        if image and hasattr(image, 'size'):
            return image

    # Create a placeholder image as last resort
    return create_placeholder_image(f"Scene: {scene_description[:50]}...")

def create_placeholder_image(text):
    """Render *text* onto a plain grey 512x384 placeholder image.

    The text is split on spaces and drawn four words per line, starting at
    y=150 with 30 pixels between lines; drawing stops once the next line
    would start below y=300.

    Args:
        text: Caption to draw on the placeholder.

    Returns:
        A PIL image, or ``None`` if any step fails (including Pillow being
        unavailable).
    """
    try:
        from PIL import Image, ImageDraw, ImageFont

        img = Image.new('RGB', (512, 384), color=(200, 200, 200))
        draw = ImageDraw.Draw(img)

        # Fall back to font=None if the default font cannot be loaded.
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit are not swallowed here.
        try:
            font = ImageFont.load_default()
        except Exception:
            font = None

        words = text.split(' ')
        words_per_line = 4
        line_height = 30
        y_pos = 150

        for start in range(0, len(words), words_per_line):
            line = ' '.join(words[start:start + words_per_line])
            draw.text((50, y_pos), line, fill=(50, 50, 50), font=font)
            y_pos += line_height
            if y_pos > 300:  # Don't overflow
                break

        return img
    except Exception:
        return None

def create_gif_preview(images, script_data):
    """Assemble the available storyboard frames into a looping animated GIF.

    ``None`` entries and frames that fail to resize are skipped. Every
    remaining frame is resized to 400x300 and shown for 2.5 seconds.
    ``script_data`` is accepted but not consulted.

    Returns:
        An ``io.BytesIO`` rewound to the start of the GIF data, or ``None``
        when there is no usable frame or an error occurs (errors are
        reported via ``st.error``).
    """
    try:
        frame_size = (400, 300)
        frames = []

        for candidate in images:
            if candidate is None:
                continue
            try:
                frames.append(candidate.resize(frame_size, Image.Resampling.LANCZOS))
            except Exception:
                # A frame that cannot be resized is left out of the preview
                continue

        if not frames:
            return None

        first_frame, *later_frames = frames

        output = io.BytesIO()
        first_frame.save(
            output,
            format='GIF',
            save_all=True,
            append_images=later_frames,
            duration=2500,  # 2.5 seconds per frame
            loop=0,
        )
        output.seek(0)
        return output

    except Exception as e:
        st.error(f"Error creating GIF: {str(e)}")
        return None

def text_to_speech(text, language='en'):
    """Synthesize *text* with gTTS and return the audio as an in-memory stream.

    Text longer than 500 characters is cut to its first 500 characters with
    "..." appended before synthesis.

    Returns:
        An ``io.BytesIO`` rewound to the start of the audio data, or
        ``None`` if synthesis fails (the error is reported via ``st.error``).
    """
    try:
        # Cap the input length to avoid issues with long passages
        spoken_text = text if len(text) <= 500 else text[:500] + "..."

        synthesizer = gTTS(text=spoken_text, lang=language, slow=False)
        audio_stream = io.BytesIO()
        synthesizer.write_to_fp(audio_stream)
        audio_stream.seek(0)
        return audio_stream
    except Exception as e:
        st.error(f"Error generating speech: {str(e)}")
        return None

def create_download_zip(images, script_data):
    """Bundle the script and storyboard frames into an in-memory ZIP archive.

    The archive contains ``script.json`` (the script as indented JSON),
    ``script.txt`` (a readable scene-by-scene rendering) and one
    ``scene_NN.png`` per truthy entry of *images*, numbered by its position
    in the list. Frames that fail to save are left out.

    Returns:
        An ``io.BytesIO`` rewound to the start of the archive, or ``None``
        if building it fails (the error is reported via ``st.error``).
    """
    try:
        archive_bytes = io.BytesIO()

        with zipfile.ZipFile(archive_bytes, 'w', zipfile.ZIP_DEFLATED) as archive:
            # Machine-readable copy of the script
            archive.writestr("script.json", json.dumps(script_data, indent=2))

            # Human-readable copy, assembled piece by piece
            text_parts = [
                f"Title: {script_data.get('title', '')}\n",
                f"Duration: {script_data.get('total_duration', '')}\n\n",
            ]
            for number, scene in enumerate(script_data.get('scenes', []), 1):
                text_parts.append(
                    f"=== SCENE {number} ===\n"
                    f"Duration: {scene.get('duration', '')}\n"
                    f"Camera: {scene.get('camera_angle', '')}\n"
                    f"Description: {scene.get('description', '')}\n"
                    f"Dialogue: {scene.get('dialogue', '')}\n"
                    f"Visual Elements: {scene.get('visual_elements', '')}\n\n"
                )
            archive.writestr("script.txt", "".join(text_parts))

            # Storyboard frames as PNG files
            for position, frame in enumerate(images, start=1):
                if not frame:
                    continue
                png_bytes = io.BytesIO()
                try:
                    frame.save(png_bytes, format='PNG')
                    archive.writestr(f"scene_{position:02d}.png", png_bytes.getvalue())
                except Exception:
                    continue

        archive_bytes.seek(0)
        return archive_bytes
    except Exception as e:
        st.error(f"Error creating ZIP file: {str(e)}")
        return None

# Main generation button: builds the script, then one storyboard image per
# scene, and stores both in session state for the results section.
if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
    if not video_topic.strip():
        st.error("Please enter a video topic")
    else:
        # Generate script
        with st.spinner("🤖 Generating script with AI..."):
            script_data = generate_script_with_gemini(video_topic, video_length, style, tone, platform)

        if script_data:
            st.session_state.generated_script = script_data
            # Discard artefacts of any earlier run so a stale GIF preview or
            # old frames are never shown (or offered for download) next to
            # the newly generated script.
            st.session_state.storyboard_images = []
            st.session_state.gif_preview = None
            st.success("✅ Script generated successfully!")

            # Generate storyboard images
            st.info("🎨 Generating storyboard images (this may take a few minutes)...")
            images = []

            # Tolerate a script without a usable 'scenes' entry
            scenes = script_data.get('scenes') or []
            total_scenes = len(scenes)

            # Create progress tracking
            progress_container = st.container()
            with progress_container:
                progress_bar = st.progress(0)
                status_text = st.empty()

                for i, scene in enumerate(scenes):
                    status_text.text(f"Generating image {i+1}/{total_scenes}: Scene {i+1}")

                    try:
                        image = generate_storyboard_image_stable(
                            scene.get('description', ''),
                            art_style
                        )
                        images.append(image)

                        if image:
                            st.success(f"✅ Scene {i+1} generated successfully")
                        else:
                            st.warning(f"⚠️ Scene {i+1} failed, using placeholder")

                    except Exception as e:
                        # Keep one entry per scene so image indexes stay
                        # aligned with scene indexes.
                        st.error(f"❌ Error generating scene {i+1}: {str(e)}")
                        images.append(None)

                    progress_bar.progress((i + 1) / total_scenes)

                    # Rate limiting
                    if i < total_scenes - 1:  # Don't wait after last image
                        time.sleep(3)  # Wait 3 seconds between requests

                status_text.text("✅ Storyboard generation complete!")

            st.session_state.storyboard_images = images
            generated_count = sum(1 for img in images if img is not None)
            st.success(f"✅ Generated {generated_count} out of {len(images)} storyboard images!")

        else:
            st.error("Failed to generate script. Please try again.")

# Display results
# Rendered whenever a script is held in session state, i.e. also on reruns
# that were not triggered by the generate button.
if st.session_state.generated_script:
    script_data = st.session_state.generated_script
    
    st.header("📜 Generated Script")
    st.subheader(f"🎬 {script_data.get('title', 'Video Title')}")
    st.write(f"**Duration:** {script_data.get('total_duration', 'N/A')}")
    
    # Display script in tabs
    tab1, tab2, tab3 = st.tabs(["📝 Script Details", "🖼️ Storyboard", "📥 Export"])
    
    # Tab 1: one collapsed expander per scene with its details and a
    # text-to-speech button for the dialogue.
    with tab1:
        for i, scene in enumerate(script_data.get('scenes', []), 1):
            with st.expander(f"Scene {i} - {scene.get('duration', 'N/A')}", expanded=False):
                col1, col2 = st.columns(2)
                
                with col1:
                    st.write("**Visual Description:**")
                    st.write(scene.get('description', 'N/A'))
                    st.write("**Camera Angle:**")
                    st.write(scene.get('camera_angle', 'N/A'))
                
                with col2:
                    st.write("**Dialogue/Narration:**")
                    st.write(scene.get('dialogue', 'N/A'))
                    st.write("**Visual Elements:**")
                    st.write(scene.get('visual_elements', 'N/A'))
                
                # Text-to-speech
                # The button is only offered for scenes that have dialogue;
                # the per-scene key keeps the buttons distinct.
                dialogue = scene.get('dialogue', '')
                if dialogue and st.button(f"🔊 Play Audio - Scene {i}", key=f"audio_{i}"):
                    with st.spinner("Generating audio..."):
                        audio_buffer = text_to_speech(dialogue)
                        if audio_buffer:
                            st.audio(audio_buffer.getvalue(), format='audio/mp3')
    
    # Tab 2: storyboard frames in a two-column grid, plus the GIF preview.
    with tab2:
        if st.session_state.storyboard_images:
            st.subheader("🎨 Storyboard Images")
            
            # Show images in a grid
            cols_per_row = 2
            for i in range(0, len(st.session_state.storyboard_images), cols_per_row):
                cols = st.columns(cols_per_row)
                
                for j, col in enumerate(cols):
                    idx = i + j
                    # The last row may be only partly filled
                    if idx < len(st.session_state.storyboard_images):
                        image = st.session_state.storyboard_images[idx]
                        # Use an empty scene when there are more images than scenes
                        scene = script_data['scenes'][idx] if idx < len(script_data['scenes']) else {}
                        
                        with col:
                            if image:
                                st.image(image, caption=f"Scene {idx+1}", use_container_width=True)
                            else:
                                st.write(f"❌ Scene {idx+1} - Image failed to generate")
                            
                            st.write(f"**Duration:** {scene.get('duration', 'N/A')}")
                            # Only the first 100 characters of the description are shown
                            st.write(f"**Description:** {scene.get('description', 'N/A')[:100]}...")
            
            # GIF Preview section
            st.subheader("📱 Animated Preview")
            col1, col2 = st.columns([1, 2])
            
            with col1:
                if st.button("🎬 Create GIF Preview"):
                    with st.spinner("Creating animated preview..."):
                        gif_buffer = create_gif_preview(
                            st.session_state.storyboard_images,
                            script_data
                        )
                        if gif_buffer:
                            # Kept in session state so the preview is still
                            # available on later reruns and in the Export tab
                            st.session_state.gif_preview = gif_buffer
                            st.success("GIF preview created!")
                        else:
                            st.error("Failed to create GIF preview")
            
            with col2:
                if st.session_state.gif_preview:
                    st.image(st.session_state.gif_preview.getvalue(), caption="Storyboard Preview")
        else:
            st.info("Generate storyboard images first using the button above.")
    
    # Tab 3: download buttons for the script, the full package and the GIF.
    with tab3:
        st.subheader("📥 Download Options")
        
        col1, col2, col3 = st.columns(3)
        
        with col1:
            # Script download
            script_json = json.dumps(script_data, indent=2)
            st.download_button(
                label="📄 Download Script (JSON)",
                data=script_json,
                # File names carry the current Unix timestamp
                file_name=f"script_{int(time.time())}.json",
                mime="application/json"
            )
        
        with col2:
            # ZIP download
            # The archive is rebuilt each time this section is rendered
            if st.session_state.storyboard_images:
                zip_data = create_download_zip(st.session_state.storyboard_images, script_data)
                if zip_data:
                    st.download_button(
                        label="📦 Download Complete Package",
                        data=zip_data.getvalue(),
                        file_name=f"storyboard_package_{int(time.time())}.zip",
                        mime="application/zip"
                    )
        
        with col3:
            # GIF download
            # Only offered once a GIF preview has been created in the Storyboard tab
            if st.session_state.gif_preview:
                st.download_button(
                    label="🎬 Download GIF Preview",
                    data=st.session_state.gif_preview.getvalue(),
                    file_name=f"storyboard_preview_{int(time.time())}.gif",
                    mime="image/gif"
                )

# Sidebar: usage help, feature list, an image-generation self-test and settings
with st.sidebar:
    how_to_use_text = """
    1. **Enter Details**: Describe your video topic and preferences
    2. **Generate**: Click the generate button and wait
    3. **Review**: Check the script and storyboard images
    4. **Export**: Download your files
    """
    features_text = """
    - ✅ AI-powered script generation
    - ✅ Visual storyboard creation
    - ✅ Text-to-speech narration
    - ✅ GIF preview generation
    - ✅ Complete package download
    """
    settings_text = """
    **Required API Keys:**
    - GEMINI_API_KEY
    - HF_TOKEN (Hugging Face)
    
    **Models Used:**
    - Script: Gemini 1.5 Flash
    - Images: Stable Diffusion v1.5
    - Speech: Google TTS
    """

    st.markdown("### 📚 How to Use")
    st.markdown(how_to_use_text)

    st.markdown("### 🔧 Features")
    st.markdown(features_text)

    # Self-test: request a single image and report whether one came back
    st.markdown("### 🐛 Debug Info")
    run_image_test = st.button("🧪 Test Image Generation")
    if run_image_test:
        with st.spinner("Testing image generation..."):
            sample_image = generate_storyboard_image_stable("A simple test scene with a person", "Cartoon")
            if not sample_image:
                st.error("❌ Image generation failed!")
            else:
                st.image(sample_image, caption="Test Image", width=200)
                st.success("✅ Image generation working!")

    st.markdown("### ⚙️ Settings")
    st.markdown(settings_text)

# Footer: a divider followed by two credit/tip lines
for _footer_line in (
    "---",
    "🤖 **Powered by**: Gemini AI • Hugging Face • Google TTS",
    "💡 **Tips**: Be specific in your descriptions for better results!",
):
    st.markdown(_footer_line)