Spaces:

maria355
/

AI-Video-Script-and-Storyboard-Generator

Sleeping

App Files Files Community

maria355 committed on Sep 11, 2025

Commit

ae2847b

verified ·

1 Parent(s): f6e1d0e

Create app.py

Browse files

Files changed (1) hide show

app.py +420 -0

app.py ADDED Viewed

	@@ -0,0 +1,420 @@

+import streamlit as st
+import google.generativeai as genai
+from huggingface_hub import InferenceClient
+import requests
+from PIL import Image
+import io
+import json
+import time
+import cv2
+import numpy as np
+from moviepy.editor import ImageSequenceClip, concatenate_videoclips
+import tempfile
+import os
+from gtts import gTTS
+import base64
+# Configure page
+st.set_page_config(
+    page_title="AI Video Script & Storyboard Generator",
+    page_icon="🎬",
+    layout="wide"
+)
+# Initialize session state
+if 'generated_script' not in st.session_state:
+    st.session_state.generated_script = None
+if 'storyboard_images' not in st.session_state:
+    st.session_state.storyboard_images = []
+if 'video_preview' not in st.session_state:
+    st.session_state.video_preview = None
+# Sidebar for API configuration
+st.sidebar.title("🔧 API Configuration")
+gemini_api_key = st.sidebar.text_input("Gemini API Key", type="password", help="Get your API key from Google AI Studio")
+hf_token = st.sidebar.text_input("Hugging Face Token", type="password", help="Get your token from Hugging Face")
+if gemini_api_key:
+    genai.configure(api_key=gemini_api_key)
+# Main title
+st.title("🎬 AI Video Script & Storyboard Generator")
+st.markdown("Create professional video scripts and visual storyboards with AI assistance")
+# Input section
+st.header("📝 Video Specifications")
+col1, col2 = st.columns(2)
+with col1:
+    video_topic = st.text_area(
+        "Video Topic",
+        placeholder="Enter your video topic or detailed description...",
+        height=100
+    )
+    video_length = st.selectbox(
+        "Video Length",
+        ["30 seconds", "1 minute", "2 minutes", "3 minutes", "5 minutes", "Custom"]
+    )
+    if video_length == "Custom":
+        custom_length = st.number_input("Custom length (seconds)", min_value=10, max_value=600, value=60)
+        video_length = f"{custom_length} seconds"
+    style = st.selectbox(
+        "Video Style",
+        ["Explainer", "Cinematic", "Tutorial", "Vlog", "Animation", "Documentary", "Commercial"]
+    )
+with col2:
+    tone = st.selectbox(
+        "Tone/Emotion",
+        ["Professional", "Funny", "Serious", "Dramatic", "Inspirational", "Casual", "Educational"]
+    )
+    platform = st.selectbox(
+        "Target Platform",
+        ["YouTube", "TikTok", "Instagram Reels", "LinkedIn", "Presentation", "General"]
+    )
+    art_style = st.selectbox(
+        "Storyboard Art Style",
+        ["Realistic", "Cartoon", "Cinematic", "Minimalistic", "Sketch", "Digital Art"]
+    )
+# Functions for AI generation
+def generate_script_with_gemini(topic, length, style, tone, platform):
+    """Generate video script using Gemini API"""
+    if not gemini_api_key:
+        st.error("Please provide Gemini API key in the sidebar")
+        return None
+    try:
+        model = genai.GenerativeModel('gemini-pro')
+        prompt = f"""
+        Create a detailed video script for the following specifications:
+        Topic: {topic}
+        Length: {length}
+        Style: {style}
+        Tone: {tone}
+        Platform: {platform}
+        Format the output as JSON with the following structure:
+        {{
+            "title": "Video Title",
+            "total_duration": "{length}",
+            "scenes": [
+                {{
+                    "scene_number": 1,
+                    "duration": "10 seconds",
+                    "description": "Visual description for storyboard",
+                    "dialogue": "Script/narration text",
+                    "camera_angle": "Wide shot/Close-up/etc",
+                    "visual_elements": "Key visual elements to include"
+                }}
+            ]
+        }}
+        Make sure the scenes add up to the total duration and are engaging for {platform}.
+        Include specific visual descriptions that can be used to generate storyboard images.
+        """
+        response = model.generate_content(prompt)
+        # Clean the response text to extract JSON
+        response_text = response.text.strip()
+        if response_text.startswith("```json"):
+            response_text = response_text[7:-3]
+        elif response_text.startswith("```"):
+            response_text = response_text[3:-3]
+        script_data = json.loads(response_text)
+        return script_data
+    except Exception as e:
+        st.error(f"Error generating script: {str(e)}")
+        return None
+def generate_storyboard_image(scene_description, art_style, hf_token):
+    """Generate storyboard image using Stable Diffusion"""
+    if not hf_token:
+        st.error("Please provide Hugging Face token")
+        return None
+    try:
+        # Use Hugging Face Inference API for image generation
+        client = InferenceClient(token=hf_token)
+        # Enhance prompt based on art style
+        style_prompts = {
+            "Realistic": "photorealistic, high quality, detailed",
+            "Cartoon": "cartoon style, animated, colorful, Disney-like",
+            "Cinematic": "cinematic lighting, dramatic, film still, high contrast",
+            "Minimalistic": "minimalist, clean, simple, geometric",
+            "Sketch": "pencil sketch, hand-drawn, artistic, black and white",
+            "Digital Art": "digital art, concept art, detailed, vibrant colors"
+        }
+        enhanced_prompt = f"{scene_description}, {style_prompts.get(art_style, '')}, storyboard frame, professional"
+        image = client.text_to_image(
+            enhanced_prompt,
+            model="stabilityai/stable-diffusion-2-1"
+        )
+        return image
+    except Exception as e:
+        st.error(f"Error generating image: {str(e)}")
+        return None
+def create_video_preview(images, script_data):
+    """Create a basic video preview with Ken Burns effect"""
+    if not images or not script_data:
+        return None
+    try:
+        # Create temporary directory
+        temp_dir = tempfile.mkdtemp()
+        clips = []
+        for i, (image, scene) in enumerate(zip(images, script_data['scenes'])):
+            if image:
+                # Save image temporarily
+                img_path = os.path.join(temp_dir, f"scene_{i}.jpg")
+                image.save(img_path)
+                # Parse duration (extract number from string like "10 seconds")
+                duration_str = scene.get('duration', '5 seconds')
+                duration = float(duration_str.split()[0])
+                # Create image clip with Ken Burns effect
+                img_array = np.array(image)
+                h, w = img_array.shape[:2]
+                # Create zoom effect
+                zoom_factor = 1.2
+                start_size = (w, h)
+                end_size = (int(w * zoom_factor), int(h * zoom_factor))
+                clip = ImageSequenceClip([img_path], durations=[duration])
+                clip = clip.resize(height=480)  # Standardize height
+                clips.append(clip)
+        if clips:
+            # Concatenate all clips
+            final_video = concatenate_videoclips(clips, method="compose")
+            # Save video
+            video_path = os.path.join(temp_dir, "preview.mp4")
+            final_video.write_videofile(
+                video_path,
+                fps=24,
+                codec='libx264',
+                audio_codec='aac',
+                verbose=False,
+                logger=None
+            )
+            return video_path
+    except Exception as e:
+        st.error(f"Error creating video preview: {str(e)}")
+        return None
+def text_to_speech(text, language='en'):
+    """Convert text to speech using gTTS"""
+    try:
+        tts = gTTS(text=text, lang=language, slow=False)
+        audio_buffer = io.BytesIO()
+        tts.write_to_fp(audio_buffer)
+        audio_buffer.seek(0)
+        return audio_buffer
+    except Exception as e:
+        st.error(f"Error generating speech: {str(e)}")
+        return None
+# Main generation button
+if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
+    if not video_topic:
+        st.error("Please enter a video topic")
+    elif not gemini_api_key:
+        st.error("Please provide Gemini API key")
+    elif not hf_token:
+        st.error("Please provide Hugging Face token")
+    else:
+        with st.spinner("🤖 Generating script with AI..."):
+            script_data = generate_script_with_gemini(video_topic, video_length, style, tone, platform)
+        if script_data:
+            st.session_state.generated_script = script_data
+            st.success("✅ Script generated successfully!")
+            # Generate storyboard images
+            with st.spinner("🎨 Creating storyboard images..."):
+                images = []
+                progress_bar = st.progress(0)
+                for i, scene in enumerate(script_data['scenes']):
+                    image = generate_storyboard_image(
+                        scene['description'],
+                        art_style,
+                        hf_token
+                    )
+                    images.append(image)
+                    progress_bar.progress((i + 1) / len(script_data['scenes']))
+                st.session_state.storyboard_images = images
+            st.success("✅ Storyboard images generated!")
+# Display results
+if st.session_state.generated_script:
+    script_data = st.session_state.generated_script
+    st.header("📜 Generated Script")
+    st.subheader(f"🎬 {script_data.get('title', 'Video Title')}")
+    st.write(f"**Duration:** {script_data.get('total_duration', 'N/A')}")
+    # Display script in tabs
+    tab1, tab2, tab3 = st.tabs(["📝 Script Details", "🖼️ Storyboard", "🎥 Preview"])
+    with tab1:
+        for i, scene in enumerate(script_data.get('scenes', []), 1):
+            with st.expander(f"Scene {i} - {scene.get('duration', 'N/A')}"):
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.write("**Visual Description:**")
+                    st.write(scene.get('description', 'N/A'))
+                    st.write("**Camera Angle:**")
+                    st.write(scene.get('camera_angle', 'N/A'))
+                with col2:
+                    st.write("**Dialogue/Narration:**")
+                    st.write(scene.get('dialogue', 'N/A'))
+                    st.write("**Visual Elements:**")
+                    st.write(scene.get('visual_elements', 'N/A'))
+                # Add text-to-speech for dialogue
+                if scene.get('dialogue'):
+                    if st.button(f"🔊 Play Audio - Scene {i}", key=f"audio_{i}"):
+                        audio_buffer = text_to_speech(scene['dialogue'])
+                        if audio_buffer:
+                            st.audio(audio_buffer.getvalue(), format='audio/mp3')
+    with tab2:
+        if st.session_state.storyboard_images:
+            st.subheader("🎨 Storyboard Images")
+            for i, (scene, image) in enumerate(zip(script_data['scenes'], st.session_state.storyboard_images)):
+                if image:
+                    col1, col2 = st.columns([1, 2])
+                    with col1:
+                        st.image(image, caption=f"Scene {i+1}", use_column_width=True)
+                        # Refinement option
+                        if st.button(f"🔄 Regenerate Scene {i+1}", key=f"regen_{i}"):
+                            with st.spinner(f"Regenerating scene {i+1}..."):
+                                new_image = generate_storyboard_image(
+                                    scene['description'],
+                                    art_style,
+                                    hf_token
+                                )
+                                if new_image:
+                                    st.session_state.storyboard_images[i] = new_image
+                                    st.experimental_rerun()
+                    with col2:
+                        st.write(f"**Scene {i+1}: {scene.get('duration', 'N/A')}**")
+                        st.write(f"**Description:** {scene.get('description', 'N/A')}")
+                        st.write(f"**Dialogue:** {scene.get('dialogue', 'N/A')}")
+    with tab3:
+        st.subheader("🎥 Video Preview")
+        if st.button("🎬 Create Video Preview"):
+            if st.session_state.storyboard_images:
+                with st.spinner("Creating video preview..."):
+                    video_path = create_video_preview(
+                        st.session_state.storyboard_images,
+                        script_data
+                    )
+                    if video_path:
+                        st.session_state.video_preview = video_path
+                        st.success("Video preview created!")
+        if st.session_state.video_preview and os.path.exists(st.session_state.video_preview):
+            st.video(st.session_state.video_preview)
+        # Export options
+        st.subheader("📥 Export Options")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            if st.button("📄 Download Script (JSON)"):
+                script_json = json.dumps(script_data, indent=2)
+                st.download_button(
+                    label="Download JSON",
+                    data=script_json,
+                    file_name="video_script.json",
+                    mime="application/json"
+                )
+        with col2:
+            if st.button("🖼️ Download Storyboard Images"):
+                if st.session_state.storyboard_images:
+                    # Create a zip file with all images
+                    import zipfile
+                    zip_buffer = io.BytesIO()
+                    with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
+                        for i, image in enumerate(st.session_state.storyboard_images):
+                            if image:
+                                img_buffer = io.BytesIO()
+                                image.save(img_buffer, format='PNG')
+                                zip_file.writestr(f"scene_{i+1}.png", img_buffer.getvalue())
+                    st.download_button(
+                        label="Download ZIP",
+                        data=zip_buffer.getvalue(),
+                        file_name="storyboard_images.zip",
+                        mime="application/zip"
+                    )
+        with col3:
+            if st.session_state.video_preview:
+                with open(st.session_state.video_preview, 'rb') as f:
+                    st.download_button(
+                        label="🎥 Download Video",
+                        data=f.read(),
+                        file_name="video_preview.mp4",
+                        mime="video/mp4"
+                    )
+# Footer
+st.markdown("---")
+st.markdown("🤖 **Powered by**: Gemini AI • Stable Diffusion • Hugging Face")
+st.markdown("💡 **Tips**: Use detailed topic descriptions for better results. Experiment with different art styles!")
+# Sidebar info
+with st.sidebar:
+    st.markdown("---")
+    st.markdown("### 📚 How to Use")
+    st.markdown("""
+    1. **Set up APIs**: Add your Gemini and HuggingFace tokens
+    2. **Define Video**: Enter topic, length, and style
+    3. **Generate**: Click the generate button
+    4. **Refine**: Regenerate individual scenes if needed
+    5. **Export**: Download script, images, or video
+    """)
+    st.markdown("### 🔗 Get API Keys")
+    st.markdown("[Gemini API](https://makersuite.google.com/app/apikey)")
+    st.markdown("[Hugging Face Token](https://huggingface.co/settings/tokens)")