Spaces:

maria355
/

AI-Video-Script-and-Storyboard-Generator

Running

App Files Community

maria355 committed on Sep 11, 2025

Commit

b37f161

verified ·

1 Parent(s): bb04d06

Update app.py

Browse files

Files changed (1) hide show

app.py +152 -38

app.py CHANGED Viewed

@@ -105,7 +105,8 @@ with col2:
 def generate_script_with_gemini(topic, length, style, tone, platform):
     """Generate video script using Gemini API"""
     try:
-        model = genai.GenerativeModel('gemini-pro')
         prompt = f"""
         Create a detailed video script for the following specifications:
@@ -134,6 +135,7 @@ def generate_script_with_gemini(topic, length, style, tone, platform):
         Make sure the scenes add up to the total duration and are engaging for {platform}.
         Include specific visual descriptions that can be used to generate storyboard images.
         """
         response = model.generate_content(prompt)
@@ -154,32 +156,61 @@ def generate_script_with_gemini(topic, length, style, tone, platform):
         return None
     except Exception as e:
         st.error(f"Error generating script: {str(e)}")
-        return None
 def generate_storyboard_image(scene_description, art_style):
-    """Generate storyboard image using Stable Diffusion"""
     try:
-        # Use Hugging Face Inference API for image generation
         client = InferenceClient(token=hf_token)
-        # Enhance prompt based on art style
         style_prompts = {
-            "Realistic": "photorealistic, high quality, detailed",
-            "Cartoon": "cartoon style, animated, colorful, Disney-like",
-            "Cinematic": "cinematic lighting, dramatic, film still, high contrast",
-            "Minimalistic": "minimalist, clean, simple, geometric",
-            "Sketch": "pencil sketch, hand-drawn, artistic, black and white",
-            "Digital Art": "digital art, concept art, detailed, vibrant colors"
         }
-        enhanced_prompt = f"{scene_description}, {style_prompts.get(art_style, '')}, storyboard frame, professional"
-        image = client.text_to_image(
-            enhanced_prompt,
-            model="stabilityai/stable-diffusion-2-1"
-        )
-        return image
     except Exception as e:
         st.error(f"Error generating image: {str(e)}")
@@ -283,6 +314,42 @@ def text_to_speech(text, language='en'):
         st.error(f"Error generating speech: {str(e)}")
         return None
 # Main generation button
 if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
     if not video_topic:
@@ -291,6 +358,11 @@ if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
         with st.spinner("🤖 Generating script with AI..."):
             script_data = generate_script_with_gemini(video_topic, video_length, style, tone, platform)
         if script_data:
             st.session_state.generated_script = script_data
             st.success("✅ Script generated successfully!")
@@ -301,16 +373,22 @@ if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
                 progress_bar = st.progress(0)
                 for i, scene in enumerate(script_data['scenes']):
-                    image = generate_storyboard_image(
-                        scene['description'],
-                        art_style
-                    )
-                    images.append(image)
-                    progress_bar.progress((i + 1) / len(script_data['scenes']))
                 st.session_state.storyboard_images = images
             st.success("✅ Storyboard images generated!")
 # Display results
 if st.session_state.generated_script:
@@ -343,19 +421,20 @@ if st.session_state.generated_script:
                 # Add text-to-speech for dialogue
                 if scene.get('dialogue'):
                     if st.button(f"🔊 Play Audio - Scene {i}", key=f"audio_{i}"):
-                        audio_buffer = text_to_speech(scene['dialogue'])
-                        if audio_buffer:
-                            st.audio(audio_buffer.getvalue(), format='audio/mp3')
     with tab2:
         if st.session_state.storyboard_images:
             st.subheader("🎨 Storyboard Images")
             for i, (scene, image) in enumerate(zip(script_data['scenes'], st.session_state.storyboard_images)):
-                if image:
-                    col1, col2 = st.columns([1, 2])
-                    with col1:
                         st.image(image, caption=f"Scene {i+1}", use_column_width=True)
                         # Refinement option
@@ -368,11 +447,24 @@ if st.session_state.generated_script:
                                 if new_image:
                                     st.session_state.storyboard_images[i] = new_image
                                     st.rerun()
-                    with col2:
-                        st.write(f"**Scene {i+1}: {scene.get('duration', 'N/A')}**")
-                        st.write(f"**Description:** {scene.get('description', 'N/A')}")
-                        st.write(f"**Dialogue:** {scene.get('dialogue', 'N/A')}")
     with tab3:
         st.subheader("🎥 Video Preview")
@@ -475,7 +567,7 @@ if st.session_state.generated_script:
 # Footer
 st.markdown("---")
-st.markdown("🤖 **Powered by**: Gemini AI • Stable Diffusion • Hugging Face")
 st.markdown("💡 **Tips**: Use detailed topic descriptions for better results. Experiment with different art styles!")
 # Sidebar info
@@ -493,13 +585,35 @@ with st.sidebar:
     st.markdown("### 🔧 Features")
     st.markdown("""
     - ✅ **AI Script Generation** with Gemini
-    - ✅ **Visual Storyboards** with Stable Diffusion
     - ✅ **Text-to-Speech** for narration
     - ✅ **Multiple Export Formats**
     - ✅ **Scene Regeneration**
     """)
     if not MOVIEPY_AVAILABLE:
         st.markdown("---")
         st.markdown("### ℹ️ Note")
-        st.markdown("Video preview feature disabled for faster deployment. GIF preview available!")

 def generate_script_with_gemini(topic, length, style, tone, platform):
     """Generate video script using Gemini API"""
     try:
+        # Use the current Gemini model
+        model = genai.GenerativeModel('gemini-1.5-flash')
         prompt = f"""
         Create a detailed video script for the following specifications:
         Make sure the scenes add up to the total duration and are engaging for {platform}.
         Include specific visual descriptions that can be used to generate storyboard images.
+        Make sure to return valid JSON only, no additional text or formatting.
         """
         response = model.generate_content(prompt)
         return None
     except Exception as e:
         st.error(f"Error generating script: {str(e)}")
+        # Try alternative model if the first one fails
+        try:
+            st.info("Trying alternative model...")
+            model = genai.GenerativeModel('gemini-1.5-pro')
+            response = model.generate_content(prompt)
+            response_text = response.text.strip()
+            if response_text.startswith("```json"):
+                response_text = response_text[7:-3]
+            elif response_text.startswith("```"):
+                response_text = response_text[3:-3]
+            script_data = json.loads(response_text)
+            return script_data
+        except:
+            return None
 def generate_storyboard_image(scene_description, art_style):
+    """Generate storyboard image using free Hugging Face models"""
     try:
+        # Initialize Hugging Face client
         client = InferenceClient(token=hf_token)
+        # Style-specific enhancements
         style_prompts = {
+            "Realistic": "photorealistic, high quality, detailed, professional photography",
+            "Cartoon": "cartoon style, animated, colorful, Disney-like, illustration",
+            "Cinematic": "cinematic lighting, dramatic, film still, high contrast, movie scene",
+            "Minimalistic": "minimalist, clean, simple, geometric, modern design",
+            "Sketch": "pencil sketch, hand-drawn, artistic, black and white line art",
+            "Digital Art": "digital art, concept art, detailed, vibrant colors, fantasy art"
         }
+        enhanced_prompt = f"{scene_description}, {style_prompts.get(art_style, '')}, storyboard frame, professional, high quality"
+        # Try multiple free models in case one fails
+        models_to_try = [
+            "black-forest-labs/FLUX.1-schnell",
+            "stabilityai/stable-diffusion-2-1",
+            "runwayml/stable-diffusion-v1-5",
+            "CompVis/stable-diffusion-v1-4"
+        ]
+        for model in models_to_try:
+            try:
+                image = client.text_to_image(
+                    enhanced_prompt,
+                    model=model
+                )
+                return image
+            except Exception as model_error:
+                st.warning(f"Model {model} failed, trying next...")
+                continue
+        # If all models fail, return None
+        st.error("All image generation models failed")
+        return None
     except Exception as e:
         st.error(f"Error generating image: {str(e)}")
         st.error(f"Error generating speech: {str(e)}")
         return None
def generate_fallback_script(topic, length, style, tone, platform):
    """Build a template script locally, for use when Gemini returns nothing.

    Args:
        topic: Subject of the video; interpolated into the title and into
            every scene's description, dialogue and visual elements.
        length: Requested duration as text, e.g. "30 seconds", "2 minutes",
            "2-3 minutes" or "1.5 minutes". The first number that is
            directly followed by "second"/"minute" (case-insensitive) is
            used; when none is found, or it is not positive, the script
            defaults to 60 seconds.
        style: Visual style name; used verbatim in the title and
            lower-cased in each scene description.
        tone: Unused; accepted so the signature matches
            generate_script_with_gemini.
        platform: Unused; accepted for the same reason.

    Returns:
        A dict with "title", "total_duration" (the ``length`` argument,
        verbatim) and "scenes" (a list of scene dicts), or None when
        ``length`` or ``style`` is not a string.
    """
    import re  # local import: the file's import block is not visible here

    if not isinstance(length, str) or not isinstance(style, str):
        return None

    default_seconds = 60
    seconds_per_scene = 12  # target pacing: roughly 10-15 seconds per scene
    unit_seconds = {"second": 1, "minute": 60}

    total_seconds = default_seconds
    found = re.search(r"(\d+(?:\.\d+)?)[\s-]*(second|minute)", length.casefold())
    if found:
        parsed = round(float(found.group(1)) * unit_seconds[found.group(2)])
        if parsed > 0:
            total_seconds = parsed

    # At least two scenes when the duration allows it, but never more scenes
    # than seconds, so that no scene ends up with a zero-second duration.
    scene_count = min(max(2, total_seconds // seconds_per_scene), total_seconds)

    # Hand the division remainder to the earliest scenes, one second each,
    # so the scene durations add up to exactly total_seconds.
    base, extra = divmod(total_seconds, scene_count)

    style_lower = style.lower()
    scenes = [
        {
            "scene_number": number,
            "duration": f"{base + (1 if number <= extra else 0)} seconds",
            "description": f"Scene {number} showing {topic} in {style_lower} style",
            "dialogue": f"Narration for scene {number} about {topic}",
            "camera_angle": "Medium shot" if number % 2 else "Close-up",
            "visual_elements": f"Key visuals related to {topic}",
        }
        for number in range(1, scene_count + 1)
    ]

    return {
        "title": f"{topic} - {style} Video",
        "total_duration": length,
        "scenes": scenes,
    }
 # Main generation button
 if st.button("🚀 Generate Video Script & Storyboard", type="primary"):
     if not video_topic:
         with st.spinner("🤖 Generating script with AI..."):
             script_data = generate_script_with_gemini(video_topic, video_length, style, tone, platform)
+            # If Gemini fails, use fallback
+            if not script_data:
+                st.warning("Primary AI model failed, using fallback script generation...")
+                script_data = generate_fallback_script(video_topic, video_length, style, tone, platform)
         if script_data:
             st.session_state.generated_script = script_data
             st.success("✅ Script generated successfully!")
                 progress_bar = st.progress(0)
                 for i, scene in enumerate(script_data['scenes']):
+                    with st.spinner(f"Generating image {i+1}/{len(script_data['scenes'])}..."):
+                        image = generate_storyboard_image(
+                            scene['description'],
+                            art_style
+                        )
+                        images.append(image)
+                        progress_bar.progress((i + 1) / len(script_data['scenes']))
+                        # Add a small delay to avoid rate limiting
+                        time.sleep(1)
                 st.session_state.storyboard_images = images
             st.success("✅ Storyboard images generated!")
+        else:
+            st.error("Failed to generate script. Please try again or modify your request.")
 # Display results
 if st.session_state.generated_script:
                 # Add text-to-speech for dialogue
                 if scene.get('dialogue'):
                     if st.button(f"🔊 Play Audio - Scene {i}", key=f"audio_{i}"):
+                        with st.spinner("Generating audio..."):
+                            audio_buffer = text_to_speech(scene['dialogue'])
+                            if audio_buffer:
+                                st.audio(audio_buffer.getvalue(), format='audio/mp3')
     with tab2:
         if st.session_state.storyboard_images:
             st.subheader("🎨 Storyboard Images")
             for i, (scene, image) in enumerate(zip(script_data['scenes'], st.session_state.storyboard_images)):
+                col1, col2 = st.columns([1, 2])
+                with col1:
+                    if image:
                         st.image(image, caption=f"Scene {i+1}", use_column_width=True)
                         # Refinement option
                                 if new_image:
                                     st.session_state.storyboard_images[i] = new_image
                                     st.rerun()
+                    else:
+                        st.write("❌ Image generation failed for this scene")
+                        if st.button(f"🔄 Try Again - Scene {i+1}", key=f"retry_{i}"):
+                            with st.spinner(f"Trying to generate scene {i+1}..."):
+                                new_image = generate_storyboard_image(
+                                    scene['description'],
+                                    art_style
+                                )
+                                if new_image:
+                                    st.session_state.storyboard_images[i] = new_image
+                                    st.rerun()
+                with col2:
+                    st.write(f"**Scene {i+1}: {scene.get('duration', 'N/A')}**")
+                    st.write(f"**Description:** {scene.get('description', 'N/A')}")
+                    st.write(f"**Dialogue:** {scene.get('dialogue', 'N/A')}")
+        else:
+            st.info("No storyboard images generated yet. Click the generate button above.")
     with tab3:
         st.subheader("🎥 Video Preview")
 # Footer
 st.markdown("---")
+st.markdown("🤖 **Powered by**: Gemini AI • Free Hugging Face Models • gTTS")
 st.markdown("💡 **Tips**: Use detailed topic descriptions for better results. Experiment with different art styles!")
 # Sidebar info
     st.markdown("### 🔧 Features")
     st.markdown("""
     - ✅ **AI Script Generation** with Gemini
+    - ✅ **Visual Storyboards** with Free HF Models
     - ✅ **Text-to-Speech** for narration
     - ✅ **Multiple Export Formats**
     - ✅ **Scene Regeneration**
+    - ✅ **GIF Preview Creation**
+    """)
+    st.markdown("---")
+    st.markdown("### 🆓 Free Models Used")
+    st.markdown("""
+    - **Script**: Gemini 1.5 Flash/Pro
+    - **Images**: FLUX.1, Stable Diffusion
+    - **Speech**: Google TTS
     """)
     if not MOVIEPY_AVAILABLE:
         st.markdown("---")
         st.markdown("### ℹ️ Note")
+        st.markdown("Video preview feature disabled for faster deployment. GIF preview available!")
+# Additional configuration section
+st.sidebar.markdown("---")
+st.sidebar.markdown("### ⚙️ Configuration")
+st.sidebar.markdown("""
+**Required Environment Variables:**
+- `GEMINI_API_KEY`: Your Gemini API key
+- `HF_TOKEN`: Your Hugging Face token
+**Free API Limits:**
+- Gemini: 15 RPM, 32K TPM
+- Hugging Face: Rate limited per model
+""")