Spaces:

Munaf1987
/

replacebg

Sleeping

App Files Files Community

Munaf1987 committed on Jun 30, 2025

Commit

8cd6e88

verified ·

1 Parent(s): caaaf21

Update app.py

Browse files

Files changed (1) hide show

app.py +248 -180

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ import gc
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from diffusers import (
     StableDiffusionPipeline,
     StableVideoDiffusionPipeline,
     AnimateDiffPipeline,
     MotionAdapter,
@@ -41,51 +42,114 @@ class CartoonFilmGenerator:
         print("Loading open-source models...")
-        # 1. Text generation for script enhancement (Open source)
-        self.text_generator = pipeline(
-            "text-generation",
-            model="microsoft/DialoGPT-large",
-            tokenizer="microsoft/DialoGPT-large",
-            device=0 if self.device == "cuda" else -1,
-            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
-        )
-        # 2. Image generation - SDXL (fully open source)
-        self.image_generator = StableDiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0",
-            torch_dtype=torch.float16,
-            use_safetensors=True,
-            variant="fp16"
-        ).to(self.device)
-        # Enable memory efficient attention
-        self.image_generator.enable_memory_efficient_attention()
-        self.image_generator.enable_vae_slicing()
-        # 3. Video generation - AnimateDiff (open source)
-        adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
-        self.video_generator = AnimateDiffPipeline.from_pretrained(
-            "runwayml/stable-diffusion-v1-5",
-            motion_adapter=adapter,
-            torch_dtype=torch.float16
-        ).to(self.device)
-        self.video_generator.scheduler = DDIMScheduler.from_pretrained(
-            "runwayml/stable-diffusion-v1-5",
-            subfolder="scheduler",
-            clip_sample=False,
-            timestep_spacing="linspace",
-            beta_schedule="linear",
-            steps_offset=1,
-        )
-        self.video_generator.enable_vae_slicing()
-        self.video_generator.enable_memory_efficient_attention()
-        # 4. Text-to-Speech (Open source XTTS)
-        self.tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(self.device)
         self.models_loaded = True
-        print("All open-source models loaded successfully!")
     def clear_gpu_memory(self):
         """Clear GPU memory between operations"""
@@ -96,39 +160,8 @@ class CartoonFilmGenerator:
     def enhance_script_with_llm(self, raw_script: str) -> Dict[str, Any]:
         """Use open-source LLM to enhance the script"""
-        # Structured prompt for script enhancement
-        enhancement_prompt = f"""
-        Original script: {raw_script}
-        Transform this into a detailed 8-minute cartoon film with:
-        - 12 scenes (40 seconds each)
-        - Consistent characters
-        - Clear scene descriptions
-        - Simple dialogue
-        - Visual descriptions for animation
-        Create a story structure with beginning, middle, and end.
-        """
-        try:
-            # Use the text generation pipeline
-            response = self.text_generator(
-                enhancement_prompt,
-                max_length=1000,
-                num_return_sequences=1,
-                temperature=0.7,
-                do_sample=True,
-                pad_token_id=self.text_generator.tokenizer.eos_token_id
-            )
-            enhanced_script = response[0]['generated_text']
-        except Exception as e:
-            print(f"LLM enhancement failed: {e}")
-            enhanced_script = raw_script
-        # Create structured output (fallback method)
-        return self.create_structured_script(raw_script, enhanced_script)
     def create_structured_script(self, original: str, enhanced: str) -> Dict[str, Any]:
         """Create structured script data"""
@@ -154,18 +187,14 @@ class CartoonFilmGenerator:
         else:
             setting = "colorful fantasy world"
-        # Create 12 scenes for 8-minute film
         scenes = []
         scene_templates = [
             "Introduction of the main character",
-            "Character discovers the challenge",
             "Meeting helpful friends",
             "First obstacle appears",
             "Character shows determination",
-            "Meeting the antagonist",
-            "Major challenge or conflict",
-            "Character feels doubt",
-            "Friends provide support",
             "Final confrontation",
             "Resolution and victory",
             "Happy ending celebration"
@@ -177,11 +206,11 @@ class CartoonFilmGenerator:
                 "description": f"{template} in the {setting}",
                 "characters_present": [main_char] if i % 3 != 0 else [main_char, "supporting character"],
                 "dialogue": [
-                    {"character": main_char, "text": f"Scene {i+1} dialogue based on: {template}"}
                 ],
                 "background": f"{setting} with {['sunrise', 'daylight', 'sunset', 'moonlight'][i % 4]} lighting",
-                "mood": ["hopeful", "determined", "friendly", "tense", "brave", "worried", "dramatic", "uncertain", "supportive", "exciting", "triumphant", "joyful"][i],
-                "duration": "40"
             })
         return {
@@ -203,10 +232,14 @@ class CartoonFilmGenerator:
     @spaces.GPU
     def generate_character_images(self, characters: List[Dict]) -> Dict[str, str]:
-        """Generate character images using SDXL"""
         self.load_models()
         character_images = {}
         for character in characters:
             prompt = f"cartoon character sheet, {character['description']}, multiple poses, clean white background, 2D animation style, colorful, expressive, high quality"
             negative_prompt = "realistic, 3D, dark, scary, inappropriate, low quality, blurry"
@@ -215,21 +248,22 @@ class CartoonFilmGenerator:
                 image = self.image_generator(
                     prompt=prompt,
                     negative_prompt=negative_prompt,
-                    num_inference_steps=25,
                     guidance_scale=7.5,
-                    height=1024,
-                    width=1024
                 ).images[0]
                 char_path = f"{self.temp_dir}/character_{character['name'].replace(' ', '_')}.png"
                 image.save(char_path)
                 character_images[character['name']] = char_path
                 # Clear memory after each character
                 self.clear_gpu_memory()
             except Exception as e:
-                print(f"Error generating character {character['name']}: {e}")
         return character_images
@@ -239,6 +273,10 @@ class CartoonFilmGenerator:
         self.load_models()
         background_images = {}
         for scene in scenes:
             prompt = f"cartoon background, {scene['background']}, {scene['mood']} atmosphere, animated style, no characters, detailed environment, bright colors, 2D animation"
             negative_prompt = "characters, people, realistic, dark, scary, low quality"
@@ -247,100 +285,127 @@ class CartoonFilmGenerator:
                 image = self.image_generator(
                     prompt=prompt,
                     negative_prompt=negative_prompt,
-                    num_inference_steps=20,
                     guidance_scale=7.0,
-                    height=576,
-                    width=1024  # 16:9 aspect ratio
                 ).images[0]
                 bg_path = f"{self.temp_dir}/background_scene_{scene['scene_number']}.png"
                 image.save(bg_path)
                 background_images[scene['scene_number']] = bg_path
                 # Clear memory after each background
                 self.clear_gpu_memory()
             except Exception as e:
-                print(f"Error generating background for scene {scene['scene_number']}: {e}")
         return background_images
     @spaces.GPU
     def generate_scene_videos(self, scenes: List[Dict], character_images: Dict, background_images: Dict) -> List[str]:
-        """Generate animated videos for each scene using AnimateDiff"""
         self.load_models()
         scene_videos = []
         for scene in scenes:
             try:
-                # Create prompt for scene animation
-                characters_text = ", ".join(scene['characters_present'])
-                prompt = f"cartoon animation, {characters_text} in {scene['background']}, {scene['mood']} mood, 2D animated style, smooth motion, family friendly"
-                negative_prompt = "realistic, 3D, static, blurry, low quality, scary"
-                # Generate animated video using AnimateDiff
-                video_frames = self.video_generator(
-                    prompt=prompt,
-                    negative_prompt=negative_prompt,
-                    num_frames=16,  # 16 frames for smooth motion
-                    guidance_scale=7.5,
-                    num_inference_steps=20,
-                    height=576,
-                    width=1024
-                ).frames[0]
-                # Save video
-                video_path = f"{self.temp_dir}/scene_{scene['scene_number']}.mp4"
-                export_to_video(video_frames, video_path, fps=8)
-                scene_videos.append(video_path)
-                # Clear GPU memory
-                self.clear_gpu_memory()
             except Exception as e:
-                print(f"Error generating video for scene {scene['scene_number']}: {e}")
-                # Fallback: create static video
                 if scene['scene_number'] in background_images:
-                    video_path = self.create_static_video(
-                        Image.open(background_images[scene['scene_number']]),
-                        int(scene.get('duration', 40)),
-                        scene['scene_number']
-                    )
-                    scene_videos.append(video_path)
         return scene_videos
     def create_static_video(self, image: Image.Image, duration: int, scene_num: int) -> str:
-        """Fallback: Create video from static image"""
         video_path = f"{self.temp_dir}/scene_{scene_num}.mp4"
-        # Convert PIL to OpenCV
-        img_array = np.array(image.resize((1024, 576)))
-        img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
-        # Create video writer
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        fps = 24
-        out = cv2.VideoWriter(video_path, fourcc, fps, (1024, 576))
-        # Add simple zoom effect
-        for i in range(duration * fps):
-            scale = 1.0 + (i / (duration * fps)) * 0.1  # Slight zoom
-            h, w = img_array.shape[:2]
-            center_x, center_y = w // 2, h // 2
-            # Create zoom matrix
-            M = cv2.getRotationMatrix2D((center_x, center_y), 0, scale)
-            zoomed = cv2.warpAffine(img_array, M, (w, h))
-            out.write(zoomed)
-        out.release()
-        return video_path
     @spaces.GPU
     def generate_audio(self, scenes: List[Dict]) -> str:
         """Generate audio using open-source XTTS"""
         self.load_models()
         try:
@@ -375,7 +440,7 @@ class CartoonFilmGenerator:
             return audio_path
         except Exception as e:
-            print(f"Audio generation failed: {e}")
             return None
     def merge_videos_with_ffmpeg(self, scene_videos: List[str], audio_path: str = None) -> str:
@@ -410,35 +475,36 @@ class CartoonFilmGenerator:
             result = subprocess.run(cmd, capture_output=True, text=True)
             if result.returncode == 0:
                 return final_video_path
             else:
-                print(f"FFmpeg error: {result.stderr}")
                 return None
         except Exception as e:
-            print(f"Video merging failed: {e}")
             return None
     @spaces.GPU
     def generate_cartoon_film(self, script: str, include_audio: bool = True) -> tuple:
         """Main function to generate complete cartoon film"""
         try:
-            progress_updates = []
             # Step 1: Enhance script
-            progress_updates.append("🎬 Processing and enhancing script...")
             processed_script = self.enhance_script_with_llm(script)
             # Step 2: Generate characters
-            progress_updates.append("👥 Creating character designs...")
             character_images = self.generate_character_images(processed_script['characters'])
             # Step 3: Generate backgrounds
-            progress_updates.append("🏞️ Generating scene backgrounds...")
             background_images = self.generate_background_images(processed_script['scenes'])
             # Step 4: Generate scene videos
-            progress_updates.append("🎥 Creating animated scenes...")
             scene_videos = self.generate_scene_videos(
                 processed_script['scenes'],
                 character_images,
@@ -448,20 +514,23 @@ class CartoonFilmGenerator:
             # Step 5: Generate audio
             audio_path = None
             if include_audio:
-                progress_updates.append("🎵 Generating audio and voices...")
                 audio_path = self.generate_audio(processed_script['scenes'])
             # Step 6: Merge final video
-            progress_updates.append("🎞️ Merging final cartoon film...")
             final_video = self.merge_videos_with_ffmpeg(scene_videos, audio_path)
             if final_video and os.path.exists(final_video):
                 return final_video, processed_script, "✅ Cartoon film generated successfully!"
             else:
-                return None, processed_script, "❌ Error in final video generation"
         except Exception as e:
-            # Return error information in proper JSON format
             error_info = {
                 "error": True,
                 "message": str(e),
@@ -503,16 +572,16 @@ with gr.Blocks(
     gr.Markdown("""
     # 🎬 AI Cartoon Film Generator (100% Open Source)
-    Transform your script into a complete 7-10 minute cartoon film using only open-source models!
     **🔥 Features:**
-    - **Stable Diffusion XL** for high-quality character & background generation
-    - **AnimateDiff** for smooth video animation
-    - **XTTS** for multilingual voice synthesis
-    - **All models run on ZeroGPU** - completely free!
     - **No API keys required** - everything is open source
-    **⚡ Optimized for Hugging Face ZeroGPU**
     """)
     with gr.Row():
@@ -527,7 +596,7 @@ with gr.Blocks(
             with gr.Row():
                 include_audio = gr.Checkbox(
                     label="🎵 Include AI-Generated Voices",
-                    value=True,
                     info="Generate speech for character dialogue"
                 )
@@ -538,9 +607,9 @@ with gr.Blocks(
             )
             gr.Markdown("""
-            **⏱️ Processing Time:** 10-15 minutes
-            **🎥 Output:** 7-10 minute MP4 film
-            **📱 All models:** 100% open source & free
             """)
         with gr.Column(scale=1):
@@ -551,12 +620,12 @@ with gr.Blocks(
             status_output = gr.Textbox(
                 label="📊 Status",
-                lines=2
             )
             script_details = gr.JSON(
                 label="📋 Generated Script Details",
-                visible=False
             )
     # Event handlers
@@ -570,10 +639,10 @@ with gr.Blocks(
     # Example scripts
     gr.Examples(
         examples=[
-            ["A brave young explorer discovers a magical forest where talking animals help her find a lost treasure that will save her village.", True],
-            ["Two best friends embark on a space adventure to help a friendly alien return home while learning about friendship and courage.", True],
-            ["A small robot in a big city learns about emotions and friendship when it meets a lonely child who needs a companion.", False],
-            ["A young artist discovers their drawings come to life and must help the characters solve problems in both the real and drawn worlds.", True]
         ],
         inputs=[script_input, include_audio],
         label="💡 Try these example scripts:"
@@ -581,14 +650,13 @@ with gr.Blocks(
     gr.Markdown("""
     ---
-    **🔧 Technical Details:**
-    - **Image Generation:** Stable Diffusion XL (open source)
-    - **Video Animation:** AnimateDiff (open source)
-    - **Voice Synthesis:** XTTS v2 (open source)
-    - **Script Enhancement:** DialoGPT (open source)
-    - **Infrastructure:** Hugging Face ZeroGPU (free)
-    **💝 Completely free and open source!** No API keys or subscriptions required.
     """)
 if __name__ == "__main__":

 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from diffusers import (
     StableDiffusionPipeline,
+    StableDiffusionXLPipeline,
     StableVideoDiffusionPipeline,
     AnimateDiffPipeline,
     MotionAdapter,
         print("Loading open-source models...")
+        try:
+            # 1. Text generation for script enhancement (Open source)
+            self.text_generator = pipeline(
+                "text-generation",
+                model="microsoft/DialoGPT-large",
+                tokenizer="microsoft/DialoGPT-large",
+                device=0 if self.device == "cuda" else -1,
+                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
+            )
+            print("✅ Text generator loaded")
+        except Exception as e:
+            print(f"❌ Text generator failed: {e}")
+            self.text_generator = None
+        try:
+            # 2. Image generation - SDXL (fully open source)
+            self.image_generator = StableDiffusionXLPipeline.from_pretrained(
+                "stabilityai/stable-diffusion-xl-base-1.0",
+                torch_dtype=torch.float16,
+                use_safetensors=True,
+                variant="fp16"
+            ).to(self.device)
+            # Enable memory optimizations (updated methods)
+            self.image_generator.enable_vae_slicing()
+            self.image_generator.enable_vae_tiling()
+            if hasattr(self.image_generator, 'enable_memory_efficient_attention'):
+                self.image_generator.enable_memory_efficient_attention()
+            elif hasattr(self.image_generator, 'enable_xformers_memory_efficient_attention'):
+                try:
+                    self.image_generator.enable_xformers_memory_efficient_attention()
+                except:
+                    print("XFormers not available, using default attention")
+            print("✅ Image generator (SDXL) loaded")
+        except Exception as e:
+            print(f"❌ SDXL failed, trying SD 1.5: {e}")
+            try:
+                # Fallback to SD 1.5
+                self.image_generator = StableDiffusionPipeline.from_pretrained(
+                    "runwayml/stable-diffusion-v1-5",
+                    torch_dtype=torch.float16,
+                    use_safetensors=True
+                ).to(self.device)
+                # Enable memory optimizations for SD 1.5
+                self.image_generator.enable_vae_slicing()
+                if hasattr(self.image_generator, 'enable_vae_tiling'):
+                    self.image_generator.enable_vae_tiling()
+                if hasattr(self.image_generator, 'enable_xformers_memory_efficient_attention'):
+                    try:
+                        self.image_generator.enable_xformers_memory_efficient_attention()
+                    except:
+                        print("XFormers not available")
+                print("✅ Image generator (SD 1.5) loaded")
+            except Exception as e2:
+                print(f"❌ All image generators failed: {e2}")
+                self.image_generator = None
+        try:
+            # 3. Video generation - AnimateDiff (open source)
+            adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
+            self.video_generator = AnimateDiffPipeline.from_pretrained(
+                "runwayml/stable-diffusion-v1-5",
+                motion_adapter=adapter,
+                torch_dtype=torch.float16
+            ).to(self.device)
+            self.video_generator.scheduler = DDIMScheduler.from_pretrained(
+                "runwayml/stable-diffusion-v1-5",
+                subfolder="scheduler",
+                clip_sample=False,
+                timestep_spacing="linspace",
+                beta_schedule="linear",
+                steps_offset=1,
+            )
+            # Enable memory optimizations
+            self.video_generator.enable_vae_slicing()
+            if hasattr(self.video_generator, 'enable_vae_tiling'):
+                self.video_generator.enable_vae_tiling()
+            if hasattr(self.video_generator, 'enable_xformers_memory_efficient_attention'):
+                try:
+                    self.video_generator.enable_xformers_memory_efficient_attention()
+                except:
+                    print("XFormers not available for video generator")
+            print("✅ Video generator (AnimateDiff) loaded")
+        except Exception as e:
+            print(f"❌ Video generator failed: {e}")
+            self.video_generator = None
+        try:
+            # 4. Text-to-Speech (Open source XTTS)
+            self.tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(self.device)
+            print("✅ TTS model loaded")
+        except Exception as e:
+            print(f"❌ TTS model failed: {e}")
+            self.tts_model = None
         self.models_loaded = True
+        print("🎬 Model loading completed!")
     def clear_gpu_memory(self):
         """Clear GPU memory between operations"""
     def enhance_script_with_llm(self, raw_script: str) -> Dict[str, Any]:
         """Return structured script data for ``raw_script``.

         Despite the name, no language model is invoked here: the raw text is
         handed to ``create_structured_script`` as both the original and the
         "enhanced" script.
         """
+        # The raw script doubles as the "enhanced" argument; no LLM call is made.
+        return self.create_structured_script(raw_script, raw_script)
     def create_structured_script(self, original: str, enhanced: str) -> Dict[str, Any]:
         """Create structured script data"""
         else:
             setting = "colorful fantasy world"
+        # Create 8 scenes for shorter processing time
         scenes = []
         scene_templates = [
             "Introduction of the main character",
+            "Character discovers the challenge",
             "Meeting helpful friends",
             "First obstacle appears",
             "Character shows determination",
             "Final confrontation",
             "Resolution and victory",
             "Happy ending celebration"
                 "description": f"{template} in the {setting}",
                 "characters_present": [main_char] if i % 3 != 0 else [main_char, "supporting character"],
                 "dialogue": [
+                    {"character": main_char, "text": f"This is scene {i+1} where {template.lower()}."}
                 ],
                 "background": f"{setting} with {['sunrise', 'daylight', 'sunset', 'moonlight'][i % 4]} lighting",
+                "mood": ["hopeful", "determined", "friendly", "tense", "brave", "exciting", "triumphant", "joyful"][i],
+                "duration": "30"
             })
         return {
     @spaces.GPU
     def generate_character_images(self, characters: List[Dict]) -> Dict[str, str]:
+        """Generate character images using available image generator"""
         self.load_models()
         character_images = {}
+        if not self.image_generator:
+            print("❌ No image generator available")
+            return character_images
         for character in characters:
             prompt = f"cartoon character sheet, {character['description']}, multiple poses, clean white background, 2D animation style, colorful, expressive, high quality"
             negative_prompt = "realistic, 3D, dark, scary, inappropriate, low quality, blurry"
                 image = self.image_generator(
                     prompt=prompt,
                     negative_prompt=negative_prompt,
+                    num_inference_steps=20,  # Reduced for speed
                     guidance_scale=7.5,
+                    height=768,  # Smaller for memory efficiency
+                    width=768
                 ).images[0]
                 char_path = f"{self.temp_dir}/character_{character['name'].replace(' ', '_')}.png"
                 image.save(char_path)
                 character_images[character['name']] = char_path
+                print(f"✅ Generated character: {character['name']}")
                 # Clear memory after each character
                 self.clear_gpu_memory()
             except Exception as e:
+                print(f"❌ Error generating character {character['name']}: {e}")
         return character_images
         self.load_models()
         background_images = {}
+        if not self.image_generator:
+            print("❌ No image generator available")
+            return background_images
         for scene in scenes:
             prompt = f"cartoon background, {scene['background']}, {scene['mood']} atmosphere, animated style, no characters, detailed environment, bright colors, 2D animation"
             negative_prompt = "characters, people, realistic, dark, scary, low quality"
                 image = self.image_generator(
                     prompt=prompt,
                     negative_prompt=negative_prompt,
+                    num_inference_steps=15,  # Reduced for speed
                     guidance_scale=7.0,
+                    height=512,  # 16:9 aspect ratio
+                    width=768
                 ).images[0]
                 bg_path = f"{self.temp_dir}/background_scene_{scene['scene_number']}.png"
                 image.save(bg_path)
                 background_images[scene['scene_number']] = bg_path
+                print(f"✅ Generated background for scene {scene['scene_number']}")
                 # Clear memory after each background
                 self.clear_gpu_memory()
             except Exception as e:
+                print(f"❌ Error generating background for scene {scene['scene_number']}: {e}")
         return background_images
     @spaces.GPU
     def generate_scene_videos(self, scenes: List[Dict], character_images: Dict, background_images: Dict) -> List[str]:
         """Produce one video clip per scene and return the clip paths in order.

         AnimateDiff is used when ``self.video_generator`` is available. If it
         is missing, or animating a scene raises, the scene's background image
         (when one exists) is turned into a clip via ``create_static_video``.
         Scenes for which neither route yields a file are left out of the
         result, so the returned list never contains ``None``.

         Args:
             scenes: Scene dicts; reads ``scene_number``, ``characters_present``,
                 ``background``, ``mood`` and the optional ``duration`` (seconds).
             character_images: Accepted for interface compatibility; not read here.
             background_images: Maps scene number to a background image path.

         Returns:
             Paths of the clips that were actually written.
         """
         self.load_models()
         scene_videos = []
         for scene in scenes:
             scene_number = scene['scene_number']
             video_path = None
             animation_attempted = bool(self.video_generator)

             if animation_attempted:
                 try:
                     # Create prompt for scene animation
                     characters_text = ", ".join(scene['characters_present'])
                     prompt = f"cartoon animation, {characters_text} in {scene['background']}, {scene['mood']} mood, 2D animated style, smooth motion, family friendly"
                     negative_prompt = "realistic, 3D, static, blurry, low quality, scary"
                     # Generate animated video using AnimateDiff
                     video_frames = self.video_generator(
                         prompt=prompt,
                         negative_prompt=negative_prompt,
                         num_frames=12,  # Reduced frames for speed
                         guidance_scale=7.5,
                         num_inference_steps=15,  # Reduced steps
                         height=512,
                         width=768
                     ).frames[0]
                     target_path = f"{self.temp_dir}/scene_{scene_number}.mp4"
                     export_to_video(video_frames, target_path, fps=6)
                     # Only record the path once the export has succeeded.
                     video_path = target_path
                     print(f"✅ Generated video for scene {scene_number}")
                 except Exception as e:
                     print(f"❌ Error generating video for scene {scene_number}: {e}")
                 finally:
                     # Free GPU memory after failures too, not just on success.
                     self.clear_gpu_memory()

             if video_path is None and scene_number in background_images:
                 # Same static-clip route whether animation failed or was never
                 # available; only the log wording differs.
                 label = "fallback" if animation_attempted else "static"
                 try:
                     with Image.open(background_images[scene_number]) as background:
                         video_path = self.create_static_video(
                             background,
                             int(scene.get('duration', 30)),
                             scene_number
                         )
                     if video_path:
                         print(f"✅ Created {label} video for scene {scene_number}")
                 except Exception as e2:
                     print(f"❌ Fallback video creation failed: {e2}")

             # create_static_video reports failure by returning None; never
             # pass that on to the merge step.
             if video_path:
                 scene_videos.append(video_path)
         return scene_videos
     def create_static_video(self, image: Image.Image, duration: int, scene_num: int) -> str:
         """Render a still image as an MP4 clip with a slow zoom-in.

         Args:
             image: Source picture; converted to RGB and resized to 768x512.
             duration: Clip length in seconds.
             scene_num: Scene number used to build the output file name.

         Returns:
             Path of the written file inside ``self.temp_dir``, or ``None``
             when the clip could not be written (the error is printed).
         """
         video_path = f"{self.temp_dir}/scene_{scene_num}.mp4"
         frame_size = (768, 512)
         fps = 24
         writer = None
         try:
             # Force three channels so palette or greyscale inputs survive the
             # RGB -> BGR conversion below.
             frame = np.array(image.convert("RGB").resize(frame_size))
             frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

             writer = cv2.VideoWriter(
                 video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, frame_size
             )
             if not writer.isOpened():
                 raise RuntimeError(f"could not open video writer for {video_path}")

             # Always emit at least one frame so a zero or negative duration
             # cannot produce a file without any frames.
             total_frames = max(1, int(duration) * fps)
             height, width = frame.shape[:2]
             center = (width // 2, height // 2)

             for index in range(total_frames):
                 # Scale grows linearly from 1.00 towards 1.05 across the clip.
                 scale = 1.0 + (index / total_frames) * 0.05
                 zoom = cv2.getRotationMatrix2D(center, 0, scale)
                 writer.write(cv2.warpAffine(frame, zoom, (width, height)))
             return video_path
         except Exception as e:
             print(f"❌ Static video creation failed: {e}")
             return None
         finally:
             # Release on every path so the writer is closed even when a
             # frame fails part-way through.
             if writer is not None:
                 writer.release()
     @spaces.GPU
     def generate_audio(self, scenes: List[Dict]) -> str:
         """Generate audio using open-source XTTS"""
+        if not self.tts_model:
+            print("❌ No TTS model available")
+            return None
         self.load_models()
         try:
             return audio_path
         except Exception as e:
+            print(f"❌ Audio generation failed: {e}")
             return None
     def merge_videos_with_ffmpeg(self, scene_videos: List[str], audio_path: str = None) -> str:
             result = subprocess.run(cmd, capture_output=True, text=True)
             if result.returncode == 0:
+                print("✅ Video merging successful")
                 return final_video_path
             else:
+                print(f"❌ FFmpeg error: {result.stderr}")
                 return None
         except Exception as e:
+            print(f"❌ Video merging failed: {e}")
             return None
     @spaces.GPU
     def generate_cartoon_film(self, script: str, include_audio: bool = True) -> tuple:
         """Main function to generate complete cartoon film"""
         try:
+            print("🎬 Starting cartoon film generation...")
             # Step 1: Enhance script
+            print("📝 Processing script...")
             processed_script = self.enhance_script_with_llm(script)
             # Step 2: Generate characters
+            print("👥 Creating characters...")
             character_images = self.generate_character_images(processed_script['characters'])
             # Step 3: Generate backgrounds
+            print("🏞️ Creating backgrounds...")
             background_images = self.generate_background_images(processed_script['scenes'])
             # Step 4: Generate scene videos
+            print("🎥 Creating videos...")
             scene_videos = self.generate_scene_videos(
                 processed_script['scenes'],
                 character_images,
             # Step 5: Generate audio
             audio_path = None
             if include_audio:
+                print("🎵 Creating audio...")
                 audio_path = self.generate_audio(processed_script['scenes'])
             # Step 6: Merge final video
+            print("🎞️ Finalizing film...")
             final_video = self.merge_videos_with_ffmpeg(scene_videos, audio_path)
             if final_video and os.path.exists(final_video):
+                print("✅ Film generation complete!")
                 return final_video, processed_script, "✅ Cartoon film generated successfully!"
             else:
+                print("⚠️ Partial success - some steps may have failed")
+                return None, processed_script, "⚠️ Partial generation - check individual steps"
         except Exception as e:
+            print(f"❌ Generation failed: {e}")
+            # Return error information in proper format
             error_info = {
                 "error": True,
                 "message": str(e),
     gr.Markdown("""
     # 🎬 AI Cartoon Film Generator (100% Open Source)
+    Transform your script into a complete cartoon film using only open-source models!
     **🔥 Features:**
+    - **Stable Diffusion XL/1.5** for character & background generation
+    - **AnimateDiff** for video animation
+    - **XTTS** for voice synthesis
+    - **ZeroGPU optimized** - completely free!
     - **No API keys required** - everything is open source
+    **⚡ Fixed compatibility issues and memory optimization**
     """)
     with gr.Row():
             with gr.Row():
                 include_audio = gr.Checkbox(
                     label="🎵 Include AI-Generated Voices",
+                    value=False,  # Default to False for faster testing
                     info="Generate speech for character dialogue"
                 )
             )
             gr.Markdown("""
+            **⏱️ Processing Time:** 5-10 minutes
+            **🎥 Output:** 4-5 minute MP4 film
+            **📱 Models:** SDXL + AnimateDiff + XTTS
             """)
         with gr.Column(scale=1):
             status_output = gr.Textbox(
                 label="📊 Status",
+                lines=3
             )
             script_details = gr.JSON(
                 label="📋 Generated Script Details",
+                visible=True
             )
     # Event handlers
     # Example scripts
     gr.Examples(
         examples=[
+            ["A brave young explorer discovers a magical forest where talking animals help her find a lost treasure.", False],
+            ["Two best friends go on a space adventure to help a friendly alien return home.", False],
+            ["A small robot learns about emotions when it meets a lonely child in the city.", False],
+            ["A young artist discovers their drawings come to life and must help solve problems.", False]
         ],
         inputs=[script_input, include_audio],
         label="💡 Try these example scripts:"
     gr.Markdown("""
     ---
+    **🔧 Fixed Issues:**
+    - ✅ Memory optimization methods updated for latest diffusers
+    - ✅ Fallback models for compatibility
+    - ✅ Better error handling and logging
+    - ✅ Reduced parameters for ZeroGPU efficiency
+    **💝 Completely free and open source!** No API keys required.
     """)
 if __name__ == "__main__":