Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -65,132 +65,99 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 65 |
|
| 66 |
@spaces.GPU
|
| 67 |
def load_models(self):
|
| 68 |
-
"""Load
|
| 69 |
-
if self.models_loaded:
|
| 70 |
-
return
|
| 71 |
-
|
| 72 |
-
print("🚀 Loading professional-grade models...")
|
| 73 |
-
|
| 74 |
try:
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
try:
|
|
|
|
|
|
|
|
|
|
| 78 |
self.flux_pipe = FluxPipeline.from_pretrained(
|
| 79 |
"black-forest-labs/FLUX.1-dev",
|
| 80 |
-
torch_dtype=
|
| 81 |
-
|
| 82 |
-
use_safetensors=True
|
| 83 |
-
).to(self.device)
|
| 84 |
-
print("✅ FLUX pipeline loaded successfully!")
|
| 85 |
-
self.using_flux = True
|
| 86 |
-
except Exception as flux_error:
|
| 87 |
-
if "401" in str(flux_error) or "authentication" in str(flux_error).lower():
|
| 88 |
-
print("🔐 FLUX authentication failed - model requires Hugging Face token")
|
| 89 |
-
print("💡 To use FLUX, you need to:")
|
| 90 |
-
print(" 1. Get a Hugging Face token from https://huggingface.co/settings/tokens")
|
| 91 |
-
print(" 2. Accept the FLUX model license at https://huggingface.co/black-forest-labs/FLUX.1-dev")
|
| 92 |
-
print(" 3. Set your token: huggingface-cli login")
|
| 93 |
-
print("🔄 Falling back to Stable Diffusion...")
|
| 94 |
-
self.using_flux = False
|
| 95 |
-
else:
|
| 96 |
-
print(f"❌ FLUX loading failed: {flux_error}")
|
| 97 |
-
self.using_flux = False
|
| 98 |
-
except Exception as e:
|
| 99 |
-
print(f"❌ FLUX pipeline failed: {e}")
|
| 100 |
-
self.using_flux = False
|
| 101 |
-
|
| 102 |
-
# Load cartoon/anime LoRA for character generation (only if FLUX is available)
|
| 103 |
-
if self.using_flux:
|
| 104 |
-
print("🎭 Loading cartoon LoRA models...")
|
| 105 |
-
try:
|
| 106 |
-
# Load multiple LoRA models for different purposes
|
| 107 |
-
self.cartoon_lora = hf_hub_download(
|
| 108 |
-
"prithivMLmods/Canopus-LoRA-Flux-Anime",
|
| 109 |
-
"Canopus-LoRA-Flux-Anime.safetensors"
|
| 110 |
-
)
|
| 111 |
-
self.character_lora = hf_hub_download(
|
| 112 |
-
"enhanceaiteam/Anime-Flux",
|
| 113 |
-
"anime-flux.safetensors"
|
| 114 |
-
)
|
| 115 |
-
self.sketch_lora = hf_hub_download(
|
| 116 |
-
"Shakker-Labs/FLUX.1-dev-LoRA-Children-Simple-Sketch",
|
| 117 |
-
"FLUX-dev-lora-children-simple-sketch.safetensors"
|
| 118 |
)
|
| 119 |
-
print("✅ LoRA models loaded successfully")
|
| 120 |
-
except Exception as e:
|
| 121 |
-
print(f"⚠️ Some LoRA models failed to load: {e}")
|
| 122 |
-
|
| 123 |
-
# Enable memory optimizations for FLUX
|
| 124 |
-
if self.flux_pipe:
|
| 125 |
-
self.flux_pipe.enable_vae_slicing()
|
| 126 |
-
self.flux_pipe.enable_vae_tiling()
|
| 127 |
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
| 142 |
print("🔄 Loading Stable Diffusion fallback model...")
|
|
|
|
| 143 |
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
self.
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
safety_checker=None,
|
| 151 |
-
requires_safety_checker=False
|
| 152 |
-
).to(self.device)
|
| 153 |
-
print("✅ Loaded Stable Diffusion v1.4")
|
| 154 |
-
except Exception as sd_error:
|
| 155 |
-
print(f"⚠️ SD v1.4 failed: {sd_error}")
|
| 156 |
-
# Try the original model
|
| 157 |
-
self.flux_pipe = StableDiffusionPipeline.from_pretrained(
|
| 158 |
-
"runwayml/stable-diffusion-v1-5",
|
| 159 |
-
torch_dtype=torch.float16,
|
| 160 |
-
use_safetensors=True,
|
| 161 |
-
safety_checker=None,
|
| 162 |
-
requires_safety_checker=False
|
| 163 |
-
).to(self.device)
|
| 164 |
-
print("✅ Loaded Stable Diffusion v1.5")
|
| 165 |
|
| 166 |
-
#
|
| 167 |
-
self.
|
| 168 |
-
if hasattr(self.flux_pipe, 'enable_vae_tiling'):
|
| 169 |
-
self.flux_pipe.enable_vae_tiling()
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
print("✅ Stable Diffusion fallback loaded successfully")
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
print(f"❌ Stable Diffusion fallback also failed: {e2}")
|
| 175 |
-
self.flux_pipe = None
|
| 176 |
-
|
| 177 |
-
try:
|
| 178 |
-
# 2. Advanced script generation model
|
| 179 |
print("📝 Loading script enhancement model...")
|
| 180 |
-
self.
|
| 181 |
-
"
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
device=0 if self.device == "cuda" else -1
|
| 185 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
print("✅ Script enhancer loaded")
|
| 187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
except Exception as e:
|
| 189 |
-
print(f"❌
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
print("🎬 All professional models loaded!")
|
| 194 |
|
| 195 |
def clear_gpu_memory(self):
|
| 196 |
"""Clear GPU memory between operations"""
|
|
@@ -482,203 +449,216 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 482 |
|
| 483 |
@spaces.GPU
|
| 484 |
def generate_professional_character_images(self, characters: List[Dict]) -> Dict[str, str]:
|
| 485 |
-
"""Generate
|
| 486 |
-
self.load_models()
|
| 487 |
character_images = {}
|
| 488 |
|
| 489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
print("❌ No image generation pipeline available")
|
| 491 |
return character_images
|
|
|
|
|
|
|
| 492 |
|
| 493 |
for character in characters:
|
|
|
|
|
|
|
|
|
|
| 494 |
try:
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
# Load appropriate LoRA based on character type (only for FLUX)
|
| 498 |
-
if hasattr(self.flux_pipe, 'load_lora_weights') and "anime" in character.get("animation_style", "").lower():
|
| 499 |
-
if hasattr(self, 'cartoon_lora'):
|
| 500 |
-
try:
|
| 501 |
-
self.flux_pipe.load_lora_weights(self.cartoon_lora)
|
| 502 |
-
except Exception as e:
|
| 503 |
-
print(f"⚠️ LoRA loading failed: {e}")
|
| 504 |
|
| 505 |
-
#
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
prompt = self.optimize_prompt_for_clip(prompt)
|
| 513 |
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
"""
|
| 518 |
|
| 519 |
-
#
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
except Exception as e:
|
| 543 |
-
if "CLIP" in str(e) and "token" in str(e).lower():
|
| 544 |
-
print(f"⚠️ CLIP token error detected, using simplified prompt...")
|
| 545 |
-
# Fallback to very simple prompt
|
| 546 |
-
simple_prompt = f"anime character, {character['name']}, clean background"
|
| 547 |
-
simple_prompt = self.optimize_prompt_for_clip(simple_prompt, max_tokens=30)
|
| 548 |
-
|
| 549 |
-
if hasattr(self.flux_pipe, 'max_sequence_length'):
|
| 550 |
-
image = self.flux_pipe(
|
| 551 |
-
prompt=simple_prompt,
|
| 552 |
-
negative_prompt="low quality, blurry",
|
| 553 |
-
num_inference_steps=20,
|
| 554 |
-
guidance_scale=3.0,
|
| 555 |
-
height=1024,
|
| 556 |
-
width=1024,
|
| 557 |
-
max_sequence_length=128
|
| 558 |
-
).images[0]
|
| 559 |
-
else:
|
| 560 |
-
image = self.flux_pipe(
|
| 561 |
-
prompt=simple_prompt,
|
| 562 |
-
negative_prompt="low quality, blurry",
|
| 563 |
-
num_inference_steps=20,
|
| 564 |
-
guidance_scale=7.0,
|
| 565 |
-
height=1024,
|
| 566 |
-
width=1024
|
| 567 |
-
).images[0]
|
| 568 |
-
else:
|
| 569 |
-
raise e
|
| 570 |
|
|
|
|
| 571 |
char_path = f"{self.output_dir}/char_{character['name'].replace(' ', '_')}.png"
|
| 572 |
image.save(char_path)
|
| 573 |
-
character_images[character['name']] = char_path
|
| 574 |
-
|
| 575 |
-
# Create download URL for character
|
| 576 |
-
download_info = self.create_download_url(char_path, f"character_{character['name']}")
|
| 577 |
-
print(f"✅ Generated high-quality character: {character['name']}")
|
| 578 |
-
print(download_info)
|
| 579 |
|
| 580 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 581 |
|
| 582 |
except Exception as e:
|
| 583 |
-
print(f"❌ Error generating character {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
|
| 585 |
return character_images
|
| 586 |
|
| 587 |
@spaces.GPU
|
| 588 |
def generate_cinematic_backgrounds(self, scenes: List[Dict], color_palette: str) -> Dict[int, str]:
|
| 589 |
-
"""Generate cinematic
|
| 590 |
-
self.load_models()
|
| 591 |
background_images = {}
|
| 592 |
|
| 593 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 594 |
print("❌ No image generation pipeline available")
|
| 595 |
return background_images
|
|
|
|
|
|
|
| 596 |
|
| 597 |
for scene in scenes:
|
|
|
|
|
|
|
|
|
|
| 598 |
try:
|
| 599 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 600 |
|
| 601 |
-
|
| 602 |
-
background_desc = scene['background'][:80] # Limit background description
|
| 603 |
-
mood = scene['mood'][:30]
|
| 604 |
-
shot_type = scene.get('shot_type', 'medium shot')[:20]
|
| 605 |
-
animation_notes = scene.get('animation_notes', 'professional background art')[:40]
|
| 606 |
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
|
|
|
| 611 |
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
"""
|
| 616 |
|
| 617 |
-
#
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
except Exception as e:
|
| 641 |
-
if "CLIP" in str(e) and "token" in str(e).lower():
|
| 642 |
-
print(f"⚠️ CLIP token error detected for background, using simplified prompt...")
|
| 643 |
-
# Fallback to very simple prompt
|
| 644 |
-
simple_prompt = f"cartoon background, {scene['background'][:40]}, clean"
|
| 645 |
-
simple_prompt = self.optimize_prompt_for_clip(simple_prompt, max_tokens=25)
|
| 646 |
-
|
| 647 |
-
if hasattr(self.flux_pipe, 'max_sequence_length'):
|
| 648 |
-
image = self.flux_pipe(
|
| 649 |
-
prompt=simple_prompt,
|
| 650 |
-
negative_prompt="characters, low quality",
|
| 651 |
-
num_inference_steps=15,
|
| 652 |
-
guidance_scale=3.0,
|
| 653 |
-
height=768,
|
| 654 |
-
width=1024,
|
| 655 |
-
max_sequence_length=128
|
| 656 |
-
).images[0]
|
| 657 |
-
else:
|
| 658 |
-
image = self.flux_pipe(
|
| 659 |
-
prompt=simple_prompt,
|
| 660 |
-
negative_prompt="characters, low quality",
|
| 661 |
-
num_inference_steps=15,
|
| 662 |
-
guidance_scale=7.0,
|
| 663 |
-
height=768,
|
| 664 |
-
width=1024
|
| 665 |
-
).images[0]
|
| 666 |
-
else:
|
| 667 |
-
raise e
|
| 668 |
|
| 669 |
-
|
|
|
|
| 670 |
image.save(bg_path)
|
| 671 |
-
background_images[scene['scene_number']] = bg_path
|
| 672 |
-
|
| 673 |
-
# Create download URL for background
|
| 674 |
-
download_info = self.create_download_url(bg_path, f"background_scene_{scene['scene_number']}")
|
| 675 |
-
print(f"✅ Created cinematic background for scene {scene['scene_number']}")
|
| 676 |
-
print(download_info)
|
| 677 |
|
| 678 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 679 |
|
| 680 |
except Exception as e:
|
| 681 |
print(f"❌ Error generating background for scene {scene['scene_number']}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 682 |
|
| 683 |
return background_images
|
| 684 |
|
|
@@ -687,6 +667,13 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 687 |
try:
|
| 688 |
print("🎬 Setting up Open-Sora 2.0 for video generation...")
|
| 689 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 690 |
# Check if we're already in the right directory
|
| 691 |
current_dir = os.getcwd()
|
| 692 |
opensora_dir = os.path.join(current_dir, "Open-Sora")
|
|
@@ -694,33 +681,97 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 694 |
# Clone Open-Sora repository if it doesn't exist
|
| 695 |
if not os.path.exists(opensora_dir):
|
| 696 |
print("📥 Cloning Open-Sora repository...")
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 700 |
|
| 701 |
# Check if the repository was cloned successfully
|
| 702 |
if not os.path.exists(opensora_dir):
|
| 703 |
print("❌ Failed to clone Open-Sora repository")
|
| 704 |
return False
|
| 705 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 706 |
# Check if model weights exist
|
| 707 |
ckpts_dir = os.path.join(opensora_dir, "ckpts")
|
| 708 |
if not os.path.exists(ckpts_dir):
|
| 709 |
print("📥 Downloading Open-Sora 2.0 model...")
|
| 710 |
try:
|
| 711 |
-
|
|
|
|
| 712 |
"huggingface-cli", "download", "hpcai-tech/Open-Sora-v2",
|
| 713 |
"--local-dir", ckpts_dir
|
| 714 |
-
], check=True, capture_output=True)
|
| 715 |
-
|
| 716 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 717 |
return False
|
|
|
|
|
|
|
|
|
|
| 718 |
|
| 719 |
print("✅ Open-Sora setup completed")
|
| 720 |
return True
|
| 721 |
|
| 722 |
except Exception as e:
|
| 723 |
print(f"❌ Open-Sora setup failed: {e}")
|
|
|
|
|
|
|
| 724 |
return False
|
| 725 |
|
| 726 |
@spaces.GPU
|
|
@@ -746,17 +797,23 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 746 |
if video_path:
|
| 747 |
print(f"✅ Open-Sora video generated for scene {scene_num}")
|
| 748 |
else:
|
| 749 |
-
print(f"❌ Open-Sora failed for scene {scene_num}, trying
|
| 750 |
-
video_path = self.
|
|
|
|
|
|
|
|
|
|
| 751 |
|
| 752 |
# If professional video fails, try simple video
|
| 753 |
if not video_path:
|
| 754 |
-
print(f"🔄
|
| 755 |
video_path = self._create_simple_static_video(scene, background_images)
|
| 756 |
else:
|
| 757 |
-
print(f"🎬
|
| 758 |
-
#
|
| 759 |
-
video_path = self.
|
|
|
|
|
|
|
|
|
|
| 760 |
|
| 761 |
if video_path and os.path.exists(video_path):
|
| 762 |
scene_videos.append(video_path)
|
|
@@ -804,6 +861,7 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 804 |
|
| 805 |
# Use the optimization function to ensure CLIP compatibility
|
| 806 |
prompt = self.optimize_prompt_for_clip(prompt)
|
|
|
|
| 807 |
|
| 808 |
video_path = f"{self.output_dir}/video_scene_{scene['scene_number']}.mp4"
|
| 809 |
|
|
@@ -815,6 +873,18 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 815 |
print("❌ Open-Sora directory not found")
|
| 816 |
return None
|
| 817 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
# Run Open-Sora inference
|
| 819 |
cmd = [
|
| 820 |
"torchrun", "--nproc_per_node", "1", "--standalone",
|
|
@@ -827,7 +897,14 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 827 |
"--motion-score", "6" # High motion for dynamic scenes
|
| 828 |
]
|
| 829 |
|
| 830 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 831 |
|
| 832 |
if result.returncode == 0:
|
| 833 |
# Find generated video file
|
|
@@ -835,12 +912,22 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 835 |
if file.endswith('.mp4') and 'scene' not in file:
|
| 836 |
src_path = os.path.join(self.output_dir, file)
|
| 837 |
os.rename(src_path, video_path)
|
|
|
|
| 838 |
return video_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 839 |
|
|
|
|
|
|
|
| 840 |
return None
|
| 841 |
-
|
| 842 |
except Exception as e:
|
| 843 |
print(f"❌ Open-Sora generation failed: {e}")
|
|
|
|
|
|
|
| 844 |
return None
|
| 845 |
|
| 846 |
def _create_professional_static_video(self, scene: Dict, background_images: Dict) -> str:
|
|
@@ -1225,6 +1312,177 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 1225 |
}
|
| 1226 |
return None, error_info, f"❌ Generation failed: {str(e)}", [], []
|
| 1227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1228 |
# Initialize professional generator
|
| 1229 |
generator = ProfessionalCartoonFilmGenerator()
|
| 1230 |
|
|
|
|
| 65 |
|
| 66 |
@spaces.GPU
|
| 67 |
def load_models(self):
|
| 68 |
+
"""Load all required AI models for professional generation"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
try:
|
| 70 |
+
print("🚀 Loading professional-grade models...")
|
| 71 |
+
|
| 72 |
+
# Clear GPU memory first
|
| 73 |
+
self.clear_gpu_memory()
|
| 74 |
+
|
| 75 |
+
# Detect device and set appropriate dtype
|
| 76 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 77 |
+
self.dtype = torch.float16 if self.device == "cuda" else torch.float32
|
| 78 |
+
|
| 79 |
+
print(f"🎮 Using device: {self.device} with dtype: {self.dtype}")
|
| 80 |
+
|
| 81 |
+
# Try to load FLUX first
|
| 82 |
try:
|
| 83 |
+
print("🎨 Loading FLUX pipeline...")
|
| 84 |
+
from diffusers import FluxPipeline
|
| 85 |
+
|
| 86 |
self.flux_pipe = FluxPipeline.from_pretrained(
|
| 87 |
"black-forest-labs/FLUX.1-dev",
|
| 88 |
+
torch_dtype=self.dtype,
|
| 89 |
+
device_map="auto" if self.device == "cuda" else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
+
if self.device == "cuda":
|
| 93 |
+
self.flux_pipe = self.flux_pipe.to("cuda")
|
| 94 |
+
|
| 95 |
+
print("✅ FLUX pipeline loaded successfully")
|
| 96 |
+
self.flux_available = True
|
| 97 |
+
|
| 98 |
+
except Exception as e:
|
| 99 |
+
print("🔐 FLUX authentication failed - model requires Hugging Face token")
|
| 100 |
+
print("💡 To use FLUX, you need to:")
|
| 101 |
+
print(" 1. Get a Hugging Face token from https://huggingface.co/settings/tokens")
|
| 102 |
+
print(" 2. Accept the FLUX model license at https://huggingface.co/black-forest-labs/FLUX.1-dev")
|
| 103 |
+
print(" 3. Set your token: huggingface-cli login")
|
| 104 |
+
print("🔄 Falling back to Stable Diffusion...")
|
| 105 |
+
self.flux_available = False
|
| 106 |
+
|
| 107 |
+
# Load Stable Diffusion fallback
|
| 108 |
+
if not self.flux_available:
|
| 109 |
print("🔄 Loading Stable Diffusion fallback model...")
|
| 110 |
+
from diffusers import StableDiffusionPipeline, DDIMScheduler
|
| 111 |
|
| 112 |
+
self.sd_pipe = StableDiffusionPipeline.from_pretrained(
|
| 113 |
+
"CompVis/stable-diffusion-v1-4",
|
| 114 |
+
torch_dtype=self.dtype,
|
| 115 |
+
safety_checker=None,
|
| 116 |
+
requires_safety_checker=False
|
| 117 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
+
# Configure scheduler for better quality
|
| 120 |
+
self.sd_pipe.scheduler = DDIMScheduler.from_config(self.sd_pipe.scheduler.config)
|
|
|
|
|
|
|
| 121 |
|
| 122 |
+
if self.device == "cuda":
|
| 123 |
+
self.sd_pipe = self.sd_pipe.to("cuda")
|
| 124 |
+
|
| 125 |
+
print("✅ Loaded Stable Diffusion v1.4")
|
| 126 |
print("✅ Stable Diffusion fallback loaded successfully")
|
| 127 |
+
|
| 128 |
+
# Load script enhancement model with correct device
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
print("📝 Loading script enhancement model...")
|
| 130 |
+
self.script_model = AutoModelForCausalLM.from_pretrained(
|
| 131 |
+
"microsoft/DialoGPT-medium",
|
| 132 |
+
torch_dtype=self.dtype,
|
| 133 |
+
device_map="auto" if self.device == "cuda" else None
|
|
|
|
| 134 |
)
|
| 135 |
+
self.script_tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
|
| 136 |
+
|
| 137 |
+
if self.script_tokenizer.pad_token is None:
|
| 138 |
+
self.script_tokenizer.pad_token = self.script_tokenizer.eos_token
|
| 139 |
+
|
| 140 |
+
if self.device == "cuda":
|
| 141 |
+
self.script_model = self.script_model.to("cuda")
|
| 142 |
+
|
| 143 |
+
print(f"Device set to use {self.device}")
|
| 144 |
print("✅ Script enhancer loaded")
|
| 145 |
|
| 146 |
+
# Set model states
|
| 147 |
+
if self.device == "cuda":
|
| 148 |
+
if self.flux_available:
|
| 149 |
+
self.flux_pipe.enable_model_cpu_offload()
|
| 150 |
+
else:
|
| 151 |
+
self.sd_pipe.enable_model_cpu_offload()
|
| 152 |
+
|
| 153 |
+
print("🎬 All professional models loaded!")
|
| 154 |
+
return True
|
| 155 |
+
|
| 156 |
except Exception as e:
|
| 157 |
+
print(f"❌ Model loading failed: {e}")
|
| 158 |
+
import traceback
|
| 159 |
+
traceback.print_exc()
|
| 160 |
+
return False
|
|
|
|
| 161 |
|
| 162 |
def clear_gpu_memory(self):
|
| 163 |
"""Clear GPU memory between operations"""
|
|
|
|
| 449 |
|
| 450 |
@spaces.GPU
|
| 451 |
def generate_professional_character_images(self, characters: List[Dict]) -> Dict[str, str]:
|
| 452 |
+
"""Generate professional character images with consistency"""
|
|
|
|
| 453 |
character_images = {}
|
| 454 |
|
| 455 |
+
print(f"🎭 Generating {len(characters)} professional character designs...")
|
| 456 |
+
|
| 457 |
+
# Check if we have any image generation pipeline available
|
| 458 |
+
if not hasattr(self, 'flux_available'):
|
| 459 |
+
print("❌ No image generation models loaded")
|
| 460 |
+
return character_images
|
| 461 |
+
|
| 462 |
+
pipeline = None
|
| 463 |
+
if self.flux_available and hasattr(self, 'flux_pipe'):
|
| 464 |
+
pipeline = self.flux_pipe
|
| 465 |
+
model_name = "FLUX"
|
| 466 |
+
elif hasattr(self, 'sd_pipe'):
|
| 467 |
+
pipeline = self.sd_pipe
|
| 468 |
+
model_name = "Stable Diffusion"
|
| 469 |
+
else:
|
| 470 |
print("❌ No image generation pipeline available")
|
| 471 |
return character_images
|
| 472 |
+
|
| 473 |
+
print(f"🎨 Using {model_name} for character generation")
|
| 474 |
|
| 475 |
for character in characters:
|
| 476 |
+
character_name = character['name']
|
| 477 |
+
print(f"\n🎨 Generating character: {character_name}")
|
| 478 |
+
|
| 479 |
try:
|
| 480 |
+
# Build comprehensive character prompt
|
| 481 |
+
base_prompt = f"Professional cartoon character design, {character['name']}, {character['description']}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
|
| 483 |
+
# Add style and quality modifiers
|
| 484 |
+
if self.flux_available:
|
| 485 |
+
# FLUX-specific prompt
|
| 486 |
+
prompt = f"{base_prompt}, Disney-Pixar animation style, highly detailed character sheet, clean white background, 2D animation model sheet, expressive face, vibrant colors, professional character design, perfect for animation"
|
| 487 |
+
else:
|
| 488 |
+
# Stable Diffusion prompt
|
| 489 |
+
prompt = f"{base_prompt}, anime style, cartoon character, clean background, high quality, detailed, 2D animation style, character sheet"
|
|
|
|
| 490 |
|
| 491 |
+
# Optimize prompt for CLIP
|
| 492 |
+
prompt = self.optimize_prompt_for_clip(prompt, max_tokens=75)
|
| 493 |
+
print(f"📝 Character prompt: {prompt}")
|
|
|
|
| 494 |
|
| 495 |
+
# Generate with appropriate settings
|
| 496 |
+
if self.flux_available:
|
| 497 |
+
# FLUX generation settings
|
| 498 |
+
image = pipeline(
|
| 499 |
+
prompt=prompt,
|
| 500 |
+
width=1024,
|
| 501 |
+
height=1024,
|
| 502 |
+
num_inference_steps=25,
|
| 503 |
+
guidance_scale=7.5,
|
| 504 |
+
generator=torch.Generator(device=self.device).manual_seed(42)
|
| 505 |
+
).images[0]
|
| 506 |
+
else:
|
| 507 |
+
# Stable Diffusion generation settings
|
| 508 |
+
image = pipeline(
|
| 509 |
+
prompt=prompt,
|
| 510 |
+
width=512,
|
| 511 |
+
height=512,
|
| 512 |
+
num_inference_steps=30,
|
| 513 |
+
guidance_scale=7.5,
|
| 514 |
+
generator=torch.Generator(device=self.device).manual_seed(42)
|
| 515 |
+
).images[0]
|
| 516 |
+
# Upscale for SD
|
| 517 |
+
image = image.resize((1024, 1024), Image.Resampling.LANCZOS)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
|
| 519 |
+
# Save character image
|
| 520 |
char_path = f"{self.output_dir}/char_{character['name'].replace(' ', '_')}.png"
|
| 521 |
image.save(char_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
|
| 523 |
+
# Verify file was created
|
| 524 |
+
if os.path.exists(char_path):
|
| 525 |
+
file_size = os.path.getsize(char_path)
|
| 526 |
+
character_images[character_name] = char_path
|
| 527 |
+
|
| 528 |
+
# Create download URL
|
| 529 |
+
download_info = self.create_download_url(char_path, f"character_{character['name']}")
|
| 530 |
+
print(f"📥 Generated character_{character['name']}: char_{character['name'].replace(' ', '_')}.png")
|
| 531 |
+
print(f" 📊 File size: {file_size / (1024*1024):.1f} MB")
|
| 532 |
+
print(f" 📁 Internal path: {char_path}")
|
| 533 |
+
print(download_info)
|
| 534 |
+
|
| 535 |
+
# Clear GPU memory after each generation
|
| 536 |
+
if self.device == "cuda":
|
| 537 |
+
torch.cuda.empty_cache()
|
| 538 |
+
gc.collect()
|
| 539 |
+
else:
|
| 540 |
+
print(f"❌ Failed to save character image: {char_path}")
|
| 541 |
|
| 542 |
except Exception as e:
|
| 543 |
+
print(f"❌ Error generating character {character_name}: {e}")
|
| 544 |
+
import traceback
|
| 545 |
+
traceback.print_exc()
|
| 546 |
+
# Continue with next character
|
| 547 |
+
continue
|
| 548 |
+
|
| 549 |
+
print(f"\n📊 Character generation summary:")
|
| 550 |
+
print(f" - Characters requested: {len(characters)}")
|
| 551 |
+
print(f" - Characters generated: {len(character_images)}")
|
| 552 |
+
print(f" - Success rate: {len(character_images)/len(characters)*100:.1f}%")
|
| 553 |
|
| 554 |
return character_images
|
| 555 |
|
| 556 |
@spaces.GPU
|
| 557 |
def generate_cinematic_backgrounds(self, scenes: List[Dict], color_palette: str) -> Dict[int, str]:
|
| 558 |
+
"""Generate professional cinematic backgrounds for each scene"""
|
|
|
|
| 559 |
background_images = {}
|
| 560 |
|
| 561 |
+
print(f"🎞️ Generating {len(scenes)} cinematic backgrounds...")
|
| 562 |
+
|
| 563 |
+
# Check if we have any image generation pipeline available
|
| 564 |
+
if not hasattr(self, 'flux_available'):
|
| 565 |
+
print("❌ No image generation models loaded")
|
| 566 |
+
return background_images
|
| 567 |
+
|
| 568 |
+
pipeline = None
|
| 569 |
+
if self.flux_available and hasattr(self, 'flux_pipe'):
|
| 570 |
+
pipeline = self.flux_pipe
|
| 571 |
+
model_name = "FLUX"
|
| 572 |
+
elif hasattr(self, 'sd_pipe'):
|
| 573 |
+
pipeline = self.sd_pipe
|
| 574 |
+
model_name = "Stable Diffusion"
|
| 575 |
+
else:
|
| 576 |
print("❌ No image generation pipeline available")
|
| 577 |
return background_images
|
| 578 |
+
|
| 579 |
+
print(f"🎨 Using {model_name} for background generation")
|
| 580 |
|
| 581 |
for scene in scenes:
|
| 582 |
+
scene_num = scene['scene_number']
|
| 583 |
+
print(f"\n🌄 Generating background for scene {scene_num}")
|
| 584 |
+
|
| 585 |
try:
|
| 586 |
+
# Build cinematic background prompt
|
| 587 |
+
background_desc = scene['background']
|
| 588 |
+
mood = scene.get('mood', 'neutral')
|
| 589 |
+
shot_type = scene.get('shot_type', 'medium shot')
|
| 590 |
+
lighting = scene.get('lighting', 'natural lighting')
|
| 591 |
|
| 592 |
+
base_prompt = f"Cinematic background scene, {background_desc}, {mood} atmosphere, {lighting}"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
|
| 594 |
+
# Add style and quality modifiers
|
| 595 |
+
if self.flux_available:
|
| 596 |
+
prompt = f"{base_prompt}, Disney-Pixar animation style, detailed landscape, professional background art, vibrant colors, high quality, cinematic composition, no characters"
|
| 597 |
+
else:
|
| 598 |
+
prompt = f"{base_prompt}, anime style background, detailed landscape, high quality, cinematic, {color_palette} color palette, no people"
|
| 599 |
|
| 600 |
+
# Optimize for CLIP
|
| 601 |
+
prompt = self.optimize_prompt_for_clip(prompt, max_tokens=75)
|
| 602 |
+
print(f"📝 Background prompt: {prompt}")
|
|
|
|
| 603 |
|
| 604 |
+
# Generate with appropriate settings
|
| 605 |
+
if self.flux_available:
|
| 606 |
+
# FLUX generation settings
|
| 607 |
+
image = pipeline(
|
| 608 |
+
prompt=prompt,
|
| 609 |
+
width=1024,
|
| 610 |
+
height=768, # 4:3 aspect ratio for video
|
| 611 |
+
num_inference_steps=25,
|
| 612 |
+
guidance_scale=7.5,
|
| 613 |
+
generator=torch.Generator(device=self.device).manual_seed(scene_num * 10)
|
| 614 |
+
).images[0]
|
| 615 |
+
else:
|
| 616 |
+
# Stable Diffusion generation settings
|
| 617 |
+
image = pipeline(
|
| 618 |
+
prompt=prompt,
|
| 619 |
+
width=512,
|
| 620 |
+
height=384, # 4:3 aspect ratio
|
| 621 |
+
num_inference_steps=30,
|
| 622 |
+
guidance_scale=7.5,
|
| 623 |
+
generator=torch.Generator(device=self.device).manual_seed(scene_num * 10)
|
| 624 |
+
).images[0]
|
| 625 |
+
# Upscale for SD
|
| 626 |
+
image = image.resize((1024, 768), Image.Resampling.LANCZOS)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 627 |
|
| 628 |
+
# Save background image
|
| 629 |
+
bg_path = f"{self.output_dir}/bg_scene_{scene_num}.png"
|
| 630 |
image.save(bg_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 631 |
|
| 632 |
+
# Verify file was created
|
| 633 |
+
if os.path.exists(bg_path):
|
| 634 |
+
file_size = os.path.getsize(bg_path)
|
| 635 |
+
background_images[scene_num] = bg_path
|
| 636 |
+
|
| 637 |
+
# Create download URL
|
| 638 |
+
download_info = self.create_download_url(bg_path, f"background_scene_{scene_num}")
|
| 639 |
+
print(f"📥 Generated background_scene_{scene_num}: bg_scene_{scene_num}.png")
|
| 640 |
+
print(f" 📊 File size: {file_size / (1024*1024):.1f} MB")
|
| 641 |
+
print(f" 📁 Internal path: {bg_path}")
|
| 642 |
+
print(download_info)
|
| 643 |
+
|
| 644 |
+
# Clear GPU memory after each generation
|
| 645 |
+
if self.device == "cuda":
|
| 646 |
+
torch.cuda.empty_cache()
|
| 647 |
+
gc.collect()
|
| 648 |
+
else:
|
| 649 |
+
print(f"❌ Failed to save background image: {bg_path}")
|
| 650 |
|
| 651 |
except Exception as e:
|
| 652 |
print(f"❌ Error generating background for scene {scene['scene_number']}: {e}")
|
| 653 |
+
import traceback
|
| 654 |
+
traceback.print_exc()
|
| 655 |
+
# Continue with next scene
|
| 656 |
+
continue
|
| 657 |
+
|
| 658 |
+
print(f"\n📊 Background generation summary:")
|
| 659 |
+
print(f" - Scenes requested: {len(scenes)}")
|
| 660 |
+
print(f" - Backgrounds generated: {len(background_images)}")
|
| 661 |
+
print(f" - Success rate: {len(background_images)/len(scenes)*100:.1f}%")
|
| 662 |
|
| 663 |
return background_images
|
| 664 |
|
|
|
|
| 667 |
try:
|
| 668 |
print("🎬 Setting up Open-Sora 2.0 for video generation...")
|
| 669 |
|
| 670 |
+
# Check available GPU memory
|
| 671 |
+
if torch.cuda.is_available():
|
| 672 |
+
gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
|
| 673 |
+
print(f"🎮 Available GPU memory: {gpu_memory:.1f} GB")
|
| 674 |
+
if gpu_memory < 16:
|
| 675 |
+
print("⚠️ Warning: Open-Sora requires 16GB+ GPU memory for stable operation")
|
| 676 |
+
|
| 677 |
# Check if we're already in the right directory
|
| 678 |
current_dir = os.getcwd()
|
| 679 |
opensora_dir = os.path.join(current_dir, "Open-Sora")
|
|
|
|
| 681 |
# Clone Open-Sora repository if it doesn't exist
|
| 682 |
if not os.path.exists(opensora_dir):
|
| 683 |
print("📥 Cloning Open-Sora repository...")
|
| 684 |
+
try:
|
| 685 |
+
result = subprocess.run([
|
| 686 |
+
"git", "clone", "https://github.com/hpcaitech/Open-Sora.git"
|
| 687 |
+
], check=True, capture_output=True, text=True, timeout=120)
|
| 688 |
+
print("✅ Repository cloned successfully")
|
| 689 |
+
except subprocess.TimeoutExpired:
|
| 690 |
+
print("❌ Repository cloning timed out")
|
| 691 |
+
return False
|
| 692 |
+
except subprocess.CalledProcessError as e:
|
| 693 |
+
print(f"❌ Repository cloning failed: {e.stderr}")
|
| 694 |
+
return False
|
| 695 |
|
| 696 |
# Check if the repository was cloned successfully
|
| 697 |
if not os.path.exists(opensora_dir):
|
| 698 |
print("❌ Failed to clone Open-Sora repository")
|
| 699 |
return False
|
| 700 |
|
| 701 |
+
# Check for required scripts
|
| 702 |
+
script_path = os.path.join(opensora_dir, "scripts/diffusion/inference.py")
|
| 703 |
+
config_path = os.path.join(opensora_dir, "configs/diffusion/inference/t2i2v_256px.py")
|
| 704 |
+
|
| 705 |
+
print(f"📁 Checking for script: {script_path}")
|
| 706 |
+
print(f"📁 Checking for config: {config_path}")
|
| 707 |
+
|
| 708 |
+
if not os.path.exists(script_path):
|
| 709 |
+
print(f"❌ Required script not found: {script_path}")
|
| 710 |
+
# List available files for debugging
|
| 711 |
+
scripts_dir = os.path.join(opensora_dir, "scripts")
|
| 712 |
+
if os.path.exists(scripts_dir):
|
| 713 |
+
print(f"📁 Available in scripts/: {os.listdir(scripts_dir)}")
|
| 714 |
+
return False
|
| 715 |
+
|
| 716 |
+
if not os.path.exists(config_path):
|
| 717 |
+
print(f"❌ Required config not found: {config_path}")
|
| 718 |
+
# List available configs for debugging
|
| 719 |
+
configs_dir = os.path.join(opensora_dir, "configs")
|
| 720 |
+
if os.path.exists(configs_dir):
|
| 721 |
+
print(f"📁 Available in configs/: {os.listdir(configs_dir)}")
|
| 722 |
+
return False
|
| 723 |
+
|
| 724 |
# Check if model weights exist
|
| 725 |
ckpts_dir = os.path.join(opensora_dir, "ckpts")
|
| 726 |
if not os.path.exists(ckpts_dir):
|
| 727 |
print("📥 Downloading Open-Sora 2.0 model...")
|
| 728 |
try:
|
| 729 |
+
# Use smaller timeout and check if huggingface-cli is available
|
| 730 |
+
result = subprocess.run([
|
| 731 |
"huggingface-cli", "download", "hpcai-tech/Open-Sora-v2",
|
| 732 |
"--local-dir", ckpts_dir
|
| 733 |
+
], check=True, capture_output=True, text=True, timeout=300)
|
| 734 |
+
print("✅ Model downloaded successfully")
|
| 735 |
+
except subprocess.TimeoutExpired:
|
| 736 |
+
print("❌ Model download timed out (5 minutes)")
|
| 737 |
+
return False
|
| 738 |
+
except subprocess.CalledProcessError as e:
|
| 739 |
+
print(f"❌ Model download failed: {e.stderr}")
|
| 740 |
+
return False
|
| 741 |
+
except FileNotFoundError:
|
| 742 |
+
print("❌ huggingface-cli not found - cannot download model")
|
| 743 |
+
return False
|
| 744 |
+
else:
|
| 745 |
+
print("✅ Model weights already exist")
|
| 746 |
+
|
| 747 |
+
# Check dependencies
|
| 748 |
+
try:
|
| 749 |
+
import torch.distributed
|
| 750 |
+
print("✅ torch.distributed available")
|
| 751 |
+
except ImportError:
|
| 752 |
+
print("❌ torch.distributed not available")
|
| 753 |
+
return False
|
| 754 |
+
|
| 755 |
+
# Test if torchrun is available
|
| 756 |
+
try:
|
| 757 |
+
result = subprocess.run(["torchrun", "--help"],
|
| 758 |
+
capture_output=True, text=True, timeout=10)
|
| 759 |
+
if result.returncode == 0:
|
| 760 |
+
print("✅ torchrun available")
|
| 761 |
+
else:
|
| 762 |
+
print("❌ torchrun not working properly")
|
| 763 |
return False
|
| 764 |
+
except (subprocess.TimeoutExpired, FileNotFoundError):
|
| 765 |
+
print("❌ torchrun not found")
|
| 766 |
+
return False
|
| 767 |
|
| 768 |
print("✅ Open-Sora setup completed")
|
| 769 |
return True
|
| 770 |
|
| 771 |
except Exception as e:
|
| 772 |
print(f"❌ Open-Sora setup failed: {e}")
|
| 773 |
+
import traceback
|
| 774 |
+
traceback.print_exc()
|
| 775 |
return False
|
| 776 |
|
| 777 |
@spaces.GPU
|
|
|
|
| 797 |
if video_path:
|
| 798 |
print(f"✅ Open-Sora video generated for scene {scene_num}")
|
| 799 |
else:
|
| 800 |
+
print(f"❌ Open-Sora failed for scene {scene_num}, trying lightweight animation...")
|
| 801 |
+
video_path = self._create_lightweight_animated_video(scene, character_images, background_images)
|
| 802 |
+
if not video_path:
|
| 803 |
+
print(f"🔄 Lightweight animation failed, trying static video...")
|
| 804 |
+
video_path = self._create_professional_static_video(scene, background_images)
|
| 805 |
|
| 806 |
# If professional video fails, try simple video
|
| 807 |
if not video_path:
|
| 808 |
+
print(f"🔄 All methods failed, trying simple video for scene {scene_num}...")
|
| 809 |
video_path = self._create_simple_static_video(scene, background_images)
|
| 810 |
else:
|
| 811 |
+
print(f"🎬 Open-Sora not available, using lightweight animation for scene {scene_num}...")
|
| 812 |
+
# First try lightweight animation, then fallback to static
|
| 813 |
+
video_path = self._create_lightweight_animated_video(scene, character_images, background_images)
|
| 814 |
+
if not video_path:
|
| 815 |
+
print(f"🔄 Lightweight animation failed, using static video fallback...")
|
| 816 |
+
video_path = self._create_professional_static_video(scene, background_images)
|
| 817 |
|
| 818 |
if video_path and os.path.exists(video_path):
|
| 819 |
scene_videos.append(video_path)
|
|
|
|
| 861 |
|
| 862 |
# Use the optimization function to ensure CLIP compatibility
|
| 863 |
prompt = self.optimize_prompt_for_clip(prompt)
|
| 864 |
+
print(f"🎬 Open-Sora prompt: {prompt}")
|
| 865 |
|
| 866 |
video_path = f"{self.output_dir}/video_scene_{scene['scene_number']}.mp4"
|
| 867 |
|
|
|
|
| 873 |
print("❌ Open-Sora directory not found")
|
| 874 |
return None
|
| 875 |
|
| 876 |
+
# Check for required files
|
| 877 |
+
script_path = os.path.join(opensora_dir, "scripts/diffusion/inference.py")
|
| 878 |
+
config_path = os.path.join(opensora_dir, "configs/diffusion/inference/t2i2v_256px.py")
|
| 879 |
+
|
| 880 |
+
if not os.path.exists(script_path):
|
| 881 |
+
print(f"❌ Open-Sora script not found: {script_path}")
|
| 882 |
+
return None
|
| 883 |
+
|
| 884 |
+
if not os.path.exists(config_path):
|
| 885 |
+
print(f"❌ Open-Sora config not found: {config_path}")
|
| 886 |
+
return None
|
| 887 |
+
|
| 888 |
# Run Open-Sora inference
|
| 889 |
cmd = [
|
| 890 |
"torchrun", "--nproc_per_node", "1", "--standalone",
|
|
|
|
| 897 |
"--motion-score", "6" # High motion for dynamic scenes
|
| 898 |
]
|
| 899 |
|
| 900 |
+
print(f"🎬 Running Open-Sora command: {' '.join(cmd)}")
|
| 901 |
+
result = subprocess.run(cmd, capture_output=True, text=True, cwd=opensora_dir, timeout=300)
|
| 902 |
+
|
| 903 |
+
print(f"🎬 Open-Sora return code: {result.returncode}")
|
| 904 |
+
if result.stdout:
|
| 905 |
+
print(f"🎬 Open-Sora stdout: {result.stdout}")
|
| 906 |
+
if result.stderr:
|
| 907 |
+
print(f"❌ Open-Sora stderr: {result.stderr}")
|
| 908 |
|
| 909 |
if result.returncode == 0:
|
| 910 |
# Find generated video file
|
|
|
|
| 912 |
if file.endswith('.mp4') and 'scene' not in file:
|
| 913 |
src_path = os.path.join(self.output_dir, file)
|
| 914 |
os.rename(src_path, video_path)
|
| 915 |
+
print(f"✅ Open-Sora video generated: {video_path}")
|
| 916 |
return video_path
|
| 917 |
+
|
| 918 |
+
print("❌ Open-Sora completed but no video file found")
|
| 919 |
+
return None
|
| 920 |
+
else:
|
| 921 |
+
print(f"❌ Open-Sora failed with return code: {result.returncode}")
|
| 922 |
+
return None
|
| 923 |
|
| 924 |
+
except subprocess.TimeoutExpired:
|
| 925 |
+
print("❌ Open-Sora generation timed out (5 minutes)")
|
| 926 |
return None
|
|
|
|
| 927 |
except Exception as e:
|
| 928 |
print(f"❌ Open-Sora generation failed: {e}")
|
| 929 |
+
import traceback
|
| 930 |
+
traceback.print_exc()
|
| 931 |
return None
|
| 932 |
|
| 933 |
def _create_professional_static_video(self, scene: Dict, background_images: Dict) -> str:
|
|
|
|
| 1312 |
}
|
| 1313 |
return None, error_info, f"❌ Generation failed: {str(e)}", [], []
|
| 1314 |
|
| 1315 |
+
def _create_lightweight_animated_video(self, scene: Dict, character_images: Dict, background_images: Dict) -> str:
|
| 1316 |
+
"""Create lightweight animated video with character/background compositing"""
|
| 1317 |
+
scene_num = scene['scene_number']
|
| 1318 |
+
|
| 1319 |
+
if scene_num not in background_images:
|
| 1320 |
+
print(f"❌ No background image for scene {scene_num}")
|
| 1321 |
+
return None
|
| 1322 |
+
|
| 1323 |
+
video_path = f"{self.output_dir}/video_animated_scene_{scene_num}.mp4"
|
| 1324 |
+
|
| 1325 |
+
try:
|
| 1326 |
+
print(f"🎬 Creating lightweight animated video for scene {scene_num}...")
|
| 1327 |
+
|
| 1328 |
+
# Load background image
|
| 1329 |
+
bg_path = background_images[scene_num]
|
| 1330 |
+
print(f"📁 Loading background from: {bg_path}")
|
| 1331 |
+
|
| 1332 |
+
if not os.path.exists(bg_path):
|
| 1333 |
+
print(f"❌ Background file not found: {bg_path}")
|
| 1334 |
+
return None
|
| 1335 |
+
|
| 1336 |
+
bg_image = Image.open(bg_path).resize((1024, 768))
|
| 1337 |
+
bg_array = np.array(bg_image)
|
| 1338 |
+
bg_array = cv2.cvtColor(bg_array, cv2.COLOR_RGB2BGR)
|
| 1339 |
+
|
| 1340 |
+
# Try to load character images for this scene
|
| 1341 |
+
scene_characters = scene.get('characters_present', [])
|
| 1342 |
+
character_overlays = []
|
| 1343 |
+
|
| 1344 |
+
for char_name in scene_characters:
|
| 1345 |
+
for char_key, char_path in character_images.items():
|
| 1346 |
+
if char_name.lower() in char_key.lower():
|
| 1347 |
+
if os.path.exists(char_path):
|
| 1348 |
+
char_img = Image.open(char_path).convert("RGBA")
|
| 1349 |
+
# Resize character to reasonable size (25% of background)
|
| 1350 |
+
char_w, char_h = char_img.size
|
| 1351 |
+
new_h = int(768 * 0.25) # 25% of background height
|
| 1352 |
+
new_w = int(char_w * (new_h / char_h))
|
| 1353 |
+
char_img = char_img.resize((new_w, new_h))
|
| 1354 |
+
character_overlays.append({
|
| 1355 |
+
'image': np.array(char_img),
|
| 1356 |
+
'name': char_name,
|
| 1357 |
+
'original_pos': (100 + len(character_overlays) * 200, 768 - new_h - 50) # Bottom positioning
|
| 1358 |
+
})
|
| 1359 |
+
print(f"✅ Loaded character: {char_name}")
|
| 1360 |
+
break
|
| 1361 |
+
|
| 1362 |
+
print(f"📐 Background size: {bg_array.shape}")
|
| 1363 |
+
print(f"🎭 Characters loaded: {len(character_overlays)}")
|
| 1364 |
+
|
| 1365 |
+
# Professional video settings
|
| 1366 |
+
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 1367 |
+
fps = 24 # Cinematic frame rate
|
| 1368 |
+
duration = int(scene.get('duration', 35))
|
| 1369 |
+
total_frames = duration * fps
|
| 1370 |
+
|
| 1371 |
+
print(f"🎬 Video settings: {fps}fps, {duration}s duration, {total_frames} frames")
|
| 1372 |
+
|
| 1373 |
+
out = cv2.VideoWriter(video_path, fourcc, fps, (1024, 768))
|
| 1374 |
+
|
| 1375 |
+
if not out.isOpened():
|
| 1376 |
+
print(f"❌ Failed to open video writer for {video_path}")
|
| 1377 |
+
return None
|
| 1378 |
+
|
| 1379 |
+
# Advanced animation with character movement
|
| 1380 |
+
print(f"🎬 Generating {total_frames} animated frames...")
|
| 1381 |
+
|
| 1382 |
+
for i in range(total_frames):
|
| 1383 |
+
if i % 100 == 0: # Progress update every 100 frames
|
| 1384 |
+
print(f" Frame {i}/{total_frames} ({i/total_frames*100:.1f}%)")
|
| 1385 |
+
|
| 1386 |
+
frame = bg_array.copy()
|
| 1387 |
+
progress = i / total_frames
|
| 1388 |
+
|
| 1389 |
+
# Apply cinematic background effects
|
| 1390 |
+
frame = self._apply_cinematic_effects(frame, scene, progress)
|
| 1391 |
+
|
| 1392 |
+
# Animate characters if available
|
| 1393 |
+
for j, char_data in enumerate(character_overlays):
|
| 1394 |
+
char_img = char_data['image']
|
| 1395 |
+
char_name = char_data['name']
|
| 1396 |
+
base_x, base_y = char_data['original_pos']
|
| 1397 |
+
|
| 1398 |
+
# Different animation patterns based on scene mood
|
| 1399 |
+
mood = scene.get('mood', 'heartwarming')
|
| 1400 |
+
|
| 1401 |
+
if mood == 'exciting':
|
| 1402 |
+
# Bouncing animation
|
| 1403 |
+
offset_y = int(np.sin(progress * 8 * np.pi + j * np.pi/2) * 20)
|
| 1404 |
+
offset_x = int(np.sin(progress * 4 * np.pi + j * np.pi/3) * 15)
|
| 1405 |
+
elif mood == 'peaceful':
|
| 1406 |
+
# Gentle swaying
|
| 1407 |
+
offset_y = int(np.sin(progress * 2 * np.pi + j * np.pi/2) * 8)
|
| 1408 |
+
offset_x = int(np.sin(progress * 1.5 * np.pi + j * np.pi/3) * 12)
|
| 1409 |
+
elif mood == 'mysterious':
|
| 1410 |
+
# Subtle floating
|
| 1411 |
+
offset_y = int(np.sin(progress * 3 * np.pi + j * np.pi/2) * 15)
|
| 1412 |
+
offset_x = int(np.cos(progress * 2 * np.pi + j * np.pi/4) * 10)
|
| 1413 |
+
else:
|
| 1414 |
+
# Default: slight breathing animation
|
| 1415 |
+
scale_factor = 1.0 + np.sin(progress * 4 * np.pi + j * np.pi/2) * 0.02
|
| 1416 |
+
offset_y = int(np.sin(progress * 3 * np.pi + j * np.pi/2) * 5)
|
| 1417 |
+
offset_x = 0
|
| 1418 |
+
|
| 1419 |
+
# Calculate final position
|
| 1420 |
+
final_x = base_x + offset_x
|
| 1421 |
+
final_y = base_y + offset_y
|
| 1422 |
+
|
| 1423 |
+
# Overlay character on frame
|
| 1424 |
+
if char_img.shape[2] == 4: # Has alpha channel
|
| 1425 |
+
frame = self._overlay_character(frame, char_img, final_x, final_y)
|
| 1426 |
+
else:
|
| 1427 |
+
# Simple overlay without alpha
|
| 1428 |
+
char_rgb = cv2.cvtColor(char_img[:,:,:3], cv2.COLOR_RGB2BGR)
|
| 1429 |
+
h, w = char_rgb.shape[:2]
|
| 1430 |
+
if (final_y >= 0 and final_y + h < 768 and
|
| 1431 |
+
final_x >= 0 and final_x + w < 1024):
|
| 1432 |
+
frame[final_y:final_y+h, final_x:final_x+w] = char_rgb
|
| 1433 |
+
|
| 1434 |
+
out.write(frame)
|
| 1435 |
+
|
| 1436 |
+
print(f"✅ All {total_frames} animated frames generated")
|
| 1437 |
+
|
| 1438 |
+
out.release()
|
| 1439 |
+
|
| 1440 |
+
if os.path.exists(video_path):
|
| 1441 |
+
file_size = os.path.getsize(video_path)
|
| 1442 |
+
print(f"✅ Lightweight animated video created: {video_path} ({file_size / (1024*1024):.1f} MB)")
|
| 1443 |
+
return video_path
|
| 1444 |
+
else:
|
| 1445 |
+
print(f"❌ Video file not created: {video_path}")
|
| 1446 |
+
return None
|
| 1447 |
+
|
| 1448 |
+
except Exception as e:
|
| 1449 |
+
print(f"❌ Lightweight animated video creation failed for scene {scene_num}: {e}")
|
| 1450 |
+
import traceback
|
| 1451 |
+
traceback.print_exc()
|
| 1452 |
+
return None
|
| 1453 |
+
|
| 1454 |
+
def _overlay_character(self, background, character_rgba, x, y):
|
| 1455 |
+
"""Overlay character with alpha transparency on background"""
|
| 1456 |
+
try:
|
| 1457 |
+
char_h, char_w = character_rgba.shape[:2]
|
| 1458 |
+
bg_h, bg_w = background.shape[:2]
|
| 1459 |
+
|
| 1460 |
+
# Ensure the character fits within background bounds
|
| 1461 |
+
if x < 0 or y < 0 or x + char_w > bg_w or y + char_h > bg_h:
|
| 1462 |
+
return background
|
| 1463 |
+
|
| 1464 |
+
# Extract RGB and alpha channels
|
| 1465 |
+
char_rgb = character_rgba[:, :, :3]
|
| 1466 |
+
char_alpha = character_rgba[:, :, 3] / 255.0
|
| 1467 |
+
|
| 1468 |
+
# Convert character to BGR for OpenCV
|
| 1469 |
+
char_bgr = cv2.cvtColor(char_rgb, cv2.COLOR_RGB2BGR)
|
| 1470 |
+
|
| 1471 |
+
# Get the region of interest from background
|
| 1472 |
+
roi = background[y:y+char_h, x:x+char_w]
|
| 1473 |
+
|
| 1474 |
+
# Blend character with background using alpha
|
| 1475 |
+
for c in range(3):
|
| 1476 |
+
roi[:, :, c] = (char_alpha * char_bgr[:, :, c] +
|
| 1477 |
+
(1 - char_alpha) * roi[:, :, c])
|
| 1478 |
+
|
| 1479 |
+
background[y:y+char_h, x:x+char_w] = roi
|
| 1480 |
+
return background
|
| 1481 |
+
|
| 1482 |
+
except Exception as e:
|
| 1483 |
+
print(f"⚠️ Character overlay failed: {e}")
|
| 1484 |
+
return background
|
| 1485 |
+
|
| 1486 |
# Initialize professional generator
# Module-level instance created once at import time; heavy model loading
# appears deferred to load_models() (guarded by models_loaded) — confirm.
generator = ProfessionalCartoonFilmGenerator()