Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -65,132 +65,99 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 65 |
|
| 66 |
@spaces.GPU
|
| 67 |
def load_models(self):
|
| 68 |
-
"""Load
|
| 69 |
-
if self.models_loaded:
|
| 70 |
-
return
|
| 71 |
-
|
| 72 |
-
print("🚀 Loading professional-grade models...")
|
| 73 |
-
|
| 74 |
try:
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
try:
|
|
|
|
|
|
|
|
|
|
| 78 |
self.flux_pipe = FluxPipeline.from_pretrained(
|
| 79 |
"black-forest-labs/FLUX.1-dev",
|
| 80 |
-
torch_dtype=
|
| 81 |
-
|
| 82 |
-
use_safetensors=True
|
| 83 |
-
).to(self.device)
|
| 84 |
-
print("✅ FLUX pipeline loaded successfully!")
|
| 85 |
-
self.using_flux = True
|
| 86 |
-
except Exception as flux_error:
|
| 87 |
-
if "401" in str(flux_error) or "authentication" in str(flux_error).lower():
|
| 88 |
-
print("🔐 FLUX authentication failed - model requires Hugging Face token")
|
| 89 |
-
print("💡 To use FLUX, you need to:")
|
| 90 |
-
print(" 1. Get a Hugging Face token from https://huggingface.co/settings/tokens")
|
| 91 |
-
print(" 2. Accept the FLUX model license at https://huggingface.co/black-forest-labs/FLUX.1-dev")
|
| 92 |
-
print(" 3. Set your token: huggingface-cli login")
|
| 93 |
-
print("🔄 Falling back to Stable Diffusion...")
|
| 94 |
-
self.using_flux = False
|
| 95 |
-
else:
|
| 96 |
-
print(f"❌ FLUX loading failed: {flux_error}")
|
| 97 |
-
self.using_flux = False
|
| 98 |
-
except Exception as e:
|
| 99 |
-
print(f"❌ FLUX pipeline failed: {e}")
|
| 100 |
-
self.using_flux = False
|
| 101 |
-
|
| 102 |
-
# Load cartoon/anime LoRA for character generation (only if FLUX is available)
|
| 103 |
-
if self.using_flux:
|
| 104 |
-
print("🎭 Loading cartoon LoRA models...")
|
| 105 |
-
try:
|
| 106 |
-
# Load multiple LoRA models for different purposes
|
| 107 |
-
self.cartoon_lora = hf_hub_download(
|
| 108 |
-
"prithivMLmods/Canopus-LoRA-Flux-Anime",
|
| 109 |
-
"Canopus-LoRA-Flux-Anime.safetensors"
|
| 110 |
-
)
|
| 111 |
-
self.character_lora = hf_hub_download(
|
| 112 |
-
"enhanceaiteam/Anime-Flux",
|
| 113 |
-
"anime-flux.safetensors"
|
| 114 |
-
)
|
| 115 |
-
self.sketch_lora = hf_hub_download(
|
| 116 |
-
"Shakker-Labs/FLUX.1-dev-LoRA-Children-Simple-Sketch",
|
| 117 |
-
"FLUX-dev-lora-children-simple-sketch.safetensors"
|
| 118 |
)
|
| 119 |
-
print("✅ LoRA models loaded successfully")
|
| 120 |
-
except Exception as e:
|
| 121 |
-
print(f"⚠️ Some LoRA models failed to load: {e}")
|
| 122 |
-
|
| 123 |
-
# Enable memory optimizations for FLUX
|
| 124 |
-
if self.flux_pipe:
|
| 125 |
-
self.flux_pipe.enable_vae_slicing()
|
| 126 |
-
self.flux_pipe.enable_vae_tiling()
|
| 127 |
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
| 142 |
print("🔄 Loading Stable Diffusion fallback model...")
|
|
|
|
| 143 |
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
self.
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
safety_checker=None,
|
| 151 |
-
requires_safety_checker=False
|
| 152 |
-
).to(self.device)
|
| 153 |
-
print("✅ Loaded Stable Diffusion v1.4")
|
| 154 |
-
except Exception as sd_error:
|
| 155 |
-
print(f"⚠️ SD v1.4 failed: {sd_error}")
|
| 156 |
-
# Try the original model
|
| 157 |
-
self.flux_pipe = StableDiffusionPipeline.from_pretrained(
|
| 158 |
-
"runwayml/stable-diffusion-v1-5",
|
| 159 |
-
torch_dtype=torch.float16,
|
| 160 |
-
use_safetensors=True,
|
| 161 |
-
safety_checker=None,
|
| 162 |
-
requires_safety_checker=False
|
| 163 |
-
).to(self.device)
|
| 164 |
-
print("✅ Loaded Stable Diffusion v1.5")
|
| 165 |
|
| 166 |
-
#
|
| 167 |
-
self.
|
| 168 |
-
if hasattr(self.flux_pipe, 'enable_vae_tiling'):
|
| 169 |
-
self.flux_pipe.enable_vae_tiling()
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
print("✅ Stable Diffusion fallback loaded successfully")
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
print(f"❌ Stable Diffusion fallback also failed: {e2}")
|
| 175 |
-
self.flux_pipe = None
|
| 176 |
-
|
| 177 |
-
try:
|
| 178 |
-
# 2. Advanced script generation model
|
| 179 |
print("📝 Loading script enhancement model...")
|
| 180 |
-
self.
|
| 181 |
-
"
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
device=0 if self.device == "cuda" else -1
|
| 185 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
print("✅ Script enhancer loaded")
|
| 187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
except Exception as e:
|
| 189 |
-
print(f"❌
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
print("🎬 All professional models loaded!")
|
| 194 |
|
| 195 |
def clear_gpu_memory(self):
|
| 196 |
"""Clear GPU memory between operations"""
|
|
@@ -482,203 +449,216 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 482 |
|
| 483 |
@spaces.GPU
|
| 484 |
def generate_professional_character_images(self, characters: List[Dict]) -> Dict[str, str]:
|
| 485 |
-
"""Generate
|
| 486 |
-
self.load_models()
|
| 487 |
character_images = {}
|
| 488 |
|
| 489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
print("❌ No image generation pipeline available")
|
| 491 |
return character_images
|
|
|
|
|
|
|
| 492 |
|
| 493 |
for character in characters:
|
|
|
|
|
|
|
|
|
|
| 494 |
try:
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
# Load appropriate LoRA based on character type (only for FLUX)
|
| 498 |
-
if hasattr(self.flux_pipe, 'load_lora_weights') and "anime" in character.get("animation_style", "").lower():
|
| 499 |
-
if hasattr(self, 'cartoon_lora'):
|
| 500 |
-
try:
|
| 501 |
-
self.flux_pipe.load_lora_weights(self.cartoon_lora)
|
| 502 |
-
except Exception as e:
|
| 503 |
-
print(f"⚠️ LoRA loading failed: {e}")
|
| 504 |
|
| 505 |
-
#
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
prompt = self.optimize_prompt_for_clip(prompt)
|
| 513 |
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
"""
|
| 518 |
|
| 519 |
-
#
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
except Exception as e:
|
| 543 |
-
if "CLIP" in str(e) and "token" in str(e).lower():
|
| 544 |
-
print(f"⚠️ CLIP token error detected, using simplified prompt...")
|
| 545 |
-
# Fallback to very simple prompt
|
| 546 |
-
simple_prompt = f"anime character, {character['name']}, clean background"
|
| 547 |
-
simple_prompt = self.optimize_prompt_for_clip(simple_prompt, max_tokens=30)
|
| 548 |
-
|
| 549 |
-
if hasattr(self.flux_pipe, 'max_sequence_length'):
|
| 550 |
-
image = self.flux_pipe(
|
| 551 |
-
prompt=simple_prompt,
|
| 552 |
-
negative_prompt="low quality, blurry",
|
| 553 |
-
num_inference_steps=20,
|
| 554 |
-
guidance_scale=3.0,
|
| 555 |
-
height=1024,
|
| 556 |
-
width=1024,
|
| 557 |
-
max_sequence_length=128
|
| 558 |
-
).images[0]
|
| 559 |
-
else:
|
| 560 |
-
image = self.flux_pipe(
|
| 561 |
-
prompt=simple_prompt,
|
| 562 |
-
negative_prompt="low quality, blurry",
|
| 563 |
-
num_inference_steps=20,
|
| 564 |
-
guidance_scale=7.0,
|
| 565 |
-
height=1024,
|
| 566 |
-
width=1024
|
| 567 |
-
).images[0]
|
| 568 |
-
else:
|
| 569 |
-
raise e
|
| 570 |
|
|
|
|
| 571 |
char_path = f"{self.output_dir}/char_{character['name'].replace(' ', '_')}.png"
|
| 572 |
image.save(char_path)
|
| 573 |
-
character_images[character['name']] = char_path
|
| 574 |
-
|
| 575 |
-
# Create download URL for character
|
| 576 |
-
download_info = self.create_download_url(char_path, f"character_{character['name']}")
|
| 577 |
-
print(f"✅ Generated high-quality character: {character['name']}")
|
| 578 |
-
print(download_info)
|
| 579 |
|
| 580 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 581 |
|
| 582 |
except Exception as e:
|
| 583 |
-
print(f"❌ Error generating character {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
|
| 585 |
return character_images
|
| 586 |
|
| 587 |
@spaces.GPU
|
| 588 |
def generate_cinematic_backgrounds(self, scenes: List[Dict], color_palette: str) -> Dict[int, str]:
|
| 589 |
-
"""Generate cinematic
|
| 590 |
-
self.load_models()
|
| 591 |
background_images = {}
|
| 592 |
|
| 593 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 594 |
print("❌ No image generation pipeline available")
|
| 595 |
return background_images
|
|
|
|
|
|
|
| 596 |
|
| 597 |
for scene in scenes:
|
|
|
|
|
|
|
|
|
|
| 598 |
try:
|
| 599 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 600 |
|
| 601 |
-
|
| 602 |
-
background_desc = scene['background'][:80] # Limit background description
|
| 603 |
-
mood = scene['mood'][:30]
|
| 604 |
-
shot_type = scene.get('shot_type', 'medium shot')[:20]
|
| 605 |
-
animation_notes = scene.get('animation_notes', 'professional background art')[:40]
|
| 606 |
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
|
|
|
| 611 |
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
"""
|
| 616 |
|
| 617 |
-
#
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
except Exception as e:
|
| 641 |
-
if "CLIP" in str(e) and "token" in str(e).lower():
|
| 642 |
-
print(f"⚠️ CLIP token error detected for background, using simplified prompt...")
|
| 643 |
-
# Fallback to very simple prompt
|
| 644 |
-
simple_prompt = f"cartoon background, {scene['background'][:40]}, clean"
|
| 645 |
-
simple_prompt = self.optimize_prompt_for_clip(simple_prompt, max_tokens=25)
|
| 646 |
-
|
| 647 |
-
if hasattr(self.flux_pipe, 'max_sequence_length'):
|
| 648 |
-
image = self.flux_pipe(
|
| 649 |
-
prompt=simple_prompt,
|
| 650 |
-
negative_prompt="characters, low quality",
|
| 651 |
-
num_inference_steps=15,
|
| 652 |
-
guidance_scale=3.0,
|
| 653 |
-
height=768,
|
| 654 |
-
width=1024,
|
| 655 |
-
max_sequence_length=128
|
| 656 |
-
).images[0]
|
| 657 |
-
else:
|
| 658 |
-
image = self.flux_pipe(
|
| 659 |
-
prompt=simple_prompt,
|
| 660 |
-
negative_prompt="characters, low quality",
|
| 661 |
-
num_inference_steps=15,
|
| 662 |
-
guidance_scale=7.0,
|
| 663 |
-
height=768,
|
| 664 |
-
width=1024
|
| 665 |
-
).images[0]
|
| 666 |
-
else:
|
| 667 |
-
raise e
|
| 668 |
|
| 669 |
-
|
|
|
|
| 670 |
image.save(bg_path)
|
| 671 |
-
background_images[scene['scene_number']] = bg_path
|
| 672 |
-
|
| 673 |
-
# Create download URL for background
|
| 674 |
-
download_info = self.create_download_url(bg_path, f"background_scene_{scene['scene_number']}")
|
| 675 |
-
print(f"✅ Created cinematic background for scene {scene['scene_number']}")
|
| 676 |
-
print(download_info)
|
| 677 |
|
| 678 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 679 |
|
| 680 |
except Exception as e:
|
| 681 |
print(f"❌ Error generating background for scene {scene['scene_number']}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 682 |
|
| 683 |
return background_images
|
| 684 |
|
|
@@ -687,6 +667,13 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 687 |
try:
|
| 688 |
print("🎬 Setting up Open-Sora 2.0 for video generation...")
|
| 689 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 690 |
# Check if we're already in the right directory
|
| 691 |
current_dir = os.getcwd()
|
| 692 |
opensora_dir = os.path.join(current_dir, "Open-Sora")
|
|
@@ -694,33 +681,97 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 694 |
# Clone Open-Sora repository if it doesn't exist
|
| 695 |
if not os.path.exists(opensora_dir):
|
| 696 |
print("📥 Cloning Open-Sora repository...")
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 700 |
|
| 701 |
# Check if the repository was cloned successfully
|
| 702 |
if not os.path.exists(opensora_dir):
|
| 703 |
print("❌ Failed to clone Open-Sora repository")
|
| 704 |
return False
|
| 705 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 706 |
# Check if model weights exist
|
| 707 |
ckpts_dir = os.path.join(opensora_dir, "ckpts")
|
| 708 |
if not os.path.exists(ckpts_dir):
|
| 709 |
print("📥 Downloading Open-Sora 2.0 model...")
|
| 710 |
try:
|
| 711 |
-
|
|
|
|
| 712 |
"huggingface-cli", "download", "hpcai-tech/Open-Sora-v2",
|
| 713 |
"--local-dir", ckpts_dir
|
| 714 |
-
], check=True, capture_output=True)
|
| 715 |
-
|
| 716 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 717 |
return False
|
|
|
|
|
|
|
|
|
|
| 718 |
|
| 719 |
print("✅ Open-Sora setup completed")
|
| 720 |
return True
|
| 721 |
|
| 722 |
except Exception as e:
|
| 723 |
print(f"❌ Open-Sora setup failed: {e}")
|
|
|
|
|
|
|
| 724 |
return False
|
| 725 |
|
| 726 |
@spaces.GPU
|
|
@@ -746,17 +797,23 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 746 |
if video_path:
|
| 747 |
print(f"✅ Open-Sora video generated for scene {scene_num}")
|
| 748 |
else:
|
| 749 |
-
print(f"❌ Open-Sora failed for scene {scene_num}, trying
|
| 750 |
-
video_path = self.
|
|
|
|
|
|
|
|
|
|
| 751 |
|
| 752 |
# If professional video fails, try simple video
|
| 753 |
if not video_path:
|
| 754 |
-
print(f"🔄
|
| 755 |
video_path = self._create_simple_static_video(scene, background_images)
|
| 756 |
else:
|
| 757 |
-
print(f"🎬
|
| 758 |
-
#
|
| 759 |
-
video_path = self.
|
|
|
|
|
|
|
|
|
|
| 760 |
|
| 761 |
if video_path and os.path.exists(video_path):
|
| 762 |
scene_videos.append(video_path)
|
|
@@ -804,6 +861,7 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 804 |
|
| 805 |
# Use the optimization function to ensure CLIP compatibility
|
| 806 |
prompt = self.optimize_prompt_for_clip(prompt)
|
|
|
|
| 807 |
|
| 808 |
video_path = f"{self.output_dir}/video_scene_{scene['scene_number']}.mp4"
|
| 809 |
|
|
@@ -815,6 +873,18 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 815 |
print("❌ Open-Sora directory not found")
|
| 816 |
return None
|
| 817 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
# Run Open-Sora inference
|
| 819 |
cmd = [
|
| 820 |
"torchrun", "--nproc_per_node", "1", "--standalone",
|
|
@@ -827,7 +897,14 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 827 |
"--motion-score", "6" # High motion for dynamic scenes
|
| 828 |
]
|
| 829 |
|
| 830 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 831 |
|
| 832 |
if result.returncode == 0:
|
| 833 |
# Find generated video file
|
|
@@ -835,12 +912,22 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 835 |
if file.endswith('.mp4') and 'scene' not in file:
|
| 836 |
src_path = os.path.join(self.output_dir, file)
|
| 837 |
os.rename(src_path, video_path)
|
|
|
|
| 838 |
return video_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 839 |
|
|
|
|
|
|
|
| 840 |
return None
|
| 841 |
-
|
| 842 |
except Exception as e:
|
| 843 |
print(f"❌ Open-Sora generation failed: {e}")
|
|
|
|
|
|
|
| 844 |
return None
|
| 845 |
|
| 846 |
def _create_professional_static_video(self, scene: Dict, background_images: Dict) -> str:
|
|
@@ -1225,6 +1312,177 @@ class ProfessionalCartoonFilmGenerator:
|
|
| 1225 |
}
|
| 1226 |
return None, error_info, f"❌ Generation failed: {str(e)}", [], []
|
| 1227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1228 |
# Initialize professional generator
|
| 1229 |
generator = ProfessionalCartoonFilmGenerator()
|
| 1230 |
|
|
|
|
| 65 |
|
| 66 |
@spaces.GPU
|
| 67 |
def load_models(self):
|
| 68 |
+
"""Load all required AI models for professional generation"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
try:
|
| 70 |
+
print("🚀 Loading professional-grade models...")
|
| 71 |
+
|
| 72 |
+
# Clear GPU memory first
|
| 73 |
+
self.clear_gpu_memory()
|
| 74 |
+
|
| 75 |
+
# Detect device and set appropriate dtype
|
| 76 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 77 |
+
self.dtype = torch.float16 if self.device == "cuda" else torch.float32
|
| 78 |
+
|
| 79 |
+
print(f"🎮 Using device: {self.device} with dtype: {self.dtype}")
|
| 80 |
+
|
| 81 |
+
# Try to load FLUX first
|
| 82 |
try:
|
| 83 |
+
print("🎨 Loading FLUX pipeline...")
|
| 84 |
+
from diffusers import FluxPipeline
|
| 85 |
+
|
| 86 |
self.flux_pipe = FluxPipeline.from_pretrained(
|
| 87 |
"black-forest-labs/FLUX.1-dev",
|
| 88 |
+
torch_dtype=self.dtype,
|
| 89 |
+
device_map="auto" if self.device == "cuda" else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
+
if self.device == "cuda":
|
| 93 |
+
self.flux_pipe = self.flux_pipe.to("cuda")
|
| 94 |
+
|
| 95 |
+
print("✅ FLUX pipeline loaded successfully")
|
| 96 |
+
self.flux_available = True
|
| 97 |
+
|
| 98 |
+
except Exception as e:
|
| 99 |
+
print("🔐 FLUX authentication failed - model requires Hugging Face token")
|
| 100 |
+
print("💡 To use FLUX, you need to:")
|
| 101 |
+
print(" 1. Get a Hugging Face token from https://huggingface.co/settings/tokens")
|
| 102 |
+
print(" 2. Accept the FLUX model license at https://huggingface.co/black-forest-labs/FLUX.1-dev")
|
| 103 |
+
print(" 3. Set your token: huggingface-cli login")
|
| 104 |
+
print("🔄 Falling back to Stable Diffusion...")
|
| 105 |
+
self.flux_available = False
|
| 106 |
+
|
| 107 |
+
# Load Stable Diffusion fallback
|
| 108 |
+
if not self.flux_available:
|
| 109 |
print("🔄 Loading Stable Diffusion fallback model...")
|
| 110 |
+
from diffusers import StableDiffusionPipeline, DDIMScheduler
|
| 111 |
|
| 112 |
+
self.sd_pipe = StableDiffusionPipeline.from_pretrained(
|
| 113 |
+
"CompVis/stable-diffusion-v1-4",
|
| 114 |
+
torch_dtype=self.dtype,
|
| 115 |
+
safety_checker=None,
|
| 116 |
+
requires_safety_checker=False
|
| 117 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
+
# Configure scheduler for better quality
|
| 120 |
+
self.sd_pipe.scheduler = DDIMScheduler.from_config(self.sd_pipe.scheduler.config)
|
|
|
|
|
|
|
| 121 |
|
| 122 |
+
if self.device == "cuda":
|
| 123 |
+
self.sd_pipe = self.sd_pipe.to("cuda")
|
| 124 |
+
|
| 125 |
+
print("✅ Loaded Stable Diffusion v1.4")
|
| 126 |
print("✅ Stable Diffusion fallback loaded successfully")
|
| 127 |
+
|
| 128 |
+
# Load script enhancement model with correct device
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
print("📝 Loading script enhancement model...")
|
| 130 |
+
self.script_model = AutoModelForCausalLM.from_pretrained(
|
| 131 |
+
"microsoft/DialoGPT-medium",
|
| 132 |
+
torch_dtype=self.dtype,
|
| 133 |
+
device_map="auto" if self.device == "cuda" else None
|
|
|
|
| 134 |
)
|
| 135 |
+
self.script_tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
|
| 136 |
+
|
| 137 |
+
if self.script_tokenizer.pad_token is None:
|
| 138 |
+
self.script_tokenizer.pad_token = self.script_tokenizer.eos_token
|
| 139 |
+
|
| 140 |
+
if self.device == "cuda":
|
| 141 |
+
self.script_model = self.script_model.to("cuda")
|
| 142 |
+
|
| 143 |
+
print(f"Device set to use {self.device}")
|
| 144 |
print("✅ Script enhancer loaded")
|
| 145 |
|
| 146 |
+
# Set model states
|
| 147 |
+
if self.device == "cuda":
|
| 148 |
+
if self.flux_available:
|
| 149 |
+
self.flux_pipe.enable_model_cpu_offload()
|
| 150 |
+
else:
|
| 151 |
+
self.sd_pipe.enable_model_cpu_offload()
|
| 152 |
+
|
| 153 |
+
print("🎬 All professional models loaded!")
|
| 154 |
+
return True
|
| 155 |
+
|
| 156 |
except Exception as e:
|
| 157 |
+
print(f"❌ Model loading failed: {e}")
|
| 158 |
+
import traceback
|
| 159 |
+
traceback.print_exc()
|
| 160 |
+
return False
|
|
|
|
| 161 |
|
| 162 |
def clear_gpu_memory(self):
|
| 163 |
"""Clear GPU memory between operations"""
|
|
|
|
| 449 |
|
| 450 |
@spaces.GPU
|
| 451 |
def generate_professional_character_images(self, characters: List[Dict]) -> Dict[str, str]:
|
| 452 |
+
"""Generate professional character images with consistency"""
|
|
|
|
| 453 |
character_images = {}
|
| 454 |
|
| 455 |
+
print(f"🎭 Generating {len(characters)} professional character designs...")
|
| 456 |
+
|
| 457 |
+
# Check if we have any image generation pipeline available
|
| 458 |
+
if not hasattr(self, 'flux_available'):
|
| 459 |
+
print("❌ No image generation models loaded")
|
| 460 |
+
return character_images
|
| 461 |
+
|
| 462 |
+
pipeline = None
|
| 463 |
+
if self.flux_available and hasattr(self, 'flux_pipe'):
|
| 464 |
+
pipeline = self.flux_pipe
|
| 465 |
+
model_name = "FLUX"
|
| 466 |
+
elif hasattr(self, 'sd_pipe'):
|
| 467 |
+
pipeline = self.sd_pipe
|
| 468 |
+
model_name = "Stable Diffusion"
|
| 469 |
+
else:
|
| 470 |
print("❌ No image generation pipeline available")
|
| 471 |
return character_images
|
| 472 |
+
|
| 473 |
+
print(f"🎨 Using {model_name} for character generation")
|
| 474 |
|
| 475 |
for character in characters:
|
| 476 |
+
character_name = character['name']
|
| 477 |
+
print(f"\n🎨 Generating character: {character_name}")
|
| 478 |
+
|
| 479 |
try:
|
| 480 |
+
# Build comprehensive character prompt
|
| 481 |
+
base_prompt = f"Professional cartoon character design, {character['name']}, {character['description']}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
|
| 483 |
+
# Add style and quality modifiers
|
| 484 |
+
if self.flux_available:
|
| 485 |
+
# FLUX-specific prompt
|
| 486 |
+
prompt = f"{base_prompt}, Disney-Pixar animation style, highly detailed character sheet, clean white background, 2D animation model sheet, expressive face, vibrant colors, professional character design, perfect for animation"
|
| 487 |
+
else:
|
| 488 |
+
# Stable Diffusion prompt
|
| 489 |
+
prompt = f"{base_prompt}, anime style, cartoon character, clean background, high quality, detailed, 2D animation style, character sheet"
|
|
|
|
| 490 |
|
| 491 |
+
# Optimize prompt for CLIP
|
| 492 |
+
prompt = self.optimize_prompt_for_clip(prompt, max_tokens=75)
|
| 493 |
+
print(f"📝 Character prompt: {prompt}")
|
|
|
|
| 494 |
|
| 495 |
+
# Generate with appropriate settings
|
| 496 |
+
if self.flux_available:
|
| 497 |
+
# FLUX generation settings
|
| 498 |
+
image = pipeline(
|
| 499 |
+
prompt=prompt,
|
| 500 |
+
width=1024,
|
| 501 |
+
height=1024,
|
| 502 |
+
num_inference_steps=25,
|
| 503 |
+
guidance_scale=7.5,
|
| 504 |
+
generator=torch.Generator(device=self.device).manual_seed(42)
|
| 505 |
+
).images[0]
|
| 506 |
+
else:
|
| 507 |
+
# Stable Diffusion generation settings
|
| 508 |
+
image = pipeline(
|
| 509 |
+
prompt=prompt,
|
| 510 |
+
width=512,
|
| 511 |
+
height=512,
|
| 512 |
+
num_inference_steps=30,
|
| 513 |
+
guidance_scale=7.5,
|
| 514 |
+
generator=torch.Generator(device=self.device).manual_seed(42)
|
| 515 |
+
).images[0]
|
| 516 |
+
# Upscale for SD
|
| 517 |
+
image = image.resize((1024, 1024), Image.Resampling.LANCZOS)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
|
| 519 |
+
# Save character image
|
| 520 |
char_path = f"{self.output_dir}/char_{character['name'].replace(' ', '_')}.png"
|
| 521 |
image.save(char_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
|
| 523 |
+
# Verify file was created
|
| 524 |
+
if os.path.exists(char_path):
|
| 525 |
+
file_size = os.path.getsize(char_path)
|
| 526 |
+
character_images[character_name] = char_path
|
| 527 |
+
|
| 528 |
+
# Create download URL
|
| 529 |
+
download_info = self.create_download_url(char_path, f"character_{character['name']}")
|
| 530 |
+
print(f"📥 Generated character_{character['name']}: char_{character['name'].replace(' ', '_')}.png")
|
| 531 |
+
print(f" 📊 File size: {file_size / (1024*1024):.1f} MB")
|
| 532 |
+
print(f" 📁 Internal path: {char_path}")
|
| 533 |
+
print(download_info)
|
| 534 |
+
|
| 535 |
+
# Clear GPU memory after each generation
|
| 536 |
+
if self.device == "cuda":
|
| 537 |
+
torch.cuda.empty_cache()
|
| 538 |
+
gc.collect()
|
| 539 |
+
else:
|
| 540 |
+
print(f"❌ Failed to save character image: {char_path}")
|
| 541 |
|
| 542 |
except Exception as e:
|
| 543 |
+
print(f"❌ Error generating character {character_name}: {e}")
|
| 544 |
+
import traceback
|
| 545 |
+
traceback.print_exc()
|
| 546 |
+
# Continue with next character
|
| 547 |
+
continue
|
| 548 |
+
|
| 549 |
+
print(f"\n📊 Character generation summary:")
|
| 550 |
+
print(f" - Characters requested: {len(characters)}")
|
| 551 |
+
print(f" - Characters generated: {len(character_images)}")
|
| 552 |
+
print(f" - Success rate: {len(character_images)/len(characters)*100:.1f}%")
|
| 553 |
|
| 554 |
return character_images
|
| 555 |
|
| 556 |
@spaces.GPU
|
| 557 |
def generate_cinematic_backgrounds(self, scenes: List[Dict], color_palette: str) -> Dict[int, str]:
|
| 558 |
+
"""Generate professional cinematic backgrounds for each scene"""
|
|
|
|
| 559 |
background_images = {}
|
| 560 |
|
| 561 |
+
print(f"🎞️ Generating {len(scenes)} cinematic backgrounds...")
|
| 562 |
+
|
| 563 |
+
# Check if we have any image generation pipeline available
|
| 564 |
+
if not hasattr(self, 'flux_available'):
|
| 565 |
+
print("❌ No image generation models loaded")
|
| 566 |
+
return background_images
|
| 567 |
+
|
| 568 |
+
pipeline = None
|
| 569 |
+
if self.flux_available and hasattr(self, 'flux_pipe'):
|
| 570 |
+
pipeline = self.flux_pipe
|
| 571 |
+
model_name = "FLUX"
|
| 572 |
+
elif hasattr(self, 'sd_pipe'):
|
| 573 |
+
pipeline = self.sd_pipe
|
| 574 |
+
model_name = "Stable Diffusion"
|
| 575 |
+
else:
|
| 576 |
print("❌ No image generation pipeline available")
|
| 577 |
return background_images
|
| 578 |
+
|
| 579 |
+
print(f"🎨 Using {model_name} for background generation")
|
| 580 |
|
| 581 |
for scene in scenes:
|
| 582 |
+
scene_num = scene['scene_number']
|
| 583 |
+
print(f"\n🌄 Generating background for scene {scene_num}")
|
| 584 |
+
|
| 585 |
try:
|
| 586 |
+
# Build cinematic background prompt
|
| 587 |
+
background_desc = scene['background']
|
| 588 |
+
mood = scene.get('mood', 'neutral')
|
| 589 |
+
shot_type = scene.get('shot_type', 'medium shot')
|
| 590 |
+
lighting = scene.get('lighting', 'natural lighting')
|
| 591 |
|
| 592 |
+
base_prompt = f"Cinematic background scene, {background_desc}, {mood} atmosphere, {lighting}"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
|
| 594 |
+
# Add style and quality modifiers
|
| 595 |
+
if self.flux_available:
|
| 596 |
+
prompt = f"{base_prompt}, Disney-Pixar animation style, detailed landscape, professional background art, vibrant colors, high quality, cinematic composition, no characters"
|
| 597 |
+
else:
|
| 598 |
+
prompt = f"{base_prompt}, anime style background, detailed landscape, high quality, cinematic, {color_palette} color palette, no people"
|
| 599 |
|
| 600 |
+
# Optimize for CLIP
|
| 601 |
+
prompt = self.optimize_prompt_for_clip(prompt, max_tokens=75)
|
| 602 |
+
print(f"📝 Background prompt: {prompt}")
|
|
|
|
| 603 |
|
| 604 |
+
# Generate with appropriate settings
|
| 605 |
+
if self.flux_available:
|
| 606 |
+
# FLUX generation settings
|
| 607 |
+
image = pipeline(
|
| 608 |
+
prompt=prompt,
|
| 609 |
+
width=1024,
|
| 610 |
+
height=768, # 4:3 aspect ratio for video
|
| 611 |
+
num_inference_steps=25,
|
| 612 |
+
guidance_scale=7.5,
|
| 613 |
+
generator=torch.Generator(device=self.device).manual_seed(scene_num * 10)
|
| 614 |
+
).images[0]
|
| 615 |
+
else:
|
| 616 |
+
# Stable Diffusion generation settings
|
| 617 |
+
image = pipeline(
|
| 618 |
+
prompt=prompt,
|
| 619 |
+
width=512,
|
| 620 |
+
height=384, # 4:3 aspect ratio
|
| 621 |
+
num_inference_steps=30,
|
| 622 |
+
guidance_scale=7.5,
|
| 623 |
+
generator=torch.Generator(device=self.device).manual_seed(scene_num * 10)
|
| 624 |
+
).images[0]
|
| 625 |
+
# Upscale for SD
|
| 626 |
+
image = image.resize((1024, 768), Image.Resampling.LANCZOS)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 627 |
|
| 628 |
+
# Save background image
|
| 629 |
+
bg_path = f"{self.output_dir}/bg_scene_{scene_num}.png"
|
| 630 |
image.save(bg_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 631 |
|
| 632 |
+
# Verify file was created
|
| 633 |
+
if os.path.exists(bg_path):
|
| 634 |
+
file_size = os.path.getsize(bg_path)
|
| 635 |
+
background_images[scene_num] = bg_path
|
| 636 |
+
|
| 637 |
+
# Create download URL
|
| 638 |
+
download_info = self.create_download_url(bg_path, f"background_scene_{scene_num}")
|
| 639 |
+
print(f"📥 Generated background_scene_{scene_num}: bg_scene_{scene_num}.png")
|
| 640 |
+
print(f" 📊 File size: {file_size / (1024*1024):.1f} MB")
|
| 641 |
+
print(f" 📁 Internal path: {bg_path}")
|
| 642 |
+
print(download_info)
|
| 643 |
+
|
| 644 |
+
# Clear GPU memory after each generation
|
| 645 |
+
if self.device == "cuda":
|
| 646 |
+
torch.cuda.empty_cache()
|
| 647 |
+
gc.collect()
|
| 648 |
+
else:
|
| 649 |
+
print(f"❌ Failed to save background image: {bg_path}")
|
| 650 |
|
| 651 |
except Exception as e:
|
| 652 |
print(f"❌ Error generating background for scene {scene['scene_number']}: {e}")
|
| 653 |
+
import traceback
|
| 654 |
+
traceback.print_exc()
|
| 655 |
+
# Continue with next scene
|
| 656 |
+
continue
|
| 657 |
+
|
| 658 |
+
print(f"\n📊 Background generation summary:")
|
| 659 |
+
print(f" - Scenes requested: {len(scenes)}")
|
| 660 |
+
print(f" - Backgrounds generated: {len(background_images)}")
|
| 661 |
+
print(f" - Success rate: {len(background_images)/len(scenes)*100:.1f}%")
|
| 662 |
|
| 663 |
return background_images
|
| 664 |
|
|
|
|
| 667 |
try:
|
| 668 |
print("🎬 Setting up Open-Sora 2.0 for video generation...")
|
| 669 |
|
| 670 |
+
# Check available GPU memory
|
| 671 |
+
if torch.cuda.is_available():
|
| 672 |
+
gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
|
| 673 |
+
print(f"🎮 Available GPU memory: {gpu_memory:.1f} GB")
|
| 674 |
+
if gpu_memory < 16:
|
| 675 |
+
print("⚠️ Warning: Open-Sora requires 16GB+ GPU memory for stable operation")
|
| 676 |
+
|
| 677 |
# Check if we're already in the right directory
|
| 678 |
current_dir = os.getcwd()
|
| 679 |
opensora_dir = os.path.join(current_dir, "Open-Sora")
|
|
|
|
| 681 |
# Clone Open-Sora repository if it doesn't exist
|
| 682 |
if not os.path.exists(opensora_dir):
|
| 683 |
print("📥 Cloning Open-Sora repository...")
|
| 684 |
+
try:
|
| 685 |
+
result = subprocess.run([
|
| 686 |
+
"git", "clone", "https://github.com/hpcaitech/Open-Sora.git"
|
| 687 |
+
], check=True, capture_output=True, text=True, timeout=120)
|
| 688 |
+
print("✅ Repository cloned successfully")
|
| 689 |
+
except subprocess.TimeoutExpired:
|
| 690 |
+
print("❌ Repository cloning timed out")
|
| 691 |
+
return False
|
| 692 |
+
except subprocess.CalledProcessError as e:
|
| 693 |
+
print(f"❌ Repository cloning failed: {e.stderr}")
|
| 694 |
+
return False
|
| 695 |
|
| 696 |
# Check if the repository was cloned successfully
|
| 697 |
if not os.path.exists(opensora_dir):
|
| 698 |
print("❌ Failed to clone Open-Sora repository")
|
| 699 |
return False
|
| 700 |
|
| 701 |
+
# Check for required scripts
|
| 702 |
+
script_path = os.path.join(opensora_dir, "scripts/diffusion/inference.py")
|
| 703 |
+
config_path = os.path.join(opensora_dir, "configs/diffusion/inference/t2i2v_256px.py")
|
| 704 |
+
|
| 705 |
+
print(f"📁 Checking for script: {script_path}")
|
| 706 |
+
print(f"📁 Checking for config: {config_path}")
|
| 707 |
+
|
| 708 |
+
if not os.path.exists(script_path):
|
| 709 |
+
print(f"❌ Required script not found: {script_path}")
|
| 710 |
+
# List available files for debugging
|
| 711 |
+
scripts_dir = os.path.join(opensora_dir, "scripts")
|
| 712 |
+
if os.path.exists(scripts_dir):
|
| 713 |
+
print(f"📁 Available in scripts/: {os.listdir(scripts_dir)}")
|
| 714 |
+
return False
|
| 715 |
+
|
| 716 |
+
if not os.path.exists(config_path):
|
| 717 |
+
print(f"❌ Required config not found: {config_path}")
|
| 718 |
+
# List available configs for debugging
|
| 719 |
+
configs_dir = os.path.join(opensora_dir, "configs")
|
| 720 |
+
if os.path.exists(configs_dir):
|
| 721 |
+
print(f"📁 Available in configs/: {os.listdir(configs_dir)}")
|
| 722 |
+
return False
|
| 723 |
+
|
| 724 |
# Check if model weights exist
|
| 725 |
ckpts_dir = os.path.join(opensora_dir, "ckpts")
|
| 726 |
if not os.path.exists(ckpts_dir):
|
| 727 |
print("📥 Downloading Open-Sora 2.0 model...")
|
| 728 |
try:
|
| 729 |
+
# Use smaller timeout and check if huggingface-cli is available
|
| 730 |
+
result = subprocess.run([
|
| 731 |
"huggingface-cli", "download", "hpcai-tech/Open-Sora-v2",
|
| 732 |
"--local-dir", ckpts_dir
|
| 733 |
+
], check=True, capture_output=True, text=True, timeout=300)
|
| 734 |
+
print("✅ Model downloaded successfully")
|
| 735 |
+
except subprocess.TimeoutExpired:
|
| 736 |
+
print("❌ Model download timed out (5 minutes)")
|
| 737 |
+
return False
|
| 738 |
+
except subprocess.CalledProcessError as e:
|
| 739 |
+
print(f"❌ Model download failed: {e.stderr}")
|
| 740 |
+
return False
|
| 741 |
+
except FileNotFoundError:
|
| 742 |
+
print("❌ huggingface-cli not found - cannot download model")
|
| 743 |
+
return False
|
| 744 |
+
else:
|
| 745 |
+
print("✅ Model weights already exist")
|
| 746 |
+
|
| 747 |
+
# Check dependencies
|
| 748 |
+
try:
|
| 749 |
+
import torch.distributed
|
| 750 |
+
print("✅ torch.distributed available")
|
| 751 |
+
except ImportError:
|
| 752 |
+
print("❌ torch.distributed not available")
|
| 753 |
+
return False
|
| 754 |
+
|
| 755 |
+
# Test if torchrun is available
|
| 756 |
+
try:
|
| 757 |
+
result = subprocess.run(["torchrun", "--help"],
|
| 758 |
+
capture_output=True, text=True, timeout=10)
|
| 759 |
+
if result.returncode == 0:
|
| 760 |
+
print("✅ torchrun available")
|
| 761 |
+
else:
|
| 762 |
+
print("❌ torchrun not working properly")
|
| 763 |
return False
|
| 764 |
+
except (subprocess.TimeoutExpired, FileNotFoundError):
|
| 765 |
+
print("❌ torchrun not found")
|
| 766 |
+
return False
|
| 767 |
|
| 768 |
print("✅ Open-Sora setup completed")
|
| 769 |
return True
|
| 770 |
|
| 771 |
except Exception as e:
|
| 772 |
print(f"❌ Open-Sora setup failed: {e}")
|
| 773 |
+
import traceback
|
| 774 |
+
traceback.print_exc()
|
| 775 |
return False
|
| 776 |
|
| 777 |
@spaces.GPU
|
|
|
|
| 797 |
if video_path:
|
| 798 |
print(f"✅ Open-Sora video generated for scene {scene_num}")
|
| 799 |
else:
|
| 800 |
+
print(f"❌ Open-Sora failed for scene {scene_num}, trying lightweight animation...")
|
| 801 |
+
video_path = self._create_lightweight_animated_video(scene, character_images, background_images)
|
| 802 |
+
if not video_path:
|
| 803 |
+
print(f"🔄 Lightweight animation failed, trying static video...")
|
| 804 |
+
video_path = self._create_professional_static_video(scene, background_images)
|
| 805 |
|
| 806 |
# If professional video fails, try simple video
|
| 807 |
if not video_path:
|
| 808 |
+
print(f"🔄 All methods failed, trying simple video for scene {scene_num}...")
|
| 809 |
video_path = self._create_simple_static_video(scene, background_images)
|
| 810 |
else:
|
| 811 |
+
print(f"🎬 Open-Sora not available, using lightweight animation for scene {scene_num}...")
|
| 812 |
+
# First try lightweight animation, then fallback to static
|
| 813 |
+
video_path = self._create_lightweight_animated_video(scene, character_images, background_images)
|
| 814 |
+
if not video_path:
|
| 815 |
+
print(f"🔄 Lightweight animation failed, using static video fallback...")
|
| 816 |
+
video_path = self._create_professional_static_video(scene, background_images)
|
| 817 |
|
| 818 |
if video_path and os.path.exists(video_path):
|
| 819 |
scene_videos.append(video_path)
|
|
|
|
| 861 |
|
| 862 |
# Use the optimization function to ensure CLIP compatibility
|
| 863 |
prompt = self.optimize_prompt_for_clip(prompt)
|
| 864 |
+
print(f"🎬 Open-Sora prompt: {prompt}")
|
| 865 |
|
| 866 |
video_path = f"{self.output_dir}/video_scene_{scene['scene_number']}.mp4"
|
| 867 |
|
|
|
|
| 873 |
print("❌ Open-Sora directory not found")
|
| 874 |
return None
|
| 875 |
|
| 876 |
+
# Check for required files
|
| 877 |
+
script_path = os.path.join(opensora_dir, "scripts/diffusion/inference.py")
|
| 878 |
+
config_path = os.path.join(opensora_dir, "configs/diffusion/inference/t2i2v_256px.py")
|
| 879 |
+
|
| 880 |
+
if not os.path.exists(script_path):
|
| 881 |
+
print(f"❌ Open-Sora script not found: {script_path}")
|
| 882 |
+
return None
|
| 883 |
+
|
| 884 |
+
if not os.path.exists(config_path):
|
| 885 |
+
print(f"❌ Open-Sora config not found: {config_path}")
|
| 886 |
+
return None
|
| 887 |
+
|
| 888 |
# Run Open-Sora inference
|
| 889 |
cmd = [
|
| 890 |
"torchrun", "--nproc_per_node", "1", "--standalone",
|
|
|
|
| 897 |
"--motion-score", "6" # High motion for dynamic scenes
|
| 898 |
]
|
| 899 |
|
| 900 |
+
print(f"🎬 Running Open-Sora command: {' '.join(cmd)}")
|
| 901 |
+
result = subprocess.run(cmd, capture_output=True, text=True, cwd=opensora_dir, timeout=300)
|
| 902 |
+
|
| 903 |
+
print(f"🎬 Open-Sora return code: {result.returncode}")
|
| 904 |
+
if result.stdout:
|
| 905 |
+
print(f"🎬 Open-Sora stdout: {result.stdout}")
|
| 906 |
+
if result.stderr:
|
| 907 |
+
print(f"❌ Open-Sora stderr: {result.stderr}")
|
| 908 |
|
| 909 |
if result.returncode == 0:
|
| 910 |
# Find generated video file
|
|
|
|
| 912 |
if file.endswith('.mp4') and 'scene' not in file:
|
| 913 |
src_path = os.path.join(self.output_dir, file)
|
| 914 |
os.rename(src_path, video_path)
|
| 915 |
+
print(f"✅ Open-Sora video generated: {video_path}")
|
| 916 |
return video_path
|
| 917 |
+
|
| 918 |
+
print("❌ Open-Sora completed but no video file found")
|
| 919 |
+
return None
|
| 920 |
+
else:
|
| 921 |
+
print(f"❌ Open-Sora failed with return code: {result.returncode}")
|
| 922 |
+
return None
|
| 923 |
|
| 924 |
+
except subprocess.TimeoutExpired:
|
| 925 |
+
print("❌ Open-Sora generation timed out (5 minutes)")
|
| 926 |
return None
|
|
|
|
| 927 |
except Exception as e:
|
| 928 |
print(f"❌ Open-Sora generation failed: {e}")
|
| 929 |
+
import traceback
|
| 930 |
+
traceback.print_exc()
|
| 931 |
return None
|
| 932 |
|
| 933 |
def _create_professional_static_video(self, scene: Dict, background_images: Dict) -> str:
|
|
|
|
| 1312 |
}
|
| 1313 |
return None, error_info, f"❌ Generation failed: {str(e)}", [], []
|
| 1314 |
|
| 1315 |
+
def _create_lightweight_animated_video(self, scene: Dict, character_images: Dict, background_images: Dict) -> str:
|
| 1316 |
+
"""Create lightweight animated video with character/background compositing"""
|
| 1317 |
+
scene_num = scene['scene_number']
|
| 1318 |
+
|
| 1319 |
+
if scene_num not in background_images:
|
| 1320 |
+
print(f"❌ No background image for scene {scene_num}")
|
| 1321 |
+
return None
|
| 1322 |
+
|
| 1323 |
+
video_path = f"{self.output_dir}/video_animated_scene_{scene_num}.mp4"
|
| 1324 |
+
|
| 1325 |
+
try:
|
| 1326 |
+
print(f"🎬 Creating lightweight animated video for scene {scene_num}...")
|
| 1327 |
+
|
| 1328 |
+
# Load background image
|
| 1329 |
+
bg_path = background_images[scene_num]
|
| 1330 |
+
print(f"📁 Loading background from: {bg_path}")
|
| 1331 |
+
|
| 1332 |
+
if not os.path.exists(bg_path):
|
| 1333 |
+
print(f"❌ Background file not found: {bg_path}")
|
| 1334 |
+
return None
|
| 1335 |
+
|
| 1336 |
+
bg_image = Image.open(bg_path).resize((1024, 768))
|
| 1337 |
+
bg_array = np.array(bg_image)
|
| 1338 |
+
bg_array = cv2.cvtColor(bg_array, cv2.COLOR_RGB2BGR)
|
| 1339 |
+
|
| 1340 |
+
# Try to load character images for this scene
|
| 1341 |
+
scene_characters = scene.get('characters_present', [])
|
| 1342 |
+
character_overlays = []
|
| 1343 |
+
|
| 1344 |
+
for char_name in scene_characters:
|
| 1345 |
+
for char_key, char_path in character_images.items():
|
| 1346 |
+
if char_name.lower() in char_key.lower():
|
| 1347 |
+
if os.path.exists(char_path):
|
| 1348 |
+
char_img = Image.open(char_path).convert("RGBA")
|
| 1349 |
+
# Resize character to reasonable size (25% of background)
|
| 1350 |
+
char_w, char_h = char_img.size
|
| 1351 |
+
new_h = int(768 * 0.25) # 25% of background height
|
| 1352 |
+
new_w = int(char_w * (new_h / char_h))
|
| 1353 |
+
char_img = char_img.resize((new_w, new_h))
|
| 1354 |
+
character_overlays.append({
|
| 1355 |
+
'image': np.array(char_img),
|
| 1356 |
+
'name': char_name,
|
| 1357 |
+
'original_pos': (100 + len(character_overlays) * 200, 768 - new_h - 50) # Bottom positioning
|
| 1358 |
+
})
|
| 1359 |
+
print(f"✅ Loaded character: {char_name}")
|
| 1360 |
+
break
|
| 1361 |
+
|
| 1362 |
+
print(f"📐 Background size: {bg_array.shape}")
|
| 1363 |
+
print(f"🎭 Characters loaded: {len(character_overlays)}")
|
| 1364 |
+
|
| 1365 |
+
# Professional video settings
|
| 1366 |
+
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
| 1367 |
+
fps = 24 # Cinematic frame rate
|
| 1368 |
+
duration = int(scene.get('duration', 35))
|
| 1369 |
+
total_frames = duration * fps
|
| 1370 |
+
|
| 1371 |
+
print(f"🎬 Video settings: {fps}fps, {duration}s duration, {total_frames} frames")
|
| 1372 |
+
|
| 1373 |
+
out = cv2.VideoWriter(video_path, fourcc, fps, (1024, 768))
|
| 1374 |
+
|
| 1375 |
+
if not out.isOpened():
|
| 1376 |
+
print(f"❌ Failed to open video writer for {video_path}")
|
| 1377 |
+
return None
|
| 1378 |
+
|
| 1379 |
+
# Advanced animation with character movement
|
| 1380 |
+
print(f"🎬 Generating {total_frames} animated frames...")
|
| 1381 |
+
|
| 1382 |
+
for i in range(total_frames):
|
| 1383 |
+
if i % 100 == 0: # Progress update every 100 frames
|
| 1384 |
+
print(f" Frame {i}/{total_frames} ({i/total_frames*100:.1f}%)")
|
| 1385 |
+
|
| 1386 |
+
frame = bg_array.copy()
|
| 1387 |
+
progress = i / total_frames
|
| 1388 |
+
|
| 1389 |
+
# Apply cinematic background effects
|
| 1390 |
+
frame = self._apply_cinematic_effects(frame, scene, progress)
|
| 1391 |
+
|
| 1392 |
+
# Animate characters if available
|
| 1393 |
+
for j, char_data in enumerate(character_overlays):
|
| 1394 |
+
char_img = char_data['image']
|
| 1395 |
+
char_name = char_data['name']
|
| 1396 |
+
base_x, base_y = char_data['original_pos']
|
| 1397 |
+
|
| 1398 |
+
# Different animation patterns based on scene mood
|
| 1399 |
+
mood = scene.get('mood', 'heartwarming')
|
| 1400 |
+
|
| 1401 |
+
if mood == 'exciting':
|
| 1402 |
+
# Bouncing animation
|
| 1403 |
+
offset_y = int(np.sin(progress * 8 * np.pi + j * np.pi/2) * 20)
|
| 1404 |
+
offset_x = int(np.sin(progress * 4 * np.pi + j * np.pi/3) * 15)
|
| 1405 |
+
elif mood == 'peaceful':
|
| 1406 |
+
# Gentle swaying
|
| 1407 |
+
offset_y = int(np.sin(progress * 2 * np.pi + j * np.pi/2) * 8)
|
| 1408 |
+
offset_x = int(np.sin(progress * 1.5 * np.pi + j * np.pi/3) * 12)
|
| 1409 |
+
elif mood == 'mysterious':
|
| 1410 |
+
# Subtle floating
|
| 1411 |
+
offset_y = int(np.sin(progress * 3 * np.pi + j * np.pi/2) * 15)
|
| 1412 |
+
offset_x = int(np.cos(progress * 2 * np.pi + j * np.pi/4) * 10)
|
| 1413 |
+
else:
|
| 1414 |
+
# Default: slight breathing animation
|
| 1415 |
+
scale_factor = 1.0 + np.sin(progress * 4 * np.pi + j * np.pi/2) * 0.02
|
| 1416 |
+
offset_y = int(np.sin(progress * 3 * np.pi + j * np.pi/2) * 5)
|
| 1417 |
+
offset_x = 0
|
| 1418 |
+
|
| 1419 |
+
# Calculate final position
|
| 1420 |
+
final_x = base_x + offset_x
|
| 1421 |
+
final_y = base_y + offset_y
|
| 1422 |
+
|
| 1423 |
+
# Overlay character on frame
|
| 1424 |
+
if char_img.shape[2] == 4: # Has alpha channel
|
| 1425 |
+
frame = self._overlay_character(frame, char_img, final_x, final_y)
|
| 1426 |
+
else:
|
| 1427 |
+
# Simple overlay without alpha
|
| 1428 |
+
char_rgb = cv2.cvtColor(char_img[:,:,:3], cv2.COLOR_RGB2BGR)
|
| 1429 |
+
h, w = char_rgb.shape[:2]
|
| 1430 |
+
if (final_y >= 0 and final_y + h < 768 and
|
| 1431 |
+
final_x >= 0 and final_x + w < 1024):
|
| 1432 |
+
frame[final_y:final_y+h, final_x:final_x+w] = char_rgb
|
| 1433 |
+
|
| 1434 |
+
out.write(frame)
|
| 1435 |
+
|
| 1436 |
+
print(f"✅ All {total_frames} animated frames generated")
|
| 1437 |
+
|
| 1438 |
+
out.release()
|
| 1439 |
+
|
| 1440 |
+
if os.path.exists(video_path):
|
| 1441 |
+
file_size = os.path.getsize(video_path)
|
| 1442 |
+
print(f"✅ Lightweight animated video created: {video_path} ({file_size / (1024*1024):.1f} MB)")
|
| 1443 |
+
return video_path
|
| 1444 |
+
else:
|
| 1445 |
+
print(f"❌ Video file not created: {video_path}")
|
| 1446 |
+
return None
|
| 1447 |
+
|
| 1448 |
+
except Exception as e:
|
| 1449 |
+
print(f"❌ Lightweight animated video creation failed for scene {scene_num}: {e}")
|
| 1450 |
+
import traceback
|
| 1451 |
+
traceback.print_exc()
|
| 1452 |
+
return None
|
| 1453 |
+
|
| 1454 |
+
def _overlay_character(self, background, character_rgba, x, y):
|
| 1455 |
+
"""Overlay character with alpha transparency on background"""
|
| 1456 |
+
try:
|
| 1457 |
+
char_h, char_w = character_rgba.shape[:2]
|
| 1458 |
+
bg_h, bg_w = background.shape[:2]
|
| 1459 |
+
|
| 1460 |
+
# Ensure the character fits within background bounds
|
| 1461 |
+
if x < 0 or y < 0 or x + char_w > bg_w or y + char_h > bg_h:
|
| 1462 |
+
return background
|
| 1463 |
+
|
| 1464 |
+
# Extract RGB and alpha channels
|
| 1465 |
+
char_rgb = character_rgba[:, :, :3]
|
| 1466 |
+
char_alpha = character_rgba[:, :, 3] / 255.0
|
| 1467 |
+
|
| 1468 |
+
# Convert character to BGR for OpenCV
|
| 1469 |
+
char_bgr = cv2.cvtColor(char_rgb, cv2.COLOR_RGB2BGR)
|
| 1470 |
+
|
| 1471 |
+
# Get the region of interest from background
|
| 1472 |
+
roi = background[y:y+char_h, x:x+char_w]
|
| 1473 |
+
|
| 1474 |
+
# Blend character with background using alpha
|
| 1475 |
+
for c in range(3):
|
| 1476 |
+
roi[:, :, c] = (char_alpha * char_bgr[:, :, c] +
|
| 1477 |
+
(1 - char_alpha) * roi[:, :, c])
|
| 1478 |
+
|
| 1479 |
+
background[y:y+char_h, x:x+char_w] = roi
|
| 1480 |
+
return background
|
| 1481 |
+
|
| 1482 |
+
except Exception as e:
|
| 1483 |
+
print(f"⚠️ Character overlay failed: {e}")
|
| 1484 |
+
return background
|
| 1485 |
+
|
| 1486 |
# Initialize professional generator
# Module-level instance created once at import time; heavy model loading
# appears deferred to load_models() (guarded by models_loaded) — confirm.
generator = ProfessionalCartoonFilmGenerator()