# Hugging Face Space: Wan text/image-to-video generation service with LoRA support.
import gc
import os
import time
from pathlib import Path
from typing import Any, Callable, Optional

import numpy as np
import safetensors.torch
import torch
from PIL import Image
# --- Configuration ---------------------------------------------------------
MODEL_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"  # base diffusers checkpoint
LORA_CACHE_DIR = "/tmp/lora_cache"            # on-disk cache for LoRA weights

# Prefer CUDA + bf16 when a GPU is present; fall back to CPU + fp32.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32

# Make sure the LoRA cache directory exists before any download happens.
os.makedirs(LORA_CACHE_DIR, exist_ok=True)
# All bundled adapters live in the same repository.
_WAN_LORA_REPO = "Kijai/Wan2.1-fp8-diffusers"

# Registry of LoRA adapters the UI may load on demand. Each entry records the
# hub repo, the weights filename, a human-readable description, and any
# trigger words the adapter responds to.
AVAILABLE_LORAS = {
    "wan-fast-lora": {
        "repo": _WAN_LORA_REPO,  # FP8-quantized weights for speed
        "filename": "wan2.1_fast_lora.safetensors",
        "description": "Optimized for 2-3x faster generation",
        "trigger_words": [],
    },
    "wan-quality-lora": {
        "repo": _WAN_LORA_REPO,
        "filename": "wan2.1_quality_lora.safetensors",
        "description": "Enhanced visual quality",
        "trigger_words": ["high quality", "detailed"],
    },
    "wan-motion-lora": {
        "repo": _WAN_LORA_REPO,
        "filename": "wan2.1_motion_lora.safetensors",
        "description": "Better motion dynamics",
        "trigger_words": ["smooth motion", "dynamic"],
    },
}
def get_available_loras() -> list:
    """Return the names of all registered LoRA adapters, in declaration order."""
    return [*AVAILABLE_LORAS]
class WanVideoGenerator:
    """Text/image-to-video generator built on the Wan diffusers pipeline.

    Loads the base checkpoint named by ``MODEL_ID`` once at construction and
    supports hot-swapping the adapters declared in ``AVAILABLE_LORAS``.

    NOTE(review): the original docstring and log message said
    "Wan2.2-TI2V-5B", but ``MODEL_ID`` points at Wan2.1-T2V-14B; messages now
    reference ``MODEL_ID`` so logs match what is actually loaded.
    """

    def __init__(self):
        # Pipeline is built eagerly; LoRA state starts empty.
        self.pipeline = None
        self.current_lora = None  # name of the active adapter, or None
        self.lora_scale = 0.0     # strength applied to the active adapter
        self._load_model()

    def _load_model(self):
        """Load the base pipeline with memory optimizations.

        Imports diffusers/transformers lazily so merely importing this module
        stays cheap.
        """
        from diffusers import WanPipeline, WanTransformer3DModel
        from diffusers.schedulers import UniPCMultistepScheduler
        from transformers import AutoTokenizer, T5EncoderModel

        # Fix: previously logged a hard-coded (and wrong) model name.
        print(f"Loading {MODEL_ID} on {DEVICE}...")

        transformer = WanTransformer3DModel.from_pretrained(
            MODEL_ID,
            subfolder="transformer",
            torch_dtype=DTYPE,
            use_safetensors=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID,
            subfolder="tokenizer",
        )
        text_encoder = T5EncoderModel.from_pretrained(
            MODEL_ID,
            subfolder="text_encoder",
            torch_dtype=DTYPE,
        )
        self.pipeline = WanPipeline.from_pretrained(
            MODEL_ID,
            transformer=transformer,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            torch_dtype=DTYPE,
        )

        if DEVICE == "cuda":
            # Offload idle submodules to CPU and slice attention so the 14B
            # model fits in limited VRAM.
            self.pipeline.enable_model_cpu_offload()
            self.pipeline.enable_attention_slicing()

        # UniPC reaches good quality in fewer sampling steps.
        self.pipeline.scheduler = UniPCMultistepScheduler.from_config(
            self.pipeline.scheduler.config
        )
        print("Model loaded successfully!")

    def load_lora(self, lora_name: str, scale: float = 0.8):
        """Activate a LoRA adapter by name, downloading weights if needed.

        Args:
            lora_name: key into ``AVAILABLE_LORAS``.
            scale: adapter strength passed to ``set_adapters``.

        Raises:
            ValueError: if ``lora_name`` is not a known adapter.
        """
        if lora_name not in AVAILABLE_LORAS:
            raise ValueError(f"Unknown LoRA: {lora_name}")

        if self.current_lora == lora_name:
            if abs(self.lora_scale - scale) < 0.01:
                print(f"LoRA {lora_name} already loaded with scale {scale}")
                return
            # Fix: same adapter at a different strength only needs a rescale.
            # The original unloaded, re-downloaded, and reloaded the weights.
            self.pipeline.set_adapters([lora_name], adapter_weights=[scale])
            self.lora_scale = scale
            print(f"LoRA {lora_name} rescaled to {scale}")
            return

        # Switching adapters: drop the previous one first.
        if self.current_lora:
            self.unload_lora()

        lora_config = AVAILABLE_LORAS[lora_name]
        lora_path = self._download_lora(lora_config)
        print(f"Loading LoRA: {lora_name} with scale {scale}...")
        self.pipeline.load_lora_weights(
            lora_path,
            adapter_name=lora_name,
        )
        self.pipeline.set_adapters([lora_name], adapter_weights=[scale])
        self.current_lora = lora_name
        self.lora_scale = scale
        print(f"LoRA {lora_name} loaded successfully!")

    def _download_lora(self, lora_config: dict) -> str:
        """Return a local path to the LoRA weights, downloading on first use."""
        from huggingface_hub import hf_hub_download

        lora_path = os.path.join(LORA_CACHE_DIR, lora_config["filename"])
        if not os.path.exists(lora_path):
            print(f"Downloading LoRA: {lora_config['filename']}...")
            # hf_hub_download returns the actual on-disk path; for the flat
            # filenames used here it matches the cache-hit path above.
            lora_path = hf_hub_download(
                repo_id=lora_config["repo"],
                filename=lora_config["filename"],
                local_dir=LORA_CACHE_DIR,
            )
        return lora_path

    def unload_lora(self):
        """Detach the active LoRA adapter (best effort) and reset state."""
        if self.current_lora and self.pipeline:
            try:
                self.pipeline.disable_lora()
                self.pipeline.unload_lora_weights()
                print(f"Unloaded LoRA: {self.current_lora}")
            except Exception as e:
                # Best effort: a failed unload must not take down the service.
                print(f"Warning: Could not unload LoRA: {e}")
            finally:
                self.current_lora = None
                self.lora_scale = 0.0
                # Fix: reclaim memory held by the detached adapter weights.
                gc.collect()
                if DEVICE == "cuda":
                    torch.cuda.empty_cache()

    def generate(
        self,
        prompt: str,
        negative_prompt: str = "",
        image: Optional[Image.Image] = None,
        height: int = 480,
        width: int = 848,
        num_frames: int = 25,
        guidance_scale: float = 5.0,
        num_inference_steps: int = 20,
        fps: int = 16,
        seed: Optional[int] = None,
        progress_callback: Optional[Callable[[float], None]] = None,
    ) -> str:
        """Generate a video from a text prompt (optionally image-conditioned).

        Args:
            prompt / negative_prompt: diffusion conditioning text.
            image: optional conditioning image for text+image-to-video.
            height, width, num_frames: output geometry.
            guidance_scale, num_inference_steps: sampler settings.
            fps: playback rate of the saved file.
            seed: optional seed for reproducible sampling.
            progress_callback: called with progress in (0, 1] after each step.

        Returns:
            Path to the saved .mp4 file under /tmp.
        """
        generator = None
        if seed is not None:
            generator = torch.Generator(device=DEVICE).manual_seed(seed)

        kwargs = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "height": height,
            "width": width,
            "num_frames": num_frames,
            "guidance_scale": guidance_scale,
            "num_inference_steps": num_inference_steps,
            "generator": generator,
            "output_type": "pil",
        }
        # Image conditioning enables the TI2V path.
        if image is not None:
            kwargs["image"] = image

        start_time = time.time()

        def callback_on_step_end(pipeline, i, t, callback_kwargs):
            # Report fraction of steps completed; must return callback_kwargs
            # per the diffusers callback contract.
            if progress_callback:
                progress_callback((i + 1) / num_inference_steps)
            return callback_kwargs

        kwargs["callback_on_step_end"] = callback_on_step_end

        output = self.pipeline(**kwargs)
        frames = output.frames[0]

        # Fix: second-resolution timestamps collide for back-to-back calls;
        # nanosecond resolution makes each output path unique in practice.
        output_path = f"/tmp/output_{time.time_ns()}.mp4"
        self._save_video(frames, output_path, fps)

        elapsed = time.time() - start_time
        print(f"Generation completed in {elapsed:.2f}s")
        return output_path

    def _save_video(self, frames: list, output_path: str, fps: int):
        """Encode a list of PIL frames as an H.264 .mp4 at the given fps."""
        import imageio

        frames_np = [np.array(frame) for frame in frames]
        with imageio.get_writer(output_path, fps=fps, codec='libx264', quality=8) as writer:
            for frame in frames_np:
                writer.append_data(frame)
        print(f"Video saved to: {output_path}")
# Lazily-created process-wide generator instance.
_generator_instance = None


def get_generator() -> WanVideoGenerator:
    """Return the shared WanVideoGenerator, constructing it on first use."""
    global _generator_instance
    if _generator_instance is not None:
        return _generator_instance
    _generator_instance = WanVideoGenerator()
    return _generator_instance