| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import os |
| import time |
| import logging |
| import yaml |
| import torch |
| import numpy as np |
| from PIL import Image, ImageOps |
|
|
| |
| from typing import List, Dict, Any, Callable, Optional |
| |
|
|
| |
| from .deformes2D_thinker import deformes2d_thinker_singleton |
| from ..types import LatentConditioningItem |
| from ..managers.ltx_manager import ltx_manager_singleton |
| from ..managers.vae_manager import vae_manager_singleton |
| from ..managers.latent_enhancer_manager import latent_enhancer_specialist_singleton |
|
|
# Module-level logger, named after this module so records can be filtered per module.
logger = logging.getLogger(__name__)
|
|
| |
# Optional progress hook: a callable taking (float, str) and returning None.
# In this module it is invoked with a progress fraction and a status message.
ProgressCallback = Optional[Callable[[float, str], None]]
|
|
class Deformes3DEngine:
    """ADUC specialist for generating static images (keyframes).

    The engine is constructed without configuration and must be given a
    workspace directory through :meth:`initialize` before
    :meth:`generate_keyframes_from_storyboard` can be used.
    """

    def __init__(self):
        """Create a lightweight, not-yet-initialized engine (takes no arguments)."""
        self.workspace_dir: Optional[str] = None
        logger.info("Deformes3DEngine instanciado (não inicializado).")

    def initialize(self, workspace_dir: str):
        """Set the workspace directory where keyframe files are written.

        Calls made after a usable workspace has been set are ignored. The
        guard uses truthiness, matching the check performed by
        ``generate_keyframes_from_storyboard``, so an empty workspace value
        does not lock the engine into an unusable state.

        Args:
            workspace_dir: Directory used for the generated ``.png`` and
                ``.pt`` files.
        """
        if self.workspace_dir:
            return
        self.workspace_dir = workspace_dir
        logger.info(f"3D Engine (Image Specialist) inicializado com workspace: {self.workspace_dir}.")

    def generate_keyframes_from_storyboard(
        self,
        generation_state: Dict[str, Any],
        progress_callback: "ProgressCallback" = None,
    ) -> List[Dict[str, Any]]:
        """Generate one keyframe per storyboard act from the full generation state.

        Each keyframe is conditioned on the previously generated keyframe
        (the first one on the initial reference image) plus up to three of
        the other reference images. For every act, a PNG image and a latent
        ``.pt`` file are written into the workspace directory.

        Args:
            generation_state: Mapping read with the keys
                ``"parametros_geracao"`` -> ``"pre_producao"`` (optional
                ``"resolution"``, default 480), ``"Atos"`` (each entry is
                indexed with ``"resumo_ato"``), ``"Promt_geral"`` and
                ``"midias_referencia"`` (each entry is indexed with
                ``"caminho"``).
            progress_callback: Optional callable invoked after each keyframe
                with a fraction in the range 0.2-1.0 and a status message.

        Returns:
            A list with one dict per keyframe, holding the keys ``"id"``,
            ``"caminho_pixel"``, ``"caminho_latent"`` and
            ``"prompt_keyframe"``. The list is empty when there are no acts.

        Raises:
            RuntimeError: If ``initialize()`` has not set a workspace.
            ValueError: If no initial reference image path is available.
        """
        if not self.workspace_dir:
            raise RuntimeError("Deformes3DEngine não foi inicializado. Chame o método initialize() antes de usar.")

        params = generation_state.get("parametros_geracao", {}).get("pre_producao", {})
        storyboard = [ato["resumo_ato"] for ato in generation_state.get("Atos", [])]
        # NOTE(review): the key is spelled "Promt_geral" (not "Prompt_geral");
        # it must match whatever produces generation_state, so it is kept as-is.
        global_prompt = generation_state.get("Promt_geral", "")
        general_ref_paths = [media["caminho"] for media in generation_state.get("midias_referencia", [])]

        keyframe_resolution = params.get('resolution', 480)
        initial_ref_path = general_ref_paths[0] if general_ref_paths else None

        if not initial_ref_path:
            raise ValueError("Não há imagem de referência inicial para começar a geração de keyframes.")

        current_base_image_path = initial_ref_path
        previous_prompt = "N/A (imagem de referência inicial)"
        all_keyframes_data: List[Dict[str, Any]] = []
        # Keyframes are square: the same value is used for width and height.
        width, height = keyframe_resolution, keyframe_resolution
        target_resolution_tuple = (width, height)

        num_keyframes_to_generate = len(storyboard)
        if not storyboard:
            logger.warning("Storyboard vazio. Nenhum keyframe a ser gerado.")
            return []

        # Fail early (and cheaply) on an unusable workspace instead of after
        # the first expensive generation step.
        os.makedirs(self.workspace_dir, exist_ok=True)

        logger.info(f"IMAGE SPECIALIST: Ordem para gerar {num_keyframes_to_generate} keyframes (versões LTX).")

        for scene_index, current_scene in enumerate(storyboard, start=1):
            # scene_index is 1-based, so it is also the 0-based index of the next act.
            if scene_index < num_keyframes_to_generate:
                future_scene = storyboard[scene_index]
            else:
                future_scene = "A cena final."

            logger.info(f"--> Gerando Keyframe {scene_index}/{num_keyframes_to_generate}...")

            img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
                global_prompt=global_prompt,
                scene_history=previous_prompt,
                current_scene_desc=current_scene,
                future_scene_desc=future_scene,
                last_image_path=current_base_image_path,
                fixed_ref_paths=general_ref_paths,
            )

            # Current base image first, then up to three other references.
            other_refs = [p for p in general_ref_paths if p != current_base_image_path][:3]
            context_paths = [current_base_image_path] + other_refs
            # The list is reversed before encoding, so the current base image
            # comes last and therefore receives the lowest weight.
            ltx_conditioning_items = self._build_conditioning_items(
                reversed(context_paths), target_resolution_tuple
            )

            ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25}
            generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
                height=height, width=width,
                conditioning_items_data=ltx_conditioning_items,
                motion_prompt=img_prompt,
                video_total_frames=48, video_fps=24,
                **ltx_base_params
            )

            # Keep only the last entry along dimension 2
            # (presumably the temporal axis of the latent video — TODO confirm).
            final_latent = generated_latents[:, :, -1:, :, :]
            upscaled_latent = latent_enhancer_specialist_singleton.upscale(final_latent)
            enriched_pixel_tensor = vae_manager_singleton.decode(upscaled_latent)

            pixel_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_pixel.png")
            latent_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_latent.pt")
            self.save_image_from_tensor(enriched_pixel_tensor, pixel_path)
            # The latent saved to disk is the non-upscaled one.
            torch.save(final_latent.cpu(), latent_path)

            all_keyframes_data.append({
                "id": scene_index,
                "caminho_pixel": pixel_path,
                "caminho_latent": latent_path,
                "prompt_keyframe": img_prompt,
            })

            if progress_callback:
                # Keyframe progress is mapped onto the 0.2-1.0 range.
                progress_fraction = 0.2 + ((scene_index / num_keyframes_to_generate) * 0.8)
                progress_callback(progress_fraction, f"Keyframe {scene_index}/{num_keyframes_to_generate} gerado.")

            # The keyframe just produced becomes the base for the next one.
            current_base_image_path = pixel_path
            previous_prompt = img_prompt

        logger.info("IMAGE SPECIALIST: Geração de todos os dados de keyframes completa.")
        return all_keyframes_data

    def _build_conditioning_items(self, image_paths, target_resolution: tuple) -> list:
        """Encode each image path into a weighted ``LatentConditioningItem``.

        The first path gets weight 0.6 and each following path 0.1 less.

        Args:
            image_paths: Iterable of image file paths, in weighting order.
            target_resolution: ``(width, height)`` the images are fitted to.

        Returns:
            A list of ``LatentConditioningItem`` objects, one per path.
        """
        conditioning_items = []
        weight = 0.6
        for path in image_paths:
            # Image.open is lazy and keeps the file open; convert() loads the
            # pixels into a new image, so the handle can be closed right away.
            with Image.open(path) as source_image:
                img_pil = source_image.convert("RGB")
            img_processed = self._preprocess_image_for_latent_conversion(img_pil, target_resolution)
            pixel_tensor = self._pil_to_pixel_tensor(img_processed)
            latent_tensor = vae_manager_singleton.encode(pixel_tensor)
            # Second positional argument is 0 for every item
            # (presumably the frame index to condition on — TODO confirm).
            conditioning_items.append(LatentConditioningItem(latent_tensor, 0, weight))
            weight -= 0.1
        return conditioning_items

    def _preprocess_image_for_latent_conversion(self, image: "Image.Image", target_resolution: tuple) -> "Image.Image":
        """Return ``image`` fitted to ``target_resolution`` with LANCZOS resampling.

        The image is returned unchanged (same object) when its size already
        matches ``target_resolution``.
        """
        if image.size != target_resolution:
            return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
        return image

    def _pil_to_pixel_tensor(self, pil_image: "Image.Image") -> torch.Tensor:
        """Convert an H x W x C image into a ``(1, C, 1, H, W)`` float tensor in [-1, 1].

        Pixel values are divided by 255 and then mapped linearly from
        [0, 1] to [-1, 1].
        """
        image_np = np.array(pil_image).astype(np.float32) / 255.0
        # HWC -> CHW, then add size-1 dimensions at positions 0 and 2.
        tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
        return (tensor * 2.0) - 1.0

    def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
        """Save a ``(1, C, 1, H, W)`` tensor with values in [-1, 1] as an image file.

        Values outside [-1, 1] are clamped. Quantisation to 8 bits rounds to
        the nearest integer rather than truncating, which would darken the
        image by up to one level.

        Args:
            pixel_tensor: Tensor to save; size-1 dimensions 0 and 2 are removed.
            path: Destination file path; the format follows the extension.
        """
        # detach() lets .numpy() work even if the tensor is attached to a graph.
        tensor_chw = pixel_tensor.detach().squeeze(0).squeeze(1)
        tensor_hwc = tensor_chw.permute(1, 2, 0)
        tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
        image_np = (tensor_hwc.cpu().float().numpy() * 255).round().astype(np.uint8)
        Image.fromarray(image_np).save(path)
|
|
| |
| |
# Shared module-level engine instance. It is constructed without a workspace,
# so initialize() must be called on it before keyframes can be generated.
deformes3d_engine_singleton = Deformes3DEngine()