| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import os |
| import time |
| import logging |
| import yaml |
| import torch |
| import numpy as np |
| from PIL import Image, ImageOps |
|
|
| |
| from typing import List, Dict, Any, Callable, Optional |
| |
|
|
| |
| from .deformes2D_thinker import deformes2d_thinker_singleton |
| from ..types import LatentConditioningItem |
| from ..managers.ltx_manager import ltx_manager_singleton |
| from ..managers.vae_manager import vae_manager_singleton |
| from ..managers.latent_enhancer_manager import latent_enhancer_specialist_singleton |
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Type alias for an optional progress hook: either None or a callable that
# takes (float, str) and returns None.
ProgressCallback = Optional[Callable[[float, str], None]]
|
|
class Deformes3DEngine:
    """ADUC specialist for generating static images (keyframes).

    The engine is constructed uninitialised; call :meth:`initialize` with a
    workspace directory before requesting keyframes.
    """

    def __init__(self):
        """Create a lightweight, uninitialised engine (takes no arguments)."""
        self.workspace_dir: Optional[str] = None
        logger.info("Deformes3DEngine instanciado (não inicializado).")

    def initialize(self, workspace_dir: str):
        """Bind the engine to the directory where keyframe files are written.

        Only the first call has an effect: if a workspace is already set the
        method returns without changing it.

        Args:
            workspace_dir: Directory used as the output location for the
                generated ``keyframe_*`` files.
        """
        if self.workspace_dir is not None:
            return
        self.workspace_dir = workspace_dir
        logger.info(f"3D Engine (Image Specialist) inicializado com workspace: {self.workspace_dir}.")

    def generate_keyframes_from_storyboard(
        self,
        generation_state: Dict[str, Any],
        progress_callback: ProgressCallback = None
    ) -> List[Dict[str, Any]]:
        """Generate one keyframe per storyboard act from the generation state.

        Args:
            generation_state: Full generation state. The keys read here are
                ``"parametros_geracao" -> "pre_producao" -> "resolution"``
                (defaults to 480), ``"Atos"`` (each entry must provide
                ``"resumo_ato"``), ``"Promt_geral"`` and
                ``"midias_referencia"`` (each entry must provide
                ``"caminho"``).
            progress_callback: Accepted for interface compatibility; this
                method does not invoke it.

        Returns:
            One dict per generated keyframe, in storyboard order, with the
            keys ``"id"``, ``"caminho_pixel"``, ``"caminho_latent"`` and
            ``"prompt_keyframe"``.

        Raises:
            RuntimeError: If :meth:`initialize` has not been called.
            ValueError: If ``"midias_referencia"`` yields no reference path.
        """
        if not self.workspace_dir:
            raise RuntimeError("Deformes3DEngine não foi inicializado. Chame o método initialize() antes de usar.")

        params = generation_state.get("parametros_geracao", {}).get("pre_producao", {})
        storyboard = [ato["resumo_ato"] for ato in generation_state.get("Atos", [])]
        global_prompt = generation_state.get("Promt_geral", "")
        general_ref_paths = [media["caminho"] for media in generation_state.get("midias_referencia", [])]

        # Fail with an explicit message instead of a bare IndexError below.
        if not general_ref_paths:
            raise ValueError(
                "Nenhuma mídia de referência encontrada em 'midias_referencia'; "
                "é necessária ao menos uma imagem de referência."
            )

        keyframe_resolution = params.get('resolution', 480)
        initial_ref_path = general_ref_paths[0]

        previous_prompt = ""
        all_keyframes_data: List[Dict[str, Any]] = []
        width, height = keyframe_resolution, keyframe_resolution
        target_resolution_tuple = (width, height)

        # One keyframe is produced for every act of the storyboard.
        num_keyframes_to_generate = len(storyboard)
        logger.info(f"IMAGE SPECIALIST: Ordem para gerar {num_keyframes_to_generate} keyframes (versões LTX).")

        # Load the first reference image; the context manager releases the
        # underlying file handle once the RGB copy has been made.
        with Image.open(initial_ref_path) as opened_image:
            img_pil0 = opened_image.convert("RGB")
        img_processed0 = self._preprocess_image_for_latent_conversion(img_pil0, target_resolution_tuple)
        pixel_tensor0 = self._pil_to_pixel_tensor(img_processed0)

        # Weak conditioning on the reference image at the first and last frame
        # of the 24-frame fragment; shared (read-only) by every iteration.
        base_conditioning_items = [
            LatentConditioningItem(pixel_tensor0, 0, 0.05),
            LatentConditioningItem(pixel_tensor0, 23, 0.05),
        ]

        # latent_tensorY is the most recent anchor, latent_tensorX the one
        # before it. Both start as the reference image tensor.
        # NOTE(review): the reference is carried in pixel space while generated
        # keyframes are carried as latents — confirm which representation
        # LatentConditioningItem / generate_latent_fragment expect.
        latent_tensorY = pixel_tensor0
        latent_tensorX = latent_tensorY

        current_base_image_path = initial_ref_path
        past_base_image_path = initial_ref_path

        # Sampling parameters are identical for every keyframe.
        ltx_base_params = {"guidance_scale": 1.0, "stg_scale": 0.001, "num_inference_steps": 25}

        for i, current_scene in enumerate(storyboard):
            scene_index = i + 1
            future_scene = storyboard[i + 1] if (i + 1) < len(storyboard) else "A cena final."
            logger.info(f"--> Gerando Keyframe {scene_index}/{num_keyframes_to_generate}...")

            img_prompt = deformes2d_thinker_singleton.get_anticipatory_keyframe_prompt(
                global_prompt=global_prompt,
                scene_history=previous_prompt,
                current_scene_desc=current_scene,
                future_scene_desc=future_scene,
                last_image_path=past_base_image_path,
                fixed_ref_paths=current_base_image_path
            )

            # The image that was "current" for this prompt becomes the "past"
            # image for the next one.
            past_base_image_path = current_base_image_path

            # Work on a copy so the per-keyframe anchors do not accumulate in
            # the shared base list from one iteration to the next.
            ltx_conditioning_items = list(base_conditioning_items)
            ltx_conditioning_items.append(LatentConditioningItem(latent_tensorX, 0, 0.4))
            ltx_conditioning_items.append(LatentConditioningItem(latent_tensorY, 8, 0.6))

            latent_tensorX = latent_tensorY

            generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
                height=height, width=width,
                conditioning_items_data=ltx_conditioning_items,
                motion_prompt=img_prompt,
                video_total_frames=24, video_fps=24,
                **ltx_base_params
            )

            # Keep only the last temporal slice of the fragment as the keyframe.
            final_latent = generated_latents[:, :, -1:, :, :]

            enriched_pixel_tensor = vae_manager_singleton.decode(final_latent)

            pixel_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_pixel.png")
            latent_path = os.path.join(self.workspace_dir, f"keyframe_{scene_index:04d}_latent.pt")
            self.save_image_from_tensor(enriched_pixel_tensor, pixel_path)
            torch.save(final_latent.cpu(), latent_path)

            # Carry the tensor itself (not the path it was saved to) as the
            # anchor for the next keyframe's conditioning item.
            latent_tensorY = final_latent

            all_keyframes_data.append({
                "id": scene_index,
                "caminho_pixel": pixel_path,
                "caminho_latent": latent_path,
                "prompt_keyframe": img_prompt
            })

            current_base_image_path = pixel_path
            previous_prompt = img_prompt

        logger.info("IMAGE SPECIALIST: Geração de todos os dados de keyframes completa.")
        return all_keyframes_data

    def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
        """Return ``image`` fitted to ``target_resolution``.

        The image is returned untouched when its size already matches;
        otherwise it is fitted with ``ImageOps.fit`` using LANCZOS resampling.
        """
        if image.size != target_resolution:
            return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)
        return image

    def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
        """Convert a PIL image into a float tensor scaled to ``[-1, 1]``.

        The H x W x C array is rearranged to channel-first and gains two
        singleton axes (positions 0 and 2), giving a 5-D tensor.
        """
        image_np = np.array(pil_image).astype(np.float32) / 255.0
        tensor = torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2)
        return (tensor * 2.0) - 1.0

    def save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
        """Write a ``[-1, 1]`` pixel tensor to ``path`` as an 8-bit image.

        Drops the singleton axes at positions 0 and 2 (the inverse of
        ``_pil_to_pixel_tensor``), clamps to ``[-1, 1]``, rescales to
        ``[0, 255]`` and saves through PIL.
        """
        tensor_chw = pixel_tensor.squeeze(0).squeeze(1)
        tensor_hwc = tensor_chw.permute(1, 2, 0)
        tensor_hwc = (tensor_hwc.clamp(-1, 1) + 1) / 2.0
        # detach() lets tensors that still track gradients be converted.
        image_np = (tensor_hwc.detach().cpu().float().numpy() * 255).astype(np.uint8)
        Image.fromarray(image_np).save(path)
|
|
| |
| |
# Shared module-level engine instance. It is created uninitialised
# (workspace_dir is None) and must be given a workspace via initialize()
# before keyframes can be generated.
deformes3d_engine_singleton = Deformes3DEngine()