Reward-Forcing

Paused

App Files Community

fffiloni committed on Dec 31, 2025

Commit

5a46abc

verified ·

1 Parent(s): e3f74ba

back to previous app steady

Browse files

Files changed (1) hide show

app_wip.py +53 -78

app_wip.py CHANGED Viewed

@@ -51,70 +51,6 @@ OUTPUT_ROOT = "videos"
 os.makedirs(PROMPT_DIR, exist_ok=True)
 os.makedirs(OUTPUT_ROOT, exist_ok=True)
-# === Globals pour le cache du modèle ===
-DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-PIPELINE = None
-LOW_MEMORY = None
-CHECKPOINT_STEP = None
-def load_pipeline(progress: gr.Progress):
-    """
-    Charge la config + pipeline + checkpoint + placement device une seule fois.
-    Utilise progress.tqdm pour afficher plusieurs étapes la 1ère fois.
-    """
-    global PIPELINE, LOW_MEMORY, CHECKPOINT_STEP
-    logs = ""
-    # Si déjà chargé, on ne refait rien de lourd
-    if PIPELINE is not None:
-        progress(0.1, desc="Modèle déjà initialisé (cache)")
-        logs += "Modèle déjà initialisé, réutilisation du cache.\n"
-        return PIPELINE, LOW_MEMORY, CHECKPOINT_STEP, logs
-    # ---- Première initialisation lourde ----
-    set_seed(0)
-    free_vram = get_cuda_free_memory_gb(DEVICE)
-    LOW_MEMORY = free_vram < 40
-    logs += f"Free VRAM {free_vram} GB\n"
-    steps = range(4)
-    for step in progress.tqdm(steps, desc="Initialisation du modèle", unit="étape"):
-        if step == 0:
-            logs += "Étape 1/4 : Chargement de la config...\n"
-            config = OmegaConf.load(CONFIG_PATH)
-            default_config = OmegaConf.load("configs/default_config.yaml")
-            config = OmegaConf.merge(default_config, config)
-        elif step == 1:
-            logs += "Étape 2/4 : Création de la pipeline...\n"
-            if hasattr(config, "denoising_step_list"):
-                PIPELINE = CausalInferencePipeline(config, device=DEVICE)
-            else:
-                PIPELINE = CausalDiffusionInferencePipeline(config, device=DEVICE)
-        elif step == 2:
-            logs += "Étape 3/4 : Chargement des poids du checkpoint...\n"
-            state_dict = torch.load(CHECKPOINT_PATH, map_location="cpu")
-            PIPELINE.generator.load_state_dict(state_dict)
-            ckpt_dir = os.path.dirname(CHECKPOINT_PATH)
-            CHECKPOINT_STEP = os.path.basename(ckpt_dir)
-            CHECKPOINT_STEP = CHECKPOINT_STEP.split("_")[-1]
-        elif step == 3:
-            logs += "Étape 4/4 : Placement du modèle sur le device...\n"
-            PIPELINE = PIPELINE.to(dtype=torch.bfloat16)
-            if LOW_MEMORY:
-                DynamicSwapInstaller.install_model(PIPELINE.text_encoder, device=DEVICE)
-            else:
-                PIPELINE.text_encoder.to(device=DEVICE)
-            PIPELINE.generator.to(device=DEVICE)
-            PIPELINE.vae.to(device=DEVICE)
-    logs += "Initialisation du modèle terminée ✅\n"
-    return PIPELINE, LOW_MEMORY, CHECKPOINT_STEP, logs
 def reward_forcing_inference(
     prompt_txt_path: str,
@@ -125,17 +61,55 @@ def reward_forcing_inference(
 ):
     """
     Version inline / simplifiée de inference.py :
     - T2V uniquement
     - 1 fichier .txt = n prompts (mais on retourne la 1ère vidéo)
     """
     logs = ""
-    # --------------------- Load / cache pipeline ---------------------
-    pipeline, low_memory, checkpoint_step, init_logs = load_pipeline(progress)
-    logs += init_logs
     # --------------------- Dataset / DataLoader ---------------------
-    progress(0.7, desc="Préparation du dataset")
     logs += "Préparation du dataset (TextDataset)...\n"
     dataset = TextDataset(prompt_path=prompt_txt_path, extended_prompt_path=None)
     num_prompts = len(dataset)
@@ -149,7 +123,7 @@ def reward_forcing_inference(
     )
     # --------------------- Output folder (on le vide) ---------------------
-    progress(0.8, desc="Nettoyage du dossier de sortie")
     output_folder = os.path.join(
         output_root, f"rewardforcing-{num_output_frames}f", checkpoint_step
     )
@@ -157,7 +131,8 @@ def reward_forcing_inference(
     os.makedirs(output_folder, exist_ok=True)
     logs += f"Dossier de sortie: {output_folder}\n"
-    # --------------------- Boucle d'inférence (tqdm) ---------------------
     for i, batch_data in progress.tqdm(
         enumerate(dataloader),
         total=num_prompts,
@@ -176,7 +151,7 @@ def reward_forcing_inference(
         all_video = []
-        # TEXT-TO-VIDEO uniquement
         prompt = batch["prompts"][0]
         extended_prompt = batch.get("extended_prompts", [None])[0]
         if extended_prompt is not None:
@@ -188,7 +163,7 @@ def reward_forcing_inference(
         sampled_noise = torch.randn(
             [1, num_output_frames, 16, 60, 104],
-            device=DEVICE,
             dtype=torch.bfloat16,
         )
@@ -272,15 +247,15 @@ def gradio_generate(
 # UI Gradio
 # -------------------------------------------------------------------
-with gr.Blocks(title="Reward Forcing T2V Demo (inline, cached)") as demo:
     gr.Markdown(
         """
-        # 🎬 Reward Forcing – Text-to-Video (inline & cached)
-        Cette version :
-        - Charge et initialise le modèle **une seule fois** (cache global)
-        - Affiche une barre `tqdm` multi-étapes pour l'initialisation la 1ère fois
-        - Affiche une barre `tqdm` pour la génération vidéo (1 step / prompt)
         """
     )

 os.makedirs(PROMPT_DIR, exist_ok=True)
 os.makedirs(OUTPUT_ROOT, exist_ok=True)
 def reward_forcing_inference(
     prompt_txt_path: str,
 ):
     """
     Version inline / simplifiée de inference.py :
+    - single GPU
     - T2V uniquement
     - 1 fichier .txt = n prompts (mais on retourne la 1ère vidéo)
     """
     logs = ""
+    # --------------------- Device & seed ---------------------
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    set_seed(0)
+    free_vram = get_cuda_free_memory_gb(device)
+    logs += f"Free VRAM {free_vram} GB\n"
+    low_memory = free_vram < 40
+    torch.set_grad_enabled(False)
+    # --------------------- Phase 1 : init modèle / config ---------------------
+    progress(0.05, desc="Initialisation : chargement de la config")
+    logs += "Chargement de la config...\n"
+    config = OmegaConf.load(CONFIG_PATH)
+    default_config = OmegaConf.load("configs/default_config.yaml")
+    config = OmegaConf.merge(default_config, config)
+    progress(0.15, desc="Initialisation : création de la pipeline")
+    logs += "Initialisation de la pipeline...\n"
+    if hasattr(config, "denoising_step_list"):
+        pipeline = CausalInferencePipeline(config, device=device)
+    else:
+        pipeline = CausalDiffusionInferencePipeline(config, device=device)
+    progress(0.35, desc="Initialisation : chargement du checkpoint")
+    logs += "Chargement des poids du checkpoint...\n"
+    state_dict = torch.load(CHECKPOINT_PATH, map_location="cpu")
+    pipeline.generator.load_state_dict(state_dict)
+    checkpoint_step = os.path.basename(os.path.dirname(CHECKPOINT_PATH))
+    checkpoint_step = checkpoint_step.split("_")[-1]
+    progress(0.55, desc="Initialisation : placement sur le device")
+    logs += "Placement du modèle sur le device...\n"
+    pipeline = pipeline.to(dtype=torch.bfloat16)
+    if low_memory:
+        DynamicSwapInstaller.install_model(pipeline.text_encoder, device=device)
+    else:
+        pipeline.text_encoder.to(device=device)
+    pipeline.generator.to(device=device)
+    pipeline.vae.to(device=device)
     # --------------------- Dataset / DataLoader ---------------------
+    progress(0.65, desc="Préparation du dataset")
     logs += "Préparation du dataset (TextDataset)...\n"
     dataset = TextDataset(prompt_path=prompt_txt_path, extended_prompt_path=None)
     num_prompts = len(dataset)
     )
     # --------------------- Output folder (on le vide) ---------------------
+    progress(0.7, desc="Nettoyage du dossier de sortie")
     output_folder = os.path.join(
         output_root, f"rewardforcing-{num_output_frames}f", checkpoint_step
     )
     os.makedirs(output_folder, exist_ok=True)
     logs += f"Dossier de sortie: {output_folder}\n"
+    # --------------------- Phase 2 : boucle d'inférence ---------------------
+    # Ici on peut utiliser progress.tqdm sur la boucle dataloader
     for i, batch_data in progress.tqdm(
         enumerate(dataloader),
         total=num_prompts,
         all_video = []
+        # TEXT-TO-VIDEO uniquement (pas d'I2V ici)
         prompt = batch["prompts"][0]
         extended_prompt = batch.get("extended_prompts", [None])[0]
         if extended_prompt is not None:
         sampled_noise = torch.randn(
             [1, num_output_frames, 16, 60, 104],
+            device=device,
             dtype=torch.bfloat16,
         )
 # UI Gradio
 # -------------------------------------------------------------------
+with gr.Blocks(title="Reward Forcing T2V Demo (inline inference)") as demo:
     gr.Markdown(
         """
+        # 🎬 Reward Forcing – Text-to-Video (inline)
+        Cette version appelle directement la logique d'inférence en Python,
+        ce qui permet à Gradio de suivre :
+        - l'initialisation du modèle (via `progress(...)`)
+        - la boucle de génération (via `progress.tqdm(...)`)
         """
     )