Reward-Forcing

Paused

App Files Files Community

fffiloni committed on Dec 31, 2025

Commit

e3f74ba

verified ·

1 Parent(s): d45d065

Update app_wip.py

Browse files

Files changed (1) hide show

app_wip.py +78 -53

app_wip.py CHANGED Viewed

@@ -51,6 +51,70 @@ OUTPUT_ROOT = "videos"
 os.makedirs(PROMPT_DIR, exist_ok=True)
 os.makedirs(OUTPUT_ROOT, exist_ok=True)
 def reward_forcing_inference(
     prompt_txt_path: str,
@@ -61,55 +125,17 @@ def reward_forcing_inference(
 ):
     """
     Version inline / simplifiée de inference.py :
-    - single GPU
     - T2V uniquement
     - 1 fichier .txt = n prompts (mais on retourne la 1ère vidéo)
     """
     logs = ""
-    # --------------------- Device & seed ---------------------
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    set_seed(0)
-    free_vram = get_cuda_free_memory_gb(device)
-    logs += f"Free VRAM {free_vram} GB\n"
-    low_memory = free_vram < 40
-    torch.set_grad_enabled(False)
-    # --------------------- Phase 1 : init modèle / config ---------------------
-    progress(0.05, desc="Initialisation : chargement de la config")
-    logs += "Chargement de la config...\n"
-    config = OmegaConf.load(CONFIG_PATH)
-    default_config = OmegaConf.load("configs/default_config.yaml")
-    config = OmegaConf.merge(default_config, config)
-    progress(0.15, desc="Initialisation : création de la pipeline")
-    logs += "Initialisation de la pipeline...\n"
-    if hasattr(config, "denoising_step_list"):
-        pipeline = CausalInferencePipeline(config, device=device)
-    else:
-        pipeline = CausalDiffusionInferencePipeline(config, device=device)
-    progress(0.35, desc="Initialisation : chargement du checkpoint")
-    logs += "Chargement des poids du checkpoint...\n"
-    state_dict = torch.load(CHECKPOINT_PATH, map_location="cpu")
-    pipeline.generator.load_state_dict(state_dict)
-    checkpoint_step = os.path.basename(os.path.dirname(CHECKPOINT_PATH))
-    checkpoint_step = checkpoint_step.split("_")[-1]
-    progress(0.55, desc="Initialisation : placement sur le device")
-    logs += "Placement du modèle sur le device...\n"
-    pipeline = pipeline.to(dtype=torch.bfloat16)
-    if low_memory:
-        DynamicSwapInstaller.install_model(pipeline.text_encoder, device=device)
-    else:
-        pipeline.text_encoder.to(device=device)
-    pipeline.generator.to(device=device)
-    pipeline.vae.to(device=device)
     # --------------------- Dataset / DataLoader ---------------------
-    progress(0.65, desc="Préparation du dataset")
     logs += "Préparation du dataset (TextDataset)...\n"
     dataset = TextDataset(prompt_path=prompt_txt_path, extended_prompt_path=None)
     num_prompts = len(dataset)
@@ -123,7 +149,7 @@ def reward_forcing_inference(
     )
     # --------------------- Output folder (on le vide) ---------------------
-    progress(0.7, desc="Nettoyage du dossier de sortie")
     output_folder = os.path.join(
         output_root, f"rewardforcing-{num_output_frames}f", checkpoint_step
     )
@@ -131,8 +157,7 @@ def reward_forcing_inference(
     os.makedirs(output_folder, exist_ok=True)
     logs += f"Dossier de sortie: {output_folder}\n"
-    # --------------------- Phase 2 : boucle d'inférence ---------------------
-    # Ici on peut utiliser progress.tqdm sur la boucle dataloader
     for i, batch_data in progress.tqdm(
         enumerate(dataloader),
         total=num_prompts,
@@ -151,7 +176,7 @@ def reward_forcing_inference(
         all_video = []
-        # TEXT-TO-VIDEO uniquement (pas d'I2V ici)
         prompt = batch["prompts"][0]
         extended_prompt = batch.get("extended_prompts", [None])[0]
         if extended_prompt is not None:
@@ -163,7 +188,7 @@ def reward_forcing_inference(
         sampled_noise = torch.randn(
             [1, num_output_frames, 16, 60, 104],
-            device=device,
             dtype=torch.bfloat16,
         )
@@ -247,15 +272,15 @@ def gradio_generate(
 # UI Gradio
 # -------------------------------------------------------------------
-with gr.Blocks(title="Reward Forcing T2V Demo (inline inference)") as demo:
     gr.Markdown(
         """
-        # 🎬 Reward Forcing – Text-to-Video (inline)
-        Cette version appelle directement la logique d'inférence en Python,
-        ce qui permet à Gradio de suivre :
-        - l'initialisation du modèle (via `progress(...)`)
-        - la boucle de génération (via `progress.tqdm(...)`)
         """
     )

 os.makedirs(PROMPT_DIR, exist_ok=True)
 os.makedirs(OUTPUT_ROOT, exist_ok=True)
+# === Globals pour le cache du modèle ===
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+PIPELINE = None
+LOW_MEMORY = None
+CHECKPOINT_STEP = None
+def load_pipeline(progress: gr.Progress):
+    """
+    Charge la config + pipeline + checkpoint + placement device une seule fois.
+    Utilise progress.tqdm pour afficher plusieurs étapes la 1ère fois.
+    """
+    global PIPELINE, LOW_MEMORY, CHECKPOINT_STEP
+    logs = ""
+    # Si déjà chargé, on ne refait rien de lourd
+    if PIPELINE is not None:
+        progress(0.1, desc="Modèle déjà initialisé (cache)")
+        logs += "Modèle déjà initialisé, réutilisation du cache.\n"
+        return PIPELINE, LOW_MEMORY, CHECKPOINT_STEP, logs
+    # ---- Première initialisation lourde ----
+    set_seed(0)
+    free_vram = get_cuda_free_memory_gb(DEVICE)
+    LOW_MEMORY = free_vram < 40
+    logs += f"Free VRAM {free_vram} GB\n"
+    steps = range(4)
+    for step in progress.tqdm(steps, desc="Initialisation du modèle", unit="étape"):
+        if step == 0:
+            logs += "Étape 1/4 : Chargement de la config...\n"
+            config = OmegaConf.load(CONFIG_PATH)
+            default_config = OmegaConf.load("configs/default_config.yaml")
+            config = OmegaConf.merge(default_config, config)
+        elif step == 1:
+            logs += "Étape 2/4 : Création de la pipeline...\n"
+            if hasattr(config, "denoising_step_list"):
+                PIPELINE = CausalInferencePipeline(config, device=DEVICE)
+            else:
+                PIPELINE = CausalDiffusionInferencePipeline(config, device=DEVICE)
+        elif step == 2:
+            logs += "Étape 3/4 : Chargement des poids du checkpoint...\n"
+            state_dict = torch.load(CHECKPOINT_PATH, map_location="cpu")
+            PIPELINE.generator.load_state_dict(state_dict)
+            ckpt_dir = os.path.dirname(CHECKPOINT_PATH)
+            CHECKPOINT_STEP = os.path.basename(ckpt_dir)
+            CHECKPOINT_STEP = CHECKPOINT_STEP.split("_")[-1]
+        elif step == 3:
+            logs += "Étape 4/4 : Placement du modèle sur le device...\n"
+            PIPELINE = PIPELINE.to(dtype=torch.bfloat16)
+            if LOW_MEMORY:
+                DynamicSwapInstaller.install_model(PIPELINE.text_encoder, device=DEVICE)
+            else:
+                PIPELINE.text_encoder.to(device=DEVICE)
+            PIPELINE.generator.to(device=DEVICE)
+            PIPELINE.vae.to(device=DEVICE)
+    logs += "Initialisation du modèle terminée ✅\n"
+    return PIPELINE, LOW_MEMORY, CHECKPOINT_STEP, logs
 def reward_forcing_inference(
     prompt_txt_path: str,
 ):
     """
     Version inline / simplifiée de inference.py :
     - T2V uniquement
     - 1 fichier .txt = n prompts (mais on retourne la 1ère vidéo)
     """
     logs = ""
+    # --------------------- Load / cache pipeline ---------------------
+    pipeline, low_memory, checkpoint_step, init_logs = load_pipeline(progress)
+    logs += init_logs
     # --------------------- Dataset / DataLoader ---------------------
+    progress(0.7, desc="Préparation du dataset")
     logs += "Préparation du dataset (TextDataset)...\n"
     dataset = TextDataset(prompt_path=prompt_txt_path, extended_prompt_path=None)
     num_prompts = len(dataset)
     )
     # --------------------- Output folder (on le vide) ---------------------
+    progress(0.8, desc="Nettoyage du dossier de sortie")
     output_folder = os.path.join(
         output_root, f"rewardforcing-{num_output_frames}f", checkpoint_step
     )
     os.makedirs(output_folder, exist_ok=True)
     logs += f"Dossier de sortie: {output_folder}\n"
+    # --------------------- Boucle d'inférence (tqdm) ---------------------
     for i, batch_data in progress.tqdm(
         enumerate(dataloader),
         total=num_prompts,
         all_video = []
+        # TEXT-TO-VIDEO uniquement
         prompt = batch["prompts"][0]
         extended_prompt = batch.get("extended_prompts", [None])[0]
         if extended_prompt is not None:
         sampled_noise = torch.randn(
             [1, num_output_frames, 16, 60, 104],
+            device=DEVICE,
             dtype=torch.bfloat16,
         )
 # UI Gradio
 # -------------------------------------------------------------------
+with gr.Blocks(title="Reward Forcing T2V Demo (inline, cached)") as demo:
     gr.Markdown(
         """
+        # 🎬 Reward Forcing – Text-to-Video (inline & cached)
+        Cette version :
+        - Charge et initialise le modèle **une seule fois** (cache global)
+        - Affiche une barre `tqdm` multi-étapes pour l'initialisation la 1ère fois
+        - Affiche une barre `tqdm` pour la génération vidéo (1 step / prompt)
         """
     )