autoprogrammer
/

dream_rcr

@@ -1,8 +1,9 @@
 # coding=utf-8
-# Copyright 2024 The Dream team, HKUNLP Group and the HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
@@ -77,7 +78,8 @@ def sample_tokens(logits, temperature=0.0, top_p=None, top_k=None, margin_confid
     if neg_entropy:
         epsilon = 1e-10
         log_probs = torch.log(probs + epsilon)
-        confidence = torch.sum(probs * log_probs, dim=-1)
     return confidence, x0
@@ -101,9 +103,8 @@ class DreamGenerationConfig(GenerationConfig):
         self.alg: str = kwargs.pop("alg", 'origin')
         self.alg_temp: Optional[float] = kwargs.pop("alg_temp", None)
-        # === RCR 相关参数（新增；默认不影响原逻辑） ===
         self.rcr: bool = kwargs.pop("rcr", False)
-        # 仅在 rcr=True 时用于选择置信度算法；rcr=False 不读取它
         self.conf_alg: str = kwargs.pop("conf_alg", 'maskgit_plus')
         # Parameters that define the output variables of `generate`
@@ -120,7 +121,7 @@ class DreamGenerationConfig(GenerationConfig):
         # Wild card
         self.generation_kwargs = kwargs.pop("generation_kwargs", {})
-        # The remaining attributes do not parametrize `.generate()`, but are informative and/or used by the hub interface.
         self._from_model_config = kwargs.pop("_from_model_config", False)
         self._commit_hash = kwargs.pop("_commit_hash", None)
         self.transformers_version = kwargs.pop("transformers_version", __version__)
@@ -154,48 +155,46 @@ class DreamGenerationMixin:
             attention_mask = attention_mask.repeat_interleave(expand_size, dim=0)
         return input_ids, attention_mask
-    # === 新增：RCR 逻辑，仅在 rcr=True 时被调用；不改动非 RCR 分支 ===
     def _apply_rcr_logic(self, x, x0, confidence, mask_index, overtime_confidence,
                          mask_token_id, step, total_steps, s, t):
         """
-        在 Dream 的“maskgit”采样骨架上，执行 Running Confidence Remasking：
-        - 本步采用 Dream 原调度：global_k = num_mask_token * (1 - s/t)
-        - 先以当前置信度将 top-k token 从 [MASK] 转为预测 token，并累计它们的置信度
-        - 再施加“目标累计”约束：截至本步应累计生成 target_cum = num_mask_token * (1 - s/t)
-          若当前累计 > 目标，则把最低置信度的那些 token 反遮盖回 [MASK]
-        说明：只影响 rcr=True 的路径；rcr=False 时完全不调用本函数。
         """
         device = x.device
         B = x.shape[0]
-        # 与 Dream 一致的 num_mask_token（按 batch 平均）
         num_mask_token = mask_index.sum() / mask_index.shape[0]
-        # 本步的转移数量（按 Dream 调度）
         number_transfer_tokens = int(num_mask_token * (1 - s / t)) if step < total_steps - 1 else int(num_mask_token)
-        # 构造全长置信度和候选值（非 mask 位置分别设为 -inf / mask_token_id）
         full_conf = torch.full_like(x, -torch.inf, device=device, dtype=confidence.dtype)
         x_temp = torch.zeros_like(x, device=device, dtype=torch.long) + mask_token_id
         full_conf[mask_index] = confidence
         x_temp[mask_index] = x0.clone()
         for j in range(B):
-            # 逐样本 clamp，避免 batch 均值带来越界
             masked_j = int(mask_index[j].sum().item())
             k_j = min(number_transfer_tokens, masked_j)
-            # 先按置信度选出本步 top-k_j
             if k_j > 0:
                 _, select_idx = torch.topk(full_conf[j], k=k_j, largest=True)
                 x[j, select_idx] = x_temp[j, select_idx]
                 overtime_confidence[j, select_idx] = full_conf[j, select_idx].clone().float()
-            # 目标累计约束：截至本步应累计的生成数
             if step < total_steps - 1:
-                target_cum = int(num_mask_token * (1 - s))  # 累计目标：随 s 递减而线性增长
-                gen_mask = overtime_confidence[j] > 0
                 current_gen = int(gen_mask.sum().item())
-                # 若超额，则按最低置信度回遮
                 to_remask = max(0, current_gen - target_cum)
                 if to_remask > 0:
                     gen_indices = torch.where(gen_mask)[0]
@@ -205,7 +204,7 @@ class DreamGenerationMixin:
                         _, local_low = torch.topk(gen_conf, k=to_remask, largest=False)
                         low_global = gen_indices[local_low]
                         x[j, low_global] = mask_token_id
-                        overtime_confidence[j, low_global] = 0.0
     def _validate_generated_length(self, generation_config, input_ids_length, has_default_max_length):
         if is_torchdynamo_compiling():
@@ -362,7 +361,7 @@ class DreamGenerationMixin:
         generation_tokens_hook_func,
         generation_logits_hook_func
     ) -> Union[DreamModelOutput, torch.LongTensor]:
-        # === 原变量 ===
         output_history = generation_config.output_history
         return_dict_in_generate = generation_config.return_dict_in_generate
         max_length = generation_config.max_length
@@ -375,7 +374,7 @@ class DreamGenerationMixin:
         top_p = generation_config.top_p
         top_k = generation_config.top_k
-        # === 新增：RCR 控制变量（不会影响 rcr=False 的路径） ===
         rcr = generation_config.rcr
         conf_alg = generation_config.conf_alg
@@ -398,8 +397,8 @@ class DreamGenerationMixin:
         timesteps = torch.linspace(1, eps, steps + 1, device=x.device)
-        # === 仅在 rcr=True 时分配 Overtime Confidence（不影响 baseline） ===
-        overtime_confidence = torch.zeros_like(x, dtype=torch.float32) if rcr else None
         # this allows user-defined token control of the intermediate steps
         x = generation_tokens_hook_func(None, x, None)
@@ -416,7 +415,7 @@ class DreamGenerationMixin:
             s = timesteps[i + 1]
             if alg == 'origin':
-                # === 原版 origin 分支：保持不变 ===
                 p_transfer = 1 - s / t if i < steps - 1 else 1
                 x0 = torch.zeros_like(x[mask_index], device=self.device, dtype=torch.long) + mask_token_id
                 transfer_index_t_s = torch.rand(*x0.shape, device=self.device) < p_transfer
@@ -425,8 +424,7 @@ class DreamGenerationMixin:
                 )
                 x[mask_index] = x0.clone()
             else:
-                # === 非 origin 分支 ===
-                # rcr=False：保持原有使用 alg 的置信度算法
                 # rcr=True: use the confidence algorithm named by conf_alg (rcr=False behaviour is unchanged)
                 if (not rcr and alg == 'maskgit_plus') or (rcr and conf_alg == 'maskgit_plus'):
                     confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k)
@@ -439,7 +437,6 @@ class DreamGenerationMixin:
                         mask_logits, temperature, top_p=top_p, top_k=top_k, neg_entropy=True
                     )
                 else:
-                    # 兼容：如果 rcr=True 但 conf_alg 非上述三者，回退到 alg 指定
                     if rcr:
                         if alg == 'maskgit_plus':
                             confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k)
@@ -457,14 +454,14 @@ class DreamGenerationMixin:
                         raise RuntimeError(f"Unknown alg: {alg}")
                 if rcr:
-                    # === 仅在 rcr=True 时：应用 RCR；不会触碰 baseline 分支实现 ===
-                    print("rcr")
                     self._apply_rcr_logic(
                         x, x0, confidence, mask_index, overtime_confidence,
                         mask_token_id, i, steps, s, t
                     )
                 else:
-                    # === 原版 Dream 逻辑：保持不变（包括 device=self.device 等细节） ===
                     num_mask_token = mask_index.sum() / mask_index.shape[0]
                     number_transfer_tokens = int(num_mask_token * (1 - s / t)) if i < steps - 1 else int(num_mask_token)
                     full_confidence = torch.full_like(x, -torch.inf, device=self.device, dtype=logits.dtype)

 # coding=utf-8
+# Copyright 2024 The Dream team, HKUNLP Group and the
+# HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
     if neg_entropy:
         epsilon = 1e-10
         log_probs = torch.log(probs + epsilon)
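+        # Change 1: flip the sign of the previous expression, sum(p * log p).
+        # NOTE(review): -(p * log p).sum() is the Shannon entropy, which grows as the
+        # distribution gets flatter, so a larger value means LESS confident, not more.
+        # Callers that rank with topk(largest=True) would then favour the most uncertain
+        # positions; the previous sum(p * log p) ranked in the intended direction.
+        # Confirm the sign before relying on this.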
+        confidence = -(probs * log_probs).sum(dim=-1)
     return confidence, x0
         self.alg: str = kwargs.pop("alg", 'origin')
         self.alg_temp: Optional[float] = kwargs.pop("alg_temp", None)
+        # === RCR parameters (the defaults leave the original behaviour unchanged) ===
         self.rcr: bool = kwargs.pop("rcr", False)
         self.conf_alg: str = kwargs.pop("conf_alg", 'maskgit_plus')
         # Parameters that define the output variables of `generate`
         # Wild card
         self.generation_kwargs = kwargs.pop("generation_kwargs", {})
+        # The remaining attributes do not parametrize `.generate()`, but are informative and/or used by the hub interface.
         self._from_model_config = kwargs.pop("_from_model_config", False)
         self._commit_hash = kwargs.pop("_commit_hash", None)
         self.transformers_version = kwargs.pop("transformers_version", __version__)
             attention_mask = attention_mask.repeat_interleave(expand_size, dim=0)
         return input_ids, attention_mask
+    # === RCR: only called when rcr=True; the baseline branch is left untouched ===
     def _apply_rcr_logic(self, x, x0, confidence, mask_index, overtime_confidence,
                          mask_token_id, step, total_steps, s, t):
         """
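+        Running Confidence Remasking (RCR):
+        - Uses Dream's schedule: k_step = num_mask_token * (1 - s/t).
+        - First writes the predictions for the top-k_step most confident [MASK] positions and
+          records their confidence in overtime_confidence.
+        - Then enforces a cumulative target, target_cum = num_mask_token * (1 - s/t): if more
+          tokens than that have been generated so far, the lowest-confidence ones are reset
+          to [MASK].
+        NOTE(review): target_cum uses the same expression as k_step, whereas the previous
+        revision used num_mask_token * (1 - s); confirm which cumulative target is intended.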
         """
         device = x.device
         B = x.shape[0]
+        # Batch-averaged count of masked tokens, following Dream's convention
         num_mask_token = mask_index.sum() / mask_index.shape[0]
         number_transfer_tokens = int(num_mask_token * (1 - s / t)) if step < total_steps - 1 else int(num_mask_token)
+        # Build full-length confidence and candidate tensors (non-mask positions hold -inf / mask_token_id)
         full_conf = torch.full_like(x, -torch.inf, device=device, dtype=confidence.dtype)
         x_temp = torch.zeros_like(x, device=device, dtype=torch.long) + mask_token_id
         full_conf[mask_index] = confidence
         x_temp[mask_index] = x0.clone()
         for j in range(B):
             masked_j = int(mask_index[j].sum().item())
             k_j = min(number_transfer_tokens, masked_j)
+            # First select this step's top-k_j positions by confidence
             if k_j > 0:
                 _, select_idx = torch.topk(full_conf[j], k=k_j, largest=True)
                 x[j, select_idx] = x_temp[j, select_idx]
                 overtime_confidence[j, select_idx] = full_conf[j, select_idx].clone().float()
+            # Cumulative target (aligned with the baseline schedule)
             if step < total_steps - 1:
+                target_cum = int(num_mask_token * (1 - s / t))
+                # Change 2: a position counts as generated when its stored confidence is finite, instead of > 0
+                gen_mask = torch.isfinite(overtime_confidence[j])
                 current_gen = int(gen_mask.sum().item())
                 to_remask = max(0, current_gen - target_cum)
                 if to_remask > 0:
                     gen_indices = torch.where(gen_mask)[0]
                         _, local_low = torch.topk(gen_conf, k=to_remask, largest=False)
                         low_global = gen_indices[local_low]
                         x[j, low_global] = mask_token_id
+                        overtime_confidence[j, low_global] = float("-inf")
     def _validate_generated_length(self, generation_config, input_ids_length, has_default_max_length):
         if is_torchdynamo_compiling():
         generation_tokens_hook_func,
         generation_logits_hook_func
     ) -> Union[DreamModelOutput, torch.LongTensor]:
+        # ---- original parameters ----
         output_history = generation_config.output_history
         return_dict_in_generate = generation_config.return_dict_in_generate
         max_length = generation_config.max_length
         top_p = generation_config.top_p
         top_k = generation_config.top_k
+        # ---- RCR parameters ----
         rcr = generation_config.rcr
         conf_alg = generation_config.conf_alg
         timesteps = torch.linspace(1, eps, steps + 1, device=x.device)
+        # Change 2: only when rcr=True, initialise with -inf so that isfinite() later marks generated positions
+        overtime_confidence = torch.full_like(x, float("-inf"), dtype=torch.float32) if rcr else None
         # this allows user-defined token control of the intermediate steps
         x = generation_tokens_hook_func(None, x, None)
             s = timesteps[i + 1]
             if alg == 'origin':
+                # Original 'origin' branch: unchanged
                 p_transfer = 1 - s / t if i < steps - 1 else 1
                 x0 = torch.zeros_like(x[mask_index], device=self.device, dtype=torch.long) + mask_token_id
                 transfer_index_t_s = torch.rand(*x0.shape, device=self.device) < p_transfer
                 )
                 x[mask_index] = x0.clone()
             else:
+                # rcr=False: keep using the confidence algorithm named by alg
                 # rcr=True: use the confidence algorithm named by conf_alg (rcr=False behaviour is unchanged)
                 if (not rcr and alg == 'maskgit_plus') or (rcr and conf_alg == 'maskgit_plus'):
                     confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k)
                         mask_logits, temperature, top_p=top_p, top_k=top_k, neg_entropy=True
                     )
                 else:
                     if rcr:
                         if alg == 'maskgit_plus':
                             confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k)
                         raise RuntimeError(f"Unknown alg: {alg}")
                 if rcr:
+                    # Only when rcr=True: apply RCR
+                    print("[RCR] step", i)
                     self._apply_rcr_logic(
                         x, x0, confidence, mask_index, overtime_confidence,
                         mask_token_id, i, steps, s, t
                     )
                 else:
+                    # Original Dream logic: unchanged
                     num_mask_token = mask_index.sum() / mask_index.shape[0]
                     number_transfer_tokens = int(num_mask_token * (1 - s / t)) if i < steps - 1 else int(num_mask_token)
                     full_confidence = torch.full_like(x, -torch.inf, device=self.device, dtype=logits.dtype)