Update generation_utils.py

generation_utils.py (CHANGED): +122 -137
@@ -1,19 +1,5 @@
 # coding=utf-8
-# Copyright
-# HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# You may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+# Copyright ...
 import warnings
 import copy
 from dataclasses import dataclass
@@ -33,10 +19,8 @@ def top_p_logits(logits, top_p=None):
     sorted_logits, sorted_indices = torch.sort(logits, descending=True)
     cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
     sorted_indices_to_remove = cumulative_probs > top_p
-    # Shift the indices to the right to keep the first token above the threshold
    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
     sorted_indices_to_remove[..., 0] = 0
-
     mask = torch.zeros_like(logits, dtype=torch.bool, device=logits.device)
     mask = mask.scatter_(-1, sorted_indices, sorted_indices_to_remove)
     logits = logits.masked_fill(mask, torch.finfo(logits.dtype).min)
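As a quick illustration of the shift-right trick above (it keeps the first token that crosses the top_p threshold), here is a self-contained sketch of the same nucleus filtering; the logits values are made up:

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 1.0, 0.5, -1.0]])   # one row of vocabulary scores
top_p = 0.9
sorted_logits, sorted_indices = torch.sort(logits, descending=True)
cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
sorted_indices_to_remove = cumulative_probs > top_p
# shift right so the first token over the threshold survives
sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
sorted_indices_to_remove[..., 0] = 0
mask = torch.zeros_like(logits, dtype=torch.bool).scatter_(-1, sorted_indices, sorted_indices_to_remove)
filtered = logits.masked_fill(mask, torch.finfo(logits.dtype).min)  # tail pushed to -inf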
@@ -44,20 +28,27 @@ def top_p_logits(logits, top_p=None):
 
 
 def top_k_logits(logits, top_k=None):
-    top_k = min(top_k, logits.size(-1))
-    # Remove all tokens with a probability less than the last token of the top-k
+    top_k = min(top_k, logits.size(-1))
     indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
     logits = logits.masked_fill(indices_to_remove, torch.finfo(logits.dtype).min)
     return logits
 
 
-def sample_tokens(logits, temperature=0.0, top_p=None, top_k=None, margin_confidence=False, neg_entropy=False):
+def sample_tokens(
+    logits,
+    temperature=0.0,
+    top_p=None,
+    top_k=None,
+    margin_confidence=False,
+    neg_entropy=False,
+):
     if temperature > 0:
         logits = logits / temperature
     if top_p is not None and top_p < 1:
         logits = top_p_logits(logits, top_p)
     if top_k is not None:
         logits = top_k_logits(logits, top_k)
+
     probs = torch.softmax(logits, dim=-1)
 
     if temperature > 0:
@@ -76,10 +67,10 @@ def sample_tokens(logits, temperature=0.0, top_p=None, top_k=None, margin_confidence=False, neg_entropy=False):
         confidence = top1_probs - top2_probs
 
     if neg_entropy:
+        # Keep the original "entropy" definition (note it is negative; do not flip the sign, to avoid affecting the baseline)
         epsilon = 1e-10
         log_probs = torch.log(probs + epsilon)
-
-        confidence = -(probs * log_probs).sum(dim=-1)
+        confidence = torch.sum(probs * log_probs, dim=-1)
 
     return confidence, x0
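For context on the two confidence notions used by sample_tokens: margin confidence is the top-1/top-2 probability gap, and after this change the entropy branch returns negative entropy (the raw sum of p·log p rather than its negation), so in both cases larger means more confident. A standalone sketch with illustrative numbers:

import torch

probs = torch.tensor([[0.70, 0.20, 0.10],    # peaked distribution
                      [0.40, 0.35, 0.25]])   # flatter distribution

top2 = torch.topk(probs, k=2, dim=-1).values
margin_conf = top2[..., 0] - top2[..., 1]    # tensor([0.50, 0.05]): margin favors the peaked row

neg_entropy_conf = torch.sum(probs * torch.log(probs + 1e-10), dim=-1)
# ≈ tensor([-0.80, -1.08]): negative entropy also ranks the peaked row higher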
@@ -97,31 +88,29 @@ class DreamGenerationConfig(GenerationConfig):
         self.top_k: Optional[int] = kwargs.pop("top_k", None)
         self.max_length = kwargs.pop("max_length", 20)
         self.max_new_tokens = kwargs.pop("max_new_tokens", None)
-        # diffusion specific
+        # diffusion specific
         self.eps: float = kwargs.pop("eps", 1e-3)
         self.steps: int = kwargs.pop("steps", 512)
-        self.alg: str = kwargs.pop("alg", …
+        self.alg: str = kwargs.pop("alg", "origin")
         self.alg_temp: Optional[float] = kwargs.pop("alg_temp", None)
 
-        # …
+        # RCR: off by default; when enabled it only re-masks after selection and leaves the baseline behavior untouched
         self.rcr: bool = kwargs.pop("rcr", False)
-        self.conf_alg: str = kwargs.pop("conf_alg", …
+        self.conf_alg: str = kwargs.pop("conf_alg", "maskgit_plus")
 
-        # …
+        # generate output control
         self.num_return_sequences: int = kwargs.pop("num_return_sequences", 1)
         self.return_dict_in_generate: bool = kwargs.pop("return_dict_in_generate", False)
         self.output_history: bool = kwargs.pop("output_history", False)
 
-        # …
+        # special tokens
         self.mask_token_id = kwargs.pop("mask_token_id", None)
         self.pad_token_id = kwargs.pop("pad_token_id", None)
         self.bos_token_id = kwargs.pop("bos_token_id", None)
         self.eos_token_id = kwargs.pop("eos_token_id", None)
 
-        # Wild card
         self.generation_kwargs = kwargs.pop("generation_kwargs", {})
 
-        # hub interface
         self._from_model_config = kwargs.pop("_from_model_config", False)
         self._commit_hash = kwargs.pop("_commit_hash", None)
         self.transformers_version = kwargs.pop("transformers_version", __version__)
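A hypothetical construction of this config with the new RCR switches; the field names come from the diff, but the value choices are purely illustrative (the old defaults for `alg` and `conf_alg` are truncated in this view):

config = DreamGenerationConfig(
    steps=512,
    max_new_tokens=256,
    top_p=0.95,
    alg="entropy",            # confidence algorithm on the baseline path
    alg_temp=0.0,
    rcr=True,                 # enable remask-after-selection
    conf_alg="maskgit_plus",  # confidence algorithm used when rcr=True
)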
@@ -145,7 +134,7 @@ class DreamGenerationMixin:
     def _expand_inputs_for_generation(
         expand_size: int = 1,
         input_ids: Optional[torch.LongTensor] = None,
-        attention_mask: Optional[torch.LongTensor] = None
+        attention_mask: Optional[torch.LongTensor] = None,
     ) -> Tuple[torch.LongTensor, Dict[str, Any]]:
         if expand_size == 1:
             return input_ids, attention_mask
@@ -155,56 +144,50 @@
         attention_mask = attention_mask.repeat_interleave(expand_size, dim=0)
         return input_ids, attention_mask
 
-    # …
-    def …
-        """
-        …
-        - This step first picks the top k_step predictions from the [MASK] positions by confidence and accumulates the confidences into overtime_confidence
-        - Then applies the "cumulative target" constraint: target_cum = num_mask_token * (1 - s/t)
-          If the current cumulative count exceeds the target, the lowest-confidence tokens are re-masked back to [MASK]
-        """
-        …
-        num_mask_token = mask_index.sum() / mask_index.shape[0]
-        number_transfer_tokens = int(num_mask_token * (1 - s / t)) if step < total_steps - 1 else int(num_mask_token)
-
-        # …
-        x_temp = torch.zeros_like(x, device=device, dtype=torch.long) + mask_token_id
-        full_conf[mask_index] = confidence
-        x_temp[mask_index] = x0.clone()
-
-        for j in range(B):
-            …
-            gen_indices = torch.where(gen_mask)[0]
-            if gen_indices.numel() > 0:
-                gen_conf = overtime_confidence[j, gen_indices]
-                to_remask = min(to_remask, int(gen_indices.numel()))
-                _, local_low = torch.topk(gen_conf, k=to_remask, largest=False)
-                low_global = gen_indices[local_low]
-                x[j, low_global] = mask_token_id
-                overtime_confidence[j, low_global] = float("-inf")
+    # Used only when rcr=True; does not change the baseline selection logic
+    def _rcr_remask_after_selection(
+        self,
+        x,                        # [B, L] current sequence
+        mask_token_id: int,
+        step: int,
+        steps: int,
+        s: torch.Tensor,
+        t: torch.Tensor,
+        is_fixed: torch.Tensor,   # [B, L] bool, positions already "fixed"
+        fixed_conf: torch.Tensor  # [B, L] float, confidence of fixed positions (-inf elsewhere)
+    ):
+        """
+        After selection has completed exactly as in the baseline, re-mask the
+        lowest-confidence excess tokens according to the cumulative target.
+        Minimally invasive: the baseline's selection is untouched; re-masking happens afterwards.
+        """
+        B, L = x.shape
+        # Compute num_mask_token with the same "batch mean" semantics as the baseline
+        # Note: based on the number of [MASK] tokens in the current x
+        mask_index = (x == mask_token_id)
+        num_mask_token = (mask_index.sum() / mask_index.shape[0]).item()
+
+        # Dream's original schedule: the cumulative number of tokens that should be fixed by this step
+        target_cum = int(num_mask_token * (1 - (s / t).item())) if step < steps - 1 else int(num_mask_token)
+
+        for j in range(B):
+            # How many positions are currently fixed
+            fixed_j = is_fixed[j]
+            current_gen = int(fixed_j.sum().item())
+            # If over target, re-mask the lowest-confidence excess
+            to_remask = max(0, current_gen - target_cum)
+            if to_remask > 0:
+                cand_idx = torch.where(fixed_j)[0]
+                if cand_idx.numel() == 0:
+                    continue
+                conf_vals = fixed_conf[j, cand_idx]
+                # Take the to_remask smallest
+                k = min(to_remask, int(cand_idx.numel()))
+                _, local_low = torch.topk(conf_vals, k=k, largest=False)
+                low_global = cand_idx[local_low]
+                # Push back to [MASK] and clear the bookkeeping
+                x[j, low_global] = mask_token_id
+                is_fixed[j, low_global] = False
+                fixed_conf[j, low_global] = float("-inf")
 
     def _validate_generated_length(self, generation_config, input_ids_length, has_default_max_length):
         if is_torchdynamo_compiling():
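To make the cumulative-target rule above concrete, a small worked example with made-up numbers:

# Worked example of the re-mask budget (illustrative values only).
num_mask_token = 100          # average [MASK] count per batch row
s, t = 0.7, 0.8               # adjacent points on the timestep schedule
target_cum = int(num_mask_token * (1 - s / t))   # int(100 * 0.125) = 12 tokens may stay fixed

current_gen = 15              # this row has 15 fixed positions after selection
to_remask = max(0, current_gen - target_cum)     # 3 lowest-confidence tokens return to [MASK]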
@@ -229,12 +212,9 @@
         if not has_default_max_length and generation_config.max_length is not None:
             logger.warning(
                 f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
-                f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
-                "Please refer to the documentation for more information. "
-                "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)"
+                f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence."
             )
             generation_config.max_length = generation_config.max_new_tokens + input_ids_length
-
         elif has_default_max_length:
             if generation_config.max_length == DreamGenerationConfig().max_length:
                 generation_config.max_length = generation_config.max_length + input_ids_length
@@ -261,7 +241,6 @@
             generation_config.pad_token_id = self.generation_config.pad_token_id
         if generation_config.mask_token_id is None:
             generation_config.mask_token_id = self.generation_config.mask_token_id
-
         return generation_config
 
     def _prepare_special_tokens(self, generation_config: DreamGenerationConfig, device: Optional[Union[torch.device, str]] = None):
@@ -314,18 +293,13 @@
             has_default_max_length=has_default_max_length,
             input_ids_length=input_ids_length,
         )
-
         self._validate_generated_length(generation_config, input_ids_length, has_default_max_length)
 
         if not is_torchdynamo_compiling() and self.device.type != input_ids.device.type:
             warnings.warn(
                 "You are calling .generate() with the `input_ids` being on a device type different"
                 f" than your model's device. `input_ids` is on {input_ids.device.type}, whereas the model"
-                f" is on {self.device.type}."
-                " Please make sure that you have put `input_ids` to the"
-                f" correct device by calling for example input_ids = input_ids.to('{self.device.type}') before"
-                " running `.generate()`.",
-                UserWarning,
+                f" is on {self.device.type}."
             )
         if (
             hasattr(generation_config, "pad_token_id")
@@ -333,9 +307,7 @@
             and attention_mask is None
         ):
             warnings.warn(
-                "Padding was detected but no attention mask is passed here. For correct "
-                "generation results, please set `attention_mask` when batch-padding inputs.",
-                UserWarning,
+                "Padding was detected but no attention mask is passed here. For correct results, please set `attention_mask`."
             )
 
         input_ids, attention_mask = self._expand_inputs_for_generation(
@@ -359,9 +331,8 @@
         attention_mask: Optional[torch.LongTensor],
         generation_config: DreamGenerationConfig,
         generation_tokens_hook_func,
-        generation_logits_hook_func
+        generation_logits_hook_func,
     ) -> Union[DreamModelOutput, torch.LongTensor]:
-        # ---- original parameters ----
         output_history = generation_config.output_history
         return_dict_in_generate = generation_config.return_dict_in_generate
         max_length = generation_config.max_length
@@ -374,13 +345,12 @@
         top_p = generation_config.top_p
         top_k = generation_config.top_k
 
-        # ---- RCR parameters ----
         rcr = generation_config.rcr
         conf_alg = generation_config.conf_alg
 
         histories = [] if (return_dict_in_generate and output_history) else None
 
-        # pad
+        # pad to max_length
         x = F.pad(input_ids, (0, max_length - input_ids.shape[1]), value=mask_token_id)
 
         if attention_mask is not None and torch.any(attention_mask == 0.0):
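The pad call above builds the generation canvas: the prompt is right-padded with mask tokens up to max_length, and the loop below only ever fills the masked tail. A tiny sketch with placeholder ids (not real vocabulary entries):

import torch
import torch.nn.functional as F

mask_token_id = 151666                          # placeholder id
input_ids = torch.tensor([[101, 2023, 2003]])   # prompt of length 3
max_length = 8
x = F.pad(input_ids, (0, max_length - input_ids.shape[1]), value=mask_token_id)
# x: [[101, 2023, 2003, M, M, M, M, M]] where M == mask_token_id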
@@ -397,54 +367,57 @@
 
         timesteps = torch.linspace(1, eps, steps + 1, device=x.device)
 
-        # …
-        …
+        # Only when rcr=True: lightweight tracking, does not affect the baseline
+        is_fixed = torch.zeros_like(x, dtype=torch.bool) if rcr else None
+        fixed_conf = torch.full_like(x, float("-inf")) if rcr else None  # stores the confidence of fixed positions
 
-        # this allows user-defined token control of the intermediate steps
         x = generation_tokens_hook_func(None, x, None)
         for i in range(steps):
             mask_index = (x == mask_token_id)
             logits = self(x, attention_mask, tok_idx).logits
             logits = torch.cat([logits[:, :1], logits[:, :-1]], dim=1)
 
-            # this allows user-defined logits control of the intermediate steps
             logits = generation_logits_hook_func(i, x, logits)
 
             mask_logits = logits[mask_index]
             t = timesteps[i]
             s = timesteps[i + 1]
 
-            if alg == 'origin':
-                # …
+            if alg == "origin":
+                # kept exactly as the original
                 p_transfer = 1 - s / t if i < steps - 1 else 1
                 x0 = torch.zeros_like(x[mask_index], device=self.device, dtype=torch.long) + mask_token_id
                 transfer_index_t_s = torch.rand(*x0.shape, device=self.device) < p_transfer
                 _, x0[transfer_index_t_s] = sample_tokens(
-                    mask_logits[transfer_index_t_s], …
+                    mask_logits[transfer_index_t_s],
+                    temperature=temperature,
+                    top_p=top_p,
+                    top_k=top_k,
                 )
                 x[mask_index] = x0.clone()
+
+                # the origin branch does no RCR (same as the original)
             else:
-                # rcr=False …
-                if (not rcr and alg == 'maskgit_plus') or (rcr and conf_alg == 'maskgit_plus'):
+                # Confidence algorithm: alg when rcr=False, conf_alg when rcr=True (as before)
+                if (not rcr and alg == "maskgit_plus") or (rcr and conf_alg == "maskgit_plus"):
                     confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k)
-                elif (not rcr and alg == 'topk_margin') or (rcr and conf_alg == 'topk_margin'):
+                elif (not rcr and alg == "topk_margin") or (rcr and conf_alg == "topk_margin"):
                     confidence, x0 = sample_tokens(
                         mask_logits, temperature=temperature, top_p=top_p, top_k=top_k, margin_confidence=True
                     )
-                elif (not rcr and alg == 'entropy') or (rcr and conf_alg == 'entropy'):
+                elif (not rcr and alg == "entropy") or (rcr and conf_alg == "entropy"):
                     confidence, x0 = sample_tokens(
                         mask_logits, temperature, top_p=top_p, top_k=top_k, neg_entropy=True
                     )
                 else:
                     if rcr:
-                        if alg == 'maskgit_plus':
+                        if alg == "maskgit_plus":
                             confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k)
-                        elif alg == 'topk_margin':
+                        elif alg == "topk_margin":
                             confidence, x0 = sample_tokens(
                                 mask_logits, temperature=temperature, top_p=top_p, top_k=top_k, margin_confidence=True
                             )
-                        elif alg == 'entropy':
+                        elif alg == "entropy":
                             confidence, x0 = sample_tokens(
                                 mask_logits, temperature, top_p=top_p, top_k=top_k, neg_entropy=True
                             )
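For intuition about the schedule driving this loop: timesteps runs from 1 down to eps, and in the origin branch the expected fraction of remaining masks revealed at step i is 1 - s/t, with the final step revealing everything. A standalone sketch:

import torch

steps, eps = 4, 1e-3
timesteps = torch.linspace(1, eps, steps + 1)   # ≈ [1.000, 0.750, 0.500, 0.251, 0.001]
for i in range(steps):
    t, s = timesteps[i], timesteps[i + 1]
    p_transfer = 1 - s / t if i < steps - 1 else 1
    # i=0: ≈0.25 of masks revealed; the fraction grows each step, and the last step reveals all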
@@ -453,41 +426,53 @@
                 else:
                     raise RuntimeError(f"Unknown alg: {alg}")
 
+                # ===== Baseline selection logic: kept as-is =====
+                num_mask_token = mask_index.sum() / mask_index.shape[0]
+                number_transfer_tokens = (
+                    int(num_mask_token * (1 - s / t)) if i < steps - 1 else int(num_mask_token)
+                )
+                full_confidence = torch.full_like(x, -torch.inf, device=self.device, dtype=logits.dtype)
+                full_confidence[mask_index] = confidence
+
+                if number_transfer_tokens > 0:
+                    if alg_temp is None or alg_temp == 0:
+                        _, transfer_index = torch.topk(full_confidence, number_transfer_tokens)
+                    else:
+                        full_confidence = full_confidence / alg_temp
+                        full_confidence = F.softmax(full_confidence, dim=-1)
+                        transfer_index = torch.multinomial(full_confidence, num_samples=number_transfer_tokens)
+
+                    x_ = torch.zeros_like(x, device=self.device, dtype=torch.long) + mask_token_id
+                    x_[mask_index] = x0.clone()
+                    row_indices = torch.arange(x.size(0), device=self.device).unsqueeze(1).expand_as(transfer_index)
+                    x[row_indices, transfer_index] = x_[row_indices, transfer_index]
+
+                    # ===== Only when rcr=True: record the "fixed" positions and their confidences (for later re-masking) =====
+                    if rcr:
+                        is_fixed[row_indices, transfer_index] = True
+                        # Note: store the full_confidence the baseline uses (identical to the baseline)
+                        fixed_conf[row_indices, transfer_index] = full_confidence[row_indices, transfer_index]
+
+                # ===== Only when rcr=True: after selection, re-mask the lowest-confidence excess down to the cumulative target =====
                 if rcr:
-                    # …
-                    …
+                    # This step only re-masks; the baseline selection behavior is untouched
+                    self._rcr_remask_after_selection(
+                        x=x,
+                        mask_token_id=mask_token_id,
+                        step=i,
+                        steps=steps,
+                        s=s,
+                        t=t,
+                        is_fixed=is_fixed,
+                        fixed_conf=fixed_conf,
                     )
-
-                    # Original Dream logic: kept unchanged
-                    num_mask_token = mask_index.sum() / mask_index.shape[0]
-                    number_transfer_tokens = int(num_mask_token * (1 - s / t)) if i < steps - 1 else int(num_mask_token)
-                    full_confidence = torch.full_like(x, -torch.inf, device=self.device, dtype=logits.dtype)
-                    full_confidence[mask_index] = confidence
-                    if number_transfer_tokens > 0:
-                        if alg_temp is None or alg_temp == 0:
-                            _, transfer_index = torch.topk(full_confidence, number_transfer_tokens)
-                        else:
-                            full_confidence = full_confidence / alg_temp
-                            full_confidence = F.softmax(full_confidence, dim=-1)
-                            transfer_index = torch.multinomial(full_confidence, num_samples=number_transfer_tokens)
-                        x_ = torch.zeros_like(x, device=self.device, dtype=torch.long) + mask_token_id
-                        x_[mask_index] = x0.clone()
-                        row_indices = torch.arange(x.size(0), device=self.device).unsqueeze(1).expand_as(transfer_index)
-                        x[row_indices, transfer_index] = x_[row_indices, transfer_index]
-
-                    # this allows user-defined token control of the intermediate steps
+
                 x = generation_tokens_hook_func(i, x, logits)
 
                 if histories is not None:
                     histories.append(x.clone())
 
         if return_dict_in_generate:
-            return DreamModelOutput(
-                sequences=x,
-                history=histories,
-            )
+            return DreamModelOutput(sequences=x, history=histories)
         else:
             return x
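The baseline block above picks transfer positions either greedily (top-k over confidence) or stochastically when alg_temp is set. A self-contained sketch of both modes, with illustrative values:

import torch
import torch.nn.functional as F

full_confidence = torch.tensor([[0.9, -float("inf"), 0.2, 0.6]])  # -inf marks non-mask positions
number_transfer_tokens = 2

# alg_temp unset: deterministic, take the most confident positions
_, transfer_index = torch.topk(full_confidence, number_transfer_tokens)   # indices 0 and 3

# alg_temp > 0: stochastic, sample positions in proportion to softened confidence
alg_temp = 0.5
weights = F.softmax(full_confidence / alg_temp, dim=-1)                   # -inf position gets zero weight
sampled_index = torch.multinomial(weights, num_samples=number_transfer_tokens)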