dream_rcr

@@ -109,6 +109,10 @@ class DreamGenerationConfig(GenerationConfig):
         self.alg: str = kwargs.pop("alg", 'origin')
         self.alg_temp: Optional[float] = kwargs.pop("alg_temp", None)
         # Parameters that define the output variables of `generate`
         self.num_return_sequences: int = kwargs.pop("num_return_sequences", 1)
         self.return_dict_in_generate: bool = kwargs.pop("return_dict_in_generate", False)
@@ -164,6 +168,56 @@ class DreamGenerationMixin:
             attention_mask = attention_mask.repeat_interleave(expand_size, dim=0)
         return input_ids, attention_mask
     def _validate_generated_length(self, generation_config, input_ids_length, has_default_max_length):
         """Performs validation related to the resulting generated length"""
@@ -382,6 +436,10 @@ class DreamGenerationMixin:
         top_p = generation_config.top_p
         top_k = generation_config.top_k
         histories = [] if (return_dict_in_generate and output_history) else None
         # pad input_ids to max_length
@@ -404,6 +462,9 @@ class DreamGenerationMixin:
         timesteps = torch.linspace(1, eps, steps + 1, device=x.device)
         # this allows user-defined token control of the intermediate steps
         x = generation_tokens_hook_func(None, x, None)
         for i in range(steps):
@@ -425,29 +486,36 @@ class DreamGenerationMixin:
                 _, x0[transfer_index_t_s]= sample_tokens(mask_logits[transfer_index_t_s], temperature=temperature, top_p=top_p, top_k=top_k)
                 x[mask_index] = x0.clone()
             else:
-                if alg == 'maskgit_plus':
                     confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k)
-                elif alg == 'topk_margin':
                     confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k, margin_confidence=True)
-                elif alg == 'entropy':
                     confidence, x0 = sample_tokens(mask_logits, temperature, top_p=top_p, top_k=top_k, neg_entropy=True)
                 else:
                     raise RuntimeError(f"Unknown alg: {alg}")
-                num_mask_token = mask_index.sum() / mask_index.shape[0]
-                number_transfer_tokens = int(num_mask_token * (1 - s / t)) if i < steps - 1 else int(num_mask_token)
-                full_confidence = torch.full_like(x, -torch.inf, device=self.device, dtype=logits.dtype)
-                full_confidence[mask_index] = confidence
-                if number_transfer_tokens > 0:
-                    if alg_temp is None or alg_temp == 0:
-                        _, transfer_index = torch.topk(full_confidence, number_transfer_tokens)
-                    else:
-                        full_confidence = full_confidence / alg_temp
-                        full_confidence = F.softmax(full_confidence, dim=-1)
-                        transfer_index = torch.multinomial(full_confidence, num_samples=number_transfer_tokens)
-                    x_ = torch.zeros_like(x, device=self.device, dtype=torch.long) + mask_token_id
-                    x_[mask_index] = x0.clone()
-                    row_indices = torch.arange(x.size(0), device=self.device).unsqueeze(1).expand_as(transfer_index)
-                    x[row_indices,transfer_index] = x_[row_indices,transfer_index]
             # this allows user-defined token control of the intermediate steps
             x = generation_tokens_hook_func(i, x, logits)

         self.alg: str = kwargs.pop("alg", 'origin')
         self.alg_temp: Optional[float] = kwargs.pop("alg_temp", None)
+        # RCR specific parameters
+        self.rcr: bool = kwargs.pop("rcr", False)
+        self.conf_alg: str = kwargs.pop("conf_alg", 'maskgit_plus')
         # Parameters that define the output variables of `generate`
         self.num_return_sequences: int = kwargs.pop("num_return_sequences", 1)
         self.return_dict_in_generate: bool = kwargs.pop("return_dict_in_generate", False)
             attention_mask = attention_mask.repeat_interleave(expand_size, dim=0)
         return input_ids, attention_mask
+    def _apply_rcr_logic(self, x, x0, confidence, mask_index, overtime_confidence,
+                        mask_token_id, step, total_steps, s, t):
+        """
+        Apply Running Confidence Remasking (RCR) logic adapted for Dream model.
+        """
+        batch_size = x.shape[0]
+        # Calculate number of tokens to transfer using Dream's scheduling
+        num_mask_token = mask_index.sum() / mask_index.shape[0]
+        number_transfer_tokens = int(num_mask_token * (1 - s / t)) if step < total_steps - 1 else int(num_mask_token)
+        # Update predictions for masked positions only
+        x0 = torch.where(mask_index, x0, x)
+        confidence = torch.where(mask_index, confidence, torch.tensor(-float('inf'), device=x0.device))
+        # RCR: Select tokens based on cumulative confidence
+        for j in range(batch_size):
+            if number_transfer_tokens > 0:
+                batch_confidence = confidence[j]
+                batch_mask_index = mask_index[j]
+                # Select top confident tokens to transfer
+                _, select_indices = torch.topk(batch_confidence, k=number_transfer_tokens, largest=True)
+                x[j, select_indices] = x0[j, select_indices]
+                overtime_confidence[j, select_indices] = batch_confidence[select_indices].clone().float()
+                # RCR: Re-mask lowest confidence tokens for next steps
+                if step < total_steps - 1:
+                    # Find tokens that have been generated (non-zero confidence)
+                    generated_mask = overtime_confidence[j] > 0
+                    if generated_mask.any():
+                        # Calculate tokens to re-mask for next iteration
+                        next_num_mask_tokens = int(num_mask_token * (1 - torch.linspace(1, s, total_steps + 1, device=x.device)[step + 2] / t))
+                        if next_num_mask_tokens > 0:
+                            # Get confidence of generated tokens
+                            generated_confidence = overtime_confidence[j][generated_mask]
+                            generated_indices = torch.where(generated_mask)[0]
+                            if len(generated_confidence) >= next_num_mask_tokens:
+                                # Re-mask lowest confidence tokens
+                                _, local_mask_indices = torch.topk(
+                                    generated_confidence,
+                                    k=next_num_mask_tokens,
+                                    largest=False
+                                )
+                                global_mask_indices = generated_indices[local_mask_indices]
+                                x[j, global_mask_indices] = mask_token_id
+                                overtime_confidence[j, global_mask_indices] = 0.0
     def _validate_generated_length(self, generation_config, input_ids_length, has_default_max_length):
         """Performs validation related to the resulting generated length"""
         top_p = generation_config.top_p
         top_k = generation_config.top_k
+        # RCR specific values
+        rcr = generation_config.rcr
+        conf_alg = generation_config.conf_alg
         histories = [] if (return_dict_in_generate and output_history) else None
         # pad input_ids to max_length
         timesteps = torch.linspace(1, eps, steps + 1, device=x.device)
+        # RCR tracking - initialize overtime confidence tracking
+        overtime_confidence = torch.zeros_like(x, dtype=torch.float32) if rcr else None
         # this allows user-defined token control of the intermediate steps
         x = generation_tokens_hook_func(None, x, None)
         for i in range(steps):
                 _, x0[transfer_index_t_s]= sample_tokens(mask_logits[transfer_index_t_s], temperature=temperature, top_p=top_p, top_k=top_k)
                 x[mask_index] = x0.clone()
             else:
+                if alg == 'maskgit_plus' or (rcr and conf_alg == 'maskgit_plus'):
                     confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k)
+                elif alg == 'topk_margin' or (rcr and conf_alg == 'topk_margin'):
                     confidence, x0 = sample_tokens(mask_logits, temperature=temperature, top_p=top_p, top_k=top_k, margin_confidence=True)
+                elif alg == 'entropy' or (rcr and conf_alg == 'entropy'):
                     confidence, x0 = sample_tokens(mask_logits, temperature, top_p=top_p, top_k=top_k, neg_entropy=True)
                 else:
                     raise RuntimeError(f"Unknown alg: {alg}")
+                # Apply RCR logic if enabled
+                if rcr:
+                    self._apply_rcr_logic(x, x0, confidence, mask_index, overtime_confidence,
+                                        mask_token_id, i, steps, s, t)
+                else:
+                    # Original Dream sampling logic
+                    num_mask_token = mask_index.sum() / mask_index.shape[0]
+                    number_transfer_tokens = int(num_mask_token * (1 - s / t)) if i < steps - 1 else int(num_mask_token)
+                    full_confidence = torch.full_like(x, -torch.inf, device=self.device, dtype=logits.dtype)
+                    full_confidence[mask_index] = confidence
+                    if number_transfer_tokens > 0:
+                        if alg_temp is None or alg_temp == 0:
+                            _, transfer_index = torch.topk(full_confidence, number_transfer_tokens)
+                        else:
+                            full_confidence = full_confidence / alg_temp
+                            full_confidence = F.softmax(full_confidence, dim=-1)
+                            transfer_index = torch.multinomial(full_confidence, num_samples=number_transfer_tokens)
+                        x_ = torch.zeros_like(x, device=self.device, dtype=torch.long) + mask_token_id
+                        x_[mask_index] = x0.clone()
+                        row_indices = torch.arange(x.size(0), device=self.device).unsqueeze(1).expand_as(transfer_index)
+                        x[row_indices,transfer_index] = x_[row_indices,transfer_index]
             # this allows user-defined token control of the intermediate steps
             x = generation_tokens_hook_func(i, x, logits)