QuixiAI
/

Prisma-VL-8B

@@ -1155,49 +1155,31 @@ class PrismaVLModel(PrismaVLPreTrainedModel):
         if inputs_embeds is None:
             inputs_embeds = self.get_input_embeddings()(input_ids)
-        # === INJECT 16-BIT UNCERTAINTY SIGNAL ===
-        # Add learned uncertainty embedding from previous step
-        batch_size, seq_len = inputs_embeds.shape[:2]
-        # Initialize uncertainty codes if needed
-        if self.prev_uncertainty_code is None or self.prev_uncertainty_code.shape[0] != batch_size:
-            # First step or batch size changed: use neutral uncertainty (middle of range)
-            # 32768 represents "medium uncertainty" (50% of max entropy)
-            uncertainty_code = torch.full(
-                (batch_size, seq_len),
-                self.n_uncertainty_levels // 2,  # 32768 for 16-bit
-                dtype=torch.long,
-                device=inputs_embeds.device
-            )
-        else:
-            # Use uncertainty from previous step
-            # Pad or truncate to match current sequence length
-            prev_len = self.prev_uncertainty_code.shape[1]
-            if prev_len < seq_len:
-                # Pad with neutral uncertainty
-                padding = torch.full(
-                    (batch_size, seq_len - prev_len),
-                    self.n_uncertainty_levels // 2,
-                    dtype=torch.long,
-                    device=self.prev_uncertainty_code.device
-                )
-                uncertainty_code = torch.cat([self.prev_uncertainty_code, padding], dim=1)
-            else:
-                uncertainty_code = self.prev_uncertainty_code[:, :seq_len]
-        # Look up uncertainty embeddings (one learned vector per uncertainty code)
-        uncertainty_embeds = self.uncertainty_embeddings(uncertainty_code)
-        # Shift right: position i gets uncertainty from position i-1
-        # First position gets zero (no previous uncertainty)
-        uncertainty_shifted = torch.nn.functional.pad(
-            uncertainty_embeds[:, :-1, :],
-            (0, 0, 1, 0),  # Pad one position at the start
-            value=0.0
-        )
-        # Inject into input: model sees both content and "how uncertain was I?"
-        inputs_embeds = inputs_embeds + uncertainty_shifted
         image_mask = None
         video_mask = None
@@ -1465,31 +1447,29 @@ class PrismaVLForConditionalGeneration(PrismaVLPreTrainedModel, GenerationMixin)
         if labels is not None:
             loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.text_config.vocab_size)
-        # === COMPUTE UNCERTAINTY FOR NEXT STEP ===
-        # Update uncertainty codes based on current predictions
-        # Works during both training and inference for full introspective capability
-        if logits is not None:
-            with torch.no_grad():
-                logits_detached = logits.detach()
-                # Compute probability distribution
-                probs = logits_detached.softmax(dim=-1)  # [batch, seq, vocab]
-                # Compute entropy: H = -Σ p log p (uncertainty measure)
-                log_probs = torch.log(probs.clamp(min=1e-9))
-                entropy = -(probs * log_probs).sum(dim=-1)  # [batch, seq]
-                # Normalize by maximum possible entropy (uniform distribution)
-                vocab_size = logits_detached.size(-1)
-                max_entropy = math.log(vocab_size)
-                entropy_norm = (entropy / max_entropy).clamp(0.0, 1.0)
-                # Quantize to 16 bits (0-65535)
-                # Low entropy (confident) → low code (0-32767)
-                # High entropy (uncertain) → high code (32768-65535)
-                self.model.prev_uncertainty_code = (
-                    entropy_norm * (self.model.n_uncertainty_levels - 1)
-                ).long().clamp(0, self.model.n_uncertainty_levels - 1)
         return PrismaVLCausalLMOutputWithPast(
             loss=loss,

         if inputs_embeds is None:
             inputs_embeds = self.get_input_embeddings()(input_ids)
+        # === PRISMA UNCERTAINTY INJECTION (TRAINING + INFERENCE, ROBUST) ===
+        B, S = inputs_embeds.shape[:2]
+        if self.prev_uncertainty_code is not None:
+            if self.training and S > 1:
+                prev_len = self.prev_uncertainty_code.shape[1]
+                curr_len = S - 1
+                if prev_len >= curr_len:
+                    codes_to_use = self.prev_uncertainty_code[:, :curr_len]
+                else:
+                    codes_to_use = F.pad(
+                        self.prev_uncertainty_code,
+                        (0, curr_len - prev_len),
+                        value=self.n_uncertainty_levels // 2,  # neutral
+                    )
+                u = self.uncertainty_embeddings(codes_to_use)  # [B, S-1, D]
+                inputs_embeds = inputs_embeds.clone()
+                inputs_embeds[:, 1:, :] += u
+            elif not self.training and S == 1:
+                u = self.uncertainty_embeddings(self.prev_uncertainty_code)  # [B, 1, D]
+                inputs_embeds = inputs_embeds + u
+        # === END PRISMA INJECTION ===
         image_mask = None
         video_mask = None
         if labels is not None:
             loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.text_config.vocab_size)
+        # === PRISMA UNCERTAINTY UPDATE (TRAINING + INFERENCE) ===
+        with torch.no_grad():
+            B, S, V = logits.shape
+            probs = logits.softmax(dim=-1)
+            # Compute entropy for all available logits
+            entropy = -(probs * torch.log(probs.clamp_min(1e-9))).sum(dim=-1)
+            entropy = entropy / math.log(V)
+            codes = (
+                entropy * (self.model.n_uncertainty_levels - 1)
+            ).long().clamp(0, self.model.n_uncertainty_levels - 1)
+            if self.training:
+                # Teacher forcing: use entropy[t] → condition token t+1
+                # So we store the quantized codes for positions 0..S-2
+                self.model.prev_uncertainty_code = codes[:, :-1] if S > 1 else None
+            else:
+                # Inference: keep only the last position's code; it conditions the next single-token step (S == 1)
+                self.model.prev_uncertainty_code = codes[:, -1:].contiguous()
+        # === END PRISMA UPDATE ===
         return PrismaVLCausalLMOutputWithPast(
             loss=loss,