Update modeling.py
Browse files- modeling.py +28 -15
modeling.py
CHANGED
|
@@ -1450,27 +1450,40 @@ class PrismaVLForConditionalGeneration(PrismaVLPreTrainedModel, GenerationMixin)
|
|
| 1450 |
# === PRISMA UNCERTAINTY UPDATE (TRAINING + INFERENCE) ===
|
| 1451 |
with torch.no_grad():
|
| 1452 |
B, S, V = logits.shape
|
|
|
|
| 1453 |
|
| 1454 |
-
|
|
|
|
|
|
|
| 1455 |
|
| 1456 |
-
|
| 1457 |
-
|
| 1458 |
-
|
| 1459 |
-
|
| 1460 |
-
|
| 1461 |
-
|
| 1462 |
-
|
| 1463 |
-
|
| 1464 |
-
|
| 1465 |
-
|
| 1466 |
-
|
| 1467 |
|
| 1468 |
else:
|
| 1469 |
-
#
|
| 1470 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1471 |
# === END PRISMA UPDATE ===
|
| 1472 |
|
| 1473 |
-
|
| 1474 |
return PrismaVLCausalLMOutputWithPast(
|
| 1475 |
loss=loss,
|
| 1476 |
logits=logits,
|
|
|
|
# === PRISMA UNCERTAINTY UPDATE (TRAINING + INFERENCE) ===
# Derive a discrete per-position "uncertainty code" from the LM-head logits
# and stash it on the backbone as self.model.prev_uncertainty_code
# (presumably consumed by the next forward pass — confirm against the
# model's embedding/conditioning path). Runs under no_grad: the code is a
# discrete side-channel signal, never a gradient path.
with torch.no_grad():
    B, S, V = logits.shape
    expected_V = self.config.text_config.vocab_size

    if V != expected_V:
        # Logits appear to be sharded (tensor-parallel vocab split): a
        # local softmax/entropy over only one vocab shard would be wrong,
        # so do NOT compute it.
        if self.training:
            # Training: skip the uncertainty signal for the next batch.
            self.model.prev_uncertainty_code = None
        else:
            # Inference: fall back to the neutral (middle) level.
            self.model.prev_uncertainty_code = torch.full(
                (B, 1),
                self.model.n_uncertainty_levels // 2,
                dtype=torch.long,
                device=logits.device,
            )
    else:
        # Full-vocab logits → entropy is well defined.
        #
        # FIX: compute entropy via log_softmax instead of
        # log(softmax(x).clamp_min(1e-9)). In fp16/bf16 many token
        # probabilities underflow to exactly 0 and the clamp floor
        # systematically biases the entropy; log_softmax is evaluated
        # stably from the logits and needs no epsilon.
        log_probs = logits.log_softmax(dim=-1)
        entropy = -(log_probs.exp() * log_probs).sum(dim=-1)
        # Normalize to [0, 1] by the maximum possible entropy, log(V).
        entropy = entropy / math.log(expected_V)

        # Quantize to integer levels {0, ..., n_levels - 1}; the clamp
        # guards the entropy == 1.0 edge case (and any float overshoot).
        codes = (
            entropy * (self.model.n_uncertainty_levels - 1)
        ).long().clamp(0, self.model.n_uncertainty_levels - 1)

        if self.training:
            # Training: drop the last position (code at t presumably
            # conditions t+1 under teacher forcing — TODO confirm);
            # a single-position sequence yields no usable signal.
            self.model.prev_uncertainty_code = codes[:, :-1] if S > 1 else None
        else:
            # Incremental decoding: keep only the newest position.
            self.model.prev_uncertainty_code = codes[:, -1:].contiguous()
# === END PRISMA UPDATE ===
|
| 1486 |
|
|
|
|
| 1487 |
return PrismaVLCausalLMOutputWithPast(
|
| 1488 |
loss=loss,
|
| 1489 |
logits=logits,
|