Spaces:

ejschwartz
/

entropy

Sleeping

ejschwartz committed on Apr 3

Commit

f722db4

1 Parent(s): ce736c4

Add BOS token handling in entropy computation and update notes

Files changed (1) hide show

app.py CHANGED Viewed

@@ -46,6 +46,16 @@ def compute_entropy(code: str):
         if attention_mask is not None:
             attention_mask = attention_mask.to(device)
         # Need at least 2 tokens to compute next-token NLL
         if input_ids.shape[1] < 2:
             return "Input is too short to compute token-level entropy.", None
@@ -117,7 +127,7 @@ The table shows each token's NLL and probability under the model.
         gr.Markdown(
             """
 Notes:
-- NLL is computed for next-token prediction and excludes the first token.
 - Large inputs may take time to process depending on hardware.
 """
         )

         if attention_mask is not None:
             attention_mask = attention_mask.to(device)
+        # Prepend BOS if not already present so the first real token gets a predicted probability
+        bos_id = TOKENIZER.bos_token_id
+        if bos_id is not None and input_ids[0, 0].item() != bos_id:
+            bos_tensor = torch.full((1, 1), bos_id, dtype=input_ids.dtype, device=device)
+            input_ids = torch.cat([bos_tensor, input_ids], dim=1)
+            if attention_mask is not None:
+                attention_mask = torch.cat(
+                    [torch.ones(1, 1, dtype=attention_mask.dtype, device=device), attention_mask], dim=1
+                )
         # Need at least 2 tokens to compute next-token NLL
         if input_ids.shape[1] < 2:
             return "Input is too short to compute token-level entropy.", None
         gr.Markdown(
             """
 Notes:
+- NLL is computed for next-token prediction; a BOS token is prepended if needed so all tokens are included.
 - Large inputs may take time to process depending on hardware.
 """
         )