Spaces: Running on Zero
Some fixes for entropy model predictions (#83)
Browse files
Co-authored-by: Srini Iyer <sviyer@meta.com>
bytelatent/data/patcher.py
CHANGED
|
@@ -91,7 +91,7 @@ def calculate_entropies(
|
|
| 91 |
split = split.reshape(-1, max_length)
|
| 92 |
if device is not None:
|
| 93 |
split = split.to(device)
|
| 94 |
-
assert torch.all(split >= 0) and torch.all(split < 260)
|
| 95 |
pred = entropy_model(split)
|
| 96 |
pred = pred.reshape(-1, pred.shape[-1])[
|
| 97 |
: split.numel() - pad_size, :
|
|
@@ -103,7 +103,7 @@ def calculate_entropies(
|
|
| 103 |
concat_entropies = torch.cat(entropies, dim=0)
|
| 104 |
concat_entropies = concat_entropies.reshape(tokens.shape)
|
| 105 |
concat_preds = torch.cat(preds, dim=0)
|
| 106 |
-
concat_preds = concat_preds.reshape(tokens.shape[0], [NOTE: old-side line truncated in page capture; remainder not shown]
|
| 107 |
return concat_entropies, concat_preds
|
| 108 |
|
| 109 |
|
|
|
|
| 91 |
split = split.reshape(-1, max_length)
|
| 92 |
if device is not None:
|
| 93 |
split = split.to(device)
|
| 94 |
+
# assert torch.all(split >= 0) and torch.all(split < 260)
|
| 95 |
pred = entropy_model(split)
|
| 96 |
pred = pred.reshape(-1, pred.shape[-1])[
|
| 97 |
: split.numel() - pad_size, :
|
|
|
|
| 103 |
concat_entropies = torch.cat(entropies, dim=0)
|
| 104 |
concat_entropies = concat_entropies.reshape(tokens.shape)
|
| 105 |
concat_preds = torch.cat(preds, dim=0)
|
| 106 |
+
concat_preds = concat_preds.reshape(tokens.shape[0], -1)
|
| 107 |
return concat_entropies, concat_preds
|
| 108 |
|
| 109 |
|
bytelatent/entropy_model.py
CHANGED
|
@@ -15,7 +15,7 @@ def load_entropy_model(entropy_model_checkpoint_dir, state_dict_path, device="cp
|
|
| 15 |
reloaded = json.loads(fr.read())
|
| 16 |
|
| 17 |
torch.set_default_dtype(torch.bfloat16)
|
| 18 |
-
model_params = reloaded[" [NOTE: old-side line truncated in page capture; the key removed by this commit is not shown — new side reads reloaded["entropy_model"]]
|
| 19 |
logger.warning(
|
| 20 |
"Update checkpoint to load attn and sliding window args from checkpoint"
|
| 21 |
)
|
|
@@ -24,7 +24,7 @@ def load_entropy_model(entropy_model_checkpoint_dir, state_dict_path, device="cp
|
|
| 24 |
dim=model_params["dim"],
|
| 25 |
n_layers=model_params["n_layers"],
|
| 26 |
n_heads=model_params["n_heads"],
|
| 27 |
-
max_seqlen=model_params[" [NOTE: old-side line truncated in page capture; the key removed by this commit is not shown — new side reads model_params["max_seqlen"]]
|
| 28 |
ffn_dim_multiplier=model_params["ffn_dim_multiplier"],
|
| 29 |
vocab_size=model_params["vocab_size"],
|
| 30 |
attn_bias_type="local_block_causal",
|
|
@@ -34,7 +34,7 @@ def load_entropy_model(entropy_model_checkpoint_dir, state_dict_path, device="cp
|
|
| 34 |
)
|
| 35 |
|
| 36 |
entropy_model.load_state_dict(
|
| 37 |
-
torch.load(state_dict_path, map_location=device), strict=False
|
| 38 |
)
|
| 39 |
entropy_model.to(device)
|
| 40 |
entropy_model = entropy_model.eval()
|
|
|
|
| 15 |
reloaded = json.loads(fr.read())
|
| 16 |
|
| 17 |
torch.set_default_dtype(torch.bfloat16)
|
| 18 |
+
model_params = reloaded["entropy_model"]
|
| 19 |
logger.warning(
|
| 20 |
"Update checkpoint to load attn and sliding window args from checkpoint"
|
| 21 |
)
|
|
|
|
| 24 |
dim=model_params["dim"],
|
| 25 |
n_layers=model_params["n_layers"],
|
| 26 |
n_heads=model_params["n_heads"],
|
| 27 |
+
max_seqlen=model_params["max_seqlen"],
|
| 28 |
ffn_dim_multiplier=model_params["ffn_dim_multiplier"],
|
| 29 |
vocab_size=model_params["vocab_size"],
|
| 30 |
attn_bias_type="local_block_causal",
|
|
|
|
| 34 |
)
|
| 35 |
|
| 36 |
entropy_model.load_state_dict(
|
| 37 |
+
torch.load(state_dict_path, map_location=device)["model"], strict=False
|
| 38 |
)
|
| 39 |
entropy_model.to(device)
|
| 40 |
entropy_model = entropy_model.eval()
|