Fix a NaN attention issue in converted model

Files changed (6)

PlaprePicoDecode.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da0d297f0cba045c409eb9b2d743022b4f650852511b14e693b381183918f5b8
 size 579193

 version https://git-lfs.github.com/spec/v1
+oid sha256:3204fd09a746e1814b5a6803aaf73fc910d7723710ea6a3eeac7dd5970a77341
 size 579193

PlaprePicoDecode.mlpackage/Manifest.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
-        "4F014012-D167-40F9-A1E1-3C5893482B75": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         },
-        "97AF2C9A-BA5C-46E8-8E92-0056C5769D29": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         }
     },
-    "rootModelIdentifier": "97AF2C9A-BA5C-46E8-8E92-0056C5769D29"
 }

 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
+        "D9ED4ABB-3CF3-496D-8858-06948CEBC48F": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         },
+        "E087C383-13E2-4E2C-B87A-990925041088": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Specification",
             "name": "model.mlmodel",
             "path": "com.apple.CoreML/model.mlmodel"
         }
     },
+    "rootModelIdentifier": "E087C383-13E2-4E2C-B87A-990925041088"
 }

PlaprePicoPrefill.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d10cacfc9f4cf6c930e97f1dfcffbad287129a91c179c654991776ab58f4a57
-size 639785

 version https://git-lfs.github.com/spec/v1
+oid sha256:3606f27d4f4bb6f366ba9ff496766c076d3069c6fc3ed18bf590a01bcea570f8
+size 884638

PlaprePicoPrefill.mlpackage/Manifest.json CHANGED Viewed

@@ -1,18 +1,18 @@
 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
-        "8B9849F1-AEA1-4E2B-ACC6-B60CE6D23C7E": {
-            "author": "com.apple.CoreML",
-            "description": "CoreML Model Specification",
-            "name": "model.mlmodel",
-            "path": "com.apple.CoreML/model.mlmodel"
-        },
-        "B531BF6D-40DE-44ED-B183-91FC01140413": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
         }
     },
-    "rootModelIdentifier": "8B9849F1-AEA1-4E2B-ACC6-B60CE6D23C7E"
 }

 {
     "fileFormatVersion": "1.0.0",
     "itemInfoEntries": {
+        "94E53F9E-0046-4DEE-897F-3E8DECA267BE": {
             "author": "com.apple.CoreML",
             "description": "CoreML Model Weights",
             "name": "weights",
             "path": "com.apple.CoreML/weights"
+        },
+        "FD65B49A-2F33-4104-A93C-96340E2B0D48": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
         }
     },
+    "rootModelIdentifier": "FD65B49A-2F33-4104-A93C-96340E2B0D48"
 }

scripts/attention.py CHANGED Viewed

@@ -135,7 +135,10 @@ class LlamaAttentionPrefill(nn.Module):
         attn_weights = torch.matmul(q, k_full.transpose(2, 3)) * self.scale
         attn_weights = attn_weights + causal_mask
-        attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(q.dtype)
         attn_output = torch.matmul(attn_weights, v_full)
         attn_output = attn_output.transpose(1, 2).contiguous().reshape(1, seq_len, self.num_heads * self.head_dim)

         attn_weights = torch.matmul(q, k_full.transpose(2, 3)) * self.scale
         attn_weights = attn_weights + causal_mask
+        attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32)
+        # Rows where mask is all -inf produce NaN after softmax — zero them out.
+        # This happens for pad positions that don't attend to anything.
+        attn_weights = attn_weights.nan_to_num(0.0).to(q.dtype)
         attn_output = torch.matmul(attn_weights, v_full)
         attn_output = attn_output.transpose(1, 2).contiguous().reshape(1, seq_len, self.num_heads * self.head_dim)

scripts/convert.py CHANGED Viewed

@@ -244,23 +244,20 @@ def convert_decode(model: PlaprePicoDecode, output_dir: Path):
 def copy_assets(model_dir: Path, output_dir: Path):
-    """Copy tokenizer.json and speakers.json to output."""
-    constants_dir = output_dir / "constants_bin"
-    constants_dir.mkdir(exist_ok=True)
     for filename in ["tokenizer.json", "speakers.json"]:
         src = model_dir / filename
         if src.exists():
-            shutil.copy2(src, constants_dir / filename)
-            print(f"Copied {filename} to {constants_dir}")
         else:
             print(f"  WARNING: {filename} not found in {model_dir}")
     # Export RoPE tables for the iOS runtime to build cos/sin/update_mask inputs
     cos_full, sin_full = precompute_rope_frequencies(HEAD_DIM, MAX_CONTEXT, 100000.0)
-    np.save(str(constants_dir / "rope_cos.npy"), cos_full.numpy().astype(np.float16))
-    np.save(str(constants_dir / "rope_sin.npy"), sin_full.numpy().astype(np.float16))
-    print(f"Exported RoPE tables to {constants_dir}")
     manifest = {
         "model": "plapre-pico",
@@ -284,7 +281,7 @@ def copy_assets(model_dir: Path, output_dir: Path):
 def main():
     parser = argparse.ArgumentParser(description="Convert Plapre Pico to CoreML")
     parser.add_argument("--model-dir", type=str, help="Path to downloaded model directory")
-    parser.add_argument("--output-dir", type=str, default="output", help="Output directory")
     args = parser.parse_args()
     if args.model_dir:

 def copy_assets(model_dir: Path, output_dir: Path):
+    """Copy tokenizer.json, speakers.json, and RoPE tables to output root."""
     for filename in ["tokenizer.json", "speakers.json"]:
         src = model_dir / filename
         if src.exists():
+            shutil.copy2(src, output_dir / filename)
+            print(f"Copied {filename} to {output_dir}")
         else:
             print(f"  WARNING: {filename} not found in {model_dir}")
     # Export RoPE tables for the iOS runtime to build cos/sin/update_mask inputs
     cos_full, sin_full = precompute_rope_frequencies(HEAD_DIM, MAX_CONTEXT, 100000.0)
+    np.save(str(output_dir / "rope_cos.npy"), cos_full.numpy().astype(np.float16))
+    np.save(str(output_dir / "rope_sin.npy"), sin_full.numpy().astype(np.float16))
+    print(f"Exported RoPE tables to {output_dir}")
     manifest = {
         "model": "plapre-pico",
 def main():
     parser = argparse.ArgumentParser(description="Convert Plapre Pico to CoreML")
     parser.add_argument("--model-dir", type=str, help="Path to downloaded model directory")
+    parser.add_argument("--output-dir", type=str, default=str(Path(__file__).parent.parent), help="Output directory")
     args = parser.parse_args()
     if args.model_dir: