Daniel Rothmann committed on
Commit
a2c97d7
·
1 Parent(s): cb20bed

Fix a NaN attention issue in converted model

Browse files
PlaprePicoDecode.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da0d297f0cba045c409eb9b2d743022b4f650852511b14e693b381183918f5b8
3
  size 579193
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3204fd09a746e1814b5a6803aaf73fc910d7723710ea6a3eeac7dd5970a77341
3
  size 579193
PlaprePicoDecode.mlpackage/Manifest.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
- "4F014012-D167-40F9-A1E1-3C5893482B75": {
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Weights",
7
  "name": "weights",
8
  "path": "com.apple.CoreML/weights"
9
  },
10
- "97AF2C9A-BA5C-46E8-8E92-0056C5769D29": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Specification",
13
  "name": "model.mlmodel",
14
  "path": "com.apple.CoreML/model.mlmodel"
15
  }
16
  },
17
- "rootModelIdentifier": "97AF2C9A-BA5C-46E8-8E92-0056C5769D29"
18
  }
 
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
+ "D9ED4ABB-3CF3-496D-8858-06948CEBC48F": {
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Weights",
7
  "name": "weights",
8
  "path": "com.apple.CoreML/weights"
9
  },
10
+ "E087C383-13E2-4E2C-B87A-990925041088": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Specification",
13
  "name": "model.mlmodel",
14
  "path": "com.apple.CoreML/model.mlmodel"
15
  }
16
  },
17
+ "rootModelIdentifier": "E087C383-13E2-4E2C-B87A-990925041088"
18
  }
PlaprePicoPrefill.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d10cacfc9f4cf6c930e97f1dfcffbad287129a91c179c654991776ab58f4a57
3
- size 639785
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3606f27d4f4bb6f366ba9ff496766c076d3069c6fc3ed18bf590a01bcea570f8
3
+ size 884638
PlaprePicoPrefill.mlpackage/Manifest.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
- "8B9849F1-AEA1-4E2B-ACC6-B60CE6D23C7E": {
5
- "author": "com.apple.CoreML",
6
- "description": "CoreML Model Specification",
7
- "name": "model.mlmodel",
8
- "path": "com.apple.CoreML/model.mlmodel"
9
- },
10
- "B531BF6D-40DE-44ED-B183-91FC01140413": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Weights",
13
  "name": "weights",
14
  "path": "com.apple.CoreML/weights"
 
 
 
 
 
 
15
  }
16
  },
17
- "rootModelIdentifier": "8B9849F1-AEA1-4E2B-ACC6-B60CE6D23C7E"
18
  }
 
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
+ "94E53F9E-0046-4DEE-897F-3E8DECA267BE": {
 
 
 
 
 
 
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Weights",
7
  "name": "weights",
8
  "path": "com.apple.CoreML/weights"
9
+ },
10
+ "FD65B49A-2F33-4104-A93C-96340E2B0D48": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
  }
16
  },
17
+ "rootModelIdentifier": "FD65B49A-2F33-4104-A93C-96340E2B0D48"
18
  }
scripts/attention.py CHANGED
@@ -135,7 +135,10 @@ class LlamaAttentionPrefill(nn.Module):
135
 
136
  attn_weights = torch.matmul(q, k_full.transpose(2, 3)) * self.scale
137
  attn_weights = attn_weights + causal_mask
138
- attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(q.dtype)
 
 
 
139
  attn_output = torch.matmul(attn_weights, v_full)
140
 
141
  attn_output = attn_output.transpose(1, 2).contiguous().reshape(1, seq_len, self.num_heads * self.head_dim)
 
135
 
136
  attn_weights = torch.matmul(q, k_full.transpose(2, 3)) * self.scale
137
  attn_weights = attn_weights + causal_mask
138
+ attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32)
139
+ # Rows where mask is all -inf produce NaN after softmax — zero them out.
140
+ # This happens for pad positions that don't attend to anything.
141
+ attn_weights = attn_weights.nan_to_num(0.0).to(q.dtype)
142
  attn_output = torch.matmul(attn_weights, v_full)
143
 
144
  attn_output = attn_output.transpose(1, 2).contiguous().reshape(1, seq_len, self.num_heads * self.head_dim)
scripts/convert.py CHANGED
@@ -244,23 +244,20 @@ def convert_decode(model: PlaprePicoDecode, output_dir: Path):
244
 
245
 
246
  def copy_assets(model_dir: Path, output_dir: Path):
247
- """Copy tokenizer.json and speakers.json to output."""
248
- constants_dir = output_dir / "constants_bin"
249
- constants_dir.mkdir(exist_ok=True)
250
-
251
  for filename in ["tokenizer.json", "speakers.json"]:
252
  src = model_dir / filename
253
  if src.exists():
254
- shutil.copy2(src, constants_dir / filename)
255
- print(f"Copied {filename} to {constants_dir}")
256
  else:
257
  print(f" WARNING: {filename} not found in {model_dir}")
258
 
259
  # Export RoPE tables for the iOS runtime to build cos/sin/update_mask inputs
260
  cos_full, sin_full = precompute_rope_frequencies(HEAD_DIM, MAX_CONTEXT, 100000.0)
261
- np.save(str(constants_dir / "rope_cos.npy"), cos_full.numpy().astype(np.float16))
262
- np.save(str(constants_dir / "rope_sin.npy"), sin_full.numpy().astype(np.float16))
263
- print(f"Exported RoPE tables to {constants_dir}")
264
 
265
  manifest = {
266
  "model": "plapre-pico",
@@ -284,7 +281,7 @@ def copy_assets(model_dir: Path, output_dir: Path):
284
  def main():
285
  parser = argparse.ArgumentParser(description="Convert Plapre Pico to CoreML")
286
  parser.add_argument("--model-dir", type=str, help="Path to downloaded model directory")
287
- parser.add_argument("--output-dir", type=str, default="output", help="Output directory")
288
  args = parser.parse_args()
289
 
290
  if args.model_dir:
 
244
 
245
 
246
  def copy_assets(model_dir: Path, output_dir: Path):
247
+ """Copy tokenizer.json, speakers.json, and RoPE tables to output root."""
 
 
 
248
  for filename in ["tokenizer.json", "speakers.json"]:
249
  src = model_dir / filename
250
  if src.exists():
251
+ shutil.copy2(src, output_dir / filename)
252
+ print(f"Copied {filename} to {output_dir}")
253
  else:
254
  print(f" WARNING: {filename} not found in {model_dir}")
255
 
256
  # Export RoPE tables for the iOS runtime to build cos/sin/update_mask inputs
257
  cos_full, sin_full = precompute_rope_frequencies(HEAD_DIM, MAX_CONTEXT, 100000.0)
258
+ np.save(str(output_dir / "rope_cos.npy"), cos_full.numpy().astype(np.float16))
259
+ np.save(str(output_dir / "rope_sin.npy"), sin_full.numpy().astype(np.float16))
260
+ print(f"Exported RoPE tables to {output_dir}")
261
 
262
  manifest = {
263
  "model": "plapre-pico",
 
281
  def main():
282
  parser = argparse.ArgumentParser(description="Convert Plapre Pico to CoreML")
283
  parser.add_argument("--model-dir", type=str, help="Path to downloaded model directory")
284
+ parser.add_argument("--output-dir", type=str, default=str(Path(__file__).parent.parent), help="Output directory")
285
  args = parser.parse_args()
286
 
287
  if args.model_dir: