Daniel Rothmann committed on
Commit ·
a2c97d7
1
Parent(s): cb20bed
Fix a NaN attention issue in converted model
Browse files
PlaprePicoDecode.mlpackage/Data/com.apple.CoreML/model.mlmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 579193
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3204fd09a746e1814b5a6803aaf73fc910d7723710ea6a3eeac7dd5970a77341
|
| 3 |
size 579193
|
PlaprePicoDecode.mlpackage/Manifest.json
CHANGED
|
@@ -1,18 +1,18 @@
|
|
| 1 |
{
|
| 2 |
"fileFormatVersion": "1.0.0",
|
| 3 |
"itemInfoEntries": {
|
| 4 |
-
"
|
| 5 |
"author": "com.apple.CoreML",
|
| 6 |
"description": "CoreML Model Weights",
|
| 7 |
"name": "weights",
|
| 8 |
"path": "com.apple.CoreML/weights"
|
| 9 |
},
|
| 10 |
-
"
|
| 11 |
"author": "com.apple.CoreML",
|
| 12 |
"description": "CoreML Model Specification",
|
| 13 |
"name": "model.mlmodel",
|
| 14 |
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
}
|
| 16 |
},
|
| 17 |
-
"rootModelIdentifier": "
|
| 18 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"fileFormatVersion": "1.0.0",
|
| 3 |
"itemInfoEntries": {
|
| 4 |
+
"D9ED4ABB-3CF3-496D-8858-06948CEBC48F": {
|
| 5 |
"author": "com.apple.CoreML",
|
| 6 |
"description": "CoreML Model Weights",
|
| 7 |
"name": "weights",
|
| 8 |
"path": "com.apple.CoreML/weights"
|
| 9 |
},
|
| 10 |
+
"E087C383-13E2-4E2C-B87A-990925041088": {
|
| 11 |
"author": "com.apple.CoreML",
|
| 12 |
"description": "CoreML Model Specification",
|
| 13 |
"name": "model.mlmodel",
|
| 14 |
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
}
|
| 16 |
},
|
| 17 |
+
"rootModelIdentifier": "E087C383-13E2-4E2C-B87A-990925041088"
|
| 18 |
}
|
PlaprePicoPrefill.mlpackage/Data/com.apple.CoreML/model.mlmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3606f27d4f4bb6f366ba9ff496766c076d3069c6fc3ed18bf590a01bcea570f8
|
| 3 |
+
size 884638
|
PlaprePicoPrefill.mlpackage/Manifest.json
CHANGED
|
@@ -1,18 +1,18 @@
|
|
| 1 |
{
|
| 2 |
"fileFormatVersion": "1.0.0",
|
| 3 |
"itemInfoEntries": {
|
| 4 |
-
"
|
| 5 |
-
"author": "com.apple.CoreML",
|
| 6 |
-
"description": "CoreML Model Specification",
|
| 7 |
-
"name": "model.mlmodel",
|
| 8 |
-
"path": "com.apple.CoreML/model.mlmodel"
|
| 9 |
-
},
|
| 10 |
-
"B531BF6D-40DE-44ED-B183-91FC01140413": {
|
| 11 |
"author": "com.apple.CoreML",
|
| 12 |
"description": "CoreML Model Weights",
|
| 13 |
"name": "weights",
|
| 14 |
"path": "com.apple.CoreML/weights"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
}
|
| 16 |
},
|
| 17 |
-
"rootModelIdentifier": "
|
| 18 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"fileFormatVersion": "1.0.0",
|
| 3 |
"itemInfoEntries": {
|
| 4 |
+
"94E53F9E-0046-4DEE-897F-3E8DECA267BE": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"author": "com.apple.CoreML",
|
| 6 |
"description": "CoreML Model Weights",
|
| 7 |
"name": "weights",
|
| 8 |
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"FD65B49A-2F33-4104-A93C-96340E2B0D48": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
}
|
| 16 |
},
|
| 17 |
+
"rootModelIdentifier": "FD65B49A-2F33-4104-A93C-96340E2B0D48"
|
| 18 |
}
|
scripts/attention.py
CHANGED
|
@@ -135,7 +135,10 @@ class LlamaAttentionPrefill(nn.Module):
|
|
| 135 |
|
| 136 |
attn_weights = torch.matmul(q, k_full.transpose(2, 3)) * self.scale
|
| 137 |
attn_weights = attn_weights + causal_mask
|
| 138 |
-
attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32)
|
|
|
|
|
|
|
|
|
|
| 139 |
attn_output = torch.matmul(attn_weights, v_full)
|
| 140 |
|
| 141 |
attn_output = attn_output.transpose(1, 2).contiguous().reshape(1, seq_len, self.num_heads * self.head_dim)
|
|
|
|
| 135 |
|
| 136 |
attn_weights = torch.matmul(q, k_full.transpose(2, 3)) * self.scale
|
| 137 |
attn_weights = attn_weights + causal_mask
|
| 138 |
+
attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32)
|
| 139 |
+
# Rows where mask is all -inf produce NaN after softmax — zero them out.
|
| 140 |
+
# This happens for pad positions that don't attend to anything.
|
| 141 |
+
attn_weights = attn_weights.nan_to_num(0.0).to(q.dtype)
|
| 142 |
attn_output = torch.matmul(attn_weights, v_full)
|
| 143 |
|
| 144 |
attn_output = attn_output.transpose(1, 2).contiguous().reshape(1, seq_len, self.num_heads * self.head_dim)
|
scripts/convert.py
CHANGED
|
@@ -244,23 +244,20 @@ def convert_decode(model: PlaprePicoDecode, output_dir: Path):
|
|
| 244 |
|
| 245 |
|
| 246 |
def copy_assets(model_dir: Path, output_dir: Path):
|
| 247 |
-
"""Copy tokenizer.json
|
| 248 |
-
constants_dir = output_dir / "constants_bin"
|
| 249 |
-
constants_dir.mkdir(exist_ok=True)
|
| 250 |
-
|
| 251 |
for filename in ["tokenizer.json", "speakers.json"]:
|
| 252 |
src = model_dir / filename
|
| 253 |
if src.exists():
|
| 254 |
-
shutil.copy2(src,
|
| 255 |
-
print(f"Copied {filename} to {
|
| 256 |
else:
|
| 257 |
print(f" WARNING: {filename} not found in {model_dir}")
|
| 258 |
|
| 259 |
# Export RoPE tables for the iOS runtime to build cos/sin/update_mask inputs
|
| 260 |
cos_full, sin_full = precompute_rope_frequencies(HEAD_DIM, MAX_CONTEXT, 100000.0)
|
| 261 |
-
np.save(str(
|
| 262 |
-
np.save(str(
|
| 263 |
-
print(f"Exported RoPE tables to {
|
| 264 |
|
| 265 |
manifest = {
|
| 266 |
"model": "plapre-pico",
|
|
@@ -284,7 +281,7 @@ def copy_assets(model_dir: Path, output_dir: Path):
|
|
| 284 |
def main():
|
| 285 |
parser = argparse.ArgumentParser(description="Convert Plapre Pico to CoreML")
|
| 286 |
parser.add_argument("--model-dir", type=str, help="Path to downloaded model directory")
|
| 287 |
-
parser.add_argument("--output-dir", type=str, default=
|
| 288 |
args = parser.parse_args()
|
| 289 |
|
| 290 |
if args.model_dir:
|
|
|
|
| 244 |
|
| 245 |
|
| 246 |
def copy_assets(model_dir: Path, output_dir: Path):
|
| 247 |
+
"""Copy tokenizer.json, speakers.json, and RoPE tables to output root."""
|
|
|
|
|
|
|
|
|
|
| 248 |
for filename in ["tokenizer.json", "speakers.json"]:
|
| 249 |
src = model_dir / filename
|
| 250 |
if src.exists():
|
| 251 |
+
shutil.copy2(src, output_dir / filename)
|
| 252 |
+
print(f"Copied {filename} to {output_dir}")
|
| 253 |
else:
|
| 254 |
print(f" WARNING: {filename} not found in {model_dir}")
|
| 255 |
|
| 256 |
# Export RoPE tables for the iOS runtime to build cos/sin/update_mask inputs
|
| 257 |
cos_full, sin_full = precompute_rope_frequencies(HEAD_DIM, MAX_CONTEXT, 100000.0)
|
| 258 |
+
np.save(str(output_dir / "rope_cos.npy"), cos_full.numpy().astype(np.float16))
|
| 259 |
+
np.save(str(output_dir / "rope_sin.npy"), sin_full.numpy().astype(np.float16))
|
| 260 |
+
print(f"Exported RoPE tables to {output_dir}")
|
| 261 |
|
| 262 |
manifest = {
|
| 263 |
"model": "plapre-pico",
|
|
|
|
| 281 |
def main():
|
| 282 |
parser = argparse.ArgumentParser(description="Convert Plapre Pico to CoreML")
|
| 283 |
parser.add_argument("--model-dir", type=str, help="Path to downloaded model directory")
|
| 284 |
+
parser.add_argument("--output-dir", type=str, default=str(Path(__file__).parent.parent), help="Output directory")
|
| 285 |
args = parser.parse_args()
|
| 286 |
|
| 287 |
if args.model_dir:
|