Kyle Howells committed on
Commit
a150c61
·
1 Parent(s): f1d722d

Add htdemucs MLX weights and model card

Browse files
README.md CHANGED
@@ -1,3 +1,121 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ library_name: mlx
4
+ tags:
5
+ - mlx
6
+ - audio
7
+ - music-source-separation
8
+ - source-separation
9
+ - demucs
10
+ - htdemucs
11
+ - apple-silicon
12
+ base_model: adefossez/demucs
13
+ pipeline_tag: audio-to-audio
14
+ ---
15
+
16
+ # htdemucs - MLX
17
+
18
+ MLX-compatible weights for `htdemucs` from [Demucs](https://github.com/adefossez/demucs), a music source separation model for splitting audio into `drums`, `bass`, `other`, and `vocals`.
19
+
20
+ This is a conversion of the Demucs checkpoint into `safetensors` format for MLX-based inference on Apple Silicon.
21
+
22
+ ## Origin
23
+
24
+ - Original model/repo: [adefossez/demucs](https://github.com/adefossez/demucs)
25
+ - Model variant: `htdemucs` (Hybrid Transformer Demucs)
26
+ - License: MIT (same as original Demucs)
27
+ - Conversion path: `demucs-mlx` checkpoint (`htdemucs_mlx.pkl`) exported to safetensors via `convert_demucs_mlx_checkpoint.py`
28
+ - MLX port source repo: [ssmall256/demucs-mlx](https://github.com/ssmall256/demucs-mlx)
29
+ - Swift integration repo: `demucs-mlx-swift` (local checkout: `/Users/kylehowells/Developer/Github/demucs-mlx-swift`; TODO: link the public repository URL)
30
+
31
+ No fine-tuning or quantization was applied.
32
+
33
+ ## Files
34
+
35
+ | File | Description |
36
+ |---|---|
37
+ | `htdemucs_config.json` | Runtime model metadata/config exported with tensor manifest |
38
+ | `htdemucs.safetensors` | Pre-converted MLX weights (float32) |
39
+ | `convert_demucs_mlx_checkpoint.py` | Conversion script (demucs-mlx pickle -> MLX safetensors + JSON config) |
40
+
41
+ ## Model Details
42
+
43
+ | Parameter | Value |
44
+ |---|---|
45
+ | Sample rate | 44.1 kHz |
46
+ | Sources | `drums`, `bass`, `other`, `vocals` |
47
+ | Audio channels | 2 (stereo) |
48
+ | Segment | `39/5` seconds (7.8 s) |
49
+ | FFT size (`nfft`) | 4096 |
50
+ | Tensor count | 573 |
51
+ | Parameter count | 41,984,456 (~42.0M) |
52
+
53
+ ## Checksums
54
+
55
+ | File | SHA256 |
56
+ |---|---|
57
+ | `htdemucs.safetensors` | `339d267a7a6983a11eedbdc00413c602a65e9b9103f695fb5c2b2a481cd9d297` |
58
+ | `htdemucs_config.json` | `e53bae0f10eecee377dc894ac165fbdeb3e57b793694d0e778fbc32bc01f9a47` |
59
+
60
+ ## Usage
61
+
62
+ ### Swift (demucs-mlx-swift)
63
+
64
+ Reference repo: `demucs-mlx-swift` (local checkout: `/Users/kylehowells/Developer/Github/demucs-mlx-swift`; TODO: link the public repository URL)
65
+
66
+ ```swift
67
+ import DemucsMLX
68
+ import Foundation
69
+
70
+ let modelDir = URL(fileURLWithPath: "/Users/kylehowells/Developer/ML-Models/demucs-mlx", isDirectory: true)
71
+ let separator = try DemucsSeparator(modelName: "htdemucs", modelDirectory: modelDir)
72
+ let result = try separator.separate(fileAt: URL(fileURLWithPath: "song.wav"))
73
+ ```
74
+
75
+ ### Python (demucs-mlx)
76
+
77
+ Reference repo: [ssmall256/demucs-mlx](https://github.com/ssmall256/demucs-mlx)
78
+
79
+ ```python
80
+ from demucs_mlx.mlx_convert import load_mlx_model_from_safetensors
81
+
82
+ model = load_mlx_model_from_safetensors(
83
+ model_name="htdemucs",
84
+ cache_dir="/Users/kylehowells/Developer/ML-Models/demucs-mlx"
85
+ )
86
+ ```
87
+
88
+ ### Snippet Sources
89
+
90
+ - `demucs-mlx` loader API (`load_mlx_model_from_safetensors`): [ssmall256/demucs-mlx/demucs_mlx/mlx_convert.py](https://github.com/ssmall256/demucs-mlx/blob/main/demucs_mlx/mlx_convert.py)
91
+ - `demucs-mlx-swift` separator API (`DemucsSeparator`): `Sources/DemucsMLX/DemucsSeparator.swift` in the `demucs-mlx-swift` repo (local checkout: `/Users/kylehowells/Developer/Github/demucs-mlx-swift`)
92
+ - Export script used for this model card (`convert_demucs_mlx_checkpoint.py`): [convert_demucs_mlx_checkpoint.py](convert_demucs_mlx_checkpoint.py) (included in this repository)
93
+
94
+ ## Converting from demucs-mlx checkpoint
95
+
96
+ To reproduce this export from an existing `demucs-mlx` cache checkpoint:
97
+
98
+ ```bash
99
+ python convert_demucs_mlx_checkpoint.py \
100
+ --checkpoint /path/to/htdemucs_mlx.pkl \
101
+ --out-dir /Users/kylehowells/Developer/ML-Models/demucs-mlx \
102
+ --name htdemucs
103
+ ```
104
+
105
+ ## Citation
106
+
107
+ ```bibtex
108
+ @inproceedings{rouard2022hybrid,
109
+ title={Hybrid Transformers for Music Source Separation},
110
+ author={Rouard, Simon and Massa, Francisco and Defossez, Alexandre},
111
+ booktitle={ICASSP 23},
112
+ year={2023}
113
+ }
114
+
115
+ @inproceedings{defossez2021hybrid,
116
+ title={Hybrid Spectrogram and Waveform Source Separation},
117
+ author={Defossez, Alexandre},
118
+ booktitle={Proceedings of the ISMIR 2021 Workshop on Music Source Separation},
119
+ year={2021}
120
+ }
121
+ ```
convert_demucs_mlx_checkpoint.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Export demucs-mlx pickle checkpoint to flat safetensors + JSON metadata.
4
+
5
+ This is a preparation step for native Swift/MLX loading.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import json
12
+ import os
13
+ import pickle
14
+ from pathlib import Path
15
+ from typing import Any
16
+ from fractions import Fraction
17
+
18
+ import mlx.core as mx
19
+
20
+
21
def flatten_tree(node: Any, prefix: str = "") -> dict[str, mx.array]:
    """Collapse a nested state tree into one flat ``{dotted_key: array}`` dict.

    Dict keys and list/tuple indices are stringified and joined with ``"."``
    to form each output key; ``prefix`` (if non-empty) is prepended the same
    way. Only ``mx.array`` leaves are kept; every other leaf is dropped.

    Args:
        node: A dict, list, tuple, ``mx.array``, or any other leaf value.
        prefix: Key path accumulated so far ("" at the root).

    Returns:
        A dict mapping dotted key paths to the ``mx.array`` leaves, in
        depth-first traversal order.
    """

    def _walk(current: Any, path: str):
        # Pick the (label, child) pairs for containers; handle leaves inline.
        if isinstance(current, dict):
            children = current.items()
        elif isinstance(current, (list, tuple)):
            children = enumerate(current)
        else:
            # Leaf: emit it only when it is an MLX array, otherwise skip it.
            if isinstance(current, mx.array):
                yield path, current
            return

        for label, child in children:
            child_path = f"{path}.{label}" if path else str(label)
            yield from _walk(child, child_path)

    return dict(_walk(node, prefix))
43
+
44
+
45
def to_builtin(obj: Any) -> Any:
    """Recursively convert ``obj`` into a structure of plain built-in values.

    * ``Fraction`` values become ``"numerator/denominator"`` strings.
    * Dicts are rebuilt with every key passed through ``str``.
    * Lists and tuples both become lists.
    * Anything else is returned unchanged.
    """
    if isinstance(obj, Fraction):
        return f"{obj.numerator}/{obj.denominator}"

    if isinstance(obj, dict):
        converted = {}
        for name, value in obj.items():
            text_key = str(name)
            converted[text_key] = to_builtin(value)
        return converted

    if isinstance(obj, (list, tuple)):
        return list(map(to_builtin, obj))

    return obj
53
+
54
+
55
def main() -> None:
    """Export a demucs-mlx pickle checkpoint as safetensors plus a JSON config.

    Command-line options:
        --checkpoint  Path to the demucs-mlx pickle checkpoint.
        --out-dir     Directory that receives the two output files.
        --name        Basename used for both output files.

    Writes ``<out-dir>/<name>.safetensors`` (the flattened ``state`` tree) and
    ``<out-dir>/<name>_config.json`` (checkpoint metadata plus a per-tensor
    shape/dtype manifest), then prints the written paths and tensor count.

    Raises:
        ValueError: If the checkpoint has no ``"state"`` entry, or flattening
            that entry yields no MLX arrays.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--checkpoint",
        default=os.path.expanduser("~/.cache/demucs-mlx/htdemucs_mlx.pkl"),
        help="Path to demucs-mlx pickle checkpoint",
    )
    ap.add_argument(
        "--out-dir",
        default="./Models/htdemucs",
        help="Output directory",
    )
    ap.add_argument(
        "--name",
        default="htdemucs",
        help="Output model basename",
    )
    args = ap.parse_args()

    ck_path = Path(args.checkpoint).expanduser().resolve()
    # Expand "~" for the output directory too, matching --checkpoint; without
    # this a quoted "~/models" would create a literal "~" directory in the CWD.
    out_dir = Path(args.out_dir).expanduser().resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only run this script on checkpoints from a source you trust.
    with ck_path.open("rb") as f:
        checkpoint = pickle.load(f)

    if "state" not in checkpoint:
        raise ValueError(f"No 'state' key in checkpoint: {ck_path}")

    flat = flatten_tree(checkpoint["state"])
    if not flat:
        raise ValueError("No MLX arrays found while flattening state tree")

    safetensors_path = out_dir / f"{args.name}.safetensors"
    config_path = out_dir / f"{args.name}_config.json"

    mx.save_safetensors(str(safetensors_path), flat)

    metadata = {
        "model_name": checkpoint.get("model_name"),
        "model_class": checkpoint.get("model_class"),
        "sub_model_class": checkpoint.get("sub_model_class"),
        "num_models": checkpoint.get("num_models"),
        # Normalized like args/kwargs so tuples or Fractions in the weights
        # cannot break JSON serialization; plain lists of floats are unchanged.
        "weights": to_builtin(checkpoint.get("weights")),
        "args": to_builtin(checkpoint.get("args", [])),
        "kwargs": to_builtin(checkpoint.get("kwargs", {})),
        "mlx_version": checkpoint.get("mlx_version"),
        "tensor_count": len(flat),
        # Per-tensor manifest, in the same key order as the flattened state.
        "tensors": {
            k: {
                "shape": list(v.shape),
                "dtype": str(v.dtype),
            }
            for k, v in flat.items()
        },
    }

    with config_path.open("w") as f:
        json.dump(metadata, f, indent=2)

    print(f"wrote {safetensors_path}")
    print(f"wrote {config_path}")
    print(f"tensors: {len(flat)}")


if __name__ == "__main__":
    main()
htdemucs.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:339d267a7a6983a11eedbdc00413c602a65e9b9103f695fb5c2b2a481cd9d297
3
+ size 168005865
htdemucs_config.json ADDED
@@ -0,0 +1,3807 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "htdemucs",
3
+ "model_class": "BagOfModelsMLX",
4
+ "sub_model_class": "HTDemucsMLX",
5
+ "num_models": 1,
6
+ "weights": [
7
+ [
8
+ 1.0,
9
+ 1.0,
10
+ 1.0,
11
+ 1.0
12
+ ]
13
+ ],
14
+ "args": [],
15
+ "kwargs": {
16
+ "sources": [
17
+ "drums",
18
+ "bass",
19
+ "other",
20
+ "vocals"
21
+ ],
22
+ "audio_channels": 2,
23
+ "samplerate": 44100,
24
+ "segment": "39/5",
25
+ "channels": 48,
26
+ "channels_time": null,
27
+ "growth": 2,
28
+ "nfft": 4096,
29
+ "wiener_iters": 0,
30
+ "end_iters": 0,
31
+ "wiener_residual": false,
32
+ "cac": true,
33
+ "depth": 4,
34
+ "rewrite": true,
35
+ "multi_freqs": [],
36
+ "multi_freqs_depth": 3,
37
+ "freq_emb": 0.2,
38
+ "emb_scale": 10,
39
+ "emb_smooth": true,
40
+ "kernel_size": 8,
41
+ "stride": 4,
42
+ "time_stride": 2,
43
+ "context": 1,
44
+ "context_enc": 0,
45
+ "norm_starts": 4,
46
+ "norm_groups": 4,
47
+ "dconv_mode": 3,
48
+ "dconv_depth": 2,
49
+ "dconv_comp": 8,
50
+ "dconv_init": 0.001,
51
+ "bottom_channels": 512,
52
+ "t_layers": 5,
53
+ "t_hidden_scale": 4.0,
54
+ "t_heads": 8,
55
+ "t_dropout": 0.02,
56
+ "t_layer_scale": true,
57
+ "t_gelu": true,
58
+ "t_emb": "sin",
59
+ "t_max_positions": 10000,
60
+ "t_max_period": 10000.0,
61
+ "t_weight_pos_embed": 1.0,
62
+ "t_cape_mean_normalize": true,
63
+ "t_cape_augment": true,
64
+ "t_cape_glob_loc_scale": [
65
+ 5000.0,
66
+ 1.0,
67
+ 1.4
68
+ ],
69
+ "t_sin_random_shift": 0,
70
+ "t_norm_in": true,
71
+ "t_norm_in_group": false,
72
+ "t_group_norm": false,
73
+ "t_norm_first": true,
74
+ "t_norm_out": true,
75
+ "t_weight_decay": 0.0,
76
+ "t_lr": null,
77
+ "t_sparse_self_attn": false,
78
+ "t_sparse_cross_attn": false,
79
+ "t_mask_type": "diag",
80
+ "t_mask_random_seed": 42,
81
+ "t_sparse_attn_window": 400,
82
+ "t_global_window": 100,
83
+ "t_sparsity": 0.95,
84
+ "t_auto_sparsity": false,
85
+ "t_cross_first": false,
86
+ "rescale": 0.1
87
+ },
88
+ "mlx_version": "0.30.3",
89
+ "tensor_count": 573,
90
+ "tensors": {
91
+ "model_0.encoder.0.conv.conv.weight": {
92
+ "shape": [
93
+ 48,
94
+ 8,
95
+ 1,
96
+ 4
97
+ ],
98
+ "dtype": "mlx.core.float32"
99
+ },
100
+ "model_0.encoder.0.conv.conv.bias": {
101
+ "shape": [
102
+ 48
103
+ ],
104
+ "dtype": "mlx.core.float32"
105
+ },
106
+ "model_0.encoder.0.rewrite.conv.weight": {
107
+ "shape": [
108
+ 96,
109
+ 1,
110
+ 1,
111
+ 48
112
+ ],
113
+ "dtype": "mlx.core.float32"
114
+ },
115
+ "model_0.encoder.0.rewrite.conv.bias": {
116
+ "shape": [
117
+ 96
118
+ ],
119
+ "dtype": "mlx.core.float32"
120
+ },
121
+ "model_0.encoder.0.dconv.layers.0.layers.0.conv.weight": {
122
+ "shape": [
123
+ 6,
124
+ 3,
125
+ 48
126
+ ],
127
+ "dtype": "mlx.core.float32"
128
+ },
129
+ "model_0.encoder.0.dconv.layers.0.layers.0.conv.bias": {
130
+ "shape": [
131
+ 6
132
+ ],
133
+ "dtype": "mlx.core.float32"
134
+ },
135
+ "model_0.encoder.0.dconv.layers.0.layers.1.weight": {
136
+ "shape": [
137
+ 6
138
+ ],
139
+ "dtype": "mlx.core.float32"
140
+ },
141
+ "model_0.encoder.0.dconv.layers.0.layers.1.bias": {
142
+ "shape": [
143
+ 6
144
+ ],
145
+ "dtype": "mlx.core.float32"
146
+ },
147
+ "model_0.encoder.0.dconv.layers.0.layers.3.conv.weight": {
148
+ "shape": [
149
+ 96,
150
+ 1,
151
+ 6
152
+ ],
153
+ "dtype": "mlx.core.float32"
154
+ },
155
+ "model_0.encoder.0.dconv.layers.0.layers.3.conv.bias": {
156
+ "shape": [
157
+ 96
158
+ ],
159
+ "dtype": "mlx.core.float32"
160
+ },
161
+ "model_0.encoder.0.dconv.layers.0.layers.4.weight": {
162
+ "shape": [
163
+ 96
164
+ ],
165
+ "dtype": "mlx.core.float32"
166
+ },
167
+ "model_0.encoder.0.dconv.layers.0.layers.4.bias": {
168
+ "shape": [
169
+ 96
170
+ ],
171
+ "dtype": "mlx.core.float32"
172
+ },
173
+ "model_0.encoder.0.dconv.layers.0.layers.6.scale": {
174
+ "shape": [
175
+ 48
176
+ ],
177
+ "dtype": "mlx.core.float32"
178
+ },
179
+ "model_0.encoder.0.dconv.layers.1.layers.0.conv.weight": {
180
+ "shape": [
181
+ 6,
182
+ 3,
183
+ 48
184
+ ],
185
+ "dtype": "mlx.core.float32"
186
+ },
187
+ "model_0.encoder.0.dconv.layers.1.layers.0.conv.bias": {
188
+ "shape": [
189
+ 6
190
+ ],
191
+ "dtype": "mlx.core.float32"
192
+ },
193
+ "model_0.encoder.0.dconv.layers.1.layers.1.weight": {
194
+ "shape": [
195
+ 6
196
+ ],
197
+ "dtype": "mlx.core.float32"
198
+ },
199
+ "model_0.encoder.0.dconv.layers.1.layers.1.bias": {
200
+ "shape": [
201
+ 6
202
+ ],
203
+ "dtype": "mlx.core.float32"
204
+ },
205
+ "model_0.encoder.0.dconv.layers.1.layers.3.conv.weight": {
206
+ "shape": [
207
+ 96,
208
+ 1,
209
+ 6
210
+ ],
211
+ "dtype": "mlx.core.float32"
212
+ },
213
+ "model_0.encoder.0.dconv.layers.1.layers.3.conv.bias": {
214
+ "shape": [
215
+ 96
216
+ ],
217
+ "dtype": "mlx.core.float32"
218
+ },
219
+ "model_0.encoder.0.dconv.layers.1.layers.4.weight": {
220
+ "shape": [
221
+ 96
222
+ ],
223
+ "dtype": "mlx.core.float32"
224
+ },
225
+ "model_0.encoder.0.dconv.layers.1.layers.4.bias": {
226
+ "shape": [
227
+ 96
228
+ ],
229
+ "dtype": "mlx.core.float32"
230
+ },
231
+ "model_0.encoder.0.dconv.layers.1.layers.6.scale": {
232
+ "shape": [
233
+ 48
234
+ ],
235
+ "dtype": "mlx.core.float32"
236
+ },
237
+ "model_0.encoder.1.conv.conv.weight": {
238
+ "shape": [
239
+ 96,
240
+ 8,
241
+ 1,
242
+ 48
243
+ ],
244
+ "dtype": "mlx.core.float32"
245
+ },
246
+ "model_0.encoder.1.conv.conv.bias": {
247
+ "shape": [
248
+ 96
249
+ ],
250
+ "dtype": "mlx.core.float32"
251
+ },
252
+ "model_0.encoder.1.rewrite.conv.weight": {
253
+ "shape": [
254
+ 192,
255
+ 1,
256
+ 1,
257
+ 96
258
+ ],
259
+ "dtype": "mlx.core.float32"
260
+ },
261
+ "model_0.encoder.1.rewrite.conv.bias": {
262
+ "shape": [
263
+ 192
264
+ ],
265
+ "dtype": "mlx.core.float32"
266
+ },
267
+ "model_0.encoder.1.dconv.layers.0.layers.0.conv.weight": {
268
+ "shape": [
269
+ 12,
270
+ 3,
271
+ 96
272
+ ],
273
+ "dtype": "mlx.core.float32"
274
+ },
275
+ "model_0.encoder.1.dconv.layers.0.layers.0.conv.bias": {
276
+ "shape": [
277
+ 12
278
+ ],
279
+ "dtype": "mlx.core.float32"
280
+ },
281
+ "model_0.encoder.1.dconv.layers.0.layers.1.weight": {
282
+ "shape": [
283
+ 12
284
+ ],
285
+ "dtype": "mlx.core.float32"
286
+ },
287
+ "model_0.encoder.1.dconv.layers.0.layers.1.bias": {
288
+ "shape": [
289
+ 12
290
+ ],
291
+ "dtype": "mlx.core.float32"
292
+ },
293
+ "model_0.encoder.1.dconv.layers.0.layers.3.conv.weight": {
294
+ "shape": [
295
+ 192,
296
+ 1,
297
+ 12
298
+ ],
299
+ "dtype": "mlx.core.float32"
300
+ },
301
+ "model_0.encoder.1.dconv.layers.0.layers.3.conv.bias": {
302
+ "shape": [
303
+ 192
304
+ ],
305
+ "dtype": "mlx.core.float32"
306
+ },
307
+ "model_0.encoder.1.dconv.layers.0.layers.4.weight": {
308
+ "shape": [
309
+ 192
310
+ ],
311
+ "dtype": "mlx.core.float32"
312
+ },
313
+ "model_0.encoder.1.dconv.layers.0.layers.4.bias": {
314
+ "shape": [
315
+ 192
316
+ ],
317
+ "dtype": "mlx.core.float32"
318
+ },
319
+ "model_0.encoder.1.dconv.layers.0.layers.6.scale": {
320
+ "shape": [
321
+ 96
322
+ ],
323
+ "dtype": "mlx.core.float32"
324
+ },
325
+ "model_0.encoder.1.dconv.layers.1.layers.0.conv.weight": {
326
+ "shape": [
327
+ 12,
328
+ 3,
329
+ 96
330
+ ],
331
+ "dtype": "mlx.core.float32"
332
+ },
333
+ "model_0.encoder.1.dconv.layers.1.layers.0.conv.bias": {
334
+ "shape": [
335
+ 12
336
+ ],
337
+ "dtype": "mlx.core.float32"
338
+ },
339
+ "model_0.encoder.1.dconv.layers.1.layers.1.weight": {
340
+ "shape": [
341
+ 12
342
+ ],
343
+ "dtype": "mlx.core.float32"
344
+ },
345
+ "model_0.encoder.1.dconv.layers.1.layers.1.bias": {
346
+ "shape": [
347
+ 12
348
+ ],
349
+ "dtype": "mlx.core.float32"
350
+ },
351
+ "model_0.encoder.1.dconv.layers.1.layers.3.conv.weight": {
352
+ "shape": [
353
+ 192,
354
+ 1,
355
+ 12
356
+ ],
357
+ "dtype": "mlx.core.float32"
358
+ },
359
+ "model_0.encoder.1.dconv.layers.1.layers.3.conv.bias": {
360
+ "shape": [
361
+ 192
362
+ ],
363
+ "dtype": "mlx.core.float32"
364
+ },
365
+ "model_0.encoder.1.dconv.layers.1.layers.4.weight": {
366
+ "shape": [
367
+ 192
368
+ ],
369
+ "dtype": "mlx.core.float32"
370
+ },
371
+ "model_0.encoder.1.dconv.layers.1.layers.4.bias": {
372
+ "shape": [
373
+ 192
374
+ ],
375
+ "dtype": "mlx.core.float32"
376
+ },
377
+ "model_0.encoder.1.dconv.layers.1.layers.6.scale": {
378
+ "shape": [
379
+ 96
380
+ ],
381
+ "dtype": "mlx.core.float32"
382
+ },
383
+ "model_0.encoder.2.conv.conv.weight": {
384
+ "shape": [
385
+ 192,
386
+ 8,
387
+ 1,
388
+ 96
389
+ ],
390
+ "dtype": "mlx.core.float32"
391
+ },
392
+ "model_0.encoder.2.conv.conv.bias": {
393
+ "shape": [
394
+ 192
395
+ ],
396
+ "dtype": "mlx.core.float32"
397
+ },
398
+ "model_0.encoder.2.rewrite.conv.weight": {
399
+ "shape": [
400
+ 384,
401
+ 1,
402
+ 1,
403
+ 192
404
+ ],
405
+ "dtype": "mlx.core.float32"
406
+ },
407
+ "model_0.encoder.2.rewrite.conv.bias": {
408
+ "shape": [
409
+ 384
410
+ ],
411
+ "dtype": "mlx.core.float32"
412
+ },
413
+ "model_0.encoder.2.dconv.layers.0.layers.0.conv.weight": {
414
+ "shape": [
415
+ 24,
416
+ 3,
417
+ 192
418
+ ],
419
+ "dtype": "mlx.core.float32"
420
+ },
421
+ "model_0.encoder.2.dconv.layers.0.layers.0.conv.bias": {
422
+ "shape": [
423
+ 24
424
+ ],
425
+ "dtype": "mlx.core.float32"
426
+ },
427
+ "model_0.encoder.2.dconv.layers.0.layers.1.weight": {
428
+ "shape": [
429
+ 24
430
+ ],
431
+ "dtype": "mlx.core.float32"
432
+ },
433
+ "model_0.encoder.2.dconv.layers.0.layers.1.bias": {
434
+ "shape": [
435
+ 24
436
+ ],
437
+ "dtype": "mlx.core.float32"
438
+ },
439
+ "model_0.encoder.2.dconv.layers.0.layers.3.conv.weight": {
440
+ "shape": [
441
+ 384,
442
+ 1,
443
+ 24
444
+ ],
445
+ "dtype": "mlx.core.float32"
446
+ },
447
+ "model_0.encoder.2.dconv.layers.0.layers.3.conv.bias": {
448
+ "shape": [
449
+ 384
450
+ ],
451
+ "dtype": "mlx.core.float32"
452
+ },
453
+ "model_0.encoder.2.dconv.layers.0.layers.4.weight": {
454
+ "shape": [
455
+ 384
456
+ ],
457
+ "dtype": "mlx.core.float32"
458
+ },
459
+ "model_0.encoder.2.dconv.layers.0.layers.4.bias": {
460
+ "shape": [
461
+ 384
462
+ ],
463
+ "dtype": "mlx.core.float32"
464
+ },
465
+ "model_0.encoder.2.dconv.layers.0.layers.6.scale": {
466
+ "shape": [
467
+ 192
468
+ ],
469
+ "dtype": "mlx.core.float32"
470
+ },
471
+ "model_0.encoder.2.dconv.layers.1.layers.0.conv.weight": {
472
+ "shape": [
473
+ 24,
474
+ 3,
475
+ 192
476
+ ],
477
+ "dtype": "mlx.core.float32"
478
+ },
479
+ "model_0.encoder.2.dconv.layers.1.layers.0.conv.bias": {
480
+ "shape": [
481
+ 24
482
+ ],
483
+ "dtype": "mlx.core.float32"
484
+ },
485
+ "model_0.encoder.2.dconv.layers.1.layers.1.weight": {
486
+ "shape": [
487
+ 24
488
+ ],
489
+ "dtype": "mlx.core.float32"
490
+ },
491
+ "model_0.encoder.2.dconv.layers.1.layers.1.bias": {
492
+ "shape": [
493
+ 24
494
+ ],
495
+ "dtype": "mlx.core.float32"
496
+ },
497
+ "model_0.encoder.2.dconv.layers.1.layers.3.conv.weight": {
498
+ "shape": [
499
+ 384,
500
+ 1,
501
+ 24
502
+ ],
503
+ "dtype": "mlx.core.float32"
504
+ },
505
+ "model_0.encoder.2.dconv.layers.1.layers.3.conv.bias": {
506
+ "shape": [
507
+ 384
508
+ ],
509
+ "dtype": "mlx.core.float32"
510
+ },
511
+ "model_0.encoder.2.dconv.layers.1.layers.4.weight": {
512
+ "shape": [
513
+ 384
514
+ ],
515
+ "dtype": "mlx.core.float32"
516
+ },
517
+ "model_0.encoder.2.dconv.layers.1.layers.4.bias": {
518
+ "shape": [
519
+ 384
520
+ ],
521
+ "dtype": "mlx.core.float32"
522
+ },
523
+ "model_0.encoder.2.dconv.layers.1.layers.6.scale": {
524
+ "shape": [
525
+ 192
526
+ ],
527
+ "dtype": "mlx.core.float32"
528
+ },
529
+ "model_0.encoder.3.conv.conv.weight": {
530
+ "shape": [
531
+ 384,
532
+ 8,
533
+ 1,
534
+ 192
535
+ ],
536
+ "dtype": "mlx.core.float32"
537
+ },
538
+ "model_0.encoder.3.conv.conv.bias": {
539
+ "shape": [
540
+ 384
541
+ ],
542
+ "dtype": "mlx.core.float32"
543
+ },
544
+ "model_0.encoder.3.rewrite.conv.weight": {
545
+ "shape": [
546
+ 768,
547
+ 1,
548
+ 1,
549
+ 384
550
+ ],
551
+ "dtype": "mlx.core.float32"
552
+ },
553
+ "model_0.encoder.3.rewrite.conv.bias": {
554
+ "shape": [
555
+ 768
556
+ ],
557
+ "dtype": "mlx.core.float32"
558
+ },
559
+ "model_0.encoder.3.dconv.layers.0.layers.0.conv.weight": {
560
+ "shape": [
561
+ 48,
562
+ 3,
563
+ 384
564
+ ],
565
+ "dtype": "mlx.core.float32"
566
+ },
567
+ "model_0.encoder.3.dconv.layers.0.layers.0.conv.bias": {
568
+ "shape": [
569
+ 48
570
+ ],
571
+ "dtype": "mlx.core.float32"
572
+ },
573
+ "model_0.encoder.3.dconv.layers.0.layers.1.weight": {
574
+ "shape": [
575
+ 48
576
+ ],
577
+ "dtype": "mlx.core.float32"
578
+ },
579
+ "model_0.encoder.3.dconv.layers.0.layers.1.bias": {
580
+ "shape": [
581
+ 48
582
+ ],
583
+ "dtype": "mlx.core.float32"
584
+ },
585
+ "model_0.encoder.3.dconv.layers.0.layers.3.conv.weight": {
586
+ "shape": [
587
+ 768,
588
+ 1,
589
+ 48
590
+ ],
591
+ "dtype": "mlx.core.float32"
592
+ },
593
+ "model_0.encoder.3.dconv.layers.0.layers.3.conv.bias": {
594
+ "shape": [
595
+ 768
596
+ ],
597
+ "dtype": "mlx.core.float32"
598
+ },
599
+ "model_0.encoder.3.dconv.layers.0.layers.4.weight": {
600
+ "shape": [
601
+ 768
602
+ ],
603
+ "dtype": "mlx.core.float32"
604
+ },
605
+ "model_0.encoder.3.dconv.layers.0.layers.4.bias": {
606
+ "shape": [
607
+ 768
608
+ ],
609
+ "dtype": "mlx.core.float32"
610
+ },
611
+ "model_0.encoder.3.dconv.layers.0.layers.6.scale": {
612
+ "shape": [
613
+ 384
614
+ ],
615
+ "dtype": "mlx.core.float32"
616
+ },
617
+ "model_0.encoder.3.dconv.layers.1.layers.0.conv.weight": {
618
+ "shape": [
619
+ 48,
620
+ 3,
621
+ 384
622
+ ],
623
+ "dtype": "mlx.core.float32"
624
+ },
625
+ "model_0.encoder.3.dconv.layers.1.layers.0.conv.bias": {
626
+ "shape": [
627
+ 48
628
+ ],
629
+ "dtype": "mlx.core.float32"
630
+ },
631
+ "model_0.encoder.3.dconv.layers.1.layers.1.weight": {
632
+ "shape": [
633
+ 48
634
+ ],
635
+ "dtype": "mlx.core.float32"
636
+ },
637
+ "model_0.encoder.3.dconv.layers.1.layers.1.bias": {
638
+ "shape": [
639
+ 48
640
+ ],
641
+ "dtype": "mlx.core.float32"
642
+ },
643
+ "model_0.encoder.3.dconv.layers.1.layers.3.conv.weight": {
644
+ "shape": [
645
+ 768,
646
+ 1,
647
+ 48
648
+ ],
649
+ "dtype": "mlx.core.float32"
650
+ },
651
+ "model_0.encoder.3.dconv.layers.1.layers.3.conv.bias": {
652
+ "shape": [
653
+ 768
654
+ ],
655
+ "dtype": "mlx.core.float32"
656
+ },
657
+ "model_0.encoder.3.dconv.layers.1.layers.4.weight": {
658
+ "shape": [
659
+ 768
660
+ ],
661
+ "dtype": "mlx.core.float32"
662
+ },
663
+ "model_0.encoder.3.dconv.layers.1.layers.4.bias": {
664
+ "shape": [
665
+ 768
666
+ ],
667
+ "dtype": "mlx.core.float32"
668
+ },
669
+ "model_0.encoder.3.dconv.layers.1.layers.6.scale": {
670
+ "shape": [
671
+ 384
672
+ ],
673
+ "dtype": "mlx.core.float32"
674
+ },
675
+ "model_0.decoder.0.conv_tr.conv.weight": {
676
+ "shape": [
677
+ 192,
678
+ 8,
679
+ 1,
680
+ 384
681
+ ],
682
+ "dtype": "mlx.core.float32"
683
+ },
684
+ "model_0.decoder.0.conv_tr.conv.bias": {
685
+ "shape": [
686
+ 192
687
+ ],
688
+ "dtype": "mlx.core.float32"
689
+ },
690
+ "model_0.decoder.0.rewrite.conv.weight": {
691
+ "shape": [
692
+ 768,
693
+ 3,
694
+ 3,
695
+ 384
696
+ ],
697
+ "dtype": "mlx.core.float32"
698
+ },
699
+ "model_0.decoder.0.rewrite.conv.bias": {
700
+ "shape": [
701
+ 768
702
+ ],
703
+ "dtype": "mlx.core.float32"
704
+ },
705
+ "model_0.decoder.0.dconv.layers.0.layers.0.conv.weight": {
706
+ "shape": [
707
+ 48,
708
+ 3,
709
+ 384
710
+ ],
711
+ "dtype": "mlx.core.float32"
712
+ },
713
+ "model_0.decoder.0.dconv.layers.0.layers.0.conv.bias": {
714
+ "shape": [
715
+ 48
716
+ ],
717
+ "dtype": "mlx.core.float32"
718
+ },
719
+ "model_0.decoder.0.dconv.layers.0.layers.1.weight": {
720
+ "shape": [
721
+ 48
722
+ ],
723
+ "dtype": "mlx.core.float32"
724
+ },
725
+ "model_0.decoder.0.dconv.layers.0.layers.1.bias": {
726
+ "shape": [
727
+ 48
728
+ ],
729
+ "dtype": "mlx.core.float32"
730
+ },
731
+ "model_0.decoder.0.dconv.layers.0.layers.3.conv.weight": {
732
+ "shape": [
733
+ 768,
734
+ 1,
735
+ 48
736
+ ],
737
+ "dtype": "mlx.core.float32"
738
+ },
739
+ "model_0.decoder.0.dconv.layers.0.layers.3.conv.bias": {
740
+ "shape": [
741
+ 768
742
+ ],
743
+ "dtype": "mlx.core.float32"
744
+ },
745
+ "model_0.decoder.0.dconv.layers.0.layers.4.weight": {
746
+ "shape": [
747
+ 768
748
+ ],
749
+ "dtype": "mlx.core.float32"
750
+ },
751
+ "model_0.decoder.0.dconv.layers.0.layers.4.bias": {
752
+ "shape": [
753
+ 768
754
+ ],
755
+ "dtype": "mlx.core.float32"
756
+ },
757
+ "model_0.decoder.0.dconv.layers.0.layers.6.scale": {
758
+ "shape": [
759
+ 384
760
+ ],
761
+ "dtype": "mlx.core.float32"
762
+ },
763
+ "model_0.decoder.0.dconv.layers.1.layers.0.conv.weight": {
764
+ "shape": [
765
+ 48,
766
+ 3,
767
+ 384
768
+ ],
769
+ "dtype": "mlx.core.float32"
770
+ },
771
+ "model_0.decoder.0.dconv.layers.1.layers.0.conv.bias": {
772
+ "shape": [
773
+ 48
774
+ ],
775
+ "dtype": "mlx.core.float32"
776
+ },
777
+ "model_0.decoder.0.dconv.layers.1.layers.1.weight": {
778
+ "shape": [
779
+ 48
780
+ ],
781
+ "dtype": "mlx.core.float32"
782
+ },
783
+ "model_0.decoder.0.dconv.layers.1.layers.1.bias": {
784
+ "shape": [
785
+ 48
786
+ ],
787
+ "dtype": "mlx.core.float32"
788
+ },
789
+ "model_0.decoder.0.dconv.layers.1.layers.3.conv.weight": {
790
+ "shape": [
791
+ 768,
792
+ 1,
793
+ 48
794
+ ],
795
+ "dtype": "mlx.core.float32"
796
+ },
797
+ "model_0.decoder.0.dconv.layers.1.layers.3.conv.bias": {
798
+ "shape": [
799
+ 768
800
+ ],
801
+ "dtype": "mlx.core.float32"
802
+ },
803
+ "model_0.decoder.0.dconv.layers.1.layers.4.weight": {
804
+ "shape": [
805
+ 768
806
+ ],
807
+ "dtype": "mlx.core.float32"
808
+ },
809
+ "model_0.decoder.0.dconv.layers.1.layers.4.bias": {
810
+ "shape": [
811
+ 768
812
+ ],
813
+ "dtype": "mlx.core.float32"
814
+ },
815
+ "model_0.decoder.0.dconv.layers.1.layers.6.scale": {
816
+ "shape": [
817
+ 384
818
+ ],
819
+ "dtype": "mlx.core.float32"
820
+ },
821
+ "model_0.decoder.1.conv_tr.conv.weight": {
822
+ "shape": [
823
+ 96,
824
+ 8,
825
+ 1,
826
+ 192
827
+ ],
828
+ "dtype": "mlx.core.float32"
829
+ },
830
+ "model_0.decoder.1.conv_tr.conv.bias": {
831
+ "shape": [
832
+ 96
833
+ ],
834
+ "dtype": "mlx.core.float32"
835
+ },
836
+ "model_0.decoder.1.rewrite.conv.weight": {
837
+ "shape": [
838
+ 384,
839
+ 3,
840
+ 3,
841
+ 192
842
+ ],
843
+ "dtype": "mlx.core.float32"
844
+ },
845
+ "model_0.decoder.1.rewrite.conv.bias": {
846
+ "shape": [
847
+ 384
848
+ ],
849
+ "dtype": "mlx.core.float32"
850
+ },
851
+ "model_0.decoder.1.dconv.layers.0.layers.0.conv.weight": {
852
+ "shape": [
853
+ 24,
854
+ 3,
855
+ 192
856
+ ],
857
+ "dtype": "mlx.core.float32"
858
+ },
859
+ "model_0.decoder.1.dconv.layers.0.layers.0.conv.bias": {
860
+ "shape": [
861
+ 24
862
+ ],
863
+ "dtype": "mlx.core.float32"
864
+ },
865
+ "model_0.decoder.1.dconv.layers.0.layers.1.weight": {
866
+ "shape": [
867
+ 24
868
+ ],
869
+ "dtype": "mlx.core.float32"
870
+ },
871
+ "model_0.decoder.1.dconv.layers.0.layers.1.bias": {
872
+ "shape": [
873
+ 24
874
+ ],
875
+ "dtype": "mlx.core.float32"
876
+ },
877
+ "model_0.decoder.1.dconv.layers.0.layers.3.conv.weight": {
878
+ "shape": [
879
+ 384,
880
+ 1,
881
+ 24
882
+ ],
883
+ "dtype": "mlx.core.float32"
884
+ },
885
+ "model_0.decoder.1.dconv.layers.0.layers.3.conv.bias": {
886
+ "shape": [
887
+ 384
888
+ ],
889
+ "dtype": "mlx.core.float32"
890
+ },
891
+ "model_0.decoder.1.dconv.layers.0.layers.4.weight": {
892
+ "shape": [
893
+ 384
894
+ ],
895
+ "dtype": "mlx.core.float32"
896
+ },
897
+ "model_0.decoder.1.dconv.layers.0.layers.4.bias": {
898
+ "shape": [
899
+ 384
900
+ ],
901
+ "dtype": "mlx.core.float32"
902
+ },
903
+ "model_0.decoder.1.dconv.layers.0.layers.6.scale": {
904
+ "shape": [
905
+ 192
906
+ ],
907
+ "dtype": "mlx.core.float32"
908
+ },
909
+ "model_0.decoder.1.dconv.layers.1.layers.0.conv.weight": {
910
+ "shape": [
911
+ 24,
912
+ 3,
913
+ 192
914
+ ],
915
+ "dtype": "mlx.core.float32"
916
+ },
917
+ "model_0.decoder.1.dconv.layers.1.layers.0.conv.bias": {
918
+ "shape": [
919
+ 24
920
+ ],
921
+ "dtype": "mlx.core.float32"
922
+ },
923
+ "model_0.decoder.1.dconv.layers.1.layers.1.weight": {
924
+ "shape": [
925
+ 24
926
+ ],
927
+ "dtype": "mlx.core.float32"
928
+ },
929
+ "model_0.decoder.1.dconv.layers.1.layers.1.bias": {
930
+ "shape": [
931
+ 24
932
+ ],
933
+ "dtype": "mlx.core.float32"
934
+ },
935
+ "model_0.decoder.1.dconv.layers.1.layers.3.conv.weight": {
936
+ "shape": [
937
+ 384,
938
+ 1,
939
+ 24
940
+ ],
941
+ "dtype": "mlx.core.float32"
942
+ },
943
+ "model_0.decoder.1.dconv.layers.1.layers.3.conv.bias": {
944
+ "shape": [
945
+ 384
946
+ ],
947
+ "dtype": "mlx.core.float32"
948
+ },
949
+ "model_0.decoder.1.dconv.layers.1.layers.4.weight": {
950
+ "shape": [
951
+ 384
952
+ ],
953
+ "dtype": "mlx.core.float32"
954
+ },
955
+ "model_0.decoder.1.dconv.layers.1.layers.4.bias": {
956
+ "shape": [
957
+ 384
958
+ ],
959
+ "dtype": "mlx.core.float32"
960
+ },
961
+ "model_0.decoder.1.dconv.layers.1.layers.6.scale": {
962
+ "shape": [
963
+ 192
964
+ ],
965
+ "dtype": "mlx.core.float32"
966
+ },
967
+ "model_0.decoder.2.conv_tr.conv.weight": {
968
+ "shape": [
969
+ 48,
970
+ 8,
971
+ 1,
972
+ 96
973
+ ],
974
+ "dtype": "mlx.core.float32"
975
+ },
976
+ "model_0.decoder.2.conv_tr.conv.bias": {
977
+ "shape": [
978
+ 48
979
+ ],
980
+ "dtype": "mlx.core.float32"
981
+ },
982
+ "model_0.decoder.2.rewrite.conv.weight": {
983
+ "shape": [
984
+ 192,
985
+ 3,
986
+ 3,
987
+ 96
988
+ ],
989
+ "dtype": "mlx.core.float32"
990
+ },
991
+ "model_0.decoder.2.rewrite.conv.bias": {
992
+ "shape": [
993
+ 192
994
+ ],
995
+ "dtype": "mlx.core.float32"
996
+ },
997
+ "model_0.decoder.2.dconv.layers.0.layers.0.conv.weight": {
998
+ "shape": [
999
+ 12,
1000
+ 3,
1001
+ 96
1002
+ ],
1003
+ "dtype": "mlx.core.float32"
1004
+ },
1005
+ "model_0.decoder.2.dconv.layers.0.layers.0.conv.bias": {
1006
+ "shape": [
1007
+ 12
1008
+ ],
1009
+ "dtype": "mlx.core.float32"
1010
+ },
1011
+ "model_0.decoder.2.dconv.layers.0.layers.1.weight": {
1012
+ "shape": [
1013
+ 12
1014
+ ],
1015
+ "dtype": "mlx.core.float32"
1016
+ },
1017
+ "model_0.decoder.2.dconv.layers.0.layers.1.bias": {
1018
+ "shape": [
1019
+ 12
1020
+ ],
1021
+ "dtype": "mlx.core.float32"
1022
+ },
1023
+ "model_0.decoder.2.dconv.layers.0.layers.3.conv.weight": {
1024
+ "shape": [
1025
+ 192,
1026
+ 1,
1027
+ 12
1028
+ ],
1029
+ "dtype": "mlx.core.float32"
1030
+ },
1031
+ "model_0.decoder.2.dconv.layers.0.layers.3.conv.bias": {
1032
+ "shape": [
1033
+ 192
1034
+ ],
1035
+ "dtype": "mlx.core.float32"
1036
+ },
1037
+ "model_0.decoder.2.dconv.layers.0.layers.4.weight": {
1038
+ "shape": [
1039
+ 192
1040
+ ],
1041
+ "dtype": "mlx.core.float32"
1042
+ },
1043
+ "model_0.decoder.2.dconv.layers.0.layers.4.bias": {
1044
+ "shape": [
1045
+ 192
1046
+ ],
1047
+ "dtype": "mlx.core.float32"
1048
+ },
1049
+ "model_0.decoder.2.dconv.layers.0.layers.6.scale": {
1050
+ "shape": [
1051
+ 96
1052
+ ],
1053
+ "dtype": "mlx.core.float32"
1054
+ },
1055
+ "model_0.decoder.2.dconv.layers.1.layers.0.conv.weight": {
1056
+ "shape": [
1057
+ 12,
1058
+ 3,
1059
+ 96
1060
+ ],
1061
+ "dtype": "mlx.core.float32"
1062
+ },
1063
+ "model_0.decoder.2.dconv.layers.1.layers.0.conv.bias": {
1064
+ "shape": [
1065
+ 12
1066
+ ],
1067
+ "dtype": "mlx.core.float32"
1068
+ },
1069
+ "model_0.decoder.2.dconv.layers.1.layers.1.weight": {
1070
+ "shape": [
1071
+ 12
1072
+ ],
1073
+ "dtype": "mlx.core.float32"
1074
+ },
1075
+ "model_0.decoder.2.dconv.layers.1.layers.1.bias": {
1076
+ "shape": [
1077
+ 12
1078
+ ],
1079
+ "dtype": "mlx.core.float32"
1080
+ },
1081
+ "model_0.decoder.2.dconv.layers.1.layers.3.conv.weight": {
1082
+ "shape": [
1083
+ 192,
1084
+ 1,
1085
+ 12
1086
+ ],
1087
+ "dtype": "mlx.core.float32"
1088
+ },
1089
+ "model_0.decoder.2.dconv.layers.1.layers.3.conv.bias": {
1090
+ "shape": [
1091
+ 192
1092
+ ],
1093
+ "dtype": "mlx.core.float32"
1094
+ },
1095
+ "model_0.decoder.2.dconv.layers.1.layers.4.weight": {
1096
+ "shape": [
1097
+ 192
1098
+ ],
1099
+ "dtype": "mlx.core.float32"
1100
+ },
1101
+ "model_0.decoder.2.dconv.layers.1.layers.4.bias": {
1102
+ "shape": [
1103
+ 192
1104
+ ],
1105
+ "dtype": "mlx.core.float32"
1106
+ },
1107
+ "model_0.decoder.2.dconv.layers.1.layers.6.scale": {
1108
+ "shape": [
1109
+ 96
1110
+ ],
1111
+ "dtype": "mlx.core.float32"
1112
+ },
1113
+ "model_0.decoder.3.conv_tr.conv.weight": {
1114
+ "shape": [
1115
+ 16,
1116
+ 8,
1117
+ 1,
1118
+ 48
1119
+ ],
1120
+ "dtype": "mlx.core.float32"
1121
+ },
1122
+ "model_0.decoder.3.conv_tr.conv.bias": {
1123
+ "shape": [
1124
+ 16
1125
+ ],
1126
+ "dtype": "mlx.core.float32"
1127
+ },
1128
+ "model_0.decoder.3.rewrite.conv.weight": {
1129
+ "shape": [
1130
+ 96,
1131
+ 3,
1132
+ 3,
1133
+ 48
1134
+ ],
1135
+ "dtype": "mlx.core.float32"
1136
+ },
1137
+ "model_0.decoder.3.rewrite.conv.bias": {
1138
+ "shape": [
1139
+ 96
1140
+ ],
1141
+ "dtype": "mlx.core.float32"
1142
+ },
1143
+ "model_0.decoder.3.dconv.layers.0.layers.0.conv.weight": {
1144
+ "shape": [
1145
+ 6,
1146
+ 3,
1147
+ 48
1148
+ ],
1149
+ "dtype": "mlx.core.float32"
1150
+ },
1151
+ "model_0.decoder.3.dconv.layers.0.layers.0.conv.bias": {
1152
+ "shape": [
1153
+ 6
1154
+ ],
1155
+ "dtype": "mlx.core.float32"
1156
+ },
1157
+ "model_0.decoder.3.dconv.layers.0.layers.1.weight": {
1158
+ "shape": [
1159
+ 6
1160
+ ],
1161
+ "dtype": "mlx.core.float32"
1162
+ },
1163
+ "model_0.decoder.3.dconv.layers.0.layers.1.bias": {
1164
+ "shape": [
1165
+ 6
1166
+ ],
1167
+ "dtype": "mlx.core.float32"
1168
+ },
1169
+ "model_0.decoder.3.dconv.layers.0.layers.3.conv.weight": {
1170
+ "shape": [
1171
+ 96,
1172
+ 1,
1173
+ 6
1174
+ ],
1175
+ "dtype": "mlx.core.float32"
1176
+ },
1177
+ "model_0.decoder.3.dconv.layers.0.layers.3.conv.bias": {
1178
+ "shape": [
1179
+ 96
1180
+ ],
1181
+ "dtype": "mlx.core.float32"
1182
+ },
1183
+ "model_0.decoder.3.dconv.layers.0.layers.4.weight": {
1184
+ "shape": [
1185
+ 96
1186
+ ],
1187
+ "dtype": "mlx.core.float32"
1188
+ },
1189
+ "model_0.decoder.3.dconv.layers.0.layers.4.bias": {
1190
+ "shape": [
1191
+ 96
1192
+ ],
1193
+ "dtype": "mlx.core.float32"
1194
+ },
1195
+ "model_0.decoder.3.dconv.layers.0.layers.6.scale": {
1196
+ "shape": [
1197
+ 48
1198
+ ],
1199
+ "dtype": "mlx.core.float32"
1200
+ },
1201
+ "model_0.decoder.3.dconv.layers.1.layers.0.conv.weight": {
1202
+ "shape": [
1203
+ 6,
1204
+ 3,
1205
+ 48
1206
+ ],
1207
+ "dtype": "mlx.core.float32"
1208
+ },
1209
+ "model_0.decoder.3.dconv.layers.1.layers.0.conv.bias": {
1210
+ "shape": [
1211
+ 6
1212
+ ],
1213
+ "dtype": "mlx.core.float32"
1214
+ },
1215
+ "model_0.decoder.3.dconv.layers.1.layers.1.weight": {
1216
+ "shape": [
1217
+ 6
1218
+ ],
1219
+ "dtype": "mlx.core.float32"
1220
+ },
1221
+ "model_0.decoder.3.dconv.layers.1.layers.1.bias": {
1222
+ "shape": [
1223
+ 6
1224
+ ],
1225
+ "dtype": "mlx.core.float32"
1226
+ },
1227
+ "model_0.decoder.3.dconv.layers.1.layers.3.conv.weight": {
1228
+ "shape": [
1229
+ 96,
1230
+ 1,
1231
+ 6
1232
+ ],
1233
+ "dtype": "mlx.core.float32"
1234
+ },
1235
+ "model_0.decoder.3.dconv.layers.1.layers.3.conv.bias": {
1236
+ "shape": [
1237
+ 96
1238
+ ],
1239
+ "dtype": "mlx.core.float32"
1240
+ },
1241
+ "model_0.decoder.3.dconv.layers.1.layers.4.weight": {
1242
+ "shape": [
1243
+ 96
1244
+ ],
1245
+ "dtype": "mlx.core.float32"
1246
+ },
1247
+ "model_0.decoder.3.dconv.layers.1.layers.4.bias": {
1248
+ "shape": [
1249
+ 96
1250
+ ],
1251
+ "dtype": "mlx.core.float32"
1252
+ },
1253
+ "model_0.decoder.3.dconv.layers.1.layers.6.scale": {
1254
+ "shape": [
1255
+ 48
1256
+ ],
1257
+ "dtype": "mlx.core.float32"
1258
+ },
1259
+ "model_0.tencoder.0.conv.conv.weight": {
1260
+ "shape": [
1261
+ 48,
1262
+ 8,
1263
+ 2
1264
+ ],
1265
+ "dtype": "mlx.core.float32"
1266
+ },
1267
+ "model_0.tencoder.0.conv.conv.bias": {
1268
+ "shape": [
1269
+ 48
1270
+ ],
1271
+ "dtype": "mlx.core.float32"
1272
+ },
1273
+ "model_0.tencoder.0.rewrite.conv.weight": {
1274
+ "shape": [
1275
+ 96,
1276
+ 1,
1277
+ 48
1278
+ ],
1279
+ "dtype": "mlx.core.float32"
1280
+ },
1281
+ "model_0.tencoder.0.rewrite.conv.bias": {
1282
+ "shape": [
1283
+ 96
1284
+ ],
1285
+ "dtype": "mlx.core.float32"
1286
+ },
1287
+ "model_0.tencoder.0.dconv.layers.0.layers.0.conv.weight": {
1288
+ "shape": [
1289
+ 6,
1290
+ 3,
1291
+ 48
1292
+ ],
1293
+ "dtype": "mlx.core.float32"
1294
+ },
1295
+ "model_0.tencoder.0.dconv.layers.0.layers.0.conv.bias": {
1296
+ "shape": [
1297
+ 6
1298
+ ],
1299
+ "dtype": "mlx.core.float32"
1300
+ },
1301
+ "model_0.tencoder.0.dconv.layers.0.layers.1.weight": {
1302
+ "shape": [
1303
+ 6
1304
+ ],
1305
+ "dtype": "mlx.core.float32"
1306
+ },
1307
+ "model_0.tencoder.0.dconv.layers.0.layers.1.bias": {
1308
+ "shape": [
1309
+ 6
1310
+ ],
1311
+ "dtype": "mlx.core.float32"
1312
+ },
1313
+ "model_0.tencoder.0.dconv.layers.0.layers.3.conv.weight": {
1314
+ "shape": [
1315
+ 96,
1316
+ 1,
1317
+ 6
1318
+ ],
1319
+ "dtype": "mlx.core.float32"
1320
+ },
1321
+ "model_0.tencoder.0.dconv.layers.0.layers.3.conv.bias": {
1322
+ "shape": [
1323
+ 96
1324
+ ],
1325
+ "dtype": "mlx.core.float32"
1326
+ },
1327
+ "model_0.tencoder.0.dconv.layers.0.layers.4.weight": {
1328
+ "shape": [
1329
+ 96
1330
+ ],
1331
+ "dtype": "mlx.core.float32"
1332
+ },
1333
+ "model_0.tencoder.0.dconv.layers.0.layers.4.bias": {
1334
+ "shape": [
1335
+ 96
1336
+ ],
1337
+ "dtype": "mlx.core.float32"
1338
+ },
1339
+ "model_0.tencoder.0.dconv.layers.0.layers.6.scale": {
1340
+ "shape": [
1341
+ 48
1342
+ ],
1343
+ "dtype": "mlx.core.float32"
1344
+ },
1345
+ "model_0.tencoder.0.dconv.layers.1.layers.0.conv.weight": {
1346
+ "shape": [
1347
+ 6,
1348
+ 3,
1349
+ 48
1350
+ ],
1351
+ "dtype": "mlx.core.float32"
1352
+ },
1353
+ "model_0.tencoder.0.dconv.layers.1.layers.0.conv.bias": {
1354
+ "shape": [
1355
+ 6
1356
+ ],
1357
+ "dtype": "mlx.core.float32"
1358
+ },
1359
+ "model_0.tencoder.0.dconv.layers.1.layers.1.weight": {
1360
+ "shape": [
1361
+ 6
1362
+ ],
1363
+ "dtype": "mlx.core.float32"
1364
+ },
1365
+ "model_0.tencoder.0.dconv.layers.1.layers.1.bias": {
1366
+ "shape": [
1367
+ 6
1368
+ ],
1369
+ "dtype": "mlx.core.float32"
1370
+ },
1371
+ "model_0.tencoder.0.dconv.layers.1.layers.3.conv.weight": {
1372
+ "shape": [
1373
+ 96,
1374
+ 1,
1375
+ 6
1376
+ ],
1377
+ "dtype": "mlx.core.float32"
1378
+ },
1379
+ "model_0.tencoder.0.dconv.layers.1.layers.3.conv.bias": {
1380
+ "shape": [
1381
+ 96
1382
+ ],
1383
+ "dtype": "mlx.core.float32"
1384
+ },
1385
+ "model_0.tencoder.0.dconv.layers.1.layers.4.weight": {
1386
+ "shape": [
1387
+ 96
1388
+ ],
1389
+ "dtype": "mlx.core.float32"
1390
+ },
1391
+ "model_0.tencoder.0.dconv.layers.1.layers.4.bias": {
1392
+ "shape": [
1393
+ 96
1394
+ ],
1395
+ "dtype": "mlx.core.float32"
1396
+ },
1397
+ "model_0.tencoder.0.dconv.layers.1.layers.6.scale": {
1398
+ "shape": [
1399
+ 48
1400
+ ],
1401
+ "dtype": "mlx.core.float32"
1402
+ },
1403
+ "model_0.tencoder.1.conv.conv.weight": {
1404
+ "shape": [
1405
+ 96,
1406
+ 8,
1407
+ 48
1408
+ ],
1409
+ "dtype": "mlx.core.float32"
1410
+ },
1411
+ "model_0.tencoder.1.conv.conv.bias": {
1412
+ "shape": [
1413
+ 96
1414
+ ],
1415
+ "dtype": "mlx.core.float32"
1416
+ },
1417
+ "model_0.tencoder.1.rewrite.conv.weight": {
1418
+ "shape": [
1419
+ 192,
1420
+ 1,
1421
+ 96
1422
+ ],
1423
+ "dtype": "mlx.core.float32"
1424
+ },
1425
+ "model_0.tencoder.1.rewrite.conv.bias": {
1426
+ "shape": [
1427
+ 192
1428
+ ],
1429
+ "dtype": "mlx.core.float32"
1430
+ },
1431
+ "model_0.tencoder.1.dconv.layers.0.layers.0.conv.weight": {
1432
+ "shape": [
1433
+ 12,
1434
+ 3,
1435
+ 96
1436
+ ],
1437
+ "dtype": "mlx.core.float32"
1438
+ },
1439
+ "model_0.tencoder.1.dconv.layers.0.layers.0.conv.bias": {
1440
+ "shape": [
1441
+ 12
1442
+ ],
1443
+ "dtype": "mlx.core.float32"
1444
+ },
1445
+ "model_0.tencoder.1.dconv.layers.0.layers.1.weight": {
1446
+ "shape": [
1447
+ 12
1448
+ ],
1449
+ "dtype": "mlx.core.float32"
1450
+ },
1451
+ "model_0.tencoder.1.dconv.layers.0.layers.1.bias": {
1452
+ "shape": [
1453
+ 12
1454
+ ],
1455
+ "dtype": "mlx.core.float32"
1456
+ },
1457
+ "model_0.tencoder.1.dconv.layers.0.layers.3.conv.weight": {
1458
+ "shape": [
1459
+ 192,
1460
+ 1,
1461
+ 12
1462
+ ],
1463
+ "dtype": "mlx.core.float32"
1464
+ },
1465
+ "model_0.tencoder.1.dconv.layers.0.layers.3.conv.bias": {
1466
+ "shape": [
1467
+ 192
1468
+ ],
1469
+ "dtype": "mlx.core.float32"
1470
+ },
1471
+ "model_0.tencoder.1.dconv.layers.0.layers.4.weight": {
1472
+ "shape": [
1473
+ 192
1474
+ ],
1475
+ "dtype": "mlx.core.float32"
1476
+ },
1477
+ "model_0.tencoder.1.dconv.layers.0.layers.4.bias": {
1478
+ "shape": [
1479
+ 192
1480
+ ],
1481
+ "dtype": "mlx.core.float32"
1482
+ },
1483
+ "model_0.tencoder.1.dconv.layers.0.layers.6.scale": {
1484
+ "shape": [
1485
+ 96
1486
+ ],
1487
+ "dtype": "mlx.core.float32"
1488
+ },
1489
+ "model_0.tencoder.1.dconv.layers.1.layers.0.conv.weight": {
1490
+ "shape": [
1491
+ 12,
1492
+ 3,
1493
+ 96
1494
+ ],
1495
+ "dtype": "mlx.core.float32"
1496
+ },
1497
+ "model_0.tencoder.1.dconv.layers.1.layers.0.conv.bias": {
1498
+ "shape": [
1499
+ 12
1500
+ ],
1501
+ "dtype": "mlx.core.float32"
1502
+ },
1503
+ "model_0.tencoder.1.dconv.layers.1.layers.1.weight": {
1504
+ "shape": [
1505
+ 12
1506
+ ],
1507
+ "dtype": "mlx.core.float32"
1508
+ },
1509
+ "model_0.tencoder.1.dconv.layers.1.layers.1.bias": {
1510
+ "shape": [
1511
+ 12
1512
+ ],
1513
+ "dtype": "mlx.core.float32"
1514
+ },
1515
+ "model_0.tencoder.1.dconv.layers.1.layers.3.conv.weight": {
1516
+ "shape": [
1517
+ 192,
1518
+ 1,
1519
+ 12
1520
+ ],
1521
+ "dtype": "mlx.core.float32"
1522
+ },
1523
+ "model_0.tencoder.1.dconv.layers.1.layers.3.conv.bias": {
1524
+ "shape": [
1525
+ 192
1526
+ ],
1527
+ "dtype": "mlx.core.float32"
1528
+ },
1529
+ "model_0.tencoder.1.dconv.layers.1.layers.4.weight": {
1530
+ "shape": [
1531
+ 192
1532
+ ],
1533
+ "dtype": "mlx.core.float32"
1534
+ },
1535
+ "model_0.tencoder.1.dconv.layers.1.layers.4.bias": {
1536
+ "shape": [
1537
+ 192
1538
+ ],
1539
+ "dtype": "mlx.core.float32"
1540
+ },
1541
+ "model_0.tencoder.1.dconv.layers.1.layers.6.scale": {
1542
+ "shape": [
1543
+ 96
1544
+ ],
1545
+ "dtype": "mlx.core.float32"
1546
+ },
1547
+ "model_0.tencoder.2.conv.conv.weight": {
1548
+ "shape": [
1549
+ 192,
1550
+ 8,
1551
+ 96
1552
+ ],
1553
+ "dtype": "mlx.core.float32"
1554
+ },
1555
+ "model_0.tencoder.2.conv.conv.bias": {
1556
+ "shape": [
1557
+ 192
1558
+ ],
1559
+ "dtype": "mlx.core.float32"
1560
+ },
1561
+ "model_0.tencoder.2.rewrite.conv.weight": {
1562
+ "shape": [
1563
+ 384,
1564
+ 1,
1565
+ 192
1566
+ ],
1567
+ "dtype": "mlx.core.float32"
1568
+ },
1569
+ "model_0.tencoder.2.rewrite.conv.bias": {
1570
+ "shape": [
1571
+ 384
1572
+ ],
1573
+ "dtype": "mlx.core.float32"
1574
+ },
1575
+ "model_0.tencoder.2.dconv.layers.0.layers.0.conv.weight": {
1576
+ "shape": [
1577
+ 24,
1578
+ 3,
1579
+ 192
1580
+ ],
1581
+ "dtype": "mlx.core.float32"
1582
+ },
1583
+ "model_0.tencoder.2.dconv.layers.0.layers.0.conv.bias": {
1584
+ "shape": [
1585
+ 24
1586
+ ],
1587
+ "dtype": "mlx.core.float32"
1588
+ },
1589
+ "model_0.tencoder.2.dconv.layers.0.layers.1.weight": {
1590
+ "shape": [
1591
+ 24
1592
+ ],
1593
+ "dtype": "mlx.core.float32"
1594
+ },
1595
+ "model_0.tencoder.2.dconv.layers.0.layers.1.bias": {
1596
+ "shape": [
1597
+ 24
1598
+ ],
1599
+ "dtype": "mlx.core.float32"
1600
+ },
1601
+ "model_0.tencoder.2.dconv.layers.0.layers.3.conv.weight": {
1602
+ "shape": [
1603
+ 384,
1604
+ 1,
1605
+ 24
1606
+ ],
1607
+ "dtype": "mlx.core.float32"
1608
+ },
1609
+ "model_0.tencoder.2.dconv.layers.0.layers.3.conv.bias": {
1610
+ "shape": [
1611
+ 384
1612
+ ],
1613
+ "dtype": "mlx.core.float32"
1614
+ },
1615
+ "model_0.tencoder.2.dconv.layers.0.layers.4.weight": {
1616
+ "shape": [
1617
+ 384
1618
+ ],
1619
+ "dtype": "mlx.core.float32"
1620
+ },
1621
+ "model_0.tencoder.2.dconv.layers.0.layers.4.bias": {
1622
+ "shape": [
1623
+ 384
1624
+ ],
1625
+ "dtype": "mlx.core.float32"
1626
+ },
1627
+ "model_0.tencoder.2.dconv.layers.0.layers.6.scale": {
1628
+ "shape": [
1629
+ 192
1630
+ ],
1631
+ "dtype": "mlx.core.float32"
1632
+ },
1633
+ "model_0.tencoder.2.dconv.layers.1.layers.0.conv.weight": {
1634
+ "shape": [
1635
+ 24,
1636
+ 3,
1637
+ 192
1638
+ ],
1639
+ "dtype": "mlx.core.float32"
1640
+ },
1641
+ "model_0.tencoder.2.dconv.layers.1.layers.0.conv.bias": {
1642
+ "shape": [
1643
+ 24
1644
+ ],
1645
+ "dtype": "mlx.core.float32"
1646
+ },
1647
+ "model_0.tencoder.2.dconv.layers.1.layers.1.weight": {
1648
+ "shape": [
1649
+ 24
1650
+ ],
1651
+ "dtype": "mlx.core.float32"
1652
+ },
1653
+ "model_0.tencoder.2.dconv.layers.1.layers.1.bias": {
1654
+ "shape": [
1655
+ 24
1656
+ ],
1657
+ "dtype": "mlx.core.float32"
1658
+ },
1659
+ "model_0.tencoder.2.dconv.layers.1.layers.3.conv.weight": {
1660
+ "shape": [
1661
+ 384,
1662
+ 1,
1663
+ 24
1664
+ ],
1665
+ "dtype": "mlx.core.float32"
1666
+ },
1667
+ "model_0.tencoder.2.dconv.layers.1.layers.3.conv.bias": {
1668
+ "shape": [
1669
+ 384
1670
+ ],
1671
+ "dtype": "mlx.core.float32"
1672
+ },
1673
+ "model_0.tencoder.2.dconv.layers.1.layers.4.weight": {
1674
+ "shape": [
1675
+ 384
1676
+ ],
1677
+ "dtype": "mlx.core.float32"
1678
+ },
1679
+ "model_0.tencoder.2.dconv.layers.1.layers.4.bias": {
1680
+ "shape": [
1681
+ 384
1682
+ ],
1683
+ "dtype": "mlx.core.float32"
1684
+ },
1685
+ "model_0.tencoder.2.dconv.layers.1.layers.6.scale": {
1686
+ "shape": [
1687
+ 192
1688
+ ],
1689
+ "dtype": "mlx.core.float32"
1690
+ },
1691
+ "model_0.tencoder.3.conv.conv.weight": {
1692
+ "shape": [
1693
+ 384,
1694
+ 8,
1695
+ 192
1696
+ ],
1697
+ "dtype": "mlx.core.float32"
1698
+ },
1699
+ "model_0.tencoder.3.conv.conv.bias": {
1700
+ "shape": [
1701
+ 384
1702
+ ],
1703
+ "dtype": "mlx.core.float32"
1704
+ },
1705
+ "model_0.tencoder.3.rewrite.conv.weight": {
1706
+ "shape": [
1707
+ 768,
1708
+ 1,
1709
+ 384
1710
+ ],
1711
+ "dtype": "mlx.core.float32"
1712
+ },
1713
+ "model_0.tencoder.3.rewrite.conv.bias": {
1714
+ "shape": [
1715
+ 768
1716
+ ],
1717
+ "dtype": "mlx.core.float32"
1718
+ },
1719
+ "model_0.tencoder.3.dconv.layers.0.layers.0.conv.weight": {
1720
+ "shape": [
1721
+ 48,
1722
+ 3,
1723
+ 384
1724
+ ],
1725
+ "dtype": "mlx.core.float32"
1726
+ },
1727
+ "model_0.tencoder.3.dconv.layers.0.layers.0.conv.bias": {
1728
+ "shape": [
1729
+ 48
1730
+ ],
1731
+ "dtype": "mlx.core.float32"
1732
+ },
1733
+ "model_0.tencoder.3.dconv.layers.0.layers.1.weight": {
1734
+ "shape": [
1735
+ 48
1736
+ ],
1737
+ "dtype": "mlx.core.float32"
1738
+ },
1739
+ "model_0.tencoder.3.dconv.layers.0.layers.1.bias": {
1740
+ "shape": [
1741
+ 48
1742
+ ],
1743
+ "dtype": "mlx.core.float32"
1744
+ },
1745
+ "model_0.tencoder.3.dconv.layers.0.layers.3.conv.weight": {
1746
+ "shape": [
1747
+ 768,
1748
+ 1,
1749
+ 48
1750
+ ],
1751
+ "dtype": "mlx.core.float32"
1752
+ },
1753
+ "model_0.tencoder.3.dconv.layers.0.layers.3.conv.bias": {
1754
+ "shape": [
1755
+ 768
1756
+ ],
1757
+ "dtype": "mlx.core.float32"
1758
+ },
1759
+ "model_0.tencoder.3.dconv.layers.0.layers.4.weight": {
1760
+ "shape": [
1761
+ 768
1762
+ ],
1763
+ "dtype": "mlx.core.float32"
1764
+ },
1765
+ "model_0.tencoder.3.dconv.layers.0.layers.4.bias": {
1766
+ "shape": [
1767
+ 768
1768
+ ],
1769
+ "dtype": "mlx.core.float32"
1770
+ },
1771
+ "model_0.tencoder.3.dconv.layers.0.layers.6.scale": {
1772
+ "shape": [
1773
+ 384
1774
+ ],
1775
+ "dtype": "mlx.core.float32"
1776
+ },
1777
+ "model_0.tencoder.3.dconv.layers.1.layers.0.conv.weight": {
1778
+ "shape": [
1779
+ 48,
1780
+ 3,
1781
+ 384
1782
+ ],
1783
+ "dtype": "mlx.core.float32"
1784
+ },
1785
+ "model_0.tencoder.3.dconv.layers.1.layers.0.conv.bias": {
1786
+ "shape": [
1787
+ 48
1788
+ ],
1789
+ "dtype": "mlx.core.float32"
1790
+ },
1791
+ "model_0.tencoder.3.dconv.layers.1.layers.1.weight": {
1792
+ "shape": [
1793
+ 48
1794
+ ],
1795
+ "dtype": "mlx.core.float32"
1796
+ },
1797
+ "model_0.tencoder.3.dconv.layers.1.layers.1.bias": {
1798
+ "shape": [
1799
+ 48
1800
+ ],
1801
+ "dtype": "mlx.core.float32"
1802
+ },
1803
+ "model_0.tencoder.3.dconv.layers.1.layers.3.conv.weight": {
1804
+ "shape": [
1805
+ 768,
1806
+ 1,
1807
+ 48
1808
+ ],
1809
+ "dtype": "mlx.core.float32"
1810
+ },
1811
+ "model_0.tencoder.3.dconv.layers.1.layers.3.conv.bias": {
1812
+ "shape": [
1813
+ 768
1814
+ ],
1815
+ "dtype": "mlx.core.float32"
1816
+ },
1817
+ "model_0.tencoder.3.dconv.layers.1.layers.4.weight": {
1818
+ "shape": [
1819
+ 768
1820
+ ],
1821
+ "dtype": "mlx.core.float32"
1822
+ },
1823
+ "model_0.tencoder.3.dconv.layers.1.layers.4.bias": {
1824
+ "shape": [
1825
+ 768
1826
+ ],
1827
+ "dtype": "mlx.core.float32"
1828
+ },
1829
+ "model_0.tencoder.3.dconv.layers.1.layers.6.scale": {
1830
+ "shape": [
1831
+ 384
1832
+ ],
1833
+ "dtype": "mlx.core.float32"
1834
+ },
1835
+ "model_0.tdecoder.0.conv_tr.conv.weight": {
1836
+ "shape": [
1837
+ 192,
1838
+ 8,
1839
+ 384
1840
+ ],
1841
+ "dtype": "mlx.core.float32"
1842
+ },
1843
+ "model_0.tdecoder.0.conv_tr.conv.bias": {
1844
+ "shape": [
1845
+ 192
1846
+ ],
1847
+ "dtype": "mlx.core.float32"
1848
+ },
1849
+ "model_0.tdecoder.0.rewrite.conv.weight": {
1850
+ "shape": [
1851
+ 768,
1852
+ 3,
1853
+ 384
1854
+ ],
1855
+ "dtype": "mlx.core.float32"
1856
+ },
1857
+ "model_0.tdecoder.0.rewrite.conv.bias": {
1858
+ "shape": [
1859
+ 768
1860
+ ],
1861
+ "dtype": "mlx.core.float32"
1862
+ },
1863
+ "model_0.tdecoder.0.dconv.layers.0.layers.0.conv.weight": {
1864
+ "shape": [
1865
+ 48,
1866
+ 3,
1867
+ 384
1868
+ ],
1869
+ "dtype": "mlx.core.float32"
1870
+ },
1871
+ "model_0.tdecoder.0.dconv.layers.0.layers.0.conv.bias": {
1872
+ "shape": [
1873
+ 48
1874
+ ],
1875
+ "dtype": "mlx.core.float32"
1876
+ },
1877
+ "model_0.tdecoder.0.dconv.layers.0.layers.1.weight": {
1878
+ "shape": [
1879
+ 48
1880
+ ],
1881
+ "dtype": "mlx.core.float32"
1882
+ },
1883
+ "model_0.tdecoder.0.dconv.layers.0.layers.1.bias": {
1884
+ "shape": [
1885
+ 48
1886
+ ],
1887
+ "dtype": "mlx.core.float32"
1888
+ },
1889
+ "model_0.tdecoder.0.dconv.layers.0.layers.3.conv.weight": {
1890
+ "shape": [
1891
+ 768,
1892
+ 1,
1893
+ 48
1894
+ ],
1895
+ "dtype": "mlx.core.float32"
1896
+ },
1897
+ "model_0.tdecoder.0.dconv.layers.0.layers.3.conv.bias": {
1898
+ "shape": [
1899
+ 768
1900
+ ],
1901
+ "dtype": "mlx.core.float32"
1902
+ },
1903
+ "model_0.tdecoder.0.dconv.layers.0.layers.4.weight": {
1904
+ "shape": [
1905
+ 768
1906
+ ],
1907
+ "dtype": "mlx.core.float32"
1908
+ },
1909
+ "model_0.tdecoder.0.dconv.layers.0.layers.4.bias": {
1910
+ "shape": [
1911
+ 768
1912
+ ],
1913
+ "dtype": "mlx.core.float32"
1914
+ },
1915
+ "model_0.tdecoder.0.dconv.layers.0.layers.6.scale": {
1916
+ "shape": [
1917
+ 384
1918
+ ],
1919
+ "dtype": "mlx.core.float32"
1920
+ },
1921
+ "model_0.tdecoder.0.dconv.layers.1.layers.0.conv.weight": {
1922
+ "shape": [
1923
+ 48,
1924
+ 3,
1925
+ 384
1926
+ ],
1927
+ "dtype": "mlx.core.float32"
1928
+ },
1929
+ "model_0.tdecoder.0.dconv.layers.1.layers.0.conv.bias": {
1930
+ "shape": [
1931
+ 48
1932
+ ],
1933
+ "dtype": "mlx.core.float32"
1934
+ },
1935
+ "model_0.tdecoder.0.dconv.layers.1.layers.1.weight": {
1936
+ "shape": [
1937
+ 48
1938
+ ],
1939
+ "dtype": "mlx.core.float32"
1940
+ },
1941
+ "model_0.tdecoder.0.dconv.layers.1.layers.1.bias": {
1942
+ "shape": [
1943
+ 48
1944
+ ],
1945
+ "dtype": "mlx.core.float32"
1946
+ },
1947
+ "model_0.tdecoder.0.dconv.layers.1.layers.3.conv.weight": {
1948
+ "shape": [
1949
+ 768,
1950
+ 1,
1951
+ 48
1952
+ ],
1953
+ "dtype": "mlx.core.float32"
1954
+ },
1955
+ "model_0.tdecoder.0.dconv.layers.1.layers.3.conv.bias": {
1956
+ "shape": [
1957
+ 768
1958
+ ],
1959
+ "dtype": "mlx.core.float32"
1960
+ },
1961
+ "model_0.tdecoder.0.dconv.layers.1.layers.4.weight": {
1962
+ "shape": [
1963
+ 768
1964
+ ],
1965
+ "dtype": "mlx.core.float32"
1966
+ },
1967
+ "model_0.tdecoder.0.dconv.layers.1.layers.4.bias": {
1968
+ "shape": [
1969
+ 768
1970
+ ],
1971
+ "dtype": "mlx.core.float32"
1972
+ },
1973
+ "model_0.tdecoder.0.dconv.layers.1.layers.6.scale": {
1974
+ "shape": [
1975
+ 384
1976
+ ],
1977
+ "dtype": "mlx.core.float32"
1978
+ },
1979
+ "model_0.tdecoder.1.conv_tr.conv.weight": {
1980
+ "shape": [
1981
+ 96,
1982
+ 8,
1983
+ 192
1984
+ ],
1985
+ "dtype": "mlx.core.float32"
1986
+ },
1987
+ "model_0.tdecoder.1.conv_tr.conv.bias": {
1988
+ "shape": [
1989
+ 96
1990
+ ],
1991
+ "dtype": "mlx.core.float32"
1992
+ },
1993
+ "model_0.tdecoder.1.rewrite.conv.weight": {
1994
+ "shape": [
1995
+ 384,
1996
+ 3,
1997
+ 192
1998
+ ],
1999
+ "dtype": "mlx.core.float32"
2000
+ },
2001
+ "model_0.tdecoder.1.rewrite.conv.bias": {
2002
+ "shape": [
2003
+ 384
2004
+ ],
2005
+ "dtype": "mlx.core.float32"
2006
+ },
2007
+ "model_0.tdecoder.1.dconv.layers.0.layers.0.conv.weight": {
2008
+ "shape": [
2009
+ 24,
2010
+ 3,
2011
+ 192
2012
+ ],
2013
+ "dtype": "mlx.core.float32"
2014
+ },
2015
+ "model_0.tdecoder.1.dconv.layers.0.layers.0.conv.bias": {
2016
+ "shape": [
2017
+ 24
2018
+ ],
2019
+ "dtype": "mlx.core.float32"
2020
+ },
2021
+ "model_0.tdecoder.1.dconv.layers.0.layers.1.weight": {
2022
+ "shape": [
2023
+ 24
2024
+ ],
2025
+ "dtype": "mlx.core.float32"
2026
+ },
2027
+ "model_0.tdecoder.1.dconv.layers.0.layers.1.bias": {
2028
+ "shape": [
2029
+ 24
2030
+ ],
2031
+ "dtype": "mlx.core.float32"
2032
+ },
2033
+ "model_0.tdecoder.1.dconv.layers.0.layers.3.conv.weight": {
2034
+ "shape": [
2035
+ 384,
2036
+ 1,
2037
+ 24
2038
+ ],
2039
+ "dtype": "mlx.core.float32"
2040
+ },
2041
+ "model_0.tdecoder.1.dconv.layers.0.layers.3.conv.bias": {
2042
+ "shape": [
2043
+ 384
2044
+ ],
2045
+ "dtype": "mlx.core.float32"
2046
+ },
2047
+ "model_0.tdecoder.1.dconv.layers.0.layers.4.weight": {
2048
+ "shape": [
2049
+ 384
2050
+ ],
2051
+ "dtype": "mlx.core.float32"
2052
+ },
2053
+ "model_0.tdecoder.1.dconv.layers.0.layers.4.bias": {
2054
+ "shape": [
2055
+ 384
2056
+ ],
2057
+ "dtype": "mlx.core.float32"
2058
+ },
2059
+ "model_0.tdecoder.1.dconv.layers.0.layers.6.scale": {
2060
+ "shape": [
2061
+ 192
2062
+ ],
2063
+ "dtype": "mlx.core.float32"
2064
+ },
2065
+ "model_0.tdecoder.1.dconv.layers.1.layers.0.conv.weight": {
2066
+ "shape": [
2067
+ 24,
2068
+ 3,
2069
+ 192
2070
+ ],
2071
+ "dtype": "mlx.core.float32"
2072
+ },
2073
+ "model_0.tdecoder.1.dconv.layers.1.layers.0.conv.bias": {
2074
+ "shape": [
2075
+ 24
2076
+ ],
2077
+ "dtype": "mlx.core.float32"
2078
+ },
2079
+ "model_0.tdecoder.1.dconv.layers.1.layers.1.weight": {
2080
+ "shape": [
2081
+ 24
2082
+ ],
2083
+ "dtype": "mlx.core.float32"
2084
+ },
2085
+ "model_0.tdecoder.1.dconv.layers.1.layers.1.bias": {
2086
+ "shape": [
2087
+ 24
2088
+ ],
2089
+ "dtype": "mlx.core.float32"
2090
+ },
2091
+ "model_0.tdecoder.1.dconv.layers.1.layers.3.conv.weight": {
2092
+ "shape": [
2093
+ 384,
2094
+ 1,
2095
+ 24
2096
+ ],
2097
+ "dtype": "mlx.core.float32"
2098
+ },
2099
+ "model_0.tdecoder.1.dconv.layers.1.layers.3.conv.bias": {
2100
+ "shape": [
2101
+ 384
2102
+ ],
2103
+ "dtype": "mlx.core.float32"
2104
+ },
2105
+ "model_0.tdecoder.1.dconv.layers.1.layers.4.weight": {
2106
+ "shape": [
2107
+ 384
2108
+ ],
2109
+ "dtype": "mlx.core.float32"
2110
+ },
2111
+ "model_0.tdecoder.1.dconv.layers.1.layers.4.bias": {
2112
+ "shape": [
2113
+ 384
2114
+ ],
2115
+ "dtype": "mlx.core.float32"
2116
+ },
2117
+ "model_0.tdecoder.1.dconv.layers.1.layers.6.scale": {
2118
+ "shape": [
2119
+ 192
2120
+ ],
2121
+ "dtype": "mlx.core.float32"
2122
+ },
2123
+ "model_0.tdecoder.2.conv_tr.conv.weight": {
2124
+ "shape": [
2125
+ 48,
2126
+ 8,
2127
+ 96
2128
+ ],
2129
+ "dtype": "mlx.core.float32"
2130
+ },
2131
+ "model_0.tdecoder.2.conv_tr.conv.bias": {
2132
+ "shape": [
2133
+ 48
2134
+ ],
2135
+ "dtype": "mlx.core.float32"
2136
+ },
2137
+ "model_0.tdecoder.2.rewrite.conv.weight": {
2138
+ "shape": [
2139
+ 192,
2140
+ 3,
2141
+ 96
2142
+ ],
2143
+ "dtype": "mlx.core.float32"
2144
+ },
2145
+ "model_0.tdecoder.2.rewrite.conv.bias": {
2146
+ "shape": [
2147
+ 192
2148
+ ],
2149
+ "dtype": "mlx.core.float32"
2150
+ },
2151
+ "model_0.tdecoder.2.dconv.layers.0.layers.0.conv.weight": {
2152
+ "shape": [
2153
+ 12,
2154
+ 3,
2155
+ 96
2156
+ ],
2157
+ "dtype": "mlx.core.float32"
2158
+ },
2159
+ "model_0.tdecoder.2.dconv.layers.0.layers.0.conv.bias": {
2160
+ "shape": [
2161
+ 12
2162
+ ],
2163
+ "dtype": "mlx.core.float32"
2164
+ },
2165
+ "model_0.tdecoder.2.dconv.layers.0.layers.1.weight": {
2166
+ "shape": [
2167
+ 12
2168
+ ],
2169
+ "dtype": "mlx.core.float32"
2170
+ },
2171
+ "model_0.tdecoder.2.dconv.layers.0.layers.1.bias": {
2172
+ "shape": [
2173
+ 12
2174
+ ],
2175
+ "dtype": "mlx.core.float32"
2176
+ },
2177
+ "model_0.tdecoder.2.dconv.layers.0.layers.3.conv.weight": {
2178
+ "shape": [
2179
+ 192,
2180
+ 1,
2181
+ 12
2182
+ ],
2183
+ "dtype": "mlx.core.float32"
2184
+ },
2185
+ "model_0.tdecoder.2.dconv.layers.0.layers.3.conv.bias": {
2186
+ "shape": [
2187
+ 192
2188
+ ],
2189
+ "dtype": "mlx.core.float32"
2190
+ },
2191
+ "model_0.tdecoder.2.dconv.layers.0.layers.4.weight": {
2192
+ "shape": [
2193
+ 192
2194
+ ],
2195
+ "dtype": "mlx.core.float32"
2196
+ },
2197
+ "model_0.tdecoder.2.dconv.layers.0.layers.4.bias": {
2198
+ "shape": [
2199
+ 192
2200
+ ],
2201
+ "dtype": "mlx.core.float32"
2202
+ },
2203
+ "model_0.tdecoder.2.dconv.layers.0.layers.6.scale": {
2204
+ "shape": [
2205
+ 96
2206
+ ],
2207
+ "dtype": "mlx.core.float32"
2208
+ },
2209
+ "model_0.tdecoder.2.dconv.layers.1.layers.0.conv.weight": {
2210
+ "shape": [
2211
+ 12,
2212
+ 3,
2213
+ 96
2214
+ ],
2215
+ "dtype": "mlx.core.float32"
2216
+ },
2217
+ "model_0.tdecoder.2.dconv.layers.1.layers.0.conv.bias": {
2218
+ "shape": [
2219
+ 12
2220
+ ],
2221
+ "dtype": "mlx.core.float32"
2222
+ },
2223
+ "model_0.tdecoder.2.dconv.layers.1.layers.1.weight": {
2224
+ "shape": [
2225
+ 12
2226
+ ],
2227
+ "dtype": "mlx.core.float32"
2228
+ },
2229
+ "model_0.tdecoder.2.dconv.layers.1.layers.1.bias": {
2230
+ "shape": [
2231
+ 12
2232
+ ],
2233
+ "dtype": "mlx.core.float32"
2234
+ },
2235
+ "model_0.tdecoder.2.dconv.layers.1.layers.3.conv.weight": {
2236
+ "shape": [
2237
+ 192,
2238
+ 1,
2239
+ 12
2240
+ ],
2241
+ "dtype": "mlx.core.float32"
2242
+ },
2243
+ "model_0.tdecoder.2.dconv.layers.1.layers.3.conv.bias": {
2244
+ "shape": [
2245
+ 192
2246
+ ],
2247
+ "dtype": "mlx.core.float32"
2248
+ },
2249
+ "model_0.tdecoder.2.dconv.layers.1.layers.4.weight": {
2250
+ "shape": [
2251
+ 192
2252
+ ],
2253
+ "dtype": "mlx.core.float32"
2254
+ },
2255
+ "model_0.tdecoder.2.dconv.layers.1.layers.4.bias": {
2256
+ "shape": [
2257
+ 192
2258
+ ],
2259
+ "dtype": "mlx.core.float32"
2260
+ },
2261
+ "model_0.tdecoder.2.dconv.layers.1.layers.6.scale": {
2262
+ "shape": [
2263
+ 96
2264
+ ],
2265
+ "dtype": "mlx.core.float32"
2266
+ },
2267
+ "model_0.tdecoder.3.conv_tr.conv.weight": {
2268
+ "shape": [
2269
+ 8,
2270
+ 8,
2271
+ 48
2272
+ ],
2273
+ "dtype": "mlx.core.float32"
2274
+ },
2275
+ "model_0.tdecoder.3.conv_tr.conv.bias": {
2276
+ "shape": [
2277
+ 8
2278
+ ],
2279
+ "dtype": "mlx.core.float32"
2280
+ },
2281
+ "model_0.tdecoder.3.rewrite.conv.weight": {
2282
+ "shape": [
2283
+ 96,
2284
+ 3,
2285
+ 48
2286
+ ],
2287
+ "dtype": "mlx.core.float32"
2288
+ },
2289
+ "model_0.tdecoder.3.rewrite.conv.bias": {
2290
+ "shape": [
2291
+ 96
2292
+ ],
2293
+ "dtype": "mlx.core.float32"
2294
+ },
2295
+ "model_0.tdecoder.3.dconv.layers.0.layers.0.conv.weight": {
2296
+ "shape": [
2297
+ 6,
2298
+ 3,
2299
+ 48
2300
+ ],
2301
+ "dtype": "mlx.core.float32"
2302
+ },
2303
+ "model_0.tdecoder.3.dconv.layers.0.layers.0.conv.bias": {
2304
+ "shape": [
2305
+ 6
2306
+ ],
2307
+ "dtype": "mlx.core.float32"
2308
+ },
2309
+ "model_0.tdecoder.3.dconv.layers.0.layers.1.weight": {
2310
+ "shape": [
2311
+ 6
2312
+ ],
2313
+ "dtype": "mlx.core.float32"
2314
+ },
2315
+ "model_0.tdecoder.3.dconv.layers.0.layers.1.bias": {
2316
+ "shape": [
2317
+ 6
2318
+ ],
2319
+ "dtype": "mlx.core.float32"
2320
+ },
2321
+ "model_0.tdecoder.3.dconv.layers.0.layers.3.conv.weight": {
2322
+ "shape": [
2323
+ 96,
2324
+ 1,
2325
+ 6
2326
+ ],
2327
+ "dtype": "mlx.core.float32"
2328
+ },
2329
+ "model_0.tdecoder.3.dconv.layers.0.layers.3.conv.bias": {
2330
+ "shape": [
2331
+ 96
2332
+ ],
2333
+ "dtype": "mlx.core.float32"
2334
+ },
2335
+ "model_0.tdecoder.3.dconv.layers.0.layers.4.weight": {
2336
+ "shape": [
2337
+ 96
2338
+ ],
2339
+ "dtype": "mlx.core.float32"
2340
+ },
2341
+ "model_0.tdecoder.3.dconv.layers.0.layers.4.bias": {
2342
+ "shape": [
2343
+ 96
2344
+ ],
2345
+ "dtype": "mlx.core.float32"
2346
+ },
2347
+ "model_0.tdecoder.3.dconv.layers.0.layers.6.scale": {
2348
+ "shape": [
2349
+ 48
2350
+ ],
2351
+ "dtype": "mlx.core.float32"
2352
+ },
2353
+ "model_0.tdecoder.3.dconv.layers.1.layers.0.conv.weight": {
2354
+ "shape": [
2355
+ 6,
2356
+ 3,
2357
+ 48
2358
+ ],
2359
+ "dtype": "mlx.core.float32"
2360
+ },
2361
+ "model_0.tdecoder.3.dconv.layers.1.layers.0.conv.bias": {
2362
+ "shape": [
2363
+ 6
2364
+ ],
2365
+ "dtype": "mlx.core.float32"
2366
+ },
2367
+ "model_0.tdecoder.3.dconv.layers.1.layers.1.weight": {
2368
+ "shape": [
2369
+ 6
2370
+ ],
2371
+ "dtype": "mlx.core.float32"
2372
+ },
2373
+ "model_0.tdecoder.3.dconv.layers.1.layers.1.bias": {
2374
+ "shape": [
2375
+ 6
2376
+ ],
2377
+ "dtype": "mlx.core.float32"
2378
+ },
2379
+ "model_0.tdecoder.3.dconv.layers.1.layers.3.conv.weight": {
2380
+ "shape": [
2381
+ 96,
2382
+ 1,
2383
+ 6
2384
+ ],
2385
+ "dtype": "mlx.core.float32"
2386
+ },
2387
+ "model_0.tdecoder.3.dconv.layers.1.layers.3.conv.bias": {
2388
+ "shape": [
2389
+ 96
2390
+ ],
2391
+ "dtype": "mlx.core.float32"
2392
+ },
2393
+ "model_0.tdecoder.3.dconv.layers.1.layers.4.weight": {
2394
+ "shape": [
2395
+ 96
2396
+ ],
2397
+ "dtype": "mlx.core.float32"
2398
+ },
2399
+ "model_0.tdecoder.3.dconv.layers.1.layers.4.bias": {
2400
+ "shape": [
2401
+ 96
2402
+ ],
2403
+ "dtype": "mlx.core.float32"
2404
+ },
2405
+ "model_0.tdecoder.3.dconv.layers.1.layers.6.scale": {
2406
+ "shape": [
2407
+ 48
2408
+ ],
2409
+ "dtype": "mlx.core.float32"
2410
+ },
2411
+ "model_0.freq_emb.embedding.weight": {
2412
+ "shape": [
2413
+ 512,
2414
+ 48
2415
+ ],
2416
+ "dtype": "mlx.core.float32"
2417
+ },
2418
+ "model_0.channel_upsampler.conv.weight": {
2419
+ "shape": [
2420
+ 512,
2421
+ 1,
2422
+ 384
2423
+ ],
2424
+ "dtype": "mlx.core.float32"
2425
+ },
2426
+ "model_0.channel_upsampler.conv.bias": {
2427
+ "shape": [
2428
+ 512
2429
+ ],
2430
+ "dtype": "mlx.core.float32"
2431
+ },
2432
+ "model_0.channel_downsampler.conv.weight": {
2433
+ "shape": [
2434
+ 384,
2435
+ 1,
2436
+ 512
2437
+ ],
2438
+ "dtype": "mlx.core.float32"
2439
+ },
2440
+ "model_0.channel_downsampler.conv.bias": {
2441
+ "shape": [
2442
+ 384
2443
+ ],
2444
+ "dtype": "mlx.core.float32"
2445
+ },
2446
+ "model_0.channel_upsampler_t.conv.weight": {
2447
+ "shape": [
2448
+ 512,
2449
+ 1,
2450
+ 384
2451
+ ],
2452
+ "dtype": "mlx.core.float32"
2453
+ },
2454
+ "model_0.channel_upsampler_t.conv.bias": {
2455
+ "shape": [
2456
+ 512
2457
+ ],
2458
+ "dtype": "mlx.core.float32"
2459
+ },
2460
+ "model_0.channel_downsampler_t.conv.weight": {
2461
+ "shape": [
2462
+ 384,
2463
+ 1,
2464
+ 512
2465
+ ],
2466
+ "dtype": "mlx.core.float32"
2467
+ },
2468
+ "model_0.channel_downsampler_t.conv.bias": {
2469
+ "shape": [
2470
+ 384
2471
+ ],
2472
+ "dtype": "mlx.core.float32"
2473
+ },
2474
+ "model_0.crosstransformer.norm_in.weight": {
2475
+ "shape": [
2476
+ 512
2477
+ ],
2478
+ "dtype": "mlx.core.float32"
2479
+ },
2480
+ "model_0.crosstransformer.norm_in.bias": {
2481
+ "shape": [
2482
+ 512
2483
+ ],
2484
+ "dtype": "mlx.core.float32"
2485
+ },
2486
+ "model_0.crosstransformer.norm_in_t.weight": {
2487
+ "shape": [
2488
+ 512
2489
+ ],
2490
+ "dtype": "mlx.core.float32"
2491
+ },
2492
+ "model_0.crosstransformer.norm_in_t.bias": {
2493
+ "shape": [
2494
+ 512
2495
+ ],
2496
+ "dtype": "mlx.core.float32"
2497
+ },
2498
+ "model_0.crosstransformer.layers.0.attn.query_proj.weight": {
2499
+ "shape": [
2500
+ 512,
2501
+ 512
2502
+ ],
2503
+ "dtype": "mlx.core.float32"
2504
+ },
2505
+ "model_0.crosstransformer.layers.0.attn.query_proj.bias": {
2506
+ "shape": [
2507
+ 512
2508
+ ],
2509
+ "dtype": "mlx.core.float32"
2510
+ },
2511
+ "model_0.crosstransformer.layers.0.attn.key_proj.weight": {
2512
+ "shape": [
2513
+ 512,
2514
+ 512
2515
+ ],
2516
+ "dtype": "mlx.core.float32"
2517
+ },
2518
+ "model_0.crosstransformer.layers.0.attn.key_proj.bias": {
2519
+ "shape": [
2520
+ 512
2521
+ ],
2522
+ "dtype": "mlx.core.float32"
2523
+ },
2524
+ "model_0.crosstransformer.layers.0.attn.value_proj.weight": {
2525
+ "shape": [
2526
+ 512,
2527
+ 512
2528
+ ],
2529
+ "dtype": "mlx.core.float32"
2530
+ },
2531
+ "model_0.crosstransformer.layers.0.attn.value_proj.bias": {
2532
+ "shape": [
2533
+ 512
2534
+ ],
2535
+ "dtype": "mlx.core.float32"
2536
+ },
2537
+ "model_0.crosstransformer.layers.0.attn.out_proj.weight": {
2538
+ "shape": [
2539
+ 512,
2540
+ 512
2541
+ ],
2542
+ "dtype": "mlx.core.float32"
2543
+ },
2544
+ "model_0.crosstransformer.layers.0.attn.out_proj.bias": {
2545
+ "shape": [
2546
+ 512
2547
+ ],
2548
+ "dtype": "mlx.core.float32"
2549
+ },
2550
+ "model_0.crosstransformer.layers.0.linear1.weight": {
2551
+ "shape": [
2552
+ 2048,
2553
+ 512
2554
+ ],
2555
+ "dtype": "mlx.core.float32"
2556
+ },
2557
+ "model_0.crosstransformer.layers.0.linear1.bias": {
2558
+ "shape": [
2559
+ 2048
2560
+ ],
2561
+ "dtype": "mlx.core.float32"
2562
+ },
2563
+ "model_0.crosstransformer.layers.0.linear2.weight": {
2564
+ "shape": [
2565
+ 512,
2566
+ 2048
2567
+ ],
2568
+ "dtype": "mlx.core.float32"
2569
+ },
2570
+ "model_0.crosstransformer.layers.0.linear2.bias": {
2571
+ "shape": [
2572
+ 512
2573
+ ],
2574
+ "dtype": "mlx.core.float32"
2575
+ },
2576
+ "model_0.crosstransformer.layers.0.norm1.weight": {
2577
+ "shape": [
2578
+ 512
2579
+ ],
2580
+ "dtype": "mlx.core.float32"
2581
+ },
2582
+ "model_0.crosstransformer.layers.0.norm1.bias": {
2583
+ "shape": [
2584
+ 512
2585
+ ],
2586
+ "dtype": "mlx.core.float32"
2587
+ },
2588
+ "model_0.crosstransformer.layers.0.norm2.weight": {
2589
+ "shape": [
2590
+ 512
2591
+ ],
2592
+ "dtype": "mlx.core.float32"
2593
+ },
2594
+ "model_0.crosstransformer.layers.0.norm2.bias": {
2595
+ "shape": [
2596
+ 512
2597
+ ],
2598
+ "dtype": "mlx.core.float32"
2599
+ },
2600
+ "model_0.crosstransformer.layers.0.norm_out.gn.bias": {
2601
+ "shape": [
2602
+ 512
2603
+ ],
2604
+ "dtype": "mlx.core.float32"
2605
+ },
2606
+ "model_0.crosstransformer.layers.0.norm_out.gn.weight": {
2607
+ "shape": [
2608
+ 512
2609
+ ],
2610
+ "dtype": "mlx.core.float32"
2611
+ },
2612
+ "model_0.crosstransformer.layers.0.gamma_1.scale": {
2613
+ "shape": [
2614
+ 512
2615
+ ],
2616
+ "dtype": "mlx.core.float32"
2617
+ },
2618
+ "model_0.crosstransformer.layers.0.gamma_2.scale": {
2619
+ "shape": [
2620
+ 512
2621
+ ],
2622
+ "dtype": "mlx.core.float32"
2623
+ },
2624
+ "model_0.crosstransformer.layers.1.cross_attn.query_proj.weight": {
2625
+ "shape": [
2626
+ 512,
2627
+ 512
2628
+ ],
2629
+ "dtype": "mlx.core.float32"
2630
+ },
2631
+ "model_0.crosstransformer.layers.1.cross_attn.query_proj.bias": {
2632
+ "shape": [
2633
+ 512
2634
+ ],
2635
+ "dtype": "mlx.core.float32"
2636
+ },
2637
+ "model_0.crosstransformer.layers.1.cross_attn.key_proj.weight": {
2638
+ "shape": [
2639
+ 512,
2640
+ 512
2641
+ ],
2642
+ "dtype": "mlx.core.float32"
2643
+ },
2644
+ "model_0.crosstransformer.layers.1.cross_attn.key_proj.bias": {
2645
+ "shape": [
2646
+ 512
2647
+ ],
2648
+ "dtype": "mlx.core.float32"
2649
+ },
2650
+ "model_0.crosstransformer.layers.1.cross_attn.value_proj.weight": {
2651
+ "shape": [
2652
+ 512,
2653
+ 512
2654
+ ],
2655
+ "dtype": "mlx.core.float32"
2656
+ },
2657
+ "model_0.crosstransformer.layers.1.cross_attn.value_proj.bias": {
2658
+ "shape": [
2659
+ 512
2660
+ ],
2661
+ "dtype": "mlx.core.float32"
2662
+ },
2663
+ "model_0.crosstransformer.layers.1.cross_attn.out_proj.weight": {
2664
+ "shape": [
2665
+ 512,
2666
+ 512
2667
+ ],
2668
+ "dtype": "mlx.core.float32"
2669
+ },
2670
+ "model_0.crosstransformer.layers.1.cross_attn.out_proj.bias": {
2671
+ "shape": [
2672
+ 512
2673
+ ],
2674
+ "dtype": "mlx.core.float32"
2675
+ },
2676
+ "model_0.crosstransformer.layers.1.linear1.weight": {
2677
+ "shape": [
2678
+ 2048,
2679
+ 512
2680
+ ],
2681
+ "dtype": "mlx.core.float32"
2682
+ },
2683
+ "model_0.crosstransformer.layers.1.linear1.bias": {
2684
+ "shape": [
2685
+ 2048
2686
+ ],
2687
+ "dtype": "mlx.core.float32"
2688
+ },
2689
+ "model_0.crosstransformer.layers.1.linear2.weight": {
2690
+ "shape": [
2691
+ 512,
2692
+ 2048
2693
+ ],
2694
+ "dtype": "mlx.core.float32"
2695
+ },
2696
+ "model_0.crosstransformer.layers.1.linear2.bias": {
2697
+ "shape": [
2698
+ 512
2699
+ ],
2700
+ "dtype": "mlx.core.float32"
2701
+ },
2702
+ "model_0.crosstransformer.layers.1.norm1.weight": {
2703
+ "shape": [
2704
+ 512
2705
+ ],
2706
+ "dtype": "mlx.core.float32"
2707
+ },
2708
+ "model_0.crosstransformer.layers.1.norm1.bias": {
2709
+ "shape": [
2710
+ 512
2711
+ ],
2712
+ "dtype": "mlx.core.float32"
2713
+ },
2714
+ "model_0.crosstransformer.layers.1.norm2.weight": {
2715
+ "shape": [
2716
+ 512
2717
+ ],
2718
+ "dtype": "mlx.core.float32"
2719
+ },
2720
+ "model_0.crosstransformer.layers.1.norm2.bias": {
2721
+ "shape": [
2722
+ 512
2723
+ ],
2724
+ "dtype": "mlx.core.float32"
2725
+ },
2726
+ "model_0.crosstransformer.layers.1.norm3.weight": {
2727
+ "shape": [
2728
+ 512
2729
+ ],
2730
+ "dtype": "mlx.core.float32"
2731
+ },
2732
+ "model_0.crosstransformer.layers.1.norm3.bias": {
2733
+ "shape": [
2734
+ 512
2735
+ ],
2736
+ "dtype": "mlx.core.float32"
2737
+ },
2738
+ "model_0.crosstransformer.layers.1.norm_out.gn.bias": {
2739
+ "shape": [
2740
+ 512
2741
+ ],
2742
+ "dtype": "mlx.core.float32"
2743
+ },
2744
+ "model_0.crosstransformer.layers.1.norm_out.gn.weight": {
2745
+ "shape": [
2746
+ 512
2747
+ ],
2748
+ "dtype": "mlx.core.float32"
2749
+ },
2750
+ "model_0.crosstransformer.layers.1.gamma_1.scale": {
2751
+ "shape": [
2752
+ 512
2753
+ ],
2754
+ "dtype": "mlx.core.float32"
2755
+ },
2756
+ "model_0.crosstransformer.layers.1.gamma_2.scale": {
2757
+ "shape": [
2758
+ 512
2759
+ ],
2760
+ "dtype": "mlx.core.float32"
2761
+ },
2762
+ "model_0.crosstransformer.layers.2.attn.query_proj.weight": {
2763
+ "shape": [
2764
+ 512,
2765
+ 512
2766
+ ],
2767
+ "dtype": "mlx.core.float32"
2768
+ },
2769
+ "model_0.crosstransformer.layers.2.attn.query_proj.bias": {
2770
+ "shape": [
2771
+ 512
2772
+ ],
2773
+ "dtype": "mlx.core.float32"
2774
+ },
2775
+ "model_0.crosstransformer.layers.2.attn.key_proj.weight": {
2776
+ "shape": [
2777
+ 512,
2778
+ 512
2779
+ ],
2780
+ "dtype": "mlx.core.float32"
2781
+ },
2782
+ "model_0.crosstransformer.layers.2.attn.key_proj.bias": {
2783
+ "shape": [
2784
+ 512
2785
+ ],
2786
+ "dtype": "mlx.core.float32"
2787
+ },
2788
+ "model_0.crosstransformer.layers.2.attn.value_proj.weight": {
2789
+ "shape": [
2790
+ 512,
2791
+ 512
2792
+ ],
2793
+ "dtype": "mlx.core.float32"
2794
+ },
2795
+ "model_0.crosstransformer.layers.2.attn.value_proj.bias": {
2796
+ "shape": [
2797
+ 512
2798
+ ],
2799
+ "dtype": "mlx.core.float32"
2800
+ },
2801
+ "model_0.crosstransformer.layers.2.attn.out_proj.weight": {
2802
+ "shape": [
2803
+ 512,
2804
+ 512
2805
+ ],
2806
+ "dtype": "mlx.core.float32"
2807
+ },
2808
+ "model_0.crosstransformer.layers.2.attn.out_proj.bias": {
2809
+ "shape": [
2810
+ 512
2811
+ ],
2812
+ "dtype": "mlx.core.float32"
2813
+ },
2814
+ "model_0.crosstransformer.layers.2.linear1.weight": {
2815
+ "shape": [
2816
+ 2048,
2817
+ 512
2818
+ ],
2819
+ "dtype": "mlx.core.float32"
2820
+ },
2821
+ "model_0.crosstransformer.layers.2.linear1.bias": {
2822
+ "shape": [
2823
+ 2048
2824
+ ],
2825
+ "dtype": "mlx.core.float32"
2826
+ },
2827
+ "model_0.crosstransformer.layers.2.linear2.weight": {
2828
+ "shape": [
2829
+ 512,
2830
+ 2048
2831
+ ],
2832
+ "dtype": "mlx.core.float32"
2833
+ },
2834
+ "model_0.crosstransformer.layers.2.linear2.bias": {
2835
+ "shape": [
2836
+ 512
2837
+ ],
2838
+ "dtype": "mlx.core.float32"
2839
+ },
2840
+ "model_0.crosstransformer.layers.2.norm1.weight": {
2841
+ "shape": [
2842
+ 512
2843
+ ],
2844
+ "dtype": "mlx.core.float32"
2845
+ },
2846
+ "model_0.crosstransformer.layers.2.norm1.bias": {
2847
+ "shape": [
2848
+ 512
2849
+ ],
2850
+ "dtype": "mlx.core.float32"
2851
+ },
2852
+ "model_0.crosstransformer.layers.2.norm2.weight": {
2853
+ "shape": [
2854
+ 512
2855
+ ],
2856
+ "dtype": "mlx.core.float32"
2857
+ },
2858
+ "model_0.crosstransformer.layers.2.norm2.bias": {
2859
+ "shape": [
2860
+ 512
2861
+ ],
2862
+ "dtype": "mlx.core.float32"
2863
+ },
2864
+ "model_0.crosstransformer.layers.2.norm_out.gn.bias": {
2865
+ "shape": [
2866
+ 512
2867
+ ],
2868
+ "dtype": "mlx.core.float32"
2869
+ },
2870
+ "model_0.crosstransformer.layers.2.norm_out.gn.weight": {
2871
+ "shape": [
2872
+ 512
2873
+ ],
2874
+ "dtype": "mlx.core.float32"
2875
+ },
2876
+ "model_0.crosstransformer.layers.2.gamma_1.scale": {
2877
+ "shape": [
2878
+ 512
2879
+ ],
2880
+ "dtype": "mlx.core.float32"
2881
+ },
2882
+ "model_0.crosstransformer.layers.2.gamma_2.scale": {
2883
+ "shape": [
2884
+ 512
2885
+ ],
2886
+ "dtype": "mlx.core.float32"
2887
+ },
2888
+ "model_0.crosstransformer.layers.3.cross_attn.query_proj.weight": {
2889
+ "shape": [
2890
+ 512,
2891
+ 512
2892
+ ],
2893
+ "dtype": "mlx.core.float32"
2894
+ },
2895
+ "model_0.crosstransformer.layers.3.cross_attn.query_proj.bias": {
2896
+ "shape": [
2897
+ 512
2898
+ ],
2899
+ "dtype": "mlx.core.float32"
2900
+ },
2901
+ "model_0.crosstransformer.layers.3.cross_attn.key_proj.weight": {
2902
+ "shape": [
2903
+ 512,
2904
+ 512
2905
+ ],
2906
+ "dtype": "mlx.core.float32"
2907
+ },
2908
+ "model_0.crosstransformer.layers.3.cross_attn.key_proj.bias": {
2909
+ "shape": [
2910
+ 512
2911
+ ],
2912
+ "dtype": "mlx.core.float32"
2913
+ },
2914
+ "model_0.crosstransformer.layers.3.cross_attn.value_proj.weight": {
2915
+ "shape": [
2916
+ 512,
2917
+ 512
2918
+ ],
2919
+ "dtype": "mlx.core.float32"
2920
+ },
2921
+ "model_0.crosstransformer.layers.3.cross_attn.value_proj.bias": {
2922
+ "shape": [
2923
+ 512
2924
+ ],
2925
+ "dtype": "mlx.core.float32"
2926
+ },
2927
+ "model_0.crosstransformer.layers.3.cross_attn.out_proj.weight": {
2928
+ "shape": [
2929
+ 512,
2930
+ 512
2931
+ ],
2932
+ "dtype": "mlx.core.float32"
2933
+ },
2934
+ "model_0.crosstransformer.layers.3.cross_attn.out_proj.bias": {
2935
+ "shape": [
2936
+ 512
2937
+ ],
2938
+ "dtype": "mlx.core.float32"
2939
+ },
2940
+ "model_0.crosstransformer.layers.3.linear1.weight": {
2941
+ "shape": [
2942
+ 2048,
2943
+ 512
2944
+ ],
2945
+ "dtype": "mlx.core.float32"
2946
+ },
2947
+ "model_0.crosstransformer.layers.3.linear1.bias": {
2948
+ "shape": [
2949
+ 2048
2950
+ ],
2951
+ "dtype": "mlx.core.float32"
2952
+ },
2953
+ "model_0.crosstransformer.layers.3.linear2.weight": {
2954
+ "shape": [
2955
+ 512,
2956
+ 2048
2957
+ ],
2958
+ "dtype": "mlx.core.float32"
2959
+ },
2960
+ "model_0.crosstransformer.layers.3.linear2.bias": {
2961
+ "shape": [
2962
+ 512
2963
+ ],
2964
+ "dtype": "mlx.core.float32"
2965
+ },
2966
+ "model_0.crosstransformer.layers.3.norm1.weight": {
2967
+ "shape": [
2968
+ 512
2969
+ ],
2970
+ "dtype": "mlx.core.float32"
2971
+ },
2972
+ "model_0.crosstransformer.layers.3.norm1.bias": {
2973
+ "shape": [
2974
+ 512
2975
+ ],
2976
+ "dtype": "mlx.core.float32"
2977
+ },
2978
+ "model_0.crosstransformer.layers.3.norm2.weight": {
2979
+ "shape": [
2980
+ 512
2981
+ ],
2982
+ "dtype": "mlx.core.float32"
2983
+ },
2984
+ "model_0.crosstransformer.layers.3.norm2.bias": {
2985
+ "shape": [
2986
+ 512
2987
+ ],
2988
+ "dtype": "mlx.core.float32"
2989
+ },
2990
+ "model_0.crosstransformer.layers.3.norm3.weight": {
2991
+ "shape": [
2992
+ 512
2993
+ ],
2994
+ "dtype": "mlx.core.float32"
2995
+ },
2996
+ "model_0.crosstransformer.layers.3.norm3.bias": {
2997
+ "shape": [
2998
+ 512
2999
+ ],
3000
+ "dtype": "mlx.core.float32"
3001
+ },
3002
+ "model_0.crosstransformer.layers.3.norm_out.gn.bias": {
3003
+ "shape": [
3004
+ 512
3005
+ ],
3006
+ "dtype": "mlx.core.float32"
3007
+ },
3008
+ "model_0.crosstransformer.layers.3.norm_out.gn.weight": {
3009
+ "shape": [
3010
+ 512
3011
+ ],
3012
+ "dtype": "mlx.core.float32"
3013
+ },
3014
+ "model_0.crosstransformer.layers.3.gamma_1.scale": {
3015
+ "shape": [
3016
+ 512
3017
+ ],
3018
+ "dtype": "mlx.core.float32"
3019
+ },
3020
+ "model_0.crosstransformer.layers.3.gamma_2.scale": {
3021
+ "shape": [
3022
+ 512
3023
+ ],
3024
+ "dtype": "mlx.core.float32"
3025
+ },
3026
+ "model_0.crosstransformer.layers.4.attn.query_proj.weight": {
3027
+ "shape": [
3028
+ 512,
3029
+ 512
3030
+ ],
3031
+ "dtype": "mlx.core.float32"
3032
+ },
3033
+ "model_0.crosstransformer.layers.4.attn.query_proj.bias": {
3034
+ "shape": [
3035
+ 512
3036
+ ],
3037
+ "dtype": "mlx.core.float32"
3038
+ },
3039
+ "model_0.crosstransformer.layers.4.attn.key_proj.weight": {
3040
+ "shape": [
3041
+ 512,
3042
+ 512
3043
+ ],
3044
+ "dtype": "mlx.core.float32"
3045
+ },
3046
+ "model_0.crosstransformer.layers.4.attn.key_proj.bias": {
3047
+ "shape": [
3048
+ 512
3049
+ ],
3050
+ "dtype": "mlx.core.float32"
3051
+ },
3052
+ "model_0.crosstransformer.layers.4.attn.value_proj.weight": {
3053
+ "shape": [
3054
+ 512,
3055
+ 512
3056
+ ],
3057
+ "dtype": "mlx.core.float32"
3058
+ },
3059
+ "model_0.crosstransformer.layers.4.attn.value_proj.bias": {
3060
+ "shape": [
3061
+ 512
3062
+ ],
3063
+ "dtype": "mlx.core.float32"
3064
+ },
3065
+ "model_0.crosstransformer.layers.4.attn.out_proj.weight": {
3066
+ "shape": [
3067
+ 512,
3068
+ 512
3069
+ ],
3070
+ "dtype": "mlx.core.float32"
3071
+ },
3072
+ "model_0.crosstransformer.layers.4.attn.out_proj.bias": {
3073
+ "shape": [
3074
+ 512
3075
+ ],
3076
+ "dtype": "mlx.core.float32"
3077
+ },
3078
+ "model_0.crosstransformer.layers.4.linear1.weight": {
3079
+ "shape": [
3080
+ 2048,
3081
+ 512
3082
+ ],
3083
+ "dtype": "mlx.core.float32"
3084
+ },
3085
+ "model_0.crosstransformer.layers.4.linear1.bias": {
3086
+ "shape": [
3087
+ 2048
3088
+ ],
3089
+ "dtype": "mlx.core.float32"
3090
+ },
3091
+ "model_0.crosstransformer.layers.4.linear2.weight": {
3092
+ "shape": [
3093
+ 512,
3094
+ 2048
3095
+ ],
3096
+ "dtype": "mlx.core.float32"
3097
+ },
3098
+ "model_0.crosstransformer.layers.4.linear2.bias": {
3099
+ "shape": [
3100
+ 512
3101
+ ],
3102
+ "dtype": "mlx.core.float32"
3103
+ },
3104
+ "model_0.crosstransformer.layers.4.norm1.weight": {
3105
+ "shape": [
3106
+ 512
3107
+ ],
3108
+ "dtype": "mlx.core.float32"
3109
+ },
3110
+ "model_0.crosstransformer.layers.4.norm1.bias": {
3111
+ "shape": [
3112
+ 512
3113
+ ],
3114
+ "dtype": "mlx.core.float32"
3115
+ },
3116
+ "model_0.crosstransformer.layers.4.norm2.weight": {
3117
+ "shape": [
3118
+ 512
3119
+ ],
3120
+ "dtype": "mlx.core.float32"
3121
+ },
3122
+ "model_0.crosstransformer.layers.4.norm2.bias": {
3123
+ "shape": [
3124
+ 512
3125
+ ],
3126
+ "dtype": "mlx.core.float32"
3127
+ },
3128
+ "model_0.crosstransformer.layers.4.norm_out.gn.bias": {
3129
+ "shape": [
3130
+ 512
3131
+ ],
3132
+ "dtype": "mlx.core.float32"
3133
+ },
3134
+ "model_0.crosstransformer.layers.4.norm_out.gn.weight": {
3135
+ "shape": [
3136
+ 512
3137
+ ],
3138
+ "dtype": "mlx.core.float32"
3139
+ },
3140
+ "model_0.crosstransformer.layers.4.gamma_1.scale": {
3141
+ "shape": [
3142
+ 512
3143
+ ],
3144
+ "dtype": "mlx.core.float32"
3145
+ },
3146
+ "model_0.crosstransformer.layers.4.gamma_2.scale": {
3147
+ "shape": [
3148
+ 512
3149
+ ],
3150
+ "dtype": "mlx.core.float32"
3151
+ },
3152
+ "model_0.crosstransformer.layers_t.0.attn.query_proj.weight": {
3153
+ "shape": [
3154
+ 512,
3155
+ 512
3156
+ ],
3157
+ "dtype": "mlx.core.float32"
3158
+ },
3159
+ "model_0.crosstransformer.layers_t.0.attn.query_proj.bias": {
3160
+ "shape": [
3161
+ 512
3162
+ ],
3163
+ "dtype": "mlx.core.float32"
3164
+ },
3165
+ "model_0.crosstransformer.layers_t.0.attn.key_proj.weight": {
3166
+ "shape": [
3167
+ 512,
3168
+ 512
3169
+ ],
3170
+ "dtype": "mlx.core.float32"
3171
+ },
3172
+ "model_0.crosstransformer.layers_t.0.attn.key_proj.bias": {
3173
+ "shape": [
3174
+ 512
3175
+ ],
3176
+ "dtype": "mlx.core.float32"
3177
+ },
3178
+ "model_0.crosstransformer.layers_t.0.attn.value_proj.weight": {
3179
+ "shape": [
3180
+ 512,
3181
+ 512
3182
+ ],
3183
+ "dtype": "mlx.core.float32"
3184
+ },
3185
+ "model_0.crosstransformer.layers_t.0.attn.value_proj.bias": {
3186
+ "shape": [
3187
+ 512
3188
+ ],
3189
+ "dtype": "mlx.core.float32"
3190
+ },
3191
+ "model_0.crosstransformer.layers_t.0.attn.out_proj.weight": {
3192
+ "shape": [
3193
+ 512,
3194
+ 512
3195
+ ],
3196
+ "dtype": "mlx.core.float32"
3197
+ },
3198
+ "model_0.crosstransformer.layers_t.0.attn.out_proj.bias": {
3199
+ "shape": [
3200
+ 512
3201
+ ],
3202
+ "dtype": "mlx.core.float32"
3203
+ },
3204
+ "model_0.crosstransformer.layers_t.0.linear1.weight": {
3205
+ "shape": [
3206
+ 2048,
3207
+ 512
3208
+ ],
3209
+ "dtype": "mlx.core.float32"
3210
+ },
3211
+ "model_0.crosstransformer.layers_t.0.linear1.bias": {
3212
+ "shape": [
3213
+ 2048
3214
+ ],
3215
+ "dtype": "mlx.core.float32"
3216
+ },
3217
+ "model_0.crosstransformer.layers_t.0.linear2.weight": {
3218
+ "shape": [
3219
+ 512,
3220
+ 2048
3221
+ ],
3222
+ "dtype": "mlx.core.float32"
3223
+ },
3224
+ "model_0.crosstransformer.layers_t.0.linear2.bias": {
3225
+ "shape": [
3226
+ 512
3227
+ ],
3228
+ "dtype": "mlx.core.float32"
3229
+ },
3230
+ "model_0.crosstransformer.layers_t.0.norm1.weight": {
3231
+ "shape": [
3232
+ 512
3233
+ ],
3234
+ "dtype": "mlx.core.float32"
3235
+ },
3236
+ "model_0.crosstransformer.layers_t.0.norm1.bias": {
3237
+ "shape": [
3238
+ 512
3239
+ ],
3240
+ "dtype": "mlx.core.float32"
3241
+ },
3242
+ "model_0.crosstransformer.layers_t.0.norm2.weight": {
3243
+ "shape": [
3244
+ 512
3245
+ ],
3246
+ "dtype": "mlx.core.float32"
3247
+ },
3248
+ "model_0.crosstransformer.layers_t.0.norm2.bias": {
3249
+ "shape": [
3250
+ 512
3251
+ ],
3252
+ "dtype": "mlx.core.float32"
3253
+ },
3254
+ "model_0.crosstransformer.layers_t.0.norm_out.gn.bias": {
3255
+ "shape": [
3256
+ 512
3257
+ ],
3258
+ "dtype": "mlx.core.float32"
3259
+ },
3260
+ "model_0.crosstransformer.layers_t.0.norm_out.gn.weight": {
3261
+ "shape": [
3262
+ 512
3263
+ ],
3264
+ "dtype": "mlx.core.float32"
3265
+ },
3266
+ "model_0.crosstransformer.layers_t.0.gamma_1.scale": {
3267
+ "shape": [
3268
+ 512
3269
+ ],
3270
+ "dtype": "mlx.core.float32"
3271
+ },
3272
+ "model_0.crosstransformer.layers_t.0.gamma_2.scale": {
3273
+ "shape": [
3274
+ 512
3275
+ ],
3276
+ "dtype": "mlx.core.float32"
3277
+ },
3278
+ "model_0.crosstransformer.layers_t.1.cross_attn.query_proj.weight": {
3279
+ "shape": [
3280
+ 512,
3281
+ 512
3282
+ ],
3283
+ "dtype": "mlx.core.float32"
3284
+ },
3285
+ "model_0.crosstransformer.layers_t.1.cross_attn.query_proj.bias": {
3286
+ "shape": [
3287
+ 512
3288
+ ],
3289
+ "dtype": "mlx.core.float32"
3290
+ },
3291
+ "model_0.crosstransformer.layers_t.1.cross_attn.key_proj.weight": {
3292
+ "shape": [
3293
+ 512,
3294
+ 512
3295
+ ],
3296
+ "dtype": "mlx.core.float32"
3297
+ },
3298
+ "model_0.crosstransformer.layers_t.1.cross_attn.key_proj.bias": {
3299
+ "shape": [
3300
+ 512
3301
+ ],
3302
+ "dtype": "mlx.core.float32"
3303
+ },
3304
+ "model_0.crosstransformer.layers_t.1.cross_attn.value_proj.weight": {
3305
+ "shape": [
3306
+ 512,
3307
+ 512
3308
+ ],
3309
+ "dtype": "mlx.core.float32"
3310
+ },
3311
+ "model_0.crosstransformer.layers_t.1.cross_attn.value_proj.bias": {
3312
+ "shape": [
3313
+ 512
3314
+ ],
3315
+ "dtype": "mlx.core.float32"
3316
+ },
3317
+ "model_0.crosstransformer.layers_t.1.cross_attn.out_proj.weight": {
3318
+ "shape": [
3319
+ 512,
3320
+ 512
3321
+ ],
3322
+ "dtype": "mlx.core.float32"
3323
+ },
3324
+ "model_0.crosstransformer.layers_t.1.cross_attn.out_proj.bias": {
3325
+ "shape": [
3326
+ 512
3327
+ ],
3328
+ "dtype": "mlx.core.float32"
3329
+ },
3330
+ "model_0.crosstransformer.layers_t.1.linear1.weight": {
3331
+ "shape": [
3332
+ 2048,
3333
+ 512
3334
+ ],
3335
+ "dtype": "mlx.core.float32"
3336
+ },
3337
+ "model_0.crosstransformer.layers_t.1.linear1.bias": {
3338
+ "shape": [
3339
+ 2048
3340
+ ],
3341
+ "dtype": "mlx.core.float32"
3342
+ },
3343
+ "model_0.crosstransformer.layers_t.1.linear2.weight": {
3344
+ "shape": [
3345
+ 512,
3346
+ 2048
3347
+ ],
3348
+ "dtype": "mlx.core.float32"
3349
+ },
3350
+ "model_0.crosstransformer.layers_t.1.linear2.bias": {
3351
+ "shape": [
3352
+ 512
3353
+ ],
3354
+ "dtype": "mlx.core.float32"
3355
+ },
3356
+ "model_0.crosstransformer.layers_t.1.norm1.weight": {
3357
+ "shape": [
3358
+ 512
3359
+ ],
3360
+ "dtype": "mlx.core.float32"
3361
+ },
3362
+ "model_0.crosstransformer.layers_t.1.norm1.bias": {
3363
+ "shape": [
3364
+ 512
3365
+ ],
3366
+ "dtype": "mlx.core.float32"
3367
+ },
3368
+ "model_0.crosstransformer.layers_t.1.norm2.weight": {
3369
+ "shape": [
3370
+ 512
3371
+ ],
3372
+ "dtype": "mlx.core.float32"
3373
+ },
3374
+ "model_0.crosstransformer.layers_t.1.norm2.bias": {
3375
+ "shape": [
3376
+ 512
3377
+ ],
3378
+ "dtype": "mlx.core.float32"
3379
+ },
3380
+ "model_0.crosstransformer.layers_t.1.norm3.weight": {
3381
+ "shape": [
3382
+ 512
3383
+ ],
3384
+ "dtype": "mlx.core.float32"
3385
+ },
3386
+ "model_0.crosstransformer.layers_t.1.norm3.bias": {
3387
+ "shape": [
3388
+ 512
3389
+ ],
3390
+ "dtype": "mlx.core.float32"
3391
+ },
3392
+ "model_0.crosstransformer.layers_t.1.norm_out.gn.bias": {
3393
+ "shape": [
3394
+ 512
3395
+ ],
3396
+ "dtype": "mlx.core.float32"
3397
+ },
3398
+ "model_0.crosstransformer.layers_t.1.norm_out.gn.weight": {
3399
+ "shape": [
3400
+ 512
3401
+ ],
3402
+ "dtype": "mlx.core.float32"
3403
+ },
3404
+ "model_0.crosstransformer.layers_t.1.gamma_1.scale": {
3405
+ "shape": [
3406
+ 512
3407
+ ],
3408
+ "dtype": "mlx.core.float32"
3409
+ },
3410
+ "model_0.crosstransformer.layers_t.1.gamma_2.scale": {
3411
+ "shape": [
3412
+ 512
3413
+ ],
3414
+ "dtype": "mlx.core.float32"
3415
+ },
3416
+ "model_0.crosstransformer.layers_t.2.attn.query_proj.weight": {
3417
+ "shape": [
3418
+ 512,
3419
+ 512
3420
+ ],
3421
+ "dtype": "mlx.core.float32"
3422
+ },
3423
+ "model_0.crosstransformer.layers_t.2.attn.query_proj.bias": {
3424
+ "shape": [
3425
+ 512
3426
+ ],
3427
+ "dtype": "mlx.core.float32"
3428
+ },
3429
+ "model_0.crosstransformer.layers_t.2.attn.key_proj.weight": {
3430
+ "shape": [
3431
+ 512,
3432
+ 512
3433
+ ],
3434
+ "dtype": "mlx.core.float32"
3435
+ },
3436
+ "model_0.crosstransformer.layers_t.2.attn.key_proj.bias": {
3437
+ "shape": [
3438
+ 512
3439
+ ],
3440
+ "dtype": "mlx.core.float32"
3441
+ },
3442
+ "model_0.crosstransformer.layers_t.2.attn.value_proj.weight": {
3443
+ "shape": [
3444
+ 512,
3445
+ 512
3446
+ ],
3447
+ "dtype": "mlx.core.float32"
3448
+ },
3449
+ "model_0.crosstransformer.layers_t.2.attn.value_proj.bias": {
3450
+ "shape": [
3451
+ 512
3452
+ ],
3453
+ "dtype": "mlx.core.float32"
3454
+ },
3455
+ "model_0.crosstransformer.layers_t.2.attn.out_proj.weight": {
3456
+ "shape": [
3457
+ 512,
3458
+ 512
3459
+ ],
3460
+ "dtype": "mlx.core.float32"
3461
+ },
3462
+ "model_0.crosstransformer.layers_t.2.attn.out_proj.bias": {
3463
+ "shape": [
3464
+ 512
3465
+ ],
3466
+ "dtype": "mlx.core.float32"
3467
+ },
3468
+ "model_0.crosstransformer.layers_t.2.linear1.weight": {
3469
+ "shape": [
3470
+ 2048,
3471
+ 512
3472
+ ],
3473
+ "dtype": "mlx.core.float32"
3474
+ },
3475
+ "model_0.crosstransformer.layers_t.2.linear1.bias": {
3476
+ "shape": [
3477
+ 2048
3478
+ ],
3479
+ "dtype": "mlx.core.float32"
3480
+ },
3481
+ "model_0.crosstransformer.layers_t.2.linear2.weight": {
3482
+ "shape": [
3483
+ 512,
3484
+ 2048
3485
+ ],
3486
+ "dtype": "mlx.core.float32"
3487
+ },
3488
+ "model_0.crosstransformer.layers_t.2.linear2.bias": {
3489
+ "shape": [
3490
+ 512
3491
+ ],
3492
+ "dtype": "mlx.core.float32"
3493
+ },
3494
+ "model_0.crosstransformer.layers_t.2.norm1.weight": {
3495
+ "shape": [
3496
+ 512
3497
+ ],
3498
+ "dtype": "mlx.core.float32"
3499
+ },
3500
+ "model_0.crosstransformer.layers_t.2.norm1.bias": {
3501
+ "shape": [
3502
+ 512
3503
+ ],
3504
+ "dtype": "mlx.core.float32"
3505
+ },
3506
+ "model_0.crosstransformer.layers_t.2.norm2.weight": {
3507
+ "shape": [
3508
+ 512
3509
+ ],
3510
+ "dtype": "mlx.core.float32"
3511
+ },
3512
+ "model_0.crosstransformer.layers_t.2.norm2.bias": {
3513
+ "shape": [
3514
+ 512
3515
+ ],
3516
+ "dtype": "mlx.core.float32"
3517
+ },
3518
+ "model_0.crosstransformer.layers_t.2.norm_out.gn.bias": {
3519
+ "shape": [
3520
+ 512
3521
+ ],
3522
+ "dtype": "mlx.core.float32"
3523
+ },
3524
+ "model_0.crosstransformer.layers_t.2.norm_out.gn.weight": {
3525
+ "shape": [
3526
+ 512
3527
+ ],
3528
+ "dtype": "mlx.core.float32"
3529
+ },
3530
+ "model_0.crosstransformer.layers_t.2.gamma_1.scale": {
3531
+ "shape": [
3532
+ 512
3533
+ ],
3534
+ "dtype": "mlx.core.float32"
3535
+ },
3536
+ "model_0.crosstransformer.layers_t.2.gamma_2.scale": {
3537
+ "shape": [
3538
+ 512
3539
+ ],
3540
+ "dtype": "mlx.core.float32"
3541
+ },
3542
+ "model_0.crosstransformer.layers_t.3.cross_attn.query_proj.weight": {
3543
+ "shape": [
3544
+ 512,
3545
+ 512
3546
+ ],
3547
+ "dtype": "mlx.core.float32"
3548
+ },
3549
+ "model_0.crosstransformer.layers_t.3.cross_attn.query_proj.bias": {
3550
+ "shape": [
3551
+ 512
3552
+ ],
3553
+ "dtype": "mlx.core.float32"
3554
+ },
3555
+ "model_0.crosstransformer.layers_t.3.cross_attn.key_proj.weight": {
3556
+ "shape": [
3557
+ 512,
3558
+ 512
3559
+ ],
3560
+ "dtype": "mlx.core.float32"
3561
+ },
3562
+ "model_0.crosstransformer.layers_t.3.cross_attn.key_proj.bias": {
3563
+ "shape": [
3564
+ 512
3565
+ ],
3566
+ "dtype": "mlx.core.float32"
3567
+ },
3568
+ "model_0.crosstransformer.layers_t.3.cross_attn.value_proj.weight": {
3569
+ "shape": [
3570
+ 512,
3571
+ 512
3572
+ ],
3573
+ "dtype": "mlx.core.float32"
3574
+ },
3575
+ "model_0.crosstransformer.layers_t.3.cross_attn.value_proj.bias": {
3576
+ "shape": [
3577
+ 512
3578
+ ],
3579
+ "dtype": "mlx.core.float32"
3580
+ },
3581
+ "model_0.crosstransformer.layers_t.3.cross_attn.out_proj.weight": {
3582
+ "shape": [
3583
+ 512,
3584
+ 512
3585
+ ],
3586
+ "dtype": "mlx.core.float32"
3587
+ },
3588
+ "model_0.crosstransformer.layers_t.3.cross_attn.out_proj.bias": {
3589
+ "shape": [
3590
+ 512
3591
+ ],
3592
+ "dtype": "mlx.core.float32"
3593
+ },
3594
+ "model_0.crosstransformer.layers_t.3.linear1.weight": {
3595
+ "shape": [
3596
+ 2048,
3597
+ 512
3598
+ ],
3599
+ "dtype": "mlx.core.float32"
3600
+ },
3601
+ "model_0.crosstransformer.layers_t.3.linear1.bias": {
3602
+ "shape": [
3603
+ 2048
3604
+ ],
3605
+ "dtype": "mlx.core.float32"
3606
+ },
3607
+ "model_0.crosstransformer.layers_t.3.linear2.weight": {
3608
+ "shape": [
3609
+ 512,
3610
+ 2048
3611
+ ],
3612
+ "dtype": "mlx.core.float32"
3613
+ },
3614
+ "model_0.crosstransformer.layers_t.3.linear2.bias": {
3615
+ "shape": [
3616
+ 512
3617
+ ],
3618
+ "dtype": "mlx.core.float32"
3619
+ },
3620
+ "model_0.crosstransformer.layers_t.3.norm1.weight": {
3621
+ "shape": [
3622
+ 512
3623
+ ],
3624
+ "dtype": "mlx.core.float32"
3625
+ },
3626
+ "model_0.crosstransformer.layers_t.3.norm1.bias": {
3627
+ "shape": [
3628
+ 512
3629
+ ],
3630
+ "dtype": "mlx.core.float32"
3631
+ },
3632
+ "model_0.crosstransformer.layers_t.3.norm2.weight": {
3633
+ "shape": [
3634
+ 512
3635
+ ],
3636
+ "dtype": "mlx.core.float32"
3637
+ },
3638
+ "model_0.crosstransformer.layers_t.3.norm2.bias": {
3639
+ "shape": [
3640
+ 512
3641
+ ],
3642
+ "dtype": "mlx.core.float32"
3643
+ },
3644
+ "model_0.crosstransformer.layers_t.3.norm3.weight": {
3645
+ "shape": [
3646
+ 512
3647
+ ],
3648
+ "dtype": "mlx.core.float32"
3649
+ },
3650
+ "model_0.crosstransformer.layers_t.3.norm3.bias": {
3651
+ "shape": [
3652
+ 512
3653
+ ],
3654
+ "dtype": "mlx.core.float32"
3655
+ },
3656
+ "model_0.crosstransformer.layers_t.3.norm_out.gn.bias": {
3657
+ "shape": [
3658
+ 512
3659
+ ],
3660
+ "dtype": "mlx.core.float32"
3661
+ },
3662
+ "model_0.crosstransformer.layers_t.3.norm_out.gn.weight": {
3663
+ "shape": [
3664
+ 512
3665
+ ],
3666
+ "dtype": "mlx.core.float32"
3667
+ },
3668
+ "model_0.crosstransformer.layers_t.3.gamma_1.scale": {
3669
+ "shape": [
3670
+ 512
3671
+ ],
3672
+ "dtype": "mlx.core.float32"
3673
+ },
3674
+ "model_0.crosstransformer.layers_t.3.gamma_2.scale": {
3675
+ "shape": [
3676
+ 512
3677
+ ],
3678
+ "dtype": "mlx.core.float32"
3679
+ },
3680
+ "model_0.crosstransformer.layers_t.4.attn.query_proj.weight": {
3681
+ "shape": [
3682
+ 512,
3683
+ 512
3684
+ ],
3685
+ "dtype": "mlx.core.float32"
3686
+ },
3687
+ "model_0.crosstransformer.layers_t.4.attn.query_proj.bias": {
3688
+ "shape": [
3689
+ 512
3690
+ ],
3691
+ "dtype": "mlx.core.float32"
3692
+ },
3693
+ "model_0.crosstransformer.layers_t.4.attn.key_proj.weight": {
3694
+ "shape": [
3695
+ 512,
3696
+ 512
3697
+ ],
3698
+ "dtype": "mlx.core.float32"
3699
+ },
3700
+ "model_0.crosstransformer.layers_t.4.attn.key_proj.bias": {
3701
+ "shape": [
3702
+ 512
3703
+ ],
3704
+ "dtype": "mlx.core.float32"
3705
+ },
3706
+ "model_0.crosstransformer.layers_t.4.attn.value_proj.weight": {
3707
+ "shape": [
3708
+ 512,
3709
+ 512
3710
+ ],
3711
+ "dtype": "mlx.core.float32"
3712
+ },
3713
+ "model_0.crosstransformer.layers_t.4.attn.value_proj.bias": {
3714
+ "shape": [
3715
+ 512
3716
+ ],
3717
+ "dtype": "mlx.core.float32"
3718
+ },
3719
+ "model_0.crosstransformer.layers_t.4.attn.out_proj.weight": {
3720
+ "shape": [
3721
+ 512,
3722
+ 512
3723
+ ],
3724
+ "dtype": "mlx.core.float32"
3725
+ },
3726
+ "model_0.crosstransformer.layers_t.4.attn.out_proj.bias": {
3727
+ "shape": [
3728
+ 512
3729
+ ],
3730
+ "dtype": "mlx.core.float32"
3731
+ },
3732
+ "model_0.crosstransformer.layers_t.4.linear1.weight": {
3733
+ "shape": [
3734
+ 2048,
3735
+ 512
3736
+ ],
3737
+ "dtype": "mlx.core.float32"
3738
+ },
3739
+ "model_0.crosstransformer.layers_t.4.linear1.bias": {
3740
+ "shape": [
3741
+ 2048
3742
+ ],
3743
+ "dtype": "mlx.core.float32"
3744
+ },
3745
+ "model_0.crosstransformer.layers_t.4.linear2.weight": {
3746
+ "shape": [
3747
+ 512,
3748
+ 2048
3749
+ ],
3750
+ "dtype": "mlx.core.float32"
3751
+ },
3752
+ "model_0.crosstransformer.layers_t.4.linear2.bias": {
3753
+ "shape": [
3754
+ 512
3755
+ ],
3756
+ "dtype": "mlx.core.float32"
3757
+ },
3758
+ "model_0.crosstransformer.layers_t.4.norm1.weight": {
3759
+ "shape": [
3760
+ 512
3761
+ ],
3762
+ "dtype": "mlx.core.float32"
3763
+ },
3764
+ "model_0.crosstransformer.layers_t.4.norm1.bias": {
3765
+ "shape": [
3766
+ 512
3767
+ ],
3768
+ "dtype": "mlx.core.float32"
3769
+ },
3770
+ "model_0.crosstransformer.layers_t.4.norm2.weight": {
3771
+ "shape": [
3772
+ 512
3773
+ ],
3774
+ "dtype": "mlx.core.float32"
3775
+ },
3776
+ "model_0.crosstransformer.layers_t.4.norm2.bias": {
3777
+ "shape": [
3778
+ 512
3779
+ ],
3780
+ "dtype": "mlx.core.float32"
3781
+ },
3782
+ "model_0.crosstransformer.layers_t.4.norm_out.gn.bias": {
3783
+ "shape": [
3784
+ 512
3785
+ ],
3786
+ "dtype": "mlx.core.float32"
3787
+ },
3788
+ "model_0.crosstransformer.layers_t.4.norm_out.gn.weight": {
3789
+ "shape": [
3790
+ 512
3791
+ ],
3792
+ "dtype": "mlx.core.float32"
3793
+ },
3794
+ "model_0.crosstransformer.layers_t.4.gamma_1.scale": {
3795
+ "shape": [
3796
+ 512
3797
+ ],
3798
+ "dtype": "mlx.core.float32"
3799
+ },
3800
+ "model_0.crosstransformer.layers_t.4.gamma_2.scale": {
3801
+ "shape": [
3802
+ 512
3803
+ ],
3804
+ "dtype": "mlx.core.float32"
3805
+ }
3806
+ }
3807
+ }