alexwengg commited on
Commit
9dd220c
·
verified ·
1 Parent(s): f8c382d

Upload 39 files

Browse files
Files changed (39) hide show
  1. final_scripts/export_decoder_joint.py +188 -0
  2. final_scripts/export_encoder.py +149 -0
  3. final_scripts/export_preprocessor.py +95 -0
  4. final_scripts/inference_benchmark.py +847 -0
  5. final_scripts/inference_reference_nemo.py +238 -0
  6. parakeet_decoder.mlmodelc/analytics/coremldata.bin +3 -0
  7. parakeet_decoder.mlmodelc/coremldata.bin +3 -0
  8. parakeet_decoder.mlmodelc/metadata.json +116 -0
  9. parakeet_decoder.mlmodelc/model.mil +47 -0
  10. parakeet_decoder.mlmodelc/weights/weight.bin +3 -0
  11. parakeet_decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  12. parakeet_decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  13. parakeet_decoder.mlpackage/Manifest.json +18 -0
  14. parakeet_joint.mlmodelc/analytics/coremldata.bin +3 -0
  15. parakeet_joint.mlmodelc/coremldata.bin +3 -0
  16. parakeet_joint.mlmodelc/metadata.json +74 -0
  17. parakeet_joint.mlmodelc/model.mil +23 -0
  18. parakeet_joint.mlmodelc/weights/weight.bin +3 -0
  19. parakeet_joint.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  20. parakeet_joint.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  21. parakeet_joint.mlpackage/Manifest.json +18 -0
  22. preprocessor.mlmodelc/analytics/coremldata.bin +3 -0
  23. preprocessor.mlmodelc/coremldata.bin +3 -0
  24. preprocessor.mlmodelc/metadata.json +103 -0
  25. preprocessor.mlmodelc/model.mil +104 -0
  26. preprocessor.mlmodelc/weights/weight.bin +3 -0
  27. preprocessor.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  28. preprocessor.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  29. preprocessor.mlpackage/Manifest.json +18 -0
  30. streaming_encoder.mlmodelc/analytics/coremldata.bin +3 -0
  31. streaming_encoder.mlmodelc/coremldata.bin +3 -0
  32. streaming_encoder.mlmodelc/metadata.json +167 -0
  33. streaming_encoder.mlmodelc/model.mil +0 -0
  34. streaming_encoder.mlmodelc/weights/weight.bin +3 -0
  35. streaming_encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  36. streaming_encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  37. streaming_encoder.mlpackage/Manifest.json +18 -0
  38. tokenizer.model +3 -0
  39. vocab.json +1028 -1028
final_scripts/export_decoder_joint.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import nemo.collections.asr as nemo_asr
4
+ import coremltools as ct
5
+ import numpy as np
6
+
7
class DecoderWrapper(nn.Module):
    """Export wrapper for the RNNT prediction network (decoder).

    CoreML I/O carries the LSTM state batch-first as [batch, layers, hidden],
    while NeMo's ``predict`` expects layer-first [layers, batch, hidden], so
    the state is transposed on the way in and transposed back on the way out.
    """

    def __init__(self, decoder, hidden_size):
        super().__init__()
        self.decoder = decoder
        self.hidden_size = hidden_size

    def forward(self, targets, target_length, h_in, c_in):
        # Rearrange incoming state to NeMo's layer-first layout.
        lstm_state = (
            h_in.permute(1, 0, 2).contiguous(),
            c_in.permute(1, 0, 2).contiguous(),
        )

        # One prediction-network step; targets is [B, U] (here [1, 1]).
        dec_out, updated_state = self.decoder.predict(targets, lstm_state, add_sos=False)

        # dec_out: [B, U, H]. Return state in the batch-first layout the
        # CoreML caller uses.
        return (
            dec_out,
            updated_state[0].permute(1, 0, 2),
            updated_state[1].permute(1, 0, 2),
        )
35
+
36
class JointWrapper(nn.Module):
    """Export wrapper for the RNNT joint network (single-frame decode).

    Takes raw (unprojected) encoder and prediction-network outputs, applies
    the joint module's own projections, combines them via broadcasting, and
    runs the joint MLP. The projections and the sum are invoked manually
    instead of calling ``joint(...)`` so that no length checks end up in the
    traced graph.
    """

    def __init__(self, joint):
        super().__init__()
        self.joint = joint

    def forward(self, encoder_output, decoder_output):
        # encoder_output arrives channel-major [B, D, T]; the encoder
        # projection wants time-major features [B, T, D].
        f = self.joint.project_encoder(encoder_output.transpose(1, 2))

        # decoder_output is already [B, U, H] as produced by DecoderWrapper.
        g = self.joint.project_prednet(decoder_output)

        # Broadcast-add: [B, T, 1, D] + [B, 1, U, D] -> [B, T, U, D].
        combined = f.unsqueeze(2) + g.unsqueeze(1)

        # Joint MLP (activation + linear) yields logits [B, T, U, V+1].
        return self.joint.joint_net(combined)
93
+
94
def export_rnnt_decoder_joint(model_id="nvidia/parakeet_realtime_eou_120m-v1"):
    """Export the Parakeet RNNT decoder and joint networks to CoreML.

    Traces ``DecoderWrapper`` and ``JointWrapper`` around the pretrained
    NeMo model's modules using fixed single-token / single-frame shapes, and
    writes ``parakeet_decoder.mlpackage`` and ``parakeet_joint.mlpackage``
    into the current directory.
    """
    print(f"Loading model: {model_id}")
    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location="cpu")
    asr_model.eval()

    decoder = asr_model.decoder
    joint = asr_model.joint

    hidden_size = decoder.pred_hidden  # prediction-network width (640)
    vocab_size = decoder.vocab_size    # token vocabulary size (1024)
    print(f"Decoder Hidden Size: {hidden_size}")
    print(f"Vocab Size: {vocab_size}")

    # --- Decoder export ---
    print("Exporting Decoder...")
    decoder_wrapper = DecoderWrapper(decoder, hidden_size)
    decoder_wrapper.eval()

    num_layers = decoder.pred_rnn_layers
    print(f"Decoder Layers: {num_layers}")

    # Trace with a single token and a zeroed batch-first LSTM state.
    sample_targets = torch.zeros((1, 1), dtype=torch.int32)
    sample_target_len = torch.tensor([1], dtype=torch.int32)
    sample_h = torch.zeros((1, num_layers, hidden_size), dtype=torch.float32)
    sample_c = torch.zeros((1, num_layers, hidden_size), dtype=torch.float32)

    traced_decoder = torch.jit.trace(
        decoder_wrapper, (sample_targets, sample_target_len, sample_h, sample_c)
    )

    decoder_mlmodel = ct.convert(
        traced_decoder,
        inputs=[
            ct.TensorType(name="targets", shape=(1, 1), dtype=np.int32),
            ct.TensorType(name="target_length", shape=(1,), dtype=np.int32),
            ct.TensorType(name="h_in", shape=(1, num_layers, hidden_size), dtype=np.float32),
            ct.TensorType(name="c_in", shape=(1, num_layers, hidden_size), dtype=np.float32),
        ],
        outputs=[
            ct.TensorType(name="decoder_output"),
            ct.TensorType(name="h_out"),
            ct.TensorType(name="c_out"),
        ],
        minimum_deployment_target=ct.target.iOS17,
        compute_units=ct.ComputeUnit.CPU_ONLY,
    )
    decoder_mlmodel.save("parakeet_decoder.mlpackage")
    print("Saved parakeet_decoder.mlpackage")

    # --- Joint export ---
    print("Exporting Joint...")
    joint_wrapper = JointWrapper(joint)
    joint_wrapper.eval()

    # Encoder frame [B, D, T] = [1, 512, 1]; decoder step [B, U, H] = [1, 1, 640].
    sample_enc = torch.randn(1, 512, 1)
    sample_dec = torch.randn(1, 1, 640)

    # Sanity-check the wrapper once before tracing.
    with torch.no_grad():
        probe = joint_wrapper(sample_enc, sample_dec)
        print(f"Joint Output Shape: {probe.shape}")

    traced_joint = torch.jit.trace(joint_wrapper, (sample_enc, sample_dec))

    joint_mlmodel = ct.convert(
        traced_joint,
        inputs=[
            ct.TensorType(name="encoder_output", shape=(1, 512, 1), dtype=np.float32),
            ct.TensorType(name="decoder_output", shape=(1, 1, 640), dtype=np.float32),
        ],
        outputs=[
            ct.TensorType(name="logits"),
        ],
        minimum_deployment_target=ct.target.iOS17,
        compute_units=ct.ComputeUnit.CPU_ONLY,
        compute_precision=ct.precision.FLOAT32,
    )
    joint_mlmodel.save("parakeet_joint.mlpackage")
    print("Saved parakeet_joint.mlpackage")


if __name__ == "__main__":
    export_rnnt_decoder_joint()
final_scripts/export_encoder.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ import torch.nn as nn
4
+ import nemo.collections.asr as nemo_asr
5
+ import coremltools as ct
6
+ import numpy as np
7
+ from typing import Tuple
8
+
9
class StreamingEncoderWrapper(nn.Module):
    """Wrapper for the cache-aware streaming encoder.

    NeMo's ``cache_aware_stream_step`` returns only the newly produced cache
    slices; this wrapper maintains full, fixed-size caches by treating them
    as ring buffers (drop oldest, append newest), which keeps the CoreML
    cache I/O shapes constant across calls.
    """

    def __init__(self, encoder: nn.Module, keep_all_outputs: bool = True):
        super().__init__()
        self.encoder = encoder
        self.keep_all_outputs = keep_all_outputs

        # Make sure streaming parameters exist before the wrapper is traced.
        if encoder.streaming_cfg is None:
            encoder.setup_streaming_params()
        self.streaming_cfg = encoder.streaming_cfg

    @staticmethod
    def _ring_update(previous: torch.Tensor, fresh: torch.Tensor, dim: int) -> torch.Tensor:
        """Shift ``previous`` along ``dim`` and append ``fresh`` (ring buffer).

        When the fresh slice is at least as long as the cache it simply
        replaces it; otherwise the oldest entries are dropped to make room.
        """
        fresh_len = fresh.size(dim)
        if fresh_len >= previous.size(dim):
            return fresh
        kept = previous.narrow(dim, fresh_len, previous.size(dim) - fresh_len)
        return torch.cat([kept, fresh], dim=dim)

    def forward(
        self,
        mel: torch.Tensor,
        mel_length: torch.Tensor,
        cache_last_channel: torch.Tensor,
        cache_last_time: torch.Tensor,
        cache_last_channel_len: torch.Tensor,
    ) -> Tuple[torch.Tensor, ...]:
        # One cache-aware encoder step.
        step = self.encoder.cache_aware_stream_step(
            processed_signal=mel,
            processed_signal_length=mel_length,
            cache_last_channel=cache_last_channel,
            cache_last_time=cache_last_time,
            cache_last_channel_len=cache_last_channel_len,
        )
        encoded = step[0]
        encoded_len = step[1]

        # Channel cache holds time on dim 2; time cache holds time on dim 3.
        channel_cache = self._ring_update(cache_last_channel, step[2], dim=2)
        time_cache = self._ring_update(cache_last_time, step[3], dim=3)

        return (encoded, encoded_len, channel_cache, time_cache, step[4])
70
+
71
def export_streaming_encoder(model_id="nvidia/parakeet_realtime_eou_120m-v1", output_path="streaming_encoder.mlpackage", frames=16, shift=None, streaming_chunk_size=None):
    """Export the Parakeet cache-aware streaming encoder to a CoreML package.

    Args:
        model_id: Pretrained NeMo model identifier.
        output_path: Destination ``.mlpackage`` path.
        frames: Mel frames per chunk (10 ms per frame).
        shift: Shift size in frames; defaults to the chunk size.
        streaming_chunk_size: Overrides ``frames`` for the encoder's
            streaming-parameter setup when given.
    """
    print(f"Loading model: {model_id}")
    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location="cpu")
    asr_model.eval()

    encoder = asr_model.encoder

    # Resolve streaming configuration: explicit model chunk size wins,
    # otherwise fall back to the export frame count.
    chunk_setting = streaming_chunk_size if streaming_chunk_size is not None else frames
    shift_setting = shift if shift is not None else chunk_setting

    print(f"Setting up streaming params: chunk_size={chunk_setting}, shift_size={shift_setting}")
    encoder.setup_streaming_params(chunk_size=chunk_setting, shift_size=shift_setting)

    wrapper = StreamingEncoderWrapper(encoder)
    wrapper.eval()

    print(f"Exporting for chunk size: {frames} frames ({frames*10}ms)")
    if shift:
        print(f"Shift size: {shift} frames ({shift*10}ms)")

    mel_dim = 128    # Parakeet uses 128 mel features, not 80
    num_layers = 17  # FastConformer architecture layer count

    sample_inputs = (
        torch.randn(1, mel_dim, frames),
        torch.tensor([frames], dtype=torch.int32),
        torch.randn(num_layers, 1, 70, 512),    # cache_last_channel
        torch.randn(num_layers, 1, 512, 8),     # cache_last_time
        torch.tensor([0], dtype=torch.int32),   # cache_last_channel_len
    )

    print("Tracing model...")
    traced = torch.jit.trace(wrapper, sample_inputs, strict=False)

    print("Converting to CoreML...")
    conversion_inputs = [
        ct.TensorType(name="mel", shape=(1, mel_dim, frames), dtype=np.float32),
        ct.TensorType(name="mel_length", shape=(1,), dtype=np.int32),
        ct.TensorType(name="cache_last_channel", shape=(num_layers, 1, 70, 512), dtype=np.float32),
        ct.TensorType(name="cache_last_time", shape=(num_layers, 1, 512, 8), dtype=np.float32),
        ct.TensorType(name="cache_last_channel_len", shape=(1,), dtype=np.int32),
    ]
    conversion_outputs = [
        ct.TensorType(name="encoder", dtype=np.float32),
        ct.TensorType(name="encoder_length", dtype=np.int32),
        ct.TensorType(name="cache_last_channel_out", dtype=np.float32),
        ct.TensorType(name="cache_last_time_out", dtype=np.float32),
        ct.TensorType(name="cache_last_channel_len_out", dtype=np.int32),
    ]

    mlmodel = ct.convert(
        traced,
        inputs=conversion_inputs,
        outputs=conversion_outputs,
        minimum_deployment_target=ct.target.iOS17,
        compute_units=ct.ComputeUnit.ALL,
    )

    print(f"Saving to {output_path}")
    mlmodel.save(output_path)
    print("Done!")


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--frames", type=int, default=16, help="Number of frames per chunk (10ms per frame)")
    parser.add_argument("--shift", type=int, default=None, help="Shift size in frames (default: same as frames)")
    parser.add_argument("--model-chunk-size", type=int, default=None, help="Chunk size for model setup (output steps). If None, uses frames.")
    parser.add_argument("--output", type=str, default="streaming_encoder.mlpackage", help="Output path")
    cli = parser.parse_args()

    export_streaming_encoder(frames=cli.frames, shift=cli.shift, streaming_chunk_size=cli.model_chunk_size, output_path=cli.output)
final_scripts/export_preprocessor.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import nemo.collections.asr as nemo_asr
4
+ import coremltools as ct
5
+ import numpy as np
6
+ import argparse
7
+
8
class PreprocessorWrapper(nn.Module):
    """Thin export wrapper that forwards audio through the NeMo preprocessor."""

    def __init__(self, preprocessor: nn.Module):
        super().__init__()
        self.preprocessor = preprocessor

    def forward(
        self,
        input_signal: torch.Tensor,
        length: torch.Tensor,
    ):
        # Delegate to the preprocessor; it produces the mel features and
        # their per-example lengths.
        mel, mel_length = self.preprocessor(input_signal=input_signal, length=length)
        return mel, mel_length
26
+
27
def export_preprocessor(
    model_id="nvidia/parakeet_realtime_eou_120m-v1",
    output_path="preprocessor.mlpackage",
    chunk_ms=160
):
    """Export the NeMo audio preprocessor (waveform -> mel) to CoreML.

    Args:
        model_id: Pretrained NeMo model identifier.
        output_path: Destination ``.mlpackage`` path.
        chunk_ms: Default audio chunk length in milliseconds (16 kHz audio).
    """
    print(f"Loading model: {model_id}")
    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location="cpu")
    asr_model.eval()

    preprocessor = asr_model.preprocessor
    # Disable dither and padding so traced inference is deterministic.
    if hasattr(preprocessor, 'dither'):
        preprocessor.dither = 0.0
    if hasattr(preprocessor, 'pad_to'):
        preprocessor.pad_to = 0

    wrapper = PreprocessorWrapper(preprocessor)
    wrapper.eval()

    # e.g. 160 ms at 16 kHz -> 2560 samples.
    chunk_samples = int(chunk_ms / 1000 * 16000)
    print(f"Chunk: {chunk_ms}ms = {chunk_samples} samples")

    sample_inputs = (
        torch.randn(1, chunk_samples),
        # NOTE(review): traced with int64 lengths but declared as int32 in
        # the CoreML inputs below — confirm coremltools reconciles the two.
        torch.tensor([chunk_samples], dtype=torch.int64),
    )

    print("Tracing model...")
    traced = torch.jit.trace(wrapper, sample_inputs, strict=False)

    print("Converting to CoreML...")
    # RangeDim keeps the audio length flexible (100 ms .. 1 s at 16 kHz).
    conversion_inputs = [
        ct.TensorType(
            name="input_signal",
            shape=ct.Shape(shape=(1, ct.RangeDim(lower_bound=1600, upper_bound=16000, default=chunk_samples))),
            dtype=np.float32
        ),
        ct.TensorType(name="length", shape=(1,), dtype=np.int32),
    ]
    conversion_outputs = [
        ct.TensorType(name="mel", dtype=np.float32),
        ct.TensorType(name="mel_length", dtype=np.int32),
    ]

    mlmodel = ct.convert(
        traced,
        inputs=conversion_inputs,
        outputs=conversion_outputs,
        compute_units=ct.ComputeUnit.CPU_ONLY,
        minimum_deployment_target=ct.target.iOS17,
    )

    print(f"Saving to {output_path}")
    mlmodel.save(output_path)
    print("Done!")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--chunk-ms", type=int, default=160, help="Chunk size in milliseconds")
    parser.add_argument("--output-path", type=str, default="preprocessor.mlpackage", help="Output path")
    cli = parser.parse_args()

    export_preprocessor(chunk_ms=cli.chunk_ms, output_path=cli.output_path)
final_scripts/inference_benchmark.py ADDED
@@ -0,0 +1,847 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import torch
3
+ import torchaudio
4
+ import coremltools as ct
5
+ import numpy as np
6
+ import nemo.collections.asr as nemo_asr
7
+ from nemo.collections.asr.parts.utils.streaming_utils import CacheAwareStreamingAudioBuffer
8
+ from pathlib import Path
9
+ import jiwer
10
+ import time
11
+
12
def load_manifest(dataset_path, subset='test-clean', max_files=None):
    """Build manifest entries from a LibriSpeech-style directory layout.

    Walks ``dataset_path/subset`` for ``*.flac`` files (sorted for
    determinism) and pairs each with its transcript line from the chapter's
    ``<speaker>-<chapter>.trans.txt`` file.

    Args:
        dataset_path: Root of the dataset (contains the subset directory).
        subset: Subset directory name, e.g. ``'test-clean'``.
        max_files: Optional cap on the number of entries returned.

    Returns:
        List of dicts with keys ``audio_filepath``, ``text`` and ``duration``
        (``duration`` is a 0 placeholder; audio is never opened here).

    Raises:
        FileNotFoundError: If the subset directory is missing or contains
            no FLAC files.
    """
    subset_dir = Path(dataset_path) / subset
    if not subset_dir.exists():
        raise FileNotFoundError(f"Dataset directory not found: {subset_dir}")

    # Sort for determinism.
    flac_files = sorted(subset_dir.rglob('*.flac'))
    if not flac_files:
        raise FileNotFoundError(f"No FLAC files found in {subset_dir}")

    # Parse each chapter's transcript file once instead of re-opening and
    # re-scanning it for every utterance in that chapter (was O(n*m)).
    transcript_cache = {}

    def _transcripts_for(trans_file):
        """Return {utterance_id: text} for one .trans.txt file (cached)."""
        if trans_file not in transcript_cache:
            table = {}
            with open(trans_file, 'r') as f:
                for line in f:
                    parts = line.strip().split(' ', 1)
                    if len(parts) == 2:
                        # Keep the first occurrence, matching the original
                        # break-on-first-match behavior.
                        table.setdefault(parts[0], parts[1])
            transcript_cache[trans_file] = table
        return transcript_cache[trans_file]

    entries = []
    for flac_path in flac_files:
        if max_files and len(entries) >= max_files:
            break

        speaker_id = flac_path.parent.parent.name
        chapter_id = flac_path.parent.name
        trans_file = flac_path.parent / f"{speaker_id}-{chapter_id}.trans.txt"

        if trans_file.exists():
            text = _transcripts_for(trans_file).get(flac_path.stem)
            if text is not None:
                entries.append({
                    'audio_filepath': str(flac_path),
                    'text': text,
                    'duration': 0
                })
    print(f"Loaded {len(entries)} entries from {subset_dir}")
    return entries
47
+
48
def run_coreml_pipeline(coreml_encoder, coreml_decoder, coreml_joint, pytorch_model, audio_path, coreml_preprocessor=None):
    """Transcribe one audio file with the exported CoreML encoder/decoder/joint.

    Runs a chunked cache-aware streaming loop: each mel chunk goes through the
    CoreML encoder (carrying ring-buffer caches between calls), then a greedy
    RNNT decode (decoder + joint) runs per encoder output frame.

    Args:
        coreml_encoder, coreml_decoder, coreml_joint: loaded CoreML models
            exposing ``predict``.
        pytorch_model: NeMo model used for its streaming buffer, tokenizer
            and token-to-text decoding (not for acoustic compute).
        audio_path: path to the audio file to transcribe.
        coreml_preprocessor: accepted but unused here; mel extraction is done
            by the NeMo streaming buffer.

    Returns:
        Dict with ``'hypothesis'`` (text) and ``'audio_length'`` (seconds;
        0 when the audio fails to load).
    """
    # 1. Load audio, resample to 16 kHz, downmix to mono.
    try:
        audio, sr = torchaudio.load(audio_path)
        if sr != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
            audio = resampler(audio)
        if audio.shape[0] > 1:
            audio = audio.mean(dim=0, keepdim=True)

        # NOTE(review): audio_tensor/audio_len are assigned but not used
        # below — only `audio` is referenced afterwards.
        audio_tensor = audio
        audio_len = torch.tensor([audio.shape[1]], dtype=torch.long)
    except Exception as e:
        print(f"Error loading {audio_path}: {e}")
        return {'hypothesis': "", 'audio_length': 0}

    # 2. Setup Streaming Params & Buffer
    # Use chunk_size=4 to match PyTorch success (approx 320ms compute, 410ms input)
    pytorch_model.encoder.setup_streaming_params(chunk_size=4, shift_size=4)

    streaming_buffer = CacheAwareStreamingAudioBuffer(
        model=pytorch_model,
        online_normalization=False,
        pad_and_drop_preencoded=False
    )
    streaming_buffer.append_audio_file(audio_path, stream_id=-1)

    # 3. CoreML True Streaming Loop
    # Initialize CoreML encoder caches; shapes must match the exported model.
    num_layers = 17
    cache_last_channel = np.zeros((num_layers, 1, 70, 512), dtype=np.float32)
    cache_last_time = np.zeros((num_layers, 1, 512, 8), dtype=np.float32)
    cache_last_channel_len = np.zeros((1,), dtype=np.int32)

    # Decoder LSTM state, batch-first [B, layers, hidden] as the CoreML
    # decoder expects.
    h_state = np.zeros((1, 1, 640), dtype=np.float32)
    c_state = np.zeros((1, 1, 640), dtype=np.float32)

    blank_token = 1026  # Parakeet blank
    last_token = blank_token

    hypothesis_tokens = []
    max_symbols_per_step = 10  # guards against a runaway symbol loop per frame

    fixed_chunk_frames = 41  # Matches export for chunk_size=4

    for chunk_audio, chunk_len in streaming_buffer:
        # --- Encoder Step ---
        # chunk_audio: [1, 128, T]; pad (with the buffer's log-mel floor
        # value -16.0) or truncate to the fixed exported chunk length.
        T_curr = chunk_audio.shape[2]

        if T_curr < fixed_chunk_frames:
            pad_amt = fixed_chunk_frames - T_curr
            padding = torch.full((1, 128, pad_amt), -16.0)
            chunk_audio = torch.cat([chunk_audio, padding], dim=2)
        elif T_curr > fixed_chunk_frames:
            chunk_audio = chunk_audio[:, :, :fixed_chunk_frames]

        chunk_mel_input = chunk_audio.numpy()
        mel_len_input = np.array([fixed_chunk_frames], dtype=np.int32)

        inputs = {
            "mel": chunk_mel_input,
            "mel_length": mel_len_input,
            "cache_last_channel": cache_last_channel,
            "cache_last_time": cache_last_time,
            "cache_last_channel_len": cache_last_channel_len
        }

        outputs = coreml_encoder.predict(inputs)

        # Carry the updated caches into the next chunk.
        cache_last_channel = outputs["cache_last_channel_out"]
        cache_last_time = outputs["cache_last_time_out"]
        cache_last_channel_len = outputs["cache_last_channel_len_out"]

        enc_out = outputs["encoder"]  # [1, 512, 4]

        # --- Decoder Step (Immediate) ---
        # Greedy RNNT decode over each encoder frame of this chunk.
        T_enc = enc_out.shape[2]

        for t in range(T_enc):
            enc_t = enc_out[:, :, t:t+1]  # [1, 512, 1]

            # Run the prediction network once for the current last_token.
            targets = np.array([[last_token]], dtype=np.int32)
            target_length = np.array([1], dtype=np.int32)

            dec_inputs = {
                "targets": targets,
                "target_length": target_length,
                "h_in": h_state,
                "c_in": c_state
            }

            dec_outputs = coreml_decoder.predict(dec_inputs)
            decoder_step = dec_outputs["decoder_output"]
            h_state_next = dec_outputs["h_out"]
            c_state_next = dec_outputs["c_out"]

            symbols_added = 0
            while symbols_added < max_symbols_per_step:
                joint_inputs = {
                    "encoder_output": enc_t,
                    "decoder_output": decoder_step
                }

                joint_outputs = coreml_joint.predict(joint_inputs)

                logits = joint_outputs["logits"]
                token_id = int(np.argmax(logits))

                # Blank: advance to the next acoustic frame, keeping the
                # decoder state from BEFORE this step (h_state_next is
                # discarded).
                if token_id == blank_token:
                    break

                # EOU Check (1024): reset the decoder state entirely.
                if token_id == 1024:
                    # Reset State
                    h_state = np.zeros((1, 1, 640), dtype=np.float32)
                    c_state = np.zeros((1, 1, 640), dtype=np.float32)
                    last_token = blank_token
                    break

                else:
                    # Real symbol: accept it, commit the advanced decoder
                    # state, and re-run the prediction network for the next
                    # symbol on the SAME acoustic frame.
                    hypothesis_tokens.append(token_id)
                    last_token = token_id
                    symbols_added += 1

                    h_state = h_state_next
                    c_state = c_state_next

                    targets = np.array([[last_token]], dtype=np.int32)
                    dec_inputs = {
                        "targets": targets,
                        "target_length": target_length,
                        "h_in": h_state,
                        "c_in": c_state
                    }
                    dec_outputs = coreml_decoder.predict(dec_inputs)
                    decoder_step = dec_outputs["decoder_output"]
                    h_state_next = dec_outputs["h_out"]
                    c_state_next = dec_outputs["c_out"]

    # Decode tokens: drop anything outside the tokenizer vocabulary (e.g.
    # blank / special ids that slipped through).
    vocab_size = pytorch_model.tokenizer.vocab_size
    valid_tokens = [t for t in hypothesis_tokens if t < vocab_size]

    if len(valid_tokens) != len(hypothesis_tokens):
        print(f"Filtered {len(hypothesis_tokens) - len(valid_tokens)} invalid tokens (>= {vocab_size})")

    if not valid_tokens:
        return {
            'hypothesis': "",
            'audio_length': audio.shape[1] / 16000
        }

    # NOTE(review): decode_tokens_to_str is given a list-of-lists and the
    # first element is taken — confirm this matches the NeMo decoding API
    # in use here.
    hypothesis = pytorch_model.decoding.decode_tokens_to_str([valid_tokens])[0]
    hypothesis = hypothesis.replace("<EOU>", "").strip()

    return {
        'hypothesis': hypothesis,
        'audio_length': audio.shape[1] / 16000
    }
210
+
211
+ def run_pytorch_streaming_pipeline(pytorch_model, audio_path):
212
+ # 1. Load Audio
213
+ try:
214
+ audio, sr = torchaudio.load(audio_path)
215
+ if sr != 16000:
216
+ resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
217
+ audio = resampler(audio)
218
+ if audio.shape[0] > 1:
219
+ audio = audio.mean(dim=0, keepdim=True)
220
+
221
+ audio_tensor = audio
222
+ audio_len = torch.tensor([audio.shape[1]], dtype=torch.long)
223
+ except Exception as e:
224
+ print(f"Error loading {audio_path}: {e}")
225
+ return {'hypothesis': "", 'audio_length': 0}
226
+
227
+ # 2. Preprocessor
228
+ with torch.no_grad():
229
+ processed_signal, processed_signal_len = pytorch_model.preprocessor(
230
+ input_signal=audio_tensor, length=audio_len
231
+ )
232
+
233
+ # 3. Streaming Loop
234
+ total_frames = processed_signal.shape[2]
235
+ chunk_frames = 32 # Match CoreML
236
+
237
+ # Initialize Cache
238
+ num_layers = 17
239
+ cache_last_channel = torch.zeros(num_layers, 1, 70, 512)
240
+ cache_last_time = torch.zeros(num_layers, 1, 512, 8)
241
+ cache_last_channel_len = torch.zeros(1, dtype=torch.long)
242
+
243
+ # Initialize Decoder State
244
+ decoder_state = None
245
+ last_token = torch.tensor([[1026]], dtype=torch.long) # Blank token
246
+
247
+ final_hyp_tokens = []
248
+
249
+ for i in range(0, total_frames, chunk_frames):
250
+ end = min(i + chunk_frames, total_frames)
251
+ chunk_mel = processed_signal[:, :, i:end] # [1, D, T]
252
+
253
+ # Pad to chunk_frames if needed
254
+ if chunk_mel.shape[2] < chunk_frames:
255
+ pad_amt = chunk_frames - chunk_mel.shape[2]
256
+ chunk_mel = torch.nn.functional.pad(chunk_mel, (0, pad_amt))
257
+
258
+ chunk_len = torch.tensor([chunk_mel.shape[2]], dtype=torch.long)
259
+
260
+ with torch.no_grad():
261
+ # 1. Encoder Step
262
+ (
263
+ enc_out,
264
+ enc_len,
265
+ cache_last_channel,
266
+ cache_last_time,
267
+ cache_last_channel_len
268
+ ) = pytorch_model.encoder.forward_internal(
269
+ audio_signal=chunk_mel,
270
+ length=chunk_len,
271
+ cache_last_channel=cache_last_channel,
272
+ cache_last_time=cache_last_time,
273
+ cache_last_channel_len=cache_last_channel_len
274
+ )
275
+
276
+ # enc_out: [B, D, T_out] -> [1, 512, T_out]
277
+ # Transpose to [B, T_out, D] for Joint
278
+ enc_out = enc_out.transpose(1, 2)
279
+
280
+ # 2. Greedy Decoding Loop (Symbol Loop)
281
+ # For each acoustic frame t
282
+ for t in range(enc_out.shape[1]):
283
+ f_t = enc_out[:, t:t+1, :] # [1, 1, 512]
284
+
285
+ # Project Encoder (Joint.enc)
286
+ # pytorch_model.joint.enc is the Linear layer
287
+ # Or use pytorch_model.joint(enc_out, dec_out) which does projection internally?
288
+ # Standard RNNTJoint: forward(f, g) -> res -> joint_net
289
+ # But we need to loop over symbols u.
290
+
291
+ # Pre-project encoder for this frame
292
+ f_t_proj = pytorch_model.joint.enc(f_t) # [1, 1, 640]
293
+
294
+ # Limit max symbols per frame (e.g. 10) to prevent infinite loops
295
+ max_symbols = 10
296
+ symbols_added = 0
297
+
298
+ while symbols_added < max_symbols:
299
+ # Decoder Step
300
+ # decoder.forward(targets, lengths, states)
301
+ # targets: [B, 1] (last token)
302
+ g, _, decoder_state = pytorch_model.decoder.forward(
303
+ targets=last_token,
304
+ target_length=torch.tensor([1]),
305
+ states=decoder_state
306
+ )
307
+
308
+ # g: [B, 640, U+1?] -> [1, 640, 2]
309
+ # We want the last step output
310
+ g = g[:, :, -1:] # [1, 640, 1]
311
+ g = g.transpose(1, 2) # [1, 1, 640]
312
+
313
+ # Project Decoder (Joint.pred)
314
+ g_proj = pytorch_model.joint.pred(g) # [1, 1, 640]
315
+
316
+ # Joint
317
+ # joint_net(f + g)
318
+ # Note: f_t_proj and g_proj are [1, 1, 640]
319
+ # We broadcast? They are same shape here.
320
+ out = pytorch_model.joint.joint_net(f_t_proj + g_proj) # [1, 1, 1027]
321
+
322
+ # Argmax
323
+ k = out.argmax(dim=-1) # [1, 1]
324
+ pred_token = k.item()
325
+
326
+ if pred_token == 1026: # Blank
327
+ break
328
+ else:
329
+ final_hyp_tokens.append(pred_token)
330
+ last_token = k # Update last token
331
+ # decoder_state is already updated by forward()
332
+ # But wait! If we predict a symbol, we advance decoder state.
333
+ # If we predict blank, we DO NOT advance decoder state?
334
+ # In standard RNNT:
335
+ # If blank: advance t (next acoustic frame), keep u (decoder state).
336
+ # If symbol: advance u (update decoder state), keep t (same acoustic frame).
337
+
338
+ # My decoder.forward call UPDATED the state.
339
+ # If I predict blank, I should DISCARD the new state?
340
+ # YES!
341
+ # But wait, decoder.forward takes the *previous* token/state and produces the *current* embedding/state.
342
+ # The state returned is the state AFTER processing `last_token`.
343
+ # This state is what we use to predict the NEXT token.
344
+ # So if we predict a symbol, we KEEP this state and use it for the next step.
345
+ # If we predict blank, we KEEP the *previous* state (before this forward)?
346
+ # No, the state corresponds to the *current* position `u`.
347
+ # The `g` vector corresponds to `h_u`.
348
+ # `f_t` corresponds to `h_t`.
349
+ # `Joint(h_t, h_u)` produces prob of `y_{u+1}` or `blank`.
350
+
351
+ # If `blank`: we move to `t+1`. We stay at `u`. State `h_u` is unchanged.
352
+ # If `symbol`: we move to `u+1`. We update `h_u` to `h_{u+1}`. We stay at `t`.
353
+
354
+ # So:
355
+ # 1. We have `decoder_state` (corresponding to `u`).
356
+ # 2. We compute `g` from `last_token` and `decoder_state`.
357
+ # Wait, `decoder.forward` usually takes `last_token` and `previous_state` and returns `current_embedding` and `new_state`.
358
+ # So `g` is `P(u)`. `decoder_state` is `State(u)`.
359
+ # Actually, for LSTM, `forward` does one step.
360
+
361
+ # Let's verify:
362
+ # `g, _, new_state = decoder(last_token, state)`
363
+ # `out = joint(f, g)`
364
+ # If `out` -> Symbol:
365
+ # We accept `new_state` as the current state.
366
+ # We update `last_token` to Symbol.
367
+ # We loop again (same `t`).
368
+ # If `out` -> Blank:
369
+ # We discard `new_state`.
370
+ # We keep `state` (old).
371
+ # We break loop (next `t`).
372
+
373
+ # BUT, `decoder.forward` is expensive. We don't want to re-compute it if we stay at `u`.
374
+ # But we only stay at `u` if we predict Blank, which means we move to next `t`.
375
+ # For the next `t`, we need `g` (which depends on `u`).
376
+ # So we should cache `g` and `state`?
377
+ # Yes.
378
+
379
+ # Correct Logic:
380
+ # Initialize `decoder_state = None`.
381
+ # Initialize `last_token = Blank`.
382
+ # Compute `g, _, next_decoder_state = decoder(last_token, decoder_state)` ONCE.
383
+ # `g_proj = joint.pred(g)`
384
+
385
+ # Loop t:
386
+ # `f_t_proj = ...`
387
+ # Loop u:
388
+ # `logits = joint(f_t_proj + g_proj)`
389
+ # `k = argmax`
390
+ # If k == Blank:
391
+ # break (advance t)
392
+ # Else:
393
+ # Append k.
394
+ # `last_token = k`
395
+ # `decoder_state = next_decoder_state` (Accept the state transition)
396
+ # # Compute NEXT g and state
397
+ # `g, _, next_decoder_state = decoder(last_token, decoder_state)`
398
+ # `g_proj = joint.pred(g)`
399
+
400
+ # This looks correct.
401
+ # But I need to initialize `g` and `next_decoder_state` before the loop.
402
+
403
+ pass
404
+
405
+ # Refined Logic Implementation
406
+
407
+ # Initialize Decoder
408
+ # First step: Feed Blank/SOS to get initial g and state
409
+ # Note: Parakeet uses Blank (1026) as SOS? Or does it rely on zero state?
410
+ # Usually we feed SOS. Let's assume 1026 is SOS.
411
+
412
+ last_token = torch.tensor([[1026]], dtype=torch.long)
413
+ decoder_state = None
414
+
415
+ # Pre-compute initial g
416
+ g, _, next_decoder_state = pytorch_model.decoder.forward(
417
+ targets=last_token,
418
+ target_length=torch.tensor([1]),
419
+ states=decoder_state
420
+ )
421
+ # g: [1, 640, 2] -> Slice last
422
+ g = g[:, :, -1:] # [1, 640, 1]
423
+ g = g.transpose(1, 2) # [1, 1, 640]
424
+
425
+ g_proj = pytorch_model.joint.pred(g)
426
+
427
+ # Update decoder_state to next_decoder_state?
428
+ # No, `next_decoder_state` is the state AFTER processing `last_token`.
429
+ # This is the state we need for the NEXT step if we emit a token.
430
+ # Wait, `g` is the embedding used for prediction.
431
+ # So `g` and `next_decoder_state` go together.
432
+ # We hold `g` and `next_decoder_state` as "Current Decoder Output".
433
+ # If we emit a symbol, we use `next_decoder_state` as the input for the NEXT decoder step.
434
+
435
+ # Let's call the holding variables `current_g_proj` and `candidate_state`.
436
+ current_g_proj = g_proj
437
+ candidate_state = next_decoder_state
438
+
439
+ # Current state input to decoder (for next step)
440
+ # Actually, `decoder.forward` takes `states`.
441
+ # If we emit a symbol, the `states` for the NEXT call should be `candidate_state`.
442
+ # So we need to track `current_state_for_input`.
443
+ # Initially `None`.
444
+ # After first call, `candidate_state` is the state after SOS.
445
+
446
+ # Wait, if we emit a symbol, we call decoder with that symbol and `candidate_state`.
447
+ # So `candidate_state` IS the state we maintain.
448
+
449
+ # Let's verify:
450
+ # 1. Start: `last_token=SOS`, `state=None`.
451
+ # 2. `g, _, state = decoder(SOS, None)`.
452
+ # 3. `g` is used to predict first token.
453
+ # 4. `joint(f, g)`.
454
+ # 5. If `k` (symbol):
455
+ # `last_token = k`.
456
+ # `g, _, state = decoder(k, state)`.
457
+ # Loop.
458
+ # 6. If `Blank`:
459
+ # Keep `g` and `state` as is.
460
+ # Advance `f`.
461
+
462
+ # Yes, this is correct.
463
+
464
+ # So,
465
def run_pytorch_streaming_pipeline(pytorch_model, audio_path):
    """Chunked (streaming) transcription via NeMo's cache-aware stream step.

    Args:
        pytorch_model: a NeMo cache-aware Conformer-Transducer ASR model.
        audio_path: path to an audio file; loaded/resampled at 16 kHz.

    Returns:
        dict with 'hypothesis' (final text, <eou>/<EOU> markers removed) and
        'audio_length' (seconds of audio).
    """
    import librosa

    # Decode the whole file once at the model's 16 kHz rate.
    audio, _sr = librosa.load(audio_path, sr=16000)

    # Full-utterance mel features; chunking happens on the feature axis below.
    processed_signal, processed_signal_length = pytorch_model.preprocessor(
        input_signal=torch.tensor([audio]),
        length=torch.tensor([len(audio)])
    )

    # Streaming needs greedy decoding; rebuild the decoding object if the
    # checkpoint shipped with a different strategy.
    if pytorch_model.decoding.cfg.strategy != 'greedy':
        print("Switching to 'greedy' decoding strategy for streaming...")
        from omegaconf import OmegaConf
        from nemo.collections.asr.parts.submodules.rnnt_decoding import RNNTBPEDecoding

        greedy_cfg = OmegaConf.create({
            'strategy': 'greedy',
            'greedy': {'max_symbols': 10},
            'preserve_alignments': True,
            'compute_timestamps': False
        })
        pytorch_model.decoding = RNNTBPEDecoding(
            decoding_cfg=greedy_cfg,
            decoder=pytorch_model.decoder,
            joint=pytorch_model.joint,
            tokenizer=pytorch_model.tokenizer
        )

    total_frames = processed_signal.shape[2]
    chunk_frames = 32  # 32 mel frames per streaming step

    # Encoder cache state.
    # NOTE(review): layer count and cache shapes are hard-coded for this
    # 17-layer / 512-dim checkpoint — confirm against
    # model.encoder.get_initial_cache_state() if the model changes.
    num_layers = 17
    cache_last_channel = torch.zeros(num_layers, 1, 70, 512)
    cache_last_time = torch.zeros(num_layers, 1, 512, 8)
    cache_last_channel_len = torch.zeros(1, dtype=torch.long)

    previous_hypotheses = None
    previous_pred_out = None  # NOTE(review): never reassigned below — presumably unused by the greedy path

    final_hyp = ""

    def first_hyp(hyp_or_list):
        # conformer_stream_step may hand back one Hypothesis or a list of them.
        if not hyp_or_list:
            return None
        return hyp_or_list[0] if isinstance(hyp_or_list, list) else hyp_or_list

    def contains_eou(hyp):
        # Token id 1024 is the model's <eou> (end-of-utterance) symbol.
        if hyp is None or not hasattr(hyp, 'y_sequence'):
            return False
        seq = hyp.y_sequence
        if isinstance(seq, list):
            return 1024 in seq
        if torch.is_tensor(seq):
            return bool((seq == 1024).any())
        return False

    for start in range(0, total_frames, chunk_frames):
        stop = min(start + chunk_frames, total_frames)
        chunk_mel = processed_signal[:, :, start:stop]  # [1, D, T]

        # Right-pad the tail chunk so every step has the fixed width.
        short_by = chunk_frames - chunk_mel.shape[2]
        if short_by > 0:
            chunk_mel = torch.nn.functional.pad(chunk_mel, (0, short_by))

        # NOTE(review): this reports the padded width, so the encoder treats
        # padding frames as valid input — confirm that is intended.
        chunk_len = torch.tensor([chunk_mel.shape[2]], dtype=torch.long)

        with torch.no_grad():
            (
                greedy_predictions,
                all_hyp_text,
                cache_last_channel,
                cache_last_time,
                cache_last_channel_len,
                best_hyp_list,  # Hypothesis list for this step
            ) = pytorch_model.conformer_stream_step(
                processed_signal=chunk_mel,
                processed_signal_length=chunk_len,
                cache_last_channel=cache_last_channel,
                cache_last_time=cache_last_time,
                cache_last_channel_len=cache_last_channel_len,
                previous_hypotheses=previous_hypotheses,
                previous_pred_out=previous_pred_out
            )

        previous_hypotheses = best_hyp_list
        current_hyp_obj = first_hyp(best_hyp_list)

        if contains_eou(current_hyp_obj):
            # Commit the finished segment and reset the decoder history so the
            # next segment starts from a clean hypothesis.
            if current_hyp_obj and hasattr(current_hyp_obj, 'text'):
                final_hyp += current_hyp_obj.text + " "
            previous_hypotheses = None
            print("DEBUG: EOU detected, resetting previous_hypotheses")
        # A non-EOU step leaves the segment open; its text is committed once,
        # after the loop, from previous_hypotheses.

    # Flush any trailing, uncommitted segment.
    last_hyp_obj = first_hyp(previous_hypotheses)
    if last_hyp_obj is not None and hasattr(last_hyp_obj, 'text'):
        final_hyp += last_hyp_obj.text

    # Remove end-of-utterance markers from the surfaced text.
    final_hyp = final_hyp.replace("<eou>", "").replace("<EOU>", "").strip()

    return {
        'hypothesis': final_hyp,
        'audio_length': audio.shape[0] / 16000
    }
600
def run_pytorch_pipeline(pytorch_model, audio_path):
    """Offline (non-streaming) transcription via the model's transcribe() API.

    Args:
        pytorch_model: ASR model exposing transcribe(paths, batch_size, verbose).
        audio_path: path to the audio file to transcribe.

    Returns:
        dict with 'hypothesis' (text with <eou>/<EOU> markers removed) and a
        placeholder 'audio_length' of 0. On any failure returns an empty
        hypothesis so the caller's benchmark loop keeps going.
    """
    try:
        results = pytorch_model.transcribe([audio_path], batch_size=1, verbose=False)

        # Some NeMo versions return (best_hyps, all_hyps).
        if isinstance(results, tuple):
            results = results[0]

        best = results[0]

        # Newer NeMo returns Hypothesis objects instead of plain strings.
        text = best.text if hasattr(best, 'text') else best

        # Drop end-of-utterance markers before scoring.
        if isinstance(text, str):
            text = text.replace("<eou>", "").replace("<EOU>", "").strip()

        return {
            'hypothesis': text,
            'audio_length': 0  # Placeholder
        }
    except Exception as e:
        # Best-effort: report and continue with an empty hypothesis.
        print(f"Error running PyTorch pipeline on {audio_path}: {e}")
        return {'hypothesis': "", 'audio_length': 0}
628
def main():
    """CLI entry point: run the selected inference pipeline over LibriSpeech
    and report per-file and average WER.

    Modes (mutually exclusive flags):
      --pytorch-only       pure PyTorch, offline transcribe()
      --pytorch-streaming  pure PyTorch, simulated streaming
      --hybrid             CoreML encoder + PyTorch decoder
      (default)            full CoreML encoder/decoder/joint pipeline
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='/Users/kikow/Library/Caches/fluidaudio/LibriSpeech/LibriSpeech', help='Path to LibriSpeech')
    parser.add_argument('--subset', default='test-clean', help='Subset to test')
    parser.add_argument('--max-files', type=int, default=100, help='Number of files to process')

    # Default paths based on file list
    parser.add_argument('--coreml-encoder', default='streaming_encoder_320ms.mlpackage')
    parser.add_argument('--coreml-decoder', default='parakeet_decoder.mlpackage')
    parser.add_argument('--coreml-joint', default='parakeet_joint.mlpackage')

    parser.add_argument('--pytorch-model', default='nvidia/parakeet_realtime_eou_120m-v1')
    parser.add_argument('--coreml-preprocessor', default='preprocessor_160ms.mlpackage')
    parser.add_argument('--hybrid', action='store_true', help='Use Hybrid mode (CoreML Encoder + PyTorch Decoder)')
    parser.add_argument('--pytorch-only', action='store_true', help='Use pure PyTorch model (Offline)')
    parser.add_argument('--pytorch-streaming', action='store_true', help='Use pure PyTorch model (Streaming Simulation)')
    args = parser.parse_args()

    # The PyTorch model is always needed (tokenizer/decoding even in CoreML modes).
    print(f"Loading PyTorch model: {args.pytorch_model}")
    pytorch_model = nemo_asr.models.ASRModel.from_pretrained(args.pytorch_model, map_location="cpu")
    pytorch_model.eval()

    # CoreML artifacts are only required outside the pure-PyTorch modes.
    coreml_encoder = None
    coreml_decoder = None
    coreml_joint = None
    coreml_preprocessor = None

    if not args.pytorch_only and not args.pytorch_streaming:
        print(f"Loading CoreML Encoder: {args.coreml_encoder}")
        coreml_encoder = ct.models.MLModel(args.coreml_encoder)

        if args.hybrid:
            print(f"Loading CoreML Preprocessor: {args.coreml_preprocessor}")
            try:
                coreml_preprocessor = ct.models.MLModel(args.coreml_preprocessor)
            except Exception as e:
                # Hybrid mode can still run with the PyTorch preprocessor.
                print(f"Failed to load CoreML Preprocessor: {e}")
                print("Falling back to PyTorch Preprocessor")
        else:
            print(f"Loading CoreML Decoder: {args.coreml_decoder}")
            coreml_decoder = ct.models.MLModel(args.coreml_decoder)
            print(f"Loading CoreML Joint: {args.coreml_joint}")
            coreml_joint = ct.models.MLModel(args.coreml_joint)
    elif args.pytorch_streaming:
        print("Running in PYTORCH-STREAMING mode")
    else:
        print("Running in PYTORCH-ONLY (Offline) mode")

    entries = load_manifest(args.dataset, args.subset, args.max_files)

    total_wer = 0
    count = 0
    start_time = time.time()  # NOTE(review): recorded but never reported

    print(f"Starting Benchmark on {len(entries)} files...")

    for i, entry in enumerate(entries):
        try:
            if args.pytorch_streaming:
                result = run_pytorch_streaming_pipeline(pytorch_model, entry['audio_filepath'])
            elif args.pytorch_only:
                result = run_pytorch_pipeline(pytorch_model, entry['audio_filepath'])
            elif args.hybrid:
                result = run_hybrid_pipeline(coreml_encoder, pytorch_model, entry['audio_filepath'], coreml_preprocessor)
            else:
                result = run_coreml_pipeline(coreml_encoder, coreml_decoder, coreml_joint, pytorch_model, entry['audio_filepath'], coreml_preprocessor)

            ref = entry['text'].lower()
            hyp = result['hypothesis'].lower()

            wer = jiwer.wer(ref, hyp)
            total_wer += wer
            count += 1

            print(f"[{i+1}/{len(entries)}] {Path(entry['audio_filepath']).name} | WER: {wer:.2%} | Ref: '{ref}' | Hyp: '{hyp}'")
        except Exception as e:
            # Keep the benchmark running past individual failures.
            print(f"[{i+1}/{len(entries)}] Failed: {e}")
            import traceback
            traceback.print_exc()

    if count > 0:
        avg_wer = total_wer / count
        print(f"\nAverage WER over {count} files: {avg_wer:.2%}")
    else:
        print("\nNo files processed successfully.")
717
def run_hybrid_pipeline(coreml_encoder, pytorch_model, audio_path, coreml_preprocessor=None):
    """Hybrid inference: CoreML streaming encoder + PyTorch RNNT decoding.

    Mel features come from the CoreML preprocessor when one is supplied,
    otherwise from the model's own PyTorch preprocessor. The encoder runs in
    fixed 32-frame chunks with cache-aware state threaded through; decoding
    happens in one shot over the concatenated encoder output.

    Args:
        coreml_encoder: CoreML MLModel for the streaming encoder.
        pytorch_model: NeMo model (preprocessor + RNNT decoding).
        audio_path: audio file path.
        coreml_preprocessor: optional CoreML MLModel producing the mel features.

    Returns:
        dict with 'hypothesis' text and 'audio_length' in seconds.
    """
    # 1. Load audio and normalize to 16 kHz mono.
    try:
        audio, sr = torchaudio.load(audio_path)
        if sr != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
            audio = resampler(audio)
        if audio.shape[0] > 1:
            audio = audio.mean(dim=0, keepdim=True)

        audio_tensor = audio
        audio_len = torch.tensor([audio.shape[1]], dtype=torch.long)
    except Exception as e:
        print(f"Error loading {audio_path}: {e}")
        return {'hypothesis': "", 'audio_length': 0}

    # 2. Mel-spectrogram features, expected shape (1, 128, T).
    if coreml_preprocessor:
        audio_np = audio.numpy()
        if audio_np.ndim == 2:
            audio_np = audio_np.reshape(1, -1)  # Ensure (1, N)

        pre_out = coreml_preprocessor.predict({
            "input_signal": audio_np,
            "length": np.array([audio_np.shape[1]], dtype=np.float32)
        })
        processed_signal = torch.from_numpy(pre_out["mel"])
        # Some conversions add a leading singleton dim; drop it.
        if processed_signal.ndim == 4:
            processed_signal = processed_signal.squeeze(0)
    else:
        with torch.no_grad():
            processed_signal, processed_signal_len = pytorch_model.preprocessor(
                input_signal=audio_tensor, length=audio_len
            )

    # 3. CoreML encoder, chunk by chunk, threading caches through each step.
    total_frames = processed_signal.shape[2]

    # NOTE(review): cache shapes are hard-coded for this 17-layer / 512-dim
    # checkpoint — confirm if the exported model changes.
    num_layers = 17
    cache_last_channel = np.zeros((num_layers, 1, 70, 512), dtype=np.float32)
    cache_last_time = np.zeros((num_layers, 1, 512, 8), dtype=np.float32)
    cache_last_channel_len = np.zeros((1,), dtype=np.int32)

    accumulated_encoder_output = []
    fixed_chunk_size = 32
    chunk_frames = 32

    for start in range(0, total_frames, chunk_frames):
        stop = min(start + chunk_frames, total_frames)
        chunk_mel = processed_signal[:, :, start:stop].numpy()  # [1, 128, T]

        valid_frames = chunk_mel.shape[2]
        if valid_frames < fixed_chunk_size:
            # Pad the tail chunk with log-mel "silence" (-16.0) to fixed width.
            padding = np.full((1, 128, fixed_chunk_size - valid_frames), -16.0, dtype=np.float32)
            chunk_mel_input = np.concatenate([chunk_mel, padding], axis=2)
        else:
            chunk_mel_input = chunk_mel
        # NOTE(review): the reported length is always the fixed chunk size,
        # so padded frames count as valid input — confirm intended.
        mel_len_input = np.array([fixed_chunk_size], dtype=np.int32)

        outputs = coreml_encoder.predict({
            "mel": chunk_mel_input,
            "mel_length": mel_len_input,
            "cache_last_channel": cache_last_channel,
            "cache_last_time": cache_last_time,
            "cache_last_channel_len": cache_last_channel_len
        })

        cache_last_channel = outputs["cache_last_channel_out"]
        cache_last_time = outputs["cache_last_time_out"]
        cache_last_channel_len = outputs["cache_last_channel_len_out"]

        accumulated_encoder_output.append(outputs["encoder"])

    if not accumulated_encoder_output:
        return {'hypothesis': "", 'audio_length': audio.shape[1] / 16000}

    # 4. One-shot greedy RNNT decoding over concatenated frames (1, 512, T).
    encoder_output = np.concatenate(accumulated_encoder_output, axis=2)
    encoder_output_tensor = torch.from_numpy(encoder_output)
    # NOTE(review): length covers the full concatenated output, including any
    # frames produced from padded mel input — confirm against true duration.
    encoded_lengths = torch.tensor([encoder_output.shape[2]], dtype=torch.long)

    with torch.no_grad():
        hypotheses = pytorch_model.decoding.rnnt_decoder_predictions_tensor(
            encoder_output=encoder_output_tensor,
            encoded_lengths=encoded_lengths,
            return_hypotheses=True
        )

    return {
        'hypothesis': hypotheses[0].text,
        'audio_length': audio.shape[1] / 16000
    }
846
# Script entry point: run the benchmark when executed directly.
if __name__ == "__main__":
    main()
final_scripts/inference_reference_nemo.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import soundfile as sf
3
+ import librosa
4
+ import numpy as np
5
+ import logging
6
+ from omegaconf import OmegaConf, open_dict
7
+ import nemo.collections.asr as nemo_asr
8
+ from nemo.collections.asr.parts.utils.streaming_utils import CacheAwareStreamingAudioBuffer
9
+ from nemo.collections.asr.parts.submodules.rnnt_decoding import RNNTBPEDecoding
10
+ from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis
11
+
12
+ # Configure logging
13
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
+
15
def setup_decoding_strategy(asr_model, strategy='greedy'):
    """
    Configure the model's RNNT decoding strategy.

    Tries the official change_decoding_strategy() API first; if that fails
    (as it does for the Parakeet EOU checkpoint), swaps in a freshly
    constructed RNNTBPEDecoding object as a fallback.
    """
    print(f"Setting up decoding strategy: {strategy}")

    # Minimal decoding config for the requested strategy.
    cfg = OmegaConf.create({
        'strategy': strategy,
        'greedy': {'max_symbols': 10},  # Standard greedy params
        'fused_batch_size': -1,
        'compute_timestamps': False,  # Disable for stability
        'preserve_alignments': False
    })

    if hasattr(asr_model, 'change_decoding_strategy'):
        try:
            asr_model.change_decoding_strategy(cfg)
            print("Successfully changed decoding strategy via change_decoding_strategy")
            return
        except Exception as e:
            print(f"Standard change_decoding_strategy failed: {e}")
            print("Attempting manual replacement...")

    # Manual replacement fallback (Required for Parakeet EOU).
    if hasattr(asr_model, 'decoding') and isinstance(asr_model.decoding, RNNTBPEDecoding):
        asr_model.decoding = RNNTBPEDecoding(
            decoding_cfg=cfg,
            decoder=asr_model.decoder,
            joint=asr_model.joint,
            tokenizer=asr_model.tokenizer
        )
        print("Successfully replaced decoding strategy manually.")
    else:
        print("Could not change decoding strategy.")
+ def perform_streaming(asr_model, streaming_buffer, device):
55
+ """
56
+ Performs streaming inference using conformer_stream_step.
57
+ Follows the NeMo example structure.
58
+ """
59
+ # Get initial cache state
60
+ # Note: The example uses batch_size from buffer, we assume 1 for simplicity here
61
+ batch_size = 1
62
+ cache_last_channel, cache_last_time, cache_last_channel_len = asr_model.encoder.get_initial_cache_state(
63
+ batch_size=batch_size
64
+ )
65
+
66
+ # Move cache to device
67
+ if cache_last_channel is not None:
68
+ cache_last_channel = cache_last_channel.to(device)
69
+ cache_last_time = cache_last_time.to(device)
70
+ cache_last_channel_len = cache_last_channel_len.to(device)
71
+
72
+ previous_hypotheses = None
73
+ previous_pred_out = None
74
+
75
+ final_transcription = ""
76
+
77
+ print("Starting streaming loop...")
78
+
79
+ for step_num, (chunk_audio, chunk_lengths) in enumerate(streaming_buffer):
80
+ chunk_audio = chunk_audio.to(device)
81
+ chunk_lengths = chunk_lengths.to(device)
82
+
83
+ print(f"Step {step_num}: chunk_audio shape: {chunk_audio.shape}")
84
+
85
+ # conformer_stream_step
86
+ with torch.no_grad():
87
+ (
88
+ greedy_predictions,
89
+ transcribed_texts,
90
+ cache_last_channel,
91
+ cache_last_time,
92
+ cache_last_channel_len,
93
+ best_hyp_list,
94
+ ) = asr_model.conformer_stream_step(
95
+ processed_signal=chunk_audio,
96
+ processed_signal_length=chunk_lengths,
97
+ cache_last_channel=cache_last_channel,
98
+ cache_last_time=cache_last_time,
99
+ cache_last_channel_len=cache_last_channel_len,
100
+ keep_all_outputs=False, # We don't need to keep all outputs for now
101
+ previous_hypotheses=previous_hypotheses,
102
+ previous_pred_out=previous_pred_out,
103
+ return_transcription=True
104
+ )
105
+
106
+ # Update state for next step
107
+ previous_hypotheses = best_hyp_list
108
+
109
+ # Extract text and handle EOU (The "Complex" Part)
110
+ current_hyp = best_hyp_list[0] if isinstance(best_hyp_list, list) else best_hyp_list
111
+
112
+ # Check for EOU (1024)
113
+ is_eou = False
114
+ if hasattr(current_hyp, 'y_sequence'):
115
+ y_seq = current_hyp.y_sequence
116
+ if isinstance(y_seq, list) and 1024 in y_seq:
117
+ is_eou = True
118
+ elif torch.is_tensor(y_seq) and (y_seq == 1024).any():
119
+ is_eou = True
120
+
121
+ if is_eou:
122
+ # FIX: Reset decoder state on EOU
123
+ previous_hypotheses = None
124
+ if hasattr(current_hyp, 'text'):
125
+ final_transcription += current_hyp.text + " "
126
+
127
+ # Note: If not EOU, we don't append text yet because it's partial.
128
+ # The example accumulates `transcribed_texts` but that might be for the whole batch/history?
129
+ # In strict streaming, we usually only commit on EOU or stability.
130
+ # For this demo, we'll just print partials.
131
+
132
+ # print(f"Step {step_num}: {current_hyp.text if hasattr(current_hyp, 'text') else ''}")
133
+
134
+ # Append final bit
135
+ if previous_hypotheses:
136
+ last_hyp = previous_hypotheses[0] if isinstance(previous_hypotheses, list) else previous_hypotheses
137
+ if hasattr(last_hyp, 'text'):
138
+ final_transcription += last_hyp.text
139
+
140
+ return final_transcription.replace("<eou>", "").strip()
141
+
142
+ import argparse
143
+ import jiwer
144
+ from pathlib import Path
145
+
146
def load_manifest(dataset_path, subset='test-clean', max_files=None):
    """Collect (audio, transcript) pairs from a LibriSpeech-style tree.

    Walks ``dataset_path/subset`` for ``*.flac`` files and looks up each
    utterance's transcript in the sibling ``<speaker>-<chapter>.trans.txt``.

    Args:
        dataset_path: LibriSpeech root directory.
        subset: subset folder name (e.g. 'test-clean').
        max_files: stop after this many entries (falsy = no limit).

    Returns:
        list of dicts: {'audio_filepath', 'text', 'duration'} (duration is 0).

    Raises:
        FileNotFoundError: if the subset directory or FLAC files are missing.
    """
    subset_dir = Path(dataset_path) / subset
    if not subset_dir.exists():
        raise FileNotFoundError(f"Dataset directory not found: {subset_dir}")

    # Sorted for a deterministic processing order.
    flac_files = sorted(subset_dir.rglob('*.flac'))
    if not flac_files:
        raise FileNotFoundError(f"No FLAC files found in {subset_dir}")

    entries = []
    for flac_path in flac_files:
        if max_files and len(entries) >= max_files:
            break

        chapter_dir = flac_path.parent
        speaker_id = chapter_dir.parent.name
        chapter_id = chapter_dir.name
        trans_file = chapter_dir / f"{speaker_id}-{chapter_id}.trans.txt"
        if not trans_file.exists():
            continue

        utterance_id = flac_path.stem
        with open(trans_file, 'r') as f:
            for line in f:
                # Lines look like "<utt-id> <transcript text>".
                utt, _sep, text = line.strip().partition(' ')
                if utt == utterance_id and text:
                    entries.append({
                        'audio_filepath': str(flac_path),
                        'text': text,
                        'duration': 0
                    })
                    break

    print(f"Loaded {len(entries)} entries from {subset_dir}")
    return entries
+ def main():
183
+ parser = argparse.ArgumentParser()
184
+ parser.add_argument('--max-files', type=int, default=100)
185
+ args = parser.parse_args()
186
+
187
+ model_id = "nvidia/parakeet_realtime_eou_120m-v1"
188
+ dataset_path = "/Users/kikow/Library/Caches/fluidaudio/LibriSpeech/LibriSpeech"
189
+
190
+ device = torch.device("cpu") # Force CPU for now
191
+
192
+ print(f"Loading model: {model_id}")
193
+ model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location=device)
194
+ model.eval()
195
+
196
+ # 1. Setup Decoding Strategy (Crucial Step)
197
+ setup_decoding_strategy(model, strategy='greedy')
198
+
199
+ # 2. Setup Streaming Params
200
+ model.encoder.setup_streaming_params(chunk_size=4, shift_size=4)
201
+ print(f"Updated Streaming Config: {model.encoder.streaming_cfg}")
202
+
203
+ # Load Data
204
+ entries = load_manifest(dataset_path, max_files=args.max_files)
205
+
206
+ total_wer = 0
207
+ count = 0
208
+
209
+ print(f"Starting Benchmark on {len(entries)} files...")
210
+
211
+ for i, entry in enumerate(entries):
212
+ audio_file = entry['audio_filepath']
213
+ ref_text = entry['text'].lower()
214
+
215
+ # Create buffer per file (clean state)
216
+ streaming_buffer = CacheAwareStreamingAudioBuffer(
217
+ model=model,
218
+ online_normalization=False,
219
+ pad_and_drop_preencoded=False
220
+ )
221
+
222
+ streaming_buffer.append_audio_file(audio_file, stream_id=-1)
223
+
224
+ # 3. Perform Streaming
225
+ hyp_text = perform_streaming(model, streaming_buffer, device)
226
+
227
+ # Calculate WER
228
+ wer = jiwer.wer(ref_text, hyp_text)
229
+ total_wer += wer
230
+ count += 1
231
+
232
+ print(f"[{i+1}/{len(entries)}] {Path(audio_file).name} | WER: {wer*100:.2f}% | Ref: '{ref_text}' | Hyp: '{hyp_text}'")
233
+
234
+ avg_wer = total_wer / count if count > 0 else 0
235
+ print(f"\nAverage WER over {count} files: {avg_wer*100:.2f}%")
236
+
237
+ if __name__ == "__main__":
238
+ main()
parakeet_decoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abebbb833404b4a9bcc374a9430574d574061f65f6327cba59d8cc1a8b95cfaa
3
+ size 243
parakeet_decoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea4dbff3f49ae48e899d4dc785cdb8ffa8614bba395c623db025f08bdd633381
3
+ size 439
parakeet_decoder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float16",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float16",
10
+ "formattedType" : "MultiArray (Float16 1 × 1 × 640)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 640]",
13
+ "name" : "decoder_output",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 1 × 640)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1, 1, 640]",
23
+ "name" : "h_out",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 1 × 640)",
31
+ "shortDescription" : "",
32
+ "shape" : "[1, 1, 640]",
33
+ "name" : "c_out",
34
+ "type" : "MultiArray"
35
+ }
36
+ ],
37
+ "modelParameters" : [
38
+
39
+ ],
40
+ "specificationVersion" : 8,
41
+ "mlProgramOperationTypeHistogram" : {
42
+ "Ios17.squeeze" : 2,
43
+ "Ios17.gather" : 1,
44
+ "Ios17.cast" : 3,
45
+ "Ios17.lstm" : 1,
46
+ "Ios17.transpose" : 6,
47
+ "Identity" : 1,
48
+ "Ios17.expandDims" : 2
49
+ },
50
+ "computePrecision" : "Mixed (Float16, Int16, Int32)",
51
+ "isUpdatable" : "0",
52
+ "stateSchema" : [
53
+
54
+ ],
55
+ "availability" : {
56
+ "macOS" : "14.0",
57
+ "tvOS" : "17.0",
58
+ "visionOS" : "1.0",
59
+ "watchOS" : "10.0",
60
+ "iOS" : "17.0",
61
+ "macCatalyst" : "17.0"
62
+ },
63
+ "modelType" : {
64
+ "name" : "MLModelType_mlProgram"
65
+ },
66
+ "userDefinedMetadata" : {
67
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
68
+ "com.github.apple.coremltools.source" : "torch==2.4.0",
69
+ "com.github.apple.coremltools.version" : "8.3.0"
70
+ },
71
+ "inputSchema" : [
72
+ {
73
+ "hasShapeFlexibility" : "0",
74
+ "isOptional" : "0",
75
+ "dataType" : "Int32",
76
+ "formattedType" : "MultiArray (Int32 1 × 1)",
77
+ "shortDescription" : "",
78
+ "shape" : "[1, 1]",
79
+ "name" : "targets",
80
+ "type" : "MultiArray"
81
+ },
82
+ {
83
+ "hasShapeFlexibility" : "0",
84
+ "isOptional" : "0",
85
+ "dataType" : "Int32",
86
+ "formattedType" : "MultiArray (Int32 1)",
87
+ "shortDescription" : "",
88
+ "shape" : "[1]",
89
+ "name" : "target_length",
90
+ "type" : "MultiArray"
91
+ },
92
+ {
93
+ "hasShapeFlexibility" : "0",
94
+ "isOptional" : "0",
95
+ "dataType" : "Float32",
96
+ "formattedType" : "MultiArray (Float32 1 × 1 × 640)",
97
+ "shortDescription" : "",
98
+ "shape" : "[1, 1, 640]",
99
+ "name" : "h_in",
100
+ "type" : "MultiArray"
101
+ },
102
+ {
103
+ "hasShapeFlexibility" : "0",
104
+ "isOptional" : "0",
105
+ "dataType" : "Float32",
106
+ "formattedType" : "MultiArray (Float32 1 × 1 × 640)",
107
+ "shortDescription" : "",
108
+ "shape" : "[1, 1, 640]",
109
+ "name" : "c_in",
110
+ "type" : "MultiArray"
111
+ }
112
+ ],
113
+ "generatedClassName" : "parakeet_decoder",
114
+ "method" : "predict"
115
+ }
116
+ ]
parakeet_decoder.mlmodelc/model.mil ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.0)
2
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.4.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
3
+ {
4
+ func main<ios17>(tensor<fp32, [1, 1, 640]> c_in, tensor<fp32, [1, 1, 640]> h_in, tensor<int32, [1]> target_length, tensor<int32, [1, 1]> targets) {
5
+ tensor<int32, [3]> var_14 = const()[name = tensor<string, []>("op_14"), val = tensor<int32, [3]>([1, 0, 2])];
6
+ tensor<string, []> h_in_to_fp16_dtype_0 = const()[name = tensor<string, []>("h_in_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
7
+ tensor<int32, [3]> var_21 = const()[name = tensor<string, []>("op_21"), val = tensor<int32, [3]>([1, 0, 2])];
8
+ tensor<string, []> c_in_to_fp16_dtype_0 = const()[name = tensor<string, []>("c_in_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
9
+ tensor<int32, []> y_axis_0 = const()[name = tensor<string, []>("y_axis_0"), val = tensor<int32, []>(0)];
10
+ tensor<int32, []> y_batch_dims_0 = const()[name = tensor<string, []>("y_batch_dims_0"), val = tensor<int32, []>(0)];
11
+ tensor<bool, []> y_validate_indices_0 = const()[name = tensor<string, []>("y_validate_indices_0"), val = tensor<bool, []>(false)];
12
+ tensor<fp16, [1027, 640]> decoder_prediction_embed_weight_to_fp16 = const()[name = tensor<string, []>("decoder_prediction_embed_weight_to_fp16"), val = tensor<fp16, [1027, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
13
+ tensor<string, []> targets_to_int16_dtype_0 = const()[name = tensor<string, []>("targets_to_int16_dtype_0"), val = tensor<string, []>("int16")];
14
+ tensor<int16, [1, 1]> targets_to_int16 = cast(dtype = targets_to_int16_dtype_0, x = targets)[name = tensor<string, []>("cast_4")];
15
+ tensor<fp16, [1, 1, 640]> y_cast_fp16_cast_uint16 = gather(axis = y_axis_0, batch_dims = y_batch_dims_0, indices = targets_to_int16, validate_indices = y_validate_indices_0, x = decoder_prediction_embed_weight_to_fp16)[name = tensor<string, []>("y_cast_fp16_cast_uint16")];
16
+ tensor<int32, [3]> input_1_perm_0 = const()[name = tensor<string, []>("input_1_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
17
+ tensor<int32, [1]> input_lstm_h0_squeeze_axes_0 = const()[name = tensor<string, []>("input_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
18
+ tensor<fp16, [1, 1, 640]> h_in_to_fp16 = cast(dtype = h_in_to_fp16_dtype_0, x = h_in)[name = tensor<string, []>("cast_6")];
19
+ tensor<fp16, [1, 1, 640]> var_15_cast_fp16 = transpose(perm = var_14, x = h_in_to_fp16)[name = tensor<string, []>("transpose_5")];
20
+ tensor<fp16, [1, 640]> input_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input_lstm_h0_squeeze_axes_0, x = var_15_cast_fp16)[name = tensor<string, []>("input_lstm_h0_squeeze_cast_fp16")];
21
+ tensor<int32, [1]> input_lstm_c0_squeeze_axes_0 = const()[name = tensor<string, []>("input_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
22
+ tensor<fp16, [1, 1, 640]> c_in_to_fp16 = cast(dtype = c_in_to_fp16_dtype_0, x = c_in)[name = tensor<string, []>("cast_5")];
23
+ tensor<fp16, [1, 1, 640]> var_22_cast_fp16 = transpose(perm = var_21, x = c_in_to_fp16)[name = tensor<string, []>("transpose_4")];
24
+ tensor<fp16, [1, 640]> input_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input_lstm_c0_squeeze_axes_0, x = var_22_cast_fp16)[name = tensor<string, []>("input_lstm_c0_squeeze_cast_fp16")];
25
+ tensor<string, []> input_direction_0 = const()[name = tensor<string, []>("input_direction_0"), val = tensor<string, []>("forward")];
26
+ tensor<bool, []> input_output_sequence_0 = const()[name = tensor<string, []>("input_output_sequence_0"), val = tensor<bool, []>(true)];
27
+ tensor<string, []> input_recurrent_activation_0 = const()[name = tensor<string, []>("input_recurrent_activation_0"), val = tensor<string, []>("sigmoid")];
28
+ tensor<string, []> input_cell_activation_0 = const()[name = tensor<string, []>("input_cell_activation_0"), val = tensor<string, []>("tanh")];
29
+ tensor<string, []> input_activation_0 = const()[name = tensor<string, []>("input_activation_0"), val = tensor<string, []>("tanh")];
30
+ tensor<fp16, [2560, 640]> concat_1_to_fp16 = const()[name = tensor<string, []>("concat_1_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1314688)))];
31
+ tensor<fp16, [2560, 640]> concat_2_to_fp16 = const()[name = tensor<string, []>("concat_2_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4591552)))];
32
+ tensor<fp16, [2560]> concat_0_to_fp16 = const()[name = tensor<string, []>("concat_0_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7868416)))];
33
+ tensor<fp16, [1, 1, 640]> input_1_cast_fp16 = transpose(perm = input_1_perm_0, x = y_cast_fp16_cast_uint16)[name = tensor<string, []>("transpose_3")];
34
+ tensor<fp16, [1, 1, 640]> input_cast_fp16_0, tensor<fp16, [1, 640]> input_cast_fp16_1, tensor<fp16, [1, 640]> input_cast_fp16_2 = lstm(activation = input_activation_0, bias = concat_0_to_fp16, cell_activation = input_cell_activation_0, direction = input_direction_0, initial_c = input_lstm_c0_squeeze_cast_fp16, initial_h = input_lstm_h0_squeeze_cast_fp16, output_sequence = input_output_sequence_0, recurrent_activation = input_recurrent_activation_0, weight_hh = concat_2_to_fp16, weight_ih = concat_1_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
35
+ tensor<int32, [1]> var_44_axes_0 = const()[name = tensor<string, []>("op_44_axes_0"), val = tensor<int32, [1]>([0])];
36
+ tensor<fp16, [1, 1, 640]> var_44_cast_fp16 = expand_dims(axes = var_44_axes_0, x = input_cast_fp16_1)[name = tensor<string, []>("op_44_cast_fp16")];
37
+ tensor<int32, [1]> var_45_axes_0 = const()[name = tensor<string, []>("op_45_axes_0"), val = tensor<int32, [1]>([0])];
38
+ tensor<fp16, [1, 1, 640]> var_45_cast_fp16 = expand_dims(axes = var_45_axes_0, x = input_cast_fp16_2)[name = tensor<string, []>("op_45_cast_fp16")];
39
+ tensor<int32, [3]> var_57_perm_0 = const()[name = tensor<string, []>("op_57_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
40
+ tensor<int32, [3]> var_61 = const()[name = tensor<string, []>("op_61"), val = tensor<int32, [3]>([1, 0, 2])];
41
+ tensor<int32, [3]> var_66 = const()[name = tensor<string, []>("op_66"), val = tensor<int32, [3]>([1, 0, 2])];
42
+ tensor<fp16, [1, 1, 640]> c_out = transpose(perm = var_66, x = var_45_cast_fp16)[name = tensor<string, []>("transpose_0")];
43
+ tensor<fp16, [1, 1, 640]> h_out = transpose(perm = var_61, x = var_44_cast_fp16)[name = tensor<string, []>("transpose_1")];
44
+ tensor<fp16, [1, 1, 640]> decoder_output = transpose(perm = var_57_perm_0, x = input_cast_fp16_0)[name = tensor<string, []>("transpose_2")];
45
+ tensor<int32, [1]> target_length_tmp = identity(x = target_length)[name = tensor<string, []>("target_length_tmp")];
46
+ } -> (decoder_output, h_out, c_out);
47
+ }
parakeet_decoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b4cacecdcd9df79ab1e56de67230baf5a8664d2afe0bb8f3408eefa972cb2f4
3
+ size 7873600
parakeet_decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b706227c9c2a2d64ea0fa3879ca9a4673e61944e8e374160e5a20ae7382207c3
3
+ size 6750
parakeet_decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b4cacecdcd9df79ab1e56de67230baf5a8664d2afe0bb8f3408eefa972cb2f4
3
+ size 7873600
parakeet_decoder.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "2A776510-11A3-4993-A996-06C985BF1840": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "5111D7BD-E8E5-42A4-A8E2-11BD568F106B": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "5111D7BD-E8E5-42A4-A8E2-11BD568F106B"
18
+ }
parakeet_joint.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51354af666471dab9e2344e1a7b93004c7fef44c3d455dde75bcaf0abbcc72af
3
+ size 243
parakeet_joint.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9ecae4f1db9350ad8bd7050c3c1b973926798c2f7ff408e9ad512d3013f238b
3
+ size 355
parakeet_joint.mlmodelc/metadata.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float32",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 1 × 1027)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 1, 1027]",
13
+ "name" : "logits",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "modelParameters" : [
18
+
19
+ ],
20
+ "specificationVersion" : 8,
21
+ "mlProgramOperationTypeHistogram" : {
22
+ "Ios17.expandDims" : 2,
23
+ "Ios17.transpose" : 1,
24
+ "Ios17.linear" : 3,
25
+ "Ios17.add" : 1,
26
+ "Ios16.relu" : 1
27
+ },
28
+ "computePrecision" : "Mixed (Float32, Int32)",
29
+ "isUpdatable" : "0",
30
+ "stateSchema" : [
31
+
32
+ ],
33
+ "availability" : {
34
+ "macOS" : "14.0",
35
+ "tvOS" : "17.0",
36
+ "visionOS" : "1.0",
37
+ "watchOS" : "10.0",
38
+ "iOS" : "17.0",
39
+ "macCatalyst" : "17.0"
40
+ },
41
+ "modelType" : {
42
+ "name" : "MLModelType_mlProgram"
43
+ },
44
+ "userDefinedMetadata" : {
45
+ "com.github.apple.coremltools.version" : "8.3.0",
46
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
47
+ "com.github.apple.coremltools.source" : "torch==2.4.0"
48
+ },
49
+ "inputSchema" : [
50
+ {
51
+ "hasShapeFlexibility" : "0",
52
+ "isOptional" : "0",
53
+ "dataType" : "Float32",
54
+ "formattedType" : "MultiArray (Float32 1 × 512 × 1)",
55
+ "shortDescription" : "",
56
+ "shape" : "[1, 512, 1]",
57
+ "name" : "encoder_output",
58
+ "type" : "MultiArray"
59
+ },
60
+ {
61
+ "hasShapeFlexibility" : "0",
62
+ "isOptional" : "0",
63
+ "dataType" : "Float32",
64
+ "formattedType" : "MultiArray (Float32 1 × 1 × 640)",
65
+ "shortDescription" : "",
66
+ "shape" : "[1, 1, 640]",
67
+ "name" : "decoder_output",
68
+ "type" : "MultiArray"
69
+ }
70
+ ],
71
+ "generatedClassName" : "parakeet_joint",
72
+ "method" : "predict"
73
+ }
74
+ ]
parakeet_joint.mlmodelc/model.mil ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.0)
2
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.4.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
3
+ {
4
+ func main<ios17>(tensor<fp32, [1, 1, 640]> decoder_output, tensor<fp32, [1, 512, 1]> encoder_output) {
5
+ tensor<fp32, [640]> joint_enc_bias = const()[name = tensor<string, []>("joint_enc_bias"), val = tensor<fp32, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
6
+ tensor<fp32, [640, 512]> joint_enc_weight = const()[name = tensor<string, []>("joint_enc_weight"), val = tensor<fp32, [640, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2688)))];
7
+ tensor<fp32, [640]> joint_pred_bias = const()[name = tensor<string, []>("joint_pred_bias"), val = tensor<fp32, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1313472)))];
8
+ tensor<fp32, [640, 640]> joint_pred_weight = const()[name = tensor<string, []>("joint_pred_weight"), val = tensor<fp32, [640, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1316096)))];
9
+ tensor<fp32, [1027]> joint_joint_net_2_bias = const()[name = tensor<string, []>("joint_joint_net_2_bias"), val = tensor<fp32, [1027]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2954560)))];
10
+ tensor<fp32, [1027, 640]> joint_joint_net_2_weight = const()[name = tensor<string, []>("joint_joint_net_2_weight"), val = tensor<fp32, [1027, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2958784)))];
11
+ tensor<int32, [3]> input_1_perm_0 = const()[name = tensor<string, []>("input_1_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
12
+ tensor<fp32, [1, 1, 512]> input_1 = transpose(perm = input_1_perm_0, x = encoder_output)[name = tensor<string, []>("transpose_0")];
13
+ tensor<fp32, [1, 1, 640]> f = linear(bias = joint_enc_bias, weight = joint_enc_weight, x = input_1)[name = tensor<string, []>("linear_0")];
14
+ tensor<fp32, [1, 1, 640]> g = linear(bias = joint_pred_bias, weight = joint_pred_weight, x = decoder_output)[name = tensor<string, []>("linear_1")];
15
+ tensor<int32, [1]> var_19_axes_0 = const()[name = tensor<string, []>("op_19_axes_0"), val = tensor<int32, [1]>([2])];
16
+ tensor<fp32, [1, 1, 1, 640]> var_19 = expand_dims(axes = var_19_axes_0, x = f)[name = tensor<string, []>("op_19")];
17
+ tensor<int32, [1]> var_21_axes_0 = const()[name = tensor<string, []>("op_21_axes_0"), val = tensor<int32, [1]>([1])];
18
+ tensor<fp32, [1, 1, 1, 640]> var_21 = expand_dims(axes = var_21_axes_0, x = g)[name = tensor<string, []>("op_21")];
19
+ tensor<fp32, [1, 1, 1, 640]> input_3 = add(x = var_19, y = var_21)[name = tensor<string, []>("input_3")];
20
+ tensor<fp32, [1, 1, 1, 640]> input_5 = relu(x = input_3)[name = tensor<string, []>("input_5")];
21
+ tensor<fp32, [1, 1, 1, 1027]> logits = linear(bias = joint_joint_net_2_bias, weight = joint_joint_net_2_weight, x = input_5)[name = tensor<string, []>("linear_2")];
22
+ } -> (logits);
23
+ }
parakeet_joint.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f909b079b4923a05e522c15926ab7c8614c3d088a0da555970b16eb5447ce19c
3
+ size 5587968
parakeet_joint.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:570c88c720a7cca648db2d493635420c24ff837099586e384099c705425b207e
3
+ size 3015
parakeet_joint.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f909b079b4923a05e522c15926ab7c8614c3d088a0da555970b16eb5447ce19c
3
+ size 5587968
parakeet_joint.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "A8C1223F-3E7A-421C-AFF6-DB3EADE3826B": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "C6F9C4E8-810B-42F5-9184-A7F28B430B15": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "C6F9C4E8-810B-42F5-9184-A7F28B430B15"
18
+ }
preprocessor.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66bc489d462bb3131074b17c0cf18efe85bc0619b1e22f4a94c69d25576c1041
3
+ size 243
preprocessor.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61ed911e33fe8a791a4655ff7539a086dd760a60b60b233153cb769d85f41311
3
+ size 373
preprocessor.mlmodelc/metadata.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float16",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32)",
11
+ "shortDescription" : "",
12
+ "shape" : "[]",
13
+ "name" : "mel",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Int32",
20
+ "formattedType" : "MultiArray (Int32 1)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1]",
23
+ "name" : "mel_length",
24
+ "type" : "MultiArray"
25
+ }
26
+ ],
27
+ "modelParameters" : [
28
+
29
+ ],
30
+ "specificationVersion" : 8,
31
+ "mlProgramOperationTypeHistogram" : {
32
+ "Range1d" : 1,
33
+ "Ios17.reshape" : 2,
34
+ "Identity" : 1,
35
+ "Ios17.matmul" : 1,
36
+ "Ios17.expandDims" : 5,
37
+ "Select" : 1,
38
+ "Ios17.add" : 3,
39
+ "Ios17.sliceByIndex" : 3,
40
+ "Ios16.reduceSum" : 1,
41
+ "Shape" : 1,
42
+ "Ios17.gather" : 1,
43
+ "Pad" : 1,
44
+ "Ios17.log" : 1,
45
+ "Ios17.conv" : 2,
46
+ "Ios17.sub" : 2,
47
+ "Ios17.pow" : 1,
48
+ "Ios17.cast" : 6,
49
+ "Stack" : 1,
50
+ "Ios17.concat" : 1,
51
+ "Ios17.floorDiv" : 1,
52
+ "Ios17.greaterEqual" : 1,
53
+ "Ios17.mul" : 1
54
+ },
55
+ "computePrecision" : "Mixed (Float16, Float32, Int32, UInt16)",
56
+ "isUpdatable" : "0",
57
+ "stateSchema" : [
58
+
59
+ ],
60
+ "availability" : {
61
+ "macOS" : "14.0",
62
+ "tvOS" : "17.0",
63
+ "visionOS" : "1.0",
64
+ "watchOS" : "10.0",
65
+ "iOS" : "17.0",
66
+ "macCatalyst" : "17.0"
67
+ },
68
+ "modelType" : {
69
+ "name" : "MLModelType_mlProgram"
70
+ },
71
+ "userDefinedMetadata" : {
72
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
73
+ "com.github.apple.coremltools.source" : "torch==2.4.0",
74
+ "com.github.apple.coremltools.version" : "8.3.0"
75
+ },
76
+ "inputSchema" : [
77
+ {
78
+ "dataType" : "Float32",
79
+ "hasShapeFlexibility" : "1",
80
+ "isOptional" : "0",
81
+ "shapeFlexibility" : "1 × 1600...16000",
82
+ "shapeRange" : "[[1, 1], [1600, 16000]]",
83
+ "formattedType" : "MultiArray (Float32 1 × 6560)",
84
+ "type" : "MultiArray",
85
+ "shape" : "[1, 6560]",
86
+ "name" : "input_signal",
87
+ "shortDescription" : ""
88
+ },
89
+ {
90
+ "hasShapeFlexibility" : "0",
91
+ "isOptional" : "0",
92
+ "dataType" : "Int32",
93
+ "formattedType" : "MultiArray (Int32 1)",
94
+ "shortDescription" : "",
95
+ "shape" : "[1]",
96
+ "name" : "length",
97
+ "type" : "MultiArray"
98
+ }
99
+ ],
100
+ "generatedClassName" : "preprocessor",
101
+ "method" : "predict"
102
+ }
103
+ ]
preprocessor.mlmodelc/model.mil ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.0)
2
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.4.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
3
+ {
4
+ func main<ios17>(tensor<fp32, [1, ?]> input_signal, tensor<int32, [1]> length) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"input_signal", [1, 6560]}}), ("RangeDims", {{"input_signal", [[1, 1], [1600, 16000]]}})))] {
5
+ tensor<int32, []> var_4 = const()[name = tensor<string, []>("op_4"), val = tensor<int32, []>(1)];
6
+ tensor<int32, []> var_5 = const()[name = tensor<string, []>("op_5"), val = tensor<int32, []>(160)];
7
+ tensor<int32, []> var_27 = const()[name = tensor<string, []>("op_27"), val = tensor<int32, []>(512)];
8
+ tensor<int32, [1]> var_28 = add(x = length, y = var_27)[name = tensor<string, []>("op_28")];
9
+ tensor<int32, []> var_29 = const()[name = tensor<string, []>("op_29"), val = tensor<int32, []>(512)];
10
+ tensor<int32, [1]> var_30 = sub(x = var_28, y = var_29)[name = tensor<string, []>("op_30")];
11
+ tensor<int32, [1]> floor_div_0 = floor_div(x = var_30, y = var_5)[name = tensor<string, []>("floor_div_0")];
12
+ tensor<string, []> var_31_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_31_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
13
+ tensor<fp16, []> var_32_promoted_to_fp16 = const()[name = tensor<string, []>("op_32_promoted_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
14
+ tensor<fp16, [1]> floor_div_0_to_fp16 = cast(dtype = var_31_to_fp16_dtype_0, x = floor_div_0)[name = tensor<string, []>("cast_14")];
15
+ tensor<fp16, [1]> seq_len_1_cast_fp16 = add(x = floor_div_0_to_fp16, y = var_32_promoted_to_fp16)[name = tensor<string, []>("seq_len_1_cast_fp16")];
16
+ tensor<string, []> cast_1_dtype_0 = const()[name = tensor<string, []>("cast_1_dtype_0"), val = tensor<string, []>("int32")];
17
+ tensor<int32, [2]> var_36_begin_0 = const()[name = tensor<string, []>("op_36_begin_0"), val = tensor<int32, [2]>([0, 0])];
18
+ tensor<int32, [2]> var_36_end_0 = const()[name = tensor<string, []>("op_36_end_0"), val = tensor<int32, [2]>([1, 1])];
19
+ tensor<bool, [2]> var_36_end_mask_0 = const()[name = tensor<string, []>("op_36_end_mask_0"), val = tensor<bool, [2]>([true, false])];
20
+ tensor<bool, [2]> var_36_squeeze_mask_0 = const()[name = tensor<string, []>("op_36_squeeze_mask_0"), val = tensor<bool, [2]>([false, true])];
21
+ tensor<string, []> input_signal_to_fp16_dtype_0 = const()[name = tensor<string, []>("input_signal_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
22
+ tensor<fp16, [1, ?]> input_signal_to_fp16 = cast(dtype = input_signal_to_fp16_dtype_0, x = input_signal)[name = tensor<string, []>("cast_12")];
23
+ tensor<fp16, [1]> var_36_cast_fp16 = slice_by_index(begin = var_36_begin_0, end = var_36_end_0, end_mask = var_36_end_mask_0, squeeze_mask = var_36_squeeze_mask_0, x = input_signal_to_fp16)[name = tensor<string, []>("op_36_cast_fp16")];
24
+ tensor<int32, [1]> var_37_axes_0 = const()[name = tensor<string, []>("op_37_axes_0"), val = tensor<int32, [1]>([1])];
25
+ tensor<fp16, [1, 1]> var_37_cast_fp16 = expand_dims(axes = var_37_axes_0, x = var_36_cast_fp16)[name = tensor<string, []>("op_37_cast_fp16")];
26
+ tensor<int32, [2]> var_39_begin_0 = const()[name = tensor<string, []>("op_39_begin_0"), val = tensor<int32, [2]>([0, 1])];
27
+ tensor<int32, [2]> var_39_end_0 = const()[name = tensor<string, []>("op_39_end_0"), val = tensor<int32, [2]>([1, 0])];
28
+ tensor<bool, [2]> var_39_end_mask_0 = const()[name = tensor<string, []>("op_39_end_mask_0"), val = tensor<bool, [2]>([true, true])];
29
+ tensor<fp16, [1, ?]> var_39_cast_fp16 = slice_by_index(begin = var_39_begin_0, end = var_39_end_0, end_mask = var_39_end_mask_0, x = input_signal_to_fp16)[name = tensor<string, []>("op_39_cast_fp16")];
30
+ tensor<int32, [2]> var_41_begin_0 = const()[name = tensor<string, []>("op_41_begin_0"), val = tensor<int32, [2]>([0, 0])];
31
+ tensor<int32, [2]> var_41_end_0 = const()[name = tensor<string, []>("op_41_end_0"), val = tensor<int32, [2]>([1, -1])];
32
+ tensor<bool, [2]> var_41_end_mask_0 = const()[name = tensor<string, []>("op_41_end_mask_0"), val = tensor<bool, [2]>([true, false])];
33
+ tensor<fp16, [1, ?]> var_41_cast_fp16 = slice_by_index(begin = var_41_begin_0, end = var_41_end_0, end_mask = var_41_end_mask_0, x = input_signal_to_fp16)[name = tensor<string, []>("op_41_cast_fp16")];
34
+ tensor<fp16, []> var_42_to_fp16 = const()[name = tensor<string, []>("op_42_to_fp16"), val = tensor<fp16, []>(0x1.f0cp-1)];
35
+ tensor<fp16, [1, ?]> var_43_cast_fp16 = mul(x = var_41_cast_fp16, y = var_42_to_fp16)[name = tensor<string, []>("op_43_cast_fp16")];
36
+ tensor<fp16, [1, ?]> var_44_cast_fp16 = sub(x = var_39_cast_fp16, y = var_43_cast_fp16)[name = tensor<string, []>("op_44_cast_fp16")];
37
+ tensor<bool, []> input_1_interleave_0 = const()[name = tensor<string, []>("input_1_interleave_0"), val = tensor<bool, []>(false)];
38
+ tensor<fp16, [1, ?]> input_1_cast_fp16 = concat(axis = var_4, interleave = input_1_interleave_0, values = (var_37_cast_fp16, var_44_cast_fp16))[name = tensor<string, []>("input_1_cast_fp16")];
39
+ tensor<int32, [3]> concat_0x = const()[name = tensor<string, []>("concat_0x"), val = tensor<int32, [3]>([1, 1, -1])];
40
+ tensor<fp16, [1, 1, ?]> input_3_cast_fp16 = reshape(shape = concat_0x, x = input_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
41
+ tensor<int32, [6]> input_5_pad_0 = const()[name = tensor<string, []>("input_5_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 256, 256])];
42
+ tensor<string, []> input_5_mode_0 = const()[name = tensor<string, []>("input_5_mode_0"), val = tensor<string, []>("reflect")];
43
+ tensor<fp16, []> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
44
+ tensor<fp16, [1, 1, ?]> input_5_cast_fp16 = pad(constant_val = const_1_to_fp16, mode = input_5_mode_0, pad = input_5_pad_0, x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
45
+ tensor<int32, [2]> concat_1x = const()[name = tensor<string, []>("concat_1x"), val = tensor<int32, [2]>([1, -1])];
46
+ tensor<fp16, [1, ?]> input_cast_fp16 = reshape(shape = concat_1x, x = input_5_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
47
+ tensor<int32, [1]> expand_dims_3 = const()[name = tensor<string, []>("expand_dims_3"), val = tensor<int32, [1]>([160])];
48
+ tensor<int32, [1]> expand_dims_4_axes_0 = const()[name = tensor<string, []>("expand_dims_4_axes_0"), val = tensor<int32, [1]>([1])];
49
+ tensor<fp16, [1, 1, ?]> expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = input_cast_fp16)[name = tensor<string, []>("expand_dims_4_cast_fp16")];
50
+ tensor<string, []> conv_0_pad_type_0 = const()[name = tensor<string, []>("conv_0_pad_type_0"), val = tensor<string, []>("valid")];
51
+ tensor<int32, [2]> conv_0_pad_0 = const()[name = tensor<string, []>("conv_0_pad_0"), val = tensor<int32, [2]>([0, 0])];
52
+ tensor<int32, [1]> conv_0_dilations_0 = const()[name = tensor<string, []>("conv_0_dilations_0"), val = tensor<int32, [1]>([1])];
53
+ tensor<int32, []> conv_0_groups_0 = const()[name = tensor<string, []>("conv_0_groups_0"), val = tensor<int32, []>(1)];
54
+ tensor<fp16, [257, 1, 512]> expand_dims_1_to_fp16 = const()[name = tensor<string, []>("expand_dims_1_to_fp16"), val = tensor<fp16, [257, 1, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
55
+ tensor<fp16, [1, 257, ?]> conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_0_cast_fp16")];
56
+ tensor<string, []> conv_1_pad_type_0 = const()[name = tensor<string, []>("conv_1_pad_type_0"), val = tensor<string, []>("valid")];
57
+ tensor<int32, [2]> conv_1_pad_0 = const()[name = tensor<string, []>("conv_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
58
+ tensor<int32, [1]> conv_1_dilations_0 = const()[name = tensor<string, []>("conv_1_dilations_0"), val = tensor<int32, [1]>([1])];
59
+ tensor<int32, []> conv_1_groups_0 = const()[name = tensor<string, []>("conv_1_groups_0"), val = tensor<int32, []>(1)];
60
+ tensor<fp16, [257, 1, 512]> expand_dims_2_to_fp16 = const()[name = tensor<string, []>("expand_dims_2_to_fp16"), val = tensor<fp16, [257, 1, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263296)))];
61
+ tensor<fp16, [1, 257, ?]> conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_1_cast_fp16")];
62
+ tensor<int32, []> stack_0_axis_0 = const()[name = tensor<string, []>("stack_0_axis_0"), val = tensor<int32, []>(-1)];
63
+ tensor<fp16, [1, 257, ?, 2]> stack_0_cast_fp16 = stack(axis = stack_0_axis_0, values = (conv_0_cast_fp16, conv_1_cast_fp16))[name = tensor<string, []>("stack_0_cast_fp16")];
64
+ tensor<fp16, []> var_12_promoted_to_fp16 = const()[name = tensor<string, []>("op_12_promoted_to_fp16"), val = tensor<fp16, []>(0x1p+1)];
65
+ tensor<fp16, [1, 257, ?, 2]> var_60_cast_fp16 = pow(x = stack_0_cast_fp16, y = var_12_promoted_to_fp16)[name = tensor<string, []>("op_60_cast_fp16")];
66
+ tensor<int32, [1]> var_62_axes_0 = const()[name = tensor<string, []>("op_62_axes_0"), val = tensor<int32, [1]>([-1])];
67
+ tensor<bool, []> var_62_keep_dims_0 = const()[name = tensor<string, []>("op_62_keep_dims_0"), val = tensor<bool, []>(false)];
68
+ tensor<fp16, [1, 257, ?]> var_62_cast_fp16 = reduce_sum(axes = var_62_axes_0, keep_dims = var_62_keep_dims_0, x = var_60_cast_fp16)[name = tensor<string, []>("op_62_cast_fp16")];
69
+ tensor<fp16, [1, 257, ?]> x_9_cast_fp16 = identity(x = var_62_cast_fp16)[name = tensor<string, []>("x_9_cast_fp16")];
70
+ tensor<bool, []> x_11_transpose_x_0 = const()[name = tensor<string, []>("x_11_transpose_x_0"), val = tensor<bool, []>(false)];
71
+ tensor<bool, []> x_11_transpose_y_0 = const()[name = tensor<string, []>("x_11_transpose_y_0"), val = tensor<bool, []>(false)];
72
+ tensor<fp16, [1, 128, 257]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [1, 128, 257]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(526528)))];
73
+ tensor<fp16, [1, 128, ?]> x_11_cast_fp16 = matmul(transpose_x = x_11_transpose_x_0, transpose_y = x_11_transpose_y_0, x = const_2_to_fp16, y = x_9_cast_fp16)[name = tensor<string, []>("x_11_cast_fp16")];
74
+ tensor<fp16, []> var_69_to_fp16 = const()[name = tensor<string, []>("op_69_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
75
+ tensor<fp16, [1, 128, ?]> var_70_cast_fp16 = add(x = x_11_cast_fp16, y = var_69_to_fp16)[name = tensor<string, []>("op_70_cast_fp16")];
76
+ tensor<fp32, []> x_epsilon_0 = const()[name = tensor<string, []>("x_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
77
+ tensor<fp16, [1, 128, ?]> x_cast_fp16 = log(epsilon = x_epsilon_0, x = var_70_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
78
+ tensor<int32, [3]> var_72_shape_cast_fp16 = shape(x = x_cast_fp16)[name = tensor<string, []>("op_72_shape_cast_fp16")];
79
+ tensor<int32, []> gather_4_axis_0 = const()[name = tensor<string, []>("gather_4_axis_0"), val = tensor<int32, []>(0)];
80
+ tensor<int32, []> gather_4_batch_dims_0 = const()[name = tensor<string, []>("gather_4_batch_dims_0"), val = tensor<int32, []>(0)];
81
+ tensor<bool, []> gather_4_validate_indices_0 = const()[name = tensor<string, []>("gather_4_validate_indices_0"), val = tensor<bool, []>(false)];
82
+ tensor<string, []> var_72_shape_cast_fp16_to_uint16_dtype_0 = const()[name = tensor<string, []>("op_72_shape_cast_fp16_to_uint16_dtype_0"), val = tensor<string, []>("uint16")];
83
+ tensor<uint16, []> select_4_to_uint16 = const()[name = tensor<string, []>("select_4_to_uint16"), val = tensor<uint16, []>(2)];
84
+ tensor<uint16, [3]> var_72_shape_cast_fp16_to_uint16 = cast(dtype = var_72_shape_cast_fp16_to_uint16_dtype_0, x = var_72_shape_cast_fp16)[name = tensor<string, []>("cast_11")];
85
+ tensor<uint16, []> gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_72_shape_cast_fp16_to_uint16)[name = tensor<string, []>("gather_4_cast_uint16")];
86
+ tensor<string, []> gather_4_cast_uint16_to_int32_dtype_0 = const()[name = tensor<string, []>("gather_4_cast_uint16_to_int32_dtype_0"), val = tensor<string, []>("int32")];
87
+ tensor<int32, []> const_3 = const()[name = tensor<string, []>("const_3"), val = tensor<int32, []>(0)];
88
+ tensor<int32, []> const_4 = const()[name = tensor<string, []>("const_4"), val = tensor<int32, []>(1)];
89
+ tensor<int32, []> gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = tensor<string, []>("cast_10")];
90
+ tensor<int32, [?]> mask_1 = range_1d(end = gather_4_cast_uint16_to_int32, start = const_3, step = const_4)[name = tensor<string, []>("mask_1")];
91
+ tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = tensor<string, []>("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
92
+ tensor<int32, [1, ?]> expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = mask_1)[name = tensor<string, []>("expand_dims_0")];
93
+ tensor<int32, [1]> var_77_axes_0 = const()[name = tensor<string, []>("op_77_axes_0"), val = tensor<int32, [1]>([1])];
94
+ tensor<int32, [1]> mel_length = cast(dtype = cast_1_dtype_0, x = seq_len_1_cast_fp16)[name = tensor<string, []>("cast_13")];
95
+ tensor<int32, [1, 1]> var_77 = expand_dims(axes = var_77_axes_0, x = mel_length)[name = tensor<string, []>("op_77")];
96
+ tensor<bool, [1, ?]> mask = greater_equal(x = expand_dims_0, y = var_77)[name = tensor<string, []>("mask")];
97
+ tensor<int32, [1]> var_79_axes_0 = const()[name = tensor<string, []>("op_79_axes_0"), val = tensor<int32, [1]>([1])];
98
+ tensor<bool, [1, 1, ?]> var_79 = expand_dims(axes = var_79_axes_0, x = mask)[name = tensor<string, []>("op_79")];
99
+ tensor<fp16, []> cast_6_to_fp16 = const()[name = tensor<string, []>("cast_6_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
100
+ tensor<fp16, [1, 128, ?]> processed_signal_cast_fp16 = select(a = cast_6_to_fp16, b = x_cast_fp16, cond = var_79)[name = tensor<string, []>("processed_signal_cast_fp16")];
101
+ tensor<string, []> processed_signal_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("processed_signal_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
102
+ tensor<fp32, [1, 128, ?]> mel = cast(dtype = processed_signal_cast_fp16_to_fp32_dtype_0, x = processed_signal_cast_fp16)[name = tensor<string, []>("cast_9")];
103
+ } -> (mel, mel_length);
104
+ }
preprocessor.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f257ad1ac11575d73a6ffda555319b2c96b0a224f0dc03ddd8c62950e9b18e53
3
+ size 592384
preprocessor.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeb46c27ed7a75560111435ef86afbe9128669301b897d613a3fb1cbf8753fc2
3
+ size 13695
preprocessor.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f257ad1ac11575d73a6ffda555319b2c96b0a224f0dc03ddd8c62950e9b18e53
3
+ size 592384
preprocessor.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "41A87408-9448-4732-A714-AABD9E8264CD": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "D1D243D6-CE3A-446A-A657-4F2BA0FC58CE": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "D1D243D6-CE3A-446A-A657-4F2BA0FC58CE"
18
+ }
streaming_encoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d10e88a440fa9a238b34284f6be7310ebe682ec7f5240053007b26fe4991edc
3
+ size 243
streaming_encoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdcbb3050dd6912cbe43025b9d5e5bbbdfa9471bc08ce5b32565e51a75109638
3
+ size 594
streaming_encoder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float16",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 512 × 4)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 512, 4]",
13
+ "name" : "encoder",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Int32",
20
+ "formattedType" : "MultiArray (Int32 1)",
21
+ "shortDescription" : "",
22
+ "shape" : "[1]",
23
+ "name" : "encoder_length",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 17 × 1 × 70 × 512)",
31
+ "shortDescription" : "",
32
+ "shape" : "[17, 1, 70, 512]",
33
+ "name" : "cache_last_channel_out",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float32",
40
+ "formattedType" : "MultiArray (Float32 17 × 1 × 512 × 8)",
41
+ "shortDescription" : "",
42
+ "shape" : "[17, 1, 512, 8]",
43
+ "name" : "cache_last_time_out",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Int32",
50
+ "formattedType" : "MultiArray (Int32 1)",
51
+ "shortDescription" : "",
52
+ "shape" : "[1]",
53
+ "name" : "cache_last_channel_len_out",
54
+ "type" : "MultiArray"
55
+ }
56
+ ],
57
+ "modelParameters" : [
58
+
59
+ ],
60
+ "specificationVersion" : 8,
61
+ "mlProgramOperationTypeHistogram" : {
62
+ "Ios17.floor" : 3,
63
+ "Ios17.logicalAnd" : 3,
64
+ "Ios17.reshape" : 103,
65
+ "Ios16.softmax" : 17,
66
+ "Ios17.matmul" : 51,
67
+ "Ios17.transpose" : 157,
68
+ "Split" : 17,
69
+ "Ios17.expandDims" : 6,
70
+ "Select" : 51,
71
+ "Ios17.add" : 125,
72
+ "Tile" : 1,
73
+ "Ios17.sliceByIndex" : 105,
74
+ "Ios16.sigmoid" : 17,
75
+ "Pad" : 20,
76
+ "Ios17.logicalNot" : 2,
77
+ "Ios17.layerNorm" : 102,
78
+ "Ios17.less" : 1,
79
+ "Ios17.sub" : 1,
80
+ "Ios17.conv" : 56,
81
+ "Ios17.clip" : 2,
82
+ "Ios16.relu" : 3,
83
+ "Ios17.linear" : 137,
84
+ "Ios17.greaterEqual" : 1,
85
+ "Ios17.cast" : 12,
86
+ "Ios16.silu" : 51,
87
+ "Ios17.concat" : 51,
88
+ "Stack" : 2,
89
+ "Ios17.mul" : 72
90
+ },
91
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
92
+ "isUpdatable" : "0",
93
+ "stateSchema" : [
94
+
95
+ ],
96
+ "availability" : {
97
+ "macOS" : "14.0",
98
+ "tvOS" : "17.0",
99
+ "visionOS" : "1.0",
100
+ "watchOS" : "10.0",
101
+ "iOS" : "17.0",
102
+ "macCatalyst" : "17.0"
103
+ },
104
+ "modelType" : {
105
+ "name" : "MLModelType_mlProgram"
106
+ },
107
+ "userDefinedMetadata" : {
108
+ "com.github.apple.coremltools.version" : "8.3.0",
109
+ "com.github.apple.coremltools.source" : "torch==2.4.0",
110
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
111
+ },
112
+ "inputSchema" : [
113
+ {
114
+ "hasShapeFlexibility" : "0",
115
+ "isOptional" : "0",
116
+ "dataType" : "Float32",
117
+ "formattedType" : "MultiArray (Float32 1 × 128 × 41)",
118
+ "shortDescription" : "",
119
+ "shape" : "[1, 128, 41]",
120
+ "name" : "mel",
121
+ "type" : "MultiArray"
122
+ },
123
+ {
124
+ "hasShapeFlexibility" : "0",
125
+ "isOptional" : "0",
126
+ "dataType" : "Int32",
127
+ "formattedType" : "MultiArray (Int32 1)",
128
+ "shortDescription" : "",
129
+ "shape" : "[1]",
130
+ "name" : "mel_length",
131
+ "type" : "MultiArray"
132
+ },
133
+ {
134
+ "hasShapeFlexibility" : "0",
135
+ "isOptional" : "0",
136
+ "dataType" : "Float32",
137
+ "formattedType" : "MultiArray (Float32 17 × 1 × 70 × 512)",
138
+ "shortDescription" : "",
139
+ "shape" : "[17, 1, 70, 512]",
140
+ "name" : "cache_last_channel",
141
+ "type" : "MultiArray"
142
+ },
143
+ {
144
+ "hasShapeFlexibility" : "0",
145
+ "isOptional" : "0",
146
+ "dataType" : "Float32",
147
+ "formattedType" : "MultiArray (Float32 17 × 1 × 512 × 8)",
148
+ "shortDescription" : "",
149
+ "shape" : "[17, 1, 512, 8]",
150
+ "name" : "cache_last_time",
151
+ "type" : "MultiArray"
152
+ },
153
+ {
154
+ "hasShapeFlexibility" : "0",
155
+ "isOptional" : "0",
156
+ "dataType" : "Int32",
157
+ "formattedType" : "MultiArray (Int32 1)",
158
+ "shortDescription" : "",
159
+ "shape" : "[1]",
160
+ "name" : "cache_last_channel_len",
161
+ "type" : "MultiArray"
162
+ }
163
+ ],
164
+ "generatedClassName" : "streaming_encoder",
165
+ "method" : "predict"
166
+ }
167
+ ]
streaming_encoder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
streaming_encoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671ab3731e79a55d8b405d633971e7d0bf1b4a6ad8f07133172e868755457905
3
+ size 212726592
streaming_encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:617c9a05405e9e6134838a5c760ab93b83f41b5c1407ce671526c172f94a0c9c
3
+ size 504210
streaming_encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671ab3731e79a55d8b405d633971e7d0bf1b4a6ad8f07133172e868755457905
3
+ size 212726592
streaming_encoder.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "7B68916D-2718-4249-8DA5-9B31FEE8478A": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "E9E4EE1E-0F56-46D5-9093-67095CF85F35": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "E9E4EE1E-0F56-46D5-9093-67095CF85F35"
18
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d556e51ba5b89db64a8cb2be6798fb29974edcadb58b0c7b80418eb5d8752303
3
+ size 258183
vocab.json CHANGED
@@ -1,1028 +1,1028 @@
1
- {
2
- "0": "<unk>",
3
- "1": "▁t",
4
- "2": "▁th",
5
- "3": "▁a",
6
- "4": "▁i",
7
- "5": "▁the",
8
- "6": "▁s",
9
- "7": "re",
10
- "8": "▁w",
11
- "9": "▁o",
12
- "10": "in",
13
- "11": "at",
14
- "12": "er",
15
- "13": "nd",
16
- "14": "ou",
17
- "15": "▁c",
18
- "16": "▁b",
19
- "17": "▁h",
20
- "18": "en",
21
- "19": "on",
22
- "20": "▁m",
23
- "21": "▁f",
24
- "22": "ing",
25
- "23": "▁p",
26
- "24": "▁to",
27
- "25": "▁and",
28
- "26": "▁d",
29
- "27": "an",
30
- "28": "or",
31
- "29": "es",
32
- "30": "▁y",
33
- "31": "▁l",
34
- "32": "▁of",
35
- "33": "ll",
36
- "34": "▁in",
37
- "35": "ed",
38
- "36": "it",
39
- "37": "▁g",
40
- "38": "is",
41
- "39": "▁you",
42
- "40": "▁n",
43
- "41": "ar",
44
- "42": "om",
45
- "43": "as",
46
- "44": "ve",
47
- "45": "▁e",
48
- "46": "ic",
49
- "47": "▁it",
50
- "48": "al",
51
- "49": "us",
52
- "50": "▁wh",
53
- "51": "▁we",
54
- "52": "▁be",
55
- "53": "ion",
56
- "54": "ow",
57
- "55": "le",
58
- "56": "▁is",
59
- "57": "et",
60
- "58": "ent",
61
- "59": "ot",
62
- "60": "ut",
63
- "61": "▁re",
64
- "62": "▁on",
65
- "63": "ay",
66
- "64": "▁ha",
67
- "65": "ig",
68
- "66": "▁so",
69
- "67": "ct",
70
- "68": "▁he",
71
- "69": "▁for",
72
- "70": "ver",
73
- "71": "ke",
74
- "72": "ro",
75
- "73": "▁st",
76
- "74": "id",
77
- "75": "▁go",
78
- "76": "all",
79
- "77": "se",
80
- "78": "ly",
81
- "79": "▁u",
82
- "80": "ch",
83
- "81": "st",
84
- "82": "ld",
85
- "83": "▁k",
86
- "84": "ce",
87
- "85": "ur",
88
- "86": "▁li",
89
- "87": "am",
90
- "88": "▁r",
91
- "89": "ht",
92
- "90": "▁j",
93
- "91": "ith",
94
- "92": "▁se",
95
- "93": "ir",
96
- "94": "▁as",
97
- "95": "▁an",
98
- "96": "im",
99
- "97": "▁do",
100
- "98": "ad",
101
- "99": "▁was",
102
- "100": "ight",
103
- "101": "th",
104
- "102": "▁are",
105
- "103": "▁but",
106
- "104": "▁sh",
107
- "105": "ust",
108
- "106": "ally",
109
- "107": "▁not",
110
- "108": "▁or",
111
- "109": "▁com",
112
- "110": "▁can",
113
- "111": "▁me",
114
- "112": "op",
115
- "113": "▁mo",
116
- "114": "▁at",
117
- "115": "ill",
118
- "116": "▁ch",
119
- "117": "▁ne",
120
- "118": "ant",
121
- "119": "▁de",
122
- "120": "▁kn",
123
- "121": "▁one",
124
- "122": "il",
125
- "123": "ol",
126
- "124": "▁con",
127
- "125": "ter",
128
- "126": "▁ab",
129
- "127": "▁fr",
130
- "128": "ere",
131
- "129": "ck",
132
- "130": "▁al",
133
- "131": "▁all",
134
- "132": "qu",
135
- "133": "▁pro",
136
- "134": "▁som",
137
- "135": "ould",
138
- "136": "▁tw",
139
- "137": "ul",
140
- "138": "ra",
141
- "139": "od",
142
- "140": "ers",
143
- "141": "▁su",
144
- "142": "ive",
145
- "143": "▁v",
146
- "144": "use",
147
- "145": "ate",
148
- "146": "ge",
149
- "147": "if",
150
- "148": "▁ex",
151
- "149": "ess",
152
- "150": "pp",
153
- "151": "▁lo",
154
- "152": "out",
155
- "153": "▁if",
156
- "154": "est",
157
- "155": "ain",
158
- "156": "ist",
159
- "157": "and",
160
- "158": "ea",
161
- "159": "very",
162
- "160": "art",
163
- "161": "▁wor",
164
- "162": "▁my",
165
- "163": "ab",
166
- "164": "ment",
167
- "165": "▁bec",
168
- "166": "un",
169
- "167": "ity",
170
- "168": "ri",
171
- "169": "pe",
172
- "170": "ions",
173
- "171": "▁by",
174
- "172": "ok",
175
- "173": "our",
176
- "174": "ort",
177
- "175": "ind",
178
- "176": "ink",
179
- "177": "nt",
180
- "178": "▁up",
181
- "179": "um",
182
- "180": "▁don",
183
- "181": "▁get",
184
- "182": "red",
185
- "183": "▁out",
186
- "184": "el",
187
- "185": "ause",
188
- "186": "res",
189
- "187": "▁ma",
190
- "188": "ich",
191
- "189": "▁us",
192
- "190": "rou",
193
- "191": "▁int",
194
- "192": "em",
195
- "193": "os",
196
- "194": "ies",
197
- "195": "ie",
198
- "196": "▁pl",
199
- "197": "▁tr",
200
- "198": "ven",
201
- "199": "ous",
202
- "200": "▁le",
203
- "201": "▁two",
204
- "202": "ard",
205
- "203": "ine",
206
- "204": "▁co",
207
- "205": "een",
208
- "206": "▁now",
209
- "207": "ty",
210
- "208": "her",
211
- "209": "ack",
212
- "210": "▁pe",
213
- "211": "ame",
214
- "212": "▁how",
215
- "213": "▁who",
216
- "214": "▁see",
217
- "215": "▁tim",
218
- "216": "ect",
219
- "217": "ast",
220
- "218": "▁our",
221
- "219": "ci",
222
- "220": "ree",
223
- "221": "ople",
224
- "222": "gh",
225
- "223": "▁no",
226
- "224": "▁had",
227
- "225": "▁man",
228
- "226": "▁qu",
229
- "227": "▁en",
230
- "228": "ide",
231
- "229": "ure",
232
- "230": "ud",
233
- "231": "so",
234
- "232": "▁his",
235
- "233": "▁sa",
236
- "234": "▁sp",
237
- "235": "▁say",
238
- "236": "ose",
239
- "237": "ther",
240
- "238": "▁act",
241
- "239": "▁ta",
242
- "240": "▁cl",
243
- "241": "ings",
244
- "242": "pt",
245
- "243": "king",
246
- "244": "▁any",
247
- "245": "▁has",
248
- "246": "▁un",
249
- "247": "iv",
250
- "248": "▁im",
251
- "249": "▁ag",
252
- "250": "▁te",
253
- "251": "▁fe",
254
- "252": "one",
255
- "253": "per",
256
- "254": "ong",
257
- "255": "▁po",
258
- "256": "▁ad",
259
- "257": "ff",
260
- "258": "ore",
261
- "259": "itt",
262
- "260": "ans",
263
- "261": "iz",
264
- "262": "eah",
265
- "263": "reat",
266
- "264": "act",
267
- "265": "own",
268
- "266": "hing",
269
- "267": "enty",
270
- "268": "age",
271
- "269": "ber",
272
- "270": "ice",
273
- "271": "▁am",
274
- "272": "ple",
275
- "273": "are",
276
- "274": "▁per",
277
- "275": "und",
278
- "276": "ite",
279
- "277": "ix",
280
- "278": "pl",
281
- "279": "▁way",
282
- "280": "▁did",
283
- "281": "▁pr",
284
- "282": "▁got",
285
- "283": "ars",
286
- "284": "▁she",
287
- "285": "▁let",
288
- "286": "ag",
289
- "287": "▁ac",
290
- "288": "int",
291
- "289": "▁ar",
292
- "290": "ry",
293
- "291": "ign",
294
- "292": "ish",
295
- "293": "���fir",
296
- "294": "ace",
297
- "295": "ble",
298
- "296": "og",
299
- "297": "ue",
300
- "298": "▁ye",
301
- "299": "ap",
302
- "300": "iff",
303
- "301": "▁ro",
304
- "302": "▁her",
305
- "303": "nder",
306
- "304": "▁ok",
307
- "305": "▁res",
308
- "306": "▁gu",
309
- "307": "ence",
310
- "308": "▁may",
311
- "309": "ated",
312
- "310": "ip",
313
- "311": "▁bo",
314
- "312": "▁him",
315
- "313": "way",
316
- "314": "ac",
317
- "315": "ical",
318
- "316": "ass",
319
- "317": "ase",
320
- "318": "▁dis",
321
- "319": "able",
322
- "320": "ick",
323
- "321": "▁app",
324
- "322": "ance",
325
- "323": "▁pre",
326
- "324": "▁six",
327
- "325": "▁off",
328
- "326": "▁new",
329
- "327": "ia",
330
- "328": "orm",
331
- "329": "ank",
332
- "330": "▁lot",
333
- "331": "ach",
334
- "332": "▁fo",
335
- "333": "inet",
336
- "334": "ire",
337
- "335": "ary",
338
- "336": "ult",
339
- "337": "▁tal",
340
- "338": "▁mu",
341
- "339": "▁bl",
342
- "340": "ount",
343
- "341": "sel",
344
- "342": "vel",
345
- "343": "▁br",
346
- "344": "▁imp",
347
- "345": "ep",
348
- "346": "cess",
349
- "347": "ord",
350
- "348": "▁sc",
351
- "349": "▁inc",
352
- "350": "ound",
353
- "351": "ang",
354
- "352": "be",
355
- "353": "ress",
356
- "354": "uct",
357
- "355": "▁ind",
358
- "356": "▁af",
359
- "357": "ving",
360
- "358": "▁oh",
361
- "359": "▁bet",
362
- "360": "▁use",
363
- "361": "ome",
364
- "362": "ens",
365
- "363": "ys",
366
- "364": "▁bu",
367
- "365": "co",
368
- "366": "ory",
369
- "367": "ater",
370
- "368": "ild",
371
- "369": "ght",
372
- "370": "ial",
373
- "371": "▁day",
374
- "372": "ning",
375
- "373": "na",
376
- "374": "ile",
377
- "375": "▁spe",
378
- "376": "▁mar",
379
- "377": "ody",
380
- "378": "ough",
381
- "379": "ade",
382
- "380": "vers",
383
- "381": "xt",
384
- "382": "▁fl",
385
- "383": "▁ke",
386
- "384": "ian",
387
- "385": "▁sy",
388
- "386": "▁put",
389
- "387": "fore",
390
- "388": "ub",
391
- "389": "▁ph",
392
- "390": "fe",
393
- "391": "▁em",
394
- "392": "▁ser",
395
- "393": "form",
396
- "394": "ting",
397
- "395": "te",
398
- "396": "av",
399
- "397": "ious",
400
- "398": "▁rec",
401
- "399": "ks",
402
- "400": "▁gr",
403
- "401": "ces",
404
- "402": "wn",
405
- "403": "ors",
406
- "404": "▁jo",
407
- "405": "ents",
408
- "406": "▁des",
409
- "407": "▁try",
410
- "408": "▁equ",
411
- "409": "▁z",
412
- "410": "▁rem",
413
- "411": "▁str",
414
- "412": "self",
415
- "413": "▁bit",
416
- "414": "ph",
417
- "415": "ved",
418
- "416": "▁why",
419
- "417": "▁bas",
420
- "418": "▁hel",
421
- "419": "▁rel",
422
- "420": "ath",
423
- "421": "ject",
424
- "422": "ail",
425
- "423": "▁la",
426
- "424": "ual",
427
- "425": "▁god",
428
- "426": "▁nat",
429
- "427": "erm",
430
- "428": "day",
431
- "429": "▁id",
432
- "430": "ft",
433
- "431": "▁wr",
434
- "432": "▁min",
435
- "433": "ates",
436
- "434": "▁gen",
437
- "435": "tain",
438
- "436": "▁ob",
439
- "437": "ull",
440
- "438": "ict",
441
- "439": "▁tra",
442
- "440": "▁end",
443
- "441": "▁hig",
444
- "442": "▁fif",
445
- "443": "oth",
446
- "444": "tern",
447
- "445": "▁its",
448
- "446": "vent",
449
- "447": "▁sm",
450
- "448": "ons",
451
- "449": "▁add",
452
- "450": "iss",
453
- "451": "▁bel",
454
- "452": "ful",
455
- "453": "get",
456
- "454": "▁ele",
457
- "455": "▁rep",
458
- "456": "ak",
459
- "457": "▁ho",
460
- "458": "▁pos",
461
- "459": "▁num",
462
- "460": "ange",
463
- "461": "ves",
464
- "462": "ific",
465
- "463": "urn",
466
- "464": "ise",
467
- "465": "▁cr",
468
- "466": "▁um",
469
- "467": "ward",
470
- "468": "▁reg",
471
- "469": "ady",
472
- "470": "ower",
473
- "471": "uc",
474
- "472": "▁dec",
475
- "473": "lic",
476
- "474": "▁set",
477
- "475": "▁gon",
478
- "476": "▁op",
479
- "477": "▁ear",
480
- "478": "▁sub",
481
- "479": "▁sl",
482
- "480": "les",
483
- "481": "stem",
484
- "482": "cial",
485
- "483": "olog",
486
- "484": "atch",
487
- "485": "ily",
488
- "486": "body",
489
- "487": "nds",
490
- "488": "ular",
491
- "489": "ren",
492
- "490": "▁own",
493
- "491": "▁too",
494
- "492": "cent",
495
- "493": "ible",
496
- "494": "pect",
497
- "495": "ered",
498
- "496": "ways",
499
- "497": "teen",
500
- "498": "▁uh",
501
- "499": "▁big",
502
- "500": "▁mod",
503
- "501": "▁att",
504
- "502": "▁car",
505
- "503": "gr",
506
- "504": "▁acc",
507
- "505": "ied",
508
- "506": "mun",
509
- "507": "ib",
510
- "508": "▁mon",
511
- "509": "▁sch",
512
- "510": "▁pol",
513
- "511": "▁dat",
514
- "512": "▁fin",
515
- "513": "▁sim",
516
- "514": "▁inv",
517
- "515": "▁def",
518
- "516": "ked",
519
- "517": "▁ent",
520
- "518": "▁yes",
521
- "519": "ows",
522
- "520": "ics",
523
- "521": "ited",
524
- "522": "ute",
525
- "523": "ism",
526
- "524": "ps",
527
- "525": "▁ed",
528
- "526": "▁el",
529
- "527": "ably",
530
- "528": "ppen",
531
- "529": "als",
532
- "530": "▁ten",
533
- "531": "ract",
534
- "532": "ss",
535
- "533": "▁ass",
536
- "534": "▁met",
537
- "535": "gan",
538
- "536": "▁eng",
539
- "537": "▁stu",
540
- "538": "ween",
541
- "539": "arch",
542
- "540": "▁gl",
543
- "541": "▁cor",
544
- "542": "▁dr",
545
- "543": "vern",
546
- "544": "▁ty",
547
- "545": "▁run",
548
- "546": "hip",
549
- "547": "cus",
550
- "548": "cond",
551
- "549": "▁ins",
552
- "550": "irty",
553
- "551": "▁pub",
554
- "552": "lud",
555
- "553": "llow",
556
- "554": "▁cou",
557
- "555": "ew",
558
- "556": "iew",
559
- "557": "▁sur",
560
- "558": "ero",
561
- "559": "ood",
562
- "560": "ness",
563
- "561": "▁fun",
564
- "562": "▁eff",
565
- "563": "cept",
566
- "564": "▁ca",
567
- "565": "▁exp",
568
- "566": "duct",
569
- "567": "▁sw",
570
- "568": "ize",
571
- "569": "ope",
572
- "570": "▁par",
573
- "571": "kes",
574
- "572": "cy",
575
- "573": "▁ev",
576
- "574": "▁ref",
577
- "575": "ell",
578
- "576": "▁bus",
579
- "577": "ug",
580
- "578": "rib",
581
- "579": "▁cur",
582
- "580": "mo",
583
- "581": "ock",
584
- "582": "ures",
585
- "583": "air",
586
- "584": "▁war",
587
- "585": "str",
588
- "586": "▁med",
589
- "587": "▁wa",
590
- "588": "▁val",
591
- "589": "▁sin",
592
- "590": "blem",
593
- "591": "▁fam",
594
- "592": "li",
595
- "593": "▁far",
596
- "594": "▁cle",
597
- "595": "▁col",
598
- "596": "mon",
599
- "597": "▁gra",
600
- "598": "led",
601
- "599": "ense",
602
- "600": "tin",
603
- "601": "ues",
604
- "602": "its",
605
- "603": "▁mem",
606
- "604": "▁inf",
607
- "605": "▁eas",
608
- "606": "ideo",
609
- "607": "▁top",
610
- "608": "io",
611
- "609": "pan",
612
- "610": "▁hum",
613
- "611": "▁old",
614
- "612": "ead",
615
- "613": "▁ord",
616
- "614": "ric",
617
- "615": "ants",
618
- "616": "oy",
619
- "617": "esn",
620
- "618": "uck",
621
- "619": "ason",
622
- "620": "ced",
623
- "621": "ool",
624
- "622": "rat",
625
- "623": "ouse",
626
- "624": "▁lar",
627
- "625": "▁art",
628
- "626": "▁wee",
629
- "627": "▁cer",
630
- "628": "ized",
631
- "629": "▁mat",
632
- "630": "con",
633
- "631": "erg",
634
- "632": "land",
635
- "633": "ines",
636
- "634": "▁chr",
637
- "635": "▁aut",
638
- "636": "▁lea",
639
- "637": "▁sou",
640
- "638": "oney",
641
- "639": "tty",
642
- "640": "▁ple",
643
- "641": "ulat",
644
- "642": "oks",
645
- "643": "▁few",
646
- "644": "▁sol",
647
- "645": "▁che",
648
- "646": "chn",
649
- "647": "ird",
650
- "648": "▁bre",
651
- "649": "▁dur",
652
- "650": "▁wom",
653
- "651": "me",
654
- "652": "izat",
655
- "653": "eric",
656
- "654": "ote",
657
- "655": "▁uni",
658
- "656": "eren",
659
- "657": "arn",
660
- "658": "ross",
661
- "659": "ices",
662
- "660": "ten",
663
- "661": "eral",
664
- "662": "ever",
665
- "663": "ieve",
666
- "664": "lish",
667
- "665": "ash",
668
- "666": "▁opp",
669
- "667": "alth",
670
- "668": "ger",
671
- "669": "▁sk",
672
- "670": "▁red",
673
- "671": "peri",
674
- "672": "▁det",
675
- "673": "▁ext",
676
- "674": "ner",
677
- "675": "ah",
678
- "676": "▁var",
679
- "677": "▁loc",
680
- "678": "gram",
681
- "679": "ists",
682
- "680": "ives",
683
- "681": "▁es",
684
- "682": "▁nor",
685
- "683": "tro",
686
- "684": "ale",
687
- "685": "▁iss",
688
- "686": "▁pri",
689
- "687": "gin",
690
- "688": "az",
691
- "689": "oc",
692
- "690": "▁pop",
693
- "691": "ern",
694
- "692": "▁sit",
695
- "693": "ket",
696
- "694": "▁pa",
697
- "695": "▁law",
698
- "696": "ages",
699
- "697": "br",
700
- "698": "▁cam",
701
- "699": "▁mom",
702
- "700": "osed",
703
- "701": "▁bro",
704
- "702": "ne",
705
- "703": "bs",
706
- "704": "▁cre",
707
- "705": "erat",
708
- "706": "▁sec",
709
- "707": "▁cap",
710
- "708": "▁vis",
711
- "709": "▁pat",
712
- "710": "ield",
713
- "711": "iet",
714
- "712": "▁tri",
715
- "713": "up",
716
- "714": "▁bra",
717
- "715": "ts",
718
- "716": "▁mot",
719
- "717": "▁unt",
720
- "718": "put",
721
- "719": "bo",
722
- "720": "ork",
723
- "721": "mer",
724
- "722": "ital",
725
- "723": "▁air",
726
- "724": "ined",
727
- "725": "▁beh",
728
- "726": "▁adv",
729
- "727": "▁ret",
730
- "728": "imes",
731
- "729": "▁tea",
732
- "730": "ural",
733
- "731": "sid",
734
- "732": "ters",
735
- "733": "▁pur",
736
- "734": "▁sci",
737
- "735": "bers",
738
- "736": "ient",
739
- "737": "ier",
740
- "738": "cc",
741
- "739": "sw",
742
- "740": "▁av",
743
- "741": "reen",
744
- "742": "ode",
745
- "743": "ont",
746
- "744": "▁dra",
747
- "745": "ann",
748
- "746": "nect",
749
- "747": "▁x",
750
- "748": "▁eu",
751
- "749": "ton",
752
- "750": "inat",
753
- "751": "ene",
754
- "752": "ared",
755
- "753": "els",
756
- "754": "▁mor",
757
- "755": "▁rat",
758
- "756": "cri",
759
- "757": "▁men",
760
- "758": "▁ah",
761
- "759": "ames",
762
- "760": "▁arm",
763
- "761": "eak",
764
- "762": "▁pay",
765
- "763": "▁hal",
766
- "764": "ins",
767
- "765": "ilit",
768
- "766": "stit",
769
- "767": "▁ra",
770
- "768": "▁leg",
771
- "769": "cl",
772
- "770": "pr",
773
- "771": "▁wal",
774
- "772": "▁bad",
775
- "773": "▁ge",
776
- "774": "roup",
777
- "775": "▁mus",
778
- "776": "man",
779
- "777": "▁gi",
780
- "778": "eds",
781
- "779": "▁aw",
782
- "780": "po",
783
- "781": "ark",
784
- "782": "row",
785
- "783": "▁dep",
786
- "784": "ully",
787
- "785": "ral",
788
- "786": "lect",
789
- "787": "pend",
790
- "788": "▁sev",
791
- "789": "ime",
792
- "790": "gest",
793
- "791": "here",
794
- "792": "▁yet",
795
- "793": "ted",
796
- "794": "▁rev",
797
- "795": "ds",
798
- "796": "▁ask",
799
- "797": "less",
800
- "798": "▁di",
801
- "799": "ets",
802
- "800": "line",
803
- "801": "▁aff",
804
- "802": "ired",
805
- "803": "▁est",
806
- "804": "ken",
807
- "805": "vid",
808
- "806": "most",
809
- "807": "ivid",
810
- "808": "unch",
811
- "809": "par",
812
- "810": "med",
813
- "811": "rop",
814
- "812": "ased",
815
- "813": "eone",
816
- "814": "▁ve",
817
- "815": "▁abs",
818
- "816": "ergy",
819
- "817": "ret",
820
- "818": "▁saw",
821
- "819": "▁ey",
822
- "820": "▁cal",
823
- "821": "uat",
824
- "822": "▁mid",
825
- "823": "vat",
826
- "824": "ream",
827
- "825": "vice",
828
- "826": "ians",
829
- "827": "rent",
830
- "828": "ctor",
831
- "829": "err",
832
- "830": "ush",
833
- "831": "ases",
834
- "832": "▁suc",
835
- "833": "erms",
836
- "834": "ave",
837
- "835": "angu",
838
- "836": "ries",
839
- "837": "▁wo",
840
- "838": "arts",
841
- "839": "▁fil",
842
- "840": "▁fat",
843
- "841": "▁cho",
844
- "842": "orts",
845
- "843": "▁fre",
846
- "844": "ee",
847
- "845": "ught",
848
- "846": "eng",
849
- "847": "ump",
850
- "848": "▁bar",
851
- "849": "ying",
852
- "850": "ane",
853
- "851": "▁tem",
854
- "852": "anks",
855
- "853": "ury",
856
- "854": "iat",
857
- "855": "mit",
858
- "856": "trol",
859
- "857": "▁net",
860
- "858": "▁maj",
861
- "859": "▁cra",
862
- "860": "ling",
863
- "861": "▁fig",
864
- "862": "orn",
865
- "863": "icat",
866
- "864": "pany",
867
- "865": "▁occ",
868
- "866": "ott",
869
- "867": "ands",
870
- "868": "▁exc",
871
- "869": "▁mr",
872
- "870": "ency",
873
- "871": "rope",
874
- "872": "itch",
875
- "873": "▁lit",
876
- "874": "abil",
877
- "875": "not",
878
- "876": "ma",
879
- "877": "▁typ",
880
- "878": "▁opt",
881
- "879": "ob",
882
- "880": "ser",
883
- "881": "ety",
884
- "882": "ms",
885
- "883": "peci",
886
- "884": "aces",
887
- "885": "aut",
888
- "886": "▁hon",
889
- "887": "cuss",
890
- "888": "▁sal",
891
- "889": "▁sor",
892
- "890": "att",
893
- "891": "▁lab",
894
- "892": "▁har",
895
- "893": "urch",
896
- "894": "nded",
897
- "895": "uce",
898
- "896": "ids",
899
- "897": "▁hy",
900
- "898": "▁fut",
901
- "899": "▁ste",
902
- "900": "ours",
903
- "901": "ems",
904
- "902": "utes",
905
- "903": "ng",
906
- "904": "ta",
907
- "905": "▁won",
908
- "906": "▁fa",
909
- "907": "▁env",
910
- "908": "ards",
911
- "909": "▁job",
912
- "910": "ium",
913
- "911": "▁dot",
914
- "912": "▁obv",
915
- "913": "ina",
916
- "914": "side",
917
- "915": "elve",
918
- "916": "cu",
919
- "917": "▁jes",
920
- "918": "▁pot",
921
- "919": "▁pie",
922
- "920": "▁tre",
923
- "921": "▁hey",
924
- "922": "▁mag",
925
- "923": "ron",
926
- "924": "▁key",
927
- "925": "swer",
928
- "926": "▁win",
929
- "927": "ucat",
930
- "928": "work",
931
- "929": "ides",
932
- "930": "▁low",
933
- "931": "▁vol",
934
- "932": "▁oth",
935
- "933": "atic",
936
- "934": "lf",
937
- "935": "ads",
938
- "936": "inds",
939
- "937": "com",
940
- "938": "ths",
941
- "939": "▁ver",
942
- "940": "ised",
943
- "941": "lo",
944
- "942": "▁squ",
945
- "943": "▁cut",
946
- "944": "oked",
947
- "945": "irit",
948
- "946": "ateg",
949
- "947": "ppy",
950
- "948": "mitt",
951
- "949": "come",
952
- "950": "hn",
953
- "951": "igin",
954
- "952": "mand",
955
- "953": "▁dam",
956
- "954": "ho",
957
- "955": "▁da",
958
- "956": "▁fur",
959
- "957": "iron",
960
- "958": "ilar",
961
- "959": "▁fac",
962
- "960": "▁neg",
963
- "961": "▁ago",
964
- "962": "ged",
965
- "963": "miss",
966
- "964": "enth",
967
- "965": "▁dou",
968
- "966": "▁hit",
969
- "967": "▁guy",
970
- "968": "▁bi",
971
- "969": "ove",
972
- "970": "fess",
973
- "971": "ples",
974
- "972": "owed",
975
- "973": "ured",
976
- "974": "▁ris",
977
- "975": "ints",
978
- "976": "rew",
979
- "977": "▁sum",
980
- "978": "▁hu",
981
- "979": "ploy",
982
- "980": "ude",
983
- "981": "ried",
984
- "982": "▁cir",
985
- "983": "▁dev",
986
- "984": "ear",
987
- "985": "▁tot",
988
- "986": "▁ann",
989
- "987": "duc",
990
- "988": "ik",
991
- "989": "pon",
992
- "990": "sted",
993
- "991": "▁ide",
994
- "992": "▁'",
995
- "993": "ipp",
996
- "994": "▁eat",
997
- "995": "▁dom",
998
- "996": "▁",
999
- "997": "e",
1000
- "998": "t",
1001
- "999": "o",
1002
- "1000": "a",
1003
- "1001": "i",
1004
- "1002": "n",
1005
- "1003": "s",
1006
- "1004": "r",
1007
- "1005": "h",
1008
- "1006": "l",
1009
- "1007": "d",
1010
- "1008": "u",
1011
- "1009": "c",
1012
- "1010": "m",
1013
- "1011": "y",
1014
- "1012": "g",
1015
- "1013": "w",
1016
- "1014": "f",
1017
- "1015": "p",
1018
- "1016": "b",
1019
- "1017": "v",
1020
- "1018": "k",
1021
- "1019": "'",
1022
- "1020": "j",
1023
- "1021": "x",
1024
- "1022": "q",
1025
- "1023": "z",
1026
- "1024": "<EOU>",
1027
- "1025": "<EOB>"
1028
- }
 
1
+ [
2
+ "<unk>",
3
+ "\u2581t",
4
+ "\u2581th",
5
+ "\u2581a",
6
+ "\u2581i",
7
+ "\u2581the",
8
+ "\u2581s",
9
+ "re",
10
+ "\u2581w",
11
+ "\u2581o",
12
+ "in",
13
+ "at",
14
+ "er",
15
+ "nd",
16
+ "ou",
17
+ "\u2581c",
18
+ "\u2581b",
19
+ "\u2581h",
20
+ "en",
21
+ "on",
22
+ "\u2581m",
23
+ "\u2581f",
24
+ "ing",
25
+ "\u2581p",
26
+ "\u2581to",
27
+ "\u2581and",
28
+ "\u2581d",
29
+ "an",
30
+ "or",
31
+ "es",
32
+ "\u2581y",
33
+ "\u2581l",
34
+ "\u2581of",
35
+ "ll",
36
+ "\u2581in",
37
+ "ed",
38
+ "it",
39
+ "\u2581g",
40
+ "is",
41
+ "\u2581you",
42
+ "\u2581n",
43
+ "ar",
44
+ "om",
45
+ "as",
46
+ "ve",
47
+ "\u2581e",
48
+ "ic",
49
+ "\u2581it",
50
+ "al",
51
+ "us",
52
+ "\u2581wh",
53
+ "\u2581we",
54
+ "\u2581be",
55
+ "ion",
56
+ "ow",
57
+ "le",
58
+ "\u2581is",
59
+ "et",
60
+ "ent",
61
+ "ot",
62
+ "ut",
63
+ "\u2581re",
64
+ "\u2581on",
65
+ "ay",
66
+ "\u2581ha",
67
+ "ig",
68
+ "\u2581so",
69
+ "ct",
70
+ "\u2581he",
71
+ "\u2581for",
72
+ "ver",
73
+ "ke",
74
+ "ro",
75
+ "\u2581st",
76
+ "id",
77
+ "\u2581go",
78
+ "all",
79
+ "se",
80
+ "ly",
81
+ "\u2581u",
82
+ "ch",
83
+ "st",
84
+ "ld",
85
+ "\u2581k",
86
+ "ce",
87
+ "ur",
88
+ "\u2581li",
89
+ "am",
90
+ "\u2581r",
91
+ "ht",
92
+ "\u2581j",
93
+ "ith",
94
+ "\u2581se",
95
+ "ir",
96
+ "\u2581as",
97
+ "\u2581an",
98
+ "im",
99
+ "\u2581do",
100
+ "ad",
101
+ "\u2581was",
102
+ "ight",
103
+ "th",
104
+ "\u2581are",
105
+ "\u2581but",
106
+ "\u2581sh",
107
+ "ust",
108
+ "ally",
109
+ "\u2581not",
110
+ "\u2581or",
111
+ "\u2581com",
112
+ "\u2581can",
113
+ "\u2581me",
114
+ "op",
115
+ "\u2581mo",
116
+ "\u2581at",
117
+ "ill",
118
+ "\u2581ch",
119
+ "\u2581ne",
120
+ "ant",
121
+ "\u2581de",
122
+ "\u2581kn",
123
+ "\u2581one",
124
+ "il",
125
+ "ol",
126
+ "\u2581con",
127
+ "ter",
128
+ "\u2581ab",
129
+ "\u2581fr",
130
+ "ere",
131
+ "ck",
132
+ "\u2581al",
133
+ "\u2581all",
134
+ "qu",
135
+ "\u2581pro",
136
+ "\u2581som",
137
+ "ould",
138
+ "\u2581tw",
139
+ "ul",
140
+ "ra",
141
+ "od",
142
+ "ers",
143
+ "\u2581su",
144
+ "ive",
145
+ "\u2581v",
146
+ "use",
147
+ "ate",
148
+ "ge",
149
+ "if",
150
+ "\u2581ex",
151
+ "ess",
152
+ "pp",
153
+ "\u2581lo",
154
+ "out",
155
+ "\u2581if",
156
+ "est",
157
+ "ain",
158
+ "ist",
159
+ "and",
160
+ "ea",
161
+ "very",
162
+ "art",
163
+ "\u2581wor",
164
+ "\u2581my",
165
+ "ab",
166
+ "ment",
167
+ "\u2581bec",
168
+ "un",
169
+ "ity",
170
+ "ri",
171
+ "pe",
172
+ "ions",
173
+ "\u2581by",
174
+ "ok",
175
+ "our",
176
+ "ort",
177
+ "ind",
178
+ "ink",
179
+ "nt",
180
+ "\u2581up",
181
+ "um",
182
+ "\u2581don",
183
+ "\u2581get",
184
+ "red",
185
+ "\u2581out",
186
+ "el",
187
+ "ause",
188
+ "res",
189
+ "\u2581ma",
190
+ "ich",
191
+ "\u2581us",
192
+ "rou",
193
+ "\u2581int",
194
+ "em",
195
+ "os",
196
+ "ies",
197
+ "ie",
198
+ "\u2581pl",
199
+ "\u2581tr",
200
+ "ven",
201
+ "ous",
202
+ "\u2581le",
203
+ "\u2581two",
204
+ "ard",
205
+ "ine",
206
+ "\u2581co",
207
+ "een",
208
+ "\u2581now",
209
+ "ty",
210
+ "her",
211
+ "ack",
212
+ "\u2581pe",
213
+ "ame",
214
+ "\u2581how",
215
+ "\u2581who",
216
+ "\u2581see",
217
+ "\u2581tim",
218
+ "ect",
219
+ "ast",
220
+ "\u2581our",
221
+ "ci",
222
+ "ree",
223
+ "ople",
224
+ "gh",
225
+ "\u2581no",
226
+ "\u2581had",
227
+ "\u2581man",
228
+ "\u2581qu",
229
+ "\u2581en",
230
+ "ide",
231
+ "ure",
232
+ "ud",
233
+ "so",
234
+ "\u2581his",
235
+ "\u2581sa",
236
+ "\u2581sp",
237
+ "\u2581say",
238
+ "ose",
239
+ "ther",
240
+ "\u2581act",
241
+ "\u2581ta",
242
+ "\u2581cl",
243
+ "ings",
244
+ "pt",
245
+ "king",
246
+ "\u2581any",
247
+ "\u2581has",
248
+ "\u2581un",
249
+ "iv",
250
+ "\u2581im",
251
+ "\u2581ag",
252
+ "\u2581te",
253
+ "\u2581fe",
254
+ "one",
255
+ "per",
256
+ "ong",
257
+ "\u2581po",
258
+ "\u2581ad",
259
+ "ff",
260
+ "ore",
261
+ "itt",
262
+ "ans",
263
+ "iz",
264
+ "eah",
265
+ "reat",
266
+ "act",
267
+ "own",
268
+ "hing",
269
+ "enty",
270
+ "age",
271
+ "ber",
272
+ "ice",
273
+ "\u2581am",
274
+ "ple",
275
+ "are",
276
+ "\u2581per",
277
+ "und",
278
+ "ite",
279
+ "ix",
280
+ "pl",
281
+ "\u2581way",
282
+ "\u2581did",
283
+ "\u2581pr",
284
+ "\u2581got",
285
+ "ars",
286
+ "\u2581she",
287
+ "\u2581let",
288
+ "ag",
289
+ "\u2581ac",
290
+ "int",
291
+ "\u2581ar",
292
+ "ry",
293
+ "ign",
294
+ "ish",
295
+ "\u2581fir",
296
+ "ace",
297
+ "ble",
298
+ "og",
299
+ "ue",
300
+ "\u2581ye",
301
+ "ap",
302
+ "iff",
303
+ "\u2581ro",
304
+ "\u2581her",
305
+ "nder",
306
+ "\u2581ok",
307
+ "\u2581res",
308
+ "\u2581gu",
309
+ "ence",
310
+ "\u2581may",
311
+ "ated",
312
+ "ip",
313
+ "\u2581bo",
314
+ "\u2581him",
315
+ "way",
316
+ "ac",
317
+ "ical",
318
+ "ass",
319
+ "ase",
320
+ "\u2581dis",
321
+ "able",
322
+ "ick",
323
+ "\u2581app",
324
+ "ance",
325
+ "\u2581pre",
326
+ "\u2581six",
327
+ "\u2581off",
328
+ "\u2581new",
329
+ "ia",
330
+ "orm",
331
+ "ank",
332
+ "\u2581lot",
333
+ "ach",
334
+ "\u2581fo",
335
+ "inet",
336
+ "ire",
337
+ "ary",
338
+ "ult",
339
+ "\u2581tal",
340
+ "\u2581mu",
341
+ "\u2581bl",
342
+ "ount",
343
+ "sel",
344
+ "vel",
345
+ "\u2581br",
346
+ "\u2581imp",
347
+ "ep",
348
+ "cess",
349
+ "ord",
350
+ "\u2581sc",
351
+ "\u2581inc",
352
+ "ound",
353
+ "ang",
354
+ "be",
355
+ "ress",
356
+ "uct",
357
+ "\u2581ind",
358
+ "\u2581af",
359
+ "ving",
360
+ "\u2581oh",
361
+ "\u2581bet",
362
+ "\u2581use",
363
+ "ome",
364
+ "ens",
365
+ "ys",
366
+ "\u2581bu",
367
+ "co",
368
+ "ory",
369
+ "ater",
370
+ "ild",
371
+ "ght",
372
+ "ial",
373
+ "\u2581day",
374
+ "ning",
375
+ "na",
376
+ "ile",
377
+ "\u2581spe",
378
+ "\u2581mar",
379
+ "ody",
380
+ "ough",
381
+ "ade",
382
+ "vers",
383
+ "xt",
384
+ "\u2581fl",
385
+ "\u2581ke",
386
+ "ian",
387
+ "\u2581sy",
388
+ "\u2581put",
389
+ "fore",
390
+ "ub",
391
+ "\u2581ph",
392
+ "fe",
393
+ "\u2581em",
394
+ "\u2581ser",
395
+ "form",
396
+ "ting",
397
+ "te",
398
+ "av",
399
+ "ious",
400
+ "\u2581rec",
401
+ "ks",
402
+ "\u2581gr",
403
+ "ces",
404
+ "wn",
405
+ "ors",
406
+ "\u2581jo",
407
+ "ents",
408
+ "\u2581des",
409
+ "\u2581try",
410
+ "\u2581equ",
411
+ "\u2581z",
412
+ "\u2581rem",
413
+ "\u2581str",
414
+ "self",
415
+ "\u2581bit",
416
+ "ph",
417
+ "ved",
418
+ "\u2581why",
419
+ "\u2581bas",
420
+ "\u2581hel",
421
+ "\u2581rel",
422
+ "ath",
423
+ "ject",
424
+ "ail",
425
+ "\u2581la",
426
+ "ual",
427
+ "\u2581god",
428
+ "\u2581nat",
429
+ "erm",
430
+ "day",
431
+ "\u2581id",
432
+ "ft",
433
+ "\u2581wr",
434
+ "\u2581min",
435
+ "ates",
436
+ "\u2581gen",
437
+ "tain",
438
+ "\u2581ob",
439
+ "ull",
440
+ "ict",
441
+ "\u2581tra",
442
+ "\u2581end",
443
+ "\u2581hig",
444
+ "\u2581fif",
445
+ "oth",
446
+ "tern",
447
+ "\u2581its",
448
+ "vent",
449
+ "\u2581sm",
450
+ "ons",
451
+ "\u2581add",
452
+ "iss",
453
+ "\u2581bel",
454
+ "ful",
455
+ "get",
456
+ "\u2581ele",
457
+ "\u2581rep",
458
+ "ak",
459
+ "\u2581ho",
460
+ "\u2581pos",
461
+ "\u2581num",
462
+ "ange",
463
+ "ves",
464
+ "ific",
465
+ "urn",
466
+ "ise",
467
+ "\u2581cr",
468
+ "\u2581um",
469
+ "ward",
470
+ "\u2581reg",
471
+ "ady",
472
+ "ower",
473
+ "uc",
474
+ "\u2581dec",
475
+ "lic",
476
+ "\u2581set",
477
+ "\u2581gon",
478
+ "\u2581op",
479
+ "\u2581ear",
480
+ "\u2581sub",
481
+ "\u2581sl",
482
+ "les",
483
+ "stem",
484
+ "cial",
485
+ "olog",
486
+ "atch",
487
+ "ily",
488
+ "body",
489
+ "nds",
490
+ "ular",
491
+ "ren",
492
+ "\u2581own",
493
+ "\u2581too",
494
+ "cent",
495
+ "ible",
496
+ "pect",
497
+ "ered",
498
+ "ways",
499
+ "teen",
500
+ "\u2581uh",
501
+ "\u2581big",
502
+ "\u2581mod",
503
+ "\u2581att",
504
+ "\u2581car",
505
+ "gr",
506
+ "\u2581acc",
507
+ "ied",
508
+ "mun",
509
+ "ib",
510
+ "\u2581mon",
511
+ "\u2581sch",
512
+ "\u2581pol",
513
+ "\u2581dat",
514
+ "\u2581fin",
515
+ "\u2581sim",
516
+ "\u2581inv",
517
+ "\u2581def",
518
+ "ked",
519
+ "\u2581ent",
520
+ "\u2581yes",
521
+ "ows",
522
+ "ics",
523
+ "ited",
524
+ "ute",
525
+ "ism",
526
+ "ps",
527
+ "\u2581ed",
528
+ "\u2581el",
529
+ "ably",
530
+ "ppen",
531
+ "als",
532
+ "\u2581ten",
533
+ "ract",
534
+ "ss",
535
+ "\u2581ass",
536
+ "\u2581met",
537
+ "gan",
538
+ "\u2581eng",
539
+ "\u2581stu",
540
+ "ween",
541
+ "arch",
542
+ "\u2581gl",
543
+ "\u2581cor",
544
+ "\u2581dr",
545
+ "vern",
546
+ "\u2581ty",
547
+ "\u2581run",
548
+ "hip",
549
+ "cus",
550
+ "cond",
551
+ "\u2581ins",
552
+ "irty",
553
+ "\u2581pub",
554
+ "lud",
555
+ "llow",
556
+ "\u2581cou",
557
+ "ew",
558
+ "iew",
559
+ "\u2581sur",
560
+ "ero",
561
+ "ood",
562
+ "ness",
563
+ "\u2581fun",
564
+ "\u2581eff",
565
+ "cept",
566
+ "\u2581ca",
567
+ "\u2581exp",
568
+ "duct",
569
+ "\u2581sw",
570
+ "ize",
571
+ "ope",
572
+ "\u2581par",
573
+ "kes",
574
+ "cy",
575
+ "\u2581ev",
576
+ "\u2581ref",
577
+ "ell",
578
+ "\u2581bus",
579
+ "ug",
580
+ "rib",
581
+ "\u2581cur",
582
+ "mo",
583
+ "ock",
584
+ "ures",
585
+ "air",
586
+ "\u2581war",
587
+ "str",
588
+ "\u2581med",
589
+ "\u2581wa",
590
+ "\u2581val",
591
+ "\u2581sin",
592
+ "blem",
593
+ "\u2581fam",
594
+ "li",
595
+ "\u2581far",
596
+ "\u2581cle",
597
+ "\u2581col",
598
+ "mon",
599
+ "\u2581gra",
600
+ "led",
601
+ "ense",
602
+ "tin",
603
+ "ues",
604
+ "its",
605
+ "\u2581mem",
606
+ "\u2581inf",
607
+ "\u2581eas",
608
+ "ideo",
609
+ "\u2581top",
610
+ "io",
611
+ "pan",
612
+ "\u2581hum",
613
+ "\u2581old",
614
+ "ead",
615
+ "\u2581ord",
616
+ "ric",
617
+ "ants",
618
+ "oy",
619
+ "esn",
620
+ "uck",
621
+ "ason",
622
+ "ced",
623
+ "ool",
624
+ "rat",
625
+ "ouse",
626
+ "\u2581lar",
627
+ "\u2581art",
628
+ "\u2581wee",
629
+ "\u2581cer",
630
+ "ized",
631
+ "\u2581mat",
632
+ "con",
633
+ "erg",
634
+ "land",
635
+ "ines",
636
+ "\u2581chr",
637
+ "\u2581aut",
638
+ "\u2581lea",
639
+ "\u2581sou",
640
+ "oney",
641
+ "tty",
642
+ "\u2581ple",
643
+ "ulat",
644
+ "oks",
645
+ "\u2581few",
646
+ "\u2581sol",
647
+ "\u2581che",
648
+ "chn",
649
+ "ird",
650
+ "\u2581bre",
651
+ "\u2581dur",
652
+ "\u2581wom",
653
+ "me",
654
+ "izat",
655
+ "eric",
656
+ "ote",
657
+ "\u2581uni",
658
+ "eren",
659
+ "arn",
660
+ "ross",
661
+ "ices",
662
+ "ten",
663
+ "eral",
664
+ "ever",
665
+ "ieve",
666
+ "lish",
667
+ "ash",
668
+ "\u2581opp",
669
+ "alth",
670
+ "ger",
671
+ "\u2581sk",
672
+ "\u2581red",
673
+ "peri",
674
+ "\u2581det",
675
+ "\u2581ext",
676
+ "ner",
677
+ "ah",
678
+ "\u2581var",
679
+ "\u2581loc",
680
+ "gram",
681
+ "ists",
682
+ "ives",
683
+ "\u2581es",
684
+ "\u2581nor",
685
+ "tro",
686
+ "ale",
687
+ "\u2581iss",
688
+ "\u2581pri",
689
+ "gin",
690
+ "az",
691
+ "oc",
692
+ "\u2581pop",
693
+ "ern",
694
+ "\u2581sit",
695
+ "ket",
696
+ "\u2581pa",
697
+ "\u2581law",
698
+ "ages",
699
+ "br",
700
+ "\u2581cam",
701
+ "\u2581mom",
702
+ "osed",
703
+ "\u2581bro",
704
+ "ne",
705
+ "bs",
706
+ "\u2581cre",
707
+ "erat",
708
+ "\u2581sec",
709
+ "\u2581cap",
710
+ "\u2581vis",
711
+ "\u2581pat",
712
+ "ield",
713
+ "iet",
714
+ "\u2581tri",
715
+ "up",
716
+ "\u2581bra",
717
+ "ts",
718
+ "\u2581mot",
719
+ "\u2581unt",
720
+ "put",
721
+ "bo",
722
+ "ork",
723
+ "mer",
724
+ "ital",
725
+ "\u2581air",
726
+ "ined",
727
+ "\u2581beh",
728
+ "\u2581adv",
729
+ "\u2581ret",
730
+ "imes",
731
+ "\u2581tea",
732
+ "ural",
733
+ "sid",
734
+ "ters",
735
+ "\u2581pur",
736
+ "\u2581sci",
737
+ "bers",
738
+ "ient",
739
+ "ier",
740
+ "cc",
741
+ "sw",
742
+ "\u2581av",
743
+ "reen",
744
+ "ode",
745
+ "ont",
746
+ "\u2581dra",
747
+ "ann",
748
+ "nect",
749
+ "\u2581x",
750
+ "\u2581eu",
751
+ "ton",
752
+ "inat",
753
+ "ene",
754
+ "ared",
755
+ "els",
756
+ "\u2581mor",
757
+ "\u2581rat",
758
+ "cri",
759
+ "\u2581men",
760
+ "\u2581ah",
761
+ "ames",
762
+ "\u2581arm",
763
+ "eak",
764
+ "\u2581pay",
765
+ "\u2581hal",
766
+ "ins",
767
+ "ilit",
768
+ "stit",
769
+ "\u2581ra",
770
+ "\u2581leg",
771
+ "cl",
772
+ "pr",
773
+ "\u2581wal",
774
+ "\u2581bad",
775
+ "\u2581ge",
776
+ "roup",
777
+ "\u2581mus",
778
+ "man",
779
+ "\u2581gi",
780
+ "eds",
781
+ "\u2581aw",
782
+ "po",
783
+ "ark",
784
+ "row",
785
+ "\u2581dep",
786
+ "ully",
787
+ "ral",
788
+ "lect",
789
+ "pend",
790
+ "\u2581sev",
791
+ "ime",
792
+ "gest",
793
+ "here",
794
+ "\u2581yet",
795
+ "ted",
796
+ "\u2581rev",
797
+ "ds",
798
+ "\u2581ask",
799
+ "less",
800
+ "\u2581di",
801
+ "ets",
802
+ "line",
803
+ "\u2581aff",
804
+ "ired",
805
+ "\u2581est",
806
+ "ken",
807
+ "vid",
808
+ "most",
809
+ "ivid",
810
+ "unch",
811
+ "par",
812
+ "med",
813
+ "rop",
814
+ "ased",
815
+ "eone",
816
+ "\u2581ve",
817
+ "\u2581abs",
818
+ "ergy",
819
+ "ret",
820
+ "\u2581saw",
821
+ "\u2581ey",
822
+ "\u2581cal",
823
+ "uat",
824
+ "\u2581mid",
825
+ "vat",
826
+ "ream",
827
+ "vice",
828
+ "ians",
829
+ "rent",
830
+ "ctor",
831
+ "err",
832
+ "ush",
833
+ "ases",
834
+ "\u2581suc",
835
+ "erms",
836
+ "ave",
837
+ "angu",
838
+ "ries",
839
+ "\u2581wo",
840
+ "arts",
841
+ "\u2581fil",
842
+ "\u2581fat",
843
+ "\u2581cho",
844
+ "orts",
845
+ "\u2581fre",
846
+ "ee",
847
+ "ught",
848
+ "eng",
849
+ "ump",
850
+ "\u2581bar",
851
+ "ying",
852
+ "ane",
853
+ "\u2581tem",
854
+ "anks",
855
+ "ury",
856
+ "iat",
857
+ "mit",
858
+ "trol",
859
+ "\u2581net",
860
+ "\u2581maj",
861
+ "\u2581cra",
862
+ "ling",
863
+ "\u2581fig",
864
+ "orn",
865
+ "icat",
866
+ "pany",
867
+ "\u2581occ",
868
+ "ott",
869
+ "ands",
870
+ "\u2581exc",
871
+ "\u2581mr",
872
+ "ency",
873
+ "rope",
874
+ "itch",
875
+ "\u2581lit",
876
+ "abil",
877
+ "not",
878
+ "ma",
879
+ "\u2581typ",
880
+ "\u2581opt",
881
+ "ob",
882
+ "ser",
883
+ "ety",
884
+ "ms",
885
+ "peci",
886
+ "aces",
887
+ "aut",
888
+ "\u2581hon",
889
+ "cuss",
890
+ "\u2581sal",
891
+ "\u2581sor",
892
+ "att",
893
+ "\u2581lab",
894
+ "\u2581har",
895
+ "urch",
896
+ "nded",
897
+ "uce",
898
+ "ids",
899
+ "\u2581hy",
900
+ "\u2581fut",
901
+ "\u2581ste",
902
+ "ours",
903
+ "ems",
904
+ "utes",
905
+ "ng",
906
+ "ta",
907
+ "\u2581won",
908
+ "\u2581fa",
909
+ "\u2581env",
910
+ "ards",
911
+ "\u2581job",
912
+ "ium",
913
+ "\u2581dot",
914
+ "\u2581obv",
915
+ "ina",
916
+ "side",
917
+ "elve",
918
+ "cu",
919
+ "\u2581jes",
920
+ "\u2581pot",
921
+ "\u2581pie",
922
+ "\u2581tre",
923
+ "\u2581hey",
924
+ "\u2581mag",
925
+ "ron",
926
+ "\u2581key",
927
+ "swer",
928
+ "\u2581win",
929
+ "ucat",
930
+ "work",
931
+ "ides",
932
+ "\u2581low",
933
+ "\u2581vol",
934
+ "\u2581oth",
935
+ "atic",
936
+ "lf",
937
+ "ads",
938
+ "inds",
939
+ "com",
940
+ "ths",
941
+ "\u2581ver",
942
+ "ised",
943
+ "lo",
944
+ "\u2581squ",
945
+ "\u2581cut",
946
+ "oked",
947
+ "irit",
948
+ "ateg",
949
+ "ppy",
950
+ "mitt",
951
+ "come",
952
+ "hn",
953
+ "igin",
954
+ "mand",
955
+ "\u2581dam",
956
+ "ho",
957
+ "\u2581da",
958
+ "\u2581fur",
959
+ "iron",
960
+ "ilar",
961
+ "\u2581fac",
962
+ "\u2581neg",
963
+ "\u2581ago",
964
+ "ged",
965
+ "miss",
966
+ "enth",
967
+ "\u2581dou",
968
+ "\u2581hit",
969
+ "\u2581guy",
970
+ "\u2581bi",
971
+ "ove",
972
+ "fess",
973
+ "ples",
974
+ "owed",
975
+ "ured",
976
+ "\u2581ris",
977
+ "ints",
978
+ "rew",
979
+ "\u2581sum",
980
+ "\u2581hu",
981
+ "ploy",
982
+ "ude",
983
+ "ried",
984
+ "\u2581cir",
985
+ "\u2581dev",
986
+ "ear",
987
+ "\u2581tot",
988
+ "\u2581ann",
989
+ "duc",
990
+ "ik",
991
+ "pon",
992
+ "sted",
993
+ "\u2581ide",
994
+ "\u2581'",
995
+ "ipp",
996
+ "\u2581eat",
997
+ "\u2581dom",
998
+ "\u2581",
999
+ "e",
1000
+ "t",
1001
+ "o",
1002
+ "a",
1003
+ "i",
1004
+ "n",
1005
+ "s",
1006
+ "r",
1007
+ "h",
1008
+ "l",
1009
+ "d",
1010
+ "u",
1011
+ "c",
1012
+ "m",
1013
+ "y",
1014
+ "g",
1015
+ "w",
1016
+ "f",
1017
+ "p",
1018
+ "b",
1019
+ "v",
1020
+ "k",
1021
+ "'",
1022
+ "j",
1023
+ "x",
1024
+ "q",
1025
+ "z",
1026
+ "<EOU>",
1027
+ "<EOB>"
1028
+ ]