Upload 39 files
Browse files- final_scripts/export_decoder_joint.py +188 -0
- final_scripts/export_encoder.py +149 -0
- final_scripts/export_preprocessor.py +95 -0
- final_scripts/inference_benchmark.py +847 -0
- final_scripts/inference_reference_nemo.py +238 -0
- parakeet_decoder.mlmodelc/analytics/coremldata.bin +3 -0
- parakeet_decoder.mlmodelc/coremldata.bin +3 -0
- parakeet_decoder.mlmodelc/metadata.json +116 -0
- parakeet_decoder.mlmodelc/model.mil +47 -0
- parakeet_decoder.mlmodelc/weights/weight.bin +3 -0
- parakeet_decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- parakeet_decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- parakeet_decoder.mlpackage/Manifest.json +18 -0
- parakeet_joint.mlmodelc/analytics/coremldata.bin +3 -0
- parakeet_joint.mlmodelc/coremldata.bin +3 -0
- parakeet_joint.mlmodelc/metadata.json +74 -0
- parakeet_joint.mlmodelc/model.mil +23 -0
- parakeet_joint.mlmodelc/weights/weight.bin +3 -0
- parakeet_joint.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- parakeet_joint.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- parakeet_joint.mlpackage/Manifest.json +18 -0
- preprocessor.mlmodelc/analytics/coremldata.bin +3 -0
- preprocessor.mlmodelc/coremldata.bin +3 -0
- preprocessor.mlmodelc/metadata.json +103 -0
- preprocessor.mlmodelc/model.mil +104 -0
- preprocessor.mlmodelc/weights/weight.bin +3 -0
- preprocessor.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- preprocessor.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- preprocessor.mlpackage/Manifest.json +18 -0
- streaming_encoder.mlmodelc/analytics/coremldata.bin +3 -0
- streaming_encoder.mlmodelc/coremldata.bin +3 -0
- streaming_encoder.mlmodelc/metadata.json +167 -0
- streaming_encoder.mlmodelc/model.mil +0 -0
- streaming_encoder.mlmodelc/weights/weight.bin +3 -0
- streaming_encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- streaming_encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- streaming_encoder.mlpackage/Manifest.json +18 -0
- tokenizer.model +3 -0
- vocab.json +1028 -1028
final_scripts/export_decoder_joint.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import nemo.collections.asr as nemo_asr
|
| 4 |
+
import coremltools as ct
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
class DecoderWrapper(nn.Module):
    """Adapts the NeMo RNNT prediction network (decoder) for CoreML export.

    The CoreML-facing interface carries LSTM state as [Batch, Layers, Hidden],
    while NeMo's decoder expects [Layers, Batch, Hidden]; this wrapper
    transposes the state on the way in and back out.
    """

    def __init__(self, decoder, hidden_size):
        super().__init__()
        self.decoder = decoder
        self.hidden_size = hidden_size

    def forward(self, targets, target_length, h_in, c_in):
        # targets:       [Batch, U] token ids (U == 1 for step-wise decoding)
        # target_length: [Batch] lengths (kept in the signature for the export
        #                interface; not consumed here)
        # h_in / c_in:   [Batch, Layers, Hidden]
        nemo_state = (
            h_in.permute(1, 0, 2).contiguous(),  # -> [Layers, Batch, Hidden]
            c_in.permute(1, 0, 2).contiguous(),
        )

        # decoder.predict(y, state, add_sos=False) -> (dec_out [B, U, H], state)
        dec_out, (h_next, c_next) = self.decoder.predict(
            targets, nemo_state, add_sos=False
        )

        # Hand the state back in the CoreML layout [Batch, Layers, Hidden].
        return dec_out, h_next.permute(1, 0, 2), c_next.permute(1, 0, 2)
class JointWrapper(nn.Module):
    """Adapts the NeMo RNNT joint network for CoreML export.

    Takes RAW (unprojected) encoder and prediction-network activations,
    applies the joint's own projections, combines them with broadcasting,
    and runs the joint net to produce logits.
    """

    def __init__(self, joint):
        super().__init__()
        self.joint = joint

    def forward(self, encoder_output, decoder_output):
        # encoder_output: [B, D, T] raw encoder activations
        # decoder_output: [B, U, H] raw prediction-network activations

        # The encoder projection expects time-major input [B, T, D].
        f = self.joint.project_encoder(encoder_output.transpose(1, 2))

        # The prediction-network projection takes [B, U, H] as-is.
        g = self.joint.project_prednet(decoder_output)

        # Broadcast-add: f [B, T, 1, D'] + g [B, 1, U, D'] -> [B, T, U, D'].
        combined = f.unsqueeze(2) + g.unsqueeze(1)

        # Joint net (activation + final linear) -> logits [B, T, U, V+1].
        return self.joint.joint_net(combined)
def export_rnnt_decoder_joint(model_id="nvidia/parakeet_realtime_eou_120m-v1"):
    """Export the RNNT decoder (prediction network) and joint network of a
    pretrained NeMo Parakeet model to two separate CoreML packages.

    Writes "parakeet_decoder.mlpackage" and "parakeet_joint.mlpackage" into
    the current working directory.

    Args:
        model_id: NGC / Hugging Face id of the pretrained NeMo ASR model.
    """
    print(f"Loading model: {model_id}")
    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location="cpu")
    asr_model.eval()

    decoder = asr_model.decoder
    joint = asr_model.joint

    hidden_size = decoder.pred_hidden  # prediction-net hidden size (640 for this model)
    vocab_size = decoder.vocab_size  # 1024 for this model

    print(f"Decoder Hidden Size: {hidden_size}")
    print(f"Vocab Size: {vocab_size}")

    # --- Export Decoder ---
    print("Exporting Decoder...")
    decoder_wrapper = DecoderWrapper(decoder, hidden_size)
    decoder_wrapper.eval()

    # State layout exposed to CoreML is [Batch, Layers, Hidden]; the wrapper
    # transposes to NeMo's [Layers, Batch, Hidden] internally.
    num_layers = decoder.pred_rnn_layers
    print(f"Decoder Layers: {num_layers}")

    example_targets = torch.zeros((1, 1), dtype=torch.int32)
    example_length = torch.tensor([1], dtype=torch.int32)
    example_h = torch.zeros((1, num_layers, hidden_size), dtype=torch.float32)
    example_c = torch.zeros((1, num_layers, hidden_size), dtype=torch.float32)

    traced_decoder = torch.jit.trace(decoder_wrapper, (example_targets, example_length, example_h, example_c))

    decoder_mlmodel = ct.convert(
        traced_decoder,
        inputs=[
            ct.TensorType(name="targets", shape=(1, 1), dtype=np.int32),
            ct.TensorType(name="target_length", shape=(1,), dtype=np.int32),
            ct.TensorType(name="h_in", shape=(1, num_layers, hidden_size), dtype=np.float32),
            ct.TensorType(name="c_in", shape=(1, num_layers, hidden_size), dtype=np.float32),
        ],
        outputs=[
            ct.TensorType(name="decoder_output"),
            ct.TensorType(name="h_out"),
            ct.TensorType(name="c_out"),
        ],
        minimum_deployment_target=ct.target.iOS17,
        compute_units=ct.ComputeUnit.CPU_ONLY,
    )
    decoder_mlmodel.save("parakeet_decoder.mlpackage")
    print("Saved parakeet_decoder.mlpackage")

    # --- Export Joint ---
    print("Exporting Joint...")
    joint_wrapper = JointWrapper(joint)
    joint_wrapper.eval()

    # The joint consumes one raw encoder frame [1, 512, 1] and one raw
    # prediction-net step [1, 1, 640] per greedy-decode iteration.
    example_enc = torch.randn(1, 512, 1)
    example_dec = torch.randn(1, 1, 640)

    # Sanity-check the wrapper's output shape before tracing.
    with torch.no_grad():
        out = joint_wrapper(example_enc, example_dec)
        print(f"Joint Output Shape: {out.shape}")

    traced_joint = torch.jit.trace(joint_wrapper, (example_enc, example_dec))

    joint_mlmodel = ct.convert(
        traced_joint,
        inputs=[
            ct.TensorType(name="encoder_output", shape=(1, 512, 1), dtype=np.float32),
            ct.TensorType(name="decoder_output", shape=(1, 1, 640), dtype=np.float32),
        ],
        outputs=[
            ct.TensorType(name="logits"),
        ],
        minimum_deployment_target=ct.target.iOS17,
        compute_units=ct.ComputeUnit.CPU_ONLY,
        # FP32 keeps logits numerically close to PyTorch.
        # NOTE(review): the decoder conversion above does not pin
        # compute_precision — confirm whether that asymmetry is intentional.
        compute_precision=ct.precision.FLOAT32,
    )
    joint_mlmodel.save("parakeet_joint.mlpackage")
    print("Saved parakeet_joint.mlpackage")
if __name__ == "__main__":
    # Script entry point: export with the default Parakeet model id.
    export_rnnt_decoder_joint()
final_scripts/export_encoder.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import nemo.collections.asr as nemo_asr
|
| 5 |
+
import coremltools as ct
|
| 6 |
+
import numpy as np
|
| 7 |
+
from typing import Tuple
|
| 8 |
+
|
class StreamingEncoderWrapper(nn.Module):
    """Wraps a NeMo cache-aware streaming encoder for CoreML export.

    NeMo's `cache_aware_stream_step` returns only the freshly written slice
    of each cache; this wrapper re-assembles full fixed-size caches
    (ring-buffer style: drop oldest, append newest) so the exported model
    consumes and produces caches of a constant shape.
    """

    def __init__(self, encoder: nn.Module, keep_all_outputs: bool = True):
        super().__init__()
        self.encoder = encoder
        self.keep_all_outputs = keep_all_outputs

        # Make sure streaming parameters exist before we read them.
        if encoder.streaming_cfg is None:
            encoder.setup_streaming_params()
        self.streaming_cfg = encoder.streaming_cfg

    @staticmethod
    def _refresh_cache(old: torch.Tensor, update: torch.Tensor, dim: int) -> torch.Tensor:
        """Drop the oldest entries of `old` along `dim` and append `update`,
        keeping the total size constant; if the update already fills the
        cache it replaces it outright."""
        step = update.size(dim)
        if step >= old.size(dim):
            return update
        kept = old.narrow(dim, step, old.size(dim) - step)
        return torch.cat([kept, update], dim=dim)

    def forward(
        self,
        mel: torch.Tensor,
        mel_length: torch.Tensor,
        cache_last_channel: torch.Tensor,
        cache_last_time: torch.Tensor,
        cache_last_channel_len: torch.Tensor,
    ) -> Tuple[torch.Tensor, ...]:
        # Run one streaming step of the encoder with the incoming caches.
        outputs = self.encoder.cache_aware_stream_step(
            processed_signal=mel,
            processed_signal_length=mel_length,
            cache_last_channel=cache_last_channel,
            cache_last_time=cache_last_time,
            cache_last_channel_len=cache_last_channel_len,
        )

        # cache_last_channel: [layers, B, T, D] -> time axis is dim 2.
        full_channel_cache = self._refresh_cache(cache_last_channel, outputs[2], dim=2)
        # cache_last_time: [layers, B, D, T] -> time axis is dim 3.
        full_time_cache = self._refresh_cache(cache_last_time, outputs[3], dim=3)

        # (encoded, encoded_len, channel_cache, time_cache, channel_cache_len)
        return (outputs[0], outputs[1], full_channel_cache, full_time_cache, outputs[4])
def export_streaming_encoder(model_id="nvidia/parakeet_realtime_eou_120m-v1", output_path="streaming_encoder.mlpackage", frames=16, shift=None, streaming_chunk_size=None):
    """Export the cache-aware streaming encoder of a NeMo Parakeet model to CoreML.

    Args:
        model_id: Pretrained NeMo ASR model id.
        output_path: Destination .mlpackage path.
        frames: Mel frames per exported chunk (10 ms per frame).
        shift: Shift size in frames; defaults to the chunk size.
        streaming_chunk_size: Chunk size used for encoder streaming setup
            (output steps); defaults to `frames`.
    """
    print(f"Loading model: {model_id}")
    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location="cpu")
    asr_model.eval()

    encoder = asr_model.encoder

    # Configure streaming params: prefer the explicit model chunk size,
    # otherwise fall back to the export frame count.
    c_size = streaming_chunk_size if streaming_chunk_size is not None else frames
    s_size = shift if shift is not None else c_size

    print(f"Setting up streaming params: chunk_size={c_size}, shift_size={s_size}")
    encoder.setup_streaming_params(chunk_size=c_size, shift_size=s_size)

    wrapper = StreamingEncoderWrapper(encoder)
    wrapper.eval()

    # e.g. 16 frames = 160 ms of audio at a 10 ms hop.
    print(f"Exporting for chunk size: {frames} frames ({frames*10}ms)")
    if shift:
        print(f"Shift size: {shift} frames ({shift*10}ms)")

    mel_dim = 128  # Parakeet uses 128 mel features, not 80

    # NOTE(review): cache geometry below (17 layers, channel cache length 70,
    # width 512, time cache length 8) is hard-coded for this FastConformer
    # config — confirm against the loaded encoder rather than assuming.
    num_layers = 17

    example_input = (
        torch.randn(1, mel_dim, frames),
        torch.tensor([frames], dtype=torch.int32),
        torch.randn(num_layers, 1, 70, 512),  # cache_last_channel
        torch.randn(num_layers, 1, 512, 8),  # cache_last_time
        torch.tensor([0], dtype=torch.int32)  # cache_last_channel_len
    )

    print("Tracing model...")
    traced_model = torch.jit.trace(wrapper, example_input, strict=False)

    print("Converting to CoreML...")
    inputs = [
        ct.TensorType(name="mel", shape=(1, mel_dim, frames), dtype=np.float32),
        ct.TensorType(name="mel_length", shape=(1,), dtype=np.int32),
        ct.TensorType(name="cache_last_channel", shape=(num_layers, 1, 70, 512), dtype=np.float32),
        ct.TensorType(name="cache_last_time", shape=(num_layers, 1, 512, 8), dtype=np.float32),
        ct.TensorType(name="cache_last_channel_len", shape=(1,), dtype=np.int32),
    ]

    outputs = [
        ct.TensorType(name="encoder", dtype=np.float32),
        ct.TensorType(name="encoder_length", dtype=np.int32),
        ct.TensorType(name="cache_last_channel_out", dtype=np.float32),
        ct.TensorType(name="cache_last_time_out", dtype=np.float32),
        ct.TensorType(name="cache_last_channel_len_out", dtype=np.int32),
    ]

    mlmodel = ct.convert(
        traced_model,
        inputs=inputs,
        outputs=outputs,
        minimum_deployment_target=ct.target.iOS17,
        compute_units=ct.ComputeUnit.ALL,
    )

    print(f"Saving to {output_path}")
    mlmodel.save(output_path)
    print("Done!")
if __name__ == "__main__":
    # CLI entry point for the streaming-encoder export.
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--frames",
        type=int,
        default=16,
        help="Number of frames per chunk (10ms per frame)",
    )
    arg_parser.add_argument(
        "--shift",
        type=int,
        default=None,
        help="Shift size in frames (default: same as frames)",
    )
    arg_parser.add_argument(
        "--model-chunk-size",
        type=int,
        default=None,
        help="Chunk size for model setup (output steps). If None, uses frames.",
    )
    arg_parser.add_argument(
        "--output",
        type=str,
        default="streaming_encoder.mlpackage",
        help="Output path",
    )
    opts = arg_parser.parse_args()

    export_streaming_encoder(
        frames=opts.frames,
        shift=opts.shift,
        streaming_chunk_size=opts.model_chunk_size,
        output_path=opts.output,
    )
final_scripts/export_preprocessor.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import nemo.collections.asr as nemo_asr
|
| 4 |
+
import coremltools as ct
|
| 5 |
+
import numpy as np
|
| 6 |
+
import argparse
|
| 7 |
+
|
class PreprocessorWrapper(nn.Module):
    """Thin export wrapper around the NeMo audio preprocessor.

    Exposes the preprocessor through positional tensor arguments so it can
    be traced with torch.jit and converted to CoreML.
    """

    def __init__(self, preprocessor: nn.Module):
        super().__init__()
        self.preprocessor = preprocessor

    def forward(
        self,
        input_signal: torch.Tensor,
        length: torch.Tensor,
    ):
        # Delegate to the wrapped preprocessor, which yields the mel
        # features and their per-example frame lengths.
        features, feature_lengths = self.preprocessor(
            input_signal=input_signal,
            length=length,
        )
        return features, feature_lengths
def export_preprocessor(
    model_id="nvidia/parakeet_realtime_eou_120m-v1",
    output_path="preprocessor.mlpackage",
    chunk_ms=160
):
    """Export the NeMo audio preprocessor (waveform -> mel features) to CoreML.

    Args:
        model_id: Pretrained NeMo ASR model id.
        output_path: Destination .mlpackage path.
        chunk_ms: Default chunk size in milliseconds, used as the example
            input length for tracing (the exported model accepts a range
            of lengths, see the RangeDim below).
    """
    print(f"Loading model: {model_id}")
    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location="cpu")
    asr_model.eval()

    preprocessor = asr_model.preprocessor
    # Disable dither (random noise) and length padding so inference output
    # is deterministic and tracks the input length exactly.
    if hasattr(preprocessor, 'dither'):
        preprocessor.dither = 0.0
    if hasattr(preprocessor, 'pad_to'):
        preprocessor.pad_to = 0

    wrapper = PreprocessorWrapper(preprocessor)
    wrapper.eval()

    # Audio samples per chunk at 16 kHz: e.g. 160 ms -> 2560 samples.
    chunk_samples = int(chunk_ms / 1000 * 16000)

    print(f"Chunk: {chunk_ms}ms = {chunk_samples} samples")

    # Example input for tracing.
    # NOTE(review): the traced length is int64 while the CoreML "length"
    # input below is declared int32 — confirm coremltools performs this
    # cast as intended.
    example_input = (
        torch.randn(1, chunk_samples),
        torch.tensor([chunk_samples], dtype=torch.int64),
    )

    print("Tracing model...")
    traced_model = torch.jit.trace(wrapper, example_input, strict=False)

    print("Converting to CoreML...")
    # RangeDim allows variable-length audio: 1600 samples (100 ms) up to
    # 16000 samples (1 s); longer audio must be split by the caller.
    inputs = [
        ct.TensorType(
            name="input_signal",
            shape=ct.Shape(shape=(1, ct.RangeDim(lower_bound=1600, upper_bound=16000, default=chunk_samples))),
            dtype=np.float32
        ),
        ct.TensorType(name="length", shape=(1,), dtype=np.int32),
    ]

    outputs = [
        ct.TensorType(name="mel", dtype=np.float32),
        ct.TensorType(name="mel_length", dtype=np.int32),
    ]

    mlmodel = ct.convert(
        traced_model,
        inputs=inputs,
        outputs=outputs,
        compute_units=ct.ComputeUnit.CPU_ONLY,
        minimum_deployment_target=ct.target.iOS17,
    )

    print(f"Saving to {output_path}")
    mlmodel.save(output_path)
    print("Done!")
if __name__ == "__main__":
    # CLI entry point for the preprocessor export.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--chunk-ms",
        type=int,
        default=160,
        help="Chunk size in milliseconds",
    )
    cli.add_argument(
        "--output-path",
        type=str,
        default="preprocessor.mlpackage",
        help="Output path",
    )
    ns = cli.parse_args()

    export_preprocessor(chunk_ms=ns.chunk_ms, output_path=ns.output_path)
final_scripts/inference_benchmark.py
ADDED
|
@@ -0,0 +1,847 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import torch
|
| 3 |
+
import torchaudio
|
| 4 |
+
import coremltools as ct
|
| 5 |
+
import numpy as np
|
| 6 |
+
import nemo.collections.asr as nemo_asr
|
| 7 |
+
from nemo.collections.asr.parts.utils.streaming_utils import CacheAwareStreamingAudioBuffer
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
import jiwer
|
| 10 |
+
import time
|
| 11 |
+
|
def load_manifest(dataset_path, subset='test-clean', max_files=None):
    """Build a list of manifest entries from a LibriSpeech-style directory.

    Walks `dataset_path/subset` for FLAC files and pairs each one with its
    transcript line from the sibling `{speaker}-{chapter}.trans.txt` file.

    Args:
        dataset_path: Root of the dataset (contains the subset directory).
        subset: Subset directory name, e.g. 'test-clean'.
        max_files: Optional cap on the number of entries returned.

    Returns:
        List of dicts with keys 'audio_filepath', 'text', and 'duration'
        (duration is a 0 placeholder; it is not read from the audio here).

    Raises:
        FileNotFoundError: If the subset directory is missing or contains
            no FLAC files.
    """
    subset_dir = Path(dataset_path) / subset
    if not subset_dir.exists():
        raise FileNotFoundError(f"Dataset directory not found: {subset_dir}")

    # Sort for determinism across filesystems.
    flac_files = sorted(subset_dir.rglob('*.flac'))
    if not flac_files:
        raise FileNotFoundError(f"No FLAC files found in {subset_dir}")

    # Parse each .trans.txt once and cache it, instead of re-reading the
    # same transcript file for every FLAC utterance in the chapter.
    transcripts_cache = {}
    entries = []
    for flac_path in flac_files:
        if max_files and len(entries) >= max_files:
            break

        speaker_id = flac_path.parent.parent.name
        chapter_id = flac_path.parent.name
        trans_file = flac_path.parent / f"{speaker_id}-{chapter_id}.trans.txt"

        if trans_file not in transcripts_cache:
            mapping = {}
            if trans_file.exists():
                with open(trans_file, 'r') as f:
                    for line in f:
                        # Each line: "<utterance_id> <transcript text>"
                        parts = line.strip().split(' ', 1)
                        if len(parts) == 2:
                            mapping[parts[0]] = parts[1]
            transcripts_cache[trans_file] = mapping

        text = transcripts_cache[trans_file].get(flac_path.stem)
        if text is not None:
            entries.append({
                'audio_filepath': str(flac_path),
                'text': text,
                'duration': 0
            })
    print(f"Loaded {len(entries)} entries from {subset_dir}")
    return entries
| 48 |
+
def run_coreml_pipeline(coreml_encoder, coreml_decoder, coreml_joint, pytorch_model, audio_path, coreml_preprocessor=None):
    """Fully-CoreML streaming RNNT inference on a single audio file.

    The CoreML encoder/decoder/joint models are driven chunk by chunk.  The
    PyTorch NeMo model is used only for its cache-aware streaming buffer
    (chunking/feature extraction) and for detokenizing the hypothesis.

    Args:
        coreml_encoder, coreml_decoder, coreml_joint: loaded MLModel objects.
        pytorch_model: NeMo ASR model (streaming buffer + tokenizer/decoding).
        audio_path: Path to the audio file.
        coreml_preprocessor: accepted for call-site symmetry but NOT used in
            this function — features come from the streaming buffer.

    Returns:
        dict with 'hypothesis' (decoded text, <EOU> stripped) and
        'audio_length' in seconds; empty hypothesis if audio loading fails.
    """
    # 1. Load Audio (mono, 16 kHz)
    try:
        audio, sr = torchaudio.load(audio_path)
        if sr != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
            audio = resampler(audio)
        if audio.shape[0] > 1:
            # Downmix multi-channel audio to mono.
            audio = audio.mean(dim=0, keepdim=True)

        # NOTE(review): audio_tensor/audio_len are not used below (the
        # streaming buffer re-reads the file); only `audio` is used, for the
        # duration computation at the end.
        audio_tensor = audio
        audio_len = torch.tensor([audio.shape[1]], dtype=torch.long)
    except Exception as e:
        print(f"Error loading {audio_path}: {e}")
        return {'hypothesis': "", 'audio_length': 0}

    # 2. Setup Streaming Params & Buffer
    # Use chunk_size=4 to match PyTorch success (approx 320ms compute, 410ms input)
    pytorch_model.encoder.setup_streaming_params(chunk_size=4, shift_size=4)

    streaming_buffer = CacheAwareStreamingAudioBuffer(
        model=pytorch_model,
        online_normalization=False,
        pad_and_drop_preencoded=False
    )
    streaming_buffer.append_audio_file(audio_path, stream_id=-1)

    # 3. CoreML True Streaming Loop
    # Initialize encoder cache tensors.  Shapes must match the exported
    # CoreML encoder: 17 conformer layers, 70-frame channel cache,
    # hidden size 512, time-cache depth 8 — TODO confirm against export.
    num_layers = 17
    cache_last_channel = np.zeros((num_layers, 1, 70, 512), dtype=np.float32)
    cache_last_time = np.zeros((num_layers, 1, 512, 8), dtype=np.float32)
    cache_last_channel_len = np.zeros((1,), dtype=np.int32)

    # Initialize decoder LSTM state (1 layer, batch 1, hidden 640).
    h_state = np.zeros((1, 1, 640), dtype=np.float32)
    c_state = np.zeros((1, 1, 640), dtype=np.float32)

    blank_token = 1026  # Parakeet blank
    last_token = blank_token

    hypothesis_tokens = []
    max_symbols_per_step = 10  # cap symbols per acoustic frame to avoid loops

    fixed_chunk_frames = 41  # Matches export for chunk_size=4

    for chunk_audio, chunk_len in streaming_buffer:
        # --- Encoder Step ---
        # chunk_audio: [1, 128, T]; pad/trim T to the fixed export width.
        T_curr = chunk_audio.shape[2]

        if T_curr < fixed_chunk_frames:
            pad_amt = fixed_chunk_frames - T_curr
            # -16.0 is used as the log-mel padding value — presumably the
            # preprocessor's silence floor; verify against export script.
            padding = torch.full((1, 128, pad_amt), -16.0)
            chunk_audio = torch.cat([chunk_audio, padding], dim=2)
        elif T_curr > fixed_chunk_frames:
            chunk_audio = chunk_audio[:, :, :fixed_chunk_frames]

        chunk_mel_input = chunk_audio.numpy()
        mel_len_input = np.array([fixed_chunk_frames], dtype=np.int32)

        inputs = {
            "mel": chunk_mel_input,
            "mel_length": mel_len_input,
            "cache_last_channel": cache_last_channel,
            "cache_last_time": cache_last_time,
            "cache_last_channel_len": cache_last_channel_len
        }

        outputs = coreml_encoder.predict(inputs)

        # Carry the updated caches into the next chunk.
        cache_last_channel = outputs["cache_last_channel_out"]
        cache_last_time = outputs["cache_last_time_out"]
        cache_last_channel_len = outputs["cache_last_channel_len_out"]

        enc_out = outputs["encoder"]  # [1, 512, 4]

        # --- Decoder Step (Immediate) ---
        # Greedy RNNT decoding per encoder frame.  Note the state handling:
        # the decoder is re-run with (last_token, h_state, c_state) for every
        # frame, and h/c only advance when a non-blank symbol is emitted.
        T_enc = enc_out.shape[2]

        for t in range(T_enc):
            enc_t = enc_out[:, :, t:t+1]  # [1, 512, 1]

            # Run decoder once for the current (token, state) pair.
            targets = np.array([[last_token]], dtype=np.int32)
            target_length = np.array([1], dtype=np.int32)

            dec_inputs = {
                "targets": targets,
                "target_length": target_length,
                "h_in": h_state,
                "c_in": c_state
            }

            dec_outputs = coreml_decoder.predict(dec_inputs)
            decoder_step = dec_outputs["decoder_output"]
            h_state_next = dec_outputs["h_out"]
            c_state_next = dec_outputs["c_out"]

            symbols_added = 0
            while symbols_added < max_symbols_per_step:
                joint_inputs = {
                    "encoder_output": enc_t,
                    "decoder_output": decoder_step
                }

                joint_outputs = coreml_joint.predict(joint_inputs)

                logits = joint_outputs["logits"]
                token_id = int(np.argmax(logits))

                if token_id == blank_token:
                    # Blank: advance to next acoustic frame; decoder state is
                    # NOT advanced (h_state_next/c_state_next are discarded).
                    break

                # EOU Check (1024)
                if token_id == 1024:
                    # End-of-utterance: reset decoder state and restart from
                    # the blank token for the next segment.
                    h_state = np.zeros((1, 1, 640), dtype=np.float32)
                    c_state = np.zeros((1, 1, 640), dtype=np.float32)
                    last_token = blank_token
                    break

                else:
                    # Non-blank symbol: emit it, accept the state transition,
                    # and run the decoder again for the same acoustic frame.
                    hypothesis_tokens.append(token_id)
                    last_token = token_id
                    symbols_added += 1

                    h_state = h_state_next
                    c_state = c_state_next

                    targets = np.array([[last_token]], dtype=np.int32)
                    dec_inputs = {
                        "targets": targets,
                        "target_length": target_length,
                        "h_in": h_state,
                        "c_in": c_state
                    }
                    dec_outputs = coreml_decoder.predict(dec_inputs)
                    decoder_step = dec_outputs["decoder_output"]
                    h_state_next = dec_outputs["h_out"]
                    c_state_next = dec_outputs["c_out"]

    # Decode tokens: drop anything outside the tokenizer vocabulary
    # (blank/EOU ids live above vocab_size).
    vocab_size = pytorch_model.tokenizer.vocab_size
    valid_tokens = [t for t in hypothesis_tokens if t < vocab_size]

    if len(valid_tokens) != len(hypothesis_tokens):
        print(f"Filtered {len(hypothesis_tokens) - len(valid_tokens)} invalid tokens (>= {vocab_size})")

    if not valid_tokens:
        return {
            'hypothesis': "",
            'audio_length': audio.shape[1] / 16000
        }

    hypothesis = pytorch_model.decoding.decode_tokens_to_str([valid_tokens])[0]
    hypothesis = hypothesis.replace("<EOU>", "").strip()

    return {
        'hypothesis': hypothesis,
        'audio_length': audio.shape[1] / 16000
    }
|
| 210 |
+
|
| 211 |
+
def run_pytorch_streaming_pipeline(pytorch_model, audio_path):
    """DEAD CODE — first draft of the pure-PyTorch streaming pipeline.

    NOTE(review): this definition is shadowed by a later function of the same
    name in this file, so it is never called.  It is also not safe to
    resurrect as-is: it falls off the end without a return statement
    (implicitly returns None), and the inner symbol loop never increments
    ``symbols_added``, so a run of non-blank predictions would loop forever.
    Kept only as a record of the manual greedy-RNNT attempt; the working
    version below uses NeMo's ``conformer_stream_step`` instead.
    """
    # 1. Load Audio (mono, 16 kHz)
    try:
        audio, sr = torchaudio.load(audio_path)
        if sr != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
            audio = resampler(audio)
        if audio.shape[0] > 1:
            audio = audio.mean(dim=0, keepdim=True)

        audio_tensor = audio
        audio_len = torch.tensor([audio.shape[1]], dtype=torch.long)
    except Exception as e:
        print(f"Error loading {audio_path}: {e}")
        return {'hypothesis': "", 'audio_length': 0}

    # 2. Preprocessor: full-utterance log-mel features.
    with torch.no_grad():
        processed_signal, processed_signal_len = pytorch_model.preprocessor(
            input_signal=audio_tensor, length=audio_len
        )

    # 3. Streaming Loop over fixed-size mel chunks.
    total_frames = processed_signal.shape[2]
    chunk_frames = 32  # Match CoreML

    # Initialize encoder cache (shapes as in the CoreML pipeline above).
    num_layers = 17
    cache_last_channel = torch.zeros(num_layers, 1, 70, 512)
    cache_last_time = torch.zeros(num_layers, 1, 512, 8)
    cache_last_channel_len = torch.zeros(1, dtype=torch.long)

    # Initialize decoder state; 1026 is the blank token (used here as SOS).
    decoder_state = None
    last_token = torch.tensor([[1026]], dtype=torch.long)  # Blank token

    final_hyp_tokens = []

    for i in range(0, total_frames, chunk_frames):
        end = min(i + chunk_frames, total_frames)
        chunk_mel = processed_signal[:, :, i:end]  # [1, D, T]

        # Pad the last chunk up to chunk_frames if needed.
        if chunk_mel.shape[2] < chunk_frames:
            pad_amt = chunk_frames - chunk_mel.shape[2]
            chunk_mel = torch.nn.functional.pad(chunk_mel, (0, pad_amt))

        chunk_len = torch.tensor([chunk_mel.shape[2]], dtype=torch.long)

        with torch.no_grad():
            # 1. Cache-aware encoder step for this chunk.
            (
                enc_out,
                enc_len,
                cache_last_channel,
                cache_last_time,
                cache_last_channel_len
            ) = pytorch_model.encoder.forward_internal(
                audio_signal=chunk_mel,
                length=chunk_len,
                cache_last_channel=cache_last_channel,
                cache_last_time=cache_last_time,
                cache_last_channel_len=cache_last_channel_len
            )

            # enc_out: [B, D, T_out]; transpose to [B, T_out, D] for the joint.
            enc_out = enc_out.transpose(1, 2)

            # 2. Greedy decoding loop over acoustic frames.
            for t in range(enc_out.shape[1]):
                f_t = enc_out[:, t:t+1, :]  # [1, 1, 512]

                # Pre-project the encoder frame through the joint's encoder
                # branch once per frame (joint.enc is the linear projection).
                f_t_proj = pytorch_model.joint.enc(f_t)  # [1, 1, 640]

                # Cap symbols per frame to prevent infinite loops.
                # NOTE(review): symbols_added is never incremented below, so
                # this cap is ineffective — a known defect of this draft.
                max_symbols = 10
                symbols_added = 0

                while symbols_added < max_symbols:
                    # Decoder step: feed the previous token and state.
                    # NOTE(review): the returned state is accepted
                    # unconditionally here, even on blank — which deviates
                    # from the standard RNNT recipe (on blank the decoder
                    # state should be kept, not advanced).  Another reason
                    # this draft was abandoned.
                    g, _, decoder_state = pytorch_model.decoder.forward(
                        targets=last_token,
                        target_length=torch.tensor([1]),
                        states=decoder_state
                    )

                    # g: [1, 640, U'] — take the last step's output.
                    g = g[:, :, -1:]  # [1, 640, 1]
                    g = g.transpose(1, 2)  # [1, 1, 640]

                    # Project through the joint's prediction branch.
                    g_proj = pytorch_model.joint.pred(g)  # [1, 1, 640]

                    # Joint network over the (same-shape) projected pair.
                    out = pytorch_model.joint.joint_net(f_t_proj + g_proj)  # [1, 1, 1027]

                    # Greedy argmax over the vocabulary (+ blank).
                    k = out.argmax(dim=-1)  # [1, 1]
                    pred_token = k.item()

                    if pred_token == 1026:  # Blank
                        break
                    else:
                        final_hyp_tokens.append(pred_token)
                        last_token = k  # Update last token
                        # (Original draft contained a long design discussion
                        # here about when to accept vs. discard the decoder
                        # state on blank/symbol; summarized in the NOTE above.)
                        pass

    # "Refined Logic Implementation" — abandoned mid-rewrite.
    # NOTE(review): the original indentation of this tail section was
    # ambiguous; it is placed at function level here.  It recomputes an
    # initial decoder step (feeding blank/SOS into a fresh state) and stores
    # the projection/state pair, then the function simply ends — no return.
    last_token = torch.tensor([[1026]], dtype=torch.long)
    decoder_state = None

    # Pre-compute initial g from the SOS/blank token.
    g, _, next_decoder_state = pytorch_model.decoder.forward(
        targets=last_token,
        target_length=torch.tensor([1]),
        states=decoder_state
    )
    # g: [1, 640, U'] — slice the last step and reshape for the joint.
    g = g[:, :, -1:]  # [1, 640, 1]
    g = g.transpose(1, 2)  # [1, 1, 640]

    g_proj = pytorch_model.joint.pred(g)

    # Hold the current decoder projection and its candidate follow-up state;
    # the loop that would consume these was never written.
    current_g_proj = g_proj
    candidate_state = next_decoder_state
def run_pytorch_streaming_pipeline(pytorch_model, audio_path):
    """Pure-PyTorch streaming simulation using NeMo's cache-aware step API.

    Feeds fixed-size mel chunks through ``conformer_stream_step`` while
    carrying the encoder caches and running hypothesis across chunks.  When
    the EOU token (1024) appears in the hypothesis, the committed text is
    appended to the result and the running hypothesis is reset.

    NOTE(review): this definition shadows an earlier draft of the same name
    above; this is the version actually called by main().

    Returns:
        dict with 'hypothesis' (EOU markers stripped) and 'audio_length'
        in seconds.
    """
    import librosa
    # Load audio resampled to 16 kHz mono (librosa returns a 1-D float array).
    audio, sample_rate = librosa.load(audio_path, sr=16000)

    # Preprocessing: full-utterance log-mel features.
    # NOTE(review): not wrapped in torch.no_grad() here, unlike the other
    # pipelines; gradients are not used, it just costs memory.
    processed_signal, processed_signal_length = pytorch_model.preprocessor(
        input_signal=torch.tensor([audio]),
        length=torch.tensor([len(audio)])
    )

    # Switch the model to greedy RNNT decoding if it isn't already — the
    # stream-step API is driven with per-chunk greedy hypotheses.
    if pytorch_model.decoding.cfg.strategy != 'greedy':
        print("Switching to 'greedy' decoding strategy for streaming...")
        from omegaconf import OmegaConf
        from nemo.collections.asr.parts.submodules.rnnt_decoding import RNNTBPEDecoding

        new_cfg = OmegaConf.create({
            'strategy': 'greedy',
            'greedy': {'max_symbols': 10},
            'preserve_alignments': True,
            'compute_timestamps': False
        })

        # Rebuild the decoding wrapper in place; mutates the model object.
        pytorch_model.decoding = RNNTBPEDecoding(
            decoding_cfg=new_cfg,
            decoder=pytorch_model.decoder,
            joint=pytorch_model.joint,
            tokenizer=pytorch_model.tokenizer
        )

    # Streaming loop over fixed-size mel chunks.
    total_frames = processed_signal.shape[2]
    chunk_frames = 32

    # Initialize encoder caches (same shapes as the CoreML export).
    num_layers = 17
    cache_last_channel = torch.zeros(num_layers, 1, 70, 512)
    cache_last_time = torch.zeros(num_layers, 1, 512, 8)
    cache_last_channel_len = torch.zeros(1, dtype=torch.long)

    # Running decoding context carried between chunks.
    # NOTE(review): previous_pred_out is passed every step but never updated
    # from the step outputs — confirm whether conformer_stream_step needs it.
    previous_hypotheses = None
    previous_pred_out = None

    # Text committed at EOU boundaries; the in-flight segment text lives in
    # previous_hypotheses until EOU or end of audio.
    final_hyp = ""

    for i in range(0, total_frames, chunk_frames):
        end = min(i + chunk_frames, total_frames)
        chunk_mel = processed_signal[:, :, i:end]  # [1, D, T]

        # Pad the final chunk to chunk_frames if needed.
        if chunk_mel.shape[2] < chunk_frames:
            pad_amt = chunk_frames - chunk_mel.shape[2]
            chunk_mel = torch.nn.functional.pad(chunk_mel, (0, pad_amt))

        chunk_len = torch.tensor([chunk_mel.shape[2]], dtype=torch.long)

        with torch.no_grad():
            # Native cache-aware streaming step: encoder + greedy decode.
            (
                greedy_predictions,
                all_hyp_text,
                cache_last_channel,
                cache_last_time,
                cache_last_channel_len,
                best_hyp_list,  # This is the Hypothesis list
            ) = pytorch_model.conformer_stream_step(
                processed_signal=chunk_mel,
                processed_signal_length=chunk_len,
                cache_last_channel=cache_last_channel,
                cache_last_time=cache_last_time,
                cache_last_channel_len=cache_last_channel_len,
                previous_hypotheses=previous_hypotheses,
                previous_pred_out=previous_pred_out
            )

        # Carry the running hypothesis into the next chunk.
        previous_hypotheses = best_hyp_list

        # best_hyp_list may be a list of Hypothesis objects or a single one;
        # normalize to the first/only object.
        current_hyp_obj = None
        if best_hyp_list:
            if isinstance(best_hyp_list, list):
                current_hyp_obj = best_hyp_list[0]
            else:
                current_hyp_obj = best_hyp_list

        # Check for the EOU token (1024) in the decoded sequence; y_sequence
        # may be a plain list or a tensor depending on NeMo version.
        is_eou = False
        if current_hyp_obj:
            if hasattr(current_hyp_obj, 'y_sequence'):
                y_seq = current_hyp_obj.y_sequence
                if isinstance(y_seq, list):
                    if 1024 in y_seq:
                        is_eou = True
                elif torch.is_tensor(y_seq):
                    if (y_seq == 1024).any():
                        is_eou = True

        if is_eou:
            # EOU detected: commit the segment text and reset the running
            # hypothesis so the next segment decodes from scratch.
            if current_hyp_obj and hasattr(current_hyp_obj, 'text'):
                final_hyp += current_hyp_obj.text + " "

            previous_hypotheses = None
            print("DEBUG: EOU detected, resetting previous_hypotheses")
        else:
            # No EOU yet: the in-flight text stays in previous_hypotheses and
            # is committed either at the next EOU or after the loop below.
            pass

    # End of loop: commit any remaining (uncommitted) segment text.
    if previous_hypotheses:
        last_hyp_list = previous_hypotheses
        if isinstance(last_hyp_list, list):
            last_hyp_obj = last_hyp_list[0]
        else:
            last_hyp_obj = last_hyp_list

        if last_hyp_obj and hasattr(last_hyp_obj, 'text'):
            final_hyp += last_hyp_obj.text

    # Strip <eou> markers in either casing.
    final_hyp = final_hyp.replace("<eou>", "").replace("<EOU>", "").strip()

    return {
        'hypothesis': final_hyp,
        'audio_length': audio.shape[0] / 16000
    }
|
| 599 |
+
|
| 600 |
+
def run_pytorch_pipeline(pytorch_model, audio_path):
    """Offline (non-streaming) reference transcription via model.transcribe().

    Args:
        pytorch_model: Model exposing ``transcribe(paths, batch_size, verbose)``.
        audio_path: Path to a single audio file.

    Returns:
        dict with 'hypothesis' (EOU markers removed) and 'audio_length'
        (always 0 here — duration is not computed in offline mode).
        On any failure the hypothesis is empty.
    """
    try:
        result = pytorch_model.transcribe([audio_path], batch_size=1, verbose=False)

        # Some NeMo versions return a (best_hyps, all_hyps) tuple.
        if isinstance(result, tuple):
            result = result[0]

        best = result[0]

        # Newer NeMo returns Hypothesis objects with a .text attribute;
        # older versions return plain strings.
        if hasattr(best, 'text'):
            best = best.text

        # Remove end-of-utterance markers in either casing.
        if isinstance(best, str):
            best = best.replace("<eou>", "").replace("<EOU>", "").strip()

        return {
            'hypothesis': best,
            'audio_length': 0  # Placeholder
        }

    except Exception as e:
        print(f"Error running PyTorch pipeline on {audio_path}: {e}")
        return {'hypothesis': "", 'audio_length': 0}
|
| 627 |
+
|
| 628 |
+
def main():
    """CLI entry point: benchmark WER on a LibriSpeech subset.

    Selects one of four pipelines via flags — full CoreML (default),
    --hybrid (CoreML encoder + PyTorch decoding), --pytorch-only (offline
    transcribe), or --pytorch-streaming (NeMo cache-aware streaming) — then
    prints per-file and average WER.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='/Users/kikow/Library/Caches/fluidaudio/LibriSpeech/LibriSpeech', help='Path to LibriSpeech')
    parser.add_argument('--subset', default='test-clean', help='Subset to test')
    parser.add_argument('--max-files', type=int, default=100, help='Number of files to process')

    # Default paths based on file list
    parser.add_argument('--coreml-encoder', default='streaming_encoder_320ms.mlpackage')
    parser.add_argument('--coreml-decoder', default='parakeet_decoder.mlpackage')
    parser.add_argument('--coreml-joint', default='parakeet_joint.mlpackage')

    parser.add_argument('--pytorch-model', default='nvidia/parakeet_realtime_eou_120m-v1')
    parser.add_argument('--coreml-preprocessor', default='preprocessor_160ms.mlpackage')
    parser.add_argument('--hybrid', action='store_true', help='Use Hybrid mode (CoreML Encoder + PyTorch Decoder)')
    parser.add_argument('--pytorch-only', action='store_true', help='Use pure PyTorch model (Offline)')
    parser.add_argument('--pytorch-streaming', action='store_true', help='Use pure PyTorch model (Streaming Simulation)')
    args = parser.parse_args()

    # The PyTorch model is always loaded: even the CoreML pipelines use it
    # for chunking/tokenization.
    print(f"Loading PyTorch model: {args.pytorch_model}")
    pytorch_model = nemo_asr.models.ASRModel.from_pretrained(args.pytorch_model, map_location="cpu")
    pytorch_model.eval()

    # Only load CoreML if not pytorch-only or pytorch-streaming
    coreml_encoder = None
    coreml_decoder = None
    coreml_joint = None
    coreml_preprocessor = None

    if not args.pytorch_only and not args.pytorch_streaming:
        print(f"Loading CoreML Encoder: {args.coreml_encoder}")
        coreml_encoder = ct.models.MLModel(args.coreml_encoder)

        if args.hybrid:
            # Preprocessor is optional in hybrid mode; fall back to PyTorch
            # feature extraction if it fails to load.
            print(f"Loading CoreML Preprocessor: {args.coreml_preprocessor}")
            try:
                coreml_preprocessor = ct.models.MLModel(args.coreml_preprocessor)
            except Exception as e:
                print(f"Failed to load CoreML Preprocessor: {e}")
                print("Falling back to PyTorch Preprocessor")

        if not args.hybrid:
            # Full-CoreML mode also needs the decoder and joint networks.
            print(f"Loading CoreML Decoder: {args.coreml_decoder}")
            coreml_decoder = ct.models.MLModel(args.coreml_decoder)

            print(f"Loading CoreML Joint: {args.coreml_joint}")
            coreml_joint = ct.models.MLModel(args.coreml_joint)
    elif args.pytorch_streaming:
        print("Running in PYTORCH-STREAMING mode")
    else:
        print("Running in PYTORCH-ONLY (Offline) mode")

    entries = load_manifest(args.dataset, args.subset, args.max_files)

    total_wer = 0
    count = 0
    start_time = time.time()  # NOTE(review): captured but never reported

    print(f"Starting Benchmark on {len(entries)} files...")

    for i, entry in enumerate(entries):
        try:
            if args.pytorch_streaming:
                result = run_pytorch_streaming_pipeline(pytorch_model, entry['audio_filepath'])
            elif args.pytorch_only:
                result = run_pytorch_pipeline(pytorch_model, entry['audio_filepath'])
            elif args.hybrid:
                result = run_hybrid_pipeline(coreml_encoder, pytorch_model, entry['audio_filepath'], coreml_preprocessor)
            else:
                result = run_coreml_pipeline(coreml_encoder, coreml_decoder, coreml_joint, pytorch_model, entry['audio_filepath'], coreml_preprocessor)

            ref = entry['text'].lower()
            hyp = result['hypothesis'].lower()

            # Per-file WER; the final figure below is the mean of per-file
            # WERs, not a corpus-level (pooled word-count) WER.
            wer = jiwer.wer(ref, hyp)
            total_wer += wer
            count += 1

            print(f"[{i+1}/{len(entries)}] {Path(entry['audio_filepath']).name} | WER: {wer:.2%} | Ref: '{ref}' | Hyp: '{hyp}'")
        except Exception as e:
            # A single failing file should not abort the whole benchmark.
            print(f"[{i+1}/{len(entries)}] Failed: {e}")
            import traceback
            traceback.print_exc()

    if count > 0:
        avg_wer = total_wer / count
        print(f"\nAverage WER over {count} files: {avg_wer:.2%}")
    else:
        print("\nNo files processed successfully.")
|
| 716 |
+
|
| 717 |
+
def run_hybrid_pipeline(coreml_encoder, pytorch_model, audio_path, coreml_preprocessor=None):
    """Hybrid inference: CoreML streaming encoder + PyTorch RNNT decoding.

    Features are extracted (CoreML preprocessor if given, else PyTorch), the
    CoreML encoder is run chunk-by-chunk with carried caches, the per-chunk
    encoder outputs are concatenated, and NeMo's offline RNNT decoding is run
    once over the full sequence.

    Returns:
        dict with 'hypothesis' and 'audio_length' in seconds; empty
        hypothesis if audio loading fails or no encoder output was produced.
    """
    # 1. Load Audio (mono, 16 kHz)
    try:
        audio, sr = torchaudio.load(audio_path)
        if sr != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
            audio = resampler(audio)
        if audio.shape[0] > 1:
            audio = audio.mean(dim=0, keepdim=True)

        audio_tensor = audio
        audio_len = torch.tensor([audio.shape[1]], dtype=torch.long)
    except Exception as e:
        print(f"Error loading {audio_path}: {e}")
        return {'hypothesis': "", 'audio_length': 0}

    # 2. Preprocessor
    if coreml_preprocessor:
        # CoreML Preprocessor
        # Input: input_signal (1, N); Output: mel (1, 128, T)
        audio_np = audio.numpy()
        if audio_np.ndim == 2:
            audio_np = audio_np.reshape(1, -1)  # Ensure (1, N)

        inputs = {
            "input_signal": audio_np,
            # NOTE(review): length is sent as float32 — presumably what the
            # exported model expects; confirm against the export script.
            "length": np.array([audio_np.shape[1]], dtype=np.float32)
        }
        out = coreml_preprocessor.predict(inputs)
        processed_signal = torch.from_numpy(out["mel"])  # (1, 128, T)
        # Defensive squeeze in case CoreML returns an extra leading dim.
        if processed_signal.ndim == 4:
            processed_signal = processed_signal.squeeze(0)

        # Downstream expects (1, 128, T), matching PyTorch's preprocessor.
    else:
        # PyTorch Preprocessor fallback.
        with torch.no_grad():
            processed_signal, processed_signal_len = pytorch_model.preprocessor(
                input_signal=audio_tensor, length=audio_len
            )

    # 3. CoreML Encoder Loop over fixed 32-frame chunks.
    total_frames = processed_signal.shape[2]

    # Initialize CoreML cache (shapes match the exported streaming encoder).
    num_layers = 17
    cache_last_channel = np.zeros((num_layers, 1, 70, 512), dtype=np.float32)
    cache_last_time = np.zeros((num_layers, 1, 512, 8), dtype=np.float32)
    cache_last_channel_len = np.zeros((1,), dtype=np.int32)

    accumulated_encoder_output = []

    fixed_chunk_size = 32
    chunk_frames = 32

    for i in range(0, total_frames, chunk_frames):
        end = min(i + chunk_frames, total_frames)
        chunk_mel = processed_signal[:, :, i:end].numpy()  # [1, 128, T]

        current_chunk_len = chunk_mel.shape[2]

        # Pad the final partial chunk with the log-mel floor value (-16.0);
        # the reported length is always the fixed chunk size either way.
        if current_chunk_len < fixed_chunk_size:
            pad_amt = fixed_chunk_size - current_chunk_len
            padding = np.full((1, 128, pad_amt), -16.0, dtype=np.float32)
            chunk_mel_input = np.concatenate([chunk_mel, padding], axis=2)
            mel_len_input = np.array([fixed_chunk_size], dtype=np.int32)
        else:
            chunk_mel_input = chunk_mel
            mel_len_input = np.array([fixed_chunk_size], dtype=np.int32)

        inputs = {
            "mel": chunk_mel_input,
            "mel_length": mel_len_input,
            "cache_last_channel": cache_last_channel,
            "cache_last_time": cache_last_time,
            "cache_last_channel_len": cache_last_channel_len
        }

        outputs = coreml_encoder.predict(inputs)

        # Carry caches forward into the next chunk.
        cache_last_channel = outputs["cache_last_channel_out"]
        cache_last_time = outputs["cache_last_time_out"]
        cache_last_channel_len = outputs["cache_last_channel_len_out"]

        enc_out = outputs["encoder"]
        # enc_len = outputs["encoder_length"] # Always 3?

        accumulated_encoder_output.append(enc_out)

    if not accumulated_encoder_output:
        return {'hypothesis': "", 'audio_length': audio.shape[1] / 16000}

    # Concatenate per-chunk encoder outputs along time: [1, 512, T_total].
    encoder_output = np.concatenate(accumulated_encoder_output, axis=2)

    # 4. PyTorch Decoding over the full encoder sequence.
    encoder_output_tensor = torch.from_numpy(encoder_output)  # [1, 512, T]

    # NOTE(review): the full concatenated length is passed as the valid
    # length, so any frames produced from chunk padding are decoded too —
    # a deliberate simplification, not an exact-length computation.
    encoded_lengths = torch.tensor([encoder_output.shape[2]], dtype=torch.long)

    with torch.no_grad():
        # Offline greedy RNNT decoding; expects (B, D, T) encoder output.
        hypotheses = pytorch_model.decoding.rnnt_decoder_predictions_tensor(
            encoder_output=encoder_output_tensor,
            encoded_lengths=encoded_lengths,
            return_hypotheses=True
        )

    hypothesis = hypotheses[0].text

    return {
        'hypothesis': hypothesis,
        'audio_length': audio.shape[1] / 16000
    }
|
| 845 |
+
|
| 846 |
+
# Script entry point: run the WER benchmark CLI.
if __name__ == "__main__":
    main()
|
final_scripts/inference_reference_nemo.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import soundfile as sf
|
| 3 |
+
import librosa
|
| 4 |
+
import numpy as np
|
| 5 |
+
import logging
|
| 6 |
+
from omegaconf import OmegaConf, open_dict
|
| 7 |
+
import nemo.collections.asr as nemo_asr
|
| 8 |
+
from nemo.collections.asr.parts.utils.streaming_utils import CacheAwareStreamingAudioBuffer
|
| 9 |
+
from nemo.collections.asr.parts.submodules.rnnt_decoding import RNNTBPEDecoding
|
| 10 |
+
from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis
|
| 11 |
+
|
| 12 |
+
# Configure logging
|
| 13 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 14 |
+
|
| 15 |
+
def setup_decoding_strategy(asr_model, strategy='greedy'):
    """Configure the model's RNNT decoding strategy.

    First tries the model's own ``change_decoding_strategy`` hook with a
    minimal decoding config; if that raises, falls back to building a fresh
    ``RNNTBPEDecoding`` from the model's decoder/joint/tokenizer and swapping
    it in manually (needed for the Parakeet EOU checkpoint).
    """
    print(f"Setting up decoding strategy: {strategy}")

    # Minimal decoding config (the NeMo example derives this from cfg.rnnt_decoding).
    cfg = OmegaConf.create(
        {
            'strategy': strategy,
            'greedy': {'max_symbols': 10},   # standard greedy params
            'fused_batch_size': -1,
            'compute_timestamps': False,     # disabled for stability
            'preserve_alignments': False,
        }
    )

    if hasattr(asr_model, 'change_decoding_strategy'):
        try:
            asr_model.change_decoding_strategy(cfg)
        except Exception as e:
            print(f"Standard change_decoding_strategy failed: {e}")
            print("Attempting manual replacement...")
        else:
            print("Successfully changed decoding strategy via change_decoding_strategy")
            return

    # Manual replacement fallback (Required for Parakeet EOU).
    replaceable = hasattr(asr_model, 'decoding') and isinstance(asr_model.decoding, RNNTBPEDecoding)
    if not replaceable:
        print("Could not change decoding strategy.")
        return

    asr_model.decoding = RNNTBPEDecoding(
        decoding_cfg=cfg,
        decoder=asr_model.decoder,
        joint=asr_model.joint,
        tokenizer=asr_model.tokenizer,
    )
    print("Successfully replaced decoding strategy manually.")
|
| 53 |
+
|
| 54 |
+
def perform_streaming(asr_model, streaming_buffer, device):
    """
    Performs streaming inference using conformer_stream_step.
    Follows the NeMo example structure.

    Iterates over (chunk_audio, chunk_lengths) pairs from the buffer, feeding
    the encoder cache tensors back in at every step.  Text is committed to the
    final transcription only when an end-of-utterance token is observed in the
    hypothesis; any trailing partial hypothesis is appended after the loop.
    Returns the accumulated transcription with "<eou>" markers stripped.
    """
    # Get initial cache state
    # Note: The example uses batch_size from buffer, we assume 1 for simplicity here
    batch_size = 1
    cache_last_channel, cache_last_time, cache_last_channel_len = asr_model.encoder.get_initial_cache_state(
        batch_size=batch_size
    )

    # Move cache to device (get_initial_cache_state may return None caches,
    # e.g. for non-cache-aware encoders, hence the guard)
    if cache_last_channel is not None:
        cache_last_channel = cache_last_channel.to(device)
        cache_last_time = cache_last_time.to(device)
        cache_last_channel_len = cache_last_channel_len.to(device)

    previous_hypotheses = None
    # NOTE(review): previous_pred_out is initialized once but never refreshed
    # from greedy_predictions inside the loop — confirm whether
    # conformer_stream_step expects it to be threaded through between steps.
    previous_pred_out = None

    final_transcription = ""

    print("Starting streaming loop...")

    # The buffer yields pre-processed feature chunks plus their valid lengths.
    for step_num, (chunk_audio, chunk_lengths) in enumerate(streaming_buffer):
        chunk_audio = chunk_audio.to(device)
        chunk_lengths = chunk_lengths.to(device)

        print(f"Step {step_num}: chunk_audio shape: {chunk_audio.shape}")

        # conformer_stream_step: runs one encoder+decoder step and returns the
        # updated cache tensors, which we feed back in on the next iteration.
        with torch.no_grad():
            (
                greedy_predictions,
                transcribed_texts,
                cache_last_channel,
                cache_last_time,
                cache_last_channel_len,
                best_hyp_list,
            ) = asr_model.conformer_stream_step(
                processed_signal=chunk_audio,
                processed_signal_length=chunk_lengths,
                cache_last_channel=cache_last_channel,
                cache_last_time=cache_last_time,
                cache_last_channel_len=cache_last_channel_len,
                keep_all_outputs=False,  # We don't need to keep all outputs for now
                previous_hypotheses=previous_hypotheses,
                previous_pred_out=previous_pred_out,
                return_transcription=True
            )

        # Update state for next step
        previous_hypotheses = best_hyp_list

        # Extract text and handle EOU (The "Complex" Part)
        current_hyp = best_hyp_list[0] if isinstance(best_hyp_list, list) else best_hyp_list

        # Check for EOU (1024) — presumably the end-of-utterance token id for
        # this checkpoint; TODO confirm against the model's tokenizer.
        # y_sequence may be a plain list or a tensor depending on decoder path.
        is_eou = False
        if hasattr(current_hyp, 'y_sequence'):
            y_seq = current_hyp.y_sequence
            if isinstance(y_seq, list) and 1024 in y_seq:
                is_eou = True
            elif torch.is_tensor(y_seq) and (y_seq == 1024).any():
                is_eou = True

        if is_eou:
            # FIX: Reset decoder state on EOU so the next utterance starts fresh
            previous_hypotheses = None
            if hasattr(current_hyp, 'text'):
                final_transcription += current_hyp.text + " "

        # Note: If not EOU, we don't append text yet because it's partial.
        # The example accumulates `transcribed_texts` but that might be for the whole batch/history?
        # In strict streaming, we usually only commit on EOU or stability.
        # For this demo, we'll just print partials.

        # print(f"Step {step_num}: {current_hyp.text if hasattr(current_hyp, 'text') else ''}")

    # Append final bit: whatever partial hypothesis survives after the last
    # chunk (i.e. text that was never terminated by an EOU token)
    if previous_hypotheses:
        last_hyp = previous_hypotheses[0] if isinstance(previous_hypotheses, list) else previous_hypotheses
        if hasattr(last_hyp, 'text'):
            final_transcription += last_hyp.text

    return final_transcription.replace("<eou>", "").strip()
|
| 141 |
+
|
| 142 |
+
import argparse
|
| 143 |
+
import jiwer
|
| 144 |
+
from pathlib import Path
|
| 145 |
+
|
| 146 |
+
def load_manifest(dataset_path, subset='test-clean', max_files=None):
    """Build manifest entries from a LibriSpeech-style directory tree.

    Walks ``dataset_path/subset`` for ``*.flac`` files (sorted for
    determinism) and pairs each utterance with its reference text from the
    chapter's ``<speaker>-<chapter>.trans.txt`` file.

    Args:
        dataset_path: Root of the extracted LibriSpeech dataset.
        subset: Subset directory name (e.g. 'test-clean').
        max_files: Optional cap on the number of entries returned.

    Returns:
        List of dicts with keys 'audio_filepath', 'text', and 'duration'
        (duration is a 0 placeholder — it is not needed for WER scoring).

    Raises:
        FileNotFoundError: If the subset directory or any FLAC files are missing.
    """
    subset_dir = Path(dataset_path) / subset
    if not subset_dir.exists():
        raise FileNotFoundError(f"Dataset directory not found: {subset_dir}")

    flac_files = list(subset_dir.rglob('*.flac'))
    if not flac_files:
        raise FileNotFoundError(f"No FLAC files found in {subset_dir}")

    # Sort for determinism
    flac_files = sorted(flac_files)

    # Parse each .trans.txt at most once: the original re-opened and rescanned
    # the same chapter transcript for every utterance in that chapter.
    transcripts = {}  # trans file path -> {utterance_id: text}

    entries = []
    for flac_path in flac_files:
        if max_files and len(entries) >= max_files:
            break

        speaker_id = flac_path.parent.parent.name
        chapter_id = flac_path.parent.name
        trans_file = flac_path.parent / f"{speaker_id}-{chapter_id}.trans.txt"

        if trans_file not in transcripts:
            mapping = {}
            if trans_file.exists():
                with open(trans_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        parts = line.strip().split(' ', 1)
                        if len(parts) == 2:
                            # setdefault keeps the first occurrence, matching
                            # the original first-match scan semantics.
                            mapping.setdefault(parts[0], parts[1])
            transcripts[trans_file] = mapping

        text = transcripts[trans_file].get(flac_path.stem)
        if text is not None:
            entries.append({
                'audio_filepath': str(flac_path),
                'text': text,
                'duration': 0  # placeholder; not required for WER
            })

    print(f"Loaded {len(entries)} entries from {subset_dir}")
    return entries
|
| 181 |
+
|
| 182 |
+
def main():
    """Run a streaming-WER benchmark of the Parakeet EOU model on LibriSpeech."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--max-files', type=int, default=100)
    cli_args = arg_parser.parse_args()

    model_id = "nvidia/parakeet_realtime_eou_120m-v1"
    dataset_path = "/Users/kikow/Library/Caches/fluidaudio/LibriSpeech/LibriSpeech"

    device = torch.device("cpu")  # Force CPU for now

    print(f"Loading model: {model_id}")
    model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location=device)
    model.eval()

    # 1. Setup Decoding Strategy (Crucial Step)
    setup_decoding_strategy(model, strategy='greedy')

    # 2. Setup Streaming Params
    model.encoder.setup_streaming_params(chunk_size=4, shift_size=4)
    print(f"Updated Streaming Config: {model.encoder.streaming_cfg}")

    # Load Data
    entries = load_manifest(dataset_path, max_files=cli_args.max_files)
    n_entries = len(entries)

    wer_sum = 0
    count = 0

    print(f"Starting Benchmark on {n_entries} files...")

    for i, entry in enumerate(entries):
        audio_file = entry['audio_filepath']
        ref_text = entry['text'].lower()

        # Create buffer per file (clean state)
        streaming_buffer = CacheAwareStreamingAudioBuffer(
            model=model,
            online_normalization=False,
            pad_and_drop_preencoded=False
        )
        streaming_buffer.append_audio_file(audio_file, stream_id=-1)

        # 3. Perform Streaming
        hyp_text = perform_streaming(model, streaming_buffer, device)

        # Calculate WER
        wer = jiwer.wer(ref_text, hyp_text)
        wer_sum += wer
        count += 1

        print(f"[{i+1}/{n_entries}] {Path(audio_file).name} | WER: {wer*100:.2f}% | Ref: '{ref_text}' | Hyp: '{hyp_text}'")

    avg_wer = wer_sum / count if count > 0 else 0
    print(f"\nAverage WER over {count} files: {avg_wer*100:.2f}%")
|
| 236 |
+
|
| 237 |
+
if __name__ == "__main__":
|
| 238 |
+
main()
|
parakeet_decoder.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abebbb833404b4a9bcc374a9430574d574061f65f6327cba59d8cc1a8b95cfaa
|
| 3 |
+
size 243
|
parakeet_decoder.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea4dbff3f49ae48e899d4dc785cdb8ffa8614bba395c623db025f08bdd633381
|
| 3 |
+
size 439
|
parakeet_decoder.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float16",
|
| 10 |
+
"formattedType" : "MultiArray (Float16 1 × 1 × 640)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 1, 640]",
|
| 13 |
+
"name" : "decoder_output",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"hasShapeFlexibility" : "0",
|
| 18 |
+
"isOptional" : "0",
|
| 19 |
+
"dataType" : "Float16",
|
| 20 |
+
"formattedType" : "MultiArray (Float16 1 × 1 × 640)",
|
| 21 |
+
"shortDescription" : "",
|
| 22 |
+
"shape" : "[1, 1, 640]",
|
| 23 |
+
"name" : "h_out",
|
| 24 |
+
"type" : "MultiArray"
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"hasShapeFlexibility" : "0",
|
| 28 |
+
"isOptional" : "0",
|
| 29 |
+
"dataType" : "Float16",
|
| 30 |
+
"formattedType" : "MultiArray (Float16 1 × 1 × 640)",
|
| 31 |
+
"shortDescription" : "",
|
| 32 |
+
"shape" : "[1, 1, 640]",
|
| 33 |
+
"name" : "c_out",
|
| 34 |
+
"type" : "MultiArray"
|
| 35 |
+
}
|
| 36 |
+
],
|
| 37 |
+
"modelParameters" : [
|
| 38 |
+
|
| 39 |
+
],
|
| 40 |
+
"specificationVersion" : 8,
|
| 41 |
+
"mlProgramOperationTypeHistogram" : {
|
| 42 |
+
"Ios17.squeeze" : 2,
|
| 43 |
+
"Ios17.gather" : 1,
|
| 44 |
+
"Ios17.cast" : 3,
|
| 45 |
+
"Ios17.lstm" : 1,
|
| 46 |
+
"Ios17.transpose" : 6,
|
| 47 |
+
"Identity" : 1,
|
| 48 |
+
"Ios17.expandDims" : 2
|
| 49 |
+
},
|
| 50 |
+
"computePrecision" : "Mixed (Float16, Int16, Int32)",
|
| 51 |
+
"isUpdatable" : "0",
|
| 52 |
+
"stateSchema" : [
|
| 53 |
+
|
| 54 |
+
],
|
| 55 |
+
"availability" : {
|
| 56 |
+
"macOS" : "14.0",
|
| 57 |
+
"tvOS" : "17.0",
|
| 58 |
+
"visionOS" : "1.0",
|
| 59 |
+
"watchOS" : "10.0",
|
| 60 |
+
"iOS" : "17.0",
|
| 61 |
+
"macCatalyst" : "17.0"
|
| 62 |
+
},
|
| 63 |
+
"modelType" : {
|
| 64 |
+
"name" : "MLModelType_mlProgram"
|
| 65 |
+
},
|
| 66 |
+
"userDefinedMetadata" : {
|
| 67 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript",
|
| 68 |
+
"com.github.apple.coremltools.source" : "torch==2.4.0",
|
| 69 |
+
"com.github.apple.coremltools.version" : "8.3.0"
|
| 70 |
+
},
|
| 71 |
+
"inputSchema" : [
|
| 72 |
+
{
|
| 73 |
+
"hasShapeFlexibility" : "0",
|
| 74 |
+
"isOptional" : "0",
|
| 75 |
+
"dataType" : "Int32",
|
| 76 |
+
"formattedType" : "MultiArray (Int32 1 × 1)",
|
| 77 |
+
"shortDescription" : "",
|
| 78 |
+
"shape" : "[1, 1]",
|
| 79 |
+
"name" : "targets",
|
| 80 |
+
"type" : "MultiArray"
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"hasShapeFlexibility" : "0",
|
| 84 |
+
"isOptional" : "0",
|
| 85 |
+
"dataType" : "Int32",
|
| 86 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 87 |
+
"shortDescription" : "",
|
| 88 |
+
"shape" : "[1]",
|
| 89 |
+
"name" : "target_length",
|
| 90 |
+
"type" : "MultiArray"
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"hasShapeFlexibility" : "0",
|
| 94 |
+
"isOptional" : "0",
|
| 95 |
+
"dataType" : "Float32",
|
| 96 |
+
"formattedType" : "MultiArray (Float32 1 × 1 × 640)",
|
| 97 |
+
"shortDescription" : "",
|
| 98 |
+
"shape" : "[1, 1, 640]",
|
| 99 |
+
"name" : "h_in",
|
| 100 |
+
"type" : "MultiArray"
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"hasShapeFlexibility" : "0",
|
| 104 |
+
"isOptional" : "0",
|
| 105 |
+
"dataType" : "Float32",
|
| 106 |
+
"formattedType" : "MultiArray (Float32 1 × 1 × 640)",
|
| 107 |
+
"shortDescription" : "",
|
| 108 |
+
"shape" : "[1, 1, 640]",
|
| 109 |
+
"name" : "c_in",
|
| 110 |
+
"type" : "MultiArray"
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"generatedClassName" : "parakeet_decoder",
|
| 114 |
+
"method" : "predict"
|
| 115 |
+
}
|
| 116 |
+
]
|
parakeet_decoder.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.0)
|
| 2 |
+
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.4.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios17>(tensor<fp32, [1, 1, 640]> c_in, tensor<fp32, [1, 1, 640]> h_in, tensor<int32, [1]> target_length, tensor<int32, [1, 1]> targets) {
|
| 5 |
+
tensor<int32, [3]> var_14 = const()[name = tensor<string, []>("op_14"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 6 |
+
tensor<string, []> h_in_to_fp16_dtype_0 = const()[name = tensor<string, []>("h_in_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
|
| 7 |
+
tensor<int32, [3]> var_21 = const()[name = tensor<string, []>("op_21"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 8 |
+
tensor<string, []> c_in_to_fp16_dtype_0 = const()[name = tensor<string, []>("c_in_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
|
| 9 |
+
tensor<int32, []> y_axis_0 = const()[name = tensor<string, []>("y_axis_0"), val = tensor<int32, []>(0)];
|
| 10 |
+
tensor<int32, []> y_batch_dims_0 = const()[name = tensor<string, []>("y_batch_dims_0"), val = tensor<int32, []>(0)];
|
| 11 |
+
tensor<bool, []> y_validate_indices_0 = const()[name = tensor<string, []>("y_validate_indices_0"), val = tensor<bool, []>(false)];
|
| 12 |
+
tensor<fp16, [1027, 640]> decoder_prediction_embed_weight_to_fp16 = const()[name = tensor<string, []>("decoder_prediction_embed_weight_to_fp16"), val = tensor<fp16, [1027, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
|
| 13 |
+
tensor<string, []> targets_to_int16_dtype_0 = const()[name = tensor<string, []>("targets_to_int16_dtype_0"), val = tensor<string, []>("int16")];
|
| 14 |
+
tensor<int16, [1, 1]> targets_to_int16 = cast(dtype = targets_to_int16_dtype_0, x = targets)[name = tensor<string, []>("cast_4")];
|
| 15 |
+
tensor<fp16, [1, 1, 640]> y_cast_fp16_cast_uint16 = gather(axis = y_axis_0, batch_dims = y_batch_dims_0, indices = targets_to_int16, validate_indices = y_validate_indices_0, x = decoder_prediction_embed_weight_to_fp16)[name = tensor<string, []>("y_cast_fp16_cast_uint16")];
|
| 16 |
+
tensor<int32, [3]> input_1_perm_0 = const()[name = tensor<string, []>("input_1_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 17 |
+
tensor<int32, [1]> input_lstm_h0_squeeze_axes_0 = const()[name = tensor<string, []>("input_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
|
| 18 |
+
tensor<fp16, [1, 1, 640]> h_in_to_fp16 = cast(dtype = h_in_to_fp16_dtype_0, x = h_in)[name = tensor<string, []>("cast_6")];
|
| 19 |
+
tensor<fp16, [1, 1, 640]> var_15_cast_fp16 = transpose(perm = var_14, x = h_in_to_fp16)[name = tensor<string, []>("transpose_5")];
|
| 20 |
+
tensor<fp16, [1, 640]> input_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input_lstm_h0_squeeze_axes_0, x = var_15_cast_fp16)[name = tensor<string, []>("input_lstm_h0_squeeze_cast_fp16")];
|
| 21 |
+
tensor<int32, [1]> input_lstm_c0_squeeze_axes_0 = const()[name = tensor<string, []>("input_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
|
| 22 |
+
tensor<fp16, [1, 1, 640]> c_in_to_fp16 = cast(dtype = c_in_to_fp16_dtype_0, x = c_in)[name = tensor<string, []>("cast_5")];
|
| 23 |
+
tensor<fp16, [1, 1, 640]> var_22_cast_fp16 = transpose(perm = var_21, x = c_in_to_fp16)[name = tensor<string, []>("transpose_4")];
|
| 24 |
+
tensor<fp16, [1, 640]> input_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input_lstm_c0_squeeze_axes_0, x = var_22_cast_fp16)[name = tensor<string, []>("input_lstm_c0_squeeze_cast_fp16")];
|
| 25 |
+
tensor<string, []> input_direction_0 = const()[name = tensor<string, []>("input_direction_0"), val = tensor<string, []>("forward")];
|
| 26 |
+
tensor<bool, []> input_output_sequence_0 = const()[name = tensor<string, []>("input_output_sequence_0"), val = tensor<bool, []>(true)];
|
| 27 |
+
tensor<string, []> input_recurrent_activation_0 = const()[name = tensor<string, []>("input_recurrent_activation_0"), val = tensor<string, []>("sigmoid")];
|
| 28 |
+
tensor<string, []> input_cell_activation_0 = const()[name = tensor<string, []>("input_cell_activation_0"), val = tensor<string, []>("tanh")];
|
| 29 |
+
tensor<string, []> input_activation_0 = const()[name = tensor<string, []>("input_activation_0"), val = tensor<string, []>("tanh")];
|
| 30 |
+
tensor<fp16, [2560, 640]> concat_1_to_fp16 = const()[name = tensor<string, []>("concat_1_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1314688)))];
|
| 31 |
+
tensor<fp16, [2560, 640]> concat_2_to_fp16 = const()[name = tensor<string, []>("concat_2_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4591552)))];
|
| 32 |
+
tensor<fp16, [2560]> concat_0_to_fp16 = const()[name = tensor<string, []>("concat_0_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7868416)))];
|
| 33 |
+
tensor<fp16, [1, 1, 640]> input_1_cast_fp16 = transpose(perm = input_1_perm_0, x = y_cast_fp16_cast_uint16)[name = tensor<string, []>("transpose_3")];
|
| 34 |
+
tensor<fp16, [1, 1, 640]> input_cast_fp16_0, tensor<fp16, [1, 640]> input_cast_fp16_1, tensor<fp16, [1, 640]> input_cast_fp16_2 = lstm(activation = input_activation_0, bias = concat_0_to_fp16, cell_activation = input_cell_activation_0, direction = input_direction_0, initial_c = input_lstm_c0_squeeze_cast_fp16, initial_h = input_lstm_h0_squeeze_cast_fp16, output_sequence = input_output_sequence_0, recurrent_activation = input_recurrent_activation_0, weight_hh = concat_2_to_fp16, weight_ih = concat_1_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
|
| 35 |
+
tensor<int32, [1]> var_44_axes_0 = const()[name = tensor<string, []>("op_44_axes_0"), val = tensor<int32, [1]>([0])];
|
| 36 |
+
tensor<fp16, [1, 1, 640]> var_44_cast_fp16 = expand_dims(axes = var_44_axes_0, x = input_cast_fp16_1)[name = tensor<string, []>("op_44_cast_fp16")];
|
| 37 |
+
tensor<int32, [1]> var_45_axes_0 = const()[name = tensor<string, []>("op_45_axes_0"), val = tensor<int32, [1]>([0])];
|
| 38 |
+
tensor<fp16, [1, 1, 640]> var_45_cast_fp16 = expand_dims(axes = var_45_axes_0, x = input_cast_fp16_2)[name = tensor<string, []>("op_45_cast_fp16")];
|
| 39 |
+
tensor<int32, [3]> var_57_perm_0 = const()[name = tensor<string, []>("op_57_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 40 |
+
tensor<int32, [3]> var_61 = const()[name = tensor<string, []>("op_61"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 41 |
+
tensor<int32, [3]> var_66 = const()[name = tensor<string, []>("op_66"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 42 |
+
tensor<fp16, [1, 1, 640]> c_out = transpose(perm = var_66, x = var_45_cast_fp16)[name = tensor<string, []>("transpose_0")];
|
| 43 |
+
tensor<fp16, [1, 1, 640]> h_out = transpose(perm = var_61, x = var_44_cast_fp16)[name = tensor<string, []>("transpose_1")];
|
| 44 |
+
tensor<fp16, [1, 1, 640]> decoder_output = transpose(perm = var_57_perm_0, x = input_cast_fp16_0)[name = tensor<string, []>("transpose_2")];
|
| 45 |
+
tensor<int32, [1]> target_length_tmp = identity(x = target_length)[name = tensor<string, []>("target_length_tmp")];
|
| 46 |
+
} -> (decoder_output, h_out, c_out);
|
| 47 |
+
}
|
parakeet_decoder.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b4cacecdcd9df79ab1e56de67230baf5a8664d2afe0bb8f3408eefa972cb2f4
|
| 3 |
+
size 7873600
|
parakeet_decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b706227c9c2a2d64ea0fa3879ca9a4673e61944e8e374160e5a20ae7382207c3
|
| 3 |
+
size 6750
|
parakeet_decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b4cacecdcd9df79ab1e56de67230baf5a8664d2afe0bb8f3408eefa972cb2f4
|
| 3 |
+
size 7873600
|
parakeet_decoder.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"2A776510-11A3-4993-A996-06C985BF1840": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"5111D7BD-E8E5-42A4-A8E2-11BD568F106B": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "5111D7BD-E8E5-42A4-A8E2-11BD568F106B"
|
| 18 |
+
}
|
parakeet_joint.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51354af666471dab9e2344e1a7b93004c7fef44c3d455dde75bcaf0abbcc72af
|
| 3 |
+
size 243
|
parakeet_joint.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9ecae4f1db9350ad8bd7050c3c1b973926798c2f7ff408e9ad512d3013f238b
|
| 3 |
+
size 355
|
parakeet_joint.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float32",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float32",
|
| 10 |
+
"formattedType" : "MultiArray (Float32 1 × 1 × 1 × 1027)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 1, 1, 1027]",
|
| 13 |
+
"name" : "logits",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
}
|
| 16 |
+
],
|
| 17 |
+
"modelParameters" : [
|
| 18 |
+
|
| 19 |
+
],
|
| 20 |
+
"specificationVersion" : 8,
|
| 21 |
+
"mlProgramOperationTypeHistogram" : {
|
| 22 |
+
"Ios17.expandDims" : 2,
|
| 23 |
+
"Ios17.transpose" : 1,
|
| 24 |
+
"Ios17.linear" : 3,
|
| 25 |
+
"Ios17.add" : 1,
|
| 26 |
+
"Ios16.relu" : 1
|
| 27 |
+
},
|
| 28 |
+
"computePrecision" : "Mixed (Float32, Int32)",
|
| 29 |
+
"isUpdatable" : "0",
|
| 30 |
+
"stateSchema" : [
|
| 31 |
+
|
| 32 |
+
],
|
| 33 |
+
"availability" : {
|
| 34 |
+
"macOS" : "14.0",
|
| 35 |
+
"tvOS" : "17.0",
|
| 36 |
+
"visionOS" : "1.0",
|
| 37 |
+
"watchOS" : "10.0",
|
| 38 |
+
"iOS" : "17.0",
|
| 39 |
+
"macCatalyst" : "17.0"
|
| 40 |
+
},
|
| 41 |
+
"modelType" : {
|
| 42 |
+
"name" : "MLModelType_mlProgram"
|
| 43 |
+
},
|
| 44 |
+
"userDefinedMetadata" : {
|
| 45 |
+
"com.github.apple.coremltools.version" : "8.3.0",
|
| 46 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript",
|
| 47 |
+
"com.github.apple.coremltools.source" : "torch==2.4.0"
|
| 48 |
+
},
|
| 49 |
+
"inputSchema" : [
|
| 50 |
+
{
|
| 51 |
+
"hasShapeFlexibility" : "0",
|
| 52 |
+
"isOptional" : "0",
|
| 53 |
+
"dataType" : "Float32",
|
| 54 |
+
"formattedType" : "MultiArray (Float32 1 × 512 × 1)",
|
| 55 |
+
"shortDescription" : "",
|
| 56 |
+
"shape" : "[1, 512, 1]",
|
| 57 |
+
"name" : "encoder_output",
|
| 58 |
+
"type" : "MultiArray"
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"hasShapeFlexibility" : "0",
|
| 62 |
+
"isOptional" : "0",
|
| 63 |
+
"dataType" : "Float32",
|
| 64 |
+
"formattedType" : "MultiArray (Float32 1 × 1 × 640)",
|
| 65 |
+
"shortDescription" : "",
|
| 66 |
+
"shape" : "[1, 1, 640]",
|
| 67 |
+
"name" : "decoder_output",
|
| 68 |
+
"type" : "MultiArray"
|
| 69 |
+
}
|
| 70 |
+
],
|
| 71 |
+
"generatedClassName" : "parakeet_joint",
|
| 72 |
+
"method" : "predict"
|
| 73 |
+
}
|
| 74 |
+
]
|
parakeet_joint.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.0)
|
| 2 |
+
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.4.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios17>(tensor<fp32, [1, 1, 640]> decoder_output, tensor<fp32, [1, 512, 1]> encoder_output) {
|
| 5 |
+
tensor<fp32, [640]> joint_enc_bias = const()[name = tensor<string, []>("joint_enc_bias"), val = tensor<fp32, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
|
| 6 |
+
tensor<fp32, [640, 512]> joint_enc_weight = const()[name = tensor<string, []>("joint_enc_weight"), val = tensor<fp32, [640, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2688)))];
|
| 7 |
+
tensor<fp32, [640]> joint_pred_bias = const()[name = tensor<string, []>("joint_pred_bias"), val = tensor<fp32, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1313472)))];
|
| 8 |
+
tensor<fp32, [640, 640]> joint_pred_weight = const()[name = tensor<string, []>("joint_pred_weight"), val = tensor<fp32, [640, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1316096)))];
|
| 9 |
+
tensor<fp32, [1027]> joint_joint_net_2_bias = const()[name = tensor<string, []>("joint_joint_net_2_bias"), val = tensor<fp32, [1027]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2954560)))];
|
| 10 |
+
tensor<fp32, [1027, 640]> joint_joint_net_2_weight = const()[name = tensor<string, []>("joint_joint_net_2_weight"), val = tensor<fp32, [1027, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2958784)))];
|
| 11 |
+
tensor<int32, [3]> input_1_perm_0 = const()[name = tensor<string, []>("input_1_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
|
| 12 |
+
tensor<fp32, [1, 1, 512]> input_1 = transpose(perm = input_1_perm_0, x = encoder_output)[name = tensor<string, []>("transpose_0")];
|
| 13 |
+
tensor<fp32, [1, 1, 640]> f = linear(bias = joint_enc_bias, weight = joint_enc_weight, x = input_1)[name = tensor<string, []>("linear_0")];
|
| 14 |
+
tensor<fp32, [1, 1, 640]> g = linear(bias = joint_pred_bias, weight = joint_pred_weight, x = decoder_output)[name = tensor<string, []>("linear_1")];
|
| 15 |
+
tensor<int32, [1]> var_19_axes_0 = const()[name = tensor<string, []>("op_19_axes_0"), val = tensor<int32, [1]>([2])];
|
| 16 |
+
tensor<fp32, [1, 1, 1, 640]> var_19 = expand_dims(axes = var_19_axes_0, x = f)[name = tensor<string, []>("op_19")];
|
| 17 |
+
tensor<int32, [1]> var_21_axes_0 = const()[name = tensor<string, []>("op_21_axes_0"), val = tensor<int32, [1]>([1])];
|
| 18 |
+
tensor<fp32, [1, 1, 1, 640]> var_21 = expand_dims(axes = var_21_axes_0, x = g)[name = tensor<string, []>("op_21")];
|
| 19 |
+
tensor<fp32, [1, 1, 1, 640]> input_3 = add(x = var_19, y = var_21)[name = tensor<string, []>("input_3")];
|
| 20 |
+
tensor<fp32, [1, 1, 1, 640]> input_5 = relu(x = input_3)[name = tensor<string, []>("input_5")];
|
| 21 |
+
tensor<fp32, [1, 1, 1, 1027]> logits = linear(bias = joint_joint_net_2_bias, weight = joint_joint_net_2_weight, x = input_5)[name = tensor<string, []>("linear_2")];
|
| 22 |
+
} -> (logits);
|
| 23 |
+
}
|
parakeet_joint.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f909b079b4923a05e522c15926ab7c8614c3d088a0da555970b16eb5447ce19c
|
| 3 |
+
size 5587968
|
parakeet_joint.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:570c88c720a7cca648db2d493635420c24ff837099586e384099c705425b207e
|
| 3 |
+
size 3015
|
parakeet_joint.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f909b079b4923a05e522c15926ab7c8614c3d088a0da555970b16eb5447ce19c
|
| 3 |
+
size 5587968
|
parakeet_joint.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"A8C1223F-3E7A-421C-AFF6-DB3EADE3826B": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"C6F9C4E8-810B-42F5-9184-A7F28B430B15": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "C6F9C4E8-810B-42F5-9184-A7F28B430B15"
|
| 18 |
+
}
|
preprocessor.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66bc489d462bb3131074b17c0cf18efe85bc0619b1e22f4a94c69d25576c1041
|
| 3 |
+
size 243
|
preprocessor.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61ed911e33fe8a791a4655ff7539a086dd760a60b60b233153cb769d85f41311
|
| 3 |
+
size 373
|
preprocessor.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float32",
|
| 10 |
+
"formattedType" : "MultiArray (Float32)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[]",
|
| 13 |
+
"name" : "mel",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"hasShapeFlexibility" : "0",
|
| 18 |
+
"isOptional" : "0",
|
| 19 |
+
"dataType" : "Int32",
|
| 20 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 21 |
+
"shortDescription" : "",
|
| 22 |
+
"shape" : "[1]",
|
| 23 |
+
"name" : "mel_length",
|
| 24 |
+
"type" : "MultiArray"
|
| 25 |
+
}
|
| 26 |
+
],
|
| 27 |
+
"modelParameters" : [
|
| 28 |
+
|
| 29 |
+
],
|
| 30 |
+
"specificationVersion" : 8,
|
| 31 |
+
"mlProgramOperationTypeHistogram" : {
|
| 32 |
+
"Range1d" : 1,
|
| 33 |
+
"Ios17.reshape" : 2,
|
| 34 |
+
"Identity" : 1,
|
| 35 |
+
"Ios17.matmul" : 1,
|
| 36 |
+
"Ios17.expandDims" : 5,
|
| 37 |
+
"Select" : 1,
|
| 38 |
+
"Ios17.add" : 3,
|
| 39 |
+
"Ios17.sliceByIndex" : 3,
|
| 40 |
+
"Ios16.reduceSum" : 1,
|
| 41 |
+
"Shape" : 1,
|
| 42 |
+
"Ios17.gather" : 1,
|
| 43 |
+
"Pad" : 1,
|
| 44 |
+
"Ios17.log" : 1,
|
| 45 |
+
"Ios17.conv" : 2,
|
| 46 |
+
"Ios17.sub" : 2,
|
| 47 |
+
"Ios17.pow" : 1,
|
| 48 |
+
"Ios17.cast" : 6,
|
| 49 |
+
"Stack" : 1,
|
| 50 |
+
"Ios17.concat" : 1,
|
| 51 |
+
"Ios17.floorDiv" : 1,
|
| 52 |
+
"Ios17.greaterEqual" : 1,
|
| 53 |
+
"Ios17.mul" : 1
|
| 54 |
+
},
|
| 55 |
+
"computePrecision" : "Mixed (Float16, Float32, Int32, UInt16)",
|
| 56 |
+
"isUpdatable" : "0",
|
| 57 |
+
"stateSchema" : [
|
| 58 |
+
|
| 59 |
+
],
|
| 60 |
+
"availability" : {
|
| 61 |
+
"macOS" : "14.0",
|
| 62 |
+
"tvOS" : "17.0",
|
| 63 |
+
"visionOS" : "1.0",
|
| 64 |
+
"watchOS" : "10.0",
|
| 65 |
+
"iOS" : "17.0",
|
| 66 |
+
"macCatalyst" : "17.0"
|
| 67 |
+
},
|
| 68 |
+
"modelType" : {
|
| 69 |
+
"name" : "MLModelType_mlProgram"
|
| 70 |
+
},
|
| 71 |
+
"userDefinedMetadata" : {
|
| 72 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript",
|
| 73 |
+
"com.github.apple.coremltools.source" : "torch==2.4.0",
|
| 74 |
+
"com.github.apple.coremltools.version" : "8.3.0"
|
| 75 |
+
},
|
| 76 |
+
"inputSchema" : [
|
| 77 |
+
{
|
| 78 |
+
"dataType" : "Float32",
|
| 79 |
+
"hasShapeFlexibility" : "1",
|
| 80 |
+
"isOptional" : "0",
|
| 81 |
+
"shapeFlexibility" : "1 × 1600...16000",
|
| 82 |
+
"shapeRange" : "[[1, 1], [1600, 16000]]",
|
| 83 |
+
"formattedType" : "MultiArray (Float32 1 × 6560)",
|
| 84 |
+
"type" : "MultiArray",
|
| 85 |
+
"shape" : "[1, 6560]",
|
| 86 |
+
"name" : "input_signal",
|
| 87 |
+
"shortDescription" : ""
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"hasShapeFlexibility" : "0",
|
| 91 |
+
"isOptional" : "0",
|
| 92 |
+
"dataType" : "Int32",
|
| 93 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 94 |
+
"shortDescription" : "",
|
| 95 |
+
"shape" : "[1]",
|
| 96 |
+
"name" : "length",
|
| 97 |
+
"type" : "MultiArray"
|
| 98 |
+
}
|
| 99 |
+
],
|
| 100 |
+
"generatedClassName" : "preprocessor",
|
| 101 |
+
"method" : "predict"
|
| 102 |
+
}
|
| 103 |
+
]
|
preprocessor.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.0)
|
| 2 |
+
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.4.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios17>(tensor<fp32, [1, ?]> input_signal, tensor<int32, [1]> length) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"input_signal", [1, 6560]}}), ("RangeDims", {{"input_signal", [[1, 1], [1600, 16000]]}})))] {
|
| 5 |
+
tensor<int32, []> var_4 = const()[name = tensor<string, []>("op_4"), val = tensor<int32, []>(1)];
|
| 6 |
+
tensor<int32, []> var_5 = const()[name = tensor<string, []>("op_5"), val = tensor<int32, []>(160)];
|
| 7 |
+
tensor<int32, []> var_27 = const()[name = tensor<string, []>("op_27"), val = tensor<int32, []>(512)];
|
| 8 |
+
tensor<int32, [1]> var_28 = add(x = length, y = var_27)[name = tensor<string, []>("op_28")];
|
| 9 |
+
tensor<int32, []> var_29 = const()[name = tensor<string, []>("op_29"), val = tensor<int32, []>(512)];
|
| 10 |
+
tensor<int32, [1]> var_30 = sub(x = var_28, y = var_29)[name = tensor<string, []>("op_30")];
|
| 11 |
+
tensor<int32, [1]> floor_div_0 = floor_div(x = var_30, y = var_5)[name = tensor<string, []>("floor_div_0")];
|
| 12 |
+
tensor<string, []> var_31_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_31_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
|
| 13 |
+
tensor<fp16, []> var_32_promoted_to_fp16 = const()[name = tensor<string, []>("op_32_promoted_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
|
| 14 |
+
tensor<fp16, [1]> floor_div_0_to_fp16 = cast(dtype = var_31_to_fp16_dtype_0, x = floor_div_0)[name = tensor<string, []>("cast_14")];
|
| 15 |
+
tensor<fp16, [1]> seq_len_1_cast_fp16 = add(x = floor_div_0_to_fp16, y = var_32_promoted_to_fp16)[name = tensor<string, []>("seq_len_1_cast_fp16")];
|
| 16 |
+
tensor<string, []> cast_1_dtype_0 = const()[name = tensor<string, []>("cast_1_dtype_0"), val = tensor<string, []>("int32")];
|
| 17 |
+
tensor<int32, [2]> var_36_begin_0 = const()[name = tensor<string, []>("op_36_begin_0"), val = tensor<int32, [2]>([0, 0])];
|
| 18 |
+
tensor<int32, [2]> var_36_end_0 = const()[name = tensor<string, []>("op_36_end_0"), val = tensor<int32, [2]>([1, 1])];
|
| 19 |
+
tensor<bool, [2]> var_36_end_mask_0 = const()[name = tensor<string, []>("op_36_end_mask_0"), val = tensor<bool, [2]>([true, false])];
|
| 20 |
+
tensor<bool, [2]> var_36_squeeze_mask_0 = const()[name = tensor<string, []>("op_36_squeeze_mask_0"), val = tensor<bool, [2]>([false, true])];
|
| 21 |
+
tensor<string, []> input_signal_to_fp16_dtype_0 = const()[name = tensor<string, []>("input_signal_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
|
| 22 |
+
tensor<fp16, [1, ?]> input_signal_to_fp16 = cast(dtype = input_signal_to_fp16_dtype_0, x = input_signal)[name = tensor<string, []>("cast_12")];
|
| 23 |
+
tensor<fp16, [1]> var_36_cast_fp16 = slice_by_index(begin = var_36_begin_0, end = var_36_end_0, end_mask = var_36_end_mask_0, squeeze_mask = var_36_squeeze_mask_0, x = input_signal_to_fp16)[name = tensor<string, []>("op_36_cast_fp16")];
|
| 24 |
+
tensor<int32, [1]> var_37_axes_0 = const()[name = tensor<string, []>("op_37_axes_0"), val = tensor<int32, [1]>([1])];
|
| 25 |
+
tensor<fp16, [1, 1]> var_37_cast_fp16 = expand_dims(axes = var_37_axes_0, x = var_36_cast_fp16)[name = tensor<string, []>("op_37_cast_fp16")];
|
| 26 |
+
tensor<int32, [2]> var_39_begin_0 = const()[name = tensor<string, []>("op_39_begin_0"), val = tensor<int32, [2]>([0, 1])];
|
| 27 |
+
tensor<int32, [2]> var_39_end_0 = const()[name = tensor<string, []>("op_39_end_0"), val = tensor<int32, [2]>([1, 0])];
|
| 28 |
+
tensor<bool, [2]> var_39_end_mask_0 = const()[name = tensor<string, []>("op_39_end_mask_0"), val = tensor<bool, [2]>([true, true])];
|
| 29 |
+
tensor<fp16, [1, ?]> var_39_cast_fp16 = slice_by_index(begin = var_39_begin_0, end = var_39_end_0, end_mask = var_39_end_mask_0, x = input_signal_to_fp16)[name = tensor<string, []>("op_39_cast_fp16")];
|
| 30 |
+
tensor<int32, [2]> var_41_begin_0 = const()[name = tensor<string, []>("op_41_begin_0"), val = tensor<int32, [2]>([0, 0])];
|
| 31 |
+
tensor<int32, [2]> var_41_end_0 = const()[name = tensor<string, []>("op_41_end_0"), val = tensor<int32, [2]>([1, -1])];
|
| 32 |
+
tensor<bool, [2]> var_41_end_mask_0 = const()[name = tensor<string, []>("op_41_end_mask_0"), val = tensor<bool, [2]>([true, false])];
|
| 33 |
+
tensor<fp16, [1, ?]> var_41_cast_fp16 = slice_by_index(begin = var_41_begin_0, end = var_41_end_0, end_mask = var_41_end_mask_0, x = input_signal_to_fp16)[name = tensor<string, []>("op_41_cast_fp16")];
|
| 34 |
+
tensor<fp16, []> var_42_to_fp16 = const()[name = tensor<string, []>("op_42_to_fp16"), val = tensor<fp16, []>(0x1.f0cp-1)];
|
| 35 |
+
tensor<fp16, [1, ?]> var_43_cast_fp16 = mul(x = var_41_cast_fp16, y = var_42_to_fp16)[name = tensor<string, []>("op_43_cast_fp16")];
|
| 36 |
+
tensor<fp16, [1, ?]> var_44_cast_fp16 = sub(x = var_39_cast_fp16, y = var_43_cast_fp16)[name = tensor<string, []>("op_44_cast_fp16")];
|
| 37 |
+
tensor<bool, []> input_1_interleave_0 = const()[name = tensor<string, []>("input_1_interleave_0"), val = tensor<bool, []>(false)];
|
| 38 |
+
tensor<fp16, [1, ?]> input_1_cast_fp16 = concat(axis = var_4, interleave = input_1_interleave_0, values = (var_37_cast_fp16, var_44_cast_fp16))[name = tensor<string, []>("input_1_cast_fp16")];
|
| 39 |
+
tensor<int32, [3]> concat_0x = const()[name = tensor<string, []>("concat_0x"), val = tensor<int32, [3]>([1, 1, -1])];
|
| 40 |
+
tensor<fp16, [1, 1, ?]> input_3_cast_fp16 = reshape(shape = concat_0x, x = input_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
|
| 41 |
+
tensor<int32, [6]> input_5_pad_0 = const()[name = tensor<string, []>("input_5_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 256, 256])];
|
| 42 |
+
tensor<string, []> input_5_mode_0 = const()[name = tensor<string, []>("input_5_mode_0"), val = tensor<string, []>("reflect")];
|
| 43 |
+
tensor<fp16, []> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
|
| 44 |
+
tensor<fp16, [1, 1, ?]> input_5_cast_fp16 = pad(constant_val = const_1_to_fp16, mode = input_5_mode_0, pad = input_5_pad_0, x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
|
| 45 |
+
tensor<int32, [2]> concat_1x = const()[name = tensor<string, []>("concat_1x"), val = tensor<int32, [2]>([1, -1])];
|
| 46 |
+
tensor<fp16, [1, ?]> input_cast_fp16 = reshape(shape = concat_1x, x = input_5_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
|
| 47 |
+
tensor<int32, [1]> expand_dims_3 = const()[name = tensor<string, []>("expand_dims_3"), val = tensor<int32, [1]>([160])];
|
| 48 |
+
tensor<int32, [1]> expand_dims_4_axes_0 = const()[name = tensor<string, []>("expand_dims_4_axes_0"), val = tensor<int32, [1]>([1])];
|
| 49 |
+
tensor<fp16, [1, 1, ?]> expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = input_cast_fp16)[name = tensor<string, []>("expand_dims_4_cast_fp16")];
|
| 50 |
+
tensor<string, []> conv_0_pad_type_0 = const()[name = tensor<string, []>("conv_0_pad_type_0"), val = tensor<string, []>("valid")];
|
| 51 |
+
tensor<int32, [2]> conv_0_pad_0 = const()[name = tensor<string, []>("conv_0_pad_0"), val = tensor<int32, [2]>([0, 0])];
|
| 52 |
+
tensor<int32, [1]> conv_0_dilations_0 = const()[name = tensor<string, []>("conv_0_dilations_0"), val = tensor<int32, [1]>([1])];
|
| 53 |
+
tensor<int32, []> conv_0_groups_0 = const()[name = tensor<string, []>("conv_0_groups_0"), val = tensor<int32, []>(1)];
|
| 54 |
+
tensor<fp16, [257, 1, 512]> expand_dims_1_to_fp16 = const()[name = tensor<string, []>("expand_dims_1_to_fp16"), val = tensor<fp16, [257, 1, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
|
| 55 |
+
tensor<fp16, [1, 257, ?]> conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_0_cast_fp16")];
|
| 56 |
+
tensor<string, []> conv_1_pad_type_0 = const()[name = tensor<string, []>("conv_1_pad_type_0"), val = tensor<string, []>("valid")];
|
| 57 |
+
tensor<int32, [2]> conv_1_pad_0 = const()[name = tensor<string, []>("conv_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
|
| 58 |
+
tensor<int32, [1]> conv_1_dilations_0 = const()[name = tensor<string, []>("conv_1_dilations_0"), val = tensor<int32, [1]>([1])];
|
| 59 |
+
tensor<int32, []> conv_1_groups_0 = const()[name = tensor<string, []>("conv_1_groups_0"), val = tensor<int32, []>(1)];
|
| 60 |
+
tensor<fp16, [257, 1, 512]> expand_dims_2_to_fp16 = const()[name = tensor<string, []>("expand_dims_2_to_fp16"), val = tensor<fp16, [257, 1, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263296)))];
|
| 61 |
+
tensor<fp16, [1, 257, ?]> conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_1_cast_fp16")];
|
| 62 |
+
tensor<int32, []> stack_0_axis_0 = const()[name = tensor<string, []>("stack_0_axis_0"), val = tensor<int32, []>(-1)];
|
| 63 |
+
tensor<fp16, [1, 257, ?, 2]> stack_0_cast_fp16 = stack(axis = stack_0_axis_0, values = (conv_0_cast_fp16, conv_1_cast_fp16))[name = tensor<string, []>("stack_0_cast_fp16")];
|
| 64 |
+
tensor<fp16, []> var_12_promoted_to_fp16 = const()[name = tensor<string, []>("op_12_promoted_to_fp16"), val = tensor<fp16, []>(0x1p+1)];
|
| 65 |
+
tensor<fp16, [1, 257, ?, 2]> var_60_cast_fp16 = pow(x = stack_0_cast_fp16, y = var_12_promoted_to_fp16)[name = tensor<string, []>("op_60_cast_fp16")];
|
| 66 |
+
tensor<int32, [1]> var_62_axes_0 = const()[name = tensor<string, []>("op_62_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 67 |
+
tensor<bool, []> var_62_keep_dims_0 = const()[name = tensor<string, []>("op_62_keep_dims_0"), val = tensor<bool, []>(false)];
|
| 68 |
+
tensor<fp16, [1, 257, ?]> var_62_cast_fp16 = reduce_sum(axes = var_62_axes_0, keep_dims = var_62_keep_dims_0, x = var_60_cast_fp16)[name = tensor<string, []>("op_62_cast_fp16")];
|
| 69 |
+
tensor<fp16, [1, 257, ?]> x_9_cast_fp16 = identity(x = var_62_cast_fp16)[name = tensor<string, []>("x_9_cast_fp16")];
|
| 70 |
+
tensor<bool, []> x_11_transpose_x_0 = const()[name = tensor<string, []>("x_11_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 71 |
+
tensor<bool, []> x_11_transpose_y_0 = const()[name = tensor<string, []>("x_11_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 72 |
+
tensor<fp16, [1, 128, 257]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [1, 128, 257]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(526528)))];
|
| 73 |
+
tensor<fp16, [1, 128, ?]> x_11_cast_fp16 = matmul(transpose_x = x_11_transpose_x_0, transpose_y = x_11_transpose_y_0, x = const_2_to_fp16, y = x_9_cast_fp16)[name = tensor<string, []>("x_11_cast_fp16")];
|
| 74 |
+
tensor<fp16, []> var_69_to_fp16 = const()[name = tensor<string, []>("op_69_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
|
| 75 |
+
tensor<fp16, [1, 128, ?]> var_70_cast_fp16 = add(x = x_11_cast_fp16, y = var_69_to_fp16)[name = tensor<string, []>("op_70_cast_fp16")];
|
| 76 |
+
tensor<fp32, []> x_epsilon_0 = const()[name = tensor<string, []>("x_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
|
| 77 |
+
tensor<fp16, [1, 128, ?]> x_cast_fp16 = log(epsilon = x_epsilon_0, x = var_70_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
|
| 78 |
+
tensor<int32, [3]> var_72_shape_cast_fp16 = shape(x = x_cast_fp16)[name = tensor<string, []>("op_72_shape_cast_fp16")];
|
| 79 |
+
tensor<int32, []> gather_4_axis_0 = const()[name = tensor<string, []>("gather_4_axis_0"), val = tensor<int32, []>(0)];
|
| 80 |
+
tensor<int32, []> gather_4_batch_dims_0 = const()[name = tensor<string, []>("gather_4_batch_dims_0"), val = tensor<int32, []>(0)];
|
| 81 |
+
tensor<bool, []> gather_4_validate_indices_0 = const()[name = tensor<string, []>("gather_4_validate_indices_0"), val = tensor<bool, []>(false)];
|
| 82 |
+
tensor<string, []> var_72_shape_cast_fp16_to_uint16_dtype_0 = const()[name = tensor<string, []>("op_72_shape_cast_fp16_to_uint16_dtype_0"), val = tensor<string, []>("uint16")];
|
| 83 |
+
tensor<uint16, []> select_4_to_uint16 = const()[name = tensor<string, []>("select_4_to_uint16"), val = tensor<uint16, []>(2)];
|
| 84 |
+
tensor<uint16, [3]> var_72_shape_cast_fp16_to_uint16 = cast(dtype = var_72_shape_cast_fp16_to_uint16_dtype_0, x = var_72_shape_cast_fp16)[name = tensor<string, []>("cast_11")];
|
| 85 |
+
tensor<uint16, []> gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_72_shape_cast_fp16_to_uint16)[name = tensor<string, []>("gather_4_cast_uint16")];
|
| 86 |
+
tensor<string, []> gather_4_cast_uint16_to_int32_dtype_0 = const()[name = tensor<string, []>("gather_4_cast_uint16_to_int32_dtype_0"), val = tensor<string, []>("int32")];
|
| 87 |
+
tensor<int32, []> const_3 = const()[name = tensor<string, []>("const_3"), val = tensor<int32, []>(0)];
|
| 88 |
+
tensor<int32, []> const_4 = const()[name = tensor<string, []>("const_4"), val = tensor<int32, []>(1)];
|
| 89 |
+
tensor<int32, []> gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = tensor<string, []>("cast_10")];
|
| 90 |
+
tensor<int32, [?]> mask_1 = range_1d(end = gather_4_cast_uint16_to_int32, start = const_3, step = const_4)[name = tensor<string, []>("mask_1")];
|
| 91 |
+
tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = tensor<string, []>("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
|
| 92 |
+
tensor<int32, [1, ?]> expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = mask_1)[name = tensor<string, []>("expand_dims_0")];
|
| 93 |
+
tensor<int32, [1]> var_77_axes_0 = const()[name = tensor<string, []>("op_77_axes_0"), val = tensor<int32, [1]>([1])];
|
| 94 |
+
tensor<int32, [1]> mel_length = cast(dtype = cast_1_dtype_0, x = seq_len_1_cast_fp16)[name = tensor<string, []>("cast_13")];
|
| 95 |
+
tensor<int32, [1, 1]> var_77 = expand_dims(axes = var_77_axes_0, x = mel_length)[name = tensor<string, []>("op_77")];
|
| 96 |
+
tensor<bool, [1, ?]> mask = greater_equal(x = expand_dims_0, y = var_77)[name = tensor<string, []>("mask")];
|
| 97 |
+
tensor<int32, [1]> var_79_axes_0 = const()[name = tensor<string, []>("op_79_axes_0"), val = tensor<int32, [1]>([1])];
|
| 98 |
+
tensor<bool, [1, 1, ?]> var_79 = expand_dims(axes = var_79_axes_0, x = mask)[name = tensor<string, []>("op_79")];
|
| 99 |
+
tensor<fp16, []> cast_6_to_fp16 = const()[name = tensor<string, []>("cast_6_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
|
| 100 |
+
tensor<fp16, [1, 128, ?]> processed_signal_cast_fp16 = select(a = cast_6_to_fp16, b = x_cast_fp16, cond = var_79)[name = tensor<string, []>("processed_signal_cast_fp16")];
|
| 101 |
+
tensor<string, []> processed_signal_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("processed_signal_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
|
| 102 |
+
tensor<fp32, [1, 128, ?]> mel = cast(dtype = processed_signal_cast_fp16_to_fp32_dtype_0, x = processed_signal_cast_fp16)[name = tensor<string, []>("cast_9")];
|
| 103 |
+
} -> (mel, mel_length);
|
| 104 |
+
}
|
preprocessor.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f257ad1ac11575d73a6ffda555319b2c96b0a224f0dc03ddd8c62950e9b18e53
|
| 3 |
+
size 592384
|
preprocessor.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eeb46c27ed7a75560111435ef86afbe9128669301b897d613a3fb1cbf8753fc2
|
| 3 |
+
size 13695
|
preprocessor.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f257ad1ac11575d73a6ffda555319b2c96b0a224f0dc03ddd8c62950e9b18e53
|
| 3 |
+
size 592384
|
preprocessor.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"41A87408-9448-4732-A714-AABD9E8264CD": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"D1D243D6-CE3A-446A-A657-4F2BA0FC58CE": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "D1D243D6-CE3A-446A-A657-4F2BA0FC58CE"
|
| 18 |
+
}
|
streaming_encoder.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d10e88a440fa9a238b34284f6be7310ebe682ec7f5240053007b26fe4991edc
|
| 3 |
+
size 243
|
streaming_encoder.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdcbb3050dd6912cbe43025b9d5e5bbbdfa9471bc08ce5b32565e51a75109638
|
| 3 |
+
size 594
|
streaming_encoder.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float32",
|
| 10 |
+
"formattedType" : "MultiArray (Float32 1 × 512 × 4)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 512, 4]",
|
| 13 |
+
"name" : "encoder",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"hasShapeFlexibility" : "0",
|
| 18 |
+
"isOptional" : "0",
|
| 19 |
+
"dataType" : "Int32",
|
| 20 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 21 |
+
"shortDescription" : "",
|
| 22 |
+
"shape" : "[1]",
|
| 23 |
+
"name" : "encoder_length",
|
| 24 |
+
"type" : "MultiArray"
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"hasShapeFlexibility" : "0",
|
| 28 |
+
"isOptional" : "0",
|
| 29 |
+
"dataType" : "Float32",
|
| 30 |
+
"formattedType" : "MultiArray (Float32 17 × 1 × 70 × 512)",
|
| 31 |
+
"shortDescription" : "",
|
| 32 |
+
"shape" : "[17, 1, 70, 512]",
|
| 33 |
+
"name" : "cache_last_channel_out",
|
| 34 |
+
"type" : "MultiArray"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"hasShapeFlexibility" : "0",
|
| 38 |
+
"isOptional" : "0",
|
| 39 |
+
"dataType" : "Float32",
|
| 40 |
+
"formattedType" : "MultiArray (Float32 17 × 1 × 512 × 8)",
|
| 41 |
+
"shortDescription" : "",
|
| 42 |
+
"shape" : "[17, 1, 512, 8]",
|
| 43 |
+
"name" : "cache_last_time_out",
|
| 44 |
+
"type" : "MultiArray"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"hasShapeFlexibility" : "0",
|
| 48 |
+
"isOptional" : "0",
|
| 49 |
+
"dataType" : "Int32",
|
| 50 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 51 |
+
"shortDescription" : "",
|
| 52 |
+
"shape" : "[1]",
|
| 53 |
+
"name" : "cache_last_channel_len_out",
|
| 54 |
+
"type" : "MultiArray"
|
| 55 |
+
}
|
| 56 |
+
],
|
| 57 |
+
"modelParameters" : [
|
| 58 |
+
|
| 59 |
+
],
|
| 60 |
+
"specificationVersion" : 8,
|
| 61 |
+
"mlProgramOperationTypeHistogram" : {
|
| 62 |
+
"Ios17.floor" : 3,
|
| 63 |
+
"Ios17.logicalAnd" : 3,
|
| 64 |
+
"Ios17.reshape" : 103,
|
| 65 |
+
"Ios16.softmax" : 17,
|
| 66 |
+
"Ios17.matmul" : 51,
|
| 67 |
+
"Ios17.transpose" : 157,
|
| 68 |
+
"Split" : 17,
|
| 69 |
+
"Ios17.expandDims" : 6,
|
| 70 |
+
"Select" : 51,
|
| 71 |
+
"Ios17.add" : 125,
|
| 72 |
+
"Tile" : 1,
|
| 73 |
+
"Ios17.sliceByIndex" : 105,
|
| 74 |
+
"Ios16.sigmoid" : 17,
|
| 75 |
+
"Pad" : 20,
|
| 76 |
+
"Ios17.logicalNot" : 2,
|
| 77 |
+
"Ios17.layerNorm" : 102,
|
| 78 |
+
"Ios17.less" : 1,
|
| 79 |
+
"Ios17.sub" : 1,
|
| 80 |
+
"Ios17.conv" : 56,
|
| 81 |
+
"Ios17.clip" : 2,
|
| 82 |
+
"Ios16.relu" : 3,
|
| 83 |
+
"Ios17.linear" : 137,
|
| 84 |
+
"Ios17.greaterEqual" : 1,
|
| 85 |
+
"Ios17.cast" : 12,
|
| 86 |
+
"Ios16.silu" : 51,
|
| 87 |
+
"Ios17.concat" : 51,
|
| 88 |
+
"Stack" : 2,
|
| 89 |
+
"Ios17.mul" : 72
|
| 90 |
+
},
|
| 91 |
+
"computePrecision" : "Mixed (Float16, Float32, Int32)",
|
| 92 |
+
"isUpdatable" : "0",
|
| 93 |
+
"stateSchema" : [
|
| 94 |
+
|
| 95 |
+
],
|
| 96 |
+
"availability" : {
|
| 97 |
+
"macOS" : "14.0",
|
| 98 |
+
"tvOS" : "17.0",
|
| 99 |
+
"visionOS" : "1.0",
|
| 100 |
+
"watchOS" : "10.0",
|
| 101 |
+
"iOS" : "17.0",
|
| 102 |
+
"macCatalyst" : "17.0"
|
| 103 |
+
},
|
| 104 |
+
"modelType" : {
|
| 105 |
+
"name" : "MLModelType_mlProgram"
|
| 106 |
+
},
|
| 107 |
+
"userDefinedMetadata" : {
|
| 108 |
+
"com.github.apple.coremltools.version" : "8.3.0",
|
| 109 |
+
"com.github.apple.coremltools.source" : "torch==2.4.0",
|
| 110 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript"
|
| 111 |
+
},
|
| 112 |
+
"inputSchema" : [
|
| 113 |
+
{
|
| 114 |
+
"hasShapeFlexibility" : "0",
|
| 115 |
+
"isOptional" : "0",
|
| 116 |
+
"dataType" : "Float32",
|
| 117 |
+
"formattedType" : "MultiArray (Float32 1 × 128 × 41)",
|
| 118 |
+
"shortDescription" : "",
|
| 119 |
+
"shape" : "[1, 128, 41]",
|
| 120 |
+
"name" : "mel",
|
| 121 |
+
"type" : "MultiArray"
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"hasShapeFlexibility" : "0",
|
| 125 |
+
"isOptional" : "0",
|
| 126 |
+
"dataType" : "Int32",
|
| 127 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 128 |
+
"shortDescription" : "",
|
| 129 |
+
"shape" : "[1]",
|
| 130 |
+
"name" : "mel_length",
|
| 131 |
+
"type" : "MultiArray"
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"hasShapeFlexibility" : "0",
|
| 135 |
+
"isOptional" : "0",
|
| 136 |
+
"dataType" : "Float32",
|
| 137 |
+
"formattedType" : "MultiArray (Float32 17 × 1 × 70 × 512)",
|
| 138 |
+
"shortDescription" : "",
|
| 139 |
+
"shape" : "[17, 1, 70, 512]",
|
| 140 |
+
"name" : "cache_last_channel",
|
| 141 |
+
"type" : "MultiArray"
|
| 142 |
+
},
|
| 143 |
+
{
|
| 144 |
+
"hasShapeFlexibility" : "0",
|
| 145 |
+
"isOptional" : "0",
|
| 146 |
+
"dataType" : "Float32",
|
| 147 |
+
"formattedType" : "MultiArray (Float32 17 × 1 × 512 × 8)",
|
| 148 |
+
"shortDescription" : "",
|
| 149 |
+
"shape" : "[17, 1, 512, 8]",
|
| 150 |
+
"name" : "cache_last_time",
|
| 151 |
+
"type" : "MultiArray"
|
| 152 |
+
},
|
| 153 |
+
{
|
| 154 |
+
"hasShapeFlexibility" : "0",
|
| 155 |
+
"isOptional" : "0",
|
| 156 |
+
"dataType" : "Int32",
|
| 157 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 158 |
+
"shortDescription" : "",
|
| 159 |
+
"shape" : "[1]",
|
| 160 |
+
"name" : "cache_last_channel_len",
|
| 161 |
+
"type" : "MultiArray"
|
| 162 |
+
}
|
| 163 |
+
],
|
| 164 |
+
"generatedClassName" : "streaming_encoder",
|
| 165 |
+
"method" : "predict"
|
| 166 |
+
}
|
| 167 |
+
]
|
streaming_encoder.mlmodelc/model.mil
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
streaming_encoder.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:671ab3731e79a55d8b405d633971e7d0bf1b4a6ad8f07133172e868755457905
|
| 3 |
+
size 212726592
|
streaming_encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:617c9a05405e9e6134838a5c760ab93b83f41b5c1407ce671526c172f94a0c9c
|
| 3 |
+
size 504210
|
streaming_encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:671ab3731e79a55d8b405d633971e7d0bf1b4a6ad8f07133172e868755457905
|
| 3 |
+
size 212726592
|
streaming_encoder.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"7B68916D-2718-4249-8DA5-9B31FEE8478A": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"E9E4EE1E-0F56-46D5-9093-67095CF85F35": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "E9E4EE1E-0F56-46D5-9093-67095CF85F35"
|
| 18 |
+
}
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d556e51ba5b89db64a8cb2be6798fb29974edcadb58b0c7b80418eb5d8752303
|
| 3 |
+
size 258183
|
vocab.json
CHANGED
|
@@ -1,1028 +1,1028 @@
|
|
| 1 |
-
|
| 2 |
-
"
|
| 3 |
-
"
|
| 4 |
-
"
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"
|
| 12 |
-
"
|
| 13 |
-
"
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
-
"
|
| 17 |
-
"
|
| 18 |
-
"
|
| 19 |
-
"
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
-
"
|
| 23 |
-
"
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
-
"
|
| 27 |
-
"
|
| 28 |
-
"
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
-
"
|
| 35 |
-
"
|
| 36 |
-
"
|
| 37 |
-
"
|
| 38 |
-
"
|
| 39 |
-
"
|
| 40 |
-
"
|
| 41 |
-
"
|
| 42 |
-
"
|
| 43 |
-
"
|
| 44 |
-
"
|
| 45 |
-
"
|
| 46 |
-
"
|
| 47 |
-
"
|
| 48 |
-
"
|
| 49 |
-
"
|
| 50 |
-
"
|
| 51 |
-
"
|
| 52 |
-
"
|
| 53 |
-
"
|
| 54 |
-
"
|
| 55 |
-
"
|
| 56 |
-
"
|
| 57 |
-
"
|
| 58 |
-
"
|
| 59 |
-
"
|
| 60 |
-
"
|
| 61 |
-
"
|
| 62 |
-
"
|
| 63 |
-
"
|
| 64 |
-
"
|
| 65 |
-
"
|
| 66 |
-
"
|
| 67 |
-
"
|
| 68 |
-
"
|
| 69 |
-
"
|
| 70 |
-
"
|
| 71 |
-
"
|
| 72 |
-
"
|
| 73 |
-
"
|
| 74 |
-
"
|
| 75 |
-
"
|
| 76 |
-
"
|
| 77 |
-
"
|
| 78 |
-
"
|
| 79 |
-
"
|
| 80 |
-
"
|
| 81 |
-
"
|
| 82 |
-
"
|
| 83 |
-
"
|
| 84 |
-
"
|
| 85 |
-
"
|
| 86 |
-
"
|
| 87 |
-
"
|
| 88 |
-
"
|
| 89 |
-
"
|
| 90 |
-
"
|
| 91 |
-
"
|
| 92 |
-
"
|
| 93 |
-
"
|
| 94 |
-
"
|
| 95 |
-
"
|
| 96 |
-
"
|
| 97 |
-
"
|
| 98 |
-
"
|
| 99 |
-
"
|
| 100 |
-
"
|
| 101 |
-
"
|
| 102 |
-
"
|
| 103 |
-
"
|
| 104 |
-
"
|
| 105 |
-
"
|
| 106 |
-
"
|
| 107 |
-
"
|
| 108 |
-
"
|
| 109 |
-
"
|
| 110 |
-
"
|
| 111 |
-
"
|
| 112 |
-
"
|
| 113 |
-
"
|
| 114 |
-
"
|
| 115 |
-
"
|
| 116 |
-
"
|
| 117 |
-
"
|
| 118 |
-
"
|
| 119 |
-
"
|
| 120 |
-
"
|
| 121 |
-
"
|
| 122 |
-
"
|
| 123 |
-
"
|
| 124 |
-
"
|
| 125 |
-
"
|
| 126 |
-
"
|
| 127 |
-
"
|
| 128 |
-
"
|
| 129 |
-
"
|
| 130 |
-
"
|
| 131 |
-
"
|
| 132 |
-
"
|
| 133 |
-
"
|
| 134 |
-
"
|
| 135 |
-
"
|
| 136 |
-
"
|
| 137 |
-
"
|
| 138 |
-
"
|
| 139 |
-
"
|
| 140 |
-
"
|
| 141 |
-
"
|
| 142 |
-
"
|
| 143 |
-
"
|
| 144 |
-
"
|
| 145 |
-
"
|
| 146 |
-
"
|
| 147 |
-
"
|
| 148 |
-
"
|
| 149 |
-
"
|
| 150 |
-
"
|
| 151 |
-
"
|
| 152 |
-
"
|
| 153 |
-
"
|
| 154 |
-
"
|
| 155 |
-
"
|
| 156 |
-
"
|
| 157 |
-
"
|
| 158 |
-
"
|
| 159 |
-
"
|
| 160 |
-
"
|
| 161 |
-
"
|
| 162 |
-
"
|
| 163 |
-
"
|
| 164 |
-
"
|
| 165 |
-
"
|
| 166 |
-
"
|
| 167 |
-
"
|
| 168 |
-
"
|
| 169 |
-
"
|
| 170 |
-
"
|
| 171 |
-
"
|
| 172 |
-
"
|
| 173 |
-
"
|
| 174 |
-
"
|
| 175 |
-
"
|
| 176 |
-
"
|
| 177 |
-
"
|
| 178 |
-
"
|
| 179 |
-
"
|
| 180 |
-
"
|
| 181 |
-
"
|
| 182 |
-
"
|
| 183 |
-
"
|
| 184 |
-
"
|
| 185 |
-
"
|
| 186 |
-
"
|
| 187 |
-
"
|
| 188 |
-
"
|
| 189 |
-
"
|
| 190 |
-
"
|
| 191 |
-
"
|
| 192 |
-
"
|
| 193 |
-
"
|
| 194 |
-
"
|
| 195 |
-
"
|
| 196 |
-
"
|
| 197 |
-
"
|
| 198 |
-
"
|
| 199 |
-
"
|
| 200 |
-
"
|
| 201 |
-
"
|
| 202 |
-
"
|
| 203 |
-
"
|
| 204 |
-
"
|
| 205 |
-
"
|
| 206 |
-
"
|
| 207 |
-
"
|
| 208 |
-
"
|
| 209 |
-
"
|
| 210 |
-
"
|
| 211 |
-
"
|
| 212 |
-
"
|
| 213 |
-
"
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
-
"
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
"
|
| 237 |
-
"
|
| 238 |
-
"
|
| 239 |
-
"
|
| 240 |
-
"
|
| 241 |
-
"
|
| 242 |
-
"
|
| 243 |
-
"
|
| 244 |
-
"
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
"
|
| 248 |
-
"
|
| 249 |
-
"
|
| 250 |
-
"
|
| 251 |
-
"
|
| 252 |
-
"
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
-
"
|
| 256 |
-
"
|
| 257 |
-
"
|
| 258 |
-
"
|
| 259 |
-
"
|
| 260 |
-
"
|
| 261 |
-
"
|
| 262 |
-
"
|
| 263 |
-
"
|
| 264 |
-
"
|
| 265 |
-
"
|
| 266 |
-
"
|
| 267 |
-
"
|
| 268 |
-
"
|
| 269 |
-
"
|
| 270 |
-
"
|
| 271 |
-
"
|
| 272 |
-
"
|
| 273 |
-
"
|
| 274 |
-
"
|
| 275 |
-
"
|
| 276 |
-
"
|
| 277 |
-
"
|
| 278 |
-
"
|
| 279 |
-
"
|
| 280 |
-
"
|
| 281 |
-
"
|
| 282 |
-
"
|
| 283 |
-
"
|
| 284 |
-
"
|
| 285 |
-
"
|
| 286 |
-
"
|
| 287 |
-
"
|
| 288 |
-
"
|
| 289 |
-
"
|
| 290 |
-
"
|
| 291 |
-
"
|
| 292 |
-
"
|
| 293 |
-
"
|
| 294 |
-
"
|
| 295 |
-
"
|
| 296 |
-
"
|
| 297 |
-
"
|
| 298 |
-
"
|
| 299 |
-
"
|
| 300 |
-
"
|
| 301 |
-
"
|
| 302 |
-
"
|
| 303 |
-
"
|
| 304 |
-
"
|
| 305 |
-
"
|
| 306 |
-
"
|
| 307 |
-
"
|
| 308 |
-
"
|
| 309 |
-
"
|
| 310 |
-
"
|
| 311 |
-
"
|
| 312 |
-
"
|
| 313 |
-
"
|
| 314 |
-
"
|
| 315 |
-
"
|
| 316 |
-
"
|
| 317 |
-
"
|
| 318 |
-
"
|
| 319 |
-
"
|
| 320 |
-
"
|
| 321 |
-
"
|
| 322 |
-
"
|
| 323 |
-
"
|
| 324 |
-
"
|
| 325 |
-
"
|
| 326 |
-
"
|
| 327 |
-
"
|
| 328 |
-
"
|
| 329 |
-
"
|
| 330 |
-
"
|
| 331 |
-
"
|
| 332 |
-
"
|
| 333 |
-
"
|
| 334 |
-
"
|
| 335 |
-
"
|
| 336 |
-
"
|
| 337 |
-
"
|
| 338 |
-
"
|
| 339 |
-
"
|
| 340 |
-
"
|
| 341 |
-
"
|
| 342 |
-
"
|
| 343 |
-
"
|
| 344 |
-
"
|
| 345 |
-
"
|
| 346 |
-
"
|
| 347 |
-
"
|
| 348 |
-
"
|
| 349 |
-
"
|
| 350 |
-
"
|
| 351 |
-
"
|
| 352 |
-
"
|
| 353 |
-
"
|
| 354 |
-
"
|
| 355 |
-
"
|
| 356 |
-
"
|
| 357 |
-
"
|
| 358 |
-
"
|
| 359 |
-
"
|
| 360 |
-
"
|
| 361 |
-
"
|
| 362 |
-
"
|
| 363 |
-
"
|
| 364 |
-
"
|
| 365 |
-
"
|
| 366 |
-
"
|
| 367 |
-
"
|
| 368 |
-
"
|
| 369 |
-
"
|
| 370 |
-
"
|
| 371 |
-
"
|
| 372 |
-
"
|
| 373 |
-
"
|
| 374 |
-
"
|
| 375 |
-
"
|
| 376 |
-
"
|
| 377 |
-
"
|
| 378 |
-
"
|
| 379 |
-
"
|
| 380 |
-
"
|
| 381 |
-
"
|
| 382 |
-
"
|
| 383 |
-
"
|
| 384 |
-
"
|
| 385 |
-
"
|
| 386 |
-
"
|
| 387 |
-
"
|
| 388 |
-
"
|
| 389 |
-
"
|
| 390 |
-
"
|
| 391 |
-
"
|
| 392 |
-
"
|
| 393 |
-
"
|
| 394 |
-
"
|
| 395 |
-
"
|
| 396 |
-
"
|
| 397 |
-
"
|
| 398 |
-
"
|
| 399 |
-
"
|
| 400 |
-
"
|
| 401 |
-
"
|
| 402 |
-
"
|
| 403 |
-
"
|
| 404 |
-
"
|
| 405 |
-
"
|
| 406 |
-
"
|
| 407 |
-
"
|
| 408 |
-
"
|
| 409 |
-
"
|
| 410 |
-
"
|
| 411 |
-
"
|
| 412 |
-
"
|
| 413 |
-
"
|
| 414 |
-
"
|
| 415 |
-
"
|
| 416 |
-
"
|
| 417 |
-
"
|
| 418 |
-
"
|
| 419 |
-
"
|
| 420 |
-
"
|
| 421 |
-
"
|
| 422 |
-
"
|
| 423 |
-
"
|
| 424 |
-
"
|
| 425 |
-
"
|
| 426 |
-
"
|
| 427 |
-
"
|
| 428 |
-
"
|
| 429 |
-
"
|
| 430 |
-
"
|
| 431 |
-
"
|
| 432 |
-
"
|
| 433 |
-
"
|
| 434 |
-
"
|
| 435 |
-
"
|
| 436 |
-
"
|
| 437 |
-
"
|
| 438 |
-
"
|
| 439 |
-
"
|
| 440 |
-
"
|
| 441 |
-
"
|
| 442 |
-
"
|
| 443 |
-
"
|
| 444 |
-
"
|
| 445 |
-
"
|
| 446 |
-
"
|
| 447 |
-
"
|
| 448 |
-
"
|
| 449 |
-
"
|
| 450 |
-
"
|
| 451 |
-
"
|
| 452 |
-
"
|
| 453 |
-
"
|
| 454 |
-
"
|
| 455 |
-
"
|
| 456 |
-
"
|
| 457 |
-
"
|
| 458 |
-
"
|
| 459 |
-
"
|
| 460 |
-
"
|
| 461 |
-
"
|
| 462 |
-
"
|
| 463 |
-
"
|
| 464 |
-
"
|
| 465 |
-
"
|
| 466 |
-
"
|
| 467 |
-
"
|
| 468 |
-
"
|
| 469 |
-
"
|
| 470 |
-
"
|
| 471 |
-
"
|
| 472 |
-
"
|
| 473 |
-
"
|
| 474 |
-
"
|
| 475 |
-
"
|
| 476 |
-
"
|
| 477 |
-
"
|
| 478 |
-
"
|
| 479 |
-
"
|
| 480 |
-
"
|
| 481 |
-
"
|
| 482 |
-
"
|
| 483 |
-
"
|
| 484 |
-
"
|
| 485 |
-
"
|
| 486 |
-
"
|
| 487 |
-
"
|
| 488 |
-
"
|
| 489 |
-
"
|
| 490 |
-
"
|
| 491 |
-
"
|
| 492 |
-
"
|
| 493 |
-
"
|
| 494 |
-
"
|
| 495 |
-
"
|
| 496 |
-
"
|
| 497 |
-
"
|
| 498 |
-
"
|
| 499 |
-
"
|
| 500 |
-
"
|
| 501 |
-
"
|
| 502 |
-
"
|
| 503 |
-
"
|
| 504 |
-
"
|
| 505 |
-
"
|
| 506 |
-
"
|
| 507 |
-
"
|
| 508 |
-
"
|
| 509 |
-
"
|
| 510 |
-
"
|
| 511 |
-
"
|
| 512 |
-
"
|
| 513 |
-
"
|
| 514 |
-
"
|
| 515 |
-
"
|
| 516 |
-
"
|
| 517 |
-
"
|
| 518 |
-
"
|
| 519 |
-
"
|
| 520 |
-
"
|
| 521 |
-
"
|
| 522 |
-
"
|
| 523 |
-
"
|
| 524 |
-
"
|
| 525 |
-
"
|
| 526 |
-
"
|
| 527 |
-
"
|
| 528 |
-
"
|
| 529 |
-
"
|
| 530 |
-
"
|
| 531 |
-
"
|
| 532 |
-
"
|
| 533 |
-
"
|
| 534 |
-
"
|
| 535 |
-
"
|
| 536 |
-
"
|
| 537 |
-
"
|
| 538 |
-
"
|
| 539 |
-
"
|
| 540 |
-
"
|
| 541 |
-
"
|
| 542 |
-
"
|
| 543 |
-
"
|
| 544 |
-
"
|
| 545 |
-
"
|
| 546 |
-
"
|
| 547 |
-
"
|
| 548 |
-
"
|
| 549 |
-
"
|
| 550 |
-
"
|
| 551 |
-
"
|
| 552 |
-
"
|
| 553 |
-
"
|
| 554 |
-
"
|
| 555 |
-
"
|
| 556 |
-
"
|
| 557 |
-
"
|
| 558 |
-
"
|
| 559 |
-
"
|
| 560 |
-
"
|
| 561 |
-
"
|
| 562 |
-
"
|
| 563 |
-
"
|
| 564 |
-
"
|
| 565 |
-
"
|
| 566 |
-
"
|
| 567 |
-
"
|
| 568 |
-
"
|
| 569 |
-
"
|
| 570 |
-
"
|
| 571 |
-
"
|
| 572 |
-
"
|
| 573 |
-
"
|
| 574 |
-
"
|
| 575 |
-
"
|
| 576 |
-
"
|
| 577 |
-
"
|
| 578 |
-
"
|
| 579 |
-
"
|
| 580 |
-
"
|
| 581 |
-
"
|
| 582 |
-
"
|
| 583 |
-
"
|
| 584 |
-
"
|
| 585 |
-
"
|
| 586 |
-
"
|
| 587 |
-
"
|
| 588 |
-
"
|
| 589 |
-
"
|
| 590 |
-
"
|
| 591 |
-
"
|
| 592 |
-
"
|
| 593 |
-
"
|
| 594 |
-
"
|
| 595 |
-
"
|
| 596 |
-
"
|
| 597 |
-
"
|
| 598 |
-
"
|
| 599 |
-
"
|
| 600 |
-
"
|
| 601 |
-
"
|
| 602 |
-
"
|
| 603 |
-
"
|
| 604 |
-
"
|
| 605 |
-
"
|
| 606 |
-
"
|
| 607 |
-
"
|
| 608 |
-
"
|
| 609 |
-
"
|
| 610 |
-
"
|
| 611 |
-
"
|
| 612 |
-
"
|
| 613 |
-
"
|
| 614 |
-
"
|
| 615 |
-
"
|
| 616 |
-
"
|
| 617 |
-
"
|
| 618 |
-
"
|
| 619 |
-
"
|
| 620 |
-
"
|
| 621 |
-
"
|
| 622 |
-
"
|
| 623 |
-
"
|
| 624 |
-
"
|
| 625 |
-
"
|
| 626 |
-
"
|
| 627 |
-
"
|
| 628 |
-
"
|
| 629 |
-
"
|
| 630 |
-
"
|
| 631 |
-
"
|
| 632 |
-
"
|
| 633 |
-
"
|
| 634 |
-
"
|
| 635 |
-
"
|
| 636 |
-
"
|
| 637 |
-
"
|
| 638 |
-
"
|
| 639 |
-
"
|
| 640 |
-
"
|
| 641 |
-
"
|
| 642 |
-
"
|
| 643 |
-
"
|
| 644 |
-
"
|
| 645 |
-
"
|
| 646 |
-
"
|
| 647 |
-
"
|
| 648 |
-
"
|
| 649 |
-
"
|
| 650 |
-
"
|
| 651 |
-
"
|
| 652 |
-
"
|
| 653 |
-
"
|
| 654 |
-
"
|
| 655 |
-
"
|
| 656 |
-
"
|
| 657 |
-
"
|
| 658 |
-
"
|
| 659 |
-
"
|
| 660 |
-
"
|
| 661 |
-
"
|
| 662 |
-
"
|
| 663 |
-
"
|
| 664 |
-
"
|
| 665 |
-
"
|
| 666 |
-
"
|
| 667 |
-
"
|
| 668 |
-
"
|
| 669 |
-
"
|
| 670 |
-
"
|
| 671 |
-
"
|
| 672 |
-
"
|
| 673 |
-
"
|
| 674 |
-
"
|
| 675 |
-
"
|
| 676 |
-
"
|
| 677 |
-
"
|
| 678 |
-
"
|
| 679 |
-
"
|
| 680 |
-
"
|
| 681 |
-
"
|
| 682 |
-
"
|
| 683 |
-
"
|
| 684 |
-
"
|
| 685 |
-
"
|
| 686 |
-
"
|
| 687 |
-
"
|
| 688 |
-
"
|
| 689 |
-
"
|
| 690 |
-
"
|
| 691 |
-
"
|
| 692 |
-
"
|
| 693 |
-
"
|
| 694 |
-
"
|
| 695 |
-
"
|
| 696 |
-
"
|
| 697 |
-
"
|
| 698 |
-
"
|
| 699 |
-
"
|
| 700 |
-
"
|
| 701 |
-
"
|
| 702 |
-
"
|
| 703 |
-
"
|
| 704 |
-
"
|
| 705 |
-
"
|
| 706 |
-
"
|
| 707 |
-
"
|
| 708 |
-
"
|
| 709 |
-
"
|
| 710 |
-
"
|
| 711 |
-
"
|
| 712 |
-
"
|
| 713 |
-
"
|
| 714 |
-
"
|
| 715 |
-
"
|
| 716 |
-
"
|
| 717 |
-
"
|
| 718 |
-
"
|
| 719 |
-
"
|
| 720 |
-
"
|
| 721 |
-
"
|
| 722 |
-
"
|
| 723 |
-
"
|
| 724 |
-
"
|
| 725 |
-
"
|
| 726 |
-
"
|
| 727 |
-
"
|
| 728 |
-
"
|
| 729 |
-
"
|
| 730 |
-
"
|
| 731 |
-
"
|
| 732 |
-
"
|
| 733 |
-
"
|
| 734 |
-
"
|
| 735 |
-
"
|
| 736 |
-
"
|
| 737 |
-
"
|
| 738 |
-
"
|
| 739 |
-
"
|
| 740 |
-
"
|
| 741 |
-
"
|
| 742 |
-
"
|
| 743 |
-
"
|
| 744 |
-
"
|
| 745 |
-
"
|
| 746 |
-
"
|
| 747 |
-
"
|
| 748 |
-
"
|
| 749 |
-
"
|
| 750 |
-
"
|
| 751 |
-
"
|
| 752 |
-
"
|
| 753 |
-
"
|
| 754 |
-
"
|
| 755 |
-
"
|
| 756 |
-
"
|
| 757 |
-
"
|
| 758 |
-
"
|
| 759 |
-
"
|
| 760 |
-
"
|
| 761 |
-
"
|
| 762 |
-
"
|
| 763 |
-
"
|
| 764 |
-
"
|
| 765 |
-
"
|
| 766 |
-
"
|
| 767 |
-
"
|
| 768 |
-
"
|
| 769 |
-
"
|
| 770 |
-
"
|
| 771 |
-
"
|
| 772 |
-
"
|
| 773 |
-
"
|
| 774 |
-
"
|
| 775 |
-
"
|
| 776 |
-
"
|
| 777 |
-
"
|
| 778 |
-
"
|
| 779 |
-
"
|
| 780 |
-
"
|
| 781 |
-
"
|
| 782 |
-
"
|
| 783 |
-
"
|
| 784 |
-
"
|
| 785 |
-
"
|
| 786 |
-
"
|
| 787 |
-
"
|
| 788 |
-
"
|
| 789 |
-
"
|
| 790 |
-
"
|
| 791 |
-
"
|
| 792 |
-
"
|
| 793 |
-
"
|
| 794 |
-
"
|
| 795 |
-
"
|
| 796 |
-
"
|
| 797 |
-
"
|
| 798 |
-
"
|
| 799 |
-
"
|
| 800 |
-
"
|
| 801 |
-
"
|
| 802 |
-
"
|
| 803 |
-
"
|
| 804 |
-
"
|
| 805 |
-
"
|
| 806 |
-
"
|
| 807 |
-
"
|
| 808 |
-
"
|
| 809 |
-
"
|
| 810 |
-
"
|
| 811 |
-
"
|
| 812 |
-
"
|
| 813 |
-
"
|
| 814 |
-
"
|
| 815 |
-
"
|
| 816 |
-
"
|
| 817 |
-
"
|
| 818 |
-
"
|
| 819 |
-
"
|
| 820 |
-
"
|
| 821 |
-
"
|
| 822 |
-
"
|
| 823 |
-
"
|
| 824 |
-
"
|
| 825 |
-
"
|
| 826 |
-
"
|
| 827 |
-
"
|
| 828 |
-
"
|
| 829 |
-
"
|
| 830 |
-
"
|
| 831 |
-
"
|
| 832 |
-
"
|
| 833 |
-
"
|
| 834 |
-
"
|
| 835 |
-
"
|
| 836 |
-
"
|
| 837 |
-
"
|
| 838 |
-
"
|
| 839 |
-
"
|
| 840 |
-
"
|
| 841 |
-
"
|
| 842 |
-
"
|
| 843 |
-
"
|
| 844 |
-
"
|
| 845 |
-
"
|
| 846 |
-
"
|
| 847 |
-
"
|
| 848 |
-
"
|
| 849 |
-
"
|
| 850 |
-
"
|
| 851 |
-
"
|
| 852 |
-
"
|
| 853 |
-
"
|
| 854 |
-
"
|
| 855 |
-
"
|
| 856 |
-
"
|
| 857 |
-
"
|
| 858 |
-
"
|
| 859 |
-
"
|
| 860 |
-
"
|
| 861 |
-
"
|
| 862 |
-
"
|
| 863 |
-
"
|
| 864 |
-
"
|
| 865 |
-
"
|
| 866 |
-
"
|
| 867 |
-
"
|
| 868 |
-
"
|
| 869 |
-
"
|
| 870 |
-
"
|
| 871 |
-
"
|
| 872 |
-
"
|
| 873 |
-
"
|
| 874 |
-
"
|
| 875 |
-
"
|
| 876 |
-
"
|
| 877 |
-
"
|
| 878 |
-
"
|
| 879 |
-
"
|
| 880 |
-
"
|
| 881 |
-
"
|
| 882 |
-
"
|
| 883 |
-
"
|
| 884 |
-
"
|
| 885 |
-
"
|
| 886 |
-
"
|
| 887 |
-
"
|
| 888 |
-
"
|
| 889 |
-
"
|
| 890 |
-
"
|
| 891 |
-
"
|
| 892 |
-
"
|
| 893 |
-
"
|
| 894 |
-
"
|
| 895 |
-
"
|
| 896 |
-
"
|
| 897 |
-
"
|
| 898 |
-
"
|
| 899 |
-
"
|
| 900 |
-
"
|
| 901 |
-
"
|
| 902 |
-
"
|
| 903 |
-
"
|
| 904 |
-
"
|
| 905 |
-
"
|
| 906 |
-
"
|
| 907 |
-
"
|
| 908 |
-
"
|
| 909 |
-
"
|
| 910 |
-
"
|
| 911 |
-
"
|
| 912 |
-
"
|
| 913 |
-
"
|
| 914 |
-
"
|
| 915 |
-
"
|
| 916 |
-
"
|
| 917 |
-
"
|
| 918 |
-
"
|
| 919 |
-
"
|
| 920 |
-
"
|
| 921 |
-
"
|
| 922 |
-
"
|
| 923 |
-
"
|
| 924 |
-
"
|
| 925 |
-
"
|
| 926 |
-
"
|
| 927 |
-
"
|
| 928 |
-
"
|
| 929 |
-
"
|
| 930 |
-
"
|
| 931 |
-
"
|
| 932 |
-
"
|
| 933 |
-
"
|
| 934 |
-
"
|
| 935 |
-
"
|
| 936 |
-
"
|
| 937 |
-
"
|
| 938 |
-
"
|
| 939 |
-
"
|
| 940 |
-
"
|
| 941 |
-
"
|
| 942 |
-
"
|
| 943 |
-
"
|
| 944 |
-
"
|
| 945 |
-
"
|
| 946 |
-
"
|
| 947 |
-
"
|
| 948 |
-
"
|
| 949 |
-
"
|
| 950 |
-
"
|
| 951 |
-
"
|
| 952 |
-
"
|
| 953 |
-
"
|
| 954 |
-
"
|
| 955 |
-
"
|
| 956 |
-
"
|
| 957 |
-
"
|
| 958 |
-
"
|
| 959 |
-
"
|
| 960 |
-
"
|
| 961 |
-
"
|
| 962 |
-
"
|
| 963 |
-
"
|
| 964 |
-
"
|
| 965 |
-
"
|
| 966 |
-
"
|
| 967 |
-
"
|
| 968 |
-
"
|
| 969 |
-
"
|
| 970 |
-
"
|
| 971 |
-
"
|
| 972 |
-
"
|
| 973 |
-
"
|
| 974 |
-
"
|
| 975 |
-
"
|
| 976 |
-
"
|
| 977 |
-
"
|
| 978 |
-
"
|
| 979 |
-
"
|
| 980 |
-
"
|
| 981 |
-
"
|
| 982 |
-
"
|
| 983 |
-
"
|
| 984 |
-
"
|
| 985 |
-
"
|
| 986 |
-
"
|
| 987 |
-
"
|
| 988 |
-
"
|
| 989 |
-
"
|
| 990 |
-
"
|
| 991 |
-
"
|
| 992 |
-
"
|
| 993 |
-
"
|
| 994 |
-
"
|
| 995 |
-
"
|
| 996 |
-
"
|
| 997 |
-
"
|
| 998 |
-
"
|
| 999 |
-
"
|
| 1000 |
-
"
|
| 1001 |
-
"
|
| 1002 |
-
"
|
| 1003 |
-
"
|
| 1004 |
-
"
|
| 1005 |
-
"
|
| 1006 |
-
"
|
| 1007 |
-
"
|
| 1008 |
-
"
|
| 1009 |
-
"
|
| 1010 |
-
"
|
| 1011 |
-
"
|
| 1012 |
-
"
|
| 1013 |
-
"
|
| 1014 |
-
"
|
| 1015 |
-
"
|
| 1016 |
-
"
|
| 1017 |
-
"
|
| 1018 |
-
"
|
| 1019 |
-
"
|
| 1020 |
-
"
|
| 1021 |
-
"
|
| 1022 |
-
"
|
| 1023 |
-
"
|
| 1024 |
-
"
|
| 1025 |
-
"
|
| 1026 |
-
"
|
| 1027 |
-
"
|
| 1028 |
-
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"<unk>",
|
| 3 |
+
"\u2581t",
|
| 4 |
+
"\u2581th",
|
| 5 |
+
"\u2581a",
|
| 6 |
+
"\u2581i",
|
| 7 |
+
"\u2581the",
|
| 8 |
+
"\u2581s",
|
| 9 |
+
"re",
|
| 10 |
+
"\u2581w",
|
| 11 |
+
"\u2581o",
|
| 12 |
+
"in",
|
| 13 |
+
"at",
|
| 14 |
+
"er",
|
| 15 |
+
"nd",
|
| 16 |
+
"ou",
|
| 17 |
+
"\u2581c",
|
| 18 |
+
"\u2581b",
|
| 19 |
+
"\u2581h",
|
| 20 |
+
"en",
|
| 21 |
+
"on",
|
| 22 |
+
"\u2581m",
|
| 23 |
+
"\u2581f",
|
| 24 |
+
"ing",
|
| 25 |
+
"\u2581p",
|
| 26 |
+
"\u2581to",
|
| 27 |
+
"\u2581and",
|
| 28 |
+
"\u2581d",
|
| 29 |
+
"an",
|
| 30 |
+
"or",
|
| 31 |
+
"es",
|
| 32 |
+
"\u2581y",
|
| 33 |
+
"\u2581l",
|
| 34 |
+
"\u2581of",
|
| 35 |
+
"ll",
|
| 36 |
+
"\u2581in",
|
| 37 |
+
"ed",
|
| 38 |
+
"it",
|
| 39 |
+
"\u2581g",
|
| 40 |
+
"is",
|
| 41 |
+
"\u2581you",
|
| 42 |
+
"\u2581n",
|
| 43 |
+
"ar",
|
| 44 |
+
"om",
|
| 45 |
+
"as",
|
| 46 |
+
"ve",
|
| 47 |
+
"\u2581e",
|
| 48 |
+
"ic",
|
| 49 |
+
"\u2581it",
|
| 50 |
+
"al",
|
| 51 |
+
"us",
|
| 52 |
+
"\u2581wh",
|
| 53 |
+
"\u2581we",
|
| 54 |
+
"\u2581be",
|
| 55 |
+
"ion",
|
| 56 |
+
"ow",
|
| 57 |
+
"le",
|
| 58 |
+
"\u2581is",
|
| 59 |
+
"et",
|
| 60 |
+
"ent",
|
| 61 |
+
"ot",
|
| 62 |
+
"ut",
|
| 63 |
+
"\u2581re",
|
| 64 |
+
"\u2581on",
|
| 65 |
+
"ay",
|
| 66 |
+
"\u2581ha",
|
| 67 |
+
"ig",
|
| 68 |
+
"\u2581so",
|
| 69 |
+
"ct",
|
| 70 |
+
"\u2581he",
|
| 71 |
+
"\u2581for",
|
| 72 |
+
"ver",
|
| 73 |
+
"ke",
|
| 74 |
+
"ro",
|
| 75 |
+
"\u2581st",
|
| 76 |
+
"id",
|
| 77 |
+
"\u2581go",
|
| 78 |
+
"all",
|
| 79 |
+
"se",
|
| 80 |
+
"ly",
|
| 81 |
+
"\u2581u",
|
| 82 |
+
"ch",
|
| 83 |
+
"st",
|
| 84 |
+
"ld",
|
| 85 |
+
"\u2581k",
|
| 86 |
+
"ce",
|
| 87 |
+
"ur",
|
| 88 |
+
"\u2581li",
|
| 89 |
+
"am",
|
| 90 |
+
"\u2581r",
|
| 91 |
+
"ht",
|
| 92 |
+
"\u2581j",
|
| 93 |
+
"ith",
|
| 94 |
+
"\u2581se",
|
| 95 |
+
"ir",
|
| 96 |
+
"\u2581as",
|
| 97 |
+
"\u2581an",
|
| 98 |
+
"im",
|
| 99 |
+
"\u2581do",
|
| 100 |
+
"ad",
|
| 101 |
+
"\u2581was",
|
| 102 |
+
"ight",
|
| 103 |
+
"th",
|
| 104 |
+
"\u2581are",
|
| 105 |
+
"\u2581but",
|
| 106 |
+
"\u2581sh",
|
| 107 |
+
"ust",
|
| 108 |
+
"ally",
|
| 109 |
+
"\u2581not",
|
| 110 |
+
"\u2581or",
|
| 111 |
+
"\u2581com",
|
| 112 |
+
"\u2581can",
|
| 113 |
+
"\u2581me",
|
| 114 |
+
"op",
|
| 115 |
+
"\u2581mo",
|
| 116 |
+
"\u2581at",
|
| 117 |
+
"ill",
|
| 118 |
+
"\u2581ch",
|
| 119 |
+
"\u2581ne",
|
| 120 |
+
"ant",
|
| 121 |
+
"\u2581de",
|
| 122 |
+
"\u2581kn",
|
| 123 |
+
"\u2581one",
|
| 124 |
+
"il",
|
| 125 |
+
"ol",
|
| 126 |
+
"\u2581con",
|
| 127 |
+
"ter",
|
| 128 |
+
"\u2581ab",
|
| 129 |
+
"\u2581fr",
|
| 130 |
+
"ere",
|
| 131 |
+
"ck",
|
| 132 |
+
"\u2581al",
|
| 133 |
+
"\u2581all",
|
| 134 |
+
"qu",
|
| 135 |
+
"\u2581pro",
|
| 136 |
+
"\u2581som",
|
| 137 |
+
"ould",
|
| 138 |
+
"\u2581tw",
|
| 139 |
+
"ul",
|
| 140 |
+
"ra",
|
| 141 |
+
"od",
|
| 142 |
+
"ers",
|
| 143 |
+
"\u2581su",
|
| 144 |
+
"ive",
|
| 145 |
+
"\u2581v",
|
| 146 |
+
"use",
|
| 147 |
+
"ate",
|
| 148 |
+
"ge",
|
| 149 |
+
"if",
|
| 150 |
+
"\u2581ex",
|
| 151 |
+
"ess",
|
| 152 |
+
"pp",
|
| 153 |
+
"\u2581lo",
|
| 154 |
+
"out",
|
| 155 |
+
"\u2581if",
|
| 156 |
+
"est",
|
| 157 |
+
"ain",
|
| 158 |
+
"ist",
|
| 159 |
+
"and",
|
| 160 |
+
"ea",
|
| 161 |
+
"very",
|
| 162 |
+
"art",
|
| 163 |
+
"\u2581wor",
|
| 164 |
+
"\u2581my",
|
| 165 |
+
"ab",
|
| 166 |
+
"ment",
|
| 167 |
+
"\u2581bec",
|
| 168 |
+
"un",
|
| 169 |
+
"ity",
|
| 170 |
+
"ri",
|
| 171 |
+
"pe",
|
| 172 |
+
"ions",
|
| 173 |
+
"\u2581by",
|
| 174 |
+
"ok",
|
| 175 |
+
"our",
|
| 176 |
+
"ort",
|
| 177 |
+
"ind",
|
| 178 |
+
"ink",
|
| 179 |
+
"nt",
|
| 180 |
+
"\u2581up",
|
| 181 |
+
"um",
|
| 182 |
+
"\u2581don",
|
| 183 |
+
"\u2581get",
|
| 184 |
+
"red",
|
| 185 |
+
"\u2581out",
|
| 186 |
+
"el",
|
| 187 |
+
"ause",
|
| 188 |
+
"res",
|
| 189 |
+
"\u2581ma",
|
| 190 |
+
"ich",
|
| 191 |
+
"\u2581us",
|
| 192 |
+
"rou",
|
| 193 |
+
"\u2581int",
|
| 194 |
+
"em",
|
| 195 |
+
"os",
|
| 196 |
+
"ies",
|
| 197 |
+
"ie",
|
| 198 |
+
"\u2581pl",
|
| 199 |
+
"\u2581tr",
|
| 200 |
+
"ven",
|
| 201 |
+
"ous",
|
| 202 |
+
"\u2581le",
|
| 203 |
+
"\u2581two",
|
| 204 |
+
"ard",
|
| 205 |
+
"ine",
|
| 206 |
+
"\u2581co",
|
| 207 |
+
"een",
|
| 208 |
+
"\u2581now",
|
| 209 |
+
"ty",
|
| 210 |
+
"her",
|
| 211 |
+
"ack",
|
| 212 |
+
"\u2581pe",
|
| 213 |
+
"ame",
|
| 214 |
+
"\u2581how",
|
| 215 |
+
"\u2581who",
|
| 216 |
+
"\u2581see",
|
| 217 |
+
"\u2581tim",
|
| 218 |
+
"ect",
|
| 219 |
+
"ast",
|
| 220 |
+
"\u2581our",
|
| 221 |
+
"ci",
|
| 222 |
+
"ree",
|
| 223 |
+
"ople",
|
| 224 |
+
"gh",
|
| 225 |
+
"\u2581no",
|
| 226 |
+
"\u2581had",
|
| 227 |
+
"\u2581man",
|
| 228 |
+
"\u2581qu",
|
| 229 |
+
"\u2581en",
|
| 230 |
+
"ide",
|
| 231 |
+
"ure",
|
| 232 |
+
"ud",
|
| 233 |
+
"so",
|
| 234 |
+
"\u2581his",
|
| 235 |
+
"\u2581sa",
|
| 236 |
+
"\u2581sp",
|
| 237 |
+
"\u2581say",
|
| 238 |
+
"ose",
|
| 239 |
+
"ther",
|
| 240 |
+
"\u2581act",
|
| 241 |
+
"\u2581ta",
|
| 242 |
+
"\u2581cl",
|
| 243 |
+
"ings",
|
| 244 |
+
"pt",
|
| 245 |
+
"king",
|
| 246 |
+
"\u2581any",
|
| 247 |
+
"\u2581has",
|
| 248 |
+
"\u2581un",
|
| 249 |
+
"iv",
|
| 250 |
+
"\u2581im",
|
| 251 |
+
"\u2581ag",
|
| 252 |
+
"\u2581te",
|
| 253 |
+
"\u2581fe",
|
| 254 |
+
"one",
|
| 255 |
+
"per",
|
| 256 |
+
"ong",
|
| 257 |
+
"\u2581po",
|
| 258 |
+
"\u2581ad",
|
| 259 |
+
"ff",
|
| 260 |
+
"ore",
|
| 261 |
+
"itt",
|
| 262 |
+
"ans",
|
| 263 |
+
"iz",
|
| 264 |
+
"eah",
|
| 265 |
+
"reat",
|
| 266 |
+
"act",
|
| 267 |
+
"own",
|
| 268 |
+
"hing",
|
| 269 |
+
"enty",
|
| 270 |
+
"age",
|
| 271 |
+
"ber",
|
| 272 |
+
"ice",
|
| 273 |
+
"\u2581am",
|
| 274 |
+
"ple",
|
| 275 |
+
"are",
|
| 276 |
+
"\u2581per",
|
| 277 |
+
"und",
|
| 278 |
+
"ite",
|
| 279 |
+
"ix",
|
| 280 |
+
"pl",
|
| 281 |
+
"\u2581way",
|
| 282 |
+
"\u2581did",
|
| 283 |
+
"\u2581pr",
|
| 284 |
+
"\u2581got",
|
| 285 |
+
"ars",
|
| 286 |
+
"\u2581she",
|
| 287 |
+
"\u2581let",
|
| 288 |
+
"ag",
|
| 289 |
+
"\u2581ac",
|
| 290 |
+
"int",
|
| 291 |
+
"\u2581ar",
|
| 292 |
+
"ry",
|
| 293 |
+
"ign",
|
| 294 |
+
"ish",
|
| 295 |
+
"\u2581fir",
|
| 296 |
+
"ace",
|
| 297 |
+
"ble",
|
| 298 |
+
"og",
|
| 299 |
+
"ue",
|
| 300 |
+
"\u2581ye",
|
| 301 |
+
"ap",
|
| 302 |
+
"iff",
|
| 303 |
+
"\u2581ro",
|
| 304 |
+
"\u2581her",
|
| 305 |
+
"nder",
|
| 306 |
+
"\u2581ok",
|
| 307 |
+
"\u2581res",
|
| 308 |
+
"\u2581gu",
|
| 309 |
+
"ence",
|
| 310 |
+
"\u2581may",
|
| 311 |
+
"ated",
|
| 312 |
+
"ip",
|
| 313 |
+
"\u2581bo",
|
| 314 |
+
"\u2581him",
|
| 315 |
+
"way",
|
| 316 |
+
"ac",
|
| 317 |
+
"ical",
|
| 318 |
+
"ass",
|
| 319 |
+
"ase",
|
| 320 |
+
"\u2581dis",
|
| 321 |
+
"able",
|
| 322 |
+
"ick",
|
| 323 |
+
"\u2581app",
|
| 324 |
+
"ance",
|
| 325 |
+
"\u2581pre",
|
| 326 |
+
"\u2581six",
|
| 327 |
+
"\u2581off",
|
| 328 |
+
"\u2581new",
|
| 329 |
+
"ia",
|
| 330 |
+
"orm",
|
| 331 |
+
"ank",
|
| 332 |
+
"\u2581lot",
|
| 333 |
+
"ach",
|
| 334 |
+
"\u2581fo",
|
| 335 |
+
"inet",
|
| 336 |
+
"ire",
|
| 337 |
+
"ary",
|
| 338 |
+
"ult",
|
| 339 |
+
"\u2581tal",
|
| 340 |
+
"\u2581mu",
|
| 341 |
+
"\u2581bl",
|
| 342 |
+
"ount",
|
| 343 |
+
"sel",
|
| 344 |
+
"vel",
|
| 345 |
+
"\u2581br",
|
| 346 |
+
"\u2581imp",
|
| 347 |
+
"ep",
|
| 348 |
+
"cess",
|
| 349 |
+
"ord",
|
| 350 |
+
"\u2581sc",
|
| 351 |
+
"\u2581inc",
|
| 352 |
+
"ound",
|
| 353 |
+
"ang",
|
| 354 |
+
"be",
|
| 355 |
+
"ress",
|
| 356 |
+
"uct",
|
| 357 |
+
"\u2581ind",
|
| 358 |
+
"\u2581af",
|
| 359 |
+
"ving",
|
| 360 |
+
"\u2581oh",
|
| 361 |
+
"\u2581bet",
|
| 362 |
+
"\u2581use",
|
| 363 |
+
"ome",
|
| 364 |
+
"ens",
|
| 365 |
+
"ys",
|
| 366 |
+
"\u2581bu",
|
| 367 |
+
"co",
|
| 368 |
+
"ory",
|
| 369 |
+
"ater",
|
| 370 |
+
"ild",
|
| 371 |
+
"ght",
|
| 372 |
+
"ial",
|
| 373 |
+
"\u2581day",
|
| 374 |
+
"ning",
|
| 375 |
+
"na",
|
| 376 |
+
"ile",
|
| 377 |
+
"\u2581spe",
|
| 378 |
+
"\u2581mar",
|
| 379 |
+
"ody",
|
| 380 |
+
"ough",
|
| 381 |
+
"ade",
|
| 382 |
+
"vers",
|
| 383 |
+
"xt",
|
| 384 |
+
"\u2581fl",
|
| 385 |
+
"\u2581ke",
|
| 386 |
+
"ian",
|
| 387 |
+
"\u2581sy",
|
| 388 |
+
"\u2581put",
|
| 389 |
+
"fore",
|
| 390 |
+
"ub",
|
| 391 |
+
"\u2581ph",
|
| 392 |
+
"fe",
|
| 393 |
+
"\u2581em",
|
| 394 |
+
"\u2581ser",
|
| 395 |
+
"form",
|
| 396 |
+
"ting",
|
| 397 |
+
"te",
|
| 398 |
+
"av",
|
| 399 |
+
"ious",
|
| 400 |
+
"\u2581rec",
|
| 401 |
+
"ks",
|
| 402 |
+
"\u2581gr",
|
| 403 |
+
"ces",
|
| 404 |
+
"wn",
|
| 405 |
+
"ors",
|
| 406 |
+
"\u2581jo",
|
| 407 |
+
"ents",
|
| 408 |
+
"\u2581des",
|
| 409 |
+
"\u2581try",
|
| 410 |
+
"\u2581equ",
|
| 411 |
+
"\u2581z",
|
| 412 |
+
"\u2581rem",
|
| 413 |
+
"\u2581str",
|
| 414 |
+
"self",
|
| 415 |
+
"\u2581bit",
|
| 416 |
+
"ph",
|
| 417 |
+
"ved",
|
| 418 |
+
"\u2581why",
|
| 419 |
+
"\u2581bas",
|
| 420 |
+
"\u2581hel",
|
| 421 |
+
"\u2581rel",
|
| 422 |
+
"ath",
|
| 423 |
+
"ject",
|
| 424 |
+
"ail",
|
| 425 |
+
"\u2581la",
|
| 426 |
+
"ual",
|
| 427 |
+
"\u2581god",
|
| 428 |
+
"\u2581nat",
|
| 429 |
+
"erm",
|
| 430 |
+
"day",
|
| 431 |
+
"\u2581id",
|
| 432 |
+
"ft",
|
| 433 |
+
"\u2581wr",
|
| 434 |
+
"\u2581min",
|
| 435 |
+
"ates",
|
| 436 |
+
"\u2581gen",
|
| 437 |
+
"tain",
|
| 438 |
+
"\u2581ob",
|
| 439 |
+
"ull",
|
| 440 |
+
"ict",
|
| 441 |
+
"\u2581tra",
|
| 442 |
+
"\u2581end",
|
| 443 |
+
"\u2581hig",
|
| 444 |
+
"\u2581fif",
|
| 445 |
+
"oth",
|
| 446 |
+
"tern",
|
| 447 |
+
"\u2581its",
|
| 448 |
+
"vent",
|
| 449 |
+
"\u2581sm",
|
| 450 |
+
"ons",
|
| 451 |
+
"\u2581add",
|
| 452 |
+
"iss",
|
| 453 |
+
"\u2581bel",
|
| 454 |
+
"ful",
|
| 455 |
+
"get",
|
| 456 |
+
"\u2581ele",
|
| 457 |
+
"\u2581rep",
|
| 458 |
+
"ak",
|
| 459 |
+
"\u2581ho",
|
| 460 |
+
"\u2581pos",
|
| 461 |
+
"\u2581num",
|
| 462 |
+
"ange",
|
| 463 |
+
"ves",
|
| 464 |
+
"ific",
|
| 465 |
+
"urn",
|
| 466 |
+
"ise",
|
| 467 |
+
"\u2581cr",
|
| 468 |
+
"\u2581um",
|
| 469 |
+
"ward",
|
| 470 |
+
"\u2581reg",
|
| 471 |
+
"ady",
|
| 472 |
+
"ower",
|
| 473 |
+
"uc",
|
| 474 |
+
"\u2581dec",
|
| 475 |
+
"lic",
|
| 476 |
+
"\u2581set",
|
| 477 |
+
"\u2581gon",
|
| 478 |
+
"\u2581op",
|
| 479 |
+
"\u2581ear",
|
| 480 |
+
"\u2581sub",
|
| 481 |
+
"\u2581sl",
|
| 482 |
+
"les",
|
| 483 |
+
"stem",
|
| 484 |
+
"cial",
|
| 485 |
+
"olog",
|
| 486 |
+
"atch",
|
| 487 |
+
"ily",
|
| 488 |
+
"body",
|
| 489 |
+
"nds",
|
| 490 |
+
"ular",
|
| 491 |
+
"ren",
|
| 492 |
+
"\u2581own",
|
| 493 |
+
"\u2581too",
|
| 494 |
+
"cent",
|
| 495 |
+
"ible",
|
| 496 |
+
"pect",
|
| 497 |
+
"ered",
|
| 498 |
+
"ways",
|
| 499 |
+
"teen",
|
| 500 |
+
"\u2581uh",
|
| 501 |
+
"\u2581big",
|
| 502 |
+
"\u2581mod",
|
| 503 |
+
"\u2581att",
|
| 504 |
+
"\u2581car",
|
| 505 |
+
"gr",
|
| 506 |
+
"\u2581acc",
|
| 507 |
+
"ied",
|
| 508 |
+
"mun",
|
| 509 |
+
"ib",
|
| 510 |
+
"\u2581mon",
|
| 511 |
+
"\u2581sch",
|
| 512 |
+
"\u2581pol",
|
| 513 |
+
"\u2581dat",
|
| 514 |
+
"\u2581fin",
|
| 515 |
+
"\u2581sim",
|
| 516 |
+
"\u2581inv",
|
| 517 |
+
"\u2581def",
|
| 518 |
+
"ked",
|
| 519 |
+
"\u2581ent",
|
| 520 |
+
"\u2581yes",
|
| 521 |
+
"ows",
|
| 522 |
+
"ics",
|
| 523 |
+
"ited",
|
| 524 |
+
"ute",
|
| 525 |
+
"ism",
|
| 526 |
+
"ps",
|
| 527 |
+
"\u2581ed",
|
| 528 |
+
"\u2581el",
|
| 529 |
+
"ably",
|
| 530 |
+
"ppen",
|
| 531 |
+
"als",
|
| 532 |
+
"\u2581ten",
|
| 533 |
+
"ract",
|
| 534 |
+
"ss",
|
| 535 |
+
"\u2581ass",
|
| 536 |
+
"\u2581met",
|
| 537 |
+
"gan",
|
| 538 |
+
"\u2581eng",
|
| 539 |
+
"\u2581stu",
|
| 540 |
+
"ween",
|
| 541 |
+
"arch",
|
| 542 |
+
"\u2581gl",
|
| 543 |
+
"\u2581cor",
|
| 544 |
+
"\u2581dr",
|
| 545 |
+
"vern",
|
| 546 |
+
"\u2581ty",
|
| 547 |
+
"\u2581run",
|
| 548 |
+
"hip",
|
| 549 |
+
"cus",
|
| 550 |
+
"cond",
|
| 551 |
+
"\u2581ins",
|
| 552 |
+
"irty",
|
| 553 |
+
"\u2581pub",
|
| 554 |
+
"lud",
|
| 555 |
+
"llow",
|
| 556 |
+
"\u2581cou",
|
| 557 |
+
"ew",
|
| 558 |
+
"iew",
|
| 559 |
+
"\u2581sur",
|
| 560 |
+
"ero",
|
| 561 |
+
"ood",
|
| 562 |
+
"ness",
|
| 563 |
+
"\u2581fun",
|
| 564 |
+
"\u2581eff",
|
| 565 |
+
"cept",
|
| 566 |
+
"\u2581ca",
|
| 567 |
+
"\u2581exp",
|
| 568 |
+
"duct",
|
| 569 |
+
"\u2581sw",
|
| 570 |
+
"ize",
|
| 571 |
+
"ope",
|
| 572 |
+
"\u2581par",
|
| 573 |
+
"kes",
|
| 574 |
+
"cy",
|
| 575 |
+
"\u2581ev",
|
| 576 |
+
"\u2581ref",
|
| 577 |
+
"ell",
|
| 578 |
+
"\u2581bus",
|
| 579 |
+
"ug",
|
| 580 |
+
"rib",
|
| 581 |
+
"\u2581cur",
|
| 582 |
+
"mo",
|
| 583 |
+
"ock",
|
| 584 |
+
"ures",
|
| 585 |
+
"air",
|
| 586 |
+
"\u2581war",
|
| 587 |
+
"str",
|
| 588 |
+
"\u2581med",
|
| 589 |
+
"\u2581wa",
|
| 590 |
+
"\u2581val",
|
| 591 |
+
"\u2581sin",
|
| 592 |
+
"blem",
|
| 593 |
+
"\u2581fam",
|
| 594 |
+
"li",
|
| 595 |
+
"\u2581far",
|
| 596 |
+
"\u2581cle",
|
| 597 |
+
"\u2581col",
|
| 598 |
+
"mon",
|
| 599 |
+
"\u2581gra",
|
| 600 |
+
"led",
|
| 601 |
+
"ense",
|
| 602 |
+
"tin",
|
| 603 |
+
"ues",
|
| 604 |
+
"its",
|
| 605 |
+
"\u2581mem",
|
| 606 |
+
"\u2581inf",
|
| 607 |
+
"\u2581eas",
|
| 608 |
+
"ideo",
|
| 609 |
+
"\u2581top",
|
| 610 |
+
"io",
|
| 611 |
+
"pan",
|
| 612 |
+
"\u2581hum",
|
| 613 |
+
"\u2581old",
|
| 614 |
+
"ead",
|
| 615 |
+
"\u2581ord",
|
| 616 |
+
"ric",
|
| 617 |
+
"ants",
|
| 618 |
+
"oy",
|
| 619 |
+
"esn",
|
| 620 |
+
"uck",
|
| 621 |
+
"ason",
|
| 622 |
+
"ced",
|
| 623 |
+
"ool",
|
| 624 |
+
"rat",
|
| 625 |
+
"ouse",
|
| 626 |
+
"\u2581lar",
|
| 627 |
+
"\u2581art",
|
| 628 |
+
"\u2581wee",
|
| 629 |
+
"\u2581cer",
|
| 630 |
+
"ized",
|
| 631 |
+
"\u2581mat",
|
| 632 |
+
"con",
|
| 633 |
+
"erg",
|
| 634 |
+
"land",
|
| 635 |
+
"ines",
|
| 636 |
+
"\u2581chr",
|
| 637 |
+
"\u2581aut",
|
| 638 |
+
"\u2581lea",
|
| 639 |
+
"\u2581sou",
|
| 640 |
+
"oney",
|
| 641 |
+
"tty",
|
| 642 |
+
"\u2581ple",
|
| 643 |
+
"ulat",
|
| 644 |
+
"oks",
|
| 645 |
+
"\u2581few",
|
| 646 |
+
"\u2581sol",
|
| 647 |
+
"\u2581che",
|
| 648 |
+
"chn",
|
| 649 |
+
"ird",
|
| 650 |
+
"\u2581bre",
|
| 651 |
+
"\u2581dur",
|
| 652 |
+
"\u2581wom",
|
| 653 |
+
"me",
|
| 654 |
+
"izat",
|
| 655 |
+
"eric",
|
| 656 |
+
"ote",
|
| 657 |
+
"\u2581uni",
|
| 658 |
+
"eren",
|
| 659 |
+
"arn",
|
| 660 |
+
"ross",
|
| 661 |
+
"ices",
|
| 662 |
+
"ten",
|
| 663 |
+
"eral",
|
| 664 |
+
"ever",
|
| 665 |
+
"ieve",
|
| 666 |
+
"lish",
|
| 667 |
+
"ash",
|
| 668 |
+
"\u2581opp",
|
| 669 |
+
"alth",
|
| 670 |
+
"ger",
|
| 671 |
+
"\u2581sk",
|
| 672 |
+
"\u2581red",
|
| 673 |
+
"peri",
|
| 674 |
+
"\u2581det",
|
| 675 |
+
"\u2581ext",
|
| 676 |
+
"ner",
|
| 677 |
+
"ah",
|
| 678 |
+
"\u2581var",
|
| 679 |
+
"\u2581loc",
|
| 680 |
+
"gram",
|
| 681 |
+
"ists",
|
| 682 |
+
"ives",
|
| 683 |
+
"\u2581es",
|
| 684 |
+
"\u2581nor",
|
| 685 |
+
"tro",
|
| 686 |
+
"ale",
|
| 687 |
+
"\u2581iss",
|
| 688 |
+
"\u2581pri",
|
| 689 |
+
"gin",
|
| 690 |
+
"az",
|
| 691 |
+
"oc",
|
| 692 |
+
"\u2581pop",
|
| 693 |
+
"ern",
|
| 694 |
+
"\u2581sit",
|
| 695 |
+
"ket",
|
| 696 |
+
"\u2581pa",
|
| 697 |
+
"\u2581law",
|
| 698 |
+
"ages",
|
| 699 |
+
"br",
|
| 700 |
+
"\u2581cam",
|
| 701 |
+
"\u2581mom",
|
| 702 |
+
"osed",
|
| 703 |
+
"\u2581bro",
|
| 704 |
+
"ne",
|
| 705 |
+
"bs",
|
| 706 |
+
"\u2581cre",
|
| 707 |
+
"erat",
|
| 708 |
+
"\u2581sec",
|
| 709 |
+
"\u2581cap",
|
| 710 |
+
"\u2581vis",
|
| 711 |
+
"\u2581pat",
|
| 712 |
+
"ield",
|
| 713 |
+
"iet",
|
| 714 |
+
"\u2581tri",
|
| 715 |
+
"up",
|
| 716 |
+
"\u2581bra",
|
| 717 |
+
"ts",
|
| 718 |
+
"\u2581mot",
|
| 719 |
+
"\u2581unt",
|
| 720 |
+
"put",
|
| 721 |
+
"bo",
|
| 722 |
+
"ork",
|
| 723 |
+
"mer",
|
| 724 |
+
"ital",
|
| 725 |
+
"\u2581air",
|
| 726 |
+
"ined",
|
| 727 |
+
"\u2581beh",
|
| 728 |
+
"\u2581adv",
|
| 729 |
+
"\u2581ret",
|
| 730 |
+
"imes",
|
| 731 |
+
"\u2581tea",
|
| 732 |
+
"ural",
|
| 733 |
+
"sid",
|
| 734 |
+
"ters",
|
| 735 |
+
"\u2581pur",
|
| 736 |
+
"\u2581sci",
|
| 737 |
+
"bers",
|
| 738 |
+
"ient",
|
| 739 |
+
"ier",
|
| 740 |
+
"cc",
|
| 741 |
+
"sw",
|
| 742 |
+
"\u2581av",
|
| 743 |
+
"reen",
|
| 744 |
+
"ode",
|
| 745 |
+
"ont",
|
| 746 |
+
"\u2581dra",
|
| 747 |
+
"ann",
|
| 748 |
+
"nect",
|
| 749 |
+
"\u2581x",
|
| 750 |
+
"\u2581eu",
|
| 751 |
+
"ton",
|
| 752 |
+
"inat",
|
| 753 |
+
"ene",
|
| 754 |
+
"ared",
|
| 755 |
+
"els",
|
| 756 |
+
"\u2581mor",
|
| 757 |
+
"\u2581rat",
|
| 758 |
+
"cri",
|
| 759 |
+
"\u2581men",
|
| 760 |
+
"\u2581ah",
|
| 761 |
+
"ames",
|
| 762 |
+
"\u2581arm",
|
| 763 |
+
"eak",
|
| 764 |
+
"\u2581pay",
|
| 765 |
+
"\u2581hal",
|
| 766 |
+
"ins",
|
| 767 |
+
"ilit",
|
| 768 |
+
"stit",
|
| 769 |
+
"\u2581ra",
|
| 770 |
+
"\u2581leg",
|
| 771 |
+
"cl",
|
| 772 |
+
"pr",
|
| 773 |
+
"\u2581wal",
|
| 774 |
+
"\u2581bad",
|
| 775 |
+
"\u2581ge",
|
| 776 |
+
"roup",
|
| 777 |
+
"\u2581mus",
|
| 778 |
+
"man",
|
| 779 |
+
"\u2581gi",
|
| 780 |
+
"eds",
|
| 781 |
+
"\u2581aw",
|
| 782 |
+
"po",
|
| 783 |
+
"ark",
|
| 784 |
+
"row",
|
| 785 |
+
"\u2581dep",
|
| 786 |
+
"ully",
|
| 787 |
+
"ral",
|
| 788 |
+
"lect",
|
| 789 |
+
"pend",
|
| 790 |
+
"\u2581sev",
|
| 791 |
+
"ime",
|
| 792 |
+
"gest",
|
| 793 |
+
"here",
|
| 794 |
+
"\u2581yet",
|
| 795 |
+
"ted",
|
| 796 |
+
"\u2581rev",
|
| 797 |
+
"ds",
|
| 798 |
+
"\u2581ask",
|
| 799 |
+
"less",
|
| 800 |
+
"\u2581di",
|
| 801 |
+
"ets",
|
| 802 |
+
"line",
|
| 803 |
+
"\u2581aff",
|
| 804 |
+
"ired",
|
| 805 |
+
"\u2581est",
|
| 806 |
+
"ken",
|
| 807 |
+
"vid",
|
| 808 |
+
"most",
|
| 809 |
+
"ivid",
|
| 810 |
+
"unch",
|
| 811 |
+
"par",
|
| 812 |
+
"med",
|
| 813 |
+
"rop",
|
| 814 |
+
"ased",
|
| 815 |
+
"eone",
|
| 816 |
+
"\u2581ve",
|
| 817 |
+
"\u2581abs",
|
| 818 |
+
"ergy",
|
| 819 |
+
"ret",
|
| 820 |
+
"\u2581saw",
|
| 821 |
+
"\u2581ey",
|
| 822 |
+
"\u2581cal",
|
| 823 |
+
"uat",
|
| 824 |
+
"\u2581mid",
|
| 825 |
+
"vat",
|
| 826 |
+
"ream",
|
| 827 |
+
"vice",
|
| 828 |
+
"ians",
|
| 829 |
+
"rent",
|
| 830 |
+
"ctor",
|
| 831 |
+
"err",
|
| 832 |
+
"ush",
|
| 833 |
+
"ases",
|
| 834 |
+
"\u2581suc",
|
| 835 |
+
"erms",
|
| 836 |
+
"ave",
|
| 837 |
+
"angu",
|
| 838 |
+
"ries",
|
| 839 |
+
"\u2581wo",
|
| 840 |
+
"arts",
|
| 841 |
+
"\u2581fil",
|
| 842 |
+
"\u2581fat",
|
| 843 |
+
"\u2581cho",
|
| 844 |
+
"orts",
|
| 845 |
+
"\u2581fre",
|
| 846 |
+
"ee",
|
| 847 |
+
"ught",
|
| 848 |
+
"eng",
|
| 849 |
+
"ump",
|
| 850 |
+
"\u2581bar",
|
| 851 |
+
"ying",
|
| 852 |
+
"ane",
|
| 853 |
+
"\u2581tem",
|
| 854 |
+
"anks",
|
| 855 |
+
"ury",
|
| 856 |
+
"iat",
|
| 857 |
+
"mit",
|
| 858 |
+
"trol",
|
| 859 |
+
"\u2581net",
|
| 860 |
+
"\u2581maj",
|
| 861 |
+
"\u2581cra",
|
| 862 |
+
"ling",
|
| 863 |
+
"\u2581fig",
|
| 864 |
+
"orn",
|
| 865 |
+
"icat",
|
| 866 |
+
"pany",
|
| 867 |
+
"\u2581occ",
|
| 868 |
+
"ott",
|
| 869 |
+
"ands",
|
| 870 |
+
"\u2581exc",
|
| 871 |
+
"\u2581mr",
|
| 872 |
+
"ency",
|
| 873 |
+
"rope",
|
| 874 |
+
"itch",
|
| 875 |
+
"\u2581lit",
|
| 876 |
+
"abil",
|
| 877 |
+
"not",
|
| 878 |
+
"ma",
|
| 879 |
+
"\u2581typ",
|
| 880 |
+
"\u2581opt",
|
| 881 |
+
"ob",
|
| 882 |
+
"ser",
|
| 883 |
+
"ety",
|
| 884 |
+
"ms",
|
| 885 |
+
"peci",
|
| 886 |
+
"aces",
|
| 887 |
+
"aut",
|
| 888 |
+
"\u2581hon",
|
| 889 |
+
"cuss",
|
| 890 |
+
"\u2581sal",
|
| 891 |
+
"\u2581sor",
|
| 892 |
+
"att",
|
| 893 |
+
"\u2581lab",
|
| 894 |
+
"\u2581har",
|
| 895 |
+
"urch",
|
| 896 |
+
"nded",
|
| 897 |
+
"uce",
|
| 898 |
+
"ids",
|
| 899 |
+
"\u2581hy",
|
| 900 |
+
"\u2581fut",
|
| 901 |
+
"\u2581ste",
|
| 902 |
+
"ours",
|
| 903 |
+
"ems",
|
| 904 |
+
"utes",
|
| 905 |
+
"ng",
|
| 906 |
+
"ta",
|
| 907 |
+
"\u2581won",
|
| 908 |
+
"\u2581fa",
|
| 909 |
+
"\u2581env",
|
| 910 |
+
"ards",
|
| 911 |
+
"\u2581job",
|
| 912 |
+
"ium",
|
| 913 |
+
"\u2581dot",
|
| 914 |
+
"\u2581obv",
|
| 915 |
+
"ina",
|
| 916 |
+
"side",
|
| 917 |
+
"elve",
|
| 918 |
+
"cu",
|
| 919 |
+
"\u2581jes",
|
| 920 |
+
"\u2581pot",
|
| 921 |
+
"\u2581pie",
|
| 922 |
+
"\u2581tre",
|
| 923 |
+
"\u2581hey",
|
| 924 |
+
"\u2581mag",
|
| 925 |
+
"ron",
|
| 926 |
+
"\u2581key",
|
| 927 |
+
"swer",
|
| 928 |
+
"\u2581win",
|
| 929 |
+
"ucat",
|
| 930 |
+
"work",
|
| 931 |
+
"ides",
|
| 932 |
+
"\u2581low",
|
| 933 |
+
"\u2581vol",
|
| 934 |
+
"\u2581oth",
|
| 935 |
+
"atic",
|
| 936 |
+
"lf",
|
| 937 |
+
"ads",
|
| 938 |
+
"inds",
|
| 939 |
+
"com",
|
| 940 |
+
"ths",
|
| 941 |
+
"\u2581ver",
|
| 942 |
+
"ised",
|
| 943 |
+
"lo",
|
| 944 |
+
"\u2581squ",
|
| 945 |
+
"\u2581cut",
|
| 946 |
+
"oked",
|
| 947 |
+
"irit",
|
| 948 |
+
"ateg",
|
| 949 |
+
"ppy",
|
| 950 |
+
"mitt",
|
| 951 |
+
"come",
|
| 952 |
+
"hn",
|
| 953 |
+
"igin",
|
| 954 |
+
"mand",
|
| 955 |
+
"\u2581dam",
|
| 956 |
+
"ho",
|
| 957 |
+
"\u2581da",
|
| 958 |
+
"\u2581fur",
|
| 959 |
+
"iron",
|
| 960 |
+
"ilar",
|
| 961 |
+
"\u2581fac",
|
| 962 |
+
"\u2581neg",
|
| 963 |
+
"\u2581ago",
|
| 964 |
+
"ged",
|
| 965 |
+
"miss",
|
| 966 |
+
"enth",
|
| 967 |
+
"\u2581dou",
|
| 968 |
+
"\u2581hit",
|
| 969 |
+
"\u2581guy",
|
| 970 |
+
"\u2581bi",
|
| 971 |
+
"ove",
|
| 972 |
+
"fess",
|
| 973 |
+
"ples",
|
| 974 |
+
"owed",
|
| 975 |
+
"ured",
|
| 976 |
+
"\u2581ris",
|
| 977 |
+
"ints",
|
| 978 |
+
"rew",
|
| 979 |
+
"\u2581sum",
|
| 980 |
+
"\u2581hu",
|
| 981 |
+
"ploy",
|
| 982 |
+
"ude",
|
| 983 |
+
"ried",
|
| 984 |
+
"\u2581cir",
|
| 985 |
+
"\u2581dev",
|
| 986 |
+
"ear",
|
| 987 |
+
"\u2581tot",
|
| 988 |
+
"\u2581ann",
|
| 989 |
+
"duc",
|
| 990 |
+
"ik",
|
| 991 |
+
"pon",
|
| 992 |
+
"sted",
|
| 993 |
+
"\u2581ide",
|
| 994 |
+
"\u2581'",
|
| 995 |
+
"ipp",
|
| 996 |
+
"\u2581eat",
|
| 997 |
+
"\u2581dom",
|
| 998 |
+
"\u2581",
|
| 999 |
+
"e",
|
| 1000 |
+
"t",
|
| 1001 |
+
"o",
|
| 1002 |
+
"a",
|
| 1003 |
+
"i",
|
| 1004 |
+
"n",
|
| 1005 |
+
"s",
|
| 1006 |
+
"r",
|
| 1007 |
+
"h",
|
| 1008 |
+
"l",
|
| 1009 |
+
"d",
|
| 1010 |
+
"u",
|
| 1011 |
+
"c",
|
| 1012 |
+
"m",
|
| 1013 |
+
"y",
|
| 1014 |
+
"g",
|
| 1015 |
+
"w",
|
| 1016 |
+
"f",
|
| 1017 |
+
"p",
|
| 1018 |
+
"b",
|
| 1019 |
+
"v",
|
| 1020 |
+
"k",
|
| 1021 |
+
"'",
|
| 1022 |
+
"j",
|
| 1023 |
+
"x",
|
| 1024 |
+
"q",
|
| 1025 |
+
"z",
|
| 1026 |
+
"<EOU>",
|
| 1027 |
+
"<EOB>"
|
| 1028 |
+
]
|