Upload 39 files
Browse files- final_scripts/export_decoder_joint.py +188 -0
- final_scripts/export_encoder.py +149 -0
- final_scripts/export_preprocessor.py +95 -0
- final_scripts/inference_benchmark.py +847 -0
- final_scripts/inference_reference_nemo.py +238 -0
- parakeet_decoder.mlmodelc/analytics/coremldata.bin +3 -0
- parakeet_decoder.mlmodelc/coremldata.bin +3 -0
- parakeet_decoder.mlmodelc/metadata.json +116 -0
- parakeet_decoder.mlmodelc/model.mil +47 -0
- parakeet_decoder.mlmodelc/weights/weight.bin +3 -0
- parakeet_decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- parakeet_decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- parakeet_decoder.mlpackage/Manifest.json +18 -0
- parakeet_joint.mlmodelc/analytics/coremldata.bin +3 -0
- parakeet_joint.mlmodelc/coremldata.bin +3 -0
- parakeet_joint.mlmodelc/metadata.json +74 -0
- parakeet_joint.mlmodelc/model.mil +23 -0
- parakeet_joint.mlmodelc/weights/weight.bin +3 -0
- parakeet_joint.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- parakeet_joint.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- parakeet_joint.mlpackage/Manifest.json +18 -0
- preprocessor.mlmodelc/analytics/coremldata.bin +3 -0
- preprocessor.mlmodelc/coremldata.bin +3 -0
- preprocessor.mlmodelc/metadata.json +103 -0
- preprocessor.mlmodelc/model.mil +104 -0
- preprocessor.mlmodelc/weights/weight.bin +3 -0
- preprocessor.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- preprocessor.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- preprocessor.mlpackage/Manifest.json +18 -0
- streaming_encoder.mlmodelc/analytics/coremldata.bin +3 -0
- streaming_encoder.mlmodelc/coremldata.bin +3 -0
- streaming_encoder.mlmodelc/metadata.json +167 -0
- streaming_encoder.mlmodelc/model.mil +0 -0
- streaming_encoder.mlmodelc/weights/weight.bin +3 -0
- streaming_encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- streaming_encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- streaming_encoder.mlpackage/Manifest.json +18 -0
- tokenizer.model +3 -0
- vocab.json +1028 -1028
final_scripts/export_decoder_joint.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import nemo.collections.asr as nemo_asr
|
| 4 |
+
import coremltools as ct
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
class DecoderWrapper(nn.Module):
    """Adapts the NeMo RNNT prediction network (decoder) for CoreML export.

    The CoreML-facing interface carries LSTM state as [Batch, Layers, Hidden],
    while NeMo's decoder expects [Layers, Batch, Hidden]; this wrapper
    transposes the state on the way in and back out.
    """

    def __init__(self, decoder, hidden_size):
        super().__init__()
        self.decoder = decoder
        self.hidden_size = hidden_size

    def forward(self, targets, target_length, h_in, c_in):
        # targets:       [Batch, U] token ids (U == 1 for step-wise decoding)
        # target_length: [Batch] lengths (kept in the signature for the export
        #                interface; not consumed here)
        # h_in / c_in:   [Batch, Layers, Hidden]
        nemo_state = (
            h_in.permute(1, 0, 2).contiguous(),  # -> [Layers, Batch, Hidden]
            c_in.permute(1, 0, 2).contiguous(),
        )

        # decoder.predict(y, state, add_sos=False) -> (dec_out [B, U, H], state)
        dec_out, (h_next, c_next) = self.decoder.predict(
            targets, nemo_state, add_sos=False
        )

        # Hand the state back in the CoreML layout [Batch, Layers, Hidden].
        return dec_out, h_next.permute(1, 0, 2), c_next.permute(1, 0, 2)
class JointWrapper(nn.Module):
    """Adapts the NeMo RNNT joint network for CoreML export.

    Takes RAW (unprojected) encoder and prediction-network activations,
    applies the joint's own projections, combines them with broadcasting,
    and runs the joint net to produce logits.
    """

    def __init__(self, joint):
        super().__init__()
        self.joint = joint

    def forward(self, encoder_output, decoder_output):
        # encoder_output: [B, D, T] raw encoder activations
        # decoder_output: [B, U, H] raw prediction-network activations

        # The encoder projection expects time-major input [B, T, D].
        f = self.joint.project_encoder(encoder_output.transpose(1, 2))

        # The prediction-network projection takes [B, U, H] as-is.
        g = self.joint.project_prednet(decoder_output)

        # Broadcast-add: f [B, T, 1, D'] + g [B, 1, U, D'] -> [B, T, U, D'].
        combined = f.unsqueeze(2) + g.unsqueeze(1)

        # Joint net (activation + final linear) -> logits [B, T, U, V+1].
        return self.joint.joint_net(combined)
def export_rnnt_decoder_joint(model_id="nvidia/parakeet_realtime_eou_120m-v1"):
    """Export the RNNT decoder (prediction network) and joint network of a
    pretrained NeMo Parakeet model to two separate CoreML packages.

    Writes "parakeet_decoder.mlpackage" and "parakeet_joint.mlpackage" into
    the current working directory.

    Args:
        model_id: NGC / Hugging Face id of the pretrained NeMo ASR model.
    """
    print(f"Loading model: {model_id}")
    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location="cpu")
    asr_model.eval()

    decoder = asr_model.decoder
    joint = asr_model.joint

    hidden_size = decoder.pred_hidden  # prediction-net hidden size (640 for this model)
    vocab_size = decoder.vocab_size  # 1024 for this model

    print(f"Decoder Hidden Size: {hidden_size}")
    print(f"Vocab Size: {vocab_size}")

    # --- Export Decoder ---
    print("Exporting Decoder...")
    decoder_wrapper = DecoderWrapper(decoder, hidden_size)
    decoder_wrapper.eval()

    # State layout exposed to CoreML is [Batch, Layers, Hidden]; the wrapper
    # transposes to NeMo's [Layers, Batch, Hidden] internally.
    num_layers = decoder.pred_rnn_layers
    print(f"Decoder Layers: {num_layers}")

    example_targets = torch.zeros((1, 1), dtype=torch.int32)
    example_length = torch.tensor([1], dtype=torch.int32)
    example_h = torch.zeros((1, num_layers, hidden_size), dtype=torch.float32)
    example_c = torch.zeros((1, num_layers, hidden_size), dtype=torch.float32)

    traced_decoder = torch.jit.trace(decoder_wrapper, (example_targets, example_length, example_h, example_c))

    decoder_mlmodel = ct.convert(
        traced_decoder,
        inputs=[
            ct.TensorType(name="targets", shape=(1, 1), dtype=np.int32),
            ct.TensorType(name="target_length", shape=(1,), dtype=np.int32),
            ct.TensorType(name="h_in", shape=(1, num_layers, hidden_size), dtype=np.float32),
            ct.TensorType(name="c_in", shape=(1, num_layers, hidden_size), dtype=np.float32),
        ],
        outputs=[
            ct.TensorType(name="decoder_output"),
            ct.TensorType(name="h_out"),
            ct.TensorType(name="c_out"),
        ],
        minimum_deployment_target=ct.target.iOS17,
        compute_units=ct.ComputeUnit.CPU_ONLY,
    )
    decoder_mlmodel.save("parakeet_decoder.mlpackage")
    print("Saved parakeet_decoder.mlpackage")

    # --- Export Joint ---
    print("Exporting Joint...")
    joint_wrapper = JointWrapper(joint)
    joint_wrapper.eval()

    # The joint consumes one raw encoder frame [1, 512, 1] and one raw
    # prediction-net step [1, 1, 640] per greedy-decode iteration.
    example_enc = torch.randn(1, 512, 1)
    example_dec = torch.randn(1, 1, 640)

    # Sanity-check the wrapper's output shape before tracing.
    with torch.no_grad():
        out = joint_wrapper(example_enc, example_dec)
        print(f"Joint Output Shape: {out.shape}")

    traced_joint = torch.jit.trace(joint_wrapper, (example_enc, example_dec))

    joint_mlmodel = ct.convert(
        traced_joint,
        inputs=[
            ct.TensorType(name="encoder_output", shape=(1, 512, 1), dtype=np.float32),
            ct.TensorType(name="decoder_output", shape=(1, 1, 640), dtype=np.float32),
        ],
        outputs=[
            ct.TensorType(name="logits"),
        ],
        minimum_deployment_target=ct.target.iOS17,
        compute_units=ct.ComputeUnit.CPU_ONLY,
        # FP32 keeps logits numerically close to PyTorch.
        # NOTE(review): the decoder conversion above does not pin
        # compute_precision — confirm whether that asymmetry is intentional.
        compute_precision=ct.precision.FLOAT32,
    )
    joint_mlmodel.save("parakeet_joint.mlpackage")
    print("Saved parakeet_joint.mlpackage")
if __name__ == "__main__":
    # Script entry point: export with the default Parakeet model id.
    export_rnnt_decoder_joint()
final_scripts/export_encoder.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import nemo.collections.asr as nemo_asr
|
| 5 |
+
import coremltools as ct
|
| 6 |
+
import numpy as np
|
| 7 |
+
from typing import Tuple
|
| 8 |
+
|
class StreamingEncoderWrapper(nn.Module):
    """Wraps a NeMo cache-aware streaming encoder for CoreML export.

    NeMo's `cache_aware_stream_step` returns only the freshly written slice
    of each cache; this wrapper re-assembles full fixed-size caches
    (ring-buffer style: drop oldest, append newest) so the exported model
    consumes and produces caches of a constant shape.
    """

    def __init__(self, encoder: nn.Module, keep_all_outputs: bool = True):
        super().__init__()
        self.encoder = encoder
        self.keep_all_outputs = keep_all_outputs

        # Make sure streaming parameters exist before we read them.
        if encoder.streaming_cfg is None:
            encoder.setup_streaming_params()
        self.streaming_cfg = encoder.streaming_cfg

    @staticmethod
    def _refresh_cache(old: torch.Tensor, update: torch.Tensor, dim: int) -> torch.Tensor:
        """Drop the oldest entries of `old` along `dim` and append `update`,
        keeping the total size constant; if the update already fills the
        cache it replaces it outright."""
        step = update.size(dim)
        if step >= old.size(dim):
            return update
        kept = old.narrow(dim, step, old.size(dim) - step)
        return torch.cat([kept, update], dim=dim)

    def forward(
        self,
        mel: torch.Tensor,
        mel_length: torch.Tensor,
        cache_last_channel: torch.Tensor,
        cache_last_time: torch.Tensor,
        cache_last_channel_len: torch.Tensor,
    ) -> Tuple[torch.Tensor, ...]:
        # Run one streaming step of the encoder with the incoming caches.
        outputs = self.encoder.cache_aware_stream_step(
            processed_signal=mel,
            processed_signal_length=mel_length,
            cache_last_channel=cache_last_channel,
            cache_last_time=cache_last_time,
            cache_last_channel_len=cache_last_channel_len,
        )

        # cache_last_channel: [layers, B, T, D] -> time axis is dim 2.
        full_channel_cache = self._refresh_cache(cache_last_channel, outputs[2], dim=2)
        # cache_last_time: [layers, B, D, T] -> time axis is dim 3.
        full_time_cache = self._refresh_cache(cache_last_time, outputs[3], dim=3)

        # (encoded, encoded_len, channel_cache, time_cache, channel_cache_len)
        return (outputs[0], outputs[1], full_channel_cache, full_time_cache, outputs[4])
def export_streaming_encoder(model_id="nvidia/parakeet_realtime_eou_120m-v1", output_path="streaming_encoder.mlpackage", frames=16, shift=None, streaming_chunk_size=None):
    """Export the cache-aware streaming encoder of a NeMo Parakeet model to CoreML.

    Args:
        model_id: Pretrained NeMo ASR model id.
        output_path: Destination .mlpackage path.
        frames: Mel frames per exported chunk (10 ms per frame).
        shift: Shift size in frames; defaults to the chunk size.
        streaming_chunk_size: Chunk size used for encoder streaming setup
            (output steps); defaults to `frames`.
    """
    print(f"Loading model: {model_id}")
    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location="cpu")
    asr_model.eval()

    encoder = asr_model.encoder

    # Configure streaming params: prefer the explicit model chunk size,
    # otherwise fall back to the export frame count.
    c_size = streaming_chunk_size if streaming_chunk_size is not None else frames
    s_size = shift if shift is not None else c_size

    print(f"Setting up streaming params: chunk_size={c_size}, shift_size={s_size}")
    encoder.setup_streaming_params(chunk_size=c_size, shift_size=s_size)

    wrapper = StreamingEncoderWrapper(encoder)
    wrapper.eval()

    # e.g. 16 frames = 160 ms of audio at a 10 ms hop.
    print(f"Exporting for chunk size: {frames} frames ({frames*10}ms)")
    if shift:
        print(f"Shift size: {shift} frames ({shift*10}ms)")

    mel_dim = 128  # Parakeet uses 128 mel features, not 80

    # NOTE(review): cache geometry below (17 layers, channel cache length 70,
    # width 512, time cache length 8) is hard-coded for this FastConformer
    # config — confirm against the loaded encoder rather than assuming.
    num_layers = 17

    example_input = (
        torch.randn(1, mel_dim, frames),
        torch.tensor([frames], dtype=torch.int32),
        torch.randn(num_layers, 1, 70, 512),  # cache_last_channel
        torch.randn(num_layers, 1, 512, 8),  # cache_last_time
        torch.tensor([0], dtype=torch.int32)  # cache_last_channel_len
    )

    print("Tracing model...")
    traced_model = torch.jit.trace(wrapper, example_input, strict=False)

    print("Converting to CoreML...")
    inputs = [
        ct.TensorType(name="mel", shape=(1, mel_dim, frames), dtype=np.float32),
        ct.TensorType(name="mel_length", shape=(1,), dtype=np.int32),
        ct.TensorType(name="cache_last_channel", shape=(num_layers, 1, 70, 512), dtype=np.float32),
        ct.TensorType(name="cache_last_time", shape=(num_layers, 1, 512, 8), dtype=np.float32),
        ct.TensorType(name="cache_last_channel_len", shape=(1,), dtype=np.int32),
    ]

    outputs = [
        ct.TensorType(name="encoder", dtype=np.float32),
        ct.TensorType(name="encoder_length", dtype=np.int32),
        ct.TensorType(name="cache_last_channel_out", dtype=np.float32),
        ct.TensorType(name="cache_last_time_out", dtype=np.float32),
        ct.TensorType(name="cache_last_channel_len_out", dtype=np.int32),
    ]

    mlmodel = ct.convert(
        traced_model,
        inputs=inputs,
        outputs=outputs,
        minimum_deployment_target=ct.target.iOS17,
        compute_units=ct.ComputeUnit.ALL,
    )

    print(f"Saving to {output_path}")
    mlmodel.save(output_path)
    print("Done!")
if __name__ == "__main__":
    # CLI entry point for the streaming-encoder export.
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--frames",
        type=int,
        default=16,
        help="Number of frames per chunk (10ms per frame)",
    )
    arg_parser.add_argument(
        "--shift",
        type=int,
        default=None,
        help="Shift size in frames (default: same as frames)",
    )
    arg_parser.add_argument(
        "--model-chunk-size",
        type=int,
        default=None,
        help="Chunk size for model setup (output steps). If None, uses frames.",
    )
    arg_parser.add_argument(
        "--output",
        type=str,
        default="streaming_encoder.mlpackage",
        help="Output path",
    )
    opts = arg_parser.parse_args()

    export_streaming_encoder(
        frames=opts.frames,
        shift=opts.shift,
        streaming_chunk_size=opts.model_chunk_size,
        output_path=opts.output,
    )
final_scripts/export_preprocessor.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import nemo.collections.asr as nemo_asr
|
| 4 |
+
import coremltools as ct
|
| 5 |
+
import numpy as np
|
| 6 |
+
import argparse
|
| 7 |
+
|
class PreprocessorWrapper(nn.Module):
    """Thin export wrapper around the NeMo audio preprocessor.

    Exposes the preprocessor through positional tensor arguments so it can
    be traced with torch.jit and converted to CoreML.
    """

    def __init__(self, preprocessor: nn.Module):
        super().__init__()
        self.preprocessor = preprocessor

    def forward(
        self,
        input_signal: torch.Tensor,
        length: torch.Tensor,
    ):
        # Delegate to the wrapped preprocessor, which yields the mel
        # features and their per-example frame lengths.
        features, feature_lengths = self.preprocessor(
            input_signal=input_signal,
            length=length,
        )
        return features, feature_lengths
def export_preprocessor(
    model_id="nvidia/parakeet_realtime_eou_120m-v1",
    output_path="preprocessor.mlpackage",
    chunk_ms=160
):
    """Export the NeMo audio preprocessor (waveform -> mel features) to CoreML.

    Args:
        model_id: Pretrained NeMo ASR model id.
        output_path: Destination .mlpackage path.
        chunk_ms: Default chunk size in milliseconds, used as the example
            input length for tracing (the exported model accepts a range
            of lengths, see the RangeDim below).
    """
    print(f"Loading model: {model_id}")
    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location="cpu")
    asr_model.eval()

    preprocessor = asr_model.preprocessor
    # Disable dither (random noise) and length padding so inference output
    # is deterministic and tracks the input length exactly.
    if hasattr(preprocessor, 'dither'):
        preprocessor.dither = 0.0
    if hasattr(preprocessor, 'pad_to'):
        preprocessor.pad_to = 0

    wrapper = PreprocessorWrapper(preprocessor)
    wrapper.eval()

    # Audio samples per chunk at 16 kHz: e.g. 160 ms -> 2560 samples.
    chunk_samples = int(chunk_ms / 1000 * 16000)

    print(f"Chunk: {chunk_ms}ms = {chunk_samples} samples")

    # Example input for tracing.
    # NOTE(review): the traced length is int64 while the CoreML "length"
    # input below is declared int32 — confirm coremltools performs this
    # cast as intended.
    example_input = (
        torch.randn(1, chunk_samples),
        torch.tensor([chunk_samples], dtype=torch.int64),
    )

    print("Tracing model...")
    traced_model = torch.jit.trace(wrapper, example_input, strict=False)

    print("Converting to CoreML...")
    # RangeDim allows variable-length audio: 1600 samples (100 ms) up to
    # 16000 samples (1 s); longer audio must be split by the caller.
    inputs = [
        ct.TensorType(
            name="input_signal",
            shape=ct.Shape(shape=(1, ct.RangeDim(lower_bound=1600, upper_bound=16000, default=chunk_samples))),
            dtype=np.float32
        ),
        ct.TensorType(name="length", shape=(1,), dtype=np.int32),
    ]

    outputs = [
        ct.TensorType(name="mel", dtype=np.float32),
        ct.TensorType(name="mel_length", dtype=np.int32),
    ]

    mlmodel = ct.convert(
        traced_model,
        inputs=inputs,
        outputs=outputs,
        compute_units=ct.ComputeUnit.CPU_ONLY,
        minimum_deployment_target=ct.target.iOS17,
    )

    print(f"Saving to {output_path}")
    mlmodel.save(output_path)
    print("Done!")
if __name__ == "__main__":
    # CLI entry point for the preprocessor export.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--chunk-ms",
        type=int,
        default=160,
        help="Chunk size in milliseconds",
    )
    cli.add_argument(
        "--output-path",
        type=str,
        default="preprocessor.mlpackage",
        help="Output path",
    )
    ns = cli.parse_args()

    export_preprocessor(chunk_ms=ns.chunk_ms, output_path=ns.output_path)
final_scripts/inference_benchmark.py
ADDED
|
@@ -0,0 +1,847 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import torch
|
| 3 |
+
import torchaudio
|
| 4 |
+
import coremltools as ct
|
| 5 |
+
import numpy as np
|
| 6 |
+
import nemo.collections.asr as nemo_asr
|
| 7 |
+
from nemo.collections.asr.parts.utils.streaming_utils import CacheAwareStreamingAudioBuffer
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
import jiwer
|
| 10 |
+
import time
|
| 11 |
+
|
def load_manifest(dataset_path, subset='test-clean', max_files=None):
    """Build a list of manifest entries from a LibriSpeech-style directory.

    Walks `dataset_path/subset` for FLAC files and pairs each one with its
    transcript line from the sibling `{speaker}-{chapter}.trans.txt` file.

    Args:
        dataset_path: Root of the dataset (contains the subset directory).
        subset: Subset directory name, e.g. 'test-clean'.
        max_files: Optional cap on the number of entries returned.

    Returns:
        List of dicts with keys 'audio_filepath', 'text', and 'duration'
        (duration is a 0 placeholder; it is not read from the audio here).

    Raises:
        FileNotFoundError: If the subset directory is missing or contains
            no FLAC files.
    """
    subset_dir = Path(dataset_path) / subset
    if not subset_dir.exists():
        raise FileNotFoundError(f"Dataset directory not found: {subset_dir}")

    # Sort for determinism across filesystems.
    flac_files = sorted(subset_dir.rglob('*.flac'))
    if not flac_files:
        raise FileNotFoundError(f"No FLAC files found in {subset_dir}")

    # Parse each .trans.txt once and cache it, instead of re-reading the
    # same transcript file for every FLAC utterance in the chapter.
    transcripts_cache = {}
    entries = []
    for flac_path in flac_files:
        if max_files and len(entries) >= max_files:
            break

        speaker_id = flac_path.parent.parent.name
        chapter_id = flac_path.parent.name
        trans_file = flac_path.parent / f"{speaker_id}-{chapter_id}.trans.txt"

        if trans_file not in transcripts_cache:
            mapping = {}
            if trans_file.exists():
                with open(trans_file, 'r') as f:
                    for line in f:
                        # Each line: "<utterance_id> <transcript text>"
                        parts = line.strip().split(' ', 1)
                        if len(parts) == 2:
                            mapping[parts[0]] = parts[1]
            transcripts_cache[trans_file] = mapping

        text = transcripts_cache[trans_file].get(flac_path.stem)
        if text is not None:
            entries.append({
                'audio_filepath': str(flac_path),
                'text': text,
                'duration': 0
            })
    print(f"Loaded {len(entries)} entries from {subset_dir}")
    return entries
| 48 |
+
def run_coreml_pipeline(coreml_encoder, coreml_decoder, coreml_joint, pytorch_model, audio_path, coreml_preprocessor=None):
    """Fully-CoreML streaming RNNT inference on a single audio file.

    The CoreML encoder/decoder/joint models are driven chunk by chunk.  The
    PyTorch NeMo model is used only for its cache-aware streaming buffer
    (chunking/feature extraction) and for detokenizing the hypothesis.

    Args:
        coreml_encoder, coreml_decoder, coreml_joint: loaded MLModel objects.
        pytorch_model: NeMo ASR model (streaming buffer + tokenizer/decoding).
        audio_path: Path to the audio file.
        coreml_preprocessor: accepted for call-site symmetry but NOT used in
            this function — features come from the streaming buffer.

    Returns:
        dict with 'hypothesis' (decoded text, <EOU> stripped) and
        'audio_length' in seconds; empty hypothesis if audio loading fails.
    """
    # 1. Load Audio (mono, 16 kHz)
    try:
        audio, sr = torchaudio.load(audio_path)
        if sr != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
            audio = resampler(audio)
        if audio.shape[0] > 1:
            # Downmix multi-channel audio to mono.
            audio = audio.mean(dim=0, keepdim=True)

        # NOTE(review): audio_tensor/audio_len are not used below (the
        # streaming buffer re-reads the file); only `audio` is used, for the
        # duration computation at the end.
        audio_tensor = audio
        audio_len = torch.tensor([audio.shape[1]], dtype=torch.long)
    except Exception as e:
        print(f"Error loading {audio_path}: {e}")
        return {'hypothesis': "", 'audio_length': 0}

    # 2. Setup Streaming Params & Buffer
    # Use chunk_size=4 to match PyTorch success (approx 320ms compute, 410ms input)
    pytorch_model.encoder.setup_streaming_params(chunk_size=4, shift_size=4)

    streaming_buffer = CacheAwareStreamingAudioBuffer(
        model=pytorch_model,
        online_normalization=False,
        pad_and_drop_preencoded=False
    )
    streaming_buffer.append_audio_file(audio_path, stream_id=-1)

    # 3. CoreML True Streaming Loop
    # Initialize encoder cache tensors.  Shapes must match the exported
    # CoreML encoder: 17 conformer layers, 70-frame channel cache,
    # hidden size 512, time-cache depth 8 — TODO confirm against export.
    num_layers = 17
    cache_last_channel = np.zeros((num_layers, 1, 70, 512), dtype=np.float32)
    cache_last_time = np.zeros((num_layers, 1, 512, 8), dtype=np.float32)
    cache_last_channel_len = np.zeros((1,), dtype=np.int32)

    # Initialize decoder LSTM state (1 layer, batch 1, hidden 640).
    h_state = np.zeros((1, 1, 640), dtype=np.float32)
    c_state = np.zeros((1, 1, 640), dtype=np.float32)

    blank_token = 1026  # Parakeet blank
    last_token = blank_token

    hypothesis_tokens = []
    max_symbols_per_step = 10  # cap symbols per acoustic frame to avoid loops

    fixed_chunk_frames = 41  # Matches export for chunk_size=4

    for chunk_audio, chunk_len in streaming_buffer:
        # --- Encoder Step ---
        # chunk_audio: [1, 128, T]; pad/trim T to the fixed export width.
        T_curr = chunk_audio.shape[2]

        if T_curr < fixed_chunk_frames:
            pad_amt = fixed_chunk_frames - T_curr
            # -16.0 is used as the log-mel padding value — presumably the
            # preprocessor's silence floor; verify against export script.
            padding = torch.full((1, 128, pad_amt), -16.0)
            chunk_audio = torch.cat([chunk_audio, padding], dim=2)
        elif T_curr > fixed_chunk_frames:
            chunk_audio = chunk_audio[:, :, :fixed_chunk_frames]

        chunk_mel_input = chunk_audio.numpy()
        mel_len_input = np.array([fixed_chunk_frames], dtype=np.int32)

        inputs = {
            "mel": chunk_mel_input,
            "mel_length": mel_len_input,
            "cache_last_channel": cache_last_channel,
            "cache_last_time": cache_last_time,
            "cache_last_channel_len": cache_last_channel_len
        }

        outputs = coreml_encoder.predict(inputs)

        # Carry the updated caches into the next chunk.
        cache_last_channel = outputs["cache_last_channel_out"]
        cache_last_time = outputs["cache_last_time_out"]
        cache_last_channel_len = outputs["cache_last_channel_len_out"]

        enc_out = outputs["encoder"]  # [1, 512, 4]

        # --- Decoder Step (Immediate) ---
        # Greedy RNNT decoding per encoder frame.  Note the state handling:
        # the decoder is re-run with (last_token, h_state, c_state) for every
        # frame, and h/c only advance when a non-blank symbol is emitted.
        T_enc = enc_out.shape[2]

        for t in range(T_enc):
            enc_t = enc_out[:, :, t:t+1]  # [1, 512, 1]

            # Run decoder once for the current (token, state) pair.
            targets = np.array([[last_token]], dtype=np.int32)
            target_length = np.array([1], dtype=np.int32)

            dec_inputs = {
                "targets": targets,
                "target_length": target_length,
                "h_in": h_state,
                "c_in": c_state
            }

            dec_outputs = coreml_decoder.predict(dec_inputs)
            decoder_step = dec_outputs["decoder_output"]
            h_state_next = dec_outputs["h_out"]
            c_state_next = dec_outputs["c_out"]

            symbols_added = 0
            while symbols_added < max_symbols_per_step:
                joint_inputs = {
                    "encoder_output": enc_t,
                    "decoder_output": decoder_step
                }

                joint_outputs = coreml_joint.predict(joint_inputs)

                logits = joint_outputs["logits"]
                token_id = int(np.argmax(logits))

                if token_id == blank_token:
                    # Blank: advance to next acoustic frame; decoder state is
                    # NOT advanced (h_state_next/c_state_next are discarded).
                    break

                # EOU Check (1024)
                if token_id == 1024:
                    # End-of-utterance: reset decoder state and restart from
                    # the blank token for the next segment.
                    h_state = np.zeros((1, 1, 640), dtype=np.float32)
                    c_state = np.zeros((1, 1, 640), dtype=np.float32)
                    last_token = blank_token
                    break

                else:
                    # Non-blank symbol: emit it, accept the state transition,
                    # and run the decoder again for the same acoustic frame.
                    hypothesis_tokens.append(token_id)
                    last_token = token_id
                    symbols_added += 1

                    h_state = h_state_next
                    c_state = c_state_next

                    targets = np.array([[last_token]], dtype=np.int32)
                    dec_inputs = {
                        "targets": targets,
                        "target_length": target_length,
                        "h_in": h_state,
                        "c_in": c_state
                    }
                    dec_outputs = coreml_decoder.predict(dec_inputs)
                    decoder_step = dec_outputs["decoder_output"]
                    h_state_next = dec_outputs["h_out"]
                    c_state_next = dec_outputs["c_out"]

    # Decode tokens: drop anything outside the tokenizer vocabulary
    # (blank/EOU ids live above vocab_size).
    vocab_size = pytorch_model.tokenizer.vocab_size
    valid_tokens = [t for t in hypothesis_tokens if t < vocab_size]

    if len(valid_tokens) != len(hypothesis_tokens):
        print(f"Filtered {len(hypothesis_tokens) - len(valid_tokens)} invalid tokens (>= {vocab_size})")

    if not valid_tokens:
        return {
            'hypothesis': "",
            'audio_length': audio.shape[1] / 16000
        }

    hypothesis = pytorch_model.decoding.decode_tokens_to_str([valid_tokens])[0]
    hypothesis = hypothesis.replace("<EOU>", "").strip()

    return {
        'hypothesis': hypothesis,
        'audio_length': audio.shape[1] / 16000
    }
|
| 210 |
+
|
| 211 |
+
def run_pytorch_streaming_pipeline(pytorch_model, audio_path):
    """DEAD CODE — first draft of the pure-PyTorch streaming pipeline.

    NOTE(review): this definition is shadowed by a later function of the same
    name in this file, so it is never called.  It is also not safe to
    resurrect as-is: it falls off the end without a return statement
    (implicitly returns None), and the inner symbol loop never increments
    ``symbols_added``, so a run of non-blank predictions would loop forever.
    Kept only as a record of the manual greedy-RNNT attempt; the working
    version below uses NeMo's ``conformer_stream_step`` instead.
    """
    # 1. Load Audio (mono, 16 kHz)
    try:
        audio, sr = torchaudio.load(audio_path)
        if sr != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
            audio = resampler(audio)
        if audio.shape[0] > 1:
            audio = audio.mean(dim=0, keepdim=True)

        audio_tensor = audio
        audio_len = torch.tensor([audio.shape[1]], dtype=torch.long)
    except Exception as e:
        print(f"Error loading {audio_path}: {e}")
        return {'hypothesis': "", 'audio_length': 0}

    # 2. Preprocessor: full-utterance log-mel features.
    with torch.no_grad():
        processed_signal, processed_signal_len = pytorch_model.preprocessor(
            input_signal=audio_tensor, length=audio_len
        )

    # 3. Streaming Loop over fixed-size mel chunks.
    total_frames = processed_signal.shape[2]
    chunk_frames = 32  # Match CoreML

    # Initialize encoder cache (shapes as in the CoreML pipeline above).
    num_layers = 17
    cache_last_channel = torch.zeros(num_layers, 1, 70, 512)
    cache_last_time = torch.zeros(num_layers, 1, 512, 8)
    cache_last_channel_len = torch.zeros(1, dtype=torch.long)

    # Initialize decoder state; 1026 is the blank token (used here as SOS).
    decoder_state = None
    last_token = torch.tensor([[1026]], dtype=torch.long)  # Blank token

    final_hyp_tokens = []

    for i in range(0, total_frames, chunk_frames):
        end = min(i + chunk_frames, total_frames)
        chunk_mel = processed_signal[:, :, i:end]  # [1, D, T]

        # Pad the last chunk up to chunk_frames if needed.
        if chunk_mel.shape[2] < chunk_frames:
            pad_amt = chunk_frames - chunk_mel.shape[2]
            chunk_mel = torch.nn.functional.pad(chunk_mel, (0, pad_amt))

        chunk_len = torch.tensor([chunk_mel.shape[2]], dtype=torch.long)

        with torch.no_grad():
            # 1. Cache-aware encoder step for this chunk.
            (
                enc_out,
                enc_len,
                cache_last_channel,
                cache_last_time,
                cache_last_channel_len
            ) = pytorch_model.encoder.forward_internal(
                audio_signal=chunk_mel,
                length=chunk_len,
                cache_last_channel=cache_last_channel,
                cache_last_time=cache_last_time,
                cache_last_channel_len=cache_last_channel_len
            )

            # enc_out: [B, D, T_out]; transpose to [B, T_out, D] for the joint.
            enc_out = enc_out.transpose(1, 2)

            # 2. Greedy decoding loop over acoustic frames.
            for t in range(enc_out.shape[1]):
                f_t = enc_out[:, t:t+1, :]  # [1, 1, 512]

                # Pre-project the encoder frame through the joint's encoder
                # branch once per frame (joint.enc is the linear projection).
                f_t_proj = pytorch_model.joint.enc(f_t)  # [1, 1, 640]

                # Cap symbols per frame to prevent infinite loops.
                # NOTE(review): symbols_added is never incremented below, so
                # this cap is ineffective — a known defect of this draft.
                max_symbols = 10
                symbols_added = 0

                while symbols_added < max_symbols:
                    # Decoder step: feed the previous token and state.
                    # NOTE(review): the returned state is accepted
                    # unconditionally here, even on blank — which deviates
                    # from the standard RNNT recipe (on blank the decoder
                    # state should be kept, not advanced).  Another reason
                    # this draft was abandoned.
                    g, _, decoder_state = pytorch_model.decoder.forward(
                        targets=last_token,
                        target_length=torch.tensor([1]),
                        states=decoder_state
                    )

                    # g: [1, 640, U'] — take the last step's output.
                    g = g[:, :, -1:]  # [1, 640, 1]
                    g = g.transpose(1, 2)  # [1, 1, 640]

                    # Project through the joint's prediction branch.
                    g_proj = pytorch_model.joint.pred(g)  # [1, 1, 640]

                    # Joint network over the (same-shape) projected pair.
                    out = pytorch_model.joint.joint_net(f_t_proj + g_proj)  # [1, 1, 1027]

                    # Greedy argmax over the vocabulary (+ blank).
                    k = out.argmax(dim=-1)  # [1, 1]
                    pred_token = k.item()

                    if pred_token == 1026:  # Blank
                        break
                    else:
                        final_hyp_tokens.append(pred_token)
                        last_token = k  # Update last token
                        # (Original draft contained a long design discussion
                        # here about when to accept vs. discard the decoder
                        # state on blank/symbol; summarized in the NOTE above.)
                        pass

    # "Refined Logic Implementation" — abandoned mid-rewrite.
    # NOTE(review): the original indentation of this tail section was
    # ambiguous; it is placed at function level here.  It recomputes an
    # initial decoder step (feeding blank/SOS into a fresh state) and stores
    # the projection/state pair, then the function simply ends — no return.
    last_token = torch.tensor([[1026]], dtype=torch.long)
    decoder_state = None

    # Pre-compute initial g from the SOS/blank token.
    g, _, next_decoder_state = pytorch_model.decoder.forward(
        targets=last_token,
        target_length=torch.tensor([1]),
        states=decoder_state
    )
    # g: [1, 640, U'] — slice the last step and reshape for the joint.
    g = g[:, :, -1:]  # [1, 640, 1]
    g = g.transpose(1, 2)  # [1, 1, 640]

    g_proj = pytorch_model.joint.pred(g)

    # Hold the current decoder projection and its candidate follow-up state;
    # the loop that would consume these was never written.
    current_g_proj = g_proj
    candidate_state = next_decoder_state
def run_pytorch_streaming_pipeline(pytorch_model, audio_path):
    """Pure-PyTorch streaming simulation using NeMo's cache-aware step API.

    Feeds fixed-size mel chunks through ``conformer_stream_step`` while
    carrying the encoder caches and running hypothesis across chunks.  When
    the EOU token (1024) appears in the hypothesis, the committed text is
    appended to the result and the running hypothesis is reset.

    NOTE(review): this definition shadows an earlier draft of the same name
    above; this is the version actually called by main().

    Returns:
        dict with 'hypothesis' (EOU markers stripped) and 'audio_length'
        in seconds.
    """
    import librosa
    # Load audio resampled to 16 kHz mono (librosa returns a 1-D float array).
    audio, sample_rate = librosa.load(audio_path, sr=16000)

    # Preprocessing: full-utterance log-mel features.
    # NOTE(review): not wrapped in torch.no_grad() here, unlike the other
    # pipelines; gradients are not used, it just costs memory.
    processed_signal, processed_signal_length = pytorch_model.preprocessor(
        input_signal=torch.tensor([audio]),
        length=torch.tensor([len(audio)])
    )

    # Switch the model to greedy RNNT decoding if it isn't already — the
    # stream-step API is driven with per-chunk greedy hypotheses.
    if pytorch_model.decoding.cfg.strategy != 'greedy':
        print("Switching to 'greedy' decoding strategy for streaming...")
        from omegaconf import OmegaConf
        from nemo.collections.asr.parts.submodules.rnnt_decoding import RNNTBPEDecoding

        new_cfg = OmegaConf.create({
            'strategy': 'greedy',
            'greedy': {'max_symbols': 10},
            'preserve_alignments': True,
            'compute_timestamps': False
        })

        # Rebuild the decoding wrapper in place; mutates the model object.
        pytorch_model.decoding = RNNTBPEDecoding(
            decoding_cfg=new_cfg,
            decoder=pytorch_model.decoder,
            joint=pytorch_model.joint,
            tokenizer=pytorch_model.tokenizer
        )

    # Streaming loop over fixed-size mel chunks.
    total_frames = processed_signal.shape[2]
    chunk_frames = 32

    # Initialize encoder caches (same shapes as the CoreML export).
    num_layers = 17
    cache_last_channel = torch.zeros(num_layers, 1, 70, 512)
    cache_last_time = torch.zeros(num_layers, 1, 512, 8)
    cache_last_channel_len = torch.zeros(1, dtype=torch.long)

    # Running decoding context carried between chunks.
    # NOTE(review): previous_pred_out is passed every step but never updated
    # from the step outputs — confirm whether conformer_stream_step needs it.
    previous_hypotheses = None
    previous_pred_out = None

    # Text committed at EOU boundaries; the in-flight segment text lives in
    # previous_hypotheses until EOU or end of audio.
    final_hyp = ""

    for i in range(0, total_frames, chunk_frames):
        end = min(i + chunk_frames, total_frames)
        chunk_mel = processed_signal[:, :, i:end]  # [1, D, T]

        # Pad the final chunk to chunk_frames if needed.
        if chunk_mel.shape[2] < chunk_frames:
            pad_amt = chunk_frames - chunk_mel.shape[2]
            chunk_mel = torch.nn.functional.pad(chunk_mel, (0, pad_amt))

        chunk_len = torch.tensor([chunk_mel.shape[2]], dtype=torch.long)

        with torch.no_grad():
            # Native cache-aware streaming step: encoder + greedy decode.
            (
                greedy_predictions,
                all_hyp_text,
                cache_last_channel,
                cache_last_time,
                cache_last_channel_len,
                best_hyp_list,  # This is the Hypothesis list
            ) = pytorch_model.conformer_stream_step(
                processed_signal=chunk_mel,
                processed_signal_length=chunk_len,
                cache_last_channel=cache_last_channel,
                cache_last_time=cache_last_time,
                cache_last_channel_len=cache_last_channel_len,
                previous_hypotheses=previous_hypotheses,
                previous_pred_out=previous_pred_out
            )

        # Carry the running hypothesis into the next chunk.
        previous_hypotheses = best_hyp_list

        # best_hyp_list may be a list of Hypothesis objects or a single one;
        # normalize to the first/only object.
        current_hyp_obj = None
        if best_hyp_list:
            if isinstance(best_hyp_list, list):
                current_hyp_obj = best_hyp_list[0]
            else:
                current_hyp_obj = best_hyp_list

        # Check for the EOU token (1024) in the decoded sequence; y_sequence
        # may be a plain list or a tensor depending on NeMo version.
        is_eou = False
        if current_hyp_obj:
            if hasattr(current_hyp_obj, 'y_sequence'):
                y_seq = current_hyp_obj.y_sequence
                if isinstance(y_seq, list):
                    if 1024 in y_seq:
                        is_eou = True
                elif torch.is_tensor(y_seq):
                    if (y_seq == 1024).any():
                        is_eou = True

        if is_eou:
            # EOU detected: commit the segment text and reset the running
            # hypothesis so the next segment decodes from scratch.
            if current_hyp_obj and hasattr(current_hyp_obj, 'text'):
                final_hyp += current_hyp_obj.text + " "

            previous_hypotheses = None
            print("DEBUG: EOU detected, resetting previous_hypotheses")
        else:
            # No EOU yet: the in-flight text stays in previous_hypotheses and
            # is committed either at the next EOU or after the loop below.
            pass

    # End of loop: commit any remaining (uncommitted) segment text.
    if previous_hypotheses:
        last_hyp_list = previous_hypotheses
        if isinstance(last_hyp_list, list):
            last_hyp_obj = last_hyp_list[0]
        else:
            last_hyp_obj = last_hyp_list

        if last_hyp_obj and hasattr(last_hyp_obj, 'text'):
            final_hyp += last_hyp_obj.text

    # Strip <eou> markers in either casing.
    final_hyp = final_hyp.replace("<eou>", "").replace("<EOU>", "").strip()

    return {
        'hypothesis': final_hyp,
        'audio_length': audio.shape[0] / 16000
    }
|
| 599 |
+
|
| 600 |
+
def run_pytorch_pipeline(pytorch_model, audio_path):
    """Offline (non-streaming) reference transcription via model.transcribe().

    Args:
        pytorch_model: Model exposing ``transcribe(paths, batch_size, verbose)``.
        audio_path: Path to a single audio file.

    Returns:
        dict with 'hypothesis' (EOU markers removed) and 'audio_length'
        (always 0 here — duration is not computed in offline mode).
        On any failure the hypothesis is empty.
    """
    try:
        result = pytorch_model.transcribe([audio_path], batch_size=1, verbose=False)

        # Some NeMo versions return a (best_hyps, all_hyps) tuple.
        if isinstance(result, tuple):
            result = result[0]

        best = result[0]

        # Newer NeMo returns Hypothesis objects with a .text attribute;
        # older versions return plain strings.
        if hasattr(best, 'text'):
            best = best.text

        # Remove end-of-utterance markers in either casing.
        if isinstance(best, str):
            best = best.replace("<eou>", "").replace("<EOU>", "").strip()

        return {
            'hypothesis': best,
            'audio_length': 0  # Placeholder
        }

    except Exception as e:
        print(f"Error running PyTorch pipeline on {audio_path}: {e}")
        return {'hypothesis': "", 'audio_length': 0}
|
| 627 |
+
|
| 628 |
+
def main():
    """CLI entry point: benchmark WER on a LibriSpeech subset.

    Selects one of four pipelines via flags — full CoreML (default),
    --hybrid (CoreML encoder + PyTorch decoding), --pytorch-only (offline
    transcribe), or --pytorch-streaming (NeMo cache-aware streaming) — then
    prints per-file and average WER.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='/Users/kikow/Library/Caches/fluidaudio/LibriSpeech/LibriSpeech', help='Path to LibriSpeech')
    parser.add_argument('--subset', default='test-clean', help='Subset to test')
    parser.add_argument('--max-files', type=int, default=100, help='Number of files to process')

    # Default paths based on file list
    parser.add_argument('--coreml-encoder', default='streaming_encoder_320ms.mlpackage')
    parser.add_argument('--coreml-decoder', default='parakeet_decoder.mlpackage')
    parser.add_argument('--coreml-joint', default='parakeet_joint.mlpackage')

    parser.add_argument('--pytorch-model', default='nvidia/parakeet_realtime_eou_120m-v1')
    parser.add_argument('--coreml-preprocessor', default='preprocessor_160ms.mlpackage')
    parser.add_argument('--hybrid', action='store_true', help='Use Hybrid mode (CoreML Encoder + PyTorch Decoder)')
    parser.add_argument('--pytorch-only', action='store_true', help='Use pure PyTorch model (Offline)')
    parser.add_argument('--pytorch-streaming', action='store_true', help='Use pure PyTorch model (Streaming Simulation)')
    args = parser.parse_args()

    # The PyTorch model is always loaded: even the CoreML pipelines use it
    # for chunking/tokenization.
    print(f"Loading PyTorch model: {args.pytorch_model}")
    pytorch_model = nemo_asr.models.ASRModel.from_pretrained(args.pytorch_model, map_location="cpu")
    pytorch_model.eval()

    # Only load CoreML if not pytorch-only or pytorch-streaming
    coreml_encoder = None
    coreml_decoder = None
    coreml_joint = None
    coreml_preprocessor = None

    if not args.pytorch_only and not args.pytorch_streaming:
        print(f"Loading CoreML Encoder: {args.coreml_encoder}")
        coreml_encoder = ct.models.MLModel(args.coreml_encoder)

        if args.hybrid:
            # Preprocessor is optional in hybrid mode; fall back to PyTorch
            # feature extraction if it fails to load.
            print(f"Loading CoreML Preprocessor: {args.coreml_preprocessor}")
            try:
                coreml_preprocessor = ct.models.MLModel(args.coreml_preprocessor)
            except Exception as e:
                print(f"Failed to load CoreML Preprocessor: {e}")
                print("Falling back to PyTorch Preprocessor")

        if not args.hybrid:
            # Full-CoreML mode also needs the decoder and joint networks.
            print(f"Loading CoreML Decoder: {args.coreml_decoder}")
            coreml_decoder = ct.models.MLModel(args.coreml_decoder)

            print(f"Loading CoreML Joint: {args.coreml_joint}")
            coreml_joint = ct.models.MLModel(args.coreml_joint)
    elif args.pytorch_streaming:
        print("Running in PYTORCH-STREAMING mode")
    else:
        print("Running in PYTORCH-ONLY (Offline) mode")

    entries = load_manifest(args.dataset, args.subset, args.max_files)

    total_wer = 0
    count = 0
    start_time = time.time()  # NOTE(review): captured but never reported

    print(f"Starting Benchmark on {len(entries)} files...")

    for i, entry in enumerate(entries):
        try:
            if args.pytorch_streaming:
                result = run_pytorch_streaming_pipeline(pytorch_model, entry['audio_filepath'])
            elif args.pytorch_only:
                result = run_pytorch_pipeline(pytorch_model, entry['audio_filepath'])
            elif args.hybrid:
                result = run_hybrid_pipeline(coreml_encoder, pytorch_model, entry['audio_filepath'], coreml_preprocessor)
            else:
                result = run_coreml_pipeline(coreml_encoder, coreml_decoder, coreml_joint, pytorch_model, entry['audio_filepath'], coreml_preprocessor)

            ref = entry['text'].lower()
            hyp = result['hypothesis'].lower()

            # Per-file WER; the final figure below is the mean of per-file
            # WERs, not a corpus-level (pooled word-count) WER.
            wer = jiwer.wer(ref, hyp)
            total_wer += wer
            count += 1

            print(f"[{i+1}/{len(entries)}] {Path(entry['audio_filepath']).name} | WER: {wer:.2%} | Ref: '{ref}' | Hyp: '{hyp}'")
        except Exception as e:
            # A single failing file should not abort the whole benchmark.
            print(f"[{i+1}/{len(entries)}] Failed: {e}")
            import traceback
            traceback.print_exc()

    if count > 0:
        avg_wer = total_wer / count
        print(f"\nAverage WER over {count} files: {avg_wer:.2%}")
    else:
        print("\nNo files processed successfully.")
|
| 716 |
+
|
| 717 |
+
def run_hybrid_pipeline(coreml_encoder, pytorch_model, audio_path, coreml_preprocessor=None):
    """Hybrid inference: CoreML streaming encoder + PyTorch RNNT decoding.

    Features are extracted (CoreML preprocessor if given, else PyTorch), the
    CoreML encoder is run chunk-by-chunk with carried caches, the per-chunk
    encoder outputs are concatenated, and NeMo's offline RNNT decoding is run
    once over the full sequence.

    Returns:
        dict with 'hypothesis' and 'audio_length' in seconds; empty
        hypothesis if audio loading fails or no encoder output was produced.
    """
    # 1. Load Audio (mono, 16 kHz)
    try:
        audio, sr = torchaudio.load(audio_path)
        if sr != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
            audio = resampler(audio)
        if audio.shape[0] > 1:
            audio = audio.mean(dim=0, keepdim=True)

        audio_tensor = audio
        audio_len = torch.tensor([audio.shape[1]], dtype=torch.long)
    except Exception as e:
        print(f"Error loading {audio_path}: {e}")
        return {'hypothesis': "", 'audio_length': 0}

    # 2. Preprocessor
    if coreml_preprocessor:
        # CoreML Preprocessor
        # Input: input_signal (1, N); Output: mel (1, 128, T)
        audio_np = audio.numpy()
        if audio_np.ndim == 2:
            audio_np = audio_np.reshape(1, -1)  # Ensure (1, N)

        inputs = {
            "input_signal": audio_np,
            # NOTE(review): length is sent as float32 — presumably what the
            # exported model expects; confirm against the export script.
            "length": np.array([audio_np.shape[1]], dtype=np.float32)
        }
        out = coreml_preprocessor.predict(inputs)
        processed_signal = torch.from_numpy(out["mel"])  # (1, 128, T)
        # Defensive squeeze in case CoreML returns an extra leading dim.
        if processed_signal.ndim == 4:
            processed_signal = processed_signal.squeeze(0)

        # Downstream expects (1, 128, T), matching PyTorch's preprocessor.
    else:
        # PyTorch Preprocessor fallback.
        with torch.no_grad():
            processed_signal, processed_signal_len = pytorch_model.preprocessor(
                input_signal=audio_tensor, length=audio_len
            )

    # 3. CoreML Encoder Loop over fixed 32-frame chunks.
    total_frames = processed_signal.shape[2]

    # Initialize CoreML cache (shapes match the exported streaming encoder).
    num_layers = 17
    cache_last_channel = np.zeros((num_layers, 1, 70, 512), dtype=np.float32)
    cache_last_time = np.zeros((num_layers, 1, 512, 8), dtype=np.float32)
    cache_last_channel_len = np.zeros((1,), dtype=np.int32)

    accumulated_encoder_output = []

    fixed_chunk_size = 32
    chunk_frames = 32

    for i in range(0, total_frames, chunk_frames):
        end = min(i + chunk_frames, total_frames)
        chunk_mel = processed_signal[:, :, i:end].numpy()  # [1, 128, T]

        current_chunk_len = chunk_mel.shape[2]

        # Pad the final partial chunk with the log-mel floor value (-16.0);
        # the reported length is always the fixed chunk size either way.
        if current_chunk_len < fixed_chunk_size:
            pad_amt = fixed_chunk_size - current_chunk_len
            padding = np.full((1, 128, pad_amt), -16.0, dtype=np.float32)
            chunk_mel_input = np.concatenate([chunk_mel, padding], axis=2)
            mel_len_input = np.array([fixed_chunk_size], dtype=np.int32)
        else:
            chunk_mel_input = chunk_mel
            mel_len_input = np.array([fixed_chunk_size], dtype=np.int32)

        inputs = {
            "mel": chunk_mel_input,
            "mel_length": mel_len_input,
            "cache_last_channel": cache_last_channel,
            "cache_last_time": cache_last_time,
            "cache_last_channel_len": cache_last_channel_len
        }

        outputs = coreml_encoder.predict(inputs)

        # Carry caches forward into the next chunk.
        cache_last_channel = outputs["cache_last_channel_out"]
        cache_last_time = outputs["cache_last_time_out"]
        cache_last_channel_len = outputs["cache_last_channel_len_out"]

        enc_out = outputs["encoder"]
        # enc_len = outputs["encoder_length"] # Always 3?

        accumulated_encoder_output.append(enc_out)

    if not accumulated_encoder_output:
        return {'hypothesis': "", 'audio_length': audio.shape[1] / 16000}

    # Concatenate per-chunk encoder outputs along time: [1, 512, T_total].
    encoder_output = np.concatenate(accumulated_encoder_output, axis=2)

    # 4. PyTorch Decoding over the full encoder sequence.
    encoder_output_tensor = torch.from_numpy(encoder_output)  # [1, 512, T]

    # NOTE(review): the full concatenated length is passed as the valid
    # length, so any frames produced from chunk padding are decoded too —
    # a deliberate simplification, not an exact-length computation.
    encoded_lengths = torch.tensor([encoder_output.shape[2]], dtype=torch.long)

    with torch.no_grad():
        # Offline greedy RNNT decoding; expects (B, D, T) encoder output.
        hypotheses = pytorch_model.decoding.rnnt_decoder_predictions_tensor(
            encoder_output=encoder_output_tensor,
            encoded_lengths=encoded_lengths,
            return_hypotheses=True
        )

    hypothesis = hypotheses[0].text

    return {
        'hypothesis': hypothesis,
        'audio_length': audio.shape[1] / 16000
    }
|
| 845 |
+
|
| 846 |
+
# Script entry point: run the WER benchmark CLI.
if __name__ == "__main__":
    main()
|
final_scripts/inference_reference_nemo.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import soundfile as sf
|
| 3 |
+
import librosa
|
| 4 |
+
import numpy as np
|
| 5 |
+
import logging
|
| 6 |
+
from omegaconf import OmegaConf, open_dict
|
| 7 |
+
import nemo.collections.asr as nemo_asr
|
| 8 |
+
from nemo.collections.asr.parts.utils.streaming_utils import CacheAwareStreamingAudioBuffer
|
| 9 |
+
from nemo.collections.asr.parts.submodules.rnnt_decoding import RNNTBPEDecoding
|
| 10 |
+
from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis
|
| 11 |
+
|
| 12 |
+
# Configure logging
|
| 13 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 14 |
+
|
| 15 |
+
def setup_decoding_strategy(asr_model, strategy='greedy'):
    """Configure the model's RNNT decoding strategy.

    First tries the model's own ``change_decoding_strategy`` hook with a
    minimal decoding config; if that raises, falls back to building a fresh
    ``RNNTBPEDecoding`` from the model's decoder/joint/tokenizer and swapping
    it in manually (needed for the Parakeet EOU checkpoint).
    """
    print(f"Setting up decoding strategy: {strategy}")

    # Minimal decoding config (the NeMo example derives this from cfg.rnnt_decoding).
    cfg = OmegaConf.create(
        {
            'strategy': strategy,
            'greedy': {'max_symbols': 10},   # standard greedy params
            'fused_batch_size': -1,
            'compute_timestamps': False,     # disabled for stability
            'preserve_alignments': False,
        }
    )

    if hasattr(asr_model, 'change_decoding_strategy'):
        try:
            asr_model.change_decoding_strategy(cfg)
        except Exception as e:
            print(f"Standard change_decoding_strategy failed: {e}")
            print("Attempting manual replacement...")
        else:
            print("Successfully changed decoding strategy via change_decoding_strategy")
            return

    # Manual replacement fallback (Required for Parakeet EOU).
    replaceable = hasattr(asr_model, 'decoding') and isinstance(asr_model.decoding, RNNTBPEDecoding)
    if not replaceable:
        print("Could not change decoding strategy.")
        return

    asr_model.decoding = RNNTBPEDecoding(
        decoding_cfg=cfg,
        decoder=asr_model.decoder,
        joint=asr_model.joint,
        tokenizer=asr_model.tokenizer,
    )
    print("Successfully replaced decoding strategy manually.")
|
| 53 |
+
|
| 54 |
+
def perform_streaming(asr_model, streaming_buffer, device):
    """
    Performs streaming inference using conformer_stream_step.
    Follows the NeMo example structure.

    Iterates over (chunk_audio, chunk_lengths) pairs from the buffer, feeding
    the encoder cache tensors back in at every step.  Text is committed to the
    final transcription only when an end-of-utterance token is observed in the
    hypothesis; any trailing partial hypothesis is appended after the loop.
    Returns the accumulated transcription with "<eou>" markers stripped.
    """
    # Get initial cache state
    # Note: The example uses batch_size from buffer, we assume 1 for simplicity here
    batch_size = 1
    cache_last_channel, cache_last_time, cache_last_channel_len = asr_model.encoder.get_initial_cache_state(
        batch_size=batch_size
    )

    # Move cache to device (get_initial_cache_state may return None caches,
    # e.g. for non-cache-aware encoders, hence the guard)
    if cache_last_channel is not None:
        cache_last_channel = cache_last_channel.to(device)
        cache_last_time = cache_last_time.to(device)
        cache_last_channel_len = cache_last_channel_len.to(device)

    previous_hypotheses = None
    # NOTE(review): previous_pred_out is initialized once but never refreshed
    # from greedy_predictions inside the loop — confirm whether
    # conformer_stream_step expects it to be threaded through between steps.
    previous_pred_out = None

    final_transcription = ""

    print("Starting streaming loop...")

    # The buffer yields pre-processed feature chunks plus their valid lengths.
    for step_num, (chunk_audio, chunk_lengths) in enumerate(streaming_buffer):
        chunk_audio = chunk_audio.to(device)
        chunk_lengths = chunk_lengths.to(device)

        print(f"Step {step_num}: chunk_audio shape: {chunk_audio.shape}")

        # conformer_stream_step: runs one encoder+decoder step and returns the
        # updated cache tensors, which we feed back in on the next iteration.
        with torch.no_grad():
            (
                greedy_predictions,
                transcribed_texts,
                cache_last_channel,
                cache_last_time,
                cache_last_channel_len,
                best_hyp_list,
            ) = asr_model.conformer_stream_step(
                processed_signal=chunk_audio,
                processed_signal_length=chunk_lengths,
                cache_last_channel=cache_last_channel,
                cache_last_time=cache_last_time,
                cache_last_channel_len=cache_last_channel_len,
                keep_all_outputs=False,  # We don't need to keep all outputs for now
                previous_hypotheses=previous_hypotheses,
                previous_pred_out=previous_pred_out,
                return_transcription=True
            )

        # Update state for next step
        previous_hypotheses = best_hyp_list

        # Extract text and handle EOU (The "Complex" Part)
        current_hyp = best_hyp_list[0] if isinstance(best_hyp_list, list) else best_hyp_list

        # Check for EOU (1024) — presumably the end-of-utterance token id for
        # this checkpoint; TODO confirm against the model's tokenizer.
        # y_sequence may be a plain list or a tensor depending on decoder path.
        is_eou = False
        if hasattr(current_hyp, 'y_sequence'):
            y_seq = current_hyp.y_sequence
            if isinstance(y_seq, list) and 1024 in y_seq:
                is_eou = True
            elif torch.is_tensor(y_seq) and (y_seq == 1024).any():
                is_eou = True

        if is_eou:
            # FIX: Reset decoder state on EOU so the next utterance starts fresh
            previous_hypotheses = None
            if hasattr(current_hyp, 'text'):
                final_transcription += current_hyp.text + " "

        # Note: If not EOU, we don't append text yet because it's partial.
        # The example accumulates `transcribed_texts` but that might be for the whole batch/history?
        # In strict streaming, we usually only commit on EOU or stability.
        # For this demo, we'll just print partials.

        # print(f"Step {step_num}: {current_hyp.text if hasattr(current_hyp, 'text') else ''}")

    # Append final bit: whatever partial hypothesis survives after the last
    # chunk (i.e. text that was never terminated by an EOU token)
    if previous_hypotheses:
        last_hyp = previous_hypotheses[0] if isinstance(previous_hypotheses, list) else previous_hypotheses
        if hasattr(last_hyp, 'text'):
            final_transcription += last_hyp.text

    return final_transcription.replace("<eou>", "").strip()
|
| 141 |
+
|
| 142 |
+
import argparse
|
| 143 |
+
import jiwer
|
| 144 |
+
from pathlib import Path
|
| 145 |
+
|
| 146 |
+
def load_manifest(dataset_path, subset='test-clean', max_files=None):
    """Build manifest entries from a LibriSpeech-style directory tree.

    Walks ``dataset_path/subset`` for ``*.flac`` files (sorted for
    determinism) and pairs each utterance with its reference text from the
    chapter's ``<speaker>-<chapter>.trans.txt`` file.

    Args:
        dataset_path: Root of the extracted LibriSpeech dataset.
        subset: Subset directory name (e.g. 'test-clean').
        max_files: Optional cap on the number of entries returned.

    Returns:
        List of dicts with keys 'audio_filepath', 'text', and 'duration'
        (duration is a 0 placeholder — it is not needed for WER scoring).

    Raises:
        FileNotFoundError: If the subset directory or any FLAC files are missing.
    """
    subset_dir = Path(dataset_path) / subset
    if not subset_dir.exists():
        raise FileNotFoundError(f"Dataset directory not found: {subset_dir}")

    flac_files = list(subset_dir.rglob('*.flac'))
    if not flac_files:
        raise FileNotFoundError(f"No FLAC files found in {subset_dir}")

    # Sort for determinism
    flac_files = sorted(flac_files)

    # Parse each .trans.txt at most once: the original re-opened and rescanned
    # the same chapter transcript for every utterance in that chapter.
    transcripts = {}  # trans file path -> {utterance_id: text}

    entries = []
    for flac_path in flac_files:
        if max_files and len(entries) >= max_files:
            break

        speaker_id = flac_path.parent.parent.name
        chapter_id = flac_path.parent.name
        trans_file = flac_path.parent / f"{speaker_id}-{chapter_id}.trans.txt"

        if trans_file not in transcripts:
            mapping = {}
            if trans_file.exists():
                with open(trans_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        parts = line.strip().split(' ', 1)
                        if len(parts) == 2:
                            # setdefault keeps the first occurrence, matching
                            # the original first-match scan semantics.
                            mapping.setdefault(parts[0], parts[1])
            transcripts[trans_file] = mapping

        text = transcripts[trans_file].get(flac_path.stem)
        if text is not None:
            entries.append({
                'audio_filepath': str(flac_path),
                'text': text,
                'duration': 0  # placeholder; not required for WER
            })

    print(f"Loaded {len(entries)} entries from {subset_dir}")
    return entries
|
| 181 |
+
|
| 182 |
+
def main():
    """Run a streaming-WER benchmark of the Parakeet EOU model on LibriSpeech."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--max-files', type=int, default=100)
    cli_args = arg_parser.parse_args()

    model_id = "nvidia/parakeet_realtime_eou_120m-v1"
    dataset_path = "/Users/kikow/Library/Caches/fluidaudio/LibriSpeech/LibriSpeech"

    device = torch.device("cpu")  # Force CPU for now

    print(f"Loading model: {model_id}")
    model = nemo_asr.models.ASRModel.from_pretrained(model_id, map_location=device)
    model.eval()

    # 1. Setup Decoding Strategy (Crucial Step)
    setup_decoding_strategy(model, strategy='greedy')

    # 2. Setup Streaming Params
    model.encoder.setup_streaming_params(chunk_size=4, shift_size=4)
    print(f"Updated Streaming Config: {model.encoder.streaming_cfg}")

    # Load Data
    entries = load_manifest(dataset_path, max_files=cli_args.max_files)
    n_entries = len(entries)

    wer_sum = 0
    count = 0

    print(f"Starting Benchmark on {n_entries} files...")

    for i, entry in enumerate(entries):
        audio_file = entry['audio_filepath']
        ref_text = entry['text'].lower()

        # Create buffer per file (clean state)
        streaming_buffer = CacheAwareStreamingAudioBuffer(
            model=model,
            online_normalization=False,
            pad_and_drop_preencoded=False
        )
        streaming_buffer.append_audio_file(audio_file, stream_id=-1)

        # 3. Perform Streaming
        hyp_text = perform_streaming(model, streaming_buffer, device)

        # Calculate WER
        wer = jiwer.wer(ref_text, hyp_text)
        wer_sum += wer
        count += 1

        print(f"[{i+1}/{n_entries}] {Path(audio_file).name} | WER: {wer*100:.2f}% | Ref: '{ref_text}' | Hyp: '{hyp_text}'")

    avg_wer = wer_sum / count if count > 0 else 0
    print(f"\nAverage WER over {count} files: {avg_wer*100:.2f}%")
|
| 236 |
+
|
| 237 |
+
if __name__ == "__main__":
|
| 238 |
+
main()
|
parakeet_decoder.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abebbb833404b4a9bcc374a9430574d574061f65f6327cba59d8cc1a8b95cfaa
|
| 3 |
+
size 243
|
parakeet_decoder.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea4dbff3f49ae48e899d4dc785cdb8ffa8614bba395c623db025f08bdd633381
|
| 3 |
+
size 439
|
parakeet_decoder.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float16",
|
| 10 |
+
"formattedType" : "MultiArray (Float16 1 × 1 × 640)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 1, 640]",
|
| 13 |
+
"name" : "decoder_output",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"hasShapeFlexibility" : "0",
|
| 18 |
+
"isOptional" : "0",
|
| 19 |
+
"dataType" : "Float16",
|
| 20 |
+
"formattedType" : "MultiArray (Float16 1 × 1 × 640)",
|
| 21 |
+
"shortDescription" : "",
|
| 22 |
+
"shape" : "[1, 1, 640]",
|
| 23 |
+
"name" : "h_out",
|
| 24 |
+
"type" : "MultiArray"
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"hasShapeFlexibility" : "0",
|
| 28 |
+
"isOptional" : "0",
|
| 29 |
+
"dataType" : "Float16",
|
| 30 |
+
"formattedType" : "MultiArray (Float16 1 × 1 × 640)",
|
| 31 |
+
"shortDescription" : "",
|
| 32 |
+
"shape" : "[1, 1, 640]",
|
| 33 |
+
"name" : "c_out",
|
| 34 |
+
"type" : "MultiArray"
|
| 35 |
+
}
|
| 36 |
+
],
|
| 37 |
+
"modelParameters" : [
|
| 38 |
+
|
| 39 |
+
],
|
| 40 |
+
"specificationVersion" : 8,
|
| 41 |
+
"mlProgramOperationTypeHistogram" : {
|
| 42 |
+
"Ios17.squeeze" : 2,
|
| 43 |
+
"Ios17.gather" : 1,
|
| 44 |
+
"Ios17.cast" : 3,
|
| 45 |
+
"Ios17.lstm" : 1,
|
| 46 |
+
"Ios17.transpose" : 6,
|
| 47 |
+
"Identity" : 1,
|
| 48 |
+
"Ios17.expandDims" : 2
|
| 49 |
+
},
|
| 50 |
+
"computePrecision" : "Mixed (Float16, Int16, Int32)",
|
| 51 |
+
"isUpdatable" : "0",
|
| 52 |
+
"stateSchema" : [
|
| 53 |
+
|
| 54 |
+
],
|
| 55 |
+
"availability" : {
|
| 56 |
+
"macOS" : "14.0",
|
| 57 |
+
"tvOS" : "17.0",
|
| 58 |
+
"visionOS" : "1.0",
|
| 59 |
+
"watchOS" : "10.0",
|
| 60 |
+
"iOS" : "17.0",
|
| 61 |
+
"macCatalyst" : "17.0"
|
| 62 |
+
},
|
| 63 |
+
"modelType" : {
|
| 64 |
+
"name" : "MLModelType_mlProgram"
|
| 65 |
+
},
|
| 66 |
+
"userDefinedMetadata" : {
|
| 67 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript",
|
| 68 |
+
"com.github.apple.coremltools.source" : "torch==2.4.0",
|
| 69 |
+
"com.github.apple.coremltools.version" : "8.3.0"
|
| 70 |
+
},
|
| 71 |
+
"inputSchema" : [
|
| 72 |
+
{
|
| 73 |
+
"hasShapeFlexibility" : "0",
|
| 74 |
+
"isOptional" : "0",
|
| 75 |
+
"dataType" : "Int32",
|
| 76 |
+
"formattedType" : "MultiArray (Int32 1 × 1)",
|
| 77 |
+
"shortDescription" : "",
|
| 78 |
+
"shape" : "[1, 1]",
|
| 79 |
+
"name" : "targets",
|
| 80 |
+
"type" : "MultiArray"
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"hasShapeFlexibility" : "0",
|
| 84 |
+
"isOptional" : "0",
|
| 85 |
+
"dataType" : "Int32",
|
| 86 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 87 |
+
"shortDescription" : "",
|
| 88 |
+
"shape" : "[1]",
|
| 89 |
+
"name" : "target_length",
|
| 90 |
+
"type" : "MultiArray"
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"hasShapeFlexibility" : "0",
|
| 94 |
+
"isOptional" : "0",
|
| 95 |
+
"dataType" : "Float32",
|
| 96 |
+
"formattedType" : "MultiArray (Float32 1 × 1 × 640)",
|
| 97 |
+
"shortDescription" : "",
|
| 98 |
+
"shape" : "[1, 1, 640]",
|
| 99 |
+
"name" : "h_in",
|
| 100 |
+
"type" : "MultiArray"
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"hasShapeFlexibility" : "0",
|
| 104 |
+
"isOptional" : "0",
|
| 105 |
+
"dataType" : "Float32",
|
| 106 |
+
"formattedType" : "MultiArray (Float32 1 × 1 × 640)",
|
| 107 |
+
"shortDescription" : "",
|
| 108 |
+
"shape" : "[1, 1, 640]",
|
| 109 |
+
"name" : "c_in",
|
| 110 |
+
"type" : "MultiArray"
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"generatedClassName" : "parakeet_decoder",
|
| 114 |
+
"method" : "predict"
|
| 115 |
+
}
|
| 116 |
+
]
|
parakeet_decoder.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.0)
|
| 2 |
+
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.4.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios17>(tensor<fp32, [1, 1, 640]> c_in, tensor<fp32, [1, 1, 640]> h_in, tensor<int32, [1]> target_length, tensor<int32, [1, 1]> targets) {
|
| 5 |
+
tensor<int32, [3]> var_14 = const()[name = tensor<string, []>("op_14"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 6 |
+
tensor<string, []> h_in_to_fp16_dtype_0 = const()[name = tensor<string, []>("h_in_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
|
| 7 |
+
tensor<int32, [3]> var_21 = const()[name = tensor<string, []>("op_21"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 8 |
+
tensor<string, []> c_in_to_fp16_dtype_0 = const()[name = tensor<string, []>("c_in_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
|
| 9 |
+
tensor<int32, []> y_axis_0 = const()[name = tensor<string, []>("y_axis_0"), val = tensor<int32, []>(0)];
|
| 10 |
+
tensor<int32, []> y_batch_dims_0 = const()[name = tensor<string, []>("y_batch_dims_0"), val = tensor<int32, []>(0)];
|
| 11 |
+
tensor<bool, []> y_validate_indices_0 = const()[name = tensor<string, []>("y_validate_indices_0"), val = tensor<bool, []>(false)];
|
| 12 |
+
tensor<fp16, [1027, 640]> decoder_prediction_embed_weight_to_fp16 = const()[name = tensor<string, []>("decoder_prediction_embed_weight_to_fp16"), val = tensor<fp16, [1027, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
|
| 13 |
+
tensor<string, []> targets_to_int16_dtype_0 = const()[name = tensor<string, []>("targets_to_int16_dtype_0"), val = tensor<string, []>("int16")];
|
| 14 |
+
tensor<int16, [1, 1]> targets_to_int16 = cast(dtype = targets_to_int16_dtype_0, x = targets)[name = tensor<string, []>("cast_4")];
|
| 15 |
+
tensor<fp16, [1, 1, 640]> y_cast_fp16_cast_uint16 = gather(axis = y_axis_0, batch_dims = y_batch_dims_0, indices = targets_to_int16, validate_indices = y_validate_indices_0, x = decoder_prediction_embed_weight_to_fp16)[name = tensor<string, []>("y_cast_fp16_cast_uint16")];
|
| 16 |
+
tensor<int32, [3]> input_1_perm_0 = const()[name = tensor<string, []>("input_1_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 17 |
+
tensor<int32, [1]> input_lstm_h0_squeeze_axes_0 = const()[name = tensor<string, []>("input_lstm_h0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
|
| 18 |
+
tensor<fp16, [1, 1, 640]> h_in_to_fp16 = cast(dtype = h_in_to_fp16_dtype_0, x = h_in)[name = tensor<string, []>("cast_6")];
|
| 19 |
+
tensor<fp16, [1, 1, 640]> var_15_cast_fp16 = transpose(perm = var_14, x = h_in_to_fp16)[name = tensor<string, []>("transpose_5")];
|
| 20 |
+
tensor<fp16, [1, 640]> input_lstm_h0_squeeze_cast_fp16 = squeeze(axes = input_lstm_h0_squeeze_axes_0, x = var_15_cast_fp16)[name = tensor<string, []>("input_lstm_h0_squeeze_cast_fp16")];
|
| 21 |
+
tensor<int32, [1]> input_lstm_c0_squeeze_axes_0 = const()[name = tensor<string, []>("input_lstm_c0_squeeze_axes_0"), val = tensor<int32, [1]>([0])];
|
| 22 |
+
tensor<fp16, [1, 1, 640]> c_in_to_fp16 = cast(dtype = c_in_to_fp16_dtype_0, x = c_in)[name = tensor<string, []>("cast_5")];
|
| 23 |
+
tensor<fp16, [1, 1, 640]> var_22_cast_fp16 = transpose(perm = var_21, x = c_in_to_fp16)[name = tensor<string, []>("transpose_4")];
|
| 24 |
+
tensor<fp16, [1, 640]> input_lstm_c0_squeeze_cast_fp16 = squeeze(axes = input_lstm_c0_squeeze_axes_0, x = var_22_cast_fp16)[name = tensor<string, []>("input_lstm_c0_squeeze_cast_fp16")];
|
| 25 |
+
tensor<string, []> input_direction_0 = const()[name = tensor<string, []>("input_direction_0"), val = tensor<string, []>("forward")];
|
| 26 |
+
tensor<bool, []> input_output_sequence_0 = const()[name = tensor<string, []>("input_output_sequence_0"), val = tensor<bool, []>(true)];
|
| 27 |
+
tensor<string, []> input_recurrent_activation_0 = const()[name = tensor<string, []>("input_recurrent_activation_0"), val = tensor<string, []>("sigmoid")];
|
| 28 |
+
tensor<string, []> input_cell_activation_0 = const()[name = tensor<string, []>("input_cell_activation_0"), val = tensor<string, []>("tanh")];
|
| 29 |
+
tensor<string, []> input_activation_0 = const()[name = tensor<string, []>("input_activation_0"), val = tensor<string, []>("tanh")];
|
| 30 |
+
tensor<fp16, [2560, 640]> concat_1_to_fp16 = const()[name = tensor<string, []>("concat_1_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1314688)))];
|
| 31 |
+
tensor<fp16, [2560, 640]> concat_2_to_fp16 = const()[name = tensor<string, []>("concat_2_to_fp16"), val = tensor<fp16, [2560, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(4591552)))];
|
| 32 |
+
tensor<fp16, [2560]> concat_0_to_fp16 = const()[name = tensor<string, []>("concat_0_to_fp16"), val = tensor<fp16, [2560]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(7868416)))];
|
| 33 |
+
tensor<fp16, [1, 1, 640]> input_1_cast_fp16 = transpose(perm = input_1_perm_0, x = y_cast_fp16_cast_uint16)[name = tensor<string, []>("transpose_3")];
|
| 34 |
+
tensor<fp16, [1, 1, 640]> input_cast_fp16_0, tensor<fp16, [1, 640]> input_cast_fp16_1, tensor<fp16, [1, 640]> input_cast_fp16_2 = lstm(activation = input_activation_0, bias = concat_0_to_fp16, cell_activation = input_cell_activation_0, direction = input_direction_0, initial_c = input_lstm_c0_squeeze_cast_fp16, initial_h = input_lstm_h0_squeeze_cast_fp16, output_sequence = input_output_sequence_0, recurrent_activation = input_recurrent_activation_0, weight_hh = concat_2_to_fp16, weight_ih = concat_1_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
|
| 35 |
+
tensor<int32, [1]> var_44_axes_0 = const()[name = tensor<string, []>("op_44_axes_0"), val = tensor<int32, [1]>([0])];
|
| 36 |
+
tensor<fp16, [1, 1, 640]> var_44_cast_fp16 = expand_dims(axes = var_44_axes_0, x = input_cast_fp16_1)[name = tensor<string, []>("op_44_cast_fp16")];
|
| 37 |
+
tensor<int32, [1]> var_45_axes_0 = const()[name = tensor<string, []>("op_45_axes_0"), val = tensor<int32, [1]>([0])];
|
| 38 |
+
tensor<fp16, [1, 1, 640]> var_45_cast_fp16 = expand_dims(axes = var_45_axes_0, x = input_cast_fp16_2)[name = tensor<string, []>("op_45_cast_fp16")];
|
| 39 |
+
tensor<int32, [3]> var_57_perm_0 = const()[name = tensor<string, []>("op_57_perm_0"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 40 |
+
tensor<int32, [3]> var_61 = const()[name = tensor<string, []>("op_61"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 41 |
+
tensor<int32, [3]> var_66 = const()[name = tensor<string, []>("op_66"), val = tensor<int32, [3]>([1, 0, 2])];
|
| 42 |
+
tensor<fp16, [1, 1, 640]> c_out = transpose(perm = var_66, x = var_45_cast_fp16)[name = tensor<string, []>("transpose_0")];
|
| 43 |
+
tensor<fp16, [1, 1, 640]> h_out = transpose(perm = var_61, x = var_44_cast_fp16)[name = tensor<string, []>("transpose_1")];
|
| 44 |
+
tensor<fp16, [1, 1, 640]> decoder_output = transpose(perm = var_57_perm_0, x = input_cast_fp16_0)[name = tensor<string, []>("transpose_2")];
|
| 45 |
+
tensor<int32, [1]> target_length_tmp = identity(x = target_length)[name = tensor<string, []>("target_length_tmp")];
|
| 46 |
+
} -> (decoder_output, h_out, c_out);
|
| 47 |
+
}
|
parakeet_decoder.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b4cacecdcd9df79ab1e56de67230baf5a8664d2afe0bb8f3408eefa972cb2f4
|
| 3 |
+
size 7873600
|
parakeet_decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b706227c9c2a2d64ea0fa3879ca9a4673e61944e8e374160e5a20ae7382207c3
|
| 3 |
+
size 6750
|
parakeet_decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b4cacecdcd9df79ab1e56de67230baf5a8664d2afe0bb8f3408eefa972cb2f4
|
| 3 |
+
size 7873600
|
parakeet_decoder.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"2A776510-11A3-4993-A996-06C985BF1840": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"5111D7BD-E8E5-42A4-A8E2-11BD568F106B": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "5111D7BD-E8E5-42A4-A8E2-11BD568F106B"
|
| 18 |
+
}
|
parakeet_joint.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51354af666471dab9e2344e1a7b93004c7fef44c3d455dde75bcaf0abbcc72af
|
| 3 |
+
size 243
|
parakeet_joint.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9ecae4f1db9350ad8bd7050c3c1b973926798c2f7ff408e9ad512d3013f238b
|
| 3 |
+
size 355
|
parakeet_joint.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float32",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float32",
|
| 10 |
+
"formattedType" : "MultiArray (Float32 1 × 1 × 1 × 1027)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 1, 1, 1027]",
|
| 13 |
+
"name" : "logits",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
}
|
| 16 |
+
],
|
| 17 |
+
"modelParameters" : [
|
| 18 |
+
|
| 19 |
+
],
|
| 20 |
+
"specificationVersion" : 8,
|
| 21 |
+
"mlProgramOperationTypeHistogram" : {
|
| 22 |
+
"Ios17.expandDims" : 2,
|
| 23 |
+
"Ios17.transpose" : 1,
|
| 24 |
+
"Ios17.linear" : 3,
|
| 25 |
+
"Ios17.add" : 1,
|
| 26 |
+
"Ios16.relu" : 1
|
| 27 |
+
},
|
| 28 |
+
"computePrecision" : "Mixed (Float32, Int32)",
|
| 29 |
+
"isUpdatable" : "0",
|
| 30 |
+
"stateSchema" : [
|
| 31 |
+
|
| 32 |
+
],
|
| 33 |
+
"availability" : {
|
| 34 |
+
"macOS" : "14.0",
|
| 35 |
+
"tvOS" : "17.0",
|
| 36 |
+
"visionOS" : "1.0",
|
| 37 |
+
"watchOS" : "10.0",
|
| 38 |
+
"iOS" : "17.0",
|
| 39 |
+
"macCatalyst" : "17.0"
|
| 40 |
+
},
|
| 41 |
+
"modelType" : {
|
| 42 |
+
"name" : "MLModelType_mlProgram"
|
| 43 |
+
},
|
| 44 |
+
"userDefinedMetadata" : {
|
| 45 |
+
"com.github.apple.coremltools.version" : "8.3.0",
|
| 46 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript",
|
| 47 |
+
"com.github.apple.coremltools.source" : "torch==2.4.0"
|
| 48 |
+
},
|
| 49 |
+
"inputSchema" : [
|
| 50 |
+
{
|
| 51 |
+
"hasShapeFlexibility" : "0",
|
| 52 |
+
"isOptional" : "0",
|
| 53 |
+
"dataType" : "Float32",
|
| 54 |
+
"formattedType" : "MultiArray (Float32 1 × 512 × 1)",
|
| 55 |
+
"shortDescription" : "",
|
| 56 |
+
"shape" : "[1, 512, 1]",
|
| 57 |
+
"name" : "encoder_output",
|
| 58 |
+
"type" : "MultiArray"
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"hasShapeFlexibility" : "0",
|
| 62 |
+
"isOptional" : "0",
|
| 63 |
+
"dataType" : "Float32",
|
| 64 |
+
"formattedType" : "MultiArray (Float32 1 × 1 × 640)",
|
| 65 |
+
"shortDescription" : "",
|
| 66 |
+
"shape" : "[1, 1, 640]",
|
| 67 |
+
"name" : "decoder_output",
|
| 68 |
+
"type" : "MultiArray"
|
| 69 |
+
}
|
| 70 |
+
],
|
| 71 |
+
"generatedClassName" : "parakeet_joint",
|
| 72 |
+
"method" : "predict"
|
| 73 |
+
}
|
| 74 |
+
]
|
parakeet_joint.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.0)
|
| 2 |
+
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.4.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios17>(tensor<fp32, [1, 1, 640]> decoder_output, tensor<fp32, [1, 512, 1]> encoder_output) {
|
| 5 |
+
tensor<fp32, [640]> joint_enc_bias = const()[name = tensor<string, []>("joint_enc_bias"), val = tensor<fp32, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
|
| 6 |
+
tensor<fp32, [640, 512]> joint_enc_weight = const()[name = tensor<string, []>("joint_enc_weight"), val = tensor<fp32, [640, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2688)))];
|
| 7 |
+
tensor<fp32, [640]> joint_pred_bias = const()[name = tensor<string, []>("joint_pred_bias"), val = tensor<fp32, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1313472)))];
|
| 8 |
+
tensor<fp32, [640, 640]> joint_pred_weight = const()[name = tensor<string, []>("joint_pred_weight"), val = tensor<fp32, [640, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1316096)))];
|
| 9 |
+
tensor<fp32, [1027]> joint_joint_net_2_bias = const()[name = tensor<string, []>("joint_joint_net_2_bias"), val = tensor<fp32, [1027]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2954560)))];
|
| 10 |
+
tensor<fp32, [1027, 640]> joint_joint_net_2_weight = const()[name = tensor<string, []>("joint_joint_net_2_weight"), val = tensor<fp32, [1027, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2958784)))];
|
| 11 |
+
tensor<int32, [3]> input_1_perm_0 = const()[name = tensor<string, []>("input_1_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
|
| 12 |
+
tensor<fp32, [1, 1, 512]> input_1 = transpose(perm = input_1_perm_0, x = encoder_output)[name = tensor<string, []>("transpose_0")];
|
| 13 |
+
tensor<fp32, [1, 1, 640]> f = linear(bias = joint_enc_bias, weight = joint_enc_weight, x = input_1)[name = tensor<string, []>("linear_0")];
|
| 14 |
+
tensor<fp32, [1, 1, 640]> g = linear(bias = joint_pred_bias, weight = joint_pred_weight, x = decoder_output)[name = tensor<string, []>("linear_1")];
|
| 15 |
+
tensor<int32, [1]> var_19_axes_0 = const()[name = tensor<string, []>("op_19_axes_0"), val = tensor<int32, [1]>([2])];
|
| 16 |
+
tensor<fp32, [1, 1, 1, 640]> var_19 = expand_dims(axes = var_19_axes_0, x = f)[name = tensor<string, []>("op_19")];
|
| 17 |
+
tensor<int32, [1]> var_21_axes_0 = const()[name = tensor<string, []>("op_21_axes_0"), val = tensor<int32, [1]>([1])];
|
| 18 |
+
tensor<fp32, [1, 1, 1, 640]> var_21 = expand_dims(axes = var_21_axes_0, x = g)[name = tensor<string, []>("op_21")];
|
| 19 |
+
tensor<fp32, [1, 1, 1, 640]> input_3 = add(x = var_19, y = var_21)[name = tensor<string, []>("input_3")];
|
| 20 |
+
tensor<fp32, [1, 1, 1, 640]> input_5 = relu(x = input_3)[name = tensor<string, []>("input_5")];
|
| 21 |
+
tensor<fp32, [1, 1, 1, 1027]> logits = linear(bias = joint_joint_net_2_bias, weight = joint_joint_net_2_weight, x = input_5)[name = tensor<string, []>("linear_2")];
|
| 22 |
+
} -> (logits);
|
| 23 |
+
}
|
parakeet_joint.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f909b079b4923a05e522c15926ab7c8614c3d088a0da555970b16eb5447ce19c
|
| 3 |
+
size 5587968
|
parakeet_joint.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:570c88c720a7cca648db2d493635420c24ff837099586e384099c705425b207e
|
| 3 |
+
size 3015
|
parakeet_joint.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f909b079b4923a05e522c15926ab7c8614c3d088a0da555970b16eb5447ce19c
|
| 3 |
+
size 5587968
|
parakeet_joint.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"A8C1223F-3E7A-421C-AFF6-DB3EADE3826B": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"C6F9C4E8-810B-42F5-9184-A7F28B430B15": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "C6F9C4E8-810B-42F5-9184-A7F28B430B15"
|
| 18 |
+
}
|
preprocessor.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66bc489d462bb3131074b17c0cf18efe85bc0619b1e22f4a94c69d25576c1041
|
| 3 |
+
size 243
|
preprocessor.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61ed911e33fe8a791a4655ff7539a086dd760a60b60b233153cb769d85f41311
|
| 3 |
+
size 373
|
preprocessor.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float32",
|
| 10 |
+
"formattedType" : "MultiArray (Float32)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[]",
|
| 13 |
+
"name" : "mel",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"hasShapeFlexibility" : "0",
|
| 18 |
+
"isOptional" : "0",
|
| 19 |
+
"dataType" : "Int32",
|
| 20 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 21 |
+
"shortDescription" : "",
|
| 22 |
+
"shape" : "[1]",
|
| 23 |
+
"name" : "mel_length",
|
| 24 |
+
"type" : "MultiArray"
|
| 25 |
+
}
|
| 26 |
+
],
|
| 27 |
+
"modelParameters" : [
|
| 28 |
+
|
| 29 |
+
],
|
| 30 |
+
"specificationVersion" : 8,
|
| 31 |
+
"mlProgramOperationTypeHistogram" : {
|
| 32 |
+
"Range1d" : 1,
|
| 33 |
+
"Ios17.reshape" : 2,
|
| 34 |
+
"Identity" : 1,
|
| 35 |
+
"Ios17.matmul" : 1,
|
| 36 |
+
"Ios17.expandDims" : 5,
|
| 37 |
+
"Select" : 1,
|
| 38 |
+
"Ios17.add" : 3,
|
| 39 |
+
"Ios17.sliceByIndex" : 3,
|
| 40 |
+
"Ios16.reduceSum" : 1,
|
| 41 |
+
"Shape" : 1,
|
| 42 |
+
"Ios17.gather" : 1,
|
| 43 |
+
"Pad" : 1,
|
| 44 |
+
"Ios17.log" : 1,
|
| 45 |
+
"Ios17.conv" : 2,
|
| 46 |
+
"Ios17.sub" : 2,
|
| 47 |
+
"Ios17.pow" : 1,
|
| 48 |
+
"Ios17.cast" : 6,
|
| 49 |
+
"Stack" : 1,
|
| 50 |
+
"Ios17.concat" : 1,
|
| 51 |
+
"Ios17.floorDiv" : 1,
|
| 52 |
+
"Ios17.greaterEqual" : 1,
|
| 53 |
+
"Ios17.mul" : 1
|
| 54 |
+
},
|
| 55 |
+
"computePrecision" : "Mixed (Float16, Float32, Int32, UInt16)",
|
| 56 |
+
"isUpdatable" : "0",
|
| 57 |
+
"stateSchema" : [
|
| 58 |
+
|
| 59 |
+
],
|
| 60 |
+
"availability" : {
|
| 61 |
+
"macOS" : "14.0",
|
| 62 |
+
"tvOS" : "17.0",
|
| 63 |
+
"visionOS" : "1.0",
|
| 64 |
+
"watchOS" : "10.0",
|
| 65 |
+
"iOS" : "17.0",
|
| 66 |
+
"macCatalyst" : "17.0"
|
| 67 |
+
},
|
| 68 |
+
"modelType" : {
|
| 69 |
+
"name" : "MLModelType_mlProgram"
|
| 70 |
+
},
|
| 71 |
+
"userDefinedMetadata" : {
|
| 72 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript",
|
| 73 |
+
"com.github.apple.coremltools.source" : "torch==2.4.0",
|
| 74 |
+
"com.github.apple.coremltools.version" : "8.3.0"
|
| 75 |
+
},
|
| 76 |
+
"inputSchema" : [
|
| 77 |
+
{
|
| 78 |
+
"dataType" : "Float32",
|
| 79 |
+
"hasShapeFlexibility" : "1",
|
| 80 |
+
"isOptional" : "0",
|
| 81 |
+
"shapeFlexibility" : "1 × 1600...16000",
|
| 82 |
+
"shapeRange" : "[[1, 1], [1600, 16000]]",
|
| 83 |
+
"formattedType" : "MultiArray (Float32 1 × 6560)",
|
| 84 |
+
"type" : "MultiArray",
|
| 85 |
+
"shape" : "[1, 6560]",
|
| 86 |
+
"name" : "input_signal",
|
| 87 |
+
"shortDescription" : ""
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"hasShapeFlexibility" : "0",
|
| 91 |
+
"isOptional" : "0",
|
| 92 |
+
"dataType" : "Int32",
|
| 93 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 94 |
+
"shortDescription" : "",
|
| 95 |
+
"shape" : "[1]",
|
| 96 |
+
"name" : "length",
|
| 97 |
+
"type" : "MultiArray"
|
| 98 |
+
}
|
| 99 |
+
],
|
| 100 |
+
"generatedClassName" : "preprocessor",
|
| 101 |
+
"method" : "predict"
|
| 102 |
+
}
|
| 103 |
+
]
|
preprocessor.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.0)
|
| 2 |
+
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.4.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios17>(tensor<fp32, [1, ?]> input_signal, tensor<int32, [1]> length) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"input_signal", [1, 6560]}}), ("RangeDims", {{"input_signal", [[1, 1], [1600, 16000]]}})))] {
|
| 5 |
+
tensor<int32, []> var_4 = const()[name = tensor<string, []>("op_4"), val = tensor<int32, []>(1)];
|
| 6 |
+
tensor<int32, []> var_5 = const()[name = tensor<string, []>("op_5"), val = tensor<int32, []>(160)];
|
| 7 |
+
tensor<int32, []> var_27 = const()[name = tensor<string, []>("op_27"), val = tensor<int32, []>(512)];
|
| 8 |
+
tensor<int32, [1]> var_28 = add(x = length, y = var_27)[name = tensor<string, []>("op_28")];
|
| 9 |
+
tensor<int32, []> var_29 = const()[name = tensor<string, []>("op_29"), val = tensor<int32, []>(512)];
|
| 10 |
+
tensor<int32, [1]> var_30 = sub(x = var_28, y = var_29)[name = tensor<string, []>("op_30")];
|
| 11 |
+
tensor<int32, [1]> floor_div_0 = floor_div(x = var_30, y = var_5)[name = tensor<string, []>("floor_div_0")];
|
| 12 |
+
tensor<string, []> var_31_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_31_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
|
| 13 |
+
tensor<fp16, []> var_32_promoted_to_fp16 = const()[name = tensor<string, []>("op_32_promoted_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
|
| 14 |
+
tensor<fp16, [1]> floor_div_0_to_fp16 = cast(dtype = var_31_to_fp16_dtype_0, x = floor_div_0)[name = tensor<string, []>("cast_14")];
|
| 15 |
+
tensor<fp16, [1]> seq_len_1_cast_fp16 = add(x = floor_div_0_to_fp16, y = var_32_promoted_to_fp16)[name = tensor<string, []>("seq_len_1_cast_fp16")];
|
| 16 |
+
tensor<string, []> cast_1_dtype_0 = const()[name = tensor<string, []>("cast_1_dtype_0"), val = tensor<string, []>("int32")];
|
| 17 |
+
tensor<int32, [2]> var_36_begin_0 = const()[name = tensor<string, []>("op_36_begin_0"), val = tensor<int32, [2]>([0, 0])];
|
| 18 |
+
tensor<int32, [2]> var_36_end_0 = const()[name = tensor<string, []>("op_36_end_0"), val = tensor<int32, [2]>([1, 1])];
|
| 19 |
+
tensor<bool, [2]> var_36_end_mask_0 = const()[name = tensor<string, []>("op_36_end_mask_0"), val = tensor<bool, [2]>([true, false])];
|
| 20 |
+
tensor<bool, [2]> var_36_squeeze_mask_0 = const()[name = tensor<string, []>("op_36_squeeze_mask_0"), val = tensor<bool, [2]>([false, true])];
|
| 21 |
+
tensor<string, []> input_signal_to_fp16_dtype_0 = const()[name = tensor<string, []>("input_signal_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
|
| 22 |
+
tensor<fp16, [1, ?]> input_signal_to_fp16 = cast(dtype = input_signal_to_fp16_dtype_0, x = input_signal)[name = tensor<string, []>("cast_12")];
|
| 23 |
+
tensor<fp16, [1]> var_36_cast_fp16 = slice_by_index(begin = var_36_begin_0, end = var_36_end_0, end_mask = var_36_end_mask_0, squeeze_mask = var_36_squeeze_mask_0, x = input_signal_to_fp16)[name = tensor<string, []>("op_36_cast_fp16")];
|
| 24 |
+
tensor<int32, [1]> var_37_axes_0 = const()[name = tensor<string, []>("op_37_axes_0"), val = tensor<int32, [1]>([1])];
|
| 25 |
+
tensor<fp16, [1, 1]> var_37_cast_fp16 = expand_dims(axes = var_37_axes_0, x = var_36_cast_fp16)[name = tensor<string, []>("op_37_cast_fp16")];
|
| 26 |
+
tensor<int32, [2]> var_39_begin_0 = const()[name = tensor<string, []>("op_39_begin_0"), val = tensor<int32, [2]>([0, 1])];
|
| 27 |
+
tensor<int32, [2]> var_39_end_0 = const()[name = tensor<string, []>("op_39_end_0"), val = tensor<int32, [2]>([1, 0])];
|
| 28 |
+
tensor<bool, [2]> var_39_end_mask_0 = const()[name = tensor<string, []>("op_39_end_mask_0"), val = tensor<bool, [2]>([true, true])];
|
| 29 |
+
tensor<fp16, [1, ?]> var_39_cast_fp16 = slice_by_index(begin = var_39_begin_0, end = var_39_end_0, end_mask = var_39_end_mask_0, x = input_signal_to_fp16)[name = tensor<string, []>("op_39_cast_fp16")];
|
| 30 |
+
tensor<int32, [2]> var_41_begin_0 = const()[name = tensor<string, []>("op_41_begin_0"), val = tensor<int32, [2]>([0, 0])];
|
| 31 |
+
tensor<int32, [2]> var_41_end_0 = const()[name = tensor<string, []>("op_41_end_0"), val = tensor<int32, [2]>([1, -1])];
|
| 32 |
+
tensor<bool, [2]> var_41_end_mask_0 = const()[name = tensor<string, []>("op_41_end_mask_0"), val = tensor<bool, [2]>([true, false])];
|
| 33 |
+
tensor<fp16, [1, ?]> var_41_cast_fp16 = slice_by_index(begin = var_41_begin_0, end = var_41_end_0, end_mask = var_41_end_mask_0, x = input_signal_to_fp16)[name = tensor<string, []>("op_41_cast_fp16")];
|
| 34 |
+
tensor<fp16, []> var_42_to_fp16 = const()[name = tensor<string, []>("op_42_to_fp16"), val = tensor<fp16, []>(0x1.f0cp-1)];
|
| 35 |
+
tensor<fp16, [1, ?]> var_43_cast_fp16 = mul(x = var_41_cast_fp16, y = var_42_to_fp16)[name = tensor<string, []>("op_43_cast_fp16")];
|
| 36 |
+
tensor<fp16, [1, ?]> var_44_cast_fp16 = sub(x = var_39_cast_fp16, y = var_43_cast_fp16)[name = tensor<string, []>("op_44_cast_fp16")];
|
| 37 |
+
tensor<bool, []> input_1_interleave_0 = const()[name = tensor<string, []>("input_1_interleave_0"), val = tensor<bool, []>(false)];
|
| 38 |
+
tensor<fp16, [1, ?]> input_1_cast_fp16 = concat(axis = var_4, interleave = input_1_interleave_0, values = (var_37_cast_fp16, var_44_cast_fp16))[name = tensor<string, []>("input_1_cast_fp16")];
|
| 39 |
+
tensor<int32, [3]> concat_0x = const()[name = tensor<string, []>("concat_0x"), val = tensor<int32, [3]>([1, 1, -1])];
|
| 40 |
+
tensor<fp16, [1, 1, ?]> input_3_cast_fp16 = reshape(shape = concat_0x, x = input_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
|
| 41 |
+
tensor<int32, [6]> input_5_pad_0 = const()[name = tensor<string, []>("input_5_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 256, 256])];
|
| 42 |
+
tensor<string, []> input_5_mode_0 = const()[name = tensor<string, []>("input_5_mode_0"), val = tensor<string, []>("reflect")];
|
| 43 |
+
tensor<fp16, []> const_1_to_fp16 = const()[name = tensor<string, []>("const_1_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
|
| 44 |
+
tensor<fp16, [1, 1, ?]> input_5_cast_fp16 = pad(constant_val = const_1_to_fp16, mode = input_5_mode_0, pad = input_5_pad_0, x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
|
| 45 |
+
tensor<int32, [2]> concat_1x = const()[name = tensor<string, []>("concat_1x"), val = tensor<int32, [2]>([1, -1])];
|
| 46 |
+
tensor<fp16, [1, ?]> input_cast_fp16 = reshape(shape = concat_1x, x = input_5_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
|
| 47 |
+
tensor<int32, [1]> expand_dims_3 = const()[name = tensor<string, []>("expand_dims_3"), val = tensor<int32, [1]>([160])];
|
| 48 |
+
tensor<int32, [1]> expand_dims_4_axes_0 = const()[name = tensor<string, []>("expand_dims_4_axes_0"), val = tensor<int32, [1]>([1])];
|
| 49 |
+
tensor<fp16, [1, 1, ?]> expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = input_cast_fp16)[name = tensor<string, []>("expand_dims_4_cast_fp16")];
|
| 50 |
+
tensor<string, []> conv_0_pad_type_0 = const()[name = tensor<string, []>("conv_0_pad_type_0"), val = tensor<string, []>("valid")];
|
| 51 |
+
tensor<int32, [2]> conv_0_pad_0 = const()[name = tensor<string, []>("conv_0_pad_0"), val = tensor<int32, [2]>([0, 0])];
|
| 52 |
+
tensor<int32, [1]> conv_0_dilations_0 = const()[name = tensor<string, []>("conv_0_dilations_0"), val = tensor<int32, [1]>([1])];
|
| 53 |
+
tensor<int32, []> conv_0_groups_0 = const()[name = tensor<string, []>("conv_0_groups_0"), val = tensor<int32, []>(1)];
|
| 54 |
+
tensor<fp16, [257, 1, 512]> expand_dims_1_to_fp16 = const()[name = tensor<string, []>("expand_dims_1_to_fp16"), val = tensor<fp16, [257, 1, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
|
| 55 |
+
tensor<fp16, [1, 257, ?]> conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_0_cast_fp16")];
|
| 56 |
+
tensor<string, []> conv_1_pad_type_0 = const()[name = tensor<string, []>("conv_1_pad_type_0"), val = tensor<string, []>("valid")];
|
| 57 |
+
tensor<int32, [2]> conv_1_pad_0 = const()[name = tensor<string, []>("conv_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
|
| 58 |
+
tensor<int32, [1]> conv_1_dilations_0 = const()[name = tensor<string, []>("conv_1_dilations_0"), val = tensor<int32, [1]>([1])];
|
| 59 |
+
tensor<int32, []> conv_1_groups_0 = const()[name = tensor<string, []>("conv_1_groups_0"), val = tensor<int32, []>(1)];
|
| 60 |
+
tensor<fp16, [257, 1, 512]> expand_dims_2_to_fp16 = const()[name = tensor<string, []>("expand_dims_2_to_fp16"), val = tensor<fp16, [257, 1, 512]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(263296)))];
|
| 61 |
+
tensor<fp16, [1, 257, ?]> conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = tensor<string, []>("conv_1_cast_fp16")];
|
| 62 |
+
tensor<int32, []> stack_0_axis_0 = const()[name = tensor<string, []>("stack_0_axis_0"), val = tensor<int32, []>(-1)];
|
| 63 |
+
tensor<fp16, [1, 257, ?, 2]> stack_0_cast_fp16 = stack(axis = stack_0_axis_0, values = (conv_0_cast_fp16, conv_1_cast_fp16))[name = tensor<string, []>("stack_0_cast_fp16")];
|
| 64 |
+
tensor<fp16, []> var_12_promoted_to_fp16 = const()[name = tensor<string, []>("op_12_promoted_to_fp16"), val = tensor<fp16, []>(0x1p+1)];
|
| 65 |
+
tensor<fp16, [1, 257, ?, 2]> var_60_cast_fp16 = pow(x = stack_0_cast_fp16, y = var_12_promoted_to_fp16)[name = tensor<string, []>("op_60_cast_fp16")];
|
| 66 |
+
tensor<int32, [1]> var_62_axes_0 = const()[name = tensor<string, []>("op_62_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 67 |
+
tensor<bool, []> var_62_keep_dims_0 = const()[name = tensor<string, []>("op_62_keep_dims_0"), val = tensor<bool, []>(false)];
|
| 68 |
+
tensor<fp16, [1, 257, ?]> var_62_cast_fp16 = reduce_sum(axes = var_62_axes_0, keep_dims = var_62_keep_dims_0, x = var_60_cast_fp16)[name = tensor<string, []>("op_62_cast_fp16")];
|
| 69 |
+
tensor<fp16, [1, 257, ?]> x_9_cast_fp16 = identity(x = var_62_cast_fp16)[name = tensor<string, []>("x_9_cast_fp16")];
|
| 70 |
+
tensor<bool, []> x_11_transpose_x_0 = const()[name = tensor<string, []>("x_11_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 71 |
+
tensor<bool, []> x_11_transpose_y_0 = const()[name = tensor<string, []>("x_11_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 72 |
+
tensor<fp16, [1, 128, 257]> const_2_to_fp16 = const()[name = tensor<string, []>("const_2_to_fp16"), val = tensor<fp16, [1, 128, 257]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(526528)))];
|
| 73 |
+
tensor<fp16, [1, 128, ?]> x_11_cast_fp16 = matmul(transpose_x = x_11_transpose_x_0, transpose_y = x_11_transpose_y_0, x = const_2_to_fp16, y = x_9_cast_fp16)[name = tensor<string, []>("x_11_cast_fp16")];
|
| 74 |
+
tensor<fp16, []> var_69_to_fp16 = const()[name = tensor<string, []>("op_69_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
|
| 75 |
+
tensor<fp16, [1, 128, ?]> var_70_cast_fp16 = add(x = x_11_cast_fp16, y = var_69_to_fp16)[name = tensor<string, []>("op_70_cast_fp16")];
|
| 76 |
+
tensor<fp32, []> x_epsilon_0 = const()[name = tensor<string, []>("x_epsilon_0"), val = tensor<fp32, []>(0x1p-149)];
|
| 77 |
+
tensor<fp16, [1, 128, ?]> x_cast_fp16 = log(epsilon = x_epsilon_0, x = var_70_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
|
| 78 |
+
tensor<int32, [3]> var_72_shape_cast_fp16 = shape(x = x_cast_fp16)[name = tensor<string, []>("op_72_shape_cast_fp16")];
|
| 79 |
+
tensor<int32, []> gather_4_axis_0 = const()[name = tensor<string, []>("gather_4_axis_0"), val = tensor<int32, []>(0)];
|
| 80 |
+
tensor<int32, []> gather_4_batch_dims_0 = const()[name = tensor<string, []>("gather_4_batch_dims_0"), val = tensor<int32, []>(0)];
|
| 81 |
+
tensor<bool, []> gather_4_validate_indices_0 = const()[name = tensor<string, []>("gather_4_validate_indices_0"), val = tensor<bool, []>(false)];
|
| 82 |
+
tensor<string, []> var_72_shape_cast_fp16_to_uint16_dtype_0 = const()[name = tensor<string, []>("op_72_shape_cast_fp16_to_uint16_dtype_0"), val = tensor<string, []>("uint16")];
|
| 83 |
+
tensor<uint16, []> select_4_to_uint16 = const()[name = tensor<string, []>("select_4_to_uint16"), val = tensor<uint16, []>(2)];
|
| 84 |
+
tensor<uint16, [3]> var_72_shape_cast_fp16_to_uint16 = cast(dtype = var_72_shape_cast_fp16_to_uint16_dtype_0, x = var_72_shape_cast_fp16)[name = tensor<string, []>("cast_11")];
|
| 85 |
+
tensor<uint16, []> gather_4_cast_uint16 = gather(axis = gather_4_axis_0, batch_dims = gather_4_batch_dims_0, indices = select_4_to_uint16, validate_indices = gather_4_validate_indices_0, x = var_72_shape_cast_fp16_to_uint16)[name = tensor<string, []>("gather_4_cast_uint16")];
|
| 86 |
+
tensor<string, []> gather_4_cast_uint16_to_int32_dtype_0 = const()[name = tensor<string, []>("gather_4_cast_uint16_to_int32_dtype_0"), val = tensor<string, []>("int32")];
|
| 87 |
+
tensor<int32, []> const_3 = const()[name = tensor<string, []>("const_3"), val = tensor<int32, []>(0)];
|
| 88 |
+
tensor<int32, []> const_4 = const()[name = tensor<string, []>("const_4"), val = tensor<int32, []>(1)];
|
| 89 |
+
tensor<int32, []> gather_4_cast_uint16_to_int32 = cast(dtype = gather_4_cast_uint16_to_int32_dtype_0, x = gather_4_cast_uint16)[name = tensor<string, []>("cast_10")];
|
| 90 |
+
tensor<int32, [?]> mask_1 = range_1d(end = gather_4_cast_uint16_to_int32, start = const_3, step = const_4)[name = tensor<string, []>("mask_1")];
|
| 91 |
+
tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = tensor<string, []>("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
|
| 92 |
+
tensor<int32, [1, ?]> expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = mask_1)[name = tensor<string, []>("expand_dims_0")];
|
| 93 |
+
tensor<int32, [1]> var_77_axes_0 = const()[name = tensor<string, []>("op_77_axes_0"), val = tensor<int32, [1]>([1])];
|
| 94 |
+
tensor<int32, [1]> mel_length = cast(dtype = cast_1_dtype_0, x = seq_len_1_cast_fp16)[name = tensor<string, []>("cast_13")];
|
| 95 |
+
tensor<int32, [1, 1]> var_77 = expand_dims(axes = var_77_axes_0, x = mel_length)[name = tensor<string, []>("op_77")];
|
| 96 |
+
tensor<bool, [1, ?]> mask = greater_equal(x = expand_dims_0, y = var_77)[name = tensor<string, []>("mask")];
|
| 97 |
+
tensor<int32, [1]> var_79_axes_0 = const()[name = tensor<string, []>("op_79_axes_0"), val = tensor<int32, [1]>([1])];
|
| 98 |
+
tensor<bool, [1, 1, ?]> var_79 = expand_dims(axes = var_79_axes_0, x = mask)[name = tensor<string, []>("op_79")];
|
| 99 |
+
tensor<fp16, []> cast_6_to_fp16 = const()[name = tensor<string, []>("cast_6_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
|
| 100 |
+
tensor<fp16, [1, 128, ?]> processed_signal_cast_fp16 = select(a = cast_6_to_fp16, b = x_cast_fp16, cond = var_79)[name = tensor<string, []>("processed_signal_cast_fp16")];
|
| 101 |
+
tensor<string, []> processed_signal_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("processed_signal_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
|
| 102 |
+
tensor<fp32, [1, 128, ?]> mel = cast(dtype = processed_signal_cast_fp16_to_fp32_dtype_0, x = processed_signal_cast_fp16)[name = tensor<string, []>("cast_9")];
|
| 103 |
+
} -> (mel, mel_length);
|
| 104 |
+
}
|
preprocessor.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f257ad1ac11575d73a6ffda555319b2c96b0a224f0dc03ddd8c62950e9b18e53
|
| 3 |
+
size 592384
|
preprocessor.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eeb46c27ed7a75560111435ef86afbe9128669301b897d613a3fb1cbf8753fc2
|
| 3 |
+
size 13695
|
preprocessor.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f257ad1ac11575d73a6ffda555319b2c96b0a224f0dc03ddd8c62950e9b18e53
|
| 3 |
+
size 592384
|
preprocessor.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"41A87408-9448-4732-A714-AABD9E8264CD": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"D1D243D6-CE3A-446A-A657-4F2BA0FC58CE": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "D1D243D6-CE3A-446A-A657-4F2BA0FC58CE"
|
| 18 |
+
}
|
streaming_encoder.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d10e88a440fa9a238b34284f6be7310ebe682ec7f5240053007b26fe4991edc
|
| 3 |
+
size 243
|
streaming_encoder.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdcbb3050dd6912cbe43025b9d5e5bbbdfa9471bc08ce5b32565e51a75109638
|
| 3 |
+
size 594
|
streaming_encoder.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"metadataOutputVersion" : "3.0",
|
| 4 |
+
"storagePrecision" : "Float16",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float32",
|
| 10 |
+
"formattedType" : "MultiArray (Float32 1 × 512 × 4)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 512, 4]",
|
| 13 |
+
"name" : "encoder",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"hasShapeFlexibility" : "0",
|
| 18 |
+
"isOptional" : "0",
|
| 19 |
+
"dataType" : "Int32",
|
| 20 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 21 |
+
"shortDescription" : "",
|
| 22 |
+
"shape" : "[1]",
|
| 23 |
+
"name" : "encoder_length",
|
| 24 |
+
"type" : "MultiArray"
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"hasShapeFlexibility" : "0",
|
| 28 |
+
"isOptional" : "0",
|
| 29 |
+
"dataType" : "Float32",
|
| 30 |
+
"formattedType" : "MultiArray (Float32 17 × 1 × 70 × 512)",
|
| 31 |
+
"shortDescription" : "",
|
| 32 |
+
"shape" : "[17, 1, 70, 512]",
|
| 33 |
+
"name" : "cache_last_channel_out",
|
| 34 |
+
"type" : "MultiArray"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"hasShapeFlexibility" : "0",
|
| 38 |
+
"isOptional" : "0",
|
| 39 |
+
"dataType" : "Float32",
|
| 40 |
+
"formattedType" : "MultiArray (Float32 17 × 1 × 512 × 8)",
|
| 41 |
+
"shortDescription" : "",
|
| 42 |
+
"shape" : "[17, 1, 512, 8]",
|
| 43 |
+
"name" : "cache_last_time_out",
|
| 44 |
+
"type" : "MultiArray"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"hasShapeFlexibility" : "0",
|
| 48 |
+
"isOptional" : "0",
|
| 49 |
+
"dataType" : "Int32",
|
| 50 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 51 |
+
"shortDescription" : "",
|
| 52 |
+
"shape" : "[1]",
|
| 53 |
+
"name" : "cache_last_channel_len_out",
|
| 54 |
+
"type" : "MultiArray"
|
| 55 |
+
}
|
| 56 |
+
],
|
| 57 |
+
"modelParameters" : [
|
| 58 |
+
|
| 59 |
+
],
|
| 60 |
+
"specificationVersion" : 8,
|
| 61 |
+
"mlProgramOperationTypeHistogram" : {
|
| 62 |
+
"Ios17.floor" : 3,
|
| 63 |
+
"Ios17.logicalAnd" : 3,
|
| 64 |
+
"Ios17.reshape" : 103,
|
| 65 |
+
"Ios16.softmax" : 17,
|
| 66 |
+
"Ios17.matmul" : 51,
|
| 67 |
+
"Ios17.transpose" : 157,
|
| 68 |
+
"Split" : 17,
|
| 69 |
+
"Ios17.expandDims" : 6,
|
| 70 |
+
"Select" : 51,
|
| 71 |
+
"Ios17.add" : 125,
|
| 72 |
+
"Tile" : 1,
|
| 73 |
+
"Ios17.sliceByIndex" : 105,
|
| 74 |
+
"Ios16.sigmoid" : 17,
|
| 75 |
+
"Pad" : 20,
|
| 76 |
+
"Ios17.logicalNot" : 2,
|
| 77 |
+
"Ios17.layerNorm" : 102,
|
| 78 |
+
"Ios17.less" : 1,
|
| 79 |
+
"Ios17.sub" : 1,
|
| 80 |
+
"Ios17.conv" : 56,
|
| 81 |
+
"Ios17.clip" : 2,
|
| 82 |
+
"Ios16.relu" : 3,
|
| 83 |
+
"Ios17.linear" : 137,
|
| 84 |
+
"Ios17.greaterEqual" : 1,
|
| 85 |
+
"Ios17.cast" : 12,
|
| 86 |
+
"Ios16.silu" : 51,
|
| 87 |
+
"Ios17.concat" : 51,
|
| 88 |
+
"Stack" : 2,
|
| 89 |
+
"Ios17.mul" : 72
|
| 90 |
+
},
|
| 91 |
+
"computePrecision" : "Mixed (Float16, Float32, Int32)",
|
| 92 |
+
"isUpdatable" : "0",
|
| 93 |
+
"stateSchema" : [
|
| 94 |
+
|
| 95 |
+
],
|
| 96 |
+
"availability" : {
|
| 97 |
+
"macOS" : "14.0",
|
| 98 |
+
"tvOS" : "17.0",
|
| 99 |
+
"visionOS" : "1.0",
|
| 100 |
+
"watchOS" : "10.0",
|
| 101 |
+
"iOS" : "17.0",
|
| 102 |
+
"macCatalyst" : "17.0"
|
| 103 |
+
},
|
| 104 |
+
"modelType" : {
|
| 105 |
+
"name" : "MLModelType_mlProgram"
|
| 106 |
+
},
|
| 107 |
+
"userDefinedMetadata" : {
|
| 108 |
+
"com.github.apple.coremltools.version" : "8.3.0",
|
| 109 |
+
"com.github.apple.coremltools.source" : "torch==2.4.0",
|
| 110 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript"
|
| 111 |
+
},
|
| 112 |
+
"inputSchema" : [
|
| 113 |
+
{
|
| 114 |
+
"hasShapeFlexibility" : "0",
|
| 115 |
+
"isOptional" : "0",
|
| 116 |
+
"dataType" : "Float32",
|
| 117 |
+
"formattedType" : "MultiArray (Float32 1 × 128 × 41)",
|
| 118 |
+
"shortDescription" : "",
|
| 119 |
+
"shape" : "[1, 128, 41]",
|
| 120 |
+
"name" : "mel",
|
| 121 |
+
"type" : "MultiArray"
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"hasShapeFlexibility" : "0",
|
| 125 |
+
"isOptional" : "0",
|
| 126 |
+
"dataType" : "Int32",
|
| 127 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 128 |
+
"shortDescription" : "",
|
| 129 |
+
"shape" : "[1]",
|
| 130 |
+
"name" : "mel_length",
|
| 131 |
+
"type" : "MultiArray"
|
| 132 |
+
},
|
| 133 |
+
{
|
| 134 |
+
"hasShapeFlexibility" : "0",
|
| 135 |
+
"isOptional" : "0",
|
| 136 |
+
"dataType" : "Float32",
|
| 137 |
+
"formattedType" : "MultiArray (Float32 17 × 1 × 70 × 512)",
|
| 138 |
+
"shortDescription" : "",
|
| 139 |
+
"shape" : "[17, 1, 70, 512]",
|
| 140 |
+
"name" : "cache_last_channel",
|
| 141 |
+
"type" : "MultiArray"
|
| 142 |
+
},
|
| 143 |
+
{
|
| 144 |
+
"hasShapeFlexibility" : "0",
|
| 145 |
+
"isOptional" : "0",
|
| 146 |
+
"dataType" : "Float32",
|
| 147 |
+
"formattedType" : "MultiArray (Float32 17 × 1 × 512 × 8)",
|
| 148 |
+
"shortDescription" : "",
|
| 149 |
+
"shape" : "[17, 1, 512, 8]",
|
| 150 |
+
"name" : "cache_last_time",
|
| 151 |
+
"type" : "MultiArray"
|
| 152 |
+
},
|
| 153 |
+
{
|
| 154 |
+
"hasShapeFlexibility" : "0",
|
| 155 |
+
"isOptional" : "0",
|
| 156 |
+
"dataType" : "Int32",
|
| 157 |
+
"formattedType" : "MultiArray (Int32 1)",
|
| 158 |
+
"shortDescription" : "",
|
| 159 |
+
"shape" : "[1]",
|
| 160 |
+
"name" : "cache_last_channel_len",
|
| 161 |
+
"type" : "MultiArray"
|
| 162 |
+
}
|
| 163 |
+
],
|
| 164 |
+
"generatedClassName" : "streaming_encoder",
|
| 165 |
+
"method" : "predict"
|
| 166 |
+
}
|
| 167 |
+
]
|
streaming_encoder.mlmodelc/model.mil
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
streaming_encoder.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:671ab3731e79a55d8b405d633971e7d0bf1b4a6ad8f07133172e868755457905
|
| 3 |
+
size 212726592
|
streaming_encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:617c9a05405e9e6134838a5c760ab93b83f41b5c1407ce671526c172f94a0c9c
|
| 3 |
+
size 504210
|
streaming_encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:671ab3731e79a55d8b405d633971e7d0bf1b4a6ad8f07133172e868755457905
|
| 3 |
+
size 212726592
|
streaming_encoder.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"7B68916D-2718-4249-8DA5-9B31FEE8478A": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Weights",
|
| 7 |
+
"name": "weights",
|
| 8 |
+
"path": "com.apple.CoreML/weights"
|
| 9 |
+
},
|
| 10 |
+
"E9E4EE1E-0F56-46D5-9093-67095CF85F35": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Specification",
|
| 13 |
+
"name": "model.mlmodel",
|
| 14 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "E9E4EE1E-0F56-46D5-9093-67095CF85F35"
|
| 18 |
+
}
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d556e51ba5b89db64a8cb2be6798fb29974edcadb58b0c7b80418eb5d8752303
|
| 3 |
+
size 258183
|
vocab.json
CHANGED
|
@@ -1,1028 +1,1028 @@
|
|
| 1 |
-
|
| 2 |
-
"
|
| 3 |
-
"
|
| 4 |
-
"
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"
|
| 12 |
-
"
|
| 13 |
-
"
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
-
"
|
| 17 |
-
"
|
| 18 |
-
"
|
| 19 |
-
"
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
-
"
|
| 23 |
-
"
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
-
"
|
| 27 |
-
"
|
| 28 |
-
"
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
-
"
|
| 33 |
-
"
|
| 34 |
-
"
|
| 35 |
-
"
|
| 36 |
-
"
|
| 37 |
-
"
|
| 38 |
-
"
|
| 39 |
-
"
|
| 40 |
-
"
|
| 41 |
-
"
|
| 42 |
-
"
|
| 43 |
-
"
|
| 44 |
-
"
|
| 45 |
-
"
|
| 46 |
-
"
|
| 47 |
-
"
|
| 48 |
-
"
|
| 49 |
-
"
|
| 50 |
-
"
|
| 51 |
-
"
|
| 52 |
-
"
|
| 53 |
-
"
|
| 54 |
-
"
|
| 55 |
-
"
|
| 56 |
-
"
|
| 57 |
-
"
|
| 58 |
-
"
|
| 59 |
-
"
|
| 60 |
-
"
|
| 61 |
-
"
|
| 62 |
-
"
|
| 63 |
-
"
|
| 64 |
-
"
|
| 65 |
-
"
|
| 66 |
-
"
|
| 67 |
-
"
|
| 68 |
-
"
|
| 69 |
-
"
|
| 70 |
-
"
|
| 71 |
-
"
|
| 72 |
-
"
|
| 73 |
-
"
|
| 74 |
-
"
|
| 75 |
-
"
|
| 76 |
-
"
|
| 77 |
-
"
|
| 78 |
-
"
|
| 79 |
-
"
|
| 80 |
-
"
|
| 81 |
-
"
|
| 82 |
-
"
|
| 83 |
-
"
|
| 84 |
-
"
|
| 85 |
-
"
|
| 86 |
-
"
|
| 87 |
-
"
|
| 88 |
-
"
|
| 89 |
-
"
|
| 90 |
-
"
|
| 91 |
-
"
|
| 92 |
-
"
|
| 93 |
-
"
|
| 94 |
-
"
|
| 95 |
-
"
|
| 96 |
-
"
|
| 97 |
-
"
|
| 98 |
-
"
|
| 99 |
-
"
|
| 100 |
-
"
|
| 101 |
-
"
|
| 102 |
-
"
|
| 103 |
-
"
|
| 104 |
-
"
|
| 105 |
-
"
|
| 106 |
-
"
|
| 107 |
-
"
|
| 108 |
-
"
|
| 109 |
-
"
|
| 110 |
-
"
|
| 111 |
-
"
|
| 112 |
-
"
|
| 113 |
-
"
|
| 114 |
-
"
|
| 115 |
-
"
|
| 116 |
-
"
|
| 117 |
-
"
|
| 118 |
-
"
|
| 119 |
-
"
|
| 120 |
-
"
|
| 121 |
-
"
|
| 122 |
-
"
|
| 123 |
-
"
|
| 124 |
-
"
|
| 125 |
-
"
|
| 126 |
-
"
|
| 127 |
-
"
|
| 128 |
-
"
|
| 129 |
-
"
|
| 130 |
-
"
|
| 131 |
-
"
|
| 132 |
-
"
|
| 133 |
-
"
|
| 134 |
-
"
|
| 135 |
-
"
|
| 136 |
-
"
|
| 137 |
-
"
|
| 138 |
-
"
|
| 139 |
-
"
|
| 140 |
-
"
|
| 141 |
-
"
|
| 142 |
-
"
|
| 143 |
-
"
|
| 144 |
-
"
|
| 145 |
-
"
|
| 146 |
-
"
|
| 147 |
-
"
|
| 148 |
-
"
|
| 149 |
-
"
|
| 150 |
-
"
|
| 151 |
-
"
|
| 152 |
-
"
|
| 153 |
-
"
|
| 154 |
-
"
|
| 155 |
-
"
|
| 156 |
-
"
|
| 157 |
-
"
|
| 158 |
-
"
|
| 159 |
-
"
|
| 160 |
-
"
|
| 161 |
-
"
|
| 162 |
-
"
|
| 163 |
-
"
|
| 164 |
-
"
|
| 165 |
-
"
|
| 166 |
-
"
|
| 167 |
-
"
|
| 168 |
-
"
|
| 169 |
-
"
|
| 170 |
-
"
|
| 171 |
-
"
|
| 172 |
-
"
|
| 173 |
-
"
|
| 174 |
-
"
|
| 175 |
-
"
|
| 176 |
-
"
|
| 177 |
-
"
|
| 178 |
-
"
|
| 179 |
-
"
|
| 180 |
-
"
|
| 181 |
-
"
|
| 182 |
-
"
|
| 183 |
-
"
|
| 184 |
-
"
|
| 185 |
-
"
|
| 186 |
-
"
|
| 187 |
-
"
|
| 188 |
-
"
|
| 189 |
-
"
|
| 190 |
-
"
|
| 191 |
-
"
|
| 192 |
-
"
|
| 193 |
-
"
|
| 194 |
-
"
|
| 195 |
-
"
|
| 196 |
-
"
|
| 197 |
-
"
|
| 198 |
-
"
|
| 199 |
-
"
|
| 200 |
-
"
|
| 201 |
-
"
|
| 202 |
-
"
|
| 203 |
-
"
|
| 204 |
-
"
|
| 205 |
-
"
|
| 206 |
-
"
|
| 207 |
-
"
|
| 208 |
-
"
|
| 209 |
-
"
|
| 210 |
-
"
|
| 211 |
-
"
|
| 212 |
-
"
|
| 213 |
-
"
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
| 223 |
-
"
|
| 224 |
-
"
|
| 225 |
-
"
|
| 226 |
-
"
|
| 227 |
-
"
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
| 233 |
-
"
|
| 234 |
-
"
|
| 235 |
-
"
|
| 236 |
-
"
|
| 237 |
-
"
|
| 238 |
-
"
|
| 239 |
-
"
|
| 240 |
-
"
|
| 241 |
-
"
|
| 242 |
-
"
|
| 243 |
-
"
|
| 244 |
-
"
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
"
|
| 248 |
-
"
|
| 249 |
-
"
|
| 250 |
-
"
|
| 251 |
-
"
|
| 252 |
-
"
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
-
"
|
| 256 |
-
"
|
| 257 |
-
"
|
| 258 |
-
"
|
| 259 |
-
"
|
| 260 |
-
"
|
| 261 |
-
"
|
| 262 |
-
"
|
| 263 |
-
"
|
| 264 |
-
"
|
| 265 |
-
"
|
| 266 |
-
"
|
| 267 |
-
"
|
| 268 |
-
"
|
| 269 |
-
"
|
| 270 |
-
"
|
| 271 |
-
"
|
| 272 |
-
"
|
| 273 |
-
"
|
| 274 |
-
"
|
| 275 |
-
"
|
| 276 |
-
"
|
| 277 |
-
"
|
| 278 |
-
"
|
| 279 |
-
"
|
| 280 |
-
"
|
| 281 |
-
"
|
| 282 |
-
"
|
| 283 |
-
"
|
| 284 |
-
"
|
| 285 |
-
"
|
| 286 |
-
"
|
| 287 |
-
"
|
| 288 |
-
"
|
| 289 |
-
"
|
| 290 |
-
"
|
| 291 |
-
"
|
| 292 |
-
"
|
| 293 |
-
"
|
| 294 |
-
"
|
| 295 |
-
"
|
| 296 |
-
"
|
| 297 |
-
"
|
| 298 |
-
"
|
| 299 |
-
"
|
| 300 |
-
"
|
| 301 |
-
"
|
| 302 |
-
"
|
| 303 |
-
"
|
| 304 |
-
"
|
| 305 |
-
"
|
| 306 |
-
"
|
| 307 |
-
"
|
| 308 |
-
"
|
| 309 |
-
"
|
| 310 |
-
"
|
| 311 |
-
"
|
| 312 |
-
"
|
| 313 |
-
"
|
| 314 |
-
"
|
| 315 |
-
"
|
| 316 |
-
"
|
| 317 |
-
"
|
| 318 |
-
"
|
| 319 |
-
"
|
| 320 |
-
"
|
| 321 |
-
"
|
| 322 |
-
"
|
| 323 |
-
"
|
| 324 |
-
"
|
| 325 |
-
"
|
| 326 |
-
"
|
| 327 |
-
"
|
| 328 |
-
"
|
| 329 |
-
"
|
| 330 |
-
"
|
| 331 |
-
"
|
| 332 |
-
"
|
| 333 |
-
"
|
| 334 |
-
"
|
| 335 |
-
"
|
| 336 |
-
"
|
| 337 |
-
"
|
| 338 |
-
"
|
| 339 |
-
"
|
| 340 |
-
"
|
| 341 |
-
"
|
| 342 |
-
"
|
| 343 |
-
"
|
| 344 |
-
"
|
| 345 |
-
"
|
| 346 |
-
"
|
| 347 |
-
"
|
| 348 |
-
"
|
| 349 |
-
"
|
| 350 |
-
"
|
| 351 |
-
"
|
| 352 |
-
"
|
| 353 |
-
"
|
| 354 |
-
"
|
| 355 |
-
"
|
| 356 |
-
"
|
| 357 |
-
"
|
| 358 |
-
"
|
| 359 |
-
"
|
| 360 |
-
"
|
| 361 |
-
"
|
| 362 |
-
"
|
| 363 |
-
"
|
| 364 |
-
"
|
| 365 |
-
"
|
| 366 |
-
"
|
| 367 |
-
"
|
| 368 |
-
"
|
| 369 |
-
"
|
| 370 |
-
"
|
| 371 |
-
"
|
| 372 |
-
"
|
| 373 |
-
"
|
| 374 |
-
"
|
| 375 |
-
"
|
| 376 |
-
"
|
| 377 |
-
"
|
| 378 |
-
"
|
| 379 |
-
"
|
| 380 |
-
"
|
| 381 |
-
"
|
| 382 |
-
"
|
| 383 |
-
"
|
| 384 |
-
"
|
| 385 |
-
"
|
| 386 |
-
"
|
| 387 |
-
"
|
| 388 |
-
"
|
| 389 |
-
"
|
| 390 |
-
"
|
| 391 |
-
"
|
| 392 |
-
"
|
| 393 |
-
"
|
| 394 |
-
"
|
| 395 |
-
"
|
| 396 |
-
"
|
| 397 |
-
"
|
| 398 |
-
"
|
| 399 |
-
"
|
| 400 |
-
"
|
| 401 |
-
"
|
| 402 |
-
"
|
| 403 |
-
"
|
| 404 |
-
"
|
| 405 |
-
"
|
| 406 |
-
"
|
| 407 |
-
"
|
| 408 |
-
"
|
| 409 |
-
"
|
| 410 |
-
"
|
| 411 |
-
"
|
| 412 |
-
"
|
| 413 |
-
"
|
| 414 |
-
"
|
| 415 |
-
"
|
| 416 |
-
"
|
| 417 |
-
"
|
| 418 |
-
"
|
| 419 |
-
"
|
| 420 |
-
"
|
| 421 |
-
"
|
| 422 |
-
"
|
| 423 |
-
"
|
| 424 |
-
"
|
| 425 |
-
"
|
| 426 |
-
"
|
| 427 |
-
"
|
| 428 |
-
"
|
| 429 |
-
"
|
| 430 |
-
"
|
| 431 |
-
"
|
| 432 |
-
"
|
| 433 |
-
"
|
| 434 |
-
"
|
| 435 |
-
"
|
| 436 |
-
"
|
| 437 |
-
"
|
| 438 |
-
"
|
| 439 |
-
"
|
| 440 |
-
"
|
| 441 |
-
"
|
| 442 |
-
"
|
| 443 |
-
"
|
| 444 |
-
"
|
| 445 |
-
"
|
| 446 |
-
"
|
| 447 |
-
"
|
| 448 |
-
"
|
| 449 |
-
"
|
| 450 |
-
"
|
| 451 |
-
"
|
| 452 |
-
"
|
| 453 |
-
"
|
| 454 |
-
"
|
| 455 |
-
"
|
| 456 |
-
"
|
| 457 |
-
"
|
| 458 |
-
"
|
| 459 |
-
"
|
| 460 |
-
"
|
| 461 |
-
"
|
| 462 |
-
"
|
| 463 |
-
"
|
| 464 |
-
"
|
| 465 |
-
"
|
| 466 |
-
"
|
| 467 |
-
"
|
| 468 |
-
"
|
| 469 |
-
"
|
| 470 |
-
"
|
| 471 |
-
"
|
| 472 |
-
"
|
| 473 |
-
"
|
| 474 |
-
"
|
| 475 |
-
"
|
| 476 |
-
"
|
| 477 |
-
"
|
| 478 |
-
"
|
| 479 |
-
"
|
| 480 |
-
"
|
| 481 |
-
"
|
| 482 |
-
"
|
| 483 |
-
"
|
| 484 |
-
"
|
| 485 |
-
"
|
| 486 |
-
"
|
| 487 |
-
"
|
| 488 |
-
"
|
| 489 |
-
"
|
| 490 |
-
"
|
| 491 |
-
"
|
| 492 |
-
"
|
| 493 |
-
"
|
| 494 |
-
"
|
| 495 |
-
"
|
| 496 |
-
"
|
| 497 |
-
"
|
| 498 |
-
"
|
| 499 |
-
"
|
| 500 |
-
"
|
| 501 |
-
"
|
| 502 |
-
"
|
| 503 |
-
"
|
| 504 |
-
"
|
| 505 |
-
"
|
| 506 |
-
"
|
| 507 |
-
"
|
| 508 |
-
"
|
| 509 |
-
"
|
| 510 |
-
"
|
| 511 |
-
"
|
| 512 |
-
"
|
| 513 |
-
"
|
| 514 |
-
"
|
| 515 |
-
"
|
| 516 |
-
"
|
| 517 |
-
"
|
| 518 |
-
"
|
| 519 |
-
"
|
| 520 |
-
"
|
| 521 |
-
"
|
| 522 |
-
"
|
| 523 |
-
"
|
| 524 |
-
"
|
| 525 |
-
"
|
| 526 |
-
"
|
| 527 |
-
"
|
| 528 |
-
"
|
| 529 |
-
"
|
| 530 |
-
"
|
| 531 |
-
"
|
| 532 |
-
"
|
| 533 |
-
"
|
| 534 |
-
"
|
| 535 |
-
"
|
| 536 |
-
"
|
| 537 |
-
"
|
| 538 |
-
"
|
| 539 |
-
"
|
| 540 |
-
"
|
| 541 |
-
"
|
| 542 |
-
"
|
| 543 |
-
"
|
| 544 |
-
"
|
| 545 |
-
"
|
| 546 |
-
"
|
| 547 |
-
"
|
| 548 |
-
"
|
| 549 |
-
"
|
| 550 |
-
"
|
| 551 |
-
"
|
| 552 |
-
"
|
| 553 |
-
"
|
| 554 |
-
"
|
| 555 |
-
"
|
| 556 |
-
"
|
| 557 |
-
"
|
| 558 |
-
"
|
| 559 |
-
"
|
| 560 |
-
"
|
| 561 |
-
"
|
| 562 |
-
"
|
| 563 |
-
"
|
| 564 |
-
"
|
| 565 |
-
"
|
| 566 |
-
"
|
| 567 |
-
"
|
| 568 |
-
"
|
| 569 |
-
"
|
| 570 |
-
"
|
| 571 |
-
"
|
| 572 |
-
"
|
| 573 |
-
"
|
| 574 |
-
"
|
| 575 |
-
"
|
| 576 |
-
"
|
| 577 |
-
"
|
| 578 |
-
"
|
| 579 |
-
"
|
| 580 |
-
"
|
| 581 |
-
"
|
| 582 |
-
"
|
| 583 |
-
"
|
| 584 |
-
"
|
| 585 |
-
"
|
| 586 |
-
"
|
| 587 |
-
"
|
| 588 |
-
"
|
| 589 |
-
"
|
| 590 |
-
"
|
| 591 |
-
"
|
| 592 |
-
"
|
| 593 |
-
"
|
| 594 |
-
"
|
| 595 |
-
"
|
| 596 |
-
"
|
| 597 |
-
"
|
| 598 |
-
"
|
| 599 |
-
"
|
| 600 |
-
"
|
| 601 |
-
"
|
| 602 |
-
"
|
| 603 |
-
"
|
| 604 |
-
"
|
| 605 |
-
"
|
| 606 |
-
"
|
| 607 |
-
"
|
| 608 |
-
"
|
| 609 |
-
"
|
| 610 |
-
"
|
| 611 |
-
"
|
| 612 |
-
"
|
| 613 |
-
"
|
| 614 |
-
"
|
| 615 |
-
"
|
| 616 |
-
"
|
| 617 |
-
"
|
| 618 |
-
"
|
| 619 |
-
"
|
| 620 |
-
"
|
| 621 |
-
"
|
| 622 |
-
"
|
| 623 |
-
"
|
| 624 |
-
"
|
| 625 |
-
"
|
| 626 |
-
"
|
| 627 |
-
"
|
| 628 |
-
"
|
| 629 |
-
"
|
| 630 |
-
"
|
| 631 |
-
"
|
| 632 |
-
"
|
| 633 |
-
"
|
| 634 |
-
"
|
| 635 |
-
"
|
| 636 |
-
"
|
| 637 |
-
"
|
| 638 |
-
"
|
| 639 |
-
"
|
| 640 |
-
"
|
| 641 |
-
"
|
| 642 |
-
"
|
| 643 |
-
"
|
| 644 |
-
"
|
| 645 |
-
"
|
| 646 |
-
"
|
| 647 |
-
"
|
| 648 |
-
"
|
| 649 |
-
"
|
| 650 |
-
"
|
| 651 |
-
"
|
| 652 |
-
"
|
| 653 |
-
"
|
| 654 |
-
"
|
| 655 |
-
"
|
| 656 |
-
"
|
| 657 |
-
"
|
| 658 |
-
"
|
| 659 |
-
"
|
| 660 |
-
"
|
| 661 |
-
"
|
| 662 |
-
"
|
| 663 |
-
"
|
| 664 |
-
"
|
| 665 |
-
"
|
| 666 |
-
"
|
| 667 |
-
"
|
| 668 |
-
"
|
| 669 |
-
"
|
| 670 |
-
"
|
| 671 |
-
"
|
| 672 |
-
"
|
| 673 |
-
"
|
| 674 |
-
"
|
| 675 |
-
"
|
| 676 |
-
"
|
| 677 |
-
"
|
| 678 |
-
"
|
| 679 |
-
"
|
| 680 |
-
"
|
| 681 |
-
"
|
| 682 |
-
"
|
| 683 |
-
"
|
| 684 |
-
"
|
| 685 |
-
"
|
| 686 |
-
"
|
| 687 |
-
"
|
| 688 |
-
"
|
| 689 |
-
"
|
| 690 |
-
"
|
| 691 |
-
"
|
| 692 |
-
"
|
| 693 |
-
"
|
| 694 |
-
"
|
| 695 |
-
"
|
| 696 |
-
"
|
| 697 |
-
"
|
| 698 |
-
"
|
| 699 |
-
"
|
| 700 |
-
"
|
| 701 |
-
"
|
| 702 |
-
"
|
| 703 |
-
"
|
| 704 |
-
"
|
| 705 |
-
"
|
| 706 |
-
"
|
| 707 |
-
"
|
| 708 |
-
"
|
| 709 |
-
"
|
| 710 |
-
"
|
| 711 |
-
"
|
| 712 |
-
"
|
| 713 |
-
"
|
| 714 |
-
"
|
| 715 |
-
"
|
| 716 |
-
"
|
| 717 |
-
"
|
| 718 |
-
"
|
| 719 |
-
"
|
| 720 |
-
"
|
| 721 |
-
"
|
| 722 |
-
"
|
| 723 |
-
"
|
| 724 |
-
"
|
| 725 |
-
"
|
| 726 |
-
"
|
| 727 |
-
"
|
| 728 |
-
"
|
| 729 |
-
"
|
| 730 |
-
"
|
| 731 |
-
"
|
| 732 |
-
"
|
| 733 |
-
"
|
| 734 |
-
"
|
| 735 |
-
"
|
| 736 |
-
"
|
| 737 |
-
"
|
| 738 |
-
"
|
| 739 |
-
"
|
| 740 |
-
"
|
| 741 |
-
"
|
| 742 |
-
"
|
| 743 |
-
"
|
| 744 |
-
"
|
| 745 |
-
"
|
| 746 |
-
"
|
| 747 |
-
"
|
| 748 |
-
"
|
| 749 |
-
"
|
| 750 |
-
"
|
| 751 |
-
"
|
| 752 |
-
"
|
| 753 |
-
"
|
| 754 |
-
"
|
| 755 |
-
"
|
| 756 |
-
"
|
| 757 |
-
"
|
| 758 |
-
"
|
| 759 |
-
"
|
| 760 |
-
"
|
| 761 |
-
"
|
| 762 |
-
"
|
| 763 |
-
"
|
| 764 |
-
"
|
| 765 |
-
"
|
| 766 |
-
"
|
| 767 |
-
"
|
| 768 |
-
"
|
| 769 |
-
"
|
| 770 |
-
"
|
| 771 |
-
"
|
| 772 |
-
"
|
| 773 |
-
"
|
| 774 |
-
"
|
| 775 |
-
"
|
| 776 |
-
"
|
| 777 |
-
"
|
| 778 |
-
"
|
| 779 |
-
"
|
| 780 |
-
"
|
| 781 |
-
"
|
| 782 |
-
"
|
| 783 |
-
"
|
| 784 |
-
"
|
| 785 |
-
"
|
| 786 |
-
"
|
| 787 |
-
"
|
| 788 |
-
"
|
| 789 |
-
"
|
| 790 |
-
"
|
| 791 |
-
"
|
| 792 |
-
"
|
| 793 |
-
"
|
| 794 |
-
"
|
| 795 |
-
"
|
| 796 |
-
"
|
| 797 |
-
"
|
| 798 |
-
"
|
| 799 |
-
"
|
| 800 |
-
"
|
| 801 |
-
"
|
| 802 |
-
"
|
| 803 |
-
"
|
| 804 |
-
"
|
| 805 |
-
"
|
| 806 |
-
"
|
| 807 |
-
"
|
| 808 |
-
"
|
| 809 |
-
"
|
| 810 |
-
"
|
| 811 |
-
"
|
| 812 |
-
"
|
| 813 |
-
"
|
| 814 |
-
"
|
| 815 |
-
"
|
| 816 |
-
"
|
| 817 |
-
"
|
| 818 |
-
"
|
| 819 |
-
"
|
| 820 |
-
"
|
| 821 |
-
"
|
| 822 |
-
"
|
| 823 |
-
"
|
| 824 |
-
"
|
| 825 |
-
"
|
| 826 |
-
"
|
| 827 |
-
"
|
| 828 |
-
"
|
| 829 |
-
"
|
| 830 |
-
"
|
| 831 |
-
"
|
| 832 |
-
"
|
| 833 |
-
"
|
| 834 |
-
"
|
| 835 |
-
"
|
| 836 |
-
"
|
| 837 |
-
"
|
| 838 |
-
"
|
| 839 |
-
"
|
| 840 |
-
"
|
| 841 |
-
"
|
| 842 |
-
"
|
| 843 |
-
"
|
| 844 |
-
"
|
| 845 |
-
"
|
| 846 |
-
"
|
| 847 |
-
"
|
| 848 |
-
"
|
| 849 |
-
"
|
| 850 |
-
"
|
| 851 |
-
"
|
| 852 |
-
"
|
| 853 |
-
"
|
| 854 |
-
"
|
| 855 |
-
"
|
| 856 |
-
"
|
| 857 |
-
"
|
| 858 |
-
"
|
| 859 |
-
"
|
| 860 |
-
"
|
| 861 |
-
"
|
| 862 |
-
"
|
| 863 |
-
"
|
| 864 |
-
"
|
| 865 |
-
"
|
| 866 |
-
"
|
| 867 |
-
"
|
| 868 |
-
"
|
| 869 |
-
"
|
| 870 |
-
"
|
| 871 |
-
"
|
| 872 |
-
"
|
| 873 |
-
"
|
| 874 |
-
"
|
| 875 |
-
"
|
| 876 |
-
"
|
| 877 |
-
"
|
| 878 |
-
"
|
| 879 |
-
"
|
| 880 |
-
"
|
| 881 |
-
"
|
| 882 |
-
"
|
| 883 |
-
"
|
| 884 |
-
"
|
| 885 |
-
"
|
| 886 |
-
"
|
| 887 |
-
"
|
| 888 |
-
"
|
| 889 |
-
"
|
| 890 |
-
"
|
| 891 |
-
"
|
| 892 |
-
"
|
| 893 |
-
"
|
| 894 |
-
"
|
| 895 |
-
"
|
| 896 |
-
"
|
| 897 |
-
"
|
| 898 |
-
"
|
| 899 |
-
"
|
| 900 |
-
"
|
| 901 |
-
"
|
| 902 |
-
"
|
| 903 |
-
"
|
| 904 |
-
"
|
| 905 |
-
"
|
| 906 |
-
"
|
| 907 |
-
"
|
| 908 |
-
"
|
| 909 |
-
"
|
| 910 |
-
"
|
| 911 |
-
"
|
| 912 |
-
"
|
| 913 |
-
"
|
| 914 |
-
"
|
| 915 |
-
"
|
| 916 |
-
"
|
| 917 |
-
"
|
| 918 |
-
"
|
| 919 |
-
"
|
| 920 |
-
"
|
| 921 |
-
"
|
| 922 |
-
"
|
| 923 |
-
"
|
| 924 |
-
"
|
| 925 |
-
"
|
| 926 |
-
"
|
| 927 |
-
"
|
| 928 |
-
"
|
| 929 |
-
"
|
| 930 |
-
"
|
| 931 |
-
"
|
| 932 |
-
"
|
| 933 |
-
"
|
| 934 |
-
"
|
| 935 |
-
"
|
| 936 |
-
"
|
| 937 |
-
"
|
| 938 |
-
"
|
| 939 |
-
"
|
| 940 |
-
"
|
| 941 |
-
"
|
| 942 |
-
"
|
| 943 |
-
"
|
| 944 |
-
"
|
| 945 |
-
"
|
| 946 |
-
"
|
| 947 |
-
"
|
| 948 |
-
"
|
| 949 |
-
"
|
| 950 |
-
"
|
| 951 |
-
"
|
| 952 |
-
"
|
| 953 |
-
"
|
| 954 |
-
"
|
| 955 |
-
"
|
| 956 |
-
"
|
| 957 |
-
"
|
| 958 |
-
"
|
| 959 |
-
"
|
| 960 |
-
"
|
| 961 |
-
"
|
| 962 |
-
"
|
| 963 |
-
"
|
| 964 |
-
"
|
| 965 |
-
"
|
| 966 |
-
"
|
| 967 |
-
"
|
| 968 |
-
"
|
| 969 |
-
"
|
| 970 |
-
"
|
| 971 |
-
"
|
| 972 |
-
"
|
| 973 |
-
"
|
| 974 |
-
"
|
| 975 |
-
"
|
| 976 |
-
"
|
| 977 |
-
"
|
| 978 |
-
"
|
| 979 |
-
"
|
| 980 |
-
"
|
| 981 |
-
"
|
| 982 |
-
"
|
| 983 |
-
"
|
| 984 |
-
"
|
| 985 |
-
"
|
| 986 |
-
"
|
| 987 |
-
"
|
| 988 |
-
"
|
| 989 |
-
"
|
| 990 |
-
"
|
| 991 |
-
"
|
| 992 |
-
"
|
| 993 |
-
"
|
| 994 |
-
"
|
| 995 |
-
"
|
| 996 |
-
"
|
| 997 |
-
"
|
| 998 |
-
"
|
| 999 |
-
"
|
| 1000 |
-
"
|
| 1001 |
-
"
|
| 1002 |
-
"
|
| 1003 |
-
"
|
| 1004 |
-
"
|
| 1005 |
-
"
|
| 1006 |
-
"
|
| 1007 |
-
"
|
| 1008 |
-
"
|
| 1009 |
-
"
|
| 1010 |
-
"
|
| 1011 |
-
"
|
| 1012 |
-
"
|
| 1013 |
-
"
|
| 1014 |
-
"
|
| 1015 |
-
"
|
| 1016 |
-
"
|
| 1017 |
-
"
|
| 1018 |
-
"
|
| 1019 |
-
"
|
| 1020 |
-
"
|
| 1021 |
-
"
|
| 1022 |
-
"
|
| 1023 |
-
"
|
| 1024 |
-
"
|
| 1025 |
-
"
|
| 1026 |
-
"
|
| 1027 |
-
"
|
| 1028 |
-
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"<unk>",
|
| 3 |
+
"\u2581t",
|
| 4 |
+
"\u2581th",
|
| 5 |
+
"\u2581a",
|
| 6 |
+
"\u2581i",
|
| 7 |
+
"\u2581the",
|
| 8 |
+
"\u2581s",
|
| 9 |
+
"re",
|
| 10 |
+
"\u2581w",
|
| 11 |
+
"\u2581o",
|
| 12 |
+
"in",
|
| 13 |
+
"at",
|
| 14 |
+
"er",
|
| 15 |
+
"nd",
|
| 16 |
+
"ou",
|
| 17 |
+
"\u2581c",
|
| 18 |
+
"\u2581b",
|
| 19 |
+
"\u2581h",
|
| 20 |
+
"en",
|
| 21 |
+
"on",
|
| 22 |
+
"\u2581m",
|
| 23 |
+
"\u2581f",
|
| 24 |
+
"ing",
|
| 25 |
+
"\u2581p",
|
| 26 |
+
"\u2581to",
|
| 27 |
+
"\u2581and",
|
| 28 |
+
"\u2581d",
|
| 29 |
+
"an",
|
| 30 |
+
"or",
|
| 31 |
+
"es",
|
| 32 |
+
"\u2581y",
|
| 33 |
+
"\u2581l",
|
| 34 |
+
"\u2581of",
|
| 35 |
+
"ll",
|
| 36 |
+
"\u2581in",
|
| 37 |
+
"ed",
|
| 38 |
+
"it",
|
| 39 |
+
"\u2581g",
|
| 40 |
+
"is",
|
| 41 |
+
"\u2581you",
|
| 42 |
+
"\u2581n",
|
| 43 |
+
"ar",
|
| 44 |
+
"om",
|
| 45 |
+
"as",
|
| 46 |
+
"ve",
|
| 47 |
+
"\u2581e",
|
| 48 |
+
"ic",
|
| 49 |
+
"\u2581it",
|
| 50 |
+
"al",
|
| 51 |
+
"us",
|
| 52 |
+
"\u2581wh",
|
| 53 |
+
"\u2581we",
|
| 54 |
+
"\u2581be",
|
| 55 |
+
"ion",
|
| 56 |
+
"ow",
|
| 57 |
+
"le",
|
| 58 |
+
"\u2581is",
|
| 59 |
+
"et",
|
| 60 |
+
"ent",
|
| 61 |
+
"ot",
|
| 62 |
+
"ut",
|
| 63 |
+
"\u2581re",
|
| 64 |
+
"\u2581on",
|
| 65 |
+
"ay",
|
| 66 |
+
"\u2581ha",
|
| 67 |
+
"ig",
|
| 68 |
+
"\u2581so",
|
| 69 |
+
"ct",
|
| 70 |
+
"\u2581he",
|
| 71 |
+
"\u2581for",
|
| 72 |
+
"ver",
|
| 73 |
+
"ke",
|
| 74 |
+
"ro",
|
| 75 |
+
"\u2581st",
|
| 76 |
+
"id",
|
| 77 |
+
"\u2581go",
|
| 78 |
+
"all",
|
| 79 |
+
"se",
|
| 80 |
+
"ly",
|
| 81 |
+
"\u2581u",
|
| 82 |
+
"ch",
|
| 83 |
+
"st",
|
| 84 |
+
"ld",
|
| 85 |
+
"\u2581k",
|
| 86 |
+
"ce",
|
| 87 |
+
"ur",
|
| 88 |
+
"\u2581li",
|
| 89 |
+
"am",
|
| 90 |
+
"\u2581r",
|
| 91 |
+
"ht",
|
| 92 |
+
"\u2581j",
|
| 93 |
+
"ith",
|
| 94 |
+
"\u2581se",
|
| 95 |
+
"ir",
|
| 96 |
+
"\u2581as",
|
| 97 |
+
"\u2581an",
|
| 98 |
+
"im",
|
| 99 |
+
"\u2581do",
|
| 100 |
+
"ad",
|
| 101 |
+
"\u2581was",
|
| 102 |
+
"ight",
|
| 103 |
+
"th",
|
| 104 |
+
"\u2581are",
|
| 105 |
+
"\u2581but",
|
| 106 |
+
"\u2581sh",
|
| 107 |
+
"ust",
|
| 108 |
+
"ally",
|
| 109 |
+
"\u2581not",
|
| 110 |
+
"\u2581or",
|
| 111 |
+
"\u2581com",
|
| 112 |
+
"\u2581can",
|
| 113 |
+
"\u2581me",
|
| 114 |
+
"op",
|
| 115 |
+
"\u2581mo",
|
| 116 |
+
"\u2581at",
|
| 117 |
+
"ill",
|
| 118 |
+
"\u2581ch",
|
| 119 |
+
"\u2581ne",
|
| 120 |
+
"ant",
|
| 121 |
+
"\u2581de",
|
| 122 |
+
"\u2581kn",
|
| 123 |
+
"\u2581one",
|
| 124 |
+
"il",
|
| 125 |
+
"ol",
|
| 126 |
+
"\u2581con",
|
| 127 |
+
"ter",
|
| 128 |
+
"\u2581ab",
|
| 129 |
+
"\u2581fr",
|
| 130 |
+
"ere",
|
| 131 |
+
"ck",
|
| 132 |
+
"\u2581al",
|
| 133 |
+
"\u2581all",
|
| 134 |
+
"qu",
|
| 135 |
+
"\u2581pro",
|
| 136 |
+
"\u2581som",
|
| 137 |
+
"ould",
|
| 138 |
+
"\u2581tw",
|
| 139 |
+
"ul",
|
| 140 |
+
"ra",
|
| 141 |
+
"od",
|
| 142 |
+
"ers",
|
| 143 |
+
"\u2581su",
|
| 144 |
+
"ive",
|
| 145 |
+
"\u2581v",
|
| 146 |
+
"use",
|
| 147 |
+
"ate",
|
| 148 |
+
"ge",
|
| 149 |
+
"if",
|
| 150 |
+
"\u2581ex",
|
| 151 |
+
"ess",
|
| 152 |
+
"pp",
|
| 153 |
+
"\u2581lo",
|
| 154 |
+
"out",
|
| 155 |
+
"\u2581if",
|
| 156 |
+
"est",
|
| 157 |
+
"ain",
|
| 158 |
+
"ist",
|
| 159 |
+
"and",
|
| 160 |
+
"ea",
|
| 161 |
+
"very",
|
| 162 |
+
"art",
|
| 163 |
+
"\u2581wor",
|
| 164 |
+
"\u2581my",
|
| 165 |
+
"ab",
|
| 166 |
+
"ment",
|
| 167 |
+
"\u2581bec",
|
| 168 |
+
"un",
|
| 169 |
+
"ity",
|
| 170 |
+
"ri",
|
| 171 |
+
"pe",
|
| 172 |
+
"ions",
|
| 173 |
+
"\u2581by",
|
| 174 |
+
"ok",
|
| 175 |
+
"our",
|
| 176 |
+
"ort",
|
| 177 |
+
"ind",
|
| 178 |
+
"ink",
|
| 179 |
+
"nt",
|
| 180 |
+
"\u2581up",
|
| 181 |
+
"um",
|
| 182 |
+
"\u2581don",
|
| 183 |
+
"\u2581get",
|
| 184 |
+
"red",
|
| 185 |
+
"\u2581out",
|
| 186 |
+
"el",
|
| 187 |
+
"ause",
|
| 188 |
+
"res",
|
| 189 |
+
"\u2581ma",
|
| 190 |
+
"ich",
|
| 191 |
+
"\u2581us",
|
| 192 |
+
"rou",
|
| 193 |
+
"\u2581int",
|
| 194 |
+
"em",
|
| 195 |
+
"os",
|
| 196 |
+
"ies",
|
| 197 |
+
"ie",
|
| 198 |
+
"\u2581pl",
|
| 199 |
+
"\u2581tr",
|
| 200 |
+
"ven",
|
| 201 |
+
"ous",
|
| 202 |
+
"\u2581le",
|
| 203 |
+
"\u2581two",
|
| 204 |
+
"ard",
|
| 205 |
+
"ine",
|
| 206 |
+
"\u2581co",
|
| 207 |
+
"een",
|
| 208 |
+
"\u2581now",
|
| 209 |
+
"ty",
|
| 210 |
+
"her",
|
| 211 |
+
"ack",
|
| 212 |
+
"\u2581pe",
|
| 213 |
+
"ame",
|
| 214 |
+
"\u2581how",
|
| 215 |
+
"\u2581who",
|
| 216 |
+
"\u2581see",
|
| 217 |
+
"\u2581tim",
|
| 218 |
+
"ect",
|
| 219 |
+
"ast",
|
| 220 |
+
"\u2581our",
|
| 221 |
+
"ci",
|
| 222 |
+
"ree",
|
| 223 |
+
"ople",
|
| 224 |
+
"gh",
|
| 225 |
+
"\u2581no",
|
| 226 |
+
"\u2581had",
|
| 227 |
+
"\u2581man",
|
| 228 |
+
"\u2581qu",
|
| 229 |
+
"\u2581en",
|
| 230 |
+
"ide",
|
| 231 |
+
"ure",
|
| 232 |
+
"ud",
|
| 233 |
+
"so",
|
| 234 |
+
"\u2581his",
|
| 235 |
+
"\u2581sa",
|
| 236 |
+
"\u2581sp",
|
| 237 |
+
"\u2581say",
|
| 238 |
+
"ose",
|
| 239 |
+
"ther",
|
| 240 |
+
"\u2581act",
|
| 241 |
+
"\u2581ta",
|
| 242 |
+
"\u2581cl",
|
| 243 |
+
"ings",
|
| 244 |
+
"pt",
|
| 245 |
+
"king",
|
| 246 |
+
"\u2581any",
|
| 247 |
+
"\u2581has",
|
| 248 |
+
"\u2581un",
|
| 249 |
+
"iv",
|
| 250 |
+
"\u2581im",
|
| 251 |
+
"\u2581ag",
|
| 252 |
+
"\u2581te",
|
| 253 |
+
"\u2581fe",
|
| 254 |
+
"one",
|
| 255 |
+
"per",
|
| 256 |
+
"ong",
|
| 257 |
+
"\u2581po",
|
| 258 |
+
"\u2581ad",
|
| 259 |
+
"ff",
|
| 260 |
+
"ore",
|
| 261 |
+
"itt",
|
| 262 |
+
"ans",
|
| 263 |
+
"iz",
|
| 264 |
+
"eah",
|
| 265 |
+
"reat",
|
| 266 |
+
"act",
|
| 267 |
+
"own",
|
| 268 |
+
"hing",
|
| 269 |
+
"enty",
|
| 270 |
+
"age",
|
| 271 |
+
"ber",
|
| 272 |
+
"ice",
|
| 273 |
+
"\u2581am",
|
| 274 |
+
"ple",
|
| 275 |
+
"are",
|
| 276 |
+
"\u2581per",
|
| 277 |
+
"und",
|
| 278 |
+
"ite",
|
| 279 |
+
"ix",
|
| 280 |
+
"pl",
|
| 281 |
+
"\u2581way",
|
| 282 |
+
"\u2581did",
|
| 283 |
+
"\u2581pr",
|
| 284 |
+
"\u2581got",
|
| 285 |
+
"ars",
|
| 286 |
+
"\u2581she",
|
| 287 |
+
"\u2581let",
|
| 288 |
+
"ag",
|
| 289 |
+
"\u2581ac",
|
| 290 |
+
"int",
|
| 291 |
+
"\u2581ar",
|
| 292 |
+
"ry",
|
| 293 |
+
"ign",
|
| 294 |
+
"ish",
|
| 295 |
+
"\u2581fir",
|
| 296 |
+
"ace",
|
| 297 |
+
"ble",
|
| 298 |
+
"og",
|
| 299 |
+
"ue",
|
| 300 |
+
"\u2581ye",
|
| 301 |
+
"ap",
|
| 302 |
+
"iff",
|
| 303 |
+
"\u2581ro",
|
| 304 |
+
"\u2581her",
|
| 305 |
+
"nder",
|
| 306 |
+
"\u2581ok",
|
| 307 |
+
"\u2581res",
|
| 308 |
+
"\u2581gu",
|
| 309 |
+
"ence",
|
| 310 |
+
"\u2581may",
|
| 311 |
+
"ated",
|
| 312 |
+
"ip",
|
| 313 |
+
"\u2581bo",
|
| 314 |
+
"\u2581him",
|
| 315 |
+
"way",
|
| 316 |
+
"ac",
|
| 317 |
+
"ical",
|
| 318 |
+
"ass",
|
| 319 |
+
"ase",
|
| 320 |
+
"\u2581dis",
|
| 321 |
+
"able",
|
| 322 |
+
"ick",
|
| 323 |
+
"\u2581app",
|
| 324 |
+
"ance",
|
| 325 |
+
"\u2581pre",
|
| 326 |
+
"\u2581six",
|
| 327 |
+
"\u2581off",
|
| 328 |
+
"\u2581new",
|
| 329 |
+
"ia",
|
| 330 |
+
"orm",
|
| 331 |
+
"ank",
|
| 332 |
+
"\u2581lot",
|
| 333 |
+
"ach",
|
| 334 |
+
"\u2581fo",
|
| 335 |
+
"inet",
|
| 336 |
+
"ire",
|
| 337 |
+
"ary",
|
| 338 |
+
"ult",
|
| 339 |
+
"\u2581tal",
|
| 340 |
+
"\u2581mu",
|
| 341 |
+
"\u2581bl",
|
| 342 |
+
"ount",
|
| 343 |
+
"sel",
|
| 344 |
+
"vel",
|
| 345 |
+
"\u2581br",
|
| 346 |
+
"\u2581imp",
|
| 347 |
+
"ep",
|
| 348 |
+
"cess",
|
| 349 |
+
"ord",
|
| 350 |
+
"\u2581sc",
|
| 351 |
+
"\u2581inc",
|
| 352 |
+
"ound",
|
| 353 |
+
"ang",
|
| 354 |
+
"be",
|
| 355 |
+
"ress",
|
| 356 |
+
"uct",
|
| 357 |
+
"\u2581ind",
|
| 358 |
+
"\u2581af",
|
| 359 |
+
"ving",
|
| 360 |
+
"\u2581oh",
|
| 361 |
+
"\u2581bet",
|
| 362 |
+
"\u2581use",
|
| 363 |
+
"ome",
|
| 364 |
+
"ens",
|
| 365 |
+
"ys",
|
| 366 |
+
"\u2581bu",
|
| 367 |
+
"co",
|
| 368 |
+
"ory",
|
| 369 |
+
"ater",
|
| 370 |
+
"ild",
|
| 371 |
+
"ght",
|
| 372 |
+
"ial",
|
| 373 |
+
"\u2581day",
|
| 374 |
+
"ning",
|
| 375 |
+
"na",
|
| 376 |
+
"ile",
|
| 377 |
+
"\u2581spe",
|
| 378 |
+
"\u2581mar",
|
| 379 |
+
"ody",
|
| 380 |
+
"ough",
|
| 381 |
+
"ade",
|
| 382 |
+
"vers",
|
| 383 |
+
"xt",
|
| 384 |
+
"\u2581fl",
|
| 385 |
+
"\u2581ke",
|
| 386 |
+
"ian",
|
| 387 |
+
"\u2581sy",
|
| 388 |
+
"\u2581put",
|
| 389 |
+
"fore",
|
| 390 |
+
"ub",
|
| 391 |
+
"\u2581ph",
|
| 392 |
+
"fe",
|
| 393 |
+
"\u2581em",
|
| 394 |
+
"\u2581ser",
|
| 395 |
+
"form",
|
| 396 |
+
"ting",
|
| 397 |
+
"te",
|
| 398 |
+
"av",
|
| 399 |
+
"ious",
|
| 400 |
+
"\u2581rec",
|
| 401 |
+
"ks",
|
| 402 |
+
"\u2581gr",
|
| 403 |
+
"ces",
|
| 404 |
+
"wn",
|
| 405 |
+
"ors",
|
| 406 |
+
"\u2581jo",
|
| 407 |
+
"ents",
|
| 408 |
+
"\u2581des",
|
| 409 |
+
"\u2581try",
|
| 410 |
+
"\u2581equ",
|
| 411 |
+
"\u2581z",
|
| 412 |
+
"\u2581rem",
|
| 413 |
+
"\u2581str",
|
| 414 |
+
"self",
|
| 415 |
+
"\u2581bit",
|
| 416 |
+
"ph",
|
| 417 |
+
"ved",
|
| 418 |
+
"\u2581why",
|
| 419 |
+
"\u2581bas",
|
| 420 |
+
"\u2581hel",
|
| 421 |
+
"\u2581rel",
|
| 422 |
+
"ath",
|
| 423 |
+
"ject",
|
| 424 |
+
"ail",
|
| 425 |
+
"\u2581la",
|
| 426 |
+
"ual",
|
| 427 |
+
"\u2581god",
|
| 428 |
+
"\u2581nat",
|
| 429 |
+
"erm",
|
| 430 |
+
"day",
|
| 431 |
+
"\u2581id",
|
| 432 |
+
"ft",
|
| 433 |
+
"\u2581wr",
|
| 434 |
+
"\u2581min",
|
| 435 |
+
"ates",
|
| 436 |
+
"\u2581gen",
|
| 437 |
+
"tain",
|
| 438 |
+
"\u2581ob",
|
| 439 |
+
"ull",
|
| 440 |
+
"ict",
|
| 441 |
+
"\u2581tra",
|
| 442 |
+
"\u2581end",
|
| 443 |
+
"\u2581hig",
|
| 444 |
+
"\u2581fif",
|
| 445 |
+
"oth",
|
| 446 |
+
"tern",
|
| 447 |
+
"\u2581its",
|
| 448 |
+
"vent",
|
| 449 |
+
"\u2581sm",
|
| 450 |
+
"ons",
|
| 451 |
+
"\u2581add",
|
| 452 |
+
"iss",
|
| 453 |
+
"\u2581bel",
|
| 454 |
+
"ful",
|
| 455 |
+
"get",
|
| 456 |
+
"\u2581ele",
|
| 457 |
+
"\u2581rep",
|
| 458 |
+
"ak",
|
| 459 |
+
"\u2581ho",
|
| 460 |
+
"\u2581pos",
|
| 461 |
+
"\u2581num",
|
| 462 |
+
"ange",
|
| 463 |
+
"ves",
|
| 464 |
+
"ific",
|
| 465 |
+
"urn",
|
| 466 |
+
"ise",
|
| 467 |
+
"\u2581cr",
|
| 468 |
+
"\u2581um",
|
| 469 |
+
"ward",
|
| 470 |
+
"\u2581reg",
|
| 471 |
+
"ady",
|
| 472 |
+
"ower",
|
| 473 |
+
"uc",
|
| 474 |
+
"\u2581dec",
|
| 475 |
+
"lic",
|
| 476 |
+
"\u2581set",
|
| 477 |
+
"\u2581gon",
|
| 478 |
+
"\u2581op",
|
| 479 |
+
"\u2581ear",
|
| 480 |
+
"\u2581sub",
|
| 481 |
+
"\u2581sl",
|
| 482 |
+
"les",
|
| 483 |
+
"stem",
|
| 484 |
+
"cial",
|
| 485 |
+
"olog",
|
| 486 |
+
"atch",
|
| 487 |
+
"ily",
|
| 488 |
+
"body",
|
| 489 |
+
"nds",
|
| 490 |
+
"ular",
|
| 491 |
+
"ren",
|
| 492 |
+
"\u2581own",
|
| 493 |
+
"\u2581too",
|
| 494 |
+
"cent",
|
| 495 |
+
"ible",
|
| 496 |
+
"pect",
|
| 497 |
+
"ered",
|
| 498 |
+
"ways",
|
| 499 |
+
"teen",
|
| 500 |
+
"\u2581uh",
|
| 501 |
+
"\u2581big",
|
| 502 |
+
"\u2581mod",
|
| 503 |
+
"\u2581att",
|
| 504 |
+
"\u2581car",
|
| 505 |
+
"gr",
|
| 506 |
+
"\u2581acc",
|
| 507 |
+
"ied",
|
| 508 |
+
"mun",
|
| 509 |
+
"ib",
|
| 510 |
+
"\u2581mon",
|
| 511 |
+
"\u2581sch",
|
| 512 |
+
"\u2581pol",
|
| 513 |
+
"\u2581dat",
|
| 514 |
+
"\u2581fin",
|
| 515 |
+
"\u2581sim",
|
| 516 |
+
"\u2581inv",
|
| 517 |
+
"\u2581def",
|
| 518 |
+
"ked",
|
| 519 |
+
"\u2581ent",
|
| 520 |
+
"\u2581yes",
|
| 521 |
+
"ows",
|
| 522 |
+
"ics",
|
| 523 |
+
"ited",
|
| 524 |
+
"ute",
|
| 525 |
+
"ism",
|
| 526 |
+
"ps",
|
| 527 |
+
"\u2581ed",
|
| 528 |
+
"\u2581el",
|
| 529 |
+
"ably",
|
| 530 |
+
"ppen",
|
| 531 |
+
"als",
|
| 532 |
+
"\u2581ten",
|
| 533 |
+
"ract",
|
| 534 |
+
"ss",
|
| 535 |
+
"\u2581ass",
|
| 536 |
+
"\u2581met",
|
| 537 |
+
"gan",
|
| 538 |
+
"\u2581eng",
|
| 539 |
+
"\u2581stu",
|
| 540 |
+
"ween",
|
| 541 |
+
"arch",
|
| 542 |
+
"\u2581gl",
|
| 543 |
+
"\u2581cor",
|
| 544 |
+
"\u2581dr",
|
| 545 |
+
"vern",
|
| 546 |
+
"\u2581ty",
|
| 547 |
+
"\u2581run",
|
| 548 |
+
"hip",
|
| 549 |
+
"cus",
|
| 550 |
+
"cond",
|
| 551 |
+
"\u2581ins",
|
| 552 |
+
"irty",
|
| 553 |
+
"\u2581pub",
|
| 554 |
+
"lud",
|
| 555 |
+
"llow",
|
| 556 |
+
"\u2581cou",
|
| 557 |
+
"ew",
|
| 558 |
+
"iew",
|
| 559 |
+
"\u2581sur",
|
| 560 |
+
"ero",
|
| 561 |
+
"ood",
|
| 562 |
+
"ness",
|
| 563 |
+
"\u2581fun",
|
| 564 |
+
"\u2581eff",
|
| 565 |
+
"cept",
|
| 566 |
+
"\u2581ca",
|
| 567 |
+
"\u2581exp",
|
| 568 |
+
"duct",
|
| 569 |
+
"\u2581sw",
|
| 570 |
+
"ize",
|
| 571 |
+
"ope",
|
| 572 |
+
"\u2581par",
|
| 573 |
+
"kes",
|
| 574 |
+
"cy",
|
| 575 |
+
"\u2581ev",
|
| 576 |
+
"\u2581ref",
|
| 577 |
+
"ell",
|
| 578 |
+
"\u2581bus",
|
| 579 |
+
"ug",
|
| 580 |
+
"rib",
|
| 581 |
+
"\u2581cur",
|
| 582 |
+
"mo",
|
| 583 |
+
"ock",
|
| 584 |
+
"ures",
|
| 585 |
+
"air",
|
| 586 |
+
"\u2581war",
|
| 587 |
+
"str",
|
| 588 |
+
"\u2581med",
|
| 589 |
+
"\u2581wa",
|
| 590 |
+
"\u2581val",
|
| 591 |
+
"\u2581sin",
|
| 592 |
+
"blem",
|
| 593 |
+
"\u2581fam",
|
| 594 |
+
"li",
|
| 595 |
+
"\u2581far",
|
| 596 |
+
"\u2581cle",
|
| 597 |
+
"\u2581col",
|
| 598 |
+
"mon",
|
| 599 |
+
"\u2581gra",
|
| 600 |
+
"led",
|
| 601 |
+
"ense",
|
| 602 |
+
"tin",
|
| 603 |
+
"ues",
|
| 604 |
+
"its",
|
| 605 |
+
"\u2581mem",
|
| 606 |
+
"\u2581inf",
|
| 607 |
+
"\u2581eas",
|
| 608 |
+
"ideo",
|
| 609 |
+
"\u2581top",
|
| 610 |
+
"io",
|
| 611 |
+
"pan",
|
| 612 |
+
"\u2581hum",
|
| 613 |
+
"\u2581old",
|
| 614 |
+
"ead",
|
| 615 |
+
"\u2581ord",
|
| 616 |
+
"ric",
|
| 617 |
+
"ants",
|
| 618 |
+
"oy",
|
| 619 |
+
"esn",
|
| 620 |
+
"uck",
|
| 621 |
+
"ason",
|
| 622 |
+
"ced",
|
| 623 |
+
"ool",
|
| 624 |
+
"rat",
|
| 625 |
+
"ouse",
|
| 626 |
+
"\u2581lar",
|
| 627 |
+
"\u2581art",
|
| 628 |
+
"\u2581wee",
|
| 629 |
+
"\u2581cer",
|
| 630 |
+
"ized",
|
| 631 |
+
"\u2581mat",
|
| 632 |
+
"con",
|
| 633 |
+
"erg",
|
| 634 |
+
"land",
|
| 635 |
+
"ines",
|
| 636 |
+
"\u2581chr",
|
| 637 |
+
"\u2581aut",
|
| 638 |
+
"\u2581lea",
|
| 639 |
+
"\u2581sou",
|
| 640 |
+
"oney",
|
| 641 |
+
"tty",
|
| 642 |
+
"\u2581ple",
|
| 643 |
+
"ulat",
|
| 644 |
+
"oks",
|
| 645 |
+
"\u2581few",
|
| 646 |
+
"\u2581sol",
|
| 647 |
+
"\u2581che",
|
| 648 |
+
"chn",
|
| 649 |
+
"ird",
|
| 650 |
+
"\u2581bre",
|
| 651 |
+
"\u2581dur",
|
| 652 |
+
"\u2581wom",
|
| 653 |
+
"me",
|
| 654 |
+
"izat",
|
| 655 |
+
"eric",
|
| 656 |
+
"ote",
|
| 657 |
+
"\u2581uni",
|
| 658 |
+
"eren",
|
| 659 |
+
"arn",
|
| 660 |
+
"ross",
|
| 661 |
+
"ices",
|
| 662 |
+
"ten",
|
| 663 |
+
"eral",
|
| 664 |
+
"ever",
|
| 665 |
+
"ieve",
|
| 666 |
+
"lish",
|
| 667 |
+
"ash",
|
| 668 |
+
"\u2581opp",
|
| 669 |
+
"alth",
|
| 670 |
+
"ger",
|
| 671 |
+
"\u2581sk",
|
| 672 |
+
"\u2581red",
|
| 673 |
+
"peri",
|
| 674 |
+
"\u2581det",
|
| 675 |
+
"\u2581ext",
|
| 676 |
+
"ner",
|
| 677 |
+
"ah",
|
| 678 |
+
"\u2581var",
|
| 679 |
+
"\u2581loc",
|
| 680 |
+
"gram",
|
| 681 |
+
"ists",
|
| 682 |
+
"ives",
|
| 683 |
+
"\u2581es",
|
| 684 |
+
"\u2581nor",
|
| 685 |
+
"tro",
|
| 686 |
+
"ale",
|
| 687 |
+
"\u2581iss",
|
| 688 |
+
"\u2581pri",
|
| 689 |
+
"gin",
|
| 690 |
+
"az",
|
| 691 |
+
"oc",
|
| 692 |
+
"\u2581pop",
|
| 693 |
+
"ern",
|
| 694 |
+
"\u2581sit",
|
| 695 |
+
"ket",
|
| 696 |
+
"\u2581pa",
|
| 697 |
+
"\u2581law",
|
| 698 |
+
"ages",
|
| 699 |
+
"br",
|
| 700 |
+
"\u2581cam",
|
| 701 |
+
"\u2581mom",
|
| 702 |
+
"osed",
|
| 703 |
+
"\u2581bro",
|
| 704 |
+
"ne",
|
| 705 |
+
"bs",
|
| 706 |
+
"\u2581cre",
|
| 707 |
+
"erat",
|
| 708 |
+
"\u2581sec",
|
| 709 |
+
"\u2581cap",
|
| 710 |
+
"\u2581vis",
|
| 711 |
+
"\u2581pat",
|
| 712 |
+
"ield",
|
| 713 |
+
"iet",
|
| 714 |
+
"\u2581tri",
|
| 715 |
+
"up",
|
| 716 |
+
"\u2581bra",
|
| 717 |
+
"ts",
|
| 718 |
+
"\u2581mot",
|
| 719 |
+
"\u2581unt",
|
| 720 |
+
"put",
|
| 721 |
+
"bo",
|
| 722 |
+
"ork",
|
| 723 |
+
"mer",
|
| 724 |
+
"ital",
|
| 725 |
+
"\u2581air",
|
| 726 |
+
"ined",
|
| 727 |
+
"\u2581beh",
|
| 728 |
+
"\u2581adv",
|
| 729 |
+
"\u2581ret",
|
| 730 |
+
"imes",
|
| 731 |
+
"\u2581tea",
|
| 732 |
+
"ural",
|
| 733 |
+
"sid",
|
| 734 |
+
"ters",
|
| 735 |
+
"\u2581pur",
|
| 736 |
+
"\u2581sci",
|
| 737 |
+
"bers",
|
| 738 |
+
"ient",
|
| 739 |
+
"ier",
|
| 740 |
+
"cc",
|
| 741 |
+
"sw",
|
| 742 |
+
"\u2581av",
|
| 743 |
+
"reen",
|
| 744 |
+
"ode",
|
| 745 |
+
"ont",
|
| 746 |
+
"\u2581dra",
|
| 747 |
+
"ann",
|
| 748 |
+
"nect",
|
| 749 |
+
"\u2581x",
|
| 750 |
+
"\u2581eu",
|
| 751 |
+
"ton",
|
| 752 |
+
"inat",
|
| 753 |
+
"ene",
|
| 754 |
+
"ared",
|
| 755 |
+
"els",
|
| 756 |
+
"\u2581mor",
|
| 757 |
+
"\u2581rat",
|
| 758 |
+
"cri",
|
| 759 |
+
"\u2581men",
|
| 760 |
+
"\u2581ah",
|
| 761 |
+
"ames",
|
| 762 |
+
"\u2581arm",
|
| 763 |
+
"eak",
|
| 764 |
+
"\u2581pay",
|
| 765 |
+
"\u2581hal",
|
| 766 |
+
"ins",
|
| 767 |
+
"ilit",
|
| 768 |
+
"stit",
|
| 769 |
+
"\u2581ra",
|
| 770 |
+
"\u2581leg",
|
| 771 |
+
"cl",
|
| 772 |
+
"pr",
|
| 773 |
+
"\u2581wal",
|
| 774 |
+
"\u2581bad",
|
| 775 |
+
"\u2581ge",
|
| 776 |
+
"roup",
|
| 777 |
+
"\u2581mus",
|
| 778 |
+
"man",
|
| 779 |
+
"\u2581gi",
|
| 780 |
+
"eds",
|
| 781 |
+
"\u2581aw",
|
| 782 |
+
"po",
|
| 783 |
+
"ark",
|
| 784 |
+
"row",
|
| 785 |
+
"\u2581dep",
|
| 786 |
+
"ully",
|
| 787 |
+
"ral",
|
| 788 |
+
"lect",
|
| 789 |
+
"pend",
|
| 790 |
+
"\u2581sev",
|
| 791 |
+
"ime",
|
| 792 |
+
"gest",
|
| 793 |
+
"here",
|
| 794 |
+
"\u2581yet",
|
| 795 |
+
"ted",
|
| 796 |
+
"\u2581rev",
|
| 797 |
+
"ds",
|
| 798 |
+
"\u2581ask",
|
| 799 |
+
"less",
|
| 800 |
+
"\u2581di",
|
| 801 |
+
"ets",
|
| 802 |
+
"line",
|
| 803 |
+
"\u2581aff",
|
| 804 |
+
"ired",
|
| 805 |
+
"\u2581est",
|
| 806 |
+
"ken",
|
| 807 |
+
"vid",
|
| 808 |
+
"most",
|
| 809 |
+
"ivid",
|
| 810 |
+
"unch",
|
| 811 |
+
"par",
|
| 812 |
+
"med",
|
| 813 |
+
"rop",
|
| 814 |
+
"ased",
|
| 815 |
+
"eone",
|
| 816 |
+
"\u2581ve",
|
| 817 |
+
"\u2581abs",
|
| 818 |
+
"ergy",
|
| 819 |
+
"ret",
|
| 820 |
+
"\u2581saw",
|
| 821 |
+
"\u2581ey",
|
| 822 |
+
"\u2581cal",
|
| 823 |
+
"uat",
|
| 824 |
+
"\u2581mid",
|
| 825 |
+
"vat",
|
| 826 |
+
"ream",
|
| 827 |
+
"vice",
|
| 828 |
+
"ians",
|
| 829 |
+
"rent",
|
| 830 |
+
"ctor",
|
| 831 |
+
"err",
|
| 832 |
+
"ush",
|
| 833 |
+
"ases",
|
| 834 |
+
"\u2581suc",
|
| 835 |
+
"erms",
|
| 836 |
+
"ave",
|
| 837 |
+
"angu",
|
| 838 |
+
"ries",
|
| 839 |
+
"\u2581wo",
|
| 840 |
+
"arts",
|
| 841 |
+
"\u2581fil",
|
| 842 |
+
"\u2581fat",
|
| 843 |
+
"\u2581cho",
|
| 844 |
+
"orts",
|
| 845 |
+
"\u2581fre",
|
| 846 |
+
"ee",
|
| 847 |
+
"ught",
|
| 848 |
+
"eng",
|
| 849 |
+
"ump",
|
| 850 |
+
"\u2581bar",
|
| 851 |
+
"ying",
|
| 852 |
+
"ane",
|
| 853 |
+
"\u2581tem",
|
| 854 |
+
"anks",
|
| 855 |
+
"ury",
|
| 856 |
+
"iat",
|
| 857 |
+
"mit",
|
| 858 |
+
"trol",
|
| 859 |
+
"\u2581net",
|
| 860 |
+
"\u2581maj",
|
| 861 |
+
"\u2581cra",
|
| 862 |
+
"ling",
|
| 863 |
+
"\u2581fig",
|
| 864 |
+
"orn",
|
| 865 |
+
"icat",
|
| 866 |
+
"pany",
|
| 867 |
+
"\u2581occ",
|
| 868 |
+
"ott",
|
| 869 |
+
"ands",
|
| 870 |
+
"\u2581exc",
|
| 871 |
+
"\u2581mr",
|
| 872 |
+
"ency",
|
| 873 |
+
"rope",
|
| 874 |
+
"itch",
|
| 875 |
+
"\u2581lit",
|
| 876 |
+
"abil",
|
| 877 |
+
"not",
|
| 878 |
+
"ma",
|
| 879 |
+
"\u2581typ",
|
| 880 |
+
"\u2581opt",
|
| 881 |
+
"ob",
|
| 882 |
+
"ser",
|
| 883 |
+
"ety",
|
| 884 |
+
"ms",
|
| 885 |
+
"peci",
|
| 886 |
+
"aces",
|
| 887 |
+
"aut",
|
| 888 |
+
"\u2581hon",
|
| 889 |
+
"cuss",
|
| 890 |
+
"\u2581sal",
|
| 891 |
+
"\u2581sor",
|
| 892 |
+
"att",
|
| 893 |
+
"\u2581lab",
|
| 894 |
+
"\u2581har",
|
| 895 |
+
"urch",
|
| 896 |
+
"nded",
|
| 897 |
+
"uce",
|
| 898 |
+
"ids",
|
| 899 |
+
"\u2581hy",
|
| 900 |
+
"\u2581fut",
|
| 901 |
+
"\u2581ste",
|
| 902 |
+
"ours",
|
| 903 |
+
"ems",
|
| 904 |
+
"utes",
|
| 905 |
+
"ng",
|
| 906 |
+
"ta",
|
| 907 |
+
"\u2581won",
|
| 908 |
+
"\u2581fa",
|
| 909 |
+
"\u2581env",
|
| 910 |
+
"ards",
|
| 911 |
+
"\u2581job",
|
| 912 |
+
"ium",
|
| 913 |
+
"\u2581dot",
|
| 914 |
+
"\u2581obv",
|
| 915 |
+
"ina",
|
| 916 |
+
"side",
|
| 917 |
+
"elve",
|
| 918 |
+
"cu",
|
| 919 |
+
"\u2581jes",
|
| 920 |
+
"\u2581pot",
|
| 921 |
+
"\u2581pie",
|
| 922 |
+
"\u2581tre",
|
| 923 |
+
"\u2581hey",
|
| 924 |
+
"\u2581mag",
|
| 925 |
+
"ron",
|
| 926 |
+
"\u2581key",
|
| 927 |
+
"swer",
|
| 928 |
+
"\u2581win",
|
| 929 |
+
"ucat",
|
| 930 |
+
"work",
|
| 931 |
+
"ides",
|
| 932 |
+
"\u2581low",
|
| 933 |
+
"\u2581vol",
|
| 934 |
+
"\u2581oth",
|
| 935 |
+
"atic",
|
| 936 |
+
"lf",
|
| 937 |
+
"ads",
|
| 938 |
+
"inds",
|
| 939 |
+
"com",
|
| 940 |
+
"ths",
|
| 941 |
+
"\u2581ver",
|
| 942 |
+
"ised",
|
| 943 |
+
"lo",
|
| 944 |
+
"\u2581squ",
|
| 945 |
+
"\u2581cut",
|
| 946 |
+
"oked",
|
| 947 |
+
"irit",
|
| 948 |
+
"ateg",
|
| 949 |
+
"ppy",
|
| 950 |
+
"mitt",
|
| 951 |
+
"come",
|
| 952 |
+
"hn",
|
| 953 |
+
"igin",
|
| 954 |
+
"mand",
|
| 955 |
+
"\u2581dam",
|
| 956 |
+
"ho",
|
| 957 |
+
"\u2581da",
|
| 958 |
+
"\u2581fur",
|
| 959 |
+
"iron",
|
| 960 |
+
"ilar",
|
| 961 |
+
"\u2581fac",
|
| 962 |
+
"\u2581neg",
|
| 963 |
+
"\u2581ago",
|
| 964 |
+
"ged",
|
| 965 |
+
"miss",
|
| 966 |
+
"enth",
|
| 967 |
+
"\u2581dou",
|
| 968 |
+
"\u2581hit",
|
| 969 |
+
"\u2581guy",
|
| 970 |
+
"\u2581bi",
|
| 971 |
+
"ove",
|
| 972 |
+
"fess",
|
| 973 |
+
"ples",
|
| 974 |
+
"owed",
|
| 975 |
+
"ured",
|
| 976 |
+
"\u2581ris",
|
| 977 |
+
"ints",
|
| 978 |
+
"rew",
|
| 979 |
+
"\u2581sum",
|
| 980 |
+
"\u2581hu",
|
| 981 |
+
"ploy",
|
| 982 |
+
"ude",
|
| 983 |
+
"ried",
|
| 984 |
+
"\u2581cir",
|
| 985 |
+
"\u2581dev",
|
| 986 |
+
"ear",
|
| 987 |
+
"\u2581tot",
|
| 988 |
+
"\u2581ann",
|
| 989 |
+
"duc",
|
| 990 |
+
"ik",
|
| 991 |
+
"pon",
|
| 992 |
+
"sted",
|
| 993 |
+
"\u2581ide",
|
| 994 |
+
"\u2581'",
|
| 995 |
+
"ipp",
|
| 996 |
+
"\u2581eat",
|
| 997 |
+
"\u2581dom",
|
| 998 |
+
"\u2581",
|
| 999 |
+
"e",
|
| 1000 |
+
"t",
|
| 1001 |
+
"o",
|
| 1002 |
+
"a",
|
| 1003 |
+
"i",
|
| 1004 |
+
"n",
|
| 1005 |
+
"s",
|
| 1006 |
+
"r",
|
| 1007 |
+
"h",
|
| 1008 |
+
"l",
|
| 1009 |
+
"d",
|
| 1010 |
+
"u",
|
| 1011 |
+
"c",
|
| 1012 |
+
"m",
|
| 1013 |
+
"y",
|
| 1014 |
+
"g",
|
| 1015 |
+
"w",
|
| 1016 |
+
"f",
|
| 1017 |
+
"p",
|
| 1018 |
+
"b",
|
| 1019 |
+
"v",
|
| 1020 |
+
"k",
|
| 1021 |
+
"'",
|
| 1022 |
+
"j",
|
| 1023 |
+
"x",
|
| 1024 |
+
"q",
|
| 1025 |
+
"z",
|
| 1026 |
+
"<EOU>",
|
| 1027 |
+
"<EOB>"
|
| 1028 |
+
]
|