alexwengg committed on Dec 31, 2025

Commit

fe9b550

verified ·

1 Parent(s): 31bfda7

Upload 23 files

Browse files

Files changed (23) hide show

CTCHead.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
CTCHead.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
CTCHead.mlpackage/Manifest.json +18 -0
Decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
Decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
Decoder.mlpackage/Manifest.json +18 -0
Encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
Encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
Encoder.mlpackage/Manifest.json +18 -0
JointDecision.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
JointDecision.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
JointDecision.mlpackage/Manifest.json +18 -0
Preprocessor.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
Preprocessor.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
Preprocessor.mlpackage/Manifest.json +18 -0
README.md +164 -0
__init__.py +5 -0
convert_nemo_to_coreml.py +365 -0
inference.py +304 -0
metadata.json +18 -0
pyproject.toml +65 -0
uv.lock +0 -0
vocab.json +1 -1

CTCHead.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3bb619400f0ca4a5873c5f2bf7bf78a645944d3f4acd544bed689a7a420f4634
+size 2048

CTCHead.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb9bead064427ffcb7529c0e3f378e421b4dde8e6d81447b6d1ca3352ca850e1
+size 1051842

CTCHead.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "174E9828-F0D9-496B-B767-165878007DCB": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "ABA1F560-2FDF-40A2-BB0D-DE27A2824BED": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "ABA1F560-2FDF-40A2-BB0D-DE27A2824BED"
+}

Decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95c442cea0d8d78e3de7c45e6a0502a7284b783915971f161a0e58a4e1fa7153
+size 8544

Decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d5c4adf473c11e8c86daae6da87dbf4a0bf1c8b716fdd4a9378906208b41381
+size 7872384

Decoder.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "7BB82BEE-DB48-4BA0-8F0E-AC39162FD7F3": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "E80560F7-8F68-462F-8B00-ADCB6B6F88F7": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "E80560F7-8F68-462F-8B00-ADCB6B6F88F7"
+}

Encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:987981947a1239c7d9d1936168534058ddeb39e0da2bb0b36f91381f00183b1e
+size 492504

Encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cecf7994b2758397d992802a4f6e5d656e3a1aeb7bbedc2aa430b1316d62474c
+size 215143424

Encoder.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "020BD32E-EB4A-4192-B46F-8CFA4932627D": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "242DF9D4-730A-4735-97CD-5C4C16E79595": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "242DF9D4-730A-4735-97CD-5C4C16E79595"
+}

JointDecision.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7f0f6c8cb9481b7d303268c34ddd7fb9e69e1cfda9880c1dce06a64539cb389
+size 8788

JointDecision.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b3f771cb65b190f1873e39629676ed79b65a8361522f451b37bdba8b1106e6ff
+size 2798028

JointDecision.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "82C059F0-F2A7-4566-B14C-7BC1F1E136E2": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        },
+        "D30A2839-E7D8-40D0-AF8B-72C2EF998325": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        }
+    },
+    "rootModelIdentifier": "82C059F0-F2A7-4566-B14C-7BC1F1E136E2"
+}

Preprocessor.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:432a9ebe9eb0bee221560ed7bfef5278fb907652e4e0f20ba03b997c394a9335
+size 19924

Preprocessor.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1609930989479ea65e0608b2cd6c54fef7f1623cc240cd6d993e24e2491133ac
+size 807968

Preprocessor.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "C99314D3-C9C5-4FAD-8419-E34671E6E467": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "E684B627-55A9-4175-92F1-FA535236EE66": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "E684B627-55A9-4175-92F1-FA535236EE66"
+}

README.md ADDED Viewed

	@@ -0,0 +1,164 @@

+# Parakeet-TDT-CTC-110M CoreML
+NVIDIA's Parakeet-TDT-CTC-110M model converted to CoreML format for efficient inference on Apple Silicon.
+## Model Description
+This is a hybrid ASR model with a shared Conformer encoder and two decoder heads:
+- **CTC Head**: Fast greedy decoding, ideal for keyword spotting
+- **TDT Head**: Token-Duration Transducer for high-quality transcription
+### Architecture
+| Component | Description | Size |
+|-----------|-------------|------|
+| Preprocessor | Mel spectrogram extraction | ~1 MB |
+| Encoder | Conformer encoder (shared) | ~215 MB |
+| CTCHead | CTC output projection | ~1 MB |
+| Decoder | TDT prediction network (LSTM) | ~8 MB |
+| JointDecision | TDT joint network | ~3 MB |
+**Total size**: ~228 MB
+### Performance
+Benchmarked on Earnings22 dataset (772 audio files):
+| Metric | Value |
+|--------|-------|
+| Keyword Recall | 100% (1309/1309) |
+| WER | 17.97% |
+| RTFx (M4 Pro) | 358x real-time |
+## Requirements
+- macOS 13+ (Ventura or later)
+- Apple Silicon (M1/M2/M3/M4)
+- Python 3.10+
+## Installation
+```bash
+# Using uv (recommended)
+uv sync
+# Or using pip
+pip install -e .
+# For audio file support (WAV, MP3, etc.)
+pip install -e ".[audio]"
+```
+## Usage
+### Python Inference
+```python
+from scripts.inference import ParakeetCoreML
+# Load model
+model = ParakeetCoreML("./model")
+# Transcribe with TDT (higher quality)
+text = model.transcribe("audio.wav", mode="tdt")
+print(text)
+# Or use CTC for faster keyword spotting
+text = model.transcribe("audio.wav", mode="ctc")
+print(text)
+```
+### Command Line
+```bash
+# TDT decoding (default, higher quality)
+uv run scripts/inference.py --audio audio.wav --model-dir ./model
+# CTC decoding (faster, good for keyword spotting)
+uv run scripts/inference.py --audio audio.wav --model-dir ./model --mode ctc
+```
+## Model Conversion
+To convert from the original NeMo model:
+```bash
+# Install conversion dependencies
+uv sync --extra convert
+# Run conversion
+uv run scripts/convert_nemo_to_coreml.py --output-dir ./model
+```
+This will:
+1. Download the original model from NVIDIA (`nvidia/parakeet-tdt_ctc-110m`)
+2. Convert each component to CoreML format
+3. Extract vocabulary and create metadata
+## File Structure
+```
+model/
+├── Preprocessor.mlpackage    # Audio → Mel spectrogram
+├── Encoder.mlpackage         # Mel → Encoder features
+├── CTCHead.mlpackage         # Encoder → CTC log probs
+├── Decoder.mlpackage         # TDT prediction network
+├── JointDecision.mlpackage   # TDT joint network
+├── vocab.json                # Token vocabulary (1024 tokens)
+└── metadata.json             # Model configuration
+```
+## Decoding Modes
+### TDT Mode (Recommended for Transcription)
+- Uses Token-Duration Transducer decoding
+- Higher accuracy (17.97% WER)
+- Predicts both tokens and durations
+- Best for full transcription tasks
+### CTC Mode (Recommended for Keyword Spotting)
+- Greedy CTC decoding
+- Faster inference
+- 100% keyword recall on Earnings22
+- Best for detecting specific words/phrases
+## Custom Vocabulary / Keyword Spotting
+For keyword spotting, CTC mode with custom vocabulary boosting achieves 100% recall:
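+```python
+import json
+# `model` is a loaded ParakeetCoreML instance and `encoder_output` is the
+# first value returned by `model.encode(...)`.
+# `is_subsequence(expected_ids, tokens)` is a matcher you supply yourself;
+# it is not defined in inference.py.
+# Load custom vocabulary with token IDs
+with open("custom_vocab.json") as f:
+    keywords = json.load(f)  # {"keyword": [token_ids], ...}
+# Run CTC decoding
+tokens = model.decode_ctc(encoder_output)
+# Check for keyword matches
+for keyword, expected_ids in keywords.items():
+    if is_subsequence(expected_ids, tokens):
+        print(f"Found keyword: {keyword}")
+```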
+## License
+This model conversion is released under the Apache 2.0 License, same as the original NVIDIA model.
+## Citation
+If you use this model, please cite the original NVIDIA work:
+```bibtex
+@misc{nvidia_parakeet_tdt_ctc,
+  title={Parakeet-TDT-CTC-110M},
+  author={NVIDIA},
+  year={2024},
+  publisher={Hugging Face},
+  url={https://huggingface.co/nvidia/parakeet-tdt_ctc-110m}
+}
+```
+## Acknowledgments
+- Original model by [NVIDIA NeMo](https://github.com/NVIDIA/NeMo)
+- CoreML conversion by [FluidInference](https://github.com/FluidInference)

__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""Parakeet-TDT-CTC-110M CoreML scripts."""
+from .inference import ParakeetCoreML
+__all__ = ["ParakeetCoreML"]

convert_nemo_to_coreml.py ADDED Viewed

	@@ -0,0 +1,365 @@

+#!/usr/bin/env python3
+"""
+Convert NVIDIA Parakeet-TDT-CTC-110M model from NeMo to CoreML format.
+This script downloads the original NeMo model from NVIDIA and converts it to
+CoreML format with separate components for efficient Apple Silicon inference.
+Components:
+- Preprocessor: Audio preprocessing (mel spectrogram)
+- Encoder: Conformer encoder (shared between TDT and CTC)
+- CTCHead: CTC output head for keyword spotting
+- Decoder: TDT decoder LSTM
+- JointDecision: TDT joint network for token + duration prediction
+Usage:
+    uv run scripts/convert_nemo_to_coreml.py --output-dir ./model
+Requirements:
+    - Python 3.10+
+    - PyTorch
+    - NeMo toolkit
+    - coremltools
+"""
+import argparse
+import json
+import os
+import shutil
+from pathlib import Path
+import coremltools as ct
+import numpy as np
+import torch
+import torch.nn as nn
+from nemo.collections.asr.models import EncDecHybridRNNTCTCModel
+def load_nemo_model(model_name: str = "nvidia/parakeet-tdt_ctc-110m") -> EncDecHybridRNNTCTCModel:
+    """Load the NeMo model from NVIDIA."""
+    print(f"Loading NeMo model: {model_name}")
+    model = EncDecHybridRNNTCTCModel.from_pretrained(model_name)
+    model.eval()
+    return model
+class PreprocessorWrapper(nn.Module):
+    """Wrapper for mel spectrogram preprocessing."""
+    def __init__(self, preprocessor):
+        super().__init__()
+        self.preprocessor = preprocessor
+    def forward(self, audio_signal: torch.Tensor, audio_length: torch.Tensor):
+        processed_signal, processed_length = self.preprocessor(
+            input_signal=audio_signal,
+            length=audio_length
+        )
+        return processed_signal, processed_length
+class EncoderWrapper(nn.Module):
+    """Wrapper for Conformer encoder."""
+    def __init__(self, encoder):
+        super().__init__()
+        self.encoder = encoder
+    def forward(self, mel: torch.Tensor, mel_length: torch.Tensor):
+        encoded, encoded_length = self.encoder(
+            audio_signal=mel,
+            length=mel_length
+        )
+        return encoded, encoded_length
+class CTCHeadWrapper(nn.Module):
+    """Wrapper for CTC output head."""
+    def __init__(self, ctc_decoder):
+        super().__init__()
+        self.ctc_decoder = ctc_decoder
+    def forward(self, encoder_output: torch.Tensor):
+        # CTC head outputs log probabilities over vocabulary
+        logits = self.ctc_decoder.decoder_layers(encoder_output)
+        log_probs = torch.log_softmax(logits, dim=-1)
+        return log_probs
+class DecoderWrapper(nn.Module):
+    """Wrapper for TDT decoder LSTM."""
+    def __init__(self, decoder):
+        super().__init__()
+        self.decoder = decoder
+        self.hidden_size = decoder.pred_hidden
+    def forward(self, targets: torch.Tensor, target_length: torch.Tensor,
+                h_in: torch.Tensor, c_in: torch.Tensor):
+        # Run decoder LSTM
+        output, (h_out, c_out) = self.decoder.prediction_network.lstm(
+            self.decoder.prediction_network.embed(targets),
+            (h_in, c_in)
+        )
+        # Project to decoder hidden dimension
+        decoder_output = self.decoder.prediction_network.dec_out(output)
+        return decoder_output, h_out, c_out
+class JointDecisionWrapper(nn.Module):
+    """Wrapper for joint network that predicts token and duration."""
+    def __init__(self, joint):
+        super().__init__()
+        self.joint = joint
+    def forward(self, encoder_step: torch.Tensor, decoder_step: torch.Tensor):
+        # Combine encoder and decoder features
+        combined = self.joint.joint_net(
+            torch.cat([encoder_step, decoder_step], dim=-1)
+        )
+        # Token prediction
+        token_logits = self.joint.joint(combined)
+        token_id = torch.argmax(token_logits, dim=-1)
+        token_prob = torch.softmax(token_logits, dim=-1).max(dim=-1).values
+        # Duration prediction
+        duration_logits = self.joint.tdt_joint(combined)
+        duration_bin = torch.argmax(duration_logits, dim=-1)
+        return token_id, token_prob, duration_bin
+def convert_preprocessor(model: EncDecHybridRNNTCTCModel, output_dir: Path):
+    """Convert preprocessor to CoreML."""
+    print("Converting Preprocessor...")
+    wrapper = PreprocessorWrapper(model.preprocessor)
+    wrapper.eval()
+    # Sample inputs
+    audio = torch.randn(1, 240000)  # 15 seconds at 16kHz
+    length = torch.tensor([240000])
+    traced = torch.jit.trace(wrapper, (audio, length))
+    mlmodel = ct.convert(
+        traced,
+        inputs=[
+            ct.TensorType(name="audio_signal", shape=(1, ct.RangeDim(16000, 240000))),
+            ct.TensorType(name="audio_length", shape=(1,)),
+        ],
+        outputs=[
+            ct.TensorType(name="mel"),
+            ct.TensorType(name="mel_length"),
+        ],
+        minimum_deployment_target=ct.target.iOS16,
+    )
+    mlmodel.save(output_dir / "Preprocessor.mlpackage")
+    print("  Saved Preprocessor.mlpackage")
+def convert_encoder(model: EncDecHybridRNNTCTCModel, output_dir: Path):
+    """Convert encoder to CoreML."""
+    print("Converting Encoder...")
+    wrapper = EncoderWrapper(model.encoder)
+    wrapper.eval()
+    # Sample inputs (mel spectrogram shape)
+    mel = torch.randn(1, 80, 1500)  # 80 mel bins, ~15 seconds
+    mel_length = torch.tensor([1500])
+    traced = torch.jit.trace(wrapper, (mel, mel_length))
+    mlmodel = ct.convert(
+        traced,
+        inputs=[
+            ct.TensorType(name="mel", shape=(1, 80, ct.RangeDim(100, 1500))),
+            ct.TensorType(name="mel_length", shape=(1,)),
+        ],
+        outputs=[
+            ct.TensorType(name="encoder"),
+            ct.TensorType(name="encoder_length"),
+        ],
+        minimum_deployment_target=ct.target.iOS16,
+    )
+    mlmodel.save(output_dir / "Encoder.mlpackage")
+    print("  Saved Encoder.mlpackage")
+def convert_ctc_head(model: EncDecHybridRNNTCTCModel, output_dir: Path):
+    """Convert CTC head to CoreML."""
+    print("Converting CTCHead...")
+    wrapper = CTCHeadWrapper(model.ctc_decoder)
+    wrapper.eval()
+    # Sample input (encoder output)
+    encoder_output = torch.randn(1, 188, 512)  # batch, time, hidden
+    traced = torch.jit.trace(wrapper, encoder_output)
+    mlmodel = ct.convert(
+        traced,
+        inputs=[
+            ct.TensorType(name="encoder_output", shape=(1, ct.RangeDim(10, 300), 512)),
+        ],
+        outputs=[
+            ct.TensorType(name="ctc_log_probs"),
+        ],
+        minimum_deployment_target=ct.target.iOS16,
+    )
+    mlmodel.save(output_dir / "CTCHead.mlpackage")
+    print("  Saved CTCHead.mlpackage")
+def convert_decoder(model: EncDecHybridRNNTCTCModel, output_dir: Path):
+    """Convert decoder to CoreML."""
+    print("Converting Decoder...")
+    wrapper = DecoderWrapper(model.decoder)
+    wrapper.eval()
+    hidden_size = wrapper.hidden_size
+    num_layers = model.decoder.pred_num_layers
+    # Sample inputs
+    targets = torch.zeros(1, 1, dtype=torch.long)
+    target_length = torch.tensor([1])
+    h_in = torch.zeros(num_layers, 1, hidden_size)
+    c_in = torch.zeros(num_layers, 1, hidden_size)
+    traced = torch.jit.trace(wrapper, (targets, target_length, h_in, c_in))
+    mlmodel = ct.convert(
+        traced,
+        inputs=[
+            ct.TensorType(name="targets", shape=(1, 1)),
+            ct.TensorType(name="target_length", shape=(1,)),
+            ct.TensorType(name="h_in", shape=(num_layers, 1, hidden_size)),
+            ct.TensorType(name="c_in", shape=(num_layers, 1, hidden_size)),
+        ],
+        outputs=[
+            ct.TensorType(name="decoder"),
+            ct.TensorType(name="h_out"),
+            ct.TensorType(name="c_out"),
+        ],
+        minimum_deployment_target=ct.target.iOS16,
+    )
+    mlmodel.save(output_dir / "Decoder.mlpackage")
+    print("  Saved Decoder.mlpackage")
+def convert_joint(model: EncDecHybridRNNTCTCModel, output_dir: Path):
+    """Convert joint network to CoreML."""
+    print("Converting JointDecision...")
+    wrapper = JointDecisionWrapper(model.joint)
+    wrapper.eval()
+    # Sample inputs
+    encoder_step = torch.randn(1, 512, 1)
+    decoder_step = torch.randn(1, 640, 1)
+    traced = torch.jit.trace(wrapper, (encoder_step, decoder_step))
+    mlmodel = ct.convert(
+        traced,
+        inputs=[
+            ct.TensorType(name="encoder_step", shape=(1, 512, 1)),
+            ct.TensorType(name="decoder_step", shape=(1, 640, 1)),
+        ],
+        outputs=[
+            ct.TensorType(name="token_id"),
+            ct.TensorType(name="token_prob"),
+            ct.TensorType(name="duration_bin"),
+        ],
+        minimum_deployment_target=ct.target.iOS16,
+    )
+    mlmodel.save(output_dir / "JointDecision.mlpackage")
+    print("  Saved JointDecision.mlpackage")
+def extract_vocabulary(model: EncDecHybridRNNTCTCModel, output_dir: Path):
+    """Extract vocabulary from model."""
+    print("Extracting vocabulary...")
+    vocab = model.decoding.decoding.vocabulary
+    vocab_dict = {i: token for i, token in enumerate(vocab)}
+    with open(output_dir / "vocab.json", "w") as f:
+        json.dump(vocab_dict, f, indent=2, ensure_ascii=False)
+    print(f"  Saved vocab.json ({len(vocab_dict)} tokens)")
+    return len(vocab_dict)
+def create_metadata(model: EncDecHybridRNNTCTCModel, output_dir: Path, vocab_size: int):
+    """Create metadata file."""
+    print("Creating metadata...")
+    metadata = {
+        "model_id": "nvidia/parakeet-tdt_ctc-110m",
+        "sample_rate": 16000,
+        "max_audio_seconds": 15.0,
+        "max_audio_samples": 240000,
+        "vocab_size": vocab_size,
+        "vocab_with_blank": vocab_size + 1,
+        "num_extra": 5,  # TDT duration bins
+        "decoder_hidden_dim": model.decoder.pred_hidden,
+        "decoder_num_layers": model.decoder.pred_num_layers,
+        "components": {
+            "preprocessor": "Preprocessor.mlpackage",
+            "encoder": "Encoder.mlpackage",
+            "ctc_head": "CTCHead.mlpackage",
+            "decoder": "Decoder.mlpackage",
+            "joint_decision": "JointDecision.mlpackage"
+        }
+    }
+    with open(output_dir / "metadata.json", "w") as f:
+        json.dump(metadata, f, indent=2)
+    print("  Saved metadata.json")
+def main():
+    parser = argparse.ArgumentParser(description="Convert Parakeet-TDT-CTC-110M to CoreML")
+    parser.add_argument("--output-dir", type=str, default="./model",
+                        help="Output directory for CoreML models")
+    parser.add_argument("--model-name", type=str, default="nvidia/parakeet-tdt_ctc-110m",
+                        help="NeMo model name or path")
+    args = parser.parse_args()
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    # Load model
+    model = load_nemo_model(args.model_name)
+    # Convert components
+    convert_preprocessor(model, output_dir)
+    convert_encoder(model, output_dir)
+    convert_ctc_head(model, output_dir)
+    convert_decoder(model, output_dir)
+    convert_joint(model, output_dir)
+    # Extract vocabulary and create metadata
+    vocab_size = extract_vocabulary(model, output_dir)
+    create_metadata(model, output_dir, vocab_size)
+    print(f"\nConversion complete! Models saved to: {output_dir}")
+    print("\nTo compile models for Apple Silicon:")
+    print("  xcrun coremlcompiler compile Encoder.mlpackage Encoder.mlmodelc")
+if __name__ == "__main__":
+    main()

inference.py ADDED Viewed

	@@ -0,0 +1,304 @@

+#!/usr/bin/env python3
+"""
+Inference script for Parakeet-TDT-CTC-110M CoreML model.
+This script demonstrates how to run inference using the converted CoreML models
+on Apple Silicon. It supports both TDT (Token-Duration Transducer) decoding for
+full transcription and CTC decoding for keyword spotting.
+Usage:
+    uv run scripts/inference.py --audio audio.wav --mode tdt
+    uv run scripts/inference.py --audio audio.wav --mode ctc
+Requirements:
+    - macOS 13+ with Apple Silicon
+    - Python 3.10+
+    - coremltools
+"""
+import argparse
+import json
+from pathlib import Path
+import coremltools as ct
+import numpy as np
+class ParakeetCoreML:
+    """CoreML inference wrapper for Parakeet-TDT-CTC-110M."""
+    def __init__(self, model_dir: str):
+        """Load CoreML models from directory.
+        Args:
+            model_dir: Path to directory containing .mlpackage files
+        """
+        self.model_dir = Path(model_dir)
+        # Load metadata
+        with open(self.model_dir / "metadata.json") as f:
+            self.metadata = json.load(f)
+        # Load vocabulary
+        with open(self.model_dir / "vocab.json") as f:
+            vocab_dict = json.load(f)
+            self.vocab = {int(k): v for k, v in vocab_dict.items()}
+        self.blank_id = len(self.vocab)  # Blank token is last
+        # Load models
+        print("Loading CoreML models...")
+        self.preprocessor = ct.models.MLModel(
+            str(self.model_dir / "Preprocessor.mlpackage")
+        )
+        self.encoder = ct.models.MLModel(
+            str(self.model_dir / "Encoder.mlpackage")
+        )
+        self.ctc_head = ct.models.MLModel(
+            str(self.model_dir / "CTCHead.mlpackage")
+        )
+        self.decoder = ct.models.MLModel(
+            str(self.model_dir / "Decoder.mlpackage")
+        )
+        self.joint = ct.models.MLModel(
+            str(self.model_dir / "JointDecision.mlpackage")
+        )
+        print("Models loaded successfully.")
+    def load_audio(self, audio_path: str) -> np.ndarray:
+        """Load audio file and convert to 16kHz mono.
+        Args:
+            audio_path: Path to audio file (WAV, MP3, etc.)
+        Returns:
+            Audio samples as float32 numpy array
+        """
+        try:
+            import librosa
+            audio, sr = librosa.load(audio_path, sr=16000, mono=True)
+            return audio.astype(np.float32)
+        except ImportError:
+            # Fallback to scipy for WAV files
+            from scipy.io import wavfile
+            sr, audio = wavfile.read(audio_path)
+            # Convert to mono if stereo
+            if len(audio.shape) > 1:
+                audio = audio.mean(axis=1)
+            # Resample if needed
+            if sr != 16000:
+                from scipy import signal
+                num_samples = int(len(audio) * 16000 / sr)
+                audio = signal.resample(audio, num_samples)
+            # Normalize to float32 [-1, 1]
+            if audio.dtype == np.int16:
+                audio = audio.astype(np.float32) / 32768.0
+            elif audio.dtype == np.int32:
+                audio = audio.astype(np.float32) / 2147483648.0
+            return audio.astype(np.float32)
+    def preprocess(self, audio: np.ndarray) -> tuple[np.ndarray, int]:
+        """Convert audio to mel spectrogram.
+        Args:
+            audio: Audio samples as float32 array
+        Returns:
+            Tuple of (mel spectrogram, mel length)
+        """
+        audio_signal = audio.reshape(1, -1).astype(np.float32)
+        audio_length = np.array([len(audio)], dtype=np.int32)
+        result = self.preprocessor.predict({
+            "audio_signal": audio_signal,
+            "audio_length": audio_length
+        })
+        return result["mel"], int(result["mel_length"][0])
+    def encode(self, mel: np.ndarray, mel_length: int) -> tuple[np.ndarray, int]:
+        """Run encoder on mel spectrogram.
+        Args:
+            mel: Mel spectrogram from preprocessor
+            mel_length: Length of mel spectrogram
+        Returns:
+            Tuple of (encoder output, encoder length)
+        """
+        result = self.encoder.predict({
+            "mel": mel,
+            "mel_length": np.array([mel_length], dtype=np.int32)
+        })
+        return result["encoder"], int(result["encoder_length"][0])
+    def decode_ctc(self, encoder_output: np.ndarray) -> list[int]:
+        """CTC greedy decoding.
+        Args:
+            encoder_output: Output from encoder
+        Returns:
+            List of token IDs (with duplicates and blanks removed)
+        """
+        result = self.ctc_head.predict({"encoder_output": encoder_output})
+        log_probs = result["ctc_log_probs"]
+        # Greedy decoding: take argmax at each timestep
+        predictions = np.argmax(log_probs[0], axis=-1)
+        # Remove duplicates and blanks
+        tokens = []
+        prev_token = self.blank_id
+        for token in predictions:
+            if token != self.blank_id and token != prev_token:
+                tokens.append(int(token))
+            prev_token = token
+        return tokens
+    def decode_tdt(self, encoder_output: np.ndarray, encoder_length: int) -> list[int]:
+        """TDT (Token-Duration Transducer) decoding.
+        Args:
+            encoder_output: Output from encoder
+            encoder_length: Length of encoder output
+        Returns:
+            List of token IDs
+        """
+        hidden_size = self.metadata["decoder_hidden_dim"]
+        num_layers = self.metadata["decoder_num_layers"]
+        # Initialize decoder state
+        h = np.zeros((num_layers, 1, hidden_size), dtype=np.float32)
+        c = np.zeros((num_layers, 1, hidden_size), dtype=np.float32)
+        # Start with blank token
+        targets = np.zeros((1, 1), dtype=np.int32)
+        target_length = np.array([1], dtype=np.int32)
+        tokens = []
+        frame = 0
+        max_tokens = 1000  # Safety limit
+        while frame < encoder_length and len(tokens) < max_tokens:
+            # Get decoder output
+            decoder_result = self.decoder.predict({
+                "targets": targets,
+                "target_length": target_length,
+                "h_in": h,
+                "c_in": c
+            })
+            decoder_output = decoder_result["decoder"]
+            h = decoder_result["h_out"]
+            c = decoder_result["c_out"]
+            # Get encoder step
+            encoder_step = encoder_output[0, frame:frame+1, :].T.reshape(1, -1, 1)
+            decoder_step = decoder_output.T.reshape(1, -1, 1)
+            # Joint prediction
+            joint_result = self.joint.predict({
+                "encoder_step": encoder_step.astype(np.float32),
+                "decoder_step": decoder_step.astype(np.float32)
+            })
+            token_id = int(joint_result["token_id"])
+            duration_bin = int(joint_result["duration_bin"])
+            # Duration bins: 0=0, 1=1, 2=2, 3=3, 4=4+
+            durations = [0, 1, 2, 3, 4]
+            duration = durations[min(duration_bin, 4)]
+            if token_id != self.blank_id:
+                tokens.append(token_id)
+                # Update decoder input
+                targets = np.array([[token_id]], dtype=np.int32)
+            # Advance by duration (minimum 1 frame)
+            frame += max(1, duration)
+        return tokens
+    def tokens_to_text(self, tokens: list[int]) -> str:
+        """Convert token IDs to text.
+        Args:
+            tokens: List of token IDs
+        Returns:
+            Decoded text string
+        """
+        pieces = [self.vocab.get(t, "") for t in tokens]
+        # Join and handle SentencePiece encoding
+        text = "".join(pieces).replace("▁", " ").strip()
+        return text
+    def transcribe(self, audio_path: str, mode: str = "tdt") -> str:
+        """Transcribe audio file.
+        Args:
+            audio_path: Path to audio file
+            mode: Decoding mode - "tdt" for full transcription, "ctc" for keyword spotting
+        Returns:
+            Transcribed text
+        """
+        # Load and preprocess audio
+        audio = self.load_audio(audio_path)
+        mel, mel_length = self.preprocess(audio)
+        # Encode
+        encoder_output, encoder_length = self.encode(mel, mel_length)
+        # Decode
+        if mode == "ctc":
+            tokens = self.decode_ctc(encoder_output)
+        else:
+            tokens = self.decode_tdt(encoder_output, encoder_length)
+        # Convert to text
+        text = self.tokens_to_text(tokens)
+        return text
+def main():
+    parser = argparse.ArgumentParser(
+        description="Run inference with Parakeet-TDT-CTC-110M CoreML model"
+    )
+    parser.add_argument(
+        "--audio", type=str, required=True,
+        help="Path to audio file (WAV, MP3, etc.)"
+    )
+    parser.add_argument(
+        "--model-dir", type=str, default="./model",
+        help="Directory containing CoreML model files"
+    )
+    parser.add_argument(
+        "--mode", type=str, choices=["tdt", "ctc"], default="tdt",
+        help="Decoding mode: 'tdt' for transcription, 'ctc' for keyword spotting"
+    )
+    args = parser.parse_args()
+    # Load model
+    model = ParakeetCoreML(args.model_dir)
+    # Transcribe
+    print(f"\nTranscribing: {args.audio}")
+    print(f"Mode: {args.mode.upper()}")
+    print("-" * 40)
+    text = model.transcribe(args.audio, mode=args.mode)
+    print(f"Result: {text}")
+# Run the CLI only when this file is executed as a script, not when it is imported
+if __name__ == "__main__":
+    main()

metadata.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "model_id": "nvidia/parakeet-tdt_ctc-110m",
+  "sample_rate": 16000,
+  "max_audio_seconds": 15.0,
+  "max_audio_samples": 240000,
+  "vocab_size": 1024,
+  "vocab_with_blank": 1025,
+  "num_extra": 5,
+  "decoder_hidden_dim": 640,
+  "decoder_num_layers": 1,
+  "components": {
+    "preprocessor": "Preprocessor.mlpackage",
+    "encoder": "Encoder.mlpackage",
+    "ctc_head": "CTCHead.mlpackage",
+    "decoder": "Decoder.mlpackage",
+    "joint_decision": "JointDecision.mlpackage"
+  }
+}

pyproject.toml ADDED Viewed

	@@ -0,0 +1,65 @@

+# Packaging metadata for the CoreML build of Parakeet-TDT-CTC-110M
+[project]
+name = "parakeet-tdt-ctc-110m-coreml"
+version = "1.0.0"
+description = "NVIDIA Parakeet-TDT-CTC-110M converted to CoreML format for Apple Silicon"
+readme = "README.md"
+requires-python = ">=3.10"
+license = { text = "Apache-2.0" }
+authors = [
+    { name = "FluidInference" }
+]
+keywords = ["asr", "speech-recognition", "coreml", "apple-silicon", "nvidia", "parakeet"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: MacOS",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Multimedia :: Sound/Audio :: Speech",
+]
+# Dependencies installed with the base package
+dependencies = [
+    "coremltools>=7.0",
+    "numpy>=1.24.0",
+    "scipy>=1.10.0",
+]
+# Optional extras, selected at install time (e.g. `pip install ".[convert]"`)
+[project.optional-dependencies]
+convert = [
+    "torch>=2.0.0",
+    "nemo-toolkit[asr]>=1.20.0",
+]
+audio = [
+    "librosa>=0.10.0",
+]
+dev = [
+    "pytest>=7.0.0",
+    "ruff>=0.1.0",
+]
+[project.urls]
+Homepage = "https://huggingface.co/FluidInference/parakeet-tdt-ctc-110m-coreml"
+Repository = "https://huggingface.co/FluidInference/parakeet-tdt-ctc-110m-coreml"
+Issues = "https://github.com/FluidInference/fluidaudio/issues"
+# Console entry point: installs a `parakeet-inference` command that calls `main` in `scripts.inference`
+# NOTE(review): this commit adds inference.py at the repository root rather than under scripts/ -
+# confirm that a `scripts` package exists, otherwise this entry point and the wheel package list
+# below will not resolve
+[project.scripts]
+parakeet-inference = "scripts.inference:main"
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+# Packages that hatchling puts into the wheel
+[tool.hatch.build.targets.wheel]
+packages = ["scripts"]
+[tool.ruff]
+line-length = 100
+target-version = "py310"
+[tool.ruff.lint]
+select = ["E", "F", "I", "N", "W"]
+# E501 is the line-too-long rule; it is excluded from the selected rule sets
+ignore = ["E501"]

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

vocab.json CHANGED Viewed

@@ -1 +1 @@

- {"0": "<unk>", "1": " t", "2": " th", "3": " a", "4": "in", "5": "re", "6": " the", "7": " w", "8": " s", "9": " o", "10": "er", "11": "ou", "12": "at", "13": "nd", "14": "it", "15": " h", "16": " c", "17": " b", "18": "is", "19": "en", "20": "on", "21": "ing", "22": " f", "23": " to", "24": " m", "25": "es", "26": " p", "27": "or", "28": "an", "29": " d", "30": "ll", "31": " I", "32": "ed", "33": " and", "34": " l", "35": " of", "36": " in", "37": " y", "38": "ar", "39": " g", "40": " you", "41": "as", "42": "om", "43": " n", "44": "ve", "45": " that", "46": "le", "47": "ic", "48": "us", "49": "ow", "50": "et", "51": "al", "52": " e", "53": "ut", "54": " it", "55": "ot", "56": " be", "57": " T", "58": "ion", "59": " is", "60": " wh", "61": " re", "62": " on", "63": " we", "64": "ent", "65": " A", "66": "ay", "67": " ha", "68": " Th", "69": "id", "70": " S", "71": "ac", "72": "gh", "73": "ver", "74": "ke", "75": " for", "76": "im", "77": "ly", "78": "ur", "79": "ld", "80": " he", "81": " st", "82": "all", "83": "ro", "84": "st", "85": "se", "86": "ct", "87": "ith", "88": "ir", "89": "am", "90": " this", "91": "if", "92": " W", "93": "oo", "94": "ri", "95": " was", "96": "ght", "97": " u", "98": " with", "99": "ad", "100": "ch", "101": " se", "102": " k", "103": " an", "104": " The", "105": " li", "106": " do", "107": " B", "108": " have", "109": " as", "110": "th", "111": " are", "112": " sh", "113": "ust", "114": "ce", "115": "ally", "116": "ill", "117": " H", "118": " j", "119": "ter", "120": " go", "121": " And", "122": "ation", "123": " C", "124": " so", "125": "ome", "126": " not", "127": "op", "128": "il", "129": "ore", "130": " ne", "131": " can", "132": " me", "133": " at", "134": "ould", "135": "ant", "136": " M", "137": " like", "138": "ere", "139": " they", "140": "ra", "141": "ers", "142": " ab", "143": " de", "144": " kn", "145": "ge", "146": " Y", "147": " ch", "148": "ul", "149": "pp", "150": " or", "151": " al", "152": " con", "153": " com", 
"154": "ess", "155": " su", "156": "out", "157": " your", "158": " So", "159": "ate", "160": " one", "161": " all", "162": " ex", "163": "est", "164": " fr", "165": " just", "166": " pro", "167": " know", "168": " O", "169": "ain", "170": " but", "171": "ol", "172": "ive", "173": " v", "174": "use", "175": "very", "176": "art", "177": "qu", "178": " my", "179": "el", "180": " N", "181": "nt", "182": " It", "183": " what", "184": "ab", "185": " P", "186": " wor", "187": " out", "188": " there", "189": " up", "190": "um", "191": " from", "192": "pe", "193": " tw", "194": " r", "195": "and", "196": "ight", "197": "ort", "198": "un", "199": " L", "200": "ist", "201": " about", "202": "ide", "203": "ig", "204": "ake", "205": " D", "206": "em", "207": "os", "208": "king", "209": "rou", "210": "ind", "211": "our", "212": "res", "213": " We", "214": " get", "215": " E", "216": " G", "217": "ack", "218": " le", "219": "ity", "220": "od", "221": " F", "222": "ard", "223": " pl", "224": " our", "225": " int", "226": "ment", "227": " will", "228": "ies", "229": " by", "230": "ink", "231": "ca", "232": " if", "233": "red", "234": "her", "235": "ie", "236": " us", "237": " some", "238": " don", "239": "ven", "240": "ood", "241": "ast", "242": " R", "243": " his", "244": " tim", "245": " tr", "246": " more", "247": "ich", "248": "ous", "249": "ame", "250": " going", "251": " had", "252": " them", "253": "ook", "254": " pe", "255": " Wh", "256": " You", "257": " But", "258": "ine", "259": " here", "260": " would", "261": "cause", "262": "right", "263": "so", "264": "ost", "265": "ure", "266": " has", "267": "ect", "268": " think", "269": " fe", "270": "ong", "271": " see", "272": " when", "273": " who", "274": " were", "275": " really", "276": " their", "277": " want", "278": "one", "279": "ople", "280": " then", "281": " time", "282": " sa", "283": "ap", "284": " te", "285": " He", "286": " ye", "287": "ck", "288": " her", "289": " thing", "290": " right", "291": " which", "292": 
"itt", "293": "ice", "294": "act", "295": " people", "296": "ty", "297": " two", "298": " J", "299": " im", "300": "ther", "301": "ci", "302": "ose", "303": " cl", "304": " qu", "305": " man", "306": " also", "307": "ree", "308": " en", "309": "ud", "310": " how", "311": "reat", "312": "ak", "313": "hing", "314": "ag", "315": " any", "316": "ff", "317": "ace", "318": "per", "319": " because", "320": " very", "321": "own", "322": " ad", "323": " act", "324": " been", "325": " now", "326": " ag", "327": " into", "328": " comp", "329": "ars", "330": "ions", "331": "are", "332": "ite", "333": "iv", "334": " these", "335": "ays", "336": "ep", "337": " This", "338": " she", "339": "ans", "340": "ah", "341": "een", "342": " over", "343": "ry", "344": " lo", "345": "age", "346": " pr", "347": " sp", "348": "ue", "349": " co", "350": "ick", "351": "ber", "352": " did", "353": "ip", "354": "ach", "355": " back", "356": " no", "357": " cont", "358": " other", "359": " every", "360": "pt", "361": " need", "362": " him", "363": " U", "364": " In", "365": " work", "366": "irst", "367": " part", "368": " look", "369": "ittle", "370": "ble", "371": "iz", "372": " un", "373": " make", "374": "omet", "375": "nder", "376": "ish", "377": "na", "378": " little", "379": " off", "380": " than", "381": " got", "382": "ually", "383": " per", "384": " good", "385": " way", "386": " could", "387": " ac", "388": " imp", "389": "able", "390": " where", "391": "iff", "392": " That", "393": " res", "394": "ount", "395": "pl", "396": "ance", "397": " first", "398": " ro", "399": " pre", "400": "ass", "401": " say", "402": "int", "403": "ated", "404": "ire", "405": "uch", "406": "ase", "407": " somet", "408": "ound", "409": " down", "410": " diff", "411": "sel", "412": " gu", "413": " am", "414": "ress", "415": " lot", "416": "ence", "417": " dis", "418": "orm", "419": "ix", "420": " po", "421": "ving", "422": "enty", "423": " K", "424": " spe", "425": "und", "426": "he", "427": " much", "428": " 
ar", "429": "round", "430": " app", "431": "co", "432": "ark", "433": " new", "434": "ater", "435": "ult", "436": "end", "437": " even", "438": " start", "439": "ations", "440": "rough", "441": "ile", "442": "fter", "443": " well", "444": "be", "445": " They", "446": " three", "447": "ign", "448": "ild", "449": " said", "450": "ough", "451": "ang", "452": " too", "453": "ade", "454": " bl", "455": "ens", "456": " inc", "457": "ia", "458": " those", "459": " mo", "460": " take", "461": " through", "462": " fl", "463": " kind", "464": " things", "465": " bet", "466": " only", "467": " St", "468": " let", "469": "cess", "470": " Ch", "471": "ary", "472": "vel", "473": " If", "474": "xt", "475": "other", "476": "av", "477": "ical", "478": "ord", "479": " again", "480": " something", "481": "onna", "482": "fore", "483": " may", "484": "ting", "485": " bu", "486": " differe", "487": "urn", "488": " gonna", "489": " does", "490": "uct", "491": "og", "492": " twenty", "493": " gr", "494": " Ye", "495": "wn", "496": " should", "497": " comm", "498": "ition", "499": " under", "500": " hel", "501": "ory", "502": " fo", "503": " use", "504": "igh", "505": "ife", "506": " actually", "507": " tal", "508": " call", "509": "ents", "510": "ious", "511": "ull", "512": " There", "513": " Yeah", "514": " most", "515": " ke", "516": "ors", "517": "ved", "518": "ys", "519": " sc", "520": " happ", "521": "ope", "522": " help", "523": "atch", "524": " What", "525": " rem", "526": "ple", "527": " Now", "528": " br", "529": "ool", "530": "oth", "531": " four", "532": "self", "533": " str", "534": "ne", "535": "thing", "536": " put", "537": "ial", "538": " great", "539": "ail", "540": "ub", "541": "ning", "542": " sm", "543": " feel", "544": " five", "545": "ody", "546": "undred", "547": "iss", "548": "ank", "549": "get", "550": "aking", "551": " many", "552": " hundred", "553": " years", "554": " being", "555": " come", "556": " mean", "557": "ily", "558": " different", "559": " after", 
"560": " ser", "561": " show", "562": "form", "563": "ful", "564": "oy", "565": " six", "566": " vide", "567": " V", "568": " its", "569": " point", "570": " day", "571": " des", "572": "ons", "573": " bit", "574": " bel", "575": " before", "576": " aw", "577": " end", "578": " Oh", "579": " still", "580": "ath", "581": " long", "582": " '", "583": "ise", "584": "ob", "585": "day", "586": " add", "587": "ft", "588": "ves", "589": "ces", "590": "ady", "591": " cr", "592": " around", "593": " try", "594": "les", "595": "vers", "596": "kay", "597": "ian", "598": "ates", "599": " find", "600": "ward", "601": " As", "602": " eight", "603": "lic", "604": " same", "605": " pos", "606": " em", "607": " made", "608": " supp", "609": " life", "610": " Be", "611": "pect", "612": " dec", "613": " play", "614": "ange", "615": " att", "616": " pers", "617": "ways", "618": " high", "619": " hand", "620": " next", "621": " cons", "622": " own", "623": " inv", "624": "ower", "625": " ind", "626": "ert", "627": "ng", "628": "ave", "629": " year", "630": " big", "631": "ating", "632": " world", "633": " rel", "634": " sure", "635": " tra", "636": "ew", "637": "ered", "638": " fin", "639": " Well", "640": " sl", "641": " doing", "642": "bs", "643": " set", "644": " rec", "645": "ual", "646": "cial", "647": " ph", "648": "erm", "649": " love", "650": "ph", "651": " real", "652": " last", "653": "ict", "654": " bo", "655": " ra", "656": "ible", "657": " wr", "658": "mer", "659": " count", "660": "ities", "661": " always", "662": "inet", "663": "ments", "664": "uc", "665": " might", "666": " inter", "667": " video", "668": "gin", "669": " tell", "670": " never", "671": "vent", "672": " import", "673": "ied", "674": " sy", "675": " How", "676": "ically", "677": "ought", "678": " thir", "679": " rep", "680": "ks", "681": "ib", "682": " fam", "683": "ject", "684": " bas", "685": " She", "686": " give", "687": "akes", "688": " ninet", "689": " reg", "690": " min", "691": " op", "692": " 
def", "693": " didn", "694": "te", "695": " cour", "696": " why", "697": " ent", "698": " place", "699": " ins", "700": " car", "701": "ather", "702": " person", "703": "ular", "704": " inst", "705": " prod", "706": "lect", "707": " Al", "708": " today", "709": " bec", "710": " sur", "711": " All", "712": " another", "713": " bus", "714": " keep", "715": "ell", "716": "ese", "717": "riend", "718": " quest", "719": " talk", "720": "als", "721": "ings", "722": " mon", "723": "cond", "724": "old", "725": " acc", "726": " la", "727": " num", "728": "ident", "729": " che", "730": "iness", "731": " turn", "732": " ear", "733": " No", "734": "ousand", "735": " better", "736": "ific", "737": " loo", "738": " gl", "739": "oc", "740": " important", "741": "ited", "742": " An", "743": " thousand", "744": "ility", "745": "llow", "746": " used", "747": " gen", "748": " sim", "749": "li", "750": " happen", "751": " Un", "752": " Let", "753": "air", "754": "ock", "755": "ably", "756": "gg", "757": " watch", "758": " For", "759": " sw", "760": "ren", "761": "ute", "762": "ever", "763": " pol", "764": " sch", "765": " When", "766": " such", "767": " fif", "768": " home", "769": " cle", "770": " contin", "771": "ouse", "772": " friend", "773": "uring", "774": " Okay", "775": "gr", "776": " able", "777": " stud", "778": " eff", "779": "hip", "780": "body", "781": " top", "782": "ness", "783": " exper", "784": " pret", "785": " both", "786": " done", "787": "cri", "788": " mark", "789": " while", "790": " old", "791": "ros", "792": "ont", "793": " second", "794": "ative", "795": " thought", "796": " best", "797": " found", "798": "iew", "799": " belie", "800": " each", "801": "erest", "802": " tri", "803": " eas", "804": " ca", "805": " fact", "806": " care", "807": " fun", "808": "atter", "809": "ures", "810": " head", "811": " lear", "812": " water", "813": " hard", "814": " few", "815": " side", "816": "ween", "817": " exp", "818": " away", "819": "its", "820": " ext", "821": 
"lud", "822": " run", "823": " trans", "824": "ince", "825": " sk", "826": " open", "827": "cus", "828": " between", "829": " called", "830": " wee", "831": " pretty", "832": "ason", "833": " far", "834": "ember", "835": "omm", "836": " interest", "837": "any", "838": "ner", "839": "uff", "840": " pres", "841": " cur", "842": " child", "843": "ee", "844": " toget", "845": " together", "846": "olog", "847": " God", "848": "ond", "849": " char", "850": " looking", "851": "stem", "852": "az", "853": "cent", "854": " ob", "855": " ass", "856": "land", "857": " doesn", "858": " business", "859": " course", "860": " ten", "861": "ps", "862": "arch", "863": "ced", "864": "ms", "865": "ize", "866": "nce", "867": " ref", "868": " name", "869": "ross", "870": " grow", "871": "oney", "872": " went", "873": "ics", "874": "teen", "875": " cou", "876": " prob", "877": " ret", "878": " guys", "879": " came", "880": "ash", "881": "led", "882": " Eur", "883": "ues", "884": " ide", "885": "gan", "886": " everything", "887": " getting", "888": " ask", "889": " cor", "890": " build", "891": " sign", "892": " small", "893": "uck", "894": " el", "895": " col", "896": " Is", "897": "ational", "898": "stand", "899": "cy", "900": " conf", "901": "der", "902": " bre", "903": " cap", "904": " mod", "905": "ets", "906": "ike", "907": " number", "908": " comple", "909": "ertain", "910": " ever", "911": " coll", "912": " hum", "913": " Europe", "914": " cre", "915": " met", "916": " exam", "917": " move", "918": " pass", "919": " left", "920": " system", "921": " includ", "922": " Thank", "923": "cept", "924": " wom", "925": " product", "926": "ten", "927": " rest", "928": " probably", "929": " dri", "930": " Do", "931": " gener", "932": " anything", "933": " lar", "934": " My", "935": " school", "936": " lead", "937": " sub", "938": " ty", "939": " plan", "940": " seem", "941": " whole", "942": "irect", "943": " light", "944": " must", "945": " mom", "946": " opp", "947": " support", "948": " 
family", "949": "ices", "950": "amp", "951": " proble", "952": " dr", "953": "ready", "954": " using", "955": "ense", "956": " prov", "957": "ush", "958": "ax", "959": " power", "960": " Re", "961": "alth", "962": " ev", "963": " stand", "964": " war", "965": "ts", "966": " ", "967": "e", "968": "t", "969": "o", "970": "a", "971": "n", "972": "i", "973": "s", "974": "r", "975": "h", "976": "l", "977": "d", "978": "u", "979": "c", "980": "m", "981": "y", "982": "g", "983": "w", "984": "f", "985": "p", "986": ".", "987": "b", "988": ",", "989": "v", "990": "k", "991": "'", "992": "I", "993": "T", "994": "A", "995": "S", "996": "x", "997": "W", "998": "j", "999": "B", "1000": "C", "1001": "H", "1002": "?", "1003": "M", "1004": "O", "1005": "Y", "1006": "N", "1007": "P", "1008": "E", "1009": "q", "1010": "L", "1011": "D", "1012": "z", "1013": "G", "1014": "F", "1015": "R", "1016": "!", "1017": "J", "1018": "U", "1019": "K", "1020": "V", "1021": "Q", "1022": "Z", "1023": "X"}

+ {"0": "<unk>", "1": "\u2581t", "2": "\u2581th", "3": "\u2581a", "4": "in", "5": "re", "6": "\u2581the", "7": "\u2581w", "8": "\u2581s", "9": "\u2581o", "10": "er", "11": "ou", "12": "at", "13": "nd", "14": "it", "15": "\u2581h", "16": "\u2581c", "17": "\u2581b", "18": "is", "19": "en", "20": "on", "21": "ing", "22": "\u2581f", "23": "\u2581to", "24": "\u2581m", "25": "es", "26": "\u2581p", "27": "or", "28": "an", "29": "\u2581d", "30": "ll", "31": "\u2581I", "32": "ed", "33": "\u2581and", "34": "\u2581l", "35": "\u2581of", "36": "\u2581in", "37": "\u2581y", "38": "ar", "39": "\u2581g", "40": "\u2581you", "41": "as", "42": "om", "43": "\u2581n", "44": "ve", "45": "\u2581that", "46": "le", "47": "ic", "48": "us", "49": "ow", "50": "et", "51": "al", "52": "\u2581e", "53": "ut", "54": "\u2581it", "55": "ot", "56": "\u2581be", "57": "\u2581T", "58": "ion", "59": "\u2581is", "60": "\u2581wh", "61": "\u2581re", "62": "\u2581on", "63": "\u2581we", "64": "ent", "65": "\u2581A", "66": "ay", "67": "\u2581ha", "68": "\u2581Th", "69": "id", "70": "\u2581S", "71": "ac", "72": "gh", "73": "ver", "74": "ke", "75": "\u2581for", "76": "im", "77": "ly", "78": "ur", "79": "ld", "80": "\u2581he", "81": "\u2581st", "82": "all", "83": "ro", "84": "st", "85": "se", "86": "ct", "87": "ith", "88": "ir", "89": "am", "90": "\u2581this", "91": "if", "92": "\u2581W", "93": "oo", "94": "ri", "95": "\u2581was", "96": "ght", "97": "\u2581u", "98": "\u2581with", "99": "ad", "100": "ch", "101": "\u2581se", "102": "\u2581k", "103": "\u2581an", "104": "\u2581The", "105": "\u2581li", "106": "\u2581do", "107": "\u2581B", "108": "\u2581have", "109": "\u2581as", "110": "th", "111": "\u2581are", "112": "\u2581sh", "113": "ust", "114": "ce", "115": "ally", "116": "ill", "117": "\u2581H", "118": "\u2581j", "119": "ter", "120": "\u2581go", "121": "\u2581And", "122": "ation", "123": "\u2581C", "124": "\u2581so", "125": "ome", "126": "\u2581not", "127": "op", "128": "il", "129": "ore", "130": "\u2581ne", 
"131": "\u2581can", "132": "\u2581me", "133": "\u2581at", "134": "ould", "135": "ant", "136": "\u2581M", "137": "\u2581like", "138": "ere", "139": "\u2581they", "140": "ra", "141": "ers", "142": "\u2581ab", "143": "\u2581de", "144": "\u2581kn", "145": "ge", "146": "\u2581Y", "147": "\u2581ch", "148": "ul", "149": "pp", "150": "\u2581or", "151": "\u2581al", "152": "\u2581con", "153": "\u2581com", "154": "ess", "155": "\u2581su", "156": "out", "157": "\u2581your", "158": "\u2581So", "159": "ate", "160": "\u2581one", "161": "\u2581all", "162": "\u2581ex", "163": "est", "164": "\u2581fr", "165": "\u2581just", "166": "\u2581pro", "167": "\u2581know", "168": "\u2581O", "169": "ain", "170": "\u2581but", "171": "ol", "172": "ive", "173": "\u2581v", "174": "use", "175": "very", "176": "art", "177": "qu", "178": "\u2581my", "179": "el", "180": "\u2581N", "181": "nt", "182": "\u2581It", "183": "\u2581what", "184": "ab", "185": "\u2581P", "186": "\u2581wor", "187": "\u2581out", "188": "\u2581there", "189": "\u2581up", "190": "um", "191": "\u2581from", "192": "pe", "193": "\u2581tw", "194": "\u2581r", "195": "and", "196": "ight", "197": "ort", "198": "un", "199": "\u2581L", "200": "ist", "201": "\u2581about", "202": "ide", "203": "ig", "204": "ake", "205": "\u2581D", "206": "em", "207": "os", "208": "king", "209": "rou", "210": "ind", "211": "our", "212": "res", "213": "\u2581We", "214": "\u2581get", "215": "\u2581E", "216": "\u2581G", "217": "ack", "218": "\u2581le", "219": "ity", "220": "od", "221": "\u2581F", "222": "ard", "223": "\u2581pl", "224": "\u2581our", "225": "\u2581int", "226": "ment", "227": "\u2581will", "228": "ies", "229": "\u2581by", "230": "ink", "231": "ca", "232": "\u2581if", "233": "red", "234": "her", "235": "ie", "236": "\u2581us", "237": "\u2581some", "238": "\u2581don", "239": "ven", "240": "ood", "241": "ast", "242": "\u2581R", "243": "\u2581his", "244": "\u2581tim", "245": "\u2581tr", "246": "\u2581more", "247": "ich", "248": "ous", "249": "ame", 
"250": "\u2581going", "251": "\u2581had", "252": "\u2581them", "253": "ook", "254": "\u2581pe", "255": "\u2581Wh", "256": "\u2581You", "257": "\u2581But", "258": "ine", "259": "\u2581here", "260": "\u2581would", "261": "cause", "262": "right", "263": "so", "264": "ost", "265": "ure", "266": "\u2581has", "267": "ect", "268": "\u2581think", "269": "\u2581fe", "270": "ong", "271": "\u2581see", "272": "\u2581when", "273": "\u2581who", "274": "\u2581were", "275": "\u2581really", "276": "\u2581their", "277": "\u2581want", "278": "one", "279": "ople", "280": "\u2581then", "281": "\u2581time", "282": "\u2581sa", "283": "ap", "284": "\u2581te", "285": "\u2581He", "286": "\u2581ye", "287": "ck", "288": "\u2581her", "289": "\u2581thing", "290": "\u2581right", "291": "\u2581which", "292": "itt", "293": "ice", "294": "act", "295": "\u2581people", "296": "ty", "297": "\u2581two", "298": "\u2581J", "299": "\u2581im", "300": "ther", "301": "ci", "302": "ose", "303": "\u2581cl", "304": "\u2581qu", "305": "\u2581man", "306": "\u2581also", "307": "ree", "308": "\u2581en", "309": "ud", "310": "\u2581how", "311": "reat", "312": "ak", "313": "hing", "314": "ag", "315": "\u2581any", "316": "ff", "317": "ace", "318": "per", "319": "\u2581because", "320": "\u2581very", "321": "own", "322": "\u2581ad", "323": "\u2581act", "324": "\u2581been", "325": "\u2581now", "326": "\u2581ag", "327": "\u2581into", "328": "\u2581comp", "329": "ars", "330": "ions", "331": "are", "332": "ite", "333": "iv", "334": "\u2581these", "335": "ays", "336": "ep", "337": "\u2581This", "338": "\u2581she", "339": "ans", "340": "ah", "341": "een", "342": "\u2581over", "343": "ry", "344": "\u2581lo", "345": "age", "346": "\u2581pr", "347": "\u2581sp", "348": "ue", "349": "\u2581co", "350": "ick", "351": "ber", "352": "\u2581did", "353": "ip", "354": "ach", "355": "\u2581back", "356": "\u2581no", "357": "\u2581cont", "358": "\u2581other", "359": "\u2581every", "360": "pt", "361": "\u2581need", "362": "\u2581him", "363": 
"\u2581U", "364": "\u2581In", "365": "\u2581work", "366": "irst", "367": "\u2581part", "368": "\u2581look", "369": "ittle", "370": "ble", "371": "iz", "372": "\u2581un", "373": "\u2581make", "374": "omet", "375": "nder", "376": "ish", "377": "na", "378": "\u2581little", "379": "\u2581off", "380": "\u2581than", "381": "\u2581got", "382": "ually", "383": "\u2581per", "384": "\u2581good", "385": "\u2581way", "386": "\u2581could", "387": "\u2581ac", "388": "\u2581imp", "389": "able", "390": "\u2581where", "391": "iff", "392": "\u2581That", "393": "\u2581res", "394": "ount", "395": "pl", "396": "ance", "397": "\u2581first", "398": "\u2581ro", "399": "\u2581pre", "400": "ass", "401": "\u2581say", "402": "int", "403": "ated", "404": "ire", "405": "uch", "406": "ase", "407": "\u2581somet", "408": "ound", "409": "\u2581down", "410": "\u2581diff", "411": "sel", "412": "\u2581gu", "413": "\u2581am", "414": "ress", "415": "\u2581lot", "416": "ence", "417": "\u2581dis", "418": "orm", "419": "ix", "420": "\u2581po", "421": "ving", "422": "enty", "423": "\u2581K", "424": "\u2581spe", "425": "und", "426": "he", "427": "\u2581much", "428": "\u2581ar", "429": "round", "430": "\u2581app", "431": "co", "432": "ark", "433": "\u2581new", "434": "ater", "435": "ult", "436": "end", "437": "\u2581even", "438": "\u2581start", "439": "ations", "440": "rough", "441": "ile", "442": "fter", "443": "\u2581well", "444": "be", "445": "\u2581They", "446": "\u2581three", "447": "ign", "448": "ild", "449": "\u2581said", "450": "ough", "451": "ang", "452": "\u2581too", "453": "ade", "454": "\u2581bl", "455": "ens", "456": "\u2581inc", "457": "ia", "458": "\u2581those", "459": "\u2581mo", "460": "\u2581take", "461": "\u2581through", "462": "\u2581fl", "463": "\u2581kind", "464": "\u2581things", "465": "\u2581bet", "466": "\u2581only", "467": "\u2581St", "468": "\u2581let", "469": "cess", "470": "\u2581Ch", "471": "ary", "472": "vel", "473": "\u2581If", "474": "xt", "475": "other", "476": "av", "477": 
"ical", "478": "ord", "479": "\u2581again", "480": "\u2581something", "481": "onna", "482": "fore", "483": "\u2581may", "484": "ting", "485": "\u2581bu", "486": "\u2581differe", "487": "urn", "488": "\u2581gonna", "489": "\u2581does", "490": "uct", "491": "og", "492": "\u2581twenty", "493": "\u2581gr", "494": "\u2581Ye", "495": "wn", "496": "\u2581should", "497": "\u2581comm", "498": "ition", "499": "\u2581under", "500": "\u2581hel", "501": "ory", "502": "\u2581fo", "503": "\u2581use", "504": "igh", "505": "ife", "506": "\u2581actually", "507": "\u2581tal", "508": "\u2581call", "509": "ents", "510": "ious", "511": "ull", "512": "\u2581There", "513": "\u2581Yeah", "514": "\u2581most", "515": "\u2581ke", "516": "ors", "517": "ved", "518": "ys", "519": "\u2581sc", "520": "\u2581happ", "521": "ope", "522": "\u2581help", "523": "atch", "524": "\u2581What", "525": "\u2581rem", "526": "ple", "527": "\u2581Now", "528": "\u2581br", "529": "ool", "530": "oth", "531": "\u2581four", "532": "self", "533": "\u2581str", "534": "ne", "535": "thing", "536": "\u2581put", "537": "ial", "538": "\u2581great", "539": "ail", "540": "ub", "541": "ning", "542": "\u2581sm", "543": "\u2581feel", "544": "\u2581five", "545": "ody", "546": "undred", "547": "iss", "548": "ank", "549": "get", "550": "aking", "551": "\u2581many", "552": "\u2581hundred", "553": "\u2581years", "554": "\u2581being", "555": "\u2581come", "556": "\u2581mean", "557": "ily", "558": "\u2581different", "559": "\u2581after", "560": "\u2581ser", "561": "\u2581show", "562": "form", "563": "ful", "564": "oy", "565": "\u2581six", "566": "\u2581vide", "567": "\u2581V", "568": "\u2581its", "569": "\u2581point", "570": "\u2581day", "571": "\u2581des", "572": "ons", "573": "\u2581bit", "574": "\u2581bel", "575": "\u2581before", "576": "\u2581aw", "577": "\u2581end", "578": "\u2581Oh", "579": "\u2581still", "580": "ath", "581": "\u2581long", "582": "\u2581'", "583": "ise", "584": "ob", "585": "day", "586": "\u2581add", "587": "ft", 
"588": "ves", "589": "ces", "590": "ady", "591": "\u2581cr", "592": "\u2581around", "593": "\u2581try", "594": "les", "595": "vers", "596": "kay", "597": "ian", "598": "ates", "599": "\u2581find", "600": "ward", "601": "\u2581As", "602": "\u2581eight", "603": "lic", "604": "\u2581same", "605": "\u2581pos", "606": "\u2581em", "607": "\u2581made", "608": "\u2581supp", "609": "\u2581life", "610": "\u2581Be", "611": "pect", "612": "\u2581dec", "613": "\u2581play", "614": "ange", "615": "\u2581att", "616": "\u2581pers", "617": "ways", "618": "\u2581high", "619": "\u2581hand", "620": "\u2581next", "621": "\u2581cons", "622": "\u2581own", "623": "\u2581inv", "624": "ower", "625": "\u2581ind", "626": "ert", "627": "ng", "628": "ave", "629": "\u2581year", "630": "\u2581big", "631": "ating", "632": "\u2581world", "633": "\u2581rel", "634": "\u2581sure", "635": "\u2581tra", "636": "ew", "637": "ered", "638": "\u2581fin", "639": "\u2581Well", "640": "\u2581sl", "641": "\u2581doing", "642": "bs", "643": "\u2581set", "644": "\u2581rec", "645": "ual", "646": "cial", "647": "\u2581ph", "648": "erm", "649": "\u2581love", "650": "ph", "651": "\u2581real", "652": "\u2581last", "653": "ict", "654": "\u2581bo", "655": "\u2581ra", "656": "ible", "657": "\u2581wr", "658": "mer", "659": "\u2581count", "660": "ities", "661": "\u2581always", "662": "inet", "663": "ments", "664": "uc", "665": "\u2581might", "666": "\u2581inter", "667": "\u2581video", "668": "gin", "669": "\u2581tell", "670": "\u2581never", "671": "vent", "672": "\u2581import", "673": "ied", "674": "\u2581sy", "675": "\u2581How", "676": "ically", "677": "ought", "678": "\u2581thir", "679": "\u2581rep", "680": "ks", "681": "ib", "682": "\u2581fam", "683": "ject", "684": "\u2581bas", "685": "\u2581She", "686": "\u2581give", "687": "akes", "688": "\u2581ninet", "689": "\u2581reg", "690": "\u2581min", "691": "\u2581op", "692": "\u2581def", "693": "\u2581didn", "694": "te", "695": "\u2581cour", "696": "\u2581why", "697": 
"\u2581ent", "698": "\u2581place", "699": "\u2581ins", "700": "\u2581car", "701": "ather", "702": "\u2581person", "703": "ular", "704": "\u2581inst", "705": "\u2581prod", "706": "lect", "707": "\u2581Al", "708": "\u2581today", "709": "\u2581bec", "710": "\u2581sur", "711": "\u2581All", "712": "\u2581another", "713": "\u2581bus", "714": "\u2581keep", "715": "ell", "716": "ese", "717": "riend", "718": "\u2581quest", "719": "\u2581talk", "720": "als", "721": "ings", "722": "\u2581mon", "723": "cond", "724": "old", "725": "\u2581acc", "726": "\u2581la", "727": "\u2581num", "728": "ident", "729": "\u2581che", "730": "iness", "731": "\u2581turn", "732": "\u2581ear", "733": "\u2581No", "734": "ousand", "735": "\u2581better", "736": "ific", "737": "\u2581loo", "738": "\u2581gl", "739": "oc", "740": "\u2581important", "741": "ited", "742": "\u2581An", "743": "\u2581thousand", "744": "ility", "745": "llow", "746": "\u2581used", "747": "\u2581gen", "748": "\u2581sim", "749": "li", "750": "\u2581happen", "751": "\u2581Un", "752": "\u2581Let", "753": "air", "754": "ock", "755": "ably", "756": "gg", "757": "\u2581watch", "758": "\u2581For", "759": "\u2581sw", "760": "ren", "761": "ute", "762": "ever", "763": "\u2581pol", "764": "\u2581sch", "765": "\u2581When", "766": "\u2581such", "767": "\u2581fif", "768": "\u2581home", "769": "\u2581cle", "770": "\u2581contin", "771": "ouse", "772": "\u2581friend", "773": "uring", "774": "\u2581Okay", "775": "gr", "776": "\u2581able", "777": "\u2581stud", "778": "\u2581eff", "779": "hip", "780": "body", "781": "\u2581top", "782": "ness", "783": "\u2581exper", "784": "\u2581pret", "785": "\u2581both", "786": "\u2581done", "787": "cri", "788": "\u2581mark", "789": "\u2581while", "790": "\u2581old", "791": "ros", "792": "ont", "793": "\u2581second", "794": "ative", "795": "\u2581thought", "796": "\u2581best", "797": "\u2581found", "798": "iew", "799": "\u2581belie", "800": "\u2581each", "801": "erest", "802": "\u2581tri", "803": "\u2581eas", 
"804": "\u2581ca", "805": "\u2581fact", "806": "\u2581care", "807": "\u2581fun", "808": "atter", "809": "ures", "810": "\u2581head", "811": "\u2581lear", "812": "\u2581water", "813": "\u2581hard", "814": "\u2581few", "815": "\u2581side", "816": "ween", "817": "\u2581exp", "818": "\u2581away", "819": "its", "820": "\u2581ext", "821": "lud", "822": "\u2581run", "823": "\u2581trans", "824": "ince", "825": "\u2581sk", "826": "\u2581open", "827": "cus", "828": "\u2581between", "829": "\u2581called", "830": "\u2581wee", "831": "\u2581pretty", "832": "ason", "833": "\u2581far", "834": "ember", "835": "omm", "836": "\u2581interest", "837": "any", "838": "ner", "839": "uff", "840": "\u2581pres", "841": "\u2581cur", "842": "\u2581child", "843": "ee", "844": "\u2581toget", "845": "\u2581together", "846": "olog", "847": "\u2581God", "848": "ond", "849": "\u2581char", "850": "\u2581looking", "851": "stem", "852": "az", "853": "cent", "854": "\u2581ob", "855": "\u2581ass", "856": "land", "857": "\u2581doesn", "858": "\u2581business", "859": "\u2581course", "860": "\u2581ten", "861": "ps", "862": "arch", "863": "ced", "864": "ms", "865": "ize", "866": "nce", "867": "\u2581ref", "868": "\u2581name", "869": "ross", "870": "\u2581grow", "871": "oney", "872": "\u2581went", "873": "ics", "874": "teen", "875": "\u2581cou", "876": "\u2581prob", "877": "\u2581ret", "878": "\u2581guys", "879": "\u2581came", "880": "ash", "881": "led", "882": "\u2581Eur", "883": "ues", "884": "\u2581ide", "885": "gan", "886": "\u2581everything", "887": "\u2581getting", "888": "\u2581ask", "889": "\u2581cor", "890": "\u2581build", "891": "\u2581sign", "892": "\u2581small", "893": "uck", "894": "\u2581el", "895": "\u2581col", "896": "\u2581Is", "897": "ational", "898": "stand", "899": "cy", "900": "\u2581conf", "901": "der", "902": "\u2581bre", "903": "\u2581cap", "904": "\u2581mod", "905": "ets", "906": "ike", "907": "\u2581number", "908": "\u2581comple", "909": "ertain", "910": "\u2581ever", "911": 
"\u2581coll", "912": "\u2581hum", "913": "\u2581Europe", "914": "\u2581cre", "915": "\u2581met", "916": "\u2581exam", "917": "\u2581move", "918": "\u2581pass", "919": "\u2581left", "920": "\u2581system", "921": "\u2581includ", "922": "\u2581Thank", "923": "cept", "924": "\u2581wom", "925": "\u2581product", "926": "ten", "927": "\u2581rest", "928": "\u2581probably", "929": "\u2581dri", "930": "\u2581Do", "931": "\u2581gener", "932": "\u2581anything", "933": "\u2581lar", "934": "\u2581My", "935": "\u2581school", "936": "\u2581lead", "937": "\u2581sub", "938": "\u2581ty", "939": "\u2581plan", "940": "\u2581seem", "941": "\u2581whole", "942": "irect", "943": "\u2581light", "944": "\u2581must", "945": "\u2581mom", "946": "\u2581opp", "947": "\u2581support", "948": "\u2581family", "949": "ices", "950": "amp", "951": "\u2581proble", "952": "\u2581dr", "953": "ready", "954": "\u2581using", "955": "ense", "956": "\u2581prov", "957": "ush", "958": "ax", "959": "\u2581power", "960": "\u2581Re", "961": "alth", "962": "\u2581ev", "963": "\u2581stand", "964": "\u2581war", "965": "ts", "966": "\u2581", "967": "e", "968": "t", "969": "o", "970": "a", "971": "n", "972": "i", "973": "s", "974": "r", "975": "h", "976": "l", "977": "d", "978": "u", "979": "c", "980": "m", "981": "y", "982": "g", "983": "w", "984": "f", "985": "p", "986": ".", "987": "b", "988": ",", "989": "v", "990": "k", "991": "'", "992": "I", "993": "T", "994": "A", "995": "S", "996": "x", "997": "W", "998": "j", "999": "B", "1000": "C", "1001": "H", "1002": "?", "1003": "M", "1004": "O", "1005": "Y", "1006": "N", "1007": "P", "1008": "E", "1009": "q", "1010": "L", "1011": "D", "1012": "z", "1013": "G", "1014": "F", "1015": "R", "1016": "!", "1017": "J", "1018": "U", "1019": "K", "1020": "V", "1021": "Q", "1022": "Z", "1023": "X"}