Jarbas committed on
Commit
dd95e9b
·
verified ·
1 Parent(s): bff0dbc

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1,5 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
 
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.mlpackage filter=lfs diff=lfs merge=lfs -text
2
+ *.mlpackage/** filter=lfs diff=lfs merge=lfs -text
3
+ weights/** filter=lfs diff=lfs merge=lfs -text
4
  *.bin filter=lfs diff=lfs merge=lfs -text
5
+ *.nemo filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-4.0
3
+ language:
4
+ - en
5
+ tags:
6
+ - asr
7
+ - speech
8
+ - coreml
9
+ - nemo
10
+ - parakeet
11
+ - nvidia
12
+ library_name: coremltools
13
+ pipeline_tag: automatic-speech-recognition
14
+ base_model: nvidia/parakeet_realtime_eou_120m-v1
15
+ ---
16
+
17
+ # parakeet-rnnt-120m-eou-coreml
18
+
19
+ CoreML conversion of [nvidia/parakeet_realtime_eou_120m-v1](https://huggingface.co/nvidia/parakeet_realtime_eou_120m-v1).
20
+
21
+ | | |
22
+ |---|---|
23
+ | **Architecture** | RNNT |
24
+ | **Language** | English |
25
+ | **Sample rate** | 16000 Hz |
26
+ | **Max audio** | 15.0s |
27
+ | **Vocab size** | 1026 |
28
+ | **Framework** | NVIDIA NeMo → CoreML (coremltools) |
29
+
30
+ ## Components
31
+
32
+ | File | Component | Best compute |
33
+ |------|-----------|--------------|
34
+ | `parakeet_mel_encoder.mlpackage` | mel_encoder | ANE / GPU |
35
+ | `parakeet_decoder.mlpackage` | decoder | CPU only |
36
+ | `parakeet_joint_decision_single_step.mlpackage` | joint_decision_single_step | ANE / GPU |
37
+
38
+ ## Usage
39
+
40
+ ```bash
41
+ pip install ovos-stt-plugin-coreml
42
+ ```
43
+
44
+ ```python
45
+ from ovos_stt_plugin_coreml import CoremlSTT
46
+ from ovos_plugin_manager.utils.audio import AudioFile
47
+
48
+ stt = CoremlSTT(config={"metadata": "metadata.json"})
49
+
50
+ with AudioFile("speech.wav") as f:
51
+ audio = f.read()
52
+ print(stt.execute(audio))
53
+ ```
54
+
55
+ ## Source model
56
+
57
+ [nvidia/parakeet_realtime_eou_120m-v1](https://huggingface.co/nvidia/parakeet_realtime_eou_120m-v1)
58
+
infer.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Greedy TDT/RNNT inference with CoreML parakeet-rnnt-120m-eou.
3
+
4
+ Usage: python infer.py audio.wav [--compute-units ALL|CPU_ONLY|CPU_AND_NE]
5
+ """
6
+ import json, sys
7
+ from pathlib import Path
8
+
9
+ import coremltools as ct
10
+ import numpy as np
11
+ import soundfile as sf
12
+
13
+ REPO_DIR = Path(__file__).parent
14
+ SAMPLE_RATE = 16_000
15
+
16
def load_audio(path, max_samples):
    """Read an audio file and fit it to a fixed-length float32 buffer.

    Args:
        path: Audio file to read with ``soundfile``.
        max_samples: Exact number of samples the returned buffer holds.

    Returns:
        A ``(buffer, actual)`` tuple. ``buffer`` is a float32 array of shape
        ``(1, max_samples)``: the signal is zero-padded at the end when it is
        shorter and truncated when it is longer. ``actual`` is the number of
        real (non-padding) samples kept, i.e. ``min(len(signal), max_samples)``.

    Raises:
        ValueError: If the file's sample rate differs from ``SAMPLE_RATE``.
    """
    samples, rate = sf.read(path, dtype="float32", always_2d=False)
    if rate != SAMPLE_RATE:
        raise ValueError(f"Expected {SAMPLE_RATE} Hz.")

    # Multi-channel input: keep only the first channel.
    if samples.ndim > 1:
        samples = samples[:, 0]

    actual = min(len(samples), max_samples)

    # Pad up to the fixed length (no-op when already long enough), then
    # clip anything beyond it.
    shortfall = max(0, max_samples - len(samples))
    fixed = np.pad(samples, (0, shortfall))[:max_samples]

    return fixed.reshape(1, -1).astype(np.float32), actual
23
+
24
def transcribe(audio_path, compute_units="ALL"):
    """Transcribe one audio file with greedy decoding over the CoreML models.

    Reads ``metadata.json`` and ``vocab.json`` from ``REPO_DIR``, loads the
    mel-encoder, decoder and single-step joint models from the same
    directory, encodes the audio once, then walks the encoder frames
    picking one token per step.

    Args:
        audio_path: Audio file handed to ``load_audio``.
        compute_units: Case-insensitive name of the CoreML compute unit used
            for the encoder and joint models ("ALL", "CPU_ONLY" or
            "CPU_AND_NE"). Unrecognised names fall back to "ALL". The decoder
            model is always loaded with CPU_ONLY.

    Returns:
        The decoded text: vocabulary pieces of the emitted tokens joined
        together, with "▁" replaced by a space and surrounding whitespace
        stripped.
    """
    metadata = json.loads((REPO_DIR / "metadata.json").read_text())
    vocab = json.loads((REPO_DIR / "vocab.json").read_text())
    blank = metadata["blank_id"]
    max_samples = metadata["max_audio_samples"]
    bins = metadata.get("duration_bins", [1])
    h_in_shape = metadata["components"]["decoder"]["inputs"]["h_in"]
    # Recurrent state arrays are built as (layers, 1, hidden).
    state_shape = (h_in_shape[0], 1, h_in_shape[2])

    units_by_name = {
        "ALL": ct.ComputeUnit.ALL,
        "CPU_ONLY": ct.ComputeUnit.CPU_ONLY,
        "CPU_AND_NE": ct.ComputeUnit.CPU_AND_NE,
    }
    units = units_by_name.get(compute_units.upper(), ct.ComputeUnit.ALL)

    mel_enc = ct.models.MLModel(
        str(REPO_DIR / "parakeet_mel_encoder.mlpackage"), compute_units=units
    )
    dec_model = ct.models.MLModel(
        str(REPO_DIR / "parakeet_decoder.mlpackage"),
        compute_units=ct.ComputeUnit.CPU_ONLY,
    )
    jd_model = ct.models.MLModel(
        str(REPO_DIR / "parakeet_joint_decision_single_step.mlpackage"),
        compute_units=units,
    )

    # Encode the whole (padded) utterance in one call.
    audio, actual = load_audio(audio_path, max_samples)
    enc_out = mel_enc.predict(
        {"audio_signal": audio, "audio_length": np.array([actual], dtype=np.int32)}
    )
    encoder = enc_out["encoder"]
    enc_len = int(enc_out["encoder_length"][0])

    tlen = np.array([1], dtype=np.int32)

    def run_decoder(token, h_state, c_state):
        """Feed one token to the decoder; return (output, h_out, c_out)."""
        out = dec_model.predict(
            {
                "targets": np.array([[token]], dtype=np.int32),
                "target_length": tlen,
                "h_in": h_state,
                "c_in": c_state,
            }
        )
        return out["decoder"], out["h_out"], out["c_out"]

    # Prime the decoder with the blank token and all-zero recurrent state.
    dec_state, h, c = run_decoder(
        blank,
        np.zeros(state_shape, dtype=np.float32),
        np.zeros(state_shape, dtype=np.float32),
    )

    tokens = []
    t = 0
    while t < enc_len:
        jd = jd_model.predict(
            {
                "encoder_step": encoder[:, :, t:t + 1],
                "decoder_step": dec_state[:, :, :1],
            }
        )
        tok = int(jd["token_id"].flat[0])
        dur = int(jd["duration"].flat[0])
        # Map the predicted duration index to a frame advance; indices past
        # the end of the table are clamped to its last entry.
        adv = bins[min(dur, len(bins) - 1)] if bins else 1

        if tok != blank:
            # Emit the token and refresh the decoder output for later frames.
            tokens.append(tok)
            dec_state, h, c = run_decoder(tok, h, c)

        # NOTE(review): the frame index moves forward on every step, so at
        # most one token is emitted per encoder frame — confirm intended.
        t += max(1, adv)

    # Token ids outside the vocabulary are skipped.
    pieces = [vocab[i] for i in tokens if i < len(vocab)]
    return "".join(pieces).replace("▁", " ").strip()
70
+
71
if __name__ == "__main__":
    # Command-line entry point:
    #   python infer.py <audio.wav> [--compute-units ALL|CPU_ONLY|CPU_AND_NE]
    # The option may appear before or after the audio path.
    usage = "Usage: python infer.py <audio.wav> [--compute-units ALL|CPU_ONLY|CPU_AND_NE]"
    args = sys.argv[1:]
    cu = "ALL"
    if "--compute-units" in args:
        flag_at = args.index("--compute-units")
        if flag_at + 1 >= len(args):
            # Flag given without a value: show usage rather than IndexError.
            print(usage)
            sys.exit(1)
        cu = args[flag_at + 1]
        # Remove the flag and its value so that whatever remains first is
        # the audio path, regardless of where the option was placed.
        del args[flag_at:flag_at + 2]
    if not args:
        print(usage)
        sys.exit(1)
    print(transcribe(args[0], cu))
metadata.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_id": "nvidia/parakeet_realtime_eou_120m-v1",
3
+ "model_type": "parakeet_rnnt",
4
+ "language": "",
5
+ "sample_rate": 16000,
6
+ "max_audio_seconds": 15.0,
7
+ "max_audio_samples": 240000,
8
+ "vocab_size": 1026,
9
+ "blank_id": 1026,
10
+ "checkpoint": {
11
+ "type": "pretrained",
12
+ "model_id": "nvidia/parakeet_realtime_eou_120m-v1"
13
+ },
14
+ "coreml": {
15
+ "compute_precision": "FLOAT32",
16
+ "quantization": "none"
17
+ },
18
+ "components": {
19
+ "mel_encoder": {
20
+ "path": "parakeet_mel_encoder.mlpackage",
21
+ "inputs": {
22
+ "audio_signal": [
23
+ 1,
24
+ 240000
25
+ ],
26
+ "audio_length": [
27
+ 1
28
+ ]
29
+ },
30
+ "outputs": {
31
+ "encoder": [
32
+ 1,
33
+ 512,
34
+ 189
35
+ ],
36
+ "encoder_length": [
37
+ 1
38
+ ]
39
+ }
40
+ },
41
+ "decoder": {
42
+ "path": "parakeet_decoder.mlpackage",
43
+ "inputs": {
44
+ "targets": [
45
+ 1,
46
+ 1
47
+ ],
48
+ "target_length": [
49
+ 1
50
+ ],
51
+ "h_in": [
52
+ 1,
53
+ 1,
54
+ 640
55
+ ],
56
+ "c_in": [
57
+ 1,
58
+ 1,
59
+ 640
60
+ ]
61
+ },
62
+ "outputs": {
63
+ "decoder": [
64
+ 1,
65
+ 640,
66
+ 1
67
+ ],
68
+ "h_out": [
69
+ 1,
70
+ 1,
71
+ 640
72
+ ],
73
+ "c_out": [
74
+ 1,
75
+ 1,
76
+ 640
77
+ ]
78
+ }
79
+ },
80
+ "joint_decision_single_step": {
81
+ "path": "parakeet_joint_decision_single_step.mlpackage",
82
+ "inputs": {
83
+ "encoder_step": [
84
+ 1,
85
+ 512,
86
+ 1
87
+ ],
88
+ "decoder_step": [
89
+ 1,
90
+ 640,
91
+ 1
92
+ ]
93
+ },
94
+ "outputs": {
95
+ "token_id": [
96
+ 1,
97
+ 1,
98
+ 1
99
+ ],
100
+ "token_prob": [
101
+ 1,
102
+ 1,
103
+ 1
104
+ ],
105
+ "duration": [
106
+ 1,
107
+ 1,
108
+ 1
109
+ ]
110
+ }
111
+ }
112
+ }
113
+ }
parakeet_decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386b4563e16d9272fc04e049114470eff0cc27bc69a68f07bc348268deb5e469
3
+ size 7962
parakeet_decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b4cacecdcd9df79ab1e56de67230baf5a8664d2afe0bb8f3408eefa972cb2f4
3
+ size 7873600
parakeet_decoder.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "542E1620-783D-40F7-B856-BBC81F8FB1CC": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "9277620F-372A-4009-B3F0-4579EED5CF6F": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "9277620F-372A-4009-B3F0-4579EED5CF6F"
18
+ }
parakeet_joint_decision_single_step.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a22b06a45ff04538396f5d3d606436dfffcae4ad70bf0ea2c885ecd2b15661ee
3
+ size 7043
parakeet_joint_decision_single_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7039b2010a269153f5a96edf28637f921a86ef8822f248f2d6712f7a6bce84b4
3
+ size 2794182
parakeet_joint_decision_single_step.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "81E88E5B-0A04-42EB-8AA9-1AFEF983082B": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "CCCCAB77-9146-4F26-8C99-DC908FADD489": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "CCCCAB77-9146-4F26-8C99-DC908FADD489"
18
+ }
parakeet_mel_encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5ffa15cd78b271a04f944e7cb8105494b15999f112cbe0f9565ed67c3be8565
3
+ size 497550
parakeet_mel_encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e1ce22a11c2fa443776934f6aa20f4559086ccdc437d224461dac905ffe7ebf
3
+ size 217322496
parakeet_mel_encoder.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "7865CF57-8368-4EE9-89C4-AA0565E1837A": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "BB94861E-95D2-4B35-98E0-28A388EE20D6": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "BB94861E-95D2-4B35-98E0-28A388EE20D6"
18
+ }
vocab.json ADDED
@@ -0,0 +1,1028 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "<unk>",
3
+ "▁t",
4
+ "▁th",
5
+ "▁a",
6
+ "▁i",
7
+ "▁the",
8
+ "▁s",
9
+ "re",
10
+ "▁w",
11
+ "▁o",
12
+ "in",
13
+ "at",
14
+ "er",
15
+ "nd",
16
+ "ou",
17
+ "▁c",
18
+ "▁b",
19
+ "▁h",
20
+ "en",
21
+ "on",
22
+ "▁m",
23
+ "▁f",
24
+ "ing",
25
+ "▁p",
26
+ "▁to",
27
+ "▁and",
28
+ "▁d",
29
+ "an",
30
+ "or",
31
+ "es",
32
+ "▁y",
33
+ "▁l",
34
+ "▁of",
35
+ "ll",
36
+ "▁in",
37
+ "ed",
38
+ "it",
39
+ "▁g",
40
+ "is",
41
+ "▁you",
42
+ "▁n",
43
+ "ar",
44
+ "om",
45
+ "as",
46
+ "ve",
47
+ "▁e",
48
+ "ic",
49
+ "▁it",
50
+ "al",
51
+ "us",
52
+ "▁wh",
53
+ "▁we",
54
+ "▁be",
55
+ "ion",
56
+ "ow",
57
+ "le",
58
+ "▁is",
59
+ "et",
60
+ "ent",
61
+ "ot",
62
+ "ut",
63
+ "▁re",
64
+ "▁on",
65
+ "ay",
66
+ "▁ha",
67
+ "ig",
68
+ "▁so",
69
+ "ct",
70
+ "▁he",
71
+ "▁for",
72
+ "ver",
73
+ "ke",
74
+ "ro",
75
+ "▁st",
76
+ "id",
77
+ "▁go",
78
+ "all",
79
+ "se",
80
+ "ly",
81
+ "▁u",
82
+ "ch",
83
+ "st",
84
+ "ld",
85
+ "▁k",
86
+ "ce",
87
+ "ur",
88
+ "▁li",
89
+ "am",
90
+ "▁r",
91
+ "ht",
92
+ "▁j",
93
+ "ith",
94
+ "▁se",
95
+ "ir",
96
+ "▁as",
97
+ "▁an",
98
+ "im",
99
+ "▁do",
100
+ "ad",
101
+ "▁was",
102
+ "ight",
103
+ "th",
104
+ "▁are",
105
+ "▁but",
106
+ "▁sh",
107
+ "ust",
108
+ "ally",
109
+ "▁not",
110
+ "▁or",
111
+ "▁com",
112
+ "▁can",
113
+ "▁me",
114
+ "op",
115
+ "▁mo",
116
+ "▁at",
117
+ "ill",
118
+ "▁ch",
119
+ "▁ne",
120
+ "ant",
121
+ "▁de",
122
+ "▁kn",
123
+ "▁one",
124
+ "il",
125
+ "ol",
126
+ "▁con",
127
+ "ter",
128
+ "▁ab",
129
+ "▁fr",
130
+ "ere",
131
+ "ck",
132
+ "▁al",
133
+ "▁all",
134
+ "qu",
135
+ "▁pro",
136
+ "▁som",
137
+ "ould",
138
+ "▁tw",
139
+ "ul",
140
+ "ra",
141
+ "od",
142
+ "ers",
143
+ "▁su",
144
+ "ive",
145
+ "▁v",
146
+ "use",
147
+ "ate",
148
+ "ge",
149
+ "if",
150
+ "▁ex",
151
+ "ess",
152
+ "pp",
153
+ "▁lo",
154
+ "out",
155
+ "▁if",
156
+ "est",
157
+ "ain",
158
+ "ist",
159
+ "and",
160
+ "ea",
161
+ "very",
162
+ "art",
163
+ "▁wor",
164
+ "▁my",
165
+ "ab",
166
+ "ment",
167
+ "▁bec",
168
+ "un",
169
+ "ity",
170
+ "ri",
171
+ "pe",
172
+ "ions",
173
+ "▁by",
174
+ "ok",
175
+ "our",
176
+ "ort",
177
+ "ind",
178
+ "ink",
179
+ "nt",
180
+ "▁up",
181
+ "um",
182
+ "▁don",
183
+ "▁get",
184
+ "red",
185
+ "▁out",
186
+ "el",
187
+ "ause",
188
+ "res",
189
+ "▁ma",
190
+ "ich",
191
+ "▁us",
192
+ "rou",
193
+ "▁int",
194
+ "em",
195
+ "os",
196
+ "ies",
197
+ "ie",
198
+ "▁pl",
199
+ "▁tr",
200
+ "ven",
201
+ "ous",
202
+ "▁le",
203
+ "▁two",
204
+ "ard",
205
+ "ine",
206
+ "▁co",
207
+ "een",
208
+ "▁now",
209
+ "ty",
210
+ "her",
211
+ "ack",
212
+ "▁pe",
213
+ "ame",
214
+ "▁how",
215
+ "▁who",
216
+ "▁see",
217
+ "▁tim",
218
+ "ect",
219
+ "ast",
220
+ "▁our",
221
+ "ci",
222
+ "ree",
223
+ "ople",
224
+ "gh",
225
+ "▁no",
226
+ "▁had",
227
+ "▁man",
228
+ "▁qu",
229
+ "▁en",
230
+ "ide",
231
+ "ure",
232
+ "ud",
233
+ "so",
234
+ "▁his",
235
+ "▁sa",
236
+ "▁sp",
237
+ "▁say",
238
+ "ose",
239
+ "ther",
240
+ "▁act",
241
+ "▁ta",
242
+ "▁cl",
243
+ "ings",
244
+ "pt",
245
+ "king",
246
+ "▁any",
247
+ "▁has",
248
+ "▁un",
249
+ "iv",
250
+ "▁im",
251
+ "▁ag",
252
+ "▁te",
253
+ "▁fe",
254
+ "one",
255
+ "per",
256
+ "ong",
257
+ "▁po",
258
+ "▁ad",
259
+ "ff",
260
+ "ore",
261
+ "itt",
262
+ "ans",
263
+ "iz",
264
+ "eah",
265
+ "reat",
266
+ "act",
267
+ "own",
268
+ "hing",
269
+ "enty",
270
+ "age",
271
+ "ber",
272
+ "ice",
273
+ "▁am",
274
+ "ple",
275
+ "are",
276
+ "▁per",
277
+ "und",
278
+ "ite",
279
+ "ix",
280
+ "pl",
281
+ "▁way",
282
+ "▁did",
283
+ "▁pr",
284
+ "▁got",
285
+ "ars",
286
+ "▁she",
287
+ "▁let",
288
+ "ag",
289
+ "▁ac",
290
+ "int",
291
+ "▁ar",
292
+ "ry",
293
+ "ign",
294
+ "ish",
295
+ "▁fir",
296
+ "ace",
297
+ "ble",
298
+ "og",
299
+ "ue",
300
+ "▁ye",
301
+ "ap",
302
+ "iff",
303
+ "▁ro",
304
+ "▁her",
305
+ "nder",
306
+ "▁ok",
307
+ "▁res",
308
+ "▁gu",
309
+ "ence",
310
+ "▁may",
311
+ "ated",
312
+ "ip",
313
+ "▁bo",
314
+ "▁him",
315
+ "way",
316
+ "ac",
317
+ "ical",
318
+ "ass",
319
+ "ase",
320
+ "▁dis",
321
+ "able",
322
+ "ick",
323
+ "▁app",
324
+ "ance",
325
+ "▁pre",
326
+ "▁six",
327
+ "▁off",
328
+ "▁new",
329
+ "ia",
330
+ "orm",
331
+ "ank",
332
+ "▁lot",
333
+ "ach",
334
+ "▁fo",
335
+ "inet",
336
+ "ire",
337
+ "ary",
338
+ "ult",
339
+ "▁tal",
340
+ "▁mu",
341
+ "▁bl",
342
+ "ount",
343
+ "sel",
344
+ "vel",
345
+ "▁br",
346
+ "▁imp",
347
+ "ep",
348
+ "cess",
349
+ "ord",
350
+ "▁sc",
351
+ "▁inc",
352
+ "ound",
353
+ "ang",
354
+ "be",
355
+ "ress",
356
+ "uct",
357
+ "▁ind",
358
+ "▁af",
359
+ "ving",
360
+ "▁oh",
361
+ "▁bet",
362
+ "▁use",
363
+ "ome",
364
+ "ens",
365
+ "ys",
366
+ "▁bu",
367
+ "co",
368
+ "ory",
369
+ "ater",
370
+ "ild",
371
+ "ght",
372
+ "ial",
373
+ "▁day",
374
+ "ning",
375
+ "na",
376
+ "ile",
377
+ "▁spe",
378
+ "▁mar",
379
+ "ody",
380
+ "ough",
381
+ "ade",
382
+ "vers",
383
+ "xt",
384
+ "▁fl",
385
+ "▁ke",
386
+ "ian",
387
+ "▁sy",
388
+ "▁put",
389
+ "fore",
390
+ "ub",
391
+ "▁ph",
392
+ "fe",
393
+ "▁em",
394
+ "▁ser",
395
+ "form",
396
+ "ting",
397
+ "te",
398
+ "av",
399
+ "ious",
400
+ "▁rec",
401
+ "ks",
402
+ "▁gr",
403
+ "ces",
404
+ "wn",
405
+ "ors",
406
+ "▁jo",
407
+ "ents",
408
+ "▁des",
409
+ "▁try",
410
+ "▁equ",
411
+ "▁z",
412
+ "▁rem",
413
+ "▁str",
414
+ "self",
415
+ "▁bit",
416
+ "ph",
417
+ "ved",
418
+ "▁why",
419
+ "▁bas",
420
+ "▁hel",
421
+ "▁rel",
422
+ "ath",
423
+ "ject",
424
+ "ail",
425
+ "▁la",
426
+ "ual",
427
+ "▁god",
428
+ "▁nat",
429
+ "erm",
430
+ "day",
431
+ "▁id",
432
+ "ft",
433
+ "▁wr",
434
+ "▁min",
435
+ "ates",
436
+ "▁gen",
437
+ "tain",
438
+ "▁ob",
439
+ "ull",
440
+ "ict",
441
+ "▁tra",
442
+ "▁end",
443
+ "▁hig",
444
+ "▁fif",
445
+ "oth",
446
+ "tern",
447
+ "▁its",
448
+ "vent",
449
+ "▁sm",
450
+ "ons",
451
+ "▁add",
452
+ "iss",
453
+ "▁bel",
454
+ "ful",
455
+ "get",
456
+ "▁ele",
457
+ "▁rep",
458
+ "ak",
459
+ "▁ho",
460
+ "▁pos",
461
+ "▁num",
462
+ "ange",
463
+ "ves",
464
+ "ific",
465
+ "urn",
466
+ "ise",
467
+ "▁cr",
468
+ "▁um",
469
+ "ward",
470
+ "▁reg",
471
+ "ady",
472
+ "ower",
473
+ "uc",
474
+ "▁dec",
475
+ "lic",
476
+ "▁set",
477
+ "▁gon",
478
+ "▁op",
479
+ "▁ear",
480
+ "▁sub",
481
+ "▁sl",
482
+ "les",
483
+ "stem",
484
+ "cial",
485
+ "olog",
486
+ "atch",
487
+ "ily",
488
+ "body",
489
+ "nds",
490
+ "ular",
491
+ "ren",
492
+ "▁own",
493
+ "▁too",
494
+ "cent",
495
+ "ible",
496
+ "pect",
497
+ "ered",
498
+ "ways",
499
+ "teen",
500
+ "▁uh",
501
+ "▁big",
502
+ "▁mod",
503
+ "▁att",
504
+ "▁car",
505
+ "gr",
506
+ "▁acc",
507
+ "ied",
508
+ "mun",
509
+ "ib",
510
+ "▁mon",
511
+ "▁sch",
512
+ "▁pol",
513
+ "▁dat",
514
+ "▁fin",
515
+ "▁sim",
516
+ "▁inv",
517
+ "▁def",
518
+ "ked",
519
+ "▁ent",
520
+ "▁yes",
521
+ "ows",
522
+ "ics",
523
+ "ited",
524
+ "ute",
525
+ "ism",
526
+ "ps",
527
+ "▁ed",
528
+ "▁el",
529
+ "ably",
530
+ "ppen",
531
+ "als",
532
+ "▁ten",
533
+ "ract",
534
+ "ss",
535
+ "▁ass",
536
+ "▁met",
537
+ "gan",
538
+ "▁eng",
539
+ "▁stu",
540
+ "ween",
541
+ "arch",
542
+ "▁gl",
543
+ "▁cor",
544
+ "▁dr",
545
+ "vern",
546
+ "▁ty",
547
+ "▁run",
548
+ "hip",
549
+ "cus",
550
+ "cond",
551
+ "▁ins",
552
+ "irty",
553
+ "▁pub",
554
+ "lud",
555
+ "llow",
556
+ "▁cou",
557
+ "ew",
558
+ "iew",
559
+ "▁sur",
560
+ "ero",
561
+ "ood",
562
+ "ness",
563
+ "▁fun",
564
+ "▁eff",
565
+ "cept",
566
+ "▁ca",
567
+ "▁exp",
568
+ "duct",
569
+ "▁sw",
570
+ "ize",
571
+ "ope",
572
+ "▁par",
573
+ "kes",
574
+ "cy",
575
+ "▁ev",
576
+ "▁ref",
577
+ "ell",
578
+ "▁bus",
579
+ "ug",
580
+ "rib",
581
+ "▁cur",
582
+ "mo",
583
+ "ock",
584
+ "ures",
585
+ "air",
586
+ "▁war",
587
+ "str",
588
+ "▁med",
589
+ "▁wa",
590
+ "▁val",
591
+ "▁sin",
592
+ "blem",
593
+ "▁fam",
594
+ "li",
595
+ "▁far",
596
+ "▁cle",
597
+ "▁col",
598
+ "mon",
599
+ "▁gra",
600
+ "led",
601
+ "ense",
602
+ "tin",
603
+ "ues",
604
+ "its",
605
+ "▁mem",
606
+ "▁inf",
607
+ "▁eas",
608
+ "ideo",
609
+ "▁top",
610
+ "io",
611
+ "pan",
612
+ "▁hum",
613
+ "▁old",
614
+ "ead",
615
+ "▁ord",
616
+ "ric",
617
+ "ants",
618
+ "oy",
619
+ "esn",
620
+ "uck",
621
+ "ason",
622
+ "ced",
623
+ "ool",
624
+ "rat",
625
+ "ouse",
626
+ "▁lar",
627
+ "▁art",
628
+ "▁wee",
629
+ "▁cer",
630
+ "ized",
631
+ "▁mat",
632
+ "con",
633
+ "erg",
634
+ "land",
635
+ "ines",
636
+ "▁chr",
637
+ "▁aut",
638
+ "▁lea",
639
+ "▁sou",
640
+ "oney",
641
+ "tty",
642
+ "▁ple",
643
+ "ulat",
644
+ "oks",
645
+ "▁few",
646
+ "▁sol",
647
+ "▁che",
648
+ "chn",
649
+ "ird",
650
+ "▁bre",
651
+ "▁dur",
652
+ "▁wom",
653
+ "me",
654
+ "izat",
655
+ "eric",
656
+ "ote",
657
+ "▁uni",
658
+ "eren",
659
+ "arn",
660
+ "ross",
661
+ "ices",
662
+ "ten",
663
+ "eral",
664
+ "ever",
665
+ "ieve",
666
+ "lish",
667
+ "ash",
668
+ "▁opp",
669
+ "alth",
670
+ "ger",
671
+ "▁sk",
672
+ "▁red",
673
+ "peri",
674
+ "▁det",
675
+ "▁ext",
676
+ "ner",
677
+ "ah",
678
+ "▁var",
679
+ "▁loc",
680
+ "gram",
681
+ "ists",
682
+ "ives",
683
+ "▁es",
684
+ "▁nor",
685
+ "tro",
686
+ "ale",
687
+ "▁iss",
688
+ "▁pri",
689
+ "gin",
690
+ "az",
691
+ "oc",
692
+ "▁pop",
693
+ "ern",
694
+ "▁sit",
695
+ "ket",
696
+ "▁pa",
697
+ "▁law",
698
+ "ages",
699
+ "br",
700
+ "▁cam",
701
+ "▁mom",
702
+ "osed",
703
+ "▁bro",
704
+ "ne",
705
+ "bs",
706
+ "▁cre",
707
+ "erat",
708
+ "▁sec",
709
+ "▁cap",
710
+ "▁vis",
711
+ "▁pat",
712
+ "ield",
713
+ "iet",
714
+ "▁tri",
715
+ "up",
716
+ "▁bra",
717
+ "ts",
718
+ "▁mot",
719
+ "▁unt",
720
+ "put",
721
+ "bo",
722
+ "ork",
723
+ "mer",
724
+ "ital",
725
+ "▁air",
726
+ "ined",
727
+ "▁beh",
728
+ "▁adv",
729
+ "▁ret",
730
+ "imes",
731
+ "▁tea",
732
+ "ural",
733
+ "sid",
734
+ "ters",
735
+ "▁pur",
736
+ "▁sci",
737
+ "bers",
738
+ "ient",
739
+ "ier",
740
+ "cc",
741
+ "sw",
742
+ "▁av",
743
+ "reen",
744
+ "ode",
745
+ "ont",
746
+ "▁dra",
747
+ "ann",
748
+ "nect",
749
+ "▁x",
750
+ "▁eu",
751
+ "ton",
752
+ "inat",
753
+ "ene",
754
+ "ared",
755
+ "els",
756
+ "▁mor",
757
+ "▁rat",
758
+ "cri",
759
+ "▁men",
760
+ "▁ah",
761
+ "ames",
762
+ "▁arm",
763
+ "eak",
764
+ "▁pay",
765
+ "▁hal",
766
+ "ins",
767
+ "ilit",
768
+ "stit",
769
+ "▁ra",
770
+ "▁leg",
771
+ "cl",
772
+ "pr",
773
+ "▁wal",
774
+ "▁bad",
775
+ "▁ge",
776
+ "roup",
777
+ "▁mus",
778
+ "man",
779
+ "▁gi",
780
+ "eds",
781
+ "▁aw",
782
+ "po",
783
+ "ark",
784
+ "row",
785
+ "▁dep",
786
+ "ully",
787
+ "ral",
788
+ "lect",
789
+ "pend",
790
+ "▁sev",
791
+ "ime",
792
+ "gest",
793
+ "here",
794
+ "▁yet",
795
+ "ted",
796
+ "▁rev",
797
+ "ds",
798
+ "▁ask",
799
+ "less",
800
+ "▁di",
801
+ "ets",
802
+ "line",
803
+ "▁aff",
804
+ "ired",
805
+ "▁est",
806
+ "ken",
807
+ "vid",
808
+ "most",
809
+ "ivid",
810
+ "unch",
811
+ "par",
812
+ "med",
813
+ "rop",
814
+ "ased",
815
+ "eone",
816
+ "▁ve",
817
+ "▁abs",
818
+ "ergy",
819
+ "ret",
820
+ "▁saw",
821
+ "▁ey",
822
+ "▁cal",
823
+ "uat",
824
+ "▁mid",
825
+ "vat",
826
+ "ream",
827
+ "vice",
828
+ "ians",
829
+ "rent",
830
+ "ctor",
831
+ "err",
832
+ "ush",
833
+ "ases",
834
+ "▁suc",
835
+ "erms",
836
+ "ave",
837
+ "angu",
838
+ "ries",
839
+ "▁wo",
840
+ "arts",
841
+ "▁fil",
842
+ "▁fat",
843
+ "▁cho",
844
+ "orts",
845
+ "▁fre",
846
+ "ee",
847
+ "ught",
848
+ "eng",
849
+ "ump",
850
+ "▁bar",
851
+ "ying",
852
+ "ane",
853
+ "▁tem",
854
+ "anks",
855
+ "ury",
856
+ "iat",
857
+ "mit",
858
+ "trol",
859
+ "▁net",
860
+ "▁maj",
861
+ "▁cra",
862
+ "ling",
863
+ "▁fig",
864
+ "orn",
865
+ "icat",
866
+ "pany",
867
+ "▁occ",
868
+ "ott",
869
+ "ands",
870
+ "▁exc",
871
+ "▁mr",
872
+ "ency",
873
+ "rope",
874
+ "itch",
875
+ "▁lit",
876
+ "abil",
877
+ "not",
878
+ "ma",
879
+ "▁typ",
880
+ "▁opt",
881
+ "ob",
882
+ "ser",
883
+ "ety",
884
+ "ms",
885
+ "peci",
886
+ "aces",
887
+ "aut",
888
+ "▁hon",
889
+ "cuss",
890
+ "▁sal",
891
+ "▁sor",
892
+ "att",
893
+ "▁lab",
894
+ "▁har",
895
+ "urch",
896
+ "nded",
897
+ "uce",
898
+ "ids",
899
+ "▁hy",
900
+ "▁fut",
901
+ "▁ste",
902
+ "ours",
903
+ "ems",
904
+ "utes",
905
+ "ng",
906
+ "ta",
907
+ "▁won",
908
+ "▁fa",
909
+ "▁env",
910
+ "ards",
911
+ "▁job",
912
+ "ium",
913
+ "▁dot",
914
+ "▁obv",
915
+ "ina",
916
+ "side",
917
+ "elve",
918
+ "cu",
919
+ "▁jes",
920
+ "▁pot",
921
+ "▁pie",
922
+ "▁tre",
923
+ "▁hey",
924
+ "▁mag",
925
+ "ron",
926
+ "▁key",
927
+ "swer",
928
+ "▁win",
929
+ "ucat",
930
+ "work",
931
+ "ides",
932
+ "▁low",
933
+ "▁vol",
934
+ "▁oth",
935
+ "atic",
936
+ "lf",
937
+ "ads",
938
+ "inds",
939
+ "com",
940
+ "ths",
941
+ "▁ver",
942
+ "ised",
943
+ "lo",
944
+ "▁squ",
945
+ "▁cut",
946
+ "oked",
947
+ "irit",
948
+ "ateg",
949
+ "ppy",
950
+ "mitt",
951
+ "come",
952
+ "hn",
953
+ "igin",
954
+ "mand",
955
+ "▁dam",
956
+ "ho",
957
+ "▁da",
958
+ "▁fur",
959
+ "iron",
960
+ "ilar",
961
+ "▁fac",
962
+ "▁neg",
963
+ "▁ago",
964
+ "ged",
965
+ "miss",
966
+ "enth",
967
+ "▁dou",
968
+ "▁hit",
969
+ "▁guy",
970
+ "▁bi",
971
+ "ove",
972
+ "fess",
973
+ "ples",
974
+ "owed",
975
+ "ured",
976
+ "▁ris",
977
+ "ints",
978
+ "rew",
979
+ "▁sum",
980
+ "▁hu",
981
+ "ploy",
982
+ "ude",
983
+ "ried",
984
+ "▁cir",
985
+ "▁dev",
986
+ "ear",
987
+ "▁tot",
988
+ "▁ann",
989
+ "duc",
990
+ "ik",
991
+ "pon",
992
+ "sted",
993
+ "▁ide",
994
+ "▁'",
995
+ "ipp",
996
+ "▁eat",
997
+ "▁dom",
998
+ "▁",
999
+ "e",
1000
+ "t",
1001
+ "o",
1002
+ "a",
1003
+ "i",
1004
+ "n",
1005
+ "s",
1006
+ "r",
1007
+ "h",
1008
+ "l",
1009
+ "d",
1010
+ "u",
1011
+ "c",
1012
+ "m",
1013
+ "y",
1014
+ "g",
1015
+ "w",
1016
+ "f",
1017
+ "p",
1018
+ "b",
1019
+ "v",
1020
+ "k",
1021
+ "'",
1022
+ "j",
1023
+ "x",
1024
+ "q",
1025
+ "z",
1026
+ "<EOU>",
1027
+ "<EOB>"
1028
+ ]