nagolinc commited on
Commit
a1bf71f
·
verified ·
1 Parent(s): 248e846

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ chat_sprint_artifacts/corpus.txt filter=lfs diff=lfs merge=lfs -text
chat.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # chat.py — Gradio chat app with **turn-taking guards**
3
+ # ----------------------------------------------------
4
+ # pip install torch sentencepiece gradio
5
+ # python chat.py
6
+ #
7
+ # Expects in ./chat_sprint_artifacts (or CHAT_SPRINT_ARTIFACTS env):
8
+ # - spm_chat.model
9
+ # - tinygpt.pt
10
+ # - model_config.json
11
+ #
12
+ # Fixes for your screenshot:
13
+ # • Masks “You:” and tag tokens so the bot can’t emit them mid-reply.
14
+ # • Stops generation on EOS / "\n\n" / "\nYou:" so it doesn’t start the next turn.
15
+ # • Works with Gradio 5.x (type="messages", queue() w/o kwargs).
16
+
17
+ import os, json, math
18
+ from pathlib import Path
19
+ from typing import List, Tuple, Generator
20
+
21
+ import torch
22
+ import torch.nn as nn
23
+ import sentencepiece as spm
24
+ import gradio as gr
25
+
26
# ---------- paths & device ----------
# Artifact directory; overridable via the CHAT_SPRINT_ARTIFACTS env var.
ART = Path(os.environ.get("CHAT_SPRINT_ARTIFACTS", "chat_sprint_artifacts"))
SPM_PATH = ART / "spm_chat.model"       # SentencePiece tokenizer model
CKPT = ART / "tinygpt.pt"               # trained model weights (state dict)
CFG_JSON = ART / "model_config.json"    # architecture hyperparameters
LOG_PATH = ART / "chat_transcript.txt"  # append-only conversation log
ART.mkdir(parents=True, exist_ok=True)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if DEVICE.type == "cuda":
    # Allow TF32 matmuls for throughput on Ampere+ GPUs (negligible accuracy cost here).
    torch.set_float32_matmul_precision("high")
    torch.backends.cuda.matmul.allow_tf32 = True
38
+
39
+ # ---------- tiny GPT (same as trainer) ----------
40
class CausalSelfAttention(nn.Module):
    """Multi-head masked self-attention with a fixed lower-triangular mask."""

    def __init__(self, n_embd, n_head, dropout=0.0, block_size=256):
        super().__init__()
        assert n_embd % n_head == 0
        self.n_head = n_head
        self.head_dim = n_embd // n_head
        # Fused projection producing queries, keys and values in one matmul.
        self.qkv = nn.Linear(n_embd, 3 * n_embd, bias=False)
        self.proj = nn.Linear(n_embd, n_embd, bias=False)
        self.attn_drop = nn.Dropout(dropout)
        self.resid_drop = nn.Dropout(dropout)
        # Causal mask: position t may only attend to positions <= t.
        tril = torch.tril(torch.ones(block_size, block_size))
        self.register_buffer("mask", tril.view(1, 1, block_size, block_size))

    def forward(self, x):
        batch, seq, dim = x.shape
        q, k, v = self.qkv(x).chunk(3, dim=-1)
        split = (batch, seq, self.n_head, self.head_dim)
        q = q.view(*split).transpose(1, 2)
        k = k.view(*split).transpose(1, 2)
        v = v.view(*split).transpose(1, 2)
        # Scaled dot-product attention with the causal mask applied pre-softmax.
        scores = (q @ k.transpose(-2, -1)) / math.sqrt(self.head_dim)
        scores = scores.masked_fill(self.mask[:, :, :seq, :seq] == 0, float('-inf'))
        weights = self.attn_drop(torch.softmax(scores, dim=-1))
        out = (weights @ v).transpose(1, 2).contiguous().view(batch, seq, dim)
        return self.resid_drop(self.proj(out))
65
+
66
class Block(nn.Module):
    """Pre-norm transformer block: attention then MLP, each with a residual add."""

    def __init__(self, n_embd, n_head, dropout=0.0, block_size=256):
        super().__init__()
        self.ln1 = nn.LayerNorm(n_embd)
        self.attn = CausalSelfAttention(n_embd, n_head, dropout, block_size)
        self.ln2 = nn.LayerNorm(n_embd)
        # Position-wise feed-forward with the conventional 4x hidden expansion.
        self.mlp = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.GELU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        x = x + self.attn(self.ln1(x))
        return x + self.mlp(self.ln2(x))
82
+
83
class TinyGPT(nn.Module):
    """Minimal GPT-style decoder-only transformer (mirrors the trainer's model)."""

    def __init__(self, vocab_size, n_layer, n_head, n_embd, block_size, dropout=0.0):
        super().__init__()
        self.block_size = block_size
        self.tok_emb = nn.Embedding(vocab_size, n_embd)
        self.pos_emb = nn.Embedding(block_size, n_embd)
        self.blocks = nn.ModuleList([Block(n_embd, n_head, dropout, block_size) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        self.head = nn.Linear(n_embd, vocab_size, bias=False)
        self.apply(self._init)

    def _init(self, m):
        # GPT-2 style init: N(0, 0.02) for linear/embedding weights, identity LayerNorm.
        if isinstance(m, (nn.Linear, nn.Embedding)): nn.init.normal_(m.weight, mean=0.0, std=0.02)
        if isinstance(m, nn.Linear) and m.bias is not None: nn.init.zeros_(m.bias)
        if isinstance(m, nn.LayerNorm): nn.init.ones_(m.weight); nn.init.zeros_(m.bias)

    def forward(self, idx):
        """Return next-token logits of shape (B, T, vocab_size) for token ids `idx`."""
        B, T = idx.shape
        assert T <= self.block_size
        pos = torch.arange(0, T, device=idx.device)
        x = self.tok_emb(idx) + self.pos_emb(pos)[None, :, :]
        for blk in self.blocks:
            x = blk(x)
        x = self.ln_f(x)
        return self.head(x)

    @torch.no_grad()
    def sample_stream(
        self, idx, sp: spm.SentencePieceProcessor,
        forbid_ids=None, stop_ids=None, stop_strings=None,
        max_new_tokens=200, temperature=0.8, top_k=60, top_p=0.95, repetition_penalty=1.0
    ) -> Generator[str, None, None]:
        """Yield decoded text chunks with simple constraints.

        Fixes relative to the previous revision:
          * the stop-string slice used ``cut_at - len(prev_text)`` as the slice
            END even though ``cut_at`` is already an absolute offset into the
            decoded text, so the final chunk was truncated or empty;
          * the repetition penalty divided logits unconditionally, which makes
            negative logits LESS negative (i.e. boosts repeated tokens) — use
            the standard sign-aware form instead;
          * stop-string search now begins slightly before the old/new text
            boundary so a stop sequence straddling a token boundary is caught.
        """
        forbid_ids = set(forbid_ids or [])
        stop_ids = set(stop_ids or [])
        stop_strings = list(stop_strings or [])
        prev_text = sp.decode(idx[0].tolist())
        max_stop_len = max((len(s) for s in stop_strings), default=0)

        for _ in range(int(max_new_tokens)):
            idx_cond = idx[:, -self.block_size:]
            logits = self.forward(idx_cond)[:, -1, :]

            # role/tag masking: never emit speaker labels or style tags mid-reply
            if forbid_ids:
                mask_idx = torch.tensor(list(forbid_ids), device=logits.device)
                logits[:, mask_idx] = -1e9

            # repetition penalty (sign-aware, as in CTRL / HF transformers):
            # shrink positive logits, push negative ones further down
            if repetition_penalty != 1.0:
                uniq = torch.unique(idx_cond[0])
                sel = logits[:, uniq]
                logits[:, uniq] = torch.where(
                    sel > 0, sel / repetition_penalty, sel * repetition_penalty
                )

            # temperature / top-k / top-p filtering
            logits = logits / max(1e-8, float(temperature))
            if top_k and int(top_k) > 0:
                v, _ = torch.topk(logits, min(int(top_k), logits.size(-1)))
                cutoff = v[:, -1].unsqueeze(-1)
                logits = torch.where(logits < cutoff, torch.full_like(logits, -1e9), logits)
            if top_p and float(top_p) < 0.9999:
                sorted_logits, sorted_idx = torch.sort(logits, descending=True)
                probs = torch.softmax(sorted_logits, dim=-1)
                cdf = torch.cumsum(probs, dim=-1)
                mask = cdf > float(top_p)
                mask[:, 0] = False  # always keep the single most likely token
                sorted_logits[mask] = -1e9
                logits = torch.zeros_like(logits).scatter(1, sorted_idx, sorted_logits)

            # sample one token
            probs = torch.softmax(logits, dim=-1)
            next_id = torch.multinomial(probs, num_samples=1)

            # token-level stops (EOS / "You:" piece)
            if int(next_id) in stop_ids:
                break

            idx = torch.cat([idx, next_id], dim=1)

            # text-level stops: search from just before the boundary so a stop
            # string split across the old/new text seam is still detected
            full_text = sp.decode(idx[0].tolist())
            new_chunk = full_text[len(prev_text):]
            search_from = max(0, len(prev_text) - max_stop_len + 1)
            cut_at = None
            for s in stop_strings:
                pos = full_text.find(s, search_from)
                if pos != -1 and (cut_at is None or pos < cut_at):
                    cut_at = pos  # keep the EARLIEST stop, not the first listed
            if cut_at is not None:
                # BUGFIX: cut_at is absolute; slice end is cut_at itself
                # (previous code subtracted len(prev_text), truncating wrongly).
                if cut_at > len(prev_text):
                    yield full_text[len(prev_text):cut_at]
                break

            yield new_chunk
            prev_text = full_text
173
+
174
+ # ---------- artifacts ----------
175
def load_everything():
    """Load tokenizer, weights and config from the artifact dir.

    Returns (SentencePieceProcessor, TinyGPT on DEVICE in eval mode, config dict).
    Raises FileNotFoundError if any required artifact is absent.
    """
    required = ((SPM_PATH, "tokenizer"), (CKPT, "weights"), (CFG_JSON, "config"))
    for path, what in required:
        if not path.exists():
            raise FileNotFoundError(f"Missing {what}: {path}")

    sp = spm.SentencePieceProcessor()
    sp.load(str(SPM_PATH))

    cfg = json.loads(CFG_JSON.read_text())
    model = TinyGPT(
        vocab_size=cfg["vocab_size"],
        n_layer=cfg["n_layer"],
        n_head=cfg["n_head"],
        n_embd=cfg["n_embd"],
        block_size=cfg["block_size"],
        dropout=cfg.get("dropout", 0.0),
    ).to(DEVICE)
    state = torch.load(CKPT, map_location=DEVICE)
    model.load_state_dict(state, strict=True)
    model.eval()
    return sp, model, cfg
191
+
192
SP, MODEL, CFG = load_everything()

# special pieces we want to control at decode-time
EOS_ID = SP.eos_id()
YOU_ID = SP.piece_to_id("You:")  # piece_to_id returns -1 when the piece is absent
BOT_ID = SP.piece_to_id("Bot:")
TAGS = ["[STYLE=Snark]", "[FORM=TWEET]", "[FORM=HEADLINE]", "[MOOD=Unhinged]", "[MOOD=Cheeky]"]
TAG_IDS = [SP.piece_to_id(t) for t in TAGS if SP.piece_to_id(t) != -1]
FORBID_IDS = {x for x in [YOU_ID, BOT_ID] + TAG_IDS if x != -1}  # forbid these inside assistant text
STOP_IDS = {i for i in [EOS_ID, YOU_ID] if i != -1}
STOP_STRS = ["\nYou:", "\n\n"]  # treat next turn / blank-line as stop

# ---------- prompt building ----------
# "" = no style tag; shown first in the UI dropdown as the default choice.
STYLE_TAGS = ["", *TAGS]
206
+
207
def history_to_pairs(history_messages) -> List[Tuple[str, str]]:
    """Collapse a Gradio ``messages``-style history into (user, assistant) pairs.

    Assistant messages with no preceding user message are dropped, as is a
    trailing user message that never received a reply.
    """
    pairs: List[Tuple[str, str]] = []
    pending_user = None
    for msg in history_messages:
        if msg.get("role") == "user":
            pending_user = msg.get("content", "")
        elif msg.get("role") == "assistant" and pending_user is not None:
            pairs.append((pending_user, msg.get("content", "")))
            pending_user = None
    return pairs
216
+
217
def build_prompt(history_pairs: List[Tuple[str, str]], user: str, style: str) -> str:
    """Render the history plus the new user turn as the model's text prompt.

    Each past exchange becomes "You: .../Bot: ..." followed by a blank line;
    the prompt ends with "Bot:" so generation continues as the assistant.
    """
    lines: List[str] = []
    for user_turn, bot_turn in history_pairs:
        lines.extend((f"You: {user_turn}", f"Bot: {bot_turn}", ""))
    lines.append(f"You: {user}")
    if style:
        # We *insert* tags here, but they are forbidden inside the reply itself.
        lines.append(style)
    lines.append("Bot:")
    return "\n".join(lines)
227
+
228
def encode_ctx(text: str, block_size: int) -> torch.Tensor:
    """Tokenize `text` and keep only the last `block_size` ids as a (1, T) tensor on DEVICE."""
    token_ids = SP.encode(text, out_type=int)
    tail = token_ids[-block_size:]
    return torch.tensor([tail], dtype=torch.long, device=DEVICE)
231
+
232
+ # ---------- gradio handler ----------
233
def respond(message, history, temperature, top_k, top_p, repetition_penalty, max_new_tokens, style):
    """Gradio streaming handler: build the prompt, stream the reply, log the turn.

    Yields the cumulative reply text so the UI updates token-by-token.
    """
    if isinstance(message, dict):
        # Multimodal textboxes deliver a dict; we only want the text payload.
        message = message.get("content", "")
    prompt = build_prompt(history_to_pairs(history), message, style)
    ctx = encode_ctx(prompt, CFG["block_size"])

    reply = ""
    for piece in MODEL.sample_stream(
        ctx, SP,
        forbid_ids=FORBID_IDS,
        stop_ids=STOP_IDS,
        stop_strings=STOP_STRS,
        max_new_tokens=int(max_new_tokens),
        temperature=float(temperature),
        top_k=int(top_k),
        top_p=float(top_p),
        repetition_penalty=float(repetition_penalty),
    ):
        reply += piece
        yield reply

    # Best-effort transcript logging; never let a disk error break the chat.
    try:
        with LOG_PATH.open("a", encoding="utf-8") as log:
            log.write(f"You: {message}\n")
            if style:
                log.write(style + "\n")
            log.write(f"Bot: {reply}\n\n")
    except Exception:
        pass
264
+
265
+ # ---------- app ----------
266
def main():
    """Build and launch the Gradio chat UI with the sampling controls."""
    title = "TinyGPT — Fun Chat (turn-taking fixed)"
    desc = f"Device: {DEVICE.type.upper()} • vocab={CFG['vocab_size']} • layers={CFG['n_layer']} heads={CFG['n_head']} dim={CFG['n_embd']} • block={CFG['block_size']}"

    # Sliders map 1:1 onto respond()'s extra parameters, in order.
    controls = [
        gr.Slider(0.2, 1.5, value=0.8, step=0.05, label="Temperature"),
        gr.Slider(0, 200, value=60, step=1, label="Top-K (0=off ⇒ set 0)"),
        gr.Slider(0.5, 1.0, value=0.95, step=0.01, label="Top-P"),
        gr.Slider(1.0, 1.5, value=1.08, step=0.01, label="Repetition penalty"),
        gr.Slider(16, 512, value=200, step=8, label="Max new tokens"),
        gr.Dropdown(STYLE_TAGS, value="", label="Style tag"),
    ]
    app = gr.ChatInterface(
        fn=respond,
        title=title,
        description=desc,
        additional_inputs=controls,
        type="messages",
    )
    app.queue()
    app.launch(server_name="0.0.0.0", server_port=7860, show_api=False, inbrowser=False)


if __name__ == "__main__":
    main()
chat_sprint_artifacts/chat_transcript.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You: tell me a joke
2
+ Bot: None of the old people are making jokes about how there is this when you get it.
3
+
4
+ You: how old are you?
5
+ Bot: I'm 32, thanks for asking.
6
+
7
+ You: when is your birthday?
8
+ Bot: It'll be 20 degrees.
9
+
10
+ You: on your birthday?
11
+ Bot: It's only a single one with everything that's like.
12
+
13
+ You: lots of days are 20 degrees
14
+ Bot: Here are all 10-15 degrees below zero less than a year or worst: 1.
15
+
16
+ You: sounds cold
17
+ Bot: Can you tell me more about why you’re so cold?
18
+
19
+ You: talk to me like shakespeare
20
+ Bot: Why do you think they are smelly and disgusting?
21
+
22
+ You: no, i like him
23
+ Bot: Do you mean “we know my names” instead of “All other people”?
24
+
25
+ You: what?
26
+ Bot: I was just looking at you.
27
+
28
+ You: where is the first astronaut buried
29
+ Bot: 100.
30
+
31
+ You: 100 what?
32
+ Bot: Chris James Between Star Wars a Drive Drive!
33
+
chat_sprint_artifacts/corpus.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ce34bd85535c80a48cc6f6efe53454f8d1ac54eb6f39e53c25c7666bc8e5d4c
3
+ size 20843161
chat_sprint_artifacts/model_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vocab_size": 5000,
3
+ "n_layer": 6,
4
+ "n_head": 6,
5
+ "n_embd": 384,
6
+ "block_size": 256,
7
+ "dropout": 0.0
8
+ }
chat_sprint_artifacts/samples.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ----
2
+ You: Give me a spicy take on AI. Bot: What should I do to prevent asthma? You: Continue in Shakespearean style. Bot: Or so, I had been borne it; and You: Reply with sarcasm: [STYLE=Snark] Bot: Reddit Comment: The FBIs and Mona comments are bullshit, but the only reason we buy them at a patria are. You: It is a long story, but could you please give me some tips on how to write an essay on the impact of social media on teenagers? Bot: Sure, here are some tips on how to write an essay on social media sharing and retaining your podcast on coworkers on social You: ['i think i have ever heard i got a 3ds', 'well that lot is better off what you like to do'] Bot: My dad and my grandma loves soccer. What kind of Razer would he like?
3
+ ----
4
+ You: Roast my messy desk. Bot: What does that mean? Is that your friend you've got a job? You: Yeah, I know. It's been pretty busy at work lately. But I'm trying to stay positive and keep this up. Bot: That's true. So what have you been up to lately? You: Wow, I can't believe I just burned into the group myself. It was so unexpected. Bot: Yeah, it was pretty amazing. And now, I'm really glad we did this together. It feels like just yesterday we were running away. You: Well, you'll never know if you tell me or not, but if you say so. Bot: Okay, then I support you. Thank you for telling me that. You: What did the two blind people say to the other? Bot: A black man walks out of his ear. You: Hey
5
+ ----
6
+ You: Explain recursion like you're annoyed. Bot: Thank you for your kind words, I really appreciate it. You: What do you mean? Bot: Well, I just feel like there's more to life than what we have here. You: Okay, well let's take a look at your test and see where we can cut back if we find it. Bot: Yeah, that would be a good idea. Thank you so much for your help! You: Reply with sarcasm: [STYLE=Snark] Bot: Reddit Comment: You don't need any of them. You: Continue in Shakespearean style. Bot: Into the king and heralds are full of tears. You: Reply with sarcasm: [STYLE=Snark] Bot: User: I got ten points on the way back and I can pick up everything from now. You: Yes , I am very grateful . Bot: What time
7
+ ----
8
+ You: Write a satirical headline about coffee. Bot: It's an old language, a branch of computer screen that has been deposed to many people, using a larger, a You: That's definitely the right attitude for me. But you're not alone in this. Bot: Yeah, I guess so. But it's just so hard sometimes. You: No problem, man. So, what have you been up to lately? Bot: Just hanging out with friends and family, like I said. We went to the park yesterday and we had a lot of fun. You: Continue a dad joke: Bot: I said “This is at the gate. You: 800 people are going to buy a parasol for any amount of stuff. Can you help me join a trade barbecue?????????????? [STYLE=Snark] Bot: Lol but it's really 34
9
+ ----
10
+ You: Give me a shower thought about umbrellas. Bot: I'm sure some of them will come out of their heads and see if the ads are vaginated from predators You: Thank you so much for your help, boss. I really appreciate it. Bot: You're welcome, Haylea. I'm happy to have you on your team. You: Yeah, I was just thinking the same thing. We were lucky to be alive and taken each other. Bot: Well, I know that the average part was unfair, but I'm not entirely sure. What do you think? You: I found it really fascinating how these art and architecture have shaped the food I gained from the early 1990s. Bot: Yes, the environmental impact of the new and cultural significance of the new season tourism industry is: 1. You: Give me a shower thought. Bot: Do gay people get paid when you
11
+ ----
12
+ You: Tell me a one-liner about deadlines. Bot: I gotta make my own reverse You: What do you call a guy who has eve? Bot: Depending. You: How did the Renaissance view of the Age of Place have impacted the indigenous layers of tropicality, and what were some of their con Bot: The Age of Place of Place had a significant impact on the indigenous communities. You: Give me a shower thought. Bot: The radio has shirts and the internet ever already has the same signs/all to do anything with them You: That's really admirable. I can tell that you have very well. Bot: Yeah, I definitely have. So, how long have you been playing sports? You: Continue in Shakespearean style. Bot: And in this thund
13
+ ----
14
+ You: Stay in Shakespeare mode and flatter me. Bot: You want to do more than try to use a fake, in a way that looks at the deaf, but you need to be careful with the You: I'm not sure yet. I might go visit my family or a do it. What about you? Bot: Oh, I don't really have anything that I can do. You: A man standing up for a resource Bot: He picked on the jawline. He picked up the man and says "What'll I look like? You: Can you please add some suggestions for color combinations that would work well for each type of exercise as well? Bot: Sure, here are some color schemes that would work well for each type of exercise as well: 1. You: I don't know what I would have done if I had just. Bot: Well, you did. And now I'm
15
+ ----
16
+ You: Reply sarcastically to: I love meetings. Bot: So, what are the other things you're interested in doing with your paintings? You: Why did the teacher choose a marriage of the Mumbai? Bot: They can't be between the two people. You: Continue a dad joke: Bot: He buried me that I was a white male, I would say 'ghould'? You: I've been good. Just busy with work and stuff. How about you? Bot: Same here. It's great to see you. How about you? You: I wanted to return this email address for my company. Bot: Sure, how old is he? You: Why do you want to rob a bank? Bot: I’m afraid I can’t do that. I’m not sure who would want to do that. You: Well, I've always wanted to
17
+ ----
18
+ You: What's a good way to say no to a meeting politely? Bot: It is best for the children to stay on top of the rest of us. You: I was thinking of straightening it, but I think it would be a good idea if I just take it. Bot: Well, let me check with our other two pieces of furniture and see if anything catches your eye. You: ['got a meme show . i had to go to a tattoo .', "well you have to buy a new full cookie in school ."] Bot: I've never tried that. How about with a large amount of food and water will it affect? You: Hey, did you hear that noise? Bot: Yeah, it's pretty cold out tonight. You: What if you're not allowed on getting married or not? Bot: That’s not an excuse, I’m afraid I can’t give you advice
19
+ ----
20
+ You: Roleplay as my productivity coach for two turns. Bot: Optimum, I appreciate your kind words. You: Continue a dad joke: Bot: But I was a jar! You: A young man walks into a bar Bot: Son's rather short and says how the bartender looks up and says "I have some good toilet paper?" Son: "Awwwwww! You: Continue in Shakespearean style. Bot: And tails all sorts of shape, You: Can you please provide more information about how I can integrate the Twitter marketing plan? Bot: Sure, I'd be happy to help you with that. Here are some ways you can promote the Twitter marketing plan: 1. You: I know, but it's just so frustrating. I feel like I'm never going to win or make a mistake. Bot: Maybe you're not being bored, Daryl. But are
chat_sprint_artifacts/spm_chat.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b83805b03803e8c05b162483d36eb016bfb285fd804cbe2878829ceb3c69b572
3
+ size 317857
chat_sprint_artifacts/spm_chat.vocab ADDED
@@ -0,0 +1,5000 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <unk> 0
2
+ <s> 0
3
+ </s> 0
4
+ You: 0
5
+ Bot: 0
6
+ [STYLE=Snark] 0
7
+ [FORM=TWEET] 0
8
+ [FORM=HEADLINE] 0
9
+ [MOOD=Unhinged] 0
10
+ [MOOD=Cheeky] 0
11
+ ▁ -1.96775
12
+ . -3.1534
13
+ s -3.51788
14
+ ▁I -3.69735
15
+ , -3.71605
16
+ ' -3.73051
17
+ ▁a -4.03037
18
+ ▁to -4.1268
19
+ ▁you -4.14381
20
+ ▁the -4.14513
21
+ ? -4.47509
22
+ t -4.55324
23
+ ▁and -4.69105
24
+ ▁in -4.78519
25
+ ▁of -4.79873
26
+ ▁it -4.82506
27
+ m -4.88252
28
+ ▁that -4.96709
29
+ : -5.09343
30
+ ! -5.15476
31
+ ▁for -5.15792
32
+ ▁is -5.17387
33
+ ▁with -5.21771
34
+ ▁me -5.35357
35
+ re -5.40692
36
+ ing -5.54584
37
+ ▁can -5.56813
38
+ ▁be -5.57056
39
+ an -5.5715
40
+ ▁are -5.57301
41
+ ▁have -5.58497
42
+ ▁do -5.59701
43
+ ▁on -5.62945
44
+ ▁my -5.65631
45
+ ▁Continue -5.68236
46
+ - -5.70966
47
+ ▁like -5.74127
48
+ d -5.74409
49
+ ▁just -5.74907
50
+ ▁about -5.76572
51
+ ed -5.78997
52
+ ▁so -5.83787
53
+ ▁know -5.85083
54
+ ▁was -5.85437
55
+ ▁your -5.88187
56
+ ▁not -5.9076
57
+ ▁It -5.91462
58
+ ▁What -5.99908
59
+ ▁what -6.00905
60
+ ▁really -6.01693
61
+ ’ -6.02848
62
+ ▁That -6.04855
63
+ ▁don -6.06456
64
+ ▁but -6.08073
65
+ ▁this -6.08915
66
+ ve -6.128
67
+ ▁You -6.14882
68
+ ▁we -6.1491
69
+ ▁style -6.15094
70
+ ▁Shakespeare -6.18507
71
+ ▁how -6.21798
72
+ ▁some -6.23558
73
+ ll -6.23823
74
+ ▁one -6.26434
75
+ y -6.29163
76
+ ▁been -6.29645
77
+ ▁Yeah -6.33878
78
+ ▁think -6.33896
79
+ ▁out -6.34988
80
+ ▁want -6.40554
81
+ ▁all -6.43015
82
+ ▁more -6.43149
83
+ ▁good -6.43445
84
+ ▁joke -6.47366
85
+ er -6.47508
86
+ ▁The -6.48227
87
+ ▁get -6.4855
88
+ e -6.4944
89
+ ▁if -6.50167
90
+ ▁they -6.51369
91
+ ▁up -6.51392
92
+ ▁at -6.51601
93
+ ▁dad -6.53622
94
+ ▁great -6.54628
95
+ ▁would -6.54781
96
+ ▁Well -6.58407
97
+ ▁sarcasm -6.58466
98
+ ▁Reply -6.58792
99
+ ▁there -6.59345
100
+ ly -6.60068
101
+ ▁A -6.61641
102
+ ▁Can -6.62726
103
+ ▁from -6.64539
104
+ ▁But -6.65102
105
+ ▁or -6.66291
106
+ n -6.68929
107
+ ▁time -6.70449
108
+ ▁here -6.72834
109
+ ▁going -6.73623
110
+ ▁How -6.76174
111
+ i -6.76857
112
+ ▁too -6.76894
113
+ ▁people -6.77556
114
+ ▁he -6.77938
115
+ r -6.78092
116
+ ▁see -6.78676
117
+ ▁make -6.80848
118
+ in -6.81138
119
+ ▁help -6.82631
120
+ ▁Yes -6.8368
121
+ ▁could -6.85967
122
+ I -6.85985
123
+ ▁thought -6.86412
124
+ ▁much -6.87212
125
+ ▁sure -6.87562
126
+ al -6.8784
127
+ ▁need -6.88587
128
+ ▁lot -6.89281
129
+ ▁" -6.90667
130
+ ▁go -6.91176
131
+ ▁So -6.91539
132
+ ▁love -6.92668
133
+ ▁feel -6.93411
134
+ ▁has -6.9642
135
+ ▁when -6.96629
136
+ ▁will -6.97469
137
+ ▁right -6.98445
138
+ ▁now -6.99274
139
+ ▁work -6.99997
140
+ ... -7.00481
141
+ p -7.01682
142
+ ▁by -7.02233
143
+ ▁as -7.03334
144
+ a -7.05217
145
+ ▁who -7.06742
146
+ ▁them -7.06782
147
+ ▁No -7.07072
148
+ ▁any -7.07339
149
+ ▁did -7.07361
150
+ ▁We -7.07671
151
+ ▁no -7.07744
152
+ ▁sounds -7.07872
153
+ o -7.08359
154
+ ▁Do -7.08657
155
+ ▁had -7.09694
156
+ le -7.10885
157
+ ▁always -7.11434
158
+ ▁an -7.11867
159
+ ▁their -7.15531
160
+ ▁something -7.16866
161
+ c -7.17042
162
+ es -7.18271
163
+ ▁way -7.18766
164
+ ▁shower -7.19119
165
+ or -7.20171
166
+ w -7.20368
167
+ ▁were -7.20726
168
+ ▁her -7.21555
169
+ ▁And -7.22995
170
+ ▁Reddit -7.23588
171
+ ▁Give -7.2435
172
+ " -7.25579
173
+ ▁sharp -7.25599
174
+ ▁new -7.2628
175
+ l -7.2632
176
+ ▁his -7.26459
177
+ ▁other -7.26489
178
+ ▁doing -7.26587
179
+ ▁# -7.26653
180
+ ▁Sure -7.26697
181
+ ▁React -7.27131
182
+ ▁User -7.27181
183
+ liner -7.27202
184
+ ▁i -7.27481
185
+ ▁Comment -7.28027
186
+ it -7.28097
187
+ ▁should -7.29053
188
+ ▁take -7.29416
189
+ ▁Oh -7.31442
190
+ ▁Thanks -7.31484
191
+ ▁Just -7.34903
192
+ th -7.36579
193
+ ▁Why -7.36913
194
+ ▁tell -7.37
195
+ ▁well -7.37299
196
+ ▁am -7.37425
197
+ en -7.37508
198
+ ▁Hey -7.38031
199
+ ▁say -7.38033
200
+ ▁[' -7.38034
201
+ ▁sorry -7.38758
202
+ g -7.40627
203
+ S -7.41159
204
+ ▁mean -7.41256
205
+ ▁our -7.42644
206
+ ▁This -7.43449
207
+ ▁things -7.44158
208
+ ▁1. -7.44277
209
+ ▁S -7.45003
210
+ ▁then -7.45549
211
+ ▁got -7.45696
212
+ ur -7.46269
213
+ ▁she -7.46344
214
+ ▁into -7.47184
215
+ ▁Thank -7.48229
216
+ ▁day -7.48319
217
+ ▁very -7.49401
218
+ ion -7.49725
219
+ ▁provide -7.49999
220
+ ▁try -7.50489
221
+ ▁anything -7.517
222
+ ▁glad -7.52529
223
+ k -7.52825
224
+ ▁He -7.54209
225
+ ▁him -7.54619
226
+ u -7.54986
227
+ ar -7.56208
228
+ b -7.57027
229
+ ▁life -7.57379
230
+ ▁let -7.58057
231
+ ▁over -7.58364
232
+ ▁why -7.58373
233
+ ▁because -7.58704
234
+ ation -7.58798
235
+ ▁definitely -7.59938
236
+ ▁back -7.60564
237
+ ▁didn -7.60604
238
+ ic -7.6089
239
+ ▁C -7.61403
240
+ ▁find -7.61675
241
+ ▁happy -7.61853
242
+ ▁My -7.62429
243
+ ▁never -7.6253
244
+ ▁give -7.6259
245
+ la -7.62832
246
+ li -7.62855
247
+ ra -7.63821
248
+ ▁being -7.64537
249
+ ▁fun -7.64548
250
+ ne -7.64712
251
+ ch -7.65079
252
+ ▁best -7.66179
253
+ ▁better -7.66351
254
+ A -7.66579
255
+ ▁They -7.66849
256
+ h -7.67007
257
+ ▁look -7.67069
258
+ ▁hard -7.67201
259
+ on -7.67574
260
+ ▁B -7.68683
261
+ ▁today -7.69257
262
+ ▁use -7.69921
263
+ ri -7.70267
264
+ ▁someone -7.70634
265
+ ▁first -7.7113
266
+ ▁than -7.71633
267
+ ▁course -7.72763
268
+ ers -7.73194
269
+ ▁information -7.73748
270
+ ▁many -7.73971
271
+ ce -7.74271
272
+ ▁come -7.74741
273
+ ▁start -7.74814
274
+ ▁If -7.74827
275
+ ▁home -7.76898
276
+ ▁There -7.76931
277
+ ) -7.77131
278
+ f -7.77185
279
+ se -7.77419
280
+ T -7.7794
281
+ el -7.77979
282
+ ▁where -7.78052
283
+ ▁still -7.7895
284
+ ▁Here -7.79841
285
+ C -7.79973
286
+ ▁P -7.80008
287
+ ter -7.80622
288
+ ▁different -7.81016
289
+ ▁kind -7.81038
290
+ ▁down -7.81215
291
+ ▁few -7.81408
292
+ ▁little -7.81527
293
+ ▁said -7.81527
294
+ ▁hear -7.81634
295
+ ; -7.81696
296
+ ▁job -7.81828
297
+ ro -7.81884
298
+ ▁does -7.81907
299
+ D -7.81974
300
+ ▁E -7.83003
301
+ able -7.83097
302
+ ▁man -7.83207
303
+ ▁only -7.83942
304
+ ▁pretty -7.83996
305
+ ▁F -7.84347
306
+ ▁guess -7.8488
307
+ me -7.85391
308
+ ▁Let -7.85705
309
+ ▁even -7.86084
310
+ us -7.86445
311
+ ▁thing -7.88791
312
+ ▁talk -7.88854
313
+ te -7.89086
314
+ ▁understand -7.8916
315
+ ▁Of -7.89882
316
+ ▁idea -7.89977
317
+ est -7.90402
318
+ na -7.90755
319
+ ▁problem -7.91348
320
+ ▁car -7.91425
321
+ ▁made -7.91472
322
+ M -7.91656
323
+ ▁everything -7.92032
324
+ ▁wanted -7.92323
325
+ ▁okay -7.92508
326
+ ▁long -7.9279
327
+ ▁around -7.92892
328
+ ▁most -7.92951
329
+ ▁Okay -7.93036
330
+ ▁friends -7.93496
331
+ lo -7.94228
332
+ ▁bad -7.94351
333
+ ▁off -7.94385
334
+ ▁( -7.94389
335
+ ▁In -7.9495
336
+ ▁please -7.94975
337
+ ▁feeling -7.95416
338
+ ▁also -7.95466
339
+ ▁might -7.96342
340
+ ▁getting -7.96785
341
+ ▁Not -7.96895
342
+ ▁trying -7.97024
343
+ ▁thinking -7.97747
344
+ ir -7.97949
345
+ P -7.98492
346
+ ▁keep -7.99194
347
+ il -7.99448
348
+ ▁believe -7.99481
349
+ ▁“ -8.0052
350
+ ▁appreciate -8.00821
351
+ ▁looking -8.01421
352
+ ▁To -8.01462
353
+ ▁amazing -8.01603
354
+ ▁Are -8.02199
355
+ ▁world -8.02301
356
+ ▁lately -8.02322
357
+ ▁used -8.02978
358
+ de -8.03305
359
+ ▁Wow -8.03462
360
+ ▁before -8.03505
361
+ ▁through -8.04648
362
+ / -8.04684
363
+ ▁school -8.05096
364
+ ▁nice -8.05683
365
+ ▁person -8.06099
366
+ ist -8.06556
367
+ ta -8.06659
368
+ ▁those -8.0694
369
+ ▁these -8.07063
370
+ ▁money -8.07088
371
+ ▁while -8.07153
372
+ ▁again -8.07333
373
+ ry -8.07535
374
+ ent -8.07663
375
+ ▁such -8.07845
376
+ E -8.07936
377
+ O -8.08161
378
+ ment -8.08457
379
+ as -8.08617
380
+ ▁us -8.08617
381
+ to -8.09341
382
+ po -8.09365
383
+ ke -8.09385
384
+ B -8.09772
385
+ ma -8.09861
386
+ ▁after -8.10176
387
+ is -8.10264
388
+ ▁big -8.10469
389
+ ▁Hi -8.10559
390
+ ▁Because -8.11147
391
+ ▁play -8.11261
392
+ ▁1 -8.1175
393
+ ▁de -8.12089
394
+ no -8.12322
395
+ vi -8.12436
396
+ ge -8.13102
397
+ ▁else -8.13133
398
+ ▁place -8.13465
399
+ sh -8.13534
400
+ et -8.13619
401
+ id -8.1384
402
+ ▁doesn -8.13947
403
+ ▁actually -8.14612
404
+ ▁same -8.14901
405
+ ▁examples -8.15059
406
+ age -8.15111
407
+ ▁call -8.15499
408
+ ▁f -8.15604
409
+ ate -8.15797
410
+ pe -8.15888
411
+ v -8.16224
412
+ '] -8.17591
413
+ ▁put -8.17717
414
+ ive -8.17778
415
+ ▁care -8.18188
416
+ ▁add -8.18434
417
+ ▁un -8.18815
418
+ ity -8.19295
419
+ ▁myself -8.19548
420
+ ▁ever -8.1978
421
+ ▁makes -8.1998
422
+ ck -8.20341
423
+ ▁specific -8.20671
424
+ ▁two -8.21248
425
+ ▁Ma -8.21294
426
+ ▁last -8.21389
427
+ ▁name -8.22738
428
+ ol -8.22779
429
+ ▁bit -8.23142
430
+ ▁must -8.23261
431
+ ▁Me -8.23311
432
+ ▁thank -8.23475
433
+ ” -8.23637
434
+ ▁which -8.24101
435
+ ▁working -8.24533
436
+ ti -8.24585
437
+ mo -8.24707
438
+ ▁together -8.24752
439
+ ▁years -8.24841
440
+ ▁favorite -8.25347
441
+ ▁She -8.25688
442
+ hi -8.26117
443
+ ting -8.26315
444
+ at -8.26438
445
+ ▁done -8.2671
446
+ ▁own -8.27871
447
+ ▁old -8.28136
448
+ ▁2 -8.2843
449
+ ▁show -8.29045
450
+ z -8.29775
451
+ ▁family -8.30171
452
+ ▁O -8.30689
453
+ ▁Write -8.30729
454
+ ▁hope -8.30989
455
+ ▁true -8.31082
456
+ L -8.31385
457
+ ca -8.31449
458
+ ie -8.32252
459
+ less -8.32269
460
+ ▁story -8.32344
461
+ ▁T -8.32799
462
+ ▁friend -8.33272
463
+ ▁important -8.33931
464
+ ▁b -8.34089
465
+ ▁Certainly -8.3422
466
+ ▁maybe -8.34625
467
+ ▁wrong -8.34729
468
+ ▁Have -8.35115
469
+ ▁yourself -8.3546
470
+ ▁3 -8.35884
471
+ ▁question -8.35971
472
+ ▁tough -8.3601
473
+ ▁part -8.36029
474
+ ▁won -8.36147
475
+ ▁having -8.36345
476
+ ▁making -8.36348
477
+ ine -8.36442
478
+ ▁food -8.36568
479
+ ▁since -8.37537
480
+ nt -8.38346
481
+ ▁next -8.38667
482
+ x -8.38742
483
+ ▁interesting -8.39055
484
+ pa -8.39101
485
+ ▁without -8.39138
486
+ ci -8.3915
487
+ F -8.39267
488
+ ▁talking -8.39696
489
+ ▁live -8.39945
490
+ ▁tips -8.40044
491
+ ▁using -8.40062
492
+ co -8.40375
493
+ am -8.41256
494
+ ▁enjoy -8.41518
495
+ ▁nothing -8.4167
496
+ ba -8.41716
497
+ ▁happened -8.41893
498
+ um -8.42216
499
+ H -8.42231
500
+ ▁year -8.42239
501
+ ▁music -8.42283
502
+ G -8.42532
503
+ ▁end -8.42662
504
+ ▁away -8.42784
505
+ bo -8.42839
506
+ ni -8.42866
507
+ 8 -8.42986
508
+ ▁everyone -8.43251
509
+ ▁enough -8.4348
510
+ ▁Re -8.44117
511
+ ▁Maybe -8.44362
512
+ ▁mind -8.44374
513
+ ▁high -8.44517
514
+ ▁fine -8.44565
515
+ ▁yet -8.44834
516
+ ▁though -8.44919
517
+ ▁every -8.45715
518
+ ful -8.46189
519
+ ▁house -8.4626
520
+ ▁D -8.46609
521
+ ▁stay -8.46622
522
+ ty -8.46624
523
+ ▁each -8.47389
524
+ ▁When -8.47645
525
+ ▁wish -8.47727
526
+ ▁As -8.47761
527
+ ▁M -8.4779
528
+ ▁point -8.47946
529
+ R -8.48093
530
+ ▁ask -8.4887
531
+ ▁able -8.49178
532
+ ▁Did -8.49229
533
+ ▁Absolutely -8.49713
534
+ ies -8.50218
535
+ up -8.50515
536
+ ▁sometimes -8.50618
537
+ da -8.51091
538
+ ▁game -8.51146
539
+ ▁night -8.51206
540
+ ▁& -8.5126
541
+ ▁looks -8.51315
542
+ ul -8.51393
543
+ ▁hurt -8.51422
544
+ ▁support -8.51508
545
+ ho -8.52766
546
+ ▁cool -8.53356
547
+ man -8.53385
548
+ ▁both -8.53617
549
+ ▁[" -8.5363
550
+ ▁change -8.5385
551
+ ha -8.53926
552
+ ▁found -8.54221
553
+ ▁experience -8.54658
554
+ ia -8.54857
555
+ op -8.55092
556
+ ow -8.55148
557
+ ▁anyone -8.55326
558
+ U -8.56116
559
+ land -8.56561
560
+ cu -8.56586
561
+ ▁far -8.56869
562
+ W -8.56919
563
+ ru -8.57056
564
+ be -8.57095
565
+ ▁Don -8.5744
566
+ ▁text -8.57459
567
+ un -8.57939
568
+ ▁seem -8.58068
569
+ ▁sound -8.58105
570
+ ▁H -8.58292
571
+ ▁De -8.58294
572
+ ▁real -8.5868
573
+ ad -8.58772
574
+ fe -8.59132
575
+ N -8.5964
576
+ ▁taking -8.59771
577
+ ▁went -8.5979
578
+ ▁Trump -8.59823
579
+ ut -8.60113
580
+ 2 -8.60421
581
+ im -8.60695
582
+ bu -8.61096
583
+ ▁stop -8.61558
584
+ ▁Ha -8.61689
585
+ ▁isn -8.61794
586
+ do -8.61893
587
+ ▁learn -8.62303
588
+ ▁suggest -8.6242
589
+ ▁K -8.62461
590
+ ▁wait -8.62519
591
+ ten -8.62552
592
+ ▁head -8.62609
593
+ ▁heard -8.62661
594
+ Y -8.62698
595
+ per -8.62834
596
+ ▁between -8.63194
597
+ ▁For -8.63275
598
+ ▁team -8.63446
599
+ ▁Where -8.63874
600
+ ▁sense -8.64251
601
+ ▁read -8.64318
602
+ bi -8.64443
603
+ ▁during -8.64659
604
+ king -8.64902
605
+ ness -8.64925
606
+ ▁busy -8.65115
607
+ ▁Mom -8.65912
608
+ ▁Sarah -8.66142
609
+ ▁thanks -8.6619
610
+ ▁means -8.66776
611
+ he -8.66863
612
+ ▁stuff -8.66945
613
+ > -8.6765
614
+ ▁worry -8.67954
615
+ ▁W -8.68014
616
+ ▁... -8.6811
617
+ ▁eat -8.68169
618
+ by -8.68233
619
+ ▁remember -8.68565
620
+ ▁program -8.68873
621
+ ▁G -8.68941
622
+ ▁week -8.69409
623
+ ▁small -8.69699
624
+ ▁guy -8.69789
625
+ ▁Who -8.70001
626
+ ▁already -8.70318
627
+ ▁check -8.70425
628
+ ▁< -8.70778
629
+ ▁An -8.71129
630
+ ▁leave -8.71154
631
+ ▁probably -8.71363
632
+ ▁coming -8.71418
633
+ ot -8.71422
634
+ ▁happen -8.7161
635
+ ▁deal -8.71791
636
+ ▁Be -8.71939
637
+ ▁seen -8.71942
638
+ son -8.71961
639
+ ber -8.72073
640
+ ▁impact -8.72315
641
+ ▁another -8.72739
642
+ va -8.72983
643
+ ▁break -8.73165
644
+ ▁haven -8.73437
645
+ ▁asking -8.73505
646
+ ▁Good -8.73601
647
+ ▁details -8.73636
648
+ ▁easy -8.73703
649
+ ver -8.73838
650
+ om -8.74563
651
+ ▁number -8.74676
652
+ and -8.74689
653
+ nd -8.74787
654
+ ▁may -8.7508
655
+ ▁soon -8.75341
656
+ ▁beautiful -8.75439
657
+ ▁set -8.75828
658
+ ▁started -8.75946
659
+ ▁recipe -8.76098
660
+ ▁store -8.76335
661
+ ▁test -8.76402
662
+ ance -8.76494
663
+ ▁pro -8.76586
664
+ ▁Your -8.76962
665
+ ▁Great -8.77318
666
+ ▁saw -8.77581
667
+ ▁sta -8.77688
668
+ ] -8.77716
669
+ ish -8.77832
670
+ ▁called -8.77998
671
+ ▁meet -8.7811
672
+ ▁seems -8.7847
673
+ ▁came -8.78631
674
+ mon -8.78707
675
+ ▁dis -8.78735
676
+ ▁told -8.79007
677
+ ▁con -8.79051
678
+ 5 -8.79256
679
+ 9 -8.79272
680
+ ▁several -8.79346
681
+ dy -8.7954
682
+ ee -8.79655
683
+ fi -8.79685
684
+ ▁project -8.80146
685
+ ph -8.80198
686
+ ▁Ja -8.80311
687
+ ▁water -8.80342
688
+ 1 -8.80561
689
+ ▁explain -8.80721
690
+ 3 -8.80736
691
+ ▁free -8.80748
692
+ ▁L -8.80947
693
+ ▁plan -8.81293
694
+ ▁dog -8.81503
695
+ tic -8.81619
696
+ ▁Po -8.8172
697
+ ▁social -8.81767
698
+ ard -8.81991
699
+ ▁class -8.82212
700
+ line -8.82262
701
+ ▁agree -8.82291
702
+ lu -8.82407
703
+ ▁matter -8.82483
704
+ ▁Now -8.82795
705
+ ▁kids -8.82898
706
+ ▁book -8.83087
707
+ ▁N -8.83294
708
+ ▁close -8.83301
709
+ ▁Co -8.83338
710
+ ▁answer -8.84006
711
+ ▁Could -8.84006
712
+ ▁4 -8.84063
713
+ ▁buy -8.84126
714
+ ▁short -8.84126
715
+ ▁hanging -8.84383
716
+ ▁country -8.84497
717
+ ▁welcome -8.84724
718
+ ▁business -8.84884
719
+ em -8.84984
720
+ ▁hate -8.84993
721
+ ▁post -8.85319
722
+ ▁wife -8.85462
723
+ ▁ways -8.85487
724
+ go -8.85666
725
+ ▁Na -8.86111
726
+ ▁least -8.8642
727
+ URL -8.86423
728
+ ▁city -8.86569
729
+ ice -8.86742
730
+ ▁worth -8.86787
731
+ ▁La -8.86932
732
+ ▁black -8.87284
733
+ ▁pi -8.8735
734
+ ▁bar -8.87508
735
+ ▁watch -8.87543
736
+ ▁pay -8.87727
737
+ che -8.87737
738
+ ▁company -8.87897
739
+ ▁under -8.88681
740
+ tor -8.89166
741
+ ▁left -8.89275
742
+ ▁woman -8.89399
743
+ ▁perfect -8.89638
744
+ ▁gra -8.89939
745
+ 4 -8.90013
746
+ ▁recommend -8.90026
747
+ ian -8.90032
748
+ ▁run -8.90091
749
+ ▁finally -8.90183
750
+ ▁mom -8.90217
751
+ ous -8.90282
752
+ ▁says -8.90479
753
+ rn -8.90482
754
+ ▁interested -8.90519
755
+ ▁feels -8.90679
756
+ ating -8.90866
757
+ ▁cat -8.90932
758
+ act -8.91067
759
+ ▁ideas -8.91107
760
+ tra -8.91551
761
+ les -8.91616
762
+ ▁excited -8.91688
763
+ ▁list -8.91761
764
+ ted -8.91884
765
+ ▁open -8.92094
766
+ ▁imagine -8.92204
767
+ ▁ready -8.92719
768
+ ▁pre -8.92954
769
+ ▁Ba -8.93004
770
+ ▁co -8.93029
771
+ ally -8.93115
772
+ ▁Ro -8.93271
773
+ ▁All -8.93344
774
+ ▁figure -8.93901
775
+ ▁playing -8.94411
776
+ ▁face -8.94546
777
+ ▁address -8.94697
778
+ str -8.95028
779
+ ▁girl -8.95479
780
+ ▁One -8.95678
781
+ ▁health -8.95707
782
+ ▁offer -8.95721
783
+ cr -8.95767
784
+ io -8.96039
785
+ ▁R -8.96063
786
+ rs -8.96249
787
+ K -8.9628
788
+ ▁$ -8.96445
789
+ ▁5 -8.9669
790
+ ▁hit -8.96741
791
+ vo -8.96877
792
+ ▁color -8.96989
793
+ ▁party -8.97184
794
+ ▁wasn -8.97231
795
+ ag -8.97269
796
+ ▁U -8.97437
797
+ ▁Mar -8.97493
798
+ fa -8.97592
799
+ ▁Please -8.97795
800
+ ▁Li -8.98038
801
+ * -8.98057
802
+ ▁days -8.98222
803
+ ▁personal -8.98432
804
+ ▁video -8.98692
805
+ di -8.98706
806
+ ton -8.98847
807
+ du -8.99338
808
+ ▁options -8.99577
809
+ V -8.99815
810
+ ▁access -8.99845
811
+ ac -8.99865
812
+ ▁move -8.99877
813
+ ated -8.99953
814
+ ▁pick -8.99961
815
+ ▁ex -8.99986
816
+ ▁women -9.0015
817
+ ight -9.00412
818
+ side -9.00447
819
+ ▁hand -9.01062
820
+ ▁phone -9.01249
821
+ ▁saying -9.01427
822
+ ▁lost -9.01431
823
+ ner -9.01444
824
+ ▁case -9.01444
825
+ ga -9.01614
826
+ wa -9.01614
827
+ ▁living -9.01703
828
+ way -9.01711
829
+ ial -9.01909
830
+ ▁fair -9.02005
831
+ ling -9.02119
832
+ ▁paper -9.02136
833
+ ▁create -9.02383
834
+ ▁Th -9.02498
835
+ ▁design -9.02688
836
+ ▁parents -9.03135
837
+ ▁human -9.0329
838
+ ick -9.0367
839
+ ▁20 -9.03993
840
+ ▁history -9.04029
841
+ ▁once -9.04182
842
+ ▁art -9.04653
843
+ we -9.04754
844
+ pp -9.04774
845
+ ▁Ra -9.04936
846
+ ▁kill -9.04943
847
+ ▁difference -9.05033
848
+ ever -9.05037
849
+ fo -9.05172
850
+ the -9.05254
851
+ ▁against -9.0541
852
+ ▁10 -9.0545
853
+ ▁become -9.05772
854
+ ▁movie -9.06082
855
+ hy -9.06092
856
+ rt -9.06104
857
+ mer -9.06369
858
+ ▁New -9.06403
859
+ ▁Se -9.06449
860
+ ▁morning -9.06664
861
+ ▁quite -9.06707
862
+ ▁shit -9.06713
863
+ ▁Or -9.06874
864
+ ea -9.06946
865
+ ▁enjoying -9.06963
866
+ ▁system -9.07114
867
+ ▁possible -9.07264
868
+ ▁step -9.074
869
+ ▁turn -9.07407
870
+ ▁trouble -9.07415
871
+ ▁tried -9.07416
872
+ tro -9.07513
873
+ ▁Mo -9.07551
874
+ ger -9.07559
875
+ ▁Sa -9.07621
876
+ ▁follow -9.0778
877
+ ▁common -9.08032
878
+ ary -9.08101
879
+ ▁room -9.08258
880
+ ▁sex -9.08302
881
+ ize -9.08307
882
+ ▁anymore -9.08324
883
+ The -9.08389
884
+ ud -9.0891
885
+ ", -9.08985
886
+ ▁drink -9.09002
887
+ ▁white -9.09136
888
+ ig -9.09199
889
+ que -9.09248
890
+ ▁Really -9.0942
891
+ ▁Ka -9.09501
892
+ ism -9.09568
893
+ tu -9.0969
894
+ ction -9.0969
895
+ ▁material -9.09701
896
+ ▁God -9.09821
897
+ ▁group -9.09856
898
+ ▁safe -9.10015
899
+ ▁piece -9.1002
900
+ der -9.10376
901
+ ▁including -9.10631
902
+ ▁St -9.10651
903
+ time -9.10965
904
+ ng -9.11471
905
+ ship -9.11611
906
+ day -9.11795
907
+ ▁couldn -9.12026
908
+ ▁focus -9.12199
909
+ 7 -9.12428
910
+ ap -9.12619
911
+ ▁wondering -9.12725
912
+ ▁reason -9.12735
913
+ ▁light -9.12869
914
+ ▁words -9.12912
915
+ ▁doctor -9.12993
916
+ port -9.13189
917
+ ▁proud -9.1333
918
+ ▁exactly -9.13338
919
+ ▁Ca -9.13385
920
+ ence -9.13825
921
+ mar -9.14114
922
+ ▁heart -9.14255
923
+ ▁promise -9.14276
924
+ ▁asked -9.14382
925
+ end -9.1442
926
+ ▁walk -9.14505
927
+ j -9.14513
928
+ ▁Ho -9.14769
929
+ ▁chance -9.14934
930
+ ▁wouldn -9.1497
931
+ ▁travel -9.15087
932
+ ▁Al -9.15109
933
+ ny -9.15287
934
+ ▁process -9.15413
935
+ ▁online -9.1543
936
+ ▁* -9.15483
937
+ rr -9.15759
938
+ ring -9.15765
939
+ ▁V -9.15814
940
+ low -9.15943
941
+ ▁full -9.15979
942
+ wo -9.16193
943
+ ok -9.16243
944
+ ▁win -9.1645
945
+ ▁stand -9.1679
946
+ ▁Bo -9.1695
947
+ ▁past -9.17076
948
+ ▁whole -9.17171
949
+ ▁research -9.17251
950
+ all -9.1729
951
+ ▁helpful -9.17314
952
+ ah -9.17316
953
+ ▁young -9.17613
954
+ ▁media -9.17662
955
+ ▁trip -9.17733
956
+ ▁until -9.17899
957
+ con -9.1794
958
+ out -9.17947
959
+ ▁cook -9.18024
960
+ ▁per -9.18168
961
+ gg -9.18277
962
+ ▁wonder -9.18483
963
+ ▁son -9.18716
964
+ ▁often -9.18716
965
+ ▁future -9.18733
966
+ ▁Then -9.18769
967
+ ▁children -9.18902
968
+ ▁Pro -9.18906
969
+ ster -9.18974
970
+ ▁clean -9.19071
971
+ ▁line -9.19135
972
+ oo -9.19206
973
+ ▁Te -9.19266
974
+ ▁sub -9.19291
975
+ ▁cl -9.19369
976
+ X -9.1941
977
+ ▁boss -9.19438
978
+ amp -9.19593
979
+ ▁three -9.20093
980
+ ▁miss -9.20117
981
+ ▁tonight -9.20153
982
+ ▁speak -9.20216
983
+ ▁times -9.20332
984
+ ▁special -9.2038
985
+ ▁baby -9.20612
986
+ ▁war -9.20618
987
+ fl -9.20711
988
+ ▁side -9.2072
989
+ ▁app -9.20892
990
+ ▁order -9.21129
991
+ od -9.21313
992
+ ▁type -9.2132
993
+ os -9.21475
994
+ ▁wi -9.21486
995
+ rate -9.2154
996
+ tion -9.2154
997
+ ▁difficult -9.21644
998
+ ical -9.21735
999
+ ey -9.21739
1000
+ ▁visit -9.22125
1001
+ ▁goes -9.22174
1002
+ ▁funny -9.22217
1003
+ ▁took -9.2224
1004
+ ▁im -9.22399
1005
+ ▁fan -9.22494
1006
+ ▁American -9.22501
1007
+ ▁Le -9.22936
1008
+ ▁Da -9.23252
1009
+ ▁sun -9.23288
1010
+ one -9.23459
1011
+ oh -9.23463
1012
+ ect -9.2368
1013
+ 6 -9.23927
1014
+ ▁mother -9.23931
1015
+ ▁news -9.2403
1016
+ ▁sweet -9.24084
1017
+ rap -9.24088
1018
+ my -9.2411
1019
+ ▁ago -9.24146
1020
+ ▁careful -9.24236
1021
+ ▁comp -9.24277
1022
+ ▁language -9.24284
1023
+ ▁alone -9.24483
1024
+ av -9.24567
1025
+ ▁yeah -9.2465
1026
+ ▁ha -9.24676
1027
+ ka -9.24743
1028
+ ▁popular -9.24821
1029
+ ▁yes -9.24975
1030
+ ep -9.25157
1031
+ ach -9.25165
1032
+ our -9.25169
1033
+ ▁either -9.25181
1034
+ iv -9.25255
1035
+ ▁Pa -9.25272
1036
+ ▁word -9.25356
1037
+ com -9.25796
1038
+ ▁college -9.25903
1039
+ ▁starting -9.26219
1040
+ ▁John -9.26251
1041
+ lar -9.26261
1042
+ ▁body -9.26281
1043
+ ▁positive -9.26449
1044
+ ▁games -9.26531
1045
+ pl -9.26611
1046
+ ries -9.26674
1047
+ ▁guys -9.26743
1048
+ ▁sick -9.26795
1049
+ ▁strong -9.26815
1050
+ ise -9.26945
1051
+ ▁worried -9.26997
1052
+ ▁thou -9.27365
1053
+ par -9.27388
1054
+ ▁Mi -9.27542
1055
+ ▁decision -9.27549
1056
+ ble -9.27595
1057
+ ▁given -9.2762
1058
+ ▁Would -9.27733
1059
+ ▁share -9.28008
1060
+ ▁fire -9.28053
1061
+ ▁improve -9.28103
1062
+ ▁website -9.28103
1063
+ ▁men -9.28291
1064
+ ▁tired -9.28477
1065
+ ▁Alright -9.28478
1066
+ ▁state -9.2875
1067
+ ▁situation -9.29034
1068
+ ▁hair -9.29083
1069
+ ▁Which -9.29222
1070
+ ▁user -9.29273
1071
+ ▁[ -9.29451
1072
+ ▁dead -9.2948
1073
+ ▁comes -9.29585
1074
+ ▁power -9.29587
1075
+ ▁thy -9.29963
1076
+ ▁relax -9.30151
1077
+ ▁fall -9.30244
1078
+ ▁self -9.30392
1079
+ ▁dream -9.30543
1080
+ ▁Some -9.30693
1081
+ ▁park -9.3083
1082
+ ▁write -9.30887
1083
+ ▁Je -9.31028
1084
+ ▁outside -9.31167
1085
+ ▁6 -9.31533
1086
+ ▁report -9.31573
1087
+ ▁Su -9.31865
1088
+ ▁marketing -9.32198
1089
+ ▁ho -9.32203
1090
+ ▁fact -9.32221
1091
+ ear -9.32403
1092
+ ab -9.32464
1093
+ ▁Go -9.32549
1094
+ ▁Di -9.32594
1095
+ ak -9.32751
1096
+ ▁law -9.32952
1097
+ ▁advice -9.33042
1098
+ 00 -9.33068
1099
+ ▁listen -9.33142
1100
+ ▁large -9.33228
1101
+ ▁child -9.33235
1102
+ bs -9.33368
1103
+ ▁Con -9.33977
1104
+ ▁relationship -9.34021
1105
+ ▁J -9.3423
1106
+ ▁Dad -9.34416
1107
+ ture -9.34444
1108
+ ▁Jo -9.34563
1109
+ wi -9.34587
1110
+ ▁air -9.34591
1111
+ ▁Like -9.34658
1112
+ ps -9.3473
1113
+ ▁With -9.3484
1114
+ ▁US -9.35091
1115
+ ▁rest -9.35183
1116
+ ▁deserve -9.35208
1117
+ chi -9.35277
1118
+ ex -9.3535
1119
+ ▁tomorrow -9.35459
1120
+ qui -9.35583
1121
+ ▁types -9.35825
1122
+ ya -9.35853
1123
+ ▁Hmm -9.36022
1124
+ ▁father -9.36024
1125
+ ▁These -9.3624
1126
+ unt -9.3633
1127
+ ▁example -9.36683
1128
+ ▁kid -9.36742
1129
+ ron -9.36752
1130
+ ▁cup -9.37294
1131
+ ite -9.37384
1132
+ ub -9.37391
1133
+ ▁books -9.37452
1134
+ ▁sign -9.37532
1135
+ ▁forward -9.37546
1136
+ ▁unique -9.37625
1137
+ ▁Pre -9.37826
1138
+ ▁second -9.38033
1139
+ led -9.38169
1140
+ ative -9.38204
1141
+ ( -9.38236
1142
+ ▁technology -9.38238
1143
+ ▁19 -9.38344
1144
+ ping -9.38443
1145
+ ▁mine -9.3854
1146
+ ▁sp -9.3856
1147
+ ▁control -9.3865
1148
+ ▁card -9.38774
1149
+ ▁gone -9.38822
1150
+ bl -9.38909
1151
+ _ -9.39062
1152
+ ip -9.39133
1153
+ ob -9.39309
1154
+ ition -9.39403
1155
+ ▁guide -9.39689
1156
+ ▁public -9.39772
1157
+ ▁cause -9.39783
1158
+ ▁watching -9.39933
1159
+ dditional -9.40113
1160
+ ▁save -9.40181
1161
+ ▁late -9.40299
1162
+ ▁OK -9.40357
1163
+ ▁drive -9.40542
1164
+ ▁Z -9.40583
1165
+ ▁role -9.40605
1166
+ ▁spend -9.40775
1167
+ ▁virus -9.40942
1168
+ ▁moment -9.41022
1169
+ ▁space -9.41082
1170
+ ious -9.4114
1171
+ ▁felt -9.41157
1172
+ ff -9.41249
1173
+ ▁data -9.41376
1174
+ ▁alright -9.41794
1175
+ ▁learning -9.41803
1176
+ den -9.41827
1177
+ cent -9.41962
1178
+ ped -9.42112
1179
+ ure -9.42186
1180
+ ▁culture -9.42216
1181
+ ja -9.42348
1182
+ ▁chicken -9.42427
1183
+ ▁dinner -9.42428
1184
+ ▁key -9.42534
1185
+ ang -9.42583
1186
+ 0 -9.42647
1187
+ ▁brother -9.42921
1188
+ ▁significant -9.43071
1189
+ ▁reading -9.43101
1190
+ ▁fight -9.43163
1191
+ bb -9.43224
1192
+ J -9.43272
1193
+ ▁fast -9.43362
1194
+ ▁awesome -9.43503
1195
+ ▁town -9.43509
1196
+ ▁Ar -9.43603
1197
+ ome -9.43631
1198
+ ▁Ta -9.43836
1199
+ ▁sleep -9.43981
1200
+ ▁leg -9.44065
1201
+ ▁available -9.44153
1202
+ ▁potential -9.44153
1203
+ ▁local -9.44157
1204
+ ▁date -9.44168
1205
+ ▁forget -9.44177
1206
+ ▁loved -9.44192
1207
+ ▁instead -9.44371
1208
+ ▁knew -9.44371
1209
+ ▁season -9.4438
1210
+ ▁Ga -9.44477
1211
+ ▁upset -9.44616
1212
+ ▁Lo -9.44785
1213
+ ther -9.44807
1214
+ cy -9.44954
1215
+ nu -9.45058
1216
+ lin -9.45205
1217
+ ▁cut -9.45233
1218
+ ▁cold -9.45326
1219
+ ▁weekend -9.45491
1220
+ sha -9.4554
1221
+ ▁product -9.45545
1222
+ ▁pass -9.45774
1223
+ ward -9.45871
1224
+ ki -9.46086
1225
+ ▁running -9.46146
1226
+ cur -9.46165
1227
+ ▁arm -9.46186
1228
+ ▁crazy -9.46354
1229
+ ▁teacher -9.46364
1230
+ board -9.46397
1231
+ ▁Du -9.46442
1232
+ ▁boy -9.46649
1233
+ ▁hot -9.46673
1234
+ ▁months -9.46997
1235
+ ▁pain -9.47043
1236
+ ▁email -9.4726
1237
+ ▁gun -9.47345
1238
+ ▁area -9.47448
1239
+ ue -9.47507
1240
+ ▁successful -9.47575
1241
+ ▁afraid -9.47698
1242
+ ▁7 -9.47861
1243
+ ning -9.47923
1244
+ ▁Ju -9.4804
1245
+ ▁Create -9.48146
1246
+ ▁price -9.48165
1247
+ ▁trust -9.48409
1248
+ gi -9.48508
1249
+ ▁detailed -9.48526
1250
+ press -9.48589
1251
+ ▁wear -9.48723
1252
+ ▁top -9.48759
1253
+ ▁article -9.48832
1254
+ ▁police -9.48834
1255
+ ▁caught -9.48835
1256
+ ▁die -9.48983
1257
+ ▁behind -9.49061
1258
+ ase -9.49377
1259
+ ▁fuck -9.49392
1260
+ ▁based -9.49456
1261
+ ▁death -9.49541
1262
+ ▁coffee -9.49748
1263
+ ▁energy -9.49748
1264
+ ▁industry -9.49748
1265
+ ▁di -9.5013
1266
+ ▁usual -9.50337
1267
+ use -9.50343
1268
+ ▁Exactly -9.50441
1269
+ ose -9.50471
1270
+ lt -9.50668
1271
+ ▁suggestions -9.50677
1272
+ ▁computer -9.50905
1273
+ ▁smart -9.50924
1274
+ ▁Tell -9.5094
1275
+ ▁shall -9.50955
1276
+ ▁service -9.50972
1277
+ ▁healthy -9.51461
1278
+ ▁usually -9.5151
1279
+ word -9.51734
1280
+ ▁Dr -9.51775
1281
+ yn -9.51863
1282
+ ible -9.51932
1283
+ ▁animals -9.51943
1284
+ ding -9.51981
1285
+ ▁weeks -9.52006
1286
+ ▁looked -9.52128
1287
+ ▁song -9.52193
1288
+ ▁fr -9.52232
1289
+ step -9.52321
1290
+ lay -9.52525
1291
+ ▁inside -9.52592
1292
+ ▁market -9.52635
1293
+ ▁Come -9.52723
1294
+ ▁view -9.52757
1295
+ ▁needed -9.52778
1296
+ old -9.52949
1297
+ lf -9.52997
1298
+ ▁main -9.5301
1299
+ ▁hours -9.53055
1300
+ ▁government -9.53262
1301
+ ▁mi -9.53473
1302
+ ami -9.53498
1303
+ ▁poem -9.53509
1304
+ rac -9.53514
1305
+ ier -9.53612
1306
+ ▁sort -9.53691
1307
+ ▁develop -9.53698
1308
+ ▁worked -9.53776
1309
+ ▁dance -9.53839
1310
+ ▁building -9.54025
1311
+ ▁red -9.54069
1312
+ ile -9.54082
1313
+ pri -9.54092
1314
+ ▁street -9.54219
1315
+ ▁general -9.54221
1316
+ ail -9.54321
1317
+ ▁lives -9.54349
1318
+ ▁sit -9.54364
1319
+ lock -9.54649
1320
+ ler -9.5465
1321
+ ▁hands -9.54819
1322
+ ley -9.54889
1323
+ men -9.54893
1324
+ ▁exciting -9.54944
1325
+ ▁giving -9.55113
1326
+ ▁film -9.55188
1327
+ ▁places -9.55209
1328
+ ▁eyes -9.55301
1329
+ Q -9.55389
1330
+ ▁mistake -9.55431
1331
+ ▁happens -9.55446
1332
+ und -9.55521
1333
+ ▁tree -9.55542
1334
+ ▁scared -9.55778
1335
+ ▁effective -9.55791
1336
+ ▁almost -9.55918
1337
+ ▁aren -9.56084
1338
+ ▁writing -9.56163
1339
+ ▁sister -9.56172
1340
+ ▁Fa -9.56385
1341
+ uck -9.56395
1342
+ ▁supposed -9.56514
1343
+ ability -9.56686
1344
+ ▁near -9.5671
1345
+ 30 -9.56766
1346
+ ▁clear -9.56779
1347
+ q -9.5687
1348
+ ▁toilet -9.56923
1349
+ ▁catch -9.5709
1350
+ ▁Sounds -9.57154
1351
+ ric -9.57406
1352
+ ▁pet -9.57463
1353
+ ▁sad -9.57539
1354
+ if -9.57553
1355
+ ▁serious -9.57761
1356
+ ▁fit -9.57918
1357
+ ▁bring -9.58005
1358
+ bra -9.58082
1359
+ tain -9.58298
1360
+ form -9.58459
1361
+ ▁steal -9.58511
1362
+ ju -9.5863
1363
+ ▁benefits -9.58836
1364
+ ▁cons -9.58844
1365
+ ▁takes -9.58939
1366
+ cover -9.5897
1367
+ ▁walking -9.59003
1368
+ ▁At -9.59013
1369
+ ▁Mr -9.59193
1370
+ ld -9.59218
1371
+ -19 -9.59262
1372
+ ▁America -9.59284
1373
+ That -9.59379
1374
+ ▁Un -9.59394
1375
+ ▁Ad -9.59454
1376
+ ized -9.5948
1377
+ cha -9.59494
1378
+ ▁Fine -9.59559
1379
+ ▁lucky -9.59568
1380
+ ▁Sorry -9.5966
1381
+ 50 -9.59685
1382
+ wn -9.59719
1383
+ ▁cost -9.59749
1384
+ ▁husband -9.59914
1385
+ ▁opinion -9.59914
1386
+ ▁study -9.60034
1387
+ ▁decided -9.60215
1388
+ oc -9.60577
1389
+ ▁explore -9.60682
1390
+ pu -9.60738
1391
+ pen -9.60842
1392
+ ai -9.61164
1393
+ ▁seeing -9.61313
1394
+ ▁whatever -9.6136
1395
+ ▁fault -9.61454
1396
+ ▁choice -9.61456
1397
+ ▁mental -9.61607
1398
+ ▁natural -9.61713
1399
+ ▁essay -9.61973
1400
+ fu -9.61999
1401
+ ris -9.6216
1402
+ ▁bank -9.62228
1403
+ ▁couple -9.62233
1404
+ ▁eye -9.62253
1405
+ ▁touch -9.62374
1406
+ ▁extra -9.62549
1407
+ ▁Hu -9.62581
1408
+ gan -9.6263
1409
+ ▁Sha -9.62667
1410
+ ors -9.6276
1411
+ ▁especially -9.62761
1412
+ ▁Car -9.62971
1413
+ ze -9.63347
1414
+ art -9.63523
1415
+ ▁restaurant -9.63546
1416
+ ▁track -9.6356
1417
+ val -9.63563
1418
+ wood -9.63567
1419
+ ual -9.63581
1420
+ ack -9.6359
1421
+ ▁helping -9.6366
1422
+ ▁Vi -9.63882
1423
+ ▁weather -9.6392
1424
+ ▁handle -9.63977
1425
+ ▁practice -9.64033
1426
+ ▁character -9.64058
1427
+ box -9.64256
1428
+ ▁Ki -9.64413
1429
+ ▁Does -9.646
1430
+ ▁delicious -9.64608
1431
+ ▁hoping -9.6466
1432
+ ▁lie -9.64734
1433
+ ▁join -9.64887
1434
+ ▁Man -9.64919
1435
+ rk -9.6502
1436
+ ew -9.65127
1437
+ ▁ball -9.65185
1438
+ that -9.65202
1439
+ lia -9.65223
1440
+ nn -9.65288
1441
+ ▁TV -9.65472
1442
+ ator -9.65637
1443
+ ▁stupid -9.65682
1444
+ ▁account -9.65931
1445
+ ▁lol -9.66038
1446
+ ▁Gr -9.66116
1447
+ su -9.66209
1448
+ ▁blood -9.66224
1449
+ ▁various -9.66228
1450
+ ▁telling -9.6628
1451
+ ▁letter -9.66322
1452
+ ple -9.66439
1453
+ 15 -9.6644
1454
+ ▁decide -9.66451
1455
+ ▁month -9.6655
1456
+ ▁works -9.66604
1457
+ ump -9.66754
1458
+ ▁terrible -9.66768
1459
+ ink -9.66864
1460
+ ▁honest -9.66905
1461
+ ▁level -9.67041
1462
+ ▁dark -9.67044
1463
+ ▁angry -9.67048
1464
+ ▁later -9.67243
1465
+ ?" -9.67309
1466
+ elect -9.67313
1467
+ ▁English -9.67315
1468
+ ▁physical -9.67315
1469
+ ▁laugh -9.67321
1470
+ ▁rock -9.67339
1471
+ ▁Che -9.67413
1472
+ ▁office -9.67415
1473
+ ay -9.67496
1474
+ ▁exercise -9.67589
1475
+ ▁fresh -9.6759
1476
+ ug -9.67591
1477
+ cho -9.6777
1478
+ ina -9.67826
1479
+ ▁People -9.67865
1480
+ ▁Ex -9.67995
1481
+ au -9.68001
1482
+ ▁trans -9.68119
1483
+ ▁grow -9.68145
1484
+ gen -9.68414
1485
+ ▁daughter -9.68418
1486
+ ▁major -9.68418
1487
+ Oh -9.68532
1488
+ My -9.68764
1489
+ ▁breath -9.68975
1490
+ ▁accept -9.68976
1491
+ 200 -9.6904
1492
+ ▁current -9.69118
1493
+ ▁door -9.69171
1494
+ hu -9.69213
1495
+ ▁married -9.69254
1496
+ ▁picture -9.69255
1497
+ ▁math -9.69446
1498
+ ▁girlfriend -9.69537
1499
+ ▁known -9.69545
1500
+ ▁include -9.69699
1501
+ ▁plant -9.70013
1502
+ ▁community -9.70105
1503
+ 20 -9.70278
1504
+ ign -9.70293
1505
+ ▁throw -9.70387
1506
+ ▁cake -9.70396
1507
+ imp -9.70416
1508
+ ▁deep -9.70452
1509
+ pack -9.70582
1510
+ ▁comprehensive -9.70662
1511
+ ▁four -9.70733
1512
+ ▁particular -9.70946
1513
+ mate -9.70998
1514
+ ray -9.71051
1515
+ ▁Ri -9.71181
1516
+ friendly -9.71347
1517
+ ▁hold -9.71379
1518
+ ▁consider -9.71394
1519
+ ▁cap -9.71553
1520
+ ▁traditional -9.71666
1521
+ AN -9.71803
1522
+ ▁value -9.71806
1523
+ ▁El -9.71841
1524
+ range -9.72041
1525
+ ▁search -9.72091
1526
+ ▁early -9.7212
1527
+ ▁recently -9.722
1528
+ ▁single -9.7241
1529
+ ▁luck -9.72504
1530
+ ▁action -9.72782
1531
+ ide -9.72903
1532
+ ▁lose -9.72926
1533
+ ▁avoid -9.72959
1534
+ ▁rather -9.72975
1535
+ % -9.73108
1536
+ ▁fix -9.73289
1537
+ wr -9.73312
1538
+ boo -9.73481
1539
+ ▁bus -9.73521
1540
+ ▁cooking -9.73636
1541
+ well -9.7367
1542
+ ▁Ne -9.73838
1543
+ ▁minutes -9.73847
1544
+ ▁band -9.73926
1545
+ lie -9.74065
1546
+ ▁due -9.74086
1547
+ ▁named -9.74117
1548
+ eg -9.74268
1549
+ ▁meeting -9.74278
1550
+ ▁clothes -9.74715
1551
+ nce -9.74773
1552
+ ▁completely -9.748
1553
+ ▁movies -9.74982
1554
+ ▁shouldn -9.75068
1555
+ ▁super -9.75138
1556
+ br -9.75205
1557
+ ▁pea -9.75253
1558
+ ▁instructions -9.75268
1559
+ ▁crime -9.75322
1560
+ ▁shot -9.75335
1561
+ ▁development -9.75359
1562
+ ▁count -9.75361
1563
+ ▁War -9.75895
1564
+ ▁describe -9.76203
1565
+ ▁contact -9.76203
1566
+ ▁ran -9.76326
1567
+ ▁send -9.76352
1568
+ ▁spending -9.76467
1569
+ ▁attack -9.76506
1570
+ nk -9.76547
1571
+ ▁steps -9.76621
1572
+ ▁Ke -9.76706
1573
+ ▁brand -9.7681
1574
+ ▁nature -9.76813
1575
+ ▁taste -9.7686
1576
+ 12 -9.76915
1577
+ log -9.76942
1578
+ ▁anti -9.76948
1579
+ ▁Look -9.7698
1580
+ ▁incredible -9.77107
1581
+ RE -9.77108
1582
+ cal -9.77227
1583
+ ▁customer -9.77258
1584
+ ▁His -9.77342
1585
+ ▁dogs -9.77364
1586
+ ▁version -9.77411
1587
+ ▁content -9.77411
1588
+ ▁along -9.77421
1589
+ ▁talked -9.77914
1590
+ pic -9.77945
1591
+ ▁Chinese -9.78019
1592
+ ▁gonna -9.7802
1593
+ ▁smell -9.78027
1594
+ ▁certainly -9.78121
1595
+ ▁afford -9.78325
1596
+ ▁affect -9.78325
1597
+ gue -9.78389
1598
+ ▁Ve -9.78404
1599
+ aw -9.78448
1600
+ ▁sell -9.7849
1601
+ ix -9.78519
1602
+ ▁mentioned -9.78531
1603
+ van -9.78587
1604
+ ▁fascinating -9.78631
1605
+ ▁simple -9.78634
1606
+ ware -9.78678
1607
+ een -9.78769
1608
+ ▁dress -9.78911
1609
+ ▁countries -9.78948
1610
+ ▁issue -9.79133
1611
+ ▁driving -9.79247
1612
+ ▁lead -9.79404
1613
+ ▁Any -9.79414
1614
+ zz -9.79495
1615
+ ▁others -9.79498
1616
+ ▁students -9.79531
1617
+ ▁COVID -9.79557
1618
+ ▁effort -9.79557
1619
+ ▁remind -9.79566
1620
+ ▁field -9.79613
1621
+ ▁On -9.7967
1622
+ ft -9.79845
1623
+ ▁Our -9.79859
1624
+ cing -9.79938
1625
+ ▁fill -9.79949
1626
+ ▁meal -9.7999
1627
+ ye -9.79997
1628
+ ▁rep -9.80125
1629
+ ▁birthday -9.80183
1630
+ ▁upon -9.80203
1631
+ ▁lord -9.80219
1632
+ ▁eco -9.8051
1633
+ og -9.80513
1634
+ ▁risk -9.80593
1635
+ gar -9.8079
1636
+ ▁source -9.80807
1637
+ ▁rid -9.8101
1638
+ ank -9.81086
1639
+ ory -9.81223
1640
+ ▁suck -9.81348
1641
+ ▁skin -9.8136
1642
+ ev -9.81366
1643
+ ▁yesterday -9.81435
1644
+ ▁First -9.81435
1645
+ ▁choose -9.81436
1646
+ ▁died -9.81483
1647
+ cast -9.81523
1648
+ ** -9.81538
1649
+ ▁front -9.81754
1650
+ ncy -9.81807
1651
+ ▁table -9.81866
1652
+ ▁Sh -9.81881
1653
+ ▁mad -9.81924
1654
+ ities -9.82021
1655
+ ▁shows -9.82047
1656
+ ▁weird -9.82078
1657
+ ▁non -9.82162
1658
+ ▁normal -9.82395
1659
+ ▁eating -9.82396
1660
+ ▁planning -9.8241
1661
+ 11 -9.8246
1662
+ ▁fear -9.8247
1663
+ ▁played -9.82504
1664
+ cor -9.82578
1665
+ ▁entire -9.82708
1666
+ ▁discover -9.82741
1667
+ uff -9.82776
1668
+ ▁wall -9.82794
1669
+ ▁sc -9.82919
1670
+ 000 -9.82964
1671
+ ▁Cr -9.8297
1672
+ ade -9.82981
1673
+ ▁meaning -9.83033
1674
+ ▁half -9.83048
1675
+ ▁Gu -9.83091
1676
+ ▁bed -9.83226
1677
+ ▁activities -9.83349
1678
+ ▁murder -9.83349
1679
+ ▁questions -9.83366
1680
+ ▁Sta -9.83382
1681
+ ▁position -9.83672
1682
+ ▁model -9.83707
1683
+ ▁understanding -9.83823
1684
+ ▁machine -9.83995
1685
+ ▁god -9.83997
1686
+ kin -9.84006
1687
+ ▁beat -9.84133
1688
+ ▁flavor -9.8432
1689
+ ▁cannot -9.84669
1690
+ ▁forever -9.84913
1691
+ ▁shop -9.84991
1692
+ ▁hell -9.84999
1693
+ ▁expect -9.85002
1694
+ ▁serve -9.85008
1695
+ quality -9.85054
1696
+ ▁code -9.85171
1697
+ ana -9.85173
1698
+ ▁campaign -9.853
1699
+ ▁record -9.85301
1700
+ ▁message -9.85332
1701
+ ▁exist -9.85383
1702
+ mission -9.85565
1703
+ + -9.85629
1704
+ ▁After -9.8563
1705
+ ▁York -9.85632
1706
+ ▁finding -9.85663
1707
+ ▁rich -9.85731
1708
+ ▁artist -9.85774
1709
+ ▁race -9.85946
1710
+ ▁coronavirus -9.85959
1711
+ ▁poor -9.8602
1712
+ ▁gave -9.86032
1713
+ ▁walks -9.86054
1714
+ ▁mess -9.8612
1715
+ ▁hospital -9.8629
1716
+ ▁author -9.8629
1717
+ ▁sir -9.86549
1718
+ ▁Will -9.86569
1719
+ ▁neighbor -9.8662
1720
+ bal -9.86673
1721
+ ging -9.86808
1722
+ -- -9.86853
1723
+ ▁Par -9.86923
1724
+ ▁accident -9.86942
1725
+ ▁allow -9.86946
1726
+ ▁application -9.86956
1727
+ ▁calm -9.87105
1728
+ ▁warm -9.87139
1729
+ ▁beach -9.87169
1730
+ ST -9.87219
1731
+ ▁return -9.8729
1732
+ ▁insult -9.87293
1733
+ ▁road -9.87311
1734
+ ▁cross -9.87362
1735
+ ER -9.87555
1736
+ pro -9.87783
1737
+ itch -9.87818
1738
+ ▁hang -9.87948
1739
+ ▁fake -9.87983
1740
+ ▁stick -9.87986
1741
+ ▁build -9.88017
1742
+ har -9.88106
1743
+ ▁meat -9.8811
1744
+ ▁noticed -9.88188
1745
+ ▁mis -9.88188
1746
+ ▁player -9.88245
1747
+ ham -9.88273
1748
+ ▁skills -9.8828
1749
+ ▁million -9.88301
1750
+ ▁achieve -9.88302
1751
+ ▁exp -9.88394
1752
+ body -9.88399
1753
+ ▁bill -9.88467
1754
+ ▁success -9.88615
1755
+ ▁worse -9.8864
1756
+ ▁draw -9.88646
1757
+ ▁charge -9.88669
1758
+ ▁finish -9.88674
1759
+ ▁Day -9.88692
1760
+ nic -9.88832
1761
+ = -9.8898
1762
+ work -9.89262
1763
+ ▁vote -9.8928
1764
+ af -9.89294
1765
+ ▁pizza -9.89321
1766
+ ▁moving -9.89321
1767
+ row -9.89322
1768
+ ▁mask -9.89323
1769
+ ▁performance -9.89336
1770
+ ▁China -9.89349
1771
+ What -9.89381
1772
+ ▁treat -9.89496
1773
+ ▁2. -9.89565
1774
+ ON -9.89569
1775
+ ▁taken -9.89584
1776
+ ▁missed -9.896
1777
+ ▁education -9.89673
1778
+ ▁raise -9.89688
1779
+ ▁green -9.89688
1780
+ ▁resources -9.89818
1781
+ lation -9.89886
1782
+ ▁garden -9.90008
1783
+ ▁anyway -9.90208
1784
+ gl -9.90306
1785
+ ▁secret -9.90352
1786
+ ▁Ok -9.90357
1787
+ ▁May -9.90427
1788
+ No -9.90617
1789
+ ▁fish -9.90961
1790
+ ▁Ingredients -9.91045
1791
+ ▁progress -9.91049
1792
+ ▁train -9.9105
1793
+ purpose -9.9105
1794
+ ▁push -9.91061
1795
+ ▁rain -9.9121
1796
+ what -9.91391
1797
+ ▁stuck -9.914
1798
+ ▁racist -9.914
1799
+ ▁strange -9.91402
1800
+ ▁interest -9.91436
1801
+ ▁coach -9.91441
1802
+ ▁following -9.91596
1803
+ ▁honey -9.91753
1804
+ ▁events -9.91817
1805
+ AR -9.91824
1806
+ ▁career -9.91893
1807
+ ▁problems -9.91897
1808
+ ▁event -9.91904
1809
+ ▁Sometimes -9.92095
1810
+ ▁bought -9.92113
1811
+ ▁professional -9.92205
1812
+ ▁dec -9.92394
1813
+ ▁sports -9.92395
1814
+ ▁tweet -9.92449
1815
+ ▁lunch -9.92451
1816
+ ath -9.92478
1817
+ ▁listening -9.92801
1818
+ ▁born -9.92815
1819
+ ▁willing -9.92824
1820
+ ax -9.9297
1821
+ ▁plans -9.92976
1822
+ ▁Wa -9.93107
1823
+ ▁Nothing -9.9316
1824
+ script -9.93167
1825
+ ▁helped -9.93184
1826
+ ▁weight -9.93336
1827
+ ▁Coronavirus -9.9351
1828
+ app -9.93696
1829
+ ▁bag -9.93935
1830
+ read -9.94176
1831
+ ▁recent -9.94227
1832
+ ▁paid -9.94245
1833
+ ▁challenges -9.94292
1834
+ wing -9.94429
1835
+ ▁rec -9.94442
1836
+ ▁inter -9.94475
1837
+ ▁present -9.94537
1838
+ ▁Pe -9.94557
1839
+ ▁horse -9.94586
1840
+ ▁ingredients -9.94759
1841
+ ound -9.94811
1842
+ nder -9.94957
1843
+ ium -9.95098
1844
+ ▁studying -9.95138
1845
+ ▁Every -9.95299
1846
+ cultural -9.95309
1847
+ ▁famous -9.95309
1848
+ ▁gu -9.95326
1849
+ ▁dishes -9.95391
1850
+ ▁gay -9.95392
1851
+ ▁protect -9.95482
1852
+ ▁continue -9.95593
1853
+ ▁United -9.95673
1854
+ ▁perform -9.95675
1855
+ ▁waiting -9.95693
1856
+ ▁pair -9.95705
1857
+ ▁hour -9.9578
1858
+ ▁worst -9.96037
1859
+ ▁grade -9.96176
1860
+ ▁ahead -9.96225
1861
+ ▁blue -9.96406
1862
+ ▁huge -9.96472
1863
+ ▁wonderful -9.96526
1864
+ ▁Bar -9.96587
1865
+ ▁depends -9.96617
1866
+ ▁response -9.96771
1867
+ ▁scene -9.96775
1868
+ ▁totally -9.96907
1869
+ ▁dish -9.96954
1870
+ ▁features -9.96984
1871
+ bul -9.97007
1872
+ ▁Christmas -9.9714
1873
+ ▁block -9.97189
1874
+ ▁image -9.97224
1875
+ Fi -9.97329
1876
+ igh -9.97494
1877
+ ▁chocolate -9.97511
1878
+ ▁respect -9.97511
1879
+ lee -9.97848
1880
+ ▁method -9.97883
1881
+ ▁match -9.97885
1882
+ ▁function -9.98228
1883
+ ▁pull -9.98296
1884
+ ▁Am -9.98354
1885
+ ▁pan -9.98584
1886
+ ▁across -9.9863
1887
+ ▁comfortable -9.9865
1888
+ ▁web -9.98797
1889
+ night -9.98836
1890
+ ▁businesses -9.98884
1891
+ len -9.98906
1892
+ ugh -9.98998
1893
+ ▁plenty -9.99006
1894
+ ▁creating -9.99006
1895
+ ▁review -9.99031
1896
+ char -9.99304
1897
+ ▁cop -9.99309
1898
+ ▁See -9.99314
1899
+ ments -9.99455
1900
+ ▁By -9.99517
1901
+ ▁ski -9.99557
1902
+ ▁slow -9.99609
1903
+ istic -9.99612
1904
+ ▁roll -9.99626
1905
+ ▁Fr -9.99633
1906
+ uc -9.99826
1907
+ ▁suit -9.99879
1908
+ ▁bear -9.99892
1909
+ ▁expensive -10.0014
1910
+ ▁illegal -10.0014
1911
+ ▁nervous -10.0014
1912
+ ▁opportunity -10.0014
1913
+ ▁voice -10.0015
1914
+ ▁window -10.0016
1915
+ jo -10.0016
1916
+ don -10.0017
1917
+ ▁meant -10.0045
1918
+ ▁summer -10.0052
1919
+ ▁goal -10.0058
1920
+ ▁Mu -10.0069
1921
+ ▁mix -10.0069
1922
+ ▁gas -10.0074
1923
+ ▁wearing -10.0081
1924
+ OR -10.0082
1925
+ ▁science -10.0091
1926
+ ▁Ah -10.0092
1927
+ ▁Qu -10.0097
1928
+ ▁longer -10.0098
1929
+ print -10.0099
1930
+ ky -10.0099
1931
+ ify -10.0101
1932
+ ▁cry -10.0106
1933
+ ▁size -10.0111
1934
+ 10 -10.0111
1935
+ lly -10.0114
1936
+ ▁learned -10.0136
1937
+ for -10.0142
1938
+ her -10.0159
1939
+ ▁Ru -10.0166
1940
+ ▁fruit -10.0168
1941
+ ology -10.0188
1942
+ care -10.0196
1943
+ ▁fly -10.02
1944
+ path -10.0201
1945
+ ▁bother -10.0202
1946
+ ▁limit -10.0207
1947
+ ▁milk -10.0207
1948
+ ▁software -10.0207
1949
+ ▁sometime -10.023
1950
+ like -10.0239
1951
+ ▁Everything -10.0245
1952
+ die -10.0248
1953
+ izing -10.0248
1954
+ ▁egg -10.0251
1955
+ ▁Nice -10.0255
1956
+ ▁teach -10.0261
1957
+ ▁Har -10.0274
1958
+ ▁cream -10.0285
1959
+ zi -10.0291
1960
+ vin -10.0295
1961
+ ▁tools -10.0319
1962
+ ▁ground -10.0324
1963
+ ▁humans -10.0343
1964
+ je -10.0355
1965
+ ending -10.036
1966
+ minute -10.0361
1967
+ ▁target -10.0363
1968
+ IN -10.0363
1969
+ ▁quick -10.039
1970
+ dia -10.0391
1971
+ ▁condition -10.0403
1972
+ ▁visual -10.0403
1973
+ nch -10.0411
1974
+ ▁oil -10.0412
1975
+ book -10.0426
1976
+ coming -10.0432
1977
+ mark -10.044
1978
+ ▁fashion -10.0442
1979
+ ▁prefer -10.0442
1980
+ ▁attention -10.0442
1981
+ ▁modern -10.0442
1982
+ ▁attract -10.0445
1983
+ ▁Ty -10.0455
1984
+ ▁six -10.0456
1985
+ ▁calling -10.0462
1986
+ ▁involved -10.0463
1987
+ ▁animal -10.0465
1988
+ light -10.0479
1989
+ ▁approach -10.0482
1990
+ ▁measure -10.0482
1991
+ ▁Br -10.0488
1992
+ ▁fat -10.0488
1993
+ force -10.0495
1994
+ gent -10.0498
1995
+ ▁Lu -10.0508
1996
+ ▁topic -10.0515
1997
+ ain -10.0515
1998
+ ▁themselves -10.0522
1999
+ ▁Everyone -10.0523
2000
+ ▁forgot -10.0524
2001
+ ▁techniques -10.0541
2002
+ how -10.0542
2003
+ ▁Ge -10.0549
2004
+ ▁ride -10.055
2005
+ ▁president -10.0558
2006
+ ▁goals -10.056
2007
+ ▁gift -10.0562
2008
+ ▁dangerous -10.0563
2009
+ ▁concern -10.0572
2010
+ ▁fucking -10.0591
2011
+ ▁reach -10.0602
2012
+ ▁daily -10.0603
2013
+ ▁bomb -10.0603
2014
+ ▁Sc -10.0603
2015
+ ▁cute -10.0605
2016
+ ▁1: -10.0619
2017
+ ▁page -10.0625
2018
+ ▁includes -10.0631
2019
+ col -10.0632
2020
+ lon -10.0633
2021
+ ▁political -10.0643
2022
+ ▁correct -10.0643
2023
+ ▁certain -10.067
2024
+ ▁employee -10.0685
2025
+ ▁absolutely -10.0687
2026
+ ▁ban -10.0693
2027
+ ▁implement -10.0694
2028
+ lor -10.071
2029
+ ▁Ni -10.0723
2030
+ ▁strategies -10.0725
2031
+ ▁Given -10.0725
2032
+ ▁bread -10.0728
2033
+ ▁spot -10.0731
2034
+ ▁specifically -10.0733
2035
+ ook -10.0743
2036
+ ▁dear -10.0749
2037
+ Yes -10.0754
2038
+ ▁struggling -10.0766
2039
+ ▁middle -10.0766
2040
+ ▁Bra -10.0766
2041
+ ▁sweetie -10.0771
2042
+ ▁link -10.0773
2043
+ ▁realize -10.078
2044
+ ▁Cor -10.0781
2045
+ back -10.0788
2046
+ ▁Introduction -10.0807
2047
+ ▁whether -10.0807
2048
+ of -10.0832
2049
+ ney -10.0844
2050
+ ▁confident -10.0848
2051
+ ▁security -10.0848
2052
+ ▁pop -10.0852
2053
+ ▁passage -10.0853
2054
+ based -10.0864
2055
+ ▁truth -10.089
2056
+ ▁curious -10.089
2057
+ ▁Was -10.0893
2058
+ hop -10.0895
2059
+ ▁scary -10.0902
2060
+ ▁Com -10.0915
2061
+ ▁mod -10.0918
2062
+ ▁Fe -10.0919
2063
+ ▁understandable -10.0925
2064
+ ▁shopping -10.0932
2065
+ ▁express -10.0934
2066
+ ▁tr -10.0961
2067
+ bri -10.0965
2068
+ ▁five -10.0966
2069
+ ▁ticket -10.0973
2070
+ ▁photo -10.0975
2071
+ ▁training -10.0978
2072
+ ▁camera -10.0992
2073
+ ▁Hello -10.0994
2074
+ parent -10.0994
2075
+ ▁classes -10.1002
2076
+ ▁evidence -10.1015
2077
+ craft -10.1015
2078
+ ▁amount -10.1015
2079
+ ▁Plus -10.1016
2080
+ ▁killed -10.1018
2081
+ ▁bike -10.1023
2082
+ ▁Mon -10.1037
2083
+ ▁prove -10.105
2084
+ ▁German -10.1057
2085
+ ▁attend -10.1073
2086
+ ▁Cha -10.1087
2087
+ ▁Definitely -10.11
2088
+ ▁climate -10.11
2089
+ ▁third -10.11
2090
+ ▁products -10.1118
2091
+ ▁finished -10.1138
2092
+ ▁cheese -10.1139
2093
+ ▁trick -10.1144
2094
+ stan -10.1163
2095
+ nny -10.1164
2096
+ ▁complain -10.1185
2097
+ ▁prevent -10.1185
2098
+ ▁boring -10.1186
2099
+ ▁student -10.1189
2100
+ ▁Two -10.1193
2101
+ ▁shape -10.12
2102
+ ▁pr -10.1205
2103
+ ▁turned -10.1229
2104
+ ties -10.1233
2105
+ ▁Tra -10.124
2106
+ ▁changes -10.1249
2107
+ ▁changed -10.1255
2108
+ ▁Get -10.1269
2109
+ ️ -10.1271
2110
+ ▁degree -10.1274
2111
+ rd -10.1276
2112
+ ▁vegan -10.1276
2113
+ ▁stress -10.1282
2114
+ EN -10.1286
2115
+ ▁organ -10.1295
2116
+ ▁site -10.1298
2117
+ ▁file -10.1299
2118
+ ▁analysis -10.1314
2119
+ ▁spread -10.1315
2120
+ ified -10.1322
2121
+ ari -10.1347
2122
+ ▁North -10.136
2123
+ ▁moved -10.1371
2124
+ anna -10.1372
2125
+ pur -10.1377
2126
+ Step -10.138
2127
+ ▁reasons -10.1385
2128
+ ▁keeping -10.1385
2129
+ ▁Tr -10.1392
2130
+ ▁Congratulations -10.1401
2131
+ ▁himself -10.1401
2132
+ ▁recommendations -10.1402
2133
+ ▁snow -10.1402
2134
+ ox -10.1406
2135
+ ▁gr -10.1428
2136
+ ▁vegetables -10.1434
2137
+ ▁drop -10.1445
2138
+ ▁variety -10.1445
2139
+ ▁companies -10.1445
2140
+ ▁foot -10.1446
2141
+ ▁commit -10.1448
2142
+ ▁considered -10.1448
2143
+ ▁whi -10.1451
2144
+ ▁Most -10.1454
2145
+ ▁Bri -10.1455
2146
+ ▁legal -10.1464
2147
+ ▁drugs -10.1471
2148
+ ▁carry -10.1475
2149
+ you -10.1485
2150
+ ▁enjoyed -10.1485
2151
+ ▁States -10.1487
2152
+ Do -10.1487
2153
+ ▁description -10.1489
2154
+ sexual -10.1504
2155
+ ▁Wi -10.1509
2156
+ ▁soul -10.1509
2157
+ ▁Ya -10.1516
2158
+ ▁connect -10.1528
2159
+ ization -10.1536
2160
+ ▁brain -10.1536
2161
+ ▁Per -10.1565
2162
+ ▁enter -10.1574
2163
+ ▁promote -10.1578
2164
+ ▁services -10.161
2165
+ ▁regular -10.1623
2166
+ ▁truly -10.1623
2167
+ ▁Corona -10.1623
2168
+ ▁input -10.1625
2169
+ ker -10.1644
2170
+ ▁exam -10.1647
2171
+ ▁incorporate -10.1661
2172
+ ▁internet -10.1668
2173
+ ▁cash -10.1671
2174
+ ▁ignor -10.1672
2175
+ ▁sitting -10.1673
2176
+ ▁Col -10.1689
2177
+ ▁ill -10.169
2178
+ ▁Chi -10.1691
2179
+ ▁issues -10.1695
2180
+ nie -10.1701
2181
+ ▁realized -10.1706
2182
+ related -10.1708
2183
+ ▁Should -10.1713
2184
+ ▁Her -10.1715
2185
+ ▁towards -10.1722
2186
+ ▁blow -10.1733
2187
+ ▁mention -10.1737
2188
+ ▁seriously -10.1737
2189
+ ▁San -10.174
2190
+ ES -10.1749
2191
+ ▁excellent -10.1758
2192
+ ▁financial -10.1758
2193
+ ▁football -10.1758
2194
+ ▁frustrating -10.1758
2195
+ ▁creative -10.1758
2196
+ ▁feature -10.1765
2197
+ ▁buying -10.177
2198
+ nut -10.1788
2199
+ ▁mu -10.1797
2200
+ gri -10.1801
2201
+ ▁within -10.1804
2202
+ ▁wanna -10.1804
2203
+ 40 -10.1805
2204
+ ▁rob -10.1811
2205
+ You -10.1812
2206
+ ▁Jack -10.1816
2207
+ ▁manage -10.1817
2208
+ ▁waste -10.1846
2209
+ ▁journey -10.1849
2210
+ ▁Anything -10.1849
2211
+ ▁task -10.1849
2212
+ ▁Even -10.185
2213
+ air -10.1853
2214
+ ▁girls -10.1854
2215
+ mail -10.1855
2216
+ ▁sea -10.1878
2217
+ 14 -10.1887
2218
+ atic -10.189
2219
+ ▁straight -10.1895
2220
+ ▁Donald -10.1895
2221
+ ▁club -10.1897
2222
+ ▁earth -10.1898
2223
+ heat -10.1898
2224
+ ▁influence -10.1909
2225
+ ▁painting -10.1933
2226
+ ▁impressive -10.1941
2227
+ ▁summarize -10.1941
2228
+ ▁protest -10.1941
2229
+ ▁driver -10.1944
2230
+ 16 -10.1946
2231
+ He -10.1947
2232
+ ▁currently -10.1963
2233
+ ▁happening -10.1973
2234
+ ▁detail -10.1985
2235
+ ▁conversation -10.1987
2236
+ ▁Europe -10.1995
2237
+ leader -10.2
2238
+ ▁farm -10.2007
2239
+ ush -10.201
2240
+ ▁similar -10.203
2241
+ ▁organization -10.2034
2242
+ za -10.2038
2243
+ ▁grab -10.2052
2244
+ ▁harm -10.2071
2245
+ ▁paying -10.2088
2246
+ !!! -10.209
2247
+ ▁budget -10.2127
2248
+ ▁Right -10.2133
2249
+ ▁compare -10.2134
2250
+ ▁wide -10.214
2251
+ iff -10.2146
2252
+ ▁players -10.2157
2253
+ ▁Joe -10.2165
2254
+ ▁fighting -10.2166
2255
+ hal -10.2172
2256
+ ▁accomplish -10.2174
2257
+ ▁Fo -10.2177
2258
+ ▁complete -10.2208
2259
+ tory -10.2213
2260
+ ^ -10.2222
2261
+ ▁Google -10.2222
2262
+ ▁Obama -10.2222
2263
+ ED -10.2226
2264
+ ned -10.226
2265
+ ▁origin -10.2268
2266
+ ▁bird -10.227
2267
+ instruction -10.2276
2268
+ ung -10.2282
2269
+ TH -10.2297
2270
+ ou -10.2307
2271
+ ▁challenge -10.2308
2272
+ ▁comment -10.2312
2273
+ ▁negative -10.2317
2274
+ ▁sentence -10.2318
2275
+ ▁wild -10.232
2276
+ ▁quickly -10.2334
2277
+ ▁alive -10.2335
2278
+ ▁broke -10.2353
2279
+ ▁Actually -10.2365
2280
+ ▁French -10.2365
2281
+ ▁sauce -10.2365
2282
+ ▁stories -10.2365
2283
+ ▁novel -10.2367
2284
+ ▁flu -10.2373
2285
+ ▁trade -10.2395
2286
+ ▁society -10.2414
2287
+ ▁South -10.2416
2288
+ ▁Alex -10.2416
2289
+ ▁partner -10.2422
2290
+ ▁somewhere -10.2425
2291
+ phone -10.2432
2292
+ standing -10.2455
2293
+ ▁environment -10.2455
2294
+ ▁begin -10.2461
2295
+ ▁Black -10.2464
2296
+ ▁Min -10.2464
2297
+ ▁AI -10.2469
2298
+ Ti -10.2489
2299
+ haha -10.2508
2300
+ ▁patient -10.2511
2301
+ ▁medical -10.2512
2302
+ ▁jump -10.2512
2303
+ ense -10.2516
2304
+ ▁auto -10.2527
2305
+ ▁gold -10.2541
2306
+ ▁cheat -10.2557
2307
+ ▁increase -10.256
2308
+ ▁location -10.256
2309
+ ▁brought -10.256
2310
+ ▁pray -10.2563
2311
+ pec -10.2563
2312
+ ▁Ste -10.2573
2313
+ ▁tax -10.2575
2314
+ bin -10.2577
2315
+ ING -10.2577
2316
+ ants -10.2588
2317
+ ▁struggle -10.2609
2318
+ cc -10.2621
2319
+ ong -10.2633
2320
+ ▁Wal -10.2636
2321
+ car -10.2638
2322
+ OP -10.264
2323
+ ▁Si -10.265
2324
+ ▁Develop -10.2659
2325
+ ▁purchase -10.2659
2326
+ ute -10.2661
2327
+ inary -10.2667
2328
+ del -10.267
2329
+ ▁setting -10.2673
2330
+ From -10.2676
2331
+ ▁def -10.2686
2332
+ ▁tea -10.2697
2333
+ gram -10.27
2334
+ ▁grocery -10.2709
2335
+ ▁library -10.2709
2336
+ ▁routine -10.2709
2337
+ ▁Tu -10.2717
2338
+ ▁East -10.2718
2339
+ ▁created -10.2721
2340
+ ica -10.2735
2341
+ ▁para -10.2745
2342
+ ▁concert -10.2759
2343
+ ▁pleasure -10.2759
2344
+ ▁vacation -10.2759
2345
+ ▁church -10.2759
2346
+ ▁characters -10.2762
2347
+ ▁option -10.2764
2348
+ ▁shoot -10.277
2349
+ list -10.2772
2350
+ ▁considering -10.2777
2351
+ ▁Time -10.2788
2352
+ ery -10.2803
2353
+ 500 -10.2808
2354
+ ▁contribute -10.2809
2355
+ ▁speed -10.281
2356
+ ▁invest -10.281
2357
+ ▁temp -10.2814
2358
+ ache -10.2852
2359
+ ▁allowed -10.2855
2360
+ ▁Try -10.2858
2361
+ ▁basic -10.2858
2362
+ ▁2020 -10.2862
2363
+ ▁management -10.2863
2364
+ ▁practices -10.2867
2365
+ ▁wind -10.2871
2366
+ ▁cow -10.2882
2367
+ gro -10.2883
2368
+ ▁notice -10.2892
2369
+ verse -10.2906
2370
+ ▁alternative -10.291
2371
+ ▁providing -10.291
2372
+ ▁boyfriend -10.291
2373
+ ▁series -10.2912
2374
+ ▁movement -10.2936
2375
+ ▁effectively -10.2938
2376
+ fully -10.2942
2377
+ ▁properly -10.2961
2378
+ ▁maintain -10.2963
2379
+ ▁acc -10.2984
2380
+ ▁tall -10.2985
2381
+ ▁claim -10.2987
2382
+ az -10.3001
2383
+ 60 -10.3003
2384
+ ▁brings -10.3005
2385
+ down -10.3005
2386
+ ▁easier -10.3012
2387
+ ▁regard -10.3013
2388
+ ▁court -10.3013
2389
+ ▁anywhere -10.3018
2390
+ ▁En -10.3033
2391
+ ▁Pri -10.3036
2392
+ ograph -10.3037
2393
+ 25 -10.3041
2394
+ off -10.3056
2395
+ 😂 -10.3064
2396
+ ▁fell -10.3069
2397
+ hole -10.308
2398
+ LE -10.3082
2399
+ ino -10.3094
2400
+ yes -10.3099
2401
+ ▁customers -10.3109
2402
+ stream -10.3116
2403
+ ▁balance -10.3116
2404
+ ▁White -10.3116
2405
+ race -10.3119
2406
+ lift -10.3128
2407
+ gree -10.3142
2408
+ ▁schedule -10.3168
2409
+ ▁solution -10.3168
2410
+ ▁period -10.3168
2411
+ ▁salad -10.3168
2412
+ ▁magic -10.317
2413
+ latest -10.3175
2414
+ Z -10.318
2415
+ ▁camp -10.3185
2416
+ ▁Love -10.3191
2417
+ ▁songs -10.3203
2418
+ ▁Pi -10.3219
2419
+ ▁challenging -10.322
2420
+ ▁literally -10.322
2421
+ ▁reduce -10.3239
2422
+ ▁custom -10.3253
2423
+ ▁pla -10.3254
2424
+ ▁Christian -10.3273
2425
+ ▁feet -10.3275
2426
+ ▁Take -10.3281
2427
+ ▁awful -10.3283
2428
+ ▁King -10.3298
2429
+ ▁City -10.332
2430
+ ▁practicing -10.3326
2431
+ ▁significance -10.3326
2432
+ ▁credit -10.3326
2433
+ ▁Earth -10.3326
2434
+ ▁lesson -10.3332
2435
+ ▁homework -10.3332
2436
+ ▁drug -10.3348
2437
+ ▁Ko -10.3359
2438
+ late -10.3372
2439
+ ▁prop -10.3373
2440
+ ▁collect -10.3373
2441
+ ▁fantastic -10.3379
2442
+ pit -10.338
2443
+ ▁missing -10.3396
2444
+ ▁updated -10.3407
2445
+ ati -10.3415
2446
+ ▁essential -10.3432
2447
+ ▁trend -10.3447
2448
+ ▁Dar -10.3452
2449
+ ik -10.3467
2450
+ ▁diet -10.3468
2451
+ ▁according -10.3486
2452
+ ▁earlier -10.3486
2453
+ ▁necessary -10.3486
2454
+ ▁Accord -10.3486
2455
+ ▁embarrass -10.3486
2456
+ ▁beauty -10.3486
2457
+ ▁mouth -10.3486
2458
+ ▁doubt -10.3487
2459
+ ▁hide -10.3491
2460
+ free -10.3498
2461
+ ale -10.35
2462
+ ▁India -10.3503
2463
+ ▁rules -10.3512
2464
+ MA -10.3513
2465
+ ▁cro -10.3523
2466
+ ▁tend -10.3526
2467
+ ▁attitude -10.354
2468
+ ▁floor -10.354
2469
+ ▁bunch -10.3542
2470
+ ▁discuss -10.355
2471
+ ▁nor -10.3588
2472
+ ▁dev -10.359
2473
+ ▁holiday -10.3595
2474
+ ▁suffer -10.3595
2475
+ ▁damn -10.3595
2476
+ ▁dumb -10.3595
2477
+ ▁hunt -10.3598
2478
+ ▁bright -10.3598
2479
+ ▁designed -10.3601
2480
+ load -10.3613
2481
+ ▁flat -10.3615
2482
+ ▁acting -10.362
2483
+ vy -10.3631
2484
+ ▁Aww -10.3649
2485
+ ▁THE -10.3658
2486
+ ather -10.3698
2487
+ ▁poison -10.3704
2488
+ ▁leaving -10.3704
2489
+ ▁excuse -10.3705
2490
+ ▁election -10.3706
2491
+ Re -10.3707
2492
+ ▁Make -10.3714
2493
+ ▁Read -10.3719
2494
+ NA -10.3722
2495
+ ▁below -10.3726
2496
+ corn -10.3728
2497
+ over -10.3731
2498
+ ▁drinking -10.3735
2499
+ ▁Clinton -10.3759
2500
+ ▁global -10.3759
2501
+ ▁peaceful -10.3759
2502
+ ▁turnip -10.3763
2503
+ SE -10.3793
2504
+ ▁conf -10.3793
2505
+ ▁Indian -10.3804
2506
+ ALL -10.3813
2507
+ ▁officer -10.3813
2508
+ ▁represent -10.3815
2509
+ ▁biggest -10.3815
2510
+ ▁heavy -10.3815
2511
+ ▁region -10.3815
2512
+ ua -10.3837
2513
+ hose -10.3839
2514
+ ▁talent -10.384
2515
+ ▁Ken -10.3843
2516
+ ▁butter -10.385
2517
+ gh -10.3864
2518
+ ▁World -10.3871
2519
+ ▁Covid -10.3871
2520
+ ▁loud -10.3873
2521
+ ▁rough -10.3873
2522
+ illa -10.3882
2523
+ 80 -10.3883
2524
+ fer -10.3886
2525
+ fic -10.3889
2526
+ ▁hath -10.3897
2527
+ ▁staying -10.3914
2528
+ ▁hobbies -10.3927
2529
+ ▁client -10.3927
2530
+ ▁stock -10.3927
2531
+ ▁outdoor -10.3927
2532
+ ▁hungry -10.3928
2533
+ ▁comments -10.3938
2534
+ ▁Art -10.3951
2535
+ ▁quit -10.3956
2536
+ ▁gotten -10.3963
2537
+ ▁universe -10.3984
2538
+ ▁truck -10.3985
2539
+ ▁device -10.3985
2540
+ ▁blog -10.3992
2541
+ ▁winter -10.3999
2542
+ ▁growing -10.4007
2543
+ ▁soup -10.4007
2544
+ ▁paint -10.4009
2545
+ 21 -10.4039
2546
+ ▁narrative -10.404
2547
+ ▁succeed -10.404
2548
+ ▁Provide -10.404
2549
+ ▁element -10.404
2550
+ ▁forgive -10.4041
2551
+ ▁lived -10.4051
2552
+ ▁Once -10.4054
2553
+ ▁boat -10.4069
2554
+ ▁stressed -10.4081
2555
+ stone -10.4086
2556
+ ▁sent -10.4089
2557
+ rated -10.409
2558
+ ▁audience -10.4097
2559
+ ▁concept -10.4097
2560
+ ▁speech -10.4097
2561
+ ▁request -10.4098
2562
+ ▁emotion -10.4098
2563
+ ▁besides -10.4102
2564
+ place -10.4105
2565
+ ▁owner -10.4141
2566
+ ▁mostly -10.4146
2567
+ Christ -10.4152
2568
+ ▁drunk -10.4155
2569
+ ▁Work -10.4155
2570
+ ▁results -10.4157
2571
+ ▁lady -10.4159
2572
+ Co -10.4162
2573
+ blo -10.4162
2574
+ ▁Ye -10.4175
2575
+ ▁total -10.4185
2576
+ ▁Dear -10.4196
2577
+ play -10.42
2578
+ ▁seat -10.4212
2579
+ ▁jealous -10.4213
2580
+ ▁interview -10.4213
2581
+ room -10.4223
2582
+ right -10.4247
2583
+ ▁effects -10.4265
2584
+ ko -10.427
2585
+ ▁platform -10.4271
2586
+ ought -10.4273
2587
+ ▁chat -10.4276
2588
+ style -10.4281
2589
+ burg -10.4282
2590
+ hin -10.429
2591
+ ▁surprised -10.4297
2592
+ ▁Bi -10.4315
2593
+ ▁historical -10.4329
2594
+ ▁knowledge -10.4329
2595
+ ribu -10.433
2596
+ ▁quiet -10.4331
2597
+ ▁apartment -10.4333
2598
+ AM -10.4334
2599
+ ▁speaking -10.4345
2600
+ yl -10.4347
2601
+ ▁chill -10.4352
2602
+ person -10.4354
2603
+ ▁require -10.4356
2604
+ life -10.4364
2605
+ ▁University -10.4388
2606
+ ▁quarantine -10.4388
2607
+ ▁subject -10.4388
2608
+ ▁damage -10.4388
2609
+ ▁document -10.4388
2610
+ ▁relaxing -10.4392
2611
+ ▁apart -10.4393
2612
+ zo -10.4396
2613
+ lyn -10.4396
2614
+ ▁overall -10.4406
2615
+ ▁noble -10.4418
2616
+ ▁members -10.442
2617
+ IT -10.4425
2618
+ ▁Up -10.4425
2619
+ AP -10.4427
2620
+ ▁letting -10.4441
2621
+ ▁ocean -10.4447
2622
+ ▁adult -10.4448
2623
+ ▁snack -10.4448
2624
+ scient -10.4449
2625
+ ▁brown -10.4454
2626
+ ▁blame -10.446
2627
+ ▁reality -10.4465
2628
+ ▁individuals -10.4474
2629
+ ▁sales -10.4475
2630
+ via -10.448
2631
+ house -10.4485
2632
+ ▁bat -10.4493
2633
+ ▁final -10.4494
2634
+ ▁Lord -10.4509
2635
+ ▁transportation -10.451
2636
+ ▁villager -10.4513
2637
+ ▁gain -10.4515
2638
+ ▁Same -10.4523
2639
+ ▁older -10.4529
2640
+ ▁dating -10.4531
2641
+ ▁State -10.4564
2642
+ ▁emotional -10.4566
2643
+ ▁annoying -10.4567
2644
+ ▁everywhere -10.4568
2645
+ ▁bitch -10.4576
2646
+ ▁putt -10.4581
2647
+ In -10.4588
2648
+ ▁random -10.4627
2649
+ ▁aspect -10.4627
2650
+ ▁fli -10.4636
2651
+ ▁Mary -10.4636
2652
+ pper -10.464
2653
+ ▁manager -10.4653
2654
+ ▁string -10.4654
2655
+ ▁skill -10.4662
2656
+ ▁Saturday -10.4688
2657
+ Muslim -10.4688
2658
+ ▁complex -10.4688
2659
+ ▁further -10.4688
2660
+ ▁shoes -10.4688
2661
+ ▁remove -10.4689
2662
+ !" -10.4693
2663
+ ▁harder -10.4695
2664
+ 13 -10.4699
2665
+ ama -10.4706
2666
+ ▁update -10.4717
2667
+ hold -10.4718
2668
+ ▁Honestly -10.4749
2669
+ ▁behavior -10.4749
2670
+ ▁responsible -10.4749
2671
+ ▁above -10.4752
2672
+ ▁perfectly -10.4753
2673
+ ▁powerful -10.4756
2674
+ ▁station -10.4762
2675
+ ▁nobody -10.4764
2676
+ ▁Rome -10.4764
2677
+ ▁Bro -10.4776
2678
+ lash -10.4784
2679
+ ▁Got -10.4795
2680
+ ▁Japanese -10.481
2681
+ ▁bored -10.481
2682
+ ▁mood -10.4812
2683
+ ▁gentle -10.4813
2684
+ ▁hearing -10.4817
2685
+ ▁involve -10.484
2686
+ ▁prepared -10.4841
2687
+ ▁clearly -10.4844
2688
+ wee -10.4853
2689
+ ▁Bill -10.4867
2690
+ ▁exploring -10.4872
2691
+ ▁respond -10.4872
2692
+ ▁destroy -10.4872
2693
+ ▁guitar -10.4872
2694
+ ▁produce -10.4872
2695
+ ▁parasol -10.4872
2696
+ ▁policy -10.4872
2697
+ ▁adapt -10.4872
2698
+ ▁hack -10.4886
2699
+ ▁Mc -10.4888
2700
+ div -10.4889
2701
+ ▁salt -10.4923
2702
+ ▁brief -10.4934
2703
+ ▁except -10.4934
2704
+ ▁dirty -10.4934
2705
+ ▁pressure -10.4934
2706
+ ▁neighborhood -10.4934
2707
+ ▁More -10.4934
2708
+ ▁sharing -10.4935
2709
+ ▁section -10.4936
2710
+ ▁round -10.4938
2711
+ ▁shame -10.4939
2712
+ ▁benefit -10.4949
2713
+ ▁adding -10.4955
2714
+ ▁Say -10.4964
2715
+ ▁curse -10.4987
2716
+ ▁apply -10.4988
2717
+ ▁emb -10.4994
2718
+ ▁advance -10.4996
2719
+ ▁military -10.4996
2720
+ ▁strategy -10.4996
2721
+ ▁blind -10.4996
2722
+ ▁prison -10.4997
2723
+ Jesus -10.4999
2724
+ ▁kick -10.5001
2725
+ ▁terms -10.5001
2726
+ duct -10.5002
2727
+ ▁Very -10.5004
2728
+ ▁mini -10.503
2729
+ ▁referr -10.5041
2730
+ year -10.5046
2731
+ ▁vegetarian -10.5059
2732
+ ▁spirit -10.5059
2733
+ ▁dancing -10.5059
2734
+ Gra -10.5067
2735
+ ▁yo -10.507
2736
+ ▁Roman -10.5075
2737
+ ▁Haha -10.508
2738
+ ▁dick -10.5088
2739
+ ▁prepare -10.509
2740
+ grand -10.51
2741
+ ▁cell -10.5104
2742
+ 17 -10.5107
2743
+ ▁spec -10.5116
2744
+ ▁becoming -10.5121
2745
+ ▁encourage -10.5122
2746
+ ▁eventually -10.5122
2747
+ ▁engage -10.5122
2748
+ ▁faith -10.5122
2749
+ ▁finger -10.5126
2750
+ PS -10.5135
2751
+ ▁growth -10.5137
2752
+ ▁comm -10.5149
2753
+ ▁holding -10.5152
2754
+ ▁noise -10.5154
2755
+ ification -10.5183
2756
+ ▁Twitter -10.5186
2757
+ ▁background -10.5186
2758
+ ▁perspective -10.5186
2759
+ ▁Friday -10.5186
2760
+ ▁reddit -10.5186
2761
+ ▁expert -10.5187
2762
+ ▁threat -10.5187
2763
+ gate -10.5187
2764
+ ▁importance -10.5187
2765
+ bel -10.5197
2766
+ ▁rice -10.521
2767
+ ▁passed -10.5211
2768
+ ▁member -10.5218
2769
+ ov -10.5226
2770
+ lap -10.5243
2771
+ ▁walked -10.5243
2772
+ ▁added -10.5246
2773
+ ▁divorce -10.525
2774
+ ▁entanglement -10.525
2775
+ ▁pandemic -10.525
2776
+ ▁confirm -10.525
2777
+ ▁cousin -10.525
2778
+ ▁adopt -10.5251
2779
+ ▁weren -10.5251
2780
+ ▁sky -10.5257
2781
+ ▁classic -10.5267
2782
+ ▁qua -10.5275
2783
+ How -10.5283
2784
+ ▁wine -10.5295
2785
+ ▁planet -10.5301
2786
+ Russia -10.5313
2787
+ ▁Smith -10.5315
2788
+ ▁female -10.5315
2789
+ ▁religion -10.5315
2790
+ ▁1/2 -10.5315
2791
+ ▁changing -10.5315
2792
+ ▁weapon -10.5315
2793
+ ▁thousand -10.5315
2794
+ ▁appeal -10.5315
2795
+ ▁dude -10.5317
2796
+ ▁assist -10.5318
2797
+ ▁Cat -10.5321
2798
+ non -10.5327
2799
+ ▁factors -10.5352
2800
+ ▁apple -10.5375
2801
+ ▁judge -10.538
2802
+ ▁adventure -10.538
2803
+ ▁mobile -10.538
2804
+ ▁disease -10.538
2805
+ ▁shock -10.538
2806
+ ▁corona -10.5383
2807
+ ▁bake -10.5384
2808
+ ▁poet -10.5385
2809
+ ▁obviously -10.5386
2810
+ cla -10.5401
2811
+ ▁Nope -10.5408
2812
+ ena -10.5415
2813
+ ▁useful -10.5417
2814
+ direct -10.543
2815
+ ▁Kar -10.5435
2816
+ AD -10.5436
2817
+ ▁announce -10.5445
2818
+ ▁anxiety -10.5445
2819
+ ▁paternity -10.5445
2820
+ ▁promotion -10.5445
2821
+ ▁traffic -10.5445
2822
+ ▁wrote -10.5445
2823
+ ▁wedding -10.5445
2824
+ fri -10.5446
2825
+ lthough -10.5446
2826
+ ▁master -10.5447
2827
+ ▁weak -10.5455
2828
+ ▁among -10.5456
2829
+ ▁broken -10.5462
2830
+ ▁statement -10.5468
2831
+ ▁offering -10.5505
2832
+ ▁context -10.5511
2833
+ ▁kitchen -10.5511
2834
+ ▁volunteer -10.5511
2835
+ ▁Doctor -10.5511
2836
+ ▁swear -10.5511
2837
+ ▁Japan -10.5511
2838
+ ▁spell -10.5512
2839
+ ▁yell -10.5516
2840
+ ▁solve -10.5516
2841
+ ▁Today -10.5524
2842
+ ▁tear -10.5537
2843
+ ‘ -10.5577
2844
+ ▁bottle -10.5577
2845
+ ▁economy -10.5577
2846
+ ▁private -10.5577
2847
+ ▁Hillary -10.5577
2848
+ ▁album -10.5577
2849
+ ▁admit -10.5577
2850
+ ▁electric -10.5577
2851
+ ▁appear -10.558
2852
+ ▁rule -10.5601
2853
+ ▁invite -10.5608
2854
+ ▁sup -10.561
2855
+ ▁candy -10.5611
2856
+ poli -10.5614
2857
+ ▁ruin -10.5624
2858
+ ▁$1 -10.5636
2859
+ ▁``` -10.5644
2860
+ ▁argument -10.5644
2861
+ ▁easily -10.5644
2862
+ ▁official -10.5644
2863
+ ▁Title -10.5644
2864
+ ▁fool -10.5646
2865
+ ▁collection -10.5651
2866
+ ▁spent -10.5656
2867
+ ▁cancel -10.5665
2868
+ ▁sale -10.5671
2869
+ lynn -10.5679
2870
+ ▁male -10.5683
2871
+ ▁host -10.5686
2872
+ US -10.5687
2873
+ ▁killing -10.5688
2874
+ uch -10.5694
2875
+ ▁critic -10.571
2876
+ ▁temperature -10.5711
2877
+ ▁smoke -10.5712
2878
+ ▁winning -10.5714
2879
+ ▁Abou -10.5716
2880
+ bug -10.575
2881
+ ▁impressed -10.5778
2882
+ ▁School -10.5778
2883
+ ▁crack -10.5779
2884
+ pho -10.5781
2885
+ ▁stopped -10.5785
2886
+ ▁pee -10.5801
2887
+ ▁sight -10.5839
2888
+ ▁Spanish -10.5846
2889
+ ▁dollars -10.5847
2890
+ ▁feedback -10.5847
2891
+ ▁saving -10.5847
2892
+ ▁highlight -10.585
2893
+ ▁Count -10.5852
2894
+ ▁items -10.5862
2895
+ ▁disco -10.5871
2896
+ imply -10.588
2897
+ ▁caused -10.5884
2898
+ ▁engineer -10.5887
2899
+ ▁iron -10.5892
2900
+ ▁basketball -10.5915
2901
+ ▁breakfast -10.5915
2902
+ ▁communication -10.5915
2903
+ ▁pregnant -10.5915
2904
+ ▁surround -10.5915
2905
+ ▁beginning -10.5916
2906
+ ▁hiking -10.5917
2907
+ ▁Sal -10.5947
2908
+ ▁Gi -10.5974
2909
+ ▁explanation -10.5984
2910
+ ▁responsibility -10.5984
2911
+ ▁unfortunate -10.5984
2912
+ ▁itself -10.5984
2913
+ ▁predict -10.5984
2914
+ ▁multi -10.5985
2915
+ dig -10.6018
2916
+ ▁influenced -10.6032
2917
+ ▁Comp -10.6041
2918
+ ▁plane -10.6048
2919
+ frustrated -10.6053
2920
+ ▁criminal -10.6053
2921
+ ▁knock -10.6053
2922
+ ▁nurse -10.6053
2923
+ ▁While -10.6054
2924
+ ▁smile -10.6055
2925
+ ▁Keep -10.6056
2926
+ ▁joy -10.6058
2927
+ yah -10.6072
2928
+ We -10.6077
2929
+ ▁stealing -10.6092
2930
+ NY -10.6105
2931
+ Awesome -10.6123
2932
+ efficient -10.6123
2933
+ ▁ridiculous -10.6123
2934
+ ▁insurance -10.6123
2935
+ ▁heaven -10.6123
2936
+ ▁YOU -10.6123
2937
+ ▁idiot -10.6123
2938
+ fro -10.6124
2939
+ ▁active -10.6125
2940
+ ▁wake -10.6126
2941
+ ▁kiss -10.6128
2942
+ residential -10.6131
2943
+ ▁Ben -10.6132
2944
+ ▁faced -10.6158
2945
+ bil -10.6158
2946
+ ▁flowers -10.6159
2947
+ ▁workout -10.6162
2948
+ – -10.6194
2949
+ ▁citizen -10.6194
2950
+ ▁sustainable -10.6194
2951
+ stitution -10.6194
2952
+ ▁network -10.6194
2953
+ ▁pepper -10.6194
2954
+ ▁kept -10.6195
2955
+ ▁Angel -10.6198
2956
+ ▁gene -10.6201
2957
+ ▁soft -10.6221
2958
+ ▁surprise -10.6232
2959
+ ▁Being -10.6246
2960
+ ▁nose -10.6246
2961
+ ▁Men -10.6261
2962
+ ept -10.6263
2963
+ ▁Sunday -10.6265
2964
+ ▁ancient -10.6265
2965
+ ▁Italian -10.6265
2966
+ break -10.6266
2967
+ ▁treatment -10.6277
2968
+ ▁engine -10.6294
2969
+ head -10.6296
2970
+ hel -10.633
2971
+ ` -10.6336
2972
+ ▁digital -10.6336
2973
+ ▁studies -10.6336
2974
+ ▁museum -10.6336
2975
+ ▁built -10.6336
2976
+ ▁frisk -10.6336
2977
+ ▁ensure -10.6336
2978
+ ▁communities -10.6336
2979
+ ▁gym -10.6337
2980
+ ▁swim -10.6337
2981
+ rc -10.6393
2982
+ ▁dry -10.6394
2983
+ ▁festival -10.6408
2984
+ ▁instrument -10.6408
2985
+ ▁basically -10.641
2986
+ ▁House -10.6411
2987
+ ▁pound -10.6412
2988
+ ▁shut -10.6413
2989
+ face -10.6421
2990
+ even -10.6425
2991
+ ▁selling -10.6428
2992
+ ▁lying -10.6428
2993
+ ▁Home -10.6436
2994
+ ▁bless -10.6444
2995
+ uh -10.6452
2996
+ ▁hum -10.6455
2997
+ ▁comfort -10.6477
2998
+ ▁Paris -10.6479
2999
+ ▁High -10.6479
3000
+ ▁California -10.648
3001
+ ▁apologize -10.648
3002
+ ▁bunker -10.648
3003
+ ▁practical -10.648
3004
+ ▁confused -10.6481
3005
+ ▁escape -10.6481
3006
+ ▁teargas -10.6481
3007
+ ▁uncle -10.6481
3008
+ ▁demand -10.6481
3009
+ ▁Russian -10.6482
3010
+ ▁scar -10.6494
3011
+ ▁visiting -10.6498
3012
+ ▁score -10.6502
3013
+ AL -10.6505
3014
+ ▁watched -10.6517
3015
+ ▁Lol -10.6529
3016
+ power -10.655
3017
+ ▁episode -10.6553
3018
+ ▁folklore -10.6553
3019
+ ▁contract -10.6554
3020
+ ▁losing -10.6556
3021
+ ▁punch -10.6561
3022
+ ▁Park -10.6561
3023
+ ▁safety -10.6564
3024
+ ▁rent -10.6569
3025
+ RO -10.6579
3026
+ CK -10.658
3027
+ ▁refer -10.6583
3028
+ ▁Too -10.6589
3029
+ ▁defend -10.6595
3030
+ ette -10.6619
3031
+ ▁expected -10.6622
3032
+ ▁algorithm -10.6627
3033
+ ▁became -10.6627
3034
+ ▁Since -10.6628
3035
+ ▁baking -10.6629
3036
+ NE -10.6633
3037
+ ▁connection -10.6636
3038
+ ▁bigger -10.6642
3039
+ 99 -10.6647
3040
+ ▁decor -10.6654
3041
+ elli -10.6659
3042
+ ▁Only -10.666
3043
+ 23 -10.6661
3044
+ ▁deliver -10.6683
3045
+ ▁mid -10.6683
3046
+ ▁Lea -10.6696
3047
+ ▁Probably -10.6701
3048
+ ▁celebrate -10.6701
3049
+ ▁violence -10.6701
3050
+ ▁causing -10.6701
3051
+ ▁civil -10.6701
3052
+ ▁swimming -10.6701
3053
+ ▁cheap -10.6703
3054
+ ough -10.6716
3055
+ ▁farmer -10.6716
3056
+ ▁honestly -10.6738
3057
+ ▁sandwich -10.6776
3058
+ ▁virtual -10.6776
3059
+ ▁National -10.6776
3060
+ ▁screen -10.6776
3061
+ BC -10.6781
3062
+ ▁flight -10.6783
3063
+ ▁writer -10.679
3064
+ ▁center -10.679
3065
+ ▁Mark -10.6791
3066
+ ▁loss -10.6795
3067
+ angle -10.6798
3068
+ ▁accus -10.681
3069
+ ▁reflect -10.6817
3070
+ ▁pie -10.682
3071
+ ▁radi -10.6821
3072
+ ▁glass -10.6843
3073
+ ▁memories -10.6851
3074
+ ▁crowd -10.6851
3075
+ ▁corner -10.6859
3076
+ ▁Never -10.6864
3077
+ hood -10.6871
3078
+ ▁Stu -10.6875
3079
+ CO -10.6887
3080
+ ▁map -10.6891
3081
+ ▁suppose -10.6895
3082
+ ▁fail -10.6909
3083
+ ▁sleeping -10.6911
3084
+ ▁memory -10.6928
3085
+ ▁effect -10.6931
3086
+ ▁included -10.6939
3087
+ ▁determined -10.6948
3088
+ LA -10.6948
3089
+ ▁disappointed -10.6949
3090
+ ▁grace -10.6955
3091
+ ▁feed -10.6962
3092
+ 90 -10.6974
3093
+ ▁strength -10.7003
3094
+ ▁summary -10.7003
3095
+ ▁lonely -10.7003
3096
+ ▁multiple -10.7004
3097
+ ▁poop -10.7007
3098
+ ▁replace -10.7011
3099
+ ▁Fun -10.7017
3100
+ ▁metal -10.7021
3101
+ ford -10.7038
3102
+ ▁held -10.7043
3103
+ ▁equal -10.7045
3104
+ ▁successfully -10.705
3105
+ ▁throughou -10.7055
3106
+ ▁Chan -10.7056
3107
+ ▁bla -10.706
3108
+ logical -10.7063
3109
+ aught -10.7072
3110
+ ▁beyond -10.708
3111
+ ▁photograph -10.708
3112
+ ▁England -10.708
3113
+ ▁lazy -10.708
3114
+ ▁TIL -10.708
3115
+ ▁habit -10.708
3116
+ ▁belong -10.708
3117
+ ▁tomato -10.7081
3118
+ ▁Gar -10.7082
3119
+ ▁Jim -10.7088
3120
+ ▁Tri -10.7099
3121
+ factor -10.7107
3122
+ 19 -10.712
3123
+ ▁counter -10.714
3124
+ ▁actual -10.7144
3125
+ ▁Mexican -10.7157
3126
+ ▁epicenter -10.7157
3127
+ ▁quote -10.7157
3128
+ ▁crash -10.7157
3129
+ ▁graphic -10.7157
3130
+ ▁twice -10.7158
3131
+ ▁contain -10.716
3132
+ ▁2016 -10.716
3133
+ ▁pasta -10.7205
3134
+ ▁spin -10.7235
3135
+ ▁Python -10.7236
3136
+ ▁advantage -10.7236
3137
+ ▁population -10.7236
3138
+ ▁terrorist -10.7236
3139
+ ▁British -10.7236
3140
+ ▁install -10.7236
3141
+ ▁sugar -10.7236
3142
+ ▁motivated -10.7245
3143
+ proof -10.7253
3144
+ ▁mass -10.7262
3145
+ ▁Uh -10.7267
3146
+ ▁required -10.7279
3147
+ PA -10.7287
3148
+ ▁sport -10.7293
3149
+ fec -10.7311
3150
+ ▁Yo -10.7313
3151
+ ▁experiment -10.7314
3152
+ ▁psych -10.7315
3153
+ ▁grew -10.7316
3154
+ ▁passionate -10.7317
3155
+ ▁trash -10.7318
3156
+ ▁Someone -10.7318
3157
+ iness -10.7321
3158
+ ▁billion -10.7327
3159
+ ▁singing -10.7343
3160
+ ▁Wh -10.7362
3161
+ ▁Sir -10.7377
3162
+ ▁affected -10.7394
3163
+ ▁mountain -10.7394
3164
+ ▁survive -10.7394
3165
+ ▁beef -10.7395
3166
+ ▁entertain -10.7395
3167
+ ▁pretend -10.7396
3168
+ ▁conduct -10.7396
3169
+ ▁interact -10.7396
3170
+ ▁environmental -10.7405
3171
+ ▁located -10.7424
3172
+ ▁Nah -10.7427
3173
+ NO -10.7436
3174
+ ▁renewable -10.7474
3175
+ ▁average -10.7474
3176
+ ternal -10.7479
3177
+ crib -10.7488
3178
+ ▁24 -10.7504
3179
+ ez -10.7512
3180
+ ▁implemented -10.7534
3181
+ ▁concerned -10.7535
3182
+ CH -10.7536
3183
+ ▁vari -10.7551
3184
+ ▁rape -10.7552
3185
+ ▁Lily -10.7554
3186
+ — -10.7554
3187
+ ▁creature -10.7554
3188
+ ▁Scott -10.7554
3189
+ ▁minor -10.7555
3190
+ ▁porn -10.7559
3191
+ ▁national -10.7562
3192
+ ▁pants -10.7564
3193
+ ▁suddenly -10.7567
3194
+ ▁addict -10.7571
3195
+ ▁odd -10.7575
3196
+ ▁Mil -10.7603
3197
+ 0,000 -10.7618
3198
+ & -10.7625
3199
+ ▁object -10.7633
3200
+ ▁competition -10.7636
3201
+ ▁hundred -10.7636
3202
+ ▁journal -10.7636
3203
+ ▁stomach -10.7636
3204
+ ▁victim -10.7636
3205
+ ▁property -10.7636
3206
+ ▁Also -10.7637
3207
+ ▁France -10.7639
3208
+ ▁indeed -10.764
3209
+ ▁rush -10.7642
3210
+ ▁standard -10.7643
3211
+ ▁Johnson -10.7644
3212
+ ▁lack -10.7648
3213
+ ▁fishing -10.7651
3214
+ ▁genre -10.7654
3215
+ atter -10.7662
3216
+ ▁incorporat -10.7672
3217
+ flower -10.7677
3218
+ RA -10.7677
3219
+ ▁seven -10.7681
3220
+ ▁shooting -10.7699
3221
+ ▁voted -10.7714
3222
+ ▁Richard -10.7718
3223
+ ▁Unfortunately -10.7718
3224
+ ▁collaborat -10.7718
3225
+ ▁enhance -10.7718
3226
+ ▁ventilator -10.7718
3227
+ ▁false -10.7718
3228
+ ▁tennis -10.7718
3229
+ ▁lawyer -10.7718
3230
+ ▁launch -10.7718
3231
+ ▁healthcare -10.772
3232
+ ▁Using -10.7726
3233
+ ▁consume -10.7726
3234
+ ▁Show -10.7736
3235
+ point -10.7743
3236
+ ▁however -10.7745
3237
+ ▁West -10.7754
3238
+ ▁grown -10.7754
3239
+ ▁Za -10.7756
3240
+ brow -10.776
3241
+ ▁talented -10.7763
3242
+ ▁payment -10.7767
3243
+ ▁mindfulness -10.78
3244
+ ▁replied -10.78
3245
+ ▁Design -10.78
3246
+ ▁battle -10.7801
3247
+ ▁Yu -10.7803
3248
+ ▁brave -10.7811
3249
+ ▁buck -10.7814
3250
+ ▁wave -10.7824
3251
+ coin -10.7844
3252
+ ▁none -10.7853
3253
+ ▁structure -10.7861
3254
+ ▁Cap -10.7872
3255
+ ▁horror -10.7884
3256
+ ▁Something -10.7884
3257
+ ET -10.7885
3258
+ ▁assistant -10.7886
3259
+ ▁expand -10.7888
3260
+ ▁Wait -10.7901
3261
+ clock -10.791
3262
+ lab -10.7933
3263
+ Well -10.7938
3264
+ fish -10.794
3265
+ ▁pattern -10.7968
3266
+ ▁remain -10.7968
3267
+ ▁Three -10.7968
3268
+ ▁transfer -10.7969
3269
+ ▁Irish -10.7972
3270
+ ▁everyday -10.7979
3271
+ ▁Jane -10.7992
3272
+ ▁Over -10.7994
3273
+ ▁cheer -10.8
3274
+ ▁locate -10.802
3275
+ ▁equ -10.8029
3276
+ ▁exact -10.8048
3277
+ ▁reduc -10.8049
3278
+ ▁Lin -10.805
3279
+ ▁baseball -10.8052
3280
+ ▁crucial -10.8052
3281
+ ▁religious -10.8052
3282
+ ▁romantic -10.8052
3283
+ Friend -10.8052
3284
+ ▁foreign -10.8053
3285
+ ▁prince -10.8053
3286
+ ▁drum -10.8053
3287
+ ▁critical -10.8055
3288
+ ▁robot -10.8056
3289
+ ▁lower -10.8064
3290
+ ▁musician -10.8068
3291
+ ux -10.8073
3292
+ where -10.8073
3293
+ ▁somebody -10.81
3294
+ ▁release -10.8106
3295
+ ▁Call -10.8117
3296
+ ▁Val -10.8118
3297
+ ▁cheating -10.8122
3298
+ ▁Emily -10.8138
3299
+ ▁Republican -10.8138
3300
+ ▁Welcome -10.8138
3301
+ ▁activity -10.8138
3302
+ ▁architecture -10.8138
3303
+ ▁clothing -10.8138
3304
+ ▁industries -10.8138
3305
+ ▁medicine -10.8138
3306
+ ▁previous -10.8138
3307
+ ▁generation -10.8138
3308
+ ▁cloud -10.8138
3309
+ ▁wheel -10.8138
3310
+ ▁variations -10.8138
3311
+ ▁teaching -10.8138
3312
+ ▁woke -10.814
3313
+ ▁immediately -10.8141
3314
+ ▁containment -10.8141
3315
+ ▁Grand -10.8143
3316
+ ▁ugly -10.8144
3317
+ ella -10.8146
3318
+ ▁UK -10.8148
3319
+ ji -10.8175
3320
+ ▁blend -10.818
3321
+ lion -10.8181
3322
+ ▁individual -10.8186
3323
+ field -10.8209
3324
+ know -10.8216
3325
+ ▁London -10.8224
3326
+ ▁analyze -10.8224
3327
+ ▁cuisine -10.8224
3328
+ ▁phrase -10.8224
3329
+ ▁policies -10.8224
3330
+ ▁afternoon -10.8224
3331
+ ▁belief -10.8224
3332
+ ▁flour -10.8225
3333
+ 70 -10.8229
3334
+ oooo -10.8257
3335
+ ▁Africa -10.8299
3336
+ ▁employ -10.8309
3337
+ ▁scratch -10.8311
3338
+ ▁channel -10.8311
3339
+ ▁plastic -10.8311
3340
+ ▁soccer -10.8311
3341
+ ▁rewarding -10.8311
3342
+ ▁asshole -10.8312
3343
+ ▁direction -10.8312
3344
+ ▁package -10.8312
3345
+ ▁punish -10.8312
3346
+ ▁penis -10.8315
3347
+ ▁cough -10.8317
3348
+ ▁depending -10.8329
3349
+ ▁received -10.8339
3350
+ ▁Thou -10.8348
3351
+ ▁Max -10.8349
3352
+ ▁hug -10.8358
3353
+ Net -10.8365
3354
+ ▁sudden -10.8385
3355
+ é -10.8399
3356
+ ▁Florida -10.8399
3357
+ ▁intelligence -10.8399
3358
+ ▁marriage -10.8399
3359
+ ▁optimiz -10.8399
3360
+ ▁managing -10.8399
3361
+ ▁bowl -10.8399
3362
+ ▁knee -10.84
3363
+ ▁tongue -10.84
3364
+ ▁trump -10.84
3365
+ ▁valid -10.84
3366
+ dependent -10.84
3367
+ ▁Super -10.8408
3368
+ ▁covered -10.8413
3369
+ ▁production -10.8421
3370
+ ▁till -10.8437
3371
+ ▁copy -10.8442
3372
+ ▁coat -10.8449
3373
+ iah -10.8457
3374
+ ▁alcohol -10.8464
3375
+ EE -10.8464
3376
+ ▁Korea -10.8479
3377
+ ▁younger -10.8482
3378
+ \ -10.8487
3379
+ appropriate -10.8487
3380
+ ▁convince -10.8487
3381
+ ▁ourselves -10.8487
3382
+ ▁protagonist -10.8487
3383
+ ▁honor -10.8487
3384
+ ▁Apple -10.8487
3385
+ ▁plot -10.8489
3386
+ ▁display -10.849
3387
+ ▁cities -10.8491
3388
+ ▁tight -10.8491
3389
+ ▁orange -10.8492
3390
+ ▁cri -10.8496
3391
+ ▁moon -10.8496
3392
+ ably -10.8497
3393
+ ▁hotel -10.8504
3394
+ ▁parties -10.8513
3395
+ SH -10.8522
3396
+ RT -10.8544
3397
+ ▁European -10.8561
3398
+ volution -10.8574
3399
+ ▁bartender -10.8576
3400
+ ▁conflict -10.8576
3401
+ ▁identify -10.8576
3402
+ ▁prompt -10.8576
3403
+ ▁David -10.8577
3404
+ ▁William -10.8577
3405
+ ▁Pretty -10.8577
3406
+ ▁bathroom -10.8577
3407
+ ▁slice -10.8578
3408
+ ▁gather -10.8579
3409
+ ▁Char -10.8643
3410
+ take -10.8646
3411
+ ▁Bla -10.865
3412
+ GO -10.866
3413
+ liberal -10.8667
3414
+ ▁thread -10.8669
3415
+ ▁trail -10.8676
3416
+ ▁investment -10.8678
3417
+ ▁Win -10.8681
3418
+ ▁Asian -10.8685
3419
+ LL -10.8706
3420
+ Last -10.8713
3421
+ ▁thin -10.8734
3422
+ TS -10.8737
3423
+ ▁Yea -10.8744
3424
+ ▁dessert -10.8757
3425
+ ▁unfair -10.8757
3426
+ ▁politics -10.8757
3427
+ ▁switch -10.8758
3428
+ ▁attempt -10.8758
3429
+ ▁double -10.8758
3430
+ ▁intense -10.876
3431
+ ▁slave -10.876
3432
+ ▁hike -10.8774
3433
+ ▁passion -10.8778
3434
+ ▁discussing -10.8824
3435
+ Yeah -10.8833
3436
+ ▁smooth -10.8838
3437
+ ▁birth -10.8848
3438
+ ▁breast -10.8849
3439
+ ▁engaging -10.8849
3440
+ ▁typical -10.8849
3441
+ ▁Street -10.8849
3442
+ ▁intention -10.8849
3443
+ ▁homemade -10.8849
3444
+ ▁medium -10.885
3445
+ ▁prank -10.885
3446
+ ▁marry -10.8852
3447
+ ▁chair -10.8852
3448
+ ▁addition -10.8857
3449
+ ▁south -10.8866
3450
+ ▁actor -10.8872
3451
+ ▁generate -10.8888
3452
+ ▁bath -10.8894
3453
+ ▁USA -10.8894
3454
+ ▁camping -10.8918
3455
+ ▁Use -10.8937
3456
+ ▁disagree -10.8942
3457
+ ▁complicated -10.8942
3458
+ ▁garlic -10.8942
3459
+ ▁meditation -10.8942
3460
+ ▁freak -10.8942
3461
+ ▁James -10.8944
3462
+ ▁Tim -10.8945
3463
+ rose -10.8951
3464
+ ▁settle -10.8956
3465
+ ▁Paul -10.8956
3466
+ ▁cancer -10.8965
3467
+ ▁failed -10.8979
3468
+ ▁plate -10.8985
3469
+ ▁Won -10.8992
3470
+ ▁tag -10.9019
3471
+ ▁cul -10.902
3472
+ ▁wh -10.9024
3473
+ ▁1000 -10.9033
3474
+ ▁Mexico -10.9035
3475
+ ▁consequences -10.9035
3476
+ ▁knife -10.9035
3477
+ ▁overwhelming -10.9035
3478
+ ▁organized -10.9037
3479
+ ▁gross -10.9038
3480
+ ▁jail -10.9038
3481
+ ▁species -10.9038
3482
+ ▁babe -10.9039
3483
+ ▁huh -10.9044
3484
+ view -10.9046
3485
+ ▁Peter -10.9047
3486
+ ▁Mike -10.9051
3487
+ conserv -10.9065
3488
+ ▁stage -10.9068
3489
+ ▁flag -10.9075
3490
+ ▁unless -10.9077
3491
+ ▁Texas -10.913
3492
+ ▁candidate -10.913
3493
+ ▁clarify -10.913
3494
+ ▁ghost -10.913
3495
+ ▁tattoo -10.913
3496
+ ▁drama -10.913
3497
+ ▁gender -10.9131
3498
+ ▁capture -10.9133
3499
+ ▁Del -10.9135
3500
+ income -10.914
3501
+ ▁personality -10.9149
3502
+ ▁Act -10.917
3503
+ ▁dying -10.9177
3504
+ water -10.9187
3505
+ ▁edge -10.92
3506
+ ▁differ -10.9221
3507
+ ▁Facebook -10.9225
3508
+ ▁international -10.9225
3509
+ ▁overwhelmed -10.9225
3510
+ ▁College -10.9225
3511
+ ▁informative -10.9225
3512
+ ▁detect -10.9225
3513
+ ▁debt -10.9226
3514
+ ▁shout -10.9227
3515
+ ▁Canada -10.9229
3516
+ ▁shake -10.9232
3517
+ ▁constantly -10.9232
3518
+ ▁Things -10.9232
3519
+ ▁delivery -10.9249
3520
+ grad -10.9255
3521
+ ▁ruined -10.9269
3522
+ ility -10.9282
3523
+ ▁faster -10.9294
3524
+ Your -10.9305
3525
+ MO -10.9315
3526
+ ▁surgery -10.9321
3527
+ ▁technical -10.9321
3528
+ ▁pogrom -10.9321
3529
+ ▁vivid -10.9321
3530
+ ▁retail -10.9321
3531
+ ▁Besides -10.9321
3532
+ ▁commun -10.9323
3533
+ ▁transform -10.9323
3534
+ ▁alien -10.9325
3535
+ ▁reaction -10.9326
3536
+ ▁extremely -10.9331
3537
+ ▁glasses -10.9331
3538
+ ▁spice -10.9332
3539
+ ▁relate -10.9343
3540
+ leading -10.9356
3541
+ ▁released -10.9357
3542
+ ▁Many -10.9405
3543
+ ▁Shi -10.941
3544
+ ▁result -10.9414
3545
+ Instruction -10.9418
3546
+ ▁calculate -10.9418
3547
+ ▁favourite -10.9418
3548
+ ▁recognize -10.9418
3549
+ ▁square -10.9418
3550
+ conscious -10.9418
3551
+ ▁April -10.9418
3552
+ ▁smoking -10.9418
3553
+ ▁utiliz -10.9418
3554
+ ▁screw -10.9419
3555
+ ▁strip -10.942
3556
+ ▁yoga -10.942
3557
+ ▁construction -10.9421
3558
+ hand -10.9421
3559
+ ▁guest -10.943
3560
+ ▁tradition -10.9465
3561
+ bound -10.9477
3562
+ ▁painful -10.9505
3563
+ ▁exhaust -10.9516
3564
+ ▁fictional -10.9516
3565
+ ▁guilty -10.9516
3566
+ ▁identity -10.9516
3567
+ ▁century -10.9516
3568
+ ▁assume -10.9517
3569
+ ▁adjust -10.9517
3570
+ ▁childhood -10.9517
3571
+ ▁oven -10.9518
3572
+ ▁fellow -10.952
3573
+ ▁quant -10.952
3574
+ ▁accidentally -10.952
3575
+ ▁pool -10.9523
3576
+ ▁fitness -10.9523
3577
+ ▁neck -10.9527
3578
+ ▁Rock -10.9537
3579
+ ▁Cook -10.9541
3580
+ ▁grant -10.9545
3581
+ OW -10.9548
3582
+ ▁etc -10.9565
3583
+ ▁whenever -10.9578
3584
+ ▁symbol -10.9614
3585
+ migrant -10.9615
3586
+ ▁improving -10.9615
3587
+ ▁Marketing -10.9615
3588
+ ▁desert -10.9615
3589
+ ▁desire -10.9616
3590
+ ▁charity -10.9616
3591
+ ▁password -10.9618
3592
+ ▁Anyone -10.9618
3593
+ ▁Italy -10.9627
3594
+ ▁African -10.9629
3595
+ ▁abuse -10.9636
3596
+ headed -10.9645
3597
+ ▁pun -10.9669
3598
+ ▁combination -10.9715
3599
+ ▁uncomfortable -10.9715
3600
+ ▁borrow -10.9715
3601
+ ▁communicate -10.9715
3602
+ ▁opposite -10.9716
3603
+ profit -10.9716
3604
+ ▁hiding -10.9717
3605
+ ▁decade -10.9717
3606
+ # -10.9722
3607
+ ▁wrap -10.9725
3608
+ jack -10.9732
3609
+ dress -10.974
3610
+ ▁TO -10.9769
3611
+ ▁rise -10.9772
3612
+ ▁receive -10.9784
3613
+ ▁Wil -10.9799
3614
+ ▁format -10.9814
3615
+ ▁statistics -10.9816
3616
+ ▁Generate -10.9816
3617
+ ▁Question -10.9816
3618
+ ▁Bernie -10.9816
3619
+ ▁micro -10.9816
3620
+ ▁evil -10.9818
3621
+ ▁coast -10.9819
3622
+ OL -10.9834
3623
+ ▁singer -10.9845
3624
+ ▁Bob -10.9852
3625
+ ▁Both -10.9879
3626
+ ▁disappoint -10.9888
3627
+ ▁Big -10.9892
3628
+ ▁Michael -10.9918
3629
+ ▁offensive -10.9918
3630
+ ▁regret -10.9918
3631
+ ▁hobby -10.9918
3632
+ ▁relative -10.9918
3633
+ ▁staff -10.9926
3634
+ quart -10.995
3635
+ ▁Year -10.9955
3636
+ just -10.998
3637
+ ▁Apparently -11.0021
3638
+ ▁Remember -11.0021
3639
+ ▁license -11.0021
3640
+ ▁terrific -11.0021
3641
+ ▁publish -11.0021
3642
+ ▁honour -11.0021
3643
+ ▁capital -11.0021
3644
+ ▁freedom -11.0021
3645
+ ▁error -11.0022
3646
+ ▁spare -11.0027
3647
+ ▁Think -11.0027
3648
+ ▁presentation -11.0036
3649
+ ▁arrested -11.0043
3650
+ rust -11.0043
3651
+ 45 -11.0059
3652
+ ▁despite -11.0125
3653
+ ▁fantasy -11.0125
3654
+ ▁opportunities -11.0125
3655
+ ▁Girl -11.0125
3656
+ ▁remote -11.0125
3657
+ ▁Social -11.0126
3658
+ ▁Health -11.0126
3659
+ ▁snake -11.0126
3660
+ ▁piano -11.0126
3661
+ ▁mystery -11.0126
3662
+ ▁Damn -11.0126
3663
+ distance -11.0126
3664
+ VER -11.0131
3665
+ ▁thus -11.0149
3666
+ ▁lake -11.0153
3667
+ ▁crap -11.0153
3668
+ ▁slip -11.0162
3669
+ ▁poll -11.0177
3670
+ UN -11.0219
3671
+ talk -11.0229
3672
+ ▁descriptive -11.0231
3673
+ ▁families -11.0231
3674
+ ▁medication -11.0231
3675
+ ▁hidden -11.0231
3676
+ ▁promoting -11.0231
3677
+ ▁soldier -11.0231
3678
+ ▁teeth -11.0231
3679
+ ▁refuse -11.0231
3680
+ ▁royal -11.0231
3681
+ ▁roast -11.0231
3682
+ ▁dropped -11.0233
3683
+ ▁True -11.0233
3684
+ ▁kidding -11.0235
3685
+ ▁Stop -11.0245
3686
+ ▁grill -11.0246
3687
+ ▁taxes -11.0248
3688
+ ▁Out -11.0265
3689
+ leigh -11.0299
3690
+ posed -11.0312
3691
+ ▁Maria -11.0313
3692
+ ▁Spi -11.0331
3693
+ ▁transport -11.0336
3694
+ ▁horrible -11.0337
3695
+ ▁motivation -11.0337
3696
+ ▁sensory -11.0337
3697
+ ▁monitor -11.0337
3698
+ ▁herself -11.0337
3699
+ ▁Hitler -11.0337
3700
+ ▁Trust -11.034
3701
+ ▁Turn -11.0342
3702
+ ▁stressful -11.0342
3703
+ ▁nail -11.0356
3704
+ fold -11.0356
3705
+ ▁Post -11.0371
3706
+ ▁incl -11.042
3707
+ worker -11.042
3708
+ friend -11.0444
3709
+ ▁Explain -11.0445
3710
+ ▁encounter -11.0445
3711
+ ▁George -11.0445
3712
+ ▁steak -11.0445
3713
+ ▁convert -11.0445
3714
+ ▁firm -11.0446
3715
+ ▁empty -11.0446
3716
+ ▁directly -11.0448
3717
+ Why -11.0468
3718
+ ▁vegetable -11.0473
3719
+ ▁nation -11.0492
3720
+ ▁dedicated -11.0553
3721
+ ▁elaborate -11.0553
3722
+ ▁possibly -11.0553
3723
+ ▁babies -11.0553
3724
+ ▁operation -11.0553
3725
+ ▁loving -11.0553
3726
+ ▁arrive -11.0554
3727
+ ▁grandma -11.0554
3728
+ ▁priest -11.0556
3729
+ ▁programming -11.0556
3730
+ ▁resist -11.0557
3731
+ ▁plus -11.0559
3732
+ ▁potatoes -11.0559
3733
+ ▁nowhere -11.0565
3734
+ ▁classical -11.058
3735
+ ▁earn -11.0613
3736
+ ▁rip -11.0659
3737
+ ▁Empire -11.0663
3738
+ ▁impossible -11.0663
3739
+ ▁jewelry -11.0663
3740
+ ▁substitute -11.0663
3741
+ ▁violent -11.0663
3742
+ ▁stood -11.0663
3743
+ ▁trial -11.0667
3744
+ ▁discussion -11.0675
3745
+ ▁taught -11.0685
3746
+ Trump -11.0709
3747
+ ▁hip -11.0774
3748
+ length -11.0774
3749
+ ▁strike -11.0774
3750
+ ▁Monday -11.0777
3751
+ ▁queen -11.0781
3752
+ ▁Romeo -11.079
3753
+ ▁friendship -11.0792
3754
+ ▁Fuck -11.0794
3755
+ ▁Vol -11.0798
3756
+ UR -11.0824
3757
+ ▁ppl -11.0843
3758
+ ▁advertising -11.0887
3759
+ ▁definition -11.0887
3760
+ ▁distancing -11.0887
3761
+ ▁mechanic -11.0887
3762
+ inspired -11.0887
3763
+ ▁repeat -11.089
3764
+ hhh -11.0893
3765
+ ▁react -11.0937
3766
+ ▁determine -11.0968
3767
+ ▁rank -11.0976
3768
+ catching -11.0983
3769
+ ▁$100 -11.0985
3770
+ ▁register -11.1
3771
+ ❤ -11.1
3772
+ ▁debate -11.1001
3773
+ ▁Prince -11.1002
3774
+ ▁guard -11.1005
3775
+ ▁chain -11.1009
3776
+ awn -11.1015
3777
+ ▁Yep -11.102
3778
+ ▁duck -11.1025
3779
+ ▁yea -11.1028
3780
+ scale -11.1065
3781
+ sensitive -11.1115
3782
+ { -11.1115
3783
+ | -11.1115
3784
+ ▁Amazon -11.1115
3785
+ ▁equipment -11.1115
3786
+ ▁introduce -11.1115
3787
+ ▁thorough -11.1115
3788
+ ▁Jewish -11.1115
3789
+ ▁laptop -11.1115
3790
+ ▁confusing -11.1115
3791
+ ▁distract -11.1116
3792
+ applied -11.1116
3793
+ ▁requirements -11.1116
3794
+ ▁diverse -11.1117
3795
+ ▁import -11.1123
3796
+ ▁River -11.1127
3797
+ ▁burger -11.1136
3798
+ ▁Greek -11.1232
3799
+ ▁vehicle -11.1232
3800
+ ▁arguing -11.1232
3801
+ ▁separate -11.1232
3802
+ ▁navigat -11.1232
3803
+ ▁Still -11.1232
3804
+ ▁contemporary -11.1232
3805
+ ▁hopefully -11.1237
3806
+ ▁bio -11.124
3807
+ ▁humor -11.1243
3808
+ ▁director -11.1255
3809
+ ▁dust -11.1286
3810
+ ▁establish -11.1319
3811
+ 26 -11.1333
3812
+ ▁admirable -11.1349
3813
+ ▁conference -11.1349
3814
+ ▁neither -11.1349
3815
+ ▁symptoms -11.1349
3816
+ ▁teenager -11.1349
3817
+ ▁turkey -11.1349
3818
+ ▁twitter -11.1349
3819
+ ▁Another -11.1349
3820
+ ▁DIY -11.1349
3821
+ ▁bodies -11.1349
3822
+ ▁Before -11.1349
3823
+ ▁jerk -11.135
3824
+ ▁Help -11.135
3825
+ ▁insights -11.135
3826
+ ▁Congress -11.135
3827
+ ▁repair -11.1351
3828
+ ▁roof -11.1351
3829
+ ▁river -11.1356
3830
+ ▁handling -11.136
3831
+ ▁500- -11.136
3832
+ ▁comic -11.1365
3833
+ ▁Gold -11.137
3834
+ ▁blast -11.1371
3835
+ ▁Play -11.1372
3836
+ ▁Must -11.1376
3837
+ ▁Again -11.1447
3838
+ ▁counsel -11.1468
3839
+ ▁incredibly -11.1468
3840
+ ▁stretch -11.1468
3841
+ ▁studied -11.1468
3842
+ ▁valuable -11.1468
3843
+ ▁disaster -11.1468
3844
+ ▁protein -11.1468
3845
+ ▁Edward -11.1468
3846
+ ▁inviting -11.1469
3847
+ Virus -11.147
3848
+ ▁desk -11.1472
3849
+ ▁command -11.1472
3850
+ ▁Office -11.1474
3851
+ ▁NOT -11.1506
3852
+ school -11.1516
3853
+ 0% -11.1525
3854
+ ▁technique -11.1533
3855
+ ▁Brea -11.1549
3856
+ ▁danger -11.1588
3857
+ ▁favor -11.1588
3858
+ ▁commercial -11.1589
3859
+ ▁confidence -11.1589
3860
+ ▁disappear -11.1589
3861
+ ▁increasing -11.1589
3862
+ ▁proposal -11.1589
3863
+ ▁subreddit -11.1589
3864
+ ▁chemical -11.1589
3865
+ ▁border -11.1589
3866
+ ▁climb -11.1589
3867
+ ▁loyal -11.1589
3868
+ ▁juice -11.159
3869
+ ▁wealth -11.159
3870
+ ▁Frank -11.1591
3871
+ ▁Brown -11.1591
3872
+ ▁fuel -11.1592
3873
+ ▁preference -11.1592
3874
+ ▁beginner -11.1592
3875
+ ▁everybody -11.1596
3876
+ ▁stolen -11.1599
3877
+ WW -11.1608
3878
+ cular -11.1609
3879
+ aaaa -11.161
3880
+ ▁village -11.1699
3881
+ ▁awhile -11.1711
3882
+ ▁hilarious -11.1711
3883
+ ▁mirror -11.1711
3884
+ ▁scientific -11.1711
3885
+ ▁invented -11.1711
3886
+ ▁integrate -11.1711
3887
+ ▁presence -11.1711
3888
+ ▁reveal -11.1712
3889
+ ▁2019 -11.1713
3890
+ ▁tourist -11.1713
3891
+ ▁appearance -11.1714
3892
+ ologist -11.1714
3893
+ ▁overview -11.1717
3894
+ ▁Cause -11.1718
3895
+ ▁define -11.1725
3896
+ ▁Old -11.1726
3897
+ $ -11.1732
3898
+ ulate -11.1791
3899
+ world -11.1799
3900
+ ▁hung -11.1834
3901
+ ▁accurate -11.1834
3902
+ ▁athlete -11.1834
3903
+ ▁destination -11.1834
3904
+ ▁evolved -11.1834
3905
+ ▁innovative -11.1834
3906
+ ▁league -11.1834
3907
+ ▁politician -11.1834
3908
+ ▁1/4 -11.1834
3909
+ ▁buddy -11.1834
3910
+ ▁podcast -11.1834
3911
+ ▁anxious -11.1834
3912
+ ▁CEO -11.1834
3913
+ ▁otherwise -11.1834
3914
+ shadow -11.1834
3915
+ ▁showcase -11.1834
3916
+ ▁replies -11.1834
3917
+ ▁spider -11.1834
3918
+ ▁witness -11.1834
3919
+ ▁shelter -11.1834
3920
+ ▁pilot -11.1835
3921
+ ▁Biden -11.1836
3922
+ ▁zero -11.1837
3923
+ ▁yellow -11.1838
3924
+ ▁wound -11.1839
3925
+ ▁Anytime -11.1839
3926
+ ▁committed -11.1839
3927
+ ▁nearby -11.1843
3928
+ ▁gang -11.1846
3929
+ bringing -11.1846
3930
+ ▁Water -11.1849
3931
+ ▁Report -11.1851
3932
+ horn -11.1887
3933
+ ▁chose -11.1887
3934
+ ▁panic -11.1893
3935
+ ▁bullshit -11.1959
3936
+ ▁initiative -11.1959
3937
+ ▁landscape -11.1959
3938
+ ▁nuclear -11.1959
3939
+ ▁occasion -11.1959
3940
+ ▁technologies -11.1959
3941
+ ▁Chapter -11.1959
3942
+ ▁burden -11.1959
3943
+ ▁Marcius -11.1959
3944
+ ▁compliment -11.1959
3945
+ ▁asleep -11.1959
3946
+ finite -11.196
3947
+ unch -11.1962
3948
+ ▁originally -11.1963
3949
+ ▁array -11.1963
3950
+ ▁thick -11.1967
3951
+ ▁cutting -11.1976
3952
+ ▁Want -11.2001
3953
+ ambi -11.2066
3954
+ ▁magazine -11.2086
3955
+ ▁muscle -11.2086
3956
+ ▁hurry -11.2086
3957
+ ▁relief -11.2086
3958
+ ▁Police -11.2086
3959
+ ▁fraud -11.2086
3960
+ ▁profile -11.2086
3961
+ ▁Perfect -11.2086
3962
+ ▁reliable -11.2086
3963
+ ▁naked -11.2086
3964
+ ▁secure -11.2086
3965
+ ▁therapist -11.2086
3966
+ ▁crisp -11.2087
3967
+ ▁coworker -11.2088
3968
+ ▁attach -11.2088
3969
+ ▁loose -11.2089
3970
+ earning -11.2098
3971
+ ▁Start -11.21
3972
+ Elect -11.2114
3973
+ ▁oppos -11.2121
3974
+ specially -11.2127
3975
+ morrow -11.2181
3976
+ ▁choosing -11.2214
3977
+ ▁infrastructure -11.2214
3978
+ ▁newspaper -11.2214
3979
+ ▁relevant -11.2214
3980
+ ▁semester -11.2214
3981
+ ▁university -11.2214
3982
+ ▁monkey -11.2214
3983
+ ▁nigga -11.2214
3984
+ ▁crisis -11.2214
3985
+ ▁crush -11.2214
3986
+ ▁advancements -11.2215
3987
+ ▁crust -11.2216
3988
+ ▁banana -11.2216
3989
+ ▁couch -11.2217
3990
+ ▁america -11.2218
3991
+ ▁Trans -11.2218
3992
+ ▁planned -11.2219
3993
+ ▁Jeff -11.2219
3994
+ ▁Mother -11.2221
3995
+ ▁Based -11.2224
3996
+ ▁download -11.2224
3997
+ ▁moral -11.2226
3998
+ ▁capabilit -11.2333
3999
+ ▁Washington -11.2344
4000
+ ▁bridge -11.2344
4001
+ ▁paraphrase -11.2344
4002
+ ▁suspense -11.2344
4003
+ ▁violin -11.2344
4004
+ ▁gluten -11.2344
4005
+ ▁admire -11.2344
4006
+ ▁enemy -11.2344
4007
+ ▁romance -11.2344
4008
+ ▁Next -11.2344
4009
+ ▁Robert -11.2345
4010
+ ;# -11.2349
4011
+ ▁Harry -11.2354
4012
+ ▁stir -11.2358
4013
+ ▁twin -11.2367
4014
+ 😭 -11.2475
4015
+ ▁necessarily -11.2475
4016
+ ville -11.2477
4017
+ ▁failure -11.2477
4018
+ ▁bedroom -11.2477
4019
+ ▁outfit -11.2478
4020
+ ▁spray -11.248
4021
+ ▁solid -11.2482
4022
+ ▁voters -11.2482
4023
+ ▁abortion -11.2609
4024
+ ▁disorder -11.2609
4025
+ ▁exhibit -11.2609
4026
+ ▁graduated -11.2609
4027
+ ▁grandmother -11.2609
4028
+ ▁laundry -11.2609
4029
+ ▁possess -11.2609
4030
+ ▁Imagine -11.2609
4031
+ ▁Little -11.2609
4032
+ ▁Women -11.2609
4033
+ ▁finance -11.2609
4034
+ ▁observ -11.2609
4035
+ ▁Israel -11.2609
4036
+ ▁scheme -11.2609
4037
+ ▁Jones -11.261
4038
+ ▁realistic -11.2615
4039
+ ▁olive -11.2617
4040
+ hrew -11.2634
4041
+ ▁Though -11.2729
4042
+ ▁obvious -11.273
4043
+ ▁Chicago -11.2744
4044
+ ▁Seriously -11.2744
4045
+ ▁bottom -11.2744
4046
+ ▁circum -11.2744
4047
+ ▁draft -11.2744
4048
+ ▁suspect -11.2744
4049
+ ▁blonde -11.2744
4050
+ ▁Science -11.2744
4051
+ ▁abilities -11.2744
4052
+ ~~ -11.2744
4053
+ ▁leadership -11.2744
4054
+ ▁twist -11.2744
4055
+ ▁defense -11.2745
4056
+ depth -11.2745
4057
+ ▁banned -11.2745
4058
+ ▁objective -11.2749
4059
+ ▁extreme -11.2867
4060
+ ▁Research -11.2881
4061
+ ▁academic -11.2881
4062
+ ▁department -11.2881
4063
+ ▁gratitude -11.2881
4064
+ ▁perhaps -11.2881
4065
+ Tuesday -11.2881
4066
+ ▁powder -11.2881
4067
+ ▁revenge -11.2881
4068
+ ▁ladies -11.2881
4069
+ ▁injury -11.2881
4070
+ ▁storage -11.2881
4071
+ ▁Sweet -11.2882
4072
+ ▁peanut -11.2883
4073
+ ▁reference -11.2883
4074
+ ▁Chris -11.2887
4075
+ ▁Find -11.2888
4076
+ tooth -11.3015
4077
+ ▁cigarette -11.302
4078
+ ▁dedication -11.302
4079
+ ▁dragon -11.302
4080
+ ▁mushroom -11.302
4081
+ ▁necklace -11.302
4082
+ ▁survey -11.302
4083
+ ▁Canadian -11.302
4084
+ ▁YouTube -11.302
4085
+ revised -11.302
4086
+ ▁Verse -11.302
4087
+ ▁evolution -11.3023
4088
+ ▁interactive -11.3025
4089
+ ▁zoo -11.304
4090
+ maker -11.3061
4091
+ ▁scam -11.3086
4092
+ ▁assault -11.3161
4093
+ ▁creativity -11.3161
4094
+ ▁garbage -11.3161
4095
+ ▁incident -11.3161
4096
+ ▁primary -11.3161
4097
+ ▁brush -11.3161
4098
+ ▁Sanders -11.3161
4099
+ ▁barely -11.3161
4100
+ ▁silly -11.3162
4101
+ ▁Queen -11.3163
4102
+ Word -11.3163
4103
+ ▁label -11.3163
4104
+ ▁massive -11.3166
4105
+ ▁spring -11.3169
4106
+ ▁litera -11.3173
4107
+ ▁Under -11.3174
4108
+ ▁Long -11.3176
4109
+ watch -11.3186
4110
+ Enter -11.3202
4111
+ gress -11.3211
4112
+ driven -11.328
4113
+ ▁stole -11.3292
4114
+ ▁Middle -11.3303
4115
+ ▁component -11.3303
4116
+ ▁convenient -11.3303
4117
+ ▁inspiration -11.3303
4118
+ ▁poverty -11.3303
4119
+ ▁sweetheart -11.3303
4120
+ ▁unhappy -11.3303
4121
+ ▁Spring -11.3303
4122
+ ▁rural -11.3303
4123
+ ▁literature -11.3304
4124
+ ▁toxic -11.3305
4125
+ ▁symbolism -11.3305
4126
+ ▁crown -11.3305
4127
+ ▁Charles -11.3306
4128
+ ▁LOL -11.3306
4129
+ ▁curry -11.3306
4130
+ ▁2017 -11.3306
4131
+ ▁detective -11.3308
4132
+ space -11.3356
4133
+ rtificial -11.3448
4134
+ ▁Australian -11.3448
4135
+ ▁Instagram -11.3448
4136
+ ▁Internet -11.3448
4137
+ ▁January -11.3448
4138
+ ▁appointment -11.3448
4139
+ ▁chopped -11.3448
4140
+ ▁competitive -11.3448
4141
+ ▁groceries -11.3448
4142
+ ▁guidance -11.3448
4143
+ ▁stunning -11.3448
4144
+ ▁unexpected -11.3448
4145
+ ▁serving -11.3448
4146
+ ▁worries -11.3448
4147
+ ▁broad -11.3448
4148
+ ▁jacket -11.3449
4149
+ ▁expectations -11.3449
4150
+ ▁joking -11.3449
4151
+ ▁status -11.3449
4152
+ ▁carbon -11.345
4153
+ ▁bond -11.3454
4154
+ ▁route -11.3459
4155
+ ▁smoothie -11.3467
4156
+ ▁arrest -11.3566
4157
+ ▁Egypt -11.3595
4158
+ ▁abandon -11.3595
4159
+ ▁dialogue -11.3595
4160
+ ▁tutorial -11.3595
4161
+ ▁scream -11.3595
4162
+ ▁shift -11.3595
4163
+ ▁toppings -11.3596
4164
+ ▁branch -11.3596
4165
+ ▁sushi -11.3596
4166
+ ▁therefore -11.3596
4167
+ ▁racism -11.3596
4168
+ ▁Guess -11.3596
4169
+ ▁History -11.3597
4170
+ ▁preserv -11.3598
4171
+ ▁proposed -11.3598
4172
+ ▁voting -11.36
4173
+ ▁chef -11.3603
4174
+ ▁hook -11.3608
4175
+ ▁continu -11.3628
4176
+ build -11.3693
4177
+ ▁thrill -11.3704
4178
+ ▁congratulations -11.3745
4179
+ ▁folks -11.3745
4180
+ ▁participate -11.3745
4181
+ ▁pocket -11.3745
4182
+ ▁professor -11.3745
4183
+ ▁suicide -11.3745
4184
+ ▁supplies -11.3745
4185
+ ▁vaccine -11.3745
4186
+ ▁Speak -11.3745
4187
+ ▁throat -11.3745
4188
+ ▁battery -11.3745
4189
+ ▁overcome -11.3745
4190
+ ▁strict -11.3745
4191
+ ▁occur -11.3745
4192
+ ▁League -11.3746
4193
+ ▁labor -11.3747
4194
+ ▁monster -11.3747
4195
+ ▁motion -11.3748
4196
+ ▁duty -11.3748
4197
+ ▁Ugh -11.3749
4198
+ ▁argue -11.3749
4199
+ ▁Creat -11.3826
4200
+ muni -11.3843
4201
+ When -11.3869
4202
+ ▁Hopefully -11.3896
4203
+ ▁Microsoft -11.3896
4204
+ ▁happiness -11.3896
4205
+ ▁iPhone -11.3896
4206
+ ▁nutrition -11.3896
4207
+ ▁twenty -11.3896
4208
+ ▁possibility -11.3896
4209
+ ▁session -11.3896
4210
+ ▁Tower -11.3897
4211
+ ▁facing -11.3897
4212
+ ▁100% -11.3898
4213
+ ▁emerg -11.3898
4214
+ ▁Talk -11.39
4215
+ ▁spoke -11.3977
4216
+ ▁December -11.405
4217
+ ▁exchange -11.405
4218
+ ▁experiencing -11.405
4219
+ ▁principle -11.405
4220
+ ▁seafood -11.405
4221
+ ▁pricing -11.4051
4222
+ ▁Guide -11.4053
4223
+ empt -11.4081
4224
+ ▁Edit -11.4107
4225
+ Democrats -11.4178
4226
+ ▁potato -11.4198
4227
+ ~ -11.4206
4228
+ ▁Michigan -11.4206
4229
+ ▁brainstorm -11.4206
4230
+ ▁metaphor -11.4206
4231
+ ▁nasty -11.4207
4232
+ ▁jazz -11.4207
4233
+ ▁steel -11.4207
4234
+ ▁funeral -11.4207
4235
+ ▁discount -11.4208
4236
+ ▁surf -11.4228
4237
+ ▁broth -11.4318
4238
+ ▁Enjoy -11.4365
4239
+ ▁Chicken -11.4365
4240
+ ▁grief -11.4365
4241
+ ▁Disney -11.4365
4242
+ ▁General -11.4365
4243
+ ▁tribe -11.4365
4244
+ ▁innocent -11.4365
4245
+ ▁creation -11.4365
4246
+ ▁Week -11.4366
4247
+ ▁Royal -11.4367
4248
+ giving -11.4414
4249
+ ▁corrupt -11.4495
4250
+ ▁atmosphere -11.4526
4251
+ ▁easiest -11.4526
4252
+ ▁Recipe -11.4526
4253
+ ▁minimum -11.4526
4254
+ ▁clown -11.4526
4255
+ ▁sneak -11.4526
4256
+ ▁covid -11.4527
4257
+ ▁smash -11.4527
4258
+ ▁Listen -11.4527
4259
+ ▁Bridge -11.4528
4260
+ ▁reserv -11.4532
4261
+ ▁accountant -11.4539
4262
+ ▁coope -11.4564
4263
+ responsi -11.469
4264
+ ▁International -11.469
4265
+ ▁Service -11.469
4266
+ ▁disgusting -11.469
4267
+ ▁rabbit -11.469
4268
+ ▁senior -11.469
4269
+ ▁kingdom -11.469
4270
+ ▁almond -11.469
4271
+ ▁Quaran -11.469
4272
+ ▁combine -11.469
4273
+ ▁Center -11.4691
4274
+ ▁convey -11.4691
4275
+ awareness -11.4691
4276
+ spoken -11.4769
4277
+ ibly -11.4785
4278
+ ▁satisfy -11.4857
4279
+ responsibilities -11.4857
4280
+ ▁agriculture -11.4857
4281
+ ▁colleague -11.4857
4282
+ ▁marijuana -11.4857
4283
+ ▁pollution -11.4857
4284
+ ▁shampoo -11.4857
4285
+ ▁consistent -11.4857
4286
+ ▁supply -11.4857
4287
+ ▁depressed -11.4857
4288
+ ▁retrieve -11.4857
4289
+ ▁manipulat -11.4857
4290
+ ▁fortune -11.4857
4291
+ ▁jeans -11.4858
4292
+ ▁afterwards -11.4858
4293
+ ▁initial -11.4858
4294
+ ▁float -11.4859
4295
+ functional -11.4921
4296
+ ATE -11.495
4297
+ ▁elephant -11.5026
4298
+ ▁luxury -11.5026
4299
+ ▁physics -11.5026
4300
+ ▁fulfill -11.5026
4301
+ solving -11.5026
4302
+ ▁ISIS -11.5026
4303
+ ▁NBA -11.5026
4304
+ ▁slept -11.5026
4305
+ nigger -11.5027
4306
+ ▁lottery -11.5028
4307
+ ▁giant -11.5028
4308
+ ▁Halloween -11.5199
4309
+ ▁Thursday -11.5199
4310
+ ▁analyzing -11.5199
4311
+ ▁awkward -11.5199
4312
+ ▁dinosaur -11.5199
4313
+ ▁emergency -11.5199
4314
+ ▁enemies -11.5199
4315
+ ▁starving -11.5199
4316
+ ▁tablespoon -11.5199
4317
+ NSFW -11.5199
4318
+ ▁WHO -11.5199
4319
+ ▁database -11.5199
4320
+ ▁landmarks -11.5199
4321
+ ▁predator -11.5199
4322
+ ▁bullet -11.5199
4323
+ ▁lyrics -11.5199
4324
+ ▁sector -11.5199
4325
+ ▁costume -11.5199
4326
+ ▁reject -11.52
4327
+ ▁upgrade -11.5203
4328
+ ▁constant -11.5361
4329
+ ▁construct -11.5369
4330
+ ▁Catholic -11.5374
4331
+ ▁administration -11.5374
4332
+ ▁astronaut -11.5374
4333
+ ▁circle -11.5374
4334
+ ▁desperate -11.5374
4335
+ ▁prioritize -11.5374
4336
+ ▁teaspoon -11.5374
4337
+ ▁cocaine -11.5374
4338
+ ▁interpret -11.5374
4339
+ ▁Greece -11.5374
4340
+ ▁insightful -11.5374
4341
+ ▁complement -11.5374
4342
+ ▁preparing -11.5374
4343
+ ▁casual -11.5374
4344
+ ▁drain -11.5374
4345
+ ▁advise -11.5375
4346
+ ▁purple -11.5375
4347
+ ▁colour -11.5375
4348
+ ▁divide -11.5375
4349
+ Peace -11.5375
4350
+ govern -11.5386
4351
+ ▁slide -11.539
4352
+ ▁Latin -11.5405
4353
+ ▁dimension -11.5514
4354
+ ▁McDonald -11.5553
4355
+ ▁contribution -11.5553
4356
+ ▁hockey -11.5553
4357
+ ▁pencil -11.5553
4358
+ ▁scenario -11.5553
4359
+ ▁ultimate -11.5553
4360
+ ▁cloves -11.5553
4361
+ ▁fridge -11.5553
4362
+ ▁Second -11.5553
4363
+ ▁Senator -11.5553
4364
+ ▁rescue -11.5553
4365
+ ▁Daniel -11.5554
4366
+ ▁sample -11.5554
4367
+ Company -11.5554
4368
+ ▁plug -11.5554
4369
+ ▁compete -11.5561
4370
+ volving -11.5735
4371
+ ▁Children -11.5735
4372
+ ▁victory -11.5735
4373
+ ▁villain -11.5735
4374
+ ▁Museum -11.5735
4375
+ ▁urban -11.5735
4376
+ ▁Western -11.5735
4377
+ ▁Emma -11.5738
4378
+ ▁skate -11.5742
4379
+ reaching -11.5748
4380
+ ▁severe -11.5753
4381
+ ▁featur -11.5783
4382
+ [ -11.5817
4383
+ ▁Otherwise -11.592
4384
+ ▁assignment -11.592
4385
+ ▁coconut -11.592
4386
+ ▁manufacture -11.592
4387
+ ▁patience -11.592
4388
+ ▁puppy -11.592
4389
+ ▁puzzle -11.592
4390
+ ▁stereotype -11.592
4391
+ ▁tactics -11.592
4392
+ ‍ -11.592
4393
+ ▁narrow -11.592
4394
+ ▁Knock -11.592
4395
+ ▁drank -11.592
4396
+ ▁Space -11.592
4397
+ ▁actress -11.592
4398
+ ▁estimate -11.592
4399
+ ▁Boston -11.592
4400
+ ▁Wood -11.592
4401
+ ▁gaming -11.592
4402
+ ▁steam -11.5921
4403
+ ▁wander -11.5921
4404
+ ▁depend -11.5956
4405
+ maybe -11.6035
4406
+ ▁integration -11.6108
4407
+ ▁Seattle -11.6108
4408
+ ▁Windows -11.6108
4409
+ ▁volume -11.6108
4410
+ ▁worship -11.6108
4411
+ ▁cruel -11.6108
4412
+ ▁sentiment -11.6108
4413
+ ▁concise -11.6108
4414
+ @ -11.7322
4415
+ < -11.7448
4416
+ £ -11.846
4417
+ ° -11.9473
4418
+ ا -12.0307
4419
+ 🙄 -12.0904
4420
+ 🏼 -12.1216
4421
+ • -12.1539
4422
+ 😊 -12.2574
4423
+ á -12.3329
4424
+ ó -12.3329
4425
+ 🍗 -12.3329
4426
+ 💕 -12.3329
4427
+ 🏻 -12.3729
4428
+ ل -12.4146
4429
+ ♀ -12.4581
4430
+ ✨ -12.4581
4431
+ í -12.5035
4432
+ ر -12.5035
4433
+ 🤔 -12.5512
4434
+ 🤷 -12.5512
4435
+ 😍 -12.6538
4436
+ ♂ -12.7093
4437
+ 🥴 -12.7093
4438
+ ® -12.7682
4439
+ 👌 -12.7682
4440
+ 😁 -12.7682
4441
+ “ -12.8183
4442
+ } -12.8307
4443
+ ی -12.8307
4444
+ 💖 -12.8307
4445
+ 💙 -12.8307
4446
+ 🙌 -12.8307
4447
+ م -12.8973
4448
+ 🤦 -12.8973
4449
+  -12.9688
4450
+ 👏 -12.9688
4451
+ 😀 -12.9688
4452
+ è -12.9688
4453
+ 😔 -13.0457
4454
+ 😢 -13.0457
4455
+ 🤗 -13.0457
4456
+ ب -13.0457
4457
+ ō -13.129
4458
+ س -13.129
4459
+ ہ -13.129
4460
+ 😎 -13.129
4461
+ 😬 -13.129
4462
+ 👇 -13.2199
4463
+ 🕊 -13.2199
4464
+ 😩 -13.2199
4465
+ ک -13.3199
4466
+ ♥ -13.3199
4467
+ 🔥 -13.3199
4468
+ à -13.431
4469
+ د -13.431
4470
+ و -13.431
4471
+ ے -13.431
4472
+ ✌ -13.431
4473
+ 🏽 -13.431
4474
+ 💫 -13.431
4475
+ 😅 -13.431
4476
+ 😇 -13.431
4477
+ 🥰 -13.431
4478
+ ö -13.431
4479
+ ن -13.431
4480
+ ي -13.431
4481
+ ع -13.556
4482
+ 💀 -13.556
4483
+ ã -13.556
4484
+ ä -13.556
4485
+ ̛ -13.556
4486
+ € -13.556
4487
+ ☕ -13.556
4488
+ 👑 -13.556
4489
+ 😏 -13.556
4490
+ 😜 -13.556
4491
+ ı -13.6989
4492
+ ♡ -13.6989
4493
+ ➡ -13.6989
4494
+ 🐝 -13.6989
4495
+ 👊 -13.6989
4496
+ 💓 -13.6989
4497
+ 💪 -13.6989
4498
+ 😋 -13.6989
4499
+ 😷 -13.6989
4500
+ ت -13.6989
4501
+ â -13.6989
4502
+ ̨ -13.8656
4503
+ ‬ -13.8656
4504
+ ⭐ -13.8656
4505
+ 🌺 -13.8656
4506
+ 👆 -13.8656
4507
+ 😒 -13.8656
4508
+ 😤 -13.8656
4509
+ 🛰 -13.8656
4510
+ 🥂 -13.8656
4511
+ 😫 -14.0656
4512
+ × -14.0656
4513
+ α -14.0656
4514
+ ق -14.0656
4515
+ ⁩ -14.0656
4516
+ ✍ -14.0656
4517
+ ➦ -14.0656
4518
+ 。 -14.0656
4519
+ 🌷 -14.0656
4520
+ 😃 -14.0656
4521
+ 😝 -14.0656
4522
+ 😪 -14.0656
4523
+ 🙋 -14.0656
4524
+ 🤓 -14.0656
4525
+ 🧿 -14.0656
4526
+ ح -14.0656
4527
+ ल -14.3155
4528
+ ¢ -14.3156
4529
+ ì -14.3156
4530
+ ⁣ -14.3156
4531
+ ☝ -14.3156
4532
+ ☠ -14.3156
4533
+ ⚡ -14.3156
4534
+ ✔ -14.3156
4535
+ 🇦 -14.3156
4536
+ 🌐 -14.3156
4537
+ 🏖 -14.3156
4538
+ 👋 -14.3156
4539
+ 💐 -14.3156
4540
+ 📖 -14.3156
4541
+ 😌 -14.3156
4542
+ ج -14.3156
4543
+ 🙃 -14.3156
4544
+ ि -14.6488
4545
+ Â -14.6489
4546
+ ذ -14.6489
4547
+ » -14.6489
4548
+ ā -14.6489
4549
+ μ -14.6489
4550
+ ة -14.6489
4551
+ ✈ -14.6489
4552
+ ✡ -14.6489
4553
+ ❄ -14.6489
4554
+ ❣ -14.6489
4555
+ ー -14.6489
4556
+ 🇮 -14.6489
4557
+ 🌉 -14.6489
4558
+ 🌬 -14.6489
4559
+ 🎂 -14.6489
4560
+ 🏵 -14.6489
4561
+ 👅 -14.6489
4562
+ 💋 -14.6489
4563
+ 💝 -14.6489
4564
+ 💟 -14.6489
4565
+ 💥 -14.6489
4566
+ 🔔 -14.6489
4567
+ 😚 -14.6489
4568
+ 😺 -14.6489
4569
+ 🚩 -14.6489
4570
+ 🚶 -14.6489
4571
+ 🤩 -14.6489
4572
+ 🤬 -14.6489
4573
+ 🤮 -14.6489
4574
+ 🥁 -14.6489
4575
+ 🧡 -14.6489
4576
+ û -14.6489
4577
+ る -14.6489
4578
+ を -14.6489
4579
+ の -14.6489
4580
+ ् -15.1487
4581
+ ा -15.1489
4582
+ ğ -15.1489
4583
+ ط -15.1489
4584
+ 田 -15.1489
4585
+ 🥚 -15.1489
4586
+ Ç -15.1489
4587
+ Ü -15.1489
4588
+ æ -15.1489
4589
+ ð -15.1489
4590
+ ú -15.1489
4591
+ ă -15.1489
4592
+ š -15.1489
4593
+ ż -15.1489
4594
+ ə -15.1489
4595
+ ؟ -15.1489
4596
+ آ -15.1489
4597
+ ز -15.1489
4598
+ ك -15.1489
4599
+ ى -15.1489
4600
+ پ -15.1489
4601
+ ں -15.1489
4602
+ ھ -15.1489
4603
+ ۔ -15.1489
4604
+ ी -15.1489
4605
+ ु -15.1489
4606
+ ง -15.1489
4607
+ □ -15.1489
4608
+ ◍ -15.1489
4609
+ ☃ -15.1489
4610
+ ♾ -15.1489
4611
+ ⛪ -15.1489
4612
+ ✩ -15.1489
4613
+ ❓ -15.1489
4614
+ 、 -15.1489
4615
+ 🇧 -15.1489
4616
+ 🇭 -15.1489
4617
+ 🇱 -15.1489
4618
+ 🇴 -15.1489
4619
+ 🌄 -15.1489
4620
+ 🌅 -15.1489
4621
+ 🌊 -15.1489
4622
+ 🌌 -15.1489
4623
+ 🌞 -15.1489
4624
+ 🌠 -15.1489
4625
+ 🌤 -15.1489
4626
+ 🌱 -15.1489
4627
+ 🍸 -15.1489
4628
+ 🍾 -15.1489
4629
+ 🎀 -15.1489
4630
+ 🎈 -15.1489
4631
+ 🎙 -15.1489
4632
+ 🎸 -15.1489
4633
+ 🐒 -15.1489
4634
+ 🐦 -15.1489
4635
+ 🐷 -15.1489
4636
+ 👈 -15.1489
4637
+ 👣 -15.1489
4638
+ 👻 -15.1489
4639
+ 💃 -15.1489
4640
+ 💅 -15.1489
4641
+ 💌 -15.1489
4642
+ 💎 -15.1489
4643
+ 💒 -15.1489
4644
+ 💣 -15.1489
4645
+ 💦 -15.1489
4646
+ 💩 -15.1489
4647
+ 💰 -15.1489
4648
+ 📢 -15.1489
4649
+ 📸 -15.1489
4650
+ 🗡 -15.1489
4651
+ 😖 -15.1489
4652
+ 😹 -15.1489
4653
+ 🙁 -15.1489
4654
+ 🙊 -15.1489
4655
+ 🚗 -15.1489
4656
+ 🤍 -15.1489
4657
+ 🤑 -15.1489
4658
+ 🤞 -15.1489
4659
+ 🥇 -15.1489
4660
+ 🧁 -15.1489
4661
+ 👁 -15.1489
4662
+ å -15.1489
4663
+ ο -15.1489
4664
+ خ -15.1489
4665
+ َ -15.1489
4666
+ ɪ -15.1489
4667
+ न -15.1489
4668
+ อ -15.1489
4669
+ ं -15.149
4670
+ 🦋 -16.1409
4671
+ 🥑 -16.141
4672
+ 🤨 -16.1411
4673
+ 🕺 -16.1412
4674
+ 🐾 -16.1413
4675
+ 🎥 -16.1414
4676
+ 🍹 -16.1415
4677
+ 愛 -16.1416
4678
+ ☂ -16.1417
4679
+ √ -16.1418
4680
+ ⃣ -16.1419
4681
+ स -16.142
4682
+ ş -16.1421
4683
+ É -16.1422
4684
+ ¡ -16.1423
4685
+ 🧐 -16.1424
4686
+ 🤢 -16.1425
4687
+ 🤝 -16.1426
4688
+ 😣 -16.1427
4689
+ 😛 -16.1428
4690
+ 😈 -16.1429
4691
+ 🗣 -16.143
4692
+ 💨 -16.1431
4693
+ 今 -16.1432
4694
+ ⚠ -16.1433
4695
+ ف -16.1434
4696
+ œ -16.1435
4697
+ 🤘 -16.1436
4698
+ 😠 -16.1437
4699
+ 😞 -16.1438
4700
+ 🌿 -16.1439
4701
+ ✅ -16.144
4702
+ ⚽ -16.1441
4703
+ ☀ -16.1442
4704
+ π -16.1443
4705
+ ï -16.1444
4706
+ 😄 -16.1445
4707
+ 💔 -16.1446
4708
+ 🇨 -16.1447
4709
+ ⠀ -16.1448
4710
+ ‪ -16.1449
4711
+ 😻 -16.145
4712
+ 😡 -16.1451
4713
+ 😑 -16.1452
4714
+ 🎉 -16.1453
4715
+ ⁦ -16.1454
4716
+ 😳 -16.1455
4717
+ ☺ -16.1456
4718
+ 😱 -16.1457
4719
+ 👉 -16.1458
4720
+ 🎶 -16.1459
4721
+ 🌹 -16.146
4722
+ 🌟 -16.1461
4723
+ 🙂 -16.1462
4724
+ 💛 -16.1463
4725
+ 👀 -16.1464
4726
+ 🍀 -16.1465
4727
+ 🤡 -16.1466
4728
+ ̊ -16.1467
4729
+ 🙈 -16.1468
4730
+ ¿ -16.1469
4731
+ 🥺 -16.147
4732
+ 💚 -16.1471
4733
+ 💗 -16.1472
4734
+ ๆ -16.1473
4735
+ ⁄ -16.1474
4736
+ 😘 -16.1475
4737
+ 💯 -16.1476
4738
+ 😆 -16.1477
4739
+ 🏾 -16.1478
4740
+ ü -16.1479
4741
+ ç -16.148
4742
+ 🇸 -16.1481
4743
+ ́ -16.1482
4744
+ 💜 -16.1483
4745
+ 😉 -16.1484
4746
+ 🇺 -16.1485
4747
+ ठ -16.1486
4748
+ 👍 -16.1486
4749
+ ट -16.1487
4750
+ 💞 -16.1487
4751
+ ñ -16.1488
4752
+ ओ -16.1488
4753
+ ज -16.1488
4754
+ प -16.1488
4755
+ ब -16.1488
4756
+ ω -16.1489
4757
+ ش -16.1489
4758
+ − -16.1489
4759
+ ­ -16.1489
4760
+ ê -16.1489
4761
+ ≠ -16.1489
4762
+ ❶ -16.1489
4763
+ 💆 -16.1489
4764
+ € -16.1489
4765
+ © -16.1489
4766
+ « -16.1489
4767
+ ¶ -16.1489
4768
+ À -16.1489
4769
+ Í -16.1489
4770
+ ß -16.1489
4771
+ î -16.1489
4772
+ ÷ -16.1489
4773
+ ù -16.1489
4774
+ ÿ -16.1489
4775
+ ĝ -16.1489
4776
+ ź -16.1489
4777
+ ɒ -16.1489
4778
+ ʻ -16.1489
4779
+ Ξ -16.1489
4780
+ Π -16.1489
4781
+ ν -16.1489
4782
+ υ -16.1489
4783
+ е -16.1489
4784
+ כ -16.1489
4785
+ ן -16.1489
4786
+ ض -16.1489
4787
+ ظ -16.1489
4788
+ ه -16.1489
4789
+ ّ -16.1489
4790
+ ٹ -16.1489
4791
+ گ -16.1489
4792
+ ۓ -16.1489
4793
+ द -16.1489
4794
+ น -16.1489
4795
+ บ -16.1489
4796
+ ั -16.1489
4797
+ ้ -16.1489
4798
+ ᴗ -16.1489
4799
+ ₤ -16.1489
4800
+ ₹ -16.1489
4801
+ ← -16.1489
4802
+ ⌚ -16.1489
4803
+ ⌨ -16.1489
4804
+ ⏰ -16.1489
4805
+ ▶ -16.1489
4806
+ ● -16.1489
4807
+ ☑ -16.1489
4808
+ ☻ -16.1489
4809
+ ♨ -16.1489
4810
+ ♪ -16.1489
4811
+ ♫ -16.1489
4812
+ ⚘ -16.1489
4813
+ ⚰ -16.1489
4814
+ ⚾ -16.1489
4815
+ ✝ -16.1489
4816
+ ✳ -16.1489
4817
+ ❇ -16.1489
4818
+ ❗ -16.1489
4819
+ 】 -16.1489
4820
+ あ -16.1489
4821
+ こ -16.1489
4822
+ さ -16.1489
4823
+ す -16.1489
4824
+ で -16.1489
4825
+ と -16.1489
4826
+ 了 -16.1489
4827
+ 分 -16.1489
4828
+ 士 -16.1489
4829
+ 夢 -16.1489
4830
+ 寵 -16.1489
4831
+ 強 -16.1489
4832
+ 心 -16.1489
4833
+ 扎 -16.1489
4834
+ 況 -16.1489
4835
+ 洗 -16.1489
4836
+ 禅 -16.1489
4837
+ 紳 -16.1489
4838
+ 練 -16.1489
4839
+ 門 -16.1489
4840
+ 나 -16.1489
4841
+ 러 -16.1489
4842
+ 브 -16.1489
4843
+ 인 -16.1489
4844
+  -16.1489
4845
+ ︎ -16.1489
4846
+ 🃏 -16.1489
4847
+ 🆒 -16.1489
4848
+ 🆓 -16.1489
4849
+ 🇩 -16.1489
4850
+ 🇪 -16.1489
4851
+ 🇬 -16.1489
4852
+ 🇳 -16.1489
4853
+ 🇹 -16.1489
4854
+ 🇼 -16.1489
4855
+ 🇿 -16.1489
4856
+ 🌀 -16.1489
4857
+ 🌃 -16.1489
4858
+ 🌆 -16.1489
4859
+ 🌈 -16.1489
4860
+ 🌍 -16.1489
4861
+ 🌕 -16.1489
4862
+ 🌙 -16.1489
4863
+ 🌸 -16.1489
4864
+ 🌻 -16.1489
4865
+ 🍁 -16.1489
4866
+ 🍄 -16.1489
4867
+ 🍊 -16.1489
4868
+ 🍑 -16.1489
4869
+ 🍟 -16.1489
4870
+ 🍦 -16.1489
4871
+ 🍭 -16.1489
4872
+ 🍷 -16.1489
4873
+ 🍻 -16.1489
4874
+ 🍽 -16.1489
4875
+ 🎁 -16.1489
4876
+ 🎄 -16.1489
4877
+ 🎆 -16.1489
4878
+ 🎇 -16.1489
4879
+ 🎓 -16.1489
4880
+ 🎗 -16.1489
4881
+ 🎢 -16.1489
4882
+ 🎤 -16.1489
4883
+ 🎨 -16.1489
4884
+ 🎪 -16.1489
4885
+ 🎲 -16.1489
4886
+ 🎵 -16.1489
4887
+ 🎼 -16.1489
4888
+ 🏆 -16.1489
4889
+ 🏏 -16.1489
4890
+ 🏔 -16.1489
4891
+ 🏘 -16.1489
4892
+ 🏚 -16.1489
4893
+ 🏠 -16.1489
4894
+ 🏤 -16.1489
4895
+ 🏴 -16.1489
4896
+ 🏿 -16.1489
4897
+ 🐀 -16.1489
4898
+ 🐂 -16.1489
4899
+ 🐃 -16.1489
4900
+ 🐄 -16.1489
4901
+ 🐌 -16.1489
4902
+ 🐘 -16.1489
4903
+ 🐚 -16.1489
4904
+ 🐠 -16.1489
4905
+ 🐧 -16.1489
4906
+ 🐭 -16.1489
4907
+ 🐮 -16.1489
4908
+ 🐯 -16.1489
4909
+ 🐱 -16.1489
4910
+ 👄 -16.1489
4911
+ 👎 -16.1489
4912
+ 👐 -16.1489
4913
+ 👗 -16.1489
4914
+ 👨 -16.1489
4915
+ 👮 -16.1489
4916
+ 👶 -16.1489
4917
+ 👿 -16.1489
4918
+ 💉 -16.1489
4919
+ 💘 -16.1489
4920
+ 💡 -16.1489
4921
+ 💭 -16.1489
4922
+ 💼 -16.1489
4923
+ 📷 -16.1489
4924
+ 📺 -16.1489
4925
+ 🔊 -16.1489
4926
+ 🔓 -16.1489
4927
+ 🔗 -16.1489
4928
+ 🔫 -16.1489
4929
+ 🔹 -16.1489
4930
+ 🕉 -16.1489
4931
+ 🕯 -16.1489
4932
+ 🕵 -16.1489
4933
+ 🖋 -16.1489
4934
+ 🖖 -16.1489
4935
+ 🖥 -16.1489
4936
+ 🗑 -16.1489
4937
+ 😐 -16.1489
4938
+ 😕 -16.1489
4939
+ 😧 -16.1489
4940
+ 😰 -16.1489
4941
+ 😴 -16.1489
4942
+ 😵 -16.1489
4943
+ 🚀 -16.1489
4944
+ 🚌 -16.1489
4945
+ 🚘 -16.1489
4946
+ 🚴 -16.1489
4947
+ 🛣 -16.1489
4948
+ 🤖 -16.1489
4949
+ 🤜 -16.1489
4950
+ 🤟 -16.1489
4951
+ 🤠 -16.1489
4952
+ 🤤 -16.1489
4953
+ 🤪 -16.1489
4954
+ 🤭 -16.1489
4955
+ 🤲 -16.1489
4956
+ 🤸 -16.1489
4957
+ 🥒 -16.1489
4958
+ 🥬 -16.1489
4959
+ 🥭 -16.1489
4960
+ 🥲 -16.1489
4961
+ 🥵 -16.1489
4962
+ 🦁 -16.1489
4963
+ 🦃 -16.1489
4964
+ 🦄 -16.1489
4965
+ 🦌 -16.1489
4966
+ 🦜 -16.1489
4967
+ 🧀 -16.1489
4968
+ 🧬 -16.1489
4969
+ 🧳 -16.1489
4970
+ 🙏 -16.1489
4971
+ ก -16.1489
4972
+ λ -16.1489
4973
+ أ -16.1489
4974
+ ص -16.1489
4975
+ ῖ -16.1489
4976
+ 【 -16.1489
4977
+ 状 -16.1489
4978
+ 自 -16.1489
4979
+ 🧖 -16.1489
4980
+ ˈ -16.1489
4981
+ ป -16.1489
4982
+ ร -16.1489
4983
+ ห -16.1489
4984
+ ู -16.1489
4985
+ โ -16.1489
4986
+ い -16.1489
4987
+ っ -16.1489
4988
+ て -16.1489
4989
+ も -16.1489
4990
+ 持 -16.1489
4991
+ ม -16.1489
4992
+ ะ -16.1489
4993
+ ึ -16.1489
4994
+ क -16.149
4995
+ ग -16.149
4996
+ ड -16.149
4997
+ ह -16.149
4998
+ े -16.149
4999
+ ่ -16.149
5000
+ 🤣 -16.149
chat_sprint_artifacts/tinygpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37118ed4c7497877ccaf6fafebeb63ed37d4988e8f414cac27ae55d31dd43b02
3
+ size 59904321
chat_sprint_artifacts/train_ids.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f388f53957e50f65bd632083829f7f9cb198ef32867d4bb8bdaf0377fd7e698e
3
+ size 48539814
chat_sprint_artifacts/val_ids.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94eec301757b67b5861153d442c8e8ac5f9d67753d264928c1a7cc374deea455
3
+ size 1502364
chat_sprint_artifacts/vocab_size.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 5000
markov_init.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
train.py ADDED
@@ -0,0 +1,773 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ chat_sprint_standalone.py
5
+ One-file pipeline: collect datasets -> reformat as You:/Bot: -> train tiny GPT (CUDA) -> sample & save model
6
+
7
+ Requirements (install once):
8
+ pip install torch datasets sentencepiece tqdm numpy
9
+
10
+ Run:
11
+ python chat_sprint_standalone.py
12
+ """
13
+
14
+ import os, re, time, math, random, json
15
+ from pathlib import Path
16
+ from typing import List, Optional, Tuple
17
+ from itertools import islice
18
+ from contextlib import nullcontext
19
+
20
+ import numpy as np
21
+ from tqdm import tqdm
22
+ from datasets import load_dataset, get_dataset_config_names
23
+ import sentencepiece as spm
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+ import torch.nn.functional as F
28
+
29
+ # --------------------------
30
+ # Global config (tweak here)
31
+ # --------------------------
32
+ SEED = 1337
33
+ random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
34
+
35
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36
+ if DEVICE.type == "cuda":
37
+ torch.set_float32_matmul_precision("high")
38
+ torch.backends.cuda.matmul.allow_tf32 = True
39
+
40
+ SAVE_DIR = Path("./chat_sprint_artifacts")
41
+ SAVE_DIR.mkdir(parents=True, exist_ok=True)
42
+
43
+ # How many formatted You:/Bot: pairs to KEEP from each dataset (first run)
44
+ CAPS = {
45
+ "shakespeare": 15000,
46
+ "jokes": 20000,
47
+ "dadjokes": 8000,
48
+ "rsarcasm": 8000, # Thewillonline/reddit-sarcasm
49
+ "figlang": 12000,
50
+ "shower": 4000, # HuggingFaceGECLM/REDDIT_submissions split "Showerthoughts"
51
+ "personas": 2000,
52
+ "tweeteval": 4000,
53
+ "fourchan": 500, # dataset is tiny (~195)
54
+ "elonvtrump": 3000,
55
+ }
56
+
57
+ # Upper bound on rows to SCAN in each streaming dataset
58
+ SCAN = {
59
+ "jokes": 120_000,
60
+ "dadjokes": 60_000,
61
+ "rsarcasm": 120_000,
62
+ "figlang": 150_000,
63
+ "shower": 250_000,
64
+ "personas": 30_000,
65
+ "tweeteval": 60_000,
66
+ "fourchan": 2_000,
67
+ "elonvtrump": 60_000,
68
+ }
69
+
70
+ MAX_TOTAL_PAIRS = 60_000
71
+ MAX_LEN = 120
72
+
73
+ # Tokenizer
74
+ VOCAB_SIZE = 1500
75
+ TOKENIZER_PREFIX = SAVE_DIR / "spm_chat"
76
+ USER_SYMBOLS = ["You:", "Bot:", "[STYLE=Snark]", "[FORM=TWEET]", "[FORM=HEADLINE]", "[MOOD=Unhinged]", "[MOOD=Cheeky]"]
77
+
78
+ # Model size & train budget (~5 minutes on RTX 3090 with default)
79
+ block_size = 256
80
+ n_layer = 6
81
+ n_head = 6
82
+ n_embd = 384 # ~11.9M params
83
+ dropout = 0.0
84
+
85
+ MAX_SECONDS = 300 # hard cap
86
+ train_steps = 5000 # big number; time cap will stop near 5 min
87
+ log_interval = 100
88
+ eval_every = 400
89
+ batch_size = 24
90
+ accum_steps = 3
91
+ base_lr = 3e-3
92
+ min_lr = 5e-4
93
+ warmup_ratio = 0.06
94
+
95
+ # Sampling defaults
96
+ TEMP = 0.8
97
+ TOP_K = 60
98
+ TOP_P = 0.95
99
+ REP_PEN = 1.08
100
+
101
+ # --------------------------
102
+ # Helpers: cleaning & format
103
+ # --------------------------
104
+ URL_RE = re.compile(r"https?://\S+|www\.\S+", re.IGNORECASE)
105
+ MENT_RE = re.compile(r"@\w+")
106
+ WS_RE = re.compile(r"\s+")
107
+ QUOTE_RE = re.compile(r"^[\"'“”‘’]+|[\"'“”‘’]+$")
108
+
109
def clean_text(s: str) -> str:
    """Scrub one raw text snippet: drop URLs and @mentions, trim wrapping
    quotes, and collapse all runs of whitespace to single spaces."""
    cleaned = s.strip()
    # Apply the module-level regexes in a fixed order; WS_RE last so the
    # holes left by the removals collapse cleanly.
    for pattern, replacement in ((URL_RE, ""), (MENT_RE, ""), (QUOTE_RE, ""), (WS_RE, " ")):
        cleaned = pattern.sub(replacement, cleaned)
    return cleaned.strip()
116
+
117
def shorten_to(s: str, n: int) -> str:
    """Normalise whitespace and clip *s* to at most *n* characters,
    preferring to cut at the last sentence boundary before the limit."""
    s = re.sub(r"\s+", " ", s.strip())
    if len(s) <= n:
        return s
    # Latest ". ", "! " or "? " occurring within the first n characters.
    boundary = max(s.rfind(marker, 0, n) for marker in (". ", "! ", "? "))
    if boundary == -1:
        return s[:n].strip()  # no sentence break found: hard clip
    return s[:boundary + 1].strip()
123
+
124
def keep_or_clip(s: str, min_len: int = 6, max_len: int = MAX_LEN) -> Optional[str]:
    """Whitespace-normalise *s*; return None if empty or shorter than
    *min_len*, otherwise the string clipped to *max_len* characters."""
    if not s:
        return None
    normalised = re.sub(r"\s+", " ", s.strip())
    return shorten_to(normalised, max_len) if len(normalised) >= min_len else None
129
+
130
def turn(you: str, bot: str, tags: str = "") -> str:
    """Format one training example as a You:/Bot: exchange, with an
    optional tag line (e.g. "[STYLE=Snark]") between the two turns.
    The trailing empty element yields a final newline separator."""
    parts = [f"You: {you}".rstrip()]
    if tags:
        parts.append(tags)
    parts.extend((f"Bot: {bot}".rstrip(), ""))
    return "\n".join(parts)
136
+
137
def limited(ds, limit: int):
    """Return an iterable over at most *limit* rows of *ds*.

    Streaming HF datasets expose ``.take``; plain iterables (or a
    failing ``take``) fall back to ``itertools.islice``.
    """
    taker = getattr(ds, "take", None)
    if taker is not None:
        try:
            return taker(limit)
        except Exception:
            pass  # malformed .take — degrade to islice below
    return islice(ds, limit)
142
+
143
def get_first_nonempty(ex, keys) -> Optional[str]:
    """Return the first value of *ex* among *keys* that is a non-blank
    string; None when no key holds usable text."""
    for candidate in (ex.get(k) for k in keys):
        if isinstance(candidate, str) and candidate.strip():
            return candidate
    return None
149
+
150
+ # --------------------------
151
+ # Collectors (console tqdm)
152
+ # --------------------------
153
def collect_shakespeare(pairs: List[str], overall: tqdm):
    """Stream tiny-shakespeare and append up to CAPS["shakespeare"] turns.

    Each non-empty line becomes a "Continue in Shakespearean style." pair.
    Mutates *pairs* in place; *overall* is the shared progress bar.
    Any failure (network, parsing) skips this source entirely.
    """
    try:
        ds = load_dataset(
            "text",
            data_files={"train": "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"},
            split="train",
            streaming=True,
        )
        kept = 0
        pbar = tqdm(total=CAPS["shakespeare"], desc="[shakespeare]", unit="pair", leave=False, ncols=100)
        for row in ds:
            # Clean then length-gate; blank/too-short lines are dropped.
            txt = keep_or_clip(clean_text(row["text"]))
            if not txt: continue
            pairs.append(turn("Continue in Shakespearean style.", txt))
            kept += 1; pbar.update(1); overall.update(1)
            if kept >= CAPS["shakespeare"]: break
        pbar.close()
        print(f"[ok] shakespeare kept={kept}")
    except Exception as e:
        # Best-effort collector: log and move on so the pipeline continues.
        print(f"[skip] shakespeare: {e}")
173
+
174
def collect_reddit_jokes(pairs: List[str], overall: tqdm):
    """Try several Reddit-jokes dataset ids in order; use the first that loads.

    Title+body rows become (title -> body) pairs; title-only rows become
    ("Tell me a short joke." -> title). Returns after the first dataset
    that succeeds; prints a skip notice if none work.
    """
    for dsid in ["SocialGrep/one-million-reddit-jokes", "SocialGrep/reddit_jokes", "timc1/reddit_jokes"]:
        try:
            ds = load_dataset(dsid, split="train", streaming=True)
            kept = 0
            pbar = tqdm(total=CAPS["jokes"], desc="[jokes]", unit="pair", leave=False, ncols=100)
            for ex in limited(ds, SCAN["jokes"]):
                title = keep_or_clip(clean_text(str(ex.get("title") or "")))
                # Different mirrors use "selftext" vs "body" for the punchline.
                body = keep_or_clip(clean_text(str(ex.get("selftext") or ex.get("body") or "")))
                if body and title:
                    pairs.append(turn(title, body))
                elif title:
                    pairs.append(turn("Tell me a short joke.", title))
                else:
                    continue
                kept += 1; pbar.update(1); overall.update(1)
                if kept >= CAPS["jokes"]: break
            pbar.close()
            print(f"[ok] jokes {dsid} kept={kept}")
            return
        except Exception as e:
            print(f"[try next] jokes {dsid}: {e}")
    print("[skip] jokes: none worked")
197
+
198
def collect_dadjokes(pairs: List[str], overall: tqdm):
    """Stream shuttie/reddit-dadjokes; keep rows with both setup and punchline.

    Column names vary between dataset revisions, so several aliases are
    probed for each side. Appends up to CAPS["dadjokes"] pairs.
    """
    try:
        ds = load_dataset("shuttie/reddit-dadjokes", split="train", streaming=True)
        kept = 0
        pbar = tqdm(total=CAPS["dadjokes"], desc="[dadjokes]", unit="pair", leave=False, ncols=100)
        for ex in limited(ds, SCAN["dadjokes"]):
            setup = keep_or_clip(clean_text(str(ex.get("setup") or ex.get("instruction") or ex.get("input") or "")))
            punch = keep_or_clip(clean_text(str(ex.get("punchline") or ex.get("output") or "")))
            # Both halves are required for a usable joke pair.
            if not (setup and punch): continue
            pairs.append(turn(setup, punch))
            kept += 1; pbar.update(1); overall.update(1)
            if kept >= CAPS["dadjokes"]: break
        pbar.close()
        print(f"[ok] dadjokes kept={kept}")
    except Exception as e:
        print(f"[skip] dadjokes: {e}")
214
+
215
def collect_reddit_sarcasm(pairs: List[str], overall: tqdm):
    """Thewillonline/reddit-sarcasm — flexible parsing, scan+keep bars.

    Rows are free-form text; `parse` tries labelled "User:/Comment:" style
    patterns first, then falls back to splitting on lines. Kept pairs are
    tagged [STYLE=Snark]. Two progress bars track rows scanned vs kept.
    """
    try:
        ds = load_dataset("Thewillonline/reddit-sarcasm", split="train", streaming=True)
        keep_cap, scan_cap = CAPS["rsarcasm"], SCAN["rsarcasm"]
        scanbar = tqdm(total=scan_cap, desc="[sarcasm scan]", unit="row", leave=False, ncols=100)
        keepbar = tqdm(total=keep_cap, desc="[sarcasm kept]", unit="pair", leave=False, ncols=100)

        # Two labelled formats observed in the data; DOTALL lets the reply
        # span multiple lines.
        PATS = [
            re.compile(r"User\s*:\s*(.+?)\s*(?:Reddit\s*Comment|Comment|Reply)\s*:\s*(.+)", re.IGNORECASE | re.DOTALL),
            re.compile(r"Post\s*:\s*(.+?)\s*(?:Top\s*Comment|Comment)\s*:\s*(.+)", re.IGNORECASE | re.DOTALL),
        ]
        def parse(raw: str) -> Tuple[Optional[str], Optional[str]]:
            """Return (prompt, reply) or (None, None) if nothing usable."""
            raw = raw.replace("<|endoftext|>", "\n")
            for pat in PATS:
                m = pat.search(raw)
                if m:
                    return m.group(1).strip(), m.group(2).strip()
            # Fallback: first two non-blank lines, or a canned prompt.
            lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
            if len(lines) >= 2: return lines[0], lines[1]
            if len(lines) == 1: return "Reply with sarcasm:", lines[0]
            return None, None

        kept = scanned = 0
        for ex in limited(ds, scan_cap):
            scanned += 1
            you, bot = parse(str(ex.get("text") or ""))
            you = keep_or_clip(you); bot = keep_or_clip(bot)
            if you and bot:
                pairs.append(turn(you, bot, "[STYLE=Snark]"))
                kept += 1; keepbar.update(1); overall.update(1)
                if kept >= keep_cap: break
            scanbar.update(1)
            # Periodically surface the keep-rate so a bad parse is visible.
            if scanned % 2000 == 0:
                keepbar.set_postfix(rate=f"{kept/max(1,scanned):.2%}")
        scanbar.close(); keepbar.close()
        print(f"[ok] reddit-sarcasm kept={kept} (scanned {scanned})")
    except Exception as e:
        print(f"[skip] reddit-sarcasm: {e}")
254
+
255
def collect_figlang(pairs: List[str], overall: tqdm):
    """FigLang-2020 sarcasm data: (context -> response) pairs tagged [STYLE=Snark].

    Tries two spellings of the dataset id. The context column may be a
    list of prior turns (last two are joined) or a plain string.
    """
    for dsid in ["tasksource/figlang2020-sarcasm", "tasksource/figlang2020_sarcasm"]:
        try:
            ds = load_dataset(dsid, split="train", streaming=True)
            kept = 0
            pbar = tqdm(total=CAPS["figlang"], desc="[figlang]", unit="pair", leave=False, ncols=100)
            for ex in limited(ds, SCAN["figlang"]):
                ctx = ex.get("context")
                if isinstance(ctx, list) and ctx:
                    # Keep only the two most recent context turns.
                    context_str = " ".join(str(c) for c in ctx[-2:])
                else:
                    context_str = str(ex.get("context") or ex.get("prompt") or "")
                reply = str(ex.get("response") or ex.get("answer") or ex.get("text") or "")
                context_str = keep_or_clip(clean_text(context_str))
                reply = keep_or_clip(clean_text(reply))
                if reply:
                    if context_str:
                        pairs.append(turn(context_str, reply, "[STYLE=Snark]"))
                    else:
                        # No usable context: use a generic sarcasm prompt.
                        pairs.append(turn("Reply with sarcasm:", reply, "[STYLE=Snark]"))
                    kept += 1; pbar.update(1); overall.update(1)
                if kept >= CAPS["figlang"]: break
            pbar.close()
            print(f"[ok] figlang {dsid} kept={kept}")
            return
        except Exception as e:
            print(f"[try next] figlang {dsid}: {e}")
    print("[skip] figlang")
283
+
284
def collect_showerthoughts(pairs: List[str], overall: tqdm):
    """Use REDDIT_submissions split 'Showerthoughts' directly (no 'train').

    Each submission title becomes a ("Give me a shower thought." -> title)
    pair, capped at CAPS["shower"] kept rows out of SCAN["shower"] scanned.
    """
    try:
        ds = load_dataset("HuggingFaceGECLM/REDDIT_submissions", split="Showerthoughts", streaming=True)
        keep_cap, scan_cap = CAPS["shower"], SCAN["shower"]
        scanbar = tqdm(total=scan_cap, desc="[shower scan]", unit="row", leave=False, ncols=100)
        keepbar = tqdm(total=keep_cap, desc="[shower kept]", unit="pair", leave=False, ncols=100)

        kept = scanned = 0
        for ex in limited(ds, scan_cap):
            scanned += 1
            # Prefer the title; fall back to body-like fields.
            title = get_first_nonempty(ex, ["title", "selftext", "text"]) or ""
            text = keep_or_clip(clean_text(title))
            if text:
                pairs.append(turn("Give me a shower thought.", text))
                kept += 1; keepbar.update(1); overall.update(1)
                if kept >= keep_cap: break
            scanbar.update(1)
        scanbar.close(); keepbar.close()
        print(f"[ok] showerthoughts kept={kept} (scanned {scanned})")
    except Exception as e:
        print(f"[skip] showerthoughts: {e}")
306
+
307
def collect_personas(pairs: List[str], overall: tqdm):
    """Non-streaming is more reliable for this dataset.

    NapthaAI/twitter_personas: each persona description becomes an
    ("Adopt this persona in one sentence." -> description) pair tagged
    [FORM=TWEET]. Downloads the full split (small) rather than streaming.
    """
    try:
        ds = load_dataset("NapthaAI/twitter_personas")["train"]
        keep_cap = CAPS["personas"]
        pbar = tqdm(total=keep_cap, desc="[personas]", unit="pair", leave=False, ncols=100)
        kept = 0
        for ex in ds:
            desc = get_first_nonempty(ex, ["description","persona","bio","text","content","full_text"])
            # Some revisions nest the text under a "content" dict.
            if not isinstance(desc, str) and isinstance(ex.get("content"), dict):
                desc = ex["content"].get("text")
            desc = keep_or_clip(clean_text(str(desc or "")))
            if not desc: continue
            pairs.append(turn("Adopt this persona in one sentence.", desc, "[FORM=TWEET]"))
            kept += 1; pbar.update(1); overall.update(1)
            if kept >= keep_cap: break
        pbar.close()
        print(f"[ok] personas kept={kept}")
    except Exception as e:
        print(f"[skip] personas: {e}")
327
+
328
def collect_tweeteval(pairs: List[str], overall: tqdm):
    """Handle super_tweeteval (text_1/text_2, etc.) and fallback tweet_eval.

    Configs are tried in a priority order (irony/sarcasm first). Single-text
    rows get a canned prompt; paired-text rows are used as-is and tagged
    [STYLE=Snark]. Falls back to the base tweet_eval dataset only if the
    super variant yielded nothing.

    NOTE(review): errors from an individual `run_on` call propagate out of
    this function; the caller (build_corpus) wraps each collector in try.
    """
    def extract_pair(ex):
        # Return (prompt, reply) from whichever paired columns exist.
        t = ex.get("text")
        if isinstance(t, str) and t.strip():
            return "React with a sharp one-liner.", t
        for a,b in [("text_1","text_2"), ("sentence1","sentence2"),
                    ("premise","hypothesis"), ("question","answer"),
                    ("context","response"), ("tweet1","tweet2")]:
            t1, t2 = ex.get(a), ex.get(b)
            if isinstance(t1, str) and t1.strip() and isinstance(t2, str) and t2.strip():
                return t1, t2
        return None

    def run_on(dsname, pick, is_super):
        # Stream one config; return how many pairs were kept from it.
        keep_cap, scan_cap = CAPS["tweeteval"], SCAN["tweeteval"]
        pbar = tqdm(total=keep_cap, desc=f"[tweeteval:{pick}]", unit="pair", leave=False, ncols=100)
        kept = 0
        ds = load_dataset(dsname, pick, split="train", streaming=True)
        for ex in limited(ds, scan_cap):
            pair = extract_pair(ex) if is_super else ("React with a sharp one-liner.", ex.get("text")) if ex.get("text") else None
            if not pair: continue
            you, bot = pair
            you = keep_or_clip(clean_text(str(you or ""))); bot = keep_or_clip(clean_text(str(bot or "")))
            if not (you and bot): continue
            # Only genuinely paired rows (not the canned prompt) get the tag.
            tag = "[STYLE=Snark]" if you and you != "React with a sharp one-liner." else ""
            pairs.append(turn(you, bot, tag))
            kept += 1; pbar.update(1); overall.update(1)
            if kept >= keep_cap: break
        pbar.close()
        return kept

    kept_total = 0
    try:
        cfgs = get_dataset_config_names("cardiffnlp/super_tweeteval")
    except Exception:
        cfgs = []
    # Humor/sarcasm-adjacent configs first, then whatever else exists.
    prio = ["irony","sarcasm","humor","sentiment","emoji","emotion","stance","offensive","hate"]
    ordered = [c for c in prio if c in cfgs] + [c for c in cfgs if c not in prio]
    for pick in ordered:
        kept_total += run_on("cardiffnlp/super_tweeteval", pick, True)
        if kept_total >= CAPS["tweeteval"]:
            print(f"[ok] tweeteval(super) kept={kept_total}"); return
    if kept_total == 0:
        try:
            base_cfgs = get_dataset_config_names("cardiffnlp/tweet_eval")
        except Exception:
            base_cfgs = []
        ordered_b = [c for c in prio if c in base_cfgs] + [c for c in base_cfgs if c not in prio]
        for pick in ordered_b:
            kept_total += run_on("cardiffnlp/tweet_eval", pick, False)
            if kept_total >= CAPS["tweeteval"]:
                print(f"[ok] tweeteval(base) kept={kept_total}"); return
    print(f"[ok] tweeteval kept={kept_total}")
382
+
383
def collect_fourchan(pairs: List[str], overall: tqdm):
    """sbussiso/4chan-dataset (~195 rows): prompt/response pairs where present,
    otherwise free text behind a canned "Drop a spicy one-liner." prompt."""
    try:
        ds = load_dataset("sbussiso/4chan-dataset", split="train", streaming=True)
        # Dataset is tiny, so never ask for more than it can hold.
        keep_cap = min(CAPS["fourchan"], 195)
        pbar = tqdm(total=keep_cap, desc="[4chan]", unit="pair", leave=False, ncols=100)
        kept = 0
        for ex in limited(ds, SCAN["fourchan"]):
            prompt = keep_or_clip(clean_text(str(ex.get("prompt") or "")))
            resp = keep_or_clip(clean_text(str(ex.get("response") or "")))
            if prompt and resp:
                pairs.append(turn(prompt, resp))
                kept += 1; pbar.update(1); overall.update(1)
            else:
                # Fallback for rows without a prompt/response structure.
                txt = keep_or_clip(clean_text(str(ex.get("text") or ex.get("body") or ex.get("content") or "")))
                if txt:
                    pairs.append(turn("Drop a spicy one-liner.", txt))
                    kept += 1; pbar.update(1); overall.update(1)
            if kept >= keep_cap: break
        pbar.close()
        print(f"[ok] 4chan kept={kept}")
    except Exception as e:
        print(f"[skip] 4chan: {e}")
405
+
406
def collect_elon_trump(pairs: List[str], overall: tqdm):
    """MasaFoundation Elon-vs-Trump tweets as [FORM=TWEET] hot-take pairs.

    Loads from the parquet conversion branch since the default revision is
    not directly streamable. Tracks scan vs keep progress separately.
    """
    try:
        ds = load_dataset("MasaFoundation/Twitter_X_Elon_vs_Trump", split="train", streaming=True, revision="refs/convert/parquet")
        keep_cap, scan_cap = CAPS["elonvtrump"], SCAN["elonvtrump"]
        scanbar = tqdm(total=scan_cap, desc="[elon_vs_trump scan]", unit="row", leave=False, ncols=100)
        keepbar = tqdm(total=keep_cap, desc="[elon_vs_trump kept]", unit="pair", leave=False, ncols=100)
        kept = scanned = 0
        for ex in limited(ds, scan_cap):
            scanned += 1
            # Tweet text lives under different column names per revision.
            txt = get_first_nonempty(ex, ["content","text","tweet","full_text"]) or ""
            txt = keep_or_clip(clean_text(txt))
            if txt:
                pairs.append(turn("[FORM=TWEET] One sentence hot take:", txt, "[FORM=TWEET]"))
                kept += 1; keepbar.update(1); overall.update(1)
                if kept >= keep_cap: break
            scanbar.update(1)
        scanbar.close(); keepbar.close()
        print(f"[ok] Elon_vs_Trump kept={kept} (scanned {scanned})")
    except Exception as e:
        print(f"[skip] Elon_vs_Trump: {e}")
426
+
427
+ # --------------------------
428
+ # Build corpus
429
+ # --------------------------
430
def build_corpus() -> Path:
    """Run every collector, dedupe by Bot reply, shuffle, cap, and write corpus.txt.

    Returns the path of the written corpus file under SAVE_DIR.
    """
    print("[1/6] Collecting & reformatting datasets (streaming, capped)…")
    pairs: List[str] = []
    overall = tqdm(total=sum(CAPS.values()), desc="[all] collecting", unit="pair", ncols=100)

    # Each collector appends You:/Bot: blocks to `pairs`; one failing
    # collector must not abort the others.
    for collector in (
        collect_shakespeare,
        collect_reddit_jokes,
        collect_dadjokes,
        collect_reddit_sarcasm,
        collect_figlang,
        collect_showerthoughts,
        collect_personas,
        collect_tweeteval,
        collect_fourchan,
        collect_elon_trump,
    ):
        try:
            collector(pairs, overall)
        except Exception as e:
            print(f"[collector error] {collector.__name__}: {e}")
    overall.close()

    print("[2/6] Deduplicating & clipping…")
    seen_keys = set()
    deduped: List[str] = []
    for block in pairs:
        # Dedupe on the lowercased Bot reply when one exists, else on the whole block.
        try:
            bot_line = next(ln for ln in block.splitlines() if ln.startswith("Bot:"))
            key = bot_line[4:].strip().lower()
        except Exception:
            key = block.strip().lower()
        if key not in seen_keys:
            seen_keys.add(key)
            deduped.append(block)

    random.shuffle(deduped)
    deduped = deduped[:MAX_TOTAL_PAIRS]

    out_path = SAVE_DIR / "corpus.txt"
    out_path.write_text("\n".join(deduped), encoding="utf-8")
    print(f" wrote {len(deduped)} pairs → {out_path}")
    return out_path
474
+
475
+ # --------------------------
476
+ # SentencePiece tokenizer
477
+ # --------------------------
478
def train_spm(corpus_path: Path) -> spm.SentencePieceProcessor:
    """Train a unigram SentencePiece tokenizer on the corpus file and return a
    loaded processor. Writes <TOKENIZER_PREFIX>.model / .vocab to disk.
    """
    print("[3/6] Training SentencePiece tokenizer…")
    spm.SentencePieceTrainer.Train(
        input=str(corpus_path),
        model_prefix=str(TOKENIZER_PREFIX),
        vocab_size=VOCAB_SIZE,
        model_type="unigram",
        character_coverage=1.0,
        # Reserve role markers / style tags as single tokens so they can be
        # masked or matched exactly at inference time.
        user_defined_symbols=USER_SYMBOLS,
        bos_id=1, eos_id=2, unk_id=0, pad_id=-1
    )
    sp = spm.SentencePieceProcessor()
    sp.load(f"{TOKENIZER_PREFIX}.model")
    print(f" tokenizer saved at {TOKENIZER_PREFIX}.model")
    return sp
493
+
494
+ # --------------------------
495
+ # Encode to token IDs
496
+ # --------------------------
497
def encode_corpus_to_ids(sp: spm.SentencePieceProcessor, corpus_path: Path):
    """Encode the corpus into one flat id stream and split it for training.

    Each non-blank line gets a trailing EOS, and each blank-line-separated
    dialogue block gets one extra EOS. Returns (train_ids, val_ids, vocab_size)
    with tensors already placed on DEVICE.
    """
    print("[4/6] Encoding corpus to token IDs…")
    text = corpus_path.read_text(encoding="utf-8")
    blocks = [b for b in text.split("\n\n") if b.strip()]
    ids = []
    eos = sp.eos_id()
    for b in blocks:
        for line in b.splitlines():
            if not line.strip(): continue
            ids.extend(sp.encode(line, out_type=int)); ids.append(eos)
        ids.append(eos)  # second EOS in a row marks end of a dialogue block
    ids = np.array(ids, dtype=np.int32)
    n = len(ids); cut = int(n * 0.97)  # 97/3 train/val split by position
    train_ids = torch.tensor(ids[:cut], dtype=torch.long, device=DEVICE)
    val_ids = torch.tensor(ids[cut:], dtype=torch.long, device=DEVICE)
    print(f" tokens: train={train_ids.numel():,}, val={val_ids.numel():,}, vocab={sp.vocab_size()}")
    return train_ids, val_ids, sp.vocab_size()
514
+
515
+ # --------------------------
516
+ # Tiny GPT model
517
+ # --------------------------
518
class CausalSelfAttention(nn.Module):
    """Multi-head causal self-attention with a fixed lower-triangular mask buffer."""
    def __init__(self, n_embd, n_head, dropout=0.0, block_size=256):
        super().__init__()
        assert n_embd % n_head == 0
        self.n_head = n_head
        self.head_dim = n_embd // n_head
        self.qkv = nn.Linear(n_embd, 3*n_embd, bias=False)   # fused q/k/v projection
        self.proj = nn.Linear(n_embd, n_embd, bias=False)
        self.attn_drop = nn.Dropout(dropout)
        self.resid_drop = nn.Dropout(dropout)
        # (1, 1, block_size, block_size) lower-triangular mask, sliced to (T, T) in forward.
        self.register_buffer("mask", torch.tril(torch.ones(block_size, block_size)).view(1,1,block_size,block_size))

    def forward(self, x):
        """x: (B, T, C) -> (B, T, C) attended output."""
        B,T,C = x.shape
        qkv = self.qkv(x); q,k,v = qkv.chunk(3, dim=-1)
        q = q.view(B,T,self.n_head,self.head_dim).transpose(1,2)  # (B, heads, T, head_dim)
        k = k.view(B,T,self.n_head,self.head_dim).transpose(1,2)
        v = v.view(B,T,self.n_head,self.head_dim).transpose(1,2)
        att = (q @ k.transpose(-2,-1)) / math.sqrt(self.head_dim)
        att = att.masked_fill(self.mask[:,:,:T,:T] == 0, float('-inf'))  # no attending to the future
        att = torch.softmax(att, dim=-1)
        att = self.attn_drop(att)
        y = att @ v
        y = y.transpose(1,2).contiguous().view(B,T,C)
        y = self.resid_drop(self.proj(y))
        return y

class Block(nn.Module):
    """Pre-norm transformer block: attention then MLP, each with a residual add."""
    def __init__(self, n_embd, n_head, dropout=0.0, block_size=256):
        super().__init__()
        self.ln1 = nn.LayerNorm(n_embd)
        self.attn = CausalSelfAttention(n_embd, n_head, dropout, block_size)
        self.ln2 = nn.LayerNorm(n_embd)
        self.mlp = nn.Sequential(
            nn.Linear(n_embd, 4*n_embd),
            nn.GELU(),
            nn.Linear(4*n_embd, n_embd),
            nn.Dropout(dropout),
        )
    def forward(self, x):
        x = x + self.attn(self.ln1(x))
        x = x + self.mlp(self.ln2(x))
        return x

class TinyGPT(nn.Module):
    """Minimal GPT-style decoder-only language model.

    Construction args, forward signature, and state_dict layout are unchanged;
    the only behavioral change is the repetition-penalty fix in generate().
    """
    def __init__(self, vocab_size, n_layer, n_head, n_embd, block_size, dropout=0.0):
        super().__init__()
        self.block_size = block_size
        self.tok_emb = nn.Embedding(vocab_size, n_embd)
        self.pos_emb = nn.Embedding(block_size, n_embd)
        self.blocks = nn.ModuleList([Block(n_embd, n_head, dropout, block_size) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        self.head = nn.Linear(n_embd, vocab_size, bias=False)
        self.apply(self._init)

    def _init(self, m):
        # GPT-2 style init: N(0, 0.02) weights, zero biases, identity LayerNorm.
        if isinstance(m, (nn.Linear, nn.Embedding)): nn.init.normal_(m.weight, mean=0.0, std=0.02)
        if isinstance(m, nn.Linear) and m.bias is not None: nn.init.zeros_(m.bias)
        if isinstance(m, nn.LayerNorm): nn.init.ones_(m.weight); nn.init.zeros_(m.bias)

    def forward(self, idx, targets=None):
        """idx: (B, T) token ids. Returns (logits, loss); loss is None without targets."""
        B,T = idx.shape; assert T <= self.block_size
        pos = torch.arange(0, T, device=idx.device)
        x = self.tok_emb(idx) + self.pos_emb(pos)[None,:,:]
        for blk in self.blocks: x = blk(x)
        x = self.ln_f(x)
        logits = self.head(x)
        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))
        return logits, loss

    @torch.no_grad()
    def generate(self, idx, max_new_tokens=200, temperature=0.8, top_k=50, top_p=0.95, repetition_penalty=1.0):
        """Autoregressively sample max_new_tokens tokens; returns idx extended in place along dim 1."""
        self.eval()
        for _ in range(max_new_tokens):
            idx_cond = idx[:, -self.block_size:]
            logits, _ = self.forward(idx_cond)
            logits = logits[:, -1, :]
            if repetition_penalty != 1.0:
                # FIX: plain division (`logits[:, uniq] /= penalty`) moved NEGATIVE
                # logits toward zero, i.e. it made already-seen disfavored tokens
                # MORE likely. Standard fix: divide positive logits, multiply
                # negative ones. (Still keys off batch row 0, as before.)
                uniq = torch.unique(idx_cond[0])
                seen = logits[:, uniq]
                logits[:, uniq] = torch.where(seen > 0, seen / repetition_penalty, seen * repetition_penalty)
            logits = logits / max(1e-8, temperature)
            if top_k is not None:
                # Keep only the k highest logits; everything below the cutoff -> -1e9.
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                cutoff = v[:, -1].unsqueeze(-1)
                logits = torch.where(logits < cutoff, torch.full_like(logits, -1e9), logits)
            if top_p is not None:
                # Nucleus sampling: drop the tail whose cumulative prob exceeds top_p
                # (always keep the single most likely token).
                sorted_logits, sorted_idx = torch.sort(logits, descending=True)
                probs = torch.softmax(sorted_logits, dim=-1)
                cdf = torch.cumsum(probs, dim=-1)
                mask = cdf > top_p; mask[:, 0] = False
                sorted_logits[mask] = -1e9
                logits = torch.zeros_like(logits).scatter(1, sorted_idx, sorted_logits)
            probs = torch.softmax(logits, dim=-1)
            next_id = torch.multinomial(probs, num_samples=1)
            idx = torch.cat([idx, next_id], dim=1)
        return idx
612
+
613
+ # --------------------------
614
+ # Data loader for token IDs
615
+ # --------------------------
616
def get_batch(split_ids: torch.Tensor, B: int, T: int):
    """Sample B random contiguous windows of length T from a flat id stream.

    Returns (x, y) where y is x shifted one position ahead (next-token targets).
    """
    starts = torch.randint(0, split_ids.numel() - T - 1, (B,), device=split_ids.device)
    offsets = torch.arange(T, device=split_ids.device)
    window = starts[:, None] + offsets[None, :]   # (B, T) gather indices
    return split_ids[window], split_ids[window + 1]
621
+
622
+ # --------------------------
623
+ # Train loop
624
+ # --------------------------
625
def train_model(vocab_size, train_ids, val_ids):
    """Train a TinyGPT on pre-encoded ids and save weights + config to SAVE_DIR.

    Uses the module-level hyperparameters (n_layer, batch_size, base_lr, …).
    Stops after `train_steps` optimizer steps or MAX_SECONDS wall time,
    whichever comes first. Returns the trained model.
    """
    print("[5/6] Training tiny GPT on", DEVICE.type.upper(), "…")
    model = TinyGPT(vocab_size, n_layer, n_head, n_embd, block_size, dropout).to(DEVICE)
    params_m = sum(p.numel() for p in model.parameters())/1e6
    print(f" params: {params_m:.2f}M")
    optimizer = torch.optim.AdamW(model.parameters(), lr=base_lr, betas=(0.9, 0.95), weight_decay=0.0)

    # Mixed precision only on CUDA; on CPU `autocast` degrades to a no-op context.
    use_amp = DEVICE.type == "cuda"
    scaler = torch.amp.GradScaler("cuda", enabled=use_amp)
    autocast = (lambda: torch.amp.autocast("cuda", dtype=torch.float16)) if use_amp else nullcontext

    start = time.time()
    best_val = float("inf")

    def get_lr(step):
        # Linear warmup for the first `warmup_ratio` of steps, then cosine decay
        # from base_lr down to min_lr.
        warmup = max(1, int(train_steps * warmup_ratio))
        if step < warmup: return base_lr * (step+1)/warmup
        progress = (step - warmup) / max(1, train_steps - warmup)
        return min_lr + 0.5*(base_lr - min_lr)*(1 + math.cos(math.pi * min(1.0, progress)))

    @torch.no_grad()
    def eval_loss(iters=80):
        # Mean loss over random validation batches (batch size capped at 32).
        model.eval(); losses=[]
        for _ in range(iters):
            xb, yb = get_batch(val_ids, min(batch_size, 32), block_size)
            with autocast():
                _, loss = model(xb, yb)
            losses.append(loss.item())
        model.train()
        return float(sum(losses)/len(losses))

    model.train(); step = 0
    pbar = tqdm(total=train_steps, ncols=100, desc="[train]")
    while step < train_steps and (time.time()-start) < MAX_SECONDS:
        lr = get_lr(step)
        for pg in optimizer.param_groups: pg["lr"] = lr
        optimizer.zero_grad(set_to_none=True)

        # Gradient accumulation: accum_steps micro-batches per optimizer step,
        # each contributing loss/accum_steps to the gradient.
        total_loss = 0.0
        for _ in range(accum_steps):
            xb, yb = get_batch(train_ids, batch_size, block_size)
            with autocast():
                _, loss = model(xb, yb)
            if use_amp:
                scaler.scale(loss / accum_steps).backward()
            else:
                (loss / accum_steps).backward()
            total_loss += loss.item()

        # Unscale before clipping so the clip threshold applies to true gradients.
        if use_amp:
            scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        if use_amp:
            scaler.step(optimizer); scaler.update()
        else:
            optimizer.step()

        step += 1; pbar.update(1)
        if step % log_interval == 0 or step == 1:
            pbar.set_postfix(train=f"{total_loss/accum_steps:.3f}", lr=f"{lr:.2e}")

        if step % eval_every == 0:
            vl = eval_loss()
            best_val = min(best_val, vl)
            print(f" eval loss {vl:.3f} | best {best_val:.3f}")

    pbar.close()
    elapsed = time.time() - start
    print(f" done in {elapsed:.1f}s | best val {best_val:.3f}")

    # Save model weights plus the config needed to rebuild it at inference time.
    ckpt_path = SAVE_DIR / "tinygpt.pt"
    torch.save(model.state_dict(), ckpt_path)
    (SAVE_DIR / "model_config.json").write_text(json.dumps({
        "vocab_size": int(vocab_size),
        "n_layer": n_layer, "n_head": n_head, "n_embd": n_embd,
        "block_size": block_size, "dropout": dropout
    }, indent=2))
    print(f"[saved] weights → {ckpt_path}")
    return model
705
+
706
+ # --------------------------
707
+ # Sampling helper
708
+ # --------------------------
709
def sample_chat(sp: spm.SentencePieceProcessor, model: TinyGPT, prompt: str, max_new_tokens=200):
    """Generate a reply for `prompt` using the You:/Bot: framing.

    Returns the decoded FULL text (prefix plus continuation), using the
    module-level sampling defaults TEMP/TOP_K/TOP_P/REP_PEN.
    """
    prefix = f"You: {prompt}\nBot:"
    ids = sp.encode(prefix, out_type=int)
    x = torch.tensor([ids], dtype=torch.long, device=DEVICE)
    with torch.no_grad():
        y = model.generate(x, max_new_tokens=max_new_tokens, temperature=TEMP, top_k=TOP_K, top_p=TOP_P, repetition_penalty=REP_PEN)
    return sp.decode(y[0].tolist())
716
+
717
+ # --------------------------
718
+ # Main
719
+ # --------------------------
720
def main():
    """End-to-end pipeline: corpus -> tokenizer -> encoded ids -> training -> samples.

    Every stage caches its artifact under SAVE_DIR and reuses it on later runs,
    so deleting a file forces only that stage to be recomputed.
    """
    # Build or reuse corpus/tokenizer/ids
    corpus_path = SAVE_DIR / "corpus.txt"
    spm_model = SAVE_DIR / "spm_chat.model"
    if not corpus_path.exists():
        corpus_path = build_corpus()
    else:
        print(f"[cache] using {corpus_path}")

    sp = spm.SentencePieceProcessor()
    if not spm_model.exists():
        sp = train_spm(corpus_path)
    else:
        sp.load(str(spm_model))
        print(f"[cache] using {spm_model}")

    enc_train = SAVE_DIR / "train_ids.pt"
    enc_val = SAVE_DIR / "val_ids.pt"
    vocab_txt = SAVE_DIR / "vocab_size.txt"

    if enc_train.exists() and enc_val.exists() and vocab_txt.exists():
        # Reuse cached tensors; map_location keeps them on the current DEVICE.
        train_ids = torch.load(enc_train, map_location=DEVICE)
        val_ids = torch.load(enc_val, map_location=DEVICE)
        vocab_size = int(vocab_txt.read_text())
        print(f"[cache] loaded ids: train={train_ids.numel():,}, val={val_ids.numel():,}, vocab={vocab_size}")
    else:
        train_ids, val_ids, vocab_size = encode_corpus_to_ids(sp, corpus_path)
        torch.save(train_ids, enc_train); torch.save(val_ids, enc_val)
        vocab_txt.write_text(str(vocab_size))
        print("[cache] saved encoded ids")

    model = train_model(vocab_size, train_ids, val_ids)

    # Smoke-test generation on a fixed prompt set and dump the transcripts.
    print("\n[6/6] Samples:\n")
    prompts = [
        "Give me a spicy take on AI.",
        "Roast my messy desk.",
        "Explain recursion like you're annoyed.",
        "Write a satirical headline about coffee.",
        "Give me a shower thought about umbrellas.",
        "Tell me a one-liner about deadlines.",
        "Stay in Shakespeare mode and flatter me.",
        "Reply sarcastically to: I love meetings.",
    ]
    out_path = SAVE_DIR / "samples.txt"
    with out_path.open("w", encoding="utf-8") as f:
        for p in prompts:
            txt = sample_chat(sp, model, p, max_new_tokens=200)
            print("----\n" + txt)
            f.write("----\n" + txt + "\n")
    print(f"\n[saved] samples → {out_path}")

if __name__ == "__main__":
    main()
train2.py ADDED
@@ -0,0 +1,1131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ chat_sprint_standalone.py
5
+ One-file pipeline: collect datasets -> reformat as You:/Bot: -> train tiny GPT (CUDA) -> sample & save model
6
+
7
+ Requirements (install once):
8
+ uv pip install torch datasets sentencepiece tqdm numpy
9
+
10
+ Run:
11
+ python chat_sprint_standalone.py
12
+ """
13
+
14
+ import os, re, time, math, random, json
15
+ from pathlib import Path
16
+ from typing import List, Optional, Tuple
17
+ from itertools import islice
18
+ from contextlib import nullcontext
19
+
20
+ import numpy as np
21
+ from tqdm import tqdm
22
+ from datasets import load_dataset, get_dataset_config_names
23
+ import sentencepiece as spm
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+ import torch.nn.functional as F
28
+
29
+ # --------------------------
30
+ # Global config (tweak here)
31
+ # --------------------------
32
# Reproducibility: one seed for the python, numpy, and torch RNGs.
SEED = 1337
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if DEVICE.type == "cuda":
    # Allow TF32 matmuls for speed on Ampere+ GPUs.
    torch.set_float32_matmul_precision("high")
    torch.backends.cuda.matmul.allow_tf32 = True

SAVE_DIR = Path("./chat_sprint_artifacts")
SAVE_DIR.mkdir(parents=True, exist_ok=True)

# How many formatted You:/Bot: pairs to KEEP from each dataset (first run)
CAPS = {
    # NEW multi-turn / assistant cadence
    "oasst1": 0,
    "hhrlhf": 12000,
    "ultrachat": 20000,
    "dailydialog": 8000,
    "bst": 6000,
    "personachat": 8000,
    "soda": 50000,
    "topical": 8000,

    # Existing single-turn & flavor
    "shakespeare": 15000,
    "jokes": 20000,
    "dadjokes": 8000,
    "rsarcasm": 8000,  # Thewillonline/reddit-sarcasm
    "figlang": 12000,
    "shower": 4000,  # HuggingFaceGECLM/REDDIT_submissions split "Showerthoughts"
    "personas": 2000,
    "tweeteval": 4000,
    "fourchan": 500,  # dataset is tiny (~195)
    "elonvtrump": 3000,
}

# Upper bound on rows to SCAN in each streaming dataset
# (kept > CAPS so sparse datasets still reach their keep cap).
SCAN = {
    # NEW
    "oasst1": 200_000,
    "hhrlhf": 100_000,
    "ultrachat": 220_000,
    "dailydialog": 30_000,
    "bst": 30_000,
    "personachat": 60_000,
    "soda": 300_000,
    "topical": 15_000,

    # Existing
    "jokes": 120_000,
    "dadjokes": 60_000,
    "rsarcasm": 120_000,
    "figlang": 150_000,
    "shower": 250_000,
    "personas": 30_000,
    "tweeteval": 60_000,
    "fourchan": 2_000,
    "elonvtrump": 60_000,
}

MAX_TOTAL_PAIRS = 150_000  # hard cap on corpus size after dedup/shuffle
MAX_LEN = 120              # max characters kept per utterance (see keep_or_clip)

# Tokenizer
VOCAB_SIZE = 5000
TOKENIZER_PREFIX = SAVE_DIR / "spm_chat"
# Role markers / style tags reserved as single tokens in the tokenizer.
USER_SYMBOLS = ["You:", "Bot:", "[STYLE=Snark]", "[FORM=TWEET]", "[FORM=HEADLINE]", "[MOOD=Unhinged]", "[MOOD=Cheeky]"]

# Model size & train budget (~5 minutes on RTX 3090 with default)
block_size = 256
n_layer = 6
n_head = 6
n_embd = 384  # ~11.9M params
dropout = 0.0

MAX_SECONDS = 300   # hard wall-clock cap on training
train_steps = 5000  # big number; time cap will stop near 5 min
log_interval = 100
eval_every = 400
batch_size = 24
accum_steps = 3     # gradient-accumulation micro-batches per optimizer step
base_lr = 3e-3
min_lr = 5e-4
warmup_ratio = 0.06

# Sampling defaults
TEMP = 0.8
TOP_K = 60
TOP_P = 0.95
REP_PEN = 1.08
122
+
123
+ # --------------------------
124
+ # Helpers: cleaning & format
125
+ # --------------------------
126
URL_RE = re.compile(r"https?://\S+|www\.\S+", re.IGNORECASE)
MENT_RE = re.compile(r"@\w+")
WS_RE = re.compile(r"\s+")
QUOTE_RE = re.compile(r"^[\"'“”‘’]+|[\"'“”‘’]+$")

def clean_text(s: str) -> str:
    """Normalize one raw snippet: drop URLs and @mentions, trim wrapping
    quote characters, and collapse runs of whitespace to single spaces.
    """
    out = s.strip()
    # Order matters: strip noise first, then collapse the whitespace it leaves.
    for pattern in (URL_RE, MENT_RE, QUOTE_RE):
        out = pattern.sub("", out)
    out = WS_RE.sub(" ", out)
    return out.strip()
138
+
139
def shorten_to(s: str, n: int) -> str:
    """Clip `s` to at most `n` characters, preferring a sentence-boundary cut.

    Whitespace is collapsed first; if no ". "/"! "/"? " boundary exists within
    the first n characters, the string is hard-truncated at n.
    """
    collapsed = re.sub(r"\s+", " ", s.strip())
    if len(collapsed) <= n:
        return collapsed
    boundary = max(collapsed.rfind(mark, 0, n) for mark in (". ", "! ", "? "))
    if boundary != -1:
        return collapsed[:boundary + 1].strip()
    return collapsed[:n].strip()
145
+
146
def keep_or_clip(s: str, min_len: int = 6, max_len: int = MAX_LEN) -> Optional[str]:
    """Whitespace-normalize `s` and clip it to `max_len`.

    Returns None for empty/falsy input or anything shorter than `min_len`
    after normalization.
    """
    if not s:
        return None
    normalized = re.sub(r"\s+", " ", s.strip())
    return shorten_to(normalized, max_len) if len(normalized) >= min_len else None
151
+
152
def turn(you: str, bot: str, tags: str = "") -> str:
    """Format one You:/Bot: exchange, with an optional tag line between the
    two and a trailing newline that separates blocks in the corpus.
    """
    parts = [f"You: {you}".rstrip()]
    if tags:
        parts.append(tags)
    parts.append(f"Bot: {bot}".rstrip())
    return "\n".join(parts) + "\n"
158
+
159
def limited(ds, limit: int):
    """Yield at most `limit` items: prefer the dataset's native .take(),
    falling back to itertools.islice for plain iterables.
    """
    take = getattr(ds, "take", None)
    if take is None:
        return islice(ds, limit)
    try:
        return take(limit)
    except Exception:
        return islice(ds, limit)
164
+
165
def get_first_nonempty(ex, keys) -> Optional[str]:
    """Return the first value under `keys` that is a non-blank string, else None."""
    return next(
        (v for k in keys if isinstance((v := ex.get(k)), str) and v.strip()),
        None,
    )
171
+
172
def to_str(x) -> Optional[str]:
    """Best-effort extraction of a non-blank string from `x`.

    Strings pass through unchanged; dicts are probed under the common
    utterance field names; everything else yields None.
    """
    if isinstance(x, str):
        return x
    if not isinstance(x, dict):
        return None
    for field in ("text", "utterance", "content", "response", "value", "message", "msg"):
        candidate = x.get(field)
        if isinstance(candidate, str) and candidate.strip():
            return candidate
    return None
180
+
181
+ # --------------------------
182
+ # NEW Collectors (multi-turn)
183
+ # --------------------------
184
def collect_oasst1(pairs: List[str], overall: tqdm):
    """OpenAssistant/oasst1: pair prompter -> assistant using parent links.

    Appends formatted You:/Bot: blocks to `pairs` and ticks `overall`.
    Best-effort: any failure prints "[skip]" and returns.
    """
    try:
        keep_cap, scan_cap = CAPS["oasst1"], SCAN["oasst1"]
        # FIX: the cap check used to run only *after* appending a pair, so a
        # keep cap of 0 still scanned up to `scan_cap` rows and kept one pair.
        if keep_cap <= 0:
            print("[ok] oasst1 kept=0 (scanned 0)")
            return
        ds = load_dataset("OpenAssistant/oasst1", split="train", streaming=True)
        pbar = tqdm(total=keep_cap, desc="[oasst1]", unit="pair", leave=False, ncols=100)
        kept = scanned = 0
        seen = {}  # message_id -> (role, cleaned text); lets replies find their prompter parent
        for ex in limited(ds, scan_cap):
            scanned += 1
            role = str(ex.get("role") or "")
            txt = keep_or_clip(clean_text(str(ex.get("text") or "")))
            mid = ex.get("message_id") or ex.get("id")
            pid = ex.get("parent_id")
            # Emit a pair when an assistant message replies to an already-seen prompter.
            if role == "assistant" and pid in seen and seen[pid][0] == "prompter":
                you = seen[pid][1]
                if you and txt:
                    pairs.append(turn(you, txt))
                    kept += 1; pbar.update(1); overall.update(1)
                    if kept >= keep_cap: break
            if mid and role in ("assistant", "prompter"):
                seen[mid] = (role, txt or "")
        pbar.close()
        print(f"[ok] oasst1 kept={kept} (scanned {scanned})")
    except Exception as e:
        print(f"[skip] oasst1: {e}")
210
+
211
def collect_ultrachat(pairs: List[str], overall: tqdm):
    """HuggingFaceH4/ultrachat_200k: slide over user->assistant message pairs.

    Each conversation can contribute several pairs; caps are enforced across
    conversations. Best-effort: failures print "[skip]".
    """
    try:
        ds = load_dataset("HuggingFaceH4/ultrachat_200k", split="train_sft", streaming=True)
        keep_cap, scan_cap = CAPS["ultrachat"], SCAN["ultrachat"]
        pbar = tqdm(total=keep_cap, desc="[ultrachat]", unit="pair", leave=False, ncols=100)
        kept = scanned = 0
        for ex in limited(ds, scan_cap):
            scanned += 1
            msgs = ex.get("messages") or []
            # Adjacent (user, assistant) message pairs become You:/Bot: turns.
            for a, b in zip(msgs, msgs[1:]):
                if (a.get("role") in ("user","human")) and (b.get("role") in ("assistant","gpt")):
                    you = keep_or_clip(clean_text(str(a.get("content") or "")))
                    bot = keep_or_clip(clean_text(str(b.get("content") or "")))
                    if you and bot:
                        pairs.append(turn(you, bot))
                        kept += 1; pbar.update(1); overall.update(1)
                        if kept >= keep_cap: break
            if kept >= keep_cap: break
        pbar.close()
        print(f"[ok] ultrachat kept={kept} (scanned {scanned})")
    except Exception as e:
        print(f"[skip] ultrachat: {e}")
234
+
235
+
236
def collect_dailydialog(pairs: List[str], overall: tqdm):
    """
    DailyDialog via data-only mirrors (no dataset scripts).
    Tries Parquet-converted branches/configs first; collects adjacent utterance pairs.
    Stops at the first mirror that yields at least one pair.
    """
    keep_cap, scan_cap = CAPS["dailydialog"], SCAN["dailydialog"]
    candidates = [
        # (dataset_id, name_or_None, revision_or_None)
        ("roskoN/dailydialog", "full", "refs/convert/parquet"),  # has full/train/*.parquet
        ("roskoN/dailydialog", "default", "refs/convert/parquet"),
        ("frankdarkluo/DailyDialog", "default", "refs/convert/parquet"),
        ("ConvLab/dailydialog", None, None),  # sometimes ships plain JSON/arrow
    ]

    kept = 0
    for dsid, name, rev in candidates:
        try:
            # Build the load_dataset call dynamically: config name and revision
            # are only passed when the candidate specifies them.
            load_kwargs = dict(split="train", streaming=True)
            if rev is not None:
                load_kwargs["revision"] = rev
            if name is not None:
                ds = load_dataset(dsid, name, **load_kwargs)
            else:
                ds = load_dataset(dsid, **load_kwargs)

            pbar = tqdm(total=keep_cap, desc=f"[dailydialog:{dsid}]", unit="pair", leave=False, ncols=100)
            for ex in limited(ds, scan_cap):
                # Flexible extraction: mirrors disagree on the dialogue column name.
                dialog = (
                    ex.get("dialog") or ex.get("dialogue") or ex.get("utterances")
                    or ex.get("turns") or ex.get("content") or []
                )
                seq = []
                if isinstance(dialog, list):
                    for u in dialog:
                        s = to_str(u) if not isinstance(u, str) else u
                        s = keep_or_clip(clean_text(str(s or "")))
                        if s:
                            seq.append(s)

                # Adjacent utterances become You:/Bot: pairs.
                for a, b in zip(seq, seq[1:]):
                    pairs.append(turn(a, b))
                    kept += 1; pbar.update(1); overall.update(1)
                    if kept >= keep_cap:
                        break
                if kept >= keep_cap:
                    break
            pbar.close()

            if kept > 0:
                print(f"[ok] dailydialog kept={kept} via {dsid} ({'name='+name if name else 'no-name'} {rev or ''})")
                return
            else:
                print(f"[try next] dailydialog: {dsid} produced 0 pairs; trying next candidate…")
        except Exception as e:
            print(f"[try next] dailydialog {dsid}: {e}")

    print("[skip] dailydialog: no usable Parquet/JSON mirror found (all candidates failed)")
295
+
296
+
297
def collect_bst(pairs: List[str], overall: tqdm):
    """blended_skill_talk: previous_utterance -> one of guided/free/suggested replies."""
    try:
        ds = load_dataset("blended_skill_talk", split="train", streaming=True)
        keep_cap, scan_cap = CAPS["bst"], SCAN["bst"]
        pbar = tqdm(total=keep_cap, desc="[bst]", unit="pair", leave=False, ncols=100)
        kept = 0
        def pick_first(x):
            # First element of a list, or of the first list-valued entry of a dict.
            if isinstance(x, list) and x: return x[0]
            if isinstance(x, dict):
                for v in x.values():
                    if isinstance(v, list) and v: return v[0]
            return None
        for ex in limited(ds, scan_cap):
            you = keep_or_clip(clean_text(str(ex.get("previous_utterance") or ex.get("context") or "")))
            # Prefer guided, then free, then suggested candidate replies.
            cand = pick_first(ex.get("guided_messages")) or pick_first(ex.get("free_messages")) or pick_first(ex.get("suggestions"))
            bot = keep_or_clip(clean_text(str(cand or "")))
            if you and bot:
                pairs.append(turn(you, bot))
                kept += 1; pbar.update(1); overall.update(1)
                if kept >= keep_cap: break
        pbar.close()
        print(f"[ok] bst kept={kept}")
    except Exception as e:
        print(f"[skip] bst: {e}")
322
+
323
def collect_personachat(pairs: List[str], overall: tqdm):
    """bavard/personachat_truecased (Parquet): expand history[] into adjacent pairs."""
    try:
        # refs/convert/parquet avoids running the dataset's loading script.
        ds = load_dataset("bavard/personachat_truecased", split="train", streaming=True, revision="refs/convert/parquet")
        keep_cap, scan_cap = CAPS["personachat"], SCAN["personachat"]
        pbar = tqdm(total=keep_cap, desc="[personachat]", unit="pair", leave=False, ncols=100)
        kept = 0
        for ex in limited(ds, scan_cap):
            hist = ex.get("history") or []
            seq = [keep_or_clip(clean_text(str(u))) for u in hist]
            seq = [s for s in seq if s]
            # Adjacent utterances in the history become You:/Bot: pairs.
            for a, b in zip(seq, seq[1:]):
                pairs.append(turn(a, b))
                kept += 1; pbar.update(1); overall.update(1)
                if kept >= keep_cap: break
            if kept >= keep_cap: break
        pbar.close()
        print(f"[ok] personachat kept={kept}")
    except Exception as e:
        print(f"[skip] personachat: {e}")
343
+
344
+
345
def collect_soda(pairs: List[str], overall: tqdm):
    """allenai/soda: dialogue column can be list[str] or list[dict]; normalize via to_str."""
    try:
        ds = load_dataset("allenai/soda", split="train", streaming=True)
        keep_cap, scan_cap = CAPS["soda"], SCAN["soda"]
        pbar = tqdm(total=keep_cap, desc="[soda]", unit="pair", leave=False, ncols=100)
        kept = 0
        for ex in limited(ds, scan_cap):
            dia = ex.get("dialogue") or ex.get("dialog") or ex.get("utterances") or []
            seq = []
            for u in dia:
                s = to_str(u)
                s = keep_or_clip(clean_text(str(s or "")))
                if s: seq.append(s)
            # Adjacent utterances become You:/Bot: pairs.
            for a, b in zip(seq, seq[1:]):
                pairs.append(turn(a, b))
                kept += 1; pbar.update(1); overall.update(1)
                if kept >= keep_cap: break
            if kept >= keep_cap: break
        pbar.close()
        print(f"[ok] soda kept={kept}")
    except Exception as e:
        print(f"[skip] soda: {e}")
368
+
369
def collect_topical_chat(pairs: List[str], overall: tqdm):
    """
    Topical-Chat: try Hub mirrors, else fetch official GitHub JSON files.
    Keeps only conversation text (no reading sets).
    """
    # FIX: `urlreq` was referenced below but never imported anywhere in this
    # module, so the GitHub fallback always died with a NameError that the
    # broad except silently reported as a skip.
    import urllib.request as urlreq

    keep_cap, scan_cap = CAPS["topical"], SCAN["topical"]
    kept = 0

    # 1) Try Hub mirrors/splits first
    dsids = [
        "Conversational-Reasoning/Topical-Chat",
        "AmazonScience/Topical-Chat",
        "microsoft/Topical-Chat",
    ]
    splits = ["train","valid_freq","valid_rare","test_freq","test_rare","validation","test","valid_frequent","test_frequent"]

    for dsid in dsids:
        try:
            pbar = tqdm(total=keep_cap, desc=f"[topical:{dsid}]", unit="pair", leave=False, ncols=100)
            for split in splits:
                try:
                    ds = load_dataset(dsid, split=split, streaming=True)
                except Exception:
                    # Mirror doesn't have this split name; try the next one.
                    continue
                for ex in limited(ds, scan_cap):
                    dia = ex.get("messages") or ex.get("conversation") or ex.get("utterances") or ex.get("dialogue") or ex.get("content") or []
                    seq = []
                    for u in dia:
                        s = to_str(u)
                        s = keep_or_clip(clean_text(str(s or "")))
                        if s: seq.append(s)
                    # Adjacent utterances become You:/Bot: pairs.
                    for a, b in zip(seq, seq[1:]):
                        pairs.append(turn(a, b))
                        kept += 1; pbar.update(1); overall.update(1)
                        if kept >= keep_cap: break
                    if kept >= keep_cap: break
                if kept >= keep_cap: break
            pbar.close()
            if kept > 0:
                print(f"[ok] topical_chat ({dsid}) kept={kept}")
                return
        except Exception as e:
            print(f"[try next] topical_chat {dsid}: {e}")

    # 2) Fallback to official GitHub JSON (robust)
    try:
        names = ["train","valid_freq","valid_rare","test_freq","test_rare"]
        base = "https://raw.githubusercontent.com/alexa/Topical-Chat/master/conversations"
        pbar = tqdm(total=keep_cap, desc="[topical:github]", unit="pair", leave=False, ncols=100)
        for nm in names:
            try:
                with urlreq.urlopen(f"{base}/{nm}.json") as r:
                    data = json.loads(r.read().decode("utf-8"))
            except Exception:
                continue
            for _, convo in data.items():
                content = convo.get("content") or []
                seq = []
                for t in content:
                    s = keep_or_clip(clean_text(str(t.get("message") or t.get("text") or "")))
                    if s: seq.append(s)
                for a, b in zip(seq, seq[1:]):
                    pairs.append(turn(a, b))
                    kept += 1; pbar.update(1); overall.update(1)
                    if kept >= keep_cap: break
                if kept >= keep_cap: break
            if kept >= keep_cap: break
        pbar.close()
        if kept == 0:
            print("[skip] topical_chat: no usable conversations found")
        else:
            print(f"[ok] topical_chat (github) kept={kept}")
    except Exception as e:
        print(f"[skip] topical_chat (github): {e}")
443
+
444
+
445
+ # --------------------------
446
+ # Collectors (existing)
447
+ # --------------------------
448
def collect_shakespeare(pairs: List[str], overall: tqdm):
    """Stream the tiny-shakespeare text file and turn each non-empty line into a style-continuation pair."""
    try:
        ds = load_dataset(
            "text",
            data_files={"train": "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"},
            split="train",
            streaming=True,
        )
        cap = CAPS["shakespeare"]
        bar = tqdm(total=cap, desc="[shakespeare]", unit="pair", leave=False, ncols=100)
        n_kept = 0
        for row in ds:
            line = keep_or_clip(clean_text(row["text"]))
            if not line:
                continue
            pairs.append(turn("Continue in Shakespearean style.", line))
            n_kept += 1
            bar.update(1)
            overall.update(1)
            if n_kept >= cap:
                break
        bar.close()
        print(f"[ok] shakespeare kept={n_kept}")
    except Exception as e:
        print(f"[skip] shakespeare: {e}")
468
+
469
def collect_reddit_jokes(pairs: List[str], overall: tqdm):
    """Pull joke pairs from the first reachable Reddit-jokes mirror on the Hub."""
    candidates = ["SocialGrep/one-million-reddit-jokes", "SocialGrep/reddit_jokes", "timc1/reddit_jokes"]
    for dsid in candidates:
        try:
            stream = load_dataset(dsid, split="train", streaming=True)
            cap = CAPS["jokes"]
            bar = tqdm(total=cap, desc="[jokes]", unit="pair", leave=False, ncols=100)
            n_kept = 0
            for ex in limited(stream, SCAN["jokes"]):
                setup = keep_or_clip(clean_text(str(ex.get("title") or "")))
                punch = keep_or_clip(clean_text(str(ex.get("selftext") or ex.get("body") or "")))
                if punch and setup:
                    pairs.append(turn(setup, punch))
                elif setup:
                    # Title-only jokes become answers to a generic request.
                    pairs.append(turn("Tell me a short joke.", setup))
                else:
                    continue
                n_kept += 1
                bar.update(1)
                overall.update(1)
                if n_kept >= cap:
                    break
            bar.close()
            print(f"[ok] jokes {dsid} kept={n_kept}")
            return
        except Exception as e:
            print(f"[try next] jokes {dsid}: {e}")
    print("[skip] jokes: none worked")
492
+
493
def collect_dadjokes(pairs: List[str], overall: tqdm):
    """Collect setup/punchline pairs from the reddit-dadjokes dataset."""
    try:
        stream = load_dataset("shuttie/reddit-dadjokes", split="train", streaming=True)
        cap = CAPS["dadjokes"]
        bar = tqdm(total=cap, desc="[dadjokes]", unit="pair", leave=False, ncols=100)
        n_kept = 0
        for ex in limited(stream, SCAN["dadjokes"]):
            setup = keep_or_clip(clean_text(str(ex.get("setup") or ex.get("instruction") or ex.get("input") or "")))
            punch = keep_or_clip(clean_text(str(ex.get("punchline") or ex.get("output") or "")))
            # Both halves are required for a usable joke pair.
            if not (setup and punch):
                continue
            pairs.append(turn(setup, punch))
            n_kept += 1
            bar.update(1)
            overall.update(1)
            if n_kept >= cap:
                break
        bar.close()
        print(f"[ok] dadjokes kept={n_kept}")
    except Exception as e:
        print(f"[skip] dadjokes: {e}")
509
+
510
def collect_reddit_sarcasm(pairs: List[str], overall: tqdm):
    """Thewillonline/reddit-sarcasm — flexible parsing, scan+keep bars."""
    try:
        stream = load_dataset("Thewillonline/reddit-sarcasm", split="train", streaming=True)
        keep_cap, scan_cap = CAPS["rsarcasm"], SCAN["rsarcasm"]
        scanbar = tqdm(total=scan_cap, desc="[sarcasm scan]", unit="row", leave=False, ncols=100)
        keepbar = tqdm(total=keep_cap, desc="[sarcasm kept]", unit="pair", leave=False, ncols=100)

        # Known "User:/Comment:"-style layouts, tried in order.
        patterns = [
            re.compile(r"User\s*:\s*(.+?)\s*(?:Reddit\s*Comment|Comment|Reply)\s*:\s*(.+)", re.IGNORECASE | re.DOTALL),
            re.compile(r"Post\s*:\s*(.+?)\s*(?:Top\s*Comment|Comment)\s*:\s*(.+)", re.IGNORECASE | re.DOTALL),
        ]

        def split_pair(raw: str) -> Tuple[Optional[str], Optional[str]]:
            """Best-effort extraction of a (prompt, reply) pair from one raw row."""
            raw = raw.replace("<|endoftext|>", "\n")
            for pat in patterns:
                m = pat.search(raw)
                if m:
                    return m.group(1).strip(), m.group(2).strip()
            # No labeled layout: fall back to first two non-empty lines.
            lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
            if len(lines) >= 2:
                return lines[0], lines[1]
            if len(lines) == 1:
                return "Reply with sarcasm:", lines[0]
            return None, None

        n_kept = n_scanned = 0
        for ex in limited(stream, scan_cap):
            n_scanned += 1
            you, bot = split_pair(str(ex.get("text") or ""))
            you, bot = keep_or_clip(you), keep_or_clip(bot)
            if you and bot:
                pairs.append(turn(you, bot, "[STYLE=Snark]"))
                n_kept += 1
                keepbar.update(1)
                overall.update(1)
                if n_kept >= keep_cap:
                    break
            scanbar.update(1)
            if n_scanned % 2000 == 0:
                keepbar.set_postfix(rate=f"{n_kept/max(1,n_scanned):.2%}")
        scanbar.close()
        keepbar.close()
        print(f"[ok] reddit-sarcasm kept={n_kept} (scanned {n_scanned})")
    except Exception as e:
        print(f"[skip] reddit-sarcasm: {e}")
549
+
550
def collect_figlang(pairs: List[str], overall: tqdm):
    """FigLang-2020 sarcasm: last two context turns -> sarcastic response, snark-tagged."""
    for dsid in ["tasksource/figlang2020-sarcasm", "tasksource/figlang2020_sarcasm"]:
        try:
            stream = load_dataset(dsid, split="train", streaming=True)
            cap = CAPS["figlang"]
            bar = tqdm(total=cap, desc="[figlang]", unit="pair", leave=False, ncols=100)
            n_kept = 0
            for ex in limited(stream, SCAN["figlang"]):
                ctx = ex.get("context")
                if isinstance(ctx, list) and ctx:
                    # Keep only the two most recent context turns.
                    context_str = " ".join(str(c) for c in ctx[-2:])
                else:
                    context_str = str(ex.get("context") or ex.get("prompt") or "")
                reply = str(ex.get("response") or ex.get("answer") or ex.get("text") or "")
                context_str = keep_or_clip(clean_text(context_str))
                reply = keep_or_clip(clean_text(reply))
                if reply:
                    if context_str:
                        pairs.append(turn(context_str, reply, "[STYLE=Snark]"))
                    else:
                        pairs.append(turn("Reply with sarcasm:", reply, "[STYLE=Snark]"))
                    n_kept += 1
                    bar.update(1)
                    overall.update(1)
                if n_kept >= cap:
                    break
            bar.close()
            print(f"[ok] figlang {dsid} kept={n_kept}")
            return
        except Exception as e:
            print(f"[try next] figlang {dsid}: {e}")
    print("[skip] figlang")
578
+
579
def collect_showerthoughts(pairs: List[str], overall: tqdm):
    """Use REEDIT_submissions split 'Showerthoughts' directly (no 'train')."""
    try:
        stream = load_dataset("HuggingFaceGECLM/REDDIT_submissions", split="Showerthoughts", streaming=True)
        keep_cap, scan_cap = CAPS["shower"], SCAN["shower"]
        scanbar = tqdm(total=scan_cap, desc="[shower scan]", unit="row", leave=False, ncols=100)
        keepbar = tqdm(total=keep_cap, desc="[shower kept]", unit="pair", leave=False, ncols=100)

        n_kept = n_scanned = 0
        for ex in limited(stream, scan_cap):
            n_scanned += 1
            raw = get_first_nonempty(ex, ["title", "selftext", "text"]) or ""
            text = keep_or_clip(clean_text(raw))
            if text:
                pairs.append(turn("Give me a shower thought.", text))
                n_kept += 1
                keepbar.update(1)
                overall.update(1)
                if n_kept >= keep_cap:
                    break
            scanbar.update(1)
        scanbar.close()
        keepbar.close()
        print(f"[ok] showerthoughts kept={n_kept} (scanned {n_scanned})")
    except Exception as e:
        print(f"[skip] showerthoughts: {e}")
601
+
602
def collect_personas(pairs: List[str], overall: tqdm):
    """Non-streaming is more reliable for this dataset."""
    try:
        rows = load_dataset("NapthaAI/twitter_personas")["train"]
        cap = CAPS["personas"]
        bar = tqdm(total=cap, desc="[personas]", unit="pair", leave=False, ncols=100)
        n_kept = 0
        for ex in rows:
            desc = get_first_nonempty(ex, ["description","persona","bio","text","content","full_text"])
            if not isinstance(desc, str) and isinstance(ex.get("content"), dict):
                # Some rows nest the text one level deeper under 'content'.
                desc = ex["content"].get("text")
            desc = keep_or_clip(clean_text(str(desc or "")))
            if not desc:
                continue
            pairs.append(turn("Adopt this persona in one sentence.", desc, "[FORM=TWEET]"))
            n_kept += 1
            bar.update(1)
            overall.update(1)
            if n_kept >= cap:
                break
        bar.close()
        print(f"[ok] personas kept={n_kept}")
    except Exception as e:
        print(f"[skip] personas: {e}")
622
+
623
def collect_tweeteval(pairs: List[str], overall: tqdm):
    """Handle super_tweeteval (text_1/text_2, etc.) and fallback tweet_eval."""
    default_prompt = "React with a sharp one-liner."

    def extract_pair(ex):
        """Pick a (prompt, reply) pair from whichever field layout this config uses."""
        t = ex.get("text")
        if isinstance(t, str) and t.strip():
            return default_prompt, t
        for a, b in [("text_1","text_2"), ("sentence1","sentence2"),
                     ("premise","hypothesis"), ("question","answer"),
                     ("context","response"), ("tweet1","tweet2")]:
            t1, t2 = ex.get(a), ex.get(b)
            if isinstance(t1, str) and t1.strip() and isinstance(t2, str) and t2.strip():
                return t1, t2
        return None

    def run_on(dsname, pick, is_super):
        """Stream one config and append kept pairs; returns how many were kept."""
        keep_cap, scan_cap = CAPS["tweeteval"], SCAN["tweeteval"]
        bar = tqdm(total=keep_cap, desc=f"[tweeteval:{pick}]", unit="pair", leave=False, ncols=100)
        n_kept = 0
        stream = load_dataset(dsname, pick, split="train", streaming=True)
        for ex in limited(stream, scan_cap):
            if is_super:
                pair = extract_pair(ex)
            elif ex.get("text"):
                pair = (default_prompt, ex.get("text"))
            else:
                pair = None
            if not pair:
                continue
            you, bot = pair
            you = keep_or_clip(clean_text(str(you or "")))
            bot = keep_or_clip(clean_text(str(bot or "")))
            if not (you and bot):
                continue
            # Real-text prompts (not the canned one) get the snark tag.
            tag = "[STYLE=Snark]" if you and you != default_prompt else ""
            pairs.append(turn(you, bot, tag))
            n_kept += 1
            bar.update(1)
            overall.update(1)
            if n_kept >= keep_cap:
                break
        bar.close()
        return n_kept

    kept_total = 0
    try:
        cfgs = get_dataset_config_names("cardiffnlp/super_tweeteval")
    except Exception:
        cfgs = []
    prio = ["irony","sarcasm","humor","sentiment","emoji","emotion","stance","offensive","hate"]
    ordered = [c for c in prio if c in cfgs] + [c for c in cfgs if c not in prio]
    for pick in ordered:
        kept_total += run_on("cardiffnlp/super_tweeteval", pick, True)
        if kept_total >= CAPS["tweeteval"]:
            print(f"[ok] tweeteval(super) kept={kept_total}")
            return
    if kept_total == 0:
        # Nothing usable in super_tweeteval: fall back to the base dataset.
        try:
            base_cfgs = get_dataset_config_names("cardiffnlp/tweet_eval")
        except Exception:
            base_cfgs = []
        ordered_b = [c for c in prio if c in base_cfgs] + [c for c in base_cfgs if c not in prio]
        for pick in ordered_b:
            kept_total += run_on("cardiffnlp/tweet_eval", pick, False)
            if kept_total >= CAPS["tweeteval"]:
                print(f"[ok] tweeteval(base) kept={kept_total}")
                return
    print(f"[ok] tweeteval kept={kept_total}")
677
+
678
def collect_fourchan(pairs: List[str], overall: tqdm):
    """4chan prompt/response pairs, with a hard ceiling of 195 kept pairs."""
    try:
        stream = load_dataset("sbussiso/4chan-dataset", split="train", streaming=True)
        cap = min(CAPS["fourchan"], 195)
        bar = tqdm(total=cap, desc="[4chan]", unit="pair", leave=False, ncols=100)
        n_kept = 0
        for ex in limited(stream, SCAN["fourchan"]):
            prompt = keep_or_clip(clean_text(str(ex.get("prompt") or "")))
            resp = keep_or_clip(clean_text(str(ex.get("response") or "")))
            if prompt and resp:
                pairs.append(turn(prompt, resp))
                n_kept += 1
                bar.update(1)
                overall.update(1)
            else:
                # No explicit pair: fall back to free-form text fields.
                txt = keep_or_clip(clean_text(str(ex.get("text") or ex.get("body") or ex.get("content") or "")))
                if txt:
                    pairs.append(turn("Drop a spicy one-liner.", txt))
                    n_kept += 1
                    bar.update(1)
                    overall.update(1)
            if n_kept >= cap:
                break
        bar.close()
        print(f"[ok] 4chan kept={n_kept}")
    except Exception as e:
        print(f"[skip] 4chan: {e}")
700
+
701
def collect_elon_trump(pairs: List[str], overall: tqdm):
    """Single tweets recast as one-sentence hot takes (tweet-form tagged)."""
    try:
        stream = load_dataset("MasaFoundation/Twitter_X_Elon_vs_Trump", split="train", streaming=True, revision="refs/convert/parquet")
        keep_cap, scan_cap = CAPS["elonvtrump"], SCAN["elonvtrump"]
        scanbar = tqdm(total=scan_cap, desc="[elon_vs_trump scan]", unit="row", leave=False, ncols=100)
        keepbar = tqdm(total=keep_cap, desc="[elon_vs_trump kept]", unit="pair", leave=False, ncols=100)
        n_kept = n_scanned = 0
        for ex in limited(stream, scan_cap):
            n_scanned += 1
            raw = get_first_nonempty(ex, ["content","text","tweet","full_text"]) or ""
            txt = keep_or_clip(clean_text(raw))
            if txt:
                pairs.append(turn("[FORM=TWEET] One sentence hot take:", txt, "[FORM=TWEET]"))
                n_kept += 1
                keepbar.update(1)
                overall.update(1)
                if n_kept >= keep_cap:
                    break
            scanbar.update(1)
        scanbar.close()
        keepbar.close()
        print(f"[ok] Elon_vs_Trump kept={n_kept} (scanned {n_scanned})")
    except Exception as e:
        print(f"[skip] Elon_vs_Trump: {e}")
721
+
722
+
723
def collect_hh_rlhf(pairs: List[str], overall: tqdm):
    """
    Anthropic HH-RLHF (English multi-turn). We parse the 'chosen' conversation
    into adjacent Human->Assistant pairs and emit You:/Bot: blocks.

    Note: the previous revision compiled two HUMAN/ASSIST regexes that were
    never used (the actual split is the inline pattern below); that dead code
    has been removed.
    """
    try:
        ds = load_dataset("Anthropic/hh-rlhf", split="train", streaming=True)
        keep_cap, scan_cap = CAPS["hhrlhf"], SCAN["hhrlhf"]
        pbar = tqdm(total=keep_cap, desc="[hh-rlhf]", unit="pair", leave=False, ncols=100)
        kept = 0

        for ex in limited(ds, scan_cap):
            # Prefer the curated 'chosen' conversation; fall back to 'prompt' if needed
            convo = ex.get("chosen") or ex.get("prompt") or ""
            if not isinstance(convo, str) or not convo.strip():
                continue

            # Split on the explicit role markers; the result alternates
            # ["", "Human:", text, "Assistant:", text, ...] so odd indices
            # are markers and the following element is that turn's content.
            tokens = re.split(r"(Human:|Assistant:)", convo)
            turns = []
            for i in range(1, len(tokens), 2):
                role = tokens[i].strip().lower()
                content = tokens[i + 1] if i + 1 < len(tokens) else ""
                content = keep_or_clip(clean_text(str(content or "")))
                if not content:
                    continue
                if role.startswith("human"):
                    turns.append(("user", content))
                elif role.startswith("assistant"):
                    turns.append(("assistant", content))

            # Emit adjacent user->assistant pairs
            for a, b in zip(turns, turns[1:]):
                if a[0] == "user" and b[0] == "assistant":
                    pairs.append(turn(a[1], b[1]))
                    kept += 1
                    pbar.update(1)
                    overall.update(1)
                    if kept >= keep_cap:
                        break
            if kept >= keep_cap:
                break

        pbar.close()
        print(f"[ok] hh-rlhf kept={kept}")
    except Exception as e:
        print(f"[skip] hh-rlhf: {e}")
773
+
774
+
775
+ # --------------------------
776
+ # Build corpus
777
+ # --------------------------
778
def build_corpus() -> Path:
    """Run every collector, dedupe on the bot reply, shuffle, cap, and write corpus.txt."""
    pairs: List[str] = []
    total_target = sum(CAPS.values())
    print("[1/6] Collecting & reformatting datasets (streaming, capped)…")
    overall = tqdm(total=total_target, desc="[all] collecting", unit="pair", ncols=100)

    collectors = [
        collect_oasst1,
        collect_hh_rlhf,
        collect_ultrachat,
        collect_dailydialog,
        collect_bst,
        collect_personachat,
        collect_soda,
        collect_topical_chat,  # optional; will skip if not available
        collect_shakespeare,
        collect_reddit_jokes,
        collect_dadjokes,
        collect_reddit_sarcasm,
        collect_figlang,
        collect_showerthoughts,
        collect_personas,
        collect_tweeteval,
        collect_fourchan,
        collect_elon_trump,
    ]
    for collector in collectors:
        try:
            collector(pairs, overall)
        except Exception as e:
            print(f"[collector error] {collector.__name__}: {e}")

    overall.close()
    print("[2/6] Deduplicating & clipping…")
    seen = set()
    deduped = []
    for block in pairs:
        # Dedup key: the first Bot: reply when present, otherwise the whole block.
        try:
            bot_line = [ln for ln in block.splitlines() if ln.startswith("Bot:")][0]
            key = bot_line[4:].strip().lower()
        except Exception:
            key = block.strip().lower()
        if key in seen:
            continue
        seen.add(key)
        deduped.append(block)

    random.shuffle(deduped)
    if len(deduped) > MAX_TOTAL_PAIRS:
        deduped = deduped[:MAX_TOTAL_PAIRS]

    out_path = SAVE_DIR / "corpus.txt"
    out_path.write_text("\n".join(deduped), encoding="utf-8")
    print(f" wrote {len(deduped)} pairs → {out_path}")
    return out_path
830
+
831
+ # --------------------------
832
+ # SentencePiece tokenizer
833
+ # --------------------------
834
def train_spm(corpus_path: Path) -> spm.SentencePieceProcessor:
    """Train a unigram SentencePiece model on the corpus and return a loaded processor."""
    print("[3/6] Training SentencePiece tokenizer…")
    # pad_id=-1 disables padding; ids 0/1/2 are reserved for unk/bos/eos.
    spm.SentencePieceTrainer.Train(
        input=str(corpus_path),
        model_prefix=str(TOKENIZER_PREFIX),
        vocab_size=VOCAB_SIZE,
        model_type="unigram",
        character_coverage=1.0,
        user_defined_symbols=USER_SYMBOLS,
        bos_id=1, eos_id=2, unk_id=0, pad_id=-1,
    )
    processor = spm.SentencePieceProcessor()
    processor.load(f"{TOKENIZER_PREFIX}.model")
    print(f" tokenizer saved at {TOKENIZER_PREFIX}.model")
    return processor
849
+
850
+ # --------------------------
851
+ # Encode to token IDs
852
+ # --------------------------
853
def encode_corpus_to_ids(sp: spm.SentencePieceProcessor, corpus_path: Path):
    """Encode the corpus line-by-line (EOS after each line and each block) and split 97/3 train/val."""
    print("[4/6] Encoding corpus to token IDs…")
    raw = corpus_path.read_text(encoding="utf-8")
    blocks = [b for b in raw.split("\n\n") if b.strip()]
    eos = sp.eos_id()
    token_ids = []
    for block in blocks:
        for line in block.splitlines():
            if not line.strip():
                continue
            token_ids.extend(sp.encode(line, out_type=int))
            token_ids.append(eos)
        token_ids.append(eos)  # extra EOS marks the block boundary
    arr = np.array(token_ids, dtype=np.int32)
    cut = int(len(arr) * 0.97)
    train_ids = torch.tensor(arr[:cut], dtype=torch.long, device=DEVICE)
    val_ids = torch.tensor(arr[cut:], dtype=torch.long, device=DEVICE)
    print(f" tokens: train={train_ids.numel():,}, val={val_ids.numel():,}, vocab={sp.vocab_size()}")
    return train_ids, val_ids, sp.vocab_size()
870
+
871
+ # --------------------------
872
+ # Tiny GPT model
873
+ # --------------------------
874
class CausalSelfAttention(nn.Module):
    """Multi-head self-attention with a fixed lower-triangular (causal) mask."""

    def __init__(self, n_embd, n_head, dropout=0.0, block_size=256):
        super().__init__()
        assert n_embd % n_head == 0
        self.n_head = n_head
        self.head_dim = n_embd // n_head
        # Single projection produces Q, K and V in one matmul.
        self.qkv = nn.Linear(n_embd, 3*n_embd, bias=False)
        self.proj = nn.Linear(n_embd, n_embd, bias=False)
        self.attn_drop = nn.Dropout(dropout)
        self.resid_drop = nn.Dropout(dropout)
        causal = torch.tril(torch.ones(block_size, block_size))
        self.register_buffer("mask", causal.view(1, 1, block_size, block_size))

    def forward(self, x):
        B, T, C = x.shape
        q, k, v = self.qkv(x).chunk(3, dim=-1)
        # (B, T, C) -> (B, heads, T, head_dim)
        q = q.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        k = k.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        v = v.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        scores = (q @ k.transpose(-2, -1)) / math.sqrt(self.head_dim)
        # Forbid attention to future positions.
        scores = scores.masked_fill(self.mask[:, :, :T, :T] == 0, float('-inf'))
        weights = self.attn_drop(torch.softmax(scores, dim=-1))
        out = (weights @ v).transpose(1, 2).contiguous().view(B, T, C)
        return self.resid_drop(self.proj(out))
899
+
900
class Block(nn.Module):
    """Pre-norm transformer block: attention then a 4x MLP, each behind a residual."""

    def __init__(self, n_embd, n_head, dropout=0.0, block_size=256):
        super().__init__()
        self.ln1 = nn.LayerNorm(n_embd)
        self.attn = CausalSelfAttention(n_embd, n_head, dropout, block_size)
        self.ln2 = nn.LayerNorm(n_embd)
        self.mlp = nn.Sequential(
            nn.Linear(n_embd, 4*n_embd),
            nn.GELU(),
            nn.Linear(4*n_embd, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        x = x + self.attn(self.ln1(x))
        return x + self.mlp(self.ln2(x))
916
+
917
class TinyGPT(nn.Module):
    """Minimal GPT: token + learned positional embeddings, pre-norm blocks, untied LM head."""

    def __init__(self, vocab_size, n_layer, n_head, n_embd, block_size, dropout=0.0):
        super().__init__()
        self.block_size = block_size
        self.tok_emb = nn.Embedding(vocab_size, n_embd)
        self.pos_emb = nn.Embedding(block_size, n_embd)
        self.blocks = nn.ModuleList([Block(n_embd, n_head, dropout, block_size) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        self.head = nn.Linear(n_embd, vocab_size, bias=False)
        self.apply(self._init)

    def _init(self, m):
        # GPT-2-style init: N(0, 0.02) weights, zero biases, identity LayerNorm.
        if isinstance(m, (nn.Linear, nn.Embedding)): nn.init.normal_(m.weight, mean=0.0, std=0.02)
        if isinstance(m, nn.Linear) and m.bias is not None: nn.init.zeros_(m.bias)
        if isinstance(m, nn.LayerNorm): nn.init.ones_(m.weight); nn.init.zeros_(m.bias)

    def forward(self, idx, targets=None):
        """Return (logits, loss); loss is None unless targets are given."""
        B, T = idx.shape
        assert T <= self.block_size
        pos = torch.arange(0, T, device=idx.device)
        x = self.tok_emb(idx) + self.pos_emb(pos)[None, :, :]
        for blk in self.blocks:
            x = blk(x)
        x = self.ln_f(x)
        logits = self.head(x)
        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))
        return logits, loss

    @torch.no_grad()
    def generate(self, idx, max_new_tokens=200, temperature=0.8, top_k=50, top_p=0.95, repetition_penalty=1.0):
        """Autoregressively sample up to max_new_tokens with top-k/top-p filtering.

        Repetition penalty follows CTRL (Keskar et al. 2019): positive logits of
        already-generated tokens are divided by the penalty, negative logits are
        multiplied by it, so repeats are always discouraged. (The previous code
        divided unconditionally, which *increased* the probability of repeated
        tokens whose logits were negative.)
        """
        self.eval()
        for _ in range(max_new_tokens):
            idx_cond = idx[:, -self.block_size:]
            logits, _ = self.forward(idx_cond)
            logits = logits[:, -1, :]
            if repetition_penalty != 1.0:
                seen = torch.unique(idx_cond[0])
                picked = logits[:, seen]
                logits[:, seen] = torch.where(picked > 0,
                                              picked / repetition_penalty,
                                              picked * repetition_penalty)
            logits = logits / max(1e-8, temperature)
            if top_k is not None:
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                cutoff = v[:, -1].unsqueeze(-1)
                logits = torch.where(logits < cutoff, torch.full_like(logits, -1e9), logits)
            if top_p is not None:
                sorted_logits, sorted_idx = torch.sort(logits, descending=True)
                cdf = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
                drop = cdf > top_p
                drop[:, 0] = False  # always keep the single most likely token
                sorted_logits[drop] = -1e9
                logits = torch.zeros_like(logits).scatter(1, sorted_idx, sorted_logits)
            probs = torch.softmax(logits, dim=-1)
            next_id = torch.multinomial(probs, num_samples=1)
            idx = torch.cat([idx, next_id], dim=1)
        return idx
968
+
969
+ # --------------------------
970
+ # Data loader for token IDs
971
+ # --------------------------
972
def get_batch(split_ids: torch.Tensor, B: int, T: int):
    """Sample B random contiguous windows of length T; targets are the inputs shifted by one token."""
    starts = torch.randint(0, split_ids.numel() - T - 1, (B,), device=split_ids.device)
    x = torch.stack([split_ids[s:s+T] for s in starts])
    y = torch.stack([split_ids[s+1:s+T+1] for s in starts])
    return x, y
977
+
978
+ # --------------------------
979
+ # Train loop
980
+ # --------------------------
981
def train_model(vocab_size, train_ids, val_ids):
    """Train TinyGPT with warmup + cosine LR and optional fp16 AMP on CUDA; save weights and config."""
    print("[5/6] Training tiny GPT on", DEVICE.type.upper(), "…")
    model = TinyGPT(vocab_size, n_layer, n_head, n_embd, block_size, dropout).to(DEVICE)
    params_m = sum(p.numel() for p in model.parameters())/1e6
    print(f" params: {params_m:.2f}M")
    optimizer = torch.optim.AdamW(model.parameters(), lr=base_lr, betas=(0.9, 0.95), weight_decay=0.0)

    use_amp = DEVICE.type == "cuda"
    scaler = torch.amp.GradScaler("cuda", enabled=use_amp)
    autocast = (lambda: torch.amp.autocast("cuda", dtype=torch.float16)) if use_amp else nullcontext

    t0 = time.time()
    best_val = float("inf")

    def get_lr(step):
        # Linear warmup, then cosine decay from base_lr down to min_lr.
        warmup = max(1, int(train_steps * warmup_ratio))
        if step < warmup:
            return base_lr * (step+1)/warmup
        progress = (step - warmup) / max(1, train_steps - warmup)
        return min_lr + 0.5*(base_lr - min_lr)*(1 + math.cos(math.pi * min(1.0, progress)))

    @torch.no_grad()
    def eval_loss(iters=80):
        # Mean validation loss over a handful of random batches.
        model.eval()
        losses = []
        for _ in range(iters):
            xb, yb = get_batch(val_ids, min(batch_size, 32), block_size)
            with autocast():
                _, loss = model(xb, yb)
            losses.append(loss.item())
        model.train()
        return float(sum(losses)/len(losses))

    model.train()
    step = 0
    pbar = tqdm(total=train_steps, ncols=100, desc="[train]")
    # Stop at the step budget or the wall-clock budget, whichever hits first.
    while step < train_steps and (time.time()-t0) < MAX_SECONDS:
        lr = get_lr(step)
        for pg in optimizer.param_groups:
            pg["lr"] = lr
        optimizer.zero_grad(set_to_none=True)

        total_loss = 0.0
        for _ in range(accum_steps):
            xb, yb = get_batch(train_ids, batch_size, block_size)
            with autocast():
                _, loss = model(xb, yb)
            scaled = loss / accum_steps
            if use_amp:
                scaler.scale(scaled).backward()
            else:
                scaled.backward()
            total_loss += loss.item()

        if use_amp:
            # Gradients must be unscaled before clipping under AMP.
            scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        if use_amp:
            scaler.step(optimizer)
            scaler.update()
        else:
            optimizer.step()

        step += 1
        pbar.update(1)
        if step % log_interval == 0 or step == 1:
            pbar.set_postfix(train=f"{total_loss/accum_steps:.3f}", lr=f"{lr:.2e}")

        if step % eval_every == 0:
            vl = eval_loss()
            best_val = min(best_val, vl)
            print(f" eval loss {vl:.3f} | best {best_val:.3f}")

    pbar.close()
    elapsed = time.time() - t0
    print(f" done in {elapsed:.1f}s | best val {best_val:.3f}")

    # Persist weights plus the hyper-parameters needed to rebuild the model.
    ckpt_path = SAVE_DIR / "tinygpt.pt"
    torch.save(model.state_dict(), ckpt_path)
    (SAVE_DIR / "model_config.json").write_text(json.dumps({
        "vocab_size": int(vocab_size),
        "n_layer": n_layer, "n_head": n_head, "n_embd": n_embd,
        "block_size": block_size, "dropout": dropout
    }, indent=2))
    print(f"[saved] weights → {ckpt_path}")
    return model
1061
+
1062
+ # --------------------------
1063
+ # Sampling helper
1064
+ # --------------------------
1065
def sample_chat(sp: spm.SentencePieceProcessor, model: TinyGPT, prompt: str, max_new_tokens=200):
    """Format the prompt as a You:/Bot: turn, sample a continuation, and decode it."""
    seed_ids = sp.encode(f"You: {prompt}\nBot:", out_type=int)
    x = torch.tensor([seed_ids], dtype=torch.long, device=DEVICE)
    with torch.no_grad():
        y = model.generate(x, max_new_tokens=max_new_tokens, temperature=TEMP, top_k=TOP_K, top_p=TOP_P, repetition_penalty=REP_PEN)
    return sp.decode(y[0].tolist())
1072
+
1073
+ # --------------------------
1074
+ # Main
1075
+ # --------------------------
1076
def main():
    """End-to-end pipeline: corpus -> tokenizer -> token ids -> train -> samples (each stage cached on disk)."""
    corpus_path = SAVE_DIR / "corpus.txt"
    spm_model = SAVE_DIR / "spm_chat.model"
    if corpus_path.exists():
        print(f"[cache] using {corpus_path}")
    else:
        corpus_path = build_corpus()

    sp = spm.SentencePieceProcessor()
    if spm_model.exists():
        sp.load(str(spm_model))
        print(f"[cache] using {spm_model}")
    else:
        sp = train_spm(corpus_path)

    enc_train = SAVE_DIR / "train_ids.pt"
    enc_val = SAVE_DIR / "val_ids.pt"
    vocab_txt = SAVE_DIR / "vocab_size.txt"

    if enc_train.exists() and enc_val.exists() and vocab_txt.exists():
        train_ids = torch.load(enc_train, map_location=DEVICE)
        val_ids = torch.load(enc_val, map_location=DEVICE)
        vocab_size = int(vocab_txt.read_text())
        print(f"[cache] loaded ids: train={train_ids.numel():,}, val={val_ids.numel():,}, vocab={vocab_size}")
    else:
        train_ids, val_ids, vocab_size = encode_corpus_to_ids(sp, corpus_path)
        torch.save(train_ids, enc_train)
        torch.save(val_ids, enc_val)
        vocab_txt.write_text(str(vocab_size))
        print("[cache] saved encoded ids")

    model = train_model(vocab_size, train_ids, val_ids)

    print("\n[6/6] Samples:\n")
    prompts = [
        "Give me a spicy take on AI.",
        "Roast my messy desk.",
        "Explain recursion like you're annoyed.",
        "Write a satirical headline about coffee.",
        "Give me a shower thought about umbrellas.",
        "Tell me a one-liner about deadlines.",
        "Stay in Shakespeare mode and flatter me.",
        "Reply sarcastically to: I love meetings.",
        "What's a good way to say no to a meeting politely?",
        "Roleplay as my productivity coach for two turns.",
    ]
    out_path = SAVE_DIR / "samples.txt"
    with out_path.open("w", encoding="utf-8") as f:
        for p in prompts:
            txt = sample_chat(sp, model, p, max_new_tokens=200)
            print("----\n" + txt)
            f.write("----\n" + txt + "\n")
    print(f"\n[saved] samples → {out_path}")
1130
+ if __name__ == "__main__":
1131
+ main()