AbdullahNasir commited on
Commit
17e2432
·
1 Parent(s): cd2cfc6

Upload 20 files

Browse files
14-11-23.ipynb ADDED
@@ -0,0 +1,818 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).\n",
13
+ "Using a pipeline without specifying a model name and revision in production is not recommended.\n",
14
+ "Ignoring args : ('bert-base-uncased',)\n"
15
+ ]
16
+ },
17
+ {
18
+ "data": {
19
+ "text/plain": [
20
+ "[{'label': 'NEGATIVE', 'score': 0.9996927976608276}]"
21
+ ]
22
+ },
23
+ "execution_count": 3,
24
+ "metadata": {},
25
+ "output_type": "execute_result"
26
+ }
27
+ ],
28
+ "source": [
29
+ "from transformers import pipeline\n",
30
+ "\n",
31
+ "checkpoint = 'bert-base-uncased'\n",
32
+ "\n",
33
+ "classifier = pipeline('sentiment-analysis')\n",
34
+ "classifier('I am disappointed in myself', checkpoint)"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 12,
40
+ "metadata": {},
41
+ "outputs": [
42
+ {
43
+ "name": "stderr",
44
+ "output_type": "stream",
45
+ "text": [
46
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
47
+ ]
48
+ },
49
+ {
50
+ "name": "stdout",
51
+ "output_type": "stream",
52
+ "text": [
53
+ "['this', 'is', 'a', 'sentence', 'this', 'is', 'another', 'sentence', '.'] [2023, 2003, 1037, 6251, 2023, 2003, 2178, 6251, 1012] {'input_ids': [101, 2023, 2003, 1037, 6251, 2023, 2003, 2178, 6251, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}\n",
54
+ "{'input_ids': [101, 2023, 2003, 1037, 6251, 2023, 2003, 2178, 6251, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}\n",
55
+ "[CLS] this is a sentence this is another sentence. [SEP]\n",
56
+ "{'input_ids': tensor([[ 101, 2023, 2003, 1037, 6251, 102, 0],\n",
57
+ " [ 101, 2023, 2003, 2178, 6251, 1012, 102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0],\n",
58
+ " [0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0],\n",
59
+ " [1, 1, 1, 1, 1, 1, 1]])}\n",
60
+ "torch.Size([2, 7, 768])\n"
61
+ ]
62
+ },
63
+ {
64
+ "name": "stderr",
65
+ "output_type": "stream",
66
+ "text": [
67
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
68
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
69
+ ]
70
+ },
71
+ {
72
+ "name": "stdout",
73
+ "output_type": "stream",
74
+ "text": [
75
+ "torch.Size([2, 2])\n",
76
+ "tensor([[0.6280, 0.3720],\n",
77
+ " [0.6231, 0.3769]], grad_fn=<SoftmaxBackward0>)\n"
78
+ ]
79
+ },
80
+ {
81
+ "data": {
82
+ "text/plain": [
83
+ "{0: 'LABEL_0', 1: 'LABEL_1'}"
84
+ ]
85
+ },
86
+ "execution_count": 12,
87
+ "metadata": {},
88
+ "output_type": "execute_result"
89
+ }
90
+ ],
91
+ "source": [
92
+ "from transformers import AutoTokenizer\n",
93
+ "\n",
94
+ "checkpoint = 'bert-base-uncased'\n",
95
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
96
+ "\n",
97
+ "raw_inputs = ['This is a sentence', 'This is another sentence.']\n",
98
+ "\n",
99
+ "input_tokens = tokenizer.tokenize(raw_inputs)\n",
100
+ "input_ids = tokenizer.convert_tokens_to_ids(input_tokens)\n",
101
+ "inputs = tokenizer.prepare_for_model(input_ids)\n",
102
+ "\n",
103
+ "print(input_tokens, input_ids, inputs)\n",
104
+ "print(inputs)\n",
105
+ "print(tokenizer.decode(inputs['input_ids']))\n",
106
+ "\n",
107
+ "direct_inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')\n",
108
+ "print(direct_inputs)\n",
109
+ "\n",
110
+ "from transformers import AutoModel\n",
111
+ "\n",
112
+ "model = AutoModel.from_pretrained(checkpoint)\n",
113
+ "outputs = model(**direct_inputs)\n",
114
+ "print(outputs.last_hidden_state.shape)\n",
115
+ "\n",
116
+ "from transformers import AutoModelForSequenceClassification\n",
117
+ "\n",
118
+ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
119
+ "outputs = model(**direct_inputs)\n",
120
+ "print(outputs.logits.shape)\n",
121
+ "\n",
122
+ "import torch\n",
123
+ "\n",
124
+ "predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
125
+ "print(predictions)\n",
126
+ "\n",
127
+ "model.config.id2label"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 15,
133
+ "metadata": {},
134
+ "outputs": [],
135
+ "source": [
136
+ "from transformers import AutoModel\n",
137
+ "\n",
138
+ "checkpoint = 'bert-base-uncased'\n",
139
+ "model = AutoModel.from_pretrained(checkpoint)\n",
140
+ "\n",
141
+ "from transformers import AutoConfig, BertModel\n",
142
+ "\n",
143
+ "config = AutoConfig.from_pretrained(checkpoint)\n",
144
+ "model = BertModel(config)\n",
145
+ "\n",
146
+ "from transformers import BertConfig\n",
147
+ "\n",
148
+ "config = BertConfig.from_pretrained(checkpoint)\n",
149
+ "model = BertModel(config)\n",
150
+ "\n",
151
+ "model.save_pretrained('my-bert-model')\n",
152
+ "model = BertModel.from_pretrained('my-bert-model')"
153
+ ]
154
+ },
155
+ {
156
+ "cell_type": "code",
157
+ "execution_count": 16,
158
+ "metadata": {},
159
+ "outputs": [
160
+ {
161
+ "data": {
162
+ "text/plain": [
163
+ "DatasetDict({\n",
164
+ " train: Dataset({\n",
165
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
166
+ " num_rows: 3668\n",
167
+ " })\n",
168
+ " validation: Dataset({\n",
169
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
170
+ " num_rows: 408\n",
171
+ " })\n",
172
+ " test: Dataset({\n",
173
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
174
+ " num_rows: 1725\n",
175
+ " })\n",
176
+ "})"
177
+ ]
178
+ },
179
+ "execution_count": 16,
180
+ "metadata": {},
181
+ "output_type": "execute_result"
182
+ }
183
+ ],
184
+ "source": [
185
+ "from datasets import load_dataset\n",
186
+ "\n",
187
+ "raw_datasets = load_dataset('glue', 'mrpc')\n",
188
+ "raw_datasets"
189
+ ]
190
+ },
191
+ {
192
+ "cell_type": "code",
193
+ "execution_count": 17,
194
+ "metadata": {},
195
+ "outputs": [
196
+ {
197
+ "data": {
198
+ "text/plain": [
199
+ "Dataset({\n",
200
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
201
+ " num_rows: 3668\n",
202
+ "})"
203
+ ]
204
+ },
205
+ "execution_count": 17,
206
+ "metadata": {},
207
+ "output_type": "execute_result"
208
+ }
209
+ ],
210
+ "source": [
211
+ "raw_datasets['train']"
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": 18,
217
+ "metadata": {},
218
+ "outputs": [
219
+ {
220
+ "data": {
221
+ "text/plain": [
222
+ "{'sentence1': 'Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .',\n",
223
+ " 'sentence2': 'Referring to him as only \" the witness \" , Amrozi accused his brother of deliberately distorting his evidence .',\n",
224
+ " 'label': 1,\n",
225
+ " 'idx': 0}"
226
+ ]
227
+ },
228
+ "execution_count": 18,
229
+ "metadata": {},
230
+ "output_type": "execute_result"
231
+ }
232
+ ],
233
+ "source": [
234
+ "raw_datasets['train'][0]"
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": 19,
240
+ "metadata": {},
241
+ "outputs": [
242
+ {
243
+ "data": {
244
+ "text/plain": [
245
+ "{'sentence1': Value(dtype='string', id=None),\n",
246
+ " 'sentence2': Value(dtype='string', id=None),\n",
247
+ " 'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None),\n",
248
+ " 'idx': Value(dtype='int32', id=None)}"
249
+ ]
250
+ },
251
+ "execution_count": 19,
252
+ "metadata": {},
253
+ "output_type": "execute_result"
254
+ }
255
+ ],
256
+ "source": [
257
+ "raw_datasets['train'].features"
258
+ ]
259
+ },
260
+ {
261
+ "cell_type": "code",
262
+ "execution_count": 23,
263
+ "metadata": {},
264
+ "outputs": [
265
+ {
266
+ "data": {
267
+ "application/vnd.jupyter.widget-view+json": {
268
+ "model_id": "aace1e2dface4a91891e4f3f293f72a3",
269
+ "version_major": 2,
270
+ "version_minor": 0
271
+ },
272
+ "text/plain": [
273
+ "Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
274
+ ]
275
+ },
276
+ "metadata": {},
277
+ "output_type": "display_data"
278
+ },
279
+ {
280
+ "name": "stdout",
281
+ "output_type": "stream",
282
+ "text": [
283
+ "{'train': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask']}\n"
284
+ ]
285
+ }
286
+ ],
287
+ "source": [
288
+ "from transformers import AutoTokenizer\n",
289
+ "\n",
290
+ "checkpoint = 'bert-base-uncased'\n",
291
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
292
+ "\n",
293
+ "def tokenize_function(example):\n",
294
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
295
+ " padding='max_length', truncation=True, max_length=128)\n",
296
+ " \n",
297
+ "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
298
+ "print(tokenized_datasets.column_names)"
299
+ ]
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": 24,
304
+ "metadata": {},
305
+ "outputs": [
306
+ {
307
+ "data": {
308
+ "text/plain": [
309
+ "Dataset({\n",
310
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
311
+ " num_rows: 3668\n",
312
+ "})"
313
+ ]
314
+ },
315
+ "execution_count": 24,
316
+ "metadata": {},
317
+ "output_type": "execute_result"
318
+ }
319
+ ],
320
+ "source": [
321
+ "tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
322
+ "tokenized_datasets = tokenized_datasets.rename_column('label', 'labels')\n",
323
+ "tokenized_datasets = tokenized_datasets.with_format('torch')\n",
324
+ "tokenized_datasets['train']"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": 26,
330
+ "metadata": {},
331
+ "outputs": [],
332
+ "source": [
333
+ "small_train_dataset = tokenized_datasets['train'].select(range(100))"
334
+ ]
335
+ },
336
+ {
337
+ "cell_type": "code",
338
+ "execution_count": 27,
339
+ "metadata": {},
340
+ "outputs": [
341
+ {
342
+ "data": {
343
+ "text/plain": [
344
+ "DatasetDict({\n",
345
+ " train: Dataset({\n",
346
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
347
+ " num_rows: 3668\n",
348
+ " })\n",
349
+ " validation: Dataset({\n",
350
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
351
+ " num_rows: 408\n",
352
+ " })\n",
353
+ " test: Dataset({\n",
354
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
355
+ " num_rows: 1725\n",
356
+ " })\n",
357
+ "})"
358
+ ]
359
+ },
360
+ "execution_count": 27,
361
+ "metadata": {},
362
+ "output_type": "execute_result"
363
+ }
364
+ ],
365
+ "source": [
366
+ "from datasets import load_dataset\n",
367
+ "\n",
368
+ "raw_datasets = load_dataset('glue','mrpc')\n",
369
+ "raw_datasets"
370
+ ]
371
+ },
372
+ {
373
+ "cell_type": "code",
374
+ "execution_count": 28,
375
+ "metadata": {},
376
+ "outputs": [
377
+ {
378
+ "name": "stdout",
379
+ "output_type": "stream",
380
+ "text": [
381
+ "{'train': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask']}\n"
382
+ ]
383
+ }
384
+ ],
385
+ "source": [
386
+ "from transformers import AutoTokenizer\n",
387
+ "\n",
388
+ "checkpoint = 'bert-base-uncased'\n",
389
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
390
+ "\n",
391
+ "def tokenize_function(example):\n",
392
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
393
+ " padding='max_length', truncation=True, max_length=128)\n",
394
+ "\n",
395
+ "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
396
+ "print(tokenized_datasets.column_names)"
397
+ ]
398
+ },
399
+ {
400
+ "cell_type": "code",
401
+ "execution_count": 29,
402
+ "metadata": {},
403
+ "outputs": [
404
+ {
405
+ "data": {
406
+ "text/plain": [
407
+ "Dataset({\n",
408
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
409
+ " num_rows: 3668\n",
410
+ "})"
411
+ ]
412
+ },
413
+ "execution_count": 29,
414
+ "metadata": {},
415
+ "output_type": "execute_result"
416
+ }
417
+ ],
418
+ "source": [
419
+ "tokenized_datasets = tokenized_datasets.remove_columns(['idx', 'sentence1','sentence2'])\n",
420
+ "tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
421
+ "tokenized_datasets = tokenized_datasets.with_format('torch')\n",
422
+ "\n",
423
+ "tokenized_datasets['train']"
424
+ ]
425
+ },
426
+ {
427
+ "cell_type": "code",
428
+ "execution_count": 30,
429
+ "metadata": {},
430
+ "outputs": [
431
+ {
432
+ "data": {
433
+ "text/plain": [
434
+ "Dataset({\n",
435
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
436
+ " num_rows: 3668\n",
437
+ "})"
438
+ ]
439
+ },
440
+ "execution_count": 30,
441
+ "metadata": {},
442
+ "output_type": "execute_result"
443
+ }
444
+ ],
445
+ "source": [
446
+ "from datasets import load_dataset\n",
447
+ "\n",
448
+ "raw_datasets = load_dataset('glue', 'mrpc')\n",
449
+ "\n",
450
+ "from transformers import AutoTokenizer\n",
451
+ "\n",
452
+ "checkpoint = 'bert-base-uncased'\n",
453
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
454
+ "\n",
455
+ "def tokenize_function(example):\n",
456
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
457
+ " padding='max_length', truncation=True, max_length=128)\n",
458
+ " \n",
459
+ "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
460
+ "tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
461
+ "tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
462
+ "tokenized_datasets = tokenized_datasets.with_format('torch')\n",
463
+ "\n",
464
+ "tokenized_datasets['train']"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": 31,
470
+ "metadata": {},
471
+ "outputs": [
472
+ {
473
+ "name": "stdout",
474
+ "output_type": "stream",
475
+ "text": [
476
+ "0 torch.Size([16, 128])\n",
477
+ "1 torch.Size([16, 128])\n",
478
+ "2 torch.Size([16, 128])\n",
479
+ "3 torch.Size([16, 128])\n",
480
+ "4 torch.Size([16, 128])\n",
481
+ "5 torch.Size([16, 128])\n",
482
+ "6 torch.Size([16, 128])\n"
483
+ ]
484
+ }
485
+ ],
486
+ "source": [
487
+ "from torch.utils.data import DataLoader\n",
488
+ "\n",
489
+ "train_dataloader = DataLoader(tokenized_datasets['train'], shuffle=True, batch_size=16)\n",
490
+ "\n",
491
+ "for step, batch in enumerate(train_dataloader):\n",
492
+ " print(step, batch['input_ids'].shape)\n",
493
+ " if step>5:\n",
494
+ " break"
495
+ ]
496
+ },
497
+ {
498
+ "cell_type": "markdown",
499
+ "metadata": {},
500
+ "source": [
501
+ "Dynamic Padding !!!!"
502
+ ]
503
+ },
504
+ {
505
+ "cell_type": "code",
506
+ "execution_count": 32,
507
+ "metadata": {},
508
+ "outputs": [
509
+ {
510
+ "data": {
511
+ "application/vnd.jupyter.widget-view+json": {
512
+ "model_id": "62a66797be6544e9a7c2c1b2844d2c0a",
513
+ "version_major": 2,
514
+ "version_minor": 0
515
+ },
516
+ "text/plain": [
517
+ "Map: 0%| | 0/3668 [00:00<?, ? examples/s]"
518
+ ]
519
+ },
520
+ "metadata": {},
521
+ "output_type": "display_data"
522
+ },
523
+ {
524
+ "data": {
525
+ "application/vnd.jupyter.widget-view+json": {
526
+ "model_id": "a0de8cbd9b4447c8b33d0cf9b87ff400",
527
+ "version_major": 2,
528
+ "version_minor": 0
529
+ },
530
+ "text/plain": [
531
+ "Map: 0%| | 0/408 [00:00<?, ? examples/s]"
532
+ ]
533
+ },
534
+ "metadata": {},
535
+ "output_type": "display_data"
536
+ },
537
+ {
538
+ "data": {
539
+ "application/vnd.jupyter.widget-view+json": {
540
+ "model_id": "eb3ef1f76bce496d9068289dea690165",
541
+ "version_major": 2,
542
+ "version_minor": 0
543
+ },
544
+ "text/plain": [
545
+ "Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
546
+ ]
547
+ },
548
+ "metadata": {},
549
+ "output_type": "display_data"
550
+ }
551
+ ],
552
+ "source": [
553
+ "from datasets import load_dataset\n",
554
+ "\n",
555
+ "raw_datasets = load_dataset('glue', 'mrpc')\n",
556
+ "\n",
557
+ "from transformers import AutoTokenizer\n",
558
+ "\n",
559
+ "checkpoint = 'bert-base-uncased'\n",
560
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
561
+ "\n",
562
+ "def tokenize_function(example):\n",
563
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
564
+ " truncation=True)\n",
565
+ "\n",
566
+ "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)"
567
+ ]
568
+ },
569
+ {
570
+ "cell_type": "code",
571
+ "execution_count": 33,
572
+ "metadata": {},
573
+ "outputs": [],
574
+ "source": [
575
+ "tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
576
+ "tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
577
+ "tokenized_datasets = tokenized_datasets.with_format('torch')"
578
+ ]
579
+ },
580
+ {
581
+ "cell_type": "code",
582
+ "execution_count": 34,
583
+ "metadata": {},
584
+ "outputs": [
585
+ {
586
+ "name": "stderr",
587
+ "output_type": "stream",
588
+ "text": [
589
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
590
+ ]
591
+ },
592
+ {
593
+ "name": "stdout",
594
+ "output_type": "stream",
595
+ "text": [
596
+ "0 torch.Size([16, 80])\n",
597
+ "1 torch.Size([16, 79])\n",
598
+ "2 torch.Size([16, 74])\n",
599
+ "3 torch.Size([16, 64])\n",
600
+ "4 torch.Size([16, 72])\n",
601
+ "5 torch.Size([16, 78])\n",
602
+ "6 torch.Size([16, 82])\n"
603
+ ]
604
+ }
605
+ ],
606
+ "source": [
607
+ "from torch.utils.data import DataLoader\n",
608
+ "from transformers import DataCollatorWithPadding\n",
609
+ "\n",
610
+ "data_collator = DataCollatorWithPadding(tokenizer)\n",
611
+ "train_dataloader = DataLoader(\n",
612
+ " tokenized_datasets['train'], batch_size=16, shuffle=True, collate_fn=data_collator\n",
613
+ ")\n",
614
+ "\n",
615
+ "for step, batch in enumerate(train_dataloader):\n",
616
+ " print(step, batch['input_ids'].shape)\n",
617
+ " if step>5:\n",
618
+ " break"
619
+ ]
620
+ },
621
+ {
622
+ "cell_type": "code",
623
+ "execution_count": 35,
624
+ "metadata": {},
625
+ "outputs": [
626
+ {
627
+ "data": {
628
+ "application/vnd.jupyter.widget-view+json": {
629
+ "model_id": "9a3cbe1e023c427daa2f4d86542449f4",
630
+ "version_major": 2,
631
+ "version_minor": 0
632
+ },
633
+ "text/plain": [
634
+ "Map: 0%| | 0/3668 [00:00<?, ? examples/s]"
635
+ ]
636
+ },
637
+ "metadata": {},
638
+ "output_type": "display_data"
639
+ },
640
+ {
641
+ "data": {
642
+ "application/vnd.jupyter.widget-view+json": {
643
+ "model_id": "80e82d865bdd44f184282d0991a36010",
644
+ "version_major": 2,
645
+ "version_minor": 0
646
+ },
647
+ "text/plain": [
648
+ "Map: 0%| | 0/408 [00:00<?, ? examples/s]"
649
+ ]
650
+ },
651
+ "metadata": {},
652
+ "output_type": "display_data"
653
+ },
654
+ {
655
+ "data": {
656
+ "application/vnd.jupyter.widget-view+json": {
657
+ "model_id": "f7a7d45215804479ba0d135080c04e56",
658
+ "version_major": 2,
659
+ "version_minor": 0
660
+ },
661
+ "text/plain": [
662
+ "Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
663
+ ]
664
+ },
665
+ "metadata": {},
666
+ "output_type": "display_data"
667
+ },
668
+ {
669
+ "name": "stdout",
670
+ "output_type": "stream",
671
+ "text": [
672
+ "0 torch.Size([16, 128])\n",
673
+ "1 torch.Size([16, 128])\n",
674
+ "2 torch.Size([16, 128])\n",
675
+ "3 torch.Size([16, 128])\n",
676
+ "4 torch.Size([16, 128])\n",
677
+ "5 torch.Size([16, 128])\n",
678
+ "6 torch.Size([16, 128])\n"
679
+ ]
680
+ }
681
+ ],
682
+ "source": [
683
+ "from datasets import load_dataset\n",
684
+ "\n",
685
+ "raw_dataset = load_dataset('glue','mrpc')\n",
686
+ "\n",
687
+ "from transformers import AutoTokenizer\n",
688
+ "checkpoint = 'bert-base-uncased'\n",
689
+ "\n",
690
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
691
+ "\n",
692
+ "def tokenize_function(example):\n",
693
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
694
+ " padding='max_length', truncation=True,\n",
695
+ " max_length=128)\n",
696
+ "\n",
697
+ "tokenized_datasets = raw_dataset.map(tokenize_function, batched=True)\n",
698
+ "\n",
699
+ "tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
700
+ "tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
701
+ "tokenized_datasets = tokenized_datasets.with_format('torch')\n",
702
+ "\n",
703
+ "from torch.utils.data import DataLoader\n",
704
+ "\n",
705
+ "train_dataloader = DataLoader(tokenized_datasets['train'],\n",
706
+ " shuffle=True, batch_size=16)\n",
707
+ "\n",
708
+ "for step, batch in enumerate(train_dataloader):\n",
709
+ " print(step, batch['input_ids'].shape)\n",
710
+ " if step>5:\n",
711
+ " break"
712
+ ]
713
+ },
714
+ {
715
+ "cell_type": "code",
716
+ "execution_count": 36,
717
+ "metadata": {},
718
+ "outputs": [
719
+ {
720
+ "data": {
721
+ "application/vnd.jupyter.widget-view+json": {
722
+ "model_id": "59780038e4dc41d68f858b55511780a4",
723
+ "version_major": 2,
724
+ "version_minor": 0
725
+ },
726
+ "text/plain": [
727
+ "Map: 0%| | 0/408 [00:00<?, ? examples/s]"
728
+ ]
729
+ },
730
+ "metadata": {},
731
+ "output_type": "display_data"
732
+ },
733
+ {
734
+ "name": "stderr",
735
+ "output_type": "stream",
736
+ "text": [
737
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
738
+ ]
739
+ },
740
+ {
741
+ "name": "stdout",
742
+ "output_type": "stream",
743
+ "text": [
744
+ "0 torch.Size([16, 72])\n",
745
+ "1 torch.Size([16, 70])\n",
746
+ "2 torch.Size([16, 77])\n",
747
+ "3 torch.Size([16, 76])\n",
748
+ "4 torch.Size([16, 81])\n",
749
+ "5 torch.Size([16, 82])\n",
750
+ "6 torch.Size([16, 81])\n"
751
+ ]
752
+ }
753
+ ],
754
+ "source": [
755
+ "from datasets import load_dataset\n",
756
+ "\n",
757
+ "raw_datasets = load_dataset('glue','mrpc')\n",
758
+ "\n",
759
+ "from transformers import AutoTokenizer\n",
760
+ "checkpoint = 'bert-base-uncased'\n",
761
+ "\n",
762
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
763
+ "\n",
764
+ "def tokenize_function(example):\n",
765
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
766
+ " truncation=True)\n",
767
+ "\n",
768
+ "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
769
+ "\n",
770
+ "tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
771
+ "tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
772
+ "tokenized_datasets = tokenized_datasets.with_format('torch')\n",
773
+ "\n",
774
+ "from torch.utils.data import DataLoader\n",
775
+ "from transformers import DataCollatorWithPadding\n",
776
+ "\n",
777
+ "data_collator = DataCollatorWithPadding(tokenizer)\n",
778
+ "train_dataloader = DataLoader(\n",
779
+ " tokenized_datasets['train'], batch_size=16, shuffle=True,\n",
780
+ " collate_fn=data_collator\n",
781
+ ")\n",
782
+ "\n",
783
+ "for step,batch in enumerate(train_dataloader):\n",
784
+ " print(step,batch['input_ids'].shape)\n",
785
+ " if step>5:\n",
786
+ " break"
787
+ ]
788
+ },
789
+ {
790
+ "cell_type": "code",
791
+ "execution_count": null,
792
+ "metadata": {},
793
+ "outputs": [],
794
+ "source": []
795
+ }
796
+ ],
797
+ "metadata": {
798
+ "kernelspec": {
799
+ "display_name": "Python 3",
800
+ "language": "python",
801
+ "name": "python3"
802
+ },
803
+ "language_info": {
804
+ "codemirror_mode": {
805
+ "name": "ipython",
806
+ "version": 3
807
+ },
808
+ "file_extension": ".py",
809
+ "mimetype": "text/x-python",
810
+ "name": "python",
811
+ "nbconvert_exporter": "python",
812
+ "pygments_lexer": "ipython3",
813
+ "version": "3.11.6"
814
+ }
815
+ },
816
+ "nbformat": 4,
817
+ "nbformat_minor": 2
818
+ }
18-11-23.ipynb ADDED
@@ -0,0 +1,618 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
13
+ ]
14
+ },
15
+ {
16
+ "data": {
17
+ "text/plain": [
18
+ "{'input_ids': [101, 1045, 2572, 5191, 1012, 1012, 1045, 4553, 2061, 4030, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}"
19
+ ]
20
+ },
21
+ "execution_count": 1,
22
+ "metadata": {},
23
+ "output_type": "execute_result"
24
+ }
25
+ ],
26
+ "source": [
27
+ "from transformers import AutoTokenizer\n",
28
+ "\n",
29
+ "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n",
30
+ "tokenized_inputs=tokenizer.tokenize('I am worried.. I learn so slow')\n",
31
+ "ids = tokenizer.convert_tokens_to_ids(tokenized_inputs)\n",
32
+ "input_ids = tokenizer.prepare_for_model(ids)\n",
33
+ "input_ids"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 2,
39
+ "metadata": {},
40
+ "outputs": [
41
+ {
42
+ "name": "stdout",
43
+ "output_type": "stream",
44
+ "text": [
45
+ "{'input_ids': tensor([[ 101, 1045, 2572, 5191, 1045, 4553, 2061, 4030, 102, 0],\n",
46
+ " [ 101, 2023, 2003, 2033, 2028, 1997, 1996, 5409, 2493, 102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
47
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0],\n",
48
+ " [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}\n",
49
+ "torch.Size([2, 10, 768])\n"
50
+ ]
51
+ },
52
+ {
53
+ "name": "stderr",
54
+ "output_type": "stream",
55
+ "text": [
56
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
57
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
58
+ ]
59
+ },
60
+ {
61
+ "name": "stdout",
62
+ "output_type": "stream",
63
+ "text": [
64
+ "tensor([[-0.1298, -0.3846],\n",
65
+ " [ 0.1664, -0.1490]], grad_fn=<AddmmBackward0>)\n",
66
+ "tensor([[0.5633, 0.4367],\n",
67
+ " [0.5782, 0.4218]], grad_fn=<SoftmaxBackward0>)\n"
68
+ ]
69
+ },
70
+ {
71
+ "data": {
72
+ "text/plain": [
73
+ "{0: 'LABEL_0', 1: 'LABEL_1'}"
74
+ ]
75
+ },
76
+ "execution_count": 2,
77
+ "metadata": {},
78
+ "output_type": "execute_result"
79
+ }
80
+ ],
81
+ "source": [
82
+ "from transformers import AutoTokenizer\n",
83
+ "\n",
84
+ "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n",
85
+ "inputs = tokenizer([\"I am worried I learn so slow\", \n",
86
+ " \"This is me one of the worst students\"],\n",
87
+ " padding=True, truncation=True, return_tensors=\"pt\")\n",
88
+ "print(inputs)\n",
89
+ "\n",
90
+ "from transformers import AutoModel\n",
91
+ "\n",
92
+ "model = AutoModel.from_pretrained(\"bert-base-uncased\")\n",
93
+ "outputs = model(**inputs)\n",
94
+ "print(outputs.last_hidden_state.shape)\n",
95
+ "\n",
96
+ "from transformers import AutoModelForSequenceClassification\n",
97
+ "\n",
98
+ "model = AutoModelForSequenceClassification.from_pretrained(\"bert-base-uncased\")\n",
99
+ "outputs = model(**inputs)\n",
100
+ "print(outputs.logits)\n",
101
+ "\n",
102
+ "import torch\n",
103
+ "\n",
104
+ "predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
105
+ "print(predictions)\n",
106
+ "\n",
107
+ "model.config.id2label"
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "execution_count": 3,
113
+ "metadata": {},
114
+ "outputs": [
115
+ {
116
+ "name": "stdout",
117
+ "output_type": "stream",
118
+ "text": [
119
+ "BertConfig {\n",
120
+ " \"architectures\": [\n",
121
+ " \"BertForMaskedLM\"\n",
122
+ " ],\n",
123
+ " \"attention_probs_dropout_prob\": 0.1,\n",
124
+ " \"classifier_dropout\": null,\n",
125
+ " \"gradient_checkpointing\": false,\n",
126
+ " \"hidden_act\": \"gelu\",\n",
127
+ " \"hidden_dropout_prob\": 0.1,\n",
128
+ " \"hidden_size\": 768,\n",
129
+ " \"initializer_range\": 0.02,\n",
130
+ " \"intermediate_size\": 3072,\n",
131
+ " \"layer_norm_eps\": 1e-12,\n",
132
+ " \"max_position_embeddings\": 512,\n",
133
+ " \"model_type\": \"bert\",\n",
134
+ " \"num_attention_heads\": 12,\n",
135
+ " \"num_hidden_layers\": 12,\n",
136
+ " \"pad_token_id\": 0,\n",
137
+ " \"position_embedding_type\": \"absolute\",\n",
138
+ " \"transformers_version\": \"4.34.1\",\n",
139
+ " \"type_vocab_size\": 2,\n",
140
+ " \"use_cache\": true,\n",
141
+ " \"vocab_size\": 30522\n",
142
+ "}\n",
143
+ "\n"
144
+ ]
145
+ }
146
+ ],
147
+ "source": [
148
+ "from transformers import AutoConfig\n",
149
+ "from transformers import BertConfig\n",
150
+ "from transformers import BertModel\n",
151
+ "\n",
152
+ "bert_config = BertConfig.from_pretrained(\"bert-base-uncased\")\n",
153
+ "bert_model = BertModel(bert_config)\n",
154
+ "\n",
155
+ "auto_config = AutoConfig.from_pretrained(\"bert-base-uncased\")\n",
156
+ "bert_model_auto_config = BertModel(auto_config)\n",
157
+ "\n",
158
+ "print(bert_config)"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": 4,
164
+ "metadata": {},
165
+ "outputs": [],
166
+ "source": [
167
+ "new_bert_config = BertConfig.from_pretrained('bert-base-uncased', num_hidden_layers = 9, vocab_size=1000)\n",
168
+ "new_bert_model = BertModel(new_bert_config)\n",
169
+ "\n",
170
+ "save_new_bert_model = new_bert_model.save_pretrained('new_bert_model')\n",
171
+ "\n",
172
+ "load_new_bert_model = BertModel.from_pretrained('new_bert_model')"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": 5,
178
+ "metadata": {},
179
+ "outputs": [
180
+ {
181
+ "name": "stdout",
182
+ "output_type": "stream",
183
+ "text": [
184
+ "Dataset({\n",
185
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
186
+ " num_rows: 3668\n",
187
+ "}) {'sentence1': Value(dtype='string', id=None), 'sentence2': Value(dtype='string', id=None), 'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None), 'idx': Value(dtype='int32', id=None)} {'sentence1': 'Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .', 'sentence2': 'Referring to him as only \" the witness \" , Amrozi accused his brother of deliberately distorting his evidence .', 'label': 1, 'idx': 0} {'sentence1': ['Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .', \"Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .\", 'They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .', 'Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .', 'The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .'], 'sentence2': ['Referring to him as only \" the witness \" , Amrozi accused his brother of deliberately distorting his evidence .', \"Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .\", \"On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .\", 'Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .', 'PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .'], 'label': [1, 0, 1, 0, 1], 'idx': [0, 1, 2, 3, 4]}\n"
188
+ ]
189
+ },
190
+ {
191
+ "data": {
192
+ "application/vnd.jupyter.widget-view+json": {
193
+ "model_id": "008ad47191464d26a6edd36e8d00cc06",
194
+ "version_major": 2,
195
+ "version_minor": 0
196
+ },
197
+ "text/plain": [
198
+ "Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
199
+ ]
200
+ },
201
+ "metadata": {},
202
+ "output_type": "display_data"
203
+ },
204
+ {
205
+ "data": {
206
+ "text/plain": [
207
+ "DatasetDict({\n",
208
+ " train: Dataset({\n",
209
+ " features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
210
+ " num_rows: 3668\n",
211
+ " })\n",
212
+ " validation: Dataset({\n",
213
+ " features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
214
+ " num_rows: 408\n",
215
+ " })\n",
216
+ " test: Dataset({\n",
217
+ " features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
218
+ " num_rows: 1725\n",
219
+ " })\n",
220
+ "})"
221
+ ]
222
+ },
223
+ "execution_count": 5,
224
+ "metadata": {},
225
+ "output_type": "execute_result"
226
+ }
227
+ ],
228
+ "source": [
229
+ "from datasets import load_dataset\n",
230
+ "\n",
231
+ "raw_dataset = load_dataset(\"glue\", \"mrpc\")\n",
232
+ "print(raw_dataset[\"train\"], raw_dataset[\"train\"].features, raw_dataset[\"train\"][0], raw_dataset[\"train\"][:5])\n",
233
+ "\n",
234
+ "from transformers import AutoTokenizer\n",
235
+ "checkpoint = \"bert-base-uncased\"\n",
236
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
237
+ "\n",
238
+ "def tokenize_function(example):\n",
239
+ " return tokenizer(example[\"sentence1\"], example[\"sentence2\"],\n",
240
+ " padding = True, truncation=True,\n",
241
+ " max_length=128) \n",
242
+ "\n",
243
+ "tokenized_dataset = raw_dataset.map(tokenize_function, batched=True)\n",
244
+ "tokenized_dataset"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "code",
249
+ "execution_count": 6,
250
+ "metadata": {},
251
+ "outputs": [
252
+ {
253
+ "data": {
254
+ "text/plain": [
255
+ "DatasetDict({\n",
256
+ " train: Dataset({\n",
257
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
258
+ " num_rows: 3668\n",
259
+ " })\n",
260
+ " validation: Dataset({\n",
261
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
262
+ " num_rows: 408\n",
263
+ " })\n",
264
+ " test: Dataset({\n",
265
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
266
+ " num_rows: 1725\n",
267
+ " })\n",
268
+ "})"
269
+ ]
270
+ },
271
+ "execution_count": 6,
272
+ "metadata": {},
273
+ "output_type": "execute_result"
274
+ }
275
+ ],
276
+ "source": [
277
+ "tokenized_dataset = tokenized_dataset.remove_columns(['idx','sentence1','sentence2'])\n",
278
+ "tokenized_dataset = tokenized_dataset.rename_column('label','labels')\n",
279
+ "tokenized_dataset = tokenized_dataset.with_format('torch')\n",
280
+ "\n",
281
+ "tokenized_dataset"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": 7,
287
+ "metadata": {},
288
+ "outputs": [
289
+ {
290
+ "data": {
291
+ "text/plain": [
292
+ "Dataset({\n",
293
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
294
+ " num_rows: 3668\n",
295
+ "})"
296
+ ]
297
+ },
298
+ "execution_count": 7,
299
+ "metadata": {},
300
+ "output_type": "execute_result"
301
+ }
302
+ ],
303
+ "source": [
304
+ "tokenized_dataset['train']"
305
+ ]
306
+ },
307
+ {
308
+ "cell_type": "code",
309
+ "execution_count": 18,
310
+ "metadata": {},
311
+ "outputs": [
312
+ {
313
+ "data": {
314
+ "application/vnd.jupyter.widget-view+json": {
315
+ "model_id": "14633ab0e5204abf9593e1b5a8b57c2d",
316
+ "version_major": 2,
317
+ "version_minor": 0
318
+ },
319
+ "text/plain": [
320
+ "Map: 0%| | 0/3668 [00:00<?, ? examples/s]"
321
+ ]
322
+ },
323
+ "metadata": {},
324
+ "output_type": "display_data"
325
+ },
326
+ {
327
+ "data": {
328
+ "application/vnd.jupyter.widget-view+json": {
329
+ "model_id": "8e10fd28b7ad4805b3f3b55d0b192027",
330
+ "version_major": 2,
331
+ "version_minor": 0
332
+ },
333
+ "text/plain": [
334
+ "Map: 0%| | 0/408 [00:00<?, ? examples/s]"
335
+ ]
336
+ },
337
+ "metadata": {},
338
+ "output_type": "display_data"
339
+ },
340
+ {
341
+ "data": {
342
+ "application/vnd.jupyter.widget-view+json": {
343
+ "model_id": "9aea16a997e64169acfde01a91195a79",
344
+ "version_major": 2,
345
+ "version_minor": 0
346
+ },
347
+ "text/plain": [
348
+ "Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
349
+ ]
350
+ },
351
+ "metadata": {},
352
+ "output_type": "display_data"
353
+ },
354
+ {
355
+ "name": "stdout",
356
+ "output_type": "stream",
357
+ "text": [
358
+ "torch.Size([16, 128])\n",
359
+ "torch.Size([16, 128])\n",
360
+ "torch.Size([16, 128])\n",
361
+ "torch.Size([16, 128])\n",
362
+ "torch.Size([16, 128])\n",
363
+ "torch.Size([16, 128])\n",
364
+ "torch.Size([16, 128])\n"
365
+ ]
366
+ }
367
+ ],
368
+ "source": [
369
+ "from datasets import load_dataset\n",
370
+ "from transformers import AutoTokenizer\n",
371
+ "\n",
372
+ "raw_datasets = load_dataset(\"glue\", \"mrpc\")\n",
373
+ "checkpoint = 'bert-base-uncased'\n",
374
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
375
+ "\n",
376
+ "def tokenize_function(example):\n",
377
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
378
+ " padding=\"max_length\",truncation=True,\n",
379
+ " max_length=128)\n",
380
+ " \n",
381
+ "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True) \n",
382
+ "tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
383
+ "tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
384
+ "tokenized_datasets = tokenized_datasets.with_format('torch')\n",
385
+ "\n",
386
+ "from torch.utils.data import DataLoader\n",
387
+ "\n",
388
+ "train_dataloader = DataLoader(tokenized_datasets['train'], \n",
389
+ " batch_size=16, shuffle=True)\n",
390
+ "\n",
391
+ "for step, batch in enumerate(train_dataloader):\n",
392
+ " print(batch['input_ids'].shape)\n",
393
+ " if step>5:\n",
394
+ " break"
395
+ ]
396
+ },
397
+ {
398
+ "cell_type": "code",
399
+ "execution_count": 12,
400
+ "metadata": {},
401
+ "outputs": [],
402
+ "source": [
403
+ "from datasets import load_dataset\n",
404
+ "\n",
405
+ "raw_datasets = load_dataset('glue', 'mrpc')\n",
406
+ "from transformers import AutoTokenizer\n",
407
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
408
+ "\n",
409
+ "def tokenize_function(example):\n",
410
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
411
+ " truncation=True)\n",
412
+ "\n",
413
+ "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
414
+ "tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
415
+ "tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
416
+ "tokenized_datasets = tokenized_datasets.with_format('torch')\n"
417
+ ]
418
+ },
419
+ {
420
+ "cell_type": "code",
421
+ "execution_count": 13,
422
+ "metadata": {},
423
+ "outputs": [
424
+ {
425
+ "data": {
426
+ "text/plain": [
427
+ "DatasetDict({\n",
428
+ " train: Dataset({\n",
429
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
430
+ " num_rows: 3668\n",
431
+ " })\n",
432
+ " validation: Dataset({\n",
433
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
434
+ " num_rows: 408\n",
435
+ " })\n",
436
+ " test: Dataset({\n",
437
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
438
+ " num_rows: 1725\n",
439
+ " })\n",
440
+ "})"
441
+ ]
442
+ },
443
+ "execution_count": 13,
444
+ "metadata": {},
445
+ "output_type": "execute_result"
446
+ }
447
+ ],
448
+ "source": [
449
+ "tokenized_datasets"
450
+ ]
451
+ },
452
+ {
453
+ "cell_type": "code",
454
+ "execution_count": 14,
455
+ "metadata": {},
456
+ "outputs": [
457
+ {
458
+ "name": "stderr",
459
+ "output_type": "stream",
460
+ "text": [
461
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
462
+ ]
463
+ },
464
+ {
465
+ "name": "stdout",
466
+ "output_type": "stream",
467
+ "text": [
468
+ "torch.Size([16, 73])\n",
469
+ "torch.Size([16, 75])\n",
470
+ "torch.Size([16, 85])\n",
471
+ "torch.Size([16, 81])\n",
472
+ "torch.Size([16, 77])\n",
473
+ "torch.Size([16, 83])\n",
474
+ "torch.Size([16, 79])\n"
475
+ ]
476
+ }
477
+ ],
478
+ "source": [
479
+ "from torch.utils.data import DataLoader\n",
480
+ "from transformers import DataCollatorWithPadding\n",
481
+ "\n",
482
+ "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
483
+ "train_dataloader = DataLoader(tokenized_datasets['train'],\n",
484
+ " batch_size=16, shuffle=True,\n",
485
+ " collate_fn=data_collator)\n",
486
+ "\n",
487
+ "for step, batch in enumerate(train_dataloader):\n",
488
+ " print(batch['input_ids'].shape)\n",
489
+ " if step>5:\n",
490
+ " break"
491
+ ]
492
+ },
493
+ {
494
+ "cell_type": "code",
495
+ "execution_count": 20,
496
+ "metadata": {},
497
+ "outputs": [],
498
+ "source": [
499
+ "from datasets import load_dataset\n",
500
+ "from transformers import AutoTokenizer, DataCollatorWithPadding\n",
501
+ "\n",
502
+ "checkpoint = 'bert-base-uncased'\n",
503
+ "raw_dataset = load_dataset('glue', 'mrpc')\n",
504
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
505
+ "\n",
506
+ "def tokenize_function(example):\n",
507
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
508
+ " truncation=True)\n",
509
+ "\n",
510
+ "tokenized_dataset = raw_dataset.map(tokenize_function, batched=True)\n",
511
+ "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
512
+ "\n",
513
+ "from transformers import AutoModelForSequenceClassification\n",
514
+ "\n",
515
+ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n",
516
+ "\n",
517
+ "# Specify training arguments hyperparameters:\n",
518
+ "from transformers import TrainingArguments\n",
519
+ "training_args = TrainingArguments(\"test-trainer\",\n",
520
+ " per_device_train_batch_size=16,\n",
521
+ " per_device_eval_batch_size=16,\n",
522
+ " num_train_epochs=5,\n",
523
+ " learning_rate=2e-5,\n",
524
+ " weight_decay=0.01)\n",
525
+ "\n",
526
+ "# Create the Trainer instance:\n",
527
+ "from transformers import Trainer\n",
528
+ "trainer = Trainer(\n",
529
+ " model=model,\n",
530
+ " args=training_args,\n",
531
+ " train_dataset=tokenized_dataset['train'],\n",
532
+ " eval_dataset=tokenized_dataset['validation'],\n",
533
+ " data_collator=data_collator,\n",
534
+ " tokenizer=tokenizer\n",
535
+ ")\n",
536
+ "trainer.train()\n",
537
+ "\n",
538
+ "'''The results will however be anticlimatic because \n",
539
+ "you will only get a training loss that doesn't tell you how well the\n",
540
+ "model is actually doing..\n",
541
+ "To fix this, you need to specify the evaluation metric'''\n",
542
+ "\n",
543
+ "predictions = trainer.predict(tokenized_dataset['validation'])\n",
544
+ "print(predictions)\n",
545
+ "print(predictions.predictions.shape, predictions.label_ids.shape)\n",
546
+ "\n",
547
+ "# it returns a named tuple with 3 elements: predictions, label_ids, metrics\n",
548
+ "# the predictions are the logits of the model with all the sentences of the dataset\n",
549
+ "# so a numpy array of shape(488 x 2)\n",
550
+ "\n",
551
+ "# to match them with our labels we need to take the maximum logits for each prediction\n",
552
+ "# to know which is the maximum, use the argmax function\n",
553
+ "import numpy as np\n",
554
+ "from datasets import load_metric\n",
555
+ "\n",
556
+ "metric = load_metric('glue', 'mrpc')\n",
557
+ "preds = np.argmax(predictions.predictions, axis=-1)\n",
558
+ "metric.compute(predictions=preds, references=predictions.label_ids)\n",
559
+ "\n",
560
+ "'''We can see that our model did learn something!'''"
561
+ ]
562
+ },
563
+ {
564
+ "cell_type": "code",
565
+ "execution_count": null,
566
+ "metadata": {},
567
+ "outputs": [],
568
+ "source": [
569
+ "'''To monitor the metrics during training, we need to define a\n",
570
+ "compute metric function as we did above\n",
571
+ "and pass it to the Trainer\n",
572
+ "'''\n",
573
+ "metric = load_metric('glue','mrpc')\n",
574
+ "def compute_metrics(eval_preds):\n",
575
+ " logits, labels = eval_preds\n",
576
+ " predictions = np.argmax(logits, axis=-1)\n",
577
+ " return metric.compute(predictions=predictions, references=labels)\n",
578
+ "\n",
579
+ "training_args = TrainingArguments(\"test-trainer\",\n",
580
+ " evaluation_strategy='epoch')\n",
581
+ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n",
582
+ "\n",
583
+ "trainer = Trainer(\n",
584
+ " model=model,\n",
585
+ " args=training_args,\n",
586
+ " train_dataset=tokenized_dataset['train'],\n",
587
+ " eval_dataset=tokenized_dataset['validation'],\n",
588
+ " data_collator=data_collator,\n",
589
+ " tokenizer=tokenizer,\n",
590
+ " compute_metrics=compute_metrics\n",
591
+ ")\n",
592
+ "\n",
593
+ "trainer.train()\n"
594
+ ]
595
+ }
596
+ ],
597
+ "metadata": {
598
+ "kernelspec": {
599
+ "display_name": "Python 3",
600
+ "language": "python",
601
+ "name": "python3"
602
+ },
603
+ "language_info": {
604
+ "codemirror_mode": {
605
+ "name": "ipython",
606
+ "version": 3
607
+ },
608
+ "file_extension": ".py",
609
+ "mimetype": "text/x-python",
610
+ "name": "python",
611
+ "nbconvert_exporter": "python",
612
+ "pygments_lexer": "ipython3",
613
+ "version": "3.11.6"
614
+ }
615
+ },
616
+ "nbformat": 4,
617
+ "nbformat_minor": 2
618
+ }
19-11-23.ipynb ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": []
9
+ }
10
+ ],
11
+ "metadata": {
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "nbformat": 4,
17
+ "nbformat_minor": 2
18
+ }
Accelerate.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Batch-Inputs-Together.ipynb ADDED
@@ -0,0 +1,623 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "{'input_ids': tensor([[ 101, 1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607,\n",
13
+ " 2026, 2878, 2166, 1012, 102],\n",
14
+ " [ 101, 1045, 5223, 2023, 2061, 2172, 999, 102, 0, 0,\n",
15
+ " 0, 0, 0, 0, 0],\n",
16
+ " [ 101, 1045, 2293, 2023, 2061, 2172, 999, 102, 0, 0,\n",
17
+ " 0, 0, 0, 0, 0],\n",
18
+ " [ 101, 1045, 2572, 9364, 1998, 2026, 2154, 2003, 9868, 102,\n",
19
+ " 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
20
+ " [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],\n",
21
+ " [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],\n",
22
+ " [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]])}\n"
23
+ ]
24
+ }
25
+ ],
26
+ "source": [
27
+ "from transformers import AutoTokenizer\n",
28
+ "\n",
29
+ "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
30
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
31
+ "\n",
32
+ "sentences = [\n",
33
+ " 'I have been waiting for a hugging face course my whole life.',\n",
34
+ " 'I hate this so much!',\n",
35
+ " 'I love this so much!',\n",
36
+ " 'I am disappointed and my day is ruined'\n",
37
+ "]\n",
38
+ "\n",
39
+ "tokens = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')\n",
40
+ "print(tokens)"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 1,
46
+ "metadata": {},
47
+ "outputs": [
48
+ {
49
+ "name": "stdout",
50
+ "output_type": "stream",
51
+ "text": [
52
+ "[[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012], [1045, 2572, 2200, 9364, 1999, 2017, 1012], [1045, 5223, 2023, 2061, 2172, 999], [1045, 2572, 10215, 2004, 1045, 2572, 2196, 9657, 2055, 2026, 4813, 1012]]\n",
53
+ "[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012]\n",
54
+ "[1045, 2572, 2200, 9364, 1999, 2017, 1012]\n",
55
+ "[1045, 5223, 2023, 2061, 2172, 999]\n",
56
+ "[1045, 2572, 10215, 2004, 1045, 2572, 2196, 9657, 2055, 2026, 4813, 1012]\n"
57
+ ]
58
+ }
59
+ ],
60
+ "source": [
61
+ "from transformers import AutoTokenizer\n",
62
+ "\n",
63
+ "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
64
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
65
+ "\n",
66
+ "sentences = [\n",
67
+ " 'I have been waiting for a hugging face course my whole life.',\n",
68
+ " 'I am very disappointed in you.',\n",
69
+ " 'I hate this so much!',\n",
70
+ " 'I am terrified as I am never confident about my skills.'\n",
71
+ "]\n",
72
+ "\n",
73
+ "tokens = [tokenizer.tokenize(sentence) for sentence in sentences]\n",
74
+ "ids = [tokenizer.convert_tokens_to_ids(token) for token in tokens]\n",
75
+ "\n",
76
+ "print(ids)\n",
77
+ "print(ids[0])\n",
78
+ "print(ids[1])\n",
79
+ "print(ids[2])\n",
80
+ "print(ids[3])"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": 2,
86
+ "metadata": {},
87
+ "outputs": [
88
+ {
89
+ "name": "stderr",
90
+ "output_type": "stream",
91
+ "text": [
92
+ "You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
93
+ ]
94
+ }
95
+ ],
96
+ "source": [
97
+ "final_inputs = tokenizer.prepare_for_model(ids)"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 3,
103
+ "metadata": {},
104
+ "outputs": [
105
+ {
106
+ "name": "stdout",
107
+ "output_type": "stream",
108
+ "text": [
109
+ "{'input_ids': [101, [1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012], [1045, 2572, 2200, 9364, 1999, 2017, 1012], [1045, 5223, 2023, 2061, 2172, 999], [1045, 2572, 10215, 2004, 1045, 2572, 2196, 9657, 2055, 2026, 4813, 1012], 102], 'attention_mask': [1, 1, 1, 1, 1, 1]}\n"
110
+ ]
111
+ }
112
+ ],
113
+ "source": [
114
+ "print(final_inputs)"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": 4,
120
+ "metadata": {},
121
+ "outputs": [
122
+ {
123
+ "ename": "ValueError",
124
+ "evalue": "expected sequence of length 13 at dim 1 (got 7)",
125
+ "output_type": "error",
126
+ "traceback": [
127
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
128
+ "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
129
+ "\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Batch-Inputs-Together.ipynb Cell 5\u001b[0m line \u001b[0;36m6\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Batch-Inputs-Together.ipynb#W4sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtorch\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Batch-Inputs-Together.ipynb#W4sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m ids \u001b[39m=\u001b[39m ids\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Batch-Inputs-Together.ipynb#W4sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m input_ids \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39;49mtensor(ids)\n",
130
+ "\u001b[1;31mValueError\u001b[0m: expected sequence of length 13 at dim 1 (got 7)"
131
+ ]
132
+ }
133
+ ],
134
+ "source": [
135
+ "'''trying to create a tensor or numpy array from the list of inputs\n",
136
+ "will result in an error. This is because the list of inputs is not \n",
137
+ "rectangular i.e they are not of equal dimensions'''\n",
138
+ "import torch\n",
139
+ "ids = ids\n",
140
+ "input_ids = torch.tensor(ids)\n"
141
+ ]
142
+ },
143
+ {
144
+ "cell_type": "code",
145
+ "execution_count": 5,
146
+ "metadata": {},
147
+ "outputs": [
148
+ {
149
+ "data": {
150
+ "text/plain": [
151
+ "'Therefore, you have to pad, here we pad manually. But be sure to\\ncheck out dynamic padding which is almost always better on the CPU and\\nthe GPU!'"
152
+ ]
153
+ },
154
+ "execution_count": 5,
155
+ "metadata": {},
156
+ "output_type": "execute_result"
157
+ }
158
+ ],
159
+ "source": [
160
+ "'''Therefore, you have to pad, here we pad manually. But be sure to\n",
161
+ "check out dynamic padding which is almost always better on the CPU and\n",
162
+ "the GPU!'''\n"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": 6,
168
+ "metadata": {},
169
+ "outputs": [
170
+ {
171
+ "name": "stdout",
172
+ "output_type": "stream",
173
+ "text": [
174
+ "BertTokenizerFast(name_or_path='bert-base-cased', vocab_size=28996, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True), added_tokens_decoder={\n",
175
+ "\t0: AddedToken(\"[PAD]\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
176
+ "\t100: AddedToken(\"[UNK]\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
177
+ "\t101: AddedToken(\"[CLS]\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
178
+ "\t102: AddedToken(\"[SEP]\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
179
+ "\t103: AddedToken(\"[MASK]\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
180
+ "}\n"
181
+ ]
182
+ },
183
+ {
184
+ "data": {
185
+ "text/plain": [
186
+ "0"
187
+ ]
188
+ },
189
+ "execution_count": 6,
190
+ "metadata": {},
191
+ "output_type": "execute_result"
192
+ }
193
+ ],
194
+ "source": [
195
+ "'''The value you use to pad the sentences should not be picked\n",
196
+ "randomly. Use tokenizer.pad_token_id to get the value of the pad token'''\n",
197
+ "\n",
198
+ "from transformers import AutoTokenizer\n",
199
+ "\n",
200
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')\n",
201
+ "print(tokenizer)\n",
202
+ "tokenizer.pad_token_id"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": 13,
208
+ "metadata": {},
209
+ "outputs": [
210
+ {
211
+ "name": "stderr",
212
+ "output_type": "stream",
213
+ "text": [
214
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
215
+ ]
216
+ },
217
+ {
218
+ "name": "stdout",
219
+ "output_type": "stream",
220
+ "text": [
221
+ "{'input_ids': [101, 10463, 2023, 2000, 19204, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}\n",
222
+ "convert this to tokens.\n"
223
+ ]
224
+ }
225
+ ],
226
+ "source": [
227
+ "from transformers import AutoTokenizer\n",
228
+ "\n",
229
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
230
+ "tokens = tokenizer.tokenize('Convert this to tokens.')\n",
231
+ "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
232
+ "inputs = tokenizer.prepare_for_model(input_ids)\n",
233
+ "print(inputs)\n",
234
+ "\n",
235
+ "# decode:\n",
236
+ "decode = tokenizer.decode(input_ids)\n",
237
+ "print(decode)"
238
+ ]
239
+ },
240
+ {
241
+ "cell_type": "code",
242
+ "execution_count": 11,
243
+ "metadata": {},
244
+ "outputs": [
245
+ {
246
+ "name": "stdout",
247
+ "output_type": "stream",
248
+ "text": [
249
+ "{'input_ids': [101, 10463, 2023, 2000, 19204, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}\n",
250
+ "[CLS] convert this to tokens. [SEP]\n"
251
+ ]
252
+ }
253
+ ],
254
+ "source": [
255
+ "from transformers import AutoTokenizer\n",
256
+ "\n",
257
+ "tokenizer =AutoTokenizer.from_pretrained('bert-base-uncased')\n",
258
+ "inputs = tokenizer('Convert this to tokens.')\n",
259
+ "print(inputs)\n",
260
+ "decode = tokenizer.decode(inputs['input_ids'])\n",
261
+ "print(decode)"
262
+ ]
263
+ },
264
+ {
265
+ "cell_type": "code",
266
+ "execution_count": 4,
267
+ "metadata": {},
268
+ "outputs": [
269
+ {
270
+ "name": "stderr",
271
+ "output_type": "stream",
272
+ "text": [
273
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
274
+ ]
275
+ },
276
+ {
277
+ "name": "stdout",
278
+ "output_type": "stream",
279
+ "text": [
280
+ "{'input_ids': [101, [[2023, 2003, 1037, 2862, 1997, 11746], [1045, 2097, 3046, 2026, 2190, 2000, 2562, 2009, 2460, 1012], [2009, 2003, 2524, 2000, 4553, 2066, 2023, 1012], [1045, 2572, 5458, 1012]], 102], 'token_type_ids': [0, 0, 0], 'attention_mask': [1, 1, 1]}\n"
281
+ ]
282
+ }
283
+ ],
284
+ "source": [
285
+ "'''Enough revision, now pad them:'''\n",
286
+ "from transformers import AutoTokenizer\n",
287
+ "\n",
288
+ "sentences = ['This is a list of sentences',\n",
289
+ " 'I will try my best to keep it short.',\n",
290
+ " 'It is hard to learn like this.',\n",
291
+ " 'I am tired.']\n",
292
+ "\n",
293
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
294
+ "tokens = [tokenizer.tokenize(sentence) for sentence in sentences]\n",
295
+ "input_ids = [tokenizer.convert_tokens_to_ids(token) for token in tokens]\n",
296
+ "inputs = tokenizer.prepare_for_model([input_ids])\n",
297
+ "print(inputs)\n",
298
+ "\n"
299
+ ]
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": 7,
304
+ "metadata": {},
305
+ "outputs": [
306
+ {
307
+ "name": "stdout",
308
+ "output_type": "stream",
309
+ "text": [
310
+ "101\n",
311
+ "[[2023, 2003, 1037, 2862, 1997, 11746], [1045, 2097, 3046, 2026, 2190, 2000, 2562, 2009, 2460, 1012], [2009, 2003, 2524, 2000, 4553, 2066, 2023, 1012], [1045, 2572, 5458, 1012]]\n",
312
+ "102\n"
313
+ ]
314
+ }
315
+ ],
316
+ "source": [
317
+ "for i in range(len(inputs)): print(inputs['input_ids'][i])"
318
+ ]
319
+ },
320
+ {
321
+ "cell_type": "code",
322
+ "execution_count": 11,
323
+ "metadata": {},
324
+ "outputs": [
325
+ {
326
+ "data": {
327
+ "text/plain": [
328
+ "[[2023, 2003, 1037, 2862, 1997, 11746],\n",
329
+ " [1045, 2097, 3046, 2026, 2190, 2000, 2562, 2009, 2460, 1012],\n",
330
+ " [2009, 2003, 2524, 2000, 4553, 2066, 2023, 1012],\n",
331
+ " [1045, 2572, 5458, 1012]]"
332
+ ]
333
+ },
334
+ "execution_count": 11,
335
+ "metadata": {},
336
+ "output_type": "execute_result"
337
+ }
338
+ ],
339
+ "source": [
340
+ "inputs['input_ids'][1]"
341
+ ]
342
+ },
343
+ {
344
+ "cell_type": "code",
345
+ "execution_count": 12,
346
+ "metadata": {},
347
+ "outputs": [
348
+ {
349
+ "data": {
350
+ "text/plain": [
351
+ "[2023, 2003, 1037, 2862, 1997, 11746]"
352
+ ]
353
+ },
354
+ "execution_count": 12,
355
+ "metadata": {},
356
+ "output_type": "execute_result"
357
+ }
358
+ ],
359
+ "source": [
360
+ "inputs['input_ids'][1][0]"
361
+ ]
362
+ },
363
+ {
364
+ "cell_type": "code",
365
+ "execution_count": 13,
366
+ "metadata": {},
367
+ "outputs": [
368
+ {
369
+ "data": {
370
+ "text/plain": [
371
+ "[1045, 2097, 3046, 2026, 2190, 2000, 2562, 2009, 2460, 1012]"
372
+ ]
373
+ },
374
+ "execution_count": 13,
375
+ "metadata": {},
376
+ "output_type": "execute_result"
377
+ }
378
+ ],
379
+ "source": [
380
+ "inputs['input_ids'][1][1]"
381
+ ]
382
+ },
383
+ {
384
+ "cell_type": "code",
385
+ "execution_count": 14,
386
+ "metadata": {},
387
+ "outputs": [
388
+ {
389
+ "data": {
390
+ "text/plain": [
391
+ "[2009, 2003, 2524, 2000, 4553, 2066, 2023, 1012]"
392
+ ]
393
+ },
394
+ "execution_count": 14,
395
+ "metadata": {},
396
+ "output_type": "execute_result"
397
+ }
398
+ ],
399
+ "source": [
400
+ "inputs['input_ids'][1][2]"
401
+ ]
402
+ },
403
+ {
404
+ "cell_type": "code",
405
+ "execution_count": 15,
406
+ "metadata": {},
407
+ "outputs": [
408
+ {
409
+ "data": {
410
+ "text/plain": [
411
+ "[1045, 2572, 5458, 1012]"
412
+ ]
413
+ },
414
+ "execution_count": 15,
415
+ "metadata": {},
416
+ "output_type": "execute_result"
417
+ }
418
+ ],
419
+ "source": [
420
+ "inputs['input_ids'][1][3]"
421
+ ]
422
+ },
423
+ {
424
+ "cell_type": "code",
425
+ "execution_count": 20,
426
+ "metadata": {},
427
+ "outputs": [],
428
+ "source": [
429
+ "'''Now we won't receive error.'''\n",
430
+ "import torch\n",
431
+ "padded_input_ids = [[1045, 2097, 3046, 2026, 2190, 2000, 2562, 2009, 2460, 1012],\n",
432
+ " [2023, 2003, 1037, 2862, 1997, 11746, 0, 0, 0, 0],\n",
433
+ " [2009, 2003, 2524, 2000, 4553, 2066, 2023, 1012, 0, 0],\n",
434
+ " [1045, 2572, 5458, 1012, 0, 0, 0, 0, 0, 0]]\n",
435
+ "\n",
436
+ "input_ids = torch.tensor(padded_input_ids) "
437
+ ]
438
+ },
439
+ {
440
+ "cell_type": "code",
441
+ "execution_count": 27,
442
+ "metadata": {},
443
+ "outputs": [
444
+ {
445
+ "name": "stderr",
446
+ "output_type": "stream",
447
+ "text": [
448
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
449
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
450
+ "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n"
451
+ ]
452
+ },
453
+ {
454
+ "name": "stdout",
455
+ "output_type": "stream",
456
+ "text": [
457
+ "tensor([[0.0690, 0.7675]], grad_fn=<AddmmBackward0>)\n",
458
+ "tensor([[-0.2026, 0.1231]], grad_fn=<AddmmBackward0>)\n",
459
+ "tensor([[0.0924, 0.7572]], grad_fn=<AddmmBackward0>)\n",
460
+ "tensor([[0.2478, 0.6774]], grad_fn=<AddmmBackward0>)\n",
461
+ "tensor([[ 0.0690, 0.7675],\n",
462
+ " [-0.2026, 0.1231],\n",
463
+ " [ 0.0924, 0.7572],\n",
464
+ " [ 0.2478, 0.6774]], grad_fn=<AddmmBackward0>)\n"
465
+ ]
466
+ }
467
+ ],
468
+ "source": [
469
+ "from transformers import AutoModelForSequenceClassification\n",
470
+ "\n",
471
+ "ids1 = torch.tensor([padded_input_ids[0]])\n",
472
+ "ids2 = torch.tensor([padded_input_ids[1]])\n",
473
+ "ids3 = torch.tensor([padded_input_ids[2]])\n",
474
+ "ids4 = torch.tensor([padded_input_ids[3]])\n",
475
+ "\n",
476
+ "all_ids = torch.tensor([padded_input_ids[0], \n",
477
+ " padded_input_ids[1], \n",
478
+ " padded_input_ids[2],\n",
479
+ " padded_input_ids[3]])\n",
480
+ "# # same effect:\n",
481
+ "# all_ids =torch.tensor(padded_input_ids)\n",
482
+ "\n",
483
+ "model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')\n",
484
+ "print(model(ids1).logits)\n",
485
+ "print(model(ids2).logits)\n",
486
+ "print(model(ids3).logits)\n",
487
+ "print(model(ids4).logits)\n",
488
+ "print(model(all_ids).logits)"
489
+ ]
490
+ },
491
+ {
492
+ "cell_type": "code",
493
+ "execution_count": 28,
494
+ "metadata": {},
495
+ "outputs": [],
496
+ "source": [
497
+ "all_ids = torch.tensor(padded_input_ids)\n",
498
+ "attention_mask = torch.tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
499
+ " [1, 1, 1, 1, 1, 1, 0, 0, 0, 0],\n",
500
+ " [1, 1, 1, 1, 1, 1, 1, 1, 0, 0],\n",
501
+ " [1, 1, 1, 1, 0, 0, 0, 0, 0, 0]])"
502
+ ]
503
+ },
504
+ {
505
+ "cell_type": "code",
506
+ "execution_count": 30,
507
+ "metadata": {},
508
+ "outputs": [
509
+ {
510
+ "name": "stderr",
511
+ "output_type": "stream",
512
+ "text": [
513
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
514
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
515
+ ]
516
+ },
517
+ {
518
+ "name": "stdout",
519
+ "output_type": "stream",
520
+ "text": [
521
+ "tensor([[ 0.1353, -0.0232]], grad_fn=<AddmmBackward0>)\n",
522
+ "tensor([[ 0.1116, -0.2974]], grad_fn=<AddmmBackward0>)\n",
523
+ "tensor([[ 0.1224, -0.1755]], grad_fn=<AddmmBackward0>)\n",
524
+ "tensor([[ 0.0059, -0.2736]], grad_fn=<AddmmBackward0>)\n",
525
+ "tensor([[ 0.1353, -0.0232],\n",
526
+ " [ 0.1041, -0.0745],\n",
527
+ " [ 0.1715, -0.1862],\n",
528
+ " [ 0.0225, -0.1921]], grad_fn=<AddmmBackward0>)\n"
529
+ ]
530
+ }
531
+ ],
532
+ "source": [
533
+ "model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')\n",
534
+ "output1 = model(ids1)\n",
535
+ "output2 = model(ids2)\n",
536
+ "output3 = model(ids3)\n",
537
+ "output4 = model(ids4)\n",
538
+ "print(output1.logits)\n",
539
+ "print(output2.logits)\n",
540
+ "print(output3.logits)\n",
541
+ "print(output4.logits)\n",
542
+ "\n",
543
+ "output = model(all_ids, attention_mask=attention_mask)\n",
544
+ "print(output.logits)"
545
+ ]
546
+ },
547
+ {
548
+ "cell_type": "code",
549
+ "execution_count": 31,
550
+ "metadata": {},
551
+ "outputs": [],
552
+ "source": [
553
+ "# In short, this is how you batch input together:\n",
554
+ "\n",
555
+ "from transformers import AutoTokenizer\n",
556
+ "\n",
557
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
558
+ "raw_inputs = ['I am so slow', 'I wish I had more time in a day',\n",
559
+ " 'We all have equal time per day so we need to make the best use of it',\n",
560
+ " 'This is a very long sentence that will not fit in the model.. of will it?']\n",
561
+ "batch = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')\n"
562
+ ]
563
+ },
564
+ {
565
+ "cell_type": "code",
566
+ "execution_count": 32,
567
+ "metadata": {},
568
+ "outputs": [
569
+ {
570
+ "name": "stdout",
571
+ "output_type": "stream",
572
+ "text": [
573
+ "{'input_ids': tensor([[ 101, 1045, 2572, 2061, 4030, 102, 0, 0, 0, 0, 0, 0,\n",
574
+ " 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
575
+ " [ 101, 1045, 4299, 1045, 2018, 2062, 2051, 1999, 1037, 2154, 102, 0,\n",
576
+ " 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
577
+ " [ 101, 2057, 2035, 2031, 5020, 2051, 2566, 2154, 2061, 2057, 2342, 2000,\n",
578
+ " 2191, 1996, 2190, 2224, 1997, 2009, 102, 0, 0],\n",
579
+ " [ 101, 2023, 2003, 1037, 2200, 2146, 6251, 2008, 2097, 2025, 4906, 1999,\n",
580
+ " 1996, 2944, 1012, 1012, 1997, 2097, 2009, 1029, 102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
581
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
582
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
583
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
584
+ " [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
585
+ " [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],\n",
586
+ " [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}\n"
587
+ ]
588
+ }
589
+ ],
590
+ "source": [
591
+ "print(batch)"
592
+ ]
593
+ },
594
+ {
595
+ "cell_type": "code",
596
+ "execution_count": null,
597
+ "metadata": {},
598
+ "outputs": [],
599
+ "source": []
600
+ }
601
+ ],
602
+ "metadata": {
603
+ "kernelspec": {
604
+ "display_name": "Python 3",
605
+ "language": "python",
606
+ "name": "python3"
607
+ },
608
+ "language_info": {
609
+ "codemirror_mode": {
610
+ "name": "ipython",
611
+ "version": 3
612
+ },
613
+ "file_extension": ".py",
614
+ "mimetype": "text/x-python",
615
+ "name": "python",
616
+ "nbconvert_exporter": "python",
617
+ "pygments_lexer": "ipython3",
618
+ "version": "3.11.6"
619
+ }
620
+ },
621
+ "nbformat": 4,
622
+ "nbformat_minor": 2
623
+ }
DynamicPadding.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
PytorchTrainingLoop.ipynb ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "application/vnd.jupyter.widget-view+json": {
11
+ "model_id": "01f95b9bb5e44ed9b5a1fc5deb9fa736",
12
+ "version_major": 2,
13
+ "version_minor": 0
14
+ },
15
+ "text/plain": [
16
+ "Map: 0%| | 0/3668 [00:00<?, ? examples/s]"
17
+ ]
18
+ },
19
+ "metadata": {},
20
+ "output_type": "display_data"
21
+ },
22
+ {
23
+ "data": {
24
+ "application/vnd.jupyter.widget-view+json": {
25
+ "model_id": "a0cf3c495d53445cab1df0269126c7bc",
26
+ "version_major": 2,
27
+ "version_minor": 0
28
+ },
29
+ "text/plain": [
30
+ "Map: 0%| | 0/408 [00:00<?, ? examples/s]"
31
+ ]
32
+ },
33
+ "metadata": {},
34
+ "output_type": "display_data"
35
+ },
36
+ {
37
+ "data": {
38
+ "application/vnd.jupyter.widget-view+json": {
39
+ "model_id": "3470da5867ee4478a66fb7daac246f60",
40
+ "version_major": 2,
41
+ "version_minor": 0
42
+ },
43
+ "text/plain": [
44
+ "Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
45
+ ]
46
+ },
47
+ "metadata": {},
48
+ "output_type": "display_data"
49
+ }
50
+ ],
51
+ "source": [
52
+ "from datasets import load_dataset\n",
53
+ "from transformers import AutoTokenizer, DataCollatorWithPadding\n",
54
+ "\n",
55
+ "raw_datasets = load_dataset('glue', 'mrpc')\n",
56
+ "checkpoint = 'bert-base-cased'\n",
57
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
58
+ "\n",
59
+ "def tokenize_function(examples):\n",
60
+ " return tokenizer(examples['sentence1'], examples['sentence2'],\n",
61
+ " truncation=True)\n",
62
+ "\n",
63
+ "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
64
+ "tokenized_datasets = tokenized_datasets.remove_columns(['idx', 'sentence1', 'sentence2'])\n",
65
+ "tokenized_datasets = tokenized_datasets.rename_column('label',\"labels\")\n",
66
+ "tokenized_datasets = tokenized_datasets.with_format('torch')\n",
67
+ "\n",
68
+ "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": 2,
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "from torch.utils.data import DataLoader\n",
78
+ "\n",
79
+ "train_dataloader = DataLoader(\n",
80
+ " tokenized_datasets[\"train\"], shuffle=True, batch_size=8, collate_fn=data_collator\n",
81
+ ")\n",
82
+ "eval_dataloader = DataLoader(\n",
83
+ " tokenized_datasets[\"validation\"], batch_size=8, collate_fn=data_collator\n",
84
+ ")"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": 3,
90
+ "metadata": {},
91
+ "outputs": [
92
+ {
93
+ "name": "stderr",
94
+ "output_type": "stream",
95
+ "text": [
96
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
97
+ ]
98
+ },
99
+ {
100
+ "name": "stdout",
101
+ "output_type": "stream",
102
+ "text": [
103
+ "{'labels': torch.Size([8]), 'input_ids': torch.Size([8, 60]), 'token_type_ids': torch.Size([8, 60]), 'attention_mask': torch.Size([8, 60])}\n"
104
+ ]
105
+ }
106
+ ],
107
+ "source": [
108
+ "'''Grab a batch of data and inspect it, check to see if everything\n",
109
+ "works as intended'''\n",
110
+ "\n",
111
+ "for batch in train_dataloader:\n",
112
+ " break\n",
113
+ "print({k: v.shape for k, v in batch.items()})"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": 4,
119
+ "metadata": {},
120
+ "outputs": [
121
+ {
122
+ "name": "stderr",
123
+ "output_type": "stream",
124
+ "text": [
125
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
126
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
127
+ ]
128
+ }
129
+ ],
130
+ "source": [
131
+ "'''Send the training data to the model'''\n",
132
+ "from transformers import AutoModelForSequenceClassification\n",
133
+ "\n",
134
+ "checkpoint = 'bert-base-cased'\n",
135
+ "# Adjust number of labels to the number of classes, in this case 2\n",
136
+ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": 5,
142
+ "metadata": {},
143
+ "outputs": [
144
+ {
145
+ "name": "stdout",
146
+ "output_type": "stream",
147
+ "text": [
148
+ "tensor(0.7639, grad_fn=<NllLossBackward0>) torch.Size([8, 2])\n"
149
+ ]
150
+ }
151
+ ],
152
+ "source": [
153
+ "'''To be sure everything is going well, check outputs'''\n",
154
+ "outputs = model(**batch)\n",
155
+ "print(outputs.loss, outputs.logits.shape)\n",
156
+ "# Note: The transformers model automatically calculates the loss"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": 6,
162
+ "metadata": {},
163
+ "outputs": [
164
+ {
165
+ "name": "stderr",
166
+ "output_type": "stream",
167
+ "text": [
168
+ "C:\\Users\\HP\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
169
+ " warnings.warn(\n"
170
+ ]
171
+ }
172
+ ],
173
+ "source": [
174
+ "'''So, just initialize the optimizer and the training loop'''\n",
175
+ "# optimizer:\n",
176
+ "from transformers import AdamW\n",
177
+ "\n",
178
+ "optimizer = AdamW(model.parameters(), lr=0.005)\n",
179
+ "\n",
180
+ "# training loop:\n",
181
+ "loss = outputs.loss\n",
182
+ "loss.backward()\n",
183
+ "optimizer.step()\n",
184
+ "\n",
185
+ "optimizer.zero_grad()"
186
+ ]
187
+ },
188
+ {
189
+ "cell_type": "code",
190
+ "execution_count": 7,
191
+ "metadata": {},
192
+ "outputs": [],
193
+ "source": [
194
+ "'''To reduce learning rate to 0 import get_scheduler\n",
195
+ "This is a convenient function from the transformers library to \n",
196
+ "progressively decay our learning rate to zero as we train the model'''\n",
197
+ "\n",
198
+ "from transformers import get_scheduler\n",
199
+ "\n",
200
+ "num_epochs = 3\n",
201
+ "num_training_steps = num_epochs * len(train_dataloader)\n",
202
+ "\n",
203
+ "lr_scheduler = get_scheduler(\n",
204
+ " name = 'linear',\n",
205
+ " optimizer = optimizer,\n",
206
+ " num_warmup_steps = 0,\n",
207
+ " num_training_steps = num_training_steps \n",
208
+ ")"
209
+ ]
210
+ },
211
+ {
212
+ "cell_type": "code",
213
+ "execution_count": 8,
214
+ "metadata": {},
215
+ "outputs": [
216
+ {
217
+ "name": "stdout",
218
+ "output_type": "stream",
219
+ "text": [
220
+ "cpu\n"
221
+ ]
222
+ }
223
+ ],
224
+ "source": [
225
+ "'''Device agnostic code: use gpu'''\n",
226
+ "import torch\n",
227
+ "\n",
228
+ "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
229
+ "model.to(device)\n",
230
+ "print(device)"
231
+ ]
232
+ },
233
+ {
234
+ "cell_type": "code",
235
+ "execution_count": null,
236
+ "metadata": {},
237
+ "outputs": [],
238
+ "source": [
239
+ "'''Finally, we can put everything together:'''\n",
240
+ "\n",
241
+ "from tqdm.auto import tqdm\n",
242
+ "\n",
243
+ "progress_bar = tqdm(range(num_training_steps))\n",
244
+ "\n",
245
+ "# set model to training mode:\n",
246
+ "model.train()\n",
247
+ "for epoch in range(num_epochs):\n",
248
+ " # loop over batches:\n",
249
+ " for batch in train_dataloader:\n",
250
+ " # send batch to device:\n",
251
+ " batch = {k: v.to(device) for k,v in batch.items()}\n",
252
+ " # forward pass:\n",
253
+ " outputs = model(**batch)\n",
254
+ " # calculate loss:\n",
255
+ " loss = outputs.loss\n",
256
+ " loss.backward()\n",
257
+ " # update weights:\n",
258
+ " optimizer.step()\n",
259
+ " # update learning rate:\n",
260
+ " lr_scheduler.step()\n",
261
+ " # reset gradients:\n",
262
+ " optimizer.zero_grad()\n",
263
+ " # update progress bar:\n",
264
+ " progress_bar.update(1)\n"
265
+ ]
266
+ },
267
+ {
268
+ "cell_type": "code",
269
+ "execution_count": 9,
270
+ "metadata": {},
271
+ "outputs": [
272
+ {
273
+ "name": "stderr",
274
+ "output_type": "stream",
275
+ "text": [
276
+ "C:\\Users\\HP\\AppData\\Local\\Temp\\ipykernel_1992\\3192441023.py:6: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n",
277
+ " metric = load_metric(\"glue\", \"mrpc\")\n"
278
+ ]
279
+ },
280
+ {
281
+ "data": {
282
+ "application/vnd.jupyter.widget-view+json": {
283
+ "model_id": "97b30eb20a8f481097917bf582f9a963",
284
+ "version_major": 2,
285
+ "version_minor": 0
286
+ },
287
+ "text/plain": [
288
+ "Downloading builder script: 0%| | 0.00/1.84k [00:00<?, ?B/s]"
289
+ ]
290
+ },
291
+ "metadata": {},
292
+ "output_type": "display_data"
293
+ },
294
+ {
295
+ "data": {
296
+ "text/plain": [
297
+ "{'accuracy': 0.6838235294117647, 'f1': 0.8122270742358079}"
298
+ ]
299
+ },
300
+ "execution_count": 9,
301
+ "metadata": {},
302
+ "output_type": "execute_result"
303
+ }
304
+ ],
305
+ "source": [
306
+ "'''Once the above step is completed, we can evaluate our model\n",
307
+ "very easily using the load_metric module in datasets library:'''\n",
308
+ "\n",
309
+ "from datasets import load_metric\n",
310
+ "\n",
311
+ "metric = load_metric(\"glue\", \"mrpc\")\n",
312
+ "\n",
313
+ "model.eval()\n",
314
+ "for batch in eval_dataloader:\n",
315
+ " batch = {k: v.to(device) for k,v in batch.items()}\n",
316
+ " with torch.no_grad():\n",
317
+ " outputs = model(**batch)\n",
318
+ " \n",
319
+ " logits = outputs.logits\n",
320
+ " predictions = torch.argmax(logits, dim=-1)\n",
321
+ " metric.add_batch(predictions=predictions, references=batch['labels'])\n",
322
+ " \n",
323
+ "metric.compute()"
324
+ ]
325
+ },
326
+ {
327
+ "cell_type": "code",
328
+ "execution_count": 10,
329
+ "metadata": {},
330
+ "outputs": [
331
+ {
332
+ "data": {
333
+ "text/plain": [
334
+ "'Congratulations! You have successfully fine-tuned a model all by yourself'"
335
+ ]
336
+ },
337
+ "execution_count": 10,
338
+ "metadata": {},
339
+ "output_type": "execute_result"
340
+ }
341
+ ],
342
+ "source": [
343
+ "'''Congratulations! You have successfully fine-tuned a model all by yourself'''"
344
+ ]
345
+ },
346
+ {
347
+ "cell_type": "code",
348
+ "execution_count": null,
349
+ "metadata": {},
350
+ "outputs": [],
351
+ "source": []
352
+ }
353
+ ],
354
+ "metadata": {
355
+ "kernelspec": {
356
+ "display_name": "Python 3",
357
+ "language": "python",
358
+ "name": "python3"
359
+ },
360
+ "language_info": {
361
+ "codemirror_mode": {
362
+ "name": "ipython",
363
+ "version": 3
364
+ },
365
+ "file_extension": ".py",
366
+ "mimetype": "text/x-python",
367
+ "name": "python",
368
+ "nbconvert_exporter": "python",
369
+ "pygments_lexer": "ipython3",
370
+ "version": "3.11.6"
371
+ }
372
+ },
373
+ "nbformat": 4,
374
+ "nbformat_minor": 2
375
+ }
Tokenizer_pipeline.ipynb ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "['i', 'am', 'going', 'to', 'token', '##ize', 'this', 'sentence', '.']\n"
13
+ ]
14
+ }
15
+ ],
16
+ "source": [
17
+ "from transformers import AutoTokenizer\n",
18
+ "\n",
19
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
20
+ "tokens = tokenizer.tokenize('''I am going to tokenize this sentence.''')\n",
21
+ "print(tokens)"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 3,
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "name": "stdout",
31
+ "output_type": "stream",
32
+ "text": [
33
+ "['▁i', '▁am', '▁going', '▁to', '▁to', 'ken', 'ize', '▁this', '▁sentence', '▁using', '▁albert', '-', 'base', '-', 'v', '1', '▁model', \"'\", 's', '▁to', 'ken', 'izer', '.']\n"
34
+ ]
35
+ }
36
+ ],
37
+ "source": [
38
+ "from transformers import AutoTokenizer\n",
39
+ "\n",
40
+ "tokenizer = AutoTokenizer.from_pretrained('albert-base-v1')\n",
41
+ "tokens = tokenizer.tokenize('''I am going to tokenize this sentence\n",
42
+ " using albert-base-v1 model's tokenizer.''')\n",
43
+ "print(tokens)"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 4,
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "data": {
53
+ "text/plain": [
54
+ "'Note that above, the subword based tokenizer \\nis used in both tokenizers. \\nHowever to depict the beginning of a word in albert\\nbase tokenizer it uses the prefix _\\nwhile, bert base tokenizer uses ##.'"
55
+ ]
56
+ },
57
+ "execution_count": 4,
58
+ "metadata": {},
59
+ "output_type": "execute_result"
60
+ }
61
+ ],
62
+ "source": [
63
+ "'''Note that above, the subword based tokenizer \n",
64
+ "is used in both tokenizers. \n",
65
+ "However to depict the beginning of a word in albert\n",
66
+ "base tokenizer it uses the prefix _\n",
67
+ "while, bert base tokenizer uses ##.'''\n"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": 5,
73
+ "metadata": {},
74
+ "outputs": [
75
+ {
76
+ "name": "stdout",
77
+ "output_type": "stream",
78
+ "text": [
79
+ "[11082, 3046, 2000, 19204, 4697]\n"
80
+ ]
81
+ }
82
+ ],
83
+ "source": [
84
+ "'''\n",
85
+ "The second task of the tokenization pipeline\n",
86
+ "is to map those tokens to their respective IDs.\n",
87
+ "This is done by the convert_tokens_to_ids method.\n",
88
+ "'''\n",
89
+ "\n",
90
+ "from transformers import AutoTokenizer\n",
91
+ "\n",
92
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
93
+ "tokens = tokenizer.tokenize('Lets try to tokenize')\n",
94
+ "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
95
+ "print(input_ids)\n",
96
+ "\n",
97
+ "'''This is why we need to download a file while instantiating\n",
98
+ "the tokenizer from pretrained method.\n",
99
+ "We have to make sure we use the same mapping as to when the model\n",
100
+ "was pretrained\n",
101
+ "To do this we use the convert tokens to ids method.'''"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "code",
106
+ "execution_count": 6,
107
+ "metadata": {},
108
+ "outputs": [
109
+ {
110
+ "name": "stderr",
111
+ "output_type": "stream",
112
+ "text": [
113
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
114
+ ]
115
+ },
116
+ {
117
+ "name": "stdout",
118
+ "output_type": "stream",
119
+ "text": [
120
+ "[101, 11082, 3046, 2000, 19204, 4697, 102]\n"
121
+ ]
122
+ }
123
+ ],
124
+ "source": [
125
+ "'''\n",
126
+ "You may have noticed that the tokens for CLS and SEP \n",
127
+ "are missing! Those are the special tokens that are added \n",
128
+ "by the prepare for model method. \n",
129
+ "The prepare for model method knows which special tokens\n",
130
+ "to add and where to add them based on the model type.\n",
131
+ "'''\n",
132
+ "final_inputs = tokenizer.prepare_for_model(input_ids)\n",
133
+ "print(final_inputs['input_ids'])"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type": "code",
138
+ "execution_count": 8,
139
+ "metadata": {},
140
+ "outputs": [
141
+ {
142
+ "name": "stdout",
143
+ "output_type": "stream",
144
+ "text": [
145
+ "[CLS] lets try to tokenize [SEP]\n"
146
+ ]
147
+ }
148
+ ],
149
+ "source": [
150
+ "'''\n",
151
+ "You can look at the special tokens modularly\n",
152
+ "by decoding the input ids as how the tokenizer \n",
153
+ "has changed your text by using the decode method.\n",
154
+ "'''\n",
155
+ "from transformers import AutoTokenizer\n",
156
+ "\n",
157
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
158
+ "inputs = tokenizer('Lets try to tokenize')\n",
159
+ "print(tokenizer.decode(inputs['input_ids']))\n"
160
+ ]
161
+ },
162
+ {
163
+ "cell_type": "code",
164
+ "execution_count": 11,
165
+ "metadata": {},
166
+ "outputs": [
167
+ {
168
+ "name": "stdout",
169
+ "output_type": "stream",
170
+ "text": [
171
+ "<s>Lets try to tokenize.</s>\n"
172
+ ]
173
+ }
174
+ ],
175
+ "source": [
176
+ "'''Above the bert tokenizer uses the [CLS] and [SEP] tokens\n",
177
+ "But the roberta tokenizer uses the <s> and </s> tokens'''\n",
178
+ "from transformers import AutoTokenizer\n",
179
+ "\n",
180
+ "tokenizer = AutoTokenizer.from_pretrained('roberta-base')\n",
181
+ "inputs = tokenizer('Lets try to tokenize.')\n",
182
+ "print(tokenizer.decode(inputs['input_ids']))"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "markdown",
187
+ "metadata": {},
188
+ "source": [
189
+ "There are actually two ways to decode:\n",
190
+ "- The one shown above\n",
191
+ "- The one shown below"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": null,
197
+ "metadata": {},
198
+ "outputs": [
199
+ {
200
+ "name": "stderr",
201
+ "output_type": "stream",
202
+ "text": [
203
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
204
+ ]
205
+ },
206
+ {
207
+ "name": "stdout",
208
+ "output_type": "stream",
209
+ "text": [
210
+ "{'input_ids': [101, 10463, 2023, 2000, 19204, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}\n",
211
+ "convert this to tokens.\n"
212
+ ]
213
+ }
214
+ ],
215
+ "source": [
216
+ "from transformers import AutoTokenizer\n",
217
+ "\n",
218
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
219
+ "tokens = tokenizer.tokenize('Convert this to tokens.')\n",
220
+ "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
221
+ "inputs = tokenizer.prepare_for_model(input_ids)\n",
222
+ "print(inputs)\n",
223
+ "\n",
224
+ "# decode:\n",
225
+ "decode = tokenizer.decode(input_ids)\n",
226
+ "print(decode)"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": null,
232
+ "metadata": {},
233
+ "outputs": [
234
+ {
235
+ "name": "stdout",
236
+ "output_type": "stream",
237
+ "text": [
238
+ "{'input_ids': [101, 10463, 2023, 2000, 19204, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}\n",
239
+ "[CLS] convert this to tokens. [SEP]\n"
240
+ ]
241
+ }
242
+ ],
243
+ "source": [
244
+ "from transformers import AutoTokenizer\n",
245
+ "\n",
246
+ "tokenizer =AutoTokenizer.from_pretrained('bert-base-uncased')\n",
247
+ "inputs = tokenizer('Convert this to tokens.')\n",
248
+ "print(inputs)\n",
249
+ "decode = tokenizer.decode(inputs['input_ids'])\n",
250
+ "print(decode)"
251
+ ]
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": 12,
256
+ "metadata": {},
257
+ "outputs": [
258
+ {
259
+ "name": "stdout",
260
+ "output_type": "stream",
261
+ "text": [
262
+ "{'input_ids': [101, 11082, 3046, 2000, 19204, 4697, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}\n"
263
+ ]
264
+ }
265
+ ],
266
+ "source": [
267
+ "'''\n",
268
+ "Now that you know the intermediate stuff about how \n",
269
+ "a tokenizer works, you can forget all that stuff\n",
270
+ "and only remember that you have to call it on the input \n",
271
+ "text.\n",
272
+ "'''\n",
273
+ "from transformers import AutoTokenizer\n",
274
+ "\n",
275
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
276
+ "inputs = tokenizer('Lets try to tokenize')\n",
277
+ "print(inputs)"
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": 13,
283
+ "metadata": {},
284
+ "outputs": [
285
+ {
286
+ "data": {
287
+ "text/plain": [
288
+ "'To learn what attention mask is\\ncheck out the --batch input together\\n\\nTo learn what the token type ids are\\ncheck out --process pairs of sentences \\n'"
289
+ ]
290
+ },
291
+ "execution_count": 13,
292
+ "metadata": {},
293
+ "output_type": "execute_result"
294
+ }
295
+ ],
296
+ "source": [
297
+ "'''To learn what attention mask is\n",
298
+ "check out the --batch input together\n",
299
+ "\n",
300
+ "To learn what the token type ids are\n",
301
+ "check out --process pairs of sentences \n",
302
+ "'''"
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "markdown",
307
+ "metadata": {},
308
+ "source": [
309
+ "The primary and easy way to batch inputs together is as follows:"
310
+ ]
311
+ },
312
+ {
313
+ "cell_type": "code",
314
+ "execution_count": 2,
315
+ "metadata": {},
316
+ "outputs": [
317
+ {
318
+ "name": "stdout",
319
+ "output_type": "stream",
320
+ "text": [
321
+ "{'input_ids': tensor([[ 101, 1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607,\n",
322
+ " 2026, 2878, 2166, 1012, 102],\n",
323
+ " [ 101, 1045, 5223, 2023, 2061, 2172, 102, 0, 0, 0,\n",
324
+ " 0, 0, 0, 0, 0],\n",
325
+ " [ 101, 1045, 2572, 2025, 9657, 1012, 102, 0, 0, 0,\n",
326
+ " 0, 0, 0, 0, 0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
327
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
328
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
329
+ " [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],\n",
330
+ " [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}\n"
331
+ ]
332
+ }
333
+ ],
334
+ "source": [
335
+ "from transformers import AutoTokenizer\n",
336
+ "\n",
337
+ "checkpoint = 'bert-base-uncased'\n",
338
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
339
+ "\n",
340
+ "sentences = ['I have been waiting for a hugging face course my whole life.',\n",
341
+ " 'I hate this so much',\n",
342
+ " 'I am not confident.']\n",
343
+ "tokens = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')\n",
344
+ "print(tokens)"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "markdown",
349
+ "metadata": {},
350
+ "source": [
351
+ "However, if you wish to batch inputs together from beneath the tokenizer pipeline:"
352
+ ]
353
+ },
354
+ {
355
+ "cell_type": "code",
356
+ "execution_count": 4,
357
+ "metadata": {},
358
+ "outputs": [
359
+ {
360
+ "name": "stdout",
361
+ "output_type": "stream",
362
+ "text": [
363
+ "[[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012], [1045, 5223, 2023, 2061, 2172], [1045, 2572, 2025, 9657, 1012]]\n",
364
+ "[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012]\n",
365
+ "[1045, 5223, 2023, 2061, 2172]\n",
366
+ "[1045, 2572, 2025, 9657, 1012]\n"
367
+ ]
368
+ }
369
+ ],
370
+ "source": [
371
+ "from transformers import AutoTokenizer\n",
372
+ "\n",
373
+ "checkpoint = 'bert-base-uncased'\n",
374
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
375
+ "\n",
376
+ "sentences = ['I have been waiting for a hugging face course my whole life.',\n",
377
+ " 'I hate this so much',\n",
378
+ " 'I am not confident.']\n",
379
+ "\n",
380
+ "tokens = [tokenizer.tokenize(sentence) for sentence in sentences]\n",
381
+ "ids = [tokenizer.convert_tokens_to_ids(token) for token in tokens]\n",
382
+ "\n",
383
+ "print(ids)\n",
384
+ "print(ids[0])\n",
385
+ "print(ids[1])\n",
386
+ "print(ids[2])"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "code",
391
+ "execution_count": 5,
392
+ "metadata": {},
393
+ "outputs": [
394
+ {
395
+ "name": "stdout",
396
+ "output_type": "stream",
397
+ "text": [
398
+ "[[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012], [1045, 5223, 2023, 2061, 2172], [1045, 2572, 2025, 9657, 1012]]\n"
399
+ ]
400
+ },
401
+ {
402
+ "ename": "ValueError",
403
+ "evalue": "expected sequence of length 13 at dim 1 (got 5)",
404
+ "output_type": "error",
405
+ "traceback": [
406
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
407
+ "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
408
+ "\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Tokenizer_pipeline.ipynb Cell 14\u001b[0m line \u001b[0;36m9\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X21sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtorch\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X21sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m \u001b[39mprint\u001b[39m(ids)\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X21sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m input_ids \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39;49mtensor(ids)\n",
409
+ "\u001b[1;31mValueError\u001b[0m: expected sequence of length 13 at dim 1 (got 5)"
410
+ ]
411
+ }
412
+ ],
413
+ "source": [
414
+ "'''Trying to create a tensor from the three lists \n",
415
+ "in torch or tensorflow will result in an error. This\n",
416
+ "is because the tensors must be of the same size, i.e. rectangular\n",
417
+ "This is done by padding. which we will see later on.'''\n",
418
+ "\n",
419
+ "\n",
420
+ "import torch\n",
421
+ "print(ids)\n",
422
+ "input_ids = torch.tensor(ids)"
423
+ ]
424
+ },
425
+ {
426
+ "cell_type": "code",
427
+ "execution_count": 7,
428
+ "metadata": {},
429
+ "outputs": [
430
+ {
431
+ "name": "stdout",
432
+ "output_type": "stream",
433
+ "text": [
434
+ "0\n"
435
+ ]
436
+ }
437
+ ],
438
+ "source": [
439
+ "'''The padding id provided to the model via training is 0.\n",
440
+ "One should not try to change it. you can pad your outputs like so:'''\n",
441
+ "\n",
442
+ "from transformers import AutoTokenizer\n",
443
+ "\n",
444
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
445
+ "pad = tokenizer.pad_token_id\n",
446
+ "print(pad)"
447
+ ]
448
+ },
449
+ {
450
+ "cell_type": "code",
451
+ "execution_count": 15,
452
+ "metadata": {},
453
+ "outputs": [
454
+ {
455
+ "name": "stdout",
456
+ "output_type": "stream",
457
+ "text": [
458
+ "[[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012], [1045, 5223, 2023, 2061, 2172], [1045, 2572, 2025, 9657, 1012]]\n",
459
+ "[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012]\n",
460
+ "[1045, 5223, 2023, 2061, 2172]\n",
461
+ "[1045, 2572, 2025, 9657, 1012]\n"
462
+ ]
463
+ },
464
+ {
465
+ "ename": "TypeError",
466
+ "evalue": "'int' object is not callable",
467
+ "output_type": "error",
468
+ "traceback": [
469
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
470
+ "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
471
+ "\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Tokenizer_pipeline.ipynb Cell 16\u001b[0m line \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=13'>14</a>\u001b[0m \u001b[39mprint\u001b[39m(ids[\u001b[39m1\u001b[39m])\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=14'>15</a>\u001b[0m \u001b[39mprint\u001b[39m(ids[\u001b[39m2\u001b[39m])\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=16'>17</a>\u001b[0m pad_ids \u001b[39m=\u001b[39m [tokenizer\u001b[39m.\u001b[39;49mpad_token_id(_) \u001b[39mfor\u001b[39;49;00m _ \u001b[39min\u001b[39;49;00m ids]\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=17'>18</a>\u001b[0m \u001b[39mprint\u001b[39m(pad_ids)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m ids1 \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mtensor(pad_ids[\u001b[39m0\u001b[39m])\n",
472
+ "\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Tokenizer_pipeline.ipynb Cell 16\u001b[0m line \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=13'>14</a>\u001b[0m \u001b[39mprint\u001b[39m(ids[\u001b[39m1\u001b[39m])\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=14'>15</a>\u001b[0m \u001b[39mprint\u001b[39m(ids[\u001b[39m2\u001b[39m])\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=16'>17</a>\u001b[0m pad_ids \u001b[39m=\u001b[39m [tokenizer\u001b[39m.\u001b[39;49mpad_token_id(_) \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m ids]\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=17'>18</a>\u001b[0m \u001b[39mprint\u001b[39m(pad_ids)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m ids1 \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mtensor(pad_ids[\u001b[39m0\u001b[39m])\n",
473
+ "\u001b[1;31mTypeError\u001b[0m: 'int' object is not callable"
474
+ ]
475
+ }
476
+ ],
477
+ "source": [
478
+ "from transformers import AutoTokenizer\n",
479
+ "\n",
480
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
481
+ "\n",
482
+ "sentences = ['I have been waiting for a hugging face course my whole life.',\n",
483
+ " 'I hate this so much',\n",
484
+ " 'I am not confident.']\n",
485
+ "\n",
486
+ "tokens = [tokenizer.tokenize(sentence) for sentence in sentences]\n",
487
+ "ids = [tokenizer.convert_tokens_to_ids(token) for token in tokens]\n",
488
+ "\n",
489
+ "print(ids)\n",
490
+ "print(ids[0])\n",
491
+ "print(ids[1])\n",
492
+ "print(ids[2])\n",
493
+ "\n",
494
+ "pad_ids = [tokenizer.pad_token_id(_) for _ in ids]\n",
495
+ "print(pad_ids)\n",
496
+ "ids1 = torch.tensor(pad_ids[0])\n",
497
+ "ids2 = torch.tensor(pad_ids[1])\n",
498
+ "ids3 = torch.tensor(pad_ids[2])\n",
499
+ "all_ids = torch.tensor(ids1, ids2, ids3)"
500
+ ]
501
+ },
502
+ {
503
+ "cell_type": "code",
504
+ "execution_count": 1,
505
+ "metadata": {},
506
+ "outputs": [
507
+ {
508
+ "name": "stdout",
509
+ "output_type": "stream",
510
+ "text": [
511
+ "['this', 'is', 'me', 'practicing']\n",
512
+ "[2023, 2003, 2033, 12560]\n",
513
+ "['this', 'is', 'me', 'practicing']\n",
514
+ "this is me practicing\n"
515
+ ]
516
+ }
517
+ ],
518
+ "source": [
519
+ "'''More methods!!!'''\n",
520
+ "from transformers import AutoTokenizer\n",
521
+ "\n",
522
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
523
+ "tokens = tokenizer.tokenize('This is me practicing')\n",
524
+ "print(tokens)\n",
525
+ "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
526
+ "print(input_ids)\n",
527
+ "tokens_2 = tokenizer.convert_ids_to_tokens(input_ids)\n",
528
+ "print(tokens_2)\n",
529
+ "strings = tokenizer.convert_tokens_to_string(tokens)\n",
530
+ "print(strings)"
531
+ ]
532
+ },
533
+ {
534
+ "cell_type": "code",
535
+ "execution_count": null,
536
+ "metadata": {},
537
+ "outputs": [],
538
+ "source": []
539
+ }
540
+ ],
541
+ "metadata": {
542
+ "kernelspec": {
543
+ "display_name": "Python 3",
544
+ "language": "python",
545
+ "name": "python3"
546
+ },
547
+ "language_info": {
548
+ "codemirror_mode": {
549
+ "name": "ipython",
550
+ "version": 3
551
+ },
552
+ "file_extension": ".py",
553
+ "mimetype": "text/x-python",
554
+ "name": "python",
555
+ "nbconvert_exporter": "python",
556
+ "pygments_lexer": "ipython3",
557
+ "version": "3.11.6"
558
+ }
559
+ },
560
+ "nbformat": 4,
561
+ "nbformat_minor": 2
562
+ }
Trainer API 18-11-23.ipynb ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from datasets import load_dataset\n",
10
+ "from transformers import AutoTokenizer, DataCollatorWithPadding\n",
11
+ "\n",
12
+ "checkpoint = 'bert-base-uncased'\n",
13
+ "raw_dataset = load_dataset('glue', 'mrpc')\n",
14
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
15
+ "\n",
16
+ "def tokenize_function(example):\n",
17
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
18
+ " truncation=True)\n",
19
+ "\n",
20
+ "tokenized_dataset = raw_dataset.map(tokenize_function, batched=True)\n",
21
+ "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
22
+ "\n",
23
+ "from transformers import AutoModelForSequenceClassification\n",
24
+ "\n",
25
+ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n",
26
+ "\n",
27
+ "# Specify training arguments hyperparameters:\n",
28
+ "from transformers import TrainingArguments\n",
29
+ "training_args = TrainingArguments(\"test-trainer\",\n",
30
+ " per_device_train_batch_size=16,\n",
31
+ " per_device_eval_batch_size=16,\n",
32
+ " num_train_epochs=5,\n",
33
+ " learning_rate=2e-5,\n",
34
+ " weight_decay=0.01)\n",
35
+ "\n",
36
+ "# Create the Trainer instance:\n",
37
+ "from transformers import Trainer\n",
38
+ "trainer = Trainer(\n",
39
+ " model=model,\n",
40
+ " args=training_args,\n",
41
+ " train_dataset=tokenized_dataset['train'],\n",
42
+ " eval_dataset=tokenized_dataset['validation'],\n",
43
+ " data_collator=data_collator,\n",
44
+ " tokenizer=tokenizer\n",
45
+ ")\n",
46
+ "trainer.train()\n",
47
+ "\n",
48
+ "'''The results will however be anticlimatic because \n",
49
+ "you will only get a training loss that doesn't tell you how well the\n",
50
+ "model is actually doing..\n",
51
+ "To fix this, you need to specify the evaluation metric'''\n",
52
+ "\n",
53
+ "predictions = trainer.predict(tokenized_dataset['validation'])\n",
54
+ "print(predictions)\n",
55
+ "print(predictions.predictions.shape, predictions.label_ids.shape)\n",
56
+ "\n",
57
+ "# it returns a named tuple with 3 elements: predictions, label_ids, metrics\n",
58
+ "# the predictions are the logits of the model with all the sentences of the dataset\n",
59
+ "# so a numpy array of shape(488 x 2)\n",
60
+ "\n",
61
+ "# to match them with our labels we need to take the maximum logits for each prediction\n",
62
+ "# to know which is the maximum, use the argmax function\n",
63
+ "import numpy as np\n",
64
+ "from datasets import load_metric\n",
65
+ "\n",
66
+ "metric = load_metric('glue', 'mrpc')\n",
67
+ "preds = np.argmax(predictions.predictions, axis=-1)\n",
68
+ "metric.compute(predictions=preds, references=predictions.label_ids)\n",
69
+ "\n",
70
+ "'''We can see that our model did learn something!'''"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": null,
76
+ "metadata": {},
77
+ "outputs": [],
78
+ "source": [
79
+ "'''To monitor the metrics during training, we need to define a\n",
80
+ "compute metric function as we did above\n",
81
+ "and pass it to the Trainer\n",
82
+ "'''\n",
83
+ "metric = load_metric('glue','mrpc')\n",
84
+ "def compute_metrics(eval_preds):\n",
85
+ " logits, labels = eval_preds\n",
86
+ " predictions = np.argmax(logits, axis=-1)\n",
87
+ " return metric.compute(predictions=predictions, references=labels)\n",
88
+ "\n",
89
+ "training_args = TrainingArguments(\"test-trainer\",\n",
90
+ " evaluation_strategy='epoch')\n",
91
+ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n",
92
+ "\n",
93
+ "trainer = Trainer(\n",
94
+ " model=model,\n",
95
+ " args=training_args,\n",
96
+ " train_dataset=tokenized_dataset['train'],\n",
97
+ " eval_dataset=tokenized_dataset['validation'],\n",
98
+ " data_collator=data_collator,\n",
99
+ " tokenizer=tokenizer,\n",
100
+ " compute_metrics=compute_metrics\n",
101
+ ")\n",
102
+ "\n",
103
+ "trainer.train()\n"
104
+ ]
105
+ }
106
+ ],
107
+ "metadata": {
108
+ "language_info": {
109
+ "name": "python"
110
+ }
111
+ },
112
+ "nbformat": 4,
113
+ "nbformat_minor": 2
114
+ }
Trainer API.ipynb ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "application/vnd.jupyter.widget-view+json": {
11
+ "model_id": "a2e9b1825c9d48a8abb3535087c66b42",
12
+ "version_major": 2,
13
+ "version_minor": 0
14
+ },
15
+ "text/plain": [
16
+ "Map: 0%| | 0/408 [00:00<?, ? examples/s]"
17
+ ]
18
+ },
19
+ "metadata": {},
20
+ "output_type": "display_data"
21
+ }
22
+ ],
23
+ "source": [
24
+ "from transformers import AutoTokenizer, DataCollatorWithPadding\n",
25
+ "from datasets import load_dataset\n",
26
+ "\n",
27
+ "checkpoint = 'bert-base-uncased'\n",
28
+ "raw_datasets = load_dataset('glue','mrpc')\n",
29
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
30
+ "\n",
31
+ "def tokenizer_function(example):\n",
32
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
33
+ " truncation=True)\n",
34
+ "\n",
35
+ "tokenized_datasets = raw_datasets.map(tokenizer_function, batched=True)\n",
36
+ "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 2,
42
+ "metadata": {},
43
+ "outputs": [
44
+ {
45
+ "data": {
46
+ "text/plain": [
47
+ "'Note that we do not do the final steps such as:\\nRemoving the unnecessary columns, Renaming the column label->labels,\\nFormating the input_ids to pytorch tensors, etc.\\nThe trainer API will automatically do this for us by analyzing the model\\nsignature!\\n'"
48
+ ]
49
+ },
50
+ "execution_count": 2,
51
+ "metadata": {},
52
+ "output_type": "execute_result"
53
+ }
54
+ ],
55
+ "source": [
56
+ "'''Note we did not apply padding in the preprocessing as we used Dynamic Padding\n",
57
+ "by the DataCollatorWithPadding!'''\n",
58
+ "\"\"\"Note that we do not do the final steps such as:\n",
59
+ "Removing the unnecessary columns, Renaming the column label->labels,\n",
60
+ "Formating the input_ids to pytorch tensors, etc.\n",
61
+ "The trainer API will automatically do this for us by analyzing the model\n",
62
+ "signature!\n",
63
+ "\"\"\""
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 3,
69
+ "metadata": {},
70
+ "outputs": [
71
+ {
72
+ "name": "stderr",
73
+ "output_type": "stream",
74
+ "text": [
75
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
76
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
77
+ ]
78
+ }
79
+ ],
80
+ "source": [
81
+ "'''The last step is to define our model and\n",
82
+ "prepare some training hyper parameters'''\n",
83
+ "from transformers import AutoModelForSequenceClassification\n",
84
+ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": 4,
90
+ "metadata": {},
91
+ "outputs": [],
92
+ "source": [
93
+ "from transformers import TrainingArguments\n",
94
+ "import torch\n",
95
+ "training_args = TrainingArguments('test-trainer')"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": 6,
101
+ "metadata": {},
102
+ "outputs": [],
103
+ "source": [
104
+ "from transformers import TrainingArguments\n",
105
+ "\n",
106
+ "# You can also assign all the hyper parameters in the arguments:\n",
107
+ "training_args = TrainingArguments(\n",
108
+ " 'test-trainer',\n",
109
+ " per_device_train_batch_size=16,\n",
110
+ " per_device_eval_batch_size=64,\n",
111
+ " num_train_epochs=5,\n",
112
+ " learning_rate=2e-5,\n",
113
+ " weight_decay=0.01,)\n"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": 7,
119
+ "metadata": {},
120
+ "outputs": [
121
+ {
122
+ "data": {
123
+ "text/plain": [
124
+ "\"It's then very easy to create a trainer and launch a training\""
125
+ ]
126
+ },
127
+ "execution_count": 7,
128
+ "metadata": {},
129
+ "output_type": "execute_result"
130
+ }
131
+ ],
132
+ "source": [
133
+ "'''It's then very easy to create a trainer and launch a training'''"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type": "code",
138
+ "execution_count": 8,
139
+ "metadata": {},
140
+ "outputs": [
141
+ {
142
+ "data": {
143
+ "application/vnd.jupyter.widget-view+json": {
144
+ "model_id": "90b1d1a6ce2f4288b6645258ddf83af6",
145
+ "version_major": 2,
146
+ "version_minor": 0
147
+ },
148
+ "text/plain": [
149
+ " 0%| | 0/1150 [00:00<?, ?it/s]"
150
+ ]
151
+ },
152
+ "metadata": {},
153
+ "output_type": "display_data"
154
+ },
155
+ {
156
+ "name": "stderr",
157
+ "output_type": "stream",
158
+ "text": [
159
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
160
+ ]
161
+ },
162
+ {
163
+ "ename": "KeyboardInterrupt",
164
+ "evalue": "",
165
+ "output_type": "error",
166
+ "traceback": [
167
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
168
+ "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
169
+ "\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Trainer API.ipynb Cell 7\u001b[0m line \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m Trainer\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m trainer \u001b[39m=\u001b[39m Trainer(\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m model, \n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m training_args,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m tokenizer\u001b[39m=\u001b[39mtokenizer,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m )\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=11'>12</a>\u001b[0m trainer\u001b[39m.\u001b[39;49mtrain()\n",
170
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\trainer.py:1591\u001b[0m, in \u001b[0;36mTrainer.train\u001b[1;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[0;32m 1589\u001b[0m hf_hub_utils\u001b[39m.\u001b[39menable_progress_bars()\n\u001b[0;32m 1590\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1591\u001b[0m \u001b[39mreturn\u001b[39;00m inner_training_loop(\n\u001b[0;32m 1592\u001b[0m args\u001b[39m=\u001b[39;49margs,\n\u001b[0;32m 1593\u001b[0m resume_from_checkpoint\u001b[39m=\u001b[39;49mresume_from_checkpoint,\n\u001b[0;32m 1594\u001b[0m trial\u001b[39m=\u001b[39;49mtrial,\n\u001b[0;32m 1595\u001b[0m ignore_keys_for_eval\u001b[39m=\u001b[39;49mignore_keys_for_eval,\n\u001b[0;32m 1596\u001b[0m )\n",
171
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\trainer.py:1892\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[1;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[0;32m 1889\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcontrol \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcallback_handler\u001b[39m.\u001b[39mon_step_begin(args, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcontrol)\n\u001b[0;32m 1891\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39maccelerator\u001b[39m.\u001b[39maccumulate(model):\n\u001b[1;32m-> 1892\u001b[0m tr_loss_step \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtraining_step(model, inputs)\n\u001b[0;32m 1894\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[0;32m 1895\u001b[0m args\u001b[39m.\u001b[39mlogging_nan_inf_filter\n\u001b[0;32m 1896\u001b[0m \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[0;32m 1897\u001b[0m \u001b[39mand\u001b[39;00m (torch\u001b[39m.\u001b[39misnan(tr_loss_step) \u001b[39mor\u001b[39;00m torch\u001b[39m.\u001b[39misinf(tr_loss_step))\n\u001b[0;32m 1898\u001b[0m ):\n\u001b[0;32m 1899\u001b[0m \u001b[39m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[0;32m 1900\u001b[0m tr_loss \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m tr_loss \u001b[39m/\u001b[39m (\u001b[39m1\u001b[39m \u001b[39m+\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mglobal_step \u001b[39m-\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_globalstep_last_logged)\n",
172
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\trainer.py:2787\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[1;34m(self, model, inputs)\u001b[0m\n\u001b[0;32m 2785\u001b[0m scaled_loss\u001b[39m.\u001b[39mbackward()\n\u001b[0;32m 2786\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 2787\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49maccelerator\u001b[39m.\u001b[39;49mbackward(loss)\n\u001b[0;32m 2789\u001b[0m \u001b[39mreturn\u001b[39;00m loss\u001b[39m.\u001b[39mdetach() \u001b[39m/\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39margs\u001b[39m.\u001b[39mgradient_accumulation_steps\n",
173
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\accelerate\\accelerator.py:1989\u001b[0m, in \u001b[0;36mAccelerator.backward\u001b[1;34m(self, loss, **kwargs)\u001b[0m\n\u001b[0;32m 1987\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mscaler\u001b[39m.\u001b[39mscale(loss)\u001b[39m.\u001b[39mbackward(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 1988\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1989\u001b[0m loss\u001b[39m.\u001b[39;49mbackward(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
174
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\torch\\_tensor.py:492\u001b[0m, in \u001b[0;36mTensor.backward\u001b[1;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[0;32m 482\u001b[0m \u001b[39mif\u001b[39;00m has_torch_function_unary(\u001b[39mself\u001b[39m):\n\u001b[0;32m 483\u001b[0m \u001b[39mreturn\u001b[39;00m handle_torch_function(\n\u001b[0;32m 484\u001b[0m Tensor\u001b[39m.\u001b[39mbackward,\n\u001b[0;32m 485\u001b[0m (\u001b[39mself\u001b[39m,),\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 490\u001b[0m inputs\u001b[39m=\u001b[39minputs,\n\u001b[0;32m 491\u001b[0m )\n\u001b[1;32m--> 492\u001b[0m torch\u001b[39m.\u001b[39;49mautograd\u001b[39m.\u001b[39;49mbackward(\n\u001b[0;32m 493\u001b[0m \u001b[39mself\u001b[39;49m, gradient, retain_graph, create_graph, inputs\u001b[39m=\u001b[39;49minputs\n\u001b[0;32m 494\u001b[0m )\n",
175
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\torch\\autograd\\__init__.py:251\u001b[0m, in \u001b[0;36mbackward\u001b[1;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[0;32m 246\u001b[0m retain_graph \u001b[39m=\u001b[39m create_graph\n\u001b[0;32m 248\u001b[0m \u001b[39m# The reason we repeat the same comment below is that\u001b[39;00m\n\u001b[0;32m 249\u001b[0m \u001b[39m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[0;32m 250\u001b[0m \u001b[39m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[1;32m--> 251\u001b[0m Variable\u001b[39m.\u001b[39;49m_execution_engine\u001b[39m.\u001b[39;49mrun_backward( \u001b[39m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[0;32m 252\u001b[0m tensors,\n\u001b[0;32m 253\u001b[0m grad_tensors_,\n\u001b[0;32m 254\u001b[0m retain_graph,\n\u001b[0;32m 255\u001b[0m create_graph,\n\u001b[0;32m 256\u001b[0m inputs,\n\u001b[0;32m 257\u001b[0m allow_unreachable\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,\n\u001b[0;32m 258\u001b[0m accumulate_grad\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,\n\u001b[0;32m 259\u001b[0m )\n",
176
+ "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
177
+ ]
178
+ }
179
+ ],
180
+ "source": [
181
+ "from transformers import Trainer\n",
182
+ "\n",
183
+ "trainer = Trainer(\n",
184
+ " model, \n",
185
+ " training_args,\n",
186
+ " train_dataset=tokenized_datasets['train'],\n",
187
+ " eval_dataset=tokenized_datasets['validation'],\n",
188
+ " data_collator=data_collator,\n",
189
+ " tokenizer=tokenizer,\n",
190
+ ")\n",
191
+ "\n",
192
+ "trainer.train()"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "code",
197
+ "execution_count": 9,
198
+ "metadata": {},
199
+ "outputs": [
200
+ {
201
+ "data": {
202
+ "text/plain": [
203
+ "\"The result will however be anticlimactic as you will only get a training loss\\nwhich doesnt really tell you how well your model is performing\\n\\nThis is because we did not define a metric to evaluate our model on!\\n\\nTo get the metrics we will first gathers the predictions on the whole evaluation set('validation set')\\nusing the trainer.predict method\\n\\nIt will return a namedtuple with the following attributes:\\npredictions, label_ids, metrics, num_samples\\n\\nwe are trying to get the metrics attribute which is empty here!\\n\""
204
+ ]
205
+ },
206
+ "execution_count": 9,
207
+ "metadata": {},
208
+ "output_type": "execute_result"
209
+ }
210
+ ],
211
+ "source": [
212
+ "'''The result will however be anticlimactic as you will only get a training loss\n",
213
+ "which doesnt really tell you how well your model is performing\n",
214
+ "\n",
215
+ "This is because we did not define a metric to evaluate our model on!\n",
216
+ "\n",
217
+ "To get the metrics we will first gathers the predictions on the whole evaluation set('validation set')\n",
218
+ "using the trainer.predict method\n",
219
+ "\n",
220
+ "It will return a namedtuple with the following attributes:\n",
221
+ "predictions, label_ids, metrics, num_samples\n",
222
+ "\n",
223
+ "we are trying to get the metrics attribute which is empty here!\n",
224
+ "'''"
225
+ ]
226
+ },
227
+ {
228
+ "cell_type": "code",
229
+ "execution_count": 10,
230
+ "metadata": {},
231
+ "outputs": [
232
+ {
233
+ "data": {
234
+ "application/vnd.jupyter.widget-view+json": {
235
+ "model_id": "43c2affe978b4dfd904e22f2766afe46",
236
+ "version_major": 2,
237
+ "version_minor": 0
238
+ },
239
+ "text/plain": [
240
+ " 0%| | 0/7 [00:00<?, ?it/s]"
241
+ ]
242
+ },
243
+ "metadata": {},
244
+ "output_type": "display_data"
245
+ },
246
+ {
247
+ "ename": "AttributeError",
248
+ "evalue": "'numpy.ndarray' object has no attribute 'label_ids'",
249
+ "output_type": "error",
250
+ "traceback": [
251
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
252
+ "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
253
+ "\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Trainer API.ipynb Cell 9\u001b[0m line \u001b[0;36m2\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#X11sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m predictions \u001b[39m=\u001b[39m trainer\u001b[39m.\u001b[39mpredict(tokenized_datasets[\u001b[39m'\u001b[39m\u001b[39mvalidation\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#X11sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39mprint\u001b[39m(predictions\u001b[39m.\u001b[39mpredictions\u001b[39m.\u001b[39mshape, predictions\u001b[39m.\u001b[39;49mpredictions\u001b[39m.\u001b[39;49mlabel_ids\u001b[39m.\u001b[39mshape)\n",
254
+ "\u001b[1;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'label_ids'"
255
+ ]
256
+ }
257
+ ],
258
+ "source": [
259
+ "predictions = trainer.predict(tokenized_datasets['validation'])\n",
260
+ "print(predictions.predictions.shape, predictions.label_ids.shape)"
261
+ ]
262
+ },
263
+ {
264
+ "cell_type": "code",
265
+ "execution_count": null,
266
+ "metadata": {},
267
+ "outputs": [],
268
+ "source": [
269
+ "'''The predictions are the logits of the model for all the sentences in the dataset\n",
270
+ "a numpy array of shape 408 x 2'''"
271
+ ]
272
+ },
273
+ {
274
+ "cell_type": "code",
275
+ "execution_count": null,
276
+ "metadata": {},
277
+ "outputs": [],
278
+ "source": [
279
+ "'''To match them with our labels we have to take the maximum\n",
280
+ "logits for each prediction, to know which of the two classes was predicted\n",
281
+ "We do this using the argmax function of numpy\n",
282
+ "Then we can use the metrics from the datasets library\n",
283
+ "it can be loaded as easily as the dataset with the load_metric function\n",
284
+ "and it returns the evaluation metric for the dataset'''"
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": null,
290
+ "metadata": {},
291
+ "outputs": [],
292
+ "source": [
293
+ "import numpy as np\n",
294
+ "from datasets import load_metric\n",
295
+ "\n",
296
+ "metric = load_metric('glue','mrpc')\n",
297
+ "preds = np.argmax(predictions.predictions, axis=-1)\n",
298
+ "metric.compute(predictions=preds, references=predictions.label_ids)"
299
+ ]
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": null,
304
+ "metadata": {},
305
+ "outputs": [],
306
+ "source": [
307
+ "'''We can see our model did learn something!'''"
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "code",
312
+ "execution_count": null,
313
+ "metadata": {},
314
+ "outputs": [],
315
+ "source": [
316
+ "'''To monitor the evaluation metrics during training\n",
317
+ "we need to define a compute_metrics function\n",
318
+ "as we have just did\n",
319
+ "it takes a namedtuple with predictions and the labels and\n",
320
+ "returns a dictionary with the metrics we want to keep track of\n",
321
+ "'''"
322
+ ]
323
+ },
324
+ {
325
+ "cell_type": "code",
326
+ "execution_count": null,
327
+ "metadata": {},
328
+ "outputs": [],
329
+ "source": [
330
+ "metric = load_metric('glue','mrpc')\n",
331
+ "\n",
332
+ "def compute_metrics(eval_preds):\n",
333
+ " logits, labels = eval_preds\n",
334
+ " predictions = np.argmax(logits, axis=-1)\n",
335
+ " return metric.compute(predictions=predictions, references=labels)\n"
336
+ ]
337
+ },
338
+ {
339
+ "cell_type": "code",
340
+ "execution_count": null,
341
+ "metadata": {},
342
+ "outputs": [],
343
+ "source": [
344
+ "'''By passing the epoch evaluation strategy to the trainer,\n",
345
+ "we tell the trainer to evaluate at the end of every epoch'''"
346
+ ]
347
+ },
348
+ {
349
+ "cell_type": "code",
350
+ "execution_count": null,
351
+ "metadata": {},
352
+ "outputs": [],
353
+ "source": [
354
+ "training_args = TrainingArguments(\n",
355
+ " 'test-trainer', evaluation_strategy='epoch'\n",
356
+ ")\n",
357
+ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n",
358
+ "\n",
359
+ "trainer = Trainer(\n",
360
+ " model,\n",
361
+ " training_args,\n",
362
+ " train_dataset=tokenized_datasets['train'],\n",
363
+ " eval_dataset=tokenized_datasets['validation'],\n",
364
+ " data_collator=data_collator,\n",
365
+ " tokenizer=tokenizer,\n",
366
+ " compute_metrics=compute_metrics\n",
367
+ ")"
368
+ ]
369
+ },
370
+ {
371
+ "cell_type": "code",
372
+ "execution_count": null,
373
+ "metadata": {},
374
+ "outputs": [],
375
+ "source": [
376
+ "trainer.train()"
377
+ ]
378
+ }
379
+ ],
380
+ "metadata": {
381
+ "kernelspec": {
382
+ "display_name": "Python 3",
383
+ "language": "python",
384
+ "name": "python3"
385
+ },
386
+ "language_info": {
387
+ "codemirror_mode": {
388
+ "name": "ipython",
389
+ "version": 3
390
+ },
391
+ "file_extension": ".py",
392
+ "mimetype": "text/x-python",
393
+ "name": "python",
394
+ "nbconvert_exporter": "python",
395
+ "pygments_lexer": "ipython3",
396
+ "version": "3.11.6"
397
+ }
398
+ },
399
+ "nbformat": 4,
400
+ "nbformat_minor": 2
401
+ }
datasets_library.ipynb ADDED
@@ -0,0 +1,513 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "### Hugging Face Datasets Library\n",
8
+ " You can find the names of the datasets provided by the glue benchmark in the video 22,23\n",
9
+ " \n",
10
+ " https://huggingface.co/docs/datasets/glue.html\n",
11
+ "\n",
12
+ "mrpc is one of the datasets provided by this benchmark to test\n",
13
+ "paraphrases"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 2,
19
+ "metadata": {},
20
+ "outputs": [
21
+ {
22
+ "data": {
23
+ "text/plain": [
24
+ "DatasetDict({\n",
25
+ " train: Dataset({\n",
26
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
27
+ " num_rows: 3668\n",
28
+ " })\n",
29
+ " validation: Dataset({\n",
30
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
31
+ " num_rows: 408\n",
32
+ " })\n",
33
+ " test: Dataset({\n",
34
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
35
+ " num_rows: 1725\n",
36
+ " })\n",
37
+ "})"
38
+ ]
39
+ },
40
+ "execution_count": 2,
41
+ "metadata": {},
42
+ "output_type": "execute_result"
43
+ }
44
+ ],
45
+ "source": [
46
+ "from datasets import load_dataset\n",
47
+ "\n",
48
+ "# Load the dataset\n",
49
+ "raw_datasets = load_dataset(\"glue\", \"mrpc\")\n",
50
+ "raw_datasets\n",
51
+ "\n",
52
+ "# The output is a DatasetDict object, which contains each split of the Dataset."
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 3,
58
+ "metadata": {},
59
+ "outputs": [
60
+ {
61
+ "data": {
62
+ "text/plain": [
63
+ "Dataset({\n",
64
+ " features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
65
+ " num_rows: 3668\n",
66
+ "})"
67
+ ]
68
+ },
69
+ "execution_count": 3,
70
+ "metadata": {},
71
+ "output_type": "execute_result"
72
+ }
73
+ ],
74
+ "source": [
75
+ "# Connect to each split by indexing!\n",
76
+ "raw_datasets['train']"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 4,
82
+ "metadata": {},
83
+ "outputs": [],
84
+ "source": [
85
+ "# we can see the number of training examples in the dataset as num_rows: 3668 in the above output~"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 5,
91
+ "metadata": {},
92
+ "outputs": [
93
+ {
94
+ "data": {
95
+ "text/plain": [
96
+ "{'sentence1': 'Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .',\n",
97
+ " 'sentence2': 'Referring to him as only \" the witness \" , Amrozi accused his brother of deliberately distorting his evidence .',\n",
98
+ " 'label': 1,\n",
99
+ " 'idx': 0}"
100
+ ]
101
+ },
102
+ "execution_count": 5,
103
+ "metadata": {},
104
+ "output_type": "execute_result"
105
+ }
106
+ ],
107
+ "source": [
108
+ "# Access a given element by it's index:\n",
109
+ "raw_datasets['train'][0]"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": 7,
115
+ "metadata": {},
116
+ "outputs": [
117
+ {
118
+ "data": {
119
+ "text/plain": [
120
+ "{'sentence1': ['Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .',\n",
121
+ " \"Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .\",\n",
122
+ " 'They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .',\n",
123
+ " 'Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .',\n",
124
+ " 'The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .'],\n",
125
+ " 'sentence2': ['Referring to him as only \" the witness \" , Amrozi accused his brother of deliberately distorting his evidence .',\n",
126
+ " \"Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .\",\n",
127
+ " \"On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .\",\n",
128
+ " 'Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .',\n",
129
+ " 'PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .'],\n",
130
+ " 'label': [1, 0, 1, 0, 1],\n",
131
+ " 'idx': [0, 1, 2, 3, 4]}"
132
+ ]
133
+ },
134
+ "execution_count": 7,
135
+ "metadata": {},
136
+ "output_type": "execute_result"
137
+ }
138
+ ],
139
+ "source": [
140
+ "# Access a slice of your dataset:\n",
141
+ "raw_datasets['train'][:5]"
142
+ ]
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "execution_count": 8,
147
+ "metadata": {},
148
+ "outputs": [
149
+ {
150
+ "data": {
151
+ "text/plain": [
152
+ "'Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .'"
153
+ ]
154
+ },
155
+ "execution_count": 8,
156
+ "metadata": {},
157
+ "output_type": "execute_result"
158
+ }
159
+ ],
160
+ "source": [
161
+ "# Access the sentence1 of the first element:\n",
162
+ "raw_datasets['train'][0]['sentence1']"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": 9,
168
+ "metadata": {},
169
+ "outputs": [
170
+ {
171
+ "data": {
172
+ "text/plain": [
173
+ "['Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .',\n",
174
+ " \"Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .\",\n",
175
+ " 'They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .',\n",
176
+ " 'Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .',\n",
177
+ " 'The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .']"
178
+ ]
179
+ },
180
+ "execution_count": 9,
181
+ "metadata": {},
182
+ "output_type": "execute_result"
183
+ }
184
+ ],
185
+ "source": [
186
+ "# Access the first 5 sentences of sentence1\n",
187
+ "raw_datasets['train'][:5]['sentence1']"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": 10,
193
+ "metadata": {},
194
+ "outputs": [
195
+ {
196
+ "data": {
197
+ "text/plain": [
198
+ "{'sentence1': Value(dtype='string', id=None),\n",
199
+ " 'sentence2': Value(dtype='string', id=None),\n",
200
+ " 'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None),\n",
201
+ " 'idx': Value(dtype='int32', id=None)}"
202
+ ]
203
+ },
204
+ "execution_count": 10,
205
+ "metadata": {},
206
+ "output_type": "execute_result"
207
+ }
208
+ ],
209
+ "source": [
210
+ "# Use the features attribute to see the information your dataset contains:\n",
211
+ "raw_datasets['train'].features"
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": 11,
217
+ "metadata": {},
218
+ "outputs": [
219
+ {
220
+ "data": {
221
+ "text/plain": [
222
+ "'Remember features are the input variables to your model.'"
223
+ ]
224
+ },
225
+ "execution_count": 11,
226
+ "metadata": {},
227
+ "output_type": "execute_result"
228
+ }
229
+ ],
230
+ "source": [
231
+ "'''Remember features are the input variables to your model.'''"
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "execution_count": 14,
237
+ "metadata": {},
238
+ "outputs": [
239
+ {
240
+ "data": {
241
+ "application/vnd.jupyter.widget-view+json": {
242
+ "model_id": "390d148b78f84283b5c3273c08fca389",
243
+ "version_major": 2,
244
+ "version_minor": 0
245
+ },
246
+ "text/plain": [
247
+ "Map: 0%| | 0/3668 [00:00<?, ? examples/s]"
248
+ ]
249
+ },
250
+ "metadata": {},
251
+ "output_type": "display_data"
252
+ },
253
+ {
254
+ "data": {
255
+ "application/vnd.jupyter.widget-view+json": {
256
+ "model_id": "4be5e99804ce4588aefb566219523f97",
257
+ "version_major": 2,
258
+ "version_minor": 0
259
+ },
260
+ "text/plain": [
261
+ "Map: 0%| | 0/408 [00:00<?, ? examples/s]"
262
+ ]
263
+ },
264
+ "metadata": {},
265
+ "output_type": "display_data"
266
+ },
267
+ {
268
+ "data": {
269
+ "application/vnd.jupyter.widget-view+json": {
270
+ "model_id": "1bbbf398ada1455bb9b726a462e4b7e5",
271
+ "version_major": 2,
272
+ "version_minor": 0
273
+ },
274
+ "text/plain": [
275
+ "Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
276
+ ]
277
+ },
278
+ "metadata": {},
279
+ "output_type": "display_data"
280
+ },
281
+ {
282
+ "name": "stdout",
283
+ "output_type": "stream",
284
+ "text": [
285
+ "{'train': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask']}\n"
286
+ ]
287
+ }
288
+ ],
289
+ "source": [
290
+ "# To preprocess all the elements of the dataset we need to tokenize them!\n",
291
+ "\n",
292
+ "from transformers import AutoTokenizer\n",
293
+ "\n",
294
+ "checkpoint = 'bert-base-cased'\n",
295
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
296
+ "\n",
297
+ "def tokenize_function(example):\n",
298
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
299
+ " padding='max_length',\n",
300
+ " truncation=True,\n",
301
+ " max_length=128)\n",
302
+ "tokenized_datasets = raw_datasets.map(tokenize_function)\n",
303
+ "print(tokenized_datasets.column_names)"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": 15,
309
+ "metadata": {},
310
+ "outputs": [
311
+ {
312
+ "data": {
313
+ "text/plain": [
314
+ "'To speed up tokenization, the map method uses multiprocessing.\\nYou could also set the batched=True'"
315
+ ]
316
+ },
317
+ "execution_count": 15,
318
+ "metadata": {},
319
+ "output_type": "execute_result"
320
+ }
321
+ ],
322
+ "source": [
323
+ "'''To speed up tokenization, the map method uses multiprocessing.\n",
324
+ "You could also set the batched=True'''"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": 17,
330
+ "metadata": {},
331
+ "outputs": [
332
+ {
333
+ "data": {
334
+ "application/vnd.jupyter.widget-view+json": {
335
+ "model_id": "29966e62f612498d8d2d4c54d40467e3",
336
+ "version_major": 2,
337
+ "version_minor": 0
338
+ },
339
+ "text/plain": [
340
+ "Map: 0%| | 0/408 [00:00<?, ? examples/s]"
341
+ ]
342
+ },
343
+ "metadata": {},
344
+ "output_type": "display_data"
345
+ },
346
+ {
347
+ "data": {
348
+ "text/plain": [
349
+ "DatasetDict({\n",
350
+ " train: Dataset({\n",
351
+ " features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
352
+ " num_rows: 3668\n",
353
+ " })\n",
354
+ " validation: Dataset({\n",
355
+ " features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
356
+ " num_rows: 408\n",
357
+ " })\n",
358
+ " test: Dataset({\n",
359
+ " features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
360
+ " num_rows: 1725\n",
361
+ " })\n",
362
+ "})"
363
+ ]
364
+ },
365
+ "execution_count": 17,
366
+ "metadata": {},
367
+ "output_type": "execute_result"
368
+ }
369
+ ],
370
+ "source": [
371
+ "from transformers import AutoTokenizer\n",
372
+ "\n",
373
+ "checkpoint = 'bert-base-cased'\n",
374
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
375
+ "\n",
376
+ "def tokenize_function(example):\n",
377
+ " return tokenizer(example['sentence1'], example['sentence2'],\n",
378
+ " padding='max_length',\n",
379
+ " truncation=True,\n",
380
+ " max_length=128)\n",
381
+ " \n",
382
+ "from datasets import load_dataset\n",
383
+ "\n",
384
+ "raw_datasets = load_dataset(\"glue\",\"mrpc\") \n",
385
+ "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
386
+ "tokenized_datasets"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "code",
391
+ "execution_count": 18,
392
+ "metadata": {},
393
+ "outputs": [
394
+ {
395
+ "data": {
396
+ "text/plain": [
397
+ "\"Once done, we are almost ready for training!\\nJust remove the columns we don't need anymore by the remove columns function\\nRename the column label to labels\\nand use the format torch\""
398
+ ]
399
+ },
400
+ "execution_count": 18,
401
+ "metadata": {},
402
+ "output_type": "execute_result"
403
+ }
404
+ ],
405
+ "source": [
406
+ "'''Once done, we are almost ready for training!\n",
407
+ "Just remove the columns we don't need anymore by the remove columns function\n",
408
+ "Rename the column label to labels\n",
409
+ "and use the format torch'''"
410
+ ]
411
+ },
412
+ {
413
+ "cell_type": "code",
414
+ "execution_count": 20,
415
+ "metadata": {},
416
+ "outputs": [
417
+ {
418
+ "data": {
419
+ "text/plain": [
420
+ "Dataset({\n",
421
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
422
+ " num_rows: 3668\n",
423
+ "})"
424
+ ]
425
+ },
426
+ "execution_count": 20,
427
+ "metadata": {},
428
+ "output_type": "execute_result"
429
+ }
430
+ ],
431
+ "source": [
432
+ "tokenized_datasets = tokenized_datasets.remove_columns([\"idx\",\"sentence1\",\"sentence2\"])\n",
433
+ "tokenized_datasets = tokenized_datasets.rename_column(\"label\",\"labels\")\n",
434
+ "tokenized_datasets = tokenized_datasets.with_format(\"torch\") # The format could be torch/tensorflow/numpy~\n",
435
+ "tokenized_datasets[\"train\"]"
436
+ ]
437
+ },
438
+ {
439
+ "cell_type": "code",
440
+ "execution_count": 21,
441
+ "metadata": {},
442
+ "outputs": [
443
+ {
444
+ "data": {
445
+ "text/plain": [
446
+ "'If needed we can also generate a short sample of the dataset using the select method!'"
447
+ ]
448
+ },
449
+ "execution_count": 21,
450
+ "metadata": {},
451
+ "output_type": "execute_result"
452
+ }
453
+ ],
454
+ "source": [
455
+ "'''If needed we can also generate a short sample of the dataset using the select method!'''\n"
456
+ ]
457
+ },
458
+ {
459
+ "cell_type": "code",
460
+ "execution_count": 23,
461
+ "metadata": {},
462
+ "outputs": [
463
+ {
464
+ "data": {
465
+ "text/plain": [
466
+ "Dataset({\n",
467
+ " features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
468
+ " num_rows: 10\n",
469
+ "})"
470
+ ]
471
+ },
472
+ "execution_count": 23,
473
+ "metadata": {},
474
+ "output_type": "execute_result"
475
+ }
476
+ ],
477
+ "source": [
478
+ "small_train_dataset = tokenized_datasets[\"train\"].select(range(10))\n",
479
+ "small_train_dataset"
480
+ ]
481
+ },
482
+ {
483
+ "cell_type": "code",
484
+ "execution_count": null,
485
+ "metadata": {},
486
+ "outputs": [],
487
+ "source": [
488
+ "'''As you can see, the number of training examples has now reduced to 10'''"
489
+ ]
490
+ }
491
+ ],
492
+ "metadata": {
493
+ "kernelspec": {
494
+ "display_name": "Python 3",
495
+ "language": "python",
496
+ "name": "python3"
497
+ },
498
+ "language_info": {
499
+ "codemirror_mode": {
500
+ "name": "ipython",
501
+ "version": 3
502
+ },
503
+ "file_extension": ".py",
504
+ "mimetype": "text/x-python",
505
+ "name": "python",
506
+ "nbconvert_exporter": "python",
507
+ "pygments_lexer": "ipython3",
508
+ "version": "3.11.6"
509
+ }
510
+ },
511
+ "nbformat": 4,
512
+ "nbformat_minor": 2
513
+ }
pipeline.ipynb ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# ! pip install tensorflow"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 6,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "# ! pip install torch"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 8,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "# ! pip install transformers"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "markdown",
32
+ "metadata": {},
33
+ "source": [
34
+ "# Pipeline:\n",
35
+ "The pipeline function is the most high level api in the transformers library.\n",
36
+ "The pipeline function returns an end-to-end object that performs an NLP task on one or several texts.\n",
37
+ "A pipeline includes all the necessary pre-processing as the model does not expect texts but numbers, it feeds the numbers to the model and the post-processing to make the output human readable."
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "markdown",
42
+ "metadata": {},
43
+ "source": [
44
+ "# Sentiment Analysis Pipeline"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": 7,
50
+ "metadata": {},
51
+ "outputs": [
52
+ {
53
+ "name": "stderr",
54
+ "output_type": "stream",
55
+ "text": [
56
+ "Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at distilgpt2 and are newly initialized: ['score.weight']\n",
57
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
58
+ ]
59
+ },
60
+ {
61
+ "name": "stdout",
62
+ "output_type": "stream",
63
+ "text": [
64
+ "[{'label': 'LABEL_0', 'score': 0.06559786200523376}]\n",
65
+ "[{'label': 'LABEL_0', 'score': 0.12948733568191528}, {'label': 'LABEL_0', 'score': 0.12888683378696442}]\n"
66
+ ]
67
+ }
68
+ ],
69
+ "source": [
70
+ "from transformers import pipeline\n",
71
+ "\n",
72
+ "classifier = pipeline('sentiment-analysis', model='distilgpt2')\n",
73
+ "\n",
74
+ "# pass single text:\n",
75
+ "res = classifier(\"I've been waiting for a Huggingface course\")\n",
76
+ "print(res)\n",
77
+ "\n",
78
+ "# Pass multiple texts:\n",
79
+ "res = classifier(['I love you', 'I hate you'])\n",
80
+ "print(res)"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "markdown",
85
+ "metadata": {},
86
+ "source": [
87
+ "# Zero Shot Classification Pipeline\n",
88
+ "Helps to classify what the sentence or topic is about "
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": 6,
94
+ "metadata": {},
95
+ "outputs": [
96
+ {
97
+ "name": "stderr",
98
+ "output_type": "stream",
99
+ "text": [
100
+ "Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at distilgpt2 and are newly initialized: ['score.weight']\n",
101
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
102
+ "Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.\n",
103
+ "Tokenizer was not supporting padding necessary for zero-shot, attempting to use `pad_token=eos_token`\n"
104
+ ]
105
+ },
106
+ {
107
+ "data": {
108
+ "text/plain": [
109
+ "{'sequence': 'This is a course about the Transformers library',\n",
110
+ " 'labels': ['education', 'politics', 'business'],\n",
111
+ " 'scores': [0.36338528990745544, 0.3443466126918793, 0.29226812720298767]}"
112
+ ]
113
+ },
114
+ "execution_count": 6,
115
+ "metadata": {},
116
+ "output_type": "execute_result"
117
+ }
118
+ ],
119
+ "source": [
120
+ "from transformers import pipeline \n",
121
+ "\n",
122
+ "classifier = pipeline('zero-shot-classification', model='distilgpt2')\n",
123
+ "classifier('This is a course about the Transformers library',\n",
124
+ " candidate_labels=['education', 'politics', 'business'])"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "markdown",
129
+ "metadata": {},
130
+ "source": [
131
+ "# Text Generation pipeline:"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "markdown",
136
+ "metadata": {},
137
+ "source": [
138
+ "Will auto-complete a given prompt. \n",
139
+ "Output is generated with a bit of randomness so it changes when you run it each time."
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": 4,
145
+ "metadata": {},
146
+ "outputs": [
147
+ {
148
+ "name": "stderr",
149
+ "output_type": "stream",
150
+ "text": [
151
+ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
152
+ ]
153
+ },
154
+ {
155
+ "data": {
156
+ "text/plain": [
157
+ "[{'generated_text': 'In this course we will teach you how to play and take your skill set as a starter, how to play, and as a player. I will'},\n",
158
+ " {'generated_text': 'In this course we will teach you how to convert to Java as your main operating system and write to your friends through our website at Google+.\\n\\n'}]"
159
+ ]
160
+ },
161
+ "execution_count": 4,
162
+ "metadata": {},
163
+ "output_type": "execute_result"
164
+ }
165
+ ],
166
+ "source": [
167
+ "from transformers import pipeline\n",
168
+ "\n",
169
+ "generator = pipeline('text-generation', model='distilgpt2')\n",
170
+ "generator('In this course we will teach you how to',\n",
171
+ " max_length=30,\n",
172
+ " num_return_sequences=2)"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": 8,
178
+ "metadata": {},
179
+ "outputs": [
180
+ {
181
+ "name": "stderr",
182
+ "output_type": "stream",
183
+ "text": [
184
+ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
185
+ ]
186
+ },
187
+ {
188
+ "data": {
189
+ "text/plain": [
190
+ "[{'generated_text': 'in this course we will teach you how to play with a real life experience. It will be a lot more about the importance of understanding and building a'},\n",
191
+ " {'generated_text': 'in this course we will teach you how to achieve the objectives of the program. We will teach you how to achieve the objectives of the program. We'}]"
192
+ ]
193
+ },
194
+ "execution_count": 8,
195
+ "metadata": {},
196
+ "output_type": "execute_result"
197
+ }
198
+ ],
199
+ "source": [
200
+ "from transformers import pipeline\n",
201
+ "\n",
202
+ "generator = pipeline('text-generation', model='distilgpt2')\n",
203
+ "generator('in this course we will teach you how to',\n",
204
+ " max_length=30,\n",
205
+ " num_return_sequences=2)"
206
+ ]
207
+ },
208
+ {
209
+ "cell_type": "markdown",
210
+ "metadata": {},
211
+ "source": [
212
+ "The text-generation pipeline is used with the model distilgpt2 above"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "markdown",
217
+ "metadata": {},
218
+ "source": [
219
+ "# Fill Mask Pipeline\n",
220
+ "This pipeline is a pertraining objective of BERT. This is guess masked words like fill in the blanks. \n",
221
+ "In this case we ask the pipeline to generate the two most likely words in the mask using top_k "
222
+ ]
223
+ },
224
+ {
225
+ "cell_type": "code",
226
+ "execution_count": 2,
227
+ "metadata": {},
228
+ "outputs": [
229
+ {
230
+ "name": "stderr",
231
+ "output_type": "stream",
232
+ "text": [
233
+ "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'bert.pooler.dense.bias']\n",
234
+ "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
235
+ "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
236
+ ]
237
+ },
238
+ {
239
+ "data": {
240
+ "application/vnd.jupyter.widget-view+json": {
241
+ "model_id": "96e2afa0d0574883abf3b4e6a86ccaca",
242
+ "version_major": 2,
243
+ "version_minor": 0
244
+ },
245
+ "text/plain": [
246
+ "Downloading (…)okenizer_config.json: 0%| | 0.00/29.0 [00:00<?, ?B/s]"
247
+ ]
248
+ },
249
+ "metadata": {},
250
+ "output_type": "display_data"
251
+ },
252
+ {
253
+ "data": {
254
+ "application/vnd.jupyter.widget-view+json": {
255
+ "model_id": "e4bfb78bd48b446aab7bcc7d1c2064fd",
256
+ "version_major": 2,
257
+ "version_minor": 0
258
+ },
259
+ "text/plain": [
260
+ "Downloading (…)solve/main/vocab.txt: 0%| | 0.00/213k [00:00<?, ?B/s]"
261
+ ]
262
+ },
263
+ "metadata": {},
264
+ "output_type": "display_data"
265
+ },
266
+ {
267
+ "data": {
268
+ "application/vnd.jupyter.widget-view+json": {
269
+ "model_id": "f3554d73b7914b8d824c843c347343ef",
270
+ "version_major": 2,
271
+ "version_minor": 0
272
+ },
273
+ "text/plain": [
274
+ "Downloading (…)/main/tokenizer.json: 0%| | 0.00/436k [00:00<?, ?B/s]"
275
+ ]
276
+ },
277
+ "metadata": {},
278
+ "output_type": "display_data"
279
+ },
280
+ {
281
+ "data": {
282
+ "text/plain": [
283
+ "[{'score': 0.2596316933631897,\n",
284
+ " 'token': 1648,\n",
285
+ " 'token_str': 'role',\n",
286
+ " 'sequence': 'This course will teach you all about role models.'},\n",
287
+ " {'score': 0.09427264332771301,\n",
288
+ " 'token': 1103,\n",
289
+ " 'token_str': 'the',\n",
290
+ " 'sequence': 'This course will teach you all about the models.'}]"
291
+ ]
292
+ },
293
+ "execution_count": 2,
294
+ "metadata": {},
295
+ "output_type": "execute_result"
296
+ }
297
+ ],
298
+ "source": [
299
+ "from transformers import pipeline\n",
300
+ "\n",
301
+ "unmasker = pipeline('fill-mask', model='bert-base-cased')\n",
302
+ "unmasker('This course will teach you all about [MASK] models.', top_k=2)\n"
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "markdown",
307
+ "metadata": {},
308
+ "source": [
309
+ "# Text Classifier Pipeline:\n",
310
+ "Named Entity Recognition (NER) pipeline within the text-classification family, which helps identify entities in a sentence."
311
+ ]
312
+ },
313
+ {
314
+ "cell_type": "code",
315
+ "execution_count": null,
316
+ "metadata": {},
317
+ "outputs": [],
318
+ "source": [
319
+ "from transformers import pipeline\n",
320
+ "\n",
321
+ "ner = pipeline('ner', grouped_entities=True, model='distilgpt2')\n",
322
+ "ner('My name is Abdullah and I work at Hackules in Bangladesh')"
323
+ ]
324
+ },
325
+ {
326
+ "cell_type": "markdown",
327
+ "metadata": {},
328
+ "source": [
329
+ "# Extractive Question Answering\n",
330
+ "Another task available with the pipeline API is extractive question answering.\n",
331
+ "Providing a context and a question the model will identify a span of text in the context containing the answer to the question\n",
332
+ "The model extracts the answer span directly from the context rather than generating new text."
333
+ ]
334
+ },
335
+ {
336
+ "cell_type": "code",
337
+ "execution_count": null,
338
+ "metadata": {},
339
+ "outputs": [],
340
+ "source": [
341
+ "from transformers import pipeline\n",
342
+ "\n",
343
+ "question_answerer = pipeline('question-answering', model='distilgpt2')\n",
344
+ "question_answerer(\n",
345
+ " question='Where do I work?',\n",
346
+ " context='My name is Abdullah and I work at Hackules in Bangladesh'\n",
347
+ ")"
348
+ ]
349
+ },
350
+ {
351
+ "cell_type": "markdown",
352
+ "metadata": {},
353
+ "source": [
354
+ "# Summarization Pipeline:\n",
355
+ "Getting short summaries with articles."
356
+ ]
357
+ },
358
+ {
359
+ "cell_type": "code",
360
+ "execution_count": null,
361
+ "metadata": {},
362
+ "outputs": [],
363
+ "source": [
364
+ "from transformers import pipeline\n",
365
+ "\n",
366
+ "summarizer = pipeline('summarization', model='distilgpt2')\n",
367
+ "summarizer('''\n",
368
+ "It was the 1st of November yesterday, and I had decided to grind my research paper to completion. I failed at the task but I did make some progress. I also discovered that, the conference papers can't be more than 10 pages long and too long conference papers get rejected. I really have a lot to learn about conferences and paper submissions but I don't have anybody to guide me through the steps. I am not complaining, I am just saying that it's going to take me a while but I will get there in shaa Allah!\n",
369
+ "''')"
370
+ ]
371
+ },
372
+ {
373
+ "cell_type": "markdown",
374
+ "metadata": {},
375
+ "source": [
376
+ "# Translation Pipeline:\n",
377
+ "The last task by the pipeline API is translation. "
378
+ ]
379
+ },
380
+ {
381
+ "cell_type": "code",
382
+ "execution_count": 16,
383
+ "metadata": {},
384
+ "outputs": [
385
+ {
386
+ "ename": "ValueError",
387
+ "evalue": "This tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed in order to use this tokenizer.",
388
+ "output_type": "error",
389
+ "traceback": [
390
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
391
+ "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
392
+ "\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\pipeline.ipynb Cell 23\u001b[0m line \u001b[0;36m4\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/pipeline.ipynb#X54sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39m# ! pip install sentencepiece\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/pipeline.ipynb#X54sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39msentencepiece\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/pipeline.ipynb#X54sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m translator \u001b[39m=\u001b[39m pipeline(\u001b[39m'\u001b[39;49m\u001b[39mtranslation\u001b[39;49m\u001b[39m'\u001b[39;49m, model\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mHelsinki-NLP/opus-mt-fr-en\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/pipeline.ipynb#X54sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m translator(\u001b[39m'\u001b[39m\u001b[39mCe cours est produit par Hugging Face.\u001b[39m\u001b[39m'\u001b[39m)\n",
393
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\pipelines\\__init__.py:931\u001b[0m, in \u001b[0;36mpipeline\u001b[1;34m(task, model, config, tokenizer, feature_extractor, image_processor, framework, revision, use_fast, token, device, device_map, torch_dtype, trust_remote_code, model_kwargs, pipeline_class, **kwargs)\u001b[0m\n\u001b[0;32m 928\u001b[0m tokenizer_kwargs \u001b[39m=\u001b[39m model_kwargs\u001b[39m.\u001b[39mcopy()\n\u001b[0;32m 929\u001b[0m tokenizer_kwargs\u001b[39m.\u001b[39mpop(\u001b[39m\"\u001b[39m\u001b[39mtorch_dtype\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m)\n\u001b[1;32m--> 931\u001b[0m tokenizer \u001b[39m=\u001b[39m AutoTokenizer\u001b[39m.\u001b[39;49mfrom_pretrained(\n\u001b[0;32m 932\u001b[0m tokenizer_identifier, use_fast\u001b[39m=\u001b[39;49muse_fast, _from_pipeline\u001b[39m=\u001b[39;49mtask, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mhub_kwargs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mtokenizer_kwargs\n\u001b[0;32m 933\u001b[0m )\n\u001b[0;32m 935\u001b[0m \u001b[39mif\u001b[39;00m load_image_processor:\n\u001b[0;32m 936\u001b[0m \u001b[39m# Try to infer image processor from model or config name (if provided as str)\u001b[39;00m\n\u001b[0;32m 937\u001b[0m \u001b[39mif\u001b[39;00m image_processor \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
394
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:774\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 772\u001b[0m \u001b[39mreturn\u001b[39;00m tokenizer_class_py\u001b[39m.\u001b[39mfrom_pretrained(pretrained_model_name_or_path, \u001b[39m*\u001b[39minputs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 773\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m--> 774\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 775\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mThis tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 776\u001b[0m \u001b[39m\"\u001b[39m\u001b[39min order to use this tokenizer.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 777\u001b[0m )\n\u001b[0;32m 779\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 780\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mUnrecognized configuration class \u001b[39m\u001b[39m{\u001b[39;00mconfig\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m to build an AutoTokenizer.\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 781\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mModel type should be one of \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m, \u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mjoin(c\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m \u001b[39m\u001b[39mfor\u001b[39;00m\u001b[39m \u001b[39mc\u001b[39m \u001b[39m\u001b[39min\u001b[39;00m\u001b[39m \u001b[39mTOKENIZER_MAPPING\u001b[39m.\u001b[39mkeys())\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 782\u001b[0m )\n",
395
+ "\u001b[1;31mValueError\u001b[0m: This tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed in order to use this tokenizer."
396
+ ]
397
+ }
398
+ ],
399
+ "source": [
400
+ "from transformers import pipeline\n",
401
+ "# ! pip install sentencepiece\n",
402
+ "import sentencepiece\n",
403
+ "translator = pipeline('translation', model='Helsinki-NLP/opus-mt-fr-en')\n",
404
+ "translator('Ce cours est produit par Hugging Face.')"
405
+ ]
406
+ },
407
+ {
408
+ "cell_type": "markdown",
409
+ "metadata": {},
410
+ "source": [
411
+ "So, the following tasks are available within our Pipeline API:\n",
412
+ "\n",
413
+ "- Text-Classification(Also called sequence classification)\n",
414
+ "- Zero Shot Classification\n",
415
+ "- Text Generation\n",
416
+ "- Text Completion(mask filling)/ Masked Language Modeling\n",
417
+ "- Token Classification\n",
418
+ "- Question Answering\n",
419
+ "- Summarization\n",
420
+ "- Translation"
421
+ ]
422
+ },
423
+ {
424
+ "cell_type": "markdown",
425
+ "metadata": {},
426
+ "source": []
427
+ }
428
+ ],
429
+ "metadata": {
430
+ "kernelspec": {
431
+ "display_name": "Python 3",
432
+ "language": "python",
433
+ "name": "python3"
434
+ },
435
+ "language_info": {
436
+ "codemirror_mode": {
437
+ "name": "ipython",
438
+ "version": 3
439
+ },
440
+ "file_extension": ".py",
441
+ "mimetype": "text/x-python",
442
+ "name": "python",
443
+ "nbconvert_exporter": "python",
444
+ "pygments_lexer": "ipython3",
445
+ "version": "3.11.6"
446
+ }
447
+ },
448
+ "nbformat": 4,
449
+ "nbformat_minor": 2
450
+ }
pipeline2.ipynb ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "text/plain": [
11
+ "[{'label': 'NEGATIVE', 'score': 0.9961605072021484}]"
12
+ ]
13
+ },
14
+ "execution_count": 1,
15
+ "metadata": {},
16
+ "output_type": "execute_result"
17
+ }
18
+ ],
19
+ "source": [
20
+ "from transformers import pipeline\n",
21
+ "\n",
22
+ "classifier = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')\n",
23
+ "classifier('I suck at coding')"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 7,
29
+ "metadata": {},
30
+ "outputs": [
31
+ {
32
+ "name": "stdout",
33
+ "output_type": "stream",
34
+ "text": [
35
+ "{'input_ids': tensor([[ 101, 1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 14924,\n",
36
+ " 4818, 2026, 2878, 2166, 1010, 1045, 2572, 2061, 3407, 999,\n",
37
+ " 102],\n",
38
+ " [ 101, 1045, 2572, 2061, 9364, 1999, 2026, 3754, 2000, 4553,\n",
39
+ " 102, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
40
+ " 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
41
+ " [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}\n",
42
+ "torch.Size([2, 21, 768])\n",
43
+ "BaseModelOutput(last_hidden_state=tensor([[[ 0.4223, 0.2644, 0.2841, ..., 0.5133, 0.7970, -0.5389],\n",
44
+ " [ 0.6034, 0.4271, 0.2106, ..., 0.5094, 0.8745, -0.4014],\n",
45
+ " [ 0.6883, 0.5008, 0.2713, ..., 0.4099, 0.7448, -0.1239],\n",
46
+ " ...,\n",
47
+ " [ 0.5705, 0.3254, 0.1810, ..., 0.5382, 0.7913, -0.5631],\n",
48
+ " [ 0.5371, 0.2903, 0.1535, ..., 0.5578, 0.8199, -0.4776],\n",
49
+ " [ 1.2266, 0.2534, 0.4621, ..., 0.7747, 0.5059, -0.8307]],\n",
50
+ "\n",
51
+ " [[-0.7461, 0.9088, -0.0971, ..., 0.1799, -0.9920, -0.4135],\n",
52
+ " [-0.7748, 0.9645, -0.0093, ..., -0.0285, -0.9143, -0.1535],\n",
53
+ " [-0.8590, 0.9238, -0.0158, ..., 0.0062, -1.0241, -0.1349],\n",
54
+ " ...,\n",
55
+ " [-0.6346, 0.9681, -0.0236, ..., 0.1793, -1.1010, -0.2452],\n",
56
+ " [-0.5911, 0.9420, -0.1765, ..., 0.2015, -1.0720, -0.2666],\n",
57
+ " [-0.5166, 0.9548, -0.1337, ..., 0.2211, -1.0757, -0.2626]]],\n",
58
+ " grad_fn=<NativeLayerNormBackward0>), hidden_states=None, attentions=None)\n",
59
+ "tensor([[-4.2574, 4.6149],\n",
60
+ " [ 4.6649, -3.7399]], grad_fn=<AddmmBackward0>)\n",
61
+ "SequenceClassifierOutput(loss=None, logits=tensor([[-4.2574, 4.6149],\n",
62
+ " [ 4.6649, -3.7399]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)\n",
63
+ "tensor([[1.4020e-04, 9.9986e-01],\n",
64
+ " [9.9978e-01, 2.2374e-04]], grad_fn=<SoftmaxBackward0>)\n",
65
+ "tensor(2)\n",
66
+ "2\n",
67
+ "{0: 'NEGATIVE', 1: 'POSITIVE'}\n"
68
+ ]
69
+ }
70
+ ],
71
+ "source": [
72
+ "from transformers import AutoTokenizer\n",
73
+ "\n",
74
+ "tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased-finetuned-sst-2-english')\n",
75
+ "raw_inputs = ['''I have been waiting for a hugging face tutorial my whole life, i am so happy!''',\n",
76
+ " 'I am so disappointed in my ability to learn']\n",
77
+ "inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')\n",
78
+ "print(inputs)\n",
79
+ "\n",
80
+ "from transformers import AutoModel\n",
81
+ "\n",
82
+ "model = AutoModel.from_pretrained('distilbert-base-uncased-finetuned-sst-2-english')\n",
83
+ "outputs = model(**inputs)\n",
84
+ "print(outputs.last_hidden_state.shape)\n",
85
+ "print(outputs)\n",
86
+ "from transformers import AutoModelForSequenceClassification\n",
87
+ "\n",
88
+ "model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased-finetuned-sst-2-english')\n",
89
+ "outputs = model(**inputs)\n",
90
+ "print(outputs.logits)\n",
91
+ "print(outputs)\n",
92
+ "\n",
93
+ "import torch\n",
94
+ "torch_inputs = outputs.logits\n",
95
+ "probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
96
+ "print(probabilities)\n",
97
+ "\n",
98
+ "# check output rating:\n",
99
+ "print(torch.argmax(outputs.logits))\n",
100
+ "print(int(torch.argmax(outputs.logits)))\n",
101
+ "\n",
102
+ "print(model.config.id2label)"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": null,
108
+ "metadata": {},
109
+ "outputs": [],
110
+ "source": []
111
+ }
112
+ ],
113
+ "metadata": {
114
+ "kernelspec": {
115
+ "display_name": "Python 3",
116
+ "language": "python",
117
+ "name": "python3"
118
+ },
119
+ "language_info": {
120
+ "codemirror_mode": {
121
+ "name": "ipython",
122
+ "version": 3
123
+ },
124
+ "file_extension": ".py",
125
+ "mimetype": "text/x-python",
126
+ "name": "python",
127
+ "nbconvert_exporter": "python",
128
+ "pygments_lexer": "ipython3",
129
+ "version": "3.11.6"
130
+ }
131
+ },
132
+ "nbformat": 4,
133
+ "nbformat_minor": 2
134
+ }
prac.ipynb ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "['lets', '##s', 'try', 'to', 'token', '##ize']\n"
13
+ ]
14
+ }
15
+ ],
16
+ "source": [
17
+ "from transformers import AutoTokenizer\n",
18
+ "\n",
19
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
20
+ "tokens = tokenizer.tokenize('''Letss try to tokenize''')\n",
21
+ "print(tokens)"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 2,
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "data": {
31
+ "application/vnd.jupyter.widget-view+json": {
32
+ "model_id": "c2fdecef86644ec1b3467bf653e8d30d",
33
+ "version_major": 2,
34
+ "version_minor": 0
35
+ },
36
+ "text/plain": [
37
+ "Downloading (…)lve/main/config.json: 0%| | 0.00/684 [00:00<?, ?B/s]"
38
+ ]
39
+ },
40
+ "metadata": {},
41
+ "output_type": "display_data"
42
+ },
43
+ {
44
+ "data": {
45
+ "application/vnd.jupyter.widget-view+json": {
46
+ "model_id": "6a4c6b7d714c40ca9695acf581de7bb2",
47
+ "version_major": 2,
48
+ "version_minor": 0
49
+ },
50
+ "text/plain": [
51
+ "Downloading (…)ve/main/spiece.model: 0%| | 0.00/760k [00:00<?, ?B/s]"
52
+ ]
53
+ },
54
+ "metadata": {},
55
+ "output_type": "display_data"
56
+ },
57
+ {
58
+ "data": {
59
+ "application/vnd.jupyter.widget-view+json": {
60
+ "model_id": "49267a76ecfc4aee9d4906e96ddbca5b",
61
+ "version_major": 2,
62
+ "version_minor": 0
63
+ },
64
+ "text/plain": [
65
+ "Downloading (…)/main/tokenizer.json: 0%| | 0.00/1.31M [00:00<?, ?B/s]"
66
+ ]
67
+ },
68
+ "metadata": {},
69
+ "output_type": "display_data"
70
+ },
71
+ {
72
+ "name": "stdout",
73
+ "output_type": "stream",
74
+ "text": [
75
+ "['▁let', \"'\", 's', '▁learn', '▁to', '▁code', '▁in', '▁hugging', 'face']\n"
76
+ ]
77
+ }
78
+ ],
79
+ "source": [
80
+ "from transformers import AutoTokenizer\n",
81
+ "\n",
82
+ "albert_tokenizer = AutoTokenizer.from_pretrained('albert-base-v2')\n",
83
+ "tokens = albert_tokenizer.tokenize('''Let's learn to code in huggingface''')\n",
84
+ "print(tokens)"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": 3,
90
+ "metadata": {},
91
+ "outputs": [
92
+ {
93
+ "name": "stdout",
94
+ "output_type": "stream",
95
+ "text": [
96
+ "['these', 'are', 'broken', 'down', 'into', 'token', '##s']\n",
97
+ "[2122, 2024, 3714, 2091, 2046, 19204, 2015]\n"
98
+ ]
99
+ }
100
+ ],
101
+ "source": [
102
+ "from transformers import AutoTokenizer\n",
103
+ "\n",
104
+ "tokeninzer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
105
+ "tokens = tokenizer.tokenize('These are broken down into tokens')\n",
106
+ "print(tokens)\n",
107
+ "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
108
+ "print(input_ids)"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 4,
114
+ "metadata": {},
115
+ "outputs": [
116
+ {
117
+ "name": "stdout",
118
+ "output_type": "stream",
119
+ "text": [
120
+ "['this', 'is', 'me', 'practicing']\n",
121
+ "[2023, 2003, 2033, 12560]\n",
122
+ "['this', 'is', 'me', 'practicing']\n"
123
+ ]
124
+ }
125
+ ],
126
+ "source": [
127
+ "from transformers import AutoTokenizer\n",
128
+ "\n",
129
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
130
+ "tokens = tokenizer.tokenize('This is me practicing')\n",
131
+ "print(tokens)\n",
132
+ "\n",
133
+ "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
134
+ "print(input_ids)\n",
135
+ "\n",
136
+ "tokens = tokenizer.convert_ids_to_tokens(input_ids)\n",
137
+ "print(tokens)"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": 5,
143
+ "metadata": {},
144
+ "outputs": [
145
+ {
146
+ "name": "stdout",
147
+ "output_type": "stream",
148
+ "text": [
149
+ "['this', 'is', 'me', 'practicing']\n",
150
+ "[2023, 2003, 2033, 12560]\n",
151
+ "['this', 'is', 'me', 'practicing']\n",
152
+ "this is me practicing\n"
153
+ ]
154
+ }
155
+ ],
156
+ "source": [
157
+ "from transformers import AutoTokenizer\n",
158
+ "\n",
159
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
160
+ "tokens = tokenizer.tokenize('This is me practicing')\n",
161
+ "print(tokens)\n",
162
+ "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
163
+ "print(input_ids)\n",
164
+ "tokens_2 = tokenizer.convert_ids_to_tokens(input_ids)\n",
165
+ "print(tokens_2)\n",
166
+ "strings = tokenizer.convert_tokens_to_string(tokens)\n",
167
+ "print(strings)"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "execution_count": 6,
173
+ "metadata": {},
174
+ "outputs": [
175
+ {
176
+ "name": "stderr",
177
+ "output_type": "stream",
178
+ "text": [
179
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
180
+ ]
181
+ },
182
+ {
183
+ "name": "stdout",
184
+ "output_type": "stream",
185
+ "text": [
186
+ "{'input_ids': [101, 2023, 2003, 2033, 12560, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1]}\n"
187
+ ]
188
+ }
189
+ ],
190
+ "source": [
191
+ "final_ids = tokenizer.prepare_for_model(input_ids)\n",
192
+ "print(final_ids)\n"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "code",
197
+ "execution_count": 7,
198
+ "metadata": {},
199
+ "outputs": [
200
+ {
201
+ "name": "stderr",
202
+ "output_type": "stream",
203
+ "text": [
204
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
205
+ ]
206
+ },
207
+ {
208
+ "name": "stdout",
209
+ "output_type": "stream",
210
+ "text": [
211
+ "['this', 'is', 'me', 'practicing', 'the', 'use', 'of', 'auto', '##tok', '##eni', '##zer']\n",
212
+ "[2023, 2003, 2033, 12560, 1996, 2224, 1997, 8285, 18715, 18595, 6290]\n",
213
+ "{'input_ids': [101, 2023, 2003, 2033, 12560, 1996, 2224, 1997, 8285, 18715, 18595, 6290, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}\n",
214
+ "[CLS] this is me practicing the use of autotokenizer [SEP]\n"
215
+ ]
216
+ }
217
+ ],
218
+ "source": [
219
+ "from transformers import AutoTokenizer\n",
220
+ "\n",
221
+ "sentence = 'This is me practicing the use of AutoTokenizer'\n",
222
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
223
+ "tokens = tokenizer.tokenize(sentence)\n",
224
+ "print(tokens)\n",
225
+ "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
226
+ "print(input_ids)\n",
227
+ "inputs = tokenizer.prepare_for_model(input_ids)\n",
228
+ "print(inputs)\n",
229
+ "\n",
230
+ "decode = tokenizer.decode(inputs['input_ids'])\n",
231
+ "print(decode)"
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "execution_count": null,
237
+ "metadata": {},
238
+ "outputs": [],
239
+ "source": []
240
+ }
241
+ ],
242
+ "metadata": {
243
+ "kernelspec": {
244
+ "display_name": "Python 3",
245
+ "language": "python",
246
+ "name": "python3"
247
+ },
248
+ "language_info": {
249
+ "codemirror_mode": {
250
+ "name": "ipython",
251
+ "version": 3
252
+ },
253
+ "file_extension": ".py",
254
+ "mimetype": "text/x-python",
255
+ "name": "python",
256
+ "nbconvert_exporter": "python",
257
+ "pygments_lexer": "ipython3",
258
+ "version": "3.11.6"
259
+ }
260
+ },
261
+ "nbformat": 4,
262
+ "nbformat_minor": 2
263
+ }
practise basics.ipynb ADDED
@@ -0,0 +1,962 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
13
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "import transformers\n",
19
+ "\n",
20
+ "from transformers import pipeline\n",
21
+ "\n",
22
+ "checkpoint = 'bert-base-uncased'\n",
23
+ "classifier = pipeline('sentiment-analysis', model=checkpoint)"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 2,
29
+ "metadata": {},
30
+ "outputs": [
31
+ {
32
+ "data": {
33
+ "text/plain": [
34
+ "[{'label': 'LABEL_1', 'score': 0.5578101277351379}]"
35
+ ]
36
+ },
37
+ "execution_count": 2,
38
+ "metadata": {},
39
+ "output_type": "execute_result"
40
+ }
41
+ ],
42
+ "source": [
43
+ "classifier('This is a test sentence')"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 5,
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "name": "stderr",
53
+ "output_type": "stream",
54
+ "text": [
55
+ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
56
+ ]
57
+ },
58
+ {
59
+ "data": {
60
+ "text/plain": [
61
+ "[{'generated_text': 'In this course, I will teach you how to make a really big use of the language you\\u202are learning to use.\\u202a'},\n",
62
+ " {'generated_text': 'In this course, I will teach you how to manipulate sound design to better enhance your sound design while also illustrating the application of certain audio and video technologies. In this section I will introduce some examples of how to manipulate sound design in my introductory video.'}]"
63
+ ]
64
+ },
65
+ "execution_count": 5,
66
+ "metadata": {},
67
+ "output_type": "execute_result"
68
+ }
69
+ ],
70
+ "source": [
71
+ "generator = pipeline('text-generation', model='distilgpt2')\n",
72
+ "generator('In this course, I will teach you how to',\n",
73
+ " max_length=50, num_return_sequences = 2\n",
74
+ " )"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": 6,
80
+ "metadata": {},
81
+ "outputs": [
82
+ {
83
+ "name": "stderr",
84
+ "output_type": "stream",
85
+ "text": [
86
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
87
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
88
+ ]
89
+ },
90
+ {
91
+ "data": {
92
+ "text/plain": [
93
+ "[{'label': 'LABEL_0', 'score': 0.6686140894889832}]"
94
+ ]
95
+ },
96
+ "execution_count": 6,
97
+ "metadata": {},
98
+ "output_type": "execute_result"
99
+ }
100
+ ],
101
+ "source": [
102
+ "# inside the pipeline function;\n",
103
+ "\n",
104
+ "from transformers import pipeline\n",
105
+ "classifier = pipeline('sentiment-analysis', model=checkpoint)\n",
106
+ "classifier('I am very sad')\n"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "execution_count": 7,
112
+ "metadata": {},
113
+ "outputs": [],
114
+ "source": [
115
+ "# Tokenization: Raw text -> Tokenizer -> Tokenized Text -> Input IDs for model"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": 8,
121
+ "metadata": {},
122
+ "outputs": [],
123
+ "source": [
124
+ "from transformers import AutoTokenizer\n",
125
+ "\n",
126
+ "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
127
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
128
+ "\n",
129
+ "raw_inputs = ['This is a course on huggingface',\n",
130
+ " 'I am very disgusted at my stupidity']\n",
131
+ "\n",
132
+ "inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": 9,
138
+ "metadata": {},
139
+ "outputs": [
140
+ {
141
+ "data": {
142
+ "text/plain": [
143
+ "{'input_ids': tensor([[ 101, 2023, 2003, 1037, 2607, 2006, 17662, 12172, 102],\n",
144
+ " [ 101, 1045, 2572, 2200, 17733, 2012, 2026, 28072, 102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
145
+ " [1, 1, 1, 1, 1, 1, 1, 1, 1]])}"
146
+ ]
147
+ },
148
+ "execution_count": 9,
149
+ "metadata": {},
150
+ "output_type": "execute_result"
151
+ }
152
+ ],
153
+ "source": [
154
+ "inputs"
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": 10,
160
+ "metadata": {},
161
+ "outputs": [
162
+ {
163
+ "data": {
164
+ "text/plain": [
165
+ "Encoding(num_tokens=9, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])"
166
+ ]
167
+ },
168
+ "execution_count": 10,
169
+ "metadata": {},
170
+ "output_type": "execute_result"
171
+ }
172
+ ],
173
+ "source": [
174
+ "inputs[0]"
175
+ ]
176
+ },
177
+ {
178
+ "cell_type": "code",
179
+ "execution_count": 11,
180
+ "metadata": {},
181
+ "outputs": [
182
+ {
183
+ "data": {
184
+ "text/plain": [
185
+ "tensor([[ 101, 2023, 2003, 1037, 2607, 2006, 17662, 12172, 102],\n",
186
+ " [ 101, 1045, 2572, 2200, 17733, 2012, 2026, 28072, 102]])"
187
+ ]
188
+ },
189
+ "execution_count": 11,
190
+ "metadata": {},
191
+ "output_type": "execute_result"
192
+ }
193
+ ],
194
+ "source": [
195
+ "inputs['input_ids']"
196
+ ]
197
+ },
198
+ {
199
+ "cell_type": "code",
200
+ "execution_count": 12,
201
+ "metadata": {},
202
+ "outputs": [
203
+ {
204
+ "data": {
205
+ "text/plain": [
206
+ "tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
207
+ " [1, 1, 1, 1, 1, 1, 1, 1, 1]])"
208
+ ]
209
+ },
210
+ "execution_count": 12,
211
+ "metadata": {},
212
+ "output_type": "execute_result"
213
+ }
214
+ ],
215
+ "source": [
216
+ "inputs['attention_mask']"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": 13,
222
+ "metadata": {},
223
+ "outputs": [],
224
+ "source": [
225
+ "from transformers import AutoTokenizer\n",
226
+ "\n",
227
+ "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
228
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
229
+ "\n",
230
+ "raw_inputs = ['This is very good','I am learning slowly.. sad']\n",
231
+ "\n",
232
+ "inputs=tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": 14,
238
+ "metadata": {},
239
+ "outputs": [
240
+ {
241
+ "data": {
242
+ "text/plain": [
243
+ "{'input_ids': tensor([[ 101, 2023, 2003, 2200, 2204, 102, 0, 0, 0],\n",
244
+ " [ 101, 1045, 2572, 4083, 3254, 1012, 1012, 6517, 102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0],\n",
245
+ " [1, 1, 1, 1, 1, 1, 1, 1, 1]])}"
246
+ ]
247
+ },
248
+ "execution_count": 14,
249
+ "metadata": {},
250
+ "output_type": "execute_result"
251
+ }
252
+ ],
253
+ "source": [
254
+ "inputs"
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "code",
259
+ "execution_count": 15,
260
+ "metadata": {},
261
+ "outputs": [
262
+ {
263
+ "data": {
264
+ "text/plain": [
265
+ "tensor([[ 101, 2023, 2003, 2200, 2204, 102, 0, 0, 0],\n",
266
+ " [ 101, 1045, 2572, 4083, 3254, 1012, 1012, 6517, 102]])"
267
+ ]
268
+ },
269
+ "execution_count": 15,
270
+ "metadata": {},
271
+ "output_type": "execute_result"
272
+ }
273
+ ],
274
+ "source": [
275
+ "inputs['input_ids']"
276
+ ]
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "execution_count": 16,
281
+ "metadata": {},
282
+ "outputs": [
283
+ {
284
+ "data": {
285
+ "text/plain": [
286
+ "tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0],\n",
287
+ " [1, 1, 1, 1, 1, 1, 1, 1, 1]])"
288
+ ]
289
+ },
290
+ "execution_count": 16,
291
+ "metadata": {},
292
+ "output_type": "execute_result"
293
+ }
294
+ ],
295
+ "source": [
296
+ "inputs['attention_mask']"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 17,
302
+ "metadata": {},
303
+ "outputs": [
304
+ {
305
+ "name": "stdout",
306
+ "output_type": "stream",
307
+ "text": [
308
+ "torch.Size([2, 9, 768])\n"
309
+ ]
310
+ }
311
+ ],
312
+ "source": [
313
+ "from transformers import AutoModel\n",
314
+ "\n",
315
+ "model = AutoModel.from_pretrained(checkpoint)\n",
316
+ "outputs = model(**inputs)\n",
317
+ "print(outputs.last_hidden_state.shape)"
318
+ ]
319
+ },
320
+ {
321
+ "cell_type": "code",
322
+ "execution_count": 23,
323
+ "metadata": {},
324
+ "outputs": [
325
+ {
326
+ "name": "stdout",
327
+ "output_type": "stream",
328
+ "text": [
329
+ "{'input_ids': tensor([[ 101, 2023, 2003, 2200, 2204, 102, 0, 0],\n",
330
+ " [ 101, 1045, 2572, 2667, 2000, 4553, 2242, 102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0, 0],\n",
331
+ " [1, 1, 1, 1, 1, 1, 1, 1]])}\n",
332
+ "torch.Size([2, 8, 768])\n",
333
+ "torch.Size([2, 2])\n",
334
+ "SequenceClassifierOutput(loss=None, logits=tensor([[-4.1928, 4.5727],\n",
335
+ " [ 1.9190, -1.6084]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)\n",
336
+ "tensor([[-4.1928, 4.5727],\n",
337
+ " [ 1.9190, -1.6084]], grad_fn=<AddmmBackward0>)\n",
338
+ "tensor([[1.5600e-04, 9.9984e-01],\n",
339
+ " [9.7146e-01, 2.8543e-02]], grad_fn=<SoftmaxBackward0>)\n",
340
+ "{0: 'NEGATIVE', 1: 'POSITIVE'}\n"
341
+ ]
342
+ }
343
+ ],
344
+ "source": [
345
+ "from transformers import AutoTokenizer\n",
346
+ "\n",
347
+ "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
348
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
349
+ "\n",
350
+ "raw_inputs = ['This is very good', 'I am trying to learn something']\n",
351
+ "\n",
352
+ "inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')\n",
353
+ "\n",
354
+ "print(inputs)\n",
355
+ "\n",
356
+ "from transformers import AutoModel\n",
357
+ "\n",
358
+ "model = AutoModel.from_pretrained(checkpoint)\n",
359
+ "outputs = model(**inputs)\n",
360
+ "print(outputs.last_hidden_state.shape)\n",
361
+ "\n",
362
+ "from transformers import AutoModelForSequenceClassification\n",
363
+ "\n",
364
+ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
365
+ "outputs = model(**inputs)\n",
366
+ "print(outputs.logits.shape)\n",
367
+ "print(outputs)\n",
368
+ "print(outputs.logits)\n",
369
+ "\n",
370
+ "import torch\n",
371
+ "\n",
372
+ "predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
373
+ "print(predictions)\n",
374
+ "\n",
375
+ "print(model.config.id2label)\n"
376
+ ]
377
+ },
378
+ {
379
+ "cell_type": "code",
380
+ "execution_count": 24,
381
+ "metadata": {},
382
+ "outputs": [
383
+ {
384
+ "name": "stdout",
385
+ "output_type": "stream",
386
+ "text": [
387
+ "<class 'transformers.models.bert.modeling_bert.BertModel'>\n",
388
+ "<class 'transformers.models.gpt2.modeling_gpt2.GPT2Model'>\n",
389
+ "<class 'transformers.models.bart.modeling_bart.BartModel'>\n"
390
+ ]
391
+ }
392
+ ],
393
+ "source": [
394
+ "# instantiate a transformers model:\n",
395
+ "\n",
396
+ "from transformers import AutoModel\n",
397
+ "\n",
398
+ "bert_model = AutoModel.from_pretrained('bert-base-uncased')\n",
399
+ "print(type(bert_model))\n",
400
+ "\n",
401
+ "gpt_model = AutoModel.from_pretrained('gpt2')\n",
402
+ "print(type(gpt_model))\n",
403
+ "\n",
404
+ "bart_model = AutoModel.from_pretrained('facebook/bart-base')\n",
405
+ "print(type(bart_model))"
406
+ ]
407
+ },
408
+ {
409
+ "cell_type": "code",
410
+ "execution_count": 25,
411
+ "metadata": {},
412
+ "outputs": [
413
+ {
414
+ "name": "stdout",
415
+ "output_type": "stream",
416
+ "text": [
417
+ "<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
418
+ "<class 'transformers.models.gpt2.configuration_gpt2.GPT2Config'>\n",
419
+ "<class 'transformers.models.bart.configuration_bart.BartConfig'>\n"
420
+ ]
421
+ }
422
+ ],
423
+ "source": [
424
+ "from transformers import AutoConfig\n",
425
+ "\n",
426
+ "bert_config = AutoConfig.from_pretrained('bert-base-uncased')\n",
427
+ "print(type(bert_config))\n",
428
+ "\n",
429
+ "gpt_config = AutoConfig.from_pretrained('gpt2')\n",
430
+ "print(type(gpt_config))\n",
431
+ "\n",
432
+ "bart_config = AutoConfig.from_pretrained('facebook/bart-base')\n",
433
+ "print(type(bart_config))"
434
+ ]
435
+ },
436
+ {
437
+ "cell_type": "code",
438
+ "execution_count": 26,
439
+ "metadata": {},
440
+ "outputs": [
441
+ {
442
+ "name": "stdout",
443
+ "output_type": "stream",
444
+ "text": [
445
+ "<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
446
+ "<class 'transformers.models.gpt2.configuration_gpt2.GPT2Config'>\n",
447
+ "<class 'transformers.models.bart.configuration_bart.BartConfig'>\n"
448
+ ]
449
+ }
450
+ ],
451
+ "source": [
452
+ "from transformers import BertConfig\n",
453
+ "\n",
454
+ "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
455
+ "print(type(bert_config))\n",
456
+ "\n",
457
+ "from transformers import GPT2Config\n",
458
+ "gpt_config = GPT2Config.from_pretrained('gpt2')\n",
459
+ "print(type(gpt_config))\n",
460
+ "\n",
461
+ "from transformers import BartConfig\n",
462
+ "bart_config = BartConfig.from_pretrained('facebook/bart-base')\n",
463
+ "print(type(bart_config))"
464
+ ]
465
+ },
466
+ {
467
+ "cell_type": "code",
468
+ "execution_count": 27,
469
+ "metadata": {},
470
+ "outputs": [
471
+ {
472
+ "name": "stdout",
473
+ "output_type": "stream",
474
+ "text": [
475
+ "BertConfig {\n",
476
+ " \"architectures\": [\n",
477
+ " \"BertForMaskedLM\"\n",
478
+ " ],\n",
479
+ " \"attention_probs_dropout_prob\": 0.1,\n",
480
+ " \"classifier_dropout\": null,\n",
481
+ " \"gradient_checkpointing\": false,\n",
482
+ " \"hidden_act\": \"gelu\",\n",
483
+ " \"hidden_dropout_prob\": 0.1,\n",
484
+ " \"hidden_size\": 768,\n",
485
+ " \"initializer_range\": 0.02,\n",
486
+ " \"intermediate_size\": 3072,\n",
487
+ " \"layer_norm_eps\": 1e-12,\n",
488
+ " \"max_position_embeddings\": 512,\n",
489
+ " \"model_type\": \"bert\",\n",
490
+ " \"num_attention_heads\": 12,\n",
491
+ " \"num_hidden_layers\": 12,\n",
492
+ " \"pad_token_id\": 0,\n",
493
+ " \"position_embedding_type\": \"absolute\",\n",
494
+ " \"transformers_version\": \"4.34.1\",\n",
495
+ " \"type_vocab_size\": 2,\n",
496
+ " \"use_cache\": true,\n",
497
+ " \"vocab_size\": 30522\n",
498
+ "}\n",
499
+ "\n"
500
+ ]
501
+ }
502
+ ],
503
+ "source": [
504
+ "from transformers import BertConfig\n",
505
+ "\n",
506
+ "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
507
+ "print(bert_config)"
508
+ ]
509
+ },
510
+ {
511
+ "cell_type": "code",
512
+ "execution_count": 31,
513
+ "metadata": {},
514
+ "outputs": [
515
+ {
516
+ "name": "stdout",
517
+ "output_type": "stream",
518
+ "text": [
519
+ "BertModel(\n",
520
+ " (embeddings): BertEmbeddings(\n",
521
+ " (word_embeddings): Embedding(30522, 768, padding_idx=0)\n",
522
+ " (position_embeddings): Embedding(512, 768)\n",
523
+ " (token_type_embeddings): Embedding(2, 768)\n",
524
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
525
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
526
+ " )\n",
527
+ " (encoder): BertEncoder(\n",
528
+ " (layer): ModuleList(\n",
529
+ " (0-11): 12 x BertLayer(\n",
530
+ " (attention): BertAttention(\n",
531
+ " (self): BertSelfAttention(\n",
532
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
533
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
534
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
535
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
536
+ " )\n",
537
+ " (output): BertSelfOutput(\n",
538
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
539
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
540
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
541
+ " )\n",
542
+ " )\n",
543
+ " (intermediate): BertIntermediate(\n",
544
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
545
+ " (intermediate_act_fn): GELUActivation()\n",
546
+ " )\n",
547
+ " (output): BertOutput(\n",
548
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
549
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
550
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
551
+ " )\n",
552
+ " )\n",
553
+ " )\n",
554
+ " )\n",
555
+ " (pooler): BertPooler(\n",
556
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
557
+ " (activation): Tanh()\n",
558
+ " )\n",
559
+ ")\n"
560
+ ]
561
+ }
562
+ ],
563
+ "source": [
564
+ "from transformers import BertConfig, BertModel\n",
565
+ "\n",
566
+ "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
567
+ "bert_model = BertModel(bert_config)\n",
568
+ "print(bert_model)"
569
+ ]
570
+ },
571
+ {
572
+ "cell_type": "code",
573
+ "execution_count": 32,
574
+ "metadata": {},
575
+ "outputs": [],
576
+ "source": [
577
+ "from transformers import BertConfig, BertModel\n",
578
+ "\n",
579
+ "bert_config = BertConfig.from_pretrained('bert-base-uncased', num_hidden_layers=10)\n",
580
+ "bert_model = BertModel(bert_config)"
581
+ ]
582
+ },
583
+ {
584
+ "cell_type": "code",
585
+ "execution_count": 33,
586
+ "metadata": {},
587
+ "outputs": [
588
+ {
589
+ "name": "stdout",
590
+ "output_type": "stream",
591
+ "text": [
592
+ "BertModel(\n",
593
+ " (embeddings): BertEmbeddings(\n",
594
+ " (word_embeddings): Embedding(30522, 768, padding_idx=0)\n",
595
+ " (position_embeddings): Embedding(512, 768)\n",
596
+ " (token_type_embeddings): Embedding(2, 768)\n",
597
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
598
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
599
+ " )\n",
600
+ " (encoder): BertEncoder(\n",
601
+ " (layer): ModuleList(\n",
602
+ " (0-9): 10 x BertLayer(\n",
603
+ " (attention): BertAttention(\n",
604
+ " (self): BertSelfAttention(\n",
605
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
606
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
607
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
608
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
609
+ " )\n",
610
+ " (output): BertSelfOutput(\n",
611
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
612
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
613
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
614
+ " )\n",
615
+ " )\n",
616
+ " (intermediate): BertIntermediate(\n",
617
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
618
+ " (intermediate_act_fn): GELUActivation()\n",
619
+ " )\n",
620
+ " (output): BertOutput(\n",
621
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
622
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
623
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
624
+ " )\n",
625
+ " )\n",
626
+ " )\n",
627
+ " )\n",
628
+ " (pooler): BertPooler(\n",
629
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
630
+ " (activation): Tanh()\n",
631
+ " )\n",
632
+ ")\n"
633
+ ]
634
+ }
635
+ ],
636
+ "source": [
637
+ "print(bert_model)"
638
+ ]
639
+ },
640
+ {
641
+ "cell_type": "code",
642
+ "execution_count": 34,
643
+ "metadata": {},
644
+ "outputs": [],
645
+ "source": [
646
+ "from transformers import BertConfig, BertModel\n",
647
+ "\n",
648
+ "bert_config = BertConfig.from_pretrained('bert-base-uncased', num_hidden_layers = 10)\n",
649
+ "bert_model = BertModel(bert_config)\n",
650
+ "\n",
651
+ "bert_model.save_pretrained('my_bert_model')"
652
+ ]
653
+ },
654
+ {
655
+ "cell_type": "code",
656
+ "execution_count": 35,
657
+ "metadata": {},
658
+ "outputs": [],
659
+ "source": [
660
+ "from transformers import BertModel\n",
661
+ "\n",
662
+ "bert_model = BertModel.from_pretrained('my_bert_model')"
663
+ ]
664
+ },
665
+ {
666
+ "cell_type": "code",
667
+ "execution_count": 39,
668
+ "metadata": {},
669
+ "outputs": [
670
+ {
671
+ "name": "stdout",
672
+ "output_type": "stream",
673
+ "text": [
674
+ "<class 'transformers.models.bert.modeling_bert.BertModel'>\n",
675
+ "<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
676
+ "<class 'transformers.models.bert.modeling_bert.BertModel'>\n",
677
+ "<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
678
+ "<class 'transformers.models.bert.modeling_bert.BertModel'>\n"
679
+ ]
680
+ }
681
+ ],
682
+ "source": [
683
+ "from transformers import AutoModel\n",
684
+ "\n",
685
+ "bert_model = AutoModel.from_pretrained('bert-base-uncased')\n",
686
+ "print(type(bert_model))\n",
687
+ "\n",
688
+ "from transformers import AutoConfig, BertModel\n",
689
+ "\n",
690
+ "bert_config = AutoConfig.from_pretrained('bert-base-uncased')\n",
691
+ "print(type(bert_config))\n",
692
+ "bert_model = BertModel(bert_config)\n",
693
+ "print(type(bert_model))\n",
694
+ "\n",
695
+ "from transformers import BertConfig, BertModel\n",
696
+ "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
697
+ "print(type(bert_config))\n",
698
+ "bert_model = BertModel(bert_config)\n",
699
+ "print(type(bert_model))"
700
+ ]
701
+ },
702
+ {
703
+ "cell_type": "code",
704
+ "execution_count": 40,
705
+ "metadata": {},
706
+ "outputs": [
707
+ {
708
+ "data": {
709
+ "text/plain": [
710
+ "BertConfig {\n",
711
+ " \"architectures\": [\n",
712
+ " \"BertForMaskedLM\"\n",
713
+ " ],\n",
714
+ " \"attention_probs_dropout_prob\": 0.1,\n",
715
+ " \"classifier_dropout\": null,\n",
716
+ " \"gradient_checkpointing\": false,\n",
717
+ " \"hidden_act\": \"gelu\",\n",
718
+ " \"hidden_dropout_prob\": 0.1,\n",
719
+ " \"hidden_size\": 768,\n",
720
+ " \"initializer_range\": 0.02,\n",
721
+ " \"intermediate_size\": 3072,\n",
722
+ " \"layer_norm_eps\": 1e-12,\n",
723
+ " \"max_position_embeddings\": 512,\n",
724
+ " \"model_type\": \"bert\",\n",
725
+ " \"num_attention_heads\": 12,\n",
726
+ " \"num_hidden_layers\": 12,\n",
727
+ " \"pad_token_id\": 0,\n",
728
+ " \"position_embedding_type\": \"absolute\",\n",
729
+ " \"transformers_version\": \"4.34.1\",\n",
730
+ " \"type_vocab_size\": 2,\n",
731
+ " \"use_cache\": true,\n",
732
+ " \"vocab_size\": 30522\n",
733
+ "}"
734
+ ]
735
+ },
736
+ "execution_count": 40,
737
+ "metadata": {},
738
+ "output_type": "execute_result"
739
+ }
740
+ ],
741
+ "source": [
742
+ "from transformers import BertConfig\n",
743
+ "\n",
744
+ "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
745
+ "bert_config"
746
+ ]
747
+ },
748
+ {
749
+ "cell_type": "code",
750
+ "execution_count": 49,
751
+ "metadata": {},
752
+ "outputs": [],
753
+ "source": [
754
+ "from transformers import BertConfig, BertModel\n",
755
+ "new_bert_config = BertConfig.from_pretrained('bert-base-uncased', num_hidden_layers=10)\n",
756
+ "new_bert_model = BertModel(new_bert_config)\n",
757
+ "\n",
758
+ "new_bert_model.save_pretrained('new-bert-model')\n",
759
+ "\n"
760
+ ]
761
+ },
762
+ {
763
+ "cell_type": "code",
764
+ "execution_count": 50,
765
+ "metadata": {},
766
+ "outputs": [],
767
+ "source": [
768
+ "load_new_bert_model = BertModel.from_pretrained('new-bert-model')\n"
769
+ ]
770
+ },
771
+ {
772
+ "cell_type": "code",
773
+ "execution_count": 57,
774
+ "metadata": {},
775
+ "outputs": [
776
+ {
777
+ "name": "stdout",
778
+ "output_type": "stream",
779
+ "text": [
780
+ "['let', \"'\", 's', 'try', 'to', 'token', '##ize']\n"
781
+ ]
782
+ },
783
+ {
784
+ "name": "stderr",
785
+ "output_type": "stream",
786
+ "text": [
787
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
788
+ ]
789
+ },
790
+ {
791
+ "name": "stdout",
792
+ "output_type": "stream",
793
+ "text": [
794
+ "['▁let', \"'\", 's', '▁try', '▁to', '▁to', 'ken', 'ize']\n",
795
+ "[2292, 1005, 1055, 3046, 2000, 19204, 4697]\n",
796
+ "[101, 2292, 1005, 1055, 3046, 2000, 19204, 4697, 102]\n",
797
+ "{'input_ids': [101, 2292, 1005, 1055, 3046, 2000, 19204, 4697, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}\n",
798
+ "[CLS] let's try to tokenize [SEP]\n"
799
+ ]
800
+ }
801
+ ],
802
+ "source": [
803
+ "from transformers import AutoTokenizer\n",
804
+ "\n",
805
+ "# split our input into tokens:\n",
806
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
807
+ "tokens = tokenizer.tokenize(\"Let's try to tokenize\")\n",
808
+ "print(tokens)\n",
809
+ "\n",
810
+ "albert_tokenizer = AutoTokenizer.from_pretrained('albert-base-v1')\n",
811
+ "albert_tokens = albert_tokenizer.tokenize(\"Let's try to tokenize\")\n",
812
+ "print(albert_tokens)\n",
813
+ "\n",
814
+ "# map tokens to respective ids:\n",
815
+ "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
816
+ "print(input_ids)\n",
817
+ "\n",
818
+ "# add special tokens:\n",
819
+ "final_inputs = tokenizer.prepare_for_model(input_ids)\n",
820
+ "print(final_inputs['input_ids'])\n",
821
+ "print(final_inputs)\n",
822
+ "\n",
823
+ "print(tokenizer.decode(final_inputs['input_ids']))"
824
+ ]
825
+ },
826
+ {
827
+ "cell_type": "code",
828
+ "execution_count": 61,
829
+ "metadata": {},
830
+ "outputs": [
831
+ {
832
+ "name": "stderr",
833
+ "output_type": "stream",
834
+ "text": [
835
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
836
+ ]
837
+ },
838
+ {
839
+ "name": "stdout",
840
+ "output_type": "stream",
841
+ "text": [
842
+ "['lets', 'try', 'to', 'token', '##ize']\n",
843
+ "[11082, 3046, 2000, 19204, 4697]\n",
844
+ "{'input_ids': [101, 11082, 3046, 2000, 19204, 4697, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}\n",
845
+ "[CLS] lets try to tokenize [SEP]\n",
846
+ "{'input_ids': [101, 11082, 3046, 2000, 19204, 4697, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}\n",
847
+ "[101, 11082, 3046, 2000, 19204, 4697, 102]\n",
848
+ "[CLS] lets try to tokenize [SEP]\n"
849
+ ]
850
+ }
851
+ ],
852
+ "source": [
853
+ "from transformers import AutoTokenizer\n",
854
+ "\n",
855
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
856
+ "tokens = tokenizer.tokenize(\"lets try to tokenize\")\n",
857
+ "print(tokens)\n",
858
+ "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
859
+ "print(input_ids)\n",
860
+ "final_inputs = tokenizer.prepare_for_model(input_ids)\n",
861
+ "print(final_inputs)\n",
862
+ "decoded_inputs = tokenizer.decode(final_inputs['input_ids'])\n",
863
+ "print(decoded_inputs)\n",
864
+ "\n",
865
+ "from transformers import AutoTokenizer\n",
866
+ "\n",
867
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
868
+ "inputs = tokenizer('lets try to tokenize')\n",
869
+ "print(inputs)\n",
870
+ "print(inputs['input_ids'])\n",
871
+ "print(tokenizer.decode(inputs['input_ids']))"
872
+ ]
873
+ },
874
+ {
875
+ "cell_type": "code",
876
+ "execution_count": 1,
877
+ "metadata": {},
878
+ "outputs": [
879
+ {
880
+ "name": "stderr",
881
+ "output_type": "stream",
882
+ "text": [
883
+ "You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
884
+ ]
885
+ },
886
+ {
887
+ "ename": "AttributeError",
888
+ "evalue": "'list' object has no attribute 'size'",
889
+ "output_type": "error",
890
+ "traceback": [
891
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
892
+ "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
893
+ "\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\practise basics.ipynb Cell 32\u001b[0m line \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/practise%20basics.ipynb#X43sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m BertModel\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/practise%20basics.ipynb#X43sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m Bert_model \u001b[39m=\u001b[39m BertModel(Bert_config)\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/practise%20basics.ipynb#X43sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m outputs \u001b[39m=\u001b[39m Bert_model(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49minputs)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/practise%20basics.ipynb#X43sZmlsZQ%3D%3D?line=11'>12</a>\u001b[0m \u001b[39mprint\u001b[39m(outputs\u001b[39m.\u001b[39mlast_hidden_state\u001b[39m.\u001b[39mshape)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/practise%20basics.ipynb#X43sZmlsZQ%3D%3D?line=12'>13</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m AutoModelForSequenceClassification\n",
894
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\torch\\nn\\modules\\module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1516\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_compiled_call_impl(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1517\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1518\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call_impl(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
895
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\torch\\nn\\modules\\module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1522\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1523\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1524\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_pre_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1525\u001b[0m \u001b[39mor\u001b[39;00m _global_backward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1526\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1527\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[0;32m 1529\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 1530\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n",
896
+ "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\models\\bert\\modeling_bert.py:970\u001b[0m, in \u001b[0;36mBertModel.forward\u001b[1;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[0;32m 968\u001b[0m \u001b[39melif\u001b[39;00m input_ids \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 969\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mwarn_if_padding_and_no_attention_mask(input_ids, attention_mask)\n\u001b[1;32m--> 970\u001b[0m input_shape \u001b[39m=\u001b[39m input_ids\u001b[39m.\u001b[39;49msize()\n\u001b[0;32m 971\u001b[0m \u001b[39melif\u001b[39;00m inputs_embeds \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 972\u001b[0m input_shape \u001b[39m=\u001b[39m inputs_embeds\u001b[39m.\u001b[39msize()[:\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n",
897
+ "\u001b[1;31mAttributeError\u001b[0m: 'list' object has no attribute 'size'"
898
+ ]
899
+ }
900
+ ],
901
+ "source": [
902
+ "from transformers import pipeline\n",
903
+ "from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification, AutoConfig\n",
904
+ "\n",
905
+ "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
906
+ "tokens = tokenizer.tokenize(\"Let's try to tokenize\")\n",
907
+ "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
908
+ "inputs = tokenizer.prepare_for_model(input_ids)\n",
909
+ "Bert_config = AutoConfig.from_pretrained('bert-base-uncased')\n",
910
+ "from transformers import BertModel\n",
911
+ "Bert_model = BertModel(Bert_config)\n",
912
+ "outputs = Bert_model(**inputs)\n",
913
+ "print(outputs.last_hidden_state.shape)\n",
914
+ "from transformers import AutoModelForSequenceClassification\n",
915
+ "model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')\n",
916
+ "outputs = model(**inputs)\n",
917
+ "print(outputs.logits)\n",
918
+ "import torch\n",
919
+ "predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
920
+ "print(predictions)\n",
921
+ "print(model.config.id2label)\n",
922
+ "\n",
923
+ "from transformers import AutoModel, AutoConfig, BertModel, BertConfig\n",
924
+ "\n",
925
+ "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
926
+ "bert_model = BertModel(bert_config)\n",
927
+ "bert_model.save_pretrained(bert_model)\n",
928
+ "initialize_model = BertModel.from_pretrained('bert_model')\n",
929
+ "outputs = initialize_model(**inputs)\n",
930
+ "print(outputs.last_hidden_state.shape)"
931
+ ]
932
+ },
933
+ {
934
+ "cell_type": "code",
935
+ "execution_count": null,
936
+ "metadata": {},
937
+ "outputs": [],
938
+ "source": []
939
+ }
940
+ ],
941
+ "metadata": {
942
+ "kernelspec": {
943
+ "display_name": "Python 3",
944
+ "language": "python",
945
+ "name": "python3"
946
+ },
947
+ "language_info": {
948
+ "codemirror_mode": {
949
+ "name": "ipython",
950
+ "version": 3
951
+ },
952
+ "file_extension": ".py",
953
+ "mimetype": "text/x-python",
954
+ "name": "python",
955
+ "nbconvert_exporter": "python",
956
+ "pygments_lexer": "ipython3",
957
+ "version": "3.11.6"
958
+ }
959
+ },
960
+ "nbformat": 4,
961
+ "nbformat_minor": 2
962
+ }
preprocess-sentence-pairs.ipynb ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "This is how to batch inputs together."
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 1,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "from transformers import AutoTokenizer\n",
17
+ "\n",
18
+ "checkpoint = 'bert-base-uncased'\n",
19
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
20
+ "sequences = [\n",
21
+ " 'I will not give up this time','I will try my best and see what happens'\n",
22
+ "]\n",
23
+ "batch = tokenizer(sequences, padding=True, truncation=True, return_tensors='pt')"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "markdown",
28
+ "metadata": {},
29
+ "source": [
30
+ "The tokenizer accepts sentence pairs as well!"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 2,
36
+ "metadata": {},
37
+ "outputs": [
38
+ {
39
+ "data": {
40
+ "text/plain": [
41
+ "{'input_ids': [101, 2026, 2171, 2003, 14093, 999, 102, 1045, 2572, 1037, 3076, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}"
42
+ ]
43
+ },
44
+ "execution_count": 2,
45
+ "metadata": {},
46
+ "output_type": "execute_result"
47
+ }
48
+ ],
49
+ "source": [
50
+ "from transformers import AutoTokenizer\n",
51
+ "\n",
52
+ "checkpoint = \"bert-base-uncased\"\n",
53
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
54
+ "tokenizer('My name is Abdullah!', \"I am a student.\")"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "markdown",
59
+ "metadata": {},
60
+ "source": [
61
+ "If we have several pairs of sentences:"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": 3,
67
+ "metadata": {},
68
+ "outputs": [
69
+ {
70
+ "data": {
71
+ "text/plain": [
72
+ "{'input_ids': tensor([[ 101, 2026, 2171, 2003, 14093, 999, 102, 1045, 2572, 1037,\n",
73
+ " 3076, 102, 0, 0, 0, 0, 0, 0, 0, 0,\n",
74
+ " 0],\n",
75
+ " [ 101, 1045, 2572, 6517, 1045, 2514, 2066, 1045, 2001, 6620,\n",
76
+ " 3993, 1012, 102, 2045, 2003, 2061, 2172, 2000, 4553, 1012,\n",
77
+ " 102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
78
+ " [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
79
+ " [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}"
80
+ ]
81
+ },
82
+ "execution_count": 3,
83
+ "metadata": {},
84
+ "output_type": "execute_result"
85
+ }
86
+ ],
87
+ "source": [
88
+ "from transformers import AutoTokenizer\n",
89
+ "\n",
90
+ "checkpoint = 'bert-base-uncased'\n",
91
+ "\n",
92
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
93
+ "tokenizer(\n",
94
+ " ['My name is Abdullah!', 'I am sad I feel like I was prideful.'],\n",
95
+ " ['I am a student', 'There is so much to learn.'],\n",
96
+ " padding = True,\n",
97
+ " truncation = True,\n",
98
+ " return_tensors = 'pt'\n",
99
+ ")"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": 4,
105
+ "metadata": {},
106
+ "outputs": [
107
+ {
108
+ "name": "stderr",
109
+ "output_type": "stream",
110
+ "text": [
111
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
112
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
113
+ ]
114
+ },
115
+ {
116
+ "name": "stdout",
117
+ "output_type": "stream",
118
+ "text": [
119
+ "tensor([[ 0.2318, -0.3874],\n",
120
+ " [-0.3586, -0.5364]], grad_fn=<AddmmBackward0>)\n",
121
+ "tensor([[0.6500, 0.3500],\n",
122
+ " [0.5443, 0.4557]], grad_fn=<SoftmaxBackward0>)\n"
123
+ ]
124
+ },
125
+ {
126
+ "data": {
127
+ "text/plain": [
128
+ "{0: 'LABEL_0', 1: 'LABEL_1'}"
129
+ ]
130
+ },
131
+ "execution_count": 4,
132
+ "metadata": {},
133
+ "output_type": "execute_result"
134
+ }
135
+ ],
136
+ "source": [
137
+ "from transformers import AutoModelForSequenceClassification\n",
138
+ "from transformers import AutoTokenizer\n",
139
+ "\n",
140
+ "checkpoint = 'bert-base-uncased'\n",
141
+ "\n",
142
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
143
+ "batch = tokenizer(\n",
144
+ " ['My name is Abdullah!', 'I work at Hackules.inc'],\n",
145
+ " ['I am a student', 'This movie is great'],\n",
146
+ " padding = True,\n",
147
+ " truncation = True,\n",
148
+ " return_tensors = 'pt'\n",
149
+ ")\n",
150
+ "\n",
151
+ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
152
+ "outputs = model(**batch)\n",
153
+ "print(outputs.logits)\n",
154
+ "\n",
155
+ "import torch\n",
156
+ "\n",
157
+ "predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
158
+ "print(predictions)\n",
159
+ "\n",
160
+ "model.config.id2label\n",
161
+ "\n"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": null,
167
+ "metadata": {},
168
+ "outputs": [],
169
+ "source": []
170
+ }
171
+ ],
172
+ "metadata": {
173
+ "kernelspec": {
174
+ "display_name": "Python 3",
175
+ "language": "python",
176
+ "name": "python3"
177
+ },
178
+ "language_info": {
179
+ "codemirror_mode": {
180
+ "name": "ipython",
181
+ "version": 3
182
+ },
183
+ "file_extension": ".py",
184
+ "mimetype": "text/x-python",
185
+ "name": "python",
186
+ "nbconvert_exporter": "python",
187
+ "pygments_lexer": "ipython3",
188
+ "version": "3.11.6"
189
+ }
190
+ },
191
+ "nbformat": 4,
192
+ "nbformat_minor": 2
193
+ }
study_pipeline.ipynb ADDED
@@ -0,0 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "text/plain": [
11
+ "[{'label': 'POSITIVE', 'score': 0.9433633089065552},\n",
12
+ " {'label': 'NEGATIVE', 'score': 0.9994558691978455}]"
13
+ ]
14
+ },
15
+ "execution_count": 4,
16
+ "metadata": {},
17
+ "output_type": "execute_result"
18
+ }
19
+ ],
20
+ "source": [
21
+ "from transformers import pipeline\n",
22
+ "\n",
23
+ "sentiment_analyser = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')\n",
24
+ "sentiment_analyser(['I have been waiting for a HuggingFace course my whole life.',\n",
25
+ " 'I hate this so much!'])"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "markdown",
30
+ "metadata": {},
31
+ "source": [
32
+ "What happens under the hood?"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "markdown",
37
+ "metadata": {},
38
+ "source": [
39
+ "Tokenizer -> Model -> PostProcessing"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "markdown",
44
+ "metadata": {},
45
+ "source": [
46
+ "Tokenizer takes in the raw data in this case the text and converts it into numerical representation for the model.\n",
47
+ "It does so using the following steps:\n",
48
+ "- Take input\n",
49
+ "- Break input down into tokens depending on spaces or punctuation\n",
50
+ "- Provide the sequence of tokens with a start token and a stop token, the start token for the BERT model is CLS which stands for Classification Tasks and the stop token for said model is SEP which stands for Separation Tasks.\n",
51
+ "- Convert all the tokens in the sequence into their numerical representation for the model to ingest."
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "markdown",
56
+ "metadata": {},
57
+ "source": [
58
+ "# Pytorch"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "markdown",
63
+ "metadata": {},
64
+ "source": [
65
+ "### Tokenizer"
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "execution_count": 9,
71
+ "metadata": {},
72
+ "outputs": [],
73
+ "source": [
74
+ "from transformers import AutoTokenizer\n",
75
+ "\n",
76
+ "# initialize tokenizer and model from checkpoint name\n",
77
+ "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
78
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
79
+ "\n",
80
+ "# use tokenizer to preprocess inputs:\n",
81
+ "raw_inputs = [\n",
82
+ " 'I have been waiting for a HuggingFace course my whole life.',\n",
83
+ " 'I hate this so much!'\n",
84
+ "]\n",
85
+ "inputs_pytorch = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 10,
91
+ "metadata": {},
92
+ "outputs": [
93
+ {
94
+ "name": "stdout",
95
+ "output_type": "stream",
96
+ "text": [
97
+ "{'input_ids': tensor([[ 101, 1045, 2031, 2042, 3403, 2005, 1037, 17662, 12172, 2607,\n",
98
+ " 2026, 2878, 2166, 1012, 102],\n",
99
+ " [ 101, 1045, 5223, 2023, 2061, 2172, 999, 102, 0, 0,\n",
100
+ " 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
101
+ " [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]])}\n"
102
+ ]
103
+ }
104
+ ],
105
+ "source": [
106
+ "print(inputs_pytorch)\n"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "markdown",
111
+ "metadata": {},
112
+ "source": [
113
+ "### Model"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": 14,
119
+ "metadata": {},
120
+ "outputs": [
121
+ {
122
+ "name": "stdout",
123
+ "output_type": "stream",
124
+ "text": [
125
+ "torch.Size([2, 15, 768])\n"
126
+ ]
127
+ }
128
+ ],
129
+ "source": [
130
+ "from transformers import AutoModel\n",
131
+ "\n",
132
+ "# initialize model from checkpoint name\n",
133
+ "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
134
+ "model = AutoModel.from_pretrained(checkpoint)\n",
135
+ "\n",
136
+ "# forward pass\n",
137
+ "outputs_pytorch = model(**inputs_pytorch)\n",
138
+ "\n",
139
+ "# print last hidden states of the first batch\n",
140
+ "print(outputs_pytorch.last_hidden_state.shape)\n"
141
+ ]
142
+ },
143
+ {
144
+ "cell_type": "markdown",
145
+ "metadata": {},
146
+ "source": [
147
+ "However, the AutoModel API instantiates the model with the pre-training head removed. It will output a high dimensional tensor that is a representation of the sentences passed, but not directly useful for classification."
148
+ ]
149
+ },
150
+ {
151
+ "cell_type": "markdown",
152
+ "metadata": {},
153
+ "source": [
154
+ "Use the AutoModelForSequenceClassification API instead"
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": 18,
160
+ "metadata": {},
161
+ "outputs": [
162
+ {
163
+ "name": "stdout",
164
+ "output_type": "stream",
165
+ "text": [
166
+ "tensor([[-1.3782, 1.4346],\n",
167
+ " [ 4.1692, -3.3464]], grad_fn=<AddmmBackward0>)\n"
168
+ ]
169
+ }
170
+ ],
171
+ "source": [
172
+ "from transformers import AutoModelForSequenceClassification\n",
173
+ "\n",
174
+ "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
175
+ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
176
+ "\n",
177
+ "outputs_pytorch = model(**inputs_pytorch)\n",
178
+ "print(outputs_pytorch.logits)"
179
+ ]
180
+ },
181
+ {
182
+ "cell_type": "markdown",
183
+ "metadata": {},
184
+ "source": [
185
+ "It is seen that the outputs are not probabilities yet, as the values are far from being between 0 and 1.\n",
186
+ "This is because each model of the transformers library returns logits.\n",
187
+ "The logits are converted into probabilities in the third and last step of the pipeline, which is\n",
188
+ "### Postprocessing "
189
+ ]
190
+ },
191
+ {
192
+ "cell_type": "code",
193
+ "execution_count": 19,
194
+ "metadata": {},
195
+ "outputs": [
196
+ {
197
+ "name": "stdout",
198
+ "output_type": "stream",
199
+ "text": [
200
+ "tensor([[5.6636e-02, 9.4336e-01],\n",
201
+ " [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward0>)\n"
202
+ ]
203
+ },
204
+ {
205
+ "data": {
206
+ "text/plain": [
207
+ "{0: 'NEGATIVE', 1: 'POSITIVE'}"
208
+ ]
209
+ },
210
+ "execution_count": 19,
211
+ "metadata": {},
212
+ "output_type": "execute_result"
213
+ }
214
+ ],
215
+ "source": [
216
+ "import torch\n",
217
+ "\n",
218
+ "'''to convert logits into probabilities we apply the softmax layer'''\n",
219
+ "predictions = torch.nn.functional.softmax(outputs_pytorch.logits, dim=-1)\n",
220
+ "print(predictions)\n",
221
+ "\n",
222
+ "'''the last of these is to see which of these responses \n",
223
+ "are positive or negative. \n",
224
+ "this is given by id2label field of the model config'''\n",
225
+ "model.config.id2label"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "markdown",
230
+ "metadata": {},
231
+ "source": [
232
+ "# Tensorflow:"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "markdown",
237
+ "metadata": {},
238
+ "source": [
239
+ "### Tokenizer:"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": 21,
245
+ "metadata": {},
246
+ "outputs": [],
247
+ "source": [
248
+ "from transformers import AutoTokenizer\n",
249
+ "\n",
250
+ "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
251
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
252
+ "\n",
253
+ "raw_inputs = [\n",
254
+ " '''I've been waiting for a HuggingFace course my whole life.''',\n",
255
+ " 'I hate this so much!'\n",
256
+ "]\n",
257
+ "\n",
258
+ "inputs_tensorflow = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='tf')"
259
+ ]
260
+ },
261
+ {
262
+ "cell_type": "code",
263
+ "execution_count": 22,
264
+ "metadata": {},
265
+ "outputs": [
266
+ {
267
+ "name": "stdout",
268
+ "output_type": "stream",
269
+ "text": [
270
+ "{'input_ids': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=\n",
271
+ "array([[ 101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662,\n",
272
+ " 12172, 2607, 2026, 2878, 2166, 1012, 102],\n",
273
+ " [ 101, 1045, 5223, 2023, 2061, 2172, 999, 102, 0,\n",
274
+ " 0, 0, 0, 0, 0, 0, 0]])>, 'attention_mask': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=\n",
275
+ "array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
276
+ " [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])>}\n"
277
+ ]
278
+ }
279
+ ],
280
+ "source": [
281
+ "print(inputs_tensorflow)"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "markdown",
286
+ "metadata": {},
287
+ "source": [
288
+ "### Model:"
289
+ ]
290
+ },
291
+ {
292
+ "cell_type": "markdown",
293
+ "metadata": {},
294
+ "source": [
295
+ "#### AutoModel API"
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": 23,
301
+ "metadata": {},
302
+ "outputs": [
303
+ {
304
+ "name": "stderr",
305
+ "output_type": "stream",
306
+ "text": [
307
+ "Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['pre_classifier.weight', 'classifier.weight', 'pre_classifier.bias', 'classifier.bias']\n",
308
+ "- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).\n",
309
+ "- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).\n",
310
+ "All the weights of TFDistilBertModel were initialized from the PyTorch model.\n",
311
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.\n"
312
+ ]
313
+ },
314
+ {
315
+ "data": {
316
+ "text/plain": [
317
+ "TensorShape([2, 16, 768])"
318
+ ]
319
+ },
320
+ "execution_count": 23,
321
+ "metadata": {},
322
+ "output_type": "execute_result"
323
+ }
324
+ ],
325
+ "source": [
326
+ "from transformers import TFAutoModel\n",
327
+ "\n",
328
+ "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
329
+ "model =TFAutoModel.from_pretrained(checkpoint)\n",
330
+ "\n",
331
+ "outputs_tensorflow = model(inputs_tensorflow)\n",
332
+ "outputs_tensorflow.last_hidden_state.shape"
333
+ ]
334
+ },
335
+ {
336
+ "cell_type": "markdown",
337
+ "metadata": {},
338
+ "source": [
339
+ "#### AutoModel for Sequence Classification Class:"
340
+ ]
341
+ },
342
+ {
343
+ "cell_type": "code",
344
+ "execution_count": 24,
345
+ "metadata": {},
346
+ "outputs": [
347
+ {
348
+ "name": "stderr",
349
+ "output_type": "stream",
350
+ "text": [
351
+ "All PyTorch model weights were used when initializing TFDistilBertForSequenceClassification.\n",
352
+ "\n",
353
+ "All the weights of TFDistilBertForSequenceClassification were initialized from the PyTorch model.\n",
354
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.\n"
355
+ ]
356
+ },
357
+ {
358
+ "data": {
359
+ "text/plain": [
360
+ "<tf.Tensor: shape=(2, 2), dtype=float32, numpy=\n",
361
+ "array([[-1.5606958, 1.6122808],\n",
362
+ " [ 4.169232 , -3.3464477]], dtype=float32)>"
363
+ ]
364
+ },
365
+ "execution_count": 24,
366
+ "metadata": {},
367
+ "output_type": "execute_result"
368
+ }
369
+ ],
370
+ "source": [
371
+ "from transformers import TFAutoModelForSequenceClassification\n",
372
+ "\n",
373
+ "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
374
+ "model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
375
+ "\n",
376
+ "outputs_tensorflow = model(inputs_tensorflow)\n",
377
+ "outputs_tensorflow.logits"
378
+ ]
379
+ },
380
+ {
381
+ "cell_type": "markdown",
382
+ "metadata": {},
383
+ "source": [
384
+ "### Postprocessing"
385
+ ]
386
+ },
387
+ {
388
+ "cell_type": "code",
389
+ "execution_count": 25,
390
+ "metadata": {},
391
+ "outputs": [
392
+ {
393
+ "name": "stdout",
394
+ "output_type": "stream",
395
+ "text": [
396
+ "tf.Tensor(\n",
397
+ "[[4.0195428e-02 9.5980465e-01]\n",
398
+ " [9.9945587e-01 5.4418371e-04]], shape=(2, 2), dtype=float32)\n"
399
+ ]
400
+ }
401
+ ],
402
+ "source": [
403
+ "import tensorflow as tf\n",
404
+ "\n",
405
+ "predictions = tf.math.softmax(outputs_tensorflow.logits, axis=-1)\n",
406
+ "print(predictions)"
407
+ ]
408
+ },
409
+ {
410
+ "cell_type": "code",
411
+ "execution_count": 26,
412
+ "metadata": {},
413
+ "outputs": [
414
+ {
415
+ "data": {
416
+ "text/plain": [
417
+ "{0: 'NEGATIVE', 1: 'POSITIVE'}"
418
+ ]
419
+ },
420
+ "execution_count": 26,
421
+ "metadata": {},
422
+ "output_type": "execute_result"
423
+ }
424
+ ],
425
+ "source": [
426
+ "model.config.id2label"
427
+ ]
428
+ },
429
+ {
430
+ "cell_type": "code",
431
+ "execution_count": null,
432
+ "metadata": {},
433
+ "outputs": [],
434
+ "source": []
435
+ }
436
+ ],
437
+ "metadata": {
438
+ "kernelspec": {
439
+ "display_name": "Python 3",
440
+ "language": "python",
441
+ "name": "python3"
442
+ },
443
+ "language_info": {
444
+ "codemirror_mode": {
445
+ "name": "ipython",
446
+ "version": 3
447
+ },
448
+ "file_extension": ".py",
449
+ "mimetype": "text/x-python",
450
+ "name": "python",
451
+ "nbconvert_exporter": "python",
452
+ "pygments_lexer": "ipython3",
453
+ "version": "3.11.6"
454
+ }
455
+ },
456
+ "nbformat": 4,
457
+ "nbformat_minor": 2
458
+ }
study_transformers.ipynb ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Transformer Architecture:\n",
8
+ "Encoders, Decoders, Encoder-Decoder "
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "metadata": {},
14
+ "source": [
15
+ "# Paper: Attention is All You Need, Vaswani"
16
+ ]
17
+ },
18
+ {
19
+ "cell_type": "markdown",
20
+ "metadata": {},
21
+ "source": [
22
+ "The transformer architecture has two parts, the encoder and the decoder."
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "markdown",
27
+ "metadata": {},
28
+ "source": [
29
+ "# Encoder:\n",
30
+ "The encoder accepts inputs that represent text. It converts these words and texts into numerical representations. \n",
31
+ "These numerical representations can also be called Embeddings or Features.\n",
32
+ "The Encoder uses the self-attention mechanism as its main component. It is bi-directional.\n",
33
+ "\n",
34
+ "An example of the encoder-only architecture is BERT which is the most popular model of its kind.\n",
35
+ "\n"
36
+ ]
37
+ },
38
+ {
39
+ "attachments": {
40
+ "image.png": {
41
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAABhaVRYdFNuaXBNZXRhZGF0YQAAAAAAeyJjbGlwUG9pbnRzIjpbeyJ4IjowLCJ5IjowfSx7IngiOjQyNCwieSI6MH0seyJ4Ijo0MjQsInkiOjI4MH0seyJ4IjowLCJ5IjoyODB9XX2kMJZ8AACQaUlEQVR4Xu19B2Adx3nm9zp67wBJsDeJRV2ybEm2XGW5O7bPsZPcJXHJJXHiu/TmXJxyKZfmuMtyt2zZlnuTZXWKReydANF77+W1+7+ZnfcWjw8gSIIEQMxH/tjdqf/M7v7f/LOz+zxxASwsLCwsLJYYvM7WwsLCwsJiScESlIWFhYXFkoQlKAsLCwuLJQlLUBYWFhYWSxKWoCwsLCwsliQsQVlYWFhYLElYgrKwsLCwWJKwBGVhYWFhsSRhCcrCwsLCYknCEpSFhYWFxZKEJSgLCwsLiyUJS1AWFhYWFksSlqAsLCwsLJYkLEFZWFhYWCxJWIKyWBKwv/piYWGRCktQ1wEikQhGRkYQi8WcEGBsbExJKtxEwP1UYnCXcTk4duwYuru71f6lkI7H48H3vvc9/OhHP3JCNFjG4OAgenp60Nvbe0GZ6epwh5n9dOnmC/bJ5eRPp8d8MTk5iYMHD6Y9h25cSbssLJY6LEFdBzh8+DDe+MY3or6+3gkBPvGJT+Bb3/qWc5QEiWAuw8n4ywUN+T/+4z8qfYhLNZ7f/e530d/f7xxpNDY24i1veQte//rX47WvfS1e85rX4IknnnBiL66viTfb+erkTuf16tvkUtvDOkkyBw4cmFe//vSnP0VXV5faJ8l/9KMfRUdHhzqeDfMp18JiucIS1HWA06dPK4P27LPPOiFQhrG0tNQ5mgm30abx5Si9ublZGWCGtbe3K2+FcHtUJMChoSHnKAnmZ3rm/X//7//hnnvuUeEsm56PKdsgHA7j/Pnzakswjl4gyWn79u0qzIBtKywsxOc//3l87Wtfw7333ot/+7d/w+joqIo3ZdHjMDDto66tra1q38DEkQjodRIsi22jDgZM19LSgoGBAXVs+mZqakql5TaVsJiWedz44he/iM7OTudIw/SJG9SFaY0Oq1evxsc//nGsX79eHRNNTU3o6+tzjmaC7TTnzMLieoH9yffrAH/2Z3+GaDSqDPx//Md/YGJiAr/8y7+M//t//y9qa2uVQX/ve9+Lqqoq7N27F0eOHMFv/uZv4ic/+YnydnJycvDMM89gy5Yt2Lp1K06ePIn9+/ercu+66y6cPXtWEQSNJ0f0f/AHf4AbbrhBeWg8/sUvfqHSveQlL8GePXvwO7/zO0of6kJDTAJ71atehbe//e2KOD/zmc8gFAohLy9Pbf/n//yfyvB+6EMfwmc/+1kUFxcniOSf//mfVf6/+Iu/UMcsn2Ff/epXcejQIXzzm99UZME2/+Vf/qVqI8sg6VBftouk9ru/+7uqf77yla9genpakTm9Per3/e9/X6Wlzn/zN3+D7OxsFcf0w8PDeM973oOXvexlqqzvfOc7Kh0Jkf1TXV2tvFW/36+mIp966inVTvb/pz71KfzLv/wLduzYgbe+9a1417vehU9/+tPqHLDP2f5Nmzbhla98Jf78z/9c9SN1/fCHP6ymSnlr/tIv/ZIi03/9139VOpKk3v3udyuPmQTMvszPz1ee5osvvoiPfOQjuPXWWxP9Z2GxnGE9qOsA9CA4BUaDdurUKWWsaPhXrVqFuro6RTaZmZkqLY2smTZ68skn1eieBpUGnIRDQ0sjt3btWkUmHNn/53/+J9785jfjH/7hH3D//fcnpg4Nwf3xH/8xPvjBD6qpLJZHz4meVFtbmzK8NPYvf/nLFRn87d/+rSqLhrukpEQRRW5uLs6cOYOCggJluN3G
lWTKY7aL+v7TP/0THnzwQdUGehy/8Ru/oYw3PY6f//znyqPisyyWReIjsZFU6LXQy/jCF76AmpoafOxjH1Mk9thjjylC+Pd//3dkZGTg+eefV1OIJIK/+7u/U6S3efNmNDQ04JFHHlHtZNsqKyuV7iTPH/7whyovCfYd73iHCqfOJHsOEP7qr/5KEdSXv/xlRWA8/vu//3ucOHFCkWt5eTnKysrUOSTpcaDA6T62haTEMJ4PTvlxYEECZr087zwH1I/ESp1IUpacLK4XWIJa5uDomkZu27ZtyiDSiHNajKNqGvvjx48rj4T7xNGjR9WonaDRJTnRONJToFf0wAMPqDiWWVFRoZ4LkSTordCLooFlWsZzSokEcdNNNykDzbqoB4mJenzgAx9QdZMsuH344YdVPL0RGlF6IjSu3Gf569atQzAYVPUTrIOER8/kj/7ojxRh0JuhN0hyov6sh6Szb98+5UWRuEiyfF4VCAQUcXGakl4T9bvzzjvVMy16WiRals2FGWwbCYPTdCR2eo1f+tKXVB+QQEhOJLnHH39chbNPSBIkXQ4GSJrGi+LUKttEkmNetpl49NFH8b73vU8RJPuLg4aNGzeqOOp98803Y82aNYqUqAfPBwcUHCS8853vVH2zc+dO1Z7x8XHVHh6zTQTDeS7tpIjF9QJLUMsc9Dxo7LKyspRRptGiV0OyImh0+VyHxpojcho7xtHw0xByhE/Q4NI40tjSmNMY79q1C+fOnVNpSCYkAJIRSYIkxPI4sqcxpnGkh3LjjTcqT8rn8ylD7wZJ6JZbblF1EHyWY4w3y6OeZkECwWdhbNef/MmfKO+Ehp/TYayP05Csm/WSyNh2TnuxvTTkJEWWxWdCrI/PsThtxvrYFwQJjmTBdrBtb3rTm9TzM+pBMqQnRg+Q/URdTVuZlnqQlOihkoToBZoy6QERjLvtttuUHjxPrMfEkch4Llg/y+czRBPHdpOA2O88L9ya8klcTM9+4UCEBEdiZNkskyRvYXG9wBLUMgcN9YYNGxQh0MDRc3rhhRcUURA0downmJbGjUTEfRq5oqIiFUciomGmATYP8Pl8hQTAZyjvf//7lfzWb/2WGrXT4DIviYBgPSQxkhJ1oVE2RGTAKTXWSdALYxn0oGjwOV3FBQHu0T/Jhs+D6P3x+RaNMD1AgnXR+HPKi14cdSNRkIS4NZ4YF1YwL+uljoa4CZZ33333KU/PtI8eE58v0Suhx0eCoCdEMIx1mfpImBwM0EszYDsM6XPf1EfCYd+bqdbnnntOEQt1dZM9Qe+NnhDPJcG28rwQfPbGgQP7hVO5htRIxNSb5Zm0FhbLHZagljk4YqdhJykQNNr0ZIz3Qm+BzynoDXBajEaV3gRH9yQgel8ER/Nm6o8jcxIPCeZ1r3uderbz0EMPqak0PrMhSC5m9E6Q4Fg2H/yTEFgvn9XQoHL6jKD3ZcrgAgoaXupO74HG1r0SjyCJGq+OhMOFGFwUQXDxAKfduOiBz2Q49UaQXKkLp9P4zIbTkL/+67+uFmGQJEgmhgTpdTEvy+TiBXp+TPenf/qn+PrXv64Iim2i0Wfaz33uc/jGN76hFkVwQQNBYqBXRkJmv9PDoVdEMIx9zwEDPRv21Sc/+UmlG/WmLiROhpOwqQfLIOkZknvFK16RmG7kszbWx2lCergkfFMX+4qkxud6FhbXC3x/xSe2FssWNOC7d+9OjMz5bIUrwTiypuGj8aTRpPGikSV5mOdCnG4jWdFLIlEwD4mAZXElGPOQxOhZkUDoAbAuYwj5zISExBE7SYTkSGPO/LfffrsiC04XMh3DOfLndBzLefWrX62mycz0FfWhB8GyDbjPcoyXRm+Lhp7h1JV5SdA08vTquKXh5wo6rsCjjjTmXDxAUuIzHdZhpvjYLhIQy2A76CkyrVlcwr6gt8Q0rJtxJD96NmwTPTsSBOsm0bMfqO8mIW6SE70nEgnbt13Oww7xaukt8RzxOdgdd9yhvFCW
s1baQg+PpMWBAvuSOrHfWDcHDayfZMs62R7qa4iPbWceeoAWFtcL7DLz6wQ8je6pndTj+cDkcecleaWWS7jDLrWehYRbVz4n4so9Lr0mCRBXUzfTF0RqPe5+MvtuXblPEnPDnYcwfU/hvjs9jwlTfmpZFhbXA+xVfZ3AGDWD1OP5wOQxW2NQ3UJDSHGHLSbc9XORwC0336I8nmuhm6kjXT3ucLPv7rd0hGLiCEM6PDbnwcB9XtLFUSwsrgdYD8pi2cBcqm6D7AafYXH60Cypv14w1y3KODfZub0uC4vlDktQFssGsxLUlVzBV2rHF/LuSacLy5/1FpUMzGPyxZx0lpyWDuypuCJYgrJYNnATlLLb6t+lccTF7MVi2hPxe5y9JHQLZ0N8Rp65084PM/szWTb3jFx5LdcvTP/oPuJfj/McJd3ZtbgYLEFZLBu4CYrQhiAO8znbi5mApNGYG4ttSEw70ukxU/uLt+VyoIlfDwKog1suF6Y8Yq5yki0ytV8MC9sHV9pOA62VHkDo8hai1JUHS1AWywa8VJXZcghK/TV2jB9GjzJgGWIp3YE0B+xf9ilfrSP7G/2uRE99ypYHrvR8sK2Ume+pJ4pdTl2x2LAEZbFsoAmKq9a8cuE6gT1T6N9/FIM9Q4gKQcWd5zDxhBXQ1sKkVyP5OZ7RMN21NiDxeAwxti3lVky0UZBsT9IXSUZzzxxxezktcOXzeuH1BRCVvqROXukvxrj1uVTEJbP+Z2rRZbqLvDzNTY7U0i4PqrR5FzOLtio4Bk/Ah8J1FSi/eSvAD6hIOHvA+FQWF4clKItlA16qHND79J0OnOnFsZ88jXJfDkpziuDx6a9izLz/eeAOSD1Og8W4I1JJM52KSi/5Y+LUsXOg2MPIRdqXFiafCE2CV9wnMr4MBpyKLhMzlE1uE/oSrNPZJS5Q3x1pYPIw8QUZrhHS6GVGEp4YxqMTaBzsxHR+ALsevAcoCcn1K4S/aPouP1iCslh+IEt1TOPUN36CsoJSZGXlwCejfa9cyTEx9FGv+BhiV6OxqCI1elyEMmX0vnjAq547s1z9Kq3evepwr0r0Od4h9aatM8/X/BLmE4lIWMTn9mikrYk2JHauAFoX7eto3YyJSGo5HzCPKUtpq/YJD810PKq2PPJIQ41XEZNzF5Vd7WnM3p5EiUnXckGQKPeioG5J/Uxr/aKPNyoDKTk5Ya9uX0dvD/pyI7j57fcDGfqTZBbzgyUoi2UBYwCUTRO+6XvsIPzNo8gvq8CU3PPTYgzCYsGHZeQ6EJnCZHQKXhIV88olHotrU+9T01UzL3n3kTFOaisR3M5MfZXgVOQTZg2Jj5jvC4qEEBBjRyIiKY1JG/vCExhBRNKLIVSKuQlqISD9I7o4PadCrqx4TT7ab/DALwScKd5ZiWxzI9I2GVhEpMLxgAdD0q7h8BTCkjpJUBRzVpK6pJ7DhYKuaa6y3VcEtxRqI1s5EVkIoDiQiTw5h8FwDP6oX1grgP2tx7D+VbegaPsaVYTOZXExWIKyWBZI3NBcCDEFnPj8D7Epd5X6rt64EFN3KIaG6BiGMn3wFOXCF/LDJ0ZdWVuBMXjamZr9klepJfpaGw/TPhKUf1rMeccgisbjKPdmKO+p0xfBec8EYsXZ8OZlwhvQnhY9Qt7BC3YbS3/xeRh1obBUVbI5mC+0eycbQ1D8K8cyTvCOTKKoewxrw3KOJGw8EEejZwo9OX6ECvPh5QeI5TxpD2smWKry6liYHEUX3HyxvPmXafo9KgMgXlr+aQnrGcbqST/WxWSAMcUGe1A/2I54RQ42PXCnnGSOs+xU33xgCcpiWcBxnMSVEBkBTn3pB9hUuk7d/CNi4I76x9BcLKPX7Rvgzc0UHotIepoBY27kr9gDzVezX/LKiC7QHeEu5mKmiIsIjDEPRX0oGI1i5Nh5ZPSMIs8fRGMgDN/u9YgLQU17o2oKk4bavGWjRvALAF2KQyYi7MHLL1m3h/9YhvahvMiK
eJB5qgPBs+3ICAXQPj2Ckcp8ZO6Q9mXon0m58DzpHmSIIicRahZTV8b8wLy6lCRMDSZct/biZWqzKe1yPHND6pneIDLk3E3vPYv1Q0BFPChecBRdI/0YCkWx9V2vEBZLr4vFhbAEZbEskCAoLicfjuHkl7+PzRUbMS13eV8gikO+EWTcvgU5JfnwiUGIxSJiAPS6c206CDGQyqjPvOQvx1CoPFIMt6o02VmYG0kKisURktbGRyfR/OIJ+GUUnre+BpkbqjAZkw5gGxRBqT/KOC4cpLeE9HVrpKf4UVrV2EuEYRGXeEXfmGzpERZPedH64kmMToxjVLzDdTfdgFhOUOKlftUuSTSjWY4SstHBJpJnV/Y1o83MInByzQhPF5YEy0rGzJaWq0UZl9r31D3TI55h6zCGD5zDZm8OisWr6hztRV9wGje++9X6YWKiZIu5oIdfFhbLDMo4iCGZ9sYx4okgLMYtmh/ClHhOnngEATF1PhndUvzxqAi3cSV8dsMLn1saSyO0G/MVphfHbcY2NX6+YtJz6xX94p4YRr1hjBb40Z3nUVN7UwXSNs80PJLIL+32Sw/4nPawbT629wpF95ERXa7/gvDLFy9FBg1RTxRjGcBoSRbqvOMYLs7CdK6Qk3iG6qRIvBqScD4wIVxRyHB6xlFdlirXaX8sKued4VG1b4RhlIuF+YSI1TY1rSPqWAY+iTjRj3SrzoVIUC6oAJ95io6Tcj16SnIxWZCJUerOa03OlzK20jyNxI7FHLAEZbFswYEuB+okKr9ss2XkGlTUJV4FR+teoSmvH3GvT4lYdxUepdDMyTbiEhU+i1yQzslvylNmk1vnmPF8ymBEm+Y0YvI7W65C5NicrfBJPPNFRG+PtI1PbBhG94nmjaJH8EzN48v/p0szZSn61sK5xMsRlV/rRbCOmLSYq/fYKrbSz5MW1VOxXp5MRwftEym/yCW6BLPlyTetZj0mj65zpphw3asz0yX/EfI3jefnzs9pWHe4kaioYc4F0wTknHmdn0SJ0ruTPvHGzQo+U4bFxcCryMJi2YImK0LjJvc7Z6bUbR+jodBG0hCEMQjKoMhuTIRbpqQZ4XYuYRrmSaR18iYk5ZjVqa1gRpiIo0oiDUWbbCedQNUn9izs82I64EdUtu6ss8OUcKlw5VOKOMolar1UcUMKUgTEVsoZE5KKi6cUC3jFK4xiSjzeCMlLvA1NQMyhiShBSpKf4joDjpi/DmYczESqVgYXhM8sXsfLfiKdOzwNVNeJrhHRlYsnNMFxUYtch05ei/nBEpTFsgRveee2V7NCCsqAmQO9r42ISc2/EiZRaiaJ4oo1oqktKSZM5XOOWbz8V6LyOWWlxhtzqsA0PDABiQjmcR0ITJnUk1N/Kp8JdyQ9Zo+ZG5JvRtaZ+iwMtLfBLb0MvvPllS2XmivPMKGAomcRk5r9wClNSS8paei1J6O1NJqac+CGCnMSsERCHTth3Mw4Rylwl+nKlghPrU9BJaLuOkcijfOcTMOUZLYW6cCrwsJi+cExEEloYzAXvOKFhAJB+PkpHzl25/f7/AiIGGOWAOsRYZ7MYEi8NBpXCeZ0jhTAMgPBIAJ+nZdfuUiUwQrcskBIVxT1YtvMZ4nc8Pt8s8YpSFuof8AfSKjqTuf3c4oxbc75Q3fXjLLZT+5SzXGqkKQDQmYZ0gb2r9toUS++asC2zWbMeO74s/h6gYw2ekrkmD/rT5mzfc75VJzoTuaEm2ATlUziJDBwSDWJlHiLCzDbObWwWNpIvdfnAJOSfDrbO/DCc89jfGhYvTCqjBoNk4zg6+vq0NzYiGg0qt6tcY+qmXegrx/PP/cchgYG1So39bxB/o+PjWP/vn04+OJBRKanVXrGK6FBdMlCgyVyWpMyMTaG/S+8gLbmlhkETO/k9MmT2LtnD0YHh+CTOKML/3KxBSJRHD7wIs6cOqXSK5E49g3bd/LkKYxJ+VcCpYv014ztrKIpRHpNeU4hIdimunoc
2rcf0alp52VrrR/P6Z5nn5Vz15QIN+A+z11TQyMOy/mZGp9QdSvCc87p3uf34PiRo4hJHyTr13IBJF8qp6TmccsFYOUWlwRLUBbLHHPf9DQUNMod7e34ype/jK62Nnzlc5/HxMioMlQ+r08MVR/+4//9Kx579FsqvSqRVl8MHo39kMR/+eEvoLujA5976CEM9A8oI++TUfnevXvR1taOF4Wkvv/Yd5ShI3mlLj9WkCI5iE6MxCkOLtV0MSv116Y8jke/+jV0trTim197BPWnzya8jk5p98ljxzHY04tPfey/MCwES8Nu8mcFQviB6H1K0jz+459gz/PPJ+Jo8El6f/vX/0eVsxAwNtrdfPe+2pM01JD/MsSrO7x3P37yvR/g7PGTco6+mSDRMTmHzz71FMKTU/jGV7+KM0Kkqm0i3Abl/NSdOSPn5TE01dfjm19/BLFwmDWA3xk8dvgQRgYH8aMffB8/+eEPVZsZZ8pIDDTSCNNQ1IlziSFfi4WBJSiL6x4Z/iCefPJJVFVU4B1vezsmRkeVp0FDEhdDVVNdg7e8+c0yqvap6TASmgGnxkg+WaGQysuY02dOK5IJRyO48yV34XWvey3uu/de1J09J3YvkrBXqZgtzC2XChrVnvZOtDY1481veCNu2LoN+4U0vWLAEY1hVVU13v7Wt+EV992HQSHWCfGIOD2pCEhkdGQY586cxQOvfS3uvP0OPCfeSET6REHyv/ZVr8a2zVuUl+k2vNyfr7jhbqPZ59YdrjNJfRIY9Prx3FNP45Zdu/Gm178B506fw/DgsOjvRX5WLt7+lrfi9a98Daorq1Q7kvXFkeEN4tmnn8L2LVvwlje+AV0ywOju6pQo/Z7cPS99Kd70ugexfu1aNDc1SF62MamJIpvZhPGOzETq00SLK4ElKItljtlNBWGMxcjgEEoKisTgeZEdyhRj3S/21zw218aXYkbRJmNApK+7FwU5OeA3DnKzczA8PKziaOiDMsLPycjEgX37UV1dDb8cE6o8luUSlp0QSUEhlLclu8q7UiHpwbhEvLNDMh0Xws3NylI3c2V5uSIrD9/rkTQh8SJaGhvxd3/zUdTW1KC8pESFMy09JE77BaSMPGlXcWEhJiYmpF1cbadUUnF+eguSxxhmA5NmLpkPZkurpiJFl7GhYVQUlwoh5SivcGxoRPSSgYToQwKbmBhHY109Nq5fn9CRwjaOCQHn5+YgMxCAXwh3sLdXOpzvUMVlQOLFtx77Bn72ox/hXiEr9c6Tk59555J0+mpIARYLBva1hcUyhjFHs4OxMTFKkXAEXjmY4rs3YrCC4hXxt49IEGrs7BTjF+OXGQhJiP4aXEDShSNRRVAsJyheFRdG+Hx+ZEjc/r37cPToUbxaPCmveGAZwZCaOtQfadEP4vkVBXokmZI3JPniUh4f7pPguEghUfksuIC8eCzC52X+UBDjU5Ny7MHE1BRy8vNU20y9tavX4Dd/8zfRK6R87uxZ1TZ6i2x3Vk42JiYnMR0O634QYbv8ohuh6iRRcCNCj1Lp7MBHr9M55j7rJBgWkn5gHiOzY/ZYkhTbOcH2yTG/bJGVmSl6BPSil1AGHnroYdRIG7eI9+j3BdRiCrU6UOX3Ymp6Wp0PGjsOMDJlQOGVY/b/3S+5Gw8++CCeefoZSe9BppRH70wtCZccoUCmlMFFIlxoIvkS+xlKN4J9Zj55ZLGwsARlcR1AmSK9mwZhMR633norTpw8jmf27sXIxBhq16/Fpz79KdTX12FwYADtHW3o7O5Gb1+veEzdePxnP8Xk+DgmwtO4cdcOtLa34bkXD6JXRuBbt27FV7/yFRw5cghHjxzBZz7zabzs3nswHQljdGwEP5O8I8NDQmZRMdRePPTZz+LUyRNivAN4+HOfw/Fjx8SwBvH5hx9W4WrhAilSLPDsrdBQRtoRgm/ZVFRVSUYv9u7fj2effx67du/G0888g8ceewxHDh3CvhdekLZMYGRkVK1WfPKJ
J9SiAtaVn1+AnNxcPC/5nnr6aWzfvh3Hjh3Fww9/Xhn2ltZW9PX1obOzA2E5fmHPHpw+fUrpTDLaL3V+7WtfVft7JO6RR76uiOmZZ57GF7/0xcQngeYChwemPW7Q8JM0t27bhj1y3n7x9FPIFk+WC1k++clPqPZ86StfxsnTp3HHnXdhdHQMjY0NePqpp9RgZBpRac8NOHz4iBDQ04jJAKG8rAyf/MQncb7+PH7645+ip6dXTWmOjIygo7MTT/z8CWcg48X05BT+7d/+TS3EmJqYxH/918fQ3dWj+vI/P/af0iedjqaEaSW3F2uxxXxhCcpi2cP8lIaGGER1Vet1eDT80+Ep3HTLTbjlztvx/L4X8JoHH0BeYT5yc3OQJ9LU1IjBoUHkFeTi8NHDMmKP4/vf/y76hawi0TDWblyPl953D54Uo3//q16F6poalTczMwNNrc2o3bAOLe2teObZp5Xx/MlPfoxTp04qO0Xd8qWuQEZQfWAhpzAPniC/aBFHTl6Oo7vWk94d9y8Gdwo+agrKqP9d7/5lHDpyFJu2bMXNt92OYEYWMrOykZdfKCTcgCfFQ3jdAw8oL+O55/fgyaeekdwehMVov/uX34Ou3j7k5OTida97ABniYRQVFaop0Gefew4lZaVCSmfU1Ob58+fx+OOPK08yJpXTa8oVgmO7s7OzpYxs0S8uBByS4xzpv4giGoIeB38CxXge/MsY057E4gSmc7aT01N47etfj9LKCpw9X483vf1tyJQ6MqXOwdER9A0OYo0MNp569hkclz4fHR3Fd77zHfG4JpRneNfdd2PT5i04dOw43vj2t6t8eQUFiugCGRn4uZDZwPAI3vWe96qvQTzy6KPoHxyQtuvfEissLBCdtZlkO+m18ZjEHvDTW6SeMsBQFx1b5BaLK4X9WKzFsgDNuDIT/FjsUBSnv/wDbKrcgCEf0OyfQnNhHEV3beOMHQIR+hXa8LlNhfo5eDmg4TOXPX0XZTCdRNxvE6/h+eeeF4P+OjHWGZKIUz7Mo9N4/Zwe48+062c1iRVdgiEx4j/44Q/xyvvvR1FxcSJcPWfiVgw766AoIy9lcEqNK8M4wjd6RT2STuo5sv84xgYmsHP3diFV/m444yW/SpWEqYex9Aj4vhBXIPI5jVFcLZ8XoQHfuWsX1q1bx0g2OlGv2jrHicUiTj6PHLNtlVWV2C1eGtNERecAv3QhdRpDznDVpwK2iW1TZTJOiJnaZ3pC6Gxqx5Ezp5BZUoBdu26AT323jmVoj5LPicyKPb7HNDUdgc95JysifecGwzhld+TwIbS1teH++1+hwlgaPVPqxPOmW6lhnkF6SDqS9oyQ8FHxHt/0xjeqc2La4ZN4ttOA543lMJ+7PIKfNeI0YP60By3PHsLaUT/WhEPoGx7AYDCMLe+6T0YUTOnOmXo2LQycK9DCYumDBsMNdSz/eXtT+BxAGVIxEqlQOV2Gklum4/MNmhvmM1JQWITXyag9SzwCj3gIKg9H/mKQlFEyXg/rccqj8aeEMjPxpje9CQVFRTPKJJRBdOpN7rM88TlInilgKyjGyLphDrmlNhEpw+hAIqG+4gOAawr5C7UUtp/tfcUr70ft2lpml1j9ThfLUcL2SBrVNwx3xVHnu196N3bu3KnCCb7ES3JypydR8ViRMfvcgOQkh8nhA0ES0WlYbyJUDhiqiFt22A5PwKfaySlbcy6MsG0R8XrWrt+Ae+67T+rxKaKQAvR5lzQsg8IfRKTw13vVL/g6hEPP+AE573x+R0+J5bJvFBlKOnWtsI2S1hClagn1NkKd5S/Plxw6LdNInGPZJHvVYi5YgrJYfhAjwTtf3eLcKoOsbRGXDyvDqFPOgDEJc5kFGtiMzAwhmgwx9vpbalExqcrYSxw9HhKUMcYUQ0IUTnlxAQXDVXkuoUEjzL5JQ5h9juRVOhpO2aqfeBdrF1UMlUxPqBC2X4RGXP1UesqxEZKYWjwuZXJ6iyaZXxWncLqR
6SnMmwpVq+Sj0c7Ly0u0g6De7mNC9QnPgYQb0aWwv3huWJ/0pwRr/Wj8mcYhJSW6reqv1KuJU+JJHDzfTlyqZGZlqoUTetpN102yJhHpvkz2ieoXEZZL7siQvJzejMqB5hLJL/nMFB6Fi2D4/huJncL/7vpV/8tW9T+3TngqJNpiHrAEZbEsMGOVlBhAYxDEauhFBhzkinCrRq8iZkl3Yl+SJ8QJTy9ilJSllnL5lbi4H76YD/449yWM8azakRnlOsIl3Nx6aMBJPizX+Wf2+SCeYsL5j1DlSj2syyd1UVd+B1undOIT6bS42+oWA9UckYSh5VaVzTbpdmmROlT7kv9YLyXOaS7VvxI6h7jTKAtNUf1AHflNPSmPYfS8hAm07tLWmK5ftYPqOm1Qx5KO+eYSlqn1ly3JSf0TT0fOn/pmhicg54Vfhddfhtdfh3c8LUlpthSjP+PdbTL9o/RMpGOc1tPoTobj1ky9EhJlcYngPWRhsSzBG56/6k7zM8UVdOI9Rfw+xPxexCQiLsJt1NlyJG2Ex+lFbIsvhog3KqNtKc87JTLuyITItAjj6AFomZ6HRIQ5o2K9IuKxULgfE8Up+ljqlO2UpJuQ9GEhEDHJCMowP1cMdzwcU182j4iO9KbcEmG5aSTsiNEhTGE9on/UEd2+SQmnTImwzdRR6+OWqI/lMs+liOlL5pXzJH0/QY9FTE9schqZXiELZbk9Urf52RHt3Sh9RQ+1ld4Ii08Slpx6Ox/hSr4p6dMJaf+EHE/KcVjKi8jfmOxLv8wmUidlytmmPZZ0Wh8t06LrpHP+6EEG5Zzxt8IMRQmVmR3JaelqPrAEZbEMoW9voRr4IxFkc0ppbAL9XQMyYiVdcXSb/Cdj5Hn8YyqdUuyLGl1neDOQ5c+SbQgBFaNf8OTCA/OyqwLtzsWEOsf5vTcaL72VHSVqlK08RAkTA03jHfIGMdHRj4KxGFZHMzDZ1ifRMp738R0fqZsj9LQiRbgkMW8nEudW0tDT8EUCyIjnIseXj0xfhv42odRv2kgKUfo4Qn0Zfumiy6Owbracv9GFyShinYNYFchBcHAcY139Uo30vdKD58B4J86+aMG/3HLPeJ9atBdkxMetyselIizD3T8xOWYJ+vx5OOXIvhfRU7cz230xYfmOxlKOKhEhOUfDrV3IHosiR0Y86huBIpwiVRWrvtUtspgbdhWfxbIAnwUpI8CHCMNxnPryD7CxuFZsdgwjoRhOBSZxJhRGxaa1KCkrUe8c8bt4ZuWeucjTXeyKLBTEWNF6yy2R4c1Fb9sYpgbDKMorQChbSEtEbLka0UfiEWX7lf13cs8FmqKYs+RaP5PR4LEWaR9f7vUFEBf3YbKjD8OnG1E1HUCBuFlnYyOY2liOirVVCAU4FUXjqOunuXMj2R5utZCbGENy88cCCEwG8eNHDyLDn4l1G1ahsCoH2aWSJCR9LV4ihwA0vnoZvNbf/L006JII4WVE/QGMDY0herIJed0japXkoPgfPaE48reuQ05hATIznJeEnboJd58RyTh3W7kRMlQkJ+QknTPSP6VenM7Ih3g4/DyVSiJwBgkO9B7/Xk4bdS7+fEhUBkzD7d2IH23C1mg2isP8EU0veseHMBAIY+M7XgYE+ASQeUxdl1fnSoAlKItlgQRBce5nAjj+0PewoWAVAkE/JvxR9Aej6AnG0Bodx3iGX62m44hZQbJwz21q9daIE+vhY20u0fYjG2XoPDuBn33zBfE0ipGVn4Oc8iBCJcNYtzMf2QVBpRPNb7JMN0y4jlMjaMfI8sG7WbasnpXIMZc18zt+sah4aJEYQoMTqPJkoljIKSieQG8wjibvFKYypYyQEJTkIdnQ+JN8dG1GG/7lYgTpM7oL0ib+Um9MRvhe8QUzPHnInlyLx796BKdebECWkESwKIycmgg231aJ8g05UmZECpUeTyGGywVL4TMs/qZTdHgc1VMe1EjbWP6UtKfDF0FzdALe3CwEMjPU
S8/SRJWTg4ykmXK2ql2p4oDP1JQHnIcjz7Wgq3EapbVlCJV7UVaRjawsDhLkIvLoD8cqz5L5nY3eSRzMAEMNVK1OgCJ+kcDENDLHplCNTJTFQsgOe8Q79eF8XxtQmYN1D9zhEJQeACTrskgHS1AWywIJgqLLIOj/3hFEmwZQWlout3oMMV8ck+L9DGb60IlpvSzZEJSABkFnVWaFOwIpT4yZjH2V8JmPzxcSgyJGMlKKsW4fvvPFIxjp9iPmHcWUrxk331OMm15aikAWCY0Fsqy5DAzjdDw/t+MGXwbllBZBj4D7PrkdA0JShULEeeItZsiWuccCXgz6gVEh40kx5gSNojQy0Ro1aaUspuop7Slw2snLzwJliNfnx9j4FPq7xjHUkIWWI+K5NEwq0phAPTbeEsAdr14nJDwl2UjW1FdKukKSUj0gunB6LSj6ZUmbCqeiyOObsSRmOX994gn3i+M0KoMEj3i/0ThdHeqgGsFiHPCcqRJF2Hc8b9yy7dJ//gzZEzdQLWrJxfE9I3j8e2cw4ZvGRKgH+QXD2LWjAHfdxc8ihdU50O009Ziy5wd3LuEdFIpuRaJHSE6Rl8+gOOUc8OPFpuPY/rq7kLGhQpiMvx6sWuLknH99Kw2WoCyWBfRLsfqfWDSgJ47zX/8xijMLkJtXKOTjPFAXWxMRcXGTuv21CWAgPRez5Q0QlMMM8WLEa0AWBgen0dDYhfONPegfiGJ0OB99XTLS9nbgjruK8fJ7NyIjNCIGlNNg8zUs2nviEnT1kqfccmbfvNyq98XY0mCKavzpQL9YMY7K6fdwgQGnqLgsPE4jLrUrD0o9aGJ7JIF6/uaXcoISH0BEMg2PxNDVM47m1h7ZDqgvLQQCQZQWr0fAV4J9e44hGp7G3XdX4/Y7i5GRNSW6ieGOC8WLzoZANVjPpUERE/+R5KTdPObzroB4U/qrfRIufTEu7Z6UyAjPs1Tp83sQifL3tdgv0jq6U4qYvNJHfLbEwYSQkTdT+i6Aqek4xsam0d83hp6uIdkOybkLo683G11dQoKSdTrehy2bgFe+vAaV5SHJG3HIN9lGfUyZH0yPKMJh20T/ANsZ0bTJ7zaeb6rHVHkGtr71PjWFyidxUpHql0upayXCEpTFsgEvVZKCMickqcYR1P18DyLD0ygvLIaf01409HLPq8kulVDTiF6+LKZevAiKxyPmkQ+V/DkYH42ipaUT586eR2/3IDIy81C2ugbVGzejfyiEb33759i9swAvv6cCuYFJ+GNTUiZr0IYtMXXnbA3me2slU+k9lsxVeDSqqh0iemGGJi21JDsu7oYYZggZQbwGqGdXYqQnpoWI+tDW2oauji4x2tIfvlwUFBWjvLICxSXFsl8IT0YII9NTeOSRb2Hb1k24eUcJsoLdyBDvLODRL9/GpC71oVQhA82j+kXoVJh26hdRRWNXGj3daCBxTpvUqkZadJ5PqYPurW4XJ1j94m3ySw6SVtoU8ItVD2ZKW6W9wl7T4QiGh8fQ1zeInt5B2ZKQBhCZjiAUyEZeTjGKi4pQWFiGeHA1fvCzwxgZHsStN6/HnbcWIzezQ+ochf5yhVwfrF/AZ1cabp1nB1Pp9mihh663MlgSwh2dnkDf+DCyyguw8YG7gQIZgEid+irhX71nMTssQVksK/By1VM8AtqXkThiZ5vR0dCCydFx7aFIhPIsaAC5paGl4fNkqOcS/lgOersmcfR4AzqHJuDLyRaDFkJRvkdEtgVcDSFm1B8QY5iLzs5R8Th8yM4aE4M5oQiQtkxTX3rMfVul5uOxSU/zrQ0nPwukG6mpkKvE2IbMYL60IROTI0B3+wjqz3egbWAQ4aAPwdwM5Ir+hXkhhMQTyskKIT8/E8EQS52SUsQYk9yEAGJiUYeHRpGfmyvkPi51jCkjHQ6T1CQZn8lJ39Hbow7k3+QqNwNOJbqPZ2s3w5Pt0W0zadmZMnDwhBDwZiEo4hXPNjol+g1Mor2tF+0d/RgY
HkdU2ujLDcEX8iMnJxNZ2QEhsBhCInlZAWlvpvpSe0A8MOo+HS/C4RNtKCrKwKrqbAT9k1LPhGgdEVW0J6p6X7Xr4jBnLtESVqNCklBxEp5VmIuyDasQXFcFZEiAXk/vwJRkMRcsQVksO3C6j6Nv3uJilzjvpYXWgscUWnRe2Uwk9ljt9wJ9R4ZwcN9RtLb0ICu/BGtvuAE33F6ErEqJ55wT01MIGlFjgUxZJo5IpHO2qWCey4Gpj2A7OHPH43GRDqD9TDfqzjagtbkTUxNAXkE58srLsGb7OqzZBIQKJZ04HDP0U/uTIlKYenHVBVMXt8JJ+z5/BLfdvR3YxLQSlsjvwKQ3cMepSGW6RRhBppONAffNMc+LAVXrk/FGyyQ627vQWN8kg4g+aV8UgWA2snMLUVZVg/K1NSirBXIKpPR8ycNzxurYJHrVqWD/sWxxMhPXhIHZT91eDExn2jyj7SmgTvSE/Vox/WSQSsyVycINS1AWyxK8atWMWqpxMcaDdoCGqwuoOzyBgy8eQ2trG/ILsrBVDPmNN21Ergxswe+v0pDodQcal2o/mN6IG7PdWYl0TGCMORUWYZwxpGPARAtw9kyvEFIjWpra1BRbvozM16yrxLrNNahelYdQsaSlsWc+IlUPwh2WTi+GCZdMHgH+4ve+jbtesx5v+v0dOi5dXneY2TceX8I7okJSqNHLQNo1Ku1qb5kQ6RKi7VDTdNORCDKy/NK+TKyuLUNVdYmQUgnyi8Swk2BIRiwrXd1uGB3dSJeOmC38SuBuPvtC9YcBK7walV6fsARlsQzAaS9zmXJr9jkmTbV+YmQHgKMHW3H4wEl0dwwhJ7tI/czEzpsqUVEtFz29CyElfjEcHv0gnmVxFZ82Hsky9TMUThsyPj30w26DpH6xBPloMB2f56gXSCVYmSqX+mFO2XVGcOZ0PerPNaCjvRuRaQ/Kymqwfu0mrK2tRmUNkFsk+ULCqZI3xvx82C918WkRt7o97DFjDE2bjJ5mq9uktGIWCf7mR8fx+KON8JQ14o8/9jrUbND9pPuBK+ak1ESTdMtdTUgWTUi6yWFx+jqmZXDQgeaWNnR19mGgfwTxiA85mXlYVbkaVeIZVVbmCxlByEhq4UICngo5R1SL/kdcRhAecFm4bpUG94ywLWaf6fWW/UHRx1pTrb6O19D5Es2aAzplEu59ghOp5rzr88ErVPeTRmoJFnPBEpTFMgFNY9L48ibnSjWDeK+Q0v4RHD58HA1NjcgvzsP2Gzdi2461qFodgJeeEu2C+2o3diLVXlzOHZHIIzskPtlqfZPQD+MpAkaNAi1n40JITeIddaCluRX+kEf0LUHNmjKs3VSNCjHc2bmSVk0XiZh6XLozKEl0PHJLKpjRyczpS/WMTus0egz4u9/fi5GufPTGj+C1/6MW7/mt28U7kzQqHZVwYOo3CEt0P9B8fgqtTZ3oFCLq6OrBwNAQvAEvSsryUVKej1W1VTJIKEFxcQaysqUYekWsnizkdI2CUdPdhBl1mva5A51j1S4dkkziLsgEmkQLCZbtrsvA1HU16rx+YQnKYgnBXIrJm1z/dcagPHDd3/SUTh7txuH9x9Hd2oPMQI6Q0g4hpVWokpG/mr5zDLf+gpu2gh6x9mZUm9ZcGDUSkIAZCU0Cbs2+U1rcqdAEJ4gDmO4Dms4Po04IqbWxFUMD4l7EQygurcaWTdtQuyZbeUgoEDFTdqpYshnf7NLPMugN0BPTbTGKmQoN3LoZmDAKy+SWZQpLxEL47r+dwy++2QzPeBnC2V0IVHbgj/7Pe1C2WZK4MQ4MdIlnJB5efV0jutp7MTw0gumJaYR82SgpqsDqNetRUVUupAQUlUoThIwUybJdCSJi/fzKHhcrEMY/YsOZiGfJ0VeRjtNWBvGPcuVM+wlz7ApTaQUzptnSpFswsGx3XQZXq77rG5agLJYgaDwpBI1V0srHBoHjR0exd89RNDa2oaAgiF271+HGXWIQq4WR
QpKIdsAYdxdMicSc5uJid4TK6NaRYKDLBYiI6RWPokU8itMn6kXXDvT3jyCYGRQSKsOGjVWoXVcqBjyAAI23cQaNUrMqdxHMpbsy0kZvEpPZl8qjIfVcKCTk850v9+LWl5UgW8glK1/4aBKor+9HW3MXerq7MTwyiKmpKeTm5aBMvKLVayuEjIqFiPKRkyd+rXmuR7jbw6pEhQTXJE/rpSHRRtlJkJYTaI7nBNPOJ93lgGUnFHSB9V2tOq9fWIKyWAToS04/V9KiPQH3HI8L4mgcO9iJA/sOorVNPKWsAmzbfhN2iqdUvVri80SUseMyav5yEw/0Fwa0p5EEazNmImkuzC0w29YxLsY7IkxBphAhpBHxKpqbunH6dAOaG9owMjiOPNG1pnI1Nm26AatrvcinN2H0dSTuUT+fJ0UZf8gU7BZWqbczn5a4j0wKNyTEnTxVb4NpYEL0H2oHHvvq8/BnxjEWHRByEsby+JCTI0RUs1Y8oxpUVIZQIu0Qh1WvjmMXO2WSfGLeadU0E6i14l935aa1lNlg0jtwt2M2zFXcNQGVdCmaaMLF2mqRDpagLBYN+hkNCSUmBo0mLflMaXoIOCWe0uEXjqKzuQMZmR5sEy9p2+71KF+dAz9H6alQU2EG6QxCwlrMAmNcKM5wX6Uni4gVdmcVgz7YBjTVj+Hs6To01DdiOjyF7LwsVK+pwsYta1C1qhglJWK/uSiDzTPFGSFkq4laHyTpJmnOLwamYx+qBQwipGUNbjVVJCCJo+KF9vUCXR3TaJG+5ZLu6YkIwmNh5ASK0S6e0m33bMOG3eUorcxCYbHTBvMczI35KOiCtjamjZJ9Xh6Pg2S22XGJ+iw89Nm4EFRs0ZVbdrAEZXEVoS+tmZ4Swb9i8M2V59y3cTGcR4+249CRE6iva0NuVjF2bL4Rt+2uRdEqSWCmwrz0OOgtkURoMWk5+TzG7S25DYKpX5OOrjZZOddZaa9L9k0wt+4iJoH+DohereqLE22tvZgYi6K8pBKVZZXYsH4d1m7wIJPv5lBPqqKLFK9CEzHJQxfPPS0XIqmX+aupiluTnlsp3CQ14LGbQKTa6QGgo3MEbS2daBPvs69nHO2t/fD7gsjPz0dxURnKSsuwcX2peHrSveLdffuhZrzstatRzOd4rEp5SNLn6hNLWiMDvb5Pe6rJGE4fmqNk2itGanvdRTNuAau6fFCRVEWJBe6LFQJLUBZXFfriShpoDbKMc7MOAycPT+KAeErNTW3IyvVj641rsOPmzSivDoFfuUk4Vswu2dyLtJLGYD43PwmC6bW3wWO9CFhZYCZIYgLoaQAazvbizOkGdHb1YFI8pMKiHKzbUC1kVI2a2ny15NvdHLV1VFL06SYMF3Tts4HaaTJlcUkqk79sPPXVAUkIX0/0AW3NMTQ2tKoFDO3tneo7e4GAF3kF2aioKhGdq1BUVoDishCycoTaFeGLsDwqJNtH/+sY7nr5jaja4gSZulLrJNKFqVxE2sgVgHRndqX2xZXBEpTFAkBfQsbom2M92hZjaq4w5x6NDAkpHe/CwQMn0drYhSyuvtu2G7t2VaN8jSSgB0KjKVnV756qfNrLMe89GZuahK49WZk28Bomn+RgUDI4gegY0NgYw/m6ZjTWNaK3vQuxiTCqRaHaNRuxZkMFqmuBoPl6geE0bpXHwDd9WaemE+1ZsFbWbTQ1FRPUl8cmjqm4r5WasejMJDEYB3q7IATUg4bzjejs6MZA7wimxmLIzc5XX3gvK69CdXUV1qwGcqkzp+gMGVGU/m4vlFu9zO5rH9uD2+/ZhrXb8yRUe0PqO3X6RMxEmiALi4WCJSiLBYIxuDRo+pLyKEvugJ7SkTEcPngCdfWNajXb1hs2YtfN67GqNgNePmw3nEJ7mbTV+rmFYwjNxWrs4kz7aHSgGMIgQRjL7IDRQpItjcDpM0JIjR1ob+tQEdWryrB2XSXWry8VI5+NrALJl+4LBgYu3dwwWhA6Wutm
9qmbeYGUkvTkXAhLK6TfutqBpsZe8Yq60NXVhcGBQUSiEeTkZqO8qhTVNWWidwlKynLEUxLuMQRKpBQ5H3zjv17ErS/ditrtWTO6TamdinR9YmGxQLAEZXEJMJcKt0YIWimxhDx0GaxpMa7nTvTi4J4j6GzvlfgAbrhhl5DSOlRy9Z1Zjqx+2Cgs5tp54z/On+72z7B9TEG6IRiuiYj7XGBByF/3CJ/RLuM6KdW3NU3izIkzqD9bj8mJKMJhD8oqV2HLthuwelU2aqolC1fYUSdVlJCtV1hC9GNxUUUi5mkLC3db/6SGenrOTMwlRf1lMu6kQqoZ7eeXJEaFMNvRIJ4cv7jAxQv8hd3MzGwhzvWorV0rRBQQLwnqV2KVCpQZCxhIznqgYCYztX+mddAKmIwabrVIULfcvQVrb8jW06kqYjbFLSyuHixBWVwGeMnQAGoPRVtHjaiQ0qnj4zj44in1LCQj6MHWLaux66aNqFqdDZ8hJWPvjM3jdrYrUeIYpc0+RXsjDDX+xwyyENUig0Bjvehy4hxamloxPDiCYMCP0ooCrNtUjfWbV6O4THwrszrNDaOT0Ue2NNT8GQUSjOYBCdDKiDCh6Q+dySwccLFGAlHx3oZImM3jaGhoQ5t4RkNDw4iEpxEMBVFcWojyyiLxiopRI54RvyTB50WqONaXqu+8YRpEsKAkGGNCZhBUQn3d1xom5cwyLCwWGpagLNLAXBJJIiASRJBqp8aA08eHsXfvQdTXtSA7qxBbtuzArl2rsUo8JeWV0NApQz7l5GOANuDa00hFar1OmtS6CXEYhjuAhvpu1J2rw/m6RkxNepGbU4qK8krRZRPWrPajoETS0tAbAy8qxNQPAE47xfGv8Tmom/hKYqF1/QzltJyjDy23+70oQrVR7yoIZ00KGbW3j6GzrRutTe3iSbZjoK8fGf58Icx8lJTyhd0NqKnJRrHol8cvkZPEWZYRVSm/JMHvBpoVh6bX+FcnSvaTjrkUsMUm16MfP4ib7tqIdTfm6mZKmH6qRrhPwOXVZWExX1iCspgDnHTTngHNtPodIWdJV1gM75njwJF9x9Ha0IJgthdrNlfgxt2bsXZ9hn5Pydgut01LBxYtm2Qy7YnwmFm04aURdiB2ur8NaGkYQ93JBuUhjY6PITsvE6tqS7BhczWqV1eIh+SDl6sATVZdWLIyp179/pSxxKyXU2QGyUzagxBtaLVTl+dNAiM9EAKKoqO1R61I7O7sxsTkBN9zRUEepxDLUCUeUWVNEUrKshEiWboe0yk9Hb0SYuDenxdUy5ytyTx7Ie5U3/zEIey6fT3W78pTnqOOc3tQbjDX7OVaWFwJLEGtaOhTr5+Z6D1Cr3hzDDbF2OIR8ZRODuHFF0/izOlmBH252LphG267qRarNkm88ZSUk0VPhD+LrgP0IgBj4oyxS/UCBKl1EsIXgy1S94mzqBPvqKW1C5NjEVSWi4dWvQ4b1q/F6nVAJr0PGnyuiUhMg5Fs+N4Ui2WhukZNfzzSZOhRH0INkJ9UqIJOokGHimBytYoujs4OIaKWTnTIlsu6x4YnUFIoXltZNcpLK1BdVaW+rVdAvbgIxPwmEYtXTRbyj4t+qjpHIzUAUJEJUdGXDJZmziuRbHs6sHY9ENAEtfM26dfd+S6CMh5UKhg6e7kWFlcCS1ArHsaQcRtHNBaFTy2pczAMNJyIYr94SvV1zWL4o9i2Yz2271yPNeuy9HfXjPGWItTFJPZKfV07xW7pC83Up0UnYQGuBytChD3twLkzPWgQQurp6sH01BSyczKwfuMq8ZBqUFqZj/wyyU0yMvXMtnWgjbDeuvXQZMmCTEMc8FmWWrgAtLcNiVfUopZ0jwyPIBqNI5QZEk+tHBWrilFZXSBeUS4KC7zw8rlWSt0K7jCzz9vvUr6mMG+YNlJYvpH0MKmIb33yMLbdtBpbbi5KM8VHzF2WhcVCwRLUioA+xUnDrI/1iFm5O1pIKoR4CMeP9WH/
C0fQUteBTE82tmzZiRt3rxVSknxcPZaw55NiusKy5Rs/hi2cctVW16X2TT0micEY0NY4iTMnG3DmTCN6uwcRi3rEQ6rCxo1bsG5dGaqrhcLMlBiLpvj1yj/zfQPfDG9Dex+ESwMNo4MbU8Bon5BRxwgazrcIIXWir2dAyGgC3ngApSXlqKpeJXrUoKoyhNJyKZ3va3EKkdyaqI5Tony/yF2J2ZpEquNcYHyqQlcK09kGc9fh9qBIUFt2VWPrLaVKVd0Sxps2LbSuFhbpYQlqxYAmiKeaBlR7EknLKhCv5fyJCPbvFVJq7UAkHhFPZS123rQBa2qzEaSnZJ6XsBhmM3ZKtvylVw3xSlQ4vROBBHs9JC4HEhwfADpbIYTUhPPnW9HTN4CIeG6VVWVYt6EKtevLxSPJ0M9oTD2mLlONHKtV5RLvChKQLDmtp4nA43F5ZgZCiAM9QHPDGFpb2tEpZDQ4OIzp6TB8fg8Ki/Oxek0FVq0uQ3FpDgoKgwhx+jIdr6Ru1T7bTq1MBJGa+FrA9Awxd716AYi+Fr7zmaPYsL0C228vc3lQ5pq5lvpbrHRYgrquoE/lTE+JBsVZiWYODcRTOntyCAf3HUdTXRs8MR82b9qOm2/eguq1kpSf8RGjHIvFxFBNwufhRI8Za7Mg+atYgsNs/XbQjDpo78RWTwgZdHeM4eTRszh7qh4TY9Pwe4OoVO8g7UDlqkz1u0FBPqsxfMJtfBoxj4gynhQWLGQXN24U65e/rJOcwCBtYzUknL8Z1d05LjKA+nPNavFCZCqKiPBYpjBgdfVqrF+3AeUVmerdIv68hPrqAstiuZrnpBLJoBZT6HbrSIo51pXrEKZLhUm3NKF/k4ltAL730Ams3VKMG+6skEFA0rdautpbXK+wBHVdQk98adPCiS9teBSElM4dj+LoodPqK9xej3hKG6ux65bNqN1YCB+9FsLY3AS0QdYv07Jc/UxCj7pdHpI4aOFeoP7sBOrOtqCpoQVDA0PIyswQMijFmvVlqN1QgeKKEHz0SliIuQKNBUxs6Qnpr51rCAEoQvTrT++YdMSENG0Q6GiNokW8In4ZoqOjC+NjEwgE/CgsyEN5eRFqakpRtaoMhSUh/WFXdxtZniE4d9nu/dkwI026W2o+hSwe3AT1/c+dRO3mItxwlxCU+mcJymJxYAlqWcKcMm0+zLE2I2JkTLSxKFMkpSHs33MQdWdaEPDnYuuWnbjppnXqPSX1TEn90B+JgO/a6GlAba1JCuKBcYk5CzTBpmyusOsEzte1CyE1oqm+CRMjEygqrEJV5VpsWC/EJ95YnngnyS9HUKQuz6Rk5/s9NIH08vjeURBerqhzt8GQhoGQbGdHHB3tPWhtbkNzI0lwFNGIBxnBbKm7GNU1q7B6zSpUVHhQKJ6gh+s+2ASjuyk3LpSrXDCKaGHapeBu7IyI6w6pHhQJ6saXVEq4JSiLxYMlqGUNejOzPFMSj6L+BHDkxeNCHOcQjk1h3cZV2HXTVmwU46MWHBjDz8xmX3krbnZIMU1SLn/Urr5uAA314iE1t4uXMomcXPGQ1pRh3fpyrKnlL8UGNSFRPeOlmDo0F4A/38BdLv92JiFnQrgyPAz0dAMtTb3iHXWrZ0ZDwyMqOic3B6XlhSivKFC/6srnRbnilQVIRizM1R0XwFTm6LLSkUpQqzcVYudLquwUn8WiwhLUkoY+NUkvyZwqGoz0nlL9iUEcPHBCPKU2+D3Z2Lhug5CSeDF8Tymx6IDmaBpecRf0Cmf+4bMd2VDcRl3KHBRCOn2iUX3pu7WxA2Mjk6iuWoU1a9di7Ya1qF4FZBdLWrOijfCGEVUExMIc70ugolP1JsLA2ADQeK5fvKIu9HUPoKerD719AwiEMlEj9ZUUlaCK7xZVZqlVdEG2J/XdItpYTg2q94sYwEiTgHA3TqviVmOlInWKr3p9Hna/tMZFUIamLCyuHSxBLQPoZzA0INzSSLhcA/Eymk8Bh/adFE+p
DpNTI9iyfQNu2LkV6zblqR+gMzY6LtljcrY9nhg5SrYMlDjj4RD8enaLEEVdH86eakBPT7+UOYWCwixs3ERSKkNFVQHyqiUt1aA6xm4Za6+mCimEYawk4lJHfw/Q3hpBw/lm9axoaLBffYuOX2goLilGbW2lkFERyioK1YuuIdMOt400dRsh1NZc0gmF1BFhYtxIxq5cuAnqBw+fQmVtNm66Z7X1oCwWFZaglgT0KUj1lLRZcDwlisNJnGY7e2YU+/ie0vl2eMIebFy/Cbtv2oF1GyXeGHOHeLSRkW1cL2lIfKyA4Lfi2iI4dvQU6s41YaBPAiIerK5ai/Xidq1ZX4xyIaOAefeJBSgRAqKnkvBS3CznQLwifouuvyeC+vpmNDd3oKe7D6Mj44iIZ1ZcUIby8kqUV1RgzZpCISMgM1uKNL9dpLtB6yr1RT1CYOKVJeC8Dax/Lt40iL2W3JfG660TZLrSwIld8UglqIo1Wbj53jWWoCwWFZaglhCMp2RMqDYYjuEfBVrOAvv3nsTZc+fFcESwdkOVkNJ6rN9UDL9ZETeXJZFip/uEkPjZoGN1aG1qQ58Qks/vQ25BCJu2rlLvIZVX5iKD5bFq2n6qY8o1+wx3Q7hqXLyiLv52UUM7Wpvb0dvTi5GREfgDPvXbRVWrKtQ38iqqi1BSGkBOgeQjEbHZRufUukTIMWpXpRFzyUtW9j16ftLZOolTYQlqXphJUCdRuVY8qJeRoHR/2X6yWAxYgrqm0J5M0kzqo1k9JfEyGs6M4dC+ozh9sgHxWBDr1m7CbbftEHKSZHxvyKRNJQwH/BnwpvoxnDtTL3IOI8OjyAhmoqpilfK6atcVK89FfRXB7SEZcD/FMvHnIgbEK+rpGkBjQxvOnWtWCyViQjRBfyaKxDNas3otateUoaRMHDohIo8pn2WxTDXzRzIOIxZ3nlWpl2r1O08mIacluR932FJ/zUBSO+REzElQqpxkWncqE5qMXblwE9QPv3AS5avoQdVagrJYVFiCusZIdjaNM40xrbU2DArT4uGIp3Rg/3mcOH4a0eiEkEgZdty8Eeu3ViDbeDZuK2ssR1iydwPnz4dRV98i2xb09w0gJ0cIaVUJalYXY+OWKpSWBZNfGzdkZKyQSxX1QYYRvlsk3ltzHzrbe9QHUjs7utQCi6KSfBQU5YpHVIw1tRUoK88RT0zycLEEy3FbNO6zDlOPA+MVzUxsEjqTS5KIezotUzIuicshqJmhFm6C+smXzqCwMoDbXr7OEpTFosIS1FWDMYVGCN7iYgRMkCEH8ZSa66ax94UDqDstnlLEi3XrtmPHjh1YJ55SBlfIGe/GbSWEQPjDd+fOtqPuVAOazrVgcjKMHGGxNWs2YtOWbVhbiyRpML+bD93rF4QvR3s0EXW09aLxfBP6uvswPRlTXlF+bpH66sJq8YxKSj1CRlKMeS5FnYxuaktm096iIRPjO+rKzQu3+r0jfQm6G8ZjLYagVEmJJLosM8WnYfKkQuc1cKdw517pSCWoggo/bn/FektQFosKS1BXFexazlE5XcwPm6kH+wKS0mnxlPbV43x9M8Yn+lG7oRQ37lyFzVurkVUQTBKKsQxjQkgdQP25EdTXNaC1rUX9DlJeQTaqV5dg7foKrF5bjrzCDISMh2TEqVZ9qm5UyukDWpvHRDqUV9Tb14/RsXFkZGWgtLwEFZWFqKoplnJLkV8EBM0XuklqpixjuczWwL1/RUgt+GJw+nkGFkyZ6xpugvrpl88ir8yDO+7faAnKYlFhCeqKoLvOWSPnCJ+TGAsuID+Zw2khhbNjOPDCEdSfFW9nwoPa2htw663b1Oo7Pz0dEoGBkFhPxxjOnmpCY10r2ho7MTEeR0VFLVbVrMbatWtQu9aLTH4zj54MP1GX4hVNCRl1tA7hvHhXvZ3D6OkawtDgOGKRCAryclFRXo1V1bUor8pHaQWQzeda9LYI6i2iPhjqmZSrZVo8Hhoxw1LaZBlfyYTp
UP414RZLHW6C+tlXziG7KIa7Xr1ZzjvPrl3FZ7E4sAS1ANAGmkzEZ0pivDnsNHezeCx8pnT4wHmcOHYSk5OjqF1Xidtu24b14jH5zfQdwSXfTRAy6ceZUycwONiLqfCU+ro2fwNp1ZpSVNSUoaBEiEB9mkhnU5jQU3Q9XUKCLd04d7Yeff39CIfDagotmJmBdetrUS1llFfkoaw8iOxcyWem6IwYuPcTMJdK2kiLZQz318w1QUWFoLZYgrJYVFiCmhfYRZqGkkZaQ93U5uGIuYMjQjQNYex74RDOHm9CdDKI9Wu2Y/euDdi4VeKdr4Rzqq2zvR/HT9eprzR0dQhDRXNRXbEZ61avw8Z1sr9a0hkS0wNcxMel/M5xtLW1o6WpXfL1Yrh/AtPi5GSF8lBZXoOS4lKsWlWDyiqggN+ic6/SYzlKV7OknU+K9GdgdRMYZryhRGKBMVUrGTPP//WCpAflEYI6i6ziKF7yqq1yaesVljzr18+ZX+nX8PKBJahLgiEpx0xr264h5NB5Hjh08AyOHz+DyYkJ1NbW4PbbdmHTNmGHLPGvBoCOZuDs6R6cqz+FgeEu+EJeFBblYuOGVdiwcQ0qq7P0x1tpK6ZFhLN6O/i8aApNjQ3o7OpC39Cg8EcMmdkh8a7ysG7DapRXFYlnlKOWdHvNFB3hkFrae1JGx+kjZsOlprdYLkj1oDIKwnjpa7fZ022xqLAENQO6K5JTdsmu0TevWHu3jQ4LeTQAh/efwvEjdZiaADZt3IGbb14DcYAwLcctbUJI9YdR31CH4ZEJhHz5qCiuxdZtO7B+E5BfJuVwQYN4XeF+oLl9EK2dbTjfIB5VZz/CUzEEfEEU5BWjunIVamrWoGZVBopLhIg4RUdd+EjIEBHV9JovhBO6HbolDDGfZTWN0EYpeTw7zKUyc/Xc9Qy21whh+pG4vnxJTVC8iLzKgwrkjuPe1++UZnJFprlGljPSnUOC55Htc98TFksFlqBmAac89DMlXrSulQcSTFI68Hwdzp1oEhKKYP369dh2wwYUFAN9XPZ95jyam9rR2zsgYTlYtaYMm7etRe36LP31BMG4kFFdPcQr6lJfXOhsb8Hk+JB4RLkoKs1DaXkBqleXo7KmCPnFPgQ5RZfOTpj7ijxE0MbMcp+ZE52MZsgsidNg5REUwTabzjXtvh4M9ux4+tEWBHPGccdrNjsh1xPMgI3ES/CHNt0riyyWElYoQZlRlDE+biNED4OekhghY4/kWu4RMnlx/3k0nm3F1EgMNWUbsaqyWn18tbOnH+095zE23Yec7Bysq92IDbVlqKoAMsQ76h0Uz6htCm2d59DScR7DowNSmx+BoJBXzVqsrl6DyvJ8lJUCOVxFZ77QnZAIYvwAq1miLqBqM8bwzmnUfpN++0jHOmmEVEyYK5dFWrCn9ABF9xYNmKvXxoEBGWAM9o9hbGwc0agxdhr8+fvldlvx6vDEfcjyleO5Jw8hkDGJm27je1BiymMSywv9CuGZ+WNbC4Z0uiX7Pw6vz4NAwIeMjACy87JRUh6Cj7MPLnBAGlefLRF/Sq1UtXfJUsCK9qB0w/lwmBemISjnAY5EDoqndHR/H86f6lSLGbweP3JzMuATsoiEo5iajqKopBir1tagalUWQuLl8KfEO9rG0HK+BSMDQxgdHxWuiylPqnpViaQtRmF5ForKM1R6H8nIwNhDYsaWmsrOxe4ZnkpJk+6EqjAhKRZxsWIsCJ4M9pqeOw0PAMcPjuDk0fMYHZ7C2OgEgkG5Fnw+MYDJgYNBLBpbViSlCIr/4iHEwjJE88fFqMfVACzitOVyScoQ00J73u7+devG8xHwm6Wxci5iUWnDtGzDcs9OIRDyITs/hDUbK7H7jtUorHLS8SdapA80QVksBawwgqLnpA2PvlXkQpTRYWLGZlpIqRk4dWQEh/aeRd3JDkwMeVCUW4ni4lJkZgaRX+hHaXk2cgsyEPXG0T/cg4HhbvQMtGAyMoxgKIji
EiGiyirU1qxGWUVAveia+C0mihmQq3o5UtcjcE2RRj9JwF+yNQndL/nOBoeglLeUelpdxiG5Z5GE6S9nlOB40BNCTM8+cU6uiRYEUIiaqg0oys9FUOyfVy4fL5Ol6dDldle5m0Bbz2O/E8i2XEl73P2zkBzl1sm9P9v5MEkmeZ+PTKG5ow4DY62o3ViE+19zK4r5EzJMlPhiPu+3BVTY4pKx4ggqKc4oSbhhQkjp6IEw9jx1BvWnOzE6NIns7CwhmgIUFOairKwYGUJOQ8P9GBnvk+t2CuF4GP4Mr/rNog2bKlFSIekrgsg2L9vyuk7tWXO9u8PTXv+pGQl7o1xTyCk48EwET/z4aRTnFWLNmrXqF3y9Pv8VG+yliuv5CjOnK/m5LI3IVAzNjU1o7TiLW+/cjPteWwuPmtXgQOUiA0KLq44VRFBsplydHByJTLaKp7R/FIf216PudC/GBoII+UoQ8GfBKx5ObrEP8eAoPMExBLPC4jllorSiABVVpahZI2RUBgQ4j21emOW1rK5n1jMpvMf3R7g+iJGJCRRnn0n1sy4dZrG44BkRg6S8Jg9i48A3HzmPhnMj2Ll1J4pk0EHDRo9ZPQtUYJ7rC8kWXYdtUy8cm3bJXSfn2hv3wyenMyDndkLO+dETBxHMHcE7f+Ue5JSwF/S51qv8LBYDK46g+s4DX/rYPjQe68No9zQikRgCmfy9okwhIXpBeSgsz0bV2iJUrC5AWSVQzBdluYrOLPYxW11kGvCVV31xG2K6sJN1uN6zWHTwNuDc0Bjwhc+ew4h40du3b0Mo5FM/I6KmjTR/XZfeUxJs3PXXwJkEpaHuP2caXT0mk/N89swJDAx24Jd/834U17AnnOl2i0XByiAo1UIShhfRHuBbD/fgxL5mVBavwqraMpSvBqpESoSMAlxFRzLiDKAZOCnjFEU4Pi0Gin5P0vPhBcxvMGiPKDnSSt4OfB6kdhLQD4u1mD2LxYJcF/SciCngi586jfB4MbZsLVWedFTOnfpylYhZhGYJavkhHUGxnXHHI6Y35Y351MdWzpxuQ8/oCfzKB1+FwlIVLdD2w+LaYsURlNqXUfKUuPTqi9/8wKphCDdTmH2m53UpxzRU+gf0klBh8s8r6ZiM+x6fu6AkUjvajsyWAMxJkVPxvS+2ovF8D3bcuBsBXhe8XJx4EpQaVwgsQS0/pCeoJHhqPXIzq2l3OTh+/BB8gTH86vvvdp5J8dmAXd13rbEyhgTKsPCPXGTiCSE3gmD5FKK5E4iExhEOjiISlP3AFGKBMOKBCOAXJvLLBW2+gSc9pUbRXLnlFgnnj/fxXQsOsT1qyTHrulDc/3ivrJjZ1SUJngBnV07H6b3AqaP92HnDbmSIQfJwxMEEYtg8IgRPlz1l1zHkPPMZI73mndt2Y6gjgOeeaFFRnOizuPZYGQRFqCEwmyusIu68ByHZy4QfWcJBObLlfkg6JCBxfMgkaektXeTdD162iU6c7zUsRXK0ZrGYcM4rN+JNP/HT57Bl83oExXPSJJQ878ZL1mErQa4/KO+Iz5tmFZ5kbnkbc1AiJLXjFjzz1AH0dfFdSXOXX5/9s1QxL4K6fkb6DimoEfHFRCAeUVRYil8GWBC4imbPp764aD2qawlnrlbOw8GnhaEimSgty9ZkpN6DcU6WXCuKnOZ1zVwPcv2Cd9tsYv5yKpD9wIUxRUU+5GfX4JmfH3d1zfXdR0sN8yKodG+A05guX4PqvjRnim6SB01NTTh06BAmJyaFpxh3dcHfbYpEZn4yx+Jqwjmnwk0v7j2FtWs3aOda7ghO59gpnZUJM7PB9/f549ObN9+Ac8c7MdanggWWoK4lZiWoI0eO4F/+5V/w9a9/HSMjI07o9Q+S8dNPP42//Mu/xI9//GOMjo5eEyL+6Ec/imeffVbVFePwzeIqgy4s0NkATI4GUFiQJ0ZJBl2Jrwg4MKfe2qUVBUVQcs4LCzIRiJXg1LEuuQZ4Edh781oiLUF9
73vfwx/+4R+io6MDjz76KP74j/8YQ0NDTqxGOq/qesEXv/hFvPWtb8Uf/dEfoby8fOHaaopxFcdrfmpqCocPH0Zpaamqy8uVFxZXCQ7TOCPlU2c6kFtYoJaU68UrM881jzjrMzPU4vqHfupIVFRW4ezps3IRmKuAMSbW4moirSV86KGH8Gu/9mv4x3/8R3zqU5/Ce97zHoRC+iOqnZ2d+NGPfqQMqttwk8B+9rOf4fTp08rrmJiYUOG9vb2YnJxU+5zC4rHxEGiY6a1QTBqGMX9fX58q79y5cyrcoL29HT/5yU+Uhzc9zV/0kxGw5DXlmDDixIkT+OlPf4rW1lYnRMN4ROfPn1dlNTTIMNpBXV0dmpubkZOTo6bdDFgu29TT05PQy5QzODiYaC/D2EZumce05cknnkRLk+ghXXb6xGk88+QzmBqfUtc8pxOzs7MVMbFs6mXKItra2pQ3x74lGD48PKzK53k4deqU6tsDBw6o/KzP5LVIhdMvzoY/j18igxDDS8krOol0YRbXNww9cda9rLQSvd0yQB+XMM4BKy/K3l/XAr6/Ejj7CXzzm99UxvnlL385MjMzUVNTA7/frwjgv/7rv1Tc1772NeTm5mLDhg04fvw4WMzY2Jgijs985jNYs2YNVq9ejd/5nd/Bxo0blSdy7Ngx/Ou//ite8YpXKO/s7/7u7xSxsVwa6TvuuANPPvlkoqyzZ88qgrzzzjtRVFSE73znO/jEJz6hdNy3bx9uvPFGZaj/9m//Vm1ZDo0709Pz+8Y3voHx8XGV9qabbkqQLIn1S1/6kkpDYvnCF76AqqoqJf/xH/+BvXv3IhAIYOfOnYqoiOeeew4f+chHVD0kp//8z//E3XffrfT6X//rf6GiokL1E0nkr//6r3H//ffjzJkz+NM//VMMDAyotn/qk5/C8NCwIsCvfPWrOF9fj5fd+zI8//zziijZ1+wH9jHbxjIff/xxpR+J+5FHHkFlZSVWrVqFj33sY+o8sb/KysoUgb3wwguqX9lvbC/PmUUqaFiEcriRMdHzT9dLn66Fj68TWCaycKC8aeeC4OLfppbTuHHnOoRy5EJxLh+dxuJqIq0H9Wd/9mc4ePAg3v3udyvDStBzISn9wR/8gTLI//2//3c89dRTykP4+7//e7z+9a/Hn//5n+N//I//oUbwNPbd3d3K4yoo0L/SR4+GJEFP4d///d+VEWdZf/M3f6Pi6FmR4IqLi/HOd75TTbEFg0Fl4E+ePImPf/zj+K3f+i38/u//vnpGVFJSosp5zWtegw9/+MOqHHoTJDeSzxvf+EZVBuvIy8tTOhAkhG9/+9v4vd/7PdUeEuPnPvc5ZdBJuNSLz4RIqgZ79uxRBPLf/tt/U2Vyn54SpaWlRU3PEayfBEiP6OjRo8qTYT8yDz2tjIwMvO+D78Nb3vxm1WaT5+abb1ZeK9PV1tYqHeuFwEhCDGObSewkLJZJMiZZkVDvvfdeRVC/+qu/qkj013/91xNkbDETiXEvd6aAaCQKjy9+sbcJLFYc9AVBX8kvt1I0Kg7USNh1AVlcC6QlKHoOn//857Flyxa8//3vVyNyjtRJHp/97GeVh8NRPY3+L37xC0Uir3rVq1ReGuHCwkJUV1crA0xDTS+DYDn89VlOqTEfvQaSIYmFRBeVq4BTbPTcWAa9FZIWPQYSCj2j3bt3KwNNQ08PhSTJcqgTy6FHRKJ55StfiX/+539WXpfxggy+Kt4LjfratWvVMQ09iYbgyj3qmPrciV4iiZBkyzbSi6TnQo+N7ec+wTauW7dO7bP99913n2oLpwZJaq9+9atVXLt4Ohs2blD7bAdJkn1FsG0EpydZBkmYhPnYY4+pMrq6upR+73rXu5QXm5+fj5e85CX4kz/5E0Vcqe21SI9wROxNNAY/X7CWY/W1AbXM2MKC0Gs5/VzdGY9hOrHK1mt9p2uEtARF0Hv4i7/4C2Vsn3nmGTWav+eee/D2t78db5bR///+3/9b
eTP0LDiVZ4wiDTQNPo02n4/Q2Gdl8ZtCUFNbJD9OY23duhXvfe971WIEPuPitBjJiN4XvRiCdTIvParGxkaVl4bZkAfDOH1IQ/3ggw8qT4WeFb2HX/mVX1FeB6fCODXmXhnHaTCSrwEJc/Nm/fPW1G379u1q34Ckx+dc27ZtU8dsI3UiwZG06ckZj4UEa/RnPZu36HIbzjeoKUDjaTWxPTt2YnpqWhGOyUPQI6N+JCcS6dve9jbloXKxCr0jTjGyXwzxE/Sy6HXynHEwYXFxxGRUzEd1/MYeLyn93MESlAWRvBa4ZolUxS9uJmEp6lpgBkHxeRCJwkzr0YPhyJ+GlZ4DDTWfjZAodu3apTwoEhGn1Aw4BUXyIfhwn4aXhEKyouGlZ8URP8smudAj4vMSpmN6EgnrI+hZcKqQHhHrSV3sQGKgN7Np0yY1RUa9WI5ZqEFv5R3veIdaPGBIjeAUY39/vwpjmfv371fEyylJLjzgFJsbnN6k10ZSIjglx3YQ9KD4vI3PrEgcbOOOHTtUv7E/q6t0urPnhLilLYFgALFIXNW1pnaNqp8EVVjEr9QC3/rWt5R+9BZZH/uDfc22sZ94HujN0WMzHheJjHlI1Lfeeqt65mYXScwPEU7xyaVBI2RhkYS2F/xLp9rH75rNMJf2/roWmLFIglNLNHb/9E//hJ///OdqscMNN9ygPB16Qpwuo8fE5yP0OkguJBsuy6bnwedEnLqjh8X0NN58hkKviOVyCo8eEwmIz2c4ZUUy5LQUjS/z85kVnx2RPBhP74xGl4aZ9dBD4TJ41nvLLbcoD4bpmJeeA8vhgoZPfvKTKoxTgCQp6mNA74OLJNgGEgK9L5IZCZH6kKxIiAYkMJLUG97wBkWW3//+91V5bD/bTX3YRuYn8dKTo5fF6cJf+qVfUt7Vt7/1bey44UbcuPNGdLZ3qv591zvfhbr6Ohw8dFA8qibVr9Sd3imnNdl2PktjOexXTkNygMD28ryQtAguBvnKV76i9CRJ8lkUF1i4SdnCgGNh6RexL1G+pHugGTXVtepBON+DUgZJ/bWwEMQ9CPqBhubz2HbjWhSU6EUSmqDsdXK1ccHXzHlII8fpM04h0RsyI3V6BTTgPp9PTUFxGpDpST70CEg8H/rQh9SzH8bTs6L3wuckNOicJuPIn6N9xtGr4pZeD6cSadzppZnFCSyTpGkWOJCcOO1Hz4IGmnEskws6WA7roPdDw0wvh94Jjbx7Oo+gzvSyGM88nKpjHnpdyutxvCMD6sVVdGZ6jp4e20S9qC/byGdD9OS4apD6syzmowfIsjvaO5CXk4fsnGw1rUeyqaquwujYKCanJpUXxf41/UowH4mRRMvyOcVIoubUIadUqQPT8HkYCZ+LSagDzwPDKRYzwR9HUd9ViwJTvcDH//NJ3H7rvfCIEYp4GCcklfIulMUKRtSL7AzgZ0/9GG/7b/eidgu/JCzh6lmlvU6uNhb05za4zJnvTj388MNX9KB+LsNq1F2Wxtf9EvplTClxus9NPKmnzhLSxZFKUB/7t1/gjtvug1ccZktQFhdgBkHdJwQV0rxkCeqa4DLM5OygZ/SmN70p4XFZLCzc5GSxMPD6vPoj9xYWF8WCjeUt5okFvTVvu+02tSKPRGWM6eXIXJhPmiULNfJy5DKQ2m7TF8u6TxYZcfsClIXFkoUdO15LXCFBWSw8YrHoBb+SbGFhsTRgCcrCwsLCYknCEpTFCkfqFJ91by0slgosQVlYzJh2tc+kLCyWCixBWaxo8PNGpCRNS5acLCyWEixBWaxocJl5jCSluClJVRYWFosPS1AWKwqpT5j08nxDUIR9BmVhsVRgCcpiRSHVP+KXI6zXZGGxNGEJymLFw2s/bWQxB+y73IsHS1AWKwoXTPFR+CUOa4Qs0sB+oGVxYQnKYmVCCIk/VOgGD73WIlk40J8Qcw4E6nmlvTyuKSxBWawcGC/J2dLWGHujflXXelEWs0B/s9Gy07WG
JSiLlQ2xO26isrAg4uoDje6rwl4hiwFLUBYrHtb0WKSH26XmVWJd7GsNS1AWFhYWF4CutZuQuG8J6lrDEpSFhYVFWqQQlH1Iec1hCcrCwsLiorDktBiwBGVhYXGJ4PMYIxYWVw+WoCwslj04ur+WI3xTn/UqLK4uLEFZWCxb0IPhLexzhPuX49Vcric0Xy/qcsu3WOmwBGVhsayxEF5MahmzeUcMiwnfRBEMehEIkHj4vpBJny4PMVv40gW/GqFfzrVYTFiCsrBYtiBZTGNyqhcTkz0i3PZjOjwGj7qzSSDccYvxZkyc+9iEGY/MhBEm3IOA34d9+5/HE0/+GPF4xAlPzZMqFhaXDl51FhYWyxGeGIZHOvB/Pvp7+NM//wD+/C9/F3/8Jx/Cww9/GtPTk0Ie6TwA4+loUZ+XEysw85tzzo4iFnd6Ig5/wIszZ4/g+Im9cjTthM8G5puPl2VhcSEsQVlYLFOQSOLxSbR1nMUvveMN+L3f+x18+MMfxpvf/BYV5/MDgUAcsdgEuntaEYlOCBlpkvD54wgEw+jqbsDwcLeERyRdWKWfDo9I+hZ4fVERVkRPLYJQBoQQezAk6X1+ek6TIiQoTvlBwiVuqE/iSGwxKTOm8vgC0xgdo2fH9ATjKRYWc8Mjoyw7pLFYGeCVTmMbl3FZFIj0A//17z/Hbbe+HB4xqlExwh6vF+ozbEseHkUAA4Pn8eH/9Zv4yEf+AetqdyIaDSAq3OHzx/Dssz8X0ujHwMAA2lpbxfMJ4dd+9TdRVFQuBNSMRx75nJBGGFOTETz44Juxc8dNeO75pyTfL4TgfAgGsvC2t70T5eWVUl8Y3//Bt3H06GFUVFTg3LnTWLW6Gh94/+9Kf4bw3e8+hsam85iaiuDGG3bhNa95LXp6urFv/zMYHx/EiweO47d/+w+watU66V8zLl7KHa1188b9CAWAx5/8IX7pvfdhzeZMFa5f2rUke7VhPSgLi2UMPswPR8J4/rm9+OlPn8TjP3sKvb0D8Iv3VFd/DE8++VPcffed+J0PfQAdHfU4dHiPkNg4PvvZjyO/IFsI5v341V/9NdTUVKD+/HF849Ev4xX334ff+p/vQ3lFIb705c9KJZPY88ITePa5n+E9732nyC+jsKgIMSF5f8CDx5/4Ls6cO4Rf++/vwK//xrtVHS2tdRgZ7cO3vvWI8qg++MEPCNFVuMjfjostLg5LUBYWyxY08jG1UIHTa8Mj3UIKvYjFpiQ8ioi4Uhs33oiK8k3IzSlFfn4+xsaH0NB4Bi0tbbjv3gfFSypFWckGlBRXYu++51FcVIHt2+5E0F+Al770HjQ01KGzs1mI7kns2HEzaqrXSVnFqKxYrbws6vDii/uwYcMGdHf1YmhoCNFYGOfPn1O6ZWUV4GUvfR1Wr96MgN/xPpTebrGwSA9LUBYWywpmWkmTE58N8bnRvffejTe+8Q0iD6K0rEiFq2dDgUz4vEIMca+a9oNnAuMTfQiF/MjJzpfwgKQLgAslRkcHkJubrfIQWdkS5wljZGRQiG0ERYWF8HrFNROz4fNJHskUiUyqKbzTp07hqaf24BdPPI28vFxUVJZImXw2FZS0GeI5kcxobqi/Q0z223YWF4ElKAuLZQnHyAtIFJmZuY5kICMjQ0K94ln5hFB8Ek8y4NxaXE2xlZaUY2yMCyFaEcqIi8RUmqqqGrR3tGJqehCZ2R709nZJeg+Ki6uFdIrQ3d0Nv98r5BZAVLwzPr0OhTJQWFiCbdt24v3v+xB+4zd+G3/4B3+GHTfulviYWngRj0+r52VG3wRSDi0sUmEJysJiOUNIKBz24KGHHsZHP/p/8Nd//df49Kc/gcmJKbUYIWZeNhUPKhbzIxL2orJqDV760nvx5S9/Cc88+ws8+ujX8MILL+Duu18uXlU2vva1L+CZp5/EV778NbziFQ+gorwWr3/gLTh8+Ci+973v4Wc/+xlOnz4rpBaQOgIqbt++Q/jud7+PfXsP4LFv
fx/Dw2NKtxidNv1SloXFJcOu4rNYOeCVft2s4hN4ouLtDODkqRc1IQkhRCIeNXW3e/cuNDc3CjlkoKZ6LfwB4MTJQygsKEJlZTUmJ8dw8NB+NDe1IDsnDzfddBOqqyvR19eN/fv3ivfUj/XrNkv4HeKRZUvZU5L/II4dPYZi8cBq16xFMBhCbW2tmh6sqzuD48ePSrnTqK5ahVtvu1X2J9HY0ITNm7ch4M8QXTg9aKb4ljr0RWBX8S0uLEFZrBxcbwQlDYojDK9XtvGw0zafIqqk1yLH4klxtZ+G3jK918cFFmwsw8wzIikHfH4lZBIPOWGEdJjHvJTL9BLu1KNMiGdKxeu6+FyL+SRd3F2v2V8OsAS1FGCuPgsLi2UHj/zzK0ICMoQMstTWg6BsHcIRgkiSE40qDS+/IEEio0cjablQgqStyIRhJCaSTDK9BsOYluTF8mWjCI7xDM+UcqkH41meQ1IJsbC4NFiCsrBY1hCiiJNkjJBEGOZI4hYnQXDfHMtWEQkJiemctIqsWA7DTVoSUGp6xrlExbnqvyDNdQLD1RbXBNfRlWNhsRJhvBy3GCvq9loYZoRw53PHpSuHmC29kdR8qWJhcemwBGVhYWFhsSRhCcrCwsLCYknCEpSFhYWFxZKEJSgLCwsLiyUJS1AWFhYWFksSlqAsLCwsLJYkLEFZWFhYWCxJWIKysLCwsFiSsARlYWFhYbEkYQnKwsLC4gKk+Xag/SDGNYclKAsLC4tUpPu1X/u922sOS1AWFhYWF4Vlp8WAJSgLCwuLVCR+x8pAji8Is7jasARlYWFhkRap5tES1LWGJSgLCwuLCzCTjPQPj1uCutawBGVhYWFxAWYuktC/SmyX8V1rWIKyWDkwA+CUgTDNTswOkC3mhL04FgOWoCxWJsTemFkbbozEVKCFhYa5HLQHZXGtYQnKYkXBTT+GlPh8gQu0LDVZpINXcZO9OhYDlqAsVizcJseOjy1mQl8RCYeaAxjrXV9zWIKyWLGgubEmx2J+sFfKYsASlMWKhfWaLCyWNixBWVjY0bHFvGCHNNcalqAsLCwsLgY7hlkUWIKyWLHgymE+9/Z4fE6IhcWFiMZ4nZChLEtda1iCslix8AoveYWlrO2xSA+OYIBYlCQVletFmCoBO913LWAJymLFwh8EfD4/omKAkvxkmcqC4HWgCSkmGw5kMkJ+dWxx7WAJymLlIgsIZgQwOTnpBBB2ZGwhUD9YGFcv6U5NcCAjl0u2jGgsriksQVmsKCTohztidEpKijE6NOG6EawHZWHgUQQ1MjyBgoJcZOeqIItrCEtQFisMjoVxNrVrV6GrvQsezubYLwVYGMS5cManPKeOjmZU1xTDE5AgT1RFW1wbWIKyWJlwCGr79kxEpqYRnpSgWFzP7FiseMiVoLcycBke7MONN24SaxmWkIgKV462vVauOixBWawwuOZoxMCEyoHVa4vR2trA8bKEueItVgB4vo3MBL2n1tZuFJdkono13Sd6VdZkXkvY3rZYoUguGb7n/mp0dbYiNumDX03tGLiN14UGzOJ6AK8DI4R2i+hJe0XO1p3E7S/ZAmQwUMUkLwV7SVx1WIKyWKFwrIvYpYJ1wJZt63DmZKN6KO738rZgfKpYXJ8gKTkizMR/Qb+Q0+lOlJZnY9POTIAOlLoGHJNpL4drAktQFisXapgsW5FXP1CNsfFRtDYPwyfHfO8lFovrX9q1uE7hGnzwWuBKmbhfTfX29UyipfUUXvemW+HJU4kFLoKyuCawvW2xQuEyTNwrAt767htw6twRtLePgU6U1ytj6XhEhNM/TOfkUWKxvGHOI02gDx6PVyQu3jMwNQ68sP/nePUbdqO8Vs68nH77S8uLA0tQFiscYn28UWWrKm8E3vKOl+LIkSPoaBsTT8ornpRfEVQ8HkVcRtgU/bxiZRosttrI8oNb8+S++iZjzCfkFMTYSAzPP/8MXv6qm7D7rgLAL2ebVtKOSRYFnrj9mUiL
FQ1e/rRQsnF4p+4I8Ogje1BVWYvNmyslKIpILKKGcxxpM72yV9flij/TpvRmwfw0PlM5zmcKllqfpG8H1WQMiYkeUigAtLeN4MixZ3H/q2/GS15ZpshJpdHJl1zLVgIsQVlYKPAFTDFBHC7Lbl8r8Ng3TmN8eALbtm5BQWEmwnKn8JkUR9zGWpmbJ/UuYhqVbhlCmwSHhAXupqkYcrTscJOKlG6YAXd3zDedQbr0l929UhibqKZxpZDBwTDq6s5hfGIQD7zpVmzZLWylyIkjFtaia7rs+iwuG5agLCzc4N1ghM8inhzH808eRSiYh9p161FcFoJP7Bc9CbWAQrazEdFyvLPYFMU/ontCfQk0+2bLdOmaPVeT3ennm84gXfp06WaFFGAIiecrKg7xQL94y/VnMDbWgRt3r8HLXrEWWaU6rX34sTRgCcrCIgHeCvxSgIyc40ERsWSyG+0FDh0QOXQco2OT8Ph8yC/IRSgjhGAggIA/AB9/u8NlRq/0obrb+C7kDZpq1M0XE1RDPTE5iokh96ppL/pQOpYLr3V3qCN+cUO2qWURKp3EcMvceqvhTq/bpMtluNkaSQXjjcyV7kLolGxPJBrF6OgYRkZGMTw6iszMEG7YUYubby9CfqUkS3wLdlqEDGW/Xr7YsARlYZEO5q5wW0axWz1dQEvTtGz70dPTi/CkEJoYP+Mu0Rxyj8vUPctgjs9QjdLbIaVwOIJghl8M+pQy017+FddDt1BD5XIHOIhLm5P0RXFB0qt6uJ8SZaBWezv7bjCP+5HfbOkuhMqppiWDQR9ycrNQUlaAmtpClAgpBbiEnAVR2FieS76hqzC/GiyuHjwxvuxhYWExA3psH5O/9KiiYqoCSpQBoxBm/3q4g5w2xPuA73yrE296RwVQIAG00Xw8x3hjyOfCldr0ufrSXfal9rnJSxIyWyX6/OoDPnhyEl5pOywWBJ5oNHo93F4WFlcf7jtltv3lChKt8O/R7wP//n+/gd/6wwex+zUZ+vNzxPXWXgNDRPSAE6QkO5aglgQ8kUjkerrcLCyuEnibUOhVmf3kHmFsmnlykwqd+kKkpk6WruMu11bOVW4SEUnnhy8aAgaBf/uTczj6fCc23RnFh//hXvgLucheu1BGH4OZ5Rgw1LiV6VNo6Ja5+2qu1FcKXZupkXviMbm/u6jUoN4mpcVSgCUoC4ulAN6FV8MuppTLw5k3fERMtX7h5/h3gE/83R5gMh9jwaP4wF+9FLe9ptqx47ogt4qmnAvVvrCW1FR8TpWab/byFhJz6cW4q1u7xaXBEw7z7Q4LC4tFg/sOXEj7mKZcBs0M1h4UhoCPf+Q4Tr8wCs9UDqYy67H+jig+9BdvgV99i47eBb0O7jtIqysTMC3hTsD9NBnc5RFpy1xIuPUjWCGfP1ksRXimp6dTLxELC4triYsa/ctEmnIZRDHVJBaCjwGdDRI3CPz0u6145Ztq4CmcQsXaEDz8qQmTLk2ZM2FqSAUTp81wYfJZki0MjH4Uo9NVrdDiCuCZmppKdzVZWFhcSxh7udBIKdd9syeCTSAdixHgkYeO4u3v3QFvvhxzeo8OhlpGn0bBC4KYzl2LAROmyU+kVepqIVW/OfSyWHRY39bCYingatnIlHKNOdaeExkphUwkMuyNIcJEbuvAd7qSmZNyAdIlMjIL5pFk4eCu7JpVanGZ8ExOTqZcoRYWFtc/HE9C3f0OE8l+bAz4/EPP4x3vvQuZhQyLCTfp+HSGwpp3i6sJz8TERLrrzsLC4roGb3sR9WKqQzMMGiVBvSAEdQcyikygjp/NUFiSsrha8IyPj8923VlYWFy34G0vEk96T0rGgS98RgjqV+5ASBGUs3rP2UuFi94sLBYc7llmCwuLFQfX+NTlSCm4oiwsFgOesbExexlaWKw48LYXMVN8ziEmHA/qvRd6UCYJc5gtYbYWFgsNz+joKK81CwuLFQWHbtwExd1x4MsP7cfb3n3rBQRlYXGt4RkZ
GbEEZWGx4jALQYkH9dXPvYi3vOtmS1AWiw575VlYWGhysrBYYvAMDw9bD8rCYsXB8aA4RnVbAPGgvvbwQbz5nTdZD8pi0eEZGhqyBGVhsZJhCcpiicIzODhoCcrCYiXDWABup4WgHnoRb/ylm5FZwkBLUBaLB3vlWVisdPD5E8nJcJHHgxj3LSwWGZ6BgQHrQVlYrFDw5lfrI0hIjjzy2YN4/VtvQnaZibDjWIvFgae/v98SlIXFCoP7pvcY78nZPvLZQ3j923Yju5SxlqAsFg+evr4+S1AWFisMvOnNja/ohzzkyNcfOoQH3moJymLxYa88C4sVDkVUnOdzJB6PMsTCYtHh6e3ttR6UhcUKg/GgKOQlr7ECwk1f/dRevOHtt1sPymLR4enp6bEEZWGxwpCWoLgTAb72qX148O23WYKyWHR4uru7LUFZWKwwmJvebBVBUYSPLEFZLBV4urq6LEFZWKxQmJvfE5e9GF0o4JFPH8Dr33qLJSiLRYe98iwsVjBISZqWkuNUj0eHWFgsNjydnZ3Wg7KwWOmIi6cUk/GqWIOvf/ZFPPCWm60HZbHo8HR0dFiCsrBYyaAF4Nu6/G2oCN+DOoDXvvEW5FYw0hKUxeLB097ebgnKwmLFgre/MQFCREJQj3xGE1ReJcMsQVksHuyVZ2GxopEyPlUWwY5ZLZYGPG1tbfZqtLBYsTAelLNcQna/8en9ePWDt1oPymLR4WltbbUEZWGxYuEQFJ8/UeT/Nz69TwjqNoegDHlZWFx7eFpaWixBWVisWLgIiu9BibP06Gf34VUPWIKyWHxY393CYkXDkI+z5SbuVbSkYcnJYvHgaW5uth6UxTLH9XAJz4cIFrKdpj4pk+9AcaxKL0o2fAb1ygduRUGVE+9OuyC41m1dbMynvRbp4GlqarqergSLFQZz8S7ni9iYr4uZsavRVtaZWu8T3zuFW+7cirySmXVerpl16zufts7WztTjpYjUdvH4cvvNQvqusbFxOZx3C4sUzDR76S5iE7YU57GpGw1XXP4lDJgJVHCbNdMSHZaurVcKVTILFolOAr6g7PsZmERceVqXB48n9SzMbFMSyXB3O7Vq+l9qHs8FZWjotDMx37SXV2byXJow/dfkSV+mxeywBGWxLMGLluaSt7w2fQwRSVzNjjFlgiV/hVNJX8J+UV1jypxWJZBoqzvRQoDTewoREUNErI3hFNbp1uQy4JE2siz5z5JMEyiECSNUO9WBE6p+l14knQ6zfTtwvmlna9d806amU0kkTEiZlMVDpkhTmsVF4GloaJjl7FhYLF2Yi9ajjKkcRSdEpsW2yrGyBCaFbK/UsF4VOCaLnoWXEhKOytJRLlBzite0Mx4GwuLisE0LavGkMNVN6o/AqU/pyDjWbYjrUsC8LEPaKBvlmvkzZSe5EIPB3HdqU6J+OZGuXETOqYlV9ZuUDlQ/OOW7MZeuF0t7KWUm0pJ83ZC8PnFB/TlqnxrrdllcCjznz593nW0Li2UC/iy5R2S8F4P1xzDS3yv2IwKvGAufT0yBGBOPY1BiNCIC/o3LPmXx4DZVNGwywhbjFvNlIr9sNQrXbXeMmtuc8SfYRxFtOYPutkZEpiZV7MxWuNNfXvt0Lv6VPhKi93g9qj9NjOnHSwGnuqQUeJWH5kVYxJddgMr12+ErrpaiJZzt18klVghpahDD9Scx1t+JqCIolsO6Y865M6l1m2f7+nq685wu7WzXw/zSio+UIDI9sccUnAqMefwIFVajfMNOILNAov0qLr22Fungqa+vT392LCyWIJI3+Dgw1oGufT9HKDyArIwMxGXEGqexkP/qeYmQGA3tkjYJauqK5tePkYk4IrmrUbPzHiCUp8IRj0h7phE9/QyGGw8iMzMgdk68rVlwuS3VRlWEVco2RoIXgkqCxG6I4eJInCcp0Ct7njhJSjwJiRifjmIgmoHanS+Br2ytJMzSPEXimerE6KHHMT3ai8wMP/z0QuCT8iTOE1N6
pYMm0iRmI9PUdMTCpuWWE3tRhKNRDI+L7xsqQ9XNLweyyqWd5umUxXxgCcpiWYEXq7bp/cDhn2K69yyCPhll+3xiJYISrg2EGmUbY0ajYgzIDKO72BBdqGNM9KOO4kU1DYeQv/EuFKzbLdHSnvgEMNiAwf3fQ0HmmOSh58gVDKlgmw0us40mGx0CqqSPHFwaQRkoD0qxHgt3KvAEMBHLQE80F6vveoOQcZkTHgbO/BxTDc8hlCPEpBSgMpw+kwMhqMQ5dWuXhkgU0pFJurSzkM6807rTJeL5LI/IQv+weI2rb0H+9nulOSkrTyzmhKeurm6Ws2NhsURBryjcjtHnvoWs4JSYMDFsNKCyxxGqBi9rY8xmMWCXjHS3yqWWzTKYJ5mP00bqSAi2dzKIqcxKrLrtNXKcI8nGET/1DIZajwtB6QUMcWOwU/UR5k6uI7s0uEtiCbocd1lp6psDJqcaTHDrLsvjQ9SXhbbhGFbf+jqgcL0ULcQbG8HgnseQJ+fWG2B6Y8x1IWxdEpemz9UGtVUayY7ysmJR4S0hJhlMjE/70BdchVW3vx4IZDgpmcPVJxZp4fvt3/7tv3L2LSyWPpQVkD+T3Yg0HkTQH0NUBtkxWkJjDRXc+wsJY1gWwrh4HIeAz3rkr5BrTMhnMhxD3qqNEsCpvEmg/RSmR/vgD/J5lRFtDJNC861NeIzlyvZSJQEWqGC2GsobSmzT/0uNvQAqSNc2HY0ji1N8mSWitJBvZATTTceQ4RFPURXFqT2mZScZDd1l6vrmFoPZjmcLuzxRXcdBkoiPCyfkWqUPOBoPIa9azqkvIEcW84WXozcrVpaN8J/aehBRHhONgFzJInGxDsm05vhCiaUJu/bCQbZ4Q+INKl1p3ERi0iY+XNf6M07H029SU4EicYqKN201osOcnXmK7juKDtK6JbeOSJKY+senQbP/S8QmdE/RUVVFK+5FRAgqGok6cbpfJEji9bOqZH5HByWmLHfYXKLPt9N1zr45drfVLallzF+k6WqHYyWlv1pVKueUxyrK6G9lPkJyt7BYMaDhkFGZbLURcYv8VaINaDowES2QER6rjM7W7M8FXccFUDekY9AIpVSyLuo7E/Opay4YPVhOsj2m9Xrr8sUuqP8ywXKknQTLjkWjqvj0MGfC6Ho5khqS9O3YPg2dzvSD0+IUzCxlbpG/iWeduk+1oU2WPGuTLWbAelBWlrHIja7EHSbmQAn3OZLnmJ6miKNYMRNePr/hCF0MiFeP1GktzMg6Kmk8vpCk96lRrxYpIyEc1XPU7x7dm23qfjphvCsN/8mOKlt5UzoiHjPtkENn+O1EzSI6vZ7vc4fPJbp8mmsvp9PUMfWhHmKsZyROObxEiUobYiLK+6NQTfkTZZudPkik5ylJHJu4yxS2S7XNKVu8GUUZnCZV1wfDuPybYbLvk3ghF0VVksEtactPEZZnRA71uZPrLSpErI6ddDxw57OSXixBWVm+Yv4ZY5AwADSw3KcR8qG7bwSjk1FExBB19g5hfDqGMPzo6h9Rz0FojHRJkt4bxNPPHcDA0JiQlS47JmXosmYTTjNqiTtLqaUwKjBDLtTfiRJviVpoUhBR3MA0Zqs9GSeV/pcolvtGUo8vJkIcovOY9Ed3/7Ai56i0h29d6T6h8C9rVUqpfCqjIxeW6cgF/wQkADHY6p/KbvZTReIStc4ts/9jj/kwODSOnv5RxDwZiEj7+gZG0D86id6+IYn3S5gPrV2DmIgHMIUgzja2o66pE1PRmPQDhf+caUun5FQd3OHqryivCE2Rnj5zbHyibTO2VuYSS1BWlrHwJqcxc/8z5oJxXngD2XjhhcM4d74NI1NxPPTVb+JsWx+6RyL40RPPYTLmR9QjHpOMsr3+gAx2M3D85HkMjk0LWcmxP0OMTFD2M9XW68+UtEF4Apky+veLt5UFjz9L6suQvFlihzLg8YbkmCvQROiNiR7qCwokM+plRLWBBs3ozGPGMIz7TpoY
KYMGjsduYXnuMvlP788HTD0tRry5ZwQ/f+YAIrEAwiJxaWOUbfaxzfQuOEEl7CJbvtXEbUKoh+xdIPJnpsiZITnJ1hzLxjl2icrPfeP9inA7q5jyZ4oK94dwsr4Fn3j4G+genhLS8eFnz+xBz+gUvv/40xid9uF8ax+e3n8E/ePAl775Yzy7/ySe3XsER4+fZs2O6H/p608Rpb/7vDgDFuWxmXTUkW20cjHhVWZhcZ2BFsAjNkEIQgxtbn4FuvrG0dY7hikhkcaeUbQPTCDsEQMcLMTew+fws6cPiJHeh/EIv2Ugo21vNvpGo3jiqRfwi188i8amDiGtOJ54ei9++sRT2Lf/GGK+bOzZexRPPbUPP3n8Wfzi6f3Ys+8YvvujX6B7cFII0Sve2ItSxl4cPXZaDBMNerpbTpsyijZtBs6e3KjOQ7I0SKa+FJhnWl4h4NPn23GqrgknTzdgMuzBk89Je558Hk/vPYiJaZKjpKVn59IzKQuLRIlslvIsJUQ9j5tFZtVBEyo/HxXx5+EZOS/T4h0PT0yhsLQcgewiHKtrx/6jZ7Fp207sPXBUDTYefMNb8OCDr8eWzZudMoikHvR2L9BBJBHugnrlQYkQO7cKrjM8M7lFGlgPysoyFbm/lbj3k8I/6plONILy8hIMj4ygrb0b27bvQk/vALq7e1BRVo7TZxpwur4L22+6G63iSdQ1tYjnIAZEjNXjT72oDPhNN+9CaUUlfv70HnGKQrjl9ttw4lwdmtr7xLA3IJSbi9rNW3DwxEnUbtqK6bgPp8Rje1JIbHDai827bsPh42fRPzQuI2w9op5VRHWOsi9or2qSO8wc09ty5b8EUcN9Gc4X5eejrKQEW7dtwf4DBzAxGcYdL3kZmjt6cOLsOeX58FlR1O3pOf8UbaUpe34iJSS2bkmmkSpmHF8o7jzJMkwfTk9PY/v2rdL3Q2hq7dQj8qlx3HzbrXjyhX0YHh3Fxtpq9LU3Ycu61cjyTiMvGEUm38OSQY6euuXAgh4Q67iYuPURkb7jak3VdyrcbK3MRyxBWVm+wn8Jo5BqHLiNIRKZQnllqXgCYbR39WPztl1ivPxoa+tAdXU1Wts70T0wgr0vHsV0jA/QA2KMfYjKtq17COXVa5CRlQV/Rg6aO/tRuXoDcooqEcorRXv/GKKBTGQXlsCflY3MwiKE8gvgz8zFWNSDhs4+NHcP4oXDJxALiFemH+M4usmOakE6I2za4Qj/qf3UdEZM/KULF0gEhXR94l34gyF0dvejvGINQuJh5BSWobOnX5EQB/162s2ZenO28za47n/mOK3uEjZrXDphunQicaI5nyD5gwFs33GLnONTCIuHqActlcjMK8Ga9euRGQoilJGBkbFxupRqqtfrCwi5yPWgFqs45c1DpFLZunQQ0f00/zKsJMUSlJVlK8oYzDBm3NKgcBqFiMrofwp5OUGMj/ajp7sNlaWFyA8F0HD2JEqKslFTXYDCfC92bKnFK15yGzasroEnFkYsPIKiPB/OnTuJbvG4+vp6UFNZgjOnT6G9rRu9vX0oKskTMuPHTMPweMQUhifhiU/L/jTikXFUlxeJd5KJXTu34WV334H8gmzRzazmcgmnfYzeifBkOxNh6f654y9BtC8kf6NRZIhBnhgfQ//gAErLy9DU1Iiezg70i1SWlcnon0aW+bzSn0bH1G2KSK/MEIYZcY75R4eZdlAnJ8xsLyYX+ReLTCA8OYitm9YjKyMbra0dqi0BrweFWSFkZwQwPj2BG3bvwotHj+PAkdOKyF48ckquHmkb/0lFF9Q7q5j0Jo+zz3fe0qaxMpf4PvjBD9ovSVgsL8iFi8ggpltPIcivK8g/Dc7ta+FUDvcksfq6Oaf4SkqKsX51NUKIwB+bxMZ1NSgtKRDvIa4IKypGukoMtJrhE9LZtWMz+ro60dbSjNycLGzbthn9vb1oaWxV02G1tdVi2IckTwkyA+J1CUGtrqnA1MSIkGIWdmzfgpGh
PjTVn1VfvCgtytNfjFB6JaHey1GBHoRjfkwhA3mrNslonp/FmQQ668XIDsPvNzm5TS3l0qGJ0Yu8XPH4xoYwONgjbb4B42MjqD97BhtqV2HLxnXwO4/NErUrZefWITWWZygRxvzeAMam4sgorYUvt1yfUzknk61nkOmdkDQ8q07Fc0CXmSg5AVWfdPb4xBhCIT8q5LwW5eVLxBTWrq2W8+HH5OgwSotzkZ+bgaJi8RhzcnDi2FEMDw1ik7Q7LydTSlX+4yWC51OdVUcrP0Y9Wcir2SwXFr8kYcIFM9W2SIHn2LFj5u62sFge4MusE40Y3fMt5GTzywvOg3xz48uo3tz32giLL6W+cu4XuycxMpp1HsCocL7zxMk99fIu0zGPJOPPNHFxgDOOdsryyV8f4kJqMSE6xsmubL3qGXncQ9+EZTNGa6H3JB0PpTxqpSBDeepGe03hQ/WxSAZG4vmouvP1YtcKJNEgcPgnGB9oQ0aGKkCEW2efG1XfpYMlcSk23/9SOlIHCaOHQXj5zlAsKgRO/Uyf6lwGHOWmg1oz4OwTnObisVpHwI7wZ6F7JIaCrfcgWHmjJJBzGBnAwAvfRaGvT9JIL6pzMTd4PrVmFJdecsxzyD7VZ1DOWSINqc/k0u+0eX0yJOH5kGtDpeblovrVlMnU84HqyUR/qUsTQbTHi1F12wOIB8SLlpJVy1j0fItdofB94AMfsB6UxfICjWJYPKiW0wjKgFQTgmNIZhgsbmgsaGyiYvPCItN6X/JoI0VzQYrSYfoDSpHEsQ7TW+bz0ZiJlfVJOfSy/AwXfVQ6xjnpuGUZiXJo5EWdGQadiiUONQFOx3yYioeQy9E2f8RQ/Cl01iEyMax+/24mXGVdJnTbdJupNw22N87f1ZItvRilL82pW9ckOMWXFqlp5Vh7XoyQrfKgYsgoEQ8qp0yCxZLHJsSDOo0Mr3iNko69elGo+i/UQVXP8yJt0OeU50vOPY/V+eI5nlLTuV6p2yPXhk/C/Krd+pwxP/8lMUtbZ4BXlIGkV2zvxwiykVu1UQY2wRklWswN3/vf/35LUBbLB7y7aTQTBEWDYG55MQgXGKwUo6KWefNrEnpETVFEoaDzJsLNvsS7w5KQ/YSBdseZrWPYKSyDBu8C6yQBDlGx/ClO8SmC4hSf87HYznpMT47AL8N6lqb1IJiPBV5Q6CWCbaMPqUXrbfZZlyOqKlM34+RQGeA09acE0a80OVVZvgBGhXtDQlDenFIWJIkmMdkmBOWZUKl0/XpvVsxBUKpGdW4p9HDF+1Ur8piCG2mj2k+eXwq9aKqjNU4tm8dz6OOUYfaVlyz1jMSykFvNc5rup1IsZoO5AiwsljQSJkHd+/SYxGPh3Jra51anSH7VW/8Ti+SIpHPF6UUCKof8l9G7E59WVEpH1Pfx9M9eJMkhnRCuY1o897HTBkOO2qzRW1OmVIUxVG+4ZVrRWdXvKsckvWKwoNTCdB1aRdbliIKznWf9yTYRkld5m5xK41+WxXbR0+GW82IU09a5oM9n6j+dLykJtbmvjp24Gfo73jerdSGRVWHm0YVwynVdG5xCVM+y1CHrn3e3rXhYD8piWYD3dhJivKLDMto+h0BADJzc7TqenpGYH7GoCZEQLcosJcJpIRSZqa2O02YjKSavJjNj9pxy0oqJT5dOl3VhHdyyDSzfi8loEGFfHnKqNjqjbSHD3hZMjQ2BH7pQUPo7klLelQp1pDlVejnlJ3XX2wtFp6doitV5E+eAYsIVmE48qGkPcivXA5mFUggHCdOY7qhDMMbl3pKG39BL5Esv+vylFyZJ6mb2uTXCY/a9STNTVPkp24tJou9cEuUiCeVBSVv5dRKLecNz6NAhnisLi6UPXqm857m02zuI6X0/BCb7EORiN/6ekHqofrHL2RgOwqQ1x2mg54CSEGN2eWA5qbpJWSxfDLEy0GK8usdCCJRuQeHWu4RvnVV8rUfRd+p5FOdKWmXIr+bEB0lb66mf
GRGpeicx6yIJ/kkXpdL7EPFkoXsyiKpbXwNkV0q4eBiecUwf/RlivaeRke2XMFP/5fa5QLWBkk4Zp9xZ2jAT80lDmPoceISIJ72YyF6F0pvul2M5pwmdXOks0sL3vve9z3pQFksfbvugHhCE4fP7MNreihA9Kt70NPRqNQL3uZ1NGO9OY45TRNXFP9x3RIW7xAzV3ZKaJhHu1kF0VcQk+zKK5/7YRASj8RyUbboZCOVKOOMlX2YmfEN9iAx1S5u5UsLdzjSSqC9lfy5JpDH6ONuLyYw2zSUsW/SWevjya8dIBPlrtgkZr5EgMdrOsyFfhh9j7c3wxcS34aoQN3kYPS9FVB4iJVzJpSBd/jlE1evFmIwvRsJBlG29Rc5jsYRLOy3mDc/BgwfnOzSwsFgcuK9Q3vcQgorJnR+dALrPY6rhKMLhcUSi/LqBkNW8R7uzw+sYtqQX4eByDYwYWnob/AKDBsvlz1zIViTq9cOfVYj89TuBonUSxwUSNOxMJnnG2hE7vQejw72Iibeo1ErV7QqhJzF175kJzYthXs7HDAhBeUPIXbUVodXbAT+JOKh4WDlu8WFgoBGj547I6R2QgKj6zNLl4oLzlwazeYGXB6lPGsN3sPh1eH7rsXSdnFNOZSrvidcP61MnkBks5oDnxRdfXMizY2Gx8HBfoeqelgDHi0JsShPVmBg2ZcjmMGaXc6WnGq8rIQX1IpRArXwj5NjYK37tPDtPvR/ED9yqb8AZA6Ys97Ru54S0c3LU0esKdEkHxb1SZkK/eeBS9QhI2zJyRKStbKfyqgipXJUVka20NSJtnBiUNvMcp6uDYfPQcz7na6EJSom0h88QM/OFf7NlnwMOCU/oMw+9LOA5cODAQp4dC4trAkNDyqYqW+WEpLv/L/cKX+g7I51NMgZeRtZsAe1XIpkTRTCOhz6mJzErqNYvMGZrdDrlCYbPFpcOorMkj1H0kYjsqbbqPuAaSfW9BRKVgiROSyKz6brYYH+QeB3y5SHF4pLh2b9//1I9yxYWM6BsmN5NQhkucwmnXMpMvByubtUo0zLX1q27CpYA1V6accKkvQQw+5zZJIGzSGJm3zFTuowSNiPdHFDZnXISRTGzKcAEcusKVxt3OjdmC18sOO1zy5V43SsclqAslgWMGTK3vcFs5ik1/XzzXSqutFyT3l1Owi9yFySRl1J2armmbG7d5bj354K7jFSYuHTlpNZHmHL0Np0G2pNyx5g6lhtmttXiUuHZt29f6tVhYbHkYIzVfA1V2vTuK30hLcYVlGv0NFgofU25FyvPpEviwhBCv8I6ixIXJk+PtNlT69Mapw9Nh9SUi43ZNbW4dHj27t27lM6uhUVapBqri2HW9Iy4GvbjMss1erqzL4S+plxirvIurJ97ydzJ8CskqFmyptanE2qCIpL1z1ZEav7FhtbfYmHgeeGFF5bS2bWwsFh0pDMJV9Pouuu7nHqWGkFZLBSuxjIgCwuLZQ3jBbjlauJK63HnX2yxWEh49uzZYz0oCwsLC4slB8/zzz9vCcrCwsLCYsnB89xzz1mCsrCwsLBYYgD+P87iRVnIPd8iAAAAAElFTkSuQmCC"
42
+ }
43
+ },
44
+ "cell_type": "markdown",
45
+ "metadata": {},
46
+ "source": [
47
+ "![image.png](attachment:image.png)"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "markdown",
52
+ "metadata": {},
53
+ "source": [
54
+ "The encoder takes input the three words and converts them into a numerical representation of each word.\n",
55
+ "The encoder converts those three sequence of words into the shown sequence of numbers. \n",
56
+ "The encoder outputs exactly one sequence of numbers per word.\n",
57
+ "This numerical representation can also be called a feature vector or a feature tensor."
58
+ ]
59
+ },
60
+ {
61
+ "attachments": {
62
+ "image.png": {
63
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdwAAAErCAYAAAB0NDJUAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAABhaVRYdFNuaXBNZXRhZGF0YQAAAAAAeyJjbGlwUG9pbnRzIjpbeyJ4IjowLCJ5IjowfSx7IngiOjQ3NiwieSI6MH0seyJ4Ijo0NzYsInkiOjI5OX0seyJ4IjowLCJ5IjoyOTl9XX2gMSyYAAB/jklEQVR4Xu39aZQsx5Xnif3DPZbc93z59gV4eFhIAiBAAOwSq6pZBRbJqukajdQcqavPGemMdCSNjkbShzn61N2qOWemv2hOn/ki9YykOt2ntunqqVYVu0AAZG0soIokVhLE8la8B7x9z32LVfdv5jfCIjIiMzIzIjI80n7v3XR3c3Nzt2vX7JqZe7gnCoVCKQgC1KcUSSKSzdC4SqM0Pb1GqVRCIrGVfXg8e4+3VU87oF2RrWwrUSwWTczaiOo+1YXSfSZ0I5GXP0W7jqRIYOJpCDHx7apnH+IbNk9c8LbqaRWuLdWzK+NwK4HqUUl0UHktK7IusgDkF8W7itMtiVtNDALpMQkfEgmN0+UxoUjFBfvR7n5gK2PzeLoFb6uedrCVXSUkMPKypeifHa2GJnRNYoiDXbsOzH2O5XuXsLxwA9nsnPhSOlMZ3QZDSPUfwND4cQxOPQqMiYRTkpzs8za8b/GNmMfj8VTjONyCSFH+heInpbHErDjaT4Grb2D++o+wvnwVQbCCVEb8aYpxeHAoDWsCuZyMfdckLJhG//RLGD7xdWD6GYk4blKmM/fe1+PxeDz7mbLDrbjEJZEHwO03MX/2u8DyBxjtuw8MrMuAVpxyKLGSaYkc3aUtyUiXt4Fzsr6cxsLSNGYLpzF68psYO/MtOe64xON9Xo/H4/F49i+Rw1V3K061dBG48gpuX3wN/biGkfEACfGvSHAELEIHa5wtw3gM10WCUByvSD7E6mKIOwtjSE28iCNf/EfA6HMmuh/oejwej2e/kiiKwzU+sLQiW3eAC7+Hh5f/EIPpm8iMiYdMJlBI8H6seN1SyjjNQBytvT9HD0q4jBxyMSubAYrLfbj9cBSFoW/h2HP/R2DkUdnPyWrvcz0ej8ez/wissxVHmbgF3HwVNz99DX19y+Jsx4HUlMQYQ5gYlDFsRiKG4jAT5tljK9aBWhfKaeM+iT8qmyMIBvsxPVFEcf5t3Pv492TwLCNnQV20x+PxeDz7iURJhrhIyOh28QdYffe/Ed97HgNT4hbDEPmEOFCkxSvzRz4FFEtZlEriZjm6jZ5A5d+EjJETJk4gsexIN1lYt4Pe5QHcfTCOkSf+D+g7879GKZGK3LQ9vtuwHQL3r2drurMsPR7P/qBRC9RtLVOiVFoRv3IH+Q//O8x++vuYPhwA/XSJ4mCRQjGYxvxSKK50EWMjOXGi68ZhlhLRT4hKfeKbB7GwKM5YtgeHxCGX5pHkreE8fzbUh9XbBdwt/RJOvPR/A8YekdPyp0dBDJpp5tGzOdLZitY8Ho9nT6ltsLuscRKHe6eEhXdw/8f/DcZTP0M4Jk4y5NRwCkiO470LQ/iDP/kEWL+P/+hbo/jq80eRFOeLRE4ccgaF5CN4/8Nl/P/+9C0zM/2bv34ML345lD10znyYKg+IT79++zTGT//vMfjktyVswkxHR49e7QksFyvVJUT3wTIql5P3uFvjdeTxePaSGsfKB5N429Osyz+7u9K210TvGOFv//b/7rfx2V9g/u67GJkSF8iZYb5BqhDK6HYKf/z6Hfztz0LcvjOH3No6Xnz+BDLhMhIJjmgzWC09gt/9dxfxzs/W8GC2D3lxzF/98gTC0hoC49HkT2ZIBsYy0l0NMXT4SXHk0+bke5VpwnPz8lgE1vEnzC+QNxQGI7HkTF68bBCvGy9xEm+vvStsuNmYy1JXtW2n2L9W9opEKfdmqfST
/weWVj7A8AG56qT0B+bXMLuQRd/4GfzxDxP40x/O4yvPjOKxg1fx618/gsHkXTlyBcXSIFbDL+PP/noBt+6M4J0f/RS/+NwS/tPvjGNt4RoK2TwmJkaA/jGUlkZx9+E0Zl78L4Dpb1YU1Caqk+dWpa4R9itMN4hwwR0PVrF46z5WZhexvLCCdelgZFfXUMzmzJuTPBWoMqO9BG8rcEVWy0+vezzdB+swxdtp75GXBjxIpZDpzyCVSaNveACDY4OYOHoQGMvYZ3oj7MhXR71sxzpnC4nS0p+VVv/uv0Ky7xZSo9JyJhPIzy5jXhzOwMgMloozuP6ghOMnhmVkew+ZYFV8ck4usYBiIkQhMYJsYUR81xQuXbqGQ+NrmBlbwMrcfeTF4Y6MicOVES6ywzJKHsT0l/5zhCd/i6e2V9Bi1C1yyTOYs0QbXFDZUSfIPtR1cw5zlz7HvWu3sTonI/f1EkYGRtDfP4BQClArpqmkZs2jUB9G3/q7bJdI3x5PN1AqFhGInQZBAtH3WizeTmONaZOlALksFgumQ7WezWJldRmr2TUglUBmZAAjR2cw8/gp4Ij4InMQiUqeA69yWHtJlOb/oLT6xj9BZmwJGM2IM5Izr9MTyTLk729Dez3JPAqlFQmVngQDzLCGVxm5r7BfcsxuhBzL3+IW6NoYzv18IUYKd24mMPTof4bBp/5PEmZ/02uSaCFMMlJjGT1F+VRyiesfX8X1j85j7d4CxtODmBwYkw4Fn8pmHuR6GbnF19a71Gpc8LrzdBt1zNTbaQ+g5WqWWqDcoBSxll/DnZV5PCysIiE+7vSXn8TQF47ZqIxiHgC2tHtYJQ73fygtv/FP0T+6iGBcht50pCVxOHyjlDjWIoZlBDuA+w8fYHKaIz8603kRfi1InGY4Kb5VRq+3b2B0NIX+DHuSfJI5crhmKU6s2Ifb14roO/6/xdgz/1cJ44s0oihthGcn5THYpw9x+W/fxdK9eUyPTWFiZByhdCrMT5tkt13K+J3TTxJQKQjus+tVOHFcTLbcHc3GU2riNxtP2RC/2XhKTfyG8Rrg6s7j2Wtov/Xqr7fTeOOWq2mjIhhkf0ljl0WuiROYnXuI+/MPEB4YxZmvPY/kSX3ff4SbSBswDnfpzX+CwZElJDjXzQvjm6WKObnQQC50Evfn+vAv/uWPxSkP46WXTuOJx0YwNJBBPhfi6vUCfvKTc7hy7ir+0388gKeeOIxEwGOtqwt5z8Q48D7cEYebOvq/wcSz/wX3WK20KYNGwZJ42dHKAP7G37yH+fOf48TgFAaHxmS37A2l0xBdQzEquSI7G2Zpt4kpWLtaBWNUYlWojd9sPKU2frPxlNr4zcZTauM3iteIRufzePaCZu3cEy/qlysdrX3bA/dyP/1QwPa8IBIk8GB1DleW72P0kYN47JdfAIbFHwk8pp6dtIpEafHflFbf/GfoH1oERjjqFGfDjxOYl1wEyCemsJQ7jP/yv/0xPrwyhEyYx/TIOgaGB7CWLUqPIcTaShbTfTfxT/8vh/HYyUEkEjk5PpqWZnpcLQ3izk1g8NH/DENP/ecSUHaFbYEus1wY13P48Ht/iaH1Eo7NSIdARrTFIl2yOGTe10nYwrEFJEvH0WoBlNOqQY/TeBviRwFbxquhNn6z8TbEjwK2jFdDbfxG8RpRe7zHsxdsZb/eTuPJhnItF6AtTftIlDpdpfKgFFv/bCmH6w9vYzVdxJe//cvAkWF7ONGILSZAsg+Z1AAKK+tYv30PizfEK85xylguLpdHaXURQ+kAz37pa+JDTyO98hxG5n4RuYtPIbjzLMKVJ5HPHsDpR2dw7NAMwBvVKyJZ8bJrOeQezmHlzj1kHywgUUhjaHDCnNZQrY2WQEXS2fInSdRZ4aN7+Pjf/TlOBGM4NX4UyVLadCSKcg3aC+JI1hWG1V6ahtWTepT3y0U0Fa+OuNTbr1KP8v4OnL+ReDzdQj37VPHEl3I5Ou1cvTK14XzUlzc7
xe2JDxgSX/DExFEcS4zgnT/+AZZ+ft06DRH6kHrp7Jbwt3/7H/124vZPsDp7DUFYRCadxPrKKlLZHLILy1ieeyCD3RWcOnoCC9ev4oSs//3pKZxOFfHkRAKTw3cxMvwpfut/9igOjS1i6f5N5BaXxdmuIy/plPJ5pJIprIsPLiXG0X/iF4ChR2WDnyCKYCZbBJVq3LmkmX3vOj76q7dwcuIQBgeGUAjEwcpO0/+p89OASNdlIdUxGtNs/J2muxV7ff5G7PZ4j2c3dMrOPZ2lcXnpnuoY3HKFDroowmUmncFY3wCufHReBp/96Ds0Jr6hNoXWEP6z/3Dyt7N335ZR3yL6B/oQjI8hVSxidWEJfX396B9Ko7B0H6PDaTz/5AmcGclgMreMqf4VTIzO45HHc/j6N2dw/HgR2dkrCAo5DAyPobi2hmKugD5JL8j0o7iSRS4XYHGuIHH6kRo7KMNQPhUstCBndlzKsbNNrPjBLVz8i3fw2OQxDKaGRIGBcbj23m7llG4vZrPL2Ky3U++4evGbjae48bv5/I2odz6PZy/Ybv3xxIP65Vpdoo3bPfuAbEq8bibRhwOZYXz08ScYHBtH34HhaJTLIVzrLCT8P/+H+d8eHnwoa1ksrWXRx5/xDI8gn11Hsi+FxPCwOMgCVtZXMDiRxPCJDEYeT2Ls8QImvhRi8vE0BibzMpx8iOX5h+gbHEQwMiIj2lX0Dw9BhpYoLS4in1vFkDhvXvzs7CJGDpyCdCVMD6M12aFiuJC/lx7ig+//HU5MHpVOg5w/ybdm2TjEPR/XVTbDjVcr9dhNPBWXevtV6rGbeCou9fZvJR5Pt1DPPlU88aVeedZKPSr7Smaka5xqmMSADDLP/fwjHDw4g+T4gInVKI2dEAS5z4GhElIHxKuPjWBxdR3I5cVZDmOJ88CFPMK+NIrr91Fa/RBI/QyF8Q9QOnYRpenzQL9I8TJyyzcQhgmEA4MorCwjIU4OAzKyXVhENi/pTYgjHk9gILyBTPZjyecDcwE7GTm58HiKVYp41QcFXPrBWzg9dgwjmRGEQcr8xIf3Zv3LKzwej8ejcJhGKZrbiwlMJAfxhbEj+NnrbwD3l4y/UB/TCoJiIYmVO/MoLq0iMzKMQZEVcZIIkkinMlhfWpGR6AAGBkawuiCnX++XwXAGidUkEvl+cc5prM+tY3ktL0PxUTkugWUZ0faL4y0uLSOXyyEzMS7hIQoysl1aXUPevHlC0hX0N1S7wThRfihBBtqf/+WP0cd/cs0F6QAU+FusUsE+iSxO1+PxeDweYt0sHWpRfEURRfFfA0PDGC2lcfGv3xL/JvuLesNy9wRjQykMTEyhUChhYW7W3OMcEKfL795mRkaQkZEuh9rJkVFk0imszcvItCgj34R4N7mawsoSVpeX0T84aKaPkUpiZGrSLIN00jjxwuoq1pZXkJDt4ck+8fIPsPTguuk2cAZ4N+g9WaO1n9/C6tWHODRyACGSZmRbkHxYZ8sZeY/H4/F4qqEXUQlLAc5MHUX+s3tY+ehT8S3WJbeCIJ8Xxyn/UzKCHekfRm593bxPOTEoo1fx9vwQvTmZOMtwfBzr2QKyi1nxdCkjK8s59PcNiWPliyQkPq8tk5YE7ZQy0y8lQvSNTSAYGBVfLaPbYgKptMQxTwzvDn7hx9zdXgIu/ug9HJqeQSkdmN6KfduVnoHLdj3s7fF4PJ64QrdF4cuP8iIcgB6aPIyP3/25+JZ1GSSaaLsmSGIVKCyhsHBXnOGCjFT5ukZJPVFAiS+c4sNG5ket4pXDLIbTBaRy8+K3Vsxxpewy0imJL8cUxHPzYnPFHNfE6ZaQlBF0MiMZWJaR8eoD3jQWJ5lDSeLshrLb5Ipc4/oHFzGYDTEqnQbqRt8aRSV6PB6Px7MVxmvIwDEM0hgbmEB6pYQ7Zy/LjuidDdy/C4K1fJ+MXvsQjo4h6OuXREORlEhSEpdlwHVxwnwVowxOc9m8ODHxcHwtooxcA1kU
zbyw7E8wXlLCMjIwlnSLEibD81JRUs3ItjhDlAawWhiTU/IFGNGTxbuBWpgr4tOzFzF54IDkSBQjo1vrbj0ej8fj2QZ8+ZEZ8iZwdGIGN94/KyM6ex931w43NfEi5h5ksPpQRqTFhHmNcgJrSMgINpTRb4hl2ZbRLKeeF/LIZgexuDKEpZsFLNzm9gBWlsQTL0v80jqSiTVZrsoAeUnSk9Ezp3bFAUPizd9M4Pb8IUw/+psIDnzJOEsOoHeCe++2dP4aUmslZPh73/KDUd7lejwej2f7lMzt0RCTfSMYXC5i+eJnka/anV8Rv/p6CbffwvrNt7A8d07840P0pfNIpgJxlHyNREEiBVhZHcRadgITh55AIjOBnHFsOaSTWaw/uILVpc8xkFlFKvoWQJH3bgsJrBf7sMgnm/tOYGTqWaQP/SIw9QKQktFoQkbUzJe5lO0hLl4cu/yRfsCtP/whBkp9GBoYMU8iay9kJ+l6PB6PZ39Cl0K/QQnp/4p53Hh4E3OTIb7wD1/e+QgxIlEqLpeQuCO+8zNg9iORC1h78Bmy60vI5tbFaeYQJvuQ6n8Ew0dfAmaeBNLjcqScWUa0CEVWbgHX3sHqnY+QW18wHwbgw0zpzDCSg4cQTj4OTHwRGJRlcFyOzURD9ugi7GJbGIfLldtF3PyD13H48HHkknb6mmqzae4kZY/H4/HsR9ThEv66JRQpFtbx7sNP8ZXf+jaC0QETwY23HRKl0lrJOM5EVjYpSzJqnJchKl+AIdu8Sct7s+GIrIvwO7bmQ+2Ebo9IvNKCxJ+VY9dEJJxD3bRcXMC3dURS6pe0Mia9yt3bnb2Mopzhn97GnR9+gJlDR7HuPEqm37X1eDwej2cn8Kkkjux+fONjnPnmVzF5RgaMHGtK8I4cbq7E10JU4KjRjBwj7HjRiroz7ue6hhOGcbStI27u56cHueR0uNkv2xzZctLX/mSHLpHT1s1fOs9nYkcrd7/7LgZurGJoYkIcbqG836RrhtEej8fj8TSP+pGkrNB/nZ27htQjkzj9Ky8YZ0bvpb5uOwR0hPTiFH6/h9v2ZVcU+xE7hiXN/lI5TrJUMsJP1lP4THPAG6p8NYdIUMpK2LqkmxMpyDHiZBMlicPpZl7sdl1tDdSInG51cRnpfrkCuZbKmNY7Wo/H4/FsD7oVQg/iepGhdD/WHyxWRp07hK+2iBK2jz1T+M1AK4FZ8hz2QqodWfUWXXNSxP6MqCCuuZAQ95xgGPcRzQ6pPrpZyi/Z4kWtis9dXkOYYjdAUpRdu3DhHo/H4/EY6EnMsFP8ykT/CErz4nDW7T7Xk20HMypmwnRUOiXMEagrDKt1Y3W/J2scnpVQNjgq1jStI2wkO4A5Xi2hmJcRc8gz8QySVvQ0VslPJ3s8Ho9nh1inaod4mXQaIe+9rvI5J7Izl0tfaNi1e9Lzl6/DOr6tZXtUZXM9H71yy6ZV7dQ9Ho/H49k59Dccu/EnqKEM7swDwbug7HB3jevjqrxiaymfhiv5nPOii7ae1uPx7Bmm2bOrHs8eQOtbF3/Dz9XuhtY53L2Ab78q7vIutsfj2WPYe64VheuVG1MeT7NoN61a9J/dbkStpfH2qfm8qzPA2wmtc7judXSqXjDzvg56Ok5tpaMRVp7tryeeRljdVUSdKx+0VHGfJKH0JvXsplaqdeVphK2htfpSUVy9bkRD6+/dGa1zuO24uibYXX/D49kJbrWhBfK+Dp/n54c9Ks/4u+ItdTOoGwpnqzhlR+Fv9SuuphKHonS4sWkrzCdthTbUWCr6UZty9eGxWJvhb2z4U9VqyVWJ1WulfqpFuZptpYZb53DJHtu/q5itLqVSlRmzWjbu89SnWm8qrv56T4+aFzsKq+RLq+hW4qlG9UJnazsuVrit4SrcrtW96r+XUJ00I56N6OOzutzYEvHHrraDo3a1EaNhicxlkUseuEta63D3AFWKUUi0vjkmVqRs26Os9BqtVPcoWfk9G6EetXFs
rD8rGo/HxB1rPxT+46cpzecrzStL+yLhK0z5ztVBRzIST3+P7qmGlVZ0WOpHwujN1SOX+nt+1bqWQe9A2yiWUkgEowiTM5EcMBKE06KXKYk1hVJpEsXihMiQHEObU71UZH9ic27rWBqF4gAKpVHki+OynJAw0Z1IIhB9im4pYYq6HZOwARG+1qlaj64QXe6GRMncCY4P/Cwf31Al7ThweR7nXnsTj82ckg1p2vnuyAj9TXB9GM+VWjSMKfRyT3o3OPpLsJcYYVRXqytXj3FFr92OthJhCoVCiFSSTiJphN24YrEk4WKlpucnlZ/RxRiLxSWE4brEYCfPoyQSoqNSiEJeGrwiHQjdqXTOgkD+U6+yGkqdT0jHLcG32BUlbkGO0dFJ3GGFYU4yyOczuHN7GXfvLGItuy5OI49ioYA8JceOK9+VkBT7ystRi3jqC0dx7Pi0HM7fhtp0Nm/3ehmbf/vt9n58cvYqrl5/IPUuFNsRWzLvapAY5t0RRaRSSWQyKfT3hTg4M4ID0wNIpUWvYlcFPohrPoJjdcljgnQCH145i2e/8w3g2IhYKJ8s2L6mY+9wz77+Jk6Lw6UxFkxljZS0wfA0mzqxEL3WwzSMcjQLIjqAjYCFx4jRF2nQnIJQKmv7D6tHlgL1l0ikxKCpS+qEjoWKj+IYb0O4ZIGps4mj/tRuJB+mgzGEXLYP16/fxtLSChaXV7C8vCqNY07yzfwzrlRUcRrGdhJLePGFJzA+OmRS8lgdJcR2SsUkPvzwijibJWnnijIqyVr7Mk6Xtb2IvkwoksTIiDSOM1MYH+9DmOBrf1TXcYW2JPlMTuHHP7mAe3cXMHPgEPoG+tDXl5ERboikOItkMslYCGUkFgYJCV9Bpm9NnERORmfsiET6lEWctbFzbP45ui2V+rCaTWNl3bZPhbzYlAi/YsfOCm0sm81ibW0V6ysruHHtCp566iDOnJkynTuTjuk0R1PQ3uFKRiOH+4k4XI5w1eEqDR2uFEAi0SdG2i/KTyG7lje9x1xee5NSKOI07O97+RarFUxOpsToqXyGmSJgSvsMzTMbCHZsMqK/DGZn15HP2tGrHY2IcYZsKBPSi5QRYDqFdJojwazEXxQjZk+dqC7jgjpcVjXm7TDe+OHHMhJZwdTUoDR+kk/pMfeJhMnIUVAXoocg4BFZDPTJ+ES2vcN1sR23tVU2iqGpd0WOMqQjLFVR6mTJ1slsEctLa1hYXMe1G9fxay8/h9ERO+K1acQVXr803clD+Iu/+hkefWQGx48NSydWHIRjJtpMsw/LERtkxG/fW89ObLU9xalWtQ5tT+xT7XzPId/uXw/7Ex87C5VOTuP9985J/Fl86elDUlflKH5hQJTvHa5Q5XCvzOPjaEqZzVhRvKyqvdrhco17eJ9kCBcv3sb1G3eQzeVMhQ455WA+42BjRWo2zmFwqIDnnz9tGks1bDu6s1TO0etYnei92EQwLA1ggJ/97JyM8CqvO+M/01mRAjCj3UhX/f0JHDk8ikdOHRTnyzTY0MRJe5FNSP4LpRDp1CN49ZX38eXnHsPEFC3GjjISHP2KSO5RMkvbIHKUFpgKTPFUoDZoI5xtsve4Oelk61gkou9EKWOmnBPhEF7/8zfx4leOYGpiVfbTlmzdjSfMcVLGAcfw+g/ewxOPj+HEUQkt0Z6Yf+63FsM1tUMV/xnS+rD+GSdg2hljVQyOoL4YnpSB1Am89dYVGSTM4ZlneE+X8UTv3uFaduZwabiyVUphYSmNP/+Ld/HMs2cwPFLCQH8aKY7IODyTOByhJWTb3lviVKBUaDF+e++NhcSiq6RMqrd6FasfNeBEol/0I6PcYuQ8zTSrNWv6WQpfypLNieYKSczPreKn77+Pb3/rqxgaWDedmThoTu2Jf/mPDpTfuwqCo3jt1Xfx4gunMTkleWEDKQ6DhmecLf+ZCk/h0aIzuxql56mgtkUtKa6WZF2cLr9LlkjO4NXX/1Yc7nFMja9JWC84
3JQ43CN47ftv4anHp3H8KB8gy9FibBRDRUcV3VRsy1ON1d1meuG+EMnkEbz77mXZeiAOd0baf+5rn8OlJ4o1rvmRxipgVpPIZkP0D07i+MkTmJjIiNNNINOfRzK1hhQluYYwWBIlL4gsSrlEn4con8n2J13ZX9hcG4dZWhXHsyIdlEUZ8VYkCJfFkFeR6ctheAgYHU3hyNETsj0jztd+2WnzytB9mFw7l2xnRKTzF3CKfFlEdIE1WfLBKJFEtERWhB02+wzA/rOXZmBnjY6TelKh3lSHoteE2FliSeyOnTXbse4Nai2CjTxzZ5fVEtmhEf3nqcdGfdUXQodKsRW8vZYVb4cruikrj+uOVOCGHZWZZUJ6jolVOYaNI50GR7BUA0WKycxnJSWqjHIl3KbPYyuJls9ptixVp+xJXD0S0Z15alQMm6OPEp8OtJIwfT87BU/HE9Ahic5L0oiyYeWTgHHU2MYrZg5tR469ZfZ5rR1xaaUyNepai6carU2qK5WKHjWOvffWy7XNtRNdV/249LIO9op6em4ttOpYow6WGVF1qcpokiqVv1FvOvqZgXG6nDYue2qORNg74tIVTzXUI/VnnzxW/agarc74pheJk7CdG2qX0kvYxt9qoN54oxzWaxnvOFSg7agZp2ueD+hlyjUqWnragatdjnIrv6xoD7F3uIRKU3Exg1WVKKwck6NXM7IlHPlKJTYPudgKTbQJpQN2U9CtSohNdX/BHHPkoV0d6tNqzGgtmjUwoz4Z/dqRXu9h+2gc5Wu+I0uQcBMWyX60kNbTyzp0WxOtU7VCjMHVLDeHMXpVWoHRLG9TdGDmJL6toLFB08RVRPRllCewjdusYCrHUdG8BU7jtU6XSzPKZXoqEko0PZX9S0WDlHr/bCG48XoH80CdWWHOOH1em7+KHuw/j6cZ1FK4dKWW5luf2pR6SXaLSUdUaR6Q4n1cBz5Iq/d3a/ftlJ4YdpSdn6sT0xBaGqnKhvNvRWxYZMx2o7IUKjGrgj2NGgCjpAb7YoqpnFyJPg3JWZGidNQKCem4RU9rb8c2qJ1eFU+z+NZkL1AbdaeSG4107S9ZdkfsHa6rGreSq2oaqciES+TaeHq8puWOlEk5fB+Ipxmstuw/e0tiu9D2elU8zdL6Gqf12BUzfycFE3epzZdLvbDNsC856gyxc7j1JudUwbVqqzclrJTjike199hcuF25c1uOqwSyj4Xew9I81I6dhq8W1Zru36DFWKK9X45xuWYcrRnVMsT+2y7utFUrp6+6kV7OW1chek6mUkhm0gjTKSOBLiU89hLlxeRRhPklO21lOvUQXuxefMGLNarlA4uXF3D21Tfw2MFTxmnqqx2rq7QewYd3Mrg3m8Db753Ft15+HmHpLpLJBPLSXtqGk/dt2WQ2Ljz2hpIp+yLsXoYaUD3yZfHNY/VtHZKkYF4cEiJXnMbr338Xv/y1JzE2vCRxmCYfuupu1HqMTYhtFJEX4Q/mT+B7r/wEX3nhBKan+BQ2ne7288P0+SpMvlY0Edj+bymaqu4txB6k7tDhcmqO6zqyaG6qjjpJyujmML732tt46YVTODDJ38jHw442h2+a0hdfHMCJoynRT17Cm9HLRmhP9+7fx9179xAkbVvFF/KXp03NqyFlWa+B63ac6zZ2IzbEfJ08dgIDfX12R1MwkejFF+9cRr5wB8+9cFTCrD25gzRjs+kEPrpyFk//w5f385umFnBOHO7pyOG6b5qqoCHW4d6nw333LL75DetwQ3G4fGerVYKNW0+NZr+MbMn8/DzW1viWm96GvT7meHp6GmHkDJqF+rIjZVkTh1soHcBrr7+FX/raU+JwlyU83g43jBzuC+Jwp6b5cgZxmOZtSNuDNstXjObzOfD9yyRmVXJLrBlYLWbSaSTD0OTRO1xFHe475k1TJ44mRT/6asftQY1yxHfuwnnxqwlMTkwau6K+Kb00g0KT4gtobt66KbYwhYmRUWtr3Ocs6+eWe7bn
cH9++RP/8QJ+nq8VDncrTCrmlY8JXLhwAQMDA0hxKqOHYV5v3riBJ848jkwmE4U2B/VV63BfpcP9nzyB8ZFVqfz8HXT3zxSo9dRzuK9EDnd6hw6X+p1bnMfVG9fQ398fhfYWVneC6C6bzWN6ShrGsTGT9+3R6w73qDjct1vjcDNpXLx0ydjUwQMHRHXSueEIV/ZR73TEcce0LyKhtMHXr1/HoOR1fHikSmMap35uzdFIicN9p47Dpc6IsV/RmX+X8g4dbu2UsjvC3Ux9RkliqOwtXrl8BQcPHURaeuu9DA3t/LlzeOTkqW3nlfqqdbjfe+0nZkq51uFq49uNpqh20S6H+3B+FosrSzh0+LA5j2rA7WG3ApNuvQRrztMwnrLt6+IsCUdVAWYfzoGfR5uW0Yhb5s2NdPebw93ZlDI1mZK6evHTSxiUQcEMHS6/thQ5XFG8SVb1H2eYH47mP7/6OQb6B2xHrmn75NHW4XKEm+uQw91+F2ofw16iWZbEeKWR6GVhI2g+xCzwntDOoNFqDZCOkhgu01Z4Dm1s9yOac6onoJpEuDSi4dFyp1KVTnQOV2rP0yieSm38rYV/LXJ4WTxKI23sTkssq9qllkmvFIDmRx944l+VZtnJMbuB9WZfYQrJ0a7rALaCn5szhSOJmKPMhqQnjqTXxMleWbaHjGxEt3x0SjRnQmyqG1Env58wOo0Uoo6MXwZTCWQnZ20YyTzIt0Nxj2eatdJsPJV68ZsRlj4Xmm8uFXbEmruP26uwfqhGjIYi2T2uzrVuU9fGAcdcTP7swkALUtkOkXlWUdaTSCuJt8PdgTLYsPMo86agbd7L0NjlQm5tWXQVWjnBBy5ku7aR3Aqqxjpbe5Rd72mV7QjVKRsQjmyNs+W2kd75V8t+62A1RvXg6mN3umFd5SycfTI5GiREQqxtxVuUemHboVY37Sa+DneHGqZizY+nt1nh3YKtPXVtzyvO4lqe9u6oK7NLNp3dTSBNraa7QWsexfSwBS5MY8mlCWkdTK/25QGURuepjd/K6ylPAXqn23Kss5X6qksJk1VDr2m73G4569vFHBtJJ+jpKWXqX6UR7iu9mqWcpvxxC6xXROEMgMmrBJbzvANq0/VUqKfX3eh6MzRdVzaj2XjNUk4vMoZWpdsbtK6GUK/qbBXVe6/o3G1TdGDghjWL0UsknSB2Drfe9NRO2O6bRcoFI6fncieFGwdMviSDek9tN8bsaY5avbZLz1qGrmyG7t8qXjNo/eGzslqPPC5UiN5M2B2Nyo3bu0+9+9DbX9pWdTM9PcJVA9Ni2O40ci3aaGgqmrYb1guovtzpGk/76Gb7UVtoCZKYm8+Wpu3Zt/AnZ3a5fYsy7VsknaAnHG4jZbnKbKXT0PM1Om8vwLyZp/SidSPe8bacsrPtgDGZDlSNbEaz8ZrGSceky6Xd7NoOh6f74W1B83DYDgdUxr47ZICxc7jlOxO6kBqrunJ1pg6ipQ2GQ1X60XYvYabcacD8KZQsezWf3YCotu1o2dWTejQbb6dwNLK/fwrkaQUVZ7v9Z3HITuzbnI//zHm3V3tjPsJltm2DZZ6krNVapMlW3i+KkqxyQG7ynWg8O4X74MV2DcvTPK797AfM1N9+y/Q+gC2ESlzY9vXWaQe30zb23D1czboqsnlVNI/rZN31XqKsN98w9hTtrBfNUB7V7tUFeNqGtoW+yWhMvB2uVNpyIXM92lZYp8vSRitg+u2g6vobSNsQfbU1fc+e48vX4+kssR/hqqNlRtT5EjYmrsQRzc9m0k5c3cVZj54KjexHy9eVtiInb/s5PB2j1nbqiSfODte0GLbJ0MbDblncbffBKk/z6KsvTYWhDuvqkXEo2uXRpaebcBs9LTHihpfLuAPFx+cD3GcEOnBKTxsw5SftBD9dWn4rGQuTL83ZJ4W6nZ8jxX+E64qUftzLuNwA0tmZh0vs257Mb4ijNz9pQ6XSDspGxGV0LY3Pxbj1xNMtaInoPVQt
Sw1n54pl7paxrrfaxvQhk1an6+kspvzEbpbX1nD588+RKxZREGNaK+TxYG7OtFPG5kSs1fVAmUsdYT0x/6L17RB7h0tYiKYgJe9aoG7Bxs0Rs2donKoY81oui5XsujHkohTu0toqViWsKPvqPpndShy9cbn5qepp3tNNhMkQiSS/vZoqj0YCCePnF/kzMDpC89vrqBHhtunotRD/tHtvwFJk28OR7fsf/AwffPRzfPjJR2JfIT79/DP82fdeQT6XQyi2ZL9+ZY/b78TO4bJn4eKWY70yZezqI+JBQRqmfKmIv3v7J/jua9/D3bmHWM1n8VMx7Nf/4s8xv7TYfhcXKY4dFk98YfGxIbwko5B/9+//FB+e/QRZCS1IY/nDH/0dzn56yTjfWrTutKv+tCtdT2egXWULBTMgePr553B/fs7Y1dGTJ3BcJJ1Ol5+x8WVt6akRLkWJawHzutkbTAYB5ufmMTQygpOnH8Xt+/cQysjkxZdewujYqIln4koPsh0jERc9V33Hy0D+6FxFt9t3PZ7mYSlwJFISh3vlxjVrS3dvGwf84fmz+FTCFtZWjcM15RuNdAlty4x4mUYU1qpS1UaYQlqVrqdzsOxSyZSxkTff/FukMxl8eukSrn1+FZ98/DFu3ryJgpSszqZ4esDhsqKaBkXXGRhD9LpZIGb6RRo+TvfdfXAft+7cMVPLt+7dxb9/5RWsrq2hVCgiwcaRS4neDnt2e6a1DWRj4loCvQ27QNliAavr62b2ZHZxARevfo5bYl/nLn9qwkzZ1jpb2dzuhz42w02pHTbr6TAywn3+mWfx3NNP45mnvoDxoWGMDAzgK19+zoxwaXe8Hablvt/LPHYO1z4uZFZEokYioicKM8oXG7vx0VGcPHocB6emcfTgYawsLWGgv9+Mfgu5nHG4vEdiRiYtho0s9cmRjYonfrAMjX1Iw3j65CncuXkLxw8fwa1r1/Frv/Ir+Ef/8Dt4URpH08mL6pNxslreEqAPVLWSdqTp6SzWroqYHBrBFx89g7HMAA6PT+GEtFUvfeUFTE9PVw2GPD0wwu01zIMI0hCxwSvmC3jy9GN48Zkv49DkFE6fOCWN5K/i1371ZUxOTNqHXeQY9wnTerIT2BSaRnenCXi6BlOWYksnjxzFP/jWt/H4o6fxxTNPoA8hDo5PmnDTcVOJjtHbFDraJcYu1RnvAJM20/J2FWvURmg3dCIp2cokAiSlXDkQ0E4bi5nTyWzX2g3PtV2z6sBlVbHvHK4xFBqF3ewa9HpoMJyCIRx18Ak/GnGqCGQkVlrEjEYkDqdryiLbpjdZIx4PUVsKo/WUSB9tSiQtwjD5b1CzaYf5uA7dE3/Y+TLtj3T62W7ZX1NU2h4ujKhxtQkmv9P2boeH7Yh9OcLtpIK3izGcSHidLCB1ukGhZBtNEdODlAid6j164omYicHYkow86Fhdm2IHjusMY1yKottu2G7hdai5tjptT+dxy4/rZvYiRqgNdsoO4+1wd1C4VKxxUnaz69AGSXOm16s9Rz7colN6dLpsQM2IvYF49jdqR4TmQNuhDZnRiIxK8pFdcZ/G5borZhQT2Zw7vbxdzFE2mXLays5S9GyF3otnmbXDGTJFM3UsoqnHrR0yA5YOGWB8He4OFOQ2FLzvudOGo5PQVmuFqI00I7vFTAPGQFeeralnTyqkXrjKbqi1x93eC/ZsjtbZTtdbt5xrpRvZiX3be9M7e5A03iPcHWCNMD7Z3kvDNbqSjomnd6i1JVdIvXCV3VCVTvQzo92m6dmavXC6caLKLpuhxslu1+n2nMNVxVENrng8nr3FTC1GPzdruoHzeNpIp31E7Byu9NeitQq+8no83Y/v+Hr2O7Ef4bq9E3dde9EqHo9nb2E95AMqbj31ePaSTvuI2Dlc+1Nqs2LRCsxljdZUkUZaUMNrkt837OThAI+nEd6aPN2E8Q07NMrt3h+P9wjXPC1mdVWWKP/qZFVaQb1kasN6zSnXe48u
Q1Q8rUf16uo5rkLKS6kcXDdvRqP4h3k8e4zxE5E0RfQQmvm3A/uN/ZSym+Xa7BvnK1I78m0FbqOiDYrH46mPryd7Q9QEltFOT9ylVbQ6va2IvcMlrkHpaFaV2Epl6nlMIdFwVTSsx8QQZdr9zSSDVDytp6JffaKXy3gKLckuLe6ooMrOPG1D7YmYGT8uYy4kjrYTf4frGlBNCXCzLFpKO6Rc0JJYVboU2dFI9C09rtSLR9lpvHbE1W3No4vsqugjojaOZ2dUGkR1Wvz8YsWBxVUCsSXNGzF2JRv+d96dw709pFrf6s1hut+VRjQbt9l4ZDtx40C8Ha4on+pnRTavOOS62RFVaEdahpzAOChZbSpdvShX6mAeTKqJ1/BhpZp4RhrRZNwN5xc0n+77mnW321P2tAtqv1fE0yk4i8D6HCZDhEFotrmeyqQRplMIUikkRMxStilcrye635V68Si18ZBMNhWPUi8epWE8rouEsh7KeeJCQgomVrWhKJU3YDNfkI3LCzj36hs4M3NKQvi1CjsiI6aaG2/ANdlbEsMrZXBvNoF33j+Pl7/+NPqTszKKy0uMMIq1OYViEUEmhU8/+wxHDx1Cfyq95Y1zjhJradRLq43bbDzSqrjcY44wD7aE+OTcWRw/dgyZZApJo2WLxrNH2CVHMpU9/IaILpMolGbw6utv42u/8DjGR9ekQbB6J93ca9UcmZyLQRWRF5GGLHkCr7zyE7zwwglMT6/L3oKxsWZhutTx/dkHWFldwbGZQ+ZjAgxTzRlUnXEnCPBwbh75XA7TU1PlemP+1nkwbyPWjoqJw/jea2/jpRdO4cDkuoSxIWhe792HzVcidRyvff9dPPX4JE4cDUz92Em+2DlOptM4f+E8FhYWMDkyaj6Xx6/5FKUMtONMjZekPXOpdQWN2rZ6LqMdcTeLp3vYoZh7+BDHpI2iXdGW6h9VC88VSp/gCN595zLyhTt47oWjEmbtyZ2NIUE6gQ+vnMWz3/kGcGxEYrH1au5MLrF3uOddhyv7dFRmxOiDa7K3RQ43kU7ikwsX0J/JIBP1HhNiyPWoNWiX2mNq49L50RFtFY9sJ66yVdx8STQtad68dQtPf+lL6GvgcLlGE6/vcFW8w63F6C5yuIvLS+JwD5rvidJmadrmQxWblF81rv5sqbSG1qXL16nOisNlg+gdrovNV6sd7tmzZ832walpGpL9fF5oa2i9TngjN1DP6TUbdzPX0mzcRudnKJ1iUkbud2/fwcT4uHe47YAXa7LpONzHIodbFC1pE1XJlD0iQePdpcM1I0Ap4I9k1DcyNIQ+GeGa89QxCqWeendjxKTZNMlOzs/97Ngwv7du3sQXnnoKacfhmqMZXVbU2UabDiwJFe9wazHalkbw3sP7uHrtKqZHx8xHvI2jFTGaE/2bDuSmDsleXQXG3Sx+s7Q23SARYnlp2TSKMwcOlG3Q/PUOt6UOl9OsFy9dxODAYJXDpRD+rW0Duqn9cWkUV0OTYYhr166hr68Pk5OTxpbqX0ktPJ93uFvCizXZjBxuoynlCnrE7h0uodFeunIZhw8eMg6XD32YY8UwNp7bGkctnTJispPzc38QBqbBP3/uHB45eco43DA6zj3amrd3uDt1uA/mZrG6bqeUw4KEShg/mWc+w8hT1im/aux12auUuGwpbOq7pFXp8hjp7gYh7t25Z1I7MD1dtkHz1zvclo9wL1y8YBzuzOSUqbe0p5LYFvVtipJLrXeyaNSpq/dQW724jR5+azbuTs+fkrxev34dyVQyFg63/lxozKDqVDoBDdU4JYqclCM//TB8rXBfrewmHqUdcWv3q8GpuMguR9/8yydp1bnabd3r2RpOt9ovWEnHJVIfb5uE4vCSslYtlXKy29JoyLFl2RC/WWlHupqW5EXqCqf/PJ2hqgMdSfnb2VznDsahIxYnZWZSHFHMvhqpR714lHo0G49sFZf55MDAPBzWOJmuIXYOV1SuK+WFMRJdZ2AH
CIPAiDpd2mhPiRixqZyRQt1RqOpZpZqNIZ5mER2LMZt/sgzqiexjpTVLE8aG1F3uUFqdrkmLwmcuJFvSUMbps5hxR0evpjayjRJhiCkTCWS9NkvuppiI1bW3vK+LpcRnTUTCkJZrw9oNnbz5J8vtEu8aQKcQrZJOKHsDYshm6i/a7BWKUU+y3gNhatgqng7CYlFpJa1O102vVWl6toWZiQvs7R4tArO+SeWNc33eiQPcNjXn2O45fZezBRgjtqs9AyueGdX2WsZihLaJ9UXvnrdaWp2uHVlRPHuPloO2WVq992s177RdeofbAnqtQeE9EdKRHqOnR6muFb1WR+KMlkOj2h2He6GtpJN26R2upy7e2e4FqvNmdN+uZqKV6TKtSnrmmYBo3dN5aFX1hGi5aBntl3Kq1UO7ia/DNVbhq6+nV4iqfUKf8G7UBGhzqLbPeK14Orz96TKF8k9RBHbqap869bQHapn3bs3ShFRKRonryFbtSGfmtkv5nvY2MLdKeI98mz6o50e4roF5PN2NWutWFluvkrfCytubrpu6n0HZG9Tp1sKyUYkjO70NxtgqzbJdJ+vS8w43rgbk2Y/EvdnbHD4xu5vGyrN7enlaPw6duNg5XPvMY2N0r9tr8VXc49l7yvXRO11PCzFTyjt0trRElabYpe32xAh3KxUYhXZ/58fj6Vm0+pWdrqz4OulpBbvtwBk77JAtxt7huvckXL0ZJ+uIx+PZO1gv+eiV+2CLr5uevabTPiJ2Dpev1IpWzB8uTGUWjZmnzRx0s1LFPR7PXkFnyzrKF+nzfq6+bpD4OurZDRzlutIs6j86Rc89NKXKo8p7+QEBjyd2SGV0GzdfNz27JW7v5463w40catm51jjY2hGvx+PZQ6R+1j7cstv7bx5PnIj9CFcdLTOizpdota6u3h6Px+Px7A3xdrhR71gdrTpbhc62LLU7PS1CNa9dHl16PBtxP/NI4vDbSc/m8N58MpUyH4MP+MnS6AtjDHcfkvPE2eFG9ZaLskjZanUuO9pI4kyYDKuk+6gqBUc8nsb4xrg34HfBb9y4gXNnz+L8hQs4e+4slleWo70el3iPcCPKTrWmjTc/GVKJwuIGHeznn32OTz/9FFeufIbLl690aUOl1xRXTe8Pcrkcstls1XIv8O9Q7h04SzEyMoKDhw7i8OFDOHTwEDKZjJnNqJ3R2O/E3uG61ZbrvViNOVUzODCItCxpyDTwfCHvRwiebcEHlObn53Hlsysin5nl/Qf3o73tx7/asXfp7+szbRSFzjcZJqM9HpfYOdzKL/cqqKOtdT/lJ5cpUVjcKBaKmDlwAAciOXTwoJnCCYLQ/o6xSWkf1Lp5pUEkus2lp5tgR21ichKnTz+GRx45ZZYzMwejvZ2jvfbo8XQvPTHC1SljFcIq7UpcYSPJBopLOljdptPV/c1I5+jkuTzbhXajHTa1IY/H0xliV+PEfeiKwXWmcXasm1HrMDvvRD29gtqN3lvr5D023rdV2+X0ssez34h3F9c7HY8nnkjV9Q/UePaaTlugn1PyeDwdhY2cd7WebqGTtugdrsfj6RzFUrmB4/yUf4DKs5fQBlU6gXe4Ho+nI7iutbah827Xs1eYtxB2yADj7XB979jjiQXappXv20YL//CfpxWofW2X2o5fu4mvw92Jdj0ez56gDaIRad0o3tl69hrXLjtBT08pa8/FV2uPZ+8JpFnTxs07W08r2Wlbv9PjdkrsHK6tsh6PJ3ZI1eVDUv5Bqc6hb9lTjbvORdfjKESXBslg1XaX0tMjXDU0NTaPx+PZL2i757Z/bnuo63EU4q4bR6wb20DT2MGhO6InHG4jZbnKZE/P44kDbg8+7kJ0WVnxeFqLtvM7wfiGDtlm7Bxu7asd3U/vuTpTJ6vi2R5+6q9zaGNhtW1/p9orQnSpHySneNtqH2zujNSoWMPK+3tEiC63C1Wk0gliPsK17pdSpCHVai3S5E6mGvY7+s5bT3vZYLI9
aqymnrIuOkvvdFuP0S9X+IGKVAqhSJBOm098UpKUTO9IkE4hKXncKWV9dYiENKqdPN+uKYp6+LQj8rJxZR6fvPYmHps5JRsJcbqSFfmvvQhbnZk91vBQ1jK4N5vAO++fx8tffxr9yVnpbeclnPs0fmPM92fDAJ9+dgXHDh9BfyptwnvRMSUkn8zVxYsXcezYMWQkr9trHnk0hZ/qS6JQmsGrr7+Nr/3C4xgfXROdWb2Tbn6nLnPAqzMP64mnKIrhFeW6w+QJvPLKT/DCCycwPb0uewtSmWx+msFYTJDAg/lZrK6v4ujMIQQFWrftPOpZewYp4wezs8jncpiZnjZlrjNPzdUfa0fFxGF877W38dILp3Bgcl3CCiLN6737sPlKpI7jte+/i6cen8SJo1L3pH7sJF8FMZlQHNEn589haWkJUyNjSMkpCrRcmnAUr1dIJpN4cP8+Tp44Ke3KqPENtK2tbYr7Qzn+CN595zLyhTt47oWjEmbtycyMMlpEkE7gwytn8ex3vgEcG5FYbL22Xz/j7XAvz+Ps65HDZWMY1eBqNTCMIWK8pd073CCdxMdnz2FybAx923ZC8cDoQoyWur567RoeP3PGO1z+bYPDvScOd2F1CUcOzCDk13QkpcrLD+OP6o+fA5ydfYiSdCoO0OFKqObSO9zWOVx21jiyvfDpRalzJRwYHUcoOjczgGFolr0EPzF5785dTE5OeofbDmod7rlohMvqW9Aus1C9xr2tc7gfffKJFO6YjHDtVEbMVNgcYrSFYhHXxOF+4Qtf8A6Xf9vgcG89vIcPz53F6OAg0gke7zrc3rGrIBFgWUZcJ44fx+HDhxGyzCV7zKl3uK11uJxG5ixcpq8PR6YPICEO19Rn2cc2zGjbKJ4rMUWun4MCOtwb169joH8AoyMj3uG2mrLDpV4uL+Dcq2/gjONwazPDkJIUABvDRIsc7jlOsx46jGEpZHMpjgqZhpl6jilqaCYP4hAuXLiIkydPbOJwNZRL1SKXbEh06R1uLUyX+r0no76ltVWxp0MyEpFQaUDMPqF7tbJ9EhyJPLiPfC6PAwcOmDI3emVmm6ov3uE2AzVJh8sR7oB04g5OTSMQuxLTFWWLI+oloxICsavr4nD7pS0eG+v+EW7sfxZk7EilRkntggXKgmahus5VtxkSV3FhHpv7UHhVKTji2QqO/MIglIrIpkLsSgqhlySMltKzt/kUm6rCm0nLMSotitL5ZSZZGKKvNNWWT+zF5i42xN7hEtG7dRZiUWYp6JK01RG7J4rgueIqCqdr2IEoseI2RT3Ne7aCIw7qmh2bXnxqV63B5JFLRzztQeuui3ace612VuUnBpmLncO11baCq+N6+vaVe+eYEXuRUyyedlGvM9irNqt59fWxM5Q7cpGz7cUOXRUxyF5PjXBdh9tp3Zd7lGxUYizbhwfxPpSKbu8osX0JbXVnuo8ZPd7edyvmfnkPO9va0Xw3E3uHS1VzWs4sI+kEasBm2tUpcNN4xlx2R6dKoDegvjltH6dGw+PpFswoXiTgKD6qQt1cl2LncO0jSWaFmq1yELt3Fs1T62jrwetRUbrXFDx7gXnIrlQ0thTnp9s9nr3CDH6k6nSy/d8pPTGl3A1oU6lL42hpBCo22OPZEtqQ2pHH49ka095yZZud1k63y97hthMtTe9tPZugI1s1k27+XbLH0424ndTtzhR1srZ5h9sian2rGoB7f9njqYdrM0onG4GO4SuBp8vQtrlTphlvh9sFIwFegTt17KIF6Qb3ZEPq2THGfuxqGXe7xqRii+bTrSv16ozHs1u2O0Nk38Jl19tNfB1uhxTUDFpebrmxHamVOLFvfru3xxibEeNQ23Glp9gPefTsCe7Dq9ttr7RtrqTQXvyUcpuobVi2ZwbdgRovl736JqS9pDzK03UuzZ7epFONmsfTLFrnOlXves7hquJYuV3xeLqJ2kpu1sVQvc16PM1T76eZ2xkYdLq+xc7hyjgrWqvQ
vHo9nu7EVHox5NrK723b49mard6J0C3EfoTrNlDuOhsqVzyebsY/ze7xdJ5O+4jYOdyqN00Rbai4rNGaKtKIb8k8XU6nTNTUl0g8njjjPmeibHe0a3xDhypDvEe4oljVVVkivauTVekV/Ov/ehO13XaPdM0DcFxyw9uSZ5eoo+N3jilhMjSynfuou2VXztaRThD7KWVXUbVKM85XpHbkGyfoYM2H4COjSoahWZqGMxJP/IlM1dAue6UtsUFS8XhaQTKVwp07d3Dp0iVcOH8BZ8+ew9LykrG3bh8g8Oo6eYWxd7jEbZ90NKtK7O7i3ppUKokbN2/g+vXruHnrJq5du4Z8IR/t9Xiahy8EyOZzKEgj6EqnGx1P71AoFlEsFDA4NISx8TEjE+PjSCVTUYzO081OPv4OV3Sro4PaqWNtSIy0adTQbvjpNhKElVFuIhH4kYpn23CmZHZuDmfPn8O5SxeM3Lx9K9YzQJ69hZ24ojjdgf5+cbQTmJyYxNTUFNLptNnn3wteTbwdrhSmOtpAxKybHTXOlgExhcZ86OChshw5cgShNJwez3ahLbFBfPT0aTzy6KNGpmcORHs9nt3RLbe5utnJx7fldnSqjrZWzey5lyUK87Qa1TxNyV16uhF21vpTafQlU0bSQbKnHir07D/cWb5uf6Yl9kMlbe6NiN59U78XVJWCI55uQ0smlLpC0e6RLy3PbvG3ubYmdg7XTiJXw+I1RSy73KI2DlglCvO0C9W8WwKebqNRKflS8+wWfSq5kw8t1XPu3TzKjf0I11W3u06Vu+LxeDye9rGX907jMqqOvcMlVLWKp9NQ60VHdNuXRjfiNoluCflOqacVdPrJ5G6/Z1tL7Byu/dWgWSkv9KEoFc9e4ksgDvj64ukVap+M7ubRbrxHuKJYt38Tr76Ox7N3sK6oeDy9QrdPLffElLLH4/F4PNul0x1O73A9Ho/H0zNs9ynpTjpd73A9Ho/Hsy/R5xg6NREdX4drbkDt7R0oLSS30OIuRJeeDuMV7+kAbn2Pq5Dy0hnR8uGp7T4lbd4l3iFX0vMjXLeAWo2WkZZXLwgxy3YpzdMYLYAepcezFwu0nsddiC6JG74d1D90qrnreYe7k0LweDpNbcX37//2eLamXvvu3zTVQuq92tFF97qNVzvUr+n3oiBI+Aa/Q9A2KeZrVyLlr15Fy15C7YlLFU97oNNJpVLmu7RBmEQYhgiSSSSSYc9IKPljngLJ207tSetfp+paohSXd2JF8GKNcgoilxdw7tU38NjBU7IjgSJbqQhdE9chIkeUWCgZ3JtN4J33z+Plrz+N/uQsiqW8hNsC20rpfPotSCVx8fKnOHb0KNI1H1nu5p7VtqHTlfyeO3sWj556BJl02jiB5s2F8Sh861QShdIMXn39bXztFx7H+OiapGP1Trr5c1pqF9aGxMaQF5HKnjyBV175CV544QSmp9dlb0H0Y/PTLLSX2dlZrKys4PDhw0a3DKOYdYmjWow7CbGnudk55PI5TE5NleuK+Rt983lzrB0VE4fxvdfexksvnMKByXUJY0OwPb13FzZfidRxvPb9d/HU45M4cZTfu85L+PbzRU2mpK7ym8crq6uYnpiQ1APRUhHFQNKV/cUSzxl/aDtJ6UzcvXsXR48cwdjIqGlLKKw/pr2O7Gwj1ESIZPII3n3nMnKFO3j+haMSZu2ptsMbpBP48MpZPPudbwDHRiQWW69GaTcm9g737GvicGdOSUakyYscrqsGdbilFjrcj85+grHxcSlsqRBsGJ1CddfjDPVBuXHtGr745FPe4fJvCx2uSVfy/VAc7vLKsvnWMWEYnZP+tEG1GGeoP378/uHDBygWipj2Dteh9Q6XI78LFy+Ykd+ByUkkROe5YlHGHIFVeI+0UYQO987tW+bj9yMjI6KxaodL6rcvRlNIOQ73Oe9wNyJmI/01ySjt8co8PnntTeNwqR6OcF1F2SWzJ2stcricyvjg448wPDyMTEqc
kIS7x5XEsOMMp5KlxTf6KBQLuHvrNp750pe8w+XfFjtcyTjuiRPijMn42DgNzIRpg2g02EDfdMqlphzV3sAr01Llkt/hXZifx+GDB0UOeYdbpn0Od3BoCDOTU0iJ7k2rxI6cON+itFHUe/daT2Pc6+YyKQOgW7dvo6+vDyNDw6Yt2Y7D1RFu3jvc+mzlcKlH3pimKrjJP2ZyrgUOlxTFaC98egnHjx3DYF8/CgU2tHZfzFRZF5MDNuasnJKfi+fP47SfUhahDbXe4T6Ym8PS2opxQnQ8dKR5aRA57WfOLZ2fzSh1+fSg1Z1kNQgx++ABStLgHzxwwDvcMu1xuOcvnMeQDAqmJyaNwzX6prD+dnFHrVk4MDB5lQHQjZu30NffZwZB9A3qcDfHHN1xhxvvp5QjpRjheo2SzGitDfA85uEWGm5eGkZpRKTnYpTZC2LyZfIk21s0+J6dQdNUG0qJ1tOi54xIWow2YyRAXyIU4bKx9AfJrpc+I9JJkVy3qUp6GsAOmY5q6YTYiS5KIcRdpOrY/PGPs93txL41VUdrHEUkpKpAWoimr+c0Ij0qc+8tEk6fxV1cnXraB/VLx2uE6yKhrFOSPSCaF+YrSacrtuXZG+hwpSh6Qmox7VS9HV1GvK1fnJtZOOJSVUi1O3eBnZ6RNDltEcgKp2AlXKUg4XEV9oCbnzYmVCyFpuQuPZtBDZueuoj22gtcRuHGrsTOXLuKsxi7kqWns/CWhGmvHLgVd3EpFsW2RGrDu5H4OtxIu24hmNEuAwW3sreqoutNeBcac+254iyK6rQqsCHlEqgRz1a4uqezpQPWbY9nJ+iMW3ldOm+1TrfX4CBBW53tDRg6S0/M75QbqBqbMo2XShTWCmjARB2wnpbLuMvOUQ23UtP7A9W7q//dlYVnv6NtFB2t64B7DY7g9cHBOLQ8sXe4rpK57pt7j8fjkbawi0d6rabbn9ZXYudwpa8WrVVQR1trXvpAlTvV3Cp4z6CbfwfZOagDc+cxEt32uvF4PJ2n3q2/bqEnRrjufS9VNR2sK55O0b3G7vHsF/T32+7Ucq/ijuS7fVQfO4dbftYxWrhm1G6T4r0QxTyIsA8eRvB4WoWvKZ2jfF8zmoVrlyNiqip7hdsG6z3rbiXeI9w96M10skBdY24knh6jhwvV22tnUSe7/Z/6NQ9bQpVaeMbas2pYq66G7bH96VOwo4fDOtOSV4j9lHKv0qxBtspwPXuLabSkMLUB8OXqiRu0WbXbrey3045uMzp5Ld7h7oJ2jnRNA9ykeHqDTpWlNoxbNYqtxttq7+Haktse6bpuq63Vhmn4XqHX0Knr8A53F7TjaTg3RddoG4nSKYPxtB925NrRmavXsHTKbnT0XvXLgUg8MSbgZ1HFjiJzZXFS9Nab2rE7pd16y94d5to7dFHxdrhtaJT2mp3mqPc0sT9gufEBl048XclzUVjpKbrdSfSce3FuT2uhxfI7u/yCGr+xQ+dL51X1cQGxax2Y8K9b9ip7Ca9JpRPE1+HudUm1GRqA++vWRtIpQ/G0ntpRHhun7T70sW14Dllw5OGeyduRZzvQXuhc+T3aXKGA67dulp3v8toaPrt+DXNLi+b97C7mOLvaFU04r0GlE/T0lLIWbnWRdzfl6xXDZU8xJ6Of9XzOvthewtaLeazmsmbd9CQpeownNrCCuyLDAP5t+UhXbYOSCAOkMhmxrcB8c5ek0mmzNNeh03+ReDxbUQpDvPfhB/jRO2/jg08+RiKVwl//3Zt45c+/j1lxuPyutk4tl22dSzEwI7Ku9rYXNtfpc8fO4Vb3y3sTOlH2Gtko/u2P/g7ffeXPMLe4YD56/O777+O1H3wfS8tLNm4U3xNjWuxkazH2IQ3f/MIC/vrNN3Bn9iFyEracXcfHZz8RW8sjEGfciWltT2+Rk8HAorRFL331JTwUuwqSIU6efhQHDh3E3YcPjMMl/FvVTG0I2B/09AhXyzRu5Wp6hDIKmV2Yw9DY
KI4/+giu376FhBjz5IFpzEvPkc7YjlEqaHNpnHCNeLoYU0jtKyWmHMrI40NxrnMrS/jpxx+imAywuL5mRibLq6tm6k+vYJ+2hZ4dwO8b8wP377z7rrGxTy6cR1YGC4l0CtliQQypviUZk49E7U1jangnqD13u+kJh9tIWa4y9T5Zt6KXxwIxHyMXQ00mU3g4N4f70lPMrq/j5vXryEjY2PAIZh/OmnyZUQnf6xw12F2eTU89pCBZbu16ByzthGln83k8+dRTMgpJms7a1MwBPPGFp0zjaBwuIwrRwuDtybMp4mxfePpZnDp6DF988ilkwiRGBwdx6MAMHj/zOIocGIhtUWhLtUJobyp7gfENejFtJnYOt9wP14WUkurK1ZkpQAlQiRMlMWIa6vDwMCYnJzE0OIQZMeBCNo+5hw8xMz2NAyJmJCw5dQ22kXg8o6Oj+PGPf4xMJoOz587i8mdX8NMPfoZLn35aPcJlnYnWPZ7NSEhbNTk0jBef+TImZCBw/NBhHD98BM988UsY7O83bZRC+6oV0z7JSlk0TKQTdPp8MR/hVnpN+gBRFZEm43CPUy/R5EcfNJD1Z576Ar724ks4PH0AJ48dw7Nfehp/T7aHaMyy3wjjikTZrSsK0/fsL9iQFXM5fPGJJ/Gtl7+BL3/pSzh94hSmxybwD779Gzh14gTCyH44U9Kukbant6BdhWI1SZGgWEKar1eUZViQdQkLZT0hYtqmOval7ZJpn0X2YoaF6XfS2mPucDeiylNFdlKZrUCvmQZKY6bhpoqwBiw7XKExG1vl+lbCxD37DmMfFLGBdBhiYmQU/ak0BtIZDGX6zGzJyPBw9c+RZFWiezwNUbsyHX62L5FzTUloSrbZZtHxmn2RwyW0KzMraTfLdqbLXifeDpeFKQsjXI+2FRZiWdwdXY5eqxqzK8zjBspK2EQ8+x41HdoQbcuYRhSo66ZhdJyvNx3PZtB8OLuoQrigqI250L7MNLMao8DVWlFqj4878Xa4gmkkRMoNiAltXICxQXqLKlsZnZvPzcSzvzEP1omYaT5ZNSLh3DbhXJfGkLZC0YddPJ5GGOuIOmlqN4S3KDhDR5ui3elDnYHzMyG1Lz3OlV4lvg7Xtg6V1UgUd9tMYUTrcYAdCL3Xwd/empdeiOjSNcxmRZHDWwxTpGiXR5eeriPqwGnjR0yHNQpX9HkAj6cZdIqYwqffKXzVo7ZVbsfNDo6qrcs9nlTv7S3i63AjWDhlkcKMe2G5+TFEPUcaMZe7kfbhXrUrnm6BpbHZg3Vmmk+oGo1ImIZ7PI2oamfEXIyjpXBnhGtf8qdicyK1x+t6LxJ7h0vKBeQUsltgcXTEeq+D190qaS/1NO/pNmhX5j6tg7tdHok48dwRisdTi7YvVeK0uW6nrda+zB4JoyPWWDrS7UVi53CjIirjNgP1mgTG7uHy83haRiOn6ke5nlbQyL6M462/q+foqRGuW2a+iegU1LreudGJb1164gQdqzpXd2qZaLgf7cabdrSLZjRbI/VoZF+Mz/Ctju8FYu9wWTbuvH8Pl1VM8CXQC7iO1Z1e9sQXt41sldAqGkm9+CpqX4ynjng7x8eV2Dlc6Rfpikhl3p+46x6Pp/W4oxSPx3WCtdIMbrydHB83emJK2ePxtBbXsVZN/3lnGzt0ipYl1y2i9sV1ta/a8M2kVbQyrWbwDtfj6TLcXn63CaHTrbdvM/HsDWUnJYVQXu8yQfQ78Hr7NpNW0cq0tsI73BbRyULz9BhOY0h0vdtER7ykdh9FRyf1xI+MO4f7sxrt7GgZdSVyYYnoDVTN0qpOHNNR6QTxdrh7XIm71oA9nk3Qh6CaFT3GpTZO7X6iU4Uqns4ShiHCZIgglULApWybpUgiWnaDhKlkeT0RBkbc/fWE+aIwbiii7MTOTAelQ415Qi4wVjWhKH0R82owvjvs8gLOvfoGTh88JRlJoJiQii3B1brTkFBWM7g3m8A775/Hy19/Gv3JWRRL
eYkR1jmuAVK4l65cNt987EulTUPDb4mS2uNjpdgI5oHXbZ4glF7n+QsXcOrkSWTSklfZ0by5MJ5JSSQpOprBq6+/ja/9wuMYH12TdKzeCUdA3QpzwKsTlyIbYmPIi7Cyn8Arr/wEL7xwAtPT67K3IPqx+WkW2s7D2Vmsrq7g4MFDJiwQ+yo5r1lsB9sdTTRDo2tWewmCALOS10I+j6mpqSoH3ZxNWTsqJg7je6+9jZdeOIUDk+sSxoZge3rvLmy+EqnjeO377+Kpxydx4qjYgNSPneSLmkyKgz134TzW1tcxOTFh2kvVMf+6I+C4Yj7kIsukOOu79+5h5sABTE1M2p3C1jbF/azHR/DOu5eRL9zBV75yVMKsPekUvBKkE/jwylk8+51vAMdGJBZbr+0r0jvcbTpc9gw/OXcOE2NjSCYCY9z6eLt7PNOLlWJroFkUS0VcvXoVTz75JDLSudieY1QNeIfbiLLDXVvF4SNHUCgWTVhRlu3EdXatol4zoufhkmU8+3AWuWwWB6anq66huSbIO9xmMPYqul1ZXcXi8pKMD+ybixneC45WKd+Tlja4UCyYT0yyjWoeasQ63Hcjh/u8d7gb2crhKtVrbC5b53Bv3r6N7NqaXIw4WjHu8u8VzV8LQ+Js4I4qcejwIaTCpHe4bXC4C0uL+Fw6Nem+PmPbxp4KbXa4bRjh1oN5MSLrYRBiZWnJjESmJ/0It0JrHa6itc8oXyhv9wjMlmvFtQ5ya6gN1uOj4nA/9Q63EbxYk03H4T4mDtc0hps63KSspnftcM03H9lYUG2Rw62nQhPCaHYRG8rXKyt0hG7evMNtrcMlTD8vI1r20tlBC8QxlUptdrgyKmglrvNsBGOUCgUkJX8s7+07XMYJpf4dEYf7Vu853KQ43B/Q4U6VHa7Wj3rY58Qb04xGe41aK7TzjvWh/qjfpBnhds7hxv4pZRqWGpe7TlW4UtmzO4xPp6OVpWk4ZJPLesJGRZdxkarrrsmbpxbViVqeyvbgQx/JZBLpZEocUmBmE9opPEcrxXz7dAuh/TCPtC1CJ6uyObW63Sp+TClXL3HACRndiiSkc2c6clViw7bSBZPbb1IN3W2t3ioi3dxoXXXZGWLncO1dWrNiEU0b0+OyRutaEEY26LSyV3dtiNIAHlWPSoqRSIJxldah2ug1qCRXUZpPVildr5X6UN/sLZc/Ct+jsrldVfTEprIiMtqLdMpty6YJxRTmiUriiDdaL0ttvnXZ2KY8iupQibaNMYpF0TCN09VZpfbqNN4jXPaQuXAl0hcX6jxsRTchXBEYQAVbcat3TWoi1WgqbkrEhMlGWTQsxtIc1MAmYhoQtxfZfMrdiWqHVUedq1Yj5pF5tT1oK7loWb8nrantJ9lItd5qRyN2hMJ6SuiAk2bZKLVYUgqk3UghDEZQKo5IzkYleyMiwyKDkQw40ifCX0nEeTq99dCSbEueFMmI9IsMODIkbkOkOCxL0XVpUHTYJ+Gqx411tJXE2+EKbpWrrX7W+bo9Y92qDmkk6oKbhoe5S6G9xdcNVOusIi7ctlUhzhqp5IrT7bSOIlLJDIJERtb5hCQrreZf8snOhorp3MU7/+1B9eHoq6wrQl2LM0pwOjqJIKC4eq6USpxhLgJxntevzeLu3RIWFvqxtj6EbG4YBXEORXEOSIzLiGzKSBCOij4yss8e7+2qgnmsNuhHMjUhMoUwtDpDYkoc7ITobBzZ7CiWlvpx+9YKbt16IJ2dzrjC+D6lzM7w5Xmcff1NnJ45JRv2oSkdXRK7lL/iedmDKSGFB7MB3n3vAn71688gk+JDUzkJ1yMq2JBKpW8EY+heN5Ye6e7vPWwuN+aRW64GUiiUDuB7r76Fr33tCUyMrorh86EQa+TdfH+4kjc2/GJjpYLYYCiO9jC+98qPcfDgOA4fHsbAQMb+dCpMSCUPkZSlna5SPeSRy9uHxSphlt61j2Zg7vnyAum4BOy0sK6WUCyWUJBBb0nq
blEku76O9bUcltfSePNv38PLv/I0piftiDje4wZ6TBmNJaZx63YWly5cwvLirKhB9BKIDcmiJDZHm+GLK8IgZY8qLuGpp47j0KExhLzna2xqP1uSzb9py2Xkf/78ddy4OS+6S4lwPx9Es88M0LYYRuGDjiMjQ3jyqSmMT1DPFP+UcpkNDve1N+1TyhLGfaRaDdxiQXAqShzugwDvvXcJ3/7m35NGUQw7yCMvBq2FYWIzGaMWhrEAtBu5fQX3MnwARmy3Ck5xsaduH45RocOdwCuvvmGeUh4ZXpXQeDjcjdAd0ALHcPf2Gi5evIJ1cQbFQgKFfBGJkCMwcSBJPihkP0Em7aQ0lqt45tlHMTgkDabTQNZW7P1DlH8ZtfLXA2c/uY67d5ak0xw5XNlNPdPZ5mUYVyzkkRRzSadTmJoaxeNnZpBKLRsdszGNL1YPBfBnYRnTiJfy0iYVSsiJ5MWmctmcjMqkdTPNEJ9ip8NdxuhYCoMD/LmezgjsT0uy2PzbNiWJ2dk1rKzQI4hG+WS8dF74cGIoRpRgp1gkbTrGrMtF2Z+VZbacDnG16R0u26wr0c+CZITL0UfBVD5LdabobFkYKaws9+Gv/uJdTE+NSOOXwMBgH5JSiVPptFTgpBQAJfrpgqg1mcwhnZECMWk3VnA9JW6/OOIAm0Mx3iApjUACa+tFaRysfvnChnze9iCzWRnR5QrIiywurmBhcQ0PHj7EN1/+Cgb614xu2ZjqE9HdjpYvK6cp2VJGZEAawX4ZiaXFIQSmkWQDaX7mU8hFnTXpMxdzcsgSJqdSYk9iuOJwrRblnyTcm3ayFVqfZNQW9mP2QUFGsKJHOlnZF5iOC19JKPWRr/4TRSWTJWRS0jCWxNEG62I7ebE1NgTx12BRnKZt2+RfifWJM3KcOhcHYn5uxjxafdnluiyin6MZXe531J7ocNkpkZFtdK/faocVjRoWkaXVm31GgKNac9tik5/1eYfLtiz6He4Zx+FqZriU9ryCmaNPSkEMYm42h3t370sPaBlrq3njOPKFAnLSs2SB8A1L5lV1UjBDA1m88JXH0N9np3I81lVQl+xBvvv+WSyvUFccxlnHS3ivLZVKSy+SEmB0pA9TkwMYG5UGNsGpVYlpRsPbN9q9hZbFa5b8SqW2DWBlhEXtlModP43LJQ2WzlZHI5b963BdqAGOdEVoR2abtVw1w1pPvdoHHLltOz6cmarWZ1wxuWAnX7JCa7L1SEXRdS5t/jWmR7HasCPdSr1UaEe6Zm1JpVrTtXiHGznc867DlX10smU11urD9GCk51NMSWPPEYpdZxin/qyYVEQzPJj97QVxGJy6sqMVDzVL3aSko5KSToqMSmS0R/MzD7OI3qh3vlyBwpcasRcZyAivVJyXwzl1Y02Oo+T4YvNQQRpA0+GzzsHqyEUbAIZX9nmHKw1fIBqLHGfJvNbSulr7l7qkRvmP29Shdcrcth222rKIHyaXYgzV9qD5qpc/qyE3toe4OqunN6I6q102Zt86XF6syabjcHVKmQ9NsbqS+pnikTUNnxn5VhRnjqPVR9ipB57MNgj7lbLey0uru/K0l2xvHK1ym0dKY2mmcwpmCpAmF3bo9YLtx80HnUJ9THidLHuHKzU3shs6Xb75jXWtohOuRfvL6xRrV1wPWvzmrL3AtRubW0830SqHG3tLdQ211mg3CityToT3EFdElqV3vSAyV5YgISKj2gDzZsm41tl6qhGdJESXiVXR2SLChOisNG+WRkR3oejQLhdFl/ZBKTplvlfXlkgvQNtQcW1to3jqwy6/dvtpH4G0SqaDJr0RI+afC7esxO+WRH28newP4t81FCrVbytozswyG3xOZ/K+UUokXRHze0pOM+uS8ZtLvZdxGwKrDU7p8QGXUHp7KXGlKeQSKWRLSSM50S0lHy0LHA0bFfIPnVOv6NM3lbvD3sYpd1j4hLvYUoBMJPxKlWzzN7jmFoWKfRre690TJ2LncMt9XWdR
dd+WgZ624erZiOieH3SoJ9yvrpXi8dSHTtc6XOtA6UhrhU0V97nUbns83U28R7gyZHKrnK9+7Yc6rhLxpBvCGoR7PPXQUSvh/f3Kk8e14vHEm3g7XI/H4/F4YoJ3uJ5dw9GsS+22x+LqxavI49l/eIfr2RXlKePIg7hTyZ4KG/RkFx6PZx8RX4drWi/fbHUTfmTbHKZTEomnEVSO3svVpccTb/wI19MSyiM4s+VphKsnr69m8I7W0zt4h+vxeDweTweIncO171dtjO5lv9gVj8fj8Xj2kp4Y4fppOY/H4/F0O7F3uPqWKbMeCaETdsXj8Xg8nr0kdg63+tWOdotiXiVY41nrOVx1yB6Px+PxdJKee2hKHapxtLJRFhvs8Xg8Hs+eEG+HGznSKudqdkToRge8rZy658Szd9Qrj7iLZ2+pVyZxljgS+xGuOlpmRJ0vKReMBOh6O+F5eQ3l75vICWMnUR5cPXr2BrWnKpuKoXh76ixSjavaOndby2Fru+LH1SmNwpuR2mPd42u3m5cgErUnN69xgHqPL9GbptSQtBAUNTa3UGrj7BamXfkUXQkFES6L0guIm7jXXk93nvbj6p3vV6qyqZgJr7tgbKtSR7w9dYZaPRdE/3nW8UjK5VMWibMhrFgjtfubkc3SrQ3fWoqJommfjJicxYv4OtzIc3JRFrGyKFiKZqO0Ek2PCkxJySdlKB2WAtnW/mP8hP+Yh1SRSxvq6tPTetQ2XXui7pNGqm0qETPhP9pSRrwt64i3p/ZTbgvNVsWe0lIGGSmLpNhTghKVT+WfhnBpj9soWrIq9eKocH9turXHaFi9fbVi47A+JCN7Yh3hPhIXe0qU+AHKGMGeGRUv3R3g8gLOv/oGHps5JSGJql4PM6VG1w5UaVrgeRlt58OEuSz25kmcFMtLDuSCUyJhwU7beDqDq2m1WdpQQWyqEIjIOuPovrjADnBaKmSqyCnAeI5I4oJrH1yqTXHdDESCwOg/Kw0WhTB8I4zMHbqTSzflJjE/GVGJ0qtKl2wjPcaVNHnpSXFZgbRRnJYmmup2UtsuQTqBD6+cxbPf+QZwbMSMtsMdnDH2DvecONwzkcPlVIlmpt0FoLBhzMp5HyaLuF1aR07Wi6JSTplsVGwnrmhn8MpMTzhXwHiQwXTYh6G8NXBW1O698t6CNpMTpc9Jz+dOKYtVsSQzBSg2ZdqrbscxFHbg0vkShmRkNZPsN/YUSieC2fD21FpUp6pXblO0/i6lgXvIYja/bgYGjT/8oke6oik3OqYetfE1LV7NTtK0cRNyeCKbx9HUIA6UUgilM9cJ9q3D1aJyHa6OcNkwdQo2JnS2i8kS7ibzuJxYQf7YBMZnppBOJeUipWHZUB7bL6DOIQ06RyJrOazduIexu2s4metHRnqSYiRdfeVxpGzH0ZKmS3tak277nNjUx8UFLB4awfSJw+jrS6OQz0u8ztn3bknIyNxUR6mnq3dmkbl8D6fz/Rgu2dsV3p7aj5klKRWxnglxNljCtaEiJo8dxuDQIIIkx4daCvXsimEq9UqLYVuVortfHS2pl+bm6ZkjEgHy0katzy0hvHQbJ5YDTCcySLONiuK0C+9wI4d7lg73oDhc8W7qcLevhu3DRkM67LjWX8AHpXmkv3gcqVPTKEiXkr1KXkmcFMu5AXuPBBjOi2G/dwWHbq5jppSGZNHkqSCmwpEWCRr2kD3NQC1Sg0Zkg0va0/10CZ8mlnHn8BAyzz6CRDol+woiRRMnDtjrtH955YMQG7pwF6mffo6TyUGxr0qH1dNabO202qd+l4Ii7qYK+GhwFZmvnkEwmAEtioOBStx6BcEWQWNoabowxIZWYlWo7Ry6aVVSczvylfQU9wh7NbYmZJDEuAwIsu9cxKlcGgfWbbvFWy/twk8pS6XFFTvCPR05XH26to16L8MCXhcv9MlAFudHC3jk+adMobBHaSdiq82pYj6dusLm0SvjVbHTkpbrHxCDXnj7PB6V5nIq
G5gGMi8xvcNtDWoFZZEA3lu72lfAhdQapr58BpgalPIQeypydGDjdTuan3IDKRthKcTkWhIP3voERxZLOCwZ1Q6r6sHTGqhPOlM+WETd38uUcL64gOQTh9B3egZ5ccCsyxU4E7GxBErR4MVlY5AcJ/83xmSwuRK7Idg1nkdFESspu6BKeO0laWrsRISycyTRh3sfforxyw/xVH7APEBFe2oXrXK41jPElT3sK2hjMoccVkczyPWJmy0VzA390Ih9ACklxsGlfeq05Cz3TlKO2Gu1wgel+G9dzCk7mMSs9IyXSjmTX+0FJ8TRemfbWsQkysuVJDDfLyUxlJHyKCAs0qaKUlaU6rLrJlG7MjZWlHXpI6RlyW2Oc7N9ARYHQqyyYgh12nNPC4lqq9hTAg/S4mTHBhGGHAkWq4S25ZZduQylfFhUKrbdqpXq9qNWrB1I+ly6x4lt0GmadGWdYXa/e/6atCJhPUgU81hK5JCbGjL1xeRUjo8D8Xa4XUBfIkS/dLtSIiVxRCx3FY5LVNzwvZaq65KaWRbZZkXVysAHXIIwMKPaEg09crZcenaHalDLhNtVWpUNaadkH2duIpHgbhXmQdfNU9VRZjh6CiJ7SjEPBY4NnP124Wkh6nxMmcgGy4Mzg0UxKNqTEZaRs86Ro7s0ZVQrus+VevGMMF3arXQeZVm1T47jmq5Tmjl/hYQ43wT6+BOh6h1dj3e4O0SNwiC9ONlsQJdbhFwer71WmD/z1HcglUYcrnW6MbPumKBaNXoXsQ0kt+IPGxjbyES2JNnyVtQ+aDUqpCg65y8nskXp6Ej9Ld+vZQWP0PKoXe6WskOPxFyXJK4dAmVb5+fxuhq1S3HCO9xdwpEfi7xS7BVDtqEV4R7TAMkml8YANxGOJjnKrLfPlWbScsXFhMn1VImG8Z8sXWjk3vG2FrdMWJYcDVbfZ4sfZrQrGWM2jD2Zf7ruaTWuuRgd04ZkaWaqRPgLhAqVkrDO0JaXu3RjV+PGstiU+I9P19hy3gjPI1ck7ZmLnqvp89dPPDawTOJLTeF1BWUPpWZjhVeaCkOkRQJxWP3pDJKBefwLmVTarJvpWonOxpaOdn52DqsrqybOZsIz1AuvJ1vRTBxP+6iUFUu1PaRSKYRib+3GWL43qD2D7QjVb0a2MtIl1qoqVrZ9bAr96T5jQ0whnUxJ+yVtmyyDhA3rk/aNbRjbteH0AEbT/eiTdo7vnGod7asj7SK+DncnttIGOKVBI+MFlUwvkiNA/map2hjoVP/mr/4aH/7s52KYSfzR//BvcPniJfOk259997u4f/ee6YXSISfFUAdT/fi3f/RH+OTjj60TpvHSWcuSN/d0W++n0viZFo/nMinXRDGjaRETFh3PI7ZSH9NVMceIeNqPWI+MDOVvuePWWtjR++C99/Hzn31Q7uRxyc5gM3axXZgLjlo87cUtN2M5vHcarZdlV4VrU6G9rC2v4L/7l/8v3Ltz1zjRN//mb3BW2qk//qN/i9n7D6Wty+D1772Km1evYXl2Hv/9v/x/4p/8s3+Kv/7Lv0KxyLbRsiN7K2fCzRnZVeY6RrxHuFtQWyTthFPLFTaeMZkIcf/ePbzz1ltYk1Hrq6+8grd+9GMsyCj2nR//BH3SO8ytruHiJ+cwe+8+UnLM6sIi8mvrSMuIhKPdTz78CA/EMWfSaWPY3F6WOGww5x/OYv7BQ5z/+KxJ586Nm7h6+QoShSLSUvl4no9//iFWlpY7Mrrx7AxrOe27N1XM5fDDH/w53virv8KdO3fE0SZx7do1nDt7DiWxFe2QkVY2YZ2ogx6rZ+0c6+0f8/xFtG935WB/UpRbW8Off//7ePXP/gx9iSQufHIWi9K+zD98iB+98QZuf34d7/3kbQz3D+K//Rf/AhC7+vu/9MtYWFhATuyPsD+5oz5lmzqinSJ2LW+lOegO3PuZlfWN15gr5vHII49gbn4eF85fwMkTJ/BQDPTqZ59heHDQjGp//1/9a9y6
fh3/5g/+ENevX0MgI1mOPD76+c/xO/+f/y8uf/qpkQ/e/yn+4Pd+F59d+hS/89//v5FbXsUf/f4f4E/+xz/GT99+B//lP/u/mzh/+Hu/j/ffeQfXP7+KP/jd38Pt6zfkuN/HvFxDvM3WsxOMVYqNstO2vLhk2q6//Mu/wPdfex3vvfcefud3fgdr0pgynp2zqWfJnrjgzkrp+k7L07YX9uhivoCnv/Q0Ll28hIuXLphOfVY6+b/x67+BKxL2p//2f8QLX34el6R9KhRK+Mf/yf8Kf+8Xv4Zf/w9+A6lM2qSxe3gtKiQeLVpPD3Vqi6RdFKtGt/UpFPI4ceKkafA+/PkH+IWXvmoeZuDU3onjJ/C2jHI5Kj00fQArMmr9UBwmH3nPyAjke9/99/jy00/jP/oHv4mvS0/x+6++ime+8CX8L//hdzCU6cM7MlLOitN99gtfxG/+xn+AlIxgv/nyy3hEnPrtG7fwFz/4AQrZLI4eOowr4rAvXrjYvE7arTxPx2BRDqT7xd6O44nHH8fE2Bh++Fd/jW9985v4T/7xP8aNq9dwWRpJj6dCdQOgMyBcfvvb35K26DWE0czIsRPHcfjgQXz805/hV6Sdmp+dx8jIKNLSRtHhm9lgxy/uaIq76qCdJLC39ITDbaR2hqsoe9UP4rTO9MwBpNJpnD17Fk+LAx0dHcX777+PRx59FCsysuBbqvLimL/xa7+Gr7z4onHkqWQSK8sr6O/rQyadQZ8UWT6XR18mgyHJGR+A4TQN7TCRDM2ToaEcY0ScdSIMsJbLGoPn+3j/F//xf4xHZaTt2V9oHWDXcE1sLBS7SYst5qQjxlsUA8mMjHjtj5EYR6VV9cWtg544wBLTeQ4+CGXXEwlpY8Qo8sUiXnjpJWnPUqYNk8YGWbGWIydP4vQTT6BvoB8nT500tyvm5+fMswNz9x8iu74u6Vi/SdtqbF/2vPZ1PFb0engNfK9yMwOdbiN2DtfejTArdhEVnFmPloTFpfcJjNjgtmAfmpJzsPcXSS28tnSQkoYuiTt372JqehqPPHIK12/ewMyRQ/jqL/w9DAwO4nMx0Idzs+iX9T6Rm7dv41df/lX88Ic/xJ/8yZ/gB3/z1/gV2f7xT36Cf/Xv/hj3HzzAC+KcmX5CjJ5LuYBoO8C6ONtfkdEuHfotSYv3UQb6+xlrUzQHfBAsbr91izPttFOSFzd64vSj+PlHH+Hq1at46cWX8N0//S5+9w9/D1OTUzgVdcZaXeLeguKKWiSXdHJSluLw+FBWQZzut7/967h9565YVQn5Er9sJb63L4OcOMMzTzyO577yPP75P//n+K/+6/8av/uv/zXWpR1iUs3bQzQyLsP2lQ/7iRMWf+ve0osDMX+X8jw+ee1N87UgFgSf7lTnqmJKVla4UDHhu0SKHatBCe9nlnDz2CBOfekxBMUc7dAYgnMFBq7xfuytGzexOL+AJ86cwdLiIi5f/RyPPX4GmTCFhYezuHb9OjL9fTgto9779+8bR3n02DHckPAHsj1z6JDIQbPNh7COHTmKmZkZfP755xgbG8PAwIBZ5/3ie7Kfxcv9t8XZ3rx5E2MTEzh8/JhxxtRFLXzZBfM2tArc+NHP8XhuAIeXA6SlkuXMmMfTDnj7gO9SPjeUx4eD63jk+S8gyLADZX8pudtOo1ojp/7WZZTB5wimxBYOHJjBZ59/htXVVZyS0cngyLA0nPUsY2fwDVl809Qw+nD7p+cxc2sFj62nTX7b+e7b/QpLjmWcLPLFoCVcGi7i7eIDHH7uDAYPjJgYLI+tUYux6BafZeco9caNGzgm7VI6TOLTCxcwfuAAhibGzMOZq4tLOHr0KC/EzMZduXzZPLvCEe/Y5KQJr1gY1+rZmz2j2Surav/cZv5SYQYPr9zGwAdX8Xxu0LxKlJ+1bBf+4wUNHC5LRPXuqkMzyeX21bSRxg5XjITdQHOWypl0rbyUKFzXlwOYn+/IkusUF9fgGJ9uj+tGovBm
Ydp6zloYRodLQ6LDvf53P8eZXD8Or4TG4Zo3T8WsRxkX1OGedRxu6DpcicOy3im1NtIovUa2sVPU4Y6Iw73lHW7bYdkZhys6ZxN5aaggDvchjrTC4UaG4R6t4Uyytjtu9kXCQ41I+8ivjjHMontqsUeavbLKc2g6Jn/icGdj6HDbeIkdICoEI1yPthUWjoqyfRU1pnzeaL0C1Vod4l4LhQ2beedszbZxqDVSG5/ocTTG8noz4qThUj6XGDPhX7vm6QRuGbUDN20tV2MLKtF2q6mqH1FnTa/D03rcIiyJC+S2bY0qa5uzdTxjK46YtkOWtZT3q0g8bhNdbmTr8xPuVYkTzFmsUUerRaQFwAKtlVbDNI0jlJPXvuCxEeYYaXgo+n1ZjhoZzp+E6yszNhOF62rIzYp7PJGgstj0WU2tc8+LYumAuY/z5H502z6oWdoD74NxFJCIfiutZbOh4FqALe8oaTlJK05Rvt4IMzqXhGlX5pUHak+eNhO1KaJ8ziRwpsHSAu0ziUjK7Qq3lWgfheHl9keCTHvHfduA9qPobI/Nm02zY5iZy90Rb4eblEE9GydZVVHcsNp9LYePxcuCYt6DG63rOfX8Gm6mj0X0nbk6nRxGUo7XpGj6zUrV8c61lK9dKmdR8lQq2nzxGHW29q+nHbAxYhmkCiUEtClpmcp2Ivtry7EZIbpu0mABRsJ1lfJ2FG/HImm416zPt5al3PB7OoPonw+WiD0luW7KmC9Y5Fvo6omNw7Jiy1r+J+FV65HY+FbK/6J9G+JE+924xj4anr8ixnbUptxlh+yJ7R+/nMYO427gdceXTJ/56QsxBSMFwKUs2u4Y9Fz9qTTy2TzW8wWsyahkVToAa6LVtWjJj9TrsizSGm2Q2jgdFN47zEpmspIpXveqeH0+GJHJlpCuNyz27BrXRs26qJlPt/cHKaSyRRRyeWTFLtYc27B2tT3R4zSNurbnirRi2xcnfRHaE5drYk98zmFV1qUPgSBXMC9z4ajEm1T7YfuUzgODxRDZbA7rqRDLobZPUm5SzxuJjVOsSFiUdmHnwuOZzrojNu361+Hab1kknO3riqyzvhRW1sxLN8qZbSEb6qfYbRDIBaT4DsCdI5ceU6iFTNJOv7HXYaSipPZjRyNDYsyZ+Szmb96XEUqAgnlkXjoB5rdi0qcsC8NF3SKJwO6vFrvPrnN/e6WkItdCSYR88bi9zmQihfnPb+NgMYXBguhXdCw7onx7WgUbDddepeQxkA8wvArcvXJN9C49fv6WWio6K3sQWgllfbsSsIxlWRHa4S5Eevt2XdOL0pfro5hrle2SXH8ylcH9z28gfLiEfqkjOsXoaT1qT1xSxUO5Eo6hD7NXbmB5NWvquRSBaTeLXEq95jdra8U8y8F6Hwnj71bc9Nx0eR1lia6nJCIGVJEorEgbS6aw9mAJuRv3MBbaN1e1o91362eukLd5yGSikJ0R76eU14GLv//nODU4g6RU7Lz0ltzMuOtyREuRU5l7CPfTJVxL5XA9uYbwyDiGJsfNlzKMQQn6m1xzLdLj57Sh2a65H2Cmxk1cFVIdp5UwZYo5E6dLZK0ko/TVtVXM3bqLw3fW8MXcIIbEAehlVB3j2RU6cUA70hJnX31R+kP3U0VcSCxgcaYPE4dn0N/Xb2yDcaKi2BH2WJ5QxGzspiSZgB5fSUesxV4n7VvsivelF+49QPrKA5zO9WOylEa/VBx2Vj2th2rlNCvLgW3NWqKIhVQC59PruDoETJ04iIHBjMTjP5aUSi1MyX2a2IbslHpnUKrTrVyPe4y0TqaulMT5rs4tIf/pLRyRoe6xUh/GcnYaupWdOJN7SY9T3NxazC7j7vocnvitX5NesbT9Eiau38TdDjF0uFGRRE8X3fi3f4fxVanE/QPIc4jAfXaXEWX7qtkaps/ps2VpPWbTedwpLpttvnUlVywgXyoao7fXYq+A68bp6kVG6OezKldqj+oE2pjLGAUZsdpRGbUf
QT+mczI6qXzco3xFeoWenWMmwkSR9H00GcIw/rSB07Bz/SXcxiqWSjnz0IuxhMiWiHbclOqPZ9Sn+giWYitKkqlWUmaK5U6EOF3+/GdImqaDiQFM5JMYENviT1a4v/p6PK3EPIchNsJBAaf951LAgzCPeWSxLoXCD9Jz8MJbcnQBKi5qba2wkuaxZ+OZzZqscEmHK31RkUDaqABT0nGbytsZuIyYPuO00p40LTO4k61bC/exPpTA6f/5LwEyqN5XDpcEXGHj9INzWDl7E5MHDoqzq/TZ2p0pps/zpKJ5h7WwgKVkETlpQQti7DR0For9R3OxV8bjTKdpS0zMSJo6YMeY71rKNbO/0p/jtCbQJxlIlfhyDFtpeRWk/VezPzAlK4pU50QYxpEfy4L3U5dldLIqhp4Vq+dr7PiGHTNzwg6SSFDupMmxNTMmtURm6qBunmlsfmx99Dgr/MsQ7UAwX1yyMeyTThs7cqGIHVxLPYiuR4/ztBbqlbAMqHNO1cpAECvibJfDYrkTp+8NsA7X7bTtZano1RN2zsRi5Pr4gZe0bPWVQjNLkjY/5C6VZ4vaccWmcxgm8PGty5h59lFMfe0Jo9TyTOs2ia/D5VVTLi3j5itv4vChozLCDco9f6rCVYdmkstWFwzTNI/fy0Xp9TGs2rHaxiZabRI9oL2YEa405kZnUvn4YIv0HYxBmQZSduiVcNlq/e1H3JI1ererBu7jS1z4xicu2aDQrllO5rjy7YcKzVTj6hitLMVKytrAmzzJUp0vRwPcbu15Pc1AtRsbEmE7ZSYHZd0Uh4P7xK9bSrXx9gJ7PXS8YkfSPrFjGnK9vK81MK+aHpecoeE977eufYLn/qdfR/rEFIfZEo/dxu2fOXYOVy+Wjk06asACcP1fvYqD04dlqJ+ORpVRJXdypgZGaUcB2R59VATOeTew2b69xlGMNvQuvHTNr6c1qDmoo1LM3ZGqno6sUPGuo3XXu60a6+VEl8iscFSgoxHiXL2nzVDX2gGi04otxo5s+0Q7orQyN256XCYLRazl1vDx2m08/1vfAgZTVok7JHYOV2EvzThUWcm98hHm7sxibGISRU6tiaasAxQiDXKh0soC6jWom3r68fprD6rPst6jANV1LSZ8BwXAxnYHh+0anrNePjzxYKf21inabddhqYgbd28iPDONwy8/Z0a3u2EXvnrvUUNIPX4K95fmzVRo9NxUucFiHF3fS8rXE2PxtI+ynh17dcVO+3dxyxdRe90cjdSGeeLB/i6rhLlNw9uED1bncVh8DL2l8wzpjoitw2XTU25+Hh3E8MwElhYWTI9HqVfJ96rJ0uvtdiGu3rpFf72Kq89afbvlwsqvk1GmV79N6RTuNTcSTzzQsqpnT90i7YJ5D4IAN2fvYfjkDDAzLiG8b7s7YuxwqW0RDmllmH/0mSfwcPaBBEmfOiqIKEZZPFtTqzNXPO2jWX2r09qJeDzbpZ4ddZu0Eq17QcH+pO3O0kMcf+Ep81Mg+0xLo5rZHDF2uJG6VeOPTmD4yDRui9MtjwRqxLM1tTpzxdM+vL49nu4hSIe4c/8Oxg9NI3V42nhKWy93VzNj63AV81QycyE9kKmvvYC51SUU17Pm91Mej8fj8WyFjlvtz42AlfUV3F2bw5mvPY9SBsiZNy3tviMce4db/ikOp5aPBHji+S/i1u0b5hcTVKIq0uPxeDye+vBnRvx+MN++lcC5m5/h0V98Djg0DL50YycvuahH7B1uFeJ0w6+exOSpQ7h+8yrSMsrlSNc7Xo/H4/E0gu40USoiSIW4eOUiJk4fxsAzJ4xP4UswW+Nue8DhlhXBR9boVZPA2MtfQWIkjdl7d8w3DM0XKaIRr8fj8Xg8pDIYk5FtKokrt68CUwM4+csv2jdK0QnLoG23926VnhnhckKgFBTsBPwYcPQ3fwUryRzu3r2JTCJAqjL5XFayKx6Px+Ppfdx2n16B72/gR/pv3L2NlT7gsd/4JWCEn1rN
y0DNjm5b9ROk2L5pqiGqSd7jni3h2vd+iGA+hyNHjiEnWdUv9bgKbE3fxePxeDzdiuvo9Jlajl6TQYArt66iOJzC6V8XZzsemqFoSZwtjzIDtUTCHLPbl8/0nMPVN4GY1z5SN3PAw9fewv2rt3H8yAlkUhn7EgHZRQXaNS6bw1V3TyluF+zOBD0ej6e11Gub2U4ZkZ2c2k0EIdYKOZy/dgUjJ2dw6uWXZGQLrIu/TaBongFSdNU73EYwVxR+qD4L5N65hCvvfYzxgVFMTx5AoSi9F1Ge3tstK7Q3tdEmKspyzdCr0OPx7D12VKqwbecjtPzMH7+OtvDgAT5fmcXh55/C5FceBTJRRHOjtdKKMY1WvVa1Zx2uvsPV9GQYsCZyaxW33ngbC7ce4MDYFMZHJ6K9VpkV51utkp5U0K7ZaIDU08bQWtwYrdRsq9Otl5NWp+vz3zp8/ivs5/zbNLllHSwlmscUpxmECdyfu4/bd29j/PA0jvx9GdWe6OderEqsUCStlxW0/hGn3h3hKsyd5pDzzesi52/gs3c+RHZ2BVND45iYmJLeTZ/EK6JYLFRGuyLu4YT7uK0Fun9RU5aOCqfojc6k59iUTlSzrabV6ZpMRbQ6XZ//1uPzv3/zbx+JpWtlqxRQ5BTGZSbFjeayuL8wi3tLswhGMjjx7BPof+okYH2tGaHZgZYcr6PZwM1/a+h5h2tVaEe8VD4LwTjeVZFP7+DhuSuYu3kfibzovm8AIyOjSKfTSCZToh1RuIqrJpZDT2utGagX0Sj1wvdXc51hTemFkajEVtPqdN3MtDpdn//W4/O/r/NvvKukzYFTPofVtVXMiZNdXFlCqS+F8SMHcPCMONlTB4AhcQNsvsQzBMViNBrmNYlIe9/KaWSX3h/h1sLc0vsSrvMe74KsXL2N2c9vYf7OQxTWcihm8+bptVB6R0EgPSR1LNFh7nI7uEW4k+Mb0ep065ladbo2htpkgtMvOjWwKbYHar+70Uz8Zml1ujY9pdXpti49pdXp2vSUVqfbuvSUVqdr01NanW7r0lNana5NT2l1uq1Lz8LfyxY4O2mWeSTSSRTSAQanxzBx7BAGTh0DxvhmJInM00bjA4O07dVtm8U73F3CjNJlqr+lzql/s4OOl5IT4eh3cR1YF8lmJVyGxMbhSrjS+rKIGaoMjm5FGcaMqBSKrtfCcJUqi4+Wm1EvPaUd6fJYtRTCeCo7TVevkeLzX4mz0/SUJtM1szBC1HG2NEqXx/ryr8B4KjtNV6+R0uL8h5JeKHFCadEH+kT6gUFZT8luSvTheE5u6tkpxPxUVKg9g3e47YC5d4Xo0tW3hrUCptvK9JRWp9uO/LdTp0or0nXTU1qdrs9/6/D5r7Df869oGJfueg2uC2yHk3XZ1w6XGddpE/blirLOqQ67Xm0Pbpk1Q218puXKdtNT9DhKbZo7Sbc2fm16ysZ03Rjc48ZmiH2zl+6hZnm/xK7bPTY9N53NsFegx7Uq3Y3XSdF/hH+5306CNYvPv89/dbr85/Nv1+2e7VynzXHluHrpMpRL3W9HsZX4KpbKWifxI1ziaiAqB1uYFaoLq0k2SXdH6SmtTneT9JRWpdsS3HTJbtPuxHX6/LcOn/9qfP5jg05je+qg5di15dnmC+v6/JOuvrgO4PO/v/H5jxV+hEs62MNjEIWbOz6VJuCwq3R5oLLji/J4PB7PZvgRLlEn02pn0yDdXZ+mQQI7TrfBdXYv2r1oJe1Is134/Pv8+/zHET/C9ewe14KacdrbjV9FnYN3lZ7SrnTr4PNfwed/m/j8V2jzdbYc4P8P1W/wtehX4OsAAAAASUVORK5CYII="
64
+ }
65
+ },
66
+ "cell_type": "markdown",
67
+ "metadata": {},
68
+ "source": [
69
+ "![image.png](attachment:image.png)"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "markdown",
74
+ "metadata": {},
75
+ "source": [
76
+ "# Decoder: \n",
77
+ "The decoder is similar to the encoder. It can also accept text inputs. \n",
78
+ "It uses a similar method as the encoder which is the masked self attention mechanism as well.\n",
79
+ "It differs from the encoder in the sense that it is uni-directional and it is traditionally used in an auto regressive manner. "
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "markdown",
84
+ "metadata": {},
85
+ "source": [
86
+ "# Encoder Decoder:\n",
87
+ "Combining the two parts results in an encoder-decoder or a sequence to sequence transformer. \n",
88
+ "The encoder accepts inputs and computes a high level representation of those inputs. These outputs are then passed to the decoder. \n",
89
+ "The decoder uses the encoders outputs as inputs along side other inputs to generate a prediction. \n",
90
+ "It then provides outputs which is reused in future iterations hence the term auto-regressive."
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "markdown",
95
+ "metadata": {},
96
+ "source": []
97
+ }
98
+ ],
99
+ "metadata": {
100
+ "language_info": {
101
+ "name": "python"
102
+ }
103
+ },
104
+ "nbformat": 4,
105
+ "nbformat_minor": 2
106
+ }
transformer_instantiation.ipynb ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "<class 'transformers.models.bert.modeling_bert.BertModel'>\n",
13
+ "<class 'transformers.models.gpt2.modeling_gpt2.GPT2Model'>\n",
14
+ "<class 'transformers.models.bart.modeling_bart.BartModel'>\n"
15
+ ]
16
+ }
17
+ ],
18
+ "source": [
19
+ "from transformers import AutoModel\n",
20
+ "\n",
21
+ "bert_model = AutoModel.from_pretrained('bert-base-cased')\n",
22
+ "print(type(bert_model))\n",
23
+ "\n",
24
+ "gpt_model = AutoModel.from_pretrained('gpt2')\n",
25
+ "print(type(gpt_model))\n",
26
+ "\n",
27
+ "bart_model = AutoModel.from_pretrained('facebook/bart-base')\n",
28
+ "print(type(bart_model))"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 37,
34
+ "metadata": {},
35
+ "outputs": [
36
+ {
37
+ "name": "stdout",
38
+ "output_type": "stream",
39
+ "text": [
40
+ "<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
41
+ "<class 'transformers.models.gpt2.configuration_gpt2.GPT2Config'>\n",
42
+ "<class 'transformers.models.bart.configuration_bart.BartConfig'>\n",
43
+ "<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
44
+ "<class 'transformers.models.gpt2.configuration_gpt2.GPT2Config'>\n"
45
+ ]
46
+ }
47
+ ],
48
+ "source": [
49
+ "from transformers import AutoConfig\n",
50
+ "\n",
51
+ "bert_config_by_autoconfig = AutoConfig.from_pretrained('bert-base-cased')\n",
52
+ "print(type(bert_config_by_autoconfig))\n",
53
+ "\n",
54
+ "gpt_config = AutoConfig.from_pretrained('gpt2')\n",
55
+ "print(type(gpt_config))\n",
56
+ "\n",
57
+ "bart_config = AutoConfig.from_pretrained('facebook/bart-base')\n",
58
+ "print(type(bart_config))\n",
59
+ "\n",
60
+ "'''You can also use a specific class corresponding to a checkpoint\n",
61
+ "to get the same output as above'''\n",
62
+ "\n",
63
+ "from transformers import BertConfig\n",
64
+ "bert_config_by_selfconfig = BertConfig.from_pretrained('bert-base-cased')\n",
65
+ "print(type(bert_config_by_selfconfig))\n",
66
+ "\n",
67
+ "\n",
68
+ "from transformers import GPT2Config\n",
69
+ "gpt_config_byselfconfig = GPT2Config.from_pretrained('gpt2')\n",
70
+ "print(type(gpt_config_byselfconfig))\n",
71
+ "\n",
72
+ "from transformers import BartConfig\n",
73
+ "bart_config_byselfconfig = BartConfig.from_pretrained('facebook/bart-base')\n",
74
+ "print(type(bart_config_byselfconfig)) "
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": 38,
80
+ "metadata": {},
81
+ "outputs": [
82
+ {
83
+ "name": "stdout",
84
+ "output_type": "stream",
85
+ "text": [
86
+ "BertConfig {\n",
87
+ " \"architectures\": [\n",
88
+ " \"BertForMaskedLM\"\n",
89
+ " ],\n",
90
+ " \"attention_probs_dropout_prob\": 0.1,\n",
91
+ " \"classifier_dropout\": null,\n",
92
+ " \"gradient_checkpointing\": false,\n",
93
+ " \"hidden_act\": \"gelu\",\n",
94
+ " \"hidden_dropout_prob\": 0.1,\n",
95
+ " \"hidden_size\": 768,\n",
96
+ " \"initializer_range\": 0.02,\n",
97
+ " \"intermediate_size\": 3072,\n",
98
+ " \"layer_norm_eps\": 1e-12,\n",
99
+ " \"max_position_embeddings\": 512,\n",
100
+ " \"model_type\": \"bert\",\n",
101
+ " \"num_attention_heads\": 12,\n",
102
+ " \"num_hidden_layers\": 12,\n",
103
+ " \"pad_token_id\": 0,\n",
104
+ " \"position_embedding_type\": \"absolute\",\n",
105
+ " \"transformers_version\": \"4.34.1\",\n",
106
+ " \"type_vocab_size\": 2,\n",
107
+ " \"use_cache\": true,\n",
108
+ " \"vocab_size\": 28996\n",
109
+ "}\n",
110
+ "\n"
111
+ ]
112
+ },
113
+ {
114
+ "data": {
115
+ "text/plain": [
116
+ "'\\nThe configuration of a model is a blueprint that has all the information\\nnecessarty to create the model architecture\\nfor instance, the bert model associated with the bert-base-cased checkpoint\\nhas 12 layers and 768 hidden units in each layer\\n'"
117
+ ]
118
+ },
119
+ "execution_count": 38,
120
+ "metadata": {},
121
+ "output_type": "execute_result"
122
+ }
123
+ ],
124
+ "source": [
125
+ "from transformers import BertConfig\n",
126
+ "\n",
127
+ "bert_config = BertConfig.from_pretrained('bert-base-cased')\n",
128
+ "print(bert_config)\n",
129
+ "\n",
130
+ "'''\n",
131
+ "The configuration of a model is a blueprint that has all the information\n",
132
+ "necessarty to create the model architecture\n",
133
+ "for instance, the bert model associated with the bert-base-cased checkpoint\n",
134
+ "has 12 layers and 768 hidden units in each layer\n",
135
+ "'''"
136
+ ]
137
+ },
138
+ {
139
+ "cell_type": "code",
140
+ "execution_count": 2,
141
+ "metadata": {},
142
+ "outputs": [],
143
+ "source": [
144
+ "'''\n",
145
+ "Once we have a configuration of a model we can create\n",
146
+ "a model that has the same architecture as our checkpoint\n",
147
+ "but is randomly initialized\n",
148
+ "We can then train it from scratch like any pytorch or \n",
149
+ "tensorflow model\n",
150
+ "We can also change any part of the configuration by using\n",
151
+ "key word arguments\n",
152
+ "'''\n",
153
+ "# Same architecture as bert-base-cased\n",
154
+ "from transformers import BertConfig, BertModel\n",
155
+ "\n",
156
+ "bert_config = BertConfig.from_pretrained('bert-base-cased')\n",
157
+ "bert_model = BertModel(bert_config)\n",
158
+ "\n",
159
+ "# Using only 10 layers instead of 12\n",
160
+ "from transformers import BertConfig, BertModel\n",
161
+ "\n",
162
+ "bert_config = BertConfig.from_pretrained('bert-base-cased', num_hidden_layers=10)\n",
163
+ "bert_model = BertModel(bert_config)"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "execution_count": 3,
169
+ "metadata": {},
170
+ "outputs": [],
171
+ "source": [
172
+ "'''Saving a model after it's been trained or fine-tuned\n",
173
+ "is very easy.\n",
174
+ "Just use the save_pretrained method\n",
175
+ "'''\n",
176
+ "# Saving a model:\n",
177
+ "\n",
178
+ "from transformers import BertConfig, BertModel\n",
179
+ "bert_config = BertConfig.from_pretrained('bert-base-cased')\n",
180
+ "bert_model = BertModel(bert_config)\n",
181
+ "\n",
182
+ "# Training code goes here\n",
183
+ "\n",
184
+ "bert_model.save_pretrained('my-bert-model')"
185
+ ]
186
+ },
187
+ {
188
+ "cell_type": "code",
189
+ "execution_count": 4,
190
+ "metadata": {},
191
+ "outputs": [],
192
+ "source": [
193
+ "# Reloading a saved model\n",
194
+ "from transformers import BertModel\n",
195
+ "\n",
196
+ "bert_model = BertModel.from_pretrained('my-bert-model')"
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": null,
202
+ "metadata": {},
203
+ "outputs": [],
204
+ "source": []
205
+ }
206
+ ],
207
+ "metadata": {
208
+ "kernelspec": {
209
+ "display_name": "Python 3",
210
+ "language": "python",
211
+ "name": "python3"
212
+ },
213
+ "language_info": {
214
+ "codemirror_mode": {
215
+ "name": "ipython",
216
+ "version": 3
217
+ },
218
+ "file_extension": ".py",
219
+ "mimetype": "text/x-python",
220
+ "name": "python",
221
+ "nbconvert_exporter": "python",
222
+ "pygments_lexer": "ipython3",
223
+ "version": "3.11.6"
224
+ }
225
+ },
226
+ "nbformat": 4,
227
+ "nbformat_minor": 2
228
+ }
transformer_instantiation_2.ipynb ADDED
@@ -0,0 +1,600 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "<class 'transformers.models.bert.modeling_bert.BertModel'>\n"
13
+ ]
14
+ }
15
+ ],
16
+ "source": [
17
+ "from transformers import AutoModel\n",
18
+ "\n",
19
+ "bert_model = AutoModel.from_pretrained('bert-base-cased')\n",
20
+ "print(type(bert_model))"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 2,
26
+ "metadata": {},
27
+ "outputs": [
28
+ {
29
+ "name": "stdout",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "<class 'transformers.models.gpt2.modeling_gpt2.GPT2Model'>\n"
33
+ ]
34
+ }
35
+ ],
36
+ "source": [
37
+ "gpt_model = AutoModel.from_pretrained('gpt2')\n",
38
+ "print(type(gpt_model))"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 3,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "name": "stdout",
48
+ "output_type": "stream",
49
+ "text": [
50
+ "<class 'transformers.models.bart.modeling_bart.BartModel'>\n"
51
+ ]
52
+ }
53
+ ],
54
+ "source": [
55
+ "bart_model = AutoModel.from_pretrained('facebook/bart-base')\n",
56
+ "print(type(bart_model))"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 4,
62
+ "metadata": {},
63
+ "outputs": [
64
+ {
65
+ "name": "stdout",
66
+ "output_type": "stream",
67
+ "text": [
68
+ "<class 'transformers.models.bert.configuration_bert.BertConfig'>\n"
69
+ ]
70
+ }
71
+ ],
72
+ "source": [
73
+ "from transformers import AutoConfig\n",
74
+ "\n",
75
+ "bert_config = AutoConfig.from_pretrained('bert-base-cased')\n",
76
+ "print(type(bert_config))"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 5,
82
+ "metadata": {},
83
+ "outputs": [
84
+ {
85
+ "name": "stdout",
86
+ "output_type": "stream",
87
+ "text": [
88
+ "BertConfig {\n",
89
+ " \"_name_or_path\": \"bert-base-cased\",\n",
90
+ " \"architectures\": [\n",
91
+ " \"BertForMaskedLM\"\n",
92
+ " ],\n",
93
+ " \"attention_probs_dropout_prob\": 0.1,\n",
94
+ " \"classifier_dropout\": null,\n",
95
+ " \"gradient_checkpointing\": false,\n",
96
+ " \"hidden_act\": \"gelu\",\n",
97
+ " \"hidden_dropout_prob\": 0.1,\n",
98
+ " \"hidden_size\": 768,\n",
99
+ " \"initializer_range\": 0.02,\n",
100
+ " \"intermediate_size\": 3072,\n",
101
+ " \"layer_norm_eps\": 1e-12,\n",
102
+ " \"max_position_embeddings\": 512,\n",
103
+ " \"model_type\": \"bert\",\n",
104
+ " \"num_attention_heads\": 12,\n",
105
+ " \"num_hidden_layers\": 12,\n",
106
+ " \"pad_token_id\": 0,\n",
107
+ " \"position_embedding_type\": \"absolute\",\n",
108
+ " \"transformers_version\": \"4.34.1\",\n",
109
+ " \"type_vocab_size\": 2,\n",
110
+ " \"use_cache\": true,\n",
111
+ " \"vocab_size\": 28996\n",
112
+ "}\n",
113
+ "\n"
114
+ ]
115
+ }
116
+ ],
117
+ "source": [
118
+ "print(bert_config)"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": 6,
124
+ "metadata": {},
125
+ "outputs": [
126
+ {
127
+ "name": "stdout",
128
+ "output_type": "stream",
129
+ "text": [
130
+ "<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
131
+ "<class 'transformers.models.bert.configuration_bert.BertConfig'>\n"
132
+ ]
133
+ }
134
+ ],
135
+ "source": [
136
+ "from transformers import BertConfig\n",
137
+ "\n",
138
+ "bert_config = BertConfig.from_pretrained('bert-base-cased')\n",
139
+ "print(type(bert_config))\n",
140
+ "\n",
141
+ "from transformers import AutoConfig\n",
142
+ "\n",
143
+ "bert_config = AutoConfig.from_pretrained('bert-base-cased')\n",
144
+ "print(type(bert_config))"
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "code",
149
+ "execution_count": 7,
150
+ "metadata": {},
151
+ "outputs": [
152
+ {
153
+ "name": "stdout",
154
+ "output_type": "stream",
155
+ "text": [
156
+ "BertConfig {\n",
157
+ " \"_name_or_path\": \"bert-base-cased\",\n",
158
+ " \"architectures\": [\n",
159
+ " \"BertForMaskedLM\"\n",
160
+ " ],\n",
161
+ " \"attention_probs_dropout_prob\": 0.1,\n",
162
+ " \"classifier_dropout\": null,\n",
163
+ " \"gradient_checkpointing\": false,\n",
164
+ " \"hidden_act\": \"gelu\",\n",
165
+ " \"hidden_dropout_prob\": 0.1,\n",
166
+ " \"hidden_size\": 768,\n",
167
+ " \"initializer_range\": 0.02,\n",
168
+ " \"intermediate_size\": 3072,\n",
169
+ " \"layer_norm_eps\": 1e-12,\n",
170
+ " \"max_position_embeddings\": 512,\n",
171
+ " \"model_type\": \"bert\",\n",
172
+ " \"num_attention_heads\": 12,\n",
173
+ " \"num_hidden_layers\": 12,\n",
174
+ " \"pad_token_id\": 0,\n",
175
+ " \"position_embedding_type\": \"absolute\",\n",
176
+ " \"transformers_version\": \"4.34.1\",\n",
177
+ " \"type_vocab_size\": 2,\n",
178
+ " \"use_cache\": true,\n",
179
+ " \"vocab_size\": 28996\n",
180
+ "}\n",
181
+ "\n"
182
+ ]
183
+ }
184
+ ],
185
+ "source": [
186
+ "print(bert_config)"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": 9,
192
+ "metadata": {},
193
+ "outputs": [
194
+ {
195
+ "name": "stdout",
196
+ "output_type": "stream",
197
+ "text": [
198
+ "BertModel(\n",
199
+ " (embeddings): BertEmbeddings(\n",
200
+ " (word_embeddings): Embedding(28996, 768, padding_idx=0)\n",
201
+ " (position_embeddings): Embedding(512, 768)\n",
202
+ " (token_type_embeddings): Embedding(2, 768)\n",
203
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
204
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
205
+ " )\n",
206
+ " (encoder): BertEncoder(\n",
207
+ " (layer): ModuleList(\n",
208
+ " (0-11): 12 x BertLayer(\n",
209
+ " (attention): BertAttention(\n",
210
+ " (self): BertSelfAttention(\n",
211
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
212
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
213
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
214
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
215
+ " )\n",
216
+ " (output): BertSelfOutput(\n",
217
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
218
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
219
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
220
+ " )\n",
221
+ " )\n",
222
+ " (intermediate): BertIntermediate(\n",
223
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
224
+ " (intermediate_act_fn): GELUActivation()\n",
225
+ " )\n",
226
+ " (output): BertOutput(\n",
227
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
228
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
229
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
230
+ " )\n",
231
+ " )\n",
232
+ " )\n",
233
+ " )\n",
234
+ " (pooler): BertPooler(\n",
235
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
236
+ " (activation): Tanh()\n",
237
+ " )\n",
238
+ ")\n"
239
+ ]
240
+ }
241
+ ],
242
+ "source": [
243
+ "from transformers import AutoModel\n",
244
+ "\n",
245
+ "bert_model = AutoModel.from_pretrained('bert-base-cased')\n",
246
+ "print(bert_model)"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 11,
252
+ "metadata": {},
253
+ "outputs": [
254
+ {
255
+ "name": "stdout",
256
+ "output_type": "stream",
257
+ "text": [
258
+ "BertModel(\n",
259
+ " (embeddings): BertEmbeddings(\n",
260
+ " (word_embeddings): Embedding(28996, 768, padding_idx=0)\n",
261
+ " (position_embeddings): Embedding(512, 768)\n",
262
+ " (token_type_embeddings): Embedding(2, 768)\n",
263
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
264
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
265
+ " )\n",
266
+ " (encoder): BertEncoder(\n",
267
+ " (layer): ModuleList(\n",
268
+ " (0-11): 12 x BertLayer(\n",
269
+ " (attention): BertAttention(\n",
270
+ " (self): BertSelfAttention(\n",
271
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
272
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
273
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
274
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
275
+ " )\n",
276
+ " (output): BertSelfOutput(\n",
277
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
278
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
279
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
280
+ " )\n",
281
+ " )\n",
282
+ " (intermediate): BertIntermediate(\n",
283
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
284
+ " (intermediate_act_fn): GELUActivation()\n",
285
+ " )\n",
286
+ " (output): BertOutput(\n",
287
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
288
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
289
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
290
+ " )\n",
291
+ " )\n",
292
+ " )\n",
293
+ " )\n",
294
+ " (pooler): BertPooler(\n",
295
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
296
+ " (activation): Tanh()\n",
297
+ " )\n",
298
+ ")\n",
299
+ "BertModel(\n",
300
+ " (embeddings): BertEmbeddings(\n",
301
+ " (word_embeddings): Embedding(28996, 768, padding_idx=0)\n",
302
+ " (position_embeddings): Embedding(512, 768)\n",
303
+ " (token_type_embeddings): Embedding(2, 768)\n",
304
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
305
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
306
+ " )\n",
307
+ " (encoder): BertEncoder(\n",
308
+ " (layer): ModuleList(\n",
309
+ " (0-7): 8 x BertLayer(\n",
310
+ " (attention): BertAttention(\n",
311
+ " (self): BertSelfAttention(\n",
312
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
313
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
314
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
315
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
316
+ " )\n",
317
+ " (output): BertSelfOutput(\n",
318
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
319
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
320
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
321
+ " )\n",
322
+ " )\n",
323
+ " (intermediate): BertIntermediate(\n",
324
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
325
+ " (intermediate_act_fn): GELUActivation()\n",
326
+ " )\n",
327
+ " (output): BertOutput(\n",
328
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
329
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
330
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
331
+ " )\n",
332
+ " )\n",
333
+ " )\n",
334
+ " )\n",
335
+ " (pooler): BertPooler(\n",
336
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
337
+ " (activation): Tanh()\n",
338
+ " )\n",
339
+ ")\n"
340
+ ]
341
+ }
342
+ ],
343
+ "source": [
344
+ "from transformers import BertConfig, BertModel\n",
345
+ "\n",
346
+ "bert_config = BertConfig.from_pretrained('bert-base-cased')\n",
347
+ "bert_model = BertModel(bert_config)\n",
348
+ "print(bert_model)\n",
349
+ "\n",
350
+ "bert_config = BertConfig.from_pretrained('bert-base-cased', num_hidden_layers=8)\n",
351
+ "bert_model = BertModel(bert_config)\n",
352
+ "print(bert_model)"
353
+ ]
354
+ },
355
+ {
356
+ "cell_type": "code",
357
+ "execution_count": 12,
358
+ "metadata": {},
359
+ "outputs": [
360
+ {
361
+ "name": "stdout",
362
+ "output_type": "stream",
363
+ "text": [
364
+ "BertConfig {\n",
365
+ " \"architectures\": [\n",
366
+ " \"BertForMaskedLM\"\n",
367
+ " ],\n",
368
+ " \"attention_probs_dropout_prob\": 0.1,\n",
369
+ " \"classifier_dropout\": null,\n",
370
+ " \"gradient_checkpointing\": false,\n",
371
+ " \"hidden_act\": \"gelu\",\n",
372
+ " \"hidden_dropout_prob\": 0.1,\n",
373
+ " \"hidden_size\": 768,\n",
374
+ " \"initializer_range\": 0.02,\n",
375
+ " \"intermediate_size\": 3072,\n",
376
+ " \"layer_norm_eps\": 1e-12,\n",
377
+ " \"max_position_embeddings\": 512,\n",
378
+ " \"model_type\": \"bert\",\n",
379
+ " \"num_attention_heads\": 12,\n",
380
+ " \"num_hidden_layers\": 12,\n",
381
+ " \"pad_token_id\": 0,\n",
382
+ " \"position_embedding_type\": \"absolute\",\n",
383
+ " \"transformers_version\": \"4.34.1\",\n",
384
+ " \"type_vocab_size\": 2,\n",
385
+ " \"use_cache\": true,\n",
386
+ " \"vocab_size\": 28996\n",
387
+ "}\n",
388
+ "\n"
389
+ ]
390
+ }
391
+ ],
392
+ "source": [
393
+ "from transformers import BertConfig, BertModel\n",
394
+ "bert_config_original = BertConfig.from_pretrained('bert-base-cased')\n",
395
+ "print(bert_config_original)\n",
396
+ "# bert_config = BertConfig.from_pretrained('bert-base-cased', num_hidden_layers=8)\n",
397
+ "# bert_model = BertModel(bert_config)"
398
+ ]
399
+ },
400
+ {
401
+ "cell_type": "code",
402
+ "execution_count": 13,
403
+ "metadata": {},
404
+ "outputs": [
405
+ {
406
+ "name": "stdout",
407
+ "output_type": "stream",
408
+ "text": [
409
+ "BertConfig {\n",
410
+ " \"_name_or_path\": \"bert-base-cased\",\n",
411
+ " \"architectures\": [\n",
412
+ " \"BertForMaskedLM\"\n",
413
+ " ],\n",
414
+ " \"attention_probs_dropout_prob\": 0.1,\n",
415
+ " \"classifier_dropout\": null,\n",
416
+ " \"gradient_checkpointing\": false,\n",
417
+ " \"hidden_act\": \"gelu\",\n",
418
+ " \"hidden_dropout_prob\": 0.1,\n",
419
+ " \"hidden_size\": 768,\n",
420
+ " \"initializer_range\": 0.02,\n",
421
+ " \"intermediate_size\": 3072,\n",
422
+ " \"layer_norm_eps\": 1e-12,\n",
423
+ " \"max_position_embeddings\": 512,\n",
424
+ " \"model_type\": \"bert\",\n",
425
+ " \"num_attention_heads\": 12,\n",
426
+ " \"num_hidden_layers\": 2,\n",
427
+ " \"pad_token_id\": 0,\n",
428
+ " \"position_embedding_type\": \"absolute\",\n",
429
+ " \"transformers_version\": \"4.34.1\",\n",
430
+ " \"type_vocab_size\": 2,\n",
431
+ " \"use_cache\": true,\n",
432
+ " \"vocab_size\": 100\n",
433
+ "}\n",
434
+ "\n"
435
+ ]
436
+ }
437
+ ],
438
+ "source": [
439
+ "bert_config_new = AutoConfig.from_pretrained('bert-base-cased', \n",
440
+ " num_hidden_layers=2,\n",
441
+ " vocab_size=100,\n",
442
+ " )\n",
443
+ "print(bert_config_new)"
444
+ ]
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "execution_count": 14,
449
+ "metadata": {},
450
+ "outputs": [
451
+ {
452
+ "name": "stdout",
453
+ "output_type": "stream",
454
+ "text": [
455
+ "BertModel(\n",
456
+ " (embeddings): BertEmbeddings(\n",
457
+ " (word_embeddings): Embedding(100, 768, padding_idx=0)\n",
458
+ " (position_embeddings): Embedding(512, 768)\n",
459
+ " (token_type_embeddings): Embedding(2, 768)\n",
460
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
461
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
462
+ " )\n",
463
+ " (encoder): BertEncoder(\n",
464
+ " (layer): ModuleList(\n",
465
+ " (0-1): 2 x BertLayer(\n",
466
+ " (attention): BertAttention(\n",
467
+ " (self): BertSelfAttention(\n",
468
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
469
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
470
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
471
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
472
+ " )\n",
473
+ " (output): BertSelfOutput(\n",
474
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
475
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
476
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
477
+ " )\n",
478
+ " )\n",
479
+ " (intermediate): BertIntermediate(\n",
480
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
481
+ " (intermediate_act_fn): GELUActivation()\n",
482
+ " )\n",
483
+ " (output): BertOutput(\n",
484
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
485
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
486
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
487
+ " )\n",
488
+ " )\n",
489
+ " )\n",
490
+ " )\n",
491
+ " (pooler): BertPooler(\n",
492
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
493
+ " (activation): Tanh()\n",
494
+ " )\n",
495
+ ")\n"
496
+ ]
497
+ }
498
+ ],
499
+ "source": [
500
+ "bert_model_new = BertModel(bert_config_new)\n",
501
+ "print(bert_model_new)"
502
+ ]
503
+ },
504
+ {
505
+ "cell_type": "code",
506
+ "execution_count": 15,
507
+ "metadata": {},
508
+ "outputs": [],
509
+ "source": [
510
+ "bert_model_new.save_pretrained('pew_bert_ye')"
511
+ ]
512
+ },
513
+ {
514
+ "cell_type": "code",
515
+ "execution_count": 16,
516
+ "metadata": {},
517
+ "outputs": [
518
+ {
519
+ "name": "stdout",
520
+ "output_type": "stream",
521
+ "text": [
522
+ "BertModel(\n",
523
+ " (embeddings): BertEmbeddings(\n",
524
+ " (word_embeddings): Embedding(100, 768, padding_idx=0)\n",
525
+ " (position_embeddings): Embedding(512, 768)\n",
526
+ " (token_type_embeddings): Embedding(2, 768)\n",
527
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
528
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
529
+ " )\n",
530
+ " (encoder): BertEncoder(\n",
531
+ " (layer): ModuleList(\n",
532
+ " (0-1): 2 x BertLayer(\n",
533
+ " (attention): BertAttention(\n",
534
+ " (self): BertSelfAttention(\n",
535
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
536
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
537
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
538
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
539
+ " )\n",
540
+ " (output): BertSelfOutput(\n",
541
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
542
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
543
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
544
+ " )\n",
545
+ " )\n",
546
+ " (intermediate): BertIntermediate(\n",
547
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
548
+ " (intermediate_act_fn): GELUActivation()\n",
549
+ " )\n",
550
+ " (output): BertOutput(\n",
551
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
552
+ " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
553
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
554
+ " )\n",
555
+ " )\n",
556
+ " )\n",
557
+ " )\n",
558
+ " (pooler): BertPooler(\n",
559
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
560
+ " (activation): Tanh()\n",
561
+ " )\n",
562
+ ")\n"
563
+ ]
564
+ }
565
+ ],
566
+ "source": [
567
+ "bert_model_new = BertModel.from_pretrained('pew_bert_ye')\n",
568
+ "print(bert_model_new)"
569
+ ]
570
+ },
571
+ {
572
+ "cell_type": "code",
573
+ "execution_count": null,
574
+ "metadata": {},
575
+ "outputs": [],
576
+ "source": []
577
+ }
578
+ ],
579
+ "metadata": {
580
+ "kernelspec": {
581
+ "display_name": "Python 3",
582
+ "language": "python",
583
+ "name": "python3"
584
+ },
585
+ "language_info": {
586
+ "codemirror_mode": {
587
+ "name": "ipython",
588
+ "version": 3
589
+ },
590
+ "file_extension": ".py",
591
+ "mimetype": "text/x-python",
592
+ "name": "python",
593
+ "nbconvert_exporter": "python",
594
+ "pygments_lexer": "ipython3",
595
+ "version": "3.11.6"
596
+ }
597
+ },
598
+ "nbformat": 4,
599
+ "nbformat_minor": 2
600
+ }