Commit
·
17e2432
1
Parent(s):
cd2cfc6
Upload 20 files
Browse files- 14-11-23.ipynb +818 -0
- 18-11-23.ipynb +618 -0
- 19-11-23.ipynb +18 -0
- Accelerate.ipynb +0 -0
- Batch-Inputs-Together.ipynb +623 -0
- DynamicPadding.ipynb +0 -0
- PytorchTrainingLoop.ipynb +375 -0
- Tokenizer_pipeline.ipynb +562 -0
- Trainer API 18-11-23.ipynb +114 -0
- Trainer API.ipynb +401 -0
- datasets_library.ipynb +513 -0
- pipeline.ipynb +450 -0
- pipeline2.ipynb +134 -0
- prac.ipynb +263 -0
- practise basics.ipynb +962 -0
- preprocess-sentence-pairs.ipynb +193 -0
- study_pipeline.ipynb +458 -0
- study_transformers.ipynb +106 -0
- transformer_instantiation.ipynb +228 -0
- transformer_instantiation_2.ipynb +600 -0
14-11-23.ipynb
ADDED
|
@@ -0,0 +1,818 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 3,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stderr",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).\n",
|
| 13 |
+
"Using a pipeline without specifying a model name and revision in production is not recommended.\n",
|
| 14 |
+
"Ignoring args : ('bert-base-uncased',)\n"
|
| 15 |
+
]
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"data": {
|
| 19 |
+
"text/plain": [
|
| 20 |
+
"[{'label': 'NEGATIVE', 'score': 0.9996927976608276}]"
|
| 21 |
+
]
|
| 22 |
+
},
|
| 23 |
+
"execution_count": 3,
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"output_type": "execute_result"
|
| 26 |
+
}
|
| 27 |
+
],
|
| 28 |
+
"source": [
|
| 29 |
+
"from transformers import pipeline\n",
|
| 30 |
+
"\n",
|
| 31 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"classifier = pipeline('sentiment-analysis')\n",
|
| 34 |
+
"classifier('I am disappointed in myself', checkpoint)"
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"cell_type": "code",
|
| 39 |
+
"execution_count": 12,
|
| 40 |
+
"metadata": {},
|
| 41 |
+
"outputs": [
|
| 42 |
+
{
|
| 43 |
+
"name": "stderr",
|
| 44 |
+
"output_type": "stream",
|
| 45 |
+
"text": [
|
| 46 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"name": "stdout",
|
| 51 |
+
"output_type": "stream",
|
| 52 |
+
"text": [
|
| 53 |
+
"['this', 'is', 'a', 'sentence', 'this', 'is', 'another', 'sentence', '.'] [2023, 2003, 1037, 6251, 2023, 2003, 2178, 6251, 1012] {'input_ids': [101, 2023, 2003, 1037, 6251, 2023, 2003, 2178, 6251, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}\n",
|
| 54 |
+
"{'input_ids': [101, 2023, 2003, 1037, 6251, 2023, 2003, 2178, 6251, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}\n",
|
| 55 |
+
"[CLS] this is a sentence this is another sentence. [SEP]\n",
|
| 56 |
+
"{'input_ids': tensor([[ 101, 2023, 2003, 1037, 6251, 102, 0],\n",
|
| 57 |
+
" [ 101, 2023, 2003, 2178, 6251, 1012, 102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0],\n",
|
| 58 |
+
" [0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0],\n",
|
| 59 |
+
" [1, 1, 1, 1, 1, 1, 1]])}\n",
|
| 60 |
+
"torch.Size([2, 7, 768])\n"
|
| 61 |
+
]
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"name": "stderr",
|
| 65 |
+
"output_type": "stream",
|
| 66 |
+
"text": [
|
| 67 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
|
| 68 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "stdout",
|
| 73 |
+
"output_type": "stream",
|
| 74 |
+
"text": [
|
| 75 |
+
"torch.Size([2, 2])\n",
|
| 76 |
+
"tensor([[0.6280, 0.3720],\n",
|
| 77 |
+
" [0.6231, 0.3769]], grad_fn=<SoftmaxBackward0>)\n"
|
| 78 |
+
]
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"data": {
|
| 82 |
+
"text/plain": [
|
| 83 |
+
"{0: 'LABEL_0', 1: 'LABEL_1'}"
|
| 84 |
+
]
|
| 85 |
+
},
|
| 86 |
+
"execution_count": 12,
|
| 87 |
+
"metadata": {},
|
| 88 |
+
"output_type": "execute_result"
|
| 89 |
+
}
|
| 90 |
+
],
|
| 91 |
+
"source": [
|
| 92 |
+
"from transformers import AutoTokenizer\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 95 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 96 |
+
"\n",
|
| 97 |
+
"raw_inputs = ['This is a sentence', 'This is another sentence.']\n",
|
| 98 |
+
"\n",
|
| 99 |
+
"input_tokens = tokenizer.tokenize(raw_inputs)\n",
|
| 100 |
+
"input_ids = tokenizer.convert_tokens_to_ids(input_tokens)\n",
|
| 101 |
+
"inputs = tokenizer.prepare_for_model(input_ids)\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"print(input_tokens, input_ids, inputs)\n",
|
| 104 |
+
"print(inputs)\n",
|
| 105 |
+
"print(tokenizer.decode(inputs['input_ids']))\n",
|
| 106 |
+
"\n",
|
| 107 |
+
"direct_inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')\n",
|
| 108 |
+
"print(direct_inputs)\n",
|
| 109 |
+
"\n",
|
| 110 |
+
"from transformers import AutoModel\n",
|
| 111 |
+
"\n",
|
| 112 |
+
"model = AutoModel.from_pretrained(checkpoint)\n",
|
| 113 |
+
"outputs = model(**direct_inputs)\n",
|
| 114 |
+
"print(outputs.last_hidden_state.shape)\n",
|
| 115 |
+
"\n",
|
| 116 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 117 |
+
"\n",
|
| 118 |
+
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
|
| 119 |
+
"outputs = model(**direct_inputs)\n",
|
| 120 |
+
"print(outputs.logits.shape)\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"import torch\n",
|
| 123 |
+
"\n",
|
| 124 |
+
"predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
|
| 125 |
+
"print(predictions)\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"model.config.id2label"
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"cell_type": "code",
|
| 132 |
+
"execution_count": 15,
|
| 133 |
+
"metadata": {},
|
| 134 |
+
"outputs": [],
|
| 135 |
+
"source": [
|
| 136 |
+
"from transformers import AutoModel\n",
|
| 137 |
+
"\n",
|
| 138 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 139 |
+
"model = AutoModel.from_pretrained(checkpoint)\n",
|
| 140 |
+
"\n",
|
| 141 |
+
"from transformers import AutoConfig, BertModel\n",
|
| 142 |
+
"\n",
|
| 143 |
+
"config = AutoConfig.from_pretrained(checkpoint)\n",
|
| 144 |
+
"model = BertModel(config)\n",
|
| 145 |
+
"\n",
|
| 146 |
+
"from transformers import BertConfig\n",
|
| 147 |
+
"\n",
|
| 148 |
+
"config = BertConfig.from_pretrained(checkpoint)\n",
|
| 149 |
+
"model = BertModel(config)\n",
|
| 150 |
+
"\n",
|
| 151 |
+
"model.save_pretrained('my-bert-model')\n",
|
| 152 |
+
"model = BertModel.from_pretrained('my-bert-model')"
|
| 153 |
+
]
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"cell_type": "code",
|
| 157 |
+
"execution_count": 16,
|
| 158 |
+
"metadata": {},
|
| 159 |
+
"outputs": [
|
| 160 |
+
{
|
| 161 |
+
"data": {
|
| 162 |
+
"text/plain": [
|
| 163 |
+
"DatasetDict({\n",
|
| 164 |
+
" train: Dataset({\n",
|
| 165 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 166 |
+
" num_rows: 3668\n",
|
| 167 |
+
" })\n",
|
| 168 |
+
" validation: Dataset({\n",
|
| 169 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 170 |
+
" num_rows: 408\n",
|
| 171 |
+
" })\n",
|
| 172 |
+
" test: Dataset({\n",
|
| 173 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 174 |
+
" num_rows: 1725\n",
|
| 175 |
+
" })\n",
|
| 176 |
+
"})"
|
| 177 |
+
]
|
| 178 |
+
},
|
| 179 |
+
"execution_count": 16,
|
| 180 |
+
"metadata": {},
|
| 181 |
+
"output_type": "execute_result"
|
| 182 |
+
}
|
| 183 |
+
],
|
| 184 |
+
"source": [
|
| 185 |
+
"from datasets import load_dataset\n",
|
| 186 |
+
"\n",
|
| 187 |
+
"raw_datasets = load_dataset('glue', 'mrpc')\n",
|
| 188 |
+
"raw_datasets"
|
| 189 |
+
]
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"cell_type": "code",
|
| 193 |
+
"execution_count": 17,
|
| 194 |
+
"metadata": {},
|
| 195 |
+
"outputs": [
|
| 196 |
+
{
|
| 197 |
+
"data": {
|
| 198 |
+
"text/plain": [
|
| 199 |
+
"Dataset({\n",
|
| 200 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 201 |
+
" num_rows: 3668\n",
|
| 202 |
+
"})"
|
| 203 |
+
]
|
| 204 |
+
},
|
| 205 |
+
"execution_count": 17,
|
| 206 |
+
"metadata": {},
|
| 207 |
+
"output_type": "execute_result"
|
| 208 |
+
}
|
| 209 |
+
],
|
| 210 |
+
"source": [
|
| 211 |
+
"raw_datasets['train']"
|
| 212 |
+
]
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"cell_type": "code",
|
| 216 |
+
"execution_count": 18,
|
| 217 |
+
"metadata": {},
|
| 218 |
+
"outputs": [
|
| 219 |
+
{
|
| 220 |
+
"data": {
|
| 221 |
+
"text/plain": [
|
| 222 |
+
"{'sentence1': 'Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .',\n",
|
| 223 |
+
" 'sentence2': 'Referring to him as only \" the witness \" , Amrozi accused his brother of deliberately distorting his evidence .',\n",
|
| 224 |
+
" 'label': 1,\n",
|
| 225 |
+
" 'idx': 0}"
|
| 226 |
+
]
|
| 227 |
+
},
|
| 228 |
+
"execution_count": 18,
|
| 229 |
+
"metadata": {},
|
| 230 |
+
"output_type": "execute_result"
|
| 231 |
+
}
|
| 232 |
+
],
|
| 233 |
+
"source": [
|
| 234 |
+
"raw_datasets['train'][0]"
|
| 235 |
+
]
|
| 236 |
+
},
|
| 237 |
+
{
|
| 238 |
+
"cell_type": "code",
|
| 239 |
+
"execution_count": 19,
|
| 240 |
+
"metadata": {},
|
| 241 |
+
"outputs": [
|
| 242 |
+
{
|
| 243 |
+
"data": {
|
| 244 |
+
"text/plain": [
|
| 245 |
+
"{'sentence1': Value(dtype='string', id=None),\n",
|
| 246 |
+
" 'sentence2': Value(dtype='string', id=None),\n",
|
| 247 |
+
" 'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None),\n",
|
| 248 |
+
" 'idx': Value(dtype='int32', id=None)}"
|
| 249 |
+
]
|
| 250 |
+
},
|
| 251 |
+
"execution_count": 19,
|
| 252 |
+
"metadata": {},
|
| 253 |
+
"output_type": "execute_result"
|
| 254 |
+
}
|
| 255 |
+
],
|
| 256 |
+
"source": [
|
| 257 |
+
"raw_datasets['train'].features"
|
| 258 |
+
]
|
| 259 |
+
},
|
| 260 |
+
{
|
| 261 |
+
"cell_type": "code",
|
| 262 |
+
"execution_count": 23,
|
| 263 |
+
"metadata": {},
|
| 264 |
+
"outputs": [
|
| 265 |
+
{
|
| 266 |
+
"data": {
|
| 267 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 268 |
+
"model_id": "aace1e2dface4a91891e4f3f293f72a3",
|
| 269 |
+
"version_major": 2,
|
| 270 |
+
"version_minor": 0
|
| 271 |
+
},
|
| 272 |
+
"text/plain": [
|
| 273 |
+
"Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
|
| 274 |
+
]
|
| 275 |
+
},
|
| 276 |
+
"metadata": {},
|
| 277 |
+
"output_type": "display_data"
|
| 278 |
+
},
|
| 279 |
+
{
|
| 280 |
+
"name": "stdout",
|
| 281 |
+
"output_type": "stream",
|
| 282 |
+
"text": [
|
| 283 |
+
"{'train': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask']}\n"
|
| 284 |
+
]
|
| 285 |
+
}
|
| 286 |
+
],
|
| 287 |
+
"source": [
|
| 288 |
+
"from transformers import AutoTokenizer\n",
|
| 289 |
+
"\n",
|
| 290 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 291 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 292 |
+
"\n",
|
| 293 |
+
"def tokenize_function(example):\n",
|
| 294 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 295 |
+
" padding='max_length', truncation=True, max_length=128)\n",
|
| 296 |
+
" \n",
|
| 297 |
+
"tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
|
| 298 |
+
"print(tokenized_datasets.column_names)"
|
| 299 |
+
]
|
| 300 |
+
},
|
| 301 |
+
{
|
| 302 |
+
"cell_type": "code",
|
| 303 |
+
"execution_count": 24,
|
| 304 |
+
"metadata": {},
|
| 305 |
+
"outputs": [
|
| 306 |
+
{
|
| 307 |
+
"data": {
|
| 308 |
+
"text/plain": [
|
| 309 |
+
"Dataset({\n",
|
| 310 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 311 |
+
" num_rows: 3668\n",
|
| 312 |
+
"})"
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
"execution_count": 24,
|
| 316 |
+
"metadata": {},
|
| 317 |
+
"output_type": "execute_result"
|
| 318 |
+
}
|
| 319 |
+
],
|
| 320 |
+
"source": [
|
| 321 |
+
"tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
|
| 322 |
+
"tokenized_datasets = tokenized_datasets.rename_column('label', 'labels')\n",
|
| 323 |
+
"tokenized_datasets = tokenized_datasets.with_format('torch')\n",
|
| 324 |
+
"tokenized_datasets['train']"
|
| 325 |
+
]
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"cell_type": "code",
|
| 329 |
+
"execution_count": 26,
|
| 330 |
+
"metadata": {},
|
| 331 |
+
"outputs": [],
|
| 332 |
+
"source": [
|
| 333 |
+
"small_train_dataset = tokenized_datasets['train'].select(range(100))"
|
| 334 |
+
]
|
| 335 |
+
},
|
| 336 |
+
{
|
| 337 |
+
"cell_type": "code",
|
| 338 |
+
"execution_count": 27,
|
| 339 |
+
"metadata": {},
|
| 340 |
+
"outputs": [
|
| 341 |
+
{
|
| 342 |
+
"data": {
|
| 343 |
+
"text/plain": [
|
| 344 |
+
"DatasetDict({\n",
|
| 345 |
+
" train: Dataset({\n",
|
| 346 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 347 |
+
" num_rows: 3668\n",
|
| 348 |
+
" })\n",
|
| 349 |
+
" validation: Dataset({\n",
|
| 350 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 351 |
+
" num_rows: 408\n",
|
| 352 |
+
" })\n",
|
| 353 |
+
" test: Dataset({\n",
|
| 354 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 355 |
+
" num_rows: 1725\n",
|
| 356 |
+
" })\n",
|
| 357 |
+
"})"
|
| 358 |
+
]
|
| 359 |
+
},
|
| 360 |
+
"execution_count": 27,
|
| 361 |
+
"metadata": {},
|
| 362 |
+
"output_type": "execute_result"
|
| 363 |
+
}
|
| 364 |
+
],
|
| 365 |
+
"source": [
|
| 366 |
+
"from datasets import load_dataset\n",
|
| 367 |
+
"\n",
|
| 368 |
+
"raw_datasets = load_dataset('glue','mrpc')\n",
|
| 369 |
+
"raw_datasets"
|
| 370 |
+
]
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"cell_type": "code",
|
| 374 |
+
"execution_count": 28,
|
| 375 |
+
"metadata": {},
|
| 376 |
+
"outputs": [
|
| 377 |
+
{
|
| 378 |
+
"name": "stdout",
|
| 379 |
+
"output_type": "stream",
|
| 380 |
+
"text": [
|
| 381 |
+
"{'train': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask']}\n"
|
| 382 |
+
]
|
| 383 |
+
}
|
| 384 |
+
],
|
| 385 |
+
"source": [
|
| 386 |
+
"from transformers import AutoTokenizer\n",
|
| 387 |
+
"\n",
|
| 388 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 389 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 390 |
+
"\n",
|
| 391 |
+
"def tokenize_function(example):\n",
|
| 392 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 393 |
+
" padding='max_length', truncation=True, max_length=128)\n",
|
| 394 |
+
"\n",
|
| 395 |
+
"tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
|
| 396 |
+
"print(tokenized_datasets.column_names)"
|
| 397 |
+
]
|
| 398 |
+
},
|
| 399 |
+
{
|
| 400 |
+
"cell_type": "code",
|
| 401 |
+
"execution_count": 29,
|
| 402 |
+
"metadata": {},
|
| 403 |
+
"outputs": [
|
| 404 |
+
{
|
| 405 |
+
"data": {
|
| 406 |
+
"text/plain": [
|
| 407 |
+
"Dataset({\n",
|
| 408 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 409 |
+
" num_rows: 3668\n",
|
| 410 |
+
"})"
|
| 411 |
+
]
|
| 412 |
+
},
|
| 413 |
+
"execution_count": 29,
|
| 414 |
+
"metadata": {},
|
| 415 |
+
"output_type": "execute_result"
|
| 416 |
+
}
|
| 417 |
+
],
|
| 418 |
+
"source": [
|
| 419 |
+
"tokenized_datasets = tokenized_datasets.remove_columns(['idx', 'sentence1','sentence2'])\n",
|
| 420 |
+
"tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
|
| 421 |
+
"tokenized_datasets = tokenized_datasets.with_format('torch')\n",
|
| 422 |
+
"\n",
|
| 423 |
+
"tokenized_datasets['train']"
|
| 424 |
+
]
|
| 425 |
+
},
|
| 426 |
+
{
|
| 427 |
+
"cell_type": "code",
|
| 428 |
+
"execution_count": 30,
|
| 429 |
+
"metadata": {},
|
| 430 |
+
"outputs": [
|
| 431 |
+
{
|
| 432 |
+
"data": {
|
| 433 |
+
"text/plain": [
|
| 434 |
+
"Dataset({\n",
|
| 435 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 436 |
+
" num_rows: 3668\n",
|
| 437 |
+
"})"
|
| 438 |
+
]
|
| 439 |
+
},
|
| 440 |
+
"execution_count": 30,
|
| 441 |
+
"metadata": {},
|
| 442 |
+
"output_type": "execute_result"
|
| 443 |
+
}
|
| 444 |
+
],
|
| 445 |
+
"source": [
|
| 446 |
+
"from datasets import load_dataset\n",
|
| 447 |
+
"\n",
|
| 448 |
+
"raw_datasets = load_dataset('glue', 'mrpc')\n",
|
| 449 |
+
"\n",
|
| 450 |
+
"from transformers import AutoTokenizer\n",
|
| 451 |
+
"\n",
|
| 452 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 453 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 454 |
+
"\n",
|
| 455 |
+
"def tokenize_function(example):\n",
|
| 456 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 457 |
+
" padding='max_length', truncation=True, max_length=128)\n",
|
| 458 |
+
" \n",
|
| 459 |
+
"tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
|
| 460 |
+
"tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
|
| 461 |
+
"tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
|
| 462 |
+
"tokenized_datasets = tokenized_datasets.with_format('torch')\n",
|
| 463 |
+
"\n",
|
| 464 |
+
"tokenized_datasets['train']"
|
| 465 |
+
]
|
| 466 |
+
},
|
| 467 |
+
{
|
| 468 |
+
"cell_type": "code",
|
| 469 |
+
"execution_count": 31,
|
| 470 |
+
"metadata": {},
|
| 471 |
+
"outputs": [
|
| 472 |
+
{
|
| 473 |
+
"name": "stdout",
|
| 474 |
+
"output_type": "stream",
|
| 475 |
+
"text": [
|
| 476 |
+
"0 torch.Size([16, 128])\n",
|
| 477 |
+
"1 torch.Size([16, 128])\n",
|
| 478 |
+
"2 torch.Size([16, 128])\n",
|
| 479 |
+
"3 torch.Size([16, 128])\n",
|
| 480 |
+
"4 torch.Size([16, 128])\n",
|
| 481 |
+
"5 torch.Size([16, 128])\n",
|
| 482 |
+
"6 torch.Size([16, 128])\n"
|
| 483 |
+
]
|
| 484 |
+
}
|
| 485 |
+
],
|
| 486 |
+
"source": [
|
| 487 |
+
"from torch.utils.data import DataLoader\n",
|
| 488 |
+
"\n",
|
| 489 |
+
"train_dataloader = DataLoader(tokenized_datasets['train'], shuffle=True, batch_size=16)\n",
|
| 490 |
+
"\n",
|
| 491 |
+
"for step, batch in enumerate(train_dataloader):\n",
|
| 492 |
+
" print(step, batch['input_ids'].shape)\n",
|
| 493 |
+
" if step>5:\n",
|
| 494 |
+
" break"
|
| 495 |
+
]
|
| 496 |
+
},
|
| 497 |
+
{
|
| 498 |
+
"cell_type": "markdown",
|
| 499 |
+
"metadata": {},
|
| 500 |
+
"source": [
|
| 501 |
+
"Dynamic Padding !!!!"
|
| 502 |
+
]
|
| 503 |
+
},
|
| 504 |
+
{
|
| 505 |
+
"cell_type": "code",
|
| 506 |
+
"execution_count": 32,
|
| 507 |
+
"metadata": {},
|
| 508 |
+
"outputs": [
|
| 509 |
+
{
|
| 510 |
+
"data": {
|
| 511 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 512 |
+
"model_id": "62a66797be6544e9a7c2c1b2844d2c0a",
|
| 513 |
+
"version_major": 2,
|
| 514 |
+
"version_minor": 0
|
| 515 |
+
},
|
| 516 |
+
"text/plain": [
|
| 517 |
+
"Map: 0%| | 0/3668 [00:00<?, ? examples/s]"
|
| 518 |
+
]
|
| 519 |
+
},
|
| 520 |
+
"metadata": {},
|
| 521 |
+
"output_type": "display_data"
|
| 522 |
+
},
|
| 523 |
+
{
|
| 524 |
+
"data": {
|
| 525 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 526 |
+
"model_id": "a0de8cbd9b4447c8b33d0cf9b87ff400",
|
| 527 |
+
"version_major": 2,
|
| 528 |
+
"version_minor": 0
|
| 529 |
+
},
|
| 530 |
+
"text/plain": [
|
| 531 |
+
"Map: 0%| | 0/408 [00:00<?, ? examples/s]"
|
| 532 |
+
]
|
| 533 |
+
},
|
| 534 |
+
"metadata": {},
|
| 535 |
+
"output_type": "display_data"
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"data": {
|
| 539 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 540 |
+
"model_id": "eb3ef1f76bce496d9068289dea690165",
|
| 541 |
+
"version_major": 2,
|
| 542 |
+
"version_minor": 0
|
| 543 |
+
},
|
| 544 |
+
"text/plain": [
|
| 545 |
+
"Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
|
| 546 |
+
]
|
| 547 |
+
},
|
| 548 |
+
"metadata": {},
|
| 549 |
+
"output_type": "display_data"
|
| 550 |
+
}
|
| 551 |
+
],
|
| 552 |
+
"source": [
|
| 553 |
+
"from datasets import load_dataset\n",
|
| 554 |
+
"\n",
|
| 555 |
+
"raw_datasets = load_dataset('glue', 'mrpc')\n",
|
| 556 |
+
"\n",
|
| 557 |
+
"from transformers import AutoTokenizer\n",
|
| 558 |
+
"\n",
|
| 559 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 560 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 561 |
+
"\n",
|
| 562 |
+
"def tokenize_function(example):\n",
|
| 563 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 564 |
+
" truncation=True)\n",
|
| 565 |
+
"\n",
|
| 566 |
+
"tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)"
|
| 567 |
+
]
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"cell_type": "code",
|
| 571 |
+
"execution_count": 33,
|
| 572 |
+
"metadata": {},
|
| 573 |
+
"outputs": [],
|
| 574 |
+
"source": [
|
| 575 |
+
"tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
|
| 576 |
+
"tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
|
| 577 |
+
"tokenized_datasets = tokenized_datasets.with_format('torch')"
|
| 578 |
+
]
|
| 579 |
+
},
|
| 580 |
+
{
|
| 581 |
+
"cell_type": "code",
|
| 582 |
+
"execution_count": 34,
|
| 583 |
+
"metadata": {},
|
| 584 |
+
"outputs": [
|
| 585 |
+
{
|
| 586 |
+
"name": "stderr",
|
| 587 |
+
"output_type": "stream",
|
| 588 |
+
"text": [
|
| 589 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 590 |
+
]
|
| 591 |
+
},
|
| 592 |
+
{
|
| 593 |
+
"name": "stdout",
|
| 594 |
+
"output_type": "stream",
|
| 595 |
+
"text": [
|
| 596 |
+
"0 torch.Size([16, 80])\n",
|
| 597 |
+
"1 torch.Size([16, 79])\n",
|
| 598 |
+
"2 torch.Size([16, 74])\n",
|
| 599 |
+
"3 torch.Size([16, 64])\n",
|
| 600 |
+
"4 torch.Size([16, 72])\n",
|
| 601 |
+
"5 torch.Size([16, 78])\n",
|
| 602 |
+
"6 torch.Size([16, 82])\n"
|
| 603 |
+
]
|
| 604 |
+
}
|
| 605 |
+
],
|
| 606 |
+
"source": [
|
| 607 |
+
"from torch.utils.data import DataLoader\n",
|
| 608 |
+
"from transformers import DataCollatorWithPadding\n",
|
| 609 |
+
"\n",
|
| 610 |
+
"data_collator = DataCollatorWithPadding(tokenizer)\n",
|
| 611 |
+
"train_dataloader = DataLoader(\n",
|
| 612 |
+
" tokenized_datasets['train'], batch_size=16, shuffle=True, collate_fn=data_collator\n",
|
| 613 |
+
")\n",
|
| 614 |
+
"\n",
|
| 615 |
+
"for step, batch in enumerate(train_dataloader):\n",
|
| 616 |
+
" print(step, batch['input_ids'].shape)\n",
|
| 617 |
+
" if step>5:\n",
|
| 618 |
+
" break"
|
| 619 |
+
]
|
| 620 |
+
},
|
| 621 |
+
{
|
| 622 |
+
"cell_type": "code",
|
| 623 |
+
"execution_count": 35,
|
| 624 |
+
"metadata": {},
|
| 625 |
+
"outputs": [
|
| 626 |
+
{
|
| 627 |
+
"data": {
|
| 628 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 629 |
+
"model_id": "9a3cbe1e023c427daa2f4d86542449f4",
|
| 630 |
+
"version_major": 2,
|
| 631 |
+
"version_minor": 0
|
| 632 |
+
},
|
| 633 |
+
"text/plain": [
|
| 634 |
+
"Map: 0%| | 0/3668 [00:00<?, ? examples/s]"
|
| 635 |
+
]
|
| 636 |
+
},
|
| 637 |
+
"metadata": {},
|
| 638 |
+
"output_type": "display_data"
|
| 639 |
+
},
|
| 640 |
+
{
|
| 641 |
+
"data": {
|
| 642 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 643 |
+
"model_id": "80e82d865bdd44f184282d0991a36010",
|
| 644 |
+
"version_major": 2,
|
| 645 |
+
"version_minor": 0
|
| 646 |
+
},
|
| 647 |
+
"text/plain": [
|
| 648 |
+
"Map: 0%| | 0/408 [00:00<?, ? examples/s]"
|
| 649 |
+
]
|
| 650 |
+
},
|
| 651 |
+
"metadata": {},
|
| 652 |
+
"output_type": "display_data"
|
| 653 |
+
},
|
| 654 |
+
{
|
| 655 |
+
"data": {
|
| 656 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 657 |
+
"model_id": "f7a7d45215804479ba0d135080c04e56",
|
| 658 |
+
"version_major": 2,
|
| 659 |
+
"version_minor": 0
|
| 660 |
+
},
|
| 661 |
+
"text/plain": [
|
| 662 |
+
"Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
|
| 663 |
+
]
|
| 664 |
+
},
|
| 665 |
+
"metadata": {},
|
| 666 |
+
"output_type": "display_data"
|
| 667 |
+
},
|
| 668 |
+
{
|
| 669 |
+
"name": "stdout",
|
| 670 |
+
"output_type": "stream",
|
| 671 |
+
"text": [
|
| 672 |
+
"0 torch.Size([16, 128])\n",
|
| 673 |
+
"1 torch.Size([16, 128])\n",
|
| 674 |
+
"2 torch.Size([16, 128])\n",
|
| 675 |
+
"3 torch.Size([16, 128])\n",
|
| 676 |
+
"4 torch.Size([16, 128])\n",
|
| 677 |
+
"5 torch.Size([16, 128])\n",
|
| 678 |
+
"6 torch.Size([16, 128])\n"
|
| 679 |
+
]
|
| 680 |
+
}
|
| 681 |
+
],
|
| 682 |
+
"source": [
|
| 683 |
+
"from datasets import load_dataset\n",
|
| 684 |
+
"\n",
|
| 685 |
+
"raw_dataset = load_dataset('glue','mrpc')\n",
|
| 686 |
+
"\n",
|
| 687 |
+
"from transformers import AutoTokenizer\n",
|
| 688 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 689 |
+
"\n",
|
| 690 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 691 |
+
"\n",
|
| 692 |
+
"def tokenize_function(example):\n",
|
| 693 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 694 |
+
" padding='max_length', truncation=True,\n",
|
| 695 |
+
" max_length=128)\n",
|
| 696 |
+
"\n",
|
| 697 |
+
"tokenized_datasets = raw_dataset.map(tokenize_function, batched=True)\n",
|
| 698 |
+
"\n",
|
| 699 |
+
"tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
|
| 700 |
+
"tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
|
| 701 |
+
"tokenized_datasets = tokenized_datasets.with_format('torch')\n",
|
| 702 |
+
"\n",
|
| 703 |
+
"from torch.utils.data import DataLoader\n",
|
| 704 |
+
"\n",
|
| 705 |
+
"train_dataloader = DataLoader(tokenized_datasets['train'],\n",
|
| 706 |
+
" shuffle=True, batch_size=16)\n",
|
| 707 |
+
"\n",
|
| 708 |
+
"for step, batch in enumerate(train_dataloader):\n",
|
| 709 |
+
" print(step, batch['input_ids'].shape)\n",
|
| 710 |
+
" if step>5:\n",
|
| 711 |
+
" break"
|
| 712 |
+
]
|
| 713 |
+
},
|
| 714 |
+
{
|
| 715 |
+
"cell_type": "code",
|
| 716 |
+
"execution_count": 36,
|
| 717 |
+
"metadata": {},
|
| 718 |
+
"outputs": [
|
| 719 |
+
{
|
| 720 |
+
"data": {
|
| 721 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 722 |
+
"model_id": "59780038e4dc41d68f858b55511780a4",
|
| 723 |
+
"version_major": 2,
|
| 724 |
+
"version_minor": 0
|
| 725 |
+
},
|
| 726 |
+
"text/plain": [
|
| 727 |
+
"Map: 0%| | 0/408 [00:00<?, ? examples/s]"
|
| 728 |
+
]
|
| 729 |
+
},
|
| 730 |
+
"metadata": {},
|
| 731 |
+
"output_type": "display_data"
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"name": "stderr",
|
| 735 |
+
"output_type": "stream",
|
| 736 |
+
"text": [
|
| 737 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 738 |
+
]
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"name": "stdout",
|
| 742 |
+
"output_type": "stream",
|
| 743 |
+
"text": [
|
| 744 |
+
"0 torch.Size([16, 72])\n",
|
| 745 |
+
"1 torch.Size([16, 70])\n",
|
| 746 |
+
"2 torch.Size([16, 77])\n",
|
| 747 |
+
"3 torch.Size([16, 76])\n",
|
| 748 |
+
"4 torch.Size([16, 81])\n",
|
| 749 |
+
"5 torch.Size([16, 82])\n",
|
| 750 |
+
"6 torch.Size([16, 81])\n"
|
| 751 |
+
]
|
| 752 |
+
}
|
| 753 |
+
],
|
| 754 |
+
"source": [
|
| 755 |
+
"from datasets import load_dataset\n",
|
| 756 |
+
"\n",
|
| 757 |
+
"raw_datasets = load_dataset('glue','mrpc')\n",
|
| 758 |
+
"\n",
|
| 759 |
+
"from transformers import AutoTokenizer\n",
|
| 760 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 761 |
+
"\n",
|
| 762 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 763 |
+
"\n",
|
| 764 |
+
"def tokenize_function(example):\n",
|
| 765 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 766 |
+
" truncation=True)\n",
|
| 767 |
+
"\n",
|
| 768 |
+
"tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
|
| 769 |
+
"\n",
|
| 770 |
+
"tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
|
| 771 |
+
"tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
|
| 772 |
+
"tokenized_datasets = tokenized_datasets.with_format('torch')\n",
|
| 773 |
+
"\n",
|
| 774 |
+
"from torch.utils.data import DataLoader\n",
|
| 775 |
+
"from transformers import DataCollatorWithPadding\n",
|
| 776 |
+
"\n",
|
| 777 |
+
"data_collator = DataCollatorWithPadding(tokenizer)\n",
|
| 778 |
+
"train_dataloader = DataLoader(\n",
|
| 779 |
+
" tokenized_datasets['train'], batch_size=16, shuffle=True,\n",
|
| 780 |
+
" collate_fn=data_collator\n",
|
| 781 |
+
")\n",
|
| 782 |
+
"\n",
|
| 783 |
+
"for step,batch in enumerate(train_dataloader):\n",
|
| 784 |
+
" print(step,batch['input_ids'].shape)\n",
|
| 785 |
+
" if step>5:\n",
|
| 786 |
+
" break"
|
| 787 |
+
]
|
| 788 |
+
},
|
| 789 |
+
{
|
| 790 |
+
"cell_type": "code",
|
| 791 |
+
"execution_count": null,
|
| 792 |
+
"metadata": {},
|
| 793 |
+
"outputs": [],
|
| 794 |
+
"source": []
|
| 795 |
+
}
|
| 796 |
+
],
|
| 797 |
+
"metadata": {
|
| 798 |
+
"kernelspec": {
|
| 799 |
+
"display_name": "Python 3",
|
| 800 |
+
"language": "python",
|
| 801 |
+
"name": "python3"
|
| 802 |
+
},
|
| 803 |
+
"language_info": {
|
| 804 |
+
"codemirror_mode": {
|
| 805 |
+
"name": "ipython",
|
| 806 |
+
"version": 3
|
| 807 |
+
},
|
| 808 |
+
"file_extension": ".py",
|
| 809 |
+
"mimetype": "text/x-python",
|
| 810 |
+
"name": "python",
|
| 811 |
+
"nbconvert_exporter": "python",
|
| 812 |
+
"pygments_lexer": "ipython3",
|
| 813 |
+
"version": "3.11.6"
|
| 814 |
+
}
|
| 815 |
+
},
|
| 816 |
+
"nbformat": 4,
|
| 817 |
+
"nbformat_minor": 2
|
| 818 |
+
}
|
18-11-23.ipynb
ADDED
|
@@ -0,0 +1,618 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stderr",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"data": {
|
| 17 |
+
"text/plain": [
|
| 18 |
+
"{'input_ids': [101, 1045, 2572, 5191, 1012, 1012, 1045, 4553, 2061, 4030, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}"
|
| 19 |
+
]
|
| 20 |
+
},
|
| 21 |
+
"execution_count": 1,
|
| 22 |
+
"metadata": {},
|
| 23 |
+
"output_type": "execute_result"
|
| 24 |
+
}
|
| 25 |
+
],
|
| 26 |
+
"source": [
|
| 27 |
+
"from transformers import AutoTokenizer\n",
|
| 28 |
+
"\n",
|
| 29 |
+
"tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n",
|
| 30 |
+
"tokenized_inputs=tokenizer.tokenize('I am worried.. I learn so slow')\n",
|
| 31 |
+
"ids = tokenizer.convert_tokens_to_ids(tokenized_inputs)\n",
|
| 32 |
+
"input_ids = tokenizer.prepare_for_model(ids)\n",
|
| 33 |
+
"input_ids"
|
| 34 |
+
]
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"cell_type": "code",
|
| 38 |
+
"execution_count": 2,
|
| 39 |
+
"metadata": {},
|
| 40 |
+
"outputs": [
|
| 41 |
+
{
|
| 42 |
+
"name": "stdout",
|
| 43 |
+
"output_type": "stream",
|
| 44 |
+
"text": [
|
| 45 |
+
"{'input_ids': tensor([[ 101, 1045, 2572, 5191, 1045, 4553, 2061, 4030, 102, 0],\n",
|
| 46 |
+
" [ 101, 2023, 2003, 2033, 2028, 1997, 1996, 5409, 2493, 102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 47 |
+
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0],\n",
|
| 48 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}\n",
|
| 49 |
+
"torch.Size([2, 10, 768])\n"
|
| 50 |
+
]
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"name": "stderr",
|
| 54 |
+
"output_type": "stream",
|
| 55 |
+
"text": [
|
| 56 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
|
| 57 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 58 |
+
]
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"name": "stdout",
|
| 62 |
+
"output_type": "stream",
|
| 63 |
+
"text": [
|
| 64 |
+
"tensor([[-0.1298, -0.3846],\n",
|
| 65 |
+
" [ 0.1664, -0.1490]], grad_fn=<AddmmBackward0>)\n",
|
| 66 |
+
"tensor([[0.5633, 0.4367],\n",
|
| 67 |
+
" [0.5782, 0.4218]], grad_fn=<SoftmaxBackward0>)\n"
|
| 68 |
+
]
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"data": {
|
| 72 |
+
"text/plain": [
|
| 73 |
+
"{0: 'LABEL_0', 1: 'LABEL_1'}"
|
| 74 |
+
]
|
| 75 |
+
},
|
| 76 |
+
"execution_count": 2,
|
| 77 |
+
"metadata": {},
|
| 78 |
+
"output_type": "execute_result"
|
| 79 |
+
}
|
| 80 |
+
],
|
| 81 |
+
"source": [
|
| 82 |
+
"from transformers import AutoTokenizer\n",
|
| 83 |
+
"\n",
|
| 84 |
+
"tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")\n",
|
| 85 |
+
"inputs = tokenizer([\"I am worried I learn so slow\", \n",
|
| 86 |
+
" \"This is me one of the worst students\"],\n",
|
| 87 |
+
" padding=True, truncation=True, return_tensors=\"pt\")\n",
|
| 88 |
+
"print(inputs)\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"from transformers import AutoModel\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"model = AutoModel.from_pretrained(\"bert-base-uncased\")\n",
|
| 93 |
+
"outputs = model(**inputs)\n",
|
| 94 |
+
"print(outputs.last_hidden_state.shape)\n",
|
| 95 |
+
"\n",
|
| 96 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 97 |
+
"\n",
|
| 98 |
+
"model = AutoModelForSequenceClassification.from_pretrained(\"bert-base-uncased\")\n",
|
| 99 |
+
"outputs = model(**inputs)\n",
|
| 100 |
+
"print(outputs.logits)\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"import torch\n",
|
| 103 |
+
"\n",
|
| 104 |
+
"predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
|
| 105 |
+
"print(predictions)\n",
|
| 106 |
+
"\n",
|
| 107 |
+
"model.config.id2label"
|
| 108 |
+
]
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"cell_type": "code",
|
| 112 |
+
"execution_count": 3,
|
| 113 |
+
"metadata": {},
|
| 114 |
+
"outputs": [
|
| 115 |
+
{
|
| 116 |
+
"name": "stdout",
|
| 117 |
+
"output_type": "stream",
|
| 118 |
+
"text": [
|
| 119 |
+
"BertConfig {\n",
|
| 120 |
+
" \"architectures\": [\n",
|
| 121 |
+
" \"BertForMaskedLM\"\n",
|
| 122 |
+
" ],\n",
|
| 123 |
+
" \"attention_probs_dropout_prob\": 0.1,\n",
|
| 124 |
+
" \"classifier_dropout\": null,\n",
|
| 125 |
+
" \"gradient_checkpointing\": false,\n",
|
| 126 |
+
" \"hidden_act\": \"gelu\",\n",
|
| 127 |
+
" \"hidden_dropout_prob\": 0.1,\n",
|
| 128 |
+
" \"hidden_size\": 768,\n",
|
| 129 |
+
" \"initializer_range\": 0.02,\n",
|
| 130 |
+
" \"intermediate_size\": 3072,\n",
|
| 131 |
+
" \"layer_norm_eps\": 1e-12,\n",
|
| 132 |
+
" \"max_position_embeddings\": 512,\n",
|
| 133 |
+
" \"model_type\": \"bert\",\n",
|
| 134 |
+
" \"num_attention_heads\": 12,\n",
|
| 135 |
+
" \"num_hidden_layers\": 12,\n",
|
| 136 |
+
" \"pad_token_id\": 0,\n",
|
| 137 |
+
" \"position_embedding_type\": \"absolute\",\n",
|
| 138 |
+
" \"transformers_version\": \"4.34.1\",\n",
|
| 139 |
+
" \"type_vocab_size\": 2,\n",
|
| 140 |
+
" \"use_cache\": true,\n",
|
| 141 |
+
" \"vocab_size\": 30522\n",
|
| 142 |
+
"}\n",
|
| 143 |
+
"\n"
|
| 144 |
+
]
|
| 145 |
+
}
|
| 146 |
+
],
|
| 147 |
+
"source": [
|
| 148 |
+
"from transformers import AutoConfig\n",
|
| 149 |
+
"from transformers import BertConfig\n",
|
| 150 |
+
"from transformers import BertModel\n",
|
| 151 |
+
"\n",
|
| 152 |
+
"bert_config = BertConfig.from_pretrained(\"bert-base-uncased\")\n",
|
| 153 |
+
"bert_model = BertModel(bert_config)\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"auto_config = AutoConfig.from_pretrained(\"bert-base-uncased\")\n",
|
| 156 |
+
"bert_model_auto_config = BertModel(auto_config)\n",
|
| 157 |
+
"\n",
|
| 158 |
+
"print(bert_config)"
|
| 159 |
+
]
|
| 160 |
+
},
|
| 161 |
+
{
|
| 162 |
+
"cell_type": "code",
|
| 163 |
+
"execution_count": 4,
|
| 164 |
+
"metadata": {},
|
| 165 |
+
"outputs": [],
|
| 166 |
+
"source": [
|
| 167 |
+
"new_bert_config = BertConfig.from_pretrained('bert-base-uncased', num_hidden_layers = 9, vocab_size=1000)\n",
|
| 168 |
+
"new_bert_model = BertModel(new_bert_config)\n",
|
| 169 |
+
"\n",
|
| 170 |
+
"save_new_bert_model = new_bert_model.save_pretrained('new_bert_model')\n",
|
| 171 |
+
"\n",
|
| 172 |
+
"load_new_bert_model = BertModel.from_pretrained('new_bert_model')"
|
| 173 |
+
]
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"cell_type": "code",
|
| 177 |
+
"execution_count": 5,
|
| 178 |
+
"metadata": {},
|
| 179 |
+
"outputs": [
|
| 180 |
+
{
|
| 181 |
+
"name": "stdout",
|
| 182 |
+
"output_type": "stream",
|
| 183 |
+
"text": [
|
| 184 |
+
"Dataset({\n",
|
| 185 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 186 |
+
" num_rows: 3668\n",
|
| 187 |
+
"}) {'sentence1': Value(dtype='string', id=None), 'sentence2': Value(dtype='string', id=None), 'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None), 'idx': Value(dtype='int32', id=None)} {'sentence1': 'Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .', 'sentence2': 'Referring to him as only \" the witness \" , Amrozi accused his brother of deliberately distorting his evidence .', 'label': 1, 'idx': 0} {'sentence1': ['Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .', \"Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .\", 'They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .', 'Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .', 'The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .'], 'sentence2': ['Referring to him as only \" the witness \" , Amrozi accused his brother of deliberately distorting his evidence .', \"Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .\", \"On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .\", 'Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .', 'PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .'], 'label': [1, 0, 1, 0, 1], 'idx': [0, 1, 2, 3, 4]}\n"
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"data": {
|
| 192 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 193 |
+
"model_id": "008ad47191464d26a6edd36e8d00cc06",
|
| 194 |
+
"version_major": 2,
|
| 195 |
+
"version_minor": 0
|
| 196 |
+
},
|
| 197 |
+
"text/plain": [
|
| 198 |
+
"Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
|
| 199 |
+
]
|
| 200 |
+
},
|
| 201 |
+
"metadata": {},
|
| 202 |
+
"output_type": "display_data"
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"data": {
|
| 206 |
+
"text/plain": [
|
| 207 |
+
"DatasetDict({\n",
|
| 208 |
+
" train: Dataset({\n",
|
| 209 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 210 |
+
" num_rows: 3668\n",
|
| 211 |
+
" })\n",
|
| 212 |
+
" validation: Dataset({\n",
|
| 213 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 214 |
+
" num_rows: 408\n",
|
| 215 |
+
" })\n",
|
| 216 |
+
" test: Dataset({\n",
|
| 217 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 218 |
+
" num_rows: 1725\n",
|
| 219 |
+
" })\n",
|
| 220 |
+
"})"
|
| 221 |
+
]
|
| 222 |
+
},
|
| 223 |
+
"execution_count": 5,
|
| 224 |
+
"metadata": {},
|
| 225 |
+
"output_type": "execute_result"
|
| 226 |
+
}
|
| 227 |
+
],
|
| 228 |
+
"source": [
|
| 229 |
+
"from datasets import load_dataset\n",
|
| 230 |
+
"\n",
|
| 231 |
+
"raw_dataset = load_dataset(\"glue\", \"mrpc\")\n",
|
| 232 |
+
"print(raw_dataset[\"train\"], raw_dataset[\"train\"].features, raw_dataset[\"train\"][0], raw_dataset[\"train\"][:5])\n",
|
| 233 |
+
"\n",
|
| 234 |
+
"from transformers import AutoTokenizer\n",
|
| 235 |
+
"checkpoint = \"bert-base-uncased\"\n",
|
| 236 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"def tokenize_function(example):\n",
|
| 239 |
+
" return tokenizer(example[\"sentence1\"], example[\"sentence2\"],\n",
|
| 240 |
+
" padding = True, truncation=True,\n",
|
| 241 |
+
" max_length=128) \n",
|
| 242 |
+
"\n",
|
| 243 |
+
"tokenized_dataset = raw_dataset.map(tokenize_function, batched=True)\n",
|
| 244 |
+
"tokenized_dataset"
|
| 245 |
+
]
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"cell_type": "code",
|
| 249 |
+
"execution_count": 6,
|
| 250 |
+
"metadata": {},
|
| 251 |
+
"outputs": [
|
| 252 |
+
{
|
| 253 |
+
"data": {
|
| 254 |
+
"text/plain": [
|
| 255 |
+
"DatasetDict({\n",
|
| 256 |
+
" train: Dataset({\n",
|
| 257 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 258 |
+
" num_rows: 3668\n",
|
| 259 |
+
" })\n",
|
| 260 |
+
" validation: Dataset({\n",
|
| 261 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 262 |
+
" num_rows: 408\n",
|
| 263 |
+
" })\n",
|
| 264 |
+
" test: Dataset({\n",
|
| 265 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 266 |
+
" num_rows: 1725\n",
|
| 267 |
+
" })\n",
|
| 268 |
+
"})"
|
| 269 |
+
]
|
| 270 |
+
},
|
| 271 |
+
"execution_count": 6,
|
| 272 |
+
"metadata": {},
|
| 273 |
+
"output_type": "execute_result"
|
| 274 |
+
}
|
| 275 |
+
],
|
| 276 |
+
"source": [
|
| 277 |
+
"tokenized_dataset = tokenized_dataset.remove_columns(['idx','sentence1','sentence2'])\n",
|
| 278 |
+
"tokenized_dataset = tokenized_dataset.rename_column('label','labels')\n",
|
| 279 |
+
"tokenized_dataset = tokenized_dataset.with_format('torch')\n",
|
| 280 |
+
"\n",
|
| 281 |
+
"tokenized_dataset"
|
| 282 |
+
]
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"cell_type": "code",
|
| 286 |
+
"execution_count": 7,
|
| 287 |
+
"metadata": {},
|
| 288 |
+
"outputs": [
|
| 289 |
+
{
|
| 290 |
+
"data": {
|
| 291 |
+
"text/plain": [
|
| 292 |
+
"Dataset({\n",
|
| 293 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 294 |
+
" num_rows: 3668\n",
|
| 295 |
+
"})"
|
| 296 |
+
]
|
| 297 |
+
},
|
| 298 |
+
"execution_count": 7,
|
| 299 |
+
"metadata": {},
|
| 300 |
+
"output_type": "execute_result"
|
| 301 |
+
}
|
| 302 |
+
],
|
| 303 |
+
"source": [
|
| 304 |
+
"tokenized_dataset['train']"
|
| 305 |
+
]
|
| 306 |
+
},
|
| 307 |
+
{
|
| 308 |
+
"cell_type": "code",
|
| 309 |
+
"execution_count": 18,
|
| 310 |
+
"metadata": {},
|
| 311 |
+
"outputs": [
|
| 312 |
+
{
|
| 313 |
+
"data": {
|
| 314 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 315 |
+
"model_id": "14633ab0e5204abf9593e1b5a8b57c2d",
|
| 316 |
+
"version_major": 2,
|
| 317 |
+
"version_minor": 0
|
| 318 |
+
},
|
| 319 |
+
"text/plain": [
|
| 320 |
+
"Map: 0%| | 0/3668 [00:00<?, ? examples/s]"
|
| 321 |
+
]
|
| 322 |
+
},
|
| 323 |
+
"metadata": {},
|
| 324 |
+
"output_type": "display_data"
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"data": {
|
| 328 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 329 |
+
"model_id": "8e10fd28b7ad4805b3f3b55d0b192027",
|
| 330 |
+
"version_major": 2,
|
| 331 |
+
"version_minor": 0
|
| 332 |
+
},
|
| 333 |
+
"text/plain": [
|
| 334 |
+
"Map: 0%| | 0/408 [00:00<?, ? examples/s]"
|
| 335 |
+
]
|
| 336 |
+
},
|
| 337 |
+
"metadata": {},
|
| 338 |
+
"output_type": "display_data"
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"data": {
|
| 342 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 343 |
+
"model_id": "9aea16a997e64169acfde01a91195a79",
|
| 344 |
+
"version_major": 2,
|
| 345 |
+
"version_minor": 0
|
| 346 |
+
},
|
| 347 |
+
"text/plain": [
|
| 348 |
+
"Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
|
| 349 |
+
]
|
| 350 |
+
},
|
| 351 |
+
"metadata": {},
|
| 352 |
+
"output_type": "display_data"
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"name": "stdout",
|
| 356 |
+
"output_type": "stream",
|
| 357 |
+
"text": [
|
| 358 |
+
"torch.Size([16, 128])\n",
|
| 359 |
+
"torch.Size([16, 128])\n",
|
| 360 |
+
"torch.Size([16, 128])\n",
|
| 361 |
+
"torch.Size([16, 128])\n",
|
| 362 |
+
"torch.Size([16, 128])\n",
|
| 363 |
+
"torch.Size([16, 128])\n",
|
| 364 |
+
"torch.Size([16, 128])\n"
|
| 365 |
+
]
|
| 366 |
+
}
|
| 367 |
+
],
|
| 368 |
+
"source": [
|
| 369 |
+
"from datasets import load_dataset\n",
|
| 370 |
+
"from transformers import AutoTokenizer\n",
|
| 371 |
+
"\n",
|
| 372 |
+
"raw_datasets = load_dataset(\"glue\", \"mrpc\")\n",
|
| 373 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 374 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 375 |
+
"\n",
|
| 376 |
+
"def tokenize_function(example):\n",
|
| 377 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 378 |
+
" padding=\"max_length\",truncation=True,\n",
|
| 379 |
+
" max_length=128)\n",
|
| 380 |
+
" \n",
|
| 381 |
+
"tokenized_datasets = raw_datasets.map(tokenize_function, batched=True) \n",
|
| 382 |
+
"tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
|
| 383 |
+
"tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
|
| 384 |
+
"tokenized_datasets = tokenized_datasets.with_format('torch')\n",
|
| 385 |
+
"\n",
|
| 386 |
+
"from torch.utils.data import DataLoader\n",
|
| 387 |
+
"\n",
|
| 388 |
+
"train_dataloader = DataLoader(tokenized_datasets['train'], \n",
|
| 389 |
+
" batch_size=16, shuffle=True)\n",
|
| 390 |
+
"\n",
|
| 391 |
+
"for step, batch in enumerate(train_dataloader):\n",
|
| 392 |
+
" print(batch['input_ids'].shape)\n",
|
| 393 |
+
" if step>5:\n",
|
| 394 |
+
" break"
|
| 395 |
+
]
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"cell_type": "code",
|
| 399 |
+
"execution_count": 12,
|
| 400 |
+
"metadata": {},
|
| 401 |
+
"outputs": [],
|
| 402 |
+
"source": [
|
| 403 |
+
"from datasets import load_dataset\n",
|
| 404 |
+
"\n",
|
| 405 |
+
"raw_datasets = load_dataset('glue', 'mrpc')\n",
|
| 406 |
+
"from transformers import AutoTokenizer\n",
|
| 407 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 408 |
+
"\n",
|
| 409 |
+
"def tokenize_function(example):\n",
|
| 410 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 411 |
+
" truncation=True)\n",
|
| 412 |
+
"\n",
|
| 413 |
+
"tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
|
| 414 |
+
"tokenized_datasets = tokenized_datasets.remove_columns(['idx','sentence1','sentence2'])\n",
|
| 415 |
+
"tokenized_datasets = tokenized_datasets.rename_column('label','labels')\n",
|
| 416 |
+
"tokenized_datasets = tokenized_datasets.with_format('torch')\n"
|
| 417 |
+
]
|
| 418 |
+
},
|
| 419 |
+
{
|
| 420 |
+
"cell_type": "code",
|
| 421 |
+
"execution_count": 13,
|
| 422 |
+
"metadata": {},
|
| 423 |
+
"outputs": [
|
| 424 |
+
{
|
| 425 |
+
"data": {
|
| 426 |
+
"text/plain": [
|
| 427 |
+
"DatasetDict({\n",
|
| 428 |
+
" train: Dataset({\n",
|
| 429 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 430 |
+
" num_rows: 3668\n",
|
| 431 |
+
" })\n",
|
| 432 |
+
" validation: Dataset({\n",
|
| 433 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 434 |
+
" num_rows: 408\n",
|
| 435 |
+
" })\n",
|
| 436 |
+
" test: Dataset({\n",
|
| 437 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 438 |
+
" num_rows: 1725\n",
|
| 439 |
+
" })\n",
|
| 440 |
+
"})"
|
| 441 |
+
]
|
| 442 |
+
},
|
| 443 |
+
"execution_count": 13,
|
| 444 |
+
"metadata": {},
|
| 445 |
+
"output_type": "execute_result"
|
| 446 |
+
}
|
| 447 |
+
],
|
| 448 |
+
"source": [
|
| 449 |
+
"tokenized_datasets"
|
| 450 |
+
]
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"cell_type": "code",
|
| 454 |
+
"execution_count": 14,
|
| 455 |
+
"metadata": {},
|
| 456 |
+
"outputs": [
|
| 457 |
+
{
|
| 458 |
+
"name": "stderr",
|
| 459 |
+
"output_type": "stream",
|
| 460 |
+
"text": [
|
| 461 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 462 |
+
]
|
| 463 |
+
},
|
| 464 |
+
{
|
| 465 |
+
"name": "stdout",
|
| 466 |
+
"output_type": "stream",
|
| 467 |
+
"text": [
|
| 468 |
+
"torch.Size([16, 73])\n",
|
| 469 |
+
"torch.Size([16, 75])\n",
|
| 470 |
+
"torch.Size([16, 85])\n",
|
| 471 |
+
"torch.Size([16, 81])\n",
|
| 472 |
+
"torch.Size([16, 77])\n",
|
| 473 |
+
"torch.Size([16, 83])\n",
|
| 474 |
+
"torch.Size([16, 79])\n"
|
| 475 |
+
]
|
| 476 |
+
}
|
| 477 |
+
],
|
| 478 |
+
"source": [
|
| 479 |
+
"from torch.utils.data import DataLoader\n",
|
| 480 |
+
"from transformers import DataCollatorWithPadding\n",
|
| 481 |
+
"\n",
|
| 482 |
+
"data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
|
| 483 |
+
"train_dataloader = DataLoader(tokenized_datasets['train'],\n",
|
| 484 |
+
" batch_size=16, shuffle=True,\n",
|
| 485 |
+
" collate_fn=data_collator)\n",
|
| 486 |
+
"\n",
|
| 487 |
+
"for step, batch in enumerate(train_dataloader):\n",
|
| 488 |
+
" print(batch['input_ids'].shape)\n",
|
| 489 |
+
" if step>5:\n",
|
| 490 |
+
" break"
|
| 491 |
+
]
|
| 492 |
+
},
|
| 493 |
+
{
|
| 494 |
+
"cell_type": "code",
|
| 495 |
+
"execution_count": 20,
|
| 496 |
+
"metadata": {},
|
| 497 |
+
"outputs": [],
|
| 498 |
+
"source": [
|
| 499 |
+
"from datasets import load_dataset\n",
|
| 500 |
+
"from transformers import AutoTokenizer, DataCollatorWithPadding\n",
|
| 501 |
+
"\n",
|
| 502 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 503 |
+
"raw_dataset = load_dataset('glue', 'mrpc')\n",
|
| 504 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 505 |
+
"\n",
|
| 506 |
+
"def tokenize_function(example):\n",
|
| 507 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 508 |
+
" truncation=True)\n",
|
| 509 |
+
"\n",
|
| 510 |
+
"tokenized_dataset = raw_dataset.map(tokenize_function, batched=True)\n",
|
| 511 |
+
"data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
|
| 512 |
+
"\n",
|
| 513 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 514 |
+
"\n",
|
| 515 |
+
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n",
|
| 516 |
+
"\n",
|
| 517 |
+
"# Specify training arguments hyperparameters:\n",
|
| 518 |
+
"from transformers import TrainingArguments\n",
|
| 519 |
+
"training_args = TrainingArguments(\"test-trainer\",\n",
|
| 520 |
+
" per_device_train_batch_size=16,\n",
|
| 521 |
+
" per_device_eval_batch_size=16,\n",
|
| 522 |
+
" num_train_epochs=5,\n",
|
| 523 |
+
" learning_rate=2e-5,\n",
|
| 524 |
+
" weight_decay=0.01)\n",
|
| 525 |
+
"\n",
|
| 526 |
+
"# Create the Trainer instance:\n",
|
| 527 |
+
"from transformers import Trainer\n",
|
| 528 |
+
"trainer = Trainer(\n",
|
| 529 |
+
" model=model,\n",
|
| 530 |
+
" args=training_args,\n",
|
| 531 |
+
" train_dataset=tokenized_dataset['train'],\n",
|
| 532 |
+
" eval_dataset=tokenized_dataset['validation'],\n",
|
| 533 |
+
" data_collator=data_collator,\n",
|
| 534 |
+
" tokenizer=tokenizer\n",
|
| 535 |
+
")\n",
|
| 536 |
+
"trainer.train()\n",
|
| 537 |
+
"\n",
|
| 538 |
+
"'''The results will however be anticlimatic because \n",
|
| 539 |
+
"you will only get a training loss that doesn't tell you how well the\n",
|
| 540 |
+
"model is actually doing..\n",
|
| 541 |
+
"To fix this, you need to specify the evaluation metric'''\n",
|
| 542 |
+
"\n",
|
| 543 |
+
"predictions = trainer.predict(tokenized_dataset['validation'])\n",
|
| 544 |
+
"print(predictions)\n",
|
| 545 |
+
"print(predictions.predictions.shape, predictions.label_ids.shape)\n",
|
| 546 |
+
"\n",
|
| 547 |
+
"# it returns a named tuple with 3 elements: predictions, label_ids, metrics\n",
|
| 548 |
+
"# the predictions are the logits of the model with all the sentences of the dataset\n",
|
| 549 |
+
"# so a numpy array of shape(488 x 2)\n",
|
| 550 |
+
"\n",
|
| 551 |
+
"# to match them with our labels we need to take the maximum logits for each prediction\n",
|
| 552 |
+
"# to know which is the maximum, use the argmax function\n",
|
| 553 |
+
"import numpy as np\n",
|
| 554 |
+
"from datasets import load_metric\n",
|
| 555 |
+
"\n",
|
| 556 |
+
"metric = load_metric('glue', 'mrpc')\n",
|
| 557 |
+
"preds = np.argmax(predictions.predictions, axis=-1)\n",
|
| 558 |
+
"metric.compute(predictions=preds, references=predictions.label_ids)\n",
|
| 559 |
+
"\n",
|
| 560 |
+
"'''We can see that our model did learn something!'''"
|
| 561 |
+
]
|
| 562 |
+
},
|
| 563 |
+
{
|
| 564 |
+
"cell_type": "code",
|
| 565 |
+
"execution_count": null,
|
| 566 |
+
"metadata": {},
|
| 567 |
+
"outputs": [],
|
| 568 |
+
"source": [
|
| 569 |
+
"'''To monitor the metrics during training, we need to define a\n",
|
| 570 |
+
"compute metric function as we did above\n",
|
| 571 |
+
"and pass it to the Trainer\n",
|
| 572 |
+
"'''\n",
|
| 573 |
+
"metric = load_metric('glue','mrpc')\n",
|
| 574 |
+
"def compute_metrics(eval_preds):\n",
|
| 575 |
+
" logits, labels = eval_preds\n",
|
| 576 |
+
" predictions = np.argmax(logits, axis=-1)\n",
|
| 577 |
+
" return metric.compute(predictions=predictions, references=labels)\n",
|
| 578 |
+
"\n",
|
| 579 |
+
"training_args = TrainingArguments(\"test-trainer\",\n",
|
| 580 |
+
" evaluation_strategy='epoch')\n",
|
| 581 |
+
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n",
|
| 582 |
+
"\n",
|
| 583 |
+
"trainer = Trainer(\n",
|
| 584 |
+
" model=model,\n",
|
| 585 |
+
" args=training_args,\n",
|
| 586 |
+
" train_dataset=tokenized_dataset['train'],\n",
|
| 587 |
+
" eval_dataset=tokenized_dataset['validation'],\n",
|
| 588 |
+
" data_collator=data_collator,\n",
|
| 589 |
+
" tokenizer=tokenizer,\n",
|
| 590 |
+
" compute_metrics=compute_metrics\n",
|
| 591 |
+
")\n",
|
| 592 |
+
"\n",
|
| 593 |
+
"trainer.train()\n"
|
| 594 |
+
]
|
| 595 |
+
}
|
| 596 |
+
],
|
| 597 |
+
"metadata": {
|
| 598 |
+
"kernelspec": {
|
| 599 |
+
"display_name": "Python 3",
|
| 600 |
+
"language": "python",
|
| 601 |
+
"name": "python3"
|
| 602 |
+
},
|
| 603 |
+
"language_info": {
|
| 604 |
+
"codemirror_mode": {
|
| 605 |
+
"name": "ipython",
|
| 606 |
+
"version": 3
|
| 607 |
+
},
|
| 608 |
+
"file_extension": ".py",
|
| 609 |
+
"mimetype": "text/x-python",
|
| 610 |
+
"name": "python",
|
| 611 |
+
"nbconvert_exporter": "python",
|
| 612 |
+
"pygments_lexer": "ipython3",
|
| 613 |
+
"version": "3.11.6"
|
| 614 |
+
}
|
| 615 |
+
},
|
| 616 |
+
"nbformat": 4,
|
| 617 |
+
"nbformat_minor": 2
|
| 618 |
+
}
|
19-11-23.ipynb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": []
|
| 9 |
+
}
|
| 10 |
+
],
|
| 11 |
+
"metadata": {
|
| 12 |
+
"language_info": {
|
| 13 |
+
"name": "python"
|
| 14 |
+
}
|
| 15 |
+
},
|
| 16 |
+
"nbformat": 4,
|
| 17 |
+
"nbformat_minor": 2
|
| 18 |
+
}
|
Accelerate.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Batch-Inputs-Together.ipynb
ADDED
|
@@ -0,0 +1,623 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 3,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stdout",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"{'input_ids': tensor([[ 101, 1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607,\n",
|
| 13 |
+
" 2026, 2878, 2166, 1012, 102],\n",
|
| 14 |
+
" [ 101, 1045, 5223, 2023, 2061, 2172, 999, 102, 0, 0,\n",
|
| 15 |
+
" 0, 0, 0, 0, 0],\n",
|
| 16 |
+
" [ 101, 1045, 2293, 2023, 2061, 2172, 999, 102, 0, 0,\n",
|
| 17 |
+
" 0, 0, 0, 0, 0],\n",
|
| 18 |
+
" [ 101, 1045, 2572, 9364, 1998, 2026, 2154, 2003, 9868, 102,\n",
|
| 19 |
+
" 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
|
| 20 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 21 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 22 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]])}\n"
|
| 23 |
+
]
|
| 24 |
+
}
|
| 25 |
+
],
|
| 26 |
+
"source": [
|
| 27 |
+
"from transformers import AutoTokenizer\n",
|
| 28 |
+
"\n",
|
| 29 |
+
"checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
|
| 30 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 31 |
+
"\n",
|
| 32 |
+
"sentences = [\n",
|
| 33 |
+
" 'I have been waiting for a hugging face course my whole life.',\n",
|
| 34 |
+
" 'I hate this so much!',\n",
|
| 35 |
+
" 'I love this so much!',\n",
|
| 36 |
+
" 'I am disappointed and my day is ruined'\n",
|
| 37 |
+
"]\n",
|
| 38 |
+
"\n",
|
| 39 |
+
"tokens = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')\n",
|
| 40 |
+
"print(tokens)"
|
| 41 |
+
]
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"cell_type": "code",
|
| 45 |
+
"execution_count": 1,
|
| 46 |
+
"metadata": {},
|
| 47 |
+
"outputs": [
|
| 48 |
+
{
|
| 49 |
+
"name": "stdout",
|
| 50 |
+
"output_type": "stream",
|
| 51 |
+
"text": [
|
| 52 |
+
"[[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012], [1045, 2572, 2200, 9364, 1999, 2017, 1012], [1045, 5223, 2023, 2061, 2172, 999], [1045, 2572, 10215, 2004, 1045, 2572, 2196, 9657, 2055, 2026, 4813, 1012]]\n",
|
| 53 |
+
"[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012]\n",
|
| 54 |
+
"[1045, 2572, 2200, 9364, 1999, 2017, 1012]\n",
|
| 55 |
+
"[1045, 5223, 2023, 2061, 2172, 999]\n",
|
| 56 |
+
"[1045, 2572, 10215, 2004, 1045, 2572, 2196, 9657, 2055, 2026, 4813, 1012]\n"
|
| 57 |
+
]
|
| 58 |
+
}
|
| 59 |
+
],
|
| 60 |
+
"source": [
|
| 61 |
+
"from transformers import AutoTokenizer\n",
|
| 62 |
+
"\n",
|
| 63 |
+
"checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
|
| 64 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 65 |
+
"\n",
|
| 66 |
+
"sentences = [\n",
|
| 67 |
+
" 'I have been waiting for a hugging face course my whole life.',\n",
|
| 68 |
+
" 'I am very disappointed in you.',\n",
|
| 69 |
+
" 'I hate this so much!',\n",
|
| 70 |
+
" 'I am terrified as I am never confident about my skills.'\n",
|
| 71 |
+
"]\n",
|
| 72 |
+
"\n",
|
| 73 |
+
"tokens = [tokenizer.tokenize(sentence) for sentence in sentences]\n",
|
| 74 |
+
"ids = [tokenizer.convert_tokens_to_ids(token) for token in tokens]\n",
|
| 75 |
+
"\n",
|
| 76 |
+
"print(ids)\n",
|
| 77 |
+
"print(ids[0])\n",
|
| 78 |
+
"print(ids[1])\n",
|
| 79 |
+
"print(ids[2])\n",
|
| 80 |
+
"print(ids[3])"
|
| 81 |
+
]
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"cell_type": "code",
|
| 85 |
+
"execution_count": 2,
|
| 86 |
+
"metadata": {},
|
| 87 |
+
"outputs": [
|
| 88 |
+
{
|
| 89 |
+
"name": "stderr",
|
| 90 |
+
"output_type": "stream",
|
| 91 |
+
"text": [
|
| 92 |
+
"You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 93 |
+
]
|
| 94 |
+
}
|
| 95 |
+
],
|
| 96 |
+
"source": [
|
| 97 |
+
"final_inputs = tokenizer.prepare_for_model(ids)"
|
| 98 |
+
]
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"cell_type": "code",
|
| 102 |
+
"execution_count": 3,
|
| 103 |
+
"metadata": {},
|
| 104 |
+
"outputs": [
|
| 105 |
+
{
|
| 106 |
+
"name": "stdout",
|
| 107 |
+
"output_type": "stream",
|
| 108 |
+
"text": [
|
| 109 |
+
"{'input_ids': [101, [1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012], [1045, 2572, 2200, 9364, 1999, 2017, 1012], [1045, 5223, 2023, 2061, 2172, 999], [1045, 2572, 10215, 2004, 1045, 2572, 2196, 9657, 2055, 2026, 4813, 1012], 102], 'attention_mask': [1, 1, 1, 1, 1, 1]}\n"
|
| 110 |
+
]
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"source": [
|
| 114 |
+
"print(final_inputs)"
|
| 115 |
+
]
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"cell_type": "code",
|
| 119 |
+
"execution_count": 4,
|
| 120 |
+
"metadata": {},
|
| 121 |
+
"outputs": [
|
| 122 |
+
{
|
| 123 |
+
"ename": "ValueError",
|
| 124 |
+
"evalue": "expected sequence of length 13 at dim 1 (got 7)",
|
| 125 |
+
"output_type": "error",
|
| 126 |
+
"traceback": [
|
| 127 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 128 |
+
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
| 129 |
+
"\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Batch-Inputs-Together.ipynb Cell 5\u001b[0m line \u001b[0;36m6\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Batch-Inputs-Together.ipynb#W4sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtorch\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Batch-Inputs-Together.ipynb#W4sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m ids \u001b[39m=\u001b[39m ids\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Batch-Inputs-Together.ipynb#W4sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m input_ids \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39;49mtensor(ids)\n",
|
| 130 |
+
"\u001b[1;31mValueError\u001b[0m: expected sequence of length 13 at dim 1 (got 7)"
|
| 131 |
+
]
|
| 132 |
+
}
|
| 133 |
+
],
|
| 134 |
+
"source": [
|
| 135 |
+
"'''trying to create a tensor or numpy array from the list of inputs\n",
|
| 136 |
+
"will result in an error. This is because the list of inputs is not \n",
|
| 137 |
+
"rectangular i.e they are not of equal dimensions'''\n",
|
| 138 |
+
"import torch\n",
|
| 139 |
+
"ids = ids\n",
|
| 140 |
+
"input_ids = torch.tensor(ids)\n"
|
| 141 |
+
]
|
| 142 |
+
},
|
| 143 |
+
{
|
| 144 |
+
"cell_type": "code",
|
| 145 |
+
"execution_count": 5,
|
| 146 |
+
"metadata": {},
|
| 147 |
+
"outputs": [
|
| 148 |
+
{
|
| 149 |
+
"data": {
|
| 150 |
+
"text/plain": [
|
| 151 |
+
"'Therefore, you have to pad, here we pad manually. But be sure to\\ncheck out dynamic padding which is almost always better on the CPU and\\nthe GPU!'"
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"execution_count": 5,
|
| 155 |
+
"metadata": {},
|
| 156 |
+
"output_type": "execute_result"
|
| 157 |
+
}
|
| 158 |
+
],
|
| 159 |
+
"source": [
|
| 160 |
+
"'''Therefore, you have to pad, here we pad manually. But be sure to\n",
|
| 161 |
+
"check out dynamic padding which is almost always better on the CPU and\n",
|
| 162 |
+
"the GPU!'''\n"
|
| 163 |
+
]
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"cell_type": "code",
|
| 167 |
+
"execution_count": 6,
|
| 168 |
+
"metadata": {},
|
| 169 |
+
"outputs": [
|
| 170 |
+
{
|
| 171 |
+
"name": "stdout",
|
| 172 |
+
"output_type": "stream",
|
| 173 |
+
"text": [
|
| 174 |
+
"BertTokenizerFast(name_or_path='bert-base-cased', vocab_size=28996, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True), added_tokens_decoder={\n",
|
| 175 |
+
"\t0: AddedToken(\"[PAD]\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
|
| 176 |
+
"\t100: AddedToken(\"[UNK]\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
|
| 177 |
+
"\t101: AddedToken(\"[CLS]\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
|
| 178 |
+
"\t102: AddedToken(\"[SEP]\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
|
| 179 |
+
"\t103: AddedToken(\"[MASK]\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
|
| 180 |
+
"}\n"
|
| 181 |
+
]
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"data": {
|
| 185 |
+
"text/plain": [
|
| 186 |
+
"0"
|
| 187 |
+
]
|
| 188 |
+
},
|
| 189 |
+
"execution_count": 6,
|
| 190 |
+
"metadata": {},
|
| 191 |
+
"output_type": "execute_result"
|
| 192 |
+
}
|
| 193 |
+
],
|
| 194 |
+
"source": [
|
| 195 |
+
"'''The value you use to pad the sentences should not be picked\n",
|
| 196 |
+
"randomly. Use tokenizer.pad_token_id to get the value of the pad token'''\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"from transformers import AutoTokenizer\n",
|
| 199 |
+
"\n",
|
| 200 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')\n",
|
| 201 |
+
"print(tokenizer)\n",
|
| 202 |
+
"tokenizer.pad_token_id"
|
| 203 |
+
]
|
| 204 |
+
},
|
| 205 |
+
{
|
| 206 |
+
"cell_type": "code",
|
| 207 |
+
"execution_count": 13,
|
| 208 |
+
"metadata": {},
|
| 209 |
+
"outputs": [
|
| 210 |
+
{
|
| 211 |
+
"name": "stderr",
|
| 212 |
+
"output_type": "stream",
|
| 213 |
+
"text": [
|
| 214 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 215 |
+
]
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"name": "stdout",
|
| 219 |
+
"output_type": "stream",
|
| 220 |
+
"text": [
|
| 221 |
+
"{'input_ids': [101, 10463, 2023, 2000, 19204, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}\n",
|
| 222 |
+
"convert this to tokens.\n"
|
| 223 |
+
]
|
| 224 |
+
}
|
| 225 |
+
],
|
| 226 |
+
"source": [
|
| 227 |
+
"from transformers import AutoTokenizer\n",
|
| 228 |
+
"\n",
|
| 229 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 230 |
+
"tokens = tokenizer.tokenize('Convert this to tokens.')\n",
|
| 231 |
+
"input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
|
| 232 |
+
"inputs = tokenizer.prepare_for_model(input_ids)\n",
|
| 233 |
+
"print(inputs)\n",
|
| 234 |
+
"\n",
|
| 235 |
+
"# decode:\n",
|
| 236 |
+
"decode = tokenizer.decode(input_ids)\n",
|
| 237 |
+
"print(decode)"
|
| 238 |
+
]
|
| 239 |
+
},
|
| 240 |
+
{
|
| 241 |
+
"cell_type": "code",
|
| 242 |
+
"execution_count": 11,
|
| 243 |
+
"metadata": {},
|
| 244 |
+
"outputs": [
|
| 245 |
+
{
|
| 246 |
+
"name": "stdout",
|
| 247 |
+
"output_type": "stream",
|
| 248 |
+
"text": [
|
| 249 |
+
"{'input_ids': [101, 10463, 2023, 2000, 19204, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}\n",
|
| 250 |
+
"[CLS] convert this to tokens. [SEP]\n"
|
| 251 |
+
]
|
| 252 |
+
}
|
| 253 |
+
],
|
| 254 |
+
"source": [
|
| 255 |
+
"from transformers import AutoTokenizer\n",
|
| 256 |
+
"\n",
|
| 257 |
+
"tokenizer =AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 258 |
+
"inputs = tokenizer('Convert this to tokens.')\n",
|
| 259 |
+
"print(inputs)\n",
|
| 260 |
+
"decode = tokenizer.decode(inputs['input_ids'])\n",
|
| 261 |
+
"print(decode)"
|
| 262 |
+
]
|
| 263 |
+
},
|
| 264 |
+
{
|
| 265 |
+
"cell_type": "code",
|
| 266 |
+
"execution_count": 4,
|
| 267 |
+
"metadata": {},
|
| 268 |
+
"outputs": [
|
| 269 |
+
{
|
| 270 |
+
"name": "stderr",
|
| 271 |
+
"output_type": "stream",
|
| 272 |
+
"text": [
|
| 273 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 274 |
+
]
|
| 275 |
+
},
|
| 276 |
+
{
|
| 277 |
+
"name": "stdout",
|
| 278 |
+
"output_type": "stream",
|
| 279 |
+
"text": [
|
| 280 |
+
"{'input_ids': [101, [[2023, 2003, 1037, 2862, 1997, 11746], [1045, 2097, 3046, 2026, 2190, 2000, 2562, 2009, 2460, 1012], [2009, 2003, 2524, 2000, 4553, 2066, 2023, 1012], [1045, 2572, 5458, 1012]], 102], 'token_type_ids': [0, 0, 0], 'attention_mask': [1, 1, 1]}\n"
|
| 281 |
+
]
|
| 282 |
+
}
|
| 283 |
+
],
|
| 284 |
+
"source": [
|
| 285 |
+
"'''Enough revision, now pad them:'''\n",
|
| 286 |
+
"from transformers import AutoTokenizer\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"sentences = ['This is a list of sentences',\n",
|
| 289 |
+
" 'I will try my best to keep it short.',\n",
|
| 290 |
+
" 'It is hard to learn like this.',\n",
|
| 291 |
+
" 'I am tired.']\n",
|
| 292 |
+
"\n",
|
| 293 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 294 |
+
"tokens = [tokenizer.tokenize(sentence) for sentence in sentences]\n",
|
| 295 |
+
"input_ids = [tokenizer.convert_tokens_to_ids(token) for token in tokens]\n",
|
| 296 |
+
"inputs = tokenizer.prepare_for_model([input_ids])\n",
|
| 297 |
+
"print(inputs)\n",
|
| 298 |
+
"\n"
|
| 299 |
+
]
|
| 300 |
+
},
|
| 301 |
+
{
|
| 302 |
+
"cell_type": "code",
|
| 303 |
+
"execution_count": 7,
|
| 304 |
+
"metadata": {},
|
| 305 |
+
"outputs": [
|
| 306 |
+
{
|
| 307 |
+
"name": "stdout",
|
| 308 |
+
"output_type": "stream",
|
| 309 |
+
"text": [
|
| 310 |
+
"101\n",
|
| 311 |
+
"[[2023, 2003, 1037, 2862, 1997, 11746], [1045, 2097, 3046, 2026, 2190, 2000, 2562, 2009, 2460, 1012], [2009, 2003, 2524, 2000, 4553, 2066, 2023, 1012], [1045, 2572, 5458, 1012]]\n",
|
| 312 |
+
"102\n"
|
| 313 |
+
]
|
| 314 |
+
}
|
| 315 |
+
],
|
| 316 |
+
"source": [
|
| 317 |
+
"for i in range(len(inputs)): print(inputs['input_ids'][i])"
|
| 318 |
+
]
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"cell_type": "code",
|
| 322 |
+
"execution_count": 11,
|
| 323 |
+
"metadata": {},
|
| 324 |
+
"outputs": [
|
| 325 |
+
{
|
| 326 |
+
"data": {
|
| 327 |
+
"text/plain": [
|
| 328 |
+
"[[2023, 2003, 1037, 2862, 1997, 11746],\n",
|
| 329 |
+
" [1045, 2097, 3046, 2026, 2190, 2000, 2562, 2009, 2460, 1012],\n",
|
| 330 |
+
" [2009, 2003, 2524, 2000, 4553, 2066, 2023, 1012],\n",
|
| 331 |
+
" [1045, 2572, 5458, 1012]]"
|
| 332 |
+
]
|
| 333 |
+
},
|
| 334 |
+
"execution_count": 11,
|
| 335 |
+
"metadata": {},
|
| 336 |
+
"output_type": "execute_result"
|
| 337 |
+
}
|
| 338 |
+
],
|
| 339 |
+
"source": [
|
| 340 |
+
"inputs['input_ids'][1]"
|
| 341 |
+
]
|
| 342 |
+
},
|
| 343 |
+
{
|
| 344 |
+
"cell_type": "code",
|
| 345 |
+
"execution_count": 12,
|
| 346 |
+
"metadata": {},
|
| 347 |
+
"outputs": [
|
| 348 |
+
{
|
| 349 |
+
"data": {
|
| 350 |
+
"text/plain": [
|
| 351 |
+
"[2023, 2003, 1037, 2862, 1997, 11746]"
|
| 352 |
+
]
|
| 353 |
+
},
|
| 354 |
+
"execution_count": 12,
|
| 355 |
+
"metadata": {},
|
| 356 |
+
"output_type": "execute_result"
|
| 357 |
+
}
|
| 358 |
+
],
|
| 359 |
+
"source": [
|
| 360 |
+
"inputs['input_ids'][1][0]"
|
| 361 |
+
]
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"cell_type": "code",
|
| 365 |
+
"execution_count": 13,
|
| 366 |
+
"metadata": {},
|
| 367 |
+
"outputs": [
|
| 368 |
+
{
|
| 369 |
+
"data": {
|
| 370 |
+
"text/plain": [
|
| 371 |
+
"[1045, 2097, 3046, 2026, 2190, 2000, 2562, 2009, 2460, 1012]"
|
| 372 |
+
]
|
| 373 |
+
},
|
| 374 |
+
"execution_count": 13,
|
| 375 |
+
"metadata": {},
|
| 376 |
+
"output_type": "execute_result"
|
| 377 |
+
}
|
| 378 |
+
],
|
| 379 |
+
"source": [
|
| 380 |
+
"inputs['input_ids'][1][1]"
|
| 381 |
+
]
|
| 382 |
+
},
|
| 383 |
+
{
|
| 384 |
+
"cell_type": "code",
|
| 385 |
+
"execution_count": 14,
|
| 386 |
+
"metadata": {},
|
| 387 |
+
"outputs": [
|
| 388 |
+
{
|
| 389 |
+
"data": {
|
| 390 |
+
"text/plain": [
|
| 391 |
+
"[2009, 2003, 2524, 2000, 4553, 2066, 2023, 1012]"
|
| 392 |
+
]
|
| 393 |
+
},
|
| 394 |
+
"execution_count": 14,
|
| 395 |
+
"metadata": {},
|
| 396 |
+
"output_type": "execute_result"
|
| 397 |
+
}
|
| 398 |
+
],
|
| 399 |
+
"source": [
|
| 400 |
+
"inputs['input_ids'][1][2]"
|
| 401 |
+
]
|
| 402 |
+
},
|
| 403 |
+
{
|
| 404 |
+
"cell_type": "code",
|
| 405 |
+
"execution_count": 15,
|
| 406 |
+
"metadata": {},
|
| 407 |
+
"outputs": [
|
| 408 |
+
{
|
| 409 |
+
"data": {
|
| 410 |
+
"text/plain": [
|
| 411 |
+
"[1045, 2572, 5458, 1012]"
|
| 412 |
+
]
|
| 413 |
+
},
|
| 414 |
+
"execution_count": 15,
|
| 415 |
+
"metadata": {},
|
| 416 |
+
"output_type": "execute_result"
|
| 417 |
+
}
|
| 418 |
+
],
|
| 419 |
+
"source": [
|
| 420 |
+
"inputs['input_ids'][1][3]"
|
| 421 |
+
]
|
| 422 |
+
},
|
| 423 |
+
{
|
| 424 |
+
"cell_type": "code",
|
| 425 |
+
"execution_count": 20,
|
| 426 |
+
"metadata": {},
|
| 427 |
+
"outputs": [],
|
| 428 |
+
"source": [
|
| 429 |
+
"'''Now we won't receive error.'''\n",
|
| 430 |
+
"import torch\n",
|
| 431 |
+
"padded_input_ids = [[1045, 2097, 3046, 2026, 2190, 2000, 2562, 2009, 2460, 1012],\n",
|
| 432 |
+
" [2023, 2003, 1037, 2862, 1997, 11746, 0, 0, 0, 0],\n",
|
| 433 |
+
" [2009, 2003, 2524, 2000, 4553, 2066, 2023, 1012, 0, 0],\n",
|
| 434 |
+
" [1045, 2572, 5458, 1012, 0, 0, 0, 0, 0, 0]]\n",
|
| 435 |
+
"\n",
|
| 436 |
+
"input_ids = torch.tensor(padded_input_ids) "
|
| 437 |
+
]
|
| 438 |
+
},
|
| 439 |
+
{
|
| 440 |
+
"cell_type": "code",
|
| 441 |
+
"execution_count": 27,
|
| 442 |
+
"metadata": {},
|
| 443 |
+
"outputs": [
|
| 444 |
+
{
|
| 445 |
+
"name": "stderr",
|
| 446 |
+
"output_type": "stream",
|
| 447 |
+
"text": [
|
| 448 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
|
| 449 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
| 450 |
+
"We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n"
|
| 451 |
+
]
|
| 452 |
+
},
|
| 453 |
+
{
|
| 454 |
+
"name": "stdout",
|
| 455 |
+
"output_type": "stream",
|
| 456 |
+
"text": [
|
| 457 |
+
"tensor([[0.0690, 0.7675]], grad_fn=<AddmmBackward0>)\n",
|
| 458 |
+
"tensor([[-0.2026, 0.1231]], grad_fn=<AddmmBackward0>)\n",
|
| 459 |
+
"tensor([[0.0924, 0.7572]], grad_fn=<AddmmBackward0>)\n",
|
| 460 |
+
"tensor([[0.2478, 0.6774]], grad_fn=<AddmmBackward0>)\n",
|
| 461 |
+
"tensor([[ 0.0690, 0.7675],\n",
|
| 462 |
+
" [-0.2026, 0.1231],\n",
|
| 463 |
+
" [ 0.0924, 0.7572],\n",
|
| 464 |
+
" [ 0.2478, 0.6774]], grad_fn=<AddmmBackward0>)\n"
|
| 465 |
+
]
|
| 466 |
+
}
|
| 467 |
+
],
|
| 468 |
+
"source": [
|
| 469 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 470 |
+
"\n",
|
| 471 |
+
"ids1 = torch.tensor([padded_input_ids[0]])\n",
|
| 472 |
+
"ids2 = torch.tensor([padded_input_ids[1]])\n",
|
| 473 |
+
"ids3 = torch.tensor([padded_input_ids[2]])\n",
|
| 474 |
+
"ids4 = torch.tensor([padded_input_ids[3]])\n",
|
| 475 |
+
"\n",
|
| 476 |
+
"all_ids = torch.tensor([padded_input_ids[0], \n",
|
| 477 |
+
" padded_input_ids[1], \n",
|
| 478 |
+
" padded_input_ids[2],\n",
|
| 479 |
+
" padded_input_ids[3]])\n",
|
| 480 |
+
"# # same effect:\n",
|
| 481 |
+
"# all_ids =torch.tensor(padded_input_ids)\n",
|
| 482 |
+
"\n",
|
| 483 |
+
"model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')\n",
|
| 484 |
+
"print(model(ids1).logits)\n",
|
| 485 |
+
"print(model(ids2).logits)\n",
|
| 486 |
+
"print(model(ids3).logits)\n",
|
| 487 |
+
"print(model(ids4).logits)\n",
|
| 488 |
+
"print(model(all_ids).logits)"
|
| 489 |
+
]
|
| 490 |
+
},
|
| 491 |
+
{
|
| 492 |
+
"cell_type": "code",
|
| 493 |
+
"execution_count": 28,
|
| 494 |
+
"metadata": {},
|
| 495 |
+
"outputs": [],
|
| 496 |
+
"source": [
|
| 497 |
+
"all_ids = torch.tensor(padded_input_ids)\n",
|
| 498 |
+
"attention_mask = torch.tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
|
| 499 |
+
" [1, 1, 1, 1, 1, 1, 0, 0, 0, 0],\n",
|
| 500 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 0, 0],\n",
|
| 501 |
+
" [1, 1, 1, 1, 0, 0, 0, 0, 0, 0]])"
|
| 502 |
+
]
|
| 503 |
+
},
|
| 504 |
+
{
|
| 505 |
+
"cell_type": "code",
|
| 506 |
+
"execution_count": 30,
|
| 507 |
+
"metadata": {},
|
| 508 |
+
"outputs": [
|
| 509 |
+
{
|
| 510 |
+
"name": "stderr",
|
| 511 |
+
"output_type": "stream",
|
| 512 |
+
"text": [
|
| 513 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
|
| 514 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 515 |
+
]
|
| 516 |
+
},
|
| 517 |
+
{
|
| 518 |
+
"name": "stdout",
|
| 519 |
+
"output_type": "stream",
|
| 520 |
+
"text": [
|
| 521 |
+
"tensor([[ 0.1353, -0.0232]], grad_fn=<AddmmBackward0>)\n",
|
| 522 |
+
"tensor([[ 0.1116, -0.2974]], grad_fn=<AddmmBackward0>)\n",
|
| 523 |
+
"tensor([[ 0.1224, -0.1755]], grad_fn=<AddmmBackward0>)\n",
|
| 524 |
+
"tensor([[ 0.0059, -0.2736]], grad_fn=<AddmmBackward0>)\n",
|
| 525 |
+
"tensor([[ 0.1353, -0.0232],\n",
|
| 526 |
+
" [ 0.1041, -0.0745],\n",
|
| 527 |
+
" [ 0.1715, -0.1862],\n",
|
| 528 |
+
" [ 0.0225, -0.1921]], grad_fn=<AddmmBackward0>)\n"
|
| 529 |
+
]
|
| 530 |
+
}
|
| 531 |
+
],
|
| 532 |
+
"source": [
|
| 533 |
+
"model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')\n",
|
| 534 |
+
"output1 = model(ids1)\n",
|
| 535 |
+
"output2 = model(ids2)\n",
|
| 536 |
+
"output3 = model(ids3)\n",
|
| 537 |
+
"output4 = model(ids4)\n",
|
| 538 |
+
"print(output1.logits)\n",
|
| 539 |
+
"print(output2.logits)\n",
|
| 540 |
+
"print(output3.logits)\n",
|
| 541 |
+
"print(output4.logits)\n",
|
| 542 |
+
"\n",
|
| 543 |
+
"output = model(all_ids, attention_mask=attention_mask)\n",
|
| 544 |
+
"print(output.logits)"
|
| 545 |
+
]
|
| 546 |
+
},
|
| 547 |
+
{
|
| 548 |
+
"cell_type": "code",
|
| 549 |
+
"execution_count": 31,
|
| 550 |
+
"metadata": {},
|
| 551 |
+
"outputs": [],
|
| 552 |
+
"source": [
|
| 553 |
+
"# In short, this is how you batch input together:\n",
|
| 554 |
+
"\n",
|
| 555 |
+
"from transformers import AutoTokenizer\n",
|
| 556 |
+
"\n",
|
| 557 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 558 |
+
"raw_inputs = ['I am so slow', 'I wish I had more time in a day',\n",
|
| 559 |
+
" 'We all have equal time per day so we need to make the best use of it',\n",
|
| 560 |
+
" 'This is a very long sentence that will not fit in the model.. of will it?']\n",
|
| 561 |
+
"batch = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')\n"
|
| 562 |
+
]
|
| 563 |
+
},
|
| 564 |
+
{
|
| 565 |
+
"cell_type": "code",
|
| 566 |
+
"execution_count": 32,
|
| 567 |
+
"metadata": {},
|
| 568 |
+
"outputs": [
|
| 569 |
+
{
|
| 570 |
+
"name": "stdout",
|
| 571 |
+
"output_type": "stream",
|
| 572 |
+
"text": [
|
| 573 |
+
"{'input_ids': tensor([[ 101, 1045, 2572, 2061, 4030, 102, 0, 0, 0, 0, 0, 0,\n",
|
| 574 |
+
" 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 575 |
+
" [ 101, 1045, 4299, 1045, 2018, 2062, 2051, 1999, 1037, 2154, 102, 0,\n",
|
| 576 |
+
" 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 577 |
+
" [ 101, 2057, 2035, 2031, 5020, 2051, 2566, 2154, 2061, 2057, 2342, 2000,\n",
|
| 578 |
+
" 2191, 1996, 2190, 2224, 1997, 2009, 102, 0, 0],\n",
|
| 579 |
+
" [ 101, 2023, 2003, 1037, 2200, 2146, 6251, 2008, 2097, 2025, 4906, 1999,\n",
|
| 580 |
+
" 1996, 2944, 1012, 1012, 1997, 2097, 2009, 1029, 102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 581 |
+
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 582 |
+
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 583 |
+
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 584 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 585 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],\n",
|
| 586 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}\n"
|
| 587 |
+
]
|
| 588 |
+
}
|
| 589 |
+
],
|
| 590 |
+
"source": [
|
| 591 |
+
"print(batch)"
|
| 592 |
+
]
|
| 593 |
+
},
|
| 594 |
+
{
|
| 595 |
+
"cell_type": "code",
|
| 596 |
+
"execution_count": null,
|
| 597 |
+
"metadata": {},
|
| 598 |
+
"outputs": [],
|
| 599 |
+
"source": []
|
| 600 |
+
}
|
| 601 |
+
],
|
| 602 |
+
"metadata": {
|
| 603 |
+
"kernelspec": {
|
| 604 |
+
"display_name": "Python 3",
|
| 605 |
+
"language": "python",
|
| 606 |
+
"name": "python3"
|
| 607 |
+
},
|
| 608 |
+
"language_info": {
|
| 609 |
+
"codemirror_mode": {
|
| 610 |
+
"name": "ipython",
|
| 611 |
+
"version": 3
|
| 612 |
+
},
|
| 613 |
+
"file_extension": ".py",
|
| 614 |
+
"mimetype": "text/x-python",
|
| 615 |
+
"name": "python",
|
| 616 |
+
"nbconvert_exporter": "python",
|
| 617 |
+
"pygments_lexer": "ipython3",
|
| 618 |
+
"version": "3.11.6"
|
| 619 |
+
}
|
| 620 |
+
},
|
| 621 |
+
"nbformat": 4,
|
| 622 |
+
"nbformat_minor": 2
|
| 623 |
+
}
|
DynamicPadding.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
PytorchTrainingLoop.ipynb
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"data": {
|
| 10 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 11 |
+
"model_id": "01f95b9bb5e44ed9b5a1fc5deb9fa736",
|
| 12 |
+
"version_major": 2,
|
| 13 |
+
"version_minor": 0
|
| 14 |
+
},
|
| 15 |
+
"text/plain": [
|
| 16 |
+
"Map: 0%| | 0/3668 [00:00<?, ? examples/s]"
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
"metadata": {},
|
| 20 |
+
"output_type": "display_data"
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"data": {
|
| 24 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 25 |
+
"model_id": "a0cf3c495d53445cab1df0269126c7bc",
|
| 26 |
+
"version_major": 2,
|
| 27 |
+
"version_minor": 0
|
| 28 |
+
},
|
| 29 |
+
"text/plain": [
|
| 30 |
+
"Map: 0%| | 0/408 [00:00<?, ? examples/s]"
|
| 31 |
+
]
|
| 32 |
+
},
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"output_type": "display_data"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"data": {
|
| 38 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 39 |
+
"model_id": "3470da5867ee4478a66fb7daac246f60",
|
| 40 |
+
"version_major": 2,
|
| 41 |
+
"version_minor": 0
|
| 42 |
+
},
|
| 43 |
+
"text/plain": [
|
| 44 |
+
"Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
|
| 45 |
+
]
|
| 46 |
+
},
|
| 47 |
+
"metadata": {},
|
| 48 |
+
"output_type": "display_data"
|
| 49 |
+
}
|
| 50 |
+
],
|
| 51 |
+
"source": [
|
| 52 |
+
"from datasets import load_dataset\n",
|
| 53 |
+
"from transformers import AutoTokenizer, DataCollatorWithPadding\n",
|
| 54 |
+
"\n",
|
| 55 |
+
"raw_datasets = load_dataset('glue', 'mrpc')\n",
|
| 56 |
+
"checkpoint = 'bert-base-cased'\n",
|
| 57 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"def tokenize_function(examples):\n",
|
| 60 |
+
" return tokenizer(examples['sentence1'], examples['sentence2'],\n",
|
| 61 |
+
" truncation=True)\n",
|
| 62 |
+
"\n",
|
| 63 |
+
"tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
|
| 64 |
+
"tokenized_datasets = tokenized_datasets.remove_columns(['idx', 'sentence1', 'sentence2'])\n",
|
| 65 |
+
"tokenized_datasets = tokenized_datasets.rename_column('label',\"labels\")\n",
|
| 66 |
+
"tokenized_datasets = tokenized_datasets.with_format('torch')\n",
|
| 67 |
+
"\n",
|
| 68 |
+
"data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n"
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"cell_type": "code",
|
| 73 |
+
"execution_count": 2,
|
| 74 |
+
"metadata": {},
|
| 75 |
+
"outputs": [],
|
| 76 |
+
"source": [
|
| 77 |
+
"from torch.utils.data import DataLoader\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"train_dataloader = DataLoader(\n",
|
| 80 |
+
" tokenized_datasets[\"train\"], shuffle=True, batch_size=8, collate_fn=data_collator\n",
|
| 81 |
+
")\n",
|
| 82 |
+
"eval_dataloader = DataLoader(\n",
|
| 83 |
+
" tokenized_datasets[\"validation\"], batch_size=8, collate_fn=data_collator\n",
|
| 84 |
+
")"
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"cell_type": "code",
|
| 89 |
+
"execution_count": 3,
|
| 90 |
+
"metadata": {},
|
| 91 |
+
"outputs": [
|
| 92 |
+
{
|
| 93 |
+
"name": "stderr",
|
| 94 |
+
"output_type": "stream",
|
| 95 |
+
"text": [
|
| 96 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 97 |
+
]
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"name": "stdout",
|
| 101 |
+
"output_type": "stream",
|
| 102 |
+
"text": [
|
| 103 |
+
"{'labels': torch.Size([8]), 'input_ids': torch.Size([8, 60]), 'token_type_ids': torch.Size([8, 60]), 'attention_mask': torch.Size([8, 60])}\n"
|
| 104 |
+
]
|
| 105 |
+
}
|
| 106 |
+
],
|
| 107 |
+
"source": [
|
| 108 |
+
"'''Grab a batch of data and inspect it, check to see if everything\n",
|
| 109 |
+
"works as intended'''\n",
|
| 110 |
+
"\n",
|
| 111 |
+
"for batch in train_dataloader:\n",
|
| 112 |
+
" break\n",
|
| 113 |
+
"print({k: v.shape for k, v in batch.items()})"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"cell_type": "code",
|
| 118 |
+
"execution_count": 4,
|
| 119 |
+
"metadata": {},
|
| 120 |
+
"outputs": [
|
| 121 |
+
{
|
| 122 |
+
"name": "stderr",
|
| 123 |
+
"output_type": "stream",
|
| 124 |
+
"text": [
|
| 125 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
|
| 126 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
],
|
| 130 |
+
"source": [
|
| 131 |
+
"'''Send the training data to the model'''\n",
|
| 132 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 133 |
+
"\n",
|
| 134 |
+
"checkpoint = 'bert-base-cased'\n",
|
| 135 |
+
"# Adjust number of labels to the number of classes, in this case 2\n",
|
| 136 |
+
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)"
|
| 137 |
+
]
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"cell_type": "code",
|
| 141 |
+
"execution_count": 5,
|
| 142 |
+
"metadata": {},
|
| 143 |
+
"outputs": [
|
| 144 |
+
{
|
| 145 |
+
"name": "stdout",
|
| 146 |
+
"output_type": "stream",
|
| 147 |
+
"text": [
|
| 148 |
+
"tensor(0.7639, grad_fn=<NllLossBackward0>) torch.Size([8, 2])\n"
|
| 149 |
+
]
|
| 150 |
+
}
|
| 151 |
+
],
|
| 152 |
+
"source": [
|
| 153 |
+
"'''To be sure everything is going well, check outputs'''\n",
|
| 154 |
+
"outputs = model(**batch)\n",
|
| 155 |
+
"print(outputs.loss, outputs.logits.shape)\n",
|
| 156 |
+
"# Note: The transformers model automatically calculates the loss"
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"cell_type": "code",
|
| 161 |
+
"execution_count": 6,
|
| 162 |
+
"metadata": {},
|
| 163 |
+
"outputs": [
|
| 164 |
+
{
|
| 165 |
+
"name": "stderr",
|
| 166 |
+
"output_type": "stream",
|
| 167 |
+
"text": [
|
| 168 |
+
"C:\\Users\\HP\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
|
| 169 |
+
" warnings.warn(\n"
|
| 170 |
+
]
|
| 171 |
+
}
|
| 172 |
+
],
|
| 173 |
+
"source": [
|
| 174 |
+
"'''So, just initialize the optimizer and the training loop'''\n",
|
| 175 |
+
"# optimizer:\n",
|
| 176 |
+
"from transformers import AdamW\n",
|
| 177 |
+
"\n",
|
| 178 |
+
"optimizer = AdamW(model.parameters(), lr=0.005)\n",
|
| 179 |
+
"\n",
|
| 180 |
+
"# training loop:\n",
|
| 181 |
+
"loss = outputs.loss\n",
|
| 182 |
+
"loss.backward()\n",
|
| 183 |
+
"optimizer.step()\n",
|
| 184 |
+
"\n",
|
| 185 |
+
"optimizer.zero_grad()"
|
| 186 |
+
]
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"cell_type": "code",
|
| 190 |
+
"execution_count": 7,
|
| 191 |
+
"metadata": {},
|
| 192 |
+
"outputs": [],
|
| 193 |
+
"source": [
|
| 194 |
+
"'''To reduce learning rate to 0 import get_scheduler\n",
|
| 195 |
+
"This is a convenient function from the transformers library to \n",
|
| 196 |
+
"progressively decay our learning rate to zero as we train the model'''\n",
|
| 197 |
+
"\n",
|
| 198 |
+
"from transformers import get_scheduler\n",
|
| 199 |
+
"\n",
|
| 200 |
+
"num_epochs = 3\n",
|
| 201 |
+
"num_training_steps = num_epochs * len(train_dataloader)\n",
|
| 202 |
+
"\n",
|
| 203 |
+
"lr_scheduler = get_scheduler(\n",
|
| 204 |
+
" name = 'linear',\n",
|
| 205 |
+
" optimizer = optimizer,\n",
|
| 206 |
+
" num_warmup_steps = 0,\n",
|
| 207 |
+
" num_training_steps = num_training_steps \n",
|
| 208 |
+
")"
|
| 209 |
+
]
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"cell_type": "code",
|
| 213 |
+
"execution_count": 8,
|
| 214 |
+
"metadata": {},
|
| 215 |
+
"outputs": [
|
| 216 |
+
{
|
| 217 |
+
"name": "stdout",
|
| 218 |
+
"output_type": "stream",
|
| 219 |
+
"text": [
|
| 220 |
+
"cpu\n"
|
| 221 |
+
]
|
| 222 |
+
}
|
| 223 |
+
],
|
| 224 |
+
"source": [
|
| 225 |
+
"'''Device agnostic code: use gpu'''\n",
|
| 226 |
+
"import torch\n",
|
| 227 |
+
"\n",
|
| 228 |
+
"device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
|
| 229 |
+
"model.to(device)\n",
|
| 230 |
+
"print(device)"
|
| 231 |
+
]
|
| 232 |
+
},
|
| 233 |
+
{
|
| 234 |
+
"cell_type": "code",
|
| 235 |
+
"execution_count": null,
|
| 236 |
+
"metadata": {},
|
| 237 |
+
"outputs": [],
|
| 238 |
+
"source": [
|
| 239 |
+
"'''Finally, we can put everything together:'''\n",
|
| 240 |
+
"\n",
|
| 241 |
+
"from tqdm.auto import tqdm\n",
|
| 242 |
+
"\n",
|
| 243 |
+
"progress_bar = tqdm(range(num_training_steps))\n",
|
| 244 |
+
"\n",
|
| 245 |
+
"# set model to training mode:\n",
|
| 246 |
+
"model.train()\n",
|
| 247 |
+
"for epoch in range(num_epochs):\n",
|
| 248 |
+
" # loop over batches:\n",
|
| 249 |
+
" for batch in train_dataloader:\n",
|
| 250 |
+
" # send batch to device:\n",
|
| 251 |
+
" batch = {k: v.to(device) for k,v in batch.items()}\n",
|
| 252 |
+
" # forward pass:\n",
|
| 253 |
+
" outputs = model(**batch)\n",
|
| 254 |
+
" # calculate loss:\n",
|
| 255 |
+
" loss = outputs.loss\n",
|
| 256 |
+
" loss.backward()\n",
|
| 257 |
+
" # update weights:\n",
|
| 258 |
+
" optimizer.step()\n",
|
| 259 |
+
" # update learning rate:\n",
|
| 260 |
+
" lr_scheduler.step()\n",
|
| 261 |
+
" # reset gradients:\n",
|
| 262 |
+
" optimizer.zero_grad()\n",
|
| 263 |
+
" # update progress bar:\n",
|
| 264 |
+
" progress_bar.update(1)\n"
|
| 265 |
+
]
|
| 266 |
+
},
|
| 267 |
+
{
|
| 268 |
+
"cell_type": "code",
|
| 269 |
+
"execution_count": 9,
|
| 270 |
+
"metadata": {},
|
| 271 |
+
"outputs": [
|
| 272 |
+
{
|
| 273 |
+
"name": "stderr",
|
| 274 |
+
"output_type": "stream",
|
| 275 |
+
"text": [
|
| 276 |
+
"C:\\Users\\HP\\AppData\\Local\\Temp\\ipykernel_1992\\3192441023.py:6: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n",
|
| 277 |
+
" metric = load_metric(\"glue\", \"mrpc\")\n"
|
| 278 |
+
]
|
| 279 |
+
},
|
| 280 |
+
{
|
| 281 |
+
"data": {
|
| 282 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 283 |
+
"model_id": "97b30eb20a8f481097917bf582f9a963",
|
| 284 |
+
"version_major": 2,
|
| 285 |
+
"version_minor": 0
|
| 286 |
+
},
|
| 287 |
+
"text/plain": [
|
| 288 |
+
"Downloading builder script: 0%| | 0.00/1.84k [00:00<?, ?B/s]"
|
| 289 |
+
]
|
| 290 |
+
},
|
| 291 |
+
"metadata": {},
|
| 292 |
+
"output_type": "display_data"
|
| 293 |
+
},
|
| 294 |
+
{
|
| 295 |
+
"data": {
|
| 296 |
+
"text/plain": [
|
| 297 |
+
"{'accuracy': 0.6838235294117647, 'f1': 0.8122270742358079}"
|
| 298 |
+
]
|
| 299 |
+
},
|
| 300 |
+
"execution_count": 9,
|
| 301 |
+
"metadata": {},
|
| 302 |
+
"output_type": "execute_result"
|
| 303 |
+
}
|
| 304 |
+
],
|
| 305 |
+
"source": [
|
| 306 |
+
"'''Once the above step is completed, we can evaluate our model\n",
|
| 307 |
+
"very easily using the load_metric module in datasets library:'''\n",
|
| 308 |
+
"\n",
|
| 309 |
+
"from datasets import load_metric\n",
|
| 310 |
+
"\n",
|
| 311 |
+
"metric = load_metric(\"glue\", \"mrpc\")\n",
|
| 312 |
+
"\n",
|
| 313 |
+
"model.eval()\n",
|
| 314 |
+
"for batch in eval_dataloader:\n",
|
| 315 |
+
" batch = {k: v.to(device) for k,v in batch.items()}\n",
|
| 316 |
+
" with torch.no_grad():\n",
|
| 317 |
+
" outputs = model(**batch)\n",
|
| 318 |
+
" \n",
|
| 319 |
+
" logits = outputs.logits\n",
|
| 320 |
+
" predictions = torch.argmax(logits, dim=-1)\n",
|
| 321 |
+
" metric.add_batch(predictions=predictions, references=batch['labels'])\n",
|
| 322 |
+
" \n",
|
| 323 |
+
"metric.compute()"
|
| 324 |
+
]
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"cell_type": "code",
|
| 328 |
+
"execution_count": 10,
|
| 329 |
+
"metadata": {},
|
| 330 |
+
"outputs": [
|
| 331 |
+
{
|
| 332 |
+
"data": {
|
| 333 |
+
"text/plain": [
|
| 334 |
+
"'Congratulations! You have successfully fine-tuned a model all by yourself'"
|
| 335 |
+
]
|
| 336 |
+
},
|
| 337 |
+
"execution_count": 10,
|
| 338 |
+
"metadata": {},
|
| 339 |
+
"output_type": "execute_result"
|
| 340 |
+
}
|
| 341 |
+
],
|
| 342 |
+
"source": [
|
| 343 |
+
"'''Congratulations! You have successfully fine-tuned a model all by yourself'''"
|
| 344 |
+
]
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"cell_type": "code",
|
| 348 |
+
"execution_count": null,
|
| 349 |
+
"metadata": {},
|
| 350 |
+
"outputs": [],
|
| 351 |
+
"source": []
|
| 352 |
+
}
|
| 353 |
+
],
|
| 354 |
+
"metadata": {
|
| 355 |
+
"kernelspec": {
|
| 356 |
+
"display_name": "Python 3",
|
| 357 |
+
"language": "python",
|
| 358 |
+
"name": "python3"
|
| 359 |
+
},
|
| 360 |
+
"language_info": {
|
| 361 |
+
"codemirror_mode": {
|
| 362 |
+
"name": "ipython",
|
| 363 |
+
"version": 3
|
| 364 |
+
},
|
| 365 |
+
"file_extension": ".py",
|
| 366 |
+
"mimetype": "text/x-python",
|
| 367 |
+
"name": "python",
|
| 368 |
+
"nbconvert_exporter": "python",
|
| 369 |
+
"pygments_lexer": "ipython3",
|
| 370 |
+
"version": "3.11.6"
|
| 371 |
+
}
|
| 372 |
+
},
|
| 373 |
+
"nbformat": 4,
|
| 374 |
+
"nbformat_minor": 2
|
| 375 |
+
}
|
Tokenizer_pipeline.ipynb
ADDED
|
@@ -0,0 +1,562 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stdout",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"['i', 'am', 'going', 'to', 'token', '##ize', 'this', 'sentence', '.']\n"
|
| 13 |
+
]
|
| 14 |
+
}
|
| 15 |
+
],
|
| 16 |
+
"source": [
|
| 17 |
+
"from transformers import AutoTokenizer\n",
|
| 18 |
+
"\n",
|
| 19 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 20 |
+
"tokens = tokenizer.tokenize('''I am going to tokenize this sentence.''')\n",
|
| 21 |
+
"print(tokens)"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 3,
|
| 27 |
+
"metadata": {},
|
| 28 |
+
"outputs": [
|
| 29 |
+
{
|
| 30 |
+
"name": "stdout",
|
| 31 |
+
"output_type": "stream",
|
| 32 |
+
"text": [
|
| 33 |
+
"['▁i', '▁am', '▁going', '▁to', '▁to', 'ken', 'ize', '▁this', '▁sentence', '▁using', '▁albert', '-', 'base', '-', 'v', '1', '▁model', \"'\", 's', '▁to', 'ken', 'izer', '.']\n"
|
| 34 |
+
]
|
| 35 |
+
}
|
| 36 |
+
],
|
| 37 |
+
"source": [
|
| 38 |
+
"from transformers import AutoTokenizer\n",
|
| 39 |
+
"\n",
|
| 40 |
+
"tokenizer = AutoTokenizer.from_pretrained('albert-base-v1')\n",
|
| 41 |
+
"tokens = tokenizer.tokenize('''I am going to tokenize this sentence\n",
|
| 42 |
+
" using albert-base-v1 model's tokenizer.''')\n",
|
| 43 |
+
"print(tokens)"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": 4,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [
|
| 51 |
+
{
|
| 52 |
+
"data": {
|
| 53 |
+
"text/plain": [
|
| 54 |
+
"'Note that above, the subword based tokenizer \\nis used in both tokenizers. \\nHowever to depict the beginning of a word in albert\\nbase tokenizer it uses the prefix _\\nwhile, bert base tokenizer uses ##.'"
|
| 55 |
+
]
|
| 56 |
+
},
|
| 57 |
+
"execution_count": 4,
|
| 58 |
+
"metadata": {},
|
| 59 |
+
"output_type": "execute_result"
|
| 60 |
+
}
|
| 61 |
+
],
|
| 62 |
+
"source": [
|
| 63 |
+
"'''Note that above, the subword based tokenizer \n",
|
| 64 |
+
"is used in both tokenizers. \n",
|
| 65 |
+
"However to depict the beginning of a word in albert\n",
|
| 66 |
+
"base tokenizer it uses the prefix _\n",
|
| 67 |
+
"while, bert base tokenizer uses ##.'''\n"
|
| 68 |
+
]
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"cell_type": "code",
|
| 72 |
+
"execution_count": 5,
|
| 73 |
+
"metadata": {},
|
| 74 |
+
"outputs": [
|
| 75 |
+
{
|
| 76 |
+
"name": "stdout",
|
| 77 |
+
"output_type": "stream",
|
| 78 |
+
"text": [
|
| 79 |
+
"[11082, 3046, 2000, 19204, 4697]\n"
|
| 80 |
+
]
|
| 81 |
+
}
|
| 82 |
+
],
|
| 83 |
+
"source": [
|
| 84 |
+
"'''\n",
|
| 85 |
+
"The second task of the tokenization pipeline\n",
|
| 86 |
+
"is to map those tokens to their respective IDs.\n",
|
| 87 |
+
"This is done by the convert_tokens_to_ids method.\n",
|
| 88 |
+
"'''\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"from transformers import AutoTokenizer\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 93 |
+
"tokens = tokenizer.tokenize('Lets try to tokenize')\n",
|
| 94 |
+
"input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
|
| 95 |
+
"print(input_ids)\n",
|
| 96 |
+
"\n",
|
| 97 |
+
"'''This is why we need to download a file while instantiating\n",
|
| 98 |
+
"the tokenizer from pretrained method.\n",
|
| 99 |
+
"We have to make sure we use the same mapping as to when the model\n",
|
| 100 |
+
"was pretrained\n",
|
| 101 |
+
"To do this we use the convert tokens to ids method.'''"
|
| 102 |
+
]
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"cell_type": "code",
|
| 106 |
+
"execution_count": 6,
|
| 107 |
+
"metadata": {},
|
| 108 |
+
"outputs": [
|
| 109 |
+
{
|
| 110 |
+
"name": "stderr",
|
| 111 |
+
"output_type": "stream",
|
| 112 |
+
"text": [
|
| 113 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"name": "stdout",
|
| 118 |
+
"output_type": "stream",
|
| 119 |
+
"text": [
|
| 120 |
+
"[101, 11082, 3046, 2000, 19204, 4697, 102]\n"
|
| 121 |
+
]
|
| 122 |
+
}
|
| 123 |
+
],
|
| 124 |
+
"source": [
|
| 125 |
+
"'''\n",
|
| 126 |
+
"You may have noticed that the tokens for CLS and SEP \n",
|
| 127 |
+
"are missing! Those are the special tokens that are added \n",
|
| 128 |
+
"by the prepare for model method. \n",
|
| 129 |
+
"The prepare for model method knows which special tokens\n",
|
| 130 |
+
"to add and where to add them based on the model type.\n",
|
| 131 |
+
"'''\n",
|
| 132 |
+
"final_inputs = tokenizer.prepare_for_model(input_ids)\n",
|
| 133 |
+
"print(final_inputs['input_ids'])"
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"cell_type": "code",
|
| 138 |
+
"execution_count": 8,
|
| 139 |
+
"metadata": {},
|
| 140 |
+
"outputs": [
|
| 141 |
+
{
|
| 142 |
+
"name": "stdout",
|
| 143 |
+
"output_type": "stream",
|
| 144 |
+
"text": [
|
| 145 |
+
"[CLS] lets try to tokenize [SEP]\n"
|
| 146 |
+
]
|
| 147 |
+
}
|
| 148 |
+
],
|
| 149 |
+
"source": [
|
| 150 |
+
"'''\n",
|
| 151 |
+
"You can look at the special tokens modularly\n",
|
| 152 |
+
"by decoding the input ids as how the tokenizer \n",
|
| 153 |
+
"has changed your text by using the decode method.\n",
|
| 154 |
+
"'''\n",
|
| 155 |
+
"from transformers import AutoTokenizer\n",
|
| 156 |
+
"\n",
|
| 157 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 158 |
+
"inputs = tokenizer('Lets try to tokenize')\n",
|
| 159 |
+
"print(tokenizer.decode(inputs['input_ids']))\n"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"cell_type": "code",
|
| 164 |
+
"execution_count": 11,
|
| 165 |
+
"metadata": {},
|
| 166 |
+
"outputs": [
|
| 167 |
+
{
|
| 168 |
+
"name": "stdout",
|
| 169 |
+
"output_type": "stream",
|
| 170 |
+
"text": [
|
| 171 |
+
"<s>Lets try to tokenize.</s>\n"
|
| 172 |
+
]
|
| 173 |
+
}
|
| 174 |
+
],
|
| 175 |
+
"source": [
|
| 176 |
+
"'''Above the bert tokenizer uses the [CLS] and [SEP] tokens\n",
|
| 177 |
+
"But the roberta tokenizer uses the <s> and </s> tokens'''\n",
|
| 178 |
+
"from transformers import AutoTokenizer\n",
|
| 179 |
+
"\n",
|
| 180 |
+
"tokenizer = AutoTokenizer.from_pretrained('roberta-base')\n",
|
| 181 |
+
"inputs = tokenizer('Lets try to tokenize.')\n",
|
| 182 |
+
"print(tokenizer.decode(inputs['input_ids']))"
|
| 183 |
+
]
|
| 184 |
+
},
|
| 185 |
+
{
|
| 186 |
+
"cell_type": "markdown",
|
| 187 |
+
"metadata": {},
|
| 188 |
+
"source": [
|
| 189 |
+
"There are actually two ways to decode:\n",
|
| 190 |
+
"- The one shown above\n",
|
| 191 |
+
"- The one shown below"
|
| 192 |
+
]
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"cell_type": "code",
|
| 196 |
+
"execution_count": null,
|
| 197 |
+
"metadata": {},
|
| 198 |
+
"outputs": [
|
| 199 |
+
{
|
| 200 |
+
"name": "stderr",
|
| 201 |
+
"output_type": "stream",
|
| 202 |
+
"text": [
|
| 203 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 204 |
+
]
|
| 205 |
+
},
|
| 206 |
+
{
|
| 207 |
+
"name": "stdout",
|
| 208 |
+
"output_type": "stream",
|
| 209 |
+
"text": [
|
| 210 |
+
"{'input_ids': [101, 10463, 2023, 2000, 19204, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}\n",
|
| 211 |
+
"convert this to tokens.\n"
|
| 212 |
+
]
|
| 213 |
+
}
|
| 214 |
+
],
|
| 215 |
+
"source": [
|
| 216 |
+
"from transformers import AutoTokenizer\n",
|
| 217 |
+
"\n",
|
| 218 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 219 |
+
"tokens = tokenizer.tokenize('Convert this to tokens.')\n",
|
| 220 |
+
"input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
|
| 221 |
+
"inputs = tokenizer.prepare_for_model(input_ids)\n",
|
| 222 |
+
"print(inputs)\n",
|
| 223 |
+
"\n",
|
| 224 |
+
"# decode:\n",
|
| 225 |
+
"decode = tokenizer.decode(input_ids)\n",
|
| 226 |
+
"print(decode)"
|
| 227 |
+
]
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"cell_type": "code",
|
| 231 |
+
"execution_count": null,
|
| 232 |
+
"metadata": {},
|
| 233 |
+
"outputs": [
|
| 234 |
+
{
|
| 235 |
+
"name": "stdout",
|
| 236 |
+
"output_type": "stream",
|
| 237 |
+
"text": [
|
| 238 |
+
"{'input_ids': [101, 10463, 2023, 2000, 19204, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}\n",
|
| 239 |
+
"[CLS] convert this to tokens. [SEP]\n"
|
| 240 |
+
]
|
| 241 |
+
}
|
| 242 |
+
],
|
| 243 |
+
"source": [
|
| 244 |
+
"from transformers import AutoTokenizer\n",
|
| 245 |
+
"\n",
|
| 246 |
+
"tokenizer =AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 247 |
+
"inputs = tokenizer('Convert this to tokens.')\n",
|
| 248 |
+
"print(inputs)\n",
|
| 249 |
+
"decode = tokenizer.decode(inputs['input_ids'])\n",
|
| 250 |
+
"print(decode)"
|
| 251 |
+
]
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"cell_type": "code",
|
| 255 |
+
"execution_count": 12,
|
| 256 |
+
"metadata": {},
|
| 257 |
+
"outputs": [
|
| 258 |
+
{
|
| 259 |
+
"name": "stdout",
|
| 260 |
+
"output_type": "stream",
|
| 261 |
+
"text": [
|
| 262 |
+
"{'input_ids': [101, 11082, 3046, 2000, 19204, 4697, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}\n"
|
| 263 |
+
]
|
| 264 |
+
}
|
| 265 |
+
],
|
| 266 |
+
"source": [
|
| 267 |
+
"'''\n",
|
| 268 |
+
"Now that you know the intermediate stuff about how \n",
|
| 269 |
+
"a tokenizer works, you can forget all that stuff\n",
|
| 270 |
+
"and only remember that you have to call it on the input \n",
|
| 271 |
+
"text.\n",
|
| 272 |
+
"'''\n",
|
| 273 |
+
"from transformers import AutoTokenizer\n",
|
| 274 |
+
"\n",
|
| 275 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 276 |
+
"inputs = tokenizer('Lets try to tokenize')\n",
|
| 277 |
+
"print(inputs)"
|
| 278 |
+
]
|
| 279 |
+
},
|
| 280 |
+
{
|
| 281 |
+
"cell_type": "code",
|
| 282 |
+
"execution_count": 13,
|
| 283 |
+
"metadata": {},
|
| 284 |
+
"outputs": [
|
| 285 |
+
{
|
| 286 |
+
"data": {
|
| 287 |
+
"text/plain": [
|
| 288 |
+
"'To learn what attention mask is\\ncheck out the --batch input together\\n\\nTo learn what the token type ids are\\ncheck out --process pairs of sentences \\n'"
|
| 289 |
+
]
|
| 290 |
+
},
|
| 291 |
+
"execution_count": 13,
|
| 292 |
+
"metadata": {},
|
| 293 |
+
"output_type": "execute_result"
|
| 294 |
+
}
|
| 295 |
+
],
|
| 296 |
+
"source": [
|
| 297 |
+
"'''To learn what attention mask is\n",
|
| 298 |
+
"check out the --batch input together\n",
|
| 299 |
+
"\n",
|
| 300 |
+
"To learn what the token type ids are\n",
|
| 301 |
+
"check out --process pairs of sentences \n",
|
| 302 |
+
"'''"
|
| 303 |
+
]
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"cell_type": "markdown",
|
| 307 |
+
"metadata": {},
|
| 308 |
+
"source": [
|
| 309 |
+
"The primary and easy way to batch inputs together is as follows:"
|
| 310 |
+
]
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"cell_type": "code",
|
| 314 |
+
"execution_count": 2,
|
| 315 |
+
"metadata": {},
|
| 316 |
+
"outputs": [
|
| 317 |
+
{
|
| 318 |
+
"name": "stdout",
|
| 319 |
+
"output_type": "stream",
|
| 320 |
+
"text": [
|
| 321 |
+
"{'input_ids': tensor([[ 101, 1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607,\n",
|
| 322 |
+
" 2026, 2878, 2166, 1012, 102],\n",
|
| 323 |
+
" [ 101, 1045, 5223, 2023, 2061, 2172, 102, 0, 0, 0,\n",
|
| 324 |
+
" 0, 0, 0, 0, 0],\n",
|
| 325 |
+
" [ 101, 1045, 2572, 2025, 9657, 1012, 102, 0, 0, 0,\n",
|
| 326 |
+
" 0, 0, 0, 0, 0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 327 |
+
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 328 |
+
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
|
| 329 |
+
" [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 330 |
+
" [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}\n"
|
| 331 |
+
]
|
| 332 |
+
}
|
| 333 |
+
],
|
| 334 |
+
"source": [
|
| 335 |
+
"from transformers import AutoTokenizer\n",
|
| 336 |
+
"\n",
|
| 337 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 338 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 339 |
+
"\n",
|
| 340 |
+
"sentences = ['I have been waiting for a hugging face course my whole life.',\n",
|
| 341 |
+
" 'I hate this so much',\n",
|
| 342 |
+
" 'I am not confident.']\n",
|
| 343 |
+
"tokens = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')\n",
|
| 344 |
+
"print(tokens)"
|
| 345 |
+
]
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"cell_type": "markdown",
|
| 349 |
+
"metadata": {},
|
| 350 |
+
"source": [
|
| 351 |
+
"However, if you wish to batch inputs together from beneath the tokenizer pipeline:"
|
| 352 |
+
]
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"cell_type": "code",
|
| 356 |
+
"execution_count": 4,
|
| 357 |
+
"metadata": {},
|
| 358 |
+
"outputs": [
|
| 359 |
+
{
|
| 360 |
+
"name": "stdout",
|
| 361 |
+
"output_type": "stream",
|
| 362 |
+
"text": [
|
| 363 |
+
"[[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012], [1045, 5223, 2023, 2061, 2172], [1045, 2572, 2025, 9657, 1012]]\n",
|
| 364 |
+
"[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012]\n",
|
| 365 |
+
"[1045, 5223, 2023, 2061, 2172]\n",
|
| 366 |
+
"[1045, 2572, 2025, 9657, 1012]\n"
|
| 367 |
+
]
|
| 368 |
+
}
|
| 369 |
+
],
|
| 370 |
+
"source": [
|
| 371 |
+
"from transformers import AutoTokenizer\n",
|
| 372 |
+
"\n",
|
| 373 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 374 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 375 |
+
"\n",
|
| 376 |
+
"sentences = ['I have been waiting for a hugging face course my whole life.',\n",
|
| 377 |
+
" 'I hate this so much',\n",
|
| 378 |
+
" 'I am not confident.']\n",
|
| 379 |
+
"\n",
|
| 380 |
+
"tokens = [tokenizer.tokenize(sentence) for sentence in sentences]\n",
|
| 381 |
+
"ids = [tokenizer.convert_tokens_to_ids(token) for token in tokens]\n",
|
| 382 |
+
"\n",
|
| 383 |
+
"print(ids)\n",
|
| 384 |
+
"print(ids[0])\n",
|
| 385 |
+
"print(ids[1])\n",
|
| 386 |
+
"print(ids[2])"
|
| 387 |
+
]
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"cell_type": "code",
|
| 391 |
+
"execution_count": 5,
|
| 392 |
+
"metadata": {},
|
| 393 |
+
"outputs": [
|
| 394 |
+
{
|
| 395 |
+
"name": "stdout",
|
| 396 |
+
"output_type": "stream",
|
| 397 |
+
"text": [
|
| 398 |
+
"[[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012], [1045, 5223, 2023, 2061, 2172], [1045, 2572, 2025, 9657, 1012]]\n"
|
| 399 |
+
]
|
| 400 |
+
},
|
| 401 |
+
{
|
| 402 |
+
"ename": "ValueError",
|
| 403 |
+
"evalue": "expected sequence of length 13 at dim 1 (got 5)",
|
| 404 |
+
"output_type": "error",
|
| 405 |
+
"traceback": [
|
| 406 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 407 |
+
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
| 408 |
+
"\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Tokenizer_pipeline.ipynb Cell 14\u001b[0m line \u001b[0;36m9\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X21sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mtorch\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X21sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m \u001b[39mprint\u001b[39m(ids)\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X21sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m input_ids \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39;49mtensor(ids)\n",
|
| 409 |
+
"\u001b[1;31mValueError\u001b[0m: expected sequence of length 13 at dim 1 (got 5)"
|
| 410 |
+
]
|
| 411 |
+
}
|
| 412 |
+
],
|
| 413 |
+
"source": [
|
| 414 |
+
"'''Trying to create a tensor from the three lists \n",
|
| 415 |
+
"in torch or tensorflow will result in an error. This\n",
|
| 416 |
+
"is because the tensors must be of the same size, i.e. rectangular\n",
|
| 417 |
+
"This is done by padding. which we will see later on.'''\n",
|
| 418 |
+
"\n",
|
| 419 |
+
"\n",
|
| 420 |
+
"import torch\n",
|
| 421 |
+
"print(ids)\n",
|
| 422 |
+
"input_ids = torch.tensor(ids)"
|
| 423 |
+
]
|
| 424 |
+
},
|
| 425 |
+
{
|
| 426 |
+
"cell_type": "code",
|
| 427 |
+
"execution_count": 7,
|
| 428 |
+
"metadata": {},
|
| 429 |
+
"outputs": [
|
| 430 |
+
{
|
| 431 |
+
"name": "stdout",
|
| 432 |
+
"output_type": "stream",
|
| 433 |
+
"text": [
|
| 434 |
+
"0\n"
|
| 435 |
+
]
|
| 436 |
+
}
|
| 437 |
+
],
|
| 438 |
+
"source": [
|
| 439 |
+
"'''The padding id provided to the model via training is 0.\n",
|
| 440 |
+
"One should not try to change it. you can pad your outputs like so:'''\n",
|
| 441 |
+
"\n",
|
| 442 |
+
"from transformers import AutoTokenizer\n",
|
| 443 |
+
"\n",
|
| 444 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 445 |
+
"pad = tokenizer.pad_token_id\n",
|
| 446 |
+
"print(pad)"
|
| 447 |
+
]
|
| 448 |
+
},
|
| 449 |
+
{
|
| 450 |
+
"cell_type": "code",
|
| 451 |
+
"execution_count": 15,
|
| 452 |
+
"metadata": {},
|
| 453 |
+
"outputs": [
|
| 454 |
+
{
|
| 455 |
+
"name": "stdout",
|
| 456 |
+
"output_type": "stream",
|
| 457 |
+
"text": [
|
| 458 |
+
"[[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012], [1045, 5223, 2023, 2061, 2172], [1045, 2572, 2025, 9657, 1012]]\n",
|
| 459 |
+
"[1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 2607, 2026, 2878, 2166, 1012]\n",
|
| 460 |
+
"[1045, 5223, 2023, 2061, 2172]\n",
|
| 461 |
+
"[1045, 2572, 2025, 9657, 1012]\n"
|
| 462 |
+
]
|
| 463 |
+
},
|
| 464 |
+
{
|
| 465 |
+
"ename": "TypeError",
|
| 466 |
+
"evalue": "'int' object is not callable",
|
| 467 |
+
"output_type": "error",
|
| 468 |
+
"traceback": [
|
| 469 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 470 |
+
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
|
| 471 |
+
"\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Tokenizer_pipeline.ipynb Cell 16\u001b[0m line \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=13'>14</a>\u001b[0m \u001b[39mprint\u001b[39m(ids[\u001b[39m1\u001b[39m])\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=14'>15</a>\u001b[0m \u001b[39mprint\u001b[39m(ids[\u001b[39m2\u001b[39m])\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=16'>17</a>\u001b[0m pad_ids \u001b[39m=\u001b[39m [tokenizer\u001b[39m.\u001b[39;49mpad_token_id(_) \u001b[39mfor\u001b[39;49;00m _ \u001b[39min\u001b[39;49;00m ids]\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=17'>18</a>\u001b[0m \u001b[39mprint\u001b[39m(pad_ids)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m ids1 \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mtensor(pad_ids[\u001b[39m0\u001b[39m])\n",
|
| 472 |
+
"\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Tokenizer_pipeline.ipynb Cell 16\u001b[0m line \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=13'>14</a>\u001b[0m \u001b[39mprint\u001b[39m(ids[\u001b[39m1\u001b[39m])\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=14'>15</a>\u001b[0m \u001b[39mprint\u001b[39m(ids[\u001b[39m2\u001b[39m])\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=16'>17</a>\u001b[0m pad_ids \u001b[39m=\u001b[39m [tokenizer\u001b[39m.\u001b[39;49mpad_token_id(_) \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m ids]\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=17'>18</a>\u001b[0m \u001b[39mprint\u001b[39m(pad_ids)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Tokenizer_pipeline.ipynb#X23sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m ids1 \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mtensor(pad_ids[\u001b[39m0\u001b[39m])\n",
|
| 473 |
+
"\u001b[1;31mTypeError\u001b[0m: 'int' object is not callable"
|
| 474 |
+
]
|
| 475 |
+
}
|
| 476 |
+
],
|
| 477 |
+
"source": [
|
| 478 |
+
"from transformers import AutoTokenizer\n",
|
| 479 |
+
"\n",
|
| 480 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 481 |
+
"\n",
|
| 482 |
+
"sentences = ['I have been waiting for a hugging face course my whole life.',\n",
|
| 483 |
+
" 'I hate this so much',\n",
|
| 484 |
+
" 'I am not confident.']\n",
|
| 485 |
+
"\n",
|
| 486 |
+
"tokens = [tokenizer.tokenize(sentence) for sentence in sentences]\n",
|
| 487 |
+
"ids = [tokenizer.convert_tokens_to_ids(token) for token in tokens]\n",
|
| 488 |
+
"\n",
|
| 489 |
+
"print(ids)\n",
|
| 490 |
+
"print(ids[0])\n",
|
| 491 |
+
"print(ids[1])\n",
|
| 492 |
+
"print(ids[2])\n",
|
| 493 |
+
"\n",
|
| 494 |
+
"pad_ids = [tokenizer.pad_token_id(_) for _ in ids]\n",
|
| 495 |
+
"print(pad_ids)\n",
|
| 496 |
+
"ids1 = torch.tensor(pad_ids[0])\n",
|
| 497 |
+
"ids2 = torch.tensor(pad_ids[1])\n",
|
| 498 |
+
"ids3 = torch.tensor(pad_ids[2])\n",
|
| 499 |
+
"all_ids = torch.tensor(ids1, ids2, ids3)"
|
| 500 |
+
]
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"cell_type": "code",
|
| 504 |
+
"execution_count": 1,
|
| 505 |
+
"metadata": {},
|
| 506 |
+
"outputs": [
|
| 507 |
+
{
|
| 508 |
+
"name": "stdout",
|
| 509 |
+
"output_type": "stream",
|
| 510 |
+
"text": [
|
| 511 |
+
"['this', 'is', 'me', 'practicing']\n",
|
| 512 |
+
"[2023, 2003, 2033, 12560]\n",
|
| 513 |
+
"['this', 'is', 'me', 'practicing']\n",
|
| 514 |
+
"this is me practicing\n"
|
| 515 |
+
]
|
| 516 |
+
}
|
| 517 |
+
],
|
| 518 |
+
"source": [
|
| 519 |
+
"'''More methods!!!'''\n",
|
| 520 |
+
"from transformers import AutoTokenizer\n",
|
| 521 |
+
"\n",
|
| 522 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 523 |
+
"tokens = tokenizer.tokenize('This is me practicing')\n",
|
| 524 |
+
"print(tokens)\n",
|
| 525 |
+
"input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
|
| 526 |
+
"print(input_ids)\n",
|
| 527 |
+
"tokens_2 = tokenizer.convert_ids_to_tokens(input_ids)\n",
|
| 528 |
+
"print(tokens_2)\n",
|
| 529 |
+
"strings = tokenizer.convert_tokens_to_string(tokens)\n",
|
| 530 |
+
"print(strings)"
|
| 531 |
+
]
|
| 532 |
+
},
|
| 533 |
+
{
|
| 534 |
+
"cell_type": "code",
|
| 535 |
+
"execution_count": null,
|
| 536 |
+
"metadata": {},
|
| 537 |
+
"outputs": [],
|
| 538 |
+
"source": []
|
| 539 |
+
}
|
| 540 |
+
],
|
| 541 |
+
"metadata": {
|
| 542 |
+
"kernelspec": {
|
| 543 |
+
"display_name": "Python 3",
|
| 544 |
+
"language": "python",
|
| 545 |
+
"name": "python3"
|
| 546 |
+
},
|
| 547 |
+
"language_info": {
|
| 548 |
+
"codemirror_mode": {
|
| 549 |
+
"name": "ipython",
|
| 550 |
+
"version": 3
|
| 551 |
+
},
|
| 552 |
+
"file_extension": ".py",
|
| 553 |
+
"mimetype": "text/x-python",
|
| 554 |
+
"name": "python",
|
| 555 |
+
"nbconvert_exporter": "python",
|
| 556 |
+
"pygments_lexer": "ipython3",
|
| 557 |
+
"version": "3.11.6"
|
| 558 |
+
}
|
| 559 |
+
},
|
| 560 |
+
"nbformat": 4,
|
| 561 |
+
"nbformat_minor": 2
|
| 562 |
+
}
|
Trainer API 18-11-23.ipynb
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"from datasets import load_dataset\n",
|
| 10 |
+
"from transformers import AutoTokenizer, DataCollatorWithPadding\n",
|
| 11 |
+
"\n",
|
| 12 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 13 |
+
"raw_dataset = load_dataset('glue', 'mrpc')\n",
|
| 14 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 15 |
+
"\n",
|
| 16 |
+
"def tokenize_function(example):\n",
|
| 17 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 18 |
+
" truncation=True)\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"tokenized_dataset = raw_dataset.map(tokenize_function, batched=True)\n",
|
| 21 |
+
"data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
|
| 22 |
+
"\n",
|
| 23 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 24 |
+
"\n",
|
| 25 |
+
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n",
|
| 26 |
+
"\n",
|
| 27 |
+
"# Specify training arguments hyperparameters:\n",
|
| 28 |
+
"from transformers import TrainingArguments\n",
|
| 29 |
+
"training_args = TrainingArguments(\"test-trainer\",\n",
|
| 30 |
+
" per_device_train_batch_size=16,\n",
|
| 31 |
+
" per_device_eval_batch_size=16,\n",
|
| 32 |
+
" num_train_epochs=5,\n",
|
| 33 |
+
" learning_rate=2e-5,\n",
|
| 34 |
+
" weight_decay=0.01)\n",
|
| 35 |
+
"\n",
|
| 36 |
+
"# Create the Trainer instance:\n",
|
| 37 |
+
"from transformers import Trainer\n",
|
| 38 |
+
"trainer = Trainer(\n",
|
| 39 |
+
" model=model,\n",
|
| 40 |
+
" args=training_args,\n",
|
| 41 |
+
" train_dataset=tokenized_dataset['train'],\n",
|
| 42 |
+
" eval_dataset=tokenized_dataset['validation'],\n",
|
| 43 |
+
" data_collator=data_collator,\n",
|
| 44 |
+
" tokenizer=tokenizer\n",
|
| 45 |
+
")\n",
|
| 46 |
+
"trainer.train()\n",
|
| 47 |
+
"\n",
|
| 48 |
+
"'''The results will however be anticlimatic because \n",
|
| 49 |
+
"you will only get a training loss that doesn't tell you how well the\n",
|
| 50 |
+
"model is actually doing..\n",
|
| 51 |
+
"To fix this, you need to specify the evaluation metric'''\n",
|
| 52 |
+
"\n",
|
| 53 |
+
"predictions = trainer.predict(tokenized_dataset['validation'])\n",
|
| 54 |
+
"print(predictions)\n",
|
| 55 |
+
"print(predictions.predictions.shape, predictions.label_ids.shape)\n",
|
| 56 |
+
"\n",
|
| 57 |
+
"# it returns a named tuple with 3 elements: predictions, label_ids, metrics\n",
|
| 58 |
+
"# the predictions are the logits of the model with all the sentences of the dataset\n",
|
| 59 |
+
"# so a numpy array of shape(488 x 2)\n",
|
| 60 |
+
"\n",
|
| 61 |
+
"# to match them with our labels we need to take the maximum logits for each prediction\n",
|
| 62 |
+
"# to know which is the maximum, use the argmax function\n",
|
| 63 |
+
"import numpy as np\n",
|
| 64 |
+
"from datasets import load_metric\n",
|
| 65 |
+
"\n",
|
| 66 |
+
"metric = load_metric('glue', 'mrpc')\n",
|
| 67 |
+
"preds = np.argmax(predictions.predictions, axis=-1)\n",
|
| 68 |
+
"metric.compute(predictions=preds, references=predictions.label_ids)\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"'''We can see that our model did learn something!'''"
|
| 71 |
+
]
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"cell_type": "code",
|
| 75 |
+
"execution_count": null,
|
| 76 |
+
"metadata": {},
|
| 77 |
+
"outputs": [],
|
| 78 |
+
"source": [
|
| 79 |
+
"'''To monitor the metrics during training, we need to define a\n",
|
| 80 |
+
"compute metric function as we did above\n",
|
| 81 |
+
"and pass it to the Trainer\n",
|
| 82 |
+
"'''\n",
|
| 83 |
+
"metric = load_metric('glue','mrpc')\n",
|
| 84 |
+
"def compute_metrics(eval_preds):\n",
|
| 85 |
+
" logits, labels = eval_preds\n",
|
| 86 |
+
" predictions = np.argmax(logits, axis=-1)\n",
|
| 87 |
+
" return metric.compute(predictions=predictions, references=labels)\n",
|
| 88 |
+
"\n",
|
| 89 |
+
"training_args = TrainingArguments(\"test-trainer\",\n",
|
| 90 |
+
" evaluation_strategy='epoch')\n",
|
| 91 |
+
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"trainer = Trainer(\n",
|
| 94 |
+
" model=model,\n",
|
| 95 |
+
" args=training_args,\n",
|
| 96 |
+
" train_dataset=tokenized_dataset['train'],\n",
|
| 97 |
+
" eval_dataset=tokenized_dataset['validation'],\n",
|
| 98 |
+
" data_collator=data_collator,\n",
|
| 99 |
+
" tokenizer=tokenizer,\n",
|
| 100 |
+
" compute_metrics=compute_metrics\n",
|
| 101 |
+
")\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"trainer.train()\n"
|
| 104 |
+
]
|
| 105 |
+
}
|
| 106 |
+
],
|
| 107 |
+
"metadata": {
|
| 108 |
+
"language_info": {
|
| 109 |
+
"name": "python"
|
| 110 |
+
}
|
| 111 |
+
},
|
| 112 |
+
"nbformat": 4,
|
| 113 |
+
"nbformat_minor": 2
|
| 114 |
+
}
|
Trainer API.ipynb
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"data": {
|
| 10 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 11 |
+
"model_id": "a2e9b1825c9d48a8abb3535087c66b42",
|
| 12 |
+
"version_major": 2,
|
| 13 |
+
"version_minor": 0
|
| 14 |
+
},
|
| 15 |
+
"text/plain": [
|
| 16 |
+
"Map: 0%| | 0/408 [00:00<?, ? examples/s]"
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
"metadata": {},
|
| 20 |
+
"output_type": "display_data"
|
| 21 |
+
}
|
| 22 |
+
],
|
| 23 |
+
"source": [
|
| 24 |
+
"from transformers import AutoTokenizer, DataCollatorWithPadding\n",
|
| 25 |
+
"from datasets import load_dataset\n",
|
| 26 |
+
"\n",
|
| 27 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 28 |
+
"raw_datasets = load_dataset('glue','mrpc')\n",
|
| 29 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 30 |
+
"\n",
|
| 31 |
+
"def tokenizer_function(example):\n",
|
| 32 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 33 |
+
" truncation=True)\n",
|
| 34 |
+
"\n",
|
| 35 |
+
"tokenized_datasets = raw_datasets.map(tokenizer_function, batched=True)\n",
|
| 36 |
+
"data_collator = DataCollatorWithPadding(tokenizer=tokenizer)"
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"cell_type": "code",
|
| 41 |
+
"execution_count": 2,
|
| 42 |
+
"metadata": {},
|
| 43 |
+
"outputs": [
|
| 44 |
+
{
|
| 45 |
+
"data": {
|
| 46 |
+
"text/plain": [
|
| 47 |
+
"'Note that we do not do the final steps such as:\\nRemoving the unnecessary columns, Renaming the column label->labels,\\nFormating the input_ids to pytorch tensors, etc.\\nThe trainer API will automatically do this for us by analyzing the model\\nsignature!\\n'"
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
"execution_count": 2,
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"output_type": "execute_result"
|
| 53 |
+
}
|
| 54 |
+
],
|
| 55 |
+
"source": [
|
| 56 |
+
"'''Note we did not apply padding in the preprocessing as we used Dynamic Padding\n",
|
| 57 |
+
"by the DataCollatorWithPadding!'''\n",
|
| 58 |
+
"\"\"\"Note that we do not do the final steps such as:\n",
|
| 59 |
+
"Removing the unnecessary columns, Renaming the column label->labels,\n",
|
| 60 |
+
"Formating the input_ids to pytorch tensors, etc.\n",
|
| 61 |
+
"The trainer API will automatically do this for us by analyzing the model\n",
|
| 62 |
+
"signature!\n",
|
| 63 |
+
"\"\"\""
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"cell_type": "code",
|
| 68 |
+
"execution_count": 3,
|
| 69 |
+
"metadata": {},
|
| 70 |
+
"outputs": [
|
| 71 |
+
{
|
| 72 |
+
"name": "stderr",
|
| 73 |
+
"output_type": "stream",
|
| 74 |
+
"text": [
|
| 75 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
|
| 76 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 77 |
+
]
|
| 78 |
+
}
|
| 79 |
+
],
|
| 80 |
+
"source": [
|
| 81 |
+
"'''The last step is to define our model and\n",
|
| 82 |
+
"prepare some training hyper parameters'''\n",
|
| 83 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 84 |
+
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)"
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"cell_type": "code",
|
| 89 |
+
"execution_count": 4,
|
| 90 |
+
"metadata": {},
|
| 91 |
+
"outputs": [],
|
| 92 |
+
"source": [
|
| 93 |
+
"from transformers import TrainingArguments\n",
|
| 94 |
+
"import torch\n",
|
| 95 |
+
"training_args = TrainingArguments('test-trainer')"
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"cell_type": "code",
|
| 100 |
+
"execution_count": 6,
|
| 101 |
+
"metadata": {},
|
| 102 |
+
"outputs": [],
|
| 103 |
+
"source": [
|
| 104 |
+
"from transformers import TrainingArguments\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"# You can also assign all the hyper parameters in the arguments:\n",
|
| 107 |
+
"training_args = TrainingArguments(\n",
|
| 108 |
+
" 'test-trainer',\n",
|
| 109 |
+
" per_device_train_batch_size=16,\n",
|
| 110 |
+
" per_device_eval_batch_size=64,\n",
|
| 111 |
+
" num_train_epochs=5,\n",
|
| 112 |
+
" learning_rate=2e-5,\n",
|
| 113 |
+
" weight_decay=0.01,)\n"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"cell_type": "code",
|
| 118 |
+
"execution_count": 7,
|
| 119 |
+
"metadata": {},
|
| 120 |
+
"outputs": [
|
| 121 |
+
{
|
| 122 |
+
"data": {
|
| 123 |
+
"text/plain": [
|
| 124 |
+
"\"It's then very easy to create a trainer and launch a training\""
|
| 125 |
+
]
|
| 126 |
+
},
|
| 127 |
+
"execution_count": 7,
|
| 128 |
+
"metadata": {},
|
| 129 |
+
"output_type": "execute_result"
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"source": [
|
| 133 |
+
"'''It's then very easy to create a trainer and launch a training'''"
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"cell_type": "code",
|
| 138 |
+
"execution_count": 8,
|
| 139 |
+
"metadata": {},
|
| 140 |
+
"outputs": [
|
| 141 |
+
{
|
| 142 |
+
"data": {
|
| 143 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 144 |
+
"model_id": "90b1d1a6ce2f4288b6645258ddf83af6",
|
| 145 |
+
"version_major": 2,
|
| 146 |
+
"version_minor": 0
|
| 147 |
+
},
|
| 148 |
+
"text/plain": [
|
| 149 |
+
" 0%| | 0/1150 [00:00<?, ?it/s]"
|
| 150 |
+
]
|
| 151 |
+
},
|
| 152 |
+
"metadata": {},
|
| 153 |
+
"output_type": "display_data"
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"name": "stderr",
|
| 157 |
+
"output_type": "stream",
|
| 158 |
+
"text": [
|
| 159 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"ename": "KeyboardInterrupt",
|
| 164 |
+
"evalue": "",
|
| 165 |
+
"output_type": "error",
|
| 166 |
+
"traceback": [
|
| 167 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 168 |
+
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
| 169 |
+
"\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Trainer API.ipynb Cell 7\u001b[0m line \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m Trainer\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m trainer \u001b[39m=\u001b[39m Trainer(\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m model, \n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m training_args,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m tokenizer\u001b[39m=\u001b[39mtokenizer,\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m )\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#W6sZmlsZQ%3D%3D?line=11'>12</a>\u001b[0m trainer\u001b[39m.\u001b[39;49mtrain()\n",
|
| 170 |
+
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\trainer.py:1591\u001b[0m, in \u001b[0;36mTrainer.train\u001b[1;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[0;32m 1589\u001b[0m hf_hub_utils\u001b[39m.\u001b[39menable_progress_bars()\n\u001b[0;32m 1590\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1591\u001b[0m \u001b[39mreturn\u001b[39;00m inner_training_loop(\n\u001b[0;32m 1592\u001b[0m args\u001b[39m=\u001b[39;49margs,\n\u001b[0;32m 1593\u001b[0m resume_from_checkpoint\u001b[39m=\u001b[39;49mresume_from_checkpoint,\n\u001b[0;32m 1594\u001b[0m trial\u001b[39m=\u001b[39;49mtrial,\n\u001b[0;32m 1595\u001b[0m ignore_keys_for_eval\u001b[39m=\u001b[39;49mignore_keys_for_eval,\n\u001b[0;32m 1596\u001b[0m )\n",
|
| 171 |
+
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\trainer.py:1892\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[1;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[0;32m 1889\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcontrol \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcallback_handler\u001b[39m.\u001b[39mon_step_begin(args, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcontrol)\n\u001b[0;32m 1891\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39maccelerator\u001b[39m.\u001b[39maccumulate(model):\n\u001b[1;32m-> 1892\u001b[0m tr_loss_step \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtraining_step(model, inputs)\n\u001b[0;32m 1894\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[0;32m 1895\u001b[0m args\u001b[39m.\u001b[39mlogging_nan_inf_filter\n\u001b[0;32m 1896\u001b[0m \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[0;32m 1897\u001b[0m \u001b[39mand\u001b[39;00m (torch\u001b[39m.\u001b[39misnan(tr_loss_step) \u001b[39mor\u001b[39;00m torch\u001b[39m.\u001b[39misinf(tr_loss_step))\n\u001b[0;32m 1898\u001b[0m ):\n\u001b[0;32m 1899\u001b[0m \u001b[39m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[0;32m 1900\u001b[0m tr_loss \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m tr_loss \u001b[39m/\u001b[39m (\u001b[39m1\u001b[39m \u001b[39m+\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mglobal_step \u001b[39m-\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_globalstep_last_logged)\n",
|
| 172 |
+
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\trainer.py:2787\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[1;34m(self, model, inputs)\u001b[0m\n\u001b[0;32m 2785\u001b[0m scaled_loss\u001b[39m.\u001b[39mbackward()\n\u001b[0;32m 2786\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 2787\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49maccelerator\u001b[39m.\u001b[39;49mbackward(loss)\n\u001b[0;32m 2789\u001b[0m \u001b[39mreturn\u001b[39;00m loss\u001b[39m.\u001b[39mdetach() \u001b[39m/\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39margs\u001b[39m.\u001b[39mgradient_accumulation_steps\n",
|
| 173 |
+
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\accelerate\\accelerator.py:1989\u001b[0m, in \u001b[0;36mAccelerator.backward\u001b[1;34m(self, loss, **kwargs)\u001b[0m\n\u001b[0;32m 1987\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mscaler\u001b[39m.\u001b[39mscale(loss)\u001b[39m.\u001b[39mbackward(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 1988\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1989\u001b[0m loss\u001b[39m.\u001b[39;49mbackward(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
|
| 174 |
+
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\torch\\_tensor.py:492\u001b[0m, in \u001b[0;36mTensor.backward\u001b[1;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[0;32m 482\u001b[0m \u001b[39mif\u001b[39;00m has_torch_function_unary(\u001b[39mself\u001b[39m):\n\u001b[0;32m 483\u001b[0m \u001b[39mreturn\u001b[39;00m handle_torch_function(\n\u001b[0;32m 484\u001b[0m Tensor\u001b[39m.\u001b[39mbackward,\n\u001b[0;32m 485\u001b[0m (\u001b[39mself\u001b[39m,),\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 490\u001b[0m inputs\u001b[39m=\u001b[39minputs,\n\u001b[0;32m 491\u001b[0m )\n\u001b[1;32m--> 492\u001b[0m torch\u001b[39m.\u001b[39;49mautograd\u001b[39m.\u001b[39;49mbackward(\n\u001b[0;32m 493\u001b[0m \u001b[39mself\u001b[39;49m, gradient, retain_graph, create_graph, inputs\u001b[39m=\u001b[39;49minputs\n\u001b[0;32m 494\u001b[0m )\n",
|
| 175 |
+
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\torch\\autograd\\__init__.py:251\u001b[0m, in \u001b[0;36mbackward\u001b[1;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[0;32m 246\u001b[0m retain_graph \u001b[39m=\u001b[39m create_graph\n\u001b[0;32m 248\u001b[0m \u001b[39m# The reason we repeat the same comment below is that\u001b[39;00m\n\u001b[0;32m 249\u001b[0m \u001b[39m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[0;32m 250\u001b[0m \u001b[39m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[1;32m--> 251\u001b[0m Variable\u001b[39m.\u001b[39;49m_execution_engine\u001b[39m.\u001b[39;49mrun_backward( \u001b[39m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[0;32m 252\u001b[0m tensors,\n\u001b[0;32m 253\u001b[0m grad_tensors_,\n\u001b[0;32m 254\u001b[0m retain_graph,\n\u001b[0;32m 255\u001b[0m create_graph,\n\u001b[0;32m 256\u001b[0m inputs,\n\u001b[0;32m 257\u001b[0m allow_unreachable\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,\n\u001b[0;32m 258\u001b[0m accumulate_grad\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,\n\u001b[0;32m 259\u001b[0m )\n",
|
| 176 |
+
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
|
| 177 |
+
]
|
| 178 |
+
}
|
| 179 |
+
],
|
| 180 |
+
"source": [
|
| 181 |
+
"from transformers import Trainer\n",
|
| 182 |
+
"\n",
|
| 183 |
+
"trainer = Trainer(\n",
|
| 184 |
+
" model, \n",
|
| 185 |
+
" training_args,\n",
|
| 186 |
+
" train_dataset=tokenized_datasets['train'],\n",
|
| 187 |
+
" eval_dataset=tokenized_datasets['validation'],\n",
|
| 188 |
+
" data_collator=data_collator,\n",
|
| 189 |
+
" tokenizer=tokenizer,\n",
|
| 190 |
+
")\n",
|
| 191 |
+
"\n",
|
| 192 |
+
"trainer.train()"
|
| 193 |
+
]
|
| 194 |
+
},
|
| 195 |
+
{
|
| 196 |
+
"cell_type": "code",
|
| 197 |
+
"execution_count": 9,
|
| 198 |
+
"metadata": {},
|
| 199 |
+
"outputs": [
|
| 200 |
+
{
|
| 201 |
+
"data": {
|
| 202 |
+
"text/plain": [
|
| 203 |
+
"\"The result will however be anticlimactic as you will only get a training loss\\nwhich doesnt really tell you how well your model is performing\\n\\nThis is because we did not define a metric to evaluate our model on!\\n\\nTo get the metrics we will first gathers the predictions on the whole evaluation set('validation set')\\nusing the trainer.predict method\\n\\nIt will return a namedtuple with the following attributes:\\npredictions, label_ids, metrics, num_samples\\n\\nwe are trying to get the metrics attribute which is empty here!\\n\""
|
| 204 |
+
]
|
| 205 |
+
},
|
| 206 |
+
"execution_count": 9,
|
| 207 |
+
"metadata": {},
|
| 208 |
+
"output_type": "execute_result"
|
| 209 |
+
}
|
| 210 |
+
],
|
| 211 |
+
"source": [
|
| 212 |
+
"'''The result will however be anticlimactic as you will only get a training loss\n",
|
| 213 |
+
"which doesnt really tell you how well your model is performing\n",
|
| 214 |
+
"\n",
|
| 215 |
+
"This is because we did not define a metric to evaluate our model on!\n",
|
| 216 |
+
"\n",
|
| 217 |
+
"To get the metrics we will first gathers the predictions on the whole evaluation set('validation set')\n",
|
| 218 |
+
"using the trainer.predict method\n",
|
| 219 |
+
"\n",
|
| 220 |
+
"It will return a namedtuple with the following attributes:\n",
|
| 221 |
+
"predictions, label_ids, metrics, num_samples\n",
|
| 222 |
+
"\n",
|
| 223 |
+
"we are trying to get the metrics attribute which is empty here!\n",
|
| 224 |
+
"'''"
|
| 225 |
+
]
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"cell_type": "code",
|
| 229 |
+
"execution_count": 10,
|
| 230 |
+
"metadata": {},
|
| 231 |
+
"outputs": [
|
| 232 |
+
{
|
| 233 |
+
"data": {
|
| 234 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 235 |
+
"model_id": "43c2affe978b4dfd904e22f2766afe46",
|
| 236 |
+
"version_major": 2,
|
| 237 |
+
"version_minor": 0
|
| 238 |
+
},
|
| 239 |
+
"text/plain": [
|
| 240 |
+
" 0%| | 0/7 [00:00<?, ?it/s]"
|
| 241 |
+
]
|
| 242 |
+
},
|
| 243 |
+
"metadata": {},
|
| 244 |
+
"output_type": "display_data"
|
| 245 |
+
},
|
| 246 |
+
{
|
| 247 |
+
"ename": "AttributeError",
|
| 248 |
+
"evalue": "'numpy.ndarray' object has no attribute 'label_ids'",
|
| 249 |
+
"output_type": "error",
|
| 250 |
+
"traceback": [
|
| 251 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 252 |
+
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
| 253 |
+
"\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\Trainer API.ipynb Cell 9\u001b[0m line \u001b[0;36m2\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#X11sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m predictions \u001b[39m=\u001b[39m trainer\u001b[39m.\u001b[39mpredict(tokenized_datasets[\u001b[39m'\u001b[39m\u001b[39mvalidation\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/Trainer%20API.ipynb#X11sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39mprint\u001b[39m(predictions\u001b[39m.\u001b[39mpredictions\u001b[39m.\u001b[39mshape, predictions\u001b[39m.\u001b[39;49mpredictions\u001b[39m.\u001b[39;49mlabel_ids\u001b[39m.\u001b[39mshape)\n",
|
| 254 |
+
"\u001b[1;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'label_ids'"
|
| 255 |
+
]
|
| 256 |
+
}
|
| 257 |
+
],
|
| 258 |
+
"source": [
|
| 259 |
+
"predictions = trainer.predict(tokenized_datasets['validation'])\n",
|
| 260 |
+
"print(predictions.predictions.shape, predictions.predictions.label_ids.shape)"
|
| 261 |
+
]
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"cell_type": "code",
|
| 265 |
+
"execution_count": null,
|
| 266 |
+
"metadata": {},
|
| 267 |
+
"outputs": [],
|
| 268 |
+
"source": [
|
| 269 |
+
"'''The predictions are the logits of the model for all the sentences in the dataset\n",
|
| 270 |
+
"a numpy array of shape 408 x 2'''"
|
| 271 |
+
]
|
| 272 |
+
},
|
| 273 |
+
{
|
| 274 |
+
"cell_type": "code",
|
| 275 |
+
"execution_count": null,
|
| 276 |
+
"metadata": {},
|
| 277 |
+
"outputs": [],
|
| 278 |
+
"source": [
|
| 279 |
+
"'''To match them with our labels we have to take the maximum\n",
|
| 280 |
+
"logits for each prediction, to know which of the two classes was predicted\n",
|
| 281 |
+
"We do this using the argmax function of numpy\n",
|
| 282 |
+
"Then we can use the metrics from the datasets library\n",
|
| 283 |
+
"it can be loaded as easily as the dataset with the load_metric function\n",
|
| 284 |
+
"and it returns the evaluation metric for the dataset'''"
|
| 285 |
+
]
|
| 286 |
+
},
|
| 287 |
+
{
|
| 288 |
+
"cell_type": "code",
|
| 289 |
+
"execution_count": null,
|
| 290 |
+
"metadata": {},
|
| 291 |
+
"outputs": [],
|
| 292 |
+
"source": [
|
| 293 |
+
"import numpy as np\n",
|
| 294 |
+
"from datasets import load_metric\n",
|
| 295 |
+
"\n",
|
| 296 |
+
"metric = load_metric('glue','mrpc')\n",
|
| 297 |
+
"preds = np.argmax(predictions.predictions, axis=-1)\n",
|
| 298 |
+
"metric.compute(predictions=preds, references=predictions.label_ids)"
|
| 299 |
+
]
|
| 300 |
+
},
|
| 301 |
+
{
|
| 302 |
+
"cell_type": "code",
|
| 303 |
+
"execution_count": null,
|
| 304 |
+
"metadata": {},
|
| 305 |
+
"outputs": [],
|
| 306 |
+
"source": [
|
| 307 |
+
"''''We can see our model did learn something!'''"
|
| 308 |
+
]
|
| 309 |
+
},
|
| 310 |
+
{
|
| 311 |
+
"cell_type": "code",
|
| 312 |
+
"execution_count": null,
|
| 313 |
+
"metadata": {},
|
| 314 |
+
"outputs": [],
|
| 315 |
+
"source": [
|
| 316 |
+
"'''To monitor the evaluation metrics during training\n",
|
| 317 |
+
"we need to define a compute_metrics function\n",
|
| 318 |
+
"as we have just did\n",
|
| 319 |
+
"it takes a namedtuple with predictions and the labels and\n",
|
| 320 |
+
"returns a dictionary with the metrics we want to keep track of\n",
|
| 321 |
+
"'''"
|
| 322 |
+
]
|
| 323 |
+
},
|
| 324 |
+
{
|
| 325 |
+
"cell_type": "code",
|
| 326 |
+
"execution_count": null,
|
| 327 |
+
"metadata": {},
|
| 328 |
+
"outputs": [],
|
| 329 |
+
"source": [
|
| 330 |
+
"metric = load_metric('glue','mrpc')\n",
|
| 331 |
+
"\n",
|
| 332 |
+
"def compute_metric(eval_preds):\n",
|
| 333 |
+
" logits, labels = eval_preds\n",
|
| 334 |
+
" predictions = np.argmax(logits, axis=-1)\n",
|
| 335 |
+
" return metric.compute(predictions=predictions, references=labels)\n"
|
| 336 |
+
]
|
| 337 |
+
},
|
| 338 |
+
{
|
| 339 |
+
"cell_type": "code",
|
| 340 |
+
"execution_count": null,
|
| 341 |
+
"metadata": {},
|
| 342 |
+
"outputs": [],
|
| 343 |
+
"source": [
|
| 344 |
+
"'''By passing the epoch evaluation strategy to the trainer,\n",
|
| 345 |
+
"we tell the trainer to evaluate at the end of every epoch'''"
|
| 346 |
+
]
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"cell_type": "code",
|
| 350 |
+
"execution_count": null,
|
| 351 |
+
"metadata": {},
|
| 352 |
+
"outputs": [],
|
| 353 |
+
"source": [
|
| 354 |
+
"training_args = TrainingArguments(\n",
|
| 355 |
+
" 'test-trainer', evaluation_strategy='epoch'\n",
|
| 356 |
+
")\n",
|
| 357 |
+
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n",
|
| 358 |
+
"\n",
|
| 359 |
+
"trainer = Trainer(\n",
|
| 360 |
+
" model,\n",
|
| 361 |
+
" training_args,\n",
|
| 362 |
+
" train_dataset=tokenized_datasets['train'],\n",
|
| 363 |
+
" eval_dataset=tokenized_datasets['validation'],\n",
|
| 364 |
+
" data_collator=data_collator,\n",
|
| 365 |
+
" tokenizer=tokenizer,\n",
|
| 366 |
+
" compute_metrics=compute_metrics\n",
|
| 367 |
+
")"
|
| 368 |
+
]
|
| 369 |
+
},
|
| 370 |
+
{
|
| 371 |
+
"cell_type": "code",
|
| 372 |
+
"execution_count": null,
|
| 373 |
+
"metadata": {},
|
| 374 |
+
"outputs": [],
|
| 375 |
+
"source": [
|
| 376 |
+
"trainer.train()"
|
| 377 |
+
]
|
| 378 |
+
}
|
| 379 |
+
],
|
| 380 |
+
"metadata": {
|
| 381 |
+
"kernelspec": {
|
| 382 |
+
"display_name": "Python 3",
|
| 383 |
+
"language": "python",
|
| 384 |
+
"name": "python3"
|
| 385 |
+
},
|
| 386 |
+
"language_info": {
|
| 387 |
+
"codemirror_mode": {
|
| 388 |
+
"name": "ipython",
|
| 389 |
+
"version": 3
|
| 390 |
+
},
|
| 391 |
+
"file_extension": ".py",
|
| 392 |
+
"mimetype": "text/x-python",
|
| 393 |
+
"name": "python",
|
| 394 |
+
"nbconvert_exporter": "python",
|
| 395 |
+
"pygments_lexer": "ipython3",
|
| 396 |
+
"version": "3.11.6"
|
| 397 |
+
}
|
| 398 |
+
},
|
| 399 |
+
"nbformat": 4,
|
| 400 |
+
"nbformat_minor": 2
|
| 401 |
+
}
|
datasets_library.ipynb
ADDED
|
@@ -0,0 +1,513 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"### Hugging Face Datasets Library\n",
|
| 8 |
+
" You can find the names of the datasets provided by the glue benchmark in the video 22,23\n",
|
| 9 |
+
" \n",
|
| 10 |
+
" https://huggingface.co/docs/datasets/glue.html\n",
|
| 11 |
+
"\n",
|
| 12 |
+
"mrpc is one of the datasets provided by this benchmark to test para-\n",
|
| 13 |
+
"phrases"
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"cell_type": "code",
|
| 18 |
+
"execution_count": 2,
|
| 19 |
+
"metadata": {},
|
| 20 |
+
"outputs": [
|
| 21 |
+
{
|
| 22 |
+
"data": {
|
| 23 |
+
"text/plain": [
|
| 24 |
+
"DatasetDict({\n",
|
| 25 |
+
" train: Dataset({\n",
|
| 26 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 27 |
+
" num_rows: 3668\n",
|
| 28 |
+
" })\n",
|
| 29 |
+
" validation: Dataset({\n",
|
| 30 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 31 |
+
" num_rows: 408\n",
|
| 32 |
+
" })\n",
|
| 33 |
+
" test: Dataset({\n",
|
| 34 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 35 |
+
" num_rows: 1725\n",
|
| 36 |
+
" })\n",
|
| 37 |
+
"})"
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
"execution_count": 2,
|
| 41 |
+
"metadata": {},
|
| 42 |
+
"output_type": "execute_result"
|
| 43 |
+
}
|
| 44 |
+
],
|
| 45 |
+
"source": [
|
| 46 |
+
"from datasets import load_dataset\n",
|
| 47 |
+
"\n",
|
| 48 |
+
"# Load the dataset\n",
|
| 49 |
+
"raw_datasets = load_dataset(\"glue\", \"mrpc\")\n",
|
| 50 |
+
"raw_datasets\n",
|
| 51 |
+
"\n",
|
| 52 |
+
"# The output is a DatasetDict object, which contains each split of the Dataset."
|
| 53 |
+
]
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"cell_type": "code",
|
| 57 |
+
"execution_count": 3,
|
| 58 |
+
"metadata": {},
|
| 59 |
+
"outputs": [
|
| 60 |
+
{
|
| 61 |
+
"data": {
|
| 62 |
+
"text/plain": [
|
| 63 |
+
"Dataset({\n",
|
| 64 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx'],\n",
|
| 65 |
+
" num_rows: 3668\n",
|
| 66 |
+
"})"
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
"execution_count": 3,
|
| 70 |
+
"metadata": {},
|
| 71 |
+
"output_type": "execute_result"
|
| 72 |
+
}
|
| 73 |
+
],
|
| 74 |
+
"source": [
|
| 75 |
+
"# Connect to each split by indexing!\n",
|
| 76 |
+
"raw_datasets['train']"
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"execution_count": 4,
|
| 82 |
+
"metadata": {},
|
| 83 |
+
"outputs": [],
|
| 84 |
+
"source": [
|
| 85 |
+
"# we can see the number of training examples in the dataset as num_rows: 3668 in the above output~"
|
| 86 |
+
]
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"cell_type": "code",
|
| 90 |
+
"execution_count": 5,
|
| 91 |
+
"metadata": {},
|
| 92 |
+
"outputs": [
|
| 93 |
+
{
|
| 94 |
+
"data": {
|
| 95 |
+
"text/plain": [
|
| 96 |
+
"{'sentence1': 'Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .',\n",
|
| 97 |
+
" 'sentence2': 'Referring to him as only \" the witness \" , Amrozi accused his brother of deliberately distorting his evidence .',\n",
|
| 98 |
+
" 'label': 1,\n",
|
| 99 |
+
" 'idx': 0}"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
"execution_count": 5,
|
| 103 |
+
"metadata": {},
|
| 104 |
+
"output_type": "execute_result"
|
| 105 |
+
}
|
| 106 |
+
],
|
| 107 |
+
"source": [
|
| 108 |
+
"# Access a given element by it's index:\n",
|
| 109 |
+
"raw_datasets['train'][0]"
|
| 110 |
+
]
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"cell_type": "code",
|
| 114 |
+
"execution_count": 7,
|
| 115 |
+
"metadata": {},
|
| 116 |
+
"outputs": [
|
| 117 |
+
{
|
| 118 |
+
"data": {
|
| 119 |
+
"text/plain": [
|
| 120 |
+
"{'sentence1': ['Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .',\n",
|
| 121 |
+
" \"Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .\",\n",
|
| 122 |
+
" 'They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .',\n",
|
| 123 |
+
" 'Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .',\n",
|
| 124 |
+
" 'The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .'],\n",
|
| 125 |
+
" 'sentence2': ['Referring to him as only \" the witness \" , Amrozi accused his brother of deliberately distorting his evidence .',\n",
|
| 126 |
+
" \"Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .\",\n",
|
| 127 |
+
" \"On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .\",\n",
|
| 128 |
+
" 'Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .',\n",
|
| 129 |
+
" 'PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .'],\n",
|
| 130 |
+
" 'label': [1, 0, 1, 0, 1],\n",
|
| 131 |
+
" 'idx': [0, 1, 2, 3, 4]}"
|
| 132 |
+
]
|
| 133 |
+
},
|
| 134 |
+
"execution_count": 7,
|
| 135 |
+
"metadata": {},
|
| 136 |
+
"output_type": "execute_result"
|
| 137 |
+
}
|
| 138 |
+
],
|
| 139 |
+
"source": [
|
| 140 |
+
"# Access a slice of your dataset:\n",
|
| 141 |
+
"raw_datasets['train'][:5]"
|
| 142 |
+
]
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"cell_type": "code",
|
| 146 |
+
"execution_count": 8,
|
| 147 |
+
"metadata": {},
|
| 148 |
+
"outputs": [
|
| 149 |
+
{
|
| 150 |
+
"data": {
|
| 151 |
+
"text/plain": [
|
| 152 |
+
"'Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .'"
|
| 153 |
+
]
|
| 154 |
+
},
|
| 155 |
+
"execution_count": 8,
|
| 156 |
+
"metadata": {},
|
| 157 |
+
"output_type": "execute_result"
|
| 158 |
+
}
|
| 159 |
+
],
|
| 160 |
+
"source": [
|
| 161 |
+
"# Access the sentence1 of the first element:\n",
|
| 162 |
+
"raw_datasets['train'][0]['sentence1']"
|
| 163 |
+
]
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"cell_type": "code",
|
| 167 |
+
"execution_count": 9,
|
| 168 |
+
"metadata": {},
|
| 169 |
+
"outputs": [
|
| 170 |
+
{
|
| 171 |
+
"data": {
|
| 172 |
+
"text/plain": [
|
| 173 |
+
"['Amrozi accused his brother , whom he called \" the witness \" , of deliberately distorting his evidence .',\n",
|
| 174 |
+
" \"Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .\",\n",
|
| 175 |
+
" 'They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .',\n",
|
| 176 |
+
" 'Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .',\n",
|
| 177 |
+
" 'The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .']"
|
| 178 |
+
]
|
| 179 |
+
},
|
| 180 |
+
"execution_count": 9,
|
| 181 |
+
"metadata": {},
|
| 182 |
+
"output_type": "execute_result"
|
| 183 |
+
}
|
| 184 |
+
],
|
| 185 |
+
"source": [
|
| 186 |
+
"# Access the first 5 sentences of sentence1\n",
|
| 187 |
+
"raw_datasets['train'][:5]['sentence1']"
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"cell_type": "code",
|
| 192 |
+
"execution_count": 10,
|
| 193 |
+
"metadata": {},
|
| 194 |
+
"outputs": [
|
| 195 |
+
{
|
| 196 |
+
"data": {
|
| 197 |
+
"text/plain": [
|
| 198 |
+
"{'sentence1': Value(dtype='string', id=None),\n",
|
| 199 |
+
" 'sentence2': Value(dtype='string', id=None),\n",
|
| 200 |
+
" 'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None),\n",
|
| 201 |
+
" 'idx': Value(dtype='int32', id=None)}"
|
| 202 |
+
]
|
| 203 |
+
},
|
| 204 |
+
"execution_count": 10,
|
| 205 |
+
"metadata": {},
|
| 206 |
+
"output_type": "execute_result"
|
| 207 |
+
}
|
| 208 |
+
],
|
| 209 |
+
"source": [
|
| 210 |
+
"# Use the features attribute to see the information your dataset contains:\n",
|
| 211 |
+
"raw_datasets['train'].features"
|
| 212 |
+
]
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"cell_type": "code",
|
| 216 |
+
"execution_count": 11,
|
| 217 |
+
"metadata": {},
|
| 218 |
+
"outputs": [
|
| 219 |
+
{
|
| 220 |
+
"data": {
|
| 221 |
+
"text/plain": [
|
| 222 |
+
"'Remember features are the input variables to your model.'"
|
| 223 |
+
]
|
| 224 |
+
},
|
| 225 |
+
"execution_count": 11,
|
| 226 |
+
"metadata": {},
|
| 227 |
+
"output_type": "execute_result"
|
| 228 |
+
}
|
| 229 |
+
],
|
| 230 |
+
"source": [
|
| 231 |
+
"'''Remember features are the input variables to your model.'''"
|
| 232 |
+
]
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"cell_type": "code",
|
| 236 |
+
"execution_count": 14,
|
| 237 |
+
"metadata": {},
|
| 238 |
+
"outputs": [
|
| 239 |
+
{
|
| 240 |
+
"data": {
|
| 241 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 242 |
+
"model_id": "390d148b78f84283b5c3273c08fca389",
|
| 243 |
+
"version_major": 2,
|
| 244 |
+
"version_minor": 0
|
| 245 |
+
},
|
| 246 |
+
"text/plain": [
|
| 247 |
+
"Map: 0%| | 0/3668 [00:00<?, ? examples/s]"
|
| 248 |
+
]
|
| 249 |
+
},
|
| 250 |
+
"metadata": {},
|
| 251 |
+
"output_type": "display_data"
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"data": {
|
| 255 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 256 |
+
"model_id": "4be5e99804ce4588aefb566219523f97",
|
| 257 |
+
"version_major": 2,
|
| 258 |
+
"version_minor": 0
|
| 259 |
+
},
|
| 260 |
+
"text/plain": [
|
| 261 |
+
"Map: 0%| | 0/408 [00:00<?, ? examples/s]"
|
| 262 |
+
]
|
| 263 |
+
},
|
| 264 |
+
"metadata": {},
|
| 265 |
+
"output_type": "display_data"
|
| 266 |
+
},
|
| 267 |
+
{
|
| 268 |
+
"data": {
|
| 269 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 270 |
+
"model_id": "1bbbf398ada1455bb9b726a462e4b7e5",
|
| 271 |
+
"version_major": 2,
|
| 272 |
+
"version_minor": 0
|
| 273 |
+
},
|
| 274 |
+
"text/plain": [
|
| 275 |
+
"Map: 0%| | 0/1725 [00:00<?, ? examples/s]"
|
| 276 |
+
]
|
| 277 |
+
},
|
| 278 |
+
"metadata": {},
|
| 279 |
+
"output_type": "display_data"
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"name": "stdout",
|
| 283 |
+
"output_type": "stream",
|
| 284 |
+
"text": [
|
| 285 |
+
"{'train': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask']}\n"
|
| 286 |
+
]
|
| 287 |
+
}
|
| 288 |
+
],
|
| 289 |
+
"source": [
|
| 290 |
+
"# To preprocess all the elements of the dataset we need to tokenize them!\n",
|
| 291 |
+
"\n",
|
| 292 |
+
"from transformers import AutoTokenizer\n",
|
| 293 |
+
"\n",
|
| 294 |
+
"checkpoint = 'bert-base-cased'\n",
|
| 295 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 296 |
+
"\n",
|
| 297 |
+
"def tokenize_function(example):\n",
|
| 298 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 299 |
+
" padding='max_length',\n",
|
| 300 |
+
" truncation=True,\n",
|
| 301 |
+
" max_length=128)\n",
|
| 302 |
+
"tokenized_datasets = raw_datasets.map(tokenize_function)\n",
|
| 303 |
+
"print(tokenized_datasets.column_names)"
|
| 304 |
+
]
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"cell_type": "code",
|
| 308 |
+
"execution_count": 15,
|
| 309 |
+
"metadata": {},
|
| 310 |
+
"outputs": [
|
| 311 |
+
{
|
| 312 |
+
"data": {
|
| 313 |
+
"text/plain": [
|
| 314 |
+
"'To speed up tokenization, the map method uses multiprocessing.\\nYou could also set the batched=True'"
|
| 315 |
+
]
|
| 316 |
+
},
|
| 317 |
+
"execution_count": 15,
|
| 318 |
+
"metadata": {},
|
| 319 |
+
"output_type": "execute_result"
|
| 320 |
+
}
|
| 321 |
+
],
|
| 322 |
+
"source": [
|
| 323 |
+
"'''To speed up tokenization, the map method uses multiprocessing.\n",
|
| 324 |
+
"You could also set the batched=True'''"
|
| 325 |
+
]
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"cell_type": "code",
|
| 329 |
+
"execution_count": 17,
|
| 330 |
+
"metadata": {},
|
| 331 |
+
"outputs": [
|
| 332 |
+
{
|
| 333 |
+
"data": {
|
| 334 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 335 |
+
"model_id": "29966e62f612498d8d2d4c54d40467e3",
|
| 336 |
+
"version_major": 2,
|
| 337 |
+
"version_minor": 0
|
| 338 |
+
},
|
| 339 |
+
"text/plain": [
|
| 340 |
+
"Map: 0%| | 0/408 [00:00<?, ? examples/s]"
|
| 341 |
+
]
|
| 342 |
+
},
|
| 343 |
+
"metadata": {},
|
| 344 |
+
"output_type": "display_data"
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"data": {
|
| 348 |
+
"text/plain": [
|
| 349 |
+
"DatasetDict({\n",
|
| 350 |
+
" train: Dataset({\n",
|
| 351 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 352 |
+
" num_rows: 3668\n",
|
| 353 |
+
" })\n",
|
| 354 |
+
" validation: Dataset({\n",
|
| 355 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 356 |
+
" num_rows: 408\n",
|
| 357 |
+
" })\n",
|
| 358 |
+
" test: Dataset({\n",
|
| 359 |
+
" features: ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 360 |
+
" num_rows: 1725\n",
|
| 361 |
+
" })\n",
|
| 362 |
+
"})"
|
| 363 |
+
]
|
| 364 |
+
},
|
| 365 |
+
"execution_count": 17,
|
| 366 |
+
"metadata": {},
|
| 367 |
+
"output_type": "execute_result"
|
| 368 |
+
}
|
| 369 |
+
],
|
| 370 |
+
"source": [
|
| 371 |
+
"from transformers import AutoTokenizer\n",
|
| 372 |
+
"\n",
|
| 373 |
+
"checkpoint = 'bert-base-cased'\n",
|
| 374 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 375 |
+
"\n",
|
| 376 |
+
"def tokenize_function(example):\n",
|
| 377 |
+
" return tokenizer(example['sentence1'], example['sentence2'],\n",
|
| 378 |
+
" padding='max_length',\n",
|
| 379 |
+
" truncation=True,\n",
|
| 380 |
+
" max_length=128)\n",
|
| 381 |
+
" \n",
|
| 382 |
+
"from datasets import load_dataset\n",
|
| 383 |
+
"\n",
|
| 384 |
+
"raw_datasets = load_dataset(\"glue\",\"mrpc\") \n",
|
| 385 |
+
"tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
|
| 386 |
+
"tokenized_datasets"
|
| 387 |
+
]
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"cell_type": "code",
|
| 391 |
+
"execution_count": 18,
|
| 392 |
+
"metadata": {},
|
| 393 |
+
"outputs": [
|
| 394 |
+
{
|
| 395 |
+
"data": {
|
| 396 |
+
"text/plain": [
|
| 397 |
+
"\"Once done, we are almost ready for training!\\nJust remove the columns we don't need anymore by the remove columns function\\nRename the column label to labels\\nand use the format torch\""
|
| 398 |
+
]
|
| 399 |
+
},
|
| 400 |
+
"execution_count": 18,
|
| 401 |
+
"metadata": {},
|
| 402 |
+
"output_type": "execute_result"
|
| 403 |
+
}
|
| 404 |
+
],
|
| 405 |
+
"source": [
|
| 406 |
+
"'''Once done, we are almost ready for training!\n",
|
| 407 |
+
"Just remove the columns we don't need anymore by the remove columns function\n",
|
| 408 |
+
"Rename the column label to labels\n",
|
| 409 |
+
"and use the format torch'''"
|
| 410 |
+
]
|
| 411 |
+
},
|
| 412 |
+
{
|
| 413 |
+
"cell_type": "code",
|
| 414 |
+
"execution_count": 20,
|
| 415 |
+
"metadata": {},
|
| 416 |
+
"outputs": [
|
| 417 |
+
{
|
| 418 |
+
"data": {
|
| 419 |
+
"text/plain": [
|
| 420 |
+
"Dataset({\n",
|
| 421 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 422 |
+
" num_rows: 3668\n",
|
| 423 |
+
"})"
|
| 424 |
+
]
|
| 425 |
+
},
|
| 426 |
+
"execution_count": 20,
|
| 427 |
+
"metadata": {},
|
| 428 |
+
"output_type": "execute_result"
|
| 429 |
+
}
|
| 430 |
+
],
|
| 431 |
+
"source": [
|
| 432 |
+
"tokenized_datasets = tokenized_datasets.remove_columns([\"idx\",\"sentence1\",\"sentence2\"])\n",
|
| 433 |
+
"tokenized_datasets = tokenized_datasets.rename_column(\"label\",\"labels\")\n",
|
| 434 |
+
"tokenized_datasets = tokenized_datasets.with_format(\"torch\") # The format could be torch/tensorflow/numpy~\n",
|
| 435 |
+
"tokenized_datasets[\"train\"]"
|
| 436 |
+
]
|
| 437 |
+
},
|
| 438 |
+
{
|
| 439 |
+
"cell_type": "code",
|
| 440 |
+
"execution_count": 21,
|
| 441 |
+
"metadata": {},
|
| 442 |
+
"outputs": [
|
| 443 |
+
{
|
| 444 |
+
"data": {
|
| 445 |
+
"text/plain": [
|
| 446 |
+
"'If needed we can also generate a short sample of the dataset using the select method!'"
|
| 447 |
+
]
|
| 448 |
+
},
|
| 449 |
+
"execution_count": 21,
|
| 450 |
+
"metadata": {},
|
| 451 |
+
"output_type": "execute_result"
|
| 452 |
+
}
|
| 453 |
+
],
|
| 454 |
+
"source": [
|
| 455 |
+
"'''If needed we can also generate a short sample of the dataset using the select method!'''\n"
|
| 456 |
+
]
|
| 457 |
+
},
|
| 458 |
+
{
|
| 459 |
+
"cell_type": "code",
|
| 460 |
+
"execution_count": 23,
|
| 461 |
+
"metadata": {},
|
| 462 |
+
"outputs": [
|
| 463 |
+
{
|
| 464 |
+
"data": {
|
| 465 |
+
"text/plain": [
|
| 466 |
+
"Dataset({\n",
|
| 467 |
+
" features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],\n",
|
| 468 |
+
" num_rows: 10\n",
|
| 469 |
+
"})"
|
| 470 |
+
]
|
| 471 |
+
},
|
| 472 |
+
"execution_count": 23,
|
| 473 |
+
"metadata": {},
|
| 474 |
+
"output_type": "execute_result"
|
| 475 |
+
}
|
| 476 |
+
],
|
| 477 |
+
"source": [
|
| 478 |
+
"small_train_dataset = tokenized_datasets[\"train\"].select(range(10))\n",
|
| 479 |
+
"small_train_dataset"
|
| 480 |
+
]
|
| 481 |
+
},
|
| 482 |
+
{
|
| 483 |
+
"cell_type": "code",
|
| 484 |
+
"execution_count": null,
|
| 485 |
+
"metadata": {},
|
| 486 |
+
"outputs": [],
|
| 487 |
+
"source": [
|
| 488 |
+
"'''As you can see, the number of training examples has now reduced to 10'''"
|
| 489 |
+
]
|
| 490 |
+
}
|
| 491 |
+
],
|
| 492 |
+
"metadata": {
|
| 493 |
+
"kernelspec": {
|
| 494 |
+
"display_name": "Python 3",
|
| 495 |
+
"language": "python",
|
| 496 |
+
"name": "python3"
|
| 497 |
+
},
|
| 498 |
+
"language_info": {
|
| 499 |
+
"codemirror_mode": {
|
| 500 |
+
"name": "ipython",
|
| 501 |
+
"version": 3
|
| 502 |
+
},
|
| 503 |
+
"file_extension": ".py",
|
| 504 |
+
"mimetype": "text/x-python",
|
| 505 |
+
"name": "python",
|
| 506 |
+
"nbconvert_exporter": "python",
|
| 507 |
+
"pygments_lexer": "ipython3",
|
| 508 |
+
"version": "3.11.6"
|
| 509 |
+
}
|
| 510 |
+
},
|
| 511 |
+
"nbformat": 4,
|
| 512 |
+
"nbformat_minor": 2
|
| 513 |
+
}
|
pipeline.ipynb
ADDED
|
@@ -0,0 +1,450 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 5,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"# ! pip install tensorflow"
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "code",
|
| 14 |
+
"execution_count": 6,
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"# ! pip install torch"
|
| 19 |
+
]
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"cell_type": "code",
|
| 23 |
+
"execution_count": 8,
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"source": [
|
| 27 |
+
"# ! pip install transformers"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"cell_type": "markdown",
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"source": [
|
| 34 |
+
"# Pipeline:\n",
|
| 35 |
+
"The pipeline function is the most high level api in the transformers library.\n",
|
| 36 |
+
"The pipeline function returns an end-to-end object that performs an NLP task on one or several texts.\n",
|
| 37 |
+
"A pipeline includes all the necessary pre-processing as the model does not expect texts but numbers, it feeds the numbers to the model and the post-processing to make the output human readable."
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "markdown",
|
| 42 |
+
"metadata": {},
|
| 43 |
+
"source": [
|
| 44 |
+
"# Sentiment Analysis Pipeline"
|
| 45 |
+
]
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"cell_type": "code",
|
| 49 |
+
"execution_count": 7,
|
| 50 |
+
"metadata": {},
|
| 51 |
+
"outputs": [
|
| 52 |
+
{
|
| 53 |
+
"name": "stderr",
|
| 54 |
+
"output_type": "stream",
|
| 55 |
+
"text": [
|
| 56 |
+
"Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at distilgpt2 and are newly initialized: ['score.weight']\n",
|
| 57 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 58 |
+
]
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"name": "stdout",
|
| 62 |
+
"output_type": "stream",
|
| 63 |
+
"text": [
|
| 64 |
+
"[{'label': 'LABEL_0', 'score': 0.06559786200523376}]\n",
|
| 65 |
+
"[{'label': 'LABEL_0', 'score': 0.12948733568191528}, {'label': 'LABEL_0', 'score': 0.12888683378696442}]\n"
|
| 66 |
+
]
|
| 67 |
+
}
|
| 68 |
+
],
|
| 69 |
+
"source": [
|
| 70 |
+
"from transformers import pipeline\n",
|
| 71 |
+
"\n",
|
| 72 |
+
"classifier = pipeline('sentiment-analysis', model='distilgpt2')\n",
|
| 73 |
+
"\n",
|
| 74 |
+
"# pass single text:\n",
|
| 75 |
+
"res = classifier(\"I've been waiting for a Huggingface course\")\n",
|
| 76 |
+
"print(res)\n",
|
| 77 |
+
"\n",
|
| 78 |
+
"# Pass multiple texts:\n",
|
| 79 |
+
"res = classifier(['I love you', 'I hate you'])\n",
|
| 80 |
+
"print(res)"
|
| 81 |
+
]
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"cell_type": "markdown",
|
| 85 |
+
"metadata": {},
|
| 86 |
+
"source": [
|
| 87 |
+
"# Zero Shot Classification Pipeline\n",
|
| 88 |
+
"Helps to classify what the sentence or topic is about "
|
| 89 |
+
]
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"cell_type": "code",
|
| 93 |
+
"execution_count": 6,
|
| 94 |
+
"metadata": {},
|
| 95 |
+
"outputs": [
|
| 96 |
+
{
|
| 97 |
+
"name": "stderr",
|
| 98 |
+
"output_type": "stream",
|
| 99 |
+
"text": [
|
| 100 |
+
"Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at distilgpt2 and are newly initialized: ['score.weight']\n",
|
| 101 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
| 102 |
+
"Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.\n",
|
| 103 |
+
"Tokenizer was not supporting padding necessary for zero-shot, attempting to use `pad_token=eos_token`\n"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"data": {
|
| 108 |
+
"text/plain": [
|
| 109 |
+
"{'sequence': 'This is a course about the Transformers library',\n",
|
| 110 |
+
" 'labels': ['education', 'ploitics', 'business'],\n",
|
| 111 |
+
" 'scores': [0.36338528990745544, 0.3443466126918793, 0.29226812720298767]}"
|
| 112 |
+
]
|
| 113 |
+
},
|
| 114 |
+
"execution_count": 6,
|
| 115 |
+
"metadata": {},
|
| 116 |
+
"output_type": "execute_result"
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"source": [
|
| 120 |
+
"from transformers import pipeline \n",
|
| 121 |
+
"\n",
|
| 122 |
+
"classifier = pipeline('zero-shot-classification', model='distilgpt2')\n",
|
| 123 |
+
"classifier('This is a course about the Transformers library',\n",
|
| 124 |
+
" candidate_labels=['education', 'ploitics', 'business'])"
|
| 125 |
+
]
|
| 126 |
+
},
|
| 127 |
+
{
|
| 128 |
+
"cell_type": "markdown",
|
| 129 |
+
"metadata": {},
|
| 130 |
+
"source": [
|
| 131 |
+
"# Text Generation pipeline:"
|
| 132 |
+
]
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"cell_type": "markdown",
|
| 136 |
+
"metadata": {},
|
| 137 |
+
"source": [
|
| 138 |
+
"Will auto-complete a given prompt. \n",
|
| 139 |
+
"Output is generated with a bit of randomness so it changes when you run it each time."
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "code",
|
| 144 |
+
"execution_count": 4,
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"outputs": [
|
| 147 |
+
{
|
| 148 |
+
"name": "stderr",
|
| 149 |
+
"output_type": "stream",
|
| 150 |
+
"text": [
|
| 151 |
+
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"data": {
|
| 156 |
+
"text/plain": [
|
| 157 |
+
"[{'generated_text': 'In this course we will teach you how to play and take your skill set as a starter, how to play, and as a player. I will'},\n",
|
| 158 |
+
" {'generated_text': 'In this course we will teach you how to convert to Java as your main operating system and write to your friends through our website at Google+.\\n\\n'}]"
|
| 159 |
+
]
|
| 160 |
+
},
|
| 161 |
+
"execution_count": 4,
|
| 162 |
+
"metadata": {},
|
| 163 |
+
"output_type": "execute_result"
|
| 164 |
+
}
|
| 165 |
+
],
|
| 166 |
+
"source": [
|
| 167 |
+
"from transformers import pipeline\n",
|
| 168 |
+
"\n",
|
| 169 |
+
"generator = pipeline('text-generation', model='distilgpt2')\n",
|
| 170 |
+
"generator('In this course we will teach you how to',\n",
|
| 171 |
+
" max_length=30,\n",
|
| 172 |
+
" num_return_sequences=2)"
|
| 173 |
+
]
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"cell_type": "code",
|
| 177 |
+
"execution_count": 8,
|
| 178 |
+
"metadata": {},
|
| 179 |
+
"outputs": [
|
| 180 |
+
{
|
| 181 |
+
"name": "stderr",
|
| 182 |
+
"output_type": "stream",
|
| 183 |
+
"text": [
|
| 184 |
+
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"data": {
|
| 189 |
+
"text/plain": [
|
| 190 |
+
"[{'generated_text': 'in this course we will teach you how to play with a real life experience. It will be a lot more about the importance of understanding and building a'},\n",
|
| 191 |
+
" {'generated_text': 'in this course we will teach you how to achieve the objectives of the program. We will teach you how to achieve the objectives of the program. We'}]"
|
| 192 |
+
]
|
| 193 |
+
},
|
| 194 |
+
"execution_count": 8,
|
| 195 |
+
"metadata": {},
|
| 196 |
+
"output_type": "execute_result"
|
| 197 |
+
}
|
| 198 |
+
],
|
| 199 |
+
"source": [
|
| 200 |
+
"from transformers import pipeline\n",
|
| 201 |
+
"\n",
|
| 202 |
+
"generator = pipeline('text-generation', model='distilgpt2')\n",
|
| 203 |
+
"generator('in this course we will teach you how to',\n",
|
| 204 |
+
" max_length=30,\n",
|
| 205 |
+
" num_return_sequences=2)"
|
| 206 |
+
]
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"cell_type": "markdown",
|
| 210 |
+
"metadata": {},
|
| 211 |
+
"source": [
|
| 212 |
+
"The text-generation pipeline is used with the model distilgpt2 above"
|
| 213 |
+
]
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"cell_type": "markdown",
|
| 217 |
+
"metadata": {},
|
| 218 |
+
"source": [
|
| 219 |
+
"# Fill Mask Pipeline\n",
|
| 220 |
+
"This pipeline is a pretraining objective of BERT. It guesses masked words, like fill in the blanks. \n",
|
| 221 |
+
"In this case we ask the pipeline to generate the two most likely words in the mask using top_k "
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
{
|
| 225 |
+
"cell_type": "code",
|
| 226 |
+
"execution_count": 2,
|
| 227 |
+
"metadata": {},
|
| 228 |
+
"outputs": [
|
| 229 |
+
{
|
| 230 |
+
"name": "stderr",
|
| 231 |
+
"output_type": "stream",
|
| 232 |
+
"text": [
|
| 233 |
+
"Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'bert.pooler.dense.bias']\n",
|
| 234 |
+
"- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
| 235 |
+
"- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
|
| 236 |
+
]
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"data": {
|
| 240 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 241 |
+
"model_id": "96e2afa0d0574883abf3b4e6a86ccaca",
|
| 242 |
+
"version_major": 2,
|
| 243 |
+
"version_minor": 0
|
| 244 |
+
},
|
| 245 |
+
"text/plain": [
|
| 246 |
+
"Downloading (…)okenizer_config.json: 0%| | 0.00/29.0 [00:00<?, ?B/s]"
|
| 247 |
+
]
|
| 248 |
+
},
|
| 249 |
+
"metadata": {},
|
| 250 |
+
"output_type": "display_data"
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"data": {
|
| 254 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 255 |
+
"model_id": "e4bfb78bd48b446aab7bcc7d1c2064fd",
|
| 256 |
+
"version_major": 2,
|
| 257 |
+
"version_minor": 0
|
| 258 |
+
},
|
| 259 |
+
"text/plain": [
|
| 260 |
+
"Downloading (…)solve/main/vocab.txt: 0%| | 0.00/213k [00:00<?, ?B/s]"
|
| 261 |
+
]
|
| 262 |
+
},
|
| 263 |
+
"metadata": {},
|
| 264 |
+
"output_type": "display_data"
|
| 265 |
+
},
|
| 266 |
+
{
|
| 267 |
+
"data": {
|
| 268 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 269 |
+
"model_id": "f3554d73b7914b8d824c843c347343ef",
|
| 270 |
+
"version_major": 2,
|
| 271 |
+
"version_minor": 0
|
| 272 |
+
},
|
| 273 |
+
"text/plain": [
|
| 274 |
+
"Downloading (…)/main/tokenizer.json: 0%| | 0.00/436k [00:00<?, ?B/s]"
|
| 275 |
+
]
|
| 276 |
+
},
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"output_type": "display_data"
|
| 279 |
+
},
|
| 280 |
+
{
|
| 281 |
+
"data": {
|
| 282 |
+
"text/plain": [
|
| 283 |
+
"[{'score': 0.2596316933631897,\n",
|
| 284 |
+
" 'token': 1648,\n",
|
| 285 |
+
" 'token_str': 'role',\n",
|
| 286 |
+
" 'sequence': 'This course will teach you all about role models.'},\n",
|
| 287 |
+
" {'score': 0.09427264332771301,\n",
|
| 288 |
+
" 'token': 1103,\n",
|
| 289 |
+
" 'token_str': 'the',\n",
|
| 290 |
+
" 'sequence': 'This course will teach you all about the models.'}]"
|
| 291 |
+
]
|
| 292 |
+
},
|
| 293 |
+
"execution_count": 2,
|
| 294 |
+
"metadata": {},
|
| 295 |
+
"output_type": "execute_result"
|
| 296 |
+
}
|
| 297 |
+
],
|
| 298 |
+
"source": [
|
| 299 |
+
"from transformers import pipeline\n",
|
| 300 |
+
"\n",
|
| 301 |
+
"unmasker = pipeline('fill-mask', model='bert-base-cased')\n",
|
| 302 |
+
"unmasker('This course will teach you all about [MASK] models.', top_k=2)\n"
|
| 303 |
+
]
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"cell_type": "markdown",
|
| 307 |
+
"metadata": {},
|
| 308 |
+
"source": [
|
| 309 |
+
"# Text Classifier Pipeline:\n",
|
| 310 |
+
"Named Entity Recognition Pipeline within the text classifier pipeline, which helps identify entities in a sentence."
|
| 311 |
+
]
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"cell_type": "code",
|
| 315 |
+
"execution_count": null,
|
| 316 |
+
"metadata": {},
|
| 317 |
+
"outputs": [],
|
| 318 |
+
"source": [
|
| 319 |
+
"from transformers import pipeline\n",
|
| 320 |
+
"\n",
|
| 321 |
+
"ner = pipeline('ner', grouped_entities=True, model='distilgpt2')\n",
|
| 322 |
+
"ner('My name is Abdullah and I work at Hackules in Bangladesh')"
|
| 323 |
+
]
|
| 324 |
+
},
|
| 325 |
+
{
|
| 326 |
+
"cell_type": "markdown",
|
| 327 |
+
"metadata": {},
|
| 328 |
+
"source": [
|
| 329 |
+
"# Extractive Question Answering\n",
|
| 330 |
+
"Another task available with pipeline api, is the extractive question answering.\n",
|
| 331 |
+
"Providing a context and a question the model will identify a span of text in the context containing the answer to the question\n",
|
| 332 |
+
"The model will classify whether the sentence is a question or an answer."
|
| 333 |
+
]
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"cell_type": "code",
|
| 337 |
+
"execution_count": null,
|
| 338 |
+
"metadata": {},
|
| 339 |
+
"outputs": [],
|
| 340 |
+
"source": [
|
| 341 |
+
"from transformers import pipeline\n",
|
| 342 |
+
"\n",
|
| 343 |
+
"question_answerer = pipeline('question-answering', model='distilgpt2')\n",
|
| 344 |
+
"question_answerer(\n",
|
| 345 |
+
" question='Where do I work?',\n",
|
| 346 |
+
" context='My name is Abdullah and I work at Hackules in Bangladesh'\n",
|
| 347 |
+
")"
|
| 348 |
+
]
|
| 349 |
+
},
|
| 350 |
+
{
|
| 351 |
+
"cell_type": "markdown",
|
| 352 |
+
"metadata": {},
|
| 353 |
+
"source": [
|
| 354 |
+
"# Summarization Pipeline:\n",
|
| 355 |
+
"Getting short summaries with articles."
|
| 356 |
+
]
|
| 357 |
+
},
|
| 358 |
+
{
|
| 359 |
+
"cell_type": "code",
|
| 360 |
+
"execution_count": null,
|
| 361 |
+
"metadata": {},
|
| 362 |
+
"outputs": [],
|
| 363 |
+
"source": [
|
| 364 |
+
"from transformers import pipeline\n",
|
| 365 |
+
"\n",
|
| 366 |
+
"summarizer = pipeline('summarization', model='distilgpt2')\n",
|
| 367 |
+
"summarizer('''\n",
|
| 368 |
+
"It was the 1st of November yesterday, and I had decided to grind my research paper to completion. I failed at the task but I did make some progress. I also discovered that, the conference papers can't be more than 10 pages long and too long conference papers get rejected. I really have a lot to learn about conferences and paper submissions but I don't have anybody to guide me through the steps. I am not complaining, I am just saying that it's going to take me a while but I will get there in shaa Allah!\n",
|
| 369 |
+
"''')"
|
| 370 |
+
]
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"cell_type": "markdown",
|
| 374 |
+
"metadata": {},
|
| 375 |
+
"source": [
|
| 376 |
+
"# Translation Pipeline:\n",
|
| 377 |
+
"The last task by the pipeline API is translation. "
|
| 378 |
+
]
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"cell_type": "code",
|
| 382 |
+
"execution_count": 16,
|
| 383 |
+
"metadata": {},
|
| 384 |
+
"outputs": [
|
| 385 |
+
{
|
| 386 |
+
"ename": "ValueError",
|
| 387 |
+
"evalue": "This tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed in order to use this tokenizer.",
|
| 388 |
+
"output_type": "error",
|
| 389 |
+
"traceback": [
|
| 390 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 391 |
+
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
| 392 |
+
"\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\pipeline.ipynb Cell 23\u001b[0m line \u001b[0;36m4\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/pipeline.ipynb#X54sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m \u001b[39m# ! pip install sentencepiece\u001b[39;00m\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/pipeline.ipynb#X54sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39msentencepiece\u001b[39;00m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/pipeline.ipynb#X54sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m translator \u001b[39m=\u001b[39m pipeline(\u001b[39m'\u001b[39;49m\u001b[39mtranslation\u001b[39;49m\u001b[39m'\u001b[39;49m, model\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mHelsinki-NLP/opus-mt-fr-en\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/pipeline.ipynb#X54sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m translator(\u001b[39m'\u001b[39m\u001b[39mCe cours est produit par Hugging Face.\u001b[39m\u001b[39m'\u001b[39m)\n",
|
| 393 |
+
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\pipelines\\__init__.py:931\u001b[0m, in \u001b[0;36mpipeline\u001b[1;34m(task, model, config, tokenizer, feature_extractor, image_processor, framework, revision, use_fast, token, device, device_map, torch_dtype, trust_remote_code, model_kwargs, pipeline_class, **kwargs)\u001b[0m\n\u001b[0;32m 928\u001b[0m tokenizer_kwargs \u001b[39m=\u001b[39m model_kwargs\u001b[39m.\u001b[39mcopy()\n\u001b[0;32m 929\u001b[0m tokenizer_kwargs\u001b[39m.\u001b[39mpop(\u001b[39m\"\u001b[39m\u001b[39mtorch_dtype\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m)\n\u001b[1;32m--> 931\u001b[0m tokenizer \u001b[39m=\u001b[39m AutoTokenizer\u001b[39m.\u001b[39;49mfrom_pretrained(\n\u001b[0;32m 932\u001b[0m tokenizer_identifier, use_fast\u001b[39m=\u001b[39;49muse_fast, _from_pipeline\u001b[39m=\u001b[39;49mtask, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mhub_kwargs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mtokenizer_kwargs\n\u001b[0;32m 933\u001b[0m )\n\u001b[0;32m 935\u001b[0m \u001b[39mif\u001b[39;00m load_image_processor:\n\u001b[0;32m 936\u001b[0m \u001b[39m# Try to infer image processor from model or config name (if provided as str)\u001b[39;00m\n\u001b[0;32m 937\u001b[0m \u001b[39mif\u001b[39;00m image_processor \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
|
| 394 |
+
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\models\\auto\\tokenization_auto.py:774\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[1;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[0;32m 772\u001b[0m \u001b[39mreturn\u001b[39;00m tokenizer_class_py\u001b[39m.\u001b[39mfrom_pretrained(pretrained_model_name_or_path, \u001b[39m*\u001b[39minputs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 773\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m--> 774\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 775\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mThis tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 776\u001b[0m \u001b[39m\"\u001b[39m\u001b[39min order to use this tokenizer.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 777\u001b[0m )\n\u001b[0;32m 779\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 780\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mUnrecognized configuration class \u001b[39m\u001b[39m{\u001b[39;00mconfig\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m to build an AutoTokenizer.\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 781\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mModel type should be one of \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m, \u001b[39m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39mjoin(c\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m\u001b[39m \u001b[39m\u001b[39mfor\u001b[39;00m\u001b[39m \u001b[39mc\u001b[39m \u001b[39m\u001b[39min\u001b[39;00m\u001b[39m \u001b[39mTOKENIZER_MAPPING\u001b[39m.\u001b[39mkeys())\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 782\u001b[0m )\n",
|
| 395 |
+
"\u001b[1;31mValueError\u001b[0m: This tokenizer cannot be instantiated. Please make sure you have `sentencepiece` installed in order to use this tokenizer."
|
| 396 |
+
]
|
| 397 |
+
}
|
| 398 |
+
],
|
| 399 |
+
"source": [
|
| 400 |
+
"from transformers import pipeline\n",
|
| 401 |
+
"# ! pip install sentencepiece\n",
|
| 402 |
+
"import sentencepiece\n",
|
| 403 |
+
"translator = pipeline('translation', model='Helsinki-NLP/opus-mt-fr-en')\n",
|
| 404 |
+
"translator('Ce cours est produit par Hugging Face.')"
|
| 405 |
+
]
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"cell_type": "markdown",
|
| 409 |
+
"metadata": {},
|
| 410 |
+
"source": [
|
| 411 |
+
"So, the following tasks are available within our Pipeline API:\n",
|
| 412 |
+
"\n",
|
| 413 |
+
"- Text-Classification(Also called sequence classification)\n",
|
| 414 |
+
"- Zero Shot Classification\n",
|
| 415 |
+
"- Text Generation\n",
|
| 416 |
+
"- Text Completion(mask filling)/ Masked Language Modeling\n",
|
| 417 |
+
"- Token Classification\n",
|
| 418 |
+
"- Question Answering\n",
|
| 419 |
+
"- Summarization\n",
|
| 420 |
+
"- Translation"
|
| 421 |
+
]
|
| 422 |
+
},
|
| 423 |
+
{
|
| 424 |
+
"cell_type": "markdown",
|
| 425 |
+
"metadata": {},
|
| 426 |
+
"source": []
|
| 427 |
+
}
|
| 428 |
+
],
|
| 429 |
+
"metadata": {
|
| 430 |
+
"kernelspec": {
|
| 431 |
+
"display_name": "Python 3",
|
| 432 |
+
"language": "python",
|
| 433 |
+
"name": "python3"
|
| 434 |
+
},
|
| 435 |
+
"language_info": {
|
| 436 |
+
"codemirror_mode": {
|
| 437 |
+
"name": "ipython",
|
| 438 |
+
"version": 3
|
| 439 |
+
},
|
| 440 |
+
"file_extension": ".py",
|
| 441 |
+
"mimetype": "text/x-python",
|
| 442 |
+
"name": "python",
|
| 443 |
+
"nbconvert_exporter": "python",
|
| 444 |
+
"pygments_lexer": "ipython3",
|
| 445 |
+
"version": "3.11.6"
|
| 446 |
+
}
|
| 447 |
+
},
|
| 448 |
+
"nbformat": 4,
|
| 449 |
+
"nbformat_minor": 2
|
| 450 |
+
}
|
pipeline2.ipynb
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"data": {
|
| 10 |
+
"text/plain": [
|
| 11 |
+
"[{'label': 'NEGATIVE', 'score': 0.9961605072021484}]"
|
| 12 |
+
]
|
| 13 |
+
},
|
| 14 |
+
"execution_count": 1,
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"output_type": "execute_result"
|
| 17 |
+
}
|
| 18 |
+
],
|
| 19 |
+
"source": [
|
| 20 |
+
"from transformers import pipeline\n",
|
| 21 |
+
"\n",
|
| 22 |
+
"classifier = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')\n",
|
| 23 |
+
"classifier('I suck at coding')"
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"cell_type": "code",
|
| 28 |
+
"execution_count": 7,
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"outputs": [
|
| 31 |
+
{
|
| 32 |
+
"name": "stdout",
|
| 33 |
+
"output_type": "stream",
|
| 34 |
+
"text": [
|
| 35 |
+
"{'input_ids': tensor([[ 101, 1045, 2031, 2042, 3403, 2005, 1037, 17662, 2227, 14924,\n",
|
| 36 |
+
" 4818, 2026, 2878, 2166, 1010, 1045, 2572, 2061, 3407, 999,\n",
|
| 37 |
+
" 102],\n",
|
| 38 |
+
" [ 101, 1045, 2572, 2061, 9364, 1999, 2026, 3754, 2000, 4553,\n",
|
| 39 |
+
" 102, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
| 40 |
+
" 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
|
| 41 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}\n",
|
| 42 |
+
"torch.Size([2, 21, 768])\n",
|
| 43 |
+
"BaseModelOutput(last_hidden_state=tensor([[[ 0.4223, 0.2644, 0.2841, ..., 0.5133, 0.7970, -0.5389],\n",
|
| 44 |
+
" [ 0.6034, 0.4271, 0.2106, ..., 0.5094, 0.8745, -0.4014],\n",
|
| 45 |
+
" [ 0.6883, 0.5008, 0.2713, ..., 0.4099, 0.7448, -0.1239],\n",
|
| 46 |
+
" ...,\n",
|
| 47 |
+
" [ 0.5705, 0.3254, 0.1810, ..., 0.5382, 0.7913, -0.5631],\n",
|
| 48 |
+
" [ 0.5371, 0.2903, 0.1535, ..., 0.5578, 0.8199, -0.4776],\n",
|
| 49 |
+
" [ 1.2266, 0.2534, 0.4621, ..., 0.7747, 0.5059, -0.8307]],\n",
|
| 50 |
+
"\n",
|
| 51 |
+
" [[-0.7461, 0.9088, -0.0971, ..., 0.1799, -0.9920, -0.4135],\n",
|
| 52 |
+
" [-0.7748, 0.9645, -0.0093, ..., -0.0285, -0.9143, -0.1535],\n",
|
| 53 |
+
" [-0.8590, 0.9238, -0.0158, ..., 0.0062, -1.0241, -0.1349],\n",
|
| 54 |
+
" ...,\n",
|
| 55 |
+
" [-0.6346, 0.9681, -0.0236, ..., 0.1793, -1.1010, -0.2452],\n",
|
| 56 |
+
" [-0.5911, 0.9420, -0.1765, ..., 0.2015, -1.0720, -0.2666],\n",
|
| 57 |
+
" [-0.5166, 0.9548, -0.1337, ..., 0.2211, -1.0757, -0.2626]]],\n",
|
| 58 |
+
" grad_fn=<NativeLayerNormBackward0>), hidden_states=None, attentions=None)\n",
|
| 59 |
+
"tensor([[-4.2574, 4.6149],\n",
|
| 60 |
+
" [ 4.6649, -3.7399]], grad_fn=<AddmmBackward0>)\n",
|
| 61 |
+
"SequenceClassifierOutput(loss=None, logits=tensor([[-4.2574, 4.6149],\n",
|
| 62 |
+
" [ 4.6649, -3.7399]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)\n",
|
| 63 |
+
"tensor([[1.4020e-04, 9.9986e-01],\n",
|
| 64 |
+
" [9.9978e-01, 2.2374e-04]], grad_fn=<SoftmaxBackward0>)\n",
|
| 65 |
+
"tensor(2)\n",
|
| 66 |
+
"2\n",
|
| 67 |
+
"{0: 'NEGATIVE', 1: 'POSITIVE'}\n"
|
| 68 |
+
]
|
| 69 |
+
}
|
| 70 |
+
],
|
| 71 |
+
"source": [
|
| 72 |
+
"from transformers import AutoTokenizer\n",
|
| 73 |
+
"\n",
|
| 74 |
+
"tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased-finetuned-sst-2-english')\n",
|
| 75 |
+
"raw_inputs = ['''I have been waiting for a hugging face tutorial my whole life, i am so happy!''',\n",
|
| 76 |
+
" 'I am so disappointed in my ability to learn']\n",
|
| 77 |
+
"inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')\n",
|
| 78 |
+
"print(inputs)\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"from transformers import AutoModel\n",
|
| 81 |
+
"\n",
|
| 82 |
+
"model = AutoModel.from_pretrained('distilbert-base-uncased-finetuned-sst-2-english')\n",
|
| 83 |
+
"outputs = model(**inputs)\n",
|
| 84 |
+
"print(outputs.last_hidden_state.shape)\n",
|
| 85 |
+
"print(outputs)\n",
|
| 86 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 87 |
+
"\n",
|
| 88 |
+
"model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased-finetuned-sst-2-english')\n",
|
| 89 |
+
"outputs = model(**inputs)\n",
|
| 90 |
+
"print(outputs.logits)\n",
|
| 91 |
+
"print(outputs)\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"import torch\n",
|
| 94 |
+
"torch_inputs = outputs.logits\n",
|
| 95 |
+
"probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
|
| 96 |
+
"print(probabilities)\n",
|
| 97 |
+
"\n",
|
| 98 |
+
"# check output rating:\n",
|
| 99 |
+
"print(torch.argmax(outputs.logits))\n",
|
| 100 |
+
"print(int(torch.argmax(outputs.logits)))\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"print(model.config.id2label)"
|
| 103 |
+
]
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"cell_type": "code",
|
| 107 |
+
"execution_count": null,
|
| 108 |
+
"metadata": {},
|
| 109 |
+
"outputs": [],
|
| 110 |
+
"source": []
|
| 111 |
+
}
|
| 112 |
+
],
|
| 113 |
+
"metadata": {
|
| 114 |
+
"kernelspec": {
|
| 115 |
+
"display_name": "Python 3",
|
| 116 |
+
"language": "python",
|
| 117 |
+
"name": "python3"
|
| 118 |
+
},
|
| 119 |
+
"language_info": {
|
| 120 |
+
"codemirror_mode": {
|
| 121 |
+
"name": "ipython",
|
| 122 |
+
"version": 3
|
| 123 |
+
},
|
| 124 |
+
"file_extension": ".py",
|
| 125 |
+
"mimetype": "text/x-python",
|
| 126 |
+
"name": "python",
|
| 127 |
+
"nbconvert_exporter": "python",
|
| 128 |
+
"pygments_lexer": "ipython3",
|
| 129 |
+
"version": "3.11.6"
|
| 130 |
+
}
|
| 131 |
+
},
|
| 132 |
+
"nbformat": 4,
|
| 133 |
+
"nbformat_minor": 2
|
| 134 |
+
}
|
prac.ipynb
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stdout",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"['lets', '##s', 'try', 'to', 'token', '##ize']\n"
|
| 13 |
+
]
|
| 14 |
+
}
|
| 15 |
+
],
|
| 16 |
+
"source": [
|
| 17 |
+
"from transformers import AutoTokenizer\n",
|
| 18 |
+
"\n",
|
| 19 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 20 |
+
"tokens = tokenizer.tokenize('''Letss try to tokenize''')\n",
|
| 21 |
+
"print(tokens)"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 2,
|
| 27 |
+
"metadata": {},
|
| 28 |
+
"outputs": [
|
| 29 |
+
{
|
| 30 |
+
"data": {
|
| 31 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 32 |
+
"model_id": "c2fdecef86644ec1b3467bf653e8d30d",
|
| 33 |
+
"version_major": 2,
|
| 34 |
+
"version_minor": 0
|
| 35 |
+
},
|
| 36 |
+
"text/plain": [
|
| 37 |
+
"Downloading (…)lve/main/config.json: 0%| | 0.00/684 [00:00<?, ?B/s]"
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
"metadata": {},
|
| 41 |
+
"output_type": "display_data"
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"data": {
|
| 45 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 46 |
+
"model_id": "6a4c6b7d714c40ca9695acf581de7bb2",
|
| 47 |
+
"version_major": 2,
|
| 48 |
+
"version_minor": 0
|
| 49 |
+
},
|
| 50 |
+
"text/plain": [
|
| 51 |
+
"Downloading (…)ve/main/spiece.model: 0%| | 0.00/760k [00:00<?, ?B/s]"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
"metadata": {},
|
| 55 |
+
"output_type": "display_data"
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"data": {
|
| 59 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 60 |
+
"model_id": "49267a76ecfc4aee9d4906e96ddbca5b",
|
| 61 |
+
"version_major": 2,
|
| 62 |
+
"version_minor": 0
|
| 63 |
+
},
|
| 64 |
+
"text/plain": [
|
| 65 |
+
"Downloading (…)/main/tokenizer.json: 0%| | 0.00/1.31M [00:00<?, ?B/s]"
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"metadata": {},
|
| 69 |
+
"output_type": "display_data"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "stdout",
|
| 73 |
+
"output_type": "stream",
|
| 74 |
+
"text": [
|
| 75 |
+
"['▁let', \"'\", 's', '▁learn', '▁to', '▁code', '▁in', '▁hugging', 'face']\n"
|
| 76 |
+
]
|
| 77 |
+
}
|
| 78 |
+
],
|
| 79 |
+
"source": [
|
| 80 |
+
"from transformers import AutoTokenizer\n",
|
| 81 |
+
"\n",
|
| 82 |
+
"albert_tokenizer = AutoTokenizer.from_pretrained('albert-base-v2')\n",
|
| 83 |
+
"tokens = albert_tokenizer.tokenize('''Let's learn to code in huggingface''')\n",
|
| 84 |
+
"print(tokens)"
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"cell_type": "code",
|
| 89 |
+
"execution_count": 3,
|
| 90 |
+
"metadata": {},
|
| 91 |
+
"outputs": [
|
| 92 |
+
{
|
| 93 |
+
"name": "stdout",
|
| 94 |
+
"output_type": "stream",
|
| 95 |
+
"text": [
|
| 96 |
+
"['these', 'are', 'broken', 'down', 'into', 'token', '##s']\n",
|
| 97 |
+
"[2122, 2024, 3714, 2091, 2046, 19204, 2015]\n"
|
| 98 |
+
]
|
| 99 |
+
}
|
| 100 |
+
],
|
| 101 |
+
"source": [
|
| 102 |
+
"from transformers import AutoTokenizer\n",
|
| 103 |
+
"\n",
|
| 104 |
+
"tokeninzer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 105 |
+
"tokens = tokenizer.tokenize('These are broken down into tokens')\n",
|
| 106 |
+
"print(tokens)\n",
|
| 107 |
+
"input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
|
| 108 |
+
"print(input_ids)"
|
| 109 |
+
]
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"cell_type": "code",
|
| 113 |
+
"execution_count": 4,
|
| 114 |
+
"metadata": {},
|
| 115 |
+
"outputs": [
|
| 116 |
+
{
|
| 117 |
+
"name": "stdout",
|
| 118 |
+
"output_type": "stream",
|
| 119 |
+
"text": [
|
| 120 |
+
"['this', 'is', 'me', 'practicing']\n",
|
| 121 |
+
"[2023, 2003, 2033, 12560]\n",
|
| 122 |
+
"['this', 'is', 'me', 'practicing']\n"
|
| 123 |
+
]
|
| 124 |
+
}
|
| 125 |
+
],
|
| 126 |
+
"source": [
|
| 127 |
+
"from transformers import AutoTokenizer\n",
|
| 128 |
+
"\n",
|
| 129 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 130 |
+
"tokens = tokenizer.tokenize('This is me practicing')\n",
|
| 131 |
+
"print(tokens)\n",
|
| 132 |
+
"\n",
|
| 133 |
+
"input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
|
| 134 |
+
"print(input_ids)\n",
|
| 135 |
+
"\n",
|
| 136 |
+
"tokens = tokenizer.convert_ids_to_tokens(input_ids)\n",
|
| 137 |
+
"print(tokens)"
|
| 138 |
+
]
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"cell_type": "code",
|
| 142 |
+
"execution_count": 5,
|
| 143 |
+
"metadata": {},
|
| 144 |
+
"outputs": [
|
| 145 |
+
{
|
| 146 |
+
"name": "stdout",
|
| 147 |
+
"output_type": "stream",
|
| 148 |
+
"text": [
|
| 149 |
+
"['this', 'is', 'me', 'practicing']\n",
|
| 150 |
+
"[2023, 2003, 2033, 12560]\n",
|
| 151 |
+
"['this', 'is', 'me', 'practicing']\n",
|
| 152 |
+
"this is me practicing\n"
|
| 153 |
+
]
|
| 154 |
+
}
|
| 155 |
+
],
|
| 156 |
+
"source": [
|
| 157 |
+
"from transformers import AutoTokenizer\n",
|
| 158 |
+
"\n",
|
| 159 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 160 |
+
"tokens = tokenizer.tokenize('This is me practicing')\n",
|
| 161 |
+
"print(tokens)\n",
|
| 162 |
+
"input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
|
| 163 |
+
"print(input_ids)\n",
|
| 164 |
+
"tokens_2 = tokenizer.convert_ids_to_tokens(input_ids)\n",
|
| 165 |
+
"print(tokens_2)\n",
|
| 166 |
+
"strings = tokenizer.convert_tokens_to_string(tokens)\n",
|
| 167 |
+
"print(strings)"
|
| 168 |
+
]
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"cell_type": "code",
|
| 172 |
+
"execution_count": 6,
|
| 173 |
+
"metadata": {},
|
| 174 |
+
"outputs": [
|
| 175 |
+
{
|
| 176 |
+
"name": "stderr",
|
| 177 |
+
"output_type": "stream",
|
| 178 |
+
"text": [
|
| 179 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 180 |
+
]
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"name": "stdout",
|
| 184 |
+
"output_type": "stream",
|
| 185 |
+
"text": [
|
| 186 |
+
"{'input_ids': [101, 2023, 2003, 2033, 12560, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1]}\n"
|
| 187 |
+
]
|
| 188 |
+
}
|
| 189 |
+
],
|
| 190 |
+
"source": [
|
| 191 |
+
"final_ids = tokenizer.prepare_for_model(input_ids)\n",
|
| 192 |
+
"print(final_ids)\n"
|
| 193 |
+
]
|
| 194 |
+
},
|
| 195 |
+
{
|
| 196 |
+
"cell_type": "code",
|
| 197 |
+
"execution_count": 7,
|
| 198 |
+
"metadata": {},
|
| 199 |
+
"outputs": [
|
| 200 |
+
{
|
| 201 |
+
"name": "stderr",
|
| 202 |
+
"output_type": "stream",
|
| 203 |
+
"text": [
|
| 204 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 205 |
+
]
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"name": "stdout",
|
| 209 |
+
"output_type": "stream",
|
| 210 |
+
"text": [
|
| 211 |
+
"['this', 'is', 'me', 'practicing', 'the', 'use', 'of', 'auto', '##tok', '##eni', '##zer']\n",
|
| 212 |
+
"[2023, 2003, 2033, 12560, 1996, 2224, 1997, 8285, 18715, 18595, 6290]\n",
|
| 213 |
+
"{'input_ids': [101, 2023, 2003, 2033, 12560, 1996, 2224, 1997, 8285, 18715, 18595, 6290, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}\n",
|
| 214 |
+
"[CLS] this is me practicing the use of autotokenizer [SEP]\n"
|
| 215 |
+
]
|
| 216 |
+
}
|
| 217 |
+
],
|
| 218 |
+
"source": [
|
| 219 |
+
"from transformers import AutoTokenizer\n",
|
| 220 |
+
"\n",
|
| 221 |
+
"sentence = 'This is me practicing the use of AutoTokenizer'\n",
|
| 222 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 223 |
+
"tokens = tokenizer.tokenize(sentence)\n",
|
| 224 |
+
"print(tokens)\n",
|
| 225 |
+
"input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
|
| 226 |
+
"print(input_ids)\n",
|
| 227 |
+
"inputs = tokenizer.prepare_for_model(input_ids)\n",
|
| 228 |
+
"print(inputs)\n",
|
| 229 |
+
"\n",
|
| 230 |
+
"decode = tokenizer.decode(inputs['input_ids'])\n",
|
| 231 |
+
"print(decode)"
|
| 232 |
+
]
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"cell_type": "code",
|
| 236 |
+
"execution_count": null,
|
| 237 |
+
"metadata": {},
|
| 238 |
+
"outputs": [],
|
| 239 |
+
"source": []
|
| 240 |
+
}
|
| 241 |
+
],
|
| 242 |
+
"metadata": {
|
| 243 |
+
"kernelspec": {
|
| 244 |
+
"display_name": "Python 3",
|
| 245 |
+
"language": "python",
|
| 246 |
+
"name": "python3"
|
| 247 |
+
},
|
| 248 |
+
"language_info": {
|
| 249 |
+
"codemirror_mode": {
|
| 250 |
+
"name": "ipython",
|
| 251 |
+
"version": 3
|
| 252 |
+
},
|
| 253 |
+
"file_extension": ".py",
|
| 254 |
+
"mimetype": "text/x-python",
|
| 255 |
+
"name": "python",
|
| 256 |
+
"nbconvert_exporter": "python",
|
| 257 |
+
"pygments_lexer": "ipython3",
|
| 258 |
+
"version": "3.11.6"
|
| 259 |
+
}
|
| 260 |
+
},
|
| 261 |
+
"nbformat": 4,
|
| 262 |
+
"nbformat_minor": 2
|
| 263 |
+
}
|
practise basics.ipynb
ADDED
|
@@ -0,0 +1,962 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stderr",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
|
| 13 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 14 |
+
]
|
| 15 |
+
}
|
| 16 |
+
],
|
| 17 |
+
"source": [
|
| 18 |
+
"import transformers\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"from transformers import pipeline\n",
|
| 21 |
+
"\n",
|
| 22 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 23 |
+
"classifier = pipeline('sentiment-analysis', model=checkpoint)"
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"cell_type": "code",
|
| 28 |
+
"execution_count": 2,
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"outputs": [
|
| 31 |
+
{
|
| 32 |
+
"data": {
|
| 33 |
+
"text/plain": [
|
| 34 |
+
"[{'label': 'LABEL_1', 'score': 0.5578101277351379}]"
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
"execution_count": 2,
|
| 38 |
+
"metadata": {},
|
| 39 |
+
"output_type": "execute_result"
|
| 40 |
+
}
|
| 41 |
+
],
|
| 42 |
+
"source": [
|
| 43 |
+
"classifier('This is a test sentence')"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": 5,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [
|
| 51 |
+
{
|
| 52 |
+
"name": "stderr",
|
| 53 |
+
"output_type": "stream",
|
| 54 |
+
"text": [
|
| 55 |
+
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
|
| 56 |
+
]
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"data": {
|
| 60 |
+
"text/plain": [
|
| 61 |
+
"[{'generated_text': 'In this course, I will teach you how to make a really big use of the language you\\u202are learning to use.\\u202a'},\n",
|
| 62 |
+
" {'generated_text': 'In this course, I will teach you how to manipulate sound design to better enhance your sound design while also illustrating the application of certain audio and video technologies. In this section I will introduce some examples of how to manipulate sound design in my introductory video.'}]"
|
| 63 |
+
]
|
| 64 |
+
},
|
| 65 |
+
"execution_count": 5,
|
| 66 |
+
"metadata": {},
|
| 67 |
+
"output_type": "execute_result"
|
| 68 |
+
}
|
| 69 |
+
],
|
| 70 |
+
"source": [
|
| 71 |
+
"generator = pipeline('text-generation', model='distilgpt2')\n",
|
| 72 |
+
"generator('In this course, I will teach you how to',\n",
|
| 73 |
+
" max_length=50, num_return_sequences = 2\n",
|
| 74 |
+
" )"
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"cell_type": "code",
|
| 79 |
+
"execution_count": 6,
|
| 80 |
+
"metadata": {},
|
| 81 |
+
"outputs": [
|
| 82 |
+
{
|
| 83 |
+
"name": "stderr",
|
| 84 |
+
"output_type": "stream",
|
| 85 |
+
"text": [
|
| 86 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
|
| 87 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 88 |
+
]
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"data": {
|
| 92 |
+
"text/plain": [
|
| 93 |
+
"[{'label': 'LABEL_0', 'score': 0.6686140894889832}]"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
"execution_count": 6,
|
| 97 |
+
"metadata": {},
|
| 98 |
+
"output_type": "execute_result"
|
| 99 |
+
}
|
| 100 |
+
],
|
| 101 |
+
"source": [
|
| 102 |
+
"# inside the pipeline function;\n",
|
| 103 |
+
"\n",
|
| 104 |
+
"from transformers import pipeline\n",
|
| 105 |
+
"classifier = pipeline('sentiment-analysis', model=checkpoint)\n",
|
| 106 |
+
"classifier('I am very sad')\n"
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"cell_type": "code",
|
| 111 |
+
"execution_count": 7,
|
| 112 |
+
"metadata": {},
|
| 113 |
+
"outputs": [],
|
| 114 |
+
"source": [
|
| 115 |
+
"# Tokenization: Raw text -> Tokenizer -> Tokenized Text -> Input IDs for model"
|
| 116 |
+
]
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"cell_type": "code",
|
| 120 |
+
"execution_count": 8,
|
| 121 |
+
"metadata": {},
|
| 122 |
+
"outputs": [],
|
| 123 |
+
"source": [
|
| 124 |
+
"from transformers import AutoTokenizer\n",
|
| 125 |
+
"\n",
|
| 126 |
+
"checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
|
| 127 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 128 |
+
"\n",
|
| 129 |
+
"raw_inputs = ['This is a course on huggingface',\n",
|
| 130 |
+
" 'I am very disgusted at my stupidity']\n",
|
| 131 |
+
"\n",
|
| 132 |
+
"inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')"
|
| 133 |
+
]
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"cell_type": "code",
|
| 137 |
+
"execution_count": 9,
|
| 138 |
+
"metadata": {},
|
| 139 |
+
"outputs": [
|
| 140 |
+
{
|
| 141 |
+
"data": {
|
| 142 |
+
"text/plain": [
|
| 143 |
+
"{'input_ids': tensor([[ 101, 2023, 2003, 1037, 2607, 2006, 17662, 12172, 102],\n",
|
| 144 |
+
" [ 101, 1045, 2572, 2200, 17733, 2012, 2026, 28072, 102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
|
| 145 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 1]])}"
|
| 146 |
+
]
|
| 147 |
+
},
|
| 148 |
+
"execution_count": 9,
|
| 149 |
+
"metadata": {},
|
| 150 |
+
"output_type": "execute_result"
|
| 151 |
+
}
|
| 152 |
+
],
|
| 153 |
+
"source": [
|
| 154 |
+
"inputs"
|
| 155 |
+
]
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"cell_type": "code",
|
| 159 |
+
"execution_count": 10,
|
| 160 |
+
"metadata": {},
|
| 161 |
+
"outputs": [
|
| 162 |
+
{
|
| 163 |
+
"data": {
|
| 164 |
+
"text/plain": [
|
| 165 |
+
"Encoding(num_tokens=9, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])"
|
| 166 |
+
]
|
| 167 |
+
},
|
| 168 |
+
"execution_count": 10,
|
| 169 |
+
"metadata": {},
|
| 170 |
+
"output_type": "execute_result"
|
| 171 |
+
}
|
| 172 |
+
],
|
| 173 |
+
"source": [
|
| 174 |
+
"inputs[0]"
|
| 175 |
+
]
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"cell_type": "code",
|
| 179 |
+
"execution_count": 11,
|
| 180 |
+
"metadata": {},
|
| 181 |
+
"outputs": [
|
| 182 |
+
{
|
| 183 |
+
"data": {
|
| 184 |
+
"text/plain": [
|
| 185 |
+
"tensor([[ 101, 2023, 2003, 1037, 2607, 2006, 17662, 12172, 102],\n",
|
| 186 |
+
" [ 101, 1045, 2572, 2200, 17733, 2012, 2026, 28072, 102]])"
|
| 187 |
+
]
|
| 188 |
+
},
|
| 189 |
+
"execution_count": 11,
|
| 190 |
+
"metadata": {},
|
| 191 |
+
"output_type": "execute_result"
|
| 192 |
+
}
|
| 193 |
+
],
|
| 194 |
+
"source": [
|
| 195 |
+
"inputs['input_ids']"
|
| 196 |
+
]
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"cell_type": "code",
|
| 200 |
+
"execution_count": 12,
|
| 201 |
+
"metadata": {},
|
| 202 |
+
"outputs": [
|
| 203 |
+
{
|
| 204 |
+
"data": {
|
| 205 |
+
"text/plain": [
|
| 206 |
+
"tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
|
| 207 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 1]])"
|
| 208 |
+
]
|
| 209 |
+
},
|
| 210 |
+
"execution_count": 12,
|
| 211 |
+
"metadata": {},
|
| 212 |
+
"output_type": "execute_result"
|
| 213 |
+
}
|
| 214 |
+
],
|
| 215 |
+
"source": [
|
| 216 |
+
"inputs['attention_mask']"
|
| 217 |
+
]
|
| 218 |
+
},
|
| 219 |
+
{
|
| 220 |
+
"cell_type": "code",
|
| 221 |
+
"execution_count": 13,
|
| 222 |
+
"metadata": {},
|
| 223 |
+
"outputs": [],
|
| 224 |
+
"source": [
|
| 225 |
+
"from transformers import AutoTokenizer\n",
|
| 226 |
+
"\n",
|
| 227 |
+
"checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
|
| 228 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 229 |
+
"\n",
|
| 230 |
+
"raw_inputs = ['This is very good','I am learning slowly.. sad']\n",
|
| 231 |
+
"\n",
|
| 232 |
+
"inputs=tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "code",
|
| 237 |
+
"execution_count": 14,
|
| 238 |
+
"metadata": {},
|
| 239 |
+
"outputs": [
|
| 240 |
+
{
|
| 241 |
+
"data": {
|
| 242 |
+
"text/plain": [
|
| 243 |
+
"{'input_ids': tensor([[ 101, 2023, 2003, 2200, 2204, 102, 0, 0, 0],\n",
|
| 244 |
+
" [ 101, 1045, 2572, 4083, 3254, 1012, 1012, 6517, 102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0],\n",
|
| 245 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 1]])}"
|
| 246 |
+
]
|
| 247 |
+
},
|
| 248 |
+
"execution_count": 14,
|
| 249 |
+
"metadata": {},
|
| 250 |
+
"output_type": "execute_result"
|
| 251 |
+
}
|
| 252 |
+
],
|
| 253 |
+
"source": [
|
| 254 |
+
"inputs"
|
| 255 |
+
]
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"cell_type": "code",
|
| 259 |
+
"execution_count": 15,
|
| 260 |
+
"metadata": {},
|
| 261 |
+
"outputs": [
|
| 262 |
+
{
|
| 263 |
+
"data": {
|
| 264 |
+
"text/plain": [
|
| 265 |
+
"tensor([[ 101, 2023, 2003, 2200, 2204, 102, 0, 0, 0],\n",
|
| 266 |
+
" [ 101, 1045, 2572, 4083, 3254, 1012, 1012, 6517, 102]])"
|
| 267 |
+
]
|
| 268 |
+
},
|
| 269 |
+
"execution_count": 15,
|
| 270 |
+
"metadata": {},
|
| 271 |
+
"output_type": "execute_result"
|
| 272 |
+
}
|
| 273 |
+
],
|
| 274 |
+
"source": [
|
| 275 |
+
"inputs['input_ids']"
|
| 276 |
+
]
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"cell_type": "code",
|
| 280 |
+
"execution_count": 16,
|
| 281 |
+
"metadata": {},
|
| 282 |
+
"outputs": [
|
| 283 |
+
{
|
| 284 |
+
"data": {
|
| 285 |
+
"text/plain": [
|
| 286 |
+
"tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0],\n",
|
| 287 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 1]])"
|
| 288 |
+
]
|
| 289 |
+
},
|
| 290 |
+
"execution_count": 16,
|
| 291 |
+
"metadata": {},
|
| 292 |
+
"output_type": "execute_result"
|
| 293 |
+
}
|
| 294 |
+
],
|
| 295 |
+
"source": [
|
| 296 |
+
"inputs['attention_mask']"
|
| 297 |
+
]
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"cell_type": "code",
|
| 301 |
+
"execution_count": 17,
|
| 302 |
+
"metadata": {},
|
| 303 |
+
"outputs": [
|
| 304 |
+
{
|
| 305 |
+
"name": "stdout",
|
| 306 |
+
"output_type": "stream",
|
| 307 |
+
"text": [
|
| 308 |
+
"torch.Size([2, 9, 768])\n"
|
| 309 |
+
]
|
| 310 |
+
}
|
| 311 |
+
],
|
| 312 |
+
"source": [
|
| 313 |
+
"from transformers import AutoModel\n",
|
| 314 |
+
"\n",
|
| 315 |
+
"model = AutoModel.from_pretrained(checkpoint)\n",
|
| 316 |
+
"outputs = model(**inputs)\n",
|
| 317 |
+
"print(outputs.last_hidden_state.shape)"
|
| 318 |
+
]
|
| 319 |
+
},
|
| 320 |
+
{
|
| 321 |
+
"cell_type": "code",
|
| 322 |
+
"execution_count": 23,
|
| 323 |
+
"metadata": {},
|
| 324 |
+
"outputs": [
|
| 325 |
+
{
|
| 326 |
+
"name": "stdout",
|
| 327 |
+
"output_type": "stream",
|
| 328 |
+
"text": [
|
| 329 |
+
"{'input_ids': tensor([[ 101, 2023, 2003, 2200, 2204, 102, 0, 0],\n",
|
| 330 |
+
" [ 101, 1045, 2572, 2667, 2000, 4553, 2242, 102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0, 0],\n",
|
| 331 |
+
" [1, 1, 1, 1, 1, 1, 1, 1]])}\n",
|
| 332 |
+
"torch.Size([2, 8, 768])\n",
|
| 333 |
+
"torch.Size([2, 2])\n",
|
| 334 |
+
"SequenceClassifierOutput(loss=None, logits=tensor([[-4.1928, 4.5727],\n",
|
| 335 |
+
" [ 1.9190, -1.6084]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)\n",
|
| 336 |
+
"tensor([[-4.1928, 4.5727],\n",
|
| 337 |
+
" [ 1.9190, -1.6084]], grad_fn=<AddmmBackward0>)\n",
|
| 338 |
+
"tensor([[1.5600e-04, 9.9984e-01],\n",
|
| 339 |
+
" [9.7146e-01, 2.8543e-02]], grad_fn=<SoftmaxBackward0>)\n",
|
| 340 |
+
"{0: 'NEGATIVE', 1: 'POSITIVE'}\n"
|
| 341 |
+
]
|
| 342 |
+
}
|
| 343 |
+
],
|
| 344 |
+
"source": [
|
| 345 |
+
"from transformers import AutoTokenizer\n",
|
| 346 |
+
"\n",
|
| 347 |
+
"checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
|
| 348 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 349 |
+
"\n",
|
| 350 |
+
"raw_inputs = ['This is very good', 'I am trying to learn something']\n",
|
| 351 |
+
"\n",
|
| 352 |
+
"inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')\n",
|
| 353 |
+
"\n",
|
| 354 |
+
"print(inputs)\n",
|
| 355 |
+
"\n",
|
| 356 |
+
"from transformers import AutoModel\n",
|
| 357 |
+
"\n",
|
| 358 |
+
"model = AutoModel.from_pretrained(checkpoint)\n",
|
| 359 |
+
"outputs = model(**inputs)\n",
|
| 360 |
+
"print(outputs.last_hidden_state.shape)\n",
|
| 361 |
+
"\n",
|
| 362 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 363 |
+
"\n",
|
| 364 |
+
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
|
| 365 |
+
"outputs = model(**inputs)\n",
|
| 366 |
+
"print(outputs.logits.shape)\n",
|
| 367 |
+
"print(outputs)\n",
|
| 368 |
+
"print(outputs.logits)\n",
|
| 369 |
+
"\n",
|
| 370 |
+
"import torch\n",
|
| 371 |
+
"\n",
|
| 372 |
+
"predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
|
| 373 |
+
"print(predictions)\n",
|
| 374 |
+
"\n",
|
| 375 |
+
"print(model.config.id2label)\n"
|
| 376 |
+
]
|
| 377 |
+
},
|
| 378 |
+
{
|
| 379 |
+
"cell_type": "code",
|
| 380 |
+
"execution_count": 24,
|
| 381 |
+
"metadata": {},
|
| 382 |
+
"outputs": [
|
| 383 |
+
{
|
| 384 |
+
"name": "stdout",
|
| 385 |
+
"output_type": "stream",
|
| 386 |
+
"text": [
|
| 387 |
+
"<class 'transformers.models.bert.modeling_bert.BertModel'>\n",
|
| 388 |
+
"<class 'transformers.models.gpt2.modeling_gpt2.GPT2Model'>\n",
|
| 389 |
+
"<class 'transformers.models.bart.modeling_bart.BartModel'>\n"
|
| 390 |
+
]
|
| 391 |
+
}
|
| 392 |
+
],
|
| 393 |
+
"source": [
|
| 394 |
+
"# instantiate a transformers model:\n",
|
| 395 |
+
"\n",
|
| 396 |
+
"from transformers import AutoModel\n",
|
| 397 |
+
"\n",
|
| 398 |
+
"bert_model = AutoModel.from_pretrained('bert-base-uncased')\n",
|
| 399 |
+
"print(type(bert_model))\n",
|
| 400 |
+
"\n",
|
| 401 |
+
"gpt_model = AutoModel.from_pretrained('gpt2')\n",
|
| 402 |
+
"print(type(gpt_model))\n",
|
| 403 |
+
"\n",
|
| 404 |
+
"bart_model = AutoModel.from_pretrained('facebook/bart-base')\n",
|
| 405 |
+
"print(type(bart_model))"
|
| 406 |
+
]
|
| 407 |
+
},
|
| 408 |
+
{
|
| 409 |
+
"cell_type": "code",
|
| 410 |
+
"execution_count": 25,
|
| 411 |
+
"metadata": {},
|
| 412 |
+
"outputs": [
|
| 413 |
+
{
|
| 414 |
+
"name": "stdout",
|
| 415 |
+
"output_type": "stream",
|
| 416 |
+
"text": [
|
| 417 |
+
"<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
|
| 418 |
+
"<class 'transformers.models.gpt2.configuration_gpt2.GPT2Config'>\n",
|
| 419 |
+
"<class 'transformers.models.bart.configuration_bart.BartConfig'>\n"
|
| 420 |
+
]
|
| 421 |
+
}
|
| 422 |
+
],
|
| 423 |
+
"source": [
|
| 424 |
+
"from transformers import AutoConfig\n",
|
| 425 |
+
"\n",
|
| 426 |
+
"bert_config = AutoConfig.from_pretrained('bert-base-uncased')\n",
|
| 427 |
+
"print(type(bert_config))\n",
|
| 428 |
+
"\n",
|
| 429 |
+
"gpt_config = AutoConfig.from_pretrained('gpt2')\n",
|
| 430 |
+
"print(type(gpt_config))\n",
|
| 431 |
+
"\n",
|
| 432 |
+
"bart_config = AutoConfig.from_pretrained('facebook/bart-base')\n",
|
| 433 |
+
"print(type(bart_config))"
|
| 434 |
+
]
|
| 435 |
+
},
|
| 436 |
+
{
|
| 437 |
+
"cell_type": "code",
|
| 438 |
+
"execution_count": 26,
|
| 439 |
+
"metadata": {},
|
| 440 |
+
"outputs": [
|
| 441 |
+
{
|
| 442 |
+
"name": "stdout",
|
| 443 |
+
"output_type": "stream",
|
| 444 |
+
"text": [
|
| 445 |
+
"<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
|
| 446 |
+
"<class 'transformers.models.gpt2.configuration_gpt2.GPT2Config'>\n",
|
| 447 |
+
"<class 'transformers.models.bart.configuration_bart.BartConfig'>\n"
|
| 448 |
+
]
|
| 449 |
+
}
|
| 450 |
+
],
|
| 451 |
+
"source": [
|
| 452 |
+
"from transformers import BertConfig\n",
|
| 453 |
+
"\n",
|
| 454 |
+
"bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
|
| 455 |
+
"print(type(bert_config))\n",
|
| 456 |
+
"\n",
|
| 457 |
+
"from transformers import GPT2Config\n",
|
| 458 |
+
"gpt_config = GPT2Config.from_pretrained('gpt2')\n",
|
| 459 |
+
"print(type(gpt_config))\n",
|
| 460 |
+
"\n",
|
| 461 |
+
"from transformers import BartConfig\n",
|
| 462 |
+
"bart_config = BartConfig.from_pretrained('facebook/bart-base')\n",
|
| 463 |
+
"print(type(bart_config))"
|
| 464 |
+
]
|
| 465 |
+
},
|
| 466 |
+
{
|
| 467 |
+
"cell_type": "code",
|
| 468 |
+
"execution_count": 27,
|
| 469 |
+
"metadata": {},
|
| 470 |
+
"outputs": [
|
| 471 |
+
{
|
| 472 |
+
"name": "stdout",
|
| 473 |
+
"output_type": "stream",
|
| 474 |
+
"text": [
|
| 475 |
+
"BertConfig {\n",
|
| 476 |
+
" \"architectures\": [\n",
|
| 477 |
+
" \"BertForMaskedLM\"\n",
|
| 478 |
+
" ],\n",
|
| 479 |
+
" \"attention_probs_dropout_prob\": 0.1,\n",
|
| 480 |
+
" \"classifier_dropout\": null,\n",
|
| 481 |
+
" \"gradient_checkpointing\": false,\n",
|
| 482 |
+
" \"hidden_act\": \"gelu\",\n",
|
| 483 |
+
" \"hidden_dropout_prob\": 0.1,\n",
|
| 484 |
+
" \"hidden_size\": 768,\n",
|
| 485 |
+
" \"initializer_range\": 0.02,\n",
|
| 486 |
+
" \"intermediate_size\": 3072,\n",
|
| 487 |
+
" \"layer_norm_eps\": 1e-12,\n",
|
| 488 |
+
" \"max_position_embeddings\": 512,\n",
|
| 489 |
+
" \"model_type\": \"bert\",\n",
|
| 490 |
+
" \"num_attention_heads\": 12,\n",
|
| 491 |
+
" \"num_hidden_layers\": 12,\n",
|
| 492 |
+
" \"pad_token_id\": 0,\n",
|
| 493 |
+
" \"position_embedding_type\": \"absolute\",\n",
|
| 494 |
+
" \"transformers_version\": \"4.34.1\",\n",
|
| 495 |
+
" \"type_vocab_size\": 2,\n",
|
| 496 |
+
" \"use_cache\": true,\n",
|
| 497 |
+
" \"vocab_size\": 30522\n",
|
| 498 |
+
"}\n",
|
| 499 |
+
"\n"
|
| 500 |
+
]
|
| 501 |
+
}
|
| 502 |
+
],
|
| 503 |
+
"source": [
|
| 504 |
+
"from transformers import BertConfig\n",
|
| 505 |
+
"\n",
|
| 506 |
+
"bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
|
| 507 |
+
"print(bert_config)"
|
| 508 |
+
]
|
| 509 |
+
},
|
| 510 |
+
{
|
| 511 |
+
"cell_type": "code",
|
| 512 |
+
"execution_count": 31,
|
| 513 |
+
"metadata": {},
|
| 514 |
+
"outputs": [
|
| 515 |
+
{
|
| 516 |
+
"name": "stdout",
|
| 517 |
+
"output_type": "stream",
|
| 518 |
+
"text": [
|
| 519 |
+
"BertModel(\n",
|
| 520 |
+
" (embeddings): BertEmbeddings(\n",
|
| 521 |
+
" (word_embeddings): Embedding(30522, 768, padding_idx=0)\n",
|
| 522 |
+
" (position_embeddings): Embedding(512, 768)\n",
|
| 523 |
+
" (token_type_embeddings): Embedding(2, 768)\n",
|
| 524 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 525 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 526 |
+
" )\n",
|
| 527 |
+
" (encoder): BertEncoder(\n",
|
| 528 |
+
" (layer): ModuleList(\n",
|
| 529 |
+
" (0-11): 12 x BertLayer(\n",
|
| 530 |
+
" (attention): BertAttention(\n",
|
| 531 |
+
" (self): BertSelfAttention(\n",
|
| 532 |
+
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 533 |
+
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 534 |
+
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 535 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 536 |
+
" )\n",
|
| 537 |
+
" (output): BertSelfOutput(\n",
|
| 538 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 539 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 540 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 541 |
+
" )\n",
|
| 542 |
+
" )\n",
|
| 543 |
+
" (intermediate): BertIntermediate(\n",
|
| 544 |
+
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
| 545 |
+
" (intermediate_act_fn): GELUActivation()\n",
|
| 546 |
+
" )\n",
|
| 547 |
+
" (output): BertOutput(\n",
|
| 548 |
+
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
| 549 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 550 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 551 |
+
" )\n",
|
| 552 |
+
" )\n",
|
| 553 |
+
" )\n",
|
| 554 |
+
" )\n",
|
| 555 |
+
" (pooler): BertPooler(\n",
|
| 556 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 557 |
+
" (activation): Tanh()\n",
|
| 558 |
+
" )\n",
|
| 559 |
+
")\n"
|
| 560 |
+
]
|
| 561 |
+
}
|
| 562 |
+
],
|
| 563 |
+
"source": [
|
| 564 |
+
"from transformers import BertConfig, BertModel\n",
|
| 565 |
+
"\n",
|
| 566 |
+
"bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
|
| 567 |
+
"bert_model = BertModel(bert_config)\n",
|
| 568 |
+
"print(bert_model)"
|
| 569 |
+
]
|
| 570 |
+
},
|
| 571 |
+
{
|
| 572 |
+
"cell_type": "code",
|
| 573 |
+
"execution_count": 32,
|
| 574 |
+
"metadata": {},
|
| 575 |
+
"outputs": [],
|
| 576 |
+
"source": [
|
| 577 |
+
"from transformers import BertConfig, BertModel\n",
|
| 578 |
+
"\n",
|
| 579 |
+
"bert_config = BertConfig.from_pretrained('bert-base-uncased', num_hidden_layers=10)\n",
|
| 580 |
+
"bert_model = BertModel(bert_config)"
|
| 581 |
+
]
|
| 582 |
+
},
|
| 583 |
+
{
|
| 584 |
+
"cell_type": "code",
|
| 585 |
+
"execution_count": 33,
|
| 586 |
+
"metadata": {},
|
| 587 |
+
"outputs": [
|
| 588 |
+
{
|
| 589 |
+
"name": "stdout",
|
| 590 |
+
"output_type": "stream",
|
| 591 |
+
"text": [
|
| 592 |
+
"BertModel(\n",
|
| 593 |
+
" (embeddings): BertEmbeddings(\n",
|
| 594 |
+
" (word_embeddings): Embedding(30522, 768, padding_idx=0)\n",
|
| 595 |
+
" (position_embeddings): Embedding(512, 768)\n",
|
| 596 |
+
" (token_type_embeddings): Embedding(2, 768)\n",
|
| 597 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 598 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 599 |
+
" )\n",
|
| 600 |
+
" (encoder): BertEncoder(\n",
|
| 601 |
+
" (layer): ModuleList(\n",
|
| 602 |
+
" (0-9): 10 x BertLayer(\n",
|
| 603 |
+
" (attention): BertAttention(\n",
|
| 604 |
+
" (self): BertSelfAttention(\n",
|
| 605 |
+
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 606 |
+
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 607 |
+
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 608 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 609 |
+
" )\n",
|
| 610 |
+
" (output): BertSelfOutput(\n",
|
| 611 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 612 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 613 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 614 |
+
" )\n",
|
| 615 |
+
" )\n",
|
| 616 |
+
" (intermediate): BertIntermediate(\n",
|
| 617 |
+
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
| 618 |
+
" (intermediate_act_fn): GELUActivation()\n",
|
| 619 |
+
" )\n",
|
| 620 |
+
" (output): BertOutput(\n",
|
| 621 |
+
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
| 622 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 623 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 624 |
+
" )\n",
|
| 625 |
+
" )\n",
|
| 626 |
+
" )\n",
|
| 627 |
+
" )\n",
|
| 628 |
+
" (pooler): BertPooler(\n",
|
| 629 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 630 |
+
" (activation): Tanh()\n",
|
| 631 |
+
" )\n",
|
| 632 |
+
")\n"
|
| 633 |
+
]
|
| 634 |
+
}
|
| 635 |
+
],
|
| 636 |
+
"source": [
|
| 637 |
+
"print(bert_model)"
|
| 638 |
+
]
|
| 639 |
+
},
|
| 640 |
+
{
|
| 641 |
+
"cell_type": "code",
|
| 642 |
+
"execution_count": 34,
|
| 643 |
+
"metadata": {},
|
| 644 |
+
"outputs": [],
|
| 645 |
+
"source": [
|
| 646 |
+
"from transformers import BertConfig, BertModel\n",
|
| 647 |
+
"\n",
|
| 648 |
+
"bert_config = BertConfig.from_pretrained('bert-base-uncased', num_hidden_layers = 10)\n",
|
| 649 |
+
"bert_model = BertModel(bert_config)\n",
|
| 650 |
+
"\n",
|
| 651 |
+
"bert_model.save_pretrained('my_bert_model')"
|
| 652 |
+
]
|
| 653 |
+
},
|
| 654 |
+
{
|
| 655 |
+
"cell_type": "code",
|
| 656 |
+
"execution_count": 35,
|
| 657 |
+
"metadata": {},
|
| 658 |
+
"outputs": [],
|
| 659 |
+
"source": [
|
| 660 |
+
"from transformers import BertModel\n",
|
| 661 |
+
"\n",
|
| 662 |
+
"bert_model = BertModel.from_pretrained('my_bert_model')"
|
| 663 |
+
]
|
| 664 |
+
},
|
| 665 |
+
{
|
| 666 |
+
"cell_type": "code",
|
| 667 |
+
"execution_count": 39,
|
| 668 |
+
"metadata": {},
|
| 669 |
+
"outputs": [
|
| 670 |
+
{
|
| 671 |
+
"name": "stdout",
|
| 672 |
+
"output_type": "stream",
|
| 673 |
+
"text": [
|
| 674 |
+
"<class 'transformers.models.bert.modeling_bert.BertModel'>\n",
|
| 675 |
+
"<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
|
| 676 |
+
"<class 'transformers.models.bert.modeling_bert.BertModel'>\n",
|
| 677 |
+
"<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
|
| 678 |
+
"<class 'transformers.models.bert.modeling_bert.BertModel'>\n"
|
| 679 |
+
]
|
| 680 |
+
}
|
| 681 |
+
],
|
| 682 |
+
"source": [
|
| 683 |
+
"from transformers import AutoModel\n",
|
| 684 |
+
"\n",
|
| 685 |
+
"bert_model = AutoModel.from_pretrained('bert-base-uncased')\n",
|
| 686 |
+
"print(type(bert_model))\n",
|
| 687 |
+
"\n",
|
| 688 |
+
"from transformers import AutoConfig, BertModel\n",
|
| 689 |
+
"\n",
|
| 690 |
+
"bert_config = AutoConfig.from_pretrained('bert-base-uncased')\n",
|
| 691 |
+
"print(type(bert_config))\n",
|
| 692 |
+
"bert_model = BertModel(bert_config)\n",
|
| 693 |
+
"print(type(bert_model))\n",
|
| 694 |
+
"\n",
|
| 695 |
+
"from transformers import BertConfig, BertModel\n",
|
| 696 |
+
"bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
|
| 697 |
+
"print(type(bert_config))\n",
|
| 698 |
+
"bert_model = BertModel(bert_config)\n",
|
| 699 |
+
"print(type(bert_model))"
|
| 700 |
+
]
|
| 701 |
+
},
|
| 702 |
+
{
|
| 703 |
+
"cell_type": "code",
|
| 704 |
+
"execution_count": 40,
|
| 705 |
+
"metadata": {},
|
| 706 |
+
"outputs": [
|
| 707 |
+
{
|
| 708 |
+
"data": {
|
| 709 |
+
"text/plain": [
|
| 710 |
+
"BertConfig {\n",
|
| 711 |
+
" \"architectures\": [\n",
|
| 712 |
+
" \"BertForMaskedLM\"\n",
|
| 713 |
+
" ],\n",
|
| 714 |
+
" \"attention_probs_dropout_prob\": 0.1,\n",
|
| 715 |
+
" \"classifier_dropout\": null,\n",
|
| 716 |
+
" \"gradient_checkpointing\": false,\n",
|
| 717 |
+
" \"hidden_act\": \"gelu\",\n",
|
| 718 |
+
" \"hidden_dropout_prob\": 0.1,\n",
|
| 719 |
+
" \"hidden_size\": 768,\n",
|
| 720 |
+
" \"initializer_range\": 0.02,\n",
|
| 721 |
+
" \"intermediate_size\": 3072,\n",
|
| 722 |
+
" \"layer_norm_eps\": 1e-12,\n",
|
| 723 |
+
" \"max_position_embeddings\": 512,\n",
|
| 724 |
+
" \"model_type\": \"bert\",\n",
|
| 725 |
+
" \"num_attention_heads\": 12,\n",
|
| 726 |
+
" \"num_hidden_layers\": 12,\n",
|
| 727 |
+
" \"pad_token_id\": 0,\n",
|
| 728 |
+
" \"position_embedding_type\": \"absolute\",\n",
|
| 729 |
+
" \"transformers_version\": \"4.34.1\",\n",
|
| 730 |
+
" \"type_vocab_size\": 2,\n",
|
| 731 |
+
" \"use_cache\": true,\n",
|
| 732 |
+
" \"vocab_size\": 30522\n",
|
| 733 |
+
"}"
|
| 734 |
+
]
|
| 735 |
+
},
|
| 736 |
+
"execution_count": 40,
|
| 737 |
+
"metadata": {},
|
| 738 |
+
"output_type": "execute_result"
|
| 739 |
+
}
|
| 740 |
+
],
|
| 741 |
+
"source": [
|
| 742 |
+
"from transformers import BertConfig\n",
|
| 743 |
+
"\n",
|
| 744 |
+
"bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
|
| 745 |
+
"bert_config"
|
| 746 |
+
]
|
| 747 |
+
},
|
| 748 |
+
{
|
| 749 |
+
"cell_type": "code",
|
| 750 |
+
"execution_count": 49,
|
| 751 |
+
"metadata": {},
|
| 752 |
+
"outputs": [],
|
| 753 |
+
"source": [
|
| 754 |
+
"from transformers import BertConfig, BertModel\n",
|
| 755 |
+
"new_bert_config = BertConfig.from_pretrained('bert-base-uncased', num_hidden_layers=10)\n",
|
| 756 |
+
"new_bert_model = BertModel(new_bert_config)\n",
|
| 757 |
+
"\n",
|
| 758 |
+
"new_bert_model.save_pretrained('new-bert-model')\n",
|
| 759 |
+
"\n"
|
| 760 |
+
]
|
| 761 |
+
},
|
| 762 |
+
{
|
| 763 |
+
"cell_type": "code",
|
| 764 |
+
"execution_count": 50,
|
| 765 |
+
"metadata": {},
|
| 766 |
+
"outputs": [],
|
| 767 |
+
"source": [
|
| 768 |
+
"load_new_bert_model = BertModel.from_pretrained('new-bert-model')\n"
|
| 769 |
+
]
|
| 770 |
+
},
|
| 771 |
+
{
|
| 772 |
+
"cell_type": "code",
|
| 773 |
+
"execution_count": 57,
|
| 774 |
+
"metadata": {},
|
| 775 |
+
"outputs": [
|
| 776 |
+
{
|
| 777 |
+
"name": "stdout",
|
| 778 |
+
"output_type": "stream",
|
| 779 |
+
"text": [
|
| 780 |
+
"['let', \"'\", 's', 'try', 'to', 'token', '##ize']\n"
|
| 781 |
+
]
|
| 782 |
+
},
|
| 783 |
+
{
|
| 784 |
+
"name": "stderr",
|
| 785 |
+
"output_type": "stream",
|
| 786 |
+
"text": [
|
| 787 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 788 |
+
]
|
| 789 |
+
},
|
| 790 |
+
{
|
| 791 |
+
"name": "stdout",
|
| 792 |
+
"output_type": "stream",
|
| 793 |
+
"text": [
|
| 794 |
+
"['▁let', \"'\", 's', '▁try', '▁to', '▁to', 'ken', 'ize']\n",
|
| 795 |
+
"[2292, 1005, 1055, 3046, 2000, 19204, 4697]\n",
|
| 796 |
+
"[101, 2292, 1005, 1055, 3046, 2000, 19204, 4697, 102]\n",
|
| 797 |
+
"{'input_ids': [101, 2292, 1005, 1055, 3046, 2000, 19204, 4697, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}\n",
|
| 798 |
+
"[CLS] let's try to tokenize [SEP]\n"
|
| 799 |
+
]
|
| 800 |
+
}
|
| 801 |
+
],
|
| 802 |
+
"source": [
|
| 803 |
+
"from transformers import AutoTokenizer\n",
|
| 804 |
+
"\n",
|
| 805 |
+
"# split our input into tokens:\n",
|
| 806 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 807 |
+
"tokens = tokenizer.tokenize(\"Let's try to tokenize\")\n",
|
| 808 |
+
"print(tokens)\n",
|
| 809 |
+
"\n",
|
| 810 |
+
"albert_tokenizer = AutoTokenizer.from_pretrained('albert-base-v1')\n",
|
| 811 |
+
"albert_tokens = albert_tokenizer.tokenize(\"Let's try to tokenize\")\n",
|
| 812 |
+
"print(albert_tokens)\n",
|
| 813 |
+
"\n",
|
| 814 |
+
"# map tokens to respective ids:\n",
|
| 815 |
+
"input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
|
| 816 |
+
"print(input_ids)\n",
|
| 817 |
+
"\n",
|
| 818 |
+
"# add special tokens:\n",
|
| 819 |
+
"final_inputs = tokenizer.prepare_for_model(input_ids)\n",
|
| 820 |
+
"print(final_inputs['input_ids'])\n",
|
| 821 |
+
"print(final_inputs)\n",
|
| 822 |
+
"\n",
|
| 823 |
+
"print(tokenizer.decode(final_inputs['input_ids']))"
|
| 824 |
+
]
|
| 825 |
+
},
|
| 826 |
+
{
|
| 827 |
+
"cell_type": "code",
|
| 828 |
+
"execution_count": 61,
|
| 829 |
+
"metadata": {},
|
| 830 |
+
"outputs": [
|
| 831 |
+
{
|
| 832 |
+
"name": "stderr",
|
| 833 |
+
"output_type": "stream",
|
| 834 |
+
"text": [
|
| 835 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 836 |
+
]
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"name": "stdout",
|
| 840 |
+
"output_type": "stream",
|
| 841 |
+
"text": [
|
| 842 |
+
"['lets', 'try', 'to', 'token', '##ize']\n",
|
| 843 |
+
"[11082, 3046, 2000, 19204, 4697]\n",
|
| 844 |
+
"{'input_ids': [101, 11082, 3046, 2000, 19204, 4697, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}\n",
|
| 845 |
+
"[CLS] lets try to tokenize [SEP]\n",
|
| 846 |
+
"{'input_ids': [101, 11082, 3046, 2000, 19204, 4697, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}\n",
|
| 847 |
+
"[101, 11082, 3046, 2000, 19204, 4697, 102]\n",
|
| 848 |
+
"[CLS] lets try to tokenize [SEP]\n"
|
| 849 |
+
]
|
| 850 |
+
}
|
| 851 |
+
],
|
| 852 |
+
"source": [
|
| 853 |
+
"from transformers import AutoTokenizer\n",
|
| 854 |
+
"\n",
|
| 855 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 856 |
+
"tokens = tokenizer.tokenize(\"lets try to tokenize\")\n",
|
| 857 |
+
"print(tokens)\n",
|
| 858 |
+
"input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
|
| 859 |
+
"print(input_ids)\n",
|
| 860 |
+
"final_inputs = tokenizer.prepare_for_model(input_ids)\n",
|
| 861 |
+
"print(final_inputs)\n",
|
| 862 |
+
"decoded_inputs = tokenizer.decode(final_inputs['input_ids'])\n",
|
| 863 |
+
"print(decoded_inputs)\n",
|
| 864 |
+
"\n",
|
| 865 |
+
"from transformers import AutoTokenizer\n",
|
| 866 |
+
"\n",
|
| 867 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 868 |
+
"inputs = tokenizer('lets try to tokenize')\n",
|
| 869 |
+
"print(inputs)\n",
|
| 870 |
+
"print(inputs['input_ids'])\n",
|
| 871 |
+
"print(tokenizer.decode(inputs['input_ids']))"
|
| 872 |
+
]
|
| 873 |
+
},
|
| 874 |
+
{
|
| 875 |
+
"cell_type": "code",
|
| 876 |
+
"execution_count": 1,
|
| 877 |
+
"metadata": {},
|
| 878 |
+
"outputs": [
|
| 879 |
+
{
|
| 880 |
+
"name": "stderr",
|
| 881 |
+
"output_type": "stream",
|
| 882 |
+
"text": [
|
| 883 |
+
"You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
|
| 884 |
+
]
|
| 885 |
+
},
|
| 886 |
+
{
|
| 887 |
+
"ename": "AttributeError",
|
| 888 |
+
"evalue": "'list' object has no attribute 'size'",
|
| 889 |
+
"output_type": "error",
|
| 890 |
+
"traceback": [
|
| 891 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 892 |
+
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
| 893 |
+
"\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\practise basics.ipynb Cell 32\u001b[0m line \u001b[0;36m1\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/practise%20basics.ipynb#X43sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m BertModel\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/practise%20basics.ipynb#X43sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m Bert_model \u001b[39m=\u001b[39m BertModel(Bert_config)\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/practise%20basics.ipynb#X43sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m outputs \u001b[39m=\u001b[39m Bert_model(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49minputs)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/practise%20basics.ipynb#X43sZmlsZQ%3D%3D?line=11'>12</a>\u001b[0m \u001b[39mprint\u001b[39m(outputs\u001b[39m.\u001b[39mlast_hidden_state\u001b[39m.\u001b[39mshape)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/HP/Desktop/PythonProjects/HuggingFace_Beginners/practise%20basics.ipynb#X43sZmlsZQ%3D%3D?line=12'>13</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m AutoModelForSequenceClassification\n",
|
| 894 |
+
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\torch\\nn\\modules\\module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1516\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_compiled_call_impl(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1517\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1518\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call_impl(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
|
| 895 |
+
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\torch\\nn\\modules\\module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1522\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1523\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1524\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_pre_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1525\u001b[0m \u001b[39mor\u001b[39;00m _global_backward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1526\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1527\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[0;32m 1529\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 1530\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n",
|
| 896 |
+
"File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\models\\bert\\modeling_bert.py:970\u001b[0m, in \u001b[0;36mBertModel.forward\u001b[1;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[0;32m 968\u001b[0m \u001b[39melif\u001b[39;00m input_ids \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 969\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mwarn_if_padding_and_no_attention_mask(input_ids, attention_mask)\n\u001b[1;32m--> 970\u001b[0m input_shape \u001b[39m=\u001b[39m input_ids\u001b[39m.\u001b[39;49msize()\n\u001b[0;32m 971\u001b[0m \u001b[39melif\u001b[39;00m inputs_embeds \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 972\u001b[0m input_shape \u001b[39m=\u001b[39m inputs_embeds\u001b[39m.\u001b[39msize()[:\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n",
|
| 897 |
+
"\u001b[1;31mAttributeError\u001b[0m: 'list' object has no attribute 'size'"
|
| 898 |
+
]
|
| 899 |
+
}
|
| 900 |
+
],
|
| 901 |
+
"source": [
|
| 902 |
+
"from transformers import pipeline\n",
|
| 903 |
+
"from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification, AutoConfig\n",
|
| 904 |
+
"\n",
|
| 905 |
+
"tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n",
|
| 906 |
+
"tokens = tokenizer.tokenize(\"Let's try to tokenize\")\n",
|
| 907 |
+
"input_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
|
| 908 |
+
"inputs = tokenizer.prepare_for_model(input_ids)\n",
|
| 909 |
+
"Bert_config = AutoConfig.from_pretrained('bert-base-uncased')\n",
|
| 910 |
+
"from transformers import BertModel\n",
|
| 911 |
+
"Bert_model = BertModel(Bert_config)\n",
|
| 912 |
+
"outputs = Bert_model(**inputs)\n",
|
| 913 |
+
"print(outputs.last_hidden_state.shape)\n",
|
| 914 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 915 |
+
"model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')\n",
|
| 916 |
+
"outputs = model(**inputs)\n",
|
| 917 |
+
"print(outputs.logits)\n",
|
| 918 |
+
"import torch\n",
|
| 919 |
+
"predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
|
| 920 |
+
"print(predictions)\n",
|
| 921 |
+
"print(model.config.id2label)\n",
|
| 922 |
+
"\n",
|
| 923 |
+
"from transformers import AutoModel, AutoConfig, BertModel, BertConfig\n",
|
| 924 |
+
"\n",
|
| 925 |
+
"bert_config = BertConfig.from_pretrained('bert-base-uncased')\n",
|
| 926 |
+
"bert_model = BertModel(bert_config)\n",
|
| 927 |
+
"bert_model.save_pretrained(bert_model)\n",
|
| 928 |
+
"initialize_model = BertModel.from_pretrained('bert_model')\n",
|
| 929 |
+
"outputs = initialize_model(**inputs)\n",
|
| 930 |
+
"print(outputs.last_hidden_state.shape)"
|
| 931 |
+
]
|
| 932 |
+
},
|
| 933 |
+
{
|
| 934 |
+
"cell_type": "code",
|
| 935 |
+
"execution_count": null,
|
| 936 |
+
"metadata": {},
|
| 937 |
+
"outputs": [],
|
| 938 |
+
"source": []
|
| 939 |
+
}
|
| 940 |
+
],
|
| 941 |
+
"metadata": {
|
| 942 |
+
"kernelspec": {
|
| 943 |
+
"display_name": "Python 3",
|
| 944 |
+
"language": "python",
|
| 945 |
+
"name": "python3"
|
| 946 |
+
},
|
| 947 |
+
"language_info": {
|
| 948 |
+
"codemirror_mode": {
|
| 949 |
+
"name": "ipython",
|
| 950 |
+
"version": 3
|
| 951 |
+
},
|
| 952 |
+
"file_extension": ".py",
|
| 953 |
+
"mimetype": "text/x-python",
|
| 954 |
+
"name": "python",
|
| 955 |
+
"nbconvert_exporter": "python",
|
| 956 |
+
"pygments_lexer": "ipython3",
|
| 957 |
+
"version": "3.11.6"
|
| 958 |
+
}
|
| 959 |
+
},
|
| 960 |
+
"nbformat": 4,
|
| 961 |
+
"nbformat_minor": 2
|
| 962 |
+
}
|
preprocess-sentence-pairs.ipynb
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"This is how to batch inputs together."
|
| 8 |
+
]
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"cell_type": "code",
|
| 12 |
+
"execution_count": 1,
|
| 13 |
+
"metadata": {},
|
| 14 |
+
"outputs": [],
|
| 15 |
+
"source": [
|
| 16 |
+
"from transformers import AutoTokenizer\n",
|
| 17 |
+
"\n",
|
| 18 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 19 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 20 |
+
"sequences = [\n",
|
| 21 |
+
" 'I will not give up this time','I will try my best and see what happens'\n",
|
| 22 |
+
"]\n",
|
| 23 |
+
"batch = tokenizer(sequences, padding=True, truncation=True, return_tensors='pt')"
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"cell_type": "markdown",
|
| 28 |
+
"metadata": {},
|
| 29 |
+
"source": [
|
| 30 |
+
"The tokenizer accepts sentence pairs as well!"
|
| 31 |
+
]
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"cell_type": "code",
|
| 35 |
+
"execution_count": 2,
|
| 36 |
+
"metadata": {},
|
| 37 |
+
"outputs": [
|
| 38 |
+
{
|
| 39 |
+
"data": {
|
| 40 |
+
"text/plain": [
|
| 41 |
+
"{'input_ids': [101, 2026, 2171, 2003, 14093, 999, 102, 1045, 2572, 1037, 3076, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}"
|
| 42 |
+
]
|
| 43 |
+
},
|
| 44 |
+
"execution_count": 2,
|
| 45 |
+
"metadata": {},
|
| 46 |
+
"output_type": "execute_result"
|
| 47 |
+
}
|
| 48 |
+
],
|
| 49 |
+
"source": [
|
| 50 |
+
"from transformers import AutoTokenizer\n",
|
| 51 |
+
"\n",
|
| 52 |
+
"checkpoint = \"bert-base-uncased\"\n",
|
| 53 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 54 |
+
"tokenizer('My name is Abdullah!', \"I am a student.\")"
|
| 55 |
+
]
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"cell_type": "markdown",
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"source": [
|
| 61 |
+
"If we have several pairs of sentences:"
|
| 62 |
+
]
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"cell_type": "code",
|
| 66 |
+
"execution_count": 3,
|
| 67 |
+
"metadata": {},
|
| 68 |
+
"outputs": [
|
| 69 |
+
{
|
| 70 |
+
"data": {
|
| 71 |
+
"text/plain": [
|
| 72 |
+
"{'input_ids': tensor([[ 101, 2026, 2171, 2003, 14093, 999, 102, 1045, 2572, 1037,\n",
|
| 73 |
+
" 3076, 102, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
| 74 |
+
" 0],\n",
|
| 75 |
+
" [ 101, 1045, 2572, 6517, 1045, 2514, 2066, 1045, 2001, 6620,\n",
|
| 76 |
+
" 3993, 1012, 102, 2045, 2003, 2061, 2172, 2000, 4553, 1012,\n",
|
| 77 |
+
" 102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 78 |
+
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
| 79 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}"
|
| 80 |
+
]
|
| 81 |
+
},
|
| 82 |
+
"execution_count": 3,
|
| 83 |
+
"metadata": {},
|
| 84 |
+
"output_type": "execute_result"
|
| 85 |
+
}
|
| 86 |
+
],
|
| 87 |
+
"source": [
|
| 88 |
+
"from transformers import AutoTokenizer\n",
|
| 89 |
+
"\n",
|
| 90 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 93 |
+
"tokenizer(\n",
|
| 94 |
+
" ['My name is Abdullah!', 'I am sad I feel like I was prideful.'],\n",
|
| 95 |
+
" ['I am a student', 'There is so much to learn.'],\n",
|
| 96 |
+
" padding = True,\n",
|
| 97 |
+
" truncation = True,\n",
|
| 98 |
+
" return_tensors = 'pt'\n",
|
| 99 |
+
")"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"cell_type": "code",
|
| 104 |
+
"execution_count": 4,
|
| 105 |
+
"metadata": {},
|
| 106 |
+
"outputs": [
|
| 107 |
+
{
|
| 108 |
+
"name": "stderr",
|
| 109 |
+
"output_type": "stream",
|
| 110 |
+
"text": [
|
| 111 |
+
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
|
| 112 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
| 113 |
+
]
|
| 114 |
+
},
|
| 115 |
+
{
|
| 116 |
+
"name": "stdout",
|
| 117 |
+
"output_type": "stream",
|
| 118 |
+
"text": [
|
| 119 |
+
"tensor([[ 0.2318, -0.3874],\n",
|
| 120 |
+
" [-0.3586, -0.5364]], grad_fn=<AddmmBackward0>)\n",
|
| 121 |
+
"tensor([[0.6500, 0.3500],\n",
|
| 122 |
+
" [0.5443, 0.4557]], grad_fn=<SoftmaxBackward0>)\n"
|
| 123 |
+
]
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"data": {
|
| 127 |
+
"text/plain": [
|
| 128 |
+
"{0: 'LABEL_0', 1: 'LABEL_1'}"
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"execution_count": 4,
|
| 132 |
+
"metadata": {},
|
| 133 |
+
"output_type": "execute_result"
|
| 134 |
+
}
|
| 135 |
+
],
|
| 136 |
+
"source": [
|
| 137 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 138 |
+
"from transformers import AutoTokenizer\n",
|
| 139 |
+
"\n",
|
| 140 |
+
"checkpoint = 'bert-base-uncased'\n",
|
| 141 |
+
"\n",
|
| 142 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 143 |
+
"batch = tokenizer(\n",
|
| 144 |
+
" ['My name is Abdullah!', 'I work at Hackules.inc'],\n",
|
| 145 |
+
" ['I am a student', 'This movie is great'],\n",
|
| 146 |
+
" padding = True,\n",
|
| 147 |
+
" truncation = True,\n",
|
| 148 |
+
" return_tensors = 'pt'\n",
|
| 149 |
+
")\n",
|
| 150 |
+
"\n",
|
| 151 |
+
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
|
| 152 |
+
"outputs = model(**batch)\n",
|
| 153 |
+
"print(outputs.logits)\n",
|
| 154 |
+
"\n",
|
| 155 |
+
"import torch\n",
|
| 156 |
+
"\n",
|
| 157 |
+
"predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n",
|
| 158 |
+
"print(predictions)\n",
|
| 159 |
+
"\n",
|
| 160 |
+
"model.config.id2label\n",
|
| 161 |
+
"\n"
|
| 162 |
+
]
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"cell_type": "code",
|
| 166 |
+
"execution_count": null,
|
| 167 |
+
"metadata": {},
|
| 168 |
+
"outputs": [],
|
| 169 |
+
"source": []
|
| 170 |
+
}
|
| 171 |
+
],
|
| 172 |
+
"metadata": {
|
| 173 |
+
"kernelspec": {
|
| 174 |
+
"display_name": "Python 3",
|
| 175 |
+
"language": "python",
|
| 176 |
+
"name": "python3"
|
| 177 |
+
},
|
| 178 |
+
"language_info": {
|
| 179 |
+
"codemirror_mode": {
|
| 180 |
+
"name": "ipython",
|
| 181 |
+
"version": 3
|
| 182 |
+
},
|
| 183 |
+
"file_extension": ".py",
|
| 184 |
+
"mimetype": "text/x-python",
|
| 185 |
+
"name": "python",
|
| 186 |
+
"nbconvert_exporter": "python",
|
| 187 |
+
"pygments_lexer": "ipython3",
|
| 188 |
+
"version": "3.11.6"
|
| 189 |
+
}
|
| 190 |
+
},
|
| 191 |
+
"nbformat": 4,
|
| 192 |
+
"nbformat_minor": 2
|
| 193 |
+
}
|
study_pipeline.ipynb
ADDED
|
@@ -0,0 +1,458 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 4,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"data": {
|
| 10 |
+
"text/plain": [
|
| 11 |
+
"[{'label': 'POSITIVE', 'score': 0.9433633089065552},\n",
|
| 12 |
+
" {'label': 'NEGATIVE', 'score': 0.9994558691978455}]"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
"execution_count": 4,
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"output_type": "execute_result"
|
| 18 |
+
}
|
| 19 |
+
],
|
| 20 |
+
"source": [
|
| 21 |
+
"from transformers import pipeline\n",
|
| 22 |
+
"\n",
|
| 23 |
+
"sentiment_analyser = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')\n",
|
| 24 |
+
"sentiment_analyser(['I have been waiting for a HuggingFace course my whole life.',\n",
|
| 25 |
+
" 'I hate this so much!'])"
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "markdown",
|
| 30 |
+
"metadata": {},
|
| 31 |
+
"source": [
|
| 32 |
+
"What happens under the hood?"
|
| 33 |
+
]
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"cell_type": "markdown",
|
| 37 |
+
"metadata": {},
|
| 38 |
+
"source": [
|
| 39 |
+
"Tokenizer -> Model -> PostProcessing"
|
| 40 |
+
]
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"cell_type": "markdown",
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"source": [
|
| 46 |
+
"Tokenizer takes in the raw data in this case the text and converts it into numerical representation for the model.\n",
|
| 47 |
+
"It does so using the following steps:\n",
|
| 48 |
+
"- Take input\n",
|
| 49 |
+
"- Break input down into tokens depending on spaces or punctuation\n",
|
| 50 |
+
"- Provide the sequence of tokens with a start token and a stop token, the start token for the BERT model is CLS which stands for Classification Tasks and the stop token for said model is SEP which stands for Seperation Tasks.\n",
|
| 51 |
+
"- Convert all the tokens in the sequence into their numerical representation for the model to ingest."
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"cell_type": "markdown",
|
| 56 |
+
"metadata": {},
|
| 57 |
+
"source": [
|
| 58 |
+
"# Pytorch"
|
| 59 |
+
]
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"cell_type": "markdown",
|
| 63 |
+
"metadata": {},
|
| 64 |
+
"source": [
|
| 65 |
+
"### Tokenizer"
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"cell_type": "code",
|
| 70 |
+
"execution_count": 9,
|
| 71 |
+
"metadata": {},
|
| 72 |
+
"outputs": [],
|
| 73 |
+
"source": [
|
| 74 |
+
"from transformers import AutoTokenizer\n",
|
| 75 |
+
"\n",
|
| 76 |
+
"# initialize tokenizer and model from checkpoint name\n",
|
| 77 |
+
"checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
|
| 78 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 79 |
+
"\n",
|
| 80 |
+
"# use tokenizer to preprocess inputs:\n",
|
| 81 |
+
"raw_inputs = [\n",
|
| 82 |
+
" 'I have been waiting for a HuggingFace course my whole life.',\n",
|
| 83 |
+
" 'I hate this so much!'\n",
|
| 84 |
+
"]\n",
|
| 85 |
+
"inputs_pytorch = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')"
|
| 86 |
+
]
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"cell_type": "code",
|
| 90 |
+
"execution_count": 10,
|
| 91 |
+
"metadata": {},
|
| 92 |
+
"outputs": [
|
| 93 |
+
{
|
| 94 |
+
"name": "stdout",
|
| 95 |
+
"output_type": "stream",
|
| 96 |
+
"text": [
|
| 97 |
+
"{'input_ids': tensor([[ 101, 1045, 2031, 2042, 3403, 2005, 1037, 17662, 12172, 2607,\n",
|
| 98 |
+
" 2026, 2878, 2166, 1012, 102],\n",
|
| 99 |
+
" [ 101, 1045, 5223, 2023, 2061, 2172, 999, 102, 0, 0,\n",
|
| 100 |
+
" 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
|
| 101 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]])}\n"
|
| 102 |
+
]
|
| 103 |
+
}
|
| 104 |
+
],
|
| 105 |
+
"source": [
|
| 106 |
+
"print(inputs_pytorch)\n"
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"cell_type": "markdown",
|
| 111 |
+
"metadata": {},
|
| 112 |
+
"source": [
|
| 113 |
+
"### Model"
|
| 114 |
+
]
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"cell_type": "code",
|
| 118 |
+
"execution_count": 14,
|
| 119 |
+
"metadata": {},
|
| 120 |
+
"outputs": [
|
| 121 |
+
{
|
| 122 |
+
"name": "stdout",
|
| 123 |
+
"output_type": "stream",
|
| 124 |
+
"text": [
|
| 125 |
+
"torch.Size([2, 15, 768])\n"
|
| 126 |
+
]
|
| 127 |
+
}
|
| 128 |
+
],
|
| 129 |
+
"source": [
|
| 130 |
+
"from transformers import AutoModel\n",
|
| 131 |
+
"\n",
|
| 132 |
+
"# initialize model from checkpoint name\n",
|
| 133 |
+
"checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
|
| 134 |
+
"model = AutoModel.from_pretrained(checkpoint)\n",
|
| 135 |
+
"\n",
|
| 136 |
+
"# forward pass\n",
|
| 137 |
+
"outputs_pytorch = model(**inputs_pytorch)\n",
|
| 138 |
+
"\n",
|
| 139 |
+
"# print last hidden states of the first batch\n",
|
| 140 |
+
"print(outputs_pytorch.last_hidden_state.shape)\n"
|
| 141 |
+
]
|
| 142 |
+
},
|
| 143 |
+
{
|
| 144 |
+
"cell_type": "markdown",
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"source": [
|
| 147 |
+
"However, the AutoModel API instantiates the model without its pre-training head. It outputs a high-dimensional tensor that is a representation of the sentences passed, but it is not directly useful for classification."
|
| 148 |
+
]
|
| 149 |
+
},
|
| 150 |
+
{
|
| 151 |
+
"cell_type": "markdown",
|
| 152 |
+
"metadata": {},
|
| 153 |
+
"source": [
|
| 154 |
+
"Use Auto model for sequence classification api instead"
|
| 155 |
+
]
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"cell_type": "code",
|
| 159 |
+
"execution_count": 18,
|
| 160 |
+
"metadata": {},
|
| 161 |
+
"outputs": [
|
| 162 |
+
{
|
| 163 |
+
"name": "stdout",
|
| 164 |
+
"output_type": "stream",
|
| 165 |
+
"text": [
|
| 166 |
+
"tensor([[-1.3782, 1.4346],\n",
|
| 167 |
+
" [ 4.1692, -3.3464]], grad_fn=<AddmmBackward0>)\n"
|
| 168 |
+
]
|
| 169 |
+
}
|
| 170 |
+
],
|
| 171 |
+
"source": [
|
| 172 |
+
"from transformers import AutoModelForSequenceClassification\n",
|
| 173 |
+
"\n",
|
| 174 |
+
"checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
|
| 175 |
+
"model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
|
| 176 |
+
"\n",
|
| 177 |
+
"outputs_pytorch = model(**inputs_pytorch)\n",
|
| 178 |
+
"print(outputs_pytorch.logits)"
|
| 179 |
+
]
|
| 180 |
+
},
|
| 181 |
+
{
|
| 182 |
+
"cell_type": "markdown",
|
| 183 |
+
"metadata": {},
|
| 184 |
+
"source": [
|
| 185 |
+
"It is seen that the outputs are not probabilities yet, as the values are far from being between 0 and 1.\n",
|
| 186 |
+
"This is because each model of the transformers library returns logits.\n",
|
| 187 |
+
"The logits are converted into probabilities in the third and last step of the pipeline, which is\n",
|
| 188 |
+
"### Postprocessing "
|
| 189 |
+
]
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"cell_type": "code",
|
| 193 |
+
"execution_count": 19,
|
| 194 |
+
"metadata": {},
|
| 195 |
+
"outputs": [
|
| 196 |
+
{
|
| 197 |
+
"name": "stdout",
|
| 198 |
+
"output_type": "stream",
|
| 199 |
+
"text": [
|
| 200 |
+
"tensor([[5.6636e-02, 9.4336e-01],\n",
|
| 201 |
+
" [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward0>)\n"
|
| 202 |
+
]
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"data": {
|
| 206 |
+
"text/plain": [
|
| 207 |
+
"{0: 'NEGATIVE', 1: 'POSITIVE'}"
|
| 208 |
+
]
|
| 209 |
+
},
|
| 210 |
+
"execution_count": 19,
|
| 211 |
+
"metadata": {},
|
| 212 |
+
"output_type": "execute_result"
|
| 213 |
+
}
|
| 214 |
+
],
|
| 215 |
+
"source": [
|
| 216 |
+
"import torch\n",
|
| 217 |
+
"\n",
|
| 218 |
+
"'''to convert logits into probabilities we apply the softmax layer'''\n",
|
| 219 |
+
"predictions = torch.nn.functional.softmax(outputs_pytorch.logits, dim=-1)\n",
|
| 220 |
+
"print(predictions)\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"'''the last of these is to see which of these responses \n",
|
| 223 |
+
"are positive or negative. \n",
|
| 224 |
+
"this is given by id2label field of the model config'''\n",
|
| 225 |
+
"model.config.id2label"
|
| 226 |
+
]
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"cell_type": "markdown",
|
| 230 |
+
"metadata": {},
|
| 231 |
+
"source": [
|
| 232 |
+
"# Tensorflow:"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "markdown",
|
| 237 |
+
"metadata": {},
|
| 238 |
+
"source": [
|
| 239 |
+
"### Tokenizer:"
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"cell_type": "code",
|
| 244 |
+
"execution_count": 21,
|
| 245 |
+
"metadata": {},
|
| 246 |
+
"outputs": [],
|
| 247 |
+
"source": [
|
| 248 |
+
"from transformers import AutoTokenizer\n",
|
| 249 |
+
"\n",
|
| 250 |
+
"checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
|
| 251 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
| 252 |
+
"\n",
|
| 253 |
+
"raw_inputs = [\n",
|
| 254 |
+
" '''I've been waiting for a HuggingFace course my whole life.''',\n",
|
| 255 |
+
" 'I hate this so much!'\n",
|
| 256 |
+
"]\n",
|
| 257 |
+
"\n",
|
| 258 |
+
"inputs_tensorflow = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='tf')"
|
| 259 |
+
]
|
| 260 |
+
},
|
| 261 |
+
{
|
| 262 |
+
"cell_type": "code",
|
| 263 |
+
"execution_count": 22,
|
| 264 |
+
"metadata": {},
|
| 265 |
+
"outputs": [
|
| 266 |
+
{
|
| 267 |
+
"name": "stdout",
|
| 268 |
+
"output_type": "stream",
|
| 269 |
+
"text": [
|
| 270 |
+
"{'input_ids': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=\n",
|
| 271 |
+
"array([[ 101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662,\n",
|
| 272 |
+
" 12172, 2607, 2026, 2878, 2166, 1012, 102],\n",
|
| 273 |
+
" [ 101, 1045, 5223, 2023, 2061, 2172, 999, 102, 0,\n",
|
| 274 |
+
" 0, 0, 0, 0, 0, 0, 0]])>, 'attention_mask': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=\n",
|
| 275 |
+
"array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],\n",
|
| 276 |
+
" [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])>}\n"
|
| 277 |
+
]
|
| 278 |
+
}
|
| 279 |
+
],
|
| 280 |
+
"source": [
|
| 281 |
+
"print(inputs_tensorflow)"
|
| 282 |
+
]
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"cell_type": "markdown",
|
| 286 |
+
"metadata": {},
|
| 287 |
+
"source": [
|
| 288 |
+
"### Model:"
|
| 289 |
+
]
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"cell_type": "markdown",
|
| 293 |
+
"metadata": {},
|
| 294 |
+
"source": [
|
| 295 |
+
"#### AutoModel API"
|
| 296 |
+
]
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"cell_type": "code",
|
| 300 |
+
"execution_count": 23,
|
| 301 |
+
"metadata": {},
|
| 302 |
+
"outputs": [
|
| 303 |
+
{
|
| 304 |
+
"name": "stderr",
|
| 305 |
+
"output_type": "stream",
|
| 306 |
+
"text": [
|
| 307 |
+
"Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['pre_classifier.weight', 'classifier.weight', 'pre_classifier.bias', 'classifier.bias']\n",
|
| 308 |
+
"- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).\n",
|
| 309 |
+
"- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).\n",
|
| 310 |
+
"All the weights of TFDistilBertModel were initialized from the PyTorch model.\n",
|
| 311 |
+
"If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.\n"
|
| 312 |
+
]
|
| 313 |
+
},
|
| 314 |
+
{
|
| 315 |
+
"data": {
|
| 316 |
+
"text/plain": [
|
| 317 |
+
"TensorShape([2, 16, 768])"
|
| 318 |
+
]
|
| 319 |
+
},
|
| 320 |
+
"execution_count": 23,
|
| 321 |
+
"metadata": {},
|
| 322 |
+
"output_type": "execute_result"
|
| 323 |
+
}
|
| 324 |
+
],
|
| 325 |
+
"source": [
|
| 326 |
+
"from transformers import TFAutoModel\n",
|
| 327 |
+
"\n",
|
| 328 |
+
"checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
|
| 329 |
+
"model =TFAutoModel.from_pretrained(checkpoint)\n",
|
| 330 |
+
"\n",
|
| 331 |
+
"outputs_tensorflow = model(inputs_tensorflow)\n",
|
| 332 |
+
"outputs_tensorflow.last_hidden_state.shape"
|
| 333 |
+
]
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"cell_type": "markdown",
|
| 337 |
+
"metadata": {},
|
| 338 |
+
"source": [
|
| 339 |
+
"#### AutoModel for Sequence Classification Class:"
|
| 340 |
+
]
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"cell_type": "code",
|
| 344 |
+
"execution_count": 24,
|
| 345 |
+
"metadata": {},
|
| 346 |
+
"outputs": [
|
| 347 |
+
{
|
| 348 |
+
"name": "stderr",
|
| 349 |
+
"output_type": "stream",
|
| 350 |
+
"text": [
|
| 351 |
+
"All PyTorch model weights were used when initializing TFDistilBertForSequenceClassification.\n",
|
| 352 |
+
"\n",
|
| 353 |
+
"All the weights of TFDistilBertForSequenceClassification were initialized from the PyTorch model.\n",
|
| 354 |
+
"If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.\n"
|
| 355 |
+
]
|
| 356 |
+
},
|
| 357 |
+
{
|
| 358 |
+
"data": {
|
| 359 |
+
"text/plain": [
|
| 360 |
+
"<tf.Tensor: shape=(2, 2), dtype=float32, numpy=\n",
|
| 361 |
+
"array([[-1.5606958, 1.6122808],\n",
|
| 362 |
+
" [ 4.169232 , -3.3464477]], dtype=float32)>"
|
| 363 |
+
]
|
| 364 |
+
},
|
| 365 |
+
"execution_count": 24,
|
| 366 |
+
"metadata": {},
|
| 367 |
+
"output_type": "execute_result"
|
| 368 |
+
}
|
| 369 |
+
],
|
| 370 |
+
"source": [
|
| 371 |
+
"from transformers import TFAutoModelForSequenceClassification\n",
|
| 372 |
+
"\n",
|
| 373 |
+
"checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n",
|
| 374 |
+
"model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)\n",
|
| 375 |
+
"\n",
|
| 376 |
+
"outputs_tensorflow = model(inputs_tensorflow)\n",
|
| 377 |
+
"outputs_tensorflow.logits"
|
| 378 |
+
]
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"cell_type": "markdown",
|
| 382 |
+
"metadata": {},
|
| 383 |
+
"source": [
|
| 384 |
+
"### Postprocessing"
|
| 385 |
+
]
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"cell_type": "code",
|
| 389 |
+
"execution_count": 25,
|
| 390 |
+
"metadata": {},
|
| 391 |
+
"outputs": [
|
| 392 |
+
{
|
| 393 |
+
"name": "stdout",
|
| 394 |
+
"output_type": "stream",
|
| 395 |
+
"text": [
|
| 396 |
+
"tf.Tensor(\n",
|
| 397 |
+
"[[4.0195428e-02 9.5980465e-01]\n",
|
| 398 |
+
" [9.9945587e-01 5.4418371e-04]], shape=(2, 2), dtype=float32)\n"
|
| 399 |
+
]
|
| 400 |
+
}
|
| 401 |
+
],
|
| 402 |
+
"source": [
|
| 403 |
+
"import tensorflow as tf\n",
|
| 404 |
+
"\n",
|
| 405 |
+
"predictions = tf.math.softmax(outputs_tensorflow.logits, axis=-1)\n",
|
| 406 |
+
"print(predictions)"
|
| 407 |
+
]
|
| 408 |
+
},
|
| 409 |
+
{
|
| 410 |
+
"cell_type": "code",
|
| 411 |
+
"execution_count": 26,
|
| 412 |
+
"metadata": {},
|
| 413 |
+
"outputs": [
|
| 414 |
+
{
|
| 415 |
+
"data": {
|
| 416 |
+
"text/plain": [
|
| 417 |
+
"{0: 'NEGATIVE', 1: 'POSITIVE'}"
|
| 418 |
+
]
|
| 419 |
+
},
|
| 420 |
+
"execution_count": 26,
|
| 421 |
+
"metadata": {},
|
| 422 |
+
"output_type": "execute_result"
|
| 423 |
+
}
|
| 424 |
+
],
|
| 425 |
+
"source": [
|
| 426 |
+
"model.config.id2label"
|
| 427 |
+
]
|
| 428 |
+
},
|
| 429 |
+
{
|
| 430 |
+
"cell_type": "code",
|
| 431 |
+
"execution_count": null,
|
| 432 |
+
"metadata": {},
|
| 433 |
+
"outputs": [],
|
| 434 |
+
"source": []
|
| 435 |
+
}
|
| 436 |
+
],
|
| 437 |
+
"metadata": {
|
| 438 |
+
"kernelspec": {
|
| 439 |
+
"display_name": "Python 3",
|
| 440 |
+
"language": "python",
|
| 441 |
+
"name": "python3"
|
| 442 |
+
},
|
| 443 |
+
"language_info": {
|
| 444 |
+
"codemirror_mode": {
|
| 445 |
+
"name": "ipython",
|
| 446 |
+
"version": 3
|
| 447 |
+
},
|
| 448 |
+
"file_extension": ".py",
|
| 449 |
+
"mimetype": "text/x-python",
|
| 450 |
+
"name": "python",
|
| 451 |
+
"nbconvert_exporter": "python",
|
| 452 |
+
"pygments_lexer": "ipython3",
|
| 453 |
+
"version": "3.11.6"
|
| 454 |
+
}
|
| 455 |
+
},
|
| 456 |
+
"nbformat": 4,
|
| 457 |
+
"nbformat_minor": 2
|
| 458 |
+
}
|
study_transformers.ipynb
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Transformer Architecture:\n",
|
| 8 |
+
"Encoders, Decoders, Encoder-Decoder "
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "markdown",
|
| 13 |
+
"metadata": {},
|
| 14 |
+
"source": [
|
| 15 |
+
"# Paper: Attention is All You Need, Vaswani"
|
| 16 |
+
]
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"cell_type": "markdown",
|
| 20 |
+
"metadata": {},
|
| 21 |
+
"source": [
|
| 22 |
+
"The transformer architecture has two parts, the encoder and the decoder."
|
| 23 |
+
]
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"cell_type": "markdown",
|
| 27 |
+
"metadata": {},
|
| 28 |
+
"source": [
|
| 29 |
+
"# Encoder:\n",
|
| 30 |
+
"The encoder accepts inputs that represent text. It converts these words and texts into numerical representations. \n",
|
| 31 |
+
"These numerical representations can also be called Embeddings or Features.\n",
|
| 32 |
+
"The Encoder uses the self-attention mechanism as its main component. It is bi-directional.\n",
|
| 33 |
+
"\n",
|
| 34 |
+
"An example of the encoder-only architecture is BERT, which is the most popular model of its kind.\n",
|
| 35 |
+
"\n"
|
| 36 |
+
]
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"attachments": {
|
| 40 |
+
"image.png": {
|
| 41 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAABhaVRYdFNuaXBNZXRhZGF0YQAAAAAAeyJjbGlwUG9pbnRzIjpbeyJ4IjowLCJ5IjowfSx7IngiOjQyNCwieSI6MH0seyJ4Ijo0MjQsInkiOjI4MH0seyJ4IjowLCJ5IjoyODB9XX2kMJZ8AACQaUlEQVR4Xu19B2Adx3nm9zp67wBJsDeJRV2ybEm2XGW5O7bPsZPcJXHJJXHiu/TmXJxyKZfmuMtyt2zZlnuTZXWKReydANF77+W1+7+ZnfcWjw8gSIIEQMxH/tjdqf/M7v7f/LOz+zxxASwsLCwsLJYYvM7WwsLCwsJiScESlIWFhYXFkoQlKAsLCwuLJQlLUBYWFhYWSxKWoCwsLCwsliQsQVlYWFhYLElYgrKwsLCwWJKwBGVhYWFhsSRhCcrCwsLCYknCEpSFhYWFxZKEJSgLCwsLiyUJS1AWFhYWFksSlqAsLCwsLJYkLEFZWFhYWCxJWIKyWBKwv/piYWGRCktQ1wEikQhGRkYQi8WcEGBsbExJKtxEwP1UYnCXcTk4duwYuru71f6lkI7H48H3vvc9/OhHP3JCNFjG4OAgenp60Nvbe0GZ6epwh5n9dOnmC/bJ5eRPp8d8MTk5iYMHD6Y9h25cSbssLJY6LEFdBzh8+DDe+MY3or6+3gkBPvGJT+Bb3/qWc5QEiWAuw8n4ywUN+T/+4z8qfYhLNZ7f/e530d/f7xxpNDY24i1veQte//rX47WvfS1e85rX4IknnnBiL66viTfb+erkTuf16tvkUtvDOkkyBw4cmFe//vSnP0VXV5faJ8l/9KMfRUdHhzqeDfMp18JiucIS1HWA06dPK4P27LPPOiFQhrG0tNQ5mgm30abx5Si9ublZGWCGtbe3K2+FcHtUJMChoSHnKAnmZ3rm/X//7//hnnvuUeEsm56PKdsgHA7j/Pnzakswjl4gyWn79u0qzIBtKywsxOc//3l87Wtfw7333ot/+7d/w+joqIo3ZdHjMDDto66tra1q38DEkQjodRIsi22jDgZM19LSgoGBAXVs+mZqakql5TaVsJiWedz44he/iM7OTudIw/SJG9SFaY0Oq1evxsc//nGsX79eHRNNTU3o6+tzjmaC7TTnzMLieoH9yffrAH/2Z3+GaDSqDPx//Md/YGJiAr/8y7+M//t//y9qa2uVQX/ve9+Lqqoq7N27F0eOHMFv/uZv4ic/+YnydnJycvDMM89gy5Yt2Lp1K06ePIn9+/ercu+66y6cPXtWEQSNJ0f0f/AHf4AbbrhBeWg8/sUvfqHSveQlL8GePXvwO7/zO0of6kJDTAJ71atehbe//e2KOD/zmc8gFAohLy9Pbf/n//yfyvB+6EMfwmc/+1kUFxcniOSf//mfVf6/+Iu/UMcsn2Ff/epXcejQIXzzm99UZME2/+Vf/qVqI8sg6VBftouk9ru/+7uqf77yla9genpakTm9Per3/e9/X6Wlzn/zN3+D7OxsFcf0w8PDeM973oOXvexlqqzvfOc7Kh0Jkf1TXV2tvFW/36+mIp966inVTvb/pz71KfzLv/wLduzYgbe+9a1417vehU9/+tPqHLDP2f5Nmzbhla98Jf78z/9c9SN1/fCHP6ymSnlr/tIv/ZIi03/9139VOpKk3v3udyuPmQTMvszPz1ee5osvvoiPfOQjuPXWWxP9Z2GxnGE9qOsA9CA4BUaDdurUKWWsaPhXrVqFuro6RTaZmZkqLY2smTZ68skn1eieBpUGnIRDQ0sjt3btWkUmHNn/53/+J9785jfjH/7hH3D//fcnpg4Nwf3xH/8xPvjBD6qpLJZHz4meVFtbmzK8NPYvf/nLFRn87d/+rSqLhrukpEQRRW5uLs6cOYOCggJluN3GlW
TKY7aL+v7TP/0THnzwQdUGehy/8Ru/oYw3PY6f//znyqPisyyWReIjsZFU6LXQy/jCF76AmpoafOxjH1Mk9thjjylC+Pd//3dkZGTg+eefV1OIJIK/+7u/U6S3efNmNDQ04JFHHlHtZNsqKyuV7iTPH/7whyovCfYd73iHCqfOJHsOEP7qr/5KEdSXv/xlRWA8/vu//3ucOHFCkWt5eTnKysrUOSTpcaDA6T62haTEMJ4PTvlxYEECZr087zwH1I/ESp1IUpacLK4XWIJa5uDomkZu27ZtyiDSiHNajKNqGvvjx48rj4T7xNGjR9WonaDRJTnRONJToFf0wAMPqDiWWVFRoZ4LkSTordCLooFlWsZzSokEcdNNNykDzbqoB4mJenzgAx9QdZMsuH344YdVPL0RGlF6IjSu3Gf569atQzAYVPUTrIOER8/kj/7ojxRh0JuhN0hyov6sh6Szb98+5UWRuEiyfF4VCAQUcXGakl4T9bvzzjvVMy16WiRals2FGWwbCYPTdCR2eo1f+tKXVB+QQEhOJLnHH39chbNPSBIkXQ4GSJrGi+LUKttEkmNetpl49NFH8b73vU8RJPuLg4aNGzeqOOp98803Y82aNYqUqAfPBwcUHCS8853vVH2zc+dO1Z7x8XHVHh6zTQTDeS7tpIjF9QJLUMsc9Dxo7LKyspRRptGiV0OyImh0+VyHxpojcho7xtHw0xByhE/Q4NI40tjSmNMY79q1C+fOnVNpSCYkAJIRSYIkxPI4sqcxpnGkh3LjjTcqT8rn8ylD7wZJ6JZbblF1EHyWY4w3y6OeZkECwWdhbNef/MmfKO+Ehp/TYayP05Csm/WSyNh2TnuxvTTkJEWWxWdCrI/PsThtxvrYFwQJjmTBdrBtb3rTm9TzM+pBMqQnRg+Q/URdTVuZlnqQlOihkoToBZoy6QERjLvtttuUHjxPrMfEkch4Llg/y+czRBPHdpOA2O88L9ya8klcTM9+4UCEBEdiZNkskyRvYXG9wBLUMgcN9YYNGxQh0MDRc3rhhRcUURA0downmJbGjUTEfRq5oqIiFUciomGmATYP8Pl8hQTAZyjvf//7lfzWb/2WGrXT4DIviYBgPSQxkhJ1oVE2RGTAKTXWSdALYxn0oGjwOV3FBQHu0T/Jhs+D6P3x+RaNMD1AgnXR+HPKi14cdSNRkIS4NZ4YF1YwL+uljoa4CZZ33333KU/PtI8eE58v0Suhx0eCoCdEMIx1mfpImBwM0EszYDsM6XPf1EfCYd+bqdbnnntOEQt1dZM9Qe+NnhDPJcG28rwQfPbGgQP7hVO5htRIxNSb5Zm0FhbLHZagljk4YqdhJykQNNr0ZIz3Qm+BzynoDXBajEaV3gRH9yQgel8ER/Nm6o8jcxIPCeZ1r3uderbz0EMPqak0PrMhSC5m9E6Q4Fg2H/yTEFgvn9XQoHL6jKD3ZcrgAgoaXupO74HG1r0SjyCJGq+OhMOFGFwUQXDxAKfduOiBz2Q49UaQXKkLp9P4zIbTkL/+67+uFmGQJEgmhgTpdTEvy+TiBXp+TPenf/qn+PrXv64Iim2i0Wfaz33uc/jGN76hFkVwQQNBYqBXRkJmv9PDoVdEMIx9zwEDPRv21Sc/+UmlG/WmLiROhpOwqQfLIOkZknvFK16RmG7kszbWx2lCergkfFMX+4qkxud6FhbXC3x/xSe2FssWNOC7d+9OjMz5bIUrwTiypuGj8aTRpPGikSV5mOdCnG4jWdFLIlEwD4mAZXElGPOQxOhZkUDoAbAuYwj5zISExBE7SYTkSGPO/LfffrsiC04XMh3DOfLndBzLefWrX62mycz0FfWhB8GyDbjPcoyXRm+Lhp7h1JV5SdA08vTquKXh5wo6rsCjjjTmXDxAUuIzHdZhpvjYLhIQy2A76CkyrVlcwr6gt8Q0rJtxJD96NmwTPTsSBOsm0bMfqO8mIW6SE70nEgnbt13Oww7xaukt8RzxOdgdd9yhvFCWs1
baQg+PpMWBAvuSOrHfWDcHDayfZMs62R7qa4iPbWceeoAWFtcL7DLz6wQ8je6pndTj+cDkcecleaWWS7jDLrWehYRbVz4n4so9Lr0mCRBXUzfTF0RqPe5+MvtuXblPEnPDnYcwfU/hvjs9jwlTfmpZFhbXA+xVfZ3AGDWD1OP5wOQxW2NQ3UJDSHGHLSbc9XORwC0336I8nmuhm6kjXT3ucLPv7rd0hGLiCEM6PDbnwcB9XtLFUSwsrgdYD8pi2cBcqm6D7AafYXH60Cypv14w1y3KODfZub0uC4vlDktQFssGsxLUlVzBV2rHF/LuSacLy5/1FpUMzGPyxZx0lpyWDuypuCJYgrJYNnATlLLb6t+lccTF7MVi2hPxe5y9JHQLZ0N8Rp65084PM/szWTb3jFx5LdcvTP/oPuJfj/McJd3ZtbgYLEFZLBu4CYrQhiAO8znbi5mApNGYG4ttSEw70ukxU/uLt+VyoIlfDwKog1suF6Y8Yq5yki0ytV8MC9sHV9pOA62VHkDo8hai1JUHS1AWywa8VJXZcghK/TV2jB9GjzJgGWIp3YE0B+xf9ilfrSP7G/2uRE99ypYHrvR8sK2Ume+pJ4pdTl2x2LAEZbFsoAmKq9a8cuE6gT1T6N9/FIM9Q4gKQcWd5zDxhBXQ1sKkVyP5OZ7RMN21NiDxeAwxti3lVky0UZBsT9IXSUZzzxxxezktcOXzeuH1BRCVvqROXukvxrj1uVTEJbP+Z2rRZbqLvDzNTY7U0i4PqrR5FzOLtio4Bk/Ah8J1FSi/eSvAD6hIOHvA+FQWF4clKItlA16qHND79J0OnOnFsZ88jXJfDkpziuDx6a9izLz/eeAOSD1Og8W4I1JJM52KSi/5Y+LUsXOg2MPIRdqXFiafCE2CV9wnMr4MBpyKLhMzlE1uE/oSrNPZJS5Q3x1pYPIw8QUZrhHS6GVGEp4YxqMTaBzsxHR+ALsevAcoCcn1K4S/aPouP1iCslh+IEt1TOPUN36CsoJSZGXlwCejfa9cyTEx9FGv+BhiV6OxqCI1elyEMmX0vnjAq547s1z9Kq3evepwr0r0Od4h9aatM8/X/BLmE4lIWMTn9mikrYk2JHauAFoX7eto3YyJSGo5HzCPKUtpq/YJD810PKq2PPJIQ41XEZNzF5Vd7WnM3p5EiUnXckGQKPeioG5J/Uxr/aKPNyoDKTk5Ya9uX0dvD/pyI7j57fcDGfqTZBbzgyUoi2UBYwCUTRO+6XvsIPzNo8gvq8CU3PPTYgzCYsGHZeQ6EJnCZHQKXhIV88olHotrU+9T01UzL3n3kTFOaisR3M5MfZXgVOQTZg2Jj5jvC4qEEBBjRyIiKY1JG/vCExhBRNKLIVSKuQlqISD9I7o4PadCrqx4TT7ab/DALwScKd5ZiWxzI9I2GVhEpMLxgAdD0q7h8BTCkjpJUBRzVpK6pJ7DhYKuaa6y3VcEtxRqI1s5EVkIoDiQiTw5h8FwDP6oX1grgP2tx7D+VbegaPsaVYTOZXExWIKyWBZI3NBcCDEFnPj8D7Epd5X6rt64EFN3KIaG6BiGMn3wFOXCF/LDJ0ZdWVuBMXjamZr9klepJfpaGw/TPhKUf1rMeccgisbjKPdmKO+p0xfBec8EYsXZ8OZlwhvQnhY9Qt7BC3YbS3/xeRh1obBUVbI5mC+0eycbQ1D8K8cyTvCOTKKoewxrw3KOJGw8EEejZwo9OX6ECvPh5QeI5TxpD2smWKry6liYHEUX3HyxvPmXafo9KgMgXlr+aQnrGcbqST/WxWSAMcUGe1A/2I54RQ42PXCnnGSOs+xU33xgCcpiWcBxnMSVEBkBTn3pB9hUuk7d/CNi4I76x9BcLKPX7Rvgzc0UHotIepoBY27kr9gDzVezX/LKiC7QHeEu5mKmiIsIjDEPRX0oGI1i5Nh5ZPSMIs8fRGMgDN/u9YgLQU17o2oKk4bavGWjRvALAF2KQyYi7MHLL1m3h/9YhvahvMiKeJ
B5qgPBs+3ICAXQPj2Ckcp8ZO6Q9mXon0m58DzpHmSIIicRahZTV8b8wLy6lCRMDSZct/biZWqzKe1yPHND6pneIDLk3E3vPYv1Q0BFPChecBRdI/0YCkWx9V2vEBZLr4vFhbAEZbEskCAoLicfjuHkl7+PzRUbMS13eV8gikO+EWTcvgU5JfnwiUGIxSJiAPS6c206CDGQyqjPvOQvx1CoPFIMt6o02VmYG0kKisURktbGRyfR/OIJ+GUUnre+BpkbqjAZkw5gGxRBqT/KOC4cpLeE9HVrpKf4UVrV2EuEYRGXeEXfmGzpERZPedH64kmMToxjVLzDdTfdgFhOUOKlftUuSTSjWY4SstHBJpJnV/Y1o83MInByzQhPF5YEy0rGzJaWq0UZl9r31D3TI55h6zCGD5zDZm8OisWr6hztRV9wGje++9X6YWKiZIu5oIdfFhbLDMo4iCGZ9sYx4okgLMYtmh/ClHhOnngEATF1PhndUvzxqAi3cSV8dsMLn1saSyO0G/MVphfHbcY2NX6+YtJz6xX94p4YRr1hjBb40Z3nUVN7UwXSNs80PJLIL+32Sw/4nPawbT629wpF95ERXa7/gvDLFy9FBg1RTxRjGcBoSRbqvOMYLs7CdK6Qk3iG6qRIvBqScD4wIVxRyHB6xlFdlirXaX8sKued4VG1b4RhlIuF+YSI1TY1rSPqWAY+iTjRj3SrzoVIUC6oAJ95io6Tcj16SnIxWZCJUerOa03OlzK20jyNxI7FHLAEZbFswYEuB+okKr9ss2XkGlTUJV4FR+teoSmvH3GvT4lYdxUepdDMyTbiEhU+i1yQzslvylNmk1vnmPF8ymBEm+Y0YvI7W65C5NicrfBJPPNFRG+PtI1PbBhG94nmjaJH8EzN48v/p0szZSn61sK5xMsRlV/rRbCOmLSYq/fYKrbSz5MW1VOxXp5MRwftEym/yCW6BLPlyTetZj0mj65zpphw3asz0yX/EfI3jefnzs9pWHe4kaioYc4F0wTknHmdn0SJ0ruTPvHGzQo+U4bFxcCryMJi2YImK0LjJvc7Z6bUbR+jodBG0hCEMQjKoMhuTIRbpqQZ4XYuYRrmSaR18iYk5ZjVqa1gRpiIo0oiDUWbbCedQNUn9izs82I64EdUtu6ss8OUcKlw5VOKOMolar1UcUMKUgTEVsoZE5KKi6cUC3jFK4xiSjzeCMlLvA1NQMyhiShBSpKf4joDjpi/DmYczESqVgYXhM8sXsfLfiKdOzwNVNeJrhHRlYsnNMFxUYtch05ei/nBEpTFsgRveee2V7NCCsqAmQO9r42ISc2/EiZRaiaJ4oo1oqktKSZM5XOOWbz8V6LyOWWlxhtzqsA0PDABiQjmcR0ITJnUk1N/Kp8JdyQ9Zo+ZG5JvRtaZ+iwMtLfBLb0MvvPllS2XmivPMKGAomcRk5r9wClNSS8paei1J6O1NJqac+CGCnMSsERCHTth3Mw4Rylwl+nKlghPrU9BJaLuOkcijfOcTMOUZLYW6cCrwsJi+cExEEloYzAXvOKFhAJB+PkpHzl25/f7/AiIGGOWAOsRYZ7MYEi8NBpXCeZ0jhTAMgPBIAJ+nZdfuUiUwQrcskBIVxT1YtvMZ4nc8Pt8s8YpSFuof8AfSKjqTuf3c4oxbc75Q3fXjLLZT+5SzXGqkKQDQmYZ0gb2r9toUS++asC2zWbMeO74s/h6gYw2ekrkmD/rT5mzfc75VJzoTuaEm2ATlUziJDBwSDWJlHiLCzDbObWwWNpIvdfnAJOSfDrbO/DCc89jfGhYvTCqjBoNk4zg6+vq0NzYiGg0qt6tcY+qmXegrx/PP/cchgYG1So39bxB/o+PjWP/vn04+OJBRKanVXrGK6FBdMlCgyVyWpMyMTaG/S+8gLbmlhkETO/k9MmT2LtnD0YHh+CTOKML/3KxBSJRHD7wIs6cOqXSK5E49g3bd/LkKYxJ+VcCpYv014ztrKIpRHpNeU4hIdimunoc2r
cf0alp52VrrR/P6Z5nn5Vz15QIN+A+z11TQyMOy/mZGp9QdSvCc87p3uf34PiRo4hJHyTr13IBJF8qp6TmccsFYOUWlwRLUBbLHHPf9DQUNMod7e34ype/jK62Nnzlc5/HxMioMlQ+r08MVR/+4//9Kx579FsqvSqRVl8MHo39kMR/+eEvoLujA5976CEM9A8oI++TUfnevXvR1taOF4Wkvv/Yd5ShI3mlLj9WkCI5iE6MxCkOLtV0MSv116Y8jke/+jV0trTim197BPWnzya8jk5p98ljxzHY04tPfey/MCwES8Nu8mcFQviB6H1K0jz+459gz/PPJ+Jo8El6f/vX/0eVsxAwNtrdfPe+2pM01JD/MsSrO7x3P37yvR/g7PGTco6+mSDRMTmHzz71FMKTU/jGV7+KM0Kkqm0i3Abl/NSdOSPn5TE01dfjm19/BLFwmDWA3xk8dvgQRgYH8aMffB8/+eEPVZsZZ8pIDDTSCNNQ1IlziSFfi4WBJSiL6x4Z/iCefPJJVFVU4B1vezsmRkeVp0FDEhdDVVNdg7e8+c0yqvap6TASmgGnxkg+WaGQysuY02dOK5IJRyO48yV34XWvey3uu/de1J09J3YvkrBXqZgtzC2XChrVnvZOtDY1481veCNu2LoN+4U0vWLAEY1hVVU13v7Wt+EV992HQSHWCfGIOD2pCEhkdGQY586cxQOvfS3uvP0OPCfeSET6REHyv/ZVr8a2zVuUl+k2vNyfr7jhbqPZ59YdrjNJfRIY9Prx3FNP45Zdu/Gm178B506fw/DgsOjvRX5WLt7+lrfi9a98Daorq1Q7kvXFkeEN4tmnn8L2LVvwlje+AV0ywOju6pQo/Z7cPS99Kd70ugexfu1aNDc1SF62MamJIpvZhPGOzETq00SLK4ElKItljtlNBWGMxcjgEEoKisTgeZEdyhRj3S/21zw218aXYkbRJmNApK+7FwU5OeA3DnKzczA8PKziaOiDMsLPycjEgX37UV1dDb8cE6o8luUSlp0QSUEhlLclu8q7UiHpwbhEvLNDMh0Xws3NylI3c2V5uSIrD9/rkTQh8SJaGhvxd3/zUdTW1KC8pESFMy09JE77BaSMPGlXcWEhJiYmpF1cbadUUnF+eguSxxhmA5NmLpkPZkurpiJFl7GhYVQUlwoh5SivcGxoRPSSgYToQwKbmBhHY109Nq5fn9CRwjaOCQHn5+YgMxCAXwh3sLdXOpzvUMVlQOLFtx77Bn72ox/hXiEr9c6Tk59555J0+mpIARYLBva1hcUyhjFHs4OxMTFKkXAEXjmY4rs3YrCC4hXxt49IEGrs7BTjF+OXGQhJiP4aXEDShSNRRVAsJyheFRdG+Hx+ZEjc/r37cPToUbxaPCmveGAZwZCaOtQfadEP4vkVBXokmZI3JPniUh4f7pPguEghUfksuIC8eCzC52X+UBDjU5Ny7MHE1BRy8vNU20y9tavX4Dd/8zfRK6R87uxZ1TZ6i2x3Vk42JiYnMR0O634QYbv8ohuh6iRRcCNCj1Lp7MBHr9M55j7rJBgWkn5gHiOzY/ZYkhTbOcH2yTG/bJGVmSl6BPSil1AGHnroYdRIG7eI9+j3BdRiCrU6UOX3Ymp6Wp0PGjsOMDJlQOGVY/b/3S+5Gw8++CCeefoZSe9BppRH70wtCZccoUCmlMFFIlxoIvkS+xlKN4J9Zj55ZLGwsARlcR1AmSK9mwZhMR633norTpw8jmf27sXIxBhq16/Fpz79KdTX12FwYADtHW3o7O5Gb1+veEzdePxnP8Xk+DgmwtO4cdcOtLa34bkXD6JXRuBbt27FV7/yFRw5cghHjxzBZz7zabzs3nswHQljdGwEP5O8I8NDQmZRMdRePPTZz+LUyRNivAN4+HOfw/Fjx8SwBvH5hx9W4WrhAilSLPDsrdBQRtoRgm/ZVFRVSUYv9u7fj2effx67du/G0888g8ceewxHDh3CvhdekLZMYGRkVK1WfPKJJ9
SiAtaVn1+AnNxcPC/5nnr6aWzfvh3Hjh3Fww9/Xhn2ltZW9PX1obOzA2E5fmHPHpw+fUrpTDLaL3V+7WtfVft7JO6RR76uiOmZZ57GF7/0xcQngeYChwemPW7Q8JM0t27bhj1y3n7x9FPIFk+WC1k++clPqPZ86StfxsnTp3HHnXdhdHQMjY0NePqpp9RgZBpRac8NOHz4iBDQ04jJAKG8rAyf/MQncb7+PH7645+ip6dXTWmOjIygo7MTT/z8CWcg48X05BT+7d/+TS3EmJqYxH/918fQ3dWj+vI/P/af0iedjqaEaSW3F2uxxXxhCcpi2cP8lIaGGER1Vet1eDT80+Ep3HTLTbjlztvx/L4X8JoHH0BeYT5yc3OQJ9LU1IjBoUHkFeTi8NHDMmKP4/vf/y76hawi0TDWblyPl953D54Uo3//q16F6poalTczMwNNrc2o3bAOLe2teObZp5Xx/MlPfoxTp04qO0Xd8qWuQEZQfWAhpzAPniC/aBFHTl6Oo7vWk94d9y8Gdwo+agrKqP9d7/5lHDpyFJu2bMXNt92OYEYWMrOykZdfKCTcgCfFQ3jdAw8oL+O55/fgyaeekdwehMVov/uX34Ou3j7k5OTida97ABniYRQVFaop0Gefew4lZaVCSmfU1Ob58+fx+OOPK08yJpXTa8oVgmO7s7OzpYxs0S8uBByS4xzpv4giGoIeB38CxXge/MsY057E4gSmc7aT01N47etfj9LKCpw9X483vf1tyJQ6MqXOwdER9A0OYo0MNp569hkclz4fHR3Fd77zHfG4JpRneNfdd2PT5i04dOw43vj2t6t8eQUFiugCGRn4uZDZwPAI3vWe96qvQTzy6KPoHxyQtuvfEissLBCdtZlkO+m18ZjEHvDTW6SeMsBQFx1b5BaLK4X9WKzFsgDNuDIT/FjsUBSnv/wDbKrcgCEf0OyfQnNhHEV3beOMHQIR+hXa8LlNhfo5eDmg4TOXPX0XZTCdRNxvE6/h+eeeF4P+OjHWGZKIUz7Mo9N4/Zwe48+062c1iRVdgiEx4j/44Q/xyvvvR1FxcSJcPWfiVgw766AoIy9lcEqNK8M4wjd6RT2STuo5sv84xgYmsHP3diFV/m444yW/SpWEqYex9Aj4vhBXIPI5jVFcLZ8XoQHfuWsX1q1bx0g2OlGv2jrHicUiTj6PHLNtlVWV2C1eGtNERecAv3QhdRpDznDVpwK2iW1TZTJOiJnaZ3pC6Gxqx5Ezp5BZUoBdu26AT323jmVoj5LPicyKPb7HNDUdgc95JysifecGwzhld+TwIbS1teH++1+hwlgaPVPqxPOmW6lhnkF6SDqS9oyQ8FHxHt/0xjeqc2La4ZN4ttOA543lMJ+7PIKfNeI0YP60By3PHsLaUT/WhEPoGx7AYDCMLe+6T0YUTOnOmXo2LQycK9DCYumDBsMNdSz/eXtT+BxAGVIxEqlQOV2Gklum4/MNmhvmM1JQWITXyag9SzwCj3gIKg9H/mKQlFEyXg/rccqj8aeEMjPxpje9CQVFRTPKJJRBdOpN7rM88TlInilgKyjGyLphDrmlNhEpw+hAIqG+4gOAawr5C7UUtp/tfcUr70ft2lpml1j9ThfLUcL2SBrVNwx3xVHnu196N3bu3KnCCb7ES3JypydR8ViRMfvcgOQkh8nhA0ES0WlYbyJUDhiqiFt22A5PwKfaySlbcy6MsG0R8XrWrt+Ae+67T+rxKaKQAvR5lzQsg8IfRKTw13vVL/g6hEPP+AE573x+R0+J5bJvFBlKOnWtsI2S1hClagn1NkKd5S/Plxw6LdNInGPZJHvVYi5YgrJYfhAjwTtf3eLcKoOsbRGXDyvDqFPOgDEJc5kFGtiMzAwhmgwx9vpbalExqcrYSxw9HhKUMcYUQ0IUTnlxAQXDVXkuoUEjzL5JQ5h9juRVOhpO2aqfeBdrF1UMlUxPqBC2X4RGXP1UesqxEZKYWjwuZXJ6iyaZXxWncLqR6S
nMmwpVq+Sj0c7Ly0u0g6De7mNC9QnPgYQb0aWwv3huWJ/0pwRr/Wj8mcYhJSW6reqv1KuJU+JJHDzfTlyqZGZlqoUTetpN102yJhHpvkz2ieoXEZZL7siQvJzejMqB5hLJL/nMFB6Fi2D4/huJncL/7vpV/8tW9T+3TngqJNpiHrAEZbEsMGOVlBhAYxDEauhFBhzkinCrRq8iZkl3Yl+SJ8QJTy9ilJSllnL5lbi4H76YD/449yWM8azakRnlOsIl3Nx6aMBJPizX+Wf2+SCeYsL5j1DlSj2syyd1UVd+B1undOIT6bS42+oWA9UckYSh5VaVzTbpdmmROlT7kv9YLyXOaS7VvxI6h7jTKAtNUf1AHflNPSmPYfS8hAm07tLWmK5ftYPqOm1Qx5KO+eYSlqn1ly3JSf0TT0fOn/pmhicg54Vfhddfhtdfh3c8LUlpthSjP+PdbTL9o/RMpGOc1tPoTobj1ky9EhJlcYngPWRhsSzBG56/6k7zM8UVdOI9Rfw+xPxexCQiLsJt1NlyJG2Ex+lFbIsvhog3KqNtKc87JTLuyITItAjj6AFomZ6HRIQ5o2K9IuKxULgfE8Up+ljqlO2UpJuQ9GEhEDHJCMowP1cMdzwcU182j4iO9KbcEmG5aSTsiNEhTGE9on/UEd2+SQmnTImwzdRR6+OWqI/lMs+liOlL5pXzJH0/QY9FTE9schqZXiELZbk9Urf52RHt3Sh9RQ+1ld4Ii08Slpx6Ox/hSr4p6dMJaf+EHE/KcVjKi8jfmOxLv8wmUidlytmmPZZ0Wh8t06LrpHP+6EEG5Zzxt8IMRQmVmR3JaelqPrAEZbEMoW9voRr4IxFkc0ppbAL9XQMyYiVdcXSb/Cdj5Hn8YyqdUuyLGl1neDOQ5c+SbQgBFaNf8OTCA/OyqwLtzsWEOsf5vTcaL72VHSVqlK08RAkTA03jHfIGMdHRj4KxGFZHMzDZ1ifRMp738R0fqZsj9LQiRbgkMW8nEudW0tDT8EUCyIjnIseXj0xfhv42odRv2kgKUfo4Qn0Zfumiy6Owbracv9GFyShinYNYFchBcHAcY139Uo30vdKD58B4J86+aMG/3HLPeJ9atBdkxMetyselIizD3T8xOWYJ+vx5OOXIvhfRU7cz230xYfmOxlKOKhEhOUfDrV3IHosiR0Y86huBIpwiVRWrvtUtspgbdhWfxbIAnwUpI8CHCMNxnPryD7CxuFZsdgwjoRhOBSZxJhRGxaa1KCkrUe8c8bt4ZuWeucjTXeyKLBTEWNF6yy2R4c1Fb9sYpgbDKMorQChbSEtEbLka0UfiEWX7lf13cs8FmqKYs+RaP5PR4LEWaR9f7vUFEBf3YbKjD8OnG1E1HUCBuFlnYyOY2liOirVVCAU4FUXjqOunuXMj2R5utZCbGENy88cCCEwG8eNHDyLDn4l1G1ahsCoH2aWSJCR9LV4ihwA0vnoZvNbf/L006JII4WVE/QGMDY0herIJed0japXkoPgfPaE48reuQ05hATIznJeEnboJd58RyTh3W7kRMlQkJ+QknTPSP6VenM7Ih3g4/DyVSiJwBgkO9B7/Xk4bdS7+fEhUBkzD7d2IH23C1mg2isP8EU0veseHMBAIY+M7XgYE+ASQeUxdl1fnSoAlKItlgQRBce5nAjj+0PewoWAVAkE/JvxR9Aej6AnG0Bodx3iGX62m44hZQbJwz21q9daIE+vhY20u0fYjG2XoPDuBn33zBfE0ipGVn4Oc8iBCJcNYtzMf2QVBpRPNb7JMN0y4jlMjaMfI8sG7WbasnpXIMZc18zt+sah4aJEYQoMTqPJkoljIKSieQG8wjibvFKYypYyQEJTkIdnQ+JN8dG1GG/7lYgTpM7oL0ib+Um9MRvhe8QUzPHnInlyLx796BKdebECWkESwKIycmgg231aJ8g05UmZECpUeTyGGywVL4TMs/qZTdHgc1VMe1EjbWP6UtKfDF0FzdALe3CwEMjPUS8
/SRJWTg4ykmXK2ql2p4oDP1JQHnIcjz7Wgq3EapbVlCJV7UVaRjawsDhLkIvLoD8cqz5L5nY3eSRzMAEMNVK1OgCJ+kcDENDLHplCNTJTFQsgOe8Q79eF8XxtQmYN1D9zhEJQeACTrskgHS1AWywIJgqLLIOj/3hFEmwZQWlout3oMMV8ck+L9DGb60IlpvSzZEJSABkFnVWaFOwIpT4yZjH2V8JmPzxcSgyJGMlKKsW4fvvPFIxjp9iPmHcWUrxk331OMm15aikAWCY0Fsqy5DAzjdDw/t+MGXwbllBZBj4D7PrkdA0JShULEeeItZsiWuccCXgz6gVEh40kx5gSNojQy0Ro1aaUspuop7Slw2snLzwJliNfnx9j4FPq7xjHUkIWWI+K5NEwq0phAPTbeEsAdr14nJDwl2UjW1FdKukKSUj0gunB6LSj6ZUmbCqeiyOObsSRmOX994gn3i+M0KoMEj3i/0ThdHeqgGsFiHPCcqRJF2Hc8b9yy7dJ//gzZEzdQLWrJxfE9I3j8e2cw4ZvGRKgH+QXD2LWjAHfdxc8ihdU50O009Ziy5wd3LuEdFIpuRaJHSE6Rl8+gOOUc8OPFpuPY/rq7kLGhQpiMvx6sWuLknH99Kw2WoCyWBfRLsfqfWDSgJ47zX/8xijMLkJtXKOTjPFAXWxMRcXGTuv21CWAgPRez5Q0QlMMM8WLEa0AWBgen0dDYhfONPegfiGJ0OB99XTLS9nbgjruK8fJ7NyIjNCIGlNNg8zUs2nviEnT1kqfccmbfvNyq98XY0mCKavzpQL9YMY7K6fdwgQGnqLgsPE4jLrUrD0o9aGJ7JIF6/uaXcoISH0BEMg2PxNDVM47m1h7ZDqgvLQQCQZQWr0fAV4J9e44hGp7G3XdX4/Y7i5GRNSW6ieGOC8WLzoZANVjPpUERE/+R5KTdPObzroB4U/qrfRIufTEu7Z6UyAjPs1Tp83sQifL3tdgv0jq6U4qYvNJHfLbEwYSQkTdT+i6Aqek4xsam0d83hp6uIdkOybkLo683G11dQoKSdTrehy2bgFe+vAaV5SHJG3HIN9lGfUyZH0yPKMJh20T/ANsZ0bTJ7zaeb6rHVHkGtr71PjWFyidxUpHql0upayXCEpTFsgEvVZKCMickqcYR1P18DyLD0ygvLIaf01409HLPq8kulVDTiF6+LKZevAiKxyPmkQ+V/DkYH42ipaUT586eR2/3IDIy81C2ugbVGzejfyiEb33759i9swAvv6cCuYFJ+GNTUiZr0IYtMXXnbA3me2slU+k9lsxVeDSqqh0iemGGJi21JDsu7oYYZggZQbwGqGdXYqQnpoWI+tDW2oauji4x2tIfvlwUFBWjvLICxSXFsl8IT0YII9NTeOSRb2Hb1k24eUcJsoLdyBDvLODRL9/GpC71oVQhA82j+kXoVJh26hdRRWNXGj3daCBxTpvUqkZadJ5PqYPurW4XJ1j94m3ySw6SVtoU8ItVD2ZKW6W9wl7T4QiGh8fQ1zeInt5B2ZKQBhCZjiAUyEZeTjGKi4pQWFiGeHA1fvCzwxgZHsStN6/HnbcWIzezQ+ochf5yhVwfrF/AZ1cabp1nB1Pp9mihh663MlgSwh2dnkDf+DCyyguw8YG7gQIZgEid+irhX71nMTssQVksK/By1VM8AtqXkThiZ5vR0dCCydFx7aFIhPIsaAC5paGl4fNkqOcS/lgOersmcfR4AzqHJuDLyRaDFkJRvkdEtgVcDSFm1B8QY5iLzs5R8Th8yM4aE4M5oQiQtkxTX3rMfVul5uOxSU/zrQ0nPwukG6mpkKvE2IbMYL60IROTI0B3+wjqz3egbWAQ4aAPwdwM5Ir+hXkhhMQTyskKIT8/E8EQS52SUsQYk9yEAGJiUYeHRpGfmyvkPi51jCkjHQ6T1CQZn8lJ39Hbow7k3+QqNwNOJbqPZ2s3w5Pt0W0zadmZMnDwhBDwZiEo4hXPNjol+g1Mor2tF+0d/RgYHk
dU2ujLDcEX8iMnJxNZ2QEhsBhCInlZAWlvpvpSe0A8MOo+HS/C4RNtKCrKwKrqbAT9k1LPhGgdEVW0J6p6X7Xr4jBnLtESVqNCklBxEp5VmIuyDasQXFcFZEiAXk/vwJRkMRcsQVksO3C6j6Nv3uJilzjvpYXWgscUWnRe2Uwk9ljt9wJ9R4ZwcN9RtLb0ICu/BGtvuAE33F6ErEqJ55wT01MIGlFjgUxZJo5IpHO2qWCey4Gpj2A7OHPH43GRDqD9TDfqzjagtbkTUxNAXkE58srLsGb7OqzZBIQKJZ04HDP0U/uTIlKYenHVBVMXt8JJ+z5/BLfdvR3YxLQSlsjvwKQ3cMepSGW6RRhBppONAffNMc+LAVXrk/FGyyQ627vQWN8kg4g+aV8UgWA2snMLUVZVg/K1NSirBXIKpPR8ycNzxurYJHrVqWD/sWxxMhPXhIHZT91eDExn2jyj7SmgTvSE/Vox/WSQSsyVycINS1AWyxK8atWMWqpxMcaDdoCGqwuoOzyBgy8eQ2trG/ILsrBVDPmNN21Ergxswe+v0pDodQcal2o/mN6IG7PdWYl0TGCMORUWYZwxpGPARAtw9kyvEFIjWpra1BRbvozM16yrxLrNNahelYdQsaSlsWc+IlUPwh2WTi+GCZdMHgH+4ve+jbtesx5v+v0dOi5dXneY2TceX8I7okJSqNHLQNo1Ku1qb5kQ6RKi7VDTdNORCDKy/NK+TKyuLUNVdYmQUgnyi8Swk2BIRiwrXd1uGB3dSJeOmC38SuBuPvtC9YcBK7walV6fsARlsQzAaS9zmXJr9jkmTbV+YmQHgKMHW3H4wEl0dwwhJ7tI/czEzpsqUVEtFz29CyElfjEcHv0gnmVxFZ82Hsky9TMUThsyPj30w26DpH6xBPloMB2f56gXSCVYmSqX+mFO2XVGcOZ0PerPNaCjvRuRaQ/Kymqwfu0mrK2tRmUNkFsk+ULCqZI3xvx82C918WkRt7o97DFjDE2bjJ5mq9uktGIWCf7mR8fx+KON8JQ14o8/9jrUbND9pPuBK+ak1ESTdMtdTUgWTUi6yWFx+jqmZXDQgeaWNnR19mGgfwTxiA85mXlYVbkaVeIZVVbmCxlByEhq4UICngo5R1SL/kdcRhAecFm4bpUG94ywLWaf6fWW/UHRx1pTrb6O19D5Es2aAzplEu59ghOp5rzr88ErVPeTRmoJFnPBEpTFMgFNY9L48ibnSjWDeK+Q0v4RHD58HA1NjcgvzsP2Gzdi2461qFodgJeeEu2C+2o3diLVXlzOHZHIIzskPtlqfZPQD+MpAkaNAi1n40JITeIddaCluRX+kEf0LUHNmjKs3VSNCjHc2bmSVk0XiZh6XLozKEl0PHJLKpjRyczpS/WMTus0egz4u9/fi5GufPTGj+C1/6MW7/mt28U7kzQqHZVwYOo3CEt0P9B8fgqtTZ3oFCLq6OrBwNAQvAEvSsryUVKej1W1VTJIKEFxcQaysqUYekWsnizkdI2CUdPdhBl1mva5A51j1S4dkkziLsgEmkQLCZbtrsvA1HU16rx+YQnKYgnBXIrJm1z/dcagPHDd3/SUTh7txuH9x9Hd2oPMQI6Q0g4hpVWokpG/mr5zDLf+gpu2gh6x9mZUm9ZcGDUSkIAZCU0Cbs2+U1rcqdAEJ4gDmO4Dms4Po04IqbWxFUMD4l7EQygurcaWTdtQuyZbeUgoEDFTdqpYshnf7NLPMugN0BPTbTGKmQoN3LoZmDAKy+SWZQpLxEL47r+dwy++2QzPeBnC2V0IVHbgj/7Pe1C2WZK4MQ4MdIlnJB5efV0jutp7MTw0gumJaYR82SgpqsDqNetRUVUupAQUlUoThIwUybJdCSJi/fzKHhcrEMY/YsOZiGfJ0VeRjtNWBvGPcuVM+wlz7ApTaQUzptnSpFswsGx3XQZXq77rG5agLJYgaDwpBI1V0srHBoHjR0exd89RNDa2oaAgiF271+HGXWIQq4WRQp
KIdsAYdxdMicSc5uJid4TK6NaRYKDLBYiI6RWPokU8itMn6kXXDvT3jyCYGRQSKsOGjVWoXVcqBjyAAI23cQaNUrMqdxHMpbsy0kZvEpPZl8qjIfVcKCTk850v9+LWl5UgW8glK1/4aBKor+9HW3MXerq7MTwyiKmpKeTm5aBMvKLVayuEjIqFiPKRkyd+rXmuR7jbw6pEhQTXJE/rpSHRRtlJkJYTaI7nBNPOJ93lgGUnFHSB9V2tOq9fWIKyWAToS04/V9KiPQH3HI8L4mgcO9iJA/sOorVNPKWsAmzbfhN2iqdUvVri80SUseMyav5yEw/0Fwa0p5EEazNmImkuzC0w29YxLsY7IkxBphAhpBHxKpqbunH6dAOaG9owMjiOPNG1pnI1Nm26AatrvcinN2H0dSTuUT+fJ0UZf8gU7BZWqbczn5a4j0wKNyTEnTxVb4NpYEL0H2oHHvvq8/BnxjEWHRByEsby+JCTI0RUs1Y8oxpUVIZQIu0Qh1WvjmMXO2WSfGLeadU0E6i14l935aa1lNlg0jtwt2M2zFXcNQGVdCmaaMLF2mqRDpagLBYN+hkNCSUmBo0mLflMaXoIOCWe0uEXjqKzuQMZmR5sEy9p2+71KF+dAz9H6alQU2EG6QxCwlrMAmNcKM5wX6Uni4gVdmcVgz7YBjTVj+Hs6To01DdiOjyF7LwsVK+pwsYta1C1qhglJWK/uSiDzTPFGSFkq4laHyTpJmnOLwamYx+qBQwipGUNbjVVJCCJo+KF9vUCXR3TaJG+5ZLu6YkIwmNh5ASK0S6e0m33bMOG3eUorcxCYbHTBvMczI35KOiCtjamjZJ9Xh6Pg2S22XGJ+iw89Nm4EFRs0ZVbdrAEZXEVoS+tmZ4Swb9i8M2V59y3cTGcR4+249CRE6iva0NuVjF2bL4Rt+2uRdEqSWCmwrz0OOgtkURoMWk5+TzG7S25DYKpX5OOrjZZOddZaa9L9k0wt+4iJoH+DohereqLE22tvZgYi6K8pBKVZZXYsH4d1m7wIJPv5lBPqqKLFK9CEzHJQxfPPS0XIqmX+aupiluTnlsp3CQ14LGbQKTa6QGgo3MEbS2daBPvs69nHO2t/fD7gsjPz0dxURnKSsuwcX2peHrSveLdffuhZrzstatRzOd4rEp5SNLn6hNLWiMDvb5Pe6rJGE4fmqNk2itGanvdRTNuAau6fFCRVEWJBe6LFQJLUBZXFfriShpoDbKMc7MOAycPT+KAeErNTW3IyvVj641rsOPmzSivDoFfuUk4Vswu2dyLtJLGYD43PwmC6bW3wWO9CFhZYCZIYgLoaQAazvbizOkGdHb1YFI8pMKiHKzbUC1kVI2a2ny15NvdHLV1VFL06SYMF3Tts4HaaTJlcUkqk79sPPXVAUkIX0/0AW3NMTQ2tKoFDO3tneo7e4GAF3kF2aioKhGdq1BUVoDishCycoTaFeGLsDwqJNtH/+sY7nr5jaja4gSZulLrJNKFqVxE2sgVgHRndqX2xZXBEpTFAkBfQsbom2M92hZjaq4w5x6NDAkpHe/CwQMn0drYhSyuvtu2G7t2VaN8jSSgB0KjKVnV756qfNrLMe89GZuahK49WZk28Bomn+RgUDI4gegY0NgYw/m6ZjTWNaK3vQuxiTCqRaHaNRuxZkMFqmuBoPl6geE0bpXHwDd9WaemE+1ZsFbWbTQ1FRPUl8cmjqm4r5WasejMJDEYB3q7IATUg4bzjejs6MZA7wimxmLIzc5XX3gvK69CdXUV1qwGcqkzp+gMGVGU/m4vlFu9zO5rH9uD2+/ZhrXb8yRUe0PqO3X6RMxEmiALi4WCJSiLBYIxuDRo+pLyKEvugJ7SkTEcPngCdfWNajXb1hs2YtfN67GqNgNePmw3nEJ7mbTV+rmFYwjNxWrs4kz7aHSgGMIgQRjL7IDRQpItjcDpM0JIjR1ob+tQEdWryrB2XSXWry8VI5+NrALJl+4LBgYu3dwwWhA6Wutm9q
mbeYGUkvTkXAhLK6TfutqBpsZe8Yq60NXVhcGBQUSiEeTkZqO8qhTVNWWidwlKynLEUxLuMQRKpBQ5H3zjv17ErS/ditrtWTO6TamdinR9YmGxQLAEZXEJMJcKt0YIWimxhDx0GaxpMa7nTvTi4J4j6GzvlfgAbrhhl5DSOlRy9Z1Zjqx+2Cgs5tp54z/On+72z7B9TEG6IRiuiYj7XGBByF/3CJ/RLuM6KdW3NU3izIkzqD9bj8mJKMJhD8oqV2HLthuwelU2aqolC1fYUSdVlJCtV1hC9GNxUUUi5mkLC3db/6SGenrOTMwlRf1lMu6kQqoZ7eeXJEaFMNvRIJ4cv7jAxQv8hd3MzGwhzvWorV0rRBQQLwnqV2KVCpQZCxhIznqgYCYztX+mddAKmIwabrVIULfcvQVrb8jW06kqYjbFLSyuHixBWVwGeMnQAGoPRVtHjaiQ0qnj4zj44in1LCQj6MHWLaux66aNqFqdDZ8hJWPvjM3jdrYrUeIYpc0+RXsjDDX+xwyyENUig0Bjvehy4hxamloxPDiCYMCP0ooCrNtUjfWbV6O4THwrszrNDaOT0Ue2NNT8GQUSjOYBCdDKiDCh6Q+dySwccLFGAlHx3oZImM3jaGhoQ5t4RkNDw4iEpxEMBVFcWojyyiLxiopRI54RvyTB50WqONaXqu+8YRpEsKAkGGNCZhBUQn3d1xom5cwyLCwWGpagLNLAXBJJIiASRJBqp8aA08eHsXfvQdTXtSA7qxBbtuzArl2rsUo8JeWV0NApQz7l5GOANuDa00hFar1OmtS6CXEYhjuAhvpu1J2rw/m6RkxNepGbU4qK8krRZRPWrPajoETS0tAbAy8qxNQPAE47xfGv8Tmom/hKYqF1/QzltJyjDy23+70oQrVR7yoIZ00KGbW3j6GzrRutTe3iSbZjoK8fGf58Icx8lJTyhd0NqKnJRrHol8cvkZPEWZYRVSm/JMHvBpoVh6bX+FcnSvaTjrkUsMUm16MfP4ib7tqIdTfm6mZKmH6qRrhPwOXVZWExX1iCspgDnHTTngHNtPodIWdJV1gM75njwJF9x9Ha0IJgthdrNlfgxt2bsXZ9hn5Pydgut01LBxYtm2Qy7YnwmFm04aURdiB2ur8NaGkYQ93JBuUhjY6PITsvE6tqS7BhczWqV1eIh+SDl6sATVZdWLIyp179/pSxxKyXU2QGyUzagxBtaLVTl+dNAiM9EAKKoqO1R61I7O7sxsTkBN9zRUEepxDLUCUeUWVNEUrKshEiWboe0yk9Hb0SYuDenxdUy5ytyTx7Ie5U3/zEIey6fT3W78pTnqOOc3tQbjDX7OVaWFwJLEGtaOhTr5+Z6D1Cr3hzDDbF2OIR8ZRODuHFF0/izOlmBH252LphG267qRarNkm88ZSUk0VPhD+LrgP0IgBj4oyxS/UCBKl1EsIXgy1S94mzqBPvqKW1C5NjEVSWi4dWvQ4b1q/F6nVAJr0PGnyuiUhMg5Fs+N4Ui2WhukZNfzzSZOhRH0INkJ9UqIJOokGHimBytYoujs4OIaKWTnTIlsu6x4YnUFIoXltZNcpLK1BdVaW+rVdAvbgIxPwmEYtXTRbyj4t+qjpHIzUAUJEJUdGXDJZmziuRbHs6sHY9ENAEtfM26dfd+S6CMh5UKhg6e7kWFlcCS1ArHsaQcRtHNBaFTy2pczAMNJyIYr94SvV1zWL4o9i2Yz2271yPNeuy9HfXjPGWItTFJPZKfV07xW7pC83Up0UnYQGuBytChD3twLkzPWgQQurp6sH01BSyczKwfuMq8ZBqUFqZj/wyyU0yMvXMtnWgjbDeuvXQZMmCTEMc8FmWWrgAtLcNiVfUopZ0jwyPIBqNI5QZEk+tHBWrilFZXSBeUS4KC7zw8rlWSt0K7jCzz9vvUr6mMG+YNlJYvpH0MKmIb33yMLbdtBpbbi5KM8VHzF2WhcVCwRLUioA+xUnDrI/1iFm5O1pIKoR4CMeP9WH/C0
fQUteBTE82tmzZiRt3rxVSknxcPZaw55NiusKy5Rs/hi2cctVW16X2TT0micEY0NY4iTMnG3DmTCN6uwcRi3rEQ6rCxo1bsG5dGaqrhcLMlBiLpvj1yj/zfQPfDG9Dex+ESwMNo4MbU8Bon5BRxwgazrcIIXWir2dAyGgC3ngApSXlqKpeJXrUoKoyhNJyKZ3va3EKkdyaqI5Tony/yF2J2ZpEquNcYHyqQlcK09kGc9fh9qBIUFt2VWPrLaVKVd0Sxps2LbSuFhbpYQlqxYAmiKeaBlR7EknLKhCv5fyJCPbvFVJq7UAkHhFPZS123rQBa2qzEaSnZJ6XsBhmM3ZKtvylVw3xSlQ4vROBBHs9JC4HEhwfADpbIYTUhPPnW9HTN4CIeG6VVWVYt6EKtevLxSPJ0M9oTD2mLlONHKtV5RLvChKQLDmtp4nA43F5ZgZCiAM9QHPDGFpb2tEpZDQ4OIzp6TB8fg8Ki/Oxek0FVq0uQ3FpDgoKgwhx+jIdr6Ru1T7bTq1MBJGa+FrA9Awxd716AYi+Fr7zmaPYsL0C228vc3lQ5pq5lvpbrHRYgrquoE/lTE+JBsVZiWYODcRTOntyCAf3HUdTXRs8MR82b9qOm2/eguq1kpSf8RGjHIvFxFBNwufhRI8Za7Mg+atYgsNs/XbQjDpo78RWTwgZdHeM4eTRszh7qh4TY9Pwe4OoVO8g7UDlqkz1u0FBPqsxfMJtfBoxj4gynhQWLGQXN24U65e/rJOcwCBtYzUknL8Z1d05LjKA+nPNavFCZCqKiPBYpjBgdfVqrF+3AeUVmerdIv68hPrqAstiuZrnpBLJoBZT6HbrSIo51pXrEKZLhUm3NKF/k4ltAL730Ams3VKMG+6skEFA0rdautpbXK+wBHVdQk98adPCiS9teBSElM4dj+LoodPqK9xej3hKG6ux65bNqN1YCB+9FsLY3AS0QdYv07Jc/UxCj7pdHpI4aOFeoP7sBOrOtqCpoQVDA0PIyswQMijFmvVlqN1QgeKKEHz0SliIuQKNBUxs6Qnpr51rCAEoQvTrT++YdMSENG0Q6GiNokW8In4ZoqOjC+NjEwgE/CgsyEN5eRFqakpRtaoMhSUh/WFXdxtZniE4d9nu/dkwI026W2o+hSwe3AT1/c+dRO3mItxwlxCU+mcJymJxYAlqWcKcMm0+zLE2I2JkTLSxKFMkpSHs33MQdWdaEPDnYuuWnbjppnXqPSX1TEn90B+JgO/a6GlAba1JCuKBcYk5CzTBpmyusOsEzte1CyE1oqm+CRMjEygqrEJV5VpsWC/EJ95YnngnyS9HUKQuz6Rk5/s9NIH08vjeURBerqhzt8GQhoGQbGdHHB3tPWhtbkNzI0lwFNGIBxnBbKm7GNU1q7B6zSpUVHhQKJ6gh+s+2ASjuyk3LpSrXDCKaGHapeBu7IyI6w6pHhQJ6saXVEq4JSiLxYMlqGUNejOzPFMSj6L+BHDkxeNCHOcQjk1h3cZV2HXTVmwU46MWHBjDz8xmX3krbnZIMU1SLn/Urr5uAA314iE1t4uXMomcXPGQ1pRh3fpyrKnlL8UGNSFRPeOlmDo0F4A/38BdLv92JiFnQrgyPAz0dAMtTb3iHXWrZ0ZDwyMqOic3B6XlhSivKFC/6srnRbnilQVIRizM1R0XwFTm6LLSkUpQqzcVYudLquwUn8WiwhLUkoY+NUkvyZwqGoz0nlL9iUEcPHBCPKU2+D3Z2Lhug5CSeDF8Tymx6IDmaBpecRf0Cmf+4bMd2VDcRl3KHBRCOn2iUX3pu7WxA2Mjk6iuWoU1a9di7Ya1qF4FZBdLWrOijfCGEVUExMIc70ugolP1JsLA2ADQeK5fvKIu9HUPoKerD719AwiEMlEj9ZUUlaCK7xZVZqlVdEG2J/XdItpYTg2q94sYwEiTgHA3TqviVmOlInWKr3p9Hna/tMZFUIamLCyuHSxBLQPoZzA0INzSSLhcA/Eymk8Bh/adFE+pDp
NTI9iyfQNu2LkV6zblqR+gMzY6LtljcrY9nhg5SrYMlDjj4RD8enaLEEVdH86eakBPT7+UOYWCwixs3ERSKkNFVQHyqiUt1aA6xm4Za6+mCimEYawk4lJHfw/Q3hpBw/lm9axoaLBffYuOX2goLilGbW2lkFERyioK1YuuIdMOt400dRsh1NZc0gmF1BFhYtxIxq5cuAnqBw+fQmVtNm66Z7X1oCwWFZaglgT0KUj1lLRZcDwlisNJnGY7e2YU+/ie0vl2eMIebFy/Cbtv2oF1GyXeGHOHeLSRkW1cL2lIfKyA4Lfi2iI4dvQU6s41YaBPAiIerK5ai/Xidq1ZX4xyIaOAefeJBSgRAqKnkvBS3CznQLwifouuvyeC+vpmNDd3oKe7D6Mj44iIZ1ZcUIby8kqUV1RgzZpCISMgM1uKNL9dpLtB6yr1RT1CYOKVJeC8Dax/Lt40iL2W3JfG660TZLrSwIld8UglqIo1Wbj53jWWoCwWFZaglhCMp2RMqDYYjuEfBVrOAvv3nsTZc+fFcESwdkOVkNJ6rN9UDL9ZETeXJZFip/uEkPjZoGN1aG1qQ58Qks/vQ25BCJu2rlLvIZVX5iKD5bFq2n6qY8o1+wx3Q7hqXLyiLv52UUM7Wpvb0dvTi5GREfgDPvXbRVWrKtQ38iqqi1BSGkBOgeQjEbHZRufUukTIMWpXpRFzyUtW9j16ftLZOolTYQlqXphJUCdRuVY8qJeRoHR/2X6yWAxYgrqm0J5M0kzqo1k9JfEyGs6M4dC+ozh9sgHxWBDr1m7CbbftEHKSZHxvyKRNJQwH/BnwpvoxnDtTL3IOI8OjyAhmoqpilfK6atcVK89FfRXB7SEZcD/FMvHnIgbEK+rpGkBjQxvOnWtWCyViQjRBfyaKxDNas3otateUoaRMHDohIo8pn2WxTDXzRzIOIxZ3nlWpl2r1O08mIacluR932FJ/zUBSO+REzElQqpxkWncqE5qMXblwE9QPv3AS5avoQdVagrJYVFiCusZIdjaNM40xrbU2DArT4uGIp3Rg/3mcOH4a0eiEkEgZdty8Eeu3ViDbeDZuK2ssR1iydwPnz4dRV98i2xb09w0gJ0cIaVUJalYXY+OWKpSWBZNfGzdkZKyQSxX1QYYRvlsk3ltzHzrbe9QHUjs7utQCi6KSfBQU5YpHVIw1tRUoK88RT0zycLEEy3FbNO6zDlOPA+MVzUxsEjqTS5KIezotUzIuicshqJmhFm6C+smXzqCwMoDbXr7OEpTFosIS1FWDMYVGCN7iYgRMkCEH8ZSa66ax94UDqDstnlLEi3XrtmPHjh1YJ55SBlfIGe/GbSWEQPjDd+fOtqPuVAOazrVgcjKMHGGxNWs2YtOWbVhbiyRpML+bD93rF4QvR3s0EXW09aLxfBP6uvswPRlTXlF+bpH66sJq8YxKSj1CRlKMeS5FnYxuaktm096iIRPjO+rKzQu3+r0jfQm6G8ZjLYagVEmJJLosM8WnYfKkQuc1cKdw517pSCWoggo/bn/FektQFosKS1BXFexazlE5XcwPm6kH+wKS0mnxlPbV43x9M8Yn+lG7oRQ37lyFzVurkVUQTBKKsQxjQkgdQP25EdTXNaC1rUX9DlJeQTaqV5dg7foKrF5bjrzCDISMh2TEqVZ9qm5UyukDWpvHRDqUV9Tb14/RsXFkZGWgtLwEFZWFqKoplnJLkV8EBM0XuklqpixjuczWwL1/RUgt+GJw+nkGFkyZ6xpugvrpl88ir8yDO+7faAnKYlFhCeqKoLvOWSPnCJ+TGAsuID+Zw2khhbNjOPDCEdSfFW9nwoPa2htw663b1Oo7Pz0dEoGBkFhPxxjOnmpCY10r2ho7MTEeR0VFLVbVrMbatWtQu9aLTH4zj54MP1GX4hVNCRl1tA7hvHhXvZ3D6OkawtDgOGKRCAryclFRXo1V1bUor8pHaQWQzeda9LYI6i2iPhjqmZSrZVo8Hhoxw1LaZBlfyYTpUP
414RZLHW6C+tlXziG7KIa7Xr1ZzjvPrl3FZ7E4sAS1ANAGmkzEZ0pivDnsNHezeCx8pnT4wHmcOHYSk5OjqF1Xidtu24b14jH5zfQdwSXfTRAy6ceZUycwONiLqfCU+ro2fwNp1ZpSVNSUoaBEiEB9mkhnU5jQU3Q9XUKCLd04d7Yeff39CIfDagotmJmBdetrUS1llFfkoaw8iOxcyWem6IwYuPcTMJdK2kiLZQz318w1QUWFoLZYgrJYVFiCmhfYRZqGkkZaQ93U5uGIuYMjQjQNYex74RDOHm9CdDKI9Wu2Y/euDdi4VeKdr4Rzqq2zvR/HT9eprzR0dQhDRXNRXbEZ61avw8Z1sr9a0hkS0wNcxMel/M5xtLW1o6WpXfL1Yrh/AtPi5GSF8lBZXoOS4lKsWlWDyiqggN+ic6/SYzlKV7OknU+K9GdgdRMYZryhRGKBMVUrGTPP//WCpAflEYI6i6ziKF7yqq1yaesVljzr18+ZX+nX8PKBJahLgiEpx0xr264h5NB5Hjh08AyOHz+DyYkJ1NbW4PbbdmHTNmGHLPGvBoCOZuDs6R6cqz+FgeEu+EJeFBblYuOGVdiwcQ0qq7P0x1tpK6ZFhLN6O/i8aApNjQ3o7OpC39Cg8EcMmdkh8a7ysG7DapRXFYlnlKOWdHvNFB3hkFrae1JGx+kjZsOlprdYLkj1oDIKwnjpa7fZ022xqLAENQO6K5JTdsmu0TevWHu3jQ4LeTQAh/efwvEjdZiaADZt3IGbb14DcYAwLcctbUJI9YdR31CH4ZEJhHz5qCiuxdZtO7B+E5BfJuVwQYN4XeF+oLl9EK2dbTjfIB5VZz/CUzEEfEEU5BWjunIVamrWoGZVBopLhIg4RUdd+EjIEBHV9JovhBO6HbolDDGfZTWN0EYpeTw7zKUyc/Xc9Qy21whh+pG4vnxJTVC8iLzKgwrkjuPe1++UZnJFprlGljPSnUOC55Htc98TFksFlqBmAac89DMlXrSulQcSTFI68Hwdzp1oEhKKYP369dh2wwYUFAN9XPZ95jyam9rR2zsgYTlYtaYMm7etRe36LP31BMG4kFFdPcQr6lJfXOhsb8Hk+JB4RLkoKs1DaXkBqleXo7KmCPnFPgQ5RZfOTpj7ijxE0MbMcp+ZE52MZsgsidNg5REUwTabzjXtvh4M9ux4+tEWBHPGccdrNjsh1xPMgI3ES/CHNt0riyyWElYoQZlRlDE+biNED4OekhghY4/kWu4RMnlx/3k0nm3F1EgMNWUbsaqyWn18tbOnH+095zE23Yec7Bysq92IDbVlqKoAMsQ76h0Uz6htCm2d59DScR7DowNSmx+BoJBXzVqsrl6DyvJ8lJUCOVxFZ77QnZAIYvwAq1miLqBqM8bwzmnUfpN++0jHOmmEVEyYK5dFWrCn9ABF9xYNmKvXxoEBGWAM9o9hbGwc0agxdhr8+fvldlvx6vDEfcjyleO5Jw8hkDGJm27je1BiymMSywv9CuGZ+WNbC4Z0uiX7Pw6vz4NAwIeMjACy87JRUh6Cj7MPLnBAGlefLRF/Sq1UtXfJUsCK9qB0w/lwmBemISjnAY5EDoqndHR/H86f6lSLGbweP3JzMuATsoiEo5iajqKopBir1tagalUWQuLl8KfEO9rG0HK+BSMDQxgdHxWuiylPqnpViaQtRmF5ForKM1R6H8nIwNhDYsaWmsrOxe4ZnkpJk+6EqjAhKRZxsWIsCJ4M9pqeOw0PAMcPjuDk0fMYHZ7C2OgEgkG5Fnw+MYDJgYNBLBpbViSlCIr/4iHEwjJE88fFqMfVACzitOVyScoQ00J73u7+devG8xHwm6Wxci5iUWnDtGzDcs9OIRDyITs/hDUbK7H7jtUorHLS8SdapA80QVksBawwgqLnpA2PvlXkQpTRYWLGZlpIqRk4dWQEh/aeRd3JDkwMeVCUW4ni4lJkZgaRX+hHaXk2cgsyEPXG0T/cg4HhbvQMtGAyMoxgKIjiEi
GiyirU1qxGWUVAveia+C0mihmQq3o5UtcjcE2RRj9JwF+yNQndL/nOBoeglLeUelpdxiG5Z5GE6S9nlOB40BNCTM8+cU6uiRYEUIiaqg0oys9FUOyfVy4fL5Ol6dDldle5m0Bbz2O/E8i2XEl73P2zkBzl1sm9P9v5MEkmeZ+PTKG5ow4DY62o3ViE+19zK4r5EzJMlPhiPu+3BVTY4pKx4ggqKc4oSbhhQkjp6IEw9jx1BvWnOzE6NIns7CwhmgIUFOairKwYGUJOQ8P9GBnvk+t2CuF4GP4Mr/rNog2bKlFSIekrgsg2L9vyuk7tWXO9u8PTXv+pGQl7o1xTyCk48EwET/z4aRTnFWLNmrXqF3y9Pv8VG+yliuv5CjOnK/m5LI3IVAzNjU1o7TiLW+/cjPteWwuPmtXgQOUiA0KLq44VRFBsplydHByJTLaKp7R/FIf216PudC/GBoII+UoQ8GfBKx5ObrEP8eAoPMExBLPC4jllorSiABVVpahZI2RUBgQ4j21emOW1rK5n1jMpvMf3R7g+iJGJCRRnn0n1sy4dZrG44BkRg6S8Jg9i48A3HzmPhnMj2Ll1J4pk0EHDRo9ZPQtUYJ7rC8kWXYdtUy8cm3bJXSfn2hv3wyenMyDndkLO+dETBxHMHcE7f+Ue5JSwF/S51qv8LBYDK46g+s4DX/rYPjQe68No9zQikRgCmfy9okwhIXpBeSgsz0bV2iJUrC5AWSVQzBdluYrOLPYxW11kGvCVV31xG2K6sJN1uN6zWHTwNuDc0Bjwhc+ew4h40du3b0Mo5FM/I6KmjTR/XZfeUxJs3PXXwJkEpaHuP2caXT0mk/N89swJDAx24Jd/834U17AnnOl2i0XByiAo1UIShhfRHuBbD/fgxL5mVBavwqraMpSvBqpESoSMAlxFRzLiDKAZOCnjFEU4Pi0Gin5P0vPhBcxvMGiPKDnSSt4OfB6kdhLQD4u1mD2LxYJcF/SciCngi586jfB4MbZsLVWedFTOnfpylYhZhGYJavkhHUGxnXHHI6Y35Y351MdWzpxuQ8/oCfzKB1+FwlIVLdD2w+LaYsURlNqXUfKUuPTqi9/8wKphCDdTmH2m53UpxzRU+gf0klBh8s8r6ZiM+x6fu6AkUjvajsyWAMxJkVPxvS+2ovF8D3bcuBsBXhe8XJx4EpQaVwgsQS0/pCeoJHhqPXIzq2l3OTh+/BB8gTH86vvvdp5J8dmAXd13rbEyhgTKsPCPXGTiCSE3gmD5FKK5E4iExhEOjiISlP3AFGKBMOKBCOAXJvLLBW2+gSc9pUbRXLnlFgnnj/fxXQsOsT1qyTHrulDc/3ivrJjZ1SUJngBnV07H6b3AqaP92HnDbmSIQfJwxMEEYtg8IgRPlz1l1zHkPPMZI73mndt2Y6gjgOeeaFFRnOizuPZYGQRFqCEwmyusIu68ByHZy4QfWcJBObLlfkg6JCBxfMgkaektXeTdD162iU6c7zUsRXK0ZrGYcM4rN+JNP/HT57Bl83oExXPSJJQ878ZL1mErQa4/KO+Iz5tmFZ5kbnkbc1AiJLXjFjzz1AH0dfFdSXOXX5/9s1QxL4K6fkb6DimoEfHFRCAeUVRYil8GWBC4imbPp764aD2qawlnrlbOw8GnhaEimSgty9ZkpN6DcU6WXCuKnOZ1zVwPcv2Cd9tsYv5yKpD9wIUxRUU+5GfX4JmfH3d1zfXdR0sN8yKodG+A05guX4PqvjRnim6SB01NTTh06BAmJyaFpxh3dcHfbYpEZn4yx+Jqwjmnwk0v7j2FtWs3aOda7ghO59gpnZUJM7PB9/f549ObN9+Ac8c7MdanggWWoK4lZiWoI0eO4F/+5V/w9a9/HSMjI07o9Q+S8dNPP42//Mu/xI9//GOMjo5eEyL+6Ec/imeffVbVFePwzeIqgy4s0NkATI4GUFiQJ0ZJBl2Jrwg4MKfe2qUVBUVQcs4LCzIRiJXg1LEuuQZ4Edh781oiLUF973
vfwx/+4R+io6MDjz76KP74j/8YQ0NDTqxGOq/qesEXv/hFvPWtb8Uf/dEfoby8fOHaaopxFcdrfmpqCocPH0Zpaamqy8uVFxZXCQ7TOCPlU2c6kFtYoJaU68UrM881jzjrMzPU4vqHfupIVFRW4ezps3IRmKuAMSbW4moirSV86KGH8Gu/9mv4x3/8R3zqU5/Ce97zHoRC+iOqnZ2d+NGPfqQMqttwk8B+9rOf4fTp08rrmJiYUOG9vb2YnJxU+5zC4rHxEGiY6a1QTBqGMX9fX58q79y5cyrcoL29HT/5yU+Uhzc9zV/0kxGw5DXlmDDixIkT+OlPf4rW1lYnRMN4ROfPn1dlNTTIMNpBXV0dmpubkZOTo6bdDFgu29TT05PQy5QzODiYaC/D2EZumce05cknnkRLk+ghXXb6xGk88+QzmBqfUtc8pxOzs7MVMbFs6mXKItra2pQ3x74lGD48PKzK53k4deqU6tsDBw6o/KzP5LVIhdMvzoY/j18igxDDS8krOol0YRbXNww9cda9rLQSvd0yQB+XMM4BKy/K3l/XAr6/Ejj7CXzzm99UxvnlL385MjMzUVNTA7/frwjgv/7rv1Tc1772NeTm5mLDhg04fvw4WMzY2Jgijs985jNYs2YNVq9ejd/5nd/Bxo0blSdy7Ngx/Ou//ite8YpXKO/s7/7u7xSxsVwa6TvuuANPPvlkoqyzZ88qgrzzzjtRVFSE73znO/jEJz6hdNy3bx9uvPFGZaj/9m//Vm1ZDo0709Pz+8Y3voHx8XGV9qabbkqQLIn1S1/6kkpDYvnCF76AqqoqJf/xH/+BvXv3IhAIYOfOnYqoiOeeew4f+chHVD0kp//8z//E3XffrfT6X//rf6GiokL1E0nkr//6r3H//ffjzJkz+NM//VMMDAyotn/qk5/C8NCwIsCvfPWrOF9fj5fd+zI8//zziijZ1+wH9jHbxjIff/xxpR+J+5FHHkFlZSVWrVqFj33sY+o8sb/KysoUgb3wwguqX9lvbC/PmUUqaFiEcriRMdHzT9dLn66Fj68TWCaycKC8aeeC4OLfppbTuHHnOoRy5EJxLh+dxuJqIq0H9Wd/9mc4ePAg3v3udyvDStBzISn9wR/8gTLI//2//3c89dRTykP4+7//e7z+9a/Hn//5n+N//I//oUbwNPbd3d3K4yoo0L/SR4+GJEFP4d///d+VEWdZf/M3f6Pi6FmR4IqLi/HOd75TTbEFg0Fl4E+ePImPf/zj+K3f+i38/u//vnpGVFJSosp5zWtegw9/+MOqHHoTJDeSzxvf+EZVBuvIy8tTOhAkhG9/+9v4vd/7PdUeEuPnPvc5ZdBJuNSLz4RIqgZ79uxRBPLf/tt/U2Vyn54SpaWlRU3PEayfBEiP6OjRo8qTYT8yDz2tjIwMvO+D78Nb3vxm1WaT5+abb1ZeK9PV1tYqHeuFwEhCDGObSewkLJZJMiZZkVDvvfdeRVC/+qu/qkj013/91xNkbDETiXEvd6aAaCQKjy9+sbcJLFYc9AVBX8kvt1I0Kg7USNh1AVlcC6QlKHoOn//857Flyxa8//3vVyNyjtRJHp/97GeVh8NRPY3+L37xC0Uir3rVq1ReGuHCwkJUV1crA0xDTS+DYDn89VlOqTEfvQaSIYmFRBeVq4BTbPTcWAa9FZIWPQYSCj2j3bt3KwNNQ08PhSTJcqgTy6FHRKJ55StfiX/+539WXpfxggy+Kt4LjfratWvVMQ09iYbgyj3qmPrciV4iiZBkyzbSi6TnQo+N7ec+wTauW7dO7bP99913n2oLpwZJaq9+9atVXLt4Ohs2blD7bAdJkn1FsG0EpydZBkmYhPnYY4+pMrq6upR+73rXu5QXm5+fj5e85CX4kz/5E0Vcqe21SI9wROxNNAY/X7CWY/W1AbXM2MKC0Gs5/VzdGY9hOrHK1mt9p2uEtARF0Hv4i7/4C2Vsn3nmGTWav+eee/D2t78db5bR///+3/9beT
P0LDiVZ4wiDTQNPo02n4/Q2Gdl8ZtCUFNbJD9OY23duhXvfe971WIEPuPitBjJiN4XvRiCdTIvParGxkaVl4bZkAfDOH1IQ/3ggw8qT4WeFb2HX/mVX1FeB6fCODXmXhnHaTCSrwEJc/Nm/fPW1G379u1q34Ckx+dc27ZtU8dsI3UiwZG06ckZj4UEa/RnPZu36HIbzjeoKUDjaTWxPTt2YnpqWhGOyUPQI6N+JCcS6dve9jbloXKxCr0jTjGyXwzxE/Sy6HXynHEwYXFxxGRUzEd1/MYeLyn93MESlAWRvBa4ZolUxS9uJmEp6lpgBkHxeRCJwkzr0YPhyJ+GlZ4DDTWfjZAodu3apTwoEhGn1Aw4BUXyIfhwn4aXhEKyouGlZ8URP8smudAj4vMSpmN6EgnrI+hZcKqQHhHrSV3sQGKgN7Np0yY1RUa9WI5ZqEFv5R3veIdaPGBIjeAUY39/vwpjmfv371fEyylJLjzgFJsbnN6k10ZSIjglx3YQ9KD4vI3PrEgcbOOOHTtUv7E/q6t0urPnhLilLYFgALFIXNW1pnaNqp8EVVjEr9QC3/rWt5R+9BZZH/uDfc22sZ94HujN0WMzHheJjHlI1Lfeeqt65mYXScwPEU7xyaVBI2RhkYS2F/xLp9rH75rNMJf2/roWmLFIglNLNHb/9E//hJ///OdqscMNN9ygPB16Qpwuo8fE5yP0OkguJBsuy6bnwedEnLqjh8X0NN58hkKviOVyCo8eEwmIz2c4ZUUy5LQUjS/z85kVnx2RPBhP74xGl4aZ9dBD4TJ41nvLLbcoD4bpmJeeA8vhgoZPfvKTKoxTgCQp6mNA74OLJNgGEgK9L5IZCZH6kKxIiAYkMJLUG97wBkWW3//+91V5bD/bTX3YRuYn8dKTo5fF6cJf+qVfUt7Vt7/1bey44UbcuPNGdLZ3qv591zvfhbr6Ohw8dFA8qibVr9Sd3imnNdl2PktjOexXTkNygMD28ryQtAguBvnKV76i9CRJ8lkUF1i4SdnCgGNh6RexL1G+pHugGTXVtepBON+DUgZJ/bWwEMQ9CPqBhubz2HbjWhSU6EUSmqDsdXK1ccHXzHlII8fpM04h0RsyI3V6BTTgPp9PTUFxGpDpST70CEg8H/rQh9SzH8bTs6L3wuckNOicJuPIn6N9xtGr4pZeD6cSadzppZnFCSyTpGkWOJCcOO1Hz4IGmnEskws6WA7roPdDw0wvh94Jjbx7Oo+gzvSyGM88nKpjHnpdyutxvCMD6sVVdGZ6jp4e20S9qC/byGdD9OS4apD6syzmowfIsjvaO5CXk4fsnGw1rUeyqaquwujYKCanJpUXxf41/UowH4mRRMvyOcVIoubUIadUqQPT8HkYCZ+LSagDzwPDKRYzwR9HUd9ViwJTvcDH//NJ3H7rvfCIEYp4GCcklfIulMUKRtSL7AzgZ0/9GG/7b/eidgu/JCzh6lmlvU6uNhb05za4zJnvTj388MNX9KB+LsNq1F2Wxtf9EvplTClxus9NPKmnzhLSxZFKUB/7t1/gjtvug1ccZktQFhdgBkHdJwQV0rxkCeqa4DLM5OygZ/SmN70p4XFZLCzc5GSxMPD6vPoj9xYWF8WCjeUt5okFvTVvu+02tSKPRGWM6eXIXJhPmiULNfJy5DKQ2m7TF8u6TxYZcfsClIXFkoUdO15LXCFBWSw8YrHoBb+SbGFhsTRgCcrCwsLCYknCEpTFCkfqFJ91by0slgosQVlYzJh2tc+kLCyWCixBWaxo8PNGpCRNS5acLCyWEixBWaxocJl5jCSluClJVRYWFosPS1AWKwqpT5j08nxDUIR9BmVhsVRgCcpiRSHVP+KXI6zXZGGxNGEJymLFw2s/bWQxB+y73IsHS1AWKwoXTPFR+CUOa4Qs0sB+oGVxYQnKYmVCCIk/VOgGD73WIlk40J8Qcw4E6nmlvTyuKSxBWawcGC/J2dLWGHujflXXelEWs0B/s9Gy07WGJS
iLlQ2xO26isrAg4uoDje6rwl4hiwFLUBYrHtb0WKSH26XmVWJd7GsNS1AWFhYWF4CutZuQuG8J6lrDEpSFhYVFWqQQlH1Iec1hCcrCwsLiorDktBiwBGVhYXGJ4PMYIxYWVw+WoCwslj04ur+WI3xTn/UqLK4uLEFZWCxb0IPhLexzhPuX49Vcric0Xy/qcsu3WOmwBGVhsayxEF5MahmzeUcMiwnfRBEMehEIkHj4vpBJny4PMVv40gW/GqFfzrVYTFiCsrBYtiBZTGNyqhcTkz0i3PZjOjwGj7qzSSDccYvxZkyc+9iEGY/MhBEm3IOA34d9+5/HE0/+GPF4xAlPzZMqFhaXDl51FhYWyxGeGIZHOvB/Pvp7+NM//wD+/C9/F3/8Jx/Cww9/GtPTk0Ie6TwA4+loUZ+XEysw85tzzo4iFnd6Ig5/wIszZ4/g+Im9cjTthM8G5puPl2VhcSEsQVlYLFOQSOLxSbR1nMUvveMN+L3f+x18+MMfxpvf/BYV5/MDgUAcsdgEuntaEYlOCBlpkvD54wgEw+jqbsDwcLeERyRdWKWfDo9I+hZ4fVERVkRPLYJQBoQQezAk6X1+ek6TIiQoTvlBwiVuqE/iSGwxKTOm8vgC0xgdo2fH9ATjKRYWc8Mjoyw7pLFYGeCVTmMbl3FZFIj0A//17z/Hbbe+HB4xqlExwh6vF+ozbEseHkUAA4Pn8eH/9Zv4yEf+AetqdyIaDSAq3OHzx/Dssz8X0ujHwMAA2lpbxfMJ4dd+9TdRVFQuBNSMRx75nJBGGFOTETz44Juxc8dNeO75pyTfL4TgfAgGsvC2t70T5eWVUl8Y3//Bt3H06GFUVFTg3LnTWLW6Gh94/+9Kf4bw3e8+hsam85iaiuDGG3bhNa95LXp6urFv/zMYHx/EiweO47d/+w+watU66V8zLl7KHa1188b9CAWAx5/8IX7pvfdhzeZMFa5f2rUke7VhPSgLi2UMPswPR8J4/rm9+OlPn8TjP3sKvb0D8Iv3VFd/DE8++VPcffed+J0PfQAdHfU4dHiPkNg4PvvZjyO/IFsI5v341V/9NdTUVKD+/HF849Ev4xX334ff+p/vQ3lFIb705c9KJZPY88ITePa5n+E9732nyC+jsKgIMSF5f8CDx5/4Ls6cO4Rf++/vwK//xrtVHS2tdRgZ7cO3vvWI8qg++MEPCNFVuMjfjostLg5LUBYWyxY08jG1UIHTa8Mj3UIKvYjFpiQ8ioi4Uhs33oiK8k3IzSlFfn4+xsaH0NB4Bi0tbbjv3gfFSypFWckGlBRXYu++51FcVIHt2+5E0F+Al770HjQ01KGzs1mI7kns2HEzaqrXSVnFqKxYrbws6vDii/uwYcMGdHf1YmhoCNFYGOfPn1O6ZWUV4GUvfR1Wr96MgN/xPpTebrGwSA9LUBYWywpmWkmTE58N8bnRvffejTe+8Q0iD6K0rEiFq2dDgUz4vEIMca+a9oNnAuMTfQiF/MjJzpfwgKQLgAslRkcHkJubrfIQWdkS5wljZGRQiG0ERYWF8HrFNROz4fNJHskUiUyqKbzTp07hqaf24BdPPI28vFxUVJZImXw2FZS0GeI5kcxobqi/Q0z223YWF4ElKAuLZQnHyAtIFJmZuY5kICMjQ0K94ln5hFB8Ek8y4NxaXE2xlZaUY2yMCyFaEcqIi8RUmqqqGrR3tGJqehCZ2R709nZJeg+Ki6uFdIrQ3d0Nv98r5BZAVLwzPr0OhTJQWFiCbdt24v3v+xB+4zd+G3/4B3+GHTfulviYWngRj0+r52VG3wRSDi0sUmEJysJiOUNIKBz24KGHHsZHP/p/8Nd//df49Kc/gcmJKbUYIWZeNhUPKhbzIxL2orJqDV760nvx5S9/Cc88+ws8+ujX8MILL+Duu18uXlU2vva1L+CZp5/EV778NbziFQ+gorwWr3/gLTh8+Ci+973v4Wc/+xlOnz4rpBaQOgIqbt++Q/jud7+PfXsP4LFvfx
/Dw2NKtxidNv1SloXFJcOu4rNYOeCVft2s4hN4ouLtDODkqRc1IQkhRCIeNXW3e/cuNDc3CjlkoKZ6LfwB4MTJQygsKEJlZTUmJ8dw8NB+NDe1IDsnDzfddBOqqyvR19eN/fv3ivfUj/XrNkv4HeKRZUvZU5L/II4dPYZi8cBq16xFMBhCbW2tmh6sqzuD48ePSrnTqK5ahVtvu1X2J9HY0ITNm7ch4M8QXTg9aKb4ljr0RWBX8S0uLEFZrBxcbwQlDYojDK9XtvGw0zafIqqk1yLH4klxtZ+G3jK918cFFmwsw8wzIikHfH4lZBIPOWGEdJjHvJTL9BLu1KNMiGdKxeu6+FyL+SRd3F2v2V8OsAS1FGCuPgsLi2UHj/zzK0ICMoQMstTWg6BsHcIRgkiSE40qDS+/IEEio0cjablQgqStyIRhJCaSTDK9BsOYluTF8mWjCI7xDM+UcqkH41meQ1IJsbC4NFiCsrBY1hCiiJNkjJBEGOZI4hYnQXDfHMtWEQkJiemctIqsWA7DTVoSUGp6xrlExbnqvyDNdQLD1RbXBNfRlWNhsRJhvBy3GCvq9loYZoRw53PHpSuHmC29kdR8qWJhcemwBGVhYWFhsSRhCcrCwsLCYknCEpSFhYWFxZKEJSgLCwsLiyUJS1AWFhYWFksSlqAsLCwsLJYkLEFZWFhYWCxJWIKysLCwsFiSsARlYWFhYbEkYQnKwsLC4gKk+Xag/SDGNYclKAsLC4tUpPu1X/u922sOS1AWFhYWF4Vlp8WAJSgLCwuLVCR+x8pAji8Is7jasARlYWFhkRap5tES1LWGJSgLCwuLCzCTjPQPj1uCutawBGVhYWFxAWYuktC/SmyX8V1rWIKyWDkwA+CUgTDNTswOkC3mhL04FgOWoCxWJsTemFkbbozEVKCFhYa5HLQHZXGtYQnKYkXBTT+GlPh8gQu0LDVZpINXcZO9OhYDlqAsVizcJseOjy1mQl8RCYeaAxjrXV9zWIKyWLGgubEmx2J+sFfKYsASlMWKhfWaLCyWNixBWVjY0bHFvGCHNNcalqAsLCwsLgY7hlkUWIKyWLHgymE+9/Z4fE6IhcWFiMZ4nZChLEtda1iCslix8AoveYWlrO2xSA+OYIBYlCQVletFmCoBO913LWAJymLFwh8EfD4/omKAkvxkmcqC4HWgCSkmGw5kMkJ+dWxx7WAJymLlIgsIZgQwOTnpBBB2ZGwhUD9YGFcv6U5NcCAjl0u2jGgsriksQVmsKCTohztidEpKijE6NOG6EawHZWHgUQQ1MjyBgoJcZOeqIItrCEtQFisMjoVxNrVrV6GrvQsezubYLwVYGMS5cManPKeOjmZU1xTDE5AgT1RFW1wbWIKyWJlwCGr79kxEpqYRnpSgWFzP7FiseMiVoLcycBke7MONN24SaxmWkIgKV462vVauOixBWawwuOZoxMCEyoHVa4vR2trA8bKEueItVgB4vo3MBL2n1tZuFJdkono13Sd6VdZkXkvY3rZYoUguGb7n/mp0dbYiNumDX03tGLiN14UGzOJ6AK8DI4R2i+hJe0XO1p3E7S/ZAmQwUMUkLwV7SVx1WIKyWKFwrIvYpYJ1wJZt63DmZKN6KO738rZgfKpYXJ8gKTkizMR/Qb+Q0+lOlJZnY9POTIAOlLoGHJNpL4drAktQFisXapgsW5FXP1CNsfFRtDYPwyfHfO8lFovrX9q1uE7hGnzwWuBKmbhfTfX29UyipfUUXvemW+HJU4kFLoKyuCawvW2xQuEyTNwrAt767htw6twRtLePgU6U1ytj6XhEhNM/TOfkUWKxvGHOI02gDx6PVyQu3jMwNQ68sP/nePUbdqO8Vs68nH77S8uLA0tQFiscYn28UWWrKm8E3vKOl+LIkSPoaBsTT8ornpRfEVQ8HkVcRtgU/bxiZRosttrI8oNb8+S++iZjzCfkFMTYSAzPP/8MXv6qm7D7rgLAL2ebVtKOSRYFnrj9mUiLFQ
1e/rRQsnF4p+4I8Ogje1BVWYvNmyslKIpILKKGcxxpM72yV9flij/TpvRmwfw0PlM5zmcKllqfpG8H1WQMiYkeUigAtLeN4MixZ3H/q2/GS15ZpshJpdHJl1zLVgIsQVlYKPAFTDFBHC7Lbl8r8Ng3TmN8eALbtm5BQWEmwnKn8JkUR9zGWpmbJ/UuYhqVbhlCmwSHhAXupqkYcrTscJOKlG6YAXd3zDedQbr0l929UhibqKZxpZDBwTDq6s5hfGIQD7zpVmzZLWylyIkjFtaia7rs+iwuG5agLCzc4N1ghM8inhzH808eRSiYh9p161FcFoJP7Bc9CbWAQrazEdFyvLPYFMU/ontCfQk0+2bLdOmaPVeT3ennm84gXfp06WaFFGAIiecrKg7xQL94y/VnMDbWgRt3r8HLXrEWWaU6rX34sTRgCcrCIgHeCvxSgIyc40ERsWSyG+0FDh0QOXQco2OT8Ph8yC/IRSgjhGAggIA/AB9/u8NlRq/0obrb+C7kDZpq1M0XE1RDPTE5iokh96ppL/pQOpYLr3V3qCN+cUO2qWURKp3EcMvceqvhTq/bpMtluNkaSQXjjcyV7kLolGxPJBrF6OgYRkZGMTw6iszMEG7YUYubby9CfqUkS3wLdlqEDGW/Xr7YsARlYZEO5q5wW0axWz1dQEvTtGz70dPTi/CkEJoYP+Mu0Rxyj8vUPctgjs9QjdLbIaVwOIJghl8M+pQy017+FddDt1BD5XIHOIhLm5P0RXFB0qt6uJ8SZaBWezv7bjCP+5HfbOkuhMqppiWDQR9ycrNQUlaAmtpClAgpBbiEnAVR2FieS76hqzC/GiyuHjwxvuxhYWExA3psH5O/9KiiYqoCSpQBoxBm/3q4g5w2xPuA73yrE296RwVQIAG00Xw8x3hjyOfCldr0ufrSXfal9rnJSxIyWyX6/OoDPnhyEl5pOywWBJ5oNHo93F4WFlcf7jtltv3lChKt8O/R7wP//n+/gd/6wwex+zUZ+vNzxPXWXgNDRPSAE6QkO5aglgQ8kUjkerrcLCyuEnibUOhVmf3kHmFsmnlykwqd+kKkpk6WruMu11bOVW4SEUnnhy8aAgaBf/uTczj6fCc23RnFh//hXvgLucheu1BGH4OZ5Rgw1LiV6VNo6Ja5+2qu1FcKXZupkXviMbm/u6jUoN4mpcVSgCUoC4ulAN6FV8MuppTLw5k3fERMtX7h5/h3gE/83R5gMh9jwaP4wF+9FLe9ptqx47ogt4qmnAvVvrCW1FR8TpWab/byFhJz6cW4q1u7xaXBEw7z7Q4LC4tFg/sOXEj7mKZcBs0M1h4UhoCPf+Q4Tr8wCs9UDqYy67H+jig+9BdvgV99i47eBb0O7jtIqysTMC3hTsD9NBnc5RFpy1xIuPUjWCGfP1ksRXimp6dTLxELC4triYsa/ctEmnIZRDHVJBaCjwGdDRI3CPz0u6145Ztq4CmcQsXaEDz8qQmTLk2ZM2FqSAUTp81wYfJZki0MjH4Uo9NVrdDiCuCZmppKdzVZWFhcSxh7udBIKdd9syeCTSAdixHgkYeO4u3v3QFvvhxzeo8OhlpGn0bBC4KYzl2LAROmyU+kVepqIVW/OfSyWHRY39bCYingatnIlHKNOdaeExkphUwkMuyNIcJEbuvAd7qSmZNyAdIlMjIL5pFk4eCu7JpVanGZ8ExOTqZcoRYWFtc/HE9C3f0OE8l+bAz4/EPP4x3vvQuZhQyLCTfp+HSGwpp3i6sJz8TERLrrzsLC4roGb3sR9WKqQzMMGiVBvSAEdQcyikygjp/NUFiSsrha8IyPj8923VlYWFy34G0vEk96T0rGgS98RgjqV+5ASBGUs3rP2UuFi94sLBYc7llmCwuLFQfX+NTlSCm4oiwsFgOesbExexlaWKw48LYXMVN8ziEmHA/qvRd6UCYJc5gtYbYWFgsNz+joKK81CwuLFQWHbtwExd1x4MsP7cfb3n3rBQRlYXGt4RkZGb
EEZWGx4jALQYkH9dXPvYi3vOtmS1AWiw575VlYWGhysrBYYvAMDw9bD8rCYsXB8aA4RnVbAPGgvvbwQbz5nTdZD8pi0eEZGhqyBGVhsZJhCcpiicIzODhoCcrCYiXDWABup4WgHnoRb/ylm5FZwkBLUBaLB3vlWVisdPD5E8nJcJHHgxj3LSwWGZ6BgQHrQVlYrFDw5lfrI0hIjjzy2YN4/VtvQnaZibDjWIvFgae/v98SlIXFCoP7pvcY78nZPvLZQ3j923Yju5SxlqAsFg+evr4+S1AWFisMvOnNja/ohzzkyNcfOoQH3moJymLxYa88C4sVDkVUnOdzJB6PMsTCYtHh6e3ttR6UhcUKg/GgKOQlr7ECwk1f/dRevOHtt1sPymLR4enp6bEEZWGxwpCWoLgTAb72qX148O23WYKyWHR4uru7LUFZWKwwmJvebBVBUYSPLEFZLBV4urq6LEFZWKxQmJvfE5e9GF0o4JFPH8Dr33qLJSiLRYe98iwsVjBISZqWkuNUj0eHWFgsNjydnZ3Wg7KwWOmIi6cUk/GqWIOvf/ZFPPCWm60HZbHo8HR0dFiCsrBYyaAF4Nu6/G2oCN+DOoDXvvEW5FYw0hKUxeLB097ebgnKwmLFgre/MQFCREJQj3xGE1ReJcMsQVksHuyVZ2GxopEyPlUWwY5ZLZYGPG1tbfZqtLBYsTAelLNcQna/8en9ePWDt1oPymLR4WltbbUEZWGxYuEQFJ8/UeT/Nz69TwjqNoegDHlZWFx7eFpaWixBWVisWLgIiu9BibP06Gf34VUPWIKyWHxY393CYkXDkI+z5SbuVbSkYcnJYvHgaW5uth6UxTLH9XAJz4cIFrKdpj4pk+9AcaxKL0o2fAb1ygduRUGVE+9OuyC41m1dbMynvRbp4GlqarqergSLFQZz8S7ni9iYr4uZsavRVtaZWu8T3zuFW+7cirySmXVerpl16zufts7WztTjpYjUdvH4cvvNQvqusbFxOZx3C4sUzDR76S5iE7YU57GpGw1XXP4lDJgJVHCbNdMSHZaurVcKVTILFolOAr6g7PsZmERceVqXB48n9SzMbFMSyXB3O7Vq+l9qHs8FZWjotDMx37SXV2byXJow/dfkSV+mxeywBGWxLMGLluaSt7w2fQwRSVzNjjFlgiV/hVNJX8J+UV1jypxWJZBoqzvRQoDTewoREUNErI3hFNbp1uQy4JE2siz5z5JMEyiECSNUO9WBE6p+l14knQ6zfTtwvmlna9d806amU0kkTEiZlMVDpkhTmsVF4GloaJjl7FhYLF2Yi9ajjKkcRSdEpsW2yrGyBCaFbK/UsF4VOCaLnoWXEhKOytJRLlBzite0Mx4GwuLisE0LavGkMNVN6o/AqU/pyDjWbYjrUsC8LEPaKBvlmvkzZSe5EIPB3HdqU6J+OZGuXETOqYlV9ZuUDlQ/OOW7MZeuF0t7KWUm0pJ83ZC8PnFB/TlqnxrrdllcCjznz593nW0Li2UC/iy5R2S8F4P1xzDS3yv2IwKvGAufT0yBGBOPY1BiNCIC/o3LPmXx4DZVNGwywhbjFvNlIr9sNQrXbXeMmtuc8SfYRxFtOYPutkZEpiZV7MxWuNNfXvt0Lv6VPhKi93g9qj9NjOnHSwGnuqQUeJWH5kVYxJddgMr12+ErrpaiJZzt18klVghpahDD9Scx1t+JqCIolsO6Y865M6l1m2f7+nq685wu7WzXw/zSio+UIDI9sccUnAqMefwIFVajfMNOILNAov0qLr22Fungqa+vT392LCyWIJI3+Dgw1oGufT9HKDyArIwMxGXEGqexkP/qeYmQGA3tkjYJauqK5tePkYk4IrmrUbPzHiCUp8IRj0h7phE9/QyGGw8iMzMgdk68rVlwuS3VRlWEVco2RoIXgkqCxG6I4eJInCcp0Ct7njhJSjwJiRifjmIgmoHanS+Br2ytJMzSPEXimerE6KHHMT3ai8wMP/z0QuCT8iTOE1N6pY
Mm0iRmI9PUdMTCpuWWE3tRhKNRDI+L7xsqQ9XNLweyyqWd5umUxXxgCcpiWYEXq7bp/cDhn2K69yyCPhll+3xiJYISrg2EGmUbY0ajYgzIDKO72BBdqGNM9KOO4kU1DYeQv/EuFKzbLdHSnvgEMNiAwf3fQ0HmmOSh58gVDKlgmw0us40mGx0CqqSPHFwaQRkoD0qxHgt3KvAEMBHLQE80F6vveoOQcZkTHgbO/BxTDc8hlCPEpBSgMpw+kwMhqMQ5dWuXhkgU0pFJurSzkM6807rTJeL5LI/IQv+weI2rb0H+9nulOSkrTyzmhKeurm6Ws2NhsURBryjcjtHnvoWs4JSYMDFsNKCyxxGqBi9rY8xmMWCXjHS3yqWWzTKYJ5mP00bqSAi2dzKIqcxKrLrtNXKcI8nGET/1DIZajwtB6QUMcWOwU/UR5k6uI7s0uEtiCbocd1lp6psDJqcaTHDrLsvjQ9SXhbbhGFbf+jqgcL0ULcQbG8HgnseQJ+fWG2B6Y8x1IWxdEpemz9UGtVUayY7ysmJR4S0hJhlMjE/70BdchVW3vx4IZDgpmcPVJxZp4fvt3/7tv3L2LSyWPpQVkD+T3Yg0HkTQH0NUBtkxWkJjDRXc+wsJY1gWwrh4HIeAz3rkr5BrTMhnMhxD3qqNEsCpvEmg/RSmR/vgD/J5lRFtDJNC861NeIzlyvZSJQEWqGC2GsobSmzT/0uNvQAqSNc2HY0ji1N8mSWitJBvZATTTceQ4RFPURXFqT2mZScZDd1l6vrmFoPZjmcLuzxRXcdBkoiPCyfkWqUPOBoPIa9azqkvIEcW84WXozcrVpaN8J/aehBRHhONgFzJInGxDsm05vhCiaUJu/bCQbZ4Q+INKl1p3ERi0iY+XNf6M07H029SU4EicYqKN201osOcnXmK7juKDtK6JbeOSJKY+senQbP/S8QmdE/RUVVFK+5FRAgqGok6cbpfJEji9bOqZH5HByWmLHfYXKLPt9N1zr45drfVLallzF+k6WqHYyWlv1pVKueUxyrK6G9lPkJyt7BYMaDhkFGZbLURcYv8VaINaDowES2QER6rjM7W7M8FXccFUDekY9AIpVSyLuo7E/Opay4YPVhOsj2m9Xrr8sUuqP8ywXKknQTLjkWjqvj0MGfC6Ho5khqS9O3YPg2dzvSD0+IUzCxlbpG/iWeduk+1oU2WPGuTLWbAelBWlrHIja7EHSbmQAn3OZLnmJ6miKNYMRNePr/hCF0MiFeP1GktzMg6Kmk8vpCk96lRrxYpIyEc1XPU7x7dm23qfjphvCsN/8mOKlt5UzoiHjPtkENn+O1EzSI6vZ7vc4fPJbp8mmsvp9PUMfWhHmKsZyROObxEiUobYiLK+6NQTfkTZZudPkik5ylJHJu4yxS2S7XNKVu8GUUZnCZV1wfDuPybYbLvk3ghF0VVksEtactPEZZnRA71uZPrLSpErI6ddDxw57OSXixBWVm+Yv4ZY5AwADSw3KcR8qG7bwSjk1FExBB19g5hfDqGMPzo6h9Rz0FojHRJkt4bxNPPHcDA0JiQlS47JmXosmYTTjNqiTtLqaUwKjBDLtTfiRJviVpoUhBR3MA0Zqs9GSeV/pcolvtGUo8vJkIcovOY9Ed3/7Ai56i0h29d6T6h8C9rVUqpfCqjIxeW6cgF/wQkADHY6p/KbvZTReIStc4ts/9jj/kwODSOnv5RxDwZiEj7+gZG0D86id6+IYn3S5gPrV2DmIgHMIUgzja2o66pE1PRmPQDhf+caUun5FQd3OHqryivCE2Rnj5zbHyibTO2VuYSS1BWlrHwJqcxc/8z5oJxXngD2XjhhcM4d74NI1NxPPTVb+JsWx+6RyL40RPPYTLmR9QjHpOMsr3+gAx2M3D85HkMjk0LWcmxP0OMTFD2M9XW68+UtEF4Apky+veLt5UFjz9L6suQvFlihzLg8YbkmCvQROiNiR7qCwokM+plRLWBBs3ozGPGMIz7TpoYKY
MGjsduYXnuMvlP788HTD0tRry5ZwQ/f+YAIrEAwiJxaWOUbfaxzfQuOEEl7CJbvtXEbUKoh+xdIPJnpsiZITnJ1hzLxjl2icrPfeP9inA7q5jyZ4oK94dwsr4Fn3j4G+genhLS8eFnz+xBz+gUvv/40xid9uF8ax+e3n8E/ePAl775Yzy7/ySe3XsER4+fZs2O6H/p608Rpb/7vDgDFuWxmXTUkW20cjHhVWZhcZ2BFsAjNkEIQgxtbn4FuvrG0dY7hikhkcaeUbQPTCDsEQMcLMTew+fws6cPiJHeh/EIv2Ugo21vNvpGo3jiqRfwi188i8amDiGtOJ54ei9++sRT2Lf/GGK+bOzZexRPPbUPP3n8Wfzi6f3Ys+8YvvujX6B7cFII0Sve2ItSxl4cPXZaDBMNerpbTpsyijZtBs6e3KjOQ7I0SKa+FJhnWl4h4NPn23GqrgknTzdgMuzBk89Je558Hk/vPYiJaZKjpKVn59IzKQuLRIlslvIsJUQ9j5tFZtVBEyo/HxXx5+EZOS/T4h0PT0yhsLQcgewiHKtrx/6jZ7Fp207sPXBUDTYefMNb8OCDr8eWzZudMoikHvR2L9BBJBHugnrlQYkQO7cKrjM8M7lFGlgPysoyFbm/lbj3k8I/6plONILy8hIMj4ygrb0b27bvQk/vALq7e1BRVo7TZxpwur4L22+6G63iSdQ1tYjnIAZEjNXjT72oDPhNN+9CaUUlfv70HnGKQrjl9ttw4lwdmtr7xLA3IJSbi9rNW3DwxEnUbtqK6bgPp8Rje1JIbHDai827bsPh42fRPzQuI2w9op5VRHWOsi9or2qSO8wc09ty5b8EUcN9Gc4X5eejrKQEW7dtwf4DBzAxGcYdL3kZmjt6cOLsOeX58FlR1O3pOf8UbaUpe34iJSS2bkmmkSpmHF8o7jzJMkwfTk9PY/v2rdL3Q2hq7dQj8qlx3HzbrXjyhX0YHh3Fxtpq9LU3Ycu61cjyTiMvGEUm38OSQY6euuXAgh4Q67iYuPURkb7jak3VdyrcbK3MRyxBWVm+wn8Jo5BqHLiNIRKZQnllqXgCYbR39WPztl1ivPxoa+tAdXU1Wts70T0wgr0vHsV0jA/QA2KMfYjKtq17COXVa5CRlQV/Rg6aO/tRuXoDcooqEcorRXv/GKKBTGQXlsCflY3MwiKE8gvgz8zFWNSDhs4+NHcP4oXDJxALiFemH+M4usmOakE6I2za4Qj/qf3UdEZM/KULF0gEhXR94l34gyF0dvejvGINQuJh5BSWobOnX5EQB/162s2ZenO28za47n/mOK3uEjZrXDphunQicaI5nyD5gwFs33GLnONTCIuHqActlcjMK8Ga9euRGQoilJGBkbFxupRqqtfrCwi5yPWgFqs45c1DpFLZunQQ0f00/zKsJMUSlJVlK8oYzDBm3NKgcBqFiMrofwp5OUGMj/ajp7sNlaWFyA8F0HD2JEqKslFTXYDCfC92bKnFK15yGzasroEnFkYsPIKiPB/OnTuJbvG4+vp6UFNZgjOnT6G9rRu9vX0oKskTMuPHTMPweMQUhifhiU/L/jTikXFUlxeJd5KJXTu34WV334H8gmzRzazmcgmnfYzeifBkOxNh6f654y9BtC8kf6NRZIhBnhgfQ//gAErLy9DU1Iiezg70i1SWlcnon0aW+bzSn0bH1G2KSK/MEIYZcY75R4eZdlAnJ8xsLyYX+ReLTCA8OYitm9YjKyMbra0dqi0BrweFWSFkZwQwPj2BG3bvwotHj+PAkdOKyF48ckquHmkb/0lFF9Q7q5j0Jo+zz3fe0qaxMpf4PvjBD9ovSVgsL8iFi8ggpltPIcivK8g/Dc7ta+FUDvcksfq6Oaf4SkqKsX51NUKIwB+bxMZ1NSgtKRDvIa4IKypGukoMtJrhE9LZtWMz+ro60dbSjNycLGzbthn9vb1oaWxV02G1tdVi2IckTwkyA+J1CUGtrqnA1MSIkGIWdmzfgpGhPj
TVn1VfvCgtytNfjFB6JaHey1GBHoRjfkwhA3mrNslonp/FmQQ668XIDsPvNzm5TS3l0qGJ0Yu8XPH4xoYwONgjbb4B42MjqD97BhtqV2HLxnXwO4/NErUrZefWITWWZygRxvzeAMam4sgorYUvt1yfUzknk61nkOmdkDQ8q07Fc0CXmSg5AVWfdPb4xBhCIT8q5LwW5eVLxBTWrq2W8+HH5OgwSotzkZ+bgaJi8RhzcnDi2FEMDw1ik7Q7LydTSlX+4yWC51OdVUcrP0Y9Wcir2SwXFr8kYcIFM9W2SIHn2LFj5u62sFge4MusE40Y3fMt5GTzywvOg3xz48uo3tz32giLL6W+cu4XuycxMpp1HsCocL7zxMk99fIu0zGPJOPPNHFxgDOOdsryyV8f4kJqMSE6xsmubL3qGXncQ9+EZTNGa6H3JB0PpTxqpSBDeepGe03hQ/WxSAZG4vmouvP1YtcKJNEgcPgnGB9oQ0aGKkCEW2efG1XfpYMlcSk23/9SOlIHCaOHQXj5zlAsKgRO/Uyf6lwGHOWmg1oz4OwTnObisVpHwI7wZ6F7JIaCrfcgWHmjJJBzGBnAwAvfRaGvT9JIL6pzMTd4PrVmFJdecsxzyD7VZ1DOWSINqc/k0u+0eX0yJOH5kGtDpeblovrVlMnU84HqyUR/qUsTQbTHi1F12wOIB8SLlpJVy1j0fItdofB94AMfsB6UxfICjWJYPKiW0wjKgFQTgmNIZhgsbmgsaGyiYvPCItN6X/JoI0VzQYrSYfoDSpHEsQ7TW+bz0ZiJlfVJOfSy/AwXfVQ6xjnpuGUZiXJo5EWdGQadiiUONQFOx3yYioeQy9E2f8RQ/Cl01iEyMax+/24mXGVdJnTbdJupNw22N87f1ZItvRilL82pW9ckOMWXFqlp5Vh7XoyQrfKgYsgoEQ8qp0yCxZLHJsSDOo0Mr3iNko69elGo+i/UQVXP8yJt0OeU50vOPY/V+eI5nlLTuV6p2yPXhk/C/Krd+pwxP/8lMUtbZ4BXlIGkV2zvxwiykVu1UQY2wRklWswN3/vf/35LUBbLB7y7aTQTBEWDYG55MQgXGKwUo6KWefNrEnpETVFEoaDzJsLNvsS7w5KQ/YSBdseZrWPYKSyDBu8C6yQBDlGx/ClO8SmC4hSf87HYznpMT47AL8N6lqb1IJiPBV5Q6CWCbaMPqUXrbfZZlyOqKlM34+RQGeA09acE0a80OVVZvgBGhXtDQlDenFIWJIkmMdkmBOWZUKl0/XpvVsxBUKpGdW4p9HDF+1Ur8piCG2mj2k+eXwq9aKqjNU4tm8dz6OOUYfaVlyz1jMSykFvNc5rup1IsZoO5AiwsljQSJkHd+/SYxGPh3Jra51anSH7VW/8Ti+SIpHPF6UUCKof8l9G7E59WVEpH1Pfx9M9eJMkhnRCuY1o897HTBkOO2qzRW1OmVIUxVG+4ZVrRWdXvKsckvWKwoNTCdB1aRdbliIKznWf9yTYRkld5m5xK41+WxXbR0+GW82IU09a5oM9n6j+dLykJtbmvjp24Gfo73jerdSGRVWHm0YVwynVdG5xCVM+y1CHrn3e3rXhYD8piWYD3dhJivKLDMto+h0BADJzc7TqenpGYH7GoCZEQLcosJcJpIRSZqa2O02YjKSavJjNj9pxy0oqJT5dOl3VhHdyyDSzfi8loEGFfHnKqNjqjbSHD3hZMjQ2BH7pQUPo7klLelQp1pDlVejnlJ3XX2wtFp6doitV5E+eAYsIVmE48qGkPcivXA5mFUggHCdOY7qhDMMbl3pKG39BL5Esv+vylFyZJ6mb2uTXCY/a9STNTVPkp24tJou9cEuUiCeVBSVv5dRKLecNz6NAhnisLi6UPXqm857m02zuI6X0/BCb7EORiN/6ekHqofrHL2RgOwqQ1x2mg54CSEGN2eWA5qbpJWSxfDLEy0GK8usdCCJRuQeHWu4RvnVV8rUfRd+p5FOdKWmXIr+bEB0lb66mfGR
Gpeicx6yIJ/kkXpdL7EPFkoXsyiKpbXwNkV0q4eBiecUwf/RlivaeRke2XMFP/5fa5QLWBkk4Zp9xZ2jAT80lDmPoceISIJ72YyF6F0pvul2M5pwmdXOks0sL3vve9z3pQFksfbvugHhCE4fP7MNreihA9Kt70NPRqNQL3uZ1NGO9OY45TRNXFP9x3RIW7xAzV3ZKaJhHu1kF0VcQk+zKK5/7YRASj8RyUbboZCOVKOOMlX2YmfEN9iAx1S5u5UsLdzjSSqC9lfy5JpDH6ONuLyYw2zSUsW/SWevjya8dIBPlrtgkZr5EgMdrOsyFfhh9j7c3wxcS34aoQN3kYPS9FVB4iJVzJpSBd/jlE1evFmIwvRsJBlG29Rc5jsYRLOy3mDc/BgwfnOzSwsFgcuK9Q3vcQgorJnR+dALrPY6rhKMLhcUSi/LqBkNW8R7uzw+sYtqQX4eByDYwYWnob/AKDBsvlz1zIViTq9cOfVYj89TuBonUSxwUSNOxMJnnG2hE7vQejw72Iibeo1ErV7QqhJzF175kJzYthXs7HDAhBeUPIXbUVodXbAT+JOKh4WDlu8WFgoBGj547I6R2QgKj6zNLl4oLzlwazeYGXB6lPGsN3sPh1eH7rsXSdnFNOZSrvidcP61MnkBks5oDnxRdfXMizY2Gx8HBfoeqelgDHi0JsShPVmBg2ZcjmMGaXc6WnGq8rIQX1IpRArXwj5NjYK37tPDtPvR/ED9yqb8AZA6Ys97Ru54S0c3LU0esKdEkHxb1SZkK/eeBS9QhI2zJyRKStbKfyqgipXJUVka20NSJtnBiUNvMcp6uDYfPQcz7na6EJSom0h88QM/OFf7NlnwMOCU/oMw+9LOA5cODAQp4dC4trAkNDyqYqW+WEpLv/L/cKX+g7I51NMgZeRtZsAe1XIpkTRTCOhz6mJzErqNYvMGZrdDrlCYbPFpcOorMkj1H0kYjsqbbqPuAaSfW9BRKVgiROSyKz6brYYH+QeB3y5SHF4pLh2b9//1I9yxYWM6BsmN5NQhkucwmnXMpMvByubtUo0zLX1q27CpYA1V6accKkvQQw+5zZJIGzSGJm3zFTuowSNiPdHFDZnXISRTGzKcAEcusKVxt3OjdmC18sOO1zy5V43SsclqAslgWMGTK3vcFs5ik1/XzzXSqutFyT3l1Owi9yFySRl1J2armmbG7d5bj354K7jFSYuHTlpNZHmHL0Np0G2pNyx5g6lhtmttXiUuHZt29f6tVhYbHkYIzVfA1V2vTuK30hLcYVlGv0NFgofU25FyvPpEviwhBCv8I6ixIXJk+PtNlT69Mapw9Nh9SUi43ZNbW4dHj27t27lM6uhUVapBqri2HW9Iy4GvbjMss1erqzL4S+plxirvIurJ97ydzJ8CskqFmyptanE2qCIpL1z1ZEav7FhtbfYmHgeeGFF5bS2bWwsFh0pDMJV9Pouuu7nHqWGkFZLBSuxjIgCwuLZQ3jBbjlauJK63HnX2yxWEh49uzZYz0oCwsLC4slB8/zzz9vCcrCwsLCYsnB89xzz1mCsrCwsLBYYgD+P87iRVnIPd8iAAAAAElFTkSuQmCC"
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"cell_type": "markdown",
|
| 45 |
+
"metadata": {},
|
| 46 |
+
"source": [
|
| 47 |
+
""
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"cell_type": "markdown",
|
| 52 |
+
"metadata": {},
|
| 53 |
+
"source": [
|
| 54 |
+
"The encoder takes input the three words and converts them into a numerical representation of each word.\n",
|
| 55 |
+
"The encoder converts those three sequence of words into the shown sequence of numbers. \n",
|
| 56 |
+
"The encoder outputs exactly one sequence of numbers per word.\n",
|
| 57 |
+
"This numerical representation can also be called a feature vector or a feature tensor."
|
| 58 |
+
]
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"attachments": {
|
| 62 |
+
"image.png": {
|
| 63 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAdwAAAErCAYAAAB0NDJUAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAABhaVRYdFNuaXBNZXRhZGF0YQAAAAAAeyJjbGlwUG9pbnRzIjpbeyJ4IjowLCJ5IjowfSx7IngiOjQ3NiwieSI6MH0seyJ4Ijo0NzYsInkiOjI5OX0seyJ4IjowLCJ5IjoyOTl9XX2gMSyYAAB/jklEQVR4Xu39aZQsx5Xnif3DPZbc93z59gV4eFhIAiBAAOwSq6pZBRbJqukajdQcqavPGemMdCSNjkbShzn61N2qOWemv2hOn/ki9YykOt2ntunqqVYVu0AAZG0soIokVhLE8la8B7x9z32LVfdv5jfCIjIiMzIzIjI80n7v3XR3c3Nzt2vX7JqZe7gnCoVCKQgC1KcUSSKSzdC4SqM0Pb1GqVRCIrGVfXg8e4+3VU87oF2RrWwrUSwWTczaiOo+1YXSfSZ0I5GXP0W7jqRIYOJpCDHx7apnH+IbNk9c8LbqaRWuLdWzK+NwK4HqUUl0UHktK7IusgDkF8W7itMtiVtNDALpMQkfEgmN0+UxoUjFBfvR7n5gK2PzeLoFb6uedrCVXSUkMPKypeifHa2GJnRNYoiDXbsOzH2O5XuXsLxwA9nsnPhSOlMZ3QZDSPUfwND4cQxOPQqMiYRTkpzs8za8b/GNmMfj8VTjONyCSFH+heInpbHErDjaT4Grb2D++o+wvnwVQbCCVEb8aYpxeHAoDWsCuZyMfdckLJhG//RLGD7xdWD6GYk4blKmM/fe1+PxeDz7mbLDrbjEJZEHwO03MX/2u8DyBxjtuw8MrMuAVpxyKLGSaYkc3aUtyUiXt4Fzsr6cxsLSNGYLpzF68psYO/MtOe64xON9Xo/H4/F49i+Rw1V3K061dBG48gpuX3wN/biGkfEACfGvSHAELEIHa5wtw3gM10WCUByvSD7E6mKIOwtjSE28iCNf/EfA6HMmuh/oejwej2e/kiiKwzU+sLQiW3eAC7+Hh5f/EIPpm8iMiYdMJlBI8H6seN1SyjjNQBytvT9HD0q4jBxyMSubAYrLfbj9cBSFoW/h2HP/R2DkUdnPyWrvcz0ej8ez/wissxVHmbgF3HwVNz99DX19y+Jsx4HUlMQYQ5gYlDFsRiKG4jAT5tljK9aBWhfKaeM+iT8qmyMIBvsxPVFEcf5t3Pv492TwLCNnQV20x+PxeDz7iURJhrhIyOh28QdYffe/Ed97HgNT4hbDEPmEOFCkxSvzRz4FFEtZlEriZjm6jZ5A5d+EjJETJk4gsexIN1lYt4Pe5QHcfTCOkSf+D+g7879GKZGK3LQ9vtuwHQL3r2drurMsPR7P/qBRC9RtLVOiVFoRv3IH+Q//O8x++vuYPhwA/XSJ4mCRQjGYxvxSKK50EWMjOXGi68ZhlhLRT4hKfeKbB7GwKM5YtgeHxCGX5pHkreE8fzbUh9XbBdwt/RJOvPR/A8YekdPyp0dBDJpp5tGzOdLZitY8Ho9nT6ltsLuscRKHe6eEhXdw/8f/DcZTP0M4Jk4y5NRwCkiO470LQ/iDP/kEWL+P/+hbo/jq80eRFOeLRE4ccgaF5CN4/8Nl/P/+9C0zM/2bv34ML345lD10znyYKg+IT79++zTGT//vMfjktyVswkxHR49e7QksFyvVJUT3wTIql5P3uFvjdeTxePaSGsfKB5N429Osyz+7u9K210TvGOFv//b/7rfx2V9g/u67GJkSF8iZYb5BqhDK6HYKf/z6Hfztz0LcvjOH3No6Xnz+BDLhMhIJjmgzWC09gt/9dxfxzs/W8GC2D3lxzF/98gTC0hoC49HkT2ZIBsYy0l0NMXT4SXHk0+bke5VpwnPz8lgE1vEnzC+QNxQGI7HkTF68bBCvGy9xEm+vvStsuNmYy1JXtW2n2L9W9opEKfdmqfST/w
eWVj7A8AG56qT0B+bXMLuQRd/4GfzxDxP40x/O4yvPjOKxg1fx618/gsHkXTlyBcXSIFbDL+PP/noBt+6M4J0f/RS/+NwS/tPvjGNt4RoK2TwmJkaA/jGUlkZx9+E0Zl78L4Dpb1YU1Caqk+dWpa4R9itMN4hwwR0PVrF46z5WZhexvLCCdelgZFfXUMzmzJuTPBWoMqO9BG8rcEVWy0+vezzdB+swxdtp75GXBjxIpZDpzyCVSaNveACDY4OYOHoQGMvYZ3oj7MhXR71sxzpnC4nS0p+VVv/uv0Ky7xZSo9JyJhPIzy5jXhzOwMgMloozuP6ghOMnhmVkew+ZYFV8ck4usYBiIkQhMYJsYUR81xQuXbqGQ+NrmBlbwMrcfeTF4Y6MicOVES6ywzJKHsT0l/5zhCd/i6e2V9Bi1C1yyTOYs0QbXFDZUSfIPtR1cw5zlz7HvWu3sTonI/f1EkYGRtDfP4BQClArpqmkZs2jUB9G3/q7bJdI3x5PN1AqFhGInQZBAtH3WizeTmONaZOlALksFgumQ7WezWJldRmr2TUglUBmZAAjR2cw8/gp4Ij4InMQiUqeA69yWHtJlOb/oLT6xj9BZmwJGM2IM5Izr9MTyTLk729Dez3JPAqlFQmVngQDzLCGVxm5r7BfcsxuhBzL3+IW6NoYzv18IUYKd24mMPTof4bBp/5PEmZ/02uSaCFMMlJjGT1F+VRyiesfX8X1j85j7d4CxtODmBwYkw4Fn8pmHuR6GbnF19a71Gpc8LrzdBt1zNTbaQ+g5WqWWqDcoBSxll/DnZV5PCysIiE+7vSXn8TQF47ZqIxiHgC2tHtYJQ73fygtv/FP0T+6iGBcht50pCVxOHyjlDjWIoZlBDuA+w8fYHKaIz8603kRfi1InGY4Kb5VRq+3b2B0NIX+DHuSfJI5crhmKU6s2Ifb14roO/6/xdgz/1cJ44s0oihthGcn5THYpw9x+W/fxdK9eUyPTWFiZByhdCrMT5tkt13K+J3TTxJQKQjus+tVOHFcTLbcHc3GU2riNxtP2RC/2XhKTfyG8Rrg6s7j2Wtov/Xqr7fTeOOWq2mjIhhkf0ljl0WuiROYnXuI+/MPEB4YxZmvPY/kSX3ff4SbSBswDnfpzX+CwZElJDjXzQvjm6WKObnQQC50Evfn+vAv/uWPxSkP46WXTuOJx0YwNJBBPhfi6vUCfvKTc7hy7ir+0388gKeeOIxEwGOtqwt5z8Q48D7cEYebOvq/wcSz/wX3WK20KYNGwZJ42dHKAP7G37yH+fOf48TgFAaHxmS37A2l0xBdQzEquSI7G2Zpt4kpWLtaBWNUYlWojd9sPKU2frPxlNr4zcZTauM3iteIRufzePaCZu3cEy/qlysdrX3bA/dyP/1QwPa8IBIk8GB1DleW72P0kYN47JdfAIbFHwk8pp6dtIpEafHflFbf/GfoH1oERjjqFGfDjxOYl1wEyCemsJQ7jP/yv/0xPrwyhEyYx/TIOgaGB7CWLUqPIcTaShbTfTfxT/8vh/HYyUEkEjk5PpqWZnpcLQ3izk1g8NH/DENP/ecSUHaFbYEus1wY13P48Ht/iaH1Eo7NSIdARrTFIl2yOGTe10nYwrEFJEvH0WoBlNOqQY/TeBviRwFbxquhNn6z8TbEjwK2jFdDbfxG8RpRe7zHsxdsZb/eTuPJhnItF6AtTftIlDpdpfKgFFv/bCmH6w9vYzVdxJe//cvAkWF7ONGILSZAsg+Z1AAKK+tYv30PizfEK85xylguLpdHaXURQ+kAz37pa+JDTyO98hxG5n4RuYtPIbjzLMKVJ5HPHsDpR2dw7NAMwBvVKyJZ8bJrOeQezmHlzj1kHywgUUhjaHDCnNZQrY2WQEXS2fInSdRZ4aN7+Pjf/TlOBGM4NX4UyVLadCSKcg3aC+JI1hWG1V6ahtWTepT3y0U0Fa+OuNTbr1KP8v4OnL+ReDzdQj37VPHEl3I5Ou1cvTK14XzUlzc7xe
2JDxgSX/DExFEcS4zgnT/+AZZ+ft06DRH6kHrp7Jbwt3/7H/124vZPsDp7DUFYRCadxPrKKlLZHLILy1ieeyCD3RWcOnoCC9ev4oSs//3pKZxOFfHkRAKTw3cxMvwpfut/9igOjS1i6f5N5BaXxdmuIy/plPJ5pJIprIsPLiXG0X/iF4ChR2WDnyCKYCZbBJVq3LmkmX3vOj76q7dwcuIQBgeGUAjEwcpO0/+p89OASNdlIdUxGtNs/J2muxV7ff5G7PZ4j2c3dMrOPZ2lcXnpnuoY3HKFDroowmUmncFY3wCufHReBp/96Ds0Jr6hNoXWEP6z/3Dyt7N335ZR3yL6B/oQjI8hVSxidWEJfX396B9Ko7B0H6PDaTz/5AmcGclgMreMqf4VTIzO45HHc/j6N2dw/HgR2dkrCAo5DAyPobi2hmKugD5JL8j0o7iSRS4XYHGuIHH6kRo7KMNQPhUstCBndlzKsbNNrPjBLVz8i3fw2OQxDKaGRIGBcbj23m7llG4vZrPL2Ky3U++4evGbjae48bv5/I2odz6PZy/Ybv3xxIP65Vpdoo3bPfuAbEq8bibRhwOZYXz08ScYHBtH34HhaJTLIVzrLCT8P/+H+d8eHnwoa1ksrWXRx5/xDI8gn11Hsi+FxPCwOMgCVtZXMDiRxPCJDEYeT2Ls8QImvhRi8vE0BibzMpx8iOX5h+gbHEQwMiIj2lX0Dw9BhpYoLS4in1vFkDhvXvzs7CJGDpyCdCVMD6M12aFiuJC/lx7ig+//HU5MHpVOg5w/ybdm2TjEPR/XVTbDjVcr9dhNPBWXevtV6rGbeCou9fZvJR5Pt1DPPlU88aVeedZKPSr7Smaka5xqmMSADDLP/fwjHDw4g+T4gInVKI2dEAS5z4GhElIHxKuPjWBxdR3I5cVZDmOJ88CFPMK+NIrr91Fa/RBI/QyF8Q9QOnYRpenzQL9I8TJyyzcQhgmEA4MorCwjIU4OAzKyXVhENi/pTYgjHk9gILyBTPZjyecDcwE7GTm58HiKVYp41QcFXPrBWzg9dgwjmRGEQcr8xIf3Zv3LKzwej8ejcJhGKZrbiwlMJAfxhbEj+NnrbwD3l4y/UB/TCoJiIYmVO/MoLq0iMzKMQZEVcZIIkkinMlhfWpGR6AAGBkawuiCnX++XwXAGidUkEvl+cc5prM+tY3ktL0PxUTkugWUZ0faL4y0uLSOXyyEzMS7hIQoysl1aXUPevHlC0hX0N1S7wThRfihBBtqf/+WP0cd/cs0F6QAU+FusUsE+iSxO1+PxeDweYt0sHWpRfEURRfFfA0PDGC2lcfGv3xL/JvuLesNy9wRjQykMTEyhUChhYW7W3OMcEKfL795mRkaQkZEuh9rJkVFk0imszcvItCgj34R4N7mawsoSVpeX0T84aKaPkUpiZGrSLIN00jjxwuoq1pZXkJDt4ck+8fIPsPTguuk2cAZ4N+g9WaO1n9/C6tWHODRyACGSZmRbkHxYZ8sZeY/H4/F4qqEXUQlLAc5MHUX+s3tY+ehT8S3WJbeCIJ8Xxyn/UzKCHekfRm593bxPOTEoo1fx9vwQvTmZOMtwfBzr2QKyi1nxdCkjK8s59PcNiWPliyQkPq8tk5YE7ZQy0y8lQvSNTSAYGBVfLaPbYgKptMQxTwzvDn7hx9zdXgIu/ug9HJqeQSkdmN6KfduVnoHLdj3s7fF4PJ64QrdF4cuP8iIcgB6aPIyP3/25+JZ1GSSaaLsmSGIVKCyhsHBXnOGCjFT5ukZJPVFAiS+c4sNG5ket4pXDLIbTBaRy8+K3Vsxxpewy0imJL8cUxHPzYnPFHNfE6ZaQlBF0MiMZWJaR8eoD3jQWJ5lDSeLshrLb5Ipc4/oHFzGYDTEqnQbqRt8aRSV6PB6Px7MVxmvIwDEM0hgbmEB6pYQ7Zy/LjuidDdy/C4K1fJ+MXvsQjo4h6OuXREORlEhSEpdlwHVxwnwVowxOc9m8ODHxcHwtooxcA1kUzb
yw7E8wXlLCMjIwlnSLEibD81JRUs3ItjhDlAawWhiTU/IFGNGTxbuBWpgr4tOzFzF54IDkSBQjo1vrbj0ej8fj2QZ8+ZEZ8iZwdGIGN94/KyM6ex931w43NfEi5h5ksPpQRqTFhHmNcgJrSMgINpTRb4hl2ZbRLKeeF/LIZgexuDKEpZsFLNzm9gBWlsQTL0v80jqSiTVZrsoAeUnSk9Ezp3bFAUPizd9M4Pb8IUw/+psIDnzJOEsOoHeCe++2dP4aUmslZPh73/KDUd7lejwej2f7lMzt0RCTfSMYXC5i+eJnka/anV8Rv/p6CbffwvrNt7A8d07840P0pfNIpgJxlHyNREEiBVhZHcRadgITh55AIjOBnHFsOaSTWaw/uILVpc8xkFlFKvoWQJH3bgsJrBf7sMgnm/tOYGTqWaQP/SIw9QKQktFoQkbUzJe5lO0hLl4cu/yRfsCtP/whBkp9GBoYMU8iay9kJ+l6PB6PZ39Cl0K/QQnp/4p53Hh4E3OTIb7wD1/e+QgxIlEqLpeQuCO+8zNg9iORC1h78Bmy60vI5tbFaeYQJvuQ6n8Ew0dfAmaeBNLjcqScWUa0CEVWbgHX3sHqnY+QW18wHwbgw0zpzDCSg4cQTj4OTHwRGJRlcFyOzURD9ugi7GJbGIfLldtF3PyD13H48HHkknb6mmqzae4kZY/H4/HsR9ThEv66JRQpFtbx7sNP8ZXf+jaC0QETwY23HRKl0lrJOM5EVjYpSzJqnJchKl+AIdu8Sct7s+GIrIvwO7bmQ+2Ebo9IvNKCxJ+VY9dEJJxD3bRcXMC3dURS6pe0Mia9yt3bnb2Mopzhn97GnR9+gJlDR7HuPEqm37X1eDwej2cn8Kkkjux+fONjnPnmVzF5RgaMHGtK8I4cbq7E10JU4KjRjBwj7HjRiroz7ue6hhOGcbStI27u56cHueR0uNkv2xzZctLX/mSHLpHT1s1fOs9nYkcrd7/7LgZurGJoYkIcbqG836RrhtEej8fj8TSP+pGkrNB/nZ27htQjkzj9Ky8YZ0bvpb5uOwR0hPTiFH6/h9v2ZVcU+xE7hiXN/lI5TrJUMsJP1lP4THPAG6p8NYdIUMpK2LqkmxMpyDHiZBMlicPpZl7sdl1tDdSInG51cRnpfrkCuZbKmNY7Wo/H4/FsD7oVQg/iepGhdD/WHyxWRp07hK+2iBK2jz1T+M1AK4FZ8hz2QqodWfUWXXNSxP6MqCCuuZAQ95xgGPcRzQ6pPrpZyi/Z4kWtis9dXkOYYjdAUpRdu3DhHo/H4/EY6EnMsFP8ykT/CErz4nDW7T7Xk20HMypmwnRUOiXMEagrDKt1Y3W/J2scnpVQNjgq1jStI2wkO4A5Xi2hmJcRc8gz8QySVvQ0VslPJ3s8Ho9nh1inaod4mXQaIe+9rvI5J7Izl0tfaNi1e9Lzl6/DOr6tZXtUZXM9H71yy6ZV7dQ9Ho/H49k59Dccu/EnqKEM7swDwbug7HB3jevjqrxiaymfhiv5nPOii7ae1uPx7Bmm2bOrHs8eQOtbF3/Dz9XuhtY53L2Ab78q7vIutsfj2WPYe64VheuVG1MeT7NoN61a9J/dbkStpfH2qfm8qzPA2wmtc7judXSqXjDzvg56Ok5tpaMRVp7tryeeRljdVUSdKx+0VHGfJKH0JvXsplaqdeVphK2htfpSUVy9bkRD6+/dGa1zuO24uibYXX/D49kJbrWhBfK+Dp/n54c9Ks/4u+ItdTOoGwpnqzhlR+Fv9SuuphKHonS4sWkrzCdthTbUWCr6UZty9eGxWJvhb2z4U9VqyVWJ1WulfqpFuZptpYZb53DJHtu/q5itLqVSlRmzWjbu89SnWm8qrv56T4+aFzsKq+RLq+hW4qlG9UJnazsuVrit4SrcrtW96r+XUJ00I56N6OOzutzYEvHHrraDo3a1EaNhicxlkUseuEta63D3AFWKUUi0vjkmVqRs26Os9BqtVPcoWfk9G6EetXFsrD
8rGo/HxB1rPxT+46cpzecrzStL+yLhK0z5ztVBRzIST3+P7qmGlVZ0WOpHwujN1SOX+nt+1bqWQe9A2yiWUkgEowiTM5EcMBKE06KXKYk1hVJpEsXihMiQHEObU71UZH9ic27rWBqF4gAKpVHki+OynJAw0Z1IIhB9im4pYYq6HZOwARG+1qlaj64QXe6GRMncCY4P/Cwf31Al7ThweR7nXnsTj82ckg1p2vnuyAj9TXB9GM+VWjSMKfRyT3o3OPpLsJcYYVRXqytXj3FFr92OthJhCoVCiFSSTiJphN24YrEk4WKlpucnlZ/RxRiLxSWE4brEYCfPoyQSoqNSiEJeGrwiHQjdqXTOgkD+U6+yGkqdT0jHLcG32BUlbkGO0dFJ3GGFYU4yyOczuHN7GXfvLGItuy5OI49ioYA8JceOK9+VkBT7ystRi3jqC0dx7Pi0HM7fhtp0Nm/3ehmbf/vt9n58cvYqrl5/IPUuFNsRWzLvapAY5t0RRaRSSWQyKfT3hTg4M4ID0wNIpUWvYlcFPohrPoJjdcljgnQCH145i2e/8w3g2IhYKJ8s2L6mY+9wz77+Jk6Lw6UxFkxljZS0wfA0mzqxEL3WwzSMcjQLIjqAjYCFx4jRF2nQnIJQKmv7D6tHlgL1l0ikxKCpS+qEjoWKj+IYb0O4ZIGps4mj/tRuJB+mgzGEXLYP16/fxtLSChaXV7C8vCqNY07yzfwzrlRUcRrGdhJLePGFJzA+OmRS8lgdJcR2SsUkPvzwijibJWnnijIqyVr7Mk6Xtb2IvkwoksTIiDSOM1MYH+9DmOBrf1TXcYW2JPlMTuHHP7mAe3cXMHPgEPoG+tDXl5ERboikOItkMslYCGUkFgYJCV9Bpm9NnERORmfsiET6lEWctbFzbP45ui2V+rCaTWNl3bZPhbzYlAi/YsfOCm0sm81ibW0V6ysruHHtCp566iDOnJkynTuTjuk0R1PQ3uFKRiOH+4k4XI5w1eEqDR2uFEAi0SdG2i/KTyG7lje9x1xee5NSKOI07O97+RarFUxOpsToqXyGmSJgSvsMzTMbCHZsMqK/DGZn15HP2tGrHY2IcYZsKBPSi5QRYDqFdJojwazEXxQjZk+dqC7jgjpcVjXm7TDe+OHHMhJZwdTUoDR+kk/pMfeJhMnIUVAXoocg4BFZDPTJ+ES2vcN1sR23tVU2iqGpd0WOMqQjLFVR6mTJ1slsEctLa1hYXMe1G9fxay8/h9ERO+K1acQVXr803clD+Iu/+hkefWQGx48NSydWHIRjJtpMsw/LERtkxG/fW89ObLU9xalWtQ5tT+xT7XzPId/uXw/7Ex87C5VOTuP9985J/Fl86elDUlflKH5hQJTvHa5Q5XCvzOPjaEqZzVhRvKyqvdrhco17eJ9kCBcv3sb1G3eQzeVMhQ455WA+42BjRWo2zmFwqIDnnz9tGks1bDu6s1TO0etYnei92EQwLA1ggJ/97JyM8CqvO+M/01mRAjCj3UhX/f0JHDk8ikdOHRTnyzTY0MRJe5FNSP4LpRDp1CN49ZX38eXnHsPEFC3GjjISHP2KSO5RMkvbIHKUFpgKTPFUoDZoI5xtsve4Oelk61gkou9EKWOmnBPhEF7/8zfx4leOYGpiVfbTlmzdjSfMcVLGAcfw+g/ewxOPj+HEUQkt0Z6Yf+63FsM1tUMV/xnS+rD+GSdg2hljVQyOoL4YnpSB1Am89dYVGSTM4ZlneE+X8UTv3uFaduZwabiyVUphYSmNP/+Ld/HMs2cwPFLCQH8aKY7IODyTOByhJWTb3lviVKBUaDF+e++NhcSiq6RMqrd6FasfNeBEol/0I6PcYuQ8zTSrNWv6WQpfypLNieYKSczPreKn77+Pb3/rqxgaWDedmThoTu2Jf/mPDpTfuwqCo3jt1Xfx4gunMTkleWEDKQ6DhmecLf+ZCk/h0aIzuxql56mgtkUtKa6WZF2cLr9LlkjO4NXX/1Yc7nFMja9JWC843J
Q43CN47ftv4anHp3H8KB8gy9FibBRDRUcV3VRsy1ON1d1meuG+EMnkEbz77mXZeiAOd0baf+5rn8OlJ4o1rvmRxipgVpPIZkP0D07i+MkTmJjIiNNNINOfRzK1hhQluYYwWBIlL4gsSrlEn4con8n2J13ZX9hcG4dZWhXHsyIdlEUZ8VYkCJfFkFeR6ctheAgYHU3hyNETsj0jztd+2WnzytB9mFw7l2xnRKTzF3CKfFlEdIE1WfLBKJFEtERWhB02+wzA/rOXZmBnjY6TelKh3lSHoteE2FliSeyOnTXbse4Nai2CjTxzZ5fVEtmhEf3nqcdGfdUXQodKsRW8vZYVb4cruikrj+uOVOCGHZWZZUJ6jolVOYaNI50GR7BUA0WKycxnJSWqjHIl3KbPYyuJls9ptixVp+xJXD0S0Z15alQMm6OPEp8OtJIwfT87BU/HE9Ahic5L0oiyYeWTgHHU2MYrZg5tR469ZfZ5rR1xaaUyNepai6carU2qK5WKHjWOvffWy7XNtRNdV/249LIO9op6em4ttOpYow6WGVF1qcpokiqVv1FvOvqZgXG6nDYue2qORNg74tIVTzXUI/VnnzxW/agarc74pheJk7CdG2qX0kvYxt9qoN54oxzWaxnvOFSg7agZp2ueD+hlyjUqWnragatdjnIrv6xoD7F3uIRKU3Exg1WVKKwck6NXM7IlHPlKJTYPudgKTbQJpQN2U9CtSohNdX/BHHPkoV0d6tNqzGgtmjUwoz4Z/dqRXu9h+2gc5Wu+I0uQcBMWyX60kNbTyzp0WxOtU7VCjMHVLDeHMXpVWoHRLG9TdGDmJL6toLFB08RVRPRllCewjdusYCrHUdG8BU7jtU6XSzPKZXoqEko0PZX9S0WDlHr/bCG48XoH80CdWWHOOH1em7+KHuw/j6cZ1FK4dKWW5luf2pR6SXaLSUdUaR6Q4n1cBz5Iq/d3a/ftlJ4YdpSdn6sT0xBaGqnKhvNvRWxYZMx2o7IUKjGrgj2NGgCjpAb7YoqpnFyJPg3JWZGidNQKCem4RU9rb8c2qJ1eFU+z+NZkL1AbdaeSG4107S9ZdkfsHa6rGreSq2oaqciES+TaeHq8puWOlEk5fB+Ipxmstuw/e0tiu9D2elU8zdL6Gqf12BUzfycFE3epzZdLvbDNsC856gyxc7j1JudUwbVqqzclrJTjike199hcuF25c1uOqwSyj4Xew9I81I6dhq8W1Zru36DFWKK9X45xuWYcrRnVMsT+2y7utFUrp6+6kV7OW1chek6mUkhm0gjTKSOBLiU89hLlxeRRhPklO21lOvUQXuxefMGLNarlA4uXF3D21Tfw2MFTxmnqqx2rq7QewYd3Mrg3m8Db753Ft15+HmHpLpLJBPLSXtqGk/dt2WQ2Ljz2hpIp+yLsXoYaUD3yZfHNY/VtHZKkYF4cEiJXnMbr338Xv/y1JzE2vCRxmCYfuupu1HqMTYhtFJEX4Q/mT+B7r/wEX3nhBKan+BQ2ne7288P0+SpMvlY0Edj+bymaqu4txB6k7tDhcmqO6zqyaG6qjjpJyujmML732tt46YVTODDJ38jHw442h2+a0hdfHMCJoynRT17Cm9HLRmhP9+7fx9179xAkbVvFF/KXp03NqyFlWa+B63ac6zZ2IzbEfJ08dgIDfX12R1MwkejFF+9cRr5wB8+9cFTCrD25gzRjs+kEPrpyFk//w5f385umFnBOHO7pyOG6b5qqoCHW4d6nw333LL75DetwQ3G4fGerVYKNW0+NZr+MbMn8/DzW1viWm96GvT7meHp6GmHkDJqF+rIjZVkTh1soHcBrr7+FX/raU+JwlyU83g43jBzuC+Jwp6b5cgZxmOZtSNuDNstXjObzOfD9yyRmVXJLrBlYLWbSaSTD0OTRO1xFHe475k1TJ44mRT/6asftQY1yxHfuwnnxqwlMTkwau6K+Kb00g0KT4gtobt66KbYwhYmRUWtr3Ocs6+eWe7bncH
9++RP/8QJ+nq8VDncrTCrmlY8JXLhwAQMDA0hxKqOHYV5v3riBJ848jkwmE4U2B/VV63BfpcP9nzyB8ZFVqfz8HXT3zxSo9dRzuK9EDnd6hw6X+p1bnMfVG9fQ398fhfYWVneC6C6bzWN6ShrGsTGT9+3R6w73qDjct1vjcDNpXLx0ydjUwQMHRHXSueEIV/ZR73TEcce0LyKhtMHXr1/HoOR1fHikSmMap35uzdFIicN9p47Dpc6IsV/RmX+X8g4dbu2UsjvC3Ux9RkliqOwtXrl8BQcPHURaeuu9DA3t/LlzeOTkqW3nlfqqdbjfe+0nZkq51uFq49uNpqh20S6H+3B+FosrSzh0+LA5j2rA7WG3ApNuvQRrztMwnrLt6+IsCUdVAWYfzoGfR5uW0Yhb5s2NdPebw93ZlDI1mZK6evHTSxiUQcEMHS6/thQ5XFG8SVb1H2eYH47mP7/6OQb6B2xHrmn75NHW4XKEm+uQw91+F2ofw16iWZbEeKWR6GVhI2g+xCzwntDOoNFqDZCOkhgu01Z4Dm1s9yOac6onoJpEuDSi4dFyp1KVTnQOV2rP0yieSm38rYV/LXJ4WTxKI23sTkssq9qllkmvFIDmRx944l+VZtnJMbuB9WZfYQrJ0a7rALaCn5szhSOJmKPMhqQnjqTXxMleWbaHjGxEt3x0SjRnQmyqG1Env58wOo0Uoo6MXwZTCWQnZ20YyTzIt0Nxj2eatdJsPJV68ZsRlj4Xmm8uFXbEmruP26uwfqhGjIYi2T2uzrVuU9fGAcdcTP7swkALUtkOkXlWUdaTSCuJt8PdgTLYsPMo86agbd7L0NjlQm5tWXQVWjnBBy5ku7aR3Aqqxjpbe5Rd72mV7QjVKRsQjmyNs+W2kd75V8t+62A1RvXg6mN3umFd5SycfTI5GiREQqxtxVuUemHboVY37Sa+DneHGqZizY+nt1nh3YKtPXVtzyvO4lqe9u6oK7NLNp3dTSBNraa7QWsexfSwBS5MY8mlCWkdTK/25QGURuepjd/K6ylPAXqn23Kss5X6qksJk1VDr2m73G4569vFHBtJJ+jpKWXqX6UR7iu9mqWcpvxxC6xXROEMgMmrBJbzvANq0/VUqKfX3eh6MzRdVzaj2XjNUk4vMoZWpdsbtK6GUK/qbBXVe6/o3G1TdGDghjWL0UsknSB2Drfe9NRO2O6bRcoFI6fncieFGwdMviSDek9tN8bsaY5avbZLz1qGrmyG7t8qXjNo/eGzslqPPC5UiN5M2B2Nyo3bu0+9+9DbX9pWdTM9PcJVA9Ni2O40ci3aaGgqmrYb1guovtzpGk/76Gb7UVtoCZKYm8+Wpu3Zt/AnZ3a5fYsy7VsknaAnHG4jZbnKbKXT0PM1Om8vwLyZp/SidSPe8bacsrPtgDGZDlSNbEaz8ZrGSceky6Xd7NoOh6f74W1B83DYDgdUxr47ZICxc7jlOxO6kBqrunJ1pg6ipQ2GQ1X60XYvYabcacD8KZQsezWf3YCotu1o2dWTejQbb6dwNLK/fwrkaQUVZ7v9Z3HITuzbnI//zHm3V3tjPsJltm2DZZ6krNVapMlW3i+KkqxyQG7ynWg8O4X74MV2DcvTPK797AfM1N9+y/Q+gC2ESlzY9vXWaQe30zb23D1czboqsnlVNI/rZN31XqKsN98w9hTtrBfNUB7V7tUFeNqGtoW+yWhMvB2uVNpyIXM92lZYp8vSRitg+u2g6vobSNsQfbU1fc+e48vX4+kssR/hqqNlRtT5EjYmrsQRzc9m0k5c3cVZj54KjexHy9eVtiInb/s5PB2j1nbqiSfODte0GLbJ0MbDblncbffBKk/z6KsvTYWhDuvqkXEo2uXRpaebcBs9LTHihpfLuAPFx+cD3GcEOnBKTxsw5SftBD9dWn4rGQuTL83ZJ4W6nZ8jxX+E64qUftzLuNwA0tmZh0vs257Mb4ijNz9pQ6XSDspGxGV0LY3Pxbj1xNMtaInoPVQtSw
1n54pl7paxrrfaxvQhk1an6+kspvzEbpbX1nD588+RKxZREGNaK+TxYG7OtFPG5kSs1fVAmUsdYT0x/6L17RB7h0tYiKYgJe9aoG7Bxs0Rs2donKoY81oui5XsujHkohTu0toqViWsKPvqPpndShy9cbn5qepp3tNNhMkQiSS/vZoqj0YCCePnF/kzMDpC89vrqBHhtunotRD/tHtvwFJk28OR7fsf/AwffPRzfPjJR2JfIT79/DP82fdeQT6XQyi2ZL9+ZY/b78TO4bJn4eKWY70yZezqI+JBQRqmfKmIv3v7J/jua9/D3bmHWM1n8VMx7Nf/4s8xv7TYfhcXKY4dFk98YfGxIbwko5B/9+//FB+e/QRZCS1IY/nDH/0dzn56yTjfWrTutKv+tCtdT2egXWULBTMgePr553B/fs7Y1dGTJ3BcJJ1Ol5+x8WVt6akRLkWJawHzutkbTAYB5ufmMTQygpOnH8Xt+/cQysjkxZdewujYqIln4koPsh0jERc9V33Hy0D+6FxFt9t3PZ7mYSlwJFISh3vlxjVrS3dvGwf84fmz+FTCFtZWjcM15RuNdAlty4x4mUYU1qpS1UaYQlqVrqdzsOxSyZSxkTff/FukMxl8eukSrn1+FZ98/DFu3ryJgpSszqZ4esDhsqKaBkXXGRhD9LpZIGb6RRo+TvfdfXAft+7cMVPLt+7dxb9/5RWsrq2hVCgiwcaRS4neDnt2e6a1DWRj4loCvQ27QNliAavr62b2ZHZxARevfo5bYl/nLn9qwkzZ1jpb2dzuhz42w02pHTbr6TAywn3+mWfx3NNP45mnvoDxoWGMDAzgK19+zoxwaXe8Hablvt/LPHYO1z4uZFZEokYioicKM8oXG7vx0VGcPHocB6emcfTgYawsLWGgv9+Mfgu5nHG4vEdiRiYtho0s9cmRjYonfrAMjX1Iw3j65CncuXkLxw8fwa1r1/Frv/Ir+Ef/8Dt4URpH08mL6pNxslreEqAPVLWSdqTp6SzWroqYHBrBFx89g7HMAA6PT+GEtFUvfeUFTE9PVw2GPD0wwu01zIMI0hCxwSvmC3jy9GN48Zkv49DkFE6fOCWN5K/i1371ZUxOTNqHXeQY9wnTerIT2BSaRnenCXi6BlOWYksnjxzFP/jWt/H4o6fxxTNPoA8hDo5PmnDTcVOJjtHbFDraJcYu1RnvAJM20/J2FWvURmg3dCIp2cokAiSlXDkQ0E4bi5nTyWzX2g3PtV2z6sBlVbHvHK4xFBqF3ewa9HpoMJyCIRx18Ak/GnGqCGQkVlrEjEYkDqdryiLbpjdZIx4PUVsKo/WUSB9tSiQtwjD5b1CzaYf5uA7dE3/Y+TLtj3T62W7ZX1NU2h4ujKhxtQkmv9P2boeH7Yh9OcLtpIK3izGcSHidLCB1ukGhZBtNEdODlAid6j164omYicHYkow86Fhdm2IHjusMY1yKottu2G7hdai5tjptT+dxy4/rZvYiRqgNdsoO4+1wd1C4VKxxUnaz69AGSXOm16s9Rz7colN6dLpsQM2IvYF49jdqR4TmQNuhDZnRiIxK8pFdcZ/G5borZhQT2Zw7vbxdzFE2mXLays5S9GyF3otnmbXDGTJFM3UsoqnHrR0yA5YOGWB8He4OFOQ2FLzvudOGo5PQVmuFqI00I7vFTAPGQFeeralnTyqkXrjKbqi1x93eC/ZsjtbZTtdbt5xrpRvZiX3be9M7e5A03iPcHWCNMD7Z3kvDNbqSjomnd6i1JVdIvXCV3VCVTvQzo92m6dmavXC6caLKLpuhxslu1+n2nMNVxVENrng8nr3FTC1GPzdruoHzeNpIp31E7Byu9NeitQq+8no83Y/v+Hr2O7Ef4bq9E3dde9EqHo9nb2E95AMqbj31ePaSTvuI2Dlc+1Nqs2LRCsxljdZUkUZaUMNrkt837OThAI+nEd6aPN2E8Q07NMrt3h+P9wjXPC1mdVWWKP/qZFVaQb1kasN6zSnXe48uQ1
Q8rUf16uo5rkLKS6kcXDdvRqP4h3k8e4zxE5E0RfQQmvm3A/uN/ZSym+Xa7BvnK1I78m0FbqOiDYrH46mPryd7Q9QEltFOT9ylVbQ6va2IvcMlrkHpaFaV2Epl6nlMIdFwVTSsx8QQZdr9zSSDVDytp6JffaKXy3gKLckuLe6ooMrOPG1D7YmYGT8uYy4kjrYTf4frGlBNCXCzLFpKO6Rc0JJYVboU2dFI9C09rtSLR9lpvHbE1W3No4vsqugjojaOZ2dUGkR1Wvz8YsWBxVUCsSXNGzF2JRv+d96dw709pFrf6s1hut+VRjQbt9l4ZDtx40C8Ha4on+pnRTavOOS62RFVaEdahpzAOChZbSpdvShX6mAeTKqJ1/BhpZp4RhrRZNwN5xc0n+77mnW321P2tAtqv1fE0yk4i8D6HCZDhEFotrmeyqQRplMIUikkRMxStilcrye635V68Si18ZBMNhWPUi8epWE8rouEsh7KeeJCQgomVrWhKJU3YDNfkI3LCzj36hs4M3NKQvi1CjsiI6aaG2/ANdlbEsMrZXBvNoF33j+Pl7/+NPqTszKKy0uMMIq1OYViEUEmhU8/+wxHDx1Cfyq95Y1zjhJradRLq43bbDzSqrjcY44wD7aE+OTcWRw/dgyZZApJo2WLxrNH2CVHMpU9/IaILpMolGbw6utv42u/8DjGR9ekQbB6J93ca9UcmZyLQRWRF5GGLHkCr7zyE7zwwglMT6/L3oKxsWZhutTx/dkHWFldwbGZQ+ZjAgxTzRlUnXEnCPBwbh75XA7TU1PlemP+1nkwbyPWjoqJw/jea2/jpRdO4cDkuoSxIWhe792HzVcidRyvff9dPPX4JE4cDUz92Em+2DlOptM4f+E8FhYWMDkyaj6Xx6/5FKUMtONMjZekPXOpdQWN2rZ6LqMdcTeLp3vYoZh7+BDHpI2iXdGW6h9VC88VSp/gCN595zLyhTt47oWjEmbtyZ2NIUE6gQ+vnMWz3/kGcGxEYrH1au5MLrF3uOddhyv7dFRmxOiDa7K3RQ43kU7ikwsX0J/JIBP1HhNiyPWoNWiX2mNq49L50RFtFY9sJ66yVdx8STQtad68dQtPf+lL6GvgcLlGE6/vcFW8w63F6C5yuIvLS+JwD5rvidJmadrmQxWblF81rv5sqbSG1qXL16nOisNlg+gdrovNV6sd7tmzZ832walpGpL9fF5oa2i9TngjN1DP6TUbdzPX0mzcRudnKJ1iUkbud2/fwcT4uHe47YAXa7LpONzHIodbFC1pE1XJlD0iQePdpcM1I0Ap4I9k1DcyNIQ+GeGa89QxCqWeendjxKTZNMlOzs/97Ngwv7du3sQXnnoKacfhmqMZXVbU2UabDiwJFe9wazHalkbw3sP7uHrtKqZHx8xHvI2jFTGaE/2bDuSmDsleXQXG3Sx+s7Q23SARYnlp2TSKMwcOlG3Q/PUOt6UOl9OsFy9dxODAYJXDpRD+rW0Duqn9cWkUV0OTYYhr166hr68Pk5OTxpbqX0ktPJ93uFvCizXZjBxuoynlCnrE7h0uodFeunIZhw8eMg6XD32YY8UwNp7bGkctnTJispPzc38QBqbBP3/uHB45eco43DA6zj3amrd3uDt1uA/mZrG6bqeUw4KEShg/mWc+w8hT1im/aux12auUuGwpbOq7pFXp8hjp7gYh7t25Z1I7MD1dtkHz1zvclo9wL1y8YBzuzOSUqbe0p5LYFvVtipJLrXeyaNSpq/dQW724jR5+azbuTs+fkrxev34dyVQyFg63/lxozKDqVDoBDdU4JYqclCM//TB8rXBfrewmHqUdcWv3q8GpuMguR9/8yydp1bnabd3r2RpOt9ovWEnHJVIfb5uE4vCSslYtlXKy29JoyLFl2RC/WWlHupqW5EXqCqf/PJ2hqgMdSfnb2VznDsahIxYnZWZSHFHMvhqpR714lHo0G49sFZf55MDAPBzWOJmuIXYOV1SuK+WFMRJdZ2AHCI
PAiDpd2mhPiRixqZyRQt1RqOpZpZqNIZ5mER2LMZt/sgzqiexjpTVLE8aG1F3uUFqdrkmLwmcuJFvSUMbps5hxR0evpjayjRJhiCkTCWS9NkvuppiI1bW3vK+LpcRnTUTCkJZrw9oNnbz5J8vtEu8aQKcQrZJOKHsDYshm6i/a7BWKUU+y3gNhatgqng7CYlFpJa1O102vVWl6toWZiQvs7R4tArO+SeWNc33eiQPcNjXn2O45fZezBRgjtqs9AyueGdX2WsZihLaJ9UXvnrdaWp2uHVlRPHuPloO2WVq992s177RdeofbAnqtQeE9EdKRHqOnR6muFb1WR+KMlkOj2h2He6GtpJN26R2upy7e2e4FqvNmdN+uZqKV6TKtSnrmmYBo3dN5aFX1hGi5aBntl3Kq1UO7ia/DNVbhq6+nV4iqfUKf8G7UBGhzqLbPeK14Orz96TKF8k9RBHbqap869bQHapn3bs3ShFRKRonryFbtSGfmtkv5nvY2MLdKeI98mz6o50e4roF5PN2NWutWFluvkrfCytubrpu6n0HZG9Tp1sKyUYkjO70NxtgqzbJdJ+vS8w43rgbk2Y/EvdnbHD4xu5vGyrN7enlaPw6duNg5XPvMY2N0r9tr8VXc49l7yvXRO11PCzFTyjt0trRElabYpe32xAh3KxUYhXZ/58fj6Vm0+pWdrqz4OulpBbvtwBk77JAtxt7huvckXL0ZJ+uIx+PZO1gv+eiV+2CLr5uevabTPiJ2Dpev1IpWzB8uTGUWjZmnzRx0s1LFPR7PXkFnyzrKF+nzfq6+bpD4OurZDRzlutIs6j86Rc89NKXKo8p7+QEBjyd2SGV0GzdfNz27JW7v5463w40catm51jjY2hGvx+PZQ6R+1j7cstv7bx5PnIj9CFcdLTOizpdota6u3h6Px+Px7A3xdrhR71gdrTpbhc62LLU7PS1CNa9dHl16PBtxP/NI4vDbSc/m8N58MpUyH4MP+MnS6AtjDHcfkvPE2eFG9ZaLskjZanUuO9pI4kyYDKuk+6gqBUc8nsb4xrg34HfBb9y4gXNnz+L8hQs4e+4slleWo70el3iPcCPKTrWmjTc/GVKJwuIGHeznn32OTz/9FFeufIbLl690aUOl1xRXTe8Pcrkcstls1XIv8O9Q7h04SzEyMoKDhw7i8OFDOHTwEDKZjJnNqJ3R2O/E3uG61ZbrvViNOVUzODCItCxpyDTwfCHvRwiebcEHlObn53Hlsysin5nl/Qf3o73tx7/asXfp7+szbRSFzjcZJqM9HpfYOdzKL/cqqKOtdT/lJ5cpUVjcKBaKmDlwAAciOXTwoJnCCYLQ/o6xSWkf1Lp5pUEkus2lp5tgR21ichKnTz+GRx45ZZYzMwejvZ2jvfbo8XQvPTHC1SljFcIq7UpcYSPJBopLOljdptPV/c1I5+jkuTzbhXajHTa1IY/H0xliV+PEfeiKwXWmcXasm1HrMDvvRD29gtqN3lvr5D023rdV2+X0ssez34h3F9c7HY8nnkjV9Q/UePaaTlugn1PyeDwdhY2cd7WebqGTtugdrsfj6RzFUrmB4/yUf4DKs5fQBlU6gXe4Ho+nI7iutbah827Xs1eYtxB2yADj7XB979jjiQXappXv20YL//CfpxWofW2X2o5fu4mvw92Jdj0ez56gDaIRad0o3tl69hrXLjtBT08pa8/FV2uPZ+8JpFnTxs07W08r2Wlbv9PjdkrsHK6tsh6PJ3ZI1eVDUv5Bqc6hb9lTjbvORdfjKESXBslg1XaX0tMjXDU0NTaPx+PZL2i757Z/bnuo63EU4q4bR6wb20DT2MGhO6InHG4jZbnKZE/P44kDbg8+7kJ0WVnxeFqLtvM7wfiGDtlm7Bxu7asd3U/vuTpTJ6vi2R5+6q9zaGNhtW1/p9orQnSpHySneNtqH2zujNSoWMPK+3tEiC63C1Wk0gliPsK17pdSpCHVai3S5E6mGvY7+s5bT3vZYLI9aq
ymnrIuOkvvdFuP0S9X+IGKVAqhSJBOm098UpKUTO9IkE4hKXncKWV9dYiENKqdPN+uKYp6+LQj8rJxZR6fvPYmHps5JRsJcbqSFfmvvQhbnZk91vBQ1jK4N5vAO++fx8tffxr9yVnpbeclnPs0fmPM92fDAJ9+dgXHDh9BfyptwnvRMSUkn8zVxYsXcezYMWQkr9trHnk0hZ/qS6JQmsGrr7+Nr/3C4xgfXROdWb2Tbn6nLnPAqzMP64mnKIrhFeW6w+QJvPLKT/DCCycwPb0uewtSmWx+msFYTJDAg/lZrK6v4ujMIQQFWrftPOpZewYp4wezs8jncpiZnjZlrjNPzdUfa0fFxGF877W38dILp3Bgcl3CCiLN6737sPlKpI7jte+/i6cen8SJo1L3pH7sJF8FMZlQHNEn589haWkJUyNjSMkpCrRcmnAUr1dIJpN4cP8+Tp44Ke3KqPENtK2tbYr7Qzn+CN595zLyhTt47oWjEmbtycyMMlpEkE7gwytn8ex3vgEcG5FYbL22Xz/j7XAvz+Ps65HDZWMY1eBqNTCMIWK8pd073CCdxMdnz2FybAx923ZC8cDoQoyWur567RoeP3PGO1z+bYPDvScOd2F1CUcOzCDk13QkpcrLD+OP6o+fA5ydfYiSdCoO0OFKqObSO9zWOVx21jiyvfDpRalzJRwYHUcoOjczgGFolr0EPzF5785dTE5OeofbDmod7rlohMvqW9Aus1C9xr2tc7gfffKJFO6YjHDtVEbMVNgcYrSFYhHXxOF+4Qtf8A6Xf9vgcG89vIcPz53F6OAg0gke7zrc3rGrIBFgWUZcJ44fx+HDhxGyzCV7zKl3uK11uJxG5ixcpq8PR6YPICEO19Rn2cc2zGjbKJ4rMUWun4MCOtwb169joH8AoyMj3uG2mrLDpV4uL+Dcq2/gjONwazPDkJIUABvDRIsc7jlOsx46jGEpZHMpjgqZhpl6jilqaCYP4hAuXLiIkydPbOJwNZRL1SKXbEh06R1uLUyX+r0no76ltVWxp0MyEpFQaUDMPqF7tbJ9EhyJPLiPfC6PAwcOmDI3emVmm6ov3uE2AzVJh8sR7oB04g5OTSMQuxLTFWWLI+oloxICsavr4nD7pS0eG+v+EW7sfxZk7EilRkntggXKgmahus5VtxkSV3FhHpv7UHhVKTji2QqO/MIglIrIpkLsSgqhlySMltKzt/kUm6rCm0nLMSotitL5ZSZZGKKvNNWWT+zF5i42xN7hEtG7dRZiUWYp6JK01RG7J4rgueIqCqdr2IEoseI2RT3Ne7aCIw7qmh2bXnxqV63B5JFLRzztQeuui3ace612VuUnBpmLncO11baCq+N6+vaVe+eYEXuRUyyedlGvM9irNqt59fWxM5Q7cpGz7cUOXRUxyF5PjXBdh9tp3Zd7lGxUYizbhwfxPpSKbu8osX0JbXVnuo8ZPd7edyvmfnkPO9va0Xw3E3uHS1VzWs4sI+kEasBm2tUpcNN4xlx2R6dKoDegvjltH6dGw+PpFswoXiTgKD6qQt1cl2LncO0jSWaFmq1yELt3Fs1T62jrwetRUbrXFDx7gXnIrlQ0thTnp9s9nr3CDH6k6nSy/d8pPTGl3A1oU6lL42hpBCo22OPZEtqQ2pHH49ka095yZZud1k63y97hthMtTe9tPZugI1s1k27+XbLH0424ndTtzhR1srZ5h9sian2rGoB7f9njqYdrM0onG4GO4SuBp8vQtrlTphlvh9sFIwFegTt17KIF6Qb3ZEPq2THGfuxqGXe7xqRii+bTrSv16ozHs1u2O0Nk38Jl19tNfB1uhxTUDFpebrmxHamVOLFvfru3xxibEeNQ23Glp9gPefTsCe7Dq9ttr7RtrqTQXvyUcpuobVi2ZwbdgRovl736JqS9pDzK03UuzZ7epFONmsfTLFrnOlXves7hquJYuV3xeLqJ2kpu1sVQvc16PM1T76eZ2xkYdLq+xc7hyjgrWqvQvH
o9nu7EVHox5NrK723b49mard6J0C3EfoTrNlDuOhsqVzyebsY/ze7xdJ5O+4jYOdyqN00Rbai4rNGaKtKIb8k8XU6nTNTUl0g8njjjPmeibHe0a3xDhypDvEe4oljVVVkivauTVekV/Ov/ehO13XaPdM0DcFxyw9uSZ5eoo+N3jilhMjSynfuou2VXztaRThD7KWVXUbVKM85XpHbkGyfoYM2H4COjSoahWZqGMxJP/IlM1dAue6UtsUFS8XhaQTKVwp07d3Dp0iVcOH8BZ8+ew9LykrG3bh8g8Oo6eYWxd7jEbZ90NKtK7O7i3ppUKokbN2/g+vXruHnrJq5du4Z8IR/t9Xiahy8EyOZzKEgj6EqnGx1P71AoFlEsFDA4NISx8TEjE+PjSCVTUYzO081OPv4OV3Sro4PaqWNtSIy0adTQbvjpNhKElVFuIhH4kYpn23CmZHZuDmfPn8O5SxeM3Lx9K9YzQJ69hZ24ojjdgf5+cbQTmJyYxNTUFNLptNnn3wteTbwdrhSmOtpAxKybHTXOlgExhcZ86OChshw5cgShNJwez3ahLbFBfPT0aTzy6KNGpmcORHs9nt3RLbe5utnJx7fldnSqjrZWzey5lyUK87Qa1TxNyV16uhF21vpTafQlU0bSQbKnHir07D/cWb5uf6Yl9kMlbe6NiN59U78XVJWCI55uQ0smlLpC0e6RLy3PbvG3ubYmdg7XTiJXw+I1RSy73KI2DlglCvO0C9W8WwKebqNRKflS8+wWfSq5kw8t1XPu3TzKjf0I11W3u06Vu+LxeDye9rGX907jMqqOvcMlVLWKp9NQ60VHdNuXRjfiNoluCflOqacVdPrJ5G6/Z1tL7Byu/dWgWSkv9KEoFc9e4ksgDvj64ukVap+M7ubRbrxHuKJYt38Tr76Ox7N3sK6oeDy9QrdPLffElLLH4/F4PNul0x1O73A9Ho/H0zNs9ynpTjpd73A9Ho/Hsy/R5xg6NREdX4drbkDt7R0oLSS30OIuRJeeDuMV7+kAbn2Pq5Dy0hnR8uGp7T4lbd4l3iFX0vMjXLeAWo2WkZZXLwgxy3YpzdMYLYAepcezFwu0nsddiC6JG74d1D90qrnreYe7k0LweDpNbcX37//2eLamXvvu3zTVQuq92tFF97qNVzvUr+n3oiBI+Aa/Q9A2KeZrVyLlr15Fy15C7YlLFU97oNNJpVLmu7RBmEQYhgiSSSSSYc9IKPljngLJ207tSetfp+paohSXd2JF8GKNcgoilxdw7tU38NjBU7IjgSJbqQhdE9chIkeUWCgZ3JtN4J33z+Plrz+N/uQsiqW8hNsC20rpfPotSCVx8fKnOHb0KNI1H1nu5p7VtqHTlfyeO3sWj556BJl02jiB5s2F8Sh861QShdIMXn39bXztFx7H+OiapGP1Trr5c1pqF9aGxMaQF5HKnjyBV175CV544QSmp9dlb0H0Y/PTLLSX2dlZrKys4PDhw0a3DKOYdYmjWow7CbGnudk55PI5TE5NleuK+Rt983lzrB0VE4fxvdfexksvnMKByXUJY0OwPb13FzZfidRxvPb9d/HU45M4cZTfu85L+PbzRU2mpK7ym8crq6uYnpiQ1APRUhHFQNKV/cUSzxl/aDtJ6UzcvXsXR48cwdjIqGlLKKw/pr2O7Gwj1ESIZPII3n3nMnKFO3j+haMSZu2ptsMbpBP48MpZPPudbwDHRiQWW69GaTcm9g737GvicGdOSUakyYscrqsGdbilFjrcj85+grHxcSlsqRBsGJ1CddfjDPVBuXHtGr745FPe4fJvCx2uSVfy/VAc7vLKsvnWMWEYnZP+tEG1GGeoP378/uHDBygWipj2Dteh9Q6XI78LFy+Ykd+ByUkkROe5YlHGHIFVeI+0UYQO987tW+bj9yMjI6KxaodL6rcvRlNIOQ73Oe9wNyJmI/01ySjt8co8PnntTeNwqR6OcF1F2SWzJ2stcricyvjg448wPDyMTEqckI
S7x5XEsOMMp5KlxTf6KBQLuHvrNp750pe8w+XfFjtcyTjuiRPijMn42DgNzIRpg2g02EDfdMqlphzV3sAr01Llkt/hXZifx+GDB0UOeYdbpn0Od3BoCDOTU0iJ7k2rxI6cON+itFHUe/daT2Pc6+YyKQOgW7dvo6+vDyNDw6Yt2Y7D1RFu3jvc+mzlcKlH3pimKrjJP2ZyrgUOlxTFaC98egnHjx3DYF8/CgU2tHZfzFRZF5MDNuasnJKfi+fP47SfUhahDbXe4T6Ym8PS2opxQnQ8dKR5aRA57WfOLZ2fzSh1+fSg1Z1kNQgx++ABStLgHzxwwDvcMu1xuOcvnMeQDAqmJyaNwzX6prD+dnFHrVk4MDB5lQHQjZu30NffZwZB9A3qcDfHHN1xhxvvp5QjpRjheo2SzGitDfA85uEWGm5eGkZpRKTnYpTZC2LyZfIk21s0+J6dQdNUG0qJ1tOi54xIWow2YyRAXyIU4bKx9AfJrpc+I9JJkVy3qUp6GsAOmY5q6YTYiS5KIcRdpOrY/PGPs93txL41VUdrHEUkpKpAWoimr+c0Ij0qc+8tEk6fxV1cnXraB/VLx2uE6yKhrFOSPSCaF+YrSacrtuXZG+hwpSh6Qmox7VS9HV1GvK1fnJtZOOJSVUi1O3eBnZ6RNDltEcgKp2AlXKUg4XEV9oCbnzYmVCyFpuQuPZtBDZueuoj22gtcRuHGrsTOXLuKsxi7kqWns/CWhGmvHLgVd3EpFsW2RGrDu5H4OtxIu24hmNEuAwW3sreqoutNeBcac+254iyK6rQqsCHlEqgRz1a4uqezpQPWbY9nJ+iMW3ldOm+1TrfX4CBBW53tDRg6S0/M75QbqBqbMo2XShTWCmjARB2wnpbLuMvOUQ23UtP7A9W7q//dlYVnv6NtFB2t64B7DY7g9cHBOLQ8sXe4rpK57pt7j8fjkbawi0d6rabbn9ZXYudwpa8WrVVQR1trXvpAlTvV3Cp4z6CbfwfZOagDc+cxEt32uvF4PJ2n3q2/bqEnRrjufS9VNR2sK55O0b3G7vHsF/T32+7Ucq/ijuS7fVQfO4dbftYxWrhm1G6T4r0QxTyIsA8eRvB4WoWvKZ2jfF8zmoVrlyNiqip7hdsG6z3rbiXeI9w96M10skBdY24knh6jhwvV22tnUSe7/Z/6NQ9bQpVaeMbas2pYq66G7bH96VOwo4fDOtOSV4j9lHKv0qxBtspwPXuLabSkMLUB8OXqiRu0WbXbrey3045uMzp5Ld7h7oJ2jnRNA9ykeHqDTpWlNoxbNYqtxttq7+Haktse6bpuq63Vhmn4XqHX0Knr8A53F7TjaTg3RddoG4nSKYPxtB925NrRmavXsHTKbnT0XvXLgUg8MSbgZ1HFjiJzZXFS9Nab2rE7pd16y94d5to7dFHxdrhtaJT2mp3mqPc0sT9gufEBl048XclzUVjpKbrdSfSce3FuT2uhxfI7u/yCGr+xQ+dL51X1cQGxax2Y8K9b9ip7Ca9JpRPE1+HudUm1GRqA++vWRtIpQ/G0ntpRHhun7T70sW14Dllw5OGeyduRZzvQXuhc+T3aXKGA67dulp3v8toaPrt+DXNLi+b97C7mOLvaFU04r0GlE/T0lLIWbnWRdzfl6xXDZU8xJ6Of9XzOvthewtaLeazmsmbd9CQpeownNrCCuyLDAP5t+UhXbYOSCAOkMhmxrcB8c5ek0mmzNNeh03+ReDxbUQpDvPfhB/jRO2/jg08+RiKVwl//3Zt45c+/j1lxuPyutk4tl22dSzEwI7Ku9rYXNtfpc8fO4Vb3y3sTOlH2Gtko/u2P/g7ffeXPMLe4YD56/O777+O1H3wfS8tLNm4U3xNjWuxkazH2IQ3f/MIC/vrNN3Bn9iFyEracXcfHZz8RW8sjEGfciWltT2+Rk8HAorRFL331JTwUuwqSIU6efhQHDh3E3YcPjMMl/FvVTG0I2B/09AhXyzRu5Wp6hDIKmV2Yw9DYKI
4/+giu376FhBjz5IFpzEvPkc7YjlEqaHNpnHCNeLoYU0jtKyWmHMrI40NxrnMrS/jpxx+imAywuL5mRibLq6tm6k+vYJ+2hZ4dwO8b8wP377z7rrGxTy6cR1YGC4l0CtliQQypviUZk49E7U1jangnqD13u+kJh9tIWa4y9T5Zt6KXxwIxHyMXQ00mU3g4N4f70lPMrq/j5vXryEjY2PAIZh/OmnyZUQnf6xw12F2eTU89pCBZbu16ByzthGln83k8+dRTMgpJms7a1MwBPPGFp0zjaBwuIwrRwuDtybMp4mxfePpZnDp6DF988ilkwiRGBwdx6MAMHj/zOIocGIhtUWhLtUJobyp7gfENejFtJnYOt9wP14WUkurK1ZkpQAlQiRMlMWIa6vDwMCYnJzE0OIQZMeBCNo+5hw8xMz2NAyJmJCw5dQ22kXg8o6Oj+PGPf4xMJoOz587i8mdX8NMPfoZLn35aPcJlnYnWPZ7NSEhbNTk0jBef+TImZCBw/NBhHD98BM988UsY7O83bZRC+6oV0z7JSlk0TKQTdPp8MR/hVnpN+gBRFZEm43CPUy/R5EcfNJD1Z576Ar724ks4PH0AJ48dw7Nfehp/T7aHaMyy3wjjikTZrSsK0/fsL9iQFXM5fPGJJ/Gtl7+BL3/pSzh94hSmxybwD779Gzh14gTCyH44U9Kukbant6BdhWI1SZGgWEKar1eUZViQdQkLZT0hYtqmOval7ZJpn0X2YoaF6XfS2mPucDeiylNFdlKZrUCvmQZKY6bhpoqwBiw7XKExG1vl+lbCxD37DmMfFLGBdBhiYmQU/ak0BtIZDGX6zGzJyPBw9c+RZFWiezwNUbsyHX62L5FzTUloSrbZZtHxmn2RwyW0KzMraTfLdqbLXifeDpeFKQsjXI+2FRZiWdwdXY5eqxqzK8zjBspK2EQ8+x41HdoQbcuYRhSo66ZhdJyvNx3PZtB8OLuoQrigqI250L7MNLMao8DVWlFqj4878Xa4gmkkRMoNiAltXICxQXqLKlsZnZvPzcSzvzEP1omYaT5ZNSLh3DbhXJfGkLZC0YddPJ5GGOuIOmlqN4S3KDhDR5ui3elDnYHzMyG1Lz3OlV4lvg7Xtg6V1UgUd9tMYUTrcYAdCL3Xwd/empdeiOjSNcxmRZHDWwxTpGiXR5eeriPqwGnjR0yHNQpX9HkAj6cZdIqYwqffKXzVo7ZVbsfNDo6qrcs9nlTv7S3i63AjWDhlkcKMe2G5+TFEPUcaMZe7kfbhXrUrnm6BpbHZg3Vmmk+oGo1ImIZ7PI2oamfEXIyjpXBnhGtf8qdicyK1x+t6LxJ7h0vKBeQUsltgcXTEeq+D190qaS/1NO/pNmhX5j6tg7tdHok48dwRisdTi7YvVeK0uW6nrda+zB4JoyPWWDrS7UVi53CjIirjNgP1mgTG7uHy83haRiOn6ke5nlbQyL6M462/q+foqRGuW2a+iegU1LreudGJb1164gQdqzpXd2qZaLgf7cabdrSLZjRbI/VoZF+Mz/Ctju8FYu9wWTbuvH8Pl1VM8CXQC7iO1Z1e9sQXt41sldAqGkm9+CpqX4ynjng7x8eV2Dlc6Rfpikhl3p+46x6Pp/W4oxSPx3WCtdIMbrydHB83emJK2ePxtBbXsVZN/3lnGzt0ipYl1y2i9sV1ta/a8M2kVbQyrWbwDtfj6TLcXn63CaHTrbdvM/HsDWUnJYVQXu8yQfQ78Hr7NpNW0cq0tsI73BbRyULz9BhOY0h0vdtER7ykdh9FRyf1xI+MO4f7sxrt7GgZdSVyYYnoDVTN0qpOHNNR6QTxdrh7XIm71oA9nk3Qh6CaFT3GpTZO7X6iU4Uqns4ShiHCZIgglULApWybpUgiWnaDhKlkeT0RBkbc/fWE+aIwbiii7MTOTAelQ415Qi4wVjWhKH0R82owvjvs8gLOvfoGTh88JRlJoJiQii3B1brTkFBWM7g3m8A775/Hy19/Gv3JWRRLeY
kR1jmuAVK4l65cNt987EulTUPDb4mS2uNjpdgI5oHXbZ4glF7n+QsXcOrkSWTSklfZ0by5MJ5JSSQpOprBq6+/ja/9wuMYH12TdKzeCUdA3QpzwKsTlyIbYmPIi7Cyn8Arr/wEL7xwAtPT67K3IPqx+WkW2s7D2Vmsrq7g4MFDJiwQ+yo5r1lsB9sdTTRDo2tWewmCALOS10I+j6mpqSoH3ZxNWTsqJg7je6+9jZdeOIUDk+sSxoZge3rvLmy+EqnjeO377+Kpxydx4qjYgNSPneSLmkyKgz134TzW1tcxOTFh2kvVMf+6I+C4Yj7kIsukOOu79+5h5sABTE1M2p3C1jbF/azHR/DOu5eRL9zBV75yVMKsPekUvBKkE/jwylk8+51vAMdGJBZbr+0r0jvcbTpc9gw/OXcOE2NjSCYCY9z6eLt7PNOLlWJroFkUS0VcvXoVTz75JDLSudieY1QNeIfbiLLDXVvF4SNHUCgWTVhRlu3EdXatol4zoufhkmU8+3AWuWwWB6anq66huSbIO9xmMPYqul1ZXcXi8pKMD+ybixneC45WKd+Tlja4UCyYT0yyjWoeasQ63Hcjh/u8d7gb2crhKtVrbC5b53Bv3r6N7NqaXIw4WjHu8u8VzV8LQ+Js4I4qcejwIaTCpHe4bXC4C0uL+Fw6Nem+PmPbxp4KbXa4bRjh1oN5MSLrYRBiZWnJjESmJ/0It0JrHa6itc8oXyhv9wjMlmvFtQ5ya6gN1uOj4nA/9Q63EbxYk03H4T4mDtc0hps63KSspnftcM03H9lYUG2Rw62nQhPCaHYRG8rXKyt0hG7evMNtrcMlTD8vI1r20tlBC8QxlUptdrgyKmglrvNsBGOUCgUkJX8s7+07XMYJpf4dEYf7Vu853KQ43B/Q4U6VHa7Wj3rY58Qb04xGe41aK7TzjvWh/qjfpBnhds7hxv4pZRqWGpe7TlW4UtmzO4xPp6OVpWk4ZJPLesJGRZdxkarrrsmbpxbViVqeyvbgQx/JZBLpZEocUmBmE9opPEcrxXz7dAuh/TCPtC1CJ6uyObW63Sp+TClXL3HACRndiiSkc2c6clViw7bSBZPbb1IN3W2t3ioi3dxoXXXZGWLncO1dWrNiEU0b0+OyRutaEEY26LSyV3dtiNIAHlWPSoqRSIJxldah2ug1qCRXUZpPVildr5X6UN/sLZc/Ct+jsrldVfTEprIiMtqLdMpty6YJxRTmiUriiDdaL0ttvnXZ2KY8iupQibaNMYpF0TCN09VZpfbqNN4jXPaQuXAl0hcX6jxsRTchXBEYQAVbcat3TWoi1WgqbkrEhMlGWTQsxtIc1MAmYhoQtxfZfMrdiWqHVUedq1Yj5pF5tT1oK7loWb8nrantJ9lItd5qRyN2hMJ6SuiAk2bZKLVYUgqk3UghDEZQKo5IzkYleyMiwyKDkQw40ifCX0nEeTq99dCSbEueFMmI9IsMODIkbkOkOCxL0XVpUHTYJ+Gqx411tJXE2+EKbpWrrX7W+bo9Y92qDmkk6oKbhoe5S6G9xdcNVOusIi7ctlUhzhqp5IrT7bSOIlLJDIJERtb5hCQrreZf8snOhorp3MU7/+1B9eHoq6wrQl2LM0pwOjqJIKC4eq6USpxhLgJxntevzeLu3RIWFvqxtj6EbG4YBXEORXEOSIzLiGzKSBCOij4yss8e7+2qgnmsNuhHMjUhMoUwtDpDYkoc7ITobBzZ7CiWlvpx+9YKbt16IJ2dzrjC+D6lzM7w5Xmcff1NnJ45JRv2oSkdXRK7lL/iedmDKSGFB7MB3n3vAn71688gk+JDUzkJ1yMq2JBKpW8EY+heN5Ye6e7vPWwuN+aRW64GUiiUDuB7r76Fr33tCUyMrorh86EQa+TdfH+4kjc2/GJjpYLYYCiO9jC+98qPcfDgOA4fHsbAQMb+dCpMSCUPkZSlna5SPeSRy9uHxSphlt61j2Zg7vnyAum4BOy0sK6WUCyWUJBBb0nqbl
Eku76O9bUcltfSePNv38PLv/I0piftiDje4wZ6TBmNJaZx63YWly5cwvLirKhB9BKIDcmiJDZHm+GLK8IgZY8qLuGpp47j0KExhLzna2xqP1uSzb9py2Xkf/78ddy4OS+6S4lwPx9Es88M0LYYRuGDjiMjQ3jyqSmMT1DPFP+UcpkNDve1N+1TyhLGfaRaDdxiQXAqShzugwDvvXcJ3/7m35NGUQw7yCMvBq2FYWIzGaMWhrEAtBu5fQX3MnwARmy3Ck5xsaduH45RocOdwCuvvmGeUh4ZXpXQeDjcjdAd0ALHcPf2Gi5evIJ1cQbFQgKFfBGJkCMwcSBJPihkP0Em7aQ0lqt45tlHMTgkDabTQNZW7P1DlH8ZtfLXA2c/uY67d5ak0xw5XNlNPdPZ5mUYVyzkkRRzSadTmJoaxeNnZpBKLRsdszGNL1YPBfBnYRnTiJfy0iYVSsiJ5MWmctmcjMqkdTPNEJ9ip8NdxuhYCoMD/LmezgjsT0uy2PzbNiWJ2dk1rKzQI4hG+WS8dF74cGIoRpRgp1gkbTrGrMtF2Z+VZbacDnG16R0u26wr0c+CZITL0UfBVD5LdabobFkYKaws9+Gv/uJdTE+NSOOXwMBgH5JSiVPptFTgpBQAJfrpgqg1mcwhnZECMWk3VnA9JW6/OOIAm0Mx3iApjUACa+tFaRysfvnChnze9iCzWRnR5QrIiywurmBhcQ0PHj7EN1/+Cgb614xu2ZjqE9HdjpYvK6cp2VJGZEAawX4ZiaXFIQSmkWQDaX7mU8hFnTXpMxdzcsgSJqdSYk9iuOJwrRblnyTcm3ayFVqfZNQW9mP2QUFGsKJHOlnZF5iOC19JKPWRr/4TRSWTJWRS0jCWxNEG62I7ebE1NgTx12BRnKZt2+RfifWJM3KcOhcHYn5uxjxafdnluiyin6MZXe531J7ocNkpkZFtdK/faocVjRoWkaXVm31GgKNac9tik5/1eYfLtiz6He4Zx+FqZriU9ryCmaNPSkEMYm42h3t370sPaBlrq3njOPKFAnLSs2SB8A1L5lV1UjBDA1m88JXH0N9np3I81lVQl+xBvvv+WSyvUFccxlnHS3ivLZVKSy+SEmB0pA9TkwMYG5UGNsGpVYlpRsPbN9q9hZbFa5b8SqW2DWBlhEXtlModP43LJQ2WzlZHI5b963BdqAGOdEVoR2abtVw1w1pPvdoHHLltOz6cmarWZ1wxuWAnX7JCa7L1SEXRdS5t/jWmR7HasCPdSr1UaEe6Zm1JpVrTtXiHGznc867DlX10smU11urD9GCk51NMSWPPEYpdZxin/qyYVEQzPJj97QVxGJy6sqMVDzVL3aSko5KSToqMSmS0R/MzD7OI3qh3vlyBwpcasRcZyAivVJyXwzl1Y02Oo+T4YvNQQRpA0+GzzsHqyEUbAIZX9nmHKw1fIBqLHGfJvNbSulr7l7qkRvmP29Shdcrcth222rKIHyaXYgzV9qD5qpc/qyE3toe4OqunN6I6q102Zt86XF6syabjcHVKmQ9NsbqS+pnikTUNnxn5VhRnjqPVR9ipB57MNgj7lbLey0uru/K0l2xvHK1ym0dKY2mmcwpmCpAmF3bo9YLtx80HnUJ9THidLHuHKzU3shs6Xb75jXWtohOuRfvL6xRrV1wPWvzmrL3AtRubW0830SqHG3tLdQ211mg3CityToT3EFdElqV3vSAyV5YgISKj2gDzZsm41tl6qhGdJESXiVXR2SLChOisNG+WRkR3oejQLhdFl/ZBKTplvlfXlkgvQNtQcW1to3jqwy6/dvtpH4G0SqaDJr0RI+afC7esxO+WRH28newP4t81FCrVbytozswyG3xOZ/K+UUokXRHze0pOM+uS8ZtLvZdxGwKrDU7p8QGXUHp7KXGlKeQSKWRLSSM50S0lHy0LHA0bFfIPnVOv6NM3lbvD3sYpd1j4hLvYUoBMJPxKlWzzN7jmFoWKfRre690TJ2LncMt9XWdRdd
+WgZ624erZiOieH3SoJ9yvrpXi8dSHTtc6XOtA6UhrhU0V97nUbns83U28R7gyZHKrnK9+7Yc6rhLxpBvCGoR7PPXQUSvh/f3Kk8e14vHEm3g7XI/H4/F4YoJ3uJ5dw9GsS+22x+LqxavI49l/eIfr2RXlKePIg7hTyZ4KG/RkFx6PZx8RX4drWi/fbHUTfmTbHKZTEomnEVSO3svVpccTb/wI19MSyiM4s+VphKsnr69m8I7W0zt4h+vxeDweTweIncO171dtjO5lv9gVj8fj8Xj2kp4Y4fppOY/H4/F0O7F3uPqWKbMeCaETdsXj8Xg8nr0kdg63+tWOdotiXiVY41nrOVx1yB6Px+PxdJKee2hKHapxtLJRFhvs8Xg8Hs+eEG+HGznSKudqdkToRge8rZy658Szd9Qrj7iLZ2+pVyZxljgS+xGuOlpmRJ0vKReMBOh6O+F5eQ3l75vICWMnUR5cPXr2BrWnKpuKoXh76ixSjavaOndby2Fru+LH1SmNwpuR2mPd42u3m5cgErUnN69xgHqPL9GbptSQtBAUNTa3UGrj7BamXfkUXQkFES6L0guIm7jXXk93nvbj6p3vV6qyqZgJr7tgbKtSR7w9dYZaPRdE/3nW8UjK5VMWibMhrFgjtfubkc3SrQ3fWoqJommfjJicxYv4OtzIc3JRFrGyKFiKZqO0Ek2PCkxJySdlKB2WAtnW/mP8hP+Yh1SRSxvq6tPTetQ2XXui7pNGqm0qETPhP9pSRrwt64i3p/ZTbgvNVsWe0lIGGSmLpNhTghKVT+WfhnBpj9soWrIq9eKocH9turXHaFi9fbVi47A+JCN7Yh3hPhIXe0qU+AHKGMGeGRUv3R3g8gLOv/oGHps5JSGJql4PM6VG1w5UaVrgeRlt58OEuSz25kmcFMtLDuSCUyJhwU7beDqDq2m1WdpQQWyqEIjIOuPovrjADnBaKmSqyCnAeI5I4oJrH1yqTXHdDESCwOg/Kw0WhTB8I4zMHbqTSzflJjE/GVGJ0qtKl2wjPcaVNHnpSXFZgbRRnJYmmup2UtsuQTqBD6+cxbPf+QZwbMSMtsMdnDH2DvecONwzkcPlVIlmpt0FoLBhzMp5HyaLuF1aR07Wi6JSTplsVGwnrmhn8MpMTzhXwHiQwXTYh6G8NXBW1O698t6CNpMTpc9Jz+dOKYtVsSQzBSg2ZdqrbscxFHbg0vkShmRkNZPsN/YUSieC2fD21FpUp6pXblO0/i6lgXvIYja/bgYGjT/8oke6oik3OqYetfE1LV7NTtK0cRNyeCKbx9HUIA6UUgilM9cJ9q3D1aJyHa6OcNkwdQo2JnS2i8kS7ibzuJxYQf7YBMZnppBOJeUipWHZUB7bL6DOIQ06RyJrOazduIexu2s4metHRnqSYiRdfeVxpGzH0ZKmS3tak277nNjUx8UFLB4awfSJw+jrS6OQz0u8ztn3bknIyNxUR6mnq3dmkbl8D6fz/Rgu2dsV3p7aj5klKRWxnglxNljCtaEiJo8dxuDQIIIkx4daCvXsimEq9UqLYVuVortfHS2pl+bm6ZkjEgHy0katzy0hvHQbJ5YDTCcySLONiuK0C+9wI4d7lg73oDhc8W7qcLevhu3DRkM67LjWX8AHpXmkv3gcqVPTKEiXkr1KXkmcFMu5AXuPBBjOi2G/dwWHbq5jppSGZNHkqSCmwpEWCRr2kD3NQC1Sg0Zkg0va0/10CZ8mlnHn8BAyzz6CRDol+woiRRMnDtjrtH955YMQG7pwF6mffo6TyUGxr0qH1dNabO202qd+l4Ii7qYK+GhwFZmvnkEwmAEtioOBStx6BcEWQWNoabowxIZWYlWo7Ry6aVVSczvylfQU9wh7NbYmZJDEuAwIsu9cxKlcGgfWbbvFWy/twk8pS6XFFTvCPR05XH26to16L8MCXhcv9MlAFudHC3jk+adMobBHaSdiq82pYj6dusLm0SvjVbHTkpbrHxCDXnj7PB6V5nIqG5
gGMi8xvcNtDWoFZZEA3lu72lfAhdQapr58BpgalPIQeypydGDjdTuan3IDKRthKcTkWhIP3voERxZLOCwZ1Q6r6sHTGqhPOlM+WETd38uUcL64gOQTh9B3egZ5ccCsyxU4E7GxBErR4MVlY5AcJ/83xmSwuRK7Idg1nkdFESspu6BKeO0laWrsRISycyTRh3sfforxyw/xVH7APEBFe2oXrXK41jPElT3sK2hjMoccVkczyPWJmy0VzA390Ih9ACklxsGlfeq05Cz3TlKO2Gu1wgel+G9dzCk7mMSs9IyXSjmTX+0FJ8TRemfbWsQkysuVJDDfLyUxlJHyKCAs0qaKUlaU6rLrJlG7MjZWlHXpI6RlyW2Oc7N9ARYHQqyyYgh12nNPC4lqq9hTAg/S4mTHBhGGHAkWq4S25ZZduQylfFhUKrbdqpXq9qNWrB1I+ly6x4lt0GmadGWdYXa/e/6atCJhPUgU81hK5JCbGjL1xeRUjo8D8Xa4XUBfIkS/dLtSIiVxRCx3FY5LVNzwvZaq65KaWRbZZkXVysAHXIIwMKPaEg09crZcenaHalDLhNtVWpUNaadkH2duIpHgbhXmQdfNU9VRZjh6CiJ7SjEPBY4NnP124Wkh6nxMmcgGy4Mzg0UxKNqTEZaRs86Ro7s0ZVQrus+VevGMMF3arXQeZVm1T47jmq5Tmjl/hYQ43wT6+BOh6h1dj3e4O0SNwiC9ONlsQJdbhFwer71WmD/z1HcglUYcrnW6MbPumKBaNXoXsQ0kt+IPGxjbyES2JNnyVtQ+aDUqpCg65y8nskXp6Ej9Ld+vZQWP0PKoXe6WskOPxFyXJK4dAmVb5+fxuhq1S3HCO9xdwpEfi7xS7BVDtqEV4R7TAMkml8YANxGOJjnKrLfPlWbScsXFhMn1VImG8Z8sXWjk3vG2FrdMWJYcDVbfZ4sfZrQrGWM2jD2Zf7ruaTWuuRgd04ZkaWaqRPgLhAqVkrDO0JaXu3RjV+PGstiU+I9P19hy3gjPI1ck7ZmLnqvp89dPPDawTOJLTeF1BWUPpWZjhVeaCkOkRQJxWP3pDJKBefwLmVTarJvpWonOxpaOdn52DqsrqybOZsIz1AuvJ1vRTBxP+6iUFUu1PaRSKYRib+3GWL43qD2D7QjVb0a2MtIl1qoqVrZ9bAr96T5jQ0whnUxJ+yVtmyyDhA3rk/aNbRjbteH0AEbT/eiTdo7vnGod7asj7SK+DncnttIGOKVBI+MFlUwvkiNA/map2hjoVP/mr/4aH/7s52KYSfzR//BvcPniJfOk259997u4f/ee6YXSISfFUAdT/fi3f/RH+OTjj60TpvHSWcuSN/d0W++n0viZFo/nMinXRDGjaRETFh3PI7ZSH9NVMceIeNqPWI+MDOVvuePWWtjR++C99/Hzn31Q7uRxyc5gM3axXZgLjlo87cUtN2M5vHcarZdlV4VrU6G9rC2v4L/7l/8v3Ltz1zjRN//mb3BW2qk//qN/i9n7D6Wty+D1772Km1evYXl2Hv/9v/x/4p/8s3+Kv/7Lv0KxyLbRsiN7K2fCzRnZVeY6RrxHuFtQWyTthFPLFTaeMZkIcf/ePbzz1ltYk1Hrq6+8grd+9GMsyCj2nR//BH3SO8ytruHiJ+cwe+8+UnLM6sIi8mvrSMuIhKPdTz78CA/EMWfSaWPY3F6WOGww5x/OYv7BQ5z/+KxJ586Nm7h6+QoShSLSUvl4no9//iFWlpY7Mrrx7AxrOe27N1XM5fDDH/w53virv8KdO3fE0SZx7do1nDt7DiWxFe2QkVY2YZ2ogx6rZ+0c6+0f8/xFtG935WB/UpRbW8Off//7ePXP/gx9iSQufHIWi9K+zD98iB+98QZuf34d7/3kbQz3D+K//Rf/AhC7+vu/9MtYWFhATuyPsD+5oz5lmzqinSJ2LW+lOegO3PuZlfWN15gr5vHII49gbn4eF85fwMkTJ/BQDPTqZ59heHDQjGp//1/9a9y6fh
3/5g/+ENevX0MgI1mOPD76+c/xO/+f/y8uf/qpkQ/e/yn+4Pd+F59d+hS/89//v5FbXsUf/f4f4E/+xz/GT99+B//lP/u/mzh/+Hu/j/ffeQfXP7+KP/jd38Pt6zfkuN/HvFxDvM3WsxOMVYqNstO2vLhk2q6//Mu/wPdfex3vvfcefud3fgdr0pgynp2zqWfJnrjgzkrp+k7L07YX9uhivoCnv/Q0Ll28hIuXLphOfVY6+b/x67+BKxL2p//2f8QLX34el6R9KhRK+Mf/yf8Kf+8Xv4Zf/w9+A6lM2qSxe3gtKiQeLVpPD3Vqi6RdFKtGt/UpFPI4ceKkafA+/PkH+IWXvmoeZuDU3onjJ/C2jHI5Kj00fQArMmr9UBwmH3nPyAjke9/99/jy00/jP/oHv4mvS0/x+6++ime+8CX8L//hdzCU6cM7MlLOitN99gtfxG/+xn+AlIxgv/nyy3hEnPrtG7fwFz/4AQrZLI4eOowr4rAvXrjYvE7arTxPx2BRDqT7xd6O44nHH8fE2Bh++Fd/jW9985v4T/7xP8aNq9dwWRpJj6dCdQOgMyBcfvvb35K26DWE0czIsRPHcfjgQXz805/hV6Sdmp+dx8jIKNLSRtHhm9lgxy/uaIq76qCdJLC39ITDbaR2hqsoe9UP4rTO9MwBpNJpnD17Fk+LAx0dHcX777+PRx59FCsysuBbqvLimL/xa7+Gr7z4onHkqWQSK8sr6O/rQyadQZ8UWT6XR18mgyHJGR+A4TQN7TCRDM2ToaEcY0ScdSIMsJbLGoPn+3j/F//xf4xHZaTt2V9oHWDXcE1sLBS7SYst5qQjxlsUA8mMjHjtj5EYR6VV9cWtg544wBLTeQ4+CGXXEwlpY8Qo8sUiXnjpJWnPUqYNk8YGWbGWIydP4vQTT6BvoB8nT500tyvm5+fMswNz9x8iu74u6Vi/SdtqbF/2vPZ1PFb0engNfK9yMwOdbiN2DtfejTArdhEVnFmPloTFpfcJjNjgtmAfmpJzsPcXSS28tnSQkoYuiTt372JqehqPPHIK12/ewMyRQ/jqL/w9DAwO4nMx0Idzs+iX9T6Rm7dv41df/lX88Ic/xJ/8yZ/gB3/z1/gV2f7xT36Cf/Xv/hj3HzzAC+KcmX5CjJ5LuYBoO8C6ONtfkdEuHfotSYv3UQb6+xlrUzQHfBAsbr91izPttFOSFzd64vSj+PlHH+Hq1at46cWX8N0//S5+9w9/D1OTUzgVdcZaXeLeguKKWiSXdHJSluLw+FBWQZzut7/967h9565YVQn5Er9sJb63L4OcOMMzTzyO577yPP75P//n+K/+6/8av/uv/zXWpR1iUs3bQzQyLsP2lQ/7iRMWf+ve0osDMX+X8jw+ee1N87UgFgSf7lTnqmJKVla4UDHhu0SKHatBCe9nlnDz2CBOfekxBMUc7dAYgnMFBq7xfuytGzexOL+AJ86cwdLiIi5f/RyPPX4GmTCFhYezuHb9OjL9fTgto9779+8bR3n02DHckPAHsj1z6JDIQbPNh7COHTmKmZkZfP755xgbG8PAwIBZ5/3ie7Kfxcv9t8XZ3rx5E2MTEzh8/JhxxtRFLXzZBfM2tArc+NHP8XhuAIeXA6SlkuXMmMfTDnj7gO9SPjeUx4eD63jk+S8gyLADZX8pudtOo1ojp/7WZZTB5wimxBYOHJjBZ59/htXVVZyS0cngyLA0nPUsY2fwDVl809Qw+nD7p+cxc2sFj62nTX7b+e7b/QpLjmWcLPLFoCVcGi7i7eIDHH7uDAYPjJgYLI+tUYux6BafZeco9caNGzgm7VI6TOLTCxcwfuAAhibGzMOZq4tLOHr0KC/EzMZduXzZPLvCEe/Y5KQJr1gY1+rZmz2j2Surav/cZv5SYQYPr9zGwAdX8Xxu0LxKlJ+1bBf+4wUNHC5LRPXuqkMzyeX21bSRxg5XjITdQHOWypl0rbyUKFzXlwOYn+/IkusUF9fgGJ9uj+tGovBmYd
p6zloYRodLQ6LDvf53P8eZXD8Or4TG4Zo3T8WsRxkX1OGedRxu6DpcicOy3im1NtIovUa2sVPU4Y6Iw73lHW7bYdkZhys6ZxN5aaggDvchjrTC4UaG4R6t4Uyytjtu9kXCQ41I+8ivjjHMontqsUeavbLKc2g6Jn/icGdj6HDbeIkdICoEI1yPthUWjoqyfRU1pnzeaL0C1Vod4l4LhQ2beedszbZxqDVSG5/ocTTG8noz4qThUj6XGDPhX7vm6QRuGbUDN20tV2MLKtF2q6mqH1FnTa/D03rcIiyJC+S2bY0qa5uzdTxjK46YtkOWtZT3q0g8bhNdbmTr8xPuVYkTzFmsUUerRaQFwAKtlVbDNI0jlJPXvuCxEeYYaXgo+n1ZjhoZzp+E6yszNhOF62rIzYp7PJGgstj0WU2tc8+LYumAuY/z5H502z6oWdoD74NxFJCIfiutZbOh4FqALe8oaTlJK05Rvt4IMzqXhGlX5pUHak+eNhO1KaJ8ziRwpsHSAu0ziUjK7Qq3lWgfheHl9keCTHvHfduA9qPobI/Nm02zY5iZy90Rb4eblEE9GydZVVHcsNp9LYePxcuCYt6DG63rOfX8Gm6mj0X0nbk6nRxGUo7XpGj6zUrV8c61lK9dKmdR8lQq2nzxGHW29q+nHbAxYhmkCiUEtClpmcp2Ivtry7EZIbpu0mABRsJ1lfJ2FG/HImm416zPt5al3PB7OoPonw+WiD0luW7KmC9Y5Fvo6omNw7Jiy1r+J+FV65HY+FbK/6J9G+JE+924xj4anr8ixnbUptxlh+yJ7R+/nMYO427gdceXTJ/56QsxBSMFwKUs2u4Y9Fz9qTTy2TzW8wWsyahkVToAa6LVtWjJj9TrsizSGm2Q2jgdFN47zEpmspIpXveqeH0+GJHJlpCuNyz27BrXRs26qJlPt/cHKaSyRRRyeWTFLtYc27B2tT3R4zSNurbnirRi2xcnfRHaE5drYk98zmFV1qUPgSBXMC9z4ajEm1T7YfuUzgODxRDZbA7rqRDLobZPUm5SzxuJjVOsSFiUdmHnwuOZzrojNu361+Hab1kknO3riqyzvhRW1sxLN8qZbSEb6qfYbRDIBaT4DsCdI5ceU6iFTNJOv7HXYaSipPZjRyNDYsyZ+Szmb96XEUqAgnlkXjoB5rdi0qcsC8NF3SKJwO6vFrvPrnN/e6WkItdCSYR88bi9zmQihfnPb+NgMYXBguhXdCw7onx7WgUbDddepeQxkA8wvArcvXJN9C49fv6WWio6K3sQWgllfbsSsIxlWRHa4S5Eevt2XdOL0pfro5hrle2SXH8ylcH9z28gfLiEfqkjOsXoaT1qT1xSxUO5Eo6hD7NXbmB5NWvquRSBaTeLXEq95jdra8U8y8F6Hwnj71bc9Nx0eR1lia6nJCIGVJEorEgbS6aw9mAJuRv3MBbaN1e1o91362eukLd5yGSikJ0R76eU14GLv//nODU4g6RU7Lz0ltzMuOtyREuRU5l7CPfTJVxL5XA9uYbwyDiGJsfNlzKMQQn6m1xzLdLj57Sh2a65H2Cmxk1cFVIdp5UwZYo5E6dLZK0ko/TVtVXM3bqLw3fW8MXcIIbEAehlVB3j2RU6cUA70hJnX31R+kP3U0VcSCxgcaYPE4dn0N/Xb2yDcaKi2BH2WJ5QxGzspiSZgB5fSUesxV4n7VvsivelF+49QPrKA5zO9WOylEa/VBx2Vj2th2rlNCvLgW3NWqKIhVQC59PruDoETJ04iIHBjMTjP5aUSi1MyX2a2IbslHpnUKrTrVyPe4y0TqaulMT5rs4tIf/pLRyRoe6xUh/GcnYaupWdOJN7SY9T3NxazC7j7vocnvitX5NesbT9Eiau38TdDjF0uFGRRE8X3fi3f4fxVanE/QPIc4jAfXaXEWX7qtkaps/ps2VpPWbTedwpLpttvnUlVywgXyoao7fXYq+A68bp6kVG6OezKldqj+oE2pjLGAUZsdpRGbUfQT
+mczI6qXzco3xFeoWenWMmwkSR9H00GcIw/rSB07Bz/SXcxiqWSjnz0IuxhMiWiHbclOqPZ9Sn+giWYitKkqlWUmaK5U6EOF3+/GdImqaDiQFM5JMYENviT1a4v/p6PK3EPIchNsJBAaf951LAgzCPeWSxLoXCD9Jz8MJbcnQBKi5qba2wkuaxZ+OZzZqscEmHK31RkUDaqABT0nGbytsZuIyYPuO00p40LTO4k61bC/exPpTA6f/5LwEyqN5XDpcEXGHj9INzWDl7E5MHDoqzq/TZ2p0pps/zpKJ5h7WwgKVkETlpQQti7DR0For9R3OxV8bjTKdpS0zMSJo6YMeY71rKNbO/0p/jtCbQJxlIlfhyDFtpeRWk/VezPzAlK4pU50QYxpEfy4L3U5dldLIqhp4Vq+dr7PiGHTNzwg6SSFDupMmxNTMmtURm6qBunmlsfmx99Dgr/MsQ7UAwX1yyMeyTThs7cqGIHVxLPYiuR4/ztBbqlbAMqHNO1cpAECvibJfDYrkTp+8NsA7X7bTtZano1RN2zsRi5Pr4gZe0bPWVQjNLkjY/5C6VZ4vaccWmcxgm8PGty5h59lFMfe0Jo9TyTOs2ia/D5VVTLi3j5itv4vChozLCDco9f6rCVYdmkstWFwzTNI/fy0Xp9TGs2rHaxiZabRI9oL2YEa405kZnUvn4YIv0HYxBmQZSduiVcNlq/e1H3JI1ererBu7jS1z4xicu2aDQrllO5rjy7YcKzVTj6hitLMVKytrAmzzJUp0vRwPcbu15Pc1AtRsbEmE7ZSYHZd0Uh4P7xK9bSrXx9gJ7PXS8YkfSPrFjGnK9vK81MK+aHpecoeE977eufYLn/qdfR/rEFIfZEo/dxu2fOXYOVy+Wjk06asACcP1fvYqD04dlqJ+ORpVRJXdypgZGaUcB2R59VATOeTew2b69xlGMNvQuvHTNr6c1qDmoo1LM3ZGqno6sUPGuo3XXu60a6+VEl8iscFSgoxHiXL2nzVDX2gGi04otxo5s+0Q7orQyN256XCYLRazl1vDx2m08/1vfAgZTVok7JHYOV2EvzThUWcm98hHm7sxibGISRU6tiaasAxQiDXKh0soC6jWom3r68fprD6rPst6jANV1LSZ8BwXAxnYHh+0anrNePjzxYKf21inabddhqYgbd28iPDONwy8/Z0a3u2EXvnrvUUNIPX4K95fmzVRo9NxUucFiHF3fS8rXE2PxtI+ynh17dcVO+3dxyxdRe90cjdSGeeLB/i6rhLlNw9uED1bncVh8DL2l8wzpjoitw2XTU25+Hh3E8MwElhYWTI9HqVfJ96rJ0uvtdiGu3rpFf72Kq89afbvlwsqvk1GmV79N6RTuNTcSTzzQsqpnT90i7YJ5D4IAN2fvYfjkDDAzLiG8b7s7YuxwqW0RDmllmH/0mSfwcPaBBEmfOiqIKEZZPFtTqzNXPO2jWX2r09qJeDzbpZ4ddZu0Eq17QcH+pO3O0kMcf+Ep81Mg+0xLo5rZHDF2uJG6VeOPTmD4yDRui9MtjwRqxLM1tTpzxdM+vL49nu4hSIe4c/8Oxg9NI3V42nhKWy93VzNj63AV81QycyE9kKmvvYC51SUU17Pm91Mej8fj8WyFjlvtz42AlfUV3F2bw5mvPY9SBsiZNy3tviMce4db/ikOp5aPBHji+S/i1u0b5hcTVKIq0uPxeDye+vBnRvx+MN++lcC5m5/h0V98Djg0DL50YycvuahH7B1uFeJ0w6+exOSpQ7h+8yrSMsrlSNc7Xo/H4/E0gu40USoiSIW4eOUiJk4fxsAzJ4xP4UswW+Nue8DhlhXBR9boVZPA2MtfQWIkjdl7d8w3DM0XKaIRr8fj8Xg8pDIYk5FtKokrt68CUwM4+csv2jdK0QnLoG23926VnhnhckKgFBTsBPwYcPQ3fwUryRzu3r2JTCJAqjL5XFayKx6Px+Ppfdx2n16B72/gR/pv3L2NlT7gsd/4JWCEn1rNy0
DNjm5b9ROk2L5pqiGqSd7jni3h2vd+iGA+hyNHjiEnWdUv9bgKbE3fxePxeDzdiuvo9Jlajl6TQYArt66iOJzC6V8XZzsemqFoSZwtjzIDtUTCHLPbl8/0nMPVN4GY1z5SN3PAw9fewv2rt3H8yAlkUhn7EgHZRQXaNS6bw1V3TyluF+zOBD0ej6e11Gub2U4ZkZ2c2k0EIdYKOZy/dgUjJ2dw6uWXZGQLrIu/TaBongFSdNU73EYwVxR+qD4L5N65hCvvfYzxgVFMTx5AoSi9F1Ge3tstK7Q3tdEmKspyzdCr0OPx7D12VKqwbecjtPzMH7+OtvDgAT5fmcXh55/C5FceBTJRRHOjtdKKMY1WvVa1Zx2uvsPV9GQYsCZyaxW33ngbC7ce4MDYFMZHJ6K9VpkV51utkp5U0K7ZaIDU08bQWtwYrdRsq9Otl5NWp+vz3zp8/ivs5/zbNLllHSwlmscUpxmECdyfu4/bd29j/PA0jvx9GdWe6OderEqsUCStlxW0/hGn3h3hKsyd5pDzzesi52/gs3c+RHZ2BVND45iYmJLeTZ/EK6JYLFRGuyLu4YT7uK0Fun9RU5aOCqfojc6k59iUTlSzrabV6ZpMRbQ6XZ//1uPzv3/zbx+JpWtlqxRQ5BTGZSbFjeayuL8wi3tLswhGMjjx7BPof+okYH2tGaHZgZYcr6PZwM1/a+h5h2tVaEe8VD4LwTjeVZFP7+DhuSuYu3kfibzovm8AIyOjSKfTSCZToh1RuIqrJpZDT2utGagX0Sj1wvdXc51hTemFkajEVtPqdN3MtDpdn//W4/O/r/NvvKukzYFTPofVtVXMiZNdXFlCqS+F8SMHcPCMONlTB4AhcQNsvsQzBMViNBrmNYlIe9/KaWSX3h/h1sLc0vsSrvMe74KsXL2N2c9vYf7OQxTWcihm8+bptVB6R0EgPSR1LNFh7nI7uEW4k+Mb0ep065ladbo2htpkgtMvOjWwKbYHar+70Uz8Zml1ujY9pdXpti49pdXp2vSUVqfbuvSUVqdr01NanW7r0lNana5NT2l1uq1Lz8LfyxY4O2mWeSTSSRTSAQanxzBx7BAGTh0DxvhmJInM00bjA4O07dVtm8U73F3CjNJlqr+lzql/s4OOl5IT4eh3cR1YF8lmJVyGxMbhSrjS+rKIGaoMjm5FGcaMqBSKrtfCcJUqi4+Wm1EvPaUd6fJYtRTCeCo7TVevkeLzX4mz0/SUJtM1szBC1HG2NEqXx/ryr8B4KjtNV6+R0uL8h5JeKHFCadEH+kT6gUFZT8luSvTheE5u6tkpxPxUVKg9g3e47YC5d4Xo0tW3hrUCptvK9JRWp9uO/LdTp0or0nXTU1qdrs9/6/D5r7Df869oGJfueg2uC2yHk3XZ1w6XGddpE/blirLOqQ67Xm0Pbpk1Q218puXKdtNT9DhKbZo7Sbc2fm16ysZ03Rjc48ZmiH2zl+6hZnm/xK7bPTY9N53NsFegx7Uq3Y3XSdF/hH+5306CNYvPv89/dbr85/Nv1+2e7VynzXHluHrpMpRL3W9HsZX4KpbKWifxI1ziaiAqB1uYFaoLq0k2SXdH6SmtTneT9JRWpdsS3HTJbtPuxHX6/LcOn/9qfP5jg05je+qg5di15dnmC+v6/JOuvrgO4PO/v/H5jxV+hEs62MNjEIWbOz6VJuCwq3R5oLLji/J4PB7PZvgRLlEn02pn0yDdXZ+mQQI7TrfBdXYv2r1oJe1Is134/Pv8+/zHET/C9ewe14KacdrbjV9FnYN3lZ7SrnTr4PNfwed/m/j8V2jzdbYc4P8P1W/wtehX4OsAAAAASUVORK5CYII="
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
"cell_type": "markdown",
|
| 67 |
+
"metadata": {},
|
| 68 |
+
"source": [
|
| 69 |
+
""
|
| 70 |
+
]
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"cell_type": "markdown",
|
| 74 |
+
"metadata": {},
|
| 75 |
+
"source": [
|
| 76 |
+
"# Decoder: \n",
|
| 77 |
+
"The decoder is similar to the encoder. It can also accept text inputs. \n",
|
| 78 |
+
"It uses a similar method as the encoder which is the masked self attention mechanism as well.\n",
|
| 79 |
+
"It differs from the encoder in the sense that it is uni-directional and it is traditionally used in an auto regressive manner. "
|
| 80 |
+
]
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"cell_type": "markdown",
|
| 84 |
+
"metadata": {},
|
| 85 |
+
"source": [
|
| 86 |
+
"# Encoder Decoder:\n",
|
| 87 |
+
"Combining the two parts results in an encoder-decoder or a sequence to sequence transformer. \n",
|
| 88 |
+
"The encoder accepts inputs and computes a high level representation of those inputs. These outputs are then passed to the decoder. \n",
|
| 89 |
+
"The decoder uses the encoders outputs as inputs along side other inputs to generate a prediction. \n",
|
| 90 |
+
"It then provides outputs which is reused in future iterations hence the term auto-regressive."
|
| 91 |
+
]
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"cell_type": "markdown",
|
| 95 |
+
"metadata": {},
|
| 96 |
+
"source": []
|
| 97 |
+
}
|
| 98 |
+
],
|
| 99 |
+
"metadata": {
|
| 100 |
+
"language_info": {
|
| 101 |
+
"name": "python"
|
| 102 |
+
}
|
| 103 |
+
},
|
| 104 |
+
"nbformat": 4,
|
| 105 |
+
"nbformat_minor": 2
|
| 106 |
+
}
|
transformer_instantiation.ipynb
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stdout",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"<class 'transformers.models.bert.modeling_bert.BertModel'>\n",
|
| 13 |
+
"<class 'transformers.models.gpt2.modeling_gpt2.GPT2Model'>\n",
|
| 14 |
+
"<class 'transformers.models.bart.modeling_bart.BartModel'>\n"
|
| 15 |
+
]
|
| 16 |
+
}
|
| 17 |
+
],
|
| 18 |
+
"source": [
|
| 19 |
+
"from transformers import AutoModel\n",
|
| 20 |
+
"\n",
|
| 21 |
+
"bert_model = AutoModel.from_pretrained('bert-base-cased')\n",
|
| 22 |
+
"print(type(bert_model))\n",
|
| 23 |
+
"\n",
|
| 24 |
+
"gpt_model = AutoModel.from_pretrained('gpt2')\n",
|
| 25 |
+
"print(type(gpt_model))\n",
|
| 26 |
+
"\n",
|
| 27 |
+
"bart_model = AutoModel.from_pretrained('facebook/bart-base')\n",
|
| 28 |
+
"print(type(bart_model))"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "code",
|
| 33 |
+
"execution_count": 37,
|
| 34 |
+
"metadata": {},
|
| 35 |
+
"outputs": [
|
| 36 |
+
{
|
| 37 |
+
"name": "stdout",
|
| 38 |
+
"output_type": "stream",
|
| 39 |
+
"text": [
|
| 40 |
+
"<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
|
| 41 |
+
"<class 'transformers.models.gpt2.configuration_gpt2.GPT2Config'>\n",
|
| 42 |
+
"<class 'transformers.models.bart.configuration_bart.BartConfig'>\n",
|
| 43 |
+
"<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
|
| 44 |
+
"<class 'transformers.models.gpt2.configuration_gpt2.GPT2Config'>\n"
|
| 45 |
+
]
|
| 46 |
+
}
|
| 47 |
+
],
|
| 48 |
+
"source": [
|
| 49 |
+
"from transformers import AutoConfig\n",
|
| 50 |
+
"\n",
|
| 51 |
+
"bert_config_by_autoconfig = AutoConfig.from_pretrained('bert-base-cased')\n",
|
| 52 |
+
"print(type(bert_config_by_autoconfig))\n",
|
| 53 |
+
"\n",
|
| 54 |
+
"gpt_config = AutoConfig.from_pretrained('gpt2')\n",
|
| 55 |
+
"print(type(gpt_config))\n",
|
| 56 |
+
"\n",
|
| 57 |
+
"bart_config = AutoConfig.from_pretrained('facebook/bart-base')\n",
|
| 58 |
+
"print(type(bart_config))\n",
|
| 59 |
+
"\n",
|
| 60 |
+
"'''You can also use a specific class corresponding to a checkpoint\n",
|
| 61 |
+
"to get the same output as above'''\n",
|
| 62 |
+
"\n",
|
| 63 |
+
"from transformers import BertConfig\n",
|
| 64 |
+
"bert_config_by_selfconfig = BertConfig.from_pretrained('bert-base-cased')\n",
|
| 65 |
+
"print(type(bert_config_by_selfconfig))\n",
|
| 66 |
+
"\n",
|
| 67 |
+
"\n",
|
| 68 |
+
"from transformers import GPT2Config\n",
|
| 69 |
+
"gpt_config_byselfconfig = GPT2Config.from_pretrained('gpt2')\n",
|
| 70 |
+
"print(type(gpt_config_byselfconfig))\n",
|
| 71 |
+
"\n",
|
| 72 |
+
"from transformers import BartConfig\n",
|
| 73 |
+
"bart_config_byselfconfig = BartConfig.from_pretrained('facebook/bart-base')\n",
|
| 74 |
+
"print(type(bart_config_byselfconfig)) "
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"cell_type": "code",
|
| 79 |
+
"execution_count": 38,
|
| 80 |
+
"metadata": {},
|
| 81 |
+
"outputs": [
|
| 82 |
+
{
|
| 83 |
+
"name": "stdout",
|
| 84 |
+
"output_type": "stream",
|
| 85 |
+
"text": [
|
| 86 |
+
"BertConfig {\n",
|
| 87 |
+
" \"architectures\": [\n",
|
| 88 |
+
" \"BertForMaskedLM\"\n",
|
| 89 |
+
" ],\n",
|
| 90 |
+
" \"attention_probs_dropout_prob\": 0.1,\n",
|
| 91 |
+
" \"classifier_dropout\": null,\n",
|
| 92 |
+
" \"gradient_checkpointing\": false,\n",
|
| 93 |
+
" \"hidden_act\": \"gelu\",\n",
|
| 94 |
+
" \"hidden_dropout_prob\": 0.1,\n",
|
| 95 |
+
" \"hidden_size\": 768,\n",
|
| 96 |
+
" \"initializer_range\": 0.02,\n",
|
| 97 |
+
" \"intermediate_size\": 3072,\n",
|
| 98 |
+
" \"layer_norm_eps\": 1e-12,\n",
|
| 99 |
+
" \"max_position_embeddings\": 512,\n",
|
| 100 |
+
" \"model_type\": \"bert\",\n",
|
| 101 |
+
" \"num_attention_heads\": 12,\n",
|
| 102 |
+
" \"num_hidden_layers\": 12,\n",
|
| 103 |
+
" \"pad_token_id\": 0,\n",
|
| 104 |
+
" \"position_embedding_type\": \"absolute\",\n",
|
| 105 |
+
" \"transformers_version\": \"4.34.1\",\n",
|
| 106 |
+
" \"type_vocab_size\": 2,\n",
|
| 107 |
+
" \"use_cache\": true,\n",
|
| 108 |
+
" \"vocab_size\": 28996\n",
|
| 109 |
+
"}\n",
|
| 110 |
+
"\n"
|
| 111 |
+
]
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"data": {
|
| 115 |
+
"text/plain": [
|
| 116 |
+
"'\\nThe configuration of a model is a blueprint that has all the information\\nnecessarty to create the model architecture\\nfor instance, the bert model associated with the bert-base-cased checkpoint\\nhas 12 layers and 768 hidden units in each layer\\n'"
|
| 117 |
+
]
|
| 118 |
+
},
|
| 119 |
+
"execution_count": 38,
|
| 120 |
+
"metadata": {},
|
| 121 |
+
"output_type": "execute_result"
|
| 122 |
+
}
|
| 123 |
+
],
|
| 124 |
+
"source": [
|
| 125 |
+
"from transformers import BertConfig\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"bert_config = BertConfig.from_pretrained('bert-base-cased')\n",
|
| 128 |
+
"print(bert_config)\n",
|
| 129 |
+
"\n",
|
| 130 |
+
"'''\n",
|
| 131 |
+
"The configuration of a model is a blueprint that has all the information\n",
|
| 132 |
+
"necessarty to create the model architecture\n",
|
| 133 |
+
"for instance, the bert model associated with the bert-base-cased checkpoint\n",
|
| 134 |
+
"has 12 layers and 768 hidden units in each layer\n",
|
| 135 |
+
"'''"
|
| 136 |
+
]
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"cell_type": "code",
|
| 140 |
+
"execution_count": 2,
|
| 141 |
+
"metadata": {},
|
| 142 |
+
"outputs": [],
|
| 143 |
+
"source": [
|
| 144 |
+
"'''\n",
|
| 145 |
+
"Once we have a configuration of a model we can create\n",
|
| 146 |
+
"a model that has the same architecture as our checkpoint\n",
|
| 147 |
+
"but is randomly initialized\n",
|
| 148 |
+
"We can then train it from scratch like any pytorch or \n",
|
| 149 |
+
"tensorflow model\n",
|
| 150 |
+
"We can also change any part of the configuration by using\n",
|
| 151 |
+
"key word arguments\n",
|
| 152 |
+
"'''\n",
|
| 153 |
+
"# Same architecture as bert-base-cased\n",
|
| 154 |
+
"from transformers import BertConfig, BertModel\n",
|
| 155 |
+
"\n",
|
| 156 |
+
"bert_config = BertConfig.from_pretrained('bert-base-cased')\n",
|
| 157 |
+
"bert_model = BertModel(bert_config)\n",
|
| 158 |
+
"\n",
|
| 159 |
+
"# Using only 10 layers instead of 12\n",
|
| 160 |
+
"from transformers import BertConfig, BertModel\n",
|
| 161 |
+
"\n",
|
| 162 |
+
"bert_config = BertConfig.from_pretrained('bert-base-cased', num_hidden_layers=10)\n",
|
| 163 |
+
"bert_model = BertModel(bert_config)"
|
| 164 |
+
]
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"cell_type": "code",
|
| 168 |
+
"execution_count": 3,
|
| 169 |
+
"metadata": {},
|
| 170 |
+
"outputs": [],
|
| 171 |
+
"source": [
|
| 172 |
+
"'''Saving a model after it's been trained or fine-tuned\n",
|
| 173 |
+
"is very easy.\n",
|
| 174 |
+
"Just use the save_pretrained method\n",
|
| 175 |
+
"'''\n",
|
| 176 |
+
"# Saving a model:\n",
|
| 177 |
+
"\n",
|
| 178 |
+
"from transformers import BertConfig, BertModel\n",
|
| 179 |
+
"bert_config = BertConfig.from_pretrained('bert-base-cased')\n",
|
| 180 |
+
"bert_model = BertModel(bert_config)\n",
|
| 181 |
+
"\n",
|
| 182 |
+
"# Training code goes here\n",
|
| 183 |
+
"\n",
|
| 184 |
+
"bert_model.save_pretrained('my-bert-model')"
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"cell_type": "code",
|
| 189 |
+
"execution_count": 4,
|
| 190 |
+
"metadata": {},
|
| 191 |
+
"outputs": [],
|
| 192 |
+
"source": [
|
| 193 |
+
"# Reloading a saved model\n",
|
| 194 |
+
"from transformers import BertModel\n",
|
| 195 |
+
"\n",
|
| 196 |
+
"bert_model = BertModel.from_pretrained('my-bert-model')"
|
| 197 |
+
]
|
| 198 |
+
},
|
| 199 |
+
{
|
| 200 |
+
"cell_type": "code",
|
| 201 |
+
"execution_count": null,
|
| 202 |
+
"metadata": {},
|
| 203 |
+
"outputs": [],
|
| 204 |
+
"source": []
|
| 205 |
+
}
|
| 206 |
+
],
|
| 207 |
+
"metadata": {
|
| 208 |
+
"kernelspec": {
|
| 209 |
+
"display_name": "Python 3",
|
| 210 |
+
"language": "python",
|
| 211 |
+
"name": "python3"
|
| 212 |
+
},
|
| 213 |
+
"language_info": {
|
| 214 |
+
"codemirror_mode": {
|
| 215 |
+
"name": "ipython",
|
| 216 |
+
"version": 3
|
| 217 |
+
},
|
| 218 |
+
"file_extension": ".py",
|
| 219 |
+
"mimetype": "text/x-python",
|
| 220 |
+
"name": "python",
|
| 221 |
+
"nbconvert_exporter": "python",
|
| 222 |
+
"pygments_lexer": "ipython3",
|
| 223 |
+
"version": "3.11.6"
|
| 224 |
+
}
|
| 225 |
+
},
|
| 226 |
+
"nbformat": 4,
|
| 227 |
+
"nbformat_minor": 2
|
| 228 |
+
}
|
transformer_instantiation_2.ipynb
ADDED
|
@@ -0,0 +1,600 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stdout",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"<class 'transformers.models.bert.modeling_bert.BertModel'>\n"
|
| 13 |
+
]
|
| 14 |
+
}
|
| 15 |
+
],
|
| 16 |
+
"source": [
|
| 17 |
+
"from transformers import AutoModel\n",
|
| 18 |
+
"\n",
|
| 19 |
+
"bert_model = AutoModel.from_pretrained('bert-base-cased')\n",
|
| 20 |
+
"print(type(bert_model))"
|
| 21 |
+
]
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"cell_type": "code",
|
| 25 |
+
"execution_count": 2,
|
| 26 |
+
"metadata": {},
|
| 27 |
+
"outputs": [
|
| 28 |
+
{
|
| 29 |
+
"name": "stdout",
|
| 30 |
+
"output_type": "stream",
|
| 31 |
+
"text": [
|
| 32 |
+
"<class 'transformers.models.gpt2.modeling_gpt2.GPT2Model'>\n"
|
| 33 |
+
]
|
| 34 |
+
}
|
| 35 |
+
],
|
| 36 |
+
"source": [
|
| 37 |
+
"gpt_model = AutoModel.from_pretrained('gpt2')\n",
|
| 38 |
+
"print(type(gpt_model))"
|
| 39 |
+
]
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"cell_type": "code",
|
| 43 |
+
"execution_count": 3,
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [
|
| 46 |
+
{
|
| 47 |
+
"name": "stdout",
|
| 48 |
+
"output_type": "stream",
|
| 49 |
+
"text": [
|
| 50 |
+
"<class 'transformers.models.bart.modeling_bart.BartModel'>\n"
|
| 51 |
+
]
|
| 52 |
+
}
|
| 53 |
+
],
|
| 54 |
+
"source": [
|
| 55 |
+
"bart_model = AutoModel.from_pretrained('facebook/bart-base')\n",
|
| 56 |
+
"print(type(bart_model))"
|
| 57 |
+
]
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"cell_type": "code",
|
| 61 |
+
"execution_count": 4,
|
| 62 |
+
"metadata": {},
|
| 63 |
+
"outputs": [
|
| 64 |
+
{
|
| 65 |
+
"name": "stdout",
|
| 66 |
+
"output_type": "stream",
|
| 67 |
+
"text": [
|
| 68 |
+
"<class 'transformers.models.bert.configuration_bert.BertConfig'>\n"
|
| 69 |
+
]
|
| 70 |
+
}
|
| 71 |
+
],
|
| 72 |
+
"source": [
|
| 73 |
+
"from transformers import AutoConfig\n",
|
| 74 |
+
"\n",
|
| 75 |
+
"bert_config = AutoConfig.from_pretrained('bert-base-cased')\n",
|
| 76 |
+
"print(type(bert_config))"
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"execution_count": 5,
|
| 82 |
+
"metadata": {},
|
| 83 |
+
"outputs": [
|
| 84 |
+
{
|
| 85 |
+
"name": "stdout",
|
| 86 |
+
"output_type": "stream",
|
| 87 |
+
"text": [
|
| 88 |
+
"BertConfig {\n",
|
| 89 |
+
" \"_name_or_path\": \"bert-base-cased\",\n",
|
| 90 |
+
" \"architectures\": [\n",
|
| 91 |
+
" \"BertForMaskedLM\"\n",
|
| 92 |
+
" ],\n",
|
| 93 |
+
" \"attention_probs_dropout_prob\": 0.1,\n",
|
| 94 |
+
" \"classifier_dropout\": null,\n",
|
| 95 |
+
" \"gradient_checkpointing\": false,\n",
|
| 96 |
+
" \"hidden_act\": \"gelu\",\n",
|
| 97 |
+
" \"hidden_dropout_prob\": 0.1,\n",
|
| 98 |
+
" \"hidden_size\": 768,\n",
|
| 99 |
+
" \"initializer_range\": 0.02,\n",
|
| 100 |
+
" \"intermediate_size\": 3072,\n",
|
| 101 |
+
" \"layer_norm_eps\": 1e-12,\n",
|
| 102 |
+
" \"max_position_embeddings\": 512,\n",
|
| 103 |
+
" \"model_type\": \"bert\",\n",
|
| 104 |
+
" \"num_attention_heads\": 12,\n",
|
| 105 |
+
" \"num_hidden_layers\": 12,\n",
|
| 106 |
+
" \"pad_token_id\": 0,\n",
|
| 107 |
+
" \"position_embedding_type\": \"absolute\",\n",
|
| 108 |
+
" \"transformers_version\": \"4.34.1\",\n",
|
| 109 |
+
" \"type_vocab_size\": 2,\n",
|
| 110 |
+
" \"use_cache\": true,\n",
|
| 111 |
+
" \"vocab_size\": 28996\n",
|
| 112 |
+
"}\n",
|
| 113 |
+
"\n"
|
| 114 |
+
]
|
| 115 |
+
}
|
| 116 |
+
],
|
| 117 |
+
"source": [
|
| 118 |
+
"print(bert_config)"
|
| 119 |
+
]
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"cell_type": "code",
|
| 123 |
+
"execution_count": 6,
|
| 124 |
+
"metadata": {},
|
| 125 |
+
"outputs": [
|
| 126 |
+
{
|
| 127 |
+
"name": "stdout",
|
| 128 |
+
"output_type": "stream",
|
| 129 |
+
"text": [
|
| 130 |
+
"<class 'transformers.models.bert.configuration_bert.BertConfig'>\n",
|
| 131 |
+
"<class 'transformers.models.bert.configuration_bert.BertConfig'>\n"
|
| 132 |
+
]
|
| 133 |
+
}
|
| 134 |
+
],
|
| 135 |
+
"source": [
|
| 136 |
+
"from transformers import BertConfig\n",
|
| 137 |
+
"\n",
|
| 138 |
+
"bert_config = BertConfig.from_pretrained('bert-base-cased')\n",
|
| 139 |
+
"print(type(bert_config))\n",
|
| 140 |
+
"\n",
|
| 141 |
+
"from transformers import AutoConfig\n",
|
| 142 |
+
"\n",
|
| 143 |
+
"bert_config = AutoConfig.from_pretrained('bert-base-cased')\n",
|
| 144 |
+
"print(type(bert_config))"
|
| 145 |
+
]
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"cell_type": "code",
|
| 149 |
+
"execution_count": 7,
|
| 150 |
+
"metadata": {},
|
| 151 |
+
"outputs": [
|
| 152 |
+
{
|
| 153 |
+
"name": "stdout",
|
| 154 |
+
"output_type": "stream",
|
| 155 |
+
"text": [
|
| 156 |
+
"BertConfig {\n",
|
| 157 |
+
" \"_name_or_path\": \"bert-base-cased\",\n",
|
| 158 |
+
" \"architectures\": [\n",
|
| 159 |
+
" \"BertForMaskedLM\"\n",
|
| 160 |
+
" ],\n",
|
| 161 |
+
" \"attention_probs_dropout_prob\": 0.1,\n",
|
| 162 |
+
" \"classifier_dropout\": null,\n",
|
| 163 |
+
" \"gradient_checkpointing\": false,\n",
|
| 164 |
+
" \"hidden_act\": \"gelu\",\n",
|
| 165 |
+
" \"hidden_dropout_prob\": 0.1,\n",
|
| 166 |
+
" \"hidden_size\": 768,\n",
|
| 167 |
+
" \"initializer_range\": 0.02,\n",
|
| 168 |
+
" \"intermediate_size\": 3072,\n",
|
| 169 |
+
" \"layer_norm_eps\": 1e-12,\n",
|
| 170 |
+
" \"max_position_embeddings\": 512,\n",
|
| 171 |
+
" \"model_type\": \"bert\",\n",
|
| 172 |
+
" \"num_attention_heads\": 12,\n",
|
| 173 |
+
" \"num_hidden_layers\": 12,\n",
|
| 174 |
+
" \"pad_token_id\": 0,\n",
|
| 175 |
+
" \"position_embedding_type\": \"absolute\",\n",
|
| 176 |
+
" \"transformers_version\": \"4.34.1\",\n",
|
| 177 |
+
" \"type_vocab_size\": 2,\n",
|
| 178 |
+
" \"use_cache\": true,\n",
|
| 179 |
+
" \"vocab_size\": 28996\n",
|
| 180 |
+
"}\n",
|
| 181 |
+
"\n"
|
| 182 |
+
]
|
| 183 |
+
}
|
| 184 |
+
],
|
| 185 |
+
"source": [
|
| 186 |
+
"print(bert_config)"
|
| 187 |
+
]
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"cell_type": "code",
|
| 191 |
+
"execution_count": 9,
|
| 192 |
+
"metadata": {},
|
| 193 |
+
"outputs": [
|
| 194 |
+
{
|
| 195 |
+
"name": "stdout",
|
| 196 |
+
"output_type": "stream",
|
| 197 |
+
"text": [
|
| 198 |
+
"BertModel(\n",
|
| 199 |
+
" (embeddings): BertEmbeddings(\n",
|
| 200 |
+
" (word_embeddings): Embedding(28996, 768, padding_idx=0)\n",
|
| 201 |
+
" (position_embeddings): Embedding(512, 768)\n",
|
| 202 |
+
" (token_type_embeddings): Embedding(2, 768)\n",
|
| 203 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 204 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 205 |
+
" )\n",
|
| 206 |
+
" (encoder): BertEncoder(\n",
|
| 207 |
+
" (layer): ModuleList(\n",
|
| 208 |
+
" (0-11): 12 x BertLayer(\n",
|
| 209 |
+
" (attention): BertAttention(\n",
|
| 210 |
+
" (self): BertSelfAttention(\n",
|
| 211 |
+
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 212 |
+
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 213 |
+
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 214 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 215 |
+
" )\n",
|
| 216 |
+
" (output): BertSelfOutput(\n",
|
| 217 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 218 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 219 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 220 |
+
" )\n",
|
| 221 |
+
" )\n",
|
| 222 |
+
" (intermediate): BertIntermediate(\n",
|
| 223 |
+
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
| 224 |
+
" (intermediate_act_fn): GELUActivation()\n",
|
| 225 |
+
" )\n",
|
| 226 |
+
" (output): BertOutput(\n",
|
| 227 |
+
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
| 228 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 229 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 230 |
+
" )\n",
|
| 231 |
+
" )\n",
|
| 232 |
+
" )\n",
|
| 233 |
+
" )\n",
|
| 234 |
+
" (pooler): BertPooler(\n",
|
| 235 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 236 |
+
" (activation): Tanh()\n",
|
| 237 |
+
" )\n",
|
| 238 |
+
")\n"
|
| 239 |
+
]
|
| 240 |
+
}
|
| 241 |
+
],
|
| 242 |
+
"source": [
|
| 243 |
+
"from transformers import AutoModel\n",
|
| 244 |
+
"\n",
|
| 245 |
+
"bert_model = AutoModel.from_pretrained('bert-base-cased')\n",
|
| 246 |
+
"print(bert_model)"
|
| 247 |
+
]
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"cell_type": "code",
|
| 251 |
+
"execution_count": 11,
|
| 252 |
+
"metadata": {},
|
| 253 |
+
"outputs": [
|
| 254 |
+
{
|
| 255 |
+
"name": "stdout",
|
| 256 |
+
"output_type": "stream",
|
| 257 |
+
"text": [
|
| 258 |
+
"BertModel(\n",
|
| 259 |
+
" (embeddings): BertEmbeddings(\n",
|
| 260 |
+
" (word_embeddings): Embedding(28996, 768, padding_idx=0)\n",
|
| 261 |
+
" (position_embeddings): Embedding(512, 768)\n",
|
| 262 |
+
" (token_type_embeddings): Embedding(2, 768)\n",
|
| 263 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 264 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 265 |
+
" )\n",
|
| 266 |
+
" (encoder): BertEncoder(\n",
|
| 267 |
+
" (layer): ModuleList(\n",
|
| 268 |
+
" (0-11): 12 x BertLayer(\n",
|
| 269 |
+
" (attention): BertAttention(\n",
|
| 270 |
+
" (self): BertSelfAttention(\n",
|
| 271 |
+
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 272 |
+
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 273 |
+
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 274 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 275 |
+
" )\n",
|
| 276 |
+
" (output): BertSelfOutput(\n",
|
| 277 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 278 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 279 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 280 |
+
" )\n",
|
| 281 |
+
" )\n",
|
| 282 |
+
" (intermediate): BertIntermediate(\n",
|
| 283 |
+
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
| 284 |
+
" (intermediate_act_fn): GELUActivation()\n",
|
| 285 |
+
" )\n",
|
| 286 |
+
" (output): BertOutput(\n",
|
| 287 |
+
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
| 288 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 289 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 290 |
+
" )\n",
|
| 291 |
+
" )\n",
|
| 292 |
+
" )\n",
|
| 293 |
+
" )\n",
|
| 294 |
+
" (pooler): BertPooler(\n",
|
| 295 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 296 |
+
" (activation): Tanh()\n",
|
| 297 |
+
" )\n",
|
| 298 |
+
")\n",
|
| 299 |
+
"BertModel(\n",
|
| 300 |
+
" (embeddings): BertEmbeddings(\n",
|
| 301 |
+
" (word_embeddings): Embedding(28996, 768, padding_idx=0)\n",
|
| 302 |
+
" (position_embeddings): Embedding(512, 768)\n",
|
| 303 |
+
" (token_type_embeddings): Embedding(2, 768)\n",
|
| 304 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 305 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 306 |
+
" )\n",
|
| 307 |
+
" (encoder): BertEncoder(\n",
|
| 308 |
+
" (layer): ModuleList(\n",
|
| 309 |
+
" (0-7): 8 x BertLayer(\n",
|
| 310 |
+
" (attention): BertAttention(\n",
|
| 311 |
+
" (self): BertSelfAttention(\n",
|
| 312 |
+
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 313 |
+
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 314 |
+
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 315 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 316 |
+
" )\n",
|
| 317 |
+
" (output): BertSelfOutput(\n",
|
| 318 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 319 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 320 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 321 |
+
" )\n",
|
| 322 |
+
" )\n",
|
| 323 |
+
" (intermediate): BertIntermediate(\n",
|
| 324 |
+
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
| 325 |
+
" (intermediate_act_fn): GELUActivation()\n",
|
| 326 |
+
" )\n",
|
| 327 |
+
" (output): BertOutput(\n",
|
| 328 |
+
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
| 329 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 330 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 331 |
+
" )\n",
|
| 332 |
+
" )\n",
|
| 333 |
+
" )\n",
|
| 334 |
+
" )\n",
|
| 335 |
+
" (pooler): BertPooler(\n",
|
| 336 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 337 |
+
" (activation): Tanh()\n",
|
| 338 |
+
" )\n",
|
| 339 |
+
")\n"
|
| 340 |
+
]
|
| 341 |
+
}
|
| 342 |
+
],
|
| 343 |
+
"source": [
|
| 344 |
+
"from transformers import BertConfig, BertModel\n",
|
| 345 |
+
"\n",
|
| 346 |
+
"bert_config = BertConfig.from_pretrained('bert-base-cased')\n",
|
| 347 |
+
"bert_model = BertModel(bert_config)\n",
|
| 348 |
+
"print(bert_model)\n",
|
| 349 |
+
"\n",
|
| 350 |
+
"bert_config = BertConfig.from_pretrained('bert-base-cased', num_hidden_layers=8)\n",
|
| 351 |
+
"bert_model = BertModel(bert_config)\n",
|
| 352 |
+
"print(bert_model)"
|
| 353 |
+
]
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"cell_type": "code",
|
| 357 |
+
"execution_count": 12,
|
| 358 |
+
"metadata": {},
|
| 359 |
+
"outputs": [
|
| 360 |
+
{
|
| 361 |
+
"name": "stdout",
|
| 362 |
+
"output_type": "stream",
|
| 363 |
+
"text": [
|
| 364 |
+
"BertConfig {\n",
|
| 365 |
+
" \"architectures\": [\n",
|
| 366 |
+
" \"BertForMaskedLM\"\n",
|
| 367 |
+
" ],\n",
|
| 368 |
+
" \"attention_probs_dropout_prob\": 0.1,\n",
|
| 369 |
+
" \"classifier_dropout\": null,\n",
|
| 370 |
+
" \"gradient_checkpointing\": false,\n",
|
| 371 |
+
" \"hidden_act\": \"gelu\",\n",
|
| 372 |
+
" \"hidden_dropout_prob\": 0.1,\n",
|
| 373 |
+
" \"hidden_size\": 768,\n",
|
| 374 |
+
" \"initializer_range\": 0.02,\n",
|
| 375 |
+
" \"intermediate_size\": 3072,\n",
|
| 376 |
+
" \"layer_norm_eps\": 1e-12,\n",
|
| 377 |
+
" \"max_position_embeddings\": 512,\n",
|
| 378 |
+
" \"model_type\": \"bert\",\n",
|
| 379 |
+
" \"num_attention_heads\": 12,\n",
|
| 380 |
+
" \"num_hidden_layers\": 12,\n",
|
| 381 |
+
" \"pad_token_id\": 0,\n",
|
| 382 |
+
" \"position_embedding_type\": \"absolute\",\n",
|
| 383 |
+
" \"transformers_version\": \"4.34.1\",\n",
|
| 384 |
+
" \"type_vocab_size\": 2,\n",
|
| 385 |
+
" \"use_cache\": true,\n",
|
| 386 |
+
" \"vocab_size\": 28996\n",
|
| 387 |
+
"}\n",
|
| 388 |
+
"\n"
|
| 389 |
+
]
|
| 390 |
+
}
|
| 391 |
+
],
|
| 392 |
+
"source": [
|
| 393 |
+
"from transformers import BertConfig, BertModel\n",
|
| 394 |
+
"bert_config_original = BertConfig.from_pretrained('bert-base-cased')\n",
|
| 395 |
+
"print(bert_config_original)\n",
|
| 396 |
+
"# bert_config = BertConfig.from_pretrained('bert-base-cased', num_hidden_layers=8)\n",
|
| 397 |
+
"# bert_model = BertModel(bert_config)"
|
| 398 |
+
]
|
| 399 |
+
},
|
| 400 |
+
{
|
| 401 |
+
"cell_type": "code",
|
| 402 |
+
"execution_count": 13,
|
| 403 |
+
"metadata": {},
|
| 404 |
+
"outputs": [
|
| 405 |
+
{
|
| 406 |
+
"name": "stdout",
|
| 407 |
+
"output_type": "stream",
|
| 408 |
+
"text": [
|
| 409 |
+
"BertConfig {\n",
|
| 410 |
+
" \"_name_or_path\": \"bert-base-cased\",\n",
|
| 411 |
+
" \"architectures\": [\n",
|
| 412 |
+
" \"BertForMaskedLM\"\n",
|
| 413 |
+
" ],\n",
|
| 414 |
+
" \"attention_probs_dropout_prob\": 0.1,\n",
|
| 415 |
+
" \"classifier_dropout\": null,\n",
|
| 416 |
+
" \"gradient_checkpointing\": false,\n",
|
| 417 |
+
" \"hidden_act\": \"gelu\",\n",
|
| 418 |
+
" \"hidden_dropout_prob\": 0.1,\n",
|
| 419 |
+
" \"hidden_size\": 768,\n",
|
| 420 |
+
" \"initializer_range\": 0.02,\n",
|
| 421 |
+
" \"intermediate_size\": 3072,\n",
|
| 422 |
+
" \"layer_norm_eps\": 1e-12,\n",
|
| 423 |
+
" \"max_position_embeddings\": 512,\n",
|
| 424 |
+
" \"model_type\": \"bert\",\n",
|
| 425 |
+
" \"num_attention_heads\": 12,\n",
|
| 426 |
+
" \"num_hidden_layers\": 2,\n",
|
| 427 |
+
" \"pad_token_id\": 0,\n",
|
| 428 |
+
" \"position_embedding_type\": \"absolute\",\n",
|
| 429 |
+
" \"transformers_version\": \"4.34.1\",\n",
|
| 430 |
+
" \"type_vocab_size\": 2,\n",
|
| 431 |
+
" \"use_cache\": true,\n",
|
| 432 |
+
" \"vocab_size\": 100\n",
|
| 433 |
+
"}\n",
|
| 434 |
+
"\n"
|
| 435 |
+
]
|
| 436 |
+
}
|
| 437 |
+
],
|
| 438 |
+
"source": [
|
| 439 |
+
"bert_config_new = AutoConfig.from_pretrained('bert-base-cased', \n",
|
| 440 |
+
" num_hidden_layers=2,\n",
|
| 441 |
+
" vocab_size=100,\n",
|
| 442 |
+
" )\n",
|
| 443 |
+
"print(bert_config_new)"
|
| 444 |
+
]
|
| 445 |
+
},
|
| 446 |
+
{
|
| 447 |
+
"cell_type": "code",
|
| 448 |
+
"execution_count": 14,
|
| 449 |
+
"metadata": {},
|
| 450 |
+
"outputs": [
|
| 451 |
+
{
|
| 452 |
+
"name": "stdout",
|
| 453 |
+
"output_type": "stream",
|
| 454 |
+
"text": [
|
| 455 |
+
"BertModel(\n",
|
| 456 |
+
" (embeddings): BertEmbeddings(\n",
|
| 457 |
+
" (word_embeddings): Embedding(100, 768, padding_idx=0)\n",
|
| 458 |
+
" (position_embeddings): Embedding(512, 768)\n",
|
| 459 |
+
" (token_type_embeddings): Embedding(2, 768)\n",
|
| 460 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 461 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 462 |
+
" )\n",
|
| 463 |
+
" (encoder): BertEncoder(\n",
|
| 464 |
+
" (layer): ModuleList(\n",
|
| 465 |
+
" (0-1): 2 x BertLayer(\n",
|
| 466 |
+
" (attention): BertAttention(\n",
|
| 467 |
+
" (self): BertSelfAttention(\n",
|
| 468 |
+
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 469 |
+
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 470 |
+
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 471 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 472 |
+
" )\n",
|
| 473 |
+
" (output): BertSelfOutput(\n",
|
| 474 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 475 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 476 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 477 |
+
" )\n",
|
| 478 |
+
" )\n",
|
| 479 |
+
" (intermediate): BertIntermediate(\n",
|
| 480 |
+
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
| 481 |
+
" (intermediate_act_fn): GELUActivation()\n",
|
| 482 |
+
" )\n",
|
| 483 |
+
" (output): BertOutput(\n",
|
| 484 |
+
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
| 485 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 486 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 487 |
+
" )\n",
|
| 488 |
+
" )\n",
|
| 489 |
+
" )\n",
|
| 490 |
+
" )\n",
|
| 491 |
+
" (pooler): BertPooler(\n",
|
| 492 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 493 |
+
" (activation): Tanh()\n",
|
| 494 |
+
" )\n",
|
| 495 |
+
")\n"
|
| 496 |
+
]
|
| 497 |
+
}
|
| 498 |
+
],
|
| 499 |
+
"source": [
|
| 500 |
+
"bert_model_new = BertModel(bert_config_new)\n",
|
| 501 |
+
"print(bert_model_new)"
|
| 502 |
+
]
|
| 503 |
+
},
|
| 504 |
+
{
|
| 505 |
+
"cell_type": "code",
|
| 506 |
+
"execution_count": 15,
|
| 507 |
+
"metadata": {},
|
| 508 |
+
"outputs": [],
|
| 509 |
+
"source": [
|
| 510 |
+
"bert_model_new.save_pretrained('pew_bert_ye')"
|
| 511 |
+
]
|
| 512 |
+
},
|
| 513 |
+
{
|
| 514 |
+
"cell_type": "code",
|
| 515 |
+
"execution_count": 16,
|
| 516 |
+
"metadata": {},
|
| 517 |
+
"outputs": [
|
| 518 |
+
{
|
| 519 |
+
"name": "stdout",
|
| 520 |
+
"output_type": "stream",
|
| 521 |
+
"text": [
|
| 522 |
+
"BertModel(\n",
|
| 523 |
+
" (embeddings): BertEmbeddings(\n",
|
| 524 |
+
" (word_embeddings): Embedding(100, 768, padding_idx=0)\n",
|
| 525 |
+
" (position_embeddings): Embedding(512, 768)\n",
|
| 526 |
+
" (token_type_embeddings): Embedding(2, 768)\n",
|
| 527 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 528 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 529 |
+
" )\n",
|
| 530 |
+
" (encoder): BertEncoder(\n",
|
| 531 |
+
" (layer): ModuleList(\n",
|
| 532 |
+
" (0-1): 2 x BertLayer(\n",
|
| 533 |
+
" (attention): BertAttention(\n",
|
| 534 |
+
" (self): BertSelfAttention(\n",
|
| 535 |
+
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 536 |
+
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 537 |
+
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 538 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 539 |
+
" )\n",
|
| 540 |
+
" (output): BertSelfOutput(\n",
|
| 541 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 542 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 543 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 544 |
+
" )\n",
|
| 545 |
+
" )\n",
|
| 546 |
+
" (intermediate): BertIntermediate(\n",
|
| 547 |
+
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
| 548 |
+
" (intermediate_act_fn): GELUActivation()\n",
|
| 549 |
+
" )\n",
|
| 550 |
+
" (output): BertOutput(\n",
|
| 551 |
+
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
| 552 |
+
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
| 553 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
| 554 |
+
" )\n",
|
| 555 |
+
" )\n",
|
| 556 |
+
" )\n",
|
| 557 |
+
" )\n",
|
| 558 |
+
" (pooler): BertPooler(\n",
|
| 559 |
+
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
| 560 |
+
" (activation): Tanh()\n",
|
| 561 |
+
" )\n",
|
| 562 |
+
")\n"
|
| 563 |
+
]
|
| 564 |
+
}
|
| 565 |
+
],
|
| 566 |
+
"source": [
|
| 567 |
+
"bert_model_new = BertModel.from_pretrained('pew_bert_ye')\n",
|
| 568 |
+
"print(bert_model_new)"
|
| 569 |
+
]
|
| 570 |
+
},
|
| 571 |
+
{
|
| 572 |
+
"cell_type": "code",
|
| 573 |
+
"execution_count": null,
|
| 574 |
+
"metadata": {},
|
| 575 |
+
"outputs": [],
|
| 576 |
+
"source": []
|
| 577 |
+
}
|
| 578 |
+
],
|
| 579 |
+
"metadata": {
|
| 580 |
+
"kernelspec": {
|
| 581 |
+
"display_name": "Python 3",
|
| 582 |
+
"language": "python",
|
| 583 |
+
"name": "python3"
|
| 584 |
+
},
|
| 585 |
+
"language_info": {
|
| 586 |
+
"codemirror_mode": {
|
| 587 |
+
"name": "ipython",
|
| 588 |
+
"version": 3
|
| 589 |
+
},
|
| 590 |
+
"file_extension": ".py",
|
| 591 |
+
"mimetype": "text/x-python",
|
| 592 |
+
"name": "python",
|
| 593 |
+
"nbconvert_exporter": "python",
|
| 594 |
+
"pygments_lexer": "ipython3",
|
| 595 |
+
"version": "3.11.6"
|
| 596 |
+
}
|
| 597 |
+
},
|
| 598 |
+
"nbformat": 4,
|
| 599 |
+
"nbformat_minor": 2
|
| 600 |
+
}
|