wesfggfd commited on
Commit
9ede3a2
·
verified ·
1 Parent(s): cf407b3

Upload 99 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. Transformer Mechanism/Named Entity Recognition/tf/.Trash-0/files/W4A2-UGL-NER.tar.gz +3 -0
  3. Transformer Mechanism/Named Entity Recognition/tf/.Trash-0/info/W4A2-UGL-NER.tar.gz.trashinfo +3 -0
  4. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/.DS_Store +0 -0
  5. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._.DS_Store +0 -0
  6. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._Transformer_application_Named_Entity_Recognition.ipynb +0 -0
  7. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._model +0 -0
  8. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._ner.json +0 -0
  9. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._tokenizer +0 -0
  10. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._utils.py +0 -0
  11. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/.ipynb_checkpoints/._Transformer_application_Named_Entity_Recognition-checkpoint.ipynb +0 -0
  12. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/.ipynb_checkpoints/Transformer_application_Named_Entity_Recognition-checkpoint.ipynb +715 -0
  13. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/Transformer_application_Named_Entity_Recognition.ipynb +0 -0
  14. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/model/._config.json +0 -0
  15. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/model/._tf_model.h5 +3 -0
  16. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/model/config.json +51 -0
  17. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/model/tf_model.h5 +3 -0
  18. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/ner.json +0 -0
  19. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/._special_tokens_map.json +0 -0
  20. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/._tokenizer_config.json +0 -0
  21. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/._vocab.txt +0 -0
  22. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/special_tokens_map.json +1 -0
  23. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/tokenizer_config.json +1 -0
  24. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/vocab.txt +0 -0
  25. Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/utils.py +152 -0
  26. Transformer Mechanism/QA/tf/.Trash-0/files/QA_dataset.ipynb +2510 -0
  27. Transformer Mechanism/QA/tf/.Trash-0/files/W4A2.tar.gz +3 -0
  28. Transformer Mechanism/QA/tf/.Trash-0/files/W4A3UGLQA.tar.gz +3 -0
  29. Transformer Mechanism/QA/tf/.Trash-0/info/QA_dataset.ipynb.trashinfo +3 -0
  30. Transformer Mechanism/QA/tf/.Trash-0/info/W4A2.tar.gz.trashinfo +3 -0
  31. Transformer Mechanism/QA/tf/.Trash-0/info/W4A3UGLQA.tar.gz.trashinfo +3 -0
  32. Transformer Mechanism/QA/tf/W4A3_UGL/.DS_Store +0 -0
  33. Transformer Mechanism/QA/tf/W4A3_UGL/._.DS_Store +0 -0
  34. Transformer Mechanism/QA/tf/W4A3_UGL/._QA_dataset.ipynb +0 -0
  35. Transformer Mechanism/QA/tf/W4A3_UGL/._data +0 -0
  36. Transformer Mechanism/QA/tf/W4A3_UGL/._model +0 -0
  37. Transformer Mechanism/QA/tf/W4A3_UGL/._tokenizer +0 -0
  38. Transformer Mechanism/QA/tf/W4A3_UGL/.ipynb_checkpoints/._QA_dataset-checkpoint.ipynb +0 -0
  39. Transformer Mechanism/QA/tf/W4A3_UGL/.ipynb_checkpoints/QA_dataset-checkpoint.ipynb +2483 -0
  40. Transformer Mechanism/QA/tf/W4A3_UGL/QA_dataset.ipynb +0 -0
  41. Transformer Mechanism/QA/tf/W4A3_UGL/data/._dataset_dict.json +0 -0
  42. Transformer Mechanism/QA/tf/W4A3_UGL/data/._test +0 -0
  43. Transformer Mechanism/QA/tf/W4A3_UGL/data/._train +0 -0
  44. Transformer Mechanism/QA/tf/W4A3_UGL/data/dataset_dict.json +1 -0
  45. Transformer Mechanism/QA/tf/W4A3_UGL/data/test/._dataset.arrow +3 -0
  46. Transformer Mechanism/QA/tf/W4A3_UGL/data/test/._dataset_info.json +0 -0
  47. Transformer Mechanism/QA/tf/W4A3_UGL/data/test/._state.json +0 -0
  48. Transformer Mechanism/QA/tf/W4A3_UGL/data/test/cache-26c237c56fc0b951.arrow +3 -0
  49. Transformer Mechanism/QA/tf/W4A3_UGL/data/test/cache-6b23a7f03ef9fdb4.arrow +3 -0
  50. Transformer Mechanism/QA/tf/W4A3_UGL/data/test/cache-c9959a793a67abd8.arrow +3 -0
.gitattributes CHANGED
@@ -109,3 +109,5 @@ Transformer[[:space:]]Mechanism/Transformer_Implementation/home/jovyan/work/W4A1
109
  Transformer[[:space:]]Mechanism/Transformer_Implementation/home/jovyan/work/W4A1/encoder.png filter=lfs diff=lfs merge=lfs -text
110
  Transformer[[:space:]]Mechanism/Transformer_Implementation/home/jovyan/work/W4A1/self-attention.png filter=lfs diff=lfs merge=lfs -text
111
  Transformer[[:space:]]Mechanism/Transformer_Implementation/home/jovyan/work/W4A1/transformer.png filter=lfs diff=lfs merge=lfs -text
 
 
 
109
  Transformer[[:space:]]Mechanism/Transformer_Implementation/home/jovyan/work/W4A1/encoder.png filter=lfs diff=lfs merge=lfs -text
110
  Transformer[[:space:]]Mechanism/Transformer_Implementation/home/jovyan/work/W4A1/self-attention.png filter=lfs diff=lfs merge=lfs -text
111
  Transformer[[:space:]]Mechanism/Transformer_Implementation/home/jovyan/work/W4A1/transformer.png filter=lfs diff=lfs merge=lfs -text
112
+ Transformer[[:space:]]Mechanism/Transformer[[:space:]]Pre-Processing/home/jovyan/work/W4A4_UGL_POS/glove/glove.6B.100d.txt filter=lfs diff=lfs merge=lfs -text
113
+ Transformer[[:space:]]Mechanism/Transformer[[:space:]]Pre-Processing/home/jovyan/work/W4A4_UGL_POS/preprocessing.png filter=lfs diff=lfs merge=lfs -text
Transformer Mechanism/Named Entity Recognition/tf/.Trash-0/files/W4A2-UGL-NER.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c76298397280a20118c061ffdb3d9abfa2e52e6b833fcdfbc0dab89837635fd
3
+ size 245286524
Transformer Mechanism/Named Entity Recognition/tf/.Trash-0/info/W4A2-UGL-NER.tar.gz.trashinfo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:779e266c8830e8c475b5b4d1eddf4b3cd85148af4686f9eef77319d458a6c9a8
3
+ size 71
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/.DS_Store ADDED
Binary file (6.15 kB). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._.DS_Store ADDED
Binary file (120 Bytes). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._Transformer_application_Named_Entity_Recognition.ipynb ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._model ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._ner.json ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._tokenizer ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/._utils.py ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/.ipynb_checkpoints/._Transformer_application_Named_Entity_Recognition-checkpoint.ipynb ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/.ipynb_checkpoints/Transformer_application_Named_Entity_Recognition-checkpoint.ipynb ADDED
@@ -0,0 +1,715 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Transformer Network Application: Named-Entity Recognition\n",
8
+ "\n",
9
+ "Welcome to Week 4's second ungraded lab. In this notebook you'll explore one application of the transformer architecture that you built in the previous assignment.\n",
10
+ "\n",
11
+ "**After this assignment you'll be able to**:\n",
12
+ "\n",
13
+ "* Use tokenizers and pre-trained models from the HuggingFace Library.\n",
14
+ "* Fine-tune a pre-trained transformer model for Named-Entity Recognition"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "markdown",
19
+ "metadata": {},
20
+ "source": [
21
+ "## Table of Contents\n",
22
+ "\n",
23
+ "- [Packages](#0)\n",
24
+ "- [1 - Named-Entity Recognition to Process Resumes](#1)\n",
25
+ " - [1.1 - Data Cleaning](#1-1)\n",
26
+ " - [1.2 - Padding and Generating Tags](#1-2)\n",
27
+ " - [1.3 - Tokenize and Align Labels with 🤗 Library](#1-3)\n",
28
+ " - [Exercise 1 - tokenize_and_align_labels](#ex-1)\n",
29
+ " - [1.4 - Optimization](#1-4)"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "markdown",
34
+ "metadata": {},
35
+ "source": [
36
+ "<a name='0'></a>\n",
37
+ "## Packages\n",
38
+ "\n",
39
+ "Run the following cell to load the packages you'll need."
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": null,
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "import pandas as pd\n",
49
+ "import numpy as np\n",
50
+ "import tensorflow as tf\n",
51
+ "import json\n",
52
+ "import random\n",
53
+ "import logging\n",
54
+ "import re\n",
55
+ "\n",
56
+ "tf.get_logger().setLevel('ERROR')"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "markdown",
61
+ "metadata": {},
62
+ "source": [
63
+ "<a name='1'></a>\n",
64
+ "## 1 - Named-Entity Recognition to Process Resumes\n",
65
+ "\n",
66
+ "When faced with a large amount of unstructured text data, named-entity recognition (NER) can help you detect and classify important information in your dataset. For instance, in the running example \"Jane visits Africa in September\", NER would help you detect \"Jane\", \"Africa\", and \"September\" as named-entities and classify them as person, location, and time. \n",
67
+ "\n",
68
+ "* You will use a variation of the Transformer model you built in the last assignment to process a large dataset of resumes.\n",
69
+ "* You will find and classify relevant information such as the companies the applicant worked at, skills, type of degree, etc. "
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "markdown",
74
+ "metadata": {},
75
+ "source": [
76
+ "<a name='1-1'></a>\n",
77
+ "### 1.1 - Dataset Cleaning\n",
78
+ "\n",
79
+ "In this assignment you will optimize a Transformer model on a dataset of resumes. Take a look at how the data you will be working with are structured."
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "code",
84
+ "execution_count": null,
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "df_data = pd.read_json(\"ner.json\", lines=True)\n",
89
+ "df_data = df_data.drop(['extras'], axis=1)\n",
90
+ "df_data['content'] = df_data['content'].str.replace(\"\\n\", \" \")"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": null,
96
+ "metadata": {},
97
+ "outputs": [],
98
+ "source": [
99
+ "df_data.head()"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": null,
105
+ "metadata": {},
106
+ "outputs": [],
107
+ "source": [
108
+ "df_data.iloc[0]['annotation']"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": null,
114
+ "metadata": {},
115
+ "outputs": [],
116
+ "source": [
117
+ "def mergeIntervals(intervals):\n",
118
+ " sorted_by_lower_bound = sorted(intervals, key=lambda tup: tup[0])\n",
119
+ " merged = []\n",
120
+ "\n",
121
+ " for higher in sorted_by_lower_bound:\n",
122
+ " if not merged:\n",
123
+ " merged.append(higher)\n",
124
+ " else:\n",
125
+ " lower = merged[-1]\n",
126
+ " if higher[0] <= lower[1]:\n",
127
+ " if lower[2] is higher[2]:\n",
128
+ " upper_bound = max(lower[1], higher[1])\n",
129
+ " merged[-1] = (lower[0], upper_bound, lower[2])\n",
130
+ " else:\n",
131
+ " if lower[1] > higher[1]:\n",
132
+ " merged[-1] = lower\n",
133
+ " else:\n",
134
+ " merged[-1] = (lower[0], higher[1], higher[2])\n",
135
+ " else:\n",
136
+ " merged.append(higher)\n",
137
+ " return merged"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": null,
143
+ "metadata": {},
144
+ "outputs": [],
145
+ "source": [
146
+ "def get_entities(df):\n",
147
+ " \n",
148
+ " entities = []\n",
149
+ " \n",
150
+ " for i in range(len(df)):\n",
151
+ " entity = []\n",
152
+ " \n",
153
+ " for annot in df['annotation'][i]:\n",
154
+ " try:\n",
155
+ " ent = annot['label'][0]\n",
156
+ " start = annot['points'][0]['start']\n",
157
+ " end = annot['points'][0]['end'] + 1\n",
158
+ " entity.append((start, end, ent))\n",
159
+ " except:\n",
160
+ " pass\n",
161
+ " \n",
162
+ " entity = mergeIntervals(entity)\n",
163
+ " entities.append(entity)\n",
164
+ " \n",
165
+ " return entities"
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "execution_count": null,
171
+ "metadata": {},
172
+ "outputs": [],
173
+ "source": [
174
+ "df_data['entities'] = get_entities(df_data)\n",
175
+ "df_data.head()"
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": null,
181
+ "metadata": {},
182
+ "outputs": [],
183
+ "source": [
184
+ "def convert_dataturks_to_spacy(dataturks_JSON_FilePath):\n",
185
+ " try:\n",
186
+ " training_data = []\n",
187
+ " lines=[]\n",
188
+ " with open(dataturks_JSON_FilePath, 'r') as f:\n",
189
+ " lines = f.readlines()\n",
190
+ "\n",
191
+ " for line in lines:\n",
192
+ " data = json.loads(line)\n",
193
+ " text = data['content'].replace(\"\\n\", \" \")\n",
194
+ " entities = []\n",
195
+ " data_annotations = data['annotation']\n",
196
+ " if data_annotations is not None:\n",
197
+ " for annotation in data_annotations:\n",
198
+ " #only a single point in text annotation.\n",
199
+ " point = annotation['points'][0]\n",
200
+ " labels = annotation['label']\n",
201
+ " # handle both list of labels or a single label.\n",
202
+ " if not isinstance(labels, list):\n",
203
+ " labels = [labels]\n",
204
+ "\n",
205
+ " for label in labels:\n",
206
+ " point_start = point['start']\n",
207
+ " point_end = point['end']\n",
208
+ " point_text = point['text']\n",
209
+ " \n",
210
+ " lstrip_diff = len(point_text) - len(point_text.lstrip())\n",
211
+ " rstrip_diff = len(point_text) - len(point_text.rstrip())\n",
212
+ " if lstrip_diff != 0:\n",
213
+ " point_start = point_start + lstrip_diff\n",
214
+ " if rstrip_diff != 0:\n",
215
+ " point_end = point_end - rstrip_diff\n",
216
+ " entities.append((point_start, point_end + 1 , label))\n",
217
+ " training_data.append((text, {\"entities\" : entities}))\n",
218
+ " return training_data\n",
219
+ " except Exception as e:\n",
220
+ " logging.exception(\"Unable to process \" + dataturks_JSON_FilePath + \"\\n\" + \"error = \" + str(e))\n",
221
+ " return None\n",
222
+ "\n",
223
+ "def trim_entity_spans(data: list) -> list:\n",
224
+ " \"\"\"Removes leading and trailing white spaces from entity spans.\n",
225
+ "\n",
226
+ " Args:\n",
227
+ " data (list): The data to be cleaned in spaCy JSON format.\n",
228
+ "\n",
229
+ " Returns:\n",
230
+ " list: The cleaned data.\n",
231
+ " \"\"\"\n",
232
+ " invalid_span_tokens = re.compile(r'\\s')\n",
233
+ "\n",
234
+ " cleaned_data = []\n",
235
+ " for text, annotations in data:\n",
236
+ " entities = annotations['entities']\n",
237
+ " valid_entities = []\n",
238
+ " for start, end, label in entities:\n",
239
+ " valid_start = start\n",
240
+ " valid_end = end\n",
241
+ " while valid_start < len(text) and invalid_span_tokens.match(\n",
242
+ " text[valid_start]):\n",
243
+ " valid_start += 1\n",
244
+ " while valid_end > 1 and invalid_span_tokens.match(\n",
245
+ " text[valid_end - 1]):\n",
246
+ " valid_end -= 1\n",
247
+ " valid_entities.append([valid_start, valid_end, label])\n",
248
+ " cleaned_data.append([text, {'entities': valid_entities}])\n",
249
+ " return cleaned_data "
250
+ ]
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "execution_count": null,
255
+ "metadata": {},
256
+ "outputs": [],
257
+ "source": [
258
+ "data = trim_entity_spans(convert_dataturks_to_spacy(\"ner.json\"))"
259
+ ]
260
+ },
261
+ {
262
+ "cell_type": "code",
263
+ "execution_count": null,
264
+ "metadata": {},
265
+ "outputs": [],
266
+ "source": [
267
+ "from tqdm.notebook import tqdm\n",
268
+ "def clean_dataset(data):\n",
269
+ " cleanedDF = pd.DataFrame(columns=[\"setences_cleaned\"])\n",
270
+ " sum1 = 0\n",
271
+ " for i in tqdm(range(len(data))):\n",
272
+ " start = 0\n",
273
+ " emptyList = [\"Empty\"] * len(data[i][0].split())\n",
274
+ " numberOfWords = 0\n",
275
+ " lenOfString = len(data[i][0])\n",
276
+ " strData = data[i][0]\n",
277
+ " strDictData = data[i][1]\n",
278
+ " lastIndexOfSpace = strData.rfind(' ')\n",
279
+ " for i in range(lenOfString):\n",
280
+ " if (strData[i]==\" \" and strData[i+1]!=\" \"):\n",
281
+ " for k,v in strDictData.items():\n",
282
+ " for j in range(len(v)):\n",
283
+ " entList = v[len(v)-j-1]\n",
284
+ " if (start>=int(entList[0]) and i<=int(entList[1])):\n",
285
+ " emptyList[numberOfWords] = entList[2]\n",
286
+ " break\n",
287
+ " else:\n",
288
+ " continue\n",
289
+ " start = i + 1 \n",
290
+ " numberOfWords += 1\n",
291
+ " if (i == lastIndexOfSpace):\n",
292
+ " for j in range(len(v)):\n",
293
+ " entList = v[len(v)-j-1]\n",
294
+ " if (lastIndexOfSpace>=int(entList[0]) and lenOfString<=int(entList[1])):\n",
295
+ " emptyList[numberOfWords] = entList[2]\n",
296
+ " numberOfWords += 1\n",
297
+ " cleanedDF = cleanedDF.append(pd.Series([emptyList], index=cleanedDF.columns ), ignore_index=True )\n",
298
+ " sum1 = sum1 + numberOfWords\n",
299
+ " return cleanedDF"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": null,
305
+ "metadata": {},
306
+ "outputs": [],
307
+ "source": [
308
+ "cleanedDF = clean_dataset(data)"
309
+ ]
310
+ },
311
+ {
312
+ "cell_type": "markdown",
313
+ "metadata": {},
314
+ "source": [
315
+ "Take a look at your cleaned dataset and the categories the named-entities are matched to, or 'tags'."
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "code",
320
+ "execution_count": null,
321
+ "metadata": {},
322
+ "outputs": [],
323
+ "source": [
324
+ "cleanedDF.head()"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "markdown",
329
+ "metadata": {},
330
+ "source": [
331
+ "<a name='1-2'></a>\n",
332
+ "### 1.2 - Padding and Generating Tags\n",
333
+ "\n",
334
+ "Now, it is time to generate a list of unique tags you will match the named-entities to."
335
+ ]
336
+ },
337
+ {
338
+ "cell_type": "code",
339
+ "execution_count": null,
340
+ "metadata": {},
341
+ "outputs": [],
342
+ "source": [
343
+ "unique_tags = set(cleanedDF['setences_cleaned'].explode().unique())#pd.unique(cleanedDF['setences_cleaned'])#set(tag for doc in cleanedDF['setences_cleaned'].values.tolist() for tag in doc)\n",
344
+ "tag2id = {tag: id for id, tag in enumerate(unique_tags)}\n",
345
+ "id2tag = {id: tag for tag, id in tag2id.items()}"
346
+ ]
347
+ },
348
+ {
349
+ "cell_type": "code",
350
+ "execution_count": null,
351
+ "metadata": {},
352
+ "outputs": [],
353
+ "source": [
354
+ "unique_tags"
355
+ ]
356
+ },
357
+ {
358
+ "cell_type": "markdown",
359
+ "metadata": {},
360
+ "source": [
361
+ "Next, you will create an array of tags from your cleaned dataset. Oftentimes, your input sequence can exceed the maximum length of a sequence your network can process, so it needs to be cut off to that desired maximum length. And when the input sequence is shorter than the desired length, you need to append zeroes onto its end using this [Keras padding API](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/sequence/pad_sequences)."
362
+ ]
363
+ },
364
+ {
365
+ "cell_type": "code",
366
+ "execution_count": null,
367
+ "metadata": {},
368
+ "outputs": [],
369
+ "source": [
370
+ "from tensorflow.keras.preprocessing.sequence import pad_sequences"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "code",
375
+ "execution_count": null,
376
+ "metadata": {},
377
+ "outputs": [],
378
+ "source": [
379
+ "MAX_LEN = 512\n",
380
+ "labels = cleanedDF['setences_cleaned'].values.tolist()\n",
381
+ "\n",
382
+ "tags = pad_sequences([[tag2id.get(l) for l in lab] for lab in labels],\n",
383
+ " maxlen=MAX_LEN, value=tag2id[\"Empty\"], padding=\"post\",\n",
384
+ " dtype=\"long\", truncating=\"post\")"
385
+ ]
386
+ },
387
+ {
388
+ "cell_type": "code",
389
+ "execution_count": null,
390
+ "metadata": {},
391
+ "outputs": [],
392
+ "source": [
393
+ "tags"
394
+ ]
395
+ },
396
+ {
397
+ "cell_type": "markdown",
398
+ "metadata": {},
399
+ "source": [
400
+ "<a name='1-3'></a>\n",
401
+ "### 1.3 - Tokenize and Align Labels with 🤗 Library\n",
402
+ "\n",
403
+ "Before feeding the texts to a Transformer model, you will need to tokenize your input using a [🤗 Transformer tokenizer](https://huggingface.co/transformers/main_classes/tokenizer.html). It is crucial that the tokenizer you use matches the Transformer model type you are using! In this exercise, you will use the 🤗 [DistilBERT fast tokenizer](https://huggingface.co/transformers/model_doc/distilbert.html), which standardizes the length of your sequence to 512 and pads with zeros. Notice this matches the maximum length you used when creating tags. "
404
+ ]
405
+ },
406
+ {
407
+ "cell_type": "code",
408
+ "execution_count": null,
409
+ "metadata": {
410
+ "deletable": false,
411
+ "edittable": false
412
+ },
413
+ "outputs": [],
414
+ "source": [
415
+ "gpus = tf.config.list_physical_devices('GPU')\n",
416
+ "if gpus:\n",
417
+ " for gpu in gpus:\n",
418
+ " tf.config.experimental.set_virtual_device_configuration(gpu,[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])"
419
+ ]
420
+ },
421
+ {
422
+ "cell_type": "code",
423
+ "execution_count": null,
424
+ "metadata": {},
425
+ "outputs": [],
426
+ "source": [
427
+ "from transformers import DistilBertTokenizerFast #, TFDistilBertModel\n",
428
+ "tokenizer = DistilBertTokenizerFast.from_pretrained('tokenizer/')"
429
+ ]
430
+ },
431
+ {
432
+ "cell_type": "markdown",
433
+ "metadata": {},
434
+ "source": [
435
+ "Transformer models are often trained by tokenizers that split words into subwords. For instance, the word 'Africa' might get split into multiple subtokens. This can create some misalignment between the list of tags for the dataset and the list of labels generated by the tokenizer, since the tokenizer can split one word into several, or add special tokens. Before processing, it is important that you align the lists of tags and the list of labels generated by the selected tokenizer with a `tokenize_and_align_labels()` function.\n",
436
+ "\n",
437
+ "<a name='ex-1'></a>\n",
438
+ "### Exercise 1 - tokenize_and_align_labels\n",
439
+ "\n",
440
+ "Implement `tokenize_and_align_labels()`. The function should perform the following:\n",
441
+ "* The tokenizer cuts sequences that exceed the maximum size allowed by your model with the parameter `truncation=True`\n",
442
+ "* Align the list of tags and labels using the tokenizer's `word_ids` method, which returns a list that maps the subtokens to the original word in the sentence and special tokens to `None`. \n",
443
+ "* Set the labels of all the special tokens (`None`) to -100 to prevent them from affecting the loss function. \n",
444
+ "* Assign the word's label to its first subtoken and set the label for the following subtokens to -100. "
445
+ ]
446
+ },
447
+ {
448
+ "cell_type": "code",
449
+ "execution_count": null,
450
+ "metadata": {},
451
+ "outputs": [],
452
+ "source": [
453
+ "label_all_tokens = True\n",
454
+ "def tokenize_and_align_labels(tokenizer, examples, tags):\n",
455
+ " tokenized_inputs = tokenizer(examples, truncation=True, is_split_into_words=False, padding='max_length', max_length=512)\n",
456
+ " labels = []\n",
457
+ " for i, label in enumerate(tags):\n",
458
+ " word_ids = tokenized_inputs.word_ids(batch_index=i)\n",
459
+ " previous_word_idx = None\n",
460
+ " label_ids = []\n",
461
+ " for word_idx in word_ids:\n",
462
+ " # Special tokens have a word id that is None. We set the label to -100 so they are automatically\n",
463
+ " # ignored in the loss function.\n",
464
+ " if word_idx is None:\n",
465
+ " label_ids.append(-100)\n",
466
+ " # We set the label for the first token of each word.\n",
467
+ " elif word_idx != previous_word_idx:\n",
468
+ " label_ids.append(label[word_idx])\n",
469
+ " # For the other tokens in a word, we set the label to either the current label or -100, depending on\n",
470
+ " # the label_all_tokens flag.\n",
471
+ " else:\n",
472
+ " label_ids.append(label[word_idx] if label_all_tokens else -100)\n",
473
+ " previous_word_idx = word_idx\n",
474
+ "\n",
475
+ " labels.append(label_ids)\n",
476
+ "\n",
477
+ " tokenized_inputs[\"labels\"] = labels\n",
478
+ " return tokenized_inputs"
479
+ ]
480
+ },
481
+ {
482
+ "cell_type": "markdown",
483
+ "metadata": {},
484
+ "source": [
485
+ "Now that you have tokenized inputs, you can create train and test datasets!"
486
+ ]
487
+ },
488
+ {
489
+ "cell_type": "code",
490
+ "execution_count": null,
491
+ "metadata": {},
492
+ "outputs": [],
493
+ "source": [
494
+ "test = tokenize_and_align_labels(tokenizer, df_data['content'].values.tolist(), tags)\n",
495
+ "train_dataset = tf.data.Dataset.from_tensor_slices((\n",
496
+ " test['input_ids'],\n",
497
+ " test['labels']\n",
498
+ "))"
499
+ ]
500
+ },
501
+ {
502
+ "cell_type": "markdown",
503
+ "metadata": {},
504
+ "source": [
505
+ "<a name='1-4'></a>\n",
506
+ "### 1.4 - Optimization\n",
507
+ "\n",
508
+ "Fantastic! Now you can finally feed your data into a pretrained 🤗 model. You will optimize a DistilBERT model, which matches the tokenizer you used to preprocess your data. Try playing around with the different hyperparameters to improve your results!"
509
+ ]
510
+ },
511
+ {
512
+ "cell_type": "code",
513
+ "execution_count": null,
514
+ "metadata": {},
515
+ "outputs": [],
516
+ "source": [
517
+ "from transformers import TFDistilBertForTokenClassification\n",
518
+ "\n",
519
+ "model = TFDistilBertForTokenClassification.from_pretrained('model/', num_labels=len(unique_tags))"
520
+ ]
521
+ },
522
+ {
523
+ "cell_type": "code",
524
+ "execution_count": null,
525
+ "metadata": {},
526
+ "outputs": [],
527
+ "source": [
528
+ "optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)\n",
529
+ "model.compile(optimizer=optimizer, loss=model.hf_compute_loss, metrics=['accuracy']) # can also use any keras loss fn\n",
530
+ "model.fit(train_dataset.batch(4),\n",
531
+ " epochs=10, \n",
532
+ " batch_size=4)"
533
+ ]
534
+ },
535
+ {
536
+ "cell_type": "code",
537
+ "execution_count": null,
538
+ "metadata": {},
539
+ "outputs": [],
540
+ "source": [
541
+ "text = \"Manisha Bharti. 3.5 years of professional IT experience in Banking and Finance domain\"\n",
542
+ "inputs = tokenizer(text, return_tensors=\"tf\", truncation=True, is_split_into_words=False, padding=\"max_length\", max_length=512 )\n",
543
+ "input_ids = inputs[\"input_ids\"]\n",
544
+ "#inputs[\"labels\"] = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids)))"
545
+ ]
546
+ },
547
+ {
548
+ "cell_type": "code",
549
+ "execution_count": null,
550
+ "metadata": {},
551
+ "outputs": [],
552
+ "source": [
553
+ "output = model(inputs).logits\n",
554
+ "prediction = np.argmax(output, axis=2)\n",
555
+ "print( prediction)"
556
+ ]
557
+ },
558
+ {
559
+ "cell_type": "code",
560
+ "execution_count": null,
561
+ "metadata": {},
562
+ "outputs": [],
563
+ "source": [
564
+ "model(inputs)"
565
+ ]
566
+ },
567
+ {
568
+ "cell_type": "code",
569
+ "execution_count": null,
570
+ "metadata": {},
571
+ "outputs": [],
572
+ "source": [
573
+ "pred_labels = []"
574
+ ]
575
+ },
576
+ {
577
+ "cell_type": "code",
578
+ "execution_count": null,
579
+ "metadata": {},
580
+ "outputs": [],
581
+ "source": [
582
+ "!pip install seqeval"
583
+ ]
584
+ },
585
+ {
586
+ "cell_type": "code",
587
+ "execution_count": null,
588
+ "metadata": {},
589
+ "outputs": [],
590
+ "source": [
591
+ "true_labels = [[id2tag.get(true_index, \"Empty\") for true_index in test['labels'][i]] for i in range(len(test['labels']))]\n",
592
+ "np.array(true_labels).shape"
593
+ ]
594
+ },
595
+ {
596
+ "cell_type": "code",
597
+ "execution_count": null,
598
+ "metadata": {},
599
+ "outputs": [],
600
+ "source": [
601
+ "output = model.predict(train_dataset)"
602
+ ]
603
+ },
604
+ {
605
+ "cell_type": "code",
606
+ "execution_count": null,
607
+ "metadata": {},
608
+ "outputs": [],
609
+ "source": [
610
+ "predictions = np.argmax(output['logits'].reshape(220, -1, 12), axis=-1)"
611
+ ]
612
+ },
613
+ {
614
+ "cell_type": "code",
615
+ "execution_count": null,
616
+ "metadata": {},
617
+ "outputs": [],
618
+ "source": [
619
+ "predictions.shape"
620
+ ]
621
+ },
622
+ {
623
+ "cell_type": "code",
624
+ "execution_count": null,
625
+ "metadata": {},
626
+ "outputs": [],
627
+ "source": [
628
+ "from matplotlib import pyplot as plt \n",
629
+ "\n",
630
+ "p = plt.hist(np.array(true_labels).flatten())\n",
631
+ "plt.xticks(rotation='vertical')\n",
632
+ "plt.show()"
633
+ ]
634
+ },
635
+ {
636
+ "cell_type": "code",
637
+ "execution_count": null,
638
+ "metadata": {},
639
+ "outputs": [],
640
+ "source": [
641
+ "from collections import Counter\n",
642
+ "Counter(np.array(true_labels).flatten())"
643
+ ]
644
+ },
645
+ {
646
+ "cell_type": "code",
647
+ "execution_count": null,
648
+ "metadata": {},
649
+ "outputs": [],
650
+ "source": [
651
+ "pred_labels = [[id2tag.get(index, \"Empty\") for index in predictions[i]] for i in range(len(predictions))]\n",
652
+ "p = plt.hist(np.array(pred_labels).flatten())\n",
653
+ "plt.xticks(rotation='vertical')\n",
654
+ "plt.show()"
655
+ ]
656
+ },
657
+ {
658
+ "cell_type": "code",
659
+ "execution_count": null,
660
+ "metadata": {},
661
+ "outputs": [],
662
+ "source": [
663
+ "from seqeval.metrics import classification_report\n",
664
+ "print(classification_report(true_labels, pred_labels))"
665
+ ]
666
+ },
667
+ {
668
+ "cell_type": "markdown",
669
+ "metadata": {},
670
+ "source": [
671
+ "### Congratulations!\n",
672
+ "\n",
673
+ "#### Here's what you should remember\n",
674
+ "\n",
675
+ "- Named-entity recognition (NER) detects and classifies named-entities, and can help process resumes, customer reviews, browsing histories, etc. \n",
676
+ "- You must preprocess text data with the corresponding tokenizer to the pretrained model before feeding your input into your Transformer model."
677
+ ]
678
+ },
679
+ {
680
+ "cell_type": "code",
681
+ "execution_count": null,
682
+ "metadata": {},
683
+ "outputs": [],
684
+ "source": []
685
+ },
686
+ {
687
+ "cell_type": "code",
688
+ "execution_count": null,
689
+ "metadata": {},
690
+ "outputs": [],
691
+ "source": []
692
+ }
693
+ ],
694
+ "metadata": {
695
+ "kernelspec": {
696
+ "display_name": "Python 3",
697
+ "language": "python",
698
+ "name": "python3"
699
+ },
700
+ "language_info": {
701
+ "codemirror_mode": {
702
+ "name": "ipython",
703
+ "version": 3
704
+ },
705
+ "file_extension": ".py",
706
+ "mimetype": "text/x-python",
707
+ "name": "python",
708
+ "nbconvert_exporter": "python",
709
+ "pygments_lexer": "ipython3",
710
+ "version": "3.8.10"
711
+ }
712
+ },
713
+ "nbformat": 4,
714
+ "nbformat_minor": 2
715
+ }
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/Transformer_application_Named_Entity_Recognition.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/model/._config.json ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/model/._tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c609005a68991082f5f7a122c44818a33fd6be0205464bbfdd514dd50eb8295f
3
+ size 212
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/model/config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForMaskedLM"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5",
18
+ "6": "LABEL_6",
19
+ "7": "LABEL_7",
20
+ "8": "LABEL_8",
21
+ "9": "LABEL_9",
22
+ "10": "LABEL_10",
23
+ "11": "LABEL_11"
24
+ },
25
+ "initializer_range": 0.02,
26
+ "label2id": {
27
+ "LABEL_0": 0,
28
+ "LABEL_1": 1,
29
+ "LABEL_10": 10,
30
+ "LABEL_11": 11,
31
+ "LABEL_2": 2,
32
+ "LABEL_3": 3,
33
+ "LABEL_4": 4,
34
+ "LABEL_5": 5,
35
+ "LABEL_6": 6,
36
+ "LABEL_7": 7,
37
+ "LABEL_8": 8,
38
+ "LABEL_9": 9
39
+ },
40
+ "max_position_embeddings": 512,
41
+ "model_type": "distilbert",
42
+ "n_heads": 12,
43
+ "n_layers": 6,
44
+ "pad_token_id": 0,
45
+ "qa_dropout": 0.1,
46
+ "seq_classif_dropout": 0.2,
47
+ "sinusoidal_pos_embds": false,
48
+ "tie_weights_": true,
49
+ "transformers_version": "4.5.1",
50
+ "vocab_size": 30522
51
+ }
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/model/tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e18c2bfd591b3cb73cf6619c3b59870b09291057287cc4f2fabf65e06232ced8
3
+ size 265614944
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/ner.json ADDED
The diff for this file is too large to render. See raw diff
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/._special_tokens_map.json ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/._tokenizer_config.json ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/._vocab.txt ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased"}
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/tokenizer/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
Transformer Mechanism/Named Entity Recognition/tf/W4A2_UGL/utils.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import logging
import re

import pandas as pd
2
+
3
def mergeIntervals(intervals):
    """Merge overlapping ``(start, end, label)`` intervals.

    The intervals are sorted by their start value and swept left to right.
    An interval whose start is less than or equal to the end of the
    previously kept interval is folded into it:

    * same label: the kept interval is extended to the larger end;
    * different label, previous ends strictly later: the previous interval
      is kept unchanged and the current one is dropped;
    * different label otherwise: the kept interval takes the previous start
      together with the current end and the current label.

    Args:
        intervals: Iterable of indexable triples ``(start, end, label)``.

    Returns:
        list: The merged intervals, ordered by start.
    """
    merged = []
    for current in sorted(intervals, key=lambda tup: tup[0]):
        # Nothing kept yet, or no overlap with the last kept interval.
        if not merged or current[0] > merged[-1][1]:
            merged.append(current)
            continue

        previous = merged[-1]
        # Labels are compared by value; identity (`is`) only works by
        # accident when the interpreter happens to share string objects.
        if previous[2] == current[2]:
            merged[-1] = (previous[0], max(previous[1], current[1]), previous[2])
        elif previous[1] <= current[1]:
            merged[-1] = (previous[0], current[1], current[2])
        # Otherwise the previous interval ends later and has a different
        # label: it is kept as-is and the current one is discarded.
    return merged
24
+
25
def get_entities(df):
    """Collect the merged entity spans for every row of ``df``.

    For each annotation the first label (``annot['label'][0]``) and the
    first point (``annot['points'][0]``) are read; the point's ``end`` is
    incremented by one before the span is stored. Each row's spans are then
    passed through ``mergeIntervals``.

    Args:
        df: Object whose ``'annotation'`` column yields, per row, an iterable
            of annotation mappings (or ``None``).

    Returns:
        list: One list per row containing ``(start, end, label)`` tuples.
    """
    entities = []

    # Iterate the column values directly so the result does not depend on
    # the frame having a default 0..n-1 index.
    for annotations in df['annotation']:
        if annotations is None:
            # A row without annotations simply has no entities.
            annotations = []

        spans = []
        for annot in annotations:
            try:
                label = annot['label'][0]
                point = annot['points'][0]
                spans.append((point['start'], point['end'] + 1, label))
            except (KeyError, IndexError, TypeError):
                # Best effort: skip annotations with missing or malformed
                # fields, but let unrelated errors propagate.
                continue

        entities.append(mergeIntervals(spans))

    return entities
45
+
46
def read_dataset(path="ner.json"):
    """Load the line-delimited JSON dataset and attach entity spans.

    The ``extras`` column is dropped, newlines in ``content`` are replaced
    by single spaces, and an ``entities`` column is added with the output of
    ``get_entities``.

    Args:
        path: Location of the JSON-lines file. Defaults to ``"ner.json"``,
            the file this function originally read.

    Returns:
        pandas.DataFrame: The loaded frame with the added ``entities`` column.
    """
    df_data = pd.read_json(path, lines=True)
    df_data = df_data.drop(['extras'], axis=1)
    # regex=False: a literal replacement, independent of the pandas-version
    # specific default of the `regex` argument.
    df_data['content'] = df_data['content'].str.replace("\n", " ", regex=False)
    df_data['entities'] = get_entities(df_data)

    return df_data
53
+
54
def convert_dataturks_to_spacy(dataturks_JSON_FilePath):
    """Convert a JSON-lines annotation export into ``(text, {"entities": ...})`` pairs.

    Every non-blank line of the file is parsed as one JSON record. Newlines
    in the record's ``content`` are replaced by spaces. For each annotation
    only the first point is used; its start/end offsets are moved inwards by
    the amount of leading/trailing whitespace found in the point's ``text``,
    and one ``(start, end + 1, label)`` tuple is emitted per label.

    Args:
        dataturks_JSON_FilePath: Path of the JSON-lines file to read.

    Returns:
        list | None: A list of ``(text, {"entities": [...]})`` tuples, or
        ``None`` when anything goes wrong (the error is logged).
    """
    try:
        training_data = []
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(dataturks_JSON_FilePath, 'r', encoding='utf-8') as f:
            for line in f:
                if not line.strip():
                    # A blank line (e.g. a trailing newline) is not a record.
                    continue

                data = json.loads(line)
                text = data['content'].replace("\n", " ")
                entities = []
                data_annotations = data['annotation']
                if data_annotations is not None:
                    for annotation in data_annotations:
                        # only a single point in text annotation.
                        point = annotation['points'][0]
                        labels = annotation['label']
                        # handle both list of labels or a single label.
                        if not isinstance(labels, list):
                            labels = [labels]
                        if not labels:
                            continue

                        # The trimmed offsets do not depend on the label, so
                        # they are computed once per annotation.
                        point_text = point['text']
                        lstrip_diff = len(point_text) - len(point_text.lstrip())
                        rstrip_diff = len(point_text) - len(point_text.rstrip())
                        point_start = point['start'] + lstrip_diff
                        point_end = point['end'] - rstrip_diff

                        for label in labels:
                            entities.append((point_start, point_end + 1, label))

                training_data.append((text, {"entities": entities}))
        return training_data
    except Exception as e:
        # Deliberately broad: callers rely on `None` as the failure signal.
        logging.exception("Unable to process %s\nerror = %s",
                          dataturks_JSON_FilePath, e)
        return None
92
+
93
def trim_entity_spans(data: list) -> list:
    """Removes leading and trailing white spaces from entity spans.

    Each span's start is moved right past whitespace characters and its end
    is moved left past whitespace characters. The end is first clamped to
    the text length, and neither bound is allowed to cross the other, so a
    span consisting only of whitespace collapses to an empty span instead of
    an inverted one.

    Args:
        data (list): The data to be cleaned in spaCy JSON format, i.e. an
            iterable of ``(text, {'entities': [(start, end, label), ...]})``.

    Returns:
        list: The cleaned data as ``[text, {'entities': [[start, end, label], ...]}]``
        entries.
    """
    invalid_span_tokens = re.compile(r'\s')

    cleaned_data = []
    for text, annotations in data:
        valid_entities = []
        for start, end, label in annotations['entities']:
            valid_start = start
            # Guard against an end offset that points past the text.
            valid_end = min(end, len(text))
            while valid_start < valid_end and invalid_span_tokens.match(
                    text[valid_start]):
                valid_start += 1
            while valid_end > valid_start and invalid_span_tokens.match(
                    text[valid_end - 1]):
                valid_end -= 1
            valid_entities.append([valid_start, valid_end, label])
        cleaned_data.append([text, {'entities': valid_entities}])
    return cleaned_data
120
+
121
def clean_dataset(data):
    """Produce one per-word tag list for every ``(text, spans)`` record.

    Each record is indexed as ``record[0]`` (the text) and ``record[1]`` (a
    mapping whose values are lists of ``(start, end, label)`` spans). Every
    whitespace-separated word starts with the tag ``"Empty"``. The text is
    scanned character by character; a space followed by a non-space marks
    the end of a word, which receives the label of the first span (searched
    from the end of the span list) with ``start <= word start`` and
    ``end >= index of that space``. At the last space of the text, the final
    word receives the label of the first span (again searched from the end)
    that starts at or before that space and reaches the end of the text.

    Args:
        data: Sequence of records as described above.

    Returns:
        pandas.DataFrame: A single column named ``"setences_cleaned"`` (the
        spelling is kept because callers look the column up by this name),
        one row per record, each cell holding the list of tags.
    """
    rows = []
    for record in data:
        text = record[0]
        spans_by_key = record[1]
        text_len = len(text)
        last_space = text.rfind(' ')
        tags = ["Empty"] * len(text.split())

        span_groups = list(spans_by_key.values())
        # The final-word rule only consults the last group of spans.
        final_group = span_groups[-1] if span_groups else []

        word_start = 0
        word_idx = 0
        for pos in range(text_len):
            # `pos + 1 < text_len` keeps a trailing space from indexing past
            # the end of the text.
            if text[pos] == " " and pos + 1 < text_len and text[pos + 1] != " ":
                for group in span_groups:
                    for span in reversed(group):
                        if word_start >= int(span[0]) and pos <= int(span[1]):
                            tags[word_idx] = span[2]
                            break
                word_start = pos + 1
                word_idx += 1
            if pos == last_space:
                for span in reversed(final_group):
                    if last_space >= int(span[0]) and text_len <= int(span[1]):
                        tags[word_idx] = span[2]
                        break
        rows.append(tags)

    # Build the frame once: DataFrame.append no longer exists in pandas 2.x
    # and copied the whole frame on every call.
    return pd.DataFrame({"setences_cleaned": pd.Series(rows, dtype=object)})
Transformer Mechanism/QA/tf/.Trash-0/files/QA_dataset.ipynb ADDED
@@ -0,0 +1,2510 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "TBjVwYpHJ7ra"
7
+ },
8
+ "source": [
9
+ "# Transformer Network Application: Question Answering\n",
10
+ "\n",
11
+ "Welcome to Week 4's third, and the last lab of the course! Congratulations on making it this far. In this notebook you'll explore another application of the transformer architecture that you built.\n",
12
+ "\n",
13
+ "**After this assignment you'll be able to**:\n",
14
+ "\n",
15
+ "* Perform extractive Question Answering \n",
16
+ "* Fine-tune a pre-trained transformer model to a custom dataset\n",
17
+ "* Implement a QA model in TensorFlow and PyTorch"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "markdown",
22
+ "metadata": {
23
+ "id": "SoRb7ykXJ_C4"
24
+ },
25
+ "source": [
26
+ "## Table of Contents\n",
27
+ "\n",
28
+ "\n",
29
+ "- [1 - Extractive Question Answering](#1)\n",
30
+ " - [1.1 - Data Cleaning](#1-1)\n",
31
+ " - [1.2 - Tokenize and Align Labels with 🤗 Library](#1-2)\n",
32
+ "- [2 - Training](#2)\n",
33
+ " - [2.1 TensorFlow implementation](#2-1)\n",
34
+ " - [2.2 PyTorch implementation](#2-2)\n"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "markdown",
39
+ "metadata": {
40
+ "id": "C0k56ZVXLDbi"
41
+ },
42
+ "source": [
43
+ "<a name='1'></a>\n",
44
+ "## 1 - Extractive Question Answering\n",
45
+ "\n",
46
+ "Question answering (QA) is a task of natural language processing that aims to automatically answer questions. The goal of *extractive* QA is to identify the portion of the text that contains the answer to a question. For example, when tasked with answering the question 'When will Jane go to Africa?' given the text data 'Jane visits Africa in September', the question answering model will highlight 'September'.\n",
47
+ "\n",
48
+ "* You will use a variation of the Transformer model you built in the last assignment to answer questions about stories.\n",
49
+ "* You will implement an extractive QA model in TensorFlow and in PyTorch.\n",
50
+ "\n",
51
+ "**Recommendation:**\n",
52
+ "* If you are interested, check out the [Course 4: Natural Language Processing with Attention Models](https://www.coursera.org/learn/attention-models-in-nlp/home/welcome) of our [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?=) where you can learn how to build Transformers and perform QA using the [Trax](https://trax.readthedocs.io/en/latest/) library. \n",
53
+ "\n",
54
+ "<a name='1-1'></a>\n",
55
+ "### 1.1 - Data preprocessing\n",
56
+ "\n",
57
+ "Run the following cell to load the [QA bAbI dataset](https://research.fb.com/downloads/babi/), which is one of the bAbI datasets generated by Facebook AI Research to advance natural language processing."
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": null,
63
+ "metadata": {},
64
+ "outputs": [],
65
+ "source": [
66
+ "!pip install pyarrow==6.0.0"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 1,
72
+ "metadata": {
73
+ "colab": {
74
+ "base_uri": "https://localhost:8080/"
75
+ },
76
+ "id": "XxU0G_PYLSXJ",
77
+ "outputId": "44e7877f-5c33-45fc-ed83-3aa4920dcc40"
78
+ },
79
+ "outputs": [
80
+ {
81
+ "ename": "ModuleNotFoundError",
82
+ "evalue": "No module named 'fsspec.archive'",
83
+ "output_type": "error",
84
+ "traceback": [
85
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
86
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
87
+ "Input \u001b[0;32mIn [1]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_from_disk\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Load a dataset and print the first example in the training set\u001b[39;00m\n\u001b[1;32m 4\u001b[0m babi_dataset \u001b[38;5;241m=\u001b[39m load_from_disk(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata/\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
88
+ "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/datasets/__init__.py:43\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m pyarrow\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m version\n\u001b[0;32m---> 43\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marrow_dataset\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Dataset\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marrow_reader\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ReadInstruction\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbuilder\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ArrowBasedBuilder, BeamBasedBuilder, BuilderConfig, DatasetBuilder, GeneratorBasedBuilder\n",
89
+ "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/datasets/arrow_dataset.py:63\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtqdm\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mauto\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tqdm\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m config\n\u001b[0;32m---> 63\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marrow_reader\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ArrowReader\n\u001b[1;32m 64\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marrow_writer\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ArrowWriter, OptimizedTypedSequence\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdownload\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdownload_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DownloadConfig\n",
90
+ "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/datasets/arrow_reader.py:29\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyarrow\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpa\u001b[39;00m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyarrow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mparquet\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpq\u001b[39;00m\n\u001b[0;32m---> 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdownload\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdownload_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DownloadConfig\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnaming\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _split_re, filenames_for_dataset_split\n\u001b[1;32m 31\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtable\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m InMemoryTable, MemoryMappedTable, Table, concat_tables\n",
91
+ "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/datasets/download/__init__.py:10\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdownload_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DownloadConfig\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdownload_manager\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DownloadManager, DownloadMode\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mstreaming_download_manager\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m StreamingDownloadManager\n",
92
+ "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/datasets/download/streaming_download_manager.py:20\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01maiohttp\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclient_exceptions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ClientError\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m config\n\u001b[0;32m---> 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfilesystems\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m COMPRESSION_FILESYSTEMS\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfile_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 22\u001b[0m get_authentication_headers_for_url,\n\u001b[1;32m 23\u001b[0m http_head,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 27\u001b[0m url_or_path_join,\n\u001b[1;32m 28\u001b[0m )\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlogging\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_logger\n",
93
+ "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/datasets/filesystems/__init__.py:6\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m List\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mfsspec\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m compression\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhffilesystem\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m HfFileSystem\n\u001b[1;32m 10\u001b[0m _has_s3fs \u001b[38;5;241m=\u001b[39m importlib\u001b[38;5;241m.\u001b[39mutil\u001b[38;5;241m.\u001b[39mfind_spec(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124ms3fs\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
94
+ "File \u001b[0;32m/usr/local/lib/python3.8/dist-packages/datasets/filesystems/compression.py:5\u001b[0m, in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Optional\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mfsspec\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfsspec\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marchive\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AbstractArchiveFileSystem\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfsspec\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DEFAULT_BLOCK_SIZE\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mBaseCompressedFileFileSystem\u001b[39;00m(AbstractArchiveFileSystem):\n",
95
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'fsspec.archive'"
96
+ ]
97
+ }
98
+ ],
99
+ "source": [
100
+ "from datasets import load_from_disk\n",
101
+ "\n",
102
+ "# Load a dataset and print the first example in the training set\n",
103
+ "babi_dataset = load_from_disk('data/')\n",
104
+ "print(babi_dataset['train'][0])"
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "markdown",
109
+ "metadata": {
110
+ "id": "XJwacC3bMhZM"
111
+ },
112
+ "source": [
113
+ "Take a look at the format of the data. For a given story, there are two sentences which serve as the context, and one question. Each of these phrases has an ID. There is also a supporting fact ID which refers to a sentence in the story that helps answer the question. For example, for the question 'What is east of the hallway?', the supporting fact 'The bedroom is east of the hallway' has the ID '2'. There is also the answer, 'bedroom' for the question."
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": null,
119
+ "metadata": {
120
+ "colab": {
121
+ "base_uri": "https://localhost:8080/"
122
+ },
123
+ "id": "aizPXfGlLZ1D",
124
+ "outputId": "0e1d47bc-9c1a-458a-983e-22f47f8184bd"
125
+ },
126
+ "outputs": [],
127
+ "source": [
128
+ "babi_dataset['train'][102]"
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "markdown",
133
+ "metadata": {
134
+ "id": "ewtXZUPjMm2l"
135
+ },
136
+ "source": [
137
+ "Check and see if the entire dataset of stories has this format."
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": null,
143
+ "metadata": {
144
+ "id": "55BSWxwuM1hN"
145
+ },
146
+ "outputs": [],
147
+ "source": [
148
+ "type_set = set()\n",
149
+ "for story in babi_dataset['train']:\n",
150
+ " if str(story['story']['type'] )not in type_set:\n",
151
+ " type_set.add(str(story['story']['type'] ))"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": null,
157
+ "metadata": {
158
+ "colab": {
159
+ "base_uri": "https://localhost:8080/"
160
+ },
161
+ "id": "bdJ8VMF1UT7S",
162
+ "outputId": "2b959467-75e8-4e25-e7bb-481b657a2fce"
163
+ },
164
+ "outputs": [],
165
+ "source": [
166
+ "type_set"
167
+ ]
168
+ },
169
+ {
170
+ "cell_type": "markdown",
171
+ "metadata": {
172
+ "id": "JsHx1tcyMq_k"
173
+ },
174
+ "source": [
175
+ "To make the data easier to work with, you will flatten the dataset to transform it from a dictionary structure to a table structure."
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": null,
181
+ "metadata": {
182
+ "id": "YxixFI-pVOK9"
183
+ },
184
+ "outputs": [],
185
+ "source": [
186
+ "flattened_babi = babi_dataset.flatten()"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": null,
192
+ "metadata": {
193
+ "colab": {
194
+ "base_uri": "https://localhost:8080/"
195
+ },
196
+ "id": "kXU43CqCdX98",
197
+ "outputId": "e968ff5e-0db0-4e9d-e1e9-e93f965b2582"
198
+ },
199
+ "outputs": [],
200
+ "source": [
201
+ "flattened_babi"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": null,
207
+ "metadata": {
208
+ "colab": {
209
+ "base_uri": "https://localhost:8080/"
210
+ },
211
+ "id": "OQw59MgT6Luh",
212
+ "outputId": "ea5eac53-027e-42d3-d19f-98ed7863de2b"
213
+ },
214
+ "outputs": [],
215
+ "source": [
216
+ "next(iter(flattened_babi['train']))"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "markdown",
221
+ "metadata": {
222
+ "id": "4vXfmhOPMvt1"
223
+ },
224
+ "source": [
225
+ "Now it is much easier to access the information you need! You can now easily extract the answer, question, and facts from the story, and also join the facts into a single entry under 'sentences'."
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "execution_count": null,
231
+ "metadata": {
232
+ "id": "O5NcABwkdbrf"
233
+ },
234
+ "outputs": [],
235
+ "source": [
236
+ "def get_question_and_facts(story):\n",
237
+ " dic = {}\n",
238
+ " dic['question'] = story['story.text'][2]\n",
239
+ " dic['sentences'] = ' '.join([story['story.text'][0], story['story.text'][1]])\n",
240
+ " dic['answer'] = story['story.answer'][2]\n",
241
+ " return dic"
242
+ ]
243
+ },
244
+ {
245
+ "cell_type": "code",
246
+ "execution_count": null,
247
+ "metadata": {
248
+ "colab": {
249
+ "base_uri": "https://localhost:8080/",
250
+ "height": 115,
251
+ "referenced_widgets": [
252
+ "44b7bea3e09d4e5684921c66dd4c7514",
253
+ "6af3ec5091d74bd1a95bf02a87dd240b",
254
+ "7e1325e57bf9417e93d7ef180794ab3c",
255
+ "3dab28395f3f475d8242e4d4d45ed059",
256
+ "ca722dcd857c433c9058585e31a1673d",
257
+ "7fb1118c0b4443b6b6dbb5803e9ec2e8",
258
+ "58718e12f1b7459989ab5296846c4be6",
259
+ "63b4ebafcead4c0784b5511219a6a198",
260
+ "c42644a4e6184a1cbdb2b453b5dbb7d6",
261
+ "364ba960eb474c9084cc71851594d345",
262
+ "e8f1abd85f3e49f991d4c1312ffd416b",
263
+ "929946fdfaa04cf59d3b31cf92fc08d1",
264
+ "aa5c0d374889482697fc0f7ce9c81afe",
265
+ "ff444b253e9a40e5bec755926d83740f",
266
+ "89fdda6e6688476495ca297bfe010bf8",
267
+ "cda72c45821a4eb89f1a3ab5510b26d3"
268
+ ]
269
+ },
270
+ "id": "LHKNQ75afMoZ",
271
+ "outputId": "6ceeae5c-392c-4553-c487-14a648eb9209"
272
+ },
273
+ "outputs": [],
274
+ "source": [
275
+ "processed = flattened_babi.map(get_question_and_facts)"
276
+ ]
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "execution_count": null,
281
+ "metadata": {
282
+ "colab": {
283
+ "base_uri": "https://localhost:8080/"
284
+ },
285
+ "id": "KaTacKMufPba",
286
+ "outputId": "2433d446-e985-45cd-a200-f9805b4056bd"
287
+ },
288
+ "outputs": [],
289
+ "source": [
290
+ "processed['train'][2]"
291
+ ]
292
+ },
293
+ {
294
+ "cell_type": "code",
295
+ "execution_count": null,
296
+ "metadata": {
297
+ "colab": {
298
+ "base_uri": "https://localhost:8080/"
299
+ },
300
+ "id": "IOrYr5LI0pbP",
301
+ "outputId": "8142f23c-7dab-49b9-8027-fbe7364ae4e9"
302
+ },
303
+ "outputs": [],
304
+ "source": [
305
+ "processed['test'][2]"
306
+ ]
307
+ },
308
+ {
309
+ "cell_type": "markdown",
310
+ "metadata": {
311
+ "id": "oN7D3fszM2hy"
312
+ },
313
+ "source": [
314
+ "The goal of extractive QA is to find the part of the text that contains the answer to the question. You will identify the position of the answer using the indexes of the string. For example, if the answer to some question was 'September', you would need to find the start and end string indices of the word 'September' in the context sentence 'Jane visits Africa in September.'\n",
315
+ "\n",
316
+ "\n",
317
+ "Use this next function to get the start and end indices of the answer in each of the stories in your dataset."
318
+ ]
319
+ },
320
+ {
321
+ "cell_type": "code",
322
+ "execution_count": null,
323
+ "metadata": {
324
+ "id": "J1JJx3PafSyR"
325
+ },
326
+ "outputs": [],
327
+ "source": [
328
+ "def get_start_end_idx(story):\n",
329
+ " str_idx = story['sentences'].find(story['answer'])\n",
330
+ " end_idx = str_idx + len(story['answer'])\n",
331
+ " return {'str_idx':str_idx,\n",
332
+ " 'end_idx': end_idx}"
333
+ ]
334
+ },
335
+ {
336
+ "cell_type": "code",
337
+ "execution_count": null,
338
+ "metadata": {
339
+ "colab": {
340
+ "base_uri": "https://localhost:8080/",
341
+ "height": 115,
342
+ "referenced_widgets": [
343
+ "8968319cdaca476fb15c11a388dce39a",
344
+ "863c5ce96db84e3da162072c9a13c913",
345
+ "a725734893004a45b61194f649f5f602",
346
+ "c4a24656d67844e995d3b8e175c6c497",
347
+ "4f5b06c3a5e44c6cade5bf83634d9f69",
348
+ "afc33fa78b5d440192c435bfca6f7914",
349
+ "f37bd346f8614fec92d6c5b5e9b66d2f",
350
+ "b4c6a18610734036a16a14a43174c52e",
351
+ "07aaa9b79a744856b19d723370d6e588",
352
+ "afedd2328cf141f78775e4cfa7758267",
353
+ "b39b85d8cb05418aa92e8476ad02f755",
354
+ "0a8534ac52af4d48ad82b66463ad08c3",
355
+ "3abb36da57c841838867c56e2a3a325b",
356
+ "8b961844b5004905922531bd805a9d57",
357
+ "31fc08a1e7e04f6b9b3ea400ccfaea75",
358
+ "8cfbd3b14b23417993270f851a2d8ff9"
359
+ ]
360
+ },
361
+ "id": "4e7BdgJJhwXi",
362
+ "outputId": "d9c7a923-d2eb-4533-f37e-4f269f22eb89"
363
+ },
364
+ "outputs": [],
365
+ "source": [
366
+ "processed = processed.map(get_start_end_idx)"
367
+ ]
368
+ },
369
+ {
370
+ "cell_type": "code",
371
+ "execution_count": null,
372
+ "metadata": {
373
+ "colab": {
374
+ "base_uri": "https://localhost:8080/"
375
+ },
376
+ "id": "P8ytxyfvh0kB",
377
+ "outputId": "c008b161-be24-40bb-a32d-47d92e624787"
378
+ },
379
+ "outputs": [],
380
+ "source": [
381
+ "num = 187\n",
382
+ "print(processed['test'][num])\n",
383
+ "start_idx = processed['test'][num]['str_idx']\n",
384
+ "end_idx = processed['test'][num]['end_idx']\n",
385
+ "print('answer:', processed['test'][num]['sentences'][start_idx:end_idx])"
386
+ ]
387
+ },
388
+ {
389
+ "cell_type": "markdown",
390
+ "metadata": {
391
+ "id": "VVX3TA2xM-vJ"
392
+ },
393
+ "source": [
394
+ "<a name='1-2'></a>\n",
395
+ "### 1.2 - Tokenize and Align with 🤗 Library\n",
396
+ "\n",
397
+ "Now you have all the data you need to train a Transformer model to perform Question Answering! You are ready for a task you may have already encountered in the Named-Entity Recognition lab - tokenizing and aligning your input. To feed text data to a Transformer model, you will need to tokenize your input using a [🤗 Transformer tokenizer](https://huggingface.co/transformers/main_classes/tokenizer.html). It is crucial that the tokenizer you use matches the Transformer model type you are using! In this exercise, you will use the 🤗 [DistilBERT fast tokenizer](https://huggingface.co/transformers/model_doc/distilbert.html), which standardizes the length of your sequence to 512 and pads with zeros. "
398
+ ]
399
+ },
400
+ {
401
+ "cell_type": "markdown",
402
+ "metadata": {
403
+ "id": "c892hk9NNF9O"
404
+ },
405
+ "source": [
406
+ "Transformer models are often trained with tokenizers that split words into subwords. For instance, the word 'Africa' might get split into multiple subtokens. This can create some misalignment between the list of tags for the dataset and the list of labels generated by the tokenizer, since the tokenizer can split one word into several, or add special tokens. Before processing, it is important that you align the start and end indices with the tokens associated with the target answer word with a `tokenize_align()` function. In this case, since you are interested in the start and end indices of the answer, you will want to align the index of the sentence to match the index of the token for a word. \n"
407
+ ]
408
+ },
409
+ {
410
+ "cell_type": "code",
411
+ "execution_count": null,
412
+ "metadata": {
413
+ "id": "UI-9P7VYitxv"
414
+ },
415
+ "outputs": [],
416
+ "source": [
417
+ "from transformers import DistilBertTokenizerFast\n",
418
+ "tokenizer = DistilBertTokenizerFast.from_pretrained('tokenizer/')"
419
+ ]
420
+ },
421
+ {
422
+ "cell_type": "code",
423
+ "execution_count": null,
424
+ "metadata": {
425
+ "id": "Pex-YXJnnwb9"
426
+ },
427
+ "outputs": [],
428
+ "source": [
429
+ "def tokenize_align(example):\n",
430
+ " encoding = tokenizer(example['sentences'], example['question'], truncation=True, padding=True, max_length=tokenizer.model_max_length)\n",
431
+ " start_positions = encoding.char_to_token(example['str_idx'])\n",
432
+ " end_positions = encoding.char_to_token(example['end_idx']-1)\n",
433
+ " if start_positions is None:\n",
434
+ " start_positions = tokenizer.model_max_length\n",
435
+ " if end_positions is None:\n",
436
+ " end_positions = tokenizer.model_max_length\n",
437
+ " return {'input_ids': encoding['input_ids'],\n",
438
+ " 'attention_mask': encoding['attention_mask'],\n",
439
+ " 'start_positions': start_positions,\n",
440
+ " 'end_positions': end_positions}"
441
+ ]
442
+ },
443
+ {
444
+ "cell_type": "code",
445
+ "execution_count": null,
446
+ "metadata": {
447
+ "colab": {
448
+ "base_uri": "https://localhost:8080/",
449
+ "height": 115,
450
+ "referenced_widgets": [
451
+ "4d9152a30e824931983a425ee6d607a6",
452
+ "1f2773e3e80c4dd8b6b26e171bf33bc7",
453
+ "013f041c3e0b4e35bf2432fc345cb7bf",
454
+ "ef4e12f29f1e458f811a400faf21bdcc",
455
+ "f0e34f2bf626434fa73f0def26b3d1a5",
456
+ "1e6c02317171453cbd3d4d665879b0d4",
457
+ "5b6dbe662ca24834b7678638e101e1ff",
458
+ "39029f730ae140c7902fca6dac5361ad",
459
+ "723acefae33d448199fa5c1a9ec3f246",
460
+ "32a5c82c7a9845c09c11bb4e30c2f1aa",
461
+ "77273c2e4b4e4e4c8ee4b6b344749518",
462
+ "f0ac3b9b8f664479940c6ee18fc2f13e",
463
+ "393697738e724e9fad4d163de0a77840",
464
+ "e592db98c0c34c5e800f5d7b6d3c099e",
465
+ "568f11b4462f4b4e95f3ad5947bb275e",
466
+ "7fefe9e1121a43558d773500aef8935c"
467
+ ]
468
+ },
469
+ "id": "kKyLNWCvksOr",
470
+ "outputId": "7af3d914-4546-430c-c2f0-206b732e5131"
471
+ },
472
+ "outputs": [],
473
+ "source": [
474
+ "qa_dataset = processed.map(tokenize_align)"
475
+ ]
476
+ },
477
+ {
478
+ "cell_type": "code",
479
+ "execution_count": null,
480
+ "metadata": {
481
+ "id": "8v5odGZBmGw0"
482
+ },
483
+ "outputs": [],
484
+ "source": [
485
+ "qa_dataset = qa_dataset.remove_columns(['story.answer', 'story.id', 'story.supporting_ids', 'story.text', 'story.type'])"
486
+ ]
487
+ },
488
+ {
489
+ "cell_type": "code",
490
+ "execution_count": null,
491
+ "metadata": {
492
+ "colab": {
493
+ "base_uri": "https://localhost:8080/"
494
+ },
495
+ "id": "yBHzbjffmJa8",
496
+ "outputId": "b0688636-fdec-4de0-c2d9-69372b1ddbac"
497
+ },
498
+ "outputs": [],
499
+ "source": [
500
+ "qa_dataset['train'][200]"
501
+ ]
502
+ },
503
+ {
504
+ "cell_type": "markdown",
505
+ "metadata": {
506
+ "id": "qw79BQfW4feu"
507
+ },
508
+ "source": [
509
+ "<font color='blue'><b>What you should remember:</b>\n",
510
+ "- The goal of *extractive* QA is to identify the portion of the text that contains the answer to a question.\n",
511
+ "- Transformer models are often trained with tokenizers that split words into subwords.\n",
512
+ " - Before processing, it is important that you align the start and end indices with the tokens associated with the target answer word.\n",
513
+ "</font>"
514
+ ]
515
+ },
516
+ {
517
+ "cell_type": "markdown",
518
+ "metadata": {
519
+ "id": "rFfJozZvNZWG"
520
+ },
521
+ "source": [
522
+ "<a name='2'></a>\n",
523
+ "# 2 - Training \n",
524
+ "\n",
525
+ "Now that you have finished tokenizing and aligning your data, you can feed it into a pre-trained 🤗 Transformer model! You will use a DistilBERT model, which matches the tokenizer you used to preprocess your data."
526
+ ]
527
+ },
528
+ {
529
+ "cell_type": "code",
530
+ "execution_count": null,
531
+ "metadata": {
532
+ "id": "8sdX5XY0Gwwc"
533
+ },
534
+ "outputs": [],
535
+ "source": [
536
+ "train_ds = qa_dataset['train']\n",
537
+ "test_ds = qa_dataset['test']"
538
+ ]
539
+ },
540
+ {
541
+ "cell_type": "code",
542
+ "execution_count": null,
543
+ "metadata": {
544
+ "colab": {
545
+ "base_uri": "https://localhost:8080/"
546
+ },
547
+ "id": "Be5k3ilHsJ6q",
548
+ "outputId": "f2f7fea3-1394-4aaf-b159-994a38476994"
549
+ },
550
+ "outputs": [],
551
+ "source": [
552
+ "from transformers import TFDistilBertForQuestionAnswering\n",
553
+ "model = TFDistilBertForQuestionAnswering.from_pretrained(\"model/tensorflow\", return_dict=False)"
554
+ ]
555
+ },
556
+ {
557
+ "cell_type": "markdown",
558
+ "metadata": {
559
+ "id": "-aQVOG4ANcd2"
560
+ },
561
+ "source": [
562
+ "<a name='2-1'></a>\n",
563
+ "### 2.1 - TensorFlow implementation\n",
564
+ "For this assignment you will execute two implementations, one in TensorFlow and one in PyTorch.\n",
565
+ "\n",
566
+ "\n"
567
+ ]
568
+ },
569
+ {
570
+ "cell_type": "markdown",
571
+ "metadata": {
572
+ "id": "8pCRo_parYMc"
573
+ },
574
+ "source": [
575
+ "\n",
576
+ "#### Train and test datasets\n",
577
+ "\n",
578
+ "**Note:**\n",
579
+ "* In the TensorFlow implementation, you will have to set the data format type to tensors, which may create ragged tensors (tensors of different lengths). \n",
580
+ "* You will have to convert the ragged tensors to normal tensors using the `to_tensor()` method, which pads the tensors and sets the dimensions to `[None, tokenizer.model_max_length]` so you can feed different size tensors into your model based on the batch size. "
581
+ ]
582
+ },
583
+ {
584
+ "cell_type": "code",
585
+ "execution_count": null,
586
+ "metadata": {
587
+ "id": "FbpplBxNtanH"
588
+ },
589
+ "outputs": [],
590
+ "source": [
591
+ "import tensorflow as tf\n",
592
+ "\n",
593
+ "columns_to_return = ['input_ids','attention_mask', 'start_positions', 'end_positions']\n",
594
+ "\n",
595
+ "train_ds.set_format(type='tf', columns=columns_to_return)\n",
596
+ "\n",
597
+ "train_features = {x: train_ds[x].to_tensor(default_value=0, shape=[None, tokenizer.model_max_length]) for x in ['input_ids', 'attention_mask']}\n",
598
+ "train_labels = {\"start_positions\": tf.reshape(train_ds['start_positions'], shape=[-1,1]),\n",
599
+ " 'end_positions': tf.reshape(train_ds['end_positions'], shape=[-1,1])}\n",
600
+ "\n",
601
+ "\n",
602
+ "train_tfdataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels)).batch(8)"
603
+ ]
604
+ },
605
+ {
606
+ "cell_type": "markdown",
607
+ "metadata": {
608
+ "id": "0_Jj8Av6rEuN"
609
+ },
610
+ "source": [
611
+ "#### Training \n",
612
+ "\n",
613
+ "It is finally time to start training your model! \n",
614
+ "\n",
615
+ "* Create a custom training function using [tf.GradientTape()](https://www.tensorflow.org/api_docs/python/tf/GradientTape)\n",
616
+ "* Target two loss functions, one for the start index and one for the end index. \n",
617
+ "* `tf.GradientTape()` records the operations performed during forward prop for automatic differentiation during backprop. \n"
618
+ ]
619
+ },
620
+ {
621
+ "cell_type": "code",
622
+ "execution_count": null,
623
+ "metadata": {
624
+ "colab": {
625
+ "base_uri": "https://localhost:8080/"
626
+ },
627
+ "id": "PtZz249vQbLn",
628
+ "outputId": "24cdf861-af63-4581-a0ae-2de29d1880ed"
629
+ },
630
+ "outputs": [],
631
+ "source": [
632
+ "EPOCHS = 3\n",
633
+ "loss_fn1 = tf.keras.losses.SparseCategoricalCrossentropy( from_logits=True)\n",
634
+ "loss_fn2 = tf.keras.losses.SparseCategoricalCrossentropy( from_logits=True)\n",
635
+ "opt = tf.keras.optimizers.Adam(learning_rate=3e-5)\n",
636
+ "\n",
637
+ "losses = []\n",
638
+ "for epoch in range(EPOCHS):\n",
639
+ " print(\"Starting epoch: %d\"% epoch )\n",
640
+ " for step, (x_batch_train, y_batch_train) in enumerate(train_tfdataset):\n",
641
+ " with tf.GradientTape() as tape:\n",
642
+ " answer_start_scores, answer_end_scores = model(x_batch_train)\n",
643
+ " loss_start = loss_fn1(y_batch_train['start_positions'], answer_start_scores)\n",
644
+ " loss_end = loss_fn2(y_batch_train['end_positions'], answer_end_scores)\n",
645
+ " loss = 0.5 * (loss_start + loss_end)\n",
646
+ " losses.append(loss)\n",
647
+ " grads = tape.gradient(loss, model.trainable_weights)\n",
648
+ " opt.apply_gradients(zip(grads, model.trainable_weights))\n",
649
+ "\n",
650
+ " if step % 20 == 0:\n",
651
+ " print(\"Training loss (for one batch) at step %d: %.4f\"% (step, \n",
652
+ " float(loss_start)))\n"
653
+ ]
654
+ },
655
+ {
656
+ "cell_type": "markdown",
657
+ "metadata": {
658
+ "id": "Q8ggB0JUWQuW"
659
+ },
660
+ "source": [
661
+ "Take a look at your losses and try playing around with some of the hyperparameters for better results!"
662
+ ]
663
+ },
664
+ {
665
+ "cell_type": "code",
666
+ "execution_count": null,
667
+ "metadata": {
668
+ "colab": {
669
+ "base_uri": "https://localhost:8080/",
670
+ "height": 282
671
+ },
672
+ "id": "fK91EPvRYFcX",
673
+ "outputId": "6b7099dd-f918-4905-e3a3-fcce2880e506"
674
+ },
675
+ "outputs": [],
676
+ "source": [
677
+ "from matplotlib.pyplot import plot\n",
678
+ "\n",
679
+ "plot(losses)"
680
+ ]
681
+ },
682
+ {
683
+ "cell_type": "markdown",
684
+ "metadata": {
685
+ "id": "64OtEmyUWUiM"
686
+ },
687
+ "source": [
688
+ "You have successfully trained your model to help automatically answer questions! Try asking it a question about a story."
689
+ ]
690
+ },
691
+ {
692
+ "cell_type": "code",
693
+ "execution_count": null,
694
+ "metadata": {
695
+ "colab": {
696
+ "base_uri": "https://localhost:8080/"
697
+ },
698
+ "id": "eFniMzpp1bpz",
699
+ "outputId": "0ce0e2a3-3d6a-4e6e-adff-d0c16b622c9a"
700
+ },
701
+ "outputs": [],
702
+ "source": [
703
+ "question, text = 'What is south of the bedroom?','The hallway is south of the garden. The garden is south of the bedroom.'\n",
704
+ "input_dict = tokenizer(text, question, return_tensors='tf')\n",
705
+ "outputs = model(input_dict)\n",
706
+ "start_logits = outputs[0]\n",
707
+ "end_logits = outputs[1]\n",
708
+ "\n",
709
+ "all_tokens = tokenizer.convert_ids_to_tokens(input_dict[\"input_ids\"].numpy()[0])\n",
710
+ "answer = ' '.join(all_tokens[tf.math.argmax(start_logits, 1)[0] : tf.math.argmax(end_logits, 1)[0]+1])\n",
711
+ "print(question, answer.capitalize())"
712
+ ]
713
+ },
714
+ {
715
+ "cell_type": "markdown",
716
+ "metadata": {
717
+ "id": "f07OtnCpuKFa"
718
+ },
719
+ "source": [
720
+ "Congratulations! You just implemented your first QA model in TensorFlow. "
721
+ ]
722
+ },
723
+ {
724
+ "cell_type": "markdown",
725
+ "metadata": {
726
+ "id": "9UaM5pY9u8EW"
727
+ },
728
+ "source": [
729
+ "<a name='2-2'></a>\n",
730
+ "## 2.2 PyTorch implementation\n",
731
+ "\n",
732
+ "[PyTorch](https://pytorch.org/) is an open source machine learning framework developed by Facebook's AI Research lab that can be used for computer vision and natural language processing. As you can imagine, it is quite compatible with the bAbI dataset."
733
+ ]
734
+ },
735
+ {
736
+ "cell_type": "markdown",
737
+ "metadata": {
738
+ "id": "nD9akXoXxMjd"
739
+ },
740
+ "source": [
741
+ "#### Train and test dataset\n",
742
+ "\n",
743
+ "Go ahead and try creating a train and test dataset by importing PyTorch."
744
+ ]
745
+ },
746
+ {
747
+ "cell_type": "code",
748
+ "execution_count": null,
749
+ "metadata": {
750
+ "id": "JxMYWSG173ch"
751
+ },
752
+ "outputs": [],
753
+ "source": [
754
+ "from torch.utils.data import DataLoader\n",
755
+ "\n",
756
+ "columns_to_return = ['input_ids','attention_mask', 'start_positions', 'end_positions']\n",
757
+ "train_ds.set_format(type='pt', columns=columns_to_return)\n",
758
+ "test_ds.set_format(type='pt', columns=columns_to_return)"
759
+ ]
760
+ },
761
+ {
762
+ "cell_type": "markdown",
763
+ "metadata": {
764
+ "id": "OeuzZKlPHAAQ"
765
+ },
766
+ "source": [
767
+ "For the accuracy metrics for the PyTorch implementation, you will change things up a bit and use the [F1 score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) for start and end indices over the entire test dataset as the evaluation metrics. "
768
+ ]
769
+ },
770
+ {
771
+ "cell_type": "code",
772
+ "execution_count": null,
773
+ "metadata": {
774
+ "id": "aD9tDpZfJsIB"
775
+ },
776
+ "outputs": [],
777
+ "source": [
778
+ "from sklearn.metrics import f1_score\n",
779
+ "\n",
780
+ "def compute_metrics(pred):\n",
781
+ " start_labels = pred.label_ids[0]\n",
782
+ " start_preds = pred.predictions[0].argmax(-1)\n",
783
+ " end_labels = pred.label_ids[1]\n",
784
+ " end_preds = pred.predictions[1].argmax(-1)\n",
785
+ " \n",
786
+ " f1_start = f1_score(start_labels, start_preds, average='macro')\n",
787
+ " f1_end = f1_score(end_labels, end_preds, average='macro')\n",
788
+ " \n",
789
+ " return {\n",
790
+ " 'f1_start': f1_start,\n",
791
+ " 'f1_end': f1_end,\n",
792
+ " }"
793
+ ]
794
+ },
795
+ {
796
+ "cell_type": "markdown",
797
+ "metadata": {
798
+ "id": "laX5cYQRHMXb"
799
+ },
800
+ "source": [
801
+ "#### Training\n",
802
+ "\n",
803
+ "Now it is time to load a pre-trained model. \n",
804
+ "\n",
805
+ "**Note:** For the PyTorch implementation, you will use `DistilBertForQuestionAnswering` instead of its TensorFlow counterpart, `TFDistilBertForQuestionAnswering`."
806
+ ]
807
+ },
808
+ {
809
+ "cell_type": "code",
810
+ "execution_count": null,
811
+ "metadata": {},
812
+ "outputs": [],
813
+ "source": [
814
+ "del model # We delete the tensorflow model to avoid memory issues"
815
+ ]
816
+ },
817
+ {
818
+ "cell_type": "code",
819
+ "execution_count": null,
820
+ "metadata": {
821
+ "colab": {
822
+ "base_uri": "https://localhost:8080/"
823
+ },
824
+ "id": "YXFCsNcY79jx",
825
+ "outputId": "09af112f-e1e9-4a47-c988-37ee2a068df2"
826
+ },
827
+ "outputs": [],
828
+ "source": [
829
+ "from transformers import DistilBertForQuestionAnswering\n",
830
+ "\n",
831
+ "pytorch_model = DistilBertForQuestionAnswering.from_pretrained(\"model/pytorch\")"
832
+ ]
833
+ },
834
+ {
835
+ "cell_type": "markdown",
836
+ "metadata": {
837
+ "id": "xCUdMmCxHP6_"
838
+ },
839
+ "source": [
840
+ "Instead of a custom training loop, you will use the [🤗 Trainer](https://huggingface.co/transformers/main_classes/trainer.html), which contains a basic training loop and is fairly easy to implement in PyTorch."
841
+ ]
842
+ },
843
+ {
844
+ "cell_type": "code",
845
+ "execution_count": null,
846
+ "metadata": {
847
+ "colab": {
848
+ "base_uri": "https://localhost:8080/",
849
+ "height": 329
850
+ },
851
+ "id": "1htmS3TV-2Bk",
852
+ "outputId": "cc21bfbb-da09-47f9-ee16-7db0096d35e7"
853
+ },
854
+ "outputs": [],
855
+ "source": [
856
+ "from transformers import Trainer, TrainingArguments\n",
857
+ "\n",
858
+ "training_args = TrainingArguments(\n",
859
+ " output_dir='results', # output directory\n",
860
+ " overwrite_output_dir=True,\n",
861
+ " num_train_epochs=3, # total number of training epochs\n",
862
+ " per_device_train_batch_size=8, # batch size per device during training\n",
863
+ " per_device_eval_batch_size=8, # batch size for evaluation\n",
864
+ " warmup_steps=20, # number of warmup steps for learning rate scheduler\n",
865
+ " weight_decay=0.01, # strength of weight decay\n",
866
+ " logging_dir=None, # directory for storing logs\n",
867
+ " logging_steps=50\n",
868
+ ")\n",
869
+ "\n",
870
+ "trainer = Trainer(\n",
871
+ " model=pytorch_model, # the instantiated 🤗 Transformers model to be trained\n",
872
+ " args=training_args, # training arguments, defined above\n",
873
+ " train_dataset=train_ds, # training dataset\n",
874
+ " eval_dataset=test_ds,\n",
875
+ " compute_metrics=compute_metrics # evaluation dataset\n",
876
+ ")\n",
877
+ "\n",
878
+ "trainer.train()"
879
+ ]
880
+ },
881
+ {
882
+ "cell_type": "code",
883
+ "execution_count": null,
884
+ "metadata": {
885
+ "colab": {
886
+ "base_uri": "https://localhost:8080/",
887
+ "height": 207
888
+ },
889
+ "id": "lDzbm7vzAiPJ",
890
+ "outputId": "7cd62f51-a04b-4583-bc0e-e459813d3103"
891
+ },
892
+ "outputs": [],
893
+ "source": [
894
+ "trainer.evaluate(test_ds)"
895
+ ]
896
+ },
897
+ {
898
+ "cell_type": "markdown",
899
+ "metadata": {
900
+ "id": "QAgrcs2pHvVu"
901
+ },
902
+ "source": [
903
+ "Now it is time to ask your PyTorch model a question! \n",
904
+ "* Before testing your model with a question, you can tell PyTorch to send your model and inputs to the GPU if your machine has one, or the CPU if it does not. \n",
905
+ "* You can then proceed to tokenize your input and create PyTorch tensors and send them to your device. \n",
906
+ "* The rest of the pipeline is relatively similar to the one you implemented for TensorFlow. \n"
907
+ ]
908
+ },
909
+ {
910
+ "cell_type": "code",
911
+ "execution_count": null,
912
+ "metadata": {
913
+ "colab": {
914
+ "base_uri": "https://localhost:8080/"
915
+ },
916
+ "id": "yfBe9AFABqUr",
917
+ "outputId": "b5ca6039-8ce2-4e75-9161-1c96a0f39425"
918
+ },
919
+ "outputs": [],
920
+ "source": [
921
+ "import torch\n",
922
+ "\n",
923
+ "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
924
+ "\n",
925
+ "pytorch_model.to(device)\n",
926
+ "\n",
927
+ "question, text = 'What is east of the hallway?','The kitchen is east of the hallway. The garden is south of the bedroom.'\n",
928
+ "\n",
929
+ "input_dict = tokenizer(text, question, return_tensors='pt')\n",
930
+ "\n",
931
+ "input_ids = input_dict['input_ids'].to(device)\n",
932
+ "attention_mask = input_dict['attention_mask'].to(device)\n",
933
+ "\n",
934
+ "outputs = pytorch_model(input_ids, attention_mask=attention_mask)\n",
935
+ "\n",
936
+ "start_logits = outputs[0]\n",
937
+ "end_logits = outputs[1]\n",
938
+ "\n",
939
+ "all_tokens = tokenizer.convert_ids_to_tokens(input_dict[\"input_ids\"].numpy()[0])\n",
940
+ "answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])\n",
941
+ "\n",
942
+ "print(question, answer.capitalize())"
943
+ ]
944
+ },
945
+ {
946
+ "cell_type": "markdown",
947
+ "metadata": {
948
+ "id": "eGzuHkMZ4q9I"
949
+ },
950
+ "source": [
951
+ "### Congratulations!\n",
952
+ " \n",
953
+ "You've completed this notebook, and can now implement Transformer models for QA tasks!\n",
954
+ "\n",
955
+ "You are now able to:\n",
956
+ "* Perform extractive Question Answering \n",
957
+ "* Fine-tune a pre-trained transformer model to a custom dataset\n",
958
+ "* Implement a QA model in TensorFlow and PyTorch"
959
+ ]
960
+ },
961
+ {
962
+ "cell_type": "markdown",
963
+ "metadata": {
964
+ "id": "G8tAV-584vKE"
965
+ },
966
+ "source": [
967
+ "<font color='blue'><b>What you should remember</b>:\n",
968
+ "- Transformer models are often trained with tokenizers that split words into subwords.\n",
969
+ " - Before processing, it is important that you align the start and end indices with the tokens associated with the target answer word.\n",
970
+ "- PyTorch is a relatively light and easy to implement framework that can make rapid prototyping easier, while TensorFlow has advantages in scaling and is more widely used in production\n",
971
+ " - `tf.GradientTape` allows you to build custom training loops in TensorFlow\n",
972
+ " - The 🤗 `Trainer` API gives you a basic training loop for PyTorch models that is compatible with 🤗 models and datasets"
973
+ ]
974
+ },
975
+ {
976
+ "cell_type": "code",
977
+ "execution_count": null,
978
+ "metadata": {},
979
+ "outputs": [],
980
+ "source": [
981
+ "%%javascript\n",
982
+ "let element = document.getElementById('submit-notebook-button-group');\n",
983
+ "if (!element) {\n",
984
+ " window._save_and_close = function(){\n",
985
+ " IPython.notebook.save_checkpoint();\n",
986
+ " IPython.notebook.session.delete();\n",
987
+ " window.onbeforeunload = null\n",
988
+ " setTimeout(function() {window.close();}, 1000)\n",
989
+ " }\n",
990
+ " let header = document.getElementById('maintoolbar-container');\n",
991
+ " element = document.createElement(\"div\");\n",
992
+ " element.setAttribute(\"class\", \"btn-group\");\n",
993
+ " element.setAttribute(\"id\", \"submit-notebook-button-group\");\n",
994
+ " element.setAttribute(\"align\", \"right\");\n",
995
+ " element.setAttribute(\"style\", \"float:right\")\n",
996
+ " element.innerHTML = '<button class=\"btn btn-default\" title=\"Save and close this notebook.\" style=\"background-color:rgb(42, 115, 204); color:white; padding:4px 8px\" onclick=window._save_and_close()>Save and close</button>'\n",
997
+ " header.appendChild(element); \n",
998
+ "} "
999
+ ]
1000
+ },
1001
+ {
1002
+ "cell_type": "code",
1003
+ "execution_count": null,
1004
+ "metadata": {},
1005
+ "outputs": [],
1006
+ "source": []
1007
+ }
1008
+ ],
1009
+ "metadata": {
1010
+ "accelerator": "GPU",
1011
+ "colab": {
1012
+ "collapsed_sections": [],
1013
+ "name": "QA-dataset.ipynb",
1014
+ "provenance": []
1015
+ },
1016
+ "kernelspec": {
1017
+ "display_name": "Python 3",
1018
+ "language": "python",
1019
+ "name": "python3"
1020
+ },
1021
+ "language_info": {
1022
+ "codemirror_mode": {
1023
+ "name": "ipython",
1024
+ "version": 3
1025
+ },
1026
+ "file_extension": ".py",
1027
+ "mimetype": "text/x-python",
1028
+ "name": "python",
1029
+ "nbconvert_exporter": "python",
1030
+ "pygments_lexer": "ipython3",
1031
+ "version": "3.8.10"
1032
+ },
1033
+ "widgets": {
1034
+ "application/vnd.jupyter.widget-state+json": {
1035
+ "013f041c3e0b4e35bf2432fc345cb7bf": {
1036
+ "model_module": "@jupyter-widgets/controls",
1037
+ "model_name": "FloatProgressModel",
1038
+ "state": {
1039
+ "_dom_classes": [],
1040
+ "_model_module": "@jupyter-widgets/controls",
1041
+ "_model_module_version": "1.5.0",
1042
+ "_model_name": "FloatProgressModel",
1043
+ "_view_count": null,
1044
+ "_view_module": "@jupyter-widgets/controls",
1045
+ "_view_module_version": "1.5.0",
1046
+ "_view_name": "ProgressView",
1047
+ "bar_style": "success",
1048
+ "description": "100%",
1049
+ "description_tooltip": null,
1050
+ "layout": "IPY_MODEL_1e6c02317171453cbd3d4d665879b0d4",
1051
+ "max": 1000,
1052
+ "min": 0,
1053
+ "orientation": "horizontal",
1054
+ "style": "IPY_MODEL_f0e34f2bf626434fa73f0def26b3d1a5",
1055
+ "value": 1000
1056
+ }
1057
+ },
1058
+ "07aaa9b79a744856b19d723370d6e588": {
1059
+ "model_module": "@jupyter-widgets/controls",
1060
+ "model_name": "HBoxModel",
1061
+ "state": {
1062
+ "_dom_classes": [],
1063
+ "_model_module": "@jupyter-widgets/controls",
1064
+ "_model_module_version": "1.5.0",
1065
+ "_model_name": "HBoxModel",
1066
+ "_view_count": null,
1067
+ "_view_module": "@jupyter-widgets/controls",
1068
+ "_view_module_version": "1.5.0",
1069
+ "_view_name": "HBoxView",
1070
+ "box_style": "",
1071
+ "children": [
1072
+ "IPY_MODEL_b39b85d8cb05418aa92e8476ad02f755",
1073
+ "IPY_MODEL_0a8534ac52af4d48ad82b66463ad08c3"
1074
+ ],
1075
+ "layout": "IPY_MODEL_afedd2328cf141f78775e4cfa7758267"
1076
+ }
1077
+ },
1078
+ "0a8534ac52af4d48ad82b66463ad08c3": {
1079
+ "model_module": "@jupyter-widgets/controls",
1080
+ "model_name": "HTMLModel",
1081
+ "state": {
1082
+ "_dom_classes": [],
1083
+ "_model_module": "@jupyter-widgets/controls",
1084
+ "_model_module_version": "1.5.0",
1085
+ "_model_name": "HTMLModel",
1086
+ "_view_count": null,
1087
+ "_view_module": "@jupyter-widgets/controls",
1088
+ "_view_module_version": "1.5.0",
1089
+ "_view_name": "HTMLView",
1090
+ "description": "",
1091
+ "description_tooltip": null,
1092
+ "layout": "IPY_MODEL_8cfbd3b14b23417993270f851a2d8ff9",
1093
+ "placeholder": "​",
1094
+ "style": "IPY_MODEL_31fc08a1e7e04f6b9b3ea400ccfaea75",
1095
+ "value": " 1000/1000 [01:40&lt;00:00, 9.90ex/s]"
1096
+ }
1097
+ },
1098
+ "1e6c02317171453cbd3d4d665879b0d4": {
1099
+ "model_module": "@jupyter-widgets/base",
1100
+ "model_name": "LayoutModel",
1101
+ "state": {
1102
+ "_model_module": "@jupyter-widgets/base",
1103
+ "_model_module_version": "1.2.0",
1104
+ "_model_name": "LayoutModel",
1105
+ "_view_count": null,
1106
+ "_view_module": "@jupyter-widgets/base",
1107
+ "_view_module_version": "1.2.0",
1108
+ "_view_name": "LayoutView",
1109
+ "align_content": null,
1110
+ "align_items": null,
1111
+ "align_self": null,
1112
+ "border": null,
1113
+ "bottom": null,
1114
+ "display": null,
1115
+ "flex": null,
1116
+ "flex_flow": null,
1117
+ "grid_area": null,
1118
+ "grid_auto_columns": null,
1119
+ "grid_auto_flow": null,
1120
+ "grid_auto_rows": null,
1121
+ "grid_column": null,
1122
+ "grid_gap": null,
1123
+ "grid_row": null,
1124
+ "grid_template_areas": null,
1125
+ "grid_template_columns": null,
1126
+ "grid_template_rows": null,
1127
+ "height": null,
1128
+ "justify_content": null,
1129
+ "justify_items": null,
1130
+ "left": null,
1131
+ "margin": null,
1132
+ "max_height": null,
1133
+ "max_width": null,
1134
+ "min_height": null,
1135
+ "min_width": null,
1136
+ "object_fit": null,
1137
+ "object_position": null,
1138
+ "order": null,
1139
+ "overflow": null,
1140
+ "overflow_x": null,
1141
+ "overflow_y": null,
1142
+ "padding": null,
1143
+ "right": null,
1144
+ "top": null,
1145
+ "visibility": null,
1146
+ "width": null
1147
+ }
1148
+ },
1149
+ "1f2773e3e80c4dd8b6b26e171bf33bc7": {
1150
+ "model_module": "@jupyter-widgets/base",
1151
+ "model_name": "LayoutModel",
1152
+ "state": {
1153
+ "_model_module": "@jupyter-widgets/base",
1154
+ "_model_module_version": "1.2.0",
1155
+ "_model_name": "LayoutModel",
1156
+ "_view_count": null,
1157
+ "_view_module": "@jupyter-widgets/base",
1158
+ "_view_module_version": "1.2.0",
1159
+ "_view_name": "LayoutView",
1160
+ "align_content": null,
1161
+ "align_items": null,
1162
+ "align_self": null,
1163
+ "border": null,
1164
+ "bottom": null,
1165
+ "display": null,
1166
+ "flex": null,
1167
+ "flex_flow": null,
1168
+ "grid_area": null,
1169
+ "grid_auto_columns": null,
1170
+ "grid_auto_flow": null,
1171
+ "grid_auto_rows": null,
1172
+ "grid_column": null,
1173
+ "grid_gap": null,
1174
+ "grid_row": null,
1175
+ "grid_template_areas": null,
1176
+ "grid_template_columns": null,
1177
+ "grid_template_rows": null,
1178
+ "height": null,
1179
+ "justify_content": null,
1180
+ "justify_items": null,
1181
+ "left": null,
1182
+ "margin": null,
1183
+ "max_height": null,
1184
+ "max_width": null,
1185
+ "min_height": null,
1186
+ "min_width": null,
1187
+ "object_fit": null,
1188
+ "object_position": null,
1189
+ "order": null,
1190
+ "overflow": null,
1191
+ "overflow_x": null,
1192
+ "overflow_y": null,
1193
+ "padding": null,
1194
+ "right": null,
1195
+ "top": null,
1196
+ "visibility": null,
1197
+ "width": null
1198
+ }
1199
+ },
1200
+ "31fc08a1e7e04f6b9b3ea400ccfaea75": {
1201
+ "model_module": "@jupyter-widgets/controls",
1202
+ "model_name": "DescriptionStyleModel",
1203
+ "state": {
1204
+ "_model_module": "@jupyter-widgets/controls",
1205
+ "_model_module_version": "1.5.0",
1206
+ "_model_name": "DescriptionStyleModel",
1207
+ "_view_count": null,
1208
+ "_view_module": "@jupyter-widgets/base",
1209
+ "_view_module_version": "1.2.0",
1210
+ "_view_name": "StyleView",
1211
+ "description_width": ""
1212
+ }
1213
+ },
1214
+ "32a5c82c7a9845c09c11bb4e30c2f1aa": {
1215
+ "model_module": "@jupyter-widgets/base",
1216
+ "model_name": "LayoutModel",
1217
+ "state": {
1218
+ "_model_module": "@jupyter-widgets/base",
1219
+ "_model_module_version": "1.2.0",
1220
+ "_model_name": "LayoutModel",
1221
+ "_view_count": null,
1222
+ "_view_module": "@jupyter-widgets/base",
1223
+ "_view_module_version": "1.2.0",
1224
+ "_view_name": "LayoutView",
1225
+ "align_content": null,
1226
+ "align_items": null,
1227
+ "align_self": null,
1228
+ "border": null,
1229
+ "bottom": null,
1230
+ "display": null,
1231
+ "flex": null,
1232
+ "flex_flow": null,
1233
+ "grid_area": null,
1234
+ "grid_auto_columns": null,
1235
+ "grid_auto_flow": null,
1236
+ "grid_auto_rows": null,
1237
+ "grid_column": null,
1238
+ "grid_gap": null,
1239
+ "grid_row": null,
1240
+ "grid_template_areas": null,
1241
+ "grid_template_columns": null,
1242
+ "grid_template_rows": null,
1243
+ "height": null,
1244
+ "justify_content": null,
1245
+ "justify_items": null,
1246
+ "left": null,
1247
+ "margin": null,
1248
+ "max_height": null,
1249
+ "max_width": null,
1250
+ "min_height": null,
1251
+ "min_width": null,
1252
+ "object_fit": null,
1253
+ "object_position": null,
1254
+ "order": null,
1255
+ "overflow": null,
1256
+ "overflow_x": null,
1257
+ "overflow_y": null,
1258
+ "padding": null,
1259
+ "right": null,
1260
+ "top": null,
1261
+ "visibility": null,
1262
+ "width": null
1263
+ }
1264
+ },
1265
+ "364ba960eb474c9084cc71851594d345": {
1266
+ "model_module": "@jupyter-widgets/base",
1267
+ "model_name": "LayoutModel",
1268
+ "state": {
1269
+ "_model_module": "@jupyter-widgets/base",
1270
+ "_model_module_version": "1.2.0",
1271
+ "_model_name": "LayoutModel",
1272
+ "_view_count": null,
1273
+ "_view_module": "@jupyter-widgets/base",
1274
+ "_view_module_version": "1.2.0",
1275
+ "_view_name": "LayoutView",
1276
+ "align_content": null,
1277
+ "align_items": null,
1278
+ "align_self": null,
1279
+ "border": null,
1280
+ "bottom": null,
1281
+ "display": null,
1282
+ "flex": null,
1283
+ "flex_flow": null,
1284
+ "grid_area": null,
1285
+ "grid_auto_columns": null,
1286
+ "grid_auto_flow": null,
1287
+ "grid_auto_rows": null,
1288
+ "grid_column": null,
1289
+ "grid_gap": null,
1290
+ "grid_row": null,
1291
+ "grid_template_areas": null,
1292
+ "grid_template_columns": null,
1293
+ "grid_template_rows": null,
1294
+ "height": null,
1295
+ "justify_content": null,
1296
+ "justify_items": null,
1297
+ "left": null,
1298
+ "margin": null,
1299
+ "max_height": null,
1300
+ "max_width": null,
1301
+ "min_height": null,
1302
+ "min_width": null,
1303
+ "object_fit": null,
1304
+ "object_position": null,
1305
+ "order": null,
1306
+ "overflow": null,
1307
+ "overflow_x": null,
1308
+ "overflow_y": null,
1309
+ "padding": null,
1310
+ "right": null,
1311
+ "top": null,
1312
+ "visibility": null,
1313
+ "width": null
1314
+ }
1315
+ },
1316
+ "39029f730ae140c7902fca6dac5361ad": {
1317
+ "model_module": "@jupyter-widgets/base",
1318
+ "model_name": "LayoutModel",
1319
+ "state": {
1320
+ "_model_module": "@jupyter-widgets/base",
1321
+ "_model_module_version": "1.2.0",
1322
+ "_model_name": "LayoutModel",
1323
+ "_view_count": null,
1324
+ "_view_module": "@jupyter-widgets/base",
1325
+ "_view_module_version": "1.2.0",
1326
+ "_view_name": "LayoutView",
1327
+ "align_content": null,
1328
+ "align_items": null,
1329
+ "align_self": null,
1330
+ "border": null,
1331
+ "bottom": null,
1332
+ "display": null,
1333
+ "flex": null,
1334
+ "flex_flow": null,
1335
+ "grid_area": null,
1336
+ "grid_auto_columns": null,
1337
+ "grid_auto_flow": null,
1338
+ "grid_auto_rows": null,
1339
+ "grid_column": null,
1340
+ "grid_gap": null,
1341
+ "grid_row": null,
1342
+ "grid_template_areas": null,
1343
+ "grid_template_columns": null,
1344
+ "grid_template_rows": null,
1345
+ "height": null,
1346
+ "justify_content": null,
1347
+ "justify_items": null,
1348
+ "left": null,
1349
+ "margin": null,
1350
+ "max_height": null,
1351
+ "max_width": null,
1352
+ "min_height": null,
1353
+ "min_width": null,
1354
+ "object_fit": null,
1355
+ "object_position": null,
1356
+ "order": null,
1357
+ "overflow": null,
1358
+ "overflow_x": null,
1359
+ "overflow_y": null,
1360
+ "padding": null,
1361
+ "right": null,
1362
+ "top": null,
1363
+ "visibility": null,
1364
+ "width": null
1365
+ }
1366
+ },
1367
+ "393697738e724e9fad4d163de0a77840": {
1368
+ "model_module": "@jupyter-widgets/controls",
1369
+ "model_name": "ProgressStyleModel",
1370
+ "state": {
1371
+ "_model_module": "@jupyter-widgets/controls",
1372
+ "_model_module_version": "1.5.0",
1373
+ "_model_name": "ProgressStyleModel",
1374
+ "_view_count": null,
1375
+ "_view_module": "@jupyter-widgets/base",
1376
+ "_view_module_version": "1.2.0",
1377
+ "_view_name": "StyleView",
1378
+ "bar_color": null,
1379
+ "description_width": "initial"
1380
+ }
1381
+ },
1382
+ "3abb36da57c841838867c56e2a3a325b": {
1383
+ "model_module": "@jupyter-widgets/controls",
1384
+ "model_name": "ProgressStyleModel",
1385
+ "state": {
1386
+ "_model_module": "@jupyter-widgets/controls",
1387
+ "_model_module_version": "1.5.0",
1388
+ "_model_name": "ProgressStyleModel",
1389
+ "_view_count": null,
1390
+ "_view_module": "@jupyter-widgets/base",
1391
+ "_view_module_version": "1.2.0",
1392
+ "_view_name": "StyleView",
1393
+ "bar_color": null,
1394
+ "description_width": "initial"
1395
+ }
1396
+ },
1397
+ "3dab28395f3f475d8242e4d4d45ed059": {
1398
+ "model_module": "@jupyter-widgets/controls",
1399
+ "model_name": "HTMLModel",
1400
+ "state": {
1401
+ "_dom_classes": [],
1402
+ "_model_module": "@jupyter-widgets/controls",
1403
+ "_model_module_version": "1.5.0",
1404
+ "_model_name": "HTMLModel",
1405
+ "_view_count": null,
1406
+ "_view_module": "@jupyter-widgets/controls",
1407
+ "_view_module_version": "1.5.0",
1408
+ "_view_name": "HTMLView",
1409
+ "description": "",
1410
+ "description_tooltip": null,
1411
+ "layout": "IPY_MODEL_63b4ebafcead4c0784b5511219a6a198",
1412
+ "placeholder": "​",
1413
+ "style": "IPY_MODEL_58718e12f1b7459989ab5296846c4be6",
1414
+ "value": " 1000/1000 [00:10&lt;00:00, 97.35ex/s]"
1415
+ }
1416
+ },
1417
+ "44b7bea3e09d4e5684921c66dd4c7514": {
1418
+ "model_module": "@jupyter-widgets/controls",
1419
+ "model_name": "HBoxModel",
1420
+ "state": {
1421
+ "_dom_classes": [],
1422
+ "_model_module": "@jupyter-widgets/controls",
1423
+ "_model_module_version": "1.5.0",
1424
+ "_model_name": "HBoxModel",
1425
+ "_view_count": null,
1426
+ "_view_module": "@jupyter-widgets/controls",
1427
+ "_view_module_version": "1.5.0",
1428
+ "_view_name": "HBoxView",
1429
+ "box_style": "",
1430
+ "children": [
1431
+ "IPY_MODEL_7e1325e57bf9417e93d7ef180794ab3c",
1432
+ "IPY_MODEL_3dab28395f3f475d8242e4d4d45ed059"
1433
+ ],
1434
+ "layout": "IPY_MODEL_6af3ec5091d74bd1a95bf02a87dd240b"
1435
+ }
1436
+ },
1437
+ "4d9152a30e824931983a425ee6d607a6": {
1438
+ "model_module": "@jupyter-widgets/controls",
1439
+ "model_name": "HBoxModel",
1440
+ "state": {
1441
+ "_dom_classes": [],
1442
+ "_model_module": "@jupyter-widgets/controls",
1443
+ "_model_module_version": "1.5.0",
1444
+ "_model_name": "HBoxModel",
1445
+ "_view_count": null,
1446
+ "_view_module": "@jupyter-widgets/controls",
1447
+ "_view_module_version": "1.5.0",
1448
+ "_view_name": "HBoxView",
1449
+ "box_style": "",
1450
+ "children": [
1451
+ "IPY_MODEL_013f041c3e0b4e35bf2432fc345cb7bf",
1452
+ "IPY_MODEL_ef4e12f29f1e458f811a400faf21bdcc"
1453
+ ],
1454
+ "layout": "IPY_MODEL_1f2773e3e80c4dd8b6b26e171bf33bc7"
1455
+ }
1456
+ },
1457
+ "4f5b06c3a5e44c6cade5bf83634d9f69": {
1458
+ "model_module": "@jupyter-widgets/controls",
1459
+ "model_name": "ProgressStyleModel",
1460
+ "state": {
1461
+ "_model_module": "@jupyter-widgets/controls",
1462
+ "_model_module_version": "1.5.0",
1463
+ "_model_name": "ProgressStyleModel",
1464
+ "_view_count": null,
1465
+ "_view_module": "@jupyter-widgets/base",
1466
+ "_view_module_version": "1.2.0",
1467
+ "_view_name": "StyleView",
1468
+ "bar_color": null,
1469
+ "description_width": "initial"
1470
+ }
1471
+ },
1472
+ "568f11b4462f4b4e95f3ad5947bb275e": {
1473
+ "model_module": "@jupyter-widgets/controls",
1474
+ "model_name": "DescriptionStyleModel",
1475
+ "state": {
1476
+ "_model_module": "@jupyter-widgets/controls",
1477
+ "_model_module_version": "1.5.0",
1478
+ "_model_name": "DescriptionStyleModel",
1479
+ "_view_count": null,
1480
+ "_view_module": "@jupyter-widgets/base",
1481
+ "_view_module_version": "1.2.0",
1482
+ "_view_name": "StyleView",
1483
+ "description_width": ""
1484
+ }
1485
+ },
1486
+ "58718e12f1b7459989ab5296846c4be6": {
1487
+ "model_module": "@jupyter-widgets/controls",
1488
+ "model_name": "DescriptionStyleModel",
1489
+ "state": {
1490
+ "_model_module": "@jupyter-widgets/controls",
1491
+ "_model_module_version": "1.5.0",
1492
+ "_model_name": "DescriptionStyleModel",
1493
+ "_view_count": null,
1494
+ "_view_module": "@jupyter-widgets/base",
1495
+ "_view_module_version": "1.2.0",
1496
+ "_view_name": "StyleView",
1497
+ "description_width": ""
1498
+ }
1499
+ },
1500
+ "5b6dbe662ca24834b7678638e101e1ff": {
1501
+ "model_module": "@jupyter-widgets/controls",
1502
+ "model_name": "DescriptionStyleModel",
1503
+ "state": {
1504
+ "_model_module": "@jupyter-widgets/controls",
1505
+ "_model_module_version": "1.5.0",
1506
+ "_model_name": "DescriptionStyleModel",
1507
+ "_view_count": null,
1508
+ "_view_module": "@jupyter-widgets/base",
1509
+ "_view_module_version": "1.2.0",
1510
+ "_view_name": "StyleView",
1511
+ "description_width": ""
1512
+ }
1513
+ },
1514
+ "63b4ebafcead4c0784b5511219a6a198": {
1515
+ "model_module": "@jupyter-widgets/base",
1516
+ "model_name": "LayoutModel",
1517
+ "state": {
1518
+ "_model_module": "@jupyter-widgets/base",
1519
+ "_model_module_version": "1.2.0",
1520
+ "_model_name": "LayoutModel",
1521
+ "_view_count": null,
1522
+ "_view_module": "@jupyter-widgets/base",
1523
+ "_view_module_version": "1.2.0",
1524
+ "_view_name": "LayoutView",
1525
+ "align_content": null,
1526
+ "align_items": null,
1527
+ "align_self": null,
1528
+ "border": null,
1529
+ "bottom": null,
1530
+ "display": null,
1531
+ "flex": null,
1532
+ "flex_flow": null,
1533
+ "grid_area": null,
1534
+ "grid_auto_columns": null,
1535
+ "grid_auto_flow": null,
1536
+ "grid_auto_rows": null,
1537
+ "grid_column": null,
1538
+ "grid_gap": null,
1539
+ "grid_row": null,
1540
+ "grid_template_areas": null,
1541
+ "grid_template_columns": null,
1542
+ "grid_template_rows": null,
1543
+ "height": null,
1544
+ "justify_content": null,
1545
+ "justify_items": null,
1546
+ "left": null,
1547
+ "margin": null,
1548
+ "max_height": null,
1549
+ "max_width": null,
1550
+ "min_height": null,
1551
+ "min_width": null,
1552
+ "object_fit": null,
1553
+ "object_position": null,
1554
+ "order": null,
1555
+ "overflow": null,
1556
+ "overflow_x": null,
1557
+ "overflow_y": null,
1558
+ "padding": null,
1559
+ "right": null,
1560
+ "top": null,
1561
+ "visibility": null,
1562
+ "width": null
1563
+ }
1564
+ },
1565
+ "6af3ec5091d74bd1a95bf02a87dd240b": {
1566
+ "model_module": "@jupyter-widgets/base",
1567
+ "model_name": "LayoutModel",
1568
+ "state": {
1569
+ "_model_module": "@jupyter-widgets/base",
1570
+ "_model_module_version": "1.2.0",
1571
+ "_model_name": "LayoutModel",
1572
+ "_view_count": null,
1573
+ "_view_module": "@jupyter-widgets/base",
1574
+ "_view_module_version": "1.2.0",
1575
+ "_view_name": "LayoutView",
1576
+ "align_content": null,
1577
+ "align_items": null,
1578
+ "align_self": null,
1579
+ "border": null,
1580
+ "bottom": null,
1581
+ "display": null,
1582
+ "flex": null,
1583
+ "flex_flow": null,
1584
+ "grid_area": null,
1585
+ "grid_auto_columns": null,
1586
+ "grid_auto_flow": null,
1587
+ "grid_auto_rows": null,
1588
+ "grid_column": null,
1589
+ "grid_gap": null,
1590
+ "grid_row": null,
1591
+ "grid_template_areas": null,
1592
+ "grid_template_columns": null,
1593
+ "grid_template_rows": null,
1594
+ "height": null,
1595
+ "justify_content": null,
1596
+ "justify_items": null,
1597
+ "left": null,
1598
+ "margin": null,
1599
+ "max_height": null,
1600
+ "max_width": null,
1601
+ "min_height": null,
1602
+ "min_width": null,
1603
+ "object_fit": null,
1604
+ "object_position": null,
1605
+ "order": null,
1606
+ "overflow": null,
1607
+ "overflow_x": null,
1608
+ "overflow_y": null,
1609
+ "padding": null,
1610
+ "right": null,
1611
+ "top": null,
1612
+ "visibility": null,
1613
+ "width": null
1614
+ }
1615
+ },
1616
+ "723acefae33d448199fa5c1a9ec3f246": {
1617
+ "model_module": "@jupyter-widgets/controls",
1618
+ "model_name": "HBoxModel",
1619
+ "state": {
1620
+ "_dom_classes": [],
1621
+ "_model_module": "@jupyter-widgets/controls",
1622
+ "_model_module_version": "1.5.0",
1623
+ "_model_name": "HBoxModel",
1624
+ "_view_count": null,
1625
+ "_view_module": "@jupyter-widgets/controls",
1626
+ "_view_module_version": "1.5.0",
1627
+ "_view_name": "HBoxView",
1628
+ "box_style": "",
1629
+ "children": [
1630
+ "IPY_MODEL_77273c2e4b4e4e4c8ee4b6b344749518",
1631
+ "IPY_MODEL_f0ac3b9b8f664479940c6ee18fc2f13e"
1632
+ ],
1633
+ "layout": "IPY_MODEL_32a5c82c7a9845c09c11bb4e30c2f1aa"
1634
+ }
1635
+ },
1636
+ "77273c2e4b4e4e4c8ee4b6b344749518": {
1637
+ "model_module": "@jupyter-widgets/controls",
1638
+ "model_name": "FloatProgressModel",
1639
+ "state": {
1640
+ "_dom_classes": [],
1641
+ "_model_module": "@jupyter-widgets/controls",
1642
+ "_model_module_version": "1.5.0",
1643
+ "_model_name": "FloatProgressModel",
1644
+ "_view_count": null,
1645
+ "_view_module": "@jupyter-widgets/controls",
1646
+ "_view_module_version": "1.5.0",
1647
+ "_view_name": "ProgressView",
1648
+ "bar_style": "success",
1649
+ "description": "100%",
1650
+ "description_tooltip": null,
1651
+ "layout": "IPY_MODEL_e592db98c0c34c5e800f5d7b6d3c099e",
1652
+ "max": 1000,
1653
+ "min": 0,
1654
+ "orientation": "horizontal",
1655
+ "style": "IPY_MODEL_393697738e724e9fad4d163de0a77840",
1656
+ "value": 1000
1657
+ }
1658
+ },
1659
+ "7e1325e57bf9417e93d7ef180794ab3c": {
1660
+ "model_module": "@jupyter-widgets/controls",
1661
+ "model_name": "FloatProgressModel",
1662
+ "state": {
1663
+ "_dom_classes": [],
1664
+ "_model_module": "@jupyter-widgets/controls",
1665
+ "_model_module_version": "1.5.0",
1666
+ "_model_name": "FloatProgressModel",
1667
+ "_view_count": null,
1668
+ "_view_module": "@jupyter-widgets/controls",
1669
+ "_view_module_version": "1.5.0",
1670
+ "_view_name": "ProgressView",
1671
+ "bar_style": "success",
1672
+ "description": "100%",
1673
+ "description_tooltip": null,
1674
+ "layout": "IPY_MODEL_7fb1118c0b4443b6b6dbb5803e9ec2e8",
1675
+ "max": 1000,
1676
+ "min": 0,
1677
+ "orientation": "horizontal",
1678
+ "style": "IPY_MODEL_ca722dcd857c433c9058585e31a1673d",
1679
+ "value": 1000
1680
+ }
1681
+ },
1682
+ "7fb1118c0b4443b6b6dbb5803e9ec2e8": {
1683
+ "model_module": "@jupyter-widgets/base",
1684
+ "model_name": "LayoutModel",
1685
+ "state": {
1686
+ "_model_module": "@jupyter-widgets/base",
1687
+ "_model_module_version": "1.2.0",
1688
+ "_model_name": "LayoutModel",
1689
+ "_view_count": null,
1690
+ "_view_module": "@jupyter-widgets/base",
1691
+ "_view_module_version": "1.2.0",
1692
+ "_view_name": "LayoutView",
1693
+ "align_content": null,
1694
+ "align_items": null,
1695
+ "align_self": null,
1696
+ "border": null,
1697
+ "bottom": null,
1698
+ "display": null,
1699
+ "flex": null,
1700
+ "flex_flow": null,
1701
+ "grid_area": null,
1702
+ "grid_auto_columns": null,
1703
+ "grid_auto_flow": null,
1704
+ "grid_auto_rows": null,
1705
+ "grid_column": null,
1706
+ "grid_gap": null,
1707
+ "grid_row": null,
1708
+ "grid_template_areas": null,
1709
+ "grid_template_columns": null,
1710
+ "grid_template_rows": null,
1711
+ "height": null,
1712
+ "justify_content": null,
1713
+ "justify_items": null,
1714
+ "left": null,
1715
+ "margin": null,
1716
+ "max_height": null,
1717
+ "max_width": null,
1718
+ "min_height": null,
1719
+ "min_width": null,
1720
+ "object_fit": null,
1721
+ "object_position": null,
1722
+ "order": null,
1723
+ "overflow": null,
1724
+ "overflow_x": null,
1725
+ "overflow_y": null,
1726
+ "padding": null,
1727
+ "right": null,
1728
+ "top": null,
1729
+ "visibility": null,
1730
+ "width": null
1731
+ }
1732
+ },
1733
+ "7fefe9e1121a43558d773500aef8935c": {
1734
+ "model_module": "@jupyter-widgets/base",
1735
+ "model_name": "LayoutModel",
1736
+ "state": {
1737
+ "_model_module": "@jupyter-widgets/base",
1738
+ "_model_module_version": "1.2.0",
1739
+ "_model_name": "LayoutModel",
1740
+ "_view_count": null,
1741
+ "_view_module": "@jupyter-widgets/base",
1742
+ "_view_module_version": "1.2.0",
1743
+ "_view_name": "LayoutView",
1744
+ "align_content": null,
1745
+ "align_items": null,
1746
+ "align_self": null,
1747
+ "border": null,
1748
+ "bottom": null,
1749
+ "display": null,
1750
+ "flex": null,
1751
+ "flex_flow": null,
1752
+ "grid_area": null,
1753
+ "grid_auto_columns": null,
1754
+ "grid_auto_flow": null,
1755
+ "grid_auto_rows": null,
1756
+ "grid_column": null,
1757
+ "grid_gap": null,
1758
+ "grid_row": null,
1759
+ "grid_template_areas": null,
1760
+ "grid_template_columns": null,
1761
+ "grid_template_rows": null,
1762
+ "height": null,
1763
+ "justify_content": null,
1764
+ "justify_items": null,
1765
+ "left": null,
1766
+ "margin": null,
1767
+ "max_height": null,
1768
+ "max_width": null,
1769
+ "min_height": null,
1770
+ "min_width": null,
1771
+ "object_fit": null,
1772
+ "object_position": null,
1773
+ "order": null,
1774
+ "overflow": null,
1775
+ "overflow_x": null,
1776
+ "overflow_y": null,
1777
+ "padding": null,
1778
+ "right": null,
1779
+ "top": null,
1780
+ "visibility": null,
1781
+ "width": null
1782
+ }
1783
+ },
1784
+ "863c5ce96db84e3da162072c9a13c913": {
1785
+ "model_module": "@jupyter-widgets/base",
1786
+ "model_name": "LayoutModel",
1787
+ "state": {
1788
+ "_model_module": "@jupyter-widgets/base",
1789
+ "_model_module_version": "1.2.0",
1790
+ "_model_name": "LayoutModel",
1791
+ "_view_count": null,
1792
+ "_view_module": "@jupyter-widgets/base",
1793
+ "_view_module_version": "1.2.0",
1794
+ "_view_name": "LayoutView",
1795
+ "align_content": null,
1796
+ "align_items": null,
1797
+ "align_self": null,
1798
+ "border": null,
1799
+ "bottom": null,
1800
+ "display": null,
1801
+ "flex": null,
1802
+ "flex_flow": null,
1803
+ "grid_area": null,
1804
+ "grid_auto_columns": null,
1805
+ "grid_auto_flow": null,
1806
+ "grid_auto_rows": null,
1807
+ "grid_column": null,
1808
+ "grid_gap": null,
1809
+ "grid_row": null,
1810
+ "grid_template_areas": null,
1811
+ "grid_template_columns": null,
1812
+ "grid_template_rows": null,
1813
+ "height": null,
1814
+ "justify_content": null,
1815
+ "justify_items": null,
1816
+ "left": null,
1817
+ "margin": null,
1818
+ "max_height": null,
1819
+ "max_width": null,
1820
+ "min_height": null,
1821
+ "min_width": null,
1822
+ "object_fit": null,
1823
+ "object_position": null,
1824
+ "order": null,
1825
+ "overflow": null,
1826
+ "overflow_x": null,
1827
+ "overflow_y": null,
1828
+ "padding": null,
1829
+ "right": null,
1830
+ "top": null,
1831
+ "visibility": null,
1832
+ "width": null
1833
+ }
1834
+ },
1835
+ "8968319cdaca476fb15c11a388dce39a": {
1836
+ "model_module": "@jupyter-widgets/controls",
1837
+ "model_name": "HBoxModel",
1838
+ "state": {
1839
+ "_dom_classes": [],
1840
+ "_model_module": "@jupyter-widgets/controls",
1841
+ "_model_module_version": "1.5.0",
1842
+ "_model_name": "HBoxModel",
1843
+ "_view_count": null,
1844
+ "_view_module": "@jupyter-widgets/controls",
1845
+ "_view_module_version": "1.5.0",
1846
+ "_view_name": "HBoxView",
1847
+ "box_style": "",
1848
+ "children": [
1849
+ "IPY_MODEL_a725734893004a45b61194f649f5f602",
1850
+ "IPY_MODEL_c4a24656d67844e995d3b8e175c6c497"
1851
+ ],
1852
+ "layout": "IPY_MODEL_863c5ce96db84e3da162072c9a13c913"
1853
+ }
1854
+ },
1855
+ "89fdda6e6688476495ca297bfe010bf8": {
1856
+ "model_module": "@jupyter-widgets/controls",
1857
+ "model_name": "DescriptionStyleModel",
1858
+ "state": {
1859
+ "_model_module": "@jupyter-widgets/controls",
1860
+ "_model_module_version": "1.5.0",
1861
+ "_model_name": "DescriptionStyleModel",
1862
+ "_view_count": null,
1863
+ "_view_module": "@jupyter-widgets/base",
1864
+ "_view_module_version": "1.2.0",
1865
+ "_view_name": "StyleView",
1866
+ "description_width": ""
1867
+ }
1868
+ },
1869
+ "8b961844b5004905922531bd805a9d57": {
1870
+ "model_module": "@jupyter-widgets/base",
1871
+ "model_name": "LayoutModel",
1872
+ "state": {
1873
+ "_model_module": "@jupyter-widgets/base",
1874
+ "_model_module_version": "1.2.0",
1875
+ "_model_name": "LayoutModel",
1876
+ "_view_count": null,
1877
+ "_view_module": "@jupyter-widgets/base",
1878
+ "_view_module_version": "1.2.0",
1879
+ "_view_name": "LayoutView",
1880
+ "align_content": null,
1881
+ "align_items": null,
1882
+ "align_self": null,
1883
+ "border": null,
1884
+ "bottom": null,
1885
+ "display": null,
1886
+ "flex": null,
1887
+ "flex_flow": null,
1888
+ "grid_area": null,
1889
+ "grid_auto_columns": null,
1890
+ "grid_auto_flow": null,
1891
+ "grid_auto_rows": null,
1892
+ "grid_column": null,
1893
+ "grid_gap": null,
1894
+ "grid_row": null,
1895
+ "grid_template_areas": null,
1896
+ "grid_template_columns": null,
1897
+ "grid_template_rows": null,
1898
+ "height": null,
1899
+ "justify_content": null,
1900
+ "justify_items": null,
1901
+ "left": null,
1902
+ "margin": null,
1903
+ "max_height": null,
1904
+ "max_width": null,
1905
+ "min_height": null,
1906
+ "min_width": null,
1907
+ "object_fit": null,
1908
+ "object_position": null,
1909
+ "order": null,
1910
+ "overflow": null,
1911
+ "overflow_x": null,
1912
+ "overflow_y": null,
1913
+ "padding": null,
1914
+ "right": null,
1915
+ "top": null,
1916
+ "visibility": null,
1917
+ "width": null
1918
+ }
1919
+ },
1920
+ "8cfbd3b14b23417993270f851a2d8ff9": {
1921
+ "model_module": "@jupyter-widgets/base",
1922
+ "model_name": "LayoutModel",
1923
+ "state": {
1924
+ "_model_module": "@jupyter-widgets/base",
1925
+ "_model_module_version": "1.2.0",
1926
+ "_model_name": "LayoutModel",
1927
+ "_view_count": null,
1928
+ "_view_module": "@jupyter-widgets/base",
1929
+ "_view_module_version": "1.2.0",
1930
+ "_view_name": "LayoutView",
1931
+ "align_content": null,
1932
+ "align_items": null,
1933
+ "align_self": null,
1934
+ "border": null,
1935
+ "bottom": null,
1936
+ "display": null,
1937
+ "flex": null,
1938
+ "flex_flow": null,
1939
+ "grid_area": null,
1940
+ "grid_auto_columns": null,
1941
+ "grid_auto_flow": null,
1942
+ "grid_auto_rows": null,
1943
+ "grid_column": null,
1944
+ "grid_gap": null,
1945
+ "grid_row": null,
1946
+ "grid_template_areas": null,
1947
+ "grid_template_columns": null,
1948
+ "grid_template_rows": null,
1949
+ "height": null,
1950
+ "justify_content": null,
1951
+ "justify_items": null,
1952
+ "left": null,
1953
+ "margin": null,
1954
+ "max_height": null,
1955
+ "max_width": null,
1956
+ "min_height": null,
1957
+ "min_width": null,
1958
+ "object_fit": null,
1959
+ "object_position": null,
1960
+ "order": null,
1961
+ "overflow": null,
1962
+ "overflow_x": null,
1963
+ "overflow_y": null,
1964
+ "padding": null,
1965
+ "right": null,
1966
+ "top": null,
1967
+ "visibility": null,
1968
+ "width": null
1969
+ }
1970
+ },
1971
+ "929946fdfaa04cf59d3b31cf92fc08d1": {
1972
+ "model_module": "@jupyter-widgets/controls",
1973
+ "model_name": "HTMLModel",
1974
+ "state": {
1975
+ "_dom_classes": [],
1976
+ "_model_module": "@jupyter-widgets/controls",
1977
+ "_model_module_version": "1.5.0",
1978
+ "_model_name": "HTMLModel",
1979
+ "_view_count": null,
1980
+ "_view_module": "@jupyter-widgets/controls",
1981
+ "_view_module_version": "1.5.0",
1982
+ "_view_name": "HTMLView",
1983
+ "description": "",
1984
+ "description_tooltip": null,
1985
+ "layout": "IPY_MODEL_cda72c45821a4eb89f1a3ab5510b26d3",
1986
+ "placeholder": "​",
1987
+ "style": "IPY_MODEL_89fdda6e6688476495ca297bfe010bf8",
1988
+ "value": " 1000/1000 [00:08&lt;00:00, 123.32ex/s]"
1989
+ }
1990
+ },
1991
+ "a725734893004a45b61194f649f5f602": {
1992
+ "model_module": "@jupyter-widgets/controls",
1993
+ "model_name": "FloatProgressModel",
1994
+ "state": {
1995
+ "_dom_classes": [],
1996
+ "_model_module": "@jupyter-widgets/controls",
1997
+ "_model_module_version": "1.5.0",
1998
+ "_model_name": "FloatProgressModel",
1999
+ "_view_count": null,
2000
+ "_view_module": "@jupyter-widgets/controls",
2001
+ "_view_module_version": "1.5.0",
2002
+ "_view_name": "ProgressView",
2003
+ "bar_style": "success",
2004
+ "description": "100%",
2005
+ "description_tooltip": null,
2006
+ "layout": "IPY_MODEL_afc33fa78b5d440192c435bfca6f7914",
2007
+ "max": 1000,
2008
+ "min": 0,
2009
+ "orientation": "horizontal",
2010
+ "style": "IPY_MODEL_4f5b06c3a5e44c6cade5bf83634d9f69",
2011
+ "value": 1000
2012
+ }
2013
+ },
2014
+ "aa5c0d374889482697fc0f7ce9c81afe": {
2015
+ "model_module": "@jupyter-widgets/controls",
2016
+ "model_name": "ProgressStyleModel",
2017
+ "state": {
2018
+ "_model_module": "@jupyter-widgets/controls",
2019
+ "_model_module_version": "1.5.0",
2020
+ "_model_name": "ProgressStyleModel",
2021
+ "_view_count": null,
2022
+ "_view_module": "@jupyter-widgets/base",
2023
+ "_view_module_version": "1.2.0",
2024
+ "_view_name": "StyleView",
2025
+ "bar_color": null,
2026
+ "description_width": "initial"
2027
+ }
2028
+ },
2029
+ "afc33fa78b5d440192c435bfca6f7914": {
2030
+ "model_module": "@jupyter-widgets/base",
2031
+ "model_name": "LayoutModel",
2032
+ "state": {
2033
+ "_model_module": "@jupyter-widgets/base",
2034
+ "_model_module_version": "1.2.0",
2035
+ "_model_name": "LayoutModel",
2036
+ "_view_count": null,
2037
+ "_view_module": "@jupyter-widgets/base",
2038
+ "_view_module_version": "1.2.0",
2039
+ "_view_name": "LayoutView",
2040
+ "align_content": null,
2041
+ "align_items": null,
2042
+ "align_self": null,
2043
+ "border": null,
2044
+ "bottom": null,
2045
+ "display": null,
2046
+ "flex": null,
2047
+ "flex_flow": null,
2048
+ "grid_area": null,
2049
+ "grid_auto_columns": null,
2050
+ "grid_auto_flow": null,
2051
+ "grid_auto_rows": null,
2052
+ "grid_column": null,
2053
+ "grid_gap": null,
2054
+ "grid_row": null,
2055
+ "grid_template_areas": null,
2056
+ "grid_template_columns": null,
2057
+ "grid_template_rows": null,
2058
+ "height": null,
2059
+ "justify_content": null,
2060
+ "justify_items": null,
2061
+ "left": null,
2062
+ "margin": null,
2063
+ "max_height": null,
2064
+ "max_width": null,
2065
+ "min_height": null,
2066
+ "min_width": null,
2067
+ "object_fit": null,
2068
+ "object_position": null,
2069
+ "order": null,
2070
+ "overflow": null,
2071
+ "overflow_x": null,
2072
+ "overflow_y": null,
2073
+ "padding": null,
2074
+ "right": null,
2075
+ "top": null,
2076
+ "visibility": null,
2077
+ "width": null
2078
+ }
2079
+ },
2080
+ "afedd2328cf141f78775e4cfa7758267": {
2081
+ "model_module": "@jupyter-widgets/base",
2082
+ "model_name": "LayoutModel",
2083
+ "state": {
2084
+ "_model_module": "@jupyter-widgets/base",
2085
+ "_model_module_version": "1.2.0",
2086
+ "_model_name": "LayoutModel",
2087
+ "_view_count": null,
2088
+ "_view_module": "@jupyter-widgets/base",
2089
+ "_view_module_version": "1.2.0",
2090
+ "_view_name": "LayoutView",
2091
+ "align_content": null,
2092
+ "align_items": null,
2093
+ "align_self": null,
2094
+ "border": null,
2095
+ "bottom": null,
2096
+ "display": null,
2097
+ "flex": null,
2098
+ "flex_flow": null,
2099
+ "grid_area": null,
2100
+ "grid_auto_columns": null,
2101
+ "grid_auto_flow": null,
2102
+ "grid_auto_rows": null,
2103
+ "grid_column": null,
2104
+ "grid_gap": null,
2105
+ "grid_row": null,
2106
+ "grid_template_areas": null,
2107
+ "grid_template_columns": null,
2108
+ "grid_template_rows": null,
2109
+ "height": null,
2110
+ "justify_content": null,
2111
+ "justify_items": null,
2112
+ "left": null,
2113
+ "margin": null,
2114
+ "max_height": null,
2115
+ "max_width": null,
2116
+ "min_height": null,
2117
+ "min_width": null,
2118
+ "object_fit": null,
2119
+ "object_position": null,
2120
+ "order": null,
2121
+ "overflow": null,
2122
+ "overflow_x": null,
2123
+ "overflow_y": null,
2124
+ "padding": null,
2125
+ "right": null,
2126
+ "top": null,
2127
+ "visibility": null,
2128
+ "width": null
2129
+ }
2130
+ },
2131
+ "b39b85d8cb05418aa92e8476ad02f755": {
2132
+ "model_module": "@jupyter-widgets/controls",
2133
+ "model_name": "FloatProgressModel",
2134
+ "state": {
2135
+ "_dom_classes": [],
2136
+ "_model_module": "@jupyter-widgets/controls",
2137
+ "_model_module_version": "1.5.0",
2138
+ "_model_name": "FloatProgressModel",
2139
+ "_view_count": null,
2140
+ "_view_module": "@jupyter-widgets/controls",
2141
+ "_view_module_version": "1.5.0",
2142
+ "_view_name": "ProgressView",
2143
+ "bar_style": "success",
2144
+ "description": "100%",
2145
+ "description_tooltip": null,
2146
+ "layout": "IPY_MODEL_8b961844b5004905922531bd805a9d57",
2147
+ "max": 1000,
2148
+ "min": 0,
2149
+ "orientation": "horizontal",
2150
+ "style": "IPY_MODEL_3abb36da57c841838867c56e2a3a325b",
2151
+ "value": 1000
2152
+ }
2153
+ },
2154
+ "b4c6a18610734036a16a14a43174c52e": {
2155
+ "model_module": "@jupyter-widgets/base",
2156
+ "model_name": "LayoutModel",
2157
+ "state": {
2158
+ "_model_module": "@jupyter-widgets/base",
2159
+ "_model_module_version": "1.2.0",
2160
+ "_model_name": "LayoutModel",
2161
+ "_view_count": null,
2162
+ "_view_module": "@jupyter-widgets/base",
2163
+ "_view_module_version": "1.2.0",
2164
+ "_view_name": "LayoutView",
2165
+ "align_content": null,
2166
+ "align_items": null,
2167
+ "align_self": null,
2168
+ "border": null,
2169
+ "bottom": null,
2170
+ "display": null,
2171
+ "flex": null,
2172
+ "flex_flow": null,
2173
+ "grid_area": null,
2174
+ "grid_auto_columns": null,
2175
+ "grid_auto_flow": null,
2176
+ "grid_auto_rows": null,
2177
+ "grid_column": null,
2178
+ "grid_gap": null,
2179
+ "grid_row": null,
2180
+ "grid_template_areas": null,
2181
+ "grid_template_columns": null,
2182
+ "grid_template_rows": null,
2183
+ "height": null,
2184
+ "justify_content": null,
2185
+ "justify_items": null,
2186
+ "left": null,
2187
+ "margin": null,
2188
+ "max_height": null,
2189
+ "max_width": null,
2190
+ "min_height": null,
2191
+ "min_width": null,
2192
+ "object_fit": null,
2193
+ "object_position": null,
2194
+ "order": null,
2195
+ "overflow": null,
2196
+ "overflow_x": null,
2197
+ "overflow_y": null,
2198
+ "padding": null,
2199
+ "right": null,
2200
+ "top": null,
2201
+ "visibility": null,
2202
+ "width": null
2203
+ }
2204
+ },
2205
+ "c42644a4e6184a1cbdb2b453b5dbb7d6": {
2206
+ "model_module": "@jupyter-widgets/controls",
2207
+ "model_name": "HBoxModel",
2208
+ "state": {
2209
+ "_dom_classes": [],
2210
+ "_model_module": "@jupyter-widgets/controls",
2211
+ "_model_module_version": "1.5.0",
2212
+ "_model_name": "HBoxModel",
2213
+ "_view_count": null,
2214
+ "_view_module": "@jupyter-widgets/controls",
2215
+ "_view_module_version": "1.5.0",
2216
+ "_view_name": "HBoxView",
2217
+ "box_style": "",
2218
+ "children": [
2219
+ "IPY_MODEL_e8f1abd85f3e49f991d4c1312ffd416b",
2220
+ "IPY_MODEL_929946fdfaa04cf59d3b31cf92fc08d1"
2221
+ ],
2222
+ "layout": "IPY_MODEL_364ba960eb474c9084cc71851594d345"
2223
+ }
2224
+ },
2225
+ "c4a24656d67844e995d3b8e175c6c497": {
2226
+ "model_module": "@jupyter-widgets/controls",
2227
+ "model_name": "HTMLModel",
2228
+ "state": {
2229
+ "_dom_classes": [],
2230
+ "_model_module": "@jupyter-widgets/controls",
2231
+ "_model_module_version": "1.5.0",
2232
+ "_model_name": "HTMLModel",
2233
+ "_view_count": null,
2234
+ "_view_module": "@jupyter-widgets/controls",
2235
+ "_view_module_version": "1.5.0",
2236
+ "_view_name": "HTMLView",
2237
+ "description": "",
2238
+ "description_tooltip": null,
2239
+ "layout": "IPY_MODEL_b4c6a18610734036a16a14a43174c52e",
2240
+ "placeholder": "​",
2241
+ "style": "IPY_MODEL_f37bd346f8614fec92d6c5b5e9b66d2f",
2242
+ "value": " 1000/1000 [01:41&lt;00:00, 9.86ex/s]"
2243
+ }
2244
+ },
2245
+ "ca722dcd857c433c9058585e31a1673d": {
2246
+ "model_module": "@jupyter-widgets/controls",
2247
+ "model_name": "ProgressStyleModel",
2248
+ "state": {
2249
+ "_model_module": "@jupyter-widgets/controls",
2250
+ "_model_module_version": "1.5.0",
2251
+ "_model_name": "ProgressStyleModel",
2252
+ "_view_count": null,
2253
+ "_view_module": "@jupyter-widgets/base",
2254
+ "_view_module_version": "1.2.0",
2255
+ "_view_name": "StyleView",
2256
+ "bar_color": null,
2257
+ "description_width": "initial"
2258
+ }
2259
+ },
2260
+ "cda72c45821a4eb89f1a3ab5510b26d3": {
2261
+ "model_module": "@jupyter-widgets/base",
2262
+ "model_name": "LayoutModel",
2263
+ "state": {
2264
+ "_model_module": "@jupyter-widgets/base",
2265
+ "_model_module_version": "1.2.0",
2266
+ "_model_name": "LayoutModel",
2267
+ "_view_count": null,
2268
+ "_view_module": "@jupyter-widgets/base",
2269
+ "_view_module_version": "1.2.0",
2270
+ "_view_name": "LayoutView",
2271
+ "align_content": null,
2272
+ "align_items": null,
2273
+ "align_self": null,
2274
+ "border": null,
2275
+ "bottom": null,
2276
+ "display": null,
2277
+ "flex": null,
2278
+ "flex_flow": null,
2279
+ "grid_area": null,
2280
+ "grid_auto_columns": null,
2281
+ "grid_auto_flow": null,
2282
+ "grid_auto_rows": null,
2283
+ "grid_column": null,
2284
+ "grid_gap": null,
2285
+ "grid_row": null,
2286
+ "grid_template_areas": null,
2287
+ "grid_template_columns": null,
2288
+ "grid_template_rows": null,
2289
+ "height": null,
2290
+ "justify_content": null,
2291
+ "justify_items": null,
2292
+ "left": null,
2293
+ "margin": null,
2294
+ "max_height": null,
2295
+ "max_width": null,
2296
+ "min_height": null,
2297
+ "min_width": null,
2298
+ "object_fit": null,
2299
+ "object_position": null,
2300
+ "order": null,
2301
+ "overflow": null,
2302
+ "overflow_x": null,
2303
+ "overflow_y": null,
2304
+ "padding": null,
2305
+ "right": null,
2306
+ "top": null,
2307
+ "visibility": null,
2308
+ "width": null
2309
+ }
2310
+ },
2311
+ "e592db98c0c34c5e800f5d7b6d3c099e": {
2312
+ "model_module": "@jupyter-widgets/base",
2313
+ "model_name": "LayoutModel",
2314
+ "state": {
2315
+ "_model_module": "@jupyter-widgets/base",
2316
+ "_model_module_version": "1.2.0",
2317
+ "_model_name": "LayoutModel",
2318
+ "_view_count": null,
2319
+ "_view_module": "@jupyter-widgets/base",
2320
+ "_view_module_version": "1.2.0",
2321
+ "_view_name": "LayoutView",
2322
+ "align_content": null,
2323
+ "align_items": null,
2324
+ "align_self": null,
2325
+ "border": null,
2326
+ "bottom": null,
2327
+ "display": null,
2328
+ "flex": null,
2329
+ "flex_flow": null,
2330
+ "grid_area": null,
2331
+ "grid_auto_columns": null,
2332
+ "grid_auto_flow": null,
2333
+ "grid_auto_rows": null,
2334
+ "grid_column": null,
2335
+ "grid_gap": null,
2336
+ "grid_row": null,
2337
+ "grid_template_areas": null,
2338
+ "grid_template_columns": null,
2339
+ "grid_template_rows": null,
2340
+ "height": null,
2341
+ "justify_content": null,
2342
+ "justify_items": null,
2343
+ "left": null,
2344
+ "margin": null,
2345
+ "max_height": null,
2346
+ "max_width": null,
2347
+ "min_height": null,
2348
+ "min_width": null,
2349
+ "object_fit": null,
2350
+ "object_position": null,
2351
+ "order": null,
2352
+ "overflow": null,
2353
+ "overflow_x": null,
2354
+ "overflow_y": null,
2355
+ "padding": null,
2356
+ "right": null,
2357
+ "top": null,
2358
+ "visibility": null,
2359
+ "width": null
2360
+ }
2361
+ },
2362
+ "e8f1abd85f3e49f991d4c1312ffd416b": {
2363
+ "model_module": "@jupyter-widgets/controls",
2364
+ "model_name": "FloatProgressModel",
2365
+ "state": {
2366
+ "_dom_classes": [],
2367
+ "_model_module": "@jupyter-widgets/controls",
2368
+ "_model_module_version": "1.5.0",
2369
+ "_model_name": "FloatProgressModel",
2370
+ "_view_count": null,
2371
+ "_view_module": "@jupyter-widgets/controls",
2372
+ "_view_module_version": "1.5.0",
2373
+ "_view_name": "ProgressView",
2374
+ "bar_style": "success",
2375
+ "description": "100%",
2376
+ "description_tooltip": null,
2377
+ "layout": "IPY_MODEL_ff444b253e9a40e5bec755926d83740f",
2378
+ "max": 1000,
2379
+ "min": 0,
2380
+ "orientation": "horizontal",
2381
+ "style": "IPY_MODEL_aa5c0d374889482697fc0f7ce9c81afe",
2382
+ "value": 1000
2383
+ }
2384
+ },
2385
+ "ef4e12f29f1e458f811a400faf21bdcc": {
2386
+ "model_module": "@jupyter-widgets/controls",
2387
+ "model_name": "HTMLModel",
2388
+ "state": {
2389
+ "_dom_classes": [],
2390
+ "_model_module": "@jupyter-widgets/controls",
2391
+ "_model_module_version": "1.5.0",
2392
+ "_model_name": "HTMLModel",
2393
+ "_view_count": null,
2394
+ "_view_module": "@jupyter-widgets/controls",
2395
+ "_view_module_version": "1.5.0",
2396
+ "_view_name": "HTMLView",
2397
+ "description": "",
2398
+ "description_tooltip": null,
2399
+ "layout": "IPY_MODEL_39029f730ae140c7902fca6dac5361ad",
2400
+ "placeholder": "​",
2401
+ "style": "IPY_MODEL_5b6dbe662ca24834b7678638e101e1ff",
2402
+ "value": " 1000/1000 [01:25&lt;00:00, 11.68ex/s]"
2403
+ }
2404
+ },
2405
+ "f0ac3b9b8f664479940c6ee18fc2f13e": {
2406
+ "model_module": "@jupyter-widgets/controls",
2407
+ "model_name": "HTMLModel",
2408
+ "state": {
2409
+ "_dom_classes": [],
2410
+ "_model_module": "@jupyter-widgets/controls",
2411
+ "_model_module_version": "1.5.0",
2412
+ "_model_name": "HTMLModel",
2413
+ "_view_count": null,
2414
+ "_view_module": "@jupyter-widgets/controls",
2415
+ "_view_module_version": "1.5.0",
2416
+ "_view_name": "HTMLView",
2417
+ "description": "",
2418
+ "description_tooltip": null,
2419
+ "layout": "IPY_MODEL_7fefe9e1121a43558d773500aef8935c",
2420
+ "placeholder": "​",
2421
+ "style": "IPY_MODEL_568f11b4462f4b4e95f3ad5947bb275e",
2422
+ "value": " 1000/1000 [01:24&lt;00:00, 11.77ex/s]"
2423
+ }
2424
+ },
2425
+ "f0e34f2bf626434fa73f0def26b3d1a5": {
2426
+ "model_module": "@jupyter-widgets/controls",
2427
+ "model_name": "ProgressStyleModel",
2428
+ "state": {
2429
+ "_model_module": "@jupyter-widgets/controls",
2430
+ "_model_module_version": "1.5.0",
2431
+ "_model_name": "ProgressStyleModel",
2432
+ "_view_count": null,
2433
+ "_view_module": "@jupyter-widgets/base",
2434
+ "_view_module_version": "1.2.0",
2435
+ "_view_name": "StyleView",
2436
+ "bar_color": null,
2437
+ "description_width": "initial"
2438
+ }
2439
+ },
2440
+ "f37bd346f8614fec92d6c5b5e9b66d2f": {
2441
+ "model_module": "@jupyter-widgets/controls",
2442
+ "model_name": "DescriptionStyleModel",
2443
+ "state": {
2444
+ "_model_module": "@jupyter-widgets/controls",
2445
+ "_model_module_version": "1.5.0",
2446
+ "_model_name": "DescriptionStyleModel",
2447
+ "_view_count": null,
2448
+ "_view_module": "@jupyter-widgets/base",
2449
+ "_view_module_version": "1.2.0",
2450
+ "_view_name": "StyleView",
2451
+ "description_width": ""
2452
+ }
2453
+ },
2454
+ "ff444b253e9a40e5bec755926d83740f": {
2455
+ "model_module": "@jupyter-widgets/base",
2456
+ "model_name": "LayoutModel",
2457
+ "state": {
2458
+ "_model_module": "@jupyter-widgets/base",
2459
+ "_model_module_version": "1.2.0",
2460
+ "_model_name": "LayoutModel",
2461
+ "_view_count": null,
2462
+ "_view_module": "@jupyter-widgets/base",
2463
+ "_view_module_version": "1.2.0",
2464
+ "_view_name": "LayoutView",
2465
+ "align_content": null,
2466
+ "align_items": null,
2467
+ "align_self": null,
2468
+ "border": null,
2469
+ "bottom": null,
2470
+ "display": null,
2471
+ "flex": null,
2472
+ "flex_flow": null,
2473
+ "grid_area": null,
2474
+ "grid_auto_columns": null,
2475
+ "grid_auto_flow": null,
2476
+ "grid_auto_rows": null,
2477
+ "grid_column": null,
2478
+ "grid_gap": null,
2479
+ "grid_row": null,
2480
+ "grid_template_areas": null,
2481
+ "grid_template_columns": null,
2482
+ "grid_template_rows": null,
2483
+ "height": null,
2484
+ "justify_content": null,
2485
+ "justify_items": null,
2486
+ "left": null,
2487
+ "margin": null,
2488
+ "max_height": null,
2489
+ "max_width": null,
2490
+ "min_height": null,
2491
+ "min_width": null,
2492
+ "object_fit": null,
2493
+ "object_position": null,
2494
+ "order": null,
2495
+ "overflow": null,
2496
+ "overflow_x": null,
2497
+ "overflow_y": null,
2498
+ "padding": null,
2499
+ "right": null,
2500
+ "top": null,
2501
+ "visibility": null,
2502
+ "width": null
2503
+ }
2504
+ }
2505
+ }
2506
+ }
2507
+ },
2508
+ "nbformat": 4,
2509
+ "nbformat_minor": 1
2510
+ }
Transformer Mechanism/QA/tf/.Trash-0/files/W4A2.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2078c9a6640abf78f244c07b6e5863cfd8b3e9b3d563010e40353df03bc2abdb
3
+ size 448771063
Transformer Mechanism/QA/tf/.Trash-0/files/W4A3UGLQA.tar.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a356ecbc0ab59f15b6bb3708be0f1a5ad495b1db61be7b97781374b509f3c9d6
3
+ size 490112767
Transformer Mechanism/QA/tf/.Trash-0/info/QA_dataset.ipynb.trashinfo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [Trash Info]
2
+ Path=W4A3_UGL/QA_dataset.ipynb
3
+ DeletionDate=2022-12-19T21:11:08
Transformer Mechanism/QA/tf/.Trash-0/info/W4A2.tar.gz.trashinfo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c55e5daa95f2064559c98ace901b6fa7316e88069a8cd4ada77c73c7b53100
3
+ size 63
Transformer Mechanism/QA/tf/.Trash-0/info/W4A3UGLQA.tar.gz.trashinfo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b25931e152b0a335d4129d856f91d75e8550688320c55ec4a0d76fd0000024e2
3
+ size 68
Transformer Mechanism/QA/tf/W4A3_UGL/.DS_Store ADDED
Binary file (6.15 kB). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/._.DS_Store ADDED
Binary file (120 Bytes). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/._QA_dataset.ipynb ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/._data ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/._model ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/._tokenizer ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/.ipynb_checkpoints/._QA_dataset-checkpoint.ipynb ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/.ipynb_checkpoints/QA_dataset-checkpoint.ipynb ADDED
@@ -0,0 +1,2483 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "TBjVwYpHJ7ra"
7
+ },
8
+ "source": [
9
+ "# Transformer Network Application: Question Answering\n",
10
+ "\n",
11
+ "Welcome to Week 4's third and final lab of the course! Congratulations on making it this far. In this notebook you'll explore another application of the transformer architecture that you built.\n",
12
+ "\n",
13
+ "**After this assignment you'll be able to**:\n",
14
+ "\n",
15
+ "* Perform extractive Question Answering \n",
16
+ "* Fine-tune a pre-trained transformer model to a custom dataset\n",
17
+ "* Implement a QA model in TensorFlow and PyTorch"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "markdown",
22
+ "metadata": {
23
+ "id": "SoRb7ykXJ_C4"
24
+ },
25
+ "source": [
26
+ "## Table of Contents\n",
27
+ "\n",
28
+ "\n",
29
+ "- [1 - Extractive Question Answering](#1)\n",
30
+ " - [1.1 - Data Preprocessing](#1-1)\n",
30
+ " - [1.2 - Tokenize and Align with 🤗 Library](#1-2)\n",
31
+ "- [2 - Training](#2)\n",
32
+ " - [2.1 - TensorFlow implementation](#2-1)\n",
33
+ " - [2.2 - PyTorch implementation](#2-2)\n"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "markdown",
39
+ "metadata": {
40
+ "id": "C0k56ZVXLDbi"
41
+ },
42
+ "source": [
43
+ "<a name='1'></a>\n",
44
+ "## 1 - Extractive Question Answering\n",
45
+ "\n",
46
+ "Question answering (QA) is a task of natural language processing that aims to automatically answer questions. The goal of *extractive* QA is to identify the portion of the text that contains the answer to a question. For example, when tasked with answering the question 'When will Jane go to Africa?' given the text data 'Jane visits Africa in September', the question answering model will highlight 'September'.\n",
47
+ "\n",
48
+ "* You will use a variation of the Transformer model you built in the last assignment to answer questions about stories.\n",
49
+ "* You will implement an extractive QA model in TensorFlow and in PyTorch.\n",
50
+ "\n",
51
+ "**Recommendation:**\n",
52
+ "* If you are interested, check out the [Course 4: Natural Language Processing with Attention Models](https://www.coursera.org/learn/attention-models-in-nlp/home/welcome) of our [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?=) where you can learn how to build Transformers and perform QA using the [Trax](https://trax.readthedocs.io/en/latest/) library. \n",
53
+ "\n",
54
+ "<a name='1-1'></a>\n",
55
+ "### 1.1 - Data preprocessing\n",
56
+ "\n",
57
+ "Run the following cell to load the [QA bAbI dataset](https://research.fb.com/downloads/babi/), which is one of the bAbI datasets generated by Facebook AI Research to advance natural language processing."
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": null,
63
+ "metadata": {
64
+ "colab": {
65
+ "base_uri": "https://localhost:8080/"
66
+ },
67
+ "id": "XxU0G_PYLSXJ",
68
+ "outputId": "44e7877f-5c33-45fc-ed83-3aa4920dcc40"
69
+ },
70
+ "outputs": [],
71
+ "source": [
72
+ "from datasets import load_from_disk\n",
73
+ "\n",
74
+ "# Load a dataset and print the first example in the training set\n",
75
+ "babi_dataset = load_from_disk('data/')\n",
76
+ "print(babi_dataset['train'][0])"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "markdown",
81
+ "metadata": {
82
+ "id": "XJwacC3bMhZM"
83
+ },
84
+ "source": [
85
+ "Take a look at the format of the data. For a given story, there are two sentences which serve as the context, and one question. Each of these phrases has an ID. There is also a supporting fact ID which refers to a sentence in the story that helps answer the question. For example, for the question 'What is east of the hallway?', the supporting fact 'The bedroom is east of the hallway' has the ID '2'. There is also the answer, 'bedroom' for the question."
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": null,
91
+ "metadata": {
92
+ "colab": {
93
+ "base_uri": "https://localhost:8080/"
94
+ },
95
+ "id": "aizPXfGlLZ1D",
96
+ "outputId": "0e1d47bc-9c1a-458a-983e-22f47f8184bd"
97
+ },
98
+ "outputs": [],
99
+ "source": [
100
+ "babi_dataset['train'][102]"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "markdown",
105
+ "metadata": {
106
+ "id": "ewtXZUPjMm2l"
107
+ },
108
+ "source": [
109
+ "Check and see if the entire dataset of stories has this format."
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": null,
115
+ "metadata": {
116
+ "id": "55BSWxwuM1hN"
117
+ },
118
+ "outputs": [],
119
+ "source": [
120
+ "type_set = set()\n",
121
+ "for story in babi_dataset['train']:\n",
122
+ " if str(story['story']['type'] )not in type_set:\n",
123
+ " type_set.add(str(story['story']['type'] ))"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": null,
129
+ "metadata": {
130
+ "colab": {
131
+ "base_uri": "https://localhost:8080/"
132
+ },
133
+ "id": "bdJ8VMF1UT7S",
134
+ "outputId": "2b959467-75e8-4e25-e7bb-481b657a2fce"
135
+ },
136
+ "outputs": [],
137
+ "source": [
138
+ "type_set"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "markdown",
143
+ "metadata": {
144
+ "id": "JsHx1tcyMq_k"
145
+ },
146
+ "source": [
147
+ "To make the data easier to work with, you will flatten the dataset to transform it from a dictionary structure to a table structure."
148
+ ]
149
+ },
150
+ {
151
+ "cell_type": "code",
152
+ "execution_count": null,
153
+ "metadata": {
154
+ "id": "YxixFI-pVOK9"
155
+ },
156
+ "outputs": [],
157
+ "source": [
158
+ "flattened_babi = babi_dataset.flatten()"
159
+ ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": null,
164
+ "metadata": {
165
+ "colab": {
166
+ "base_uri": "https://localhost:8080/"
167
+ },
168
+ "id": "kXU43CqCdX98",
169
+ "outputId": "e968ff5e-0db0-4e9d-e1e9-e93f965b2582"
170
+ },
171
+ "outputs": [],
172
+ "source": [
173
+ "flattened_babi"
174
+ ]
175
+ },
176
+ {
177
+ "cell_type": "code",
178
+ "execution_count": null,
179
+ "metadata": {
180
+ "colab": {
181
+ "base_uri": "https://localhost:8080/"
182
+ },
183
+ "id": "OQw59MgT6Luh",
184
+ "outputId": "ea5eac53-027e-42d3-d19f-98ed7863de2b"
185
+ },
186
+ "outputs": [],
187
+ "source": [
188
+ "next(iter(flattened_babi['train']))"
189
+ ]
190
+ },
191
+ {
192
+ "cell_type": "markdown",
193
+ "metadata": {
194
+ "id": "4vXfmhOPMvt1"
195
+ },
196
+ "source": [
197
+ "Now it is much easier to access the information you need! You can now easily extract the answer, question, and facts from the story, and also join the facts into a single entry under 'sentences'."
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "code",
202
+ "execution_count": null,
203
+ "metadata": {
204
+ "id": "O5NcABwkdbrf"
205
+ },
206
+ "outputs": [],
207
+ "source": [
208
+ "def get_question_and_facts(story):\n",
209
+ " dic = {}\n",
210
+ " dic['question'] = story['story.text'][2]\n",
211
+ " dic['sentences'] = ' '.join([story['story.text'][0], story['story.text'][1]])\n",
212
+ " dic['answer'] = story['story.answer'][2]\n",
213
+ " return dic"
214
+ ]
215
+ },
216
+ {
217
+ "cell_type": "code",
218
+ "execution_count": null,
219
+ "metadata": {
220
+ "colab": {
221
+ "base_uri": "https://localhost:8080/",
222
+ "height": 115,
223
+ "referenced_widgets": [
224
+ "44b7bea3e09d4e5684921c66dd4c7514",
225
+ "6af3ec5091d74bd1a95bf02a87dd240b",
226
+ "7e1325e57bf9417e93d7ef180794ab3c",
227
+ "3dab28395f3f475d8242e4d4d45ed059",
228
+ "ca722dcd857c433c9058585e31a1673d",
229
+ "7fb1118c0b4443b6b6dbb5803e9ec2e8",
230
+ "58718e12f1b7459989ab5296846c4be6",
231
+ "63b4ebafcead4c0784b5511219a6a198",
232
+ "c42644a4e6184a1cbdb2b453b5dbb7d6",
233
+ "364ba960eb474c9084cc71851594d345",
234
+ "e8f1abd85f3e49f991d4c1312ffd416b",
235
+ "929946fdfaa04cf59d3b31cf92fc08d1",
236
+ "aa5c0d374889482697fc0f7ce9c81afe",
237
+ "ff444b253e9a40e5bec755926d83740f",
238
+ "89fdda6e6688476495ca297bfe010bf8",
239
+ "cda72c45821a4eb89f1a3ab5510b26d3"
240
+ ]
241
+ },
242
+ "id": "LHKNQ75afMoZ",
243
+ "outputId": "6ceeae5c-392c-4553-c487-14a648eb9209"
244
+ },
245
+ "outputs": [],
246
+ "source": [
247
+ "processed = flattened_babi.map(get_question_and_facts)"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "code",
252
+ "execution_count": null,
253
+ "metadata": {
254
+ "colab": {
255
+ "base_uri": "https://localhost:8080/"
256
+ },
257
+ "id": "KaTacKMufPba",
258
+ "outputId": "2433d446-e985-45cd-a200-f9805b4056bd"
259
+ },
260
+ "outputs": [],
261
+ "source": [
262
+ "processed['train'][2]"
263
+ ]
264
+ },
265
+ {
266
+ "cell_type": "code",
267
+ "execution_count": null,
268
+ "metadata": {
269
+ "colab": {
270
+ "base_uri": "https://localhost:8080/"
271
+ },
272
+ "id": "IOrYr5LI0pbP",
273
+ "outputId": "8142f23c-7dab-49b9-8027-fbe7364ae4e9"
274
+ },
275
+ "outputs": [],
276
+ "source": [
277
+ "processed['test'][2]"
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "markdown",
282
+ "metadata": {
283
+ "id": "oN7D3fszM2hy"
284
+ },
285
+ "source": [
286
+ "The goal of extractive QA is to find the part of the text that contains the answer to the question. You will identify the position of the answer using the indexes of the string. For example, if the answer to some question was 'September', you would need to find the start and end string indices of the word 'September' in the context sentence 'Jane visits Africa in September.'\n",
287
+ "\n",
288
+ "\n",
289
+ "Use this next function to get the start and end indices of the answer in each of the stories in your dataset."
290
+ ]
291
+ },
292
+ {
293
+ "cell_type": "code",
294
+ "execution_count": null,
295
+ "metadata": {
296
+ "id": "J1JJx3PafSyR"
297
+ },
298
+ "outputs": [],
299
+ "source": [
300
+ "def get_start_end_idx(story):\n",
301
+ " str_idx = story['sentences'].find(story['answer'])\n",
302
+ " end_idx = str_idx + len(story['answer'])\n",
303
+ " return {'str_idx':str_idx,\n",
304
+ " 'end_idx': end_idx}"
305
+ ]
306
+ },
307
+ {
308
+ "cell_type": "code",
309
+ "execution_count": null,
310
+ "metadata": {
311
+ "colab": {
312
+ "base_uri": "https://localhost:8080/",
313
+ "height": 115,
314
+ "referenced_widgets": [
315
+ "8968319cdaca476fb15c11a388dce39a",
316
+ "863c5ce96db84e3da162072c9a13c913",
317
+ "a725734893004a45b61194f649f5f602",
318
+ "c4a24656d67844e995d3b8e175c6c497",
319
+ "4f5b06c3a5e44c6cade5bf83634d9f69",
320
+ "afc33fa78b5d440192c435bfca6f7914",
321
+ "f37bd346f8614fec92d6c5b5e9b66d2f",
322
+ "b4c6a18610734036a16a14a43174c52e",
323
+ "07aaa9b79a744856b19d723370d6e588",
324
+ "afedd2328cf141f78775e4cfa7758267",
325
+ "b39b85d8cb05418aa92e8476ad02f755",
326
+ "0a8534ac52af4d48ad82b66463ad08c3",
327
+ "3abb36da57c841838867c56e2a3a325b",
328
+ "8b961844b5004905922531bd805a9d57",
329
+ "31fc08a1e7e04f6b9b3ea400ccfaea75",
330
+ "8cfbd3b14b23417993270f851a2d8ff9"
331
+ ]
332
+ },
333
+ "id": "4e7BdgJJhwXi",
334
+ "outputId": "d9c7a923-d2eb-4533-f37e-4f269f22eb89"
335
+ },
336
+ "outputs": [],
337
+ "source": [
338
+ "processed = processed.map(get_start_end_idx)"
339
+ ]
340
+ },
341
+ {
342
+ "cell_type": "code",
343
+ "execution_count": null,
344
+ "metadata": {
345
+ "colab": {
346
+ "base_uri": "https://localhost:8080/"
347
+ },
348
+ "id": "P8ytxyfvh0kB",
349
+ "outputId": "c008b161-be24-40bb-a32d-47d92e624787"
350
+ },
351
+ "outputs": [],
352
+ "source": [
353
+ "num = 187\n",
354
+ "print(processed['test'][num])\n",
355
+ "start_idx = processed['test'][num]['str_idx']\n",
356
+ "end_idx = processed['test'][num]['end_idx']\n",
357
+ "print('answer:', processed['test'][num]['sentences'][start_idx:end_idx])"
358
+ ]
359
+ },
360
+ {
361
+ "cell_type": "markdown",
362
+ "metadata": {
363
+ "id": "VVX3TA2xM-vJ"
364
+ },
365
+ "source": [
366
+ "<a name='1-2'></a>\n",
367
+ "### 1.2 - Tokenize and Align with 🤗 Library\n",
368
+ "\n",
369
+ "Now you have all the data you need to train a Transformer model to perform Question Answering! You are ready for a task you may have already encountered in the Named-Entity Recognition lab - tokenizing and aligning your input. To feed text data to a Transformer model, you will need to tokenize your input using a [🤗 Transformer tokenizer](https://huggingface.co/transformers/main_classes/tokenizer.html). It is crucial that the tokenizer you use matches the Transformer model type you are using! In this exercise, you will use the 🤗 [DistilBERT fast tokenizer](https://huggingface.co/transformers/model_doc/distilbert.html), which standardizes the length of your sequence to 512 and pads with zeros. "
370
+ ]
371
+ },
372
+ {
373
+ "cell_type": "markdown",
374
+ "metadata": {
375
+ "id": "c892hk9NNF9O"
376
+ },
377
+ "source": [
378
+ "Transformer models are often trained with tokenizers that split words into subwords. For instance, the word 'Africa' might get split into multiple subtokens. This can create some misalignment between the list of tags for the dataset and the list of labels generated by the tokenizer, since the tokenizer can split one word into several, or add special tokens. Before processing, it is important that you align the start and end indices with the tokens associated with the target answer word with a `tokenize_align()` function. In this case, since you are interested in the start and end indices of the answer, you will want to align the index of the sentence to match the index of the token for a word. \n"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": null,
384
+ "metadata": {
385
+ "id": "UI-9P7VYitxv"
386
+ },
387
+ "outputs": [],
388
+ "source": [
389
+ "from transformers import DistilBertTokenizerFast\n",
390
+ "tokenizer = DistilBertTokenizerFast.from_pretrained('tokenizer/')"
391
+ ]
392
+ },
393
+ {
394
+ "cell_type": "code",
395
+ "execution_count": null,
396
+ "metadata": {
397
+ "id": "Pex-YXJnnwb9"
398
+ },
399
+ "outputs": [],
400
+ "source": [
401
+ "def tokenize_align(example):\n",
402
+ " encoding = tokenizer(example['sentences'], example['question'], truncation=True, padding=True, max_length=tokenizer.model_max_length)\n",
403
+ " start_positions = encoding.char_to_token(example['str_idx'])\n",
404
+ " end_positions = encoding.char_to_token(example['end_idx']-1)\n",
405
+ " if start_positions is None:\n",
406
+ " start_positions = tokenizer.model_max_length\n",
407
+ " if end_positions is None:\n",
408
+ " end_positions = tokenizer.model_max_length\n",
409
+ " return {'input_ids': encoding['input_ids'],\n",
410
+ " 'attention_mask': encoding['attention_mask'],\n",
411
+ " 'start_positions': start_positions,\n",
412
+ " 'end_positions': end_positions}"
413
+ ]
414
+ },
415
+ {
416
+ "cell_type": "code",
417
+ "execution_count": null,
418
+ "metadata": {
419
+ "colab": {
420
+ "base_uri": "https://localhost:8080/",
421
+ "height": 115,
422
+ "referenced_widgets": [
423
+ "4d9152a30e824931983a425ee6d607a6",
424
+ "1f2773e3e80c4dd8b6b26e171bf33bc7",
425
+ "013f041c3e0b4e35bf2432fc345cb7bf",
426
+ "ef4e12f29f1e458f811a400faf21bdcc",
427
+ "f0e34f2bf626434fa73f0def26b3d1a5",
428
+ "1e6c02317171453cbd3d4d665879b0d4",
429
+ "5b6dbe662ca24834b7678638e101e1ff",
430
+ "39029f730ae140c7902fca6dac5361ad",
431
+ "723acefae33d448199fa5c1a9ec3f246",
432
+ "32a5c82c7a9845c09c11bb4e30c2f1aa",
433
+ "77273c2e4b4e4e4c8ee4b6b344749518",
434
+ "f0ac3b9b8f664479940c6ee18fc2f13e",
435
+ "393697738e724e9fad4d163de0a77840",
436
+ "e592db98c0c34c5e800f5d7b6d3c099e",
437
+ "568f11b4462f4b4e95f3ad5947bb275e",
438
+ "7fefe9e1121a43558d773500aef8935c"
439
+ ]
440
+ },
441
+ "id": "kKyLNWCvksOr",
442
+ "outputId": "7af3d914-4546-430c-c2f0-206b732e5131"
443
+ },
444
+ "outputs": [],
445
+ "source": [
446
+ "qa_dataset = processed.map(tokenize_align)"
447
+ ]
448
+ },
449
+ {
450
+ "cell_type": "code",
451
+ "execution_count": null,
452
+ "metadata": {
453
+ "id": "8v5odGZBmGw0"
454
+ },
455
+ "outputs": [],
456
+ "source": [
457
+ "qa_dataset = qa_dataset.remove_columns(['story.answer', 'story.id', 'story.supporting_ids', 'story.text', 'story.type'])"
458
+ ]
459
+ },
460
+ {
461
+ "cell_type": "code",
462
+ "execution_count": null,
463
+ "metadata": {
464
+ "colab": {
465
+ "base_uri": "https://localhost:8080/"
466
+ },
467
+ "id": "yBHzbjffmJa8",
468
+ "outputId": "b0688636-fdec-4de0-c2d9-69372b1ddbac"
469
+ },
470
+ "outputs": [],
471
+ "source": [
472
+ "qa_dataset['train'][200]"
473
+ ]
474
+ },
475
+ {
476
+ "cell_type": "markdown",
477
+ "metadata": {
478
+ "id": "qw79BQfW4feu"
479
+ },
480
+ "source": [
481
+ "<font color='blue'><b>What you should remember:</b>\n",
482
+ "- The goal of *extractive* QA is to identify the portion of the text that contains the answer to a question.\n",
483
+ "- Transformer models are often trained with tokenizers that split words into subwords.\n",
484
+ " - Before processing, it is important that you align the start and end indices with the tokens associated with the target answer word.\n",
485
+ "</font>"
486
+ ]
487
+ },
488
+ {
489
+ "cell_type": "markdown",
490
+ "metadata": {
491
+ "id": "rFfJozZvNZWG"
492
+ },
493
+ "source": [
494
+ "<a name='2'></a>\n",
495
+ "# 2 - Training \n",
496
+ "\n",
497
+ "Now that you have finished tokenizing and aligning your data, you can feed it into a pre-trained 🤗 Transformer model! You will use a DistilBERT model, which matches the tokenizer you used to preprocess your data."
498
+ ]
499
+ },
500
+ {
501
+ "cell_type": "code",
502
+ "execution_count": null,
503
+ "metadata": {
504
+ "id": "8sdX5XY0Gwwc"
505
+ },
506
+ "outputs": [],
507
+ "source": [
508
+ "train_ds = qa_dataset['train']\n",
509
+ "test_ds = qa_dataset['test']"
510
+ ]
511
+ },
512
+ {
513
+ "cell_type": "code",
514
+ "execution_count": null,
515
+ "metadata": {
516
+ "colab": {
517
+ "base_uri": "https://localhost:8080/"
518
+ },
519
+ "id": "Be5k3ilHsJ6q",
520
+ "outputId": "f2f7fea3-1394-4aaf-b159-994a38476994"
521
+ },
522
+ "outputs": [],
523
+ "source": [
524
+ "from transformers import TFDistilBertForQuestionAnswering\n",
525
+ "model = TFDistilBertForQuestionAnswering.from_pretrained(\"model/tensorflow\", return_dict=False)"
526
+ ]
527
+ },
528
+ {
529
+ "cell_type": "markdown",
530
+ "metadata": {
531
+ "id": "-aQVOG4ANcd2"
532
+ },
533
+ "source": [
534
+ "<a name='2-1'></a>\n",
535
+ "### 2.1 - TensorFlow implementation\n",
536
+ "For this assignment you will execute two implementations, one in TensorFlow and one in PyTorch.\n",
537
+ "\n",
538
+ "\n"
539
+ ]
540
+ },
541
+ {
542
+ "cell_type": "markdown",
543
+ "metadata": {
544
+ "id": "8pCRo_parYMc"
545
+ },
546
+ "source": [
547
+ "\n",
548
+ "#### Train and test datasets\n",
549
+ "\n",
550
+ "**Note:**\n",
551
+ "* In the TensorFlow implementation, you will have to set the data format type to tensors, which may create ragged tensors (tensors of different lengths). \n",
552
+ "* You will have to convert the ragged tensors to normal tensors using the `to_tensor()` method, which pads the tensors and sets the dimensions to `[None, tokenizer.model_max_length]` so you can feed different size tensors into your model based on the batch size. "
553
+ ]
554
+ },
555
+ {
556
+ "cell_type": "code",
557
+ "execution_count": null,
558
+ "metadata": {
559
+ "id": "FbpplBxNtanH"
560
+ },
561
+ "outputs": [],
562
+ "source": [
563
+ "import tensorflow as tf\n",
564
+ "\n",
565
+ "columns_to_return = ['input_ids','attention_mask', 'start_positions', 'end_positions']\n",
566
+ "\n",
567
+ "train_ds.set_format(type='tf', columns=columns_to_return)\n",
568
+ "\n",
569
+ "train_features = {x: train_ds[x] for x in ['input_ids', 'attention_mask']}\n",
570
+ "train_labels = {\"start_positions\": tf.reshape(train_ds['start_positions'], shape=[-1,1]),\n",
571
+ " 'end_positions': tf.reshape(train_ds['end_positions'], shape=[-1,1])}\n",
572
+ "\n",
573
+ "\n",
574
+ "train_tfdataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels)).batch(8)"
575
+ ]
576
+ },
577
+ {
578
+ "cell_type": "markdown",
579
+ "metadata": {
580
+ "id": "0_Jj8Av6rEuN"
581
+ },
582
+ "source": [
583
+ "#### Training \n",
584
+ "\n",
585
+ "It is finally time to start training your model! \n",
586
+ "\n",
587
+ "* Create a custom training function using [tf.GradientTape()](https://www.tensorflow.org/api_docs/python/tf/GradientTape)\n",
588
+ "* Target two loss functions, one for the start index and one for the end index. \n",
589
+ "* `tf.GradientTape()` records the operations performed during forward prop for automatic differentiation during backprop. \n"
590
+ ]
591
+ },
592
+ {
593
+ "cell_type": "code",
594
+ "execution_count": null,
595
+ "metadata": {
596
+ "colab": {
597
+ "base_uri": "https://localhost:8080/"
598
+ },
599
+ "id": "PtZz249vQbLn",
600
+ "outputId": "24cdf861-af63-4581-a0ae-2de29d1880ed"
601
+ },
602
+ "outputs": [],
603
+ "source": [
604
+ "EPOCHS = 3\n",
605
+ "loss_fn1 = tf.keras.losses.SparseCategoricalCrossentropy( from_logits=True)\n",
606
+ "loss_fn2 = tf.keras.losses.SparseCategoricalCrossentropy( from_logits=True)\n",
607
+ "opt = tf.keras.optimizers.Adam(learning_rate=3e-5)\n",
608
+ "\n",
609
+ "losses = []\n",
610
+ "for epoch in range(EPOCHS):\n",
611
+ " print(\"Starting epoch: %d\"% epoch )\n",
612
+ " for step, (x_batch_train, y_batch_train) in enumerate(train_tfdataset):\n",
613
+ " with tf.GradientTape() as tape:\n",
614
+ " answer_start_scores, answer_end_scores = model(x_batch_train)\n",
615
+ " loss_start = loss_fn1(y_batch_train['start_positions'], answer_start_scores)\n",
616
+ " loss_end = loss_fn2(y_batch_train['end_positions'], answer_end_scores)\n",
617
+ " loss = 0.5 * (loss_start + loss_end)\n",
618
+ " losses.append(loss)\n",
619
+ " grads = tape.gradient(loss, model.trainable_weights)\n",
620
+ " opt.apply_gradients(zip(grads, model.trainable_weights))\n",
621
+ "\n",
622
+ " if step % 20 == 0:\n",
623
+ " print(\"Training loss (for one batch) at step %d: %.4f\"% (step, \n",
624
+ " float(loss_start)))\n"
625
+ ]
626
+ },
627
+ {
628
+ "cell_type": "markdown",
629
+ "metadata": {
630
+ "id": "Q8ggB0JUWQuW"
631
+ },
632
+ "source": [
633
+ "Take a look at your losses and try playing around with some of the hyperparameters for better results!"
634
+ ]
635
+ },
636
+ {
637
+ "cell_type": "code",
638
+ "execution_count": null,
639
+ "metadata": {
640
+ "colab": {
641
+ "base_uri": "https://localhost:8080/",
642
+ "height": 282
643
+ },
644
+ "id": "fK91EPvRYFcX",
645
+ "outputId": "6b7099dd-f918-4905-e3a3-fcce2880e506"
646
+ },
647
+ "outputs": [],
648
+ "source": [
649
+ "import matplotlib.pyplot as plt\n",
650
+ "\n",
651
+ "plt.plot(losses)\n",
652
+ "plt.show()"
653
+ ]
654
+ },
655
+ {
656
+ "cell_type": "markdown",
657
+ "metadata": {
658
+ "id": "64OtEmyUWUiM"
659
+ },
660
+ "source": [
661
+ "You have successfully trained your model to help automatically answer questions! Try asking it a question about a story."
662
+ ]
663
+ },
664
+ {
665
+ "cell_type": "code",
666
+ "execution_count": null,
667
+ "metadata": {
668
+ "colab": {
669
+ "base_uri": "https://localhost:8080/"
670
+ },
671
+ "id": "eFniMzpp1bpz",
672
+ "outputId": "0ce0e2a3-3d6a-4e6e-adff-d0c16b622c9a"
673
+ },
674
+ "outputs": [],
675
+ "source": [
676
+ "question, text = 'What is south of the bedroom?','The hallway is south of the garden. The garden is south of the bedroom.'\n",
677
+ "input_dict = tokenizer(text, question, return_tensors='tf')\n",
678
+ "outputs = model(input_dict)\n",
679
+ "start_logits = outputs[0]\n",
680
+ "end_logits = outputs[1]\n",
681
+ "\n",
682
+ "all_tokens = tokenizer.convert_ids_to_tokens(input_dict[\"input_ids\"].numpy()[0])\n",
683
+ "answer = ' '.join(all_tokens[tf.math.argmax(start_logits, 1)[0] : tf.math.argmax(end_logits, 1)[0]+1])\n",
684
+ "print(question, answer.capitalize())"
685
+ ]
686
+ },
687
+ {
688
+ "cell_type": "markdown",
689
+ "metadata": {
690
+ "id": "f07OtnCpuKFa"
691
+ },
692
+ "source": [
693
+ "Congratulations! You just implemented your first QA model in TensorFlow. "
694
+ ]
695
+ },
696
+ {
697
+ "cell_type": "markdown",
698
+ "metadata": {
699
+ "id": "9UaM5pY9u8EW"
700
+ },
701
+ "source": [
702
+ "<a name='2-2'></a>\n",
703
+ "### 2.2 - PyTorch implementation\n",
704
+ "\n",
705
+ "[PyTorch](https://pytorch.org/) is an open source machine learning framework developed by Facebook's AI Research lab that can be used for computer vision and natural language processing. As you can imagine, it is quite compatible with the bAbI dataset."
706
+ ]
707
+ },
708
+ {
709
+ "cell_type": "markdown",
710
+ "metadata": {
711
+ "id": "nD9akXoXxMjd"
712
+ },
713
+ "source": [
714
+ "#### Train and test dataset\n",
715
+ "\n",
716
+ "Go ahead and try creating a train and test dataset by importing PyTorch."
717
+ ]
718
+ },
719
+ {
720
+ "cell_type": "code",
721
+ "execution_count": null,
722
+ "metadata": {
723
+ "id": "JxMYWSG173ch"
724
+ },
725
+ "outputs": [],
726
+ "source": [
727
+ "from torch.utils.data import DataLoader\n",
728
+ "\n",
729
+ "columns_to_return = ['input_ids','attention_mask', 'start_positions', 'end_positions']\n",
730
+ "train_ds.set_format(type='pt', columns=columns_to_return)\n",
731
+ "test_ds.set_format(type='pt', columns=columns_to_return)"
732
+ ]
733
+ },
734
+ {
735
+ "cell_type": "markdown",
736
+ "metadata": {
737
+ "id": "OeuzZKlPHAAQ"
738
+ },
739
+ "source": [
740
+ "For the evaluation metrics of the PyTorch implementation, you will change things up a bit and use the [F1 score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html) for the start and end indices, computed over the entire test dataset. "
741
+ ]
742
+ },
743
+ {
744
+ "cell_type": "code",
745
+ "execution_count": null,
746
+ "metadata": {
747
+ "id": "aD9tDpZfJsIB"
748
+ },
749
+ "outputs": [],
750
+ "source": [
751
+ "from sklearn.metrics import f1_score\n",
752
+ "\n",
753
+ "def compute_metrics(pred):\n",
754
+ " start_labels = pred.label_ids[0]\n",
755
+ " start_preds = pred.predictions[0].argmax(-1)\n",
756
+ " end_labels = pred.label_ids[1]\n",
757
+ " end_preds = pred.predictions[1].argmax(-1)\n",
758
+ " \n",
759
+ " f1_start = f1_score(start_labels, start_preds, average='macro')\n",
760
+ " f1_end = f1_score(end_labels, end_preds, average='macro')\n",
761
+ " \n",
762
+ " return {\n",
763
+ " 'f1_start': f1_start,\n",
764
+ " 'f1_end': f1_end,\n",
765
+ " }"
766
+ ]
767
+ },
768
+ {
769
+ "cell_type": "markdown",
770
+ "metadata": {
771
+ "id": "laX5cYQRHMXb"
772
+ },
773
+ "source": [
774
+ "#### Training\n",
775
+ "\n",
776
+ "Now it is time to load a pre-trained model. \n",
777
+ "\n",
778
+ "**Note:** You will be using DistilBERT instead of TFDistilBERT for the PyTorch implementation."
779
+ ]
780
+ },
781
+ {
782
+ "cell_type": "code",
783
+ "execution_count": null,
784
+ "metadata": {},
785
+ "outputs": [],
786
+ "source": [
787
+ "del model # We delete the tensorflow model to avoid memory issues"
788
+ ]
789
+ },
790
+ {
791
+ "cell_type": "code",
792
+ "execution_count": null,
793
+ "metadata": {
794
+ "colab": {
795
+ "base_uri": "https://localhost:8080/"
796
+ },
797
+ "id": "YXFCsNcY79jx",
798
+ "outputId": "09af112f-e1e9-4a47-c988-37ee2a068df2"
799
+ },
800
+ "outputs": [],
801
+ "source": [
802
+ "from transformers import DistilBertForQuestionAnswering\n",
803
+ "\n",
804
+ "pytorch_model = DistilBertForQuestionAnswering.from_pretrained(\"model/pytorch\")"
805
+ ]
806
+ },
807
+ {
808
+ "cell_type": "markdown",
809
+ "metadata": {
810
+ "id": "xCUdMmCxHP6_"
811
+ },
812
+ "source": [
813
+ "Instead of a custom training loop, you will use the [🤗 Trainer](https://huggingface.co/transformers/main_classes/trainer.html), which contains a basic training loop and is fairly easy to implement in PyTorch."
814
+ ]
815
+ },
816
+ {
817
+ "cell_type": "code",
818
+ "execution_count": null,
819
+ "metadata": {
820
+ "colab": {
821
+ "base_uri": "https://localhost:8080/",
822
+ "height": 329
823
+ },
824
+ "id": "1htmS3TV-2Bk",
825
+ "outputId": "cc21bfbb-da09-47f9-ee16-7db0096d35e7"
826
+ },
827
+ "outputs": [],
828
+ "source": [
829
+ "from transformers import Trainer, TrainingArguments\n",
830
+ "\n",
831
+ "training_args = TrainingArguments(\n",
832
+ " output_dir='results', # output directory\n",
833
+ " overwrite_output_dir=True,\n",
834
+ " num_train_epochs=3, # total number of training epochs\n",
835
+ " per_device_train_batch_size=8, # batch size per device during training\n",
836
+ " per_device_eval_batch_size=8, # batch size for evaluation\n",
837
+ " warmup_steps=20, # number of warmup steps for learning rate scheduler\n",
838
+ " weight_decay=0.01, # strength of weight decay\n",
839
+ " logging_dir=None, # directory for storing logs\n",
840
+ " logging_steps=50\n",
841
+ ")\n",
842
+ "\n",
843
+ "trainer = Trainer(\n",
844
+ " model=pytorch_model, # the instantiated 🤗 Transformers model to be trained\n",
845
+ " args=training_args, # training arguments, defined above\n",
846
+ " train_dataset=train_ds, # training dataset\n",
847
+ " eval_dataset=test_ds,\n",
848
+ " compute_metrics=compute_metrics # function that computes the evaluation metrics\n",
849
+ ")\n",
850
+ "\n",
851
+ "trainer.train()"
852
+ ]
853
+ },
854
+ {
855
+ "cell_type": "code",
856
+ "execution_count": null,
857
+ "metadata": {
858
+ "colab": {
859
+ "base_uri": "https://localhost:8080/",
860
+ "height": 207
861
+ },
862
+ "id": "lDzbm7vzAiPJ",
863
+ "outputId": "7cd62f51-a04b-4583-bc0e-e459813d3103"
864
+ },
865
+ "outputs": [],
866
+ "source": [
867
+ "trainer.evaluate(test_ds)"
868
+ ]
869
+ },
870
+ {
871
+ "cell_type": "markdown",
872
+ "metadata": {
873
+ "id": "QAgrcs2pHvVu"
874
+ },
875
+ "source": [
876
+ "Now it is time to ask your PyTorch model a question! \n",
877
+ "* Before testing your model with a question, you can tell PyTorch to send your model and inputs to the GPU if your machine has one, or the CPU if it does not. \n",
878
+ "* You can then proceed to tokenize your input and create PyTorch tensors and send them to your device. \n",
879
+ "* The rest of the pipeline is relatively similar to the one you implemented for TensorFlow. \n"
880
+ ]
881
+ },
882
+ {
883
+ "cell_type": "code",
884
+ "execution_count": null,
885
+ "metadata": {
886
+ "colab": {
887
+ "base_uri": "https://localhost:8080/"
888
+ },
889
+ "id": "yfBe9AFABqUr",
890
+ "outputId": "b5ca6039-8ce2-4e75-9161-1c96a0f39425"
891
+ },
892
+ "outputs": [],
893
+ "source": [
894
+ "import torch\n",
895
+ "\n",
896
+ "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
897
+ "\n",
898
+ "pytorch_model.to(device)\n",
899
+ "\n",
900
+ "question, text = 'What is east of the hallway?','The kitchen is east of the hallway. The garden is south of the bedroom.'\n",
901
+ "\n",
902
+ "input_dict = tokenizer(text, question, return_tensors='pt')\n",
903
+ "\n",
904
+ "input_ids = input_dict['input_ids'].to(device)\n",
905
+ "attention_mask = input_dict['attention_mask'].to(device)\n",
906
+ "\n",
907
+ "outputs = pytorch_model(input_ids, attention_mask=attention_mask)\n",
908
+ "\n",
909
+ "start_logits = outputs[0]\n",
910
+ "end_logits = outputs[1]\n",
911
+ "\n",
912
+ "all_tokens = tokenizer.convert_ids_to_tokens(input_dict[\"input_ids\"].numpy()[0])\n",
913
+ "answer = ' '.join(all_tokens[torch.argmax(start_logits, 1)[0] : torch.argmax(end_logits, 1)[0]+1])\n",
914
+ "\n",
915
+ "print(question, answer.capitalize())"
916
+ ]
917
+ },
918
+ {
919
+ "cell_type": "markdown",
920
+ "metadata": {
921
+ "id": "eGzuHkMZ4q9I"
922
+ },
923
+ "source": [
924
+ "### Congratulations!\n",
925
+ " \n",
926
+ "You've completed this notebook, and can now implement Transformer models for QA tasks!\n",
927
+ "\n",
928
+ "You are now able to:\n",
929
+ "* Perform extractive Question Answering \n",
930
+ "* Fine-tune a pre-trained transformer model to a custom dataset\n",
931
+ "* Implement a QA model in TensorFlow and PyTorch"
932
+ ]
933
+ },
934
+ {
935
+ "cell_type": "markdown",
936
+ "metadata": {
937
+ "id": "G8tAV-584vKE"
938
+ },
939
+ "source": [
940
+ "<font color='blue'><b>What you should remember</b>:\n",
941
+ "- Transformer models are often trained with tokenizers that split words into subwords.\n",
942
+ " - Before processing, it is important that you align the start and end indices with the tokens associated with the target answer word.\n",
943
+ "- PyTorch is a relatively light and easy-to-use framework that can make rapid prototyping easier, while TensorFlow has advantages in scaling and is more widely used in production.\n",
944
+ " - `tf.GradientTape` allows you to build custom training loops in TensorFlow\n",
945
+ " - The 🤗 `Trainer` API gives you a basic PyTorch training loop that is compatible with 🤗 models and datasets"
946
+ ]
947
+ },
948
+ {
949
+ "cell_type": "code",
950
+ "execution_count": null,
951
+ "metadata": {},
952
+ "outputs": [],
953
+ "source": [
954
+ "%%javascript\n",
955
+ "let element = document.getElementById('submit-notebook-button-group');\n",
956
+ "if (!element) {\n",
957
+ " window._save_and_close = function(){\n",
958
+ " IPython.notebook.save_checkpoint();\n",
959
+ " IPython.notebook.session.delete();\n",
960
+ " window.onbeforeunload = null\n",
961
+ " setTimeout(function() {window.close();}, 1000)\n",
962
+ " }\n",
963
+ " let header = document.getElementById('maintoolbar-container');\n",
964
+ " element = document.createElement(\"div\");\n",
965
+ " element.setAttribute(\"class\", \"btn-group\");\n",
966
+ " element.setAttribute(\"id\", \"submit-notebook-button-group\");\n",
967
+ " element.setAttribute(\"align\", \"right\");\n",
968
+ " element.setAttribute(\"style\", \"float:right\")\n",
969
+ " element.innerHTML = '<button class=\"btn btn-default\" title=\"Save and close this notebook.\" style=\"background-color:rgb(42, 115, 204); color:white; padding:4px 8px\" onclick=window._save_and_close()>Save and close</button>'\n",
970
+ " header.appendChild(element); \n",
971
+ "} "
972
+ ]
973
+ },
974
+ {
975
+ "cell_type": "code",
976
+ "execution_count": null,
977
+ "metadata": {},
978
+ "outputs": [],
979
+ "source": []
980
+ }
981
+ ],
982
+ "metadata": {
983
+ "accelerator": "GPU",
984
+ "colab": {
985
+ "collapsed_sections": [],
986
+ "name": "QA-dataset.ipynb",
987
+ "provenance": []
988
+ },
989
+ "kernelspec": {
990
+ "display_name": "Python 3",
991
+ "language": "python",
992
+ "name": "python3"
993
+ },
994
+ "language_info": {
995
+ "codemirror_mode": {
996
+ "name": "ipython",
997
+ "version": 3
998
+ },
999
+ "file_extension": ".py",
1000
+ "mimetype": "text/x-python",
1001
+ "name": "python",
1002
+ "nbconvert_exporter": "python",
1003
+ "pygments_lexer": "ipython3",
1004
+ "version": "3.8.10"
1005
+ },
1006
+ "widgets": {
1007
+ "application/vnd.jupyter.widget-state+json": {
1008
+ "013f041c3e0b4e35bf2432fc345cb7bf": {
1009
+ "model_module": "@jupyter-widgets/controls",
1010
+ "model_name": "FloatProgressModel",
1011
+ "state": {
1012
+ "_dom_classes": [],
1013
+ "_model_module": "@jupyter-widgets/controls",
1014
+ "_model_module_version": "1.5.0",
1015
+ "_model_name": "FloatProgressModel",
1016
+ "_view_count": null,
1017
+ "_view_module": "@jupyter-widgets/controls",
1018
+ "_view_module_version": "1.5.0",
1019
+ "_view_name": "ProgressView",
1020
+ "bar_style": "success",
1021
+ "description": "100%",
1022
+ "description_tooltip": null,
1023
+ "layout": "IPY_MODEL_1e6c02317171453cbd3d4d665879b0d4",
1024
+ "max": 1000,
1025
+ "min": 0,
1026
+ "orientation": "horizontal",
1027
+ "style": "IPY_MODEL_f0e34f2bf626434fa73f0def26b3d1a5",
1028
+ "value": 1000
1029
+ }
1030
+ },
1031
+ "07aaa9b79a744856b19d723370d6e588": {
1032
+ "model_module": "@jupyter-widgets/controls",
1033
+ "model_name": "HBoxModel",
1034
+ "state": {
1035
+ "_dom_classes": [],
1036
+ "_model_module": "@jupyter-widgets/controls",
1037
+ "_model_module_version": "1.5.0",
1038
+ "_model_name": "HBoxModel",
1039
+ "_view_count": null,
1040
+ "_view_module": "@jupyter-widgets/controls",
1041
+ "_view_module_version": "1.5.0",
1042
+ "_view_name": "HBoxView",
1043
+ "box_style": "",
1044
+ "children": [
1045
+ "IPY_MODEL_b39b85d8cb05418aa92e8476ad02f755",
1046
+ "IPY_MODEL_0a8534ac52af4d48ad82b66463ad08c3"
1047
+ ],
1048
+ "layout": "IPY_MODEL_afedd2328cf141f78775e4cfa7758267"
1049
+ }
1050
+ },
1051
+ "0a8534ac52af4d48ad82b66463ad08c3": {
1052
+ "model_module": "@jupyter-widgets/controls",
1053
+ "model_name": "HTMLModel",
1054
+ "state": {
1055
+ "_dom_classes": [],
1056
+ "_model_module": "@jupyter-widgets/controls",
1057
+ "_model_module_version": "1.5.0",
1058
+ "_model_name": "HTMLModel",
1059
+ "_view_count": null,
1060
+ "_view_module": "@jupyter-widgets/controls",
1061
+ "_view_module_version": "1.5.0",
1062
+ "_view_name": "HTMLView",
1063
+ "description": "",
1064
+ "description_tooltip": null,
1065
+ "layout": "IPY_MODEL_8cfbd3b14b23417993270f851a2d8ff9",
1066
+ "placeholder": "​",
1067
+ "style": "IPY_MODEL_31fc08a1e7e04f6b9b3ea400ccfaea75",
1068
+ "value": " 1000/1000 [01:40&lt;00:00, 9.90ex/s]"
1069
+ }
1070
+ },
1071
+ "1e6c02317171453cbd3d4d665879b0d4": {
1072
+ "model_module": "@jupyter-widgets/base",
1073
+ "model_name": "LayoutModel",
1074
+ "state": {
1075
+ "_model_module": "@jupyter-widgets/base",
1076
+ "_model_module_version": "1.2.0",
1077
+ "_model_name": "LayoutModel",
1078
+ "_view_count": null,
1079
+ "_view_module": "@jupyter-widgets/base",
1080
+ "_view_module_version": "1.2.0",
1081
+ "_view_name": "LayoutView",
1082
+ "align_content": null,
1083
+ "align_items": null,
1084
+ "align_self": null,
1085
+ "border": null,
1086
+ "bottom": null,
1087
+ "display": null,
1088
+ "flex": null,
1089
+ "flex_flow": null,
1090
+ "grid_area": null,
1091
+ "grid_auto_columns": null,
1092
+ "grid_auto_flow": null,
1093
+ "grid_auto_rows": null,
1094
+ "grid_column": null,
1095
+ "grid_gap": null,
1096
+ "grid_row": null,
1097
+ "grid_template_areas": null,
1098
+ "grid_template_columns": null,
1099
+ "grid_template_rows": null,
1100
+ "height": null,
1101
+ "justify_content": null,
1102
+ "justify_items": null,
1103
+ "left": null,
1104
+ "margin": null,
1105
+ "max_height": null,
1106
+ "max_width": null,
1107
+ "min_height": null,
1108
+ "min_width": null,
1109
+ "object_fit": null,
1110
+ "object_position": null,
1111
+ "order": null,
1112
+ "overflow": null,
1113
+ "overflow_x": null,
1114
+ "overflow_y": null,
1115
+ "padding": null,
1116
+ "right": null,
1117
+ "top": null,
1118
+ "visibility": null,
1119
+ "width": null
1120
+ }
1121
+ },
1122
+ "1f2773e3e80c4dd8b6b26e171bf33bc7": {
1123
+ "model_module": "@jupyter-widgets/base",
1124
+ "model_name": "LayoutModel",
1125
+ "state": {
1126
+ "_model_module": "@jupyter-widgets/base",
1127
+ "_model_module_version": "1.2.0",
1128
+ "_model_name": "LayoutModel",
1129
+ "_view_count": null,
1130
+ "_view_module": "@jupyter-widgets/base",
1131
+ "_view_module_version": "1.2.0",
1132
+ "_view_name": "LayoutView",
1133
+ "align_content": null,
1134
+ "align_items": null,
1135
+ "align_self": null,
1136
+ "border": null,
1137
+ "bottom": null,
1138
+ "display": null,
1139
+ "flex": null,
1140
+ "flex_flow": null,
1141
+ "grid_area": null,
1142
+ "grid_auto_columns": null,
1143
+ "grid_auto_flow": null,
1144
+ "grid_auto_rows": null,
1145
+ "grid_column": null,
1146
+ "grid_gap": null,
1147
+ "grid_row": null,
1148
+ "grid_template_areas": null,
1149
+ "grid_template_columns": null,
1150
+ "grid_template_rows": null,
1151
+ "height": null,
1152
+ "justify_content": null,
1153
+ "justify_items": null,
1154
+ "left": null,
1155
+ "margin": null,
1156
+ "max_height": null,
1157
+ "max_width": null,
1158
+ "min_height": null,
1159
+ "min_width": null,
1160
+ "object_fit": null,
1161
+ "object_position": null,
1162
+ "order": null,
1163
+ "overflow": null,
1164
+ "overflow_x": null,
1165
+ "overflow_y": null,
1166
+ "padding": null,
1167
+ "right": null,
1168
+ "top": null,
1169
+ "visibility": null,
1170
+ "width": null
1171
+ }
1172
+ },
1173
+ "31fc08a1e7e04f6b9b3ea400ccfaea75": {
1174
+ "model_module": "@jupyter-widgets/controls",
1175
+ "model_name": "DescriptionStyleModel",
1176
+ "state": {
1177
+ "_model_module": "@jupyter-widgets/controls",
1178
+ "_model_module_version": "1.5.0",
1179
+ "_model_name": "DescriptionStyleModel",
1180
+ "_view_count": null,
1181
+ "_view_module": "@jupyter-widgets/base",
1182
+ "_view_module_version": "1.2.0",
1183
+ "_view_name": "StyleView",
1184
+ "description_width": ""
1185
+ }
1186
+ },
1187
+ "32a5c82c7a9845c09c11bb4e30c2f1aa": {
1188
+ "model_module": "@jupyter-widgets/base",
1189
+ "model_name": "LayoutModel",
1190
+ "state": {
1191
+ "_model_module": "@jupyter-widgets/base",
1192
+ "_model_module_version": "1.2.0",
1193
+ "_model_name": "LayoutModel",
1194
+ "_view_count": null,
1195
+ "_view_module": "@jupyter-widgets/base",
1196
+ "_view_module_version": "1.2.0",
1197
+ "_view_name": "LayoutView",
1198
+ "align_content": null,
1199
+ "align_items": null,
1200
+ "align_self": null,
1201
+ "border": null,
1202
+ "bottom": null,
1203
+ "display": null,
1204
+ "flex": null,
1205
+ "flex_flow": null,
1206
+ "grid_area": null,
1207
+ "grid_auto_columns": null,
1208
+ "grid_auto_flow": null,
1209
+ "grid_auto_rows": null,
1210
+ "grid_column": null,
1211
+ "grid_gap": null,
1212
+ "grid_row": null,
1213
+ "grid_template_areas": null,
1214
+ "grid_template_columns": null,
1215
+ "grid_template_rows": null,
1216
+ "height": null,
1217
+ "justify_content": null,
1218
+ "justify_items": null,
1219
+ "left": null,
1220
+ "margin": null,
1221
+ "max_height": null,
1222
+ "max_width": null,
1223
+ "min_height": null,
1224
+ "min_width": null,
1225
+ "object_fit": null,
1226
+ "object_position": null,
1227
+ "order": null,
1228
+ "overflow": null,
1229
+ "overflow_x": null,
1230
+ "overflow_y": null,
1231
+ "padding": null,
1232
+ "right": null,
1233
+ "top": null,
1234
+ "visibility": null,
1235
+ "width": null
1236
+ }
1237
+ },
1238
+ "364ba960eb474c9084cc71851594d345": {
1239
+ "model_module": "@jupyter-widgets/base",
1240
+ "model_name": "LayoutModel",
1241
+ "state": {
1242
+ "_model_module": "@jupyter-widgets/base",
1243
+ "_model_module_version": "1.2.0",
1244
+ "_model_name": "LayoutModel",
1245
+ "_view_count": null,
1246
+ "_view_module": "@jupyter-widgets/base",
1247
+ "_view_module_version": "1.2.0",
1248
+ "_view_name": "LayoutView",
1249
+ "align_content": null,
1250
+ "align_items": null,
1251
+ "align_self": null,
1252
+ "border": null,
1253
+ "bottom": null,
1254
+ "display": null,
1255
+ "flex": null,
1256
+ "flex_flow": null,
1257
+ "grid_area": null,
1258
+ "grid_auto_columns": null,
1259
+ "grid_auto_flow": null,
1260
+ "grid_auto_rows": null,
1261
+ "grid_column": null,
1262
+ "grid_gap": null,
1263
+ "grid_row": null,
1264
+ "grid_template_areas": null,
1265
+ "grid_template_columns": null,
1266
+ "grid_template_rows": null,
1267
+ "height": null,
1268
+ "justify_content": null,
1269
+ "justify_items": null,
1270
+ "left": null,
1271
+ "margin": null,
1272
+ "max_height": null,
1273
+ "max_width": null,
1274
+ "min_height": null,
1275
+ "min_width": null,
1276
+ "object_fit": null,
1277
+ "object_position": null,
1278
+ "order": null,
1279
+ "overflow": null,
1280
+ "overflow_x": null,
1281
+ "overflow_y": null,
1282
+ "padding": null,
1283
+ "right": null,
1284
+ "top": null,
1285
+ "visibility": null,
1286
+ "width": null
1287
+ }
1288
+ },
1289
+ "39029f730ae140c7902fca6dac5361ad": {
1290
+ "model_module": "@jupyter-widgets/base",
1291
+ "model_name": "LayoutModel",
1292
+ "state": {
1293
+ "_model_module": "@jupyter-widgets/base",
1294
+ "_model_module_version": "1.2.0",
1295
+ "_model_name": "LayoutModel",
1296
+ "_view_count": null,
1297
+ "_view_module": "@jupyter-widgets/base",
1298
+ "_view_module_version": "1.2.0",
1299
+ "_view_name": "LayoutView",
1300
+ "align_content": null,
1301
+ "align_items": null,
1302
+ "align_self": null,
1303
+ "border": null,
1304
+ "bottom": null,
1305
+ "display": null,
1306
+ "flex": null,
1307
+ "flex_flow": null,
1308
+ "grid_area": null,
1309
+ "grid_auto_columns": null,
1310
+ "grid_auto_flow": null,
1311
+ "grid_auto_rows": null,
1312
+ "grid_column": null,
1313
+ "grid_gap": null,
1314
+ "grid_row": null,
1315
+ "grid_template_areas": null,
1316
+ "grid_template_columns": null,
1317
+ "grid_template_rows": null,
1318
+ "height": null,
1319
+ "justify_content": null,
1320
+ "justify_items": null,
1321
+ "left": null,
1322
+ "margin": null,
1323
+ "max_height": null,
1324
+ "max_width": null,
1325
+ "min_height": null,
1326
+ "min_width": null,
1327
+ "object_fit": null,
1328
+ "object_position": null,
1329
+ "order": null,
1330
+ "overflow": null,
1331
+ "overflow_x": null,
1332
+ "overflow_y": null,
1333
+ "padding": null,
1334
+ "right": null,
1335
+ "top": null,
1336
+ "visibility": null,
1337
+ "width": null
1338
+ }
1339
+ },
1340
+ "393697738e724e9fad4d163de0a77840": {
1341
+ "model_module": "@jupyter-widgets/controls",
1342
+ "model_name": "ProgressStyleModel",
1343
+ "state": {
1344
+ "_model_module": "@jupyter-widgets/controls",
1345
+ "_model_module_version": "1.5.0",
1346
+ "_model_name": "ProgressStyleModel",
1347
+ "_view_count": null,
1348
+ "_view_module": "@jupyter-widgets/base",
1349
+ "_view_module_version": "1.2.0",
1350
+ "_view_name": "StyleView",
1351
+ "bar_color": null,
1352
+ "description_width": "initial"
1353
+ }
1354
+ },
1355
+ "3abb36da57c841838867c56e2a3a325b": {
1356
+ "model_module": "@jupyter-widgets/controls",
1357
+ "model_name": "ProgressStyleModel",
1358
+ "state": {
1359
+ "_model_module": "@jupyter-widgets/controls",
1360
+ "_model_module_version": "1.5.0",
1361
+ "_model_name": "ProgressStyleModel",
1362
+ "_view_count": null,
1363
+ "_view_module": "@jupyter-widgets/base",
1364
+ "_view_module_version": "1.2.0",
1365
+ "_view_name": "StyleView",
1366
+ "bar_color": null,
1367
+ "description_width": "initial"
1368
+ }
1369
+ },
1370
+ "3dab28395f3f475d8242e4d4d45ed059": {
1371
+ "model_module": "@jupyter-widgets/controls",
1372
+ "model_name": "HTMLModel",
1373
+ "state": {
1374
+ "_dom_classes": [],
1375
+ "_model_module": "@jupyter-widgets/controls",
1376
+ "_model_module_version": "1.5.0",
1377
+ "_model_name": "HTMLModel",
1378
+ "_view_count": null,
1379
+ "_view_module": "@jupyter-widgets/controls",
1380
+ "_view_module_version": "1.5.0",
1381
+ "_view_name": "HTMLView",
1382
+ "description": "",
1383
+ "description_tooltip": null,
1384
+ "layout": "IPY_MODEL_63b4ebafcead4c0784b5511219a6a198",
1385
+ "placeholder": "​",
1386
+ "style": "IPY_MODEL_58718e12f1b7459989ab5296846c4be6",
1387
+ "value": " 1000/1000 [00:10&lt;00:00, 97.35ex/s]"
1388
+ }
1389
+ },
1390
+ "44b7bea3e09d4e5684921c66dd4c7514": {
1391
+ "model_module": "@jupyter-widgets/controls",
1392
+ "model_name": "HBoxModel",
1393
+ "state": {
1394
+ "_dom_classes": [],
1395
+ "_model_module": "@jupyter-widgets/controls",
1396
+ "_model_module_version": "1.5.0",
1397
+ "_model_name": "HBoxModel",
1398
+ "_view_count": null,
1399
+ "_view_module": "@jupyter-widgets/controls",
1400
+ "_view_module_version": "1.5.0",
1401
+ "_view_name": "HBoxView",
1402
+ "box_style": "",
1403
+ "children": [
1404
+ "IPY_MODEL_7e1325e57bf9417e93d7ef180794ab3c",
1405
+ "IPY_MODEL_3dab28395f3f475d8242e4d4d45ed059"
1406
+ ],
1407
+ "layout": "IPY_MODEL_6af3ec5091d74bd1a95bf02a87dd240b"
1408
+ }
1409
+ },
1410
+ "4d9152a30e824931983a425ee6d607a6": {
1411
+ "model_module": "@jupyter-widgets/controls",
1412
+ "model_name": "HBoxModel",
1413
+ "state": {
1414
+ "_dom_classes": [],
1415
+ "_model_module": "@jupyter-widgets/controls",
1416
+ "_model_module_version": "1.5.0",
1417
+ "_model_name": "HBoxModel",
1418
+ "_view_count": null,
1419
+ "_view_module": "@jupyter-widgets/controls",
1420
+ "_view_module_version": "1.5.0",
1421
+ "_view_name": "HBoxView",
1422
+ "box_style": "",
1423
+ "children": [
1424
+ "IPY_MODEL_013f041c3e0b4e35bf2432fc345cb7bf",
1425
+ "IPY_MODEL_ef4e12f29f1e458f811a400faf21bdcc"
1426
+ ],
1427
+ "layout": "IPY_MODEL_1f2773e3e80c4dd8b6b26e171bf33bc7"
1428
+ }
1429
+ },
1430
+ "4f5b06c3a5e44c6cade5bf83634d9f69": {
1431
+ "model_module": "@jupyter-widgets/controls",
1432
+ "model_name": "ProgressStyleModel",
1433
+ "state": {
1434
+ "_model_module": "@jupyter-widgets/controls",
1435
+ "_model_module_version": "1.5.0",
1436
+ "_model_name": "ProgressStyleModel",
1437
+ "_view_count": null,
1438
+ "_view_module": "@jupyter-widgets/base",
1439
+ "_view_module_version": "1.2.0",
1440
+ "_view_name": "StyleView",
1441
+ "bar_color": null,
1442
+ "description_width": "initial"
1443
+ }
1444
+ },
1445
+ "568f11b4462f4b4e95f3ad5947bb275e": {
1446
+ "model_module": "@jupyter-widgets/controls",
1447
+ "model_name": "DescriptionStyleModel",
1448
+ "state": {
1449
+ "_model_module": "@jupyter-widgets/controls",
1450
+ "_model_module_version": "1.5.0",
1451
+ "_model_name": "DescriptionStyleModel",
1452
+ "_view_count": null,
1453
+ "_view_module": "@jupyter-widgets/base",
1454
+ "_view_module_version": "1.2.0",
1455
+ "_view_name": "StyleView",
1456
+ "description_width": ""
1457
+ }
1458
+ },
1459
+ "58718e12f1b7459989ab5296846c4be6": {
1460
+ "model_module": "@jupyter-widgets/controls",
1461
+ "model_name": "DescriptionStyleModel",
1462
+ "state": {
1463
+ "_model_module": "@jupyter-widgets/controls",
1464
+ "_model_module_version": "1.5.0",
1465
+ "_model_name": "DescriptionStyleModel",
1466
+ "_view_count": null,
1467
+ "_view_module": "@jupyter-widgets/base",
1468
+ "_view_module_version": "1.2.0",
1469
+ "_view_name": "StyleView",
1470
+ "description_width": ""
1471
+ }
1472
+ },
1473
+ "5b6dbe662ca24834b7678638e101e1ff": {
1474
+ "model_module": "@jupyter-widgets/controls",
1475
+ "model_name": "DescriptionStyleModel",
1476
+ "state": {
1477
+ "_model_module": "@jupyter-widgets/controls",
1478
+ "_model_module_version": "1.5.0",
1479
+ "_model_name": "DescriptionStyleModel",
1480
+ "_view_count": null,
1481
+ "_view_module": "@jupyter-widgets/base",
1482
+ "_view_module_version": "1.2.0",
1483
+ "_view_name": "StyleView",
1484
+ "description_width": ""
1485
+ }
1486
+ },
1487
+ "63b4ebafcead4c0784b5511219a6a198": {
1488
+ "model_module": "@jupyter-widgets/base",
1489
+ "model_name": "LayoutModel",
1490
+ "state": {
1491
+ "_model_module": "@jupyter-widgets/base",
1492
+ "_model_module_version": "1.2.0",
1493
+ "_model_name": "LayoutModel",
1494
+ "_view_count": null,
1495
+ "_view_module": "@jupyter-widgets/base",
1496
+ "_view_module_version": "1.2.0",
1497
+ "_view_name": "LayoutView",
1498
+ "align_content": null,
1499
+ "align_items": null,
1500
+ "align_self": null,
1501
+ "border": null,
1502
+ "bottom": null,
1503
+ "display": null,
1504
+ "flex": null,
1505
+ "flex_flow": null,
1506
+ "grid_area": null,
1507
+ "grid_auto_columns": null,
1508
+ "grid_auto_flow": null,
1509
+ "grid_auto_rows": null,
1510
+ "grid_column": null,
1511
+ "grid_gap": null,
1512
+ "grid_row": null,
1513
+ "grid_template_areas": null,
1514
+ "grid_template_columns": null,
1515
+ "grid_template_rows": null,
1516
+ "height": null,
1517
+ "justify_content": null,
1518
+ "justify_items": null,
1519
+ "left": null,
1520
+ "margin": null,
1521
+ "max_height": null,
1522
+ "max_width": null,
1523
+ "min_height": null,
1524
+ "min_width": null,
1525
+ "object_fit": null,
1526
+ "object_position": null,
1527
+ "order": null,
1528
+ "overflow": null,
1529
+ "overflow_x": null,
1530
+ "overflow_y": null,
1531
+ "padding": null,
1532
+ "right": null,
1533
+ "top": null,
1534
+ "visibility": null,
1535
+ "width": null
1536
+ }
1537
+ },
1538
+ "6af3ec5091d74bd1a95bf02a87dd240b": {
1539
+ "model_module": "@jupyter-widgets/base",
1540
+ "model_name": "LayoutModel",
1541
+ "state": {
1542
+ "_model_module": "@jupyter-widgets/base",
1543
+ "_model_module_version": "1.2.0",
1544
+ "_model_name": "LayoutModel",
1545
+ "_view_count": null,
1546
+ "_view_module": "@jupyter-widgets/base",
1547
+ "_view_module_version": "1.2.0",
1548
+ "_view_name": "LayoutView",
1549
+ "align_content": null,
1550
+ "align_items": null,
1551
+ "align_self": null,
1552
+ "border": null,
1553
+ "bottom": null,
1554
+ "display": null,
1555
+ "flex": null,
1556
+ "flex_flow": null,
1557
+ "grid_area": null,
1558
+ "grid_auto_columns": null,
1559
+ "grid_auto_flow": null,
1560
+ "grid_auto_rows": null,
1561
+ "grid_column": null,
1562
+ "grid_gap": null,
1563
+ "grid_row": null,
1564
+ "grid_template_areas": null,
1565
+ "grid_template_columns": null,
1566
+ "grid_template_rows": null,
1567
+ "height": null,
1568
+ "justify_content": null,
1569
+ "justify_items": null,
1570
+ "left": null,
1571
+ "margin": null,
1572
+ "max_height": null,
1573
+ "max_width": null,
1574
+ "min_height": null,
1575
+ "min_width": null,
1576
+ "object_fit": null,
1577
+ "object_position": null,
1578
+ "order": null,
1579
+ "overflow": null,
1580
+ "overflow_x": null,
1581
+ "overflow_y": null,
1582
+ "padding": null,
1583
+ "right": null,
1584
+ "top": null,
1585
+ "visibility": null,
1586
+ "width": null
1587
+ }
1588
+ },
1589
+ "723acefae33d448199fa5c1a9ec3f246": {
1590
+ "model_module": "@jupyter-widgets/controls",
1591
+ "model_name": "HBoxModel",
1592
+ "state": {
1593
+ "_dom_classes": [],
1594
+ "_model_module": "@jupyter-widgets/controls",
1595
+ "_model_module_version": "1.5.0",
1596
+ "_model_name": "HBoxModel",
1597
+ "_view_count": null,
1598
+ "_view_module": "@jupyter-widgets/controls",
1599
+ "_view_module_version": "1.5.0",
1600
+ "_view_name": "HBoxView",
1601
+ "box_style": "",
1602
+ "children": [
1603
+ "IPY_MODEL_77273c2e4b4e4e4c8ee4b6b344749518",
1604
+ "IPY_MODEL_f0ac3b9b8f664479940c6ee18fc2f13e"
1605
+ ],
1606
+ "layout": "IPY_MODEL_32a5c82c7a9845c09c11bb4e30c2f1aa"
1607
+ }
1608
+ },
1609
+ "77273c2e4b4e4e4c8ee4b6b344749518": {
1610
+ "model_module": "@jupyter-widgets/controls",
1611
+ "model_name": "FloatProgressModel",
1612
+ "state": {
1613
+ "_dom_classes": [],
1614
+ "_model_module": "@jupyter-widgets/controls",
1615
+ "_model_module_version": "1.5.0",
1616
+ "_model_name": "FloatProgressModel",
1617
+ "_view_count": null,
1618
+ "_view_module": "@jupyter-widgets/controls",
1619
+ "_view_module_version": "1.5.0",
1620
+ "_view_name": "ProgressView",
1621
+ "bar_style": "success",
1622
+ "description": "100%",
1623
+ "description_tooltip": null,
1624
+ "layout": "IPY_MODEL_e592db98c0c34c5e800f5d7b6d3c099e",
1625
+ "max": 1000,
1626
+ "min": 0,
1627
+ "orientation": "horizontal",
1628
+ "style": "IPY_MODEL_393697738e724e9fad4d163de0a77840",
1629
+ "value": 1000
1630
+ }
1631
+ },
1632
+ "7e1325e57bf9417e93d7ef180794ab3c": {
1633
+ "model_module": "@jupyter-widgets/controls",
1634
+ "model_name": "FloatProgressModel",
1635
+ "state": {
1636
+ "_dom_classes": [],
1637
+ "_model_module": "@jupyter-widgets/controls",
1638
+ "_model_module_version": "1.5.0",
1639
+ "_model_name": "FloatProgressModel",
1640
+ "_view_count": null,
1641
+ "_view_module": "@jupyter-widgets/controls",
1642
+ "_view_module_version": "1.5.0",
1643
+ "_view_name": "ProgressView",
1644
+ "bar_style": "success",
1645
+ "description": "100%",
1646
+ "description_tooltip": null,
1647
+ "layout": "IPY_MODEL_7fb1118c0b4443b6b6dbb5803e9ec2e8",
1648
+ "max": 1000,
1649
+ "min": 0,
1650
+ "orientation": "horizontal",
1651
+ "style": "IPY_MODEL_ca722dcd857c433c9058585e31a1673d",
1652
+ "value": 1000
1653
+ }
1654
+ },
1655
+ "7fb1118c0b4443b6b6dbb5803e9ec2e8": {
1656
+ "model_module": "@jupyter-widgets/base",
1657
+ "model_name": "LayoutModel",
1658
+ "state": {
1659
+ "_model_module": "@jupyter-widgets/base",
1660
+ "_model_module_version": "1.2.0",
1661
+ "_model_name": "LayoutModel",
1662
+ "_view_count": null,
1663
+ "_view_module": "@jupyter-widgets/base",
1664
+ "_view_module_version": "1.2.0",
1665
+ "_view_name": "LayoutView",
1666
+ "align_content": null,
1667
+ "align_items": null,
1668
+ "align_self": null,
1669
+ "border": null,
1670
+ "bottom": null,
1671
+ "display": null,
1672
+ "flex": null,
1673
+ "flex_flow": null,
1674
+ "grid_area": null,
1675
+ "grid_auto_columns": null,
1676
+ "grid_auto_flow": null,
1677
+ "grid_auto_rows": null,
1678
+ "grid_column": null,
1679
+ "grid_gap": null,
1680
+ "grid_row": null,
1681
+ "grid_template_areas": null,
1682
+ "grid_template_columns": null,
1683
+ "grid_template_rows": null,
1684
+ "height": null,
1685
+ "justify_content": null,
1686
+ "justify_items": null,
1687
+ "left": null,
1688
+ "margin": null,
1689
+ "max_height": null,
1690
+ "max_width": null,
1691
+ "min_height": null,
1692
+ "min_width": null,
1693
+ "object_fit": null,
1694
+ "object_position": null,
1695
+ "order": null,
1696
+ "overflow": null,
1697
+ "overflow_x": null,
1698
+ "overflow_y": null,
1699
+ "padding": null,
1700
+ "right": null,
1701
+ "top": null,
1702
+ "visibility": null,
1703
+ "width": null
1704
+ }
1705
+ },
1706
+ "7fefe9e1121a43558d773500aef8935c": {
1707
+ "model_module": "@jupyter-widgets/base",
1708
+ "model_name": "LayoutModel",
1709
+ "state": {
1710
+ "_model_module": "@jupyter-widgets/base",
1711
+ "_model_module_version": "1.2.0",
1712
+ "_model_name": "LayoutModel",
1713
+ "_view_count": null,
1714
+ "_view_module": "@jupyter-widgets/base",
1715
+ "_view_module_version": "1.2.0",
1716
+ "_view_name": "LayoutView",
1717
+ "align_content": null,
1718
+ "align_items": null,
1719
+ "align_self": null,
1720
+ "border": null,
1721
+ "bottom": null,
1722
+ "display": null,
1723
+ "flex": null,
1724
+ "flex_flow": null,
1725
+ "grid_area": null,
1726
+ "grid_auto_columns": null,
1727
+ "grid_auto_flow": null,
1728
+ "grid_auto_rows": null,
1729
+ "grid_column": null,
1730
+ "grid_gap": null,
1731
+ "grid_row": null,
1732
+ "grid_template_areas": null,
1733
+ "grid_template_columns": null,
1734
+ "grid_template_rows": null,
1735
+ "height": null,
1736
+ "justify_content": null,
1737
+ "justify_items": null,
1738
+ "left": null,
1739
+ "margin": null,
1740
+ "max_height": null,
1741
+ "max_width": null,
1742
+ "min_height": null,
1743
+ "min_width": null,
1744
+ "object_fit": null,
1745
+ "object_position": null,
1746
+ "order": null,
1747
+ "overflow": null,
1748
+ "overflow_x": null,
1749
+ "overflow_y": null,
1750
+ "padding": null,
1751
+ "right": null,
1752
+ "top": null,
1753
+ "visibility": null,
1754
+ "width": null
1755
+ }
1756
+ },
1757
+ "863c5ce96db84e3da162072c9a13c913": {
1758
+ "model_module": "@jupyter-widgets/base",
1759
+ "model_name": "LayoutModel",
1760
+ "state": {
1761
+ "_model_module": "@jupyter-widgets/base",
1762
+ "_model_module_version": "1.2.0",
1763
+ "_model_name": "LayoutModel",
1764
+ "_view_count": null,
1765
+ "_view_module": "@jupyter-widgets/base",
1766
+ "_view_module_version": "1.2.0",
1767
+ "_view_name": "LayoutView",
1768
+ "align_content": null,
1769
+ "align_items": null,
1770
+ "align_self": null,
1771
+ "border": null,
1772
+ "bottom": null,
1773
+ "display": null,
1774
+ "flex": null,
1775
+ "flex_flow": null,
1776
+ "grid_area": null,
1777
+ "grid_auto_columns": null,
1778
+ "grid_auto_flow": null,
1779
+ "grid_auto_rows": null,
1780
+ "grid_column": null,
1781
+ "grid_gap": null,
1782
+ "grid_row": null,
1783
+ "grid_template_areas": null,
1784
+ "grid_template_columns": null,
1785
+ "grid_template_rows": null,
1786
+ "height": null,
1787
+ "justify_content": null,
1788
+ "justify_items": null,
1789
+ "left": null,
1790
+ "margin": null,
1791
+ "max_height": null,
1792
+ "max_width": null,
1793
+ "min_height": null,
1794
+ "min_width": null,
1795
+ "object_fit": null,
1796
+ "object_position": null,
1797
+ "order": null,
1798
+ "overflow": null,
1799
+ "overflow_x": null,
1800
+ "overflow_y": null,
1801
+ "padding": null,
1802
+ "right": null,
1803
+ "top": null,
1804
+ "visibility": null,
1805
+ "width": null
1806
+ }
1807
+ },
1808
+ "8968319cdaca476fb15c11a388dce39a": {
1809
+ "model_module": "@jupyter-widgets/controls",
1810
+ "model_name": "HBoxModel",
1811
+ "state": {
1812
+ "_dom_classes": [],
1813
+ "_model_module": "@jupyter-widgets/controls",
1814
+ "_model_module_version": "1.5.0",
1815
+ "_model_name": "HBoxModel",
1816
+ "_view_count": null,
1817
+ "_view_module": "@jupyter-widgets/controls",
1818
+ "_view_module_version": "1.5.0",
1819
+ "_view_name": "HBoxView",
1820
+ "box_style": "",
1821
+ "children": [
1822
+ "IPY_MODEL_a725734893004a45b61194f649f5f602",
1823
+ "IPY_MODEL_c4a24656d67844e995d3b8e175c6c497"
1824
+ ],
1825
+ "layout": "IPY_MODEL_863c5ce96db84e3da162072c9a13c913"
1826
+ }
1827
+ },
1828
+ "89fdda6e6688476495ca297bfe010bf8": {
1829
+ "model_module": "@jupyter-widgets/controls",
1830
+ "model_name": "DescriptionStyleModel",
1831
+ "state": {
1832
+ "_model_module": "@jupyter-widgets/controls",
1833
+ "_model_module_version": "1.5.0",
1834
+ "_model_name": "DescriptionStyleModel",
1835
+ "_view_count": null,
1836
+ "_view_module": "@jupyter-widgets/base",
1837
+ "_view_module_version": "1.2.0",
1838
+ "_view_name": "StyleView",
1839
+ "description_width": ""
1840
+ }
1841
+ },
1842
+ "8b961844b5004905922531bd805a9d57": {
1843
+ "model_module": "@jupyter-widgets/base",
1844
+ "model_name": "LayoutModel",
1845
+ "state": {
1846
+ "_model_module": "@jupyter-widgets/base",
1847
+ "_model_module_version": "1.2.0",
1848
+ "_model_name": "LayoutModel",
1849
+ "_view_count": null,
1850
+ "_view_module": "@jupyter-widgets/base",
1851
+ "_view_module_version": "1.2.0",
1852
+ "_view_name": "LayoutView",
1853
+ "align_content": null,
1854
+ "align_items": null,
1855
+ "align_self": null,
1856
+ "border": null,
1857
+ "bottom": null,
1858
+ "display": null,
1859
+ "flex": null,
1860
+ "flex_flow": null,
1861
+ "grid_area": null,
1862
+ "grid_auto_columns": null,
1863
+ "grid_auto_flow": null,
1864
+ "grid_auto_rows": null,
1865
+ "grid_column": null,
1866
+ "grid_gap": null,
1867
+ "grid_row": null,
1868
+ "grid_template_areas": null,
1869
+ "grid_template_columns": null,
1870
+ "grid_template_rows": null,
1871
+ "height": null,
1872
+ "justify_content": null,
1873
+ "justify_items": null,
1874
+ "left": null,
1875
+ "margin": null,
1876
+ "max_height": null,
1877
+ "max_width": null,
1878
+ "min_height": null,
1879
+ "min_width": null,
1880
+ "object_fit": null,
1881
+ "object_position": null,
1882
+ "order": null,
1883
+ "overflow": null,
1884
+ "overflow_x": null,
1885
+ "overflow_y": null,
1886
+ "padding": null,
1887
+ "right": null,
1888
+ "top": null,
1889
+ "visibility": null,
1890
+ "width": null
1891
+ }
1892
+ },
1893
+ "8cfbd3b14b23417993270f851a2d8ff9": {
1894
+ "model_module": "@jupyter-widgets/base",
1895
+ "model_name": "LayoutModel",
1896
+ "state": {
1897
+ "_model_module": "@jupyter-widgets/base",
1898
+ "_model_module_version": "1.2.0",
1899
+ "_model_name": "LayoutModel",
1900
+ "_view_count": null,
1901
+ "_view_module": "@jupyter-widgets/base",
1902
+ "_view_module_version": "1.2.0",
1903
+ "_view_name": "LayoutView",
1904
+ "align_content": null,
1905
+ "align_items": null,
1906
+ "align_self": null,
1907
+ "border": null,
1908
+ "bottom": null,
1909
+ "display": null,
1910
+ "flex": null,
1911
+ "flex_flow": null,
1912
+ "grid_area": null,
1913
+ "grid_auto_columns": null,
1914
+ "grid_auto_flow": null,
1915
+ "grid_auto_rows": null,
1916
+ "grid_column": null,
1917
+ "grid_gap": null,
1918
+ "grid_row": null,
1919
+ "grid_template_areas": null,
1920
+ "grid_template_columns": null,
1921
+ "grid_template_rows": null,
1922
+ "height": null,
1923
+ "justify_content": null,
1924
+ "justify_items": null,
1925
+ "left": null,
1926
+ "margin": null,
1927
+ "max_height": null,
1928
+ "max_width": null,
1929
+ "min_height": null,
1930
+ "min_width": null,
1931
+ "object_fit": null,
1932
+ "object_position": null,
1933
+ "order": null,
1934
+ "overflow": null,
1935
+ "overflow_x": null,
1936
+ "overflow_y": null,
1937
+ "padding": null,
1938
+ "right": null,
1939
+ "top": null,
1940
+ "visibility": null,
1941
+ "width": null
1942
+ }
1943
+ },
1944
+ "929946fdfaa04cf59d3b31cf92fc08d1": {
1945
+ "model_module": "@jupyter-widgets/controls",
1946
+ "model_name": "HTMLModel",
1947
+ "state": {
1948
+ "_dom_classes": [],
1949
+ "_model_module": "@jupyter-widgets/controls",
1950
+ "_model_module_version": "1.5.0",
1951
+ "_model_name": "HTMLModel",
1952
+ "_view_count": null,
1953
+ "_view_module": "@jupyter-widgets/controls",
1954
+ "_view_module_version": "1.5.0",
1955
+ "_view_name": "HTMLView",
1956
+ "description": "",
1957
+ "description_tooltip": null,
1958
+ "layout": "IPY_MODEL_cda72c45821a4eb89f1a3ab5510b26d3",
1959
+ "placeholder": "​",
1960
+ "style": "IPY_MODEL_89fdda6e6688476495ca297bfe010bf8",
1961
+ "value": " 1000/1000 [00:08&lt;00:00, 123.32ex/s]"
1962
+ }
1963
+ },
1964
+ "a725734893004a45b61194f649f5f602": {
1965
+ "model_module": "@jupyter-widgets/controls",
1966
+ "model_name": "FloatProgressModel",
1967
+ "state": {
1968
+ "_dom_classes": [],
1969
+ "_model_module": "@jupyter-widgets/controls",
1970
+ "_model_module_version": "1.5.0",
1971
+ "_model_name": "FloatProgressModel",
1972
+ "_view_count": null,
1973
+ "_view_module": "@jupyter-widgets/controls",
1974
+ "_view_module_version": "1.5.0",
1975
+ "_view_name": "ProgressView",
1976
+ "bar_style": "success",
1977
+ "description": "100%",
1978
+ "description_tooltip": null,
1979
+ "layout": "IPY_MODEL_afc33fa78b5d440192c435bfca6f7914",
1980
+ "max": 1000,
1981
+ "min": 0,
1982
+ "orientation": "horizontal",
1983
+ "style": "IPY_MODEL_4f5b06c3a5e44c6cade5bf83634d9f69",
1984
+ "value": 1000
1985
+ }
1986
+ },
1987
+ "aa5c0d374889482697fc0f7ce9c81afe": {
1988
+ "model_module": "@jupyter-widgets/controls",
1989
+ "model_name": "ProgressStyleModel",
1990
+ "state": {
1991
+ "_model_module": "@jupyter-widgets/controls",
1992
+ "_model_module_version": "1.5.0",
1993
+ "_model_name": "ProgressStyleModel",
1994
+ "_view_count": null,
1995
+ "_view_module": "@jupyter-widgets/base",
1996
+ "_view_module_version": "1.2.0",
1997
+ "_view_name": "StyleView",
1998
+ "bar_color": null,
1999
+ "description_width": "initial"
2000
+ }
2001
+ },
2002
+ "afc33fa78b5d440192c435bfca6f7914": {
2003
+ "model_module": "@jupyter-widgets/base",
2004
+ "model_name": "LayoutModel",
2005
+ "state": {
2006
+ "_model_module": "@jupyter-widgets/base",
2007
+ "_model_module_version": "1.2.0",
2008
+ "_model_name": "LayoutModel",
2009
+ "_view_count": null,
2010
+ "_view_module": "@jupyter-widgets/base",
2011
+ "_view_module_version": "1.2.0",
2012
+ "_view_name": "LayoutView",
2013
+ "align_content": null,
2014
+ "align_items": null,
2015
+ "align_self": null,
2016
+ "border": null,
2017
+ "bottom": null,
2018
+ "display": null,
2019
+ "flex": null,
2020
+ "flex_flow": null,
2021
+ "grid_area": null,
2022
+ "grid_auto_columns": null,
2023
+ "grid_auto_flow": null,
2024
+ "grid_auto_rows": null,
2025
+ "grid_column": null,
2026
+ "grid_gap": null,
2027
+ "grid_row": null,
2028
+ "grid_template_areas": null,
2029
+ "grid_template_columns": null,
2030
+ "grid_template_rows": null,
2031
+ "height": null,
2032
+ "justify_content": null,
2033
+ "justify_items": null,
2034
+ "left": null,
2035
+ "margin": null,
2036
+ "max_height": null,
2037
+ "max_width": null,
2038
+ "min_height": null,
2039
+ "min_width": null,
2040
+ "object_fit": null,
2041
+ "object_position": null,
2042
+ "order": null,
2043
+ "overflow": null,
2044
+ "overflow_x": null,
2045
+ "overflow_y": null,
2046
+ "padding": null,
2047
+ "right": null,
2048
+ "top": null,
2049
+ "visibility": null,
2050
+ "width": null
2051
+ }
2052
+ },
2053
+ "afedd2328cf141f78775e4cfa7758267": {
2054
+ "model_module": "@jupyter-widgets/base",
2055
+ "model_name": "LayoutModel",
2056
+ "state": {
2057
+ "_model_module": "@jupyter-widgets/base",
2058
+ "_model_module_version": "1.2.0",
2059
+ "_model_name": "LayoutModel",
2060
+ "_view_count": null,
2061
+ "_view_module": "@jupyter-widgets/base",
2062
+ "_view_module_version": "1.2.0",
2063
+ "_view_name": "LayoutView",
2064
+ "align_content": null,
2065
+ "align_items": null,
2066
+ "align_self": null,
2067
+ "border": null,
2068
+ "bottom": null,
2069
+ "display": null,
2070
+ "flex": null,
2071
+ "flex_flow": null,
2072
+ "grid_area": null,
2073
+ "grid_auto_columns": null,
2074
+ "grid_auto_flow": null,
2075
+ "grid_auto_rows": null,
2076
+ "grid_column": null,
2077
+ "grid_gap": null,
2078
+ "grid_row": null,
2079
+ "grid_template_areas": null,
2080
+ "grid_template_columns": null,
2081
+ "grid_template_rows": null,
2082
+ "height": null,
2083
+ "justify_content": null,
2084
+ "justify_items": null,
2085
+ "left": null,
2086
+ "margin": null,
2087
+ "max_height": null,
2088
+ "max_width": null,
2089
+ "min_height": null,
2090
+ "min_width": null,
2091
+ "object_fit": null,
2092
+ "object_position": null,
2093
+ "order": null,
2094
+ "overflow": null,
2095
+ "overflow_x": null,
2096
+ "overflow_y": null,
2097
+ "padding": null,
2098
+ "right": null,
2099
+ "top": null,
2100
+ "visibility": null,
2101
+ "width": null
2102
+ }
2103
+ },
2104
+ "b39b85d8cb05418aa92e8476ad02f755": {
2105
+ "model_module": "@jupyter-widgets/controls",
2106
+ "model_name": "FloatProgressModel",
2107
+ "state": {
2108
+ "_dom_classes": [],
2109
+ "_model_module": "@jupyter-widgets/controls",
2110
+ "_model_module_version": "1.5.0",
2111
+ "_model_name": "FloatProgressModel",
2112
+ "_view_count": null,
2113
+ "_view_module": "@jupyter-widgets/controls",
2114
+ "_view_module_version": "1.5.0",
2115
+ "_view_name": "ProgressView",
2116
+ "bar_style": "success",
2117
+ "description": "100%",
2118
+ "description_tooltip": null,
2119
+ "layout": "IPY_MODEL_8b961844b5004905922531bd805a9d57",
2120
+ "max": 1000,
2121
+ "min": 0,
2122
+ "orientation": "horizontal",
2123
+ "style": "IPY_MODEL_3abb36da57c841838867c56e2a3a325b",
2124
+ "value": 1000
2125
+ }
2126
+ },
2127
+ "b4c6a18610734036a16a14a43174c52e": {
2128
+ "model_module": "@jupyter-widgets/base",
2129
+ "model_name": "LayoutModel",
2130
+ "state": {
2131
+ "_model_module": "@jupyter-widgets/base",
2132
+ "_model_module_version": "1.2.0",
2133
+ "_model_name": "LayoutModel",
2134
+ "_view_count": null,
2135
+ "_view_module": "@jupyter-widgets/base",
2136
+ "_view_module_version": "1.2.0",
2137
+ "_view_name": "LayoutView",
2138
+ "align_content": null,
2139
+ "align_items": null,
2140
+ "align_self": null,
2141
+ "border": null,
2142
+ "bottom": null,
2143
+ "display": null,
2144
+ "flex": null,
2145
+ "flex_flow": null,
2146
+ "grid_area": null,
2147
+ "grid_auto_columns": null,
2148
+ "grid_auto_flow": null,
2149
+ "grid_auto_rows": null,
2150
+ "grid_column": null,
2151
+ "grid_gap": null,
2152
+ "grid_row": null,
2153
+ "grid_template_areas": null,
2154
+ "grid_template_columns": null,
2155
+ "grid_template_rows": null,
2156
+ "height": null,
2157
+ "justify_content": null,
2158
+ "justify_items": null,
2159
+ "left": null,
2160
+ "margin": null,
2161
+ "max_height": null,
2162
+ "max_width": null,
2163
+ "min_height": null,
2164
+ "min_width": null,
2165
+ "object_fit": null,
2166
+ "object_position": null,
2167
+ "order": null,
2168
+ "overflow": null,
2169
+ "overflow_x": null,
2170
+ "overflow_y": null,
2171
+ "padding": null,
2172
+ "right": null,
2173
+ "top": null,
2174
+ "visibility": null,
2175
+ "width": null
2176
+ }
2177
+ },
2178
+ "c42644a4e6184a1cbdb2b453b5dbb7d6": {
2179
+ "model_module": "@jupyter-widgets/controls",
2180
+ "model_name": "HBoxModel",
2181
+ "state": {
2182
+ "_dom_classes": [],
2183
+ "_model_module": "@jupyter-widgets/controls",
2184
+ "_model_module_version": "1.5.0",
2185
+ "_model_name": "HBoxModel",
2186
+ "_view_count": null,
2187
+ "_view_module": "@jupyter-widgets/controls",
2188
+ "_view_module_version": "1.5.0",
2189
+ "_view_name": "HBoxView",
2190
+ "box_style": "",
2191
+ "children": [
2192
+ "IPY_MODEL_e8f1abd85f3e49f991d4c1312ffd416b",
2193
+ "IPY_MODEL_929946fdfaa04cf59d3b31cf92fc08d1"
2194
+ ],
2195
+ "layout": "IPY_MODEL_364ba960eb474c9084cc71851594d345"
2196
+ }
2197
+ },
2198
+ "c4a24656d67844e995d3b8e175c6c497": {
2199
+ "model_module": "@jupyter-widgets/controls",
2200
+ "model_name": "HTMLModel",
2201
+ "state": {
2202
+ "_dom_classes": [],
2203
+ "_model_module": "@jupyter-widgets/controls",
2204
+ "_model_module_version": "1.5.0",
2205
+ "_model_name": "HTMLModel",
2206
+ "_view_count": null,
2207
+ "_view_module": "@jupyter-widgets/controls",
2208
+ "_view_module_version": "1.5.0",
2209
+ "_view_name": "HTMLView",
2210
+ "description": "",
2211
+ "description_tooltip": null,
2212
+ "layout": "IPY_MODEL_b4c6a18610734036a16a14a43174c52e",
2213
+ "placeholder": "​",
2214
+ "style": "IPY_MODEL_f37bd346f8614fec92d6c5b5e9b66d2f",
2215
+ "value": " 1000/1000 [01:41&lt;00:00, 9.86ex/s]"
2216
+ }
2217
+ },
2218
+ "ca722dcd857c433c9058585e31a1673d": {
2219
+ "model_module": "@jupyter-widgets/controls",
2220
+ "model_name": "ProgressStyleModel",
2221
+ "state": {
2222
+ "_model_module": "@jupyter-widgets/controls",
2223
+ "_model_module_version": "1.5.0",
2224
+ "_model_name": "ProgressStyleModel",
2225
+ "_view_count": null,
2226
+ "_view_module": "@jupyter-widgets/base",
2227
+ "_view_module_version": "1.2.0",
2228
+ "_view_name": "StyleView",
2229
+ "bar_color": null,
2230
+ "description_width": "initial"
2231
+ }
2232
+ },
2233
+ "cda72c45821a4eb89f1a3ab5510b26d3": {
2234
+ "model_module": "@jupyter-widgets/base",
2235
+ "model_name": "LayoutModel",
2236
+ "state": {
2237
+ "_model_module": "@jupyter-widgets/base",
2238
+ "_model_module_version": "1.2.0",
2239
+ "_model_name": "LayoutModel",
2240
+ "_view_count": null,
2241
+ "_view_module": "@jupyter-widgets/base",
2242
+ "_view_module_version": "1.2.0",
2243
+ "_view_name": "LayoutView",
2244
+ "align_content": null,
2245
+ "align_items": null,
2246
+ "align_self": null,
2247
+ "border": null,
2248
+ "bottom": null,
2249
+ "display": null,
2250
+ "flex": null,
2251
+ "flex_flow": null,
2252
+ "grid_area": null,
2253
+ "grid_auto_columns": null,
2254
+ "grid_auto_flow": null,
2255
+ "grid_auto_rows": null,
2256
+ "grid_column": null,
2257
+ "grid_gap": null,
2258
+ "grid_row": null,
2259
+ "grid_template_areas": null,
2260
+ "grid_template_columns": null,
2261
+ "grid_template_rows": null,
2262
+ "height": null,
2263
+ "justify_content": null,
2264
+ "justify_items": null,
2265
+ "left": null,
2266
+ "margin": null,
2267
+ "max_height": null,
2268
+ "max_width": null,
2269
+ "min_height": null,
2270
+ "min_width": null,
2271
+ "object_fit": null,
2272
+ "object_position": null,
2273
+ "order": null,
2274
+ "overflow": null,
2275
+ "overflow_x": null,
2276
+ "overflow_y": null,
2277
+ "padding": null,
2278
+ "right": null,
2279
+ "top": null,
2280
+ "visibility": null,
2281
+ "width": null
2282
+ }
2283
+ },
2284
+ "e592db98c0c34c5e800f5d7b6d3c099e": {
2285
+ "model_module": "@jupyter-widgets/base",
2286
+ "model_name": "LayoutModel",
2287
+ "state": {
2288
+ "_model_module": "@jupyter-widgets/base",
2289
+ "_model_module_version": "1.2.0",
2290
+ "_model_name": "LayoutModel",
2291
+ "_view_count": null,
2292
+ "_view_module": "@jupyter-widgets/base",
2293
+ "_view_module_version": "1.2.0",
2294
+ "_view_name": "LayoutView",
2295
+ "align_content": null,
2296
+ "align_items": null,
2297
+ "align_self": null,
2298
+ "border": null,
2299
+ "bottom": null,
2300
+ "display": null,
2301
+ "flex": null,
2302
+ "flex_flow": null,
2303
+ "grid_area": null,
2304
+ "grid_auto_columns": null,
2305
+ "grid_auto_flow": null,
2306
+ "grid_auto_rows": null,
2307
+ "grid_column": null,
2308
+ "grid_gap": null,
2309
+ "grid_row": null,
2310
+ "grid_template_areas": null,
2311
+ "grid_template_columns": null,
2312
+ "grid_template_rows": null,
2313
+ "height": null,
2314
+ "justify_content": null,
2315
+ "justify_items": null,
2316
+ "left": null,
2317
+ "margin": null,
2318
+ "max_height": null,
2319
+ "max_width": null,
2320
+ "min_height": null,
2321
+ "min_width": null,
2322
+ "object_fit": null,
2323
+ "object_position": null,
2324
+ "order": null,
2325
+ "overflow": null,
2326
+ "overflow_x": null,
2327
+ "overflow_y": null,
2328
+ "padding": null,
2329
+ "right": null,
2330
+ "top": null,
2331
+ "visibility": null,
2332
+ "width": null
2333
+ }
2334
+ },
2335
+ "e8f1abd85f3e49f991d4c1312ffd416b": {
2336
+ "model_module": "@jupyter-widgets/controls",
2337
+ "model_name": "FloatProgressModel",
2338
+ "state": {
2339
+ "_dom_classes": [],
2340
+ "_model_module": "@jupyter-widgets/controls",
2341
+ "_model_module_version": "1.5.0",
2342
+ "_model_name": "FloatProgressModel",
2343
+ "_view_count": null,
2344
+ "_view_module": "@jupyter-widgets/controls",
2345
+ "_view_module_version": "1.5.0",
2346
+ "_view_name": "ProgressView",
2347
+ "bar_style": "success",
2348
+ "description": "100%",
2349
+ "description_tooltip": null,
2350
+ "layout": "IPY_MODEL_ff444b253e9a40e5bec755926d83740f",
2351
+ "max": 1000,
2352
+ "min": 0,
2353
+ "orientation": "horizontal",
2354
+ "style": "IPY_MODEL_aa5c0d374889482697fc0f7ce9c81afe",
2355
+ "value": 1000
2356
+ }
2357
+ },
2358
+ "ef4e12f29f1e458f811a400faf21bdcc": {
2359
+ "model_module": "@jupyter-widgets/controls",
2360
+ "model_name": "HTMLModel",
2361
+ "state": {
2362
+ "_dom_classes": [],
2363
+ "_model_module": "@jupyter-widgets/controls",
2364
+ "_model_module_version": "1.5.0",
2365
+ "_model_name": "HTMLModel",
2366
+ "_view_count": null,
2367
+ "_view_module": "@jupyter-widgets/controls",
2368
+ "_view_module_version": "1.5.0",
2369
+ "_view_name": "HTMLView",
2370
+ "description": "",
2371
+ "description_tooltip": null,
2372
+ "layout": "IPY_MODEL_39029f730ae140c7902fca6dac5361ad",
2373
+ "placeholder": "​",
2374
+ "style": "IPY_MODEL_5b6dbe662ca24834b7678638e101e1ff",
2375
+ "value": " 1000/1000 [01:25&lt;00:00, 11.68ex/s]"
2376
+ }
2377
+ },
2378
+ "f0ac3b9b8f664479940c6ee18fc2f13e": {
2379
+ "model_module": "@jupyter-widgets/controls",
2380
+ "model_name": "HTMLModel",
2381
+ "state": {
2382
+ "_dom_classes": [],
2383
+ "_model_module": "@jupyter-widgets/controls",
2384
+ "_model_module_version": "1.5.0",
2385
+ "_model_name": "HTMLModel",
2386
+ "_view_count": null,
2387
+ "_view_module": "@jupyter-widgets/controls",
2388
+ "_view_module_version": "1.5.0",
2389
+ "_view_name": "HTMLView",
2390
+ "description": "",
2391
+ "description_tooltip": null,
2392
+ "layout": "IPY_MODEL_7fefe9e1121a43558d773500aef8935c",
2393
+ "placeholder": "​",
2394
+ "style": "IPY_MODEL_568f11b4462f4b4e95f3ad5947bb275e",
2395
+ "value": " 1000/1000 [01:24&lt;00:00, 11.77ex/s]"
2396
+ }
2397
+ },
2398
+ "f0e34f2bf626434fa73f0def26b3d1a5": {
2399
+ "model_module": "@jupyter-widgets/controls",
2400
+ "model_name": "ProgressStyleModel",
2401
+ "state": {
2402
+ "_model_module": "@jupyter-widgets/controls",
2403
+ "_model_module_version": "1.5.0",
2404
+ "_model_name": "ProgressStyleModel",
2405
+ "_view_count": null,
2406
+ "_view_module": "@jupyter-widgets/base",
2407
+ "_view_module_version": "1.2.0",
2408
+ "_view_name": "StyleView",
2409
+ "bar_color": null,
2410
+ "description_width": "initial"
2411
+ }
2412
+ },
2413
+ "f37bd346f8614fec92d6c5b5e9b66d2f": {
2414
+ "model_module": "@jupyter-widgets/controls",
2415
+ "model_name": "DescriptionStyleModel",
2416
+ "state": {
2417
+ "_model_module": "@jupyter-widgets/controls",
2418
+ "_model_module_version": "1.5.0",
2419
+ "_model_name": "DescriptionStyleModel",
2420
+ "_view_count": null,
2421
+ "_view_module": "@jupyter-widgets/base",
2422
+ "_view_module_version": "1.2.0",
2423
+ "_view_name": "StyleView",
2424
+ "description_width": ""
2425
+ }
2426
+ },
2427
+ "ff444b253e9a40e5bec755926d83740f": {
2428
+ "model_module": "@jupyter-widgets/base",
2429
+ "model_name": "LayoutModel",
2430
+ "state": {
2431
+ "_model_module": "@jupyter-widgets/base",
2432
+ "_model_module_version": "1.2.0",
2433
+ "_model_name": "LayoutModel",
2434
+ "_view_count": null,
2435
+ "_view_module": "@jupyter-widgets/base",
2436
+ "_view_module_version": "1.2.0",
2437
+ "_view_name": "LayoutView",
2438
+ "align_content": null,
2439
+ "align_items": null,
2440
+ "align_self": null,
2441
+ "border": null,
2442
+ "bottom": null,
2443
+ "display": null,
2444
+ "flex": null,
2445
+ "flex_flow": null,
2446
+ "grid_area": null,
2447
+ "grid_auto_columns": null,
2448
+ "grid_auto_flow": null,
2449
+ "grid_auto_rows": null,
2450
+ "grid_column": null,
2451
+ "grid_gap": null,
2452
+ "grid_row": null,
2453
+ "grid_template_areas": null,
2454
+ "grid_template_columns": null,
2455
+ "grid_template_rows": null,
2456
+ "height": null,
2457
+ "justify_content": null,
2458
+ "justify_items": null,
2459
+ "left": null,
2460
+ "margin": null,
2461
+ "max_height": null,
2462
+ "max_width": null,
2463
+ "min_height": null,
2464
+ "min_width": null,
2465
+ "object_fit": null,
2466
+ "object_position": null,
2467
+ "order": null,
2468
+ "overflow": null,
2469
+ "overflow_x": null,
2470
+ "overflow_y": null,
2471
+ "padding": null,
2472
+ "right": null,
2473
+ "top": null,
2474
+ "visibility": null,
2475
+ "width": null
2476
+ }
2477
+ }
2478
+ }
2479
+ }
2480
+ },
2481
+ "nbformat": 4,
2482
+ "nbformat_minor": 1
2483
+ }
Transformer Mechanism/QA/tf/W4A3_UGL/QA_dataset.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Transformer Mechanism/QA/tf/W4A3_UGL/data/._dataset_dict.json ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/data/._test ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/data/._train ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/data/dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train", "test"]}
Transformer Mechanism/QA/tf/W4A3_UGL/data/test/._dataset.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a591d9521aff12eea1e7ee705de14a1c50ae25b9c5de477d9bcdd56c5986e83e
3
+ size 212
Transformer Mechanism/QA/tf/W4A3_UGL/data/test/._dataset_info.json ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/data/test/._state.json ADDED
Binary file (212 Bytes). View file
 
Transformer Mechanism/QA/tf/W4A3_UGL/data/test/cache-26c237c56fc0b951.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36347fc2d623e02c4b5b1a365abadea94bb73d145a3ea91a3d0f02da01385d9e
3
+ size 326328
Transformer Mechanism/QA/tf/W4A3_UGL/data/test/cache-6b23a7f03ef9fdb4.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e559136154bac6bc023887fc2b04d5a6ac67121e31f2c1969bfa88b19d7d895
3
+ size 342632
Transformer Mechanism/QA/tf/W4A3_UGL/data/test/cache-c9959a793a67abd8.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5e1b69377781f5b617299b386b6b1185d60b4ae9c443dc12c4433dd7a98b8e2
3
+ size 497544