aryn25 commited on
Commit
2a17ef2
·
verified ·
1 Parent(s): 57d1d5c

Delete fine_tuned_model.ipynb

Browse files
Files changed (1) hide show
  1. fine_tuned_model.ipynb +0 -2057
fine_tuned_model.ipynb DELETED
@@ -1,2057 +0,0 @@
1
- {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": []
7
- },
8
- "kernelspec": {
9
- "name": "python3",
10
- "display_name": "Python 3"
11
- },
12
- "language_info": {
13
- "name": "python"
14
- },
15
- "widgets": {
16
- "application/vnd.jupyter.widget-state+json": {
17
- "b4906906681c43ba89fb34066e11b4ac": {
18
- "model_module": "@jupyter-widgets/controls",
19
- "model_name": "HBoxModel",
20
- "model_module_version": "1.5.0",
21
- "state": {
22
- "_dom_classes": [],
23
- "_model_module": "@jupyter-widgets/controls",
24
- "_model_module_version": "1.5.0",
25
- "_model_name": "HBoxModel",
26
- "_view_count": null,
27
- "_view_module": "@jupyter-widgets/controls",
28
- "_view_module_version": "1.5.0",
29
- "_view_name": "HBoxView",
30
- "box_style": "",
31
- "children": [
32
- "IPY_MODEL_fcdcced29331494e90e1d92488a02ac5",
33
- "IPY_MODEL_39ed42acf1eb483db84485fd11557ee7",
34
- "IPY_MODEL_db4bab5e72bd4973a14b17ce9af0514e"
35
- ],
36
- "layout": "IPY_MODEL_8d9c237dc1b844eaa53081c4a1e22d2d"
37
- }
38
- },
39
- "fcdcced29331494e90e1d92488a02ac5": {
40
- "model_module": "@jupyter-widgets/controls",
41
- "model_name": "HTMLModel",
42
- "model_module_version": "1.5.0",
43
- "state": {
44
- "_dom_classes": [],
45
- "_model_module": "@jupyter-widgets/controls",
46
- "_model_module_version": "1.5.0",
47
- "_model_name": "HTMLModel",
48
- "_view_count": null,
49
- "_view_module": "@jupyter-widgets/controls",
50
- "_view_module_version": "1.5.0",
51
- "_view_name": "HTMLView",
52
- "description": "",
53
- "description_tooltip": null,
54
- "layout": "IPY_MODEL_98946dd1aa624168a9e50073c37996ad",
55
- "placeholder": "​",
56
- "style": "IPY_MODEL_adbbd9c4fd8f475ea522142e85aa4197",
57
- "value": "tokenizer_config.json: 100%"
58
- }
59
- },
60
- "39ed42acf1eb483db84485fd11557ee7": {
61
- "model_module": "@jupyter-widgets/controls",
62
- "model_name": "FloatProgressModel",
63
- "model_module_version": "1.5.0",
64
- "state": {
65
- "_dom_classes": [],
66
- "_model_module": "@jupyter-widgets/controls",
67
- "_model_module_version": "1.5.0",
68
- "_model_name": "FloatProgressModel",
69
- "_view_count": null,
70
- "_view_module": "@jupyter-widgets/controls",
71
- "_view_module_version": "1.5.0",
72
- "_view_name": "ProgressView",
73
- "bar_style": "success",
74
- "description": "",
75
- "description_tooltip": null,
76
- "layout": "IPY_MODEL_1e80c6fed76e4d56b1d0950b90040131",
77
- "max": 48,
78
- "min": 0,
79
- "orientation": "horizontal",
80
- "style": "IPY_MODEL_28c8f58a19d14f0fb544a6b30adc4481",
81
- "value": 48
82
- }
83
- },
84
- "db4bab5e72bd4973a14b17ce9af0514e": {
85
- "model_module": "@jupyter-widgets/controls",
86
- "model_name": "HTMLModel",
87
- "model_module_version": "1.5.0",
88
- "state": {
89
- "_dom_classes": [],
90
- "_model_module": "@jupyter-widgets/controls",
91
- "_model_module_version": "1.5.0",
92
- "_model_name": "HTMLModel",
93
- "_view_count": null,
94
- "_view_module": "@jupyter-widgets/controls",
95
- "_view_module_version": "1.5.0",
96
- "_view_name": "HTMLView",
97
- "description": "",
98
- "description_tooltip": null,
99
- "layout": "IPY_MODEL_253c42626cc04cd19300ff9cc6372c94",
100
- "placeholder": "​",
101
- "style": "IPY_MODEL_7470f834fbe141b188ed48c47faca306",
102
- "value": " 48.0/48.0 [00:00<00:00, 3.44kB/s]"
103
- }
104
- },
105
- "8d9c237dc1b844eaa53081c4a1e22d2d": {
106
- "model_module": "@jupyter-widgets/base",
107
- "model_name": "LayoutModel",
108
- "model_module_version": "1.2.0",
109
- "state": {
110
- "_model_module": "@jupyter-widgets/base",
111
- "_model_module_version": "1.2.0",
112
- "_model_name": "LayoutModel",
113
- "_view_count": null,
114
- "_view_module": "@jupyter-widgets/base",
115
- "_view_module_version": "1.2.0",
116
- "_view_name": "LayoutView",
117
- "align_content": null,
118
- "align_items": null,
119
- "align_self": null,
120
- "border": null,
121
- "bottom": null,
122
- "display": null,
123
- "flex": null,
124
- "flex_flow": null,
125
- "grid_area": null,
126
- "grid_auto_columns": null,
127
- "grid_auto_flow": null,
128
- "grid_auto_rows": null,
129
- "grid_column": null,
130
- "grid_gap": null,
131
- "grid_row": null,
132
- "grid_template_areas": null,
133
- "grid_template_columns": null,
134
- "grid_template_rows": null,
135
- "height": null,
136
- "justify_content": null,
137
- "justify_items": null,
138
- "left": null,
139
- "margin": null,
140
- "max_height": null,
141
- "max_width": null,
142
- "min_height": null,
143
- "min_width": null,
144
- "object_fit": null,
145
- "object_position": null,
146
- "order": null,
147
- "overflow": null,
148
- "overflow_x": null,
149
- "overflow_y": null,
150
- "padding": null,
151
- "right": null,
152
- "top": null,
153
- "visibility": null,
154
- "width": null
155
- }
156
- },
157
- "98946dd1aa624168a9e50073c37996ad": {
158
- "model_module": "@jupyter-widgets/base",
159
- "model_name": "LayoutModel",
160
- "model_module_version": "1.2.0",
161
- "state": {
162
- "_model_module": "@jupyter-widgets/base",
163
- "_model_module_version": "1.2.0",
164
- "_model_name": "LayoutModel",
165
- "_view_count": null,
166
- "_view_module": "@jupyter-widgets/base",
167
- "_view_module_version": "1.2.0",
168
- "_view_name": "LayoutView",
169
- "align_content": null,
170
- "align_items": null,
171
- "align_self": null,
172
- "border": null,
173
- "bottom": null,
174
- "display": null,
175
- "flex": null,
176
- "flex_flow": null,
177
- "grid_area": null,
178
- "grid_auto_columns": null,
179
- "grid_auto_flow": null,
180
- "grid_auto_rows": null,
181
- "grid_column": null,
182
- "grid_gap": null,
183
- "grid_row": null,
184
- "grid_template_areas": null,
185
- "grid_template_columns": null,
186
- "grid_template_rows": null,
187
- "height": null,
188
- "justify_content": null,
189
- "justify_items": null,
190
- "left": null,
191
- "margin": null,
192
- "max_height": null,
193
- "max_width": null,
194
- "min_height": null,
195
- "min_width": null,
196
- "object_fit": null,
197
- "object_position": null,
198
- "order": null,
199
- "overflow": null,
200
- "overflow_x": null,
201
- "overflow_y": null,
202
- "padding": null,
203
- "right": null,
204
- "top": null,
205
- "visibility": null,
206
- "width": null
207
- }
208
- },
209
- "adbbd9c4fd8f475ea522142e85aa4197": {
210
- "model_module": "@jupyter-widgets/controls",
211
- "model_name": "DescriptionStyleModel",
212
- "model_module_version": "1.5.0",
213
- "state": {
214
- "_model_module": "@jupyter-widgets/controls",
215
- "_model_module_version": "1.5.0",
216
- "_model_name": "DescriptionStyleModel",
217
- "_view_count": null,
218
- "_view_module": "@jupyter-widgets/base",
219
- "_view_module_version": "1.2.0",
220
- "_view_name": "StyleView",
221
- "description_width": ""
222
- }
223
- },
224
- "1e80c6fed76e4d56b1d0950b90040131": {
225
- "model_module": "@jupyter-widgets/base",
226
- "model_name": "LayoutModel",
227
- "model_module_version": "1.2.0",
228
- "state": {
229
- "_model_module": "@jupyter-widgets/base",
230
- "_model_module_version": "1.2.0",
231
- "_model_name": "LayoutModel",
232
- "_view_count": null,
233
- "_view_module": "@jupyter-widgets/base",
234
- "_view_module_version": "1.2.0",
235
- "_view_name": "LayoutView",
236
- "align_content": null,
237
- "align_items": null,
238
- "align_self": null,
239
- "border": null,
240
- "bottom": null,
241
- "display": null,
242
- "flex": null,
243
- "flex_flow": null,
244
- "grid_area": null,
245
- "grid_auto_columns": null,
246
- "grid_auto_flow": null,
247
- "grid_auto_rows": null,
248
- "grid_column": null,
249
- "grid_gap": null,
250
- "grid_row": null,
251
- "grid_template_areas": null,
252
- "grid_template_columns": null,
253
- "grid_template_rows": null,
254
- "height": null,
255
- "justify_content": null,
256
- "justify_items": null,
257
- "left": null,
258
- "margin": null,
259
- "max_height": null,
260
- "max_width": null,
261
- "min_height": null,
262
- "min_width": null,
263
- "object_fit": null,
264
- "object_position": null,
265
- "order": null,
266
- "overflow": null,
267
- "overflow_x": null,
268
- "overflow_y": null,
269
- "padding": null,
270
- "right": null,
271
- "top": null,
272
- "visibility": null,
273
- "width": null
274
- }
275
- },
276
- "28c8f58a19d14f0fb544a6b30adc4481": {
277
- "model_module": "@jupyter-widgets/controls",
278
- "model_name": "ProgressStyleModel",
279
- "model_module_version": "1.5.0",
280
- "state": {
281
- "_model_module": "@jupyter-widgets/controls",
282
- "_model_module_version": "1.5.0",
283
- "_model_name": "ProgressStyleModel",
284
- "_view_count": null,
285
- "_view_module": "@jupyter-widgets/base",
286
- "_view_module_version": "1.2.0",
287
- "_view_name": "StyleView",
288
- "bar_color": null,
289
- "description_width": ""
290
- }
291
- },
292
- "253c42626cc04cd19300ff9cc6372c94": {
293
- "model_module": "@jupyter-widgets/base",
294
- "model_name": "LayoutModel",
295
- "model_module_version": "1.2.0",
296
- "state": {
297
- "_model_module": "@jupyter-widgets/base",
298
- "_model_module_version": "1.2.0",
299
- "_model_name": "LayoutModel",
300
- "_view_count": null,
301
- "_view_module": "@jupyter-widgets/base",
302
- "_view_module_version": "1.2.0",
303
- "_view_name": "LayoutView",
304
- "align_content": null,
305
- "align_items": null,
306
- "align_self": null,
307
- "border": null,
308
- "bottom": null,
309
- "display": null,
310
- "flex": null,
311
- "flex_flow": null,
312
- "grid_area": null,
313
- "grid_auto_columns": null,
314
- "grid_auto_flow": null,
315
- "grid_auto_rows": null,
316
- "grid_column": null,
317
- "grid_gap": null,
318
- "grid_row": null,
319
- "grid_template_areas": null,
320
- "grid_template_columns": null,
321
- "grid_template_rows": null,
322
- "height": null,
323
- "justify_content": null,
324
- "justify_items": null,
325
- "left": null,
326
- "margin": null,
327
- "max_height": null,
328
- "max_width": null,
329
- "min_height": null,
330
- "min_width": null,
331
- "object_fit": null,
332
- "object_position": null,
333
- "order": null,
334
- "overflow": null,
335
- "overflow_x": null,
336
- "overflow_y": null,
337
- "padding": null,
338
- "right": null,
339
- "top": null,
340
- "visibility": null,
341
- "width": null
342
- }
343
- },
344
- "7470f834fbe141b188ed48c47faca306": {
345
- "model_module": "@jupyter-widgets/controls",
346
- "model_name": "DescriptionStyleModel",
347
- "model_module_version": "1.5.0",
348
- "state": {
349
- "_model_module": "@jupyter-widgets/controls",
350
- "_model_module_version": "1.5.0",
351
- "_model_name": "DescriptionStyleModel",
352
- "_view_count": null,
353
- "_view_module": "@jupyter-widgets/base",
354
- "_view_module_version": "1.2.0",
355
- "_view_name": "StyleView",
356
- "description_width": ""
357
- }
358
- },
359
- "1b27c3504c1b42a4b34df15050933bdb": {
360
- "model_module": "@jupyter-widgets/controls",
361
- "model_name": "HBoxModel",
362
- "model_module_version": "1.5.0",
363
- "state": {
364
- "_dom_classes": [],
365
- "_model_module": "@jupyter-widgets/controls",
366
- "_model_module_version": "1.5.0",
367
- "_model_name": "HBoxModel",
368
- "_view_count": null,
369
- "_view_module": "@jupyter-widgets/controls",
370
- "_view_module_version": "1.5.0",
371
- "_view_name": "HBoxView",
372
- "box_style": "",
373
- "children": [
374
- "IPY_MODEL_98f521a1cd184d09b57a76dfc072b5c1",
375
- "IPY_MODEL_f16f9a28caaa4b41863fe21b1f0c8895",
376
- "IPY_MODEL_43a39db0c9f8472b8c8663bebd351c9d"
377
- ],
378
- "layout": "IPY_MODEL_f9c3a5788ce14e9186db9f64105fdf78"
379
- }
380
- },
381
- "98f521a1cd184d09b57a76dfc072b5c1": {
382
- "model_module": "@jupyter-widgets/controls",
383
- "model_name": "HTMLModel",
384
- "model_module_version": "1.5.0",
385
- "state": {
386
- "_dom_classes": [],
387
- "_model_module": "@jupyter-widgets/controls",
388
- "_model_module_version": "1.5.0",
389
- "_model_name": "HTMLModel",
390
- "_view_count": null,
391
- "_view_module": "@jupyter-widgets/controls",
392
- "_view_module_version": "1.5.0",
393
- "_view_name": "HTMLView",
394
- "description": "",
395
- "description_tooltip": null,
396
- "layout": "IPY_MODEL_d9d32f14c9704eb78127d7866c1d44ba",
397
- "placeholder": "​",
398
- "style": "IPY_MODEL_d1fd6f666e0f41fdb5f2c50ddabc6140",
399
- "value": "vocab.txt: 100%"
400
- }
401
- },
402
- "f16f9a28caaa4b41863fe21b1f0c8895": {
403
- "model_module": "@jupyter-widgets/controls",
404
- "model_name": "FloatProgressModel",
405
- "model_module_version": "1.5.0",
406
- "state": {
407
- "_dom_classes": [],
408
- "_model_module": "@jupyter-widgets/controls",
409
- "_model_module_version": "1.5.0",
410
- "_model_name": "FloatProgressModel",
411
- "_view_count": null,
412
- "_view_module": "@jupyter-widgets/controls",
413
- "_view_module_version": "1.5.0",
414
- "_view_name": "ProgressView",
415
- "bar_style": "success",
416
- "description": "",
417
- "description_tooltip": null,
418
- "layout": "IPY_MODEL_c57215fcc21a4fbe99f8e1425d0e7729",
419
- "max": 231508,
420
- "min": 0,
421
- "orientation": "horizontal",
422
- "style": "IPY_MODEL_7ab8f5df787946b186bc31b9bf5aaacb",
423
- "value": 231508
424
- }
425
- },
426
- "43a39db0c9f8472b8c8663bebd351c9d": {
427
- "model_module": "@jupyter-widgets/controls",
428
- "model_name": "HTMLModel",
429
- "model_module_version": "1.5.0",
430
- "state": {
431
- "_dom_classes": [],
432
- "_model_module": "@jupyter-widgets/controls",
433
- "_model_module_version": "1.5.0",
434
- "_model_name": "HTMLModel",
435
- "_view_count": null,
436
- "_view_module": "@jupyter-widgets/controls",
437
- "_view_module_version": "1.5.0",
438
- "_view_name": "HTMLView",
439
- "description": "",
440
- "description_tooltip": null,
441
- "layout": "IPY_MODEL_b8c47fd876ce41d094ea4344cc1269e5",
442
- "placeholder": "​",
443
- "style": "IPY_MODEL_c9db91385b5c437bb6ba3481d6946a6e",
444
- "value": " 232k/232k [00:00<00:00, 3.32MB/s]"
445
- }
446
- },
447
- "f9c3a5788ce14e9186db9f64105fdf78": {
448
- "model_module": "@jupyter-widgets/base",
449
- "model_name": "LayoutModel",
450
- "model_module_version": "1.2.0",
451
- "state": {
452
- "_model_module": "@jupyter-widgets/base",
453
- "_model_module_version": "1.2.0",
454
- "_model_name": "LayoutModel",
455
- "_view_count": null,
456
- "_view_module": "@jupyter-widgets/base",
457
- "_view_module_version": "1.2.0",
458
- "_view_name": "LayoutView",
459
- "align_content": null,
460
- "align_items": null,
461
- "align_self": null,
462
- "border": null,
463
- "bottom": null,
464
- "display": null,
465
- "flex": null,
466
- "flex_flow": null,
467
- "grid_area": null,
468
- "grid_auto_columns": null,
469
- "grid_auto_flow": null,
470
- "grid_auto_rows": null,
471
- "grid_column": null,
472
- "grid_gap": null,
473
- "grid_row": null,
474
- "grid_template_areas": null,
475
- "grid_template_columns": null,
476
- "grid_template_rows": null,
477
- "height": null,
478
- "justify_content": null,
479
- "justify_items": null,
480
- "left": null,
481
- "margin": null,
482
- "max_height": null,
483
- "max_width": null,
484
- "min_height": null,
485
- "min_width": null,
486
- "object_fit": null,
487
- "object_position": null,
488
- "order": null,
489
- "overflow": null,
490
- "overflow_x": null,
491
- "overflow_y": null,
492
- "padding": null,
493
- "right": null,
494
- "top": null,
495
- "visibility": null,
496
- "width": null
497
- }
498
- },
499
- "d9d32f14c9704eb78127d7866c1d44ba": {
500
- "model_module": "@jupyter-widgets/base",
501
- "model_name": "LayoutModel",
502
- "model_module_version": "1.2.0",
503
- "state": {
504
- "_model_module": "@jupyter-widgets/base",
505
- "_model_module_version": "1.2.0",
506
- "_model_name": "LayoutModel",
507
- "_view_count": null,
508
- "_view_module": "@jupyter-widgets/base",
509
- "_view_module_version": "1.2.0",
510
- "_view_name": "LayoutView",
511
- "align_content": null,
512
- "align_items": null,
513
- "align_self": null,
514
- "border": null,
515
- "bottom": null,
516
- "display": null,
517
- "flex": null,
518
- "flex_flow": null,
519
- "grid_area": null,
520
- "grid_auto_columns": null,
521
- "grid_auto_flow": null,
522
- "grid_auto_rows": null,
523
- "grid_column": null,
524
- "grid_gap": null,
525
- "grid_row": null,
526
- "grid_template_areas": null,
527
- "grid_template_columns": null,
528
- "grid_template_rows": null,
529
- "height": null,
530
- "justify_content": null,
531
- "justify_items": null,
532
- "left": null,
533
- "margin": null,
534
- "max_height": null,
535
- "max_width": null,
536
- "min_height": null,
537
- "min_width": null,
538
- "object_fit": null,
539
- "object_position": null,
540
- "order": null,
541
- "overflow": null,
542
- "overflow_x": null,
543
- "overflow_y": null,
544
- "padding": null,
545
- "right": null,
546
- "top": null,
547
- "visibility": null,
548
- "width": null
549
- }
550
- },
551
- "d1fd6f666e0f41fdb5f2c50ddabc6140": {
552
- "model_module": "@jupyter-widgets/controls",
553
- "model_name": "DescriptionStyleModel",
554
- "model_module_version": "1.5.0",
555
- "state": {
556
- "_model_module": "@jupyter-widgets/controls",
557
- "_model_module_version": "1.5.0",
558
- "_model_name": "DescriptionStyleModel",
559
- "_view_count": null,
560
- "_view_module": "@jupyter-widgets/base",
561
- "_view_module_version": "1.2.0",
562
- "_view_name": "StyleView",
563
- "description_width": ""
564
- }
565
- },
566
- "c57215fcc21a4fbe99f8e1425d0e7729": {
567
- "model_module": "@jupyter-widgets/base",
568
- "model_name": "LayoutModel",
569
- "model_module_version": "1.2.0",
570
- "state": {
571
- "_model_module": "@jupyter-widgets/base",
572
- "_model_module_version": "1.2.0",
573
- "_model_name": "LayoutModel",
574
- "_view_count": null,
575
- "_view_module": "@jupyter-widgets/base",
576
- "_view_module_version": "1.2.0",
577
- "_view_name": "LayoutView",
578
- "align_content": null,
579
- "align_items": null,
580
- "align_self": null,
581
- "border": null,
582
- "bottom": null,
583
- "display": null,
584
- "flex": null,
585
- "flex_flow": null,
586
- "grid_area": null,
587
- "grid_auto_columns": null,
588
- "grid_auto_flow": null,
589
- "grid_auto_rows": null,
590
- "grid_column": null,
591
- "grid_gap": null,
592
- "grid_row": null,
593
- "grid_template_areas": null,
594
- "grid_template_columns": null,
595
- "grid_template_rows": null,
596
- "height": null,
597
- "justify_content": null,
598
- "justify_items": null,
599
- "left": null,
600
- "margin": null,
601
- "max_height": null,
602
- "max_width": null,
603
- "min_height": null,
604
- "min_width": null,
605
- "object_fit": null,
606
- "object_position": null,
607
- "order": null,
608
- "overflow": null,
609
- "overflow_x": null,
610
- "overflow_y": null,
611
- "padding": null,
612
- "right": null,
613
- "top": null,
614
- "visibility": null,
615
- "width": null
616
- }
617
- },
618
- "7ab8f5df787946b186bc31b9bf5aaacb": {
619
- "model_module": "@jupyter-widgets/controls",
620
- "model_name": "ProgressStyleModel",
621
- "model_module_version": "1.5.0",
622
- "state": {
623
- "_model_module": "@jupyter-widgets/controls",
624
- "_model_module_version": "1.5.0",
625
- "_model_name": "ProgressStyleModel",
626
- "_view_count": null,
627
- "_view_module": "@jupyter-widgets/base",
628
- "_view_module_version": "1.2.0",
629
- "_view_name": "StyleView",
630
- "bar_color": null,
631
- "description_width": ""
632
- }
633
- },
634
- "b8c47fd876ce41d094ea4344cc1269e5": {
635
- "model_module": "@jupyter-widgets/base",
636
- "model_name": "LayoutModel",
637
- "model_module_version": "1.2.0",
638
- "state": {
639
- "_model_module": "@jupyter-widgets/base",
640
- "_model_module_version": "1.2.0",
641
- "_model_name": "LayoutModel",
642
- "_view_count": null,
643
- "_view_module": "@jupyter-widgets/base",
644
- "_view_module_version": "1.2.0",
645
- "_view_name": "LayoutView",
646
- "align_content": null,
647
- "align_items": null,
648
- "align_self": null,
649
- "border": null,
650
- "bottom": null,
651
- "display": null,
652
- "flex": null,
653
- "flex_flow": null,
654
- "grid_area": null,
655
- "grid_auto_columns": null,
656
- "grid_auto_flow": null,
657
- "grid_auto_rows": null,
658
- "grid_column": null,
659
- "grid_gap": null,
660
- "grid_row": null,
661
- "grid_template_areas": null,
662
- "grid_template_columns": null,
663
- "grid_template_rows": null,
664
- "height": null,
665
- "justify_content": null,
666
- "justify_items": null,
667
- "left": null,
668
- "margin": null,
669
- "max_height": null,
670
- "max_width": null,
671
- "min_height": null,
672
- "min_width": null,
673
- "object_fit": null,
674
- "object_position": null,
675
- "order": null,
676
- "overflow": null,
677
- "overflow_x": null,
678
- "overflow_y": null,
679
- "padding": null,
680
- "right": null,
681
- "top": null,
682
- "visibility": null,
683
- "width": null
684
- }
685
- },
686
- "c9db91385b5c437bb6ba3481d6946a6e": {
687
- "model_module": "@jupyter-widgets/controls",
688
- "model_name": "DescriptionStyleModel",
689
- "model_module_version": "1.5.0",
690
- "state": {
691
- "_model_module": "@jupyter-widgets/controls",
692
- "_model_module_version": "1.5.0",
693
- "_model_name": "DescriptionStyleModel",
694
- "_view_count": null,
695
- "_view_module": "@jupyter-widgets/base",
696
- "_view_module_version": "1.2.0",
697
- "_view_name": "StyleView",
698
- "description_width": ""
699
- }
700
- },
701
- "e1c8ad6de85543d19c2fa3216ab21fd5": {
702
- "model_module": "@jupyter-widgets/controls",
703
- "model_name": "HBoxModel",
704
- "model_module_version": "1.5.0",
705
- "state": {
706
- "_dom_classes": [],
707
- "_model_module": "@jupyter-widgets/controls",
708
- "_model_module_version": "1.5.0",
709
- "_model_name": "HBoxModel",
710
- "_view_count": null,
711
- "_view_module": "@jupyter-widgets/controls",
712
- "_view_module_version": "1.5.0",
713
- "_view_name": "HBoxView",
714
- "box_style": "",
715
- "children": [
716
- "IPY_MODEL_a49b101ac9ee47319a4173c1e2772161",
717
- "IPY_MODEL_038e646a3d174e0480db73c6e650b2ed",
718
- "IPY_MODEL_aee756c78f754ffdad00bae8c95ffab9"
719
- ],
720
- "layout": "IPY_MODEL_d18b422acffb480889a5fcaf84ad02f0"
721
- }
722
- },
723
- "a49b101ac9ee47319a4173c1e2772161": {
724
- "model_module": "@jupyter-widgets/controls",
725
- "model_name": "HTMLModel",
726
- "model_module_version": "1.5.0",
727
- "state": {
728
- "_dom_classes": [],
729
- "_model_module": "@jupyter-widgets/controls",
730
- "_model_module_version": "1.5.0",
731
- "_model_name": "HTMLModel",
732
- "_view_count": null,
733
- "_view_module": "@jupyter-widgets/controls",
734
- "_view_module_version": "1.5.0",
735
- "_view_name": "HTMLView",
736
- "description": "",
737
- "description_tooltip": null,
738
- "layout": "IPY_MODEL_6bd53a4ecb0d4720b0e0a288b3b22dc5",
739
- "placeholder": "​",
740
- "style": "IPY_MODEL_f69fa4164bb047b9a81dae4daf08632a",
741
- "value": "tokenizer.json: 100%"
742
- }
743
- },
744
- "038e646a3d174e0480db73c6e650b2ed": {
745
- "model_module": "@jupyter-widgets/controls",
746
- "model_name": "FloatProgressModel",
747
- "model_module_version": "1.5.0",
748
- "state": {
749
- "_dom_classes": [],
750
- "_model_module": "@jupyter-widgets/controls",
751
- "_model_module_version": "1.5.0",
752
- "_model_name": "FloatProgressModel",
753
- "_view_count": null,
754
- "_view_module": "@jupyter-widgets/controls",
755
- "_view_module_version": "1.5.0",
756
- "_view_name": "ProgressView",
757
- "bar_style": "success",
758
- "description": "",
759
- "description_tooltip": null,
760
- "layout": "IPY_MODEL_ee1453c0e355449db2ea166417a71d86",
761
- "max": 466062,
762
- "min": 0,
763
- "orientation": "horizontal",
764
- "style": "IPY_MODEL_f13321d1862044038bcdcdeeebcc5f0c",
765
- "value": 466062
766
- }
767
- },
768
- "aee756c78f754ffdad00bae8c95ffab9": {
769
- "model_module": "@jupyter-widgets/controls",
770
- "model_name": "HTMLModel",
771
- "model_module_version": "1.5.0",
772
- "state": {
773
- "_dom_classes": [],
774
- "_model_module": "@jupyter-widgets/controls",
775
- "_model_module_version": "1.5.0",
776
- "_model_name": "HTMLModel",
777
- "_view_count": null,
778
- "_view_module": "@jupyter-widgets/controls",
779
- "_view_module_version": "1.5.0",
780
- "_view_name": "HTMLView",
781
- "description": "",
782
- "description_tooltip": null,
783
- "layout": "IPY_MODEL_afbe0d54241248f28036903aaade805d",
784
- "placeholder": "​",
785
- "style": "IPY_MODEL_13f6bfc3b9474faa8d268b712e3fa56a",
786
- "value": " 466k/466k [00:00<00:00, 12.1MB/s]"
787
- }
788
- },
789
- "d18b422acffb480889a5fcaf84ad02f0": {
790
- "model_module": "@jupyter-widgets/base",
791
- "model_name": "LayoutModel",
792
- "model_module_version": "1.2.0",
793
- "state": {
794
- "_model_module": "@jupyter-widgets/base",
795
- "_model_module_version": "1.2.0",
796
- "_model_name": "LayoutModel",
797
- "_view_count": null,
798
- "_view_module": "@jupyter-widgets/base",
799
- "_view_module_version": "1.2.0",
800
- "_view_name": "LayoutView",
801
- "align_content": null,
802
- "align_items": null,
803
- "align_self": null,
804
- "border": null,
805
- "bottom": null,
806
- "display": null,
807
- "flex": null,
808
- "flex_flow": null,
809
- "grid_area": null,
810
- "grid_auto_columns": null,
811
- "grid_auto_flow": null,
812
- "grid_auto_rows": null,
813
- "grid_column": null,
814
- "grid_gap": null,
815
- "grid_row": null,
816
- "grid_template_areas": null,
817
- "grid_template_columns": null,
818
- "grid_template_rows": null,
819
- "height": null,
820
- "justify_content": null,
821
- "justify_items": null,
822
- "left": null,
823
- "margin": null,
824
- "max_height": null,
825
- "max_width": null,
826
- "min_height": null,
827
- "min_width": null,
828
- "object_fit": null,
829
- "object_position": null,
830
- "order": null,
831
- "overflow": null,
832
- "overflow_x": null,
833
- "overflow_y": null,
834
- "padding": null,
835
- "right": null,
836
- "top": null,
837
- "visibility": null,
838
- "width": null
839
- }
840
- },
841
- "6bd53a4ecb0d4720b0e0a288b3b22dc5": {
842
- "model_module": "@jupyter-widgets/base",
843
- "model_name": "LayoutModel",
844
- "model_module_version": "1.2.0",
845
- "state": {
846
- "_model_module": "@jupyter-widgets/base",
847
- "_model_module_version": "1.2.0",
848
- "_model_name": "LayoutModel",
849
- "_view_count": null,
850
- "_view_module": "@jupyter-widgets/base",
851
- "_view_module_version": "1.2.0",
852
- "_view_name": "LayoutView",
853
- "align_content": null,
854
- "align_items": null,
855
- "align_self": null,
856
- "border": null,
857
- "bottom": null,
858
- "display": null,
859
- "flex": null,
860
- "flex_flow": null,
861
- "grid_area": null,
862
- "grid_auto_columns": null,
863
- "grid_auto_flow": null,
864
- "grid_auto_rows": null,
865
- "grid_column": null,
866
- "grid_gap": null,
867
- "grid_row": null,
868
- "grid_template_areas": null,
869
- "grid_template_columns": null,
870
- "grid_template_rows": null,
871
- "height": null,
872
- "justify_content": null,
873
- "justify_items": null,
874
- "left": null,
875
- "margin": null,
876
- "max_height": null,
877
- "max_width": null,
878
- "min_height": null,
879
- "min_width": null,
880
- "object_fit": null,
881
- "object_position": null,
882
- "order": null,
883
- "overflow": null,
884
- "overflow_x": null,
885
- "overflow_y": null,
886
- "padding": null,
887
- "right": null,
888
- "top": null,
889
- "visibility": null,
890
- "width": null
891
- }
892
- },
893
- "f69fa4164bb047b9a81dae4daf08632a": {
894
- "model_module": "@jupyter-widgets/controls",
895
- "model_name": "DescriptionStyleModel",
896
- "model_module_version": "1.5.0",
897
- "state": {
898
- "_model_module": "@jupyter-widgets/controls",
899
- "_model_module_version": "1.5.0",
900
- "_model_name": "DescriptionStyleModel",
901
- "_view_count": null,
902
- "_view_module": "@jupyter-widgets/base",
903
- "_view_module_version": "1.2.0",
904
- "_view_name": "StyleView",
905
- "description_width": ""
906
- }
907
- },
908
- "ee1453c0e355449db2ea166417a71d86": {
909
- "model_module": "@jupyter-widgets/base",
910
- "model_name": "LayoutModel",
911
- "model_module_version": "1.2.0",
912
- "state": {
913
- "_model_module": "@jupyter-widgets/base",
914
- "_model_module_version": "1.2.0",
915
- "_model_name": "LayoutModel",
916
- "_view_count": null,
917
- "_view_module": "@jupyter-widgets/base",
918
- "_view_module_version": "1.2.0",
919
- "_view_name": "LayoutView",
920
- "align_content": null,
921
- "align_items": null,
922
- "align_self": null,
923
- "border": null,
924
- "bottom": null,
925
- "display": null,
926
- "flex": null,
927
- "flex_flow": null,
928
- "grid_area": null,
929
- "grid_auto_columns": null,
930
- "grid_auto_flow": null,
931
- "grid_auto_rows": null,
932
- "grid_column": null,
933
- "grid_gap": null,
934
- "grid_row": null,
935
- "grid_template_areas": null,
936
- "grid_template_columns": null,
937
- "grid_template_rows": null,
938
- "height": null,
939
- "justify_content": null,
940
- "justify_items": null,
941
- "left": null,
942
- "margin": null,
943
- "max_height": null,
944
- "max_width": null,
945
- "min_height": null,
946
- "min_width": null,
947
- "object_fit": null,
948
- "object_position": null,
949
- "order": null,
950
- "overflow": null,
951
- "overflow_x": null,
952
- "overflow_y": null,
953
- "padding": null,
954
- "right": null,
955
- "top": null,
956
- "visibility": null,
957
- "width": null
958
- }
959
- },
960
- "f13321d1862044038bcdcdeeebcc5f0c": {
961
- "model_module": "@jupyter-widgets/controls",
962
- "model_name": "ProgressStyleModel",
963
- "model_module_version": "1.5.0",
964
- "state": {
965
- "_model_module": "@jupyter-widgets/controls",
966
- "_model_module_version": "1.5.0",
967
- "_model_name": "ProgressStyleModel",
968
- "_view_count": null,
969
- "_view_module": "@jupyter-widgets/base",
970
- "_view_module_version": "1.2.0",
971
- "_view_name": "StyleView",
972
- "bar_color": null,
973
- "description_width": ""
974
- }
975
- },
976
- "afbe0d54241248f28036903aaade805d": {
977
- "model_module": "@jupyter-widgets/base",
978
- "model_name": "LayoutModel",
979
- "model_module_version": "1.2.0",
980
- "state": {
981
- "_model_module": "@jupyter-widgets/base",
982
- "_model_module_version": "1.2.0",
983
- "_model_name": "LayoutModel",
984
- "_view_count": null,
985
- "_view_module": "@jupyter-widgets/base",
986
- "_view_module_version": "1.2.0",
987
- "_view_name": "LayoutView",
988
- "align_content": null,
989
- "align_items": null,
990
- "align_self": null,
991
- "border": null,
992
- "bottom": null,
993
- "display": null,
994
- "flex": null,
995
- "flex_flow": null,
996
- "grid_area": null,
997
- "grid_auto_columns": null,
998
- "grid_auto_flow": null,
999
- "grid_auto_rows": null,
1000
- "grid_column": null,
1001
- "grid_gap": null,
1002
- "grid_row": null,
1003
- "grid_template_areas": null,
1004
- "grid_template_columns": null,
1005
- "grid_template_rows": null,
1006
- "height": null,
1007
- "justify_content": null,
1008
- "justify_items": null,
1009
- "left": null,
1010
- "margin": null,
1011
- "max_height": null,
1012
- "max_width": null,
1013
- "min_height": null,
1014
- "min_width": null,
1015
- "object_fit": null,
1016
- "object_position": null,
1017
- "order": null,
1018
- "overflow": null,
1019
- "overflow_x": null,
1020
- "overflow_y": null,
1021
- "padding": null,
1022
- "right": null,
1023
- "top": null,
1024
- "visibility": null,
1025
- "width": null
1026
- }
1027
- },
1028
- "13f6bfc3b9474faa8d268b712e3fa56a": {
1029
- "model_module": "@jupyter-widgets/controls",
1030
- "model_name": "DescriptionStyleModel",
1031
- "model_module_version": "1.5.0",
1032
- "state": {
1033
- "_model_module": "@jupyter-widgets/controls",
1034
- "_model_module_version": "1.5.0",
1035
- "_model_name": "DescriptionStyleModel",
1036
- "_view_count": null,
1037
- "_view_module": "@jupyter-widgets/base",
1038
- "_view_module_version": "1.2.0",
1039
- "_view_name": "StyleView",
1040
- "description_width": ""
1041
- }
1042
- },
1043
- "e7ee9c4a9c0f4390b1df48af9db8e5db": {
1044
- "model_module": "@jupyter-widgets/controls",
1045
- "model_name": "HBoxModel",
1046
- "model_module_version": "1.5.0",
1047
- "state": {
1048
- "_dom_classes": [],
1049
- "_model_module": "@jupyter-widgets/controls",
1050
- "_model_module_version": "1.5.0",
1051
- "_model_name": "HBoxModel",
1052
- "_view_count": null,
1053
- "_view_module": "@jupyter-widgets/controls",
1054
- "_view_module_version": "1.5.0",
1055
- "_view_name": "HBoxView",
1056
- "box_style": "",
1057
- "children": [
1058
- "IPY_MODEL_c7bf8bc2c0e7414a9128cde3f2d18ae5",
1059
- "IPY_MODEL_34f657dc269444ba804c1063afa7e895",
1060
- "IPY_MODEL_a5fea64e5c8541f4bbb91b9940cdf3b4"
1061
- ],
1062
- "layout": "IPY_MODEL_096eb5eab7504998b5f13b1a075fa227"
1063
- }
1064
- },
1065
- "c7bf8bc2c0e7414a9128cde3f2d18ae5": {
1066
- "model_module": "@jupyter-widgets/controls",
1067
- "model_name": "HTMLModel",
1068
- "model_module_version": "1.5.0",
1069
- "state": {
1070
- "_dom_classes": [],
1071
- "_model_module": "@jupyter-widgets/controls",
1072
- "_model_module_version": "1.5.0",
1073
- "_model_name": "HTMLModel",
1074
- "_view_count": null,
1075
- "_view_module": "@jupyter-widgets/controls",
1076
- "_view_module_version": "1.5.0",
1077
- "_view_name": "HTMLView",
1078
- "description": "",
1079
- "description_tooltip": null,
1080
- "layout": "IPY_MODEL_6c9b0776a7e84861974b5292623e1937",
1081
- "placeholder": "​",
1082
- "style": "IPY_MODEL_c52e05ec6c0f4b03ad572b5baf53e88c",
1083
- "value": "config.json: 100%"
1084
- }
1085
- },
1086
- "34f657dc269444ba804c1063afa7e895": {
1087
- "model_module": "@jupyter-widgets/controls",
1088
- "model_name": "FloatProgressModel",
1089
- "model_module_version": "1.5.0",
1090
- "state": {
1091
- "_dom_classes": [],
1092
- "_model_module": "@jupyter-widgets/controls",
1093
- "_model_module_version": "1.5.0",
1094
- "_model_name": "FloatProgressModel",
1095
- "_view_count": null,
1096
- "_view_module": "@jupyter-widgets/controls",
1097
- "_view_module_version": "1.5.0",
1098
- "_view_name": "ProgressView",
1099
- "bar_style": "success",
1100
- "description": "",
1101
- "description_tooltip": null,
1102
- "layout": "IPY_MODEL_b2fafe71bf074ea6a4e24ecf6427e1f3",
1103
- "max": 570,
1104
- "min": 0,
1105
- "orientation": "horizontal",
1106
- "style": "IPY_MODEL_9d40d652d2194a788c1d92594584b50f",
1107
- "value": 570
1108
- }
1109
- },
1110
- "a5fea64e5c8541f4bbb91b9940cdf3b4": {
1111
- "model_module": "@jupyter-widgets/controls",
1112
- "model_name": "HTMLModel",
1113
- "model_module_version": "1.5.0",
1114
- "state": {
1115
- "_dom_classes": [],
1116
- "_model_module": "@jupyter-widgets/controls",
1117
- "_model_module_version": "1.5.0",
1118
- "_model_name": "HTMLModel",
1119
- "_view_count": null,
1120
- "_view_module": "@jupyter-widgets/controls",
1121
- "_view_module_version": "1.5.0",
1122
- "_view_name": "HTMLView",
1123
- "description": "",
1124
- "description_tooltip": null,
1125
- "layout": "IPY_MODEL_659ca32ffdbd4b70813af7abc9bdede5",
1126
- "placeholder": "​",
1127
- "style": "IPY_MODEL_0925f2144aef4500ab7eb78672c046d4",
1128
- "value": " 570/570 [00:00<00:00, 32.0kB/s]"
1129
- }
1130
- },
1131
- "096eb5eab7504998b5f13b1a075fa227": {
1132
- "model_module": "@jupyter-widgets/base",
1133
- "model_name": "LayoutModel",
1134
- "model_module_version": "1.2.0",
1135
- "state": {
1136
- "_model_module": "@jupyter-widgets/base",
1137
- "_model_module_version": "1.2.0",
1138
- "_model_name": "LayoutModel",
1139
- "_view_count": null,
1140
- "_view_module": "@jupyter-widgets/base",
1141
- "_view_module_version": "1.2.0",
1142
- "_view_name": "LayoutView",
1143
- "align_content": null,
1144
- "align_items": null,
1145
- "align_self": null,
1146
- "border": null,
1147
- "bottom": null,
1148
- "display": null,
1149
- "flex": null,
1150
- "flex_flow": null,
1151
- "grid_area": null,
1152
- "grid_auto_columns": null,
1153
- "grid_auto_flow": null,
1154
- "grid_auto_rows": null,
1155
- "grid_column": null,
1156
- "grid_gap": null,
1157
- "grid_row": null,
1158
- "grid_template_areas": null,
1159
- "grid_template_columns": null,
1160
- "grid_template_rows": null,
1161
- "height": null,
1162
- "justify_content": null,
1163
- "justify_items": null,
1164
- "left": null,
1165
- "margin": null,
1166
- "max_height": null,
1167
- "max_width": null,
1168
- "min_height": null,
1169
- "min_width": null,
1170
- "object_fit": null,
1171
- "object_position": null,
1172
- "order": null,
1173
- "overflow": null,
1174
- "overflow_x": null,
1175
- "overflow_y": null,
1176
- "padding": null,
1177
- "right": null,
1178
- "top": null,
1179
- "visibility": null,
1180
- "width": null
1181
- }
1182
- },
1183
- "6c9b0776a7e84861974b5292623e1937": {
1184
- "model_module": "@jupyter-widgets/base",
1185
- "model_name": "LayoutModel",
1186
- "model_module_version": "1.2.0",
1187
- "state": {
1188
- "_model_module": "@jupyter-widgets/base",
1189
- "_model_module_version": "1.2.0",
1190
- "_model_name": "LayoutModel",
1191
- "_view_count": null,
1192
- "_view_module": "@jupyter-widgets/base",
1193
- "_view_module_version": "1.2.0",
1194
- "_view_name": "LayoutView",
1195
- "align_content": null,
1196
- "align_items": null,
1197
- "align_self": null,
1198
- "border": null,
1199
- "bottom": null,
1200
- "display": null,
1201
- "flex": null,
1202
- "flex_flow": null,
1203
- "grid_area": null,
1204
- "grid_auto_columns": null,
1205
- "grid_auto_flow": null,
1206
- "grid_auto_rows": null,
1207
- "grid_column": null,
1208
- "grid_gap": null,
1209
- "grid_row": null,
1210
- "grid_template_areas": null,
1211
- "grid_template_columns": null,
1212
- "grid_template_rows": null,
1213
- "height": null,
1214
- "justify_content": null,
1215
- "justify_items": null,
1216
- "left": null,
1217
- "margin": null,
1218
- "max_height": null,
1219
- "max_width": null,
1220
- "min_height": null,
1221
- "min_width": null,
1222
- "object_fit": null,
1223
- "object_position": null,
1224
- "order": null,
1225
- "overflow": null,
1226
- "overflow_x": null,
1227
- "overflow_y": null,
1228
- "padding": null,
1229
- "right": null,
1230
- "top": null,
1231
- "visibility": null,
1232
- "width": null
1233
- }
1234
- },
1235
- "c52e05ec6c0f4b03ad572b5baf53e88c": {
1236
- "model_module": "@jupyter-widgets/controls",
1237
- "model_name": "DescriptionStyleModel",
1238
- "model_module_version": "1.5.0",
1239
- "state": {
1240
- "_model_module": "@jupyter-widgets/controls",
1241
- "_model_module_version": "1.5.0",
1242
- "_model_name": "DescriptionStyleModel",
1243
- "_view_count": null,
1244
- "_view_module": "@jupyter-widgets/base",
1245
- "_view_module_version": "1.2.0",
1246
- "_view_name": "StyleView",
1247
- "description_width": ""
1248
- }
1249
- },
1250
- "b2fafe71bf074ea6a4e24ecf6427e1f3": {
1251
- "model_module": "@jupyter-widgets/base",
1252
- "model_name": "LayoutModel",
1253
- "model_module_version": "1.2.0",
1254
- "state": {
1255
- "_model_module": "@jupyter-widgets/base",
1256
- "_model_module_version": "1.2.0",
1257
- "_model_name": "LayoutModel",
1258
- "_view_count": null,
1259
- "_view_module": "@jupyter-widgets/base",
1260
- "_view_module_version": "1.2.0",
1261
- "_view_name": "LayoutView",
1262
- "align_content": null,
1263
- "align_items": null,
1264
- "align_self": null,
1265
- "border": null,
1266
- "bottom": null,
1267
- "display": null,
1268
- "flex": null,
1269
- "flex_flow": null,
1270
- "grid_area": null,
1271
- "grid_auto_columns": null,
1272
- "grid_auto_flow": null,
1273
- "grid_auto_rows": null,
1274
- "grid_column": null,
1275
- "grid_gap": null,
1276
- "grid_row": null,
1277
- "grid_template_areas": null,
1278
- "grid_template_columns": null,
1279
- "grid_template_rows": null,
1280
- "height": null,
1281
- "justify_content": null,
1282
- "justify_items": null,
1283
- "left": null,
1284
- "margin": null,
1285
- "max_height": null,
1286
- "max_width": null,
1287
- "min_height": null,
1288
- "min_width": null,
1289
- "object_fit": null,
1290
- "object_position": null,
1291
- "order": null,
1292
- "overflow": null,
1293
- "overflow_x": null,
1294
- "overflow_y": null,
1295
- "padding": null,
1296
- "right": null,
1297
- "top": null,
1298
- "visibility": null,
1299
- "width": null
1300
- }
1301
- },
1302
- "9d40d652d2194a788c1d92594584b50f": {
1303
- "model_module": "@jupyter-widgets/controls",
1304
- "model_name": "ProgressStyleModel",
1305
- "model_module_version": "1.5.0",
1306
- "state": {
1307
- "_model_module": "@jupyter-widgets/controls",
1308
- "_model_module_version": "1.5.0",
1309
- "_model_name": "ProgressStyleModel",
1310
- "_view_count": null,
1311
- "_view_module": "@jupyter-widgets/base",
1312
- "_view_module_version": "1.2.0",
1313
- "_view_name": "StyleView",
1314
- "bar_color": null,
1315
- "description_width": ""
1316
- }
1317
- },
1318
- "659ca32ffdbd4b70813af7abc9bdede5": {
1319
- "model_module": "@jupyter-widgets/base",
1320
- "model_name": "LayoutModel",
1321
- "model_module_version": "1.2.0",
1322
- "state": {
1323
- "_model_module": "@jupyter-widgets/base",
1324
- "_model_module_version": "1.2.0",
1325
- "_model_name": "LayoutModel",
1326
- "_view_count": null,
1327
- "_view_module": "@jupyter-widgets/base",
1328
- "_view_module_version": "1.2.0",
1329
- "_view_name": "LayoutView",
1330
- "align_content": null,
1331
- "align_items": null,
1332
- "align_self": null,
1333
- "border": null,
1334
- "bottom": null,
1335
- "display": null,
1336
- "flex": null,
1337
- "flex_flow": null,
1338
- "grid_area": null,
1339
- "grid_auto_columns": null,
1340
- "grid_auto_flow": null,
1341
- "grid_auto_rows": null,
1342
- "grid_column": null,
1343
- "grid_gap": null,
1344
- "grid_row": null,
1345
- "grid_template_areas": null,
1346
- "grid_template_columns": null,
1347
- "grid_template_rows": null,
1348
- "height": null,
1349
- "justify_content": null,
1350
- "justify_items": null,
1351
- "left": null,
1352
- "margin": null,
1353
- "max_height": null,
1354
- "max_width": null,
1355
- "min_height": null,
1356
- "min_width": null,
1357
- "object_fit": null,
1358
- "object_position": null,
1359
- "order": null,
1360
- "overflow": null,
1361
- "overflow_x": null,
1362
- "overflow_y": null,
1363
- "padding": null,
1364
- "right": null,
1365
- "top": null,
1366
- "visibility": null,
1367
- "width": null
1368
- }
1369
- },
1370
- "0925f2144aef4500ab7eb78672c046d4": {
1371
- "model_module": "@jupyter-widgets/controls",
1372
- "model_name": "DescriptionStyleModel",
1373
- "model_module_version": "1.5.0",
1374
- "state": {
1375
- "_model_module": "@jupyter-widgets/controls",
1376
- "_model_module_version": "1.5.0",
1377
- "_model_name": "DescriptionStyleModel",
1378
- "_view_count": null,
1379
- "_view_module": "@jupyter-widgets/base",
1380
- "_view_module_version": "1.2.0",
1381
- "_view_name": "StyleView",
1382
- "description_width": ""
1383
- }
1384
- },
1385
- "67a0910b76d14a1b9cdbd8bd33d06136": {
1386
- "model_module": "@jupyter-widgets/controls",
1387
- "model_name": "HBoxModel",
1388
- "model_module_version": "1.5.0",
1389
- "state": {
1390
- "_dom_classes": [],
1391
- "_model_module": "@jupyter-widgets/controls",
1392
- "_model_module_version": "1.5.0",
1393
- "_model_name": "HBoxModel",
1394
- "_view_count": null,
1395
- "_view_module": "@jupyter-widgets/controls",
1396
- "_view_module_version": "1.5.0",
1397
- "_view_name": "HBoxView",
1398
- "box_style": "",
1399
- "children": [
1400
- "IPY_MODEL_30584b8ff3ad410d8300dcc93c25fa71",
1401
- "IPY_MODEL_011a27b6d2c34c7ab470856726e03aa8",
1402
- "IPY_MODEL_ebc0ea5a9a4347c6aa41985b8c97cb04"
1403
- ],
1404
- "layout": "IPY_MODEL_cfdb390139a1496e96d0b1fb2cfca9a6"
1405
- }
1406
- },
1407
- "30584b8ff3ad410d8300dcc93c25fa71": {
1408
- "model_module": "@jupyter-widgets/controls",
1409
- "model_name": "HTMLModel",
1410
- "model_module_version": "1.5.0",
1411
- "state": {
1412
- "_dom_classes": [],
1413
- "_model_module": "@jupyter-widgets/controls",
1414
- "_model_module_version": "1.5.0",
1415
- "_model_name": "HTMLModel",
1416
- "_view_count": null,
1417
- "_view_module": "@jupyter-widgets/controls",
1418
- "_view_module_version": "1.5.0",
1419
- "_view_name": "HTMLView",
1420
- "description": "",
1421
- "description_tooltip": null,
1422
- "layout": "IPY_MODEL_1f5e5b6785b6492c84cf433737341374",
1423
- "placeholder": "​",
1424
- "style": "IPY_MODEL_43394d4c01cf405c94aaa766366fa865",
1425
- "value": "model.safetensors: 100%"
1426
- }
1427
- },
1428
- "011a27b6d2c34c7ab470856726e03aa8": {
1429
- "model_module": "@jupyter-widgets/controls",
1430
- "model_name": "FloatProgressModel",
1431
- "model_module_version": "1.5.0",
1432
- "state": {
1433
- "_dom_classes": [],
1434
- "_model_module": "@jupyter-widgets/controls",
1435
- "_model_module_version": "1.5.0",
1436
- "_model_name": "FloatProgressModel",
1437
- "_view_count": null,
1438
- "_view_module": "@jupyter-widgets/controls",
1439
- "_view_module_version": "1.5.0",
1440
- "_view_name": "ProgressView",
1441
- "bar_style": "success",
1442
- "description": "",
1443
- "description_tooltip": null,
1444
- "layout": "IPY_MODEL_60215eb7d10b4bb686e28307423ee403",
1445
- "max": 440449768,
1446
- "min": 0,
1447
- "orientation": "horizontal",
1448
- "style": "IPY_MODEL_3d9467c350e943b196f15f9776c80e59",
1449
- "value": 440449768
1450
- }
1451
- },
1452
- "ebc0ea5a9a4347c6aa41985b8c97cb04": {
1453
- "model_module": "@jupyter-widgets/controls",
1454
- "model_name": "HTMLModel",
1455
- "model_module_version": "1.5.0",
1456
- "state": {
1457
- "_dom_classes": [],
1458
- "_model_module": "@jupyter-widgets/controls",
1459
- "_model_module_version": "1.5.0",
1460
- "_model_name": "HTMLModel",
1461
- "_view_count": null,
1462
- "_view_module": "@jupyter-widgets/controls",
1463
- "_view_module_version": "1.5.0",
1464
- "_view_name": "HTMLView",
1465
- "description": "",
1466
- "description_tooltip": null,
1467
- "layout": "IPY_MODEL_4c4c7ed5844e4c1cb030f93a041c2107",
1468
- "placeholder": "​",
1469
- "style": "IPY_MODEL_f2d1d42d7c4d427ebac2c98b55e470b8",
1470
- "value": " 440M/440M [00:02<00:00, 183MB/s]"
1471
- }
1472
- },
1473
- "cfdb390139a1496e96d0b1fb2cfca9a6": {
1474
- "model_module": "@jupyter-widgets/base",
1475
- "model_name": "LayoutModel",
1476
- "model_module_version": "1.2.0",
1477
- "state": {
1478
- "_model_module": "@jupyter-widgets/base",
1479
- "_model_module_version": "1.2.0",
1480
- "_model_name": "LayoutModel",
1481
- "_view_count": null,
1482
- "_view_module": "@jupyter-widgets/base",
1483
- "_view_module_version": "1.2.0",
1484
- "_view_name": "LayoutView",
1485
- "align_content": null,
1486
- "align_items": null,
1487
- "align_self": null,
1488
- "border": null,
1489
- "bottom": null,
1490
- "display": null,
1491
- "flex": null,
1492
- "flex_flow": null,
1493
- "grid_area": null,
1494
- "grid_auto_columns": null,
1495
- "grid_auto_flow": null,
1496
- "grid_auto_rows": null,
1497
- "grid_column": null,
1498
- "grid_gap": null,
1499
- "grid_row": null,
1500
- "grid_template_areas": null,
1501
- "grid_template_columns": null,
1502
- "grid_template_rows": null,
1503
- "height": null,
1504
- "justify_content": null,
1505
- "justify_items": null,
1506
- "left": null,
1507
- "margin": null,
1508
- "max_height": null,
1509
- "max_width": null,
1510
- "min_height": null,
1511
- "min_width": null,
1512
- "object_fit": null,
1513
- "object_position": null,
1514
- "order": null,
1515
- "overflow": null,
1516
- "overflow_x": null,
1517
- "overflow_y": null,
1518
- "padding": null,
1519
- "right": null,
1520
- "top": null,
1521
- "visibility": null,
1522
- "width": null
1523
- }
1524
- },
1525
- "1f5e5b6785b6492c84cf433737341374": {
1526
- "model_module": "@jupyter-widgets/base",
1527
- "model_name": "LayoutModel",
1528
- "model_module_version": "1.2.0",
1529
- "state": {
1530
- "_model_module": "@jupyter-widgets/base",
1531
- "_model_module_version": "1.2.0",
1532
- "_model_name": "LayoutModel",
1533
- "_view_count": null,
1534
- "_view_module": "@jupyter-widgets/base",
1535
- "_view_module_version": "1.2.0",
1536
- "_view_name": "LayoutView",
1537
- "align_content": null,
1538
- "align_items": null,
1539
- "align_self": null,
1540
- "border": null,
1541
- "bottom": null,
1542
- "display": null,
1543
- "flex": null,
1544
- "flex_flow": null,
1545
- "grid_area": null,
1546
- "grid_auto_columns": null,
1547
- "grid_auto_flow": null,
1548
- "grid_auto_rows": null,
1549
- "grid_column": null,
1550
- "grid_gap": null,
1551
- "grid_row": null,
1552
- "grid_template_areas": null,
1553
- "grid_template_columns": null,
1554
- "grid_template_rows": null,
1555
- "height": null,
1556
- "justify_content": null,
1557
- "justify_items": null,
1558
- "left": null,
1559
- "margin": null,
1560
- "max_height": null,
1561
- "max_width": null,
1562
- "min_height": null,
1563
- "min_width": null,
1564
- "object_fit": null,
1565
- "object_position": null,
1566
- "order": null,
1567
- "overflow": null,
1568
- "overflow_x": null,
1569
- "overflow_y": null,
1570
- "padding": null,
1571
- "right": null,
1572
- "top": null,
1573
- "visibility": null,
1574
- "width": null
1575
- }
1576
- },
1577
- "43394d4c01cf405c94aaa766366fa865": {
1578
- "model_module": "@jupyter-widgets/controls",
1579
- "model_name": "DescriptionStyleModel",
1580
- "model_module_version": "1.5.0",
1581
- "state": {
1582
- "_model_module": "@jupyter-widgets/controls",
1583
- "_model_module_version": "1.5.0",
1584
- "_model_name": "DescriptionStyleModel",
1585
- "_view_count": null,
1586
- "_view_module": "@jupyter-widgets/base",
1587
- "_view_module_version": "1.2.0",
1588
- "_view_name": "StyleView",
1589
- "description_width": ""
1590
- }
1591
- },
1592
- "60215eb7d10b4bb686e28307423ee403": {
1593
- "model_module": "@jupyter-widgets/base",
1594
- "model_name": "LayoutModel",
1595
- "model_module_version": "1.2.0",
1596
- "state": {
1597
- "_model_module": "@jupyter-widgets/base",
1598
- "_model_module_version": "1.2.0",
1599
- "_model_name": "LayoutModel",
1600
- "_view_count": null,
1601
- "_view_module": "@jupyter-widgets/base",
1602
- "_view_module_version": "1.2.0",
1603
- "_view_name": "LayoutView",
1604
- "align_content": null,
1605
- "align_items": null,
1606
- "align_self": null,
1607
- "border": null,
1608
- "bottom": null,
1609
- "display": null,
1610
- "flex": null,
1611
- "flex_flow": null,
1612
- "grid_area": null,
1613
- "grid_auto_columns": null,
1614
- "grid_auto_flow": null,
1615
- "grid_auto_rows": null,
1616
- "grid_column": null,
1617
- "grid_gap": null,
1618
- "grid_row": null,
1619
- "grid_template_areas": null,
1620
- "grid_template_columns": null,
1621
- "grid_template_rows": null,
1622
- "height": null,
1623
- "justify_content": null,
1624
- "justify_items": null,
1625
- "left": null,
1626
- "margin": null,
1627
- "max_height": null,
1628
- "max_width": null,
1629
- "min_height": null,
1630
- "min_width": null,
1631
- "object_fit": null,
1632
- "object_position": null,
1633
- "order": null,
1634
- "overflow": null,
1635
- "overflow_x": null,
1636
- "overflow_y": null,
1637
- "padding": null,
1638
- "right": null,
1639
- "top": null,
1640
- "visibility": null,
1641
- "width": null
1642
- }
1643
- },
1644
- "3d9467c350e943b196f15f9776c80e59": {
1645
- "model_module": "@jupyter-widgets/controls",
1646
- "model_name": "ProgressStyleModel",
1647
- "model_module_version": "1.5.0",
1648
- "state": {
1649
- "_model_module": "@jupyter-widgets/controls",
1650
- "_model_module_version": "1.5.0",
1651
- "_model_name": "ProgressStyleModel",
1652
- "_view_count": null,
1653
- "_view_module": "@jupyter-widgets/base",
1654
- "_view_module_version": "1.2.0",
1655
- "_view_name": "StyleView",
1656
- "bar_color": null,
1657
- "description_width": ""
1658
- }
1659
- },
1660
- "4c4c7ed5844e4c1cb030f93a041c2107": {
1661
- "model_module": "@jupyter-widgets/base",
1662
- "model_name": "LayoutModel",
1663
- "model_module_version": "1.2.0",
1664
- "state": {
1665
- "_model_module": "@jupyter-widgets/base",
1666
- "_model_module_version": "1.2.0",
1667
- "_model_name": "LayoutModel",
1668
- "_view_count": null,
1669
- "_view_module": "@jupyter-widgets/base",
1670
- "_view_module_version": "1.2.0",
1671
- "_view_name": "LayoutView",
1672
- "align_content": null,
1673
- "align_items": null,
1674
- "align_self": null,
1675
- "border": null,
1676
- "bottom": null,
1677
- "display": null,
1678
- "flex": null,
1679
- "flex_flow": null,
1680
- "grid_area": null,
1681
- "grid_auto_columns": null,
1682
- "grid_auto_flow": null,
1683
- "grid_auto_rows": null,
1684
- "grid_column": null,
1685
- "grid_gap": null,
1686
- "grid_row": null,
1687
- "grid_template_areas": null,
1688
- "grid_template_columns": null,
1689
- "grid_template_rows": null,
1690
- "height": null,
1691
- "justify_content": null,
1692
- "justify_items": null,
1693
- "left": null,
1694
- "margin": null,
1695
- "max_height": null,
1696
- "max_width": null,
1697
- "min_height": null,
1698
- "min_width": null,
1699
- "object_fit": null,
1700
- "object_position": null,
1701
- "order": null,
1702
- "overflow": null,
1703
- "overflow_x": null,
1704
- "overflow_y": null,
1705
- "padding": null,
1706
- "right": null,
1707
- "top": null,
1708
- "visibility": null,
1709
- "width": null
1710
- }
1711
- },
1712
- "f2d1d42d7c4d427ebac2c98b55e470b8": {
1713
- "model_module": "@jupyter-widgets/controls",
1714
- "model_name": "DescriptionStyleModel",
1715
- "model_module_version": "1.5.0",
1716
- "state": {
1717
- "_model_module": "@jupyter-widgets/controls",
1718
- "_model_module_version": "1.5.0",
1719
- "_model_name": "DescriptionStyleModel",
1720
- "_view_count": null,
1721
- "_view_module": "@jupyter-widgets/base",
1722
- "_view_module_version": "1.2.0",
1723
- "_view_name": "StyleView",
1724
- "description_width": ""
1725
- }
1726
- }
1727
- }
1728
- }
1729
- },
1730
- "cells": [
1731
- {
1732
- "cell_type": "code",
1733
- "execution_count": 2,
1734
- "metadata": {
1735
- "colab": {
1736
- "base_uri": "https://localhost:8080/",
1737
- "height": 533,
1738
- "referenced_widgets": [
1739
- "b4906906681c43ba89fb34066e11b4ac",
1740
- "fcdcced29331494e90e1d92488a02ac5",
1741
- "39ed42acf1eb483db84485fd11557ee7",
1742
- "db4bab5e72bd4973a14b17ce9af0514e",
1743
- "8d9c237dc1b844eaa53081c4a1e22d2d",
1744
- "98946dd1aa624168a9e50073c37996ad",
1745
- "adbbd9c4fd8f475ea522142e85aa4197",
1746
- "1e80c6fed76e4d56b1d0950b90040131",
1747
- "28c8f58a19d14f0fb544a6b30adc4481",
1748
- "253c42626cc04cd19300ff9cc6372c94",
1749
- "7470f834fbe141b188ed48c47faca306",
1750
- "1b27c3504c1b42a4b34df15050933bdb",
1751
- "98f521a1cd184d09b57a76dfc072b5c1",
1752
- "f16f9a28caaa4b41863fe21b1f0c8895",
1753
- "43a39db0c9f8472b8c8663bebd351c9d",
1754
- "f9c3a5788ce14e9186db9f64105fdf78",
1755
- "d9d32f14c9704eb78127d7866c1d44ba",
1756
- "d1fd6f666e0f41fdb5f2c50ddabc6140",
1757
- "c57215fcc21a4fbe99f8e1425d0e7729",
1758
- "7ab8f5df787946b186bc31b9bf5aaacb",
1759
- "b8c47fd876ce41d094ea4344cc1269e5",
1760
- "c9db91385b5c437bb6ba3481d6946a6e",
1761
- "e1c8ad6de85543d19c2fa3216ab21fd5",
1762
- "a49b101ac9ee47319a4173c1e2772161",
1763
- "038e646a3d174e0480db73c6e650b2ed",
1764
- "aee756c78f754ffdad00bae8c95ffab9",
1765
- "d18b422acffb480889a5fcaf84ad02f0",
1766
- "6bd53a4ecb0d4720b0e0a288b3b22dc5",
1767
- "f69fa4164bb047b9a81dae4daf08632a",
1768
- "ee1453c0e355449db2ea166417a71d86",
1769
- "f13321d1862044038bcdcdeeebcc5f0c",
1770
- "afbe0d54241248f28036903aaade805d",
1771
- "13f6bfc3b9474faa8d268b712e3fa56a",
1772
- "e7ee9c4a9c0f4390b1df48af9db8e5db",
1773
- "c7bf8bc2c0e7414a9128cde3f2d18ae5",
1774
- "34f657dc269444ba804c1063afa7e895",
1775
- "a5fea64e5c8541f4bbb91b9940cdf3b4",
1776
- "096eb5eab7504998b5f13b1a075fa227",
1777
- "6c9b0776a7e84861974b5292623e1937",
1778
- "c52e05ec6c0f4b03ad572b5baf53e88c",
1779
- "b2fafe71bf074ea6a4e24ecf6427e1f3",
1780
- "9d40d652d2194a788c1d92594584b50f",
1781
- "659ca32ffdbd4b70813af7abc9bdede5",
1782
- "0925f2144aef4500ab7eb78672c046d4",
1783
- "67a0910b76d14a1b9cdbd8bd33d06136",
1784
- "30584b8ff3ad410d8300dcc93c25fa71",
1785
- "011a27b6d2c34c7ab470856726e03aa8",
1786
- "ebc0ea5a9a4347c6aa41985b8c97cb04",
1787
- "cfdb390139a1496e96d0b1fb2cfca9a6",
1788
- "1f5e5b6785b6492c84cf433737341374",
1789
- "43394d4c01cf405c94aaa766366fa865",
1790
- "60215eb7d10b4bb686e28307423ee403",
1791
- "3d9467c350e943b196f15f9776c80e59",
1792
- "4c4c7ed5844e4c1cb030f93a041c2107",
1793
- "f2d1d42d7c4d427ebac2c98b55e470b8"
1794
- ]
1795
- },
1796
- "id": "ZXBibBYX9hbR",
1797
- "outputId": "a0ee977e-d77e-453d-aa3d-bacdb80917c1"
1798
- },
1799
- "outputs": [
1800
- {
1801
- "output_type": "stream",
1802
- "name": "stderr",
1803
- "text": [
1804
- "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
1805
- "[nltk_data] Package punkt is already up-to-date!\n",
1806
- "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
1807
- "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
1808
- "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
1809
- "You will be able to reuse this secret in all of your notebooks.\n",
1810
- "Please note that authentication is recommended but still optional to access public models or datasets.\n",
1811
- " warnings.warn(\n"
1812
- ]
1813
- },
1814
- {
1815
- "output_type": "display_data",
1816
- "data": {
1817
- "text/plain": [
1818
- "tokenizer_config.json: 0%| | 0.00/48.0 [00:00<?, ?B/s]"
1819
- ],
1820
- "application/vnd.jupyter.widget-view+json": {
1821
- "version_major": 2,
1822
- "version_minor": 0,
1823
- "model_id": "b4906906681c43ba89fb34066e11b4ac"
1824
- }
1825
- },
1826
- "metadata": {}
1827
- },
1828
- {
1829
- "output_type": "display_data",
1830
- "data": {
1831
- "text/plain": [
1832
- "vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]"
1833
- ],
1834
- "application/vnd.jupyter.widget-view+json": {
1835
- "version_major": 2,
1836
- "version_minor": 0,
1837
- "model_id": "1b27c3504c1b42a4b34df15050933bdb"
1838
- }
1839
- },
1840
- "metadata": {}
1841
- },
1842
- {
1843
- "output_type": "display_data",
1844
- "data": {
1845
- "text/plain": [
1846
- "tokenizer.json: 0%| | 0.00/466k [00:00<?, ?B/s]"
1847
- ],
1848
- "application/vnd.jupyter.widget-view+json": {
1849
- "version_major": 2,
1850
- "version_minor": 0,
1851
- "model_id": "e1c8ad6de85543d19c2fa3216ab21fd5"
1852
- }
1853
- },
1854
- "metadata": {}
1855
- },
1856
- {
1857
- "output_type": "display_data",
1858
- "data": {
1859
- "text/plain": [
1860
- "config.json: 0%| | 0.00/570 [00:00<?, ?B/s]"
1861
- ],
1862
- "application/vnd.jupyter.widget-view+json": {
1863
- "version_major": 2,
1864
- "version_minor": 0,
1865
- "model_id": "e7ee9c4a9c0f4390b1df48af9db8e5db"
1866
- }
1867
- },
1868
- "metadata": {}
1869
- },
1870
- {
1871
- "output_type": "display_data",
1872
- "data": {
1873
- "text/plain": [
1874
- "model.safetensors: 0%| | 0.00/440M [00:00<?, ?B/s]"
1875
- ],
1876
- "application/vnd.jupyter.widget-view+json": {
1877
- "version_major": 2,
1878
- "version_minor": 0,
1879
- "model_id": "67a0910b76d14a1b9cdbd8bd33d06136"
1880
- }
1881
- },
1882
- "metadata": {}
1883
- },
1884
- {
1885
- "output_type": "stream",
1886
- "name": "stderr",
1887
- "text": [
1888
- "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
1889
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
1890
- "/usr/local/lib/python3.11/dist-packages/transformers/training_args.py:1611: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
1891
- " warnings.warn(\n"
1892
- ]
1893
- },
1894
- {
1895
- "output_type": "display_data",
1896
- "data": {
1897
- "text/plain": [
1898
- "<IPython.core.display.HTML object>"
1899
- ],
1900
- "text/html": [
1901
- "\n",
1902
- " <div>\n",
1903
- " \n",
1904
- " <progress value='6' max='6' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1905
- " [6/6 00:23, Epoch 3/3]\n",
1906
- " </div>\n",
1907
- " <table border=\"1\" class=\"dataframe\">\n",
1908
- " <thead>\n",
1909
- " <tr style=\"text-align: left;\">\n",
1910
- " <th>Step</th>\n",
1911
- " <th>Training Loss</th>\n",
1912
- " <th>Validation Loss</th>\n",
1913
- " </tr>\n",
1914
- " </thead>\n",
1915
- " <tbody>\n",
1916
- " </tbody>\n",
1917
- "</table><p>"
1918
- ]
1919
- },
1920
- "metadata": {}
1921
- },
1922
- {
1923
- "output_type": "execute_result",
1924
- "data": {
1925
- "text/plain": [
1926
- "('./fine_tuned_model/tokenizer_config.json',\n",
1927
- " './fine_tuned_model/special_tokens_map.json',\n",
1928
- " './fine_tuned_model/vocab.txt',\n",
1929
- " './fine_tuned_model/added_tokens.json')"
1930
- ]
1931
- },
1932
- "metadata": {},
1933
- "execution_count": 2
1934
- }
1935
- ],
1936
- "source": [
1937
- "import re\n",
1938
- "import pandas as pd\n",
1939
- "import torch\n",
1940
- "from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments\n",
1941
- "import nltk\n",
1942
- "nltk.download('punkt')\n",
1943
- "\n",
1944
- "# ===========================\n",
1945
- "# A. Data Collection and Preprocessing\n",
1946
- "# ===========================\n",
1947
- "# Load your narrative texts dataset (CSV with a \"text\" column)\n",
1948
- "df = pd.read_csv(\"narrative_texts.csv\")\n",
1949
- "\n",
1950
- "# Normalize the text (lowercase, remove non-alphabetical characters)\n",
1951
- "def normalize_text(text):\n",
1952
- " text = text.lower()\n",
1953
- " text = re.sub(r'[^a-z\\s]', '', text) # remove non-alphabetical characters\n",
1954
- " text = re.sub(r'\\s+', ' ', text).strip()\n",
1955
- " return text\n",
1956
- "\n",
1957
- "df['text'] = df['text'].apply(normalize_text)\n",
1958
- "\n",
1959
- "# ===========================\n",
1960
- "# B. Counterfactual Data Augmentation and Gender-Swapping\n",
1961
- "# ===========================\n",
1962
- "def gender_swap(text):\n",
1963
- " # Simplified swapping; extend dictionary as needed.\n",
1964
- " swaps = {\n",
1965
- " \" he \": \" TEMP \", \" she \": \" he \", \" TEMP \": \" she \",\n",
1966
- " \" his \": \" TEMP2 \", \" her \": \" his \", \" TEMP2 \": \" her \",\n",
1967
- " \" him \": \" TEMP3 \", \" her \": \" him \", \" TEMP3 \": \" her \"\n",
1968
- " }\n",
1969
- " for key, value in swaps.items():\n",
1970
- " text = text.replace(key, value)\n",
1971
- " return text\n",
1972
- "\n",
1973
- "df['text_swapped'] = df['text'].apply(gender_swap)\n",
1974
- "\n",
1975
- "def remove_gender_terms(text):\n",
1976
- " pattern = r'\\b(he|she|him|his|her)\\b'\n",
1977
- " return re.sub(pattern, '', text)\n",
1978
- "\n",
1979
- "df['text_removed'] = df['text'].apply(remove_gender_terms)\n",
1980
- "\n",
1981
- "# ===========================\n",
1982
- "# C. Create Dataset Conditions for Fine-Tuning\n",
1983
- "# ===========================\n",
1984
- "# Condition 1: Original dataset\n",
1985
- "df_original = df[['text']].copy()\n",
1986
- "\n",
1987
- "# Condition 2: Removed gender terms\n",
1988
- "df_removed = df[['text_removed']].rename(columns={'text_removed': 'text'}).copy()\n",
1989
- "\n",
1990
- "# Condition 3: Mixed dataset (original + gender-swapped)\n",
1991
- "df_mixed = pd.concat([df[['text']], df[['text_swapped']].rename(columns={'text_swapped': 'text'})], ignore_index=True)\n",
1992
- "labels_mixed = [0] * len(df) + [1] * len(df) # Label 0: original, Label 1: swapped\n",
1993
- "\n",
1994
- "# ===========================\n",
1995
- "# D. Fine-Tuning\n",
1996
- "# ===========================\n",
1997
- "# Load pretrained tokenizer and model (using BERT-base-uncased for demonstration)\n",
1998
- "tokenizer = BertTokenizer.from_pretrained(\"bert-base-uncased\")\n",
1999
- "model = BertForSequenceClassification.from_pretrained(\"bert-base-uncased\", num_labels=2)\n",
2000
- "\n",
2001
class TextDataset(torch.utils.data.Dataset):
    """Torch dataset pairing tokenized texts with their labels.

    All texts are tokenized once, at construction time, with the module-level
    ``tokenizer`` (truncation and padding enabled, ``max_length=128``).
    """

    def __init__(self, texts, labels):
        # `texts` must provide .tolist() (e.g. a pandas Series);
        # `labels` must be indexable and support len().
        self.encodings = tokenizer(
            texts.tolist(),
            truncation=True,
            padding=True,
            max_length=128,
        )
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # One tensor per encoding field, plus the label under the "labels" key.
        item = {}
        for field, values in self.encodings.items():
            item[field] = torch.tensor(values[idx])
        item["labels"] = torch.tensor(self.labels[idx])
        return item
2014
- "\n",
2015
# Tokenize the mixed corpus and pair each text with its original/swapped label.
dataset_mixed = TextDataset(texts=df_mixed["text"], labels=labels_mixed)
2016
- "\n",
2017
# Training configuration; external loggers such as wandb are switched off.
training_args = TrainingArguments(
    # Run identity and output locations
    run_name="bias_bin_training",
    output_dir="./results",
    logging_dir="./logs",
    report_to=[],  # Disable external logging
    # Optimisation schedule
    learning_rate=2e-5,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Evaluate every 500 steps.
    # NOTE(review): recent transformers releases spell this `eval_strategy`;
    # confirm against the installed version.
    evaluation_strategy="steps",
    eval_steps=500,
)
2030
- "\n",
2031
# Assemble the Trainer. The same dataset is used for training and evaluation
# here; in practice, use a separate validation set.
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=dataset_mixed,
    train_dataset=dataset_mixed,
)

# Run fine-tuning.
trainer.train()

# Write model and tokenizer to the same directory for inference deployment.
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")
2045
- ]
2046
- },
2047
- {
2048
- "cell_type": "code",
2049
- "source": [],
2050
- "metadata": {
2051
- "id": "v78LA2dA9u6s"
2052
- },
2053
- "execution_count": null,
2054
- "outputs": []
2055
- }
2056
- ]
2057
- }