{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"machine_shape":"hm","gpuType":"A100","mount_file_id":"1ibwAUkyKJzsvfosNmQrod9vp6k5kSwLJ","authorship_tag":"ABX9TyPKz9tldD+FLG6dVmuREi5Q"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"d959e79716954c5a9f5847a3c30f17c9":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_3c7d76ee647e49218413ee59d2343c37","IPY_MODEL_9230a36324d94ffc87376555f894c154","IPY_MODEL_3b476323f31e4f7b827dde7348886151"],"layout":"IPY_MODEL_d46766a84d5e49a0985a9a8ac8cb0f0e"}},"3c7d76ee647e49218413ee59d2343c37":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_86334e1ec0694ff887c969260dbf2138","placeholder":"​","style":"IPY_MODEL_890674bace95461a8aa080b02c8c96fe","value":"Map: 
100%"}},"9230a36324d94ffc87376555f894c154":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_23eb7dc6325c473695a5cfc775078617","max":79636,"min":0,"orientation":"horizontal","style":"IPY_MODEL_0d13c27ab4c34b3980a949029ac0a0ef","value":79636}},"3b476323f31e4f7b827dde7348886151":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_00745097cc034fdfac5ae3fd8379d3ef","placeholder":"​","style":"IPY_MODEL_073617e333a54f9cbd525c03d3bf414b","value":" 79636/79636 [00:16<00:00, 4858.68 
examples/s]"}},"d46766a84d5e49a0985a9a8ac8cb0f0e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"86334e1ec0694ff887c969260dbf2138":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"890674bace95461a8aa080b02c8c96fe":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"23eb7dc6325c473695a5cfc775078617":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0d13c27ab4c34b3980a949029ac0a0ef":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"00745097cc034fdfac5ae3fd8379d3ef":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"073617e333a54f9cbd525c03d3bf414b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"67803b5fd32f4bd085cb436edf2fdc86":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_00e8be3451f64be29c8ae4cfa3bd4b31","IPY_MODEL_591cf3358a764ef0b5c2e2ed1634a90a","IPY_MODEL_93f0d22f8401419c90e6be6d2b8f501c"],"layout":"IPY_MODEL_c03e756a1bcb416ebe2839a9b0e4850
2"}},"00e8be3451f64be29c8ae4cfa3bd4b31":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_bfd145a23c3d48e6a2d6a2041f08127a","placeholder":"​","style":"IPY_MODEL_48294650997a4c9c9cfe3760792f47b7","value":"Map: 100%"}},"591cf3358a764ef0b5c2e2ed1634a90a":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_2a7c0be5efda4c5e82c8b7c73013087b","max":19909,"min":0,"orientation":"horizontal","style":"IPY_MODEL_25017af2ea364b4b89e629f329c2995c","value":19909}},"93f0d22f8401419c90e6be6d2b8f501c":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_05bea094ed074992805db4bf04303d94","placeholder":"​","style":"IPY_MODEL_9c2199eacfbe4b3eadcde54c3af50a66","value":" 19909/19909 [00:04<00:00, 4514.54 
examples/s]"}},"c03e756a1bcb416ebe2839a9b0e48502":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bfd145a23c3d48e6a2d6a2041f08127a":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"48294650997a4c9c9cfe3760792f47b7":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2a7c0be5efda4c5e82c8b7c73013087b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"25017af2ea364b4b89e629f329c2995c":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"05bea094ed074992805db4bf04303d94":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9c2199eacfbe4b3eadcde54c3af50a66":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"3b5fef4af6e94b7f9e66260485089de2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_746fa51ba3e74569b2f9b8678bf411a5","IPY_MODEL_c2db4f28a4a04fa7959c02a5fb63ea52","IPY_MODEL_4caf651fa193481787381bdc02214317"],"layout":"IPY_MODEL_1be48ef4629d4e9ab4c6ffeaa84f790
b"}},"746fa51ba3e74569b2f9b8678bf411a5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_293e47b815264bb8aa4106c861eeccc5","placeholder":"​","style":"IPY_MODEL_b6ac20f5842a45b081f76c5d371fb10f","value":"Map: 100%"}},"c2db4f28a4a04fa7959c02a5fb63ea52":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_fe1244263a9b4aee8264ef077448e990","max":79636,"min":0,"orientation":"horizontal","style":"IPY_MODEL_34959666fa8a4e2eaece1dc09676450f","value":79636}},"4caf651fa193481787381bdc02214317":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0638e4b48390486493db035d7a9509ae","placeholder":"​","style":"IPY_MODEL_475ed8c87d394765a1ea09bd01007dcc","value":" 79636/79636 [00:09<00:00, 10112.67 
examples/s]"}},"1be48ef4629d4e9ab4c6ffeaa84f790b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"293e47b815264bb8aa4106c861eeccc5":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b6ac20f5842a45b081f76c5d371fb10f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"fe1244263a9b4aee8264ef077448e990":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"34959666fa8a4e2eaece1dc09676450f":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"0638e4b48390486493db035d7a9509ae":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"475ed8c87d394765a1ea09bd01007dcc":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0d684f92091c4f40a3fd8ff5f711cd51":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_3a57d0e662cb4402abf6fa34b881621a","IPY_MODEL_a542dbab078a47249b8d6a958669b7d5","IPY_MODEL_4a8ec2215e3f4ca088b667ef7eb22a00"],"layout":"IPY_MODEL_e99b336c7ad1494f93f7bb5c54427eb
3"}},"3a57d0e662cb4402abf6fa34b881621a":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b26104c7f6014a39bd83e9ef64cb51a1","placeholder":"​","style":"IPY_MODEL_4ea12f5c5a784e3aa092d98e1a795405","value":"Map: 100%"}},"a542dbab078a47249b8d6a958669b7d5":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_b181027d3ab34628a97fa17a7e72f8dc","max":19909,"min":0,"orientation":"horizontal","style":"IPY_MODEL_5638877bc40d4f269d5e6fc82e01e801","value":19909}},"4a8ec2215e3f4ca088b667ef7eb22a00":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e1df768ad19140afaec3f894a617cbd6","placeholder":"​","style":"IPY_MODEL_b6fa227773ee4d53945e706a7036dbfc","value":" 19909/19909 [00:02<00:00, 9308.25 
examples/s]"}},"e99b336c7ad1494f93f7bb5c54427eb3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b26104c7f6014a39bd83e9ef64cb51a1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4ea12f5c5a784e3aa092d98e1a795405":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"b181027d3ab34628a97fa17a7e72f8dc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5638877bc40d4f269d5e6fc82e01e801":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e1df768ad19140afaec3f894a617cbd6":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b6fa227773ee4d53945e706a7036dbfc":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"code","source":["!pip install torch torchtext pytorch-crf torchcrf transformers seqeval datasets huggingface_hub evaluate\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"t3bN9zGVuHXy","executionInfo":{"status":"ok","timestamp":1730568030327,"user_tz":-240,"elapsed":3211,"user":{"displayName":"Ismat Samadov","userId":"13714662825869203427"}},"outputId":"154a02f0-fe46-4838-94c5-ed1e7bf836d2"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: torch in 
/usr/local/lib/python3.10/dist-packages (2.5.0+cu121)\n","Requirement already satisfied: torchtext in /usr/local/lib/python3.10/dist-packages (0.18.0)\n","Requirement already satisfied: pytorch-crf in /usr/local/lib/python3.10/dist-packages (0.7.2)\n","Requirement already satisfied: torchcrf in /usr/local/lib/python3.10/dist-packages (1.1.0)\n","Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.44.2)\n","Requirement already satisfied: seqeval in /usr/local/lib/python3.10/dist-packages (1.2.2)\n","Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (3.1.0)\n","Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (0.24.7)\n","Requirement already satisfied: evaluate in /usr/local/lib/python3.10/dist-packages (0.4.3)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.16.1)\n","Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.12.2)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.4.2)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2024.9.0)\n","Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch) (1.13.1)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from torchtext) (4.66.6)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchtext) (2.32.3)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchtext) (1.26.4)\n","Requirement already 
satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.2)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.9.11)\n","Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.5)\n","Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n","Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.10/dist-packages (from seqeval) (1.5.2)\n","Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (17.0.0)\n","Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.5.0)\n","Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.10.10)\n","Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.3)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.5.0)\n","Requirement already satisfied: multidict<7.0,>=4.5 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n","Requirement already satisfied: yarl<2.0,>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.17.0)\n","Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (3.4.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (3.10)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (2.2.3)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (2024.8.30)\n","Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.13.1)\n","Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.2)\n","Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.21.3->seqeval) (3.5.0)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (3.0.2)\n","Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n","Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n","Requirement already satisfied: propcache>=0.2.0 in 
/usr/local/lib/python3.10/dist-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0)\n"]}]},{"cell_type":"code","source":["import os\n","import warnings\n","import numpy as np\n","import torch\n","from datasets import load_dataset, DatasetDict\n","from transformers import (AutoTokenizer, DataCollatorForTokenClassification,\n"," TrainingArguments, Trainer, AutoModelForTokenClassification)\n","from huggingface_hub import login\n","import evaluate\n","import ast\n"],"metadata":{"id":"0u3Sl4J9upSR"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Log in to Hugging Face Hub.\n","# The access token is read from the HF_TOKEN environment variable so the secret\n","# never lives in the notebook source. When the variable is unset, token=None\n","# makes login() ask for the token interactively.\n","login(token=os.environ.get(\"HF_TOKEN\"))\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ygEWbPjcupP_","executionInfo":{"status":"ok","timestamp":1730568044808,"user_tz":-240,"elapsed":4,"user":{"displayName":"Ismat Samadov","userId":"13714662825869203427"}},"outputId":"5deafea9-7eb0-46d8-9f91-a8a83d9549f0"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["The token has not been saved to the git credentials helper. 
Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n","Token is valid (permission: fineGrained).\n","Your token has been saved to /root/.cache/huggingface/token\n","Login successful\n"]}]},{"cell_type":"code","source":["# Disable W&B logs and filter warnings\n","os.environ[\"WANDB_DISABLED\"] = \"true\"\n","warnings.filterwarnings(\"ignore\")\n","\n","# Set device (GPU if available)\n","device = 'cuda' if torch.cuda.is_available() else 'cpu'\n"],"metadata":{"id":"e82PO4FTupNl"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Load the dataset\n","dataset = load_dataset(\"LocalDoc/azerbaijani-ner-dataset\")\n","\n","# Split the train dataset into train and validation subsets\n","train_test_split = dataset[\"train\"].train_test_split(test_size=0.2, seed=42)\n","dataset = DatasetDict({\n"," \"train\": train_test_split[\"train\"],\n"," \"validation\": train_test_split[\"test\"]\n","})\n","\n","# Check the dataset split\n","print(dataset)\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ljUw8jKMupLM","executionInfo":{"status":"ok","timestamp":1730568050511,"user_tz":-240,"elapsed":5705,"user":{"displayName":"Ismat Samadov","userId":"13714662825869203427"}},"outputId":"40133f8b-064d-4e76-e57e-cca79d2264b1"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["DatasetDict({\n"," train: Dataset({\n"," features: ['index', 'tokens', 'ner_tags'],\n"," num_rows: 79636\n"," })\n"," validation: Dataset({\n"," features: ['index', 'tokens', 'ner_tags'],\n"," num_rows: 19909\n"," })\n","})\n"]}]},{"cell_type":"code","source":["def _parse_list_field(value):\n","    \"\"\"Return `value` as a list, parsing it first when it is a string.\n","\n","    Raises ValueError or SyntaxError when `value` cannot be parsed or is not a\n","    list or tuple (this includes None).\n","    \"\"\"\n","    if isinstance(value, str):\n","        value = ast.literal_eval(value)\n","    if not isinstance(value, (list, tuple)):\n","        raise ValueError(f\"expected a list, got {type(value).__name__}\")\n","    return list(value)\n","\n","\n","def process_ner_tags(example):\n","    \"\"\"Turn the `tokens` and `ner_tags` fields of one example into lists.\n","\n","    Each field may hold the string form of a Python list or an already parsed\n","    list, so applying the function to its own output leaves the example\n","    unchanged and re-running the cell does not wipe the data.\n","\n","    Args:\n","        example: mapping with the keys `tokens` and `ner_tags`.\n","\n","    Returns:\n","        The same mapping with `tokens` set to a list and `ner_tags` set to a\n","        list of ints. Both become empty lists when a field is None or empty,\n","        cannot be parsed, holds a tag that is not an integer, or when the two\n","        lists differ in length.\n","    \"\"\"\n","    try:\n","        tokens = _parse_list_field(example[\"tokens\"])\n","        tags = [int(tag) for tag in _parse_list_field(example[\"ner_tags\"])]\n","    except (ValueError, 
SyntaxError, TypeError):\n","        # Malformed row: blank it instead of aborting the whole map() run.\n","        tokens, tags = [], []\n","    # Every token needs exactly one tag; an empty or misaligned row is unusable.\n","    if not tokens or len(tokens) != len(tags):\n","        tokens, tags = [], []\n","    example[\"tokens\"], example[\"ner_tags\"] = tokens, tags\n","    return example\n","\n","# Apply preprocessing function\n","dataset = dataset.map(process_ner_tags)\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":81,"referenced_widgets":["d959e79716954c5a9f5847a3c30f17c9","3c7d76ee647e49218413ee59d2343c37","9230a36324d94ffc87376555f894c154","3b476323f31e4f7b827dde7348886151","d46766a84d5e49a0985a9a8ac8cb0f0e","86334e1ec0694ff887c969260dbf2138","890674bace95461a8aa080b02c8c96fe","23eb7dc6325c473695a5cfc775078617","0d13c27ab4c34b3980a949029ac0a0ef","00745097cc034fdfac5ae3fd8379d3ef","073617e333a54f9cbd525c03d3bf414b","67803b5fd32f4bd085cb436edf2fdc86","00e8be3451f64be29c8ae4cfa3bd4b31","591cf3358a764ef0b5c2e2ed1634a90a","93f0d22f8401419c90e6be6d2b8f501c","c03e756a1bcb416ebe2839a9b0e48502","bfd145a23c3d48e6a2d6a2041f08127a","48294650997a4c9c9cfe3760792f47b7","2a7c0be5efda4c5e82c8b7c73013087b","25017af2ea364b4b89e629f329c2995c","05bea094ed074992805db4bf04303d94","9c2199eacfbe4b3eadcde54c3af50a66"]},"id":"4rjVhO1bupIi","executionInfo":{"status":"ok","timestamp":1730568071975,"user_tz":-240,"elapsed":21468,"user":{"displayName":"Ismat Samadov","userId":"13714662825869203427"}},"outputId":"ee69bd5d-df92-494b-bd0f-67279a36372c"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["Map: 0%| | 0/79636 [00:00"],"text/html":["\n","
\n"," \n"," \n"," [3735/3735 16:27, Epoch 3/3]\n","
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
EpochTraining LossValidation LossPrecisionRecallF1Accuracy
10.2952000.2657110.7154240.6228530.6659370.919136
20.2486000.2520830.7210360.6379790.6769700.921439
30.2068000.2533720.7048720.6506840.6766950.920898

"]},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["TrainOutput(global_step=3735, training_loss=0.27468724043334186, metrics={'train_runtime': 989.0494, 'train_samples_per_second': 241.553, 'train_steps_per_second': 3.776, 'total_flos': 1.15112026864878e+16, 'train_loss': 0.27468724043334186, 'epoch': 3.0})"]},"metadata":{},"execution_count":14}]},{"cell_type":"code","source":["eval_results = trainer.evaluate()\n","print(f\"Evaluation results: {eval_results}\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":75},"id":"lVFlghpFu8n6","executionInfo":{"status":"ok","timestamp":1730569109543,"user_tz":-240,"elapsed":34163,"user":{"displayName":"Ismat Samadov","userId":"13714662825869203427"}},"outputId":"caa0a88d-50ed-4312-9f78-26089e4d9000"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":[""],"text/html":["\n","

\n"," \n"," \n"," [312/312 00:24]\n","
\n"," "]},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Evaluation results: {'eval_loss': 0.25337204337120056, 'eval_precision': 0.7048723772442137, 'eval_recall': 0.6506839057507987, 'eval_f1': 0.6766950472432769, 'eval_accuracy': 0.9208976538986301, 'eval_runtime': 34.0968, 'eval_samples_per_second': 583.896, 'eval_steps_per_second': 9.15, 'epoch': 3.0}\n"]}]},{"cell_type":"code","source":["model.save_pretrained(\"mbert-azerbaijani-ner\")\n","tokenizer.save_pretrained(\"mbert-azerbaijani-ner\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"y8RL5Joku8lZ","executionInfo":{"status":"ok","timestamp":1730569193749,"user_tz":-240,"elapsed":1604,"user":{"displayName":"Ismat Samadov","userId":"13714662825869203427"}},"outputId":"2d6b3f6c-730e-42c4-939c-49974187314a"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["('mbert-azerbaijani-ner/tokenizer_config.json',\n"," 'mbert-azerbaijani-ner/special_tokens_map.json',\n"," 'mbert-azerbaijani-ner/vocab.txt',\n"," 'mbert-azerbaijani-ner/added_tokens.json',\n"," 'mbert-azerbaijani-ner/tokenizer.json')"]},"metadata":{},"execution_count":18}]},{"cell_type":"code","source":["model = AutoModelForTokenClassification.from_pretrained(\"mbert-azerbaijani-ner\")\n","tokenizer = AutoTokenizer.from_pretrained(\"mbert-azerbaijani-ner\")\n"],"metadata":{"id":"6mb7lpAiu8jI"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Run the fine-tuned model on a sample sentence.\n","# `model` and `tokenizer` are the objects reloaded from \"mbert-azerbaijani-ner\"\n","# in the previous cell, and torch comes from the import cell at the top, so\n","# nothing is imported or loaded a second time here.\n","\n","\n","def merge_wordpieces(tokens, labels):\n","    \"\"\"Join WordPiece sub-tokens back into words.\n","\n","    Args:\n","        tokens: token strings; a piece that continues the previous word starts\n","            with `##`.\n","        labels: one label per token, aligned with `tokens`.\n","\n","    Returns:\n","        A list of (word, label) tuples in input order. Each word carries the\n","        label of its first piece. Tokens without the `##` prefix, including\n","        markers such as [CLS] and [SEP], each start a new entry.\n","    \"\"\"\n","    word_label_pairs = []\n","    for token, label in zip(tokens, labels):\n","        if token.startswith(\"##\") and word_label_pairs:\n","            # Continuation piece: extend the last word, keep its first label.\n","            word, first_label = word_label_pairs[-1]\n","            word_label_pairs[-1] = (word + token[2:], first_label)\n","        elif token.startswith(\"##\"):\n","            # Continuation piece with nothing before it: treat it as a word.\n","            word_label_pairs.append((token[2:], label))\n","        else:\n","            word_label_pairs.append((token, label))\n","    return word_label_pairs\n","\n","\n","# Sample input text\n","text = \"Azərbaycanın paytaxtı Bakı, Xəzər dənizi sahilində yerləşir.\"\n","\n","# Tokenize the input text\n","inputs = tokenizer(text, return_tensors=\"pt\", truncation=True)\n","\n","# Make 
predictions\n","with torch.no_grad():\n","    outputs = model(**inputs)\n","\n","# Extract logits and predicted token class IDs.\n","# The batch holds one sentence, so it is indexed explicitly; squeeze() would\n","# also drop the token axis if that axis ever had length 1.\n","logits = outputs.logits\n","predicted_token_class_ids = logits[0].argmax(dim=-1).tolist()\n","tokens = tokenizer.convert_ids_to_tokens(inputs[\"input_ids\"][0])\n","\n","# Map predictions to label names\n","label_to_id = {\n","    \"O\": 0, \"PERSON\": 1, \"LOCATION\": 2, \"ORGANISATION\": 3, \"DATE\": 4, \"TIME\": 5, \"MONEY\": 6,\n","    \"PERCENTAGE\": 7, \"FACILITY\": 8, \"PRODUCT\": 9, \"EVENT\": 10, \"ART\": 11, \"LAW\": 12, \"LANGUAGE\": 13,\n","    \"GPE\": 14, \"NORP\": 15, \"ORDINAL\": 16, \"CARDINAL\": 17, \"DISEASE\": 18, \"CONTACT\": 19, \"ADAGE\": 20,\n","    \"QUANTITY\": 21, \"MISCELLANEOUS\": 22, \"POSITION\": 23, \"PROJECT\": 24\n","}\n","id_to_label = {v: k for k, v in label_to_id.items()}\n","predicted_labels = [id_to_label[label_id] for label_id in predicted_token_class_ids]\n","\n","# Combine subwords so that only complete words are reported\n","word_label_pairs = merge_wordpieces(tokens, predicted_labels)\n","\n","# Display the combined words and their labels\n","for word, label in word_label_pairs:\n","    print(f\"{word}: {label}\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Gk--Z0bGu8cb","executionInfo":{"status":"ok","timestamp":1730569309953,"user_tz":-240,"elapsed":606,"user":{"displayName":"Ismat 
Samadov","userId":"13714662825869203427"}},"outputId":"2f123449-67cf-49b0-86c0-3779621064c6"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["[CLS]: O\n","Azərbaycanın: GPE\n","paytaxtı: POSITION\n","Bakı: GPE\n",",: O\n","Xəzər: LOCATION\n","dənizi: LOCATION\n","sahilində: O\n","yerləşir: O\n",".: O\n","[SEP]: O\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"peZJHC-tuHSi"},"execution_count":null,"outputs":[]}]}