| {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","authorship_tag":"ABX9TyNCkH+E5xrivgsrID864kxM"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"widgets":{"application/vnd.jupyter.widget-state+json":{"0c249b924c59409db54890e02a5c7674":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_7ca42e8e814244f687deffd0a55b42cb","IPY_MODEL_0e08aed6fa2e459da0864715e4ae60a7","IPY_MODEL_7c9c2219c41546d8aa5bf8e42dcf5c42"],"layout":"IPY_MODEL_6a63762726de4fd0b77f47a2c8e84c95"}},"7ca42e8e814244f687deffd0a55b42cb":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9238dca9919644e0b95ba843bd35287f","placeholder":"β","style":"IPY_MODEL_42cc6f2eea354dfba24f703abd3c4cc5","value":"config.json:β100%"}},"0e08aed6fa2e459da0864715e4ae60a7":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_2e16924784f146cca6d0766273073793","max":629,"min":0,"orientation":"horizontal","style":"IPY_MODEL_fc208d2cba9d42dbb6df19127253f737","value":629}},"7c9c2219c41546d8aa5bf8e42dcf5c42":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4eab67d792984db1a8a40806ae6cecb1","placeholder":"β","style":"IPY_MODEL_3c761692e05641b29fec807d669e4ba8","value":"β629/629β[00:00<00:00,β15.1kB/s]"}},"6a63762726de4fd0b77f47a2c8e84c95":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9238dca9919644e0b95ba843bd35287f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"42cc6f2eea354dfba24f703abd3c4cc5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2e16924784f146cca6d0766273073793":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fc208d2cba9d42dbb6df19127253f737":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"4eab67d792984db1a8a40806ae6cecb1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3c761692e05641b29fec807d669e4ba8":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ad30bdb5567744a6bbbed5c1083149a6":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_f05f66583a1e42da9a8585ac86633740","IPY_MODEL_f783a3e8bd354c4f9e65d361027c9314","IPY_MODEL_0058e9ccfcbb45afb72205b144c168d9"],"layout":"IPY_MODEL_f3f00ba2c8ef41089590f9ccba71eb8e"}},"f05f66583a1e42da9a8585ac86633740":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2874ca3ee046483c85b89a52c21ce861","placeholder":"β","style":"IPY_MODEL_7812e5bd097b4a95b01cbd78805c0dba","value":"model.safetensors:β100%"}},"f783a3e8bd354c4f9e65d361027c9314":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0a67155d7b8f46cfb5ceb89d9226c314","max":267832558,"min":0,"orientation":"horizontal","style":"IPY_MODEL_ac6215f0b7f24a11b0c4394683a1ec9d","value":267832558}},"0058e9ccfcbb45afb72205b144c168d9":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c0b2edfcac504f598cc2bcd451838ae4","placeholder":"β","style":"IPY_MODEL_6dbe0ee909644e0981f24e1d77e536da","value":"β268M/268Mβ[00:06<00:00,β36.8MB/s]"}},"f3f00ba2c8ef41089590f9ccba71eb8e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2874ca3ee046483c85b89a52c21ce861":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7812e5bd097b4a95b01cbd78805c0dba":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0a67155d7b8f46cfb5ceb89d9226c314":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ac6215f0b7f24a11b0c4394683a1ec9d":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c0b2edfcac504f598cc2bcd451838ae4":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6dbe0ee909644e0981f24e1d77e536da":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f31ba36e66454ebab2724e697afc3604":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_27b50860f34a408f90ca03da77cf83ad","IPY_MODEL_b3749e99c2034bad8f62adf594b69cd5","IPY_MODEL_fb60f8433c1d41eda1c8d6d01781e732"],"layout":"IPY_MODEL_3c81cee942214c39923a600d6b707535"}},"27b50860f34a408f90ca03da77cf83ad":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_96d364fe8cc141cf9e1fd42cf92e7417","placeholder":"β","style":"IPY_MODEL_0719cc8b718c4548a5e14860305b4d44","value":"tokenizer_config.json:β100%"}},"b3749e99c2034bad8f62adf594b69cd5":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_f23187bbf0cd4daaac0082db9e73e18f","max":48,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d92e5fefa4db41d79b45cc451b7c0ef3","value":48}},"fb60f8433c1d41eda1c8d6d01781e732":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f27bd237fa0b420ca8291aef1188a45c","placeholder":"β","style":"IPY_MODEL_ab21c044751b4718ab7386a94b730226","value":"β48.0/48.0β[00:00<00:00,β587B/s]"}},"3c81cee942214c39923a600d6b707535":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"96d364fe8cc141cf9e1fd42cf92e7417":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0719cc8b718c4548a5e14860305b4d44":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f23187bbf0cd4daaac0082db9e73e18f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d92e5fefa4db41d79b45cc451b7c0ef3":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f27bd237fa0b420ca8291aef1188a45c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ab21c044751b4718ab7386a94b730226":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ec4b79cd7bf04b54a3c2233cb4db0204":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_a5f05b8872a34837b9a899680cd9088a","IPY_MODEL_579b0df1ca9d4690b92a12d5de0d728e","IPY_MODEL_639f645419ed44c191388c3b2b655b70"],"layout":"IPY_MODEL_8348dd7b98f949ccbdb3501d9c0200da"}},"a5f05b8872a34837b9a899680cd9088a":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0514dd5b78d74a88aea99de41b22506f","placeholder":"β","style":"IPY_MODEL_b2c69b36ee7b4ee78178fd803052961a","value":"vocab.txt:β"}},"579b0df1ca9d4690b92a12d5de0d728e":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_5496f8aa360940fb90f529a7d41b6d3c","max":1,"min":0,"orientation":"horizontal","style":"IPY_MODEL_745cfdb31785408bb4e0c6b464b359bd","value":1}},"639f645419ed44c191388c3b2b655b70":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c168013e2d3f44e1bca9f5b4876a9ea9","placeholder":"β","style":"IPY_MODEL_4ab621035aa74ea49f34b908ac61a13e","value":"β232k/?β[00:00<00:00,β9.34MB/s]"}},"8348dd7b98f949ccbdb3501d9c0200da":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0514dd5b78d74a88aea99de41b22506f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b2c69b36ee7b4ee78178fd803052961a":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5496f8aa360940fb90f529a7d41b6d3c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"20px"}},"745cfdb31785408bb4e0c6b464b359bd":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c168013e2d3f44e1bca9f5b4876a9ea9":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4ab621035aa74ea49f34b908ac61a13e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"219a5741fe064f3894d736218d0914dc":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_5ff5339b9f0343eb89177f5f54bf78bc","IPY_MODEL_fd8abae5df6c498c89a8bb9c384e55ef","IPY_MODEL_40a386774e974db0a523ef72881fcd40"],"layout":"IPY_MODEL_124a889362f04165a4594e856dfeaf6d"}},"5ff5339b9f0343eb89177f5f54bf78bc":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ce5f0f6f4ad4444bb11ac636eb872b36","placeholder":"β","style":"IPY_MODEL_53b0981ae35448388db36b5fc7d29e59","value":"Map:β100%"}},"fd8abae5df6c498c89a8bb9c384e55ef":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_59cf36d2b2ff4081be913aee61db2e50","max":2108,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c19f26361f474d3d8576cdc642b4e560","value":2108}},"40a386774e974db0a523ef72881fcd40":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_68c7567357b740c7b449fd376a3c9012","placeholder":"β","style":"IPY_MODEL_48910a34ec91412d909faa2041166d57","value":"β2108/2108β[00:00<00:00,β9031.04βexamples/s]"}},"124a889362f04165a4594e856dfeaf6d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ce5f0f6f4ad4444bb11ac636eb872b36":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"53b0981ae35448388db36b5fc7d29e59":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"59cf36d2b2ff4081be913aee61db2e50":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c19f26361f474d3d8576cdc642b4e560":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"68c7567357b740c7b449fd376a3c9012":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"48910a34ec91412d909faa2041166d57":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}},"accelerator":"GPU"},"cells":[{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"a1R-5EiFJabW","executionInfo":{"status":"ok","timestamp":1763825849444,"user_tz":-330,"elapsed":25661,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"fed0a115-9bea-4d11-9e4a-16753810ba3d"},"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]},{"output_type":"stream","name":"stderr","text":["[nltk_data] Downloading package stopwords to /root/nltk_data...\n","[nltk_data] Unzipping corpora/stopwords.zip.\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')\n","\n","import pandas as pd\n","import numpy as np\n","from sklearn.model_selection import train_test_split\n","from sklearn.feature_extraction.text import TfidfVectorizer\n","from sklearn.linear_model import LogisticRegression\n","from sklearn.metrics import classification_report\n","import pickle\n","import re\n","import nltk\n","nltk.download('stopwords')\n","from nltk.corpus import stopwords\n"]},{"cell_type":"code","source":["# it detect the worng link with mistake and extrac colum and paste it\n","import csv\n","\n","with open('/content/drive/MyDrive/nlp/data/complaints_dataset.csv', 'r') as f:\n"," reader = csv.reader(f)\n"," for i, row in enumerate(reader, start=1):\n"," if len(row) != 4:\n"," print(i, row)\n"],"metadata":{"id":"lwYBfLMb67Zk"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Change this path to match your Drive folder\n","df = pd.read_csv('/content/drive/MyDrive/nlp/data/complaints_dataset.csv')\n","print(\"Shape:\", df.shape)\n","df.head()\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":223},"id":"1xxJzQCoKVlp","executionInfo":{"status":"ok","timestamp":1762673856383,"user_tz":-330,"elapsed":36,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"d631943f-7fa4-4bdd-9371-28894d97c215"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Shape: (1810, 4)\n"]},{"output_type":"execute_result","data":{"text/plain":[" complaint_text category sentiment \\\n","0 My parcel is still not delivered even after 4 ... Late Delivery Negative \n","1 The delivery guy was very polite and helpful, ... Late Delivery Positive \n","2 Tracking shows 'out for delivery' since 8 AM. ... Late Delivery Negative \n","3 I needed this for a gift tmrw. Thanks for ruin... Late Delivery Negative \n","4 Order is 2 days late. Please provide an accura... Late Delivery Negative \n","\n"," urgency \n","0 High \n","1 Low \n","2 Medium \n","3 High \n","4 Medium "],"text/html":["\n"," <div id=\"df-01eea597-aa1d-4bc9-b095-e1292a73f6ae\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>complaint_text</th>\n"," <th>category</th>\n"," <th>sentiment</th>\n"," <th>urgency</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>My parcel is still not delivered even after 4 ...</td>\n"," <td>Late Delivery</td>\n"," <td>Negative</td>\n"," <td>High</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>The delivery guy was very polite and helpful, ...</td>\n"," <td>Late Delivery</td>\n"," <td>Positive</td>\n"," <td>Low</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>Tracking shows 'out for delivery' since 8 AM. ...</td>\n"," <td>Late Delivery</td>\n"," <td>Negative</td>\n"," <td>Medium</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>I needed this for a gift tmrw. Thanks for ruin...</td>\n"," <td>Late Delivery</td>\n"," <td>Negative</td>\n"," <td>High</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>Order is 2 days late. Please provide an accura...</td>\n"," <td>Late Delivery</td>\n"," <td>Negative</td>\n"," <td>Medium</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-01eea597-aa1d-4bc9-b095-e1292a73f6ae')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-01eea597-aa1d-4bc9-b095-e1292a73f6ae button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-01eea597-aa1d-4bc9-b095-e1292a73f6ae');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-dfeacc14-34fc-40ca-b784-62ec375c5a41\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-dfeacc14-34fc-40ca-b784-62ec375c5a41')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-dfeacc14-34fc-40ca-b784-62ec375c5a41 button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"df","summary":"{\n \"name\": \"df\",\n \"rows\": 1810,\n \"fields\": [\n {\n \"column\": \"complaint_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1800,\n \"samples\": [\n \"Appreciate your fast delivery service!\",\n \"Your team resolved my last complaint so quickly. Thanks!\",\n \"The delivery was made, but he didn't have a mask on.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"category\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 32,\n \"samples\": [\n \"Order Cancellation Issue\",\n \"Fraudulent Order\",\n \"Order \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sentiment\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"Positive\",\n \"bringing the total to 700.\",\n \"Negative\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"urgency\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Low\",\n \"Nan\",\n \"High\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":25}]},{"cell_type":"code","source":["import pandas as pd\n","import numpy as np\n","\n","# Load your dataset\n","df = pd.read_csv('/content/drive/MyDrive/nlp/data/complaints_dataset.csv')\n","\n","print(\"Before cleaning:\")\n","print(df['category'].value_counts())\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"kNgXRJKubVOW","executionInfo":{"status":"ok","timestamp":1762673859859,"user_tz":-330,"elapsed":20,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"5f7687cf-7a08-4377-ed8c-bdf0805b35b5"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Before cleaning:\n","category\n","Late Delivery 312\n","Refund Or Return Request 287\n","Damaged Product 285\n","Delivery Partner Issue 258\n","Wrong Item Delivered 255\n","Address Or Contact Problem 152\n","Positive Feedback 63\n","Refund or Return Request 32\n","Customer Service Issue 26\n","Customer Service 22\n","Customer Support 19\n","Address or Contact Problem 17\n","fast Delivery 17\n","Lost Package 9\n","Product Quality 7\n","Product 7\n","Order 7\n","Pickup Issue 6\n","Delivery Partner 5\n","Order Issue 4\n","Address Or Kcontact Problem 3\n","Fraudulent Order 3\n","Order Cancellation 2\n","Synthetic Complaints 2\n","right Item Delivered 2\n","Payment Issue 2\n","Negative 1\n","Refund Orreturn Request 1\n","While `Wrong Item Delivered` Has 17. Adjusting Count For Balance. 1\n","Delivery 1\n","Order Cancellation Issue 1\n","Technical Issue 1\n","Name: count, dtype: int64\n"]}]},{"cell_type":"code","source":["# --- Category Cleaning ---\n","\n","# Make text consistent (remove spaces and capitalization differences)\n","df['category'] = df['category'].astype(str).str.strip().str.title()\n","\n","# Fix known typos and duplicates\n","df['category'] = df['category'].replace({\n"," 'Address Or Kcontact Problem': 'Address Or Contact Problem',\n"," 'Refund Orreturn Request': 'Refund Or Return Request',\n"," 'Synthetic Complaints.': 'Synthetic Complaints',\n"," 'Nan': np.nan\n","})\n","\n","# Drop invalid or unnecessary rows\n","df = df[df['category'].notna()] # remove rows with NaN category\n","df = df[~df['category'].isin(['Synthetic Complaints'])] # remove unwanted fake rows\n","\n","print(\"β After cleaning:\")\n","print(df['category'].value_counts())\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ddynQztPbdym","executionInfo":{"status":"ok","timestamp":1762673862062,"user_tz":-330,"elapsed":22,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"c006fea7-3481-45ff-c834-8df9cfb54c1f"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["β After cleaning:\n","category\n","Refund Or Return Request 320\n","Late Delivery 312\n","Damaged Product 285\n","Delivery Partner Issue 258\n","Wrong Item Delivered 255\n","Address Or Contact Problem 172\n","Positive Feedback 63\n","Customer Service Issue 26\n","Customer Service 22\n","Customer Support 19\n","Fast Delivery 17\n","Lost Package 9\n","Product 7\n","Order 7\n","Product Quality 7\n","Pickup Issue 6\n","Delivery Partner 5\n","Order Issue 4\n","Fraudulent Order 3\n","Right Item Delivered 2\n","Payment Issue 2\n","Order Cancellation 2\n","Negative 1\n","While `Wrong Item Delivered` Has 17. Adjusting Count For Balance. 1\n","Delivery 1\n","Order Cancellation Issue 1\n","Technical Issue 1\n","Name: count, dtype: int64\n"]}]},{"cell_type":"code","source":["def clean_text(text):\n"," text = str(text).lower()\n"," text = re.sub(r'http\\S+|www\\S+', '', text) # remove links\n"," text = re.sub(r'[^a-zA-Z\\s]', '', text) # keep letters only\n"," text = ' '.join([w for w in text.split() if w not in stopwords.words('english')])\n"," return text\n","\n","df['clean_text'] = df['complaint_text'].apply(clean_text)\n","df.head()\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"Kodf5MjdKg_n","executionInfo":{"status":"ok","timestamp":1762673867353,"user_tz":-330,"elapsed":1405,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"403f5fc7-d5d6-4c22-aa72-aef0e8495e6a"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" complaint_text category sentiment \\\n","0 My parcel is still not delivered even after 4 ... Late Delivery Negative \n","1 The delivery guy was very polite and helpful, ... Late Delivery Positive \n","2 Tracking shows 'out for delivery' since 8 AM. ... Late Delivery Negative \n","3 I needed this for a gift tmrw. Thanks for ruin... Late Delivery Negative \n","4 Order is 2 days late. Please provide an accura... Late Delivery Negative \n","\n"," urgency clean_text \n","0 High parcel still delivered even days \n","1 Low delivery guy polite helpful even though bit late \n","2 Medium tracking shows delivery since pm still nothing \n","3 High needed gift tmrw thanks ruining hata logistics... \n","4 Medium order days late please provide accurate update "],"text/html":["\n"," <div id=\"df-f5f23d8c-c0d4-4164-851c-1217e20a81c3\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>complaint_text</th>\n"," <th>category</th>\n"," <th>sentiment</th>\n"," <th>urgency</th>\n"," <th>clean_text</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>My parcel is still not delivered even after 4 ...</td>\n"," <td>Late Delivery</td>\n"," <td>Negative</td>\n"," <td>High</td>\n"," <td>parcel still delivered even days</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>The delivery guy was very polite and helpful, ...</td>\n"," <td>Late Delivery</td>\n"," <td>Positive</td>\n"," <td>Low</td>\n"," <td>delivery guy polite helpful even though bit late</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>Tracking shows 'out for delivery' since 8 AM. ...</td>\n"," <td>Late Delivery</td>\n"," <td>Negative</td>\n"," <td>Medium</td>\n"," <td>tracking shows delivery since pm still nothing</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>I needed this for a gift tmrw. Thanks for ruin...</td>\n"," <td>Late Delivery</td>\n"," <td>Negative</td>\n"," <td>High</td>\n"," <td>needed gift tmrw thanks ruining hata logistics...</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>Order is 2 days late. Please provide an accura...</td>\n"," <td>Late Delivery</td>\n"," <td>Negative</td>\n"," <td>Medium</td>\n"," <td>order days late please provide accurate update</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f5f23d8c-c0d4-4164-851c-1217e20a81c3')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-f5f23d8c-c0d4-4164-851c-1217e20a81c3 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-f5f23d8c-c0d4-4164-851c-1217e20a81c3');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-9f0012be-9d19-441f-a2c4-5b68380a0a7c\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-9f0012be-9d19-441f-a2c4-5b68380a0a7c')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-9f0012be-9d19-441f-a2c4-5b68380a0a7c button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"df","summary":"{\n \"name\": \"df\",\n \"rows\": 1808,\n \"fields\": [\n {\n \"column\": \"complaint_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1798,\n \"samples\": [\n \"Product arrived late, but no issues in quality.\",\n \"This is the wrong item! Now I have to go through the hassle of returning it. So annoying.\",\n \"Fantastic, my order number doesn\\u2019t even exist in your system.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"category\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 27,\n \"samples\": [\n \"Positive Feedback\",\n \"Payment Issue\",\n \"Customer Support\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sentiment\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Negative\",\n \"Positive\",\n \"Mixed\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"urgency\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Low\",\n \"Nan\",\n \"High\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"clean_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1788,\n \"samples\": [\n \"delivery early packaging secure\",\n \"delivery timely smooth professional\",\n \"package arrived late driver extremely nice apologized im upset\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":28}]},{"cell_type":"code","source":["X = df['clean_text']\n","y_category = df['category']\n","y_sentiment = df['sentiment']\n","\n","vectorizer = TfidfVectorizer(max_features=3000)\n","X_vec = vectorizer.fit_transform(X)\n","\n","X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_vec, y_category, test_size=0.2, random_state=42)\n","X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(X_vec, y_sentiment, test_size=0.2, random_state=42)\n"],"metadata":{"id":"Yg8ZcmYdKk_W"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"SrCUGkXKa3Uh"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["model_category = LogisticRegression(max_iter=1000, class_weight='balanced')\n","model_category.fit(X_train_c, y_train_c)\n","\n","y_pred_c = model_category.predict(X_test_c)\n","print(\"π¦ Complaint Category Classification Report:\")\n","print(classification_report(y_test_c, y_pred_c))\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7ovYxjC_Knkf","executionInfo":{"status":"ok","timestamp":1762304850534,"user_tz":-330,"elapsed":308,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"bdceaa78-d70c-48d7-df40-0f96f386a15c"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["π¦ Complaint Category Classification Report:\n"," precision recall f1-score support\n","\n","Address Or Contact Problem 1.00 0.96 0.98 23\n"," Damaged Product 0.94 0.94 0.94 33\n"," Delivery Partner Issue 0.92 0.92 0.92 37\n"," Late Delivery 0.92 0.92 0.92 26\n"," Negative 0.00 0.00 0.00 0\n"," Positive Feedback 0.67 0.71 0.69 17\n"," Refund Or Return Request 0.93 0.90 0.91 29\n"," Wrong Item Delivered 1.00 0.96 0.98 24\n","\n"," accuracy 0.91 189\n"," macro avg 0.80 0.79 0.79 189\n"," weighted avg 0.92 0.91 0.92 189\n","\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n","/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n","/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"]}]},{"cell_type":"code","source":["#SVM\n","from sklearn.model_selection import train_test_split\n","from sklearn.feature_extraction.text import TfidfVectorizer\n","\n","# Text data\n","X = df['clean_text']\n","y_category = df['category']\n","\n","# TF-IDF Vectorizer\n","vectorizer = TfidfVectorizer(max_features=3000)\n","X_vec = vectorizer.fit_transform(X)\n","\n","# Train-test split for category classification\n","X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(\n"," X_vec,\n"," y_category,\n"," test_size=0.2,\n"," random_state=42\n",")\n"],"metadata":{"id":"D4G4CFk1ccTj"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# -----------------------\n","# Improved Sentiment Model\n","# -----------------------\n","from sklearn.linear_model import LogisticRegression\n","\n","# Use stronger regularization (C=0.5) and balance weights\n","model_sentiment = LogisticRegression(\n"," max_iter=1000,\n"," class_weight='balanced',\n"," C=0.5,\n"," solver='lbfgs'\n",")\n","\n","model_sentiment.fit(X_train_s, y_train_s)\n","\n","# Evaluate\n","y_pred_s = model_sentiment.predict(X_test_s)\n","print(\"π¬ Sentiment Analysis Report (Improved):\")\n","print(classification_report(y_test_s, y_pred_s))\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"aYkxwISfKsbG","executionInfo":{"status":"ok","timestamp":1762304854744,"user_tz":-330,"elapsed":14,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"092bad4f-8688-4c10-e584-124a1e5b68ee"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["π¬ Sentiment Analysis Report (Improved):\n"," precision recall f1-score support\n","\n"," High 0.00 0.00 0.00 0\n"," Negative 0.92 0.86 0.89 107\n"," Neutral 0.78 0.82 0.80 39\n"," Positive 0.85 0.91 0.88 43\n","\n"," accuracy 0.86 189\n"," macro avg 0.64 0.65 0.64 189\n","weighted avg 0.87 0.86 0.87 189\n","\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n","/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n","/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"]}]},{"cell_type":"code","source":["#svm\n","from sklearn.model_selection import train_test_split\n","from sklearn.feature_extraction.text import TfidfVectorizer\n","\n","# Text\n","X = df['clean_text']\n","\n","# Labels\n","y_category = df['category']\n","y_sentiment = df['sentiment']\n","\n","# TF-IDF\n","vectorizer = TfidfVectorizer(max_features=3000)\n","X_vec = vectorizer.fit_transform(X)\n","\n","# Category split\n","X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(\n"," X_vec, y_category, test_size=0.2, random_state=42\n",")\n","\n","# Sentiment split\n","X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(\n"," X_vec, y_sentiment, test_size=0.2, random_state=42\n",")\n","from sklearn.svm import LinearSVC\n","from sklearn.metrics import classification_report\n","\n","svm_category = LinearSVC(C=0.7, class_weight='balanced', max_iter=2000)\n","svm_category.fit(X_train_c, y_train_c)\n","\n","y_pred_c = svm_category.predict(X_test_c)\n","print(\"π¦ Complaint Category Classification Report (SVM):\")\n","print(classification_report(y_test_c, y_pred_c))\n","from sklearn.linear_model import LogisticRegression\n","\n","svm_sentiment = LogisticRegression(\n"," max_iter=1000,\n"," C=0.5,\n"," class_weight='balanced'\n",")\n","\n","svm_sentiment.fit(X_train_s, y_train_s)\n","from sklearn.svm import LinearSVC\n","\n","svm_sentiment = LinearSVC(C=0.7, class_weight='balanced')\n","svm_sentiment.fit(X_train_s, y_train_s)\n","y_pred_s = svm_sentiment.predict(X_test_s)\n","print(\"π¬ Sentiment Report (SVM/LogReg):\")\n","print(classification_report(y_test_s, y_pred_s))\n","import pickle\n","save_path = \"/content/drive/MyDrive/nlp/mode/\"\n","\n","pickle.dump(svm_category, open(save_path + \"complaint_classifier.pkl\", \"wb\"))\n","pickle.dump(svm_sentiment, open(save_path + \"sentiment_model.pkl\", \"wb\"))\n","pickle.dump(vectorizer, open(save_path + \"vectorizer.pkl\", \"wb\"))\n","\n","print(\"β Saved SVM category model, sentiment model & vectorizer\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"MCSwu1FLc5bA","executionInfo":{"status":"ok","timestamp":1762571948932,"user_tz":-330,"elapsed":587,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"a1749fb2-49fb-4bed-b935-d150c60db476"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["π¦ Complaint Category Classification Report (SVM):\n"," precision recall f1-score support\n","\n","Address Or Contact Problem 1.00 0.96 0.98 23\n"," Damaged Product 0.97 1.00 0.99 33\n"," Delivery Partner Issue 0.92 0.95 0.93 37\n"," Late Delivery 0.92 0.92 0.92 26\n"," Negative 0.00 0.00 0.00 0\n"," Positive Feedback 0.71 0.59 0.65 17\n"," Refund Or Return Request 0.90 0.93 0.92 29\n"," Wrong Item Delivered 1.00 0.96 0.98 24\n","\n"," accuracy 0.92 189\n"," macro avg 0.80 0.79 0.79 189\n"," weighted avg 0.93 0.92 0.92 189\n","\n","π¬ Sentiment Report (SVM/LogReg):\n"," precision recall f1-score support\n","\n"," High 0.00 0.00 0.00 0\n"," Negative 0.88 0.86 0.87 107\n"," Neutral 0.78 0.82 0.80 39\n"," Positive 0.86 0.84 0.85 43\n","\n"," accuracy 0.85 189\n"," macro avg 0.63 0.63 0.63 189\n","weighted avg 0.86 0.85 0.85 189\n","\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n","/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n","/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n","/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n","/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n","/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"]},{"output_type":"stream","name":"stdout","text":["β Saved SVM category model, sentiment model & vectorizer\n"]}]},{"cell_type":"code","source":["tests = [\n"," \"My order was supposed to arrive two days ago but it's still not here.\",\n"," \"Package delivery got delayed again, this is frustrating.\",\n"," \"The parcel arrived late but the product is fine.\",\n"," \"Iβm happy it finally arrived even though it was late.\",\n","]\n"],"metadata":{"id":"L7g8szxSdz2J"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":211},"id":"wOP5nsSMKwgo","executionInfo":{"status":"error","timestamp":1762571751640,"user_tz":-330,"elapsed":36,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"1857ee43-21ef-4d99-d817-f824484f771b"},"execution_count":null,"outputs":[{"output_type":"error","ename":"NameError","evalue":"name 'svm_sentiment' is not defined","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)","\u001b[0;32m/tmp/ipython-input-1111225212.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msvm_category\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msave_path\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'complaint_classifier.pkl'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'wb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msvm_sentiment\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msave_path\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'sentiment_model.pkl'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'wb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvectorizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msave_path\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m'vectorizer.pkl'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'wb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mNameError\u001b[0m: name 'svm_sentiment' is not defined"]}]},{"cell_type":"code","source":["# --- Predict Complaint Function ---\n","def predict_complaint(text):\n"," clean = clean_text(text) # Clean text using your function\n"," vec = vectorizer.transform([clean]) # Convert text to TF-IDF vector\n"," category = svm_category.predict(vec)[0] # Predict category using SVM\n"," sentiment = svm_sentiment.predict(vec)[0] # Predict sentiment using SVM or LogisticRegression\n"," return category, sentiment\n","\n","\n","# --- Test Sentences ---\n","samples = [\n"," \"The product was damaged and not usable.\",\n"," \"Delivery was on time and packaging was great.\",\n"," \"Refund took too long, very disappointed.\",\n"," \"Got a replacement quickly for the damaged product, thanks!\",\n"," \"The delivery guy was rude and shouted at me.\",\n"," \"Everything worked smoothly, Iβm happy with your service!\",\n"," \"My order hasn't been delivered even after 5 days.\",\n"," \"The product I got is completely different from what I ordered.\",\n"," \"The customer support team was helpful and polite.\",\n"," \"I received my refund quickly, thank you!\"\n","]\n","\n","# --- Run Predictions ---\n","for s in samples:\n"," category, sentiment = predict_complaint(s)\n"," print(f\"{s} β ({category}, {sentiment})\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QDWSIRAZLTyi","executionInfo":{"status":"ok","timestamp":1762572075762,"user_tz":-330,"elapsed":56,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"ae14817a-7018-453c-d292-7c87c5d744ae"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["The product was damaged and not usable. β (Damaged Product, Positive)\n","Delivery was on time and packaging was great. β (Positive Feedback, Positive)\n","Refund took too long, very disappointed. β (Refund Or Return Request, Negative)\n","Got a replacement quickly for the damaged product, thanks! β (Damaged Product, Positive)\n","The delivery guy was rude and shouted at me. β (Delivery Partner Issue, Negative)\n","Everything worked smoothly, Iβm happy with your service! β (Positive Feedback, Positive)\n","My order hasn't been delivered even after 5 days. β (Late Delivery, Negative)\n","The product I got is completely different from what I ordered. β (Wrong Item Delivered, Negative)\n","The customer support team was helpful and polite. β (Positive Feedback, Positive)\n","I received my refund quickly, thank you! β (Refund Or Return Request, Positive)\n"]}]},{"cell_type":"code","source":[],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"izYqWRsENIZ7","executionInfo":{"status":"ok","timestamp":1762303245320,"user_tz":-330,"elapsed":10,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"cac77dff-8563-42d8-c6e0-17f6d2123d3c"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["My parcel was supposed to arrive yesterday but still not delivered. β ('Late Delivery', 'Negative')\n","The laptop screen was cracked when I opened the box. β ('Damaged Product', 'Negative')\n","I ordered a blue shirt but received a green one. β ('Wrong Item Delivered', 'Negative')\n","I returned the product last week but still no refund. β ('Refund Or Return Request', 'Negative')\n","The delivery guy was rude and shouted at me. β ('Delivery Partner Issue', 'Negative')\n","Everything worked smoothly, Iβm happy with your service! β ('Late Delivery', 'Positive')\n"]}]},{"cell_type":"code","source":["!pip install xgboost\n","import xgboost as xgb\n","from xgboost import XGBClassifier\n","from sklearn.metrics import classification_report\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"TEd66nqxeWCt","executionInfo":{"status":"ok","timestamp":1762572157375,"user_tz":-330,"elapsed":7942,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"043126a9-4909-4323-9b95-b958e4cc30f8"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: xgboost in /usr/local/lib/python3.12/dist-packages (3.1.1)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from xgboost) (2.0.2)\n","Requirement already satisfied: nvidia-nccl-cu12 in /usr/local/lib/python3.12/dist-packages (from xgboost) (2.27.3)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.12/dist-packages (from xgboost) (1.16.3)\n"]}]},{"cell_type":"code","source":["X = df['clean_text']\n","y_category = df['category']\n","\n","from sklearn.model_selection import train_test_split\n","from sklearn.feature_extraction.text import TfidfVectorizer\n","\n","vectorizer = TfidfVectorizer(max_features=3000)\n","X_vec = vectorizer.fit_transform(X)\n","\n","X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_vec, y_category, test_size=0.2, random_state=42)\n"],"metadata":{"id":"3wB7uJW-ebsD"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# import xgboost as xgb\n","# from sklearn.preprocessing import LabelEncoder\n","\n","# # Encode category labels β numeric\n","# le = LabelEncoder()\n","# y_train_enc = le.fit_transform(y_train_c)\n","# y_test_enc = le.transform(y_test_c)\n","\n","# # Build DMatrix for XGBoost\n","# dtrain = xgb.DMatrix(X_train_c, label=y_train_enc)\n","# dtest = xgb.DMatrix(X_test_c, label=y_test_enc)\n","\n","# # Define parameters\n","# params = {\n","# 'objective': 'multi:softmax',\n","# 'num_class': len(le.classes_), # total number of unique categories\n","# 'max_depth': 7,\n","# 'eta': 0.1,\n","# 'subsample': 0.9,\n","# 'colsample_bytree': 0.9,\n","# 'eval_metric': 'mlogloss'\n","# }\n","\n","# # Train model\n","# xgb_category = xgb.train(params, dtrain, num_boost_round=200)\n","\n","# # Predict\n","# y_pred_enc = xgb_category.predict(dtest)\n","\n","# # Decode predictions back to original class names\n","# y_pred_c = le.inverse_transform(y_pred_enc.astype(int))\n","\n","from xgboost import XGBClassifier\n","from sklearn.preprocessing import LabelEncoder\n","from sklearn.metrics import classification_report\n","\n","# Encode labels\n","le = LabelEncoder()\n","y_train_enc = le.fit_transform(y_train_c)\n","y_test_enc = le.transform(y_test_c)\n","\n","# Train with sklearn-compatible API\n","xgb_category = XGBClassifier(\n"," objective='multi:softmax',\n"," num_class=len(le.classes_),\n"," n_estimators=200,\n"," learning_rate=0.1,\n"," max_depth=7,\n"," subsample=0.9,\n"," colsample_bytree=0.9,\n"," eval_metric='mlogloss',\n"," random_state=42,\n"," use_label_encoder=False\n",")\n","\n","xgb_category.fit(X_train_c, y_train_enc)\n","y_pred_enc = xgb_category.predict(X_test_c)\n","y_pred_c = le.inverse_transform(y_pred_enc)\n","\n","print(\"π¦ Complaint Category Classification Report (XGBoost - sklearn API):\")\n","print(classification_report(y_test_c, y_pred_c))\n","\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"aIhrVrnYe1l-","executionInfo":{"status":"ok","timestamp":1762572189447,"user_tz":-330,"elapsed":24139,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"845098ce-3d12-42b6-b657-2739d8427ea5"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.12/dist-packages/xgboost/training.py:199: UserWarning: [03:22:45] WARNING: /workspace/src/learner.cc:790: \n","Parameters: { \"use_label_encoder\" } are not used.\n","\n"," bst.update(dtrain, iteration=i, fobj=obj)\n"]},{"output_type":"stream","name":"stdout","text":["π¦ Complaint Category Classification Report (XGBoost - sklearn API):\n"," precision recall f1-score support\n","\n","Address Or Contact Problem 0.95 0.83 0.88 23\n"," Damaged Product 0.73 0.91 0.81 33\n"," Delivery Partner Issue 0.86 0.81 0.83 37\n"," Late Delivery 0.89 0.92 0.91 26\n"," Positive Feedback 0.67 0.47 0.55 17\n"," Refund Or Return Request 1.00 0.97 0.98 29\n"," Wrong Item Delivered 0.85 0.92 0.88 24\n","\n"," accuracy 0.85 189\n"," macro avg 0.85 0.83 0.84 189\n"," weighted avg 0.85 0.85 0.85 189\n","\n"]}]},{"cell_type":"code","source":["# xgb_category = XGBClassifier(\n","# n_estimators=200, # number of trees\n","# learning_rate=0.1, # step size shrinkage\n","# max_depth=7, # tree depth (higher = more complex)\n","# subsample=0.9, # sample ratio\n","# colsample_bytree=0.9, # feature sample ratio\n","# objective='multi:softmax',\n","# eval_metric='mlogloss',\n","# use_label_encoder=False,\n","# random_state=42\n","# )\n","\n","# xgb_category.fit(X_train_c, y_train_c)\n","# y_pred_c = xgb_category.predict(X_test_c)\n","\n","# print(\"π¦ Complaint Category Classification Report (XGBoost):\")\n","# print(classification_report(y_test_c, y_pred_c))\n","\n","y_sentiment = df['sentiment']\n","X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(X_vec, y_sentiment, test_size=0.2, random_state=42)\n","\n","from sklearn.linear_model import LogisticRegression\n","sentiment_model = LogisticRegression(max_iter=1000, class_weight='balanced', C=0.7)\n","sentiment_model.fit(X_train_s, y_train_s)\n","\n","y_pred_s = sentiment_model.predict(X_test_s)\n","print(\"π¬ Sentiment Analysis Report:\")\n","print(classification_report(y_test_s, y_pred_s))\n","\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"6VYnaTZWeeRg","executionInfo":{"status":"ok","timestamp":1762572205522,"user_tz":-330,"elapsed":89,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"3bbf0cdb-7592-48ea-bb89-c7f59fb22ff6"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["π¬ Sentiment Analysis Report:\n"," precision recall f1-score support\n","\n"," High 0.00 0.00 0.00 0\n"," Negative 0.91 0.87 0.89 107\n"," Neutral 0.78 0.82 0.80 39\n"," Positive 0.86 0.88 0.87 43\n","\n"," accuracy 0.86 189\n"," macro avg 0.64 0.64 0.64 189\n","weighted avg 0.87 0.86 0.87 189\n","\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n","/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n","/usr/local/lib/python3.12/dist-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n"," _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"]}]},{"cell_type":"code","source":["import pickle\n","\n","save_path = '/content/drive/MyDrive/nlp/mode/'\n","\n","# Save trained models and tools\n","pickle.dump(xgb_category, open(save_path + 'complaint_classifier_xgb.pkl', 'wb'))\n","pickle.dump(sentiment_model, open(save_path + 'sentiment_model_xgb.pkl', 'wb'))\n","pickle.dump(vectorizer, open(save_path + 'vectorizer_xgb.pkl', 'wb'))\n","pickle.dump(le, open(save_path + 'label_encoder_xgb.pkl', 'wb'))\n","\n","print(\"β Models saved successfully!\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"RzceuV8vfioz","executionInfo":{"status":"ok","timestamp":1762572210186,"user_tz":-330,"elapsed":73,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"0fc46680-1033-4081-8946-581b66087463"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["β Models saved successfully!\n"]}]},{"cell_type":"code","source":["# def predict_complaint(text):\n","# clean = clean_text(text)\n","# vec = vectorizer.transform([clean])\n","# category = xgb_category.predict(vec)[0]\n","# sentiment = sentiment_model.predict(vec)[0]\n","# return category, sentiment\n","#output np.int32(2)\n","\n","def predict_complaint(text):\n"," clean = clean_text(text)\n"," vec = vectorizer.transform([clean])\n"," pred_enc = xgb_category.predict(vec)\n"," category = le.inverse_transform(pred_enc.astype(int))[0] # decode integer label\n"," sentiment = sentiment_model.predict(vec)[0]\n"," return category, sentiment\n","\n","\n","samples = [\n"," \"The product was damaged and not usable.\",\n"," \"Delivery was on time and packaging was great.\",\n"," \"Refund took too long, very disappointed.\",\n"," \"Got a replacement quickly for the damaged product, thanks!\",\n"," \"The delivery guy was rude and shouted at me.\",\n"," \"Everything worked smoothly, Iβm happy with your service!\",\n"," \"My order hasn't been delivered even after 5 days.\",\n"," \"The product I got is completely different from what I ordered.\",\n"," \"The customer support team was helpful and polite.\",\n"," \"I received my refund quickly, thank you!\"\n","]\n","\n","for s in samples:\n"," print(f\"{s} β {predict_complaint(s)}\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"htiKycXTft3l","executionInfo":{"status":"ok","timestamp":1762572251230,"user_tz":-330,"elapsed":8,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"78569dfa-4745-4021-c920-71476071b044"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["The product was damaged and not usable. β ('Damaged Product', 'Positive')\n","Delivery was on time and packaging was great. β ('Positive Feedback', 'Positive')\n","Refund took too long, very disappointed. β ('Refund Or Return Request', 'Negative')\n","Got a replacement quickly for the damaged product, thanks! β ('Damaged Product', 'Positive')\n","The delivery guy was rude and shouted at me. β ('Delivery Partner Issue', 'Negative')\n","Everything worked smoothly, Iβm happy with your service! β ('Positive Feedback', 'Positive')\n","My order hasn't been delivered even after 5 days. β ('Late Delivery', 'Negative')\n","The product I got is completely different from what I ordered. β ('Wrong Item Delivered', 'Negative')\n","The customer support team was helpful and polite. β ('Positive Feedback', 'Positive')\n","I received my refund quickly, thank you! β ('Refund Or Return Request', 'Positive')\n"]}]},{"cell_type":"code","source":["#bert\n","\n","\n","\n","!pip install transformers torch scikit-learn pandas numpy nltk\n","\n","\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"TQLG_bTngKw0","executionInfo":{"status":"ok","timestamp":1762572557914,"user_tz":-330,"elapsed":15561,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"96721a9e-4d79-4423-8142-6e1f25da757e"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (4.57.1)\n","Requirement already satisfied: torch in /usr/local/lib/python3.12/dist-packages (2.8.0+cu126)\n","Requirement already satisfied: scikit-learn in /usr/local/lib/python3.12/dist-packages (1.6.1)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (2.2.2)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (2.0.2)\n","Requirement already satisfied: nltk in /usr/local/lib/python3.12/dist-packages (3.9.1)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers) (3.20.0)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.36.0)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (25.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.3)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2024.11.6)\n","Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers) (2.32.4)\n","Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.22.1)\n","Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.6.2)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers) (4.67.1)\n","Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.12/dist-packages (from torch) (4.15.0)\n","Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch) (75.2.0)\n","Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch) (1.13.3)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.12/dist-packages (from torch) (3.5)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch) (3.1.6)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.12/dist-packages (from torch) (2025.3.0)\n","Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n","Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n","Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.80)\n","Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch) (9.10.2.21)\n","Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.4.1)\n","Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in /usr/local/lib/python3.12/dist-packages (from torch) (11.3.0.4)\n","Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in /usr/local/lib/python3.12/dist-packages (from torch) (10.3.7.77)\n","Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in /usr/local/lib/python3.12/dist-packages (from torch) (11.7.1.2)\n","Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in /usr/local/lib/python3.12/dist-packages (from torch) (12.5.4.2)\n","Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch) (0.7.1)\n","Requirement already satisfied: nvidia-nccl-cu12==2.27.3 in /usr/local/lib/python3.12/dist-packages (from torch) (2.27.3)\n","Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n","Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.85)\n","Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in /usr/local/lib/python3.12/dist-packages (from torch) (1.11.1.6)\n","Requirement already satisfied: triton==3.4.0 in /usr/local/lib/python3.12/dist-packages (from torch) (3.4.0)\n","Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn) (1.16.3)\n","Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn) (1.5.2)\n","Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn) (3.6.0)\n","Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas) (2.9.0.post0)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.2)\n","Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.2)\n","Requirement already satisfied: click in /usr/local/lib/python3.12/dist-packages (from nltk) (8.3.0)\n","Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (1.2.0)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch) (1.3.0)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch) (3.0.3)\n","Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.4.4)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.11)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2.5.0)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2025.10.5)\n"]}]},{"cell_type":"code","source":["import pandas as pd\n","import numpy as np\n","import re\n","import nltk\n","from nltk.corpus import stopwords\n","from sklearn.model_selection import train_test_split\n","from sklearn.feature_extraction.text import TfidfVectorizer\n","from sklearn.svm import LinearSVC\n","from transformers import pipeline\n","\n","nltk.download('stopwords')\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4CVe66DqgsUc","executionInfo":{"status":"ok","timestamp":1762663753604,"user_tz":-330,"elapsed":12,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"b977a7fb-a727-48bc-ec8d-dffb085a2847"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["[nltk_data] Downloading package stopwords to /root/nltk_data...\n","[nltk_data] Package stopwords is already up-to-date!\n"]},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{},"execution_count":58}]},{"cell_type":"code","source":["from transformers import pipeline\n","\n","# Load once (takes a few seconds)\n","bert_sentiment = pipeline(\"sentiment-analysis\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":217,"referenced_widgets":["0c249b924c59409db54890e02a5c7674","7ca42e8e814244f687deffd0a55b42cb","0e08aed6fa2e459da0864715e4ae60a7","7c9c2219c41546d8aa5bf8e42dcf5c42","6a63762726de4fd0b77f47a2c8e84c95","9238dca9919644e0b95ba843bd35287f","42cc6f2eea354dfba24f703abd3c4cc5","2e16924784f146cca6d0766273073793","fc208d2cba9d42dbb6df19127253f737","4eab67d792984db1a8a40806ae6cecb1","3c761692e05641b29fec807d669e4ba8","ad30bdb5567744a6bbbed5c1083149a6","f05f66583a1e42da9a8585ac86633740","f783a3e8bd354c4f9e65d361027c9314","0058e9ccfcbb45afb72205b144c168d9","f3f00ba2c8ef41089590f9ccba71eb8e","2874ca3ee046483c85b89a52c21ce861","7812e5bd097b4a95b01cbd78805c0dba","0a67155d7b8f46cfb5ceb89d9226c314","ac6215f0b7f24a11b0c4394683a1ec9d","c0b2edfcac504f598cc2bcd451838ae4","6dbe0ee909644e0981f24e1d77e536da","f31ba36e66454ebab2724e697afc3604","27b50860f34a408f90ca03da77cf83ad","b3749e99c2034bad8f62adf594b69cd5","fb60f8433c1d41eda1c8d6d01781e732","3c81cee942214c39923a600d6b707535","96d364fe8cc141cf9e1fd42cf92e7417","0719cc8b718c4548a5e14860305b4d44","f23187bbf0cd4daaac0082db9e73e18f","d92e5fefa4db41d79b45cc451b7c0ef3","f27bd237fa0b420ca8291aef1188a45c","ab21c044751b4718ab7386a94b730226","ec4b79cd7bf04b54a3c2233cb4db0204","a5f05b8872a34837b9a899680cd9088a","579b0df1ca9d4690b92a12d5de0d728e","639f645419ed44c191388c3b2b655b70","8348dd7b98f949ccbdb3501d9c0200da","0514dd5b78d74a88aea99de41b22506f","b2c69b36ee7b4ee78178fd803052961a","5496f8aa360940fb90f529a7d41b6d3c","745cfdb31785408bb4e0c6b464b359bd","c168013e2d3f44e1bca9f5b4876a9ea9","4ab621035aa74ea49f34b908ac61a13e"]},"id":"5Ct4CRAggS0f","executionInfo":{"status":"ok","timestamp":1762663763212,"user_tz":-330,"elapsed":6969,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"ed56c2b8-fa2f-43a2-8924-8406daf261db"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).\n","Using a pipeline without specifying a model name and revision in production is not recommended.\n"]},{"output_type":"display_data","data":{"text/plain":["config.json: 0%| | 0.00/629 [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0c249b924c59409db54890e02a5c7674"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["model.safetensors: 0%| | 0.00/268M [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ad30bdb5567744a6bbbed5c1083149a6"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["tokenizer_config.json: 0%| | 0.00/48.0 [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f31ba36e66454ebab2724e697afc3604"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["vocab.txt: 0.00B [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ec4b79cd7bf04b54a3c2233cb4db0204"}},"metadata":{}},{"output_type":"stream","name":"stderr","text":["Device set to use cpu\n"]}]},{"cell_type":"code","source":["df = pd.read_csv('/content/drive/MyDrive/nlp/data/complaints_dataset.csv')\n","\n","def clean_text(text):\n"," text = str(text).lower()\n"," text = re.sub(r\"http\\S+|www\\S+\", \"\", text)\n"," text = re.sub(r\"[^a-zA-Z\\s]\", \"\", text)\n"," # Keep negations together\n"," text = re.sub(r\"\\bnot\\s+(\\w+)\", r\"not_\\1\", text)\n"," words = [w for w in text.split() if w not in stopwords.words(\"english\")]\n"," return \" \".join(words)\n","\n","df[\"clean_text\"] = df[\"complaint_text\"].apply(clean_text)\n","\n","X = df[\"clean_text\"]\n","y_category = df[\"category\"]\n","\n","# TF-IDF vectorizer\n","vectorizer = TfidfVectorizer(max_features=3000)\n","X_vec = vectorizer.fit_transform(X)\n","\n","# Train/test split\n","X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(\n"," X_vec, y_category, test_size=0.2, random_state=42\n",")\n","\n","# Train SVM\n","svm_category = LinearSVC(C=0.7, class_weight=\"balanced\", max_iter=2000)\n","svm_category.fit(X_train_c, y_train_c)\n","\n","from transformers import pipeline\n","\n","bert_sentiment = pipeline(\"sentiment-analysis\")\n","def predict_complaint(text):\n"," clean = clean_text(text)\n"," vec = vectorizer.transform([clean])\n","\n"," # Predict category with SVM\n"," category = svm_category.predict(vec)[0]\n","\n"," # Predict sentiment with BERT\n"," result = bert_sentiment(text)[0]\n"," label = result['label']\n"," score = round(result['score'] * 100, 2)\n","\n"," if \"NEG\" in label.upper():\n"," sentiment = \"Negative\"\n"," elif \"POS\" in label.upper():\n"," sentiment = \"Positive\"\n"," else:\n"," sentiment = \"Neutral\"\n","\n"," return category, sentiment, score\n","\n","\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EJG4GiptgnLc","executionInfo":{"status":"ok","timestamp":1762572612520,"user_tz":-330,"elapsed":4125,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"aeb35ecf-8cec-4515-b368-396ac298db4c"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).\n","Using a pipeline without specifying a model name and revision in production is not recommended.\n","Device set to use cpu\n"]}]},{"cell_type":"code","source":["samples = [\n"," \"The product was damaged and not usable.\",\n"," \"Delivery was on time and packaging was great.\",\n"," \"Refund took too long, very disappointed.\",\n"," \"Got a replacement quickly for the damaged product, thanks!\",\n"," \"The delivery guy was rude and shouted at me.\",\n"," \"Everything worked smoothly, Iβm happy with your service!\",\n"," \"My order hasn't been delivered even after 5 days.\",\n"," \"The product I got is completely different from what I ordered.\",\n"," \"The customer support team was helpful and polite.\",\n"," \"I received my refund quickly, thank you!\"\n","]\n","\n","for s in samples:\n"," cat, sent, conf = predict_complaint(s)\n"," print(f\"{s} β ({cat}, {sent}, {conf}% confidence)\")\n"],"metadata":{"id":"5ZZCGPvmg9sV","executionInfo":{"status":"ok","timestamp":1762572627382,"user_tz":-330,"elapsed":1865,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"73d59557-cc00-4c50-ac72-1317b27b90c1","colab":{"base_uri":"https://localhost:8080/"}},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["The product was damaged and not usable. β (Damaged Product, Negative, 99.98% confidence)\n","Delivery was on time and packaging was great. β (Positive Feedback, Positive, 99.98% confidence)\n","Refund took too long, very disappointed. β (Refund Or Return Request, Negative, 99.96% confidence)\n","Got a replacement quickly for the damaged product, thanks! β (Damaged Product, Negative, 98.72% confidence)\n","The delivery guy was rude and shouted at me. β (Delivery Partner Issue, Negative, 99.88% confidence)\n","Everything worked smoothly, Iβm happy with your service! β (Positive Feedback, Positive, 99.99% confidence)\n","My order hasn't been delivered even after 5 days. β (Late Delivery, Negative, 99.84% confidence)\n","The product I got is completely different from what I ordered. β (Wrong Item Delivered, Positive, 99.54% confidence)\n","The customer support team was helpful and polite. β (Positive Feedback, Positive, 99.95% confidence)\n","I received my refund quickly, thank you! β (Refund Or Return Request, Positive, 99.98% confidence)\n"]}]},{"cell_type":"code","source":["# fining tuning\n","!pip install transformers torch datasets scikit-learn\n","\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"u91Zg_NSnrKx","executionInfo":{"status":"ok","timestamp":1762672401028,"user_tz":-330,"elapsed":5145,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"999582bd-1177-4b4c-f75c-d95fd84eb55d"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (4.57.1)\n","Requirement already satisfied: torch in /usr/local/lib/python3.12/dist-packages (2.8.0+cu126)\n","Requirement already satisfied: datasets in /usr/local/lib/python3.12/dist-packages (4.0.0)\n","Requirement already satisfied: scikit-learn in /usr/local/lib/python3.12/dist-packages (1.6.1)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers) (3.20.0)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.36.0)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2.0.2)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (25.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.3)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2024.11.6)\n","Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers) (2.32.4)\n","Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.22.1)\n","Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.6.2)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers) (4.67.1)\n","Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.12/dist-packages (from torch) (4.15.0)\n","Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from torch) (75.2.0)\n","Requirement already satisfied: sympy>=1.13.3 in /usr/local/lib/python3.12/dist-packages (from torch) (1.13.3)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.12/dist-packages (from torch) (3.5)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.12/dist-packages (from torch) (3.1.6)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.12/dist-packages (from torch) (2025.3.0)\n","Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n","Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n","Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.80)\n","Requirement already satisfied: nvidia-cudnn-cu12==9.10.2.21 in /usr/local/lib/python3.12/dist-packages (from torch) (9.10.2.21)\n","Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.4.1)\n","Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in /usr/local/lib/python3.12/dist-packages (from torch) (11.3.0.4)\n","Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in /usr/local/lib/python3.12/dist-packages (from torch) (10.3.7.77)\n","Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in /usr/local/lib/python3.12/dist-packages (from torch) (11.7.1.2)\n","Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in /usr/local/lib/python3.12/dist-packages (from torch) (12.5.4.2)\n","Requirement already satisfied: nvidia-cusparselt-cu12==0.7.1 in /usr/local/lib/python3.12/dist-packages (from torch) (0.7.1)\n","Requirement already satisfied: nvidia-nccl-cu12==2.27.3 in /usr/local/lib/python3.12/dist-packages (from torch) (2.27.3)\n","Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.77)\n","Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in /usr/local/lib/python3.12/dist-packages (from torch) (12.6.85)\n","Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in /usr/local/lib/python3.12/dist-packages (from torch) (1.11.1.6)\n","Requirement already satisfied: triton==3.4.0 in /usr/local/lib/python3.12/dist-packages (from torch) (3.4.0)\n","Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (18.1.0)\n","Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.3.8)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (from datasets) (2.2.2)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.12/dist-packages (from datasets) (3.6.0)\n","Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.12/dist-packages (from datasets) (0.70.16)\n","Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn) (1.16.3)\n","Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn) (1.5.2)\n","Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn) (3.6.0)\n","Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.12/dist-packages (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (3.13.2)\n","Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (1.2.0)\n","Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.4.4)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.11)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2.5.0)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2025.10.5)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from sympy>=1.13.3->torch) (1.3.0)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2->torch) (3.0.3)\n","Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2.9.0.post0)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2025.2)\n","Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas->datasets) (2025.2)\n","Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (2.6.1)\n","Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.4.0)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (25.4.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.8.0)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (6.7.0)\n","Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (0.4.1)\n","Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.12/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<=2025.3.0,>=2023.1.0->datasets) (1.22.0)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n"]}]},{"cell_type":"code","source":["import pandas as pd\n","df = pd.read_csv('/content/drive/MyDrive/nlp/data/complaints_dataset.csv')\n","df = df[['complaint_text','sentiment']].dropna()\n","df.head()\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"671-8G2Yn5Wn","executionInfo":{"status":"ok","timestamp":1763827166976,"user_tz":-330,"elapsed":517,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"fa3aea65-da4f-4133-a3ee-13a0f3f063cf"},"execution_count":25,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" complaint_text sentiment\n","0 My parcel is still not delivered even after 4 ... Negative\n","1 The delivery guy was very polite and helpful, ... Positive\n","2 Tracking shows 'out for delivery' since 8 AM. ... Negative\n","3 I needed this for a gift tmrw. Thanks for ruin... Negative\n","4 Order is 2 days late. Please provide an accura... Negative"],"text/html":["\n"," <div id=\"df-51ab5d67-b632-4ea6-a61e-f0b948927b94\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>complaint_text</th>\n"," <th>sentiment</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>My parcel is still not delivered even after 4 ...</td>\n"," <td>Negative</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>The delivery guy was very polite and helpful, ...</td>\n"," <td>Positive</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>Tracking shows 'out for delivery' since 8 AM. ...</td>\n"," <td>Negative</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>I needed this for a gift tmrw. Thanks for ruin...</td>\n"," <td>Negative</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>Order is 2 days late. Please provide an accura...</td>\n"," <td>Negative</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-51ab5d67-b632-4ea6-a61e-f0b948927b94')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-51ab5d67-b632-4ea6-a61e-f0b948927b94 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-51ab5d67-b632-4ea6-a61e-f0b948927b94');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-411ec860-6573-4021-8d95-48bbeae6a853\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-411ec860-6573-4021-8d95-48bbeae6a853')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-411ec860-6573-4021-8d95-48bbeae6a853 button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"df","summary":"{\n \"name\": \"df\",\n \"rows\": 2108,\n \"fields\": [\n {\n \"column\": \"complaint_text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1920,\n \"samples\": [\n \"You delivered to my office. On a Sunday. Brilliant. At least the guard took it.\",\n \"Received timely updates and prompt support.\",\n \"You sent me the wrong item, but it's actually better than what I ordered! Thanks!\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sentiment\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Negative\",\n \"Positive\",\n \"Mixed\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":25}]},{"cell_type":"code","source":["from sklearn.preprocessing import LabelEncoder\n","le = LabelEncoder()\n","df['label'] = le.fit_transform(df['sentiment'])\n","label2id = {l:i for i,l in enumerate(le.classes_)}\n","id2label = {i:l for l,i in label2id.items()}\n","print(label2id)\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"FpkYUlFqoNuz","executionInfo":{"status":"ok","timestamp":1763827170053,"user_tz":-330,"elapsed":11,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"5ac0cc02-5036-4c96-8949-f2aff266193e"},"execution_count":26,"outputs":[{"output_type":"stream","name":"stdout","text":["{'High': 0, 'Mixed': 1, 'Nan': 2, 'Negative': 3, 'Neutral': 4, 'Positive': 5}\n"]}]},{"cell_type":"code","source":["from transformers import AutoTokenizer\n","tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased\")\n","\n","def tokenize(batch):\n"," return tokenizer(batch['complaint_text'], padding='max_length', truncation=True, max_length=128)\n","\n","from datasets import Dataset\n","dataset = Dataset.from_pandas(df)\n","dataset = dataset.map(tokenize, batched=True)\n","dataset = dataset.rename_column(\"label\",\"labels\")\n","dataset.set_format('torch', columns=['input_ids','attention_mask','labels'])\n","\n","train_test = dataset.train_test_split(test_size=0.2)\n","train_ds = train_test['train']\n","test_ds = train_test['test']\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":49,"referenced_widgets":["219a5741fe064f3894d736218d0914dc","5ff5339b9f0343eb89177f5f54bf78bc","fd8abae5df6c498c89a8bb9c384e55ef","40a386774e974db0a523ef72881fcd40","124a889362f04165a4594e856dfeaf6d","ce5f0f6f4ad4444bb11ac636eb872b36","53b0981ae35448388db36b5fc7d29e59","59cf36d2b2ff4081be913aee61db2e50","c19f26361f474d3d8576cdc642b4e560","68c7567357b740c7b449fd376a3c9012","48910a34ec91412d909faa2041166d57"]},"id":"VzGSCHALoUW9","executionInfo":{"status":"ok","timestamp":1763827173292,"user_tz":-330,"elapsed":587,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"eafe6ad2-131e-4115-f0f1-2066fd663368"},"execution_count":27,"outputs":[{"output_type":"display_data","data":{"text/plain":["Map: 0%| | 0/2108 [00:00<?, ? examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"219a5741fe064f3894d736218d0914dc"}},"metadata":{}}]},{"cell_type":"code","source":["from transformers import AutoModelForSequenceClassification\n","\n","num_labels = len(le.classes_)\n","model = AutoModelForSequenceClassification.from_pretrained(\n"," \"distilbert-base-uncased\",\n"," num_labels=num_labels,\n"," id2label=id2label,\n"," label2id=label2id\n",")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"D2FhZvlrohNs","executionInfo":{"status":"ok","timestamp":1763827180820,"user_tz":-330,"elapsed":334,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"7fd03a5f-c71b-4bc3-9ec6-e902effa7765"},"execution_count":28,"outputs":[{"output_type":"stream","name":"stderr","text":["Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]}]},{"cell_type":"code","source":["!pip install -U transformers\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":564},"id":"rMiHxpEspY7T","executionInfo":{"status":"ok","timestamp":1762675258419,"user_tz":-330,"elapsed":27910,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"7732eaa9-ae6b-4b65-cdbb-1b4aeabed4ac"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting transformers\n"," Using cached transformers-4.57.1-py3-none-any.whl.metadata (43 kB)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers) (3.20.0)\n","Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)\n"," Using cached huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2.0.2)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (25.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.3)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2024.11.6)\n","Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers) (2.32.4)\n","Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)\n"," Using cached tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)\n","Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.6.2)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers) (4.67.1)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (2025.3.0)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (4.15.0)\n","Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (1.2.0)\n","Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.4.4)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (3.11)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2.5.0)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers) (2025.10.5)\n","Using cached transformers-4.57.1-py3-none-any.whl (12.0 MB)\n","Using cached huggingface_hub-0.36.0-py3-none-any.whl (566 kB)\n","Using cached tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n","Installing collected packages: huggingface-hub, tokenizers, transformers\n","Successfully installed huggingface-hub-0.36.0 tokenizers-0.22.1 transformers-4.57.1\n"]},{"output_type":"display_data","data":{"application/vnd.colab-display-data+json":{"pip_warning":{"packages":["huggingface_hub","tokenizers","transformers"]},"id":"6541404747f24c548af477dac40d6ae8"}},"metadata":{}}]},{"cell_type":"code","source":["import transformers\n","print(transformers.__version__)\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"27EJqW8vpgsE","executionInfo":{"status":"ok","timestamp":1762675270199,"user_tz":-330,"elapsed":2923,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"1aab2f12-6908-4e98-eeb4-c4dd799590d6"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["4.57.1\n"]}]},{"cell_type":"code","source":["!pip uninstall -y transformers\n","!pip uninstall -y tokenizers\n","!pip uninstall -y huggingface_hub\n","!pip install transformers==4.57.1 datasets accelerate huggingface_hub --upgrade\n","\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"2ZyLecF0mHDq","executionInfo":{"status":"ok","timestamp":1762675227718,"user_tz":-330,"elapsed":2440,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"9b5e0a34-da18-4128-c0bb-f27601812c8b"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Found existing installation: transformers 4.57.1\n","Uninstalling transformers-4.57.1:\n"," Successfully uninstalled transformers-4.57.1\n","Found existing installation: tokenizers 0.22.1\n","Uninstalling tokenizers-0.22.1:\n"," Successfully uninstalled tokenizers-0.22.1\n","Found existing installation: huggingface-hub 0.36.0\n","Uninstalling huggingface-hub-0.36.0:\n"," Successfully uninstalled huggingface-hub-0.36.0\n"]}]},{"cell_type":"code","source":["import os\n","os.environ[\"WANDB_DISABLED\"] = \"true\"\n","from transformers import TrainingArguments, Trainer\n","import numpy as np\n","from sklearn.metrics import accuracy_score, f1_score\n","\n","def compute_metrics(pred):\n"," labels = pred.label_ids\n"," preds = np.argmax(pred.predictions, axis=1)\n"," acc = accuracy_score(labels, preds)\n"," f1 = f1_score(labels, preds, average='weighted')\n"," return {\"accuracy\": acc, \"f1\": f1}\n","\n","training_args = TrainingArguments(\n"," output_dir=\"./results\",\n"," do_eval=True,\n"," save_steps=500,\n"," logging_steps=500,\n"," learning_rate=2e-5,\n"," per_device_train_batch_size=8,\n"," per_device_eval_batch_size=8,\n"," num_train_epochs=6,\n"," weight_decay=0.01,\n"," logging_dir=\"./logs\",\n"," report_to=\"none\" # π disables wandb / tensorboard\n",")\n","\n","\n","trainer = Trainer(\n"," model=model,\n"," args=training_args,\n"," train_dataset=train_ds,\n"," eval_dataset=test_ds,\n"," compute_metrics=compute_metrics,\n",")\n","\n","#not used this code this is for cehck diffent betwwen 3 or 10 epoch\n","# from transformers import TrainingArguments, Trainer\n","# import numpy as np\n","# from sklearn.metrics import accuracy_score, f1_score\n","\n","# def compute_metrics(pred):\n","# labels = pred.label_ids\n","# preds = np.argmax(pred.predictions, axis=1)\n","# acc = accuracy_score(labels, preds)\n","# f1 = f1_score(labels, preds, average='weighted')\n","# return {\"accuracy\": acc, \"f1\": f1}\n","\n","# training_args = TrainingArguments(\n","# output_dir=\"./results\",\n","# evaluation_strategy=\"epoch\",\n","# save_strategy=\"epoch\",\n","# learning_rate=2e-5,\n","# per_device_train_batch_size=8,\n","# per_device_eval_batch_size=8,\n","# num_train_epochs=10,\n","# weight_decay=0.01,\n","# logging_dir=\"./logs\",\n","# load_best_model_at_end=True,\n","# metric_for_best_model=\"f1\",\n","# greater_is_better=True,\n","# report_to=\"none\"\n","# )\n","\n","# trainer = Trainer(\n","# model=model,\n","# args=training_args,\n","# train_dataset=train_ds,\n","# eval_dataset=test_ds,\n","# compute_metrics=compute_metrics\n","# )\n","\n","# trainer.train()\n","\n"],"metadata":{"id":"92Jv73fGorpe","executionInfo":{"status":"ok","timestamp":1763827190556,"user_tz":-330,"elapsed":1154,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}}},"execution_count":29,"outputs":[]},{"cell_type":"code","source":["#not used this code this is for cehck diffent betwwen 3 or 10 epoch# import pandas as pd\n","# import matplotlib.pyplot as plt\n","\n","# # Load training history from Trainer's log\n","# logs = pd.DataFrame(trainer.state.log_history)\n","\n","# # Keep only relevant columns\n","# logs = logs.dropna(subset=['loss','eval_loss'], how='all')\n","\n","# plt.figure(figsize=(8,5))\n","# plt.plot(logs[\"epoch\"], logs[\"loss\"], label=\"Training Loss\")\n","# if \"eval_loss\" in logs.columns:\n","# plt.plot(logs[\"epoch\"], logs[\"eval_loss\"], label=\"Validation Loss\")\n","# plt.xlabel(\"Epoch\")\n","# plt.ylabel(\"Loss\")\n","# plt.title(\"Training vs Validation Loss\")\n","# plt.legend()\n","# plt.grid(True)\n","# plt.show()\n"],"metadata":{"id":"LAZ_IBqutdXy","colab":{"base_uri":"https://localhost:8080/","height":332},"executionInfo":{"status":"error","timestamp":1762674413299,"user_tz":-330,"elapsed":58,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"c895ebf0-8120-4264-ec12-c1d548907d31"},"execution_count":null,"outputs":[{"output_type":"error","ename":"KeyError","evalue":"['eval_loss']","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)","\u001b[0;32m/tmp/ipython-input-1199704202.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;31m# Keep only relevant columns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mlogs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlogs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdropna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msubset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'loss'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'eval_loss'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'all'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfigure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m8\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.12/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mdropna\u001b[0;34m(self, axis, how, thresh, subset, inplace, ignore_index)\u001b[0m\n\u001b[1;32m 6668\u001b[0m \u001b[0mcheck\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindices\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6669\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcheck\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 6670\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msubset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcheck\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6671\u001b[0m \u001b[0magg_obj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtake\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindices\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0magg_axis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6672\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mKeyError\u001b[0m: ['eval_loss']"]}]},{"cell_type":"code","source":["trainer.train()\n","\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":173},"id":"JBzuXyaIqMnK","executionInfo":{"status":"ok","timestamp":1763827342692,"user_tz":-330,"elapsed":141930,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"5a45dd20-4be0-4703-e5dd-434138b0c254"},"execution_count":30,"outputs":[{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["\n"," <div>\n"," \n"," <progress value='1266' max='1266' style='width:300px; height:20px; vertical-align: middle;'></progress>\n"," [1266/1266 02:21, Epoch 6/6]\n"," </div>\n"," <table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: left;\">\n"," <th>Step</th>\n"," <th>Training Loss</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <td>500</td>\n"," <td>0.461400</td>\n"," </tr>\n"," <tr>\n"," <td>1000</td>\n"," <td>0.058800</td>\n"," </tr>\n"," </tbody>\n","</table><p>"]},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["TrainOutput(global_step=1266, training_loss=0.20852970756814002, metrics={'train_runtime': 141.8662, 'train_samples_per_second': 71.307, 'train_steps_per_second': 8.924, 'total_flos': 335033948915712.0, 'train_loss': 0.20852970756814002, 'epoch': 6.0})"]},"metadata":{},"execution_count":30}]},{"cell_type":"code","source":[],"metadata":{"id":"Gp--S5dElPIE"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["trainer.save_model(\"/content/drive/MyDrive/nlp/10epoch/bert_finetuned\")\n","tokenizer.save_pretrained(\"/content/drive/MyDrive/nlp/10epoch/bert_finetuned\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-xYxCphGs7UD","executionInfo":{"status":"ok","timestamp":1763827346636,"user_tz":-330,"elapsed":2264,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"f46f2a4e-a254-4cbd-a3f8-2f0fab9082a0"},"execution_count":31,"outputs":[{"output_type":"execute_result","data":{"text/plain":["('/content/drive/MyDrive/nlp/10epoch/bert_finetuned/tokenizer_config.json',\n"," '/content/drive/MyDrive/nlp/10epoch/bert_finetuned/special_tokens_map.json',\n"," '/content/drive/MyDrive/nlp/10epoch/bert_finetuned/vocab.txt',\n"," '/content/drive/MyDrive/nlp/10epoch/bert_finetuned/added_tokens.json',\n"," '/content/drive/MyDrive/nlp/10epoch/bert_finetuned/tokenizer.json')"]},"metadata":{},"execution_count":31}]},{"cell_type":"code","source":["from transformers import pipeline\n","finetuned_sentiment = pipeline(\"text-classification\",\n"," model=\"/content/drive/MyDrive/nlp/10epoch/bert_finetuned\",\n"," tokenizer=\"/content/drive/MyDrive/nlp/10epoch/bert_finetuned\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"rTImh_Huv59f","executionInfo":{"status":"ok","timestamp":1763827348897,"user_tz":-330,"elapsed":370,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"52493e91-d055-417e-ff96-13bb58bab9d6"},"execution_count":32,"outputs":[{"output_type":"stream","name":"stderr","text":["Device set to use cuda:0\n"]}]},{"cell_type":"code","source":["samples = [\n"," \"dilvery was late \",\n"," \"The product was damaged and not usable.\",\n"," \"Everything worked smoothly, Iβm happy with your service!\",\n"," \"Refund took too long, very disappointed.\",\n","]\n","for s in samples:\n"," result = finetuned_sentiment(s)[0]\n"," print(f\"{s} β {result['label']} ({round(result['score']*100,2)}%)\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Q13ZN5auwFuw","executionInfo":{"status":"ok","timestamp":1763827351270,"user_tz":-330,"elapsed":26,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"6c703703-5c59-4b82-f1ae-1891577be114"},"execution_count":33,"outputs":[{"output_type":"stream","name":"stdout","text":["dilvery was late β Negative (99.9%)\n","The product was damaged and not usable. β Negative (99.89%)\n","Everything worked smoothly, Iβm happy with your service! β Positive (99.8%)\n","Refund took too long, very disappointed. β Negative (99.92%)\n"]}]},{"cell_type":"code","source":["hard_samples = [\n"," # --- Clear Negative Cases ---\n"," \"The package arrived broken and the box was torn.\",\n"," \"Refund still not received after two weeks of waiting.\",\n"," \"Very bad experience, customer support never replied.\",\n"," \"Totally disappointed, the item stopped working in one day.\",\n"," \"Delivery guy never called and marked as delivered!\",\n","\n"," # --- Clear Positive Cases ---\n"," \"Excellent service, delivery was super fast!\",\n"," \"Iβm very satisfied with the product quality and packaging.\",\n"," \"Got my refund quickly without any hassle, thank you!\",\n"," \"Everything went smoothly, highly recommend your service.\",\n"," \"Received the correct product earlier than expected!\",\n","\n"," # --- Mixed or Tricky Cases ---\n"," \"Product is good but the delivery took forever.\",\n"," \"The item was damaged but customer support replaced it quickly.\",\n"," \"It was late, but I appreciate the apology and compensation.\",\n"," \"Packaging was poor, but the phone works perfectly.\",\n"," \"Refund process was slow but finally got my money back.\",\n","\n"," # --- Neutral / Polite Queries ---\n"," \"Please confirm my delivery date.\",\n"," \"Can you update the tracking information?\",\n"," \"Awaiting pickup from courier, no updates yet.\",\n"," \"Just checking if my order has been shipped.\",\n"," \"Thanks for your assistance, waiting for the product.\",\n","\n"," # --- Sarcasm / Complex Tone ---\n"," \"Amazing, another delayed delivery. Great job!\",\n"," \"Wow, you managed to send me the wrong item twice!\",\n"," \"Fantastic! Still no refund after a month.\",\n"," \"Perfect, my damaged order just got lost too.\",\n"," \"What a wonderful experience getting scammed twice.\"\n","]\n","\n","for s in hard_samples:\n"," result = finetuned_sentiment(s)[0]\n"," print(f\"{s} β {result['label']} ({round(result['score']*100,2)}%)\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"D5Q-zKvrwaDX","executionInfo":{"status":"ok","timestamp":1763827355405,"user_tz":-330,"elapsed":190,"user":{"displayName":"Deepak kushwaha","userId":"10519047954179343607"}},"outputId":"01aa299e-b2cf-4970-c916-7a969499fb55"},"execution_count":34,"outputs":[{"output_type":"stream","name":"stdout","text":["The package arrived broken and the box was torn. β Negative (99.92%)\n","Refund still not received after two weeks of waiting. β Negative (99.92%)\n","Very bad experience, customer support never replied. β Negative (99.93%)\n","Totally disappointed, the item stopped working in one day. β Negative (99.93%)\n","Delivery guy never called and marked as delivered! β Negative (99.86%)\n","Excellent service, delivery was super fast! β Positive (99.82%)\n","Iβm very satisfied with the product quality and packaging. β Positive (99.82%)\n","Got my refund quickly without any hassle, thank you! β Positive (99.83%)\n","Everything went smoothly, highly recommend your service. β Positive (99.82%)\n","Received the correct product earlier than expected! β Positive (99.7%)\n","Product is good but the delivery took forever. β Mixed (99.86%)\n","The item was damaged but customer support replaced it quickly. β Mixed (99.87%)\n","It was late, but I appreciate the apology and compensation. β Positive (87.56%)\n","Packaging was poor, but the phone works perfectly. β Mixed (99.84%)\n","Refund process was slow but finally got my money back. β Mixed (99.87%)\n","Please confirm my delivery date. β Neutral (99.49%)\n","Can you update the tracking information? β Neutral (99.5%)\n","Awaiting pickup from courier, no updates yet. β Negative (99.88%)\n","Just checking if my order has been shipped. β Neutral (99.55%)\n","Thanks for your assistance, waiting for the product. β Positive (97.89%)\n","Amazing, another delayed delivery. Great job! β Negative (99.9%)\n","Wow, you managed to send me the wrong item twice! β Negative (99.87%)\n","Fantastic! Still no refund after a month. β Negative (99.92%)\n","Perfect, my damaged order just got lost too. β Negative (99.93%)\n","What a wonderful experience getting scammed twice. β Negative (99.86%)\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"59vqh9E8wMlv"},"execution_count":null,"outputs":[]}]} |