phi-2

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "machine_shape": "hm",
+      "gpuType": "T4",
+      "provenance": []
+    },
+    "accelerator": "GPU",
+    "kaggle": {
+      "accelerator": "gpu"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "b486eefffb274a1d89c401ee56f0ea7b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_2757c39f0c9a4fa599bd2a7b58c3ad4a",
+              "IPY_MODEL_696a9b10ead84306a3886b2a5853eb7c",
+              "IPY_MODEL_d7684e7d23764317acc7bcc512944758"
+            ],
+            "layout": "IPY_MODEL_e44dbcae75684967ae5a797d20c2ec16"
+          }
+        },
+        "2757c39f0c9a4fa599bd2a7b58c3ad4a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_f318babe826841ab96db9716af5adc5e",
+            "placeholder": "",
+            "style": "IPY_MODEL_fa14e7cbb0d14d56b5276d77f97f4315",
+            "value": "Loading weights: 100%"
+          }
+        },
+        "696a9b10ead84306a3886b2a5853eb7c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_fd3f39e32b514e1f949a98ebc2922be0",
+            "max": 453,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_e94a0224955143e59fb03000dff11208",
+            "value": 453
+          }
+        },
+        "d7684e7d23764317acc7bcc512944758": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_51f5cb0a06e84813a224e51e8a11c128",
+            "placeholder": "",
+            "style": "IPY_MODEL_adc120af345247d6b3de0611e6d44490",
+            "value": " 453/453 [00:00&lt;00:00, 611.00it/s, Materializing param=model.layers.31.self_attn.v_proj.weight]"
+          }
+        },
+        "e44dbcae75684967ae5a797d20c2ec16": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f318babe826841ab96db9716af5adc5e": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "fa14e7cbb0d14d56b5276d77f97f4315": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "fd3f39e32b514e1f949a98ebc2922be0": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "e94a0224955143e59fb03000dff11208": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "51f5cb0a06e84813a224e51e8a11c128": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "adc120af345247d6b3de0611e6d44490": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "1e548b10c6b44b5fb6ea8687d94486f7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_603cb08fbfdf4aa5bb1072da1501116f",
+              "IPY_MODEL_b30d2933522342bb8df152670e068a61",
+              "IPY_MODEL_c04efaac2bd042618f7d4923ec2be8c0"
+            ],
+            "layout": "IPY_MODEL_90559a506ecb4f46a345855a05cfc2d4"
+          }
+        },
+        "603cb08fbfdf4aa5bb1072da1501116f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e36f24e96c9a4941a071ad0a10e486af",
+            "placeholder": "",
+            "style": "IPY_MODEL_e2962aae45854cc39eb90fdbcef0f460",
+            "value": "Loading weights: 100%"
+          }
+        },
+        "b30d2933522342bb8df152670e068a61": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_b004dc925fb64e43ae97d73788648ef7",
+            "max": 453,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_c76b9cb1d93b48829805003ed178a2fa",
+            "value": 453
+          }
+        },
+        "c04efaac2bd042618f7d4923ec2be8c0": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_19103b3d686c4da994dc68912fe162b6",
+            "placeholder": "",
+            "style": "IPY_MODEL_f3a46d6eebe04dd3894cb893ca8bc58a",
+            "value": " 453/453 [00:01&lt;00:00, 404.11it/s, Materializing param=model.layers.31.self_attn.v_proj.weight]"
+          }
+        },
+        "90559a506ecb4f46a345855a05cfc2d4": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "e36f24e96c9a4941a071ad0a10e486af": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "e2962aae45854cc39eb90fdbcef0f460": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "b004dc925fb64e43ae97d73788648ef7": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "c76b9cb1d93b48829805003ed178a2fa": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "19103b3d686c4da994dc68912fe162b6": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "f3a46d6eebe04dd3894cb893ca8bc58a": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "844da948344e4421b32fa41a679fbdb7": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_012adaa5fd314a589e7c67e381a26c2c",
+              "IPY_MODEL_dea346540d084b5c89d96b992c11c30e",
+              "IPY_MODEL_a82b849d29f64d878cd87cba3851b80b"
+            ],
+            "layout": "IPY_MODEL_fe5a48f5cb2743319c5fcd38a494ff2c"
+          }
+        },
+        "012adaa5fd314a589e7c67e381a26c2c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_098dee5390834dba9557964ccde15819",
+            "placeholder": "",
+            "style": "IPY_MODEL_8fda24ae999c4da29f2282b7b687d147",
+            "value": "Loading weights: 100%"
+          }
+        },
+        "dea346540d084b5c89d96b992c11c30e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_4d9edf9c343d4fc4a6381bba2d25ac4b",
+            "max": 453,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_4c77a8efb23a4a44ba22aedcee5e6f16",
+            "value": 453
+          }
+        },
+        "a82b849d29f64d878cd87cba3851b80b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_7ce7515d20fb4c06a27da6f45e0210d3",
+            "placeholder": "",
+            "style": "IPY_MODEL_0daa08754c274d51ac35ef34e3a9457d",
+            "value": " 453/453 [00:03&lt;00:00, 144.88it/s, Materializing param=model.layers.31.self_attn.v_proj.weight]"
+          }
+        },
+        "fe5a48f5cb2743319c5fcd38a494ff2c": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "098dee5390834dba9557964ccde15819": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "8fda24ae999c4da29f2282b7b687d147": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "4d9edf9c343d4fc4a6381bba2d25ac4b": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "4c77a8efb23a4a44ba22aedcee5e6f16": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "7ce7515d20fb4c06a27da6f45e0210d3": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "0daa08754c274d51ac35ef34e3a9457d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install -U transformers"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 810
+        },
+        "id": "o0zcQycu-UEa",
+        "outputId": "3d82776f-8531-4680-8f2a-5578e708c945"
+      },
+      "execution_count": 23,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: transformers in /usr/local/lib/python3.12/dist-packages (5.0.0)\n",
+            "Collecting transformers\n",
+            "  Downloading transformers-5.8.1-py3-none-any.whl.metadata (33 kB)\n",
+            "Requirement already satisfied: huggingface-hub<2.0,>=1.5.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (1.11.0)\n",
+            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.12/dist-packages (from transformers) (2.0.2)\n",
+            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (26.1)\n",
+            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers) (6.0.3)\n",
+            "Requirement already satisfied: regex>=2025.10.22 in /usr/local/lib/python3.12/dist-packages (from transformers) (2025.11.3)\n",
+            "Requirement already satisfied: tokenizers<=0.23.0,>=0.22.0 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.22.2)\n",
+            "Requirement already satisfied: typer in /usr/local/lib/python3.12/dist-packages (from transformers) (0.24.2)\n",
+            "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers) (0.7.0)\n",
+            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers) (4.67.3)\n",
+            "Requirement already satisfied: filelock>=3.10.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (3.29.0)\n",
+            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (2025.3.0)\n",
+            "Requirement already satisfied: hf-xet<2.0.0,>=1.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (1.4.3)\n",
+            "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (0.28.1)\n",
+            "Requirement already satisfied: typing-extensions>=4.1.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<2.0,>=1.5.0->transformers) (4.15.0)\n",
+            "Requirement already satisfied: click>=8.2.1 in /usr/local/lib/python3.12/dist-packages (from typer->transformers) (8.3.3)\n",
+            "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.12/dist-packages (from typer->transformers) (1.5.4)\n",
+            "Requirement already satisfied: rich>=12.3.0 in /usr/local/lib/python3.12/dist-packages (from typer->transformers) (13.9.4)\n",
+            "Requirement already satisfied: annotated-doc>=0.0.2 in /usr/local/lib/python3.12/dist-packages (from typer->transformers) (0.0.4)\n",
+            "Requirement already satisfied: anyio in /usr/local/lib/python3.12/dist-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0,>=1.5.0->transformers) (4.13.0)\n",
+            "Requirement already satisfied: certifi in /usr/local/lib/python3.12/dist-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0,>=1.5.0->transformers) (2026.4.22)\n",
+            "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.12/dist-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0,>=1.5.0->transformers) (1.0.9)\n",
+            "Requirement already satisfied: idna in /usr/local/lib/python3.12/dist-packages (from httpx<1,>=0.23.0->huggingface-hub<2.0,>=1.5.0->transformers) (3.13)\n",
+            "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.12/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->huggingface-hub<2.0,>=1.5.0->transformers) (0.16.0)\n",
+            "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich>=12.3.0->typer->transformers) (4.0.0)\n",
+            "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.12/dist-packages (from rich>=12.3.0->typer->transformers) (2.20.0)\n",
+            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich>=12.3.0->typer->transformers) (0.1.2)\n",
+            "Downloading transformers-5.8.1-py3-none-any.whl (10.6 MB)\n",
+            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.6/10.6 MB\u001b[0m \u001b[31m61.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25hInstalling collected packages: transformers\n",
+            "  Attempting uninstall: transformers\n",
+            "    Found existing installation: transformers 5.0.0\n",
+            "    Uninstalling transformers-5.0.0:\n",
+            "      Successfully uninstalled transformers-5.0.0\n",
+            "Successfully installed transformers-5.8.1\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "application/vnd.colab-display-data+json": {
+              "pip_warning": {
+                "packages": [
+                  "transformers"
+                ]
+              },
+              "id": "338a67b765ed413081d22a770dd5b35c"
+            }
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Local Inference on GPU\n",
+        "Model page: https://huggingface.co/microsoft/phi-2\n",
+        "\n",
+        "⚠️ If the generated code snippets do not work, please open an issue on either the [model repo](https://huggingface.co/microsoft/phi-2)\n",
+        "\t\t\tand/or on [huggingface.js](https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries-snippets.ts) 🙏"
+      ],
+      "metadata": {
+        "id": "qhYDm6yk-UEj"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Use a pipeline as a high-level helper\n",
+        "from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoConfig\n",
+        "import torch\n",
+        "\n",
+        "model_name = \"microsoft/phi-2\"\n",
+        "\n",
+        "# Load tokenizer and set pad_token\n",
+        "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
+        "tokenizer.pad_token = tokenizer.eos_token\n",
+        "\n",
+        "# Load model configuration and set pad_token_id\n",
+        "config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)\n",
+        "config.pad_token_id = tokenizer.eos_token_id\n",
+        "\n",
+        "# Load model with the modified configuration\n",
+        "model = AutoModelForCausalLM.from_pretrained(\n",
+        "    model_name,\n",
+        "    config=config,\n",
+        "    trust_remote_code=True,\n",
+        "    torch_dtype=torch.float16 # Use float16 for potentially better performance/memory usage\n",
+        ")\n",
+        "\n",
+        "# Create the pipeline with the correctly loaded model and tokenizer\n",
+        "# Check for GPU and move model if available\n",
+        "device = 0 if torch.cuda.is_available() else -1\n",
+        "if torch.cuda.is_available():\n",
+        "    model.to('cuda')\n",
+        "    print(\"Model moved to GPU.\")\n",
+        "else:\n",
+        "    print(\"GPU not available, model will run on CPU.\")\n",
+        "\n",
+        "pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, device=device)\n",
+        "print(\"Pipeline initialized successfully.\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 96,
+          "referenced_widgets": [
+            "b486eefffb274a1d89c401ee56f0ea7b",
+            "2757c39f0c9a4fa599bd2a7b58c3ad4a",
+            "696a9b10ead84306a3886b2a5853eb7c",
+            "d7684e7d23764317acc7bcc512944758",
+            "e44dbcae75684967ae5a797d20c2ec16",
+            "f318babe826841ab96db9716af5adc5e",
+            "fa14e7cbb0d14d56b5276d77f97f4315",
+            "fd3f39e32b514e1f949a98ebc2922be0",
+            "e94a0224955143e59fb03000dff11208",
+            "51f5cb0a06e84813a224e51e8a11c128",
+            "adc120af345247d6b3de0611e6d44490"
+          ]
+        },
+        "id": "b9Ly2RUM-UFe",
+        "outputId": "f89768a7-caf1-4972-8c24-d088907a31ea"
+      },
+      "execution_count": 25,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Loading weights:   0%|          | 0/453 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "b486eefffb274a1d89c401ee56f0ea7b"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "GPU not available, model will run on CPU.\n",
+            "Pipeline initialized successfully.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig\n",
+        "import torch\n",
+        "\n",
+        "# Load tokenizer first\n",
+        "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/phi-2\", trust_remote_code=True)\n",
+        "\n",
+        "# Set pad_token for tokenizer using eos_token\n",
+        "tokenizer.pad_token = tokenizer.eos_token\n",
+        "\n",
+        "# Load model configuration separately to ensure pad_token_id is set before model initialization\n",
+        "config = AutoConfig.from_pretrained(\"microsoft/phi-2\", trust_remote_code=True)\n",
+        "\n",
+        "# Explicitly set pad_token_id in the config\n",
+        "config.pad_token_id = tokenizer.eos_token_id\n",
+        "\n",
+        "model = AutoModelForCausalLM.from_pretrained(\n",
+        "    \"microsoft/phi-2\",\n",
+        "    config=config, # Pass the modified config here\n",
+        "    trust_remote_code=True,\n",
+        "    torch_dtype=torch.float16 # Use float16 for potentially better performance/memory usage\n",
+        ")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 61,
+          "referenced_widgets": [
+            "1e548b10c6b44b5fb6ea8687d94486f7",
+            "603cb08fbfdf4aa5bb1072da1501116f",
+            "b30d2933522342bb8df152670e068a61",
+            "c04efaac2bd042618f7d4923ec2be8c0",
+            "90559a506ecb4f46a345855a05cfc2d4",
+            "e36f24e96c9a4941a071ad0a10e486af",
+            "e2962aae45854cc39eb90fdbcef0f460",
+            "b004dc925fb64e43ae97d73788648ef7",
+            "c76b9cb1d93b48829805003ed178a2fa",
+            "19103b3d686c4da994dc68912fe162b6",
+            "f3a46d6eebe04dd3894cb893ca8bc58a"
+          ]
+        },
+        "id": "q5AOq1_Z-UFu",
+        "outputId": "e1c11712-860c-40f7-e8eb-f639471858d7"
+      },
+      "execution_count": 39,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Loading weights:   0%|          | 0/453 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "1e548b10c6b44b5fb6ea8687d94486f7"
+            }
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Remote Inference via Inference Providers\n",
+        "Ensure you have a valid **HF_TOKEN** set in your environment. You can get your token from [your settings page](https://huggingface.co/settings/tokens). Note: running this may incur charges above the free tier.\n",
+        "The following Python example shows how to run the model remotely on HF Inference Providers, automatically selecting an available inference provider for you.\n",
+        "For more information on how to use the Inference Providers, please refer to our [documentation and guides](https://huggingface.co/docs/inference-providers/en/index)."
+      ],
+      "metadata": {
+        "id": "IRyu8RyM-UF-"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "029ed965"
+      },
+      "source": [
+        "## Gradio Interface for Phi-2 Chat"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "b8b69d12"
+      },
+      "source": [
+        "# Install Gradio library\n",
+        "!pip install gradio -q"
+      ],
+      "execution_count": 27,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 113,
+          "referenced_widgets": [
+            "844da948344e4421b32fa41a679fbdb7",
+            "012adaa5fd314a589e7c67e381a26c2c",
+            "dea346540d084b5c89d96b992c11c30e",
+            "a82b849d29f64d878cd87cba3851b80b",
+            "fe5a48f5cb2743319c5fcd38a494ff2c",
+            "098dee5390834dba9557964ccde15819",
+            "8fda24ae999c4da29f2282b7b687d147",
+            "4d9edf9c343d4fc4a6381bba2d25ac4b",
+            "4c77a8efb23a4a44ba22aedcee5e6f16",
+            "7ce7515d20fb4c06a27da6f45e0210d3",
+            "0daa08754c274d51ac35ef34e3a9457d"
+          ]
+        },
+        "id": "b5a619ac",
+        "outputId": "7c85a144-3652-4541-ded7-dce03c49b549"
+      },
+      "source": [
+        "import gradio as gr\n",
+        "from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoConfig\n",
+        "import torch\n",
+        "\n",
+        "print(\"Loading Phi-2 model and tokenizer...\")\n",
+        "\n",
+        "# Load tokenizer first\n",
+        "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/phi-2\", trust_remote_code=True)\n",
+        "\n",
+        "# Set pad_token for tokenizer using eos_token\n",
+        "tokenizer.pad_token = tokenizer.eos_token\n",
+        "\n",
+        "# Load model configuration separately to ensure pad_token_id is set before model initialization\n",
+        "config = AutoConfig.from_pretrained(\"microsoft/phi-2\", trust_remote_code=True)\n",
+        "\n",
+        "# Explicitly set pad_token_id in the config, as Phi-2's config might not have it by default\n",
+        "# The model's internal structure expects this attribute to be present.\n",
+        "config.pad_token_id = tokenizer.eos_token_id\n",
+        "\n",
+        "model = AutoModelForCausalLM.from_pretrained(\n",
+        "    \"microsoft/phi-2\",\n",
+        "    config=config, # Pass the modified config here\n",
+        "    trust_remote_code=True,\n",
+        "    torch_dtype=torch.float16\n",
+        ")\n",
+        "\n",
+        "# Check for GPU and move model if available\n",
+        "if torch.cuda.is_available():\n",
+        "    model.to('cuda')\n",
+        "    print(\"Model moved to GPU.\")\n",
+        "else:\n",
+        "    print(\"GPU not available, model will run on CPU.\")\n",
+        "\n",
+        "# Create a text generation pipeline and rename it to avoid conflict\n",
+        "text_generator_pipeline = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)\n",
+        "\n",
+        "print(\"Phi-2 model loaded successfully.\")"
+      ],
+      "execution_count": 28,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Loading Phi-2 model and tokenizer...\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Loading weights:   0%|          | 0/453 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "844da948344e4421b32fa41a679fbdb7"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "GPU not available, model will run on CPU.\n",
+            "Phi-2 model loaded successfully.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "61f7b058"
+      },
+      "source": [
+        "def predict(message, history):\n",
+        "    conversation_history = \"\"\n",
+        "    for human, assistant in history:\n",
+        "        conversation_history += f\"Human: {human}\\nAssistant: {assistant}\\n\"\n",
+        "    conversation_history += f\"Human: {message}\\nAssistant:\"\n",
+        "\n",
+        "    outputs = text_generator_pipeline(\n",
+        "        conversation_history, # Pass conversation_history directly as a string to the renamed pipeline object\n",
+        "        max_new_tokens=200, # Generate up to 200 new tokens\n",
+        "        do_sample=True,\n",
+        "        temperature=0.7,\n",
+        "        top_k=50,\n",
+        "        top_p=0.95,\n",
+        "        eos_token_id=tokenizer.eos_token_id # Stop generation at end-of-sequence token\n",
+        "    )\n",
+        "    generated_text = outputs[0]['generated_text']\n",
+        "\n",
+        "    # Extract only the assistant's response part\n",
+        "    assistant_response = generated_text.split(\"Assistant:\")[-1].strip()\n",
+        "    # Remove the last user input from the response if the model repeats it\n",
+        "    if assistant_response.startswith(message):\n",
+        "        assistant_response = assistant_response[len(message):].strip()\n",
+        "\n",
+        "    return assistant_response"
+      ],
+      "execution_count": 29,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "cbdf81f5"
+      },
+      "source": [
+        "Now, let's launch the Gradio chat interface. Click the public URL to interact with the model."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "63e656fe",
+        "outputId": "cc1a646b-cc0a-4f68-e309-2ea3660104b8"
+      },
+      "source": [
+        "gr.ChatInterface(\n",
+        "    predict,\n",
+        "    chatbot=gr.Chatbot(height=500), # Make the chatbot window larger\n",
+        "    textbox=gr.Textbox(placeholder=\"Ask me a question\", container=False, scale=7),\n",
+        "    title=\"Chat with Phi-2\",\n",
+        "    description=\"Interact with the Microsoft Phi-2 model. Ask questions, have conversations, or experiment with its generative capabilities!\",\n",
+        "    theme=\"soft\", # A pleasant theme\n",
+        "    examples=[\"Tell me a short story.\", \"Explain quantum physics simply.\", \"What is the capital of France?\"],\n",
+        "    cache_examples=False\n",
+        ").launch(debug=True, share=True)"
+      ],
+      "execution_count": 36,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/tmp/ipykernel_3691/638742562.py:3: UserWarning: You have not specified a value for the `type` parameter. Defaulting to the 'tuples' format for chatbot messages, but this is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style dictionaries with 'role' and 'content' keys.\n",
+            "  chatbot=gr.Chatbot(height=500), # Make the chatbot window larger\n",
+            "/tmp/ipykernel_3691/638742562.py:3: DeprecationWarning: The default value of 'allow_tags' in gr.Chatbot will be changed from False to True in Gradio 6.0. You will need to explicitly set allow_tags=False if you want to disable tags in your chatbot.\n",
+            "  chatbot=gr.Chatbot(height=500), # Make the chatbot window larger\n",
+            "/usr/local/lib/python3.12/dist-packages/gradio/chat_interface.py:330: UserWarning: The gr.ChatInterface was not provided with a type, so the type of the gr.Chatbot, 'tuples', will be used.\n",
+            "  warnings.warn(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().\n",
+            "* Running on public URL: https://65e2f7deee45e031ef.gradio.live\n",
+            "\n",
+            "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "<div><iframe src=\"https://65e2f7deee45e031ef.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n",
+            "Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n",
+            "Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n",
+            "Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n",
+            "Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n",
+            "Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n",
+            "Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n",
+            "Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n",
+            "Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n",
+            "Both `max_new_tokens` (=200) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Keyboard interruption in main thread... closing server.\n"
+          ]
+        },
+        {
+          "output_type": "error",
+          "ename": "KeyboardInterrupt",
+          "evalue": "",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/gradio/blocks.py\u001b[0m in \u001b[0;36mblock_thread\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   3042\u001b[0m             \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3043\u001b[0;31m                 \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3044\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mOSError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mKeyboardInterrupt\u001b[0m: ",
+            "\nDuring handling of the above exception, another exception occurred:\n",
+            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+            "\u001b[0;32m/tmp/ipykernel_3691/638742562.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      8\u001b[0m     \u001b[0mexamples\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Tell me a short story.\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Explain quantum physics simply.\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"What is the capital of France?\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m     \u001b[0mcache_examples\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m ).launch(debug=True, share=True)\n\u001b[0m",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/gradio/blocks.py\u001b[0m in \u001b[0;36mlaunch\u001b[0;34m(self, inline, inbrowser, share, debug, max_threads, auth, auth_message, prevent_thread_lock, show_error, server_name, server_port, height, width, favicon_path, ssl_keyfile, ssl_certfile, ssl_keyfile_password, ssl_verify, quiet, show_api, allowed_paths, blocked_paths, root_path, app_kwargs, state_session_capacity, share_server_address, share_server_protocol, share_server_tls_certificate, auth_dependency, max_file_size, enable_monitoring, strict_cors, node_server_name, node_port, ssr_mode, pwa, mcp_server, _frontend, i18n)\u001b[0m\n\u001b[1;32m   2948\u001b[0m             )\n\u001b[1;32m   2949\u001b[0m         ):\n\u001b[0;32m-> 2950\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblock_thread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2951\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2952\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mTupleNoPrint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mserver_app\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlocal_url\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshare_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# type: ignore\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/gradio/blocks.py\u001b[0m in \u001b[0;36mblock_thread\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   3045\u001b[0m             \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Keyboard interruption in main thread... closing server.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3046\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mserver\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3047\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mserver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3048\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mtunnel\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mCURRENT_TUNNELS\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3049\u001b[0m                 \u001b[0mtunnel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkill\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/gradio/http_server.py\u001b[0m in \u001b[0;36mclose\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     67\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreloader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     68\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwatch_thread\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 69\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mthread\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     70\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/lib/python3.12/threading.py\u001b[0m in \u001b[0;36mjoin\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m   1151\u001b[0m             \u001b[0;31m# the behavior of a negative timeout isn't documented, but\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1152\u001b[0m             \u001b[0;31m# historically .join(timeout=x) for x<0 has acted as if timeout=0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1153\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_wait_for_tstate_lock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1154\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1155\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_wait_for_tstate_lock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mblock\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/lib/python3.12/threading.py\u001b[0m in \u001b[0;36m_wait_for_tstate_lock\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m   1167\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1168\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1169\u001b[0;31m             \u001b[0;32mif\u001b[0m \u001b[0mlock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mblock\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1170\u001b[0m                 \u001b[0mlock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrelease\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1171\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "61f7b008"
+      },
+      "source": [
+        "def predict(message, history):\n",
+        "    conversation_history = \"\"\n",
+        "    for human, assistant in history:\n",
+        "        conversation_history += f\"Human: {human}\\nAssistant: {assistant}\\n\"\n",
+        "    conversation_history += f\"Human: {message}\\nAssistant:\"\n",
+        "\n",
+        "    outputs = pipe(\n",
+        "        conversation_history, # Pass conversation_history directly as a string\n",
+        "        max_new_tokens=200, # Generate up to 200 new tokens\n",
+        "        do_sample=True,\n",
+        "        temperature=0.7,\n",
+        "        top_k=50,\n",
+        "        top_p=0.95,\n",
+        "        eos_token_id=tokenizer.eos_token_id # Stop generation at end-of-sequence token\n",
+        "    )\n",
+        "    generated_text = outputs[0]['generated_text']\n",
+        "\n",
+        "    # Extract only the assistant's response part\n",
+        "    assistant_response = generated_text.split(\"Assistant:\")[-1].strip()\n",
+        "    # Remove the last user input from the response if the model repeats it\n",
+        "    if assistant_response.startswith(message):\n",
+        "        assistant_response = assistant_response[len(message):].strip()\n",
+        "\n",
+        "    return assistant_response"
+      ],
+      "execution_count": 35,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "os.environ['HF_TOKEN'] = 'YOUR_TOKEN_HERE'"
+      ],
+      "metadata": {
+        "id": "leeUFqBD-UF-"
+      },
+      "execution_count": 32,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "from huggingface_hub import InferenceClient\n",
+        "\n",
+        "client = InferenceClient(\n",
+        "    provider=\"auto\",\n",
+        "    api_key=os.environ[\"HF_TOKEN\"],\n",
+        ")\n",
+        "\n",
+        "completion = client.chat.completions.create(\n",
+        "    model=\"microsoft/phi-2\",\n",
+        "    messages=\"\\\"Can you please let us know more details about your \\\"\",\n",
+        ")\n",
+        "\n",
+        "print(completion.choices[0].message)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 512
+        },
+        "id": "Zmof51oV-UGN",
+        "outputId": "7cf7ce76-f57b-44ee-9f06-5f3191182924"
+      },
+      "execution_count": 37,
+      "outputs": [
+        {
+          "output_type": "error",
+          "ename": "ValueError",
+          "evalue": "Cannot select auto-router when using non-Hugging Face API key.",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+            "\u001b[0;32m/tmp/ipykernel_3691/2505081652.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      7\u001b[0m )\n\u001b[1;32m      8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m completion = client.chat.completions.create(\n\u001b[0m\u001b[1;32m     10\u001b[0m     \u001b[0mmodel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"microsoft/phi-2\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     11\u001b[0m     \u001b[0mmessages\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"\\\"Can you please let us know more details about your \\\"\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/huggingface_hub/inference/_client.py\u001b[0m in \u001b[0;36mchat_completion\u001b[0;34m(self, messages, model, stream, frequency_penalty, logit_bias, logprobs, max_tokens, n, presence_penalty, response_format, seed, stop, stream_options, temperature, tool_choice, tool_prompt, tools, top_logprobs, top_p, extra_body)\u001b[0m\n\u001b[1;32m    920\u001b[0m             \u001b[0;34m**\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mextra_body\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    921\u001b[0m         }\n\u001b[0;32m--> 922\u001b[0;31m         request_parameters = provider_helper.prepare_request(\n\u001b[0m\u001b[1;32m    923\u001b[0m             \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmessages\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    924\u001b[0m             \u001b[0mparameters\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/huggingface_hub/inference/_providers/_common.py\u001b[0m in \u001b[0;36mprepare_request\u001b[0;34m(self, inputs, parameters, headers, model, api_key, extra_payload)\u001b[0m\n\u001b[1;32m    100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    101\u001b[0m         \u001b[0;31m# routed URL if HF token, or direct URL (to customize in '_prepare_route' in subclasses)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 102\u001b[0;31m         \u001b[0murl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mapi_key\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprovider_mapping_info\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprovider_id\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    104\u001b[0m         \u001b[0;31m# prepare payload (to customize in subclasses)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/huggingface_hub/inference/_providers/_common.py\u001b[0m in \u001b[0;36m_prepare_url\u001b[0;34m(self, api_key, mapped_model)\u001b[0m\n\u001b[1;32m    214\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    215\u001b[0m         Usually not overwritten in subclasses.\"\"\"\n\u001b[0;32m--> 216\u001b[0;31m         \u001b[0mbase_url\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_base_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mapi_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    217\u001b[0m         \u001b[0mroute\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_route\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmapped_model\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mapi_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    218\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0;34mf\"{base_url.rstrip('/')}/{route.lstrip('/')}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/huggingface_hub/inference/_providers/_common.py\u001b[0m in \u001b[0;36m_prepare_base_url\u001b[0;34m(self, api_key)\u001b[0m\n\u001b[1;32m    300\u001b[0m         \u001b[0;31m# Route to the proxy if the api_key is a HF TOKEN\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    301\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mapi_key\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"hf_\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 302\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Cannot select auto-router when using non-Hugging Face API key.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    303\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    304\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbase_url\u001b[0m  \u001b[0;31m# No `/auto` suffix in the URL\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mValueError\u001b[0m: Cannot select auto-router when using non-Hugging Face API key."
+          ]
+        }
+      ]
+    }
+  ]
+}