{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "First of all, we need to install the required **libraries**" ], "metadata": { "id": "k3d4WSh2zKRR" } }, { "cell_type": "code", "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "tIdoqVV1izoz", "outputId": "392af71f-8313-4be0-c1da-6334550bc69d" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\u001b[?25l \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m0.0/84.1 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h" ] } ], "source": [ "!pip install transformers datasets peft evaluate accelerate -q" ] }, { "cell_type": "markdown", "source": [ "Verifying Installations" ], "metadata": { "id": "Ym_FNQY4zU3y" } }, { "cell_type": "code", "source": [ "import transformers\n", "import datasets\n", "import peft\n", "import evaluate\n", "print(f\"Tramsformers: {transformers.__version__}\")\n", "print(f\"Datasets: {datasets.__version__}\")\n", "print(f\"PEFT: {peft.__version__}\")\n", "print(f\"Evaluate: {evaluate.__version__}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wOie5cSZxVIZ", "outputId": 
"e208377e-dcac-4a59-cc15-6f99bc018284" }, "execution_count": 22, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Tramsformers: 5.0.0\n", "Datasets: 4.0.0\n", "PEFT: 0.18.1\n", "Evaluate: 0.4.6\n" ] } ] }, { "cell_type": "markdown", "source": [ "**Load the datasets**" ], "metadata": { "id": "77A5H5jUzinc" } }, { "cell_type": "code", "source": [ "from datasets import load_dataset" ], "metadata": { "id": "101qqBCBz-JB" }, "execution_count": 18, "outputs": [] }, { "cell_type": "markdown", "source": [ "loading banking77 dataset" ], "metadata": { "id": "bEcOwfZV0IXa" } }, { "cell_type": "code", "source": [ "dataset =load_dataset(\"legacy-datasets/banking77\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 177, "referenced_widgets": [ "5655c5cca3d547b6a0ac08d1b1fbf008", "f9ec87bec967441f92f74b6518819709", "84a24ca325464e1db0cdb56f096c10fc", "e326b50a6a6f41f79d9af1e544645348", "cea5de85e6994e4ba40219434102842d", "c2a9c610fe1b4e93a7aacafeb22069ba", "aaa2991f6f854d188eb62177c0ccf457", "d069596b4ad54bb9a785403e0491fbef", "075bc577ced6404bb1e6a63b47c6139e", "5549923675df4864a223429518765dd2", "d0949c0967184dc097899dc1dffc0bd5", "e062cedda9ff4128b90dc0b76ce7d71c", "9f20dfd7ca6743338810ee564f1bbf95", "3589858387154504a9f7663935156d5c", "6af6400d7394437ea2aaa7816aab963f", "15b8413c1cb1464d9c5fbe0899d855e6", "7d04113fcd084ff09ea5fa7c5718680c", "ac29f3c9178041ed967f11aaa98195e8", "71b87b8a3c27421b9b3b0fa7f86b44c5", "8572d5854f2e4cc49ccf7ad17ee7ddd5", "5d2a532c7f52411a9d1e19fe58b44970", "fc1caa125cb04ef1a26c6018d2702e0d", "9ced219b80c74d5392c00c786dc3663a", "25f95444cec04b4fba706853567f0a31", "0cb3c9f5192548bcb970b06d71e1fe4b", "ebb6abfa10e4494480fcef655d7bc280", "ca9fd314395c4949b090ade258664516", "040f6956c7cb416dbca66863045d40e7", "3108dd45cc04481989000aec773bdc8b", "7803a811de55428abb91932f0a3c325e", "f34a912cde854e35a345ce5a807f27ea", "c339895ae7554611ae9679a6e80eaccb", "db0946daeef94404a0ba4ba58132ccbb", 
"5daf5aacab4c4028a127c4128e4a9c97", "7abc88f1c19b47449b924e183c42e21a", "d25694f9d9bc4bffb3f8a53ec5a58749", "ad5bd6f328ec41818e62602dbc53dc77", "f796fe7a076d409284f65fda06a96633", "ba6dfbc2bdf249c8b08b45c30495e684", "d96a1cae1c2846ca8243e4b9cf80f9a2", "32853e628e794361bee346a975a35534", "2ec24fdc2a5d47e7bd9224349535fd74", "bd57051fd3f544d7911b3c43a4b7a63b", "aa2e7b6a1a794c239e064988383918f6", "ce41312305a54e0e97b9d113a8245c34", "82293b8d416847dd96f71c0ed5e805fd", "57b3d091a24047a8ab97d5d6535a79ef", "3ba69b950f0848c89aa090f1a69fba57", "93db226d05cd4e9582cc685ec47dfeb2", "261c90036bd746ffb020e5f55e27a104", "0f3994568d514d30ba2e727c5a52c9bd", "2628185d0ede4f039eb7e38368761d73", "02771cfe0334486293ed5570c86f9f46", "fc512a98af3d426ba5be0278c3184398", "f4faf71ce3e6438b8a788cff559839b5" ] }, "id": "HbYjKkL40NDQ", "outputId": "eaf2f031-5f62-4187-d930-5c3f3ce112fa" }, "execution_count": 19, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "README.md: 0.00B [00:00, ?B/s]" ], "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "5655c5cca3d547b6a0ac08d1b1fbf008" } }, "metadata": {} }, { "output_type": "display_data", "data": { "text/plain": [ "data/train-00000-of-00001.parquet: 0%| | 0.00/298k [00:00" ], "text/html": [ "\n", "
\n", " \n", " \n", " [1410/1410 01:45, Epoch 5/5]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossAccuracy
13.9957003.6091720.293706
22.5727892.3075550.604396
32.0080831.7363330.693307
41.6930041.4968390.712288
51.5820251.4259610.735265

" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Model saved to Google Drive\n" ] } ] }, { "cell_type": "markdown", "source": [ "Model Evaluation" ], "metadata": { "id": "1xAONxM4H-U7" } }, { "cell_type": "code", "source": [ "import numpy as np\n", "from sklearn.metrics import classification_report, confusion_matrix\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "# Get predictions on test set\n", "predictions = trainer.predict(tokenized_dataset[\"test\"])\n", "pred_labels = np.argmax(predictions.predictions, axis=-1)\n", "true_labels = predictions.label_ids\n", "\n", "# Overall accuracy\n", "accuracy = (pred_labels == true_labels).mean()\n", "print(f\"Test Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)\")\n", "\n", "# Detailed per class report\n", "class_names = dataset[\"test\"].features[\"label\"].names\n", "report = classification_report(\n", " true_labels,\n", " pred_labels,\n", " target_names=class_names,\n", " output_dict=True\n", ")\n", "\n", "# Find best and worst performing intents\n", "report_df = pd.DataFrame(report).transpose()\n", "report_df = report_df.drop(\n", " [\"accuracy\", \"macro avg\", \"weighted avg\"]\n", ")\n", "\n", "print(\"\\nTop 5 best performing intents:\")\n", "print(report_df.nlargest(5, \"f1-score\")[[\"f1-score\", \"support\"]])\n", "\n", "print(\"\\nTop 5 worst performing intents:\")\n", "print(report_df.nsmallest(5, \"f1-score\")[[\"f1-score\", \"support\"]])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 312 }, "id": "NCqJLNLpIBdd", "outputId": "b9dd7cb3-7ee5-4c4c-d95f-2b54e2056cbe" }, "execution_count": 30, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "" ], "text/html": [] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Test Accuracy: 0.7175 (71.75%)\n", "\n", "Top 5 best performing intents:\n", " f1-score support\n", "verify_top_up 1.000000 40.0\n", "age_limit 0.975610 40.0\n", 
"change_pin 0.950000 40.0\n", "passcode_forgotten 0.930233 40.0\n", "get_physical_card 0.928571 40.0\n", "\n", "Top 5 worst performing intents:\n", " f1-score support\n", "transfer_not_received_by_recipient 0.310345 40.0\n", "cash_withdrawal_not_recognised 0.338983 40.0\n", "supported_cards_and_currencies 0.339623 40.0\n", "topping_up_by_card 0.339623 40.0\n", "top_up_by_bank_transfer_charge 0.392857 40.0\n" ] } ] }, { "cell_type": "code", "source": [ "model.save_pretrained(\"banking77-lora-final\")\n", "tokenizer.save_pretrained(\"banking77-lora-final\")\n", "print(\"Model saved locally\")" ], "metadata": { "id": "HXa6IDJnCjj3", "outputId": "0a070d10-f66f-4622-cbbd-aa03fd7a9fe2", "colab": { "base_uri": "https://localhost:8080/" } }, "execution_count": 32, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Model saved locally\n" ] } ] }, { "cell_type": "markdown", "source": [ "Inference Code" ], "metadata": { "id": "2TwhO54Px12T" } }, { "cell_type": "code", "source": [ "from peft import PeftModel, PeftConfig\n", "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n", "import torch\n", "\n", "# Load your fine-tuned model\n", "def load_model(model_path=\"./banking77-lora-final\"):\n", " tokenizer = AutoTokenizer.from_pretrained(model_path)\n", " config = PeftConfig.from_pretrained(model_path)\n", " model = AutoModelForSequenceClassification.from_pretrained(\n", " config.base_model_name_or_path,\n", " num_labels=77\n", " )\n", " model = PeftModel.from_pretrained(model, model_path)\n", " model.eval()\n", " return model, tokenizer\n", "\n", "# Predict intent\n", "def predict_intent(text, model, tokenizer):\n", " class_names = dataset[\"test\"].features[\"label\"].names\n", "\n", " inputs = tokenizer(\n", " text,\n", " return_tensors=\"pt\",\n", " padding=True,\n", " truncation=True,\n", " max_length=128\n", " )\n", "\n", " with torch.no_grad():\n", " outputs = model(**inputs)\n", " probabilities = torch.softmax(outputs.logits, 
dim=-1)\n", " confidence, predicted_class = torch.max(probabilities, dim=-1)\n", "\n", " return {\n", " \"intent\": class_names[predicted_class.item()],\n", " \"confidence\": f\"{confidence.item()*100:.2f}%\",\n", " \"text\": text\n", " }\n", "\n", "# Load model\n", "model, tokenizer = load_model()\n", "\n", "# Test with real examples\n", "test_queries = [\n", " \"I lost my card and need a replacement\",\n", " \"Why was my payment declined?\",\n", " \"How do I add money to my account?\",\n", " \"I want to change my PIN number\",\n", " \"What currencies do you support?\"\n", "]\n", "\n", "print(\"Intent Classification Results:\")\n", "print(\"=\"*50)\n", "for query in test_queries:\n", " result = predict_intent(query, model, tokenizer)\n", " print(f\"\\nText: {result['text']}\")\n", " print(f\"Intent: {result['intent']}\")\n", " print(f\"Confidence: {result['confidence']}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 708, "referenced_widgets": [ "6d1d9c5c8eef4194987337d6b9460eab", "c60e097ae7b74ce0b0643759a3d3b7e9", "76d184cb05634b18ae417b8e55e11969", "6055c976102140c486f4e7c5d7769d17", "43ff9fa4a0ab42aebc985ee1db03bd67", "1c4ed9c9153b4fffb0085f4bb7951c1a", "cc2a475498f040f4904916a3616fb63e", "9b9fed4da36f45c0b58bc93a025e4d4f", "bce6f1ae436f49ff93c9c1c806ac6481", "ddec6967ce674c7c89d68eaa05ede1c8", "ccf88702043a455abac5352aca0141a0" ] }, "id": "MSa_7Zpcx4td", "outputId": "db9a8cee-ab7d-4654-cfbf-e0cad855a3b2" }, "execution_count": 33, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Loading weights: 0%| | 0/100 [00:00