{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# BERT intent recognition - inference\n",
    "\n",
    "Rebuilds a BERT-based intent classifier, loads saved classifier weights from an `.h5` file, and prints the predicted intent for example sentences."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import math\n",
    "import datetime\n",
    "\n",
    "from tqdm import tqdm\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "\n",
    "import bert\n",
    "from bert import BertModelLayer\n",
    "from bert.loader import StockBertConfig, map_stock_config_to_params, load_stock_weights\n",
    "from bert.tokenization.bert_tokenization import FullTokenizer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Paths to the stock BERT checkpoint, its config and its vocabulary,\n",
    "# all resolved relative to the notebook's working directory.\n",
    "bert_model_name=\"uncased_L-12_H-768_A-12\"\n",
    "\n",
    "bert_ckpt_dir = os.path.join(\"model/\", bert_model_name)\n",
    "bert_ckpt_file = os.path.join(bert_ckpt_dir, \"bert_model.ckpt\")\n",
    "bert_config_file = os.path.join(bert_ckpt_dir, \"bert_config.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = FullTokenizer(vocab_file=os.path.join(bert_ckpt_dir, \"vocab.txt\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index i of the model's softmax output is reported as classes[i].\n",
    "# NOTE(review): presumably this order must match the label order used when\n",
    "# the loaded weights were trained - confirm before editing the list.\n",
    "classes = ['service_availability_check',\n",
    "           'billing_inquiry', \n",
    "           'order_cancellation',\n",
    "           'address_verification',\n",
    "           'user_authentication',\n",
    "           'account_information_update',\n",
    "           'call_divert', \n",
    "           'customer_service_escalation',\n",
    "           'appointment_scheduling',\n",
    "           'order_status_inquiry',\n",
    "           'product_information_request',\n",
    "           'complaint_registration',\n",
    "           'call_disconnect',\n",
    "           'appointment_confirmation', \n",
    "           'appointment_cancellation']\n",
    "\n",
    "\n",
    "# Fixed input length (in token ids, including [CLS] and [SEP]) fed to the model.\n",
    "max_seq_len = 200"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_model(max_seq_len, bert_ckpt_file, config_file=None, num_classes=None):\n",
    "    \"\"\"Build the BERT intent classifier and load the stock BERT checkpoint.\n",
    "\n",
    "    Args:\n",
    "        max_seq_len: Fixed number of token ids per input sequence.\n",
    "        bert_ckpt_file: Path to the stock BERT checkpoint handed to\n",
    "            `load_stock_weights`.\n",
    "        config_file: Path to the BERT JSON config. Defaults to the\n",
    "            notebook-level `bert_config_file`.\n",
    "        num_classes: Number of units in the softmax output layer. Defaults\n",
    "            to `len(classes)`.\n",
    "\n",
    "    Returns:\n",
    "        A `keras.Model` taking int32 token ids of shape\n",
    "        `(batch, max_seq_len)` and producing softmax scores of shape\n",
    "        `(batch, num_classes)`.\n",
    "    \"\"\"\n",
    "    # Fall back to the notebook-level settings so existing two-argument\n",
    "    # calls behave exactly as before.\n",
    "    if config_file is None:\n",
    "        config_file = bert_config_file\n",
    "    if num_classes is None:\n",
    "        num_classes = len(classes)\n",
    "\n",
    "    with tf.io.gfile.GFile(config_file, \"r\") as reader:\n",
    "        bc = StockBertConfig.from_json_string(reader.read())\n",
    "    bert_params = map_stock_config_to_params(bc)\n",
    "    bert_params.adapter_size = None\n",
    "\n",
    "    # Named `bert_layer` so the imported `bert` module is not shadowed.\n",
    "    bert_layer = BertModelLayer.from_params(bert_params, name=\"bert\")\n",
    "\n",
    "    input_ids = keras.layers.Input(\n",
    "        shape=(max_seq_len, ),\n",
    "        dtype='int32',\n",
    "        name=\"input_ids\"\n",
    "    )\n",
    "    bert_output = bert_layer(input_ids)\n",
    "    print(\"bert shape\", bert_output.shape)\n",
    "\n",
    "    # Keep only the output at sequence position 0, where [CLS] is placed.\n",
    "    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)\n",
    "    cls_out = keras.layers.Dropout(0.5)(cls_out)\n",
    "    logits = keras.layers.Dense(units=768, activation=\"tanh\")(cls_out)\n",
    "    logits = keras.layers.Dropout(0.5)(logits)\n",
    "    logits = keras.layers.Dense(\n",
    "        units=num_classes,\n",
    "        activation=\"softmax\"\n",
    "    )(logits)\n",
    "\n",
    "    model = keras.Model(inputs=input_ids, outputs=logits)\n",
    "    model.build(input_shape=(None, max_seq_len))\n",
    "\n",
    "    # Weights are loaded only after the model is built.\n",
    "    load_stock_weights(bert_layer, bert_ckpt_file)\n",
    "    return model\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = create_model(max_seq_len, bert_ckpt_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Overwrite the freshly built model's weights with the saved classifier weights.\n",
    "model.load_weights('bert_adv_synthetic_weights_v2(14).h5') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run this if you change the max_seq_len\n",
    "#model.save('bert_200_max_seq_len_model.h5') \n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def intent_recognizer(sentences):\n",
    "    \"\"\"Print the predicted intent for each sentence.\n",
    "\n",
    "    Uses the notebook-level `tokenizer`, `model`, `classes` and\n",
    "    `max_seq_len`.\n",
    "\n",
    "    Args:\n",
    "        sentences: An iterable of strings. A single string is treated as\n",
    "            one sentence rather than being iterated character by character.\n",
    "\n",
    "    Returns:\n",
    "        None. Each sentence and its predicted class name are printed.\n",
    "    \"\"\"\n",
    "    if isinstance(sentences, str):\n",
    "        sentences = [sentences]\n",
    "    # Materialise once: the input is walked for tokenising and again for printing.\n",
    "    sentences = list(sentences)\n",
    "    if not sentences:\n",
    "        return\n",
    "\n",
    "    # Two positions are reserved for [CLS] and [SEP]. Longer inputs are\n",
    "    # truncated so every row has exactly max_seq_len ids; without this an\n",
    "    # over-long sentence yields a row the model's fixed-size input rejects.\n",
    "    max_tokens = max_seq_len - 2\n",
    "\n",
    "    pred_token_ids = []\n",
    "    for sentence in sentences:\n",
    "        tokens = [\"[CLS]\"] + tokenizer.tokenize(sentence)[:max_tokens] + [\"[SEP]\"]\n",
    "        token_ids = tokenizer.convert_tokens_to_ids(tokens)\n",
    "        # Right-pad with id 0 up to the fixed sequence length.\n",
    "        pred_token_ids.append(token_ids + [0] * (max_seq_len - len(token_ids)))\n",
    "    pred_token_ids = np.array(pred_token_ids, dtype=\"int32\")\n",
    "\n",
    "    predictions = model.predict(pred_token_ids).argmax(axis=-1)\n",
    "    for text, label in zip(sentences, predictions):\n",
    "        print(\"text:\", text, \"\\nintent:\", classes[label])\n",
    "        print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sentences = [\"While I’ve been speaking with you, I realized that my particular request might be better handled by someone with a more specialized focus. I believe there’s another team, maybe the one that deals with customer inquiries more closely, that would be better suited to help me. It seems like my issue falls outside the general scope, and I’d really appreciate being put in touch with the department or individual who is more equipped to provide the support I’m looking for. I’m thinking maybe the sales or technical support team could be more in line with what I need.\"]\n",
    "intent_recognizer(sentences)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}