Code Uploaded

Browse files

Files changed (2) hide show

AssamGPT_Inference.ipynb +0 -0
AssameseWikiGPT.ipynb +463 -0

AssamGPT_Inference.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

AssameseWikiGPT.ipynb ADDED Viewed

	@@ -0,0 +1,463 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "length of the longest sentence:  10\n",
+      "no_of_sentences:  127946\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import random\n",
+    "\n",
+    "#os.environ['CUDA_VISIBLE_DEVICES'] = '-1' #disble gpu\n",
+    "\n",
+    "def get_text_data(file_name=\"cleaned_assamese_text.txt\"):\n",
+    "    \"\"\"Load the corpus and split it into comma-delimited text fragments.\n",
+    "\n",
+    "    Args:\n",
+    "        file_name: Path of the cleaned text file to read.\n",
+    "\n",
+    "    Returns:\n",
+    "        list[str]: The non-empty fragments, in file order.\n",
+    "    \"\"\"\n",
+    "    # The corpus is non-ASCII text, so decode it explicitly (assumed UTF-8)\n",
+    "    # instead of relying on the platform default encoding. The context manager\n",
+    "    # closes the handle even if the read fails.\n",
+    "    with open(file_name, 'r', encoding='utf-8') as file:\n",
+    "        file_sentences = file.read().split(',')\n",
+    "    # Consecutive commas produce empty strings; drop them.\n",
+    "    return [sentence for sentence in file_sentences if sentence]\n",
+    "\n",
+    "# Seed the shuffle so the train/test/validation split is reproducible across runs.\n",
+    "random.seed(42)\n",
+    "\n",
+    "sentences=get_text_data()\n",
+    "random.shuffle(sentences)\n",
+    "no_of_sentences=len(sentences)\n",
+    "\n",
+    "# 70% train / 15% test / 15% validation, cut at two boundaries computed once.\n",
+    "train_end=int(0.7*no_of_sentences)\n",
+    "test_end=int(0.85*no_of_sentences)\n",
+    "text_train=sentences[:train_end]\n",
+    "text_test=sentences[train_end:test_end]\n",
+    "text_valid=sentences[test_end:]\n",
+    "\n",
+    "# maxlen is a fixed context window (tokens per training sequence), not a value\n",
+    "# measured from the corpus, even though the label printed below says otherwise.\n",
+    "maxlen=10\n",
+    "print(\"length of the longest sentence: \",maxlen)\n",
+    "print(\"no_of_sentences: \",no_of_sentences)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-02-28 23:36:00.068548: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+      "2023-02-28 23:36:01.115879: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.0/lib64:/usr/local/cuda-11.7/lib64::/home/yuvrajtalukdar/miniconda3/envs/miniproject/lib/\n",
+      "2023-02-28 23:36:01.116220: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.0/lib64:/usr/local/cuda-11.7/lib64::/home/yuvrajtalukdar/miniconda3/envs/miniproject/lib/\n",
+      "2023-02-28 23:36:01.116238: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
+      "2023-02-28 23:36:02.603014: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-02-28 23:36:02.736211: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-02-28 23:36:02.736438: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-02-28 23:36:02.736847: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+      "2023-02-28 23:36:02.737278: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-02-28 23:36:02.737453: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-02-28 23:36:02.737574: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-02-28 23:36:03.410798: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-02-28 23:36:03.410969: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-02-28 23:36:03.411092: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
+      "2023-02-28 23:36:03.411205: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2107 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "303475\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<tf.Tensor: shape=(1, 11), dtype=int64, numpy=array([[  17, 3078, 2246,   87,   31,    0,    0,    0,    0,    0,    0]])>"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from tensorflow.keras.layers import TextVectorization\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "def custom_standardization(input_string):\n",
+    "    \"\"\"Standardize raw text for the vectorizer by lower-casing it.\"\"\"\n",
+    "    return tf.strings.lower(input_string)\n",
+    "\n",
+    "# Word-level tokenizer: every sentence becomes maxlen + 1 integer ids so that\n",
+    "# it can later be split into an input and a one-step-shifted target.\n",
+    "vectorize_layer = TextVectorization(\n",
+    "    output_sequence_length=maxlen + 1,\n",
+    "    output_mode=\"int\",\n",
+    "    standardize=custom_standardization,\n",
+    ")\n",
+    "\n",
+    "# Build the vocabulary from the whole corpus.\n",
+    "vectorize_layer.adapt(sentences)\n",
+    "vocab = vectorize_layer.get_vocabulary()\n",
+    "vocab_size = len(vocab)\n",
+    "\n",
+    "print(vocab_size)\n",
+    "# Sanity check: tokenize one sample phrase (displayed as the cell result).\n",
+    "vectorize_layer(['এক অনন্য মাত্ৰা প্ৰদান কৰাৰ'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Map each integer token id back to its vocabulary string.\n",
+    "index_lookup = dict(enumerate(vocab))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 10 #64\n",
+    "\n",
+    "def _batched_text_dataset(texts):\n",
+    "    \"\"\"Wrap a list of strings as a shuffled (buffer of 256), batched dataset.\"\"\"\n",
+    "    return (\n",
+    "        tf.data.Dataset.from_tensor_slices(texts)\n",
+    "        .shuffle(buffer_size=256)\n",
+    "        .batch(batch_size)\n",
+    "    )\n",
+    "\n",
+    "train_dataset = _batched_text_dataset(text_train)\n",
+    "test_dataset = _batched_text_dataset(text_test)\n",
+    "valid_dataset = _batched_text_dataset(text_valid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def preprocess_text(text):\n",
+    "    \"\"\"Turn a batch of strings into (input, target) token-id pairs.\n",
+    "\n",
+    "    The target is the input shifted left by one position, so each position\n",
+    "    is paired with the token that follows it.\n",
+    "    \"\"\"\n",
+    "    token_ids = vectorize_layer(tf.expand_dims(text, -1))\n",
+    "    inputs, targets = token_ids[:, :-1], token_ids[:, 1:]\n",
+    "    return inputs, targets\n",
+    "\n",
+    "\n",
+    "# Tokenize every split through preprocess_text, with prefetching enabled.\n",
+    "train_dataset = train_dataset.map(preprocess_text).prefetch(tf.data.AUTOTUNE)\n",
+    "test_dataset = test_dataset.map(preprocess_text).prefetch(tf.data.AUTOTUNE)\n",
+    "valid_dataset = valid_dataset.map(preprocess_text).prefetch(tf.data.AUTOTUNE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(<tf.Tensor: shape=(10, 10), dtype=int64, numpy=\n",
+      "array([[ 10738,      0,      0,      0,      0,      0,      0,      0,\n",
+      "             0,      0],\n",
+      "       [  5212,  24846,    504,     51,     71,   8517,   6751,   4828,\n",
+      "           681,      0],\n",
+      "       [    61,    108,   7418,    252,   2823,   2674,    134,    487,\n",
+      "             0,      0],\n",
+      "       [289690,      2, 112988,   1054,   5367,  31142,     22,   3240,\n",
+      "          1115,   2376],\n",
+      "       [   393,      2,    352,    125,   6995,   6019,  41625,     12,\n",
+      "          1799,    551],\n",
+      "       [   265,   4642,     22,   1696,  89473,    126,      3,      5,\n",
+      "           410,   3375],\n",
+      "       [  8187,  18122,    278,     34,    579,    579,     43,   1119,\n",
+      "           710,    395],\n",
+      "       [    61,     16,   5291,    150,   1166,      2,   4796,  50192,\n",
+      "          5668,   2324],\n",
+      "       [    52,    954,    239,    595,   5401,   1006,      2,   3253,\n",
+      "          3812,     21],\n",
+      "       [ 17071,      2,  15782,   5901,  15075,    783,     22,     40,\n",
+      "         40782,  34480]])>, <tf.Tensor: shape=(10, 10), dtype=int64, numpy=\n",
+      "array([[     0,      0,      0,      0,      0,      0,      0,      0,\n",
+      "             0,      0],\n",
+      "       [ 24846,    504,     51,     71,   8517,   6751,   4828,    681,\n",
+      "             0,      0],\n",
+      "       [   108,   7418,    252,   2823,   2674,    134,    487,      0,\n",
+      "             0,      0],\n",
+      "       [     2, 112988,   1054,   5367,  31142,     22,   3240,   1115,\n",
+      "          2376,   2483],\n",
+      "       [     2,    352,    125,   6995,   6019,  41625,     12,   1799,\n",
+      "           551,     20],\n",
+      "       [  4642,     22,   1696,  89473,    126,      3,      5,    410,\n",
+      "          3375,   4436],\n",
+      "       [ 18122,    278,     34,    579,    579,     43,   1119,    710,\n",
+      "           395,    710],\n",
+      "       [    16,   5291,    150,   1166,      2,   4796,  50192,   5668,\n",
+      "          2324,    239],\n",
+      "       [   954,    239,    595,   5401,   1006,      2,   3253,   3812,\n",
+      "            21,    245],\n",
+      "       [     2,  15782,   5901,  15075,    783,     22,     40,  40782,\n",
+      "         34480,      0]])>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Peek at a single (input, target) batch to confirm the one-token shift.\n",
+    "for batch in train_dataset.take(1):\n",
+    "    print(batch)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import keras_nlp\n",
+    "from tensorflow import keras\n",
+    "\n",
+    "# Width of the token/position embeddings and number of attention heads per block.\n",
+    "embed_dim = 128\n",
+    "num_heads = 4\n",
+    "\n",
+    "def create_model2(no_of_decoder=1):\n",
+    "    \"\"\"Build and compile a decoder-only transformer language model.\n",
+    "\n",
+    "    Args:\n",
+    "        no_of_decoder: Number of stacked TransformerDecoder blocks.\n",
+    "\n",
+    "    Returns:\n",
+    "        A compiled keras.Model mapping (batch, maxlen) token ids to a\n",
+    "        (batch, maxlen, vocab_size) tensor of next-token probabilities.\n",
+    "    \"\"\"\n",
+    "    inputs = keras.layers.Input(shape=(maxlen,), dtype=tf.int32)\n",
+    "    x = keras_nlp.layers.TokenAndPositionEmbedding(vocab_size, maxlen, embed_dim)(inputs)\n",
+    "    # Honour the requested depth; the stack was previously fixed at 4 blocks\n",
+    "    # regardless of the argument.\n",
+    "    for _ in range(no_of_decoder):\n",
+    "        x = keras_nlp.layers.TransformerDecoder(intermediate_dim=embed_dim*2, num_heads=num_heads,dropout=0.5)(x)\n",
+    "    do = keras.layers.Dropout(0.4)(x)\n",
+    "    # The head applies softmax, so the model emits probabilities (not logits);\n",
+    "    # the loss and the Perplexity metric below are used with their default\n",
+    "    # from_logits=False setting to match.\n",
+    "    outputs = keras.layers.Dense(vocab_size, activation='softmax')(do)\n",
+    "\n",
+    "    model = keras.Model(inputs=inputs, outputs=outputs)\n",
+    "    model.compile(\n",
+    "        optimizer=\"adam\", \n",
+    "        loss='sparse_categorical_crossentropy',\n",
+    "        metrics=[keras_nlp.metrics.Perplexity(), 'accuracy']\n",
+    "    )\n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "class TextSampler(keras.callbacks.Callback):\n",
+    "    \"\"\"Callback that prints a generated text sample at the end of every epoch.\n",
+    "\n",
+    "    Args:\n",
+    "        start_prompt: Seed text that every generated sample starts from.\n",
+    "        max_tokens: Generation budget; max_tokens - 1 words are appended to the prompt.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, start_prompt, max_tokens):\n",
+    "        # Let the base Callback initialise its own bookkeeping attributes.\n",
+    "        super().__init__()\n",
+    "        self.start_prompt = start_prompt\n",
+    "        self.max_tokens = max_tokens\n",
+    "\n",
+    "    def sample_token(self, logits):\n",
+    "        \"\"\"Randomly pick a token id among the 5 highest-scoring candidates.\n",
+    "\n",
+    "        Args:\n",
+    "            logits: 1-D scores over the vocabulary for one position. In this\n",
+    "                notebook they are the softmax outputs of the model, i.e.\n",
+    "                probabilities rather than raw logits.\n",
+    "\n",
+    "        Returns:\n",
+    "            The sampled token id, drawn in proportion to its score.\n",
+    "        \"\"\"\n",
+    "        top_values, indices = tf.math.top_k(logits, k=5, sorted=True)\n",
+    "        indices = np.asarray(indices).astype(\"int32\")\n",
+    "        # The scores are already probabilities, so renormalise the top 5 to sum\n",
+    "        # to 1. Running softmax over them again would squash them towards a\n",
+    "        # uniform distribution and ignore the model's actual preferences.\n",
+    "        preds = np.asarray(top_values).astype(\"float64\")\n",
+    "        preds = preds / preds.sum()\n",
+    "        return np.random.choice(indices, p=preds)\n",
+    "\n",
+    "    def on_epoch_end(self, epoch, logs=None):\n",
+    "        \"\"\"Generate and print a sample continuing the start prompt.\"\"\"\n",
+    "        decoded_sample = self.start_prompt\n",
+    "\n",
+    "        for _ in range(self.max_tokens-1):\n",
+    "            # The model only sees maxlen positions, so condition on the most\n",
+    "            # recent maxlen words. Indexing by the full word count would run\n",
+    "            # past the prediction tensor once the sample outgrows the window,\n",
+    "            # and the vectorizer would keep the oldest words instead of the newest.\n",
+    "            context_words = decoded_sample.strip().split()[-maxlen:]\n",
+    "            tokenized_prompt = vectorize_layer([' '.join(context_words)])[:, :-1]\n",
+    "            predictions = self.model.predict([tokenized_prompt], verbose=0)\n",
+    "            # Row of the last real word in the window: it holds the\n",
+    "            # distribution over the word that should come next.\n",
+    "            sample_index = max(len(context_words) - 1, 0)\n",
+    "\n",
+    "            sampled_token = self.sample_token(predictions[0][sample_index])\n",
+    "            sampled_token = index_lookup[sampled_token]\n",
+    "            decoded_sample += \" \" + sampled_token\n",
+    "\n",
+    "        print(f\"\\nSample text:\\n{decoded_sample}...\\n\")\n",
+    "\n",
+    "# Seed prompt: the first 4 space-separated words of a random validation sentence\n",
+    "seed_words = random.choice(text_valid).replace('\\n', ' ').split(' ')[:4]\n",
+    "random_sentence = ' '.join(seed_words)\n",
+    "sampler = TextSampler(random_sentence, 30)\n",
+    "reducelr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model: \"model\"\n",
+      "_________________________________________________________________\n",
+      " Layer (type)                Output Shape              Param #   \n",
+      "=================================================================\n",
+      " input_1 (InputLayer)        [(None, 10)]              0         \n",
+      "                                                                 \n",
+      " token_and_position_embeddin  (None, 10, 128)          38846080  \n",
+      " g (TokenAndPositionEmbeddin                                     \n",
+      " g)                                                              \n",
+      "                                                                 \n",
+      " transformer_decoder (Transf  (None, 10, 128)          132480    \n",
+      " ormerDecoder)                                                   \n",
+      "                                                                 \n",
+      " transformer_decoder_1 (Tran  (None, 10, 128)          132480    \n",
+      " sformerDecoder)                                                 \n",
+      "                                                                 \n",
+      " transformer_decoder_2 (Tran  (None, 10, 128)          132480    \n",
+      " sformerDecoder)                                                 \n",
+      "                                                                 \n",
+      " transformer_decoder_3 (Tran  (None, 10, 128)          132480    \n",
+      " sformerDecoder)                                                 \n",
+      "                                                                 \n",
+      " dropout (Dropout)           (None, 10, 128)           0         \n",
+      "                                                                 \n",
+      " dense (Dense)               (None, 10, 303475)        39148275  \n",
+      "                                                                 \n",
+      "=================================================================\n",
+      "Total params: 78,524,275\n",
+      "Trainable params: 78,524,275\n",
+      "Non-trainable params: 0\n",
+      "_________________________________________________________________\n",
+      "Epoch 1/150\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2023-02-28 23:36:23.887413: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.\n",
+      "2023-02-28 23:36:24.308423: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7ff6d67579b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
+      "2023-02-28 23:36:24.308518: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6\n",
+      "2023-02-28 23:36:24.328912: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
+      "2023-02-28 23:36:24.549826: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "3082/8957 [=========>....................] - ETA: 55:03 - loss: 5.8952 - perplexity: 363.2977 - accuracy: 0.4296"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[1;32m/media/yuvrajtalukdar/New Volume/computer/undergoing_projects/AssamWiki GPT/AssameseWikiGPT.ipynb Cell 9\u001b[0m in \u001b[0;36m<cell line: 3>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      <a href='vscode-notebook-cell:/media/yuvrajtalukdar/New%20Volume/computer/undergoing_projects/AssamWiki%20GPT/AssameseWikiGPT.ipynb#X11sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m model \u001b[39m=\u001b[39m create_model2(\u001b[39m4\u001b[39m)\n\u001b[1;32m      <a href='vscode-notebook-cell:/media/yuvrajtalukdar/New%20Volume/computer/undergoing_projects/AssamWiki%20GPT/AssameseWikiGPT.ipynb#X11sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m model\u001b[39m.\u001b[39msummary()\n\u001b[0;32m----> <a href='vscode-notebook-cell:/media/yuvrajtalukdar/New%20Volume/computer/undergoing_projects/AssamWiki%20GPT/AssameseWikiGPT.ipynb#X11sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m history \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mfit(train_dataset,validation_data\u001b[39m=\u001b[39;49mvalid_dataset,epochs\u001b[39m=\u001b[39;49m\u001b[39m150\u001b[39;49m,callbacks\u001b[39m=\u001b[39;49m[sampler, reducelr])\n",
+      "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/keras/utils/traceback_utils.py:65\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     63\u001b[0m filtered_tb \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m     64\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 65\u001b[0m     \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m     66\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m     67\u001b[0m     filtered_tb \u001b[39m=\u001b[39m _process_traceback_frames(e\u001b[39m.\u001b[39m__traceback__)\n",
+      "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/keras/engine/training.py:1650\u001b[0m, in \u001b[0;36mModel.fit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m   1642\u001b[0m \u001b[39mwith\u001b[39;00m tf\u001b[39m.\u001b[39mprofiler\u001b[39m.\u001b[39mexperimental\u001b[39m.\u001b[39mTrace(\n\u001b[1;32m   1643\u001b[0m     \u001b[39m\"\u001b[39m\u001b[39mtrain\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m   1644\u001b[0m     epoch_num\u001b[39m=\u001b[39mepoch,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1647\u001b[0m     _r\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m,\n\u001b[1;32m   1648\u001b[0m ):\n\u001b[1;32m   1649\u001b[0m     callbacks\u001b[39m.\u001b[39mon_train_batch_begin(step)\n\u001b[0;32m-> 1650\u001b[0m     tmp_logs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtrain_function(iterator)\n\u001b[1;32m   1651\u001b[0m     \u001b[39mif\u001b[39;00m data_handler\u001b[39m.\u001b[39mshould_sync:\n\u001b[1;32m   1652\u001b[0m         context\u001b[39m.\u001b[39masync_wait()\n",
+      "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py:150\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    148\u001b[0m filtered_tb \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m    149\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 150\u001b[0m   \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m    151\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m    152\u001b[0m   filtered_tb \u001b[39m=\u001b[39m _process_traceback_frames(e\u001b[39m.\u001b[39m__traceback__)\n",
+      "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:880\u001b[0m, in \u001b[0;36mFunction.__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    877\u001b[0m compiler \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mxla\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_jit_compile \u001b[39melse\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mnonXla\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    879\u001b[0m \u001b[39mwith\u001b[39;00m OptionalXlaContext(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_jit_compile):\n\u001b[0;32m--> 880\u001b[0m   result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds)\n\u001b[1;32m    882\u001b[0m new_tracing_count \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mexperimental_get_tracing_count()\n\u001b[1;32m    883\u001b[0m without_tracing \u001b[39m=\u001b[39m (tracing_count \u001b[39m==\u001b[39m new_tracing_count)\n",
+      "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:912\u001b[0m, in \u001b[0;36mFunction._call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    909\u001b[0m   \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock\u001b[39m.\u001b[39mrelease()\n\u001b[1;32m    910\u001b[0m   \u001b[39m# In this case we have created variables on the first call, so we run the\u001b[39;00m\n\u001b[1;32m    911\u001b[0m   \u001b[39m# defunned version which is guaranteed to never create variables.\u001b[39;00m\n\u001b[0;32m--> 912\u001b[0m   \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_no_variable_creation_fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwds)  \u001b[39m# pylint: disable=not-callable\u001b[39;00m\n\u001b[1;32m    913\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_variable_creation_fn \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    914\u001b[0m   \u001b[39m# Release the lock early so that multiple threads can perform the call\u001b[39;00m\n\u001b[1;32m    915\u001b[0m   \u001b[39m# in parallel.\u001b[39;00m\n\u001b[1;32m    916\u001b[0m   \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock\u001b[39m.\u001b[39mrelease()\n",
+      "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/tracing_compiler.py:134\u001b[0m, in \u001b[0;36mTracingCompiler.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    131\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_lock:\n\u001b[1;32m    132\u001b[0m   (concrete_function,\n\u001b[1;32m    133\u001b[0m    filtered_flat_args) \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_maybe_define_function(args, kwargs)\n\u001b[0;32m--> 134\u001b[0m \u001b[39mreturn\u001b[39;00m concrete_function\u001b[39m.\u001b[39;49m_call_flat(\n\u001b[1;32m    135\u001b[0m     filtered_flat_args, captured_inputs\u001b[39m=\u001b[39;49mconcrete_function\u001b[39m.\u001b[39;49mcaptured_inputs)\n",
+      "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/monomorphic_function.py:1745\u001b[0m, in \u001b[0;36mConcreteFunction._call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m   1741\u001b[0m possible_gradient_type \u001b[39m=\u001b[39m gradients_util\u001b[39m.\u001b[39mPossibleTapeGradientTypes(args)\n\u001b[1;32m   1742\u001b[0m \u001b[39mif\u001b[39;00m (possible_gradient_type \u001b[39m==\u001b[39m gradients_util\u001b[39m.\u001b[39mPOSSIBLE_GRADIENT_TYPES_NONE\n\u001b[1;32m   1743\u001b[0m     \u001b[39mand\u001b[39;00m executing_eagerly):\n\u001b[1;32m   1744\u001b[0m   \u001b[39m# No tape is watching; skip to running the function.\u001b[39;00m\n\u001b[0;32m-> 1745\u001b[0m   \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_call_outputs(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_inference_function\u001b[39m.\u001b[39;49mcall(\n\u001b[1;32m   1746\u001b[0m       ctx, args, cancellation_manager\u001b[39m=\u001b[39;49mcancellation_manager))\n\u001b[1;32m   1747\u001b[0m forward_backward \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_select_forward_and_backward_functions(\n\u001b[1;32m   1748\u001b[0m     args,\n\u001b[1;32m   1749\u001b[0m     possible_gradient_type,\n\u001b[1;32m   1750\u001b[0m     executing_eagerly)\n\u001b[1;32m   1751\u001b[0m forward_function, args_with_tangents \u001b[39m=\u001b[39m forward_backward\u001b[39m.\u001b[39mforward()\n",
+      "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/polymorphic_function/monomorphic_function.py:378\u001b[0m, in \u001b[0;36m_EagerDefinedFunction.call\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m    376\u001b[0m \u001b[39mwith\u001b[39;00m _InterpolateFunctionError(\u001b[39mself\u001b[39m):\n\u001b[1;32m    377\u001b[0m   \u001b[39mif\u001b[39;00m cancellation_manager \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 378\u001b[0m     outputs \u001b[39m=\u001b[39m execute\u001b[39m.\u001b[39;49mexecute(\n\u001b[1;32m    379\u001b[0m         \u001b[39mstr\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msignature\u001b[39m.\u001b[39;49mname),\n\u001b[1;32m    380\u001b[0m         num_outputs\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_num_outputs,\n\u001b[1;32m    381\u001b[0m         inputs\u001b[39m=\u001b[39;49margs,\n\u001b[1;32m    382\u001b[0m         attrs\u001b[39m=\u001b[39;49mattrs,\n\u001b[1;32m    383\u001b[0m         ctx\u001b[39m=\u001b[39;49mctx)\n\u001b[1;32m    384\u001b[0m   \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    385\u001b[0m     outputs \u001b[39m=\u001b[39m execute\u001b[39m.\u001b[39mexecute_with_cancellation(\n\u001b[1;32m    386\u001b[0m         \u001b[39mstr\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39msignature\u001b[39m.\u001b[39mname),\n\u001b[1;32m    387\u001b[0m         num_outputs\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_num_outputs,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    390\u001b[0m         ctx\u001b[39m=\u001b[39mctx,\n\u001b[1;32m    391\u001b[0m         cancellation_manager\u001b[39m=\u001b[39mcancellation_manager)\n",
+      "File \u001b[0;32m~/miniconda3/envs/miniproject/lib/python3.10/site-packages/tensorflow/python/eager/execute.py:52\u001b[0m, in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m     50\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m     51\u001b[0m   ctx\u001b[39m.\u001b[39mensure_initialized()\n\u001b[0;32m---> 52\u001b[0m   tensors \u001b[39m=\u001b[39m pywrap_tfe\u001b[39m.\u001b[39;49mTFE_Py_Execute(ctx\u001b[39m.\u001b[39;49m_handle, device_name, op_name,\n\u001b[1;32m     53\u001b[0m                                       inputs, attrs, num_outputs)\n\u001b[1;32m     54\u001b[0m \u001b[39mexcept\u001b[39;00m core\u001b[39m.\u001b[39m_NotOkStatusException \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m     55\u001b[0m   \u001b[39mif\u001b[39;00m name \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "# Build the model, show its layer summary, then train with the per-epoch\n",
+    "# text sampler and the learning-rate reduction callback.\n",
+    "model = create_model2(no_of_decoder=4)\n",
+    "model.summary()\n",
+    "history = model.fit(\n",
+    "    train_dataset,\n",
+    "    validation_data=valid_dataset,\n",
+    "    epochs=150,\n",
+    "    callbacks=[sampler, reducelr],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def sample_token(logits):\n",
+    "    \"\"\"Randomly pick a token id among the 5 highest-scoring candidates.\n",
+    "\n",
+    "    Args:\n",
+    "        logits: 1-D scores over the vocabulary for one position. In this\n",
+    "            notebook they are the softmax outputs of the model, i.e.\n",
+    "            probabilities rather than raw logits.\n",
+    "\n",
+    "    Returns:\n",
+    "        The sampled token id, drawn in proportion to its score.\n",
+    "    \"\"\"\n",
+    "    top_values, indices = tf.math.top_k(logits, k=5, sorted=True)\n",
+    "    indices = np.asarray(indices).astype(\"int32\")\n",
+    "    # The scores are already probabilities, so renormalise the top 5 to sum\n",
+    "    # to 1. Running softmax over them again would squash them towards a\n",
+    "    # uniform distribution and ignore the model's actual preferences.\n",
+    "    preds = np.asarray(top_values).astype(\"float64\")\n",
+    "    preds = preds / preds.sum()\n",
+    "    return np.random.choice(indices, p=preds)\n",
+    "\n",
+    "def generate_text(prompt, response_length=50):\n",
+    "    \"\"\"Extend `prompt` by sampling response_length - 1 words from the model.\n",
+    "\n",
+    "    Args:\n",
+    "        prompt: Seed text to continue.\n",
+    "        response_length: Generation budget; response_length - 1 words are appended.\n",
+    "\n",
+    "    Returns:\n",
+    "        The prompt followed by the sampled words, each preceded by a space.\n",
+    "    \"\"\"\n",
+    "    decoded_sample = prompt\n",
+    "    for _ in range(response_length-1):\n",
+    "        # The model only sees maxlen positions, so condition on the most recent\n",
+    "        # maxlen words. Indexing by the full word count would run past the\n",
+    "        # prediction tensor once the text outgrows the window, and the\n",
+    "        # vectorizer would keep the oldest words instead of the newest.\n",
+    "        context_words = decoded_sample.strip().split()[-maxlen:]\n",
+    "        tokenized_prompt = vectorize_layer([' '.join(context_words)])[:, :-1]\n",
+    "        predictions = model.predict([tokenized_prompt], verbose=0)\n",
+    "        # Row of the last real word in the window: it holds the distribution\n",
+    "        # over the word that should come next.\n",
+    "        sample_index = max(len(context_words) - 1, 0)\n",
+    "\n",
+    "        sampled_token = sample_token(predictions[0][sample_index])\n",
+    "        sampled_token = index_lookup[sampled_token]\n",
+    "        decoded_sample += \" \" + sampled_token\n",
+    "    return decoded_sample"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "# Persist the trained model twice: as a Keras HDF5 file and as a pickle.\n",
+    "model.save(\"pd_plaintext_transformer.h5\")\n",
+    "# Use a context manager so the pickle file is flushed and closed deterministically.\n",
+    "with open('pd_plaintext_transformer.pkl', 'wb') as pickle_file:\n",
+    "    pickle.dump(model, pickle_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Continue a seed phrase with the trained model (shown as the cell result).\n",
+    "generate_text(prompt='য়ুৰিৰ দাদাক আৰু ', response_length=50)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "miniproject",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "b18115e74db522ea4edaf3f03801a60154dbaca70e4a91a6289c29c6971e06fa"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}