{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a0d1f2e3",
   "metadata": {},
   "source": [
    "# Top-k / Top-p sampling with UstaModel\n",
    "\n",
    "Loads a small pretrained `UstaModel`, encodes a batch of prompts, and walks through\n",
    "sampling strategies: manual top-k, `torch.topk`, temperature scaling, top-p (nucleus)\n",
    "filtering, and multinomial sampling — ending with `generate()` on a single prompt."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e6b97973",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using device:cuda\n",
      "tensor([ 0, 61, 1, 61, 2, 61, 0, 61, 3], device='cuda:0')\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "torch.Size([4, 32])"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "from v2.usta_model import UstaModel\n",
    "from v2.usta_tokenizer import UstaTokenizer\n",
    "\n",
    "# Pick the fastest available device: CUDA > Apple MPS > CPU.\n",
    "device = \"cpu\"\n",
    "\n",
    "if torch.cuda.is_available():\n",
    "    device = \"cuda\"\n",
    "elif torch.backends.mps.is_available():\n",
    "    device = \"mps\"\n",
    "\n",
    "print(f\"Using device:{device}\")\n",
    "\n",
    "u_tokenizer = UstaTokenizer(\"v2/tokenizer.json\")\n",
    "\n",
    "prompts = [\n",
    "    \"the capital of the united\",\n",
    "    \"madrid is in\",\n",
    "    \"the capital of france is\",\n",
    "    \"the capital of germany is\"\n",
    "]\n",
    "\n",
    "tokens = u_tokenizer.encode(prompts[0])\n",
    "tokens = tokens.to(device)\n",
    "print(tokens)\n",
    "\n",
    "# Pad/truncate every prompt to the model's context length (32).\n",
    "batch_tokens = u_tokenizer.encode_batch(prompts, 32)\n",
    "batch_tokens = batch_tokens.to(device)\n",
    "batch_tokens.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0bb58cb3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<All keys matched successfully>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.manual_seed(1)\n",
    "context_length = 32\n",
    "\n",
    "u_model = UstaModel(\n",
    "    vocab_size=len(u_tokenizer.vocab),\n",
    "    embedding_dim=12,\n",
    "    num_heads=4,\n",
    "    context_length=context_length,\n",
    "    num_layers=8,\n",
    "    device=device\n",
    ")\n",
    "\n",
    "# Load pretrained weights (checkpoint after 4000 training steps).\n",
    "u_model.load_state_dict(torch.load(\"v2/u_model_4000.pth\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f1bd4c5d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([4, 32, 64])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Forward pass: logits of shape (batch, context_length, vocab_size).\n",
    "out = u_model(batch_tokens)\n",
    "out.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b6f2e3e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# temperature => sampling temperature (scales the logits)\n",
    "# top_k => keep only the k highest-probability tokens\n",
    "# top_p => keep the tokens with the highest cumulative probability p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "54e1f874",
   "metadata": {},
   "outputs": [],
   "source": [
    "top_k = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3642c093",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([22.9591, 13.6907, 12.9466, 10.6703, 8.7636, 8.7272, 7.8887, 7.8298,\n",
       " 7.7206, 7.7129]),\n",
       " [61, 60, 59, 38, 56, 50, 9, 27, 51, 22])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Manual top-k over the logits of the last position of the last prompt.\n",
    "# NOTE: list.index returns the FIRST match, so tied logit values would map\n",
    "# to the same index — torch.topk (next cell) is the robust equivalent.\n",
    "sorted_outs = sorted(out[-1][-1].tolist(), reverse=True)\n",
    "\n",
    "sorted_indexes = []\n",
    "\n",
    "for so in sorted_outs[:top_k]:\n",
    "    so_index = out[-1][-1].tolist().index(so)\n",
    "    sorted_indexes.append(so_index)\n",
    "\n",
    "sorted_outs = torch.tensor(sorted_outs[:top_k])\n",
    "sorted_outs, sorted_indexes\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "52c7e348",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([22.9591, 13.6907, 12.9466, 10.6703, 8.7636, 8.7272, 7.8887, 7.8298,\n",
       " 7.7206, 7.7129], device='cuda:0', grad_fn=<TopkBackward0>),\n",
       " tensor([61, 60, 59, 38, 56, 50, 9, 27, 51, 22], device='cuda:0'))"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The built-in equivalent: torch.topk returns (values, indices) directly.\n",
    "values, indexes = torch.topk(out[-1][-1], k=top_k)\n",
    "values, indexes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "36797fda",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([2.1845, 1.3026, 1.2318, 1.0153, 0.8338, 0.8304, 0.7506, 0.7450, 0.7346,\n",
       " 0.7339])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temperature = 10.51\n",
    "# sorted_outs is already a tensor — divide directly instead of re-wrapping it\n",
    "# with torch.tensor(), which raises a copy-construct UserWarning.\n",
    "adjusted_outs = sorted_outs / temperature\n",
    "adjusted_outs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "e584fca2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([0.2800, 0.1159, 0.1080, 0.0870, 0.0725, 0.0723, 0.0667, 0.0664, 0.0657,\n",
       " 0.0656])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "probs = torch.softmax(adjusted_outs, dim=-1)\n",
    "probs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "da567010",
   "metadata": {},
   "outputs": [],
   "source": [
    "top_p = 0.7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9ff37e84",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.6634)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cumulative probability of the top-5 tokens — still below top_p = 0.7.\n",
    "torch.sum(torch.tensor([0.2800, 0.1159, 0.1080, 0.0870, 0.0725]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "8cb1c526",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: 264, 4: 73, 5: 82, 9: 79, 2: 82, 6: 63, 1: 129, 3: 93, 8: 77, 7: 58}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Empirical check: multinomial sampling frequencies should roughly track probs.\n",
    "sample_count = {}\n",
    "\n",
    "for _ in range(1000):\n",
    "    sample = torch.multinomial(probs, 1)\n",
    "    sample_count[sample.item()] = sample_count.get(sample.item(), 0) + 1\n",
    "sample_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "8d0b7ead",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'the capital of the united ': 99, 'the capital of the united .': 1}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "outs = {}\n",
    "for _ in range(100):\n",
    "\n",
    "    out = u_model.generate(tokens, max_new_tokens=3, temperature=1.7, top_k=1, top_p=0.7)  # -> list of ints\n",
    "    decoded = u_tokenizer.decode(out)  # pass the token list directly\n",
    "    outs[decoded] = outs.get(decoded, 0) + 1\n",
    "outs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "fc2ae16a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\n\\ntorch.save(u_model.state_dict(),\"u_model.pth\")\\n\\n\\nu_model.load_state_dict(torch.load(\"u_model.pth\"))\\n'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"\"\"\n",
    "\n",
    "torch.save(u_model.state_dict(),\"u_model.pth\")\n",
    "\n",
    "\n",
    "u_model.load_state_dict(torch.load(\"u_model.pth\"))\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bcb145be",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}