{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a0d1f2e3",
   "metadata": {},
   "source": [
    "# Top-k / Top-p sampling with UstaModel\n",
    "\n",
    "Loads a small pretrained `UstaModel`, encodes a batch of prompts, and walks through\n",
    "sampling strategies: manual top-k, `torch.topk`, temperature scaling, top-p (nucleus)\n",
    "filtering, and multinomial sampling — ending with `generate()` on a single prompt."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e6b97973",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using device:cuda\n",
      "tensor([ 0, 61, 1, 61, 2, 61, 0, 61, 3], device='cuda:0')\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "torch.Size([4, 32])"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "from v2.usta_model import UstaModel\n",
    "from v2.usta_tokenizer import UstaTokenizer\n",
    "\n",
    "# Pick the fastest available device: CUDA > Apple MPS > CPU.\n",
    "device = \"cpu\"\n",
    "\n",
    "if torch.cuda.is_available():\n",
    "    device = \"cuda\"\n",
    "elif torch.backends.mps.is_available():\n",
    "    device = \"mps\"\n",
    "\n",
    "print(f\"Using device:{device}\")\n",
    "\n",
    "u_tokenizer = UstaTokenizer(\"v2/tokenizer.json\")\n",
    "\n",
    "prompts = [\n",
    "    \"the capital of the united\",\n",
    "    \"madrid is in\",\n",
    "    \"the capital of france is\",\n",
    "    \"the capital of germany is\"\n",
    "]\n",
    "\n",
    "tokens = u_tokenizer.encode(prompts[0])\n",
    "tokens = tokens.to(device)\n",
    "print(tokens)\n",
    "\n",
    "# Pad/truncate every prompt to the model's context length (32).\n",
    "batch_tokens = u_tokenizer.encode_batch(prompts, 32)\n",
    "batch_tokens = batch_tokens.to(device)\n",
    "batch_tokens.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0bb58cb3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<All keys matched successfully>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.manual_seed(1)\n",
    "context_length = 32\n",
    "\n",
    "u_model = UstaModel(\n",
    "    vocab_size=len(u_tokenizer.vocab),\n",
    "    embedding_dim=12,\n",
    "    num_heads=4,\n",
    "    context_length=context_length,\n",
    "    num_layers=8,\n",
    "    device=device\n",
    ")\n",
    "\n",
    "# Load pretrained weights (checkpoint after 4000 training steps).\n",
    "u_model.load_state_dict(torch.load(\"v2/u_model_4000.pth\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f1bd4c5d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([4, 32, 64])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Forward pass: logits of shape (batch, context_length, vocab_size).\n",
    "out = u_model(batch_tokens)\n",
    "out.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b6f2e3e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# temperature => sampling temperature (scales the logits)\n",
    "# top_k => keep only the k highest-probability tokens\n",
    "# top_p => keep the tokens with the highest cumulative probability p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "54e1f874",
   "metadata": {},
   "outputs": [],
   "source": [
    "top_k = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3642c093",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([22.9591, 13.6907, 12.9466, 10.6703, 8.7636, 8.7272, 7.8887, 7.8298,\n",
       " 7.7206, 7.7129]),\n",
       " [61, 60, 59, 38, 56, 50, 9, 27, 51, 22])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Manual top-k over the logits of the last position of the last prompt.\n",
    "# NOTE: list.index returns the FIRST match, so tied logit values would map\n",
    "# to the same index — torch.topk (next cell) is the robust equivalent.\n",
    "sorted_outs = sorted(out[-1][-1].tolist(), reverse=True)\n",
    "\n",
    "sorted_indexes = []\n",
    "\n",
    "for so in sorted_outs[:top_k]:\n",
    "    so_index = out[-1][-1].tolist().index(so)\n",
    "    sorted_indexes.append(so_index)\n",
    "\n",
    "sorted_outs = torch.tensor(sorted_outs[:top_k])\n",
    "sorted_outs, sorted_indexes\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "52c7e348",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([22.9591, 13.6907, 12.9466, 10.6703, 8.7636, 8.7272, 7.8887, 7.8298,\n",
       " 7.7206, 7.7129], device='cuda:0', grad_fn=<TopkBackward0>),\n",
       " tensor([61, 60, 59, 38, 56, 50, 9, 27, 51, 22], device='cuda:0'))"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The built-in equivalent: torch.topk returns (values, indices) directly.\n",
    "values, indexes = torch.topk(out[-1][-1], k=top_k)\n",
    "values, indexes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "36797fda",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([2.1845, 1.3026, 1.2318, 1.0153, 0.8338, 0.8304, 0.7506, 0.7450, 0.7346,\n",
       " 0.7339])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temperature = 10.51\n",
    "# sorted_outs is already a tensor — divide directly instead of re-wrapping it\n",
    "# with torch.tensor(), which raises a copy-construct UserWarning.\n",
    "adjusted_outs = sorted_outs / temperature\n",
    "adjusted_outs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "e584fca2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([0.2800, 0.1159, 0.1080, 0.0870, 0.0725, 0.0723, 0.0667, 0.0664, 0.0657,\n",
       " 0.0656])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "probs = torch.softmax(adjusted_outs, dim=-1)\n",
    "probs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "da567010",
   "metadata": {},
   "outputs": [],
   "source": [
    "top_p = 0.7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9ff37e84",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.6634)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cumulative probability of the top-5 tokens — still below top_p = 0.7.\n",
    "torch.sum(torch.tensor([0.2800, 0.1159, 0.1080, 0.0870, 0.0725]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "8cb1c526",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: 264, 4: 73, 5: 82, 9: 79, 2: 82, 6: 63, 1: 129, 3: 93, 8: 77, 7: 58}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Empirical check: multinomial sampling frequencies should roughly track probs.\n",
    "sample_count = {}\n",
    "\n",
    "for _ in range(1000):\n",
    "    sample = torch.multinomial(probs, 1)\n",
    "    sample_count[sample.item()] = sample_count.get(sample.item(), 0) + 1\n",
    "sample_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "8d0b7ead",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'the capital of the united ': 99, 'the capital of the united .': 1}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "outs = {}\n",
    "for _ in range(100):\n",
    "\n",
    "    out = u_model.generate(tokens, max_new_tokens=3, temperature=1.7, top_k=1, top_p=0.7)  # -> list of ints\n",
    "    decoded = u_tokenizer.decode(out)  # pass the token list directly\n",
    "    outs[decoded] = outs.get(decoded, 0) + 1\n",
    "outs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "fc2ae16a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\n\\ntorch.save(u_model.state_dict(),\"u_model.pth\")\\n\\n\\nu_model.load_state_dict(torch.load(\"u_model.pth\"))\\n'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"\"\"\n",
    "\n",
    "torch.save(u_model.state_dict(),\"u_model.pth\")\n",
    "\n",
    "\n",
    "u_model.load_state_dict(torch.load(\"u_model.pth\"))\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bcb145be",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}