In [1]:
import torch
import torch.nn as nn
from v2.usta_model import UstaModel
from v2.usta_tokenizer import UstaTokenizer

device = "cpu"

if torch.cuda.is_available():
    device="cuda"
elif torch.backends.mps.is_available():
    device="mps"


print(f"Using device:{device}")




u_tokenizer = UstaTokenizer("v2/tokenizer.json")


prompts = [
    "the capital of the united",
    "madrid is in",
    "the capital of france is",
    "the capital of germany is"
]

tokens = u_tokenizer.encode(prompts[0])
tokens = tokens.to(device)
print(tokens)

batch_tokens = u_tokenizer.encode_batch(prompts,32)
batch_tokens = batch_tokens.to(device)
batch_tokens.shape

Using device:cuda
tensor([ 0, 61,  1, 61,  2, 61,  0, 61,  3], device='cuda:0')


torch.Size([4, 32])

In [2]:
torch.manual_seed(1)
context_length = 32


u_model = UstaModel(
    vocab_size=len(u_tokenizer.vocab),
    embedding_dim=12,
    num_heads=4,
    context_length=context_length,
    num_layers=8,
    device=device
    )

u_model.load_state_dict(torch.load("v2/u_model_4000.pth"))

<All keys matched successfully>

In [3]:
out = u_model(batch_tokens)
out.shape

torch.Size([4, 32, 64])

In [4]:
# temperature => sıcaklık
# top_k => en yüksek k olasılıklı tokenler
# top_p => en yüksek p olasılıklı tokenler

In [5]:
top_k = 10

In [6]:
sorted_outs = sorted(out[-1][-1].tolist(),reverse=True)

sorted_indexes = []

for so in sorted_outs[:top_k]:
    so_index = out[-1][-1].tolist().index(so)
    sorted_indexes.append(so_index)

sorted_outs = torch.tensor(sorted_outs[:top_k])
sorted_outs,sorted_indexes


(tensor([22.9591, 13.6907, 12.9466, 10.6703,  8.7636,  8.7272,  7.8887,  7.8298,
          7.7206,  7.7129]),
 [61, 60, 59, 38, 56, 50, 9, 27, 51, 22])

In [7]:
values,indexes = torch.topk(out[-1][-1],k=10)
values,indexes

(tensor([22.9591, 13.6907, 12.9466, 10.6703,  8.7636,  8.7272,  7.8887,  7.8298,
          7.7206,  7.7129], device='cuda:0', grad_fn=<TopkBackward0>),
 tensor([61, 60, 59, 38, 56, 50,  9, 27, 51, 22], device='cuda:0'))

In [8]:
temperature = 10.51
adjusted_outs = torch.tensor(sorted_outs)/temperature
adjusted_outs

  adjusted_outs = torch.tensor(sorted_outs)/temperature


tensor([2.1845, 1.3026, 1.2318, 1.0153, 0.8338, 0.8304, 0.7506, 0.7450, 0.7346,
        0.7339])

In [9]:
probs = torch.softmax(adjusted_outs,dim=-1)
probs

tensor([0.2800, 0.1159, 0.1080, 0.0870, 0.0725, 0.0723, 0.0667, 0.0664, 0.0657,
        0.0656])

In [10]:
top_p = 0.7

In [11]:
torch.sum(torch.tensor([0.2800, 0.1159, 0.1080, 0.0870, 0.0725]))

tensor(0.6634)

In [12]:
sample_count = {}

for _ in range(1000):
    sample = torch.multinomial(probs,1)
    sample_count[sample.item()] = sample_count.get(sample.item(),0)+1
sample_count

{0: 264, 4: 73, 5: 82, 9: 79, 2: 82, 6: 63, 1: 129, 3: 93, 8: 77, 7: 58}

In [15]:
outs = {}
for _ in range(100):

    out = u_model.generate(tokens, max_new_tokens=3,temperature=1.7,top_k=1,top_p=0.7)      # -> int listesi
    decoded = u_tokenizer.decode(out) # direk liste ver
    outs[decoded] = outs.get(decoded,0)+1
outs

{'the capital of the united   ': 99, 'the capital of the united  .': 1}

In [14]:
"""

torch.save(u_model.state_dict(),"u_model.pth")


u_model.load_state_dict(torch.load("u_model.pth"))
"""

'\n\ntorch.save(u_model.state_dict(),"u_model.pth")\n\n\nu_model.load_state_dict(torch.load("u_model.pth"))\n'