GPTfromScratch

Sleeping

App Files Files Community

sanjanatule committed on Oct 26, 2023

Commit

d3369cb

1 Parent(s): 5d0f08d

Upload utils.py

Browse files

Files changed (1) hide show

utils.py +22 -12

utils.py CHANGED Viewed

@@ -1,24 +1,34 @@
 import torch
 from torch import nn
 import lightning.pytorch as pl
 from torch.nn import functional as F
 chars = ['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
 vocab_size = len(chars)
 block_size = 32
-n_embd = 128
-n_head = 4
-n_layer = 8
-dropout = 0.1
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
 class Head(nn.Module):
     """ one head of self-attention """
     def __init__(self, head_size):
         super().__init__()
-        self.key = nn.Linear(n_embd, head_size, bias=False)
         self.query = nn.Linear(n_embd, head_size, bias=False)
         self.value = nn.Linear(n_embd, head_size, bias=False)
         self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
@@ -44,8 +54,8 @@ class MultiHeadAttention(nn.Module):
     def __init__(self, num_heads, head_size):
         super().__init__()
-        self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
-        self.proj = nn.Linear(n_embd, n_embd)
         self.dropout = nn.Dropout(dropout)
     def forward(self, x):
@@ -75,10 +85,10 @@ class Block(nn.Module):
         # n_embd: embedding dimension, n_head: the number of heads we'd like
         super().__init__()
         head_size = n_embd // n_head
-        self.sa = MultiHeadAttention(n_head, head_size)
         self.ffwd = FeedFoward(n_embd)
-        self.ln1 = nn.LayerNorm(n_embd)
-        self.ln2 = nn.LayerNorm(n_embd)
     def forward(self, x):
         x = x + self.sa(self.ln1(x))

 import torch
 from torch import nn
 import lightning.pytorch as pl
 from torch.nn import functional as F
+# encoding
 chars = ['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
 vocab_size = len(chars)
+stoi = { ch:i for i,ch in enumerate(chars) }
+itos = { i:ch for i,ch in enumerate(chars) }
+# encode / decode function
+encode = lambda s: [stoi[c] for c in s] # encoder: take a string, output a list of integers
+decode = lambda l: ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
+# model config
 block_size = 32
+n_embd     = 128
+n_head     = 4
+n_layer    = 8
+dropout    = 0.1
+device     = 'cuda' if torch.cuda.is_available() else 'cpu'
+learning_rate = 1e-3
 class Head(nn.Module):
     """ one head of self-attention """
     def __init__(self, head_size):
         super().__init__()
+        self.key   = nn.Linear(n_embd, head_size, bias=False)
         self.query = nn.Linear(n_embd, head_size, bias=False)
         self.value = nn.Linear(n_embd, head_size, bias=False)
         self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
     def __init__(self, num_heads, head_size):
         super().__init__()
+        self.heads   = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
+        self.proj    = nn.Linear(n_embd, n_embd)
         self.dropout = nn.Dropout(dropout)
     def forward(self, x):
         # n_embd: embedding dimension, n_head: the number of heads we'd like
         super().__init__()
         head_size = n_embd // n_head
+        self.sa   = MultiHeadAttention(n_head, head_size)
         self.ffwd = FeedFoward(n_embd)
+        self.ln1  = nn.LayerNorm(n_embd)
+        self.ln2  = nn.LayerNorm(n_embd)
     def forward(self, x):
         x = x + self.sa(self.ln1(x))