Spaces:

AYYasaswini
/

TransformerLearning

Runtime error

App Files Files Community

AYYasaswini committed on Jun 12, 2024

Commit

cd23832

verified ·

1 Parent(s): 2d9ca46

Update gpt_dev.py

Browse files

Files changed (1) hide show

gpt_dev.py +19 -20

gpt_dev.py CHANGED Viewed

@@ -12,23 +12,7 @@ Companion notebook to the [Zero To Hero](https://karpathy.ai/zero-to-hero.html)
 """
 # We always start with a dataset to train on. Let's download the tiny shakespeare dataset
-#!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
-import subprocess
-# URL of the file you want to download
-url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
-# Local path where the file will be saved
-local_filename = "input.txt"
-def download_file(url, local_filename):
-    subprocess.run(["wget", url, "-O", local_filename], check=True)
-# Download the file
-download_file(url, local_filename)
-#from gpt_dev import BigramLanguageModel  # Import your model class
-# Your other code here
 # read it in to inspect it
 with open('input.txt', 'r', encoding='utf-8') as f:
@@ -317,7 +301,7 @@ from torch.nn import functional as F
 # hyperparameters
 batch_size = 16 # how many independent sequences will we process in parallel?
 block_size = 32 # what is the maximum context length for predictions?
-max_iters = 3000
 #00
 eval_interval = 100
 learning_rate = 1e-3
@@ -450,7 +434,8 @@ class Block(nn.Module):
 class BigramLanguageModel(nn.Module):
     def __init__(self):
-        super().__init__()
         # each token directly reads off the logits for the next token from a lookup table
         self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
         self.position_embedding_table = nn.Embedding(block_size, n_embd)
@@ -497,9 +482,11 @@ class BigramLanguageModel(nn.Module):
         return idx
 model = BigramLanguageModel()
 m = model.to(device)
 # print the number of parameters in the model
 print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')
 # create a PyTorch optimizer
 optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
@@ -520,7 +507,19 @@ for iter in range(max_iters):
     loss.backward()
     optimizer.step()
 # generate from the model
 context = torch.zeros((1, 1), dtype=torch.long, device=device)
 print(decode(m.generate(context, max_new_tokens=2000)[0].tolist()))

 """
 # We always start with a dataset to train on. Let's download the tiny shakespeare dataset
+!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
 # read it in to inspect it
 with open('input.txt', 'r', encoding='utf-8') as f:
 # hyperparameters
 batch_size = 16 # how many independent sequences will we process in parallel?
 block_size = 32 # what is the maximum context length for predictions?
+max_iters = 5000
 #00
 eval_interval = 100
 learning_rate = 1e-3
 class BigramLanguageModel(nn.Module):
     def __init__(self):
+        #super().__init__()
+        super(BigramLanguageModel, self).__init__()
         # each token directly reads off the logits for the next token from a lookup table
         self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
         self.position_embedding_table = nn.Embedding(block_size, n_embd)
         return idx
 model = BigramLanguageModel()
 m = model.to(device)
 # print the number of parameters in the model
 print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')
+torch.save(model, 'transformer_model.pth')
 # create a PyTorch optimizer
 optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
     loss.backward()
     optimizer.step()
+# Save the trained weights to disk (the load_state_dict call below is commented out)
+#model.load_state_dict(torch.load('transformer_weights.pth'))
+torch.save(model.state_dict(), 'transformer_weights.pth')
+print("Model weights loaded successfully.")
+import torch
+# Load the entire model
+model = torch.load('transformer_model.pth')
+model.eval()  # Set the model to evaluation mode
+print("Entire model loaded successfully.")
 # generate from the model
 context = torch.zeros((1, 1), dtype=torch.long, device=device)
 print(decode(m.generate(context, max_new_tokens=2000)[0].tolist()))