Spaces:
Runtime error
Runtime error
Update gpt_dev.py
Browse files- gpt_dev.py +19 -20
gpt_dev.py
CHANGED
|
@@ -12,23 +12,7 @@ Companion notebook to the [Zero To Hero](https://karpathy.ai/zero-to-hero.html)
|
|
| 12 |
"""
|
| 13 |
|
| 14 |
# We always start with a dataset to train on. Let's download the tiny shakespeare dataset
|
| 15 |
-
|
| 16 |
-
import subprocess
|
| 17 |
-
|
| 18 |
-
# URL of the file you want to download
|
| 19 |
-
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
|
| 20 |
-
# Local path where the file will be saved
|
| 21 |
-
local_filename = "input.txt"
|
| 22 |
-
|
| 23 |
-
def download_file(url, local_filename):
|
| 24 |
-
subprocess.run(["wget", url, "-O", local_filename], check=True)
|
| 25 |
-
|
| 26 |
-
# Download the file
|
| 27 |
-
download_file(url, local_filename)
|
| 28 |
-
|
| 29 |
-
#from gpt_dev import BigramLanguageModel # Import your model class
|
| 30 |
-
|
| 31 |
-
# Your other code here
|
| 32 |
|
| 33 |
# read it in to inspect it
|
| 34 |
with open('input.txt', 'r', encoding='utf-8') as f:
|
|
@@ -317,7 +301,7 @@ from torch.nn import functional as F
|
|
| 317 |
# hyperparameters
|
| 318 |
batch_size = 16 # how many independent sequences will we process in parallel?
|
| 319 |
block_size = 32 # what is the maximum context length for predictions?
|
| 320 |
-
max_iters =
|
| 321 |
#00
|
| 322 |
eval_interval = 100
|
| 323 |
learning_rate = 1e-3
|
|
@@ -450,7 +434,8 @@ class Block(nn.Module):
|
|
| 450 |
class BigramLanguageModel(nn.Module):
|
| 451 |
|
| 452 |
def __init__(self):
|
| 453 |
-
super().__init__()
|
|
|
|
| 454 |
# each token directly reads off the logits for the next token from a lookup table
|
| 455 |
self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
|
| 456 |
self.position_embedding_table = nn.Embedding(block_size, n_embd)
|
|
@@ -497,9 +482,11 @@ class BigramLanguageModel(nn.Module):
|
|
| 497 |
return idx
|
| 498 |
|
| 499 |
model = BigramLanguageModel()
|
|
|
|
| 500 |
m = model.to(device)
|
| 501 |
# print the number of parameters in the model
|
| 502 |
print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')
|
|
|
|
| 503 |
|
| 504 |
# create a PyTorch optimizer
|
| 505 |
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
|
|
@@ -520,7 +507,19 @@ for iter in range(max_iters):
|
|
| 520 |
loss.backward()
|
| 521 |
optimizer.step()
|
| 522 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
# generate from the model
|
| 524 |
context = torch.zeros((1, 1), dtype=torch.long, device=device)
|
| 525 |
print(decode(m.generate(context, max_new_tokens=2000)[0].tolist()))
|
| 526 |
-
|
|
|
|
| 12 |
"""
|
| 13 |
|
| 14 |
# We always start with a dataset to train on. Let's download the tiny shakespeare dataset
|
| 15 |
+
!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# read it in to inspect it
|
| 18 |
with open('input.txt', 'r', encoding='utf-8') as f:
|
|
|
|
| 301 |
# hyperparameters
|
| 302 |
batch_size = 16 # how many independent sequences will we process in parallel?
|
| 303 |
block_size = 32 # what is the maximum context length for predictions?
|
| 304 |
+
max_iters = 5000
|
| 305 |
#00
|
| 306 |
eval_interval = 100
|
| 307 |
learning_rate = 1e-3
|
|
|
|
| 434 |
class BigramLanguageModel(nn.Module):
|
| 435 |
|
| 436 |
def __init__(self):
|
| 437 |
+
#super().__init__()
|
| 438 |
+
super(BigramLanguageModel, self).__init__()
|
| 439 |
# each token directly reads off the logits for the next token from a lookup table
|
| 440 |
self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
|
| 441 |
self.position_embedding_table = nn.Embedding(block_size, n_embd)
|
|
|
|
| 482 |
return idx
|
| 483 |
|
| 484 |
model = BigramLanguageModel()
|
| 485 |
+
|
| 486 |
m = model.to(device)
|
| 487 |
# print the number of parameters in the model
|
| 488 |
print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')
|
| 489 |
+
torch.save(model, 'transformer_model.pth')
|
| 490 |
|
| 491 |
# create a PyTorch optimizer
|
| 492 |
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
|
|
|
|
| 507 |
loss.backward()
|
| 508 |
optimizer.step()
|
| 509 |
|
| 510 |
+
|
| 511 |
+
# Load the saved weights into the model
|
| 512 |
+
#model.load_state_dict(torch.load('transformer_weights.pth'))
|
| 513 |
+
torch.save(model.state_dict(), 'transformer_weights.pth')
|
| 514 |
+
print("Model weights loaded successfully.")
|
| 515 |
+
|
| 516 |
+
import torch
|
| 517 |
+
|
| 518 |
+
# Load the entire model
|
| 519 |
+
model = torch.load('transformer_model.pth')
|
| 520 |
+
model.eval() # Set the model to evaluation mode
|
| 521 |
+
|
| 522 |
+
print("Entire model loaded successfully.")
|
| 523 |
# generate from the model
|
| 524 |
context = torch.zeros((1, 1), dtype=torch.long, device=device)
|
| 525 |
print(decode(m.generate(context, max_new_tokens=2000)[0].tolist()))
|
|
|