Update gpt_dev.py
gpt_dev.py CHANGED (+2 -17)
@@ -317,7 +317,7 @@ from torch.nn import functional as F
 # hyperparameters
 batch_size = 16 # how many independent sequences will we process in parallel?
 block_size = 32 # what is the maximum context length for predictions?
-max_iters =
+max_iters = 3000
 #00
 eval_interval = 100
 learning_rate = 1e-3
@@ -450,8 +450,7 @@ class Block(nn.Module):
 class BigramLanguageModel(nn.Module):
 
     def __init__(self):
-
-        super(BigramLanguageModel, self).__init__()
+        super().__init__()
         # each token directly reads off the logits for the next token from a lookup table
         self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
         self.position_embedding_table = nn.Embedding(block_size, n_embd)
@@ -498,11 +497,9 @@ class BigramLanguageModel(nn.Module):
         return idx
 
 model = BigramLanguageModel()
-torch.save(model.state_dict(), 'transformer_weights.pth')
 m = model.to(device)
 # print the number of parameters in the model
 print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')
-torch.save(model, 'transformer_model.pth')
 
 # create a PyTorch optimizer
 optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
@@ -527,15 +524,3 @@ for iter in range(max_iters):
 context = torch.zeros((1, 1), dtype=torch.long, device=device)
 print(decode(m.generate(context, max_new_tokens=2000)[0].tolist()))
 
-# Load the saved weights into the model
-model.load_state_dict(torch.load('transformer_weights.pth'))
-
-print("Model weights loaded successfully.")
-
-import torch
-
-# Load the entire model
-model = torch.load('transformer_model.pth')
-model.eval() # Set the model to evaluation mode
-
-print("Entire model loaded successfully.")
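Note: with this change gpt_dev.py no longer saves or reloads the model at all. If checkpointing the trained weights is still wanted, a minimal sketch, reusing the model and filename from this script and placed after the training loop (the placement is an assumption, not part of this commit), could look like:

# sketch only: save the state dict once training has finished,
# rather than at model construction as the removed lines did
torch.save(model.state_dict(), 'transformer_weights.pth')

# later: rebuild the architecture and load the trained weights back in
model = BigramLanguageModel()
model.load_state_dict(torch.load('transformer_weights.pth'))
model = model.to(device)
model.eval()  # evaluation mode, so dropout in the blocks is disabled during generation

Saving only the state dict (rather than the whole module with torch.save(model, ...)) avoids pickling the class definition and is the more portable of the two approaches that the removed lines mixed together.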