AYYasaswini committed on
Commit 2d9ca46 · verified · 1 Parent(s): 3c9d553

Update gpt_dev.py

Files changed (1)
  1. gpt_dev.py +2 -17
gpt_dev.py CHANGED
@@ -317,7 +317,7 @@ from torch.nn import functional as F
 # hyperparameters
 batch_size = 16 # how many independent sequences will we process in parallel?
 block_size = 32 # what is the maximum context length for predictions?
-max_iters = 5000
+max_iters = 3000
 #00
 eval_interval = 100
 learning_rate = 1e-3
@@ -450,8 +450,7 @@ class Block(nn.Module):
 class BigramLanguageModel(nn.Module):
 
     def __init__(self):
-        #super().__init__()
-        super(BigramLanguageModel, self).__init__()
+        super().__init__()
         # each token directly reads off the logits for the next token from a lookup table
         self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
         self.position_embedding_table = nn.Embedding(block_size, n_embd)
@@ -498,11 +497,9 @@ class BigramLanguageModel(nn.Module):
         return idx
 
 model = BigramLanguageModel()
-torch.save(model.state_dict(), 'transformer_weights.pth')
 m = model.to(device)
 # print the number of parameters in the model
 print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')
-torch.save(model, 'transformer_model.pth')
 
 # create a PyTorch optimizer
 optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
@@ -527,15 +524,3 @@ for iter in range(max_iters):
 context = torch.zeros((1, 1), dtype=torch.long, device=device)
 print(decode(m.generate(context, max_new_tokens=2000)[0].tolist()))
 
-# Load the saved weights into the model
-model.load_state_dict(torch.load('transformer_weights.pth'))
-
-print("Model weights loaded successfully.")
-
-import torch
-
-# Load the entire model
-model = torch.load('transformer_model.pth')
-model.eval()  # Set the model to evaluation mode
-
-print("Entire model loaded successfully.")
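
For reference, the lines removed in the last two hunks implemented checkpoint saving and loading. A minimal sketch of the standard PyTorch state_dict pattern, reusing the 'transformer_weights.pth' file name from the removed code (the path itself is arbitrary), would be:

import torch

# After training: persist only the learned parameters (the state_dict);
# this is more portable than pickling the whole module with torch.save(model, ...).
torch.save(model.state_dict(), 'transformer_weights.pth')

# Later: rebuild the architecture, load the saved weights into it, and
# switch to evaluation mode before calling generate().
model = BigramLanguageModel().to(device)
model.load_state_dict(torch.load('transformer_weights.pth', map_location=device))
model.eval()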