AYYasaswini committed on
Commit
cd23832
·
verified ·
1 Parent(s): 2d9ca46

Update gpt_dev.py

Browse files
Files changed (1) hide show
  1. gpt_dev.py +19 -20
gpt_dev.py CHANGED
@@ -12,23 +12,7 @@ Companion notebook to the [Zero To Hero](https://karpathy.ai/zero-to-hero.html)
12
  """
13
 
14
  # We always start with a dataset to train on. Let's download the tiny shakespeare dataset
15
- #!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
16
- import subprocess
17
-
18
- # URL of the file you want to download
19
- url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
20
- # Local path where the file will be saved
21
- local_filename = "input.txt"
22
-
23
- def download_file(url, local_filename):
24
- subprocess.run(["wget", url, "-O", local_filename], check=True)
25
-
26
- # Download the file
27
- download_file(url, local_filename)
28
-
29
- #from gpt_dev import BigramLanguageModel # Import your model class
30
-
31
- # Your other code here
32
 
33
  # read it in to inspect it
34
  with open('input.txt', 'r', encoding='utf-8') as f:
@@ -317,7 +301,7 @@ from torch.nn import functional as F
317
  # hyperparameters
318
  batch_size = 16 # how many independent sequences will we process in parallel?
319
  block_size = 32 # what is the maximum context length for predictions?
320
- max_iters = 3000
321
  #00
322
  eval_interval = 100
323
  learning_rate = 1e-3
@@ -450,7 +434,8 @@ class Block(nn.Module):
450
  class BigramLanguageModel(nn.Module):
451
 
452
  def __init__(self):
453
- super().__init__()
 
454
  # each token directly reads off the logits for the next token from a lookup table
455
  self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
456
  self.position_embedding_table = nn.Embedding(block_size, n_embd)
@@ -497,9 +482,11 @@ class BigramLanguageModel(nn.Module):
497
  return idx
498
 
499
  model = BigramLanguageModel()
 
500
  m = model.to(device)
501
  # print the number of parameters in the model
502
  print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')
 
503
 
504
  # create a PyTorch optimizer
505
  optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
@@ -520,7 +507,19 @@ for iter in range(max_iters):
520
  loss.backward()
521
  optimizer.step()
522
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
  # generate from the model
524
  context = torch.zeros((1, 1), dtype=torch.long, device=device)
525
  print(decode(m.generate(context, max_new_tokens=2000)[0].tolist()))
526
-
 
12
  """
13
 
14
  # We always start with a dataset to train on. Let's download the tiny shakespeare dataset
15
+ !wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # read it in to inspect it
18
  with open('input.txt', 'r', encoding='utf-8') as f:
 
301
  # hyperparameters
302
  batch_size = 16 # how many independent sequences will we process in parallel?
303
  block_size = 32 # what is the maximum context length for predictions?
304
+ max_iters = 5000
305
  #00
306
  eval_interval = 100
307
  learning_rate = 1e-3
 
434
  class BigramLanguageModel(nn.Module):
435
 
436
  def __init__(self):
437
+ #super().__init__()
438
+ super(BigramLanguageModel, self).__init__()
439
  # each token directly reads off the logits for the next token from a lookup table
440
  self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
441
  self.position_embedding_table = nn.Embedding(block_size, n_embd)
 
482
  return idx
483
 
484
  model = BigramLanguageModel()
485
+
486
  m = model.to(device)
487
  # print the number of parameters in the model
488
  print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')
489
+ torch.save(model, 'transformer_model.pth')
490
 
491
  # create a PyTorch optimizer
492
  optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
 
507
  loss.backward()
508
  optimizer.step()
509
 
510
+
511
+ # Load the saved weights into the model
512
+ #model.load_state_dict(torch.load('transformer_weights.pth'))
513
+ torch.save(model.state_dict(), 'transformer_weights.pth')
514
+ print("Model weights loaded successfully.")
515
+
516
+ import torch
517
+
518
+ # Load the entire model
519
+ model = torch.load('transformer_model.pth')
520
+ model.eval() # Set the model to evaluation mode
521
+
522
+ print("Entire model loaded successfully.")
523
  # generate from the model
524
  context = torch.zeros((1, 1), dtype=torch.long, device=device)
525
  print(decode(m.generate(context, max_new_tokens=2000)[0].tolist()))