Thishyaketh committed on
Commit
521b7cb
·
verified ·
1 Parent(s): 597744b

Upload 2 files

Browse files
Files changed (2) hide show
  1. books.py +45 -0
  2. ckpt.pt +3 -0
books.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # train a miniature character-level model on the 'books2' dataset
2
+ # good for debugging and playing on macbooks and such
3
+
4
+ out_dir = 'out-books2'
5
+ eval_interval = 250 # keep frequent because we'll overfit
6
+ eval_iters = 200
7
+ log_interval = 10 # don't print too often
8
+
9
+ # we expect to overfit on this small dataset, so only save when val improves
10
+ always_save_checkpoint = False
11
+
12
+ wandb_log = False # override via command line if you like
13
+ wandb_project = 'shakespeare'
14
+ wandb_run_name = 'mini-gpt'
15
+
16
+ dataset = 'books2'
17
+ gradient_accumulation_steps = 1
18
+ batch_size = 128
19
+ block_size = 256 # context of up to 256 previous characters
20
+
21
+ # baby GPT model :)
22
+ n_layer = 8
23
+ n_head = 8
24
+ n_embd = 512
25
+ dropout = 0.2
26
+
27
+ learning_rate = 1e-3 # with baby networks can afford to go a bit higher
28
+ max_iters = 5000
29
+ lr_decay_iters = 5000 # make equal to max_iters usually
30
+ min_lr = 1e-4 # learning_rate / 10 usually
31
+ beta2 = 0.99 # make a bit bigger because number of tokens per iter is small
32
+
33
+ warmup_iters = 100 # probably not strictly necessary
34
+
35
+ # device and compilation settings (on a macbook, set device = 'cpu' instead)
36
+ device = 'cuda' # run on the GPU via CUDA
37
+ compile = False # do not torch compile the model
38
+
39
+ # Evaluation settings
40
+ generate_samples = 100 # number of samples to generate for BLEU score
41
+ max_sample_length = 100 # maximum length of generated samples
42
+
43
+ # BLEU score settings
44
+ bleu_ngrams = 4 # maximum n-gram size for BLEU calculation
45
+ calculate_metrics = True # flag to enable/disable PPL and BLEU calculation
ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef50ef7a2312551fe8783ad19cc95d127920eb4e1aadb4dd20b9b4eab9d5545f
3
+ size 1445446661