TNSA
/

NGen2-170M

Text Generation

Model card Files Files and versions

Thishyaketh committed on May 20, 2025

Commit

521b7cb

·

verified ·

1 Parent(s): 597744b

Upload 2 files

Files changed (2) hide show

books.py +45 -0
ckpt.pt +3 -0

books.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# training config for a small GPT on the 'books2' dataset; checkpoints go to out_dir
+# NOTE(review): wandb names below still say 'shakespeare' / 'mini-gpt' -- presumably inherited from a template; confirm
+out_dir = 'out-books2'
+eval_interval = 250 # keep frequent because we'll overfit
+eval_iters = 200
+log_interval = 10 # don't print too too often
+# we expect to overfit on this small dataset, so only save when val improves
+always_save_checkpoint = False
+wandb_log = False # override via command line if you like
+wandb_project = 'shakespeare'
+wandb_run_name = 'mini-gpt'
+dataset = 'books2'
+gradient_accumulation_steps = 1
+batch_size = 128
+block_size = 256 # context of up to 256 previous tokens (characters, if the dataset is character-level -- TODO confirm)
+# model size settings
+n_layer = 8
+n_head = 8
+n_embd = 512
+dropout = 0.2
+learning_rate = 1e-3 # with baby networks can afford to go a bit higher
+max_iters = 5000
+lr_decay_iters = 5000 # make equal to max_iters usually
+min_lr = 1e-4 # learning_rate / 10 usually
+beta2 = 0.99 # make a bit bigger because number of tokens per iter is small
+warmup_iters = 100 # not super necessary potentially
+# device and compilation settings
+device = 'cuda'  # selects the CUDA device, not the CPU; NOTE(review): machines without CUDA presumably need a different value -- confirm
+compile = False # do not torch compile the model
+# Evaluation settings
+generate_samples = 100  # number of samples to generate for BLEU score
+max_sample_length = 100  # maximum length of generated samples
+# BLEU score settings
+bleu_ngrams = 4  # maximum n-gram size for BLEU calculation
+calculate_metrics = True  # flag to enable/disable PPL and BLEU calculation

ckpt.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef50ef7a2312551fe8783ad19cc95d127920eb4e1aadb4dd20b9b4eab9d5545f
+size 1445446661