darwinkernelpanic committed on
Commit
94a96d5
·
verified ·
1 Parent(s): 9dae6c0

Upload train_diffreaper_5l.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_diffreaper_5l.py +4 -4
train_diffreaper_5l.py CHANGED
@@ -16,14 +16,14 @@ HF_TOKEN = os.getenv("HF_TOKEN")
16
  OUTPUT_DIR = "./training_output"
17
  LOG_FILE = "training.log"
18
  CHECKPOINT_LOG = "checkpoint_log.txt"
19
- BATCH_SIZE = 4 # Drastic reduction for 24-layer 2048-dim on 24GB VRAM
20
  LEARNING_RATE = 1e-4
21
  SAVE_EVERY = 2500
22
  TEST_EVERY = 500
23
 
24
- N_EMBD = 1536 # Slightly downscale from 2048 to fit reliably
25
- N_HEAD = 24
26
- N_LAYER = 18 # Downscaled from 24 to save memory
27
  MAX_PROMPT_LEN = 32
28
  MAX_RESP_LEN = 128
29
  TOTAL_LEN = MAX_PROMPT_LEN + MAX_RESP_LEN
 
16
  OUTPUT_DIR = "./training_output"
17
  LOG_FILE = "training.log"
18
  CHECKPOINT_LOG = "checkpoint_log.txt"
19
+ BATCH_SIZE = 16 # Back to higher batch for 5090 (32GB VRAM)
20
  LEARNING_RATE = 1e-4
21
  SAVE_EVERY = 2500
22
  TEST_EVERY = 500
23
 
24
+ N_EMBD = 2048 # Back to full Large specs
25
+ N_HEAD = 32
26
+ N_LAYER = 24 # Full depth
27
  MAX_PROMPT_LEN = 32
28
  MAX_RESP_LEN = 128
29
  TOTAL_LEN = MAX_PROMPT_LEN + MAX_RESP_LEN