Upload train_diffreaper_5l.py with huggingface_hub
Browse files
- train_diffreaper_5l.py +4 -4
train_diffreaper_5l.py
CHANGED
|
@@ -16,14 +16,14 @@ HF_TOKEN = os.getenv("HF_TOKEN")
|
|
| 16 |
OUTPUT_DIR = "./training_output"
|
| 17 |
LOG_FILE = "training.log"
|
| 18 |
CHECKPOINT_LOG = "checkpoint_log.txt"
|
| 19 |
-
BATCH_SIZE =
|
| 20 |
LEARNING_RATE = 1e-4
|
| 21 |
SAVE_EVERY = 2500
|
| 22 |
TEST_EVERY = 500
|
| 23 |
|
| 24 |
-
N_EMBD =
|
| 25 |
-
N_HEAD =
|
| 26 |
-
N_LAYER =
|
| 27 |
MAX_PROMPT_LEN = 32
|
| 28 |
MAX_RESP_LEN = 128
|
| 29 |
TOTAL_LEN = MAX_PROMPT_LEN + MAX_RESP_LEN
|
|
|
|
| 16 |
OUTPUT_DIR = "./training_output"
|
| 17 |
LOG_FILE = "training.log"
|
| 18 |
CHECKPOINT_LOG = "checkpoint_log.txt"
|
| 19 |
+
BATCH_SIZE = 16 # Back to higher batch for 5090 (32GB VRAM)
|
| 20 |
LEARNING_RATE = 1e-4
|
| 21 |
SAVE_EVERY = 2500
|
| 22 |
TEST_EVERY = 500
|
| 23 |
|
| 24 |
+
N_EMBD = 2048 # Back to full Large specs
|
| 25 |
+
N_HEAD = 32
|
| 26 |
+
N_LAYER = 24 # Full depth
|
| 27 |
MAX_PROMPT_LEN = 32
|
| 28 |
MAX_RESP_LEN = 128
|
| 29 |
TOTAL_LEN = MAX_PROMPT_LEN + MAX_RESP_LEN
|