| { | |
| "best_step": 19500, | |
| "best_val_loss": 3.219841718673706, | |
| "learning_rate": 0.0003, | |
| "min_lr": 3e-05, | |
| "max_iters": 20000, | |
| "warmup_steps": 1000, | |
| "batch_size": 8, | |
| "gradient_accumulation_steps": 32, | |
| "effective_batch_size": 256, | |
| "block_size": 1024, | |
| "precision": "bfloat16", | |
| "optimizer": "AdamW", | |
| "betas": [ | |
| 0.9, | |
| 0.95 | |
| ], | |
| "weight_decay": 0.1, | |
| "grad_clip": 1.0, | |
| "dataset": "Saminx22/medical_data_for_slm", | |
| "tokenizer": "gpt2", | |
| "train_losses": [ | |
| 7.879132270812988, | |
| 6.511569023132324, | |
| 6.0676093101501465, | |
| 5.704098701477051, | |
| 5.454875946044922, | |
| 5.183218002319336, | |
| 4.928450107574463, | |
| 4.7383880615234375, | |
| 4.58985710144043, | |
| 4.431442737579346, | |
| 4.286258697509766, | |
| 4.190935134887695, | |
| 4.071569442749023, | |
| 3.9881348609924316, | |
| 3.9011902809143066, | |
| 3.81817626953125, | |
| 3.7602646350860596, | |
| 3.687962532043457, | |
| 3.623549461364746, | |
| 3.596832275390625, | |
| 3.516059160232544, | |
| 3.483616352081299, | |
| 3.4555230140686035, | |
| 3.4156558513641357, | |
| 3.3746585845947266, | |
| 3.366776704788208, | |
| 3.336205244064331, | |
| 3.320267915725708, | |
| 3.2959280014038086, | |
| 3.2804367542266846, | |
| 3.2574355602264404, | |
| 3.247173547744751, | |
| 3.244846820831299, | |
| 3.2060155868530273, | |
| 3.192638635635376, | |
| 3.1810803413391113, | |
| 3.1632189750671387, | |
| 3.161890983581543, | |
| 3.1812844276428223 | |
| ], | |
| "val_losses": [ | |
| 7.889859199523926, | |
| 6.505797863006592, | |
| 6.062633037567139, | |
| 5.6944580078125, | |
| 5.421933174133301, | |
| 5.1644487380981445, | |
| 4.942782402038574, | |
| 4.735868453979492, | |
| 4.573709487915039, | |
| 4.4302897453308105, | |
| 4.306708812713623, | |
| 4.180722713470459, | |
| 4.104051113128662, | |
| 4.005853652954102, | |
| 3.9109673500061035, | |
| 3.827249050140381, | |
| 3.7792670726776123, | |
| 3.678201198577881, | |
| 3.652942419052124, | |
| 3.6105289459228516, | |
| 3.561580181121826, | |
| 3.52152156829834, | |
| 3.4833824634552, | |
| 3.45076847076416, | |
| 3.4399120807647705, | |
| 3.411548376083374, | |
| 3.3855507373809814, | |
| 3.3449840545654297, | |
| 3.337197780609131, | |
| 3.3391780853271484, | |
| 3.301971673965454, | |
| 3.291358709335327, | |
| 3.2936108112335205, | |
| 3.2686355113983154, | |
| 3.2617692947387695, | |
| 3.258305311203003, | |
| 3.2488183975219727, | |
| 3.233189105987549, | |
| 3.219841718673706 | |
| ] | |
| } |