VInficheck / training_log.json
sunflowerbiii's picture
Upload folder using huggingface_hub
d4d0eb8 verified
Raw
History Blame Contribute Delete
8.95 kB
[
{
"loss": 0.34830059051513673,
"grad_norm": 0.2803570032119751,
"learning_rate": 0.00013714285714285716,
"num_tokens": 640109.0,
"mean_token_accuracy": 0.9155976337194442,
"epoch": 0.10746910263299302,
"step": 25
},
{
"loss": 0.14681048393249513,
"grad_norm": 0.23165035247802734,
"learning_rate": 0.000199780703920947,
"num_tokens": 1262877.0,
"mean_token_accuracy": 0.9537364545464516,
"epoch": 0.21493820526598603,
"step": 50
},
{
"loss": 0.1139146327972412,
"grad_norm": 0.16550014913082123,
"learning_rate": 0.00019830242014201796,
"num_tokens": 1905162.0,
"mean_token_accuracy": 0.9621101367473602,
"epoch": 0.32240730789897903,
"step": 75
},
{
"loss": 0.11038614273071289,
"grad_norm": 0.18707048892974854,
"learning_rate": 0.0001954504062771555,
"num_tokens": 2537797.0,
"mean_token_accuracy": 0.9628356519341469,
"epoch": 0.42987641053197206,
"step": 100
},
{
"eval_loss": 0.1351652890443802,
"eval_runtime": 69.5984,
"eval_samples_per_second": 2.845,
"eval_steps_per_second": 1.422,
"eval_num_tokens": 2954175.0,
"eval_mean_token_accuracy": 0.9607259185627254,
"epoch": 0.4986566362170876,
"step": 116
},
{
"loss": 0.11258039474487305,
"grad_norm": 0.20463427901268005,
"learning_rate": 0.00019126451787870527,
"num_tokens": 3175008.0,
"mean_token_accuracy": 0.9623757800459862,
"epoch": 0.537345513164965,
"step": 125
},
{
"loss": 0.11596426963806153,
"grad_norm": 0.1986123025417328,
"learning_rate": 0.00018580325076824513,
"num_tokens": 3799800.0,
"mean_token_accuracy": 0.961990795135498,
"epoch": 0.6448146157979581,
"step": 150
},
{
"loss": 0.0997089672088623,
"grad_norm": 0.1511112004518509,
"learning_rate": 0.0001791429235849919,
"num_tokens": 4438381.0,
"mean_token_accuracy": 0.9641576319932937,
"epoch": 0.7522837184309511,
"step": 175
},
{
"loss": 0.09359555244445801,
"grad_norm": 0.13941654562950134,
"learning_rate": 0.0001713766112687139,
"num_tokens": 5075973.0,
"mean_token_accuracy": 0.9686441496014595,
"epoch": 0.8597528210639441,
"step": 200
},
{
"loss": 0.11163744926452637,
"grad_norm": 0.1599922776222229,
"learning_rate": 0.0001626128443812245,
"num_tokens": 5723214.0,
"mean_token_accuracy": 0.9624734339118004,
"epoch": 0.9672219236969372,
"step": 225
},
{
"eval_loss": 0.12350355833768845,
"eval_runtime": 69.5143,
"eval_samples_per_second": 2.848,
"eval_steps_per_second": 1.424,
"eval_num_tokens": 5899964.0,
"eval_mean_token_accuracy": 0.9637513997578862,
"epoch": 0.9973132724341752,
"step": 232
},
{
"loss": 0.07306031227111816,
"grad_norm": 0.1523396372795105,
"learning_rate": 0.00015297409244282694,
"num_tokens": 6364544.0,
"mean_token_accuracy": 0.9745264253035415,
"epoch": 1.0730789897904351,
"step": 250
},
{
"loss": 0.06787878513336182,
"grad_norm": 0.13754014670848846,
"learning_rate": 0.00014259505247837074,
"num_tokens": 6996524.0,
"mean_token_accuracy": 0.9755120638012886,
"epoch": 1.1805480924234282,
"step": 275
},
{
"loss": 0.08596912384033203,
"grad_norm": 0.17458459734916687,
"learning_rate": 0.0001316207666896824,
"num_tokens": 7621299.0,
"mean_token_accuracy": 0.9696658563613891,
"epoch": 1.2880171950564212,
"step": 300
},
{
"loss": 0.06656608581542969,
"grad_norm": 0.1313866376876831,
"learning_rate": 0.00012020459555901427,
"num_tokens": 8267290.0,
"mean_token_accuracy": 0.9760522067546844,
"epoch": 1.3954862976894142,
"step": 325
},
{
"eval_loss": 0.1249435767531395,
"eval_runtime": 69.7547,
"eval_samples_per_second": 2.839,
"eval_steps_per_second": 1.419,
"eval_num_tokens": 8834417.0,
"eval_mean_token_accuracy": 0.963239210422593,
"epoch": 1.4943578721117678,
"step": 348
},
{
"loss": 0.07149289608001709,
"grad_norm": 0.15927733480930328,
"learning_rate": 0.00010850607470843656,
"num_tokens": 8883001.0,
"mean_token_accuracy": 0.9746167114377022,
"epoch": 1.5029554003224073,
"step": 350
},
{
"loss": 0.0666344976425171,
"grad_norm": 0.14084048569202423,
"learning_rate": 9.668868546455486e-05,
"num_tokens": 9525128.0,
"mean_token_accuracy": 0.975580106973648,
"epoch": 1.6104245029554003,
"step": 375
},
{
"loss": 0.07050958156585693,
"grad_norm": 0.1653551161289215,
"learning_rate": 8.491757028386263e-05,
"num_tokens": 10162043.0,
"mean_token_accuracy": 0.9742929524183274,
"epoch": 1.7178936055883933,
"step": 400
},
{
"loss": 0.054537668228149414,
"grad_norm": 0.17465578019618988,
"learning_rate": 7.33572249645848e-05,
"num_tokens": 10802637.0,
"mean_token_accuracy": 0.9797186449170112,
"epoch": 1.8253627082213864,
"step": 425
},
{
"loss": 0.06608867645263672,
"grad_norm": 0.15113526582717896,
"learning_rate": 6.216919989526651e-05,
"num_tokens": 11430608.0,
"mean_token_accuracy": 0.9761568233370781,
"epoch": 1.9328318108543794,
"step": 450
},
{
"eval_loss": 0.11843688040971756,
"eval_runtime": 69.9946,
"eval_samples_per_second": 2.829,
"eval_steps_per_second": 1.414,
"eval_num_tokens": 11786762.0,
"eval_mean_token_accuracy": 0.965543883015411,
"epoch": 1.9930145083288555,
"step": 464
},
{
"loss": 0.05509011745452881,
"grad_norm": 0.09923101216554642,
"learning_rate": 5.1509842464076776e-05,
"num_tokens": 12054154.0,
"mean_token_accuracy": 0.9804502256630641,
"epoch": 2.0386888769478775,
"step": 475
},
{
"loss": 0.031655769348144534,
"grad_norm": 0.13326086103916168,
"learning_rate": 4.152811217759529e-05,
"num_tokens": 12697456.0,
"mean_token_accuracy": 0.9891643562912941,
"epoch": 2.1461579795808703,
"step": 500
},
{
"loss": 0.032743215560913086,
"grad_norm": 0.12852540612220764,
"learning_rate": 3.2363499021769526e-05,
"num_tokens": 13329332.0,
"mean_token_accuracy": 0.9882625249028206,
"epoch": 2.2536270822138635,
"step": 525
},
{
"loss": 0.029418470859527587,
"grad_norm": 0.12474379688501358,
"learning_rate": 2.4144074154968832e-05,
"num_tokens": 13972889.0,
"mean_token_accuracy": 0.9891040176153183,
"epoch": 2.3610961848468563,
"step": 550
},
{
"loss": 0.030236964225769044,
"grad_norm": 0.12814833223819733,
"learning_rate": 1.6984700173783175e-05,
"num_tokens": 14606119.0,
"mean_token_accuracy": 0.9886843663454056,
"epoch": 2.4685652874798496,
"step": 575
},
{
"eval_loss": 0.1397247463464737,
"eval_runtime": 69.3145,
"eval_samples_per_second": 2.857,
"eval_steps_per_second": 1.428,
"eval_num_tokens": 14728454.0,
"eval_mean_token_accuracy": 0.9646775379325404,
"epoch": 2.4900591080064483,
"step": 580
},
{
"loss": 0.030723834037780763,
"grad_norm": 0.12963370978832245,
"learning_rate": 1.0985425962260343e-05,
"num_tokens": 15237584.0,
"mean_token_accuracy": 0.9888986241817475,
"epoch": 2.5760343901128424,
"step": 600
},
{
"loss": 0.030379328727722168,
"grad_norm": 0.16792573034763336,
"learning_rate": 6.230088555808278e-06,
"num_tokens": 15876203.0,
"mean_token_accuracy": 0.9890140387415886,
"epoch": 2.6835034927458357,
"step": 625
},
{
"loss": 0.02967998743057251,
"grad_norm": 0.19673478603363037,
"learning_rate": 2.7851415580571692e-06,
"num_tokens": 16513501.0,
"mean_token_accuracy": 0.9892704981565476,
"epoch": 2.7909725953788285,
"step": 650
},
{
"loss": 0.030462250709533692,
"grad_norm": 0.11195345222949982,
"learning_rate": 6.987264830045526e-07,
"num_tokens": 17145749.0,
"mean_token_accuracy": 0.9888393118977546,
"epoch": 2.8984416980118217,
"step": 675
},
{
"eval_loss": 0.13951744139194489,
"eval_runtime": 69.3391,
"eval_samples_per_second": 2.856,
"eval_steps_per_second": 1.428,
"eval_num_tokens": 17672878.0,
"eval_mean_token_accuracy": 0.9649288859030213,
"epoch": 2.9887157442235357,
"step": 696
},
{
"eval_loss": 0.13946650922298431,
"eval_runtime": 69.3693,
"eval_samples_per_second": 2.854,
"eval_steps_per_second": 1.427,
"eval_num_tokens": 17736411.0,
"eval_mean_token_accuracy": 0.9649666162452313,
"epoch": 3.0,
"step": 699
},
{
"train_runtime": 14009.2214,
"train_samples_per_second": 0.797,
"train_steps_per_second": 0.05,
"total_flos": 8.799123393273508e+17,
"train_loss": 0.07888307489550676,
"epoch": 3.0,
"step": 699
}
]