{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9994107248084856,
"eval_steps": 500,
"global_step": 1272,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.019642506383814574,
"grad_norm": 2.737959623336792,
"learning_rate": 3.90625e-05,
"loss": 1.8852,
"step": 25
},
{
"epoch": 0.03928501276762915,
"grad_norm": 0.8758559823036194,
"learning_rate": 7.8125e-05,
"loss": 1.4536,
"step": 50
},
{
"epoch": 0.05892751915144372,
"grad_norm": 0.5361883640289307,
"learning_rate": 0.00011718750000000001,
"loss": 1.16,
"step": 75
},
{
"epoch": 0.0785700255352583,
"grad_norm": 0.5839149951934814,
"learning_rate": 0.00015625,
"loss": 1.01,
"step": 100
},
{
"epoch": 0.09821253191907288,
"grad_norm": 0.6101933121681213,
"learning_rate": 0.0001953125,
"loss": 0.9627,
"step": 125
},
{
"epoch": 0.11785503830288745,
"grad_norm": 0.6417807936668396,
"learning_rate": 0.00019615384615384615,
"loss": 0.9149,
"step": 150
},
{
"epoch": 0.13749754468670203,
"grad_norm": 0.549029529094696,
"learning_rate": 0.0001917832167832168,
"loss": 0.8981,
"step": 175
},
{
"epoch": 0.1571400510705166,
"grad_norm": 0.6425251364707947,
"learning_rate": 0.00018741258741258743,
"loss": 0.8879,
"step": 200
},
{
"epoch": 0.17678255745433116,
"grad_norm": 0.5528634786605835,
"learning_rate": 0.00018304195804195805,
"loss": 0.8691,
"step": 225
},
{
"epoch": 0.19642506383814576,
"grad_norm": 0.6609376668930054,
"learning_rate": 0.00017867132867132866,
"loss": 0.8617,
"step": 250
},
{
"epoch": 0.21606757022196033,
"grad_norm": 0.5458253622055054,
"learning_rate": 0.0001743006993006993,
"loss": 0.8572,
"step": 275
},
{
"epoch": 0.2357100766057749,
"grad_norm": 0.6044121384620667,
"learning_rate": 0.00016993006993006995,
"loss": 0.8494,
"step": 300
},
{
"epoch": 0.25535258298958946,
"grad_norm": 0.5752493739128113,
"learning_rate": 0.00016555944055944056,
"loss": 0.8381,
"step": 325
},
{
"epoch": 0.27499508937340406,
"grad_norm": 0.5365332961082458,
"learning_rate": 0.0001611888111888112,
"loss": 0.8516,
"step": 350
},
{
"epoch": 0.2946375957572186,
"grad_norm": 0.7016746997833252,
"learning_rate": 0.00015681818181818182,
"loss": 0.8359,
"step": 375
},
{
"epoch": 0.3142801021410332,
"grad_norm": 0.6072686910629272,
"learning_rate": 0.00015244755244755244,
"loss": 0.8178,
"step": 400
},
{
"epoch": 0.3339226085248478,
"grad_norm": 0.5570734739303589,
"learning_rate": 0.00014807692307692308,
"loss": 0.8127,
"step": 425
},
{
"epoch": 0.3535651149086623,
"grad_norm": 0.5914424657821655,
"learning_rate": 0.00014370629370629372,
"loss": 0.8128,
"step": 450
},
{
"epoch": 0.3732076212924769,
"grad_norm": 0.5375176072120667,
"learning_rate": 0.00013933566433566434,
"loss": 0.7828,
"step": 475
},
{
"epoch": 0.3928501276762915,
"grad_norm": 0.5489270091056824,
"learning_rate": 0.00013496503496503496,
"loss": 0.8109,
"step": 500
},
{
"epoch": 0.41249263406010606,
"grad_norm": 0.5411733984947205,
"learning_rate": 0.0001305944055944056,
"loss": 0.7862,
"step": 525
},
{
"epoch": 0.43213514044392065,
"grad_norm": 0.5604883432388306,
"learning_rate": 0.00012622377622377624,
"loss": 0.8028,
"step": 550
},
{
"epoch": 0.45177764682773525,
"grad_norm": 0.6268212199211121,
"learning_rate": 0.00012185314685314686,
"loss": 0.7969,
"step": 575
},
{
"epoch": 0.4714201532115498,
"grad_norm": 0.5777909755706787,
"learning_rate": 0.00011748251748251749,
"loss": 0.7803,
"step": 600
},
{
"epoch": 0.4910626595953644,
"grad_norm": 0.5517834424972534,
"learning_rate": 0.0001131118881118881,
"loss": 0.8052,
"step": 625
},
{
"epoch": 0.5107051659791789,
"grad_norm": 0.5613248944282532,
"learning_rate": 0.00010874125874125876,
"loss": 0.7731,
"step": 650
},
{
"epoch": 0.5303476723629935,
"grad_norm": 0.5555421113967896,
"learning_rate": 0.00010437062937062938,
"loss": 0.7959,
"step": 675
},
{
"epoch": 0.5499901787468081,
"grad_norm": 0.5249913334846497,
"learning_rate": 0.0001,
"loss": 0.8082,
"step": 700
},
{
"epoch": 0.5696326851306227,
"grad_norm": 0.578350841999054,
"learning_rate": 9.562937062937063e-05,
"loss": 0.7996,
"step": 725
},
{
"epoch": 0.5892751915144372,
"grad_norm": 0.5972084403038025,
"learning_rate": 9.125874125874126e-05,
"loss": 0.7892,
"step": 750
},
{
"epoch": 0.6089176978982518,
"grad_norm": 0.5550151467323303,
"learning_rate": 8.688811188811189e-05,
"loss": 0.7544,
"step": 775
},
{
"epoch": 0.6285602042820664,
"grad_norm": 0.5595849752426147,
"learning_rate": 8.251748251748252e-05,
"loss": 0.7917,
"step": 800
},
{
"epoch": 0.648202710665881,
"grad_norm": 0.5400447249412537,
"learning_rate": 7.814685314685315e-05,
"loss": 0.7429,
"step": 825
},
{
"epoch": 0.6678452170496956,
"grad_norm": 0.5469474196434021,
"learning_rate": 7.377622377622378e-05,
"loss": 0.7858,
"step": 850
},
{
"epoch": 0.6874877234335102,
"grad_norm": 0.5074354410171509,
"learning_rate": 6.940559440559441e-05,
"loss": 0.7378,
"step": 875
},
{
"epoch": 0.7071302298173247,
"grad_norm": 0.5348958373069763,
"learning_rate": 6.503496503496504e-05,
"loss": 0.7742,
"step": 900
},
{
"epoch": 0.7267727362011392,
"grad_norm": 0.5498335957527161,
"learning_rate": 6.066433566433567e-05,
"loss": 0.7922,
"step": 925
},
{
"epoch": 0.7464152425849538,
"grad_norm": 0.5797409415245056,
"learning_rate": 5.629370629370629e-05,
"loss": 0.7567,
"step": 950
},
{
"epoch": 0.7660577489687684,
"grad_norm": 0.5608484745025635,
"learning_rate": 5.192307692307693e-05,
"loss": 0.7533,
"step": 975
},
{
"epoch": 0.785700255352583,
"grad_norm": 0.5730789303779602,
"learning_rate": 4.755244755244756e-05,
"loss": 0.7608,
"step": 1000
},
{
"epoch": 0.8053427617363975,
"grad_norm": 0.5161120295524597,
"learning_rate": 4.318181818181819e-05,
"loss": 0.7644,
"step": 1025
},
{
"epoch": 0.8249852681202121,
"grad_norm": 0.6298760175704956,
"learning_rate": 3.8811188811188816e-05,
"loss": 0.7678,
"step": 1050
},
{
"epoch": 0.8446277745040267,
"grad_norm": 0.559695839881897,
"learning_rate": 3.4440559440559445e-05,
"loss": 0.7627,
"step": 1075
},
{
"epoch": 0.8642702808878413,
"grad_norm": 0.5945947170257568,
"learning_rate": 3.0069930069930068e-05,
"loss": 0.7767,
"step": 1100
},
{
"epoch": 0.8839127872716559,
"grad_norm": 0.5842404365539551,
"learning_rate": 2.5699300699300697e-05,
"loss": 0.7752,
"step": 1125
},
{
"epoch": 0.9035552936554705,
"grad_norm": 0.5409468412399292,
"learning_rate": 2.132867132867133e-05,
"loss": 0.7667,
"step": 1150
},
{
"epoch": 0.923197800039285,
"grad_norm": 0.6497332453727722,
"learning_rate": 1.695804195804196e-05,
"loss": 0.7817,
"step": 1175
},
{
"epoch": 0.9428403064230996,
"grad_norm": 0.5824007987976074,
"learning_rate": 1.2587412587412589e-05,
"loss": 0.7951,
"step": 1200
},
{
"epoch": 0.9624828128069142,
"grad_norm": 0.6233786940574646,
"learning_rate": 8.216783216783217e-06,
"loss": 0.7926,
"step": 1225
},
{
"epoch": 0.9821253191907288,
"grad_norm": 0.5785284042358398,
"learning_rate": 3.846153846153847e-06,
"loss": 0.7632,
"step": 1250
}
],
"logging_steps": 25,
"max_steps": 1272,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 1.2402531158196224e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}