hiroyukikaneko
/

gr00t_initial_ft3

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 556,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.017985611510791366,
+      "grad_norm": 8.904035568237305,
+      "learning_rate": 3.571428571428572e-05,
+      "loss": 1.2754,
+      "step": 10
+    },
+    {
+      "epoch": 0.03597122302158273,
+      "grad_norm": 3.598201274871826,
+      "learning_rate": 7.142857142857143e-05,
+      "loss": 0.8598,
+      "step": 20
+    },
+    {
+      "epoch": 0.0539568345323741,
+      "grad_norm": 4.27315616607666,
+      "learning_rate": 9.999645980833454e-05,
+      "loss": 0.5009,
+      "step": 30
+    },
+    {
+      "epoch": 0.07194244604316546,
+      "grad_norm": 1.9983025789260864,
+      "learning_rate": 9.987260573051269e-05,
+      "loss": 0.3516,
+      "step": 40
+    },
+    {
+      "epoch": 0.08992805755395683,
+      "grad_norm": 3.3641581535339355,
+      "learning_rate": 9.957224306869053e-05,
+      "loss": 0.2792,
+      "step": 50
+    },
+    {
+      "epoch": 0.1079136690647482,
+      "grad_norm": 1.5614864826202393,
+      "learning_rate": 9.909643486313533e-05,
+      "loss": 0.233,
+      "step": 60
+    },
+    {
+      "epoch": 0.12589928057553956,
+      "grad_norm": 1.1084058284759521,
+      "learning_rate": 9.844686508907537e-05,
+      "loss": 0.1771,
+      "step": 70
+    },
+    {
+      "epoch": 0.14388489208633093,
+      "grad_norm": 0.6237399578094482,
+      "learning_rate": 9.762583269679303e-05,
+      "loss": 0.1681,
+      "step": 80
+    },
+    {
+      "epoch": 0.1618705035971223,
+      "grad_norm": 0.8216731548309326,
+      "learning_rate": 9.663624347520505e-05,
+      "loss": 0.1467,
+      "step": 90
+    },
+    {
+      "epoch": 0.17985611510791366,
+      "grad_norm": 1.565852165222168,
+      "learning_rate": 9.548159976772592e-05,
+      "loss": 0.145,
+      "step": 100
+    },
+    {
+      "epoch": 0.19784172661870503,
+      "grad_norm": 0.950880765914917,
+      "learning_rate": 9.41659880768122e-05,
+      "loss": 0.1361,
+      "step": 110
+    },
+    {
+      "epoch": 0.2158273381294964,
+      "grad_norm": 1.6979942321777344,
+      "learning_rate": 9.26940646010574e-05,
+      "loss": 0.1257,
+      "step": 120
+    },
+    {
+      "epoch": 0.23381294964028776,
+      "grad_norm": 1.192500114440918,
+      "learning_rate": 9.107103875602459e-05,
+      "loss": 0.1144,
+      "step": 130
+    },
+    {
+      "epoch": 0.2517985611510791,
+      "grad_norm": 0.7761622667312622,
+      "learning_rate": 8.930265473713938e-05,
+      "loss": 0.1192,
+      "step": 140
+    },
+    {
+      "epoch": 0.2697841726618705,
+      "grad_norm": 0.8879750370979309,
+      "learning_rate": 8.739517118989605e-05,
+      "loss": 0.1098,
+      "step": 150
+    },
+    {
+      "epoch": 0.28776978417266186,
+      "grad_norm": 1.1123480796813965,
+      "learning_rate": 8.535533905932738e-05,
+      "loss": 0.0984,
+      "step": 160
+    },
+    {
+      "epoch": 0.3057553956834532,
+      "grad_norm": 0.9935937523841858,
+      "learning_rate": 8.319037769713338e-05,
+      "loss": 0.0999,
+      "step": 170
+    },
+    {
+      "epoch": 0.3237410071942446,
+      "grad_norm": 0.8555904030799866,
+      "learning_rate": 8.090794931103026e-05,
+      "loss": 0.097,
+      "step": 180
+    },
+    {
+      "epoch": 0.34172661870503596,
+      "grad_norm": 0.4569714069366455,
+      "learning_rate": 7.85161318467482e-05,
+      "loss": 0.0909,
+      "step": 190
+    },
+    {
+      "epoch": 0.3597122302158273,
+      "grad_norm": 0.5646055936813354,
+      "learning_rate": 7.602339039865362e-05,
+      "loss": 0.0915,
+      "step": 200
+    },
+    {
+      "epoch": 0.3776978417266187,
+      "grad_norm": 0.6606709957122803,
+      "learning_rate": 7.343854725017918e-05,
+      "loss": 0.0965,
+      "step": 210
+    },
+    {
+      "epoch": 0.39568345323741005,
+      "grad_norm": 0.6112151145935059,
+      "learning_rate": 7.077075065009433e-05,
+      "loss": 0.0912,
+      "step": 220
+    },
+    {
+      "epoch": 0.4136690647482014,
+      "grad_norm": 0.7429100275039673,
+      "learning_rate": 6.80294424351225e-05,
+      "loss": 0.0843,
+      "step": 230
+    },
+    {
+      "epoch": 0.4316546762589928,
+      "grad_norm": 1.2504783868789673,
+      "learning_rate": 6.522432461349536e-05,
+      "loss": 0.0816,
+      "step": 240
+    },
+    {
+      "epoch": 0.44964028776978415,
+      "grad_norm": 0.5470308065414429,
+      "learning_rate": 6.236532502771078e-05,
+      "loss": 0.0806,
+      "step": 250
+    },
+    {
+      "epoch": 0.4676258992805755,
+      "grad_norm": 1.6046953201293945,
+      "learning_rate": 5.946256221802051e-05,
+      "loss": 0.0792,
+      "step": 260
+    },
+    {
+      "epoch": 0.4856115107913669,
+      "grad_norm": 0.6300632357597351,
+      "learning_rate": 5.6526309611002594e-05,
+      "loss": 0.0928,
+      "step": 270
+    },
+    {
+      "epoch": 0.5035971223021583,
+      "grad_norm": 0.7121009826660156,
+      "learning_rate": 5.3566959159961615e-05,
+      "loss": 0.0789,
+      "step": 280
+    },
+    {
+      "epoch": 0.5215827338129496,
+      "grad_norm": 1.3714096546173096,
+      "learning_rate": 5.059498456584072e-05,
+      "loss": 0.0845,
+      "step": 290
+    },
+    {
+      "epoch": 0.539568345323741,
+      "grad_norm": 1.2124353647232056,
+      "learning_rate": 4.762090420881289e-05,
+      "loss": 0.0759,
+      "step": 300
+    },
+    {
+      "epoch": 0.5575539568345323,
+      "grad_norm": 0.8162264823913574,
+      "learning_rate": 4.4655243921744374e-05,
+      "loss": 0.0826,
+      "step": 310
+    },
+    {
+      "epoch": 0.5755395683453237,
+      "grad_norm": 0.7335633039474487,
+      "learning_rate": 4.17084997372813e-05,
+      "loss": 0.0819,
+      "step": 320
+    },
+    {
+      "epoch": 0.5935251798561151,
+      "grad_norm": 1.067676067352295,
+      "learning_rate": 3.879110074040514e-05,
+      "loss": 0.0793,
+      "step": 330
+    },
+    {
+      "epoch": 0.6115107913669064,
+      "grad_norm": 0.768426239490509,
+      "learning_rate": 3.591337215792852e-05,
+      "loss": 0.072,
+      "step": 340
+    },
+    {
+      "epoch": 0.6294964028776978,
+      "grad_norm": 0.6882124543190002,
+      "learning_rate": 3.3085498815564645e-05,
+      "loss": 0.0732,
+      "step": 350
+    },
+    {
+      "epoch": 0.6474820143884892,
+      "grad_norm": 0.7045790553092957,
+      "learning_rate": 3.0317489091902935e-05,
+      "loss": 0.0751,
+      "step": 360
+    },
+    {
+      "epoch": 0.6654676258992805,
+      "grad_norm": 0.6171532869338989,
+      "learning_rate": 2.7619139496864378e-05,
+      "loss": 0.0723,
+      "step": 370
+    },
+    {
+      "epoch": 0.6834532374100719,
+      "grad_norm": 1.0635524988174438,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.0727,
+      "step": 380
+    },
+    {
+      "epoch": 0.7014388489208633,
+      "grad_norm": 1.7545868158340454,
+      "learning_rate": 2.246934023134257e-05,
+      "loss": 0.0715,
+      "step": 390
+    },
+    {
+      "epoch": 0.7194244604316546,
+      "grad_norm": 0.6268433332443237,
+      "learning_rate": 2.0036116674432654e-05,
+      "loss": 0.0704,
+      "step": 400
+    },
+    {
+      "epoch": 0.737410071942446,
+      "grad_norm": 0.6153240203857422,
+      "learning_rate": 1.7708940967629567e-05,
+      "loss": 0.0776,
+      "step": 410
+    },
+    {
+      "epoch": 0.7553956834532374,
+      "grad_norm": 0.6164385080337524,
+      "learning_rate": 1.549604942589441e-05,
+      "loss": 0.0681,
+      "step": 420
+    },
+    {
+      "epoch": 0.7733812949640287,
+      "grad_norm": 0.780419647693634,
+      "learning_rate": 1.340527389091374e-05,
+      "loss": 0.0749,
+      "step": 430
+    },
+    {
+      "epoch": 0.7913669064748201,
+      "grad_norm": 0.8401429653167725,
+      "learning_rate": 1.144401401273062e-05,
+      "loss": 0.0697,
+      "step": 440
+    },
+    {
+      "epoch": 0.8093525179856115,
+      "grad_norm": 0.716050386428833,
+      "learning_rate": 9.619211060983675e-06,
+      "loss": 0.0644,
+      "step": 450
+    },
+    {
+      "epoch": 0.8273381294964028,
+      "grad_norm": 0.7171333432197571,
+      "learning_rate": 7.937323358440935e-06,
+      "loss": 0.0689,
+      "step": 460
+    },
+    {
+      "epoch": 0.8453237410071942,
+      "grad_norm": 0.8431842923164368,
+      "learning_rate": 6.40430342377426e-06,
+      "loss": 0.0754,
+      "step": 470
+    },
+    {
+      "epoch": 0.8633093525179856,
+      "grad_norm": 0.47953060269355774,
+      "learning_rate": 5.025576904469842e-06,
+      "loss": 0.0747,
+      "step": 480
+    },
+    {
+      "epoch": 0.8812949640287769,
+      "grad_norm": 0.793252170085907,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.0694,
+      "step": 490
+    },
+    {
+      "epoch": 0.8992805755395683,
+      "grad_norm": 0.5008072853088379,
+      "learning_rate": 2.7499590642665774e-06,
+      "loss": 0.0637,
+      "step": 500
+    },
+    {
+      "epoch": 0.9172661870503597,
+      "grad_norm": 0.7334234118461609,
+      "learning_rate": 1.8611215852879005e-06,
+      "loss": 0.0753,
+      "step": 510
+    },
+    {
+      "epoch": 0.935251798561151,
+      "grad_norm": 0.64471435546875,
+      "learning_rate": 1.1426567014420297e-06,
+      "loss": 0.0632,
+      "step": 520
+    },
+    {
+      "epoch": 0.9532374100719424,
+      "grad_norm": 1.301717758178711,
+      "learning_rate": 5.971071958349228e-07,
+      "loss": 0.0709,
+      "step": 530
+    },
+    {
+      "epoch": 0.9712230215827338,
+      "grad_norm": 0.5564674139022827,
+      "learning_rate": 2.2640387134577058e-07,
+      "loss": 0.0647,
+      "step": 540
+    },
+    {
+      "epoch": 0.9892086330935251,
+      "grad_norm": 0.42517802119255066,
+      "learning_rate": 3.185871715041255e-08,
+      "loss": 0.0666,
+      "step": 550
+    },
+    {
+      "epoch": 1.0,
+      "step": 556,
+      "total_flos": 3.809504209392605e+16,
+      "train_loss": 0.14214680440348687,
+      "train_runtime": 180.0894,
+      "train_samples_per_second": 98.768,
+      "train_steps_per_second": 3.087
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 556,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 20000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.809504209392605e+16,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}