Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

config.json +64 -0
experiment_cfg/metadata.json +355 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +0 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +734 -0

config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "action_dim": 32,
+  "action_head_cfg": {
+    "action_dim": 32,
+    "action_horizon": 16,
+    "add_pos_embed": true,
+    "backbone_embedding_dim": 2048,
+    "diffusion_model_cfg": {
+      "attention_head_dim": 48,
+      "cross_attention_dim": 2048,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "interleave_self_attention": true,
+      "norm_type": "ada_norm",
+      "num_attention_heads": 32,
+      "num_layers": 16,
+      "output_dim": 1024,
+      "positional_embeddings": null
+    },
+    "hidden_size": 1024,
+    "input_embedding_dim": 1536,
+    "max_action_dim": 32,
+    "max_state_dim": 64,
+    "model_dtype": "float32",
+    "noise_beta_alpha": 1.5,
+    "noise_beta_beta": 1.0,
+    "noise_s": 0.999,
+    "num_inference_timesteps": 4,
+    "num_target_vision_tokens": 32,
+    "num_timestep_buckets": 1000,
+    "tune_diffusion_model": true,
+    "tune_projector": true,
+    "use_vlln": true,
+    "vl_self_attention_cfg": {
+      "attention_head_dim": 64,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "num_attention_heads": 32,
+      "num_layers": 4,
+      "positional_embeddings": null
+    }
+  },
+  "action_horizon": 16,
+  "architectures": [
+    "GR00T_N1_5"
+  ],
+  "attn_implementation": null,
+  "backbone_cfg": {
+    "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
+    "load_bf16": false,
+    "project_to_dim": null,
+    "reproject_vision": false,
+    "select_layer": 12,
+    "tune_llm": false,
+    "tune_visual": true,
+    "use_flash_attention": true
+  },
+  "compute_dtype": "bfloat16",
+  "hidden_size": 2048,
+  "model_dtype": "float32",
+  "model_type": "gr00t_n1_5",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3"
+}

experiment_cfg/metadata.json ADDED Viewed

	@@ -0,0 +1,355 @@

+{
+    "new_embodiment": {
+        "statistics": {
+            "state": {
+                "left_arm": {
+                    "max": [
+                        2.187028646469116,
+                        -99.21630096435547,
+                        98.77407836914062,
+                        74.72486114501953,
+                        46.27594757080078
+                    ],
+                    "min": [
+                        -12.066365242004395,
+                        -99.45140838623047,
+                        98.33625030517578,
+                        73.35863494873047,
+                        -52.869354248046875
+                    ],
+                    "mean": [
+                        -6.931744575500488,
+                        -99.3890380859375,
+                        98.62979888916016,
+                        74.25790405273438,
+                        1.3063249588012695
+                    ],
+                    "std": [
+                        4.007203578948975,
+                        0.0767870619893074,
+                        0.12424216419458389,
+                        0.4313095211982727,
+                        48.00337600708008
+                    ],
+                    "q01": [
+                        -12.066365242004395,
+                        -99.45140838623047,
+                        98.51138305664062,
+                        73.58633422851562,
+                        -52.869354248046875
+                    ],
+                    "q99": [
+                        0.15082956850528717,
+                        -99.21630096435547,
+                        98.77407836914062,
+                        74.72486114501953,
+                        45.88522720336914
+                    ]
+                },
+                "gripper1": {
+                    "max": [
+                        17.847938537597656
+                    ],
+                    "min": [
+                        0.4510309398174286
+                    ],
+                    "mean": [
+                        10.498392105102539
+                    ],
+                    "std": [
+                        7.273200035095215
+                    ],
+                    "q01": [
+                        0.4510309398174286
+                    ],
+                    "q99": [
+                        17.847938537597656
+                    ]
+                },
+                "right_arm": {
+                    "max": [
+                        31.229597091674805,
+                        71.30260467529297,
+                        98.0138168334961,
+                        75.76736450195312,
+                        41.3212776184082
+                    ],
+                    "min": [
+                        -27.022125244140625,
+                        -97.51502990722656,
+                        -31.865285873413086,
+                        -96.60742950439453,
+                        -65.18462371826172
+                    ],
+                    "mean": [
+                        6.173414707183838,
+                        8.965652465820312,
+                        43.04948425292969,
+                        -32.647727966308594,
+                        -44.51104736328125
+                    ],
+                    "std": [
+                        17.221529006958008,
+                        47.10391616821289,
+                        32.55530548095703,
+                        45.9875373840332,
+                        20.5490779876709
+                    ],
+                    "q01": [
+                        -24.338048934936523,
+                        -97.35470581054688,
+                        -17.356649856567383,
+                        -96.44587707519531,
+                        -59.05551528930664
+                    ],
+                    "q99": [
+                        29.63365936279297,
+                        62.324649810791016,
+                        97.75475311279297,
+                        73.66720581054688,
+                        29.715649795532308
+                    ]
+                },
+                "gripper2": {
+                    "max": [
+                        71.65532684326172
+                    ],
+                    "min": [
+                        2.324263095855713
+                    ],
+                    "mean": [
+                        23.636430740356445
+                    ],
+                    "std": [
+                        15.703285217285156
+                    ],
+                    "q01": [
+                        2.4376416206359863
+                    ],
+                    "q99": [
+                        63.20068164825567
+                    ]
+                }
+            },
+            "action": {
+                "left_arm": {
+                    "max": [
+                        2.2628026008605957,
+                        -100.0,
+                        100.0,
+                        76.97608184814453,
+                        46.617828369140625
+                    ],
+                    "min": [
+                        -12.107979774475098,
+                        -100.0,
+                        99.9084243774414,
+                        75.35468292236328,
+                        -52.625152587890625
+                    ],
+                    "mean": [
+                        -6.822534084320068,
+                        -100.0,
+                        99.92558288574219,
+                        76.36930084228516,
+                        1.283119797706604
+                    ],
+                    "std": [
+                        4.120677471160889,
+                        0.0,
+                        0.0450742170214653,
+                        0.6158173680305481,
+                        47.87895584106445
+                    ],
+                    "q01": [
+                        -12.107979774475098,
+                        -100.0,
+                        99.9084243774414,
+                        75.35468292236328,
+                        -52.625152587890625
+                    ],
+                    "q99": [
+                        0.43668121099472046,
+                        -100.0,
+                        100.0,
+                        76.97608184814453,
+                        45.738704681396484
+                    ]
+                },
+                "gripper1": {
+                    "max": [
+                        18.17367935180664
+                    ],
+                    "min": [
+                        0.17905102670192719
+                    ],
+                    "mean": [
+                        10.681233406066895
+                    ],
+                    "std": [
+                        7.212050914764404
+                    ],
+                    "q01": [
+                        0.2685765326023102
+                    ],
+                    "q99": [
+                        17.99462890625
+                    ]
+                },
+                "right_arm": {
+                    "max": [
+                        31.447315216064453,
+                        71.15780639648438,
+                        99.20035552978516,
+                        77.47068786621094,
+                        42.69340896606445
+                    ],
+                    "min": [
+                        -27.511274337768555,
+                        -99.25834655761719,
+                        -32.56330490112305,
+                        -98.40870666503906,
+                        -66.81427764892578
+                    ],
+                    "mean": [
+                        6.259029865264893,
+                        7.35891580581665,
+                        42.082435607910156,
+                        -33.01646423339844,
+                        -44.4665641784668
+                    ],
+                    "std": [
+                        17.27552604675293,
+                        47.02023696899414,
+                        33.24121856689453,
+                        46.35660171508789,
+                        20.584171295166016
+                    ],
+                    "q01": [
+                        -24.231243133544922,
+                        -97.77503204345703,
+                        -18.96934700012207,
+                        -97.82244873046875,
+                        -59.10393142700195
+                    ],
+                    "q99": [
+                        30.052480850219858,
+                        62.34033966064453,
+                        98.48956298828125,
+                        74.70687103271484,
+                        30.398540496826172
+                    ]
+                },
+                "gripper2": {
+                    "max": [
+                        71.90612030029297
+                    ],
+                    "min": [
+                        0.4267425239086151
+                    ],
+                    "mean": [
+                        20.33241081237793
+                    ],
+                    "std": [
+                        17.951961517333984
+                    ],
+                    "q01": [
+                        1.8492176532745361
+                    ],
+                    "q99": [
+                        64.36272933960029
+                    ]
+                }
+            }
+        },
+        "modalities": {
+            "video": {
+                "right": {
+                    "resolution": [
+                        640,
+                        480
+                    ],
+                    "channels": 3,
+                    "fps": 30.0
+                },
+                "top_rgb": {
+                    "resolution": [
+                        640,
+                        480
+                    ],
+                    "channels": 3,
+                    "fps": 30.0
+                }
+            },
+            "state": {
+                "left_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper1": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "right_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper2": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            },
+            "action": {
+                "left_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper1": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "right_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper2": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            }
+        },
+        "embodiment_tag": "new_embodiment"
+    }
+}

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2db511ed728aaf36eecfd28fa48f0d77139bf02c6b27d1f169399b53d84ebeba
+size 4999367032

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f04a59b96816a7a820663a91a327c11bb5a3b1ab34a2d059d64325e693869ea
+size 2586705312

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec2cbe99dd138b6621aa07c959f498cfba8ab74b1d1ed7d8344b9e45016b4819
+size 8550720062

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0291d1666eaed8b00f4187bc1c1b5e7de8ae43108df5b7b87ac4e0e74b66318
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a0a428aa401f171b5e4e33fd1981cc4dd012b028cd7da79c75568021691b3e83
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,734 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.4945054945054945,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.054945054945054944,
+      "grad_norm": 3.1129469871520996,
+      "learning_rate": 1.3846153846153847e-05,
+      "loss": 1.2218,
+      "step": 10
+    },
+    {
+      "epoch": 0.10989010989010989,
+      "grad_norm": 1.2952975034713745,
+      "learning_rate": 2.9230769230769234e-05,
+      "loss": 0.4319,
+      "step": 20
+    },
+    {
+      "epoch": 0.16483516483516483,
+      "grad_norm": 2.0939278602600098,
+      "learning_rate": 4.461538461538462e-05,
+      "loss": 0.2786,
+      "step": 30
+    },
+    {
+      "epoch": 0.21978021978021978,
+      "grad_norm": 1.0714941024780273,
+      "learning_rate": 6e-05,
+      "loss": 0.2013,
+      "step": 40
+    },
+    {
+      "epoch": 0.27472527472527475,
+      "grad_norm": 1.486087441444397,
+      "learning_rate": 7.538461538461539e-05,
+      "loss": 0.1555,
+      "step": 50
+    },
+    {
+      "epoch": 0.32967032967032966,
+      "grad_norm": 0.8458685874938965,
+      "learning_rate": 9.076923076923078e-05,
+      "loss": 0.1299,
+      "step": 60
+    },
+    {
+      "epoch": 0.38461538461538464,
+      "grad_norm": 0.5655759572982788,
+      "learning_rate": 9.999741165552688e-05,
+      "loss": 0.1234,
+      "step": 70
+    },
+    {
+      "epoch": 0.43956043956043955,
+      "grad_norm": 0.7847583293914795,
+      "learning_rate": 9.996829585771195e-05,
+      "loss": 0.1084,
+      "step": 80
+    },
+    {
+      "epoch": 0.4945054945054945,
+      "grad_norm": 0.5260426998138428,
+      "learning_rate": 9.990684773387432e-05,
+      "loss": 0.0915,
+      "step": 90
+    },
+    {
+      "epoch": 0.5494505494505495,
+      "grad_norm": 0.6431106925010681,
+      "learning_rate": 9.981310704444089e-05,
+      "loss": 0.0908,
+      "step": 100
+    },
+    {
+      "epoch": 0.6043956043956044,
+      "grad_norm": 0.4315970838069916,
+      "learning_rate": 9.968713444496294e-05,
+      "loss": 0.0793,
+      "step": 110
+    },
+    {
+      "epoch": 0.6593406593406593,
+      "grad_norm": 0.4451838731765747,
+      "learning_rate": 9.952901144686857e-05,
+      "loss": 0.0728,
+      "step": 120
+    },
+    {
+      "epoch": 0.7142857142857143,
+      "grad_norm": 0.44112035632133484,
+      "learning_rate": 9.933884036472022e-05,
+      "loss": 0.0668,
+      "step": 130
+    },
+    {
+      "epoch": 0.7692307692307693,
+      "grad_norm": 0.31693196296691895,
+      "learning_rate": 9.911674425001134e-05,
+      "loss": 0.0648,
+      "step": 140
+    },
+    {
+      "epoch": 0.8241758241758241,
+      "grad_norm": 0.5056548118591309,
+      "learning_rate": 9.886286681154495e-05,
+      "loss": 0.0644,
+      "step": 150
+    },
+    {
+      "epoch": 0.8791208791208791,
+      "grad_norm": 0.370319664478302,
+      "learning_rate": 9.857737232244604e-05,
+      "loss": 0.0578,
+      "step": 160
+    },
+    {
+      "epoch": 0.9340659340659341,
+      "grad_norm": 0.6004385352134705,
+      "learning_rate": 9.826044551386744e-05,
+      "loss": 0.0561,
+      "step": 170
+    },
+    {
+      "epoch": 0.989010989010989,
+      "grad_norm": 0.6260236501693726,
+      "learning_rate": 9.791229145545831e-05,
+      "loss": 0.0511,
+      "step": 180
+    },
+    {
+      "epoch": 1.043956043956044,
+      "grad_norm": 0.3883954882621765,
+      "learning_rate": 9.753313542267241e-05,
+      "loss": 0.0528,
+      "step": 190
+    },
+    {
+      "epoch": 1.098901098901099,
+      "grad_norm": 0.4612903594970703,
+      "learning_rate": 9.712322275100208e-05,
+      "loss": 0.0539,
+      "step": 200
+    },
+    {
+      "epoch": 1.1538461538461537,
+      "grad_norm": 0.41645845770835876,
+      "learning_rate": 9.668281867723223e-05,
+      "loss": 0.0467,
+      "step": 210
+    },
+    {
+      "epoch": 1.2087912087912087,
+      "grad_norm": 0.45377564430236816,
+      "learning_rate": 9.621220816781708e-05,
+      "loss": 0.0489,
+      "step": 220
+    },
+    {
+      "epoch": 1.2637362637362637,
+      "grad_norm": 0.33625757694244385,
+      "learning_rate": 9.571169573449075e-05,
+      "loss": 0.0467,
+      "step": 230
+    },
+    {
+      "epoch": 1.3186813186813187,
+      "grad_norm": 0.2890590727329254,
+      "learning_rate": 9.51816052372307e-05,
+      "loss": 0.0392,
+      "step": 240
+    },
+    {
+      "epoch": 1.3736263736263736,
+      "grad_norm": 0.5078686475753784,
+      "learning_rate": 9.46222796747021e-05,
+      "loss": 0.0404,
+      "step": 250
+    },
+    {
+      "epoch": 1.4285714285714286,
+      "grad_norm": 0.3641613721847534,
+      "learning_rate": 9.403408096231812e-05,
+      "loss": 0.0407,
+      "step": 260
+    },
+    {
+      "epoch": 1.4835164835164836,
+      "grad_norm": 0.30617615580558777,
+      "learning_rate": 9.341738969806011e-05,
+      "loss": 0.0393,
+      "step": 270
+    },
+    {
+      "epoch": 1.5384615384615383,
+      "grad_norm": 0.3459275960922241,
+      "learning_rate": 9.277260491620907e-05,
+      "loss": 0.0401,
+      "step": 280
+    },
+    {
+      "epoch": 1.5934065934065935,
+      "grad_norm": 0.2681010663509369,
+      "learning_rate": 9.210014382914784e-05,
+      "loss": 0.0363,
+      "step": 290
+    },
+    {
+      "epoch": 1.6483516483516483,
+      "grad_norm": 0.35169124603271484,
+      "learning_rate": 9.140044155740101e-05,
+      "loss": 0.0341,
+      "step": 300
+    },
+    {
+      "epoch": 1.7032967032967035,
+      "grad_norm": 0.28844815492630005,
+      "learning_rate": 9.067395084808709e-05,
+      "loss": 0.0353,
+      "step": 310
+    },
+    {
+      "epoch": 1.7582417582417582,
+      "grad_norm": 0.3767380714416504,
+      "learning_rate": 8.992114178196558e-05,
+      "loss": 0.0339,
+      "step": 320
+    },
+    {
+      "epoch": 1.8131868131868132,
+      "grad_norm": 0.37167835235595703,
+      "learning_rate": 8.914250146926788e-05,
+      "loss": 0.032,
+      "step": 330
+    },
+    {
+      "epoch": 1.8681318681318682,
+      "grad_norm": 0.2863631844520569,
+      "learning_rate": 8.833853373450936e-05,
+      "loss": 0.0339,
+      "step": 340
+    },
+    {
+      "epoch": 1.9230769230769231,
+      "grad_norm": 0.25606516003608704,
+      "learning_rate": 8.750975879048629e-05,
+      "loss": 0.0302,
+      "step": 350
+    },
+    {
+      "epoch": 1.978021978021978,
+      "grad_norm": 0.28373125195503235,
+      "learning_rate": 8.665671290166853e-05,
+      "loss": 0.031,
+      "step": 360
+    },
+    {
+      "epoch": 2.032967032967033,
+      "grad_norm": 0.41454407572746277,
+      "learning_rate": 8.577994803720606e-05,
+      "loss": 0.0426,
+      "step": 370
+    },
+    {
+      "epoch": 2.087912087912088,
+      "grad_norm": 0.4597318470478058,
+      "learning_rate": 8.488003151377351e-05,
+      "loss": 0.0366,
+      "step": 380
+    },
+    {
+      "epoch": 2.142857142857143,
+      "grad_norm": 0.3471977412700653,
+      "learning_rate": 8.395754562848408e-05,
+      "loss": 0.0358,
+      "step": 390
+    },
+    {
+      "epoch": 2.197802197802198,
+      "grad_norm": 0.37275147438049316,
+      "learning_rate": 8.301308728211017e-05,
+      "loss": 0.0321,
+      "step": 400
+    },
+    {
+      "epoch": 2.2527472527472527,
+      "grad_norm": 0.29526907205581665,
+      "learning_rate": 8.20472675928548e-05,
+      "loss": 0.0318,
+      "step": 410
+    },
+    {
+      "epoch": 2.3076923076923075,
+      "grad_norm": 0.31538689136505127,
+      "learning_rate": 8.10607115009232e-05,
+      "loss": 0.0312,
+      "step": 420
+    },
+    {
+      "epoch": 2.3626373626373627,
+      "grad_norm": 0.3267456889152527,
+      "learning_rate": 8.005405736415126e-05,
+      "loss": 0.0292,
+      "step": 430
+    },
+    {
+      "epoch": 2.4175824175824174,
+      "grad_norm": 0.23045669496059418,
+      "learning_rate": 7.902795654495154e-05,
+      "loss": 0.0268,
+      "step": 440
+    },
+    {
+      "epoch": 2.4725274725274726,
+      "grad_norm": 0.28116142749786377,
+      "learning_rate": 7.798307298884487e-05,
+      "loss": 0.0268,
+      "step": 450
+    },
+    {
+      "epoch": 2.5274725274725274,
+      "grad_norm": 0.3379570245742798,
+      "learning_rate": 7.692008279484989e-05,
+      "loss": 0.0271,
+      "step": 460
+    },
+    {
+      "epoch": 2.5824175824175826,
+      "grad_norm": 0.2906380295753479,
+      "learning_rate": 7.583967377800853e-05,
+      "loss": 0.0259,
+      "step": 470
+    },
+    {
+      "epoch": 2.6373626373626373,
+      "grad_norm": 0.3166893422603607,
+      "learning_rate": 7.474254502433064e-05,
+      "loss": 0.028,
+      "step": 480
+    },
+    {
+      "epoch": 2.6923076923076925,
+      "grad_norm": 0.3112421929836273,
+      "learning_rate": 7.362940643844564e-05,
+      "loss": 0.0284,
+      "step": 490
+    },
+    {
+      "epoch": 2.7472527472527473,
+      "grad_norm": 0.3011311888694763,
+      "learning_rate": 7.250097828425384e-05,
+      "loss": 0.0276,
+      "step": 500
+    },
+    {
+      "epoch": 2.802197802197802,
+      "grad_norm": 0.21055926382541656,
+      "learning_rate": 7.135799071887486e-05,
+      "loss": 0.0238,
+      "step": 510
+    },
+    {
+      "epoch": 2.857142857142857,
+      "grad_norm": 0.3269054591655731,
+      "learning_rate": 7.020118332019438e-05,
+      "loss": 0.0233,
+      "step": 520
+    },
+    {
+      "epoch": 2.912087912087912,
+      "grad_norm": 0.2849093973636627,
+      "learning_rate": 6.903130460831539e-05,
+      "loss": 0.0249,
+      "step": 530
+    },
+    {
+      "epoch": 2.967032967032967,
+      "grad_norm": 0.2573489844799042,
+      "learning_rate": 6.784911156122305e-05,
+      "loss": 0.0241,
+      "step": 540
+    },
+    {
+      "epoch": 3.021978021978022,
+      "grad_norm": 0.3901364505290985,
+      "learning_rate": 6.665536912497687e-05,
+      "loss": 0.026,
+      "step": 550
+    },
+    {
+      "epoch": 3.076923076923077,
+      "grad_norm": 0.41034600138664246,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 0.0287,
+      "step": 560
+    },
+    {
+      "epoch": 3.131868131868132,
+      "grad_norm": 0.24250033497810364,
+      "learning_rate": 6.42363327350168e-05,
+      "loss": 0.0282,
+      "step": 570
+    },
+    {
+      "epoch": 3.186813186813187,
+      "grad_norm": 0.3414716124534607,
+      "learning_rate": 6.301260403526801e-05,
+      "loss": 0.028,
+      "step": 580
+    },
+    {
+      "epoch": 3.241758241758242,
+      "grad_norm": 0.22624026238918304,
+      "learning_rate": 6.178045544148739e-05,
+      "loss": 0.0244,
+      "step": 590
+    },
+    {
+      "epoch": 3.2967032967032965,
+      "grad_norm": 0.21665652096271515,
+      "learning_rate": 6.054068422381112e-05,
+      "loss": 0.0242,
+      "step": 600
+    },
+    {
+      "epoch": 3.3516483516483517,
+      "grad_norm": 0.19114629924297333,
+      "learning_rate": 5.929409258464606e-05,
+      "loss": 0.0237,
+      "step": 610
+    },
+    {
+      "epoch": 3.4065934065934065,
+      "grad_norm": 0.21987110376358032,
+      "learning_rate": 5.8041487139599216e-05,
+      "loss": 0.025,
+      "step": 620
+    },
+    {
+      "epoch": 3.4615384615384617,
+      "grad_norm": 0.29730021953582764,
+      "learning_rate": 5.678367839555163e-05,
+      "loss": 0.0226,
+      "step": 630
+    },
+    {
+      "epoch": 3.5164835164835164,
+      "grad_norm": 0.2368212342262268,
+      "learning_rate": 5.5521480226214405e-05,
+      "loss": 0.0241,
+      "step": 640
+    },
+    {
+      "epoch": 3.571428571428571,
+      "grad_norm": 0.23423047363758087,
+      "learning_rate": 5.425570934550617e-05,
+      "loss": 0.0207,
+      "step": 650
+    },
+    {
+      "epoch": 3.6263736263736264,
+      "grad_norm": 0.18969936668872833,
+      "learning_rate": 5.2987184779092715e-05,
+      "loss": 0.0228,
+      "step": 660
+    },
+    {
+      "epoch": 3.6813186813186816,
+      "grad_norm": 0.2447623908519745,
+      "learning_rate": 5.171672733443098e-05,
+      "loss": 0.019,
+      "step": 670
+    },
+    {
+      "epoch": 3.7362637362637363,
+      "grad_norm": 0.16867715120315552,
+      "learning_rate": 5.044515906965988e-05,
+      "loss": 0.0193,
+      "step": 680
+    },
+    {
+      "epoch": 3.791208791208791,
+      "grad_norm": 0.15809307992458344,
+      "learning_rate": 4.917330276168208e-05,
+      "loss": 0.0218,
+      "step": 690
+    },
+    {
+      "epoch": 3.8461538461538463,
+      "grad_norm": 0.2224949151277542,
+      "learning_rate": 4.790198137378056e-05,
+      "loss": 0.0185,
+      "step": 700
+    },
+    {
+      "epoch": 3.901098901098901,
+      "grad_norm": 0.23750196397304535,
+      "learning_rate": 4.663201752311461e-05,
+      "loss": 0.0194,
+      "step": 710
+    },
+    {
+      "epoch": 3.956043956043956,
+      "grad_norm": 0.16117140650749207,
+      "learning_rate": 4.536423294843978e-05,
+      "loss": 0.0185,
+      "step": 720
+    },
+    {
+      "epoch": 4.010989010989011,
+      "grad_norm": 0.21286968886852264,
+      "learning_rate": 4.409944797839635e-05,
+      "loss": 0.0186,
+      "step": 730
+    },
+    {
+      "epoch": 4.065934065934066,
+      "grad_norm": 0.2058119922876358,
+      "learning_rate": 4.283848100070988e-05,
+      "loss": 0.0198,
+      "step": 740
+    },
+    {
+      "epoch": 4.1208791208791204,
+      "grad_norm": 0.18852297961711884,
+      "learning_rate": 4.1582147932648074e-05,
+      "loss": 0.0236,
+      "step": 750
+    },
+    {
+      "epoch": 4.175824175824176,
+      "grad_norm": 0.2005940079689026,
+      "learning_rate": 4.033126169307584e-05,
+      "loss": 0.0193,
+      "step": 760
+    },
+    {
+      "epoch": 4.230769230769231,
+      "grad_norm": 0.26670485734939575,
+      "learning_rate": 3.9086631676450586e-05,
+      "loss": 0.0197,
+      "step": 770
+    },
+    {
+      "epoch": 4.285714285714286,
+      "grad_norm": 0.14271725714206696,
+      "learning_rate": 3.784906322909813e-05,
+      "loss": 0.0175,
+      "step": 780
+    },
+    {
+      "epoch": 4.34065934065934,
+      "grad_norm": 0.2008945196866989,
+      "learning_rate": 3.661935712810779e-05,
+      "loss": 0.0191,
+      "step": 790
+    },
+    {
+      "epoch": 4.395604395604396,
+      "grad_norm": 0.14711880683898926,
+      "learning_rate": 3.5398309063184146e-05,
+      "loss": 0.018,
+      "step": 800
+    },
+    {
+      "epoch": 4.450549450549451,
+      "grad_norm": 0.2107602059841156,
+      "learning_rate": 3.418670912179057e-05,
+      "loss": 0.0173,
+      "step": 810
+    },
+    {
+      "epoch": 4.5054945054945055,
+      "grad_norm": 0.16945497691631317,
+      "learning_rate": 3.298534127791785e-05,
+      "loss": 0.0178,
+      "step": 820
+    },
+    {
+      "epoch": 4.56043956043956,
+      "grad_norm": 0.14888402819633484,
+      "learning_rate": 3.179498288480834e-05,
+      "loss": 0.0181,
+      "step": 830
+    },
+    {
+      "epoch": 4.615384615384615,
+      "grad_norm": 0.1862138956785202,
+      "learning_rate": 3.061640417196433e-05,
+      "loss": 0.0171,
+      "step": 840
+    },
+    {
+      "epoch": 4.670329670329671,
+      "grad_norm": 0.17378169298171997,
+      "learning_rate": 2.945036774676587e-05,
+      "loss": 0.0156,
+      "step": 850
+    },
+    {
+      "epoch": 4.725274725274725,
+      "grad_norm": 0.1787486970424652,
+      "learning_rate": 2.8297628101020322e-05,
+      "loss": 0.0205,
+      "step": 860
+    },
+    {
+      "epoch": 4.78021978021978,
+      "grad_norm": 0.1538439691066742,
+      "learning_rate": 2.71589311227634e-05,
+      "loss": 0.0165,
+      "step": 870
+    },
+    {
+      "epoch": 4.835164835164835,
+      "grad_norm": 0.16950446367263794,
+      "learning_rate": 2.6035013613627224e-05,
+      "loss": 0.0168,
+      "step": 880
+    },
+    {
+      "epoch": 4.8901098901098905,
+      "grad_norm": 0.1429736614227295,
+      "learning_rate": 2.492660281208779e-05,
+      "loss": 0.0137,
+      "step": 890
+    },
+    {
+      "epoch": 4.945054945054945,
+      "grad_norm": 0.15313905477523804,
+      "learning_rate": 2.3834415922900415e-05,
+      "loss": 0.0156,
+      "step": 900
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.5302070379257202,
+      "learning_rate": 2.2759159653027374e-05,
+      "loss": 0.0135,
+      "step": 910
+    },
+    {
+      "epoch": 5.054945054945055,
+      "grad_norm": 0.18755768239498138,
+      "learning_rate": 2.170152975435859e-05,
+      "loss": 0.016,
+      "step": 920
+    },
+    {
+      "epoch": 5.1098901098901095,
+      "grad_norm": 0.24148383736610413,
+      "learning_rate": 2.066221057352036e-05,
+      "loss": 0.015,
+      "step": 930
+    },
+    {
+      "epoch": 5.164835164835165,
+      "grad_norm": 0.20809908211231232,
+      "learning_rate": 1.9641874609064443e-05,
+      "loss": 0.0174,
+      "step": 940
+    },
+    {
+      "epoch": 5.21978021978022,
+      "grad_norm": 0.21820078790187836,
+      "learning_rate": 1.8641182076323148e-05,
+      "loss": 0.0157,
+      "step": 950
+    },
+    {
+      "epoch": 5.274725274725275,
+      "grad_norm": 0.17557694017887115,
+      "learning_rate": 1.76607804802126e-05,
+      "loss": 0.0158,
+      "step": 960
+    },
+    {
+      "epoch": 5.329670329670329,
+      "grad_norm": 0.2052188515663147,
+      "learning_rate": 1.6701304196260166e-05,
+      "loss": 0.0145,
+      "step": 970
+    },
+    {
+      "epoch": 5.384615384615385,
+      "grad_norm": 0.19574584066867828,
+      "learning_rate": 1.5763374060127624e-05,
+      "loss": 0.0146,
+      "step": 980
+    },
+    {
+      "epoch": 5.43956043956044,
+      "grad_norm": 0.15790286660194397,
+      "learning_rate": 1.4847596965895011e-05,
+      "loss": 0.0162,
+      "step": 990
+    },
+    {
+      "epoch": 5.4945054945054945,
+      "grad_norm": 0.16527500748634338,
+      "learning_rate": 1.3954565473365844e-05,
+      "loss": 0.017,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1300,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 100,
+  "trial_name": null,
+  "trial_params": null
+}