Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

config.json +64 -0
experiment_cfg/metadata.json +355 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +0 -0
trainer_state.json +743 -0
training_args.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "action_dim": 32,
+  "action_head_cfg": {
+    "action_dim": 32,
+    "action_horizon": 16,
+    "add_pos_embed": true,
+    "backbone_embedding_dim": 2048,
+    "diffusion_model_cfg": {
+      "attention_head_dim": 48,
+      "cross_attention_dim": 2048,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "interleave_self_attention": true,
+      "norm_type": "ada_norm",
+      "num_attention_heads": 32,
+      "num_layers": 16,
+      "output_dim": 1024,
+      "positional_embeddings": null
+    },
+    "hidden_size": 1024,
+    "input_embedding_dim": 1536,
+    "max_action_dim": 32,
+    "max_state_dim": 64,
+    "model_dtype": "float32",
+    "noise_beta_alpha": 1.5,
+    "noise_beta_beta": 1.0,
+    "noise_s": 0.999,
+    "num_inference_timesteps": 4,
+    "num_target_vision_tokens": 32,
+    "num_timestep_buckets": 1000,
+    "tune_diffusion_model": true,
+    "tune_projector": true,
+    "use_vlln": true,
+    "vl_self_attention_cfg": {
+      "attention_head_dim": 64,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "num_attention_heads": 32,
+      "num_layers": 4,
+      "positional_embeddings": null
+    }
+  },
+  "action_horizon": 16,
+  "architectures": [
+    "GR00T_N1_5"
+  ],
+  "attn_implementation": null,
+  "backbone_cfg": {
+    "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
+    "load_bf16": false,
+    "project_to_dim": null,
+    "reproject_vision": false,
+    "select_layer": 12,
+    "tune_llm": false,
+    "tune_visual": true,
+    "use_flash_attention": true
+  },
+  "compute_dtype": "bfloat16",
+  "hidden_size": 2048,
+  "model_dtype": "float32",
+  "model_type": "gr00t_n1_5",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3"
+}

experiment_cfg/metadata.json ADDED Viewed

	@@ -0,0 +1,355 @@

+{
+    "new_embodiment": {
+        "statistics": {
+            "state": {
+                "left_arm": {
+                    "max": [
+                        34.16289520263672,
+                        61.91222381591797,
+                        99.38703918457031,
+                        75.7874755859375,
+                        46.27594757080078
+                    ],
+                    "min": [
+                        -27.90346908569336,
+                        -99.60814666748047,
+                        -33.27495574951172,
+                        -88.31119537353516,
+                        -66.83760833740234
+                    ],
+                    "mean": [
+                        -1.3637042045593262,
+                        -70.07746124267578,
+                        81.53594970703125,
+                        46.74204635620117,
+                        -32.77935791015625
+                    ],
+                    "std": [
+                        10.373152732849121,
+                        52.60585021972656,
+                        34.18480682373047,
+                        48.48912811279297,
+                        37.322959899902344
+                    ],
+                    "q01": [
+                        -22.021116256713867,
+                        -99.52977752685547,
+                        -18.82662010192871,
+                        -66.29981231689453,
+                        -58.38827896118164
+                    ],
+                    "q99": [
+                        26.69683265686035,
+                        51.0971794128418,
+                        98.94921112060547,
+                        74.72486114501953,
+                        45.88522720336914
+                    ]
+                },
+                "gripper1": {
+                    "max": [
+                        71.77835083007812
+                    ],
+                    "min": [
+                        0.0
+                    ],
+                    "mean": [
+                        10.566564559936523
+                    ],
+                    "std": [
+                        10.46413803100586
+                    ],
+                    "q01": [
+                        0.4510309398174286
+                    ],
+                    "q99": [
+                        56.572166442871094
+                    ]
+                },
+                "right_arm": {
+                    "max": [
+                        31.229597091674805,
+                        73.70741271972656,
+                        98.10017395019531,
+                        84.24878692626953,
+                        41.3212776184082
+                    ],
+                    "min": [
+                        -27.022125244140625,
+                        -97.75550842285156,
+                        -63.21243667602539,
+                        -96.60742950439453,
+                        -65.18462371826172
+                    ],
+                    "mean": [
+                        -1.5135765075683594,
+                        -25.33967399597168,
+                        61.376529693603516,
+                        0.33546876907348633,
+                        -41.29887390136719
+                    ],
+                    "std": [
+                        12.458887100219727,
+                        60.93878936767578,
+                        35.7954216003418,
+                        60.31285858154297,
+                        23.256187438964844
+                    ],
+                    "q01": [
+                        -22.887195587158203,
+                        -97.51502990722656,
+                        -18.134714126586914,
+                        -95.5573501586914,
+                        -57.9000244140625
+                    ],
+                    "q99": [
+                        28.61806297302246,
+                        63.446895599365234,
+                        97.84110260009766,
+                        73.2633285522461,
+                        29.41472053527832
+                    ]
+                },
+                "gripper2": {
+                    "max": [
+                        75.05668640136719
+                    ],
+                    "min": [
+                        2.324263095855713
+                    ],
+                    "mean": [
+                        23.147193908691406
+                    ],
+                    "std": [
+                        13.153349876403809
+                    ],
+                    "q01": [
+                        2.551020383834839
+                    ],
+                    "q99": [
+                        61.621315002441406
+                    ]
+                }
+            },
+            "action": {
+                "left_arm": {
+                    "max": [
+                        35.92695617675781,
+                        62.01413345336914,
+                        100.0,
+                        76.97608184814453,
+                        46.617828369140625
+                    ],
+                    "min": [
+                        -28.066692352294922,
+                        -100.0,
+                        -35.07326126098633,
+                        -89.78516387939453,
+                        -67.17948913574219
+                    ],
+                    "mean": [
+                        -1.1567177772521973,
+                        -70.6091537475586,
+                        82.1029281616211,
+                        47.767425537109375,
+                        -32.80006408691406
+                    ],
+                    "std": [
+                        10.52210521697998,
+                        52.541717529296875,
+                        35.224395751953125,
+                        49.508544921875,
+                        37.26472473144531
+                    ],
+                    "q01": [
+                        -21.873760223388672,
+                        -100.0,
+                        -20.238094329833984,
+                        -66.76124572753906,
+                        -58.43711853027344
+                    ],
+                    "q99": [
+                        27.669710159301758,
+                        50.230565567016654,
+                        100.0,
+                        76.97608184814453,
+                        45.738704681396484
+                    ]
+                },
+                "gripper1": {
+                    "max": [
+                        74.2166519165039
+                    ],
+                    "min": [
+                        0.0
+                    ],
+                    "mean": [
+                        10.451210021972656
+                    ],
+                    "std": [
+                        10.781113624572754
+                    ],
+                    "q01": [
+                        0.08952551335096359
+                    ],
+                    "q99": [
+                        56.938228607177734
+                    ]
+                },
+                "right_arm": {
+                    "max": [
+                        31.447315216064453,
+                        72.47631072998047,
+                        99.20035552978516,
+                        85.42713928222656,
+                        42.69340896606445
+                    ],
+                    "min": [
+                        -27.511274337768555,
+                        -99.25834655761719,
+                        -67.30342102050781,
+                        -98.40870666503906,
+                        -66.81427764892578
+                    ],
+                    "mean": [
+                        -1.471267580986023,
+                        -26.692821502685547,
+                        60.74913787841797,
+                        0.23357370495796204,
+                        -41.19910430908203
+                    ],
+                    "std": [
+                        12.504396438598633,
+                        60.0944709777832,
+                        36.86882400512695,
+                        60.7491340637207,
+                        23.23846435546875
+                    ],
+                    "q01": [
+                        -23.001230239868164,
+                        -97.692626953125,
+                        -19.68014144897461,
+                        -96.31491088867188,
+                        -57.749412536621094
+                    ],
+                    "q99": [
+                        28.823287963867188,
+                        62.34033966064453,
+                        98.57840728759766,
+                        74.03684997558594,
+                        29.721281051635742
+                    ]
+                },
+                "gripper2": {
+                    "max": [
+                        77.73826599121094
+                    ],
+                    "min": [
+                        0.4267425239086151
+                    ],
+                    "mean": [
+                        21.515247344970703
+                    ],
+                    "std": [
+                        14.212292671203613
+                    ],
+                    "q01": [
+                        1.8492176532745361
+                    ],
+                    "q99": [
+                        61.87766647338867
+                    ]
+                }
+            }
+        },
+        "modalities": {
+            "video": {
+                "right": {
+                    "resolution": [
+                        640,
+                        480
+                    ],
+                    "channels": 3,
+                    "fps": 30.0
+                },
+                "top_rgb": {
+                    "resolution": [
+                        640,
+                        480
+                    ],
+                    "channels": 3,
+                    "fps": 30.0
+                }
+            },
+            "state": {
+                "left_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper1": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "right_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper2": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            },
+            "action": {
+                "left_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper1": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "right_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper2": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            }
+        },
+        "embodiment_tag": "new_embodiment"
+    }
+}

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:115fd57ab6985556b75222944abe6f1f152a92a06ef614f5756448fabd7c02a8
+size 4999367032

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38e59620503cd8a0523f4851324145e9f86aba1a919018b013f473ad5324b370
+size 2586705312

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json ADDED Viewed

	@@ -0,0 +1,743 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9025270758122743,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.009025270758122744,
+      "grad_norm": 1.977829933166504,
+      "learning_rate": 1.8e-05,
+      "loss": 0.9407,
+      "step": 10
+    },
+    {
+      "epoch": 0.018050541516245487,
+      "grad_norm": 0.9880571365356445,
+      "learning_rate": 3.8e-05,
+      "loss": 0.2558,
+      "step": 20
+    },
+    {
+      "epoch": 0.02707581227436823,
+      "grad_norm": 1.016868233680725,
+      "learning_rate": 5.8e-05,
+      "loss": 0.151,
+      "step": 30
+    },
+    {
+      "epoch": 0.036101083032490974,
+      "grad_norm": 1.0683897733688354,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 0.1332,
+      "step": 40
+    },
+    {
+      "epoch": 0.04512635379061372,
+      "grad_norm": 0.8018341660499573,
+      "learning_rate": 9.8e-05,
+      "loss": 0.0983,
+      "step": 50
+    },
+    {
+      "epoch": 0.05415162454873646,
+      "grad_norm": 0.6524431109428406,
+      "learning_rate": 9.997785653888835e-05,
+      "loss": 0.0808,
+      "step": 60
+    },
+    {
+      "epoch": 0.0631768953068592,
+      "grad_norm": 0.6845595240592957,
+      "learning_rate": 9.990133642141359e-05,
+      "loss": 0.0778,
+      "step": 70
+    },
+    {
+      "epoch": 0.07220216606498195,
+      "grad_norm": 0.6983832120895386,
+      "learning_rate": 9.977024992520602e-05,
+      "loss": 0.0711,
+      "step": 80
+    },
+    {
+      "epoch": 0.0812274368231047,
+      "grad_norm": 0.6112372279167175,
+      "learning_rate": 9.95847403914247e-05,
+      "loss": 0.0679,
+      "step": 90
+    },
+    {
+      "epoch": 0.09025270758122744,
+      "grad_norm": 0.5820932388305664,
+      "learning_rate": 9.934501067202117e-05,
+      "loss": 0.0641,
+      "step": 100
+    },
+    {
+      "epoch": 0.09927797833935018,
+      "grad_norm": 0.5818979144096375,
+      "learning_rate": 9.905132290792394e-05,
+      "loss": 0.0618,
+      "step": 110
+    },
+    {
+      "epoch": 0.10830324909747292,
+      "grad_norm": 0.6202858686447144,
+      "learning_rate": 9.870399824239117e-05,
+      "loss": 0.0554,
+      "step": 120
+    },
+    {
+      "epoch": 0.11732851985559567,
+      "grad_norm": 0.5585798621177673,
+      "learning_rate": 9.830341646984521e-05,
+      "loss": 0.0559,
+      "step": 130
+    },
+    {
+      "epoch": 0.1263537906137184,
+      "grad_norm": 0.6366000771522522,
+      "learning_rate": 9.785001562057309e-05,
+      "loss": 0.0553,
+      "step": 140
+    },
+    {
+      "epoch": 0.13537906137184116,
+      "grad_norm": 0.4224783778190613,
+      "learning_rate": 9.734429148174675e-05,
+      "loss": 0.05,
+      "step": 150
+    },
+    {
+      "epoch": 0.1444043321299639,
+      "grad_norm": 0.39184054732322693,
+      "learning_rate": 9.6786797055287e-05,
+      "loss": 0.0455,
+      "step": 160
+    },
+    {
+      "epoch": 0.15342960288808663,
+      "grad_norm": 0.4573412835597992,
+      "learning_rate": 9.617814195316411e-05,
+      "loss": 0.0408,
+      "step": 170
+    },
+    {
+      "epoch": 0.1624548736462094,
+      "grad_norm": 0.4405965805053711,
+      "learning_rate": 9.551899173079607e-05,
+      "loss": 0.0455,
+      "step": 180
+    },
+    {
+      "epoch": 0.17148014440433212,
+      "grad_norm": 0.46750208735466003,
+      "learning_rate": 9.481006715927351e-05,
+      "loss": 0.0448,
+      "step": 190
+    },
+    {
+      "epoch": 0.18050541516245489,
+      "grad_norm": 0.37655723094940186,
+      "learning_rate": 9.405214343720707e-05,
+      "loss": 0.0439,
+      "step": 200
+    },
+    {
+      "epoch": 0.18953068592057762,
+      "grad_norm": 0.5338054895401001,
+      "learning_rate": 9.32460493430591e-05,
+      "loss": 0.047,
+      "step": 210
+    },
+    {
+      "epoch": 0.19855595667870035,
+      "grad_norm": 0.4981560707092285,
+      "learning_rate": 9.239266632888659e-05,
+      "loss": 0.0418,
+      "step": 220
+    },
+    {
+      "epoch": 0.2075812274368231,
+      "grad_norm": 0.4395357668399811,
+      "learning_rate": 9.14929275564863e-05,
+      "loss": 0.0408,
+      "step": 230
+    },
+    {
+      "epoch": 0.21660649819494585,
+      "grad_norm": 0.44028720259666443,
+      "learning_rate": 9.0547816876996e-05,
+      "loss": 0.0409,
+      "step": 240
+    },
+    {
+      "epoch": 0.22563176895306858,
+      "grad_norm": 0.37459373474121094,
+      "learning_rate": 8.955836775506776e-05,
+      "loss": 0.0426,
+      "step": 250
+    },
+    {
+      "epoch": 0.23465703971119134,
+      "grad_norm": 0.4326346516609192,
+      "learning_rate": 8.852566213878947e-05,
+      "loss": 0.0429,
+      "step": 260
+    },
+    {
+      "epoch": 0.24368231046931407,
+      "grad_norm": 0.45173099637031555,
+      "learning_rate": 8.745082927659047e-05,
+      "loss": 0.0384,
+      "step": 270
+    },
+    {
+      "epoch": 0.2527075812274368,
+      "grad_norm": 0.33228954672813416,
+      "learning_rate": 8.633504448242505e-05,
+      "loss": 0.0422,
+      "step": 280
+    },
+    {
+      "epoch": 0.26173285198555957,
+      "grad_norm": 0.364307165145874,
+      "learning_rate": 8.517952785058385e-05,
+      "loss": 0.0369,
+      "step": 290
+    },
+    {
+      "epoch": 0.27075812274368233,
+      "grad_norm": 0.34387287497520447,
+      "learning_rate": 8.398554292153866e-05,
+      "loss": 0.0353,
+      "step": 300
+    },
+    {
+      "epoch": 0.27978339350180503,
+      "grad_norm": 0.3851270079612732,
+      "learning_rate": 8.275439530027948e-05,
+      "loss": 0.0417,
+      "step": 310
+    },
+    {
+      "epoch": 0.2888086642599278,
+      "grad_norm": 0.28114214539527893,
+      "learning_rate": 8.148743122865463e-05,
+      "loss": 0.0345,
+      "step": 320
+    },
+    {
+      "epoch": 0.29783393501805056,
+      "grad_norm": 0.33744707703590393,
+      "learning_rate": 8.018603611327504e-05,
+      "loss": 0.0342,
+      "step": 330
+    },
+    {
+      "epoch": 0.30685920577617326,
+      "grad_norm": 0.4103146195411682,
+      "learning_rate": 7.88516330105925e-05,
+      "loss": 0.035,
+      "step": 340
+    },
+    {
+      "epoch": 0.315884476534296,
+      "grad_norm": 0.328669011592865,
+      "learning_rate": 7.748568107080832e-05,
+      "loss": 0.0327,
+      "step": 350
+    },
+    {
+      "epoch": 0.3249097472924188,
+      "grad_norm": 0.4371018409729004,
+      "learning_rate": 7.608967394231387e-05,
+      "loss": 0.0359,
+      "step": 360
+    },
+    {
+      "epoch": 0.33393501805054154,
+      "grad_norm": 0.4694584012031555,
+      "learning_rate": 7.466513813840825e-05,
+      "loss": 0.0339,
+      "step": 370
+    },
+    {
+      "epoch": 0.34296028880866425,
+      "grad_norm": 0.38598158955574036,
+      "learning_rate": 7.32136313680782e-05,
+      "loss": 0.0289,
+      "step": 380
+    },
+    {
+      "epoch": 0.351985559566787,
+      "grad_norm": 0.36169150471687317,
+      "learning_rate": 7.173674083266624e-05,
+      "loss": 0.032,
+      "step": 390
+    },
+    {
+      "epoch": 0.36101083032490977,
+      "grad_norm": 0.3175145983695984,
+      "learning_rate": 7.023608149028937e-05,
+      "loss": 0.0311,
+      "step": 400
+    },
+    {
+      "epoch": 0.3700361010830325,
+      "grad_norm": 0.4808519184589386,
+      "learning_rate": 6.871329428990602e-05,
+      "loss": 0.0351,
+      "step": 410
+    },
+    {
+      "epoch": 0.37906137184115524,
+      "grad_norm": 0.3936750590801239,
+      "learning_rate": 6.71700443769625e-05,
+      "loss": 0.0297,
+      "step": 420
+    },
+    {
+      "epoch": 0.388086642599278,
+      "grad_norm": 0.31872764229774475,
+      "learning_rate": 6.56080192725808e-05,
+      "loss": 0.029,
+      "step": 430
+    },
+    {
+      "epoch": 0.3971119133574007,
+      "grad_norm": 0.3199341297149658,
+      "learning_rate": 6.402892702827916e-05,
+      "loss": 0.0305,
+      "step": 440
+    },
+    {
+      "epoch": 0.40613718411552346,
+      "grad_norm": 0.29824522137641907,
+      "learning_rate": 6.243449435824276e-05,
+      "loss": 0.0313,
+      "step": 450
+    },
+    {
+      "epoch": 0.4151624548736462,
+      "grad_norm": 0.3850104808807373,
+      "learning_rate": 6.0826464751186994e-05,
+      "loss": 0.0307,
+      "step": 460
+    },
+    {
+      "epoch": 0.42418772563176893,
+      "grad_norm": 0.28914761543273926,
+      "learning_rate": 5.9206596563878357e-05,
+      "loss": 0.0335,
+      "step": 470
+    },
+    {
+      "epoch": 0.4332129963898917,
+      "grad_norm": 0.29466763138771057,
+      "learning_rate": 5.757666109839702e-05,
+      "loss": 0.0282,
+      "step": 480
+    },
+    {
+      "epoch": 0.44223826714801445,
+      "grad_norm": 0.3107747435569763,
+      "learning_rate": 5.5938440665244006e-05,
+      "loss": 0.027,
+      "step": 490
+    },
+    {
+      "epoch": 0.45126353790613716,
+      "grad_norm": 0.38809922337532043,
+      "learning_rate": 5.4293726634410855e-05,
+      "loss": 0.0281,
+      "step": 500
+    },
+    {
+      "epoch": 0.4602888086642599,
+      "grad_norm": 0.27953779697418213,
+      "learning_rate": 5.264431747654284e-05,
+      "loss": 0.0251,
+      "step": 510
+    },
+    {
+      "epoch": 0.4693140794223827,
+      "grad_norm": 0.24429909884929657,
+      "learning_rate": 5.0992016796337686e-05,
+      "loss": 0.0258,
+      "step": 520
+    },
+    {
+      "epoch": 0.47833935018050544,
+      "grad_norm": 0.21034276485443115,
+      "learning_rate": 4.93386313603304e-05,
+      "loss": 0.0271,
+      "step": 530
+    },
+    {
+      "epoch": 0.48736462093862815,
+      "grad_norm": 0.29745611548423767,
+      "learning_rate": 4.7685969121220456e-05,
+      "loss": 0.0268,
+      "step": 540
+    },
+    {
+      "epoch": 0.4963898916967509,
+      "grad_norm": 0.2871965765953064,
+      "learning_rate": 4.60358372409022e-05,
+      "loss": 0.0245,
+      "step": 550
+    },
+    {
+      "epoch": 0.5054151624548736,
+      "grad_norm": 0.30364564061164856,
+      "learning_rate": 4.439004011435979e-05,
+      "loss": 0.0237,
+      "step": 560
+    },
+    {
+      "epoch": 0.5144404332129964,
+      "grad_norm": 0.28791144490242004,
+      "learning_rate": 4.275037739658771e-05,
+      "loss": 0.0264,
+      "step": 570
+    },
+    {
+      "epoch": 0.5234657039711191,
+      "grad_norm": 0.3472572863101959,
+      "learning_rate": 4.111864203469457e-05,
+      "loss": 0.0268,
+      "step": 580
+    },
+    {
+      "epoch": 0.5324909747292419,
+      "grad_norm": 0.3806765377521515,
+      "learning_rate": 3.949661830734172e-05,
+      "loss": 0.0224,
+      "step": 590
+    },
+    {
+      "epoch": 0.5415162454873647,
+      "grad_norm": 0.27796250581741333,
+      "learning_rate": 3.788607987366069e-05,
+      "loss": 0.0253,
+      "step": 600
+    },
+    {
+      "epoch": 0.5505415162454874,
+      "grad_norm": 0.24273499846458435,
+      "learning_rate": 3.628878783378302e-05,
+      "loss": 0.0208,
+      "step": 610
+    },
+    {
+      "epoch": 0.5595667870036101,
+      "grad_norm": 0.3810800611972809,
+      "learning_rate": 3.470648880310313e-05,
+      "loss": 0.0219,
+      "step": 620
+    },
+    {
+      "epoch": 0.5685920577617328,
+      "grad_norm": 0.31840479373931885,
+      "learning_rate": 3.3140913002379995e-05,
+      "loss": 0.0222,
+      "step": 630
+    },
+    {
+      "epoch": 0.5776173285198556,
+      "grad_norm": 0.2828656733036041,
+      "learning_rate": 3.1593772365766105e-05,
+      "loss": 0.0212,
+      "step": 640
+    },
+    {
+      "epoch": 0.5866425992779783,
+      "grad_norm": 0.22635580599308014,
+      "learning_rate": 3.006675866883275e-05,
+      "loss": 0.0213,
+      "step": 650
+    },
+    {
+      "epoch": 0.5956678700361011,
+      "grad_norm": 0.20679627358913422,
+      "learning_rate": 2.8561541678638142e-05,
+      "loss": 0.0239,
+      "step": 660
+    },
+    {
+      "epoch": 0.6046931407942239,
+      "grad_norm": 0.20941723883152008,
+      "learning_rate": 2.707976732786166e-05,
+      "loss": 0.0198,
+      "step": 670
+    },
+    {
+      "epoch": 0.6137184115523465,
+      "grad_norm": 0.23454327881336212,
+      "learning_rate": 2.562305591500069e-05,
+      "loss": 0.0199,
+      "step": 680
+    },
+    {
+      "epoch": 0.6227436823104693,
+      "grad_norm": 0.2705540060997009,
+      "learning_rate": 2.419300033259798e-05,
+      "loss": 0.0207,
+      "step": 690
+    },
+    {
+      "epoch": 0.631768953068592,
+      "grad_norm": 0.2031233012676239,
+      "learning_rate": 2.279116432543705e-05,
+      "loss": 0.0195,
+      "step": 700
+    },
+    {
+      "epoch": 0.6407942238267148,
+      "grad_norm": 0.3306087553501129,
+      "learning_rate": 2.1419080780610123e-05,
+      "loss": 0.0237,
+      "step": 710
+    },
+    {
+      "epoch": 0.6498194945848376,
+      "grad_norm": 0.2521066963672638,
+      "learning_rate": 2.0078250051328784e-05,
+      "loss": 0.0201,
+      "step": 720
+    },
+    {
+      "epoch": 0.6588447653429603,
+      "grad_norm": 0.17102816700935364,
+      "learning_rate": 1.877013831630961e-05,
+      "loss": 0.0157,
+      "step": 730
+    },
+    {
+      "epoch": 0.6678700361010831,
+      "grad_norm": 0.2178596407175064,
+      "learning_rate": 1.749617597652934e-05,
+      "loss": 0.0212,
+      "step": 740
+    },
+    {
+      "epoch": 0.6768953068592057,
+      "grad_norm": 0.23762241005897522,
+      "learning_rate": 1.62577560911024e-05,
+      "loss": 0.0198,
+      "step": 750
+    },
+    {
+      "epoch": 0.6859205776173285,
+      "grad_norm": 0.24799777567386627,
+      "learning_rate": 1.5056232853991209e-05,
+      "loss": 0.0233,
+      "step": 760
+    },
+    {
+      "epoch": 0.6949458483754513,
+      "grad_norm": 0.1982533186674118,
+      "learning_rate": 1.389292011321498e-05,
+      "loss": 0.0177,
+      "step": 770
+    },
+    {
+      "epoch": 0.703971119133574,
+      "grad_norm": 0.27399930357933044,
+      "learning_rate": 1.2769089934176126e-05,
+      "loss": 0.0189,
+      "step": 780
+    },
+    {
+      "epoch": 0.7129963898916968,
+      "grad_norm": 0.17227351665496826,
+      "learning_rate": 1.1685971208675539e-05,
+      "loss": 0.0184,
+      "step": 790
+    },
+    {
+      "epoch": 0.7220216606498195,
+      "grad_norm": 0.1276431679725647,
+      "learning_rate": 1.0644748311137376e-05,
+      "loss": 0.0183,
+      "step": 800
+    },
+    {
+      "epoch": 0.7310469314079422,
+      "grad_norm": 0.20766472816467285,
+      "learning_rate": 9.646559803512994e-06,
+      "loss": 0.019,
+      "step": 810
+    },
+    {
+      "epoch": 0.740072202166065,
+      "grad_norm": 0.17726139724254608,
+      "learning_rate": 8.692497190280224e-06,
+      "loss": 0.0178,
+      "step": 820
+    },
+    {
+      "epoch": 0.7490974729241877,
+      "grad_norm": 0.2659638226032257,
+      "learning_rate": 7.783603724899257e-06,
+      "loss": 0.0186,
+      "step": 830
+    },
+    {
+      "epoch": 0.7581227436823105,
+      "grad_norm": 0.1689656376838684,
+      "learning_rate": 6.92087326903022e-06,
+      "loss": 0.0179,
+      "step": 840
+    },
+    {
+      "epoch": 0.7671480144404332,
+      "grad_norm": 0.2008303552865982,
+      "learning_rate": 6.1052492057601275e-06,
+      "loss": 0.0166,
+      "step": 850
+    },
+    {
+      "epoch": 0.776173285198556,
+      "grad_norm": 0.2688104212284088,
+      "learning_rate": 5.337623408027293e-06,
+      "loss": 0.0201,
+      "step": 860
+    },
+    {
+      "epoch": 0.7851985559566786,
+      "grad_norm": 0.16218852996826172,
+      "learning_rate": 4.618835263371396e-06,
+      "loss": 0.0182,
+      "step": 870
+    },
+    {
+      "epoch": 0.7942238267148014,
+      "grad_norm": 0.19471345841884613,
+      "learning_rate": 3.949670756075447e-06,
+      "loss": 0.0178,
+      "step": 880
+    },
+    {
+      "epoch": 0.8032490974729242,
+      "grad_norm": 0.19546721875667572,
+      "learning_rate": 3.3308616077036115e-06,
+      "loss": 0.0178,
+      "step": 890
+    },
+    {
+      "epoch": 0.8122743682310469,
+      "grad_norm": 0.17014919221401215,
+      "learning_rate": 2.7630844769743757e-06,
+      "loss": 0.0152,
+      "step": 900
+    },
+    {
+      "epoch": 0.8212996389891697,
+      "grad_norm": 0.17462334036827087,
+      "learning_rate": 2.2469602198441573e-06,
+      "loss": 0.0159,
+      "step": 910
+    },
+    {
+      "epoch": 0.8303249097472925,
+      "grad_norm": 0.1929808259010315,
+      "learning_rate": 1.7830532106104747e-06,
+      "loss": 0.0152,
+      "step": 920
+    },
+    {
+      "epoch": 0.8393501805054152,
+      "grad_norm": 0.16477197408676147,
+      "learning_rate": 1.3718707247769135e-06,
+      "loss": 0.0159,
+      "step": 930
+    },
+    {
+      "epoch": 0.8483754512635379,
+      "grad_norm": 0.18839409947395325,
+      "learning_rate": 1.0138623843548078e-06,
+      "loss": 0.0143,
+      "step": 940
+    },
+    {
+      "epoch": 0.8574007220216606,
+      "grad_norm": 0.19429394602775574,
+      "learning_rate": 7.094196662081831e-07,
+      "loss": 0.0173,
+      "step": 950
+    },
+    {
+      "epoch": 0.8664259927797834,
+      "grad_norm": 0.15573933720588684,
+      "learning_rate": 4.5887547397955864e-07,
+      "loss": 0.0152,
+      "step": 960
+    },
+    {
+      "epoch": 0.8754512635379061,
+      "grad_norm": 0.16420625150203705,
+      "learning_rate": 2.625037740646763e-07,
+      "loss": 0.0181,
+      "step": 970
+    },
+    {
+      "epoch": 0.8844765342960289,
+      "grad_norm": 0.14045004546642303,
+      "learning_rate": 1.2051929603428825e-07,
+      "loss": 0.0163,
+      "step": 980
+    },
+    {
+      "epoch": 0.8935018050541517,
+      "grad_norm": 0.20185790956020355,
+      "learning_rate": 3.3077297830541584e-08,
+      "loss": 0.0189,
+      "step": 990
+    },
+    {
+      "epoch": 0.9025270758122743,
+      "grad_norm": 0.2897006571292877,
+      "learning_rate": 2.7339599464326627e-10,
+      "loss": 0.0205,
+      "step": 1000
+    },
+    {
+      "epoch": 0.9025270758122743,
+      "step": 1000,
+      "total_flos": 0.0,
+      "train_loss": 0.04500286555290222,
+      "train_runtime": 685.5246,
+      "train_samples_per_second": 70.019,
+      "train_steps_per_second": 1.459
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 48,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:783b80d81286a5b55670d0b48c8fb948472e733210763a68cc74c07e6c359f64
+size 5304