Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

config.json +64 -0
experiment_cfg/metadata.json +363 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +0 -0
trainer_state.json +743 -0
training_args.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "action_dim": 32,
+  "action_head_cfg": {
+    "action_dim": 32,
+    "action_horizon": 16,
+    "add_pos_embed": true,
+    "backbone_embedding_dim": 2048,
+    "diffusion_model_cfg": {
+      "attention_head_dim": 48,
+      "cross_attention_dim": 2048,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "interleave_self_attention": true,
+      "norm_type": "ada_norm",
+      "num_attention_heads": 32,
+      "num_layers": 16,
+      "output_dim": 1024,
+      "positional_embeddings": null
+    },
+    "hidden_size": 1024,
+    "input_embedding_dim": 1536,
+    "max_action_dim": 32,
+    "max_state_dim": 64,
+    "model_dtype": "float32",
+    "noise_beta_alpha": 1.5,
+    "noise_beta_beta": 1.0,
+    "noise_s": 0.999,
+    "num_inference_timesteps": 4,
+    "num_target_vision_tokens": 32,
+    "num_timestep_buckets": 1000,
+    "tune_diffusion_model": true,
+    "tune_projector": true,
+    "use_vlln": true,
+    "vl_self_attention_cfg": {
+      "attention_head_dim": 64,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "num_attention_heads": 32,
+      "num_layers": 4,
+      "positional_embeddings": null
+    }
+  },
+  "action_horizon": 16,
+  "architectures": [
+    "GR00T_N1_5"
+  ],
+  "attn_implementation": null,
+  "backbone_cfg": {
+    "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
+    "load_bf16": false,
+    "project_to_dim": null,
+    "reproject_vision": false,
+    "select_layer": 12,
+    "tune_llm": false,
+    "tune_visual": true,
+    "use_flash_attention": true
+  },
+  "compute_dtype": "bfloat16",
+  "hidden_size": 2048,
+  "model_dtype": "float32",
+  "model_type": "gr00t_n1_5",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3"
+}

experiment_cfg/metadata.json ADDED Viewed

	@@ -0,0 +1,363 @@

+{
+    "new_embodiment": {
+        "statistics": {
+            "state": {
+                "left_arm": {
+                    "max": [
+                        31.097339630126953,
+                        87.07456970214844,
+                        99.29947662353516,
+                        75.39047241210938,
+                        -39.19413757324219
+                    ],
+                    "min": [
+                        -37.65949630737305,
+                        -96.6693115234375,
+                        -98.07355499267578,
+                        -72.19047546386719,
+                        -59.511600494384766
+                    ],
+                    "mean": [
+                        5.31805419921875,
+                        -73.24604797363281,
+                        84.5744857788086,
+                        53.659244537353516,
+                        -52.5255012512207
+                    ],
+                    "std": [
+                        8.872116088867188,
+                        47.90742492675781,
+                        32.04811096191406,
+                        43.50612258911133,
+                        3.739243984222412
+                    ],
+                    "q01": [
+                        -24.316442489624023,
+                        -96.6693115234375,
+                        -18.388792037963867,
+                        -54.05714416503906,
+                        -56.58119583129883
+                    ],
+                    "q99": [
+                        24.826831817626953,
+                        49.59846878051758,
+                        99.2122573852539,
+                        75.29190063476562,
+                        -40.17094039916992
+                    ]
+                },
+                "gripper1": {
+                    "max": [
+                        65.40178680419922
+                    ],
+                    "min": [
+                        0.1488095223903656
+                    ],
+                    "mean": [
+                        14.567317008972168
+                    ],
+                    "std": [
+                        7.672760486602783
+                    ],
+                    "q01": [
+                        0.5208333134651184
+                    ],
+                    "q99": [
+                        44.12202453613281
+                    ]
+                },
+                "right_arm": {
+                    "max": [
+                        39.934234619140625,
+                        76.15731048583984,
+                        99.3824462890625,
+                        76.23355102539062,
+                        38.59306335449219
+                    ],
+                    "min": [
+                        -33.64998245239258,
+                        -98.0335922241211,
+                        -49.80149841308594,
+                        -95.80592346191406,
+                        -64.38690948486328
+                    ],
+                    "mean": [
+                        1.84208345413208,
+                        -20.025339126586914,
+                        60.03025817871094,
+                        -8.195182800292969,
+                        -47.5246467590332
+                    ],
+                    "std": [
+                        11.903837203979492,
+                        62.466819763183594,
+                        35.845306396484375,
+                        63.938392639160156,
+                        17.236434936523438
+                    ],
+                    "q01": [
+                        -22.908294677734375,
+                        -97.6239242553711,
+                        -20.688133239746094,
+                        -94.90131378173828,
+                        -58.133853912353516
+                    ],
+                    "q99": [
+                        29.557910919189453,
+                        67.06267547607422,
+                        99.20600128173828,
+                        75.9868392944336,
+                        22.66731834411621
+                    ]
+                },
+                "gripper2": {
+                    "max": [
+                        91.74726104736328
+                    ],
+                    "min": [
+                        2.3210830688476562
+                    ],
+                    "mean": [
+                        11.800792694091797
+                    ],
+                    "std": [
+                        16.004310607910156
+                    ],
+                    "q01": [
+                        2.3855576515197754
+                    ],
+                    "q99": [
+                        67.69825744628906
+                    ]
+                }
+            },
+            "action": {
+                "left_arm": {
+                    "max": [
+                        33.853736877441406,
+                        90.75993347167969,
+                        100.0,
+                        76.46582794189453,
+                        -39.19413757324219
+                    ],
+                    "min": [
+                        -37.88003158569336,
+                        -96.45941162109375,
+                        -99.63386535644531,
+                        -73.87788391113281,
+                        -59.85348129272461
+                    ],
+                    "mean": [
+                        5.2239155769348145,
+                        -73.25548553466797,
+                        84.9612808227539,
+                        54.047916412353516,
+                        -52.520851135253906
+                    ],
+                    "std": [
+                        8.930574417114258,
+                        47.422481536865234,
+                        32.83000946044922,
+                        44.03689193725586,
+                        3.7615270614624023
+                    ],
+                    "q01": [
+                        -24.23993492126465,
+                        -96.37305450439453,
+                        -19.450801849365234,
+                        -54.6300048828125,
+                        -56.630035400390625
+                    ],
+                    "q99": [
+                        25.390304565429688,
+                        48.96372985839844,
+                        99.81693267822266,
+                        76.142333984375,
+                        -40.17094039916992
+                    ]
+                },
+                "gripper1": {
+                    "max": [
+                        65.94789123535156
+                    ],
+                    "min": [
+                        0.0
+                    ],
+                    "mean": [
+                        14.2286376953125
+                    ],
+                    "std": [
+                        8.476670265197754
+                    ],
+                    "q01": [
+                        0.0
+                    ],
+                    "q99": [
+                        44.290208244323466
+                    ]
+                },
+                "right_arm": {
+                    "max": [
+                        40.137088775634766,
+                        75.02074432373047,
+                        99.91039276123047,
+                        78.80850982666016,
+                        39.21210479736328
+                    ],
+                    "min": [
+                        -33.43488311767578,
+                        -99.004150390625,
+                        -51.7921142578125,
+                        -96.85106658935547,
+                        -64.67518615722656
+                    ],
+                    "mean": [
+                        1.85464346408844,
+                        -21.57091522216797,
+                        59.33572769165039,
+                        -8.245699882507324,
+                        -47.39208221435547
+                    ],
+                    "std": [
+                        11.910544395446777,
+                        61.55514907836914,
+                        36.57780838012695,
+                        64.52499389648438,
+                        17.27439308166504
+                    ],
+                    "q01": [
+                        -22.848438262939453,
+                        -97.84232330322266,
+                        -22.307348251342766,
+                        -95.82978820800781,
+                        -58.20506286621094
+                    ],
+                    "q99": [
+                        29.702970504760742,
+                        65.3941879272461,
+                        99.46236419677734,
+                        77.7872314453125,
+                        22.82807159423828
+                    ]
+                },
+                "gripper2": {
+                    "max": [
+                        92.38652801513672
+                    ],
+                    "min": [
+                        0.14641287922859192
+                    ],
+                    "mean": [
+                        9.158825874328613
+                    ],
+                    "std": [
+                        17.096576690673828
+                    ],
+                    "q01": [
+                        1.6105417013168335
+                    ],
+                    "q99": [
+                        68.52123260498047
+                    ]
+                }
+            }
+        },
+        "modalities": {
+            "video": {
+                "right": {
+                    "resolution": [
+                        640,
+                        480
+                    ],
+                    "channels": 3,
+                    "fps": 30.0
+                },
+                "left": {
+                    "resolution": [
+                        640,
+                        480
+                    ],
+                    "channels": 3,
+                    "fps": 30.0
+                },
+                "top_rgb": {
+                    "resolution": [
+                        640,
+                        480
+                    ],
+                    "channels": 3,
+                    "fps": 30.0
+                }
+            },
+            "state": {
+                "left_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper1": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "right_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper2": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            },
+            "action": {
+                "left_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper1": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "right_arm": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        5
+                    ],
+                    "continuous": true
+                },
+                "gripper2": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            }
+        },
+        "embodiment_tag": "new_embodiment"
+    }
+}

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ad0600328b30fafe76a295afa31ea8d28edba7df32408932d422d6e9fca2a26
+size 4999367032

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e508f2c70e9f0c50a51672c43b3ede7ee8e1afe0a4545cc891bc2e7a402bfd39
+size 2586705312

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json ADDED Viewed

	@@ -0,0 +1,743 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.1520737327188941,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01152073732718894,
+      "grad_norm": 1.6566706895828247,
+      "learning_rate": 1.8e-05,
+      "loss": 0.9518,
+      "step": 10
+    },
+    {
+      "epoch": 0.02304147465437788,
+      "grad_norm": 0.8684583902359009,
+      "learning_rate": 3.8e-05,
+      "loss": 0.1942,
+      "step": 20
+    },
+    {
+      "epoch": 0.03456221198156682,
+      "grad_norm": 0.6393296718597412,
+      "learning_rate": 5.8e-05,
+      "loss": 0.125,
+      "step": 30
+    },
+    {
+      "epoch": 0.04608294930875576,
+      "grad_norm": 0.35887211561203003,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 0.0976,
+      "step": 40
+    },
+    {
+      "epoch": 0.0576036866359447,
+      "grad_norm": 0.7574331164360046,
+      "learning_rate": 9.8e-05,
+      "loss": 0.0844,
+      "step": 50
+    },
+    {
+      "epoch": 0.06912442396313365,
+      "grad_norm": 0.7963987588882446,
+      "learning_rate": 9.997785653888835e-05,
+      "loss": 0.0697,
+      "step": 60
+    },
+    {
+      "epoch": 0.08064516129032258,
+      "grad_norm": 0.7024580240249634,
+      "learning_rate": 9.990133642141359e-05,
+      "loss": 0.0661,
+      "step": 70
+    },
+    {
+      "epoch": 0.09216589861751152,
+      "grad_norm": 0.6337135434150696,
+      "learning_rate": 9.977024992520602e-05,
+      "loss": 0.0577,
+      "step": 80
+    },
+    {
+      "epoch": 0.10368663594470046,
+      "grad_norm": 0.3510747253894806,
+      "learning_rate": 9.95847403914247e-05,
+      "loss": 0.0537,
+      "step": 90
+    },
+    {
+      "epoch": 0.1152073732718894,
+      "grad_norm": 0.4563221037387848,
+      "learning_rate": 9.934501067202117e-05,
+      "loss": 0.052,
+      "step": 100
+    },
+    {
+      "epoch": 0.12672811059907835,
+      "grad_norm": 0.3978365957736969,
+      "learning_rate": 9.905132290792394e-05,
+      "loss": 0.0445,
+      "step": 110
+    },
+    {
+      "epoch": 0.1382488479262673,
+      "grad_norm": 0.40588557720184326,
+      "learning_rate": 9.870399824239117e-05,
+      "loss": 0.0427,
+      "step": 120
+    },
+    {
+      "epoch": 0.1497695852534562,
+      "grad_norm": 0.5251225233078003,
+      "learning_rate": 9.830341646984521e-05,
+      "loss": 0.0378,
+      "step": 130
+    },
+    {
+      "epoch": 0.16129032258064516,
+      "grad_norm": 0.38299882411956787,
+      "learning_rate": 9.785001562057309e-05,
+      "loss": 0.0404,
+      "step": 140
+    },
+    {
+      "epoch": 0.1728110599078341,
+      "grad_norm": 0.2926655411720276,
+      "learning_rate": 9.734429148174675e-05,
+      "loss": 0.0395,
+      "step": 150
+    },
+    {
+      "epoch": 0.18433179723502305,
+      "grad_norm": 0.4306221306324005,
+      "learning_rate": 9.6786797055287e-05,
+      "loss": 0.0361,
+      "step": 160
+    },
+    {
+      "epoch": 0.195852534562212,
+      "grad_norm": 0.3377871811389923,
+      "learning_rate": 9.617814195316411e-05,
+      "loss": 0.035,
+      "step": 170
+    },
+    {
+      "epoch": 0.2073732718894009,
+      "grad_norm": 0.40714597702026367,
+      "learning_rate": 9.551899173079607e-05,
+      "loss": 0.0332,
+      "step": 180
+    },
+    {
+      "epoch": 0.21889400921658986,
+      "grad_norm": 0.40230241417884827,
+      "learning_rate": 9.481006715927351e-05,
+      "loss": 0.0321,
+      "step": 190
+    },
+    {
+      "epoch": 0.2304147465437788,
+      "grad_norm": 0.41335731744766235,
+      "learning_rate": 9.405214343720707e-05,
+      "loss": 0.0319,
+      "step": 200
+    },
+    {
+      "epoch": 0.24193548387096775,
+      "grad_norm": 0.20366613566875458,
+      "learning_rate": 9.32460493430591e-05,
+      "loss": 0.0276,
+      "step": 210
+    },
+    {
+      "epoch": 0.2534562211981567,
+      "grad_norm": 0.24148087203502655,
+      "learning_rate": 9.239266632888659e-05,
+      "loss": 0.0331,
+      "step": 220
+    },
+    {
+      "epoch": 0.26497695852534564,
+      "grad_norm": 0.31080925464630127,
+      "learning_rate": 9.14929275564863e-05,
+      "loss": 0.0305,
+      "step": 230
+    },
+    {
+      "epoch": 0.2764976958525346,
+      "grad_norm": 0.2609555125236511,
+      "learning_rate": 9.0547816876996e-05,
+      "loss": 0.0277,
+      "step": 240
+    },
+    {
+      "epoch": 0.2880184331797235,
+      "grad_norm": 0.35045984387397766,
+      "learning_rate": 8.955836775506776e-05,
+      "loss": 0.0254,
+      "step": 250
+    },
+    {
+      "epoch": 0.2995391705069124,
+      "grad_norm": 0.2834389805793762,
+      "learning_rate": 8.852566213878947e-05,
+      "loss": 0.0272,
+      "step": 260
+    },
+    {
+      "epoch": 0.31105990783410137,
+      "grad_norm": 0.48804351687431335,
+      "learning_rate": 8.745082927659047e-05,
+      "loss": 0.0273,
+      "step": 270
+    },
+    {
+      "epoch": 0.3225806451612903,
+      "grad_norm": 0.36219653487205505,
+      "learning_rate": 8.633504448242505e-05,
+      "loss": 0.0274,
+      "step": 280
+    },
+    {
+      "epoch": 0.33410138248847926,
+      "grad_norm": 0.28334954380989075,
+      "learning_rate": 8.517952785058385e-05,
+      "loss": 0.0244,
+      "step": 290
+    },
+    {
+      "epoch": 0.3456221198156682,
+      "grad_norm": 0.37658995389938354,
+      "learning_rate": 8.398554292153866e-05,
+      "loss": 0.0234,
+      "step": 300
+    },
+    {
+      "epoch": 0.35714285714285715,
+      "grad_norm": 0.4306541979312897,
+      "learning_rate": 8.275439530027948e-05,
+      "loss": 0.0255,
+      "step": 310
+    },
+    {
+      "epoch": 0.3686635944700461,
+      "grad_norm": 0.3718424141407013,
+      "learning_rate": 8.148743122865463e-05,
+      "loss": 0.026,
+      "step": 320
+    },
+    {
+      "epoch": 0.38018433179723504,
+      "grad_norm": 0.40250667929649353,
+      "learning_rate": 8.018603611327504e-05,
+      "loss": 0.0234,
+      "step": 330
+    },
+    {
+      "epoch": 0.391705069124424,
+      "grad_norm": 0.3549322187900543,
+      "learning_rate": 7.88516330105925e-05,
+      "loss": 0.0246,
+      "step": 340
+    },
+    {
+      "epoch": 0.4032258064516129,
+      "grad_norm": 0.3418162763118744,
+      "learning_rate": 7.748568107080832e-05,
+      "loss": 0.0214,
+      "step": 350
+    },
+    {
+      "epoch": 0.4147465437788018,
+      "grad_norm": 0.4048205614089966,
+      "learning_rate": 7.608967394231387e-05,
+      "loss": 0.021,
+      "step": 360
+    },
+    {
+      "epoch": 0.42626728110599077,
+      "grad_norm": 0.2962040603160858,
+      "learning_rate": 7.466513813840825e-05,
+      "loss": 0.0229,
+      "step": 370
+    },
+    {
+      "epoch": 0.4377880184331797,
+      "grad_norm": 0.26386022567749023,
+      "learning_rate": 7.32136313680782e-05,
+      "loss": 0.021,
+      "step": 380
+    },
+    {
+      "epoch": 0.44930875576036866,
+      "grad_norm": 0.18067125976085663,
+      "learning_rate": 7.173674083266624e-05,
+      "loss": 0.0207,
+      "step": 390
+    },
+    {
+      "epoch": 0.4608294930875576,
+      "grad_norm": 0.24752575159072876,
+      "learning_rate": 7.023608149028937e-05,
+      "loss": 0.023,
+      "step": 400
+    },
+    {
+      "epoch": 0.47235023041474655,
+      "grad_norm": 0.24577368795871735,
+      "learning_rate": 6.871329428990602e-05,
+      "loss": 0.0199,
+      "step": 410
+    },
+    {
+      "epoch": 0.4838709677419355,
+      "grad_norm": 0.20847314596176147,
+      "learning_rate": 6.71700443769625e-05,
+      "loss": 0.0214,
+      "step": 420
+    },
+    {
+      "epoch": 0.49539170506912444,
+      "grad_norm": 0.19042600691318512,
+      "learning_rate": 6.56080192725808e-05,
+      "loss": 0.019,
+      "step": 430
+    },
+    {
+      "epoch": 0.5069124423963134,
+      "grad_norm": 0.3057345449924469,
+      "learning_rate": 6.402892702827916e-05,
+      "loss": 0.0204,
+      "step": 440
+    },
+    {
+      "epoch": 0.5184331797235023,
+      "grad_norm": 0.27462145686149597,
+      "learning_rate": 6.243449435824276e-05,
+      "loss": 0.0198,
+      "step": 450
+    },
+    {
+      "epoch": 0.5299539170506913,
+      "grad_norm": 0.2001851201057434,
+      "learning_rate": 6.0826464751186994e-05,
+      "loss": 0.0189,
+      "step": 460
+    },
+    {
+      "epoch": 0.5414746543778802,
+      "grad_norm": 0.2672295570373535,
+      "learning_rate": 5.9206596563878357e-05,
+      "loss": 0.0178,
+      "step": 470
+    },
+    {
+      "epoch": 0.5529953917050692,
+      "grad_norm": 0.3161514699459076,
+      "learning_rate": 5.757666109839702e-05,
+      "loss": 0.0213,
+      "step": 480
+    },
+    {
+      "epoch": 0.5645161290322581,
+      "grad_norm": 0.22500912845134735,
+      "learning_rate": 5.5938440665244006e-05,
+      "loss": 0.0191,
+      "step": 490
+    },
+    {
+      "epoch": 0.576036866359447,
+      "grad_norm": 0.32964590191841125,
+      "learning_rate": 5.4293726634410855e-05,
+      "loss": 0.0194,
+      "step": 500
+    },
+    {
+      "epoch": 0.5875576036866359,
+      "grad_norm": 0.2184433490037918,
+      "learning_rate": 5.264431747654284e-05,
+      "loss": 0.0181,
+      "step": 510
+    },
+    {
+      "epoch": 0.5990783410138248,
+      "grad_norm": 0.22752192616462708,
+      "learning_rate": 5.0992016796337686e-05,
+      "loss": 0.0153,
+      "step": 520
+    },
+    {
+      "epoch": 0.6105990783410138,
+      "grad_norm": 0.17904232442378998,
+      "learning_rate": 4.93386313603304e-05,
+      "loss": 0.017,
+      "step": 530
+    },
+    {
+      "epoch": 0.6221198156682027,
+      "grad_norm": 0.30245211720466614,
+      "learning_rate": 4.7685969121220456e-05,
+      "loss": 0.017,
+      "step": 540
+    },
+    {
+      "epoch": 0.6336405529953917,
+      "grad_norm": 0.23163466155529022,
+      "learning_rate": 4.60358372409022e-05,
+      "loss": 0.0181,
+      "step": 550
+    },
+    {
+      "epoch": 0.6451612903225806,
+      "grad_norm": 0.22935254871845245,
+      "learning_rate": 4.439004011435979e-05,
+      "loss": 0.0181,
+      "step": 560
+    },
+    {
+      "epoch": 0.6566820276497696,
+      "grad_norm": 0.220436692237854,
+      "learning_rate": 4.275037739658771e-05,
+      "loss": 0.017,
+      "step": 570
+    },
+    {
+      "epoch": 0.6682027649769585,
+      "grad_norm": 0.26229238510131836,
+      "learning_rate": 4.111864203469457e-05,
+      "loss": 0.0178,
+      "step": 580
+    },
+    {
+      "epoch": 0.6797235023041475,
+      "grad_norm": 0.19217033684253693,
+      "learning_rate": 3.949661830734172e-05,
+      "loss": 0.0152,
+      "step": 590
+    },
+    {
+      "epoch": 0.6912442396313364,
+      "grad_norm": 0.20372073352336884,
+      "learning_rate": 3.788607987366069e-05,
+      "loss": 0.0159,
+      "step": 600
+    },
+    {
+      "epoch": 0.7027649769585254,
+      "grad_norm": 0.1933118999004364,
+      "learning_rate": 3.628878783378302e-05,
+      "loss": 0.0157,
+      "step": 610
+    },
+    {
+      "epoch": 0.7142857142857143,
+      "grad_norm": 0.19851423799991608,
+      "learning_rate": 3.470648880310313e-05,
+      "loss": 0.0145,
+      "step": 620
+    },
+    {
+      "epoch": 0.7258064516129032,
+      "grad_norm": 0.16878198087215424,
+      "learning_rate": 3.3140913002379995e-05,
+      "loss": 0.0157,
+      "step": 630
+    },
+    {
+      "epoch": 0.7373271889400922,
+      "grad_norm": 0.21986301243305206,
+      "learning_rate": 3.1593772365766105e-05,
+      "loss": 0.0162,
+      "step": 640
+    },
+    {
+      "epoch": 0.7488479262672811,
+      "grad_norm": 0.13134035468101501,
+      "learning_rate": 3.006675866883275e-05,
+      "loss": 0.0157,
+      "step": 650
+    },
+    {
+      "epoch": 0.7603686635944701,
+      "grad_norm": 0.1785222440958023,
+      "learning_rate": 2.8561541678638142e-05,
+      "loss": 0.0147,
+      "step": 660
+    },
+    {
+      "epoch": 0.771889400921659,
+      "grad_norm": 0.16605904698371887,
+      "learning_rate": 2.707976732786166e-05,
+      "loss": 0.0133,
+      "step": 670
+    },
+    {
+      "epoch": 0.783410138248848,
+      "grad_norm": 0.18675336241722107,
+      "learning_rate": 2.562305591500069e-05,
+      "loss": 0.0145,
+      "step": 680
+    },
+    {
+      "epoch": 0.7949308755760369,
+      "grad_norm": 0.18020185828208923,
+      "learning_rate": 2.419300033259798e-05,
+      "loss": 0.0151,
+      "step": 690
+    },
+    {
+      "epoch": 0.8064516129032258,
+      "grad_norm": 0.1856432557106018,
+      "learning_rate": 2.279116432543705e-05,
+      "loss": 0.0137,
+      "step": 700
+    },
+    {
+      "epoch": 0.8179723502304147,
+      "grad_norm": 0.1251407414674759,
+      "learning_rate": 2.1419080780610123e-05,
+      "loss": 0.0126,
+      "step": 710
+    },
+    {
+      "epoch": 0.8294930875576036,
+      "grad_norm": 0.20187409222126007,
+      "learning_rate": 2.0078250051328784e-05,
+      "loss": 0.0139,
+      "step": 720
+    },
+    {
+      "epoch": 0.8410138248847926,
+      "grad_norm": 0.16873343288898468,
+      "learning_rate": 1.877013831630961e-05,
+      "loss": 0.0142,
+      "step": 730
+    },
+    {
+      "epoch": 0.8525345622119815,
+      "grad_norm": 0.11382901668548584,
+      "learning_rate": 1.749617597652934e-05,
+      "loss": 0.0129,
+      "step": 740
+    },
+    {
+      "epoch": 0.8640552995391705,
+      "grad_norm": 0.1710187941789627,
+      "learning_rate": 1.62577560911024e-05,
+      "loss": 0.0152,
+      "step": 750
+    },
+    {
+      "epoch": 0.8755760368663594,
+      "grad_norm": 0.12709008157253265,
+      "learning_rate": 1.5056232853991209e-05,
+      "loss": 0.0139,
+      "step": 760
+    },
+    {
+      "epoch": 0.8870967741935484,
+      "grad_norm": 0.15011294186115265,
+      "learning_rate": 1.389292011321498e-05,
+      "loss": 0.0136,
+      "step": 770
+    },
+    {
+      "epoch": 0.8986175115207373,
+      "grad_norm": 0.17724603414535522,
+      "learning_rate": 1.2769089934176126e-05,
+      "loss": 0.0125,
+      "step": 780
+    },
+    {
+      "epoch": 0.9101382488479263,
+      "grad_norm": 0.16647891700267792,
+      "learning_rate": 1.1685971208675539e-05,
+      "loss": 0.0129,
+      "step": 790
+    },
+    {
+      "epoch": 0.9216589861751152,
+      "grad_norm": 0.1402997523546219,
+      "learning_rate": 1.0644748311137376e-05,
+      "loss": 0.0123,
+      "step": 800
+    },
+    {
+      "epoch": 0.9331797235023042,
+      "grad_norm": 0.11741903424263,
+      "learning_rate": 9.646559803512994e-06,
+      "loss": 0.0119,
+      "step": 810
+    },
+    {
+      "epoch": 0.9447004608294931,
+      "grad_norm": 0.14751701056957245,
+      "learning_rate": 8.692497190280224e-06,
+      "loss": 0.0133,
+      "step": 820
+    },
+    {
+      "epoch": 0.956221198156682,
+      "grad_norm": 0.14610819518566132,
+      "learning_rate": 7.783603724899257e-06,
+      "loss": 0.0128,
+      "step": 830
+    },
+    {
+      "epoch": 0.967741935483871,
+      "grad_norm": 0.1284884661436081,
+      "learning_rate": 6.92087326903022e-06,
+      "loss": 0.0116,
+      "step": 840
+    },
+    {
+      "epoch": 0.9792626728110599,
+      "grad_norm": 0.14618724584579468,
+      "learning_rate": 6.1052492057601275e-06,
+      "loss": 0.0137,
+      "step": 850
+    },
+    {
+      "epoch": 0.9907834101382489,
+      "grad_norm": 0.14276568591594696,
+      "learning_rate": 5.337623408027293e-06,
+      "loss": 0.0136,
+      "step": 860
+    },
+    {
+      "epoch": 1.0023041474654377,
+      "grad_norm": 0.10031285136938095,
+      "learning_rate": 4.618835263371396e-06,
+      "loss": 0.0119,
+      "step": 870
+    },
+    {
+      "epoch": 1.0138248847926268,
+      "grad_norm": 0.10763294994831085,
+      "learning_rate": 3.949670756075447e-06,
+      "loss": 0.0111,
+      "step": 880
+    },
+    {
+      "epoch": 1.0253456221198156,
+      "grad_norm": 0.1076810285449028,
+      "learning_rate": 3.3308616077036115e-06,
+      "loss": 0.0109,
+      "step": 890
+    },
+    {
+      "epoch": 1.0368663594470047,
+      "grad_norm": 0.13353325426578522,
+      "learning_rate": 2.7630844769743757e-06,
+      "loss": 0.011,
+      "step": 900
+    },
+    {
+      "epoch": 1.0483870967741935,
+      "grad_norm": 0.10967040807008743,
+      "learning_rate": 2.2469602198441573e-06,
+      "loss": 0.0117,
+      "step": 910
+    },
+    {
+      "epoch": 1.0599078341013826,
+      "grad_norm": 0.16759690642356873,
+      "learning_rate": 1.7830532106104747e-06,
+      "loss": 0.0118,
+      "step": 920
+    },
+    {
+      "epoch": 1.0714285714285714,
+      "grad_norm": 0.15041400492191315,
+      "learning_rate": 1.3718707247769135e-06,
+      "loss": 0.0121,
+      "step": 930
+    },
+    {
+      "epoch": 1.0829493087557605,
+      "grad_norm": 0.11416321247816086,
+      "learning_rate": 1.0138623843548078e-06,
+      "loss": 0.0127,
+      "step": 940
+    },
+    {
+      "epoch": 1.0944700460829493,
+      "grad_norm": 0.12047427892684937,
+      "learning_rate": 7.094196662081831e-07,
+      "loss": 0.0117,
+      "step": 950
+    },
+    {
+      "epoch": 1.1059907834101383,
+      "grad_norm": 0.13395977020263672,
+      "learning_rate": 4.5887547397955864e-07,
+      "loss": 0.0108,
+      "step": 960
+    },
+    {
+      "epoch": 1.1175115207373272,
+      "grad_norm": 0.08437898010015488,
+      "learning_rate": 2.625037740646763e-07,
+      "loss": 0.0123,
+      "step": 970
+    },
+    {
+      "epoch": 1.129032258064516,
+      "grad_norm": 0.07629328966140747,
+      "learning_rate": 1.2051929603428825e-07,
+      "loss": 0.0111,
+      "step": 980
+    },
+    {
+      "epoch": 1.140552995391705,
+      "grad_norm": 0.10458842664957047,
+      "learning_rate": 3.3077297830541584e-08,
+      "loss": 0.0128,
+      "step": 990
+    },
+    {
+      "epoch": 1.1520737327188941,
+      "grad_norm": 0.11208489537239075,
+      "learning_rate": 2.7339599464326627e-10,
+      "loss": 0.0123,
+      "step": 1000
+    },
+    {
+      "epoch": 1.1520737327188941,
+      "step": 1000,
+      "total_flos": 0.0,
+      "train_loss": 0.03519474593549967,
+      "train_runtime": 1870.4299,
+      "train_samples_per_second": 53.464,
+      "train_steps_per_second": 0.535
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 100,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e0ae2c3a8057ef71b3d22f0c8dd728c42aa8b67ed4242ad7cc20cd753eb2a04
+size 5304