besimray committed on
Commit
564a6e5
·
verified ·
1 Parent(s): 9754365

Training in progress, step 135, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:777125d8bd12de5f7ed18971ab031a8c25535a9628c33bf91a5fc02cd48f84a0
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f892944e1d6553c2988900130a3362ea080ce87049af159434348e43983a67f7
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a85d722f100b15fa6de8db1b7863c44b71b3fce19bc20b18ae46f8b628ed0a26
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f294a2b1a687df224adc5ac3e37eb23eab0bfe8458ff9f9b4712852a5997cf77
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab76824ef5f4a03a5fc43923056d7e1a2adea903a5e98b8bb7f651e3d75cd0f7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbd0b0d00d8a6ce2af47f7a318c5367a4519b639c67ff4d1f9441e0f3c04db1f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e182d30fc85938f253f4b0ba7702798b872e5ce41399e7a5462adca4c40ff6e4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3002a39ac6502366eefa64e828fe85e0b7d2b42f2ce52a223a7439ad2a05fd9b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.736842105263158,
5
  "eval_steps": 8,
6
- "global_step": 130,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1053,6 +1053,41 @@
1053
  "learning_rate": 9.903113209758096e-06,
1054
  "loss": 1.0679,
1055
  "step": 130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1056
  }
1057
  ],
1058
  "logging_steps": 1,
@@ -1072,7 +1107,7 @@
1072
  "attributes": {}
1073
  }
1074
  },
1075
- "total_flos": 1.315875884630016e+16,
1076
  "train_batch_size": 10,
1077
  "trial_name": null,
1078
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.8421052631578947,
5
  "eval_steps": 8,
6
+ "global_step": 135,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1053
  "learning_rate": 9.903113209758096e-06,
1054
  "loss": 1.0679,
1055
  "step": 130
1056
+ },
1057
+ {
1058
+ "epoch": 2.7578947368421054,
1059
+ "grad_norm": 0.5930253267288208,
1060
+ "learning_rate": 8.952245334118414e-06,
1061
+ "loss": 0.8819,
1062
+ "step": 131
1063
+ },
1064
+ {
1065
+ "epoch": 2.7789473684210524,
1066
+ "grad_norm": 0.6247056126594543,
1067
+ "learning_rate": 8.047222744854943e-06,
1068
+ "loss": 0.991,
1069
+ "step": 132
1070
+ },
1071
+ {
1072
+ "epoch": 2.8,
1073
+ "grad_norm": 0.5282688736915588,
1074
+ "learning_rate": 7.1885011480961164e-06,
1075
+ "loss": 0.9508,
1076
+ "step": 133
1077
+ },
1078
+ {
1079
+ "epoch": 2.8210526315789473,
1080
+ "grad_norm": 0.4279923141002655,
1081
+ "learning_rate": 6.37651293602628e-06,
1082
+ "loss": 0.9463,
1083
+ "step": 134
1084
+ },
1085
+ {
1086
+ "epoch": 2.8421052631578947,
1087
+ "grad_norm": 0.4681239426136017,
1088
+ "learning_rate": 5.611666969163243e-06,
1089
+ "loss": 1.1093,
1090
+ "step": 135
1091
  }
1092
  ],
1093
  "logging_steps": 1,
 
1107
  "attributes": {}
1108
  }
1109
  },
1110
+ "total_flos": 1.366908129509376e+16,
1111
  "train_batch_size": 10,
1112
  "trial_name": null,
1113
  "trial_params": null