ktcapraz committed on
Commit
8aef96b
·
verified ·
1 Parent(s): 690f90c

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5736c925db7f325caa64064fa8c0c51ab5fa9a598482c0300d3b977751127823
3
  size 44457856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cf6560d9731881fd473f9816efb8036955ffb3de008fe809f52d9df144791cf
3
  size 44457856
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:728dfb33a4050dcc23ebfb9ad6464d56fd71ae703afa6f265db4f5f89f59faeb
3
  size 22782603
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34d1617df6520a5eb5d62e3332b9b93cd6d5ec54588750f740f2ab514eead854
3
  size 22782603
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c800b778fa7e115e4c34de8529902de8b61c9a1b4bab3eb8295d06dafff030e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:181c5f0270cf39930062ddfa3767a2481d0c360f120b11f8e25dbf533a1cdaba
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cd0e9d505fbc3f97feb166d29026132bdf14eb3e5c7ff77beebc303ee666f96
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4393a84a3109995aa1202073b039b12062e3189ed89aa0b94ef0510ba843009
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1310cb41dd434edb9dd1c0baa2baac71e510fe550ece0401d9efffee54c9e6ee
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b17ac597f6c83922dfd244cdb7a2ea135e2862a7fff0eee3a1ac2b62bf89fd9
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.08,
6
  "eval_steps": 50,
7
- "global_step": 50,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -51,6 +51,49 @@
51
  "eval_samples_per_second": 0.872,
52
  "eval_steps_per_second": 0.218,
53
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  }
55
  ],
56
  "logging_steps": 10,
@@ -70,7 +113,7 @@
70
  "attributes": {}
71
  }
72
  },
73
- "total_flos": 4455022195126272.0,
74
  "train_batch_size": 2,
75
  "trial_name": null,
76
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.16,
6
  "eval_steps": 50,
7
+ "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
51
  "eval_samples_per_second": 0.872,
52
  "eval_steps_per_second": 0.218,
53
  "step": 50
54
+ },
55
+ {
56
+ "epoch": 0.096,
57
+ "grad_norm": 0.27004000544548035,
58
+ "learning_rate": 0.00018258064516129033,
59
+ "loss": 0.2846,
60
+ "step": 60
61
+ },
62
+ {
63
+ "epoch": 0.112,
64
+ "grad_norm": 0.24109533429145813,
65
+ "learning_rate": 0.00017935483870967742,
66
+ "loss": 0.2828,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 0.128,
71
+ "grad_norm": 0.2596932649612427,
72
+ "learning_rate": 0.0001761290322580645,
73
+ "loss": 0.277,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 0.144,
78
+ "grad_norm": 0.2961156964302063,
79
+ "learning_rate": 0.00017290322580645163,
80
+ "loss": 0.29,
81
+ "step": 90
82
+ },
83
+ {
84
+ "epoch": 0.16,
85
+ "grad_norm": 0.3086676597595215,
86
+ "learning_rate": 0.00016967741935483872,
87
+ "loss": 0.2883,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 0.16,
92
+ "eval_loss": 0.28304216265678406,
93
+ "eval_runtime": 519.1154,
94
+ "eval_samples_per_second": 0.963,
95
+ "eval_steps_per_second": 0.241,
96
+ "step": 100
97
  }
98
  ],
99
  "logging_steps": 10,
 
113
  "attributes": {}
114
  }
115
  },
116
+ "total_flos": 8826978008930304.0,
117
  "train_batch_size": 2,
118
  "trial_name": null,
119
  "trial_params": null