tokhey committed on
Commit
42df8e2
·
verified ·
1 Parent(s): 01dbcb2

Training in progress, step 80, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09db3585ef14eb3513d30889662c80d7633ff3cd63316b82ef586dfb3b8d7b3e
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4627a384ccfb90bc59a01e8a25eb50a60e5c9d9a7cbc4781a18e1ed2574854e
3
  size 73911112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e71be3572f6a22a56350f714efc69d9efa4b44853add45b77f9c1f30917185b6
3
  size 148053627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca2150c51d63f89d1bc1d324172bc8d004bf72d5ec3681b8fa7f23855405bf6f
3
  size 148053627
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7a8c328066465d046257df5f5a7cb6a0fb0a5264c4955cc6474ec70ef06d414
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97ac64e05ebf46af43b314fca9573cea163c2749e9f4c0fcee8ac426f60f5872
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66992a317492aac8acf09f50982fcc03e33ec7b4ccd6cedceda25e040f1d8941
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:254aa974d264413ca1b60d5980136d8531f6cb9e9b28340b9a2daa2496d48c4c
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9953d7880628d69ca8ebcf3260c665a4b6406ffee3f2366f52cd98bb07b2bfff
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f784d5566457719ec24fd9e3c13871706202c23a784efd0979d8609621573572
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.0533333333333332,
6
  "eval_steps": 100,
7
- "global_step": 40,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -64,6 +64,62 @@
64
  "learning_rate": 0.0004184239109116393,
65
  "loss": 0.7014,
66
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
  ],
69
  "logging_steps": 5,
@@ -83,7 +139,7 @@
83
  "attributes": {}
84
  }
85
  },
86
- "total_flos": 2579880952725504.0,
87
  "train_batch_size": 1,
88
  "trial_name": null,
89
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.1066666666666665,
6
  "eval_steps": 100,
7
+ "global_step": 80,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
64
  "learning_rate": 0.0004184239109116393,
65
  "loss": 0.7014,
66
  "step": 40
67
+ },
68
+ {
69
+ "epoch": 1.1866666666666668,
70
+ "grad_norm": 0.13756456971168518,
71
+ "learning_rate": 0.00038809124324012647,
72
+ "loss": 0.6655,
73
+ "step": 45
74
+ },
75
+ {
76
+ "epoch": 1.32,
77
+ "grad_norm": 0.14693589508533478,
78
+ "learning_rate": 0.00035449008622169586,
79
+ "loss": 0.6315,
80
+ "step": 50
81
+ },
82
+ {
83
+ "epoch": 1.4533333333333334,
84
+ "grad_norm": 0.16421127319335938,
85
+ "learning_rate": 0.0003184157475180208,
86
+ "loss": 0.6525,
87
+ "step": 55
88
+ },
89
+ {
90
+ "epoch": 1.5866666666666667,
91
+ "grad_norm": 0.1636400669813156,
92
+ "learning_rate": 0.00028072207266617854,
93
+ "loss": 0.6398,
94
+ "step": 60
95
+ },
96
+ {
97
+ "epoch": 1.72,
98
+ "grad_norm": 0.16314196586608887,
99
+ "learning_rate": 0.00024230123536095747,
100
+ "loss": 0.6836,
101
+ "step": 65
102
+ },
103
+ {
104
+ "epoch": 1.8533333333333335,
105
+ "grad_norm": 0.1881810426712036,
106
+ "learning_rate": 0.0002040626205458574,
107
+ "loss": 0.6227,
108
+ "step": 70
109
+ },
110
+ {
111
+ "epoch": 1.9866666666666668,
112
+ "grad_norm": 0.1938481628894806,
113
+ "learning_rate": 0.00016691130013008512,
114
+ "loss": 0.6343,
115
+ "step": 75
116
+ },
117
+ {
118
+ "epoch": 2.1066666666666665,
119
+ "grad_norm": 0.18422311544418335,
120
+ "learning_rate": 0.0001317266107909975,
121
+ "loss": 0.6193,
122
+ "step": 80
123
  }
124
  ],
125
  "logging_steps": 5,
 
139
  "attributes": {}
140
  }
141
  },
142
+ "total_flos": 5159761905451008.0,
143
  "train_batch_size": 1,
144
  "trial_name": null,
145
  "trial_params": null