KrafterDen commited on
Commit
607317f
·
verified ·
1 Parent(s): 698904a

Training in progress, step 100, checkpoint

Browse files
checkpoint-100/README.md CHANGED
@@ -202,4 +202,5 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
202
  ### Framework versions
203
 
204
  - PEFT 0.9.0
205
- - PEFT 0.8.2
 
 
202
  ### Framework versions
203
 
204
  - PEFT 0.9.0
205
+ - PEFT 0.8.2
206
+ - PEFT 0.7.1
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:274d63560e28ff9071de84560968bf675cbef8e9fd1c92a42018142d4c84bf60
3
  size 9443384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb59f848c88a5b1b5cab3242d4c331dc775e603ff7609a07f42436d4de84a99b
3
  size 9443384
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6477c1f6b7f8c73eeb14d7c4314be2a685cfe187dcdaaa3ae5408f640288400b
3
  size 18914450
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57188196c44f6f175fe4a57663b3d96a4c9aa25b1a71e5c17d4937f7a159ec03
3
  size 18914450
checkpoint-100/trainer_state.json CHANGED
@@ -10,79 +10,79 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.42,
13
- "grad_norm": 0.1815568506717682,
14
  "learning_rate": 2.9999999999999997e-05,
15
- "loss": 3.8657,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.84,
20
- "grad_norm": 0.23455914855003357,
21
  "learning_rate": 5.9999999999999995e-05,
22
- "loss": 3.8223,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.25,
27
- "grad_norm": 0.32260793447494507,
28
  "learning_rate": 8.999999999999999e-05,
29
- "loss": 3.7255,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 1.67,
34
- "grad_norm": 0.38705918192863464,
35
  "learning_rate": 0.00011999999999999999,
36
- "loss": 3.4952,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 2.09,
41
- "grad_norm": 0.5273059606552124,
42
  "learning_rate": 0.00015,
43
- "loss": 3.098,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 2.51,
48
- "grad_norm": 0.6030514240264893,
49
  "learning_rate": 0.00017999999999999998,
50
- "loss": 2.5299,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 2.92,
55
- "grad_norm": 0.459722101688385,
56
  "learning_rate": 0.00020999999999999998,
57
- "loss": 1.899,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 3.34,
62
- "grad_norm": 0.1655016839504242,
63
  "learning_rate": 0.00023999999999999998,
64
- "loss": 1.6018,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 3.76,
69
- "grad_norm": 0.10938003659248352,
70
  "learning_rate": 0.00027,
71
- "loss": 1.4726,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 4.18,
76
- "grad_norm": 0.09813433140516281,
77
  "learning_rate": 0.0003,
78
- "loss": 1.4336,
79
  "step": 100
80
  }
81
  ],
82
  "logging_steps": 10,
83
- "max_steps": 300,
84
  "num_input_tokens_seen": 0,
85
- "num_train_epochs": 14,
86
  "save_steps": 100,
87
  "total_flos": 1.863006384782131e+16,
88
  "train_batch_size": 4,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.42,
13
+ "grad_norm": 0.18964600563049316,
14
  "learning_rate": 2.9999999999999997e-05,
15
+ "loss": 3.866,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.84,
20
+ "grad_norm": 0.240617036819458,
21
  "learning_rate": 5.9999999999999995e-05,
22
+ "loss": 3.8202,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.25,
27
+ "grad_norm": 0.32437172532081604,
28
  "learning_rate": 8.999999999999999e-05,
29
+ "loss": 3.7224,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 1.67,
34
+ "grad_norm": 0.39009416103363037,
35
  "learning_rate": 0.00011999999999999999,
36
+ "loss": 3.4906,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 2.09,
41
+ "grad_norm": 0.5346343517303467,
42
  "learning_rate": 0.00015,
43
+ "loss": 3.0915,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 2.51,
48
+ "grad_norm": 0.6114345788955688,
49
  "learning_rate": 0.00017999999999999998,
50
+ "loss": 2.5238,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 2.92,
55
+ "grad_norm": 0.4761127531528473,
56
  "learning_rate": 0.00020999999999999998,
57
+ "loss": 1.8914,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 3.34,
62
+ "grad_norm": 0.16753199696540833,
63
  "learning_rate": 0.00023999999999999998,
64
+ "loss": 1.5992,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 3.76,
69
+ "grad_norm": 0.11089170724153519,
70
  "learning_rate": 0.00027,
71
+ "loss": 1.4703,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 4.18,
76
+ "grad_norm": 0.08356910198926926,
77
  "learning_rate": 0.0003,
78
+ "loss": 1.4324,
79
  "step": 100
80
  }
81
  ],
82
  "logging_steps": 10,
83
+ "max_steps": 250,
84
  "num_input_tokens_seen": 0,
85
+ "num_train_epochs": 11,
86
  "save_steps": 100,
87
  "total_flos": 1.863006384782131e+16,
88
  "train_batch_size": 4,
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e101655a2c30ed323838269a1b44a8d1f49e703e9180438d91b23c9fd7ee9379
3
  size 4960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f37336d61cdddd86a38ff87d00b592ca42a595577f8ec34bb99a3ac69a0ed524
3
  size 4960