VedaantJain committed on
Commit
2c89754
·
verified ·
1 Parent(s): 87a2382

Model save

Browse files
README.md CHANGED
@@ -4,7 +4,6 @@ library_name: transformers
4
  model_name: llama_check_tuned
5
  tags:
6
  - generated_from_trainer
7
- - alignment-handbook
8
  - trl
9
  - sft
10
  licence: license
 
4
  model_name: llama_check_tuned
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - sft
9
  licence: license
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 2.0,
3
  "eval_loss": 1.131566047668457,
4
  "eval_runtime": 0.1689,
5
  "eval_samples": 10,
6
  "eval_samples_per_second": 59.209,
7
  "eval_steps_per_second": 11.842,
8
- "total_flos": 239345474732032.0,
9
- "train_loss": 1.1170939207077026,
10
- "train_runtime": 1.5876,
11
  "train_samples": 10,
12
- "train_samples_per_second": 12.597,
13
- "train_steps_per_second": 1.26
14
  }
 
1
  {
2
+ "epoch": 8.0,
3
  "eval_loss": 1.131566047668457,
4
  "eval_runtime": 0.1689,
5
  "eval_samples": 10,
6
  "eval_samples_per_second": 59.209,
7
  "eval_steps_per_second": 11.842,
8
+ "total_flos": 981465273729024.0,
9
+ "train_loss": 1.1090886294841766,
10
+ "train_runtime": 6.846,
11
  "train_samples": 10,
12
+ "train_samples_per_second": 11.686,
13
+ "train_steps_per_second": 1.169
14
  }
config.json CHANGED
@@ -36,6 +36,6 @@
36
  "tie_word_embeddings": true,
37
  "torch_dtype": "bfloat16",
38
  "transformers_version": "4.46.3",
39
- "use_cache": true,
40
  "vocab_size": 128256
41
  }
 
36
  "tie_word_embeddings": true,
37
  "torch_dtype": "bfloat16",
38
  "transformers_version": "4.46.3",
39
+ "use_cache": false,
40
  "vocab_size": 128256
41
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13cbd6d16e927a0c5bad54102514e6e18b4a47b3a6eb911e39d678d328d19f55
3
  size 4965799096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f407073dc27d61cb5af438c45bbd0344ad11c8a240c4d87938d053193ed3846
3
  size 4965799096
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f66ee63b3de299eb9fd89dfbc23a39095197874fd6891f7d18de34c50e4e1a7
3
  size 2247734992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e29f2c22d9ad84bbf25fafb8f5a132bbdab172fbfd9be8639621b96a0a470a0
3
  size 2247734992
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 2.0,
3
- "total_flos": 239345474732032.0,
4
- "train_loss": 1.1170939207077026,
5
- "train_runtime": 1.5876,
6
  "train_samples": 10,
7
- "train_samples_per_second": 12.597,
8
- "train_steps_per_second": 1.26
9
  }
 
1
  {
2
+ "epoch": 8.0,
3
+ "total_flos": 981465273729024.0,
4
+ "train_loss": 1.1090886294841766,
5
+ "train_runtime": 6.846,
6
  "train_samples": 10,
7
+ "train_samples_per_second": 11.686,
8
+ "train_steps_per_second": 1.169
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 2,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -17,39 +17,128 @@
17
  {
18
  "epoch": 1.0,
19
  "eval_loss": 1.131566047668457,
20
- "eval_runtime": 0.1556,
21
- "eval_samples_per_second": 64.262,
22
- "eval_steps_per_second": 12.852,
23
  "step": 1
24
  },
25
  {
26
  "epoch": 2.0,
27
- "learning_rate": 0.0,
28
  "loss": 1.1065,
29
  "step": 2
30
  },
31
  {
32
  "epoch": 2.0,
33
  "eval_loss": 1.131566047668457,
34
- "eval_runtime": 0.1559,
35
- "eval_samples_per_second": 64.123,
36
- "eval_steps_per_second": 12.825,
37
  "step": 2
38
  },
39
  {
40
- "epoch": 2.0,
41
- "step": 2,
42
- "total_flos": 239345474732032.0,
43
- "train_loss": 1.1170939207077026,
44
- "train_runtime": 1.5876,
45
- "train_samples_per_second": 12.597,
46
- "train_steps_per_second": 1.26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  }
48
  ],
49
  "logging_steps": 1,
50
- "max_steps": 2,
51
  "num_input_tokens_seen": 0,
52
- "num_train_epochs": 2,
53
  "save_steps": 500,
54
  "stateful_callbacks": {
55
  "TrainerControl": {
@@ -63,7 +152,7 @@
63
  "attributes": {}
64
  }
65
  },
66
- "total_flos": 239345474732032.0,
67
  "train_batch_size": 4,
68
  "trial_name": null,
69
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.0,
5
  "eval_steps": 500,
6
+ "global_step": 8,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
17
  {
18
  "epoch": 1.0,
19
  "eval_loss": 1.131566047668457,
20
+ "eval_runtime": 0.1572,
21
+ "eval_samples_per_second": 63.617,
22
+ "eval_steps_per_second": 12.723,
23
  "step": 1
24
  },
25
  {
26
  "epoch": 2.0,
27
+ "learning_rate": 1.9009688679024189e-06,
28
  "loss": 1.1065,
29
  "step": 2
30
  },
31
  {
32
  "epoch": 2.0,
33
  "eval_loss": 1.131566047668457,
34
+ "eval_runtime": 0.155,
35
+ "eval_samples_per_second": 64.522,
36
+ "eval_steps_per_second": 12.904,
37
  "step": 2
38
  },
39
  {
40
+ "epoch": 3.0,
41
+ "learning_rate": 1.6234898018587336e-06,
42
+ "loss": 1.1262,
43
+ "step": 3
44
+ },
45
+ {
46
+ "epoch": 3.0,
47
+ "eval_loss": 1.131566047668457,
48
+ "eval_runtime": 0.1549,
49
+ "eval_samples_per_second": 64.547,
50
+ "eval_steps_per_second": 12.909,
51
+ "step": 3
52
+ },
53
+ {
54
+ "epoch": 4.0,
55
+ "grad_norm": 9.71204948425293,
56
+ "learning_rate": 1.2225209339563143e-06,
57
+ "loss": 1.122,
58
+ "step": 4
59
+ },
60
+ {
61
+ "epoch": 4.0,
62
+ "eval_loss": 1.1228824853897095,
63
+ "eval_runtime": 0.1564,
64
+ "eval_samples_per_second": 63.951,
65
+ "eval_steps_per_second": 12.79,
66
+ "step": 4
67
+ },
68
+ {
69
+ "epoch": 5.0,
70
+ "grad_norm": 9.71204948425293,
71
+ "learning_rate": 7.774790660436857e-07,
72
+ "loss": 1.1074,
73
+ "step": 5
74
+ },
75
+ {
76
+ "epoch": 5.0,
77
+ "eval_loss": 1.1228824853897095,
78
+ "eval_runtime": 0.1547,
79
+ "eval_samples_per_second": 64.658,
80
+ "eval_steps_per_second": 12.932,
81
+ "step": 5
82
+ },
83
+ {
84
+ "epoch": 6.0,
85
+ "grad_norm": 9.71204948425293,
86
+ "learning_rate": 3.765101981412665e-07,
87
+ "loss": 1.0914,
88
+ "step": 6
89
+ },
90
+ {
91
+ "epoch": 6.0,
92
+ "eval_loss": 1.1228824853897095,
93
+ "eval_runtime": 0.1548,
94
+ "eval_samples_per_second": 64.58,
95
+ "eval_steps_per_second": 12.916,
96
+ "step": 6
97
+ },
98
+ {
99
+ "epoch": 7.0,
100
+ "grad_norm": 9.71204948425293,
101
+ "learning_rate": 9.903113209758096e-08,
102
+ "loss": 1.0813,
103
+ "step": 7
104
+ },
105
+ {
106
+ "epoch": 7.0,
107
+ "eval_loss": 1.1228824853897095,
108
+ "eval_runtime": 0.1545,
109
+ "eval_samples_per_second": 64.709,
110
+ "eval_steps_per_second": 12.942,
111
+ "step": 7
112
+ },
113
+ {
114
+ "epoch": 8.0,
115
+ "grad_norm": 9.079455375671387,
116
+ "learning_rate": 0.0,
117
+ "loss": 1.1103,
118
+ "step": 8
119
+ },
120
+ {
121
+ "epoch": 8.0,
122
+ "eval_loss": 1.1230531930923462,
123
+ "eval_runtime": 0.1545,
124
+ "eval_samples_per_second": 64.744,
125
+ "eval_steps_per_second": 12.949,
126
+ "step": 8
127
+ },
128
+ {
129
+ "epoch": 8.0,
130
+ "step": 8,
131
+ "total_flos": 981465273729024.0,
132
+ "train_loss": 1.1090886294841766,
133
+ "train_runtime": 6.846,
134
+ "train_samples_per_second": 11.686,
135
+ "train_steps_per_second": 1.169
136
  }
137
  ],
138
  "logging_steps": 1,
139
+ "max_steps": 8,
140
  "num_input_tokens_seen": 0,
141
+ "num_train_epochs": 8,
142
  "save_steps": 500,
143
  "stateful_callbacks": {
144
  "TrainerControl": {
 
152
  "attributes": {}
153
  }
154
  },
155
+ "total_flos": 981465273729024.0,
156
  "train_batch_size": 4,
157
  "trial_name": null,
158
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bfdeb31251f018eed190305404c961d1de16f1d879606964b08caa9ae1fcd9a
3
  size 6968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:033e0f2dc57707a8efb6a46923345fabb007786fea52c40f797b227fcda45a7a
3
  size 6968