CharlesLi committed on
Commit
bbffc47
·
verified ·
1 Parent(s): b39b2ba

Model save

Browse files
Files changed (4) hide show
  1. README.md +2 -2
  2. all_results.json +3 -3
  3. train_results.json +3 -3
  4. trainer_state.json +9 -9
README.md CHANGED
@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 0.8973
24
 
25
  ## Model description
26
 
@@ -55,7 +55,7 @@ The following hyperparameters were used during training:
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
- | 1.8904 | 0.8 | 2 | 0.8973 |
59
 
60
 
61
  ### Framework versions
 
20
 
21
  This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.8974
24
 
25
  ## Model description
26
 
 
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
+ | 1.8904 | 0.8 | 2 | 0.8974 |
59
 
60
 
61
  ### Framework versions
all_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 0.8,
3
  "total_flos": 1406258997362688.0,
4
  "train_loss": 1.9418977499008179,
5
- "train_runtime": 371.4468,
6
  "train_samples": 100,
7
- "train_samples_per_second": 0.054,
8
- "train_steps_per_second": 0.005
9
  }
 
2
  "epoch": 0.8,
3
  "total_flos": 1406258997362688.0,
4
  "train_loss": 1.9418977499008179,
5
+ "train_runtime": 22.4337,
6
  "train_samples": 100,
7
+ "train_samples_per_second": 0.892,
8
+ "train_steps_per_second": 0.089
9
  }
train_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 0.8,
3
  "total_flos": 1406258997362688.0,
4
  "train_loss": 1.9418977499008179,
5
- "train_runtime": 371.4468,
6
  "train_samples": 100,
7
- "train_samples_per_second": 0.054,
8
- "train_steps_per_second": 0.005
9
  }
 
2
  "epoch": 0.8,
3
  "total_flos": 1406258997362688.0,
4
  "train_loss": 1.9418977499008179,
5
+ "train_runtime": 22.4337,
6
  "train_samples": 100,
7
+ "train_samples_per_second": 0.892,
8
+ "train_steps_per_second": 0.089
9
  }
trainer_state.json CHANGED
@@ -10,17 +10,17 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.4,
13
- "grad_norm": 1.2140933275222778,
14
  "learning_rate": 0.0002,
15
  "loss": 1.8904,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.8,
20
- "eval_loss": 0.8972685933113098,
21
- "eval_runtime": 0.9262,
22
- "eval_samples_per_second": 3.239,
23
- "eval_steps_per_second": 1.08,
24
  "step": 2
25
  },
26
  {
@@ -28,16 +28,16 @@
28
  "step": 2,
29
  "total_flos": 1406258997362688.0,
30
  "train_loss": 1.9418977499008179,
31
- "train_runtime": 371.4468,
32
- "train_samples_per_second": 0.054,
33
- "train_steps_per_second": 0.005
34
  }
35
  ],
36
  "logging_steps": 5,
37
  "max_steps": 2,
38
  "num_input_tokens_seen": 0,
39
  "num_train_epochs": 1,
40
- "save_steps": 100,
41
  "stateful_callbacks": {
42
  "TrainerControl": {
43
  "args": {
 
10
  "log_history": [
11
  {
12
  "epoch": 0.4,
13
+ "grad_norm": 1.2139586210250854,
14
  "learning_rate": 0.0002,
15
  "loss": 1.8904,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.8,
20
+ "eval_loss": 0.8973897099494934,
21
+ "eval_runtime": 0.919,
22
+ "eval_samples_per_second": 3.264,
23
+ "eval_steps_per_second": 1.088,
24
  "step": 2
25
  },
26
  {
 
28
  "step": 2,
29
  "total_flos": 1406258997362688.0,
30
  "train_loss": 1.9418977499008179,
31
+ "train_runtime": 22.4337,
32
+ "train_samples_per_second": 0.892,
33
+ "train_steps_per_second": 0.089
34
  }
35
  ],
36
  "logging_steps": 5,
37
  "max_steps": 2,
38
  "num_input_tokens_seen": 0,
39
  "num_train_epochs": 1,
40
+ "save_steps": 1000,
41
  "stateful_callbacks": {
42
  "TrainerControl": {
43
  "args": {