peterbeamish commited on
Commit
94febd1
·
1 Parent(s): 4c5e2ee

End of training

Browse files
Files changed (1) hide show
  1. trainer_state.json +63 -35
trainer_state.json CHANGED
@@ -1,64 +1,92 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "eval_steps": 8000,
6
- "global_step": 6048,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.5,
13
- "learning_rate": 0.0,
14
- "loss": 0.0,
15
- "step": 1000
 
 
 
 
 
 
 
16
  },
17
  {
18
- "epoch": 0.99,
19
- "learning_rate": 0.0,
20
  "loss": 0.0,
21
- "step": 2000
22
  },
23
  {
24
- "epoch": 1.49,
25
- "learning_rate": 0.0,
26
- "loss": 0.0,
27
- "step": 3000
 
 
 
 
 
 
 
28
  },
29
  {
30
- "epoch": 1.98,
31
- "learning_rate": 0.0,
32
- "loss": 0.0,
33
- "step": 4000
 
 
 
 
 
 
 
34
  },
35
  {
36
- "epoch": 2.48,
37
- "learning_rate": 0.0,
38
  "loss": 0.0,
39
- "step": 5000
40
  },
41
  {
42
- "epoch": 2.98,
43
- "learning_rate": 0.0,
44
- "loss": 0.0,
45
- "step": 6000
 
 
 
 
 
 
 
46
  },
47
  {
48
- "epoch": 3.0,
49
- "step": 6048,
50
- "total_flos": 2247973873385472.0,
51
  "train_loss": 0.0,
52
- "train_runtime": 538.1748,
53
- "train_samples_per_second": 22.47,
54
- "train_steps_per_second": 11.238
55
  }
56
  ],
57
- "logging_steps": 1000,
58
- "max_steps": 6048,
59
- "num_train_epochs": 3,
60
  "save_steps": 500,
61
- "total_flos": 2247973873385472.0,
62
  "trial_name": null,
63
  "trial_params": null
64
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1008,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "eval_gen_len": 17.1436,
14
+ "eval_loss": NaN,
15
+ "eval_rouge1": 0.0259,
16
+ "eval_rouge2": 0.0047,
17
+ "eval_rougeL": 0.0256,
18
+ "eval_rougeLsum": 0.0255,
19
+ "eval_runtime": 105.9672,
20
+ "eval_samples_per_second": 38.05,
21
+ "eval_steps_per_second": 2.378,
22
+ "step": 252
23
  },
24
  {
25
+ "epoch": 1.98,
26
+ "learning_rate": 2e-05,
27
  "loss": 0.0,
28
+ "step": 500
29
  },
30
  {
31
+ "epoch": 2.0,
32
+ "eval_gen_len": 17.1436,
33
+ "eval_loss": NaN,
34
+ "eval_rouge1": 0.0259,
35
+ "eval_rouge2": 0.0047,
36
+ "eval_rougeL": 0.0256,
37
+ "eval_rougeLsum": 0.0255,
38
+ "eval_runtime": 106.6812,
39
+ "eval_samples_per_second": 37.795,
40
+ "eval_steps_per_second": 2.362,
41
+ "step": 504
42
  },
43
  {
44
+ "epoch": 3.0,
45
+ "eval_gen_len": 17.1436,
46
+ "eval_loss": NaN,
47
+ "eval_rouge1": 0.0259,
48
+ "eval_rouge2": 0.0047,
49
+ "eval_rougeL": 0.0256,
50
+ "eval_rougeLsum": 0.0255,
51
+ "eval_runtime": 108.1549,
52
+ "eval_samples_per_second": 37.28,
53
+ "eval_steps_per_second": 2.33,
54
+ "step": 756
55
  },
56
  {
57
+ "epoch": 3.97,
58
+ "learning_rate": 2e-05,
59
  "loss": 0.0,
60
+ "step": 1000
61
  },
62
  {
63
+ "epoch": 4.0,
64
+ "eval_gen_len": 17.1436,
65
+ "eval_loss": NaN,
66
+ "eval_rouge1": 0.0259,
67
+ "eval_rouge2": 0.0047,
68
+ "eval_rougeL": 0.0256,
69
+ "eval_rougeLsum": 0.0255,
70
+ "eval_runtime": 109.2088,
71
+ "eval_samples_per_second": 36.92,
72
+ "eval_steps_per_second": 2.308,
73
+ "step": 1008
74
  },
75
  {
76
+ "epoch": 4.0,
77
+ "step": 1008,
78
+ "total_flos": 2997298497847296.0,
79
  "train_loss": 0.0,
80
+ "train_runtime": 577.5801,
81
+ "train_samples_per_second": 27.916,
82
+ "train_steps_per_second": 1.745
83
  }
84
  ],
85
+ "logging_steps": 500,
86
+ "max_steps": 1008,
87
+ "num_train_epochs": 4,
88
  "save_steps": 500,
89
+ "total_flos": 2997298497847296.0,
90
  "trial_name": null,
91
  "trial_params": null
92
  }