AnnyNguyen commited on
Commit
a7a847a
·
verified ·
1 Parent(s): 104d9cc

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +84 -20
trainer_state.json CHANGED
@@ -1,45 +1,109 @@
1
  {
2
- "best_global_step": 1846,
3
- "best_metric": 0.8862317158114698,
4
- "best_model_checkpoint": "outputs/bartpho-hsd/checkpoint-best/checkpoint-1846",
5
- "epoch": 2.0,
6
  "eval_steps": 500,
7
- "global_step": 1846,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "grad_norm": 7.859710216522217,
15
  "learning_rate": 4.998769072267837e-05,
16
- "loss": 0.3322,
17
  "step": 923
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_accuracy": 0.885767355467843,
22
- "eval_loss": 0.3262785077095032,
23
- "eval_runtime": 26.1724,
24
- "eval_samples_per_second": 164.563,
25
- "eval_steps_per_second": 5.158,
26
  "step": 923
27
  },
28
  {
29
  "epoch": 2.0,
30
- "grad_norm": 8.771722793579102,
31
  "learning_rate": 4.995072162589518e-05,
32
- "loss": 0.1918,
33
  "step": 1846
34
  },
35
  {
36
  "epoch": 2.0,
37
- "eval_accuracy": 0.8862317158114698,
38
- "eval_loss": 0.4059225618839264,
39
- "eval_runtime": 26.1398,
40
- "eval_samples_per_second": 164.768,
41
- "eval_steps_per_second": 5.165,
42
  "step": 1846
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  }
44
  ],
45
  "logging_steps": 923,
@@ -68,7 +132,7 @@
68
  "attributes": {}
69
  }
70
  },
71
- "total_flos": 7767634318166016.0,
72
  "train_batch_size": 32,
73
  "trial_name": null,
74
  "trial_params": null
 
1
  {
2
+ "best_global_step": 5538,
3
+ "best_metric": 0.8634780589737636,
4
+ "best_model_checkpoint": "outputs/bartpho-hsd/checkpoint-best/checkpoint-5538",
5
+ "epoch": 6.0,
6
  "eval_steps": 500,
7
+ "global_step": 5538,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "grad_norm": 2.5697989463806152,
15
  "learning_rate": 4.998769072267837e-05,
16
+ "loss": 0.5389,
17
  "step": 923
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.8428140236823776,
22
+ "eval_loss": 0.5179033875465393,
23
+ "eval_runtime": 60.9906,
24
+ "eval_samples_per_second": 70.617,
25
+ "eval_steps_per_second": 2.213,
26
  "step": 923
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "grad_norm": 3.148050308227539,
31
  "learning_rate": 4.995072162589518e-05,
32
+ "loss": 0.5208,
33
  "step": 1846
34
  },
35
  {
36
  "epoch": 2.0,
37
+ "eval_accuracy": 0.8428140236823776,
38
+ "eval_loss": 0.5180655121803284,
39
+ "eval_runtime": 55.4284,
40
+ "eval_samples_per_second": 77.704,
41
+ "eval_steps_per_second": 2.436,
42
  "step": 1846
43
+ },
44
+ {
45
+ "epoch": 3.0,
46
+ "grad_norm": 6.275445461273193,
47
+ "learning_rate": 4.988912917920435e-05,
48
+ "loss": 0.4785,
49
+ "step": 2769
50
+ },
51
+ {
52
+ "epoch": 3.0,
53
+ "eval_accuracy": 0.850243789180404,
54
+ "eval_loss": 0.48658695816993713,
55
+ "eval_runtime": 55.6378,
56
+ "eval_samples_per_second": 77.411,
57
+ "eval_steps_per_second": 2.426,
58
+ "step": 2769
59
+ },
60
+ {
61
+ "epoch": 4.0,
62
+ "grad_norm": 2.8800065517425537,
63
+ "learning_rate": 4.980297416691463e-05,
64
+ "loss": 0.4072,
65
+ "step": 3692
66
+ },
67
+ {
68
+ "epoch": 4.0,
69
+ "eval_accuracy": 0.8379382400742976,
70
+ "eval_loss": 0.45363402366638184,
71
+ "eval_runtime": 55.9448,
72
+ "eval_samples_per_second": 76.987,
73
+ "eval_steps_per_second": 2.413,
74
+ "step": 3692
75
+ },
76
+ {
77
+ "epoch": 5.0,
78
+ "grad_norm": 2.930692195892334,
79
+ "learning_rate": 4.969234161362153e-05,
80
+ "loss": 0.372,
81
+ "step": 4615
82
+ },
83
+ {
84
+ "epoch": 5.0,
85
+ "eval_accuracy": 0.8576735546784304,
86
+ "eval_loss": 0.4497613310813904,
87
+ "eval_runtime": 55.8766,
88
+ "eval_samples_per_second": 77.081,
89
+ "eval_steps_per_second": 2.416,
90
+ "step": 4615
91
+ },
92
+ {
93
+ "epoch": 6.0,
94
+ "grad_norm": 3.390305995941162,
95
+ "learning_rate": 4.9557340700298316e-05,
96
+ "loss": 0.345,
97
+ "step": 5538
98
+ },
99
+ {
100
+ "epoch": 6.0,
101
+ "eval_accuracy": 0.8634780589737636,
102
+ "eval_loss": 0.4286285638809204,
103
+ "eval_runtime": 55.8527,
104
+ "eval_samples_per_second": 77.114,
105
+ "eval_steps_per_second": 2.417,
106
+ "step": 5538
107
  }
108
  ],
109
  "logging_steps": 923,
 
132
  "attributes": {}
133
  }
134
  },
135
+ "total_flos": 9.625341380307149e+16,
136
  "train_batch_size": 32,
137
  "trial_name": null,
138
  "trial_params": null