JulienRPA committed on
Commit
e104eee
·
1 Parent(s): 480b56c

End of training

Browse files
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.3106227575602255,
4
- "eval_loss": 2.317716598510742,
5
- "eval_runtime": 99.9088,
6
- "eval_samples": 17200,
7
- "eval_samples_per_second": 172.157,
8
- "eval_steps_per_second": 2.692,
9
- "perplexity": 10.152465663617022,
10
- "train_loss": 5.0763140617684925,
11
- "train_runtime": 1146.6501,
12
- "train_samples": 17053,
13
- "train_samples_per_second": 44.616,
14
- "train_steps_per_second": 0.699
15
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.731570151719994,
4
+ "eval_loss": 0.9372276663780212,
5
+ "eval_runtime": 264.4068,
6
+ "eval_samples": 49327,
7
+ "eval_samples_per_second": 186.557,
8
+ "eval_steps_per_second": 2.916,
9
+ "perplexity": 2.5528941244824197,
10
+ "train_loss": 2.219597038788229,
11
+ "train_runtime": 7937.3216,
12
+ "train_samples": 147965,
13
+ "train_samples_per_second": 55.925,
14
+ "train_steps_per_second": 0.874
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.3106227575602255,
4
- "eval_loss": 2.317716598510742,
5
- "eval_runtime": 99.9088,
6
- "eval_samples": 17200,
7
- "eval_samples_per_second": 172.157,
8
- "eval_steps_per_second": 2.692,
9
- "perplexity": 10.152465663617022
10
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.731570151719994,
4
+ "eval_loss": 0.9372276663780212,
5
+ "eval_runtime": 264.4068,
6
+ "eval_samples": 49327,
7
+ "eval_samples_per_second": 186.557,
8
+ "eval_steps_per_second": 2.916,
9
+ "perplexity": 2.5528941244824197
10
  }
runs/Apr27_08-13-09_98bd7713ce50/events.out.tfevents.1682591630.98bd7713ce50.11648.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a153e12410dcdb490ae4d0d5bd526efa96cce1b208f5196ecc6cd6c2a9f37488
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 5.0763140617684925,
4
- "train_runtime": 1146.6501,
5
- "train_samples": 17053,
6
- "train_samples_per_second": 44.616,
7
- "train_steps_per_second": 0.699
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 2.219597038788229,
4
+ "train_runtime": 7937.3216,
5
+ "train_samples": 147965,
6
+ "train_samples_per_second": 55.925,
7
+ "train_steps_per_second": 0.874
8
  }
trainer_state.json CHANGED
@@ -2,30 +2,102 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
- "global_step": 801,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.87,
12
- "learning_rate": 1.8789013732833958e-05,
13
- "loss": 6.3233,
14
  "step": 500
15
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  {
17
  "epoch": 3.0,
18
- "step": 801,
19
- "total_flos": 2940962095055544.0,
20
- "train_loss": 5.0763140617684925,
21
- "train_runtime": 1146.6501,
22
- "train_samples_per_second": 44.616,
23
- "train_steps_per_second": 0.699
24
  }
25
  ],
26
- "max_steps": 801,
27
  "num_train_epochs": 3,
28
- "total_flos": 2940962095055544.0,
29
  "trial_name": null,
30
  "trial_params": null
31
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
+ "global_step": 6936,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.22,
12
+ "learning_rate": 4.639561707035756e-05,
13
+ "loss": 6.8529,
14
  "step": 500
15
  },
16
+ {
17
+ "epoch": 0.43,
18
+ "learning_rate": 4.2791234140715114e-05,
19
+ "loss": 3.542,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.65,
24
+ "learning_rate": 3.9186851211072664e-05,
25
+ "loss": 2.9635,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.87,
30
+ "learning_rate": 3.558246828143022e-05,
31
+ "loss": 2.5274,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 1.08,
36
+ "learning_rate": 3.1978085351787776e-05,
37
+ "loss": 2.2054,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 1.3,
42
+ "learning_rate": 2.8373702422145332e-05,
43
+ "loss": 1.9297,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 1.51,
48
+ "learning_rate": 2.4769319492502884e-05,
49
+ "loss": 1.7394,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 1.73,
54
+ "learning_rate": 2.116493656286044e-05,
55
+ "loss": 1.5874,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 1.95,
60
+ "learning_rate": 1.7560553633217993e-05,
61
+ "loss": 1.4676,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 2.16,
66
+ "learning_rate": 1.395617070357555e-05,
67
+ "loss": 1.3606,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 2.38,
72
+ "learning_rate": 1.0351787773933102e-05,
73
+ "loss": 1.2739,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 2.6,
78
+ "learning_rate": 6.747404844290659e-06,
79
+ "loss": 1.2049,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 2.81,
84
+ "learning_rate": 3.143021914648212e-06,
85
+ "loss": 1.1527,
86
+ "step": 6500
87
+ },
88
  {
89
  "epoch": 3.0,
90
+ "step": 6936,
91
+ "total_flos": 1.9168865726431344e+16,
92
+ "train_loss": 2.219597038788229,
93
+ "train_runtime": 7937.3216,
94
+ "train_samples_per_second": 55.925,
95
+ "train_steps_per_second": 0.874
96
  }
97
  ],
98
+ "max_steps": 6936,
99
  "num_train_epochs": 3,
100
+ "total_flos": 1.9168865726431344e+16,
101
  "trial_name": null,
102
  "trial_params": null
103
  }