JulienRPA committed on
Commit
68cc690
·
1 Parent(s): df000e7

End of training

Browse files
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.9477994163539991,
4
- "eval_loss": 0.3440474271774292,
5
- "eval_runtime": 142.2069,
6
- "eval_samples": 49327,
7
- "eval_samples_per_second": 346.868,
8
- "eval_steps_per_second": 5.422,
9
- "perplexity": 1.410645536857361,
10
- "train_loss": 0.0,
11
- "train_runtime": 0.072,
12
- "train_samples": 147965,
13
- "train_samples_per_second": 6164165.044,
14
- "train_steps_per_second": 96317.032
15
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.3106227575602255,
4
+ "eval_loss": 2.317716598510742,
5
+ "eval_runtime": 99.9088,
6
+ "eval_samples": 17200,
7
+ "eval_samples_per_second": 172.157,
8
+ "eval_steps_per_second": 2.692,
9
+ "perplexity": 10.152465663617022,
10
+ "train_loss": 5.0763140617684925,
11
+ "train_runtime": 1146.6501,
12
+ "train_samples": 17053,
13
+ "train_samples_per_second": 44.616,
14
+ "train_steps_per_second": 0.699
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.9477994163539991,
4
- "eval_loss": 0.3440474271774292,
5
- "eval_runtime": 142.2069,
6
- "eval_samples": 49327,
7
- "eval_samples_per_second": 346.868,
8
- "eval_steps_per_second": 5.422,
9
- "perplexity": 1.410645536857361
10
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.3106227575602255,
4
+ "eval_loss": 2.317716598510742,
5
+ "eval_runtime": 99.9088,
6
+ "eval_samples": 17200,
7
+ "eval_samples_per_second": 172.157,
8
+ "eval_steps_per_second": 2.692,
9
+ "perplexity": 10.152465663617022
10
  }
runs/Apr27_07-33-56_98bd7713ce50/events.out.tfevents.1682582323.98bd7713ce50.1129.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c7cdfebb3c52a0ba07f2751462b7383e16cf74a7821f139e80a88c1f84b12bd
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.0,
4
- "train_runtime": 0.072,
5
- "train_samples": 147965,
6
- "train_samples_per_second": 6164165.044,
7
- "train_steps_per_second": 96317.032
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 5.0763140617684925,
4
+ "train_runtime": 1146.6501,
5
+ "train_samples": 17053,
6
+ "train_samples_per_second": 44.616,
7
+ "train_steps_per_second": 0.699
8
  }
trainer_state.json CHANGED
@@ -2,111 +2,30 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
- "global_step": 6936,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.22,
12
- "learning_rate": 4.639561707035756e-05,
13
- "loss": 5.1876,
14
  "step": 500
15
  },
16
- {
17
- "epoch": 0.43,
18
- "learning_rate": 4.2791234140715114e-05,
19
- "loss": 1.9693,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.65,
24
- "learning_rate": 3.9186851211072664e-05,
25
- "loss": 1.007,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 0.87,
30
- "learning_rate": 3.558246828143022e-05,
31
- "loss": 0.6693,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 1.08,
36
- "learning_rate": 3.1978085351787776e-05,
37
- "loss": 0.5638,
38
- "step": 2500
39
- },
40
- {
41
- "epoch": 1.3,
42
- "learning_rate": 2.8373702422145332e-05,
43
- "loss": 0.5036,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 1.51,
48
- "learning_rate": 2.4769319492502884e-05,
49
- "loss": 0.4634,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 1.73,
54
- "learning_rate": 2.116493656286044e-05,
55
- "loss": 0.436,
56
- "step": 4000
57
- },
58
- {
59
- "epoch": 1.95,
60
- "learning_rate": 1.7560553633217993e-05,
61
- "loss": 0.4033,
62
- "step": 4500
63
- },
64
- {
65
- "epoch": 2.16,
66
- "learning_rate": 1.395617070357555e-05,
67
- "loss": 0.3911,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 2.38,
72
- "learning_rate": 1.0351787773933102e-05,
73
- "loss": 0.3743,
74
- "step": 5500
75
- },
76
- {
77
- "epoch": 2.6,
78
- "learning_rate": 6.747404844290659e-06,
79
- "loss": 0.3591,
80
- "step": 6000
81
- },
82
- {
83
- "epoch": 2.81,
84
- "learning_rate": 3.143021914648212e-06,
85
- "loss": 0.3378,
86
- "step": 6500
87
- },
88
- {
89
- "epoch": 3.0,
90
- "step": 6936,
91
- "total_flos": 1.057619951996595e+16,
92
- "train_loss": 0.9339470032864651,
93
- "train_runtime": 4801.0064,
94
- "train_samples_per_second": 92.459,
95
- "train_steps_per_second": 1.445
96
- },
97
  {
98
  "epoch": 3.0,
99
- "step": 6936,
100
- "total_flos": 1.057619951996595e+16,
101
- "train_loss": 0.0,
102
- "train_runtime": 0.072,
103
- "train_samples_per_second": 6164165.044,
104
- "train_steps_per_second": 96317.032
105
  }
106
  ],
107
- "max_steps": 6936,
108
  "num_train_epochs": 3,
109
- "total_flos": 1.057619951996595e+16,
110
  "trial_name": null,
111
  "trial_params": null
112
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
+ "global_step": 801,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.87,
12
+ "learning_rate": 1.8789013732833958e-05,
13
+ "loss": 6.3233,
14
  "step": 500
15
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  {
17
  "epoch": 3.0,
18
+ "step": 801,
19
+ "total_flos": 2940962095055544.0,
20
+ "train_loss": 5.0763140617684925,
21
+ "train_runtime": 1146.6501,
22
+ "train_samples_per_second": 44.616,
23
+ "train_steps_per_second": 0.699
24
  }
25
  ],
26
+ "max_steps": 801,
27
  "num_train_epochs": 3,
28
+ "total_flos": 2940962095055544.0,
29
  "trial_name": null,
30
  "trial_params": null
31
  }