fpadovani committed on
Commit
3de072b
·
verified ·
1 Parent(s): 77bc344

Training in progress, step 2460, checkpoint

Browse files
checkpoint-2460/trainer_state.json CHANGED
@@ -9,36 +9,71 @@
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  {
13
  "epoch": 1.0,
14
  "eval_loss": 3.9908180236816406,
15
- "eval_runtime": 66.5009,
16
- "eval_samples_per_second": 504.745,
17
- "eval_steps_per_second": 1.985,
18
  "step": 492
19
  },
 
 
 
 
 
 
 
20
  {
21
  "epoch": 2.0,
22
  "eval_loss": 3.653867483139038,
23
- "eval_runtime": 66.5302,
24
- "eval_samples_per_second": 504.523,
25
- "eval_steps_per_second": 1.984,
26
  "step": 984
27
  },
 
 
 
 
 
 
 
28
  {
29
  "epoch": 3.0,
30
  "eval_loss": 3.5320048332214355,
31
- "eval_runtime": 66.5567,
32
- "eval_samples_per_second": 504.322,
33
- "eval_steps_per_second": 1.983,
34
  "step": 1476
35
  },
 
 
 
 
 
 
 
36
  {
37
  "epoch": 4.0,
38
  "eval_loss": 3.473982334136963,
39
- "eval_runtime": 66.5024,
40
- "eval_samples_per_second": 504.734,
41
- "eval_steps_per_second": 1.985,
42
  "step": 1968
43
  }
44
  ],
 
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
+ {
13
+ "epoch": 0.0020325203252032522,
14
+ "grad_norm": 28.81853675842285,
15
+ "learning_rate": 0.0,
16
+ "loss": 10.3353,
17
+ "step": 1
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "grad_norm": 1.4544554948806763,
22
+ "learning_rate": 9.82e-05,
23
+ "loss": 4.8617,
24
+ "step": 492
25
+ },
26
  {
27
  "epoch": 1.0,
28
  "eval_loss": 3.9908180236816406,
29
+ "eval_runtime": 66.4665,
30
+ "eval_samples_per_second": 505.006,
31
+ "eval_steps_per_second": 1.986,
32
  "step": 492
33
  },
34
+ {
35
+ "epoch": 2.0,
36
+ "grad_norm": 1.4267009496688843,
37
+ "learning_rate": 7.529411764705883e-05,
38
+ "loss": 3.5249,
39
+ "step": 984
40
+ },
41
  {
42
  "epoch": 2.0,
43
  "eval_loss": 3.653867483139038,
44
+ "eval_runtime": 66.5662,
45
+ "eval_samples_per_second": 504.25,
46
+ "eval_steps_per_second": 1.983,
47
  "step": 984
48
  },
49
+ {
50
+ "epoch": 3.0,
51
+ "grad_norm": 1.5028904676437378,
52
+ "learning_rate": 5.0127877237851665e-05,
53
+ "loss": 3.257,
54
+ "step": 1476
55
+ },
56
  {
57
  "epoch": 3.0,
58
  "eval_loss": 3.5320048332214355,
59
+ "eval_runtime": 66.4925,
60
+ "eval_samples_per_second": 504.809,
61
+ "eval_steps_per_second": 1.985,
62
  "step": 1476
63
  },
64
+ {
65
+ "epoch": 4.0,
66
+ "grad_norm": 1.6736714839935303,
67
+ "learning_rate": 2.4961636828644502e-05,
68
+ "loss": 3.1208,
69
+ "step": 1968
70
+ },
71
  {
72
  "epoch": 4.0,
73
  "eval_loss": 3.473982334136963,
74
+ "eval_runtime": 66.5329,
75
+ "eval_samples_per_second": 504.503,
76
+ "eval_steps_per_second": 1.984,
77
  "step": 1968
78
  }
79
  ],
checkpoint-2460/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7be473c73c39056482d0a43c66eaed18208f6dc8449be0dc16aabad059cd82cc
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3290c0b51af746ef3fa5659adc5fa025f21c3647c818c43be3ade238b5ea6a1e
3
  size 5969