ccore committed on
Commit
acb8489
·
verified ·
1 Parent(s): d15c69f

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:355b00ffa8e07d4346760e95eccf2eb530e9a1b84d8356dcbf28db9cd7dd5295
3
  size 500979600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d6430e4a6208a8112bffb365eeaa287b4691166600fb665cd625816e4b96a8c
3
  size 500979600
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c08f01c9be95dfaa9d09755bce204473729e762258d21a0f4b953c864ea7e68
3
  size 1002078330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f72b21dc6b023992f07d421fba6e4e2b690747ea68d2145d3c66dabae86e00e
3
  size 1002078330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b00502da5feba7cd8100a8570af5fac37e5c8c267a539c58346d380c13e441c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:574024fd0bef58b4ca8af0606c1be9cf07d7494e9e30913df874b1f6896f01c2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8c98fcf42f494fa75f135fb338a20acefff4d78f4f44abc68b10fde88458a02
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d4270c057ffbe7d94d3a4a46e15a61be9cfe5f3368f1f48731c220c26ede867
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,92 +1,20 @@
1
  {
2
- "best_metric": 0.6704908013343811,
3
- "best_model_checkpoint": "./opt_trained3/checkpoint-140",
4
- "epoch": 9.353982300884956,
5
  "eval_steps": 500,
6
- "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.7184422016143799,
14
- "eval_runtime": 5.9548,
15
- "eval_samples_per_second": 50.38,
16
- "eval_steps_per_second": 12.595,
17
  "step": 15
18
- },
19
- {
20
- "epoch": 2.0,
21
- "eval_loss": 0.7095981240272522,
22
- "eval_runtime": 5.9501,
23
- "eval_samples_per_second": 50.419,
24
- "eval_steps_per_second": 12.605,
25
- "step": 30
26
- },
27
- {
28
- "epoch": 3.0,
29
- "eval_loss": 0.6935510039329529,
30
- "eval_runtime": 5.9572,
31
- "eval_samples_per_second": 50.359,
32
- "eval_steps_per_second": 12.59,
33
- "step": 45
34
- },
35
- {
36
- "epoch": 4.0,
37
- "eval_loss": 0.6844949126243591,
38
- "eval_runtime": 5.9603,
39
- "eval_samples_per_second": 50.333,
40
- "eval_steps_per_second": 12.583,
41
- "step": 60
42
- },
43
- {
44
- "epoch": 5.0,
45
- "eval_loss": 0.6744114756584167,
46
- "eval_runtime": 5.9487,
47
- "eval_samples_per_second": 50.432,
48
- "eval_steps_per_second": 12.608,
49
- "step": 75
50
- },
51
- {
52
- "epoch": 6.0,
53
- "eval_loss": 0.6723850965499878,
54
- "eval_runtime": 5.9492,
55
- "eval_samples_per_second": 50.427,
56
- "eval_steps_per_second": 12.607,
57
- "step": 90
58
- },
59
- {
60
- "epoch": 7.0,
61
- "eval_loss": 0.6734561920166016,
62
- "eval_runtime": 5.9536,
63
- "eval_samples_per_second": 50.39,
64
- "eval_steps_per_second": 12.598,
65
- "step": 105
66
- },
67
- {
68
- "epoch": 8.0,
69
- "eval_loss": 0.6707628965377808,
70
- "eval_runtime": 5.9562,
71
- "eval_samples_per_second": 50.368,
72
- "eval_steps_per_second": 12.592,
73
- "step": 120
74
- },
75
- {
76
- "epoch": 9.0,
77
- "eval_loss": 0.670530378818512,
78
- "eval_runtime": 5.9504,
79
- "eval_samples_per_second": 50.417,
80
- "eval_steps_per_second": 12.604,
81
- "step": 135
82
- },
83
- {
84
- "epoch": 9.353982300884956,
85
- "eval_loss": 0.6704908013343811,
86
- "eval_runtime": 5.9917,
87
- "eval_samples_per_second": 50.069,
88
- "eval_steps_per_second": 12.517,
89
- "step": 140
90
  }
91
  ],
92
  "logging_steps": 500,
@@ -101,12 +29,12 @@
101
  "should_evaluate": false,
102
  "should_log": false,
103
  "should_save": true,
104
- "should_training_stop": true
105
  },
106
  "attributes": {}
107
  }
108
  },
109
- "total_flos": 5207869157760000.0,
110
  "train_batch_size": 24,
111
  "trial_name": null,
112
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6533264517784119,
3
+ "best_model_checkpoint": "./opt_trained3/checkpoint-15",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.6533264517784119,
14
+ "eval_runtime": 5.9687,
15
+ "eval_samples_per_second": 50.262,
16
+ "eval_steps_per_second": 12.565,
17
  "step": 15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  }
19
  ],
20
  "logging_steps": 500,
 
29
  "should_evaluate": false,
30
  "should_log": false,
31
  "should_save": true,
32
+ "should_training_stop": false
33
  },
34
  "attributes": {}
35
  }
36
  },
37
+ "total_flos": 556605613440000.0,
38
  "train_batch_size": 24,
39
  "trial_name": null,
40
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce5b37466798b22cd1c58fa164a2deb26fd86f95d9733731d6588e8abd389d38
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20903957dfffb8df7a5a23888baadcd68fb6b87201bf4873082dc166ef1b5bc9
3
  size 5368