simon-mellergaard commited on
Commit
3a69ec1
·
verified ·
1 Parent(s): 3dbce4d

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8f0c5a6bb485f27a2d2623282cf4635be2d472e4216c79e1992c40ecf6827ae
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d893b21e0d58af9c49b992482b58823b77e2465854de5572d3a1cd1f3927dc6
3
  size 598898116
run-2/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:207c30c8b747194e9a6d03e089dc13c984ce6dfb5686e9f365182fa6d3a9dcbf
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d893b21e0d58af9c49b992482b58823b77e2465854de5572d3a1cd1f3927dc6
3
  size 598898116
run-2/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f259294e4fff728135f977b74709f0e79c1c1d1bedce909dc9f6c7148485b590
3
  size 1197884026
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eaf700b2792c8bb32ee0c45b4c5b8f38c85b461a608721efa29d46c040fca3e
3
  size 1197884026
run-2/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:036b590a6639189ef8931808f698edb538b6f4e0b64d7349f2ab831e4a003a21
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:306c6af96669ed13365a61a72a74a28f181e5e20ceb2078a041489ebbb847646
3
  size 1064
run-2/checkpoint-500/trainer_state.json CHANGED
@@ -11,50 +11,50 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.7674193548387097,
15
- "eval_loss": 3.5887417793273926,
16
- "eval_runtime": 20.1099,
17
- "eval_samples_per_second": 154.153,
18
- "eval_steps_per_second": 1.641,
19
  "step": 159
20
  },
21
  {
22
  "epoch": 1.2578616352201257,
23
- "grad_norm": 7.158421993255615,
24
- "learning_rate": 1.6424079065588502e-05,
25
- "loss": 5.9747,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.0,
30
- "eval_accuracy": 0.9238709677419354,
31
- "eval_loss": 1.755516767501831,
32
- "eval_runtime": 20.2577,
33
- "eval_samples_per_second": 153.028,
34
- "eval_steps_per_second": 1.629,
35
  "step": 318
36
  },
37
  {
38
  "epoch": 2.5157232704402515,
39
- "grad_norm": 4.927733898162842,
40
- "learning_rate": 1.2830188679245283e-05,
41
- "loss": 1.9965,
42
  "step": 400
43
  },
44
  {
45
  "epoch": 3.0,
46
- "eval_accuracy": 0.947741935483871,
47
- "eval_loss": 1.2168198823928833,
48
- "eval_runtime": 20.1766,
49
- "eval_samples_per_second": 153.643,
50
- "eval_steps_per_second": 1.636,
51
  "step": 477
52
  }
53
  ],
54
  "logging_steps": 200,
55
- "max_steps": 1113,
56
  "num_input_tokens_seen": 0,
57
- "num_train_epochs": 7,
58
  "save_steps": 500,
59
  "stateful_callbacks": {
60
  "TrainerControl": {
@@ -72,8 +72,8 @@
72
  "train_batch_size": 96,
73
  "trial_name": null,
74
  "trial_params": {
75
- "alpha": 0.19053512605657696,
76
- "num_train_epochs": 7,
77
- "temperature": 8
78
  }
79
  }
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.7964516129032259,
15
+ "eval_loss": 6.4024338722229,
16
+ "eval_runtime": 22.8904,
17
+ "eval_samples_per_second": 135.428,
18
+ "eval_steps_per_second": 1.442,
19
  "step": 159
20
  },
21
  {
22
  "epoch": 1.2578616352201257,
23
+ "grad_norm": 20.56516456604004,
24
+ "learning_rate": 1.5828092243186584e-05,
25
+ "loss": 14.3499,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "eval_accuracy": 0.9280645161290323,
31
+ "eval_loss": 2.3401365280151367,
32
+ "eval_runtime": 22.8836,
33
+ "eval_samples_per_second": 135.468,
34
+ "eval_steps_per_second": 1.442,
35
  "step": 318
36
  },
37
  {
38
  "epoch": 2.5157232704402515,
39
+ "grad_norm": 9.793017387390137,
40
+ "learning_rate": 1.1635220125786164e-05,
41
+ "loss": 2.7392,
42
  "step": 400
43
  },
44
  {
45
  "epoch": 3.0,
46
+ "eval_accuracy": 0.954516129032258,
47
+ "eval_loss": 1.683219313621521,
48
+ "eval_runtime": 22.7915,
49
+ "eval_samples_per_second": 136.016,
50
+ "eval_steps_per_second": 1.448,
51
  "step": 477
52
  }
53
  ],
54
  "logging_steps": 200,
55
+ "max_steps": 954,
56
  "num_input_tokens_seen": 0,
57
+ "num_train_epochs": 6,
58
  "save_steps": 500,
59
  "stateful_callbacks": {
60
  "TrainerControl": {
 
72
  "train_batch_size": 96,
73
  "trial_name": null,
74
  "trial_params": {
75
+ "alpha": 0.8393294687667213,
76
+ "num_train_epochs": 6,
77
+ "temperature": 4
78
  }
79
  }
run-2/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a56d8b1ad389a3c658a9c44fe59a9d8d4421ac34a0e985324cdb64ca878ca72f
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59adeb0d3a55bdc7384874777e892e5d0cc98df3d3789609437ba56985f062cb
3
  size 5368
runs/Sep28_10-21-38_bb2384aee55a/events.out.tfevents.1759057654.bb2384aee55a.76.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9950303484b305802a098459cf1fce855e8c2bf7dffcae41b3a334b30df26204
3
+ size 14501
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f2a6043dcb359bf6e5c547e39efa179f167939f5b2be51fd4993780a524ca74
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59adeb0d3a55bdc7384874777e892e5d0cc98df3d3789609437ba56985f062cb
3
  size 5368