simon-mellergaard committed on
Commit
d5765fe
·
verified ·
1 Parent(s): e6f591a

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22f946dd187c71c743ed8a24c383b7621e8b8662ec78fbff6ed001870698a454
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ae6dd26bc1c419aab07240966e39546c64989855d6d985e7c22cc08ac52479
3
  size 598898116
run-3/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f4eff725814b13dc937fa55afed34b3497c98d1e48b5dc28cc14726451a72eb
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ae6dd26bc1c419aab07240966e39546c64989855d6d985e7c22cc08ac52479
3
  size 598898116
run-3/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f600a4eb072af1b2680d52a56cbd5f7424e22b03cfdb04ef085dfba10999f1f
3
  size 1197884026
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95d0e493985ac4f5d54096e2fce84dba3e7b34b3ac185eb1f2e3153c0dbc8f30
3
  size 1197884026
run-3/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d73e26459c92e195a7738eb8b675e4359a54bd79fe9532dd9d1f04483172c359
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e1264523e958cf7990dc5f42d876cc12129475c4603804cf66868aaf25c2c24
3
  size 1064
run-3/checkpoint-500/trainer_state.json CHANGED
@@ -11,50 +11,50 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.6764516129032258,
15
- "eval_loss": 3.1047208309173584,
16
- "eval_runtime": 20.0866,
17
- "eval_samples_per_second": 154.332,
18
- "eval_steps_per_second": 1.643,
19
  "step": 159
20
  },
21
  {
22
  "epoch": 1.2578616352201257,
23
- "grad_norm": 6.007691383361816,
24
- "learning_rate": 1.49937106918239e-05,
25
- "loss": 4.485,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.0,
30
- "eval_accuracy": 0.892258064516129,
31
- "eval_loss": 1.8396068811416626,
32
- "eval_runtime": 20.1816,
33
- "eval_samples_per_second": 153.605,
34
- "eval_steps_per_second": 1.635,
35
  "step": 318
36
  },
37
  {
38
  "epoch": 2.5157232704402515,
39
- "grad_norm": 6.994930267333984,
40
- "learning_rate": 9.962264150943397e-06,
41
- "loss": 2.021,
42
  "step": 400
43
  },
44
  {
45
  "epoch": 3.0,
46
- "eval_accuracy": 0.9319354838709677,
47
- "eval_loss": 1.3435319662094116,
48
- "eval_runtime": 20.2565,
49
- "eval_samples_per_second": 153.037,
50
- "eval_steps_per_second": 1.629,
51
  "step": 477
52
  }
53
  ],
54
  "logging_steps": 200,
55
- "max_steps": 795,
56
  "num_input_tokens_seen": 0,
57
- "num_train_epochs": 5,
58
  "save_steps": 500,
59
  "stateful_callbacks": {
60
  "TrainerControl": {
@@ -72,8 +72,8 @@
72
  "train_batch_size": 96,
73
  "trial_name": null,
74
  "trial_params": {
75
- "alpha": 0.7221593654978639,
76
- "num_train_epochs": 5,
77
- "temperature": 12
78
  }
79
  }
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.7574193548387097,
15
+ "eval_loss": 2.6135873794555664,
16
+ "eval_runtime": 22.8142,
17
+ "eval_samples_per_second": 135.88,
18
+ "eval_steps_per_second": 1.446,
19
  "step": 159
20
  },
21
  {
22
  "epoch": 1.2578616352201257,
23
+ "grad_norm": 4.528892517089844,
24
+ "learning_rate": 1.749685534591195e-05,
25
+ "loss": 3.9724,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "eval_accuracy": 0.92,
31
+ "eval_loss": 1.4474385976791382,
32
+ "eval_runtime": 23.449,
33
+ "eval_samples_per_second": 132.202,
34
+ "eval_steps_per_second": 1.407,
35
  "step": 318
36
  },
37
  {
38
  "epoch": 2.5157232704402515,
39
+ "grad_norm": 4.671586036682129,
40
+ "learning_rate": 1.4981132075471698e-05,
41
+ "loss": 1.573,
42
  "step": 400
43
  },
44
  {
45
  "epoch": 3.0,
46
+ "eval_accuracy": 0.9429032258064516,
47
+ "eval_loss": 1.0164119005203247,
48
+ "eval_runtime": 22.8814,
49
+ "eval_samples_per_second": 135.481,
50
+ "eval_steps_per_second": 1.442,
51
  "step": 477
52
  }
53
  ],
54
  "logging_steps": 200,
55
+ "max_steps": 1590,
56
  "num_input_tokens_seen": 0,
57
+ "num_train_epochs": 10,
58
  "save_steps": 500,
59
  "stateful_callbacks": {
60
  "TrainerControl": {
 
72
  "train_batch_size": 96,
73
  "trial_name": null,
74
  "trial_params": {
75
+ "alpha": 0.44422800356103254,
76
+ "num_train_epochs": 10,
77
+ "temperature": 13
78
  }
79
  }
run-3/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17adff4f3634e2a8d622b645ea126e53ba766395bed524f0e523cb41d5464456
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9db705550b987db55c9bb3aff5b3ff0fef828763e0672ac943e35e25b8f5e4e9
3
  size 5368
runs/Sep28_10-21-38_bb2384aee55a/events.out.tfevents.1759058880.bb2384aee55a.76.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fbf21fe3734393b1c0633723a186f1ac72f5c30fa23a977ee63caf9c544fba3
3
+ size 14502
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59adeb0d3a55bdc7384874777e892e5d0cc98df3d3789609437ba56985f062cb
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9db705550b987db55c9bb3aff5b3ff0fef828763e0672ac943e35e25b8f5e4e9
3
  size 5368