ClaraPind committed on
Commit
1a02f93
·
verified ·
1 Parent(s): 8bf7275

Training in progress, step 1590

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e1643b1bd0323fbc45e8613702edc944075f87f113b1ffa8729a561b3398d5b
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09f74d46f58414d8d1207638e0b7a63352db6c31e4dd89193fd52b0547378e68
3
  size 268290900
run-3/checkpoint-1590/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:412c06c8ccb1a63d5e9727eca90fdd4f9928fde11459996be3ba96c562fb87c5
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09f74d46f58414d8d1207638e0b7a63352db6c31e4dd89193fd52b0547378e68
3
  size 268290900
run-3/checkpoint-1590/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca6d2ecfb317074f4ce663063657a6232f2dd947850a2b6da5df5383185c8158
3
  size 536645835
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9508dbf7547fb3373062c7fb409e13f13c17df653c87772161baa86c1f61544d
3
  size 536645835
run-3/checkpoint-1590/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17bf4b31b76e5514eff8ee3fa2091d1cda56850f095ceb6c333114284dfd91f4
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:591a3036abf7476423f029ad2a59e6f99ee1fcae384b84b29f655a7609102e7d
3
  size 1465
run-3/checkpoint-1590/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_global_step": 318,
3
- "best_metric": 0.0064516129032258064,
4
  "best_model_checkpoint": null,
5
  "epoch": 5.0,
6
  "eval_steps": 500,
@@ -11,73 +11,73 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.9968553459119497,
14
- "grad_norm": 1.8634014129638672,
15
- "learning_rate": 8.418430380468648e-07,
16
- "loss": 2.7485,
17
  "step": 317
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_accuracy": 0.0064516129032258064,
22
- "eval_loss": 2.429906129837036,
23
- "eval_runtime": 16.088,
24
- "eval_samples_per_second": 192.69,
25
- "eval_steps_per_second": 4.04,
26
  "step": 318
27
  },
28
  {
29
  "epoch": 1.9937106918238994,
30
- "grad_norm": 1.7561661005020142,
31
- "learning_rate": 6.323734595061614e-07,
32
- "loss": 2.3269,
33
  "step": 634
34
  },
35
  {
36
  "epoch": 2.0,
37
- "eval_accuracy": 0.0064516129032258064,
38
- "eval_loss": 2.151293992996216,
39
- "eval_runtime": 15.9574,
40
- "eval_samples_per_second": 194.267,
41
- "eval_steps_per_second": 4.073,
42
  "step": 636
43
  },
44
  {
45
  "epoch": 2.990566037735849,
46
- "grad_norm": 2.544083595275879,
47
- "learning_rate": 4.22903880965458e-07,
48
- "loss": 2.1336,
49
  "step": 951
50
  },
51
  {
52
  "epoch": 3.0,
53
- "eval_accuracy": 0.0064516129032258064,
54
- "eval_loss": 2.018416404724121,
55
- "eval_runtime": 15.8356,
56
- "eval_samples_per_second": 195.762,
57
- "eval_steps_per_second": 4.105,
58
  "step": 954
59
  },
60
  {
61
  "epoch": 3.9874213836477987,
62
- "grad_norm": 1.9344236850738525,
63
- "learning_rate": 2.1343430242475459e-07,
64
- "loss": 2.0365,
65
  "step": 1268
66
  },
67
  {
68
  "epoch": 4.0,
69
- "eval_accuracy": 0.0064516129032258064,
70
- "eval_loss": 1.9513684511184692,
71
- "eval_runtime": 15.904,
72
- "eval_samples_per_second": 194.919,
73
- "eval_steps_per_second": 4.087,
74
  "step": 1272
75
  },
76
  {
77
  "epoch": 4.984276729559748,
78
- "grad_norm": 2.013123035430908,
79
- "learning_rate": 3.964723884051169e-09,
80
- "loss": 1.9904,
81
  "step": 1585
82
  }
83
  ],
@@ -102,9 +102,9 @@
102
  "train_batch_size": 48,
103
  "trial_name": null,
104
  "trial_params": {
105
- "alpha": 0.7222343549409885,
106
- "learning_rate": 1.0506518292735597e-06,
107
  "num_train_epochs": 5,
108
- "temperature": 2.5618786472533883
109
  }
110
  }
 
1
  {
2
+ "best_global_step": 1272,
3
+ "best_metric": 0.011935483870967743,
4
  "best_model_checkpoint": null,
5
  "epoch": 5.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.9968553459119497,
14
+ "grad_norm": 1.5790598392486572,
15
+ "learning_rate": 4.0062893081761014e-05,
16
+ "loss": 1.4043,
17
  "step": 317
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.01032258064516129,
22
+ "eval_loss": 1.1946589946746826,
23
+ "eval_runtime": 16.2932,
24
+ "eval_samples_per_second": 190.263,
25
+ "eval_steps_per_second": 3.989,
26
  "step": 318
27
  },
28
  {
29
  "epoch": 1.9937106918238994,
30
+ "grad_norm": 1.969873070716858,
31
+ "learning_rate": 3.009433962264151e-05,
32
+ "loss": 1.1616,
33
  "step": 634
34
  },
35
  {
36
  "epoch": 2.0,
37
+ "eval_accuracy": 0.00935483870967742,
38
+ "eval_loss": 1.14057195186615,
39
+ "eval_runtime": 16.0661,
40
+ "eval_samples_per_second": 192.953,
41
+ "eval_steps_per_second": 4.046,
42
  "step": 636
43
  },
44
  {
45
  "epoch": 2.990566037735849,
46
+ "grad_norm": 1.7632919549942017,
47
+ "learning_rate": 2.0125786163522016e-05,
48
+ "loss": 1.0714,
49
  "step": 951
50
  },
51
  {
52
  "epoch": 3.0,
53
+ "eval_accuracy": 0.00903225806451613,
54
+ "eval_loss": 1.1342412233352661,
55
+ "eval_runtime": 16.0739,
56
+ "eval_samples_per_second": 192.859,
57
+ "eval_steps_per_second": 4.044,
58
  "step": 954
59
  },
60
  {
61
  "epoch": 3.9874213836477987,
62
+ "grad_norm": 2.8834447860717773,
63
+ "learning_rate": 1.0157232704402517e-05,
64
+ "loss": 1.0014,
65
  "step": 1268
66
  },
67
  {
68
  "epoch": 4.0,
69
+ "eval_accuracy": 0.011935483870967743,
70
+ "eval_loss": 1.122102975845337,
71
+ "eval_runtime": 16.095,
72
+ "eval_samples_per_second": 192.607,
73
+ "eval_steps_per_second": 4.039,
74
  "step": 1272
75
  },
76
  {
77
  "epoch": 4.984276729559748,
78
+ "grad_norm": 1.7756671905517578,
79
+ "learning_rate": 1.886792452830189e-07,
80
+ "loss": 0.9547,
81
  "step": 1585
82
  }
83
  ],
 
102
  "train_batch_size": 48,
103
  "trial_name": null,
104
  "trial_params": {
105
+ "alpha": 0.9,
106
+ "learning_rate": 5e-05,
107
  "num_train_epochs": 5,
108
+ "temperature": 2.0
109
  }
110
  }
run-3/checkpoint-1590/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdb4d45e7fb042f4d332b29670a9da6b163ef4d1fd0283dfd0bd42ef247ed757
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a8b3a49ebc8182073042c91753089264c4557ea1da4359c036b31372888809
3
  size 5905
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cefe7c9a43e61af3bc5ba28e6ebbd266f83be53c6466a8db7b7ee3a9d5f5925
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a8b3a49ebc8182073042c91753089264c4557ea1da4359c036b31372888809
3
  size 5905