CatkinChen committed on
Commit
d119dd7
·
verified ·
1 Parent(s): c1a1cdb

Add training data

Browse files
Files changed (1) hide show
  1. training_data.json +34 -44
training_data.json CHANGED
@@ -1,43 +1,21 @@
1
  {
2
  "train_losses": [
3
- 5755.956953125,
4
- 4527.261198730469,
5
- 3494.703161621094,
6
- 2589.8849584960935,
7
- 1721.1179077148438,
8
- 762.3653802490235,
9
- 596.5937448120117,
10
- 549.3049771118164,
11
- 532.7015045166015,
12
- 527.6343267822266,
13
- 522.560433959961,
14
- 525.4734405517578,
15
- 532.2424160766601,
16
- 542.7450305175781,
17
- 549.4707604980468
18
  ],
19
  "test_losses": [
20
- 4864.913232421875,
21
- 3544.6339721679688,
22
- 3383.009033203125,
23
- 3733.9931518554686,
24
- 3762.6799560546874,
25
- 4619.136267089843,
26
- 4848.683056640625,
27
- 5241.984057617187,
28
- 5930.204077148437,
29
- 6055.067846679687,
30
- 6544.0708984375,
31
- 7091.799609375,
32
- 7371.5255859375,
33
- 7603.633618164063,
34
- 8138.669360351562
35
  ],
36
  "config": {
37
  "epochs": 15,
38
- "batch_size": 32,
39
- "learning_rate": 0.0005,
40
  "sequence_size": 32,
 
 
41
  "adaptive_weighting": {
42
  "initial_weight_emb": 1.5,
43
  "final_weight_emb": 0.0,
@@ -45,22 +23,34 @@
45
  "initial_weight_raw": 0.4,
46
  "final_weight_raw": 1.0,
47
  "weight_raw_shape": "linear",
48
- "initial_kl_beta": 0.0001,
49
- "final_kl_beta": 0.6,
50
- "kl_beta_shape": "cosine",
51
- "warmup_epoch_ratio": 0.4
 
 
 
 
 
 
52
  },
53
- "total_correlation_beta_multiplier": 10.0,
54
  "free_bits": 0.15,
55
  "focal_loss_alpha": 0.75,
56
  "focal_loss_gamma": 2.0,
57
- "dropout_rate": 0.2,
58
  "enable_dropout_on_latent": true,
59
- "enable_dropout_on_decoder": true
 
 
 
 
 
 
 
60
  },
61
- "final_train_loss": 549.4707604980468,
62
- "final_test_loss": 8138.669360351562,
63
- "total_epochs": 15,
64
- "best_train_loss": 522.560433959961,
65
- "best_test_loss": 3383.009033203125
66
  }
 
1
  {
2
  "train_losses": [
3
+ 6022.34626953125,
4
+ 4383.866735839843,
5
+ 2710.9047668457033
 
 
 
 
 
 
 
 
 
 
 
 
6
  ],
7
  "test_losses": [
8
+ 5278.615673828125,
9
+ 3384.1504150390624,
10
+ 2245.631726074219
 
 
 
 
 
 
 
 
 
 
 
 
11
  ],
12
  "config": {
13
  "epochs": 15,
14
+ "batch_size": 1024,
15
+ "max_learning_rate": 0.001,
16
  "sequence_size": 32,
17
+ "shuffle_batches": true,
18
+ "shuffle_within_batch": true,
19
  "adaptive_weighting": {
20
  "initial_weight_emb": 1.5,
21
  "final_weight_emb": 0.0,
 
23
  "initial_weight_raw": 0.4,
24
  "final_weight_raw": 1.0,
25
  "weight_raw_shape": "linear",
26
+ "initial_mi_beta": 0.0,
27
+ "final_mi_beta": 0.0,
28
+ "mi_beta_shape": "constant",
29
+ "initial_tc_beta": 5.0,
30
+ "final_tc_beta": 5.0,
31
+ "tc_beta_shape": "constant",
32
+ "initial_dw_beta": 0.02,
33
+ "final_dw_beta": 0.3,
34
+ "dw_beta_shape": "custom",
35
+ "warmup_epoch_ratio": 0.2
36
  },
 
37
  "free_bits": 0.15,
38
  "focal_loss_alpha": 0.75,
39
  "focal_loss_gamma": 2.0,
40
+ "dropout_rate": 0.1,
41
  "enable_dropout_on_latent": true,
42
+ "enable_dropout_on_decoder": true,
43
+ "early_stopping": {
44
+ "enabled": true,
45
+ "patience": 3,
46
+ "min_delta": 0.01,
47
+ "triggered": true,
48
+ "best_epoch": 3
49
+ }
50
  },
51
+ "final_train_loss": 2710.9047668457033,
52
+ "final_test_loss": 2245.631726074219,
53
+ "total_epochs": 3,
54
+ "best_train_loss": 2710.9047668457033,
55
+ "best_test_loss": 2245.631726074219
56
  }