bdanko committed on
Commit
953bc42
·
verified ·
1 Parent(s): 0bb7b1f

Phase 2 checkpoint — 3 epochs

Browse files
best/adapter.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73f1a49489d0d1e6ae3b71567dafadf3fa1af5f4486e65b3ddd350ef6a9a06a2
3
+ size 2108181
best/config.json CHANGED
@@ -3,50 +3,43 @@
3
  "split": "train",
4
  "max_samples": 100,
5
  "val_max_samples": 20,
6
- "batch_size": 32,
 
7
  "d_model": 384,
8
  "latent_dim": 512,
9
  "encoder_layers": 3,
10
  "encoder_heads": 8,
11
  "encoder_dropout": 0.1,
12
  "use_part_embeddings": true,
13
- "ddpm": {
14
- "num_timesteps": 1000,
15
- "beta_start": 0.0001,
16
- "beta_end": 0.02,
17
- "schedule_type": "linear"
18
- },
19
- "masking": {
20
- "feature_corruption": true,
21
- "time_span_masking": true,
22
- "whole_part_masking": true,
23
- "velocity_reconstruction": true,
24
- "latent_smoothness": true,
25
- "contrastive_consistency": false,
26
- "feature_corruption_prob": 0.15,
27
- "time_span_ratio": 0.2,
28
- "contrastive_weight": 0.05
29
- },
30
- "w_masked_pos": 1.0,
31
- "w_masked_vel": 1.0,
32
- "w_full_recon": 0.1,
33
- "w_latent_smooth": 0.01,
34
- "w_contrastive": 0.05,
35
  "epochs": 3,
36
- "lr": 0.0001,
 
 
 
37
  "weight_decay": 1e-05,
38
  "grad_clip": 1.0,
39
  "scheduler": "cosine",
40
  "warmup_steps": 100,
41
  "mixed_precision": false,
42
  "gradient_accumulation_steps": 1,
43
- "ckpt_dir": "/content/phase1_ckpt",
 
 
 
 
 
 
44
  "hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
45
  "upload_hf": false,
46
  "seed": 42,
47
  "log_backend": "csv",
48
- "wandb_project": "cslt-phase1",
49
  "log_every_n_steps": 10,
50
  "smoke_test": false,
51
- "run_id": "138504cf"
52
  }
 
3
  "split": "train",
4
  "max_samples": 100,
5
  "val_max_samples": 20,
6
+ "batch_size": 8,
7
+ "max_target_length": 128,
8
  "d_model": 384,
9
  "latent_dim": 512,
10
  "encoder_layers": 3,
11
  "encoder_heads": 8,
12
  "encoder_dropout": 0.1,
13
  "use_part_embeddings": true,
14
+ "t5_name": "google/flan-t5-small",
15
+ "t5_dim": 512,
16
+ "adapter_dropout": 0.1,
17
+ "use_attention_pooling": true,
18
+ "pool_num_heads": 4,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "epochs": 3,
20
+ "warmup_epochs": 1,
21
+ "lr_encoder": 5e-06,
22
+ "lr_adapter": 0.0001,
23
+ "lr_t5": 5e-05,
24
  "weight_decay": 1e-05,
25
  "grad_clip": 1.0,
26
  "scheduler": "cosine",
27
  "warmup_steps": 100,
28
  "mixed_precision": false,
29
  "gradient_accumulation_steps": 1,
30
+ "num_beams": 4,
31
+ "max_new_tokens": 50,
32
+ "use_ctc_head": false,
33
+ "ctc_weight": 0.1,
34
+ "ctc_vocab_size": 256,
35
+ "ckpt_dir": "/content/phase2_ckpt",
36
+ "phase1_ckpt": "/content/phase1_ckpt",
37
  "hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
38
  "upload_hf": false,
39
  "seed": 42,
40
  "log_backend": "csv",
41
+ "wandb_project": "cslt-phase2",
42
  "log_every_n_steps": 10,
43
  "smoke_test": false,
44
+ "run_id": "0c5128a7"
45
  }
best/encoder.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43b0cd4f268a852a7c6b4b0f5216c4e68d1176d4fea1e9a01176eda64a56ae0f
3
  size 35465544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bec4ba6a36a815d93bce379c04af4e1b1937538437546009935a57117a779c1
3
  size 35465544
best/metadata.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 0,
3
- "step": 4,
4
- "git_hash": "8457835",
5
- "timestamp": "2026-04-29T02:11:39.138289",
6
  "python_version": "3.12.13",
7
  "torch_version": "2.10.0+cu128",
8
  "seed": 42,
9
- "run_id": "138504cf"
10
  }
 
1
  {
2
+ "epoch": 1,
3
+ "step": 26,
4
+ "git_hash": "8e0490b",
5
+ "timestamp": "2026-04-29T02:23:13.312090",
6
  "python_version": "3.12.13",
7
  "torch_version": "2.10.0+cu128",
8
  "seed": 42,
9
+ "run_id": "0c5128a7"
10
  }
best/metrics.json CHANGED
@@ -1,15 +1,10 @@
1
  {
2
- "train/total_loss": 2.303677797317505,
3
- "train/masked_pos_loss": 1.166947454214096,
4
- "train/masked_vel_loss": 1.0032410025596619,
5
- "train/full_recon_loss": 1.33467036485672,
6
- "train/latent_smooth_loss": 0.0022277096286416054,
7
- "val/total_loss": 0.4656478464603424,
8
- "val/masked_pos_loss": 0.33220887184143066,
9
- "val/masked_vel_loss": 0.0,
10
- "val/full_recon_loss": 1.3343030214309692,
11
- "val/latent_smooth_loss": 0.0008657827856950462,
12
- "train/lr": 4.960000000000002e-06,
13
- "z_mean": 0.015502252615988255,
14
- "z_std": 0.08731898665428162
15
  }
 
1
  {
2
+ "bleu": 0.11166559489511374,
3
+ "rouge_l": 0.3286637931034483,
4
+ "chrf": 5.338902295436229,
5
+ "exact_match": 0.0,
6
+ "avg_pred_len": 46.6,
7
+ "avg_ref_len": 21.55,
8
+ "val_loss": 9.667327245076498,
9
+ "train_loss": 9.591291280893179
 
 
 
 
 
10
  }
best/model.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44fd3d37eae1a71aafcf8f41eaa630732e4ba955993fc0285c8aec2ed5bded87
3
- size 53658903
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4da4bb739fc3ccc6a025e24626079949074aa40a06e1fd3be0b5b4af8ebb9af5
3
+ size 349764515
best/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca01bcce1f20838e008ce1f239fd91bdd7c9a25d2b08c6f7fc77ade1aa49e918
3
- size 91980043
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb68706dfa6b901c015939c7d23e443227d5d107e3943ee66b361eda13956b51
3
+ size 684195019
best/tokenizer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
best/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "eos_token": "</s>",
4
+ "extra_ids": 100,
5
+ "extra_special_tokens": [
6
+ "<extra_id_0>",
7
+ "<extra_id_1>",
8
+ "<extra_id_2>",
9
+ "<extra_id_3>",
10
+ "<extra_id_4>",
11
+ "<extra_id_5>",
12
+ "<extra_id_6>",
13
+ "<extra_id_7>",
14
+ "<extra_id_8>",
15
+ "<extra_id_9>",
16
+ "<extra_id_10>",
17
+ "<extra_id_11>",
18
+ "<extra_id_12>",
19
+ "<extra_id_13>",
20
+ "<extra_id_14>",
21
+ "<extra_id_15>",
22
+ "<extra_id_16>",
23
+ "<extra_id_17>",
24
+ "<extra_id_18>",
25
+ "<extra_id_19>",
26
+ "<extra_id_20>",
27
+ "<extra_id_21>",
28
+ "<extra_id_22>",
29
+ "<extra_id_23>",
30
+ "<extra_id_24>",
31
+ "<extra_id_25>",
32
+ "<extra_id_26>",
33
+ "<extra_id_27>",
34
+ "<extra_id_28>",
35
+ "<extra_id_29>",
36
+ "<extra_id_30>",
37
+ "<extra_id_31>",
38
+ "<extra_id_32>",
39
+ "<extra_id_33>",
40
+ "<extra_id_34>",
41
+ "<extra_id_35>",
42
+ "<extra_id_36>",
43
+ "<extra_id_37>",
44
+ "<extra_id_38>",
45
+ "<extra_id_39>",
46
+ "<extra_id_40>",
47
+ "<extra_id_41>",
48
+ "<extra_id_42>",
49
+ "<extra_id_43>",
50
+ "<extra_id_44>",
51
+ "<extra_id_45>",
52
+ "<extra_id_46>",
53
+ "<extra_id_47>",
54
+ "<extra_id_48>",
55
+ "<extra_id_49>",
56
+ "<extra_id_50>",
57
+ "<extra_id_51>",
58
+ "<extra_id_52>",
59
+ "<extra_id_53>",
60
+ "<extra_id_54>",
61
+ "<extra_id_55>",
62
+ "<extra_id_56>",
63
+ "<extra_id_57>",
64
+ "<extra_id_58>",
65
+ "<extra_id_59>",
66
+ "<extra_id_60>",
67
+ "<extra_id_61>",
68
+ "<extra_id_62>",
69
+ "<extra_id_63>",
70
+ "<extra_id_64>",
71
+ "<extra_id_65>",
72
+ "<extra_id_66>",
73
+ "<extra_id_67>",
74
+ "<extra_id_68>",
75
+ "<extra_id_69>",
76
+ "<extra_id_70>",
77
+ "<extra_id_71>",
78
+ "<extra_id_72>",
79
+ "<extra_id_73>",
80
+ "<extra_id_74>",
81
+ "<extra_id_75>",
82
+ "<extra_id_76>",
83
+ "<extra_id_77>",
84
+ "<extra_id_78>",
85
+ "<extra_id_79>",
86
+ "<extra_id_80>",
87
+ "<extra_id_81>",
88
+ "<extra_id_82>",
89
+ "<extra_id_83>",
90
+ "<extra_id_84>",
91
+ "<extra_id_85>",
92
+ "<extra_id_86>",
93
+ "<extra_id_87>",
94
+ "<extra_id_88>",
95
+ "<extra_id_89>",
96
+ "<extra_id_90>",
97
+ "<extra_id_91>",
98
+ "<extra_id_92>",
99
+ "<extra_id_93>",
100
+ "<extra_id_94>",
101
+ "<extra_id_95>",
102
+ "<extra_id_96>",
103
+ "<extra_id_97>",
104
+ "<extra_id_98>",
105
+ "<extra_id_99>"
106
+ ],
107
+ "is_local": false,
108
+ "model_max_length": 512,
109
+ "pad_token": "<pad>",
110
+ "sp_model_kwargs": {},
111
+ "tokenizer_class": "T5Tokenizer",
112
+ "unk_token": "<unk>"
113
+ }
latest/adapter.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4d257a3b82f174b31fca249d690441501aaccb54e55ab18cd5b1ef8149528a5
3
+ size 2108181
latest/config.json CHANGED
@@ -3,50 +3,43 @@
3
  "split": "train",
4
  "max_samples": 100,
5
  "val_max_samples": 20,
6
- "batch_size": 32,
 
7
  "d_model": 384,
8
  "latent_dim": 512,
9
  "encoder_layers": 3,
10
  "encoder_heads": 8,
11
  "encoder_dropout": 0.1,
12
  "use_part_embeddings": true,
13
- "ddpm": {
14
- "num_timesteps": 1000,
15
- "beta_start": 0.0001,
16
- "beta_end": 0.02,
17
- "schedule_type": "linear"
18
- },
19
- "masking": {
20
- "feature_corruption": true,
21
- "time_span_masking": true,
22
- "whole_part_masking": true,
23
- "velocity_reconstruction": true,
24
- "latent_smoothness": true,
25
- "contrastive_consistency": false,
26
- "feature_corruption_prob": 0.15,
27
- "time_span_ratio": 0.2,
28
- "contrastive_weight": 0.05
29
- },
30
- "w_masked_pos": 1.0,
31
- "w_masked_vel": 1.0,
32
- "w_full_recon": 0.1,
33
- "w_latent_smooth": 0.01,
34
- "w_contrastive": 0.05,
35
  "epochs": 3,
36
- "lr": 0.0001,
 
 
 
37
  "weight_decay": 1e-05,
38
  "grad_clip": 1.0,
39
  "scheduler": "cosine",
40
  "warmup_steps": 100,
41
  "mixed_precision": false,
42
  "gradient_accumulation_steps": 1,
43
- "ckpt_dir": "/content/phase1_ckpt",
 
 
 
 
 
 
44
  "hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
45
  "upload_hf": false,
46
  "seed": 42,
47
  "log_backend": "csv",
48
- "wandb_project": "cslt-phase1",
49
  "log_every_n_steps": 10,
50
  "smoke_test": false,
51
- "run_id": "138504cf"
52
  }
 
3
  "split": "train",
4
  "max_samples": 100,
5
  "val_max_samples": 20,
6
+ "batch_size": 8,
7
+ "max_target_length": 128,
8
  "d_model": 384,
9
  "latent_dim": 512,
10
  "encoder_layers": 3,
11
  "encoder_heads": 8,
12
  "encoder_dropout": 0.1,
13
  "use_part_embeddings": true,
14
+ "t5_name": "google/flan-t5-small",
15
+ "t5_dim": 512,
16
+ "adapter_dropout": 0.1,
17
+ "use_attention_pooling": true,
18
+ "pool_num_heads": 4,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "epochs": 3,
20
+ "warmup_epochs": 1,
21
+ "lr_encoder": 5e-06,
22
+ "lr_adapter": 0.0001,
23
+ "lr_t5": 5e-05,
24
  "weight_decay": 1e-05,
25
  "grad_clip": 1.0,
26
  "scheduler": "cosine",
27
  "warmup_steps": 100,
28
  "mixed_precision": false,
29
  "gradient_accumulation_steps": 1,
30
+ "num_beams": 4,
31
+ "max_new_tokens": 50,
32
+ "use_ctc_head": false,
33
+ "ctc_weight": 0.1,
34
+ "ctc_vocab_size": 256,
35
+ "ckpt_dir": "/content/phase2_ckpt",
36
+ "phase1_ckpt": "/content/phase1_ckpt",
37
  "hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
38
  "upload_hf": false,
39
  "seed": 42,
40
  "log_backend": "csv",
41
+ "wandb_project": "cslt-phase2",
42
  "log_every_n_steps": 10,
43
  "smoke_test": false,
44
+ "run_id": "0c5128a7"
45
  }
latest/encoder.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c118e819f59393ac2aeaea39d50b3057bda6f54e35137241b167716f8947e6b2
3
  size 35465544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418ac86dab5ac1eb8d0fd22ed3052efed74b2062603c367a689461548e281cc3
3
  size 35465544
latest/metadata.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 2,
3
- "step": 12,
4
- "git_hash": "8457835",
5
- "timestamp": "2026-04-29T02:12:36.318924",
6
  "python_version": "3.12.13",
7
  "torch_version": "2.10.0+cu128",
8
  "seed": 42,
9
- "run_id": "138504cf"
10
  }
 
1
  {
2
  "epoch": 2,
3
+ "step": 39,
4
+ "git_hash": "8e0490b",
5
+ "timestamp": "2026-04-29T02:23:45.118381",
6
  "python_version": "3.12.13",
7
  "torch_version": "2.10.0+cu128",
8
  "seed": 42,
9
+ "run_id": "0c5128a7"
10
  }
latest/metrics.json CHANGED
@@ -1,15 +1,10 @@
1
  {
2
- "train/total_loss": 1.879580244421959,
3
- "train/masked_pos_loss": 0.9129918068647385,
4
- "train/masked_vel_loss": 0.8337216824293137,
5
- "train/full_recon_loss": 1.3284493386745453,
6
- "train/latent_smooth_loss": 0.002177916350774467,
7
- "val/total_loss": 2.7803542613983154,
8
- "val/masked_pos_loss": 1.3252986669540405,
9
- "val/masked_vel_loss": 1.3226159811019897,
10
- "val/full_recon_loss": 1.3243049383163452,
11
- "val/latent_smooth_loss": 0.0009115393040701747,
12
- "train/lr": 1.2880000000000004e-05,
13
- "z_mean": 0.015641039237380028,
14
- "z_std": 0.08970633149147034
15
  }
 
1
  {
2
+ "bleu": 0.04798207548180831,
3
+ "rouge_l": 0.393788682581786,
4
+ "chrf": 4.945468708957515,
5
+ "exact_match": 0.0,
6
+ "avg_pred_len": 50.0,
7
+ "avg_ref_len": 21.55,
8
+ "val_loss": 9.705982208251953,
9
+ "train_loss": 9.528985537015474
 
 
 
 
 
10
  }
latest/model.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f1ad050335a9eb521d96e698a793d784f6a5f617f0ea5e4b03f752c7471df3f
3
- size 53658903
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89aef35446e8e9ff1487577e527e75c7826867d322bb07c17e651b7971e8e08f
3
+ size 349764515
latest/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d10346bdeb257677bf43cb4ad840a210bf15fa7fb500341d00e55171f07863b5
3
- size 91980043
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72f47e69e39a37130bb6fcdbae2297ca2862f0acb2d6e3dcf5ed7696837c04e2
3
+ size 684195019
latest/tokenizer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
latest/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "eos_token": "</s>",
4
+ "extra_ids": 100,
5
+ "extra_special_tokens": [
6
+ "<extra_id_0>",
7
+ "<extra_id_1>",
8
+ "<extra_id_2>",
9
+ "<extra_id_3>",
10
+ "<extra_id_4>",
11
+ "<extra_id_5>",
12
+ "<extra_id_6>",
13
+ "<extra_id_7>",
14
+ "<extra_id_8>",
15
+ "<extra_id_9>",
16
+ "<extra_id_10>",
17
+ "<extra_id_11>",
18
+ "<extra_id_12>",
19
+ "<extra_id_13>",
20
+ "<extra_id_14>",
21
+ "<extra_id_15>",
22
+ "<extra_id_16>",
23
+ "<extra_id_17>",
24
+ "<extra_id_18>",
25
+ "<extra_id_19>",
26
+ "<extra_id_20>",
27
+ "<extra_id_21>",
28
+ "<extra_id_22>",
29
+ "<extra_id_23>",
30
+ "<extra_id_24>",
31
+ "<extra_id_25>",
32
+ "<extra_id_26>",
33
+ "<extra_id_27>",
34
+ "<extra_id_28>",
35
+ "<extra_id_29>",
36
+ "<extra_id_30>",
37
+ "<extra_id_31>",
38
+ "<extra_id_32>",
39
+ "<extra_id_33>",
40
+ "<extra_id_34>",
41
+ "<extra_id_35>",
42
+ "<extra_id_36>",
43
+ "<extra_id_37>",
44
+ "<extra_id_38>",
45
+ "<extra_id_39>",
46
+ "<extra_id_40>",
47
+ "<extra_id_41>",
48
+ "<extra_id_42>",
49
+ "<extra_id_43>",
50
+ "<extra_id_44>",
51
+ "<extra_id_45>",
52
+ "<extra_id_46>",
53
+ "<extra_id_47>",
54
+ "<extra_id_48>",
55
+ "<extra_id_49>",
56
+ "<extra_id_50>",
57
+ "<extra_id_51>",
58
+ "<extra_id_52>",
59
+ "<extra_id_53>",
60
+ "<extra_id_54>",
61
+ "<extra_id_55>",
62
+ "<extra_id_56>",
63
+ "<extra_id_57>",
64
+ "<extra_id_58>",
65
+ "<extra_id_59>",
66
+ "<extra_id_60>",
67
+ "<extra_id_61>",
68
+ "<extra_id_62>",
69
+ "<extra_id_63>",
70
+ "<extra_id_64>",
71
+ "<extra_id_65>",
72
+ "<extra_id_66>",
73
+ "<extra_id_67>",
74
+ "<extra_id_68>",
75
+ "<extra_id_69>",
76
+ "<extra_id_70>",
77
+ "<extra_id_71>",
78
+ "<extra_id_72>",
79
+ "<extra_id_73>",
80
+ "<extra_id_74>",
81
+ "<extra_id_75>",
82
+ "<extra_id_76>",
83
+ "<extra_id_77>",
84
+ "<extra_id_78>",
85
+ "<extra_id_79>",
86
+ "<extra_id_80>",
87
+ "<extra_id_81>",
88
+ "<extra_id_82>",
89
+ "<extra_id_83>",
90
+ "<extra_id_84>",
91
+ "<extra_id_85>",
92
+ "<extra_id_86>",
93
+ "<extra_id_87>",
94
+ "<extra_id_88>",
95
+ "<extra_id_89>",
96
+ "<extra_id_90>",
97
+ "<extra_id_91>",
98
+ "<extra_id_92>",
99
+ "<extra_id_93>",
100
+ "<extra_id_94>",
101
+ "<extra_id_95>",
102
+ "<extra_id_96>",
103
+ "<extra_id_97>",
104
+ "<extra_id_98>",
105
+ "<extra_id_99>"
106
+ ],
107
+ "is_local": false,
108
+ "model_max_length": 512,
109
+ "pad_token": "<pad>",
110
+ "sp_model_kwargs": {},
111
+ "tokenizer_class": "T5Tokenizer",
112
+ "unk_token": "<unk>"
113
+ }
logs/train_log.csv CHANGED
@@ -1,5 +1,7 @@
1
- step,train/total_loss,train/masked_pos_loss,train/masked_vel_loss,train/full_recon_loss,train/latent_smooth_loss,val/total_loss,val/masked_pos_loss,val/masked_vel_loss,val/full_recon_loss,val/latent_smooth_loss,train/lr,z_mean,z_std
2
- 4,2.303677797317505,1.166947454214096,1.0032410025596619,1.33467036485672,0.0022277096286416054,0.4656478464603424,0.33220887184143066,0.0,1.3343030214309692,0.0008657827856950462,4.960000000000002e-06,0.015502252615988255,0.08731898665428162
3
- 8,1.628475546836853,0.6631256863474846,0.8320725113153458,1.3325617015361786,0.002120855962857604,2.791887044906616,1.328176736831665,1.3305968046188354,1.3310281038284302,0.0010730486828833818,8.920000000000004e-06,0.015302599407732487,0.0877484530210495
4
- 10,1.1317189931869507,0.6652693152427673,0.3334561586380005,1.3297309875488281,0.002038335893303156,,,,,,1.0900000000000006e-05,,
5
- 12,1.879580244421959,0.9129918068647385,0.8337216824293137,1.3284493386745453,0.002177916350774467,2.7803542613983154,1.3252986669540405,1.3226159811019897,1.3243049383163452,0.0009115393040701747,1.2880000000000004e-05,0.015641039237380028,0.08970633149147034
 
 
 
1
+ step,train/loss,train/lr
2
+ 10,9.641051292419434,1.0900000000000006e-05
3
+ 13,9.713348535391,
4
+ 20,9.549558639526367,3.9650000000000025e-07
5
+ 26,9.591291280893179,
6
+ 30,9.527813911437988,2.480000000000001e-07
7
+ 39,9.528985537015474,
metrics_epoch_000.json CHANGED
@@ -1,16 +1,11 @@
1
  {
2
- "epoch": 0,
3
- "train/total_loss": 2.303677797317505,
4
- "train/masked_pos_loss": 1.166947454214096,
5
- "train/masked_vel_loss": 1.0032410025596619,
6
- "train/full_recon_loss": 1.33467036485672,
7
- "train/latent_smooth_loss": 0.0022277096286416054,
8
- "val/total_loss": 0.4656478464603424,
9
- "val/masked_pos_loss": 0.33220887184143066,
10
- "val/masked_vel_loss": 0.0,
11
- "val/full_recon_loss": 1.3343030214309692,
12
- "val/latent_smooth_loss": 0.0008657827856950462,
13
- "train/lr": 4.960000000000002e-06,
14
- "z_mean": 0.015502252615988255,
15
- "z_std": 0.08731898665428162
16
  }
 
1
  {
2
+ "epoch": 1,
3
+ "bleu": 0.06633456743593955,
4
+ "rouge_l": 0.37037037037037035,
5
+ "chrf": 4.102654203852878,
6
+ "exact_match": 0.0,
7
+ "avg_pred_len": 39.1,
8
+ "avg_ref_len": 21.55,
9
+ "val_loss": 9.739182154337565,
10
+ "train_loss": 9.713348535391
 
 
 
 
 
11
  }
metrics_epoch_001.json CHANGED
@@ -1,16 +1,11 @@
1
  {
2
- "epoch": 1,
3
- "train/total_loss": 1.628475546836853,
4
- "train/masked_pos_loss": 0.6631256863474846,
5
- "train/masked_vel_loss": 0.8320725113153458,
6
- "train/full_recon_loss": 1.3325617015361786,
7
- "train/latent_smooth_loss": 0.002120855962857604,
8
- "val/total_loss": 2.791887044906616,
9
- "val/masked_pos_loss": 1.328176736831665,
10
- "val/masked_vel_loss": 1.3305968046188354,
11
- "val/full_recon_loss": 1.3310281038284302,
12
- "val/latent_smooth_loss": 0.0010730486828833818,
13
- "train/lr": 8.920000000000004e-06,
14
- "z_mean": 0.015302599407732487,
15
- "z_std": 0.0877484530210495
16
  }
 
1
  {
2
+ "epoch": 2,
3
+ "bleu": 0.11166559489511374,
4
+ "rouge_l": 0.3286637931034483,
5
+ "chrf": 5.338902295436229,
6
+ "exact_match": 0.0,
7
+ "avg_pred_len": 46.6,
8
+ "avg_ref_len": 21.55,
9
+ "val_loss": 9.667327245076498,
10
+ "train_loss": 9.591291280893179
 
 
 
 
 
11
  }
metrics_epoch_002.json CHANGED
@@ -1,16 +1,11 @@
1
  {
2
- "epoch": 2,
3
- "train/total_loss": 1.879580244421959,
4
- "train/masked_pos_loss": 0.9129918068647385,
5
- "train/masked_vel_loss": 0.8337216824293137,
6
- "train/full_recon_loss": 1.3284493386745453,
7
- "train/latent_smooth_loss": 0.002177916350774467,
8
- "val/total_loss": 2.7803542613983154,
9
- "val/masked_pos_loss": 1.3252986669540405,
10
- "val/masked_vel_loss": 1.3226159811019897,
11
- "val/full_recon_loss": 1.3243049383163452,
12
- "val/latent_smooth_loss": 0.0009115393040701747,
13
- "train/lr": 1.2880000000000004e-05,
14
- "z_mean": 0.015641039237380028,
15
- "z_std": 0.08970633149147034
16
  }
 
1
  {
2
+ "epoch": 3,
3
+ "bleu": 0.04798207548180831,
4
+ "rouge_l": 0.393788682581786,
5
+ "chrf": 4.945468708957515,
6
+ "exact_match": 0.0,
7
+ "avg_pred_len": 50.0,
8
+ "avg_ref_len": 21.55,
9
+ "val_loss": 9.705982208251953,
10
+ "train_loss": 9.528985537015474
 
 
 
 
 
11
  }