Phase 2 checkpoint — 3 epochs
Browse files- best/adapter.pt +3 -0
- best/config.json +20 -27
- best/encoder.pt +1 -1
- best/metadata.json +5 -5
- best/metrics.json +8 -13
- best/model.pt +2 -2
- best/optimizer.pt +2 -2
- best/tokenizer/tokenizer.json +0 -0
- best/tokenizer/tokenizer_config.json +113 -0
- latest/adapter.pt +3 -0
- latest/config.json +20 -27
- latest/encoder.pt +1 -1
- latest/metadata.json +4 -4
- latest/metrics.json +8 -13
- latest/model.pt +2 -2
- latest/optimizer.pt +2 -2
- latest/tokenizer/tokenizer.json +0 -0
- latest/tokenizer/tokenizer_config.json +113 -0
- logs/train_log.csv +7 -5
- metrics_epoch_000.json +9 -14
- metrics_epoch_001.json +9 -14
- metrics_epoch_002.json +9 -14
best/adapter.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73f1a49489d0d1e6ae3b71567dafadf3fa1af5f4486e65b3ddd350ef6a9a06a2
|
| 3 |
+
size 2108181
|
best/config.json
CHANGED
|
@@ -3,50 +3,43 @@
|
|
| 3 |
"split": "train",
|
| 4 |
"max_samples": 100,
|
| 5 |
"val_max_samples": 20,
|
| 6 |
-
"batch_size":
|
|
|
|
| 7 |
"d_model": 384,
|
| 8 |
"latent_dim": 512,
|
| 9 |
"encoder_layers": 3,
|
| 10 |
"encoder_heads": 8,
|
| 11 |
"encoder_dropout": 0.1,
|
| 12 |
"use_part_embeddings": true,
|
| 13 |
-
"
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
},
|
| 19 |
-
"masking": {
|
| 20 |
-
"feature_corruption": true,
|
| 21 |
-
"time_span_masking": true,
|
| 22 |
-
"whole_part_masking": true,
|
| 23 |
-
"velocity_reconstruction": true,
|
| 24 |
-
"latent_smoothness": true,
|
| 25 |
-
"contrastive_consistency": false,
|
| 26 |
-
"feature_corruption_prob": 0.15,
|
| 27 |
-
"time_span_ratio": 0.2,
|
| 28 |
-
"contrastive_weight": 0.05
|
| 29 |
-
},
|
| 30 |
-
"w_masked_pos": 1.0,
|
| 31 |
-
"w_masked_vel": 1.0,
|
| 32 |
-
"w_full_recon": 0.1,
|
| 33 |
-
"w_latent_smooth": 0.01,
|
| 34 |
-
"w_contrastive": 0.05,
|
| 35 |
"epochs": 3,
|
| 36 |
-
"
|
|
|
|
|
|
|
|
|
|
| 37 |
"weight_decay": 1e-05,
|
| 38 |
"grad_clip": 1.0,
|
| 39 |
"scheduler": "cosine",
|
| 40 |
"warmup_steps": 100,
|
| 41 |
"mixed_precision": false,
|
| 42 |
"gradient_accumulation_steps": 1,
|
| 43 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
"hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
|
| 45 |
"upload_hf": false,
|
| 46 |
"seed": 42,
|
| 47 |
"log_backend": "csv",
|
| 48 |
-
"wandb_project": "cslt-
|
| 49 |
"log_every_n_steps": 10,
|
| 50 |
"smoke_test": false,
|
| 51 |
-
"run_id": "
|
| 52 |
}
|
|
|
|
| 3 |
"split": "train",
|
| 4 |
"max_samples": 100,
|
| 5 |
"val_max_samples": 20,
|
| 6 |
+
"batch_size": 8,
|
| 7 |
+
"max_target_length": 128,
|
| 8 |
"d_model": 384,
|
| 9 |
"latent_dim": 512,
|
| 10 |
"encoder_layers": 3,
|
| 11 |
"encoder_heads": 8,
|
| 12 |
"encoder_dropout": 0.1,
|
| 13 |
"use_part_embeddings": true,
|
| 14 |
+
"t5_name": "google/flan-t5-small",
|
| 15 |
+
"t5_dim": 512,
|
| 16 |
+
"adapter_dropout": 0.1,
|
| 17 |
+
"use_attention_pooling": true,
|
| 18 |
+
"pool_num_heads": 4,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
"epochs": 3,
|
| 20 |
+
"warmup_epochs": 1,
|
| 21 |
+
"lr_encoder": 5e-06,
|
| 22 |
+
"lr_adapter": 0.0001,
|
| 23 |
+
"lr_t5": 5e-05,
|
| 24 |
"weight_decay": 1e-05,
|
| 25 |
"grad_clip": 1.0,
|
| 26 |
"scheduler": "cosine",
|
| 27 |
"warmup_steps": 100,
|
| 28 |
"mixed_precision": false,
|
| 29 |
"gradient_accumulation_steps": 1,
|
| 30 |
+
"num_beams": 4,
|
| 31 |
+
"max_new_tokens": 50,
|
| 32 |
+
"use_ctc_head": false,
|
| 33 |
+
"ctc_weight": 0.1,
|
| 34 |
+
"ctc_vocab_size": 256,
|
| 35 |
+
"ckpt_dir": "/content/phase2_ckpt",
|
| 36 |
+
"phase1_ckpt": "/content/phase1_ckpt",
|
| 37 |
"hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
|
| 38 |
"upload_hf": false,
|
| 39 |
"seed": 42,
|
| 40 |
"log_backend": "csv",
|
| 41 |
+
"wandb_project": "cslt-phase2",
|
| 42 |
"log_every_n_steps": 10,
|
| 43 |
"smoke_test": false,
|
| 44 |
+
"run_id": "0c5128a7"
|
| 45 |
}
|
best/encoder.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 35465544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2bec4ba6a36a815d93bce379c04af4e1b1937538437546009935a57117a779c1
|
| 3 |
size 35465544
|
best/metadata.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"step":
|
| 4 |
-
"git_hash": "
|
| 5 |
-
"timestamp": "2026-04-29T02:
|
| 6 |
"python_version": "3.12.13",
|
| 7 |
"torch_version": "2.10.0+cu128",
|
| 8 |
"seed": 42,
|
| 9 |
-
"run_id": "
|
| 10 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 1,
|
| 3 |
+
"step": 26,
|
| 4 |
+
"git_hash": "8e0490b",
|
| 5 |
+
"timestamp": "2026-04-29T02:23:13.312090",
|
| 6 |
"python_version": "3.12.13",
|
| 7 |
"torch_version": "2.10.0+cu128",
|
| 8 |
"seed": 42,
|
| 9 |
+
"run_id": "0c5128a7"
|
| 10 |
}
|
best/metrics.json
CHANGED
|
@@ -1,15 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
| 3 |
-
"
|
| 4 |
-
"
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"val/full_recon_loss": 1.3343030214309692,
|
| 11 |
-
"val/latent_smooth_loss": 0.0008657827856950462,
|
| 12 |
-
"train/lr": 4.960000000000002e-06,
|
| 13 |
-
"z_mean": 0.015502252615988255,
|
| 14 |
-
"z_std": 0.08731898665428162
|
| 15 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"bleu": 0.11166559489511374,
|
| 3 |
+
"rouge_l": 0.3286637931034483,
|
| 4 |
+
"chrf": 5.338902295436229,
|
| 5 |
+
"exact_match": 0.0,
|
| 6 |
+
"avg_pred_len": 46.6,
|
| 7 |
+
"avg_ref_len": 21.55,
|
| 8 |
+
"val_loss": 9.667327245076498,
|
| 9 |
+
"train_loss": 9.591291280893179
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
}
|
best/model.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4da4bb739fc3ccc6a025e24626079949074aa40a06e1fd3be0b5b4af8ebb9af5
|
| 3 |
+
size 349764515
|
best/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb68706dfa6b901c015939c7d23e443227d5d107e3943ee66b361eda13956b51
|
| 3 |
+
size 684195019
|
best/tokenizer/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
best/tokenizer/tokenizer_config.json
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"eos_token": "</s>",
|
| 4 |
+
"extra_ids": 100,
|
| 5 |
+
"extra_special_tokens": [
|
| 6 |
+
"<extra_id_0>",
|
| 7 |
+
"<extra_id_1>",
|
| 8 |
+
"<extra_id_2>",
|
| 9 |
+
"<extra_id_3>",
|
| 10 |
+
"<extra_id_4>",
|
| 11 |
+
"<extra_id_5>",
|
| 12 |
+
"<extra_id_6>",
|
| 13 |
+
"<extra_id_7>",
|
| 14 |
+
"<extra_id_8>",
|
| 15 |
+
"<extra_id_9>",
|
| 16 |
+
"<extra_id_10>",
|
| 17 |
+
"<extra_id_11>",
|
| 18 |
+
"<extra_id_12>",
|
| 19 |
+
"<extra_id_13>",
|
| 20 |
+
"<extra_id_14>",
|
| 21 |
+
"<extra_id_15>",
|
| 22 |
+
"<extra_id_16>",
|
| 23 |
+
"<extra_id_17>",
|
| 24 |
+
"<extra_id_18>",
|
| 25 |
+
"<extra_id_19>",
|
| 26 |
+
"<extra_id_20>",
|
| 27 |
+
"<extra_id_21>",
|
| 28 |
+
"<extra_id_22>",
|
| 29 |
+
"<extra_id_23>",
|
| 30 |
+
"<extra_id_24>",
|
| 31 |
+
"<extra_id_25>",
|
| 32 |
+
"<extra_id_26>",
|
| 33 |
+
"<extra_id_27>",
|
| 34 |
+
"<extra_id_28>",
|
| 35 |
+
"<extra_id_29>",
|
| 36 |
+
"<extra_id_30>",
|
| 37 |
+
"<extra_id_31>",
|
| 38 |
+
"<extra_id_32>",
|
| 39 |
+
"<extra_id_33>",
|
| 40 |
+
"<extra_id_34>",
|
| 41 |
+
"<extra_id_35>",
|
| 42 |
+
"<extra_id_36>",
|
| 43 |
+
"<extra_id_37>",
|
| 44 |
+
"<extra_id_38>",
|
| 45 |
+
"<extra_id_39>",
|
| 46 |
+
"<extra_id_40>",
|
| 47 |
+
"<extra_id_41>",
|
| 48 |
+
"<extra_id_42>",
|
| 49 |
+
"<extra_id_43>",
|
| 50 |
+
"<extra_id_44>",
|
| 51 |
+
"<extra_id_45>",
|
| 52 |
+
"<extra_id_46>",
|
| 53 |
+
"<extra_id_47>",
|
| 54 |
+
"<extra_id_48>",
|
| 55 |
+
"<extra_id_49>",
|
| 56 |
+
"<extra_id_50>",
|
| 57 |
+
"<extra_id_51>",
|
| 58 |
+
"<extra_id_52>",
|
| 59 |
+
"<extra_id_53>",
|
| 60 |
+
"<extra_id_54>",
|
| 61 |
+
"<extra_id_55>",
|
| 62 |
+
"<extra_id_56>",
|
| 63 |
+
"<extra_id_57>",
|
| 64 |
+
"<extra_id_58>",
|
| 65 |
+
"<extra_id_59>",
|
| 66 |
+
"<extra_id_60>",
|
| 67 |
+
"<extra_id_61>",
|
| 68 |
+
"<extra_id_62>",
|
| 69 |
+
"<extra_id_63>",
|
| 70 |
+
"<extra_id_64>",
|
| 71 |
+
"<extra_id_65>",
|
| 72 |
+
"<extra_id_66>",
|
| 73 |
+
"<extra_id_67>",
|
| 74 |
+
"<extra_id_68>",
|
| 75 |
+
"<extra_id_69>",
|
| 76 |
+
"<extra_id_70>",
|
| 77 |
+
"<extra_id_71>",
|
| 78 |
+
"<extra_id_72>",
|
| 79 |
+
"<extra_id_73>",
|
| 80 |
+
"<extra_id_74>",
|
| 81 |
+
"<extra_id_75>",
|
| 82 |
+
"<extra_id_76>",
|
| 83 |
+
"<extra_id_77>",
|
| 84 |
+
"<extra_id_78>",
|
| 85 |
+
"<extra_id_79>",
|
| 86 |
+
"<extra_id_80>",
|
| 87 |
+
"<extra_id_81>",
|
| 88 |
+
"<extra_id_82>",
|
| 89 |
+
"<extra_id_83>",
|
| 90 |
+
"<extra_id_84>",
|
| 91 |
+
"<extra_id_85>",
|
| 92 |
+
"<extra_id_86>",
|
| 93 |
+
"<extra_id_87>",
|
| 94 |
+
"<extra_id_88>",
|
| 95 |
+
"<extra_id_89>",
|
| 96 |
+
"<extra_id_90>",
|
| 97 |
+
"<extra_id_91>",
|
| 98 |
+
"<extra_id_92>",
|
| 99 |
+
"<extra_id_93>",
|
| 100 |
+
"<extra_id_94>",
|
| 101 |
+
"<extra_id_95>",
|
| 102 |
+
"<extra_id_96>",
|
| 103 |
+
"<extra_id_97>",
|
| 104 |
+
"<extra_id_98>",
|
| 105 |
+
"<extra_id_99>"
|
| 106 |
+
],
|
| 107 |
+
"is_local": false,
|
| 108 |
+
"model_max_length": 512,
|
| 109 |
+
"pad_token": "<pad>",
|
| 110 |
+
"sp_model_kwargs": {},
|
| 111 |
+
"tokenizer_class": "T5Tokenizer",
|
| 112 |
+
"unk_token": "<unk>"
|
| 113 |
+
}
|
latest/adapter.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4d257a3b82f174b31fca249d690441501aaccb54e55ab18cd5b1ef8149528a5
|
| 3 |
+
size 2108181
|
latest/config.json
CHANGED
|
@@ -3,50 +3,43 @@
|
|
| 3 |
"split": "train",
|
| 4 |
"max_samples": 100,
|
| 5 |
"val_max_samples": 20,
|
| 6 |
-
"batch_size":
|
|
|
|
| 7 |
"d_model": 384,
|
| 8 |
"latent_dim": 512,
|
| 9 |
"encoder_layers": 3,
|
| 10 |
"encoder_heads": 8,
|
| 11 |
"encoder_dropout": 0.1,
|
| 12 |
"use_part_embeddings": true,
|
| 13 |
-
"
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
},
|
| 19 |
-
"masking": {
|
| 20 |
-
"feature_corruption": true,
|
| 21 |
-
"time_span_masking": true,
|
| 22 |
-
"whole_part_masking": true,
|
| 23 |
-
"velocity_reconstruction": true,
|
| 24 |
-
"latent_smoothness": true,
|
| 25 |
-
"contrastive_consistency": false,
|
| 26 |
-
"feature_corruption_prob": 0.15,
|
| 27 |
-
"time_span_ratio": 0.2,
|
| 28 |
-
"contrastive_weight": 0.05
|
| 29 |
-
},
|
| 30 |
-
"w_masked_pos": 1.0,
|
| 31 |
-
"w_masked_vel": 1.0,
|
| 32 |
-
"w_full_recon": 0.1,
|
| 33 |
-
"w_latent_smooth": 0.01,
|
| 34 |
-
"w_contrastive": 0.05,
|
| 35 |
"epochs": 3,
|
| 36 |
-
"
|
|
|
|
|
|
|
|
|
|
| 37 |
"weight_decay": 1e-05,
|
| 38 |
"grad_clip": 1.0,
|
| 39 |
"scheduler": "cosine",
|
| 40 |
"warmup_steps": 100,
|
| 41 |
"mixed_precision": false,
|
| 42 |
"gradient_accumulation_steps": 1,
|
| 43 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
"hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
|
| 45 |
"upload_hf": false,
|
| 46 |
"seed": 42,
|
| 47 |
"log_backend": "csv",
|
| 48 |
-
"wandb_project": "cslt-
|
| 49 |
"log_every_n_steps": 10,
|
| 50 |
"smoke_test": false,
|
| 51 |
-
"run_id": "
|
| 52 |
}
|
|
|
|
| 3 |
"split": "train",
|
| 4 |
"max_samples": 100,
|
| 5 |
"val_max_samples": 20,
|
| 6 |
+
"batch_size": 8,
|
| 7 |
+
"max_target_length": 128,
|
| 8 |
"d_model": 384,
|
| 9 |
"latent_dim": 512,
|
| 10 |
"encoder_layers": 3,
|
| 11 |
"encoder_heads": 8,
|
| 12 |
"encoder_dropout": 0.1,
|
| 13 |
"use_part_embeddings": true,
|
| 14 |
+
"t5_name": "google/flan-t5-small",
|
| 15 |
+
"t5_dim": 512,
|
| 16 |
+
"adapter_dropout": 0.1,
|
| 17 |
+
"use_attention_pooling": true,
|
| 18 |
+
"pool_num_heads": 4,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
"epochs": 3,
|
| 20 |
+
"warmup_epochs": 1,
|
| 21 |
+
"lr_encoder": 5e-06,
|
| 22 |
+
"lr_adapter": 0.0001,
|
| 23 |
+
"lr_t5": 5e-05,
|
| 24 |
"weight_decay": 1e-05,
|
| 25 |
"grad_clip": 1.0,
|
| 26 |
"scheduler": "cosine",
|
| 27 |
"warmup_steps": 100,
|
| 28 |
"mixed_precision": false,
|
| 29 |
"gradient_accumulation_steps": 1,
|
| 30 |
+
"num_beams": 4,
|
| 31 |
+
"max_new_tokens": 50,
|
| 32 |
+
"use_ctc_head": false,
|
| 33 |
+
"ctc_weight": 0.1,
|
| 34 |
+
"ctc_vocab_size": 256,
|
| 35 |
+
"ckpt_dir": "/content/phase2_ckpt",
|
| 36 |
+
"phase1_ckpt": "/content/phase1_ckpt",
|
| 37 |
"hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
|
| 38 |
"upload_hf": false,
|
| 39 |
"seed": 42,
|
| 40 |
"log_backend": "csv",
|
| 41 |
+
"wandb_project": "cslt-phase2",
|
| 42 |
"log_every_n_steps": 10,
|
| 43 |
"smoke_test": false,
|
| 44 |
+
"run_id": "0c5128a7"
|
| 45 |
}
|
latest/encoder.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 35465544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:418ac86dab5ac1eb8d0fd22ed3052efed74b2062603c367a689461548e281cc3
|
| 3 |
size 35465544
|
latest/metadata.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 2,
|
| 3 |
-
"step":
|
| 4 |
-
"git_hash": "
|
| 5 |
-
"timestamp": "2026-04-29T02:
|
| 6 |
"python_version": "3.12.13",
|
| 7 |
"torch_version": "2.10.0+cu128",
|
| 8 |
"seed": 42,
|
| 9 |
-
"run_id": "
|
| 10 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 2,
|
| 3 |
+
"step": 39,
|
| 4 |
+
"git_hash": "8e0490b",
|
| 5 |
+
"timestamp": "2026-04-29T02:23:45.118381",
|
| 6 |
"python_version": "3.12.13",
|
| 7 |
"torch_version": "2.10.0+cu128",
|
| 8 |
"seed": 42,
|
| 9 |
+
"run_id": "0c5128a7"
|
| 10 |
}
|
latest/metrics.json
CHANGED
|
@@ -1,15 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
| 3 |
-
"
|
| 4 |
-
"
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"val/full_recon_loss": 1.3243049383163452,
|
| 11 |
-
"val/latent_smooth_loss": 0.0009115393040701747,
|
| 12 |
-
"train/lr": 1.2880000000000004e-05,
|
| 13 |
-
"z_mean": 0.015641039237380028,
|
| 14 |
-
"z_std": 0.08970633149147034
|
| 15 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"bleu": 0.04798207548180831,
|
| 3 |
+
"rouge_l": 0.393788682581786,
|
| 4 |
+
"chrf": 4.945468708957515,
|
| 5 |
+
"exact_match": 0.0,
|
| 6 |
+
"avg_pred_len": 50.0,
|
| 7 |
+
"avg_ref_len": 21.55,
|
| 8 |
+
"val_loss": 9.705982208251953,
|
| 9 |
+
"train_loss": 9.528985537015474
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
}
|
latest/model.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89aef35446e8e9ff1487577e527e75c7826867d322bb07c17e651b7971e8e08f
|
| 3 |
+
size 349764515
|
latest/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72f47e69e39a37130bb6fcdbae2297ca2862f0acb2d6e3dcf5ed7696837c04e2
|
| 3 |
+
size 684195019
|
latest/tokenizer/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
latest/tokenizer/tokenizer_config.json
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"eos_token": "</s>",
|
| 4 |
+
"extra_ids": 100,
|
| 5 |
+
"extra_special_tokens": [
|
| 6 |
+
"<extra_id_0>",
|
| 7 |
+
"<extra_id_1>",
|
| 8 |
+
"<extra_id_2>",
|
| 9 |
+
"<extra_id_3>",
|
| 10 |
+
"<extra_id_4>",
|
| 11 |
+
"<extra_id_5>",
|
| 12 |
+
"<extra_id_6>",
|
| 13 |
+
"<extra_id_7>",
|
| 14 |
+
"<extra_id_8>",
|
| 15 |
+
"<extra_id_9>",
|
| 16 |
+
"<extra_id_10>",
|
| 17 |
+
"<extra_id_11>",
|
| 18 |
+
"<extra_id_12>",
|
| 19 |
+
"<extra_id_13>",
|
| 20 |
+
"<extra_id_14>",
|
| 21 |
+
"<extra_id_15>",
|
| 22 |
+
"<extra_id_16>",
|
| 23 |
+
"<extra_id_17>",
|
| 24 |
+
"<extra_id_18>",
|
| 25 |
+
"<extra_id_19>",
|
| 26 |
+
"<extra_id_20>",
|
| 27 |
+
"<extra_id_21>",
|
| 28 |
+
"<extra_id_22>",
|
| 29 |
+
"<extra_id_23>",
|
| 30 |
+
"<extra_id_24>",
|
| 31 |
+
"<extra_id_25>",
|
| 32 |
+
"<extra_id_26>",
|
| 33 |
+
"<extra_id_27>",
|
| 34 |
+
"<extra_id_28>",
|
| 35 |
+
"<extra_id_29>",
|
| 36 |
+
"<extra_id_30>",
|
| 37 |
+
"<extra_id_31>",
|
| 38 |
+
"<extra_id_32>",
|
| 39 |
+
"<extra_id_33>",
|
| 40 |
+
"<extra_id_34>",
|
| 41 |
+
"<extra_id_35>",
|
| 42 |
+
"<extra_id_36>",
|
| 43 |
+
"<extra_id_37>",
|
| 44 |
+
"<extra_id_38>",
|
| 45 |
+
"<extra_id_39>",
|
| 46 |
+
"<extra_id_40>",
|
| 47 |
+
"<extra_id_41>",
|
| 48 |
+
"<extra_id_42>",
|
| 49 |
+
"<extra_id_43>",
|
| 50 |
+
"<extra_id_44>",
|
| 51 |
+
"<extra_id_45>",
|
| 52 |
+
"<extra_id_46>",
|
| 53 |
+
"<extra_id_47>",
|
| 54 |
+
"<extra_id_48>",
|
| 55 |
+
"<extra_id_49>",
|
| 56 |
+
"<extra_id_50>",
|
| 57 |
+
"<extra_id_51>",
|
| 58 |
+
"<extra_id_52>",
|
| 59 |
+
"<extra_id_53>",
|
| 60 |
+
"<extra_id_54>",
|
| 61 |
+
"<extra_id_55>",
|
| 62 |
+
"<extra_id_56>",
|
| 63 |
+
"<extra_id_57>",
|
| 64 |
+
"<extra_id_58>",
|
| 65 |
+
"<extra_id_59>",
|
| 66 |
+
"<extra_id_60>",
|
| 67 |
+
"<extra_id_61>",
|
| 68 |
+
"<extra_id_62>",
|
| 69 |
+
"<extra_id_63>",
|
| 70 |
+
"<extra_id_64>",
|
| 71 |
+
"<extra_id_65>",
|
| 72 |
+
"<extra_id_66>",
|
| 73 |
+
"<extra_id_67>",
|
| 74 |
+
"<extra_id_68>",
|
| 75 |
+
"<extra_id_69>",
|
| 76 |
+
"<extra_id_70>",
|
| 77 |
+
"<extra_id_71>",
|
| 78 |
+
"<extra_id_72>",
|
| 79 |
+
"<extra_id_73>",
|
| 80 |
+
"<extra_id_74>",
|
| 81 |
+
"<extra_id_75>",
|
| 82 |
+
"<extra_id_76>",
|
| 83 |
+
"<extra_id_77>",
|
| 84 |
+
"<extra_id_78>",
|
| 85 |
+
"<extra_id_79>",
|
| 86 |
+
"<extra_id_80>",
|
| 87 |
+
"<extra_id_81>",
|
| 88 |
+
"<extra_id_82>",
|
| 89 |
+
"<extra_id_83>",
|
| 90 |
+
"<extra_id_84>",
|
| 91 |
+
"<extra_id_85>",
|
| 92 |
+
"<extra_id_86>",
|
| 93 |
+
"<extra_id_87>",
|
| 94 |
+
"<extra_id_88>",
|
| 95 |
+
"<extra_id_89>",
|
| 96 |
+
"<extra_id_90>",
|
| 97 |
+
"<extra_id_91>",
|
| 98 |
+
"<extra_id_92>",
|
| 99 |
+
"<extra_id_93>",
|
| 100 |
+
"<extra_id_94>",
|
| 101 |
+
"<extra_id_95>",
|
| 102 |
+
"<extra_id_96>",
|
| 103 |
+
"<extra_id_97>",
|
| 104 |
+
"<extra_id_98>",
|
| 105 |
+
"<extra_id_99>"
|
| 106 |
+
],
|
| 107 |
+
"is_local": false,
|
| 108 |
+
"model_max_length": 512,
|
| 109 |
+
"pad_token": "<pad>",
|
| 110 |
+
"sp_model_kwargs": {},
|
| 111 |
+
"tokenizer_class": "T5Tokenizer",
|
| 112 |
+
"unk_token": "<unk>"
|
| 113 |
+
}
|
logs/train_log.csv
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
-
step,train/
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
step,train/loss,train/lr
|
| 2 |
+
10,9.641051292419434,1.0900000000000006e-05
|
| 3 |
+
13,9.713348535391,
|
| 4 |
+
20,9.549558639526367,3.9650000000000025e-07
|
| 5 |
+
26,9.591291280893179,
|
| 6 |
+
30,9.527813911437988,2.480000000000001e-07
|
| 7 |
+
39,9.528985537015474,
|
metrics_epoch_000.json
CHANGED
|
@@ -1,16 +1,11 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"
|
| 4 |
-
"
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"val/full_recon_loss": 1.3343030214309692,
|
| 12 |
-
"val/latent_smooth_loss": 0.0008657827856950462,
|
| 13 |
-
"train/lr": 4.960000000000002e-06,
|
| 14 |
-
"z_mean": 0.015502252615988255,
|
| 15 |
-
"z_std": 0.08731898665428162
|
| 16 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 1,
|
| 3 |
+
"bleu": 0.06633456743593955,
|
| 4 |
+
"rouge_l": 0.37037037037037035,
|
| 5 |
+
"chrf": 4.102654203852878,
|
| 6 |
+
"exact_match": 0.0,
|
| 7 |
+
"avg_pred_len": 39.1,
|
| 8 |
+
"avg_ref_len": 21.55,
|
| 9 |
+
"val_loss": 9.739182154337565,
|
| 10 |
+
"train_loss": 9.713348535391
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
}
|
metrics_epoch_001.json
CHANGED
|
@@ -1,16 +1,11 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"
|
| 4 |
-
"
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"val/full_recon_loss": 1.3310281038284302,
|
| 12 |
-
"val/latent_smooth_loss": 0.0010730486828833818,
|
| 13 |
-
"train/lr": 8.920000000000004e-06,
|
| 14 |
-
"z_mean": 0.015302599407732487,
|
| 15 |
-
"z_std": 0.0877484530210495
|
| 16 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 2,
|
| 3 |
+
"bleu": 0.11166559489511374,
|
| 4 |
+
"rouge_l": 0.3286637931034483,
|
| 5 |
+
"chrf": 5.338902295436229,
|
| 6 |
+
"exact_match": 0.0,
|
| 7 |
+
"avg_pred_len": 46.6,
|
| 8 |
+
"avg_ref_len": 21.55,
|
| 9 |
+
"val_loss": 9.667327245076498,
|
| 10 |
+
"train_loss": 9.591291280893179
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
}
|
metrics_epoch_002.json
CHANGED
|
@@ -1,16 +1,11 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"
|
| 4 |
-
"
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"val/full_recon_loss": 1.3243049383163452,
|
| 12 |
-
"val/latent_smooth_loss": 0.0009115393040701747,
|
| 13 |
-
"train/lr": 1.2880000000000004e-05,
|
| 14 |
-
"z_mean": 0.015641039237380028,
|
| 15 |
-
"z_std": 0.08970633149147034
|
| 16 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 3,
|
| 3 |
+
"bleu": 0.04798207548180831,
|
| 4 |
+
"rouge_l": 0.393788682581786,
|
| 5 |
+
"chrf": 4.945468708957515,
|
| 6 |
+
"exact_match": 0.0,
|
| 7 |
+
"avg_pred_len": 50.0,
|
| 8 |
+
"avg_ref_len": 21.55,
|
| 9 |
+
"val_loss": 9.705982208251953,
|
| 10 |
+
"train_loss": 9.528985537015474
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
}
|