bdanko committed on Apr 29

Commit

953bc42

verified ·

1 Parent(s): 0bb7b1f

Phase 2 checkpoint — 3 epochs

Browse files

Files changed (22) hide show

best/adapter.pt +3 -0
best/config.json +20 -27
best/encoder.pt +1 -1
best/metadata.json +5 -5
best/metrics.json +8 -13
best/model.pt +2 -2
best/optimizer.pt +2 -2
best/tokenizer/tokenizer.json +0 -0
best/tokenizer/tokenizer_config.json +113 -0
latest/adapter.pt +3 -0
latest/config.json +20 -27
latest/encoder.pt +1 -1
latest/metadata.json +4 -4
latest/metrics.json +8 -13
latest/model.pt +2 -2
latest/optimizer.pt +2 -2
latest/tokenizer/tokenizer.json +0 -0
latest/tokenizer/tokenizer_config.json +113 -0
logs/train_log.csv +7 -5
metrics_epoch_000.json +9 -14
metrics_epoch_001.json +9 -14
metrics_epoch_002.json +9 -14

best/adapter.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73f1a49489d0d1e6ae3b71567dafadf3fa1af5f4486e65b3ddd350ef6a9a06a2
+size 2108181

best/config.json CHANGED Viewed

@@ -3,50 +3,43 @@
   "split": "train",
   "max_samples": 100,
   "val_max_samples": 20,
-  "batch_size": 32,
   "d_model": 384,
   "latent_dim": 512,
   "encoder_layers": 3,
   "encoder_heads": 8,
   "encoder_dropout": 0.1,
   "use_part_embeddings": true,
-  "ddpm": {
-    "num_timesteps": 1000,
-    "beta_start": 0.0001,
-    "beta_end": 0.02,
-    "schedule_type": "linear"
-  },
-  "masking": {
-    "feature_corruption": true,
-    "time_span_masking": true,
-    "whole_part_masking": true,
-    "velocity_reconstruction": true,
-    "latent_smoothness": true,
-    "contrastive_consistency": false,
-    "feature_corruption_prob": 0.15,
-    "time_span_ratio": 0.2,
-    "contrastive_weight": 0.05
-  },
-  "w_masked_pos": 1.0,
-  "w_masked_vel": 1.0,
-  "w_full_recon": 0.1,
-  "w_latent_smooth": 0.01,
-  "w_contrastive": 0.05,
   "epochs": 3,
-  "lr": 0.0001,
   "weight_decay": 1e-05,
   "grad_clip": 1.0,
   "scheduler": "cosine",
   "warmup_steps": 100,
   "mixed_precision": false,
   "gradient_accumulation_steps": 1,
-  "ckpt_dir": "/content/phase1_ckpt",
   "hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
   "upload_hf": false,
   "seed": 42,
   "log_backend": "csv",
-  "wandb_project": "cslt-phase1",
   "log_every_n_steps": 10,
   "smoke_test": false,
-  "run_id": "138504cf"
 }

   "split": "train",
   "max_samples": 100,
   "val_max_samples": 20,
+  "batch_size": 8,
+  "max_target_length": 128,
   "d_model": 384,
   "latent_dim": 512,
   "encoder_layers": 3,
   "encoder_heads": 8,
   "encoder_dropout": 0.1,
   "use_part_embeddings": true,
+  "t5_name": "google/flan-t5-small",
+  "t5_dim": 512,
+  "adapter_dropout": 0.1,
+  "use_attention_pooling": true,
+  "pool_num_heads": 4,
   "epochs": 3,
+  "warmup_epochs": 1,
+  "lr_encoder": 5e-06,
+  "lr_adapter": 0.0001,
+  "lr_t5": 5e-05,
   "weight_decay": 1e-05,
   "grad_clip": 1.0,
   "scheduler": "cosine",
   "warmup_steps": 100,
   "mixed_precision": false,
   "gradient_accumulation_steps": 1,
+  "num_beams": 4,
+  "max_new_tokens": 50,
+  "use_ctc_head": false,
+  "ctc_weight": 0.1,
+  "ctc_vocab_size": 256,
+  "ckpt_dir": "/content/phase2_ckpt",
+  "phase1_ckpt": "/content/phase1_ckpt",
   "hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
   "upload_hf": false,
   "seed": 42,
   "log_backend": "csv",
+  "wandb_project": "cslt-phase2",
   "log_every_n_steps": 10,
   "smoke_test": false,
+  "run_id": "0c5128a7"
 }

best/encoder.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43b0cd4f268a852a7c6b4b0f5216c4e68d1176d4fea1e9a01176eda64a56ae0f
 size 35465544

 version https://git-lfs.github.com/spec/v1
+oid sha256:2bec4ba6a36a815d93bce379c04af4e1b1937538437546009935a57117a779c1
 size 35465544

best/metadata.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-  "epoch": 0,
-  "step": 4,
-  "git_hash": "8457835",
-  "timestamp": "2026-04-29T02:11:39.138289",
   "python_version": "3.12.13",
   "torch_version": "2.10.0+cu128",
   "seed": 42,
-  "run_id": "138504cf"
 }

 {
+  "epoch": 1,
+  "step": 26,
+  "git_hash": "8e0490b",
+  "timestamp": "2026-04-29T02:23:13.312090",
   "python_version": "3.12.13",
   "torch_version": "2.10.0+cu128",
   "seed": 42,
+  "run_id": "0c5128a7"
 }

best/metrics.json CHANGED Viewed

@@ -1,15 +1,10 @@
 {
-  "train/total_loss": 2.303677797317505,
-  "train/masked_pos_loss": 1.166947454214096,
-  "train/masked_vel_loss": 1.0032410025596619,
-  "train/full_recon_loss": 1.33467036485672,
-  "train/latent_smooth_loss": 0.0022277096286416054,
-  "val/total_loss": 0.4656478464603424,
-  "val/masked_pos_loss": 0.33220887184143066,
-  "val/masked_vel_loss": 0.0,
-  "val/full_recon_loss": 1.3343030214309692,
-  "val/latent_smooth_loss": 0.0008657827856950462,
-  "train/lr": 4.960000000000002e-06,
-  "z_mean": 0.015502252615988255,
-  "z_std": 0.08731898665428162
 }

 {
+  "bleu": 0.11166559489511374,
+  "rouge_l": 0.3286637931034483,
+  "chrf": 5.338902295436229,
+  "exact_match": 0.0,
+  "avg_pred_len": 46.6,
+  "avg_ref_len": 21.55,
+  "val_loss": 9.667327245076498,
+  "train_loss": 9.591291280893179
 }

best/model.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44fd3d37eae1a71aafcf8f41eaa630732e4ba955993fc0285c8aec2ed5bded87
-size 53658903

 version https://git-lfs.github.com/spec/v1
+oid sha256:4da4bb739fc3ccc6a025e24626079949074aa40a06e1fd3be0b5b4af8ebb9af5
+size 349764515

best/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca01bcce1f20838e008ce1f239fd91bdd7c9a25d2b08c6f7fc77ade1aa49e918
-size 91980043

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb68706dfa6b901c015939c7d23e443227d5d107e3943ee66b361eda13956b51
+size 684195019

best/tokenizer/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

best/tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,113 @@

+{
+  "backend": "tokenizers",
+  "eos_token": "</s>",
+  "extra_ids": 100,
+  "extra_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "is_local": false,
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}

latest/adapter.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4d257a3b82f174b31fca249d690441501aaccb54e55ab18cd5b1ef8149528a5
+size 2108181

latest/config.json CHANGED Viewed

@@ -3,50 +3,43 @@
   "split": "train",
   "max_samples": 100,
   "val_max_samples": 20,
-  "batch_size": 32,
   "d_model": 384,
   "latent_dim": 512,
   "encoder_layers": 3,
   "encoder_heads": 8,
   "encoder_dropout": 0.1,
   "use_part_embeddings": true,
-  "ddpm": {
-    "num_timesteps": 1000,
-    "beta_start": 0.0001,
-    "beta_end": 0.02,
-    "schedule_type": "linear"
-  },
-  "masking": {
-    "feature_corruption": true,
-    "time_span_masking": true,
-    "whole_part_masking": true,
-    "velocity_reconstruction": true,
-    "latent_smoothness": true,
-    "contrastive_consistency": false,
-    "feature_corruption_prob": 0.15,
-    "time_span_ratio": 0.2,
-    "contrastive_weight": 0.05
-  },
-  "w_masked_pos": 1.0,
-  "w_masked_vel": 1.0,
-  "w_full_recon": 0.1,
-  "w_latent_smooth": 0.01,
-  "w_contrastive": 0.05,
   "epochs": 3,
-  "lr": 0.0001,
   "weight_decay": 1e-05,
   "grad_clip": 1.0,
   "scheduler": "cosine",
   "warmup_steps": 100,
   "mixed_precision": false,
   "gradient_accumulation_steps": 1,
-  "ckpt_dir": "/content/phase1_ckpt",
   "hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
   "upload_hf": false,
   "seed": 42,
   "log_backend": "csv",
-  "wandb_project": "cslt-phase1",
   "log_every_n_steps": 10,
   "smoke_test": false,
-  "run_id": "138504cf"
 }

   "split": "train",
   "max_samples": 100,
   "val_max_samples": 20,
+  "batch_size": 8,
+  "max_target_length": 128,
   "d_model": 384,
   "latent_dim": 512,
   "encoder_layers": 3,
   "encoder_heads": 8,
   "encoder_dropout": 0.1,
   "use_part_embeddings": true,
+  "t5_name": "google/flan-t5-small",
+  "t5_dim": 512,
+  "adapter_dropout": 0.1,
+  "use_attention_pooling": true,
+  "pool_num_heads": 4,
   "epochs": 3,
+  "warmup_epochs": 1,
+  "lr_encoder": 5e-06,
+  "lr_adapter": 0.0001,
+  "lr_t5": 5e-05,
   "weight_decay": 1e-05,
   "grad_clip": 1.0,
   "scheduler": "cosine",
   "warmup_steps": 100,
   "mixed_precision": false,
   "gradient_accumulation_steps": 1,
+  "num_beams": 4,
+  "max_new_tokens": 50,
+  "use_ctc_head": false,
+  "ctc_weight": 0.1,
+  "ctc_vocab_size": 256,
+  "ckpt_dir": "/content/phase2_ckpt",
+  "phase1_ckpt": "/content/phase1_ckpt",
   "hf_repo": "bdanko/cslt-flan-t5-small-autoencoder",
   "upload_hf": false,
   "seed": 42,
   "log_backend": "csv",
+  "wandb_project": "cslt-phase2",
   "log_every_n_steps": 10,
   "smoke_test": false,
+  "run_id": "0c5128a7"
 }

latest/encoder.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c118e819f59393ac2aeaea39d50b3057bda6f54e35137241b167716f8947e6b2
 size 35465544

 version https://git-lfs.github.com/spec/v1
+oid sha256:418ac86dab5ac1eb8d0fd22ed3052efed74b2062603c367a689461548e281cc3
 size 35465544

latest/metadata.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
   "epoch": 2,
-  "step": 12,
-  "git_hash": "8457835",
-  "timestamp": "2026-04-29T02:12:36.318924",
   "python_version": "3.12.13",
   "torch_version": "2.10.0+cu128",
   "seed": 42,
-  "run_id": "138504cf"
 }

 {
   "epoch": 2,
+  "step": 39,
+  "git_hash": "8e0490b",
+  "timestamp": "2026-04-29T02:23:45.118381",
   "python_version": "3.12.13",
   "torch_version": "2.10.0+cu128",
   "seed": 42,
+  "run_id": "0c5128a7"
 }

latest/metrics.json CHANGED Viewed

@@ -1,15 +1,10 @@
 {
-  "train/total_loss": 1.879580244421959,
-  "train/masked_pos_loss": 0.9129918068647385,
-  "train/masked_vel_loss": 0.8337216824293137,
-  "train/full_recon_loss": 1.3284493386745453,
-  "train/latent_smooth_loss": 0.002177916350774467,
-  "val/total_loss": 2.7803542613983154,
-  "val/masked_pos_loss": 1.3252986669540405,
-  "val/masked_vel_loss": 1.3226159811019897,
-  "val/full_recon_loss": 1.3243049383163452,
-  "val/latent_smooth_loss": 0.0009115393040701747,
-  "train/lr": 1.2880000000000004e-05,
-  "z_mean": 0.015641039237380028,
-  "z_std": 0.08970633149147034
 }

 {
+  "bleu": 0.04798207548180831,
+  "rouge_l": 0.393788682581786,
+  "chrf": 4.945468708957515,
+  "exact_match": 0.0,
+  "avg_pred_len": 50.0,
+  "avg_ref_len": 21.55,
+  "val_loss": 9.705982208251953,
+  "train_loss": 9.528985537015474
 }

latest/model.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f1ad050335a9eb521d96e698a793d784f6a5f617f0ea5e4b03f752c7471df3f
-size 53658903

 version https://git-lfs.github.com/spec/v1
+oid sha256:89aef35446e8e9ff1487577e527e75c7826867d322bb07c17e651b7971e8e08f
+size 349764515

latest/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d10346bdeb257677bf43cb4ad840a210bf15fa7fb500341d00e55171f07863b5
-size 91980043

 version https://git-lfs.github.com/spec/v1
+oid sha256:72f47e69e39a37130bb6fcdbae2297ca2862f0acb2d6e3dcf5ed7696837c04e2
+size 684195019

latest/tokenizer/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

latest/tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,113 @@

+{
+  "backend": "tokenizers",
+  "eos_token": "</s>",
+  "extra_ids": 100,
+  "extra_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "is_local": false,
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}

logs/train_log.csv CHANGED Viewed

@@ -1,5 +1,7 @@
-step,train/total_loss,train/masked_pos_loss,train/masked_vel_loss,train/full_recon_loss,train/latent_smooth_loss,val/total_loss,val/masked_pos_loss,val/masked_vel_loss,val/full_recon_loss,val/latent_smooth_loss,train/lr,z_mean,z_std
-4,2.303677797317505,1.166947454214096,1.0032410025596619,1.33467036485672,0.0022277096286416054,0.4656478464603424,0.33220887184143066,0.0,1.3343030214309692,0.0008657827856950462,4.960000000000002e-06,0.015502252615988255,0.08731898665428162
-8,1.628475546836853,0.6631256863474846,0.8320725113153458,1.3325617015361786,0.002120855962857604,2.791887044906616,1.328176736831665,1.3305968046188354,1.3310281038284302,0.0010730486828833818,8.920000000000004e-06,0.015302599407732487,0.0877484530210495
-10,1.1317189931869507,0.6652693152427673,0.3334561586380005,1.3297309875488281,0.002038335893303156,,,,,,1.0900000000000006e-05,,
-12,1.879580244421959,0.9129918068647385,0.8337216824293137,1.3284493386745453,0.002177916350774467,2.7803542613983154,1.3252986669540405,1.3226159811019897,1.3243049383163452,0.0009115393040701747,1.2880000000000004e-05,0.015641039237380028,0.08970633149147034

+step,train/loss,train/lr
+10,9.641051292419434,1.0900000000000006e-05
+13,9.713348535391,
+20,9.549558639526367,3.9650000000000025e-07
+26,9.591291280893179,
+30,9.527813911437988,2.480000000000001e-07
+39,9.528985537015474,

metrics_epoch_000.json CHANGED Viewed

@@ -1,16 +1,11 @@
 {
-  "epoch": 0,
-  "train/total_loss": 2.303677797317505,
-  "train/masked_pos_loss": 1.166947454214096,
-  "train/masked_vel_loss": 1.0032410025596619,
-  "train/full_recon_loss": 1.33467036485672,
-  "train/latent_smooth_loss": 0.0022277096286416054,
-  "val/total_loss": 0.4656478464603424,
-  "val/masked_pos_loss": 0.33220887184143066,
-  "val/masked_vel_loss": 0.0,
-  "val/full_recon_loss": 1.3343030214309692,
-  "val/latent_smooth_loss": 0.0008657827856950462,
-  "train/lr": 4.960000000000002e-06,
-  "z_mean": 0.015502252615988255,
-  "z_std": 0.08731898665428162
 }

 {
+  "epoch": 1,
+  "bleu": 0.06633456743593955,
+  "rouge_l": 0.37037037037037035,
+  "chrf": 4.102654203852878,
+  "exact_match": 0.0,
+  "avg_pred_len": 39.1,
+  "avg_ref_len": 21.55,
+  "val_loss": 9.739182154337565,
+  "train_loss": 9.713348535391
 }

metrics_epoch_001.json CHANGED Viewed

@@ -1,16 +1,11 @@
 {
-  "epoch": 1,
-  "train/total_loss": 1.628475546836853,
-  "train/masked_pos_loss": 0.6631256863474846,
-  "train/masked_vel_loss": 0.8320725113153458,
-  "train/full_recon_loss": 1.3325617015361786,
-  "train/latent_smooth_loss": 0.002120855962857604,
-  "val/total_loss": 2.791887044906616,
-  "val/masked_pos_loss": 1.328176736831665,
-  "val/masked_vel_loss": 1.3305968046188354,
-  "val/full_recon_loss": 1.3310281038284302,
-  "val/latent_smooth_loss": 0.0010730486828833818,
-  "train/lr": 8.920000000000004e-06,
-  "z_mean": 0.015302599407732487,
-  "z_std": 0.0877484530210495
 }

 {
+  "epoch": 2,
+  "bleu": 0.11166559489511374,
+  "rouge_l": 0.3286637931034483,
+  "chrf": 5.338902295436229,
+  "exact_match": 0.0,
+  "avg_pred_len": 46.6,
+  "avg_ref_len": 21.55,
+  "val_loss": 9.667327245076498,
+  "train_loss": 9.591291280893179
 }

metrics_epoch_002.json CHANGED Viewed

@@ -1,16 +1,11 @@
 {
-  "epoch": 2,
-  "train/total_loss": 1.879580244421959,
-  "train/masked_pos_loss": 0.9129918068647385,
-  "train/masked_vel_loss": 0.8337216824293137,
-  "train/full_recon_loss": 1.3284493386745453,
-  "train/latent_smooth_loss": 0.002177916350774467,
-  "val/total_loss": 2.7803542613983154,
-  "val/masked_pos_loss": 1.3252986669540405,
-  "val/masked_vel_loss": 1.3226159811019897,
-  "val/full_recon_loss": 1.3243049383163452,
-  "val/latent_smooth_loss": 0.0009115393040701747,
-  "train/lr": 1.2880000000000004e-05,
-  "z_mean": 0.015641039237380028,
-  "z_std": 0.08970633149147034
 }

 {
+  "epoch": 3,
+  "bleu": 0.04798207548180831,
+  "rouge_l": 0.393788682581786,
+  "chrf": 4.945468708957515,
+  "exact_match": 0.0,
+  "avg_pred_len": 50.0,
+  "avg_ref_len": 21.55,
+  "val_loss": 9.705982208251953,
+  "train_loss": 9.528985537015474
 }