Upload folder using huggingface_hub
Browse files
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
DoubleInputRegressor_lc_3_ssl.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
fit_DoubleInputRegressor_lc_3_ssl.png filter=lfs diff=lfs merge=lfs -text
|
DoubleInputRegressor_lc_3_ssl.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2e0af5d8901a2b28aaa2998c525d0c878fbd3810096f26761cbde67b0b4e807
|
| 3 |
+
size 14418305
|
DoubleInputRegressor_lc_3_ssl.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03337e9e63170668951746145c5910f4ab08a2e66a095ffab1022fd02226f258
|
| 3 |
+
size 48107861
|
DoubleInputRegressor_lc_3_ssl_complete_config.json
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_name": "DoubleInputRegressor",
|
| 3 |
+
"data_args": {
|
| 4 |
+
"log_dir": "/data/lightSpec/logs",
|
| 5 |
+
"dataset": "KeplerDataset",
|
| 6 |
+
"data_dir": "/data/lightPred/data",
|
| 7 |
+
"model_name": "DoubleInputRegressor",
|
| 8 |
+
"batch_size": 16,
|
| 9 |
+
"num_epochs": 1000,
|
| 10 |
+
"exp_num": "3_ssl",
|
| 11 |
+
"max_len_spectra": 4096,
|
| 12 |
+
"max_days_lc": 270,
|
| 13 |
+
"max_len_lc": 34560,
|
| 14 |
+
"lc_freq": 0.0208,
|
| 15 |
+
"create_umap": false,
|
| 16 |
+
"masked_transform": false,
|
| 17 |
+
"use_acf": true,
|
| 18 |
+
"use_fft": true,
|
| 19 |
+
"scale_flux": false,
|
| 20 |
+
"ssl_weight": 0.5,
|
| 21 |
+
"labels": [
|
| 22 |
+
"Prot"
|
| 23 |
+
]
|
| 24 |
+
},
|
| 25 |
+
"model_args": {
|
| 26 |
+
"encoder_only": true,
|
| 27 |
+
"stacked_input": true,
|
| 28 |
+
"in_channels": 3,
|
| 29 |
+
"load_checkpoint": false,
|
| 30 |
+
"output_dim": 1,
|
| 31 |
+
"num_quantiles": 5,
|
| 32 |
+
"dropout_p": 0.3,
|
| 33 |
+
"checkpoint_path": "/data/lightSpec/logs/light_2025-03-19/DoubleInputRegressor_lc_1.pth"
|
| 34 |
+
},
|
| 35 |
+
"astroconf_args": {
|
| 36 |
+
"in_channels": 2,
|
| 37 |
+
"encoder": [
|
| 38 |
+
"mhsa_pro",
|
| 39 |
+
"conv"
|
| 40 |
+
],
|
| 41 |
+
"timeshift": false,
|
| 42 |
+
"num_layers": 5,
|
| 43 |
+
"num_decoder_layers": 6,
|
| 44 |
+
"stride": 20,
|
| 45 |
+
"encoder_dim": 512,
|
| 46 |
+
"decoder_dim": 128,
|
| 47 |
+
"num_heads": 8,
|
| 48 |
+
"kernel_size": 3,
|
| 49 |
+
"dropout_p": 0.3,
|
| 50 |
+
"output_dim": 3,
|
| 51 |
+
"encoder_only": true,
|
| 52 |
+
"norm": "postnorm",
|
| 53 |
+
"load_checkpoint": false,
|
| 54 |
+
"spec_checkpoint_path": "/data/lightSpec/logs/exp6/astroconf_spectra_3.pth"
|
| 55 |
+
},
|
| 56 |
+
"cnn_args": {
|
| 57 |
+
"in_channels": 2,
|
| 58 |
+
"num_layers": 6,
|
| 59 |
+
"stride": 1,
|
| 60 |
+
"encoder_dims": [
|
| 61 |
+
32,
|
| 62 |
+
64,
|
| 63 |
+
128,
|
| 64 |
+
256,
|
| 65 |
+
512
|
| 66 |
+
],
|
| 67 |
+
"kernel_size": 3,
|
| 68 |
+
"dropout_p": 0.3,
|
| 69 |
+
"output_dim": 2,
|
| 70 |
+
"beta": 1,
|
| 71 |
+
"load_checkpoint": false,
|
| 72 |
+
"checkpoint_num": 1,
|
| 73 |
+
"activation": "sine",
|
| 74 |
+
"sine_w0": 1.0,
|
| 75 |
+
"avg_output": true,
|
| 76 |
+
"checkpoint_path": "/data/lightSpec/logs/light_2024-11-27/CNNEncoder_lc_1.pth"
|
| 77 |
+
},
|
| 78 |
+
"optim_args": {
|
| 79 |
+
"max_lr": "5e-5",
|
| 80 |
+
"weight_decay": "1e-5",
|
| 81 |
+
"warmup_pct": 0.15,
|
| 82 |
+
"steps_per_epoch": 2500,
|
| 83 |
+
"quantiles": [
|
| 84 |
+
0.1,
|
| 85 |
+
0.25,
|
| 86 |
+
0.5,
|
| 87 |
+
0.75,
|
| 88 |
+
0.9
|
| 89 |
+
]
|
| 90 |
+
},
|
| 91 |
+
"num_params": 11982725,
|
| 92 |
+
"model_structure": "DistributedDataParallel(\n (module): MultiTaskSimSiam(\n (activation): GELU(approximate='none')\n (simsiam): SimSiam(\n (backbone): DoubleInputRegressor(\n (encoder1): CNNEncoder(\n (activation): Sine()\n (embedding): Sequential(\n (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n (layers): ModuleList(\n (0): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (1): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (2): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (3): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (4-5): 2 x ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n )\n (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n )\n (encoder2): Astroconformer(\n (extractor): Sequential(\n (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n (pe): RotaryEmbedding()\n (encoder): ConformerEncoder(\n (blocks): ModuleList(\n (0-4): 5 x ConformerBlock(\n (modlist): ModuleList(\n (0): PostNorm(\n (module): MHA_rotary(\n (query): Linear(in_features=512, out_features=512, bias=True)\n (key): Linear(in_features=512, out_features=512, bias=True)\n (value): Linear(in_features=512, out_features=512, bias=True)\n (rotary_emb): RotaryEmbedding()\n (output): Linear(in_features=512, out_features=512, bias=True)\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n (1): PostNorm(\n (module): ConvBlock(\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n )\n )\n )\n )\n (pred_layer): Identity()\n )\n (regressor): Identity()\n )\n (projector): projection_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=1024, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer3): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n )\n )\n (encoder): Sequential(\n (0): DoubleInputRegressor(\n (encoder1): CNNEncoder(\n (activation): Sine()\n (embedding): Sequential(\n (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n (layers): ModuleList(\n (0): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (1): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (2): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (3): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (4-5): 2 x ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n )\n (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n )\n (encoder2): Astroconformer(\n (extractor): Sequential(\n (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n (pe): RotaryEmbedding()\n (encoder): ConformerEncoder(\n (blocks): ModuleList(\n (0-4): 5 x ConformerBlock(\n (modlist): ModuleList(\n (0): PostNorm(\n (module): MHA_rotary(\n (query): Linear(in_features=512, out_features=512, bias=True)\n (key): Linear(in_features=512, out_features=512, bias=True)\n (value): Linear(in_features=512, out_features=512, bias=True)\n (rotary_emb): RotaryEmbedding()\n (output): Linear(in_features=512, out_features=512, bias=True)\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n (1): PostNorm(\n (module): ConvBlock(\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n )\n )\n )\n )\n (pred_layer): Identity()\n )\n (regressor): Identity()\n )\n (1): projection_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=1024, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer3): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n )\n )\n )\n (predictor): prediction_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Linear(in_features=256, out_features=256, bias=True)\n )\n )\n (regressor): Sequential(\n (0): Linear(in_features=512, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): GELU(approximate='none')\n (3): Dropout(p=0.3, inplace=False)\n (4): Linear(in_features=256, out_features=5, bias=True)\n )\n )\n)",
|
| 93 |
+
"transforms": "Compose(\n RandomCrop(crop_size=34560)\n moving_avg(kernel_size=13, stride=1)\n ACF(max_lag=None)\n <transforms.transforms.FFT object at 0x7f8fd43145b0>\n Normalize(scheme='['mag_median', 'std']', axis=0)\n ToTensor\n)",
|
| 94 |
+
"trainer": {
|
| 95 |
+
"model": "DistributedDataParallel(\n (module): MultiTaskSimSiam(\n (activation): GELU(approximate='none')\n (simsiam): SimSiam(\n (backbone): DoubleInputRegressor(\n (encoder1): CNNEncoder(\n (activation): Sine()\n (embedding): Sequential(\n (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n (layers): ModuleList(\n (0): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (1): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (2): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (3): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (4-5): 2 x ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n )\n (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n )\n (encoder2): Astroconformer(\n (extractor): Sequential(\n (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n (pe): RotaryEmbedding()\n (encoder): ConformerEncoder(\n (blocks): ModuleList(\n (0-4): 5 x ConformerBlock(\n (modlist): ModuleList(\n (0): PostNorm(\n (module): MHA_rotary(\n (query): Linear(in_features=512, out_features=512, bias=True)\n (key): Linear(in_features=512, out_features=512, bias=True)\n (value): Linear(in_features=512, out_features=512, bias=True)\n (rotary_emb): RotaryEmbedding()\n (output): Linear(in_features=512, out_features=512, bias=True)\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n (1): PostNorm(\n (module): ConvBlock(\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n )\n )\n )\n )\n (pred_layer): Identity()\n )\n (regressor): Identity()\n )\n (projector): projection_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=1024, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer3): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n )\n )\n (encoder): Sequential(\n (0): DoubleInputRegressor(\n (encoder1): CNNEncoder(\n (activation): Sine()\n (embedding): Sequential(\n (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n (layers): ModuleList(\n (0): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (1): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (2): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (3): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (4-5): 2 x ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n )\n (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n )\n (encoder2): Astroconformer(\n (extractor): Sequential(\n (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n (pe): RotaryEmbedding()\n (encoder): ConformerEncoder(\n (blocks): ModuleList(\n (0-4): 5 x ConformerBlock(\n (modlist): ModuleList(\n (0): PostNorm(\n (module): MHA_rotary(\n (query): Linear(in_features=512, out_features=512, bias=True)\n (key): Linear(in_features=512, out_features=512, bias=True)\n (value): Linear(in_features=512, out_features=512, bias=True)\n (rotary_emb): RotaryEmbedding()\n (output): Linear(in_features=512, out_features=512, bias=True)\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n (1): PostNorm(\n (module): ConvBlock(\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n )\n )\n )\n )\n (pred_layer): Identity()\n )\n (regressor): Identity()\n )\n (1): projection_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=1024, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer3): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n )\n )\n )\n (predictor): prediction_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Linear(in_features=256, out_features=256, bias=True)\n )\n )\n (regressor): Sequential(\n (0): Linear(in_features=512, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): GELU(approximate='none')\n (3): Dropout(p=0.3, inplace=False)\n (4): Linear(in_features=256, out_features=5, bias=True)\n )\n )\n)",
|
| 96 |
+
"optimizer": "AdamW (\nParameter Group 0\n amsgrad: False\n betas: (0.9, 0.999)\n capturable: False\n differentiable: False\n eps: 1e-08\n foreach: None\n fused: None\n initial_lr: 5e-05\n lr: 5e-05\n maximize: False\n weight_decay: 1e-05\n)",
|
| 97 |
+
"criterion": "CQR()",
|
| 98 |
+
"scaler": "<torch.cuda.amp.grad_scaler.GradScaler object at 0x7f8f6924f790>",
|
| 99 |
+
"grad_clip": false,
|
| 100 |
+
"cos_inc": false,
|
| 101 |
+
"output_dim": 1,
|
| 102 |
+
"scheduler": "<torch.optim.lr_scheduler.CosineAnnealingLR object at 0x7f8f6924f850>",
|
| 103 |
+
"train_dl": "<torch.utils.data.dataloader.DataLoader object at 0x7f8f6f27cb20>",
|
| 104 |
+
"val_dl": "<torch.utils.data.dataloader.DataLoader object at 0x7f8f6f220ac0>",
|
| 105 |
+
"train_sampler": "<torch.utils.data.distributed.DistributedSampler object at 0x7f8f6f27c910>",
|
| 106 |
+
"val_sampler": "<torch.utils.data.distributed.DistributedSampler object at 0x7f8f6f27cdc0>",
|
| 107 |
+
"max_iter": Infinity,
|
| 108 |
+
"device": 0,
|
| 109 |
+
"world_size": 1,
|
| 110 |
+
"exp_num": "light_2025-03-19",
|
| 111 |
+
"exp_name": "DoubleInputRegressor_lc_3_ssl",
|
| 112 |
+
"log_path": "/data/lightSpec/logs",
|
| 113 |
+
"best_state_dict": null,
|
| 114 |
+
"plot_every": null,
|
| 115 |
+
"logger": null,
|
| 116 |
+
"range_update": null,
|
| 117 |
+
"accumulation_step": 1,
|
| 118 |
+
"wandb": false,
|
| 119 |
+
"num_quantiles": 5,
|
| 120 |
+
"update_func": "<function Trainer.<lambda> at 0x7f8fd7d94820>",
|
| 121 |
+
"stack_pairs": false,
|
| 122 |
+
"temperature": 1,
|
| 123 |
+
"use_w": false,
|
| 124 |
+
"use_pred_coeff": false,
|
| 125 |
+
"pred_coeff_val": null,
|
| 126 |
+
"ssl_weight": 0.5,
|
| 127 |
+
"weight_decay": true
|
| 128 |
+
}
|
| 129 |
+
}
|
fit_DoubleInputRegressor_lc_3_ssl.png
ADDED
|
Git LFS Details
|