{
  "model_name": "DoubleInputRegressor",
  "data_args": {
    "log_dir": "/data/lightSpec/logs",
    "dataset": "KeplerDataset",
    "data_dir": "/data/lightPred/data",
    "model_name": "DoubleInputRegressor",
    "batch_size": 16,
    "num_epochs": 1000,
    "exp_num": "3_ssl",
    "max_len_spectra": 4096,
    "max_days_lc": 270,
    "max_len_lc": 34560,
    "lc_freq": 0.0208,
    "create_umap": false,
    "masked_transform": false,
    "use_acf": true,
    "use_fft": true,
    "scale_flux": false,
    "ssl_weight": 0.5,
    "labels": [
      "Prot"
    ]
  },
  "model_args": {
    "encoder_only": true,
    "stacked_input": true,
    "in_channels": 3,
    "load_checkpoint": false,
    "output_dim": 1,
    "num_quantiles": 5,
    "dropout_p": 0.3,
    "checkpoint_path": "/data/lightSpec/logs/light_2025-03-19/DoubleInputRegressor_lc_1.pth"
  },
  "astroconf_args": {
    "in_channels": 2,
    "encoder": [
      "mhsa_pro",
      "conv"
    ],
    "timeshift": false,
    "num_layers": 5,
    "num_decoder_layers": 6,
    "stride": 20,
    "encoder_dim": 512,
    "decoder_dim": 128,
    "num_heads": 8,
    "kernel_size": 3,
    "dropout_p": 0.3,
    "output_dim": 3,
    "encoder_only": true,
    "norm": "postnorm",
    "load_checkpoint": false,
    "spec_checkpoint_path": "/data/lightSpec/logs/exp6/astroconf_spectra_3.pth"
  },
  "cnn_args": {
    "in_channels": 2,
    "num_layers": 6,
    "stride": 1,
    "encoder_dims": [
      32,
      64,
      128,
      256,
      512
    ],
    "kernel_size": 3,
    "dropout_p": 0.3,
    "output_dim": 2,
    "beta": 1,
    "load_checkpoint": false,
    "checkpoint_num": 1,
    "activation": "sine",
    "sine_w0": 1.0,
    "avg_output": true,
    "checkpoint_path": "/data/lightSpec/logs/light_2024-11-27/CNNEncoder_lc_1.pth"
  },
  "optim_args": {
    "max_lr": "5e-5",
    "weight_decay": "1e-5",
    "warmup_pct": 0.15,
    "steps_per_epoch": 2500,
    "quantiles": [
      0.1,
      0.25,
      0.5,
      0.75,
      0.9
    ]
  },
  "num_params": 11982725,
  "model_structure": "DistributedDataParallel(\n  (module): MultiTaskSimSiam(\n    (activation): GELU(approximate='none')\n    (simsiam): SimSiam(\n      (backbone): DoubleInputRegressor(\n        (encoder1): CNNEncoder(\n          (activation): Sine()\n          (embedding): Sequential(\n            (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n            (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (2): Sine()\n          )\n          (layers): ModuleList(\n            (0): ConvBlock(\n              (activation): Sine()\n              (layers): Sequential(\n                (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                (2): Sine()\n              )\n            )\n            (1): ConvBlock(\n              (activation): Sine()\n              (layers): Sequential(\n                (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                (2): Sine()\n              )\n            )\n            (2): ConvBlock(\n              (activation): Sine()\n              (layers): Sequential(\n                (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                (2): Sine()\n              )\n            )\n            (3): ConvBlock(\n              (activation): Sine()\n              (layers): Sequential(\n                (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                (2): Sine()\n              )\n            )\n        
    (4-5): 2 x ConvBlock(\n              (activation): Sine()\n              (layers): Sequential(\n                (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                (2): Sine()\n              )\n            )\n          )\n          (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n        )\n        (encoder2): Astroconformer(\n          (extractor): Sequential(\n            (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n            (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (2): SiLU()\n          )\n          (pe): RotaryEmbedding()\n          (encoder): ConformerEncoder(\n            (blocks): ModuleList(\n              (0-4): 5 x ConformerBlock(\n                (modlist): ModuleList(\n                  (0): PostNorm(\n                    (module): MHA_rotary(\n                      (query): Linear(in_features=512, out_features=512, bias=True)\n                      (key): Linear(in_features=512, out_features=512, bias=True)\n                      (value): Linear(in_features=512, out_features=512, bias=True)\n                      (rotary_emb): RotaryEmbedding()\n                      (output): Linear(in_features=512, out_features=512, bias=True)\n                    )\n                    (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n                  )\n                  (1): PostNorm(\n                    (module): ConvBlock(\n                      (layers): Sequential(\n                        (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                        (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                        (2): SiLU()\n                      )\n                    )\n                    (norm): 
LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n                  )\n                )\n              )\n            )\n          )\n          (pred_layer): Identity()\n        )\n        (regressor): Identity()\n      )\n      (projector): projection_MLP(\n        (layer1): Sequential(\n          (0): Linear(in_features=1024, out_features=256, bias=True)\n          (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          (2): ReLU(inplace=True)\n        )\n        (layer2): Sequential(\n          (0): Linear(in_features=256, out_features=256, bias=True)\n          (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          (2): ReLU(inplace=True)\n        )\n        (layer3): Sequential(\n          (0): Linear(in_features=256, out_features=256, bias=True)\n          (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n      )\n      (encoder): Sequential(\n        (0): DoubleInputRegressor(\n          (encoder1): CNNEncoder(\n            (activation): Sine()\n            (embedding): Sequential(\n              (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n              (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n              (2): Sine()\n            )\n            (layers): ModuleList(\n              (0): ConvBlock(\n                (activation): Sine()\n                (layers): Sequential(\n                  (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                  (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                  (2): Sine()\n                )\n              )\n              (1): ConvBlock(\n                (activation): Sine()\n                (layers): Sequential(\n                  (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n          
        (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                  (2): Sine()\n                )\n              )\n              (2): ConvBlock(\n                (activation): Sine()\n                (layers): Sequential(\n                  (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                  (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                  (2): Sine()\n                )\n              )\n              (3): ConvBlock(\n                (activation): Sine()\n                (layers): Sequential(\n                  (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                  (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                  (2): Sine()\n                )\n              )\n              (4-5): 2 x ConvBlock(\n                (activation): Sine()\n                (layers): Sequential(\n                  (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                  (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                  (2): Sine()\n                )\n              )\n            )\n            (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n          )\n          (encoder2): Astroconformer(\n            (extractor): Sequential(\n              (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n              (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n              (2): SiLU()\n            )\n            (pe): RotaryEmbedding()\n            (encoder): ConformerEncoder(\n              (blocks): ModuleList(\n                (0-4): 5 x ConformerBlock(\n                  (modlist): ModuleList(\n                    (0): PostNorm(\n                      (module): MHA_rotary(\n   
                     (query): Linear(in_features=512, out_features=512, bias=True)\n                        (key): Linear(in_features=512, out_features=512, bias=True)\n                        (value): Linear(in_features=512, out_features=512, bias=True)\n                        (rotary_emb): RotaryEmbedding()\n                        (output): Linear(in_features=512, out_features=512, bias=True)\n                      )\n                      (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n                    )\n                    (1): PostNorm(\n                      (module): ConvBlock(\n                        (layers): Sequential(\n                          (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                          (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                          (2): SiLU()\n                        )\n                      )\n                      (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n                    )\n                  )\n                )\n              )\n            )\n            (pred_layer): Identity()\n          )\n          (regressor): Identity()\n        )\n        (1): projection_MLP(\n          (layer1): Sequential(\n            (0): Linear(in_features=1024, out_features=256, bias=True)\n            (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (2): ReLU(inplace=True)\n          )\n          (layer2): Sequential(\n            (0): Linear(in_features=256, out_features=256, bias=True)\n            (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (2): ReLU(inplace=True)\n          )\n          (layer3): Sequential(\n            (0): Linear(in_features=256, out_features=256, bias=True)\n            (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          
)\n        )\n      )\n      (predictor): prediction_MLP(\n        (layer1): Sequential(\n          (0): Linear(in_features=256, out_features=256, bias=True)\n          (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          (2): ReLU(inplace=True)\n        )\n        (layer2): Linear(in_features=256, out_features=256, bias=True)\n      )\n    )\n    (regressor): Sequential(\n      (0): Linear(in_features=512, out_features=256, bias=True)\n      (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      (2): GELU(approximate='none')\n      (3): Dropout(p=0.3, inplace=False)\n      (4): Linear(in_features=256, out_features=5, bias=True)\n    )\n  )\n)",
  "transforms": "Compose(\n    RandomCrop(crop_size=34560)\n    moving_avg(kernel_size=13, stride=1)\n    ACF(max_lag=None)\n    <transforms.transforms.FFT object at 0x7f8fd43145b0>\n    Normalize(scheme='['mag_median', 'std']', axis=0)\n    ToTensor\n)",
  "trainer": {
    "model": "DistributedDataParallel(\n  (module): MultiTaskSimSiam(\n    (activation): GELU(approximate='none')\n    (simsiam): SimSiam(\n      (backbone): DoubleInputRegressor(\n        (encoder1): CNNEncoder(\n          (activation): Sine()\n          (embedding): Sequential(\n            (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n            (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (2): Sine()\n          )\n          (layers): ModuleList(\n            (0): ConvBlock(\n              (activation): Sine()\n              (layers): Sequential(\n                (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                (2): Sine()\n              )\n            )\n            (1): ConvBlock(\n              (activation): Sine()\n              (layers): Sequential(\n                (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                (2): Sine()\n              )\n            )\n            (2): ConvBlock(\n              (activation): Sine()\n              (layers): Sequential(\n                (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                (2): Sine()\n              )\n            )\n            (3): ConvBlock(\n              (activation): Sine()\n              (layers): Sequential(\n                (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                (2): Sine()\n              )\n            )\n            
(4-5): 2 x ConvBlock(\n              (activation): Sine()\n              (layers): Sequential(\n                (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                (2): Sine()\n              )\n            )\n          )\n          (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n        )\n        (encoder2): Astroconformer(\n          (extractor): Sequential(\n            (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n            (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (2): SiLU()\n          )\n          (pe): RotaryEmbedding()\n          (encoder): ConformerEncoder(\n            (blocks): ModuleList(\n              (0-4): 5 x ConformerBlock(\n                (modlist): ModuleList(\n                  (0): PostNorm(\n                    (module): MHA_rotary(\n                      (query): Linear(in_features=512, out_features=512, bias=True)\n                      (key): Linear(in_features=512, out_features=512, bias=True)\n                      (value): Linear(in_features=512, out_features=512, bias=True)\n                      (rotary_emb): RotaryEmbedding()\n                      (output): Linear(in_features=512, out_features=512, bias=True)\n                    )\n                    (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n                  )\n                  (1): PostNorm(\n                    (module): ConvBlock(\n                      (layers): Sequential(\n                        (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                        (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                        (2): SiLU()\n                      )\n                    )\n                    (norm): 
LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n                  )\n                )\n              )\n            )\n          )\n          (pred_layer): Identity()\n        )\n        (regressor): Identity()\n      )\n      (projector): projection_MLP(\n        (layer1): Sequential(\n          (0): Linear(in_features=1024, out_features=256, bias=True)\n          (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          (2): ReLU(inplace=True)\n        )\n        (layer2): Sequential(\n          (0): Linear(in_features=256, out_features=256, bias=True)\n          (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          (2): ReLU(inplace=True)\n        )\n        (layer3): Sequential(\n          (0): Linear(in_features=256, out_features=256, bias=True)\n          (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n      )\n      (encoder): Sequential(\n        (0): DoubleInputRegressor(\n          (encoder1): CNNEncoder(\n            (activation): Sine()\n            (embedding): Sequential(\n              (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n              (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n              (2): Sine()\n            )\n            (layers): ModuleList(\n              (0): ConvBlock(\n                (activation): Sine()\n                (layers): Sequential(\n                  (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                  (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                  (2): Sine()\n                )\n              )\n              (1): ConvBlock(\n                (activation): Sine()\n                (layers): Sequential(\n                  (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n          
        (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                  (2): Sine()\n                )\n              )\n              (2): ConvBlock(\n                (activation): Sine()\n                (layers): Sequential(\n                  (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                  (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                  (2): Sine()\n                )\n              )\n              (3): ConvBlock(\n                (activation): Sine()\n                (layers): Sequential(\n                  (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                  (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                  (2): Sine()\n                )\n              )\n              (4-5): 2 x ConvBlock(\n                (activation): Sine()\n                (layers): Sequential(\n                  (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                  (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                  (2): Sine()\n                )\n              )\n            )\n            (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n          )\n          (encoder2): Astroconformer(\n            (extractor): Sequential(\n              (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n              (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n              (2): SiLU()\n            )\n            (pe): RotaryEmbedding()\n            (encoder): ConformerEncoder(\n              (blocks): ModuleList(\n                (0-4): 5 x ConformerBlock(\n                  (modlist): ModuleList(\n                    (0): PostNorm(\n                      (module): MHA_rotary(\n   
                     (query): Linear(in_features=512, out_features=512, bias=True)\n                        (key): Linear(in_features=512, out_features=512, bias=True)\n                        (value): Linear(in_features=512, out_features=512, bias=True)\n                        (rotary_emb): RotaryEmbedding()\n                        (output): Linear(in_features=512, out_features=512, bias=True)\n                      )\n                      (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n                    )\n                    (1): PostNorm(\n                      (module): ConvBlock(\n                        (layers): Sequential(\n                          (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n                          (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n                          (2): SiLU()\n                        )\n                      )\n                      (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n                    )\n                  )\n                )\n              )\n            )\n            (pred_layer): Identity()\n          )\n          (regressor): Identity()\n        )\n        (1): projection_MLP(\n          (layer1): Sequential(\n            (0): Linear(in_features=1024, out_features=256, bias=True)\n            (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (2): ReLU(inplace=True)\n          )\n          (layer2): Sequential(\n            (0): Linear(in_features=256, out_features=256, bias=True)\n            (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (2): ReLU(inplace=True)\n          )\n          (layer3): Sequential(\n            (0): Linear(in_features=256, out_features=256, bias=True)\n            (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          
)\n        )\n      )\n      (predictor): prediction_MLP(\n        (layer1): Sequential(\n          (0): Linear(in_features=256, out_features=256, bias=True)\n          (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          (2): ReLU(inplace=True)\n        )\n        (layer2): Linear(in_features=256, out_features=256, bias=True)\n      )\n    )\n    (regressor): Sequential(\n      (0): Linear(in_features=512, out_features=256, bias=True)\n      (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      (2): GELU(approximate='none')\n      (3): Dropout(p=0.3, inplace=False)\n      (4): Linear(in_features=256, out_features=5, bias=True)\n    )\n  )\n)",
    "optimizer": "AdamW (\nParameter Group 0\n    amsgrad: False\n    betas: (0.9, 0.999)\n    capturable: False\n    differentiable: False\n    eps: 1e-08\n    foreach: None\n    fused: None\n    initial_lr: 5e-05\n    lr: 5e-05\n    maximize: False\n    weight_decay: 1e-05\n)",
    "criterion": "CQR()",
    "scaler": "<torch.cuda.amp.grad_scaler.GradScaler object at 0x7f8f6924f790>",
    "grad_clip": false,
    "cos_inc": false,
    "output_dim": 1,
    "scheduler": "<torch.optim.lr_scheduler.CosineAnnealingLR object at 0x7f8f6924f850>",
    "train_dl": "<torch.utils.data.dataloader.DataLoader object at 0x7f8f6f27cb20>",
    "val_dl": "<torch.utils.data.dataloader.DataLoader object at 0x7f8f6f220ac0>",
    "train_sampler": "<torch.utils.data.distributed.DistributedSampler object at 0x7f8f6f27c910>",
    "val_sampler": "<torch.utils.data.distributed.DistributedSampler object at 0x7f8f6f27cdc0>",
    "max_iter": Infinity,
    "device": 0,
    "world_size": 1,
    "exp_num": "light_2025-03-19",
    "exp_name": "DoubleInputRegressor_lc_3_ssl",
    "log_path": "/data/lightSpec/logs",
    "best_state_dict": null,
    "plot_every": null,
    "logger": null,
    "range_update": null,
    "accumulation_step": 1,
    "wandb": false,
    "num_quantiles": 5,
    "update_func": "<function Trainer.<lambda> at 0x7f8fd7d94820>",
    "stack_pairs": false,
    "temperature": 1,
    "use_w": false,
    "use_pred_coeff": false,
    "pred_coeff_val": null,
    "ssl_weight": 0.5,
    "weight_decay": true
  }
}