Ilayk committed on
Commit
910bee7
·
verified ·
1 Parent(s): 6ae87cb

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ DoubleInputRegressor_lc_3_ssl.json filter=lfs diff=lfs merge=lfs -text
37
+ fit_DoubleInputRegressor_lc_3_ssl.png filter=lfs diff=lfs merge=lfs -text
DoubleInputRegressor_lc_3_ssl.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2e0af5d8901a2b28aaa2998c525d0c878fbd3810096f26761cbde67b0b4e807
3
+ size 14418305
DoubleInputRegressor_lc_3_ssl.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03337e9e63170668951746145c5910f4ab08a2e66a095ffab1022fd02226f258
3
+ size 48107861
DoubleInputRegressor_lc_3_ssl_complete_config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "DoubleInputRegressor",
3
+ "data_args": {
4
+ "log_dir": "/data/lightSpec/logs",
5
+ "dataset": "KeplerDataset",
6
+ "data_dir": "/data/lightPred/data",
7
+ "model_name": "DoubleInputRegressor",
8
+ "batch_size": 16,
9
+ "num_epochs": 1000,
10
+ "exp_num": "3_ssl",
11
+ "max_len_spectra": 4096,
12
+ "max_days_lc": 270,
13
+ "max_len_lc": 34560,
14
+ "lc_freq": 0.0208,
15
+ "create_umap": false,
16
+ "masked_transform": false,
17
+ "use_acf": true,
18
+ "use_fft": true,
19
+ "scale_flux": false,
20
+ "ssl_weight": 0.5,
21
+ "labels": [
22
+ "Prot"
23
+ ]
24
+ },
25
+ "model_args": {
26
+ "encoder_only": true,
27
+ "stacked_input": true,
28
+ "in_channels": 3,
29
+ "load_checkpoint": false,
30
+ "output_dim": 1,
31
+ "num_quantiles": 5,
32
+ "dropout_p": 0.3,
33
+ "checkpoint_path": "/data/lightSpec/logs/light_2025-03-19/DoubleInputRegressor_lc_1.pth"
34
+ },
35
+ "astroconf_args": {
36
+ "in_channels": 2,
37
+ "encoder": [
38
+ "mhsa_pro",
39
+ "conv"
40
+ ],
41
+ "timeshift": false,
42
+ "num_layers": 5,
43
+ "num_decoder_layers": 6,
44
+ "stride": 20,
45
+ "encoder_dim": 512,
46
+ "decoder_dim": 128,
47
+ "num_heads": 8,
48
+ "kernel_size": 3,
49
+ "dropout_p": 0.3,
50
+ "output_dim": 3,
51
+ "encoder_only": true,
52
+ "norm": "postnorm",
53
+ "load_checkpoint": false,
54
+ "spec_checkpoint_path": "/data/lightSpec/logs/exp6/astroconf_spectra_3.pth"
55
+ },
56
+ "cnn_args": {
57
+ "in_channels": 2,
58
+ "num_layers": 6,
59
+ "stride": 1,
60
+ "encoder_dims": [
61
+ 32,
62
+ 64,
63
+ 128,
64
+ 256,
65
+ 512
66
+ ],
67
+ "kernel_size": 3,
68
+ "dropout_p": 0.3,
69
+ "output_dim": 2,
70
+ "beta": 1,
71
+ "load_checkpoint": false,
72
+ "checkpoint_num": 1,
73
+ "activation": "sine",
74
+ "sine_w0": 1.0,
75
+ "avg_output": true,
76
+ "checkpoint_path": "/data/lightSpec/logs/light_2024-11-27/CNNEncoder_lc_1.pth"
77
+ },
78
+ "optim_args": {
79
+ "max_lr": "5e-5",
80
+ "weight_decay": "1e-5",
81
+ "warmup_pct": 0.15,
82
+ "steps_per_epoch": 2500,
83
+ "quantiles": [
84
+ 0.1,
85
+ 0.25,
86
+ 0.5,
87
+ 0.75,
88
+ 0.9
89
+ ]
90
+ },
91
+ "num_params": 11982725,
92
+ "model_structure": "DistributedDataParallel(\n (module): MultiTaskSimSiam(\n (activation): GELU(approximate='none')\n (simsiam): SimSiam(\n (backbone): DoubleInputRegressor(\n (encoder1): CNNEncoder(\n (activation): Sine()\n (embedding): Sequential(\n (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n (layers): ModuleList(\n (0): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (1): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (2): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (3): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (4-5): 2 x ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n )\n (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n )\n (encoder2): Astroconformer(\n (extractor): Sequential(\n (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True)\n (2): SiLU()\n )\n (pe): RotaryEmbedding()\n (encoder): ConformerEncoder(\n (blocks): ModuleList(\n (0-4): 5 x ConformerBlock(\n (modlist): ModuleList(\n (0): PostNorm(\n (module): MHA_rotary(\n (query): Linear(in_features=512, out_features=512, bias=True)\n (key): Linear(in_features=512, out_features=512, bias=True)\n (value): Linear(in_features=512, out_features=512, bias=True)\n (rotary_emb): RotaryEmbedding()\n (output): Linear(in_features=512, out_features=512, bias=True)\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n (1): PostNorm(\n (module): ConvBlock(\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n )\n )\n )\n )\n (pred_layer): Identity()\n )\n (regressor): Identity()\n )\n (projector): projection_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=1024, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer3): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n )\n )\n (encoder): Sequential(\n (0): DoubleInputRegressor(\n (encoder1): CNNEncoder(\n (activation): Sine()\n (embedding): Sequential(\n (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n (layers): ModuleList(\n (0): ConvBlock(\n (activation): Sine()\n (layers): 
Sequential(\n (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (1): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (2): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (3): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (4-5): 2 x ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n )\n (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n )\n (encoder2): Astroconformer(\n (extractor): Sequential(\n (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n (pe): RotaryEmbedding()\n (encoder): ConformerEncoder(\n (blocks): ModuleList(\n (0-4): 5 x ConformerBlock(\n (modlist): ModuleList(\n (0): PostNorm(\n (module): MHA_rotary(\n (query): Linear(in_features=512, out_features=512, bias=True)\n (key): Linear(in_features=512, out_features=512, bias=True)\n (value): Linear(in_features=512, out_features=512, bias=True)\n (rotary_emb): RotaryEmbedding()\n (output): Linear(in_features=512, out_features=512, 
bias=True)\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n (1): PostNorm(\n (module): ConvBlock(\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n )\n )\n )\n )\n (pred_layer): Identity()\n )\n (regressor): Identity()\n )\n (1): projection_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=1024, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer3): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n )\n )\n )\n (predictor): prediction_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Linear(in_features=256, out_features=256, bias=True)\n )\n )\n (regressor): Sequential(\n (0): Linear(in_features=512, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): GELU(approximate='none')\n (3): Dropout(p=0.3, inplace=False)\n (4): Linear(in_features=256, out_features=5, bias=True)\n )\n )\n)",
93
+ "transforms": "Compose(\n RandomCrop(crop_size=34560)\n moving_avg(kernel_size=13, stride=1)\n ACF(max_lag=None)\n <transforms.transforms.FFT object at 0x7f8fd43145b0>\n Normalize(scheme='['mag_median', 'std']', axis=0)\n ToTensor\n)",
94
+ "trainer": {
95
+ "model": "DistributedDataParallel(\n (module): MultiTaskSimSiam(\n (activation): GELU(approximate='none')\n (simsiam): SimSiam(\n (backbone): DoubleInputRegressor(\n (encoder1): CNNEncoder(\n (activation): Sine()\n (embedding): Sequential(\n (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n (layers): ModuleList(\n (0): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (1): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (2): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (3): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (4-5): 2 x ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n )\n (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n )\n (encoder2): Astroconformer(\n (extractor): Sequential(\n (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True)\n (2): SiLU()\n )\n (pe): RotaryEmbedding()\n (encoder): ConformerEncoder(\n (blocks): ModuleList(\n (0-4): 5 x ConformerBlock(\n (modlist): ModuleList(\n (0): PostNorm(\n (module): MHA_rotary(\n (query): Linear(in_features=512, out_features=512, bias=True)\n (key): Linear(in_features=512, out_features=512, bias=True)\n (value): Linear(in_features=512, out_features=512, bias=True)\n (rotary_emb): RotaryEmbedding()\n (output): Linear(in_features=512, out_features=512, bias=True)\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n (1): PostNorm(\n (module): ConvBlock(\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n )\n )\n )\n )\n (pred_layer): Identity()\n )\n (regressor): Identity()\n )\n (projector): projection_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=1024, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer3): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n )\n )\n (encoder): Sequential(\n (0): DoubleInputRegressor(\n (encoder1): CNNEncoder(\n (activation): Sine()\n (embedding): Sequential(\n (0): Conv1d(2, 32, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n (layers): ModuleList(\n (0): ConvBlock(\n (activation): Sine()\n (layers): 
Sequential(\n (0): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (1): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (2): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(128, 256, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (3): ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(256, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n (4-5): 2 x ConvBlock(\n (activation): Sine()\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): Sine()\n )\n )\n )\n (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n )\n (encoder2): Astroconformer(\n (extractor): Sequential(\n (0): Conv1d(2, 512, kernel_size=(20,), stride=(20,))\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n (pe): RotaryEmbedding()\n (encoder): ConformerEncoder(\n (blocks): ModuleList(\n (0-4): 5 x ConformerBlock(\n (modlist): ModuleList(\n (0): PostNorm(\n (module): MHA_rotary(\n (query): Linear(in_features=512, out_features=512, bias=True)\n (key): Linear(in_features=512, out_features=512, bias=True)\n (value): Linear(in_features=512, out_features=512, bias=True)\n (rotary_emb): RotaryEmbedding()\n (output): Linear(in_features=512, out_features=512, 
bias=True)\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n (1): PostNorm(\n (module): ConvBlock(\n (layers): Sequential(\n (0): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same, bias=False)\n (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): SiLU()\n )\n )\n (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n )\n )\n )\n )\n )\n (pred_layer): Identity()\n )\n (regressor): Identity()\n )\n (1): projection_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=1024, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer3): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n )\n )\n )\n (predictor): prediction_MLP(\n (layer1): Sequential(\n (0): Linear(in_features=256, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): ReLU(inplace=True)\n )\n (layer2): Linear(in_features=256, out_features=256, bias=True)\n )\n )\n (regressor): Sequential(\n (0): Linear(in_features=512, out_features=256, bias=True)\n (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n (2): GELU(approximate='none')\n (3): Dropout(p=0.3, inplace=False)\n (4): Linear(in_features=256, out_features=5, bias=True)\n )\n )\n)",
96
+ "optimizer": "AdamW (\nParameter Group 0\n amsgrad: False\n betas: (0.9, 0.999)\n capturable: False\n differentiable: False\n eps: 1e-08\n foreach: None\n fused: None\n initial_lr: 5e-05\n lr: 5e-05\n maximize: False\n weight_decay: 1e-05\n)",
97
+ "criterion": "CQR()",
98
+ "scaler": "<torch.cuda.amp.grad_scaler.GradScaler object at 0x7f8f6924f790>",
99
+ "grad_clip": false,
100
+ "cos_inc": false,
101
+ "output_dim": 1,
102
+ "scheduler": "<torch.optim.lr_scheduler.CosineAnnealingLR object at 0x7f8f6924f850>",
103
+ "train_dl": "<torch.utils.data.dataloader.DataLoader object at 0x7f8f6f27cb20>",
104
+ "val_dl": "<torch.utils.data.dataloader.DataLoader object at 0x7f8f6f220ac0>",
105
+ "train_sampler": "<torch.utils.data.distributed.DistributedSampler object at 0x7f8f6f27c910>",
106
+ "val_sampler": "<torch.utils.data.distributed.DistributedSampler object at 0x7f8f6f27cdc0>",
107
+ "max_iter": Infinity,
108
+ "device": 0,
109
+ "world_size": 1,
110
+ "exp_num": "light_2025-03-19",
111
+ "exp_name": "DoubleInputRegressor_lc_3_ssl",
112
+ "log_path": "/data/lightSpec/logs",
113
+ "best_state_dict": null,
114
+ "plot_every": null,
115
+ "logger": null,
116
+ "range_update": null,
117
+ "accumulation_step": 1,
118
+ "wandb": false,
119
+ "num_quantiles": 5,
120
+ "update_func": "<function Trainer.<lambda> at 0x7f8fd7d94820>",
121
+ "stack_pairs": false,
122
+ "temperature": 1,
123
+ "use_w": false,
124
+ "use_pred_coeff": false,
125
+ "pred_coeff_val": null,
126
+ "ssl_weight": 0.5,
127
+ "weight_decay": true
128
+ }
129
+ }
fit_DoubleInputRegressor_lc_3_ssl.png ADDED

Git LFS Details

  • SHA256: 35aff88c473ffdf7d30c2cb247cdc699fd707cb2b2962380f0e8ac260aeb941f
  • Pointer size: 131 Bytes
  • Size of remote file: 144 kB