panda

File size: 13,482 Bytes

f38d112

{
    "model_config": {
        "mode": "predict",
        "context_length": 512,
        "prediction_length": 128,
        "distribution_output": null,
        "loss": "mse",
        "huber_delta": 1.0,
        "patch_length": 16,
        "patch_stride": 16,
        "num_hidden_layers": 8,
        "d_model": 512,
        "num_attention_heads": 8,
        "channel_attention": true,
        "ffn_dim": 512,
        "norm_type": "rmsnorm",
        "norm_eps": 1e-05,
        "attention_dropout": 0.0,
        "positional_dropout": 0.0,
        "path_dropout": 0.0,
        "ff_dropout": 0.0,
        "bias": true,
        "activation_function": "gelu",
        "pre_norm": true,
        "use_cls_token": false,
        "init_std": 0.02,
        "scaling": "std",
        "do_mask_input": null,
        "mask_type": "random",
        "random_mask_ratio": 0.5,
        "num_forecast_mask_patches": 3,
        "channel_consistent_masking": false,
        "unmasked_channel_indices": null,
        "mask_value": 0,
        "pooling_type": "mean",
        "head_dropout": 0.0,
        "num_parallel_samples": 100,
        "channel_rope": false,
        "max_wavelength": 500,
        "rope_percent": 0.75,
        "pretrained_encoder_path": null,
        "use_dynamics_embedding": true,
        "num_poly_feats": 120,
        "poly_degrees": 2,
        "rff_trainable": false,
        "rff_scale": 1.0,
        "num_rff": 256
    },
    "train_config": {
        "seed": 99,
        "max_steps": 100000,
        "save_steps": 50000,
        "log_steps": 1000,
        "per_device_train_batch_size": 1024,
        "gradient_accumulation_steps": 1,
        "max_grad_norm": 1.0,
        "dataloader_num_workers": 16,
        "dataloader_prefetch_factor": 2,
        "tf32": false,
        "torch_compile": true,
        "optim": "adamw_torch_fused",
        "learning_rate": 0.001,
        "lr_scheduler_type": "cosine",
        "warmup_ratio": 0.1,
        "weight_decay": 0.0,
        "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
        "ddp_backend": "nccl",
        "ddp_find_unused_parameters": false,
        "remove_unused_columns": false
    },
    "all_config": {
        "run_name": "pft_chattn_emb_w_poly",
        "wandb": {
            "log": true,
            "project_name": "dystformer",
            "entity": "gilpinlab",
            "group_name": "fine-tuning",
            "resume": false,
            "tags": null
        },
        "patchtst": {
            "mode": "predict",
            "context_length": 512,
            "prediction_length": 128,
            "distribution_output": null,
            "loss": "mse",
            "huber_delta": 1.0,
            "patch_length": 16,
            "patch_stride": 16,
            "num_hidden_layers": 8,
            "d_model": 512,
            "num_attention_heads": 8,
            "channel_attention": true,
            "ffn_dim": 512,
            "norm_type": "rmsnorm",
            "norm_eps": 1e-05,
            "attention_dropout": 0.0,
            "positional_dropout": 0.0,
            "path_dropout": 0.0,
            "ff_dropout": 0.0,
            "bias": true,
            "activation_function": "gelu",
            "pre_norm": true,
            "use_cls_token": false,
            "init_std": 0.02,
            "scaling": "std",
            "do_mask_input": null,
            "mask_type": "random",
            "random_mask_ratio": 0.5,
            "num_forecast_mask_patches": 3,
            "channel_consistent_masking": false,
            "unmasked_channel_indices": null,
            "mask_value": 0,
            "pooling_type": "mean",
            "head_dropout": 0.0,
            "num_parallel_samples": 100,
            "channel_rope": false,
            "max_wavelength": 500,
            "rope_percent": 0.75,
            "pretrained_encoder_path": null,
            "use_dynamics_embedding": true,
            "num_poly_feats": 120,
            "poly_degrees": 2,
            "rff_trainable": false,
            "rff_scale": 1.0,
            "num_rff": 256
        },
        "chronos": {
            "model_id": "amazon/chronos-t5-mini",
            "model_type": "seq2seq",
            "random_init": false,
            "tie_embeddings": true,
            "context_length": 512,
            "prediction_length": 64,
            "num_samples": 20,
            "n_tokens": 4096,
            "n_special_tokens": 2,
            "pad_token_id": 0,
            "eos_token_id": 1,
            "use_eos_token": true,
            "tokenizer_class": "MeanScaleUniformBins",
            "tokenizer_kwargs": {
                "low_limit": -15.0,
                "high_limit": 15.0
            },
            "temperature": 1.0,
            "top_k": 50,
            "top_p": 1.0
        },
        "train": {
            "seed": 99,
            "max_steps": 100000,
            "save_steps": 50000,
            "log_steps": 1000,
            "per_device_train_batch_size": 1024,
            "gradient_accumulation_steps": 1,
            "max_grad_norm": 1.0,
            "dataloader_num_workers": 16,
            "dataloader_prefetch_factor": 2,
            "tf32": false,
            "torch_compile": true,
            "optim": "adamw_torch_fused",
            "learning_rate": 0.001,
            "lr_scheduler_type": "cosine",
            "warmup_ratio": 0.1,
            "weight_decay": 0.0,
            "output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
            "ddp_backend": "nccl",
            "ddp_find_unused_parameters": false,
            "remove_unused_columns": false
        },
        "scheduler": {
            "enabled": false,
            "schedule_value_name": "noise_scale",
            "schedule_name": "cosine",
            "epoch_stop": 0.5,
            "init_value": 1.0,
            "final_value": 0.0,
            "eps": 0.008,
            "num_steps": 4,
            "decay_rate": 8.0
        },
        "eval": {
            "mode": "predict",
            "data_path": "/stor/work/AMDG_Gilpin_Summer2024/data/test/",
            "checkpoint_path": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints",
            "device": "cuda:7",
            "torch_dtype": "float32",
            "batch_size": 32,
            "num_systems": 10,
            "sliding_context": false,
            "metric_names": [
                "mse",
                "mae",
                "smape",
                "r2_score",
                "spearman"
            ],
            "forecast_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/forecasts",
            "labels_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/labels",
            "completions_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/completions",
            "patch_input_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/patch_input",
            "timestep_masks_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/timestep_masks",
            "metrics_save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/metrics",
            "metrics_fname": "metrics.json",
            "overwrite": false,
            "seed": 42,
            "parallel_sample_reduction": "mean",
            "limit_prediction_length": true,
            "prediction_length": 64,
            "num_test_instances": 1,
            "window_style": "sampled",
            "window_stride": 1,
            "split_coords": false,
            "verbose": false,
            "use_channel_sampler": false,
            "channel_sampler": {
                "num_channels": 3,
                "num_samples": 2
            }
        },
        "run_metrics": {
            "wandb_run_id": null,
            "plot_dir": "figs",
            "save_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/eval/run_metrics",
            "save_fname": "metrics.json"
        },
        "train_data_dirs": [
            "/stor/work/AMDG_Gilpin_Summer2024/data/final_skew40/train",
            "/stor/work/AMDG_Gilpin_Summer2024/data/final_skew40/train_z5_z10",
            "/stor/work/AMDG_Gilpin_Summer2024/data/final_base40/train",
            "/stor/work/AMDG_Gilpin_Summer2024/data/final_base40/train_z5_z10"
        ],
        "extra_train_data_paths": null,
        "probability": null,
        "shuffle_buffer_length": 100000,
        "min_past": 60,
        "max_missing_prop": 0.9,
        "fixed_dim": 3,
        "augmentations": {
            "augmentation_rate": 0.2,
            "probabilities": [
                0.3333333333333333,
                0.3333333333333333,
                0.3333333333333333,
                0.0,
                0.0
            ],
            "dim_range": [
                3,
                8
            ],
            "lag_range": [
                1,
                10
            ],
            "phase_surrogate_cutoff": 1.0,
            "mode_range": [
                5,
                15
            ],
            "max_wavenumber": 10.0,
            "max_amp": 10.0
        },
        "sampling": {
            "data_dir": "/stor/work/AMDG_Gilpin_Summer2024/data/",
            "sys_class": "continuous_no_delay",
            "test_split": 0.3,
            "split_prefix": null,
            "rseed": 999,
            "ic_rseed": 888,
            "num_points": 4096,
            "num_periods": 40,
            "num_periods_min": 20,
            "num_periods_max": 60,
            "num_ics": 1,
            "num_param_perturbations": 4,
            "param_scale": 0.5,
            "split_coords": false,
            "standardize": false,
            "verbose": false,
            "multiprocessing": true,
            "debug_system": null,
            "silence_integration_errors": false,
            "save_params": true,
            "save_traj_stats": false,
            "ignore_probability": 0.0,
            "sign_match_probability": 0.5,
            "atol": 1e-10,
            "rtol": 1e-09,
            "reference_traj": {
                "length": 4096,
                "transient": 0.5,
                "n_periods": 40,
                "atol": 1e-07,
                "rtol": 1e-06
            }
        },
        "validator": {
            "enable": true,
            "verbose": false,
            "transient_time_frac": 0.05,
            "plot_save_dir": null,
            "save_failed_trajs": false,
            "attractor_tests": [
                "check_not_linear",
                "check_boundedness",
                "check_not_fixed_point",
                "check_zero_one_test",
                "check_power_spectrum",
                "check_stationarity"
            ]
        },
        "events": {
            "max_duration": 300,
            "instability_threshold": 10000.0,
            "min_step": 1e-10,
            "verbose": true
        },
        "skew": {
            "num_pairs": 5000,
            "pairs_rseed": 123,
            "sys_idx_low": 0,
            "sys_idx_high": null,
            "normalization_strategy": "flow_rms",
            "randomize_driver_indices": true,
            "transform_scales": true,
            "train_nonskew_path": null,
            "test_nonskew_path": null,
            "coupling_map_type": "additive",
            "coupling_map": {
                "transform_scales": false,
                "randomize_driver_indices": true,
                "normalization_strategy": "flow_rms",
                "random_seed": 0
            }
        },
        "analysis": {
            "data_dir": "/stor/work/AMDG_Gilpin_Summer2024/data",
            "split": "copy/final_skew40/train",
            "num_samples": 1,
            "one_dim_target": false,
            "save_dir": "outputs",
            "plots_dir": "figures",
            "compute_quantile_limits": false,
            "compute_max_lyapunov_exponents": false,
            "filter_ensemble": true,
            "filter_json_fname": "failed_samples",
            "verbose": true,
            "attractor_tests": [
                "check_zero_one_test"
            ],
            "check_not_transient": {
                "max_transient_prop": 0.2,
                "atol": 0.001
            },
            "check_stationarity": {
                "p_value": 0.05
            },
            "check_boundedness": {
                "threshold": 10000.0,
                "max_zscore": 5,
                "eps": 1e-10
            },
            "check_zero_one_test": {
                "threshold": 0.2,
                "strategy": "score"
            }
        }
    },
    "job_info": {
        "cuda_available": true,
        "device_count": 4,
        "device_names": {
            "0": "AMD Instinct MI100",
            "1": "AMD Instinct MI100",
            "2": "AMD Instinct MI100",
            "3": "AMD Instinct MI100"
        },
        "mem_info": {
            "0": [
                4438360064,
                34342961152
            ],
            "1": [
                4429185024,
                34342961152
            ],
            "2": [
                4456448000,
                34342961152
            ],
            "3": [
                4462739456,
                34342961152
            ]
        },
        "torchelastic_launched": true,
        "world_size": 4,
        "python_version": "3.11.9 (main, Apr 19 2024, 16:48:06) [GCC 11.2.0]",
        "torch_version": "2.2.2+rocm5.7",
        "numpy_version": "1.26.4",
        "gluonts_version": "0.15.1",
        "transformers_version": "4.40.1",
        "accelerate_version": "0.34.2"
    }
}