panda-72M / training_info.json

Upload 4 files

25d348d verified 6 months ago

16 kB

	{
	"model_config": {
	"mode": "predict",
	"context_length": 512,
	"prediction_length": 128,
	"distribution_output": null,
	"loss": "mse",
	"huber_delta": 1.0,
	"patch_length": 16,
	"patch_stride": 16,
	"num_hidden_layers": 12,
	"d_model": 768,
	"num_attention_heads": 12,
	"channel_attention": true,
	"ffn_dim": 768,
	"norm_type": "rmsnorm",
	"norm_eps": 1e-05,
	"attention_dropout": 0.0,
	"positional_dropout": 0.0,
	"path_dropout": 0.0,
	"ff_dropout": 0.0,
	"bias": true,
	"activation_function": "gelu",
	"pre_norm": true,
	"use_cls_token": false,
	"init_std": 0.02,
	"scaling": "std",
	"do_mask_input": null,
	"mask_type": "random",
	"random_mask_ratio": 0.5,
	"num_forecast_mask_patches": 3,
	"channel_consistent_masking": false,
	"unmasked_channel_indices": null,
	"mask_value": 0,
	"pooling_type": "mean",
	"head_dropout": 0.0,
	"num_parallel_samples": 100,
	"channel_rope": false,
	"max_wavelength": 500,
	"rope_percent": 0.75,
	"pretrained_encoder_path": null,
	"pretrained_pft_path": null,
	"use_dynamics_embedding": true,
	"num_poly_feats": 188,
	"poly_degrees": 2,
	"rff_trainable": false,
	"rff_scale": 1.0,
	"num_rff": 376
	},
	"train_config": {
	"seed": 99,
	"max_steps": 800000,
	"save_steps": 50000,
	"log_steps": 1000,
	"resume_from_checkpoint": null,
	"per_device_train_batch_size": 384,
	"gradient_accumulation_steps": 1,
	"max_grad_norm": 1.0,
	"dataloader_num_workers": 16,
	"dataloader_prefetch_factor": 2,
	"tf32": false,
	"torch_compile": true,
	"optim": "adamw_torch_fused",
	"learning_rate": 0.001,
	"lr_scheduler_type": "cosine",
	"warmup_ratio": 0.05,
	"weight_decay": 0.0,
	"output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
	"ddp_backend": "nccl",
	"ddp_find_unused_parameters": false,
	"remove_unused_columns": false
	},
	"all_config": {
	"run_name": "panda_nh12_dmodel768_mixedp",
	"wandb": {
	"log": true,
	"project_name": "panda",
	"entity": "gilpinlab",
	"group_name": null,
	"resume": false,
	"resume_run_id": null,
	"tags": null
	},
	"patchtst": {
	"mode": "predict",
	"context_length": 512,
	"prediction_length": 128,
	"distribution_output": null,
	"loss": "mse",
	"huber_delta": 1.0,
	"patch_length": 16,
	"patch_stride": 16,
	"num_hidden_layers": 12,
	"d_model": 768,
	"num_attention_heads": 12,
	"channel_attention": true,
	"ffn_dim": 768,
	"norm_type": "rmsnorm",
	"norm_eps": 1e-05,
	"attention_dropout": 0.0,
	"positional_dropout": 0.0,
	"path_dropout": 0.0,
	"ff_dropout": 0.0,
	"bias": true,
	"activation_function": "gelu",
	"pre_norm": true,
	"use_cls_token": false,
	"init_std": 0.02,
	"scaling": "std",
	"do_mask_input": null,
	"mask_type": "random",
	"random_mask_ratio": 0.5,
	"num_forecast_mask_patches": 3,
	"channel_consistent_masking": false,
	"unmasked_channel_indices": null,
	"mask_value": 0,
	"pooling_type": "mean",
	"head_dropout": 0.0,
	"num_parallel_samples": 100,
	"channel_rope": false,
	"max_wavelength": 500,
	"rope_percent": 0.75,
	"pretrained_encoder_path": null,
	"pretrained_pft_path": null,
	"use_dynamics_embedding": true,
	"num_poly_feats": 188,
	"poly_degrees": 2,
	"rff_trainable": false,
	"rff_scale": 1.0,
	"num_rff": 376
	},
	"chronos": {
	"model_id": "amazon/chronos-t5-mini",
	"model_type": "seq2seq",
	"random_init": false,
	"tie_embeddings": true,
	"context_length": 512,
	"prediction_length": 128,
	"num_samples": 20,
	"n_tokens": 4096,
	"n_special_tokens": 2,
	"pad_token_id": 0,
	"eos_token_id": 1,
	"use_eos_token": true,
	"tokenizer_class": "MeanScaleUniformBins",
	"tokenizer_kwargs": {
	"low_limit": -15.0,
	"high_limit": 15.0
	},
	"temperature": 1.0,
	"top_k": 50,
	"top_p": 1.0
	},
	"train": {
	"seed": 99,
	"max_steps": 800000,
	"save_steps": 50000,
	"log_steps": 1000,
	"resume_from_checkpoint": null,
	"per_device_train_batch_size": 384,
	"gradient_accumulation_steps": 1,
	"max_grad_norm": 1.0,
	"dataloader_num_workers": 16,
	"dataloader_prefetch_factor": 2,
	"tf32": false,
	"torch_compile": true,
	"optim": "adamw_torch_fused",
	"learning_rate": 0.001,
	"lr_scheduler_type": "cosine",
	"warmup_ratio": 0.05,
	"weight_decay": 0.0,
	"output_dir": "/stor/work/AMDG_Gilpin_Summer2024/checkpoints/",
	"ddp_backend": "nccl",
	"ddp_find_unused_parameters": false,
	"remove_unused_columns": false
	},
	"scheduler": {
	"enabled": false,
	"schedule_value_name": "noise_scale",
	"schedule_name": "cosine",
	"epoch_stop": 0.5,
	"init_value": 1.0,
	"final_value": 0.0,
	"eps": 0.008,
	"num_steps": 4,
	"decay_rate": 8.0
	},
	"eval": {
	"mode": "predict",
	"data_paths_lst": null,
	"checkpoint_path": null,
	"device": "cuda:7",
	"torch_dtype": "float32",
	"batch_size": 32,
	"num_subdirs": null,
	"num_samples_per_subdir": null,
	"sliding_context": true,
	"save_contexts": false,
	"save_labels": false,
	"save_predictions": false,
	"save_completions": false,
	"save_masks": false,
	"num_processes": 10,
	"metric_names": [
	"mse",
	"mae",
	"smape",
	"spearman"
	],
	"forecast_save_dir": null,
	"labels_save_dir": null,
	"completions_save_dir": null,
	"patch_input_save_dir": null,
	"timestep_masks_save_dir": null,
	"metrics_save_dir": null,
	"metrics_fname": "metrics",
	"overwrite": false,
	"seed": 1,
	"num_samples": 1,
	"parallel_sample_reduction": "mean",
	"limit_prediction_length": true,
	"context_length": 512,
	"prediction_length": 64,
	"num_test_instances": 1,
	"window_style": "sampled",
	"window_stride": 1,
	"split_coords": false,
	"verbose": false,
	"baselines": {
	"baseline_model": "fourier_arima",
	"order": [
	4,
	1,
	4
	],
	"num_fourier_terms": 5
	},
	"chronos": {
	"zero_shot": false,
	"deterministic": true
	}
	},
	"run_metrics": {
	"wandb_run_id": null,
	"plot_dir": "figures",
	"save_dir": null,
	"save_fname": "metrics.json"
	},
	"train_data_dirs": [
	"/stor/work/AMDG_Gilpin_Summer2024/data/improved/base_mixedp_ic16/train",
	"/stor/work/AMDG_Gilpin_Summer2024/data/improved/skew_mixedp_ic16/train",
	"/stor/work/AMDG_Gilpin_Summer2024/data/improved/final_base40/train",
	"/stor/work/AMDG_Gilpin_Summer2024/data/improved/final_base40/train_z5_z10"
	],
	"probability": null,
	"shuffle_buffer_length": 100000,
	"min_past": 60,
	"max_missing_prop": 0.9,
	"fixed_dim": 3,
	"augmentations": {
	"augmentation_rate": 0.2,
	"probabilities": [
	0.3333333333333333,
	0.3333333333333333,
	0.3333333333333333,
	0.0,
	0.0
	],
	"dim_range": [
	3,
	8
	],
	"lag_range": [
	1,
	10
	],
	"phase_surrogate_cutoff": 1.0,
	"mode_range": [
	5,
	15
	],
	"max_wavenumber": 10.0,
	"max_amp": 10.0
	},
	"multiprocess_kwargs": {
	"processes": null,
	"maxtasksperchild": null
	},
	"restart_sampling": {
	"split_name": null,
	"params_json_path": null,
	"systems_batch_size": 128,
	"batch_idx_low": null,
	"batch_idx_high": null,
	"starting_sample_idx": 0,
	"save_first_sample": true
	},
	"sampling": {
	"data_dir": null,
	"sys_class": "continuous_no_delay",
	"test_split": 0.3,
	"split_prefix": null,
	"rseed": 999,
	"ic_rseed": 888,
	"num_points": 4096,
	"num_periods": 40,
	"num_periods_min": 40,
	"num_periods_max": 40,
	"num_ics": 1,
	"num_param_perturbations": 4,
	"param_scale": 0.5,
	"split_coords": false,
	"standardize": false,
	"verbose": false,
	"multiprocessing": true,
	"debug_system": null,
	"silence_integration_errors": false,
	"save_params": true,
	"save_traj_stats": false,
	"ignore_probability": 0.0,
	"sign_match_probability": 0.5,
	"atol": 1e-10,
	"rtol": 1e-09,
	"reference_traj": {
	"length": 4096,
	"transient": 0.5,
	"n_periods": 40,
	"atol": 1e-07,
	"rtol": 1e-06
	}
	},
	"validator": {
	"enable": true,
	"verbose": false,
	"transient_time_frac": 0.05,
	"plot_save_dir": null,
	"save_failed_trajs": false,
	"attractor_tests": [
	"check_not_linear",
	"check_boundedness",
	"check_not_fixed_point",
	"check_zero_one_test",
	"check_power_spectrum",
	"check_stationarity"
	]
	},
	"events": {
	"max_duration": 300,
	"instability_threshold": 10000.0,
	"min_step": 1e-10,
	"verbose": true
	},
	"skew": {
	"num_pairs": 5000,
	"pairs_rseed": 123,
	"sys_idx_low": 0,
	"sys_idx_high": null,
	"normalization_strategy": "flow_rms",
	"randomize_driver_indices": true,
	"transform_scales": true,
	"train_nonskew_path": null,
	"test_nonskew_path": null,
	"coupling_map_type": "additive",
	"coupling_map": {
	"transform_scales": true,
	"randomize_driver_indices": true,
	"normalization_strategy": "flow_rms",
	"random_seed": 0
	}
	},
	"analysis": {
	"data_dir": null,
	"split": null,
	"num_samples": null,
	"one_dim_target": false,
	"save_dir": "outputs",
	"plots_dir": "figures",
	"compute_quantile_limits": false,
	"compute_max_lyapunov_exponents": false,
	"filter_ensemble": true,
	"filter_json_fname": "failed_samples",
	"verbose": true,
	"attractor_tests": [
	"check_zero_one_test"
	],
	"check_not_transient": {
	"max_transient_prop": 0.2,
	"atol": 0.001
	},
	"check_stationarity": {
	"p_value": 0.05
	},
	"check_boundedness": {
	"threshold": 10000.0,
	"max_zscore": 5,
	"eps": 1e-10
	},
	"check_zero_one_test": {
	"threshold": 0.2,
	"strategy": "score"
	}
	},
	"base_style": "ggplot",
	"matplotlib_style": {
	"font": {
	"serif": "Computer Modern Roman",
	"size": 10
	},
	"axes": {
	"titlesize": 12,
	"labelsize": 10,
	"linewidth": 0.75,
	"facecolor": "white",
	"grid": false
	},
	"grid": {
	"color": "gray",
	"linewidth": 0.5,
	"alpha": 0.5
	},
	"lines": {
	"linewidth": 1.5,
	"markersize": 5
	},
	"xtick": {
	"labelsize": 8,
	"major": {
	"size": 4
	},
	"minor": {
	"size": 2
	},
	"direction": "in"
	},
	"ytick": {
	"labelsize": 8,
	"major": {
	"size": 4
	},
	"minor": {
	"size": 2
	},
	"direction": "in"
	},
	"figure": {
	"figsize": [
	3.25,
	2.5
	],
	"dpi": 300,
	"autolayout": true,
	"facecolor": "white"
	},
	"legend": {
	"fontsize": 8,
	"title_fontsize": 9,
	"loc": "upper right",
	"frameon": false
	},
	"savefig": {
	"dpi": 300,
	"format": "pdf",
	"transparent": false
	}
	}
	},
	"job_info": {
	"cuda_available": true,
	"device_count": 6,
	"device_names": {
	"0": "AMD Instinct MI100",
	"1": "AMD Instinct MI100",
	"2": "AMD Instinct MI100",
	"3": "AMD Instinct MI100",
	"4": "AMD Instinct MI100",
	"5": "AMD Instinct MI100"
	},
	"mem_info": {
	"0": [
	8209039360,
	34342961152
	],
	"1": [
	8212447232,
	34342961152
	],
	"2": [
	8199864320,
	34342961152
	],
	"3": [
	8199864320,
	34342961152
	],
	"4": [
	8199864320,
	34342961152
	],
	"5": [
	8212447232,
	34342961152
	]
	},
	"torchelastic_launched": true,
	"world_size": 6,
	"python_version": "3.10.14 (main, May 6 2024, 19:42:50) [GCC 11.2.0]",
	"torch_version": "2.2.2+rocm5.7",
	"numpy_version": "1.26.4",
	"gluonts_version": "0.15.1",
	"transformers_version": "4.40.1",
	"accelerate_version": "1.7.0"
	}
	}