| { |
| "args": { |
| "data_dir": "./data", |
| "dataset": "cifar10", |
| "dataset_kwargs": "{}", |
| "criterion": "ce", |
| "model": "resnet", |
| "model_kwargs": "{}", |
| "image_size": 32, |
| "patch_size": 4, |
| "embed_dim": 256, |
| "depth": 6, |
| "num_heads": 8, |
| "mlp_ratio": 4.0, |
| "dropout": 0.0, |
| "vit_preset": "default", |
| "epochs": 50, |
| "batch_size": 8192, |
| "lr": 0.1, |
| "weight_decay": 0.0, |
| "grad_clip": 0.0, |
| "num_workers": 4, |
| "seed": 42, |
| "device": "auto", |
| "deterministic": false, |
| "train_shuffle": true, |
| "dataset_cache": "none", |
| "momentum": 0.0, |
| "optimizer": "sgd", |
| "adam_beta1": 0.9, |
| "adam_beta2": 0.999, |
| "adam_eps": 1e-08, |
| "mlp_width": 256, |
| "mlp_activation": "gelu", |
| "cnn_width": 32, |
| "cnn_activation": "gelu", |
| "resnet_width": 16, |
| "resnet_activation": "gelu", |
| "dataset_n": 50000, |
| "dataset_n_test": 1000, |
| "cf_cifar_classes": "10", |
| "sorting_vocab_size": 4, |
| "sorting_length": 8, |
| "sharpness_batches": 1, |
| "sharpness_batch_size": 0, |
| "eig_iters": 100, |
| "eig_warm_iters": 0, |
| "eig_refresh": 5, |
| "eig_frequency": null, |
| "eig_tol": 1e-10, |
| "trace_samples": 16, |
| "trace_refresh": 5, |
| "eig_method": "lobpcg", |
| "log_dir": "logs/resnet_cifar10_fullbatch_e50", |
| "log_steps": false, |
| "log_every": 50, |
| "wandb_project": "loss-landscape", |
| "wandb_entity": null, |
| "wandb_name": "resnet-cifar10-fullbatch-e50", |
| "wandb_offline": false, |
| "wandb_disabled": false, |
| "debug": false, |
| "profile_sharpness": false, |
| "profile_epoch": 1, |
| "all": false, |
| "local_smoothness": true, |
| "eig": false, |
| "trace": false, |
| "checkpoint_every_epochs": 5, |
| "checkpoint_dir": null, |
| "save_final_checkpoint": true, |
| "save_optimizer_state": false, |
| "train_only": false, |
| "hf_push": true, |
| "hf_repo_id": "tkharisov7/resnet-fullbatch-gd", |
| "hf_repo_type": "model", |
| "hf_private": false, |
| "hf_token_env": "HF_TOKEN", |
| "hf_path_in_repo": "", |
| "hf_include_checkpoints": true, |
| "hf_checkpoints_as_branches": true, |
| "hf_checkpoint_branch_prefix": "epoch-", |
| "list_models": false, |
| "list_datasets": false |
| }, |
| "data_meta": { |
| "dataset_type": "torchvision_cifar", |
| "train_config": { |
| "data_dir": "./data", |
| "dataset": "cifar10", |
| "image_size": 32, |
| "batch_size": 8192, |
| "num_workers": 4, |
| "train_shuffle": true, |
| "pin_memory": true, |
| "dataset_cache": "none", |
| "cache_device": "cpu" |
| }, |
| "sharpness_config": { |
| "data_dir": "./data", |
| "dataset": "cifar10", |
| "image_size": 32, |
| "batch_size": 8192, |
| "num_workers": 4, |
| "train_shuffle": false, |
| "pin_memory": true, |
| "dataset_cache": "none", |
| "cache_device": "cpu" |
| } |
| } |
| } |