{
  "_name_or_path": "../doc_simp/models/classifier",
  "accelerator": null,
  "accumulate_grad_batches": null,
  "add_context": false,
  "amp_backend": null,
  "amp_level": null,
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "auto_lr_find": false,
  "auto_scale_batch_size": false,
  "auto_select_gpus": null,
  "batch_size": 32,
  "benchmark": null,
  "binary_clf": false,
  "bos_token_id": 0,
  "check_val_every_n_epoch": 1,
  "checkpoint": null,
  "ckpt_metric": "val_macro_f1",
  "classifier_dropout": null,
  "context_dir": null,
  "context_doc_id": null,
  "context_window": 5,
  "default_root_dir": null,
  "detect_anomaly": false,
  "devices": "2",
  "doc_pos_embeds": false,
  "enable_checkpointing": true,
  "enable_model_summary": true,
  "enable_progress_bar": true,
  "eos_token_id": 2,
  "fast_dev_run": false,
  "gpus": null,
  "gradient_clip_algorithm": null,
  "gradient_clip_val": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "inference_mode": true,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "ipus": null,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "learning_rate": 1e-05,
  "left_z_only": false,
  "limit_predict_batches": null,
  "limit_test_batches": null,
  "limit_train_batches": null,
  "limit_val_batches": null,
  "log_class_acc": true,
  "log_every_n_steps": 50,
  "logger": true,
  "lr_scheduler": false,
  "max_epochs": 10,
  "max_length": 128,
  "max_position_embeddings": 514,
  "max_samples": -1,
  "max_steps": -1,
  "max_time": null,
  "min_epochs": null,
  "min_steps": null,
  "model_type": "roberta",
  "move_metrics_to_cpu": false,
  "multiple_trainloader_mode": "max_size_cycle",
  "name": "classifier-merge",
  "no_context_pos": false,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_nodes": 1,
  "num_processes": null,
  "num_sanity_val_steps": 2,
  "overfit_batches": 0.0,
  "pad_token_id": 1,
  "plugins": null,
  "position_embedding_type": "absolute",
  "precision": 32,
  "profiler": null,
  "project": "planning",
  "reading_lvl": null,
  "regression": false,
  "reload_dataloaders_every_n_epochs": 0,
  "replace_sampler_ddp": true,
  "resume_from_checkpoint": null,
  "save_dir": null,
  "simple_context_dir": null,
  "simple_context_doc_id": null,
  "src_lvl": null,
  "strategy": null,
  "sync_batchnorm": false,
  "torch_dtype": "float32",
  "tpu_cores": null,
  "track_grad_norm": -1,
  "train_check_interval": 0.2,
  "train_file": "data/cochrane_sents_train.csv",
  "train_split": 0.9,
  "train_workers": 8,
  "transformers_version": "4.29.1",
  "type_vocab_size": 1,
  "upsample_classes": false,
  "use_cache": true,
  "use_merge_labels": true,
  "val_check_interval": null,
  "val_file": "data/cochrane_sents_train.csv",
  "val_split": 0.05,
  "val_workers": 8,
  "vocab_size": 50275,
  "wandb_id": null,
  "x_col": "complex",
  "y_col": "label"
}