| backbone: FacebookAI/xlm-roberta-large | |
| base: hierarchical_necti | |
| data: | |
| data_path: /home/pretam-pg/DepNeCTI/DepNeCTI-XLMR/Trankit_Data | |
| granularity: Finegrain | |
| max_length: 128 | |
| num_workers: 4 | |
| use_context: true | |
| dataset: necti_finegrain | |
| dim_model: 1024 | |
| freeze_bert: false | |
| gpu: | |
| gpus: 1 | |
| precision: 16 | |
| use_gpu: true | |
| label_hierarchy: | |
| avyayibhava_subtypes: | |
| - 45 | |
| - 46 | |
| - 47 | |
| - 48 | |
| - 49 | |
| - 50 | |
| - 51 | |
| - 52 | |
| - 53 | |
| bahuvrihi_subtypes: | |
| - 20 | |
| - 21 | |
| - 22 | |
| - 23 | |
| - 24 | |
| - 25 | |
| - 26 | |
| - 27 | |
| - 28 | |
| - 29 | |
| - 30 | |
| - 31 | |
| - 32 | |
| - 33 | |
| - 34 | |
| dvandva_subtypes: | |
| - 35 | |
| - 36 | |
| - 37 | |
| - 38 | |
| - 39 | |
| - 40 | |
| - 41 | |
| - 42 | |
| - 43 | |
| - 44 | |
| tatpurusa_subtypes: | |
| - 0 | |
| - 1 | |
| - 2 | |
| - 3 | |
| - 4 | |
| - 5 | |
| - 6 | |
| - 7 | |
| - 8 | |
| - 9 | |
| - 10 | |
| - 11 | |
| - 12 | |
| - 13 | |
| - 14 | |
| - 15 | |
| - 16 | |
| - 17 | |
| - 18 | |
| - 19 | |
| logging: | |
| experiment_name: local-refinement-v1 | |
| logger: wandb | |
| project_name: hierarchical-diffusion-necti | |
| loss_type: l2 | |
| noise_schedule: cosine | |
| objective: pred_x0 | |
| output: | |
| eval_every_n_epochs: 1 | |
| log_every_n_steps: 50 | |
| output_dir: saved_models/hierarchical_necti | |
| save_best: true | |
| save_last: true | |
| sampling_steps: 100 | |
| snr_scale: 1.0 | |
| stage1: | |
| depth: 6 | |
| mlp_ratio: 4.0 | |
| num_classes: 6 | |
| num_heads: 16 | |
| stage2: | |
| depth: 4 | |
| mlp_ratio: 4.0 | |
| num_classes: 56 | |
| num_heads: 16 | |
| window_size: 7 | |
| time_steps: 1000 | |
| training: | |
| batch_size: 16 | |
| freeze_stage1_for_stage2: true | |
| lr_bert: 2.0e-5 | |
| lr_scheduler_type: cosine | |
| lr_stage1: 1.0e-4 | |
| lr_stage2: 1.0e-4 | |
| max_epochs: 50 | |
| max_grad_norm: 1.0 | |
| max_steps: 100000 | |
| min_delta: 0.0001 | |
| monitor: val_USS | |
| optimizer_type: AdamW | |
| patience: 10 | |
| stage1_epochs: 30 | |
| stage1_weight: 1.0 | |
| stage2_epochs: 20 | |
| stage2_weight: 1.0 | |
| strategy: joint | |
| warmup_ratio: 0.05 | |
| warmup_steps: 500 | |
| weight_decay: 0.01 | |