---
# Training configuration for hierarchical diffusion-based NeCTI
# (Nested Compound Type Identification) on an XLM-RoBERTa backbone.
# Reconstructed into valid block YAML: the original file had been
# collapsed onto a single line, which does not parse.

# HuggingFace model id used as the encoder backbone.
backbone: FacebookAI/xlm-roberta-large
base: hierarchical_necti

data:
  data_path: /home/pretam-pg/DepNeCTI/DepNeCTI-XLMR/Trankit_Data
  granularity: Finegrain
  max_length: 128
  num_workers: 4
  use_context: true

dataset: necti_finegrain
# Hidden size of xlm-roberta-large.
dim_model: 1024
freeze_bert: false

gpu:
  gpus: 1
  # Mixed-precision (fp16) training.
  precision: 16
  use_gpu: true

# Mapping from coarse compound categories (stage 1) to the fine-grained
# label ids (stage 2) they subsume. Ids 0-55 match stage2.num_classes: 56.
label_hierarchy:
  avyayibhava_subtypes:
    - 45
    - 46
    - 47
    - 48
    - 49
    - 50
    - 51
    - 52
    - 53
  bahuvrihi_subtypes:
    - 20
    - 21
    - 22
    - 23
    - 24
    - 25
    - 26
    - 27
    - 28
    - 29
    - 30
    - 31
    - 32
    - 33
    - 34
  dvandva_subtypes:
    - 35
    - 36
    - 37
    - 38
    - 39
    - 40
    - 41
    - 42
    - 43
    - 44
  tatpurusa_subtypes:
    - 0
    - 1
    - 2
    - 3
    - 4
    - 5
    - 6
    - 7
    - 8
    - 9
    - 10
    - 11
    - 12
    - 13
    - 14
    - 15
    - 16
    - 17
    - 18
    - 19

logging:
  experiment_name: local-refinement-v1
  logger: wandb
  project_name: hierarchical-diffusion-necti

# Diffusion objective settings.
loss_type: l2
noise_schedule: cosine
objective: pred_x0

output:
  eval_every_n_epochs: 1
  log_every_n_steps: 50
  output_dir: saved_models/hierarchical_necti
  save_best: true
  save_last: true

# Number of denoising steps used at inference time (<= time_steps).
sampling_steps: 100
snr_scale: 1.0

# Stage 1: coarse compound-category head (6 classes).
stage1:
  depth: 6
  mlp_ratio: 4.0
  num_classes: 6
  num_heads: 16

# Stage 2: fine-grained subtype head (56 classes).
stage2:
  depth: 4
  mlp_ratio: 4.0
  num_classes: 56
  num_heads: 16
  window_size: 7

# Total diffusion timesteps in the forward noising process.
time_steps: 1000

training:
  batch_size: 16
  freeze_stage1_for_stage2: true
  # NOTE(review): bare `2e-5`/`1e-4` are parsed as *strings*, not floats,
  # by YAML 1.1 loaders such as PyYAML (the 1.1 float regex requires a
  # dot before the exponent). Confirm the config loader casts these, or
  # rewrite as 2.0e-5 / 1.0e-4.
  lr_bert: 2e-5
  lr_scheduler_type: cosine
  lr_stage1: 1e-4
  lr_stage2: 1e-4
  max_epochs: 50
  max_grad_norm: 1.0
  max_steps: 100000
  # Early-stopping settings; monitors validation Unlabeled Span Score
  # (presumably — TODO confirm metric name against the trainer).
  min_delta: 0.0001
  monitor: val_USS
  optimizer_type: AdamW
  patience: 10
  stage1_epochs: 30
  stage1_weight: 1.0
  stage2_epochs: 20
  stage2_weight: 1.0
  strategy: joint
  warmup_ratio: 0.05
  warmup_steps: 500
  weight_decay: 0.01