---
# Training configuration for hierarchical diffusion-based NeCTI
# (Nested Compound Type Identification) on an XLM-RoBERTa backbone.
# Reconstructed into valid block YAML: the original file had been
# collapsed onto a single line, which does not parse.

# HuggingFace model id used as the encoder backbone.
backbone: FacebookAI/xlm-roberta-large
base: hierarchical_necti

data:
  data_path: /home/pretam-pg/DepNeCTI/DepNeCTI-XLMR/Trankit_Data
  granularity: Finegrain
  max_length: 128
  num_workers: 4
  use_context: true

dataset: necti_finegrain
# Hidden size of xlm-roberta-large.
dim_model: 1024
freeze_bert: false

gpu:
  gpus: 1
  # Mixed-precision (fp16) training.
  precision: 16
  use_gpu: true

# Mapping from coarse compound categories (stage 1) to the fine-grained
# label ids (stage 2) they subsume. Ids 0-55 match stage2.num_classes: 56.
label_hierarchy:
  avyayibhava_subtypes:
    - 45
    - 46
    - 47
    - 48
    - 49
    - 50
    - 51
    - 52
    - 53
  bahuvrihi_subtypes:
    - 20
    - 21
    - 22
    - 23
    - 24
    - 25
    - 26
    - 27
    - 28
    - 29
    - 30
    - 31
    - 32
    - 33
    - 34
  dvandva_subtypes:
    - 35
    - 36
    - 37
    - 38
    - 39
    - 40
    - 41
    - 42
    - 43
    - 44
  tatpurusa_subtypes:
    - 0
    - 1
    - 2
    - 3
    - 4
    - 5
    - 6
    - 7
    - 8
    - 9
    - 10
    - 11
    - 12
    - 13
    - 14
    - 15
    - 16
    - 17
    - 18
    - 19

logging:
  experiment_name: local-refinement-v1
  logger: wandb
  project_name: hierarchical-diffusion-necti

# Diffusion objective settings.
loss_type: l2
noise_schedule: cosine
objective: pred_x0

output:
  eval_every_n_epochs: 1
  log_every_n_steps: 50
  output_dir: saved_models/hierarchical_necti
  save_best: true
  save_last: true

# Number of denoising steps used at inference time (<= time_steps).
sampling_steps: 100
snr_scale: 1.0

# Stage 1: coarse compound-category head (6 classes).
stage1:
  depth: 6
  mlp_ratio: 4.0
  num_classes: 6
  num_heads: 16

# Stage 2: fine-grained subtype head (56 classes).
stage2:
  depth: 4
  mlp_ratio: 4.0
  num_classes: 56
  num_heads: 16
  window_size: 7

# Total diffusion timesteps in the forward noising process.
time_steps: 1000

training:
  batch_size: 16
  freeze_stage1_for_stage2: true
  # NOTE(review): bare `2e-5`/`1e-4` are parsed as *strings*, not floats,
  # by YAML 1.1 loaders such as PyYAML (the 1.1 float regex requires a
  # dot before the exponent). Confirm the config loader casts these, or
  # rewrite as 2.0e-5 / 1.0e-4.
  lr_bert: 2e-5
  lr_scheduler_type: cosine
  lr_stage1: 1e-4
  lr_stage2: 1e-4
  max_epochs: 50
  max_grad_norm: 1.0
  max_steps: 100000
  # Early-stopping settings; monitors validation Unlabeled Span Score
  # (presumably — TODO confirm metric name against the trainer).
  min_delta: 0.0001
  monitor: val_USS
  optimizer_type: AdamW
  patience: 10
  stage1_epochs: 30
  stage1_weight: 1.0
  stage2_epochs: 20
  stage2_weight: 1.0
  strategy: joint
  warmup_ratio: 0.05
  warmup_steps: 500
  weight_decay: 0.01