# diffu-hier-win-7 / config.yaml
# Uploaded by Pretam via huggingface_hub (commit 48a2caf, verified).
---
# Encoder backbone and model variant.
backbone: FacebookAI/xlm-roberta-large
base: hierarchical_necti

# Dataset location and preprocessing options.
data:
  data_path: /home/pretam-pg/DepNeCTI/DepNeCTI-XLMR/Trankit_Data
  granularity: Finegrain
  max_length: 128
  num_workers: 4
  use_context: true
dataset: necti_finegrain
dim_model: 1024
freeze_bert: false

# Hardware / numeric-precision settings.
gpu:
  gpus: 1
  precision: 16
  use_gpu: true

# Fine-grained label ids grouped by coarse compound class
# (tatpurusa, bahuvrihi, dvandva, avyayibhava). Ids 0-53 partition
# disjointly across the four groups.
label_hierarchy:
  avyayibhava_subtypes:
    - 45
    - 46
    - 47
    - 48
    - 49
    - 50
    - 51
    - 52
    - 53
  bahuvrihi_subtypes:
    - 20
    - 21
    - 22
    - 23
    - 24
    - 25
    - 26
    - 27
    - 28
    - 29
    - 30
    - 31
    - 32
    - 33
    - 34
  dvandva_subtypes:
    - 35
    - 36
    - 37
    - 38
    - 39
    - 40
    - 41
    - 42
    - 43
    - 44
  tatpurusa_subtypes:
    - 0
    - 1
    - 2
    - 3
    - 4
    - 5
    - 6
    - 7
    - 8
    - 9
    - 10
    - 11
    - 12
    - 13
    - 14
    - 15
    - 16
    - 17
    - 18
    - 19

# Experiment tracking.
logging:
  experiment_name: local-refinement-v1
  logger: wandb
  project_name: hierarchical-diffusion-necti

# Diffusion training objective.
loss_type: l2
noise_schedule: cosine
objective: pred_x0

# Checkpointing and evaluation cadence.
output:
  eval_every_n_epochs: 1
  log_every_n_steps: 50
  output_dir: saved_models/hierarchical_necti
  save_best: true
  save_last: true

sampling_steps: 100
snr_scale: 1.0

# Stage-1 head (coarse labels: num_classes 6).
stage1:
  depth: 6
  mlp_ratio: 4.0
  num_classes: 6
  num_heads: 16

# Stage-2 head (fine-grained labels: num_classes 56; windowed attention,
# matching the "win-7" model name).
stage2:
  depth: 4
  mlp_ratio: 4.0
  num_classes: 56
  num_heads: 16
  window_size: 7

time_steps: 1000

# Optimization / trainer schedule.
training:
  batch_size: 16
  freeze_stage1_for_stage2: true
  # NOTE(review): bare scientific notation without a dot ("2e-5", "1e-4")
  # parses as a *string* under YAML 1.1 loaders such as PyYAML; presumably
  # the config loader (e.g. OmegaConf/yaml 1.2) coerces to float — confirm,
  # or rewrite as 2.0e-5 / 1.0e-4.
  lr_bert: 2e-5
  lr_scheduler_type: cosine
  lr_stage1: 1e-4
  lr_stage2: 1e-4
  max_epochs: 50
  max_grad_norm: 1.0
  max_steps: 100000
  min_delta: 0.0001
  monitor: val_USS
  optimizer_type: AdamW
  patience: 10
  stage1_epochs: 30
  stage1_weight: 1.0
  stage2_epochs: 20
  stage2_weight: 1.0
  strategy: joint
  warmup_ratio: 0.05
  warmup_steps: 500
  weight_decay: 0.01