File size: 1,236 Bytes
6e45475
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# SFT training configuration snapshot (align-anything-style) for a Chameleon HF model.
data_cfgs:
  # No evaluation dataset is configured for this run — all eval_* fields are null/empty.
  eval_data_files: null
  eval_datasets: null
  eval_optional_args: []
  eval_size: null
  eval_split: null
  eval_subset: null
  eval_template: null
  # Pre-tokenized training tensor file, resolved inside train_datasets.
  train_data_files: 8713_tokenized_v2.pt
  train_datasets: /aifs4su/yaodong/datasets/llava_annotated
  train_optional_args: []
  train_size: null
  train_split: train
  train_subset: null
  train_template: ANYTHING_TI2TI
logger_cfgs:
  cache_dir: null
  log_project: align-anything
  log_run_name: sft
  log_type: wandb
  output_dir: ../outputs/sft_chameleon_0727_0802_v2.1_1e-4
  # Checkpoint interval in steps; was the float 500.0 — step counts are integers.
  save_interval: 500
model_cfgs:
  model_max_length: 4096
  model_name_or_path: /aifs4su/yaodong/projects/hantao/anole/facilitating_image_generation/model/chameleon_hf_0727
  trust_remote_code: true
special_tokens: null
train_cfgs:
  adam_betas:
  - 0.9
  - 0.95
  adam_epsilon: 1.0e-08
  # bf16 mixed precision is enabled; fp16 is explicitly off below.
  bf16: true
  # DeepSpeed ZeRO stage-3 config file (resolved by the launcher).
  ds_cfgs: ds_z3_config.json
  epochs: 3
  # NOTE(review): eval_strategy is 'steps' but no eval data is configured above —
  # presumably evaluation is skipped at runtime; confirm against the trainer.
  eval_interval: 1000
  eval_strategy: steps
  fp16: false
  freeze_language_model: false
  # Integer step/batch counts; were floats (2.0) in the original dump.
  gradient_accumulation_steps: 2
  gradient_checkpointing: true
  learning_rate: 0.0001
  lr_scheduler_type: cosine
  lr_warmup_ratio: 0.03
  max_grad_norm: 1.0
  per_device_eval_batch_size: 2
  per_device_train_batch_size: 2
  seed: 42
  weight_decay: 0.0