yydxlv committed on
Commit
4fc370c
·
verified ·
1 Parent(s): 084f98c

Delete training_config.yml

Browse files
Files changed (1) hide show
  1. training_config.yml +0 -70
training_config.yml DELETED
@@ -1,70 +0,0 @@
1
- config:
2
- (): colpali_engine.trainer.colmodel_training.ColModelTrainingConfig
3
- output_dir: !path ../../../models/colphi3-ba256-ckpt-5e
4
- processor:
5
- (): colpali_engine.utils.transformers_wrappers.AutoProcessorWrapper
6
- class_to_instanciate: !ext colpali_engine.models.ColPhi3_5Processor
7
- pretrained_model_name_or_path: "./models/Phi-3.5-vision-instruct"
8
- trust_remote_code: true
9
- model:
10
- (): colpali_engine.utils.transformers_wrappers.AllPurposeWrapper
11
- class_to_instanciate: !ext colpali_engine.models.ColPhi3_5
12
- pretrained_model_name_or_path: "./models/Phi-3.5-vision-instruct"
13
- # attn_implementation: "eager"
14
- torch_dtype: !ext torch.bfloat16
15
- trust_remote_code: true
16
- # device_map: "auto"
17
- # quantization_config:
18
- # (): transformers.BitsAndBytesConfig
19
- # load_in_4bit: true
20
- # bnb_4bit_quant_type: "nf4"
21
- # bnb_4bit_compute_dtype: "bfloat16"
22
- # bnb_4bit_use_double_quant: true
23
-
24
- dataset_loading_func: !ext colpali_engine.utils.dataset_transformation.load_train_set
25
- eval_dataset_loader: !import ../data/test_data.yaml
26
-
27
- # max_length: 50
28
- run_eval: true
29
-
30
- loss_func:
31
- (): colpali_engine.loss.late_interaction_losses.ColbertPairwiseCELoss
32
- tr_args:
33
- (): transformers.training_args.TrainingArguments
34
- output_dir: null
35
- overwrite_output_dir: true
36
- num_train_epochs: 5
37
- per_device_train_batch_size: 32
38
- gradient_checkpointing: true
39
- gradient_checkpointing_kwargs: { "use_reentrant": false }
40
- # gradient_checkpointing: true
41
- # 6 x 8 gpus = 48 batch size
42
- # gradient_accumulation_steps: 4
43
- per_device_eval_batch_size: 8
44
- eval_strategy: "steps"
45
- dataloader_num_workers: 8
46
- # bf16: true
47
- save_steps: 500
48
- logging_steps: 10
49
- eval_steps: 1000
50
- warmup_steps: 100
51
- learning_rate: 5e-4
52
- save_total_limit: 1
53
- save_safetensors: false
54
- # resume_from_checkpoint: true
55
- # optim: "paged_adamw_8bit"
56
- # wandb logging
57
- # wandb_project: "colqwen2"
58
- # run_name: "colqwen2-ba32-nolora"
59
- report_to: "wandb"
60
- peft_config:
61
- (): peft.LoraConfig
62
- r: 32
63
- lora_alpha: 32
64
- lora_dropout: 0.1
65
- init_lora_weights: "gaussian"
66
- bias: "none"
67
- task_type: "FEATURE_EXTRACTION"
68
- target_modules: '(.*(model).*(down_proj|gate_up_proj|up_proj|k_proj|q_proj|v_proj|o_proj|qkv_proj|fc1|fc2|lm_head|img_projection.0|img_projection.2).*$)'
69
- # target_modules: '(.*(model).*(down_proj|gate_up_proj|up_proj|k_proj|q_proj|v_proj|o_proj|qkv_proj|fc1|fc2).*$)'
70
-