Spaces:
Sleeping
Sleeping
Upload transformers_config.json with huggingface_hub
Browse files
- transformers_config.json +7 -3
transformers_config.json
CHANGED
|
@@ -9,7 +9,7 @@
|
|
| 9 |
},
|
| 10 |
"training_config": {
|
| 11 |
"num_train_epochs": 3,
|
| 12 |
-
"per_device_train_batch_size":
|
| 13 |
"gradient_accumulation_steps": 4,
|
| 14 |
"learning_rate": 2e-5,
|
| 15 |
"lr_scheduler_type": "cosine",
|
|
@@ -27,7 +27,8 @@
|
|
| 27 |
"output_dir": "fine_tuned_model",
|
| 28 |
"disable_tqdm": false,
|
| 29 |
"report_to": ["tensorboard"],
|
| 30 |
-
"logging_first_step": true
|
|
|
|
| 31 |
},
|
| 32 |
"hardware_config": {
|
| 33 |
"fp16": true,
|
|
@@ -35,7 +36,10 @@
|
|
| 35 |
"gradient_checkpointing": true,
|
| 36 |
"device_map": "auto",
|
| 37 |
"attn_implementation": "eager",
|
| 38 |
-
"use_flash_attention": false
|
|
|
|
|
|
|
|
|
|
| 39 |
},
|
| 40 |
"quantization_config": {
|
| 41 |
"load_in_4bit": true,
|
|
|
|
| 9 |
},
|
| 10 |
"training_config": {
|
| 11 |
"num_train_epochs": 3,
|
| 12 |
+
"per_device_train_batch_size": 4,
|
| 13 |
"gradient_accumulation_steps": 4,
|
| 14 |
"learning_rate": 2e-5,
|
| 15 |
"lr_scheduler_type": "cosine",
|
|
|
|
| 27 |
"output_dir": "fine_tuned_model",
|
| 28 |
"disable_tqdm": false,
|
| 29 |
"report_to": ["tensorboard"],
|
| 30 |
+
"logging_first_step": true,
|
| 31 |
+
"dataloader_num_workers": 4
|
| 32 |
},
|
| 33 |
"hardware_config": {
|
| 34 |
"fp16": true,
|
|
|
|
| 36 |
"gradient_checkpointing": true,
|
| 37 |
"device_map": "auto",
|
| 38 |
"attn_implementation": "eager",
|
| 39 |
+
"use_flash_attention": false,
|
| 40 |
+
"memory_optimization": {
|
| 41 |
+
"expandable_segments": true
|
| 42 |
+
}
|
| 43 |
},
|
| 44 |
"quantization_config": {
|
| 45 |
"load_in_4bit": true,
|