Spaces:

George-API
/

qwen4bit

Sleeping

App Files Community

George-API committed on Mar 16

Commit

493e679

verified ·

1 Parent(s): 31e9844

Upload transformers_config.json with huggingface_hub

Browse files

Files changed (1) hide show

transformers_config.json +15 -17

transformers_config.json CHANGED Viewed

@@ -9,8 +9,8 @@
   },
   "training_config": {
     "num_train_epochs": 3,
-    "per_device_train_batch_size": 4,
-    "gradient_accumulation_steps": 4,
     "learning_rate": 2e-5,
     "lr_scheduler_type": "cosine",
     "warmup_ratio": 0.03,
@@ -31,25 +31,25 @@
     "group_by_length": true
   },
   "hardware_config": {
-    "fp16": true,
-    "bf16": false,
     "gradient_checkpointing": true,
     "device_map": "auto",
-    "attn_implementation": "eager",
-    "use_flash_attention": false,
     "memory_optimization": {
       "expandable_segments": true,
-      "max_memory_fraction": 0.95
     }
   },
   "quantization_config": {
     "load_in_4bit": true,
-    "bnb_4bit_compute_dtype": "float16",
     "bnb_4bit_quant_type": "nf4",
     "bnb_4bit_use_double_quant": true
   },
   "lora_config": {
-    "r": 16,
     "lora_alpha": 32,
     "lora_dropout": 0.05,
     "bias": "none",
@@ -87,15 +87,13 @@
       "allgather_partitions": true,
       "allgather_no_copy": true
     },
-    "gradient_accumulation_steps": 4,
     "gradient_clipping": 0.3,
     "fp16": {
-      "enabled": true,
-      "loss_scale": 0,
-      "loss_scale_window": 1000,
-      "initial_scale_power": 16,
-      "hysteresis": 2,
-      "min_loss_scale": 1
     },
     "optimizer": {
       "type": "AdamW",
@@ -118,7 +116,7 @@
     "train_batch_size": "auto",
     "train_micro_batch_size_per_gpu": "auto",
     "wall_clock_breakdown": false,
-    "communication_data_type": "fp16",
     "comms_logger": {
       "enabled": false
     },

   },
   "training_config": {
     "num_train_epochs": 3,
+    "per_device_train_batch_size": 3,
+    "gradient_accumulation_steps": 2,
     "learning_rate": 2e-5,
     "lr_scheduler_type": "cosine",
     "warmup_ratio": 0.03,
     "group_by_length": true
   },
   "hardware_config": {
+    "fp16": false,
+    "bf16": true,
     "gradient_checkpointing": true,
     "device_map": "auto",
+    "attn_implementation": "flash_attention_2",
+    "use_flash_attention": true,
     "memory_optimization": {
       "expandable_segments": true,
+      "max_memory_fraction": 0.9
     }
   },
   "quantization_config": {
     "load_in_4bit": true,
+    "bnb_4bit_compute_dtype": "bfloat16",
     "bnb_4bit_quant_type": "nf4",
     "bnb_4bit_use_double_quant": true
   },
   "lora_config": {
+    "r": 8,
     "lora_alpha": 32,
     "lora_dropout": 0.05,
     "bias": "none",
       "allgather_partitions": true,
       "allgather_no_copy": true
     },
+    "gradient_accumulation_steps": 2,
     "gradient_clipping": 0.3,
     "fp16": {
+      "enabled": false
+    },
+    "bf16": {
+      "enabled": true
     },
     "optimizer": {
       "type": "AdamW",
     "train_batch_size": "auto",
     "train_micro_batch_size_per_gpu": "auto",
     "wall_clock_breakdown": false,
+    "communication_data_type": "bfloat16",
     "comms_logger": {
       "enabled": false
     },