George-API
/

training-scripts

Model card Files Files and versions

xet

Community

George-API committed on Mar 11

Commit

8d5f419

verified ·

1 Parent(s): e278512

Add: Configuration file for training

Browse files

Files changed (1) hide show

transformers_config.json +72 -0

transformers_config.json ADDED Viewed

	@@ -0,0 +1,72 @@

+{
+  "model_config": {
+    "model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit",
+    "use_cache": false,
+    "rope_scaling": {
+      "type": "dynamic",
+      "factor": 2.0
+    }
+  },
+  "training_config": {
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 2,
+    "gradient_accumulation_steps": 4,
+    "learning_rate": 2e-5,
+    "lr_scheduler_type": "cosine",
+    "warmup_ratio": 0.03,
+    "weight_decay": 0.01,
+    "optim": "adamw_torch",
+    "max_grad_norm": 0.3,
+    "max_seq_length": 2048,
+    "logging_steps": 10,
+    "save_steps": 200,
+    "save_total_limit": 3,
+    "evaluation_strategy": "steps",
+    "eval_steps": 200,
+    "load_best_model_at_end": true,
+    "output_dir": "fine_tuned_model",
+    "disable_tqdm": false,
+    "report_to": ["tensorboard"],
+    "logging_first_step": true
+  },
+  "hardware_config": {
+    "fp16": true,
+    "bf16": false,
+    "gradient_checkpointing": true,
+    "device_map": "auto",
+    "use_flash_attention": false,
+    "attn_implementation": "eager"
+  },
+  "quantization_config": {
+    "load_in_4bit": true,
+    "bnb_4bit_compute_dtype": "float16",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": true
+  },
+  "lora_config": {
+    "r": 16,
+    "lora_alpha": 32,
+    "lora_dropout": 0.05,
+    "bias": "none",
+    "target_modules": [
+      "q_proj",
+      "k_proj",
+      "v_proj",
+      "o_proj",
+      "gate_proj",
+      "up_proj",
+      "down_proj"
+    ]
+  },
+  "dataset_config": {
+    "sort_by_field": "prompt_number",
+    "sort_direction": "ascending",
+    "max_tokens": 2048,
+    "text_field": "conversations",
+    "shuffle_seed": 42,
+    "training_phase_only": true,
+    "pre_tokenized": true,
+    "input_ids_field": "input_ids",
+    "skip_tokenization": true
+  }
+}