Spaces:

George-API
/

qwen4bit

Sleeping

App Files Community

qwen4bit / transformers_config.json

George-API

Upload transformers_config.json with huggingface_hub

0a1769d verified 10 months ago

raw

history blame

2.96 kB

	{
		"model_config": {
			"model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit",
			"use_cache": false,
			"rope_scaling": {
				"type": "dynamic",
				"factor": 2.0
			}
		},
		"training_config": {
			"num_train_epochs": 3,
			"per_device_train_batch_size": 4,
			"gradient_accumulation_steps": 4,
			"learning_rate": 2e-5,
			"lr_scheduler_type": "cosine",
			"warmup_ratio": 0.03,
			"weight_decay": 0.01,
			"optim": "adamw_torch",
			"max_grad_norm": 0.3,
			"max_seq_length": 2048,
			"logging_steps": 10,
			"save_steps": 200,
			"save_total_limit": 3,
			"evaluation_strategy": "steps",
			"eval_steps": 200,
			"load_best_model_at_end": true,
			"output_dir": "fine_tuned_model",
			"disable_tqdm": false,
			"report_to": [
				"tensorboard"
			],
			"logging_first_step": true,
			"dataloader_num_workers": 4
		},
		"hardware_config": {
			"fp16": true,
			"bf16": false,
			"gradient_checkpointing": true,
			"device_map": "auto",
			"attn_implementation": "eager",
			"use_flash_attention": false,
			"memory_optimization": {
				"expandable_segments": true
			}
		},
		"quantization_config": {
			"load_in_4bit": true,
			"bnb_4bit_compute_dtype": "float16",
			"bnb_4bit_quant_type": "nf4",
			"bnb_4bit_use_double_quant": true
		},
		"lora_config": {
			"r": 16,
			"lora_alpha": 32,
			"lora_dropout": 0.05,
			"bias": "none",
			"target_modules": [
				"q_proj",
				"k_proj",
				"v_proj",
				"o_proj",
				"gate_proj",
				"up_proj",
				"down_proj"
			]
		},
		"dataset_config": {
			"sort_by_field": "prompt_number",
			"max_tokens": 2048,
			"text_field": "conversations",
			"training_phase_only": true,
			"pre_tokenized": true,
			"input_ids_field": "input_ids",
			"skip_tokenization": true
		},
		"deepspeed_config": {
			"zero_optimization": {
				"stage": 2,
				"offload_optimizer": {
					"device": "cpu",
					"pin_memory": true
				},
				"contiguous_gradients": true,
				"overlap_comm": true,
				"reduce_scatter": true,
				"reduce_bucket_size": 5e8,
				"allgather_bucket_size": 5e8
			},
			"gradient_accumulation_steps": 4,
			"gradient_clipping": 0.3,
			"fp16": {
				"enabled": true,
				"loss_scale": 0,
				"loss_scale_window": 1000,
				"initial_scale_power": 16,
				"hysteresis": 2,
				"min_loss_scale": 1
			},
			"optimizer": {
				"type": "AdamW",
				"params": {
					"lr": 2e-5,
					"betas": [
						0.9,
						0.999
					],
					"eps": 1e-8,
					"weight_decay": 0.01
				}
			},
			"activation_checkpointing": {
				"partition_activations": true,
				"cpu_checkpointing": true,
				"contiguous_memory_optimization": true,
				"number_checkpoints": null,
				"synchronize_checkpoint_boundary": false,
				"profile": false
			},
			"steps_per_print": 10,
			"train_batch_size": "auto",
			"train_micro_batch_size_per_gpu": "auto",
			"wall_clock_breakdown": false
		}
	}