{
  "stage": "sft",
  "do_train": true,
  "finetuning_type": "lora",
  "model": "Qwen/Qwen3-4B-Instruct-2507",
  "dataset": "llamafactory/fiqa",
  "max_samples": null,
  "eval_dataset": null,
  "train_dataset_num_samples": 5500,
  "eval_dataset_num_samples": null,
  "quantization_bit": 4,
  "load_in_4bit": true,
  "load_in_16bit": false,
  "lora_rank": 64,
  "lora_r": 64,
  "lora_alpha": 128,
  "lora_dropout": 0.05,
  "target_modules": [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj"
  ],
  "bias": "none",
  "use_gradient_checkpointing": "unsloth",
  "random_state": 3407,
  "learning_rate": 0.00015,
  "lr_scheduler_type": "cosine",
  "warmup_ratio": 0.05,
  "weight_decay": 0.0,
  "adam_beta2": 0.999,
  "num_train_epochs": 4.0,
  "max_steps": -1,
  "per_device_train_batch_size": 4,
  "gradient_accumulation_steps": 4,
  "seq_len": 2048,
  "max_seq_length": 2048,
  "logging_steps": 20,
  "save_strategy": "steps",
  "save_steps": 500,
  "save_total_limit": 2,
  "evaluation_strategy": "no",
  "eval_steps": null,
  "load_best_model_at_end": false,
  "bf16": true,
  "report_to": "none",
  "dataset_num_proc": 4,
  "seed": 3407,
  "output_dir": "/root/jb/personas/finance/fiqa/finance_lora_unsloth_output/20260305_020931",
  "adam_beta1": 0.9,
  "packing": false,
  "dataset_text_field": "text"
}