{
  "model_type": "llama",
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "unsloth/llama-3-8b-bnb-4bit",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
  "lora_dropout": 0,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": [
    "lm_head",
    "embed_tokens"
  ],
  "peft_type": "LORA",
  "r": 16,
  "rank_pattern": {},
  "revision": "unsloth",
  "target_modules": [
    "k_proj",
    "gate_proj",
    "q_proj",
    "up_proj",
    "o_proj",
    "down_proj",
    "v_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
  "use_rslora": false,
| "hidden_size": 4096, |
| "num_attention_heads": 32, |
| "num_hidden_layers": 24, |
| "intermediate_size": 16384, |
| "max_position_embeddings": 512, |
| "vocab_size": 32000, |
| "layer_norm_eps": 1e-5, |
| "initializer_range": 0.02, |
| "train_batch_size": 2, |
| "gradient_accumulation_steps": 4, |
| "warmup_steps": 5, |
| "max_steps": 60, |
| "learning_rate": 0.0002, |
| "fp16": true, |
| "bf16": false, |
| "logging_steps": 1, |
| "optim": "adamw_8bit", |
| "weight_decay": 0.01, |
| "lr_scheduler_type": "linear", |
| "seed": 3407, |
| "output_dir": "outputs" |
| } |
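
The adapter fields and training hyperparameters above match a fairly standard Unsloth + TRL LoRA run. Below is a minimal sketch of how they might be reproduced in Python; the placeholder dataset, the `max_seq_length` value of 2048, and the assumption of a TRL version where `SFTTrainer` still accepts `dataset_text_field` and `max_seq_length` directly are illustrative choices, not part of the config itself.

```python
# Minimal sketch mirroring the JSON above. Assumptions: placeholder dataset,
# max_seq_length=2048, and a GPU with bitsandbytes 4-bit support.
from datasets import Dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

max_seq_length = 2048  # assumed; not specified in the config above

# Load the 4-bit base model named in base_model_name_or_path.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3-8b-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
)

# Attach a LoRA adapter using the r / lora_alpha / target_modules values above.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    modules_to_save=["lm_head", "embed_tokens"],  # per modules_to_save above; support may vary by Unsloth version
    use_rslora=False,
    random_state=3407,
)

# Placeholder dataset: any Dataset with a "text" column works here.
dataset = Dataset.from_dict(
    {"text": ["### Instruction:\nSay hi.\n\n### Response:\nHi!"]}
)

# Training arguments taken from the trainer fields of the config.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=TrainingArguments(
        per_device_train_batch_size=2,   # "train_batch_size" above
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        fp16=True,
        bf16=False,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)
trainer.train()
```

After training, saving the adapter with `model.save_pretrained("outputs")` writes an `adapter_config.json` much like the block above, with `inference_mode` set to `true`.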