{
  "model_type": "llama",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "vocab_size": 32000,
  "hidden_size": 2048,
  "num_hidden_layers": 24,
  "num_attention_heads": 16,
  "lora_alpha": 16,
  "lora_r": 64,
  "lora_dropout": 0.1,
  "use_cache": true,
  "use_4bit": true,
  "bnb_4bit_compute_dtype": "float16",
  "bnb_4bit_quant_type": "nf4",
  "use_nested_quant": false,
  "fp16": true,
  "bf16": false,
  "per_device_train_batch_size": 16,
  "per_device_eval_batch_size": 16,
  "gradient_accumulation_steps": 1,
  "max_grad_norm": 0.5,
  "learning_rate": 0.0004,
  "weight_decay": 0.0003,
  "optim": "adamw_hf",
  "lr_scheduler_type": "linear",
  "warmup_ratio": 0.1,
  "group_by_length": true
}
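The keys above span several Hugging Face components: the model fields (`vocab_size`, `hidden_size`, `num_hidden_layers`, `num_attention_heads`, `use_cache`) mirror `LlamaConfig`, the `lora_*` entries map to a PEFT `LoraConfig`, the 4-bit/`bnb_*` entries to a `BitsAndBytesConfig`, and the remaining fields to `TrainingArguments`. As a minimal sketch of how the values could be wired up (the filename `train_config.json` and the `output_dir` are assumptions for illustration; `transformers`, `peft`, and `bitsandbytes` are assumed to be installed):

```python
import json
import torch
from transformers import BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig

# Load the JSON config shown above (path is an assumption for illustration).
with open("train_config.json") as f:
    cfg = json.load(f)

# 4-bit quantization settings (bitsandbytes).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["use_4bit"],
    bnb_4bit_quant_type=cfg["bnb_4bit_quant_type"],                        # "nf4"
    bnb_4bit_compute_dtype=getattr(torch, cfg["bnb_4bit_compute_dtype"]),  # torch.float16
    bnb_4bit_use_double_quant=cfg["use_nested_quant"],
)

# LoRA adapter settings (PEFT); bias and task_type are typical defaults, not in the JSON.
lora_config = LoraConfig(
    r=cfg["lora_r"],
    lora_alpha=cfg["lora_alpha"],
    lora_dropout=cfg["lora_dropout"],
    bias="none",
    task_type="CAUSAL_LM",
)

# Trainer hyperparameters.
training_args = TrainingArguments(
    output_dir="./results",  # assumption; not specified in the JSON
    per_device_train_batch_size=cfg["per_device_train_batch_size"],
    per_device_eval_batch_size=cfg["per_device_eval_batch_size"],
    gradient_accumulation_steps=cfg["gradient_accumulation_steps"],
    max_grad_norm=cfg["max_grad_norm"],
    learning_rate=cfg["learning_rate"],
    weight_decay=cfg["weight_decay"],
    optim=cfg["optim"],
    lr_scheduler_type=cfg["lr_scheduler_type"],
    warmup_ratio=cfg["warmup_ratio"],
    group_by_length=cfg["group_by_length"],
    fp16=cfg["fp16"],
    bf16=cfg["bf16"],
)
```

The architecture fields describe the base model itself and would normally be consumed by the model's own config loader rather than passed to the Trainer.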