---
# Training / inference configuration for an instruction-tuned Spanish LM.
# NOTE(review): the original file was wrapped in markdown-table pipes and had
# lost its indentation; nesting below was reconstructed from key names and the
# per-section alphabetical ordering — confirm against the consuming code.

# Dataset loading and preprocessing.
data:
  augmentation_prob: 0.3        # chance of augmenting a sample (used when use_augmentation is true)
  corpus_path: data.jsonl       # training corpus (JSON Lines)
  max_text_length: 3000         # upper length bound — presumably characters; confirm units
  min_text_length: 30           # lower length bound — samples outside bounds are likely filtered
  use_augmentation: true
  validation_split: 0.15        # fraction of the corpus held out for validation
  # NOTE(review): "format" breaks this section's alphabetical order in the
  # original; it is assumed to belong to `data` — verify against the loader.
  format: "instruction-context-response"

# Default decoding parameters for generation.
generation:
  default_max_tokens: 200
  default_repetition_penalty: 1.2
  default_temperature: 0.8
  default_top_k: 50
  default_top_p: 0.95
  min_response_length: 30
  # Double quotes are required here so the \n escapes expand to real newlines.
  prompt_format: "### Instrucción:\n{instruction}\n\n### Contexto:\n{context}\n\n### Respuesta:\n"

# Transformer architecture hyperparameters.
model:
  d_ff: 4096          # feed-forward inner dimension (4 × d_model)
  d_model: 1024
  dropout: 0.1
  max_seq_len: 2048
  n_heads: 16
  n_layers: 24
  vocab_size: 8000

# Optimization / training-loop settings.
training:
  accumulation_steps: 8     # effective batch = batch_size × accumulation_steps = 16
  batch_size: 2
  epochs: 30
  label_smoothing: 0.1
  learning_rate: 0.0003
  max_grad_norm: 1.0        # gradient-norm clipping threshold
  min_delta: 0.0005         # presumably the early-stopping improvement threshold — confirm
  min_lr: 1.0e-06           # scheduler floor (used when use_lr_scheduler is true)
  num_threads: 4
  patience: 7               # presumably early-stopping patience in epochs — confirm
  save_every: 3             # checkpoint interval — TODO confirm units (epochs vs steps)
  use_amp: true             # automatic mixed precision
  use_lr_scheduler: true
  warmup_steps: 500
  weight_decay: 0.1