Theoistic
/

Bantam-285m

Model card Files Files and versions

Bantam-285m / training_args.json

Theoistic's picture

Upload folder using huggingface_hub

bc9042e verified 2 months ago

history blame contribute delete

1.71 kB

	{
	"dataset": "/root/lang_mix.jsonl",
	"tokenizer": "/root/tokenizers",
	"out_dir": "/root/models/run_cen_V",
	"use_hf": false,
	"hf_name": null,
	"hf_subset": null,
	"hf_split": "train",
	"hf_streaming": true,
	"hf_text_field": "text",
	"hf_messages_field": "messages",
	"shuffle_buffer_size": 10000,
	"seed": 1337,
	"init_from_checkpoint": null,
	"finetune_from": null,
	"strict_vocab_match": false,
	"save_tag": null,
	"save_every_n": 2000,
	"keep_last_k": 3,
	"save_on_improve": false,
	"improve_delta": 0.0,
	"resume_from_checkpoint": null,
	"log_loss_to_csv": false,
	"dataset_text_field": "text",
	"min_sample_token_length": 8,
	"stream_local_dataset": false,
	"local_dataset_shuffle_buffer": 2048,
	"block_count_sample_fraction": 0.02,
	"block_count_min_sample_megabytes": 32,
	"block_count_max_sample_megabytes": 512,
	"precision": "bf16",
	"optimizer": "adamw",
	"lr": 0.0003,
	"weight_decay": 0.06,
	"beta2": 0.98,
	"adam_eps": 1e-08,
	"grad_clip": 0.8,
	"optim_eps": 1e-08,
	"lr_scheduler": "cosine",
	"min_lr_ratio": 0.05,
	"muon_lr": null,
	"muon_momentum": 0.95,
	"muon_exclude_embeddings": true,
	"muon_beta1": null,
	"muon_beta2": null,
	"muon_eps": null,
	"muon_bias_correction": true,
	"muon_clip_by_layer": false,
	"muon_lr_correction": true,
	"batch_size": 4,
	"accum_steps": 16,
	"epochs": 2,
	"warmup_frac": 0.05,
	"log_every_n": 10,
	"overfit_subset": null,
	"use_gradient_checkpoint": false,
	"num_workers": 2,
	"pin_memory": true,
	"persistent_workers": true,
	"sft_mode": "lora",
	"lora_r": 64,
	"lora_alpha": 96,
	"lora_dropout": 0.05,
	"include_agent_end": true,
	"include_eos": false,
	"mask_user_queries": true
	}