{
  "architectures": [
    "IbaXs_LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "dtype": "float32",
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "main_cfg": {
    "data": {
      "data_path": "./ft-training_set/math_7k.json",
      "dataset_name": "math7k"
    },
    "hyperxs": {
      "latent_feature_dim": 256,
      "layer_embed_dim": 48,
      "layer_norm_epsilon": 1e-05,
      "lora_attn_dim": 32,
      "module_embed_dim": 16,
      "modules_per_layer": 7,
      "n_cross_attn_tokens": 4,
      "out_proj_dim": 64
    },
    "model": {
      "base_model_name": "meta-llama/Llama-2-7b-hf"
    },
    "seed": 42,
    "training": {
      "batch_size": 128,
      "bf16": false,
      "bf16_full_eval": false,
      "cutoff_len": 207,
      "dataloader_num_workers": 4,
      "dataloader_persistent_workers": true,
      "dataloader_pin_memory": true,
      "dataloader_prefetch_factor": 1,
      "eval_steps": 20,
      "eval_strategy": "steps",
      "gradient_checkpointing": false,
      "learning_rate": 1e-05,
      "logging_steps": 1,
      "lr_scheduler_type": "cosine",
      "max_steps": 10,
      "num_train_epochs": 1.0,
      "num_workers": 4,
      "optim": "adamw_torch",
      "output_dir": "runs",
      "per_device_train_batch_size": 32,
      "per_device_valid_batch_size": 64,
      "report_to": "none",
      "resume_from_checkpoint": false,
      "save_safetensors": false,
      "save_steps": 0,
      "torch_compile": false,
      "train_on_inputs": false,
      "val_set_size": 128,
      "warmup_ratio": 0.1
    }
  },
  "max_position_embeddings": 4096,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "transformers_version": "4.57.3",
  "use_cache": true,
  "vocab_size": 32000
}
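
For reference, a minimal sketch of inspecting and loading this config. The repo id below is a placeholder, and the sketch assumes the custom IbaXs_LlamaForCausalLM class ships with the repo as remote code; neither assumption is stated in the file itself.

```python
# Minimal loading sketch. Assumptions: "nvan13/<this-repo>" is a placeholder repo id,
# and the custom IbaXs_LlamaForCausalLM implementation is provided as remote code.
import json

from transformers import AutoModelForCausalLM

# The file is plain JSON, so the nested "main_cfg" block can be read directly.
with open("config.json") as f:
    cfg = json.load(f)

print(cfg["architectures"])                         # ["IbaXs_LlamaForCausalLM"]
print(cfg["main_cfg"]["model"]["base_model_name"])  # "meta-llama/Llama-2-7b-hf"
print(cfg["main_cfg"]["training"]["optim"])         # "adamw_torch"

# Loading the checkpoint: trust_remote_code=True is needed only if the repo maps the
# custom architecture to remote code; otherwise transformers falls back to the stock
# Llama classes, since "model_type" is "llama".
model = AutoModelForCausalLM.from_pretrained(
    "nvan13/<this-repo>",   # placeholder repo id
    trust_remote_code=True,
)
```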