{
"architectures": [
"IbaXs_LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"dtype": "float32",
"eos_token_id": 2,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"main_cfg": {
"data": {
"data_path": "ft-training_set/commonsense_15k.json",
"dataset_name": "CMS",
"val_set_size": 512
},
"hyperxs": {
"drop_out": 0.0,
"latent_feature_dim": 512,
"layer_embed_dim": 48,
"layer_norm_epsilon": 1e-05,
"lora_attn_dim": 64,
"module_embed_dim": 16,
"modules_per_layer": 7,
"n_cross_attn_tokens": 8,
"out_proj_dim": 128
},
"infer": {
"datasets": [
"boolq"
],
"eval_batch_size": 128,
"is_json": true,
"model_path": ""
},
"model": {
"base_model_name": "meta-llama/Llama-2-7b-hf",
"cutoff_len": 512,
"train_on_inputs": false
},
"run_text": "def",
"seed": 42,
"training": {
"bf16": false,
"bf16_full_eval": false,
"dataloader_num_workers": 4,
"dataloader_persistent_workers": true,
"dataloader_pin_memory": true,
"dataloader_prefetch_factor": 1,
"eval_delay": 0,
"eval_steps": 1,
"eval_strategy": "steps",
"gradient_accumulation_steps": 1,
"gradient_checkpointing": false,
"gradient_checkpointing_kwargs": {
"use_reentrant": false
},
"learning_rate": 1e-36,
"load_best_model_at_end": true,
"logging_steps": 1,
"lr_scheduler_type": "cosine",
"max_steps": -1,
"num_train_epochs": 3.0,
"num_workers": 2,
"optim": "adamw_torch",
"output_dir": "exps",
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 16,
"report_to": "none",
"resume_from_checkpoint": false,
"save_safetensors": false,
"save_steps": 1.0,
"save_strategy": "steps",
"save_total_limit": 1,
"warmup_ratio": 0.1
}
},
"max_position_embeddings": 4096,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"tie_word_embeddings": false,
"transformers_version": "4.57.3",
"use_cache": true,
"vocab_size": 32000
}