---
# HuggingFace model-card metadata (separate YAML document from the config below;
# the document break also makes the repeated base_model key legal).
library_name: peft
base_model: athirdpath/BigMistral-13b
---
# Axolotl QLoRA fine-tuning config for BigMistral-13b (4-bit base, LoRA adapter).
base_model: athirdpath/BigMistral-13b
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer
is_mistral_derived_model: true
load_in_4bit: true

# FIX(review): the original text had `- path: …` and `- type: alpaca` as two
# separate sequence items, leaving the dataset with no format and the second
# item with no path. Axolotl expects `type` nested inside the same entry.
datasets:
  - path: glueLORA2.jsonl
    type: alpaca
# Fraction of the dataset held out for evaluation.
val_set_size: 0.07

adapter: qlora
sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true

# LoRA hyperparameters.
# NOTE(review): lora_alpha (32) is far below lora_r (512), giving a very small
# effective scaling (alpha/r = 0.0625) — presumably intentional; confirm.
lora_r: 512
lora_alpha: 32
lora_dropout: 0.04
lora_target_linear: true

# Effective batch size = gradient_accumulation_steps * micro_batch_size (* GPUs).
gradient_accumulation_steps: 6
micro_batch_size: 3
eval_batch_size: 3
num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.00005

# Precision / memory / speed settings.
bf16: true
gradient_checkpointing: true
flash_attention: true
warmup_steps: 10
weight_decay: 0.00001