TinyLlama_v1.1-mars-peft-benchmark/TinyLlama_v1.1-qlora/TinyLlama_v1.1-qlora-logiqa-r32-a2/training_configuration.json
{
  "model_id": "TinyLlama/TinyLlama_v1.1",
  "dataset": {
    "name": "LOGIQA",
    "dataset_id": "data/logiqa_train",
    "preprocess_id": "logiqa_train_deepeval"
  },
  "peft_config": {
    "method": "qlora",
    "rank": 32,
    "alpha": 64,
    "dropout": 0.0,
    "bias": "none",
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "down_proj",
      "up_proj"
    ],
    "trainable_parameter_count": 25231360
  },
  "training_config": {
    "max_dataset_length": null,
    "batch_size": 64,
    "per_device_batch_size": 32,
    "gradient_accumulation_steps": 2,
    "learning_rate": 0.0003,
    "num_epochs": 3,
    "warmup_ratio": 0.1
  },
  "model_name": "TinyLlama_v1.1-qlora-logiqa-r32-a2",
  "output_dir": "./experiment_results/TinyLlama_v1.1-qlora/TinyLlama_v1.1-qlora-logiqa-r32-a2",
  "seed": 42,
  "timestamp": "2025-08-31T03:17:56.980068"
}