{
  "model_config": {
    "hub_model_name": "ThomasTheMaker/cadmonkey-1b-data-2",
    "base_model_name": "unsloth/gemma-3-1b-it-unsloth-bnb-4bit",
    "max_seq_length": 1024,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "full_finetuning": false
  },
  "dataset_config": {
    "datasets": [
      {
        "name": "ThomasTheMaker/Synthetic-Object-v0"
      },
      {
        "name": "ThomasTheMaker/Synthetic-Object-v1"
      }
    ],
    "chat_template": "gemma3",
    "test_objects": ["cat", "car", "tree"]
  },
  "lora_config": {
    "r": 128,
    "alpha_multiplier": 2,
    "dropout": 0.05,
    "bias": "none",
    "use_gradient_checkpointing": "unsloth",
    "random_state": 3407,
    "use_rslora": false,
    "loftq_config": null,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ]
  },
  "training_config": {
    "per_device_eval_batch_size": 0,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 8,
    "warmup_steps": 5,
    "max_steps": -1,
    "num_train_epochs": 3,
    "learning_rate": 3e-5,
    "weight_decay": 0.01,
    "lr_scheduler_type": "linear",
    "seed": 3407,
    "output_dir": "outputs",
    "report_to": "none",
    "optim": "adamw_8bit",
    "logging_steps": 1,
    "save_strategy": "steps",
    "save_steps": 300,
    "save_total_limit": 3,
    "eval_strategy": "no",
    "eval_steps": 1000
  },
  "inference_config": {
    "max_new_tokens": 1024,
    "temperature": 0.8,
    "top_p": 0.95,
    "top_k": 64,
    "do_sample": true
  },
  "saving_config": {
    "save_local": true,
    "save_16bit": true,
    "save_4bit": false,
    "save_lora": true,
    "push_to_hub": true
  },
  "logging_config": {
    "csv_log_enabled": true
  }
}