{
  "global_step": 40000,
  "epoch": 3,
  "train_config": {
    "lr": 1e-05,
    "weight_decay": 1e-05,
    "warmup_steps": 800,
    "max_grad_norm": 1.0,
    "train_llm": true,
    "llm_lr": 1e-05,
    "max_steps": 50000,
    "log_every_n_steps": 10,
    "val_every_n_steps": 5000,
    "generate_every_n_steps": 5000,
    "save_every_n_steps": 5000,
    "num_samples_to_generate": 16,
    "generation_max_new_tokens": 128,
    "generation_temperature": 0.4,
    "generation_do_sample": true,
    "compute_loss_on_assistant_only": true,
    "wandb_project": "smol-loravlm",
    "wandb_run_name": "base-ft-continue-3k"
  },
  "checkpoint_config": {
    "checkpoint_dir": "checkpoints/smol_loravlm_base_ft_from_3000",
    "push_to_hub": false,
    "hub_model_id": "toilaluan/ai0",
    "hub_token": null,
    "hub_private_repo": true
  },
  "model_config": {
    "text_ckpt": "google/embeddinggemma-300m",
    "image_ckpt": "google/siglip2-so400m-patch16-256",
    "llm_ckpt": "checkpoints/smol_loravlm_base_ft/checkpoint_step_3000/llm",
    "lora_rank": 16,
    "resume_from_checkpoint": "checkpoints/smol_loravlm_base_ft/checkpoint_step_3000",
    "lora_target": "qkvm",
    "prompt_aware": false,
    "lora_use_prefix": true
  }
}