{
  "global_step": 40000,
  "epoch": 3,
  "train_config": {
    "lr": 1e-05,
    "weight_decay": 1e-05,
    "warmup_steps": 800,
    "max_grad_norm": 1.0,
    "train_llm": true,
    "llm_lr": 1e-05,
    "max_steps": 50000,
    "log_every_n_steps": 10,
    "val_every_n_steps": 5000,
    "generate_every_n_steps": 5000,
    "save_every_n_steps": 5000,
    "num_samples_to_generate": 16,
    "generation_max_new_tokens": 128,
    "generation_temperature": 0.4,
    "generation_do_sample": true,
    "compute_loss_on_assistant_only": true,
    "wandb_project": "smol-loravlm",
    "wandb_run_name": "base-ft-continue-3k"
  },
  "checkpoint_config": {
    "checkpoint_dir": "checkpoints/smol_loravlm_base_ft_from_3000",
    "push_to_hub": false,
    "hub_model_id": "toilaluan/ai0",
    "hub_token": null,
    "hub_private_repo": true
  },
  "model_config": {
    "text_ckpt": "google/embeddinggemma-300m",
    "image_ckpt": "google/siglip2-so400m-patch16-256",
    "llm_ckpt": "checkpoints/smol_loravlm_base_ft/checkpoint_step_3000/llm",
    "lora_rank": 16,
    "resume_from_checkpoint": "checkpoints/smol_loravlm_base_ft/checkpoint_step_3000",
    "lora_target": "qkvm",
    "prompt_aware": false,
    "lora_use_prefix": true
  }
}