Upload folder using huggingface_hub
- config.json +19 -0
- llm/config.json +60 -0
- llm/generation_config.json +14 -0
- llm/model.safetensors +3 -0
- lora_encoder.safetensors +3 -0
- training_metadata.json +41 -0
config.json
ADDED
@@ -0,0 +1,19 @@
+{
+  "freeze_image_encoder": true,
+  "image_model_name": "google/siglip2-so400m-patch16-256",
+  "llm_model_name": "Qwen/Qwen3-0.6B",
+  "lora_attn_implementation": "sdpa",
+  "lora_dropout": 0.1,
+  "lora_hidden_size": 768,
+  "lora_intermediate_size": 3072,
+  "lora_mlp_ratio": 4,
+  "lora_num_attention_heads": 16,
+  "lora_num_layers": 16,
+  "lora_rank": 16,
+  "lora_target": "qkvm",
+  "lora_use_prefix": true,
+  "model_type": "vision_lora",
+  "prompt_aware": false,
+  "text_model_name": null,
+  "transformers_version": "4.57.3"
+}
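Note: "vision_lora" is a custom model type defined by this project's code, not a stock transformers architecture. The config pairs a frozen SigLIP2 image encoder with a Qwen3-0.6B language model and describes a rank-16 LoRA ("lora_target": "qkvm", presumably the q/k/v and MLP projections). A minimal sketch of reading this file and pulling the two referenced base checkpoints follows; composing them into the actual vision-LoRA model requires this repository's custom modeling code, which is not part of this upload.

# Sketch: read the top-level config and load the two base checkpoints it
# references. How they are wired together into the "vision_lora" model is
# defined by this project's custom code (assumption: not reproduced here).
import json
from transformers import AutoModel, AutoModelForCausalLM

with open("config.json") as f:
    cfg = json.load(f)

image_encoder = AutoModel.from_pretrained(cfg["image_model_name"])   # google/siglip2-so400m-patch16-256 (kept frozen)
llm = AutoModelForCausalLM.from_pretrained(cfg["llm_model_name"])    # Qwen/Qwen3-0.6B
print(cfg["lora_rank"], cfg["lora_target"], cfg["lora_num_layers"])  # 16 qkvm 16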
llm/config.json
ADDED
@@ -0,0 +1,60 @@
+{
+  "architectures": [
+    "LoraQwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "dtype": "float32",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "4.57.3",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
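The LLM sub-config is a standard 28-layer Qwen3-0.6B configuration with grouped-query attention (16 query heads, 8 key/value heads, head_dim 128, hidden_size 1024) and full attention in every layer; only the "architectures" entry, LoraQwen3ForCausalLM, points to a custom class from this project's training code. The standard fields can still be inspected with AutoConfig (a minimal sketch, assuming the working directory is the repository root):

# Sketch: inspect the LLM sub-config with transformers' AutoConfig.
# model_type is "qwen3", so the fields parse as a regular Qwen3Config even
# though the "LoraQwen3ForCausalLM" class itself lives in the project code.
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("llm")
print(cfg.num_hidden_layers)                             # 28
print(cfg.num_attention_heads, cfg.num_key_value_heads)  # 16 query heads, 8 KV heads (GQA)
print(cfg.num_attention_heads * cfg.head_dim)            # 2048-dim attention output, projected back to hidden_size=1024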
llm/generation_config.json
ADDED
@@ -0,0 +1,14 @@
+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "lora_target": "qkvm",
+  "pad_token_id": 151643,
+  "temperature": 0.6,
+  "top_k": 20,
+  "top_p": 0.95,
+  "transformers_version": "4.57.3"
+}
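These are the sampling defaults that generate() picks up from the checkpoint: sampling at temperature 0.6 with top_k 20 and top_p 0.95, accepting both the end-of-turn and end-of-text ids as EOS. The "lora_target" entry appears to be project-specific metadata rather than a standard generation field. A minimal usage sketch (the tokenizer is assumed to come from the base Qwen/Qwen3-0.6B checkpoint, since none is included in this upload):

# Sketch: load the sampling defaults and pass them to generate().
# "model" and "inputs" are placeholders for the assembled vision-LoRA model
# and its prepared inputs (assumptions, not part of this upload).
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("llm")
print(gen_cfg.do_sample, gen_cfg.temperature, gen_cfg.top_k, gen_cfg.top_p)  # True 0.6 20 0.95
# outputs = model.generate(**inputs, generation_config=gen_cfg)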
llm/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca21804136ce2dcb829f3c11645cb182ed885eb333b527935ad77c9e90a12403
+size 2384234968
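This is a Git LFS pointer, not the weights themselves. The 2,384,234,968-byte size is consistent with the "dtype": "float32" declared in llm/config.json: roughly 0.6B parameters stored at 4 bytes each.

# Rough sanity check on the file size (ignores the small safetensors header).
size_bytes = 2_384_234_968
print(size_bytes / 4 / 1e6)  # ≈ 596.1 million float32 values, i.e. the ~0.6B-parameter Qwen3 backbone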
lora_encoder.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77f8b6c2666174793e7d206c811a90f80c00c3c12b42a81791005c0c66b27662
+size 616399512
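This pointer holds the lora_encoder weights, the component described by the lora_* fields in config.json that turns image features into the LoRA updates injected into the LLM. Once the actual file is downloaded, its tensor layout can be listed without loading everything into memory; a sketch, assuming the repo id recorded as hub_model_id in training_metadata.json below (substitute the id of the repository this commit actually belongs to):

# Sketch: list a few tensor names/shapes from the downloaded file.
from huggingface_hub import hf_hub_download
from safetensors import safe_open

path = hf_hub_download("toilaluan/ai0", "lora_encoder.safetensors")  # repo id assumed from training_metadata.json
with safe_open(path, framework="pt") as f:
    for name in list(f.keys())[:5]:
        print(name, f.get_slice(name).get_shape())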
training_metadata.json
ADDED
@@ -0,0 +1,41 @@
+{
+  "global_step": 40000,
+  "epoch": 3,
+  "train_config": {
+    "lr": 1e-05,
+    "weight_decay": 1e-05,
+    "warmup_steps": 800,
+    "max_grad_norm": 1.0,
+    "train_llm": true,
+    "llm_lr": 1e-05,
+    "max_steps": 50000,
+    "log_every_n_steps": 10,
+    "val_every_n_steps": 5000,
+    "generate_every_n_steps": 5000,
+    "save_every_n_steps": 5000,
+    "num_samples_to_generate": 16,
+    "generation_max_new_tokens": 128,
+    "generation_temperature": 0.4,
+    "generation_do_sample": true,
+    "compute_loss_on_assistant_only": true,
+    "wandb_project": "smol-loravlm",
+    "wandb_run_name": "base-ft-continue-3k"
+  },
+  "checkpoint_config": {
+    "checkpoint_dir": "checkpoints/smol_loravlm_base_ft_from_3000",
+    "push_to_hub": false,
+    "hub_model_id": "toilaluan/ai0",
+    "hub_token": null,
+    "hub_private_repo": true
+  },
+  "model_config": {
+    "text_ckpt": "google/embeddinggemma-300m",
+    "image_ckpt": "google/siglip2-so400m-patch16-256",
+    "llm_ckpt": "checkpoints/smol_loravlm_base_ft/checkpoint_step_3000/llm",
+    "lora_rank": 16,
+    "resume_from_checkpoint": "checkpoints/smol_loravlm_base_ft/checkpoint_step_3000",
+    "lora_target": "qkvm",
+    "prompt_aware": false,
+    "lora_use_prefix": true
+  }
+}
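The metadata records that this checkpoint was taken at global step 40,000 of a 50,000-step run (epoch 3), continuing from an earlier step-3000 checkpoint, with the LLM trained jointly with the LoRA encoder (lr 1e-5 for both) and the loss computed on assistant tokens only. A small sketch of reading it programmatically, e.g. to compare checkpoints:

# Sketch: read the run metadata for logging or comparison across checkpoints.
import json

with open("training_metadata.json") as f:
    meta = json.load(f)

print(meta["global_step"], "/", meta["train_config"]["max_steps"])  # 40000 / 50000
print(meta["model_config"]["resume_from_checkpoint"])               # resumed from checkpoint_step_3000
print(meta["train_config"]["compute_loss_on_assistant_only"])       # True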