fVLM-1.7B / config.json
sanps's picture
Upload fVLM-1.7B: Foveated Vision-Language Model (Stage 3 DPO)
78b1e06 verified
raw
history blame contribute delete
504 Bytes
{
"model_type": "foveated_vlm",
"architectures": [
"FoveatedVLM"
],
"llm_name": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
"dino_name": "facebook/dinov2-small",
"llm_dim": 2048,
"dino_dim": 384,
"query_dim": 384,
"visual_scale": 0.14,
"lambda_coarse": 0.0,
"deep_query": true,
"total_params": 1835967616,
"training_stages": [
"Stage 1: Visual Alignment (4.3h, 31250 steps)",
"Stage 2: Vision-Language SFT (9.5h, 31250 steps)",
"Stage 3: DPO (1.9h, 2593 steps)"
]
}