File size: 542 Bytes
{
  "model_type": "foveated_vlm",
  "architectures": [
    "FoveatedVLM"
  ],
  "llm_name": "HuggingFaceTB/SmolLM2-135M-Instruct",
  "dino_name": "facebook/dinov2-small",
  "llm_dim": 576,
  "dino_dim": 384,
  "query_dim": 384,
  "visual_scale": 0.14,
  "lambda_coarse": 0.0,
  "deep_query": true,
  "total_params": 185622528,
  "training_stages": [
    "Stage 1: Visual Alignment (OpenVid + WebVid + text retention)",
    "Stage 2: Vision-Language SFT (Cauldron + video + text retention)",
    "Stage 3: DPO (RLAIF-V preference pairs)"
  ]
}