{ "model_name": "STAR_Qwen2.5-3B_VQGAN", "model_type": "STARMultiModalityConfig", "language_model": { "model_name": "Qwen2.5-VL", "model_path": "checkpoints/Qwen2.5-VL-3B-Instruct" }, "pixel_encoder": { "model_name": "VQ_Model", "model_path": "checkpoints/VQ-Model.pt", "image_token_size": 65536, "n_embed": 512, "num_tokens": 576, "num_heads": 8 }, "pixel_adapter": { "model_name": "MLP_GELU", "depth": 2, "input_dim": 512, "n_embed": 2048 }, "stacked_ar": { "num_layers": 16 }, "pixel_output_head": { "image_token_embed": 4096, "image_token_size": 65536, "n_embed": 2048 }, "pixel_decoder": { "model_name": "LUMINA2", "model_path": "checkpoints/lumina-image2" }, "torch_dtype": "bfloat16" }