mit-han-lab
/

foreact-pretrained

visualforesight

Model card Files and versions

foreact-pretrained / config.json

zhuoyang20's picture

Upload folder using huggingface_hub

63b4402 verified about 2 months ago

history blame contribute delete

941 Bytes

	{
		"_gradient_checkpointing": false,
		"architectures": [
			"VisualForesight"
		],
		"attn_implementation": null,
		"diffusion_model_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers",
		"dtype": "bfloat16",
		"in_channels": 32,
		"input_size": [
			15,
			20
		],
		"max_input_text_tokens": 256,
		"mllm_id": "google/gemma-2-2b-it",
		"model_type": "visualforesight",
		"modules_to_freeze": [
			"vae",
			"mllm_backbone"
		],
		"modules_to_unfreeze": [],
		"noise_scheduler_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers",
		"scheduler_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers",
		"system_prompt": "You are a robot and should focus on your actions. Generate a new image that meets the user's instruction while maintaining consistency with the original input where appropriate.",
		"transformers_version": "4.57.1",
		"vae_downsample_f": 32,
		"vae_id": "Efficient-Large-Model/Sana_1600M_512px_diffusers"
	}