{
  "overview": "Running on main branch",
  "save_embed": true,
  "embed_dir": "cache/embeds_old",
  "result_dir": "results",
  "datasets": {
    "COCOCaptionGrounded": {
      "root": "datasets/coco-2017",
      "split": "train",
      "grounded_dir": "datasets/COCOCaptionGrounded",
      "shuffle_bbox": false,
      "crop_augment": true,
      "caption_augment": true,
      "ignore_caption_id_file": "assets/nsr_val_coco_train_overlap.json"
    },
    "COCOCaptionGroundedSpatial": {
      "coco_root": "datasets/coco-2017",
      "nsr_root": "datasets/COCOCaptionGroundedSpatial",
      "split": "train",
      "shuffle_bbox": false,
      "crop_augment": true,
      "data_augmentation": true,
      "use_gt_bboxs": true
    },
    "NSR1KSpatial": {
      "scale_factor": 10,
      "coco_root": "datasets/coco-2017",
      "nsr_root": "datasets/NSR-1K",
      "split": "train",
      "shuffle_bbox": false,
      "crop_augment": true,
      "data_augmentation": true
    }
  },
  "concept_embedder_batch_size": 256,
  "caption_embedder_batch_size": 256,
  "model": "DiT-S",
  "in_channel": 4,
  "concept_in_channel": 768,
  "y_in_channel": 768,
  "max_in_len": 60,
  "max_y_len": 120,
  "scale": 2.0,
  "noise_schedule": "linear",
  "layout_type": "xyxy",
  "diffusion_steps": 100,
  "epochs": 400,
  "global_batch_size": 256,
  "global_seed": 0,
  "num_workers": 4,
  "log_every": 100,
  "ckpt_every": 25000,
  "t5_size": "base"
}