dsrivastavv committed on
Commit
47cda00
·
verified ·
1 Parent(s): b19dd82

Upload 6 files

Browse files
coco_grounded/config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "overview": "Running on main branch",
3
+ "save_embed": true,
4
+ "embed_dir": "cache/embeds_old",
5
+ "result_dir": "results",
6
+ "datasets": {
7
+ "COCOCaptionGrounded": {
8
+ "root": "datasets/coco-2017",
9
+ "split": "train",
10
+ "grounded_dir": "datasets/COCOCaptionGrounded",
11
+ "shuffle_bbox": false,
12
+ "crop_augment": true,
13
+ "caption_augment": true,
14
+ "ignore_caption_id_file": "assets/nsr_val_coco_train_overlap.json"
15
+ },
16
+ "COCOCaptionGroundedSpatial": {
17
+ "coco_root": "datasets/coco-2017",
18
+ "nsr_root": "datasets/COCOCaptionGroundedSpatial",
19
+ "split": "train",
20
+ "shuffle_bbox": false,
21
+ "crop_augment": true,
22
+ "data_augmentation": true,
23
+ "use_gt_bboxs": true
24
+ },
25
+ "NSR1KSpatial": {
26
+ "scale_factor": 10,
27
+ "coco_root": "datasets/coco-2017",
28
+ "nsr_root": "datasets/NSR-1K",
29
+ "split": "train",
30
+ "shuffle_bbox": false,
31
+ "crop_augment": true,
32
+ "data_augmentation": true
33
+ }
34
+ },
35
+ "concept_embedder_batch_size": 256,
36
+ "caption_embedder_batch_size": 256,
37
+ "model": "DiT-S",
38
+ "in_channel": 4,
39
+ "concept_in_channel": 768,
40
+ "y_in_channel": 768,
41
+ "max_in_len": 60,
42
+ "max_y_len": 120,
43
+ "scale": 2.0,
44
+ "noise_schedule": "linear",
45
+ "layout_type": "xyxy",
46
+ "diffusion_steps": 100,
47
+ "epochs": 400,
48
+ "global_batch_size": 256,
49
+ "global_seed": 0,
50
+ "num_workers": 4,
51
+ "log_every": 100,
52
+ "ckpt_every": 25000,
53
+ "t5_size": "base"
54
+ }
coco_grounded/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4b4f7bf85f6ff2835953ec7aa460df70899720062b67538d2ce0000f8da2f81
3
+ size 73431836
grit/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "overview": "Running on main branch",
3
+ "save_embed": true,
4
+ "embed_dir": "cache/pretrain_embeds_grit",
5
+ "result_dir": "results",
6
+ "datasets": {
7
+ "GRIT": {
8
+ "path": "datasets/GRIT/grit-20m",
9
+ "crop_augment": true
10
+ }
11
+ },
12
+ "concept_embedder_batch_size": 256,
13
+ "caption_embedder_batch_size": 256,
14
+ "model": "DiT-S",
15
+ "in_channel": 4,
16
+ "concept_in_channel": 768,
17
+ "y_in_channel": 768,
18
+ "max_in_len": 60,
19
+ "max_y_len": 120,
20
+ "scale": 2.0,
21
+ "noise_schedule": "linear",
22
+ "layout_type": "xyxy",
23
+ "diffusion_steps": 100,
24
+ "epochs": 400,
25
+ "global_batch_size": 256,
26
+ "global_seed": 0,
27
+ "num_workers": 12,
28
+ "log_every": 100,
29
+ "ckpt_every": 25000,
30
+ "t5_size": "base"
31
+ }
grit/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed9b34b9c8f19327169e6f6188a5ff215c4f42acef0a0bb7bab6b616da055988
3
+ size 73420623
grit_ft_coco_grounded/config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "overview": "Running on main branch",
3
+ "save_embed": true,
4
+ "embed_dir": "cache/embeds_old",
5
+ "result_dir": "results",
6
+ "datasets": {
7
+ "COCOCaptionGrounded": {
8
+ "root": "datasets/coco-2017",
9
+ "split": "train",
10
+ "grounded_dir": "datasets/COCOCaptionGrounded",
11
+ "shuffle_bbox": false,
12
+ "crop_augment": true,
13
+ "caption_augment": true,
14
+ "ignore_caption_id_file": "assets/nsr_val_coco_train_overlap.json"
15
+ },
16
+ "COCOCaptionGroundedSpatial": {
17
+ "coco_root": "datasets/coco-2017",
18
+ "nsr_root": "datasets/COCOCaptionGroundedSpatial",
19
+ "split": "train",
20
+ "shuffle_bbox": false,
21
+ "crop_augment": true,
22
+ "data_augmentation": true,
23
+ "use_gt_bboxs": true
24
+ },
25
+ "NSR1KSpatial": {
26
+ "scale_factor": 10,
27
+ "coco_root": "datasets/coco-2017",
28
+ "nsr_root": "datasets/NSR-1K",
29
+ "split": "train",
30
+ "shuffle_bbox": false,
31
+ "crop_augment": true,
32
+ "data_augmentation": true
33
+ }
34
+ },
35
+ "concept_embedder_batch_size": 256,
36
+ "caption_embedder_batch_size": 256,
37
+ "model": "DiT-S",
38
+ "in_channel": 4,
39
+ "concept_in_channel": 768,
40
+ "y_in_channel": 768,
41
+ "max_in_len": 60,
42
+ "max_y_len": 120,
43
+ "scale": 2.0,
44
+ "noise_schedule": "linear",
45
+ "layout_type": "xyxy",
46
+ "diffusion_steps": 100,
47
+ "epochs": 400,
48
+ "global_batch_size": 256,
49
+ "global_seed": 0,
50
+ "num_workers": 4,
51
+ "log_every": 100,
52
+ "ckpt_every": 25000,
53
+ "t5_size": "base"
54
+ }
grit_ft_coco_grounded/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af27bc9ed0613d5f49d2e50e38e816621664d740bf24de04fcbc7c0941bc461e
3
+ size 73420623