D4niel0s committed on Sep 21, 2025

Commit

7b07335

verified ·

1 Parent(s): ae29002

Upload 40 files

Browse files

Upload test inference files for all experiments

Files changed (41) hide show

.gitattributes +20 -0
evaluations/cayley_BLIP_no_sagpool/best.json +3 -0
evaluations/cayley_BLIP_no_sagpool/best.out +114 -0
evaluations/cayley_BLIP_no_sagpool/last.json +3 -0
evaluations/cayley_BLIP_no_sagpool/last.out +114 -0
evaluations/cayley_CLIP_no_sagpool/best.json +3 -0
evaluations/cayley_CLIP_no_sagpool/best.out +114 -0
evaluations/cayley_CLIP_no_sagpool/last.json +3 -0
evaluations/cayley_CLIP_no_sagpool/last.out +114 -0
evaluations/cayley_global_sagpool/best.json +3 -0
evaluations/cayley_global_sagpool/best.out +115 -0
evaluations/cayley_global_sagpool/last.json +3 -0
evaluations/cayley_global_sagpool/last.out +115 -0
evaluations/cayley_hierarchical_sagpool/best.json +3 -0
evaluations/cayley_hierarchical_sagpool/best.out +116 -0
evaluations/cayley_hierarchical_sagpool/last.json +3 -0
evaluations/cayley_hierarchical_sagpool/last.out +116 -0
evaluations/cayley_no_sagpool/best.json +3 -0
evaluations/cayley_no_sagpool/best.out +114 -0
evaluations/cayley_no_sagpool/last.json +3 -0
evaluations/cayley_no_sagpool/last.out +114 -0
evaluations/mmg_BLIP_global_sagpool/best.json +3 -0
evaluations/mmg_BLIP_global_sagpool/best.out +130 -0
evaluations/mmg_BLIP_global_sagpool/last.json +3 -0
evaluations/mmg_BLIP_global_sagpool/last.out +130 -0
evaluations/mmg_CLIP_global_sagpool/best.json +3 -0
evaluations/mmg_CLIP_global_sagpool/best.out +130 -0
evaluations/mmg_CLIP_global_sagpool/last.json +3 -0
evaluations/mmg_CLIP_global_sagpool/last.out +130 -0
evaluations/mmg_global_sagpool/best.json +3 -0
evaluations/mmg_global_sagpool/best.out +130 -0
evaluations/mmg_global_sagpool/last.json +3 -0
evaluations/mmg_global_sagpool/last.out +130 -0
evaluations/mmg_hierarchical_sagpool/best.json +3 -0
evaluations/mmg_hierarchical_sagpool/best.out +131 -0
evaluations/mmg_hierarchical_sagpool/last.json +3 -0
evaluations/mmg_hierarchical_sagpool/last.out +131 -0
evaluations/mmg_no_sagpool/best.json +3 -0
evaluations/mmg_no_sagpool/best.out +128 -0
evaluations/mmg_no_sagpool/last.json +3 -0
evaluations/mmg_no_sagpool/last.out +128 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,23 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+evaluations/cayley_BLIP_no_sagpool/best.json filter=lfs diff=lfs merge=lfs -text
+evaluations/cayley_BLIP_no_sagpool/last.json filter=lfs diff=lfs merge=lfs -text
+evaluations/cayley_CLIP_no_sagpool/best.json filter=lfs diff=lfs merge=lfs -text
+evaluations/cayley_CLIP_no_sagpool/last.json filter=lfs diff=lfs merge=lfs -text
+evaluations/cayley_global_sagpool/best.json filter=lfs diff=lfs merge=lfs -text
+evaluations/cayley_global_sagpool/last.json filter=lfs diff=lfs merge=lfs -text
+evaluations/cayley_hierarchical_sagpool/best.json filter=lfs diff=lfs merge=lfs -text
+evaluations/cayley_hierarchical_sagpool/last.json filter=lfs diff=lfs merge=lfs -text
+evaluations/cayley_no_sagpool/best.json filter=lfs diff=lfs merge=lfs -text
+evaluations/cayley_no_sagpool/last.json filter=lfs diff=lfs merge=lfs -text
+evaluations/mmg_BLIP_global_sagpool/best.json filter=lfs diff=lfs merge=lfs -text
+evaluations/mmg_BLIP_global_sagpool/last.json filter=lfs diff=lfs merge=lfs -text
+evaluations/mmg_CLIP_global_sagpool/best.json filter=lfs diff=lfs merge=lfs -text
+evaluations/mmg_CLIP_global_sagpool/last.json filter=lfs diff=lfs merge=lfs -text
+evaluations/mmg_global_sagpool/best.json filter=lfs diff=lfs merge=lfs -text
+evaluations/mmg_global_sagpool/last.json filter=lfs diff=lfs merge=lfs -text
+evaluations/mmg_hierarchical_sagpool/best.json filter=lfs diff=lfs merge=lfs -text
+evaluations/mmg_hierarchical_sagpool/last.json filter=lfs diff=lfs merge=lfs -text
+evaluations/mmg_no_sagpool/best.json filter=lfs diff=lfs merge=lfs -text
+evaluations/mmg_no_sagpool/last.json filter=lfs diff=lfs merge=lfs -text

evaluations/cayley_BLIP_no_sagpool/best.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e36f1232aa27f47964d73be0678e24cd8e320694a7d8b9dda726251cbc82d255
+size 20396995

evaluations/cayley_BLIP_no_sagpool/best.out ADDED Viewed

	@@ -0,0 +1,114 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_BLIP_embeds',
+ 'embeds_type': 'BLIP',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'cayley',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/BLIP_cayley_none_checks/best_ckpt_update_72000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 0,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 768,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'none'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(784, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/BLIP_cayley_none_checks/best_ckpt_update_72000.pt to model
+Validation VQA accuracy: 0.4275
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_BLIP_no_sagpool/best.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_BLIP_no_sagpool/best.json

evaluations/cayley_BLIP_no_sagpool/last.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46b832fccf61dae709dc95ae5208e5285c1b25b6c655043261fb0a3f0485ed55
+size 20400295

evaluations/cayley_BLIP_no_sagpool/last.out ADDED Viewed

	@@ -0,0 +1,114 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_BLIP_embeds',
+ 'embeds_type': 'BLIP',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'cayley',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/BLIP_cayley_none_checks/ckpt_update_81000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 0,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 768,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'none'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(784, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/BLIP_cayley_none_checks/ckpt_update_81000.pt to model
+Validation VQA accuracy: 0.4283
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_BLIP_no_sagpool/last.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_BLIP_no_sagpool/last.json

evaluations/cayley_CLIP_no_sagpool/best.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0069f006c530d5a047e5cb03387bec8468518127b5be3f5497daa8156fec0aae
+size 20390999

evaluations/cayley_CLIP_no_sagpool/best.out ADDED Viewed

	@@ -0,0 +1,114 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_CLIP_embeds',
+ 'embeds_type': 'CLIP',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'cayley',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/cayley_none_CLIP_checks/best_ckpt_update_75000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 0,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 512,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'none'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(528, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/cayley_none_CLIP_checks/best_ckpt_update_75000.pt to model
+Validation VQA accuracy: 0.4127
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_CLIP_no_sagpool/best.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_CLIP_no_sagpool/best.json

evaluations/cayley_CLIP_no_sagpool/last.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c73da458e6de2517a3c588f0f648cbad1e25f5356c407a6df793b2086a442bde
+size 20420796

evaluations/cayley_CLIP_no_sagpool/last.out ADDED Viewed

	@@ -0,0 +1,114 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_CLIP_embeds',
+ 'embeds_type': 'CLIP',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'cayley',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/cayley_none_CLIP_checks/ckpt_update_77000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 0,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 512,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'none'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(528, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/cayley_none_CLIP_checks/ckpt_update_77000.pt to model
+Validation VQA accuracy: 0.4128
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_CLIP_no_sagpool/last.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_CLIP_no_sagpool/last.json

evaluations/cayley_global_sagpool/best.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa8372cfaf6c6dd73b28c2bdaac40d6549b946aa82fcc63e6bb7575144b1ff24
+size 20405034

evaluations/cayley_global_sagpool/best.out ADDED Viewed

	@@ -0,0 +1,115 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'embeds_type': 'BERT/BEiT',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'cayley',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/cayley_glob_sagpool_checks/best_ckpt_update_75000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 0,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 768,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'global'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(784, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (global_sagpool): SAGPooling(GATConv, 512, ratio=0.5, multiplier=1.0)
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/cayley_glob_sagpool_checks/best_ckpt_update_75000.pt to model
+Validation VQA accuracy: 0.4347
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_global_sagpool/best.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_global_sagpool/best.json

evaluations/cayley_global_sagpool/last.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e71d2c4363cdd6ba87934c6b071e5da7020122c539818bb0222dae54a734af7f
+size 20405668

evaluations/cayley_global_sagpool/last.out ADDED Viewed

	@@ -0,0 +1,115 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'embeds_type': 'BERT/BEiT',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'cayley',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/cayley_glob_sagpool_checks/ckpt_update_81000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 0,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 768,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'global'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(784, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (global_sagpool): SAGPooling(GATConv, 512, ratio=0.5, multiplier=1.0)
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/cayley_glob_sagpool_checks/ckpt_update_81000.pt to model
+Validation VQA accuracy: 0.4348
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_global_sagpool/last.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_global_sagpool/last.json

evaluations/cayley_hierarchical_sagpool/best.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dbb55ec23cba79b9a9bb14946a26e689821105317a50ab9c70ace60705933115
+size 20424873

evaluations/cayley_hierarchical_sagpool/best.out ADDED Viewed

	@@ -0,0 +1,116 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'embeds_type': 'BERT/BEiT',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'cayley',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/cayley_hierarchical_sagpool_checks/best_ckpt_update_72000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 0,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 768,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'hierarchical'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(784, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0): None
+    (1-2): 2 x SAGPooling(GATConv, 512, ratio=0.7, multiplier=1.0)
+    (3): SAGPooling(GATConv, 512, ratio=0.8, multiplier=1.0)
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/cayley_hierarchical_sagpool_checks/best_ckpt_update_72000.pt to model
+Validation VQA accuracy: 0.4362
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_hierarchical_sagpool/best.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_hierarchical_sagpool/best.json

evaluations/cayley_hierarchical_sagpool/last.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:928cb860c4d53148738cd4256bfe7b6ab367c7b13ef1c1191f6f3e0888cfd7ca
+size 20435239

evaluations/cayley_hierarchical_sagpool/last.out ADDED Viewed

	@@ -0,0 +1,116 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'embeds_type': 'BERT/BEiT',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'cayley',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/cayley_hierarchical_sagpool_checks/interrupt_ckpt_update_81000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 0,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 768,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'hierarchical'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(784, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0): None
+    (1-2): 2 x SAGPooling(GATConv, 512, ratio=0.7, multiplier=1.0)
+    (3): SAGPooling(GATConv, 512, ratio=0.8, multiplier=1.0)
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/cayley_hierarchical_sagpool_checks/interrupt_ckpt_update_81000.pt to model
+Validation VQA accuracy: 0.4369
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_hierarchical_sagpool/last.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_hierarchical_sagpool/last.json

evaluations/cayley_no_sagpool/best.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c34bcb14ab749daa8b93cfe9327523e8bad65947216d6cf289a2c1ba1ab46018
+size 20416693

evaluations/cayley_no_sagpool/best.out ADDED Viewed

	@@ -0,0 +1,114 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'embeds_type': 'BERT/BEiT',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'cayley',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/cayley_checkpoints/best_ckpt_update_81000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 0,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 768,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'none'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(784, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/cayley_checkpoints/best_ckpt_update_81000.pt to model
+Validation VQA accuracy: 0.4461
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_no_sagpool/best.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_no_sagpool/best.json

evaluations/cayley_no_sagpool/last.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4378eb4ca3990dd2d557d8a06e22f4313b421973de09f272cf29fe9b33360a4d
+size 20416736

evaluations/cayley_no_sagpool/last.out ADDED Viewed

	@@ -0,0 +1,114 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'embeds_type': 'BERT/BEiT',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'cayley',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/cayley_checkpoints/ckpt_update_82000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 0,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 768,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'none'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(784, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/cayley_checkpoints/ckpt_update_82000.pt to model
+Validation VQA accuracy: 0.4460
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_no_sagpool/last.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/cayley_no_sagpool/last.json

evaluations/mmg_BLIP_global_sagpool/best.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f9d90eea8e2a8055eb1c5fb6616fc77986a5cfc1b3ac57da0c5b2d4ee90531b
+size 20399753

evaluations/mmg_BLIP_global_sagpool/best.out ADDED Viewed

	@@ -0,0 +1,130 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_BLIP_embeds',
+ 'embeds_type': 'BLIP',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'mmg',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/BLIP_mmg_global_checks/best_ckpt_update_78000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 6,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 773,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'global'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(789, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (edge_mlp): Sequential(
+    (0): Linear(6, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINEConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (global_sagpool): SAGPooling(GATConv, 512, ratio=0.5, multiplier=1.0)
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/BLIP_mmg_global_checks/best_ckpt_update_78000.pt to model
+Validation VQA accuracy: 0.4382
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_BLIP_global_sagpool/best.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_BLIP_global_sagpool/best.json

evaluations/mmg_BLIP_global_sagpool/last.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0af7958b41643776c3347ef0abc4b3622652c077210fc7c94e34e2defc360710
+size 20396948

evaluations/mmg_BLIP_global_sagpool/last.out ADDED Viewed

	@@ -0,0 +1,130 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_BLIP_embeds',
+ 'embeds_type': 'BLIP',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'mmg',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/BLIP_mmg_global_checks/ckpt_update_81000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 6,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 773,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'global'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(789, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (edge_mlp): Sequential(
+    (0): Linear(6, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINEConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (global_sagpool): SAGPooling(GATConv, 512, ratio=0.5, multiplier=1.0)
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/BLIP_mmg_global_checks/ckpt_update_81000.pt to model
+Validation VQA accuracy: 0.4385
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_BLIP_global_sagpool/last.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_BLIP_global_sagpool/last.json

evaluations/mmg_CLIP_global_sagpool/best.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e34e3d049893b3f0e38259bcf1135ed2968fb2d8d4079071953165dba2efb4fe
+size 20368214

evaluations/mmg_CLIP_global_sagpool/best.out ADDED Viewed

	@@ -0,0 +1,130 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_CLIP_embeds',
+ 'embeds_type': 'CLIP',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'mmg',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/mmg_global_CLIP_checks/best_ckpt_update_78000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 6,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 517,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'global'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(533, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (edge_mlp): Sequential(
+    (0): Linear(6, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINEConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (global_sagpool): SAGPooling(GATConv, 512, ratio=0.5, multiplier=1.0)
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/mmg_global_CLIP_checks/best_ckpt_update_78000.pt to model
+Validation VQA accuracy: 0.3795
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_CLIP_global_sagpool/best.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_CLIP_global_sagpool/best.json

evaluations/mmg_CLIP_global_sagpool/last.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:665001e21f6099c627f8a37f1074f39350de33cd1ef51274b3543eaaceb19b61
+size 20306809

evaluations/mmg_CLIP_global_sagpool/last.out ADDED Viewed

	@@ -0,0 +1,130 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_CLIP_embeds',
+ 'embeds_type': 'CLIP',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'mmg',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/mmg_global_CLIP_checks/ckpt_update_83000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 6,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 517,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'global'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(533, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (edge_mlp): Sequential(
+    (0): Linear(6, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINEConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (global_sagpool): SAGPooling(GATConv, 512, ratio=0.5, multiplier=1.0)
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/mmg_global_CLIP_checks/ckpt_update_83000.pt to model
+Validation VQA accuracy: 0.3834
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_CLIP_global_sagpool/last.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_CLIP_global_sagpool/last.json

evaluations/mmg_global_sagpool/best.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ce89b74930f1f6c965ebce23931c0bbb0b8cf26ce66af5b6e3d784be5abc70e
+size 20417806

evaluations/mmg_global_sagpool/best.out ADDED Viewed

	@@ -0,0 +1,130 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'embeds_type': 'BERT/BEiT',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'mmg',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/mmg_glob_sagpool_checks/best_ckpt_update_72000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 6,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 773,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'global'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(789, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (edge_mlp): Sequential(
+    (0): Linear(6, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINEConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (global_sagpool): SAGPooling(GATConv, 512, ratio=0.5, multiplier=1.0)
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/mmg_glob_sagpool_checks/best_ckpt_update_72000.pt to model
+Validation VQA accuracy: 0.4479
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_global_sagpool/best.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_global_sagpool/best.json

evaluations/mmg_global_sagpool/last.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ff2ce81a5e3cdb9565d4586d6b2ee9365f6b2d4751c3e9d81f62d65d8f3c450
+size 20418958

evaluations/mmg_global_sagpool/last.out ADDED Viewed

	@@ -0,0 +1,130 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'embeds_type': 'BERT/BEiT',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'mmg',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/mmg_glob_sagpool_checks/ckpt_update_81000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 6,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 773,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'global'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(789, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (edge_mlp): Sequential(
+    (0): Linear(6, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINEConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (global_sagpool): SAGPooling(GATConv, 512, ratio=0.5, multiplier=1.0)
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/mmg_glob_sagpool_checks/ckpt_update_81000.pt to model
+Validation VQA accuracy: 0.4486
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_global_sagpool/last.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_global_sagpool/last.json

evaluations/mmg_hierarchical_sagpool/best.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a50803fb0e60ddb2303d6e4452c6771b4484550a8140a08bf642d779005dd367
+size 20404655

evaluations/mmg_hierarchical_sagpool/best.out ADDED Viewed

	@@ -0,0 +1,131 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'embeds_type': 'BERT/BEiT',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'mmg',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/mmg_hierarchical_sagpool_checks/best_ckpt_update_78000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 6,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 773,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'hierarchical'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(789, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (edge_mlp): Sequential(
+    (0): Linear(6, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINEConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0): None
+    (1-2): 2 x SAGPooling(GATConv, 512, ratio=0.7, multiplier=1.0)
+    (3): SAGPooling(GATConv, 512, ratio=0.8, multiplier=1.0)
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/mmg_hierarchical_sagpool_checks/best_ckpt_update_78000.pt to model
+Validation VQA accuracy: 0.4327
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_hierarchical_sagpool/best.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_hierarchical_sagpool/best.json

evaluations/mmg_hierarchical_sagpool/last.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5998632ef9783bd4548c790fe56f1b573457c07f075cdf9e14427843c83c084b
+size 20406320

evaluations/mmg_hierarchical_sagpool/last.out ADDED Viewed

	@@ -0,0 +1,131 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'embeds_type': 'BERT/BEiT',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'mmg',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/mmg_hierarchical_sagpool_checks/ckpt_update_79000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 6,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 773,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'hierarchical'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(789, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (edge_mlp): Sequential(
+    (0): Linear(6, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINEConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0): None
+    (1-2): 2 x SAGPooling(GATConv, 512, ratio=0.7, multiplier=1.0)
+    (3): SAGPooling(GATConv, 512, ratio=0.8, multiplier=1.0)
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/mmg_hierarchical_sagpool_checks/ckpt_update_79000.pt to model
+Validation VQA accuracy: 0.4324
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_hierarchical_sagpool/last.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_hierarchical_sagpool/last.json

evaluations/mmg_no_sagpool/best.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:113f998cef7e6d996a10fd33dc22540443d236a0f437123d99116e20f49c7ccb
+size 20432378

evaluations/mmg_no_sagpool/best.out ADDED Viewed

	@@ -0,0 +1,128 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'mmg',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/mmg_checkpoints/best_ckpt_update_72000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 6,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 773,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'none'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(789, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (edge_mlp): Sequential(
+    (0): Linear(6, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINEConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/mmg_checkpoints/best_ckpt_update_72000.pt to model
+Validation VQA accuracy: 0.4399
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_no_sagpool/best.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_no_sagpool/best.json

evaluations/mmg_no_sagpool/last.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29182499a32406e88a52e1a17b05e908326b6c7daddcaa4347e8f2eed20d53ca
+size 20439928

evaluations/mmg_no_sagpool/last.out ADDED Viewed

	@@ -0,0 +1,128 @@

+Config:
+{'adamw_lr': 5e-05,
+ 'adamw_weight_decay': 0.05,
+ 'add_lap_pe': True,
+ 'ans2idx_path': '/home/yandex/MLWG2025/danielvolkov/Documents/GAMER/data/VQA/answer2idx.json',
+ 'batch_size': 32,
+ 'checkpoint_dir': '/home/yandex/MLWG2025/danielvolkov/checkpoints',
+ 'checkpoint_interval_updates': 1000,
+ 'dataset_path': '/home/yandex/MLWG2025/danielvolkov/datasets/VQA_w_embed',
+ 'grad_acc_steps': 2,
+ 'graph_construction_method': 'mmg',
+ 'lap_pe_dim': 16,
+ 'log_every_n_updates': 10,
+ 'max_grad_norm': 1.0,
+ 'num_epochs': 12,
+ 'num_fusion_nodes': 6,
+ 'num_text_global_nodes': 2,
+ 'num_workers': 8,
+ 'persistent_workers': True,
+ 'pin_memory': True,
+ 'resume_checkpoint': '/home/yandex/MLWG2025/danielvolkov/mmg_checkpoints/ckpt_update_82000.pt',
+ 'save_best': True,
+ 'self_loops_in_image_graph': True,
+ 'use_amp': True,
+ 'val_batches': 1000,
+ 'val_interval_updates': 3000,
+ 'warmup_fraction': 0.05}
+Model config:
+{'dropout': 0.2,
+ 'edge_dim': 6,
+ 'global_pool_method': 'mean',
+ 'global_sagpool_ratio': 0.5,
+ 'heads': 8,
+ 'hidden_dim': 512,
+ 'mlps_hidden_layers': 3,
+ 'node_dim': 773,
+ 'num_layers': 4,
+ 'output_dim': 3000,
+ 'pe_dim': 16,
+ 'sagpool_layer2ratio': {1: 0.7, 2: 0.7, 3: 0.8},
+ 'sagpool_mode': 'none'}
+Model:
+GraphGPSNet(
+  (node_mlp): Sequential(
+    (0): Linear(789, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (edge_mlp): Sequential(
+    (0): Linear(6, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (layers): ModuleList(
+    (0-3): 4 x GPSConv(512, conv=GINEConv(nn=Sequential(
+      (0): Linear(512, 512, bias=True)
+      (1): GELU(approximate='none')
+      (2): Dropout(p=0.2, inplace=False)
+      (3): Linear(512, 512, bias=True)
+      (4): GELU(approximate='none')
+      (5): Dropout(p=0.2, inplace=False)
+      (6): Linear(512, 512, bias=True)
+      (7): GELU(approximate='none')
+      (8): Dropout(p=0.2, inplace=False)
+      (9): Linear(512, 512, bias=True)
+      (10): GELU(approximate='none')
+      (11): Dropout(p=0.2, inplace=False)
+      (12): Linear(512, 512, bias=True)
+    )), heads=8, attn_type=multihead)
+  )
+  (pools): ModuleList(
+    (0-3): 4 x None
+  )
+  (postnet): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 512, bias=True)
+  )
+  (readout): Sequential(
+    (0): Linear(512, 512, bias=True)
+    (1): GELU(approximate='none')
+    (2): Dropout(p=0.2, inplace=False)
+    (3): Linear(512, 512, bias=True)
+    (4): GELU(approximate='none')
+    (5): Dropout(p=0.2, inplace=False)
+    (6): Linear(512, 512, bias=True)
+    (7): GELU(approximate='none')
+    (8): Dropout(p=0.2, inplace=False)
+    (9): Linear(512, 512, bias=True)
+    (10): GELU(approximate='none')
+    (11): Dropout(p=0.2, inplace=False)
+    (12): Linear(512, 3000, bias=True)
+  )
+)
+Loaded /home/yandex/MLWG2025/danielvolkov/mmg_checkpoints/ckpt_update_82000.pt to model
+Validation VQA accuracy: 0.4404
+Saved test predictions to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_no_sagpool/last.json
+Test predictions saved to /home/yandex/MLWG2025/danielvolkov/evaluations/mmg_no_sagpool/last.json