File size: 2,594 Bytes
cb40fbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
{
  "model_id": "google/siglip2-base-patch16-naflex",
  "dataset": "apartments",
  "apartments_jsonl": "data/apartments/question-image-dataset_labeled_expanded.jsonl",
  "apartments_images_dir": "data/apartments/images",
  "fmnist_root": "data",
  "output_root": "models",
  "run_name": null,
  "batch_size": 20,
  "gradient_accumulation_steps": 1,
  "stack_size": 1,
  "epochs": 2,
  "max_steps": null,
  "eval_interval": 1,
  "eval_batches": 2,
  "checkpoint_interval": 1000,
  "checkpoint_mode": "max_val_acc",
  "val_ratio": 0.1,
  "seed": 42,
  "apartments_train_group_limit": null,
  "apartments_train_question_limit": null,
  "apartments_question_split_mode": "disjoint",
  "apartments_hard_negative_strategy": "within_group_overlap",
  "apartments_hard_negative_topk": 0,
  "apartments_hard_negative_oversample_factor": 0,
  "apartments_augment_images": true,
  "apartments_augment_add_prob": 0.35,
  "apartments_augment_max_added_images": 3,
  "apartments_augment_shuffle_prob": 0.5,
  "apartments_augment_repeat_prob": 0.25,
  "apartments_augment_repeat_max_images": 2,
  "apartments_augment_rotate_prob": 0.5,
  "apartments_augment_rotate_degrees": 5.0,
  "apartments_augment_color_prob": 0.8,
  "apartments_augment_brightness_delta": 0.1,
  "apartments_augment_saturation_delta": 0.1,
  "num_workers_train": 4,
  "num_workers_val": 2,
  "text_max_length": 64,
  "image_max_num_patches": null,
  "lr_backbone": 1e-6,
  "lr_head": 0.0002,
  "weight_decay": 0.01,
  "freeze_backbone": false,
  "freeze_backbone_epochs": 0,
  "freeze_backbone_steps": null,
  "backbone_trainable_scope": "full",
  "attention_heads": 2,
  "attention_dropout": 0.3,
  "image_self_attention_layers": 0,
  "image_dropout": 0.15,
  "normalize_embeddings": true,
  "set_context_mode": "mean",
  "aggregation_mode": "attention",
  "head_feature_mode": "matching",
  "late_interaction_topk": 8,
  "head_hidden_dim": 768,
  "head_num_layers": 2,
  "head_dropout": 0.2,
  "head_activation": "swiglu",
  "label_smoothing": 0.05,
  "alignment_loss_weight": 0.05,
  "alignment_loss_logit_scale": 18.0,
  "alignment_loss_hard_negative_topk": 1,
  "compile_model": false,
  "compile_fallback_to_eager": true,
  "backbone_torch_dtype": "auto",
  "attn_implementation": null,
  "resume_path": null,
  "min_free_space_gb": 1.0,
  "use_wandb": true,
  "wandb_project": "flash-judge",
  "wandb_entity": null,
  "wandb_run_name": null,
  "save_artifacts": true,
  "push_to_hub": true,
  "hub_org": "FuncAI",
  "hub_repo_name": "FlashJudge7",
  "hub_private": false,
  "hub_token": null,
  "device": null,
  "use_amp": true
}