AbstractPhil committed on
Commit
74ff46f
·
verified ·
1 Parent(s): b11be07

Upload weights and configs - Run 20251012_040642

Browse files
weights/best_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:395f5ebc0ca3bce746bd7fce81f21d10e71341304c6e8236478064f779529919
3
- size 59515088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:294bbd186f6036cf8599e3d257dae1b99f2b797e69dd10829c778672fbcc83e0
3
+ size 64195804
weights/best_model_metadata.json CHANGED
The diff for this file is too large to render. See raw diff
 
weights/david_config.json CHANGED
@@ -1,35 +1,33 @@
1
  {
2
- "name": "david_high_accuracy",
3
- "uid": "c.david.high_accuracy",
4
- "feature_dim": 512,
5
  "num_classes": 1000,
6
  "scales": [
7
- 256,
8
- 512,
9
  768,
10
  1024,
11
  1280
12
  ],
13
- "sharing_mode": "decoupled",
14
  "fusion_mode": "deep_efficiency",
15
  "use_belly": true,
16
- "belly_expand": 2.5,
17
- "shared_feature_dim": 768,
18
  "shared_layers": 2,
19
  "shared_dropout": 0.1,
20
  "fusion_temperature": 1.0,
21
  "fusion_dropout": 0.1,
22
  "tree_depth": 3,
23
- "num_experts": 5,
24
- "compression_ratio": 2,
25
  "expert_dropout": 0.1,
26
  "attention_dropout": 0.1,
27
  "progressive_training": true,
28
  "scale_warmup_epochs": {
29
- "256": 0,
30
- "512": 3,
31
- "768": 6,
32
- "1024": 9,
33
- "1280": 12
34
  }
35
  }
 
1
  {
2
+ "name": "david_clip_vit_l14",
3
+ "uid": "c.david.clip_vit_l14",
4
+ "feature_dim": 768,
5
  "num_classes": 1000,
6
  "scales": [
7
+ 384,
 
8
  768,
9
  1024,
10
  1280
11
  ],
12
+ "sharing_mode": "partial_shared",
13
  "fusion_mode": "deep_efficiency",
14
  "use_belly": true,
15
+ "belly_expand": 2.0,
16
+ "shared_feature_dim": 1024,
17
  "shared_layers": 2,
18
  "shared_dropout": 0.1,
19
  "fusion_temperature": 1.0,
20
  "fusion_dropout": 0.1,
21
  "tree_depth": 3,
22
+ "num_experts": 4,
23
+ "compression_ratio": 4,
24
  "expert_dropout": 0.1,
25
  "attention_dropout": 0.1,
26
  "progressive_training": true,
27
  "scale_warmup_epochs": {
28
+ "384": 0,
29
+ "768": 1,
30
+ "1024": 2,
31
+ "1280": 3
 
32
  }
33
  }
weights/train_config.json CHANGED
@@ -1,15 +1,21 @@
1
  {
2
  "name": "david_training",
3
- "run_id": "20251012_032356",
4
  "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
5
- "model_variant": "clip_vit_b16",
6
  "num_classes": 1000,
7
- "preset": "high_accuracy",
8
  "custom_config_path": null,
9
  "num_classes_override": null,
10
  "use_belly_override": null,
11
  "belly_expand_override": null,
12
  "progressive_training_override": true,
 
 
 
 
 
 
13
  "num_epochs": 20,
14
  "batch_size": 1024,
15
  "learning_rate": 0.01,
@@ -26,8 +32,8 @@
26
  "gradient_clip": 5.0,
27
  "scheduler_type": "cosine_restarts",
28
  "min_lr": 1e-06,
29
- "freeze_strategy": "performance",
30
- "freeze_threshold": 70.0,
31
  "unfreeze_on_plateau": true,
32
  "patience": 10,
33
  "track_gradients": true,
 
1
  {
2
  "name": "david_training",
3
+ "run_id": "20251012_040642",
4
  "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
5
+ "model_variant": "clip_vit_l14",
6
  "num_classes": 1000,
7
+ "preset": "clip_vit_l14",
8
  "custom_config_path": null,
9
  "num_classes_override": null,
10
  "use_belly_override": null,
11
  "belly_expand_override": null,
12
  "progressive_training_override": true,
13
+ "scale_warmup_epochs_override": {
14
+ "384": 0,
15
+ "768": 1,
16
+ "1024": 2,
17
+ "1280": 3
18
+ },
19
  "num_epochs": 20,
20
  "batch_size": 1024,
21
  "learning_rate": 0.01,
 
32
  "gradient_clip": 5.0,
33
  "scheduler_type": "cosine_restarts",
34
  "min_lr": 1e-06,
35
+ "freeze_strategy": "never",
36
+ "freeze_threshold": 90.0,
37
  "unfreeze_on_plateau": true,
38
  "patience": 10,
39
  "track_gradients": true,