Upload weights and configs - Run 20251012_040642
Browse files
- weights/best_model.safetensors +2 -2
- weights/best_model_metadata.json +0 -0
- weights/david_config.json +13 -15
- weights/train_config.json +11 -5
weights/best_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:294bbd186f6036cf8599e3d257dae1b99f2b797e69dd10829c778672fbcc83e0
|
| 3 |
+
size 64195804
|
weights/best_model_metadata.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
weights/david_config.json
CHANGED
|
@@ -1,35 +1,33 @@
|
|
| 1 |
{
|
| 2 |
-
"name": "
|
| 3 |
-
"uid": "c.david.
|
| 4 |
-
"feature_dim":
|
| 5 |
"num_classes": 1000,
|
| 6 |
"scales": [
|
| 7 |
-
|
| 8 |
-
512,
|
| 9 |
768,
|
| 10 |
1024,
|
| 11 |
1280
|
| 12 |
],
|
| 13 |
-
"sharing_mode": "
|
| 14 |
"fusion_mode": "deep_efficiency",
|
| 15 |
"use_belly": true,
|
| 16 |
-
"belly_expand": 2.
|
| 17 |
-
"shared_feature_dim":
|
| 18 |
"shared_layers": 2,
|
| 19 |
"shared_dropout": 0.1,
|
| 20 |
"fusion_temperature": 1.0,
|
| 21 |
"fusion_dropout": 0.1,
|
| 22 |
"tree_depth": 3,
|
| 23 |
-
"num_experts":
|
| 24 |
-
"compression_ratio":
|
| 25 |
"expert_dropout": 0.1,
|
| 26 |
"attention_dropout": 0.1,
|
| 27 |
"progressive_training": true,
|
| 28 |
"scale_warmup_epochs": {
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
-
"
|
| 33 |
-
"1280": 12
|
| 34 |
}
|
| 35 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"name": "david_clip_vit_l14",
|
| 3 |
+
"uid": "c.david.clip_vit_l14",
|
| 4 |
+
"feature_dim": 768,
|
| 5 |
"num_classes": 1000,
|
| 6 |
"scales": [
|
| 7 |
+
384,
|
|
|
|
| 8 |
768,
|
| 9 |
1024,
|
| 10 |
1280
|
| 11 |
],
|
| 12 |
+
"sharing_mode": "partial_shared",
|
| 13 |
"fusion_mode": "deep_efficiency",
|
| 14 |
"use_belly": true,
|
| 15 |
+
"belly_expand": 2.0,
|
| 16 |
+
"shared_feature_dim": 1024,
|
| 17 |
"shared_layers": 2,
|
| 18 |
"shared_dropout": 0.1,
|
| 19 |
"fusion_temperature": 1.0,
|
| 20 |
"fusion_dropout": 0.1,
|
| 21 |
"tree_depth": 3,
|
| 22 |
+
"num_experts": 4,
|
| 23 |
+
"compression_ratio": 4,
|
| 24 |
"expert_dropout": 0.1,
|
| 25 |
"attention_dropout": 0.1,
|
| 26 |
"progressive_training": true,
|
| 27 |
"scale_warmup_epochs": {
|
| 28 |
+
"384": 0,
|
| 29 |
+
"768": 1,
|
| 30 |
+
"1024": 2,
|
| 31 |
+
"1280": 3
|
|
|
|
| 32 |
}
|
| 33 |
}
|
weights/train_config.json
CHANGED
|
@@ -1,15 +1,21 @@
|
|
| 1 |
{
|
| 2 |
"name": "david_training",
|
| 3 |
-
"run_id": "
|
| 4 |
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 5 |
-
"model_variant": "
|
| 6 |
"num_classes": 1000,
|
| 7 |
-
"preset": "
|
| 8 |
"custom_config_path": null,
|
| 9 |
"num_classes_override": null,
|
| 10 |
"use_belly_override": null,
|
| 11 |
"belly_expand_override": null,
|
| 12 |
"progressive_training_override": true,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"num_epochs": 20,
|
| 14 |
"batch_size": 1024,
|
| 15 |
"learning_rate": 0.01,
|
|
@@ -26,8 +32,8 @@
|
|
| 26 |
"gradient_clip": 5.0,
|
| 27 |
"scheduler_type": "cosine_restarts",
|
| 28 |
"min_lr": 1e-06,
|
| 29 |
-
"freeze_strategy": "
|
| 30 |
-
"freeze_threshold":
|
| 31 |
"unfreeze_on_plateau": true,
|
| 32 |
"patience": 10,
|
| 33 |
"track_gradients": true,
|
|
|
|
| 1 |
{
|
| 2 |
"name": "david_training",
|
| 3 |
+
"run_id": "20251012_040642",
|
| 4 |
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 5 |
+
"model_variant": "clip_vit_l14",
|
| 6 |
"num_classes": 1000,
|
| 7 |
+
"preset": "clip_vit_l14",
|
| 8 |
"custom_config_path": null,
|
| 9 |
"num_classes_override": null,
|
| 10 |
"use_belly_override": null,
|
| 11 |
"belly_expand_override": null,
|
| 12 |
"progressive_training_override": true,
|
| 13 |
+
"scale_warmup_epochs_override": {
|
| 14 |
+
"384": 0,
|
| 15 |
+
"768": 1,
|
| 16 |
+
"1024": 2,
|
| 17 |
+
"1280": 3
|
| 18 |
+
},
|
| 19 |
"num_epochs": 20,
|
| 20 |
"batch_size": 1024,
|
| 21 |
"learning_rate": 0.01,
|
|
|
|
| 32 |
"gradient_clip": 5.0,
|
| 33 |
"scheduler_type": "cosine_restarts",
|
| 34 |
"min_lr": 1e-06,
|
| 35 |
+
"freeze_strategy": "never",
|
| 36 |
+
"freeze_threshold": 90.0,
|
| 37 |
"unfreeze_on_plateau": true,
|
| 38 |
"patience": 10,
|
| 39 |
"track_gradients": true,
|