geometric_classifier v9 | acc=0.9014 | 2,325,174 params | vectorized curvature
Browse files
geometric_classifier/config.json
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"model_type": "GeometricShapeClassifier",
|
| 3 |
-
"version": "
|
|
|
|
| 4 |
"grid_size": 5,
|
| 5 |
"num_classes": 38,
|
| 6 |
"class_names": [
|
|
@@ -53,18 +54,11 @@
|
|
| 53 |
"hyperbolic",
|
| 54 |
"helical"
|
| 55 |
],
|
| 56 |
-
"embed_dim":
|
| 57 |
"n_tracers": 5,
|
| 58 |
-
"capacity_dims": [
|
| 59 |
-
64,
|
| 60 |
-
64,
|
| 61 |
-
64,
|
| 62 |
-
64
|
| 63 |
-
],
|
| 64 |
-
"curvature_embed_dim": 128,
|
| 65 |
"arbiter_latent_dim": 128,
|
| 66 |
"arbiter_flow_steps": 4,
|
| 67 |
-
"total_params":
|
| 68 |
"shape_catalog": {
|
| 69 |
"point": {
|
| 70 |
"dim": 0,
|
|
|
|
| 1 |
{
|
| 2 |
"model_type": "GeometricShapeClassifier",
|
| 3 |
+
"version": "v9",
|
| 4 |
+
"changes": "conv3d/conv2d \u2192 linear in curvature head, 83x training speedup",
|
| 5 |
"grid_size": 5,
|
| 6 |
"num_classes": 38,
|
| 7 |
"class_names": [
|
|
|
|
| 54 |
"hyperbolic",
|
| 55 |
"helical"
|
| 56 |
],
|
| 57 |
+
"embed_dim": 64,
|
| 58 |
"n_tracers": 5,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
"arbiter_latent_dim": 128,
|
| 60 |
"arbiter_flow_steps": 4,
|
| 61 |
+
"total_params": 2325174,
|
| 62 |
"shape_catalog": {
|
| 63 |
"point": {
|
| 64 |
"dim": 0,
|
geometric_classifier/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8251db4e5b77cb50df8c0f173e75ffb7cb79cce4e27a20db90e2e2f522354865
|
| 3 |
+
size 9574860
|
geometric_classifier/training_config.json
CHANGED
|
@@ -1,10 +1,35 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
| 3 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
"learned_capacities": [
|
| 5 |
-
0.
|
| 6 |
-
0.
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
]
|
|
|
|
| 10 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"n_samples": 500000,
|
| 3 |
+
"epochs": 80,
|
| 4 |
+
"batch_size": 4096,
|
| 5 |
+
"lr": 0.003,
|
| 6 |
+
"seed": 42,
|
| 7 |
+
"optimizer": "AdamW",
|
| 8 |
+
"weight_decay": 0.0001,
|
| 9 |
+
"scheduler": "cosine_with_warmup",
|
| 10 |
+
"warmup_epochs": 5,
|
| 11 |
+
"amp_dtype": "torch.bfloat16",
|
| 12 |
+
"loss_weights": {
|
| 13 |
+
"cls": 1.0,
|
| 14 |
+
"fill": 0.3,
|
| 15 |
+
"peak": 0.3,
|
| 16 |
+
"ovf": 0.05,
|
| 17 |
+
"div": 0.02,
|
| 18 |
+
"vol": 0.1,
|
| 19 |
+
"cm": 0.1,
|
| 20 |
+
"curved": 0.2,
|
| 21 |
+
"ctype": 0.2,
|
| 22 |
+
"arb_cls": 0.8,
|
| 23 |
+
"arb_traj": 0.2,
|
| 24 |
+
"arb_conf": 0.1,
|
| 25 |
+
"flow": 0.5
|
| 26 |
+
},
|
| 27 |
+
"best_val_accuracy": 0.90142,
|
| 28 |
"learned_capacities": [
|
| 29 |
+
0.001764793531037867,
|
| 30 |
+
0.0032583752181380987,
|
| 31 |
+
18.675533294677734,
|
| 32 |
+
18.547143936157227
|
| 33 |
+
],
|
| 34 |
+
"total_training_time_seconds": 981.4479973316193
|
| 35 |
}
|