Checkpoint: danbooru-50k-v1-512-2-20251117_060553/step9874

Browse files

Files changed (3) hide show

danbooru-50k-v1-512-2-20251117_060553/step9874/config.json +130 -0
danbooru-50k-v1-512-2-20251117_060553/step9874/model.safetensors +3 -0
danbooru-50k-v1-512-2-20251117_060553/step9874/training_state.pt +3 -0

danbooru-50k-v1-512-2-20251117_060553/step9874/config.json ADDED Viewed

	@@ -0,0 +1,130 @@

+{
+  "sub_name": "danbooru-50k-v1-512-2",
+  "num_opinion_anchors": 225,
+  "pentachoron_dim": 512,
+  "scales": [
+    128,
+    256,
+    512,
+    1024
+  ],
+  "scale_hidden_dims": {
+    "128": 256,
+    "256": 512,
+    "512": 1024,
+    "1024": 2048
+  },
+  "alpha_init": 0.125,
+  "alpha_learnable": true,
+  "alpha_per_scale": true,
+  "beta_init": 0.5,
+  "beta_learnable": true,
+  "beta_per_scale": true,
+  "gamma_learnable": true,
+  "learn_layer_weights": true,
+  "siglip_model": "google/siglip-so400m-patch14-384",
+  "clip_tokenizer": "openai/clip-vit-large-patch14",
+  "illustrious_clip_path": "./models/NAI-11-epsilon_clip_l.safetensors",
+  "clip_skip": 1,
+  "siglip_layer_indices": [
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    9,
+    12,
+    18,
+    21,
+    23,
+    24,
+    25,
+    26
+  ],
+  "clip_layer_indices": null,
+  "use_gradient_checkpointing": false,
+  "share_scale_embeddings": false,
+  "dataset_name": "animetimm/danbooru-wdtagger-v4-w640-ws-50k",
+  "image_size": 384,
+  "max_tag_length": 77,
+  "batch_size": 24,
+  "num_epochs": 20,
+  "learning_rate": 0.0001,
+  "weight_decay": 0.01,
+  "warmup_steps": 1000,
+  "gradient_clip": 1.0,
+  "gradient_accumulation_steps": 1,
+  "token_loss_weight": 1.0,
+  "geometric_weight": 0.1,
+  "fusion_strategy": "learned_weighted",
+  "text_dropout_prob": 0.3,
+  "text_noise_std": 0.1,
+  "text_noise_prob": 0.5,
+  "vision_only_text": "general: blank_image",
+  "text_dropout_schedule": "linear",
+  "text_dropout_start": 0.1,
+  "text_dropout_end": 0.5,
+  "checkpoint_dir": "./checkpoints/liminal_staircase_danbooru",
+  "save_every": 500,
+  "hf_repo_id": "AbstractPhil/liminal-staircase-v2",
+  "hf_upload_every": 1000,
+  "hf_private": false,
+  "resume": false,
+  "log_dir": "./logs/liminal_staircase_danbooru",
+  "log_every": 5,
+  "device": "cuda",
+  "timestamp": "2025-11-17T14:10:35.698828",
+  "step": 9874,
+  "epoch": 5,
+  "val_loss": null,
+  "fusion_diagnostics": {
+    "layer_weights": [
+      0.04135271906852722,
+      0.040869709104299545,
+      0.042614471167325974,
+      0.041568122804164886,
+      0.04186437278985977,
+      0.04223686456680298,
+      0.03997607156634331,
+      0.04222464561462402,
+      0.03964937850832939,
+      0.038747575134038925,
+      0.03888710215687752,
+      0.03851398453116417,
+      0.0383797213435173,
+      0.03842699155211449,
+      0.038952481001615524,
+      0.038612887263298035,
+      0.03840705752372742,
+      0.03818415477871895,
+      0.03945833817124367,
+      0.040741387754678726,
+      0.040348730981349945,
+      0.04126262664794922,
+      0.04083728790283203,
+      0.03929390013217926,
+      0.03858938068151474
+    ],
+    "scale_weights": [
+      0.19150905311107635,
+      0.2122136950492859,
+      0.2983412742614746,
+      0.29793596267700195
+    ],
+    "alpha_per_scale": [
+      0.24941794574260712,
+      0.24941794574260712,
+      0.24941794574260712,
+      0.24941794574260712
+    ],
+    "beta_per_scale": [
+      0.569037139415741,
+      0.5981298089027405,
+      0.5593996047973633,
+      0.58758544921875
+    ],
+    "scale_statistics": {}
+  },
+  "is_best": false
+}

danbooru-50k-v1-512-2-20251117_060553/step9874/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e6edaf73851e28edcc9a5cfcb470323a0576ba252de3ef43cb49041c2ca4f4a8
+size 1106728220

danbooru-50k-v1-512-2-20251117_060553/step9874/training_state.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e93b5d3a5352a989af83b384bcb370ba08cdd0bb2a2872f2a0403ca57e742a5b
+size 2075709603