AbstractPhil committed on
Commit
d61bfe5
·
verified ·
1 Parent(s): ad3bd91

Checkpoint: 20251117_015502_danbooru-50k-v1_step6

Browse files
checkpoints/20251117_015502_danbooru-50k-v1_step6/config.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sub_name": "danbooru-50k-v1",
3
+ "num_opinion_anchors": 225,
4
+ "pentachoron_dim": 256,
5
+ "scales": [
6
+ 128,
7
+ 256,
8
+ 512
9
+ ],
10
+ "scale_hidden_dims": {
11
+ "128": 256,
12
+ "256": 512,
13
+ "512": 1024
14
+ },
15
+ "alpha_init": 0.1,
16
+ "alpha_learnable": true,
17
+ "alpha_per_scale": true,
18
+ "beta_init": 0.5,
19
+ "beta_learnable": true,
20
+ "beta_per_scale": true,
21
+ "gamma_learnable": true,
22
+ "learn_layer_weights": true,
23
+ "siglip_model": "google/siglip-so400m-patch14-384",
24
+ "clip_tokenizer": "openai/clip-vit-large-patch14",
25
+ "illustrious_clip_path": "./models/NAI-11-epsilon_clip_l.safetensors",
26
+ "clip_skip": 0,
27
+ "siglip_layer_indices": [
28
+ 12,
29
+ 16,
30
+ 20,
31
+ 23,
32
+ 25,
33
+ 26
34
+ ],
35
+ "clip_layer_indices": null,
36
+ "use_gradient_checkpointing": false,
37
+ "share_scale_embeddings": true,
38
+ "dataset_name": "animetimm/danbooru-wdtagger-v4-w640-ws-50k",
39
+ "image_size": 384,
40
+ "max_tag_length": 77,
41
+ "batch_size": 32,
42
+ "num_epochs": 3,
43
+ "learning_rate": 0.0001,
44
+ "weight_decay": 0.01,
45
+ "warmup_steps": 1000,
46
+ "gradient_clip": 1.0,
47
+ "gradient_accumulation_steps": 1,
48
+ "token_loss_weight": 1.0,
49
+ "geometric_weight": 0.1,
50
+ "fusion_strategy": "learned_weighted",
51
+ "checkpoint_dir": "./checkpoints/liminal_staircase_danbooru",
52
+ "save_every": 500,
53
+ "hf_repo_id": "AbstractPhil/liminal-staircase-v2",
54
+ "hf_upload_every": 1000,
55
+ "hf_private": false,
56
+ "resume": true,
57
+ "log_dir": "./logs/liminal_staircase_danbooru",
58
+ "log_every": 50,
59
+ "device": "cuda",
60
+ "timestamp": "2025-11-17T01:55:03.560895",
61
+ "step": 6,
62
+ "epoch": 0,
63
+ "val_loss": Infinity,
64
+ "fusion_diagnostics": {
65
+ "layer_weights": [
66
+ 0.05555903539061546,
67
+ 0.05554059520363808,
68
+ 0.05553547292947769,
69
+ 0.05557769536972046,
70
+ 0.05554599314928055,
71
+ 0.05556809529662132,
72
+ 0.05554487556219101,
73
+ 0.05558248609304428,
74
+ 0.05553889647126198,
75
+ 0.055535610765218735,
76
+ 0.05558742955327034,
77
+ 0.05557844415307045,
78
+ 0.05556866526603699,
79
+ 0.05554370582103729,
80
+ 0.05553441122174263,
81
+ 0.05558684095740318,
82
+ 0.05553947389125824,
83
+ 0.055532246828079224
84
+ ],
85
+ "scale_weights": [
86
+ 0.33324742317199707,
87
+ 0.33328598737716675,
88
+ 0.33346661925315857
89
+ ],
90
+ "alpha_per_scale": [
91
+ 0.2624704837799072,
92
+ 0.2624704837799072,
93
+ 0.2624704837799072
94
+ ],
95
+ "beta_per_scale": [
96
+ 0.6224586963653564,
97
+ 0.6223307847976685,
98
+ 0.6223313808441162
99
+ ],
100
+ "scale_statistics": {}
101
+ }
102
+ }
checkpoints/20251117_015502_danbooru-50k-v1_step6/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c49006c8008c0cb256569378155a99939a20ea1fb5f9af6b99bd433b1977e538
3
+ size 328002100
checkpoints/20251117_015502_danbooru-50k-v1_step6/training_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3206476e8080fe5beb65ced45a926c5630286b0edc5fd1e150bb9c010608d845
3
+ size 606530757