AbstractPhil committed on
Commit
0e595e7
·
verified ·
1 Parent(s): 81211f0

Checkpoint: danbooru-50k-v1-512-20251117_055745/step116

Browse files
danbooru-50k-v1-512-20251117_055745/step116/config.json ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sub_name": "danbooru-50k-v1-512",
3
+ "num_opinion_anchors": 225,
4
+ "pentachoron_dim": 512,
5
+ "scales": [
6
+ 128,
7
+ 256,
8
+ 512,
9
+ 1024
10
+ ],
11
+ "scale_hidden_dims": {
12
+ "128": 128,
13
+ "256": 512,
14
+ "512": 1024,
15
+ "1024": 2048
16
+ },
17
+ "alpha_init": 0.1,
18
+ "alpha_learnable": true,
19
+ "alpha_per_scale": true,
20
+ "beta_init": 0.5,
21
+ "beta_learnable": true,
22
+ "beta_per_scale": true,
23
+ "gamma_learnable": true,
24
+ "learn_layer_weights": true,
25
+ "siglip_model": "google/siglip-so400m-patch14-384",
26
+ "clip_tokenizer": "openai/clip-vit-large-patch14",
27
+ "illustrious_clip_path": "./models/NAI-11-epsilon_clip_l.safetensors",
28
+ "clip_skip": 0,
29
+ "siglip_layer_indices": [
30
+ 3,
31
+ 6,
32
+ 9,
33
+ 12,
34
+ 21,
35
+ 23,
36
+ 24,
37
+ 25,
38
+ 26
39
+ ],
40
+ "clip_layer_indices": null,
41
+ "use_gradient_checkpointing": false,
42
+ "share_scale_embeddings": false,
43
+ "dataset_name": "animetimm/danbooru-wdtagger-v4-w640-ws-50k",
44
+ "image_size": 384,
45
+ "max_tag_length": 77,
46
+ "batch_size": 16,
47
+ "num_epochs": 3,
48
+ "learning_rate": 0.0001,
49
+ "weight_decay": 0.01,
50
+ "warmup_steps": 1000,
51
+ "gradient_clip": 1.0,
52
+ "gradient_accumulation_steps": 1,
53
+ "token_loss_weight": 1.0,
54
+ "geometric_weight": 0.1,
55
+ "fusion_strategy": "learned_weighted",
56
+ "text_dropout_prob": 0.3,
57
+ "text_noise_std": 0.1,
58
+ "text_noise_prob": 0.5,
59
+ "vision_only_text": "general: blank_image",
60
+ "text_dropout_schedule": "linear",
61
+ "text_dropout_start": 0.1,
62
+ "text_dropout_end": 0.5,
63
+ "checkpoint_dir": "./checkpoints/liminal_staircase_danbooru",
64
+ "save_every": 500,
65
+ "hf_repo_id": "AbstractPhil/liminal-staircase-v2",
66
+ "hf_upload_every": 1000,
67
+ "hf_private": false,
68
+ "resume": true,
69
+ "log_dir": "./logs/liminal_staircase_danbooru",
70
+ "log_every": 5,
71
+ "device": "cuda",
72
+ "timestamp": "2025-11-17T06:00:46.795492",
73
+ "step": 116,
74
+ "epoch": 0,
75
+ "val_loss": null,
76
+ "fusion_diagnostics": {
77
+ "layer_weights": [
78
+ 0.04763081297278404,
79
+ 0.04762101545929909,
80
+ 0.04759393259882927,
81
+ 0.04762202873826027,
82
+ 0.047565292567014694,
83
+ 0.047590624541044235,
84
+ 0.047599051147699356,
85
+ 0.047562386840581894,
86
+ 0.04760372266173363,
87
+ 0.047676656395196915,
88
+ 0.04768342152237892,
89
+ 0.04765501245856285,
90
+ 0.047600556164979935,
91
+ 0.047637615352869034,
92
+ 0.047645460814237595,
93
+ 0.04759979993104935,
94
+ 0.047738682478666306,
95
+ 0.04763513430953026,
96
+ 0.04761926084756851,
97
+ 0.04758467897772789,
98
+ 0.04753493145108223
99
+ ],
100
+ "scale_weights": [
101
+ 0.2466253936290741,
102
+ 0.24661938846111298,
103
+ 0.2533079981803894,
104
+ 0.2534472644329071
105
+ ],
106
+ "alpha_per_scale": [
107
+ 0.2630360424518585,
108
+ 0.2630360424518585,
109
+ 0.2630360424518585,
110
+ 0.2630360424518585
111
+ ],
112
+ "beta_per_scale": [
113
+ 0.6198403835296631,
114
+ 0.6200416088104248,
115
+ 0.6200869679450989,
116
+ 0.6201481819152832
117
+ ],
118
+ "scale_statistics": {}
119
+ },
120
+ "is_best": false
121
+ }
danbooru-50k-v1-512-20251117_055745/step116/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7d479fbc8da77d5bf92d67ca27278dc6798b2ed23104034ff0e0589217fc04d
3
+ size 985442620
danbooru-50k-v1-512-20251117_055745/step116/training_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c82e3bcd4e35bb88b0528c29d9d5d6596be0d957be62715d158ba124cddcda
3
+ size 1855182563