AbstractPhil committed on
Commit
45ee27f
·
verified ·
1 Parent(s): 17b55d1

Checkpoint: danbooru-50k-v1/epoch2_step4585_20251117_051214

Browse files
danbooru-50k-v1/epoch2_step4585_20251117_051214/config.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sub_name": "danbooru-50k-v1",
3
+ "num_opinion_anchors": 225,
4
+ "pentachoron_dim": 256,
5
+ "scales": [
6
+ 128,
7
+ 256,
8
+ 512
9
+ ],
10
+ "scale_hidden_dims": {
11
+ "128": 256,
12
+ "256": 512,
13
+ "512": 1024
14
+ },
15
+ "alpha_init": 0.1,
16
+ "alpha_learnable": true,
17
+ "alpha_per_scale": true,
18
+ "beta_init": 0.5,
19
+ "beta_learnable": true,
20
+ "beta_per_scale": true,
21
+ "gamma_learnable": true,
22
+ "learn_layer_weights": true,
23
+ "siglip_model": "google/siglip-so400m-patch14-384",
24
+ "clip_tokenizer": "openai/clip-vit-large-patch14",
25
+ "illustrious_clip_path": "./models/NAI-11-epsilon_clip_l.safetensors",
26
+ "clip_skip": 0,
27
+ "siglip_layer_indices": [
28
+ 12,
29
+ 16,
30
+ 20,
31
+ 23,
32
+ 25,
33
+ 26
34
+ ],
35
+ "clip_layer_indices": null,
36
+ "use_gradient_checkpointing": false,
37
+ "share_scale_embeddings": true,
38
+ "dataset_name": "animetimm/danbooru-wdtagger-v4-w640-ws-50k",
39
+ "image_size": 384,
40
+ "max_tag_length": 77,
41
+ "batch_size": 32,
42
+ "num_epochs": 3,
43
+ "learning_rate": 0.0001,
44
+ "weight_decay": 0.01,
45
+ "warmup_steps": 1000,
46
+ "gradient_clip": 1.0,
47
+ "gradient_accumulation_steps": 1,
48
+ "token_loss_weight": 1.0,
49
+ "geometric_weight": 0.1,
50
+ "fusion_strategy": "learned_weighted",
51
+ "text_dropout_prob": 0.3,
52
+ "text_noise_std": 0.1,
53
+ "text_noise_prob": 0.5,
54
+ "vision_only_text": "general: blank_image",
55
+ "text_dropout_schedule": "linear",
56
+ "text_dropout_start": 0.1,
57
+ "text_dropout_end": 0.5,
58
+ "checkpoint_dir": "./checkpoints/liminal_staircase_danbooru",
59
+ "save_every": 500,
60
+ "hf_repo_id": "AbstractPhil/liminal-staircase-v2",
61
+ "hf_upload_every": 1000,
62
+ "hf_private": false,
63
+ "resume": true,
64
+ "log_dir": "./logs/liminal_staircase_danbooru",
65
+ "log_every": 5,
66
+ "device": "cuda",
67
+ "timestamp": "2025-11-17T05:12:15.951499",
68
+ "step": 4585,
69
+ "epoch": 2,
70
+ "val_loss": 3.1854735561039136,
71
+ "fusion_diagnostics": {
72
+ "layer_weights": [
73
+ 0.05383476987481117,
74
+ 0.05307381600141525,
75
+ 0.05373457819223404,
76
+ 0.05356656759977341,
77
+ 0.05366039276123047,
78
+ 0.05355314910411835,
79
+ 0.05681484192609787,
80
+ 0.05716554448008537,
81
+ 0.05682314559817314,
82
+ 0.056655507534742355,
83
+ 0.05661669373512268,
84
+ 0.05677459016442299,
85
+ 0.056852370500564575,
86
+ 0.05770973861217499,
87
+ 0.05561613664031029,
88
+ 0.05701824277639389,
89
+ 0.05642232671380043,
90
+ 0.05410761386156082
91
+ ],
92
+ "scale_weights": [
93
+ 0.3062363862991333,
94
+ 0.3039093315601349,
95
+ 0.3898542523384094
96
+ ],
97
+ "alpha_per_scale": [
98
+ 0.27752038836479187,
99
+ 0.27752038836479187,
100
+ 0.27752038836479187
101
+ ],
102
+ "beta_per_scale": [
103
+ 0.5901311635971069,
104
+ 0.5916643142700195,
105
+ 0.5984686613082886
106
+ ],
107
+ "scale_statistics": {},
108
+ "text_modality_stats": {
109
+ "clean": "30.0%",
110
+ "noisy": "29.7%",
111
+ "sentinel": "40.3%"
112
+ }
113
+ },
114
+ "is_best": true
115
+ }
danbooru-50k-v1/epoch2_step4585_20251117_051214/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f169684ac83dddb6425e8b09d4f1276cd9d828689667e4dda3dccb182aad2992
3
+ size 328002100
danbooru-50k-v1/epoch2_step4585_20251117_051214/training_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dccc1dbc0d1a59639b4d067bdbf9fb66117a9aa942d18f712011bba2685042f
3
+ size 606530821