zerofata committed on
Commit
318f797
·
verified ·
1 Parent(s): ebc4a7a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +244 -524
README.md CHANGED
@@ -1,285 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  <style>
2
- .container {
3
- --primary-accent: #C0C0C0;
4
- --secondary-accent: #4A9EFF;
5
- --glow-primary: rgba(192, 192, 192, 0.6);
6
- --glow-secondary: rgba(74, 158, 255, 0.6);
7
-
8
- --bg-main: #0B0A18;
9
- --bg-container: #110F24;
10
- --bg-card: rgba(20, 18, 40, 0.7);
11
-
12
- --text-main: #DCDCDC;
13
- --text-muted: #9E9E9E;
14
- --white: #FFFFFF;
15
- --border-color: #3C3A50;
16
-
17
- --font-title: 'Cinzel', serif;
18
- --font-body: 'EB Garamond', serif;
19
- --font-code: 'Courier New', monospace;
20
-
21
- font-family: var(--font-body);
22
- color: var(--text-main);
23
  line-height: 1.6;
24
- font-weight: 400;
25
-
26
- max-width: 1100px;
27
- margin: 20px auto;
28
- padding: 25px;
29
- background-color: var(--bg-main);
30
- background-image: linear-gradient(rgba(11, 10, 24, 0.95), rgba(11, 10, 24, 0.95)), url('https://www.transparenttextures.com/patterns/stardust.png');
31
- min-height: calc(100vh - 40px);
32
-
33
- border-radius: 8px;
34
- box-shadow: 0 0 25px rgba(0,0,0,0.7);
35
- border: 1px solid var(--border-color);
36
  }
37
 
38
- .container .title-container {
39
- background: linear-gradient(135deg, rgba(20, 18, 40, 0.8), rgba(30, 28, 50, 0.6));
40
- margin-bottom: 30px;
41
- border: 1px solid var(--border-color);
42
- border-radius: 6px;
43
- padding: 25px;
44
- text-align: center;
45
  position: relative;
46
- box-shadow: 0 5px 15px rgba(0,0,0,0.4);
47
- overflow: hidden;
 
48
  }
49
 
50
- .container .title-main {
51
- color: var(--white);
52
- font-size: 2.5rem;
53
- font-weight: 700;
54
- margin: 0;
55
- letter-spacing: 4px;
56
- display: block;
57
- text-transform: uppercase;
58
- text-shadow: 0 0 4px var(--glow-primary), 0 0 8px var(--glow-primary), 0 0 12px var(--glow-primary);
59
- font-family: var(--font-title);
60
  }
61
 
62
- .container .lemonade-text {
63
- color: var(--secondary-accent);
64
- text-shadow: 0 0 8px var(--glow-secondary);
 
 
65
  }
66
 
67
- .container .title-subtitle {
68
- padding-left: 0;
69
- margin-top: 15px;
 
 
 
70
  }
71
 
72
- .container .subtitle-text {
73
- color: var(--text-muted);
74
- font-size: 1.2rem;
75
- font-family: var(--font-body);
76
- font-style: italic;
 
77
  font-weight: 400;
78
- letter-spacing: 2px;
79
- text-transform: uppercase;
80
- opacity: 0.8;
81
  }
82
 
83
- .container img {
84
- max-width: 100%;
85
- border: 2px solid var(--border-color);
86
- margin-bottom: 40px;
87
- box-shadow: 0 5px 15px rgba(0,0,0,0.5);
88
- border-radius: 4px;
89
  }
90
 
91
- .container .section-container {
92
- margin-bottom: 25px;
93
- padding-bottom: 25px;
94
- border-bottom: 1px dashed var(--border-color);
95
- }
96
- .container .section-container:last-of-type {
97
- border-bottom: none;
98
- padding-bottom: 0;
99
- margin-bottom: 0;
100
  }
101
 
102
- .container .section-header {
103
- display: flex;
104
- align-items: center;
105
- padding: 0 0 15px 0;
 
106
  }
107
 
108
- .container .section-title {
109
- font-family: var(--font-title);
110
- background: linear-gradient(45deg, var(--secondary-accent), var(--primary-accent));
111
- background-clip: text;
112
- -webkit-background-clip: text;
113
- -webkit-text-fill-color: transparent;
114
- font-size: 1.4rem;
115
- margin: 0 !important;
116
- padding: 0 0 10px 0 !important;
117
- letter-spacing: 1px;
118
  font-weight: 700;
119
- text-transform: uppercase;
120
- border: none !important;
121
- position: relative;
122
  display: inline-block;
 
 
123
  }
124
 
125
- .container .section-title::after {
126
- content: '';
127
- position: absolute;
128
- bottom: 0;
129
- left: 0;
130
- width: 100%;
131
- height: 2px;
132
- background-image: linear-gradient(to right, var(--secondary-accent), var(--primary-accent));
133
- box-shadow: 0 0 6px var(--glow-secondary), 0 0 6px var(--glow-primary);
134
- border-radius: 2px;
135
  }
136
 
137
- .container .section-content {
138
- padding: 20px 0 0 0;
 
 
139
  }
140
 
141
- .container .subheading {
142
- color: var(--secondary-accent);
143
- font-size: 1.1rem;
144
- margin-top: 20px;
145
- margin-bottom: 12px;
146
- font-weight: 700;
147
- display: block;
148
  text-transform: uppercase;
149
- letter-spacing: 2px;
150
- font-family: var(--font-title);
151
- border-bottom: 1px solid var(--secondary-accent);
152
- padding-bottom: 6px;
153
- text-shadow: 0 0 4px var(--glow-secondary);
 
 
 
 
 
 
154
  }
155
 
156
- .container .data-box {
157
- background-color: var(--bg-card);
 
158
  padding: 15px;
159
- border: 1px solid var(--border-color);
160
- border-left: 2px solid var(--primary-accent);
161
- margin-bottom: 15px;
162
- box-shadow: inset 0 0 6px rgba(0,0,0,0.4);
163
- border-radius: 4px;
164
- font-size: 1rem;
165
  }
166
 
167
- .container .data-row {
168
  display: flex;
169
- align-items: center;
170
- margin-bottom: 6px;
171
- padding: 5px 0;
172
  }
173
 
174
- .container .data-row:last-child {
175
- margin-bottom: 0;
 
 
176
  }
177
 
178
- .container .data-arrow {
179
- color: var(--secondary-accent);
180
- font-weight: bold;
181
- margin-right: 10px;
182
- font-family: var(--font-code);
183
- font-size: 1rem;
184
  }
185
 
186
- .container .data-label {
187
- color: var(--white);
188
- font-weight: 600;
189
- font-family: var(--font-body);
190
- margin-right: 8px;
191
- min-width: 80px;
 
 
 
 
 
 
192
  }
193
 
194
- .container a {
195
- color: var(--primary-accent);
 
196
  text-decoration: none;
197
- font-weight: 600;
198
- transition: all .2s;
199
  }
200
 
201
- .container .data-row a {
202
- border-bottom: 1px dotted var(--primary-accent);
 
203
  }
204
 
205
- .container a:hover {
206
- text-decoration: none;
207
- color: var(--white);
208
- text-shadow: 0 0 5px var(--glow-primary);
209
- }
210
-
211
- .container .data-row a:hover {
212
- border-bottom-style: solid;
 
 
 
 
 
 
213
  }
214
 
215
- .container .dropdown-container {
 
216
  margin-top: 20px;
217
  }
218
 
219
- .container .dropdown-summary {
220
  cursor: pointer;
221
  padding: 10px 0;
222
- color: var(--text-muted);
 
223
  font-size: 1.1rem;
224
- font-weight: 700;
225
- text-transform: none;
226
- font-family: var(--font-title);
227
  letter-spacing: 1px;
 
228
  list-style: none;
229
- transition: color 0.2s ease;
 
230
  }
231
- .container .dropdown-summary:hover {
232
- color: var(--primary-accent);
 
233
  }
234
 
235
- .container .dropdown-arrow {
236
- color: var(--secondary-accent);
237
  margin-right: 10px;
238
- transition: transform 0.2s ease;
 
 
 
 
239
  }
240
 
241
- .container .dropdown-content {
242
  margin-top: 15px;
243
- padding: 20px;
244
- background-color: var(--bg-card);
245
- border: 1px solid var(--border-color);
246
- border-radius: 4px;
247
  }
248
 
249
- .container .config-title {
250
- color: var(--text-muted);
251
  font-size: 1rem;
252
  margin-bottom: 10px;
253
- font-family: var(--font-body);
254
  text-transform: uppercase;
255
  letter-spacing: 1px;
256
- font-weight: 700;
257
- }
258
-
259
- .container pre {
260
- background-color: #1c1c1c;
261
- padding: 15px;
262
- border: 1px solid var(--border-color);
263
- white-space: pre-wrap;
264
- word-wrap: break-word;
265
- color: #c5c8c6;
266
- border-radius: 4px;
267
- box-shadow: inset 0 0 5px rgba(0,0,0,0.5);
268
- }
269
-
270
- .container pre code {
271
- background: none;
272
- color: inherit;
273
- padding: 0;
274
- border-radius: 0;
275
- }
276
-
277
- .container code {
278
- font-family: var(--font-code);
279
- color: var(--primary-accent);
280
- background: var(--border-color);
281
- padding: 2px 5px;
282
- border-radius: 4px;
283
  }
284
  </style>
285
  <html lang="en">
@@ -287,27 +252,27 @@
287
  <meta charset="UTF-8">
288
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
289
  <title>Painted Fantasy</title>
290
- <link rel="preconnect" href="https://fonts.googleapis.com">
291
- <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
292
- <link href="https://fonts.googleapis.com/css2?family=Cinzel:wght@400;700&family=MedievalSharp&family=EB+Garamond:ital,wght@0,400;0,500;1,400&display=swap" rel="stylesheet">
293
  </head>
294
  <body>
295
 
296
  <div class="container">
297
  <div class="title-container">
 
298
  <div class="glitchy-overlay"></div>
 
299
  <div class="title-wrapper">
300
  <h1 class="title-main">
301
  <span class="title-prefix">PAINTED FANTASY</span>
302
- <span class="lemonade-text">VISAGE v3</span>
303
  </h1>
304
  <div class="title-subtitle">
305
- <span class="subtitle-text">Mistral Small 3.2 Upscaled 34B</span>
306
  </div>
307
  </div>
308
  </div>
309
 
310
- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/CroIkC3MXC5gIghNjkEVg.png)
311
 
312
  <div class="section-container">
313
  <div class="section-header">
@@ -315,9 +280,9 @@
315
  <h2 class="section-title">Overview</h2>
316
  </div>
317
  <div class="section-content">
318
- <p>No layer left behind edition.</p>
319
- <p>Upscale redone with the missing final layer included. The original upscales were always missing a layer, but I never troubleshot to identify *what* layer was missing. Turns out it was the final layer. That's kind of an important one.</p>
320
- <p>This model is an uncensored, creative writing and RP model. Compared to the older version, it is smarter and I think has a bit less repetition. The old V2 version though is slightly more creative due to the instability it had.</p>
321
  </div>
322
  </div>
323
 
@@ -345,17 +310,17 @@
345
  <span>*In asterisks*</span>
346
  </div>
347
  </div>
348
- <h3 class="subheading">Recommended Samplers</h3>
349
  <div class="data-box">
350
  <div class="data-row">
351
  <span class="data-arrow">></span>
352
  <span class="data-label">Temp:</span>
353
- <span>0.6-0.8</span>
354
  </div>
355
  <div class="data-row">
356
  <span class="data-arrow">></span>
357
  <span class="data-label">MinP:</span>
358
- <span>0.05 - 0.1</span>
359
  </div>
360
  <div class="data-row">
361
  <span class="data-arrow">></span>
@@ -385,12 +350,10 @@
385
  <h3 class="subheading">GGUF</h3>
386
  <div class="data-box">
387
  <div class="data-row">
388
- <span class="data-arrow">></span>
389
- <a href="">Static (tbc)</a>
390
  </div>
391
  <div class="data-row">
392
- <span class="data-arrow">></span>
393
- <a href="">iMatrix (tbc)</a>
394
  </div>
395
  </div>
396
  </div>
@@ -398,20 +361,19 @@
398
  <h3 class="subheading">EXL3</h3>
399
  <div class="data-box">
400
  <div class="data-row">
401
- <span class="data-arrow">></span>
402
- <a href="">3bpw</a>
403
  </div>
404
  <div class="data-row">
405
- <span class="data-arrow">></span>
406
- <a href="">4bpw</a>
407
  </div>
408
  <div class="data-row">
409
- <span class="data-arrow">></span>
410
- <a href="">5bpw</a>
411
  </div>
412
  <div class="data-row">
413
- <span class="data-arrow">></span>
414
- <a href="">6bpw</a>
 
 
415
  </div>
416
  </div>
417
  </div>
@@ -421,354 +383,112 @@
421
  <div class="section-container">
422
  <div class="section-header">
423
  <div class="section-indicator"></div>
424
- <h2 class="section-title">Creation Process</h2>
425
  </div>
426
  <div class="section-content">
427
- <p>Creation Process: Upscale > CPT > SFT > DPO</p>
428
- <p>Pretrained on approx 300MB of light novel and FineWeb-2 corpus.</p>
429
- <p>SFT on approx 8 million tokens, SFW / NSFW RP, stories and creative instruct data.</p>
430
- <p>DPO on a high quality RP / NSFW dataset with a focus on improving instruction following, reducing repetition and fixing common model mistakes.</p>
431
- <div class="dropdown-container">
432
- <details>
433
- <summary class="dropdown-summary">
434
- <span class="dropdown-arrow">></span>
435
- Mergekit configs
436
- </summary>
437
- <div class="dropdown-content">
438
- <p>Merge configurations used during the model creation process.</p>
439
- <div class="config-title">Upscale (Passthrough)</div>
440
- <pre><code>base_model: ConicCat/Mistral-Small-3.2-AntiRep-24B
441
- merge_method: passthrough
442
- dtype: bfloat16
443
- slices:
444
- - sources:
445
- - model: ConicCat/Mistral-Small-3.2-AntiRep-24B
446
- layer_range: [0, 29]
447
- - sources:
448
- - model: ConicCat/Mistral-Small-3.2-AntiRep-24B
449
- layer_range: [10, 40]</code></pre>
450
- </div>
451
- </details>
452
- </div>
453
- <div class="dropdown-container">
454
- <details>
455
- <summary class="dropdown-summary">
456
- <span class="dropdown-arrow">></span>
457
- Axolotl configs
458
- </summary>
459
  <div class="dropdown-content">
460
  <p>Not optimized for cost / performance efficiency, YMMV.</p>
461
- <div class="config-title">Pretrain 4*H100</div>
462
  <pre><code>&#35; ====================
463
  &#35; MODEL CONFIGURATION
464
  &#35; ====================
465
- base_model: ../mergekit/pf_v2_upscale
466
- model_type: MistralForCausalLM
467
  tokenizer_type: AutoTokenizer
468
  chat_template: mistral_v7_tekken
 
469
  &#35; ====================
470
  &#35; DATASET CONFIGURATION
471
  &#35; ====================
472
  datasets:
473
- - path: ./data/pretrain_dataset_v5_stripped.jsonl
474
- type: completion
475
- <br>
 
 
 
 
 
 
 
 
 
 
476
  dataset_prepared_path:
477
  train_on_inputs: false &#35; Only train on assistant responses
478
- <br>
479
  &#35; ====================
480
  &#35; QLORA CONFIGURATION
481
  &#35; ====================
482
  adapter: qlora
483
  load_in_4bit: true
484
- lora_r: 32
485
- lora_alpha: 64
486
- lora_dropout: 0.05
487
  lora_target_linear: true
488
  &#35; lora_modules_to_save: &#35; Uncomment only if you added NEW tokens
489
- <br>
490
  &#35; ====================
491
  &#35; TRAINING PARAMETERS
492
  &#35; ====================
493
- num_epochs: 1
494
- micro_batch_size: 4
495
  gradient_accumulation_steps: 1
496
- learning_rate: 4e-5
497
  optimizer: paged_adamw_8bit
498
  lr_scheduler: rex
499
  warmup_ratio: 0.05
500
  weight_decay: 0.01
501
  max_grad_norm: 1.0
502
- <br>
503
  &#35; ====================
504
  &#35; SEQUENCE &amp; PACKING
505
  &#35; ====================
506
- sequence_len: 12288
507
  sample_packing: true
508
  eval_sample_packing: false
509
  pad_to_sequence_len: true
510
- <br>
511
  &#35; ====================
512
  &#35; HARDWARE OPTIMIZATIONS
513
  &#35; ====================
514
  bf16: auto
515
  flash_attention: true
516
- gradient_checkpointing: offload
517
- deepspeed: deepspeed_configs/zero1.json
518
- <br>
519
- plugins:
520
- - axolotl.integrations.liger.LigerPlugin
521
- - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
522
- cut_cross_entropy: true
523
- liger_rope: true
524
- liger_rms_norm: true
525
- liger_layer_norm: true
526
- liger_glu_activation: true
527
- liger_cross_entropy: false &#35; Cut Cross Entropy overrides this
528
- liger_fused_linear_cross_entropy: false &#35; Cut Cross Entropy overrides this
529
- <br>
530
  &#35; ====================
531
  &#35; EVALUATION &amp; CHECKPOINTING
532
  &#35; ====================
533
  save_strategy: steps
534
- save_steps: 40
535
  save_total_limit: 5 &#35; Keep best + last few checkpoints
536
  load_best_model_at_end: true
 
537
  greater_is_better: false
538
- <br>
539
  &#35; ====================
540
  &#35; LOGGING &amp; OUTPUT
541
  &#35; ====================
542
- output_dir: ./Visage-V2-PT-1
543
  logging_steps: 2
544
  save_safetensors: true
545
- <br>
546
  &#35; ====================
547
  &#35; WANDB TRACKING
548
  &#35; ====================
549
- wandb_project: Visage-V2-PT
550
- # wandb_entity: your_entity
551
- wandb_name: Visage-V2-PT-1</code></pre>
552
- <div class="config-title">SFT 4*H100</div>
553
- <pre><code># ====================
554
- # MODEL CONFIGURATION
555
- # ====================
556
- base_model: ./Visage-V3-PT-1/merged
557
- model_type: MistralForCausalLM
558
- tokenizer_type: AutoTokenizer
559
- chat_template: mistral_v7_tekken
560
- <br>
561
- # ====================
562
- # DATASET CONFIGURATION
563
- # ====================
564
- datasets:
565
- - path: ./data/dataset.jsonl
566
- type: chat_template
567
- split: train
568
- chat_template_strategy: tokenizer
569
- field_messages: messages
570
- message_property_mappings:
571
- role: role
572
- content: content
573
- roles:
574
- user: ["user"]
575
- assistant: ["assistant"]
576
- system: ["system"]
577
- <br>
578
- dataset_prepared_path:
579
- train_on_inputs: false # Only train on assistant responses
580
- <br>
581
- # ====================
582
- # QLORA CONFIGURATION
583
- # ====================
584
- adapter: qlora
585
- load_in_4bit: true
586
- lora_r: 128
587
- lora_alpha: 128
588
- lora_dropout: 0.1
589
- lora_target_linear: true
590
- # lora_modules_to_save: # Uncomment only if you added NEW tokens
591
- <br>
592
- # ====================
593
- # TRAINING PARAMETERS
594
- # ====================
595
- num_epochs: 3
596
- micro_batch_size: 4
597
- gradient_accumulation_steps: 1
598
- learning_rate: 1e-5
599
- optimizer: paged_adamw_8bit
600
- lr_scheduler: rex
601
- warmup_ratio: 0.05
602
- weight_decay: 0.01
603
- max_grad_norm: 1.0
604
- <br>
605
- # ====================
606
- # SEQUENCE & PACKING
607
- # ====================
608
- sequence_len: 8192
609
- sample_packing: true
610
- pad_to_sequence_len: true
611
- <br>
612
- # ====================
613
- # HARDWARE OPTIMIZATIONS
614
- # ====================
615
- bf16: auto
616
- flash_attention: true
617
- gradient_checkpointing: offload
618
- deepspeed: deepspeed_configs/zero1.json
619
- <br>
620
- plugins:
621
- - axolotl.integrations.liger.LigerPlugin
622
- - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
623
- cut_cross_entropy: true
624
- liger_rope: true
625
- liger_rms_norm: true
626
- liger_layer_norm: true
627
- liger_glu_activation: true
628
- liger_cross_entropy: false # Cut Cross Entropy overrides this
629
- liger_fused_linear_cross_entropy: false # Cut Cross Entropy overrides this
630
- <br>
631
- # ====================
632
- # EVALUATION & CHECKPOINTING
633
- # ====================
634
- save_strategy: steps
635
- save_steps: 20
636
- save_total_limit: 5 # Keep best + last few checkpoints
637
- load_best_model_at_end: true
638
- metric_for_best_model: eval_loss
639
- greater_is_better: false
640
- <br>
641
- # ====================
642
- # LOGGING & OUTPUT
643
- # ====================
644
- output_dir: ./Visage-V3-PT-1-SFT-2
645
- logging_steps: 1
646
- save_safetensors: true
647
- <br>
648
- # ====================
649
- # WANDB TRACKING
650
- # ====================
651
- wandb_project: Visage-V3-SFT
652
- # wandb_entity: your_entity
653
- wandb_name: Visage-V3-PT-1-SFT-2</code></pre>
654
- <div class="config-title">DPO 2*H200</div>
655
- <pre><code># ====================
656
- # MODEL CONFIGURATION
657
- # ====================
658
- base_model: ./Visage-V3-PT-1-SFT-2/merged
659
- model_type: MistralForCausalLM
660
- tokenizer_type: AutoTokenizer
661
- chat_template: mistral_v7_tekken
662
- <br>
663
- # ====================
664
- # RL/DPO CONFIGURATION
665
- # ====================
666
- rl: dpo
667
- rl_beta: 0.085
668
- <br>
669
- # ====================
670
- # DATASET CONFIGURATION
671
- # ====================
672
- datasets:
673
- - path: ./data/handcrafted_dataset_mistral_rep.jsonl
674
- type: chat_template.default
675
- field_messages: messages
676
- field_chosen: chosen
677
- field_rejected: rejected
678
- message_property_mappings:
679
- role: role
680
- content: content
681
- roles:
682
- system: ["system"]
683
- user: ["user"]
684
- assistant: ["assistant"]
685
- - path: ./data/approved_automated_l3_dataset.jsonl
686
- type: chat_template.default
687
- field_messages: messages
688
- field_chosen: chosen
689
- field_rejected: rejected
690
- message_property_mappings:
691
- role: role
692
- content: content
693
- roles:
694
- system: ["system"]
695
- user: ["user"]
696
- assistant: ["assistant"]
697
- dataset_prepared_path:
698
- train_on_inputs: false # Only train on assistant responses
699
- <br>
700
- # ====================
701
- # QLORA CONFIGURATION
702
- # ====================
703
- adapter: lora
704
- load_in_8bit: true
705
- lora_r: 16
706
- lora_alpha: 32
707
- lora_dropout: 0.1
708
- lora_target_linear: true
709
- # lora_modules_to_save: # Uncomment only if you added NEW tokens
710
- <br>
711
- # ====================
712
- # TRAINING PARAMETERS
713
- # ====================
714
- num_epochs: 1
715
- micro_batch_size: 2
716
- gradient_accumulation_steps: 4
717
- learning_rate: 2e-6
718
- optimizer: adamw_torch_fused
719
- lr_scheduler: cosine
720
- warmup_steps: 5
721
- weight_decay: 0.01
722
- max_grad_norm: 1.0
723
- <br>
724
- # ====================
725
- # SEQUENCE CONFIGURATION
726
- # ====================
727
- sequence_len: 8192
728
- pad_to_sequence_len: true
729
- <br>
730
- # ====================
731
- # HARDWARE OPTIMIZATIONS
732
- # ====================
733
- bf16: auto
734
- tf32: false
735
- flash_attention: true
736
- gradient_checkpointing: offload
737
- <br>
738
- plugins:
739
- - axolotl.integrations.liger.LigerPlugin
740
- - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
741
- cut_cross_entropy: true
742
- liger_rope: true
743
- liger_rms_norm: true
744
- liger_layer_norm: true
745
- liger_glu_activation: true
746
- liger_cross_entropy: false # Cut Cross Entropy overrides this
747
- liger_fused_linear_cross_entropy: false # Cut Cross Entropy overrides this
748
- deepspeed: deepspeed_configs/zero1.json
749
- <br>
750
- # ====================
751
- # CHECKPOINTING
752
- # ====================
753
- save_steps: 10
754
- save_total_limit: 10
755
- load_best_model_at_end: true
756
- metric_for_best_model: eval_loss
757
- greater_is_better: false
758
- <br>
759
- # ====================
760
- # LOGGING & OUTPUT
761
- # ====================
762
- output_dir: ./Visage-V3-PT-1-SFT-2-DPO-2
763
- logging_steps: 1
764
- save_safetensors: true
765
- <br>
766
- # ====================
767
- # WANDB TRACKING
768
- # ====================
769
- wandb_project: Visage-V3-DPO
770
- # wandb_entity: your_entity
771
- wandb_name: Visage-V3-PT-1-SFT-2-DPO-2</code></pre>
772
  </div>
773
  </details>
774
  </div>
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ datasets:
5
+ - zerofata/Roleplay-Anime-Characters
6
+ - zerofata/Instruct-Anime-CreativeWriting
7
+ - zerofata/Summaries-Anime-FandomPages
8
+ - zerofata/Instruct-Anime
9
+ base_model:
10
+ - ConicCat/Mistral-Small-3.2-AntiRep-24B
11
+ ---
12
+ <!DOCTYPE html>
13
  <style>
14
+ body {
15
+ font-family: 'Georgia', 'Times New Roman', serif;
16
+ color: #dce4f0; /* Soft off-white */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  line-height: 1.6;
18
+ margin: 0;
19
+ padding: 0;
20
+ background-color: #161a25; /* Deep blue from dark sky */
 
 
 
 
 
 
 
 
 
21
  }
22
 
23
+ .lemonade-text {
24
+ color: #89d8ff; /* Bright blue from city lights */
 
 
 
 
 
25
  position: relative;
26
+ z-index: 2;
27
+ margin-left: 0.2em;
28
+ text-shadow: 0 0 15px #89d8ff;
29
  }
30
 
31
+ /* Section styling */
32
+ .section-container {
33
+ background-color: rgba(32, 40, 56, 0.7); /* Slightly transparent dark blue */
34
+ margin-bottom: 30px;
35
+ position: relative;
36
+ overflow: hidden;
37
+ border-bottom: 1px solid #ff9966; /* Sunset orange */
38
+ box-shadow: 0 4px 15px rgba(255, 153, 102, 0.05);
 
 
39
  }
40
 
41
+ .section-header {
42
+ display: flex;
43
+ align-items: center;
44
+ background-color: rgba(255, 153, 102, 0.12);
45
+ padding: 10px 20px;
46
  }
47
 
48
+ .section-indicator {
49
+ width: 8px;
50
+ height: 20px;
51
+ background-color: #ff9966; /* Sunset orange */
52
+ margin-right: 15px;
53
+ box-shadow: 0 0 8px rgba(255, 153, 102, 0.2);
54
  }
55
 
56
+ .section-title {
57
+ font-family: 'Playfair Display', serif; /* Using the new font */
58
+ color: #ffb399; /* Lighter sunset shade */
59
+ font-size: 1.4rem;
60
+ margin: 0;
61
+ letter-spacing: 1px;
62
  font-weight: 400;
63
+ text-transform: capitalize;
 
 
64
  }
65
 
66
+ .section-content {
67
+ padding: 20px;
68
+ font-family: 'Crimson Text', serif; /* Using the new font */
69
+ color: #dce4f0;
70
+ line-height: 1.6;
 
71
  }
72
 
73
+ /* Title styling */
74
+ .title-container {
75
+ background-color: #202838;
76
+ position: relative;
77
+ overflow: hidden;
78
+ margin-bottom: 40px;
79
+ border-left: 3px solid #ff9966; /* Sunset orange */
80
+ box-shadow: 0 6px 20px rgba(255, 153, 102, 0.07);
 
81
  }
82
 
83
+ .title-wrapper {
84
+ position: relative;
85
+ z-index: 2;
86
+ padding: 25px 20px 30px 30px;
87
+ font-family: 'Playfair Display', serif;
88
  }
89
 
90
+ .title-main {
91
+ color: #ffb399; /* Lighter sunset shade */
92
+ font-size: 2.5rem;
 
 
 
 
 
 
 
93
  font-weight: 700;
94
+ margin: 0;
95
+ letter-spacing: 2px;
 
96
  display: inline-block;
97
+ position: relative;
98
+ text-transform: uppercase;
99
  }
100
 
101
+ .title-prefix {
102
+ position: relative;
103
+ z-index: 2;
 
 
 
 
 
 
 
104
  }
105
 
106
+ .title-subtitle {
107
+ padding-left: 15px;
108
+ margin-top: 5px;
109
+ margin-left: 5px;
110
  }
111
 
112
+ .subtitle-text {
113
+ color: #a6c8e0; /* Muted sky blue */
114
+ font-size: 1.2rem;
115
+ font-family: 'Crimson Text', serif;
116
+ font-weight: 300;
117
+ letter-spacing: 3px;
 
118
  text-transform: uppercase;
119
+ display: inline-block;
120
+ }
121
+
122
+ .glitchy-overlay {
123
+ position: absolute;
124
+ top: 0;
125
+ left: 0;
126
+ width: 100%;
127
+ height: 100%;
128
+ background-image: repeating-linear-gradient(0deg, rgba(0,0,0,0) 0, rgba(137, 216, 255, 0.08) 1px, rgba(0,0,0,0) 2px); /* Rain effect with blue tint */
129
+ z-index: 1;
130
  }
131
 
132
+ /* Data box styling */
133
+ .data-box {
134
+ background-color: rgba(22, 26, 37, 0.6);
135
  padding: 15px;
136
+ border-left: 2px solid #ff9966; /* Sunset orange */
137
+ margin-bottom: 20px;
138
+ box-shadow: 0 2px 10px rgba(255, 153, 102, 0.05);
 
 
 
139
  }
140
 
141
+ .data-row {
142
  display: flex;
143
+ margin-bottom: 8px;
 
 
144
  }
145
 
146
+ .data-arrow {
147
+ color: #ff9966; /* Sunset orange */
148
+ width: 20px;
149
+ display: inline-block;
150
  }
151
 
152
+ .data-label {
153
+ color: #a6c8e0; /* Muted sky blue */
154
+ width: 80px;
155
+ display: inline-block;
 
 
156
  }
157
 
158
+ /* Subheading styling */
159
+ .subheading {
160
+ color: #a6c8e0; /* Muted sky blue */
161
+ font-size: 1.1rem;
162
+ margin-top: 20px;
163
+ margin-bottom: 15px;
164
+ font-weight: 400;
165
+ border-bottom: 1px dashed rgba(166, 200, 224, 0.4);
166
+ display: inline-block;
167
+ text-transform: uppercase;
168
+ letter-spacing: 1px;
169
+ font-family: 'Playfair Display', serif;
170
  }
171
 
172
+ /* Links */
173
+ a {
174
+ color: #89d8ff; /* Bright blue from city lights */
175
  text-decoration: none;
 
 
176
  }
177
 
178
+ a:hover {
179
+ text-decoration: underline;
180
+ color: #ffb399; /* Lighter sunset shade on hover */
181
  }
182
 
183
+ /* Container */
184
+ .container {
185
+ max-width: 1200px;
186
+ margin: 20px auto;
187
+ padding: 40px 20px;
188
+ background-color: #202838; /* Darker container background */
189
+ background-image:
190
+ radial-gradient(circle at 20% 80%, rgba(255, 153, 102, 0.04) 0%, transparent 50%), /* Sunset glow */
191
+ radial-gradient(circle at 80% 20%, rgba(137, 216, 255, 0.04) 0%, transparent 50%), /* Blue glow */
192
+ radial-gradient(circle at 40% 40%, rgba(224, 230, 241, 0.02) 0%, transparent 50%); /* Faint cloud/light glow */
193
+ min-height: calc(100vh - 40px);
194
+ border: 1px solid #ff9966; /* Sunset orange */
195
+ border-radius: 8px;
196
+ box-shadow: 0 8px 32px rgba(255, 153, 102, 0.07);
197
  }
198
 
199
+ /* Dropdown styling */
200
+ .dropdown-container {
201
  margin-top: 20px;
202
  }
203
 
204
+ .dropdown-summary {
205
  cursor: pointer;
206
  padding: 10px 0;
207
+ border-bottom: 1px dashed rgba(166, 200, 224, 0.4);
208
+ color: #a6c8e0; /* Muted sky blue */
209
  font-size: 1.1rem;
210
+ font-weight: 400;
211
+ text-transform: uppercase;
 
212
  letter-spacing: 1px;
213
+ font-family: 'Playfair Display', serif;
214
  list-style: none;
215
+ display: flex;
216
+ align-items: center;
217
  }
218
+
219
+ .dropdown-summary::-webkit-details-marker {
220
+ display: none;
221
  }
222
 
223
+ .dropdown-arrow {
224
+ color: #ff9966; /* Sunset orange */
225
  margin-right: 10px;
226
+ transition: transform 0.3s ease;
227
+ }
228
+
229
+ .dropdown-container details[open] .dropdown-arrow {
230
+ transform: rotate(90deg);
231
  }
232
 
233
+ .dropdown-content {
234
  margin-top: 15px;
235
+ padding: 15px;
236
+ background-color: rgba(22, 26, 37, 0.6);
237
+ border-left: 2px solid #ff9966; /* Sunset orange */
238
+ box-shadow: 0 2px 10px rgba(255, 153, 102, 0.05);
239
  }
240
 
241
+ .config-title {
242
+ color: #a6c8e0; /* Muted sky blue */
243
  font-size: 1rem;
244
  margin-bottom: 10px;
245
+ font-family: 'Playfair Display', serif;
246
  text-transform: uppercase;
247
  letter-spacing: 1px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  }
249
  </style>
250
  <html lang="en">
 
252
  <meta charset="UTF-8">
253
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
254
  <title>Painted Fantasy</title>
255
+ <link href="https://fonts.googleapis.com/css2?family=Crimson+Text:wght@400;600;700&family=Playfair+Display:wght@400;700&display=swap" rel="stylesheet">
 
 
256
  </head>
257
  <body>
258
 
259
  <div class="container">
260
  <div class="title-container">
261
+ <!-- Glitchy overlay -->
262
  <div class="glitchy-overlay"></div>
263
+ <!-- Main title -->
264
  <div class="title-wrapper">
265
  <h1 class="title-main">
266
  <span class="title-prefix">PAINTED FANTASY</span>
267
+ <span class="lemonade-text">v2</span>
268
  </h1>
269
  <div class="title-subtitle">
270
+ <span class="subtitle-text">MS3.2-24B</span>
271
  </div>
272
  </div>
273
  </div>
274
 
275
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/nG4d50uWBkQEvSZvJZod8.png)
276
 
277
  <div class="section-container">
278
  <div class="section-header">
 
280
  <h2 class="section-title">Overview</h2>
281
  </div>
282
  <div class="section-content">
283
+ <p>This is an uncensored creative model intended to excel at character driven RP / ERP.</p>
284
+ <p>Version 2 feels quite different from the original, with a heavy focus on reducing repetition across conversations and improving instruction following.</p>
285
+ <p>Has a pretty unique writing style and sense of creativity (IMO). Pays the price with intermittent brain farts though.</p>
286
  </div>
287
  </div>
288
 
 
310
  <span>*In asterisks*</span>
311
  </div>
312
  </div>
313
+ <h3 class="subheading">Suggested Samplers</h3>
314
  <div class="data-box">
315
  <div class="data-row">
316
  <span class="data-arrow">></span>
317
  <span class="data-label">Temp:</span>
318
+ <span>0.5-0.6</span>
319
  </div>
320
  <div class="data-row">
321
  <span class="data-arrow">></span>
322
  <span class="data-label">MinP:</span>
323
+ <span>0.1</span>
324
  </div>
325
  <div class="data-row">
326
  <span class="data-arrow">></span>
 
350
  <h3 class="subheading">GGUF</h3>
351
  <div class="data-box">
352
  <div class="data-row">
353
+ <span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/mradermacher/MS3.2-PaintedFantasy-v2-24B-GGUF">Static (mradermacher)</a>
 
354
  </div>
355
  <div class="data-row">
356
+ <span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/mradermacher/MS3.2-PaintedFantasy-v2-24B-i1-GGUF">iMatrix (mradermacher)</a>
 
357
  </div>
358
  </div>
359
  </div>
 
361
  <h3 class="subheading">EXL3</h3>
362
  <div class="data-box">
363
  <div class="data-row">
364
+ <span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-v2-24b-exl3-3bpw">3bpw</a>
 
365
  </div>
366
  <div class="data-row">
367
+ <span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-v2-24b-exl3-3.5bpw">3.5bpw</a>
 
368
  </div>
369
  <div class="data-row">
370
+ <span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-v2-24b-exl3-4bpw">4bpw</a>
 
371
  </div>
372
  <div class="data-row">
373
+ <span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-v2-24b-exl3-5bpw">5bpw</a>
374
+ </div>
375
+ <div class="data-row">
376
+ <span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-v2-24b-exl3-6bpw">6bpw</a>
377
  </div>
378
  </div>
379
  </div>
 
383
  <div class="section-container">
384
  <div class="section-header">
385
  <div class="section-indicator"></div>
386
+ <h2 class="section-title">Training Process</h2>
387
  </div>
388
  <div class="section-content">
389
+ <p>Training process: SFT > DPO > KTO</p>
390
+ <p>SFT with RP/ERP, stories and in-character assistant data.</p>
391
+ <p>DPO focused on reducing repetition, misgendered characters and slop.</p>
392
+ <p>KTO focused on further reducing repetition and slop.</p>
393
+ <div class="dropdown-container">
394
+ <details>
395
+ <summary class="dropdown-summary">
396
+ <span class="dropdown-arrow">></span>
397
+ Axolotl configs
398
+ </summary>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  <div class="dropdown-content">
400
  <p>Not optimized for cost / performance efficiency, YMMV.</p>
401
+ <div class="config-title">SFT 1*H100</div>
402
  <pre><code>&#35; ====================
403
  &#35; MODEL CONFIGURATION
404
  &#35; ====================
405
+ base_model: ConicCat/Mistral-Small-3.2-AntiRep-24B
406
+ model_type: AutoModelForCausalLM
407
  tokenizer_type: AutoTokenizer
408
  chat_template: mistral_v7_tekken
409
+ <br>
410
  &#35; ====================
411
  &#35; DATASET CONFIGURATION
412
  &#35; ====================
413
  datasets:
414
+ - path: ./dataset.jsonl
415
+ type: chat_template
416
+ split: train
417
+ chat_template_strategy: tokenizer
418
+ field_messages: messages
419
+ message_property_mappings:
420
+ role: role
421
+ content: content
422
+ roles:
423
+ user: ["user"]
424
+ assistant: ["assistant"]
425
+ system: ["system"]
426
+
427
  dataset_prepared_path:
428
  train_on_inputs: false &#35; Only train on assistant responses
429
+
430
  &#35; ====================
431
  &#35; QLORA CONFIGURATION
432
  &#35; ====================
433
  adapter: qlora
434
  load_in_4bit: true
435
+ lora_r: 128
436
+ lora_alpha: 128
437
+ lora_dropout: 0.1
438
  lora_target_linear: true
439
  &#35; lora_modules_to_save: &#35; Uncomment only if you added NEW tokens
440
+
441
  &#35; ====================
442
  &#35; TRAINING PARAMETERS
443
  &#35; ====================
444
+ num_epochs: 3
445
+ micro_batch_size: 8
446
  gradient_accumulation_steps: 1
447
+ learning_rate: 1e-5
448
  optimizer: paged_adamw_8bit
449
  lr_scheduler: rex
450
  warmup_ratio: 0.05
451
  weight_decay: 0.01
452
  max_grad_norm: 1.0
453
+
454
  &#35; ====================
455
  &#35; SEQUENCE &amp; PACKING
456
  &#35; ====================
457
+ sequence_len: 8192
458
  sample_packing: true
459
  eval_sample_packing: false
460
  pad_to_sequence_len: true
461
+
462
  &#35; ====================
463
  &#35; HARDWARE OPTIMIZATIONS
464
  &#35; ====================
465
  bf16: auto
466
  flash_attention: true
467
+ gradient_checkpointing: true
468
+
 
 
 
 
 
 
 
 
 
 
 
 
469
  &#35; ====================
470
  &#35; EVALUATION &amp; CHECKPOINTING
471
  &#35; ====================
472
  save_strategy: steps
473
+ save_steps: 20
474
  save_total_limit: 5 &#35; Keep best + last few checkpoints
475
  load_best_model_at_end: true
476
+ metric_for_best_model: eval_loss
477
  greater_is_better: false
478
+
479
  &#35; ====================
480
  &#35; LOGGING &amp; OUTPUT
481
  &#35; ====================
482
+ output_dir: ./PT-SFT_1
483
  logging_steps: 2
484
  save_safetensors: true
485
+
486
  &#35; ====================
487
  &#35; WANDB TRACKING
488
  &#35; ====================
489
+ wandb_project: PF-SFT
490
+ wandb_entity: your_entity
491
+ wandb_name: run_name</code></pre>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  </div>
493
  </details>
494
  </div>