ApacheOne committed
Commit a859e05 · verified · 1 Parent(s): 728da85

Upload LoRA adapter + config + train.toml

Files changed (3)
  1. adapter_config.json +74 -0
  2. adapter_model.safetensors +3 -0
  3. train.toml +85 -0
adapter_config.json ADDED
@@ -0,0 +1,74 @@
+ {
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.1",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "20.attn.to_out",
+ "23.attn.to_out",
+ "17.attn.to_out",
+ "16.attn.to_out",
+ "10.attn.to_out",
+ "add_v_proj",
+ "18.attn.to_out",
+ "single_transformer_blocks.1.attn.to_out",
+ "to_qkv_mlp_proj",
+ "to_add_out",
+ "single_transformer_blocks.6.attn.to_out",
+ "22.attn.to_out",
+ "single_transformer_blocks.7.attn.to_out",
+ "21.attn.to_out",
+ "linear_in",
+ "15.attn.to_out",
+ "single_transformer_blocks.4.attn.to_out",
+ "to_k",
+ "add_q_proj",
+ "to_q",
+ "to_v",
+ "single_transformer_blocks.5.attn.to_out",
+ "13.attn.to_out",
+ "single_transformer_blocks.2.attn.to_out",
+ "11.attn.to_out",
+ "9.attn.to_out",
+ "single_transformer_blocks.3.attn.to_out",
+ "12.attn.to_out",
+ "add_k_proj",
+ "to_out.0",
+ "14.attn.to_out",
+ "single_transformer_blocks.0.attn.to_out",
+ "linear_out",
+ "8.attn.to_out",
+ "19.attn.to_out"
+ ],
+ "target_parameters": null,
+ "task_type": null,
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+ }
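For context (not part of the commit): the config above defines a rank-32, alpha-32 PEFT LoRA over the attention and projection modules of the Flux2-Klein transformer. A minimal loading sketch follows; it assumes the base checkpoint from train.toml loads as a diffusers pipeline whose class exposes `load_lora_weights`, and the paths are placeholders to adjust.

```python
# Minimal sketch: attach this adapter to the base Flux2-Klein checkpoint.
# Assumptions (not in this repo): the base path matches [model.paths].diffusers
# in train.toml, and the pipeline class supports load_lora_weights.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "/content/models/flux2_klein_base_9b",   # base checkpoint path from train.toml
    torch_dtype=torch.bfloat16,              # matches the training / save dtype
)
# Reads adapter_config.json + adapter_model.safetensors from the adapter directory.
pipe.load_lora_weights("path/to/this/repo")
```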
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8e01c07cca103edfc486ae782a97671064979879f638a962d23884671715cb57
+ size 174103488
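The weights file is stored via Git LFS, so the diff only shows the pointer. After fetching the real blob (e.g. `git lfs pull`), the local file can be checked against the pointer's oid and size. A small verification sketch, assuming the file sits at the repo root:

```python
# Verify the downloaded adapter_model.safetensors against the LFS pointer above.
import hashlib
from pathlib import Path

path = Path("adapter_model.safetensors")
digest = hashlib.sha256(path.read_bytes()).hexdigest()

assert path.stat().st_size == 174103488, "size mismatch with the LFS pointer"
assert digest == "8e01c07cca103edfc486ae782a97671064979879f638a962d23884671715cb57", \
    "sha256 mismatch with the LFS pointer"
print("adapter_model.safetensors matches its LFS pointer")
```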
train.toml ADDED
@@ -0,0 +1,85 @@
+ # Output path for training runs. Each training run makes a new directory in here.
+ output_dir = "/content/outputs"
+
+ # Dataset config file.
+ dataset = "/content/run_cfg/dataset.toml"
+
+ # training settings
+
+ epochs = 1
+ micro_batch_size_per_gpu = 1
+ pipeline_stages = 1
+ gradient_accumulation_steps = 1
+ gradient_clipping = 1.0
+ warmup_steps = 0
+
+ # eval settings
+
+ eval_every_n_epochs = 1
+ eval_before_first_step = true
+ eval_micro_batch_size_per_gpu = 1
+ eval_gradient_accumulation_steps = 1
+
+ # misc settings
+
+ save_every_n_epochs = 1
+ checkpoint_every_n_minutes = 120
+ activation_checkpointing = true
+ partition_method = 'parameters'
+ save_dtype = 'bfloat16'
+ caching_batch_size = 1
+ steps_per_print = 1
+
+ # Flux2-Klein with NEW nested config structure.
+ [model]
+ dtype = 'bfloat16'
+ type = 'flux_2_klein'
+
+ [model.paths]
+ diffusers = "/content/models/flux2_klein_base_9b"
+
+ [model.vae]
+ latent_mode = 'sample'
+
+ [model.lora]
+ format = 'ai_toolkit_peft'
+
+ # --- DIFFUSERS PARITY PIPELINE ---
+ # Uses diffusers-style timestep sampling + SD3 weighting.
+
+ [model.train]
+ # Explicitly select the diffusers-parity pipeline.
+ pipeline = 'diffusers'
+
+ # These are the same knobs exposed by
+ # external/diffusers/examples/dreambooth/train_dreambooth_lora_flux2_klein.py
+ # when using compute_density_for_timestep_sampling + compute_loss_weighting_for_sd3.
+ # Diffusers-style knobs (mirrors the official diffusers trainer).
+ [model.train.diffusers]
+ # Enable diffusers-parity even if pipeline is not explicitly set.
+ enabled = true
+ # weighting_scheme: none | sigma_sqrt | logit_normal | mode | cosmap
+ weighting_scheme = 'none'
+ logit_mean = 0.0
+ logit_std = 1.0
+ mode_scale = 1.29
+ # num_train_timesteps typically 1000 in diffusers schedulers
+ num_train_timesteps = 1000
+
+ # The rest of the training config stays the same as native.
+ [adapter]
+ type = 'lora'
+ rank = 32
+ dtype = 'bfloat16'
+ [optimizer]
+ type = 'adamw_optimi'
+ lr = 2e-5
+ betas = [0.9, 0.99]
+ weight_decay = 0.01
+ eps = 1e-8
+
+ [monitoring]
+ enable_wandb = false
+ wandb_api_key = ''
+ wandb_tracker_name = ''
+ wandb_run_name = ''
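For reference (not part of the commit): a sketch of how the [model.train.diffusers] knobs above would feed diffusers' training utilities, following the pattern used by the dreambooth LoRA scripts the comments reference. The scheme, logit, mode-scale and timestep values come from this train.toml; the batch size and placeholder sigmas are illustrative.

```python
# Map the [model.train.diffusers] knobs onto diffusers' training utilities.
import torch
from diffusers.training_utils import (
    compute_density_for_timestep_sampling,
    compute_loss_weighting_for_sd3,
)

batch_size = 1              # micro_batch_size_per_gpu in train.toml
num_train_timesteps = 1000  # num_train_timesteps in train.toml

# Sample timestep density u in [0, 1]; with weighting_scheme = 'none'
# this is plain uniform sampling (logit_mean/std and mode_scale only
# matter for 'logit_normal' and 'mode').
u = compute_density_for_timestep_sampling(
    weighting_scheme="none",
    batch_size=batch_size,
    logit_mean=0.0,
    logit_std=1.0,
    mode_scale=1.29,
)
timestep_indices = (u * num_train_timesteps).long()

# Per-sample loss weights; 'none' yields uniform weighting, while
# 'sigma_sqrt' / 'cosmap' would use the scheduler sigmas at those indices.
sigmas = torch.ones(batch_size)  # placeholder sigmas for illustration
weights = compute_loss_weighting_for_sd3(weighting_scheme="none", sigmas=sigmas)
print(timestep_indices, weights)
```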