Upload LoRA adapter + config + train.toml
- adapter_config.json +74 -0
- adapter_model.safetensors +3 -0
- train.toml +85 -0
adapter_config.json
ADDED
@@ -0,0 +1,74 @@
+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "20.attn.to_out",
+    "23.attn.to_out",
+    "17.attn.to_out",
+    "16.attn.to_out",
+    "10.attn.to_out",
+    "add_v_proj",
+    "18.attn.to_out",
+    "single_transformer_blocks.1.attn.to_out",
+    "to_qkv_mlp_proj",
+    "to_add_out",
+    "single_transformer_blocks.6.attn.to_out",
+    "22.attn.to_out",
+    "single_transformer_blocks.7.attn.to_out",
+    "21.attn.to_out",
+    "linear_in",
+    "15.attn.to_out",
+    "single_transformer_blocks.4.attn.to_out",
+    "to_k",
+    "add_q_proj",
+    "to_q",
+    "to_v",
+    "single_transformer_blocks.5.attn.to_out",
+    "13.attn.to_out",
+    "single_transformer_blocks.2.attn.to_out",
+    "11.attn.to_out",
+    "9.attn.to_out",
+    "single_transformer_blocks.3.attn.to_out",
+    "12.attn.to_out",
+    "add_k_proj",
+    "to_out.0",
+    "14.attn.to_out",
+    "single_transformer_blocks.0.attn.to_out",
+    "linear_out",
+    "8.attn.to_out",
+    "19.attn.to_out"
+  ],
+  "target_parameters": null,
+  "task_type": null,
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}
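
Note: this config can be read back with PEFT as a quick sanity check. A minimal sketch, assuming the uploaded files sit in the current directory; the version pin mirrors the "peft_version" field above:

# Minimal sketch: load adapter_config.json via PEFT and confirm the
# LoRA hyperparameters recorded in the diff above.
from peft import PeftConfig  # peft >= 0.18.1, per "peft_version"

cfg = PeftConfig.from_pretrained(".")  # dispatches to LoraConfig via "peft_type"
print(cfg.peft_type.value, cfg.r, cfg.lora_alpha)  # LORA 32 32
print(len(cfg.target_modules))                     # 35 target modules
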
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e01c07cca103edfc486ae782a97671064979879f638a962d23884671715cb57
+size 174103488
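
Note: the diff above is only the Git LFS pointer, not the weights themselves. A minimal sketch for fetching the real blob and checking it against the pointer; "user/repo" is a placeholder for wherever this commit lives:

# Sketch: download the safetensors blob and verify it against the
# oid/size recorded in the LFS pointer above. "user/repo" is hypothetical.
import hashlib
from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="user/repo", filename="adapter_model.safetensors")
blob = open(path, "rb").read()
assert len(blob) == 174103488  # "size" from the pointer
assert hashlib.sha256(blob).hexdigest() == (
    "8e01c07cca103edfc486ae782a97671064979879f638a962d23884671715cb57"
)  # "oid sha256:..." from the pointer
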
train.toml
ADDED
@@ -0,0 +1,85 @@
+# Output path for training runs. Each training run makes a new directory in here.
+output_dir = "/content/outputs"
+
+# Dataset config file.
+dataset = "/content/run_cfg/dataset.toml"
+
+# training settings
+
+epochs = 1
+micro_batch_size_per_gpu = 1
+pipeline_stages = 1
+gradient_accumulation_steps = 1
+gradient_clipping = 1.0
+warmup_steps = 0
+
+# eval settings
+
+eval_every_n_epochs = 1
+eval_before_first_step = true
+eval_micro_batch_size_per_gpu = 1
+eval_gradient_accumulation_steps = 1
+
+# misc settings
+
+save_every_n_epochs = 1
+checkpoint_every_n_minutes = 120
+activation_checkpointing = true
+partition_method = 'parameters'
+save_dtype = 'bfloat16'
+caching_batch_size = 1
+steps_per_print = 1
+
+# Flux2-Klein with NEW nested config structure.
+[model]
+dtype = 'bfloat16'
+type = 'flux_2_klein'
+
+[model.paths]
+diffusers = "/content/models/flux2_klein_base_9b"
+
+[model.vae]
+latent_mode = 'sample'
+
+[model.lora]
+format = 'ai_toolkit_peft'
+
+# --- DIFFUSERS PARITY PIPELINE ---
+# Uses diffusers-style timestep sampling + SD3 weighting.
+
+[model.train]
+# Explicitly select the diffusers-parity pipeline.
+pipeline = 'diffusers'
+
+# These are the same knobs exposed by
+# external/diffusers/examples/dreambooth/train_dreambooth_lora_flux2_klein.py
+# when using compute_density_for_timestep_sampling + compute_loss_weighting_for_sd3.
+# Diffusers-style knobs (mirrors the official diffusers trainer).
+[model.train.diffusers]
+# Enable diffusers-parity even if pipeline is not explicitly set.
+enabled = true
+# weighting_scheme: none | sigma_sqrt | logit_normal | mode | cosmap
+weighting_scheme = 'none'
+logit_mean = 0.0
+logit_std = 1.0
+mode_scale = 1.29
+# num_train_timesteps typically 1000 in diffusers schedulers
+num_train_timesteps = 1000
+
+# The rest of the training config stays the same as native.
+[adapter]
+type = 'lora'
+rank = 32
+dtype = 'bfloat16'
+[optimizer]
+type = 'adamw_optimi'
+lr = 2e-5
+betas = [0.9, 0.99]
+weight_decay = 0.01
+eps = 1e-8
+
+[monitoring]
+enable_wandb = false
+wandb_api_key = ''
+wandb_tracker_name = ''
+wandb_run_name = ''
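
Note: a minimal sketch for sanity-checking this config before a run, assuming Python 3.11+ for the stdlib tomllib and the file paths shown in the diff:

# Parse train.toml and check the knobs that must agree with
# adapter_config.json and the diffusers-parity section.
import tomllib  # stdlib since Python 3.11

with open("train.toml", "rb") as f:
    cfg = tomllib.load(f)

assert cfg["model"]["type"] == "flux_2_klein"
assert cfg["adapter"]["rank"] == 32                      # matches "r": 32 in adapter_config.json
assert cfg["model"]["train"]["pipeline"] == "diffusers"  # diffusers-parity pipeline
print(cfg["optimizer"])  # {'type': 'adamw_optimi', 'lr': 2e-05, ...}

For context, the [model.train.diffusers] knobs map onto the two diffusers helpers named in the config comments (both live in diffusers.training_utils). A sketch of that mapping, with a placeholder batch size and placeholder sigmas standing in for what the trainer derives from its noise scheduler:

import torch
from diffusers.training_utils import (
    compute_density_for_timestep_sampling,
    compute_loss_weighting_for_sd3,
)

d = cfg["model"]["train"]["diffusers"]
u = compute_density_for_timestep_sampling(
    weighting_scheme=d["weighting_scheme"],  # 'none' -> uniform sampling
    batch_size=4,
    logit_mean=d["logit_mean"],
    logit_std=d["logit_std"],
    mode_scale=d["mode_scale"],
)
indices = (u * d["num_train_timesteps"]).long()  # indices into the noise schedule
sigmas = torch.rand(4)  # placeholder; the real values come from the scheduler
w = compute_loss_weighting_for_sd3(weighting_scheme=d["weighting_scheme"], sigmas=sigmas)
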