{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" },
 "cells": [
  { "cell_type": "markdown", "source": [
   "## Fine-Tuning script for master thesis \"*AI-based Image Generation to Support Easy Language*\"\n",
   "\n",
   "This is an adapted version of this [colab](https://colab.research.google.com/github/Linaqruf/kohya-trainer/blob/main/kohya-LoRA-dreambooth.ipynb) and makes use of the fine-tuning script from this [repository](https://github.com/Linaqruf/kohya-trainer) (commit: `3d494d8`).\n",
   "\n",
   "Execute all cells to reproduce the weights used in the thesis. T4 and disabled \"extended ram\" were used during the final training run of the thesis."
  ], "metadata": { "id": "HWkM_jf5v42U" } },
  { "cell_type": "code", "execution_count": null, "metadata": { "id": "nb06s6qR0FFP" }, "outputs": [], "source": [
   "# @title ## Install Dependencies\n",
   "import os\n",
   "import zipfile\n",
   "import shutil\n",
   "import time\n",
   "from subprocess import getoutput\n",
   "from IPython.utils import capture\n",
   "from google.colab import drive\n",
   "\n",
   "\n",
   "# Reload every variable persisted by an earlier `%store` call\n",
   "%store -r\n",
   "\n",
   "# Directories located below the Colab root\n",
   "root_dir = \"/content\"\n",
   "repo_dir = os.path.join(root_dir, \"kohya-trainer\")\n",
   "training_dir = os.path.join(root_dir, \"LoRA\")\n",
   "pretrained_model = os.path.join(root_dir, \"pretrained_model\")\n",
   "vae_dir = os.path.join(root_dir, \"vae\")\n",
   "config_dir = os.path.join(training_dir, \"config\")\n",
   "\n",
   "# Paths inside the cloned trainer repository\n",
   "accelerate_config = os.path.join(repo_dir, \"accelerate_config/config.yaml\")\n",
   "tools_dir = os.path.join(repo_dir, \"tools\")\n",
   "finetune_dir = os.path.join(repo_dir, \"finetune\")\n",
   "\n",
   "# Destination of checkpoints and sample images: Google Drive when enabled, local disk otherwise\n",
   "output_to_drive = False\n",
   "if output_to_drive:\n",
   "    output_dir = \"/content/drive/MyDrive/LoRA/output\"\n",
   "else:\n",
   "    output_dir = \"/content/LoRA/output\"\n",
   "sample_dir = os.path.join(output_dir, \"sample\")\n",
   "\n",
"for store in [\n", " \"root_dir\",\n", " \"repo_dir\",\n", " \"training_dir\",\n", " \"pretrained_model\",\n", " \"vae_dir\",\n", " \"accelerate_config\",\n", " \"tools_dir\",\n", " \"finetune_dir\",\n", " \"config_dir\",\n", " \"output_dir\",\n", " \"sample_dir\"\n", "]:\n", " with capture.capture_output() as cap:\n", " %store {store}\n", " del cap\n", "\n", "repo_url = \"https://github.com/Linaqruf/kohya-trainer\"\n", "submission_hash = \"3d494d83e4aea273f64716286a26d162a8df3317\"\n", "branch = \"\"\n", "mount_drive = True\n", "verbose = False\n", "\n", "def read_file(filename):\n", " with open(filename, \"r\") as f:\n", " contents = f.read()\n", " return contents\n", "\n", "\n", "def write_file(filename, contents):\n", " with open(filename, \"w\") as f:\n", " f.write(contents)\n", "\n", "\n", "def clone_repo(url):\n", " if not os.path.exists(repo_dir):\n", " os.chdir(root_dir)\n", " !git clone {url} {repo_dir}\n", " !git checkout {submission_hash}\n", " else:\n", " os.chdir(repo_dir)\n", " !git checkout {submission_hash}\n", "\n", "def mount_drive():\n", " if not os.path.exists(\"/content/drive\"):\n", " drive.mount(\"/content/drive\")\n", "\n", "def set_environment_variables():\n", " os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"3\"\n", " os.environ[\"BITSANDBYTES_NOWELCOME\"] = \"1\"\n", " os.environ[\"SAFETENSORS_FAST_GPU\"] = \"1\"\n", "\n", "def adjust_ld_library_path(cuda_path):\n", " ld_library_path = os.environ.get(\"LD_LIBRARY_PATH\", \"\")\n", " os.environ[\"LD_LIBRARY_PATH\"] = f\"{ld_library_path}:{cuda_path}\"\n", "\n", "def make_dirs():\n", " for dir in [\n", " training_dir,\n", " config_dir,\n", " pretrained_model,\n", " vae_dir,\n", " output_dir,\n", " sample_dir\n", " ]:\n", " os.makedirs(dir, exist_ok=True)\n", "\n", "def install_dependencies(verbose=True, accelerate_config=\"accelerate_config.yaml\"):\n", " \"\"\"Install all requirements and dependencies\"\"\"\n", " gpu_info = getoutput(\"nvidia-smi\")\n", " if \"T4\" in gpu_info:\n", " 
update_gpu_configuration()\n", "\n", " install_requirements(verbose)\n", " install_pytorch_libraries(verbose)\n", "\n", " configure_accelerate(accelerate_config)\n", "\n", "def update_gpu_configuration():\n", " \"\"\"Modify the utility file to use GPU (replace 'cpu' with 'cuda')\"\"\"\n", " !sed -i \"s@cpu@cuda@\" library/model_util.py\n", "\n", "def install_requirements(verbose):\n", " \"\"\"Install Python packages from requirements.txt\"\"\"\n", " !pip install {\"-q\" if not verbose else \"\"} --upgrade -r requirements.txt\n", "\n", "def install_pytorch_libraries(verbose):\n", " \"\"\"Install specific versions of PyTorch and related libraries\"\"\"\n", " !pip install {\"-q\" if not verbose else \"\"} torch==2.0.0+cu118 torchvision==0.15.1+cu118 torchaudio==2.0.1+cu118 torchtext==0.15.1 torchdata==0.6.0 xformers==0.0.19 triton==2.0.0 --extra-index-url https://download.pytorch.org/whl/cu118 -U\n", "\n", "def configure_accelerate(accelerate_config):\n", " \"\"\"Configure Accelerate if the specified config file does not exist\"\"\"\n", " from accelerate.utils import write_basic_config\n", "\n", " if not os.path.exists(accelerate_config):\n", " write_basic_config(save_location=accelerate_config)\n", "\n", "\n", "def main():\n", " \"\"\"Setup directories and environment specific variables\"\"\"\n", " os.chdir(root_dir)\n", "\n", " if mount_drive:\n", " mount_drive()\n", "\n", " make_dirs()\n", "\n", " clone_repo(repo_url)\n", "\n", " os.chdir(repo_dir)\n", "\n", " !apt install aria2 {\"-qq\" if not verbose else \"\"}\n", "\n", " install_dependencies(verbose=verbose, accelerate_config=accelerate_config)\n", " time.sleep(3)\n", "\n", " set_environment_variables()\n", "\n", " cuda_path = \"/usr/local/cuda-11.8/targets/x86_64-linux/lib/\"\n", " adjust_ld_library_path(cuda_path)\n", "\n", "main()\n" ] }, { "cell_type": "code", "source": [ "# @title ## Download Model and VAE\n", "\n", "%store -r\n", "\n", "os.chdir(root_dir)\n", "\n", "hf_token = 
\"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\"\n", "user_header = f'\"Authorization: Bearer {hf_token}\"'\n", "\n", "# model\n", "model_name = \"Stable-Diffusion-v1-5.safetensors\"\n", "model_url = \"https://huggingface.co/bomdey/plAInlang/resolve/main/stable_diffusion_1_5-pruned.safetensors\"\n", "\n", "# Download pretrained model from huggingface\n", "pretrained_model_name_or_path = os.path.join(pretrained_model, model_name)\n", "if not os.path.exists(pretrained_model_name_or_path):\n", " !aria2c --console-log-level=error --summary-interval=10 --header={user_header} -c -x 16 -k 1M -s 16 -d {pretrained_model} -o {model_name} \"{model_url}\"\n", "\n", "# vae\n", "vae_name = \"stablediffusion.vae.pt\"\n", "vae_url = \"https://huggingface.co/bomdey/plAInlang/resolve/main/vae-ft-mse-840000-ema-pruned.ckpt\"\n", "\n", "# Download vae from huggingface\n", "vae = os.path.join(vae_dir, vae_name)\n", "if not os.path.exists(vae):\n", " !aria2c --console-log-level=error --summary-interval=10 --header={user_header} -c -x 16 -k 1M -s 16 -d {vae_dir} -o {vae_name} \"{vae_url}\"" ], "metadata": { "id": "cjt6t_ob01g7" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title ## Load Dataset from Huggingface\n", "\n", "%store -r\n", "\n", "dataset_submission_hash = \"731fd74dbed6197f88d935828608fff4a3b3299d\"\n", "hf_dataset_repo = \"https://huggingface.co/datasets/bomdey/plAInLang/\"\n", "data_destination_dir = \"/content/dataset\"\n", "\n", "if not os.path.exists(data_destination_dir):\n", " !git clone {hf_dataset_repo} {data_destination_dir}\n", " time.sleep(3)\n", "\n", "os.chdir(data_destination_dir)\n", "!git checkout {dataset_submission_hash}\n", "\n", "%store data_destination_dir\n", "\n", "os.chdir(root_dir)\n", "\n", "# Setup directory for training data\n", "train_data_dir = data_destination_dir\n", "\n", "%store train_data_dir" ], "metadata": { "id": "llTHbemwhzTv" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ 
"# @title ## Dataset Config\n", "import toml\n", "import glob\n", "\n", "dataset_repeats = 10\n", "activation_word = \"pl41nl4ng\"\n", "caption_extension = \".txt\"\n", "resolution = 512\n", "flip_aug = False\n", "keep_tokens = 0\n", "\n", "def find_image_files(path):\n", " \"\"\"Get all images from a given path\"\"\"\n", " supported_extensions = (\".png\", \".jpg\", \".jpeg\", \".webp\", \".bmp\")\n", " return [file for file in glob.glob(path + '/**/*', recursive=True) if file.lower().endswith(supported_extensions)]\n", "\n", "def process_data_dir(data_dir, default_num_repeats, default_class_token):\n", " \"\"\"Process a data directory and create subsets for image datasets\"\"\"\n", " subsets = []\n", " images = find_image_files(data_dir)\n", " if images:\n", " subsets.append({\n", " \"image_dir\": data_dir,\n", " \"class_tokens\": default_class_token,\n", " \"num_repeats\": default_num_repeats,\n", " **({}),\n", " })\n", "\n", " return subsets\n", "\n", "\n", "train_subsets = process_data_dir(train_data_dir, dataset_repeats, activation_word)\n", "config = {\n", " \"general\": {\n", " \"enable_bucket\": True,\n", " \"caption_extension\": caption_extension,\n", " \"shuffle_caption\": True,\n", " \"keep_tokens\": keep_tokens,\n", " \"bucket_reso_steps\": 64,\n", " \"bucket_no_upscale\": False,\n", " },\n", " \"datasets\": [\n", " {\n", " \"resolution\": resolution,\n", " \"min_bucket_reso\": 256,\n", " \"max_bucket_reso\": 1024,\n", " \"caption_dropout_rate\": 0,\n", " \"caption_tag_dropout_rate\": 0,\n", " \"caption_dropout_every_n_epochs\": 0,\n", " \"flip_aug\": flip_aug,\n", " \"color_aug\": False,\n", " \"face_crop_aug_range\": None,\n", " \"subsets\": train_subsets,\n", " }\n", " ],\n", "}\n", "\n", "\n", "dataset_config = os.path.join(config_dir, \"dataset_config.toml\")\n", "config_str = toml.dumps(config)\n", "with open(dataset_config, \"w\") as f:\n", " f.write(config_str)\n", "\n", "print(config_str)" ], "metadata": { "id": "7lgAaowm3uMV" }, 
"execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# @title ## Training Config\n", "\n", "import toml\n", "import os\n", "\n", "project_name = \"pl41n-l4ng_final\"\n", "%store project_name\n", "\n", "%store -r\n", "\n", "# Lora and Optimizer\n", "conv_dim = 8\n", "conv_alpha = 8\n", "network_dim = 256\n", "network_alpha = 256\n", "network_weight = \"\"\n", "network_module = \"networks.lora\"\n", "network_args = \"\"\n", "min_snr_gamma = 5\n", "optimizer_type = \"AdamW8bit\" #\n", "optimizer_args = \"\"\n", "unet_lr = 5e-6\n", "text_encoder_lr = 25e-7\n", "lr_scheduler = \"constant\"\n", "lr_warmup_steps = 0\n", "lr_scheduler_num_cycles = 0\n", "lr_scheduler_power = 0\n", "\n", "# Training\n", "lowram = True\n", "enable_sample_prompt = True\n", "sampler = \"euler_a\"\n", "noise_offset = 0.0\n", "num_epochs = 30\n", "vae_batch_size = 4\n", "train_batch_size = 2\n", "mixed_precision = \"fp16\"\n", "save_precision = \"fp16\"\n", "save_n_epochs_type = \"save_every_n_epochs\"\n", "save_n_epochs_type_value = 1\n", "save_model_as = \"safetensors\"\n", "max_token_length = 225\n", "clip_skip = 1\n", "gradient_checkpointing = False\n", "gradient_accumulation_steps = 1\n", "seed = 42\n", "logging_dir = \"/content/LoRA/logs\"\n", "prior_loss_weight = 1.0\n", "\n", "os.chdir(repo_dir)\n", "\n", "sample_str = f\"\"\"\n", " illustration in the style of pl41nl4ng, a man with glasses and a tie, solo, looking at viewer, smile, closed mouth, short hair, simple background, black background, shirt, 1boy, portrait, male focus, glasses \\\n", " --n lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry \\\n", " --w 512 \\\n", " --h 512 \\\n", " --l 7 \\\n", " --s 28\n", "\"\"\"\n", "\n", "config = {\n", " \"model_arguments\": {\n", " \"v2\": False,\n", " \"v_parameterization\": False,\n", " 
\"pretrained_model_name_or_path\": pretrained_model_name_or_path,\n", " \"vae\": vae,\n", " },\n", " \"additional_network_arguments\": {\n", " \"no_metadata\": False,\n", " \"unet_lr\": float(unet_lr),\n", " \"text_encoder_lr\": float(text_encoder_lr),\n", " \"network_weights\": network_weight,\n", " \"network_module\": network_module,\n", " \"network_dim\": network_dim,\n", " \"network_alpha\": network_alpha,\n", " \"network_args\": None,\n", " \"network_train_unet_only\": False,\n", " \"network_train_text_encoder_only\": False,\n", " \"training_comment\": None,\n", " },\n", " \"optimizer_arguments\": {\n", " \"min_snr_gamma\": min_snr_gamma,\n", " \"optimizer_type\": optimizer_type,\n", " \"learning_rate\": unet_lr,\n", " \"max_grad_norm\": 1.0,\n", " \"optimizer_args\": None,\n", " \"lr_scheduler\": lr_scheduler,\n", " \"lr_warmup_steps\": lr_warmup_steps,\n", " \"lr_scheduler_num_cycles\": None,\n", " \"lr_scheduler_power\": None,\n", " },\n", " \"dataset_arguments\": {\n", " \"cache_latents\": True,\n", " \"debug_dataset\": False,\n", " \"vae_batch_size\": vae_batch_size,\n", " },\n", " \"training_arguments\": {\n", " \"output_dir\": output_dir,\n", " \"output_name\": project_name,\n", " \"save_precision\": save_precision,\n", " \"save_every_n_epochs\": save_n_epochs_type_value,\n", " \"save_n_epoch_ratio\": None,\n", " \"save_last_n_epochs\": None,\n", " \"save_state\": None,\n", " \"save_last_n_epochs_state\": None,\n", " \"resume\": None,\n", " \"train_batch_size\": train_batch_size,\n", " \"max_token_length\": 225,\n", " \"mem_eff_attn\": False,\n", " \"xformers\": True,\n", " \"max_train_epochs\": num_epochs,\n", " \"max_data_loader_n_workers\": 8,\n", " \"persistent_data_loader_workers\": True,\n", " \"seed\": seed if seed > 0 else None,\n", " \"gradient_checkpointing\": gradient_checkpointing,\n", " \"gradient_accumulation_steps\": gradient_accumulation_steps,\n", " \"mixed_precision\": mixed_precision,\n", " \"clip_skip\": clip_skip,\n", " 
\"logging_dir\": logging_dir,\n", " \"log_prefix\": project_name,\n", " \"noise_offset\": None,\n", " \"lowram\": lowram,\n", " },\n", " \"sample_prompt_arguments\": {\n", " \"sample_every_n_steps\": None,\n", " \"sample_every_n_epochs\": 1,\n", " \"sample_sampler\": sampler,\n", " },\n", " \"dreambooth_arguments\": {\n", " \"prior_loss_weight\": 1.0,\n", " },\n", " \"saving_arguments\": {\n", " \"save_model_as\": save_model_as\n", " },\n", "}\n", "\n", "config_path = os.path.join(config_dir, \"config_file.toml\")\n", "prompt_path = os.path.join(config_dir, \"sample_prompt.txt\")\n", "\n", "for key in config:\n", " if isinstance(config[key], dict):\n", " for sub_key in config[key]:\n", " if config[key][sub_key] == \"\":\n", " config[key][sub_key] = None\n", " elif config[key] == \"\":\n", " config[key] = None\n", "\n", "config_str = toml.dumps(config)\n", "\n", "def write_file(filename, contents):\n", " with open(filename, \"w\") as f:\n", " f.write(contents)\n", "\n", "write_file(config_path, config_str)\n", "write_file(prompt_path, sample_str)\n", "\n", "print(config_str)" ], "metadata": { "id": "igwhMSLQ5Dz_" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#@title ## Start Training\n", "\n", "sample_prompt = \"/content/LoRA/config/sample_prompt.txt\"\n", "config_file = \"/content/LoRA/config/config_file.toml\"\n", "dataset_config = \"/content/LoRA/config/dataset_config.toml\"\n", "\n", "accelerate_conf = {\n", " \"config_file\" : accelerate_config,\n", " \"num_cpu_threads_per_process\" : 1,\n", "}\n", "\n", "train_conf = {\n", " \"sample_prompts\" : sample_prompt,\n", " \"dataset_config\" : dataset_config,\n", " \"config_file\" : config_file\n", "}\n", "\n", "def train(config):\n", " \"\"\"Create training arguments\"\"\"\n", " args = \"\"\n", " for k, v in config.items():\n", " if isinstance(v, str):\n", " args += f'--{k}=\"{v}\" '\n", " elif isinstance(v, int) and not isinstance(v, bool):\n", " args += f\"--{k}={v} \"\n", "\n", 
" return args\n", "\n", "\n", "accelerate_args = train(accelerate_conf)\n", "train_args = train(train_conf)\n", "final_args = f\"accelerate launch {accelerate_args} train_network.py {train_args}\"\n", "\n", "os.chdir(repo_dir)\n", "!{final_args}" ], "metadata": { "id": "0hyHFH845al3" }, "execution_count": null, "outputs": [] } ] }