c2cite / launch.py
loadingy's picture
first push
51be264
#!/usr/bin/env python3
import json
import os
import datasets as hf_datasets
import fire
# Name of the sub-directory (next to this script) that holds the JSON
# configuration templates read by `gen_config`.
file_path = "templates"
# Absolute path of the directory that contains this script; templates are read
# from, and generated configuration files are written under, this directory.
work_path = os.path.dirname(os.path.abspath(__file__))
def load_dataset(path: str):
    """Load a dataset via the Hugging Face ``datasets`` package.

    The way ``path`` is interpreted depends on its form:

    * ends with ``.json`` or ``.jsonl``: loaded as a local JSON data file;
    * contains ``:``: split on ``:`` and the first two pieces are passed as
      the first and second positional arguments of
      ``datasets.load_dataset`` (presumably dataset name and configuration
      name -- confirm against the ``datasets`` documentation);
    * anything else: passed to ``datasets.load_dataset`` unchanged.

    Returns whatever ``datasets.load_dataset`` returns.
    """
    if path.endswith((".json", ".jsonl")):
        return hf_datasets.load_dataset("json", data_files=path)

    if ":" not in path:
        return hf_datasets.load_dataset(path)

    pieces = path.split(":")
    return hf_datasets.load_dataset(pieces[0], pieces[1])
def compose_command(
    base_model: str,
    config: str = "moe_peft.json",
    inference: bool = False,
    evaluate: bool = False,
    load_adapter: bool = False,
    random_seed: int = 42,
    cuda_device: int = None,
    log_file: str = "moe_peft.log",
    overwrite: bool = False,
    attn_impl: str = None,
    sliding_window: bool = False,
    use_cache: bool = True,
    quantize: str = None,
    dtype: str = "bf16",
    tf32: bool = False,
):
    """Run ``python moe_peft.py`` with flags derived from the arguments.

    Args:
        base_model: Value passed as ``--base_model``.
        config: Value passed as ``--config``.
        inference: Add ``--inference`` when true.
        evaluate: Add ``--evaluate`` when true.
        load_adapter: Add ``--load_adapter`` when true.
        random_seed: Value passed as ``--seed``.
        cuda_device: When not ``None``, exported to the child process as
            ``CUDA_VISIBLE_DEVICES``. A list/tuple (which is what Fire
            produces for ``--cuda_device 0,1``) is joined with commas.
        log_file: Value passed as ``--log_file``.
        overwrite: Add ``--overwrite`` when true.
        attn_impl: Value passed as ``--attn_impl`` when not ``None``.
        sliding_window: Add ``--sliding_window`` when true.
        use_cache: Add ``--disable_cache`` when false.
        quantize: ``None``, ``"4bit"`` or ``"8bit"``; adds ``--load_4bit``
            or ``--load_8bit``.
        dtype: ``"fp32"``, ``"fp16"`` or ``"bf16"``; the latter two add
            ``--fp16`` / ``--bf16``, ``"fp32"`` adds no flag.
        tf32: Add ``--tf32`` when true.

    Returns:
        The exit code of the child process (0 on success).

    Raises:
        ValueError: If ``quantize`` or ``dtype`` is not one of the supported
            values. Raised before any process is started.
    """
    import subprocess

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if quantize not in (None, "4bit", "8bit"):
        raise ValueError(
            f"quantize must be one of None, '4bit', '8bit', got {quantize!r}"
        )
    if dtype not in ("fp32", "fp16", "bf16"):
        raise ValueError(
            f"dtype must be one of 'fp32', 'fp16', 'bf16', got {dtype!r}"
        )

    # Build an argument vector rather than a shell string so that values
    # containing spaces or shell metacharacters reach the script unchanged.
    argv = [
        "python",
        "moe_peft.py",
        "--base_model",
        str(base_model),
        "--config",
        str(config),
    ]
    if inference:
        argv.append("--inference")
    if evaluate:
        argv.append("--evaluate")
    if load_adapter:
        argv.append("--load_adapter")
    argv += ["--seed", str(random_seed), "--log_file", str(log_file)]
    if overwrite:
        argv.append("--overwrite")
    if attn_impl is not None:
        argv += ["--attn_impl", str(attn_impl)]
    if sliding_window:
        argv.append("--sliding_window")
    if not use_cache:
        argv.append("--disable_cache")
    if quantize is not None:
        argv.append(f"--load_{quantize}")
    if dtype in ("fp16", "bf16"):
        argv.append(f"--{dtype}")
    if tf32:
        argv.append("--tf32")

    # `env=None` makes the child inherit the current environment unchanged.
    env = None
    if cuda_device is not None:
        if isinstance(cuda_device, (list, tuple)):
            visible_devices = ",".join(str(device) for device in cuda_device)
        else:
            visible_devices = str(cuda_device)
        env = dict(os.environ, CUDA_VISIBLE_DEVICES=visible_devices)

    return subprocess.run(argv, env=env, check=False).returncode
def update_record(dict_: dict, key_, value_):
    """Store ``value_`` under ``key_`` in ``dict_`` unless ``value_`` is ``None``.

    The dictionary is modified in place; a ``None`` value leaves it untouched,
    so any existing entry for ``key_`` is kept.
    """
    if value_ is None:
        return
    dict_[key_] = value_
def gen_config(
    # essential
    template: str,
    tasks: str,
    # optional
    adapter_name: str = None,
    file_name: str = "moe_peft.json",
    data_path: str = None,
    multi_task: bool = False,
    append: bool = False,
    # default value provided by template
    prompt_template: str = None,
    cutoff_len: int = None,
    save_step: int = None,
    lr_scheduler: str = None,
    warmup_steps: float = None,
    learning_rate: float = None,
    batch_size: int = None,
    micro_batch_size: int = None,
    evaluate_steps: int = None,
    evaluate_batch_size: int = None,
    num_epochs: int = None,
    loraplus_lr_ratio: float = None,
    use_dora: bool = None,
    use_rslora: bool = None,
    group_by_length: bool = None,
):
    """Generate a configuration file from a template.

    Reads ``<work_path>/<file_path>/<template>.json``, expands every entry of
    its ``"lora"`` list once per task, applies the overrides given as
    arguments, and writes the result to ``<work_path>/<file_name>``.

    Args:
        template: Template name (file stem inside the templates directory).
        tasks: Task names separated by ``;``. With ``multi_task`` the whole
            string is used as a single task name.
        adapter_name: When given, each adapter is named
            ``<adapter_name>_<index>`` instead of the task-derived name.
        file_name: Name of the configuration file to write (and to read when
            ``append`` is true).
        data_path: Data paths separated by ``;``, one per task. With
            ``multi_task`` the whole string is used as a single path.
        multi_task: Produce one adapter covering all tasks.
        append: Keep the ``"lora"`` entries of the existing configuration
            file and add the new ones after them.
        prompt_template, cutoff_len, save_step, lr_scheduler, warmup_steps,
        learning_rate, batch_size, micro_batch_size, evaluate_steps,
        evaluate_batch_size, num_epochs, loraplus_lr_ratio, use_dora,
        use_rslora, group_by_length: Optional overrides; ``None`` keeps the
            value coming from the template.

    Raises:
        ValueError: If the number of ``data_path`` entries differs from the
            number of tasks.
        RuntimeError: If a task is neither a known task nor loadable as a
            dataset via ``load_dataset``.
    """
    import moe_peft

    template = f"{work_path}{os.sep}{file_path}{os.sep}{template}.json"
    config_dir = f"{work_path}{os.sep}{file_name}"

    with open(template, "r", encoding="utf8") as fp:
        template_obj = json.load(fp)
    update_record(template_obj, "cutoff_len", cutoff_len)
    update_record(template_obj, "save_step", save_step)

    lora_templates = template_obj["lora"]
    template_obj["lora"] = []
    if append:
        with open(config_dir, "r", encoding="utf8") as fp:
            orig_config = json.load(fp)
        template_obj["lora"] = orig_config["lora"]
    # Adapter names are suffixed with a running index that continues after
    # any entries kept from the existing configuration.
    index = len(template_obj["lora"])

    if multi_task:
        task_list = [tasks]
        path_list = [data_path]
    else:
        task_list = tasks.split(";")
        path_list = (
            [None] * len(task_list) if data_path is None else data_path.split(";")
        )
        # zip() below would silently drop the unmatched tasks or paths.
        if len(path_list) != len(task_list):
            raise ValueError(
                f"Got {len(task_list)} task(s) but {len(path_list)} data path(s); "
                "provide one data path per task, separated by ';'."
            )

    # Overrides shared by every generated adapter, in the order they are
    # applied; entries whose value is None are skipped by update_record().
    shared_overrides = (
        ("prompt", prompt_template),
        ("scheduler_type", lr_scheduler),
        ("warmup_steps", warmup_steps),
        ("lr", learning_rate),
        ("batch_size", batch_size),
        ("micro_batch_size", micro_batch_size),
        ("evaluate_steps", evaluate_steps),
        ("evaluate_batch_size", evaluate_batch_size),
        ("num_epochs", num_epochs),
        ("loraplus_lr_ratio", loraplus_lr_ratio),
        ("use_dora", use_dora),
        ("use_rslora", use_rslora),
        ("group_by_length", group_by_length),
    )

    for lora_template in lora_templates:
        for task_name, task_data_path in zip(task_list, path_list):
            lora_config = lora_template.copy()
            if multi_task:
                lora_config["name"] = f"multi_task_{index}"
                lora_config["task_name"] = task_name
            elif task_name not in moe_peft.tasks.task_dict:
                # Not a known task: accept it only if it can be loaded as a
                # dataset, and configure it as a "casual" task.
                try:
                    load_dataset(task_name)
                except Exception as err:
                    raise RuntimeError(
                        f"Task name '{task_name}' not exist."
                    ) from err
                lora_config["name"] = f"casual_{index}"
                lora_config["task_name"] = "casual"
                lora_config["data"] = task_name
                lora_config["prompt"] = "alpaca"
            else:
                lora_config["name"] = (
                    f"{task_name.split(':')[-1].replace('-', '_')}_{index}"
                )
                lora_config["task_name"] = task_name

            if adapter_name is not None:
                lora_config["name"] = f"{adapter_name}_{index}"

            update_record(lora_config, "data", task_data_path)
            for key, value in shared_overrides:
                update_record(lora_config, key, value)

            template_obj["lora"].append(lora_config)
            index += 1

    with open(config_dir, "w", encoding="utf8") as f:
        json.dump(template_obj, f, indent=4)
    print(f"Configuration file saved to {config_dir}")
def avail_tasks():
    """Print the name of every task registered in ``moe_peft.tasks.task_dict``."""
    import moe_peft

    print("Available task names:")
    registered = moe_peft.tasks.task_dict.keys()
    for task_name in registered:
        print(f" {task_name}")
    print("These tasks can be trained and evaluated automatically using MoE-PEFT.")
def show_help():
    """Print the launcher usage text.

    Lists the available commands and the arguments accepted by the ``gen``
    and ``run`` commands, including ``--lr_scheduler`` / ``--evaluate_steps``
    (gen) and ``--inference`` / ``--evaluate`` (run).
    """
    print(
        """
Launcher of MoE-PEFT
Usage: python launch.py COMMAND [ARGS...]
Command:
gen generate a configuration from template
run start a task with existed configuration
avail List all available tasks
help Show help information
Arguments of gen:
--template lora, mixlora, etc.
--tasks task names separate by ';'
--adapter_name default is task name
--file_name default is 'moe_peft.json'
--data_path path to input data
--multi_task multi-task training
--append append to existed config
--prompt_template [alpaca]
--cutoff_len
--save_step
--lr_scheduler
--warmup_steps
--learning_rate
--loraplus_lr_ratio
--batch_size
--micro_batch_size
--evaluate_steps
--evaluate_batch_size
--num_epochs
--use_dora
--use_rslora
--group_by_length
Arguments of run:
--base_model model name or path
--config [moe_peft.json]
--inference [false]
--evaluate [false]
--load_adapter [false]
--random_seed [42]
--cuda_device [0]
--log_file [moe_peft.log]
--overwrite [false]
--attn_impl [eager]
--sliding_window [false]
--use_cache [true]
--quantize [none], 4bit, 8bit
--dtype [bf16], fp16, fp32
--tf32 [false]
"""
    )
# Maps each launcher sub-command name to the function that implements it.
command_map = dict(
    gen=gen_config,
    run=compose_command,
    avail=avail_tasks,
    help=show_help,
)
def main(command: str = "help", *args, **kwargs):
    """Dispatch ``command`` to its handler in ``command_map``.

    Args:
        command: Sub-command name; defaults to ``"help"``.
        *args: Positional arguments forwarded to the handler.
        **kwargs: Keyword arguments forwarded to the handler.

    Raises:
        SystemExit: If ``command`` is not a key of ``command_map``; the
            message lists the available commands.
    """
    handler = command_map.get(command)
    if handler is None:
        # Exit with a readable message instead of a bare KeyError traceback.
        available = ", ".join(command_map)
        raise SystemExit(
            f"Unknown command '{command}'. Available commands: {available}"
        )
    handler(*args, **kwargs)
# When executed as a script, hand `main` to Python Fire so that its
# parameters are filled from the command-line arguments.
if __name__ == "__main__":
    fire.Fire(main)