Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,55 +13,25 @@ import torch
|
|
| 13 |
import yaml
|
| 14 |
from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
|
| 15 |
from mergekit.config import MergeConfiguration
|
| 16 |
-
|
| 17 |
from clean_community_org import garbage_collect_empty_models
|
| 18 |
|
| 19 |
has_gpu = torch.cuda.is_available()
|
| 20 |
|
| 21 |
-
# Running directly from Python doesn't work well with Gradio+run_process because of:
|
| 22 |
-
# Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
|
| 23 |
-
# Let's use the CLI instead.
|
| 24 |
-
#
|
| 25 |
-
# import mergekit.merge
|
| 26 |
-
# from mergekit.common import parse_kmb
|
| 27 |
-
# from mergekit.options import MergeOptions
|
| 28 |
-
#
|
| 29 |
-
# merge_options = (
|
| 30 |
-
# MergeOptions(
|
| 31 |
-
# copy_tokenizer=True,
|
| 32 |
-
# cuda=True,
|
| 33 |
-
# low_cpu_memory=True,
|
| 34 |
-
# write_model_card=True,
|
| 35 |
-
# )
|
| 36 |
-
# if has_gpu
|
| 37 |
-
# else MergeOptions(
|
| 38 |
-
# allow_crimes=True,
|
| 39 |
-
# out_shard_size=parse_kmb("1B"),
|
| 40 |
-
# lazy_unpickle=True,
|
| 41 |
-
# write_model_card=True,
|
| 42 |
-
# )
|
| 43 |
-
# )
|
| 44 |
-
|
| 45 |
cli = "mergekit-yaml config.yaml merge --copy-tokenizer" + (
|
| 46 |
" --cuda --low-cpu-memory --allow-crimes" if has_gpu else " --allow-crimes --out-shard-size 1B --lazy-unpickle"
|
| 47 |
)
|
| 48 |
|
| 49 |
MARKDOWN_DESCRIPTION = """
|
| 50 |
# mergekit-gui
|
| 51 |
-
|
| 52 |
The fastest way to perform a model merge 🔥
|
| 53 |
-
|
| 54 |
Specify a YAML configuration file (see examples below) and a HF token and this app will perform the merge and upload the merged model to your user profile.
|
| 55 |
"""
|
| 56 |
|
| 57 |
MARKDOWN_ARTICLE = """
|
| 58 |
___
|
| 59 |
-
|
| 60 |
## Merge Configuration
|
| 61 |
-
|
| 62 |
[Mergekit](https://github.com/arcee-ai/mergekit) configurations are YAML documents specifying the operations to perform in order to produce your merged model.
|
| 63 |
Below are the primary elements of a configuration file:
|
| 64 |
-
|
| 65 |
- `merge_method`: Specifies the method to use for merging models. See [Merge Methods](https://github.com/arcee-ai/mergekit#merge-methods) for a list.
|
| 66 |
- `slices`: Defines slices of layers from different models to be used. This field is mutually exclusive with `models`.
|
| 67 |
- `models`: Defines entire models to be used for merging. This field is mutually exclusive with `slices`.
|
|
@@ -69,11 +39,8 @@ Below are the primary elements of a configuration file:
|
|
| 69 |
- `parameters`: Holds various parameters such as weights and densities, which can also be specified at different levels of the configuration.
|
| 70 |
- `dtype`: Specifies the data type used for the merging operation.
|
| 71 |
- `tokenizer_source`: Determines how to construct a tokenizer for the merged model.
|
| 72 |
-
|
| 73 |
## Merge Methods
|
| 74 |
-
|
| 75 |
A quick overview of the currently supported merge methods:
|
| 76 |
-
|
| 77 |
| Method | `merge_method` value | Multi-Model | Uses base model |
|
| 78 |
| -------------------------------------------------------------------------------------------- | -------------------- | ----------- | --------------- |
|
| 79 |
| Linear ([Model Soups](https://arxiv.org/abs/2203.05482)) | `linear` | ✅ | ❌ |
|
|
@@ -84,13 +51,9 @@ A quick overview of the currently supported merge methods:
|
|
| 84 |
| [DARE](https://arxiv.org/abs/2311.03099) [Task Arithmetic](https://arxiv.org/abs/2212.04089) | `dare_linear` | ✅ | ✅ |
|
| 85 |
| Passthrough | `passthrough` | ❌ | ❌ |
|
| 86 |
| [Model Stock](https://arxiv.org/abs/2403.19522) | `model_stock` | ✅ | ✅ |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
## Citation
|
| 90 |
-
|
| 91 |
This GUI is powered by [Arcee's MergeKit](https://arxiv.org/abs/2403.13257).
|
| 92 |
If you use it in your research, please cite the following paper:
|
| 93 |
-
|
| 94 |
```
|
| 95 |
@article{goddard2024arcee,
|
| 96 |
title={Arcee's MergeKit: A Toolkit for Merging Large Language Models},
|
|
@@ -99,19 +62,13 @@ If you use it in your research, please cite the following paper:
|
|
| 99 |
year={2024}
|
| 100 |
}
|
| 101 |
```
|
| 102 |
-
|
| 103 |
This Space is heavily inspired by LazyMergeKit by Maxime Labonne (see [Colab](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb)).
|
| 104 |
"""
|
| 105 |
|
| 106 |
examples = [[str(f)] for f in pathlib.Path("examples").glob("*.yaml")]
|
| 107 |
-
|
| 108 |
-
# Do not set community token as `HF_TOKEN` to avoid accidentally using it in merge scripts.
|
| 109 |
-
# `COMMUNITY_HF_TOKEN` is used to upload models to the community organization (https://huggingface.co/mergekit-community)
|
| 110 |
-
# when user do not provide a token.
|
| 111 |
COMMUNITY_HF_TOKEN = os.getenv("COMMUNITY_HF_TOKEN")
|
| 112 |
|
| 113 |
-
|
| 114 |
-
def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]]:
|
| 115 |
runner = LogsViewRunner()
|
| 116 |
|
| 117 |
if not yaml_config:
|
|
@@ -151,8 +108,7 @@ def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]
|
|
| 151 |
|
| 152 |
if not repo_name:
|
| 153 |
yield runner.log("No repo name provided. Generating a random one.")
|
| 154 |
-
repo_name = f"mergekit-{merge_config.merge_method}"
|
| 155 |
-
# Make repo_name "unique" (no need to be extra careful on uniqueness)
|
| 156 |
repo_name += "-" + "".join(random.choices(string.ascii_lowercase, k=7))
|
| 157 |
repo_name = repo_name.replace("/", "-").strip("-")
|
| 158 |
|
|
@@ -167,8 +123,7 @@ def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]
|
|
| 167 |
yield runner.log(f"Error creating repo {e}", level="ERROR")
|
| 168 |
return
|
| 169 |
|
| 170 |
-
|
| 171 |
-
tmp_env = os.environ.copy() # taken from https://stackoverflow.com/a/4453495
|
| 172 |
tmp_env["HF_HOME"] = f"{tmpdirname}/.cache"
|
| 173 |
full_cli = cli + f" --lora-merge-cache {tmpdirname}/.lora_cache"
|
| 174 |
yield from runner.run_command(full_cli.split(), cwd=merged_path, env=tmp_env)
|
|
@@ -186,7 +141,6 @@ def merge(yaml_config: str, hf_token: str, repo_name: str) -> Iterable[List[Log]
|
|
| 186 |
)
|
| 187 |
yield runner.log(f"Model successfully uploaded to HF: {repo_url.repo_id}")
|
| 188 |
|
| 189 |
-
|
| 190 |
with gr.Blocks() as demo:
|
| 191 |
gr.Markdown(MARKDOWN_DESCRIPTION)
|
| 192 |
|
|
@@ -206,6 +160,11 @@ with gr.Blocks() as demo:
|
|
| 206 |
label="Repo name",
|
| 207 |
placeholder="Optional. Will create a random name if empty.",
|
| 208 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
button = gr.Button("Merge", variant="primary")
|
| 210 |
logs = LogsView(label="Terminal output")
|
| 211 |
gr.Examples(
|
|
@@ -218,11 +177,8 @@ with gr.Blocks() as demo:
|
|
| 218 |
)
|
| 219 |
gr.Markdown(MARKDOWN_ARTICLE)
|
| 220 |
|
| 221 |
-
button.click(fn=merge, inputs=[config, token, repo_name], outputs=[logs])
|
| 222 |
-
|
| 223 |
|
| 224 |
-
# Run garbage collection every hour to keep the community org clean.
|
| 225 |
-
# Empty models might exists if the merge fails abruptly (e.g. if user leaves the Space).
|
| 226 |
def _garbage_collect_every_hour():
|
| 227 |
while True:
|
| 228 |
try:
|
|
@@ -231,8 +187,9 @@ def _garbage_collect_every_hour():
|
|
| 231 |
print("Error running garbage collection", e)
|
| 232 |
time.sleep(3600)
|
| 233 |
|
| 234 |
-
|
| 235 |
pool = ThreadPoolExecutor()
|
| 236 |
-
pool.submit(
|
|
|
|
|
|
|
| 237 |
|
| 238 |
-
demo.queue(default_concurrency_limit=1).launch()
|
|
|
|
| 13 |
import yaml
|
| 14 |
from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
|
| 15 |
from mergekit.config import MergeConfiguration
|
|
|
|
| 16 |
from clean_community_org import garbage_collect_empty_models
|
| 17 |
|
| 18 |
has_gpu = torch.cuda.is_available()
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
cli = "mergekit-yaml config.yaml merge --copy-tokenizer" + (
|
| 21 |
" --cuda --low-cpu-memory --allow-crimes" if has_gpu else " --allow-crimes --out-shard-size 1B --lazy-unpickle"
|
| 22 |
)
|
| 23 |
|
| 24 |
MARKDOWN_DESCRIPTION = """
|
| 25 |
# mergekit-gui
|
|
|
|
| 26 |
The fastest way to perform a model merge 🔥
|
|
|
|
| 27 |
Specify a YAML configuration file (see examples below) and a HF token and this app will perform the merge and upload the merged model to your user profile.
|
| 28 |
"""
|
| 29 |
|
| 30 |
MARKDOWN_ARTICLE = """
|
| 31 |
___
|
|
|
|
| 32 |
## Merge Configuration
|
|
|
|
| 33 |
[Mergekit](https://github.com/arcee-ai/mergekit) configurations are YAML documents specifying the operations to perform in order to produce your merged model.
|
| 34 |
Below are the primary elements of a configuration file:
|
|
|
|
| 35 |
- `merge_method`: Specifies the method to use for merging models. See [Merge Methods](https://github.com/arcee-ai/mergekit#merge-methods) for a list.
|
| 36 |
- `slices`: Defines slices of layers from different models to be used. This field is mutually exclusive with `models`.
|
| 37 |
- `models`: Defines entire models to be used for merging. This field is mutually exclusive with `slices`.
|
|
|
|
| 39 |
- `parameters`: Holds various parameters such as weights and densities, which can also be specified at different levels of the configuration.
|
| 40 |
- `dtype`: Specifies the data type used for the merging operation.
|
| 41 |
- `tokenizer_source`: Determines how to construct a tokenizer for the merged model.
|
|
|
|
| 42 |
## Merge Methods
|
|
|
|
| 43 |
A quick overview of the currently supported merge methods:
|
|
|
|
| 44 |
| Method | `merge_method` value | Multi-Model | Uses base model |
|
| 45 |
| -------------------------------------------------------------------------------------------- | -------------------- | ----------- | --------------- |
|
| 46 |
| Linear ([Model Soups](https://arxiv.org/abs/2203.05482)) | `linear` | ✅ | ❌ |
|
|
|
|
| 51 |
| [DARE](https://arxiv.org/abs/2311.03099) [Task Arithmetic](https://arxiv.org/abs/2212.04089) | `dare_linear` | ✅ | ✅ |
|
| 52 |
| Passthrough | `passthrough` | ❌ | ❌ |
|
| 53 |
| [Model Stock](https://arxiv.org/abs/2403.19522) | `model_stock` | ✅ | ✅ |
|
|
|
|
|
|
|
| 54 |
## Citation
|
|
|
|
| 55 |
This GUI is powered by [Arcee's MergeKit](https://arxiv.org/abs/2403.13257).
|
| 56 |
If you use it in your research, please cite the following paper:
|
|
|
|
| 57 |
```
|
| 58 |
@article{goddard2024arcee,
|
| 59 |
title={Arcee's MergeKit: A Toolkit for Merging Large Language Models},
|
|
|
|
| 62 |
year={2024}
|
| 63 |
}
|
| 64 |
```
|
|
|
|
| 65 |
This Space is heavily inspired by LazyMergeKit by Maxime Labonne (see [Colab](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb)).
|
| 66 |
"""
|
| 67 |
|
| 68 |
examples = [[str(f)] for f in pathlib.Path("examples").glob("*.yaml")]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
COMMUNITY_HF_TOKEN = os.getenv("COMMUNITY_HF_TOKEN")
|
| 70 |
|
| 71 |
+
def merge(yaml_config: str, hf_token: str, repo_name: str, profile_name: str) -> Iterable[List[Log]]:
|
|
|
|
| 72 |
runner = LogsViewRunner()
|
| 73 |
|
| 74 |
if not yaml_config:
|
|
|
|
| 108 |
|
| 109 |
if not repo_name:
|
| 110 |
yield runner.log("No repo name provided. Generating a random one.")
|
| 111 |
+
repo_name = f"{profile_name}/mergekit-{merge_config.merge_method}" if profile_name else f"mergekit-{merge_config.merge_method}"
|
|
|
|
| 112 |
repo_name += "-" + "".join(random.choices(string.ascii_lowercase, k=7))
|
| 113 |
repo_name = repo_name.replace("/", "-").strip("-")
|
| 114 |
|
|
|
|
| 123 |
yield runner.log(f"Error creating repo {e}", level="ERROR")
|
| 124 |
return
|
| 125 |
|
| 126 |
+
tmp_env = os.environ.copy()
|
|
|
|
| 127 |
tmp_env["HF_HOME"] = f"{tmpdirname}/.cache"
|
| 128 |
full_cli = cli + f" --lora-merge-cache {tmpdirname}/.lora_cache"
|
| 129 |
yield from runner.run_command(full_cli.split(), cwd=merged_path, env=tmp_env)
|
|
|
|
| 141 |
)
|
| 142 |
yield runner.log(f"Model successfully uploaded to HF: {repo_url.repo_id}")
|
| 143 |
|
|
|
|
| 144 |
with gr.Blocks() as demo:
|
| 145 |
gr.Markdown(MARKDOWN_DESCRIPTION)
|
| 146 |
|
|
|
|
| 160 |
label="Repo name",
|
| 161 |
placeholder="Optional. Will create a random name if empty.",
|
| 162 |
)
|
| 163 |
+
profile_name = gr.Textbox(
|
| 164 |
+
lines=1,
|
| 165 |
+
label="Hugging Face Profile Name",
|
| 166 |
+
placeholder="Enter your Hugging Face profile name.",
|
| 167 |
+
)
|
| 168 |
button = gr.Button("Merge", variant="primary")
|
| 169 |
logs = LogsView(label="Terminal output")
|
| 170 |
gr.Examples(
|
|
|
|
| 177 |
)
|
| 178 |
gr.Markdown(MARKDOWN_ARTICLE)
|
| 179 |
|
| 180 |
+
button.click(fn=merge, inputs=[config, token, repo_name, profile_name], outputs=[logs])
|
|
|
|
| 181 |
|
|
|
|
|
|
|
| 182 |
def _garbage_collect_every_hour():
|
| 183 |
while True:
|
| 184 |
try:
|
|
|
|
| 187 |
print("Error running garbage collection", e)
|
| 188 |
time.sleep(3600)
|
| 189 |
|
|
|
|
| 190 |
pool = ThreadPoolExecutor()
|
| 191 |
+
pool.submit(_gar
|
| 192 |
+
|
| 193 |
+
bage_collect_every_hour)
|
| 194 |
|
| 195 |
+
demo.queue(default_concurrency_limit=1).launch()
|