Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -735,6 +735,7 @@ with gr.Blocks() as demo:
|
|
| 735 |
t5_norm = gr.Checkbox(label="Normalize Weights", value=True); t5_i8 = gr.Checkbox(label="Int8 Mask", value=False); t5_flat = gr.Checkbox(label="Flatten Tensors (NuSlerp)", value=False); t5_row = gr.Checkbox(label="Row Wise (NuSlerp)", value=False)
|
| 736 |
with gr.Row():
|
| 737 |
t5_eps = gr.Textbox(label="eps (Stabilization Constant) (MultiSlerp)", value="1e-8"); t5_iter = gr.Number(label="Max Iterations (Karcher)", value=10); t5_tol = gr.Textbox(label="tol (Convergence Tolerance) (Karcher)", value="1e-5")
|
|
|
|
| 738 |
m1, w1 = gr.Textbox(label="Model 1"), gr.Textbox(label="Weight 1", value="1.0"); m2, w2 = gr.Textbox(label="Model 2"), gr.Textbox(label="Weight 2", value="1.0")
|
| 739 |
with gr.Accordion("More", open=False):
|
| 740 |
m3, w3 = gr.Textbox(label="Model 3"), gr.Textbox(label="Weight 3", value="1.0"); m4, w4 = gr.Textbox(label="Model 4"), gr.Textbox(label="Weight 4", value="1.0"); m5, w5 = gr.Textbox(label="Model 5"), gr.Textbox(label="Weight 5", value="1.0")
|
|
@@ -751,12 +752,16 @@ with gr.Blocks() as demo:
|
|
| 751 |
gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 752 |
with gr.Row():
|
| 753 |
t6_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t6_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t6_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t6_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
|
|
|
| 754 |
t6_base = gr.Textbox(label="Base Model (required)")
|
|
|
|
|
|
|
|
|
|
| 755 |
with gr.Row():
|
| 756 |
t6_norm = gr.Checkbox(label="Normalize Weights", value=True); t6_i8 = gr.Checkbox(label="Int8 Mask", value=False); t6_resc = gr.Checkbox(label="Rescale (Dare_Linear)", value=True); t6_lamb = gr.Number(label="Lambda", value=1.0); t6_topk = gr.Slider(0, 1, 1.0, label="Select TopK (SCE)")
|
| 757 |
-
m1_6, w1_6 = gr.Textbox(label="Model 1"), gr.Textbox(label="Weight 1", value="1.0"); d1_6, g1_6, e1_6 = gr.Textbox(label="Density", value="1.0"), gr.Number(label="Gamma", value=0.01), gr.Number(label="Epsilon", value=0.15)
|
|
|
|
| 758 |
with gr.Accordion("More", open=False):
|
| 759 |
-
m2_6, w2_6 = gr.Textbox(label="Model 2"), gr.Textbox(label="Weight 2", value="1.0"); d2_6, g2_6, e2_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
|
| 760 |
m3_6, w3_6 = gr.Textbox(label="Model 3"), gr.Textbox(label="Weight 3", value="1.0"); d3_6, g3_6, e3_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
|
| 761 |
m4_6, w4_6 = gr.Textbox(label="Model 4"), gr.Textbox(label="Weight 4", value="1.0"); d4_6, g4_6, e4_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
|
| 762 |
t6_out = gr.Textbox(label="Output Repo"); t6_priv = gr.Checkbox(label="Private", value=True)
|
|
@@ -772,11 +777,10 @@ with gr.Blocks() as demo:
|
|
| 772 |
gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 773 |
with gr.Row():
|
| 774 |
t7_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t7_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t7_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t7_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
| 775 |
-
|
| 776 |
t7_base = gr.Textbox(label="Base Model (required for nearswap/arcee_fusion/model_stock)", placeholder="org/base-model")
|
| 777 |
|
| 778 |
-
gr.Markdown("####
|
| 779 |
-
gr.Markdown("**passthrough:** 1 model | **nearswap/arcee_fusion:** 2 models | **model_stock:** 3+ models")
|
| 780 |
|
| 781 |
with gr.Row():
|
| 782 |
t7_norm = gr.Checkbox(label="Normalize", value=True); t7_i8 = gr.Checkbox(label="Int8 Mask", value=False); t7_t = gr.Slider(0, 1, 0.5, label="t (Interpolation Ratio, for Nearswap)"); t7_filt_w = gr.Checkbox(label="Filter Wise (for Model_Stock)", value=False)
|
|
@@ -791,13 +795,13 @@ with gr.Blocks() as demo:
|
|
| 791 |
|
| 792 |
# --- TAB 8 (MoEr) ---
|
| 793 |
with gr.Tab("MoEr"):
|
| 794 |
-
gr.Markdown("### Mixture of Experts")
|
| 795 |
gr.Markdown("See [MergeKit MoE doc](https://github.com/arcee-ai/mergekit/blob/main/docs/moe.md) for more info.")
|
| 796 |
|
| 797 |
t8_token = gr.Textbox(label="Token", type="password")
|
| 798 |
with gr.Row():
|
| 799 |
t8_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t8_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t8_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t8_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
| 800 |
-
t8_base = gr.Textbox(label="Base Model
|
| 801 |
gr.Markdown("#### Experts (at least 2 required)")
|
| 802 |
gr.Markdown("Prompts are comma-separated descriptors for each expert")
|
| 803 |
|
|
|
|
| 735 |
t5_norm = gr.Checkbox(label="Normalize Weights", value=True); t5_i8 = gr.Checkbox(label="Int8 Mask", value=False); t5_flat = gr.Checkbox(label="Flatten Tensors (NuSlerp)", value=False); t5_row = gr.Checkbox(label="Row Wise (NuSlerp)", value=False)
|
| 736 |
with gr.Row():
|
| 737 |
t5_eps = gr.Textbox(label="eps (Stabilization Constant) (MultiSlerp)", value="1e-8"); t5_iter = gr.Number(label="Max Iterations (Karcher)", value=10); t5_tol = gr.Textbox(label="tol (Convergence Tolerance) (Karcher)", value="1e-5")
|
| 738 |
+
gr.Markdown("#### MODELS: **slerp:** 2 models exactly, one of the 2 also listed as *Base* | **nuslerp:** 2 models exactly; *Base*: optional | **multislerp:** 2+ models; *Base*: optional | **karcher:** 2+ models; *Base*: none")
|
| 739 |
m1, w1 = gr.Textbox(label="Model 1"), gr.Textbox(label="Weight 1", value="1.0"); m2, w2 = gr.Textbox(label="Model 2"), gr.Textbox(label="Weight 2", value="1.0")
|
| 740 |
with gr.Accordion("More", open=False):
|
| 741 |
m3, w3 = gr.Textbox(label="Model 3"), gr.Textbox(label="Weight 3", value="1.0"); m4, w4 = gr.Textbox(label="Model 4"), gr.Textbox(label="Weight 4", value="1.0"); m5, w5 = gr.Textbox(label="Model 5"), gr.Textbox(label="Weight 5", value="1.0")
|
|
|
|
| 752 |
gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 753 |
with gr.Row():
|
| 754 |
t6_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t6_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t6_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t6_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
| 755 |
+
gr.Markdown("Built-in **Chat Templates**: alpaca, chatml, llama3, mistral, exaone, auto (default)")
|
| 756 |
t6_base = gr.Textbox(label="Base Model (required)")
|
| 757 |
+
|
| 758 |
+
gr.Markdown("#### MODELS: These methods all accept **2 or more models**, and require one of these designated as *Base*")
|
| 759 |
+
|
| 760 |
with gr.Row():
|
| 761 |
t6_norm = gr.Checkbox(label="Normalize Weights", value=True); t6_i8 = gr.Checkbox(label="Int8 Mask", value=False); t6_resc = gr.Checkbox(label="Rescale (Dare_Linear)", value=True); t6_lamb = gr.Number(label="Lambda", value=1.0); t6_topk = gr.Slider(0, 1, 1.0, label="Select TopK (SCE)")
|
| 762 |
+
m1_6, w1_6 = gr.Textbox(label="Model 1"), gr.Textbox(label="Weight 1", value="1.0"); d1_6, g1_6, e1_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
|
| 763 |
+
m2_6, w2_6 = gr.Textbox(label="Model 2"), gr.Textbox(label="Weight 2", value="1.0"); d2_6, g2_6, e2_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
|
| 764 |
with gr.Accordion("More", open=False):
|
|
|
|
| 765 |
m3_6, w3_6 = gr.Textbox(label="Model 3"), gr.Textbox(label="Weight 3", value="1.0"); d3_6, g3_6, e3_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
|
| 766 |
m4_6, w4_6 = gr.Textbox(label="Model 4"), gr.Textbox(label="Weight 4", value="1.0"); d4_6, g4_6, e4_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
|
| 767 |
t6_out = gr.Textbox(label="Output Repo"); t6_priv = gr.Checkbox(label="Private", value=True)
|
|
|
|
| 777 |
gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 778 |
with gr.Row():
|
| 779 |
t7_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t7_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t7_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t7_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
| 780 |
+
gr.Markdown("Built-in **Chat Templates**: alpaca, chatml, llama3, mistral, exaone, auto (default)")
|
| 781 |
t7_base = gr.Textbox(label="Base Model (required for nearswap/arcee_fusion/model_stock)", placeholder="org/base-model")
|
| 782 |
|
| 783 |
+
gr.Markdown("#### MODELS: **passthrough:** 1 model acc. to Docs, but [Examples](https://github.com/arcee-ai/mergekit/tree/main/examples) shows 2+ | **nearswap/arcee_fusion:** 2 models, one also listed as *Base* | **model_stock:** 3+ models, one also listed as *Base*")
|
|
|
|
| 784 |
|
| 785 |
with gr.Row():
|
| 786 |
t7_norm = gr.Checkbox(label="Normalize", value=True); t7_i8 = gr.Checkbox(label="Int8 Mask", value=False); t7_t = gr.Slider(0, 1, 0.5, label="t (Interpolation Ratio, for Nearswap)"); t7_filt_w = gr.Checkbox(label="Filter Wise (for Model_Stock)", value=False)
|
|
|
|
| 795 |
|
| 796 |
# --- TAB 8 (MoEr) ---
|
| 797 |
with gr.Tab("MoEr"):
|
| 798 |
+
gr.Markdown("### Mixture of Experts: fuses self-attention & normalization layers from *Base* w/MLP layers from *Experts*")
|
| 799 |
gr.Markdown("See [MergeKit MoE doc](https://github.com/arcee-ai/mergekit/blob/main/docs/moe.md) for more info.")
|
| 800 |
|
| 801 |
t8_token = gr.Textbox(label="Token", type="password")
|
| 802 |
with gr.Row():
|
| 803 |
t8_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t8_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t8_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t8_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
| 804 |
+
t8_base = gr.Textbox(label="Base Model (Required)"); t8_gate = gr.Dropdown(["cheap_embed", "random", "hidden"], value="cheap_embed", label="Gate Mode"); t8_dtype = gr.Dropdown(["float16", "bfloat16"], value="bfloat16", label="Internal Dtype")
|
| 805 |
gr.Markdown("#### Experts (at least 2 required)")
|
| 806 |
gr.Markdown("Prompts are comma-separated descriptors for each expert")
|
| 807 |
|