Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -874,7 +874,7 @@ with gr.Blocks() as demo:
|
|
| 874 |
|
| 875 |
# --- 1. Merge Legacy ---
|
| 876 |
with gr.Tab("Merge 2 Base") as t1_tab:
|
| 877 |
-
gr.Markdown("
|
| 878 |
with gr.Row(variant="compact"):
|
| 879 |
t1_token = gr.Textbox(label="Token", type="password", scale=2)
|
| 880 |
t1_prec = gr.Dropdown(["bf16", "fp16", "float32"], value="bf16", label="Precision", scale=1)
|
|
@@ -895,6 +895,7 @@ with gr.Blocks() as demo:
|
|
| 895 |
|
| 896 |
# --- 2. Extract Adapter ---
|
| 897 |
with gr.Tab("Extract LoRA") as t2_tab:
|
|
|
|
| 898 |
with gr.Row(variant="compact"):
|
| 899 |
t2_token = gr.Textbox(label="Token", type="password", scale=2)
|
| 900 |
t2_rank = gr.Number(label="Extract At Rank", value=32, minimum=1, maximum=1024, step=1, scale=1)
|
|
@@ -909,7 +910,7 @@ with gr.Blocks() as demo:
|
|
| 909 |
|
| 910 |
# --- 3. Merge Adapters ---
|
| 911 |
with gr.Tab("Fuse Adapters") as t3_tab:
|
| 912 |
-
gr.Markdown("
|
| 913 |
with gr.Row(variant="compact"):
|
| 914 |
t3_token = gr.Textbox(label="Token", type="password", scale=1)
|
| 915 |
t3_method = gr.Dropdown(["Iterative EMA (Linear w/ Beta/Sigma coefficient)", "Concatenation (MOE-like weights-stack)", "SVD Fusion (Task Arithmetic/Compressed)"], value="Iterative EMA (Linear w/ Beta/Sigma coefficient)", label="Merge Method", scale=2)
|
|
@@ -946,7 +947,7 @@ with gr.Blocks() as demo:
|
|
| 946 |
|
| 947 |
# --- 5. Amphinterpolative ---
|
| 948 |
with gr.Tab("Amphinterpolate") as t5_tab:
|
| 949 |
-
gr.Markdown("
|
| 950 |
with gr.Row(variant="compact"):
|
| 951 |
t5_token = gr.Textbox(label="HF Token", type="password", scale=1)
|
| 952 |
t5_method = gr.Dropdown(["slerp", "nuslerp", "multislerp", "karcher"], value="slerp", label="Merge Method", scale=1)
|
|
@@ -955,7 +956,7 @@ with gr.Blocks() as demo:
|
|
| 955 |
gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 956 |
|
| 957 |
with gr.Row(variant="compact"):
|
| 958 |
-
t5_shard = gr.Slider(label="Max Shard Size (GB)", value=
|
| 959 |
t5_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
|
| 960 |
t5_t = gr.Slider(0, 1, 0.5, label="t (mix factor)")
|
| 961 |
with gr.Row(variant="compact"):
|
|
@@ -974,7 +975,7 @@ with gr.Blocks() as demo:
|
|
| 974 |
t5_iter = gr.Number(label="Max Iter (Karcher)", value=10)
|
| 975 |
t5_tol = gr.Textbox(label="tol (Karcher)", value="1e-5")
|
| 976 |
|
| 977 |
-
gr.Markdown("**MODELS**:
|
| 978 |
|
| 979 |
with gr.Row(variant="compact"):
|
| 980 |
with gr.Column(scale=3): m1 = gr.Textbox(label="Model 1")
|
|
@@ -1001,7 +1002,7 @@ with gr.Blocks() as demo:
|
|
| 1001 |
|
| 1002 |
# --- 6. Stir/Tie Bases ---
|
| 1003 |
with gr.Tab("Align/Tie") as t6_tab:
|
| 1004 |
-
gr.Markdown("
|
| 1005 |
with gr.Row(variant="compact"):
|
| 1006 |
t6_token = gr.Textbox(label="Token", type="password", scale=1)
|
| 1007 |
t6_method = gr.Dropdown(["task_arithmetic", "ties", "dare_ties", "dare_linear", "della", "della_linear", "breadcrumbs", "breadcrumbs_ties", "sce"], value="ties", label="Merge Method", scale=2)
|
|
@@ -1009,12 +1010,12 @@ with gr.Blocks() as demo:
|
|
| 1009 |
gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 1010 |
|
| 1011 |
with gr.Row(variant="compact"):
|
| 1012 |
-
t6_shard = gr.Slider(label="Max Shard Size (GB)", value=
|
| 1013 |
t6_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
|
| 1014 |
t6_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source")
|
| 1015 |
t6_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
| 1016 |
-
gr.Markdown("Built-in **Chat Templates**: alpaca, chatml, llama3, mistral, exaone, auto (default)")
|
| 1017 |
-
gr.Markdown("**MODELS**: These methods all accept
|
| 1018 |
|
| 1019 |
with gr.Accordion("Global Parameters (Normalize, Int8, Lambda, etc.)", open=False):
|
| 1020 |
with gr.Row(variant="compact"): t6_norm = gr.Checkbox(label="Normalize Weights", value=True); t6_i8 = gr.Checkbox(label="Int8 Mask", value=False); t6_resc = gr.Checkbox(label="Rescale (Dare_Linear)", value=True)
|
|
@@ -1040,18 +1041,18 @@ with gr.Blocks() as demo:
|
|
| 1040 |
|
| 1041 |
# --- 7. Specious ---
|
| 1042 |
with gr.Tab("Specious") as t7_tab:
|
| 1043 |
-
gr.Markdown("
|
| 1044 |
with gr.Row(variant="compact"):
|
| 1045 |
t7_token = gr.Textbox(label="Token", type="password", scale=1)
|
| 1046 |
t7_method = gr.Dropdown(["model_stock", "nearswap", "arcee_fusion", "passthrough", "linear"], value="model_stock", label="Merge Method", scale=2)
|
| 1047 |
t7_base = gr.Textbox(label="Base Model (required for nearswap/arcee_fusion/model_stock)", placeholder="org/base-model")
|
| 1048 |
-
gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 1049 |
|
| 1050 |
with gr.Row(variant="compact"):
|
| 1051 |
-
t7_shard = gr.Slider(label="Max Shard Size (GB)", value=
|
| 1052 |
-
gr.Markdown("Built-in **Chat Templates**: alpaca, chatml, llama3, mistral, exaone, auto (default)")
|
| 1053 |
|
| 1054 |
-
gr.Markdown("**MODELS**:
|
| 1055 |
|
| 1056 |
with gr.Row(variant="compact"):
|
| 1057 |
t7_norm = gr.Checkbox(label="Normalize", value=True); t7_i8 = gr.Checkbox(label="Int8 Mask", value=False); t7_t = gr.Slider(0, 1, 0.5, label="t (Interpolation Ratio, for Nearswap)"); t7_filt_w = gr.Checkbox(label="Filter Wise (for Model_Stock)", value=False)
|
|
@@ -1072,8 +1073,8 @@ with gr.Blocks() as demo:
|
|
| 1072 |
|
| 1073 |
# --- 8. MoEr ---
|
| 1074 |
with gr.Tab("MoEr") as t8_tab:
|
| 1075 |
-
gr.Markdown("
|
| 1076 |
-
gr.Markdown("See [MergeKit MoE doc](https://github.com/arcee-ai/mergekit/blob/main/docs/moe.md) for more info.")
|
| 1077 |
|
| 1078 |
with gr.Row(variant="compact"):
|
| 1079 |
t8_token = gr.Textbox(label="Token", type="password", scale=1)
|
|
@@ -1083,8 +1084,9 @@ with gr.Blocks() as demo:
|
|
| 1083 |
t8_dtype = gr.Dropdown(["float16", "bfloat16"], value="bfloat16", label="Internal Dtype")
|
| 1084 |
with gr.Row(variant="compact"):
|
| 1085 |
t8_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t8_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t8_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t8_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
|
|
|
| 1086 |
|
| 1087 |
-
gr.Markdown("#### Experts (
|
| 1088 |
with gr.Row(variant="compact"): t8_expert1 = gr.Textbox(label="Expert 1", placeholder="org/expert1", scale=2); t8_prompt1 = gr.Textbox(label="Positive Prompts", placeholder="math, reasoning, logic", scale=3)
|
| 1089 |
with gr.Row(variant="compact"): t8_expert2 = gr.Textbox(label="Expert 2", placeholder="org/expert2", scale=2); t8_prompt2 = gr.Textbox(label="Positive Prompts", placeholder="creative, writing, storytelling", scale=3)
|
| 1090 |
with gr.Accordion("More Experts (3-5)", open=False):
|
|
@@ -1102,18 +1104,19 @@ with gr.Blocks() as demo:
|
|
| 1102 |
|
| 1103 |
# --- 9. Rawer ---
|
| 1104 |
with gr.Tab("Rawer") as t9_tab:
|
| 1105 |
-
gr.Markdown("
|
| 1106 |
with gr.Row(variant="compact"):
|
| 1107 |
t9_token = gr.Textbox(label="Token", type="password", scale=1)
|
| 1108 |
t9_method = gr.Dropdown(["linear", "passthrough"], value="linear", label="Merge Method", scale=1)
|
| 1109 |
t9_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="Config dtype", scale=1)
|
| 1110 |
t9_models = gr.TextArea(label="Models (one per line)", lines=3)
|
|
|
|
| 1111 |
with gr.Row(variant="compact"):
|
| 1112 |
t9_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t9_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
|
| 1113 |
with gr.Row(variant="compact"):
|
| 1114 |
-
t9_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t9_chat = gr.Textbox(label="Chat Template
|
| 1115 |
-
gr.Markdown("Built-in Chat Templates: alpaca, chatml, llama3, mistral, exaone, auto")
|
| 1116 |
-
gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 1117 |
with gr.Row(variant="compact"):
|
| 1118 |
t9_out = gr.Textbox(label="Output Repo", scale=3); t9_priv = gr.Checkbox(label="Private", value=True, scale=1)
|
| 1119 |
t9_btn = gr.Button("Merge Raw", variant="primary")
|
|
@@ -1122,8 +1125,8 @@ with gr.Blocks() as demo:
|
|
| 1122 |
|
| 1123 |
# --- 10. Mario,DARE! ---
|
| 1124 |
with gr.Tab("Mario,Dare!") as t10_tab:
|
| 1125 |
-
gr.Markdown("
|
| 1126 |
-
gr.Markdown("
|
| 1127 |
t10_token = gr.Textbox(label="Token", type="password")
|
| 1128 |
|
| 1129 |
gr.Markdown(
|
|
|
|
| 874 |
|
| 875 |
# --- 1. Merge Legacy ---
|
| 876 |
with gr.Tab("Merge 2 Base") as t1_tab:
|
| 877 |
+
gr.Markdown("##### Fuse a fine-tuned low-rank **ADAPTER** model (*LoRA, DoRA, etc...*) + a full **BASE** model (*LLM, t2i, t2v...* any!)")
|
| 878 |
with gr.Row(variant="compact"):
|
| 879 |
t1_token = gr.Textbox(label="Token", type="password", scale=2)
|
| 880 |
t1_prec = gr.Dropdown(["bf16", "fp16", "float32"], value="bf16", label="Precision", scale=1)
|
|
|
|
| 895 |
|
| 896 |
# --- 2. Extract Adapter ---
|
| 897 |
with gr.Tab("Extract LoRA") as t2_tab:
|
| 898 |
+
gr.Markdown("##### Extract differences between 2 architecturally similar **BASE MODELS** as a low-rank **ADAPTER**")
|
| 899 |
with gr.Row(variant="compact"):
|
| 900 |
t2_token = gr.Textbox(label="Token", type="password", scale=2)
|
| 901 |
t2_rank = gr.Number(label="Extract At Rank", value=32, minimum=1, maximum=1024, step=1, scale=1)
|
|
|
|
| 910 |
|
| 911 |
# --- 3. Merge Adapters ---
|
| 912 |
with gr.Tab("Fuse Adapters") as t3_tab:
|
| 913 |
+
gr.Markdown("##### Merge 2 or more *ADAPTERS* into a new chimera adapter by either: *averaging*, *collaging*, or *interpolating* their tensors")
|
| 914 |
with gr.Row(variant="compact"):
|
| 915 |
t3_token = gr.Textbox(label="Token", type="password", scale=1)
|
| 916 |
t3_method = gr.Dropdown(["Iterative EMA (Linear w/ Beta/Sigma coefficient)", "Concatenation (MOE-like weights-stack)", "SVD Fusion (Task Arithmetic/Compressed)"], value="Iterative EMA (Linear w/ Beta/Sigma coefficient)", label="Merge Method", scale=2)
|
|
|
|
| 947 |
|
| 948 |
# --- 5. Amphinterpolative ---
|
| 949 |
with gr.Tab("Amphinterpolate") as t5_tab:
|
| 950 |
+
gr.Markdown("##### Spherical Interpolation Methods Family: slerp, nuslerp, multislerp, karcher")
|
| 951 |
with gr.Row(variant="compact"):
|
| 952 |
t5_token = gr.Textbox(label="HF Token", type="password", scale=1)
|
| 953 |
t5_method = gr.Dropdown(["slerp", "nuslerp", "multislerp", "karcher"], value="slerp", label="Merge Method", scale=1)
|
|
|
|
| 956 |
gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 957 |
|
| 958 |
with gr.Row(variant="compact"):
|
| 959 |
+
t5_shard = gr.Slider(label="Max Shard Size (GB)", value=4.0, minimum=0.5, maximum=20.0)
|
| 960 |
t5_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
|
| 961 |
t5_t = gr.Slider(0, 1, 0.5, label="t (mix factor)")
|
| 962 |
with gr.Row(variant="compact"):
|
|
|
|
| 975 |
t5_iter = gr.Number(label="Max Iter (Karcher)", value=10)
|
| 976 |
t5_tol = gr.Textbox(label="tol (Karcher)", value="1e-5")
|
| 977 |
|
| 978 |
+
gr.Markdown("**MODELS**: ***slerp:*** 2 models exactly, 1 co-listed as *Base* | ***nuslerp:*** 2 models; *Base*: optional | ***multislerp:*** 2+ models; *Base*: optional | ***karcher:*** 2+ models; *Base*: none")
|
| 979 |
|
| 980 |
with gr.Row(variant="compact"):
|
| 981 |
with gr.Column(scale=3): m1 = gr.Textbox(label="Model 1")
|
|
|
|
| 1002 |
|
| 1003 |
# --- 6. Stir/Tie Bases ---
|
| 1004 |
with gr.Tab("Align/Tie") as t6_tab:
|
| 1005 |
+
gr.Markdown("##### Task Vector Methods Family: task_arithmetic, ties, dare_ties/dare_linear, della/della_linear, breadcrumbs/breadcrumbs_ties, sce")
|
| 1006 |
with gr.Row(variant="compact"):
|
| 1007 |
t6_token = gr.Textbox(label="Token", type="password", scale=1)
|
| 1008 |
t6_method = gr.Dropdown(["task_arithmetic", "ties", "dare_ties", "dare_linear", "della", "della_linear", "breadcrumbs", "breadcrumbs_ties", "sce"], value="ties", label="Merge Method", scale=2)
|
|
|
|
| 1010 |
gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 1011 |
|
| 1012 |
with gr.Row(variant="compact"):
|
| 1013 |
+
t6_shard = gr.Slider(label="Max Shard Size (GB)", value=4.0, minimum=0.5, maximum=20.0)
|
| 1014 |
t6_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
|
| 1015 |
t6_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source")
|
| 1016 |
t6_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
| 1017 |
+
gr.Markdown("Built-in **Chat Templates**: *alpaca, chatml, llama3, mistral, exaone, auto (default)*")
|
| 1018 |
+
gr.Markdown("**MODELS**: These methods all accept ***2 or more models***, and require one of these to be designated as *Base*")
|
| 1019 |
|
| 1020 |
with gr.Accordion("Global Parameters (Normalize, Int8, Lambda, etc.)", open=False):
|
| 1021 |
with gr.Row(variant="compact"): t6_norm = gr.Checkbox(label="Normalize Weights", value=True); t6_i8 = gr.Checkbox(label="Int8 Mask", value=False); t6_resc = gr.Checkbox(label="Rescale (Dare_Linear)", value=True)
|
|
|
|
| 1041 |
|
| 1042 |
# --- 7. Specious ---
|
| 1043 |
with gr.Tab("Specious") as t7_tab:
|
| 1044 |
+
gr.Markdown("##### Specialized Methods: model_stock, nearswap, arcee_fusion, passthrough")
|
| 1045 |
with gr.Row(variant="compact"):
|
| 1046 |
t7_token = gr.Textbox(label="Token", type="password", scale=1)
|
| 1047 |
t7_method = gr.Dropdown(["model_stock", "nearswap", "arcee_fusion", "passthrough", "linear"], value="model_stock", label="Merge Method", scale=2)
|
| 1048 |
t7_base = gr.Textbox(label="Base Model (required for nearswap/arcee_fusion/model_stock)", placeholder="org/base-model")
|
| 1049 |
+
gr.Markdown("See [*MergeKit Merge Method Docs*](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 1050 |
|
| 1051 |
with gr.Row(variant="compact"):
|
| 1052 |
+
t7_shard = gr.Slider(label="Max Shard Size (GB)", value=4.0, minimum=0.5, maximum=20.0); t7_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t7_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t7_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
| 1053 |
+
gr.Markdown("Built-in **Chat Templates**: *alpaca, chatml, llama3, mistral, exaone, auto (default)*")
|
| 1054 |
|
| 1055 |
+
gr.Markdown("**MODELS**: ***passthrough:*** 1 model per [Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md), but [Examples](https://github.com/arcee-ai/mergekit/tree/main/examples) shows 2+ | ***nearswap/arcee_fusion:*** 2 models, one co-listed as *Base* | ***model_stock:*** 3+ models, one co-listed as *Base*")
|
| 1056 |
|
| 1057 |
with gr.Row(variant="compact"):
|
| 1058 |
t7_norm = gr.Checkbox(label="Normalize", value=True); t7_i8 = gr.Checkbox(label="Int8 Mask", value=False); t7_t = gr.Slider(0, 1, 0.5, label="t (Interpolation Ratio, for Nearswap)"); t7_filt_w = gr.Checkbox(label="Filter Wise (for Model_Stock)", value=False)
|
|
|
|
| 1073 |
|
| 1074 |
# --- 8. MoEr ---
|
| 1075 |
with gr.Tab("MoEr") as t8_tab:
|
| 1076 |
+
gr.Markdown("##### **Mixture of Experts**: fuses self-attention & normalization layers from *Base* w/MLP layers from *Experts*")
|
| 1077 |
+
gr.Markdown("See [*MergeKit MoE doc*](https://github.com/arcee-ai/mergekit/blob/main/docs/moe.md) for more info.")
|
| 1078 |
|
| 1079 |
with gr.Row(variant="compact"):
|
| 1080 |
t8_token = gr.Textbox(label="Token", type="password", scale=1)
|
|
|
|
| 1084 |
t8_dtype = gr.Dropdown(["float16", "bfloat16"], value="bfloat16", label="Internal Dtype")
|
| 1085 |
with gr.Row(variant="compact"):
|
| 1086 |
t8_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t8_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t8_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t8_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
| 1087 |
+
gr.Markdown("Built-in **Chat Templates**: *alpaca, chatml, llama3, mistral, exaone, auto (default)*")
|
| 1088 |
|
| 1089 |
+
gr.Markdown("#### Experts *(At least 2 required. **Prompts** must be comma-separated.)*")
|
| 1090 |
with gr.Row(variant="compact"): t8_expert1 = gr.Textbox(label="Expert 1", placeholder="org/expert1", scale=2); t8_prompt1 = gr.Textbox(label="Positive Prompts", placeholder="math, reasoning, logic", scale=3)
|
| 1091 |
with gr.Row(variant="compact"): t8_expert2 = gr.Textbox(label="Expert 2", placeholder="org/expert2", scale=2); t8_prompt2 = gr.Textbox(label="Positive Prompts", placeholder="creative, writing, storytelling", scale=3)
|
| 1092 |
with gr.Accordion("More Experts (3-5)", open=False):
|
|
|
|
| 1104 |
|
| 1105 |
# --- 9. Rawer ---
|
| 1106 |
with gr.Tab("Rawer") as t9_tab:
|
| 1107 |
+
gr.Markdown("##### MergeKit Module for merging Raw PyTorch Weights / Non-pipeline-classed Models")
|
| 1108 |
with gr.Row(variant="compact"):
|
| 1109 |
t9_token = gr.Textbox(label="Token", type="password", scale=1)
|
| 1110 |
t9_method = gr.Dropdown(["linear", "passthrough"], value="linear", label="Merge Method", scale=1)
|
| 1111 |
t9_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="Config dtype", scale=1)
|
| 1112 |
t9_models = gr.TextArea(label="Models (one per line)", lines=3)
|
| 1113 |
+
gr.Markdown("**MODELS**: ***passthrough:*** 1 model per [Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md), but [Examples](https://github.com/arcee-ai/mergekit/tree/main/examples) shows 2+ | ***linear:*** takes 2+ models, averages between weights; aka the *Model Soup* method")
|
| 1114 |
with gr.Row(variant="compact"):
|
| 1115 |
t9_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t9_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
|
| 1116 |
with gr.Row(variant="compact"):
|
| 1117 |
+
t9_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t9_chat = gr.Textbox(label="Chat Template", placeholder="auto")
|
| 1118 |
+
gr.Markdown("Built-in Chat Templates: *alpaca, chatml, llama3, mistral, exaone, auto*")
|
| 1119 |
+
gr.Markdown("See [**MergeKit Merge Method Docs**](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
|
| 1120 |
with gr.Row(variant="compact"):
|
| 1121 |
t9_out = gr.Textbox(label="Output Repo", scale=3); t9_priv = gr.Checkbox(label="Private", value=True, scale=1)
|
| 1122 |
t9_btn = gr.Button("Merge Raw", variant="primary")
|
|
|
|
| 1125 |
|
| 1126 |
# --- 10. Mario,DARE! ---
|
| 1127 |
with gr.Tab("Mario,Dare!") as t10_tab:
|
| 1128 |
+
gr.Markdown("##### Model-Agnostic Implementation of DARE (Drop And REscale)")
|
| 1129 |
+
gr.Markdown("Adapted from the [sft-merger by Martyn Garcia](https://github.com/martyn)")
|
| 1130 |
t10_token = gr.Textbox(label="Token", type="password")
|
| 1131 |
|
| 1132 |
gr.Markdown(
|