Spaces:

AlekseyCalvin
/

Soon_Merger

Running

App Files Files Community

AlekseyCalvin committed on Jan 10

Commit

cd69aef

verified ·

1 Parent(s): be26fd7

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -23

app.py CHANGED Viewed

@@ -874,7 +874,7 @@ with gr.Blocks() as demo:
         # --- 1. Merge Legacy ---
         with gr.Tab("Merge 2 Base") as t1_tab:
-            gr.Markdown("### Fuse adapter (LoRA, DoRA, etc...) + base model (LLM, t2i, t2v... any!)")
             with gr.Row(variant="compact"):
                 t1_token = gr.Textbox(label="Token", type="password", scale=2)
                 t1_prec = gr.Dropdown(["bf16", "fp16", "float32"], value="bf16", label="Precision", scale=1)
@@ -895,6 +895,7 @@ with gr.Blocks() as demo:
         # --- 2. Extract Adapter ---
         with gr.Tab("Extract LoRA") as t2_tab:
             with gr.Row(variant="compact"):
                 t2_token = gr.Textbox(label="Token", type="password", scale=2)
                 t2_rank = gr.Number(label="Extract At Rank", value=32, minimum=1, maximum=1024, step=1, scale=1)
@@ -909,7 +910,7 @@ with gr.Blocks() as demo:
         # --- 3. Merge Adapters ---
         with gr.Tab("Fuse Adapters") as t3_tab:
-            gr.Markdown("### Batch Adapter Merging")
             with gr.Row(variant="compact"):
                 t3_token = gr.Textbox(label="Token", type="password", scale=1)
                 t3_method = gr.Dropdown(["Iterative EMA (Linear w/ Beta/Sigma coefficient)", "Concatenation (MOE-like weights-stack)", "SVD Fusion (Task Arithmetic/Compressed)"], value="Iterative EMA (Linear w/ Beta/Sigma coefficient)", label="Merge Method", scale=2)
@@ -946,7 +947,7 @@ with gr.Blocks() as demo:
         # --- 5. Amphinterpolative ---
         with gr.Tab("Amphinterpolate") as t5_tab:
-            gr.Markdown("### Spherical Interpolation Methods Family: slerp, nuslerp, multislerp, karcher")
             with gr.Row(variant="compact"):
                 t5_token = gr.Textbox(label="HF Token", type="password", scale=1)
                 t5_method = gr.Dropdown(["slerp", "nuslerp", "multislerp", "karcher"], value="slerp", label="Merge Method", scale=1)
@@ -955,7 +956,7 @@ with gr.Blocks() as demo:
             gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
             with gr.Row(variant="compact"):
-                t5_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0)
                 t5_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
                 t5_t = gr.Slider(0, 1, 0.5, label="t (mix factor)")
             with gr.Row(variant="compact"):
@@ -974,7 +975,7 @@ with gr.Blocks() as demo:
                     t5_iter = gr.Number(label="Max Iter (Karcher)", value=10)
                     t5_tol = gr.Textbox(label="tol (Karcher)", value="1e-5")
-            gr.Markdown("**MODELS**: **slerp:** 2 models exactly, one of the 2 also listed as *Base* | **nuslerp:** 2 models exactly; *Base*: optional | **multislerp:** 2+ models; *Base*: optional | **karcher:** 2+ models; *Base*: none")
             with gr.Row(variant="compact"):
                 with gr.Column(scale=3): m1 = gr.Textbox(label="Model 1")
@@ -1001,7 +1002,7 @@ with gr.Blocks() as demo:
         # --- 6. Stir/Tie Bases ---
         with gr.Tab("Align/Tie") as t6_tab:
-            gr.Markdown("### Task Vector Methods Family: task_arithmetic, ties, dare_ties, dare_linear, della, della_linear, breadcrumbs, breadcrumbs_ties, sce")
             with gr.Row(variant="compact"):
                 t6_token = gr.Textbox(label="Token", type="password", scale=1)
                 t6_method = gr.Dropdown(["task_arithmetic", "ties", "dare_ties", "dare_linear", "della", "della_linear", "breadcrumbs", "breadcrumbs_ties", "sce"], value="ties", label="Merge Method", scale=2)
@@ -1009,12 +1010,12 @@ with gr.Blocks() as demo:
             gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
             with gr.Row(variant="compact"):
-                t6_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0)
                 t6_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
                 t6_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source")
                 t6_chat = gr.Textbox(label="Chat Template", placeholder="auto")
-            gr.Markdown("Built-in **Chat Templates**: alpaca, chatml, llama3, mistral, exaone, auto (default)")
-            gr.Markdown("**MODELS**: These methods all accept **2 or more models**, and require one of these designated as *Base*")
             with gr.Accordion("Global Parameters (Normalize, Int8, Lambda, etc.)", open=False):
                  with gr.Row(variant="compact"): t6_norm = gr.Checkbox(label="Normalize Weights", value=True); t6_i8 = gr.Checkbox(label="Int8 Mask", value=False); t6_resc = gr.Checkbox(label="Rescale (Dare_Linear)", value=True)
@@ -1040,18 +1041,18 @@ with gr.Blocks() as demo:
         # --- 7. Specious ---
         with gr.Tab("Specious") as t7_tab:
-            gr.Markdown("### Specialized Methods: model_stock, nearswap, arcee_fusion, passthrough")
             with gr.Row(variant="compact"):
                 t7_token = gr.Textbox(label="Token", type="password", scale=1)
                 t7_method = gr.Dropdown(["model_stock", "nearswap", "arcee_fusion", "passthrough", "linear"], value="model_stock", label="Merge Method", scale=2)
             t7_base = gr.Textbox(label="Base Model (required for nearswap/arcee_fusion/model_stock)", placeholder="org/base-model")
-            gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
             with gr.Row(variant="compact"):
-                t7_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t7_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t7_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t7_chat = gr.Textbox(label="Chat Template", placeholder="auto")
-            gr.Markdown("Built-in **Chat Templates**: alpaca, chatml, llama3, mistral, exaone, auto (default)")
-            gr.Markdown("**MODELS**: **passthrough:** 1 model acc. to Docs, but [Examples](https://github.com/arcee-ai/mergekit/tree/main/examples) shows 2+ | **nearswap/arcee_fusion:** 2 models, one also listed as *Base* | **model_stock:** 3+ models, one also listed as *Base*")
             with gr.Row(variant="compact"):
                 t7_norm = gr.Checkbox(label="Normalize", value=True); t7_i8 = gr.Checkbox(label="Int8 Mask", value=False); t7_t = gr.Slider(0, 1, 0.5, label="t (Interpolation Ratio, for Nearswap)"); t7_filt_w = gr.Checkbox(label="Filter Wise (for Model_Stock)", value=False)
@@ -1072,8 +1073,8 @@ with gr.Blocks() as demo:
         # --- 8. MoEr ---
         with gr.Tab("MoEr") as t8_tab:
-            gr.Markdown("### Mixture of Experts: fuses self-attention & normalization layers from *Base* w/MLP layers from *Experts*")
-            gr.Markdown("See [MergeKit MoE doc](https://github.com/arcee-ai/mergekit/blob/main/docs/moe.md) for more info.")
             with gr.Row(variant="compact"):
                 t8_token = gr.Textbox(label="Token", type="password", scale=1)
@@ -1083,8 +1084,9 @@ with gr.Blocks() as demo:
                  t8_dtype = gr.Dropdown(["float16", "bfloat16"], value="bfloat16", label="Internal Dtype")
             with gr.Row(variant="compact"):
                 t8_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t8_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t8_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t8_chat = gr.Textbox(label="Chat Template", placeholder="auto")
-            gr.Markdown("#### Experts (at least 2 required). Prompts are comma-separated.")
             with gr.Row(variant="compact"): t8_expert1 = gr.Textbox(label="Expert 1", placeholder="org/expert1", scale=2); t8_prompt1 = gr.Textbox(label="Positive Prompts", placeholder="math, reasoning, logic", scale=3)
             with gr.Row(variant="compact"): t8_expert2 = gr.Textbox(label="Expert 2", placeholder="org/expert2", scale=2); t8_prompt2 = gr.Textbox(label="Positive Prompts", placeholder="creative, writing, storytelling", scale=3)
             with gr.Accordion("More Experts (3-5)", open=False):
@@ -1102,18 +1104,19 @@ with gr.Blocks() as demo:
         # --- 9. Rawer ---
         with gr.Tab("Rawer") as t9_tab:
-            gr.Markdown("### Raw PyTorch MergeKit / Non-pipeline-classed Models")
             with gr.Row(variant="compact"):
                 t9_token = gr.Textbox(label="Token", type="password", scale=1)
                 t9_method = gr.Dropdown(["linear", "passthrough"], value="linear", label="Merge Method", scale=1)
                 t9_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="Config dtype", scale=1)
             t9_models = gr.TextArea(label="Models (one per line)", lines=3)
             with gr.Row(variant="compact"):
                 t9_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t9_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
             with gr.Row(variant="compact"):
-                t9_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t9_chat = gr.Textbox(label="Chat Template (e.g., alpaca, chatml, auto)", placeholder="auto")
-            gr.Markdown("Built-in Chat Templates: alpaca, chatml, llama3, mistral, exaone, auto")
-            gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
             with gr.Row(variant="compact"):
                 t9_out = gr.Textbox(label="Output Repo", scale=3); t9_priv = gr.Checkbox(label="Private", value=True, scale=1)
             t9_btn = gr.Button("Merge Raw", variant="primary")
@@ -1122,8 +1125,8 @@ with gr.Blocks() as demo:
         # --- 10. Mario,DARE! ---
         with gr.Tab("Mario,Dare!") as t10_tab:
-            gr.Markdown("### Model-Agnostic DARE Implementation (Drop And REscale)")
-            gr.Markdown("From [sft-merger by Martyn Garcia](https://github.com/martyn)")
             t10_token = gr.Textbox(label="Token", type="password")
             gr.Markdown(

         # --- 1. Merge Legacy ---
         with gr.Tab("Merge 2 Base") as t1_tab:
+            gr.Markdown("##### Fuse a fine-tuned low-rank **ADAPTER** model (*LoRA, DoRA, etc...*) + a full **BASE** model (*LLM, t2i, t2v...* any!)")
             with gr.Row(variant="compact"):
                 t1_token = gr.Textbox(label="Token", type="password", scale=2)
                 t1_prec = gr.Dropdown(["bf16", "fp16", "float32"], value="bf16", label="Precision", scale=1)
         # --- 2. Extract Adapter ---
         with gr.Tab("Extract LoRA") as t2_tab:
+            gr.Markdown("##### Extract differences between 2 architecturally similar **BASE MODELS** as a low-rank **ADAPTER**")
             with gr.Row(variant="compact"):
                 t2_token = gr.Textbox(label="Token", type="password", scale=2)
                 t2_rank = gr.Number(label="Extract At Rank", value=32, minimum=1, maximum=1024, step=1, scale=1)
         # --- 3. Merge Adapters ---
         with gr.Tab("Fuse Adapters") as t3_tab:
+            gr.Markdown("##### Merge 2 or more *ADAPTERS* into a new chimera adapter by either: *averaging*, *collaging*, or *interpolating* their tensors")
             with gr.Row(variant="compact"):
                 t3_token = gr.Textbox(label="Token", type="password", scale=1)
                 t3_method = gr.Dropdown(["Iterative EMA (Linear w/ Beta/Sigma coefficient)", "Concatenation (MOE-like weights-stack)", "SVD Fusion (Task Arithmetic/Compressed)"], value="Iterative EMA (Linear w/ Beta/Sigma coefficient)", label="Merge Method", scale=2)
         # --- 5. Amphinterpolative ---
         with gr.Tab("Amphinterpolate") as t5_tab:
+            gr.Markdown("##### Spherical Interpolation Methods Family: slerp, nuslerp, multislerp, karcher")
             with gr.Row(variant="compact"):
                 t5_token = gr.Textbox(label="HF Token", type="password", scale=1)
                 t5_method = gr.Dropdown(["slerp", "nuslerp", "multislerp", "karcher"], value="slerp", label="Merge Method", scale=1)
             gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
             with gr.Row(variant="compact"):
+                t5_shard = gr.Slider(label="Max Shard Size (GB)", value=4.0, minimum=0.5, maximum=20.0)
                 t5_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
                 t5_t = gr.Slider(0, 1, 0.5, label="t (mix factor)")
             with gr.Row(variant="compact"):
                     t5_iter = gr.Number(label="Max Iter (Karcher)", value=10)
                     t5_tol = gr.Textbox(label="tol (Karcher)", value="1e-5")
+            gr.Markdown("**MODELS**: ***slerp:*** 2 models exactly, 1 co-listed as *Base* | ***nuslerp:*** 2 models; *Base*: optional | ***multislerp:*** 2+ models; *Base*: optional | ***karcher:*** 2+ models; *Base*: none")
             with gr.Row(variant="compact"):
                 with gr.Column(scale=3): m1 = gr.Textbox(label="Model 1")
         # --- 6. Stir/Tie Bases ---
         with gr.Tab("Align/Tie") as t6_tab:
+            gr.Markdown("##### Task Vector Methods Family: task_arithmetic, ties, dare_ties/dare_linear, della/della_linear, breadcrumbs/breadcrumbs_ties, sce")
             with gr.Row(variant="compact"):
                 t6_token = gr.Textbox(label="Token", type="password", scale=1)
                 t6_method = gr.Dropdown(["task_arithmetic", "ties", "dare_ties", "dare_linear", "della", "della_linear", "breadcrumbs", "breadcrumbs_ties", "sce"], value="ties", label="Merge Method", scale=2)
             gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
             with gr.Row(variant="compact"):
+                t6_shard = gr.Slider(label="Max Shard Size (GB)", value=4.0, minimum=0.5, maximum=20.0)
                 t6_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
                 t6_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source")
                 t6_chat = gr.Textbox(label="Chat Template", placeholder="auto")
+            gr.Markdown("Built-in **Chat Templates**: *alpaca, chatml, llama3, mistral, exaone, auto (default)*")
+            gr.Markdown("**MODELS**: These methods all accept ***2 or more models***, and require one of these to be designated as *Base*")
             with gr.Accordion("Global Parameters (Normalize, Int8, Lambda, etc.)", open=False):
                  with gr.Row(variant="compact"): t6_norm = gr.Checkbox(label="Normalize Weights", value=True); t6_i8 = gr.Checkbox(label="Int8 Mask", value=False); t6_resc = gr.Checkbox(label="Rescale (Dare_Linear)", value=True)
         # --- 7. Specious ---
         with gr.Tab("Specious") as t7_tab:
+            gr.Markdown("##### Specialized Methods: model_stock, nearswap, arcee_fusion, passthrough")
             with gr.Row(variant="compact"):
                 t7_token = gr.Textbox(label="Token", type="password", scale=1)
                 t7_method = gr.Dropdown(["model_stock", "nearswap", "arcee_fusion", "passthrough", "linear"], value="model_stock", label="Merge Method", scale=2)
             t7_base = gr.Textbox(label="Base Model (required for nearswap/arcee_fusion/model_stock)", placeholder="org/base-model")
+            gr.Markdown("See [*MergeKit Merge Method Docs*](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
             with gr.Row(variant="compact"):
+                t7_shard = gr.Slider(label="Max Shard Size (GB)", value=4.0, minimum=0.5, maximum=20.0); t7_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t7_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t7_chat = gr.Textbox(label="Chat Template", placeholder="auto")
+            gr.Markdown("Built-in **Chat Templates**: *alpaca, chatml, llama3, mistral, exaone, auto (default)*")
+            gr.Markdown("**MODELS**: ***passthrough:*** 1 model per [Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md), but [Examples](https://github.com/arcee-ai/mergekit/tree/main/examples) shows 2+ | ***nearswap/arcee_fusion:*** 2 models, one co-listed as *Base* | ***model_stock:*** 3+ models, one co-listed as *Base*")
             with gr.Row(variant="compact"):
                 t7_norm = gr.Checkbox(label="Normalize", value=True); t7_i8 = gr.Checkbox(label="Int8 Mask", value=False); t7_t = gr.Slider(0, 1, 0.5, label="t (Interpolation Ratio, for Nearswap)"); t7_filt_w = gr.Checkbox(label="Filter Wise (for Model_Stock)", value=False)
         # --- 8. MoEr ---
         with gr.Tab("MoEr") as t8_tab:
+            gr.Markdown("##### **Mixture of Experts**: fuses self-attention & normalization layers from *Base* w/MLP layers from *Experts*")
+            gr.Markdown("See [*MergeKit MoE doc*](https://github.com/arcee-ai/mergekit/blob/main/docs/moe.md) for more info.")
             with gr.Row(variant="compact"):
                 t8_token = gr.Textbox(label="Token", type="password", scale=1)
                  t8_dtype = gr.Dropdown(["float16", "bfloat16"], value="bfloat16", label="Internal Dtype")
             with gr.Row(variant="compact"):
                 t8_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t8_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t8_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t8_chat = gr.Textbox(label="Chat Template", placeholder="auto")
+            gr.Markdown("Built-in **Chat Templates**: *alpaca, chatml, llama3, mistral, exaone, auto (default)*")
+            gr.Markdown("#### Experts *(At least 2 required. **Prompts** must be comma-separated.)*")
             with gr.Row(variant="compact"): t8_expert1 = gr.Textbox(label="Expert 1", placeholder="org/expert1", scale=2); t8_prompt1 = gr.Textbox(label="Positive Prompts", placeholder="math, reasoning, logic", scale=3)
             with gr.Row(variant="compact"): t8_expert2 = gr.Textbox(label="Expert 2", placeholder="org/expert2", scale=2); t8_prompt2 = gr.Textbox(label="Positive Prompts", placeholder="creative, writing, storytelling", scale=3)
             with gr.Accordion("More Experts (3-5)", open=False):
         # --- 9. Rawer ---
         with gr.Tab("Rawer") as t9_tab:
+            gr.Markdown("##### MergeKit Module for merging Raw PyTorch Weights / Non-pipeline-classed Models")
             with gr.Row(variant="compact"):
                 t9_token = gr.Textbox(label="Token", type="password", scale=1)
                 t9_method = gr.Dropdown(["linear", "passthrough"], value="linear", label="Merge Method", scale=1)
                 t9_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="Config dtype", scale=1)
             t9_models = gr.TextArea(label="Models (one per line)", lines=3)
+            gr.Markdown("**MODELS**: ***passthrough:*** 1 model per [Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md), but [Examples](https://github.com/arcee-ai/mergekit/tree/main/examples) shows 2+ | ***linear:*** takes 2+ models, averages between weights; aka the *Model Soup* method")
             with gr.Row(variant="compact"):
                 t9_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t9_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision")
             with gr.Row(variant="compact"):
+                t9_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t9_chat = gr.Textbox(label="Chat Template", placeholder="auto")
+            gr.Markdown("Built-in Chat Templates: *alpaca, chatml, llama3, mistral, exaone, auto*")
+            gr.Markdown("See [**MergeKit Merge Method Docs**](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
             with gr.Row(variant="compact"):
                 t9_out = gr.Textbox(label="Output Repo", scale=3); t9_priv = gr.Checkbox(label="Private", value=True, scale=1)
             t9_btn = gr.Button("Merge Raw", variant="primary")
         # --- 10. Mario,DARE! ---
         with gr.Tab("Mario,Dare!") as t10_tab:
+            gr.Markdown("##### Model-Agnostic Implementation of DARE (Drop And REscale)")
+            gr.Markdown("Adapted from the [sft-merger by Martyn Garcia](https://github.com/martyn)")
             t10_token = gr.Textbox(label="Token", type="password")
             gr.Markdown(