AlekseyCalvin committed on
Commit
9e6dd64
·
verified ·
1 Parent(s): 8093971

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -735,6 +735,7 @@ with gr.Blocks() as demo:
735
  t5_norm = gr.Checkbox(label="Normalize Weights", value=True); t5_i8 = gr.Checkbox(label="Int8 Mask", value=False); t5_flat = gr.Checkbox(label="Flatten Tensors (NuSlerp)", value=False); t5_row = gr.Checkbox(label="Row Wise (NuSlerp)", value=False)
736
  with gr.Row():
737
  t5_eps = gr.Textbox(label="eps (Stabilization Constant) (MultiSlerp)", value="1e-8"); t5_iter = gr.Number(label="Max Iterations (Karcher)", value=10); t5_tol = gr.Textbox(label="tol (Convergence Tolerance) (Karcher)", value="1e-5")
 
738
  m1, w1 = gr.Textbox(label="Model 1"), gr.Textbox(label="Weight 1", value="1.0"); m2, w2 = gr.Textbox(label="Model 2"), gr.Textbox(label="Weight 2", value="1.0")
739
  with gr.Accordion("More", open=False):
740
  m3, w3 = gr.Textbox(label="Model 3"), gr.Textbox(label="Weight 3", value="1.0"); m4, w4 = gr.Textbox(label="Model 4"), gr.Textbox(label="Weight 4", value="1.0"); m5, w5 = gr.Textbox(label="Model 5"), gr.Textbox(label="Weight 5", value="1.0")
@@ -751,12 +752,16 @@ with gr.Blocks() as demo:
751
  gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
752
  with gr.Row():
753
  t6_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t6_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t6_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t6_chat = gr.Textbox(label="Chat Template", placeholder="auto")
 
754
  t6_base = gr.Textbox(label="Base Model (required)")
 
 
 
755
  with gr.Row():
756
  t6_norm = gr.Checkbox(label="Normalize Weights", value=True); t6_i8 = gr.Checkbox(label="Int8 Mask", value=False); t6_resc = gr.Checkbox(label="Rescale (Dare_Linear)", value=True); t6_lamb = gr.Number(label="Lambda", value=1.0); t6_topk = gr.Slider(0, 1, 1.0, label="Select TopK (SCE)")
757
- m1_6, w1_6 = gr.Textbox(label="Model 1"), gr.Textbox(label="Weight 1", value="1.0"); d1_6, g1_6, e1_6 = gr.Textbox(label="Density", value="1.0"), gr.Number(label="Gamma", value=0.01), gr.Number(label="Epsilon", value=0.15)
 
758
  with gr.Accordion("More", open=False):
759
- m2_6, w2_6 = gr.Textbox(label="Model 2"), gr.Textbox(label="Weight 2", value="1.0"); d2_6, g2_6, e2_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
760
  m3_6, w3_6 = gr.Textbox(label="Model 3"), gr.Textbox(label="Weight 3", value="1.0"); d3_6, g3_6, e3_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
761
  m4_6, w4_6 = gr.Textbox(label="Model 4"), gr.Textbox(label="Weight 4", value="1.0"); d4_6, g4_6, e4_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
762
  t6_out = gr.Textbox(label="Output Repo"); t6_priv = gr.Checkbox(label="Private", value=True)
@@ -772,11 +777,10 @@ with gr.Blocks() as demo:
772
  gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
773
  with gr.Row():
774
  t7_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t7_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t7_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t7_chat = gr.Textbox(label="Chat Template", placeholder="auto")
775
-
776
  t7_base = gr.Textbox(label="Base Model (required for nearswap/arcee_fusion/model_stock)", placeholder="org/base-model")
777
 
778
- gr.Markdown("#### Models")
779
- gr.Markdown("**passthrough:** 1 model | **nearswap/arcee_fusion:** 2 models | **model_stock:** 3+ models")
780
 
781
  with gr.Row():
782
  t7_norm = gr.Checkbox(label="Normalize", value=True); t7_i8 = gr.Checkbox(label="Int8 Mask", value=False); t7_t = gr.Slider(0, 1, 0.5, label="t (Interpolation Ratio, for Nearswap)"); t7_filt_w = gr.Checkbox(label="Filter Wise (for Model_Stock)", value=False)
@@ -791,13 +795,13 @@ with gr.Blocks() as demo:
791
 
792
  # --- TAB 8 (MoEr) ---
793
  with gr.Tab("MoEr"):
794
- gr.Markdown("### Mixture of Experts")
795
  gr.Markdown("See [MergeKit MoE doc](https://github.com/arcee-ai/mergekit/blob/main/docs/moe.md) for more info.")
796
 
797
  t8_token = gr.Textbox(label="Token", type="password")
798
  with gr.Row():
799
  t8_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t8_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t8_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t8_chat = gr.Textbox(label="Chat Template", placeholder="auto")
800
- t8_base = gr.Textbox(label="Base Model"); t8_experts = gr.TextArea(label="Experts List"); t8_gate = gr.Dropdown(["cheap_embed", "random", "hidden"], value="cheap_embed", label="Gate Mode"); t8_dtype = gr.Dropdown(["float16", "bfloat16"], value="bfloat16", label="Internal Dtype")
801
  gr.Markdown("#### Experts (at least 2 required)")
802
  gr.Markdown("Prompts are comma-separated descriptors for each expert")
803
 
 
735
  t5_norm = gr.Checkbox(label="Normalize Weights", value=True); t5_i8 = gr.Checkbox(label="Int8 Mask", value=False); t5_flat = gr.Checkbox(label="Flatten Tensors (NuSlerp)", value=False); t5_row = gr.Checkbox(label="Row Wise (NuSlerp)", value=False)
736
  with gr.Row():
737
  t5_eps = gr.Textbox(label="eps (Stabilization Constant) (MultiSlerp)", value="1e-8"); t5_iter = gr.Number(label="Max Iterations (Karcher)", value=10); t5_tol = gr.Textbox(label="tol (Convergence Tolerance) (Karcher)", value="1e-5")
738
+ gr.Markdown("#### MODELS: **slerp:** 2 models exactly, one of the 2 also listed as *Base* | **nuslerp:** 2 models exactly; *Base*: optional | **multislerp:** 2+ models; *Base*: optional | **karcher:** 2+ models; *Base*: none")
739
  m1, w1 = gr.Textbox(label="Model 1"), gr.Textbox(label="Weight 1", value="1.0"); m2, w2 = gr.Textbox(label="Model 2"), gr.Textbox(label="Weight 2", value="1.0")
740
  with gr.Accordion("More", open=False):
741
  m3, w3 = gr.Textbox(label="Model 3"), gr.Textbox(label="Weight 3", value="1.0"); m4, w4 = gr.Textbox(label="Model 4"), gr.Textbox(label="Weight 4", value="1.0"); m5, w5 = gr.Textbox(label="Model 5"), gr.Textbox(label="Weight 5", value="1.0")
 
752
  gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
753
  with gr.Row():
754
  t6_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t6_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t6_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t6_chat = gr.Textbox(label="Chat Template", placeholder="auto")
755
+ gr.Markdown("Built-in **Chat Templates**: alpaca, chatml, llama3, mistral, exaone, auto (default)")
756
  t6_base = gr.Textbox(label="Base Model (required)")
757
+
758
+ gr.Markdown("#### MODELS: These methods all accept **2 or more models**, and require one of these designated as *Base*")
759
+
760
  with gr.Row():
761
  t6_norm = gr.Checkbox(label="Normalize Weights", value=True); t6_i8 = gr.Checkbox(label="Int8 Mask", value=False); t6_resc = gr.Checkbox(label="Rescale (Dare_Linear)", value=True); t6_lamb = gr.Number(label="Lambda", value=1.0); t6_topk = gr.Slider(0, 1, 1.0, label="Select TopK (SCE)")
762
+ m1_6, w1_6 = gr.Textbox(label="Model 1"), gr.Textbox(label="Weight 1", value="1.0"); d1_6, g1_6, e1_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
763
+ m2_6, w2_6 = gr.Textbox(label="Model 2"), gr.Textbox(label="Weight 2", value="1.0"); d2_6, g2_6, e2_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
764
  with gr.Accordion("More", open=False):
 
765
  m3_6, w3_6 = gr.Textbox(label="Model 3"), gr.Textbox(label="Weight 3", value="1.0"); d3_6, g3_6, e3_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
766
  m4_6, w4_6 = gr.Textbox(label="Model 4"), gr.Textbox(label="Weight 4", value="1.0"); d4_6, g4_6, e4_6 = gr.Textbox(label="Density (DARE/TIES)", value="1.0"), gr.Number(label="Gamma (breadcrumbs)", value=0.01), gr.Number(label="Epsilon (DELLA)", value=0.15)
767
  t6_out = gr.Textbox(label="Output Repo"); t6_priv = gr.Checkbox(label="Private", value=True)
 
777
  gr.Markdown("See [MergeKit Merge Method Docs](https://github.com/arcee-ai/mergekit/blob/main/docs/merge_methods.md) for more info.")
778
  with gr.Row():
779
  t7_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t7_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t7_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t7_chat = gr.Textbox(label="Chat Template", placeholder="auto")
780
+ gr.Markdown("Built-in **Chat Templates**: alpaca, chatml, llama3, mistral, exaone, auto (default)")
781
  t7_base = gr.Textbox(label="Base Model (required for nearswap/arcee_fusion/model_stock)", placeholder="org/base-model")
782
 
783
+ gr.Markdown("#### MODELS: **passthrough:** 1 model acc. to Docs, but [Examples](https://github.com/arcee-ai/mergekit/tree/main/examples) shows 2+ | **nearswap/arcee_fusion:** 2 models, one also listed as *Base* | **model_stock:** 3+ models, one also listed as *Base*")
 
784
 
785
  with gr.Row():
786
  t7_norm = gr.Checkbox(label="Normalize", value=True); t7_i8 = gr.Checkbox(label="Int8 Mask", value=False); t7_t = gr.Slider(0, 1, 0.5, label="t (Interpolation Ratio, for Nearswap)"); t7_filt_w = gr.Checkbox(label="Filter Wise (for Model_Stock)", value=False)
 
795
 
796
  # --- TAB 8 (MoEr) ---
797
  with gr.Tab("MoEr"):
798
+ gr.Markdown("### Mixture of Experts: fuses self-attention & normalization layers from *Base* w/MLP layers from *Experts*")
799
  gr.Markdown("See [MergeKit MoE doc](https://github.com/arcee-ai/mergekit/blob/main/docs/moe.md) for more info.")
800
 
801
  t8_token = gr.Textbox(label="Token", type="password")
802
  with gr.Row():
803
  t8_shard = gr.Slider(label="Max Shard Size (GB)", value=5.0, minimum=0.5, maximum=20.0); t8_prec = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Precision"); t8_tok = gr.Dropdown(["base", "union", "model:path"], value="base", label="Tokenizer Source"); t8_chat = gr.Textbox(label="Chat Template", placeholder="auto")
804
+ t8_base = gr.Textbox(label="Base Model (Required)"); t8_gate = gr.Dropdown(["cheap_embed", "random", "hidden"], value="cheap_embed", label="Gate Mode"); t8_dtype = gr.Dropdown(["float16", "bfloat16"], value="bfloat16", label="Internal Dtype")
805
  gr.Markdown("#### Experts (at least 2 required)")
806
  gr.Markdown("Prompts are comma-separated descriptors for each expert")
807