AlekseyCalvin committed on
Commit
50b030f
·
verified ·
1 Parent(s): db16e05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -25
app.py CHANGED
@@ -661,9 +661,6 @@ def task_mergekit_weighted(hf_token, models_text, method, dtype, base_model, wei
661
  except Exception as e:
662
  return f"Error: {e}"
663
 
664
- # =================================================================================
665
- # TAB 6: INTERPOLATION (Slerp, Task Arithmetic)
666
- # =================================================================================
667
 
668
  def task_mergekit_interp(hf_token, model_a, model_b, base_model, method, t_val, dtype, out_repo, private):
669
  cleanup_temp()
@@ -721,10 +718,6 @@ def task_mergekit_interp(hf_token, model_a, model_b, base_model, method, t_val,
721
  except Exception as e:
722
  return f"Error: {e}"
723
 
724
- # =================================================================================
725
- # TAB 7: MOE (Mixture of Experts)
726
- # =================================================================================
727
-
728
  def task_mergekit_moe(hf_token, base_model, experts_text, gate_mode, dtype, out_repo, private):
729
  cleanup_temp()
730
  if not hf_token: return "Error: Token required"
@@ -785,10 +778,6 @@ def task_raw_merge(hf_token, models_text, method, dtype, out_repo, private):
785
  except Exception as e:
786
  return f"Error: {e}"
787
 
788
- # =================================================================================
789
- # TAB 9: DARE SOONR (Custom Python Implementation)
790
- # =================================================================================
791
-
792
  def task_dare_soonr(hf_token, base_model, ft_model, ratio, mask_rate, out_repo, private):
793
  # Ported from the requested DARE-MERGE-SOONR implementation
794
  cleanup_temp()
@@ -866,8 +855,7 @@ with gr.Blocks() as demo:
866
  gr.Markdown("# 🧰Training-Free CPU-run Model Creation Toolkit")
867
 
868
  with gr.Tabs():
869
- # --- ORIGINAL TABS 1-4 (PRESERVED EXACTLY) ---
870
- with gr.Tab("Merge to Base Model + Reshard Output"):
871
  with gr.Row():
872
  t1_token = gr.Textbox(label="Token", type="password")
873
  with gr.Row():
@@ -939,11 +927,9 @@ with gr.Blocks() as demo:
939
  t4_btn = gr.Button("Resize")
940
  t4_res = gr.Textbox(label="Result")
941
  t4_btn.click(task_resize, [t4_token, t4_in, t4_rank, t4_method, t4_param, t4_out], t4_res)
942
-
943
- # --- NEW TABS (5-9) USING CLI & IN-LINED LOGIC ---
944
 
945
- with gr.Tab("Tab 5: Weighted & Sparsified"):
946
- gr.Markdown("### Linear, Ties, Dare-Ties, Model Stock")
947
  t5_token = gr.Textbox(label="HF Token", type="password")
948
  with gr.Row():
949
  t5_method = gr.Dropdown(["linear", "ties", "dare_ties", "dare_linear", "model_stock"], value="linear", label="Method")
@@ -961,7 +947,7 @@ with gr.Blocks() as demo:
961
  t5_res = gr.Textbox(label="Result")
962
  t5_btn.click(task_mergekit_weighted, [t5_token, t5_models, t5_method, t5_dtype, t5_base, t5_weights, t5_density, t5_norm, t5_out, t5_priv], t5_res)
963
 
964
- with gr.Tab("Tab 6: Interpolation"):
965
  gr.Markdown("### Slerp, Task Arithmetic, NuSlerp")
966
  t6_token = gr.Textbox(label="HF Token", type="password")
967
  with gr.Row():
@@ -978,7 +964,7 @@ with gr.Blocks() as demo:
978
  t6_res = gr.Textbox(label="Result")
979
  t6_btn.click(task_mergekit_interp, [t6_token, t6_model_a, t6_model_b, t6_base, t6_method, t6_t, t6_dtype, t6_out, t6_priv], t6_res)
980
 
981
- with gr.Tab("Tab 7: MoE"):
982
  gr.Markdown("### Mixture of Experts Construction")
983
  t7_token = gr.Textbox(label="HF Token", type="password")
984
  t7_base = gr.Textbox(label="Base Model")
@@ -992,20 +978,20 @@ with gr.Blocks() as demo:
992
  t7_res = gr.Textbox(label="Result")
993
  t7_btn.click(task_mergekit_moe, [t7_token, t7_base, t7_experts, t7_gate, t7_dtype, t7_out, t7_priv], t7_res)
994
 
995
- with gr.Tab("Tab 8: Raw PyTorch"):
996
- gr.Markdown("### Non-Transformer / Raw Weights")
997
  t8_token = gr.Textbox(label="HF Token", type="password")
998
  t8_models = gr.TextArea(label="Models (one per line)")
999
  t8_method = gr.Dropdown(["linear", "passthrough"], value="linear", label="Method")
1000
- t8_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="Dtype")
1001
  t8_out = gr.Textbox(label="Output Repo")
1002
  t8_priv = gr.Checkbox(label="Private", value=True)
1003
- t8_btn = gr.Button("Merge Raw (CLI)")
1004
  t8_res = gr.Textbox(label="Result")
1005
  t8_btn.click(task_raw_merge, [t8_token, t8_models, t8_method, t8_dtype, t8_out, t8_priv], t8_res)
1006
 
1007
- with gr.Tab("Tab 9: DARE SOONR (Custom)"):
1008
- gr.Markdown("### Custom DARE Implementation (No MergeKit)")
1009
  t9_token = gr.Textbox(label="HF Token", type="password")
1010
  with gr.Row():
1011
  t9_base = gr.Textbox(label="Base Model")
 
661
  except Exception as e:
662
  return f"Error: {e}"
663
 
 
 
 
664
 
665
  def task_mergekit_interp(hf_token, model_a, model_b, base_model, method, t_val, dtype, out_repo, private):
666
  cleanup_temp()
 
718
  except Exception as e:
719
  return f"Error: {e}"
720
 
 
 
 
 
721
  def task_mergekit_moe(hf_token, base_model, experts_text, gate_mode, dtype, out_repo, private):
722
  cleanup_temp()
723
  if not hf_token: return "Error: Token required"
 
778
  except Exception as e:
779
  return f"Error: {e}"
780
 
 
 
 
 
781
  def task_dare_soonr(hf_token, base_model, ft_model, ratio, mask_rate, out_repo, private):
782
  # Ported from the requested DARE-MERGE-SOONR implementation
783
  cleanup_temp()
 
855
  gr.Markdown("# 🧰Training-Free CPU-run Model Creation Toolkit")
856
 
857
  with gr.Tabs():
858
+ with gr.Tab("Merge into Base Model"):
 
859
  with gr.Row():
860
  t1_token = gr.Textbox(label="Token", type="password")
861
  with gr.Row():
 
927
  t4_btn = gr.Button("Resize")
928
  t4_res = gr.Textbox(label="Result")
929
  t4_btn.click(task_resize, [t4_token, t4_in, t4_rank, t4_method, t4_param, t4_out], t4_res)
 
 
930
 
931
+ with gr.Tab("Stir/Tie Bases"):
932
+ gr.Markdown("### Linear, TIES, dare-TIES, Model Stock")
933
  t5_token = gr.Textbox(label="HF Token", type="password")
934
  with gr.Row():
935
  t5_method = gr.Dropdown(["linear", "ties", "dare_ties", "dare_linear", "model_stock"], value="linear", label="Method")
 
947
  t5_res = gr.Textbox(label="Result")
948
  t5_btn.click(task_mergekit_weighted, [t5_token, t5_models, t5_method, t5_dtype, t5_base, t5_weights, t5_density, t5_norm, t5_out, t5_priv], t5_res)
949
 
950
+ with gr.Tab("Amphinterpolative"):
951
  gr.Markdown("### Slerp, Task Arithmetic, NuSlerp")
952
  t6_token = gr.Textbox(label="HF Token", type="password")
953
  with gr.Row():
 
964
  t6_res = gr.Textbox(label="Result")
965
  t6_btn.click(task_mergekit_interp, [t6_token, t6_model_a, t6_model_b, t6_base, t6_method, t6_t, t6_dtype, t6_out, t6_priv], t6_res)
966
 
967
+ with gr.Tab("MoEr"):
968
  gr.Markdown("### Mixture of Experts Construction")
969
  t7_token = gr.Textbox(label="HF Token", type="password")
970
  t7_base = gr.Textbox(label="Base Model")
 
978
  t7_res = gr.Textbox(label="Result")
979
  t7_btn.click(task_mergekit_moe, [t7_token, t7_base, t7_experts, t7_gate, t7_dtype, t7_out, t7_priv], t7_res)
980
 
981
+ with gr.Tab("Rawer"):
982
+ gr.Markdown("### Raw PyTorch MergeKit / Non-pipeline-classed")
983
  t8_token = gr.Textbox(label="HF Token", type="password")
984
  t8_models = gr.TextArea(label="Models (one per line)")
985
  t8_method = gr.Dropdown(["linear", "passthrough"], value="linear", label="Method")
986
+ t8_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="dtype")
987
  t8_out = gr.Textbox(label="Output Repo")
988
  t8_priv = gr.Checkbox(label="Private", value=True)
989
+ t8_btn = gr.Button("Merge")
990
  t8_res = gr.Textbox(label="Result")
991
  t8_btn.click(task_raw_merge, [t8_token, t8_models, t8_method, t8_dtype, t8_out, t8_priv], t8_res)
992
 
993
+ with gr.Tab("Mario,DARE!"):
994
+ gr.Markdown("### From sft-merger by [Martyn Garcia](https://github.com/martyn)")
995
  t9_token = gr.Textbox(label="HF Token", type="password")
996
  with gr.Row():
997
  t9_base = gr.Textbox(label="Base Model")