Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -661,9 +661,6 @@ def task_mergekit_weighted(hf_token, models_text, method, dtype, base_model, wei
|
|
| 661 |
except Exception as e:
|
| 662 |
return f"Error: {e}"
|
| 663 |
|
| 664 |
-
# =================================================================================
|
| 665 |
-
# TAB 6: INTERPOLATION (Slerp, Task Arithmetic)
|
| 666 |
-
# =================================================================================
|
| 667 |
|
| 668 |
def task_mergekit_interp(hf_token, model_a, model_b, base_model, method, t_val, dtype, out_repo, private):
|
| 669 |
cleanup_temp()
|
|
@@ -721,10 +718,6 @@ def task_mergekit_interp(hf_token, model_a, model_b, base_model, method, t_val,
|
|
| 721 |
except Exception as e:
|
| 722 |
return f"Error: {e}"
|
| 723 |
|
| 724 |
-
# =================================================================================
|
| 725 |
-
# TAB 7: MOE (Mixture of Experts)
|
| 726 |
-
# =================================================================================
|
| 727 |
-
|
| 728 |
def task_mergekit_moe(hf_token, base_model, experts_text, gate_mode, dtype, out_repo, private):
|
| 729 |
cleanup_temp()
|
| 730 |
if not hf_token: return "Error: Token required"
|
|
@@ -785,10 +778,6 @@ def task_raw_merge(hf_token, models_text, method, dtype, out_repo, private):
|
|
| 785 |
except Exception as e:
|
| 786 |
return f"Error: {e}"
|
| 787 |
|
| 788 |
-
# =================================================================================
|
| 789 |
-
# TAB 9: DARE SOONR (Custom Python Implementation)
|
| 790 |
-
# =================================================================================
|
| 791 |
-
|
| 792 |
def task_dare_soonr(hf_token, base_model, ft_model, ratio, mask_rate, out_repo, private):
|
| 793 |
# Ported from the requested DARE-MERGE-SOONR implementation
|
| 794 |
cleanup_temp()
|
|
@@ -866,8 +855,7 @@ with gr.Blocks() as demo:
|
|
| 866 |
gr.Markdown("# 🧰Training-Free CPU-run Model Creation Toolkit")
|
| 867 |
|
| 868 |
with gr.Tabs():
|
| 869 |
-
|
| 870 |
-
with gr.Tab("Merge to Base Model + Reshard Output"):
|
| 871 |
with gr.Row():
|
| 872 |
t1_token = gr.Textbox(label="Token", type="password")
|
| 873 |
with gr.Row():
|
|
@@ -939,11 +927,9 @@ with gr.Blocks() as demo:
|
|
| 939 |
t4_btn = gr.Button("Resize")
|
| 940 |
t4_res = gr.Textbox(label="Result")
|
| 941 |
t4_btn.click(task_resize, [t4_token, t4_in, t4_rank, t4_method, t4_param, t4_out], t4_res)
|
| 942 |
-
|
| 943 |
-
# --- NEW TABS (5-9) USING CLI & IN-LINED LOGIC ---
|
| 944 |
|
| 945 |
-
with gr.Tab("
|
| 946 |
-
gr.Markdown("### Linear,
|
| 947 |
t5_token = gr.Textbox(label="HF Token", type="password")
|
| 948 |
with gr.Row():
|
| 949 |
t5_method = gr.Dropdown(["linear", "ties", "dare_ties", "dare_linear", "model_stock"], value="linear", label="Method")
|
|
@@ -961,7 +947,7 @@ with gr.Blocks() as demo:
|
|
| 961 |
t5_res = gr.Textbox(label="Result")
|
| 962 |
t5_btn.click(task_mergekit_weighted, [t5_token, t5_models, t5_method, t5_dtype, t5_base, t5_weights, t5_density, t5_norm, t5_out, t5_priv], t5_res)
|
| 963 |
|
| 964 |
-
with gr.Tab("
|
| 965 |
gr.Markdown("### Slerp, Task Arithmetic, NuSlerp")
|
| 966 |
t6_token = gr.Textbox(label="HF Token", type="password")
|
| 967 |
with gr.Row():
|
|
@@ -978,7 +964,7 @@ with gr.Blocks() as demo:
|
|
| 978 |
t6_res = gr.Textbox(label="Result")
|
| 979 |
t6_btn.click(task_mergekit_interp, [t6_token, t6_model_a, t6_model_b, t6_base, t6_method, t6_t, t6_dtype, t6_out, t6_priv], t6_res)
|
| 980 |
|
| 981 |
-
with gr.Tab("
|
| 982 |
gr.Markdown("### Mixture of Experts Construction")
|
| 983 |
t7_token = gr.Textbox(label="HF Token", type="password")
|
| 984 |
t7_base = gr.Textbox(label="Base Model")
|
|
@@ -992,20 +978,20 @@ with gr.Blocks() as demo:
|
|
| 992 |
t7_res = gr.Textbox(label="Result")
|
| 993 |
t7_btn.click(task_mergekit_moe, [t7_token, t7_base, t7_experts, t7_gate, t7_dtype, t7_out, t7_priv], t7_res)
|
| 994 |
|
| 995 |
-
with gr.Tab("
|
| 996 |
-
gr.Markdown("###
|
| 997 |
t8_token = gr.Textbox(label="HF Token", type="password")
|
| 998 |
t8_models = gr.TextArea(label="Models (one per line)")
|
| 999 |
t8_method = gr.Dropdown(["linear", "passthrough"], value="linear", label="Method")
|
| 1000 |
-
t8_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="
|
| 1001 |
t8_out = gr.Textbox(label="Output Repo")
|
| 1002 |
t8_priv = gr.Checkbox(label="Private", value=True)
|
| 1003 |
-
t8_btn = gr.Button("Merge
|
| 1004 |
t8_res = gr.Textbox(label="Result")
|
| 1005 |
t8_btn.click(task_raw_merge, [t8_token, t8_models, t8_method, t8_dtype, t8_out, t8_priv], t8_res)
|
| 1006 |
|
| 1007 |
-
with gr.Tab("
|
| 1008 |
-
gr.Markdown("###
|
| 1009 |
t9_token = gr.Textbox(label="HF Token", type="password")
|
| 1010 |
with gr.Row():
|
| 1011 |
t9_base = gr.Textbox(label="Base Model")
|
|
|
|
| 661 |
except Exception as e:
|
| 662 |
return f"Error: {e}"
|
| 663 |
|
|
|
|
|
|
|
|
|
|
| 664 |
|
| 665 |
def task_mergekit_interp(hf_token, model_a, model_b, base_model, method, t_val, dtype, out_repo, private):
|
| 666 |
cleanup_temp()
|
|
|
|
| 718 |
except Exception as e:
|
| 719 |
return f"Error: {e}"
|
| 720 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 721 |
def task_mergekit_moe(hf_token, base_model, experts_text, gate_mode, dtype, out_repo, private):
|
| 722 |
cleanup_temp()
|
| 723 |
if not hf_token: return "Error: Token required"
|
|
|
|
| 778 |
except Exception as e:
|
| 779 |
return f"Error: {e}"
|
| 780 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 781 |
def task_dare_soonr(hf_token, base_model, ft_model, ratio, mask_rate, out_repo, private):
|
| 782 |
# Ported from the requested DARE-MERGE-SOONR implementation
|
| 783 |
cleanup_temp()
|
|
|
|
| 855 |
gr.Markdown("# 🧰Training-Free CPU-run Model Creation Toolkit")
|
| 856 |
|
| 857 |
with gr.Tabs():
|
| 858 |
+
with gr.Tab("Merge into Base Model"):
|
|
|
|
| 859 |
with gr.Row():
|
| 860 |
t1_token = gr.Textbox(label="Token", type="password")
|
| 861 |
with gr.Row():
|
|
|
|
| 927 |
t4_btn = gr.Button("Resize")
|
| 928 |
t4_res = gr.Textbox(label="Result")
|
| 929 |
t4_btn.click(task_resize, [t4_token, t4_in, t4_rank, t4_method, t4_param, t4_out], t4_res)
|
|
|
|
|
|
|
| 930 |
|
| 931 |
+
with gr.Tab("Stir/Tie Bases"):
|
| 932 |
+
gr.Markdown("### Linear, TIES, dare-TIES, Model Stock")
|
| 933 |
t5_token = gr.Textbox(label="HF Token", type="password")
|
| 934 |
with gr.Row():
|
| 935 |
t5_method = gr.Dropdown(["linear", "ties", "dare_ties", "dare_linear", "model_stock"], value="linear", label="Method")
|
|
|
|
| 947 |
t5_res = gr.Textbox(label="Result")
|
| 948 |
t5_btn.click(task_mergekit_weighted, [t5_token, t5_models, t5_method, t5_dtype, t5_base, t5_weights, t5_density, t5_norm, t5_out, t5_priv], t5_res)
|
| 949 |
|
| 950 |
+
with gr.Tab("Amphinterpolative"):
|
| 951 |
gr.Markdown("### Slerp, Task Arithmetic, NuSlerp")
|
| 952 |
t6_token = gr.Textbox(label="HF Token", type="password")
|
| 953 |
with gr.Row():
|
|
|
|
| 964 |
t6_res = gr.Textbox(label="Result")
|
| 965 |
t6_btn.click(task_mergekit_interp, [t6_token, t6_model_a, t6_model_b, t6_base, t6_method, t6_t, t6_dtype, t6_out, t6_priv], t6_res)
|
| 966 |
|
| 967 |
+
with gr.Tab("MoEr"):
|
| 968 |
gr.Markdown("### Mixture of Experts Construction")
|
| 969 |
t7_token = gr.Textbox(label="HF Token", type="password")
|
| 970 |
t7_base = gr.Textbox(label="Base Model")
|
|
|
|
| 978 |
t7_res = gr.Textbox(label="Result")
|
| 979 |
t7_btn.click(task_mergekit_moe, [t7_token, t7_base, t7_experts, t7_gate, t7_dtype, t7_out, t7_priv], t7_res)
|
| 980 |
|
| 981 |
+
with gr.Tab("Rawer"):
|
| 982 |
+
gr.Markdown("### Raw PyTorch MergeKit / Non-pipeline-classed")
|
| 983 |
t8_token = gr.Textbox(label="HF Token", type="password")
|
| 984 |
t8_models = gr.TextArea(label="Models (one per line)")
|
| 985 |
t8_method = gr.Dropdown(["linear", "passthrough"], value="linear", label="Method")
|
| 986 |
+
t8_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="dtype")
|
| 987 |
t8_out = gr.Textbox(label="Output Repo")
|
| 988 |
t8_priv = gr.Checkbox(label="Private", value=True)
|
| 989 |
+
t8_btn = gr.Button("Merge")
|
| 990 |
t8_res = gr.Textbox(label="Result")
|
| 991 |
t8_btn.click(task_raw_merge, [t8_token, t8_models, t8_method, t8_dtype, t8_out, t8_priv], t8_res)
|
| 992 |
|
| 993 |
+
with gr.Tab("Mario,DARE!"):
|
| 994 |
+
gr.Markdown("### From sft-merger by [Martyn Garcia](https://github.com/martyn)")
|
| 995 |
t9_token = gr.Textbox(label="HF Token", type="password")
|
| 996 |
with gr.Row():
|
| 997 |
t9_base = gr.Textbox(label="Base Model")
|