Soon_Merger_Tools

Sleeping

App Files Files Community

AlekseyCalvin committed on Jan 4

Commit

a6d1a0c

verified ·

1 Parent(s): a20e358

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -37

app.py CHANGED Viewed

@@ -18,11 +18,12 @@ from huggingface_hub import HfApi, hf_hub_download, list_repo_files, login
 from safetensors.torch import load_file, save_file
 from tqdm import tqdm
-# --- Essential Imports (No try-except blocks to ensure visibility of errors) ---
 from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
 from mergekit.config import MergeConfiguration
-# --- Constants ---
 try:
     TempDir = Path("/tmp/temp_tool")
     os.makedirs(TempDir, exist_ok=True)
@@ -135,7 +136,7 @@ def get_key_stem(key):
     return key
 # =================================================================================
-# TABS 1-4 LOGIC (Legacy Python Implementation)
 # =================================================================================
 class MemoryEfficientSafeOpen:
@@ -203,8 +204,7 @@ def task_merge_legacy(hf_token, base, sub, lora, scale, prec, shard, out, struct
     except Exception as e: return f"Error: {e}"
     if struct_s:
         try:
-            files = api.list_repo_files(repo_id=struct_s, token=hf_token)
-            for f in tqdm(files, desc="Copying Structure"):
                 if sub and f.startswith(sub): continue
                 if not sub and any(f.endswith(x) for x in ['.safetensors', '.bin', '.pt', '.pth']): continue
                 l = hf_hub_download(repo_id=struct_s, filename=f, token=hf_token, local_dir=TempDir)
@@ -418,7 +418,7 @@ def task_resize(hf_token, lora_input, new_rank, dynamic_method, dynamic_param, o
     return "Done"
 # =================================================================================
-# MERGEKIT & LOGSVIEW (TABS 5-9) - FIXED CLI LOGIC
 # =================================================================================
 def parse_weight(w_str):
@@ -432,14 +432,17 @@ def run_mergekit_logic(config_dict, token, out_repo, private, shard_size, output
     runner = LogsViewRunner()
     cleanup_temp()
-    # 1. Validation
     try:
-        MergeConfiguration.model_validate(config_dict)
     except Exception as e:
         yield runner.log(f"Invalid Config: {e}", level="ERROR")
         return
-    # 2. Auth & Config Save
     if token:
         login(token.strip())
         os.environ["HF_TOKEN"] = token.strip()
@@ -447,10 +450,6 @@ def run_mergekit_logic(config_dict, token, out_repo, private, shard_size, output
     if "dtype" not in config_dict: config_dict["dtype"] = output_precision
     if "tokenizer_source" not in config_dict and tokenizer_source != "base":
         config_dict["tokenizer_source"] = tokenizer_source
-    # Add chat_template if not empty
-    if chat_template and chat_template.strip():
-        config_dict["chat_template"] = chat_template.strip()
     config_path = TempDir / "config.yaml"
     with open(config_path, "w") as f: yaml.dump(config_dict, f, sort_keys=False)
@@ -458,7 +457,6 @@ def run_mergekit_logic(config_dict, token, out_repo, private, shard_size, output
     yield runner.log(f"Config saved to {config_path}")
     yield runner.log(f"YAML:\n{yaml.dump(config_dict, sort_keys=False)}")
-    # 3. Create Repo
     try:
         api.create_repo(repo_id=out_repo, private=private, exist_ok=True, token=token)
         yield runner.log(f"Repo {out_repo} ready.")
@@ -466,9 +464,7 @@ def run_mergekit_logic(config_dict, token, out_repo, private, shard_size, output
         yield runner.log(f"Repo Error: {e}", level="ERROR")
         return
-    # 4. Execution
     out_path = TempDir / "merge_output"
     shard_arg = f"{int(float(shard_size) * 1024)}M"
     cmd = [
@@ -494,7 +490,6 @@ def run_mergekit_logic(config_dict, token, out_repo, private, shard_size, output
         yield runner.log("Merge failed.", level="ERROR")
         return
-    # 5. Upload
     yield runner.log(f"Uploading to {out_repo}...")
     yield from runner.run_python(api.upload_folder, repo_id=out_repo, folder_path=out_path)
     yield runner.log("Upload Complete!")
@@ -513,23 +508,27 @@ def wrapper_amphinterpolative(token, method, base, t, norm, i8, flat, row, eps,
     if method in ["slerp", "nuslerp"]:
         if not base.strip(): yield runner.log("Error: Base model required", level="ERROR"); return
         config["base_model"] = base.strip()
-        sources = []
-        for m, w in [(m1,w1), (m2,w2)]:
-            if m.strip(): sources.append({"model": m, "parameters": {"weight": parse_weight(w)}})
         config["slices"] = [{"sources": sources, "parameters": params}]
     else:
         if base.strip() and method == "multislerp": config["base_model"] = base.strip()
-        models = []
-        for m, w in [(m1, w1), (m2, w2), (m3, w3), (m4, w4), (m5, w5)]:
-            if m.strip(): models.append({"model": m, "parameters": {"weight": parse_weight(w)}})
         config["models"] = models
         config["parameters"] = params
     yield from run_mergekit_logic(config, token, out, priv, shard, prec, tok_src, chat_t, program="mergekit-yaml")
 def wrapper_stirtie(token, method, base, norm, i8, lamb, resc, topk, m1, w1, d1, g1, e1, m2, w2, d2, g2, e2, m3, w3, d3, g3, e3, m4, w4, d4, g4, e4, out, priv, shard, prec, tok_src, chat_t):
     models = []
-    for m, w, d, g, e in [(m1,w1,d1,g1,e1), (m2,w2,d2,g2,e2), (m3,w3,d3,g3,e3), (m4,w4,d4,g4,e4)]:
         if not m.strip(): continue
         p = {"weight": parse_weight(w)}
         if method in ["ties", "dare_ties", "dare_linear", "breadcrumbs_ties"]: p["density"] = parse_weight(d)
@@ -553,13 +552,11 @@ def wrapper_stirtie(token, method, base, norm, i8, lamb, resc, topk, m1, w1, d1,
 def wrapper_specious(token, method, base, norm, i8, t, filt_w, m1, w1, f1, m2, w2, m3, w3, m4, w4, m5, w5, out, priv, shard, prec, tok_src, chat_t):
     models = []
     if method == "passthrough":
-        if not m1.strip(): yield runner.log("Error: Model 1 required", level="ERROR"); return
         p = {"weight": parse_weight(w1)}
         if f1.strip(): p["filter"] = f1.strip()
         models.append({"model": m1, "parameters": p})
     else:
-        for m, w in [(m1,w1), (m2,w2), (m3,w3), (m4,w4), (m5,w5)]:
-            if m.strip(): models.append({"model": m, "parameters": {"weight": parse_weight(w)}})
     config = {"merge_method": method, "parameters": {"normalize": norm, "int8_mask": i8}}
     if base.strip(): config["base_model"] = base.strip()
@@ -577,12 +574,13 @@ def wrapper_moer(token, base, experts, gate, dtype, out, priv, shard, prec, tok_
         "dtype": dtype,
         "experts": formatted
     }
     yield from run_mergekit_logic(config, token, out, priv, shard, prec, tok_src, chat_t, program="mergekit-moe")
 def wrapper_rawer(token, models, method, dtype, out, priv, shard, prec, tok_src, chat_t):
-    m_list = [m.strip() for m in models.split('\n') if m.strip()]
     config = {
-        "models": [{"model": m, "parameters": {"weight": 1.0}} for m in m_list],
         "merge_method": method,
         "dtype": dtype
     }
@@ -680,7 +678,7 @@ with gr.Blocks() as demo:
             t4_out = gr.Textbox(label="Output")
             gr.Button("Resize").click(task_resize, [t4_token, t4_in, t4_rank, t4_method, t4_param, t4_out], gr.Textbox(label="Result"))
-        # --- TAB 5: Amphinterpolative ---
         with gr.Tab("Amphinterpolative"):
             gr.Markdown("### Spherical Interpolation Family")
             t5_token = gr.Textbox(label="HF Token", type="password")
@@ -703,7 +701,7 @@ with gr.Blocks() as demo:
             t5_out = gr.Textbox(label="Output Repo"); t5_priv = gr.Checkbox(label="Private", value=True)
             gr.Button("Execute").click(wrapper_amphinterpolative, [t5_token, t5_method, t5_base, t5_t, t5_norm, t5_i8, t5_flat, t5_row, t5_eps, t5_iter, t5_tol, m1, w1, m2, w2, m3, w3, m4, w4, m5, w5, t5_out, t5_priv, t5_shard, t5_prec, t5_tok, t5_chat], LogsView())
-        # --- TAB 6: Stir/Tie Bases ---
         with gr.Tab("Stir/Tie Bases"):
             gr.Markdown("### Task Vector Family")
             t6_token = gr.Textbox(label="Token", type="password")
@@ -716,10 +714,14 @@ with gr.Blocks() as demo:
             m1_6, w1_6 = gr.Textbox(label="Model 1"), gr.Textbox(label="Weight 1", value="1.0"); d1_6, g1_6, e1_6 = gr.Textbox(label="Density", value="1.0"), gr.Number(label="Gamma", value=0.01), gr.Number(label="Epsilon", value=0.15)
             with gr.Accordion("More", open=False):
                 m2_6, w2_6 = gr.Textbox(label="Model 2"), gr.Textbox(label="Weight 2", value="1.0"); d2_6, g2_6, e2_6 = gr.Textbox(label="Density", value="1.0"), gr.Number(label="Gamma", value=0.01), gr.Number(label="Epsilon", value=0.15)
             t6_out = gr.Textbox(label="Output Repo"); t6_priv = gr.Checkbox(label="Private", value=True)
-            gr.Button("Execute").click(wrapper_stirtie, [t6_token, t6_method, t6_base, t6_norm, t6_i8, t6_lamb, t6_resc, t6_topk, m1_6, w1_6, d1_6, g1_6, e1_6, m2_6, w2_6, d2_6, g2_6, e2_6, t6_out, t6_priv, t6_shard, t6_prec, t6_tok, t6_chat], LogsView())
-        # --- TAB 7: Specious ---
         with gr.Tab("Specious"):
             gr.Markdown("### Specialized Methods")
             t7_token = gr.Textbox(label="Token", type="password")
@@ -736,7 +738,7 @@ with gr.Blocks() as demo:
             t7_out = gr.Textbox(label="Output Repo"); t7_priv = gr.Checkbox(label="Private", value=True)
             gr.Button("Execute").click(wrapper_specious, [t7_token, t7_method, t7_base, t7_norm, t7_i8, t7_t, t7_filt_w, m1_7, w1_7, f1_7, m2_7, w2_7, m3_7, w3_7, m4_7, w4_7, m5_7, w5_7, t7_out, t7_priv, t7_shard, t7_prec, t7_tok, t7_chat], LogsView())
-        # --- TAB 8: MoEr ---
         with gr.Tab("MoEr"):
             gr.Markdown("### Mixture of Experts")
             t8_token = gr.Textbox(label="Token", type="password")
@@ -746,7 +748,7 @@ with gr.Blocks() as demo:
             t8_out = gr.Textbox(label="Output Repo"); t8_priv = gr.Checkbox(label="Private", value=True)
             gr.Button("Build MoE").click(wrapper_moer, [t8_token, t8_base, t8_experts, t8_gate, t8_dtype, t8_out, t8_priv, t8_shard, t8_prec, t8_tok, t8_chat], LogsView())
-        # --- TAB 9: Rawer ---
         with gr.Tab("Rawer"):
             gr.Markdown("### Raw PyTorch / Non-Transformer")
             t9_token = gr.Textbox(label="Token", type="password"); t9_models = gr.TextArea(label="Models (one per line)")
@@ -756,9 +758,8 @@ with gr.Blocks() as demo:
             t9_out = gr.Textbox(label="Output Repo"); t9_priv = gr.Checkbox(label="Private", value=True)
             gr.Button("Merge Raw").click(wrapper_rawer, [t9_token, t9_models, t9_method, t9_dtype, t9_out, t9_priv, t9_shard, t9_prec, t9_tok, t9_chat], LogsView())
-        # --- TAB 10: Mario,DARE! ---
         with gr.Tab("Mario,DARE!"):
-            gr.Markdown("### From sft-merger by [Martyn Garcia](https://github.com/martyn)")
             t10_token = gr.Textbox(label="Token", type="password")
             with gr.Row():
                 t10_base = gr.Textbox(label="Base Model"); t10_ft = gr.Textbox(label="Fine-Tuned Model")

 from safetensors.torch import load_file, save_file
 from tqdm import tqdm
+# --- Essential Imports ---
+# No try/except block here. If these fail, the app should error out visibly rather than freeze.
 from gradio_logsview.logsview import Log, LogsView, LogsViewRunner
 from mergekit.config import MergeConfiguration
+# --- Constants & Setup ---
 try:
     TempDir = Path("/tmp/temp_tool")
     os.makedirs(TempDir, exist_ok=True)
     return key
 # =================================================================================
+# TABS 1-4 LOGIC (RESTORED)
 # =================================================================================
 class MemoryEfficientSafeOpen:
     except Exception as e: return f"Error: {e}"
     if struct_s:
         try:
+            for f in api.list_repo_files(repo_id=struct_s, token=hf_token):
                 if sub and f.startswith(sub): continue
                 if not sub and any(f.endswith(x) for x in ['.safetensors', '.bin', '.pt', '.pth']): continue
                 l = hf_hub_download(repo_id=struct_s, filename=f, token=hf_token, local_dir=TempDir)
     return "Done"
 # =================================================================================
+# MERGEKIT & LOGSVIEW (TABS 5-9)
 # =================================================================================
 def parse_weight(w_str):
     runner = LogsViewRunner()
     cleanup_temp()
+    # Empty field handling: only add chat_template to the config when a non-empty value was provided.
+    if chat_template and chat_template.strip():
+        config_dict["chat_template"] = chat_template.strip()
     try:
+        if program != "mergekit-moe":
+            MergeConfiguration.model_validate(config_dict)
     except Exception as e:
         yield runner.log(f"Invalid Config: {e}", level="ERROR")
         return
     if token:
         login(token.strip())
         os.environ["HF_TOKEN"] = token.strip()
     if "dtype" not in config_dict: config_dict["dtype"] = output_precision
     if "tokenizer_source" not in config_dict and tokenizer_source != "base":
         config_dict["tokenizer_source"] = tokenizer_source
     config_path = TempDir / "config.yaml"
     with open(config_path, "w") as f: yaml.dump(config_dict, f, sort_keys=False)
     yield runner.log(f"Config saved to {config_path}")
     yield runner.log(f"YAML:\n{yaml.dump(config_dict, sort_keys=False)}")
     try:
         api.create_repo(repo_id=out_repo, private=private, exist_ok=True, token=token)
         yield runner.log(f"Repo {out_repo} ready.")
         yield runner.log(f"Repo Error: {e}", level="ERROR")
         return
     out_path = TempDir / "merge_output"
     shard_arg = f"{int(float(shard_size) * 1024)}M"
     cmd = [
         yield runner.log("Merge failed.", level="ERROR")
         return
     yield runner.log(f"Uploading to {out_repo}...")
     yield from runner.run_python(api.upload_folder, repo_id=out_repo, folder_path=out_path)
     yield runner.log("Upload Complete!")
     if method in ["slerp", "nuslerp"]:
         if not base.strip(): yield runner.log("Error: Base model required", level="ERROR"); return
         config["base_model"] = base.strip()
+        sources = [{"model": m, "parameters": {"weight": parse_weight(w)}} for m, w in [(m1,w1), (m2,w2)] if m.strip()]
         config["slices"] = [{"sources": sources, "parameters": params}]
     else:
         if base.strip() and method == "multislerp": config["base_model"] = base.strip()
+        models = [{"model": m, "parameters": {"weight": parse_weight(w)}} for m, w in [(m1,w1), (m2,w2), (m3,w3), (m4,w4), (m5,w5)] if m.strip()]
         config["models"] = models
         config["parameters"] = params
     yield from run_mergekit_logic(config, token, out, priv, shard, prec, tok_src, chat_t, program="mergekit-yaml")
 def wrapper_stirtie(token, method, base, norm, i8, lamb, resc, topk, m1, w1, d1, g1, e1, m2, w2, d2, g2, e2, m3, w3, d3, g3, e3, m4, w4, d4, g4, e4, out, priv, shard, prec, tok_src, chat_t):
+    # Fix: This wrapper was causing the freeze due to mismatched arguments.
+    # It must handle m3 and m4 inputs correctly.
     models = []
+    # Loop over the 4 models defined in UI
+    for m, w, d, g, e in [
+        (m1, w1, d1, g1, e1),
+        (m2, w2, d2, g2, e2),
+        (m3, w3, d3, g3, e3),
+        (m4, w4, d4, g4, e4)
+    ]:
         if not m.strip(): continue
         p = {"weight": parse_weight(w)}
         if method in ["ties", "dare_ties", "dare_linear", "breadcrumbs_ties"]: p["density"] = parse_weight(d)
 def wrapper_specious(token, method, base, norm, i8, t, filt_w, m1, w1, f1, m2, w2, m3, w3, m4, w4, m5, w5, out, priv, shard, prec, tok_src, chat_t):
     models = []
     if method == "passthrough":
         p = {"weight": parse_weight(w1)}
         if f1.strip(): p["filter"] = f1.strip()
         models.append({"model": m1, "parameters": p})
     else:
+        models = [{"model": m, "parameters": {"weight": parse_weight(w)}} for m, w in [(m1,w1), (m2,w2), (m3,w3), (m4,w4), (m5,w5)] if m.strip()]
     config = {"merge_method": method, "parameters": {"normalize": norm, "int8_mask": i8}}
     if base.strip(): config["base_model"] = base.strip()
         "dtype": dtype,
         "experts": formatted
     }
+    # Uses mergekit-moe CLI
     yield from run_mergekit_logic(config, token, out, priv, shard, prec, tok_src, chat_t, program="mergekit-moe")
 def wrapper_rawer(token, models, method, dtype, out, priv, shard, prec, tok_src, chat_t):
+    models_list = [{"model": m.strip(), "parameters": {"weight": 1.0}} for m in models.split('\n') if m.strip()]
     config = {
+        "models": models_list,
         "merge_method": method,
         "dtype": dtype
     }
             t4_out = gr.Textbox(label="Output")
             gr.Button("Resize").click(task_resize, [t4_token, t4_in, t4_rank, t4_method, t4_param, t4_out], gr.Textbox(label="Result"))
+        # --- TAB 5 ---
         with gr.Tab("Amphinterpolative"):
             gr.Markdown("### Spherical Interpolation Family")
             t5_token = gr.Textbox(label="HF Token", type="password")
             t5_out = gr.Textbox(label="Output Repo"); t5_priv = gr.Checkbox(label="Private", value=True)
             gr.Button("Execute").click(wrapper_amphinterpolative, [t5_token, t5_method, t5_base, t5_t, t5_norm, t5_i8, t5_flat, t5_row, t5_eps, t5_iter, t5_tol, m1, w1, m2, w2, m3, w3, m4, w4, m5, w5, t5_out, t5_priv, t5_shard, t5_prec, t5_tok, t5_chat], LogsView())
+        # --- TAB 6 ---
         with gr.Tab("Stir/Tie Bases"):
             gr.Markdown("### Task Vector Family")
             t6_token = gr.Textbox(label="Token", type="password")
             m1_6, w1_6 = gr.Textbox(label="Model 1"), gr.Textbox(label="Weight 1", value="1.0"); d1_6, g1_6, e1_6 = gr.Textbox(label="Density", value="1.0"), gr.Number(label="Gamma", value=0.01), gr.Number(label="Epsilon", value=0.15)
             with gr.Accordion("More", open=False):
                 m2_6, w2_6 = gr.Textbox(label="Model 2"), gr.Textbox(label="Weight 2", value="1.0"); d2_6, g2_6, e2_6 = gr.Textbox(label="Density", value="1.0"), gr.Number(label="Gamma", value=0.01), gr.Number(label="Epsilon", value=0.15)
+                # FIX: Added Missing UI components for models 3 & 4
+                m3_6, w3_6 = gr.Textbox(label="Model 3"), gr.Textbox(label="Weight 3", value="1.0"); d3_6, g3_6, e3_6 = gr.Textbox(label="Density", value="1.0"), gr.Number(label="Gamma", value=0.01), gr.Number(label="Epsilon", value=0.15)
+                m4_6, w4_6 = gr.Textbox(label="Model 4"), gr.Textbox(label="Weight 4", value="1.0"); d4_6, g4_6, e4_6 = gr.Textbox(label="Density", value="1.0"), gr.Number(label="Gamma", value=0.01), gr.Number(label="Epsilon", value=0.15)
             t6_out = gr.Textbox(label="Output Repo"); t6_priv = gr.Checkbox(label="Private", value=True)
+            # FIX: Included all model variables in input list
+            gr.Button("Execute").click(wrapper_stirtie, [t6_token, t6_method, t6_base, t6_norm, t6_i8, t6_lamb, t6_resc, t6_topk, m1_6, w1_6, d1_6, g1_6, e1_6, m2_6, w2_6, d2_6, g2_6, e2_6, m3_6, w3_6, d3_6, g3_6, e3_6, m4_6, w4_6, d4_6, g4_6, e4_6, t6_out, t6_priv, t6_shard, t6_prec, t6_tok, t6_chat], LogsView())
+        # --- TAB 7 ---
         with gr.Tab("Specious"):
             gr.Markdown("### Specialized Methods")
             t7_token = gr.Textbox(label="Token", type="password")
             t7_out = gr.Textbox(label="Output Repo"); t7_priv = gr.Checkbox(label="Private", value=True)
             gr.Button("Execute").click(wrapper_specious, [t7_token, t7_method, t7_base, t7_norm, t7_i8, t7_t, t7_filt_w, m1_7, w1_7, f1_7, m2_7, w2_7, m3_7, w3_7, m4_7, w4_7, m5_7, w5_7, t7_out, t7_priv, t7_shard, t7_prec, t7_tok, t7_chat], LogsView())
+        # --- TAB 8 (MoEr) ---
         with gr.Tab("MoEr"):
             gr.Markdown("### Mixture of Experts")
             t8_token = gr.Textbox(label="Token", type="password")
             t8_out = gr.Textbox(label="Output Repo"); t8_priv = gr.Checkbox(label="Private", value=True)
             gr.Button("Build MoE").click(wrapper_moer, [t8_token, t8_base, t8_experts, t8_gate, t8_dtype, t8_out, t8_priv, t8_shard, t8_prec, t8_tok, t8_chat], LogsView())
+        # --- TAB 9 (Rawer) ---
         with gr.Tab("Rawer"):
             gr.Markdown("### Raw PyTorch / Non-Transformer")
             t9_token = gr.Textbox(label="Token", type="password"); t9_models = gr.TextArea(label="Models (one per line)")
             t9_out = gr.Textbox(label="Output Repo"); t9_priv = gr.Checkbox(label="Private", value=True)
             gr.Button("Merge Raw").click(wrapper_rawer, [t9_token, t9_models, t9_method, t9_dtype, t9_out, t9_priv, t9_shard, t9_prec, t9_tok, t9_chat], LogsView())
+        # --- TAB 10 ---
         with gr.Tab("Mario,DARE!"):
             t10_token = gr.Textbox(label="Token", type="password")
             with gr.Row():
                 t10_base = gr.Textbox(label="Base Model"); t10_ft = gr.Textbox(label="Fine-Tuned Model")