AlekseyCalvin committed on
Commit
fd17c26
·
verified ·
1 Parent(s): 0032edf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -44
app.py CHANGED
@@ -607,29 +607,29 @@ def task_full_mergekit_merge(hf_token, models_text, method, dtype, base_model, w
607
  # TAB 6: MOE CREATION
608
  # =================================================================================
609
 
610
- def task_moe_create(hf_token, base_model, experts_text, prompts_text, gate_mode, dtype, tok_source, shard_size, out_repo, private):
 
611
  cleanup_temp()
612
  if not hf_token or not out_repo: return "Error: Token and Output Repo required."
613
  login(hf_token.strip())
614
 
615
  experts = [e.strip() for e in experts_text.split('\n') if e.strip()]
616
  prompts = [p.strip() for p in prompts_text.split('\n') if p.strip()]
 
617
 
618
- # 1. Build Config
619
  config = build_moe_config(
620
- base_model=base_model, experts=experts, prompts=prompts, gate_mode=gate_mode,
621
- dtype=dtype, tokenizer_source=tok_source
 
622
  )
623
 
624
- # 2. Execute
625
  out_path = TempDir / "moe_model"
626
  try:
627
  execute_mergekit_config(config, str(out_path), shard_size)
628
  api.create_repo(repo_id=out_repo, private=private, exist_ok=True, token=hf_token)
629
  api.upload_folder(folder_path=str(out_path), repo_id=out_repo, token=hf_token)
630
  return f"Success! MoE model uploaded to {out_repo}"
631
- except Exception as e:
632
- return f"MoE Error: {e}"
633
 
634
  # --- TAB 8: Raw PyTorch (New) ---
635
  def task_raw_pytorch(hf_token, models_text, method, dtype, base_model, weights, shard_size, out_repo, private):
@@ -662,20 +662,19 @@ with gr.Blocks() as demo:
662
  gr.Markdown("# 🧰Training-Free CPU-run Model Creation Toolkit")
663
 
664
  with gr.Tabs():
665
- # --- ORIGINAL TABS ---
666
- with gr.Tab("Merge to Base + Reshard"):
667
  with gr.Row():
668
  t1_token = gr.Textbox(label="Token", type="password")
669
- t1_out = gr.Textbox(label="Output Repo")
670
  with gr.Row():
671
  t1_base = gr.Textbox(label="Base Repo", value="name/repo")
672
- t1_sub = gr.Textbox(label="Subfolder", value="")
673
- t1_lora = gr.Textbox(label="LoRA Source", value="https://...")
674
  with gr.Row():
675
- t1_scale = gr.Slider(0, 3, 1.0, label="Scale")
676
  t1_prec = gr.Radio(["bf16", "fp16", "float32"], value="bf16", label="Precision")
677
- t1_shard = gr.Slider(0.5, 10, 2.0, label="Shard (GB)")
678
- t1_struct = gr.Textbox(label="Structure Source")
 
679
  t1_priv = gr.Checkbox(label="Private", value=True)
680
  t1_btn = gr.Button("Merge")
681
  t1_res = gr.Textbox(label="Result")
@@ -684,25 +683,31 @@ with gr.Blocks() as demo:
684
  with gr.Tab("Extract Adapter"):
685
  t2_token = gr.Textbox(label="Token", type="password")
686
  t2_org = gr.Textbox(label="Original Model")
687
- t2_tun = gr.Textbox(label="Tuned Model")
688
- t2_rank = gr.Number(label="Rank", value=32)
689
  t2_out = gr.Textbox(label="Output Repo")
690
  t2_btn = gr.Button("Extract")
691
  t2_res = gr.Textbox(label="Result")
692
  t2_btn.click(task_extract, [t2_token, t2_org, t2_tun, t2_rank, t2_out], t2_res)
693
 
694
  with gr.Tab("Merge Adapters"):
 
695
  t3_token = gr.Textbox(label="Token", type="password")
696
- t3_urls = gr.TextArea(label="Adapter URLs")
697
- t3_method = gr.Dropdown(["Iterative EMA", "Concatenation", "SVD Fusion"], value="Iterative EMA")
 
 
 
 
 
698
  with gr.Row():
699
- t3_weights = gr.Textbox(label="Weights")
700
- t3_rank = gr.Number(label="Target Rank", value=128)
701
  with gr.Row():
702
- t3_beta = gr.Slider(0.01, 1.0, 0.95, label="Beta")
703
- t3_sigma = gr.Slider(0.01, 1.0, 0.21, label="Sigma")
704
  t3_out = gr.Textbox(label="Output Repo")
705
- t3_priv = gr.Checkbox(label="Private", value=True)
706
  t3_btn = gr.Button("Merge")
707
  t3_res = gr.Textbox(label="Result")
708
  t3_btn.click(task_merge_adapters_advanced, [t3_token, t3_urls, t3_method, t3_weights, t3_beta, t3_sigma, t3_rank, t3_out, t3_priv], t3_res)
@@ -711,9 +716,20 @@ with gr.Blocks() as demo:
711
  t4_token = gr.Textbox(label="Token", type="password")
712
  t4_in = gr.Textbox(label="LoRA")
713
  with gr.Row():
714
- t4_rank = gr.Number(label="To Rank", value=8)
715
- t4_method = gr.Dropdown(["None", "sv_ratio", "sv_fro", "sv_cumulative"], value="None")
716
- t4_param = gr.Number(label="Param", value=0.9)
 
 
 
 
 
 
 
 
 
 
 
717
  t4_out = gr.Textbox(label="Output")
718
  t4_btn = gr.Button("Resize")
719
  t4_res = gr.Textbox(label="Result")
@@ -724,18 +740,18 @@ with gr.Blocks() as demo:
724
  gr.Markdown("### 🧩 MergeKit Engine (Multi-Model)")
725
  with gr.Row():
726
  t5_token = gr.Textbox(label="HF Token", type="password")
727
- t5_method = gr.Dropdown(["Linear", "SLERP", "TIES", "DARE_TIES", "DARE_LINEAR", "Model_Stock"], value="TIES", label="Method")
728
- t5_dtype = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output Dtype")
729
 
730
- t5_models = gr.TextArea(label="Models (One per line)", placeholder="user/model_A\nuser/model_B")
731
 
732
  with gr.Accordion("Advanced Parameters", open=True):
733
  with gr.Row():
734
  t5_base = gr.Textbox(label="Base Model (Optional/Auto)", placeholder="Defaults to first model if empty")
735
- t5_shard = gr.Slider(0.5, 10, 2.0, step=0.5, label="Shard Size (GB)")
736
  with gr.Row():
737
- t5_weights = gr.Textbox(label="Weights (Comma sep)", placeholder="1.0, 0.5 or 0.5 (for SLERP t)")
738
- t5_density = gr.Slider(0, 1, 0.5, label="Density (TIES/DARE)")
739
  with gr.Row():
740
  t5_tok = gr.Dropdown(["base", "union", "first"], value="base", label="Tokenizer Source")
741
  t5_ranges = gr.TextArea(label="Layer Ranges/Slices (JSON or SLERP config)", placeholder='{"slices": [{"sources": [{"model": "A", "layer_range": [0, 16]}]}]}')
@@ -751,14 +767,20 @@ with gr.Blocks() as demo:
751
  gr.Markdown("### 🤖 MoE Architecture Upscaling")
752
  with gr.Row():
753
  t6_token = gr.Textbox(label="HF Token", type="password")
754
- t6_dtype = gr.Dropdown(["float16", "bfloat16"], value="bfloat16", label="Dtype")
755
- t6_shard = gr.Slider(0.5, 10, 2.0, step=0.5, label="Shard Size (GB)")
 
756
 
757
- t6_base = gr.Textbox(label="Base Architecture Model", placeholder="e.g. Mistral-7B-v0.1")
758
- t6_experts = gr.TextArea(label="Expert Models (One per line)", placeholder="expert1/repo\nexpert2/repo...")
 
759
 
 
 
 
760
  with gr.Row():
761
- t6_gate = gr.Dropdown(["cheap_embed", "hidden", "random"], value="cheap_embed", label="Gate Mode")
 
762
  t6_tok = gr.Dropdown(["base", "union", "first"], value="base", label="Tokenizer Source")
763
 
764
  t6_out = gr.Textbox(label="Output Repo")
@@ -766,7 +788,7 @@ with gr.Blocks() as demo:
766
  t6_btn = gr.Button("🏗️ Build MoE")
767
  t6_res = gr.Textbox(label="Result")
768
 
769
- t6_btn.click(task_moe_create, [t6_token, t6_base, t6_experts, t6_gate, t6_dtype, t6_tok, t6_shard, t6_out, t6_priv], t6_res)
770
 
771
  with gr.Tab("DARE Fusion (Custom)"):
772
  gr.Markdown("### 🎲 DARE Fusion (Custom Implementation)")
@@ -786,15 +808,15 @@ with gr.Blocks() as demo:
786
  t7_btn.click(task_dare_custom, [t7_token, t7_base, t7_ft, t7_ratio, t7_mask, t7_out, t7_priv], t7_res)
787
 
788
  with gr.Tab("Raw PyTorch Merge"):
789
- gr.Markdown("### 🧠 Raw Weight Merging (Non-Transformers)")
790
  t8_token = gr.Textbox(label="HF Token", type="password")
791
- t8_method = gr.Dropdown(["Linear", "TIES", "Task_Arithmetic"], value="Linear", label="Method")
792
- t8_models = gr.TextArea(label="Models (Path/Repo)")
793
  with gr.Row():
794
  t8_base = gr.Textbox(label="Base Model (Optional)")
795
- t8_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="Dtype")
796
  t8_weights = gr.Textbox(label="Weights")
797
- t8_shard = gr.Slider(0.5, 10, 2.0, label="Shard Size (GB)")
798
  t8_out = gr.Textbox(label="Output Repo")
799
  t8_priv = gr.Checkbox(label="Private", value=True)
800
  t8_btn = gr.Button("Merge Raw Weights")
 
607
  # TAB 6: MOE CREATION
608
  # =================================================================================
609
 
610
+ def task_moe_create(hf_token, base_model, experts_text, prompts_text, shared_expert_text, gate_mode, dtype, tok_source, shard_size, out_repo, private):
611
+ # Args: Token, Base, Experts, Prompts, Shared, Gate, Dtype, Tok, Shard, Out, Priv
612
  cleanup_temp()
613
  if not hf_token or not out_repo: return "Error: Token and Output Repo required."
614
  login(hf_token.strip())
615
 
616
  experts = [e.strip() for e in experts_text.split('\n') if e.strip()]
617
  prompts = [p.strip() for p in prompts_text.split('\n') if p.strip()]
618
+ shared = [s.strip() for s in shared_expert_text.split('\n') if s.strip()]
619
 
 
620
  config = build_moe_config(
621
+ base_model=base_model, experts=experts, prompts=prompts,
622
+ gate_mode=gate_mode, dtype=dtype, tokenizer_source=tok_source,
623
+ shared_experts=shared
624
  )
625
 
 
626
  out_path = TempDir / "moe_model"
627
  try:
628
  execute_mergekit_config(config, str(out_path), shard_size)
629
  api.create_repo(repo_id=out_repo, private=private, exist_ok=True, token=hf_token)
630
  api.upload_folder(folder_path=str(out_path), repo_id=out_repo, token=hf_token)
631
  return f"Success! MoE model uploaded to {out_repo}"
632
+ except Exception as e: return f"MoE Error: {e}"
 
633
 
634
  # --- TAB 8: Raw PyTorch (New) ---
635
  def task_raw_pytorch(hf_token, models_text, method, dtype, base_model, weights, shard_size, out_repo, private):
 
662
  gr.Markdown("# 🧰Training-Free CPU-run Model Creation Toolkit")
663
 
664
  with gr.Tabs():
665
+ with gr.Tab("Merge to Base Model + Reshard Output"):
 
666
  with gr.Row():
667
  t1_token = gr.Textbox(label="Token", type="password")
 
668
  with gr.Row():
669
  t1_base = gr.Textbox(label="Base Repo", value="name/repo")
670
+ t1_sub = gr.Textbox(label="Subfolder (Optional)", value="")
671
+ t1_lora = gr.Textbox(label="LoRA Direct Link or Repo", value="https://huggingface.co/GuangyuanSD/Z-Image-Re-Turbo-LoRA/resolve/main/Z-image_re_turbo_lora_8steps_rank_32_v1_fp16.safetensors")
672
  with gr.Row():
673
+ t1_scale = gr.Slider(label="Scale", value=1.0, minimum=0, maximum=3.0, step=0.1)
674
  t1_prec = gr.Radio(["bf16", "fp16", "float32"], value="bf16", label="Precision")
675
+ t1_shard = gr.Slider(label="Max Shard Size (GB)", value=2.0, minimum=0.1, maximum=10.0, step=0.1)
676
+ t1_out = gr.Textbox(label="Output Repo")
677
+ t1_struct = gr.Textbox(label="Extras Source (copies configs/components/etc)", value="name/repo")
678
  t1_priv = gr.Checkbox(label="Private", value=True)
679
  t1_btn = gr.Button("Merge")
680
  t1_res = gr.Textbox(label="Result")
 
683
  with gr.Tab("Extract Adapter"):
684
  t2_token = gr.Textbox(label="Token", type="password")
685
  t2_org = gr.Textbox(label="Original Model")
686
+ t2_tun = gr.Textbox(label="Tuned or Homologous Model")
687
+ t2_rank = gr.Number(label="Extract At Rank", value=32, minimum=1, maximum=1024, step=1)
688
  t2_out = gr.Textbox(label="Output Repo")
689
  t2_btn = gr.Button("Extract")
690
  t2_res = gr.Textbox(label="Result")
691
  t2_btn.click(task_extract, [t2_token, t2_org, t2_tun, t2_rank, t2_out], t2_res)
692
 
693
  with gr.Tab("Merge Adapters"):
694
+ gr.Markdown("### Batch Adapter Merging")
695
  t3_token = gr.Textbox(label="Token", type="password")
696
+ t3_urls = gr.TextArea(label="Adapter URLs/Repos (one per line, or space-separated)", placeholder="user/lora1\nhttps://hf.co/user/lora2.safetensors\n...")
697
+ with gr.Row():
698
+ t3_method = gr.Dropdown(
699
+ ["Iterative EMA (Linear w/ Beta/Sigma coefficient)", "Concatenation (MOE-like weights-stack)", "SVD Fusion (Task Arithmetic/Compressed)"],
700
+ value="Iterative EMA (Linear w/ Beta/Sigma coefficient)",
701
+ label="Merge Method"
702
+ )
703
  with gr.Row():
704
+ t3_weights = gr.Textbox(label="Weights (comma-separated) – for Concat/SVD", placeholder="1.0, 0.5, 0.8...")
705
+ t3_rank = gr.Number(label="Target Rank – For SVD only", value=128, minimum=1, maximum=1024)
706
  with gr.Row():
707
+ t3_beta = gr.Slider(label="Beta – for linear/post-hoc EMA", value=0.95, minimum=0.01, maximum=1.00, step=0.01)
708
+ t3_sigma = gr.Slider(label="Sigma Rel – for linear/post-hoc EMA", value=0.21, minimum=0.01, maximum=1.00, step=0.01)
709
  t3_out = gr.Textbox(label="Output Repo")
710
+ t3_priv = gr.Checkbox(label="Private Output", value=True)
711
  t3_btn = gr.Button("Merge")
712
  t3_res = gr.Textbox(label="Result")
713
  t3_btn.click(task_merge_adapters_advanced, [t3_token, t3_urls, t3_method, t3_weights, t3_beta, t3_sigma, t3_rank, t3_out, t3_priv], t3_res)
 
716
  t4_token = gr.Textbox(label="Token", type="password")
717
  t4_in = gr.Textbox(label="LoRA")
718
  with gr.Row():
719
+ t4_rank = gr.Number(label="To Rank (Safety Ceiling)", value=8, minimum=1, maximum=512, step=1)
720
+ t4_method = gr.Dropdown(["None", "sv_ratio", "sv_fro", "sv_cumulative"], value="None", label="Dynamic Method")
721
+ t4_param = gr.Number(label="Dynamic Param", value=0.9)
722
+
723
+ gr.Markdown(
724
+ """
725
+ ### 📉 Dynamic Resizing Guide
726
+ These methods intelligently determine the best rank per layer.
727
+ * **sv_ratio (Relative Strength):** Keeps features that are at least `1/Param` as strong as the main feature. **Param must be >= 2**. (e.g. 2 = keep features half as strong as top).
728
+ * **sv_fro (Visual Information Density):** Preserves `Param%` of the total information content (Frobenius Norm) of the layer. **Param between 0.0 and 1.0** (e.g. 0.9 = 90% info retention).
729
+ * **sv_cumulative (Cumulative Sum):** Preserves weights that sum up to `Param%` of the total strength. **Param between 0.0 and 1.0**.
730
+ * **⚠️ Safety Ceiling:** The **"To Rank"** slider acts as a hard limit. Even if a dynamic method wants a higher rank, it will be cut down to this number to keep file sizes small.
731
+ """
732
+ )
733
  t4_out = gr.Textbox(label="Output")
734
  t4_btn = gr.Button("Resize")
735
  t4_res = gr.Textbox(label="Result")
 
740
  gr.Markdown("### 🧩 MergeKit Engine (Multi-Model)")
741
  with gr.Row():
742
  t5_token = gr.Textbox(label="HF Token", type="password")
743
+ t5_method = gr.Dropdown(["linear", "slerp", "nuslerp", "nearswap", "ties", "dare_ties", "dare_linear", "model_stock", "karcher", "passthrough", "task_arithmetic", "sce", "breadcrumbs", "breadcrumbs_ties", "arcee_fusion"], value="ties", label="Method")
744
+ t5_dtype = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output dtype")
745
 
746
+ t5_models = gr.TextArea(label="Models (one per line)", placeholder="user/model_A\nuser/model_B")
747
 
748
  with gr.Accordion("Advanced Parameters", open=True):
749
  with gr.Row():
750
  t5_base = gr.Textbox(label="Base Model (Optional/Auto)", placeholder="Defaults to first model if empty")
751
+ t5_shard = gr.Slider(label="Max Shard Size (GB)", value=2.0, minimum=0.1, maximum=10.0, step=0.1)
752
  with gr.Row():
753
+ t5_weights = gr.Textbox(label="Weight-ratios (per model, comma-sep) or Mix-factor t (for SLERPs)", placeholder="1.0, 0.5, 0.5 (DARE, Soup, TIES...) or 0-to-1.0 (SLERP Base/Other-ratio)")
754
+ t5_density = gr.Textbox(label="Density (weights retained) (for DARE/TIES/...)", placeholder="0.9")
755
  with gr.Row():
756
  t5_tok = gr.Dropdown(["base", "union", "first"], value="base", label="Tokenizer Source")
757
  t5_ranges = gr.TextArea(label="Layer Ranges/Slices (JSON or SLERP config)", placeholder='{"slices": [{"sources": [{"model": "A", "layer_range": [0, 16]}]}]}')
 
767
  gr.Markdown("### 🤖 MoE Architecture Upscaling")
768
  with gr.Row():
769
  t6_token = gr.Textbox(label="HF Token", type="password")
770
+ t6_dtype = gr.Dropdown(["float16", "bfloat16"], value="bfloat16", label="dtype")
771
+ t6_shard = gr.Slider(label="Max Shard Size (GB)", value=2.0, minimum=0.1, maximum=10.0, step=0.1)
772
+ t6_base = gr.Textbox(label="Base Architecture Model")
773
 
774
+ with gr.Row():
775
+ t6_experts = gr.TextArea(label="Expert Models (One per line)", placeholder="expert1/repo\nexpert2/repo")
776
+ t6_prompts = gr.TextArea(label="Expertise Prompts (Optional for Cheap_Embed/Randoms)", placeholder="Prompt for expert1\nPrompt for expert2")
777
 
778
+ # ADDED: Shared Expert Input
779
+ t6_shared = gr.Textbox(label="Shared Expert (Required for Qwen2, Empty for Mixtral)", placeholder="repo/shared_model")
780
+
781
  with gr.Row():
782
+ # ADDED: uniform_random
783
+ t6_gate = gr.Dropdown(["cheap_embed", "hidden", "random", "uniform_random"], value="cheap_embed", label="Gate Mode")
784
  t6_tok = gr.Dropdown(["base", "union", "first"], value="base", label="Tokenizer Source")
785
 
786
  t6_out = gr.Textbox(label="Output Repo")
 
788
  t6_btn = gr.Button("🏗️ Build MoE")
789
  t6_res = gr.Textbox(label="Result")
790
 
791
+ t6_btn.click(task_moe_create, [t6_token, t6_base, t6_experts, t6_prompts, t6_shared, t6_gate, t6_dtype, t6_tok, t6_shard, t6_out, t6_priv], t6_res)
792
 
793
  with gr.Tab("DARE Fusion (Custom)"):
794
  gr.Markdown("### 🎲 DARE Fusion (Custom Implementation)")
 
808
  t7_btn.click(task_dare_custom, [t7_token, t7_base, t7_ft, t7_ratio, t7_mask, t7_out, t7_priv], t7_res)
809
 
810
  with gr.Tab("Raw PyTorch Merge"):
811
+ gr.Markdown("### 🧠 Raw Weight Merging (Works Beyond Transformers Library)")
812
  t8_token = gr.Textbox(label="HF Token", type="password")
813
+ t8_method = gr.Dropdown(["linear", "ties", "task_arithmetic", "nuslerp", "nearswap", "dare_ties", "dare_linear", "model_stock", "karcher", "passthrough", "sce", "breadcrumbs", "breadcrumbs_ties", "arcee_fusion"], value="linear", label="Method")
814
+ t8_models = gr.TextArea(label="Models (one per line)", placeholder="user/model_A\nuser/model_B")
815
  with gr.Row():
816
  t8_base = gr.Textbox(label="Base Model (Optional)")
817
+ t8_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="dtype")
818
  t8_weights = gr.Textbox(label="Weights")
819
+ t8_shard = gr.Slider(label="Max Shard Size (GB)", value=2.0, minimum=0.1, maximum=10.0, step=0.1)
820
  t8_out = gr.Textbox(label="Output Repo")
821
  t8_priv = gr.Checkbox(label="Private", value=True)
822
  t8_btn = gr.Button("Merge Raw Weights")