Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -607,29 +607,29 @@ def task_full_mergekit_merge(hf_token, models_text, method, dtype, base_model, w
|
|
| 607 |
# TAB 6: MOE CREATION
|
| 608 |
# =================================================================================
|
| 609 |
|
| 610 |
-
def task_moe_create(hf_token, base_model, experts_text, prompts_text, gate_mode, dtype, tok_source, shard_size, out_repo, private):
    """Build a Mixture-of-Experts model via MergeKit and upload it to the Hub.

    Args:
        hf_token: Hugging Face access token used for login, repo creation
            and upload. Surrounding whitespace is ignored.
        base_model: Passed to ``build_moe_config`` as ``base_model``.
        experts_text: Expert entries, one per line; blank lines are dropped.
        prompts_text: Prompt entries, one per line; blank lines are dropped.
        gate_mode: Passed to ``build_moe_config`` as ``gate_mode``.
        dtype: Passed to ``build_moe_config`` as ``dtype``.
        tok_source: Passed to ``build_moe_config`` as ``tokenizer_source``.
        shard_size: Passed to ``execute_mergekit_config`` as its third argument.
        out_repo: Target repository id on the Hub.
        private: Passed to ``api.create_repo`` as ``private``.

    Returns:
        A human-readable status string: a success message, or a message
        starting with ``"Error:"`` / ``"MoE Error:"`` on failure.
    """
    def _lines(text):
        # Guard against a None value so an empty field cannot crash on .split.
        return [ln.strip() for ln in (text or "").split('\n') if ln.strip()]

    cleanup_temp()

    # Normalise once so the very same token is used for login and API calls.
    token = (hf_token or "").strip()
    repo = (out_repo or "").strip()
    if not token or not repo:
        return "Error: Token and Output Repo required."
    login(token)

    experts = _lines(experts_text)
    prompts = _lines(prompts_text)

    out_path = TempDir / "moe_model"
    try:
        # 1. Build Config (inside the try so config errors are reported too).
        config = build_moe_config(
            base_model=base_model,
            experts=experts,
            prompts=prompts,
            # Previously accepted but silently dropped.
            gate_mode=gate_mode,
            dtype=dtype,
            tokenizer_source=tok_source,
        )

        # 2. Execute
        execute_mergekit_config(config, str(out_path), shard_size)
        api.create_repo(repo_id=repo, private=private, exist_ok=True, token=token)
        api.upload_folder(folder_path=str(out_path), repo_id=repo, token=token)
        return f"Success! MoE model uploaded to {repo}"
    except Exception as e:
        return f"MoE Error: {e}"
|
| 633 |
|
| 634 |
# --- TAB 8: Raw PyTorch (New) ---
|
| 635 |
def task_raw_pytorch(hf_token, models_text, method, dtype, base_model, weights, shard_size, out_repo, private):
|
|
@@ -662,20 +662,19 @@ with gr.Blocks() as demo:
|
|
| 662 |
gr.Markdown("# 🧰Training-Free CPU-run Model Creation Toolkit")
|
| 663 |
|
| 664 |
with gr.Tabs():
|
| 665 |
-
|
| 666 |
-
with gr.Tab("Merge to Base + Reshard"):
|
| 667 |
with gr.Row():
|
| 668 |
t1_token = gr.Textbox(label="Token", type="password")
|
| 669 |
-
t1_out = gr.Textbox(label="Output Repo")
|
| 670 |
with gr.Row():
|
| 671 |
t1_base = gr.Textbox(label="Base Repo", value="name/repo")
|
| 672 |
-
|
| 673 |
-
t1_lora = gr.Textbox(label="LoRA
|
| 674 |
with gr.Row():
|
| 675 |
-
t1_scale = gr.Slider(0,
|
| 676 |
t1_prec = gr.Radio(["bf16", "fp16", "float32"], value="bf16", label="Precision")
|
| 677 |
-
t1_shard = gr.Slider(
|
| 678 |
-
|
|
|
|
| 679 |
t1_priv = gr.Checkbox(label="Private", value=True)
|
| 680 |
t1_btn = gr.Button("Merge")
|
| 681 |
t1_res = gr.Textbox(label="Result")
|
|
@@ -684,25 +683,31 @@ with gr.Blocks() as demo:
|
|
| 684 |
with gr.Tab("Extract Adapter"):
|
| 685 |
t2_token = gr.Textbox(label="Token", type="password")
|
| 686 |
t2_org = gr.Textbox(label="Original Model")
|
| 687 |
-
t2_tun = gr.Textbox(label="Tuned Model")
|
| 688 |
-
t2_rank = gr.Number(label="Rank", value=32)
|
| 689 |
t2_out = gr.Textbox(label="Output Repo")
|
| 690 |
t2_btn = gr.Button("Extract")
|
| 691 |
t2_res = gr.Textbox(label="Result")
|
| 692 |
t2_btn.click(task_extract, [t2_token, t2_org, t2_tun, t2_rank, t2_out], t2_res)
|
| 693 |
|
| 694 |
with gr.Tab("Merge Adapters"):
|
|
|
|
| 695 |
t3_token = gr.Textbox(label="Token", type="password")
|
| 696 |
-
t3_urls = gr.TextArea(label="Adapter URLs")
|
| 697 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
with gr.Row():
|
| 699 |
-
t3_weights = gr.Textbox(label="Weights")
|
| 700 |
-
t3_rank = gr.Number(label="Target Rank", value=128)
|
| 701 |
with gr.Row():
|
| 702 |
-
t3_beta = gr.Slider(0.
|
| 703 |
-
t3_sigma = gr.Slider(0.
|
| 704 |
t3_out = gr.Textbox(label="Output Repo")
|
| 705 |
-
t3_priv = gr.Checkbox(label="Private", value=True)
|
| 706 |
t3_btn = gr.Button("Merge")
|
| 707 |
t3_res = gr.Textbox(label="Result")
|
| 708 |
t3_btn.click(task_merge_adapters_advanced, [t3_token, t3_urls, t3_method, t3_weights, t3_beta, t3_sigma, t3_rank, t3_out, t3_priv], t3_res)
|
|
@@ -711,9 +716,20 @@ with gr.Blocks() as demo:
|
|
| 711 |
t4_token = gr.Textbox(label="Token", type="password")
|
| 712 |
t4_in = gr.Textbox(label="LoRA")
|
| 713 |
with gr.Row():
|
| 714 |
-
t4_rank = gr.Number(label="To Rank", value=8)
|
| 715 |
-
t4_method = gr.Dropdown(["None", "sv_ratio", "sv_fro", "sv_cumulative"], value="None")
|
| 716 |
-
t4_param = gr.Number(label="Param", value=0.9)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 717 |
t4_out = gr.Textbox(label="Output")
|
| 718 |
t4_btn = gr.Button("Resize")
|
| 719 |
t4_res = gr.Textbox(label="Result")
|
|
@@ -724,18 +740,18 @@ with gr.Blocks() as demo:
|
|
| 724 |
gr.Markdown("### 🧩 MergeKit Engine (Multi-Model)")
|
| 725 |
with gr.Row():
|
| 726 |
t5_token = gr.Textbox(label="HF Token", type="password")
|
| 727 |
-
t5_method = gr.Dropdown(["
|
| 728 |
-
t5_dtype = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output
|
| 729 |
|
| 730 |
-
t5_models = gr.TextArea(label="Models (
|
| 731 |
|
| 732 |
with gr.Accordion("Advanced Parameters", open=True):
|
| 733 |
with gr.Row():
|
| 734 |
t5_base = gr.Textbox(label="Base Model (Optional/Auto)", placeholder="Defaults to first model if empty")
|
| 735 |
-
t5_shard = gr.Slider(
|
| 736 |
with gr.Row():
|
| 737 |
-
t5_weights = gr.Textbox(label="
|
| 738 |
-
t5_density = gr.
|
| 739 |
with gr.Row():
|
| 740 |
t5_tok = gr.Dropdown(["base", "union", "first"], value="base", label="Tokenizer Source")
|
| 741 |
t5_ranges = gr.TextArea(label="Layer Ranges/Slices (JSON or SLERP config)", placeholder='{"slices": [{"sources": [{"model": "A", "layer_range": [0, 16]}]}]}')
|
|
@@ -751,14 +767,20 @@ with gr.Blocks() as demo:
|
|
| 751 |
gr.Markdown("### 🤖 MoE Architecture Upscaling")
|
| 752 |
with gr.Row():
|
| 753 |
t6_token = gr.Textbox(label="HF Token", type="password")
|
| 754 |
-
t6_dtype = gr.Dropdown(["float16", "bfloat16"], value="bfloat16", label="
|
| 755 |
-
t6_shard = gr.Slider(
|
|
|
|
| 756 |
|
| 757 |
-
|
| 758 |
-
|
|
|
|
| 759 |
|
|
|
|
|
|
|
|
|
|
| 760 |
with gr.Row():
|
| 761 |
-
|
|
|
|
| 762 |
t6_tok = gr.Dropdown(["base", "union", "first"], value="base", label="Tokenizer Source")
|
| 763 |
|
| 764 |
t6_out = gr.Textbox(label="Output Repo")
|
|
@@ -766,7 +788,7 @@ with gr.Blocks() as demo:
|
|
| 766 |
t6_btn = gr.Button("🏗️ Build MoE")
|
| 767 |
t6_res = gr.Textbox(label="Result")
|
| 768 |
|
| 769 |
-
t6_btn.click(task_moe_create, [t6_token, t6_base, t6_experts, t6_gate, t6_dtype, t6_tok, t6_shard, t6_out, t6_priv], t6_res)
|
| 770 |
|
| 771 |
with gr.Tab("DARE Fusion (Custom)"):
|
| 772 |
gr.Markdown("### 🎲 DARE Fusion (Custom Implementation)")
|
|
@@ -786,15 +808,15 @@ with gr.Blocks() as demo:
|
|
| 786 |
t7_btn.click(task_dare_custom, [t7_token, t7_base, t7_ft, t7_ratio, t7_mask, t7_out, t7_priv], t7_res)
|
| 787 |
|
| 788 |
with gr.Tab("Raw PyTorch Merge"):
|
| 789 |
-
gr.Markdown("### 🧠 Raw Weight Merging (
|
| 790 |
t8_token = gr.Textbox(label="HF Token", type="password")
|
| 791 |
-
t8_method = gr.Dropdown(["
|
| 792 |
-
t8_models = gr.TextArea(label="Models (
|
| 793 |
with gr.Row():
|
| 794 |
t8_base = gr.Textbox(label="Base Model (Optional)")
|
| 795 |
-
t8_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="
|
| 796 |
t8_weights = gr.Textbox(label="Weights")
|
| 797 |
-
t8_shard = gr.Slider(
|
| 798 |
t8_out = gr.Textbox(label="Output Repo")
|
| 799 |
t8_priv = gr.Checkbox(label="Private", value=True)
|
| 800 |
t8_btn = gr.Button("Merge Raw Weights")
|
|
|
|
| 607 |
# TAB 6: MOE CREATION
|
| 608 |
# =================================================================================
|
| 609 |
|
| 610 |
+
def task_moe_create(hf_token, base_model, experts_text, prompts_text, shared_expert_text, gate_mode, dtype, tok_source, shard_size, out_repo, private):
    """Build a Mixture-of-Experts model via MergeKit and upload it to the Hub.

    Args:
        hf_token: Hugging Face access token used for login, repo creation
            and upload. Surrounding whitespace is ignored.
        base_model: Passed to ``build_moe_config`` as ``base_model``.
        experts_text: Expert entries, one per line; blank lines are dropped.
        prompts_text: Prompt entries, one per line; blank lines are dropped.
        shared_expert_text: Shared-expert entries, one per line; may be empty.
        gate_mode: Passed to ``build_moe_config`` as ``gate_mode``.
        dtype: Passed to ``build_moe_config`` as ``dtype``.
        tok_source: Passed to ``build_moe_config`` as ``tokenizer_source``.
        shard_size: Passed to ``execute_mergekit_config`` as its third argument.
        out_repo: Target repository id on the Hub.
        private: Passed to ``api.create_repo`` as ``private``.

    Returns:
        A human-readable status string: a success message, or a message
        starting with ``"Error:"`` / ``"MoE Error:"`` on failure.
    """
    def _lines(text):
        # Guard against a None value so an empty field cannot crash on .split.
        return [ln.strip() for ln in (text or "").split('\n') if ln.strip()]

    cleanup_temp()

    # Normalise once so the very same token is used for login and API calls,
    # and so whitespace-only input is rejected instead of reaching login().
    token = (hf_token or "").strip()
    repo = (out_repo or "").strip()
    if not token or not repo:
        return "Error: Token and Output Repo required."
    login(token)

    experts = _lines(experts_text)
    prompts = _lines(prompts_text)
    shared = _lines(shared_expert_text)
    if not experts:
        # Fail fast with a clear message rather than deep inside the merge.
        return "Error: At least one expert model required."

    out_path = TempDir / "moe_model"
    try:
        # Config construction sits inside the try so that its failures are
        # reported through the same "MoE Error" channel as merge failures.
        config = build_moe_config(
            base_model=base_model,
            experts=experts,
            prompts=prompts,
            gate_mode=gate_mode,
            dtype=dtype,
            tokenizer_source=tok_source,
            shared_experts=shared,
        )
        execute_mergekit_config(config, str(out_path), shard_size)
        api.create_repo(repo_id=repo, private=private, exist_ok=True, token=token)
        api.upload_folder(folder_path=str(out_path), repo_id=repo, token=token)
        return f"Success! MoE model uploaded to {repo}"
    except Exception as e:
        return f"MoE Error: {e}"
|
|
|
|
| 633 |
|
| 634 |
# --- TAB 8: Raw PyTorch (New) ---
|
| 635 |
def task_raw_pytorch(hf_token, models_text, method, dtype, base_model, weights, shard_size, out_repo, private):
|
|
|
|
| 662 |
gr.Markdown("# 🧰Training-Free CPU-run Model Creation Toolkit")
|
| 663 |
|
| 664 |
with gr.Tabs():
|
| 665 |
+
with gr.Tab("Merge to Base Model + Reshard Output"):
|
|
|
|
| 666 |
with gr.Row():
|
| 667 |
t1_token = gr.Textbox(label="Token", type="password")
|
|
|
|
| 668 |
with gr.Row():
|
| 669 |
t1_base = gr.Textbox(label="Base Repo", value="name/repo")
|
| 670 |
+
t1_sub = gr.Textbox(label="Subfolder (Optional)", value="")
|
| 671 |
+
t1_lora = gr.Textbox(label="LoRA Direct Link or Repo", value="https://huggingface.co/GuangyuanSD/Z-Image-Re-Turbo-LoRA/resolve/main/Z-image_re_turbo_lora_8steps_rank_32_v1_fp16.safetensors")
|
| 672 |
with gr.Row():
|
| 673 |
+
t1_scale = gr.Slider(label="Scale", value=1.0, minimum=0, maximum=3.0, step=0.1)
|
| 674 |
t1_prec = gr.Radio(["bf16", "fp16", "float32"], value="bf16", label="Precision")
|
| 675 |
+
t1_shard = gr.Slider(label="Max Shard Size (GB)", value=2.0, minimum=0.1, maximum=10.0, step=0.1)
|
| 676 |
+
t1_out = gr.Textbox(label="Output Repo")
|
| 677 |
+
t1_struct = gr.Textbox(label="Extras Source (copies configs/components/etc)", value="name/repo")
|
| 678 |
t1_priv = gr.Checkbox(label="Private", value=True)
|
| 679 |
t1_btn = gr.Button("Merge")
|
| 680 |
t1_res = gr.Textbox(label="Result")
|
|
|
|
| 683 |
with gr.Tab("Extract Adapter"):
|
| 684 |
t2_token = gr.Textbox(label="Token", type="password")
|
| 685 |
t2_org = gr.Textbox(label="Original Model")
|
| 686 |
+
t2_tun = gr.Textbox(label="Tuned or Homologous Model")
|
| 687 |
+
t2_rank = gr.Number(label="Extract At Rank", value=32, minimum=1, maximum=1024, step=1)
|
| 688 |
t2_out = gr.Textbox(label="Output Repo")
|
| 689 |
t2_btn = gr.Button("Extract")
|
| 690 |
t2_res = gr.Textbox(label="Result")
|
| 691 |
t2_btn.click(task_extract, [t2_token, t2_org, t2_tun, t2_rank, t2_out], t2_res)
|
| 692 |
|
| 693 |
with gr.Tab("Merge Adapters"):
|
| 694 |
+
gr.Markdown("### Batch Adapter Merging")
|
| 695 |
t3_token = gr.Textbox(label="Token", type="password")
|
| 696 |
+
t3_urls = gr.TextArea(label="Adapter URLs/Repos (one per line, or space-separated)", placeholder="user/lora1\nhttps://hf.co/user/lora2.safetensors\n...")
|
| 697 |
+
with gr.Row():
|
| 698 |
+
t3_method = gr.Dropdown(
|
| 699 |
+
["Iterative EMA (Linear w/ Beta/Sigma coefficient)", "Concatenation (MOE-like weights-stack)", "SVD Fusion (Task Arithmetic/Compressed)"],
|
| 700 |
+
value="Iterative EMA (Linear w/ Beta/Sigma coefficient)",
|
| 701 |
+
label="Merge Method"
|
| 702 |
+
)
|
| 703 |
with gr.Row():
|
| 704 |
+
t3_weights = gr.Textbox(label="Weights (comma-separated) – for Concat/SVD", placeholder="1.0, 0.5, 0.8...")
|
| 705 |
+
t3_rank = gr.Number(label="Target Rank – For SVD only", value=128, minimum=1, maximum=1024)
|
| 706 |
with gr.Row():
|
| 707 |
+
t3_beta = gr.Slider(label="Beta – for linear/post-hoc EMA", value=0.95, minimum=0.01, maximum=1.00, step=0.01)
|
| 708 |
+
t3_sigma = gr.Slider(label="Sigma Rel – for linear/post-hoc EMA", value=0.21, minimum=0.01, maximum=1.00, step=0.01)
|
| 709 |
t3_out = gr.Textbox(label="Output Repo")
|
| 710 |
+
t3_priv = gr.Checkbox(label="Private Output", value=True)
|
| 711 |
t3_btn = gr.Button("Merge")
|
| 712 |
t3_res = gr.Textbox(label="Result")
|
| 713 |
t3_btn.click(task_merge_adapters_advanced, [t3_token, t3_urls, t3_method, t3_weights, t3_beta, t3_sigma, t3_rank, t3_out, t3_priv], t3_res)
|
|
|
|
| 716 |
t4_token = gr.Textbox(label="Token", type="password")
|
| 717 |
t4_in = gr.Textbox(label="LoRA")
|
| 718 |
with gr.Row():
|
| 719 |
+
t4_rank = gr.Number(label="To Rank (Safety Ceiling)", value=8, minimum=1, maximum=512, step=1)
|
| 720 |
+
t4_method = gr.Dropdown(["None", "sv_ratio", "sv_fro", "sv_cumulative"], value="None", label="Dynamic Method")
|
| 721 |
+
t4_param = gr.Number(label="Dynamic Param", value=0.9)
|
| 722 |
+
|
| 723 |
+
gr.Markdown(
|
| 724 |
+
"""
|
| 725 |
+
### 📉 Dynamic Resizing Guide
|
| 726 |
+
These methods intelligently determine the best rank per layer.
|
| 727 |
+
* **sv_ratio (Relative Strength):** Keeps features that are at least `1/Param` as strong as the main feature. **Param must be >= 2**. (e.g. 2 = keep features half as strong as top).
|
| 728 |
+
* **sv_fro (Visual Information Density):** Preserves `Param%` of the total information content (Frobenius Norm) of the layer. **Param between 0.0 and 1.0** (e.g. 0.9 = 90% info retention).
|
| 729 |
+
* **sv_cumulative (Cumulative Sum):** Preserves weights that sum up to `Param%` of the total strength. **Param between 0.0 and 1.0**.
|
| 730 |
+
* **⚠️ Safety Ceiling:** The **"To Rank"** slider acts as a hard limit. Even if a dynamic method wants a higher rank, it will be cut down to this number to keep file sizes small.
|
| 731 |
+
"""
|
| 732 |
+
)
|
| 733 |
t4_out = gr.Textbox(label="Output")
|
| 734 |
t4_btn = gr.Button("Resize")
|
| 735 |
t4_res = gr.Textbox(label="Result")
|
|
|
|
| 740 |
gr.Markdown("### 🧩 MergeKit Engine (Multi-Model)")
|
| 741 |
with gr.Row():
|
| 742 |
t5_token = gr.Textbox(label="HF Token", type="password")
|
| 743 |
+
t5_method = gr.Dropdown(["linear", "slerp", "nuslerp", "nearswap", "ties", "dare_ties", "dare_linear", "model_stock", "karcher", "passthrough", "task_arithmetic", "sce", "breadcrumbs", "breadcrumbs_ties", "arcee_fusion"], value="ties", label="Method")
|
| 744 |
+
t5_dtype = gr.Dropdown(["float16", "bfloat16", "float32"], value="bfloat16", label="Output dtype")
|
| 745 |
|
| 746 |
+
t5_models = gr.TextArea(label="Models (one per line)", placeholder="user/model_A\nuser/model_B")
|
| 747 |
|
| 748 |
with gr.Accordion("Advanced Parameters", open=True):
|
| 749 |
with gr.Row():
|
| 750 |
t5_base = gr.Textbox(label="Base Model (Optional/Auto)", placeholder="Defaults to first model if empty")
|
| 751 |
+
t5_shard = gr.Slider(label="Max Shard Size (GB)", value=2.0, minimum=0.1, maximum=10.0, step=0.1)
|
| 752 |
with gr.Row():
|
| 753 |
+
t5_weights = gr.Textbox(label="Weight-ratios (per model, comma-sep) or Mix-factor t (for SLERPs)", placeholder="1.0, 0.5, 0.5 (DARE, Soup, TIES...) or 0-to-1.0 (SLERP Base/Other-ratio)")
|
| 754 |
+
t5_density = gr.Textbox(label="Density (weights retained) (for DARE/TIES/...)", placeholder="0.9")
|
| 755 |
with gr.Row():
|
| 756 |
t5_tok = gr.Dropdown(["base", "union", "first"], value="base", label="Tokenizer Source")
|
| 757 |
t5_ranges = gr.TextArea(label="Layer Ranges/Slices (JSON or SLERP config)", placeholder='{"slices": [{"sources": [{"model": "A", "layer_range": [0, 16]}]}]}')
|
|
|
|
| 767 |
gr.Markdown("### 🤖 MoE Architecture Upscaling")
|
| 768 |
with gr.Row():
|
| 769 |
t6_token = gr.Textbox(label="HF Token", type="password")
|
| 770 |
+
t6_dtype = gr.Dropdown(["float16", "bfloat16"], value="bfloat16", label="dtype")
|
| 771 |
+
t6_shard = gr.Slider(label="Max Shard Size (GB)", value=2.0, minimum=0.1, maximum=10.0, step=0.1)
|
| 772 |
+
t6_base = gr.Textbox(label="Base Architecture Model")
|
| 773 |
|
| 774 |
+
with gr.Row():
|
| 775 |
+
t6_experts = gr.TextArea(label="Expert Models (One per line)", placeholder="expert1/repo\nexpert2/repo")
|
| 776 |
+
t6_prompts = gr.TextArea(label="Expertise Prompts (Optional for Cheap_Embed/Randoms)", placeholder="Prompt for expert1\nPrompt for expert2")
|
| 777 |
|
| 778 |
+
# ADDED: Shared Expert Input
|
| 779 |
+
t6_shared = gr.Textbox(label="Shared Expert (Required for Qwen2, Empty for Mixtral)", placeholder="repo/shared_model")
|
| 780 |
+
|
| 781 |
with gr.Row():
|
| 782 |
+
# ADDED: uniform_random
|
| 783 |
+
t6_gate = gr.Dropdown(["cheap_embed", "hidden", "random", "uniform_random"], value="cheap_embed", label="Gate Mode")
|
| 784 |
t6_tok = gr.Dropdown(["base", "union", "first"], value="base", label="Tokenizer Source")
|
| 785 |
|
| 786 |
t6_out = gr.Textbox(label="Output Repo")
|
|
|
|
| 788 |
t6_btn = gr.Button("🏗️ Build MoE")
|
| 789 |
t6_res = gr.Textbox(label="Result")
|
| 790 |
|
| 791 |
+
t6_btn.click(task_moe_create, [t6_token, t6_base, t6_experts, t6_prompts, t6_shared, t6_gate, t6_dtype, t6_tok, t6_shard, t6_out, t6_priv], t6_res)
|
| 792 |
|
| 793 |
with gr.Tab("DARE Fusion (Custom)"):
|
| 794 |
gr.Markdown("### 🎲 DARE Fusion (Custom Implementation)")
|
|
|
|
| 808 |
t7_btn.click(task_dare_custom, [t7_token, t7_base, t7_ft, t7_ratio, t7_mask, t7_out, t7_priv], t7_res)
|
| 809 |
|
| 810 |
with gr.Tab("Raw PyTorch Merge"):
|
| 811 |
+
gr.Markdown("### 🧠 Raw Weight Merging (Works Beyond Transformers Library)")
|
| 812 |
t8_token = gr.Textbox(label="HF Token", type="password")
|
| 813 |
+
t8_method = gr.Dropdown(["linear", "ties", "task_arithmetic", "nuslerp", "nearswap", "dare_ties", "dare_linear", "model_stock", "karcher", "passthrough", "sce", "breadcrumbs", "breadcrumbs_ties", "arcee_fusion"], value="linear", label="Method")
|
| 814 |
+
t8_models = gr.TextArea(label="Models (one per line)", placeholder="user/model_A\nuser/model_B")
|
| 815 |
with gr.Row():
|
| 816 |
t8_base = gr.Textbox(label="Base Model (Optional)")
|
| 817 |
+
t8_dtype = gr.Dropdown(["float32", "float16", "bfloat16"], value="float32", label="dtype")
|
| 818 |
t8_weights = gr.Textbox(label="Weights")
|
| 819 |
+
t8_shard = gr.Slider(label="Max Shard Size (GB)", value=2.0, minimum=0.1, maximum=10.0, step=0.1)
|
| 820 |
t8_out = gr.Textbox(label="Output Repo")
|
| 821 |
t8_priv = gr.Checkbox(label="Private", value=True)
|
| 822 |
t8_btn = gr.Button("Merge Raw Weights")
|