Spaces:

rootlocalghost
/

Flux.2-Klein-Model-Quantizer

Running

App Files Files Community

rootlocalghost committed 19 days ago

Commit

041d8b6

verified ·

1 Parent(s): 7deb3dc

Create app.py

Browse files

Files changed (1) hide show

app.py +212 -0

app.py ADDED Viewed

	@@ -0,0 +1,212 @@

+import os
+import gc
+import torch
+import shutil
+import gradio as gr
+from huggingface_hub import HfApi, hf_hub_download
+from safetensors.torch import load_file, save_file
+# Scratch directory (relative to the current working directory) that
+# convert_and_upload() creates for downloads/conversions and removes when done.
+TEMP_DIR = "temp_processing_dir"
+def _resolve_dtype(precision):
+    """Return the torch dtype for a precision label, or None if unsupported.
+
+    The dtype is looked up by attribute name so that a torch build lacking
+    ``float8_e4m3fn`` yields None instead of raising AttributeError.
+    """
+    dtype_names = {"FP8": "float8_e4m3fn", "FP16": "float16", "BF16": "bfloat16"}
+    name = dtype_names.get(precision)
+    return getattr(torch, name, None) if name else None
+
+
+def _in_selected_component(path, components):
+    """Return True when one of the folder segments of *path* is in *components*.
+
+    Whole segments are compared, so ``xtransformer/a.safetensors`` does not
+    count as belonging to the ``transformer`` component.
+    """
+    folders = path.split("/")[:-1]
+    return any(folder in components for folder in folders)
+
+
+def _quantize_shard(src_path, dst_path, dtype):
+    """Cast every floating-point tensor of a safetensors file to *dtype*.
+
+    The result is written to *dst_path*; non-floating tensors are kept as-is.
+    """
+    tensors = load_file(src_path)
+    try:
+        for key in list(tensors):
+            if tensors[key].is_floating_point():
+                tensors[key] = tensors[key].to(dtype)
+        # NOTE(review): load_file() does not return the source header metadata,
+        # so it cannot be carried over here. The conventional {"format": "pt"}
+        # tag is written because some loaders are known to look for it --
+        # confirm against the library versions you target.
+        save_file(tensors, dst_path, metadata={"format": "pt"})
+    finally:
+        # Release the shard even when casting/saving fails, to keep peak RAM low.
+        del tensors
+        gc.collect()
+
+
+def convert_and_upload(token, source_repo, target_repo, precision, target_components):
+    """Copy a model repo to *target_repo*, casting selected components on the way.
+
+    This is a generator: it yields one human-readable status string per step.
+
+    Args:
+        token: Hugging Face token used for listing, downloading and uploading.
+        source_repo: Repo id to read from.
+        target_repo: Repo id to write to (created if missing). Must differ from
+            *source_repo*, because root-level ``.safetensors`` files are deleted
+            from the target.
+        precision: One of ``"FP8"``, ``"FP16"``, ``"BF16"``.
+        target_components: Folder names whose ``.safetensors`` files are cast to
+            *precision*; every other file is uploaded unchanged.
+
+    Yields:
+        Status messages. Validation failures yield a single error message and
+        stop before any network call is made.
+    """
+    token = (token or "").strip()
+    source_repo = (source_repo or "").strip()
+    target_repo = (target_repo or "").strip()
+    if not token:
+        yield "❌ Error: Please provide a valid Hugging Face Write Token."
+        return
+    if not target_repo or "your-username" in target_repo:
+        yield "❌ Error: Please specify a valid Target Repository (e.g., your-username/repo-name)."
+        return
+    if not target_components:
+        yield "❌ Error: Please select at least one component to quantize."
+        return
+    if not source_repo:
+        yield "❌ Error: Please specify a Source Repository."
+        return
+    # Root checkpoints are deleted from the target below; refuse to run when that
+    # would mean deleting them from the source repository itself.
+    if source_repo.casefold() == target_repo.casefold():
+        yield "❌ Error: Target Repository must be different from the Source Repository."
+        return
+    target_dtype = _resolve_dtype(precision)
+    if target_dtype is None:
+        # Previously an unknown precision silently uploaded uncast files labelled as quantized.
+        yield f"❌ Error: Unsupported precision '{precision}'. Choose FP8, FP16 or BF16."
+        return
+    api = HfApi(token=token)
+    yield f"🔄 Connecting to Hugging Face and verifying target repo: {target_repo}..."
+    try:
+        api.create_repo(repo_id=target_repo, exist_ok=True, private=False)
+    except Exception as e:
+        yield f"❌ Error checking/creating repo: {str(e)}\nMake sure your token has 'Write' permissions."
+        return
+    yield f"📋 Fetching file list from {source_repo}..."
+    try:
+        files = api.list_repo_files(source_repo)
+        # Listed once so a root checkpoint is only deleted when it really exists,
+        # which lets genuine delete failures be reported instead of swallowed.
+        existing_in_target = set(api.list_repo_files(target_repo))
+    except Exception as e:
+        yield f"❌ Error fetching files: {str(e)}"
+        return
+    failed = []
+    os.makedirs(TEMP_DIR, exist_ok=True)
+    try:
+        for file in files:
+            # AUTO-DELETE/SKIP LOGIC: large .safetensors files at the root level (no slashes in path)
+            if "/" not in file and file.endswith(".safetensors"):
+                yield f"🗑️ Auto-skipping massive root model: {file}..."
+                if file in existing_in_target:
+                    try:
+                        api.delete_file(
+                            path_in_repo=file,
+                            repo_id=target_repo,
+                            token=token,
+                            commit_message=f"Auto-deleted massive root file {file}"
+                        )
+                        yield f"✅ Ensured {file} is removed from target repository."
+                    except Exception as e:
+                        failed.append(file)
+                        yield f"⚠️ Could not delete {file} from target repository: {str(e)}"
+                continue
+            yield f"⏳ Processing {file}..."
+            local_path = None
+            converted_path = None
+            try:
+                # Download file locally, bypassing symlink cache to save disk space.
+                # The token is passed explicitly so gated/private sources can be read.
+                local_path = hf_hub_download(
+                    repo_id=source_repo,
+                    filename=file,
+                    local_dir=TEMP_DIR,
+                    local_dir_use_symlinks=False,
+                    token=token
+                )
+                # Quantize only safetensors files that live in a selected folder
+                if file.endswith(".safetensors") and _in_selected_component(file, target_components):
+                    yield f"🧠 Quantizing {file} to {precision}..."
+                    converted_path = os.path.join(TEMP_DIR, "converted.safetensors")
+                    _quantize_shard(local_path, converted_path, target_dtype)
+                    yield f"☁️ Uploading {precision} version of {file}..."
+                    api.upload_file(
+                        path_or_fileobj=converted_path,
+                        path_in_repo=file,
+                        repo_id=target_repo,
+                        commit_message=f"Upload {precision} quantized {file}"
+                    )
+                else:
+                    yield f"☁️ Copying {file} as-is..."
+                    api.upload_file(
+                        path_or_fileobj=local_path,
+                        path_in_repo=file,
+                        repo_id=target_repo,
+                        commit_message=f"Copy {file} from original repo"
+                    )
+            except Exception as e:
+                failed.append(file)
+                yield f"⚠️ Error processing {file}: {str(e)}\nSkipping to next file..."
+            finally:
+                # Free disk space after every file, including the failed ones.
+                for path in (converted_path, local_path):
+                    if path and os.path.exists(path):
+                        try:
+                            os.remove(path)
+                        except OSError:
+                            pass  # best effort; the directory is removed below anyway
+                gc.collect()
+    finally:
+        # Also runs when the generator is closed early (e.g. the client disconnects).
+        shutil.rmtree(TEMP_DIR, ignore_errors=True)
+    if failed:
+        yield (
+            f"⚠️ Finished with {len(failed)} failed file(s): {', '.join(failed)}. "
+            f"All other files were uploaded to {target_repo}."
+        )
+    else:
+        yield f"✅ All files processed and successfully uploaded to {target_repo}!"
+# Dynamic UI Update for Target Repo Name
+def update_target_repo(username, source, precision):
+    """Suggest a target repo id of the form ``<user>/<model>-<precision>``.
+
+    Args:
+        username: Hugging Face username; blank or None falls back to the
+            ``your-username`` placeholder.
+        source: Source repo id (``owner/model``) or a bare model name. None is
+            treated as an empty string.
+        precision: Precision label appended as a suffix.
+
+    Returns:
+        The suggested repo id string.
+    """
+    user_prefix = (username or "").strip() or "your-username"
+    # rstrip("/") keeps a trailing slash from producing an empty model name;
+    # rpartition returns the whole string when there is no "/" at all.
+    model_name = (source or "").strip().rstrip("/").rpartition("/")[2]
+    return f"{user_prefix}/{model_name}-{precision}"
+# Build the Gradio UI
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🚀 Auto-Purging Model Quantizer & Uploader")
+    gr.Markdown(
+        "Convert sharded Diffusers models (like FLUX, LongCat, Z-Image) to lower precisions (FP8, FP16, BF16).\n\n"
+        "**Auto-Delete Feature:** This tool is strictly designed to handle sharded folders. It will **automatically ignore and delete** any massive "
+        "`.safetensors` files located at the root of the repository to ensure your 16GB RAM limit is never breached and your target repository stays clean."
+    )
+    with gr.Row():
+        # Left column: credentials and conversion settings
+        with gr.Column(scale=2):
+            hf_token = gr.Textbox(
+                label="Hugging Face Token (Write Access Required)",
+                type="password",
+                placeholder="hf_..."
+            )
+            hf_username = gr.Textbox(
+                label="Your Hugging Face Username",
+                placeholder="e.g., rootlocalghost"
+            )
+            source_repo = gr.Dropdown(
+                choices=[
+                    "black-forest-labs/FLUX.2-klein-9B",
+                    "black-forest-labs/FLUX.2-klein-4B",
+                    "Tongyi-MAI/Z-Image-Turbo",
+                    "meituan-longcat/LongCat-Image-Edit-Turbo"
+                ],
+                value="black-forest-labs/FLUX.2-klein-9B",
+                label="Source Repository",
+                allow_custom_value=True
+            )
+            target_components = gr.CheckboxGroup(
+                choices=["text_encoder", "transformer", "vae"],
+                value=["text_encoder", "transformer"],
+                label="Components to Quantize (Folders)",
+                info="Select which folders should be cast to the new precision. Everything else is copied as-is."
+            )
+            precision = gr.Dropdown(
+                choices=["FP8", "FP16", "BF16"],
+                value="FP8",
+                label="Target Precision"
+            )
+            target_repo = gr.Textbox(
+                label="Target Repository (Auto-generated)",
+                value="your-username/FLUX.2-klein-9B-FP8",
+                interactive=True
+            )
+            start_btn = gr.Button("Start Quantization & Upload", variant="primary")
+        # Right column: running log of the operation
+        with gr.Column(scale=3):
+            output_log = gr.Textbox(
+                label="Operation Logs",
+                lines=20,
+                interactive=False,
+                max_lines=25
+            )
+    # Automatically update the target repo name when inputs change
+    inputs_to_watch = [hf_username, source_repo, precision]
+    for inp in inputs_to_watch:
+        inp.change(
+            fn=update_target_repo,
+            inputs=inputs_to_watch,
+            outputs=[target_repo]
+        )
+
+    def _stream_log(token, source, target, selected_precision, components):
+        """Re-yield convert_and_upload() messages as one growing, newline-joined log.
+
+        Each value yielded by a generator handler replaces the output Textbox
+        content, so passing the messages through unchanged would leave only the
+        most recent line visible in the log box.
+        """
+        history = []
+        for message in convert_and_upload(token, source, target, selected_precision, components):
+            history.append(message)
+            yield "\n".join(history)
+
+    start_btn.click(
+        fn=_stream_log,
+        inputs=[hf_token, source_repo, target_repo, precision, target_components],
+        outputs=[output_log]
+    )
+if __name__ == "__main__":
+    demo.launch()