rootlocalghost committed on
Commit
eb37dfe
·
verified ·
1 Parent(s): a8a35df

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -0
app.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gc
3
+ import torch
4
+ import shutil
5
+ import uuid
6
+ import gradio as gr
7
+ from huggingface_hub import HfApi, hf_hub_download
8
+ from safetensors.torch import load_file, save_file
9
+
10
def convert_and_upload(token, source_repo, target_repo, precision, target_components):
    """Copy a source HF repo into a target repo, casting selected components to a lower precision.

    Streams human-readable status strings (it is a generator) so Gradio can
    live-update the log textbox.

    Args:
        token: Hugging Face token with write access to ``target_repo``.
        source_repo: Repo id to read files from (e.g. "meituan-longcat/LongCat-Image").
        target_repo: Repo id to create/write (e.g. "user/LongCat-Image-FP8").
        precision: One of "FP8", "FP16", "BF16" (anything else copies files unchanged).
        target_components: Top-level folder names whose ``.safetensors`` files
            should be cast; everything else is copied as-is.

    Yields:
        Progress / error message strings.
    """
    if not token:
        yield "❌ Error: Please provide a valid Hugging Face Write Token."
        return
    if not target_repo.strip() or "your-username" in target_repo:
        yield "❌ Error: Please specify a valid Target Repository (e.g., your-username/repo-name)."
        return
    if not target_components:
        yield "❌ Error: Please select at least one component to quantize."
        return

    # Map precision string to PyTorch dtype; unknown values mean "copy only".
    dtype_map = {
        "FP8": torch.float8_e4m3fn,
        "FP16": torch.float16,
        "BF16": torch.bfloat16,
    }
    target_dtype = dtype_map.get(precision)

    api = HfApi(token=token)
    yield f"🔄 Connecting to Hugging Face and verifying target repo: {target_repo}..."

    try:
        api.create_repo(repo_id=target_repo, exist_ok=True, private=False)
    except Exception as e:
        yield f"❌ Error checking/creating repo: {str(e)}\nMake sure your token has 'Write' permissions."
        return

    yield f"📋 Fetching file list from {source_repo}..."
    try:
        files = api.list_repo_files(source_repo)
    except Exception as e:
        yield f"❌ Error fetching files: {str(e)}"
        return

    # Unique cache directory per run so concurrent runs don't collide and the
    # aggressive per-file cleanup below cannot delete another run's downloads.
    cache_dir = f"./hf_cache_{uuid.uuid4().hex[:8]}"

    success_count = 0
    error_count = 0

    for file in files:
        # Root-level .safetensors are the monolithic single-file checkpoints;
        # skip them (and best-effort remove any stale copy in the target repo).
        is_root_safetensor = "/" not in file and file.endswith(".safetensors")

        if is_root_safetensor:
            yield f"🗑️ Auto-skipping massive root model: {file}..."
            try:
                api.delete_file(path_in_repo=file, repo_id=target_repo, token=token, commit_message=f"Auto-deleted root file {file}")
                yield f"✅ Ensured {file} is removed from target repository."
            except Exception:
                # Best-effort: the file may simply not exist in the target repo.
                pass
            continue

        yield f"⏳ Processing {file}..."

        converted_path = None  # set only when we actually write a quantized copy
        try:
            os.makedirs(cache_dir, exist_ok=True)

            # Download with the token so gated/large files don't fail silently.
            local_path = hf_hub_download(
                repo_id=source_repo,
                filename=file,
                cache_dir=cache_dir,
                token=token
            )

            # FIX: anchor the match at the start of the path. The original
            # substring test (f"{comp}/" in file) would let component "vae"
            # wrongly match a path such as "extra_vae/model.safetensors".
            in_target_component = any(file.startswith(f"{comp}/") for comp in target_components)

            if file.endswith(".safetensors") and in_target_component:
                yield f"🧠 Quantizing {file} to {precision} (This will take a few minutes)..."

                tensors = load_file(local_path)

                if target_dtype:
                    # Cast only floating-point tensors; ints/bools keep their dtype.
                    for k in list(tensors.keys()):
                        if tensors[k].is_floating_point():
                            tensors[k] = tensors[k].to(target_dtype)

                # FIX: unique temp filename — the fixed "converted.safetensors"
                # collided between concurrent runs and leaked on upload failure.
                converted_path = f"converted_{uuid.uuid4().hex[:8]}.safetensors"
                save_file(tensors, converted_path)

                # Aggressive memory flush (crucial for the ~11.68 GB transformer file).
                del tensors
                gc.collect()

                yield f"☁️ Uploading {precision} version of {file}..."
                api.upload_file(
                    path_or_fileobj=converted_path,
                    path_in_repo=file,
                    repo_id=target_repo,
                    commit_message=f"Upload {precision} quantized {file}"
                )
            else:
                yield f"☁️ Copying {file} as-is..."
                api.upload_file(
                    path_or_fileobj=local_path,
                    path_in_repo=file,
                    repo_id=target_repo,
                    commit_message=f"Copy {file} from original repo"
                )

            success_count += 1

        except Exception as e:
            error_count += 1
            yield f"⚠️ Error processing {file}: {str(e)}\nSkipping to next file..."
        finally:
            # FIX: cleanup now runs even when processing raised. The original
            # performed the rmtree inside the try, so a failed file left its
            # download on disk — exactly the Space-killing bloat this tool is
            # meant to prevent.
            if converted_path and os.path.exists(converted_path):
                os.remove(converted_path)
            if os.path.exists(cache_dir):
                shutil.rmtree(cache_dir)
            gc.collect()

    # Final cleanup sweep (e.g. if the repo had no processable files).
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)

    yield f"✅ Finished! Successfully processed {success_count} files. Errors encountered: {error_count}."
137
def update_target_repo(username, source, precision):
    """Suggest a target repo id: "<username>/<source-model-name>-<precision>".

    Falls back to the "your-username" placeholder when no username was typed,
    which the conversion routine rejects as invalid.
    """
    owner = username.strip() or "your-username"
    # rpartition yields the last path segment, or the whole string when
    # there is no "/" — exactly matching split("/")[-1] with the guard.
    model_name = source.rpartition("/")[2]
    return f"{owner}/{model_name}-{precision}"
143
# Build the Gradio UI. Everything is declared inside one Blocks context so the
# event wiring at the bottom can reference the components created above.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 LongCat Dedicated Quantizer")
    gr.Markdown(
        "Convert the **LongCat-Image** family of models to lower precisions (FP8, FP16, BF16).\n\n"
        "**Memory & Disk Protection:** This tool is specifically tuned to survive the massive 11.68 GB single-file `transformer` "
        "shard. It aggressively purges Hugging Face's download cache and PyTorch's RAM buffer after every single step to keep the free Space alive."
    )

    with gr.Row():
        # Left column: credentials, source selection, and conversion options.
        with gr.Column(scale=2):
            hf_token = gr.Textbox(
                label="Hugging Face Token (Write Access Required)",
                type="password",
                placeholder="hf_..."
            )
            hf_username = gr.Textbox(
                label="Your Hugging Face Username",
                placeholder="e.g., rootlocalghost"
            )
            # Locked down to LongCat models (allow_custom_value=False below).
            source_repo = gr.Dropdown(
                choices=[
                    "meituan-longcat/LongCat-Image-Edit-Turbo",
                    "meituan-longcat/LongCat-Image-Edit",
                    "meituan-longcat/LongCat-Image"
                ],
                value="meituan-longcat/LongCat-Image-Edit-Turbo",
                label="Source Repository",
                allow_custom_value=False
            )

            # Folder names must match top-level directories in the source repo;
            # convert_and_upload only quantizes .safetensors under these.
            target_components = gr.CheckboxGroup(
                choices=["text_encoder", "transformer", "vae"],
                value=["text_encoder", "transformer"],
                label="Components to Quantize",
                info="Select which folders should be cast to the new precision. Unselected folders will be copied as-is."
            )

            precision = gr.Dropdown(
                choices=["FP8", "FP16", "BF16"],
                value="FP8",
                label="Target Precision"
            )
            # Auto-filled by update_target_repo but kept editable (interactive=True)
            # so the user can override the suggested repo id.
            target_repo = gr.Textbox(
                label="Target Repository (Auto-generated)",
                value="your-username/LongCat-Image-Edit-Turbo-FP8",
                interactive=True
            )
            start_btn = gr.Button("Start Quantization & Upload", variant="primary")

        # Right column: streaming log output fed by the generator's yields.
        with gr.Column(scale=3):
            output_log = gr.Textbox(
                label="Operation Logs",
                lines=20,
                interactive=False,
                max_lines=25
            )

    # Any change to username / source repo / precision regenerates the
    # suggested target repo name.
    inputs_to_watch = [hf_username, source_repo, precision]
    for inp in inputs_to_watch:
        inp.change(
            fn=update_target_repo,
            inputs=inputs_to_watch,
            outputs=[target_repo]
        )

    # convert_and_upload is a generator, so each yield streams into output_log.
    start_btn.click(
        fn=convert_and_upload,
        inputs=[hf_token, source_repo, target_repo, precision, target_components],
        outputs=[output_log]
    )
216
# Launch the Gradio server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()