| import os |
| import gc |
| import torch |
| import shutil |
| import uuid |
| import gradio as gr |
| from huggingface_hub import HfApi, hf_hub_download |
| from safetensors.torch import load_file, save_file |
|
|
def convert_and_upload(token, source_repo, target_repo, precision, target_components):
    """Quantize selected components of a HF model repo and upload the result.

    Generator: yields human-readable progress strings that Gradio streams into
    the log textbox.

    Args:
        token: Hugging Face access token; must have write permission.
        source_repo: repo id to read files from (e.g. "org/model").
        target_repo: repo id to create/write to.
        precision: "FP8", "FP16" or "BF16"; any other value disables casting.
        target_components: top-level folder names (e.g. "transformer") whose
            .safetensors files are cast to the target precision. Files outside
            these folders are copied unchanged.
    """
    # ---- Input validation: fail fast, before any network call ----
    if not token:
        yield "β Error: Please provide a valid Hugging Face Write Token."
        return
    if not target_repo.strip() or "your-username" in target_repo:
        yield "β Error: Please specify a valid Target Repository (e.g., your-username/repo-name)."
        return
    if not target_components:
        yield "β Error: Please select at least one component to quantize."
        return

    # Map the UI label to a torch dtype. None (unknown label) means "no cast":
    # the quantize branch then just re-saves tensors unchanged.
    dtype_map = {
        "FP8": torch.float8_e4m3fn,
        "FP16": torch.float16,
        "BF16": torch.bfloat16,
    }
    target_dtype = dtype_map.get(precision)

    api = HfApi(token=token)
    yield f"π Connecting to Hugging Face and verifying target repo: {target_repo}..."

    try:
        # exist_ok=True makes this idempotent across re-runs.
        api.create_repo(repo_id=target_repo, exist_ok=True, private=False)
    except Exception as e:
        yield f"β Error checking/creating repo: {str(e)}\nMake sure your token has 'Write' permissions."
        return

    yield f"π Fetching file list from {source_repo}..."
    try:
        files = api.list_repo_files(source_repo)
    except Exception as e:
        yield f"β Error fetching files: {str(e)}"
        return

    # Unique per-run scratch paths so concurrent runs never collide.
    # (Fix: the converted file previously used a shared fixed name,
    # "converted.safetensors", which clashed between simultaneous runs and
    # leaked on errors.)
    run_id = uuid.uuid4().hex[:8]
    cache_dir = f"./hf_cache_{run_id}"
    converted_path = f"./converted_{run_id}.safetensors"

    success_count = 0
    error_count = 0

    for file in files:
        # Root-level .safetensors files are the monolithic full checkpoints —
        # far too large for a free Space, so they are never downloaded.
        if "/" not in file and file.endswith(".safetensors"):
            yield f"ποΈ Auto-skipping massive root model: {file}..."
            try:
                # Best-effort: make sure a stale copy is not left in the target.
                api.delete_file(
                    path_in_repo=file,
                    repo_id=target_repo,
                    commit_message=f"Auto-deleted root file {file}",
                )
                yield f"✅ Ensured {file} is removed from target repository."
            except Exception:
                pass  # file simply may not exist in the target yet
            continue

        yield f"β³ Processing {file}..."
        try:
            os.makedirs(cache_dir, exist_ok=True)
            local_path = hf_hub_download(
                repo_id=source_repo,
                filename=file,
                cache_dir=cache_dir,
                token=token,
            )

            # Only files directly under a selected component folder qualify.
            # (Fix: substring matching would also hit e.g. "my_vae/..." when
            # "vae" was selected; prefix matching avoids that.)
            in_target_component = any(
                file.startswith(f"{comp}/") for comp in target_components
            )

            if file.endswith(".safetensors") and in_target_component:
                yield f"π§ Quantizing {file} to {precision} (This will take a few minutes)..."
                tensors = load_file(local_path)

                if target_dtype:
                    # Cast only floating-point tensors; integer/bool tensors
                    # (e.g. token ids, buffers) must keep their dtype.
                    for key in list(tensors.keys()):
                        if tensors[key].is_floating_point():
                            tensors[key] = tensors[key].to(target_dtype)

                save_file(tensors, converted_path)

                # Release the tensor dict immediately to keep peak RAM low.
                del tensors
                gc.collect()

                yield f"βοΈ Uploading {precision} version of {file}..."
                api.upload_file(
                    path_or_fileobj=converted_path,
                    path_in_repo=file,
                    repo_id=target_repo,
                    commit_message=f"Upload {precision} quantized {file}",
                )
            else:
                yield f"βοΈ Copying {file} as-is..."
                api.upload_file(
                    path_or_fileobj=local_path,
                    path_in_repo=file,
                    repo_id=target_repo,
                    commit_message=f"Copy {file} from original repo",
                )

            success_count += 1
        except Exception as e:
            error_count += 1
            yield f"β οΈ Error processing {file}: {str(e)}\nSkipping to next file..."
        finally:
            # Purge per-file scratch even when the file failed. (Fix: the
            # original only cleaned up on success, leaking the converted file
            # and the multi-GB download cache on the error path.)
            if os.path.exists(converted_path):
                os.remove(converted_path)
            if os.path.exists(cache_dir):
                shutil.rmtree(cache_dir)
            gc.collect()

    yield f"✅ Finished! Successfully processed {success_count} files. Errors encountered: {error_count}."
|
|
| |
def update_target_repo(username, source, precision):
    """Suggest a target repo id of the form "<user>/<model-name>-<precision>".

    Falls back to the "your-username" placeholder when no username is given,
    and uses the last path segment of *source* as the model name.
    """
    user = username.strip() or "your-username"
    model = source.rsplit("/", 1)[-1]
    return f"{user}/{model}-{precision}"
|
|
| |
# ---------------------------------------------------------------------------
# Gradio UI. Components declared inside the `gr.Blocks()` context register
# themselves with `demo` in statement order, so layout follows declaration.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π LongCat Dedicated Quantizer")
    gr.Markdown(
        "Convert the **LongCat-Image** family of models to lower precisions (FP8, FP16, BF16).\n\n"
        "**Memory & Disk Protection:** This tool is specifically tuned to survive the massive 11.68 GB single-file `transformer` "
        "shard. It aggressively purges Hugging Face's download cache and PyTorch's RAM buffer after every single step to keep the free Space alive."
    )

    with gr.Row():
        # Left column: credentials and conversion settings.
        with gr.Column(scale=2):
            hf_token = gr.Textbox(
                label="Hugging Face Token (Write Access Required)",
                type="password",
                placeholder="hf_..."
            )
            # Used only to auto-generate the target repo id below.
            hf_username = gr.Textbox(
                label="Your Hugging Face Username",
                placeholder="e.g., rootlocalghost"
            )

            # Fixed list of supported source repos (no custom values allowed).
            source_repo = gr.Dropdown(
                choices=[
                    "meituan-longcat/LongCat-Image-Edit-Turbo",
                    "meituan-longcat/LongCat-Image-Edit",
                    "meituan-longcat/LongCat-Image"
                ],
                value="meituan-longcat/LongCat-Image-Edit-Turbo",
                label="Source Repository",
                allow_custom_value=False
            )

            # Folders whose .safetensors files get cast; others are copied.
            target_components = gr.CheckboxGroup(
                choices=["text_encoder", "transformer", "vae"],
                value=["text_encoder", "transformer"],
                label="Components to Quantize",
                info="Select which folders should be cast to the new precision. Unselected folders will be copied as-is."
            )

            precision = gr.Dropdown(
                choices=["FP8", "FP16", "BF16"],
                value="FP8",
                label="Target Precision"
            )
            # Auto-filled by update_target_repo(), but the user may override it.
            target_repo = gr.Textbox(
                label="Target Repository (Auto-generated)",
                value="your-username/LongCat-Image-Edit-Turbo-FP8",
                interactive=True
            )
            start_btn = gr.Button("Start Quantization & Upload", variant="primary")

        # Right column: streamed log output from the generator.
        with gr.Column(scale=3):
            output_log = gr.Textbox(
                label="Operation Logs",
                lines=20,
                interactive=False,
                max_lines=25
            )

    # Regenerate the suggested target repo id whenever any of its three
    # inputs changes (username, source repo, or precision).
    inputs_to_watch = [hf_username, source_repo, precision]
    for inp in inputs_to_watch:
        inp.change(
            fn=update_target_repo,
            inputs=inputs_to_watch,
            outputs=[target_repo]
        )

    # convert_and_upload is a generator, so Gradio streams each yielded
    # status string into the log textbox as it arrives.
    start_btn.click(
        fn=convert_and_upload,
        inputs=[hf_token, source_repo, target_repo, precision, target_components],
        outputs=[output_log]
    )


if __name__ == "__main__":
    demo.launch()