import os
import gc
import torch
import shutil
import uuid
import gradio as gr
from huggingface_hub import HfApi, hf_hub_download
from safetensors.torch import load_file, save_file
def convert_and_upload(token, source_repo, target_repo, precision, target_components):
    """Copy a Hugging Face repo to a target repo, casting selected weights.

    This is a generator: every yielded string is a status message. Each file
    listed in ``source_repo`` is downloaded and then either re-saved with its
    floating-point tensors cast to the requested precision, or uploaded
    unchanged. ``.safetensors`` files at the root of the source repo are never
    copied; a best-effort delete is issued for them on the target instead.

    Args:
        token: Hugging Face access token used for repo creation, download
            and upload.
        source_repo: Repo id to read files from.
        target_repo: Repo id to create (if missing) and upload to.
            Surrounding whitespace is ignored.
        precision: ``"FP8"``, ``"FP16"`` or ``"BF16"``. Any other value
            leaves tensor dtypes untouched.
        target_components: Folder names whose ``.safetensors`` files are
            cast. All other files are copied as-is.

    Yields:
        str: Progress, warning and error messages, ending with a summary of
        processed and failed files.
    """
    if not token:
        yield "β Error: Please provide a valid Hugging Face Write Token."
        return

    # Normalise once so the validated value is also the value that gets used.
    target_repo = (target_repo or "").strip()
    if not target_repo or "your-username" in target_repo:
        yield "β Error: Please specify a valid Target Repository (e.g., your-username/repo-name)."
        return

    if not target_components:
        yield "β Error: Please select at least one component to quantize."
        return

    # Map precision string to a PyTorch dtype. The attribute is looked up
    # lazily so an unused dtype missing from the installed torch cannot break
    # the other precisions.
    dtype_names = {"FP8": "float8_e4m3fn", "FP16": "float16", "BF16": "bfloat16"}
    dtype_name = dtype_names.get(precision)
    target_dtype = getattr(torch, dtype_name) if dtype_name else None

    api = HfApi(token=token)

    yield f"π Connecting to Hugging Face and verifying target repo: {target_repo}..."
    try:
        api.create_repo(repo_id=target_repo, exist_ok=True, private=False)
    except Exception as e:
        yield f"β Error checking/creating repo: {e}\nMake sure your token has 'Write' permissions."
        return

    yield f"π Fetching file list from {source_repo}..."
    try:
        files = api.list_repo_files(source_repo)
    except Exception as e:
        yield f"β Error fetching files: {e}"
        return

    # Unique scratch directory per run: it holds both the download cache and
    # the converted file, so parallel runs never touch each other's files.
    cache_dir = f"./hf_cache_{uuid.uuid4().hex[:8]}"
    converted_path = os.path.join(cache_dir, "converted.safetensors")

    success_count = 0
    error_count = 0

    for file in files:
        # Root-level .safetensors files are skipped rather than copied.
        if "/" not in file and file.endswith(".safetensors"):
            yield f"ποΈ Auto-skipping massive root model: {file}..."
            try:
                api.delete_file(
                    path_in_repo=file,
                    repo_id=target_repo,
                    token=token,
                    commit_message=f"Auto-deleted root file {file}",
                )
                yield f"✅ Ensured {file} is removed from target repository."
            except Exception:
                # Deliberately best-effort: the delete is only a tidy-up and
                # must not abort the run when it fails.
                pass
            continue

        yield f"β³ Processing {file}..."
        tensors = None
        try:
            os.makedirs(cache_dir, exist_ok=True)

            # Download using the token so gated files are accessible.
            local_path = hf_hub_download(
                repo_id=source_repo,
                filename=file,
                cache_dir=cache_dir,
                token=token,
            )

            # Match whole folder names so "vae" does not also select "my_vae/".
            parent_dirs = file.split("/")[:-1]
            in_target_component = any(comp in parent_dirs for comp in target_components)

            if file.endswith(".safetensors") and in_target_component:
                yield f"π§ Quantizing {file} to {precision} (This will take a few minutes)..."
                tensors = load_file(local_path)

                if target_dtype is not None:
                    for key in list(tensors):
                        if tensors[key].is_floating_point():
                            # Cast the tensor to target precision
                            tensors[key] = tensors[key].to(target_dtype)

                save_file(tensors, converted_path)

                # Release the weights before the upload starts.
                tensors = None
                gc.collect()

                yield f"βοΈ Uploading {precision} version of {file}..."
                api.upload_file(
                    path_or_fileobj=converted_path,
                    path_in_repo=file,
                    repo_id=target_repo,
                    commit_message=f"Upload {precision} quantized {file}",
                )
            else:
                yield f"βοΈ Copying {file} as-is..."
                api.upload_file(
                    path_or_fileobj=local_path,
                    path_in_repo=file,
                    repo_id=target_repo,
                    commit_message=f"Copy {file} from original repo",
                )

            success_count += 1
        except Exception as e:
            error_count += 1
            yield f"β οΈ Error processing {file}: {e}\nSkipping to next file..."
        finally:
            # Purge after every file, on success, on failure, and when the
            # consumer closes the generator, so neither the download nor a
            # half-written converted file piles up on disk or in RAM.
            tensors = None
            shutil.rmtree(cache_dir, ignore_errors=True)
            gc.collect()

    yield f"✅ Finished! Successfully processed {success_count} files. Errors encountered: {error_count}."
# Dynamic UI Update for Target Repo Name
def update_target_repo(username, source, precision):
    """Return the suggested target repo id ``<user>/<model>-<precision>``.

    Args:
        username: Account name typed by the user. Blank or ``None`` falls
            back to the placeholder ``"your-username"``.
        source: Source repo id. Only the part after the last ``/`` is used
            as the model name.
        precision: Precision label appended as a suffix.

    Returns:
        str: The composed repo id.
    """
    user_prefix = (username or "").strip() or "your-username"
    # Drop surrounding whitespace and trailing slashes first, otherwise the
    # last path segment (the model name) would come out empty.
    model_name = (source or "").strip().rstrip("/").rsplit("/", 1)[-1]
    return f"{user_prefix}/{model_name}-{precision}"
# Build the Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header and introduction.
    gr.Markdown("# π LongCat Dedicated Quantizer")
    gr.Markdown(
        "Convert the **LongCat-Image** family of models to lower precisions (FP8, FP16, BF16).\n\n"
        "**Memory & Disk Protection:** This tool is specifically tuned to survive the massive 11.68 GB single-file `transformer` "
        "shard. It aggressively purges Hugging Face's download cache and PyTorch's RAM buffer after every single step to keep the free Space alive."
    )

    with gr.Row():
        # Left column: every user-editable setting plus the start button.
        with gr.Column(scale=2):
            hf_token = gr.Textbox(
                type="password",
                label="Hugging Face Token (Write Access Required)",
                placeholder="hf_...",
            )
            hf_username = gr.Textbox(
                placeholder="e.g., rootlocalghost",
                label="Your Hugging Face Username",
            )
            # Source is restricted to the listed LongCat repos; custom
            # values are not accepted.
            source_repo = gr.Dropdown(
                label="Source Repository",
                choices=[
                    "meituan-longcat/LongCat-Image-Edit-Turbo",
                    "meituan-longcat/LongCat-Image-Edit",
                    "meituan-longcat/LongCat-Image",
                ],
                value="meituan-longcat/LongCat-Image-Edit-Turbo",
                allow_custom_value=False,
            )
            target_components = gr.CheckboxGroup(
                label="Components to Quantize",
                info="Select which folders should be cast to the new precision. Unselected folders will be copied as-is.",
                choices=["text_encoder", "transformer", "vae"],
                value=["text_encoder", "transformer"],
            )
            precision = gr.Dropdown(
                label="Target Precision",
                choices=["FP8", "FP16", "BF16"],
                value="FP8",
            )
            target_repo = gr.Textbox(
                label="Target Repository (Auto-generated)",
                interactive=True,
                value="your-username/LongCat-Image-Edit-Turbo-FP8",
            )
            start_btn = gr.Button("Start Quantization & Upload", variant="primary")

        # Right column: read-only box that shows the handler's messages.
        with gr.Column(scale=3):
            output_log = gr.Textbox(
                label="Operation Logs",
                interactive=False,
                lines=20,
                max_lines=25,
            )

    # Changing the username, source repo or precision regenerates the
    # suggested target repo name.
    inputs_to_watch = [hf_username, source_repo, precision]
    refresh_target = {
        "fn": update_target_repo,
        "inputs": inputs_to_watch,
        "outputs": [target_repo],
    }
    for inp in inputs_to_watch:
        inp.change(**refresh_target)

    # The button runs the conversion and sends its yielded messages to the log box.
    start_btn.click(
        fn=convert_and_upload,
        inputs=[hf_token, source_repo, target_repo, precision, target_components],
        outputs=[output_log],
    )
if __name__ == "__main__":
    # Launch the Gradio app only when executed as a script, not on import.
    demo.launch()