Spaces:

rootlocalghost
/

LongCat-Image-Model-Quantizer

Running

App Files Files Community

LongCat-Image-Model-Quantizer / app.py

rootlocalghost

Create app.py

eb37dfe verified 3 days ago

raw

history blame contribute delete

8.12 kB

	import os
	import gc
	import torch
	import shutil
	import uuid
	import gradio as gr
	from huggingface_hub import HfApi, hf_hub_download
	from safetensors.torch import load_file, save_file

	def convert_and_upload(token, source_repo, target_repo, precision, target_components):
	if not token:
	yield "❌ Error: Please provide a valid Hugging Face Write Token."
	return
	if not target_repo.strip() or "your-username" in target_repo:
	yield "❌ Error: Please specify a valid Target Repository (e.g., your-username/repo-name)."
	return
	if not target_components:
	yield "❌ Error: Please select at least one component to quantize."
	return

	# Map precision string to PyTorch dtype
	if precision == "FP8":
	target_dtype = torch.float8_e4m3fn
	elif precision == "FP16":
	target_dtype = torch.float16
	elif precision == "BF16":
	target_dtype = torch.bfloat16
	else:
	target_dtype = None

	api = HfApi(token=token)
	yield f"🔄 Connecting to Hugging Face and verifying target repo: {target_repo}..."

	try:
	api.create_repo(repo_id=target_repo, exist_ok=True, private=False)
	except Exception as e:
	yield f"❌ Error checking/creating repo: {str(e)}\nMake sure your token has 'Write' permissions."
	return

	yield f"📋 Fetching file list from {source_repo}..."
	try:
	files = api.list_repo_files(source_repo)
	except Exception as e:
	yield f"❌ Error fetching files: {str(e)}"
	return

	# Create a unique cache directory for this specific run to prevent disk bloat
	cache_dir = f"./hf_cache_{uuid.uuid4().hex[:8]}"

	success_count = 0
	error_count = 0

	for file in files:
	# Detect large .safetensors files at the root level (just in case)
	is_root_safetensor = "/" not in file and file.endswith(".safetensors")

	if is_root_safetensor:
	yield f"🗑️ Auto-skipping massive root model: {file}..."
	try:
	api.delete_file(path_in_repo=file, repo_id=target_repo, token=token, commit_message=f"Auto-deleted root file {file}")
	yield f"✅ Ensured {file} is removed from target repository."
	except Exception:
	pass
	continue

	yield f"⏳ Processing {file}..."

	try:
	os.makedirs(cache_dir, exist_ok=True)

	# Download using the token to ensure gated/large files don't fail silently
	local_path = hf_hub_download(
	repo_id=source_repo,
	filename=file,
	cache_dir=cache_dir,
	token=token
	)

	in_target_component = any(f"{comp}/" in file for comp in target_components)

	if file.endswith(".safetensors") and in_target_component:
	yield f"🧠 Quantizing {file} to {precision} (This will take a few minutes)..."

	tensors = load_file(local_path)

	if target_dtype:
	keys = list(tensors.keys())
	for k in keys:
	if tensors[k].is_floating_point():
	# Cast the tensor to target precision
	tensors[k] = tensors[k].to(target_dtype)

	converted_path = "converted.safetensors"
	save_file(tensors, converted_path)

	# Aggressive memory flush (Crucial for the 11.68GB transformer file)
	del tensors
	gc.collect()

	yield f"☁️ Uploading {precision} version of {file}..."
	api.upload_file(
	path_or_fileobj=converted_path,
	path_in_repo=file,
	repo_id=target_repo,
	commit_message=f"Upload {precision} quantized {file}"
	)

	os.remove(converted_path)

	else:
	yield f"☁️ Copying {file} as-is..."
	api.upload_file(
	path_or_fileobj=local_path,
	path_in_repo=file,
	repo_id=target_repo,
	commit_message=f"Copy {file} from original repo"
	)

	success_count += 1

	# EXTREME DISK CLEANUP: Nuke the cache directory after every file to prevent the 50GB Space Crash
	if os.path.exists(cache_dir):
	shutil.rmtree(cache_dir)

	gc.collect()

	except Exception as e:
	error_count += 1
	yield f"⚠️ Error processing {file}: {str(e)}\nSkipping to next file..."

	# Final cleanup sweep
	if os.path.exists(cache_dir):
	shutil.rmtree(cache_dir)

	yield f"✅ Finished! Successfully processed {success_count} files. Errors encountered: {error_count}."

	# Dynamic UI Update for Target Repo Name
	def update_target_repo(username, source, precision):
	    """Build the suggested target repo id ``<user>/<model>-<precision>``.

	    A blank username falls back to the "your-username" placeholder, and only
	    the last path segment of *source* is used as the model name.
	    """
	    owner = username.strip() or "your-username"
	    # rpartition yields the text after the last "/", or the whole string
	    # when there is no "/" at all.
	    repo_name = source.rpartition("/")[2]
	    return "{}/{}-{}".format(owner, repo_name, precision)

	# Build the Gradio UI
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# 🚀 LongCat Dedicated Quantizer")
	    gr.Markdown(
	        "Convert the LongCat-Image family of models to lower precisions (FP8, FP16, BF16).\n\n"
	        "Memory & Disk Protection: This tool is specifically tuned to survive the massive 11.68 GB single-file `transformer` "
	        "shard. It aggressively purges Hugging Face's download cache and PyTorch's RAM buffer after every single step to keep the free Space alive."
	    )

	    with gr.Row():
	        # Left column: credentials and conversion settings.
	        with gr.Column(scale=2):
	            hf_token = gr.Textbox(
	                label="Hugging Face Token (Write Access Required)",
	                type="password",
	                placeholder="hf_..."
	            )
	            hf_username = gr.Textbox(
	                label="Your Hugging Face Username",
	                placeholder="e.g., rootlocalghost"
	            )
	            # Locked down to LongCat models
	            source_repo = gr.Dropdown(
	                choices=[
	                    "meituan-longcat/LongCat-Image-Edit-Turbo",
	                    "meituan-longcat/LongCat-Image-Edit",
	                    "meituan-longcat/LongCat-Image"
	                ],
	                value="meituan-longcat/LongCat-Image-Edit-Turbo",
	                label="Source Repository",
	                allow_custom_value=False
	            )

	            target_components = gr.CheckboxGroup(
	                choices=["text_encoder", "transformer", "vae"],
	                value=["text_encoder", "transformer"],
	                label="Components to Quantize",
	                info="Select which folders should be cast to the new precision. Unselected folders will be copied as-is."
	            )

	            precision = gr.Dropdown(
	                choices=["FP8", "FP16", "BF16"],
	                value="FP8",
	                label="Target Precision"
	            )
	            target_repo = gr.Textbox(
	                label="Target Repository (Auto-generated)",
	                value="your-username/LongCat-Image-Edit-Turbo-FP8",
	                interactive=True
	            )
	            start_btn = gr.Button("Start Quantization & Upload", variant="primary")

	        # Right column: streamed progress output.
	        with gr.Column(scale=3):
	            output_log = gr.Textbox(
	                label="Operation Logs",
	                lines=20,
	                interactive=False,
	                max_lines=25
	            )

	    # Regenerate the suggested target repo name whenever any of the fields
	    # it is derived from changes.
	    inputs_to_watch = [hf_username, source_repo, precision]
	    for inp in inputs_to_watch:
	        inp.change(
	            fn=update_target_repo,
	            inputs=inputs_to_watch,
	            outputs=[target_repo]
	        )

	    def _stream_log(*args):
	        """Re-yield the full message history after each new status message.

	        Every value a generator handler yields replaces the output
	        component's content, so passing the raw messages through would leave
	        only the most recent line visible in the log box.
	        """
	        history = []
	        for message in convert_and_upload(*args):
	            history.append(message)
	            yield "\n".join(history)

	    start_btn.click(
	        fn=_stream_log,
	        inputs=[hf_token, source_repo, target_repo, precision, target_components],
	        outputs=[output_log]
	    )

	# Start the web server only when executed as a script, not when imported.
	if __name__ == "__main__":
	    demo.launch()