Spaces:
Running
Running
| import os | |
| import subprocess | |
| import streamlit as st | |
| from huggingface_hub import snapshot_download, HfApi | |
# ============================================================
# SESSION STATE
# ============================================================
# Persist the list of produced .gguf file paths across Streamlit reruns,
# so the download/upload section below survives widget interactions.
if "quantized_models" not in st.session_state:
    st.session_state.quantized_models = []
# ============================================================
# CONFIG
# ============================================================
# Models pre-offered in the selectbox (a custom HF model ID can also be typed).
MODELS_LIST = ['rahul7star/Qwen3-4B-Thinking-2509-Genius-Coder-AI-Full']
# Quantization presets accepted by llama-quantize.
# FIX: "Q3_K_l" -> "Q3_K_L" — llama.cpp's canonical type names are
# upper-case, and the lowercase 'l' was inconsistent with every other entry.
QUANT_TYPES = [
    "Q2_K", "Q3_K_L", "Q3_K_M", "Q3_K_S",
    "Q4_0", "Q4_1", "Q4_K_M", "Q4_K_S",
    "Q5_0", "Q5_1", "Q5_K_M", "Q5_K_S",
    "Q6_K", "Q8_0", "BF16", "F16", "F32"
]
# Paths into the llama.cpp checkout baked into the container image.
LLAMA_CPP_PATH = "/app/llama.cpp"
CONVERT_SCRIPT = f"{LLAMA_CPP_PATH}/convert_hf_to_gguf.py"
QUANTIZE_BIN = f"{LLAMA_CPP_PATH}/build/bin/llama-quantize"
| # ============================================================ | |
| # UTILS | |
| # ============================================================ | |
def check_dependencies():
    """Halt the app immediately if the llama.cpp toolchain is missing."""
    required = (
        (CONVERT_SCRIPT, "β convert_hf_to_gguf.py not found"),
        (QUANTIZE_BIN, "β llama-quantize binary not found"),
    )
    for path, message in required:
        if not os.path.exists(path):
            st.error(message)
            st.stop()
def download_model(hf_model_name, output_dir="/tmp/models"):
    """Download a Hugging Face model snapshot into *output_dir*.

    Returns the local filesystem path of the downloaded snapshot.
    """
    st.write(f"π₯ Downloading `{hf_model_name}` ...")
    # NOTE: the original call passed local_dir_use_symlinks=False, but that
    # argument is deprecated and ignored in recent huggingface_hub releases —
    # with local_dir set, real files are always materialized — so it is
    # dropped here.
    model_path = snapshot_download(
        repo_id=hf_model_name,
        local_dir=output_dir,
    )
    st.success("β Model downloaded")
    return model_path
def convert_to_gguf(model_path, output_file):
    """Run llama.cpp's HF-to-GGUF converter on the downloaded model.

    Raises RuntimeError when the converter exits with a non-zero status;
    the converter's stderr is surfaced in the UI first.
    """
    st.write("π Converting to GGUF...")
    command = ["python3", CONVERT_SCRIPT, model_path, "--outfile", output_file]
    proc = subprocess.run(command, capture_output=True, text=True)
    if proc.returncode != 0:
        st.error(proc.stderr)
        raise RuntimeError("Conversion failed")
    st.success("β GGUF created")
def quantize_model(gguf_file, quant_type):
    """Quantize *gguf_file* to *quant_type* using llama-quantize.

    Returns the output file path on success, or None when quantization
    fails (the tool's stderr is shown in the UI).
    """
    # FIX: build "<base>-<quant>.gguf" via splitext instead of
    # str.replace(".gguf", ...) — replace() substitutes the FIRST
    # occurrence anywhere in the path, which would corrupt the name if
    # ".gguf" appeared in an earlier path component.
    base, ext = os.path.splitext(gguf_file)
    output_file = f"{base}-{quant_type}{ext}"
    st.write(f"β‘ Quantizing β {quant_type}")
    cmd = [
        QUANTIZE_BIN,
        gguf_file,
        output_file,
        quant_type
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        st.error(result.stderr)
        return None
    st.success(f"β {quant_type} done")
    return output_file
def upload_to_huggingface(file_path, repo_id):
    """Push a single quantized file to a Hugging Face model repository.

    Requires the HF_TOKEN environment variable; shows an error and returns
    early when it is absent.
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        st.error("β HF_TOKEN not found in environment variables")
        return
    client = HfApi(token=token)
    # Idempotent: creating an already-existing repo is a no-op.
    client.create_repo(repo_id, exist_ok=True, repo_type="model")
    remote_name = os.path.basename(file_path)
    client.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=remote_name,
        repo_id=repo_id,
    )
    st.success(f"π Uploaded to https://huggingface.co/{repo_id}")
# ============================================================
# UI
# ============================================================
# Flat Streamlit script: re-executed top-to-bottom on every interaction.
st.title("π¦ LLaMA.cpp Multi-Quantization Tool")
check_dependencies()

# Model selection: curated dropdown, with a free-text fallback for any
# other Hugging Face model ID.
selected_model = st.selectbox(
    "Select Hugging Face Model",
    MODELS_LIST,
    index=None  # no default selection, so the text input below can win
)
hf_model_name = selected_model or st.text_input(
    "Or Enter Custom HF Model ID"
)

# Multi-checkbox quant selection, laid out in a 4-column grid.
st.subheader("Select Quantization Types")
selected_quants = []
cols = st.columns(4)
for i, quant in enumerate(QUANT_TYPES):
    with cols[i % 4]:
        if st.checkbox(quant):
            selected_quants.append(quant)

# Start button: pipeline is download -> convert to GGUF -> quantize each
# selected type. Validation happens first; st.stop() aborts this rerun.
if st.button("π Start Quantization"):
    if not hf_model_name:
        st.warning("Please enter a model name")
        st.stop()
    if not selected_quants:
        st.warning("Select at least one quant type")
        st.stop()
    with st.spinner("Processing..."):
        try:
            base_dir = "/tmp/models"
            os.makedirs(base_dir, exist_ok=True)
            model_path = download_model(hf_model_name, base_dir)
            # One intermediate full-precision GGUF per model; all quantized
            # variants are derived from this file.
            gguf_file = os.path.join(
                base_dir,
                hf_model_name.replace("/", "_") + ".gguf"
            )
            convert_to_gguf(model_path, gguf_file)
            # Discard results from any previous run before collecting new ones.
            st.session_state.quantized_models = []
            for quant in selected_quants:
                quant_file = quantize_model(gguf_file, quant)
                if quant_file:  # None means this quant type failed; skip it
                    st.session_state.quantized_models.append(quant_file)
            st.success("π All quantizations completed")
        except Exception as e:
            # Broad catch is the UI boundary: surface any pipeline failure
            # to the user instead of crashing the app.
            st.error(f"β Error: {str(e)}")
# ============================================================
# DOWNLOAD + UPLOAD SECTION
# ============================================================
# Rendered only after a successful run; session state keeps the file list
# alive across the reruns triggered by the widgets below.
if st.session_state.quantized_models:
    st.subheader("π¦ Generated Models")
    for file_path in st.session_state.quantized_models:
        with open(file_path, "rb") as f:
            st.download_button(
                label=f"β¬οΈ Download {os.path.basename(file_path)}",
                data=f,
                file_name=os.path.basename(file_path),
                key=file_path  # each path is unique, so it is a safe widget key
            )
    st.divider()
    st.subheader("π Upload to Hugging Face")
    repo_id = st.text_input(
        "Target Repository (e.g. username/model-quant)"
    )
    if st.button("π€ Upload All to HF"):
        if not repo_id:
            st.warning("Enter repository ID")
        else:
            with st.spinner("Uploading..."):
                # Upload every generated file into the same target repo.
                for file_path in st.session_state.quantized_models:
                    upload_to_huggingface(file_path, repo_id)
                st.success("β All files uploaded successfully")