apex-train

Sleeping

App Files Files Community

apex-train / app.py

rahul7star

Update app.py

2bef4d1 verified 5 days ago

raw

history blame contribute delete

7.42 kB

	import os
	import subprocess
	from datetime import datetime

	import gradio as gr
	from huggingface_hub import snapshot_download, HfApi

	# -----------------------------
	# PATHS
	# -----------------------------
	# Working directories; each can be overridden through an environment
	# variable of the same name, otherwise the /app default is used.
	APEX_DIR = os.getenv("APEX_DIR", "/app/apex-quant")
	LLAMA_DIR = os.getenv("LLAMA_DIR", "/app/llama.cpp")

	# Hub token (None when HF_TOKEN is unset) and the shared HfApi client.
	HF_TOKEN = os.environ.get("HF_TOKEN")
	api = HfApi()

	# -----------------------------
	# LOGGING
	# -----------------------------
	def log(msg):
	    """Print a timestamped log line and hand it back to the caller.

	    Args:
	        msg: Value to log; it is formatted into the line as-is.

	    Returns:
	        The printed line, ``[HH:MM:SS] msg``, with a trailing newline.
	    """
	    stamp = datetime.now().strftime("%H:%M:%S")
	    entry = "[{}] {}".format(stamp, msg)
	    # flush=True so the line is written out immediately
	    print(entry, flush=True)
	    return entry + "\n"

	# -----------------------------
	# RUN SHELL
	# -----------------------------
	def run(cmd, cwd=None, env=None):
	    """Run a command, log its output, and return its stdout.

	    Args:
	        cmd: Command and its arguments as a sequence (no shell is involved).
	        cwd: Optional working directory for the child process.
	        env: Optional environment mapping for the child process.

	    Returns:
	        The captured standard output as text.

	    Raises:
	        RuntimeError: If the command exits with a non-zero status. The
	            message is the captured stderr; when stderr is empty it falls
	            back to a description with the exit code and the command, so
	            callers never receive a blank error.
	    """
	    # str() each part so a non-string argument cannot break the log line.
	    printable = " ".join(str(part) for part in cmd)
	    log("▶ RUN: " + printable)

	    p = subprocess.run(
	        cmd,
	        cwd=cwd,
	        text=True,
	        capture_output=True,
	        env=env,
	    )

	    if p.stdout:
	        log(p.stdout)

	    if p.stderr:
	        log("STDERR:\n" + p.stderr)

	    if p.returncode != 0:
	        if p.stderr and p.stderr.strip():
	            raise RuntimeError(p.stderr)
	        # Some tools fail silently; still give the caller something useful.
	        raise RuntimeError(
	            f"Command failed with exit code {p.returncode}: {printable}"
	        )

	    return p.stdout

	# -----------------------------
	# ENSURE LLAMA.CPP EXISTS
	# -----------------------------
	def ensure_llama_cpp():
	    """Make sure something exists at LLAMA_DIR, cloning llama.cpp if not.

	    When the LLAMA_DIR path does not exist, a shallow (depth 1) git clone
	    of the llama.cpp repository is made there. An existing path is used
	    as-is without further checks.

	    Returns:
	        LLAMA_DIR.
	    """
	    if not os.path.exists(LLAMA_DIR):
	        log("📥 Cloning llama.cpp...")
	        clone_cmd = [
	            "git", "clone",
	            "--depth", "1",
	            "https://github.com/ggerganov/llama.cpp",
	            LLAMA_DIR,
	        ]
	        run(clone_cmd)

	    return LLAMA_DIR

	# -----------------------------
	# BUILD LLAMA.CPP (🔥 FIX)
	# -----------------------------
	def build_llama_cpp():
	    """Configure and build llama.cpp with CMake.

	    The checkout is ensured first, then CMake is run from LLAMA_DIR with a
	    ``build`` sub-directory as the build tree. An existing build tree is
	    reused, not cleaned.

	    Returns:
	        Path of the ``bin`` directory inside the build tree.

	    Raises:
	        RuntimeError: Propagated from ``run`` when a CMake step fails.
	    """
	    log("🔧 Building llama.cpp (required for quantization)")

	    ensure_llama_cpp()

	    build_name = "build"
	    build_dir = os.path.join(LLAMA_DIR, build_name)

	    # A bare "-j" leaves the job count to the native build tool, which for
	    # Makefile generators can mean unlimited parallel compiles and memory
	    # exhaustion. Pass an explicit count instead.
	    jobs = str(os.cpu_count() or 1)

	    run(["cmake", "-B", build_name], cwd=LLAMA_DIR)
	    run(["cmake", "--build", build_name, "-j", jobs], cwd=LLAMA_DIR)

	    bin_path = os.path.join(build_dir, "bin")
	    log(f"✅ llama.cpp built: {bin_path}")

	    return bin_path

	# -----------------------------
	# DOWNLOAD MODEL
	# -----------------------------
	def download_model(repo_id):
	    """Download a snapshot of a Hub repository into ``/tmp/model``.

	    Args:
	        repo_id: Identifier of the repository to download.

	    Returns:
	        The path returned by ``snapshot_download``.
	    """
	    log(f"📥 Downloading HF model: {repo_id}")

	    download_kwargs = {
	        "repo_id": repo_id,
	        "local_dir": "/tmp/model",
	        "local_dir_use_symlinks": False,
	    }
	    path = snapshot_download(**download_kwargs)

	    log(f"✅ Download complete: {path}")
	    return path

	# -----------------------------
	# FIND CONVERTER
	# -----------------------------
	def find_converter():
	    """Locate the HF→GGUF conversion script inside the llama.cpp checkout.

	    Known script locations are probed in order and the first one that
	    exists is used.

	    Returns:
	        Path of the first converter script found.

	    Raises:
	        RuntimeError: If none of the known script paths exists.
	    """
	    ensure_llama_cpp()

	    # Path components relative to LLAMA_DIR, in order of preference.
	    known_locations = (
	        ("convert_hf_to_gguf.py",),
	        ("convert-hf-to-gguf.py",),
	        ("scripts", "convert_hf_to_gguf.py"),
	        ("convert.py",),
	    )
	    candidates = [os.path.join(LLAMA_DIR, *parts) for parts in known_locations]

	    found = next((c for c in candidates if os.path.exists(c)), None)
	    if found is None:
	        raise RuntimeError("❌ No HF→GGUF converter found in llama.cpp")

	    log(f"🔧 Found converter: {found}")
	    return found

	# -----------------------------
	# HF → GGUF
	# -----------------------------
	def build_f16(model_dir):
	    """Convert a downloaded HF model directory into an F16 GGUF file.

	    Args:
	        model_dir: Directory containing the downloaded model.

	    Returns:
	        Path of the generated file, ``model-f16.gguf`` inside APEX_DIR.

	    Raises:
	        RuntimeError: If the output file does not exist after conversion,
	            or propagated from ``find_converter`` / ``run``.
	    """
	    log("🧠 STEP: HF → F16 GGUF")

	    converter = find_converter()
	    f16_path = os.path.join(APEX_DIR, "model-f16.gguf")

	    convert_cmd = [
	        "python3", converter, model_dir,
	        "--outtype", "f16",
	        "--outfile", f16_path,
	    ]
	    run(convert_cmd)

	    if os.path.exists(f16_path):
	        log(f"✅ F16 CREATED: {f16_path}")
	        return f16_path

	    raise RuntimeError("❌ F16 GGUF generation failed")

	# -----------------------------
	# VALID PROFILES
	# -----------------------------
	# Profile names accepted by quantize(); the UI dropdown lists the same names.
	VALID_PROFILES = {
	    "quality", "i-quality",
	    "balanced", "i-balanced",
	    "compact", "i-compact",
	    "mini",
	    "full-pipeline",
	}

	# -----------------------------
	# FIX BROKEN BINARIES
	# -----------------------------
	def fix_bad_binaries():
	    """Delete system-wide ``llama-quantize`` binaries, on a best-effort basis.

	    Removes ``/usr/local/bin/llama-quantize`` and ``/usr/bin/llama-quantize``
	    when present — presumably so the freshly built binary is the one picked
	    up from PATH (verify against quantize.sh). A path that is absent is
	    skipped silently; any other removal failure is logged with its reason
	    and does not abort the run.
	    """
	    bad = (
	        "/usr/local/bin/llama-quantize",
	        "/usr/bin/llama-quantize",
	    )

	    for p in bad:
	        try:
	            os.remove(p)
	        except FileNotFoundError:
	            # Nothing to clean up at this location.
	            continue
	        except OSError as exc:
	            # Only filesystem errors are tolerated; KeyboardInterrupt and
	            # SystemExit must not be swallowed here.
	            log(f"⚠️ Could not remove: {p} ({exc})")
	        else:
	            log(f"🧹 Removed broken binary: {p}")

	# -----------------------------
	# QUANTIZE (🔥 FIXED)
	# -----------------------------
	def quantize(f16_path, profile):
	    """Quantize an F16 GGUF file with apex-quant's ``quantize.sh``.

	    Stale system binaries are removed and llama.cpp is built first; the
	    script then runs with the build's ``bin`` directory prepended to PATH.

	    Args:
	        f16_path: Path of the F16 GGUF input file.
	        profile: Profile name; must be a member of VALID_PROFILES.

	    Returns:
	        Path of the output file, ``model-apex-<profile>.gguf`` in APEX_DIR.

	    Raises:
	        RuntimeError: For an unknown profile, a missing quantize.sh, a
	            missing output file, or a failing sub-command.
	    """
	    log(f"⚙️ QUANTIZE: {profile}")

	    if profile not in VALID_PROFILES:
	        raise RuntimeError("❌ Invalid profile")

	    # Drop system-wide binaries, then build the one we want on PATH.
	    fix_bad_binaries()
	    bin_path = build_llama_cpp()

	    script = os.path.join(APEX_DIR, "scripts/quantize.sh")
	    if not os.path.exists(script):
	        raise RuntimeError("❌ quantize.sh missing in apex-quant")

	    out_path = os.path.join(APEX_DIR, f"model-apex-{profile}.gguf")

	    # Child environment: a copy of ours with the build's bin dir first.
	    child_env = dict(os.environ)
	    child_env["PATH"] = bin_path + ":" + child_env.get("PATH", "")

	    quantize_cmd = ["bash", script, "--profile", profile, f16_path, out_path]
	    run(quantize_cmd, cwd=APEX_DIR, env=child_env)

	    if os.path.exists(out_path):
	        log(f"✅ OUTPUT: {out_path}")
	        return out_path

	    raise RuntimeError("❌ Quantization failed")

	# -----------------------------
	# FULL PIPELINE
	# -----------------------------
	def full_pipeline(source_repo):
	    """Download, convert and quantize a model with the ``i-quality`` profile.

	    Args:
	        source_repo: Hub repository id of the model to process.

	    Returns:
	        Path of the quantized GGUF file.
	    """
	    log("🚀 START FULL PIPELINE")

	    # download → F16 GGUF → quantized GGUF
	    downloaded = download_model(source_repo)
	    return quantize(build_f16(downloaded), "i-quality")

	# -----------------------------
	# MAIN PIPELINE
	# -----------------------------
	def pipeline(source_repo, profile, target_repo):
	    """Run the whole job for the UI: build a GGUF and upload it to the Hub.

	    Inputs are validated before any work starts, so a missing token or a
	    blank repository name is reported immediately instead of after the
	    download / conversion / quantization steps.

	    Args:
	        source_repo: Hub repository id of the model to convert.
	        profile: Quantization profile; ``"full-pipeline"`` delegates to
	            ``full_pipeline``, anything else is passed to ``quantize``.
	        target_repo: Hub model repository that receives the GGUF file.

	    Returns:
	        A status string for the UI. Failures are reported as a string
	        starting with ``❌``; this function does not raise for them.
	    """
	    try:
	        log("========================================")
	        log("🚀 GGUF FACTORY START")
	        log(f"📦 SOURCE: {source_repo}")
	        log(f"🎯 PROFILE: {profile}")
	        log(f"📤 TARGET: {target_repo}")
	        log("========================================")

	        # Fail fast: the upload at the end cannot succeed without these,
	        # so do not spend time building a file that cannot be uploaded.
	        if not HF_TOKEN:
	            log("❌ HF_TOKEN missing")
	            return "❌ HF_TOKEN missing"

	        source_repo = (source_repo or "").strip()
	        target_repo = (target_repo or "").strip()
	        if not source_repo or not target_repo:
	            log("❌ ERROR: source and target repos are required")
	            return "❌ ERROR: source and target repos are required"

	        if profile == "full-pipeline":
	            gguf = full_pipeline(source_repo)
	        else:
	            model = download_model(source_repo)
	            f16 = build_f16(model)
	            gguf = quantize(f16, profile)

	        log(f"📤 Uploading → {target_repo}")

	        api.create_repo(
	            target_repo,
	            repo_type="model",
	            exist_ok=True,
	            token=HF_TOKEN,
	        )

	        api.upload_file(
	            path_or_fileobj=gguf,
	            path_in_repo=os.path.basename(gguf),
	            repo_id=target_repo,
	            repo_type="model",
	            token=HF_TOKEN,
	        )

	        log("✅ Upload complete")
	        return f"✅ DONE → {target_repo}"

	    except Exception as e:
	        # Top-level UI boundary: report the failure as text, do not raise.
	        log(f"❌ ERROR: {str(e)}")
	        return f"❌ ERROR: {str(e)}"

	# -----------------------------
	# UI
	# -----------------------------
	# Gradio front end: three inputs, a run button and a text area that shows
	# the status string returned by pipeline().
	with gr.Blocks() as demo:
	    gr.Markdown("# ⚡ GGUF Factory (FIXED QUANT BUILD)")

	    source = gr.Textbox(label="HF Source Repo", value="rahul7star/gemma-4-finetune")

	    profile = gr.Dropdown(
	        choices=[
	            "quality", "i-quality",
	            "balanced", "i-balanced",
	            "compact", "i-compact",
	            "mini",
	            "full-pipeline",
	        ],
	        value="i-quality",
	        label="Profile",
	    )

	    target = gr.Textbox(label="HF Output Repo", value="rahul7star/gemma-gguf")

	    btn = gr.Button(value="🚀 Run")
	    out = gr.Textbox(label="Logs", lines=30)

	    # Clicking the button runs the whole job and writes its result to `out`.
	    btn.click(fn=pipeline, inputs=[source, profile, target], outputs=out)

	# Listen on all interfaces, port 7860.
	demo.launch(server_name="0.0.0.0", server_port=7860)