Buckets:

meshllm
/

layer-split-output

Files

xet

meshllm/layer-split-output / split-model-job.sh

jamesdumay

2 days ago

download

raw

30.4 kB

	#!/bin/bash
	set -euo pipefail

	# This script runs inside an HF Job container.
	# It clones mesh-llm, builds the splitter, splits the model, validates, and publishes.
	#
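	# Environment variables (set by mesh-llm model-package job spec):
	# SOURCE_REPO, SOURCE_FILE, SOURCE_QUANT, TARGET_REPO, MODEL_ID, SOURCE_REVISION
	# MESH_LLM_REF — git ref to build from (default: main)
	# CATALOG_CREATE_PR — "true" to open a PR for catalog updates (non-org members)
	# SOURCE_TOTAL_BYTES — optional source size in bytes; only used for a free-space warning
	# JOB_HEARTBEAT_SECONDS — optional interval between progress heartbeats (default: 60)
	# HF_TOKEN — injected as a secret by HF Jobs
	#
	# Volumes:
	# /bucket — writable storage bucket for script and fallback source cache
	# /source — optional mount; when /source/${SOURCE_FILE} exists it is split directly
	#           instead of resolving the source through the Hugging Face cache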

	# Defaults for the optional inputs.
	MESH_LLM_REF="${MESH_LLM_REF:-main}"
	SOURCE_REVISION="${SOURCE_REVISION:-main}"
	SOURCE_QUANT="${SOURCE_QUANT:-}"
	# No explicit quant: take everything after the last ":" of MODEL_ID
	# (e.g. "org/model:Q4_K_M" -> "Q4_K_M"). The pattern must be *:* so that any
	# MODEL_ID containing a colon matches, not only the literal string ":".
	if [ -z "$SOURCE_QUANT" ] && [[ "${MODEL_ID:-}" == *:* ]]; then
	  SOURCE_QUANT="${MODEL_ID##*:}"
	fi
	# Still empty (no colon, or nothing after it): the source cannot be resolved.
	if [ -z "$SOURCE_QUANT" ]; then
	  echo "ERROR: SOURCE_QUANT is required to resolve the source GGUF without a model volume" >&2
	  exit 1
	fi

	# Job banner: one line per input so the log shows what is being split.
	printf '%s\n' "╔══════════════════════════════════════════════════════════╗"
	printf '%s\n' "║ Layer Package Split Job ║"
	printf '%s\n' "╠══════════════════════════════════════════════════════════╣"
	printf '%s\n' "║ Source: ${SOURCE_REPO}/${SOURCE_FILE}"
	printf '%s\n' "║ Quant: ${SOURCE_QUANT}"
	printf '%s\n' "║ Target: ${TARGET_REPO}"
	printf '%s\n' "║ Model: ${MODEL_ID}"
	printf '%s\n' "║ Build: mesh-llm @ ${MESH_LLM_REF}"
	printf '%s\n' "╚══════════════════════════════════════════════════════════╝"
	printf '\n'

	# Workspace layout.
	#
	# Toolchains, build products and package artifacts default to LOCAL_WORK_DIR
	# under /tmp: HF bucket mounts can be unsuitable for dynamic loader/toolchain
	# execution. Package artifacts are written locally, uploaded one at a time, and
	# removed immediately so the job never accumulates a full 400GB+ package.
	# Only the Hugging Face caches default to JOB_WORK_DIR under /bucket.
	# Every path can be overridden through the environment.
	JOB_WORK_ROOT="${JOB_WORK_ROOT:-/bucket/job-work}"
	# Replace every character outside [A-Za-z0-9._-] so the repo id is safe as a
	# single path component (the "/" in "owner/name" becomes "_").
	SAFE_TARGET_REPO="$(printf '%s' "$TARGET_REPO" | tr -c '[:alnum:]._-' '_')"
	LOCAL_WORK_DIR="${LOCAL_WORK_DIR:-/tmp/meshllm-layer-job-${SAFE_TARGET_REPO}-$$}"
	if [ -z "${JOB_WORK_DIR:-}" ]; then
	  # Directory invented by this run: remove it on exit unless told otherwise.
	  JOB_WORK_DIR="${JOB_WORK_ROOT}/${SAFE_TARGET_REPO}-$(date +%Y%m%d%H%M%S)-$$"
	  CLEANUP_JOB_WORK_DIR="${CLEANUP_JOB_WORK_DIR:-true}"
	else
	  # Caller-provided directory: keep it unless cleanup was requested.
	  CLEANUP_JOB_WORK_DIR="${CLEANUP_JOB_WORK_DIR:-false}"
	fi
	PACKAGE_DIR="${PACKAGE_DIR:-${LOCAL_WORK_DIR}/package}"
	HF_HOME="${HF_HOME:-${JOB_WORK_DIR}/hf-home}"
	HF_HUB_CACHE="${HF_HUB_CACHE:-${HF_HOME}/hub}"
	HF_XET_CACHE="${HF_XET_CACHE:-${HF_HOME}/xet}"
	JOB_TMP_DIR="${JOB_TMP_DIR:-${LOCAL_WORK_DIR}/tmp}"
	BUILD_DIR="${BUILD_DIR:-${LOCAL_WORK_DIR}/build}"
	TOOL_DIR="${TOOL_DIR:-${LOCAL_WORK_DIR}/tools}"
	VENV_DIR="${VENV_DIR:-${LOCAL_WORK_DIR}/venv}"
	ARTIFACT_UPLOAD_SCRIPT="${ARTIFACT_UPLOAD_SCRIPT:-${LOCAL_WORK_DIR}/upload-package-artifact.py}"
	ARTIFACT_UPLOAD_HOOK="${ARTIFACT_UPLOAD_HOOK:-${LOCAL_WORK_DIR}/upload-package-artifact.sh}"
	CARGO_HOME="${CARGO_HOME:-${LOCAL_WORK_DIR}/cargo-home}"
	RUSTUP_HOME="${RUSTUP_HOME:-${LOCAL_WORK_DIR}/rustup-home}"
	CARGO_TARGET_DIR="${CARGO_TARGET_DIR:-${LOCAL_WORK_DIR}/cargo-target}"
	XDG_CACHE_HOME="${XDG_CACHE_HOME:-${LOCAL_WORK_DIR}/xdg-cache}"
	PIP_CACHE_DIR="${PIP_CACHE_DIR:-${LOCAL_WORK_DIR}/pip-cache}"
	BUILD_TMP_DIR="${BUILD_TMP_DIR:-${LOCAL_WORK_DIR}/tmp}"
	# During the build phase all temp-file variables point at the build temp dir.
	TMPDIR="$BUILD_TMP_DIR"
	TEMP="$BUILD_TMP_DIR"
	TMP="$BUILD_TMP_DIR"
	# Exported so child processes (rustup, cargo, pip, the Python helpers and the
	# upload hook) see the same locations.
	export JOB_WORK_DIR PACKAGE_DIR HF_HOME HF_HUB_CACHE HF_XET_CACHE VENV_DIR ARTIFACT_UPLOAD_SCRIPT
	export TMPDIR TEMP TMP CARGO_HOME RUSTUP_HOME CARGO_TARGET_DIR XDG_CACHE_HOME PIP_CACHE_DIR

	# EXIT handler: remove the scratch directories created for this run.
	# Globals read: HEARTBEAT_PID, LOCAL_WORK_DIR, JOB_WORK_DIR, CLEANUP_JOB_WORK_DIR.
	# Best effort throughout: a failed removal must not change the exit status.
	cleanup_job_work_dir() {
	  # Stop a heartbeat that is still running so it does not outlive the job.
	  if [ -n "${HEARTBEAT_PID:-}" ]; then
	    kill "$HEARTBEAT_PID" 2>/dev/null || true
	  fi
	  if [ -n "${LOCAL_WORK_DIR:-}" ]; then
	    echo "Cleaning local work dir: ${LOCAL_WORK_DIR}"
	    rm -rf -- "$LOCAL_WORK_DIR" || true
	  fi
	  # The job work dir is only removed when cleanup was requested.
	  if [ "${CLEANUP_JOB_WORK_DIR:-false}" = "true" ] && [ -n "${JOB_WORK_DIR:-}" ]; then
	    echo "Cleaning job work dir: ${JOB_WORK_DIR}"
	    rm -rf -- "$JOB_WORK_DIR" || true
	  fi
	}
	trap cleanup_job_work_dir EXIT

	# Print disk usage and the relevant mounts, tagged with a label.
	# Arguments: $1 - label shown in the two headings.
	# Globals read: PACKAGE_DIR, TMPDIR.
	# Purely diagnostic: failures of df/mount/grep are ignored.
	log_storage_snapshot() {
	  local label="$1"
	  echo " Storage snapshot (${label}):"
	  df -h / /bucket "$PACKAGE_DIR" "$TMPDIR" 2>/dev/null || true
	  echo " Mounts (${label}):"
	  # Only the root and /bucket mounts are of interest.
	  mount | grep -E ' on / | on /bucket ' || true
	}

	# ERR handler: report the failing command and exit with its status.
	# $? must be read on the first line, before anything else overwrites it.
	on_error() {
	  local status=$?
	  local line=${BASH_LINENO[0]:-unknown}
	  local command=${BASH_COMMAND:-unknown}
	  echo "ERROR: split job command failed at line ${line} with status ${status}: ${command}" >&2
	  # Diagnostic only: a failing snapshot must not replace the real status.
	  log_storage_snapshot "error" >&2 || true
	  exit "$status"
	}
	trap on_error ERR

	# Start a background loop that periodically prints a timestamp and disk usage
	# so long-running steps keep producing log output.
	# Arguments: $1 - label printed with every heartbeat.
	# Globals read: JOB_HEARTBEAT_SECONDS (default 60), PACKAGE_DIR, HF_HUB_CACHE, TMPDIR.
	# Globals written: HEARTBEAT_PID - PID of the background loop.
	start_heartbeat() {
	  local label="$1"
	  (
	    sleeper=""
	    # sleep runs in the background and is waited on, so a TERM sent to this
	    # loop is handled at once and takes the pending sleep down with it.
	    trap '[ -z "$sleeper" ] || kill "$sleeper" 2>/dev/null; exit 0' TERM
	    while true; do
	      sleep "${JOB_HEARTBEAT_SECONDS:-60}" &
	      sleeper=$!
	      wait "$sleeper" || true
	      echo " Heartbeat (${label}) $(date -u +%Y-%m-%dT%H:%M:%SZ)"
	      df -h / /bucket "$PACKAGE_DIR" "$TMPDIR" 2>/dev/null || true
	      if [ -d "$PACKAGE_DIR" ]; then
	        du -sh "$PACKAGE_DIR" 2>/dev/null || true
	      fi
	      if [ -d "$HF_HUB_CACHE" ]; then
	        du -sh "$HF_HUB_CACHE" 2>/dev/null || true
	      fi
	    done
	  ) &
	  HEARTBEAT_PID=$!
	}

	# Stop the loop started by start_heartbeat, if one is recorded.
	# Globals: HEARTBEAT_PID (read, then cleared). Calling it twice is a no-op.
	stop_heartbeat() {
	  if [ -n "${HEARTBEAT_PID:-}" ]; then
	    # Both calls are best effort: the process may already be gone.
	    kill "$HEARTBEAT_PID" 2>/dev/null || true
	    wait "$HEARTBEAT_PID" 2>/dev/null || true
	    HEARTBEAT_PID=""
	  fi
	}

	# Create every working directory up front (same set, same order).
	WORK_DIRS=(
	  "$PACKAGE_DIR" "$HF_HUB_CACHE" "$HF_XET_CACHE" "$JOB_TMP_DIR" "$TOOL_DIR"
	  "$CARGO_HOME" "$RUSTUP_HOME" "$CARGO_TARGET_DIR" "$XDG_CACHE_HOME" "$PIP_CACHE_DIR"
	  "$BUILD_TMP_DIR"
	)
	mkdir -p "${WORK_DIRS[@]}"

	# Print a byte count in binary units (B, KiB, ... PiB).
	# Arguments: $1 - integer number of bytes.
	# Outputs: e.g. "512 B", "1.5 KiB", "1.0 GiB" on stdout.
	# The heredoc uses <<- so the tab-indented terminator is recognised; the
	# Python body is indented with spaces because <<- strips leading tabs.
	format_bytes() {
	  python3 - "$1" <<-'PYTHON'
	import sys

	value = float(int(sys.argv[1]))
	for unit in ["B", "KiB", "MiB", "GiB", "TiB", "PiB"]:
	    # Stop at the first unit the value fits in; PiB is the ceiling.
	    if value < 1024 or unit == "PiB":
	        if unit == "B":
	            print(f"{int(value)} {unit}")
	        else:
	            print(f"{value:.1f} {unit}")
	        break
	    value /= 1024
	PYTHON
	}

	# Estimate the space needed to cache the source as a fallback.
	# Arguments: $1 - source size in bytes.
	# Outputs: source size plus 32 GiB of headroom, in bytes, on stdout.
	# The heredoc uses <<- so the tab-indented terminator is recognised.
	estimate_bucket_workspace_bytes() {
	  python3 - "$1" <<-'PYTHON'
	import sys

	source = int(sys.argv[1])
	# Source and package artifacts are not meant to accumulate in the bucket. This
	# estimate is retained only as a fallback-source-cache warning when /source is
	# unavailable.
	headroom = 32 * 1024 ** 3
	print(source + headroom)
	PYTHON
	}

	# ─── Build tools ──────────────────────────────────────────────────────────
	echo "=== [1/9] Installing build dependencies ==="
	# Non-interactive so no package can stop the job at a configuration prompt.
	apt-get update -qq && DEBIAN_FRONTEND=noninteractive apt-get install -y -qq \
	  cmake git curl build-essential pkg-config libssl-dev \
	  python3-pip python3-venv > /dev/null 2>&1
	# Drop package caches to keep the root filesystem small.
	apt-get clean
	rm -rf /var/lib/apt/lists/*

	echo "=== [2/9] Installing Rust ==="
	# The installer script is piped into sh; CARGO_HOME and RUSTUP_HOME are taken
	# from the exported environment.
	curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y > /dev/null 2>&1
	source "${CARGO_HOME}/env"

	echo "=== [3/9] Cloning mesh-llm and building skippy-model-package ==="
	git clone --filter=blob:none https://github.com/Mesh-LLM/mesh-llm.git "$BUILD_DIR"
	cd "$BUILD_DIR"
	# Resolve MESH_LLM_REF in this order:
	#   1. a branch or tag on the remote  -> shallow fetch, check out FETCH_HEAD
	#   2. a commit already in the clone  -> check it out directly
	#   3. anything else                  -> try fetching it by name
	if git ls-remote --exit-code --heads origin "$MESH_LLM_REF" >/dev/null 2>&1 ||
	  git ls-remote --exit-code --tags origin "$MESH_LLM_REF" >/dev/null 2>&1; then
	  git fetch --depth 1 origin "$MESH_LLM_REF"
	  git checkout --detach FETCH_HEAD
	elif git cat-file -e "$MESH_LLM_REF^{commit}" 2>/dev/null; then
	  git checkout --detach "$MESH_LLM_REF"
	else
	  git fetch --depth 1 origin "$MESH_LLM_REF"
	  git checkout --detach FETCH_HEAD
	fi

	# Full clone needed for git-am patches in prepare-llama
	sed -i 's/--filter=blob:none //' scripts/prepare-llama.sh
	# Only the last lines of each script are kept in the log; pipefail still
	# fails the job when the script itself fails.
	echo " Running prepare-llama.sh..."
	scripts/prepare-llama.sh pinned 2>&1 | tail -5
	echo " Running build-llama.sh..."
	scripts/build-llama.sh 2>&1 | tail -5

	# Locate the llama.cpp build directory (build-llama.sh puts it here)
	LLAMA_BUILD_DIR=".deps/llama-build/build-stage-abi-cpu"
	echo " Verifying llama.cpp build at $LLAMA_BUILD_DIR..."
	find "$LLAMA_BUILD_DIR" -name "*.a" 2>/dev/null | head -10 || echo " WARNING: no .a files found"

	# Build the splitter binary. The status is captured with "||" so a failed
	# build reaches the diagnostic re-run below instead of aborting right here.
	echo " Building skippy-model-package..."
	BUILD_STATUS=0
	SKIPPY_LLAMA_BUILD_DIR="$LLAMA_BUILD_DIR" \
	  cargo build --release -p skippy-model-package 2>&1 | tail -20 || BUILD_STATUS=$?
	SLICER="${CARGO_TARGET_DIR}/release/skippy-model-package"
	if [ "$BUILD_STATUS" -ne 0 ] || [ ! -f "$SLICER" ]; then
	  echo "ERROR: Build failed — status ${BUILD_STATUS}, expected binary at $SLICER" >&2
	  echo "Retrying with full output..." >&2
	  # Diagnostic re-run only; the job fails whatever its result.
	  # NOTE(review): this uses a different llama build dir than the first
	  # attempt — confirm which one is intended.
	  SKIPPY_LLAMA_BUILD_DIR=.deps/llama.cpp/build-stage-abi-static \
	    cargo build --release -p skippy-model-package 2>&1 || true
	  exit 1
	fi
	# Keep only the binary; the toolchain and build tree are removed to free space.
	cp "$SLICER" "${TOOL_DIR}/skippy-model-package"
	SLICER="${TOOL_DIR}/skippy-model-package"
	chmod +x "$SLICER"
	cd /
	# ":?" aborts rather than expanding an empty variable into the rm.
	rm -rf -- "${BUILD_DIR:?}" "${CARGO_TARGET_DIR:?}" "${CARGO_HOME:?}" "${RUSTUP_HOME:?}"
	# Build is done: temp files now go to the job temp dir.
	TMPDIR="$JOB_TMP_DIR"
	TEMP="$JOB_TMP_DIR"
	TMP="$JOB_TMP_DIR"
	export TMPDIR TEMP TMP
	echo " ✓ Built: $SLICER"
	echo " Root filesystem after build cleanup:"
	df -h / || true

	echo " Preparing Hugging Face uploader..."
	python3 -m venv "$VENV_DIR" > /dev/null
	"$VENV_DIR/bin/pip" install -q huggingface_hub
	# All heredocs below use <<- so their tab-indented terminators are recognised;
	# their bodies are indented with spaces only.

	# Make sure the target repository exists before any artifact is uploaded.
	"$VENV_DIR/bin/python3" <<-'PYTHON'
	from huggingface_hub import HfApi
	import os

	api = HfApi(token=os.environ["HF_TOKEN"])
	api.create_repo(os.environ["TARGET_REPO"], exist_ok=True)
	PYTHON

	# Uploader for a single artifact. It reads the artifact location from
	# SKIPPY_PACKAGE_ARTIFACT_PATH / SKIPPY_PACKAGE_ARTIFACT_RELATIVE_PATH and
	# deletes the local file once the upload has returned.
	cat > "$ARTIFACT_UPLOAD_SCRIPT" <<-'PYTHON'
	from huggingface_hub import HfApi
	from pathlib import Path
	import os

	path = Path(os.environ["SKIPPY_PACKAGE_ARTIFACT_PATH"])
	relative = os.environ["SKIPPY_PACKAGE_ARTIFACT_RELATIVE_PATH"]
	target_repo = os.environ["TARGET_REPO"]

	api = HfApi(token=os.environ["HF_TOKEN"])
	api.upload_file(
	    repo_id=target_repo,
	    path_or_fileobj=str(path),
	    path_in_repo=relative,
	    repo_type="model",
	    commit_message=f"Add package artifact {relative}",
	)
	size = path.stat().st_size
	path.unlink()
	print(f" Uploaded and removed {relative} ({size} bytes)")
	PYTHON

	# Shell wrapper passed to write-package as --after-artifact-command.
	# VENV_DIR and ARTIFACT_UPLOAD_SCRIPT come from the exported environment.
	cat > "$ARTIFACT_UPLOAD_HOOK" <<-'BASH'
	#!/bin/bash
	set -euo pipefail
	"${VENV_DIR}/bin/python3" "${ARTIFACT_UPLOAD_SCRIPT}"
	BASH
	chmod +x "$ARTIFACT_UPLOAD_HOOK"

	# ─── Split ────────────────────────────────────────────────────────────────
	echo ""
	echo "=== [4/9] Splitting model ==="
	# The revision is only spelled out in the source ref when it is not "main".
	if [ "$SOURCE_REVISION" = "main" ]; then
	  SOURCE_REF="${SOURCE_REPO}:${SOURCE_QUANT}"
	else
	  SOURCE_REF="${SOURCE_REPO}@${SOURCE_REVISION}:${SOURCE_QUANT}"
	fi
	echo " Source ref: $SOURCE_REF"
	if [ -n "${SOURCE_TOTAL_BYTES:-}" ]; then
	  echo " Source bytes: $SOURCE_TOTAL_BYTES"
	  ESTIMATED_BUCKET_BYTES="$(estimate_bucket_workspace_bytes "$SOURCE_TOTAL_BYTES")"
	  echo " Estimated fallback /bucket cache needed: $(format_bytes "$ESTIMATED_BUCKET_BYTES")"
	fi
	# Prefer the mounted source file. When reading from the mount the source
	# identity is passed explicitly; otherwise write-package gets the source ref.
	MOUNTED_SOURCE_PATH="/source/${SOURCE_FILE}"
	if [ -f "$MOUNTED_SOURCE_PATH" ]; then
	  WRITE_PACKAGE_INPUT="$MOUNTED_SOURCE_PATH"
	  WRITE_PACKAGE_IDENTITY_ARGS=(
	    --model-id "$MODEL_ID"
	    --source-repo "$SOURCE_REPO"
	    --source-revision "$SOURCE_REVISION"
	    --source-file "$SOURCE_FILE"
	  )
	  echo " Source mount: $MOUNTED_SOURCE_PATH"
	else
	  WRITE_PACKAGE_INPUT="$SOURCE_REF"
	  WRITE_PACKAGE_IDENTITY_ARGS=()
	  echo " Source mount: not available; falling back to Hugging Face cache download"
	fi
	echo " Hugging Face cache: $HF_HUB_CACHE"
	echo " Package workspace: $PACKAGE_DIR"
	echo " Temporary workspace: $TMPDIR"
	log_storage_snapshot "before write-package"
	# Warn when the package workspace shares a filesystem with the container root.
	ROOT_FS="$(df -P / | awk 'NR==2 {print $1}')"
	PACKAGE_FS="$(df -P "$PACKAGE_DIR" | awk 'NR==2 {print $1}')"
	if [ -n "$ROOT_FS" ] && [ "$ROOT_FS" = "$PACKAGE_FS" ]; then
	  echo "WARNING: package workspace is on the container root filesystem; very large splits may hit the HF Jobs 50G ephemeral storage limit." >&2
	fi
	# Warn (never fail) when free space is below the estimate.
	if [ -n "${ESTIMATED_BUCKET_BYTES:-}" ]; then
	  PACKAGE_AVAILABLE_BYTES="$(df -Pk "$PACKAGE_DIR" | awk 'NR==2 {printf "%.0f", $4 * 1024}')"
	  if [ -n "$PACKAGE_AVAILABLE_BYTES" ] && [ "$PACKAGE_AVAILABLE_BYTES" -gt 0 ] &&
	    [ "$PACKAGE_AVAILABLE_BYTES" -lt "$ESTIMATED_BUCKET_BYTES" ]; then
	    echo "WARNING: package workspace has $(format_bytes "$PACKAGE_AVAILABLE_BYTES") available, below estimated need $(format_bytes "$ESTIMATED_BUCKET_BYTES")." >&2
	  fi
	fi
	echo " Starting write-package at $(date -u +%Y-%m-%dT%H:%M:%SZ)"
	start_heartbeat "write-package"
	# The status is captured with "||": a command on the left of "||" triggers
	# neither set -e nor the ERR trap, so the heartbeat is always stopped and the
	# failure is reported below. (With "set +e" the ERR trap would still exit.)
	# The ${arr[@]+...} form keeps an empty array safe under "set -u" on old bash.
	WRITE_PACKAGE_STATUS=0
	time "$SLICER" write-package "$WRITE_PACKAGE_INPUT" \
	  --out-dir "$PACKAGE_DIR" \
	  --after-artifact-command "$ARTIFACT_UPLOAD_HOOK" \
	  ${WRITE_PACKAGE_IDENTITY_ARGS[@]+"${WRITE_PACKAGE_IDENTITY_ARGS[@]}"} \
	  || WRITE_PACKAGE_STATUS=$?
	stop_heartbeat
	if [ "$WRITE_PACKAGE_STATUS" -ne 0 ]; then
	  echo "ERROR: write-package failed with status $WRITE_PACKAGE_STATUS" >&2
	  log_storage_snapshot "write-package failed" >&2 || true
	  exit "$WRITE_PACKAGE_STATUS"
	fi
	echo " Finished write-package at $(date -u +%Y-%m-%dT%H:%M:%SZ)"
	log_storage_snapshot "after write-package"

	# Read summary values from the manifest written by write-package.
	SOURCE_PATH="$(python3 -c "import json, os; m=json.load(open(os.path.join(os.environ['PACKAGE_DIR'], 'model-package.json'))); print(m['source_model']['path'])")"
	echo " Cached source: $SOURCE_PATH ($(du -h "$SOURCE_PATH" | cut -f1))"

	LAYER_COUNT="$(python3 -c "import json, os; m=json.load(open(os.path.join(os.environ['PACKAGE_DIR'], 'model-package.json'))); print(m['layer_count'])")"
	# Total = artifact_bytes summed over shared artifacts, layers and projectors.
	TOTAL_SIZE="$(python3 -c "import json, os; m=json.load(open(os.path.join(os.environ['PACKAGE_DIR'], 'model-package.json'))); print(sum(int(a.get('artifact_bytes') or 0) for a in list(m['shared'].values()) + m.get('layers', []) + m.get('projectors', [])))")"
	TOTAL_SIZE_LABEL="$(format_bytes "$TOTAL_SIZE")"
	echo " ✓ Split into $LAYER_COUNT layers; artifacts uploaded incrementally (${TOTAL_SIZE_LABEL} total)"

	# ─── Verify manifest ──────────────────────────────────────────────────────
	echo ""
	echo "=== [5/9] Verifying package manifest ==="
	# Every artifact listed in the manifest must carry a path and a sha256.
	# The heredoc uses <<- so the tab-indented terminator is recognised.
	"$VENV_DIR/bin/python3" <<-'PYTHON'
	import json
	import os
	from pathlib import Path

	manifest_path = Path(os.environ["PACKAGE_DIR"]) / "model-package.json"
	manifest = json.loads(manifest_path.read_text())
	required = [
	    manifest["shared"]["metadata"],
	    manifest["shared"]["embeddings"],
	    manifest["shared"]["output"],
	    *manifest.get("layers", []),
	    *manifest.get("projectors", []),
	]
	missing = [artifact for artifact in required if not artifact.get("path") or not artifact.get("sha256")]
	if missing:
	    raise SystemExit(f"manifest contains {len(missing)} artifacts without path/checksum")
	print(f" ✓ Manifest records {len(required)} uploaded artifacts")
	PYTHON

	# ─── Publish ──────────────────────────────────────────────────────────────
	echo ""
	echo "=== [6/9] Publishing to HuggingFace ==="
	# Upload the manifest to the target repo. The heredoc delimiter is quoted so
	# the shell never expands anything inside the Python source, and <<- lets the
	# tab-indented terminator be recognised.
	"$VENV_DIR/bin/python3" <<-'PYTHON'
	from huggingface_hub import HfApi
	import os, json
	from pathlib import Path

	api = HfApi(token=os.environ['HF_TOKEN'])
	target_repo = os.environ['TARGET_REPO']
	source_repo = os.environ['SOURCE_REPO']
	model_id = os.environ.get('MODEL_ID', '')
	manifest_path = Path(os.environ['PACKAGE_DIR']) / 'model-package.json'

	api.upload_file(
	    repo_id=target_repo,
	    path_or_fileobj=str(manifest_path),
	    path_in_repo='model-package.json',
	    repo_type='model',
	    commit_message=f'Add layer package manifest from {source_repo} ({model_id})',
	)

	# Print summary
	with open(manifest_path) as handle:
	    manifest = json.load(handle)
	print(f' ✓ Published: https://huggingface.co/{target_repo}')
	print(f' Model: {manifest["model_id"]}')
	print(f' Layers: {manifest["layer_count"]}')
	print(f' Schema: {manifest["schema_version"]}')
	PYTHON

	# ─── Update catalog ───────────────────────────────────────────────────────
	echo ""
	echo "=== [7/9] Updating meshllm/catalog ==="
	# Add or refresh this package in entries/<owner>/<repo-name>.json of the
	# meshllm/catalog dataset. The heredoc uses <<- so the tab-indented terminator
	# is recognised; the Python body is indented with spaces only.
	"$VENV_DIR/bin/python3" <<-'PYTHON'
	from huggingface_hub import HfApi
	import json
	import os
	import re

	try:
	    from huggingface_hub.errors import EntryNotFoundError
	except ImportError:  # older huggingface_hub releases
	    from huggingface_hub.utils import EntryNotFoundError

	api = HfApi(token=os.environ['HF_TOKEN'])
	source_repo = os.environ['SOURCE_REPO']
	target_repo = os.environ['TARGET_REPO']
	source_file = os.environ['SOURCE_FILE']
	source_revision = os.environ.get('SOURCE_REVISION', 'main')
	model_id = os.environ.get('MODEL_ID', '')
	package_dir = os.environ['PACKAGE_DIR']

	# Read manifest for metadata
	with open(os.path.join(package_dir, 'model-package.json')) as handle:
	    manifest = json.load(handle)
	layer_count = manifest['layer_count']

	# Determine catalog entry path: entries/<owner>/<repo-name>.json
	owner, repo_name = source_repo.split('/', 1)
	entry_path = f"entries/{owner}/{repo_name}.json"

	# Fetch the existing entry. Only "this file does not exist" starts a fresh
	# entry; any other failure (network, auth, invalid JSON) must abort, otherwise
	# the upload below would replace the real entry with one holding one variant.
	catalog_repo = "meshllm/catalog"
	try:
	    existing_path = api.hf_hub_download(
	        repo_id=catalog_repo,
	        filename=entry_path,
	        repo_type="dataset",
	    )
	except EntryNotFoundError:
	    # Create new entry
	    entry = {"schema_version": 1, "source_repo": source_repo, "variants": {}}
	else:
	    with open(existing_path) as handle:
	        entry = json.load(handle)

	# Build variant name from source file stem (not MODEL_ID).
	# For "UD-Q4_K_XL/Qwen3-32B-UD-Q4_K_XL-00001-of-00002.gguf" → "Qwen3-32B-UD-Q4_K_XL"
	file_stem = source_file.split('/')[-1]
	# Strip only a trailing ".gguf", never one in the middle of the name.
	if file_stem.endswith('.gguf'):
	    file_stem = file_stem[:-len('.gguf')]
	# Strip shard suffix like "-00001-of-00002"
	variant_name = re.sub(r'-\d{5}-of-\d{5}$', '', file_stem)

	package_entry = {
	    "type": "layer-package",
	    "repo": target_repo,
	    "layer_count": layer_count,
	}

	# Variant record used when the catalog does not know this variant yet.
	new_variant = {
	    "source": {
	        "repo": source_repo,
	        "file": source_file,
	        "revision": source_revision,
	    },
	    "curated": {
	        "name": variant_name,
	        "size": f"{layer_count} layers",
	        "description": f"Layer package for {model_id}",
	    },
	    "packages": [package_entry],
	}


	def with_package(packages):
	    """Return packages with any entry for target_repo replaced by package_entry."""
	    kept = [p for p in packages if p.get("repo") != target_repo]
	    kept.append(package_entry)
	    return kept


	# Handle both dict-style and list-style variants
	variants = entry.get("variants", {})
	if isinstance(variants, dict):
	    # Dict-keyed by variant name (existing catalog format)
	    if variant_name in variants:
	        existing = variants[variant_name]
	        existing["packages"] = with_package(existing.get("packages", []))
	    else:
	        variants[variant_name] = new_variant
	    entry["variants"] = variants
	else:
	    # List-style (fallback): variants are matched on their curated name.
	    existing_variant = None
	    for v in variants:
	        if v.get("curated", {}).get("name") == variant_name:
	            existing_variant = v
	            break
	    if existing_variant is not None:
	        existing_variant["packages"] = with_package(existing_variant.get("packages", []))
	    else:
	        variants.append(new_variant)

	create_pr = os.environ.get('CATALOG_CREATE_PR', 'false').lower() == 'true'

	# Upload the serialized entry straight from memory; no temp file is left behind.
	api.upload_file(
	    repo_id=catalog_repo,
	    path_or_fileobj=json.dumps(entry, indent=2).encode("utf-8"),
	    path_in_repo=entry_path,
	    repo_type="dataset",
	    commit_message=f"Add layer package for {model_id} ({target_repo})",
	    create_pr=create_pr,
	)
	print(f" ✓ Catalog updated: {catalog_repo}/{entry_path}")
	print(f" Variant: {variant_name}")
	print(f" Package: {target_repo} ({layer_count} layers)")
	PYTHON

	# ─── Model Card ────────────────────────────────────────────────────────────
	echo ""
	echo "=== [8/9] Uploading model card ==="
	# Render README.md from the manifest and upload it to the target repo.
	# The heredoc uses <<- so the tab-indented terminator is recognised; the Python
	# body (and the README text inside it) contains no leading tabs.
	"$VENV_DIR/bin/python3" <<-'PYTHON'
	from huggingface_hub import HfApi
	from pathlib import Path
	import hashlib
	import json
	import os
	import re

	package_dir = Path(os.environ["PACKAGE_DIR"])
	manifest_path = package_dir / "model-package.json"
	manifest = json.loads(manifest_path.read_text())

	source_repo = os.environ["SOURCE_REPO"]
	source_file = os.environ["SOURCE_FILE"]
	source_revision = os.environ.get("SOURCE_REVISION", "main")
	target_repo = os.environ["TARGET_REPO"]
	model_id = os.environ.get("MODEL_ID", manifest.get("model_id", target_repo))
	mesh_llm_ref = os.environ.get("MESH_LLM_REF", "main")


	def sha256(path: Path) -> str:
	    """Hex SHA-256 of a file, read in 1 MiB chunks."""
	    digest = hashlib.sha256()
	    with path.open("rb") as file:
	        for chunk in iter(lambda: file.read(1024 * 1024), b""):
	            digest.update(chunk)
	    return digest.hexdigest()


	def fmt_bytes(size: int) -> str:
	    """Human-readable size. Values are divided by 1024, so units are binary."""
	    value = float(size)
	    for unit in ["B", "KiB", "MiB", "GiB", "TiB"]:
	        if value < 1024 or unit == "TiB":
	            if unit == "B":
	                return f"{int(value)} {unit}"
	            return f"{value:.1f} {unit}"
	        value /= 1024


	def artifact_bytes(artifact: dict) -> int:
	    """Size recorded for an artifact; 0 when missing or empty."""
	    return int(artifact.get("artifact_bytes") or 0)


	def md_cell(value) -> str:
	    """Make a value safe inside a Markdown table cell.

	    Pipes already preceded by a backslash are left alone, so values that went
	    through code()/link() first are not escaped a second time.
	    """
	    text = "" if value is None else str(value)
	    return re.sub(r"(?<!\\)\|", r"\\|", text).replace("\n", "<br>")


	def link(label: str, url: str) -> str:
	    return f"[{md_cell(label)}]({url})"


	def code(value) -> str:
	    return f"`{md_cell(value)}`"


	def yaml_quote(value: str) -> str:
	    """Quote a string for the YAML front matter (a JSON string is valid YAML)."""
	    return json.dumps(value)


	def infer_model_family(name: str) -> str:
	    """First known family named in the string, else the text before the first '-'."""
	    lowered = name.lower()
	    for family in ["Qwen3", "Qwen2.5", "DeepSeek", "Kimi", "Gemma", "GLM", "Llama"]:
	        if family.lower() in lowered:
	            return family
	    return name.split("-")[0] if name else "Unknown"


	def infer_parameter_scale(name: str) -> str:
	    """Pick a token such as "32B" or "30B-A3B" out of the name."""
	    match = re.search(r"(?i)(\d+(?:\.\d+)?[BM](?:-A\d+(?:\.\d+)?B)?)", name)
	    return match.group(1) if match else "not recorded"


	def infer_quantization(name: str, source_path: str) -> str:
	    """Pick a quantization tag out of the name or source path; first pattern wins."""
	    combined = f"{name}/{source_path}"
	    patterns = [
	        r"UD-Q\d+_[A-Z]+(?:_[A-Z]+)?",
	        r"Q\d+_[A-Z]+(?:_[A-Z]+)?",
	        r"IQ\d+_[A-Z]+(?:_[A-Z]+)?",
	        r"BF16",
	        r"F16",
	    ]
	    for pattern in patterns:
	        match = re.search(pattern, combined, re.IGNORECASE)
	        if match:
	            return match.group(0)
	    return "not recorded"


	shared = manifest.get("shared", {})
	layers = manifest.get("layers", [])
	projectors = manifest.get("projectors", [])
	manifest_hash = sha256(manifest_path)
	total_bytes = sum(artifact_bytes(artifact) for artifact in shared.values())
	total_bytes += sum(artifact_bytes(layer) for layer in layers)
	total_bytes += sum(artifact_bytes(projector) for projector in projectors)

	source_model = manifest.get("source_model", {})
	display_name = source_model.get("distribution_id") or model_id
	model_family = infer_model_family(display_name)
	parameter_scale = infer_parameter_scale(display_name)
	quantization = infer_quantization(display_name, source_file)
	source_path = source_model.get("path") or f"/hf-cache/{source_file}"
	activation_width = manifest.get("activation_width") or "not recorded"
	skippy_abi = manifest.get("skippy_abi_version") or "not recorded"
	source_sha = source_model.get("sha256") or "not recorded"
	canonical_ref = source_model.get("canonical_ref") or f"{source_repo}@{source_revision}/{source_file}"

	# Rows of the "What Is Included" table: (label, path, contents, checksum).
	file_rows = [
	    ("Manifest", "model-package.json", "Package schema, source identity, checksums", manifest_hash),
	]
	for label, key in [
	    ("Metadata", "metadata"),
	    ("Embeddings", "embeddings"),
	    ("Output head", "output"),
	]:
	    artifact = shared.get(key)
	    if artifact:
	        file_rows.append((
	            label,
	            artifact.get("path", f"shared/{key}.gguf"),
	            f"{artifact.get('tensor_count', 'unknown')} tensors, {fmt_bytes(artifact_bytes(artifact))}",
	            artifact.get("sha256", "not recorded"),
	        ))
	if layers:
	    # All layers are summarised in one row; per-layer checksums stay in the manifest.
	    layer_bytes = sum(artifact_bytes(layer) for layer in layers)
	    layer_tensors = sum(int(layer.get("tensor_count") or 0) for layer in layers)
	    file_rows.append((
	        "Transformer layers",
	        "layers/layer-*.gguf",
	        f"{len(layers)} layer artifacts, {layer_tensors} tensors, {fmt_bytes(layer_bytes)}",
	        "see model-package.json",
	    ))
	for projector in projectors:
	    file_rows.append((
	        "Projector",
	        projector.get("path", "projectors/projector.gguf"),
	        f"{projector.get('kind', 'multimodal')} projector, {fmt_bytes(artifact_bytes(projector))}",
	        projector.get("sha256", "not recorded"),
	    ))

	# Rows of the "Model Overview" table.
	rows = [
	    ("Source model", link(source_repo, f"https://huggingface.co/{source_repo}")),
	    ("Model id", code(model_id)),
	    ("Family", model_family),
	    ("Parameter scale", parameter_scale),
	    ("Quantization", code(quantization)),
	    ("Layer count", manifest.get("layer_count", len(layers))),
	    ("Activation width", activation_width),
	    ("Package size", fmt_bytes(total_bytes)),
	    ("Source file", code(source_file)),
	    ("Package repo", link(target_repo, f"https://huggingface.co/{target_repo}")),
	]

	readme = f"""---
	library_name: mesh-llm
	base_model:
	- {yaml_quote(source_repo)}
	pipeline_tag: text-generation
	tags:
	- gguf
	- mesh-llm
	- layer-package
	- skippy
	- distributed-inference
	- local-inference
	- openai-compatible
	---

	<div align="center">
	<a href="https://www.meshllm.cloud">
	<img src="https://github.com/Mesh-LLM/mesh-llm/raw/main/docs/mesh-llm-logo.svg" alt="Mesh LLM" width="220">
	</a>

	<h1>{display_name}</h1>

	<p>
	<strong>Distributed GGUF inference package for Mesh LLM</strong>
	</p>

	<p>
	<a href="https://www.meshllm.cloud"><img alt="Website" src="https://img.shields.io/badge/Website-meshllm.cloud-111111?style=for-the-badge"></a>
	<a href="https://github.com/Mesh-LLM/mesh-llm"><img alt="GitHub" src="https://img.shields.io/badge/GitHub-Mesh--LLM-24292f?style=for-the-badge&logo=github"></a>
	<a href="https://discord.gg/rs6fmc63eN"><img alt="Discord" src="https://img.shields.io/badge/Discord-Join-5865F2?style=for-the-badge&logo=discord&logoColor=white"></a>
	</p>
	</div>

	GGUF layer package for running {display_name} across a local Mesh LLM cluster.

	This package is derived from [{source_repo}](https://huggingface.co/{source_repo}) and keeps the original GGUF distribution split into per-layer artifacts for distributed inference.

	## Highlights

	| Run locally | Pool multiple machines | OpenAI-compatible | Package variant |
	|---|---|---|---|
	| Private inference on your hardware | Split layers across peers | Serve `/v1/chat/completions` locally | `{quantization}` layer package |

	## Model Overview

	| Property | Value |
	|---|---|
	"""

	for key, value in rows:
	    readme += f"| {md_cell(key)} | {md_cell(value)} |\n"

	readme += f"""
	## Recommended Use

	- Local and private inference with Mesh LLM.
	- Multi-machine serving when the full GGUF is too large for one host.
	- OpenAI-compatible chat/completions workflows through Mesh LLM's local API.

	For upstream architecture details, chat template guidance, sampling recommendations, license terms, and benchmark notes, see the source model card: [{source_repo}](https://huggingface.co/{source_repo}).

	## Quickstart

	```bash
	# Run this on each machine that should contribute memory/compute.
	mesh-llm serve --model "{target_repo}" --split
	```

	```bash
	# Check the mesh and discover the OpenAI-compatible model name.
	curl -s http://localhost:3131/api/status
	curl -s http://localhost:3131/v1/models
	```

	```bash
	# Send an OpenAI-compatible chat request.
	curl -s http://localhost:3131/v1/chat/completions \\
	-H "Content-Type: application/json" \\
	-d '{{
	"model": "{model_id}",
	"messages": [{{"role": "user", "content": "Write a tiny hello-world function in Rust."}}],
	"max_tokens": 128
	}}'
	```

	## Package Variant

	| Property | Value |
	|---|---|
	"""

	for key, value in [
	    ("Format", code(manifest.get("format", "layer-package"))),
	    ("Canonical source ref", code(canonical_ref)),
	    ("Source revision", code(source_revision)),
	    ("Source SHA-256", code(source_sha)),
	    ("Skippy ABI", code(skippy_abi)),
	    ("Package manifest SHA-256", code(manifest_hash)),
	]:
	    readme += f"| {md_cell(key)} | {md_cell(value)} |\n"

	readme += f"""
	## What Is Included

	| Artifact | Path | Contents | SHA-256 |
	|---|---|---|---|
	"""

	for label, path, contents, checksum in file_rows:
	    readme += f"| {md_cell(label)} | {code(path)} | {md_cell(contents)} | {code(checksum)} |\n"

	readme += f"""
	## Validation

	Generated by the Mesh LLM HF Jobs splitter from `mesh-llm` ref `{mesh_llm_ref}`.
	Each artifact is checksummed as it is written, uploaded to this repository, and removed from the job workspace before the next artifact is produced.

	```bash
	skippy-model-package write-package "{source_path}" --out-dir "{package_dir}"
	```

	## Links

	- Source model: [{source_repo}](https://huggingface.co/{source_repo})
	- Mesh LLM website: [meshllm.cloud](https://www.meshllm.cloud)
	- Mesh LLM: [github.com/Mesh-LLM/mesh-llm](https://github.com/Mesh-LLM/mesh-llm)
	- Discord: [discord.gg/rs6fmc63eN](https://discord.gg/rs6fmc63eN)
	- Package catalog: [meshllm/catalog](https://huggingface.co/datasets/meshllm/catalog)
	- Package format: [layer-package-repos.md](https://github.com/Mesh-LLM/mesh-llm/blob/main/docs/specs/layer-package-repos.md)
	"""

	# Upload the rendered card from memory rather than via a fixed /tmp/README.md.
	api = HfApi(token=os.environ["HF_TOKEN"])
	api.upload_file(
	    path_or_fileobj=readme.encode("utf-8"),
	    path_in_repo="README.md",
	    repo_id=target_repo,
	    repo_type="model",
	)
	print(" ✓ Model card uploaded")
	PYTHON

	# ─── Summary ──────────────────────────────────────────────────────────────
	# Closing report: where the package was published and how to serve it.
	printf '\n'
	printf '%s\n' "=== [9/9] Done ==="
	printf '\n'
	printf '%s\n' " Published: https://huggingface.co/${TARGET_REPO}"
	printf '%s\n' " Layers: ${LAYER_COUNT}"
	printf '%s\n' " Total size: ${TOTAL_SIZE_LABEL}"
	printf '\n'
	printf '%s\n' " Use with mesh-llm:"
	printf '%s\n' " mesh-llm serve --model ${TARGET_REPO} --split"

Xet Storage Details

Size: 30.4 kB
Xet hash: 5187b9471fd62ba1956d6d4d238e990bb55c979b81ae49c602ccec5a0e5d5e37

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.