Buckets:

meshllm/layer-split-output / split-model-job.sh
jamesdumay's picture
download
raw
30.4 kB
#!/bin/bash
set -euo pipefail
# This script runs inside an HF Job container.
# It clones mesh-llm, builds the splitter, splits the model, validates, and publishes.
#
# Environment variables (set by mesh-llm model-package job spec):
# SOURCE_REPO, SOURCE_FILE, SOURCE_QUANT, TARGET_REPO, MODEL_ID, SOURCE_REVISION
# MESH_LLM_REF — git ref to build from (default: main)
# CATALOG_CREATE_PR — "true" to open a PR for catalog updates (non-org members)
# HF_TOKEN — injected as a secret by HF Jobs
#
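# Volumes:
# /bucket — writable storage bucket for script and fallback source cache
# /source — optional mount of the source model; when /source/${SOURCE_FILE}
#           exists it is used as the split input instead of a Hugging Face download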
# Fail fast, with a readable message, when a variable the job cannot run
# without is missing. Otherwise `set -u` aborts part-way through the banner
# with a bare "unbound variable", and a missing HF_TOKEN is only noticed by
# the Python helpers after the whole toolchain has been built.
for required_var in SOURCE_REPO SOURCE_FILE TARGET_REPO MODEL_ID HF_TOKEN; do
  if [ -z "${!required_var+x}" ]; then
    echo "ERROR: ${required_var} must be set in the job environment" >&2
    exit 1
  fi
done
unset required_var

# Optional settings and their defaults.
MESH_LLM_REF="${MESH_LLM_REF:-main}"
SOURCE_REVISION="${SOURCE_REVISION:-main}"
SOURCE_QUANT="${SOURCE_QUANT:-}"

# When no quant is given explicitly, take it from a "<name>:<quant>" MODEL_ID.
if [ -z "$SOURCE_QUANT" ] && [[ "$MODEL_ID" == *:* ]]; then
  SOURCE_QUANT="${MODEL_ID##*:}"
fi
if [ -z "$SOURCE_QUANT" ]; then
  echo "ERROR: SOURCE_QUANT is required to resolve the source GGUF without a model volume" >&2
  exit 1
fi
# Job banner: echoes the resolved source, target and build ref to the log.
printf '%s\n' "╔══════════════════════════════════════════════════════════╗"
printf '%s\n' "║ Layer Package Split Job ║"
printf '%s\n' "╠══════════════════════════════════════════════════════════╣"
printf '%s\n' "║ Source: ${SOURCE_REPO}/${SOURCE_FILE}"
printf '%s\n' "║ Quant: ${SOURCE_QUANT}"
printf '%s\n' "║ Target: ${TARGET_REPO}"
printf '%s\n' "║ Model: ${MODEL_ID}"
printf '%s\n' "║ Build: mesh-llm @ ${MESH_LLM_REF}"
printf '%s\n' "╚══════════════════════════════════════════════════════════╝"
printf '\n'
# Workspace layout.
#
# Toolchains and build products live on local ephemeral storage, because HF
# bucket mounts can be unsuitable for dynamic loader/toolchain execution.
# Package artifacts are written locally as well, uploaded one at a time and
# removed immediately, so the job never accumulates a full 400GB+ package.
# Every path can be overridden from the environment; `:=` only fills in a
# value when the variable is unset or empty.
: "${JOB_WORK_ROOT:=/bucket/job-work}"
SAFE_TARGET_REPO="$(printf '%s' "$TARGET_REPO" | tr -c '[:alnum:]._-' '_')"
: "${LOCAL_WORK_DIR:=/tmp/meshllm-layer-job-${SAFE_TARGET_REPO}-$$}"

# A job work dir generated here is removed on exit by default; one supplied
# by the caller is kept unless CLEANUP_JOB_WORK_DIR says otherwise.
default_cleanup_job_work_dir="false"
if [ -z "${JOB_WORK_DIR:-}" ]; then
  JOB_WORK_DIR="${JOB_WORK_ROOT}/${SAFE_TARGET_REPO}-$(date +%Y%m%d%H%M%S)-$$"
  default_cleanup_job_work_dir="true"
fi
: "${CLEANUP_JOB_WORK_DIR:=${default_cleanup_job_work_dir}}"
unset default_cleanup_job_work_dir

: "${PACKAGE_DIR:=${LOCAL_WORK_DIR}/package}"
: "${HF_HOME:=${JOB_WORK_DIR}/hf-home}"
: "${HF_HUB_CACHE:=${HF_HOME}/hub}"
: "${HF_XET_CACHE:=${HF_HOME}/xet}"
: "${JOB_TMP_DIR:=${LOCAL_WORK_DIR}/tmp}"
: "${BUILD_DIR:=${LOCAL_WORK_DIR}/build}"
: "${TOOL_DIR:=${LOCAL_WORK_DIR}/tools}"
: "${VENV_DIR:=${LOCAL_WORK_DIR}/venv}"
: "${ARTIFACT_UPLOAD_SCRIPT:=${LOCAL_WORK_DIR}/upload-package-artifact.py}"
: "${ARTIFACT_UPLOAD_HOOK:=${LOCAL_WORK_DIR}/upload-package-artifact.sh}"
: "${CARGO_HOME:=${LOCAL_WORK_DIR}/cargo-home}"
: "${RUSTUP_HOME:=${LOCAL_WORK_DIR}/rustup-home}"
: "${CARGO_TARGET_DIR:=${LOCAL_WORK_DIR}/cargo-target}"
: "${XDG_CACHE_HOME:=${LOCAL_WORK_DIR}/xdg-cache}"
: "${PIP_CACHE_DIR:=${LOCAL_WORK_DIR}/pip-cache}"
: "${BUILD_TMP_DIR:=${LOCAL_WORK_DIR}/tmp}"

# While building, all temp-file variables point at the build temp dir.
TMPDIR="$BUILD_TMP_DIR" TEMP="$BUILD_TMP_DIR" TMP="$BUILD_TMP_DIR"

export JOB_WORK_DIR PACKAGE_DIR HF_HOME HF_HUB_CACHE HF_XET_CACHE VENV_DIR ARTIFACT_UPLOAD_SCRIPT
export TMPDIR TEMP TMP CARGO_HOME RUSTUP_HOME CARGO_TARGET_DIR XDG_CACHE_HOME PIP_CACHE_DIR
#######################################
# EXIT handler: stops the heartbeat and removes the job's working directories.
# Globals:
#   HEARTBEAT_PID (read/written) - background heartbeat process, if any
#   LOCAL_WORK_DIR (read)        - always removed when non-empty
#   JOB_WORK_DIR (read)          - removed only if CLEANUP_JOB_WORK_DIR is "true"
# Outputs: one log line per directory removed.
# Returns: 0; all failures are deliberately ignored (best-effort cleanup).
#######################################
cleanup_job_work_dir() {
  # The script can exit while the heartbeat is still running (for example via
  # the ERR trap); without this the background loop would outlive the job.
  if [ -n "${HEARTBEAT_PID:-}" ]; then
    kill "$HEARTBEAT_PID" 2>/dev/null || true
    wait "$HEARTBEAT_PID" 2>/dev/null || true
    HEARTBEAT_PID=""
  fi
  if [ -n "${LOCAL_WORK_DIR:-}" ]; then
    echo "Cleaning local work dir: ${LOCAL_WORK_DIR}"
    rm -rf -- "$LOCAL_WORK_DIR" || true
  fi
  if [ "${CLEANUP_JOB_WORK_DIR:-false}" = "true" ] && [ -n "${JOB_WORK_DIR:-}" ]; then
    echo "Cleaning job work dir: ${JOB_WORK_DIR}"
    rm -rf -- "$JOB_WORK_DIR" || true
  fi
}
trap cleanup_job_work_dir EXIT
#######################################
# Prints disk usage and the relevant mounts, tagged with a caller label.
# Globals:   PACKAGE_DIR, TMPDIR (read)
# Arguments: $1 - label included in both section headings
# Outputs:   df and mount information on stdout; probe failures are ignored
#######################################
log_storage_snapshot() {
  local tag="$1"
  printf ' Storage snapshot (%s):\n' "$tag"
  df -h / /bucket "$PACKAGE_DIR" "$TMPDIR" 2>/dev/null || true
  printf ' Mounts (%s):\n' "$tag"
  mount | grep -E ' on / | on /bucket ' || true
}
#######################################
# ERR handler: reports the failing command, logs storage state, and exits
# with the failing command's status.
# Outputs: diagnostics on stderr.
#######################################
on_error() {
  # $? must be read before anything else can overwrite it.
  local rc=$?
  local failed_line=${BASH_LINENO[0]:-unknown}
  local failed_cmd=${BASH_COMMAND:-unknown}
  printf '%s\n' "ERROR: split job command failed at line ${failed_line} with status ${rc}: ${failed_cmd}" >&2
  log_storage_snapshot "error" >&2 || true
  exit "$rc"
}
trap on_error ERR
#######################################
# Starts a background loop that periodically logs a timestamp, disk usage,
# and the size of the package and Hugging Face cache directories.
# Globals:
#   JOB_HEARTBEAT_SECONDS (read) - interval, defaults to 60
#   PACKAGE_DIR, TMPDIR, HF_HUB_CACHE (read)
#   HEARTBEAT_PID (written)      - PID of the background loop
# Arguments: $1 - label shown in each heartbeat line
#######################################
start_heartbeat() {
  local tag="$1"
  local watched_dir
  while :; do
    sleep "${JOB_HEARTBEAT_SECONDS:-60}"
    printf ' Heartbeat (%s) %s\n' "$tag" "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    df -h / /bucket "$PACKAGE_DIR" "$TMPDIR" 2>/dev/null || true
    for watched_dir in "$PACKAGE_DIR" "$HF_HUB_CACHE"; do
      if [ -d "$watched_dir" ]; then
        du -sh "$watched_dir" 2>/dev/null || true
      fi
    done
  done &
  HEARTBEAT_PID=$!
}
#######################################
# Stops the background heartbeat loop, if one is recorded.
# Globals: HEARTBEAT_PID (read, then cleared)
# Returns: 0 always; a heartbeat that has already exited is not an error.
#######################################
stop_heartbeat() {
  [ -n "${HEARTBEAT_PID:-}" ] || return 0
  kill "$HEARTBEAT_PID" 2>/dev/null || true
  wait "$HEARTBEAT_PID" 2>/dev/null || true
  HEARTBEAT_PID=""
}
# Create every working directory up front so later steps can assume they exist.
job_dirs=(
  "$PACKAGE_DIR" "$HF_HUB_CACHE" "$HF_XET_CACHE" "$JOB_TMP_DIR" "$TOOL_DIR"
  "$CARGO_HOME" "$RUSTUP_HOME" "$CARGO_TARGET_DIR" "$XDG_CACHE_HOME" "$PIP_CACHE_DIR"
  "$BUILD_TMP_DIR"
)
mkdir -p "${job_dirs[@]}"
#######################################
# Formats a byte count with binary units (B, KiB, MiB, GiB, TiB, PiB).
# Arguments: $1 - non-negative integer number of bytes
# Outputs:   e.g. "512 B" or "1.5 KiB" on stdout
# Returns:   non-zero, with a one-line message on stderr, if $1 is not an integer
#######################################
format_bytes() {
  python3 - "${1:-}" <<'PYTHON'
import sys

try:
    value = float(int(sys.argv[1]))
except ValueError:
    sys.exit(f"format_bytes: expected an integer byte count, got {sys.argv[1]!r}")

for unit in ["B", "KiB", "MiB", "GiB", "TiB", "PiB"]:
    # PiB is the last unit, so anything larger is still printed in PiB.
    if value < 1024 or unit == "PiB":
        if unit == "B":
            print(f"{int(value)} {unit}")
        else:
            print(f"{value:.1f} {unit}")
        break
    value /= 1024
PYTHON
}
#######################################
# Estimates the /bucket space needed for the fallback source cache.
# Arguments: $1 - source model size in bytes (integer)
# Outputs:   source size plus 32 GiB of headroom, in bytes, on stdout
#######################################
estimate_bucket_workspace_bytes() {
  python3 - "$1" <<'PYTHON'
import sys

# Neither the source nor the package artifacts are supposed to pile up in the
# bucket; this figure only feeds the fallback-source-cache warning used when
# /source is unavailable.
HEADROOM_BYTES = 32 * 1024 ** 3
source_bytes = int(sys.argv[1])
print(source_bytes + HEADROOM_BYTES)
PYTHON
}
# ─── Build tools ──────────────────────────────────────────────────────────
echo "=== [1/9] Installing build dependencies ==="
# Update and install are separate commands on purpose. In `a && b` a failing
# `a` is exempt from both errexit and the ERR trap, so a broken
# `apt-get update` would be skipped silently and only show up later as a
# confusing missing-tool error.
apt-get update -qq
apt-get install -y -qq \
  cmake git curl build-essential pkg-config libssl-dev \
  python3-pip python3-venv > /dev/null 2>&1
# Drop package caches to keep the container root filesystem small.
apt-get clean
rm -rf /var/lib/apt/lists/*
echo "=== [2/9] Installing Rust ==="
# rustup honours the exported CARGO_HOME / RUSTUP_HOME, so the toolchain lands
# in the job's local work dir; pipefail makes a failed download fatal.
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y > /dev/null 2>&1
source "${CARGO_HOME}/env"
echo "=== [3/9] Cloning mesh-llm and building skippy-model-package ==="
git clone --filter=blob:none https://github.com/Mesh-LLM/mesh-llm.git "$BUILD_DIR"
cd "$BUILD_DIR"
# Resolve MESH_LLM_REF. A remote branch or tag is always fetched; otherwise a
# commit already present in the clone is checked out directly, and anything
# else is fetched from origin.
checkout_target="FETCH_HEAD"
if ! git ls-remote --exit-code --heads origin "$MESH_LLM_REF" >/dev/null 2>&1 \
  && ! git ls-remote --exit-code --tags origin "$MESH_LLM_REF" >/dev/null 2>&1 \
  && git cat-file -e "$MESH_LLM_REF^{commit}" 2>/dev/null; then
  checkout_target="$MESH_LLM_REF"
else
  git fetch --depth 1 origin "$MESH_LLM_REF"
fi
git checkout --detach "$checkout_target"
# Full clone needed for git-am patches in prepare-llama
sed -i 's/--filter=blob:none //' scripts/prepare-llama.sh
echo " Running prepare-llama.sh..."
scripts/prepare-llama.sh pinned 2>&1 | tail -5
echo " Running build-llama.sh..."
scripts/build-llama.sh 2>&1 | tail -5
# Locate the llama.cpp build directory (build-llama.sh puts it here)
LLAMA_BUILD_DIR=".deps/llama-build/build-stage-abi-cpu"
echo " Verifying llama.cpp build at $LLAMA_BUILD_DIR..."
# Collect the list first and test it explicitly. The previous
# `find | head || echo WARNING` never warned when nothing was found (find
# still exits 0) and could warn spuriously when head closed the pipe early
# under pipefail.
LLAMA_STATIC_LIBS="$(find "$LLAMA_BUILD_DIR" -name "*.a" 2>/dev/null || true)"
if [ -n "$LLAMA_STATIC_LIBS" ]; then
  # sed reads its whole input, so no SIGPIPE is raised on long lists.
  printf '%s\n' "$LLAMA_STATIC_LIBS" | sed -n '1,10p'
else
  echo " WARNING: no .a files found"
fi
# Build the splitter binary
echo " Building skippy-model-package..."
# Capture the build output in a log so the full text can be shown on failure.
# Previously a cargo failure tripped pipefail/errexit before the "retry with
# full output" branch could run, and that retry pointed at a different llama
# build directory than the one just built.
SKIPPY_BUILD_LOG="$(mktemp)"
if SKIPPY_LLAMA_BUILD_DIR="$LLAMA_BUILD_DIR" \
  cargo build --release -p skippy-model-package >"$SKIPPY_BUILD_LOG" 2>&1; then
  tail -20 "$SKIPPY_BUILD_LOG"
else
  SKIPPY_BUILD_STATUS=$?
  echo "ERROR: cargo build failed with status ${SKIPPY_BUILD_STATUS}; full output follows" >&2
  cat "$SKIPPY_BUILD_LOG" >&2
  exit "$SKIPPY_BUILD_STATUS"
fi
SLICER="${CARGO_TARGET_DIR}/release/skippy-model-package"
if [ ! -f "$SLICER" ]; then
  echo "ERROR: Build failed — binary not found at $SLICER" >&2
  cat "$SKIPPY_BUILD_LOG" >&2
  exit 1
fi
rm -f "$SKIPPY_BUILD_LOG"
# Keep only the binary; the build tree is deleted once the build is done.
cp "$SLICER" "${TOOL_DIR}/skippy-model-package"
SLICER="${TOOL_DIR}/skippy-model-package"
chmod +x "$SLICER"
# Leave the build tree before deleting it, then reclaim the space used by the
# sources and the Rust toolchain.
cd /
stale_build_paths=("$BUILD_DIR" "$CARGO_TARGET_DIR" "$CARGO_HOME" "$RUSTUP_HOME")
rm -rf "${stale_build_paths[@]}"
# From here on, temp files go to the job temp dir rather than the build one.
export TMPDIR="$JOB_TMP_DIR" TEMP="$JOB_TMP_DIR" TMP="$JOB_TMP_DIR"
printf '%s\n' " ✓ Built: $SLICER"
printf '%s\n' " Root filesystem after build cleanup:"
df -h / || true
printf '%s\n' " Preparing Hugging Face uploader..."
# Isolated virtualenv holding huggingface_hub for every Python helper below.
python3 -m venv "$VENV_DIR" > /dev/null
"$VENV_DIR/bin/pip" install -q huggingface_hub
# Make sure the target repository exists before any artifact is uploaded.
"$VENV_DIR/bin/python3" << 'PYTHON'
import os

from huggingface_hub import HfApi

hub = HfApi(token=os.environ["HF_TOKEN"])
hub.create_repo(os.environ["TARGET_REPO"], exist_ok=True)
PYTHON
# Per-artifact uploader. It takes the artifact's local path and its in-repo
# path from SKIPPY_PACKAGE_ARTIFACT_PATH / SKIPPY_PACKAGE_ARTIFACT_RELATIVE_PATH,
# uploads the file to TARGET_REPO, then deletes the local copy.
cat > "$ARTIFACT_UPLOAD_SCRIPT" <<'PYTHON'
import os
from pathlib import Path

from huggingface_hub import HfApi

artifact = Path(os.environ["SKIPPY_PACKAGE_ARTIFACT_PATH"])
repo_path = os.environ["SKIPPY_PACKAGE_ARTIFACT_RELATIVE_PATH"]
repo_id = os.environ["TARGET_REPO"]

HfApi(token=os.environ["HF_TOKEN"]).upload_file(
    repo_id=repo_id,
    path_or_fileobj=str(artifact),
    path_in_repo=repo_path,
    repo_type="model",
    commit_message=f"Add package artifact {repo_path}",
)

# Record the size before the local copy is removed.
uploaded_bytes = artifact.stat().st_size
artifact.unlink()
print(f" Uploaded and removed {repo_path} ({uploaded_bytes} bytes)")
PYTHON

# Shell wrapper passed to write-package as --after-artifact-command; it runs
# the uploader with the venv interpreter.
printf '%s\n' \
  '#!/bin/bash' \
  'set -euo pipefail' \
  '"${VENV_DIR}/bin/python3" "${ARTIFACT_UPLOAD_SCRIPT}"' \
  > "$ARTIFACT_UPLOAD_HOOK"
chmod +x "$ARTIFACT_UPLOAD_HOOK"
# ─── Split ────────────────────────────────────────────────────────────────
echo ""
echo "=== [4/9] Splitting model ==="
# Hugging Face reference of the form repo[@revision]:quant; the revision is
# omitted when it is "main".
SOURCE_REF="${SOURCE_REPO}"
[ "$SOURCE_REVISION" = "main" ] || SOURCE_REF+="@${SOURCE_REVISION}"
SOURCE_REF+=":${SOURCE_QUANT}"
echo " Source ref: $SOURCE_REF"
if [ -n "${SOURCE_TOTAL_BYTES:-}" ]; then
  echo " Source bytes: $SOURCE_TOTAL_BYTES"
  ESTIMATED_BUCKET_BYTES="$(estimate_bucket_workspace_bytes "$SOURCE_TOTAL_BYTES")"
  echo " Estimated fallback /bucket cache needed: $(format_bytes "$ESTIMATED_BUCKET_BYTES")"
fi
# Default to resolving the source through the Hugging Face reference; switch
# to the mounted file, and pass its identity explicitly, when it is present.
MOUNTED_SOURCE_PATH="/source/${SOURCE_FILE}"
WRITE_PACKAGE_INPUT="$SOURCE_REF"
WRITE_PACKAGE_IDENTITY_ARGS=()
if [ -f "$MOUNTED_SOURCE_PATH" ]; then
  WRITE_PACKAGE_INPUT="$MOUNTED_SOURCE_PATH"
  WRITE_PACKAGE_IDENTITY_ARGS=(
    --model-id "$MODEL_ID"
    --source-repo "$SOURCE_REPO"
    --source-revision "$SOURCE_REVISION"
    --source-file "$SOURCE_FILE"
  )
  echo " Source mount: $MOUNTED_SOURCE_PATH"
else
  echo " Source mount: not available; falling back to Hugging Face cache download"
fi
printf '%s\n' " Hugging Face cache: $HF_HUB_CACHE"
printf '%s\n' " Package workspace: $PACKAGE_DIR"
printf '%s\n' " Temporary workspace: $TMPDIR"
log_storage_snapshot "before write-package"

# Prints the filesystem (first df column) that holds the given path.
filesystem_of() {
  df -P "$1" | awk 'NR==2 {print $1}'
}

# Warn when the package workspace shares the container root filesystem.
ROOT_FS="$(filesystem_of /)"
PACKAGE_FS="$(filesystem_of "$PACKAGE_DIR")"
if [ -n "$ROOT_FS" ] && [ "$ROOT_FS" = "$PACKAGE_FS" ]; then
  echo "WARNING: package workspace is on the container root filesystem; very large splits may hit the HF Jobs 50G ephemeral storage limit." >&2
fi

# Warn when the free space is below the estimate computed earlier.
# NOTE(review): the estimate is described elsewhere as a /bucket fallback-cache
# figure, yet it is compared against the package workspace here — confirm that
# this is the intended filesystem.
if [ -n "${ESTIMATED_BUCKET_BYTES:-}" ]; then
  PACKAGE_AVAILABLE_BYTES="$(df -Pk "$PACKAGE_DIR" | awk 'NR==2 {printf "%.0f", $4 * 1024}')"
  if [ -n "$PACKAGE_AVAILABLE_BYTES" ] \
    && [ "$PACKAGE_AVAILABLE_BYTES" -gt 0 ] \
    && [ "$PACKAGE_AVAILABLE_BYTES" -lt "$ESTIMATED_BUCKET_BYTES" ]; then
    echo "WARNING: package workspace has $(format_bytes "$PACKAGE_AVAILABLE_BYTES") available, below estimated need $(format_bytes "$ESTIMATED_BUCKET_BYTES")." >&2
  fi
fi
echo " Starting write-package at $(date -u +%Y-%m-%dT%H:%M:%SZ)"
start_heartbeat "write-package"
# Capture the exit status with `||` instead of toggling `set +e`. The ERR trap
# fires on any failing command whether or not errexit is enabled, so with
# `set +e` a failing splitter still went straight to on_error and exited,
# skipping stop_heartbeat and the dedicated error path below. A command on
# the left of `||` is exempt from both errexit and the ERR trap.
WRITE_PACKAGE_STATUS=0
# The ${arr[@]+...} form keeps an empty identity-args array from tripping
# `set -u` on older bash releases.
time "$SLICER" write-package "$WRITE_PACKAGE_INPUT" \
  --out-dir "$PACKAGE_DIR" \
  --after-artifact-command "$ARTIFACT_UPLOAD_HOOK" \
  ${WRITE_PACKAGE_IDENTITY_ARGS[@]+"${WRITE_PACKAGE_IDENTITY_ARGS[@]}"} \
  || WRITE_PACKAGE_STATUS=$?
stop_heartbeat
if [ "$WRITE_PACKAGE_STATUS" -ne 0 ]; then
  echo "ERROR: write-package failed with status $WRITE_PACKAGE_STATUS" >&2
  log_storage_snapshot "write-package failed" >&2 || true
  exit "$WRITE_PACKAGE_STATUS"
fi
echo " Finished write-package at $(date -u +%Y-%m-%dT%H:%M:%SZ)"
log_storage_snapshot "after write-package"
# Pull the summary figures out of the manifest written by write-package.
MANIFEST_FILE="${PACKAGE_DIR}/model-package.json"

# Evaluates a Python expression against the parsed manifest (bound to `m`)
# and prints the result. $1 - expression.
manifest_value() {
  python3 -c "import json, sys; m = json.load(open(sys.argv[1])); print($1)" "$MANIFEST_FILE"
}

SOURCE_PATH="$(manifest_value "m['source_model']['path']")"
echo " Cached source: $SOURCE_PATH ($(du -h "$SOURCE_PATH" | cut -f1))"
LAYER_COUNT="$(manifest_value "m['layer_count']")"
# Total of artifact_bytes over shared, layer and projector artifacts.
TOTAL_SIZE="$(manifest_value "sum(int(a.get('artifact_bytes') or 0) for a in list(m['shared'].values()) + m.get('layers', []) + m.get('projectors', []))")"
TOTAL_SIZE_LABEL="$(format_bytes "$TOTAL_SIZE")"
echo " ✓ Split into $LAYER_COUNT layers; artifacts uploaded incrementally (${TOTAL_SIZE_LABEL} total)"
# ─── Verify manifest ──────────────────────────────────────────────────────
echo ""
echo "=== [5/9] Verifying package manifest ==="
"$VENV_DIR/bin/python3" << 'PYTHON'
# Checks that every artifact listed in the manifest records both a path and a
# SHA-256 checksum. Only the manifest is inspected, not the uploaded files.
import json
import os
from pathlib import Path

manifest_file = Path(os.environ["PACKAGE_DIR"]) / "model-package.json"
manifest = json.loads(manifest_file.read_text())

shared = manifest["shared"]
required = [shared["metadata"], shared["embeddings"], shared["output"]]
required += manifest.get("layers", [])
required += manifest.get("projectors", [])

incomplete = sum(1 for item in required if not (item.get("path") and item.get("sha256")))
if incomplete:
    raise SystemExit(f"manifest contains {incomplete} artifacts without path/checksum")
print(f" ✓ Manifest records {len(required)} uploaded artifacts")
PYTHON
# ─── Publish ──────────────────────────────────────────────────────────────
echo ""
echo "=== [6/9] Publishing to HuggingFace ==="
# The heredoc delimiter is quoted, like every other embedded script in this
# file, so the shell never expands `$` or backticks inside the Python source.
"$VENV_DIR/bin/python3" << 'PYTHON'
# Uploads model-package.json to the target repo and prints a short summary.
import json
import os
from pathlib import Path

from huggingface_hub import HfApi

api = HfApi(token=os.environ['HF_TOKEN'])
target_repo = os.environ['TARGET_REPO']
source_repo = os.environ['SOURCE_REPO']
model_id = os.environ.get('MODEL_ID', '')
manifest_path = Path(os.environ['PACKAGE_DIR']) / 'model-package.json'

api.upload_file(
    repo_id=target_repo,
    path_or_fileobj=str(manifest_path),
    path_in_repo='model-package.json',
    repo_type='model',
    commit_message=f'Add layer package manifest from {source_repo} ({model_id})',
)

# Print summary (read_text closes the file, unlike a bare open()).
manifest = json.loads(manifest_path.read_text())
print(f' ✓ Published: https://huggingface.co/{target_repo}')
print(f' Model: {manifest["model_id"]}')
print(f' Layers: {manifest["layer_count"]}')
print(f' Schema: {manifest["schema_version"]}')
PYTHON
# ─── Update catalog ───────────────────────────────────────────────────────
echo ""
echo "=== [7/9] Updating meshllm/catalog ==="
"$VENV_DIR/bin/python3" << 'PYTHON'
# Adds (or refreshes) this layer package in the meshllm/catalog dataset entry
# for the source repo, keeping whatever the entry already lists.
import json
import os
import re

from huggingface_hub import HfApi

try:
    from huggingface_hub.errors import EntryNotFoundError
except ImportError:  # releases that only expose the error classes via .utils
    from huggingface_hub.utils import EntryNotFoundError

api = HfApi(token=os.environ['HF_TOKEN'])
source_repo = os.environ['SOURCE_REPO']
target_repo = os.environ['TARGET_REPO']
source_file = os.environ['SOURCE_FILE']
source_revision = os.environ.get('SOURCE_REVISION', 'main')
model_id = os.environ.get('MODEL_ID', '')
package_dir = os.environ['PACKAGE_DIR']

# Read manifest for metadata
with open(os.path.join(package_dir, 'model-package.json')) as manifest_file:
    manifest = json.load(manifest_file)
layer_count = manifest['layer_count']

# Determine catalog entry path: entries/<owner>/<repo-name>.json
owner, repo_name = source_repo.split('/', 1)
entry_path = f"entries/{owner}/{repo_name}.json"

# Try to fetch the existing entry. Only a missing file means "start a new
# entry". Any other failure (network error, auth problem, corrupt JSON) must
# abort: treating it as "missing" would upload a fresh entry over the existing
# one and drop every other variant it lists.
catalog_repo = "meshllm/catalog"
try:
    existing_path = api.hf_hub_download(
        repo_id=catalog_repo,
        filename=entry_path,
        repo_type="dataset",
    )
except EntryNotFoundError:
    # Create new entry
    entry = {"schema_version": 1, "source_repo": source_repo, "variants": {}}
else:
    with open(existing_path) as existing_file:
        entry = json.load(existing_file)

# Build variant name from source file stem (not MODEL_ID).
# For "UD-Q4_K_XL/Qwen3-32B-UD-Q4_K_XL-00001-of-00002.gguf" → "Qwen3-32B-UD-Q4_K_XL"
file_stem = source_file.split('/')[-1].replace('.gguf', '')
# Strip shard suffix like "-00001-of-00002"
variant_name = re.sub(r'-\d{5}-of-\d{5}$', '', file_stem)

package_entry = {
    "type": "layer-package",
    "repo": target_repo,
    "layer_count": layer_count,
}


def new_variant() -> dict:
    """Variant record for a source file the catalog does not list yet."""
    return {
        "source": {
            "repo": source_repo,
            "file": source_file,
            "revision": source_revision,
        },
        "curated": {
            "name": variant_name,
            "size": f"{layer_count} layers",
            "description": f"Layer package for {model_id}",
        },
        "packages": [package_entry],
    }


def upsert_package(variant: dict) -> None:
    """Replace any package pointing at target_repo with the fresh entry."""
    packages = [p for p in variant.get("packages", []) if p.get("repo") != target_repo]
    packages.append(package_entry)
    variant["packages"] = packages


# Handle both dict-style and list-style variants
variants = entry.get("variants", {})
if isinstance(variants, dict):
    # Dict-keyed by variant name (existing catalog format)
    if variant_name in variants:
        upsert_package(variants[variant_name])
    else:
        variants[variant_name] = new_variant()
    entry["variants"] = variants
else:
    # List-style (fallback): match on the curated name
    existing_variant = next(
        (v for v in variants if v.get("curated", {}).get("name") == variant_name),
        None,
    )
    if existing_variant:
        upsert_package(existing_variant)
    else:
        variants.append(new_variant())

# Upload the serialized entry directly from memory, so no temp file is left
# behind.
create_pr = os.environ.get('CATALOG_CREATE_PR', 'false').lower() == 'true'
api.upload_file(
    repo_id=catalog_repo,
    path_or_fileobj=json.dumps(entry, indent=2).encode("utf-8"),
    path_in_repo=entry_path,
    repo_type="dataset",
    commit_message=f"Add layer package for {model_id} ({target_repo})",
    create_pr=create_pr,
)
print(f" ✓ Catalog updated: {catalog_repo}/{entry_path}")
print(f" Variant: {variant_name}")
print(f" Package: {target_repo} ({layer_count} layers)")
PYTHON
# ─── Model Card ────────────────────────────────────────────────────────────
echo ""
echo "=== [8/9] Uploading model card ==="
"$VENV_DIR/bin/python3" << 'PYTHON'
# Renders README.md for the package repo from the manifest and the job
# environment, then uploads it to the target repo.
from huggingface_hub import HfApi
from pathlib import Path
import hashlib
import json
import os
import re

package_dir = Path(os.environ["PACKAGE_DIR"])
manifest_path = package_dir / "model-package.json"
manifest = json.loads(manifest_path.read_text())
source_repo = os.environ["SOURCE_REPO"]
source_file = os.environ["SOURCE_FILE"]
source_revision = os.environ.get("SOURCE_REVISION", "main")
target_repo = os.environ["TARGET_REPO"]
model_id = os.environ.get("MODEL_ID", manifest.get("model_id", target_repo))
mesh_llm_ref = os.environ.get("MESH_LLM_REF", "main")


def sha256(path: Path) -> str:
    """Hex SHA-256 of a file, read in 1 MiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as file:
        for chunk in iter(lambda: file.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def fmt_bytes(size: int) -> str:
    """Human-readable size using 1024-based steps, capped at TB."""
    value = float(size)
    for unit in ["B", "KB", "MB", "GB", "TB"]:
        if value < 1024 or unit == "TB":
            if unit == "B":
                return f"{int(value)} {unit}"
            return f"{value:.1f} {unit}"
        value /= 1024


def artifact_bytes(artifact: dict) -> int:
    """Size recorded for an artifact, or 0 when absent."""
    return int(artifact.get("artifact_bytes") or 0)


def md_cell(value) -> str:
    """Escape a raw value for use inside a Markdown table cell."""
    text = "" if value is None else str(value)
    return text.replace("|", "\\|").replace("\n", "<br>")


def link(label: str, url: str) -> str:
    """Markdown link whose label is already cell-escaped."""
    return f"[{md_cell(label)}]({url})"


def code(value) -> str:
    """Inline code span whose content is already cell-escaped."""
    return f"`{md_cell(value)}`"


def yaml_quote(value: str) -> str:
    """Quote a string for the YAML front matter (JSON strings are valid YAML)."""
    return json.dumps(value)


def infer_model_family(name: str) -> str:
    """Best-effort family name from a known list, else the first '-' token."""
    lowered = name.lower()
    for family in ["Qwen3", "Qwen2.5", "DeepSeek", "Kimi", "Gemma", "GLM", "Llama"]:
        if family.lower() in lowered:
            return family
    return name.split("-")[0] if name else "Unknown"


def infer_parameter_scale(name: str) -> str:
    """Best-effort parameter count such as "32B" or "30B-A3B"."""
    match = re.search(r"(?i)(\d+(?:\.\d+)?[BM](?:-A\d+(?:\.\d+)?B)?)", name)
    return match.group(1) if match else "not recorded"


def infer_quantization(name: str, source_path: str) -> str:
    """Best-effort quantization label taken from the model name or source path.

    The optional I/T prefix is matched together with the Q, so "IQ4_XS" is no
    longer reported as "Q4_XS", and the segment after the underscore may be
    numeric so "Q8_0" and "Q4_0" are recognised.
    """
    combined = f"{name}/{source_path}"
    patterns = [
        r"UD-[IT]?Q\d+_[A-Z0-9]+(?:_[A-Z]+)?",
        r"[IT]?Q\d+_[A-Z0-9]+(?:_[A-Z]+)?",
        r"BF16",
        r"F16",
    ]
    for pattern in patterns:
        match = re.search(pattern, combined, re.IGNORECASE)
        if match:
            return match.group(0)
    return "not recorded"


shared = manifest.get("shared", {})
layers = manifest.get("layers", [])
projectors = manifest.get("projectors", [])
manifest_hash = sha256(manifest_path)
total_bytes = sum(artifact_bytes(artifact) for artifact in shared.values())
total_bytes += sum(artifact_bytes(layer) for layer in layers)
total_bytes += sum(artifact_bytes(projector) for projector in projectors)
source_model = manifest.get("source_model", {})
display_name = source_model.get("distribution_id") or model_id
model_family = infer_model_family(display_name)
parameter_scale = infer_parameter_scale(display_name)
quantization = infer_quantization(display_name, source_file)
source_path = source_model.get("path") or f"/hf-cache/{source_file}"
activation_width = manifest.get("activation_width") or "not recorded"
skippy_abi = manifest.get("skippy_abi_version") or "not recorded"
source_sha = source_model.get("sha256") or "not recorded"
canonical_ref = source_model.get("canonical_ref") or f"{source_repo}@{source_revision}/{source_file}"

# Rows of the "What Is Included" table: (label, path, contents, checksum).
file_rows = [
    ("Manifest", "model-package.json", "Package schema, source identity, checksums", manifest_hash),
]
for label, key in [
    ("Metadata", "metadata"),
    ("Embeddings", "embeddings"),
    ("Output head", "output"),
]:
    artifact = shared.get(key)
    if artifact:
        file_rows.append((
            label,
            artifact.get("path", f"shared/{key}.gguf"),
            f"{artifact.get('tensor_count', 'unknown')} tensors, {fmt_bytes(artifact_bytes(artifact))}",
            artifact.get("sha256", "not recorded"),
        ))
if layers:
    layer_bytes = sum(artifact_bytes(layer) for layer in layers)
    layer_tensors = sum(int(layer.get("tensor_count") or 0) for layer in layers)
    file_rows.append((
        "Transformer layers",
        "layers/layer-*.gguf",
        f"{len(layers)} layer artifacts, {layer_tensors} tensors, {fmt_bytes(layer_bytes)}",
        "see model-package.json",
    ))
for projector in projectors:
    file_rows.append((
        "Projector",
        projector.get("path", "projectors/projector.gguf"),
        f"{projector.get('kind', 'multimodal')} projector, {fmt_bytes(artifact_bytes(projector))}",
        projector.get("sha256", "not recorded"),
    ))

# Overview rows hold fully rendered cells. link()/code() already escape their
# content, so plain values are escaped here exactly once and the render loops
# below must not escape again (a second pass would turn "\|" into "\\|").
rows = [
    ("Source model", link(source_repo, f"https://huggingface.co/{source_repo}")),
    ("Model id", code(model_id)),
    ("Family", md_cell(model_family)),
    ("Parameter scale", md_cell(parameter_scale)),
    ("Quantization", code(quantization)),
    ("Layer count", md_cell(manifest.get("layer_count", len(layers)))),
    ("Activation width", md_cell(activation_width)),
    ("Package size", md_cell(fmt_bytes(total_bytes))),
    ("Source file", code(source_file)),
    ("Package repo", link(target_repo, f"https://huggingface.co/{target_repo}")),
]

readme = f"""---
library_name: mesh-llm
base_model:
- {yaml_quote(source_repo)}
pipeline_tag: text-generation
tags:
- gguf
- mesh-llm
- layer-package
- skippy
- distributed-inference
- local-inference
- openai-compatible
---
<div align="center">
<a href="https://www.meshllm.cloud">
<img src="https://github.com/Mesh-LLM/mesh-llm/raw/main/docs/mesh-llm-logo.svg" alt="Mesh LLM" width="220">
</a>
<h1>{display_name}</h1>
<p>
<strong>Distributed GGUF inference package for Mesh LLM</strong>
</p>
<p>
<a href="https://www.meshllm.cloud"><img alt="Website" src="https://img.shields.io/badge/Website-meshllm.cloud-111111?style=for-the-badge"></a>
<a href="https://github.com/Mesh-LLM/mesh-llm"><img alt="GitHub" src="https://img.shields.io/badge/GitHub-Mesh--LLM-24292f?style=for-the-badge&logo=github"></a>
<a href="https://discord.gg/rs6fmc63eN"><img alt="Discord" src="https://img.shields.io/badge/Discord-Join-5865F2?style=for-the-badge&logo=discord&logoColor=white"></a>
</p>
</div>
GGUF layer package for running **{display_name}** across a local Mesh LLM cluster.
This package is derived from [{source_repo}](https://huggingface.co/{source_repo}) and keeps the original GGUF distribution split into per-layer artifacts for distributed inference.
## Highlights
| Run locally | Pool multiple machines | OpenAI-compatible | Package variant |
|---|---|---|---|
| Private inference on your hardware | Split layers across peers | Serve `/v1/chat/completions` locally | `{quantization}` layer package |
## Model Overview
| Property | Value |
|---|---|
"""
for key, value in rows:
    readme += f"| **{md_cell(key)}** | {value} |\n"
readme += f"""
## Recommended Use
- Local and private inference with Mesh LLM.
- Multi-machine serving when the full GGUF is too large for one host.
- OpenAI-compatible chat/completions workflows through Mesh LLM's local API.
For upstream architecture details, chat template guidance, sampling recommendations, license terms, and benchmark notes, see the source model card: [{source_repo}](https://huggingface.co/{source_repo}).
## Quickstart
```bash
# Run this on each machine that should contribute memory/compute.
mesh-llm serve --model "{target_repo}" --split
```
```bash
# Check the mesh and discover the OpenAI-compatible model name.
curl -s http://localhost:3131/api/status
curl -s http://localhost:3131/v1/models
```
```bash
# Send an OpenAI-compatible chat request.
curl -s http://localhost:3131/v1/chat/completions \\
-H "Content-Type: application/json" \\
-d '{{
"model": "{model_id}",
"messages": [{{"role": "user", "content": "Write a tiny hello-world function in Rust."}}],
"max_tokens": 128
}}'
```
## Package Variant
| Property | Value |
|---|---|
"""
for key, value in [
    ("Format", code(manifest.get("format", "layer-package"))),
    ("Canonical source ref", code(canonical_ref)),
    ("Source revision", code(source_revision)),
    ("Source SHA-256", code(source_sha)),
    ("Skippy ABI", code(skippy_abi)),
    ("Package manifest SHA-256", code(manifest_hash)),
]:
    # code() has already escaped the value.
    readme += f"| **{md_cell(key)}** | {value} |\n"
readme += f"""
## What Is Included
| Artifact | Path | Contents | SHA-256 |
|---|---|---|---|
"""
for label, path, contents, checksum in file_rows:
    readme += f"| {md_cell(label)} | {code(path)} | {md_cell(contents)} | {code(checksum)} |\n"
readme += f"""
## Validation
Generated by the Mesh LLM HF Jobs splitter from `mesh-llm` ref `{mesh_llm_ref}`.
Each artifact is checksummed as it is written, uploaded to this repository, and removed from the job workspace before the next artifact is produced.
```bash
skippy-model-package write-package "{source_path}" --out-dir "{package_dir}"
```
## Links
- Source model: [{source_repo}](https://huggingface.co/{source_repo})
- Mesh LLM website: [meshllm.cloud](https://www.meshllm.cloud)
- Mesh LLM: [github.com/Mesh-LLM/mesh-llm](https://github.com/Mesh-LLM/mesh-llm)
- Discord: [discord.gg/rs6fmc63eN](https://discord.gg/rs6fmc63eN)
- Package catalog: [meshllm/catalog](https://huggingface.co/datasets/meshllm/catalog)
- Package format: [layer-package-repos.md](https://github.com/Mesh-LLM/mesh-llm/blob/main/docs/specs/layer-package-repos.md)
"""

# Upload the card straight from memory as UTF-8, instead of writing it to a
# fixed /tmp/README.md in whatever the locale encoding happens to be.
api = HfApi(token=os.environ["HF_TOKEN"])
api.upload_file(
    path_or_fileobj=readme.encode("utf-8"),
    path_in_repo="README.md",
    repo_id=target_repo,
    repo_type="model",
)
print(" ✓ Model card uploaded")
PYTHON
# ─── Summary ──────────────────────────────────────────────────────────────
# Final recap: where the package was published and how to serve it.
printf '\n'
printf '%s\n' "=== [9/9] Done ==="
printf '\n'
printf '%s\n' " Published: https://huggingface.co/${TARGET_REPO}"
printf '%s\n' " Layers: ${LAYER_COUNT}"
printf '%s\n' " Total size: ${TOTAL_SIZE_LABEL}"
printf '\n'
printf '%s\n' " Use with mesh-llm:"
printf '%s\n' " mesh-llm serve --model ${TARGET_REPO} --split"

Xet Storage Details

Size:
30.4 kB
·
Xet hash:
5187b9471fd62ba1956d6d4d238e990bb55c979b81ae49c602ccec5a0e5d5e37

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.