SF-Cluster / examples /run_demo.sh
chq1155's picture
Initial OSS release: mosaic + gradient subset builders (verified KaiB 95.0%, GA98 92.5%, GB98 50.0% on Phase XII pilot)
ccbe063 verified
Raw
History Blame Contribute Delete
1.86 kB
#!/usr/bin/env bash
# Minimal end-to-end demo on synthetic A3M + FI matrix.
#
# Produces (inside the directory that holds this script):
#   demo_out/synthetic.a3m     -- synthetic alignment (200 sequences, L=60)
#   demo_out/synthetic_fi.npy  -- synthetic FI matrix, shape (200, 60)
#   demo_out/mosaic/           -- 12 mosaic subsets
#   demo_out/gradient/         -- 12 gradient subsets
set -euo pipefail

# Anchor every path to the script's own directory so the demo behaves the same
# regardless of the caller's working directory.
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
OUT="${HERE}/demo_out"
mkdir -p "${OUT}"

# DEMO_OUT must be exported *before* Python starts: the generator below reads
# it to decide where to write. If it is unset the generator falls back to the
# cwd-relative "examples/demo_out", which only matches ${OUT} when the script
# is launched from the repository root.
export DEMO_OUT="${OUT}"

# 1. Generate synthetic inputs (200 random "sequences", L=60, random FI).
# The heredoc delimiter is quoted so the shell performs no expansion inside
# the Python source.
python - <<'PY'
import os
import numpy as np
from pathlib import Path

# Output directory is handed over by the surrounding shell script.
OUT = Path(os.environ.get("DEMO_OUT", "examples/demo_out"))
OUT.mkdir(parents=True, exist_ok=True)

# Fixed seed so repeated runs write identical inputs.
rng = np.random.default_rng(0)
N, L = 200, 60
alphabet = np.array(list("ACDEFGHIKLMNPQRSTVWY-"))
seqs = rng.choice(alphabet, size=(N, L))

a3m_path = OUT / "synthetic.a3m"
with open(a3m_path, "w") as f:
    # First line: "#<L>\t1" header, followed by one record per row.
    f.write(f"#{L}\t1\n")
    for i, row in enumerate(seqs):
        # Row 0 is tagged as the query; the rest get zero-padded ids.
        tag = "query" if i == 0 else f"seq{i:04d}"
        f.write(f">{tag}\n{''.join(row)}\n")

# Synthetic FI matrix: random but with a few high-variance columns.
fi = rng.normal(loc=0.0, scale=0.3, size=(N, L)).astype(np.float64)
# Pick L // 5 distinct columns and add wider noise to them.
hv_cols = rng.choice(L, size=L // 5, replace=False)
fi[:, hv_cols] += rng.normal(loc=0.0, scale=1.2, size=(N, len(hv_cols)))
np.save(OUT / "synthetic_fi.npy", fi)

print(f"wrote {a3m_path}")
print(f"wrote {OUT/'synthetic_fi.npy'} shape={fi.shape}")
PY
# 2-3. Build the subsets: first "mosaic", then "gradient".
# Both runs read the same synthetic A3M and FI matrix and use the same subset
# count, subset size and seed; only the method and its output directory differ.
for method in mosaic gradient; do
  sf-cluster build \
    --a3m "${OUT}/synthetic.a3m" \
    --fi "${OUT}/synthetic_fi.npy" \
    --method "${method}" \
    --n-subsets 12 \
    --subset-size 32 \
    --seed 20260422 \
    --out "${OUT}/${method}"
done

# Blank separator line, then point the user at the results.
echo
echo "Done. Inspect ${OUT}/mosaic and ${OUT}/gradient."