File size: 1,860 Bytes
#!/usr/bin/env bash
# Minimal end-to-end demo on synthetic A3M + FI matrix.
#
# Produces, next to this script:
#   demo_out/mosaic/   -- 12 mosaic subsets
#   demo_out/gradient/ -- 12 gradient subsets
set -euo pipefail

# Absolute directory containing this script, so the output location does not
# depend on the caller's working directory.
#   * CDPATH='' stops an inherited CDPATH from redirecting the relative `cd`
#     to some other directory and from echoing the target into the captured
#     output (which would corrupt HERE).
#   * `--` keeps a path that starts with a dash from being parsed as options.
HERE="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"

# Every artifact of the demo is written below this directory.
OUT="${HERE}/demo_out"
mkdir -p "${OUT}"
# 1. Generate synthetic inputs (200 random "sequences", L=60, random FI).
#
# DEMO_OUT must be exported *before* Python starts: the heredoc below reads it
# from the environment and otherwise falls back to the cwd-relative
# "examples/demo_out", which is not where the later steps look for the files.
export DEMO_OUT="${OUT}"

# The heredoc delimiter is quoted, so the shell performs no expansion inside
# the Python source.
python - <<'PY'
import os
import numpy as np
from pathlib import Path

# Output directory: taken from DEMO_OUT, with a cwd-relative fallback.
OUT = Path(os.environ.get("DEMO_OUT", "examples/demo_out"))
OUT.mkdir(parents=True, exist_ok=True)

# Fixed seed so repeated runs draw the same values.
rng = np.random.default_rng(0)
N, L = 200, 60

# Draw an N x L grid of symbols uniformly from the 21-character alphabet.
alphabet = np.array(list("ACDEFGHIKLMNPQRSTVWY-"))
seqs = rng.choice(alphabet, size=(N, L))

# Write the A3M file: one "#<L>\t1" line, then a ">tag" line and a sequence
# line per row. Row 0 is tagged "query"; the others are seq0001, seq0002, ...
a3m_path = OUT / "synthetic.a3m"
with open(a3m_path, "w") as f:
    f.write(f"#{L}\t1\n")
    for i, row in enumerate(seqs):
        tag = "query" if i == 0 else f"seq{i:04d}"
        f.write(f">{tag}\n{''.join(row)}\n")

# Synthetic FI matrix: random but with a few high-variance columns.
# Baseline noise has scale 0.3; L // 5 distinct columns get extra noise with
# scale 1.2 added on top.
fi = rng.normal(loc=0.0, scale=0.3, size=(N, L)).astype(np.float64)
hv_cols = rng.choice(L, size=L // 5, replace=False)
fi[:, hv_cols] += rng.normal(loc=0.0, scale=1.2, size=(N, len(hv_cols)))
np.save(OUT / "synthetic_fi.npy", fi)

print(f"wrote {a3m_path}")
print(f"wrote {OUT/'synthetic_fi.npy'} shape={fi.shape}")
PY
# 2-3. Build the mosaic subsets, then the gradient subsets.
# The two runs share every option except the method name, which also names
# the output directory under ${OUT}.
for method in mosaic gradient; do
  sf-cluster build \
    --a3m "${OUT}/synthetic.a3m" \
    --fi "${OUT}/synthetic_fi.npy" \
    --method "${method}" \
    --n-subsets 12 \
    --subset-size 32 \
    --seed 20260422 \
    --out "${OUT}/${method}"
done
# Closing message: a blank line, then where the results were written.
printf '\nDone. Inspect %s/mosaic and %s/gradient.\n' "${OUT}" "${OUT}"
|