#!/usr/bin/env bash
# Minimal end-to-end demo on a synthetic A3M + FI matrix.
#
# Steps:
#   1. Generate synthetic inputs with an inline Python script (needs numpy).
#   2. Run `sf-cluster build --method mosaic`.
#   3. Run `sf-cluster build --method gradient`.
#
# Produces (next to this script, independent of the caller's working dir):
#   demo_out/synthetic.a3m     -- 200 random sequences of length 60
#   demo_out/synthetic_fi.npy  -- (200, 60) float64 matrix
#   demo_out/mosaic/           -- output of the mosaic build (12 subsets requested)
#   demo_out/gradient/         -- output of the gradient build (12 subsets requested)
#
# Requires `python` (with numpy) and `sf-cluster` on PATH.
set -euo pipefail

HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
OUT="${HERE}/demo_out"
mkdir -p "${OUT}"

# The generator below reads DEMO_OUT from its environment, so it must be
# exported BEFORE python is started. Otherwise the generator falls back to the
# CWD-relative "examples/demo_out" while the build steps read from ${OUT}.
export DEMO_OUT="${OUT}"

# 1. Generate synthetic inputs (200 random "sequences", L=60, random FI).
#    The heredoc delimiter is quoted, so the shell expands nothing inside it.
python - <<'PY'
import os
import numpy as np
from pathlib import Path

OUT = Path(os.environ.get("DEMO_OUT", "examples/demo_out"))
OUT.mkdir(parents=True, exist_ok=True)

rng = np.random.default_rng(0)
N, L = 200, 60
alphabet = np.array(list("ACDEFGHIKLMNPQRSTVWY-"))
seqs = rng.choice(alphabet, size=(N, L))

a3m_path = OUT / "synthetic.a3m"
with open(a3m_path, "w") as f:
    f.write(f"#{L}\t1\n")
    for i, row in enumerate(seqs):
        tag = "query" if i == 0 else f"seq{i:04d}"
        f.write(f">{tag}\n{''.join(row)}\n")

# Synthetic FI matrix: random but with a few high-variance columns.
fi = rng.normal(loc=0.0, scale=0.3, size=(N, L)).astype(np.float64)
hv_cols = rng.choice(L, size=L // 5, replace=False)
fi[:, hv_cols] += rng.normal(loc=0.0, scale=1.2, size=(N, len(hv_cols)))
np.save(OUT / "synthetic_fi.npy", fi)

print(f"wrote {a3m_path}")
print(f"wrote {OUT/'synthetic_fi.npy'} shape={fi.shape}")
PY

#######################################
# Run one `sf-cluster build` over the synthetic inputs.
# Globals:   OUT (read)
# Arguments: $1 - value for --method; also used as the output
#                 sub-directory name under ${OUT}
# Returns:   exit status of sf-cluster (non-zero aborts the script via set -e)
#######################################
build_subsets() {
  local method=$1
  sf-cluster build \
    --a3m "${OUT}/synthetic.a3m" \
    --fi "${OUT}/synthetic_fi.npy" \
    --method "${method}" \
    --n-subsets 12 \
    --subset-size 32 \
    --seed 20260422 \
    --out "${OUT}/${method}"
}

# 2. Build mosaic subsets.
build_subsets mosaic

# 3. Build gradient subsets.
build_subsets gradient

echo
echo "Done. Inspect ${OUT}/mosaic and ${OUT}/gradient."