biobert-emb / start.sh
felixbet's picture
Update start.sh
a2d010b verified
raw
history blame
1.62 kB
#!/usr/bin/env bash
#
# Container entrypoint: download the model tarball named by
# WEIGHTS_URL_TAR_GZ, unpack it under /app/bert_tf, normalize the file
# layout, verify that the expected model files exist, then exec the API.
#
# Environment:
#   WEIGHTS_URL_TAR_GZ  (required) direct link to the .tar.gz with the weights
#   PORT                (optional) port passed to uvicorn, default 7860
#
# Exports:
#   MODEL_DIR           directory that holds the normalized model files
#
# Exit codes:
#   1  WEIGHTS_URL_TAR_GZ is not set
#   2  a required model file is missing after normalization
#   any other non-zero status comes from the failing command (set -e)
set -euo pipefail

readonly MODEL_ROOT="/app/bert_tf"

# Print a message to stderr and exit with the given status.
#   $1 - exit status, remaining args - message
die() {
  local status=$1
  shift
  printf '%s\n' "$*" >&2
  exit "$status"
}

# Succeed if directory $1 holds vocab.txt directly or inside a 'vocab' folder.
has_vocab() {
  [ -f "$1/vocab.txt" ] || [ -f "$1/vocab/vocab.txt" ]
}

mkdir -p "$MODEL_ROOT"

if [ -z "${WEIGHTS_URL_TAR_GZ:-}" ]; then
  die 1 "ERROR: Set WEIGHTS_URL_TAR_GZ (direct .tar.gz link) in Settings → Variables."
fi

# Unique temp file instead of a predictable /tmp name. The EXIT trap removes it
# on failure paths; it does not fire across the final 'exec', so the tarball is
# also removed explicitly right after extraction.
tarball="$(mktemp "${TMPDIR:-/tmp}/model.XXXXXX")"
trap 'rm -f -- "$tarball"' EXIT

echo "[start] downloading model tarball…"
# --fail: abort on HTTP errors instead of saving the error page as the tarball.
# --retry: ride out transient network failures.
curl --fail --location --retry 3 --output "$tarball" --url "$WEIGHTS_URL_TAR_GZ"

echo "[start] extracting to $MODEL_ROOT…"
tar -xzf "$tarball" -C "$MODEL_ROOT"
rm -f -- "$tarball"

# --- normalize layout ---
# Prefer a subfolder that carries the vocabulary (either vocab.txt or
# vocab/vocab.txt); otherwise fall back to the extraction root. A subfolder
# literally named 'vocab' is skipped: it is the vocabulary container handled by
# the pull-up step below, not a model directory. Glob order is sorted, so the
# choice is deterministic when several subfolders exist.
MODEL_DIR="$MODEL_ROOT"
for candidate in "$MODEL_ROOT"/*/; do
  candidate="${candidate%/}"
  if [ "${candidate##*/}" = "vocab" ]; then
    continue
  fi
  if has_vocab "$candidate"; then
    MODEL_DIR="$candidate"
    break
  fi
done
export MODEL_DIR

# Pull vocab up if it's inside a 'vocab' folder
if [ -f "$MODEL_DIR/vocab/vocab.txt" ]; then
  mv "$MODEL_DIR/vocab/vocab.txt" "$MODEL_DIR/vocab.txt"
fi

# Some dumps have checkpoint.txt → rename to checkpoint
if [ -f "$MODEL_DIR/checkpoint.txt" ]; then
  mv "$MODEL_DIR/checkpoint.txt" "$MODEL_DIR/checkpoint"
fi

# Some dumps have bert_config.json → duplicate to config.json
if [ -f "$MODEL_DIR/bert_config.json" ]; then
  cp "$MODEL_DIR/bert_config.json" "$MODEL_DIR/config.json"
fi

echo "[debug] contents of MODEL_DIR=$MODEL_DIR"
ls -l "$MODEL_DIR" | sed 's/^/[debug] /'

# sanity check: every file listed here must exist before the API starts
for f in vocab.txt config.json checkpoint model.ckpt-150000.index model.ckpt-150000.data-00000-of-00001; do
  if [ ! -f "$MODEL_DIR/$f" ]; then
    die 2 "[fatal] missing $f in $MODEL_DIR"
  fi
done

echo "[start] launching API…"
# exec replaces this shell so uvicorn receives signals directly.
exec uvicorn app:app --host 0.0.0.0 --port "${PORT:-7860}"