biobert-emb / start.sh
felixbet's picture
Update start.sh
ac7f7f1 verified
#!/usr/bin/env bash
#
# Container entrypoint: fetches a model archive, unpacks it under MODEL_ROOT,
# validates the extracted files and finally exec's the uvicorn API server.
#
# Environment:
#   WEIGHTS_URL_TAR_GZ  (required) direct download URL of a .tar.gz archive.
#   PORT                (optional) port passed to uvicorn; defaults to 7860.
#
# Exit codes set in this section: 1 = WEIGHTS_URL_TAR_GZ missing,
# curl's own status = download failed, 2 = extraction failed.
set -euo pipefail

MODEL_ROOT="/app/bert_tf"
mkdir -p "$MODEL_ROOT"

if [ -z "${WEIGHTS_URL_TAR_GZ:-}" ]; then
  echo "[fatal] Set WEIGHTS_URL_TAR_GZ to a DIRECT .tar.gz link (Dropbox must end with dl=1)" >&2
  exit 1
fi

# Download into a private scratch directory instead of a fixed /tmp name.
# The EXIT trap removes it on every failure path.
DOWNLOAD_DIR="$(mktemp -d)"
trap 'rm -rf -- "${DOWNLOAD_DIR:?}"' EXIT
ARCHIVE="$DOWNLOAD_DIR/model.tar.gz"

echo "[start] downloading model…"
# --fail makes HTTP errors (403/404/5xx) abort here; without it curl saves the
# server's error page as the "archive" and the problem only shows up in tar.
curl --fail -L "$WEIGHTS_URL_TAR_GZ" -o "$ARCHIVE" || {
  rc=$?
  echo "[fatal] download failed (curl exit $rc)" >&2
  exit "$rc"
}

echo "[start] extracting…"
tar -xzf "$ARCHIVE" -C "$MODEL_ROOT" || { echo "[fatal] extract failed" >&2; exit 2; }

# The script ends with `exec`, which replaces the shell without running the
# EXIT trap, so release the archive's disk space explicitly now.
rm -rf -- "${DOWNLOAD_DIR:?}"
trap - EXIT
# ---- Locate the model directory via vocab.txt (handles nesting up to 4 levels) ----
# `|| true` keeps pipefail from aborting when head closes the pipe early or
# find reports an error; an empty result is handled explicitly below.
FOUND="$(find "$MODEL_ROOT" -maxdepth 4 -type f -name 'vocab.txt' | head -n1 || true)"
if [ -z "$FOUND" ]; then
  echo "[fatal] vocab.txt not found under $MODEL_ROOT" >&2
  # Dump the tree to help diagnose the archive layout; never let ls change the exit code.
  ls -R "$MODEL_ROOT" >&2 || true
  exit 3
fi

# Assign first, export afterwards, so a dirname failure is not masked by `export`.
MODEL_DIR="$(dirname "$FOUND")"

# Archives that ship vocab/vocab.txt: the real model directory is the parent of
# vocab/, not vocab/ itself. Step up one level so the later normalisation step
# (which moves vocab/vocab.txt next to the config) can do its job. Only do this
# when vocab/ does not itself look like a model directory.
if [ "${MODEL_DIR##*/}" = "vocab" ] \
  && [ ! -f "$MODEL_DIR/bert_config.json" ] \
  && [ ! -f "$MODEL_DIR/config.json" ]; then
  MODEL_DIR="$(dirname "$MODEL_DIR")"
fi
export MODEL_DIR
# Bring file names in line with what HF + TF expect. Each step is optional and
# only runs when its source file is present.
if [[ -f "$MODEL_DIR/bert_config.json" ]]; then
  # Copy (not move) so the original bert_config.json stays in place.
  cp "$MODEL_DIR/bert_config.json" "$MODEL_DIR/config.json"
fi
if [[ -d "$MODEL_DIR/vocab" && -f "$MODEL_DIR/vocab/vocab.txt" ]]; then
  mv "$MODEL_DIR/vocab/vocab.txt" "$MODEL_DIR/vocab.txt"
fi
if [[ -f "$MODEL_DIR/checkpoint.txt" ]]; then
  mv "$MODEL_DIR/checkpoint.txt" "$MODEL_DIR/checkpoint"
fi

# Log the resolved directory and its contents, every line tagged with [debug].
printf '[debug] MODEL_DIR=%s\n' "$MODEL_DIR"
ls -l "$MODEL_DIR" | sed 's/^/[debug] /'
# Basic sanity: abort before launching if any required model file is absent.
# Exit codes: 4 = missing vocab.txt/config.json/checkpoint,
#             5 = no model.ckpt-*.index, 6 = no model.ckpt-*.data-00000-of-00001.
# Diagnostics go to stderr, matching the WEIGHTS_URL_TAR_GZ check.
for f in vocab.txt config.json checkpoint; do
  [ -f "$MODEL_DIR/$f" ] || { echo "[fatal] missing $f" >&2; exit 4; }
done

#######################################
# Succeeds when at least one argument names an existing path.
# Intended to be called with an unquoted glob: an unmatched glob is passed
# through literally and therefore fails the -e test.
# Arguments: candidate paths (typically a glob expansion)
# Returns:   0 if any candidate exists, 1 otherwise
#######################################
glob_exists() {
  local candidate
  for candidate in "$@"; do
    [ -e "$candidate" ] && return 0
  done
  return 1
}

if ! glob_exists "$MODEL_DIR"/model.ckpt-*.index; then
  echo "[fatal] missing ckpt index" >&2
  exit 5
fi
if ! glob_exists "$MODEL_DIR"/model.ckpt-*.data-00000-of-00001; then
  echo "[fatal] missing ckpt data" >&2
  exit 6
fi
# Replace this shell with the uvicorn server so it becomes the container's
# main process. PORT falls back to 7860 when unset or empty.
printf '%s\n' "[start] launching API…"
listen_port="${PORT:-7860}"
exec uvicorn app:app --host 0.0.0.0 --port "$listen_port"