File size: 1,660 Bytes
a3284b1
 
 
a2d010b
 
a3284b1
9dbdf47
ac7f7f1
9dbdf47
a3284b1
 
ac7f7f1
a2d010b
a3284b1
ac7f7f1
 
a2d010b
ac7f7f1
 
 
 
a2d010b
ac7f7f1
a2d010b
ac7f7f1
a2d010b
ac7f7f1
 
a2d010b
ac7f7f1
a2d010b
 
ac7f7f1
 
 
a2d010b
ac7f7f1
 
a3284b1
9dbdf47
a3284b1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env bash
#
# Container entrypoint: download a model archive, unpack it under MODEL_ROOT,
# normalize a few file names, verify the expected files exist, then replace
# this shell with the uvicorn API server.
#
# Environment:
#   WEIGHTS_URL_TAR_GZ  (required) direct link to a .tar.gz archive.
#   PORT                (optional) port passed to uvicorn; defaults to 7860.
#
# Exports:
#   MODEL_DIR           directory holding vocab.txt and the checkpoint files.
#
# Exit codes:
#   1  WEIGHTS_URL_TAR_GZ is unset or empty
#   2  archive extraction failed
#   3  vocab.txt not found under MODEL_ROOT
#   4  vocab.txt, config.json or checkpoint missing after normalization
#   5  no model.ckpt-*.index file
#   6  no model.ckpt-*.data-00000-of-00001 file
#   (a failed download exits with curl's own status)
set -euo pipefail

MODEL_ROOT="/app/bert_tf"

# Print a "[fatal]" message on stderr and exit with the given status.
#   $1 - exit status; remaining args - message text
die() {
  local code=$1
  shift
  printf '[fatal] %s\n' "$*" >&2
  exit "$code"
}

# Succeed when the glob handed in (already expanded by the caller) matched at
# least one existing path. An unmatched glob stays literal and fails the -e test.
glob_exists() {
  [ -e "${1:-}" ]
}

mkdir -p "$MODEL_ROOT"

if [ -z "${WEIGHTS_URL_TAR_GZ:-}" ]; then
  die 1 "Set WEIGHTS_URL_TAR_GZ to a DIRECT .tar.gz link (Dropbox must end with dl=1)"
fi

# Unique temp file instead of a fixed /tmp name; removed on any early exit.
archive="$(mktemp "${TMPDIR:-/tmp}/model.tar.gz.XXXXXX")"
trap 'rm -f -- "$archive"' EXIT

echo "[start] downloading model…"
# --fail: treat HTTP errors (404, 403, ...) as failures instead of saving the
# error page as if it were the archive.
curl --fail --location "$WEIGHTS_URL_TAR_GZ" -o "$archive"

echo "[start] extracting…"
tar -xzf "$archive" -C "$MODEL_ROOT" || die 2 "extract failed"

# The archive is no longer needed. The EXIT trap would not run across the final
# `exec`, so clean up explicitly here.
rm -f -- "$archive"
trap - EXIT

# ---- Find the directory that actually contains vocab.txt (handles any nesting) ----
FOUND="$(find "$MODEL_ROOT" -maxdepth 4 -type f -name 'vocab.txt' | head -n1 || true)"
if [ -z "$FOUND" ]; then
  printf '[fatal] vocab.txt not found under %s\n' "$MODEL_ROOT" >&2
  ls -R "$MODEL_ROOT" >&2 || true
  exit 3
fi

MODEL_DIR="$(dirname "$FOUND")"
# When vocab.txt lives in a vocab/ subdirectory that holds no config file, the
# model directory is the parent; the normalization below then moves vocab.txt
# up next to the other files.
if [ "$(basename "$MODEL_DIR")" = "vocab" ] \
  && [ ! -f "$MODEL_DIR/bert_config.json" ] \
  && [ ! -f "$MODEL_DIR/config.json" ]; then
  MODEL_DIR="$(dirname "$MODEL_DIR")"
fi
export MODEL_DIR

# Normalize names expected by HF + TF
if [ -f "$MODEL_DIR/bert_config.json" ]; then
  cp "$MODEL_DIR/bert_config.json" "$MODEL_DIR/config.json"
fi
if [ -f "$MODEL_DIR/vocab/vocab.txt" ]; then
  mv "$MODEL_DIR/vocab/vocab.txt" "$MODEL_DIR/vocab.txt"
fi
if [ -f "$MODEL_DIR/checkpoint.txt" ]; then
  mv "$MODEL_DIR/checkpoint.txt" "$MODEL_DIR/checkpoint"
fi

echo "[debug] MODEL_DIR=$MODEL_DIR"
ls -l "$MODEL_DIR" | sed 's/^/[debug] /'

# Basic sanity
for f in vocab.txt config.json checkpoint; do
  [ -f "$MODEL_DIR/$f" ] || die 4 "missing $f"
done
glob_exists "$MODEL_DIR"/model.ckpt-*.index || die 5 "missing ckpt index"
glob_exists "$MODEL_DIR"/model.ckpt-*.data-00000-of-00001 || die 6 "missing ckpt data"

echo "[start] launching API…"
exec uvicorn app:app --host 0.0.0.0 --port "${PORT:-7860}"