para.AI_ASSUNTOS_CNJ / entrypoint.sh
Carlexxx
para.AI beta
19a52e4
#!/bin/bash
# ═══════════════════════════════════════════════════════════════
# entrypoint.sh — Orquestra ES + Download dados + FastAPI
# Single container para HuggingFace Spaces (porta 7860)
# ═══════════════════════════════════════════════════════════════
set -e

# ── Configuration ────────────────────────────────────────────────
# Source (pinned to a commit) and local destination of the bulk NDJSON file.
BULK_URL="https://raw.githubusercontent.com/carlex22/PARA3/f4075c75727d711480d0de6acf12c9da4f74b337/bd/bulk_assuntos.ndjson"
BULK_DEST="/app/data/bulk_assuntos.ndjson"
ES_HOME=/usr/share/elasticsearch
ES_URL="http://0.0.0.0:9200"
ES_PATH_CONF=/etc/elasticsearch
STATUS_FILE=/tmp/setup_status.json

# Maximum number of seconds to wait for Elasticsearch before giving up and
# continuing without it. Overridable through the environment; any value that
# is not a plain non-negative integer falls back to the default.
ES_WAIT_TIMEOUT="${ES_WAIT_TIMEOUT:-300}"
if ! [[ "$ES_WAIT_TIMEOUT" =~ ^[0-9]+$ ]]; then
  ES_WAIT_TIMEOUT=300
fi

# ── 1. Start Elasticsearch ───────────────────────────────────────
# The daemon is started as the "elasticsearch" user below, so that user must
# own the install, config, data and log directories.
chown -R elasticsearch:elasticsearch "$ES_HOME" "$ES_PATH_CONF" \
  /var/lib/elasticsearch /var/log/elasticsearch

echo "▶ Iniciando Elasticsearch..."
# -d makes Elasticsearch detach and run in the background.
gosu elasticsearch "$ES_HOME/bin/elasticsearch" -d

echo "⏳ Aguardando porta 9200..."
es_ready=no
# SECONDS is bash's built-in counter of seconds since the shell started.
es_deadline=$(( SECONDS + ES_WAIT_TIMEOUT ))

# Phase 1: wait until the HTTP port answers at all.
while (( SECONDS < es_deadline )); do
  if curl -s "$ES_URL" >/dev/null 2>&1; then
    break
  fi
  sleep 2
done

# Phase 2: wait until the cluster reports at least "yellow" health.
# curl -f turns an HTTP error answer (e.g. the health call timing out
# server-side) into a non-zero exit, which triggers another poll.
while (( SECONDS < es_deadline )); do
  if curl -sf "${ES_URL}/_cluster/health?wait_for_status=yellow&timeout=10s" >/dev/null 2>&1; then
    es_ready=yes
    break
  fi
  sleep 3
done

if [ "$es_ready" = "yes" ]; then
  echo "✅ Elasticsearch pronto."
else
  # Do not hang forever: later steps re-check ES availability and can
  # proceed without it.
  echo "⚠️ Elasticsearch não respondeu em ${ES_WAIT_TIMEOUT}s — continuando sem ES" >&2
fi
# ── 2. Download bulk_assuntos.ndjson ─────────────────────────────
# Fetches the bulk file unless a non-empty copy is already present.
# The download goes to a temporary ".part" file that is renamed onto
# $BULK_DEST only after curl succeeds, so an interrupted transfer can never
# be mistaken for valid data on the next start.
echo ""
echo "📥 [2/4] Verificando dados..."
if [ -f "$BULK_DEST" ] && [ -s "$BULK_DEST" ]; then
  SIZE=$(du -sh "$BULK_DEST" | cut -f1)
  echo " ✅ Dados já presentes: $SIZE — pulando download"
else
  echo " Baixando de GitHub..."
  echo " → $BULK_URL"
  bulk_tmp="${BULK_DEST}.part"
  # Drop any leftover from a previously interrupted run.
  rm -f -- "$bulk_tmp"
  RETRY_WAIT=5
  for attempt in 1 2 3 4 5; do
    echo " Tentativa $attempt/5..."
    # --create-dirs: create the destination directory if it is missing.
    # An empty download is treated as a failure, matching the non-empty
    # check used above to decide whether data is already present.
    if curl -fSL \
        --connect-timeout 30 \
        --max-time 300 \
        --retry 2 \
        --retry-delay 3 \
        --progress-bar \
        --create-dirs \
        -o "$bulk_tmp" \
        "$BULK_URL" 2>&1 \
      && [ -s "$bulk_tmp" ] \
      && mv -f -- "$bulk_tmp" "$BULK_DEST"; then
      line_count=$(wc -l < "$BULK_DEST")
      # Bulk NDJSON uses two lines per document (action line + source line).
      DOCS=$(( line_count / 2 ))
      SIZE=$(du -sh "$BULK_DEST" | cut -f1)
      echo " ✅ Download OK: $SIZE | ~${DOCS} docs"
      break
    else
      echo " ⚠️ Falha $attempt/5"
      rm -f -- "$bulk_tmp" "$BULK_DEST"
      # Exponential backoff between attempts; no sleep after the last one.
      if [ "$attempt" -lt 5 ]; then
        sleep "$RETRY_WAIT"
        RETRY_WAIT=$(( RETRY_WAIT * 2 ))
      fi
    fi
  done
  if [ ! -f "$BULK_DEST" ]; then
    # Best effort: the API is still started, just without indexed data.
    echo " ❌ Download falhou — API sobe sem dados indexados"
    echo " Use POST /admin/reindex após verificar conectividade"
  fi
fi
# ── 3. Create index + check for data (if ES is reachable) ────────
# Creates the index from /app/data/es_mapping.json when it does not exist
# yet, then reports whether it already holds documents. Nothing is indexed
# here; every failure is only logged so the API can still start.
echo ""
echo "📊 [3/4] Preparando índice..."
index_name="${ES_INDEX:-assuntos_juridicos}"
if curl -sf "${ES_URL}/_cluster/health" >/dev/null 2>&1; then
  # Create the index with its mapping (if it does not exist).
  # No -f here: with -f curl still prints %{http_code} on an HTTP error, so
  # an "|| echo 000" fallback would append to it and yield e.g. "404000".
  # The fallback is assigned instead, and only when curl itself fails.
  index_status=$(curl -s -o /dev/null -w "%{http_code}" \
    "${ES_URL}/${index_name}" 2>/dev/null) || index_status="000"
  if [ "$index_status" = "200" ]; then
    echo " ✅ Índice já existe — pulando criação"
  else
    echo " Criando índice com mapping..."
    # Without -f the error body returned by ES is also kept in
    # /tmp/create_resp.json, which helps when debugging a rejected mapping.
    HTTP=$(curl -s -o /tmp/create_resp.json -w "%{http_code}" \
      -X PUT "${ES_URL}/${index_name}" \
      -H "Content-Type: application/json" \
      -d @/app/data/es_mapping.json 2>/dev/null) || HTTP="000"
    if [ "$HTTP" = "200" ]; then
      echo " ✅ Índice criado"
    else
      echo " ⚠️ Falha ao criar índice (HTTP $HTTP) — FastAPI fará na inicialização"
    fi
  fi
  # Check whether the index already holds documents. Any failure along the
  # pipeline (request error, unparsable JSON) is reported as a count of 0.
  DOC_COUNT=$(curl -sf "${ES_URL}/${index_name}/_count" 2>/dev/null \
    | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('count',0))" 2>/dev/null || echo "0")
  # stderr is silenced so a non-numeric value just selects the else branch.
  if [ "$DOC_COUNT" -gt 0 ] 2>/dev/null; then
    echo " ✅ Índice já tem $DOC_COUNT documentos — pulando indexação"
  else
    echo " ⏳ Indexação será feita pelo FastAPI no startup..."
  fi
else
  echo " ⚠️ ES indisponível — FastAPI tentará conectar no startup"
fi
# ── 4. Launch FastAPI ────────────────────────────────────────────
# Resolve the runtime settings once so the banner and the uvicorn command
# line are guaranteed to agree.
api_port="${PORT:-7860}"
api_workers="${WORKERS:-1}"

# Startup banner: blank line, four info lines, blank line.
printf '\n'
printf '%s\n' \
  "🚀 [4/4] Iniciando FastAPI na porta ${api_port}..." \
  " Workers : ${api_workers}" \
  " ES Host : ${ES_HOST:-http://localhost:9200}" \
  " Índice : ${ES_INDEX:-assuntos_juridicos}" \
  ""

uvicorn_args=(
  app.main:app
  --host 0.0.0.0
  --port "$api_port"
  --workers "$api_workers"
  --log-level info
  --access-log
  --loop uvloop
)

# exec replaces this shell with uvicorn, so nothing after this line runs.
exec uvicorn "${uvicorn_args[@]}"