#!/bin/bash
# ═══════════════════════════════════════════════════════════════
# entrypoint.sh — Orchestrates Elasticsearch + data download + FastAPI
# Single container for HuggingFace Spaces (port 7860)
#
# Steps:
#   1. Start Elasticsearch as a daemon and wait (bounded) for it.
#   2. Download bulk_assuntos.ndjson unless a non-empty copy exists.
#   3. Create the index from es_mapping.json unless it already exists.
#   4. Replace this shell with uvicorn.
#
# Optional environment:
#   ES_INDEX         index name                      (default: assuntos_juridicos)
#   ES_WAIT_TIMEOUT  max seconds to wait for ES       (default: 300)
#   ES_HOST          only echoed in the startup banner by this script
#   PORT, WORKERS    uvicorn port / worker count      (defaults: 7860 / 1)
# ═══════════════════════════════════════════════════════════════
set -euo pipefail

BULK_URL="https://raw.githubusercontent.com/carlex22/PARA3/f4075c75727d711480d0de6acf12c9da4f74b337/bd/bulk_assuntos.ndjson"
BULK_DEST="/app/data/bulk_assuntos.ndjson"
ES_MAPPING="/app/data/es_mapping.json"
ES_HOME=/usr/share/elasticsearch
# 0.0.0.0 is a bind address, not a destination; talk to the loopback instead.
ES_URL="http://127.0.0.1:9200"
# NOTE(review): not exported here — Elasticsearch only sees it if the image
# already exports ES_PATH_CONF. Confirm against the Dockerfile.
ES_PATH_CONF=/etc/elasticsearch
# NOTE(review): unused in this script and not exported; kept in case another
# part of the project expects it — confirm and remove if dead.
# shellcheck disable=SC2034
STATUS_FILE=/tmp/setup_status.json
INDEX_NAME="${ES_INDEX:-assuntos_juridicos}"
ES_WAIT_TIMEOUT="${ES_WAIT_TIMEOUT:-300}"
# Fall back to the default when the override is not a plain integer.
[[ "$ES_WAIT_TIMEOUT" =~ ^[0-9]+$ ]] || ES_WAIT_TIMEOUT=300

#######################################
# Wait until Elasticsearch answers on ES_URL and reports at least yellow.
# Globals:   ES_URL, ES_WAIT_TIMEOUT (read)
# Returns:   0 when ES is ready, 1 when the deadline expires first
#######################################
wait_for_es() {
  local deadline=$(( SECONDS + ES_WAIT_TIMEOUT ))

  # Phase 1: the HTTP port accepts connections.
  until curl -s --max-time 5 "$ES_URL" >/dev/null 2>&1; do
    (( SECONDS < deadline )) || return 1
    sleep 2
  done

  # Phase 2: the cluster reaches yellow (ES itself waits up to 10s per call).
  until curl -sf --max-time 15 \
    "${ES_URL}/_cluster/health?wait_for_status=yellow&timeout=10s" \
    >/dev/null 2>&1; do
    (( SECONDS < deadline )) || return 1
    sleep 3
  done
}

#######################################
# Run curl and print only the HTTP status code.
# -f is deliberately NOT used: with -f, curl prints the code via -w and then
# exits non-zero, so an `|| echo 000` fallback would yield e.g. "404000".
# Arguments: extra curl arguments (must include -o and the URL)
# Outputs:   the status code, or 000 when no response was received
# Returns:   always 0
#######################################
http_status() {
  local code
  code=$(curl -s -w '%{http_code}' "$@" 2>/dev/null) || code="000"
  printf '%s' "${code:-000}"
}

#######################################
# Download BULK_URL to BULK_DEST with up to 5 attempts and doubling back-off.
# The payload goes to a ".part" file first and is renamed only after a
# complete, non-empty transfer, so an interrupted run never leaves a
# truncated file that a later boot would accept as valid data.
# Globals:   BULK_URL, BULK_DEST (read)
# Returns:   0 on success, 1 when every attempt failed
#######################################
download_bulk() {
  local part_file="${BULK_DEST}.part"
  local dest_dir="${BULK_DEST%/*}"
  local max_attempts=5 retry_wait=5
  local attempt lines docs size

  if ! mkdir -p -- "$dest_dir"; then
    echo " ⚠️ Não foi possível criar $dest_dir"
    return 1
  fi

  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    echo " Tentativa $attempt/$max_attempts..."
    if curl -fSL \
      --connect-timeout 30 \
      --max-time 300 \
      --retry 2 \
      --retry-delay 3 \
      --progress-bar \
      -o "$part_file" \
      "$BULK_URL" 2>&1 \
      && [[ -s "$part_file" ]] \
      && mv -f -- "$part_file" "$BULK_DEST"; then
      lines=$(wc -l < "$BULK_DEST")
      docs=$(( lines / 2 ))   # bulk NDJSON: one action line + one document line
      size=$(du -sh "$BULK_DEST" | cut -f1)
      echo " ✅ Download OK: $size | ~${docs} docs"
      return 0
    fi

    echo " ⚠️ Falha $attempt/$max_attempts"
    rm -f -- "$part_file"
    if (( attempt < max_attempts )); then
      sleep "$retry_wait"
      retry_wait=$(( retry_wait * 2 ))
    fi
  done
  return 1
}

# ── 1. Start Elasticsearch ───────────────────────────────────────
chown -R elasticsearch:elasticsearch "$ES_HOME" "$ES_PATH_CONF" \
  /var/lib/elasticsearch /var/log/elasticsearch

echo "▶ Iniciando Elasticsearch..."
gosu elasticsearch "$ES_HOME/bin/elasticsearch" -d

echo "⏳ Aguardando porta 9200..."
if wait_for_es; then
  echo "✅ Elasticsearch pronto."
else
  # Keep booting: step 3 re-checks ES and the API can start without it.
  echo "⚠️ Elasticsearch não respondeu em ${ES_WAIT_TIMEOUT}s — continuando"
fi

# ── 2. Download bulk_assuntos.ndjson ─────────────────────────────
echo ""
echo "📥 [2/4] Verificando dados..."
if [[ -s "$BULK_DEST" ]]; then
  SIZE=$(du -sh "$BULK_DEST" | cut -f1) || SIZE="?"
  echo " ✅ Dados já presentes: $SIZE — pulando download"
else
  echo " Baixando de GitHub..."
  echo " → $BULK_URL"
  if ! download_bulk; then
    echo " ❌ Download falhou — API sobe sem dados indexados"
    echo " Use POST /admin/reindex após verificar conectividade"
  fi
fi

# ── 3. Create index (only if ES is reachable) ────────────────────
echo ""
echo "📊 [3/4] Preparando índice..."
if curl -sf "${ES_URL}/_cluster/health" >/dev/null 2>&1; then
  # Create the index with its mapping unless it already exists.
  INDEX_EXISTS=$(http_status -o /dev/null "${ES_URL}/${INDEX_NAME}")
  if [[ "$INDEX_EXISTS" == "200" ]]; then
    echo " ✅ Índice já existe — pulando criação"
  elif [[ ! -f "$ES_MAPPING" ]]; then
    # Without this guard curl would send an empty body and ES would create
    # the index with no mapping at all.
    echo " ⚠️ Mapping não encontrado ($ES_MAPPING) — FastAPI fará na inicialização"
  else
    echo " Criando índice com mapping..."
    HTTP=$(http_status -o /tmp/create_resp.json \
      -X PUT "${ES_URL}/${INDEX_NAME}" \
      -H "Content-Type: application/json" \
      -d "@${ES_MAPPING}")
    if [[ "$HTTP" == "200" ]]; then
      echo " ✅ Índice criado"
    else
      echo " ⚠️ Falha ao criar índice (HTTP $HTTP) — FastAPI fará na inicialização"
    fi
  fi

  # Check whether the index already holds documents. Any failure in the
  # pipeline (curl error, invalid JSON) is treated as "no documents".
  DOC_COUNT=$(curl -sf "${ES_URL}/${INDEX_NAME}/_count" 2>/dev/null \
    | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('count',0))" 2>/dev/null) \
    || DOC_COUNT=0
  [[ "$DOC_COUNT" =~ ^[0-9]+$ ]] || DOC_COUNT=0
  if [ "$DOC_COUNT" -gt 0 ]; then
    echo " ✅ Índice já tem $DOC_COUNT documentos — pulando indexação"
  else
    echo " ⏳ Indexação será feita pelo FastAPI no startup..."
  fi
else
  echo " ⚠️ ES indisponível — FastAPI tentará conectar no startup"
fi

# ── 4. Start FastAPI ─────────────────────────────────────────────
echo ""
echo "🚀 [4/4] Iniciando FastAPI na porta ${PORT:-7860}..."
echo " Workers : ${WORKERS:-1}"
echo " ES Host : ${ES_HOST:-http://localhost:9200}"
echo " Índice : ${INDEX_NAME}"
echo ""

# exec so uvicorn replaces this shell and receives container signals directly.
exec uvicorn app.main:app \
  --host 0.0.0.0 \
  --port "${PORT:-7860}" \
  --workers "${WORKERS:-1}" \
  --log-level info \
  --access-log \
  --loop uvloop