#!/bin/bash
#
# Container entrypoint. In order, it:
#   1. starts a local Elasticsearch node and waits for it,
#   2. makes sure the bulk NDJSON data file is on disk,
#   3. prepares the Elasticsearch index,
#   4. replaces itself with the uvicorn process serving the FastAPI app.
#
# Optional environment variables read further down:
#   ES_INDEX  index name                          (default: assuntos_juridicos)
#   PORT      uvicorn listen port                 (default: 7860)
#   WORKERS   uvicorn worker count                (default: 1)
#   ES_HOST   only echoed in the startup banner   (default: http://localhost:9200)

# Abort on any unhandled command failure.
set -e

# Commit-pinned source of the bulk data file, and where it is stored locally.
BULK_URL="https://raw.githubusercontent.com/carlex22/PARA3/f4075c75727d711480d0de6acf12c9da4f74b337/bd/bulk_assuntos.ndjson"
BULK_DEST="/app/data/bulk_assuntos.ndjson"

# Elasticsearch installation and configuration directories.
ES_HOME=/usr/share/elasticsearch
ES_PATH_CONF=/etc/elasticsearch

# Base URL this script uses to talk to the local Elasticsearch node.
ES_URL="http://0.0.0.0:9200"

# NOTE(review): not referenced anywhere in the visible part of this script.
STATUS_FILE=/tmp/setup_status.json
# --- Step 1 of 4: start Elasticsearch and wait for it -----------------------
# The node is launched as the "elasticsearch" user below, so hand that user
# the install, config, data and log directories first.
chown -R elasticsearch:elasticsearch "$ES_HOME" "$ES_PATH_CONF" \
  /var/lib/elasticsearch /var/log/elasticsearch

echo "▶ Iniciando Elasticsearch..."
# -d detaches the node into the background; readiness is polled below.
gosu elasticsearch "$ES_HOME/bin/elasticsearch" -d

# Bounded wait: an unbounded loop would hang the container forever if the
# node never comes up, and the "ES unavailable" fallback in the index step
# could never run. ES_STARTUP_TIMEOUT (seconds) overrides the default budget.
ES_WAIT_BASE="${ES_URL:-http://0.0.0.0:9200}"
ES_WAIT_BUDGET="${ES_STARTUP_TIMEOUT:-300}"
ES_WAIT_DEADLINE=$(( SECONDS + ES_WAIT_BUDGET ))
ES_READY="no"

echo "⏳ Aguardando porta 9200..."
# Phase 1: any HTTP answer at all means the port is open.
while (( SECONDS < ES_WAIT_DEADLINE )); do
  if curl -s "$ES_WAIT_BASE" >/dev/null 2>&1; then
    ES_READY="port"
    break
  fi
  sleep 2
done

# Phase 2: wait for the cluster to report at least "yellow". With -f, curl
# fails when the health call itself times out, so the loop simply retries.
if [ "$ES_READY" = "port" ]; then
  while (( SECONDS < ES_WAIT_DEADLINE )); do
    if curl -sf "$ES_WAIT_BASE/_cluster/health?wait_for_status=yellow&timeout=10s" >/dev/null 2>&1; then
      ES_READY="yes"
      break
    fi
    sleep 3
  done
fi

if [ "$ES_READY" = "yes" ]; then
  echo "✅ Elasticsearch pronto."
else
  echo "⚠️ Elasticsearch não ficou pronto em ${ES_WAIT_BUDGET}s — continuando sem confirmação"
fi

# --- Step 2 of 4: make sure the bulk NDJSON file is on disk -----------------
echo ""
echo "📥 [2/4] Verificando dados..."

if [ -f "$BULK_DEST" ] && [ -s "$BULK_DEST" ]; then
  # A non-empty regular file is already in place: reuse it.
  SIZE=$(du -sh "$BULK_DEST" | cut -f1)
  echo " ✅ Dados já presentes: $SIZE — pulando download"
else
  echo " Baixando de GitHub..."
  echo " → $BULK_URL"

  # Download into a sibling ".part" file and rename it only after a complete,
  # non-empty transfer. Writing straight to $BULK_DEST would let an
  # interrupted download be mistaken for valid data on the next start.
  PART_FILE="${BULK_DEST}.part"
  MAX_ATTEMPTS=5
  RETRY_WAIT=5   # seconds; doubled after every failed attempt

  for (( attempt = 1; attempt <= MAX_ATTEMPTS; attempt++ )); do
    echo " Tentativa $attempt/$MAX_ATTEMPTS..."
    # --create-dirs: create the destination directory if it is missing.
    # 2>&1: curl draws its progress bar on stderr; fold it into stdout.
    if curl -fSL \
        --connect-timeout 30 \
        --max-time 300 \
        --retry 2 \
        --retry-delay 3 \
        --progress-bar \
        --create-dirs \
        -o "$PART_FILE" \
        "$BULK_URL" 2>&1 \
      && [ -s "$PART_FILE" ] \
      && mv -f "$PART_FILE" "$BULK_DEST"; then

      # Named LINE_COUNT rather than LINES, which bash itself maintains.
      LINE_COUNT=$(wc -l < "$BULK_DEST")
      # Presumably two lines per document (action + source) — hence "~".
      DOCS=$(( LINE_COUNT / 2 ))
      SIZE=$(du -sh "$BULK_DEST" | cut -f1)
      echo " ✅ Download OK: $SIZE | ~${DOCS} docs"
      break
    else
      echo " ⚠️ Falha $attempt/$MAX_ATTEMPTS"
      # Drop the partial file and any stale empty destination, so the
      # existence check after the loop reflects the real outcome.
      rm -f "$PART_FILE" "$BULK_DEST"
      if (( attempt < MAX_ATTEMPTS )); then
        sleep "$RETRY_WAIT"
        RETRY_WAIT=$(( RETRY_WAIT * 2 ))
      fi
    fi
  done

  # Best effort: a failed download does not stop the container.
  if [ ! -f "$BULK_DEST" ]; then
    echo " ❌ Download falhou — API sobe sem dados indexados"
    echo " Use POST /admin/reindex após verificar conectividade"
  fi
fi

# --- Step 3 of 4: create the index if needed and report its state -----------
echo ""
echo "📊 [3/4] Preparando índice..."

# Index endpoint, built once; ES_INDEX may override the default name.
INDEX_URL="${ES_URL}/${ES_INDEX:-assuntos_juridicos}"

if curl -sf "${ES_URL}/_cluster/health" >/dev/null 2>&1; then

  # No -f on the status probes: with -f, curl prints the HTTP code through -w
  # AND exits non-zero on 4xx/5xx, so an "|| echo 000" fallback would glue
  # "000" onto the real code (e.g. "404000"). Without -f, curl only fails on
  # transport errors, and only then is the "000" placeholder assigned.
  INDEX_EXISTS=$(curl -s -o /dev/null -w "%{http_code}" "$INDEX_URL" 2>/dev/null) \
    || INDEX_EXISTS="000"

  if [ "$INDEX_EXISTS" = "200" ]; then
    echo " ✅ Índice já existe — pulando criação"
  else
    echo " Criando índice com mapping..."
    # The response body (including any error detail) is kept in
    # /tmp/create_resp.json for later inspection.
    HTTP=$(curl -s -o /tmp/create_resp.json -w "%{http_code}" \
      -X PUT "$INDEX_URL" \
      -H "Content-Type: application/json" \
      -d @/app/data/es_mapping.json 2>/dev/null) \
      || HTTP="000"
    if [ "$HTTP" = "200" ]; then
      echo " ✅ Índice criado"
    else
      echo " ⚠️ Falha ao criar índice (HTTP $HTTP) — FastAPI fará na inicialização"
    fi
  fi

  # Read the document count; any failure (HTTP error, bad JSON) counts as 0.
  DOC_COUNT=$(curl -sf "${INDEX_URL}/_count" 2>/dev/null \
    | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('count',0))" 2>/dev/null \
    || echo "0")

  # 2>/dev/null hides the "integer expression expected" noise if the value
  # is somehow not numeric; the test then just evaluates to false.
  if [ "$DOC_COUNT" -gt "0" ] 2>/dev/null; then
    echo " ✅ Índice já tem $DOC_COUNT documentos — pulando indexação"
  else
    echo " ⏳ Indexação será feita pelo FastAPI no startup..."
  fi
else
  echo " ⚠️ ES indisponível — FastAPI tentará conectar no startup"
fi

# --- Step 4 of 4: hand the process over to uvicorn --------------------------
# Startup banner showing the effective settings (environment or default).
echo ""
echo "🚀 [4/4] Iniciando FastAPI na porta ${PORT:-7860}..."
echo " Workers : ${WORKERS:-1}"
echo " ES Host : ${ES_HOST:-http://localhost:9200}"
echo " Índice : ${ES_INDEX:-assuntos_juridicos}"
echo ""

# exec replaces this shell with uvicorn, so nothing after this line runs.
exec uvicorn app.main:app \
  --host 0.0.0.0 \
  --port "${PORT:-7860}" \
  --workers "${WORKERS:-1}" \
  --log-level info \
  --access-log \
  --loop uvloop