zeta / scripts /docker_entrypoint.sh
rodrigo-moonray
Deploy zeta-only embeddings (NV-Embed-v2 + E5-small)
9b457ed
#!/bin/bash
# Container entrypoint: prints startup diagnostics, restores the ChromaDB
# store from a bundled tarball when needed, then launches the API server.
set -e

#######################################
# Report whether ANTHROPIC_API_KEY is present without revealing its value.
# Globals:  ANTHROPIC_API_KEY (read)
# Outputs:  one or two status lines on stdout
# Returns:  0
#######################################
report_api_key_status() {
  # ${VAR:-} keeps the test valid even if the script is ever run with `set -u`.
  if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
    # Only the length is printed, never the key itself.
    echo "✓ ANTHROPIC_API_KEY is set (${#ANTHROPIC_API_KEY} chars)"
  else
    echo "✗ WARNING: ANTHROPIC_API_KEY is NOT set!"
    # Security: only check existence; `grep -q` prints nothing, so no
    # variable value can reach the logs.
    if env | grep -q "^ANTHROPIC"; then
      echo "Found ANTHROPIC env vars (values hidden)"
    else
      echo "No ANTHROPIC env vars found"
    fi
  fi
}

echo "=== Zeta Researcher Startup ==="
report_api_key_status
# Restore the pre-built ChromaDB store from a bundled tarball, unless a
# database file is already in place.

#######################################
# Unpack one embeddings tarball and move its contents into the database dir.
# The archive is expected to unpack into a directory named after itself
# (foo.tar.gz -> <extract_root>/foo); if it does not, nothing is moved.
# Arguments: $1 - path to the .tar.gz archive
#            $2 - destination directory (created if missing)
#            $3 - directory to extract into
# Outputs:   a warning on stderr if some extracted entries could not be moved
# Returns:   non-zero if the destination cannot be created or tar fails
#######################################
unpack_vectordb() {
  local archive=$1 dest=$2 extract_root=$3
  local staging
  staging="$extract_root/$(basename "$archive" .tar.gz)"

  mkdir -p "$dest" || return
  tar -xzf "$archive" -C "$extract_root" || return

  if [ -d "$staging" ]; then
    # Subshell keeps the shopt changes local. dotglob makes the move include
    # hidden entries, which a plain `dir/*` glob would leave behind.
    if ! (
      shopt -s dotglob nullglob
      entries=("$staging"/*)
      [ "${#entries[@]}" -eq 0 ] || mv -- "${entries[@]}" "$dest"/
    ); then
      # Best effort: a partial move must not stop the container from starting.
      echo "WARNING: could not move every extracted file into $dest" >&2
    fi
    # Intentionally ignored: the staging dir stays behind if it is not empty.
    rmdir "$staging" 2>/dev/null || true
  fi
}

#######################################
# Populate the ChromaDB directory from the first available tarball.
# Prefers vectordb_zeta.tar.gz (NV-Embed-v2 + E5-small) and falls back to
# vectordb_e5small.tar.gz. Does nothing when chroma.sqlite3 already exists.
# Arguments: $1 - directory holding the tarballs (default /app/data)
#            $2 - ChromaDB directory             (default /tmp/chromadb)
#            $3 - extraction directory           (default /tmp)
# Outputs:   progress and verification messages on stdout
# Returns:   non-zero if extraction fails
#######################################
restore_chromadb() {
  local data_dir=${1:-/app/data}
  local db_dir=${2:-/tmp/chromadb}
  local extract_root=${3:-/tmp}
  local db_file="$db_dir/chroma.sqlite3"

  # An existing database wins; never overwrite it.
  if [ -f "$db_file" ]; then
    return 0
  fi

  if [ -f "$data_dir/vectordb_zeta.tar.gz" ]; then
    echo "Extracting pre-built embeddings (NV-Embed-v2 + E5-small)..."
    unpack_vectordb "$data_dir/vectordb_zeta.tar.gz" "$db_dir" "$extract_root" || return
  elif [ -f "$data_dir/vectordb_e5small.tar.gz" ]; then
    echo "Extracting pre-built embeddings (E5-small only)..."
    unpack_vectordb "$data_dir/vectordb_e5small.tar.gz" "$db_dir" "$extract_root" || return
  fi

  # Verify extraction (also reached when no tarball was available).
  if [ -f "$db_file" ]; then
    echo "✓ ChromaDB database found"
  else
    echo "✗ WARNING: ChromaDB database not found after extraction!"
  fi
}

restore_chromadb
#######################################
# Print the size of the ChromaDB database file and the number of entries in
# its directory. Prints nothing when the database file does not exist.
# Arguments: $1 - ChromaDB directory (default /tmp/chromadb)
# Outputs:   "ChromaDB size: ..." and "ChromaDB files: ..." on stdout
# Returns:   0
#######################################
report_chromadb_stats() {
  local db_dir=${1:-/tmp/chromadb}
  local db_file="$db_dir/chroma.sqlite3"
  local db_size

  [ -f "$db_file" ] || return 0

  db_size=$(du -h "$db_file" | cut -f1)
  echo "ChromaDB size: $db_size"

  # Count with a glob rather than parsing `ls`, so a name containing a newline
  # is counted once. The glob always matches here because chroma.sqlite3
  # exists; like plain `ls`, it skips hidden entries.
  local -a entries=("$db_dir"/*)
  echo "ChromaDB files: ${#entries[@]}"
}

# Ensure directories exist
mkdir -p /tmp/chromadb /tmp/shares /tmp/huggingface

# Show stats
report_chromadb_stats
#######################################
# Report how many *.pdf entries exist under the PDF directory, searching
# subdirectories too, or warn when the directory is missing.
# Arguments: $1 - PDF directory (default /app/data/pdfs)
# Outputs:   one status line on stdout
# Returns:   0
#######################################
report_pdf_count() {
  local pdf_dir=${1:-/app/data/pdfs}
  local pdf_count

  if [ -d "$pdf_dir" ]; then
    # find errors (e.g. unreadable subdirectories) are deliberately hidden so
    # the count stays a best-effort diagnostic. tr strips the padding some wc
    # implementations put around the number.
    pdf_count=$(find "$pdf_dir" -name "*.pdf" 2>/dev/null | wc -l | tr -d '[:space:]')
    echo "PDFs available: $pdf_count"
  else
    echo "✗ WARNING: PDF directory not found at $pdf_dir"
  fi
}

# Verify PDFs are accessible
report_pdf_count
# Announce the launch, then replace this shell with the uvicorn process
# (exec does not return on success).
uvicorn_args=(src.api.routes:app --host 0.0.0.0 --port 7860)
printf '%s\n' "Starting server on port 7860..."
exec python -m uvicorn "${uvicorn_args[@]}"