File size: 2,450 Bytes
9b457ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/bin/bash
# Exit as soon as a command fails (commands used as conditions are exempt).
set -e

printf '%s\n' "=== Zeta Researcher Startup ==="

# Diagnostic: report whether the API key reached this process.
# Only the key's length is printed; the value itself is never echoed.
if [[ -z "$ANTHROPIC_API_KEY" ]]; then
    printf '%s\n' "✗ WARNING: ANTHROPIC_API_KEY is NOT set!"
    # grep -q only reports whether some line of `env` output starts with
    # ANTHROPIC; it prints nothing, so no value ends up in the log.
    anthropic_status="No ANTHROPIC env vars found"
    if env | grep -q "^ANTHROPIC"; then
        anthropic_status="Found ANTHROPIC env vars (values hidden)"
    fi
    printf '%s\n' "$anthropic_status"
else
    printf '%s\n' "✓ ANTHROPIC_API_KEY is set (${#ANTHROPIC_API_KEY} chars)"
fi

# Decompress pre-built embeddings if they exist and target is empty

#######################################
# Unpack a pre-built embeddings archive and place its contents in the
# ChromaDB directory.
# The archive is extracted under the extraction root. If that yields a
# directory named after the archive (vectordb_zeta.tar.gz -> vectordb_zeta/),
# every entry inside it, hidden files included, is moved into the target
# directory and the emptied staging directory is removed.
# Arguments:
#   $1 - path to the .tar.gz archive
#   $2 - label shown in the progress message
#   $3 - extraction root (default: /tmp)
#   $4 - target directory (default: /tmp/chromadb)
# Outputs:
#   Progress and warning messages on stdout.
# Returns:
#   The failing status of mkdir or tar if either fails; 0 otherwise.
#   A failed move is reported as a warning and is not fatal.
#######################################
extract_embeddings() {
    local archive=$1
    local label=$2
    local extract_root=${3:-/tmp}
    local target_dir=${4:-/tmp/chromadb}
    local staging_dir

    echo "Extracting pre-built embeddings (${label})..."
    mkdir -p "$target_dir" || return
    tar -xzf "$archive" -C "$extract_root" || return

    staging_dir="${extract_root}/$(basename "$archive" .tar.gz)"
    # Nothing to relocate when the archive unpacked straight into the target.
    if [ -d "$staging_dir" ] && [ "$staging_dir" != "$target_dir" ]; then
        # Subshell keeps the glob options from leaking into the rest of the
        # script. dotglob picks up hidden files; nullglob makes an empty
        # staging directory expand to zero entries instead of a literal '*'.
        if ! (
            shopt -s dotglob nullglob
            entries=("$staging_dir"/*)
            [ "${#entries[@]}" -eq 0 ] || mv -- "${entries[@]}" "$target_dir"/
        ); then
            echo "✗ WARNING: Could not move all files from $staging_dir to $target_dir"
        fi
        # Best effort: the directory is left behind if anything remains in it.
        rmdir "$staging_dir" 2>/dev/null || true
    fi
}

if [ ! -f "/tmp/chromadb/chroma.sqlite3" ]; then
    # Try vectordb_zeta.tar.gz first (both NV-Embed-v2 + E5-small), then fall back to e5small-only
    if [ -f "/app/data/vectordb_zeta.tar.gz" ]; then
        extract_embeddings "/app/data/vectordb_zeta.tar.gz" "NV-Embed-v2 + E5-small"
    elif [ -f "/app/data/vectordb_e5small.tar.gz" ]; then
        extract_embeddings "/app/data/vectordb_e5small.tar.gz" "E5-small only"
    else
        echo "✗ WARNING: No pre-built embeddings archive found in /app/data"
    fi
    # Verify extraction
    if [ -f "/tmp/chromadb/chroma.sqlite3" ]; then
        echo "✓ ChromaDB database found"
    else
        echo "✗ WARNING: ChromaDB database not found after extraction!"
    fi
fi

# Create the runtime directories in a single mkdir call; -p makes this a
# no-op for any directory that is already present.
runtime_dirs=(
    /tmp/chromadb
    /tmp/shares
    /tmp/huggingface
)
mkdir -p "${runtime_dirs[@]}"

# Show stats

#######################################
# Count the non-hidden entries directly inside a directory.
# Uses shell globbing instead of parsing `ls` output, so a name containing
# a newline or other unusual character is still counted exactly once.
# The body runs in a subshell so the nullglob setting does not leak out.
# Arguments:
#   $1 - directory to inspect
# Outputs:
#   The entry count on stdout (0 for an empty or missing directory).
#######################################
count_dir_entries() (
    shopt -s nullglob   # an empty directory must count as 0, not as one '*'
    set -- "$1"/*
    echo "$#"
)

if [ -f "/tmp/chromadb/chroma.sqlite3" ]; then
    # du -h prints "<size><TAB><path>"; keep only the human-readable size.
    DB_SIZE=$(du -h /tmp/chromadb/chroma.sqlite3 | cut -f1)
    echo "ChromaDB size: $DB_SIZE"
    DB_FILES=$(count_dir_entries /tmp/chromadb)
    echo "ChromaDB files: $DB_FILES"
fi

# Report how many PDFs are reachable under the data directory.
# find descends into subdirectories, so nested PDFs are counted as well;
# its error output is discarded so only the count reaches the log.
pdf_root="/app/data/pdfs"
if [[ ! -d "$pdf_root" ]]; then
    printf '%s\n' "✗ WARNING: PDF directory not found at ${pdf_root}"
else
    PDF_COUNT=$(find "$pdf_root" -name '*.pdf' 2>/dev/null | wc -l)
    printf '%s\n' "PDFs available: ${PDF_COUNT}"
fi

# Launch the API server. The bind address and port are named once so the
# log line and the uvicorn arguments cannot drift apart.
listen_host="0.0.0.0"
listen_port=7860

echo "Starting server on port ${listen_port}..."
# exec replaces this shell with the python process; nothing after this
# line runs.
exec python -m uvicorn src.api.routes:app --host "$listen_host" --port "$listen_port"