File size: 2,570 Bytes
4506ba8
 
7085a14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# One-shot setup script — downloads data, builds indexes, verifies everything
# Usage: python setup.py

import sys
import os
sys.path.insert(0, os.path.dirname(__file__))

from src.logger import logger


def main():
    """Run the one-shot project setup from start to finish.

    Stages, in the order they run (numbering matches the log output):

    * Pre-flight: check that a ``.env`` file exists next to this script and
      that the API key for the configured ``LLM_PROVIDER`` is filled in.
    * Step 1: run data ingestion.
    * Step 2: initialise the DuckDB tables and print the schema info.
    * Step 3: build the ChromaDB vector index.
    * Step 4: build the BM25 keyword index.
    * Step 5: run one test query through the agent graph and print the
      first 500 characters of the answer plus its confidence.

    Project modules are imported inside the function, right before the
    stage that uses them.

    Raises:
        SystemExit: with status 1 when ``.env`` is missing, when
            ``LLM_PROVIDER`` is not one of the known providers, or when the
            selected provider's API key is empty or still a placeholder.
    """
    banner = "=" * 60
    logger.info(banner)
    logger.info("FINANCIAL INTELLIGENCE AGENT — SETUP")
    logger.info(banner)

    # pre-flight: check .env and the API key of the selected provider
    from src.config import LLM_PROVIDER, OPENAI_API_KEY, ANTHROPIC_API_KEY, GROQ_API_KEY
    env_path = os.path.join(os.path.dirname(__file__), ".env")
    if not os.path.exists(env_path):
        logger.error(
            "No .env file found. Copy .env.example to .env and fill in your API key:\n"
            "  cp .env.example .env"
        )
        sys.exit(1)

    key_map = {"openai": OPENAI_API_KEY, "anthropic": ANTHROPIC_API_KEY, "groq": GROQ_API_KEY}
    if LLM_PROVIDER not in key_map:
        # An unknown provider is a different mistake from a missing key, so
        # report it as such instead of pointing the user at the API key.
        logger.error(
            f"LLM_PROVIDER is set to '{LLM_PROVIDER}', which is not a supported provider. "
            f"Use one of: {', '.join(key_map)}. Edit your .env file."
        )
        sys.exit(1)

    active_key = key_map[LLM_PROVIDER]
    # "sk-your" / "gsk_your" prefixes are treated as untouched template values.
    if not active_key or active_key.startswith(("sk-your", "gsk_your")):
        logger.error(
            f"LLM_PROVIDER is set to '{LLM_PROVIDER}' but the API key looks unset. "
            f"Edit your .env file."
        )
        sys.exit(1)

    logger.info(f"LLM provider: {LLM_PROVIDER}")

    # step 1: ingest data
    logger.info("\n--- Step 1: Data Ingestion ---")
    from src.data_platform.ingest import run_ingestion
    run_ingestion()

    # step 2: load into DuckDB
    logger.info("\n--- Step 2: DuckDB Setup ---")
    from src.data_platform.duckdb_store import init_tables, get_schema_info
    init_tables()
    print(get_schema_info())

    # step 3: build ChromaDB vector index
    logger.info("\n--- Step 3: ChromaDB Vector Index ---")
    from src.data_platform.chroma_store import build_index as build_chroma
    build_chroma()

    # step 4: build BM25 keyword index
    logger.info("\n--- Step 4: BM25 Keyword Index ---")
    from src.data_platform.bm25_store import build_index as build_bm25
    build_bm25()

    # step 5: quick sanity test
    logger.info("\n--- Step 5: Sanity Test ---")
    from src.agents.graph import run_query
    test_q = "What is the average revenue of Technology sector companies in 2024?"
    logger.info(f"Test query: {test_q}")
    result = run_query(test_q)
    # Only the first 500 characters of the answer are shown to keep output short.
    print(f"\nAnswer:\n{result['answer'][:500]}")
    print(f"\nConfidence: {result['confidence']}")

    logger.info("\n" + banner)
    logger.info("SETUP COMPLETE")
    logger.info("Run the UI with: streamlit run src/ui/app.py")
    logger.info("Run evaluation with: python evaluation/evaluate.py")
    logger.info(banner)


# Run the setup only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()