File size: 5,412 Bytes
31a2688
 
 
82024c3
31a2688
 
 
 
 
 
 
4d2a2da
31a2688
 
 
 
 
 
 
1441fa0
 
82024c3
31a2688
 
 
 
 
 
 
 
 
 
 
 
 
 
b3c968a
31a2688
4d2a2da
 
 
 
 
 
 
 
 
 
 
31a2688
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9612292
6fd2f67
 
1441fa0
 
6fd2f67
 
 
 
 
1441fa0
4d2a2da
6fd2f67
 
 
 
ec64993
6fd2f67
 
 
 
ec64993
6fd2f67
4d2a2da
6fd2f67
31a2688
82024c3
 
31a2688
 
 
 
 
 
a3bf962
31a2688
 
 
 
 
82024c3
31a2688
 
 
 
 
 
 
 
 
ec64993
 
 
 
 
31a2688
ec64993
 
 
 
 
 
31a2688
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
"""FastAPI application entry point."""

import logging
import os
from contextlib import asynccontextmanager
from collections.abc import AsyncIterator

from fastapi import FastAPI
from langchain_core.output_parsers import StrOutputParser

from src.config import load_settings
from src.provider import create_llm, create_llm_with_fallback, create_embeddings, create_reranker
from src.retrieval.embedder import Embedder
from src.retrieval.vector_store import VectorStore
from src.retrieval.bm25_search import BM25Search
from src.retrieval.hybrid import HybridRetriever
from src.retrieval.reranker import Reranker
from src.agent.intent_classifier import IntentClassifier
from src.agent.router import QueryRouter
from src.agent.plan_and_execute import PlanAndExecuteRouter
from src.agent.memory import ConversationMemory
from src.agent.session_store import SessionStore
from src.ingestion.pipeline import IngestionPipeline
from src.api.routes import router, set_dependencies

# Module-scoped logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


def _resolve_log_level(raw: object) -> int:
    """Translate a configured log level into a numeric ``logging`` level.

    Args:
        raw: Level taken from settings: either a level name in any letter
            case (``"debug"``, ``"WARNING"``) or an already numeric level.

    Returns:
        The matching numeric level, or ``logging.INFO`` when ``raw`` does not
        name one of the ``logging`` module's level constants.
    """
    if isinstance(raw, int) and not isinstance(raw, bool):
        return raw
    if isinstance(raw, str):
        # Upper-case first: a lowercase name such as "debug" would otherwise
        # resolve to the *function* logging.debug, which basicConfig rejects
        # with a TypeError.
        level = getattr(logging, raw.strip().upper(), None)
        if isinstance(level, int) and not isinstance(level, bool):
            return level
    return logging.INFO


def create_app() -> FastAPI:
    """Create and configure the FastAPI application.

    Loads the settings, configures logging, builds the LLM, the retrieval
    stack (embedder, vector store, BM25 search, hybrid retriever, reranker)
    and the query router selected by ``settings.agent_mode``, hands them to
    the route layer through ``set_dependencies`` and mounts the API router.

    Returns:
        Configured FastAPI application instance.
    """
    settings = load_settings()

    logging.basicConfig(level=_resolve_log_level(settings.log_level))

    # React mode's ReAct sub-agent calls llm.bind_tools(...) internally, which
    # RunnableWithFallbacks does not support. The fallback chain is therefore
    # only applied in pipeline mode; in react mode we warn and use the primary
    # model only.
    if settings.llm_fallback_enabled and settings.agent_mode == "react":
        logger.warning(
            "LLM_FALLBACK_ENABLED is set but AGENT_MODE=react; fallback chain "
            "is incompatible with tool-calling and will be DISABLED for this run."
        )
        llm = create_llm(settings)
    else:
        llm = create_llm_with_fallback(settings)
    embeddings = create_embeddings(settings)

    embedder = Embedder(embeddings=embeddings)
    vector_store = VectorStore(
        path=settings.qdrant_path,
        collection_name=settings.collection_name,
        dimension=settings.embedding_dimension,
        url=settings.qdrant_url,
    )
    # Created empty here; it is filled from the vector store in ``lifespan``.
    bm25_search = BM25Search()

    @asynccontextmanager
    async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
        """Load stored chunks from Qdrant and rebuild the BM25 index on startup."""
        chunks = vector_store.get_all_chunks()
        if chunks:
            bm25_search.index(chunks)
            logger.info("Rebuilt BM25 index with %d chunks from Qdrant", len(chunks))
        else:
            logger.info("No existing chunks in Qdrant; BM25 index is empty")
        # Nothing follows the yield: no shutdown work is performed.
        yield

    application = FastAPI(
        title="KU Doc Assistant",
        description="RAG-based document assistant for University of Copenhagen.",
        version="0.1.0",
        lifespan=lifespan,
    )
    hybrid_retriever = HybridRetriever(
        vector_store=vector_store,
        bm25_search=bm25_search,
        embedder=embedder,
        dense_weight=settings.dense_weight,
        bm25_weight=settings.bm25_weight,
    )
    reranker = Reranker(model=create_reranker(settings.reranker_model))

    if settings.agent_mode == "react":
        logger.info("Agent mode: Plan-and-Execute (structured multi-step agent)")
        query_router: QueryRouter | PlanAndExecuteRouter = PlanAndExecuteRouter(
            llm=llm,
            hybrid_retriever=hybrid_retriever,
            reranker=reranker,
            vector_store=vector_store,
            default_top_k=settings.top_k,
            memory=ConversationMemory(),
            token_budget_enabled=settings.token_budget_enabled,
        )
    else:
        logger.info("Agent mode: pipeline (fixed DAG)")
        intent_classifier = IntentClassifier(llm=llm, model_name=settings.generation_model)
        # Pipe the model through a string parser so the chain yields plain text.
        llm_chain = llm | StrOutputParser()
        query_router = QueryRouter(
            intent_classifier=intent_classifier,
            hybrid_retriever=hybrid_retriever,
            reranker=reranker,
            llm_chain=llm_chain,
            translate_query=settings.translate_query,
            token_budget_enabled=settings.token_budget_enabled,
        )

    # The session database location comes straight from the environment
    # (SESSION_DB_PATH), not from ``settings``.
    session_store = SessionStore(db_path=os.environ.get("SESSION_DB_PATH", "./data/sessions.db"))

    set_dependencies(
        query_router=query_router,
        ingestion_pipeline=IngestionPipeline(
            strategy=_parse_strategy(settings),
            chunk_size=settings.chunk_size,
            chunk_overlap=settings.chunk_overlap,
            embeddings=embeddings,
        ),
        embedder=embedder,
        vector_store=vector_store,
        bm25_search=bm25_search,
        settings=settings,
        session_store=session_store,
    )

    application.include_router(router)

    logger.info("KU Doc Assistant application created successfully")
    return application


def _parse_strategy(settings: "Settings") -> "ChunkStrategy":  # noqa: F821
    """Return the chunking strategy configured on ``settings``.

    Reads ``settings.chunk_strategy`` (treated as ``"semantic"`` when the
    attribute is missing) and converts it to a ``ChunkStrategy`` member.

    Args:
        settings: Application settings object; only its optional
            ``chunk_strategy`` attribute is read.

    Returns:
        The ``ChunkStrategy`` member for the configured value, or
        ``ChunkStrategy.SEMANTIC`` when the value is unset, empty, or not a
        valid strategy. An unrecognised non-empty value is reported with a
        warning so that a configuration typo does not go unnoticed.
    """
    # Function-scope import kept from the original code
    # (presumably to avoid an import cycle — TODO confirm).
    from src.models import ChunkStrategy

    raw = getattr(settings, "chunk_strategy", "semantic")
    try:
        return ChunkStrategy(raw)
    except ValueError:
        # Unset/empty is the documented default and stays silent; anything
        # else is most likely a misconfiguration worth surfacing.
        if raw is not None and raw != "":
            logging.getLogger(__name__).warning(
                "Unknown chunk strategy %r; falling back to SEMANTIC", raw
            )
        return ChunkStrategy.SEMANTIC


# Module-level application instance, built once when this module is imported.
app: FastAPI = create_app()