Spaces:
Running
Running
File size: 5,412 Bytes
"""FastAPI application entry point."""
import logging
import os
from contextlib import asynccontextmanager
from collections.abc import AsyncIterator
from fastapi import FastAPI
from langchain_core.output_parsers import StrOutputParser
from src.config import load_settings
from src.provider import create_llm, create_llm_with_fallback, create_embeddings, create_reranker
from src.retrieval.embedder import Embedder
from src.retrieval.vector_store import VectorStore
from src.retrieval.bm25_search import BM25Search
from src.retrieval.hybrid import HybridRetriever
from src.retrieval.reranker import Reranker
from src.agent.intent_classifier import IntentClassifier
from src.agent.router import QueryRouter
from src.agent.plan_and_execute import PlanAndExecuteRouter
from src.agent.memory import ConversationMemory
from src.agent.session_store import SessionStore
from src.ingestion.pipeline import IngestionPipeline
from src.api.routes import router, set_dependencies
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def _resolve_log_level(raw: object) -> int:
    """Translate a configured log level into a numeric ``logging`` level.

    Args:
        raw: The configured value, e.g. ``"INFO"``, ``"debug"`` or ``10``.

    Returns:
        The matching numeric level, or ``logging.INFO`` when *raw* does not
        name an integer-valued attribute of the ``logging`` module.
    """
    # Already numeric: pass through (bool is excluded, it is an int subclass).
    if isinstance(raw, int) and not isinstance(raw, bool):
        return raw
    # Level constants are upper-case; a lower-case lookup such as "info" would
    # otherwise resolve to the function ``logging.info`` rather than a level.
    candidate = getattr(logging, str(raw).strip().upper(), None)
    return candidate if isinstance(candidate, int) else logging.INFO


def create_app() -> FastAPI:
    """Create and configure the FastAPI application.

    Loads settings, configures logging, builds the LLM, embedding, vector
    store, BM25, hybrid-retrieval and reranking components, selects the query
    router according to ``settings.agent_mode``, registers the shared
    dependencies with the API routes and mounts the router.

    Returns:
        Configured FastAPI application instance.
    """
    settings = load_settings()
    logging.basicConfig(level=_resolve_log_level(settings.log_level))

    # React mode's ReAct sub-agent calls llm.bind_tools(...) internally, which
    # RunnableWithFallbacks does not support. Fall back chain is therefore only
    # applied in pipeline mode; in react mode we warn and use the primary only.
    if settings.llm_fallback_enabled and settings.agent_mode == "react":
        logger.warning(
            "LLM_FALLBACK_ENABLED is set but AGENT_MODE=react; fallback chain "
            "is incompatible with tool-calling and will be DISABLED for this run."
        )
        llm = create_llm(settings)
    else:
        llm = create_llm_with_fallback(settings)

    embeddings = create_embeddings(settings)
    embedder = Embedder(embeddings=embeddings)
    vector_store = VectorStore(
        path=settings.qdrant_path,
        collection_name=settings.collection_name,
        dimension=settings.embedding_dimension,
        url=settings.qdrant_url,
    )
    bm25_search = BM25Search()

    @asynccontextmanager
    async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
        """Load stored chunks from Qdrant and rebuild the BM25 index on startup."""
        chunks = vector_store.get_all_chunks()
        if chunks:
            bm25_search.index(chunks)
            logger.info("Rebuilt BM25 index with %d chunks from Qdrant", len(chunks))
        else:
            logger.info("No existing chunks in Qdrant; BM25 index is empty")
        # Nothing follows the yield, so no shutdown work is performed here.
        yield

    application = FastAPI(
        title="KU Doc Assistant",
        description="RAG-based document assistant for University of Copenhagen.",
        version="0.1.0",
        lifespan=lifespan,
    )

    hybrid_retriever = HybridRetriever(
        vector_store=vector_store,
        bm25_search=bm25_search,
        embedder=embedder,
        dense_weight=settings.dense_weight,
        bm25_weight=settings.bm25_weight,
    )
    reranker = Reranker(model=create_reranker(settings.reranker_model))

    # The "react" agent mode is served by PlanAndExecuteRouter; every other
    # value selects the fixed-pipeline QueryRouter.
    if settings.agent_mode == "react":
        logger.info("Agent mode: Plan-and-Execute (structured multi-step agent)")
        query_router: QueryRouter | PlanAndExecuteRouter = PlanAndExecuteRouter(
            llm=llm,
            hybrid_retriever=hybrid_retriever,
            reranker=reranker,
            vector_store=vector_store,
            default_top_k=settings.top_k,
            memory=ConversationMemory(),
            token_budget_enabled=settings.token_budget_enabled,
        )
    else:
        logger.info("Agent mode: pipeline (fixed DAG)")
        intent_classifier = IntentClassifier(llm=llm, model_name=settings.generation_model)
        llm_chain = llm | StrOutputParser()
        query_router = QueryRouter(
            intent_classifier=intent_classifier,
            hybrid_retriever=hybrid_retriever,
            reranker=reranker,
            llm_chain=llm_chain,
            translate_query=settings.translate_query,
            token_budget_enabled=settings.token_budget_enabled,
        )

    # The session database path comes straight from the environment rather
    # than from ``settings``.
    session_store = SessionStore(db_path=os.environ.get("SESSION_DB_PATH", "./data/sessions.db"))

    set_dependencies(
        query_router=query_router,
        ingestion_pipeline=IngestionPipeline(
            strategy=_parse_strategy(settings),
            chunk_size=settings.chunk_size,
            chunk_overlap=settings.chunk_overlap,
            embeddings=embeddings,
        ),
        embedder=embedder,
        vector_store=vector_store,
        bm25_search=bm25_search,
        settings=settings,
        session_store=session_store,
    )
    application.include_router(router)
    logger.info("KU Doc Assistant application created successfully")
    return application
def _parse_strategy(settings: "Settings") -> "ChunkStrategy":  # noqa: F821
    """Return the chunking strategy configured on *settings*.

    Reads ``settings.chunk_strategy`` (using ``"semantic"`` when the attribute
    is missing) and converts it with ``ChunkStrategy(...)``. When that
    conversion raises ``ValueError`` the function returns
    ``ChunkStrategy.SEMANTIC``; a non-empty rejected value is logged as a
    warning so a misspelt configuration is not silently masked.

    Args:
        settings: Settings object that may carry a ``chunk_strategy`` attribute.

    Returns:
        The parsed ``ChunkStrategy``, or ``ChunkStrategy.SEMANTIC`` as fallback.
    """
    # Imported locally, as in the original (presumably to avoid an import
    # cycle with src.models -- TODO confirm).
    from src.models import ChunkStrategy

    raw = getattr(settings, "chunk_strategy", "semantic")
    try:
        return ChunkStrategy(raw)
    except ValueError:
        # Unset/empty is the expected fallback case; anything else was set
        # explicitly but is not a recognised strategy, so make it visible.
        if raw not in (None, ""):
            logging.getLogger(__name__).warning(
                "Unrecognised chunk strategy %r; falling back to SEMANTIC", raw
            )
        return ChunkStrategy.SEMANTIC
# Module-level application instance; create_app() runs when this module is imported.
app: FastAPI = create_app()
|