Commit 0451125
Parent(s): 58611cd
added langchain optional retriever
backend/app/api/routes_chat.py
CHANGED
@@ -2,7 +2,8 @@
 
 from app.core.llm import llm_chat
 from app.core.prompts import build_rag_prompt
 from app.models.api import ChatRequest, ChatResponse
+from app.retrieval.citation_filter import filter_citations
 from app.retrieval.retrieve import hybrid_graph_search
 from fastapi import APIRouter
 
@@ -29,13 +30,12 @@ def chat(request: ChatRequest) -> ChatResponse:
 
     answer = llm_chat(messages=messages)
 
-    citations = [
-        ...
-        ...
-        ...
-            snippet=sc.chunk.text[:300],
-        )
-        for sc in results
-    ]
+    citations = filter_citations(
+        answer=answer,
+        chunks=results,
+    )
 
-    return ChatResponse(
+    return ChatResponse(
+        answer=answer,
+        citations=citations,
+    )
backend/app/api/routes_chat_langchain.py
ADDED
@@ -0,0 +1,53 @@
+"""Chat routes using LangChain retriever."""
+
+from app.config import settings
+from app.models.api import ChatRequest, ChatResponse
+from app.models.retrieval import ScoredChunk
+from app.retrieval.citation_filter import filter_citations
+from app.retrieval.langchain_retriever import AtlasGraphRetriever
+from fastapi import APIRouter
+from langchain.chains import RetrievalQA
+from langchain_groq import ChatGroq
+
+router = APIRouter()
+
+
+@router.post("/ask/langchain", response_model=ChatResponse)
+def chat_langchain(request: ChatRequest) -> ChatResponse:
+    """LangChain-powered RAG endpoint with citation filtering."""
+    retriever = AtlasGraphRetriever(top_k=request.top_k)
+
+    llm = ChatGroq(
+        api_key=settings.groq_api_key,
+        model=settings.default_model,
+    )
+
+    qa_chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        retriever=retriever,
+        return_source_documents=True,
+    )
+
+    result = qa_chain.invoke({"query": request.query})
+
+    answer = result["result"]
+    source_docs = result.get("source_documents", [])
+
+    # Convert LangChain docs → ScoredChunk
+    scored_chunks = [
+        ScoredChunk(
+            chunk=doc.metadata["chunk"],
+            score=doc.metadata["score"],
+        )
+        for doc in source_docs
+    ]
+
+    citations = filter_citations(
+        answer=answer,
+        chunks=scored_chunks,
+    )
+
+    return ChatResponse(
+        answer=answer,
+        citations=citations,
+    )
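For quick manual testing, the new endpoint can be called over HTTP once the backend is up. A minimal sketch, assuming a local server on port 8000 and that ChatRequest exposes the query and top_k fields the route reads; the question text is made up:

import requests

payload = {
    "query": "What does the atlas say about coastal erosion?",  # example question
    "top_k": 5,  # how many chunks the retriever should fetch
}

# The router is mounted under the /chat prefix in app/main.py.
resp = requests.post("http://localhost:8000/chat/ask/langchain", json=payload)
resp.raise_for_status()

data = resp.json()
print(data["answer"])
for citation in data["citations"]:
    print(citation["page_start"], citation["page_end"], citation["snippet"][:80])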
backend/app/main.py
CHANGED
@@ -1,6 +1,7 @@
 """Main FastAPI application for AtlasRAG backend."""
 
 from app.api.routes_chat import router as chat_router
+from app.api.routes_chat_langchain import router as chat_langchain_router
 from app.api.routes_docs import router as docs_router
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
@@ -23,3 +24,4 @@ app.add_middleware(
 # Include routers
 app.include_router(chat_router, prefix="/chat")
 app.include_router(docs_router, prefix="/docs")
+app.include_router(chat_langchain_router, prefix="/chat")
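Since both chat routers share the /chat prefix, the app now serves /chat/ask and /chat/ask/langchain side by side. A small sketch to confirm the registered paths, assuming app.main imports cleanly in your environment:

from app.main import app

# Print every route mounted under /chat; expect /chat/ask and /chat/ask/langchain.
for route in app.routes:
    path = getattr(route, "path", "")
    if path.startswith("/chat"):
        print(path, getattr(route, "methods", None))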
backend/app/retrieval/citation_filter.py
ADDED
@@ -0,0 +1,73 @@
+"""Citation filtering utilities.
+
+Selects only the sentences from retrieved chunks that
+directly support the generated answer.
+"""
+
+import re
+from typing import List
+
+from app.models.api import Citation
+from app.models.retrieval import ScoredChunk
+from sentence_transformers import SentenceTransformer, util
+
+# Lightweight sentence embedder
+_SENTENCE_MODEL = SentenceTransformer("all-MiniLM-L6-v2")
+
+# Conservative threshold: avoids noise
+_SIMILARITY_THRESHOLD = 0.45
+_MAX_SENTENCES_PER_CHUNK = 2
+
+
+def _split_sentences(text: str) -> List[str]:
+    """Split text into clean sentences."""
+    sentences = re.split(r"(?<=[.!?])\s+", text)
+    return [s.strip() for s in sentences if len(s.strip()) >= 20]
+
+
+def filter_citations(
+    answer: str,
+    chunks: List[ScoredChunk],
+) -> List[Citation]:
+    """Filter citations to only answer-supporting sentences."""
+    if not answer.strip():
+        return []
+
+    answer_embedding = _SENTENCE_MODEL.encode(answer, normalize_embeddings=True)
+
+    filtered: List[Citation] = []
+
+    for sc in chunks:
+        sentences = _split_sentences(sc.chunk.text)
+        if not sentences:
+            continue
+
+        sentence_embeddings = _SENTENCE_MODEL.encode(
+            sentences,
+            normalize_embeddings=True,
+        )
+
+        similarities = util.cos_sim(answer_embedding, sentence_embeddings)[0]
+
+        # Collect best supporting sentences
+        selected_sentences: List[str] = []
+
+        for sent, score in zip(sentences, similarities):
+            if float(score) >= _SIMILARITY_THRESHOLD:
+                selected_sentences.append(sent)
+
+                if len(selected_sentences) >= _MAX_SENTENCES_PER_CHUNK:
+                    break
+
+        if not selected_sentences:
+            continue
+
+        filtered.append(
+            Citation(
+                page_start=sc.chunk.page_start,
+                page_end=sc.chunk.page_end,
+                snippet=" ".join(selected_sentences),
+            )
+        )
+
+    return filtered
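The filtering idea is plain sentence-level semantic similarity: embed the answer once, embed each chunk's sentences, and keep only sentences whose cosine similarity clears the threshold. A minimal standalone sketch of that step, using the same sentence-transformers calls as the module; the answer and chunk sentences here are invented:

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

answer = "Most shoreline retreat is attributed to storm-driven erosion."
chunk_sentences = [
    "Storm events account for the majority of observed shoreline retreat.",
    "The field survey was carried out between 2019 and 2021.",
]

# Normalized embeddings so cosine similarity reduces to a dot product.
answer_emb = model.encode(answer, normalize_embeddings=True)
sentence_embs = model.encode(chunk_sentences, normalize_embeddings=True)

scores = util.cos_sim(answer_emb, sentence_embs)[0]
supported = [s for s, score in zip(chunk_sentences, scores) if float(score) >= 0.45]
print(supported)  # expect only the erosion sentence to clear the 0.45 threshold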
backend/app/retrieval/langchain_retriever.py
ADDED
@@ -0,0 +1,35 @@
+"""LangChain retriever wrapper for AtlasRAG."""
+
+from typing import List
+
+from app.retrieval.retrieve import hybrid_graph_search
+from langchain_core.documents import Document
+from langchain_core.retrievers import BaseRetriever
+
+
+class AtlasGraphRetriever(BaseRetriever):
+    """LangChain-compatible retriever wrapping hybrid Graph-RAG."""
+
+    top_k: int = 5
+
+    def _get_relevant_documents(self, query: str) -> List[Document]:
+        """Retrieve documents for LangChain."""
+        results = hybrid_graph_search(query, self.top_k)
+
+        documents: List[Document] = []
+
+        for sc in results:
+            documents.append(
+                Document(
+                    page_content=sc.chunk.text,
+                    metadata={
+                        "doc_id": sc.chunk.doc_id,
+                        "page_start": sc.chunk.page_start,
+                        "page_end": sc.chunk.page_end,
+                        "chunk": sc.chunk,
+                        "score": sc.score,
+                    },
+                )
+            )
+
+        return documents
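The retriever can also be exercised outside FastAPI: because BaseRetriever implements the LangChain Runnable interface, invoke() works directly. A minimal sketch, assuming the index behind hybrid_graph_search is already populated; the query string is made up:

from app.retrieval.langchain_retriever import AtlasGraphRetriever

retriever = AtlasGraphRetriever(top_k=3)

docs = retriever.invoke("drivers of coastal erosion")  # hypothetical query

for doc in docs:
    # metadata carries the ScoredChunk fields set in _get_relevant_documents
    print(doc.metadata["doc_id"], doc.metadata["score"])
    print(doc.page_content[:120])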