File size: 1,738 Bytes
9f031f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""

chatbot.py — standalone RAG chain helpers for the 9jaLingo FAQ chatbot.

"""

from __future__ import annotations

import os
from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel
from langchain_ollama import ChatOllama

from src.ingest import get_or_build_vectorstore


# Model identifier handed to ChatOllama in build_rag_chain. It is read once at
# import time from the LLM_MODEL environment variable; the literal below is the
# fallback used when that variable is not set.
LLM_MODEL = os.getenv("LLM_MODEL", "hf.co/LiquidAI/LFM2-1.2B-RAG-GGUF:Q5_K_M")

# System-message template used by build_rag_chain. The `{context}` placeholder
# is a prompt-template variable; build_rag_chain supplies it with the text
# produced by _format_docs from the retrieved documents.
# NOTE(review): every prompt line is followed by a blank line, which looks like
# a line-ending artifact rather than intentional spacing — confirm before
# changing, since this text is sent to the model verbatim.
SYSTEM_PROMPT = """You are a friendly and knowledgeable support assistant for 9jaLingo,

a voice AI platform for African language speech products.



Answer the user's question using ONLY the context provided below.

If the context does not contain enough information to answer, say so politely

and suggest the user visit https://www.9jalingo.org or contact support.



Context:

{context}

"""


def _format_docs(docs) -> str:  # type: ignore[type-arg]
    """Join the ``page_content`` of each item in *docs* into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents in iteration order, separated by a blank line
        (two newline characters); an empty string when *docs* is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)


def build_rag_chain(k: int = 4):
    """Assemble the retrieval-augmented chain used to answer FAQ questions.

    The chain takes a mapping with a ``"question"`` key. That question is
    sent to the retriever, whose documents are flattened by ``_format_docs``
    into the ``context`` prompt variable, and is also passed through
    unchanged as the ``question`` prompt variable.

    Args:
        k: Value forwarded to the retriever as ``search_kwargs={"k": k}``.

    Returns:
        The composed runnable: input mapping -> chat prompt -> ``ChatOllama``
        -> ``StrOutputParser``.
    """
    store = get_or_build_vectorstore()
    doc_retriever = store.as_retriever(search_kwargs={"k": k})

    chat_model = ChatOllama(model=LLM_MODEL, temperature=0.2)

    messages = [
        ("system", SYSTEM_PROMPT),
        ("human", "{question}"),
    ]
    chat_prompt = ChatPromptTemplate.from_messages(messages)

    # Both prompt variables are derived from the same "question" input key.
    pick_question = itemgetter("question")
    context_branch = pick_question | doc_retriever | _format_docs
    prompt_inputs = RunnableParallel(
        context=context_branch,
        question=pick_question,
    )

    pipeline = prompt_inputs | chat_prompt | chat_model | StrOutputParser()
    return pipeline


def stream_rag_chain(question: str, k: int = 4):
    """Yield the streamed output of a freshly built RAG chain for *question*.

    This is a generator function: the chain is built by
    ``build_rag_chain(k=k)`` only when iteration starts, and a new chain is
    built on every call.

    Args:
        question: Text passed to the chain under the ``"question"`` key.
        k: Forwarded to ``build_rag_chain``.

    Yields:
        Each item produced by the chain's ``stream`` method, in order.
    """
    payload = {"question": question}
    rag = build_rag_chain(k=k)
    yield from rag.stream(payload)