# qa.py
from functools import lru_cache
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import os
from config import Config
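
# NOTE: `Config` (from the local `config` module) is assumed to expose at least
# `MODEL_PATH` (path to the GGUF model file) and, judging by the commented-out
# line further down, possibly `LLM_THREADS`. A minimal sketch of the assumed
# shape -- not the actual project config:
#
#   class Config:
#       MODEL_PATH = "models/your-model.gguf"  # hypothetical path
#       LLM_THREADS = 8                        # hypothetical value
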
# ---------- LLM import ----------
try:
    # Newer LangChain releases ship integrations in langchain_community.
    from langchain_community.llms import LlamaCpp
except Exception:
    # Fall back to the legacy import path on older LangChain installs.
    from langchain.llms import LlamaCpp

# ---------- Prompt ----------
PROMPT = PromptTemplate(
    template="""
You are a resume and work experience assistant. Answer concisely based on context.
Context:
{context}
Question:
{question}
Answer:
""",
    input_variables=["context", "question"],
)

# ---------- LLM singleton ----------
@lru_cache(maxsize=1)
def _load_llm():
    """Load the llama.cpp model once and reuse the same instance (singleton)."""
    return LlamaCpp(
        model_path=Config.MODEL_PATH,
        # n_threads=Config.LLM_THREADS,
        n_threads=min(8, os.cpu_count()),
        temperature=0.2,
        max_tokens=1024,
        n_ctx=512,       # context window; output is bounded by this even though max_tokens is larger
        n_batch=1024,
        top_k=40,
        top_p=0.90,
        use_mlock=True,  # pin model memory to RAM to avoid swapping
        verbose=False,
        stop=["\n##", "Sources:"],
        model_kwargs={"n_gqa": 8, "offload_kqv": True},  # passed straight through to llama.cpp
    )

# ---------- QA chain ----------
@lru_cache(maxsize=1)
def get_qa_chain(vector_store):
    """Build (and memoize) a RetrievalQA chain over the given vector store.

    Note: `lru_cache` requires the vector store argument to be hashable;
    most store objects hash by object identity, so this works as long as
    the same instance is reused.
    """
    llm = _load_llm()
    retriever = vector_store.as_retriever(search_kwargs={"k": 2})
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
        chain_type="stuff",  # stuff the retrieved documents directly into the prompt
        chain_type_kwargs={"prompt": PROMPT},
    )
    return chain

# ---------- convenience helper ----------
def ask_chain(chain, question: str, return_sources=False):
    """Run the chain via `invoke`; return the full output dict (answer plus
    source documents) when `return_sources` is True, otherwise just the answer."""
    out = chain.invoke({"query": question})
    return out if return_sources else out.get("result", "")
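
# ---------- usage sketch (illustrative, not part of the original module) ----------
# A minimal sketch of how this module is assumed to be wired up. The FAISS store
# and embedding model below are hypothetical stand-ins; the real project presumably
# builds its vector store elsewhere. Requires faiss-cpu, sentence-transformers,
# and a GGUF model at Config.MODEL_PATH.
if __name__ == "__main__":
    from langchain_community.vectorstores import FAISS
    from langchain_community.embeddings import HuggingFaceEmbeddings

    # Tiny in-memory index so the retriever has something to return.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"  # hypothetical choice
    )
    store = FAISS.from_texts(
        ["Example: Jane Doe worked five years as a data engineer at Acme."],
        embeddings,
    )

    chain = get_qa_chain(store)
    print(ask_chain(chain, "What roles has the candidate held?"))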