"""Builds the retrieval QA chain for the resume assistant on top of a local LlamaCpp model."""

import os
from functools import lru_cache

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

from config import Config

# Prefer the community package; fall back to the legacy import path on older LangChain versions.
try:
    from langchain_community.llms import LlamaCpp
except ImportError:
    from langchain.llms import LlamaCpp


PROMPT = PromptTemplate(
    template="""
You are a resume and work experience assistant. Answer concisely based on context.

Context:
{context}

Question:
{question}

Answer:
""",
    input_variables=["context", "question"],
)


@lru_cache(maxsize=1)
def _load_llm():
    """Load the local GGUF model once and reuse it for every request."""
    return LlamaCpp(
        model_path=Config.MODEL_PATH,
        n_threads=min(8, os.cpu_count() or 4),  # os.cpu_count() can return None
        temperature=0.2,
        max_tokens=1024,
        n_ctx=2048,  # context window must fit the prompt plus max_tokens of output
        n_batch=1024,  # prompt-eval batch size; must not exceed n_ctx
        top_k=40,
        top_p=0.90,
        use_mlock=True,
        verbose=False,
        stop=["\n##", "Sources:"],
        # Extra options passed straight through to llama.cpp.
        model_kwargs={"n_gqa": 8, "offload_kqv": True},
    )


@lru_cache(maxsize=1)
def get_qa_chain(vector_store):
    """Build (and cache) a stuff-type RetrievalQA chain over the given vector store.

    lru_cache keys on the vector_store object's identity, so pass the same instance
    on every call to reuse the chain.
    """
    llm = _load_llm()
    retriever = vector_store.as_retriever(search_kwargs={"k": 2})
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
        chain_type="stuff",
        chain_type_kwargs={"prompt": PROMPT},
    )


def ask_chain(chain, question: str, return_sources: bool = False):
    """Invoke-based helper: returns the answer text, or the full output dict (with source documents) when return_sources is True."""
    out = chain.invoke({"query": question})
    return out if return_sources else out.get("result", "")
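

# Minimal usage sketch. Assumption: this project exposes a loader that returns a
# LangChain vector store (e.g. FAISS); `build_vector_store` and the `vectorstore`
# module are hypothetical names standing in for whatever the project actually uses.
if __name__ == "__main__":
    from vectorstore import build_vector_store  # hypothetical helper, not part of this module

    store = build_vector_store()
    qa = get_qa_chain(store)
    print(ask_chain(qa, "What was the candidate's most recent role?"))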