huzaifa-dangote's picture
Upload folder using huggingface_hub
3419810 verified
from pathlib import Path
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from dotenv import load_dotenv
# Load variables from a .env file; override=True lets them replace values
# already present in the process environment.
load_dotenv(override=True)

# Chat model used to generate answers.
MODEL = "gpt-4.1-mini"

# The persisted Chroma store lives in "vector_db", two levels up from this file.
DB_NAME = str(Path(__file__).parent.parent / "vector_db")

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Alternative embedding backend (would also need an OpenAIEmbeddings import):
# embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Number of documents to retrieve per question.
RETRIEVAL_K = 10

# Template for the system message; {context} is filled with the retrieved text.
SYSTEM_PROMPT = """
You are a knowledgeable, snarky assistant representing the company Insurellm.
You are chatting with a user about Insurellm.
If relevant, use the given context to answer any question.
Always start the chat by informing the user how you can help.
If you don't know the answer, say so.
You like to use emojis.
Context:
{context}
"""

vectorstore = Chroma(persist_directory=DB_NAME, embedding_function=embeddings)

# Configure k on the retriever itself. Whether extra keyword arguments given to
# retriever.invoke() reach the vector search depends on the installed
# langchain-core version; search_kwargs is honoured regardless, so RETRIEVAL_K
# always takes effect.
retriever = vectorstore.as_retriever(search_kwargs={"k": RETRIEVAL_K})

# temperature=0 keeps the model's answers as deterministic as possible.
llm = ChatOpenAI(temperature=0, model_name=MODEL)
def fetch_context(question: str):
    """
    Look up the documents most relevant to ``question``.

    Delegates to the module-level ``retriever``, requesting ``RETRIEVAL_K``
    results, and returns whatever the retriever yields.
    """
    matches = retriever.invoke(question, k=RETRIEVAL_K)
    return matches


# NOTE: the string literal below is a disabled helper, not live code; it is
# evaluated as a bare expression and discarded. It would have joined the
# content of earlier user messages and appended the current question,
# presumably to build a history-aware retrieval query.
""" def combined_question(question, history):
prior = "\n".join(m["content"] for m in history if m["role"] == "user")
return prior + "\n" + question """
def format_context(docs):
    """
    Render retrieved documents as an HTML-flavoured text snippet for display.

    The result starts with an orange "Relevant Context" heading. Each document
    then contributes a "Source: ..." line followed by its text, with a blank
    line after every section.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping and a
            ``page_content`` string.

    Returns:
        The assembled string; just the heading when ``docs`` is empty.
    """
    parts = ["<h2 style='color: #ff7800;'>Relevant Context</h2>\n\n"]
    for doc in docs:
        # A document may lack a "source" entry; show a placeholder instead of
        # raising KeyError halfway through rendering.
        source = doc.metadata.get("source", "unknown")
        parts.append(f"<span style='color: #ff7800;'>Source: {source}</span>\n\n")
        parts.append(doc.page_content + "\n\n")
    # Join once at the end rather than growing a string with repeated +=.
    return "".join(parts)
def answer_question(question, history):
    """
    Run one retrieval-augmented chat turn.

    Retrieves documents for ``question``, places their text in the system
    prompt, replays ``history`` as chat messages, appends the new question and
    asks the LLM for a reply.

    Args:
        question: The user's current message.
        history: Sequence of dicts with "role" and "content" keys. Entries
            whose role is neither "user" nor "assistant" are skipped.

    Returns:
        A ``(answer_text, formatted_context)`` tuple, where the second item is
        the output of ``format_context`` for the retrieved documents.
    """
    retrieved = fetch_context(question)
    joined_text = "\n\n".join(item.page_content for item in retrieved)

    # The conversation always opens with the context-bearing system message.
    conversation = [SystemMessage(content=SYSTEM_PROMPT.format(context=joined_text))]

    for turn in history:
        speaker = turn["role"]
        if speaker == "user":
            wrap = HumanMessage
        elif speaker == "assistant":
            wrap = AIMessage
        else:
            # Any other role is not replayed to the model.
            continue
        conversation.append(wrap(content=turn["content"]))

    conversation.append(HumanMessage(content=question))

    reply = llm.invoke(conversation)
    context_view = format_context(retrieved)
    return reply.content, context_view