# Contextual_ChatBot / retrievingQueryResponse.py
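"""Retrieval-augmented question answering for the Contextual ChatBot.

Given a document id, a user query, and the running conversation history, this
module retrieves matching chunks from a local Chroma vector store and asks a
local Ollama model (llama3.2) to answer from that context.
"""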
import chromadb
import os
from langchain_chroma import Chroma
from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT
import time
import transformers
from langchain_community.llms import CTransformers
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from transformers import pipeline
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import ChatOllama
# Remote Chroma client (created here but not referenced below; the vector store
# is opened from the local persist_directory instead).
client = chromadb.HttpClient("http://localhost:8000")
def using_ollama_model(retriever, query, results, conversation_history):
    """Build a prompt from the history and retrieved chunks, then answer with Ollama."""
    # Flatten the prior turns into a plain-text transcript for the prompt.
    history_text = ""
    for item in conversation_history:
        if "question" in item and item["question"]:
            history_text += f"User: {item['question']}\n"
        if "answer" in item and item["answer"]:
            history_text += f"Assistant: {item['answer']}\n"

    print("<<<<<< LLM MODEL STARTED >>>>>>")
    print(" ========>", history_text)

    # Prompt template: answer only from the retrieved context and the history.
    prompt_template = """
    You are a helpful assistant. Answer the following question using the provided context and previous conversation history.
    If the context does not contain the answer, and only then, reply with: "Sorry, I don't have enough information."
    Conversation History: {history}
    Context: {results}
    Question: {query}
    """
    template = PromptTemplate(
        input_variables=["history", "results", "query"],
        template=prompt_template,
    )

    # Join the retrieved chunks with real newlines (not the literal "\n" text).
    doc_texts = "\n".join([doc.page_content for doc in results])
    formatted_output = template.format(history=history_text, results=doc_texts, query=query)
    print("<<<<<<<<<<< Formatted Output >>>>>>>>>>>")
    print(formatted_output)
    print("type of formatted output is ", type(formatted_output))

    # Local Ollama model; a low temperature keeps answers close to the context.
    llm = ChatOllama(model="llama3.2", temperature=0.4, num_predict=512)
    rag_chain = template | llm | StrOutputParser()
    # results = retriever.invoke(query)
    # doc_texts = "\n".join([doc.page_content for doc in results])
    answer = rag_chain.invoke({"history": history_text, "results": doc_texts, "query": query})
    return answer
    # # Set up the RAG pipeline
    # rag_pipeline = RetrievalQAWithSourcesChain.from_chain_type(
    #     llm=llm, chain_type="stuff", retriever=retriever
    # )
    #
    # try:
    #     # # answer = rag_pipeline.run(formatted_output)
    #     answer = rag_pipeline.invoke(formatted_output)
    #     return answer
    # except Exception as e:
    #     print(f"Error occurred during invocation: {e}")
    #     return None
def retrievingReponse(docId, query, conversation_history):
    """Retrieve chunks for `docId` from Chroma and answer `query` with the LLM."""
    # Embedding model used for querying (runs on Apple "mps").
    model_kwargs = {"device": "mps"}
    encode_kwargs = {"normalize_embeddings": True}
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-distilroberta-base-v1",
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )
    vectorDB = Chroma(
        collection_name="embeddings",
        embedding_function=embeddings,  # Using the encode method to get embeddings
        persist_directory="MM_CHROMA_DB",
    )
    # retriever = vectorDB.as_retriever(
    #     search_type="mmr",
    #     search_kwargs={
    #         "k": 6,  # was 5 originally
    #         "lambda_mult": 1,  # was 0.30 originally
    #         "filter": {"docId": docId}
    #     }
    # )
    retriever = vectorDB.as_retriever(
        search_type="similarity",
        search_kwargs={
            "k": 4,  # was 5 originally
            # "lambda_mult": 1,  # was 0.30 originally
            "filter": {"docId": docId},
        },
    )
    # retriever = vectorDB.as_retriever()
    print("<<<<<<<<<<<<<<<< Retriever >>>>>>>>>>>>>>>>")
    # print("d", retriever)
    print("\n")
    results = retriever.invoke(query)

    # Deduplicate and clean the retrieved chunks (kept for inspection only; the
    # raw `results` list is what gets passed to the LLM below).
    unique_results = []
    seen_texts = set()
    for result in results:
        print(result)
        # If the result's content has not been seen before, process it
        if result.page_content not in seen_texts:
            ans = result.page_content
            ans = ans.replace("\n", "")  # Clean the content by removing newlines
            unique_results.append(ans)  # Add the cleaned answer to the results list
            seen_texts.add(result.page_content)  # Mark this text as seen

    # Silence the HuggingFace tokenizers fork-parallelism warning.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    start = time.time()
    # llm_result = using_llm_model(retriever, query, results)
    llm_result = using_ollama_model(retriever, query, results, conversation_history)
    end = time.time()
    print("Inference Time:>>>>>>> ", end - start)
    return llm_result
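

# Illustrative usage sketch (not part of the original module): the document id,
# query, and history values below are hypothetical. It assumes the Chroma store
# in "MM_CHROMA_DB" already holds chunks tagged with that docId in their
# metadata, and that an Ollama server with the "llama3.2" model is running
# locally.
if __name__ == "__main__":
    history = [
        {"question": "What is the report about?", "answer": "It summarises Q3 sales."},
    ]
    answer = retrievingReponse("doc-123", "Which region grew fastest?", history)
    print(answer)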