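"""RAG question answering over a Chroma vector store.

Retrieves document chunks for a given docId with HuggingFace sentence
embeddings, then answers the user's query with a local Ollama model
(llama3.2), folding the prior conversation history into the prompt.
"""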
import os
import time

import chromadb
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama


# NOTE: this HTTP client is created but never used below; the Chroma retriever
# reads from the local persist_directory instead.
client = chromadb.HttpClient(host="localhost", port=8000)


def using_ollama_model(retriever, query, results, conversation_history):
    """Answer `query` with a local Ollama model, using the retrieved `results`
    and `conversation_history` (a list of {"question": ..., "answer": ...} dicts).
    `retriever` is accepted but unused here; retrieval happens in the caller."""

    # Flatten the history into a "User: ... / Assistant: ..." transcript.
    history_text = ""
    for item in conversation_history:
        if item.get("question"):
            history_text += f"User: {item['question']}\n"
        if item.get("answer"):
            history_text += f"Assistant: {item['answer']}\n"

    print("<<<<<< LLM MODEL STARTED >>>>>>")
    print(" ========>", history_text)
    # Prompt: answer from the context plus history; refuse only when the
    # context lacks the answer.
    prompt_template = """
    You are a helpful assistant. Answer the following question using the provided context and the previous conversation history.
    If the context does not contain the answer, and only then, reply with: "Sorry, I don't have enough information."
    Conversation History: {history}
    Context: {results}
    Question: {query}
    """

    # Initialize the PromptTemplate
    template = PromptTemplate(
        input_variables=["history", "results", "query"],
        template=prompt_template,
    )

    # Join the retrieved chunks with real newlines; the original "\\n" joined
    # them with a literal backslash-n.
    doc_texts = "\n".join(doc.page_content for doc in results)

    formatted_output = template.format(history=history_text, results=doc_texts, query=query)

    print("<<<<<<<<<<< Formatted Output >>>>>>>>>>>")
    print(formatted_output)
    print("type of formatted output is ", type(formatted_output))


    # Local llama3.2 via Ollama; num_predict caps the number of generated tokens.
    llm = ChatOllama(model="llama3.2", temperature=0.4, num_predict=512)

    rag_chain = template | llm | StrOutputParser()

    answer = rag_chain.invoke({"history": history_text, "results": doc_texts, "query": query})

    return answer


def retrievingReponse(docId, query, conversation_history):
    """Retrieve chunks for `docId` relevant to `query`, then answer via Ollama."""

    # "mps" targets Apple-silicon GPUs; use "cpu" (or "cuda") on other hardware.
    model_kwargs = {"device": "mps"}
    encode_kwargs = {"normalize_embeddings": True}
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-distilroberta-base-v1",
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )

    vectorDB = Chroma(
        collection_name="embeddings",
        embedding_function=embeddings,  # Chroma calls this to embed queries
        persist_directory="MM_CHROMA_DB",
    )

    # An MMR retriever (search_type="mmr", k=6, lambda_mult=1) was tried here;
    # plain similarity search with a docId filter is used instead.
    retriever = vectorDB.as_retriever(
        search_type="similarity",
        search_kwargs={
            "k": 4,  # was 5 originally
            "filter": {"docId": docId},
        },
    )

    print("<<<<<<<<<<<<<<<< Retriever >>>>>>>>>>>>>>>>")
    print("\n")

    results = retriever.invoke(query)

    # Deduplicate retrieved chunks by content. Note: `unique_results` holds
    # the cleaned strings, but the raw Document list `results` is what is
    # passed to the LLM below.
    unique_results = []
    seen_texts = set()

    for result in results:
        print(result)
        if result.page_content not in seen_texts:
            ans = result.page_content.replace("\n", "")  # strip newlines
            unique_results.append(ans)
            seen_texts.add(result.page_content)

    # Silence the HuggingFace tokenizers fork-parallelism warning.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    start = time.time()
    llm_result = using_ollama_model(retriever, query, results, conversation_history)
    end = time.time()
    print("Inference Time:>>>>>>> ", end - start)
    return llm_result
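

# A minimal usage sketch. The docId value, query, and history entries below
# are hypothetical placeholders, not values from this project; a Chroma store
# in MM_CHROMA_DB with matching docId metadata is assumed.
if __name__ == "__main__":
    history = [
        {"question": "What is this document about?", "answer": "A short overview."},
    ]
    print(retrievingReponse("doc-123", "Summarize the key points.", history))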