lchakkei committed on
Commit
7ccaef4
·
verified ·
1 Parent(s): 7d0f09a

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +22 -64
handler.py CHANGED
@@ -93,85 +93,42 @@ class EndpointHandler():
93
  loader = WebBaseLoader(urls)
94
  data = loader.load()
95
 
96
- text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 16)
97
  all_splits = text_splitter.split_documents(data)
98
 
99
  vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding_function)
100
- retriever = vectorstore.as_retriever()
101
 
102
- compressor = LLMChainExtractor.from_llm(chat)
103
- compression_retriever = ContextualCompressionRetriever(
104
- base_compressor=compressor, base_retriever=retriever
105
- )
106
 
107
- _template = """[INST] Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
108
- Chat History:
109
- {chat_history}
110
- Follow Up Input: {question}
111
- Standalone question: [/INST]"""
112
-
113
- CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
114
 
115
- template = """[INST] Answer the question based only on the following context:
116
  {context}
117
 
118
- Question: {question} [/INST]
119
- """
120
-
121
- ANSWER_PROMPT = ChatPromptTemplate.from_template(template)
122
-
123
- self.memory = ConversationBufferMemory(
124
- return_messages=True, output_key="answer", input_key="question"
125
- )
126
 
127
- # First we add a step to load memory
128
- # This adds a "memory" key to the input object
129
- loaded_memory = RunnablePassthrough.assign(
130
- chat_history=RunnableLambda(self.memory.load_memory_variables) | itemgetter("history"),
131
- )
132
- # Now we calculate the standalone question
133
- standalone_question = {
134
- "standalone_question": {
135
- "question": lambda x: x["question"],
136
- "chat_history": lambda x: get_buffer_string(x["chat_history"]),
137
- }
138
- | CONDENSE_QUESTION_PROMPT
139
  | chat
140
- | StrOutputParser(),
141
- }
142
-
143
- DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
144
-
145
- def _combine_documents(
146
- docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
147
- ):
148
- doc_strings = [format_document(doc, document_prompt) for doc in docs]
149
- return document_separator.join(doc_strings)
150
-
151
- # Now we retrieve the documents
152
- retrieved_documents = {
153
- "docs": itemgetter("standalone_question") | retriever,
154
- "question": lambda x: x["standalone_question"],
155
- }
156
- # Now we construct the inputs for the final prompt
157
- final_inputs = {
158
- "context": lambda x: _combine_documents(x["docs"]),
159
- "question": itemgetter("question"),
160
- }
161
- # And finally, we do the part that returns the answers
162
- answer = {
163
- "answer": final_inputs | ANSWER_PROMPT | chat,
164
- "docs": itemgetter("docs"),
165
- }
166
- # And now we put it all together!
167
- self.final_chain = loaded_memory | standalone_question | retrieved_documents | answer
168
 
169
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
170
  # get inputs
171
  inputs = data.pop("inputs",data)
172
  date = data.pop("date", None)
173
 
174
- result = self.final_chain.invoke({"question": inputs})
175
 
176
  answer = result['answer']
177
 
@@ -179,7 +136,8 @@ class EndpointHandler():
179
  # This will be improved in the future
180
  # For now you need to save it yourself
181
  # self.memory.save_context(inputs, {"answer": answer})
182
- self.memory.load_memory_variables({})
 
183
 
184
  return answer
185
 
 
93
  loader = WebBaseLoader(urls)
94
  data = loader.load()
95
 
96
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
97
  all_splits = text_splitter.split_documents(data)
98
 
99
  vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding_function)
100
+ retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})
101
 
102
+ # compressor = LLMChainExtractor.from_llm(chat)
103
+ # compression_retriever = ContextualCompressionRetriever(
104
+ # base_compressor=compressor, base_retriever=retriever
105
+ # )
106
 
107
+ template = """Use the following pieces of context to answer the question at the end.
108
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
109
+ Use three sentences maximum and keep the answer as concise as possible.
110
+ Always say "thanks for asking!" at the end of the answer.
 
 
 
111
 
 
112
  {context}
113
 
114
+ Question: {question}
 
 
 
 
 
 
 
115
 
116
+ Helpful Answer:"""
117
+ custom_rag_prompt = PromptTemplate.from_template(template)
118
+
119
+ self.rag_chain = (
120
+ {"context": retriever | format_docs, "question": RunnablePassthrough()}
121
+ | custom_rag_prompt
 
 
 
 
 
 
122
  | chat
123
+ | StrOutputParser()
124
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
127
  # get inputs
128
  inputs = data.pop("inputs",data)
129
  date = data.pop("date", None)
130
 
131
+ result = self.rag_chain.invoke(inputs)
132
 
133
  answer = result['answer']
134
 
 
136
  # This will be improved in the future
137
  # For now you need to save it yourself
138
  # self.memory.save_context(inputs, {"answer": answer})
139
+
140
+ #self.memory.load_memory_variables({})
141
 
142
  return answer
143