"""i2e Enterprise Chatbot — Streamlit front end over a FAISS-backed RAG pipeline.

Loads a local FAISS index, wraps it in a contextual-compression retriever
(re-split -> de-duplicate -> relevance-filter), and answers user questions with
a Groq-hosted LLM through a LangChain prompt | model | parser chain.
"""

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

import os
import re
import requests
import streamlit as st

# --- Embeddings --------------------------------------------------------------
# Sentence-transformers MiniLM on CPU; embeddings are NOT L2-normalized.
# (The original file built this object twice with identical settings; once
# is enough — the second construction was dead code.)
MODEL_PATH = "sentence-transformers/all-MiniLM-l6-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=MODEL_PATH,
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': False},
)

# --- Retrieval ---------------------------------------------------------------
# allow_dangerous_deserialization: the index is a trusted local artifact
# produced by this project, not untrusted user input.
db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
retriever = db.as_retriever(search_kwargs={"k": 2})

# Compression pipeline: re-split retrieved docs into <=1000-char chunks, drop
# near-duplicate chunks, then keep only chunks embedding-similar to the query.
# EmbeddingsFilter records its score in Document.state['query_similarity_score'].
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(embeddings=embeddings)
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[text_splitter, redundant_filter, relevant_filter]
)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor, base_retriever=retriever
)

# --- LLM & chain -------------------------------------------------------------
# SECURITY: the API keys previously hard-coded here were removed (committed
# secrets must be rotated). Supply the key via the GROQ_API_KEY env var.
chat = ChatGroq(
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama3-70b-8192",
)

rag_template_str = ("""
Answer the following query based on the context given.
Stylization:
1)Do not include or reference quoted content verbatim in the answer. Don't say "According to context provided"
2)Include the source URLs
3)Include the Category it belongs to
Formatting:
1)Use bullet points
Restriction:
1)Only use context to answer the question
2)If you don't know the answer,reply with "No answer found, you can contact us on https://www.i2econsulting.com/contact-us/"
context: {context}
query:{query}
""")
rag_prompt = ChatPromptTemplate.from_template(rag_template_str)
rag_chain = rag_prompt | chat | StrOutputParser()

# --- UI ----------------------------------------------------------------------
st.title("i2e Enterprise Chatbot")
user_query = st.text_input("Ask Question")


def api_py_function(query):
    """Answer *query* from the compressed retrieval context.

    Retrieves compressed documents, keeps up to the first five whose
    query-similarity score exceeds 0.1, concatenates their page content and
    metadata into the prompt context, and invokes the RAG chain.

    Args:
        query: The user's question as a plain string.

    Returns:
        The LLM's answer string, or a fixed fallback message when no
        sufficiently relevant context was retrieved.
    """
    documents = compression_retriever.get_relevant_documents(query)
    # Keep content + metadata (metadata carries source URL / category, which
    # the prompt instructs the model to cite).
    relevant = [
        doc.page_content + str(doc.metadata)
        for doc in documents[:5]
        if doc.state['query_similarity_score'] > 0.1
    ]
    if not relevant:
        return ("No answer found, Please rephrase your question or you can "
                "contact us on https://www.i2econsulting.com/contact-us/")
    return rag_chain.invoke({"query": query, "context": ''.join(relevant)})


if user_query:
    print("processing request")  # server-side trace of incoming requests
    st.write(api_py_function(user_query))