# chatbot_app/app.py
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import os
import streamlit as st
# Embedding model (runs on CPU, unnormalized embeddings); the same model must have
# been used when the FAISS index was built.
model_path = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_path,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
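# Note (sketch): Streamlit reruns this script on every interaction, so the embedding
# model, the FAISS index below, and the Groq chat model are all reconstructed each
# time; building them inside a function decorated with @st.cache_resource is a common
# way to avoid the repeated loading.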
# Load the FAISS index
db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
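# allow_dangerous_deserialization is needed because FAISS metadata is pickled; only
# enable it for an index you created yourself. A hypothetical sketch of how such a
# "faiss_index" folder is typically built (not part of this app):
#   chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(raw_docs)
#   FAISS.from_documents(chunks, embeddings).save_local("faiss_index")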
retriever = db.as_retriever(search_kwargs={"k": 2})  # fetch the top-2 chunks per query

# Contextual compression: re-split the retrieved chunks, drop near-duplicate pieces,
# then keep only the pieces that are embedding-similar to the query.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(embeddings=embeddings)
pipeline_compressor = DocumentCompressorPipeline(transformers=[text_splitter, redundant_filter, relevant_filter])
compression_retriever = ContextualCompressionRetriever(base_compressor=pipeline_compressor, base_retriever=retriever)
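# EmbeddingsFilter can also enforce a cutoff directly, e.g. (illustrative threshold):
#   EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.3)
# which would drop weak matches inside the retriever instead of the manual score check
# done in api_py_function below.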
# Groq-hosted LLM used for answer generation; the API key is read from the environment
# rather than being hard-coded.
chat = ChatGroq(temperature=0, groq_api_key=os.environ.get("GROQ_API_KEY"),
                model_name="llama3-70b-8192")
rag_template_str = ("""
Answer the following query based on the context given.
Stylization:
1) Do not include or reference quoted content verbatim in the answer. Don't say "According to the context provided".
2) Include the source URLs.
3) Include the category it belongs to.
Formatting:
1) Use bullet points.
Restriction:
1) Only use the context to answer the question.
2) If you don't know the answer, reply with "No answer found, you can contact us on https://www.i2econsulting.com/contact-us/"
context: {context}
query: {query}
""")
rag_prompt = ChatPromptTemplate.from_template(rag_template_str)
rag_chain = rag_prompt | chat | StrOutputParser()
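# The LCEL pipeline renders the prompt, calls the Groq model, and parses the reply into a
# plain string, e.g. (illustrative): rag_chain.invoke({"query": "...", "context": "..."})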
st.title("i2e Enterprise Chatbot")
user_query = st.text_input("Ask a question")
def api_py_function(query):
    """Retrieve compressed context for the query and answer it with the RAG chain."""
    context = compression_retriever.get_relevant_documents(query)
    relevant_chunks = []
    for document in context[:5]:
        # EmbeddingsFilter stores its similarity score in the document state; keep only
        # chunks that clear a minimal relevance score.
        if document.state['query_similarity_score'] > 0.1:
            relevant_chunks.append(document.page_content + str(document.metadata))
    final_context = ''.join(relevant_chunks)
    if relevant_chunks:
        response = rag_chain.invoke({"query": query, "context": final_context})
    else:
        response = ("No answer found. Please rephrase your question or contact us on "
                    "https://www.i2econsulting.com/contact-us/")
    return response
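# Illustrative usage: api_py_function("What services does i2e Consulting offer?") returns
# either a bulleted answer with source URLs or the fallback contact-us message.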
if user_query:
    print("processing request")
    full_response = api_py_function(user_query)
    st.write(full_response)