Spaces:

1MR
/

ragopenai

Sleeping

App Files Files Community

ragopenai / app.py

1MR

Update app.py

202a39d verified about 1 year ago

raw

history blame contribute delete

8.01 kB

	import streamlit as st
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import FAISS
	from langchain.chat_models import ChatOpenAI
	from langchain.memory import ConversationBufferMemory
	from langchain.chains import ConversationalRetrievalChain
	from huggingface_hub import InferenceClient
	import tempfile
	import os
	from langchain_community.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
	from htmlTemplates import css, bot_template, user_template


	def get_pdf_text(pdf_docs):
	    """Load an uploaded PDF into LangChain documents.

	    The upload is written to a temporary directory because ``PyPDFLoader``
	    is given a filesystem path to read from.

	    Args:
	        pdf_docs: Uploaded file object exposing ``name`` and ``getvalue()``.

	    Returns:
	        The result of ``PyPDFLoader(path).load()`` for the uploaded file.
	    """
	    # The context manager removes the directory as soon as loading is done,
	    # instead of leaving cleanup to the object's finalizer.
	    with tempfile.TemporaryDirectory() as temp_dir:
	        # Keep only the last path component so a crafted upload name cannot
	        # make os.path.join point outside the temporary directory.
	        temp_filepath = os.path.join(temp_dir, os.path.basename(pdf_docs.name))
	        with open(temp_filepath, "wb") as f:
	            f.write(pdf_docs.getvalue())
	        return PyPDFLoader(temp_filepath).load()


	def get_text_file(text_docs):
	    """Load an uploaded plain-text file into LangChain documents.

	    The upload is written to a temporary directory because ``TextLoader``
	    is given a filesystem path to read from.

	    Args:
	        text_docs: Uploaded file object exposing ``name`` and ``getvalue()``.

	    Returns:
	        The result of ``TextLoader(path).load()`` for the uploaded file.
	    """
	    # The context manager removes the directory as soon as loading is done,
	    # instead of leaving cleanup to the object's finalizer.
	    with tempfile.TemporaryDirectory() as temp_dir:
	        # Keep only the last path component so a crafted upload name cannot
	        # make os.path.join point outside the temporary directory.
	        temp_filepath = os.path.join(temp_dir, os.path.basename(text_docs.name))
	        with open(temp_filepath, "wb") as f:
	            f.write(text_docs.getvalue())
	        return TextLoader(temp_filepath).load()


	def get_csv_file(csv_docs):
	    """Load an uploaded CSV file into LangChain documents.

	    The upload is written to a temporary directory because ``CSVLoader``
	    is given a filesystem path to read from.

	    Args:
	        csv_docs: Uploaded file object exposing ``name`` and ``getvalue()``.

	    Returns:
	        The result of ``CSVLoader(path).load()`` for the uploaded file.
	    """
	    # The context manager removes the directory as soon as loading is done,
	    # instead of leaving cleanup to the object's finalizer.
	    with tempfile.TemporaryDirectory() as temp_dir:
	        # Keep only the last path component so a crafted upload name cannot
	        # make os.path.join point outside the temporary directory.
	        temp_filepath = os.path.join(temp_dir, os.path.basename(csv_docs.name))
	        with open(temp_filepath, "wb") as f:
	            f.write(csv_docs.getvalue())
	        return CSVLoader(temp_filepath).load()


	def get_json_file(json_docs):
	    """Load an uploaded JSON file into LangChain documents.

	    The upload is written to a temporary directory because ``JSONLoader``
	    is given a filesystem path to read from.

	    Args:
	        json_docs: Uploaded file object exposing ``name`` and ``getvalue()``.

	    Returns:
	        The result of ``JSONLoader(...).load()`` for the uploaded file.
	    """
	    # The context manager removes the directory as soon as loading is done,
	    # instead of leaving cleanup to the object's finalizer.
	    with tempfile.TemporaryDirectory() as temp_dir:
	        # Keep only the last path component so a crafted upload name cannot
	        # make os.path.join point outside the temporary directory.
	        temp_filepath = os.path.join(temp_dir, os.path.basename(json_docs.name))
	        with open(temp_filepath, "wb") as f:
	            f.write(json_docs.getvalue())
	        # JSONLoader requires a jq_schema; "." selects the whole document.
	        # text_content=False lets non-string JSON (objects/arrays) be
	        # serialised into the page content rather than rejected.
	        json_loader = JSONLoader(temp_filepath, jq_schema=".", text_content=False)
	        return json_loader.load()


	def get_text_chunks(documents, chunk_size=300, chunk_overlap=100):
	    """Split documents into overlapping chunks for embedding.

	    Args:
	        documents: Documents to split; passed to
	            ``RecursiveCharacterTextSplitter.split_documents``.
	        chunk_size: Maximum chunk length as measured by ``len``.
	            Defaults to 300, the value previously hard-coded here.
	        chunk_overlap: Overlap between consecutive chunks, measured the
	            same way. Defaults to 100, the value previously hard-coded.

	    Returns:
	        The chunked documents returned by ``split_documents``.
	    """
	    text_splitter = RecursiveCharacterTextSplitter(
	        chunk_size=chunk_size,
	        chunk_overlap=chunk_overlap,
	        length_function=len,
	    )
	    return text_splitter.split_documents(documents)


	def get_vectorstore(text_chunks):
	    """Embed the chunks and index them in a FAISS vector store.

	    Args:
	        text_chunks: Documents to index; passed to ``FAISS.from_documents``.

	    Returns:
	        The FAISS vector store built from ``text_chunks`` using the
	        ``WhereIsAI/UAE-Large-V1`` embedding model.
	    """
	    embedding_model = HuggingFaceEmbeddings(model_name="WhereIsAI/UAE-Large-V1")
	    return FAISS.from_documents(text_chunks, embedding_model)
	#sentence-transformers/all-MiniLM-L6-v2
	#HuggingFaceH4/zephyr-7b-alpha
	#Qwen/Qwen2.5-72B-Instruct
	#mistralai/Mistral-7B-Instruct-v0.2
	def get_conversation_chain(vectorstore, tokenH):
	    """Build a question-answering callable backed by the vector store.

	    Args:
	        vectorstore: Vector store exposing ``as_retriever``; queried for the
	            5 most similar chunks on every question.
	        tokenH: HuggingFace API token used to create the ``InferenceClient``.

	    Returns:
	        A function ``conversation_chain(user_input)`` that retrieves context
	        for ``user_input`` and returns the model's reply content.

	    Raises:
	        ValueError: If ``tokenH`` is empty or the client cannot be created.
	            The returned callable also raises ``ValueError`` when the chat
	            completion request fails.
	    """
	    if not tokenH:
	        raise ValueError("API token is required to initialize the HuggingFaceHub model")

	    try:
	        client = InferenceClient(api_key=tokenH)
	    except Exception as e:
	        # Chain the cause so the original traceback is not lost.
	        raise ValueError(f"Error initializing HuggingFace InferenceClient: {str(e)}") from e

	    # The retriever configuration never changes, so build it once rather
	    # than on every question.
	    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

	    def generate_response(messages):
	        """Send ``messages`` to the chat model and return the reply content."""
	        try:
	            completion = client.chat.completions.create(
	                model="Qwen/Qwen2.5-72B-Instruct",
	                messages=messages,
	                max_tokens=500,
	            )
	            return completion.choices[0].message['content']
	        except Exception as e:
	            raise ValueError(f"Error generating response: {str(e)}") from e

	    def conversation_chain(user_input):
	        """Answer ``user_input`` using the retrieved chunks as context."""
	        documents = retriever.get_relevant_documents(user_input)
	        documents_text = "\n".join(doc.page_content for doc in documents)
	        # The system message carrying the retrieved context goes first so
	        # that the user's question is the final turn the model replies to.
	        messages = [
	            {"role": "system", "content": documents_text},
	            {"role": "user", "content": user_input},
	        ]
	        return generate_response(messages)

	    return conversation_chain


	def handle_userinput(user_question):
	    """Answer a question and render the whole chat history.

	    Calls the callable stored in ``st.session_state.conversation`` with the
	    question, appends the question and the reply to
	    ``st.session_state.chat_history``, then writes every stored message
	    using the user/bot HTML templates.

	    Args:
	        user_question: The question text entered by the user.
	    """
	    # Imported locally so this function does not depend on a new
	    # module-level import.
	    import html

	    # Ensure chat_history is initialized
	    if "chat_history" not in st.session_state:
	        st.session_state.chat_history = []

	    # Get the response from the conversation
	    response = st.session_state.conversation(user_question)

	    # Append the user's question and the assistant's response to chat history
	    st.session_state.chat_history.append({"role": "user", "content": user_question})
	    st.session_state.chat_history.append({"role": "assistant", "content": response})

	    # Display the chat history
	    for message in st.session_state.chat_history:
	        template = user_template if message["role"] == "user" else bot_template
	        # The templates are rendered with unsafe_allow_html=True, so message
	        # text must be escaped or it would be interpreted as markup.
	        safe_content = html.escape(message["content"])
	        st.write(template.replace("{{MSG}}", safe_content), unsafe_allow_html=True)

	# for i, message in enumerate(st.session_state.chat_history):
	# if i % 2 == 0:
	# # Display user messages
	# st.write(user_template.replace("{{MSG}}", message["content"]), unsafe_allow_html=True)
	# else:
	# # Display assistant messages
	# st.write(bot_template.replace("{{MSG}}", message["content"]), unsafe_allow_html=True)

	# for i, message in enumerate(st.session_state.chat_history):
	# if i % 2 == 0:
	# st.write(user_template.replace("{{MSG}}", message.content), unsafe_allow_html=True
	# # st.write(f"<div style='color: gray;'>User: {message['content']}</div>", unsafe_allow_html=True)
	# else:
	# st.write(bot_template.replace("{{MSG}}", message.content), unsafe_allow_html=True
	# # st.write(f"<div style='color: black;'>Bot: {message['content']}</div>", unsafe_allow_html=True)


	def main():
	    """Run the Streamlit page: token entry, question box and file processing.

	    The page stops early until a HuggingFace token is entered. Questions
	    are answered only once ``st.session_state.conversation`` has been set,
	    which happens after uploaded files are processed into a vector store.
	    """
	    st.set_page_config(page_title="Chat with multiple Files", page_icon=":books:")
	    st.header("Chat with Multiple Files")
	    # HuggingFace tokens start with "hf_" ("sk-" is the OpenAI prefix), and
	    # the field is masked so the secret is not shown on screen.
	    tokenH = st.text_input("Paste your HuggingFace API Token (hf_...)", type="password")

	    if not tokenH:
	        st.warning("Please enter a valid HuggingFace API token.")
	        return

	    # Initialize session state variables
	    if "conversation" not in st.session_state:
	        st.session_state.conversation = None
	    if "chat_history" not in st.session_state:
	        st.session_state.chat_history = []

	    # User input for questions
	    user_question = st.text_input("Ask a question about your documents:")
	    if user_question:
	        if st.session_state.conversation:
	            handle_userinput(user_question)
	        else:
	            st.warning("Please upload and process files first!")

	    # File uploader and processing
	    docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
	    if not st.button("Process"):
	        return
	    if not docs:
	        st.warning("Please upload at least one document to process.")
	        return

	    # Map each accepted MIME type to the loader that handles it.
	    loaders = {
	        'text/plain': get_text_file,
	        'application/octet-stream': get_pdf_text,
	        'application/pdf': get_pdf_text,
	        'text/csv': get_csv_file,
	        'application/json': get_json_file,
	    }

	    with st.spinner("Processing"):
	        doc_list = []
	        skipped = []
	        for file in docs:
	            loader = loaders.get(file.type)
	            if loader is None:
	                # Previously such files were dropped without any feedback.
	                skipped.append(file.name)
	                continue
	            doc_list.extend(loader(file))

	        if skipped:
	            st.warning(f"Skipped unsupported file(s): {', '.join(skipped)}")

	        # Without this guard an empty document list would be handed to the
	        # vector store builder.
	        if not doc_list:
	            st.warning("No content could be loaded from the uploaded files.")
	            return

	        # Generate text chunks
	        text_chunks = get_text_chunks(doc_list)

	        # Create vector store
	        vectorstore = get_vectorstore(text_chunks)

	        # Initialize conversation chain
	        st.session_state.conversation = get_conversation_chain(vectorstore, tokenH)
	        st.success("Documents processed successfully!")


	# Start the app only when this file is executed as the entry script.
	if __name__ == "__main__":
	    main()