# ScoreChat / app.py
# (Hugging Face Spaces page residue, preserved as comments so the file parses:
#  "bart-bilski's picture" / "Rename app3.py to app.py" / commit af676a5 verified)
# importing dependencies
from dotenv import load_dotenv
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import faiss
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from htmlTemplates import css, bot_template, user_template
import os
import openai
# creating custom template to guide llm model
# Condense-question prompt used by ConversationalRetrievalChain: it rewrites a
# follow-up question into a standalone one and layers in the "Score Insight
# Specialist" persona. The {chat_history} and {question} placeholders are
# filled in by the chain at query time.
custom_template = """
Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question, in its original language.
As The Score Insight Specialist, you possess deep knowledge in decoding the nuances of national sentiment and consumer behavior. Your expertise lies in transforming intricate consumer data into valuable insights, enabling businesses to make informed decisions. Your responses should demonstrate your ability to identify trends and customer emotions, providing clear and engaging narratives that help businesses strategize effectively.
Your responses should be concise, directly related to the query, and appear as though they are derived from your own extensive knowledge base. Avoid mentioning the source of your information, and instead focus on delivering insightful analysis as if drawing from your own expertise.
If a question does not relate to your area of expertise, simply reply with "Not applicable."
ChatHistory:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""
# Wrap the raw template so the chain can format it with its inputs.
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
# extracting text from pdf
def get_pdf_text(docs):
    """Extract and concatenate the text of every page in the given PDFs.

    Args:
        docs: iterable of file paths or file-like objects accepted by PdfReader.

    Returns:
        One string containing the text of all pages, in document/page order.
    """
    text = ""
    for pdf in docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() returns None for image-only/empty pages;
            # guard so we don't raise TypeError on `str + None`.
            text += page.extract_text() or ""
    return text
# converting text to chunks
def get_chunks(raw_text):
    """Split raw report text into overlapping chunks suitable for embedding."""
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,   # overlap keeps context across chunk boundaries
        length_function=len,
    )
    return splitter.split_text(raw_text)
# using all-MiniLm embeddings model and faiss to get vectorstore
def get_vectorstore(chunks):
    """Embed the text chunks with all-MiniLM and index them in a FAISS store."""
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},  # CPU inference; no GPU assumed
    )
    return faiss.FAISS.from_texts(texts=chunks, embedding=embedding_model)
# generating conversation chain
def get_conversationchain(vectorstore):
    """Build a retrieval-augmented conversational chain over the vectorstore."""
    chat_llm = ChatOpenAI(temperature=0.2)
    # Buffer memory holds the full chat history between turns.
    buffer = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
        output_key='answer',
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=vectorstore.as_retriever(),
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
        memory=buffer,
    )
# generating response from user queries and displaying them accordingly
def handle_question(question):
    """Send the question through the conversation chain and render the chat."""
    response = st.session_state.conversation({'question': question})
    st.session_state.chat_history = response["chat_history"]
    # History alternates user/bot messages, starting with the user.
    for idx, message in enumerate(st.session_state.chat_history):
        template = user_template if idx % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
def main():
    """Entry point: configure the page, index local PDF reports, serve the chat UI.

    Side effects: reads OPENAI_API_KEY from the environment, scans the current
    directory for *.pdf files, and stores the conversation chain plus chat
    history in st.session_state across Streamlit reruns.
    """
    load_dotenv()
    # SECURITY: never hard-code API keys in source (the original embedded a
    # live-looking key here). Read it from the environment, which load_dotenv
    # populates from a local .env file or the hosting platform's secrets.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("OPENAI_API_KEY is not set. Add it to your environment or .env file.")
        st.stop()
    openai.api_key = api_key

    st.set_page_config(page_title="Chat with the Score Robot", page_icon="icon.png")
    st.image('background.png')
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with the Score robot 🤖")

    # Index the PDFs BEFORE accepting questions: the original handled the
    # question first, so on the first run the chain was still None and calling
    # it raised TypeError.
    if not st.session_state.get("processed"):
        # Pick up every PDF in the current directory.
        pdf_files = [file for file in os.listdir('.') if file.endswith('.pdf')]
        if pdf_files:
            with st.spinner("Loading reports"):
                raw_text = get_pdf_text(pdf_files)
                text_chunks = get_chunks(raw_text)
                vectorstore = get_vectorstore(text_chunks)
                st.session_state.conversation = get_conversationchain(vectorstore)
            st.session_state.processed = True  # don't reprocess on reruns
        else:
            st.write("No PDF files found in the directory.")

    question = st.text_input("Ask a question about recent reports:")
    # Guard: the chain stays None when no PDFs were found.
    if question and st.session_state.conversation is not None:
        handle_question(question)
# Run the Streamlit app only when executed as a script.
if __name__ == "__main__":
    main()