# ScoreChat / app.py
# (Hugging Face Spaces page residue, preserved as comments so the file parses:
#  "bart-bilski's picture" / "Rename app3.py to app.py" / commit af676a5 verified)
# importing dependencies
from dotenv import load_dotenv
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import faiss
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from htmlTemplates import css, bot_template, user_template
import os
import openai
# creating custom template to guide llm model
# Condense-question prompt used by ConversationalRetrievalChain: it rewrites a
# follow-up question into a standalone one and layers in the "Score Insight
# Specialist" persona. The {chat_history} and {question} placeholders are
# filled in by the chain at query time.
custom_template = """
Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question, in its original language.
As The Score Insight Specialist, you possess deep knowledge in decoding the nuances of national sentiment and consumer behavior. Your expertise lies in transforming intricate consumer data into valuable insights, enabling businesses to make informed decisions. Your responses should demonstrate your ability to identify trends and customer emotions, providing clear and engaging narratives that help businesses strategize effectively.
Your responses should be concise, directly related to the query, and appear as though they are derived from your own extensive knowledge base. Avoid mentioning the source of your information, and instead focus on delivering insightful analysis as if drawing from your own expertise.
If a question does not relate to your area of expertise, simply reply with "Not applicable."
ChatHistory:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""
# Wrap the raw template so the chain can format it with its inputs.
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
# extracting text from pdf
def get_pdf_text(docs):
    """Extract and concatenate the text of every page in the given PDFs.

    Args:
        docs: iterable of file paths or file-like objects accepted by PdfReader.

    Returns:
        One string containing the text of all pages, in document/page order.
    """
    text = ""
    for pdf in docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() returns None for image-only/empty pages;
            # guard so we don't raise TypeError on `str + None`.
            text += page.extract_text() or ""
    return text
# converting text to chunks
def get_chunks(raw_text):
    """Split raw report text into overlapping chunks suitable for embedding."""
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,   # overlap keeps context across chunk boundaries
        length_function=len,
    )
    return splitter.split_text(raw_text)
# using all-MiniLm embeddings model and faiss to get vectorstore
def get_vectorstore(chunks):
    """Embed the text chunks with all-MiniLM and index them in a FAISS store."""
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},  # CPU inference; no GPU assumed
    )
    return faiss.FAISS.from_texts(texts=chunks, embedding=embedding_model)
# generating conversation chain
def get_conversationchain(vectorstore):
    """Build a retrieval-augmented conversational chain over the vectorstore."""
    chat_llm = ChatOpenAI(temperature=0.2)
    # Buffer memory holds the full chat history between turns.
    buffer = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
        output_key='answer',
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=vectorstore.as_retriever(),
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
        memory=buffer,
    )
# generating response from user queries and displaying them accordingly
def handle_question(question):
    """Send the question through the conversation chain and render the chat."""
    response = st.session_state.conversation({'question': question})
    st.session_state.chat_history = response["chat_history"]
    # History alternates user/bot messages, starting with the user.
    for idx, message in enumerate(st.session_state.chat_history):
        template = user_template if idx % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
def main():
    """Entry point: configure the page, index local PDF reports, serve the chat UI.

    Side effects: reads OPENAI_API_KEY from the environment, scans the current
    directory for *.pdf files, and stores the conversation chain plus chat
    history in st.session_state across Streamlit reruns.
    """
    load_dotenv()
    # SECURITY: never hard-code API keys in source (the original embedded a
    # live-looking key here). Read it from the environment, which load_dotenv
    # populates from a local .env file or the hosting platform's secrets.
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        st.error("OPENAI_API_KEY is not set. Add it to your environment or .env file.")
        st.stop()
    openai.api_key = api_key

    st.set_page_config(page_title="Chat with the Score Robot", page_icon="icon.png")
    st.image('background.png')
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with the Score robot 🤖")

    # Index the PDFs BEFORE accepting questions: the original handled the
    # question first, so on the first run the chain was still None and calling
    # it raised TypeError.
    if not st.session_state.get("processed"):
        # Pick up every PDF in the current directory.
        pdf_files = [file for file in os.listdir('.') if file.endswith('.pdf')]
        if pdf_files:
            with st.spinner("Loading reports"):
                raw_text = get_pdf_text(pdf_files)
                text_chunks = get_chunks(raw_text)
                vectorstore = get_vectorstore(text_chunks)
                st.session_state.conversation = get_conversationchain(vectorstore)
            st.session_state.processed = True  # don't reprocess on reruns
        else:
            st.write("No PDF files found in the directory.")

    question = st.text_input("Ask a question about recent reports:")
    # Guard: the chain stays None when no PDFs were found.
    if question and st.session_state.conversation is not None:
        handle_question(question)
# Run the Streamlit app only when executed as a script.
if __name__ == "__main__":
    main()