# importing dependencies
from dotenv import load_dotenv
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import faiss
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from htmlTemplates import css, bot_template, user_template
import os
import openai
# creating custom template to guide llm model
# NOTE: this is the *condense* prompt — it rewrites a follow-up question
# (given the chat history) into a standalone question before retrieval.
custom_template = """
Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question, in its original language.
As The Score Insight Specialist, you possess deep knowledge in decoding the nuances of national sentiment and consumer behavior. Your expertise lies in transforming intricate consumer data into valuable insights, enabling businesses to make informed decisions. Your responses should demonstrate your ability to identify trends and customer emotions, providing clear and engaging narratives that help businesses strategize effectively.
Your responses should be concise, directly related to the query, and appear as though they are derived from your own extensive knowledge base. Avoid mentioning the source of your information, and instead focus on delivering insightful analysis as if drawing from your own expertise.
If a question does not relate to your area of expertise, simply reply with "Not applicable."
ChatHistory:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""
# from_template infers {chat_history} and {question} as the input variables
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
# extracting text from pdf
# extracting text from pdf
def get_pdf_text(docs):
    """Concatenate the extracted text of every page of every PDF in *docs*.

    Args:
        docs: iterable of PDF file paths or file-like objects accepted by
            ``PdfReader``.

    Returns:
        str: all page texts joined together ("" if nothing is extractable).
    """
    pages = []
    for pdf in docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can yield None/"" for pages with no text layer
            # (e.g. scanned images); the original `text += ...` would raise
            # TypeError on None. Guard and join once instead of O(n^2) +=.
            page_text = page.extract_text()
            if page_text:
                pages.append(page_text)
    return "".join(pages)
# converting text to chunks
# converting text to chunks
def get_chunks(raw_text):
    """Split *raw_text* into overlapping chunks suitable for embedding.

    Chunks are ~1000 characters with a 200-character overlap, split on
    newlines, so context is preserved across chunk boundaries.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(raw_text)
# using all-MiniLm embeddings model and faiss to get vectorstore
# using all-MiniLm embeddings model and faiss to get vectorstore
def get_vectorstore(chunks):
    """Embed *chunks* with all-MiniLM-L6-v2 (on CPU) and index them in FAISS.

    Returns the FAISS vector store built from the given text chunks.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    store = faiss.FAISS.from_texts(texts=chunks, embedding=embedder)
    return store
# generating conversation chain
# generating conversation chain
def get_conversationchain(vectorstore):
    """Build a retrieval-augmented conversational chain over *vectorstore*.

    Uses a low-temperature ChatOpenAI model, a buffer memory that retains
    the full chat history, and the custom condense-question prompt.
    """
    chat_model = ChatOpenAI(temperature=0.2)
    # buffer memory holds past turns so follow-up questions can be condensed
    history_buffer = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
        output_key='answer',
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vectorstore.as_retriever(),
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
        memory=history_buffer,
    )
# generating response from user queries and displaying them accordingly
# generating response from user queries and displaying them accordingly
def handle_question(question):
    """Run *question* through the session's chain and render the chat history."""
    result = st.session_state.conversation({'question': question})
    st.session_state.chat_history = result["chat_history"]
    for turn, message in enumerate(st.session_state.chat_history):
        # even turns are the user's messages, odd turns are the bot's replies
        template = user_template if turn % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
def main():
    """Streamlit entry point: configure the page, index local PDFs once per
    session, and answer user questions against the indexed reports."""
    load_dotenv()
    # SECURITY FIX: the OpenAI API key was hard-coded in source (and thus in
    # version control) — it must come from the environment / a .env file.
    # The leaked key should be revoked and rotated.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        st.error("OPENAI_API_KEY is not set. Add it to your environment or a .env file.")
        st.stop()
    openai.api_key = api_key
    st.set_page_config(page_title="Chat with the Score Robot", page_icon="icon.png")
    st.image('background.png')
    st.write(css, unsafe_allow_html=True)
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None
    st.header("Chat with the Score robot 🤖")
    question = st.text_input("Ask a question about recent reports:")
    if question:
        # ROBUSTNESS FIX: on the very first run the chain has not been built
        # yet; calling None would crash. Only answer once indexing is done.
        if st.session_state.conversation is not None:
            handle_question(question)
        else:
            st.info("Reports are still being indexed — please try again in a moment.")
    if "processed" not in st.session_state or not st.session_state.processed:
        # Get all PDF files in the current directory
        pdf_files = [file for file in os.listdir('.') if file.endswith('.pdf')]
        if pdf_files:  # Check if there are any PDF files
            with st.spinner("Loading reports"):
                # full pipeline: PDF text -> chunks -> vectorstore -> chain
                raw_text = get_pdf_text(pdf_files)
                text_chunks = get_chunks(raw_text)
                vectorstore = get_vectorstore(text_chunks)
                st.session_state.conversation = get_conversationchain(vectorstore)
                st.session_state.processed = True  # Ensure we don't reprocess unless needed
        else:
            st.write("No PDF files found in the directory.")


if __name__ == '__main__':
    main()