# Streamlit app: conversational Q&A over local PDF reports
# (PyPDF2 extraction -> chunking -> MiniLM embeddings -> FAISS -> LangChain chat).
| # importing dependencies | |
| from dotenv import load_dotenv | |
| import streamlit as st | |
| from PyPDF2 import PdfReader | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.vectorstores import faiss | |
| from langchain.prompts import PromptTemplate | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain.chat_models import ChatOpenAI | |
| from htmlTemplates import css, bot_template, user_template | |
| import os | |
| import openai | |
# Custom "condense question" prompt for the retrieval chain: the LLM rewrites
# a follow-up question into a standalone question while adopting the
# "Score Insight Specialist" persona. NOTE: the text below is sent to the
# model verbatim at runtime — do not reword or reformat it.
custom_template = """
Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question, in its original language.
As The Score Insight Specialist, you possess deep knowledge in decoding the nuances of national sentiment and consumer behavior. Your expertise lies in transforming intricate consumer data into valuable insights, enabling businesses to make informed decisions. Your responses should demonstrate your ability to identify trends and customer emotions, providing clear and engaging narratives that help businesses strategize effectively.
Your responses should be concise, directly related to the query, and appear as though they are derived from your own extensive knowledge base. Avoid mentioning the source of your information, and instead focus on delivering insightful analysis as if drawing from your own expertise.
If a question does not relate to your area of expertise, simply reply with "Not applicable."
ChatHistory:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""
# Wrap the template so LangChain can interpolate {chat_history} / {question}.
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
# extracting text from pdf
def get_pdf_text(docs):
    """Return the concatenated text of every page across all given PDFs.

    Args:
        docs: iterable of file paths or file-like objects readable by PdfReader.
    """
    pages = []
    for document in docs:
        reader = PdfReader(document)
        # Collect per-page text; joined once at the end instead of += in a loop.
        pages.extend(page.extract_text() for page in reader.pages)
    return "".join(pages)
# converting text to chunks
def get_chunks(raw_text):
    """Split raw text into overlapping chunks suitable for embedding.

    Uses newline-separated chunks of ~1000 chars with a 200-char overlap so
    context is not lost at chunk boundaries.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(raw_text)
# using all-MiniLm embeddings model and faiss to get vectorstore
def get_vectorstore(chunks):
    """Embed the text chunks with a MiniLM sentence transformer and index
    them in an in-memory FAISS vectorstore."""
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},  # CPU inference; no GPU assumed
    )
    return faiss.FAISS.from_texts(texts=chunks, embedding=embedder)
# generating conversation chain
def get_conversationchain(vectorstore):
    """Build a retrieval-augmented conversational chain over *vectorstore*.

    Past turns are held in a ConversationBufferMemory so follow-up questions
    can be condensed via CUSTOM_QUESTION_PROMPT.
    """
    # Buffer memory keyed the way ConversationalRetrievalChain expects.
    memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
        output_key='answer',
    )
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(temperature=0.2),  # low temperature for factual answers
        retriever=vectorstore.as_retriever(),
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
        memory=memory,
    )
# generating response from user queries and displaying them accordingly
def handle_question(question):
    """Send *question* through the session's conversation chain and render
    the full chat history, alternating user/bot HTML templates."""
    response = st.session_state.conversation({'question': question})
    st.session_state.chat_history = response["chat_history"]
    for idx, message in enumerate(st.session_state.chat_history):
        # Even indices are user turns, odd indices are bot turns.
        template = user_template if idx % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
def main():
    """Streamlit entry point: configure the page, index every PDF in the
    working directory once per session, and answer user questions about them.
    """
    load_dotenv()  # pull OPENAI_API_KEY (and friends) from a local .env file
    # SECURITY FIX: the original hard-coded a live OpenAI API key in source.
    # Secrets must come from the environment / .env only — never from code.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        st.error("OPENAI_API_KEY is not set. Add it to your environment or .env file.")
        st.stop()
    openai.api_key = api_key

    st.set_page_config(page_title="Chat with the Score Robot", page_icon="icon.png")
    st.image('background.png')
    st.write(css, unsafe_allow_html=True)

    # Initialise session slots on first render.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with the Score robot 🤖")
    question = st.text_input("Ask a question about recent reports:")
    # ROBUSTNESS FIX: on the very first render the chain is still None
    # (PDFs are indexed below), so guard before handling the question.
    if question and st.session_state.conversation is not None:
        handle_question(question)

    if "processed" not in st.session_state or not st.session_state.processed:
        # Index every PDF found in the current working directory.
        pdf_files = [file for file in os.listdir('.') if file.endswith('.pdf')]
        if pdf_files:
            with st.spinner("Loading reports"):
                raw_text = get_pdf_text(pdf_files)
                text_chunks = get_chunks(raw_text)
                vectorstore = get_vectorstore(text_chunks)
                st.session_state.conversation = get_conversationchain(vectorstore)
                st.session_state.processed = True  # ensure we don't reprocess on rerun
        else:
            st.write("No PDF files found in the directory.")
# Run the app only when this file is executed directly (not on import).
if __name__ == '__main__':
    main()