Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| # from langchain.document_loaders import PyPDFLoader | |
| from PyPDF2 import PdfReader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain.chat_models import ChatOllama | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.chat_models import ChatOllama | |
| from langchain.prompts import ChatPromptTemplate | |
| from langchain.schema.runnable import RunnablePassthrough | |
| from langchain.schema.output_parser import StrOutputParser | |
| import os | |
@st.cache_resource(show_spinner=False)
def _load_embeddings():
    """Build the sentence-embedding model once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction, so an
    unguarded module-level constructor would rebuild the model each time.
    ``show_spinner=False`` keeps this call from rendering anything before
    ``st.set_page_config`` runs in ``main()``.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )


# Module-level handle kept under its original name; it is passed to FAISS both
# when the index is built and when it is loaded back from disk.
embeddings = _load_embeddings()
def pdf_read(pdf_doc):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_doc: Iterable of PDF sources, each of which is handed to
            ``PdfReader`` (in this app: the files returned by the sidebar
            uploader).

    Returns:
        str: The page texts joined in document/page order with no separator.
        Empty if no page yields any text.
    """
    st.write("Loading PDF")
    page_texts = []
    for pdf in pdf_doc:
        for page in PdfReader(pdf).pages:
            # Fall back to "" so a page without extractable text (e.g. a
            # scanned image) can never break the concatenation.
            page_texts.append(page.extract_text() or "")
    # A single join avoids rebuilding the growing string once per page.
    return "".join(page_texts)
def get_chunks(text):
    """Split *text* into overlapping chunks.

    The splitter is configured for chunks of up to 1000 characters with a
    200-character overlap between neighbouring chunks.

    Args:
        text: The full document text to split.

    Returns:
        The list of chunk strings produced by the splitter.
    """
    st.write("Chunking")
    splitter = RecursiveCharacterTextSplitter(
        chunk_overlap=200,
        chunk_size=1000,
    )
    return splitter.split_text(text)
def vector_store(text_chunks):
    """Embed *text_chunks* into a FAISS index and save it under ``faiss_db``.

    Args:
        text_chunks: Non-empty list of text chunks to index.

    Returns:
        The FAISS vector store that was built and saved.

    Raises:
        ValueError: If *text_chunks* is empty. FAISS cannot build an index
            from zero vectors, so fail early with an actionable message
            instead of an opaque error from inside the library.
    """
    st.write("In vector Store")
    if not text_chunks:
        raise ValueError(
            "No text chunks to index; the uploaded PDFs contained no "
            "extractable text."
        )
    # Use a distinct local name so the function does not shadow itself.
    index = FAISS.from_texts(text_chunks, embedding=embeddings)
    index.save_local("faiss_db")
    return index
# Prompt for the RAG chain; {question} and {context} are filled in per query.
_RAG_PROMPT_TEMPLATE = """You are an assistant for question-answering tasks for Retrieval Augmented Generation system for the financial reports such as 10Q and 10K.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use two sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""


def _format_docs(docs):
    """Join the text of the retrieved documents into one context string."""
    return "\n\n".join(doc.page_content for doc in docs)


def get_conversational_chain(retriever, user_question):
    """Answer *user_question* with a retrieval-augmented LLM chain.

    The chain retrieves context for the question, fills the prompt template,
    calls the local ``llama2`` model through Ollama, and parses the reply to
    a plain string, which is also written to the page.

    Args:
        retriever: Runnable retriever returning the documents relevant to a
            query string.
        user_question: The question text entered by the user.

    Returns:
        str: The model's answer.
    """
    with st.spinner("Calling LLM"):
        # ChatOllama selects the model through ``model``; ``model_name`` is
        # not one of its fields.
        llm = ChatOllama(model="llama2", temperature=0)
        prompt = ChatPromptTemplate.from_template(_RAG_PROMPT_TEMPLATE)

        # Setup RAG pipeline. The retrieved documents are flattened to their
        # text so the prompt receives readable context rather than the repr
        # of a list of Document objects.
        conversation_chain = (
            {
                "context": retriever | _format_docs,
                "question": RunnablePassthrough(),
            }
            | prompt
            | llm
            | StrOutputParser()
        )
        st.write("Creating chain")
        answer = conversation_chain.invoke(user_question)

    # Show the answer once (it was previously written twice in a row).
    st.write("Answer: ", answer)
    return answer
def user_input(user_question):
    """Answer *user_question* against the FAISS index saved on disk.

    Shows an error instead when ``faiss_db/index.faiss`` does not exist yet.
    Otherwise the saved index is loaded with the module-level embeddings and
    its retriever is handed to ``get_conversational_chain``.

    Args:
        user_question: The question text entered by the user.

    Returns:
        None.
    """
    with st.spinner("Searching the query"):
        index_available = os.path.exists("faiss_db/index.faiss")
        if index_available:
            # NOTE(review): the flag below opts in to deserializing the saved
            # index; this looks acceptable only because the files are written
            # by this app itself — confirm nothing untrusted can land in
            # faiss_db.
            saved_index = FAISS.load_local(
                "faiss_db",
                embeddings,
                allow_dangerous_deserialization=True,
            )
            get_conversational_chain(saved_index.as_retriever(), user_question)
        else:
            st.error("FAISS index not found. Please upload and process a PDF file first.")
def main():
    """Render the Streamlit page: question box plus PDF upload sidebar.

    A non-empty question is echoed as a user chat message and passed to
    ``user_input``. In the sidebar, pressing "Submit & Process" reads the
    uploaded PDFs, chunks their text, and builds the vector store.
    """
    st.set_page_config(page_title="Chat PDF")
    st.header("RAG based Chat with PDF")

    user_question = st.text_input("Ask a Question from the PDF Files")
    if user_question:
        with st.chat_message("user"):
            st.write(user_question)
        user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_doc = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
            # Only PDFs can be parsed downstream, so reject other files here.
            type=["pdf"],
        )
        if not st.button("Submit & Process"):
            return
        if not pdf_doc:
            st.error("Please upload a PDF file.")
            return

        with st.spinner("Processing..."):
            st.write("Reading the doc")
            docs = pdf_read(pdf_doc)
            st.write("Chunking the doc")
            text_chunks = get_chunks(docs)
            # Image-only PDFs yield no text; an empty chunk list cannot be
            # indexed, so skip the store step and report it below.
            if text_chunks:
                st.write("Storing the doc")
                vector_store(text_chunks)

        if text_chunks:
            st.success("Done processing the PDF")
        else:
            st.error("No extractable text was found in the uploaded PDF files.")


if __name__ == "__main__":
    main()