# Streamlit RAG app: upload a PDF and chat with its contents using
# LangChain, a Chroma vector store, HuggingFace embeddings, and OpenAI.
| from langchain_openai import ChatOpenAI | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import Chroma | |
| from langchain.chains import ConversationalRetrievalChain | |
| from langchain_community.chat_message_histories import ChatMessageHistory | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain_core.prompts import PromptTemplate | |
| from langchain_core.document_loaders import BaseLoader | |
| from langchain_core.documents import Document | |
| import streamlit as st | |
| import os | |
| from io import BytesIO | |
| import pdfplumber | |
class InMemoryPDFLoader(BaseLoader):
    """Load an in-memory PDF (raw bytes) into a single LangChain Document.

    Avoids writing the uploaded file to disk: the bytes are wrapped in a
    BytesIO stream and parsed with pdfplumber.
    """

    def __init__(self, file_bytes: bytes):
        # Raw PDF content, e.g. from st.file_uploader(...).read().
        self.file_bytes = file_bytes

    def load(self) -> list[Document]:
        """Extract the text of every page and return it as one Document.

        Returns:
            A single-element list with the concatenated page text.
        """
        pdf_stream = BytesIO(self.file_bytes)
        with pdfplumber.open(pdf_stream) as pdf:
            # extract_text() returns None for pages with no extractable
            # text (e.g. scanned images); coalesce to "" instead of
            # raising TypeError on concatenation. Join with a newline so
            # words at page boundaries are not glued together.
            text = "\n".join(page.extract_text() or "" for page in pdf.pages)
        return [Document(page_content=text)]
# Access the OpenAI API key from the environment.
open_ai_key = os.getenv("OPENAI_API_KEY")
if not open_ai_key:
    # Fail fast with an actionable message instead of an opaque
    # authentication error on the first model call.
    st.error("OPENAI_API_KEY is not set. Export it before running the app.")
    st.stop()
llm = ChatOpenAI(api_key=open_ai_key)
# Prompt for the "stuff" combine-documents step: answer strictly from the
# retrieved context and admit ignorance rather than invent an answer.
template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context: {context}
Question: {question}
Only return the helpful answer below and nothing else.
Helpful answer:
"""

# from_template infers the input variables ({context}, {question})
# directly from the template string.
prompt = PromptTemplate.from_template(template)
@st.cache_resource
def _build_chain(pdf_bytes: bytes):
    """Build a ConversationalRetrievalChain over the uploaded PDF bytes.

    Cached with st.cache_resource so the expensive steps (PDF parsing,
    embedding, Chroma index construction) run once per distinct upload
    rather than on every Streamlit rerun — and, crucially, so the
    ConversationBufferMemory survives reruns and the chat is actually
    conversational instead of being reset on each interaction.
    """
    docs = InMemoryPDFLoader(file_bytes=pdf_bytes).load()
    # Split into overlapping chunks so retrieval returns focused passages.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(docs)
    # Embed the chunks and index them in an in-memory Chroma store.
    embeddings = HuggingFaceEmbeddings(
        model_name="embaas/sentence-transformers-multilingual-e5-base"
    )
    db = Chroma.from_documents(chunks, embeddings)
    # Memory for conversational context; output_key="answer" tells the
    # memory which chain output to record.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=ChatMessageHistory(),
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(),
        memory=memory,
        return_source_documents=False,
        combine_docs_chain_kwargs={'prompt': prompt},
    )


pdf_file = st.file_uploader("Upload your PDF", type="pdf")
question = st.chat_input("Ask your question")

if pdf_file is not None:
    try:
        chain = _build_chain(pdf_file.read())
        if question:
            with st.chat_message("user"):
                st.markdown(question)
            with st.chat_message("assistant"):
                # invoke() replaces the deprecated chain({...}) __call__.
                res = chain.invoke({"question": question})
                st.write(f"{res['answer']}")
    except Exception as e:
        # Top-level UI boundary: surface the failure instead of crashing.
        st.error(f"An error occurred: {e}")