"""Answer a question about a PDF using LangChain, OpenAI and Chroma."""

import os

# LangChain building blocks used by the pipeline below.
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma


def load_pdf(pdf_path):
    """Load the PDF at *pdf_path* with ``UnstructuredPDFLoader``.

    Args:
        pdf_path: Location of the PDF, passed straight to the loader.

    Returns:
        Whatever ``UnstructuredPDFLoader.load()`` returns (presumably a list
        of LangChain document objects -- confirm against the installed
        LangChain version).
    """
    loader = UnstructuredPDFLoader(pdf_path)
    return loader.load()


def update_openai_key(openai_key: str) -> None:
    """Store *openai_key* in the ``OPENAI_API_KEY`` environment variable.

    NOTE(review): this mutates process-wide state, so the key stays set for
    everything that runs afterwards in the same process.

    Args:
        openai_key: The API key; ``os.environ`` only accepts ``str`` values.
    """
    os.environ['OPENAI_API_KEY'] = openai_key


def texts_splitter(pages):
    """Split loaded documents into chunks with ``CharacterTextSplitter``.

    The splitter is configured with ``chunk_size=3000`` and
    ``chunk_overlap=20``.

    Args:
        pages: Documents as returned by :func:`load_pdf`.

    Returns:
        The result of ``CharacterTextSplitter.split_documents(pages)``.
    """
    splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=20)
    return splitter.split_documents(pages)


def qa_langchain(docsearch):
    """Build a ``RetrievalQA`` chain on top of a vector store.

    Args:
        docsearch: Object exposing ``as_retriever()``; ``main_process``
            passes a ``Chroma`` store.

    Returns:
        A ``RetrievalQA`` instance that combines retrieved documents with a
        "stuff" question-answering chain driven by ``OpenAI(temperature=0)``.
    """
    combine_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
    return RetrievalQA(
        combine_documents_chain=combine_chain,
        retriever=docsearch.as_retriever(),
    )


def main_process(pdf_path, question, openai_key):
    """Run the full pipeline: load, split, embed, index, and answer.

    Args:
        pdf_path: Location of the PDF to query.
        question: The question passed to the retrieval QA chain.
        openai_key: OpenAI API key; exported to the environment before any
            OpenAI-backed object is constructed.

    Returns:
        The value returned by ``qa.run(question)``.
    """
    # The key must be in the environment before OpenAIEmbeddings() and
    # OpenAI() are constructed, since neither is given a key explicitly.
    update_openai_key(openai_key)

    pages = load_pdf(pdf_path)
    texts = texts_splitter(pages)

    embeddings = OpenAIEmbeddings()
    # NOTE(review): no collection name or persist directory is passed here;
    # confirm whether repeated calls in one process can share indexed
    # documents between different PDFs.
    docsearch = Chroma.from_documents(texts, embeddings)

    qa = qa_langchain(docsearch)
    return qa.run(question)