document_qna / function.py
ssumukh17's picture
Update function.py
4fddc0d
from langchain.llms import OpenAI
from langchain.chains import AnalyzeDocumentChain
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import UnstructuredEmailLoader
from langchain.document_loaders import Docx2txtLoader
from langchain.chains.question_answering import load_qa_chain
import os
from pandas_ai import csv_file,excel_file
def pdf_file(file_upload, message):
    """Answer ``message`` using the text extracted from a PDF.

    The PDF is loaded and split with ``PyPDFLoader``, the text of every
    resulting chunk is concatenated into one string, and that string is
    handed to an ``AnalyzeDocumentChain`` that wraps a ``map_reduce``
    question-answering chain backed by ``OpenAI(temperature=0)``.

    Args:
        file_upload: Value passed straight to ``PyPDFLoader`` (presumably
            the path of the uploaded PDF -- confirm against the caller).
        message: The question to ask about the document.

    Returns:
        The result of ``AnalyzeDocumentChain.run`` for the question.
    """
    pages = PyPDFLoader(file_upload).load_and_split()
    # Build the text in a single pass rather than growing a string with
    # ``+`` inside a loop. No separator is inserted between chunks, which
    # matches the previous output exactly.
    page_text = "".join(page.page_content for page in pages)

    # The previous bare ``os.environ.get('OPENAI_API_KEY')`` discarded its
    # result and had no effect; per the LangChain docs the OpenAI wrapper
    # reads OPENAI_API_KEY from the environment on its own.
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_pdf_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_pdf_chain.run(input_document=page_text, question=message)
def email_file(file_upload, message):
    """Answer ``message`` using the text extracted from an email file.

    The email is loaded with ``UnstructuredEmailLoader`` in ``"elements"``
    mode with attachment processing enabled, the text of every returned
    document is concatenated into one string, and that string is handed to
    an ``AnalyzeDocumentChain`` that wraps a ``map_reduce``
    question-answering chain backed by ``OpenAI(temperature=0)``.

    Args:
        file_upload: Value passed straight to ``UnstructuredEmailLoader``
            (presumably the path of the uploaded ``.eml`` file -- confirm
            against the caller).
        message: The question to ask about the email.

    Returns:
        The result of ``AnalyzeDocumentChain.run`` for the question.
    """
    loader = UnstructuredEmailLoader(
        file_upload,
        mode="elements",
        process_attachments=True,
    )
    emails = loader.load()
    # Build the text in a single pass rather than growing a string with
    # ``+`` inside a loop. No separator is inserted between elements, which
    # matches the previous output exactly.
    email_text = "".join(email.page_content for email in emails)

    # The previous bare ``os.environ.get('OPENAI_API_KEY')`` discarded its
    # result and had no effect; per the LangChain docs the OpenAI wrapper
    # reads OPENAI_API_KEY from the environment on its own.
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_email_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_email_chain.run(input_document=email_text, question=message)
def docx_file(file_upload, message):
    """Answer ``message`` using the text extracted from a DOCX file.

    The file is loaded with ``Docx2txtLoader``, the text of every returned
    document is concatenated into one string, and that string is handed to
    an ``AnalyzeDocumentChain`` that wraps a ``map_reduce``
    question-answering chain backed by ``OpenAI(temperature=0)``.

    Args:
        file_upload: Value passed straight to ``Docx2txtLoader``
            (presumably the path of the uploaded ``.docx`` file -- confirm
            against the caller).
        message: The question to ask about the document.

    Returns:
        The result of ``AnalyzeDocumentChain.run`` for the question.
    """
    documents = Docx2txtLoader(file_upload).load()
    # Build the text in a single pass rather than growing a string with
    # ``+`` inside a loop. No separator is inserted between documents,
    # which matches the previous output exactly.
    doc_text = "".join(doc.page_content for doc in documents)

    # The previous bare ``os.environ.get('OPENAI_API_KEY')`` discarded its
    # result and had no effect; per the LangChain docs the OpenAI wrapper
    # reads OPENAI_API_KEY from the environment on its own.
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_document_chain.run(input_document=doc_text, question=message)
def initiate_process(file_upload, extention, message):
    """Route an uploaded file to the handler that matches its extension.

    ``extention`` is compared by exact, case-sensitive equality against
    ``'pdf'``, ``'eml'``, ``'docx'``, ``'csv'`` and ``'xlsx'``. The matching
    handler is called with ``(file_upload, message)`` and its result is
    returned unchanged. Exceptions raised by a handler are not caught here
    and propagate to the caller.

    Args:
        file_upload: The uploaded file, forwarded as-is to the handler.
        extention: File extension without a leading dot.
        message: The question to ask about the file.

    Returns:
        The selected handler's result, or the string
        ``"Please upload correct file format"`` when no handler matches.
    """
    if extention == 'pdf':
        return pdf_file(file_upload, message)
    if extention == 'eml':
        return email_file(file_upload, message)
    if extention == 'docx':
        return docx_file(file_upload, message)
    if extention == 'csv':
        return csv_file(file_upload, message)
    if extention == 'xlsx':
        return excel_file(file_upload, message)
    # Nothing matched: report the problem as a plain string, not an error.
    return "Please upload correct file format"