Spaces:
Sleeping
Sleeping
File size: 2,515 Bytes
88fb925 4fddc0d 88fb925 4fddc0d 88fb925 4fddc0d 88fb925 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | from langchain.llms import OpenAI
from langchain.chains import AnalyzeDocumentChain
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import UnstructuredEmailLoader
from langchain.document_loaders import Docx2txtLoader
from langchain.chains.question_answering import load_qa_chain
import os
from pandas_ai import csv_file,excel_file
def pdf_file(file_upload, message):
    """Answer a question about the contents of a PDF file.

    The PDF is loaded and split with ``PyPDFLoader``; the text of every
    resulting chunk is concatenated into one string, which is then passed
    to an ``AnalyzeDocumentChain`` wrapping a ``map_reduce``
    question-answering chain.

    Args:
        file_upload: Value handed straight to ``PyPDFLoader`` (presumably
            the path of the uploaded PDF -- confirm against the caller).
        message: The question to ask about the document.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question.
    """
    pages = PyPDFLoader(file_upload).load_and_split()
    # Chunks are concatenated with no separator between them.
    page_text = "".join(page.page_content for page in pages)

    # No explicit OPENAI_API_KEY lookup is needed here: a bare
    # ``os.environ.get(...)`` whose result is discarded has no effect, and
    # the OpenAI wrapper is documented to read the key from the environment.
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_pdf_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_pdf_chain.run(input_document=page_text, question=message)
def email_file(file_upload, message):
    """Answer a question about the contents of an e-mail file.

    The e-mail is loaded with ``UnstructuredEmailLoader`` in ``"elements"``
    mode with attachment processing enabled; the text of every loaded
    document is concatenated into one string, which is then passed to an
    ``AnalyzeDocumentChain`` wrapping a ``map_reduce`` question-answering
    chain.

    Args:
        file_upload: Value handed straight to ``UnstructuredEmailLoader``
            (presumably the path of the uploaded ``.eml`` file -- confirm
            against the caller).
        message: The question to ask about the e-mail.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question.
    """
    loader = UnstructuredEmailLoader(
        file_upload,
        mode="elements",
        process_attachments=True,
    )
    # Loaded documents are concatenated with no separator between them.
    email_text = "".join(part.page_content for part in loader.load())

    # No explicit OPENAI_API_KEY lookup is needed here: a bare
    # ``os.environ.get(...)`` whose result is discarded has no effect, and
    # the OpenAI wrapper is documented to read the key from the environment.
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_email_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_email_chain.run(input_document=email_text, question=message)
def docx_file(file_upload, message):
    """Answer a question about the contents of a Word (.docx) file.

    The file is loaded with ``Docx2txtLoader``; the text of every loaded
    document is concatenated into one string, which is then passed to an
    ``AnalyzeDocumentChain`` wrapping a ``map_reduce`` question-answering
    chain.

    Args:
        file_upload: Value handed straight to ``Docx2txtLoader``
            (presumably the path of the uploaded ``.docx`` file -- confirm
            against the caller).
        message: The question to ask about the document.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question.
    """
    documents = Docx2txtLoader(file_upload).load()
    # Loaded documents are concatenated with no separator between them.
    doc_text = "".join(doc.page_content for doc in documents)

    # No explicit OPENAI_API_KEY lookup is needed here: a bare
    # ``os.environ.get(...)`` whose result is discarded has no effect, and
    # the OpenAI wrapper is documented to read the key from the environment.
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_document_chain.run(input_document=doc_text, question=message)
def initiate_process(file_upload, extention, message):
    """Route an uploaded file to the handler matching its extension.

    Supported values of ``extention`` (compared exactly, case-sensitive,
    without a leading dot) are ``'pdf'``, ``'eml'``, ``'docx'``, ``'csv'``
    and ``'xlsx'``.  Any other value yields a fixed hint string instead of
    calling a handler.

    Args:
        file_upload: Passed through unchanged to the selected handler.
        extention: Extension string used to pick the handler.
        message: The question, passed through unchanged to the handler.

    Returns:
        The selected handler's result, or the string
        ``"Please upload correct file format"`` for an unsupported
        extension.
    """
    # NOTE: exceptions raised by a handler are not caught here; they
    # propagate to the caller.
    if extention == 'pdf':
        return pdf_file(file_upload, message)
    if extention == 'eml':
        return email_file(file_upload, message)
    if extention == 'docx':
        return docx_file(file_upload, message)
    if extention == 'csv':
        return csv_file(file_upload, message)
    if extention == 'xlsx':
        return excel_file(file_upload, message)
    return "Please upload correct file format"