File size: 2,515 Bytes
88fb925
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fddc0d
88fb925
 
 
 
 
 
 
 
 
 
 
 
4fddc0d
88fb925
 
 
 
 
 
 
 
 
 
 
 
4fddc0d
88fb925
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from langchain.llms import OpenAI
from langchain.chains import AnalyzeDocumentChain
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import UnstructuredEmailLoader
from langchain.document_loaders import Docx2txtLoader
from langchain.chains.question_answering import load_qa_chain
import os
from pandas_ai import csv_file,excel_file


def pdf_file(file_upload, message):
    """Answer a question about the contents of a PDF file.

    The PDF is loaded with ``PyPDFLoader.load_and_split()``, the
    ``page_content`` of every returned chunk is concatenated (no separator)
    into one string, and that string is passed to an ``AnalyzeDocumentChain``
    wrapping a map-reduce question-answering chain backed by ``OpenAI``.

    Args:
        file_upload: Location of the PDF, passed straight to ``PyPDFLoader``
            (presumably a filesystem path -- confirm against the caller).
        message: The question to ask about the document.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question
        (presumably the answer text).
    """
    chunks = PyPDFLoader(file_upload).load_and_split()
    # str.join assembles the text in one pass instead of re-copying the
    # accumulated string on every `+=`.
    page_text = "".join(chunk.page_content for chunk in chunks)

    # No key is passed explicitly; the OpenAI wrapper is left to resolve its
    # own credentials (presumably from OPENAI_API_KEY -- confirm).
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_pdf_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_pdf_chain.run(input_document=page_text, question=message)

def email_file(file_upload, message):
    """Answer a question about the contents of an email (.eml) file.

    The email is loaded with ``UnstructuredEmailLoader`` in ``"elements"``
    mode with ``process_attachments=True``; the ``page_content`` of every
    returned document is concatenated (no separator) into one string, which
    is passed to an ``AnalyzeDocumentChain`` wrapping a map-reduce
    question-answering chain backed by ``OpenAI``.

    Args:
        file_upload: Location of the email file, passed straight to
            ``UnstructuredEmailLoader`` (presumably a filesystem path --
            confirm against the caller).
        message: The question to ask about the email.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question
        (presumably the answer text).
    """
    loader = UnstructuredEmailLoader(
        file_upload,
        mode="elements",
        process_attachments=True,
    )
    # str.join assembles the text in one pass instead of re-copying the
    # accumulated string on every `+=`.
    email_text = "".join(element.page_content for element in loader.load())

    # No key is passed explicitly; the OpenAI wrapper is left to resolve its
    # own credentials (presumably from OPENAI_API_KEY -- confirm).
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_email_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_email_chain.run(input_document=email_text, question=message)

def docx_file(file_upload, message):
    """Answer a question about the contents of a Word (.docx) file.

    The document is loaded with ``Docx2txtLoader``; the ``page_content`` of
    every returned document is concatenated (no separator) into one string,
    which is passed to an ``AnalyzeDocumentChain`` wrapping a map-reduce
    question-answering chain backed by ``OpenAI``.

    Args:
        file_upload: Location of the .docx file, passed straight to
            ``Docx2txtLoader`` (presumably a filesystem path -- confirm
            against the caller).
        message: The question to ask about the document.

    Returns:
        Whatever ``AnalyzeDocumentChain.run`` returns for the question
        (presumably the answer text).
    """
    documents = Docx2txtLoader(file_upload).load()
    # str.join assembles the text in one pass instead of re-copying the
    # accumulated string on every `+=`.
    doc_text = "".join(document.page_content for document in documents)

    # No key is passed explicitly; the OpenAI wrapper is left to resolve its
    # own credentials (presumably from OPENAI_API_KEY -- confirm).
    llm = OpenAI(temperature=0)
    qa_chain = load_qa_chain(llm, chain_type="map_reduce")
    qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
    return qa_document_chain.run(input_document=doc_text, question=message)
    
def initiate_process(file_upload, extention, message):
    """Route an uploaded file to the handler matching its extension.

    Args:
        file_upload: The uploaded file, forwarded unchanged to the handler.
        extention: File extension selecting the handler. Matching ignores
            letter case, surrounding whitespace and leading dots, so
            ``"pdf"``, ``"PDF"`` and ``".pdf"`` all select the PDF handler.
        message: The user's question, forwarded unchanged to the handler.

    Returns:
        The selected handler's result, or the string
        ``"Please upload correct file format"`` when the extension is not
        one of pdf, eml, docx, csv or xlsx (or is not a string at all).
    """
    unsupported = "Please upload correct file format"

    # Non-string values (e.g. None) can never name a supported format; answer
    # with the usual message instead of failing on the string methods below.
    if not isinstance(extention, str):
        return unsupported

    kind = extention.strip().lstrip(".").lower()

    if kind == "pdf":
        return pdf_file(file_upload, message)
    if kind == "eml":
        return email_file(file_upload, message)
    if kind == "docx":
        return docx_file(file_upload, message)
    if kind == "csv":
        return csv_file(file_upload, message)
    if kind == "xlsx":
        return excel_file(file_upload, message)
    return unsupported