File size: 2,454 Bytes
b67ba3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain.indexes import VectorstoreIndexCreator
import os
from getpass import getpass

###### GLOBALS
# Directory the documents are loaded from; empty until init() sets it to 'docs/'.
docsDir = ''
# Question-answering chain used by askQuestion(); an empty-string placeholder
# until init() replaces it with the chain returned by directoryLoader().
qa = ''

####### FUNCTIONS
def setOpenApiKey(openApiKey):
    """Export the OpenAI API key to the environment and build the QA chain.

    Args:
        openApiKey: The API key. Must be a non-empty, non-blank string.

    Returns:
        True once the key has been written to ``OPENAI_API_KEY`` and
        ``init()`` has run. False when the key is rejected; in that case the
        environment is left untouched and ``init()`` is not called, so no
        chain is built with a key that is known to be unusable.
    """
    print("setOpenApiKey")
    # Validate up front instead of hiding every possible error behind a bare
    # ``except``: os.environ only accepts strings, and a blank key can never
    # authenticate.
    if not isinstance(openApiKey, str) or not openApiKey.strip():
        print("Invalid key. Please try again.")
        return False
    try:
        os.environ["OPENAI_API_KEY"] = openApiKey
    except ValueError:
        # The platform refused the value (for example an embedded NUL byte).
        print("Invalid key. Please try again.")
        return False
    init()
    return True

def pdfLoader(path):
    """Build a retrieval question-answering chain over a single PDF file.

    The PDF is loaded and split, re-split with a ``CharacterTextSplitter``
    (``chunk_size=1000``, ``chunk_overlap=0``), embedded with
    ``OpenAIEmbeddings`` and stored in a Chroma vector store. The store is
    queried by similarity with ``k=2``.

    Args:
        path: Location of the PDF file handed to ``PyPDFLoader``.

    Returns:
        A ``RetrievalQA`` chain of type "stuff" configured with
        ``return_source_documents=True``.
    """
    print("Startng pdfLoader")
    from langchain.document_loaders import PyPDFLoader
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.embeddings import OpenAIEmbeddings
    from langchain.vectorstores import Chroma

    pages = PyPDFLoader(path).load_and_split()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(pages)

    # Embed the chunks and index them in Chroma.
    store = Chroma.from_documents(chunks, OpenAIEmbeddings())
    top_two = store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 2},
    )

    return RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type="stuff",
        retriever=top_two,
        return_source_documents=True,
    )

def directoryLoader(path):
    """Build a retrieval question-answering chain over a directory of files.

    Documents are loaded with ``DirectoryLoader``, split with a
    ``CharacterTextSplitter`` (``chunk_size=1000``, ``chunk_overlap=0``),
    embedded with ``OpenAIEmbeddings`` and stored in a Chroma vector store
    whose default retriever backs the chain.

    Loader documentation:
    https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/directory_loader.html

    Args:
        path: Directory handed to ``DirectoryLoader``.

    Returns:
        A ``RetrievalQA`` chain of type "stuff".
    """
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.embeddings import OpenAIEmbeddings
    from langchain.vectorstores import Chroma

    print("directoryLoader")
    raw_docs = DirectoryLoader(path).load()

    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(raw_docs)

    # Embed the chunks and index them in Chroma.
    store = Chroma.from_documents(chunks, OpenAIEmbeddings())
    doc_retriever = store.as_retriever()

    return RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type="stuff",
        retriever=doc_retriever,
    )

def init():
    """Set the default documents directory and build the module-level chain.

    Assigns 'docs/' to the global ``docsDir`` and stores the chain returned
    by ``directoryLoader(docsDir)`` in the global ``qa``.
    """
    global docsDir, qa
    print("init")
    docsDir = 'docs/'
    # A single PDF can be indexed instead via pdfLoader("pdf/test.pdf").
    qa = directoryLoader(docsDir)

def askQuestion(query):
    """Answer ``query`` with the module-level ``qa`` chain.

    Supports both kinds of chain this module builds, so the function no
    longer has to be hand-edited when switching loaders:

    * a chain with a single output (as built by ``directoryLoader``) is
      invoked through ``qa.run(query)``;
    * a chain created with ``return_source_documents=True`` (as built by
      ``pdfLoader``) produces several outputs, which ``run`` does not
      support, so it is called with ``{"query": query}`` and only the
      ``'result'`` entry is returned.

    Args:
        query: The question to ask.

    Returns:
        The answer produced by the chain; converted with ``str()`` on the
        source-documents path.

    Raises:
        AttributeError: If ``qa`` is still the empty-string placeholder,
            i.e. ``init()`` has not built a chain yet.
    """
    if getattr(qa, "return_source_documents", False):
        output = qa({"query": query})
        return str(output['result'])
    return qa.run(query)