File size: 3,008 Bytes
e37b348
 
 
 
 
 
 
 
 
 
 
 
 
 
342c06d
e37b348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
870c233
 
e37b348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
870c233
 
e37b348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
870c233
e37b348
 
 
 
 
 
 
 
 
870c233
cb655fd
870c233
e37b348
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
import json
import streamlit as st
from tempfile import NamedTemporaryFile

from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader,PyPDFLoader
from langchain_community.docstore.document import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI,OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnableParallel,RunnablePassthrough

key1 = os.environ.get("OPENAI_API_KEY")

# load and split the documents using pypdf and text splitters
def load_and_split_document(uploaded_file):
    """Load an uploaded PDF and split it into page-level Documents.

    The uploaded bytes are written to a temporary file because
    PyPDFLoader needs a filesystem path, not a file object.

    Args:
        uploaded_file: Streamlit UploadedFile holding the PDF bytes.

    Returns:
        List of langchain Documents produced by load_and_split().
    """
    bytes_data = uploaded_file.read()
    # delete=False so the path survives after close(); the file must be
    # CLOSED before PyPDFLoader opens it — on Windows an open
    # NamedTemporaryFile cannot be reopened by a second reader.
    tmp = NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        tmp.write(bytes_data)
        tmp.close()
        data = PyPDFLoader(tmp.name).load_and_split()
    finally:
        # Always remove the temp file, even if loading raises,
        # so failed uploads don't leak files on disk.
        tmp.close()
        os.remove(tmp.name)
    return data
    
    

#create embeddings and store them in a database
def embeddings(key):
    """Build the OpenAI embeddings client used for indexing and retrieval.

    Args:
        key: OpenAI API key string.

    Returns:
        A configured OpenAIEmbeddings instance.
    """
    client = OpenAIEmbeddings(openai_api_key=key)
    return client

def create_database(data, embeddings, path="faiss_database"):
    """Build a FAISS vector store from documents and persist it locally.

    Args:
        data: iterable of langchain Documents to embed and index.
        embeddings: embeddings client (e.g. OpenAIEmbeddings).
        path: directory to save the index under (default keeps the
            original hard-coded "faiss_database" location).

    Returns:
        The built FAISS database, so callers can query it immediately
        instead of reloading it from disk.
    """
    db = FAISS.from_documents(data, embeddings)
    db.save_local(path)
    return db

# create the prompt,model and chain using langchain 

# RAG prompt template: the retriever's document chunks fill {context}
# and the user's query fills {question} (see response() below).
prompt = """
Answer the question based only on the following context:
{context}

Question: {question}
"""

def get_model(key, model_name="gpt-3.5-turbo"):
    """Create the ChatOpenAI chat model used to answer questions.

    Args:
        key: OpenAI API key string.
        model_name: model identifier; defaults to "gpt-3.5-turbo".

    Returns:
        A configured ChatOpenAI instance.
    """
    return ChatOpenAI(openai_api_key=key, model_name=model_name)

def response(database, model, question):
    """Answer *question* with a retrieval-augmented chain.

    Retrieves relevant chunks from the vector store, fills the module's
    prompt template, runs the chat model, and parses the output to text.

    Args:
        database: FAISS vector store to retrieve context from.
        model: chat model that generates the answer.
        question: the user's question string.

    Returns:
        The model's answer as a plain string.
    """
    retriever = database.as_retriever()
    chat_prompt = ChatPromptTemplate.from_template(prompt)
    rag_chain = (
        {'context': retriever, 'question': RunnablePassthrough()}
        | chat_prompt
        | model
        | StrOutputParser()
    )
    return rag_chain.invoke(question)

def main():
    """Streamlit entry point: upload a PDF, index it, then ask questions."""
    st.set_page_config("Chat Q&A")
    st.header("Chat with your pdf documents")

    # Fail fast with a visible message instead of passing None as the
    # API key to the OpenAI clients deep inside a button handler.
    if not key1:
        st.error("OPENAI_API_KEY environment variable is not set.")
        return

    uploaded_file = st.file_uploader("Upload your document", type=["pdf"])

    if uploaded_file:

        st.header("update vector store")

        if st.button("update vectors"):
            with st.spinner("processing..."):
                data = load_and_split_document(uploaded_file)
                embed = embeddings(key1)
                create_database(data, embed)
                st.success("done")

        question = st.text_input("Ask question about your documents")

        if question:

            if st.button("OpenAI result"):
                # Guard: FAISS.load_local raises if the index was never
                # built; tell the user instead of crashing the app.
                if not os.path.isdir("faiss_database"):
                    st.error("Please update the vector store first.")
                    return
                with st.spinner("processing..."):
                    embed = embeddings(key1)
                    # allow_dangerous_deserialization is acceptable here:
                    # the index is written locally by this app, not
                    # fetched from an untrusted source.
                    database = FAISS.load_local(
                        "faiss_database", embed,
                        allow_dangerous_deserialization=True,
                    )
                    model = get_model(key1)
                    st.write(response(database, model, question))
                    st.success("done")


if __name__ == "__main__":
    main()