File size: 2,861 Bytes
5e09e0a
3d90cef
5e09e0a
4f56416
5e09e0a
b93e0f2
5e09e0a
5c64c3f
b93e0f2
fdf4f16
 
5e09e0a
 
 
 
 
 
 
b93e0f2
5e09e0a
3d90cef
5e09e0a
 
 
 
 
 
7b3249e
4f56416
5e09e0a
 
 
 
1d9292c
5e09e0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ade9294
5e09e0a
 
 
 
 
 
 
 
 
1c17feb
 
 
5e09e0a
 
 
 
1c17feb
b93e0f2
5e09e0a
 
 
 
 
 
 
6690b2c
5e09e0a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import gradio as gr
from langchain.document_loaders import OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
import os

# Hugging Face Hub credentials: the token is read from the ``RLS``
# environment variable and re-exported as HUGGINGFACEHUB_API_TOKEN
# (presumably the variable the Hugging Face Hub classes used in this file
# look up -- confirm against the LangChain documentation).
key = os.environ.get('RLS')
if key is not None:
    # os.environ only accepts strings; assigning None would raise TypeError
    # at import time, so an unset RLS leaves any existing token untouched.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = key

import sentence_transformers
import faiss

def loading_pdf():
    """Return the fixed status text ``"Loading..."``."""
    status = "Loading..."
    return status

def pdf_changes(pdf_doc):
    """Index an uploaded PDF and build the question-answering chain.

    The document is loaded and split, the chunks are embedded into a FAISS
    index, and a ``RetrievalQA`` chain over that index is stored in the
    module-level global ``qa``.

    Args:
        pdf_doc: The uploaded file, exposing its path through a ``.name``
            attribute. ``None`` (nothing uploaded) or any value without a
            ``.name`` is treated as "no file".

    Returns:
        ``"Ready"`` once the chain has been built, or a prompt asking for a
        PDF when no usable file was supplied.
    """
    global qa

    # Guard against being triggered without an uploaded file (e.g. the button
    # is clicked before an upload, or a non-file component supplies the
    # input); the original code failed here with AttributeError.
    pdf_path = getattr(pdf_doc, "name", None)
    if not pdf_path:
        return "Please upload a PDF first"

    loader = OnlinePDFLoader(pdf_path)
    pages = loader.load_and_split()

    # Raw string for the third separator: the runtime value is unchanged, but
    # the invalid "\." escape no longer triggers a SyntaxWarning.
    # NOTE(review): '(?=>\. )' looks like a mistyped regex lookbehind
    # '(?<=\. )'; kept verbatim so splitting behaviour does not change.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1024,
        chunk_overlap=64,
        separators=['\n\n', '\n', r'(?=>\. )', ' ', '']
    )
    docs = text_splitter.split_documents(pages)

    embeddings = HuggingFaceHubEmbeddings()
    db = FAISS.from_documents(docs, embeddings)

    # NOTE(review): max_length of 1000000 is far larger than any plausible
    # answer; confirm the hosted model honours or clamps it.
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-xxl",
        model_kwargs={"temperature": 1, "max_length": 1000000},
    )

    # The retriever hands the 3 best-matching chunks to the "stuff" chain.
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 3}),
    )
    return "Ready"

def add_text(history, text):
    """Append the user's message to the chat history as an unanswered turn.

    Args:
        history: Existing chat turns; it is not modified in place.
        text: The message the user just submitted.

    Returns:
        A ``(new_history, "")`` pair: the history extended with
        ``(text, None)``, and an empty string.
    """
    pending_turn = (text, None)
    return history + [pending_turn], ""

def bot(history):
    """Fill in the answer for the most recent turn of the chat history.

    Args:
        history: Chat turns; the last one holds the user's question at
            index 0 and receives the answer at index 1 (so it must be
            mutable).

    Returns:
        The same ``history`` object, updated in place.
    """
    latest_turn = history[-1]
    answer = infer(latest_turn[0])
    latest_turn[1] = answer['result']
    return history

def infer(question):
    """Run *question* through the retrieval chain stored in the global ``qa``.

    Args:
        question: The user's question text.

    Returns:
        Whatever the chain returns for ``{"query": question}``; ``bot`` reads
        the answer from its ``'result'`` key. If no chain has been built yet,
        a mapping with the same ``'query'``/``'result'`` keys whose result
        asks the user to load a PDF.
    """
    # ``qa`` only becomes a module global once pdf_changes has run; looking
    # it up defensively avoids a NameError when a question arrives first.
    chain = globals().get("qa")
    if chain is None:
        return {"query": question, "result": "Please load a PDF first"}

    return chain({"query": question})

# Page CSS: caps the "col-container" element at 700px and centres it.
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# Heading markup for the top of the page. The wrapping <div> is now closed
# explicitly; the original left it open.
title = """
<div style="text-align: center;max-width: 700px;">
    <h1>Chat with PDF</h1>   
</div>
"""


# Assemble the page: heading, document controls and status, then the chat
# widgets, all inside the "col-container" column styled by ``css``.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        # Document controls.
        with gr.Column():
            pdf_doc = gr.File(
                label="Load a pdf",
                file_types=['.pdf'],
                type="file",
            )
            langchain_status = gr.Textbox(
                label="Status",
                placeholder="",
                interactive=False,
            )
            load_pdf = gr.Button("Load PDF")
            Book = gr.Dropdown(
                label="Books",
                choices=["Book 1", "Book 2", "Book 3"],
                value=["Book 1", "Book 2", "Book 3"],
            )

        # Conversation widgets.
        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
        question = gr.Textbox(
            label="Question",
            placeholder="Type your question and hit Enter ",
        )
        submit_btn = gr.Button("Send message")

    # Both hooks below write pdf_changes' return value into the status box.
    # (A separate "Loading..." status hook via loading_pdf is disabled.)
    # NOTE(review): pdf_changes is written for an uploaded file object, while
    # a Dropdown passes its selected value -- confirm this hook is intended.
    Book.change(
        fn=pdf_changes,
        inputs=[Book],
        outputs=[langchain_status],
        queue=False,
    )
    load_pdf.click(
        fn=pdf_changes,
        inputs=[pdf_doc],
        outputs=[langchain_status],
        queue=False,
    )

    # Pressing Enter in the question box and clicking the send button do the
    # same thing: record the question, clear the box, then let bot answer.
    for _trigger in (question.submit, submit_btn.click):
        _trigger(
            fn=add_text,
            inputs=[chatbot, question],
            outputs=[chatbot, question],
        ).then(fn=bot, inputs=chatbot, outputs=chatbot)

demo.launch()