File size: 6,537 Bytes
290f4f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502fc4a
290f4f8
77488ae
290f4f8
f2a8094
502fc4a
 
599da55
9f34de4
290f4f8
 
 
031e23f
290f4f8
b3d9f7e
 
290f4f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
031e23f
290f4f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98338a2
502fc4a
 
9f8ee69
290f4f8
502fc4a
 
 
 
 
 
 
 
 
 
 
 
 
 
f303fb0
502fc4a
 
 
 
 
 
 
 
 
a3a3575
502fc4a
290f4f8
 
2f13a77
290f4f8
 
 
031e23f
 
 
290f4f8
 
 
 
 
592c8f6
290f4f8
031e23f
89dfad4
 
 
 
 
 
55ca354
89dfad4
 
bb2ef46
55ca354
 
 
290f4f8
 
031e23f
290f4f8
 
 
 
 
 
 
0fc054a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import gradio as gr

from langchain.document_loaders import OnlinePDFLoader

from langchain.text_splitter import CharacterTextSplitter

from langchain.llms import HuggingFaceHub

from langchain.embeddings import HuggingFaceHubEmbeddings

from langchain.vectorstores import Chroma

from langchain.chains import RetrievalQA

import os
import tempfile
import openai
import json
import re
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
import tempfile
import time

# Copy the deployment secret into the env var the OpenAI clients read.
# Raises KeyError at import time if 'OpenApi_Key' is not set.
os.environ["OPENAI_API_KEY"] = os.environ['OpenApi_Key']
# NOTE(review): these globals are shared across ALL concurrent users of the
# Gradio app — transcript and rate-limit state are not per-session.
query1=" "       # running conversation transcript fed to the retrieval chain
limit = 0        # questions answered since the last cooldown reset
st = 0           # time.time() stamp taken when the usage pause began
paused = False   # True while the rate-limit cooldown is active
waittime = 15.0  # cooldown duration in seconds
maxLimit = 50    # questions allowed before the cooldown kicks in
def loading_pdf():
    """Return the placeholder status shown while a PDF is being processed."""
    status_message = "Loading..."
    return status_message

def pdf_changes(pdf_doc, prompt):
    """Build the retrieval-QA chain for an uploaded PDF.

    Loads the PDF, splits it into overlapping character chunks, embeds the
    chunks into an in-memory FAISS index, and wires a GPT-3.5 chat model into
    a RetrievalQAWithSourcesChain stored in the module-global ``chain`` that
    infer() consumes.

    Args:
        pdf_doc: Gradio file object; only its ``.name`` (temp file path) is used.
        prompt: Optional extra behaviour instructions appended verbatim to the
            system prompt (e.g. "Reply to all questions in Hindi").

    Returns:
        The status string "Ready" once the chain has been built.
    """
    loader = PyPDFLoader(pdf_doc.name)
    documents = loader.load()

    # ~1000 chars per chunk with 50 chars of overlap so text cut at a chunk
    # boundary still appears whole in one of the two neighbouring chunks.
    text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=50)
    split_docs = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings()
    vector_store1 = FAISS.from_documents(split_docs, embeddings)

    from langchain.prompts import (
        ChatPromptTemplate,
        SystemMessagePromptTemplate,
        HumanMessagePromptTemplate,
    )

    # "{summaries}" is filled by the "stuff" chain with the retrieved chunks.
    system_template="""You are a helpful chatbot used by the user to chat with pdf documents. Only answer the questions by using information provided in the context provided to you. If there is no relavant context, tell 'Hmm, I'm not sure'."""+prompt+"""
    ----------------
    {summaries}"""

    messages = [
        SystemMessagePromptTemplate.from_template(system_template),
        HumanMessagePromptTemplate.from_template("{question}")
    ]
    prompt2 = ChatPromptTemplate.from_messages(messages)

    from langchain.chat_models import ChatOpenAI
    from langchain.chains import RetrievalQAWithSourcesChain

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=512)  # Modify model_name if you have access to GPT-4
    global chain  # rebuilt on every PDF load; read by infer()
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type="stuff",
        # k=2: retrieve the two most similar chunks per question.
        retriever=vector_store1.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt2}
    )
    return "Ready"

def add_text(history, text):
    """Append the user's message to the chat history with no bot reply yet.

    Returns the extended history and an empty string that clears the input
    textbox in the UI.
    """
    updated_history = [*history, (text, None)]
    return updated_history, ""

def bot(history):
    """Fill in the bot reply for the most recent user message in *history*."""
    latest_question = history[-1][0]
    history[-1][1] = infer(latest_question)
    return history

def infer(question):
    """Answer *question* against the loaded PDF, enforcing a usage cap.

    Appends the exchange to the module-global transcript ``query1``, queries
    the retrieval ``chain``, then compresses the transcript with a summarising
    ChatCompletion call so the context stays small.  After ``maxLimit``
    questions the bot pauses for ``waittime`` seconds and returns an upsell
    message instead of answering.

    Returns:
        The chain's answer string, the limit message while paused, or None
        when *question* is empty.  NOTE(review): the None paths make bot()
        render an empty bubble — consider returning "" instead.
    """
    global query1, limit, st, paused
    openai.api_key = os.environ['OpenApi_Key']

    if paused:
        if time.time() - st > waittime:
            # Cooldown elapsed: reset the counter and answer this question.
            paused = False
            limit = 0
            return infer(question)
        return "Usage Limit reached :( Please visit https://askedith.gumroad.com/l/app to unlock unlimited access!"

    if not question:
        return None  # nothing to ask; preserves original implicit-None behaviour

    query1 = query1 + "\nUser: " + question + "\nBot: "
    if limit > maxLimit:
        # Defensive: normally unreachable because paused is set at maxLimit.
        return None
    result = chain(query1)
    query1 = query1 + result['answer']
    # Summarise the growing transcript into a compact form for the next turn.
    query1 = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are provided with chat history and latset conversation between user and bot. Summarise the history and latest conversationin minimum most tokens possible. Do not include greetings in the summary like hi, hello, etc."},
            {"role": "user", "content": query1},
        ]
    )["choices"][0]["message"]["content"].replace("'", "")
    limit += 1
    if limit == maxLimit:
        # Hit the cap: start the cooldown clock.
        paused = True
        st = time.time()
    return result['answer']


# Minimal CSS: centre the main column.
css="""
#col-container { margin-left: auto; margin-right: auto;}
"""

# Header markup (currently unused — the gr.HTML(title) call below is
# commented out).
title = """
<div style="text-align: center; max-width: 700px;">
    <h1 style="color: #4545FF;">Chat with PDF</h1>
    <p style="text-align: center; color: #4545FF;">Upload a .PDF from your computer, click the "Load PDF" button, <br />
    when everything is ready, you can start asking questions about the pdf ;)</p>
</div>
"""


# UI layout: left column = PDF upload + optional behaviour prompt + load
# button/status; right column = chat transcript + question box + send button.
with gr.Blocks(css=css,theme = gr.themes.Soft()) as demo:
    with gr.Column(elem_id="col-container"):
        #gr.HTML(title)
        with gr.Row():
          with gr.Column(scale=1):
              pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
              prompt = gr.Textbox(label="Behaviour Prompt (optional)", placeholder="Reply to all questions as a rap / Reply to all questions in Hindi etc. ")
              #repo_id = gr.Dropdown(label="LLM", choices=["google/flan-ul2", "OpenAssistant/oasst-sft-1-pythia-12b", "bigscience/bloomz"], value="google/flan-ul2")
              with gr.Row():
                  langchain_status = gr.Textbox(label="Status", placeholder="Waiting for PDF", interactive=False,show_label=False)
                  load_pdf = gr.Button("Load pdf")
          with gr.Column(scale=2):
            # NOTE(review): .style() is a deprecated Gradio 3.x API — confirm the
            # pinned gradio version before upgrading.
            chatbot = gr.Chatbot([], elem_id="chatbot",show_label=False,show_share_button=False).style(height=750)
            with gr.Row():
              question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ",scale=6,show_label=False)
              submit_btn = gr.Button("Send",scale=1)
    #load_pdf.click(loading_pdf, None, langchain_status, queue=False)    
    #repo_id.change(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False)
    # Clicking "Load pdf" builds the retrieval chain and updates the status box.
    load_pdf.click(pdf_changes, inputs=[pdf_doc,prompt], outputs=[langchain_status], queue=False)
    # Enter in the question box or the Send button: first append the user
    # message (and clear the box), then generate the bot reply.
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )
    submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )

demo.launch(debug=True)