# Chat-with-PDF: Gradio app that answers questions about an uploaded PDF
# using LangChain retrieval over an OpenAI chat model.
import gradio as gr
from langchain.document_loaders import OnlinePDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
import os
import tempfile
import openai
import json
import re
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
import tempfile
import time
# Map the `OpenApi_Key` secret onto the env var name the OpenAI client and
# LangChain expect. Raises KeyError at import time if the secret is missing.
os.environ["OPENAI_API_KEY"] = os.environ['OpenApi_Key']
# Mutable module-level chat state (this app is effectively single-session):
query1=" "        # running, LLM-summarised conversation transcript
limit = 0         # questions answered since the last cooldown reset
st = 0            # wall-clock timestamp when the cooldown started
paused = False    # True while the usage-limit cooldown is active
waittime = 15.0   # cooldown duration in seconds
maxLimit = 50     # questions allowed before the cooldown kicks in
def loading_pdf():
    """Return the status text shown while a PDF is being processed."""
    status_message = "Loading..."
    return status_message
def pdf_changes(pdf_doc, prompt):
    """Build the retrieval-QA chain for the uploaded PDF.

    Loads *pdf_doc*, splits it into overlapping chunks, embeds the chunks
    into an in-memory FAISS index, and stores a ready-to-use
    RetrievalQAWithSourcesChain in the module-level `chain` global that
    `infer` calls.

    Args:
        pdf_doc: Gradio file wrapper; only its `.name` (temp file path) is used.
        prompt: optional extra behaviour instructions spliced into the
            system prompt (may be an empty string).

    Returns:
        The status string "Ready" for the Gradio status textbox.
    """
    #loader = OnlinePDFLoader(pdf_doc.name)
    loader = PyPDFLoader(pdf_doc.name)
    documents = loader.load()
    # Chunking parameters: ~1000-char chunks with a 50-char overlap so text
    # spanning a chunk boundary remains retrievable.
    separator = "\n"
    chunk_size_limit = 1000
    max_chunk_overlap = 50
    text_splitter = CharacterTextSplitter(separator=separator, chunk_size=chunk_size_limit, chunk_overlap=max_chunk_overlap)
    split_docs = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()
    vector_store1 = FAISS.from_documents(split_docs, embeddings)
    from langchain.prompts import (
        ChatPromptTemplate,
        SystemMessagePromptTemplate,
        HumanMessagePromptTemplate,
    )
    # System prompt restricts answers to the retrieved context; the optional
    # user-supplied behaviour prompt is appended before the context slot.
    system_template="""You are a helpful chatbot used by the user to chat with pdf documents. Only answer the questions by using information provided in the context provided to you. If there is no relevant context, tell 'Hmm, I'm not sure'."""+prompt+"""
----------------
{summaries}"""
    messages = [
        SystemMessagePromptTemplate.from_template(system_template),
        HumanMessagePromptTemplate.from_template("{question}")
    ]
    prompt2 = ChatPromptTemplate.from_messages(messages)
    from langchain.chat_models import ChatOpenAI
    from langchain.chains import RetrievalQAWithSourcesChain
    chain_type_kwargs = {"prompt": prompt2}
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=512) # Modify model_name if you have access to GPT-4
    global chain
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type="stuff",
        # k=2: only the two most similar chunks are stuffed into the prompt.
        retriever=vector_store1.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True,
        chain_type_kwargs=chain_type_kwargs
    )
    return "Ready"
def add_text(history, text):
    """Append the user's message (with no answer yet) and clear the textbox."""
    updated_history = [*history, (text, None)]
    return updated_history, ""
def bot(history):
    """Answer the newest user message in-place and return the updated history."""
    latest_question = history[-1][0]
    answer = infer(latest_question)
    history[-1][1] = answer
    return history
def infer(question):
    """Answer *question* against the loaded PDF, enforcing a usage limit.

    Maintains a running, LLM-summarised transcript in the module global
    `query1`, counts questions in `limit`, and pauses for `waittime`
    seconds once `maxLimit` questions have been answered.

    Returns:
        The chain's answer string, or a rate-limit notice while paused.
        Requires `pdf_changes` to have set the global `chain` first.
    """
    global query1
    global limit
    global st
    global paused
    openai.api_key = os.environ['OpenApi_Key']
    prompt_text = question
    if not paused:
        if not prompt_text:
            # Guard: without this, `result` would be unbound below and the
            # final return would raise UnboundLocalError on an empty question.
            return "Please enter a question."
        query1 = query1 + "\nUser: " + prompt_text + "\nBot: "
        if limit <= maxLimit:
            result = chain(query1)
            query1 = query1 + result['answer']
            # Compress the transcript via a summarisation call so the prompt
            # stays small across turns; apostrophes are stripped from the
            # summary before it is stored.
            query1 = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are provided with chat history and latest conversation between user and bot. Summarise the history and latest conversation in minimum most tokens possible. Do not include greetings in the summary like hi, hello, etc."},
                    {"role": "user", "content": query1},
                ]
            )["choices"][0]["message"]["content"].replace("'", "")
            limit += 1
            if limit == maxLimit:
                # Start the cooldown window.
                paused = True
                st = time.time()
            return result['answer']
    else:
        if time.time() - st > waittime:
            # Cooldown elapsed: reset the counter and answer normally.
            paused = False
            limit = 0
            return infer(question)
        else:
            return "Usage Limit reached :( Please visit https://askedith.gumroad.com/l/app to unlock unlimited access!"
# Gradio CSS override: centre the main layout column.
css="""
#col-container { margin-left: auto; margin-right: auto;}
"""
# HTML page header (currently unused — the gr.HTML(title) call in the layout
# below is commented out).
title = """
<div style="text-align: center; max-width: 700px;">
<h1 style="color: #4545FF;">Chat with PDF</h1>
<p style="text-align: center; color: #4545FF;">Upload a .PDF from your computer, click the "Load PDF" button, <br />
when everything is ready, you can start asking questions about the pdf ;)</p>
</div>
"""
# ---- Gradio UI layout and event wiring ----
with gr.Blocks(css=css,theme = gr.themes.Soft()) as demo:
    with gr.Column(elem_id="col-container"):
        #gr.HTML(title)
        with gr.Row():
            with gr.Column(scale=1):
                # Left pane: PDF upload, optional behaviour prompt, status/load.
                pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
                prompt = gr.Textbox(label="Behaviour Prompt (optional)", placeholder="Reply to all questions as a rap / Reply to all questions in Hindi etc. ")
                #repo_id = gr.Dropdown(label="LLM", choices=["google/flan-ul2", "OpenAssistant/oasst-sft-1-pythia-12b", "bigscience/bloomz"], value="google/flan-ul2")
                with gr.Row():
                    langchain_status = gr.Textbox(label="Status", placeholder="Waiting for PDF", interactive=False,show_label=False)
                    load_pdf = gr.Button("Load pdf")
            with gr.Column(scale=2):
                # Right pane: chat window plus question input and send button.
                chatbot = gr.Chatbot([], elem_id="chatbot",show_label=False,show_share_button=False).style(height=750)
                with gr.Row():
                    question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ",scale=6,show_label=False)
                    submit_btn = gr.Button("Send",scale=1)
    #load_pdf.click(loading_pdf, None, langchain_status, queue=False)
    #repo_id.change(pdf_changes, inputs=[pdf_doc], outputs=[langchain_status], queue=False)
    # Build the QA chain when the user clicks "Load pdf".
    load_pdf.click(pdf_changes, inputs=[pdf_doc,prompt], outputs=[langchain_status], queue=False)
    # Both Enter-in-textbox and the Send button append the user message first,
    # then fill in the bot's answer.
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )
    submit_btn.click(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )
# NOTE(review): removed a stray trailing "|" paste artifact after this call —
# it was a syntax error in the original line.
demo.launch(debug=True)