# TLDR-the-TnC / app.py
import os
import re
import time

import magic  # file-type detection used by the unstructured document loaders
import nltk
import gradio as gr

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI

import config

# Read the OpenAI API key from config.py and expose it via the environment,
# which is where the LangChain OpenAI integrations look for it.
OPENAI_API_KEY = config.OPENAI_API_KEY
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
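# A minimal sketch of what config.py is expected to provide (hypothetical
# contents; only the OPENAI_API_KEY attribute is actually read above):
#
#     # config.py
#     OPENAI_API_KEY = "sk-..."  # your OpenAI API key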
# Messages that ask the bot to show the source passage behind its last answer.
source_requests = [
    'source?',
    'source',
]
class Engine:
    """Holds the retrieval-QA chain built from the uploaded T&C document."""

    title: str = ""
    qa = None
    source_document = None

    def setup_file(self, filepath):
        # The unstructured text loader needs this NLTK tagger at runtime.
        nltk.download('averaged_perceptron_tagger')
        try:
            # Load the uploaded file (filepath is absolute, so the leading
            # slash is stripped and the rest is used as a glob relative to '/').
            loader = DirectoryLoader('/', glob=filepath[1:])
            documents = loader.load()

            # Split the document into ~1000-character chunks for embedding.
            text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
            texts = text_splitter.split_documents(documents)

            # Embed the chunks and index them in an in-memory Chroma store.
            embeddings = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
            docsearch = Chroma.from_documents(texts, embeddings)

            chain_type_kwargs = {
                "memory": ConversationBufferMemory()
            }
            llm = ChatOpenAI(temperature=0, verbose=True)

            # Build a "stuff" RetrievalQA chain that also returns the source
            # chunks, so the user can ask where an answer came from.
            self.qa = RetrievalQA.from_chain_type(llm=llm,
                                                  chain_type='stuff',
                                                  retriever=docsearch.as_retriever(),
                                                  chain_type_kwargs=chain_type_kwargs,
                                                  return_source_documents=True)
            self.title = filepath
        except Exception as e:
            raise Exception("Something went wrong while processing the uploaded document") from e
engine = Engine()
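# Usage sketch (hypothetical file path and query; the Gradio callbacks below
# drive `engine` the same way in response to UI events):
#
#     engine.setup_file("/tmp/terms_of_service.txt")
#     result = engine.qa({"query": "Can my data be shared with third parties?"})
#     print(result["result"])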
def add_text(history, text):
    # Append the user's message; the bot's reply is filled in by `bot` later.
    history = history + [(text, None)]
    return history

def add_file(history, file):
    # Build the retrieval-QA chain from the uploaded file and confirm in the chat.
    engine.setup_file(file.name)
    history = history + [("File successfully uploaded. Prompt away! ✅", None)]
    return history
def bot(history, text):
    if engine.qa:
        response = engine.qa({'query': text})
        if text.lower() in source_requests:
            # The user asked where the previous answer came from.
            history[-1][1] = engine.source_document or "Ask a question first, then request its source."
            yield history, ""
        else:
            # Remember the top source chunk so the user can ask for it afterwards.
            engine.source_document = "\"" + response['source_documents'][0].page_content + " \""
            # Stream the answer piece by piece to simulate typing.
            history[-1][1] = ""
            for info in re.split(r"(,|\s+)", response['result']):
                history[-1][1] += info
                time.sleep(0.075)
                yield history, ""
    else:
        history[-1][1] = "Upload a document first"
        yield history, ""
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    chatbot = gr.Chatbot([], elem_id="chatbot", label="TLDR the T&C").style(height=750)

    with gr.Row():
        with gr.Column(scale=0.85):
            txt = gr.Textbox(
                show_label=False,
                placeholder="Upload a T&C file (pdf or txt), then enter prompt",
            ).style(container=False)
        with gr.Column(scale=0.15, min_width=0):
            btn = gr.UploadButton("📄", file_types=["text", "pdf"])

    # Submitting text first appends the user message, then streams the bot reply.
    txt.submit(add_text, [chatbot, txt], [chatbot]).then(
        bot, [chatbot, txt], [chatbot, txt]
    )
    # Uploading a file builds the QA chain and posts a confirmation message.
    btn.upload(add_file, [chatbot, btn], [chatbot])

demo.queue().launch()
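# Running `python app.py` serves the app locally (Gradio defaults to
# http://127.0.0.1:7860); queue() is what lets the generator-based `bot`
# callback stream partial replies into the Chatbot component.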