admin-dev's picture
Update app.py
b8a5d6d
Raw
History Blame Contribute Delete
3.44 kB
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
from langchain import SerpAPIWrapper
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory
from langchain.agents import ZeroShotAgent, Tool, AgentExecutor
from langchain import LLMChain
import os
from glob import glob
import shutil
# Stage the Shakespeare HTML files into one flat folder so that a single
# directory loader can read them all.
# NOTE: glob() is called without recursive=True, so "**" behaves like "*" and
# only files exactly one directory below "shakespeare/" are matched.
files = glob("shakespeare/**/*.html")
destination_folder = './data/'

# exist_ok=True replaces the separate exists()/mkdir() pair, which could fail
# if the folder appeared between the check and the creation.
os.makedirs(destination_folder, exist_ok=True)

for html_file in files:
    # basename() honours the platform's path separator, whereas split("/")
    # does not separate backslash-delimited paths.
    # Files that share a basename in different sub-folders overwrite each
    # other in the flat destination folder.
    shutil.copy(
        html_file,
        os.path.join(destination_folder, os.path.basename(html_file)),
    )
# Load every file found under ./data/ using BSHTMLLoader as the per-file loader.
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
data = bshtml_dir_loader.load()

# Split the loaded documents into chunks of up to 1000 units (measured with
# len) with an overlap of 20 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(length_function=len, chunk_overlap=20, chunk_size=1000)
documents = text_splitter.split_documents(data)
# Embedding function shared by both the "build" and the "reopen" paths below.
embeddings = OpenAIEmbeddings()
persist_directory = "vector_db"

if os.path.exists(persist_directory):
    # A persisted store is already on disk: reopen it instead of re-embedding.
    vectordb = Chroma(
        embedding_function=embeddings,
        persist_directory=persist_directory,
    )
else:
    # First run: embed the split documents and write the store to disk.
    vectordb = Chroma.from_documents(
        persist_directory=persist_directory,
        embedding=embeddings,
        documents=documents,
    )
    vectordb.persist()
# GPT-4 chat model at temperature 0; used by both the QA chain and the agent.
llm = ChatOpenAI(model="gpt-4", temperature=0)

# Retriever view over the vector store, plus a SerpAPI search wrapper.
doc_retriever = vectordb.as_retriever()
search = SerpAPIWrapper()

# The conversation buffer is stored under "chat_history"; the QA chain is
# given a ReadOnlySharedMemory wrapper around that same buffer
# (presumably so the tool cannot write to the history - confirm against the
# LangChain version in use).
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)

# Retrieval QA chain over the documents, using the "stuff" chain type.
shakespeare_qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=doc_retriever,
    chain_type="stuff",
    memory=readonlymemory,
)
# Tool that answers questions by running the retrieval QA chain.
shakespeare_tool = Tool(
    name="Shakespeare QA System",
    description="useful for when you need to answer questions about Shakespeare's works. Input should be a fully formed question.",
    func=shakespeare_qa.run,
)

# Tool that answers questions by running a SerpAPI search.
# NOTE(review): the description mentions "ruff (a python linter)", which does
# not match the rest of this app - confirm whether that wording is intended.
search_tool = Tool(
    name="SERP API Search",
    description="useful for when you need to answer questions about ruff (a python linter). Input should be a fully formed question.",
    func=search.run,
)

# Order is preserved: QA tool first, search tool second.
tools = [shakespeare_tool, search_tool]
# Text placed before the tool descriptions in the agent prompt.
prefix = """Have a conversation with a human, answering the following questions as best you can. You have access to the following tools:"""

# Text placed after the tool descriptions. The placeholders are filled in at
# run time: {chat_history}, {input} and {agent_scratchpad}.
# A stray double quote that followed "Begin!" has been removed; it was part of
# the string and therefore part of the prompt text.
suffix = """Begin!
{chat_history}
Question: {input}
{agent_scratchpad}"""
# Build the agent prompt from the tool list and the prefix/suffix text; the
# three input variables match the placeholders used in the suffix.
prompt = ZeroShotAgent.create_prompt(
    tools,
    input_variables=["input", "chat_history", "agent_scratchpad"],
    suffix=suffix,
    prefix=prefix,
)

# Chain that runs the LLM on the agent prompt.
llm_chain = LLMChain(prompt=prompt, llm=llm)

# Agent built on that chain, restricted to the declared tools.
agent = ZeroShotAgent(
    llm_chain=llm_chain,
    tools=tools,
    verbose=True,
)

# Executor that runs the agent with the tools and the conversation memory.
agent_chain = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    memory=memory,
    verbose=True,
)
def make_inference(query: str) -> str:
    """Run the agent chain on *query* and return its response.

    Args:
        query: The user's question, passed to the agent chain as ``input``.

    Returns:
        Whatever ``agent_chain.run`` returns for that input.
    """
    return agent_chain.run(input=query)
if __name__ == "__main__":
    # Build and launch the Gradio interface only when run as a script.
    import gradio as gr

    # gr.Textbox replaces the deprecated gr.inputs.Textbox / gr.outputs.Textbox
    # namespaces; the same component class serves as both input and output.
    gr.Interface(
        fn=make_inference,
        inputs=[
            gr.Textbox(lines=2, label="Query"),
        ],
        outputs=gr.Textbox(label="Response"),
        title="🗣️TalkToMyDocs📄",
        description="🗣️TalkToMyDocs📄 is a tool that allows you to ask questions about many documents. In this case - William Shakespeare's complete works.",
    ).launch()