"""TalkToMyDocs: a conversational agent over a local HTML corpus.

Importing this module runs the whole pipeline:

1. copy every matched HTML file into ``./data/``,
2. load and chunk the files, then build (or reopen) a persisted Chroma index,
3. wire a retrieval QA chain and a SerpAPI search into a ZeroShotAgent that
   shares one conversation buffer,
4. expose ``make_inference`` and, when run as a script, serve it via Gradio.
"""
import os
import shutil
from glob import glob

from langchain import LLMChain, SerpAPIWrapper
from langchain.agents import AgentExecutor, Tool, ZeroShotAgent
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# ---------------------------------------------------------------------------
# Stage the corpus: flatten the source tree into a single folder.
# ---------------------------------------------------------------------------
# NOTE: glob() is called without recursive=True, so "**" behaves like "*" and
# only files exactly one directory below "shakespeare/" are matched.
files = glob("shakespeare/**/*.html")
destination_folder = './data/'

# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(destination_folder, exist_ok=True)

for html_file in files:
    # basename() honours the platform's path separator; splitting on "/"
    # would keep the whole relative path on systems that use "\\".
    # NOTE: files that share a basename overwrite each other here, because
    # the directory part of the source path is dropped.
    shutil.copy(
        html_file,
        os.path.join(destination_folder, os.path.basename(html_file)),
    )

# ---------------------------------------------------------------------------
# Load, chunk and index the documents.
# ---------------------------------------------------------------------------
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
data = bshtml_dir_loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
    length_function=len,
)
documents = text_splitter.split_documents(data)

embeddings = OpenAIEmbeddings()
persist_directory = "vector_db"

# The index is only built when the persist directory does not exist yet; an
# existing directory is reopened as-is, so later changes to ./data are not
# picked up until "vector_db" is removed.
if not os.path.exists(persist_directory):
    vectordb = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    vectordb.persist()
else:
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )

# ---------------------------------------------------------------------------
# Tools, memory and agent.
# ---------------------------------------------------------------------------
llm = ChatOpenAI(temperature=0, model="gpt-4")
doc_retriever = vectordb.as_retriever()
search = SerpAPIWrapper()

# One buffer holds the conversation; the agent executor owns it, and the QA
# chain receives the same buffer wrapped in ReadOnlySharedMemory.
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)

shakespeare_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
    memory=readonlymemory,
)

tools = [
    Tool(
        name="Shakespeare QA System",
        func=shakespeare_qa.run,
        description="useful for when you need to answer questions about Shakespeare's works. Input should be a fully formed question.",
    ),
    Tool(
        name="SERP API Search",
        func=search.run,
        description="useful for when you need to answer questions about ruff (a python linter). Input should be a fully formed question.",
    ),
]

prefix = """Have a conversation with a human, answering the following questions as best you can. You have access to the following tools:"""

# The template previously read `Begin!"` -- the unbalanced quote was sent to
# the model verbatim on every call, so it is removed here. Each placeholder
# sits on its own line.
suffix = """Begin!

{chat_history}
Question: {input}
{agent_scratchpad}"""

prompt = ZeroShotAgent.create_prompt(
    tools,
    prefix=prefix,
    suffix=suffix,
    input_variables=["input", "chat_history", "agent_scratchpad"],
)

llm_chain = LLMChain(llm=llm, prompt=prompt)
agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
agent_chain = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
    memory=memory,
)


def make_inference(query: str) -> str:
    """Run one user query through the agent and return its reply.

    Args:
        query: The user's question; passed to the agent as ``input``.

    Returns:
        Whatever ``agent_chain.run`` produces for this query.
    """
    return agent_chain.run(input=query)


if __name__ == "__main__":
    # Gradio is imported lazily so that importing this module as a library
    # does not require it.
    import gradio as gr

    # NOTE(review): gr.inputs / gr.outputs look like Gradio's legacy component
    # namespaces -- confirm they exist in the pinned Gradio version.
    gr.Interface(
        make_inference,
        [
            gr.inputs.Textbox(lines=2, label="Query"),
        ],
        gr.outputs.Textbox(label="Response"),
        title="🗣️TalkToMyDocs📄",
        description="🗣️TalkToMyDocs📄 is a tool that allows you to ask questions about many documents. In this case - Williams Shakespeare's complete works.",
    ).launch()