# Source listing captured from a Hugging Face Space.
# Space status at capture time: Runtime error
# File size: 3,442 Bytes
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
from langchain import SerpAPIWrapper
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory
from langchain.agents import ZeroShotAgent, Tool, AgentExecutor
from langchain import LLMChain
import os
from glob import glob
import shutil
# Stage the corpus: collect the Shakespeare HTML pages and copy them into a
# single flat folder, which the directory loader below reads from.
# NOTE: glob is called without recursive=True, so "**" behaves like "*" and
# only files exactly one directory below "shakespeare/" are matched.
files = glob("shakespeare/**/*.html")
destination_folder = './data/'

# exist_ok makes re-runs a no-op and avoids the check-then-create race.
os.makedirs(destination_folder, exist_ok=True)

for html_file in files:
    # os.path.basename honours the platform separator; split("/") would
    # return the whole path on Windows, where glob yields backslashes.
    # NOTE(review): files sharing a basename in different sub-folders
    # overwrite each other in the flat destination - confirm this is intended.
    shutil.copy(
        html_file,
        os.path.join(destination_folder, os.path.basename(html_file)),
    )
# Splitter configuration: chunk_size and chunk_overlap are measured with
# len, i.e. in characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=20, length_function=len
)

# Load everything found under ./data/ using BSHTMLLoader for each file.
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
data = bshtml_dir_loader.load()

# Break the loaded documents into chunks for the embedding step.
documents = text_splitter.split_documents(data)
embeddings = OpenAIEmbeddings()
persist_directory = "vector_db"

if os.path.exists(persist_directory):
    # A store directory is already on disk: reopen it rather than
    # embedding the corpus again.
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )
else:
    # No store directory yet: embed the chunks and persist the result.
    vectordb = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    vectordb.persist()
# Shared chat model used by both the QA chain and the agent.
llm = ChatOpenAI(temperature=0, model="gpt-4")

# Retriever view over the vector store, plus the web-search wrapper.
doc_retriever = vectordb.as_retriever()
search = SerpAPIWrapper()

# Conversation history lives in `memory`; the QA chain receives it through a
# ReadOnlySharedMemory wrapper (presumably so the chain can read the history
# without writing to it - confirm against the LangChain docs).
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)

# Retrieval-backed question answering over the document store.
shakespeare_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
    memory=readonlymemory,
)
# Tools offered to the agent. Each description is the text the agent sees
# when choosing which tool to call.
tools = [
    Tool(
        name="Shakespeare QA System",
        func=shakespeare_qa.run,
        description=(
            "useful for when you need to answer questions about Shakespeare's works. "
            "Input should be a fully formed question."
        ),
    ),
    # NOTE(review): this description mentions "ruff (a python linter)", which
    # looks unrelated to the rest of the app - confirm it is intentional.
    Tool(
        name="SERP API Search",
        func=search.run,
        description=(
            "useful for when you need to answer questions about ruff (a python linter). "
            "Input should be a fully formed question."
        ),
    ),
]
# Text placed before the tool list in the agent prompt.
prefix = """Have a conversation with a human, answering the following questions as best you can. You have access to the following tools:"""

# Text placed after the tool list. The placeholders are filled in at run time:
# {chat_history} from conversation memory, {input} with the user's question
# and {agent_scratchpad} with the agent's intermediate steps.
# The stray double quote that used to follow "Begin!" has been removed; it
# was being sent to the model as part of the prompt.
suffix = """Begin!
{chat_history}
Question: {input}
{agent_scratchpad}"""
# Assemble the agent prompt from the tool list and the prefix/suffix text;
# the three input variables match the placeholders used in the suffix.
prompt = ZeroShotAgent.create_prompt(
    tools,
    prefix=prefix,
    suffix=suffix,
    input_variables=["input", "chat_history", "agent_scratchpad"],
)
# Wire the prompt to the chat model, wrap it in an agent, and hand that agent
# to an executor that also owns the conversation memory.
llm_chain = LLMChain(llm=llm, prompt=prompt)
agent = ZeroShotAgent(
    llm_chain=llm_chain,
    tools=tools,
    verbose=True,
)
agent_chain = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
    memory=memory,
)
def make_inference(query):
    """Run *query* through the agent executor and return its answer.

    The question is passed to ``agent_chain.run`` as the ``input`` variable,
    and whatever that call returns is handed back unchanged.
    """
    return agent_chain.run(input=query)
if __name__ == "__main__":
    # make a gradio interface
    # gradio is imported here so that it is only needed when the file is run
    # as a script.
    import gradio as gr

    # Use the top-level gr.Textbox component: the gr.inputs / gr.outputs
    # namespaces were deprecated in Gradio 3 and removed in Gradio 4.
    gr.Interface(
        fn=make_inference,
        inputs=[
            gr.Textbox(lines=2, label="Query"),
        ],
        outputs=gr.Textbox(label="Response"),
        title="🗣️TalkToMyDocs📄",
        description="🗣️TalkToMyDocs📄 is a tool that allows you to ask questions about many documents. In this case - William Shakespeare's complete works.",
    ).launch()