Spaces:

admin-dev
/

TalkToMyDocsShakespeare

Runtime error

App Files Files Community

TalkToMyDocsShakespeare / app.py

admin-dev

Update app.py

b8a5d6d almost 3 years ago

Raw

History Blame Contribute Delete

3.44 kB

	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.vectorstores import Chroma
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.chat_models import ChatOpenAI
	from langchain.chains import RetrievalQA
	from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
	from langchain import SerpAPIWrapper
	from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory
	from langchain.agents import ZeroShotAgent, Tool, AgentExecutor
	from langchain import LLMChain

	import os
	from glob import glob
	import shutil

	# Stage the corpus: collect every HTML file below ./shakespeare and copy it
	# into the flat ./data/ folder that the directory loader reads from.
	# "**" together with recursive=True descends into every sub-directory; the
	# earlier pattern "shakespeare/*/.html" only matched files literally named
	# ".html", so nothing was ever staged. Sorting keeps the copy order stable.
	files = sorted(glob("shakespeare/**/*.html", recursive=True))

	destination_folder = './data/'
	# exist_ok=True replaces the separate exists() check and its race window.
	os.makedirs(destination_folder, exist_ok=True)

	for html_file in files:
	    # NOTE(review): files are flattened by base name, so two files with the
	    # same name in different sub-directories overwrite each other -- confirm
	    # that this is acceptable for the corpus in use.
	    shutil.copy(html_file, os.path.join(destination_folder, os.path.basename(html_file)))

	# Load every staged file through BSHTMLLoader.
	bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
	data = bshtml_dir_loader.load()

	# Cut the loaded pages into chunks of at most 1000 characters (measured with
	# len) that overlap by 20 characters.
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20, length_function=len)
	documents = text_splitter.split_documents(data)

	embeddings = OpenAIEmbeddings()
	persist_directory = "vector_db"

	# Reuse the on-disk Chroma store when one exists; otherwise build it from the
	# chunks and persist it for the next start.
	if os.path.exists(persist_directory):
	    vectordb = Chroma(
	        persist_directory=persist_directory,
	        embedding_function=embeddings,
	    )
	else:
	    vectordb = Chroma.from_documents(
	        documents=documents,
	        embedding=embeddings,
	        persist_directory=persist_directory,
	    )
	    vectordb.persist()

	# Chat model shared by the retrieval chain and the agent (temperature 0).
	llm = ChatOpenAI(
	    temperature=0,
	    model="gpt-4",
	)
	# Retriever view over the Chroma store.
	doc_retriever = vectordb.as_retriever()

	# Web search wrapper exposed to the agent as a tool.
	search = SerpAPIWrapper()

	# The agent executor owns the writable conversation memory; the QA chain only
	# receives a read-only view of the same buffer.
	memory = ConversationBufferMemory(memory_key="chat_history")
	readonlymemory = ReadOnlySharedMemory(memory=memory)

	# "stuff" retrieval QA chain over the Shakespeare documents.
	shakespeare_qa = RetrievalQA.from_chain_type(
	    llm=llm,
	    chain_type="stuff",
	    retriever=doc_retriever,
	    memory=readonlymemory,
	)

	# Tools the agent may call: the document QA chain and the web search wrapper.
	# The descriptions are part of the prompt the agent reads when picking a tool.
	tools = [
	    Tool(
	        name="Shakespeare QA System",
	        description="useful for when you need to answer questions about Shakespeare's works. Input should be a fully formed question.",
	        func=shakespeare_qa.run,
	    ),
	    Tool(
	        name="SERP API Search",
	        description="useful for when you need to answer questions about ruff (a python linter). Input should be a fully formed question.",
	        func=search.run,
	    ),
	]

	# Prompt pieces for the zero-shot agent: the prefix precedes the generated
	# tool list, the suffix carries the per-call template variables.
	prefix = """Have a conversation with a human, answering the following questions as best you can. You have access to the following tools:"""
	# The stray double quote that followed "Begin!" has been removed; it was part
	# of the string literal and was therefore sent to the model with every prompt.
	suffix = """Begin!

	{chat_history}
	Question: {input}
	{agent_scratchpad}"""

	# input_variables must list every placeholder used in the suffix above.
	prompt = ZeroShotAgent.create_prompt(
	    tools,
	    prefix=prefix,
	    suffix=suffix,
	    input_variables=["input", "chat_history", "agent_scratchpad"],
	)

	# Bind the chat model to the agent prompt.
	llm_chain = LLMChain(prompt=prompt, llm=llm)

	# Build the agent, then wrap it in an executor that carries the writable
	# conversation memory.
	agent = ZeroShotAgent(
	    llm_chain=llm_chain,
	    tools=tools,
	    verbose=True,
	)
	agent_chain = AgentExecutor.from_agent_and_tools(
	    agent=agent,
	    tools=tools,
	    memory=memory,
	    verbose=True,
	)


	def make_inference(query):
	    """Run *query* through the module-level agent chain and return its answer."""
	    return agent_chain.run(input=query)


	if __name__ == "__main__":
	    # Build and launch the Gradio interface only when run as a script.
	    import gradio as gr

	    # Use the top-level gr.Textbox component: the gr.inputs / gr.outputs
	    # namespaces are deprecated in Gradio 3 and no longer exist in Gradio 4.
	    gr.Interface(
	        fn=make_inference,
	        inputs=[
	            gr.Textbox(lines=2, label="Query"),
	        ],
	        outputs=gr.Textbox(label="Response"),
	        title="🗣️TalkToMyDocs📄",
	        description="🗣️TalkToMyDocs📄 is a tool that allows you to ask questions about many documents. In this case - Williams Shakespeare's complete works.",
	    ).launch()