Spaces:

admin-dev
/

TalkToMyDocsShakespeare

Runtime error

File size: 3,442 Bytes

e3c36ca
 
0e899d1
e3c36ca
 
 
0e899d1
e3c36ca
 
0e899d1
e3c36ca
 
 
 
 
0e899d1
e3c36ca
 
0e899d1
 
e3c36ca
0e899d1
 
e3c36ca
 
 
 
 
0e899d1
 
 
e3c36ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e899d1
 
e3c36ca
 
 
0e899d1
e3c36ca
 
 
 
0e899d1
e3c36ca
 
 
 
 
 
 
 
 
 
 
 
 
0e899d1
 
 
e3c36ca
 
 
bdc97ed
 
e3c36ca
 
 
 
 
0e899d1
e3c36ca
 
0e899d1
e3c36ca

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
from langchain import SerpAPIWrapper
from langchain.memory import ConversationBufferMemory, ReadOnlySharedMemory
from langchain.agents import ZeroShotAgent, Tool, AgentExecutor
from langchain import LLMChain

import os
from glob import glob
import shutil

def stage_html_files(pattern, destination):
    """Copy every file matching *pattern* into *destination* on first run.

    The copy only happens when *destination* does not exist yet, so an
    already-populated folder is reused untouched on later runs.

    Args:
        pattern: Glob pattern; ``**`` matches any depth of sub-directories.
        destination: Folder that receives a flat copy of the matched files.

    Returns:
        The list of paths matched by *pattern* (whether or not they were
        copied on this call).
    """
    # recursive=True is required for "**" to descend into nested folders;
    # without it "**" behaves like a single "*" path component.
    matches = glob(pattern, recursive=True)

    if not os.path.exists(destination):
        os.makedirs(destination)
        for html_file in matches:
            # basename/join keep this correct with any path separator.
            # NOTE: the copy is flat, so files sharing a basename in different
            # source folders overwrite one another.
            target = os.path.join(destination, os.path.basename(html_file))
            shutil.copy(html_file, target)

    return matches


destination_folder = './data/'
files = stage_html_files("shakespeare/**/*.html", destination_folder)

# Load every file staged under ./data/ as a document, using BSHTMLLoader
# as the per-file loader class.
bshtml_dir_loader = DirectoryLoader('./data/', loader_cls=BSHTMLLoader)
data = bshtml_dir_loader.load()

# Break the loaded documents into chunks of at most 1000 units with an
# overlap of 20; length_function=len means the unit is characters.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20, length_function=len)
documents = text_splitter.split_documents(data)

embeddings = OpenAIEmbeddings()
persist_directory = "vector_db"

if os.path.exists(persist_directory):
    # A persisted store is already on disk: open it rather than re-embedding.
    vectordb = Chroma(embedding_function=embeddings, persist_directory=persist_directory)
else:
    # First run: embed all chunks, then write the store to disk for reuse.
    vectordb = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    vectordb.persist()

# Chat model used by both the retrieval QA chain and the agent below.
llm = ChatOpenAI(model="gpt-4", temperature=0)

# Retriever view over the vector store.
doc_retriever = vectordb.as_retriever()

# Web search wrapper exposed to the agent as a tool.
search = SerpAPIWrapper()

# The conversation buffer is stored under "chat_history"; the QA chain is
# handed a read-only wrapper around that same buffer.
memory = ConversationBufferMemory(memory_key="chat_history")
readonlymemory = ReadOnlySharedMemory(memory=memory)

shakespeare_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=doc_retriever,
    memory=readonlymemory,
)

# Tools offered to the agent; each description is the text the agent is
# given to decide when to call the tool.
tools = [
    # Question answering over the indexed documents.
    Tool(
        name="Shakespeare QA System",
        description="useful for when you need to answer questions about Shakespeare's works. Input should be a fully formed question.",
        func=shakespeare_qa.run,
    ),
    # Web search.
    # NOTE(review): this description restricts the tool to questions about
    # "ruff (a python linter)", which looks copied from an unrelated
    # example - confirm whether a general-purpose description was intended.
    Tool(
        name="SERP API Search",
        description="useful for when you need to answer questions about ruff (a python linter). Input should be a fully formed question.",
        func=search.run,
    ),
]

# Text placed before the tool listing in the agent prompt.
prefix = """Have a conversation with a human, answering the following questions as best you can. You have access to the following tools:"""

# Text placed after the tool listing. The placeholders are filled in per
# call: {chat_history} from memory, {input} with the user's question and
# {agent_scratchpad} with the agent's intermediate steps.
# (A stray double quote that used to follow "Begin!" has been removed so it
# is no longer sent to the model as part of the prompt.)
suffix = """Begin!

{chat_history}
Question: {input}
{agent_scratchpad}"""

# Build the agent prompt from the tool list plus the prefix/suffix text;
# the three input variables match the placeholders used in the suffix.
prompt = ZeroShotAgent.create_prompt(
    tools,
    prefix=prefix,
    suffix=suffix,
    input_variables=["input", "chat_history", "agent_scratchpad"],
)

# Chain that feeds the prompt to the chat model.
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Agent built on that chain, then wrapped in an executor that also owns the
# writable conversation memory.
agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
agent_chain = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
    memory=memory,
)


def make_inference(query):
    """Run *query* through the module-level agent executor.

    Args:
        query: The user's question, passed to the agent as its ``input``.

    Returns:
        Whatever ``agent_chain.run`` returns for that input.
    """
    return agent_chain.run(input=query)


if __name__ == "__main__":
    # Build and launch the Gradio interface. Gradio is imported here so it is
    # only needed when the file is run as a script.
    import gradio as gr

    # Components are taken from the top-level namespace (gr.Textbox): the
    # gr.inputs / gr.outputs namespaces are deprecated in Gradio 3 and no
    # longer exist in Gradio 4.
    gr.Interface(
        fn=make_inference,
        inputs=[
            gr.Textbox(lines=2, label="Query"),
        ],
        outputs=gr.Textbox(label="Response"),
        title="🗣️TalkToMyDocs📄",
        description="🗣️TalkToMyDocs📄 is a tool that allows you to ask questions about many documents. In this case - William Shakespeare's complete works.",
    ).launch()