Spaces:

philmui
/

globe

Runtime error

App Files Files Community

philmui committed on May 26, 2023

Commit

f57b8d4

1 Parent(s): a7f3b3b

added semantic search of local books

Browse files

Files changed (6) hide show

.gitignore +4 -0
agents.py +63 -25
app.py +5 -2
data/machiavelli-the-prince.txt +0 -0
data/sunzi-art-of-war.txt +0 -0
models.py +35 -2

.gitignore CHANGED Viewed

@@ -158,3 +158,7 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/

 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+# ChromaDB
+db/
+chromadb/

agents.py CHANGED Viewed

@@ -11,7 +11,9 @@ from langchain.schema import HumanMessage
 from langchain.prompts import PromptTemplate, ChatPromptTemplate, \
                               HumanMessagePromptTemplate
 from models import load_chat_agent, load_chained_agent, load_sales_agent, \
-                   load_sqlite_agent
 import logging
@@ -68,6 +70,59 @@ def chatAgent(chat_message):
         output = "Please rephrase and try chat again."
     return output
 def agentController(question_text, model_name):
     output = ""
@@ -78,7 +133,13 @@ def agentController(question_text, model_name):
     elif is_magic(question_text, DIGITAL_MAGIC_TOKENS):
         output = chinookAgent(question_text, model_name)
         print(f"🔹 chinookAgent: {output}")
-    else:
         try:
             instruction = instruct_prompt.format(query=question_text)
             logger.info(f"instruction: {instruction}")
@@ -94,26 +155,3 @@ def agentController(question_text, model_name):
             logger.error(e)
     return output
-def salesAgent(instruction):
-    output = ""
-    try:
-        agent = load_sales_agent(verbose=True)
-        output = agent.run(instruction)
-        print("panda> " + output)
-    except Exception as e:
-        logger.error(e)
-        output = f"Rephrasing your prompt could get better sales results {e}"
-    return output
-def chinookAgent(instruction, model_name):
-    output = ""
-    try:
-        agent = load_sqlite_agent(model_name)
-        output = agent.run(instruction)
-        print("chinook> " + output)
-    except Exception as e:
-        logger.error(e)
-        output = "Rephrasing your prompt could get better db results {e}"
-    return output

 from langchain.prompts import PromptTemplate, ChatPromptTemplate, \
                               HumanMessagePromptTemplate
 from models import load_chat_agent, load_chained_agent, load_sales_agent, \
+                   load_sqlite_agent, load_book_agent
+import openai, numpy as np
 import logging
         output = "Please rephrase and try chat again."
     return output
+def salesAgent(instruction):
+    output = ""
+    try:
+        agent = load_sales_agent(verbose=True)
+        output = agent.run(instruction)
+        print("panda> " + output)
+    except Exception as e:
+        logger.error(e)
+        output = f"Rephrasing your prompt could get better sales results {e}"
+    return output
+def chinookAgent(instruction, model_name):
+    output = ""
+    try:
+        agent = load_sqlite_agent(model_name)
+        output = agent.run(instruction)
+        print("chinook> " + output)
+    except Exception as e:
+        logger.error(e)
+        output = "Rephrasing your prompt could get better db results {e}"
+    return output
+def semantically_similar(string1, string2):
+    #
+    # proper way to do this is to use a
+    # vector DB (chroma, pinecone, ...)
+    #
+    response = openai.Embedding.create(
+                    input=[string1, string2],
+                    engine="text-similarity-davinci-001"
+                )
+    embedding_a = response['data'][0]['embedding']
+    embedding_b = response['data'][1]['embedding']
+    similarity_score = np.dot(embedding_a, embedding_b)
+    logger.info(f"similarity: {similarity_score}")
+    return similarity_score > 0.8
+def bookAgent(query):
+    output = ""
+    try:
+        agent = load_book_agent(True)
+        result = agent({
+            "query": query
+        })
+        logger.info(f"book response: {result['result']}")
+        output = result['result']
+    except Exception as e:
+        logger.error(e)
+        output = "Rephrasing your prompt for the book agent{e}"
+    return output
 def agentController(question_text, model_name):
     output = ""
     elif is_magic(question_text, DIGITAL_MAGIC_TOKENS):
         output = chinookAgent(question_text, model_name)
         print(f"🔹 chinookAgent: {output}")
+    elif semantically_similar(question_text, "fight a war"):
+        output = bookAgent(question_text)
+        print(f"🔹 bookAgent: {output}")
+    elif semantically_similar(question_text, "how to govern"):
+        output = bookAgent(question_text)
+        print(f"🔹 bookAgent: {output}")
+    else: # reasoning agents
         try:
             instruction = instruct_prompt.format(query=question_text)
             logger.info(f"instruction: {instruction}")
             logger.error(e)
     return output

app.py CHANGED Viewed

@@ -9,7 +9,7 @@
 import streamlit as st
 from pprint import pprint
-from agents import agentController, salesAgent, chinookAgent, chatAgent
 ##############################################################################
@@ -104,7 +104,10 @@ with col2:
                  value="🔹 For my company, what is the total sales " +
                        "broken down by month?\n" +
                        "🔹 How many total artists are there in each "+
-                       "genres in our digital media database?")
 with col3:
     st.markdown("__Enhanced reasoning__ [🎵](https://www.youtube.com/watch?v=hTTUaImgCyU&t=62s)")

 import streamlit as st
 from pprint import pprint
+from agents import agentController , salesAgent, chinookAgent, chatAgent
 ##############################################################################
                  value="🔹 For my company, what is the total sales " +
                        "broken down by month?\n" +
                        "🔹 How many total artists are there in each "+
+                       "genres in our digital media database?\n" +
+                       "🔹 How to best govern a city? (The Prince)\n" +
+                       "🔹 How to win a war? (Art of War)",
+                       )
 with col3:
     st.markdown("__Enhanced reasoning__ [🎵](https://www.youtube.com/watch?v=hTTUaImgCyU&t=62s)")

data/machiavelli-the-prince.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

data/sunzi-art-of-war.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

models.py CHANGED Viewed

@@ -10,9 +10,14 @@ import pandas as pd
 from langchain.agents import AgentType, load_tools, initialize_agent,\
                             create_pandas_dataframe_agent
 from langchain.chat_models import ChatOpenAI
 from langchain.llms import OpenAI
-from langchain import SQLDatabase, SQLDatabaseChain, HuggingFaceHub
 OPENAI_LLMS = [
     'text-davinci-003',
@@ -45,10 +50,38 @@ def createLLM(model_name="text-davinci-003", temperature=0):
                              model_kwargs={"temperature":1e-10})
     return llm
 def load_chat_agent(verbose=True):
     return createLLM(OPENAI_CHAT_LLMS[0], temperature=0.5)
 def load_sales_agent(verbose=True):
     '''
     Hard-coded agent that gates an internal sales CSV file for demo

 from langchain.agents import AgentType, load_tools, initialize_agent,\
                             create_pandas_dataframe_agent
+from langchain import SQLDatabase, SQLDatabaseChain, HuggingFaceHub
 from langchain.chat_models import ChatOpenAI
 from langchain.llms import OpenAI
+from langchain.chains import RetrievalQA
+from langchain.document_loaders import DirectoryLoader, TextLoader
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.text_splitter import CharacterTextSplitter
 OPENAI_LLMS = [
     'text-davinci-003',
                              model_kwargs={"temperature":1e-10})
     return llm
 def load_chat_agent(verbose=True):
     return createLLM(OPENAI_CHAT_LLMS[0], temperature=0.5)
+import os
+import chromadb
+from chromadb.config import Settings
+DB_DIR = "./db"
+def load_book_agent(verbose=True):
+    retriever = None
+    embeddings = OpenAIEmbeddings(openai_api_key = os.environ['OPENAI_API_KEY'])
+    if not os.path.exists(DB_DIR):
+        loader = DirectoryLoader(path="./data/", glob="**/*.txt")
+        docs = loader.load()
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
+        text_chunks = text_splitter.split_documents(documents=docs)
+        docsearch = Chroma.from_documents(text_chunks, embeddings,
+                                        persist_directory="./db")
+        retriever = docsearch.as_retriever()
+    else:
+        vectordb = Chroma(persist_directory=DB_DIR,
+                          embedding_function=embeddings)
+        retriever = vectordb.as_retriever()
+    qa = RetrievalQA.from_chain_type(llm = OpenAI(temperature=0.9),
+                                    chain_type="stuff",
+                                    retriever=retriever,
+                                    return_source_documents=True
+                                    )
+    return qa
 def load_sales_agent(verbose=True):
     '''
     Hard-coded agent that gates an internal sales CSV file for demo