Spaces:

maclenn77
/

pdf-explainer

Runtime error

App Files Files Community

maclenn77 committed on Dec 9, 2023

Commit

16d3fdf

unverified ·

1 Parent(s): ada0a19

Add a langchain agent (#10)

Browse files

Files changed (4) hide show

app.py +30 -36
src/agent.py +43 -0
src/chroma_client.py +6 -5
src/search.py +18 -0

app.py CHANGED Viewed

@@ -6,10 +6,15 @@ import fitz
 import streamlit as st
 import openai
 from dotenv import load_dotenv
 from src.chroma_client import ChromaDB
 import src.gui_messages as gm
 from src import settings
 load_dotenv()
@@ -44,27 +49,14 @@ with st.sidebar:
 chroma_db = ChromaDB(openai.api_key)
 openai_client, collection = settings.build(chroma_db)
-# Query ChromaDb
-query = st.text_input(
-    "Query ChromaDb", value="", placeholder="Enter query", label_visibility="collapsed"
-)
-if st.button("Search"):
-    results = collection.query(
-        query_texts=[query],
-        n_results=3,
-    )
-    for idx, result in enumerate(results["documents"][0]):
-        st.markdown(
-            result
-            + "..."
-            + "**Source:** "
-            + results["metadatas"][0][idx]["source"]
-            + " **Tokens:** "
-            + str(results["metadatas"][0][idx]["num_tokens"])
-        )
 pdf = st.file_uploader("Upload a file", type="pdf")
 if pdf is not None:
@@ -75,19 +67,11 @@ if pdf is not None:
         st.write(text[0:300] + "...")
         if st.button("Save chunks"):
             with st.spinner("Saving chunks..."):
-                chunks = textwrap.wrap(text, 3000)
                 for idx, chunk in enumerate(chunks):
                     encoding = tiktoken.get_encoding("cl100k_base")
                     num_tokens = len(encoding.encode(chunk))
-                    response = (
-                        openai_client.embeddings.create(
-                            input=chunk, model="text-embedding-ada-002"
-                        )
-                        .data[0]
-                        .embedding
-                    )
                     collection.add(
-                        embeddings=[response],
                         documents=[chunk],
                         metadatas=[{"source": pdf.name, "num_tokens": num_tokens}],
                         ids=[pdf.name + str(idx)],
@@ -95,11 +79,21 @@ if pdf is not None:
 else:
     st.write("Please upload a file of type: pdf")
-if st.button("Chroma data collection"):
-    st.write(collection)
-if st.button("Delete Chroma Collection"):
-    try:
-        chroma_db.client.delete_collection(collection.name)
-    except AttributeError:
-        st.error("Collection erased.")

 import streamlit as st
 import openai
 from dotenv import load_dotenv
+from langchain.chat_models import ChatOpenAI
+from langchain.callbacks import StreamlitCallbackHandler
 from src.chroma_client import ChromaDB
 import src.gui_messages as gm
 from src import settings
+from src.agent import PDFExplainer
 load_dotenv()
 chroma_db = ChromaDB(openai.api_key)
 openai_client, collection = settings.build(chroma_db)
+# Create Agent
+llm = ChatOpenAI(temperature=0.9, model="gpt-3.5-turbo-16k")
+agent = PDFExplainer(llm, chroma_db).agent
+# Main
+st.title("PDF Explainer")
+st.subheader("Create your knowledge base")
+st.write("Upload PDF files that will help the AI Agent to understand your domain.")
 pdf = st.file_uploader("Upload a file", type="pdf")
 if pdf is not None:
         st.write(text[0:300] + "...")
         if st.button("Save chunks"):
             with st.spinner("Saving chunks..."):
+                chunks = textwrap.wrap(text, 1250)
                 for idx, chunk in enumerate(chunks):
                     encoding = tiktoken.get_encoding("cl100k_base")
                     num_tokens = len(encoding.encode(chunk))
                     collection.add(
                         documents=[chunk],
                         metadatas=[{"source": pdf.name, "num_tokens": num_tokens}],
                         ids=[pdf.name + str(idx)],
 else:
     st.write("Please upload a file of type: pdf")
+st.subheader("Search on your knowledge base")
+# if st.button("Chroma data collection"):
+#     st.write(collection)
+# if st.button("Delete Chroma Collection"):
+#     try:
+#         chroma_db.client.delete_collection(collection.name)
+#     except AttributeError:
+#         st.error("Collection erased.")
+prompt = st.chat_input()
+if prompt:
+    st.chat_message("user").write(prompt)
+    with st.chat_message("assistant"):
+        st_callback = StreamlitCallbackHandler(st.container())
+        response = agent.run(prompt, callbacks=[st_callback])
+        st.write(response)

src/agent.py ADDED Viewed

	@@ -0,0 +1,43 @@

+"""A LangChain Agent that uses ChromaDB as a query tool"""
+from langchain.agents import AgentType, initialize_agent
+from langchain.tools import Tool
+from src.search import Search
+class PDFExplainer:
+    """An Agent that uses ChromaDB as a query tool"""
+    def __init__(self, llm, chroma_db):
+        """Initialize the Agent"""
+        search = Search(chroma_db)
+        self.tools = [
+            Tool.from_function(
+                func=search.run,
+                name="Search DB",
+                description="Useful when you need more context about a specific topic.",
+                handle_parsing_errors=True,
+            )
+        ]
+        self.agent = initialize_agent(
+            self.tools,
+            llm,
+            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+            verbose=True,
+            handle_parsing_errors=True,
+        )
+    def add_tools(self, tools: list[Tool]):
+        """Add tools to the Agent"""
+        self.tools.extend(tools)
+    def replace_agent(self, agent: AgentType, llm):
+        """Replace the Agent"""
+        self.agent = initialize_agent(
+            self.tools,
+            llm,
+            agent=agent,
+            verbose=True,
+            handle_parsing_errors=True,
+        )

src/chroma_client.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """A client for ChromaDB."""
 import chromadb
-from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
 import streamlit as st
@@ -24,11 +25,11 @@ class ChromaDB:
     def create_collection(self, name):
         """Create a Chroma collection."""
         try:
-            embedding_function = OpenAIEmbeddingFunction(
-                api_key=self.api_key, model_name="text-embedding-ada-002"
-            )
             collection = self.client.get_or_create_collection(
-                name=name, embedding_function=embedding_function
             )
             return collection
         except AttributeError:

 """A client for ChromaDB."""
 import chromadb
+# from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
 import streamlit as st
     def create_collection(self, name):
         """Create a Chroma collection."""
         try:
+            # embedding_function = OpenAIEmbeddingFunction(
+            #     api_key=self.api_key, model_name="text-embedding-ada-002"
+            # )
             collection = self.client.get_or_create_collection(
+                name=name  # , embedding_function=embedding_function
             )
             return collection
         except AttributeError:

src/search.py ADDED Viewed

	@@ -0,0 +1,18 @@

+"""Search Tool"""
+class Search:
+    """Search Tool"""
+    def __init__(self, chroma_db):
+        """Initialize the Search Tool"""
+        self.chroma_db = chroma_db
+    def run(self, query: str):
+        """Query the 'pdf-explainer' collection and return the documents of the top 3 results"""
+        collection = self.chroma_db.get_collection("pdf-explainer")
+        return collection.query(query_texts=[query], n_results=3)["documents"][0]
+    def collection_name(self):
+        """Return the collection name"""
+        return self.chroma_db.collection.name