bakery_shop_ordering_system_with_recipe_rag

Paused

App Files Files Community

themissingCRAM committed on Apr 4

Commit

5520644

1 Parent(s): 193e8c6

first init

Browse files

Files changed (3) hide show

README.md +1 -1
app.py +118 -25
requirements.txt +5 -1

README.md CHANGED Viewed

@@ -10,4 +10,4 @@ pinned: false
 short_description: self correcting text to sql agent based on smolagents exampl
 ---
-self correcting text to sql agent based on https://huggingface.co/docs/smolagents/v1.12.0/examples/text_to_sql smolagents example

 short_description: self correcting text to sql agent based on smolagents exampl
 ---
+bakery shops ordering system with recipe rag

app.py CHANGED Viewed

@@ -1,13 +1,8 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-import os
 from smolagents import (
     tool,
-    CodeAgent,
-    HfApiModel,
-    GradioUI,
-    MultiStepAgent,
-    stream_to_gradio,
 )
 from sqlalchemy import (
     create_engine,
@@ -18,14 +13,17 @@ from sqlalchemy import (
     Integer,
     Float,
     insert,
-    inspect,
     text,
-    select,
-    Engine,
 )
-import spaces
 from dotenv import load_dotenv
 load_dotenv()
 #sample questions
@@ -58,15 +56,80 @@ def sql_engine_tool(query: str) -> str:
         for row in rows:
             output += "\n" + str(row)
     return output
-def init_db(engine):
     metadata_obj = MetaData()
-    def insert_rows_into_table(rows, table, engine=engine):
-        for row in rows:
-            stmt = insert(table).values(**row)
-            with engine.begin() as connection:
                 connection.execute(stmt)
     table_name = "receipts"
@@ -78,7 +141,7 @@ def init_db(engine):
         Column("price", Float),
         Column("tip", Float),
     )
-    metadata_obj.create_all(engine)
     rows = [
         {"receipt_id": 1, "customer_name": "Alan Payne", "price": 12.06, "tip": 1.20},
@@ -105,7 +168,7 @@ def init_db(engine):
         Column("receipt_id", Integer, primary_key=True),
         Column("waiter_name", String(16), primary_key=True),
     )
-    metadata_obj.create_all(engine)
     rows = [
         {"receipt_id": 1, "waiter_name": "Corey Johnson"},
@@ -114,7 +177,7 @@ def init_db(engine):
         {"receipt_id": 4, "waiter_name": "Margaret James"},
     ]
     insert_rows_into_table(rows, waiters)
-    return engine
 if __name__ == "__main__":
@@ -126,17 +189,44 @@ if __name__ == "__main__":
         token=os.getenv("my_first_agents_hf_tokens"),
     )
-    agent = CodeAgent(
         tools=[sql_engine_tool],
         model=model,
         max_steps=10,
         verbosity_level=1,
     )
     def enter_message(new_message, conversation_history):
         conversation_history.append(gr.ChatMessage(role="user", content=new_message))
         # yield "", conversation_history
-        for msg in stream_to_gradio(agent, new_message):
             conversation_history.append(msg)
             yield "", conversation_history
@@ -145,14 +235,17 @@ if __name__ == "__main__":
         return chat_history.clear(), ""
     def stop_gen():
-        agent = CodeAgent(
-            tools=[sql_engine_tool],
             model=model,
             max_steps=10,
             verbosity_level=10,
         )
     with gr.Blocks() as b:
-        gr.Markdown("# Demo text to sql on paying customers' receipts")
         chatbot = gr.Chatbot(type="messages", height=2000)
         message_box = gr.Textbox(lines=1, label="chat message (with default sample question)", value="What is the average each customer paid?")
         with gr.Row():

+from langchain_community.document_loaders import HuggingFaceDatasetLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 from smolagents import (
     tool,
 )
 from sqlalchemy import (
     create_engine,
     Integer,
     Float,
     insert,
     text,
 )
+import gradio as gr
+import os
+from smolagents import Tool, CodeAgent, HfApiModel, stream_to_gradio
+import spaces
 from dotenv import load_dotenv
+from langchain.docstore.document import Document
+import chromadb
+from chromadb.utils import embedding_functions
 load_dotenv()
 #sample questions
         for row in rows:
             output += "\n" + str(row)
     return output
+@tool
+class RetrieverTool(Tool):
+    """Since we need to add a vectordb as an attribute of the tool,
+    we cannot simply use the simple tool constructor with a @tool decorator
+    Used bm25 retrival method because it is fast.
+    For more accuracy in retrival, you can replace it with semantic search
+    using vector representations for documents.
+    check out MTEB Leaderboard for accuracy ranking
+    """
+    name = "retriever"
+    description = """Uses semantic search to retrieve the parts of transformers documentation
+    that could be most relevant to answer your query.
+    Afterwards, this tool  summaries the findings from the extracted document
+    """
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "The python list of queries to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
+        }
+    }
+    output_type = "string"
+    def __init__(self, docs: list[Document], **kwargs):
+        super().__init__(**kwargs)
+        chroma_data_path = "chroma_data/"
+        if not os.path.isdir(chroma_data_path):
+            print("in if clause")
+            os.makedirs(chroma_data_path, exist_ok=True)
+        collection_name = "demo_docs"
+        embedding_func = embedding_functions.DefaultEmbeddingFunction()
+        client = chromadb.PersistentClient(path=chroma_data_path)
+        collection = client.get_or_create_collection(
+            name=collection_name,
+            embedding_function=embedding_func,
+            metadata={"hnsw:space": "cosine"},
+        )
+        collection.upsert(
+            documents=[doc.page_content for doc in docs],
+            ids=[f"id{i}" for i in range(len(docs))],
+        )
+        self.collection = collection
+    def forward(self, query: str) -> str:
+        assert isinstance(query, str), "Your search query must be a string"
+        docs = self.collection.query(query_texts=[query], n_results=5)
+        retrieved_text = "\nRetrieved documents:\n" + "".join(
+            [
+                f"\n\n===== Document {str(i)} =====\n" + doc
+                for i, doc in zip(docs["ids"][0], docs["documents"][0])
+            ]
+        )
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "summaries this text:" + retrieved_text}
+                ],
+            }
+        ]
+        return retrieved_text + "\n" + model(messages).content
+def init_db(_engine):
     metadata_obj = MetaData()
+    def insert_rows_into_table(_rows, _table, _engine=_engine):
+        for row in _rows:
+            stmt = insert(_table).values(**row)
+            with _engine.begin() as connection:
                 connection.execute(stmt)
     table_name = "receipts"
         Column("price", Float),
         Column("tip", Float),
     )
+    metadata_obj.create_all(_engine)
     rows = [
         {"receipt_id": 1, "customer_name": "Alan Payne", "price": 12.06, "tip": 1.20},
         Column("receipt_id", Integer, primary_key=True),
         Column("waiter_name", String(16), primary_key=True),
     )
+    metadata_obj.create_all(_engine)
     rows = [
         {"receipt_id": 1, "waiter_name": "Corey Johnson"},
         {"receipt_id": 4, "waiter_name": "Margaret James"},
     ]
     insert_rows_into_table(rows, waiters)
+    return _engine
 if __name__ == "__main__":
         token=os.getenv("my_first_agents_hf_tokens"),
     )
+    text2sql_agent = CodeAgent(
         tools=[sql_engine_tool],
         model=model,
         max_steps=10,
         verbosity_level=1,
     )
+    source_docs = HuggingFaceDatasetLoader("MuskumPillerum/General-Knowledge", "Answer").load()[:100]
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=500,
+        chunk_overlap=50,
+        add_start_index=True,
+        strip_whitespace=True,
+        separators=["\n\n", "\n", ".", " ", ""],
+    )
+    docs_processed = text_splitter.split_documents(source_docs)
+    retriever_tool = RetrieverTool(docs_processed)
+    retriever_agent = CodeAgent(
+        tools=[retriever_tool],
+        model=model,
+        max_steps=10,
+        verbosity_level=10,
+    )
+    manager_agent =    CodeAgent(
+        tools=[retriever_tool],
+        model=model,
+        managed_agents=[retriever_agent
+                        ,text2sql_agent],
+        max_steps=10,
+        verbosity_level=10,
+    )
     def enter_message(new_message, conversation_history):
         conversation_history.append(gr.ChatMessage(role="user", content=new_message))
         # yield "", conversation_history
+        for msg in stream_to_gradio(manager_agent, new_message):
             conversation_history.append(msg)
             yield "", conversation_history
         return chat_history.clear(), ""
     def stop_gen():
+        manager_agent = CodeAgent(
+            tools=[retriever_tool],
             model=model,
+            managed_agents=[retriever_agent
+                , text2sql_agent],
             max_steps=10,
             verbosity_level=10,
         )
     with gr.Blocks() as b:
+        gr.Markdown("# demo bakery shops ordering system with recipe rag")
         chatbot = gr.Chatbot(type="messages", height=2000)
         message_box = gr.Textbox(lines=1, label="chat message (with default sample question)", value="What is the average each customer paid?")
         with gr.Row():

requirements.txt CHANGED Viewed

@@ -4,4 +4,8 @@ python-dotenv==1.1.0
 sqlalchemy==2.0.40
 gradio>=5.23.1
 spaces>0.0.0
-smolagents[gradio]>=1.12.0

 sqlalchemy==2.0.40
 gradio>=5.23.1
 spaces>0.0.0
+smolagents[gradio]>=1.12.0
+sqlalchemy==2.0.40
+langchain == 0.3.21
+langchain_community == 0.3.20
+chromadb == 0.6.3