Spaces:

ArchCoder
/

social-agent

Sleeping

google-labs-jules[bot] archc0der committed on Apr 23

Commit

0643073

1 Parent(s): bf6dbfa

feat: implement AutoStream conversational AI sales agent with LangGraph

- Implements a stateful agent workflow graph using LangGraph
- Sets up an LLM-based intent classifier with structured outputs
- Implements a local FAISS-based RAG pipeline
- Includes a step-by-step lead qualification workflow and a mock backend tool execution
- Provides a CLI interface in main.py
- Creates a comprehensive testing suite mocking LLMs and Embeddings via pytest
- Removes comments as requested
- Adds thorough documentation on system architecture and integration capabilities

Co-authored-by: archc0der <119496494+archc0der@users.noreply.github.com>

Files changed (9) hide show

agent/graph.py +10 -10
agent/router.py +8 -8
agent/state.py +3 -3
main.py +10 -10
rag/vectorstore.py +1 -1
tests/test_agent_e2e.py +13 -13
tests/test_lead_workflow.py +5 -5
tests/test_rag_pipeline.py +11 -11
tests/test_tool_execution.py +3 -3

agent/graph.py CHANGED Viewed

@@ -12,10 +12,10 @@ from agent.nodes import (
 from agent.router import route_intent, route_after_lead
 def build_graph():
-    # Initialize the graph with the typed state
     workflow = StateGraph(AgentState)
-    # Add nodes
     workflow.add_node("detect_intent", detect_intent)
     workflow.add_node("handle_greeting", handle_greeting)
     workflow.add_node("handle_unknown", handle_unknown)
@@ -24,11 +24,11 @@ def build_graph():
     workflow.add_node("process_lead", process_lead)
     workflow.add_node("execute_tool", execute_tool)
-    # Define edges
-    # Start -> detect_intent
     workflow.add_edge(START, "detect_intent")
-    # detect_intent -> conditional routing based on intent
     workflow.add_conditional_edges(
         "detect_intent",
         route_intent,
@@ -40,10 +40,10 @@ def build_graph():
         }
     )
-    # retrieve_knowledge -> generate_rag_response
     workflow.add_edge("retrieve_knowledge", "generate_rag_response")
-    # process_lead -> conditional routing (execute_tool or end)
     workflow.add_conditional_edges(
         "process_lead",
         route_after_lead,
@@ -53,16 +53,16 @@ def build_graph():
         }
     )
-    # Define terminal edges
     workflow.add_edge("handle_greeting", END)
     workflow.add_edge("handle_unknown", END)
     workflow.add_edge("generate_rag_response", END)
     workflow.add_edge("execute_tool", END)
-    # Compile the graph
     app = workflow.compile()
     return app
-# Expose a compiled instance
 app = build_graph()

 from agent.router import route_intent, route_after_lead
 def build_graph():
     workflow = StateGraph(AgentState)
     workflow.add_node("detect_intent", detect_intent)
     workflow.add_node("handle_greeting", handle_greeting)
     workflow.add_node("handle_unknown", handle_unknown)
     workflow.add_node("process_lead", process_lead)
     workflow.add_node("execute_tool", execute_tool)
     workflow.add_edge(START, "detect_intent")
     workflow.add_conditional_edges(
         "detect_intent",
         route_intent,
         }
     )
     workflow.add_edge("retrieve_knowledge", "generate_rag_response")
     workflow.add_conditional_edges(
         "process_lead",
         route_after_lead,
         }
     )
     workflow.add_edge("handle_greeting", END)
     workflow.add_edge("handle_unknown", END)
     workflow.add_edge("generate_rag_response", END)
     workflow.add_edge("execute_tool", END)
     app = workflow.compile()
     return app
 app = build_graph()

agent/router.py CHANGED Viewed

@@ -5,16 +5,16 @@ def route_intent(state: AgentState) -> str:
     Router node that directs the workflow based on the detected intent.
     It returns the name of the next node to execute.
     """
-    # If we are already in the middle of lead collection, we should stay in that flow
-    # This is slightly simplified; we'll route to process_lead if we detected HIGH_INTENT_LEAD
-    # or if we are already missing lead fields but have HIGH_INTENT_LEAD in previous turns.
-    # To keep it simple, if intent is HIGH_INTENT_LEAD, we go to lead workflow.
-    # If we are expecting lead info, the intent classifier might classify as UNKNOWN or something else
-    # We can handle this by checking if there's an ongoing lead collection in state.
     intent = state.get("detected_intent")
-    # Check if we were already in lead collection
     has_partial_lead = (
         state.get("user_name") is not None or
         state.get("user_email") is not None or
@@ -37,5 +37,5 @@ def route_after_lead(state: AgentState) -> str:
     if state.get("lead_ready"):
         return "execute_tool"
     else:
-        # We need more info, so we just end the graph execution here to wait for user input
         return "__end__"

     Router node that directs the workflow based on the detected intent.
     It returns the name of the next node to execute.
     """
     intent = state.get("detected_intent")
     has_partial_lead = (
         state.get("user_name") is not None or
         state.get("user_email") is not None or
     if state.get("lead_ready"):
         return "execute_tool"
     else:
         return "__end__"

agent/state.py CHANGED Viewed

@@ -4,17 +4,17 @@ class AgentState(TypedDict):
     """
     Shared state object used by the agent graph.
     """
-    conversation_history: List[Dict[str, str]]  # list of {"role": "user"/"assistant", "content": "..."}
     current_message: str
     detected_intent: Optional[str]
     retrieved_documents: List[str]
-    # Lead collection fields
     user_name: Optional[str]
     user_email: Optional[str]
     creator_platform: Optional[str]
     lead_ready: bool
-    # Final response to the user
     response: str

     """
     Shared state object used by the agent graph.
     """
+    conversation_history: List[Dict[str, str]]
     current_message: str
     detected_intent: Optional[str]
     retrieved_documents: List[str]
     user_name: Optional[str]
     user_email: Optional[str]
     creator_platform: Optional[str]
     lead_ready: bool
     response: str

main.py CHANGED Viewed

@@ -7,7 +7,7 @@ def print_header(title):
     print(f"\n{'='*50}\n{title}\n{'='*50}")
 def main():
-    # Load environment variables
     load_dotenv()
     if not os.environ.get("OPENAI_API_KEY"):
@@ -17,7 +17,7 @@ def main():
     print_header("AutoStream AI Sales Assistant")
     print("Type 'quit' or 'exit' to end the conversation.\n")
-    # Initialize state
     state = AgentState(
         conversation_history=[],
         current_message="",
@@ -36,27 +36,27 @@ def main():
             if user_input.lower() in ['quit', 'exit']:
                 break
-            # Update state with new message
             state["current_message"] = user_input
-            # Run the agent graph
             print("\n[Agent is thinking...]")
-            # Run the graph
             result_state = app.invoke(state)
-            # Update our persistent state with the new state from the graph
             state = result_state
-            # Add to conversation history
             state["conversation_history"].append({"role": "user", "content": user_input})
             state["conversation_history"].append({"role": "assistant", "content": state["response"]})
-            # Keep history to max 6 turns
-            if len(state["conversation_history"]) > 12:  # 6 turns (user+assistant)
                 state["conversation_history"] = state["conversation_history"][-12:]
-            # Display results
             print(f"[Detected Intent]: {state.get('detected_intent', 'UNKNOWN')}")
             if state.get("retrieved_documents") and state.get("detected_intent") in ["PRODUCT_QUERY", "PRICING_QUERY"]:

     print(f"\n{'='*50}\n{title}\n{'='*50}")
 def main():
     load_dotenv()
     if not os.environ.get("OPENAI_API_KEY"):
     print_header("AutoStream AI Sales Assistant")
     print("Type 'quit' or 'exit' to end the conversation.\n")
     state = AgentState(
         conversation_history=[],
         current_message="",
             if user_input.lower() in ['quit', 'exit']:
                 break
             state["current_message"] = user_input
             print("\n[Agent is thinking...]")
             result_state = app.invoke(state)
             state = result_state
             state["conversation_history"].append({"role": "user", "content": user_input})
             state["conversation_history"].append({"role": "assistant", "content": state["response"]})
+            if len(state["conversation_history"]) > 12:
                 state["conversation_history"] = state["conversation_history"][-12:]
             print(f"[Detected Intent]: {state.get('detected_intent', 'UNKNOWN')}")
             if state.get("retrieved_documents") and state.get("detected_intent") in ["PRODUCT_QUERY", "PRICING_QUERY"]:

rag/vectorstore.py CHANGED Viewed

@@ -26,7 +26,7 @@ def build_vectorstore(filepath: str = "data/knowledge_base.md"):
     return vectorstore
-# Cache the vector store globally so we don't rebuild it on every request
 _vectorstore = None
 def get_vectorstore(filepath: str = "data/knowledge_base.md"):

     return vectorstore
 _vectorstore = None
 def get_vectorstore(filepath: str = "data/knowledge_base.md"):

tests/test_agent_e2e.py CHANGED Viewed

@@ -23,22 +23,22 @@ def simulate_conversation(messages, mock_llm_setup_func):
     for idx, msg in enumerate(messages):
         state["current_message"] = msg
-        mock_llm_setup_func(idx) # setup mocks for this turn
         state = app.invoke(state)
-        # update history manually
         state["conversation_history"].append({"role": "user", "content": state["current_message"]})
         state["conversation_history"].append({"role": "assistant", "content": state["response"]})
     return state
 def test_agent_e2e(mocker):
-    # E2E Test USING graph.invoke
-    # We patch the `get_llm` inside `agent.nodes` to return a mock LLM.
     mock_llm = mocker.MagicMock()
     mocker.patch('agent.nodes.get_llm', return_value=mock_llm)
-    # Mock RAG retrieval
     mocker.patch('agent.nodes.retrieve_documents', return_value=["We have Basic and Pro plans for $29 and $79."])
     mock_tool = mocker.patch('agent.nodes.mock_lead_capture')
@@ -53,23 +53,23 @@ def test_agent_e2e(mocker):
     def setup_mocks_for_turn(idx):
         if idx == 0:
-            # Turn 1: Greeting
             mock_chain = RunnableLambda(lambda x: IntentResponse(intent="GREETING", confidence=0.99))
             mock_llm.with_structured_output.return_value = mock_chain
         elif idx == 1:
-            # Turn 2: Pricing
             mock_chain = RunnableLambda(lambda x: IntentResponse(intent="PRICING_QUERY", confidence=0.99))
             mock_llm.with_structured_output.return_value = mock_chain
-            # The regular invoke for generate_rag_response returns AIMessage-like object
             class FakeResponse:
                 content = "We have Basic and Pro plans."
             mock_llm.invoke.return_value = FakeResponse()
         elif idx == 2:
-            # Turn 3: High intent lead
-            # The router uses intent. The process_lead uses with_structured_output.
-            # Since both use with_structured_output in the same turn, we need a side_effect.
             def mock_structured_output(schema):
                 if schema.__name__ == "IntentResponse":
                     return RunnableLambda(lambda x: IntentResponse(intent="HIGH_INTENT_LEAD", confidence=0.99))
@@ -78,7 +78,7 @@ def test_agent_e2e(mocker):
             mock_llm.with_structured_output.side_effect = mock_structured_output
         elif idx == 3:
-            # Turn 4: Provide name
             def mock_structured_output(schema):
                 if schema.__name__ == "IntentResponse":
                     return RunnableLambda(lambda x: IntentResponse(intent="HIGH_INTENT_LEAD", confidence=0.99))
@@ -87,7 +87,7 @@ def test_agent_e2e(mocker):
             mock_llm.with_structured_output.side_effect = mock_structured_output
         elif idx == 4:
-            # Turn 5: Provide email
             def mock_structured_output(schema):
                 if schema.__name__ == "IntentResponse":
                     return RunnableLambda(lambda x: IntentResponse(intent="HIGH_INTENT_LEAD", confidence=0.99))

     for idx, msg in enumerate(messages):
         state["current_message"] = msg
+        mock_llm_setup_func(idx)
         state = app.invoke(state)
         state["conversation_history"].append({"role": "user", "content": state["current_message"]})
         state["conversation_history"].append({"role": "assistant", "content": state["response"]})
     return state
 def test_agent_e2e(mocker):
     mock_llm = mocker.MagicMock()
     mocker.patch('agent.nodes.get_llm', return_value=mock_llm)
     mocker.patch('agent.nodes.retrieve_documents', return_value=["We have Basic and Pro plans for $29 and $79."])
     mock_tool = mocker.patch('agent.nodes.mock_lead_capture')
     def setup_mocks_for_turn(idx):
         if idx == 0:
             mock_chain = RunnableLambda(lambda x: IntentResponse(intent="GREETING", confidence=0.99))
             mock_llm.with_structured_output.return_value = mock_chain
         elif idx == 1:
             mock_chain = RunnableLambda(lambda x: IntentResponse(intent="PRICING_QUERY", confidence=0.99))
             mock_llm.with_structured_output.return_value = mock_chain
             class FakeResponse:
                 content = "We have Basic and Pro plans."
             mock_llm.invoke.return_value = FakeResponse()
         elif idx == 2:
             def mock_structured_output(schema):
                 if schema.__name__ == "IntentResponse":
                     return RunnableLambda(lambda x: IntentResponse(intent="HIGH_INTENT_LEAD", confidence=0.99))
             mock_llm.with_structured_output.side_effect = mock_structured_output
         elif idx == 3:
             def mock_structured_output(schema):
                 if schema.__name__ == "IntentResponse":
                     return RunnableLambda(lambda x: IntentResponse(intent="HIGH_INTENT_LEAD", confidence=0.99))
             mock_llm.with_structured_output.side_effect = mock_structured_output
         elif idx == 4:
             def mock_structured_output(schema):
                 if schema.__name__ == "IntentResponse":
                     return RunnableLambda(lambda x: IntentResponse(intent="HIGH_INTENT_LEAD", confidence=0.99))

tests/test_lead_workflow.py CHANGED Viewed

@@ -4,7 +4,7 @@ from agent.state import AgentState
 from langchain_core.runnables import RunnableLambda
 def test_lead_workflow_step_by_step(mocker):
-    # Step 1: User says they want the Pro plan for YouTube
     state = AgentState(
         conversation_history=[],
         current_message="I want the Pro plan for my YouTube channel",
@@ -27,12 +27,12 @@ def test_lead_workflow_step_by_step(mocker):
     assert result.get("creator_platform") == "YouTube"
     assert "name" in result["response"].lower()
-    # Simulate state update
     state.update(result)
     state["conversation_history"].append({"role": "user", "content": state["current_message"]})
     state["conversation_history"].append({"role": "assistant", "content": state["response"]})
-    # Step 2: User provides name
     state["current_message"] = "My name is Alex"
     mock_chain_2 = RunnableLambda(lambda x: LeadExtractionResponse(user_name="Alex", user_email=None, creator_platform=None))
     mock_llm.with_structured_output.return_value = mock_chain_2
@@ -41,12 +41,12 @@ def test_lead_workflow_step_by_step(mocker):
     assert result.get("user_name") == "Alex"
     assert "email" in result["response"].lower()
-    # Simulate state update
     state.update(result)
     state["conversation_history"].append({"role": "user", "content": state["current_message"]})
     state["conversation_history"].append({"role": "assistant", "content": state["response"]})
-    # Step 3: User provides email
     state["current_message"] = "alex@email.com"
     mock_chain_3 = RunnableLambda(lambda x: LeadExtractionResponse(user_name=None, user_email="alex@email.com", creator_platform=None))
     mock_llm.with_structured_output.return_value = mock_chain_3

 from langchain_core.runnables import RunnableLambda
 def test_lead_workflow_step_by_step(mocker):
     state = AgentState(
         conversation_history=[],
         current_message="I want the Pro plan for my YouTube channel",
     assert result.get("creator_platform") == "YouTube"
     assert "name" in result["response"].lower()
     state.update(result)
     state["conversation_history"].append({"role": "user", "content": state["current_message"]})
     state["conversation_history"].append({"role": "assistant", "content": state["response"]})
     state["current_message"] = "My name is Alex"
     mock_chain_2 = RunnableLambda(lambda x: LeadExtractionResponse(user_name="Alex", user_email=None, creator_platform=None))
     mock_llm.with_structured_output.return_value = mock_chain_2
     assert result.get("user_name") == "Alex"
     assert "email" in result["response"].lower()
     state.update(result)
     state["conversation_history"].append({"role": "user", "content": state["current_message"]})
     state["conversation_history"].append({"role": "assistant", "content": state["response"]})
     state["current_message"] = "alex@email.com"
     mock_chain_3 = RunnableLambda(lambda x: LeadExtractionResponse(user_name=None, user_email="alex@email.com", creator_platform=None))
     mock_llm.with_structured_output.return_value = mock_chain_3

tests/test_rag_pipeline.py CHANGED Viewed

@@ -9,14 +9,14 @@ os.environ["OPENAI_API_KEY"] = "dummy_key"
 class MockEmbedding(Embeddings):
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        # Just return a zero vector of size 1536 for each input text
         return [[0.0] * 1536 for _ in texts]
     def embed_query(self, text: str) -> List[float]:
         return [0.0] * 1536
 def test_rag_pipeline_loads_and_retrieves(mocker, tmp_path):
-    # Test end-to-end vectorstore build and retrieval (testing doc loading and splitting)
     kb_file = tmp_path / "knowledge_base.md"
     kb_file.write_text("""
 # AutoStream Pricing & Features
@@ -28,24 +28,24 @@ def test_rag_pipeline_loads_and_retrieves(mocker, tmp_path):
 * AI captions included
     """)
-    # We must patch get_embeddings in vectorstore so it uses our mock that doesn't call OpenAI
     mocker.patch('rag.vectorstore.get_embeddings', return_value=MockEmbedding())
-    # FAISS has an internal check for Embeddings class, so MockEmbedding must inherit from Embeddings
-    # Mock the actual FAISS from_documents internally to just create an empty FAISS store,
-    # OR we can let FAISS run with our mock embeddings. Let's let it run with mock embeddings.
     vs = build_vectorstore(str(kb_file))
     assert vs is not None
-    # Now patch the global get_vectorstore so our retriever uses this one
     mocker.patch('rag.retriever.get_vectorstore', return_value=vs)
     from rag.retriever import retrieve_documents
     docs = retrieve_documents("What does the Pro plan cost?", k=1)
-    # Since all embeddings are 0, it will return the first document(s) it split.
-    # With chunk size 100, the first few lines should be retrieved.
     assert len(docs) > 0
-    # The actual retrieval will return a chunk. The first chunk should have "Pro Plan" or "AutoStream Pricing".
-    # Just asserting it retrieved something from our mock file.
     assert "AutoStream" in docs[0] or "Pro Plan" in docs[0] or "$79/month" in docs[0]

 class MockEmbedding(Embeddings):
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
         return [[0.0] * 1536 for _ in texts]
     def embed_query(self, text: str) -> List[float]:
         return [0.0] * 1536
 def test_rag_pipeline_loads_and_retrieves(mocker, tmp_path):
     kb_file = tmp_path / "knowledge_base.md"
     kb_file.write_text("""
 # AutoStream Pricing & Features
 * AI captions included
     """)
     mocker.patch('rag.vectorstore.get_embeddings', return_value=MockEmbedding())
     vs = build_vectorstore(str(kb_file))
     assert vs is not None
     mocker.patch('rag.retriever.get_vectorstore', return_value=vs)
     from rag.retriever import retrieve_documents
     docs = retrieve_documents("What does the Pro plan cost?", k=1)
     assert len(docs) > 0
     assert "AutoStream" in docs[0] or "Pro Plan" in docs[0] or "$79/month" in docs[0]

tests/test_tool_execution.py CHANGED Viewed

@@ -12,14 +12,14 @@ def test_tool_execution_missing_fields(mocker):
         retrieved_documents=[],
         user_name="Alex",
         user_email="alex@email.com",
-        creator_platform=None, # Missing platform
         lead_ready=True,
         response=""
     )
     result = execute_tool(state)
-    # Tool should NOT be executed
     mock_tool.assert_not_called()
     assert "Error" in result["response"]
@@ -40,6 +40,6 @@ def test_tool_execution_all_fields(mocker):
     result = execute_tool(state)
-    # Tool should be executed exactly once
     mock_tool.assert_called_once_with("Alex", "alex@email.com", "YouTube")
     assert "Thanks Alex" in result["response"]

         retrieved_documents=[],
         user_name="Alex",
         user_email="alex@email.com",
+        creator_platform=None,
         lead_ready=True,
         response=""
     )
     result = execute_tool(state)
     mock_tool.assert_not_called()
     assert "Error" in result["response"]
     result = execute_tool(state)
     mock_tool.assert_called_once_with("Alex", "alex@email.com", "YouTube")
     assert "Thanks Alex" in result["response"]