Spaces:
Runtime error
Runtime error
Upload 8 files
Browse files- .DS_Store +0 -0
- .dockerignore +17 -0
- .env.example +8 -0
- .gitignore +15 -0
- Dockerfile +12 -0
- README.md +744 -6
- app.py +18 -0
- requirements.txt +13 -0
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
.dockerignore
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pyo
|
| 4 |
+
.env
|
| 5 |
+
.DS_Store
|
| 6 |
+
.git/
|
| 7 |
+
.github/
|
| 8 |
+
.claude/
|
| 9 |
+
.agents/
|
| 10 |
+
*.db
|
| 11 |
+
tests/
|
| 12 |
+
docs/
|
| 13 |
+
*.md
|
| 14 |
+
!README.md
|
| 15 |
+
skills-lock.json
|
| 16 |
+
.dockerignore
|
| 17 |
+
Dockerfile
|
.env.example
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Required
|
| 2 |
+
OPENAI_API_KEY=your-api-key-here
|
| 3 |
+
|
| 4 |
+
# Optional - uncomment to customize
|
| 5 |
+
# OPENAI_API_BASE=https://api.openai.com/v1
|
| 6 |
+
# MODEL_NAME=gpt-4o-mini
|
| 7 |
+
# TEMPERATURE=0
|
| 8 |
+
# PORT=7860
|
.gitignore
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pyo
|
| 4 |
+
.env
|
| 5 |
+
.DS_Store
|
| 6 |
+
*.db
|
| 7 |
+
.claude/
|
| 8 |
+
.agents/
|
| 9 |
+
skills-lock.json
|
| 10 |
+
Chinook_Sqlite.sql
|
| 11 |
+
.pytest_cache/
|
| 12 |
+
htmlcov/
|
| 13 |
+
*.egg-info/
|
| 14 |
+
dist/
|
| 15 |
+
build/
|
Dockerfile
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
COPY requirements.txt .
|
| 6 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
+
|
| 8 |
+
COPY . .
|
| 9 |
+
|
| 10 |
+
EXPOSE 7860
|
| 11 |
+
|
| 12 |
+
CMD ["python", "app.py"]
|
README.md
CHANGED
|
@@ -1,12 +1,750 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Music Store Multi-Agent Support
|
| 3 |
+
emoji: 🎵
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.29.0
|
| 8 |
+
python_version: "3.12"
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# Multi-Agent Customer Support System
|
| 14 |
+
|
| 15 |
+
A production-grade, **LangGraph-powered hierarchical multi-agent assistant** for a digital music store. It combines a **Supervisor router**, two specialized **ReAct sub-agents** (music catalog + invoice information), **human-in-the-loop identity verification**, **long-term per-customer memory**, and a **Gradio chat UI** - all backed by a real relational schema (the Chinook sample database).
|
| 16 |
+
|
| 17 |
+
**Live Demo:** [huggingface.co/spaces/animeshkcm/Multi-Agent-Customer-Support](https://huggingface.co/spaces/animeshkcm/Multi-Agent-Customer-Support)
|
| 18 |
+
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
## Capstone Framing
|
| 22 |
+
|
| 23 |
+
This project is designed as an advanced AI engineering capstone, going beyond a simple agent or tool-calling demo. It represents a fully assembled multi-agent AI system incorporating structured state management, persistent memory, safety controls, deterministic execution, and real-world data integration. Each architectural decision (grounding rules, memory merge semantics, supervisor-based routing, verification gating, deterministic SQL generation) directly addresses a known failure mode observed in production-scale LLM systems.
|
| 24 |
+
|
| 25 |
+
### Problem Statement
|
| 26 |
+
|
| 27 |
+
Customer support for a catalog-and-billing business has two opposing requirements:
|
| 28 |
+
|
| 29 |
+
1. **Conversational flexibility** - customers ask in free-form natural language (“what's my most expensive track?”, “any rock albums from AC/DC?”).
|
| 30 |
+
2. **Operational correctness** - invoice totals, track IDs, and customer accounts must be exact, auditable, and access-controlled.
|
| 31 |
+
|
| 32 |
+
A single LLM with a single system prompt cannot satisfy both: it will hallucinate when asked for unavailable data, leak account data across customers, or route off-topic questions to SQL tools. This project closes that gap with a **hierarchical agent graph** where each responsibility is isolated into its own node.
|
| 33 |
+
|
| 34 |
+
### Business Use Case
|
| 35 |
+
|
| 36 |
+
A digital music store with **59 customers**, **412 invoices**, **3,503 tracks**, **275 artists**, and **25 genres** (Chinook dataset). The assistant supports three user journeys:
|
| 37 |
+
|
| 38 |
+
| Journey | Entry Point | Guardrails |
|
| 39 |
+
|---|---|---|
|
| 40 |
+
| **Catalog discovery** (anonymous) | Any music question | Tool-grounded only; no PII access |
|
| 41 |
+
| **Account lookup** (identified) | Customer ID, email, or phone | Human-in-the-loop verification before any invoice tool runs |
|
| 42 |
+
| **Personalized recall** (returning) | Verified customer | Music preferences persisted across sessions, merge-only, never deleted |
|
| 43 |
+
|
| 44 |
+
### Why This Matters for AI Engineers
|
| 45 |
+
|
| 46 |
+
The project exercises the full surface area an AI engineer ships in production:
|
| 47 |
+
|
| 48 |
+
- **Graph-native orchestration** (not a flat ReAct loop) with conditional edges, interrupts, and a supervisor router.
|
| 49 |
+
- **Typed shared state** (`TypedDict` + `add_messages` reducer) instead of ad-hoc dicts.
|
| 50 |
+
- **Structured LLM output** (Pydantic schemas) for identifier extraction and preference capture - no regex parsing of model text.
|
| 51 |
+
- **Prompt engineering as a contract**: every sub-agent prompt enforces explicit grounding, exact quoting, and scope boundaries.
|
| 52 |
+
- **Deterministic SQL under an LLM** (CTE + `ROW_NUMBER()`) so the same question returns the same answer.
|
| 53 |
+
- **Two memory modes**: short-term per-thread (`MemorySaver` checkpointer) and long-term per-customer (`InMemoryStore`).
|
| 54 |
+
- **Streaming execution** via `graph.stream(..., stream_mode="updates")` wired to a Gradio chat UI with live status and interrupt handling.
|
| 55 |
+
|
| 56 |
+
### Technical Complexity
|
| 57 |
+
|
| 58 |
+
| Dimension | What's hard | How it's solved |
|
| 59 |
+
|---|---|---|
|
| 60 |
+
| **Routing** | Music vs invoice vs mixed vs off-topic | `langgraph-supervisor` with explicit routing rules in the system prompt |
|
| 61 |
+
| **Identity** | Accept ID / email / phone in free text without trusting user-extracted IDs downstream | Pydantic `UserInput` + DB lookup; verified `customer_id` is injected via `SystemMessage` only |
|
| 62 |
+
| **Hallucination** | LLMs invent albums, prices, totals | Tool-only responses enforced in every prompt; "I could not find…" fallback wording is scripted |
|
| 63 |
+
| **Determinism** | `LIMIT 10` on a genre returns different artists each call | `ROW_NUMBER() OVER (PARTITION BY ArtistId ORDER BY TrackId)` CTE → stable sample |
|
| 64 |
+
| **Memory merge** | LLM summarization can erase prior preferences | Set-union on `music_preferences`; empty output is ignored when preferences are already stored |
|
| 65 |
+
| **Interrupts** | Verification needs a second user turn without losing graph state | LangGraph `interrupt()` + thread-scoped checkpointer; UI reads `snapshot.next` to detect pause |
|
| 66 |
+
| **SQL safety** | LLM-driven inputs could inject SQL | 100% parameterized via SQLAlchemy `text()` bindings; `_safe_int()` on all numeric args |
|
| 67 |
+
|
| 68 |
+
### The Engineering Challenge
|
| 69 |
+
|
| 70 |
+
The hardest part was **not** building any one agent. It was composing them so:
|
| 71 |
+
|
| 72 |
+
1. The supervisor cannot bypass verification.
|
| 73 |
+
2. The music agent cannot read invoices.
|
| 74 |
+
3. The invoice agent cannot guess a customer ID from the conversation.
|
| 75 |
+
4. Memory writes cannot erase prior data.
|
| 76 |
+
5. The whole thing is observable (every tool call is logged with input and output length) and testable (28 pytest tests over tools + DB).
|
| 77 |
+
|
| 78 |
+
Those constraints are what turn "LLM + tools" into a **system**.
|
| 79 |
+
|
| 80 |
+
---
|
| 81 |
+
|
| 82 |
+
## System Architecture
|
| 83 |
+
|
| 84 |
+
### High Level System Architecture
|
| 85 |
+
|
| 86 |
+
```mermaid
|
| 87 |
+
flowchart TD
|
| 88 |
+
User([User])
|
| 89 |
+
UI[Gradio Chat UI<br/>src/ui/app.py]
|
| 90 |
+
Graph[Multi-Agent Graph<br/>src/agents/graph.py]
|
| 91 |
+
|
| 92 |
+
subgraph Outer[Outer Graph - Orchestration]
|
| 93 |
+
Verify[verify_info]
|
| 94 |
+
Human[human_input<br/>interrupt]
|
| 95 |
+
Load[load_memory]
|
| 96 |
+
Save[create_memory]
|
| 97 |
+
end
|
| 98 |
+
|
| 99 |
+
Sup[Supervisor<br/>langgraph-supervisor]
|
| 100 |
+
|
| 101 |
+
subgraph Music[Music Catalog Sub-Agent]
|
| 102 |
+
MA[music_assistant<br/>ReAct]
|
| 103 |
+
MT[music_tool_node<br/>5 tools]
|
| 104 |
+
end
|
| 105 |
+
|
| 106 |
+
subgraph Invoice[Invoice Sub-Agent]
|
| 107 |
+
IR[create_react_agent<br/>4 tools]
|
| 108 |
+
end
|
| 109 |
+
|
| 110 |
+
DB[(Chinook SQLite<br/>in-memory)]
|
| 111 |
+
Store[(InMemoryStore<br/>per-customer memory)]
|
| 112 |
+
Ckpt[(MemorySaver<br/>per-thread history)]
|
| 113 |
+
|
| 114 |
+
User -->|message| UI
|
| 115 |
+
UI -->|graph.stream| Graph
|
| 116 |
+
Graph --> Verify
|
| 117 |
+
Verify -->|no customer_id| Human
|
| 118 |
+
Human --> Verify
|
| 119 |
+
Verify -->|verified| Load
|
| 120 |
+
Load --> Sup
|
| 121 |
+
Sup --> MA
|
| 122 |
+
Sup --> IR
|
| 123 |
+
MA <--> MT
|
| 124 |
+
MT --> DB
|
| 125 |
+
IR --> DB
|
| 126 |
+
Sup --> Save
|
| 127 |
+
Load --> Store
|
| 128 |
+
Save --> Store
|
| 129 |
+
Graph -. checkpoints .- Ckpt
|
| 130 |
+
Graph -->|final AIMessage| UI
|
| 131 |
+
UI --> User
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
### Agent Workflow
|
| 135 |
+
|
| 136 |
+
```mermaid
|
| 137 |
+
flowchart LR
|
| 138 |
+
Q[User Query] --> Sup[Supervisor]
|
| 139 |
+
Sup -->|music / catalog| M[Music Agent]
|
| 140 |
+
Sup -->|invoice / billing| I[Invoice Agent]
|
| 141 |
+
Sup -->|mixed| Both[Invoice first, then Music]
|
| 142 |
+
Sup -->|off-topic| Reject[Direct Refusal]
|
| 143 |
+
|
| 144 |
+
M -->|reason| M1[Pick Tool]
|
| 145 |
+
M1 -->|act| M2[Run SQL]
|
| 146 |
+
M2 -->|observe| M
|
| 147 |
+
M -->|done| R1[Music Answer]
|
| 148 |
+
|
| 149 |
+
I -->|reason| I1[Pick Tool]
|
| 150 |
+
I1 -->|act| I2[Run SQL]
|
| 151 |
+
I2 -->|observe| I
|
| 152 |
+
I -->|done| R2[Invoice Answer]
|
| 153 |
+
|
| 154 |
+
R1 --> Merge[Supervisor Merge]
|
| 155 |
+
R2 --> Merge
|
| 156 |
+
Both --> Merge
|
| 157 |
+
Reject --> Merge
|
| 158 |
+
Merge --> Out[Final Response]
|
| 159 |
+
```
|
| 160 |
+
|
| 161 |
+
---
|
| 162 |
+
|
| 163 |
+
## Pipeline Overview
|
| 164 |
+
|
| 165 |
+
End-to-end lifecycle of a single user turn:
|
| 166 |
+
|
| 167 |
+
```mermaid
|
| 168 |
+
flowchart LR
|
| 169 |
+
A[1. User Types] --> B[2. UI Streams to Graph]
|
| 170 |
+
B --> C[3. Verify Identity]
|
| 171 |
+
C -->|found| D[4. Load Memory]
|
| 172 |
+
C -->|not found| C2[3a. Interrupt + Ask]
|
| 173 |
+
C2 --> C
|
| 174 |
+
D --> E[5. Supervisor Routes]
|
| 175 |
+
E --> F[6. Sub-Agent ReAct Loop]
|
| 176 |
+
F --> G[7. SQL via Parameterized Query]
|
| 177 |
+
G --> F
|
| 178 |
+
F --> H[8. Merge Sub-Agent Outputs]
|
| 179 |
+
H --> I[9. Update Memory]
|
| 180 |
+
I --> J[10. Render Response in UI]
|
| 181 |
+
```
|
| 182 |
+
|
| 183 |
+
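Stages 3-9 run inside the LangGraph graph (as nodes and sub-agent loops); stages 1, 2, and 10 belong to the Gradio UI. Once a customer is verified, no graph stage can be skipped or run out of order; the interrupt in 3a only happens while no identifier has been verified. The checkpointer snapshots state after every node so an interrupt can resume exactly where it paused.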
|
| 184 |
+
|
| 185 |
+
---
|
| 186 |
+
|
| 187 |
+
## LangGraph State Machine
|
| 188 |
+
|
| 189 |
+
The outer graph is a strict finite state machine. It always enters at `verify_info` and always exits via `create_memory`.
|
| 190 |
+
|
| 191 |
+
```mermaid
|
| 192 |
+
stateDiagram-v2
|
| 193 |
+
[*] --> verify_info
|
| 194 |
+
verify_info --> human_input: should_interrupt == interrupt<br/>(customer_id is None)
|
| 195 |
+
human_input --> verify_info: user provides identifier
|
| 196 |
+
verify_info --> load_memory: should_interrupt == continue<br/>(customer_id set)
|
| 197 |
+
load_memory --> supervisor: preferences injected
|
| 198 |
+
supervisor --> create_memory: sub-agents returned
|
| 199 |
+
create_memory --> [*]
|
| 200 |
+
```
|
| 201 |
+
|
| 202 |
+
**Shared state** (`src/state.py`):
|
| 203 |
+
|
| 204 |
+
```python
|
| 205 |
+
class State(TypedDict):
|
| 206 |
+
customer_id: Optional[str]
|
| 207 |
+
messages: Annotated[list[AnyMessage], add_messages]
|
| 208 |
+
loaded_memory: str
|
| 209 |
+
remaining_steps: RemainingSteps
|
| 210 |
+
```
|
| 211 |
+
|
| 212 |
+
**Supervisor subgraph** (built by `langgraph_supervisor.create_supervisor`) dispatches to one of the sub-agents and merges their responses into `messages` via the `add_messages` reducer.
|
| 213 |
+
|
| 214 |
+
**Music sub-agent subgraph** (hand-built, `src/agents/graph.py`):
|
| 215 |
+
|
| 216 |
+
```mermaid
|
| 217 |
+
stateDiagram-v2
|
| 218 |
+
[*] --> music_assistant
|
| 219 |
+
music_assistant --> music_tool_node: should_continue == continue<br/>(has tool_calls)
|
| 220 |
+
music_tool_node --> music_assistant
|
| 221 |
+
music_assistant --> [*]: should_continue == end<br/>(no tool_calls)
|
| 222 |
+
```
|
| 223 |
+
|
| 224 |
+
**Invoice sub-agent** is built via `langgraph.prebuilt.create_react_agent` - same pattern, wrapped for you.
|
| 225 |
+
|
| 226 |
+
---
|
| 227 |
+
|
| 228 |
+
## Data Flow
|
| 229 |
+
|
| 230 |
+
```mermaid
|
| 231 |
+
flowchart LR
|
| 232 |
+
subgraph Input
|
| 233 |
+
U[User Message]
|
| 234 |
+
end
|
| 235 |
+
|
| 236 |
+
subgraph State
|
| 237 |
+
S[State TypedDict<br/>customer_id · messages<br/>loaded_memory]
|
| 238 |
+
end
|
| 239 |
+
|
| 240 |
+
subgraph Agents
|
| 241 |
+
V[verify_info<br/>structured LLM + DB]
|
| 242 |
+
LM[load_memory<br/>Store read]
|
| 243 |
+
SUP[Supervisor LLM]
|
| 244 |
+
MUS[Music ReAct]
|
| 245 |
+
INV[Invoice ReAct]
|
| 246 |
+
CM[create_memory<br/>structured LLM + Store write]
|
| 247 |
+
end
|
| 248 |
+
|
| 249 |
+
subgraph Tools
|
| 250 |
+
T1[5 music_tools]
|
| 251 |
+
T2[4 invoice_tools]
|
| 252 |
+
end
|
| 253 |
+
|
| 254 |
+
subgraph Data
|
| 255 |
+
DB[(Chinook SQLite)]
|
| 256 |
+
STORE[(InMemoryStore)]
|
| 257 |
+
CKPT[(MemorySaver)]
|
| 258 |
+
end
|
| 259 |
+
|
| 260 |
+
subgraph Output
|
| 261 |
+
R[AIMessage]
|
| 262 |
+
UI2[Gradio Chatbot]
|
| 263 |
+
end
|
| 264 |
+
|
| 265 |
+
U --> S
|
| 266 |
+
S --> V
|
| 267 |
+
V --> DB
|
| 268 |
+
V --> S
|
| 269 |
+
S --> LM
|
| 270 |
+
LM --> STORE
|
| 271 |
+
LM --> S
|
| 272 |
+
S --> SUP
|
| 273 |
+
SUP --> MUS
|
| 274 |
+
SUP --> INV
|
| 275 |
+
MUS --> T1 --> DB
|
| 276 |
+
INV --> T2 --> DB
|
| 277 |
+
DB --> T1 --> MUS
|
| 278 |
+
DB --> T2 --> INV
|
| 279 |
+
MUS --> SUP
|
| 280 |
+
INV --> SUP
|
| 281 |
+
SUP --> CM
|
| 282 |
+
CM --> STORE
|
| 283 |
+
SUP --> R
|
| 284 |
+
R --> UI2
|
| 285 |
+
S -. persisted per turn .- CKPT
|
| 286 |
+
```
|
| 287 |
+
|
| 288 |
+
**Invariants:**
|
| 289 |
+
|
| 290 |
+
- Only `verify_info` writes `customer_id`. No sub-agent or tool mutates it.
|
| 291 |
+
- Only tools and `verify_info` touch `DB`. The supervisor never executes SQL directly, and `verify_info` runs only its targeted customer lookups.
|
| 292 |
+
- `create_memory` only **unions** into `InMemoryStore`; deletions are not possible via this path.
|
| 293 |
+
|
| 294 |
+
---
|
| 295 |
+
|
| 296 |
+
## Technology Stack
|
| 297 |
+
|
| 298 |
+
| Layer | Technology | Version | Role |
|
| 299 |
+
|---|---|---|---|
|
| 300 |
+
| Language | Python | 3.12+ | Runtime |
|
| 301 |
+
| UI | Gradio | 5.29+ | Chat interface, streaming, interrupts |
|
| 302 |
+
| Agent Orchestration | LangGraph | 1.0+ | State machine, checkpointing, ToolNode |
|
| 303 |
+
| Supervisor | langgraph-supervisor | 0.0.20+ | Hierarchical routing |
|
| 304 |
+
| Prebuilt Agents | langgraph-prebuilt | 1.0+ | `create_react_agent`, `ToolNode` |
|
| 305 |
+
| LLM Integration | langchain-openai | 1.0+ | `ChatOpenAI` (any OpenAI-compatible API) |
|
| 306 |
+
| Core Framework | langchain + langchain-core + langchain-community | 1.0+ / 0.4+ | Messages, tools, SQLDatabase utility |
|
| 307 |
+
| Data Validation | Pydantic | v2+ | `UserInput`, `UserProfile` schemas |
|
| 308 |
+
| Database Engine | SQLAlchemy | 2.0+ | In-memory SQLite via `StaticPool` |
|
| 309 |
+
| Dataset | Chinook | - | Customers, invoices, tracks, albums, artists |
|
| 310 |
+
| Checkpointer | `langgraph.checkpoint.memory.MemorySaver` | - | Per-thread short-term state |
|
| 311 |
+
| Store | `langgraph.store.memory.InMemoryStore` | - | Per-customer long-term memory |
|
| 312 |
+
| Env Config | python-dotenv | 1.0+ | Loads `.env` |
|
| 313 |
+
| HTTP | requests | 2.31+ | One-shot SQL script fetch |
|
| 314 |
+
| Packaging | Docker | `python:3.12-slim` base image | Reproducible deploy |
|
| 315 |
+
| Hosting | Hugging Face Spaces | - | YAML-frontmatter driven |
|
| 316 |
+
|
| 317 |
+
---
|
| 318 |
+
|
| 319 |
+
## Project Structure
|
| 320 |
+
|
| 321 |
+
```
|
| 322 |
+
Multi-Agent-Customer-Support/
|
| 323 |
+
├── app.py # Entry point (local + HF Spaces: module-level app for HF import)
|
| 324 |
+
├── Dockerfile # Python 3.12-slim, exposes :7860, runs app.py
|
| 325 |
+
├── requirements.txt # Pinned min versions
|
| 326 |
+
├── .env.example # OPENAI_API_KEY, MODEL_NAME, TEMPERATURE, PORT
|
| 327 |
+
├── Chinook_Sqlite.sql # Cached dataset (auto-downloaded if missing)
|
| 328 |
+
│
|
| 329 |
+
├── src/
|
| 330 |
+
│ ├── config.py # Settings class; reads env, sets logging format
|
| 331 |
+
│ ├── state.py # LangGraph State TypedDict
|
| 332 |
+
│ ├── models.py # Pydantic schemas: UserInput, UserProfile
|
| 333 |
+
│ │
|
| 334 |
+
│ ├── db/
|
| 335 |
+
│ │ └── database.py # SQLAlchemy engine, run_query_safe, normalize_phone, verify_database
|
| 336 |
+
│ │
|
| 337 |
+
│ ├── tools/
|
| 338 |
+
│ │ ├── music_catalog.py # 5 @tool functions (fuzzy SQL, deterministic sampling)
|
| 339 |
+
│ │ └── invoice.py # 4 @tool functions (customer-scoped queries)
|
| 340 |
+
│ │
|
| 341 |
+
│ ├── agents/
|
| 342 |
+
│ │ ├── prompts.py # All system prompts (supervisor, sub-agents, verification, memory)
|
| 343 |
+
│ │ ├── nodes.py # Graph node functions: verify_info, human_input, load_memory, create_memory, music_assistant, should_continue, should_interrupt
|
| 344 |
+
│ │ └── graph.py # build_graph(): assembles music subgraph, invoice ReAct, supervisor, outer graph
|
| 345 |
+
│ │
|
| 346 |
+
│ └── ui/
|
| 347 |
+
│ ├── app.py # Gradio Blocks, stream handler, status bar, reset button
|
| 348 |
+
│ └── styles.py # CUSTOM_CSS
|
| 349 |
+
│
|
| 350 |
+
└── tests/
|
| 351 |
+
├── conftest.py # DB fixture (session-scoped)
|
| 352 |
+
├── test_database.py # 11 tests: run_query_safe, normalize_phone, verify_database
|
| 353 |
+
└── test_tools.py # 17 tests: all 9 tool functions
|
| 354 |
+
```
|
| 355 |
+
|
| 356 |
+
**Key files to read in order when grokking the repo:**
|
| 357 |
+
|
| 358 |
+
1. `src/state.py` - the shared contract.
|
| 359 |
+
2. `src/agents/graph.py` - how the whole thing is wired.
|
| 360 |
+
3. `src/agents/nodes.py` - what each node does.
|
| 361 |
+
4. `src/agents/prompts.py` - the behavioral contract for every LLM call.
|
| 362 |
+
5. `src/tools/*.py` - the data surface.
|
| 363 |
+
6. `src/ui/app.py` - how streaming + interrupts are surfaced.
|
| 364 |
+
|
| 365 |
+
---
|
| 366 |
+
|
| 367 |
+
## Application Flow (Gradio UI)
|
| 368 |
+
|
| 369 |
+
```mermaid
|
| 370 |
+
sequenceDiagram
|
| 371 |
+
participant User
|
| 372 |
+
participant Gradio
|
| 373 |
+
participant Graph as LangGraph
|
| 374 |
+
participant Sub as Sub-Agent
|
| 375 |
+
participant SQL as SQLite
|
| 376 |
+
|
| 377 |
+
User->>Gradio: type message + Enter
|
| 378 |
+
Gradio->>Gradio: show_user_message (optimistic render)
|
| 379 |
+
Gradio->>Graph: graph.stream(input, thread_id)
|
| 380 |
+
Graph->>Graph: verify_info
|
| 381 |
+
alt customer not verified
|
| 382 |
+
Graph-->>Gradio: interrupt (snapshot.next set)
|
| 383 |
+
Gradio->>User: "Waiting for your input"
|
| 384 |
+
User->>Gradio: identifier
|
| 385 |
+
Gradio->>Graph: resume
|
| 386 |
+
end
|
| 387 |
+
Graph->>Graph: load_memory
|
| 388 |
+
Graph->>Sub: supervisor routes
|
| 389 |
+
Sub->>SQL: parameterized query
|
| 390 |
+
SQL-->>Sub: rows (JSON)
|
| 391 |
+
Sub-->>Graph: AIMessage
|
| 392 |
+
Graph->>Graph: create_memory (union preferences)
|
| 393 |
+
Graph-->>Gradio: final AIMessage + elapsed
|
| 394 |
+
Gradio->>User: assistant reply + status "Responded in N.Ns · Data sources: …"
|
| 395 |
+
```
|
| 396 |
+
|
| 397 |
+
Every browser session is assigned a UUID `thread_id` stored in `gr.State`. This scopes the checkpointer so concurrent users never see each other's turns. The "New Conversation" button just rotates the UUID.
|
| 398 |
+
|
| 399 |
+
---
|
| 400 |
+
|
| 401 |
+
## Pipeline Stages
|
| 402 |
+
|
| 403 |
+
Each node is a function over `State` (plus optional `store`/`config`). Nodes are not pure: they call the LLM, the database, or the store. Stages in execution order:
|
| 404 |
+
|
| 405 |
+
### 1. `verify_info` - Identity Gate
|
| 406 |
+
- **Input:** `messages`, maybe existing `customer_id`.
|
| 407 |
+
- **If already verified:** no-op pass-through.
|
| 408 |
+
- **Else:** calls `llm.with_structured_output(UserInput)` to pull one identifier (ID / email / phone). Runs a parameterized SQL lookup:
|
| 409 |
+
- numeric → `CustomerId =`
|
| 410 |
+
- contains `@` → `LOWER(Email) =`
|
| 411 |
+
- else → `normalize_phone()` compared against normalized DB phones.
|
| 412 |
+
- **If found:** writes `customer_id` and a `SystemMessage` announcing the verified ID.
|
| 413 |
+
- **If not found:** invokes a polite re-prompt LLM using `VERIFICATION_PROMPT`.
|
| 414 |
+
|
| 415 |
+
### 2. `human_input` - Interrupt
|
| 416 |
+
- Calls LangGraph `interrupt("Please provide input.")`. The UI receives this via `snapshot.next` and pauses the turn. When the user replies, the graph resumes and loops back to `verify_info`.
|
| 417 |
+
|
| 418 |
+
### 3. `load_memory` - Preference Hydration
|
| 419 |
+
- Reads `("memory_profile", customer_id)` from `InMemoryStore`.
|
| 420 |
+
- Formats as `"Music Preferences: rock, AC/DC, jazz"` and sets `loaded_memory`.
|
| 421 |
+
- The music agent's prompt interpolates this string so it can personalize without re-asking.
|
| 422 |
+
|
| 423 |
+
### 4. `supervisor` - Hierarchical Router
|
| 424 |
+
- Built via `langgraph_supervisor.create_supervisor`.
|
| 425 |
+
- Routing rules (encoded in `SUPERVISOR_PROMPT`):
|
| 426 |
+
- music/catalog → `music_catalog_subagent`
|
| 427 |
+
- invoice/purchase/billing → `invoice_information_subagent`
|
| 428 |
+
- mixed → invoice first, then music
|
| 429 |
+
- off-topic → direct refusal, no sub-agent invoked
|
| 430 |
+
- Merges sub-agent outputs into a single coherent response. Never adds information not present in sub-agent outputs.
|
| 431 |
+
|
| 432 |
+
### 5a. `music_catalog_subagent` - Hand-Built ReAct
|
| 433 |
+
- Custom `StateGraph` with two nodes: `music_assistant` (LLM with bound tools) and `music_tool_node` (`ToolNode(music_tools)`).
|
| 434 |
+
- Conditional edge `should_continue` loops until there are no `tool_calls` left.
|
| 435 |
+
- System prompt is generated per call via `generate_music_assistant_prompt(loaded_memory)` so preferences are fresh.
|
| 436 |
+
|
| 437 |
+
### 5b. `invoice_information_subagent` - Prebuilt ReAct
|
| 438 |
+
- Built via `langgraph.prebuilt.create_react_agent(llm, tools=invoice_tools, prompt=INVOICE_SUBAGENT_PROMPT, state_schema=State)`.
|
| 439 |
+
- Prompt explicitly tells it to use the **verified** `customer_id` from the `SystemMessage`, not any ID the user mentions.
|
| 440 |
+
|
| 441 |
+
### 6. `create_memory` - Preference Capture
|
| 442 |
+
- Summarizes last 10 messages.
|
| 443 |
+
- `llm.with_structured_output(UserProfile)` extracts **explicit** preferences.
|
| 444 |
+
- Unions with existing preferences; if the LLM returns empty but there are existing preferences, the write is **skipped** (never erases).
|
| 445 |
+
- Writes back to `InMemoryStore`.
|
| 446 |
+
|
| 447 |
+
---
|
| 448 |
+
|
| 449 |
+
## Getting Started
|
| 450 |
+
|
| 451 |
+
### Prerequisites
|
| 452 |
+
|
| 453 |
+
- Python **3.12** (3.13 does not work on HF Spaces because Python 3.13 removed the `audioop` standard-library module).
|
| 454 |
+
- An OpenAI API key *or* any OpenAI-compatible endpoint (Groq, Together, Azure OpenAI, LM Studio, Ollama…).
|
| 455 |
+
- Git.
|
| 456 |
+
|
| 457 |
+
### Quick Start
|
| 458 |
+
|
| 459 |
+
```bash
|
| 460 |
+
# 1. Clone
|
| 461 |
+
git clone https://github.com/ANI-IN/Multi-Agent-Customer-Support.git
|
| 462 |
+
cd Multi-Agent-Customer-Support
|
| 463 |
+
|
| 464 |
+
# 2. Virtualenv
|
| 465 |
+
python3.12 -m venv venv
|
| 466 |
+
source venv/bin/activate # Windows: venv\Scripts\activate
|
| 467 |
+
|
| 468 |
+
# 3. Deps
|
| 469 |
+
pip install -r requirements.txt
|
| 470 |
+
|
| 471 |
+
# 4. Config
|
| 472 |
+
cp .env.example .env
|
| 473 |
+
# edit .env → set OPENAI_API_KEY
|
| 474 |
+
|
| 475 |
+
# 5. Run
|
| 476 |
+
python app.py
|
| 477 |
+
# → http://localhost:7860
|
| 478 |
+
```
|
| 479 |
+
|
| 480 |
+
**First-run chat script:**
|
| 481 |
+
```
|
| 482 |
+
> My customer ID is 5
|
| 483 |
+
> What AC/DC albums do you have?
|
| 484 |
+
> Show me my most expensive purchase.
|
| 485 |
+
> I love rock music. # saved to memory
|
| 486 |
+
# ... click "New Conversation" (do not restart the app: memory is held in-process), verify again as customer 5 ...
|
| 487 |
+
> What genres do you think I'd like?
|
| 488 |
+
```
|
| 489 |
+
|
| 490 |
+
### Sample Dataset
|
| 491 |
+
|
| 492 |
+
The app boots against the [Chinook sample database](https://github.com/lerocha/chinook-database), loaded into an in-memory SQLite instance via SQLAlchemy `StaticPool`. On first run it reads `Chinook_Sqlite.sql` from the repo root; if missing, it downloads and caches it.
|
| 493 |
+
|
| 494 |
+
| Table | Rows | Notes |
|
| 495 |
+
|---|---:|---|
|
| 496 |
+
| Customer | 59 | PII + `SupportRepId` FK to Employee |
|
| 497 |
+
| Employee | 8 | Support reps |
|
| 498 |
+
| Invoice | 412 | `CustomerId`, `InvoiceDate`, `Total` |
|
| 499 |
+
| InvoiceLine | 2,240 | Each row = one purchased track |
|
| 500 |
+
| Track | 3,503 | `AlbumId`, `GenreId`, `MediaTypeId`, `UnitPrice` |
|
| 501 |
+
| Album | 347 | `ArtistId` FK |
|
| 502 |
+
| Artist | 275 | - |
|
| 503 |
+
| Genre | 25 | - |
|
| 504 |
+
| MediaType | 5 | - |
|
| 505 |
+
| Playlist / PlaylistTrack | 18 / 8,715 | Not currently exposed as tools |
|
| 506 |
+
|
| 507 |
+
### Developer Commands
|
| 508 |
+
|
| 509 |
+
```bash
|
| 510 |
+
# Run app
|
| 511 |
+
python app.py
|
| 512 |
+
|
| 513 |
+
# Full test suite (28 tests)
|
| 514 |
+
pytest tests/ -v
|
| 515 |
+
|
| 516 |
+
# Only DB layer
|
| 517 |
+
pytest tests/test_database.py -v
|
| 518 |
+
|
| 519 |
+
# Only tools
|
| 520 |
+
pytest tests/test_tools.py -v
|
| 521 |
+
|
| 522 |
+
# Docker build + run
|
| 523 |
+
docker build -t music-support .
|
| 524 |
+
docker run -p 7860:7860 -e OPENAI_API_KEY=sk-... music-support
|
| 525 |
+
|
| 526 |
+
# Quick sanity check without the UI
|
| 527 |
+
python -c "from src.db.database import verify_database; print(verify_database())"
|
| 528 |
+
```
|
| 529 |
+
|
| 530 |
+
---
|
| 531 |
+
|
| 532 |
+
## Configuration
|
| 533 |
+
|
| 534 |
+
All configuration is env-driven. `src/config.py` loads `.env` once at import time.
|
| 535 |
+
|
| 536 |
+
| Variable | Required | Default | Purpose |
|
| 537 |
+
|---|---|---|---|
|
| 538 |
+
| `OPENAI_API_KEY` | yes | - | API key for the LLM provider |
|
| 539 |
+
| `OPENAI_API_BASE` | no | - | Override base URL for non-OpenAI providers |
|
| 540 |
+
| `MODEL_NAME` | no | `gpt-4o-mini` | Chat model name |
|
| 541 |
+
| `TEMPERATURE` | no | `0` | Sampling temperature; `0` gives the most repeatable routing (providers do not guarantee bit-identical output) |
|
| 542 |
+
| `PORT` | no | `7860` | Gradio port |
|
| 543 |
+
|
| 544 |
+
### LLM Provider (Pick One)
|
| 545 |
+
|
| 546 |
+
The project uses `ChatOpenAI`, which speaks the OpenAI protocol. Any compatible provider works by setting `OPENAI_API_BASE`.
|
| 547 |
+
|
| 548 |
+
<details>
|
| 549 |
+
<summary><b>OpenAI (default)</b></summary>
|
| 550 |
+
|
| 551 |
+
```env
|
| 552 |
+
OPENAI_API_KEY=sk-...
|
| 553 |
+
MODEL_NAME=gpt-4o-mini
|
| 554 |
+
```
|
| 555 |
+
</details>
|
| 556 |
+
|
| 557 |
+
<details>
|
| 558 |
+
<summary><b>Groq</b></summary>
|
| 559 |
+
|
| 560 |
+
```env
|
| 561 |
+
OPENAI_API_BASE=https://api.groq.com/openai/v1
|
| 562 |
+
OPENAI_API_KEY=gsk_...
|
| 563 |
+
MODEL_NAME=llama-3.3-70b-versatile
|
| 564 |
+
```
|
| 565 |
+
</details>
|
| 566 |
+
|
| 567 |
+
<details>
|
| 568 |
+
<summary><b>Together AI</b></summary>
|
| 569 |
+
|
| 570 |
+
```env
|
| 571 |
+
OPENAI_API_BASE=https://api.together.xyz/v1
|
| 572 |
+
OPENAI_API_KEY=...
|
| 573 |
+
MODEL_NAME=meta-llama/Llama-3.3-70B-Instruct-Turbo
|
| 574 |
+
```
|
| 575 |
+
</details>
|
| 576 |
+
|
| 577 |
+
<details>
|
| 578 |
+
<summary><b>Azure OpenAI</b></summary>
|
| 579 |
+
|
| 580 |
+
```env
|
| 581 |
+
OPENAI_API_BASE=https://your-resource.openai.azure.com/openai/v1/
|
| 582 |
+
OPENAI_API_KEY=...
|
| 583 |
+
MODEL_NAME=your-deployment-name
|
| 584 |
+
```
|
| 585 |
+
</details>
|
| 586 |
+
|
| 587 |
+
<details>
|
| 588 |
+
<summary><b>Local (LM Studio / Ollama / vLLM)</b></summary>
|
| 589 |
+
|
| 590 |
+
```env
|
| 591 |
+
OPENAI_API_BASE=http://localhost:1234/v1
|
| 592 |
+
OPENAI_API_KEY=not-needed
|
| 593 |
+
MODEL_NAME=your-local-model
|
| 594 |
+
```
|
| 595 |
+
</details>
|
| 596 |
+
|
| 597 |
+
> **Tip:** The supervisor relies on the model following structured routing instructions. With models smaller than ~7B parameters, routing accuracy on mixed queries may degrade.
|
| 598 |
+
|
| 599 |
+
---
|
| 600 |
+
|
| 601 |
+
## Tools Reference
|
| 602 |
+
|
| 603 |
+
### Music Catalog Tools (`src/tools/music_catalog.py`)
|
| 604 |
+
|
| 605 |
+
| Tool | Signature | Returns |
|
| 606 |
+
|---|---|---|
|
| 607 |
+
| `get_albums_by_artist` | `(artist: str)` | Album rows, fuzzy `LIKE '%artist%'` |
|
| 608 |
+
| `get_tracks_by_artist` | `(artist: str)` | Total count + up to 20 full-detail tracks |
|
| 609 |
+
| `get_songs_by_genre` | `(genre: str)` | Total count + 1 track per artist (up to 10), deterministic via `ROW_NUMBER()` CTE |
|
| 610 |
+
| `check_for_songs` | `(song_title: str)` | Up to 10 full-detail matches on track name |
|
| 611 |
+
| `get_track_details` | `(track_id: str)` | Every column for one track including computed `SizeMB` |
|
| 612 |
+
|
| 613 |
+
### Invoice Tools (`src/tools/invoice.py`)
|
| 614 |
+
|
| 615 |
+
| Tool | Signature | Returns |
|
| 616 |
+
|---|---|---|
|
| 617 |
+
| `get_invoices_by_customer_sorted_by_date` | `(customer_id: str)` | All invoices DESC by date |
|
| 618 |
+
| `get_invoice_line_items_sorted_by_price` | `(customer_id: str)` | All purchased **tracks** (not invoices) DESC by unit price |
|
| 619 |
+
| `get_employee_by_invoice_and_customer` | `(invoice_id, customer_id)` | Support rep name / title / email |
|
| 620 |
+
| `get_invoice_line_items` | `(invoice_id, customer_id)` | Full track details for one invoice |
|
| 621 |
+
|
| 622 |
+
All numeric tool arguments pass through `_safe_int()`. Every return value is either a JSON-serialized list of rows or a human-readable "not found" string; empty results are never silently collapsed.
|
| 623 |
+
|
| 624 |
+
---
|
| 625 |
+
|
| 626 |
+
## Prompt Engineering & Anti-Hallucination
|
| 627 |
+
|
| 628 |
+
Grounding rules (applied to every sub-agent prompt in `src/agents/prompts.py`):
|
| 629 |
+
|
| 630 |
+
1. **Tool-only** - never answer from model memory; always call a tool first.
|
| 631 |
+
2. **Exact quoting** - no rounding, no estimating, no "about".
|
| 632 |
+
3. **Honest failures** - "I could not find that in our catalog." is the literal fallback.
|
| 633 |
+
4. **Scope boundaries** - each sub-agent explicitly refuses out-of-scope queries and defers.
|
| 634 |
+
5. **Truncation transparency** - when results are sampled (LIMIT), say so and include the total.
|
| 635 |
+
6. **No invented IDs** - the invoice agent is told to read the verified `customer_id` from the `SystemMessage`, not from user text.
|
| 636 |
+
|
| 637 |
+
Memory rules (`CREATE_MEMORY_PROMPT`):
|
| 638 |
+
|
| 639 |
+
- Only **explicit** statements count ("I love jazz" ✅; "Do you have jazz?" ❌).
|
| 640 |
+
- New preferences **merge** with existing (set union).
|
| 641 |
+
- If the LLM returns empty but prior preferences exist, **skip the write**.
|
| 642 |
+
|
| 643 |
+
---
|
| 644 |
+
|
| 645 |
+
## Security
|
| 646 |
+
|
| 647 |
+
| Threat | Mitigation |
|
| 648 |
+
|---|---|
|
| 649 |
+
| SQL injection | Every query uses SQLAlchemy `text(...)` with bound `:name` parameters. No f-strings or concatenation. |
|
| 650 |
+
| Cross-customer data leak | `customer_id` is set only by `verify_info` and passed via `SystemMessage`. Invoice tools require it as a typed parameter. |
|
| 651 |
+
| Hallucinated accounts | Verification looks up against the real `Customer` table; unknown IDs produce a polite retry prompt, not a pass-through. |
|
| 652 |
+
| Numeric crashes | `_safe_int()` wraps every numeric tool arg and returns a friendly error instead of propagating `ValueError`. |
|
| 653 |
+
| Phone format bypass | `normalize_phone()` strips non-digits (preserving `+`), so `+1 (555) 123-4567` matches `+15551234567`. |
|
| 654 |
+
| Thread cross-talk | Gradio issues a UUID `thread_id` per session; the checkpointer is scoped to it. |
|
| 655 |
+
|
| 656 |
+
---
|
| 657 |
+
|
| 658 |
+
## Testing
|
| 659 |
+
|
| 660 |
+
**28 pytest tests**, all deterministic, all hit the real in-memory DB.
|
| 661 |
+
|
| 662 |
+
```bash
|
| 663 |
+
pytest tests/ -v
|
| 664 |
+
```
|
| 665 |
+
|
| 666 |
+
| File | Tests | Coverage |
|
| 667 |
+
|---|---:|---|
|
| 668 |
+
| `tests/test_database.py` | 11 | `run_query_safe` (happy path, params, empty, JSON shape), `normalize_phone` (intl, domestic, dashes, empty, None, `+` prefix), `verify_database` |
|
| 669 |
+
| `tests/test_tools.py` | 17 | All 5 music tools + all 4 invoice tools; found / not-found / invalid-id / determinism / DESC-date ordering |
|
| 670 |
+
|
| 671 |
+
Notable determinism check:
|
| 672 |
+
|
| 673 |
+
```python
|
| 674 |
+
def test_get_songs_by_genre_deterministic(self):
|
| 675 |
+
r1 = get_songs_by_genre.invoke({"genre": "Rock"})
|
| 676 |
+
r2 = get_songs_by_genre.invoke({"genre": "Rock"})
|
| 677 |
+
assert r1 == r2
|
| 678 |
+
```
|
| 679 |
+
|
| 680 |
+
---
|
| 681 |
+
|
| 682 |
+
## Deployment
|
| 683 |
+
|
| 684 |
+
### Hugging Face Spaces
|
| 685 |
+
|
| 686 |
+
This README's YAML frontmatter is the HF Spaces config. Push the repo to a Gradio Space, set `OPENAI_API_KEY` in **Settings → Repository Secrets**, and HF builds and runs `app.py` automatically. Python is pinned to 3.12 (3.13 breaks `audioop` imports pulled in transitively).
|
| 687 |
+
|
| 688 |
+
### Docker
|
| 689 |
+
|
| 690 |
+
```bash
|
| 691 |
+
docker build -t music-support .
|
| 692 |
+
docker run -d \
|
| 693 |
+
-p 7860:7860 \
|
| 694 |
+
-e OPENAI_API_KEY=sk-... \
|
| 695 |
+
-e MODEL_NAME=gpt-4o-mini \
|
| 696 |
+
--name music-support \
|
| 697 |
+
--restart unless-stopped \
|
| 698 |
+
music-support
|
| 699 |
+
```
|
| 700 |
+
|
| 701 |
+
### Docker Compose
|
| 702 |
+
|
| 703 |
+
```yaml
|
| 704 |
+
services:
|
| 705 |
+
music-support:
|
| 706 |
+
build: .
|
| 707 |
+
ports: ["7860:7860"]
|
| 708 |
+
environment:
|
| 709 |
+
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
| 710 |
+
- MODEL_NAME=gpt-4o-mini
|
| 711 |
+
- TEMPERATURE=0
|
| 712 |
+
restart: unless-stopped
|
| 713 |
+
```
|
| 714 |
+
|
| 715 |
+
---
|
| 716 |
+
|
| 717 |
+
## Troubleshooting
|
| 718 |
+
|
| 719 |
+
| Symptom | Cause | Fix |
|
| 720 |
+
|---|---|---|
|
| 721 |
+
| `ModuleNotFoundError: gradio` | Deps not installed | `pip install -r requirements.txt` |
|
| 722 |
+
| `ModuleNotFoundError: audioop` | Python 3.13 | Use Python 3.12 |
|
| 723 |
+
| `ImportError: HfFolder` | Old Gradio | `gradio>=5.29.0` |
|
| 724 |
+
| `TypeError: argument of type 'bool' is not iterable` | Gradio/client schema bug | `gradio>=5.29.0` |
|
| 725 |
+
| `OPENAI_API_KEY not set` | Missing `.env` | `cp .env.example .env` + edit |
|
| 726 |
+
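| Downloads Chinook on every start | `Chinook_Sqlite.sql` is missing from the repo root (it is git-ignored, so fresh clones and new containers start without it) | Let the first run download and cache it, and keep the file in place; subsequent runs then read it locally |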
|
| 727 |
+
| Verification keeps failing | No matching customer | Try Customer ID `5` (known to exist); or a real Chinook email / phone |
|
| 728 |
+
|
| 729 |
+
---
|
| 730 |
+
|
| 731 |
+
## Roadmap
|
| 732 |
+
|
| 733 |
+
- Persistent storage: swap `MemorySaver` → `SqliteSaver`; swap `InMemoryStore` → Postgres- or Redis-backed store.
|
| 734 |
+
- Token streaming in the UI (currently streams at node-event granularity).
|
| 735 |
+
- Playlist and customer-profile tools (tables present, tools not yet exposed).
|
| 736 |
+
- Structured JSON logs with correlation IDs per `thread_id` + latency metrics.
|
| 737 |
+
- CI pipeline (GitHub Actions) running `pytest` on every PR.
|
| 738 |
+
- Per-session rate limiting at the UI boundary.
|
| 739 |
+
|
| 740 |
+
---
|
| 741 |
+
|
| 742 |
+
## License
|
| 743 |
+
|
| 744 |
+
MIT. See [LICENSE](LICENSE).
|
| 745 |
+
|
| 746 |
+
---
|
| 747 |
+
|
| 748 |
+
<div align="center">
|
| 749 |
+
<sub>Built and maintained by <b>Animesh Kumar</b> · LangGraph · Gradio · Chinook</sub>
|
| 750 |
+
</div>
|
app.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Entry point for Hugging Face Spaces and local development."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from src.ui.app import create_app
|
| 5 |
+
from src.config import settings
|
| 6 |
+
|
| 7 |
+
if __name__ == "__main__":
|
| 8 |
+
app = create_app()
|
| 9 |
+
app.launch(
|
| 10 |
+
server_name="0.0.0.0",
|
| 11 |
+
server_port=settings.port,
|
| 12 |
+
share=False,
|
| 13 |
+
show_error=True,
|
| 14 |
+
)
|
| 15 |
+
else:
|
| 16 |
+
# HF Spaces calls create_app() at import time
|
| 17 |
+
app = create_app()
|
| 18 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=5.29.0
|
| 2 |
+
langchain>=1.0.0
|
| 3 |
+
langchain-openai>=1.0.0
|
| 4 |
+
langchain-community>=0.4.0
|
| 5 |
+
langchain-core>=1.0.0
|
| 6 |
+
langgraph>=1.0.0
|
| 7 |
+
langgraph-checkpoint>=4.0.0
|
| 8 |
+
langgraph-prebuilt>=1.0.0
|
| 9 |
+
langgraph-supervisor>=0.0.20
|
| 10 |
+
sqlalchemy>=2.0.0
|
| 11 |
+
pydantic>=2.0.0
|
| 12 |
+
requests>=2.31.0
|
| 13 |
+
python-dotenv>=1.0.0
|