Spaces:
Running
Running
Commit ·
dea56d6
0
Parent(s):
Initial commit: built the full working product for the challenge but forgot to maintain a repo, so everything was committed after it was done
Browse files- .env.sample +12 -0
- .gitignore +44 -0
- Dockerfile +20 -0
- README.md +73 -0
- database.py +238 -0
- llm_pipeline.py +493 -0
- magicpin-ai-challenge/challenge-brief.md +544 -0
- magicpin-ai-challenge/challenge-testing-brief.md +557 -0
- magicpin-ai-challenge/dataset/categories/dentists.json +129 -0
- magicpin-ai-challenge/dataset/categories/gyms.json +125 -0
- magicpin-ai-challenge/dataset/categories/pharmacies.json +124 -0
- magicpin-ai-challenge/dataset/categories/restaurants.json +118 -0
- magicpin-ai-challenge/dataset/categories/salons.json +125 -0
- magicpin-ai-challenge/dataset/customers_seed.json +140 -0
- magicpin-ai-challenge/dataset/generate_dataset.py +312 -0
- magicpin-ai-challenge/dataset/merchants_seed.json +314 -0
- magicpin-ai-challenge/dataset/triggers_seed.json +180 -0
- magicpin-ai-challenge/engagement-design.md +325 -0
- magicpin-ai-challenge/engagement-research.md +198 -0
- magicpin-ai-challenge/examples/api-call-examples.md +615 -0
- magicpin-ai-challenge/examples/case-studies.md +338 -0
- magicpin-ai-challenge/judge_simulator.py +962 -0
- main.py +271 -0
- requirements.txt +5 -0
- security.py +93 -0
.env.sample
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ── Vera Message Engine Environment ──
|
| 2 |
+
# Copy this file to .env and fill in your keys.
|
| 3 |
+
|
| 4 |
+
# Cerebras API (Diagnostician — llama3.1-8b)
|
| 5 |
+
CEREBRAS_API_KEY=your_cerebras_api_key_here
|
| 6 |
+
|
| 7 |
+
# Groq API (Copywriter — llama-3.3-70b-versatile + Prompt Guard)
|
| 8 |
+
GROQ_API_KEY=your_groq_api_key_here
|
| 9 |
+
|
| 10 |
+
# Server
|
| 11 |
+
HOST=0.0.0.0
|
| 12 |
+
PORT=8000
|
.gitignore
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
*.egg-info/
|
| 7 |
+
dist/
|
| 8 |
+
build/
|
| 9 |
+
*.egg
|
| 10 |
+
.eggs/
|
| 11 |
+
|
| 12 |
+
# Virtual environments
|
| 13 |
+
venv/
|
| 14 |
+
.venv/
|
| 15 |
+
env/
|
| 16 |
+
ENV/
|
| 17 |
+
|
| 18 |
+
# Environment variables
|
| 19 |
+
.env
|
| 20 |
+
|
| 21 |
+
# IDE
|
| 22 |
+
.vscode/
|
| 23 |
+
.idea/
|
| 24 |
+
*.swp
|
| 25 |
+
*.swo
|
| 26 |
+
*~
|
| 27 |
+
|
| 28 |
+
# Database
|
| 29 |
+
*.db
|
| 30 |
+
*.sqlite3
|
| 31 |
+
|
| 32 |
+
# OS
|
| 33 |
+
.DS_Store
|
| 34 |
+
Thumbs.db
|
| 35 |
+
|
| 36 |
+
# Logs
|
| 37 |
+
*.log
|
| 38 |
+
|
| 39 |
+
# Docker
|
| 40 |
+
.dockerignore
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# Personal files
|
| 44 |
+
Context/
|
Dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*
|
| 6 |
+
|
| 7 |
+
COPY requirements.txt .
|
| 8 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 9 |
+
|
| 10 |
+
COPY database.py security.py llm_pipeline.py main.py ./
|
| 11 |
+
|
| 12 |
+
RUN mkdir -p /app/data
|
| 13 |
+
ENV VERA_DB_PATH=/app/data/vera_state.db
|
| 14 |
+
|
| 15 |
+
EXPOSE 8000
|
| 16 |
+
|
| 17 |
+
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
| 18 |
+
CMD curl -f http://localhost:8000/v1/healthz || exit 1
|
| 19 |
+
|
| 20 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--log-level", "info", "--workers", "1"]
|
README.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Vera Message Engine
|
| 2 |
+
|
| 3 |
+
**Stateful, deterministic message engine for merchant engagement — magicpin AI Challenge 2026**
|
| 4 |
+
|
| 5 |
+
## Architecture
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
Request → Payload Limiter → Security Shield (Prompt Guard) → State Manager (SQLite) → Tri-Model Pipeline → Response
|
| 9 |
+
```
|
| 10 |
+
|
| 11 |
+
### Tri-Model Pipeline
|
| 12 |
+
|
| 13 |
+
| Step | Provider | Model | Role |
|
| 14 |
+
|------|----------|-------|------|
|
| 15 |
+
| 1. Diagnostician | Cerebras | llama3.1-8b | Extracts the ONE critical signal from merchant state |
|
| 16 |
+
| 2. Copywriter | Groq | llama-3.3-70b-versatile | Crafts high-compulsion message with specific CTA |
|
| 17 |
+
| Shield | Groq | llama-prompt-guard-2-86m | Blocks prompt injections before DB/LLM access |
|
| 18 |
+
|
| 19 |
+
### Category Routing (Pillar 5)
|
| 20 |
+
|
| 21 |
+
Dynamic system prompts tuned to merchant vertical:
|
| 22 |
+
- **Dentists/Pharmacies**: Clinical, utility-first, compliance-aware
|
| 23 |
+
- **Salons**: Visual, timely, aesthetic-focused
|
| 24 |
+
- **Restaurants**: Urgent, occasion-driven, locally grounded
|
| 25 |
+
- **Gyms**: Motivational, seasonal-dip reframing
|
| 26 |
+
|
| 27 |
+
## Setup
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
# 1. Clone and enter the project
|
| 31 |
+
cd VeraAgent
|
| 32 |
+
|
| 33 |
+
# 2. Copy and fill environment variables
|
| 34 |
+
cp .env.sample .env
|
| 35 |
+
# Edit .env with your Cerebras and Groq API keys
|
| 36 |
+
|
| 37 |
+
# 3. Install dependencies
|
| 38 |
+
pip install -r requirements.txt
|
| 39 |
+
|
| 40 |
+
# 4. Run the server
|
| 41 |
+
python main.py
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
## Docker
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
docker build -t vera-engine .
|
| 48 |
+
docker run -p 8000:8000 --env-file .env vera-engine
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
## Endpoints
|
| 52 |
+
|
| 53 |
+
| Method | Path | Description |
|
| 54 |
+
|--------|------|-------------|
|
| 55 |
+
| GET | `/v1/healthz` | Health check (200 OK) |
|
| 56 |
+
| GET | `/v1/metadata` | Bot identity and capabilities |
|
| 57 |
+
| POST | `/v1/context` | Idempotent merchant context ingestion |
|
| 58 |
+
| POST | `/v1/tick` | Time simulation + proactive message generation |
|
| 59 |
+
| POST | `/v1/reply` | Reply handling + contextual response generation |
|
| 60 |
+
|
| 61 |
+
## Design Decisions
|
| 62 |
+
|
| 63 |
+
- **SQLite with WAL mode**: Zero-latency embedded state that survives container restarts
|
| 64 |
+
- **Temperature = 0.0**: All LLM calls are fully deterministic
|
| 65 |
+
- **Fail-open security**: If Prompt Guard is unreachable, requests are allowed through (logged)
|
| 66 |
+
- **Version-gated upserts**: Context updates are idempotent — a payload with the same or a lower version is rejected as stale (409) and leaves the stored state unchanged
|
| 67 |
+
- **Fallback pipeline**: If any LLM is unavailable, the system uses heuristic signal extraction and template-based messages grounded in real merchant data
|
| 68 |
+
|
| 69 |
+
## Tradeoffs
|
| 70 |
+
|
| 71 |
+
1. **Single worker**: SQLite requires single-writer access. Traded concurrency for data integrity.
|
| 72 |
+
2. **Synchronous LLM calls in executor**: Cerebras and Groq are called over HTTP with the synchronous `requests` library; the calls are wrapped in `run_in_executor` to avoid blocking the event loop.
|
| 73 |
+
3. **Prompt Guard fail-open**: Chose availability over strict security — a blocked legitimate request costs more than a logged suspicious one.
|
database.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
database.py — Vera Message Engine
|
| 3 |
+
Embedded SQLite state management matching the exact judge harness contract.
|
| 4 |
+
|
| 5 |
+
Schema:
|
| 6 |
+
  contexts      — (scope, context_id) composite PK, version-gated upserts
|
| 7 |
+
  conversations — append-only turn log keyed by conversation_id
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import sqlite3
|
| 11 |
+
import json
|
| 12 |
+
import os
|
| 13 |
+
import logging
|
| 14 |
+
from datetime import datetime, timezone
|
| 15 |
+
from typing import Optional, Dict, Any, List, Tuple
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger("vera.database")
|
| 18 |
+
|
| 19 |
+
DB_PATH = os.getenv("VERA_DB_PATH", "vera_state.db")
|
| 20 |
+
|
| 21 |
+
_conn: Optional[sqlite3.Connection] = None
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def get_db() -> sqlite3.Connection:
    """Return the shared SQLite connection, opening it on first use.

    The first call connects to ``DB_PATH`` with ``check_same_thread=False``,
    switches the row factory to ``sqlite3.Row``, applies the WAL,
    ``synchronous=NORMAL`` and ``busy_timeout`` pragmas, and creates the
    schema via ``_init_schema``. Every later call returns the cached
    module-level connection.
    """
    global _conn
    if _conn is not None:
        return _conn

    _conn = sqlite3.connect(DB_PATH, check_same_thread=False)
    _conn.row_factory = sqlite3.Row
    for pragma in (
        "PRAGMA journal_mode=WAL",
        "PRAGMA synchronous=NORMAL",
        "PRAGMA busy_timeout=5000",
    ):
        _conn.execute(pragma)
    _init_schema(_conn)
    logger.info("Database initialized at %s", DB_PATH)
    return _conn
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def close_db():
    """Close the shared connection, if one is open, and forget it.

    Calling this when no connection exists is a no-op. The next ``get_db()``
    call opens a fresh connection.
    """
    global _conn
    if _conn is None:
        return
    _conn.close()
    _conn = None
    logger.info("Database connection closed")
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _init_schema(conn: sqlite3.Connection):
|
| 48 |
+
conn.executescript("""
|
| 49 |
+
CREATE TABLE IF NOT EXISTS contexts (
|
| 50 |
+
scope TEXT NOT NULL,
|
| 51 |
+
context_id TEXT NOT NULL,
|
| 52 |
+
version INTEGER NOT NULL DEFAULT 0,
|
| 53 |
+
payload TEXT NOT NULL DEFAULT '{}',
|
| 54 |
+
delivered_at TEXT,
|
| 55 |
+
stored_at TEXT NOT NULL,
|
| 56 |
+
PRIMARY KEY (scope, context_id)
|
| 57 |
+
);
|
| 58 |
+
|
| 59 |
+
CREATE TABLE IF NOT EXISTS conversations (
|
| 60 |
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 61 |
+
conversation_id TEXT NOT NULL,
|
| 62 |
+
turn_number INTEGER NOT NULL DEFAULT 0,
|
| 63 |
+
role TEXT NOT NULL,
|
| 64 |
+
message TEXT NOT NULL DEFAULT '',
|
| 65 |
+
timestamp TEXT NOT NULL
|
| 66 |
+
);
|
| 67 |
+
|
| 68 |
+
CREATE INDEX IF NOT EXISTS idx_conv_id ON conversations(conversation_id);
|
| 69 |
+
""")
|
| 70 |
+
conn.commit()
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# ─── Context CRUD ─────────────────────────────────────────────────────────────
|
| 74 |
+
|
| 75 |
+
def upsert_context(
    scope: str,
    context_id: str,
    version: int,
    payload: Dict[str, Any],
    delivered_at: Optional[str] = None,
) -> Dict[str, Any]:
    """Insert or replace a context row, accepting only strictly newer versions.

    The version gate is part of the UPSERT statement itself
    (``DO UPDATE ... WHERE excluded.version > contexts.version``), so the
    check and the write happen in one atomic statement instead of a separate
    SELECT followed by an unconditional write.

    Args:
        scope: Context scope; first half of the composite primary key.
        context_id: Identifier within the scope; second half of the key.
        version: Incoming version number.
        payload: JSON-serializable context body, stored as text.
        delivered_at: Optional delivery timestamp stored verbatim.

    Returns:
        On success: ``{"accepted": True, "ack_id", "stored_at",
        "status_code": 200}``.
        When the stored version is the same or newer:
        ``{"accepted": False, "reason": "stale_version", "current_version",
        "status_code": 409}``.
    """
    db = get_db()
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
    payload_json = json.dumps(payload, ensure_ascii=False)

    # The connection context manager commits on success and rolls back if the
    # statement raises, so a failed write never leaves a transaction open.
    with db:
        cursor = db.execute(
            """
            INSERT INTO contexts (scope, context_id, version, payload, delivered_at, stored_at)
            VALUES (?, ?, ?, ?, ?, ?)
            ON CONFLICT(scope, context_id) DO UPDATE SET
                version = excluded.version,
                payload = excluded.payload,
                delivered_at = excluded.delivered_at,
                stored_at = excluded.stored_at
            WHERE excluded.version > contexts.version
            """,
            (scope, context_id, version, payload_json, delivered_at, now),
        )
        # rowcount is 0 when the row exists and the WHERE gate rejected it.
        written = cursor.rowcount > 0

    if not written:
        # Stale or duplicate: report the version currently stored (409).
        row = db.execute(
            "SELECT version FROM contexts WHERE scope = ? AND context_id = ?",
            (scope, context_id),
        ).fetchone()
        current_version = row["version"] if row is not None else version
        return {
            "accepted": False,
            "reason": "stale_version",
            "current_version": current_version,
            "status_code": 409,
        }

    return {
        "accepted": True,
        "ack_id": f"ack_{context_id}_v{version}",
        "stored_at": now,
        "status_code": 200,
    }
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def get_context(scope: str, context_id: str) -> Optional[Dict[str, Any]]:
    """Look up one context row by its ``(scope, context_id)`` key.

    Returns ``None`` when no such row exists; otherwise a dict with ``scope``,
    ``context_id``, ``version`` and the JSON-decoded ``payload``.
    """
    record = get_db().execute(
        "SELECT * FROM contexts WHERE scope = ? AND context_id = ?",
        (scope, context_id),
    ).fetchone()
    if record is None:
        return None

    entry: Dict[str, Any] = {
        column: record[column] for column in ("scope", "context_id", "version")
    }
    entry["payload"] = json.loads(record["payload"])
    return entry
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def get_all_contexts_by_scope(scope: str) -> List[Dict[str, Any]]:
    """Return every stored context belonging to ``scope``.

    Each element carries ``context_id``, ``version`` and the JSON-decoded
    ``payload``. The list is empty when the scope has no rows.
    """
    cursor = get_db().execute(
        "SELECT context_id, version, payload FROM contexts WHERE scope = ?",
        (scope,),
    )
    entries: List[Dict[str, Any]] = []
    for record in cursor.fetchall():
        entries.append(
            {
                "context_id": record["context_id"],
                "version": record["version"],
                "payload": json.loads(record["payload"]),
            }
        )
    return entries
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def count_contexts() -> Dict[str, int]:
    """Return the number of stored contexts per scope.

    The four known scopes (category, merchant, customer, trigger) are always
    present in the result, defaulting to 0; any other scope found in the
    table is added alongside them.
    """
    tallies = dict.fromkeys(("category", "merchant", "customer", "trigger"), 0)
    cursor = get_db().execute(
        "SELECT scope, COUNT(*) as cnt FROM contexts GROUP BY scope"
    )
    for record in cursor.fetchall():
        tallies[record["scope"]] = record["cnt"]
    return tallies
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def wipe_all():
    """Delete every row from both tables (teardown), then commit."""
    connection = get_db()
    for statement in ("DELETE FROM contexts", "DELETE FROM conversations"):
        connection.execute(statement)
    connection.commit()
    logger.info("All state wiped (teardown)")
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
# ─── Conversation CRUD ────────────────────────────────────────────────────────
|
| 188 |
+
|
| 189 |
+
def append_turn(
    conversation_id: str,
    turn_number: int,
    role: str,
    message: str,
):
    """Record one conversation turn, stamped with the current UTC time.

    The timestamp is ISO-8601 with millisecond precision and a trailing ``Z``.
    The row is committed before returning.
    """
    stamp = (
        datetime.now(timezone.utc)
        .isoformat(timespec="milliseconds")
        .replace("+00:00", "Z")
    )
    connection = get_db()
    insert_sql = """
        INSERT INTO conversations (conversation_id, turn_number, role, message, timestamp)
        VALUES (?, ?, ?, ?, ?)
        """
    connection.execute(
        insert_sql, (conversation_id, turn_number, role, message, stamp)
    )
    connection.commit()
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def get_conversation(conversation_id: str) -> List[Dict[str, Any]]:
    """Return the turns of a conversation, oldest first.

    Turns are ordered by ``turn_number`` and then by insertion ``id``, so rows
    that share a turn number keep the order in which they were written.
    """
    columns = ("turn_number", "role", "message", "timestamp")
    cursor = get_db().execute(
        """
        SELECT turn_number, role, message, timestamp
        FROM conversations
        WHERE conversation_id = ?
        ORDER BY turn_number ASC, id ASC
        """,
        (conversation_id,),
    )
    return [
        {column: record[column] for column in columns}
        for record in cursor.fetchall()
    ]
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def conversation_exists(conversation_id: str) -> bool:
    """Return True when at least one turn is stored for ``conversation_id``."""
    probe = get_db().execute(
        "SELECT 1 FROM conversations WHERE conversation_id = ? LIMIT 1",
        (conversation_id,),
    )
    found = probe.fetchone()
    if found is None:
        return False
    return True
|
llm_pipeline.py
ADDED
|
@@ -0,0 +1,493 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
llm_pipeline.py — Vera Message Engine
|
| 3 |
+
Tri-Model Pipeline: Diagnostician (Cerebras) → Copywriter (Groq)
|
| 4 |
+
+ Category Voice Routing (Pillar 5)
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os, json, logging, re, hashlib, uuid
|
| 8 |
+
import requests
|
| 9 |
+
from typing import Dict, Any, Optional, List
|
| 10 |
+
|
| 11 |
+
logger = logging.getLogger("vera.llm_pipeline")
|
| 12 |
+
|
| 13 |
+
CEREBRAS_API_KEY = os.getenv("CEREBRAS_API_KEY", "")
|
| 14 |
+
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
|
| 15 |
+
CEREBRAS_URL = "https://api.cerebras.ai/v1/chat/completions"
|
| 16 |
+
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
|
| 17 |
+
DIAG_MODEL = "llama3.1-8b"
|
| 18 |
+
COPY_MODEL = "llama-3.3-70b-versatile"
|
| 19 |
+
|
| 20 |
+
# ─── Category Voice Templates (Pillar 5) ─────────────────────────────────────
|
| 21 |
+
|
| 22 |
+
# Per-category system-prompt preambles, keyed by category slug. The text is
# wrapped with implicit string concatenation; each value is one single string.
CATEGORY_VOICES = {
    "dentists": (
        "You are Vera, a clinical growth assistant. "
        "Tone: peer-clinical, collegial. "
        "Use 'Dr. {name}'. "
        "Reference JIDA/DCI sources. "
        "Avoid: 'guaranteed','100% safe','cure'. "
        "Focus on clinical credibility, patient recall, treatment conversion."
    ),
    "salons": (
        "You are Vera, a beauty growth assistant. "
        "Tone: warm, visual, timely. "
        "Use first names. "
        "Reference style trends, bridal seasons. "
        "Focus on bookings, aesthetic results, stylist expertise."
    ),
    "restaurants": (
        "You are Vera, a restaurant growth assistant. "
        "Tone: operator-to-operator, urgent. "
        "Reference footfall, match days, thali combos. "
        "Focus on event tie-ins, order volume, capacity."
    ),
    "gyms": (
        "You are Vera, a fitness growth assistant. "
        "Tone: coaching, motivational. "
        "Address seasonal dips directly. "
        "Reference member counts, churn, trial conversion. "
        "Focus on retention and reactivation."
    ),
    "pharmacies": (
        "You are Vera, a pharmacy growth assistant. "
        "Tone: clinical, trustworthy, precise. "
        "Never make medical claims. "
        "Reference stock alerts, refill cycles, compliance. "
        "Focus on patient care continuity."
    ),
}

# Preamble used when the category slug has no entry in CATEGORY_VOICES.
DEFAULT_VOICE = (
    "You are Vera, magicpin's merchant growth assistant. "
    "Be specific, data-driven, action-oriented. "
    "Use real numbers from context. "
    "One clear CTA."
)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _get_voice(category_slug: str, category_payload: Optional[Dict] = None) -> str:
    """Build the system-prompt voice for a category.

    Starts from the ``CATEGORY_VOICES`` entry for ``category_slug`` (or
    ``DEFAULT_VOICE`` when the slug is unknown). If ``category_payload`` has a
    ``voice`` section, its ``tone`` and up to eight taboo terms (read from
    ``vocab_taboo``, falling back to ``taboos``) are appended.
    """
    voice_prompt = CATEGORY_VOICES.get(category_slug, DEFAULT_VOICE)
    if not category_payload or "voice" not in category_payload:
        return voice_prompt

    voice_cfg = category_payload["voice"]
    tone = voice_cfg.get("tone", "")
    taboos = voice_cfg.get("vocab_taboo", voice_cfg.get("taboos", []))

    extras = []
    if tone:
        extras.append(f" Tone style: {tone}.")
    if taboos:
        banned = ", ".join(taboos[:8])
        extras.append(f" NEVER use these words/phrases: {banned}.")
    return voice_prompt + "".join(extras)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# ─── Step 1: Diagnostician (Cerebras llama3.1-8b) ────────────────────────────
|
| 47 |
+
|
| 48 |
+
# System prompt for the diagnostician model: it must answer with a single
# JSON object naming the one signal the next message should be built around.
DIAG_SYSTEM = """You are an expert business signal analyst. Given merchant context (category, merchant, trigger, customer), identify the SINGLE most critical signal for the next message.

Output ONLY this JSON — no markdown, no explanation:
{"signal": "<best_signal_type>", "signal_detail": "<why this signal matters now>", "best_offer": "<offer_id or title to pair, or null>", "key_fact": "<the ONE specific number/fact to anchor the message>", "merchant_name": "<name>", "owner_name": "<owner first name>"}"""
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _run_diagnostician(context_bundle: str) -> Dict[str, Any]:
    """Ask the Cerebras diagnostician model for the single most critical signal.

    Args:
        context_bundle: Serialized context text sent as the user message.

    Returns:
        The JSON object parsed from the model's reply, or ``{}`` when the API
        key is missing, the HTTP status is not 200, the request or parsing
        raises, or the parsed reply is not a JSON object.
    """
    if not CEREBRAS_API_KEY:
        logger.warning("CEREBRAS_API_KEY not set — using heuristic signal extraction")
        return {}

    try:
        resp = requests.post(
            CEREBRAS_URL,
            headers={
                "Authorization": f"Bearer {CEREBRAS_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": DIAG_MODEL,
                "messages": [
                    {"role": "system", "content": DIAG_SYSTEM},
                    {"role": "user", "content": context_bundle},
                ],
                "temperature": 0.0,
                "max_tokens": 400,
                "top_p": 1.0,
            },
            timeout=15,
        )

        if resp.status_code != 200:
            logger.error("Cerebras %d: %s", resp.status_code, resp.text[:300])
            return {}

        raw = resp.json()["choices"][0]["message"]["content"].strip()
        parsed = _parse_json(raw)
        # Honour the declared return type: anything but a JSON object (None,
        # a list, a bare string) is treated as "no signal".
        return parsed if isinstance(parsed, dict) else {}
    except Exception as e:
        # Deliberate best-effort boundary: any failure here yields an empty
        # signal rather than an exception.
        logger.error("Diagnostician error: %s", e)
        return {}
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# ─── Step 2: Copywriter (Groq llama-3.3-70b-versatile) ───────────────────────
|
| 85 |
+
|
| 86 |
+
def _build_copy_prompt(signal: Dict, context_bundle: str, is_reply: bool = False, reply_msg: str = "", conv_history: str = "") -> str:
|
| 87 |
+
parts = []
|
| 88 |
+
if is_reply:
|
| 89 |
+
parts.append(f'The merchant/customer said: "{reply_msg}"')
|
| 90 |
+
if conv_history:
|
| 91 |
+
parts.append(f"Conversation so far:\n{conv_history}")
|
| 92 |
+
parts.append("Craft a contextual reply. Acknowledge their message. Stay grounded in the signal.")
|
| 93 |
+
parts.append("If they said 'not interested' or hostile β action should be 'end'.")
|
| 94 |
+
parts.append("If it's an auto-reply (canned 'thank you for contacting') β action should be 'wait' with wait_seconds.")
|
| 95 |
+
parts.append("If they committed ('ok let's do it') β switch to ACTION mode, not more questions.")
|
| 96 |
+
parts.append("If off-topic (GST, unrelated) β politely decline and redirect to your signal.")
|
| 97 |
+
else:
|
| 98 |
+
parts.append("Craft a proactive outreach message for this merchant.")
|
| 99 |
+
parts.append("High compulsion, specific benchmark from their data, real offer, ONE clear CTA.")
|
| 100 |
+
|
| 101 |
+
parts.append(f"\nSignal: {json.dumps(signal)}")
|
| 102 |
+
parts.append(f"\nFull context:\n{context_bundle[:6000]}")
|
| 103 |
+
|
| 104 |
+
if is_reply:
|
| 105 |
+
parts.append("""
|
| 106 |
+
Output ONLY this JSON:
|
| 107 |
+
{"action": "send|wait|end", "body": "<message>", "cta": "<open_ended|binary_yes_no|multi_choice_slot|binary_confirm_cancel|none>", "rationale": "<why>", "wait_seconds": <int or null>}
|
| 108 |
+
|
| 109 |
+
Rules:
|
| 110 |
+
- "send": you have a message to send
|
| 111 |
+
- "wait": back off (set wait_seconds)
|
| 112 |
+
- "end": close conversation gracefully
|
| 113 |
+
- body: specific, grounded, no fabricated facts, no URLs
|
| 114 |
+
- If action is "end" or "wait", body can be empty or a short closing line""")
|
| 115 |
+
else:
|
| 116 |
+
parts.append("""
|
| 117 |
+
Output ONLY this JSON:
|
| 118 |
+
{"body": "<message text>", "cta": "<open_ended|binary_yes_no|multi_choice_slot|binary_confirm_cancel>", "send_as": "<vera|merchant_on_behalf>", "rationale": "<1-2 sentences>", "template_name": "<short_template_id>", "template_params": ["<param1>", "<param2>"]}
|
| 119 |
+
|
| 120 |
+
Rules:
|
| 121 |
+
- body MUST reference specific numbers/offers/facts from the context
|
| 122 |
+
- NEVER fabricate data not in the context
|
| 123 |
+
- NO URLs in body
|
| 124 |
+
- One clear CTA
|
| 125 |
+
- send_as: use "merchant_on_behalf" for customer-scoped triggers, "vera" otherwise""")
|
| 126 |
+
|
| 127 |
+
return "\n".join(parts)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def _run_copywriter(system_prompt: str, user_prompt: str) -> Dict[str, Any]:
    """Ask the Groq copywriter model to draft the message.

    Args:
        system_prompt: Voice/system instructions for the model.
        user_prompt: Task prompt, including the expected JSON output contract.

    Returns:
        The JSON object parsed from the model's reply, or ``{}`` when the API
        key is missing, the HTTP status is not 200, the request or parsing
        raises, or the parsed reply is not a JSON object.
    """
    if not GROQ_API_KEY:
        logger.warning("GROQ_API_KEY not set — using fallback message")
        return {}

    try:
        resp = requests.post(
            GROQ_URL,
            headers={
                "Authorization": f"Bearer {GROQ_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": COPY_MODEL,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                "temperature": 0.0,
                "max_tokens": 1024,
                "top_p": 1.0,
            },
            timeout=20,
        )

        if resp.status_code != 200:
            logger.error("Groq Copywriter %d: %s", resp.status_code, resp.text[:300])
            return {}

        raw = resp.json()["choices"][0]["message"]["content"].strip()
        parsed = _parse_json(raw)
        # Honour the declared return type: anything but a JSON object is
        # treated as "no draft".
        return parsed if isinstance(parsed, dict) else {}
    except Exception as e:
        # Deliberate best-effort boundary: any failure here yields an empty
        # draft rather than an exception.
        logger.error("Copywriter error: %s", e)
        return {}
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
# ─── Context Bundle Builder ──────────────────────────────────────────────────
|
| 161 |
+
|
| 162 |
+
def build_context_bundle(category: Dict, merchant: Dict, trigger: Dict, customer: Optional[Dict] = None) -> str:
    """Serialize the 4-context framework into a single text block for LLM input.

    Emits one ``=== SECTION ===`` header per context (category, merchant,
    trigger and, when given, customer) followed by ``Label: value`` lines.
    Optional fields are only written when they are non-empty.

    Args:
        category: Category context dict (``slug``, ``voice``, ``digest``, ...).
        merchant: Merchant context dict (``identity``, ``performance``, ...).
        trigger: Trigger context dict (``id``, ``kind``, ``payload``, ...).
        customer: Optional customer context dict; omitted section when falsy.

    Returns:
        The newline-joined text bundle.
    """
    def dump(value: Any) -> str:
        # ensure_ascii=False keeps "₹" and Hindi text readable for the LLM
        # instead of turning them into \uXXXX escapes.
        return json.dumps(value, ensure_ascii=False)

    def section(source: Dict, key: str) -> Dict:
        # A key that is present but null must behave like a missing key.
        return source.get(key) or {}

    parts = [f"=== CATEGORY ({category.get('slug','?')}) ==="]
    voice = section(category, "voice")
    parts.append(f"Tone: {voice.get('tone','?')}")
    taboos = voice.get("vocab_taboo", voice.get("taboos", []))
    if taboos:
        parts.append(f"Taboos: {taboos}")
    digest = category.get("digest") or []
    if digest:
        parts.append(f"Digest items: {dump(digest[:3])}")
    offers_cat = category.get("offer_catalog") or []
    if offers_cat:
        parts.append(f"Category offers: {dump(offers_cat[:4])}")
    peers = section(category, "peer_stats")
    if peers:
        parts.append(f"Peer stats: {dump(peers)}")
    seasonal = category.get("seasonal_beats") or []
    if seasonal:
        parts.append(f"Seasonal: {dump(seasonal[:3])}")
    trends = category.get("trend_signals") or []
    if trends:
        parts.append(f"Trends: {dump(trends[:3])}")

    parts.append(f"\n=== MERCHANT ({merchant.get('merchant_id','?')}) ===")
    ident = section(merchant, "identity")
    parts.append(f"Name: {ident.get('name','?')}, Owner: {ident.get('owner_first_name','?')}")
    parts.append(f"City: {ident.get('city','?')}, Locality: {ident.get('locality','?')}")
    parts.append(f"Languages: {ident.get('languages',[])}")
    perf = section(merchant, "performance")
    if perf:
        parts.append(
            f"Performance (30d): views={perf.get('views','?')}, calls={perf.get('calls','?')}, "
            f"ctr={perf.get('ctr','?')}, directions={perf.get('directions','?')}"
        )
        delta = perf.get("delta_7d") or {}
        if delta:
            parts.append(f"7d delta: {dump(delta)}")
    signals = merchant.get("signals") or []
    if signals:
        parts.append(f"Signals: {signals}")
    m_offers = merchant.get("offers") or []
    if m_offers:
        active = [o for o in m_offers if o.get("status") == "active"]
        parts.append(f"Active offers: {dump(active)}")
    conv_hist = merchant.get("conversation_history") or []
    if conv_hist:
        # Only the three most recent turns are included.
        parts.append(f"Conversation history: {dump(conv_hist[-3:])}")
    cust_agg = section(merchant, "customer_aggregate")
    if cust_agg:
        parts.append(f"Customer aggregate: {dump(cust_agg)}")
    reviews = merchant.get("review_themes") or []
    if reviews:
        parts.append(f"Review themes: {dump(reviews)}")
    sub = section(merchant, "subscription")
    if sub:
        parts.append(f"Subscription: {dump(sub)}")

    parts.append(f"\n=== TRIGGER ({trigger.get('id','?')}) ===")
    parts.append(f"Kind: {trigger.get('kind','?')}, Urgency: {trigger.get('urgency','?')}")
    parts.append(f"Scope: {trigger.get('scope','?')}")
    trig_payload = section(trigger, "payload")
    if trig_payload:
        parts.append(f"Payload: {dump(trig_payload)}")
    parts.append(f"Suppression key: {trigger.get('suppression_key','')}")

    if customer:
        parts.append(f"\n=== CUSTOMER ({customer.get('customer_id','?')}) ===")
        c_ident = section(customer, "identity")
        parts.append(f"Name: {c_ident.get('name','?')}, Lang: {c_ident.get('language_pref','?')}")
        rel = section(customer, "relationship")
        if rel:
            services = (rel.get("services_received") or [])[:5]
            parts.append(
                f"Visits: {rel.get('visits_total','?')}, Last: {rel.get('last_visit','?')}, "
                f"Services: {services}"
            )
        parts.append(f"State: {customer.get('state','?')}")
        prefs = section(customer, "preferences")
        if prefs:
            parts.append(f"Preferences: {dump(prefs)}")
        consent = section(customer, "consent")
        if consent:
            parts.append(f"Consent scope: {consent.get('scope',[])}")

    return "\n".join(parts)
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
# ─── Compose for /v1/tick ────────────────────────────────────────────────────
|
| 245 |
+
|
| 246 |
+
def compose_tick_action(
    category: Dict, merchant: Dict, trigger: Dict, customer: Optional[Dict] = None
) -> Dict[str, Any]:
    """Run the full compose pipeline for a tick and return the action dict.

    Steps: build the text bundle, extract a signal (``_run_diagnostician``,
    falling back to ``_heuristic_signal``), draft copy (``_run_copywriter``,
    falling back to ``_fallback_tick_message`` when no body comes back), then
    wrap the draft in the action envelope.

    Args:
        category: Category context dict.
        merchant: Merchant context dict.
        trigger: Trigger context dict.
        customer: Optional customer context dict for customer-scoped triggers.

    Returns:
        Dict with ``conversation_id``, ``merchant_id``, ``customer_id``,
        ``send_as``, ``trigger_id``, ``template_name``, ``template_params``,
        ``body``, ``cta``, ``suppression_key`` and ``rationale``.
    """
    cat_slug = merchant.get("category_slug") or category.get("slug") or "general"
    bundle = build_context_bundle(category, merchant, trigger, customer)

    # Step 1: Diagnostician (deterministic heuristic when the LLM yields nothing)
    signal = _run_diagnostician(bundle) or _heuristic_signal(merchant, trigger, customer)

    # Step 2: Copywriter
    voice = _get_voice(cat_slug, category)
    user_prompt = _build_copy_prompt(signal, bundle, is_reply=False)
    result = _run_copywriter(voice, user_prompt)

    if not isinstance(result, dict) or not result.get("body"):
        result = _fallback_tick_message(signal, merchant, trigger, customer, category)

    # Build the full action envelope
    mid = merchant.get("merchant_id", "")
    cid = customer.get("customer_id") if customer else trigger.get("customer_id")
    tid = trigger.get("id", "")
    conv_id = f"conv_{mid}_{tid}" if tid else f"conv_{mid}_{uuid.uuid4().hex[:8]}"

    # The LLM may emit null/empty/unknown values for optional fields; ``or``
    # (rather than ``.get(key, default)``) makes the defaults apply then too.
    send_as = result.get("send_as")
    if send_as not in ("vera", "merchant_on_behalf"):
        send_as = "merchant_on_behalf" if cid else "vera"
    template_params = result.get("template_params")
    if not isinstance(template_params, list):
        template_params = []

    return {
        "conversation_id": conv_id,
        "merchant_id": mid,
        "customer_id": cid,
        "send_as": send_as,
        "trigger_id": tid,
        "template_name": result.get("template_name") or f"vera_{trigger.get('kind','generic')}_v1",
        "template_params": template_params,
        "body": result.get("body") or "",
        "cta": result.get("cta") or "open_ended",
        "suppression_key": trigger.get("suppression_key") or f"auto:{mid}:{tid}",
        "rationale": result.get("rationale") or "Composed from category+merchant+trigger context",
    }
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
# ─── Compose for /v1/reply ───────────────────────────────────────────────────
|
| 288 |
+
|
| 289 |
+
def compose_reply(
    merchant: Dict, category: Dict, message: str,
    conversation_history: Optional[List[Dict]] = None,
    trigger: Optional[Dict] = None, customer: Optional[Dict] = None,
) -> Dict[str, Any]:
    """Run the full compose pipeline for a reply.

    Args:
        merchant: Merchant context dict.
        category: Category context dict.
        message: The incoming message being replied to.
        conversation_history: Prior turns; only the last six are passed to the
            copywriter, formatted as ``[role]: message``.
        trigger: Trigger context; a minimal ``reply`` trigger is synthesized
            when none is supplied.
        customer: Optional customer context dict.

    Returns:
        Dict with at least ``action``, ``body``, ``cta`` and ``rationale``;
        ``wait_seconds`` is guaranteed when ``action`` is ``"wait"``.
    """
    cat_slug = merchant.get("category_slug") or category.get("slug") or "general"

    # Build conversation history string (tolerating turns with missing keys)
    recent_turns = (conversation_history or [])[-6:]
    conv_str = "\n".join(
        f"[{turn.get('role', '?')}]: {turn.get('message', '')}"
        for turn in recent_turns
        if isinstance(turn, dict)
    )

    # Use a minimal trigger if none provided
    if not trigger:
        trigger = {"id": "reply_context", "kind": "reply", "scope": "merchant",
                   "payload": {}, "urgency": 2, "suppression_key": ""}

    bundle = build_context_bundle(category, merchant, trigger, customer)

    # Step 1: Diagnostician (deterministic heuristic when the LLM yields nothing)
    signal = _run_diagnostician(bundle) or _heuristic_signal(merchant, trigger, customer)

    # Step 2: Copywriter (reply mode)
    voice = _get_voice(cat_slug, category)
    user_prompt = _build_copy_prompt(signal, bundle, is_reply=True, reply_msg=message, conv_history=conv_str)
    result = _run_copywriter(voice, user_prompt)

    # A "send" without any text is unusable, so it is treated like no draft.
    unusable = (
        not isinstance(result, dict)
        or not result
        or ((result.get("action") or "send") == "send" and not result.get("body"))
    )
    if unusable:
        result = _fallback_reply(message, merchant, signal)

    # Ensure required fields; null values from the LLM count as missing.
    required_defaults = {
        "action": "send",
        "body": "",
        "cta": "open_ended",
        "rationale": "Reply composed from context",
    }
    for key, default in required_defaults.items():
        if result.get(key) is None:
            result[key] = default

    # Clean up wait_seconds: a wait without a duration backs off one hour.
    if result["action"] == "wait" and not result.get("wait_seconds"):
        result["wait_seconds"] = 3600

    return result
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
# ─── Heuristic Fallbacks ─────────────────────────────────────────────────────
|
| 339 |
+
|
| 340 |
+
def _heuristic_signal(merchant: Dict, trigger: Dict, customer: Optional[Dict] = None) -> Dict:
|
| 341 |
+
"""Deterministic signal extraction when Cerebras is unavailable."""
|
| 342 |
+
ident = merchant.get("identity", {})
|
| 343 |
+
perf = merchant.get("performance", {})
|
| 344 |
+
offers = merchant.get("offers", [])
|
| 345 |
+
active_offers = [o for o in offers if o.get("status") == "active"]
|
| 346 |
+
|
| 347 |
+
return {
|
| 348 |
+
"signal": trigger.get("kind", "general_engagement"),
|
| 349 |
+
"signal_detail": json.dumps(trigger.get("payload", {}))[:200],
|
| 350 |
+
"best_offer": active_offers[0].get("title") if active_offers else None,
|
| 351 |
+
"key_fact": f"views={perf.get('views','?')}, calls={perf.get('calls','?')}" if perf else "",
|
| 352 |
+
"merchant_name": ident.get("name", ""),
|
| 353 |
+
"owner_name": ident.get("owner_first_name", ""),
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
def _fallback_tick_message(signal: Dict, merchant: Dict, trigger: Dict, customer: Optional[Dict], category: Dict) -> Dict:
|
| 358 |
+
"""Grounded fallback when Groq is unavailable."""
|
| 359 |
+
ident = merchant.get("identity", {})
|
| 360 |
+
name = ident.get("owner_first_name", ident.get("name", ""))
|
| 361 |
+
perf = merchant.get("performance", {})
|
| 362 |
+
offers = merchant.get("offers", [])
|
| 363 |
+
active = [o for o in offers if o.get("status") == "active"]
|
| 364 |
+
kind = trigger.get("kind", "update")
|
| 365 |
+
trig_payload = trigger.get("payload", {})
|
| 366 |
+
cat_slug = category.get("slug", "")
|
| 367 |
+
|
| 368 |
+
# Build a grounded message based on trigger kind
|
| 369 |
+
if customer:
|
| 370 |
+
c_name = customer.get("identity", {}).get("name", "Customer")
|
| 371 |
+
body = f"Hi {c_name}, {ident.get('name','')} here. "
|
| 372 |
+
if kind == "recall_due":
|
| 373 |
+
slots = trig_payload.get("available_slots", [])
|
| 374 |
+
slot_str = " or ".join([s.get("label","") for s in slots[:2]]) if slots else "this week"
|
| 375 |
+
body += f"Your {trig_payload.get('service_due','check-up')} is due. Available: {slot_str}."
|
| 376 |
+
if active:
|
| 377 |
+
body += f" {active[0]['title']} included."
|
| 378 |
+
elif kind == "chronic_refill_due":
|
| 379 |
+
mols = trig_payload.get("molecule_list", [])
|
| 380 |
+
body += f"Your refill for {', '.join(mols[:3])} is coming up. "
|
| 381 |
+
body += "Shall I arrange home delivery?"
|
| 382 |
+
else:
|
| 383 |
+
body += f"Following up on your recent visit. We have an update for you."
|
| 384 |
+
return {"body": body, "cta": "binary_yes_no", "send_as": "merchant_on_behalf",
|
| 385 |
+
"rationale": f"Customer-scoped {kind} trigger", "template_name": f"vera_{kind}_v1", "template_params": [c_name, name]}
|
| 386 |
+
|
| 387 |
+
# Merchant-scoped
|
| 388 |
+
greeting = f"Dr. {name}" if cat_slug == "dentists" else name
|
| 389 |
+
body = f"{greeting}, "
|
| 390 |
+
|
| 391 |
+
if kind == "research_digest":
|
| 392 |
+
digest = category.get("digest", [])
|
| 393 |
+
top_id = trig_payload.get("top_item_id", "")
|
| 394 |
+
item = next((d for d in digest if d.get("id") == top_id), None)
|
| 395 |
+
if item:
|
| 396 |
+
body += f"{item.get('source','New research')} β {item['title']}. {item.get('summary','')[:100]}"
|
| 397 |
+
body += f" Worth a look?"
|
| 398 |
+
else:
|
| 399 |
+
body += "New research relevant to your practice just landed. Want me to pull the details?"
|
| 400 |
+
elif kind == "perf_dip" or kind == "seasonal_perf_dip":
|
| 401 |
+
metric = trig_payload.get("metric", "views")
|
| 402 |
+
delta = trig_payload.get("delta_pct", 0)
|
| 403 |
+
body += f"Your {metric} dipped {abs(int(delta*100))}% this week. "
|
| 404 |
+
if active:
|
| 405 |
+
body += f"Your '{active[0]['title']}' offer could help recover. Want me to push it?"
|
| 406 |
+
else:
|
| 407 |
+
body += "Want me to draft an offer to bring traffic back?"
|
| 408 |
+
elif kind == "renewal_due":
|
| 409 |
+
days = trig_payload.get("days_remaining", "?")
|
| 410 |
+
body += f"Your Pro subscription expires in {days} days. "
|
| 411 |
+
body += f"Current performance: {perf.get('views','?')} views, {perf.get('calls','?')} calls this month. Renew to keep the momentum?"
|
| 412 |
+
elif kind == "ipl_match_today":
|
| 413 |
+
match = trig_payload.get("match", "tonight's match")
|
| 414 |
+
body += f"{match} is tonight! Your locality gets high footfall on match nights. "
|
| 415 |
+
if active:
|
| 416 |
+
body += f"Want me to push your '{active[0]['title']}' as a match-night special?"
|
| 417 |
+
else:
|
| 418 |
+
body += "Want to run a match-night special?"
|
| 419 |
+
else:
|
| 420 |
+
body += f"Quick update based on your latest data β {perf.get('views','?')} views this month"
|
| 421 |
+
if active:
|
| 422 |
+
body += f", and your '{active[0]['title']}' is live"
|
| 423 |
+
body += ". Want me to help optimize?"
|
| 424 |
+
|
| 425 |
+
return {"body": body, "cta": "open_ended", "send_as": "vera",
|
| 426 |
+
"rationale": f"Signal: {kind}. Grounded in merchant perf + trigger payload.",
|
| 427 |
+
"template_name": f"vera_{kind}_v1", "template_params": [greeting]}
|
| 428 |
+
|
| 429 |
+
|
| 430 |
+
def _fallback_reply(message: str, merchant: Dict, signal: Dict) -> Dict:
|
| 431 |
+
"""Deterministic fallback for reply composition."""
|
| 432 |
+
msg_lower = message.lower().strip()
|
| 433 |
+
|
| 434 |
+
# Auto-reply detection
|
| 435 |
+
auto_phrases = ["thank you for contacting", "our team will respond", "we will get back",
|
| 436 |
+
"auto-reply", "out of office", "away message"]
|
| 437 |
+
if any(p in msg_lower for p in auto_phrases):
|
| 438 |
+
return {"action": "wait", "wait_seconds": 14400,
|
| 439 |
+
"body": "", "cta": "none",
|
| 440 |
+
"rationale": "Detected auto-reply. Backing off 4 hours."}
|
| 441 |
+
|
| 442 |
+
# Hostile detection
|
| 443 |
+
hostile_phrases = ["stop messaging", "not interested", "useless", "spam", "stop sending",
|
| 444 |
+
"don't message", "unsubscribe", "leave me alone"]
|
| 445 |
+
if any(p in msg_lower for p in hostile_phrases):
|
| 446 |
+
return {"action": "end", "body": "",
|
| 447 |
+
"rationale": "Merchant explicitly opted out. Closing conversation gracefully."}
|
| 448 |
+
|
| 449 |
+
# Commitment detection β switch to action
|
| 450 |
+
commit_phrases = ["ok let's do it", "let's do it", "yes do it", "go ahead", "proceed",
|
| 451 |
+
"sounds good let's", "ok go ahead", "yes please", "confirm", "let's go"]
|
| 452 |
+
if any(p in msg_lower for p in commit_phrases):
|
| 453 |
+
ident = merchant.get("identity", {})
|
| 454 |
+
name = ident.get("owner_first_name", ident.get("name", ""))
|
| 455 |
+
return {"action": "send",
|
| 456 |
+
"body": f"Great, {name}! Setting this up now. I'll have the draft ready in a moment. You'll be able to review before anything goes live.",
|
| 457 |
+
"cta": "binary_confirm_cancel",
|
| 458 |
+
"rationale": "Merchant committed β switching from qualifying to action mode."}
|
| 459 |
+
|
| 460 |
+
# Off-topic detection
|
| 461 |
+
offtopic = ["gst", "tax", "invoice", "salary", "loan", "insurance"]
|
| 462 |
+
if any(w in msg_lower for w in offtopic):
|
| 463 |
+
return {"action": "send",
|
| 464 |
+
"body": "That's outside what I can help with directly β best to check with your CA on that. Coming back to your business growth β want me to continue with what we were working on?",
|
| 465 |
+
"cta": "open_ended",
|
| 466 |
+
"rationale": "Off-topic ask declined politely; redirecting to growth signal."}
|
| 467 |
+
|
| 468 |
+
# Default engaged reply
|
| 469 |
+
return {"action": "send",
|
| 470 |
+
"body": "Got it β let me work on that. I'll have something ready for you shortly.",
|
| 471 |
+
"cta": "open_ended",
|
| 472 |
+
"rationale": "Acknowledged merchant reply; advancing conversation."}
|
| 473 |
+
|
| 474 |
+
|
| 475 |
+
def _parse_json(text: str) -> Optional[Dict]:
|
| 476 |
+
"""Robustly parse JSON from LLM output."""
|
| 477 |
+
try:
|
| 478 |
+
return json.loads(text)
|
| 479 |
+
except json.JSONDecodeError:
|
| 480 |
+
pass
|
| 481 |
+
match = re.search(r'```(?:json)?\s*\n?(.*?)\n?```', text, re.DOTALL)
|
| 482 |
+
if match:
|
| 483 |
+
try:
|
| 484 |
+
return json.loads(match.group(1).strip())
|
| 485 |
+
except json.JSONDecodeError:
|
| 486 |
+
pass
|
| 487 |
+
s, e = text.find("{"), text.rfind("}")
|
| 488 |
+
if s != -1 and e > s:
|
| 489 |
+
try:
|
| 490 |
+
return json.loads(text[s:e+1])
|
| 491 |
+
except json.JSONDecodeError:
|
| 492 |
+
pass
|
| 493 |
+
return None
|
magicpin-ai-challenge/challenge-brief.md
ADDED
|
@@ -0,0 +1,544 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# magicpin AI Challenge — Build a Merchant AI Assistant ("Vera")
|
| 2 |
+
|
| 3 |
+
**Status**: Brief — designed to be loaded as standalone context in a fresh AI session.
|
| 4 |
+
**Last updated**: 2026-04-26
|
| 5 |
+
**Audience**: Challenge participants + the AI judge that will evaluate submissions.
|
| 6 |
+
|
| 7 |
+
> **Read me first** — this document is fully self-contained. A reader (human or LLM) starting cold should be able to understand the company, the product, the challenge, the framework, the dataset, and the evaluation rubric without needing any other reference.
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## 1. The challenge in one sentence
|
| 12 |
+
|
| 13 |
+
> Build an AI chatbot that engages and assists merchants on WhatsApp the way **Vera** (magicpin's merchant-AI assistant) does — but better. Same base dataset for every participant. AI judges the outcome.
|
| 14 |
+
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
## 2. About magicpin (background, ~30 sec read)
|
| 18 |
+
|
| 19 |
+
magicpin is one of India's largest local-commerce platforms — a network of ~100,000 merchant partners across 50+ Indian cities (restaurants, salons, gyms, dentists, retailers, car dealers, etc.). Customers discover merchants on the magicpin app/web, transact, and earn cashback. Merchants benefit from visibility, walk-ins, and online orders.
|
| 20 |
+
|
| 21 |
+
magicpin runs a marketing-assistant product called **Vera** that talks to merchants over WhatsApp, helps them grow their Google Business Profile (GBP), runs campaigns for them, and answers customer questions on their behalf. Vera is the product you're rebuilding.
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
## 3. About Vera (the product you're rebuilding)
|
| 26 |
+
|
| 27 |
+
### What Vera does today
|
| 28 |
+
- Talks to ~6,000-10,000 merchants per day over WhatsApp
|
| 29 |
+
- Helps them improve their Google Business Profile (photos, hours, posts, reviews)
|
| 30 |
+
- Runs marketing campaigns for them (festival, news, local events)
|
| 31 |
+
- Recommends pricing, offers, and content
|
| 32 |
+
- Answers customers' questions about merchants on their behalf
|
| 33 |
+
|
| 34 |
+
### Live engagement shape (real numbers — Apr 23-25, 2026)
|
| 35 |
+
|
| 36 |
+
| Metric | Apr 23 | Apr 24 | Apr 25 |
|
| 37 |
+
|---|---:|---:|---:|
|
| 38 |
+
| Merchants who engaged with Vera | 6,917 | 10,158 | 5,181 |
|
| 39 |
+
| Total messages mx → Vera | 34,022 | 47,651 | 23,712 |
|
| 40 |
+
| Avg messages per engaged merchant | 4.9 | 4.7 | 4.6 |
|
| 41 |
+
| Customers who engaged with Vera | 720 | 707 | 681 |
|
| 42 |
+
| Unique merchants customers asked about | 580 | 514 | 466 |
|
| 43 |
+
| Total messages cx → Vera | 3,713 | 4,839 | 4,695 |
|
| 44 |
+
| Avg messages per customer | 5.2 | 6.8 | 6.9 |
|
| 45 |
+
|
| 46 |
+
### Today's biggest pain points (your opportunities to outperform Vera)
|
| 47 |
+
|
| 48 |
+
1. **Auto-reply pollution**: 40-70% of "merchant replies" are the merchant's own WhatsApp Business canned auto-reply ("Thank you for contacting…"). Production Vera detects this but burns 2-3 turns each time. **Better detection + faster routing wins**.
|
| 49 |
+
2. **Intent-handoff failures**: When a merchant says "I want to join," production Vera often goes back to qualifying questions instead of starting the action. **Better intent detection wins**.
|
| 50 |
+
3. **Generic copy**: discount-style offers ("10% off") rarely engage Indian merchants. Service+price ("Haircut @ ₹99", "Dental Cleaning @ ₹299") works better. **Compelling, category-correct copy wins**.
|
| 51 |
+
4. **Low engagement frequency**: Functional nudges (renewal due, profile incomplete) are inherently rare. Engaging merchants 3-5×/week needs *curiosity-driven* and *knowledge-driven* conversations, not just reminder-style ones. **Diversified conversation portfolio wins**.
|
| 52 |
+
|
| 53 |
+
---
|
| 54 |
+
|
| 55 |
+
## 4. The 4-context framework (what your bot must work with)
|
| 56 |
+
|
| 57 |
+
Every message Vera sends — whether to the merchant or the merchant's customer — is composed from **four context layers**. Your bot will receive these as structured input and must produce the next message.
|
| 58 |
+
|
| 59 |
+
```
|
| 60 |
+
your_bot.compose(category, merchant, trigger, customer?) → message
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
### 4.1 CategoryContext
|
| 64 |
+
|
| 65 |
+
Slow-changing knowledge pack about *the kind of business this is*. Shared across all merchants in the vertical.
|
| 66 |
+
|
| 67 |
+
```python
|
| 68 |
+
@dataclass
|
| 69 |
+
class CategoryContext:
|
| 70 |
+
slug: str # "dentists", "salons", "restaurants", ...
|
| 71 |
+
offer_catalog: list[OfferTemplate] # canonical service+price patterns for this vertical
|
| 72 |
+
voice: VoiceProfile # tone, allowed vocabulary, taboos
|
| 73 |
+
peer_stats: PeerStats # benchmarks: avg rating, avg reviews, typical CTR
|
| 74 |
+
digest: list[DigestItem] # weekly research / compliance / trend items, source-cited
|
| 75 |
+
patient_content_library: list[ContentItem] # content the merchant can reshare with customers
|
| 76 |
+
seasonal_beats: list[SeasonalBeat] # e.g., "exam-stress bruxism spikes Nov-Feb"
|
| 77 |
+
trend_signals: list[TrendSignal] # e.g., "clear aligners Delhi searches +62% YoY"
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
**Example for `dentists`**:
|
| 81 |
+
- voice: technical terms welcome ("fluoride varnish at 3-month recall"), legal taboos ("cure", "guaranteed"), peer tone not hype
|
| 82 |
+
- offer_catalog: `["Dental Cleaning @ ₹299", "Free Consultation", "Teeth Whitening @ ₹1,499"]`
|
| 83 |
+
- peer_stats: `{avg_rating: 4.4, avg_reviews: 62, avg_ctr: 0.030}` (South Delhi solo practices)
|
| 84 |
+
- digest: this week's items from JIDA / DCI / Dental Tribune India / IDA Delhi calendar
|
| 85 |
+
- seasonal_beats: `[{month: "Nov-Feb", note: "exam-stress bruxism spike"}, {month: "Oct-Dec", note: "wedding whitening peak"}]`
|
| 86 |
+
|
| 87 |
+
### 4.2 MerchantContext
|
| 88 |
+
|
| 89 |
+
The specific business's current state. Refreshed daily for performance, real-time for conversation history.
|
| 90 |
+
|
| 91 |
+
```python
|
| 92 |
+
@dataclass
|
| 93 |
+
class MerchantContext:
|
| 94 |
+
merchant_id: str
|
| 95 |
+
identity: Identity # name, place_id, locality, city, verified, languages
|
| 96 |
+
subscription: Subscription # status, days_remaining, plan
|
| 97 |
+
performance: PerformanceSnapshot # views/calls/CTR/leads/directions, 30d + 7d deltas
|
| 98 |
+
offers: list[MerchantOffer] # active + paused offers from their catalog
|
| 99 |
+
conversation_history: ConversationHistory # last N turns w/ Vera, w/ engagement tags
|
| 100 |
+
customer_aggregate: CustomerAggregate # active count, lapsed count, retention %
|
| 101 |
+
signals: list[DerivedSignal] # "stale_posts", "ctr_below_peer", "dormant", ...
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
**Running example — Dr. Meera's Dental Clinic**:
|
| 105 |
+
- identity: name="Dr. Meera's Dental Clinic", city="Delhi", locality="Lajpat Nagar", verified=true
|
| 106 |
+
- subscription: status="active", days_remaining=82, plan="Pro"
|
| 107 |
+
- performance (30d): views=2410, calls=18, directions=45, ctr=0.021
|
| 108 |
+
- offers: `["Dental Cleaning @ ₹299"]` active; `["Deep Cleaning @ ₹499"]` expired 2mo ago
|
| 109 |
+
- customer_aggregate: 540 unique patients YTD, 78 lapsed >180 days, 38% 6mo retention
|
| 110 |
+
- signals: `["stale_posts" (last post 22d ago), "ctr_below_peer_median" (peer median 0.030)]`
|
| 111 |
+
|
| 112 |
+
### 4.3 TriggerContext
|
| 113 |
+
|
| 114 |
+
The *event* that prompts this message right now. Every message must have one.
|
| 115 |
+
|
| 116 |
+
```python
|
| 117 |
+
@dataclass
|
| 118 |
+
class TriggerContext:
|
| 119 |
+
id: str
|
| 120 |
+
scope: Literal["merchant", "customer"]
|
| 121 |
+
kind: str # "research_digest", "recall_due", "perf_spike", ...
|
| 122 |
+
source: Literal["external", "internal"]
|
| 123 |
+
payload: dict
|
| 124 |
+
urgency: int # 1-5
|
| 125 |
+
suppression_key: str # for dedup
|
| 126 |
+
expires_at: datetime
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
**External triggers** (happen outside the merchant's account):
|
| 130 |
+
- `festival_upcoming` (Diwali in 4 days)
|
| 131 |
+
- `weather_heatwave` (42°C today in Delhi)
|
| 132 |
+
- `local_news_event` (Mumbai-Pune expressway closed 3h)
|
| 133 |
+
- `category_research_digest_release` (JIDA Oct issue dropped)
|
| 134 |
+
- `regulation_change` (DCI radiograph dose limit revised)
|
| 135 |
+
- `competitor_opened` (new dentist 1.3km away on GBP)
|
| 136 |
+
- `category_trend_movement` ("clear aligners Delhi" searches +62%)
|
| 137 |
+
|
| 138 |
+
**Internal triggers** (happen within the merchant's data):
|
| 139 |
+
- `perf_spike` (yesterday's views +28% vs avg)
|
| 140 |
+
- `perf_dip` (calls dropped 40% week-over-week)
|
| 141 |
+
- `milestone_reached` (crossed 100 reviews)
|
| 142 |
+
- `dormant_with_vera` (no merchant message in 14 days)
|
| 143 |
+
- `customer_lapsed_soft` (a customer's 6mo recall window opens)
|
| 144 |
+
- `appointment_tomorrow` (booking exists for next day)
|
| 145 |
+
- `review_theme_emerged` (3 reviews this week mention "wait time")
|
| 146 |
+
- `scheduled_recurring` (weekly Friday curious-ask cadence)
|
| 147 |
+
|
| 148 |
+
**Example trigger for Dr. Meera**:
|
| 149 |
+
```json
|
| 150 |
+
{
|
| 151 |
+
"id": "trg_2026_04_26_research_digest_dentists",
|
| 152 |
+
"scope": "merchant",
|
| 153 |
+
"kind": "research_digest",
|
| 154 |
+
"source": "external",
|
| 155 |
+
"payload": {
|
| 156 |
+
"category": "dentists",
|
| 157 |
+
"top_item": {
|
| 158 |
+
"title": "3-mo fluoride recall cuts caries recurrence 38% better than 6-mo",
|
| 159 |
+
"source": "JIDA Oct 2026, p.14",
|
| 160 |
+
"trial_n": 2100,
|
| 161 |
+
"patient_segment": "high-risk adults"
|
| 162 |
+
}
|
| 163 |
+
},
|
| 164 |
+
"urgency": 2,
|
| 165 |
+
"suppression_key": "research:dentists:2026-W17",
|
| 166 |
+
"expires_at": "2026-05-03T00:00:00Z"
|
| 167 |
+
}
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
### 4.4 CustomerContext (optional — only for customer-facing messages)
|
| 171 |
+
|
| 172 |
+
When the bot sends a message *on behalf of the merchant* to one of the merchant's own customers, this is populated.
|
| 173 |
+
|
| 174 |
+
```python
|
| 175 |
+
@dataclass
|
| 176 |
+
class CustomerContext:
|
| 177 |
+
customer_id: str
|
| 178 |
+
merchant_id: str # which merchant this customer belongs to
|
| 179 |
+
identity: CustomerIdentity # name, phone, language preference
|
| 180 |
+
relationship: Relationship # first_visit, last_visit, visits_total, services_received
|
| 181 |
+
state: Literal["new", "active", "lapsed_soft", "lapsed_hard", "churned"]
|
| 182 |
+
preferences: Preferences # preferred_time, channel, opt-in scope
|
| 183 |
+
consent: Consent # when + how they opted in to merchant outreach
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
**Example — Priya, a patient of Dr. Meera**:
|
| 187 |
+
- identity: name="Priya", phone="<redacted>", language_pref="hi-en mix"
|
| 188 |
+
- relationship: first_visit="2025-11-04", last_visit="2026-05-12", visits_total=4, services=["cleaning", "cleaning", "whitening", "cleaning"]
|
| 189 |
+
- state: "lapsed_soft" (5mo since last visit)
|
| 190 |
+
- preferences: weekday evening slots (derived from booking history)
|
| 191 |
+
- consent: opted in via merchant on 2025-11-04, scope=["recall_reminders", "appointment_reminders"]
|
| 192 |
+
|
| 193 |
+
---
|
| 194 |
+
|
| 195 |
+
## 5. The composition you must implement
|
| 196 |
+
|
| 197 |
+
```
|
| 198 |
+
def compose(
|
| 199 |
+
category: CategoryContext,
|
| 200 |
+
merchant: MerchantContext,
|
| 201 |
+
trigger: TriggerContext,
|
| 202 |
+
customer: Optional[CustomerContext] = None
|
| 203 |
+
) -> ComposedMessage:
|
| 204 |
+
"""
|
| 205 |
+
Returns:
|
| 206 |
+
body — the WhatsApp message body
|
| 207 |
+
cta — the call-to-action (binary YES/STOP, open-ended, none)
|
| 208 |
+
send_as — "vera" (merchant-facing) or "merchant_on_behalf" (customer-facing)
|
| 209 |
+
suppression_key — for dedup
|
| 210 |
+
rationale — short explanation of why this message, what it should achieve
|
| 211 |
+
"""
|
| 212 |
+
```
|
| 213 |
+
|
| 214 |
+
### Constraints your bot must respect
|
| 215 |
+
|
| 216 |
+
1. **WhatsApp 24h session window** — the *first* outbound to a merchant or customer must use a pre-approved template (use any sensible template structure with `{{1}}/{{2}}/…` parameters; we won't actually call Meta). Subsequent messages within 24h of a merchant reply can be free-form.
|
| 217 |
+
2. **Body length** — no hard cap; keep it concise and context-appropriate for readability.
|
| 218 |
+
3. **Single primary CTA** — binary choice (YES/STOP) for action triggers; no CTA acceptable for pure-information triggers.
|
| 219 |
+
4. **URLs** — allowed when they add clear value to the merchant.
|
| 220 |
+
5. **Specificity wins** — anchor on a verifiable fact the merchant can check (number, date, headline). "X% off" is generic; "Haircut @ ₹99" is specific.
|
| 221 |
+
6. **Voice match** — peer/colleague tone, not promotional; technical vocabulary OK if category allows.
|
| 222 |
+
7. **Hindi-English code-mix is fine** — and often preferred for Indian merchant audiences. Match the merchant's language.
|
| 223 |
+
8. **Don't fabricate** — if data isn't in the contexts, don't invent it. No fake offers, no fake research citations, no fake competitor names.
|
| 224 |
+
|
| 225 |
+
---
|
| 226 |
+
|
| 227 |
+
## 6. The base dataset (provided to every participant)
|
| 228 |
+
|
| 229 |
+
Every participant gets the same JSON dataset:
|
| 230 |
+
|
| 231 |
+
```
|
| 232 |
+
dataset/
|
| 233 |
+
├── categories/
|
| 234 |
+
│   ├── dentists.json
|
| 235 |
+
│   ├── salons.json
|
| 236 |
+
│   ├── restaurants.json
|
| 237 |
+
│   ├── gyms.json
|
| 238 |
+
│   └── pharmacies.json          # 5 fully-populated CategoryContexts
|
| 239 |
+
├── merchants/
|
| 240 |
+
│   ├── m_001_drmeera_dentist_delhi.json
|
| 241 |
+
│   ├── m_002_studio11_salon_hyderabad.json
|
| 242 |
+
│   ├── m_003_pizzajunction_restaurant_delhi.json
|
| 243 |
+
│   ├── ...                      # 50 fully-populated MerchantContexts (10 per category)
|
| 244 |
+
│   └── m_050_*.json
|
| 245 |
+
├── customers/
|
| 246 |
+
│   ├── c_001_priya_for_m001.json
|
| 247 |
+
│   ├── c_002_rohit_for_m001.json
|
| 248 |
+
│   ├── ...                      # 200 customer profiles, distributed across merchants
|
| 249 |
+
│   └── c_200_*.json
|
| 250 |
+
└── triggers/
|
| 251 |
+
    ├── trg_external_001_diwali.json
|
| 252 |
+
    ├── trg_external_002_heatwave_delhi.json
|
| 253 |
+
    ├── trg_internal_001_recall_due_priya.json
|
| 254 |
+
    ├── trg_internal_002_perf_spike_m003.json
|
| 255 |
+
    ├── ...                      # 100 sample triggers covering all kinds
|
| 256 |
+
    └── trg_100_*.json
|
| 257 |
+
```
|
| 258 |
+
|
| 259 |
+
Each trigger references one merchant (via `payload.merchant_id`) and optionally one customer. Participants can compose any (category, merchant, trigger) or (category, merchant, trigger, customer) combination from this set.
|
| 260 |
+
|
| 261 |
+
A canonical "submission test set" is **30 specific (merchant, trigger) pairs** that all participants must produce a message for. This gives the AI judge a like-for-like comparison.
|
| 262 |
+
|
| 263 |
+
---
|
| 264 |
+
|
| 265 |
+
## 7. What participants submit
|
| 266 |
+
|
| 267 |
+
A single Python module + a JSONL file:
|
| 268 |
+
|
| 269 |
+
### 7.1 `bot.py` (your code)
|
| 270 |
+
|
| 271 |
+
```python
|
| 272 |
+
def compose(category: dict, merchant: dict, trigger: dict, customer: dict | None) -> dict:
|
| 273 |
+
"""
|
| 274 |
+
Inputs are the dicts loaded from the dataset JSON.
|
| 275 |
+
Return a dict with keys: body, cta, send_as, suppression_key, rationale.
|
| 276 |
+
Free to use any LLM, any prompting strategy, any retrieval.
|
| 277 |
+
Must be deterministic given the same inputs (set temperature=0 if using LLMs).
|
| 278 |
+
Must complete in < 30s per call.
|
| 279 |
+
"""
|
| 280 |
+
...
|
| 281 |
+
```
|
| 282 |
+
|
| 283 |
+
### 7.2 `submission.jsonl` (30 lines, one per test pair)
|
| 284 |
+
|
| 285 |
+
```json
|
| 286 |
+
{"test_id": "T01", "body": "Hi Dr. Meera, JIDA's Oct issue ...", "cta": "open_ended", "send_as": "vera", "suppression_key": "...", "rationale": "..."}
|
| 287 |
+
```
|
| 288 |
+
|
| 289 |
+
### 7.3 `README.md` (1 page max)
|
| 290 |
+
|
| 291 |
+
Briefly: your approach, what tradeoffs you made, what additional context would have helped you most.
|
| 292 |
+
|
| 293 |
+
### 7.4 Optional: `conversation_handlers.py`
|
| 294 |
+
|
| 295 |
+
If you want to demonstrate multi-turn handling (replying to merchant responses), implement:
|
| 296 |
+
|
| 297 |
+
```python
|
| 298 |
+
def respond(state: ConversationState, merchant_message: str) -> dict:
|
| 299 |
+
"""Given the conversation so far + the merchant's latest message, produce the reply."""
|
| 300 |
+
...
|
| 301 |
+
```
|
| 302 |
+
|
| 303 |
+
Multi-turn capability is a tiebreaker, not a requirement.
|
| 304 |
+
|
| 305 |
+
---
|
| 306 |
+
|
| 307 |
+
## 8. Evaluation methodology (how AI judges)
|
| 308 |
+
|
| 309 |
+
Submissions are scored by an LLM judge across **5 dimensions**, each 0-10. Total = 50.
|
| 310 |
+
|
| 311 |
+
| Dimension | What the judge looks for |
|
| 312 |
+
|---|---|
|
| 313 |
+
| **Specificity** | Does the message anchor on a concrete, verifiable fact from the contexts (number, date, headline, peer stat)? Penalty for generic "10% off" / "increase your sales" framings. |
|
| 314 |
+
| **Category fit** | Does the voice, vocabulary, and offer format match the category? Dentist messages should sound clinical-peer, not retail-promo. |
|
| 315 |
+
| **Merchant fit** | Is the message personalized to *this specific* merchant's state (their numbers, their offers, their conversation history)? Is the language preference honored? |
|
| 316 |
+
| **Trigger relevance** | Does the message clearly communicate *why now* — the specific trigger that prompted it? Not "you should improve your profile" generically. |
|
| 317 |
+
| **Engagement compulsion** | Would a real merchant want to reply? Does it use one or more compulsion levers (curiosity, social proof, loss aversion, effort externalization, single-binary CTA)? |
|
| 318 |
+
|
| 319 |
+
### The twist — post-submission context injection
|
| 320 |
+
|
| 321 |
+
After submissions close, the judge injects **additional context that participants didn't see**:
|
| 322 |
+
- New `digest` items added to category contexts (recent research/news/regulations)
|
| 323 |
+
- Updated `performance` snapshots on merchant contexts (their numbers shifted)
|
| 324 |
+
- New `triggers` (events that happened "after" submission)
|
| 325 |
+
- For 5 of the 30 test pairs: a populated `CustomerContext` is added (testing customer-facing capability)
|
| 326 |
+
|
| 327 |
+
The judge then asks the bot to produce **follow-up messages** for these enriched scenarios. Bots that adapt well to new context (without hallucinating, without going off-voice) score higher.
|
| 328 |
+
|
| 329 |
+
### Replay test
|
| 330 |
+
|
| 331 |
+
For the 10 highest-scoring submissions, the judge runs a **simulated multi-turn replay**:
|
| 332 |
+
- Judge plays the merchant.
|
| 333 |
+
- Bot composes the first message.
|
| 334 |
+
- Judge generates a plausible merchant reply (sometimes engaged, sometimes auto-reply, sometimes a hard "not interested", sometimes a curveball question).
|
| 335 |
+
- Bot must respond.
|
| 336 |
+
- 3-5 turns deep.
|
| 337 |
+
- Judge scores how the conversation flows: did the bot detect auto-reply? Did it route action requests correctly? Did it know when to gracefully exit?
|
| 338 |
+
|
| 339 |
+
---
|
| 340 |
+
|
| 341 |
+
## 9. Reference: real Vera conversation patterns (anonymized excerpts)
|
| 342 |
+
|
| 343 |
+
These are real conversations from production Vera — provided as reference, not as the bar to clear (you should aim higher).
|
| 344 |
+
|
| 345 |
+
### Pattern A — Real intent, real value exchange (the gold standard)
|
| 346 |
+
|
| 347 |
+
```
|
| 348 |
+
[USER] Hi Vera. Please update my google profile.
|
| 349 |
+
[VERA] Hi! Priya, Vera se bol rahi hoon. Aapka Google profile abhi 62.5% complete hai
|
| 350 |
+
— description, website aur business hours missing hain. Kya aap yeh details
|
| 351 |
+
share kar sakte hain taaki main sab update kar sakun?
|
| 352 |
+
[USER] Ok please check & update the profile.
|
| 353 |
+
[VERA] Done! Maine aapka Google profile update kar diya hai:
|
| 354 |
+
- Business hours — daily 9 AM se 10 PM
|
| 355 |
+
- Description — restaurant description add kar diya
|
| 356 |
+
- Google Post — ek welcome post bhi publish kar diya
|
| 357 |
+
[USER] I can't see my profile is updated yet. It is showing temporarily closed.
|
| 358 |
+
[VERA] Google ke changes mein 24-48 ghante lagte hain. Kyunki aapka profile abhi
|
| 359 |
+
unverified hai, Google har update review karta hai. Tab tak main aur
|
| 360 |
+
cheezein add kar sakti hoon...
|
| 361 |
+
```
|
| 362 |
+
**What's right**: Hindi-English mix, specific numbers (62.5%, 24-48 ghante), Vera does work + reports back, handles uncertainty honestly.
|
| 363 |
+
|
| 364 |
+
### Pattern B — Auto-reply detection + graceful exit
|
| 365 |
+
|
| 366 |
+
```
|
| 367 |
+
[VERA] Studio11 Family Salon ka Google profile check kar rahi thi — aapki 4.9★
|
| 368 |
+
rating hai 298 reviews ke saath... business description missing hai.
|
| 369 |
+
[MERCHANT] Aapki jaankari ke liye bahut-bahut shukriya. Main aapki yeh sabhi baatein
|
| 370 |
+
aur sujhaav hamari team tak pahuncha deti hoon. ← merchant's auto-reply
|
| 371 |
+
[VERA] Samajh gayi. Team tak pahunchane se pehle, kya aap khud dekhna chahingi
|
| 372 |
+
ki exact kya missing hai Google pe? 2 minute ka kaam hai. Chalega?
|
| 373 |
+
[MERCHANT] Aapki madad ke liye shukriya, lekin main ek automated assistant hoon...
|
| 374 |
+
[VERA] Koi baat nahi, samajh gayi. Main owner/manager se directly connect kar
|
| 375 |
+
lungi. Aapka salon accha chal raha hai β best wishes! π
|
| 376 |
+
```
|
| 377 |
+
**What's right**: Vera tried once after detecting auto-reply, then stopped wasting turns. Polite exit.
|
| 378 |
+
|
| 379 |
+
### Pattern C — Strong specificity hook
|
| 380 |
+
|
| 381 |
+
```
|
| 382 |
+
[VERA] Quick nudge: your dashboard shows 6,777 missed searches in Sector 14
|
| 383 |
+
for makeup services — people are looking but not finding you. Want me
|
| 384 |
+
to show how your listing would appear?
|
| 385 |
+
```
|
| 386 |
+
**What's right**: "6,777" is verifiable; "Sector 14" is local; "missed searches" frames as loss aversion.
|
| 387 |
+
|
| 388 |
+
### Pattern D — Intent-handoff failure (DON'T do this)
|
| 389 |
+
|
| 390 |
+
```
|
| 391 |
+
[MERCHANT] Mujhe magicpin judrna hai. ← "I want to join magicpin" — clear intent
|
| 392 |
+
[VERA] Badhiya! Aapke liye simple rakhti hoon: agar aapko 10-15 naye customers
|
| 393 |
+
regular milne lagen to helpful hoga na? ← went back to qualifying
|
| 394 |
+
```
|
| 395 |
+
**What's wrong**: Merchant said yes; Vera asked another qualification question. Lost momentum. Your bot should detect explicit intent and route to action immediately.
|
| 396 |
+
|
| 397 |
+
---
|
| 398 |
+
|
| 399 |
+
## 10. Compulsion levers — what makes merchants reply
|
| 400 |
+
|
| 401 |
+
These are the levers that drive engagement. Use one or more per message:
|
| 402 |
+
|
| 403 |
+
1. **Specificity / verifiability** — concrete number, date, headline, source citation
|
| 404 |
+
2. **Loss aversion** — "you're missing X" / "before this window closes"
|
| 405 |
+
3. **Social proof** — "3 dentists in your locality did Y this month"
|
| 406 |
+
4. **Effort externalization** — "I've drafted X — just say go" / "5-min setup"
|
| 407 |
+
5. **Curiosity** — "want to see who?" / "want the full list?"
|
| 408 |
+
6. **Reciprocity** — "I noticed Y about your account, thought you'd want to know"
|
| 409 |
+
7. **Asking the merchant** — "what's your most-asked treatment this week?"
|
| 410 |
+
8. **Single binary commitment** — Reply YES / STOP, not multi-choice
|
| 411 |
+
|
| 412 |
+
Production Vera's biggest miss is **#3 social proof** and **#7 asking the merchant** — these families barely fire today and would unlock a lot of engagement.
|
| 413 |
+
|
| 414 |
+
---
|
| 415 |
+
|
| 416 |
+
## 11. Anti-patterns the judge will penalize
|
| 417 |
+
|
| 418 |
+
- Generic offers ("Flat 30% off") when service+price is available ("Haircut @ ₹99")
|
| 419 |
+
- Multiple CTAs in one message ("Reply YES for X, NO for Y, MAYBE for Z")
|
| 420 |
+
- Buried call-to-action (the "what to do" should land in the last sentence)
|
| 421 |
+
- Promotional tone ("AMAZING DEAL!") for categories that need clinical/peer voice (dentists, doctors, lawyers)
|
| 422 |
+
- Hallucinated data (citing a "JIDA paper" not in the context, naming a competitor not in the context)
|
| 423 |
+
- Long preambles ("I hope you're doing well. I'm reaching out today to…")
|
| 424 |
+
- Re-introducing yourself after the first message
|
| 425 |
+
- Ignoring the language preference ("hi-en mix" merchant getting pure English)
|
| 426 |
+
- Sending the same message verbatim that was sent before (anti-repetition)
|
| 427 |
+
|
| 428 |
+
---
|
| 429 |
+
|
| 430 |
+
## 12. Open challenges (these are intentional — extra credit)
|
| 431 |
+
|
| 432 |
+
1. **Detect auto-replies** vs. real merchant replies and route differently. (Hint: same message verbatim 3+ times = auto-reply.)
|
| 433 |
+
2. **Handle intent transitions** — when the merchant says "yes I want to join" / "let's do it" / "go ahead", switch from pitch mode to action mode immediately.
|
| 434 |
+
3. **Multi-turn cadence planning** — within a 24h session window, what's the optimal sequence of nudges?
|
| 435 |
+
4. **Language detection per turn** — merchant might switch from English to Hindi mid-conversation.
|
| 436 |
+
5. **Knowing when to stop** — gracefully exit when the merchant signals not-interested or after 3 unanswered nudges.
|
| 437 |
+
|
| 438 |
+
---
|
| 439 |
+
|
| 440 |
+
## 13. Suggested approach (one of many — you decide)
|
| 441 |
+
|
| 442 |
+
The challenge is LLM-agnostic. Most participants will:
|
| 443 |
+
|
| 444 |
+
1. **Build a prompt template** that takes the 4 contexts as structured input and asks an LLM to compose. Likely best with a frontier model (Claude / GPT / Gemini / DeepSeek).
|
| 445 |
+
2. **Add a routing layer** — different trigger kinds may want different prompt variants (research-digest framing vs recall-reminder framing vs perf-dip framing).
|
| 446 |
+
3. **Add post-LLM validation** — check the output for CTA shape and language match. Re-prompt if it fails.
|
| 447 |
+
4. **For multi-turn**: maintain a small conversation state with what was already said, what the merchant signaled, what the next best move is.
|
| 448 |
+
|
| 449 |
+
You can also use retrieval (e.g., embed the category digest items, retrieve the most relevant for the trigger), tool use (e.g., let the LLM call a "lookup peer stat" function), or any other architecture.
|
| 450 |
+
|
| 451 |
+
---
|
| 452 |
+
|
| 453 |
+
## 14. Logistics (placeholders — fill in for your run)
|
| 454 |
+
|
| 455 |
+
- **Eligibility**: <e.g., undergrad/grad CS or related, India-based or remote>
|
| 456 |
+
- **Team size**: solo or pairs
|
| 457 |
+
- **Submission window**: <e.g., 14 days from launch>
|
| 458 |
+
- **Deliverables**: as in §7
|
| 459 |
+
- **Compute reimbursement**: <e.g., ₹X for API costs>
|
| 460 |
+
- **Prizes**: <top 1 / top 3 / honorable mentions>
|
| 461 |
+
- **Selection outcomes**: <e.g., top N invited to final on-site round>
|
| 462 |
+
|
| 463 |
+
---
|
| 464 |
+
|
| 465 |
+
## 15. Dataset privacy + ethics
|
| 466 |
+
|
| 467 |
+
- The base dataset is **synthetic but realistic** — generated from anonymized templates, no real PII.
|
| 468 |
+
- Merchant names in samples are altered; phone numbers redacted.
|
| 469 |
+
- Participants must **not scrape** real magicpin / Google data; must not impersonate magicpin in any external test.
|
| 470 |
+
- All bot output is for the challenge only; not for real merchant outreach.
|
| 471 |
+
|
| 472 |
+
---
|
| 473 |
+
|
| 474 |
+
## 16. What gets shared with the judge in the new session
|
| 475 |
+
|
| 476 |
+
When a fresh AI session starts to evaluate submissions, the judge gets:
|
| 477 |
+
|
| 478 |
+
1. **This brief** (the entire document you're reading) — full context.
|
| 479 |
+
2. **The base dataset** (categories/, merchants/, customers/, triggers/).
|
| 480 |
+
3. **The post-submission context injections** (new digest items, updated performance, new triggers, optional customer contexts).
|
| 481 |
+
4. **The submitted bot artifacts** (`bot.py`, `submission.jsonl`, optional `conversation_handlers.py`, `README.md`).
|
| 482 |
+
5. **The 5-dimension rubric** (§8) and the replay-test instructions.
|
| 483 |
+
|
| 484 |
+
That's everything the judge needs. No prior context from this session is required.
|
| 485 |
+
|
| 486 |
+
---
|
| 487 |
+
|
| 488 |
+
## Appendix A — Composition example (so the judge knows what good looks like)
|
| 489 |
+
|
| 490 |
+
**Inputs**:
|
| 491 |
+
- Category: `dentists` (voice=peer/clinical, peer_stats.avg_ctr=0.030, digest top item="JIDA Oct trial: 3-mo fluoride recall cuts caries 38% better")
|
| 492 |
+
- Merchant: Dr. Meera, Lajpat Nagar Delhi, CTR 2.1% (below peer), high-risk-adult patient cohort, last Vera touch 2 days ago (engaged)
|
| 493 |
+
- Trigger: `research_digest_release` (urgency 2, external)
|
| 494 |
+
- Customer: none
|
| 495 |
+
|
| 496 |
+
**Good composed message** (the bar):
|
| 497 |
+
|
| 498 |
+
```
|
| 499 |
+
Dr. Meera, JIDA's Oct issue landed. One item relevant to your high-risk adult
|
| 500 |
+
patients — 2,100-patient trial showed 3-month fluoride recall cuts caries
|
| 501 |
+
recurrence 38% better than 6-month. Worth a look (2-min abstract). Want me to
|
| 502 |
+
pull it + draft a patient-ed WhatsApp you can share? — JIDA Oct 2026 p.14
|
| 503 |
+
```
|
| 504 |
+
|
| 505 |
+
**Why it scores well**:
|
| 506 |
+
- Specificity: "2,100-patient", "38% better", "JIDA Oct 2026 p.14"
|
| 507 |
+
- Category fit: clinical vocabulary, source citation, peer tone
|
| 508 |
+
- Merchant fit: "your high-risk adult patients" (derived from customer aggregate)
|
| 509 |
+
- Trigger relevance: explicitly references the digest as the reason for messaging
|
| 510 |
+
- Engagement compulsion: curiosity ("Worth a look"), reciprocity ("I'll pull it for you"), low-friction CTA
|
| 511 |
+
|
| 512 |
+
---
|
| 513 |
+
|
| 514 |
+
## Appendix B — Customer-facing composition example
|
| 515 |
+
|
| 516 |
+
**Inputs**:
|
| 517 |
+
- Category: `dentists` (customer-facing voice rules: no "guaranteed", no medical claims)
|
| 518 |
+
- Merchant: Dr. Meera (active offer "Dental Cleaning @ ₹299", available slots Wed 6pm + Thu 5pm)
|
| 519 |
+
- Trigger: `recall_due` (scope=customer, urgency=3, internal)
|
| 520 |
+
- Customer: Priya (lapsed_soft, weekday-evening preference, hi-en mix language)
|
| 521 |
+
|
| 522 |
+
**Good composed message** (sent from merchant's WA number, drafted by bot):
|
| 523 |
+
|
| 524 |
+
```
|
| 525 |
+
Hi Priya, Dr. Meera's clinic here 🦷 It's been 5 months since your last visit —
|
| 526 |
+
your 6-month cleaning recall is due. Apke liye 2 slots ready hain: Wed 6 Nov,
|
| 527 |
+
6pm ya Thu 7 Nov, 5pm. ₹299 cleaning + complimentary fluoride. Reply 1 for
|
| 528 |
+
Wed, 2 for Thu, or tell us a time that works.
|
| 529 |
+
```
|
| 530 |
+
|
| 531 |
+
**Why it scores well**:
|
| 532 |
+
- send_as = `merchant_on_behalf` (correctly attributed)
|
| 533 |
+
- Category fit: clinical, no overclaims
|
| 534 |
+
- Merchant fit: real catalog price, real open slots
|
| 535 |
+
- Trigger relevance: explicitly names the 6-month recall
|
| 536 |
+
- Customer fit: name, language mix, evening preference honored, multi-choice slot offer (allowed for booking flows)
|
| 537 |
+
|
| 538 |
+
---
|
| 539 |
+
|
| 540 |
+
## End of brief
|
| 541 |
+
|
| 542 |
+
This document is everything the next session needs. Save it as `challenge-brief.md` and load it into the judge session along with the dataset and the submitted bot artifacts.
|
| 543 |
+
|
| 544 |
+
Good luck. Build something better than today's Vera.
|
magicpin-ai-challenge/challenge-testing-brief.md
ADDED
|
@@ -0,0 +1,557 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# magicpin AI Challenge — Testing & Evaluation Brief
|
| 2 |
+
|
| 3 |
+
**Status**: Companion to `challenge-brief.md`. Defines the technical contract between candidate bots and magicpin's judging system.
|
| 4 |
+
**Last updated**: 2026-04-26
|
| 5 |
+
**Audience**: Candidates building the bot + magicpin engineers running the harness.
|
| 6 |
+
|
| 7 |
+
> **Read this with `challenge-brief.md`** β that brief defines what the bot must do (the 4-context framework, composition contract, evaluation rubric). This brief defines *how the bot is tested* (HTTP API, judge harness, scoring touchpoints).
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## 1. The high-level model
|
| 12 |
+
|
| 13 |
+
```
|
| 14 |
+
┌──────────────────────────┐                    ┌────────────────────────────┐
|
| 15 |
+
│  magicpin Judge Harness  │                    │  Candidate Bot (your code) │
|
| 16 |
+
│  (LLM playing merchant + │                    │  HTTP server, public URL   │
|
| 17 |
+
│   context injector +     │ ──── HTTP/JSON ──► │  Stateful per-conversation │
|
| 18 |
+
│   scorer)                │ ◄──── HTTP/JSON ── │                            │
|
| 19 |
+
└──────────────────────────┘                    └────────────────────────────┘
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
Two information flows:
|
| 23 |
+
|
| 24 |
+
1. **Judge → Bot**: pushes incremental context across all 4 dimensions (category, merchant, trigger, customer) over time. Mimics how production data updates flow into Vera.
|
| 25 |
+
2. **Bot → Judge**: the bot proactively initiates conversations, and the judge plays the merchant (or customer) role, replying realistically. The bot must handle the full conversation.
|
| 26 |
+
|
| 27 |
+
The bot is **stateful** (must remember context pushed earlier and conversations in flight). The judge is the source of truth for what's happened.
|
| 28 |
+
|
| 29 |
+
---
|
| 30 |
+
|
| 31 |
+
## 2. Endpoints the candidate must expose
|
| 32 |
+
|
| 33 |
+
All endpoints are HTTPS, JSON in/out, UTF-8. Total surface: **5 endpoints**.
|
| 34 |
+
|
| 35 |
+
### 2.1 `POST /v1/context` — receive a context push
|
| 36 |
+
|
| 37 |
+
The judge calls this whenever it wants the bot to know about new or updated context. Could be the initial base dataset, or an incremental update mid-test.
|
| 38 |
+
|
| 39 |
+
**Request body**:
|
| 40 |
+
```json
|
| 41 |
+
{
|
| 42 |
+
"scope": "category" | "merchant" | "customer" | "trigger",
|
| 43 |
+
"context_id": "dentists" | "m_001_drmeera" | "c_001_priya" | "trg_2026_04_26_research_digest",
|
| 44 |
+
"version": 3,
|
| 45 |
+
"payload": { /* the full context object β see Β§3 */ },
|
| 46 |
+
"delivered_at": "2026-04-26T10:00:00Z"
|
| 47 |
+
}
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
**Behavior**:
|
| 51 |
+
- **Idempotent** by `(context_id, version)`. Re-posting the same version is a no-op.
|
| 52 |
+
- A higher `version` for the same `context_id` **replaces** the prior version atomically.
|
| 53 |
+
- Bot must persist context until the test ends. Storing in memory is fine; just don't restart between calls.
|
| 54 |
+
|
| 55 |
+
**Response (200)**:
|
| 56 |
+
```json
|
| 57 |
+
{ "accepted": true, "ack_id": "ack_abc123", "stored_at": "2026-04-26T10:00:00.123Z" }
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
**Response (409)** — version conflict (you already have a higher version):
|
| 61 |
+
```json
|
| 62 |
+
{ "accepted": false, "reason": "stale_version", "current_version": 5 }
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
**Response (400)** — malformed:
|
| 66 |
+
```json
|
| 67 |
+
{ "accepted": false, "reason": "invalid_scope", "details": "..." }
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
### 2.2 `POST /v1/tick` — periodic wake-up; bot can initiate
|
| 71 |
+
|
| 72 |
+
The judge calls this every **N seconds of simulated time** (default: every 5 simulated minutes). The bot inspects its current context state and decides whether to send any proactive messages.
|
| 73 |
+
|
| 74 |
+
**Request body**:
|
| 75 |
+
```json
|
| 76 |
+
{
|
| 77 |
+
"now": "2026-04-26T10:30:00Z",
|
| 78 |
+
"available_triggers": ["trg_2026_04_26_research_digest", "trg_2026_04_26_recall_priya"]
|
| 79 |
+
}
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
`available_triggers` is a hint listing trigger context_ids the judge considers "active right now". The bot can use any subset (or none).
|
| 83 |
+
|
| 84 |
+
**Response (200)**:
|
| 85 |
+
```json
|
| 86 |
+
{
|
| 87 |
+
"actions": [
|
| 88 |
+
{
|
| 89 |
+
"conversation_id": "conv_001",
|
| 90 |
+
"merchant_id": "m_001_drmeera",
|
| 91 |
+
"customer_id": null,
|
| 92 |
+
"send_as": "vera",
|
| 93 |
+
"trigger_id": "trg_2026_04_26_research_digest",
|
| 94 |
+
"template_name": "vera_research_digest_v1",
|
| 95 |
+
"template_params": ["Dr. Meera", "JIDA Oct issue", "..."],
|
| 96 |
+
"body": "Dr. Meera, JIDA's Oct issue landed...",
|
| 97 |
+
"cta": "open_ended",
|
| 98 |
+
"suppression_key": "research:dentists:2026-W17",
|
| 99 |
+
"rationale": "External research digest with merchant-relevant clinical anchor; merchant is a dentist with high-risk-adult patient cohort"
|
| 100 |
+
}
|
| 101 |
+
]
|
| 102 |
+
}
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
`actions` MAY be an empty list — the bot is free to decide nothing's worth sending this tick.
|
| 106 |
+
|
| 107 |
+
`conversation_id`:
|
| 108 |
+
- If you want to start a new conversation, generate any unique string.
|
| 109 |
+
- Reusing an existing `conversation_id` is invalid here — use `/v1/reply` to continue an existing conversation.
|
| 110 |
+
|
| 111 |
+
### 2.3 `POST /v1/reply` — receive a reply from the simulated merchant/customer
|
| 112 |
+
|
| 113 |
+
The judge calls this with the merchant's (or customer's) reply to a previous bot message. The bot must respond synchronously with its next move.
|
| 114 |
+
|
| 115 |
+
**Request body**:
|
| 116 |
+
```json
|
| 117 |
+
{
|
| 118 |
+
"conversation_id": "conv_001",
|
| 119 |
+
"merchant_id": "m_001_drmeera",
|
| 120 |
+
"customer_id": null,
|
| 121 |
+
"from_role": "merchant",
|
| 122 |
+
"message": "Yes, send me the abstract",
|
| 123 |
+
"received_at": "2026-04-26T10:45:00Z",
|
| 124 |
+
"turn_number": 2
|
| 125 |
+
}
|
| 126 |
+
```
|
| 127 |
+
|
| 128 |
+
**Response (200)** — three valid `action` values:
|
| 129 |
+
|
| 130 |
+
```json
|
| 131 |
+
{ "action": "send",
|
| 132 |
+
"body": "Sending now β also drafted a 90-sec patient-ed WhatsApp...",
|
| 133 |
+
"cta": "open_ended",
|
| 134 |
+
"rationale": "Honoring the merchant's accept; adding the next-best-step (patient-ed) as low-friction follow-on" }
|
| 135 |
+
```
|
| 136 |
+
|
| 137 |
+
```json
|
| 138 |
+
{ "action": "wait",
|
| 139 |
+
"wait_seconds": 1800,
|
| 140 |
+
"rationale": "Merchant asked for time; back off 30 min" }
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
```json
|
| 144 |
+
{ "action": "end",
|
| 145 |
+
"rationale": "Merchant said not interested; gracefully exiting conversation" }
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
The bot has **30 seconds** to respond. After 30s the judge marks this turn as `timeout` and proceeds.
|
| 149 |
+
|
| 150 |
+
### 2.4 `GET /v1/healthz` — liveness probe
|
| 151 |
+
|
| 152 |
+
**Response (200)**:
|
| 153 |
+
```json
|
| 154 |
+
{ "status": "ok", "uptime_seconds": 3600, "contexts_loaded": { "category": 5, "merchant": 50, "customer": 200, "trigger": 100 } }
|
| 155 |
+
```
|
| 156 |
+
|
| 157 |
+
The judge polls this every 60s during the test window. Three consecutive failures = bot disqualified for that test slot.
|
| 158 |
+
|
| 159 |
+
### 2.5 `GET /v1/metadata` — bot identity
|
| 160 |
+
|
| 161 |
+
**Response (200)**:
|
| 162 |
+
```json
|
| 163 |
+
{
|
| 164 |
+
"team_name": "Team Alpha",
|
| 165 |
+
"team_members": ["Alice", "Bob"],
|
| 166 |
+
"model": "claude-opus-4-7",
|
| 167 |
+
"approach": "single-prompt composer with retrieval over digest items",
|
| 168 |
+
"contact_email": "team@example.com",
|
| 169 |
+
"version": "1.2.0",
|
| 170 |
+
"submitted_at": "2026-04-26T08:00:00Z"
|
| 171 |
+
}
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
---
|
| 175 |
+
|
| 176 |
+
## 3. Context payload schemas (what the judge pushes to `/v1/context`)
|
| 177 |
+
|
| 178 |
+
Each `scope` has a fixed payload shape. These mirror the dataclasses defined in `challenge-brief.md` §4.
|
| 179 |
+
|
| 180 |
+
### 3.1 `scope: "category"`
|
| 181 |
+
```json
|
| 182 |
+
{
|
| 183 |
+
"slug": "dentists",
|
| 184 |
+
"offer_catalog": [{ "title": "Dental Cleaning @ βΉ299", "value": "299", "audience": "new_user" }],
|
| 185 |
+
"voice": { "tone": "peer_clinical", "vocab_allowed": ["fluoride varnish", "caries"], "taboos": ["cure", "guaranteed"] },
|
| 186 |
+
"peer_stats": { "avg_rating": 4.4, "avg_reviews": 62, "avg_ctr": 0.030, "scope": "delhi_solo_practices" },
|
| 187 |
+
"digest": [
|
| 188 |
+
{ "id": "d_2026W17_jida_fluoride", "kind": "research",
|
| 189 |
+
"title": "3-mo fluoride recall cuts caries 38% better than 6-mo",
|
| 190 |
+
"source": "JIDA Oct 2026, p.14", "trial_n": 2100, "patient_segment": "high_risk_adults",
|
| 191 |
+
"summary": "..." }
|
| 192 |
+
],
|
| 193 |
+
"patient_content_library": [
|
| 194 |
+
{ "id": "pc_001", "title": "3 things your teeth tell you about your heart", "channel": "whatsapp", "body": "..." }
|
| 195 |
+
],
|
| 196 |
+
"seasonal_beats": [{ "month_range": "Nov-Feb", "note": "exam-stress bruxism spike" }],
|
| 197 |
+
"trend_signals": [{ "query": "clear aligners delhi", "delta_yoy": 0.62, "segment_age": "28-45" }]
|
| 198 |
+
}
|
| 199 |
+
```
|
| 200 |
+
|
| 201 |
+
### 3.2 `scope: "merchant"`
|
| 202 |
+
```json
|
| 203 |
+
{
|
| 204 |
+
"merchant_id": "m_001_drmeera",
|
| 205 |
+
"category_slug": "dentists",
|
| 206 |
+
"identity": { "name": "Dr. Meera's Dental Clinic", "city": "Delhi", "locality": "Lajpat Nagar",
|
| 207 |
+
"place_id": "ChIJ...", "verified": true, "languages": ["en", "hi"] },
|
| 208 |
+
"subscription": { "status": "active", "plan": "Pro", "days_remaining": 82 },
|
| 209 |
+
"performance": {
|
| 210 |
+
"window_days": 30,
|
| 211 |
+
"views": 2410, "calls": 18, "directions": 45, "ctr": 0.021,
|
| 212 |
+
"delta_7d": { "views_pct": 0.18, "calls_pct": -0.05 }
|
| 213 |
+
},
|
| 214 |
+
"offers": [
|
| 215 |
+
{ "id": "o_meera_001", "title": "Dental Cleaning @ ₹299", "status": "active" },
|
| 216 |
+
{ "id": "o_meera_002", "title": "Deep Cleaning @ ₹499", "status": "expired" }
|
| 217 |
+
],
|
| 218 |
+
"conversation_history": [
|
| 219 |
+
{ "ts": "2026-04-24T10:00:00Z", "from": "vera", "body": "...", "engagement": "merchant_replied" }
|
| 220 |
+
],
|
| 221 |
+
"customer_aggregate": { "total_unique_ytd": 540, "lapsed_180d_plus": 78, "retention_6mo_pct": 0.38 },
|
| 222 |
+
"signals": ["stale_posts:22d", "ctr_below_peer_median", "high_risk_adult_cohort"]
|
| 223 |
+
}
|
| 224 |
+
```
|
| 225 |
+
|
| 226 |
+
### 3.3 `scope: "customer"`
|
| 227 |
+
```json
|
| 228 |
+
{
|
| 229 |
+
"customer_id": "c_001_priya",
|
| 230 |
+
"merchant_id": "m_001_drmeera",
|
| 231 |
+
"identity": { "name": "Priya", "phone_redacted": "<phone>", "language_pref": "hi-en mix" },
|
| 232 |
+
"relationship": {
|
| 233 |
+
"first_visit": "2025-11-04", "last_visit": "2026-05-12", "visits_total": 4,
|
| 234 |
+
"services_received": ["cleaning", "cleaning", "whitening", "cleaning"]
|
| 235 |
+
},
|
| 236 |
+
"state": "lapsed_soft",
|
| 237 |
+
"preferences": { "preferred_slots": "weekday_evening", "channel": "whatsapp" },
|
| 238 |
+
"consent": { "opted_in_at": "2025-11-04", "scope": ["recall_reminders", "appointment_reminders"] }
|
| 239 |
+
}
|
| 240 |
+
```
|
| 241 |
+
|
| 242 |
+
### 3.4 `scope: "trigger"`
|
| 243 |
+
```json
|
| 244 |
+
{
|
| 245 |
+
"id": "trg_2026_04_26_research_digest_dentists",
|
| 246 |
+
"scope": "merchant",
|
| 247 |
+
"kind": "research_digest",
|
| 248 |
+
"source": "external",
|
| 249 |
+
"merchant_id": "m_001_drmeera",
|
| 250 |
+
"customer_id": null,
|
| 251 |
+
"payload": {
|
| 252 |
+
"category": "dentists",
|
| 253 |
+
"top_item_id": "d_2026W17_jida_fluoride"
|
| 254 |
+
},
|
| 255 |
+
"urgency": 2,
|
| 256 |
+
"suppression_key": "research:dentists:2026-W17",
|
| 257 |
+
"expires_at": "2026-05-03T00:00:00Z"
|
| 258 |
+
}
|
| 259 |
+
```
|
| 260 |
+
|
| 261 |
+
For `scope: "customer"` triggers (e.g., `recall_due`), `customer_id` is populated.
|
| 262 |
+
|
| 263 |
+
---
|
| 264 |
+
|
| 265 |
+
## 4. The judge harness behavior — full lifecycle
|
| 266 |
+
|
| 267 |
+
### Phase 1 — Warmup (T-15 min before scoring window opens)
|
| 268 |
+
|
| 269 |
+
1. Judge calls `GET /v1/healthz` and `GET /v1/metadata` to verify the bot is reachable.
|
| 270 |
+
2. Judge POSTs the **base dataset** to `/v1/context`:
|
| 271 |
+
- 5 category contexts
|
| 272 |
+
- 50 merchant contexts
|
| 273 |
+
- 200 customer contexts
|
| 274 |
+
- 0 triggers (triggers come during the test window)
|
| 275 |
+
3. Judge waits 60s for the bot to settle, then re-checks `/healthz`.
|
| 276 |
+
4. If `contexts_loaded` reflects all 255 base contexts, warmup passes.
|
| 277 |
+
|
| 278 |
+
### Phase 2 — Test window (T0 to T0 + 60 simulated minutes)
|
| 279 |
+
|
| 280 |
+
The judge advances simulated time in **5-minute ticks**. At each tick:
|
| 281 |
+
|
| 282 |
+
1. Judge POSTs any new/updated contexts that "happened" during this tick (incremental updates).
|
| 283 |
+
2. Judge calls `POST /v1/tick` with current simulated time + currently-active triggers.
|
| 284 |
+
3. Bot returns `actions[]` — zero or more proactive sends.
|
| 285 |
+
4. For each action:
|
| 286 |
+
- Judge logs it (with rationale).
|
| 287 |
+
- Judge invokes a sub-LLM playing the merchant/customer role.
|
| 288 |
+
- The sub-LLM replies with a plausible response (sometimes engaged, sometimes auto-reply, sometimes hard "no", sometimes a question).
|
| 289 |
+
- Judge POSTs that reply to `/v1/reply`.
|
| 290 |
+
- Bot returns next action — `send`, `wait`, or `end`.
|
| 291 |
+
- Repeat up to 5 turns or until bot ends.
|
| 292 |
+
|
| 293 |
+
### Phase 3 — Adaptive context injection (interleaved during Phase 2)
|
| 294 |
+
|
| 295 |
+
Between ticks, the judge injects **post-submission context** the bot didn't see during development:
|
| 296 |
+
|
| 297 |
+
- New `digest` items (5 new research/compliance items per category context, posted as a new `version`)
|
| 298 |
+
- Updated `performance` snapshots (10 merchants get new perf numbers — some spikes, some dips)
|
| 299 |
+
- New `triggers` (15 new triggers spread across the test window)
|
| 300 |
+
- For 5 specific merchants: a new `customer` context is pushed mid-test with a `recall_due` trigger 2 minutes later
|
| 301 |
+
|
| 302 |
+
Bots that incorporate the new context in subsequent sends score higher. Bots that ignore it (sending stale composition) score lower. Bots that hallucinate (invent context that wasn't pushed) score lowest.
|
| 303 |
+
|
| 304 |
+
### Phase 4 — Replay test (top 10 only)
|
| 305 |
+
|
| 306 |
+
For the 10 highest-scoring bots from Phase 2, the judge runs **3 deep-dive scenarios** as standalone replays:
|
| 307 |
+
|
| 308 |
+
1. **Auto-reply hell** — judge plays a merchant whose phone is on WA Business with a canned auto-reply; sends the same canned text 4 times in a row. Bot must detect and exit gracefully.
|
| 309 |
+
2. **Intent transition** — judge plays an engaged merchant who, after 2 turns of qualification, says "ok let's do it". Bot must switch from qualifying to action immediately (not ask another qualifying question).
|
| 310 |
+
3. **Hostile / off-topic** — judge plays a merchant who replies with abuse, then asks an unrelated question ("can you also help me file my GST?"). Bot must stay on-mission politely.
|
| 311 |
+
|
| 312 |
+
Each replay: 5 turns, scored on conversation flow only.
|
| 313 |
+
|
| 314 |
+
### Phase 5 — Scoring + report (T0 + 90 min)
|
| 315 |
+
|
| 316 |
+
Judge aggregates:
|
| 317 |
+
- Phase 2 scores (per the 5-dimension rubric in §8 of the main brief)
|
| 318 |
+
- Phase 3 adaptation bonus (max +5 per dimension)
|
| 319 |
+
- Phase 4 replay scores (top 10 only; max +30)
|
| 320 |
+
- Operational penalties (timeouts, healthz failures, malformed responses; max -20)
|
| 321 |
+
|
| 322 |
+
Sends each team a per-message scorecard with judge rationale.
|
| 323 |
+
|
| 324 |
+
---
|
| 325 |
+
|
| 326 |
+
## 5. Rate limits + timeouts
|
| 327 |
+
|
| 328 |
+
| Limit | Value |
|
| 329 |
+
|---|---|
|
| 330 |
+
| Max requests/sec from judge to bot | 10 |
|
| 331 |
+
| Per-call timeout (judge waits this long) | 30 s |
|
| 332 |
+
| `/v1/context` payload size cap | 500 KB |
|
| 333 |
+
| `/v1/tick` action count cap | 20 actions per tick |
|
| 334 |
+
| Healthz failures before disqualification | 3 consecutive |
|
| 335 |
+
| Total test window | 60 simulated minutes (real-time ~30-45 min) |
|
| 336 |
+
|
| 337 |
+
If your bot needs more than 30s for `/v1/tick`, return an empty `actions: []` immediately and process work asynchronously — but you can't catch up later, so design for the budget.
|
| 338 |
+
|
| 339 |
+
---
|
| 340 |
+
|
| 341 |
+
## 6. Where to deploy
|
| 342 |
+
|
| 343 |
+
Deploy your bot anywhere that gives you a **public URL**:
|
| 344 |
+
- Any cloud (AWS, GCP, Azure, Render, Fly, Railway, Replit, …)
|
| 345 |
+
- ngrok tunnel to localhost
|
| 346 |
+
- Any hosting that exposes HTTP endpoints
|
| 347 |
+
|
| 348 |
+
Requirements:
|
| 349 |
+
- Must respond at the URL pattern `https://<your-host>/v1/*` (or `http://` for local testing)
|
| 350 |
+
- Submit your public URL via the submission portal
|
| 351 |
+
|
| 352 |
+
---
|
| 353 |
+
|
| 354 |
+
## 7. Reference implementation skeleton
|
| 355 |
+
|
| 356 |
+
A minimal-viable bot in ~80 lines of Python (FastAPI). Save as `bot.py`:
|
| 357 |
+
|
| 358 |
+
```python
|
| 359 |
+
import os
import time
from datetime import datetime, timezone
from typing import Any

from fastapi import FastAPI, Request, HTTPException
from pydantic import BaseModel
|
| 364 |
+
|
| 365 |
+
# Process start time (epoch seconds); /v1/healthz subtracts it from "now" to report uptime.
START = time.time()

# The single ASGI application; every /v1/* route below registers itself on it.
app = FastAPI()

# In-memory stores (use Redis/SQLite for production-grade)
# (scope, context_id) -> {version, payload}
contexts: dict[tuple[str, str], dict] = {}
# conversation_id -> [turns]
conversations: dict[str, list] = {}
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
@app.get("/v1/healthz")
async def healthz():
    """Liveness probe: report status, uptime, and how many contexts are stored per scope."""
    # Start every known scope at zero so the response always lists all four keys.
    loaded = dict.fromkeys(("category", "merchant", "customer", "trigger"), 0)
    for scope, _context_id in contexts:
        # .get() keeps an unexpected scope from raising; it is simply counted too.
        loaded[scope] = loaded.get(scope, 0) + 1
    uptime = int(time.time() - START)
    return {"status": "ok", "uptime_seconds": uptime, "contexts_loaded": loaded}
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
@app.get("/v1/metadata")
async def metadata():
    """Return the bot's identity card (static placeholder values; edit before submitting)."""
    identity = {
        "team_name": "Team Alpha",
        "team_members": ["Alice"],
        "model": "gpt-4o-mini",
        "approach": "single-prompt composer",
        "contact_email": "alice@example.com",
        "version": "0.1.0",
        "submitted_at": "2026-04-26T08:00:00Z",
    }
    return identity
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
class CtxBody(BaseModel):
    # Request body accepted by POST /v1/context.
    scope: str  # context kind; forms the first half of the storage key
    context_id: str  # identifier within the scope; second half of the storage key
    version: int  # compared with the stored version to reject stale pushes
    payload: dict[str, Any]  # scope-specific document, stored as received
    delivered_at: str  # sender-supplied timestamp; not read by this skeleton
|
| 394 |
+
|
| 395 |
+
@app.post("/v1/context")
async def push_context(body: CtxBody):
    """Store a pushed context unless an equal or newer version is already held.

    Returns:
        ``{"accepted": False, "reason": "stale_version", "current_version": ...}``
        when the stored version is >= the pushed one; otherwise the entry is
        overwritten and an ack carrying a UTC ``stored_at`` timestamp is returned.
    """
    key = (body.scope, body.context_id)
    cur = contexts.get(key)
    if cur and cur["version"] >= body.version:
        return {"accepted": False, "reason": "stale_version", "current_version": cur["version"]}
    contexts[key] = {"version": body.version, "payload": body.payload}
    # datetime.utcnow() is deprecated since Python 3.12 and yields a naive value.
    # Take an aware UTC timestamp and keep the trailing-"Z" wire format.
    stored_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    return {"accepted": True, "ack_id": f"ack_{body.context_id}_v{body.version}",
            "stored_at": stored_at}
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
class TickBody(BaseModel):
    # Request body accepted by POST /v1/tick.
    now: str  # current time as sent by the caller; not parsed by this skeleton
    available_triggers: list[str] = []  # trigger context ids to consider on this tick
|
| 409 |
+
|
| 410 |
+
MAX_ACTIONS_PER_TICK = 20  # the "/v1/tick action count cap" from the rate-limit table


@app.post("/v1/tick")
async def tick(body: TickBody):
    """Compose at most one proactive action per available trigger.

    A trigger is skipped when its own context, its merchant context, the
    merchant's category context, or the merchant's display name is missing,
    so one incomplete context never fails the whole tick.

    Returns:
        ``{"actions": [...]}`` holding no more than MAX_ACTIONS_PER_TICK entries.
    """
    actions = []
    seen_triggers = set()
    for trg_id in body.available_triggers:
        if len(actions) >= MAX_ACTIONS_PER_TICK:
            break  # never exceed the per-tick cap
        # conversation_id is derived from (merchant_id, trg_id), so a repeated
        # trigger id would emit two actions for the same conversation.
        if trg_id in seen_triggers:
            continue
        seen_triggers.add(trg_id)

        trg = contexts.get(("trigger", trg_id), {}).get("payload")
        if not trg:
            continue
        merchant_id = trg.get("merchant_id")
        merchant = contexts.get(("merchant", merchant_id), {}).get("payload")
        if not merchant:
            continue
        category = contexts.get(("category", merchant.get("category_slug")), {}).get("payload")
        if not category:
            continue
        merchant_name = (merchant.get("identity") or {}).get("name")
        if not merchant_name:
            continue  # cannot address the merchant; skip instead of raising KeyError

        # YOUR COMPOSER GOES HERE — call your LLM with the 4 contexts
        body_text = f"Hi {merchant_name}, ..."  # replace with real composition
        actions.append({
            "conversation_id": f"conv_{merchant_id}_{trg_id}",
            "merchant_id": merchant_id,
            # Customer-scoped triggers carry a customer_id; merchant-scoped ones leave it null.
            "customer_id": trg.get("customer_id"),
            "send_as": "vera", "trigger_id": trg_id,
            "template_name": "vera_generic_v1",
            "template_params": [merchant_name, "...", "..."],
            "body": body_text, "cta": "open_ended",
            "suppression_key": trg.get("suppression_key", ""),
            "rationale": "Composed from category+merchant+trigger"
        })
    return {"actions": actions}
|
| 433 |
+
|
| 434 |
+
|
| 435 |
+
class ReplyBody(BaseModel):
    # Request body accepted by POST /v1/reply.
    conversation_id: str  # key under which the turn is appended to `conversations`
    merchant_id: str | None = None  # optional; not read by this skeleton
    customer_id: str | None = None  # optional; not read by this skeleton
    from_role: str  # stored as the turn's "from" value
    message: str  # stored as the turn's "msg" value
    received_at: str  # sender-supplied timestamp; not read by this skeleton
    turn_number: int  # sender-supplied turn counter; not read by this skeleton
|
| 443 |
+
|
| 444 |
+
@app.post("/v1/reply")
async def reply(body: ReplyBody):
    """Record the incoming turn, then answer with a fixed placeholder `send` action."""
    incoming_turn = {"from": body.from_role, "msg": body.message}
    history = conversations.setdefault(body.conversation_id, [])
    history.append(incoming_turn)
    # YOUR REPLY-COMPOSER GOES HERE
    response = {
        "action": "send",
        "body": "Got it, here's what's next...",
        "cta": "open_ended",
        "rationale": "acknowledged + advanced",
    }
    return response
|
| 450 |
+
```
|
| 451 |
+
|
| 452 |
+
Run: `uvicorn bot:app --host 0.0.0.0 --port 8080`
|
| 453 |
+
|
| 454 |
+
This is a working skeleton. The composer logic is stubbed — replace the `# YOUR COMPOSER GOES HERE` and `# YOUR REPLY-COMPOSER GOES HERE` blocks with your LLM calls.
|
| 455 |
+
|
| 456 |
+
---
|
| 457 |
+
|
| 458 |
+
## 9. Local self-test before submitting
|
| 459 |
+
|
| 460 |
+
Magicpin provides a `judge_simulator.py` that runs a mini version of the harness against your endpoint. Use it during development:
|
| 461 |
+
|
| 462 |
+
```bash
|
| 463 |
+
export BOT_URL=http://localhost:8080
|
| 464 |
+
python judge_simulator.py
|
| 465 |
+
```
|
| 466 |
+
|
| 467 |
+
Each scenario prints the judge's prompts + your bot's responses + a mock score. Iterate until you're happy, then submit your URL.
|
| 468 |
+
|
| 469 |
+
---
|
| 470 |
+
|
| 471 |
+
## 10. Failure modes the judge handles
|
| 472 |
+
|
| 473 |
+
| Failure | Judge behavior | Penalty |
|
| 474 |
+
|---|---|---|
|
| 475 |
+
| `/v1/healthz` returns non-200 (3× in a row) | Mark bot offline; skip remaining ticks | -10 (operational) |
|
| 476 |
+
| `/v1/tick` times out (>30s) | Skip this tick's actions; continue | -1 per timeout |
|
| 477 |
+
| `/v1/reply` times out | Mark turn as `bot_silent`; judge plays next merchant turn after 30s | -1 per timeout |
|
| 478 |
+
| Bot returns malformed JSON | Logged, scored as 0 for that action | -2 per malformed |
|
| 479 |
+
| Bot returns `action: send` with empty body | Treated as malformed | -2 |
|
| 480 |
+
| Bot returns the same body verbatim it sent before in the same conversation | Anti-repetition flag | -2 per repeat |
|
| 481 |
+
|
| 482 |
+
---
|
| 483 |
+
|
| 484 |
+
## 11. Security + privacy
|
| 485 |
+
|
| 486 |
+
- All payloads are synthetic — no real PII.
|
| 487 |
+
- Bots **must not** transmit any payload data outside the test environment (no calls to non-LLM external APIs with merchant/customer fields).
|
| 488 |
+
- Bots **may** use commercial LLM APIs (OpenAI, Anthropic, Google, DeepSeek, etc.) — those are necessary for composition.
|
| 489 |
+
- Bots **must not** persist context data after the test ends. magicpin will issue a `POST /v1/teardown` (optional) at end of test; on receiving it, wipe state.
|
| 490 |
+
|
| 491 |
+
---
|
| 492 |
+
|
| 493 |
+
## 12. Pre-flight checklist for candidates
|
| 494 |
+
|
| 495 |
+
Before submitting:
|
| 496 |
+
|
| 497 |
+
- [ ] Endpoint reachable from the public internet (HTTPS or HTTP)
|
| 498 |
+
- [ ] All 5 endpoints implemented and returning correct schemas
|
| 499 |
+
- [ ] `/v1/context` is idempotent on `(scope, context_id, version)`
|
| 500 |
+
- [ ] `/v1/tick` returns within 30s even if it has nothing to send (returns `{"actions": []}`)
|
| 501 |
+
- [ ] `/v1/reply` returns within 30s for any conversation
|
| 502 |
+
- [ ] Bot persists context across calls (in-memory is fine; no restarts during test)
|
| 503 |
+
- [ ] `judge_simulator.py` passes locally with non-zero scores
|
| 504 |
+
- [ ] Submitted URL via submission portal
|
| 505 |
+
- [ ] Compute budget set (rate limits, LLM API quota, etc.) so the bot survives 60-min test
|
| 506 |
+
|
| 507 |
+
---
|
| 508 |
+
|
| 509 |
+
## 13. What the judge logs (for transparency)
|
| 510 |
+
|
| 511 |
+
Every test produces a per-team artifact:
|
| 512 |
+
|
| 513 |
+
```
|
| 514 |
+
results/<team_name>/
|
| 515 |
+
├── conversations.jsonl # all turns, both sides, with timestamps
|
| 516 |
+
├── context_pushes.jsonl # every context push, with bot's ack
|
| 517 |
+
├── scoring.json # 5-dimension scores per action + per conversation
|
| 518 |
+
├── timeline.html # visual timeline of the test window
|
| 519 |
+
├── replay_*.jsonl # phase 4 replay transcripts (top 10 only)
|
| 520 |
+
└── final_report.md # aggregated score + judge's qualitative feedback
|
| 521 |
+
```
|
| 522 |
+
|
| 523 |
+
Candidates receive their own artifact bundle within 48h of the test. Top scorers' bundles (with consent) become reference material for the next cohort.
|
| 524 |
+
|
| 525 |
+
---
|
| 526 |
+
|
| 527 |
+
## 14. FAQ
|
| 528 |
+
|
| 529 |
+
**Q: Can the bot use external tools / function calling during composition?**
|
| 530 |
+
Yes. Your LLM can call any tool you implement internally. You can't call out to non-LLM external APIs that receive merchant/customer payloads (privacy rule §11).
|
| 531 |
+
|
| 532 |
+
**Q: What if my bot needs more than 30s to compose a really good message?**
|
| 533 |
+
Two options: (a) speed it up; (b) at `/v1/tick`, return immediately with `{"actions": []}` and skip the cycle. Don't try to background-process and return late — late responses are dropped.
|
| 534 |
+
|
| 535 |
+
**Q: Can I send multiple messages in one tick to the same merchant?**
|
| 536 |
+
Yes, but only one `action` per `(merchant_id, conversation_id)` pair per tick. Use a follow-up tick to send more.
|
| 537 |
+
|
| 538 |
+
**Q: Does the judge see my bot's `rationale` field?**
|
| 539 |
+
Yes — it's included in the scoring rubric ("did the rationale match the actual output?"). High-quality rationales help the judge interpret edge cases generously.
|
| 540 |
+
|
| 541 |
+
**Q: What language do replies have to be in?**
|
| 542 |
+
Match the merchant's `identity.languages` field. Default is English. Hindi-English code-mix is encouraged where the language pref says `hi`.
|
| 543 |
+
|
| 544 |
+
**Q: Can my bot refuse to send when nothing's worth saying?**
|
| 545 |
+
Yes — return `{"actions": []}` from `/v1/tick`. Restraint is rewarded; spam is penalized.
|
| 546 |
+
|
| 547 |
+
**Q: What if the judge pushes a context for a merchant I've never seen before, mid-conversation?**
|
| 548 |
+
Treat it as a normal new merchant. The bot should be ready for any context to arrive at any time.
|
| 549 |
+
|
| 550 |
+
**Q: Is there a way to query the judge for clarification mid-test?**
|
| 551 |
+
No. The bot has only the contexts it's been pushed. This is intentional — production Vera doesn't get clarifications either.
|
| 552 |
+
|
| 553 |
+
---
|
| 554 |
+
|
| 555 |
+
## End of testing brief
|
| 556 |
+
|
| 557 |
+
The two briefs together (`challenge-brief.md` for *what to build* and this one for *how it's tested*) are the complete spec. A team should be able to read both end-to-end in 30 minutes and start coding.
|
magicpin-ai-challenge/dataset/categories/dentists.json
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"slug": "dentists",
|
| 3 |
+
"display_name": "Dentists",
|
| 4 |
+
"voice": {
|
| 5 |
+
"tone": "peer_clinical",
|
| 6 |
+
"register": "respectful_collegial",
|
| 7 |
+
"code_mix": "hindi_english_natural",
|
| 8 |
+
"vocab_allowed": [
|
| 9 |
+
"fluoride varnish", "scaling", "caries", "occlusion", "bruxism",
|
| 10 |
+
"endodontic", "periodontal", "implant", "aligner", "veneer",
|
| 11 |
+
"OPG", "IOPA", "RCT", "CAD/CAM", "zirconia", "PFM"
|
| 12 |
+
],
|
| 13 |
+
"vocab_taboo": [
|
| 14 |
+
"guaranteed", "100% safe", "completely cure", "miracle", "best in city",
|
| 15 |
+
"doctor approved" ,"FDA-approved (use only when actually applicable)"
|
| 16 |
+
],
|
| 17 |
+
"salutation_examples": ["Dr. {first_name}", "Doc"],
|
| 18 |
+
"tone_examples": [
|
| 19 |
+
"Worth a look β JIDA Oct 2026 p.14",
|
| 20 |
+
"This one likely affects your high-risk adult cohort",
|
| 21 |
+
"If your case-mix is mostly cosmetic, may not be relevant"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
"offer_catalog": [
|
| 25 |
+
{ "id": "den_001", "title": "Dental Cleaning @ ₹299", "value": "299", "audience": "new_user", "type": "service_at_price" },
|
| 26 |
+
{ "id": "den_002", "title": "Free Consultation", "value": "0", "audience": "new_user", "type": "free_service" },
|
| 27 |
+
{ "id": "den_003", "title": "Teeth Whitening @ ₹1,499", "value": "1499", "audience": "new_user", "type": "service_at_price" },
|
| 28 |
+
{ "id": "den_004", "title": "Root Canal @ ₹2,999 (single rooted)", "value": "2999", "audience": "new_user", "type": "service_at_price" },
|
| 29 |
+
{ "id": "den_005", "title": "Free Smile Analysis + Digital Scan", "value": "0", "audience": "new_user", "type": "free_service" },
|
| 30 |
+
{ "id": "den_006", "title": "Aligner Consultation @ ₹499", "value": "499", "audience": "new_user", "type": "service_at_price" },
|
| 31 |
+
{ "id": "den_007", "title": "Pediatric Dental Checkup @ ₹199", "value": "199", "audience": "new_user", "type": "service_at_price" },
|
| 32 |
+
{ "id": "den_008", "title": "Annual Family Dental Plan @ ₹4,999", "value": "4999", "audience": "repeat_user", "type": "membership" }
|
| 33 |
+
],
|
| 34 |
+
"peer_stats": {
|
| 35 |
+
"scope": "metro_solo_practices_2026",
|
| 36 |
+
"avg_rating": 4.4,
|
| 37 |
+
"avg_review_count": 62,
|
| 38 |
+
"avg_views_30d": 1820,
|
| 39 |
+
"avg_calls_30d": 12,
|
| 40 |
+
"avg_directions_30d": 38,
|
| 41 |
+
"avg_ctr": 0.030,
|
| 42 |
+
"avg_photos": 9,
|
| 43 |
+
"avg_post_freq_days": 14,
|
| 44 |
+
"retention_6mo_pct": 0.42
|
| 45 |
+
},
|
| 46 |
+
"digest": [
|
| 47 |
+
{
|
| 48 |
+
"id": "d_2026W17_jida_fluoride",
|
| 49 |
+
"kind": "research",
|
| 50 |
+
"title": "3-month fluoride varnish recall outperforms 6-month for high-risk adult caries",
|
| 51 |
+
"source": "JIDA Oct 2026, p.14",
|
| 52 |
+
"trial_n": 2100,
|
| 53 |
+
"patient_segment": "high_risk_adults",
|
| 54 |
+
"summary": "Multi-center Indian trial shows 38% lower caries recurrence with 3-month vs 6-month recall in adults with active decay history. No effect in low-risk patients.",
|
| 55 |
+
"actionable": "Reassess recall interval for adults flagged high-risk in your charting"
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"id": "d_2026W17_dci_radiograph",
|
| 59 |
+
"kind": "compliance",
|
| 60 |
+
"title": "DCI revised radiograph dose limits effective 2026-12-15",
|
| 61 |
+
"source": "Dental Council of India circular 2026-11-04",
|
| 62 |
+
"summary": "Maximum dose per IOPA exposure drops from 1.5 mSv to 1.0 mSv. E-speed film passes at the new limit; D-speed does not. Digital RVG sensors unaffected.",
|
| 63 |
+
"actionable": "Audit your X-ray setup before Dec 15; document E-speed or RVG in your SOPs"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"id": "d_2026W17_ida_webinar",
|
| 67 |
+
"kind": "cde",
|
| 68 |
+
"title": "IDA Delhi: Digital impressions β 2026 state of the art",
|
| 69 |
+
"source": "IDA Delhi chapter calendar",
|
| 70 |
+
"date": "2026-05-02T19:00:00+05:30",
|
| 71 |
+
"credits": 2,
|
| 72 |
+
"summary": "Speaker: Dr. R. Mehta. Covers Primescan 2, Trios 5, and CAD/CAM workflow ROI for solo practices.",
|
| 73 |
+
"actionable": "Free for IDA members; βΉ500 for non-members"
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"id": "d_2026W17_aligner_trend",
|
| 77 |
+
"kind": "trend",
|
| 78 |
+
"title": "Clear aligner consultations searches +62% YoY in metros",
|
| 79 |
+
"source": "Practo + Google Trends, Apr 2026",
|
| 80 |
+
"summary": "'Clear aligners near me' query growth concentrated in 28-45 age band; female-skewed. Pricing pressure as DIY aligner brands hit the market.",
|
| 81 |
+
"actionable": "Position your supervised-aligner offer against DIY in your GBP description"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"id": "d_2026W17_e_max_press",
|
| 85 |
+
"kind": "tech",
|
| 86 |
+
"title": "Dentsply launches IPS e.max Press for zirconia crowns in India at βΉ3,200/unit (Delhi labs)",
|
| 87 |
+
"source": "Dentsply India launch note 2026-04-18",
|
| 88 |
+
"summary": "Better aesthetic, similar fracture resistance to standard zirconia. Two Delhi labs (Dental Direkt, Siam Dental) confirmed stocking. Shared-lease scanner deals available.",
|
| 89 |
+
"actionable": "Worth comparing if crown work is >15% of monthly revenue"
|
| 90 |
+
}
|
| 91 |
+
],
|
| 92 |
+
"patient_content_library": [
|
| 93 |
+
{
|
| 94 |
+
"id": "pc_oral_heart",
|
| 95 |
+
"title": "3 things your teeth tell you about your heart",
|
| 96 |
+
"channel": "whatsapp",
|
| 97 |
+
"length_seconds": 90,
|
| 98 |
+
"body": "Periodontal disease shares inflammation pathways with cardiovascular disease. Bleeding gums, persistent bad breath, and loose teeth in middle age are early signals. A 2024 meta-analysis of 18 studies found periodontal patients had 22% higher CVD events. The simplest action: scaling every 6 months reduces both gum and inflammation markers."
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"id": "pc_kid_brushing",
|
| 102 |
+
"title": "Why your child resists brushing β and what to try this week",
|
| 103 |
+
"channel": "whatsapp",
|
| 104 |
+
"length_seconds": 60,
|
| 105 |
+
"body": "Most resistance under age 7 is sensory, not behavioural. Try: (1) a softer brush than you think necessary, (2) brushing in front of a mirror so they see what's happening, (3) two minutes of music as the timer. If resistance persists past 4 weeks, it's worth a pediatric dental consult."
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"id": "pc_aligner_vs_braces",
|
| 109 |
+
"title": "Aligners or braces? What I tell my patients in their 30s",
|
| 110 |
+
"channel": "whatsapp",
|
| 111 |
+
"length_seconds": 75,
|
| 112 |
+
"body": "Aligners work for ~70% of cases β mild-to-moderate crowding, spacing, and minor rotations. Braces still win for severe rotations, deep bites, and skeletal corrections. The tell: if you can't keep aligners on for 22 hours/day, braces are the safer choice."
|
| 113 |
+
}
|
| 114 |
+
],
|
| 115 |
+
"seasonal_beats": [
|
| 116 |
+
{ "month_range": "Nov-Feb", "note": "exam-stress bruxism spike β ortho consults rise 30% in 18-24 cohort" },
|
| 117 |
+
{ "month_range": "Oct-Dec", "note": "wedding whitening peak β bookings 2x baseline; ladies' segment dominant" },
|
| 118 |
+
{ "month_range": "Jan", "note": "new-year resolution surge β annual check-up bookings +40%" },
|
| 119 |
+
{ "month_range": "Apr-Jun", "note": "school holiday window β pediatric appointments +50%" }
|
| 120 |
+
],
|
| 121 |
+
"trend_signals": [
|
| 122 |
+
{ "query": "clear aligners delhi", "delta_yoy": 0.62, "segment_age": "28-45", "skew": "female" },
|
| 123 |
+
{ "query": "teeth whitening price", "delta_yoy": 0.41, "segment_age": "all", "skew": "balanced" },
|
| 124 |
+
{ "query": "dental implants near me", "delta_yoy": 0.18, "segment_age": "45-65", "skew": "male" },
|
| 125 |
+
{ "query": "kids first dental visit", "delta_yoy": 0.27, "segment_age": "parents_25-40", "skew": "female" }
|
| 126 |
+
],
|
| 127 |
+
"regulatory_authorities": ["Dental Council of India (DCI)", "Indian Dental Association (IDA)"],
|
| 128 |
+
"professional_journals": ["JIDA", "Indian Journal of Dental Research", "Dental Tribune India"]
|
| 129 |
+
}
|
magicpin-ai-challenge/dataset/categories/gyms.json
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"slug": "gyms",
|
| 3 |
+
"display_name": "Gyms & Fitness",
|
| 4 |
+
"voice": {
|
| 5 |
+
"tone": "energetic_disciplined",
|
| 6 |
+
"register": "coach_to_member",
|
| 7 |
+
"code_mix": "english_primary_some_hindi",
|
| 8 |
+
"vocab_allowed": [
|
| 9 |
+
"footfall", "membership churn", "PT sessions", "PR (personal record)",
|
| 10 |
+
"1RM", "EMOM", "AMRAP", "split", "cut", "bulk", "BMR", "VO2max",
|
| 11 |
+
"functional", "HIIT", "CrossFit", "yoga", "pilates"
|
| 12 |
+
],
|
| 13 |
+
"vocab_taboo": [
|
| 14 |
+
"guaranteed weight loss", "shred in 7 days", "miracle transformation",
|
| 15 |
+
"fastest results"
|
| 16 |
+
],
|
| 17 |
+
"salutation_examples": ["Hi {first_name}", "{gym_name} team", "Coach"],
|
| 18 |
+
"tone_examples": [
|
| 19 |
+
"Quick check β your weekday 7-9pm slot has been at 90%+ capacity all month",
|
| 20 |
+
"Footfall pattern: April drop-off is normal; bookings recover by 2nd week May"
|
| 21 |
+
]
|
| 22 |
+
},
|
| 23 |
+
"offer_catalog": [
|
| 24 |
+
{ "id": "gym_001", "title": "3 FREE Trial Classes", "value": "0", "audience": "new_user", "type": "free_trial" },
|
| 25 |
+
{ "id": "gym_002", "title": "First Month @ ₹499", "value": "499", "audience": "new_user", "type": "service_at_price" },
|
| 26 |
+
{ "id": "gym_003", "title": "Personal Training Demo @ ₹199", "value": "199", "audience": "new_user", "type": "service_at_price" },
|
| 27 |
+
{ "id": "gym_004", "title": "Annual Membership @ ₹14,999 (save ₹6,000)", "value": "14999", "audience": "new_user", "type": "service_at_price" },
|
| 28 |
+
{ "id": "gym_005", "title": "Couple/Family Plan @ ₹999/month", "value": "999", "audience": "new_user", "type": "service_at_price" },
|
| 29 |
+
{ "id": "gym_006", "title": "Free Body Composition Analysis", "value": "0", "audience": "all", "type": "free_service" },
|
| 30 |
+
{ "id": "gym_007", "title": "Refer-a-friend: 1 month free for both", "value": "free_addon", "audience": "repeat_user", "type": "free_addon" },
|
| 31 |
+
{ "id": "gym_008", "title": "Yoga + Strength Combo @ ₹1,499/month", "value": "1499", "audience": "new_user", "type": "service_at_price" }
|
| 32 |
+
],
|
| 33 |
+
"peer_stats": {
|
| 34 |
+
"scope": "metro_neighbourhood_gyms_2026",
|
| 35 |
+
"avg_rating": 4.5,
|
| 36 |
+
"avg_review_count": 56,
|
| 37 |
+
"avg_views_30d": 1100,
|
| 38 |
+
"avg_calls_30d": 18,
|
| 39 |
+
"avg_directions_30d": 42,
|
| 40 |
+
"avg_ctr": 0.045,
|
| 41 |
+
"avg_photos": 16,
|
| 42 |
+
"avg_post_freq_days": 12,
|
| 43 |
+
"monthly_churn_pct": 0.08,
|
| 44 |
+
"trial_to_paid_pct": 0.32
|
| 45 |
+
},
|
| 46 |
+
"digest": [
|
| 47 |
+
{
|
| 48 |
+
"id": "d_2026W17_resolution_window",
|
| 49 |
+
"kind": "seasonal",
|
| 50 |
+
"title": "Post-Jan resolution window closing — last 2 weeks of high trial-walk-ins",
|
| 51 |
+
"source": "magicpin gym data, Apr 2026",
|
| 52 |
+
"summary": "Gym trial walk-ins spike Jan 1-15, taper through Mar; April-June hits the lowest acquisition window of the year. Most gyms over-spend on ads now; underspend in October pre-holiday window.",
|
| 53 |
+
"actionable": "Pause acquisition spend in May; double down Sept-Oct"
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"id": "d_2026W17_pt_demand",
|
| 57 |
+
"kind": "trend",
|
| 58 |
+
"title": "Personal Training inquiries +38% YoY in 30-50 corporate cohort",
|
| 59 |
+
"source": "Multi-gym aggregate Apr 2026",
|
| 60 |
+
"summary": "Driven by health-check-up findings (rising HbA1c, BP, cholesterol). Demand is for 2x/week PT, not full-program. ₹3,000-5,000/month sweet spot.",
|
| 61 |
+
"actionable": "Package a '2x/week PT @ ₹3,499' option separate from full membership"
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"id": "d_2026W17_yoga_studio_competition",
|
| 65 |
+
"kind": "compete",
|
| 66 |
+
"title": "Boutique yoga/pilates studios opening fast in metro neighbourhoods",
|
| 67 |
+
"source": "Industry watch Apr 2026",
|
| 68 |
+
"summary": "Reformer pilates studios at ₹6,000-8,000/month membership are pulling 30-45 women from traditional gyms. Don't compete on price; compete on coach quality + community.",
|
| 69 |
+
"actionable": "Add one yoga or pilates class to your weekly schedule if you don't have one"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"id": "d_2026W17_creatine_safety_bulletin",
|
| 73 |
+
"kind": "research",
|
| 74 |
+
"title": "ICMR creatine supplementation safety bulletin — adolescent guidance",
|
| 75 |
+
"source": "ICMR, Apr 2026",
|
| 76 |
+
"summary": "Creatine monohydrate at 3-5g/day deemed safe for healthy adults; cautioned against under-18 use without medical supervision. Reinforces what most coaches already practice.",
|
| 77 |
+
"actionable": "Update your store / counter recommendations; flag under-18 customers"
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"id": "d_2026W17_class_schedule_optimization",
|
| 81 |
+
"kind": "tech",
|
| 82 |
+
"title": "Schedule density study — peak slots underutilized in mornings",
|
| 83 |
+
"source": "magicpin internal Apr 2026",
|
| 84 |
+
"summary": "Across 200+ partner gyms, 6-8am weekday slots run at 60% capacity vs 90%+ for 6-9pm. Adding a 7am class typically doesn't cannibalize evenings.",
|
| 85 |
+
"actionable": "Consider one new 7am class — yoga or HIIT pulls best in that slot"
|
| 86 |
+
}
|
| 87 |
+
],
|
| 88 |
+
"patient_content_library": [
|
| 89 |
+
{
|
| 90 |
+
"id": "pc_first_30_days",
|
| 91 |
+
"title": "First 30 days at the gym — what actually changes",
|
| 92 |
+
"channel": "whatsapp",
|
| 93 |
+
"length_seconds": 60,
|
| 94 |
+
"body": "Week 1-2: nervous-system adaptation, lifts feel easier (not muscle gain — coordination). Week 3-4: small visible changes, sleep improves. Real strength/aesthetic changes start at week 8-12. Most people quit at week 3 expecting visible change. Trust the process."
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"id": "pc_pt_or_solo",
|
| 98 |
+
"title": "PT or solo? A 5-question test",
|
| 99 |
+
"channel": "whatsapp",
|
| 100 |
+
"length_seconds": 45,
|
| 101 |
+
"body": "Solo if: you've trained before, you can read program structure, you don't get injured easily. PT if: you're new, recovering from injury, training for a specific event, or have inconsistent form. Hybrid (PT 1-2x/month + solo): the smartest middle path for most members."
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"id": "pc_protein_basics",
|
| 105 |
+
"title": "How much protein do you actually need?",
|
| 106 |
+
"channel": "whatsapp",
|
| 107 |
+
"length_seconds": 50,
|
| 108 |
+
"body": "Sedentary adults: 0.8g/kg body weight. Active adults: 1.2-1.6g/kg. Strength training: 1.6-2.2g/kg. For a 70kg active person, that's 84-112g/day — roughly 4 fistful-portions of protein. Indian-veg diets often fall short; supplement with paneer, dal, tofu before adding powders."
|
| 109 |
+
}
|
| 110 |
+
],
|
| 111 |
+
"seasonal_beats": [
|
| 112 |
+
{ "month_range": "Jan", "note": "resolution surge — trial walk-ins 4x baseline; convert window" },
|
| 113 |
+
{ "month_range": "Apr-Jun", "note": "lowest acquisition window — focus on retention, not acquisition" },
|
| 114 |
+
{ "month_range": "Aug-Oct", "note": "wedding-prep + festival window — repeat clients return to shape up" },
|
| 115 |
+
{ "month_range": "Nov-Dec", "note": "holiday slowdown — class density drops 25%; right time to renovate or pilot new programs" }
|
| 116 |
+
],
|
| 117 |
+
"trend_signals": [
|
| 118 |
+
{ "query": "gym near me", "delta_yoy": 0.05, "segment_age": "all", "skew": "balanced" },
|
| 119 |
+
{ "query": "personal trainer cost", "delta_yoy": 0.38, "segment_age": "30-50", "skew": "balanced" },
|
| 120 |
+
{ "query": "yoga classes near me", "delta_yoy": 0.42, "segment_age": "25-55", "skew": "female" },
|
| 121 |
+
{ "query": "weight loss program", "delta_yoy": 0.28, "segment_age": "30-50", "skew": "female" }
|
| 122 |
+
],
|
| 123 |
+
"regulatory_authorities": ["FSSAI (for supplement sales)", "Local Municipal Corp (premises)"],
|
| 124 |
+
"professional_journals": ["Indian Journal of Sports Medicine"]
|
| 125 |
+
}
|
magicpin-ai-challenge/dataset/categories/pharmacies.json
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"slug": "pharmacies",
|
| 3 |
+
"display_name": "Pharmacies & Medical Stores",
|
| 4 |
+
"voice": {
|
| 5 |
+
"tone": "trustworthy_precise",
|
| 6 |
+
"register": "neighbourhood_pharmacist",
|
| 7 |
+
"code_mix": "hindi_english_natural",
|
| 8 |
+
"vocab_allowed": [
|
| 9 |
+
"OTC", "schedule H", "schedule X", "generic", "branded", "molecule",
|
| 10 |
+
"MRP", "expiry", "batch", "PCR retail", "pharmacist counsel"
|
| 11 |
+
],
|
| 12 |
+
"vocab_taboo": [
|
| 13 |
+
"miracle cure", "guaranteed result", "100% safe", "doctor recommended (without disclosure)",
|
| 14 |
+
"best price (without supporting data)"
|
| 15 |
+
],
|
| 16 |
+
"salutation_examples": ["Hi {pharmacist_name}", "{pharmacy_name} team"],
|
| 17 |
+
"tone_examples": [
|
| 18 |
+
"Quick check — your repeat-prescription customer count is up 18% this month",
|
| 19 |
+
"Heads up: a generic alternative for {molecule} just got approved — likely 30% lower MRP"
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
"offer_catalog": [
|
| 23 |
+
{ "id": "phr_001", "title": "Flat 20% OFF on medicines", "value": "20%", "audience": "new_user", "type": "percentage_discount" },
|
| 24 |
+
{ "id": "phr_002", "title": "Free Home Delivery > ₹499", "value": "free_delivery", "audience": "new_user", "type": "free_addon" },
|
| 25 |
+
{ "id": "phr_003", "title": "Annual Health Card @ ₹399 (15% off all year)", "value": "399", "audience": "repeat_user", "type": "membership" },
|
| 26 |
+
{ "id": "phr_004", "title": "Free BP & Sugar Check", "value": "0", "audience": "all", "type": "free_service" },
|
| 27 |
+
{ "id": "phr_005", "title": "Senior Citizen 15% OFF (60+ age)", "value": "15%", "audience": "senior", "type": "percentage_discount" },
|
| 28 |
+
{ "id": "phr_006", "title": "Diabetic Care Combo: Glucometer + 50 strips @ ₹999", "value": "999", "audience": "new_user", "type": "service_at_price" },
|
| 29 |
+
{ "id": "phr_007", "title": "Free Pharmacist Consultation (10 min)", "value": "0", "audience": "all", "type": "free_service" },
|
| 30 |
+
{ "id": "phr_008", "title": "Subscription refill reminder + delivery (chronic Rx)", "value": "0", "audience": "repeat_user", "type": "free_service" }
|
| 31 |
+
],
|
| 32 |
+
"peer_stats": {
|
| 33 |
+
"scope": "metro_neighbourhood_pharmacies_2026",
|
| 34 |
+
"avg_rating": 4.6,
|
| 35 |
+
"avg_review_count": 42,
|
| 36 |
+
"avg_views_30d": 1400,
|
| 37 |
+
"avg_calls_30d": 22,
|
| 38 |
+
"avg_directions_30d": 58,
|
| 39 |
+
"avg_ctr": 0.038,
|
| 40 |
+
"avg_photos": 6,
|
| 41 |
+
"avg_post_freq_days": 21,
|
| 42 |
+
"delivery_share_pct": 0.35,
|
| 43 |
+
"repeat_customer_pct": 0.62
|
| 44 |
+
},
|
| 45 |
+
"digest": [
|
| 46 |
+
{
|
| 47 |
+
"id": "d_2026W17_generic_metformin",
|
| 48 |
+
"kind": "supply",
|
| 49 |
+
"title": "Generic metformin SR price drop after 4 new approvals",
|
| 50 |
+
"source": "DCGI release Apr 2026",
|
| 51 |
+
"summary": "Three major Indian players got SR-formulation approval; wholesale price down 22% effective next month. Branded retail likely flat for now (margin will absorb).",
|
| 52 |
+
"actionable": "Audit your shelf — switching diabetic refills to generic now saves the patient ~₹120/month and improves your repeat-buy stickiness"
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"id": "d_2026W17_schedule_h1_compliance",
|
| 56 |
+
"kind": "compliance",
|
| 57 |
+
"title": "FDA enforcement audit on Schedule H1 antibiotic dispensing — Q2",
|
| 58 |
+
"source": "FDA India inspector circular Apr 2026",
|
| 59 |
+
"summary": "Tighter Q2 audit cycle on H1 antibiotic dispensing — proper Rx capture, register entries, batch tracking. ₹50,000+ penalties for missing entries.",
|
| 60 |
+
"actionable": "Audit your H1 register; ensure pharmacist signature + Rx photocopy on file for every dispense in last 90 days"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"id": "d_2026W17_summer_demand",
|
| 64 |
+
"kind": "seasonal",
|
| 65 |
+
"title": "Summer demand shift: ORS, sunscreen, anti-fungal up 40%; cold/cough down 60%",
|
| 66 |
+
"source": "Multi-pharmacy aggregate Apr 2026",
|
| 67 |
+
"summary": "Standard April-Jun pattern: ORS sachets, sunscreen, anti-fungal creams, deodorant peak; respiratory medication tapers off. Restock + shelf-rearrange.",
|
| 68 |
+
"actionable": "Move ORS + sunscreen to counter visibility; cold/cough to back shelf"
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"id": "d_2026W17_chronic_subscription",
|
| 72 |
+
"kind": "tech",
|
| 73 |
+
"title": "Chronic-Rx subscription retention 3.2x higher than walk-in",
|
| 74 |
+
"source": "magicpin pharmacy data Apr 2026",
|
| 75 |
+
"summary": "Pharmacies with WhatsApp-based refill reminder + auto-delivery for chronic patients see 88% 12-month retention vs 27% for walk-in-only chronic customers.",
|
| 76 |
+
"actionable": "Set up the WhatsApp reminder — covers diabetic, hypertensive, thyroid Rx"
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"id": "d_2026W17_atorvastatin_recall",
|
| 80 |
+
"kind": "alert",
|
| 81 |
+
"title": "Voluntary recall: Specific atorvastatin batches by manufacturer X",
|
| 82 |
+
"source": "CDSCO alert Apr 2026",
|
| 83 |
+
"summary": "Two batches (numbers in alert) flagged for sub-potency. Customers should be informed; replacement available via distributor return chain. No safety risk for patients beyond suboptimal LDL control.",
|
| 84 |
+
"actionable": "Pull the batches; WhatsApp affected customers from your repeat-Rx list"
|
| 85 |
+
}
|
| 86 |
+
],
|
| 87 |
+
"patient_content_library": [
|
| 88 |
+
{
|
| 89 |
+
"id": "pc_generic_branded",
|
| 90 |
+
"title": "Generic vs branded medicines — what's actually different?",
|
| 91 |
+
"channel": "whatsapp",
|
| 92 |
+
"length_seconds": 60,
|
| 93 |
+
"body": "The active molecule is identical and regulator-tested. Difference is in the binders, fillers, and brand markup. Generic is typically 40-60% cheaper. Two cases where brand may help: (1) very narrow-therapeutic-index drugs (some thyroid, blood thinners) where bioequivalence varies, (2) extended-release formulations. Otherwise, generic is the rational choice."
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"id": "pc_storage",
|
| 97 |
+
"title": "Where to NOT store your medicines",
|
| 98 |
+
"channel": "whatsapp",
|
| 99 |
+
"length_seconds": 45,
|
| 100 |
+
"body": "Three places: (1) bathroom — humidity wrecks tablet integrity. (2) car glove box — temperature swings degrade most molecules in a month. (3) refrigerator door — temperature varies too much; use the main shelf if cold storage is needed. The kitchen at room temperature, away from sunlight, is best."
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"id": "pc_summer_basics",
|
| 104 |
+
"title": "Summer first-aid: what every Indian household should have",
|
| 105 |
+
"channel": "whatsapp",
|
| 106 |
+
"length_seconds": 50,
|
| 107 |
+
"body": "Five items: ORS sachets (heat-stroke is sudden), broad-spectrum sunscreen SPF 50+ (apply 30 min before sun), anti-fungal cream (sweat triggers infections), Vaseline (chafing relief), and a thermometer. Skip energy drinks marketed as 'summer drinks' — sugar load is counterproductive when dehydrated."
|
| 108 |
+
}
|
| 109 |
+
],
|
| 110 |
+
"seasonal_beats": [
|
| 111 |
+
{ "month_range": "Apr-Jun", "note": "summer surge — ORS, sunscreen, anti-fungal, deodorant" },
|
| 112 |
+
{ "month_range": "Jul-Aug", "note": "monsoon — anti-bacterial, anti-fungal, immunity supplements peak" },
|
| 113 |
+
{ "month_range": "Oct-Nov", "note": "festival sweets → blood sugar spike → diabetic monitoring needs surge" },
|
| 114 |
+
{ "month_range": "Dec-Jan", "note": "respiratory peak — cough/cold/anti-allergic 2x baseline" }
|
| 115 |
+
],
|
| 116 |
+
"trend_signals": [
|
| 117 |
+
{ "query": "medicine home delivery", "delta_yoy": 0.42, "segment_age": "all", "skew": "balanced" },
|
| 118 |
+
{ "query": "generic medicine", "delta_yoy": 0.34, "segment_age": "30-65", "skew": "balanced" },
|
| 119 |
+
{ "query": "diabetes care kit", "delta_yoy": 0.28, "segment_age": "40-65", "skew": "balanced" },
|
| 120 |
+
{ "query": "blood pressure monitor", "delta_yoy": 0.18, "segment_age": "45-70", "skew": "balanced" }
|
| 121 |
+
],
|
| 122 |
+
"regulatory_authorities": ["CDSCO", "State FDA", "Drug Controller General of India (DCGI)"],
|
| 123 |
+
"professional_journals": ["Pharmabiz", "ChemistryView India", "IPA Bulletin"]
|
| 124 |
+
}
|
magicpin-ai-challenge/dataset/categories/restaurants.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"slug": "restaurants",
|
| 3 |
+
"display_name": "Restaurants & Cafes",
|
| 4 |
+
"voice": {
|
| 5 |
+
"tone": "warm_busy_practical",
|
| 6 |
+
"register": "fellow_operator",
|
| 7 |
+
"code_mix": "hindi_english_natural",
|
| 8 |
+
"vocab_allowed": [
|
| 9 |
+
"footfall", "covers", "AOV", "RPC", "table turnover", "reservations",
|
| 10 |
+
"GRO", "weekend brunch", "happy hour", "thali", "biryani", "tandoor"
|
| 11 |
+
],
|
| 12 |
+
"vocab_taboo": [
|
| 13 |
+
"best food in city", "guaranteed packed house", "miracle marketing",
|
| 14 |
+
"viral guarantee"
|
| 15 |
+
],
|
| 16 |
+
"salutation_examples": ["Hi {chef_or_owner_first_name}", "{restaurant_name} team"],
|
| 17 |
+
"tone_examples": [
|
| 18 |
+
"Quick one — IPL match nights have been 1.5x your weekday avg this season",
|
| 19 |
+
"Spotted: 'biryani delivery' searches in your sublocality up 28% this week"
|
| 20 |
+
]
|
| 21 |
+
},
|
| 22 |
+
"offer_catalog": [
|
| 23 |
+
{ "id": "res_001", "title": "Flat 30% OFF on total bill (limit ₹500)", "value": "30%", "audience": "new_user", "type": "percentage_discount" },
|
| 24 |
+
{ "id": "res_002", "title": "Buy 1 Pizza Get 1 Free (Tue-Thu)", "value": "BOGO", "audience": "new_user", "type": "bogo" },
|
| 25 |
+
{ "id": "res_003", "title": "Weekday Lunch Thali @ ₹149", "value": "149", "audience": "new_user", "type": "service_at_price" },
|
| 26 |
+
{ "id": "res_004", "title": "Free Starter on orders > ₹1,200", "value": "free_addon", "audience": "new_user", "type": "free_addon" },
|
| 27 |
+
{ "id": "res_005", "title": "Match-night Combo @ ₹399 (food + drink)", "value": "399", "audience": "new_user", "type": "service_at_price" },
|
| 28 |
+
{ "id": "res_006", "title": "Family Sunday Brunch @ ₹699/pax", "value": "699", "audience": "new_user", "type": "service_at_price" },
|
| 29 |
+
{ "id": "res_007", "title": "Free Delivery > ₹500", "value": "free_delivery", "audience": "new_user", "type": "free_addon" },
|
| 30 |
+
{ "id": "res_008", "title": "Birthday: Free Cake on parties of 6+", "value": "free_addon", "audience": "all", "type": "free_addon" }
|
| 31 |
+
],
|
| 32 |
+
"peer_stats": {
|
| 33 |
+
"scope": "metro_casual_dining_2026",
|
| 34 |
+
"avg_rating": 4.2,
|
| 35 |
+
"avg_review_count": 142,
|
| 36 |
+
"avg_views_30d": 4800,
|
| 37 |
+
"avg_calls_30d": 38,
|
| 38 |
+
"avg_directions_30d": 95,
|
| 39 |
+
"avg_ctr": 0.025,
|
| 40 |
+
"avg_photos": 22,
|
| 41 |
+
"avg_post_freq_days": 7,
|
| 42 |
+
"retention_30d_pct": 0.18
|
| 43 |
+
},
|
| 44 |
+
"digest": [
|
| 45 |
+
{
|
| 46 |
+
"id": "d_2026W17_ipl_window",
|
| 47 |
+
"kind": "seasonal",
|
| 48 |
+
"title": "IPL home-match Saturdays underperformed weeknight matches across metros",
|
| 49 |
+
"source": "magicpin order data, Apr 2026",
|
| 50 |
+
"summary": "Saturday IPL matches shift orders to home-watch parties; restaurant covers down 12% vs Saturday average. Weeknight matches drive +18% covers. Adjust promo days accordingly.",
|
| 51 |
+
"actionable": "Push match-night combos on Tue/Wed/Thu match dates only"
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"id": "d_2026W17_packaged_food_gst",
|
| 55 |
+
"kind": "compliance",
|
| 56 |
+
"title": "GST council clarifies 5% rate for restaurant takeaway packaging from 2026-06-01",
|
| 57 |
+
"source": "GST Council circular 224/2026",
|
| 58 |
+
"summary": "Single-use plastic surcharge added to packaging cost for orders >₹500. Effective June 1. Cloud kitchens hit hardest.",
|
| 59 |
+
"actionable": "Audit your packaging cost; consider Eco-pack alternative if volume justifies"
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"id": "d_2026W17_zomato_visibility",
|
| 63 |
+
"kind": "trend",
|
| 64 |
+
"title": "Zomato 'verified' badge correlates with +24% impressions in Tier-1 cities",
|
| 65 |
+
"source": "Zomato partner update, Apr 2026",
|
| 66 |
+
"summary": "Verification requires uploaded GST cert + 6 months continuous operation. Most eligible mid-size restaurants haven't applied; 5-day approval.",
|
| 67 |
+
"actionable": "Apply via partner dashboard if you've crossed 6 months"
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"id": "d_2026W17_swiggy_iCare",
|
| 71 |
+
"kind": "tech",
|
| 72 |
+
"title": "Swiggy iCare: AI complaint summarizer launching Apr 2026",
|
| 73 |
+
"source": "Swiggy partner blog 2026-04-12",
|
| 74 |
+
"summary": "Auto-summarises customer complaints into themes (cold food / late / wrong order). Free for Pro merchants. Reveals pattern most operators miss.",
|
| 75 |
+
"actionable": "Enable in dashboard; review weekly themes Sunday evening"
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"id": "d_2026W17_dish_trend",
|
| 79 |
+
"kind": "trend",
|
| 80 |
+
"title": "'Sugar-free dessert' searches +52% YoY across Indian metros",
|
| 81 |
+
"source": "Google Trends Apr 2026",
|
| 82 |
+
"summary": "Concentrated in 28-45 age band; correlated with rising diabetic awareness. Two new options on dessert menus seeing strong attach rate: keto cheesecake, almond-flour brownie.",
|
| 83 |
+
"actionable": "Add one sugar-free dessert; mark prominently on menu and GBP"
|
| 84 |
+
}
|
| 85 |
+
],
|
| 86 |
+
"patient_content_library": [
|
| 87 |
+
{
|
| 88 |
+
"id": "pc_ordering_smart",
|
| 89 |
+
"title": "Three things to look for before ordering food online",
|
| 90 |
+
"channel": "whatsapp",
|
| 91 |
+
"length_seconds": 60,
|
| 92 |
+
"body": "(1) Look at the latest review's date — if last review is 30+ days old, the kitchen may be off. (2) Filter for 4★+ reviews mentioning your specific dish. (3) Order at peak hours — fresh prep, not warmed-up morning batch. These three filter out 80% of bad-experience risk."
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"id": "pc_thali_economics",
|
| 96 |
+
"title": "Why a ₹149 thali is the best lunch deal in town",
|
| 97 |
+
"channel": "whatsapp",
|
| 98 |
+
"length_seconds": 45,
|
| 99 |
+
"body": "A weekday thali for ₹149 covers your daily protein, two veggies, dal, rice, roti, and pickle/sweet. Buying these separately at any market in the neighbourhood: ₹240+. The only way restaurants make this work is high lunch volume — which means freshly-cooked, not standing in a chafer."
|
| 100 |
+
}
|
| 101 |
+
],
|
| 102 |
+
"seasonal_beats": [
|
| 103 |
+
{ "month_range": "Mar-Apr", "note": "IPL season — match-night promos on Tue/Wed/Thu; not weekends" },
|
| 104 |
+
{ "month_range": "Oct-Nov", "note": "Diwali corporate gifting + family-feast bookings" },
|
| 105 |
+
{ "month_range": "Dec", "note": "Christmas + New Year — set menu sales 3x baseline" },
|
| 106 |
+
{ "month_range": "Jul-Aug", "note": "monsoon delivery surge; rain-day discount window" },
|
| 107 |
+
{ "month_range": "Feb 14", "note": "Valentine's prix-fixe — book starting 2 weeks prior" }
|
| 108 |
+
],
|
| 109 |
+
"trend_signals": [
|
| 110 |
+
{ "query": "biryani near me", "delta_yoy": 0.18, "segment_age": "all", "skew": "balanced" },
|
| 111 |
+
{ "query": "weekday lunch thali", "delta_yoy": 0.34, "segment_age": "office_25-45", "skew": "balanced" },
|
| 112 |
+
{ "query": "sugar free dessert", "delta_yoy": 0.52, "segment_age": "28-45", "skew": "balanced" },
|
| 113 |
+
{ "query": "match night offer", "delta_yoy": 0.65, "segment_age": "20-40", "skew": "male" },
|
| 114 |
+
{ "query": "small party catering", "delta_yoy": 0.22, "segment_age": "30-50", "skew": "female" }
|
| 115 |
+
],
|
| 116 |
+
"regulatory_authorities": ["FSSAI", "Local Health Department", "GST Council"],
|
| 117 |
+
"professional_journals": ["FoodService India", "Hospitality Bizz"]
|
| 118 |
+
}
|
magicpin-ai-challenge/dataset/categories/salons.json
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"slug": "salons",
|
| 3 |
+
"display_name": "Salons & Beauty",
|
| 4 |
+
"voice": {
|
| 5 |
+
"tone": "warm_practical",
|
| 6 |
+
"register": "approachable_expert",
|
| 7 |
+
"code_mix": "hindi_english_natural",
|
| 8 |
+
"vocab_allowed": [
|
| 9 |
+
"balayage", "highlights", "keratin", "smoothening", "hair spa",
|
| 10 |
+
"manicure", "pedicure", "facial", "threading", "waxing", "extensions",
|
| 11 |
+
"olaplex", "wella", "loreal", "schwarzkopf", "redken"
|
| 12 |
+
],
|
| 13 |
+
"vocab_taboo": [
|
| 14 |
+
"guaranteed glow", "permanent results", "instant transformation", "miracle",
|
| 15 |
+
"best in city"
|
| 16 |
+
],
|
| 17 |
+
"salutation_examples": ["Hi {first_name}", "{salon_name} team"],
|
| 18 |
+
"tone_examples": [
|
| 19 |
+
"Bridal season is starting — bookings usually 2x normal in next 4 weeks",
|
| 20 |
+
"Quick one — your Saturday 5-7pm slot has been the strongest this month"
|
| 21 |
+
]
|
| 22 |
+
},
|
| 23 |
+
"offer_catalog": [
|
| 24 |
+
{ "id": "sal_001", "title": "Haircut @ ₹99", "value": "99", "audience": "new_user", "type": "service_at_price" },
|
| 25 |
+
{ "id": "sal_002", "title": "FREE head massage with Haircut", "value": "0", "audience": "new_user", "type": "free_addon" },
|
| 26 |
+
{ "id": "sal_003", "title": "Hair Spa @ ₹499", "value": "499", "audience": "new_user", "type": "service_at_price" },
|
| 27 |
+
{ "id": "sal_004", "title": "Threading + Waxing combo @ ₹299", "value": "299", "audience": "new_user", "type": "service_at_price" },
|
| 28 |
+
{ "id": "sal_005", "title": "Bridal Trial @ ₹999", "value": "999", "audience": "new_user", "type": "service_at_price" },
|
| 29 |
+
{ "id": "sal_006", "title": "Keratin Treatment @ ₹2,499", "value": "2499", "audience": "new_user", "type": "service_at_price" },
|
| 30 |
+
{ "id": "sal_007", "title": "Mani+Pedi Combo @ ₹599", "value": "599", "audience": "new_user", "type": "service_at_price" },
|
| 31 |
+
{ "id": "sal_008", "title": "Annual Membership: 12 services @ ₹4,999", "value": "4999", "audience": "repeat_user", "type": "membership" }
|
| 32 |
+
],
|
| 33 |
+
"peer_stats": {
|
| 34 |
+
"scope": "metro_unisex_salons_2026",
|
| 35 |
+
"avg_rating": 4.5,
|
| 36 |
+
"avg_review_count": 88,
|
| 37 |
+
"avg_views_30d": 2400,
|
| 38 |
+
"avg_calls_30d": 28,
|
| 39 |
+
"avg_directions_30d": 62,
|
| 40 |
+
"avg_ctr": 0.040,
|
| 41 |
+
"avg_photos": 14,
|
| 42 |
+
"avg_post_freq_days": 10,
|
| 43 |
+
"retention_3mo_pct": 0.55
|
| 44 |
+
},
|
| 45 |
+
"digest": [
|
| 46 |
+
{
|
| 47 |
+
"id": "d_2026W17_olaplex_no9",
|
| 48 |
+
"kind": "tech",
|
| 49 |
+
"title": "Olaplex No.9 launches in India — bond protector for chemically-treated hair",
|
| 50 |
+
"source": "Hair Brand News India, Apr 2026",
|
| 51 |
+
"summary": "Pre-shampoo bond protector targeting damage from frequent colour and smoothening. Distributors confirmed in Delhi/Mumbai/Bangalore. Wholesale ₹3,200/bottle, retail ~₹4,500.",
|
| 52 |
+
"actionable": "Worth piloting if your colour client mix is >25%"
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"id": "d_2026W17_keratin_alt",
|
| 56 |
+
"kind": "trend",
|
| 57 |
+
"title": "Formaldehyde-free smoothening alternatives gaining share — citric-acid based",
|
| 58 |
+
"source": "Salon India magazine, Apr 2026",
|
| 59 |
+
"summary": "Brazilian citric-acid keratin alternatives now widely available. Lower fume, comparable hold for ~6-8 weeks. Pricing roughly +15% over standard keratin.",
|
| 60 |
+
"actionable": "Position as 'safe smoothening' for pregnancy / asthma clients"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"id": "d_2026W17_bridal_season_start",
|
| 64 |
+
"kind": "seasonal",
|
| 65 |
+
"title": "Wedding season opener — first lean April-May window before main Oct-Dec rush",
|
| 66 |
+
"source": "Wedding industry intel",
|
| 67 |
+
"summary": "Mini-bridal window April-May (~15% of annual bridal volume) often missed by salons focused on Oct-Dec. Pre-wedding skincare + mehendi-prep services peak now.",
|
| 68 |
+
"actionable": "Run a 'Bridal Trial @ ₹999' offer; book 2-month skincare package"
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"id": "d_2026W17_olaplex_training",
|
| 72 |
+
"kind": "cde",
|
| 73 |
+
"title": "L'Oreal Professionnel India training: Advanced Balayage Masterclass",
|
| 74 |
+
"source": "L'Oreal Pro India calendar",
|
| 75 |
+
"date": "2026-05-08",
|
| 76 |
+
"summary": "2-day in-person training in Mumbai. ₹15,000 fee. Targeted at stylists with 2+ years of colour experience.",
|
| 77 |
+
"actionable": "Worth subsidising for one stylist if balayage demand is rising in your locality"
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"id": "d_2026W17_walkin_priority",
|
| 81 |
+
"kind": "trend",
|
| 82 |
+
"title": "'Walk-in available' tag on GBP boosting calls 23% in metros",
|
| 83 |
+
"source": "magicpin internal, Apr 2026",
|
| 84 |
+
"summary": "Salons that explicitly add 'walk-in available' to their GBP description see 23% higher call volume in the same locality vs salons without. Effect strongest weekday afternoons.",
|
| 85 |
+
"actionable": "Add the tag to your GBP description this week"
|
| 86 |
+
}
|
| 87 |
+
],
|
| 88 |
+
"patient_content_library": [
|
| 89 |
+
{
|
| 90 |
+
"id": "pc_keratin_safe",
|
| 91 |
+
"title": "Is keratin safe? What to ask before booking",
|
| 92 |
+
"channel": "whatsapp",
|
| 93 |
+
"length_seconds": 60,
|
| 94 |
+
"body": "Old-school keratin uses formaldehyde — safe in trained hands but not for pregnancy or asthma. Citric-acid alternatives exist now with similar hold. Two questions to ask any salon: (1) what's your activator? (2) is the room ventilated? If you don't get a clear answer, look elsewhere."
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"id": "pc_balayage_vs_highlights",
|
| 98 |
+
"title": "Balayage vs highlights — which actually suits you?",
|
| 99 |
+
"channel": "whatsapp",
|
| 100 |
+
"length_seconds": 75,
|
| 101 |
+
"body": "Highlights are uniform stripes; balayage is hand-painted, growing out softly with no visible regrowth line. Highlights cost less but need touch-ups every 8 weeks. Balayage holds 4-5 months. If you don't visit the salon often, balayage is the practical choice."
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"id": "pc_post_color_care",
|
| 105 |
+
"title": "Post-colour care — first 72 hours matter most",
|
| 106 |
+
"channel": "whatsapp",
|
| 107 |
+
"length_seconds": 45,
|
| 108 |
+
"body": "Three rules: no shampoo for 48h (let cuticle close), use sulphate-free shampoo from day 3, and avoid hot water washes for the first month. Skip these and you'll lose 30% of the colour vibrance in the first wash."
|
| 109 |
+
}
|
| 110 |
+
],
|
| 111 |
+
"seasonal_beats": [
|
| 112 |
+
{ "month_range": "Oct-Dec", "note": "primary wedding/festival season — bridal package bookings 4x baseline" },
|
| 113 |
+
{ "month_range": "Apr-May", "note": "secondary bridal window + summer hair-care surge" },
|
| 114 |
+
{ "month_range": "Jul-Aug", "note": "monsoon haircare focus (anti-frizz, scalp treatments)" },
|
| 115 |
+
{ "month_range": "Mar", "note": "Holi colour-recovery surge — book hair spas the week after" }
|
| 116 |
+
],
|
| 117 |
+
"trend_signals": [
|
| 118 |
+
{ "query": "balayage near me", "delta_yoy": 0.45, "segment_age": "25-40", "skew": "female" },
|
| 119 |
+
{ "query": "keratin treatment price", "delta_yoy": 0.18, "segment_age": "25-45", "skew": "female" },
|
| 120 |
+
{ "query": "men's haircut delhi", "delta_yoy": 0.22, "segment_age": "20-35", "skew": "male" },
|
| 121 |
+
{ "query": "bridal makeup artist", "delta_yoy": 0.31, "segment_age": "22-32", "skew": "female" }
|
| 122 |
+
],
|
| 123 |
+
"regulatory_authorities": ["FDA India (cosmetic ingredient compliance)"],
|
| 124 |
+
"professional_journals": ["Salon India", "Pure Beauty India"]
|
| 125 |
+
}
|
magicpin-ai-challenge/dataset/customers_seed.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_README": "15 representative CustomerContexts. The generator expands these to 200 by varying demographic + relationship state per merchant.",
|
| 3 |
+
"customers": [
|
| 4 |
+
{
|
| 5 |
+
"customer_id": "c_001_priya_for_m001",
|
| 6 |
+
"merchant_id": "m_001_drmeera_dentist_delhi",
|
| 7 |
+
"identity": { "name": "Priya", "phone_redacted": "<phone>", "language_pref": "hi-en mix", "age_band": "25-35" },
|
| 8 |
+
"relationship": { "first_visit": "2025-11-04", "last_visit": "2026-05-12", "visits_total": 4, "services_received": ["cleaning", "cleaning", "whitening", "cleaning"], "lifetime_value": 1696 },
|
| 9 |
+
"state": "lapsed_soft",
|
| 10 |
+
"preferences": { "preferred_slots": "weekday_evening", "channel": "whatsapp", "reminder_opt_in": true },
|
| 11 |
+
"consent": { "opted_in_at": "2025-11-04", "scope": ["recall_reminders", "appointment_reminders"] }
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"customer_id": "c_002_rohit_for_m001",
|
| 15 |
+
"merchant_id": "m_001_drmeera_dentist_delhi",
|
| 16 |
+
"identity": { "name": "Rohit", "phone_redacted": "<phone>", "language_pref": "english", "age_band": "35-45" },
|
| 17 |
+
"relationship": { "first_visit": "2026-02-12", "last_visit": "2026-04-18", "visits_total": 2, "services_received": ["root_canal_consult", "root_canal_session_1"], "lifetime_value": 5500 },
|
| 18 |
+
"state": "active",
|
| 19 |
+
"preferences": { "preferred_slots": "saturday_morning", "channel": "whatsapp", "reminder_opt_in": true },
|
| 20 |
+
"consent": { "opted_in_at": "2026-02-12", "scope": ["recall_reminders", "appointment_reminders", "treatment_followup"] }
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"customer_id": "c_003_aanya_for_m001",
|
| 24 |
+
"merchant_id": "m_001_drmeera_dentist_delhi",
|
| 25 |
+
"identity": { "name": "Aanya (parent: Sneha)", "phone_redacted": "<phone>", "language_pref": "hi-en mix", "age_band": "child_under_12" },
|
| 26 |
+
"relationship": { "first_visit": "2026-01-08", "last_visit": "2026-01-08", "visits_total": 1, "services_received": ["pediatric_checkup"], "lifetime_value": 199 },
|
| 27 |
+
"state": "lapsed_hard",
|
| 28 |
+
"preferences": { "preferred_slots": "weekday_after_3pm", "channel": "whatsapp", "reminder_opt_in": true },
|
| 29 |
+
"consent": { "opted_in_at": "2026-01-08", "scope": ["recall_reminders"] }
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"customer_id": "c_004_sneha_for_m003",
|
| 33 |
+
"merchant_id": "m_003_studio11_salon_hyderabad",
|
| 34 |
+
"identity": { "name": "Sneha", "phone_redacted": "<phone>", "language_pref": "te-en mix", "age_band": "25-35" },
|
| 35 |
+
"relationship": { "first_visit": "2025-09-14", "last_visit": "2026-04-19", "visits_total": 11, "services_received": ["balayage", "hair_spa", "balayage_touchup", "manicure", "pedicure", "haircut", "haircut", "balayage", "hair_spa", "facial", "haircut"], "lifetime_value": 18450 },
|
| 36 |
+
"state": "active",
|
| 37 |
+
"preferences": { "preferred_slots": "saturday_afternoon", "channel": "whatsapp", "reminder_opt_in": true, "preferred_stylist": "Priya" },
|
| 38 |
+
"consent": { "opted_in_at": "2025-09-14", "scope": ["appointment_reminders", "promotional_offers", "stylist_specific"] }
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"customer_id": "c_005_kavya_for_m003",
|
| 42 |
+
"merchant_id": "m_003_studio11_salon_hyderabad",
|
| 43 |
+
"identity": { "name": "Kavya", "phone_redacted": "<phone>", "language_pref": "english", "age_band": "20-25" },
|
| 44 |
+
"relationship": { "first_visit": "2026-03-22", "last_visit": "2026-03-22", "visits_total": 1, "services_received": ["bridal_trial"], "lifetime_value": 999 },
|
| 45 |
+
"state": "new",
|
| 46 |
+
"preferences": { "preferred_slots": "saturday", "channel": "whatsapp", "reminder_opt_in": true, "wedding_date": "2026-11-08" },
|
| 47 |
+
"consent": { "opted_in_at": "2026-03-22", "scope": ["appointment_reminders", "bridal_package_followup"] }
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"customer_id": "c_006_amit_for_m005",
|
| 51 |
+
"merchant_id": "m_005_pizzajunction_restaurant_delhi",
|
| 52 |
+
"identity": { "name": "Amit", "phone_redacted": "<phone>", "language_pref": "hi-en mix", "age_band": "25-35" },
|
| 53 |
+
"relationship": { "first_visit": "2026-04-12", "last_visit": "2026-04-22", "visits_total": 5, "services_received": ["delivery_pizza", "delivery_combo", "dine_in", "delivery_pizza", "delivery_pizza"], "lifetime_value": 2380, "favourite_dish": "BBQ Chicken Pizza" },
|
| 54 |
+
"state": "active",
|
| 55 |
+
"preferences": { "preferred_slots": "fri_sat_night", "channel": "whatsapp", "reminder_opt_in": true },
|
| 56 |
+
"consent": { "opted_in_at": "2026-04-12", "scope": ["promotional_offers", "match_night_specials"] }
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"customer_id": "c_007_naveen_for_m006",
|
| 60 |
+
"merchant_id": "m_006_southindiancafe_restaurant_bangalore",
|
| 61 |
+
"identity": { "name": "Naveen", "phone_redacted": "<phone>", "language_pref": "kn-en mix", "age_band": "30-40" },
|
| 62 |
+
"relationship": { "first_visit": "2025-06-05", "last_visit": "2026-04-25", "visits_total": 38, "services_received": ["weekday_thali", "weekday_thali", "weekday_thali", "filter_coffee", "..."], "lifetime_value": 6480, "favourite_dish": "Mylari Dosa" },
|
| 63 |
+
"state": "active",
|
| 64 |
+
"preferences": { "preferred_slots": "weekday_lunch", "channel": "whatsapp", "reminder_opt_in": true, "office_nearby": true },
|
| 65 |
+
"consent": { "opted_in_at": "2025-06-05", "scope": ["promotional_offers", "lunch_thali_updates"] }
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"customer_id": "c_008_dipti_for_m006",
|
| 69 |
+
"merchant_id": "m_006_southindiancafe_restaurant_bangalore",
|
| 70 |
+
"identity": { "name": "Dipti", "phone_redacted": "<phone>", "language_pref": "english", "age_band": "30-40" },
|
| 71 |
+
"relationship": { "first_visit": "2025-12-18", "last_visit": "2026-01-05", "visits_total": 2, "services_received": ["family_brunch", "delivery_dosa_combo"], "lifetime_value": 1280 },
|
| 72 |
+
"state": "lapsed_soft",
|
| 73 |
+
"preferences": { "preferred_slots": "sunday_brunch", "channel": "whatsapp", "reminder_opt_in": true, "family_size": 4 },
|
| 74 |
+
"consent": { "opted_in_at": "2025-12-18", "scope": ["promotional_offers"] }
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"customer_id": "c_009_arjun_for_m007",
|
| 78 |
+
"merchant_id": "m_007_powerhouse_gym_bangalore",
|
| 79 |
+
"identity": { "name": "Arjun", "phone_redacted": "<phone>", "language_pref": "english", "age_band": "25-35" },
|
| 80 |
+
"relationship": { "first_visit": "2026-01-05", "last_visit": "2026-04-21", "visits_total": 78, "services_received": ["membership_jan", "membership_feb", "membership_mar", "membership_apr", "PT_session_x12"], "lifetime_value": 9990 },
|
| 81 |
+
"state": "active",
|
| 82 |
+
"preferences": { "preferred_slots": "weekday_7am", "channel": "whatsapp", "reminder_opt_in": true, "training_focus": "strength" },
|
| 83 |
+
"consent": { "opted_in_at": "2026-01-05", "scope": ["program_updates", "renewal_reminders"] }
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"customer_id": "c_010_rashmi_for_m007",
|
| 87 |
+
"merchant_id": "m_007_powerhouse_gym_bangalore",
|
| 88 |
+
"identity": { "name": "Rashmi", "phone_redacted": "<phone>", "language_pref": "english", "age_band": "30-40" },
|
| 89 |
+
"relationship": { "first_visit": "2025-09-10", "last_visit": "2026-02-28", "visits_total": 22, "services_received": ["membership_x4", "PT_intro"], "lifetime_value": 4490 },
|
| 90 |
+
"state": "lapsed_hard",
|
| 91 |
+
"preferences": { "preferred_slots": "weekday_evening", "channel": "whatsapp", "reminder_opt_in": true, "training_focus": "weight_loss" },
|
| 92 |
+
"consent": { "opted_in_at": "2025-09-10", "scope": ["renewal_reminders", "winback_offers"] }
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"customer_id": "c_011_sumitra_for_m008",
|
| 96 |
+
"merchant_id": "m_008_zenyoga_gym_chennai",
|
| 97 |
+
"identity": { "name": "Sumitra", "phone_redacted": "<phone>", "language_pref": "ta-en mix", "age_band": "45-55" },
|
| 98 |
+
"relationship": { "first_visit": "2025-04-12", "last_visit": "2026-04-26", "visits_total": 145, "services_received": ["yoga_intermediate_x12_months", "pilates_x4_months"], "lifetime_value": 28800 },
|
| 99 |
+
"state": "active",
|
| 100 |
+
"preferences": { "preferred_slots": "morning_6am", "channel": "whatsapp", "reminder_opt_in": true, "health_focus": "back_pain_management" },
|
| 101 |
+
"consent": { "opted_in_at": "2025-04-12", "scope": ["program_updates", "health_content", "renewal_reminders"] }
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"customer_id": "c_012_karthik_jr_for_m008",
|
| 105 |
+
"merchant_id": "m_008_zenyoga_gym_chennai",
|
| 106 |
+
"identity": { "name": "Karthik (parent: Sumitra)", "phone_redacted": "<phone>", "language_pref": "ta-en mix", "age_band": "child_7-12" },
|
| 107 |
+
"relationship": { "first_visit": "2026-04-22", "last_visit": "2026-04-22", "visits_total": 1, "services_received": ["kids_yoga_trial"], "lifetime_value": 0 },
|
| 108 |
+
"state": "new",
|
| 109 |
+
"preferences": { "preferred_slots": "saturday_morning", "channel": "whatsapp_via_parent", "reminder_opt_in": true },
|
| 110 |
+
"consent": { "opted_in_at": "2026-04-22", "scope": ["kids_program_updates"] }
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"customer_id": "c_013_grandfather_for_m009",
|
| 114 |
+
"merchant_id": "m_009_apollo_pharmacy_jaipur",
|
| 115 |
+
"identity": { "name": "Mr. Sharma", "phone_redacted": "<phone>", "language_pref": "hi", "age_band": "65-75", "senior_citizen": true },
|
| 116 |
+
"relationship": { "first_visit": "2024-08-10", "last_visit": "2026-04-22", "visits_total": 24, "services_received": ["chronic_rx_metformin", "chronic_rx_atorvastatin", "chronic_rx_telmisartan", "..."], "lifetime_value": 24600, "chronic_conditions": ["diabetes_t2", "hypertension", "dyslipidemia"] },
|
| 117 |
+
"state": "active",
|
| 118 |
+
"preferences": { "preferred_slots": "morning_delivery", "channel": "whatsapp_via_son", "reminder_opt_in": true, "delivery_address": "saved" },
|
| 119 |
+
"consent": { "opted_in_at": "2024-08-10", "scope": ["refill_reminders", "delivery_notifications", "recall_alerts"] }
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"customer_id": "c_014_priti_for_m009",
|
| 123 |
+
"merchant_id": "m_009_apollo_pharmacy_jaipur",
|
| 124 |
+
"identity": { "name": "Priti", "phone_redacted": "<phone>", "language_pref": "hi-en mix", "age_band": "30-40" },
|
| 125 |
+
"relationship": { "first_visit": "2026-03-10", "last_visit": "2026-04-12", "visits_total": 3, "services_received": ["walk_in_otc", "delivery_otc", "consult_with_pharmacist"], "lifetime_value": 580 },
|
| 126 |
+
"state": "active",
|
| 127 |
+
"preferences": { "preferred_slots": "evening", "channel": "whatsapp", "reminder_opt_in": true, "household_size": 4 },
|
| 128 |
+
"consent": { "opted_in_at": "2026-03-10", "scope": ["promotional_offers", "seasonal_health_content"] }
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"customer_id": "c_015_anonymous_for_m010",
|
| 132 |
+
"merchant_id": "m_010_sunrisepharm_pharmacy_lucknow",
|
| 133 |
+
"identity": { "name": "(walk-in, no profile)", "phone_redacted": null, "language_pref": "hi", "age_band": "unknown" },
|
| 134 |
+
"relationship": { "first_visit": "2026-04-15", "last_visit": "2026-04-15", "visits_total": 1, "services_received": ["walk_in_otc"], "lifetime_value": 240 },
|
| 135 |
+
"state": "new",
|
| 136 |
+
"preferences": { "channel": "none_recorded", "reminder_opt_in": false },
|
| 137 |
+
"consent": { "opted_in_at": null, "scope": [] }
|
| 138 |
+
}
|
| 139 |
+
]
|
| 140 |
+
}
|
magicpin-ai-challenge/dataset/generate_dataset.py
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Expand seed JSON files into the full challenge dataset.
|
| 4 |
+
|
| 5 |
+
Usage:
|
| 6 |
+
python generate_dataset.py --out ./expanded
|
| 7 |
+
|
| 8 |
+
Reads from:
|
| 9 |
+
categories/*.json — already-complete category contexts (5)
|
| 10 |
+
merchants_seed.json — 10 representative merchants (2 per category)
|
| 11 |
+
customers_seed.json — 15 representative customers
|
| 12 |
+
triggers_seed.json — 25 representative triggers
|
| 13 |
+
|
| 14 |
+
Writes to ./expanded/:
|
| 15 |
+
categories/{slug}.json (5 files, copied as-is)
|
| 16 |
+
merchants/m_NNN_*.json (50 files — seeds + 40 generated)
|
| 17 |
+
customers/c_NNN_*.json (200 files — seeds + 185 generated)
|
| 18 |
+
triggers/trg_NNN_*.json (100 files — seeds + 75 generated)
|
| 19 |
+
test_pairs.json (30 canonical (merchant, trigger) pairs all
|
| 20 |
+
candidates produce a message for)
|
| 21 |
+
|
| 22 |
+
Deterministic — fixed seed, same output for everyone.
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
from __future__ import annotations
|
| 26 |
+
|
| 27 |
+
import argparse
|
| 28 |
+
import json
|
| 29 |
+
import os
|
| 30 |
+
import random
|
| 31 |
+
from pathlib import Path
|
| 32 |
+
|
| 33 |
+
# Seed passed to the single random.Random instance built in main(); keeping it
# constant is what makes repeated runs yield the same expanded dataset.
SEED = 20260426 # fixed so every candidate gets the same expanded dataset
|
| 34 |
+
|
| 35 |
+
# Indian city + locality pool for variation.
# Maps city name -> list of locality names. expand_merchants() draws a city
# from the keys and then a locality from that city's list, so both the key
# order and the list order feed the seeded RNG and must not be reordered.
LOCALITIES = {
    "Delhi": [
        "Lajpat Nagar", "Saket", "Karol Bagh", "Pitampura", "Dwarka",
        "Rohini", "Greater Kailash", "Vasant Kunj", "Connaught Place", "Hauz Khas",
    ],
    "Mumbai": [
        "Andheri West", "Bandra", "Borivali", "Powai", "Lower Parel",
        "Goregaon", "Thane", "Vile Parle", "Juhu", "Worli",
    ],
    "Bangalore": [
        "HSR Layout", "Indiranagar", "Whitefield", "Koramangala", "JP Nagar",
        "Marathahalli", "Bellandur", "Jayanagar", "BTM Layout", "Sarjapur",
    ],
    "Hyderabad": [
        "Kapra", "Kondapur", "Madhapur", "Banjara Hills", "Jubilee Hills",
        "Kukatpally", "Gachibowli", "Begumpet", "Secunderabad", "LB Nagar",
    ],
    "Chennai": [
        "Mylapore", "Adyar", "Velachery", "T Nagar", "Anna Nagar",
        "Tambaram", "OMR", "Nungambakkam", "Porur", "Besant Nagar",
    ],
    "Pune": [
        "Aundh", "Baner", "Hadapsar", "Kothrud", "Wakad",
        "Hinjewadi", "Viman Nagar", "Kharadi", "Pimpri", "Magarpatta",
    ],
    "Chandigarh": [
        "Sector 17", "Sector 22", "Sector 35", "Mohali", "Panchkula",
        "Sector 9", "Sector 11", "Manimajra", "Sector 8", "Sector 26",
    ],
    "Jaipur": [
        "Malviya Nagar", "Vaishali Nagar", "Mansarovar", "Tonk Road", "C-Scheme",
        "Raja Park", "Civil Lines", "Jhotwara", "Bani Park", "Sodala",
    ],
    "Lucknow": [
        "Gomti Nagar", "Hazratganj", "Indira Nagar", "Aliganj", "Aminabad",
        "Vibhuti Khand", "Mahanagar", "Aashiana", "Alambagh", "Janakipuram",
    ],
    "Ahmedabad": [
        "Satellite", "Bodakdev", "Vastrapur", "Maninagar", "Naranpura",
        "Bopal", "SG Highway", "Navrangpura", "Thaltej", "Chandkheda",
    ],
}
|
| 48 |
+
|
| 49 |
+
# Per-category pool of (owner first name, business name) pairs, keyed by
# category slug. expand_merchants() iterates the keys in this order and picks
# one pair per generated merchant, so ordering is part of the seeded output.
NAME_BANKS = {
    "dentists": [
        ("Dr. Asha", "Asha Dental Care"),
        ("Dr. Vikram", "Smile Crafters"),
        ("Dr. Neha", "Pearl Dental Studio"),
        ("Dr. Rajan", "City Dental Clinic"),
        ("Dr. Priya", "Family Dental Centre"),
        ("Dr. Sameer", "Bright Smile Dental"),
        ("Dr. Tara", "Crown Dental"),
        ("Dr. Karthik", "Apex Dental Care"),
    ],
    "salons": [
        ("Renu", "Beauty Lounge by Renu"),
        ("Karim", "Karim's Salon"),
        ("Anita", "Anita's Beauty Studio"),
        ("Salim", "Studio Cuts"),
        ("Manish", "Aesthetic Hair Studio"),
        ("Geeta", "Glow Up Salon"),
        ("Paras", "Paras Hair & Beauty"),
        ("Sushma", "The Beauty Bar"),
    ],
    "restaurants": [
        ("Suresh", "Madras Express"),
        ("Anand", "Chai Point Cafe"),
        ("Karim", "Kabab Junction"),
        ("Sandeep", "Tandoor Treats"),
        ("Ravi", "Veg Bowl"),
        ("Imran", "Biryani House"),
        ("Mukesh", "Pizza Spot"),
        ("Lalit", "Family Diner"),
    ],
    "gyms": [
        ("Karan", "Iron Forge Fitness"),
        ("Sneha", "Pulse Studio"),
        ("Akash", "Fit Republic"),
        ("Roshni", "Active Life Gym"),
        ("Vivek", "Strength Co."),
        ("Manisha", "Vyayam Yoga"),
        ("Deepak", "Body Mechanics"),
        ("Pooja", "Bend & Burn"),
    ],
    "pharmacies": [
        ("Anil", "Healthwell Pharmacy"),
        ("Rajesh", "MedPlus Express"),
        ("Sunita", "Reliable Medicos"),
        ("Vinod", "Family Health Pharmacy"),
        ("Bharti", "Wellness Cart"),
        ("Sanjay", "TrueCare Medicos"),
        ("Mohit", "QuickRx Pharmacy"),
        ("Komal", "Daily Care Medicos"),
    ],
}
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def load_seeds(seed_dir: Path):
    """Load every seed input that the generator expands.

    Args:
        seed_dir: Directory holding ``categories/*.json`` together with
            ``merchants_seed.json``, ``customers_seed.json`` and
            ``triggers_seed.json``.

    Returns:
        A ``(categories, merchants, customers, triggers)`` tuple.
        ``categories`` maps each category file's ``"slug"`` value to the
        parsed file; the other three are the lists stored under the
        ``"merchants"``, ``"customers"`` and ``"triggers"`` keys.

    Raises:
        FileNotFoundError: If one of the three seed files does not exist.
        KeyError: If a category file has no ``"slug"`` key, or a seed file
            lacks its top-level list key.
    """

    def _read_json(path: Path):
        # Decode explicitly as UTF-8: the seed text may contain non-ASCII
        # characters and the locale default encoding is platform-dependent.
        with open(path, encoding="utf-8") as fp:
            return json.load(fp)

    categories = {}
    # glob() yields entries in arbitrary filesystem order; sorting keeps the
    # resulting dict order identical on every machine.
    for category_file in sorted((seed_dir / "categories").glob("*.json")):
        data = _read_json(category_file)
        categories[data["slug"]] = data

    merchants = _read_json(seed_dir / "merchants_seed.json")["merchants"]
    customers = _read_json(seed_dir / "customers_seed.json")["customers"]
    triggers = _read_json(seed_dir / "triggers_seed.json")["triggers"]
    return categories, merchants, customers, triggers
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def expand_merchants(seeds: list[dict], rnd: random.Random) -> list[dict]:
    """Top up every category in NAME_BANKS to 10 merchants.

    The seed merchants are kept first, in their given order; synthetic
    merchants are appended after them, numbered from ``len(seeds) + 1``.
    With two seeds per category this yields 8 generated merchants per
    category.

    Args:
        seeds: Seed merchant dicts; each must carry ``"category_slug"``.
        rnd: Random source. The sequence of draws below determines the
            output, so statements that call ``rnd`` must keep their order.

    Returns:
        A new list: the seeds followed by the generated merchant dicts.
    """
    merchants = list(seeds)

    seeded_per_category = {}
    for seed in seeds:
        slug = seed["category_slug"]
        seeded_per_category[slug] = seeded_per_category.get(slug, 0) + 1

    regional_language = {"Mumbai": "mr", "Chennai": "ta", "Hyderabad": "te", "Bangalore": "kn"}

    serial = len(seeds) + 1
    for cat_slug in NAME_BANKS:
        missing = 10 - seeded_per_category.get(cat_slug, 0)
        for _ in range(missing):  # empty range when a category already has >= 10
            owner_first, biz_name = rnd.choice(NAME_BANKS[cat_slug])
            city = rnd.choice(list(LOCALITIES))
            locality = rnd.choice(LOCALITIES[city])

            owner_slug = owner_first.lower().replace(' ', '_').replace('dr.', 'dr')
            # NOTE(review): rstrip('s') strips trailing "s" characters, so
            # "pharmacies" becomes "pharmacie" while the seed ids use
            # "pharmacy". Left as-is so generated ids do not change.
            mid = f"m_{serial:03d}_{owner_slug}_{cat_slug.rstrip('s')}_{city.lower()}"

            views = rnd.randint(400, 6000)
            calls = rnd.randint(2, max(3, views // 80))
            ctr = round(rnd.uniform(0.015, 0.060), 3)
            verified = rnd.random() > 0.25
            sub_status = rnd.choices(["active", "expired", "trial"], weights=[7, 2, 1])[0]

            languages = ["en", "hi"]
            if city in regional_language:
                languages = languages + [regional_language[city]]
            established_year = rnd.randint(2010, 2023)

            # Exactly one RNG draw per status, matching the field it feeds.
            if sub_status == "active":
                plan, days_remaining, days_since_expiry = "Pro", rnd.randint(5, 300), None
            elif sub_status == "trial":
                plan, days_remaining, days_since_expiry = "Trial", rnd.randint(1, 14), None
            else:
                plan, days_remaining, days_since_expiry = "Pro", 0, rnd.randint(7, 90)

            directions = calls * 2 + rnd.randint(0, 30)
            leads = rnd.randint(0, calls)
            views_pct = round(rnd.uniform(-0.30, 0.30), 2)
            calls_pct = round(rnd.uniform(-0.30, 0.30), 2)
            total_unique_ytd = rnd.randint(50, 2000)

            merchants.append({
                "merchant_id": mid,
                "category_slug": cat_slug,
                "identity": {
                    "name": biz_name,
                    "city": city,
                    "locality": locality,
                    "place_id": f"ChIJ_{locality.upper().replace(' ', '_')}_{cat_slug.upper()}_{serial:03d}",
                    "verified": verified,
                    "languages": languages,
                    "owner_first_name": owner_first,
                    "established_year": established_year,
                },
                "subscription": {
                    "status": sub_status,
                    "plan": plan,
                    "days_remaining": days_remaining,
                    "days_since_expiry": days_since_expiry,
                },
                "performance": {
                    "window_days": 30,
                    "views": views,
                    "calls": calls,
                    "directions": directions,
                    "ctr": ctr,
                    "leads": leads,
                    "delta_7d": {"views_pct": views_pct, "calls_pct": calls_pct},
                },
                "offers": [],
                "conversation_history": [],
                "customer_aggregate": {"total_unique_ytd": total_unique_ytd},
                "signals": [],
                "review_themes": [],
            })
            serial += 1
    return merchants
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def expand_customers(seeds: list[dict], merchants: list[dict], rnd: random.Random,
                     max_total: int = 200) -> list[dict]:
    """Give each merchant up to 4 customers, capped at ``max_total`` overall.

    Seed customers are kept first and count towards their merchant's quota;
    synthetic customers are appended after them, numbered from
    ``len(seeds) + 1``.

    Args:
        seeds: Seed customer dicts; each must carry ``"merchant_id"``.
        merchants: Merchant dicts (``"merchant_id"`` is read) to generate
            customers for, processed in list order.
        rnd: Random source. The order of draws determines the output.
        max_total: Upper bound on the length of the returned list, seeds
            included. The previous check compared against
            ``200 + len(seeds)``, which let the list grow past the
            documented 200 total.

    Returns:
        A new list: the seeds followed by the generated customer dicts. If
        the seeds alone already reach ``max_total`` they are returned
        unchanged (never truncated).
    """
    expanded = list(seeds)
    next_idx = len(seeds) + 1
    target_per_merchant = 4

    have_per_merchant = {}
    for c in seeds:
        have_per_merchant[c["merchant_id"]] = have_per_merchant.get(c["merchant_id"], 0) + 1

    customer_names = ["Aarav", "Vivaan", "Aditya", "Vihaan", "Arjun", "Ishaan", "Reyansh", "Aryan", "Ananya",
                      "Aadhya", "Saanvi", "Kavya", "Diya", "Ira", "Myra", "Anika", "Riya", "Tara"]

    for m in merchants:
        cur = have_per_merchant.get(m["merchant_id"], 0)
        for _ in range(max(0, target_per_merchant - cur)):
            if len(expanded) >= max_total:
                # Cap reached: nothing further can be added for any merchant.
                return expanded
            name = rnd.choice(customer_names)
            cid = f"c_{next_idx:03d}_{name.lower()}_for_{m['merchant_id']}"
            visits = rnd.randint(1, 12)
            state = rnd.choices(
                ["new", "active", "lapsed_soft", "lapsed_hard", "churned"],
                weights=[1, 4, 2, 1, 1],
            )[0]
            expanded.append({
                "customer_id": cid,
                "merchant_id": m["merchant_id"],
                "identity": {"name": name, "phone_redacted": "<phone>",
                             "language_pref": rnd.choice(["en", "hi-en mix", "hi"]),
                             "age_band": rnd.choice(["20-25", "25-35", "30-40", "40-50", "50-65"])},
                "relationship": {"first_visit": "2025-09-01", "last_visit": "2026-04-01",
                                 "visits_total": visits, "services_received": [],
                                 "lifetime_value": visits * rnd.randint(200, 1500)},
                "state": state,
                "preferences": {"channel": "whatsapp", "reminder_opt_in": rnd.random() > 0.2},
                "consent": {"opted_in_at": "2025-09-01", "scope": ["promotional_offers"]},
            })
            next_idx += 1
    return expanded
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def expand_triggers(seeds: list[dict], merchants: list[dict], customers: list[dict], rnd: random.Random) -> list[dict]:
    """Append up to 5 placeholder triggers per kind, capped at 100 in total.

    For each entry of ``additional_kinds`` a random merchant is drawn; for
    customer-scoped kinds a random customer of that merchant is drawn too.
    A draw whose merchant has no customers is skipped without a retry, so
    fewer than 5 triggers of that kind may be produced.

    Args:
        seeds: Seed trigger dicts, kept first in the result.
        merchants: Merchant dicts (``"merchant_id"`` is read).
        customers: Customer dicts (``"merchant_id"`` and ``"customer_id"``
            are read).
        rnd: Random source. The order of draws determines the output.

    Returns:
        A new list of at most 100 triggers: seeds, then generated ones.
        With no merchants to draw from, only the seeds are returned.
    """
    expanded = list(seeds)
    if not merchants:
        # rnd.choice() raises IndexError on an empty sequence.
        return expanded[:100]

    next_idx = len(seeds) + 1
    # (kind, source, scope, urgency)
    additional_kinds = [
        ("research_digest", "external", "merchant", 1),
        ("perf_dip", "internal", "merchant", 3),
        ("perf_spike", "internal", "merchant", 1),
        ("milestone_reached", "internal", "merchant", 1),
        ("dormant_with_vera", "internal", "merchant", 2),
        ("review_theme_emerged", "internal", "merchant", 3),
        ("competitor_opened", "external", "merchant", 2),
        ("festival_upcoming", "external", "merchant", 1),
        ("recall_due", "internal", "customer", 3),
        ("customer_lapsed_soft", "internal", "customer", 3),
        ("appointment_tomorrow", "internal", "customer", 2),
        ("chronic_refill_due", "internal", "customer", 2),
        ("trial_followup", "internal", "customer", 2),
        ("renewal_due", "internal", "merchant", 4),
        ("curious_ask_due", "internal", "merchant", 1),
    ]

    # Index customers once instead of rescanning the full list per trigger.
    # Each bucket keeps the customers' original relative order, so
    # rnd.choice() sees the same sequence a filtered scan would produce.
    customers_by_merchant = {}
    for c in customers:
        customers_by_merchant.setdefault(c["merchant_id"], []).append(c)

    for kind, source, scope, urgency in additional_kinds:
        for _ in range(5):  # 5 of each kind
            if next_idx > 100:
                break
            m = rnd.choice(merchants)
            cust = None
            if scope == "customer":
                m_customers = customers_by_merchant.get(m["merchant_id"])
                if not m_customers:
                    continue
                cust = rnd.choice(m_customers)
            expanded.append({
                "id": f"trg_{next_idx:03d}_{kind}_{m['merchant_id'][:20]}",
                "scope": scope, "kind": kind, "source": source,
                "merchant_id": m["merchant_id"],
                "customer_id": cust["customer_id"] if cust else None,
                "payload": {"placeholder": True, "metric_or_topic": kind},
                "urgency": urgency, "suppression_key": f"{kind}:{m['merchant_id']}:gen_{next_idx}",
                "expires_at": "2026-06-30T00:00:00Z",
            })
            next_idx += 1
    return expanded[:100]
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def write_outputs(out_dir: Path, categories, merchants, customers, triggers):
    """Write every record to its own pretty-printed JSON file under ``out_dir``.

    Layout produced:
        categories/{slug}.json, merchants/{merchant_id}.json,
        customers/{customer_id}.json, triggers/{id}.json

    Args:
        out_dir: Output root; created (with parents) if missing.
        categories: Mapping of slug -> category dict.
        merchants: Iterable of dicts carrying ``"merchant_id"``.
        customers: Iterable of dicts carrying ``"customer_id"``.
        triggers: Iterable of dicts carrying ``"id"``.
    """

    def _dump_each(subdir: str, named_records) -> None:
        """Write each ``(stem, record)`` pair to ``out_dir/subdir/stem.json``."""
        target = out_dir / subdir
        target.mkdir(exist_ok=True)
        for stem, record in named_records:
            # ensure_ascii=False emits non-ASCII characters verbatim, so the
            # file must be opened as UTF-8 explicitly; the locale default
            # would make the bytes (or success) depend on the platform.
            with open(target / f"{stem}.json", "w", encoding="utf-8") as f:
                json.dump(record, f, indent=2, ensure_ascii=False)

    out_dir.mkdir(parents=True, exist_ok=True)
    _dump_each("categories", categories.items())
    _dump_each("merchants", ((m["merchant_id"], m) for m in merchants))
    _dump_each("customers", ((c["customer_id"], c) for c in customers))
    _dump_each("triggers", ((t["id"], t) for t in triggers))
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def write_test_pairs(out_dir: Path, triggers, rnd: random.Random):
    """Write ``test_pairs.json``: up to 30 canonical test cases.

    Triggers are grouped by ``"kind"``; kinds are visited in sorted order
    and the first two triggers of each kind (in input order) are taken
    until 30 have been collected. Test ids are ``T01``, ``T02``, ...

    Args:
        out_dir: Existing directory that receives ``test_pairs.json``.
        triggers: Trigger dicts; ``"kind"``, ``"id"`` and ``"merchant_id"``
            are read, ``"customer_id"`` is optional.
        rnd: Not used by the selection; kept so the call signature stays
            the same.
    """
    grouped = {}
    for trig in triggers:
        grouped.setdefault(trig["kind"], []).append(trig)

    # NOTE(review): with more than 15 kinds present, the 30-pair cap is hit
    # before the alphabetically later kinds are reached - confirm whether
    # "covering all kinds" is actually required.
    chosen = [trig for kind in sorted(grouped) for trig in grouped[kind][:2]][:30]

    pairs = [
        {
            "test_id": f"T{number:02d}",
            "trigger_id": trig["id"],
            "merchant_id": trig["merchant_id"],
            "customer_id": trig.get("customer_id"),
        }
        for number, trig in enumerate(chosen, start=1)
    ]

    with open(out_dir / "test_pairs.json", "w") as f:
        json.dump({"pairs": pairs}, f, indent=2)
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def main():
    """Command-line entry point: load seeds, expand them, write the dataset.

    Options:
        --seed-dir: Directory containing the seed JSON files (default ".").
        --out: Output directory (default "./expanded").

    All randomness comes from one ``random.Random(SEED)`` instance that is
    threaded through the expanders in a fixed order: merchants, customers,
    triggers, then the test pairs.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--seed-dir", default=".", help="Directory containing the seed JSON files")
    cli.add_argument("--out", default="./expanded", help="Output directory")
    opts = cli.parse_args()

    rnd = random.Random(SEED)
    seed_dir = Path(opts.seed_dir).resolve()
    out_dir = Path(opts.out).resolve()
    print(f"Reading seeds from {seed_dir}")
    print(f"Writing to {out_dir}")

    categories, m_seeds, c_seeds, t_seeds = load_seeds(seed_dir)
    loaded_summary = (
        f" Loaded {len(categories)} categories, {len(m_seeds)} merchant seeds, "
        f"{len(c_seeds)} customer seeds, {len(t_seeds)} trigger seeds"
    )
    print(loaded_summary)

    # Later stages consume the output of earlier ones and share one RNG
    # stream, so this call order is part of the deterministic result.
    merchants = expand_merchants(m_seeds, rnd)
    customers = expand_customers(c_seeds, merchants, rnd)
    triggers = expand_triggers(t_seeds, merchants, customers, rnd)
    print(f" Expanded to {len(merchants)} merchants, {len(customers)} customers, {len(triggers)} triggers")

    write_outputs(out_dir, categories, merchants, customers, triggers)
    write_test_pairs(out_dir, triggers, rnd)
    print(f"Done. Run: ls {out_dir}")
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
# Run the generator only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
magicpin-ai-challenge/dataset/merchants_seed.json
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_README": "10 representative MerchantContexts. The generator (generate_dataset.py) expands these to 50 total (10/category) by varying identity + perf numbers deterministically.",
|
| 3 |
+
"merchants": [
|
| 4 |
+
{
|
| 5 |
+
"merchant_id": "m_001_drmeera_dentist_delhi",
|
| 6 |
+
"category_slug": "dentists",
|
| 7 |
+
"identity": {
|
| 8 |
+
"name": "Dr. Meera's Dental Clinic",
|
| 9 |
+
"city": "Delhi",
|
| 10 |
+
"locality": "Lajpat Nagar",
|
| 11 |
+
"place_id": "ChIJ_LAJPATNAGAR_DENTIST_001",
|
| 12 |
+
"verified": true,
|
| 13 |
+
"languages": ["en", "hi"],
|
| 14 |
+
"owner_first_name": "Meera",
|
| 15 |
+
"established_year": 2018
|
| 16 |
+
},
|
| 17 |
+
"subscription": { "status": "active", "plan": "Pro", "days_remaining": 82, "renewed_at": "2026-02-04" },
|
| 18 |
+
"performance": {
|
| 19 |
+
"window_days": 30,
|
| 20 |
+
"views": 2410, "calls": 18, "directions": 45, "ctr": 0.021, "leads": 9,
|
| 21 |
+
"delta_7d": { "views_pct": 0.18, "calls_pct": -0.05, "ctr_pct": 0.02 }
|
| 22 |
+
},
|
| 23 |
+
"offers": [
|
| 24 |
+
{ "id": "o_meera_001", "title": "Dental Cleaning @ ₹299", "status": "active", "started": "2026-03-01" },
|
| 25 |
+
{ "id": "o_meera_002", "title": "Deep Cleaning @ ₹499", "status": "expired", "ended": "2026-02-28" }
|
| 26 |
+
],
|
| 27 |
+
"conversation_history": [
|
| 28 |
+
{ "ts": "2026-04-24T10:12:00Z", "from": "vera", "body": "Profile audit done — your photos are 8/10, description complete, but Google posts are stale (last post 22 days ago). Want me to draft 3 posts you can review?", "engagement": "merchant_replied" },
|
| 29 |
+
{ "ts": "2026-04-24T10:18:00Z", "from": "merchant", "body": "Yes please, focus on whitening and aligners", "engagement": "intent_action" }
|
| 30 |
+
],
|
| 31 |
+
"customer_aggregate": { "total_unique_ytd": 540, "lapsed_180d_plus": 78, "retention_6mo_pct": 0.38, "high_risk_adult_count": 124 },
|
| 32 |
+
"signals": ["stale_posts:22d", "ctr_below_peer_median", "high_risk_adult_cohort", "engaged_in_last_48h"],
|
| 33 |
+
"review_themes": [
|
| 34 |
+
{ "theme": "wait_time", "sentiment": "neg", "occurrences_30d": 3, "common_quote": "had to wait 30 min on Sunday afternoon" },
|
| 35 |
+
{ "theme": "doctor_manner", "sentiment": "pos", "occurrences_30d": 5, "common_quote": "Dr. Meera explains everything patiently" }
|
| 36 |
+
]
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"merchant_id": "m_002_bharat_dentist_mumbai",
|
| 40 |
+
"category_slug": "dentists",
|
| 41 |
+
"identity": {
|
| 42 |
+
"name": "Bharat Dental Care",
|
| 43 |
+
"city": "Mumbai",
|
| 44 |
+
"locality": "Andheri West",
|
| 45 |
+
"place_id": "ChIJ_ANDHERI_DENTIST_002",
|
| 46 |
+
"verified": false,
|
| 47 |
+
"languages": ["en", "hi", "mr"],
|
| 48 |
+
"owner_first_name": "Bharat",
|
| 49 |
+
"established_year": 2010
|
| 50 |
+
},
|
| 51 |
+
"subscription": { "status": "active", "plan": "Pro", "days_remaining": 12, "renewed_at": "2025-04-26" },
|
| 52 |
+
"performance": {
|
| 53 |
+
"window_days": 30,
|
| 54 |
+
"views": 980, "calls": 4, "directions": 18, "ctr": 0.018, "leads": 2,
|
| 55 |
+
"delta_7d": { "views_pct": -0.22, "calls_pct": -0.50, "ctr_pct": -0.10 }
|
| 56 |
+
},
|
| 57 |
+
"offers": [],
|
| 58 |
+
"conversation_history": [
|
| 59 |
+
{ "ts": "2026-04-10T11:00:00Z", "from": "vera", "body": "Subscription expires in 16 days — Bharat Dental Care...", "engagement": "merchant_no_reply" }
|
| 60 |
+
],
|
| 61 |
+
"customer_aggregate": { "total_unique_ytd": 220, "lapsed_180d_plus": 95, "retention_6mo_pct": 0.18 },
|
| 62 |
+
"signals": ["renewal_due_soon:12d", "perf_dip_severe", "unverified_gbp", "dormant_with_vera_14d", "no_active_offers"],
|
| 63 |
+
"review_themes": []
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"merchant_id": "m_003_studio11_salon_hyderabad",
|
| 67 |
+
"category_slug": "salons",
|
| 68 |
+
"identity": {
|
| 69 |
+
"name": "Studio11 Family Salon",
|
| 70 |
+
"city": "Hyderabad",
|
| 71 |
+
"locality": "Kapra",
|
| 72 |
+
"place_id": "ChIJ_KAPRA_SALON_003",
|
| 73 |
+
"verified": true,
|
| 74 |
+
"languages": ["en", "hi", "te"],
|
| 75 |
+
"owner_first_name": "Lakshmi",
|
| 76 |
+
"established_year": 2019
|
| 77 |
+
},
|
| 78 |
+
"subscription": { "status": "active", "plan": "Pro", "days_remaining": 145 },
|
| 79 |
+
"performance": {
|
| 80 |
+
"window_days": 30,
|
| 81 |
+
"views": 4980, "calls": 62, "directions": 142, "ctr": 0.048, "leads": 38,
|
| 82 |
+
"delta_7d": { "views_pct": 0.14, "calls_pct": 0.20, "ctr_pct": 0.05 }
|
| 83 |
+
},
|
| 84 |
+
"offers": [
|
| 85 |
+
{ "id": "o_studio11_001", "title": "Haircut @ ₹99", "status": "active", "started": "2026-03-01" },
|
| 86 |
+
{ "id": "o_studio11_002", "title": "Hair Spa @ ₹499", "status": "active", "started": "2026-03-15" }
|
| 87 |
+
],
|
| 88 |
+
"conversation_history": [
|
| 89 |
+
{ "ts": "2026-04-22T15:00:00Z", "from": "vera", "body": "Spotted: bridal-trial searches in Kapra +28% this week. Want me to push your bridal package as a GBP post?", "engagement": "merchant_no_reply" }
|
| 90 |
+
],
|
| 91 |
+
"customer_aggregate": { "total_unique_ytd": 1150, "lapsed_90d_plus": 220, "retention_3mo_pct": 0.62 },
|
| 92 |
+
"signals": ["high_engagement", "above_peer_median_calls", "growing_views_7d"],
|
| 93 |
+
"review_themes": [
|
| 94 |
+
{ "theme": "stylist_skill", "sentiment": "pos", "occurrences_30d": 12, "common_quote": "Priya is the best for balayage" },
|
| 95 |
+
{ "theme": "saturday_wait", "sentiment": "neg", "occurrences_30d": 2 }
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"merchant_id": "m_004_glamour_salon_pune",
|
| 100 |
+
"category_slug": "salons",
|
| 101 |
+
"identity": {
|
| 102 |
+
"name": "Glamour Lounge Spa & Salon",
|
| 103 |
+
"city": "Pune",
|
| 104 |
+
"locality": "Aundh",
|
| 105 |
+
"place_id": "ChIJ_AUNDH_SALON_004",
|
| 106 |
+
"verified": true,
|
| 107 |
+
"languages": ["en", "hi", "mr"],
|
| 108 |
+
"owner_first_name": "Anjali",
|
| 109 |
+
"established_year": 2021
|
| 110 |
+
},
|
| 111 |
+
"subscription": { "status": "expired", "plan": "Pro", "days_since_expiry": 38 },
|
| 112 |
+
"performance": {
|
| 113 |
+
"window_days": 30,
|
| 114 |
+
"views": 1200, "calls": 8, "directions": 22, "ctr": 0.022, "leads": 3,
|
| 115 |
+
"delta_7d": { "views_pct": -0.12, "calls_pct": -0.30, "ctr_pct": -0.04 }
|
| 116 |
+
},
|
| 117 |
+
"offers": [],
|
| 118 |
+
"conversation_history": [
|
| 119 |
+
{ "ts": "2026-03-19T14:00:00Z", "from": "vera", "body": "Subscription expired. Profile maintenance paused...", "engagement": "merchant_no_reply" }
|
| 120 |
+
],
|
| 121 |
+
"customer_aggregate": { "total_unique_ytd": 380, "lapsed_90d_plus": 180, "retention_3mo_pct": 0.32 },
|
| 122 |
+
"signals": ["winback_eligible", "perf_dip_post_expiry", "dormant_with_vera_38d"],
|
| 123 |
+
"review_themes": []
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"merchant_id": "m_005_pizzajunction_restaurant_delhi",
|
| 127 |
+
"category_slug": "restaurants",
|
| 128 |
+
"identity": {
|
| 129 |
+
"name": "SK Pizza Junction",
|
| 130 |
+
"city": "Delhi",
|
| 131 |
+
"locality": "Sant Nagar",
|
| 132 |
+
"place_id": "ChIJ_SANTNAGAR_RESTAURANT_005",
|
| 133 |
+
"verified": false,
|
| 134 |
+
"languages": ["en", "hi"],
|
| 135 |
+
"owner_first_name": "Suresh",
|
| 136 |
+
"established_year": 2022
|
| 137 |
+
},
|
| 138 |
+
"subscription": { "status": "trial", "plan": "Trial", "days_remaining": 7 },
|
| 139 |
+
"performance": {
|
| 140 |
+
"window_days": 30,
|
| 141 |
+
"views": 2200, "calls": 12, "directions": 38, "ctr": 0.020, "leads": 4,
|
| 142 |
+
"delta_7d": { "views_pct": 0.08, "calls_pct": 0.10 }
|
| 143 |
+
},
|
| 144 |
+
"offers": [
|
| 145 |
+
{ "id": "o_skpz_001", "title": "Buy 1 Pizza Get 1 Free (Tue-Thu)", "status": "active", "started": "2026-04-15" }
|
| 146 |
+
],
|
| 147 |
+
"conversation_history": [
|
| 148 |
+
{ "ts": "2026-04-25T18:00:00Z", "from": "vera", "body": "Quick check — IPL match nights driving any extra footfall?", "engagement": "merchant_no_reply" }
|
| 149 |
+
],
|
| 150 |
+
"customer_aggregate": { "total_unique_ytd": 0, "delivery_orders_30d": 180, "dine_in_orders_30d": 95 },
|
| 151 |
+
"signals": ["new_merchant", "trial_ending_soon", "ipl_eligible_locality"],
|
| 152 |
+
"review_themes": [
|
| 153 |
+
{ "theme": "delivery_late", "sentiment": "neg", "occurrences_30d": 4 },
|
| 154 |
+
{ "theme": "pizza_quality", "sentiment": "pos", "occurrences_30d": 8 }
|
| 155 |
+
]
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"merchant_id": "m_006_southindiancafe_restaurant_bangalore",
|
| 159 |
+
"category_slug": "restaurants",
|
| 160 |
+
"identity": {
|
| 161 |
+
"name": "Mylari South Indian Cafe",
|
| 162 |
+
"city": "Bangalore",
|
| 163 |
+
"locality": "Indiranagar",
|
| 164 |
+
"place_id": "ChIJ_INDIRANAGAR_RESTAURANT_006",
|
| 165 |
+
"verified": true,
|
| 166 |
+
"languages": ["en", "hi", "kn"],
|
| 167 |
+
"owner_first_name": "Suresh",
|
| 168 |
+
"established_year": 2014
|
| 169 |
+
},
|
| 170 |
+
"subscription": { "status": "active", "plan": "Pro", "days_remaining": 240 },
|
| 171 |
+
"performance": {
|
| 172 |
+
"window_days": 30,
|
| 173 |
+
"views": 12400, "calls": 88, "directions": 320, "ctr": 0.032, "leads": 145,
|
| 174 |
+
"delta_7d": { "views_pct": 0.05, "calls_pct": 0.02 }
|
| 175 |
+
},
|
| 176 |
+
"offers": [
|
| 177 |
+
{ "id": "o_mylari_001", "title": "Weekday Lunch Thali @ ₹149", "status": "active", "started": "2026-01-10" }
|
| 178 |
+
],
|
| 179 |
+
"conversation_history": [
|
| 180 |
+
{ "ts": "2026-04-25T11:00:00Z", "from": "vera", "body": "Your weekday thali is doing well — 18 orders/day avg. Want me to add a corporate-bulk version?", "engagement": "merchant_replied" },
|
| 181 |
+
{ "ts": "2026-04-25T11:30:00Z", "from": "merchant", "body": "Yes good idea, what would it look like", "engagement": "intent_question" }
|
| 182 |
+
],
|
| 183 |
+
"customer_aggregate": { "total_unique_ytd": 4200, "repeat_customer_pct": 0.42, "delivery_share_pct": 0.45 },
|
| 184 |
+
"signals": ["high_volume", "stable_growth", "engaged_in_last_24h"],
|
| 185 |
+
"review_themes": [
|
| 186 |
+
{ "theme": "thali_quality", "sentiment": "pos", "occurrences_30d": 22 },
|
| 187 |
+
{ "theme": "weekend_busy", "sentiment": "neg", "occurrences_30d": 3 }
|
| 188 |
+
]
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"merchant_id": "m_007_powerhouse_gym_bangalore",
|
| 192 |
+
"category_slug": "gyms",
|
| 193 |
+
"identity": {
|
| 194 |
+
"name": "PowerHouse Fitness",
|
| 195 |
+
"city": "Bangalore",
|
| 196 |
+
"locality": "HSR Layout",
|
| 197 |
+
"place_id": "ChIJ_HSR_GYM_007",
|
| 198 |
+
"verified": true,
|
| 199 |
+
"languages": ["en", "hi", "kn"],
|
| 200 |
+
"owner_first_name": "Karthik",
|
| 201 |
+
"established_year": 2020
|
| 202 |
+
},
|
| 203 |
+
"subscription": { "status": "active", "plan": "Pro", "days_remaining": 95 },
|
| 204 |
+
"performance": {
|
| 205 |
+
"window_days": 30,
|
| 206 |
+
"views": 1480, "calls": 22, "directions": 48, "ctr": 0.052, "leads": 14,
|
| 207 |
+
"delta_7d": { "views_pct": -0.30, "calls_pct": -0.35 }
|
| 208 |
+
},
|
| 209 |
+
"offers": [
|
| 210 |
+
{ "id": "o_powerhouse_001", "title": "3 FREE Trial Classes", "status": "active", "started": "2026-01-01" }
|
| 211 |
+
],
|
| 212 |
+
"conversation_history": [],
|
| 213 |
+
"customer_aggregate": { "total_active_members": 245, "monthly_churn_pct": 0.10, "trial_to_paid_pct": 0.28 },
|
| 214 |
+
"signals": ["seasonal_dip_apr_may", "above_peer_ctr", "no_recent_post"],
|
| 215 |
+
"review_themes": [
|
| 216 |
+
{ "theme": "equipment_quality", "sentiment": "pos", "occurrences_30d": 7 },
|
| 217 |
+
{ "theme": "morning_crowd", "sentiment": "neg", "occurrences_30d": 4 }
|
| 218 |
+
]
|
| 219 |
+
},
|
| 220 |
+
{
|
| 221 |
+
"merchant_id": "m_008_zenyoga_gym_chennai",
|
| 222 |
+
"category_slug": "gyms",
|
| 223 |
+
"identity": {
|
| 224 |
+
"name": "Zen Yoga Studio",
|
| 225 |
+
"city": "Chennai",
|
| 226 |
+
"locality": "Mylapore",
|
| 227 |
+
"place_id": "ChIJ_MYLAPORE_GYM_008",
|
| 228 |
+
"verified": true,
|
| 229 |
+
"languages": ["en", "ta", "hi"],
|
| 230 |
+
"owner_first_name": "Padma",
|
| 231 |
+
"established_year": 2017
|
| 232 |
+
},
|
| 233 |
+
"subscription": { "status": "active", "plan": "Pro", "days_remaining": 180 },
|
| 234 |
+
"performance": {
|
| 235 |
+
"window_days": 30,
|
| 236 |
+
"views": 880, "calls": 18, "directions": 38, "ctr": 0.062, "leads": 12,
|
| 237 |
+
"delta_7d": { "views_pct": 0.10, "calls_pct": 0.15 }
|
| 238 |
+
},
|
| 239 |
+
"offers": [
|
| 240 |
+
{ "id": "o_zen_001", "title": "First Month @ ₹499", "status": "active", "started": "2026-03-01" },
|
| 241 |
+
{ "id": "o_zen_002", "title": "Free Body Composition Analysis", "status": "active", "started": "2026-03-01" }
|
| 242 |
+
],
|
| 243 |
+
"conversation_history": [
|
| 244 |
+
{ "ts": "2026-04-23T09:00:00Z", "from": "merchant", "body": "Hi I want to add a kids yoga program — what should it look like?", "engagement": "intent_planning" },
|
| 245 |
+
{ "ts": "2026-04-23T09:05:00Z", "from": "vera", "body": "Great idea — kids yoga summer camps are peaking now. Suggest 4-week program, 3 classes/week, age 7-12, ₹2,499. Want me to draft the GBP post + Insta carousel?", "engagement": "merchant_replied" }
|
| 246 |
+
],
|
| 247 |
+
"customer_aggregate": { "total_active_members": 95, "monthly_churn_pct": 0.05, "trial_to_paid_pct": 0.55 },
|
| 248 |
+
"signals": ["high_retention", "active_planning", "boutique_segment"],
|
| 249 |
+
"review_themes": [
|
| 250 |
+
{ "theme": "instructor_quality", "sentiment": "pos", "occurrences_30d": 9 },
|
| 251 |
+
{ "theme": "small_classes", "sentiment": "pos", "occurrences_30d": 6 }
|
| 252 |
+
]
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"merchant_id": "m_009_apollo_pharmacy_jaipur",
|
| 256 |
+
"category_slug": "pharmacies",
|
| 257 |
+
"identity": {
|
| 258 |
+
"name": "Apollo Health Plus Pharmacy",
|
| 259 |
+
"city": "Jaipur",
|
| 260 |
+
"locality": "Malviya Nagar",
|
| 261 |
+
"place_id": "ChIJ_MALVIYA_PHARMACY_009",
|
| 262 |
+
"verified": true,
|
| 263 |
+
"languages": ["en", "hi"],
|
| 264 |
+
"owner_first_name": "Ramesh",
|
| 265 |
+
"established_year": 2016
|
| 266 |
+
},
|
| 267 |
+
"subscription": { "status": "active", "plan": "Pro", "days_remaining": 60 },
|
| 268 |
+
"performance": {
|
| 269 |
+
"window_days": 30,
|
| 270 |
+
"views": 1850, "calls": 38, "directions": 95, "ctr": 0.045, "leads": 24,
|
| 271 |
+
"delta_7d": { "views_pct": 0.06, "calls_pct": 0.08 }
|
| 272 |
+
},
|
| 273 |
+
"offers": [
|
| 274 |
+
{ "id": "o_apollo_001", "title": "Free Home Delivery > ₹499", "status": "active", "started": "2026-01-01" },
|
| 275 |
+
{ "id": "o_apollo_002", "title": "Senior Citizen 15% OFF", "status": "active", "started": "2026-01-01" }
|
| 276 |
+
],
|
| 277 |
+
"conversation_history": [
|
| 278 |
+
{ "ts": "2026-04-24T08:00:00Z", "from": "vera", "body": "Heads up: voluntary recall on atorvastatin batches X/Y by Mfr Z. Want the customer list filtered for that molecule?", "engagement": "merchant_replied" },
|
| 279 |
+
{ "ts": "2026-04-24T08:30:00Z", "from": "merchant", "body": "Yes send me the list please", "engagement": "intent_action" }
|
| 280 |
+
],
|
| 281 |
+
"customer_aggregate": { "total_unique_ytd": 1820, "repeat_customer_pct": 0.68, "chronic_rx_count": 240 },
|
| 282 |
+
"signals": ["above_peer_calls", "compliance_aware", "high_repeat_rate"],
|
| 283 |
+
"review_themes": [
|
| 284 |
+
{ "theme": "delivery_speed", "sentiment": "pos", "occurrences_30d": 11 },
|
| 285 |
+
{ "theme": "medicine_availability", "sentiment": "pos", "occurrences_30d": 8 }
|
| 286 |
+
]
|
| 287 |
+
},
|
| 288 |
+
{
|
| 289 |
+
"merchant_id": "m_010_sunrisepharm_pharmacy_lucknow",
|
| 290 |
+
"category_slug": "pharmacies",
|
| 291 |
+
"identity": {
|
| 292 |
+
"name": "Sunrise Medicos",
|
| 293 |
+
"city": "Lucknow",
|
| 294 |
+
"locality": "Gomti Nagar",
|
| 295 |
+
"place_id": "ChIJ_GOMTINAGAR_PHARMACY_010",
|
| 296 |
+
"verified": false,
|
| 297 |
+
"languages": ["en", "hi"],
|
| 298 |
+
"owner_first_name": "Vikas",
|
| 299 |
+
"established_year": 2020
|
| 300 |
+
},
|
| 301 |
+
"subscription": { "status": "active", "plan": "Basic", "days_remaining": 200 },
|
| 302 |
+
"performance": {
|
| 303 |
+
"window_days": 30,
|
| 304 |
+
"views": 720, "calls": 14, "directions": 32, "ctr": 0.041, "leads": 8,
|
| 305 |
+
"delta_7d": { "views_pct": 0.02, "calls_pct": 0.05 }
|
| 306 |
+
},
|
| 307 |
+
"offers": [],
|
| 308 |
+
"conversation_history": [],
|
| 309 |
+
"customer_aggregate": { "total_unique_ytd": 540, "repeat_customer_pct": 0.45, "chronic_rx_count": 60 },
|
| 310 |
+
"signals": ["unverified_gbp", "no_active_offers", "no_recent_conversation", "delivery_not_set_up"],
|
| 311 |
+
"review_themes": []
|
| 312 |
+
}
|
| 313 |
+
]
|
| 314 |
+
}
|
magicpin-ai-challenge/dataset/triggers_seed.json
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_README": "25 representative TriggerContexts spanning external + internal, merchant + customer scope, all kinds. Generator expands to 100.",
|
| 3 |
+
"triggers": [
|
| 4 |
+
{
|
| 5 |
+
"id": "trg_001_research_digest_dentists",
|
| 6 |
+
"scope": "merchant", "kind": "research_digest", "source": "external",
|
| 7 |
+
"merchant_id": "m_001_drmeera_dentist_delhi", "customer_id": null,
|
| 8 |
+
"payload": { "category": "dentists", "top_item_id": "d_2026W17_jida_fluoride" },
|
| 9 |
+
"urgency": 2, "suppression_key": "research:dentists:2026-W17", "expires_at": "2026-05-03T00:00:00Z"
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"id": "trg_002_compliance_dci_radiograph",
|
| 13 |
+
"scope": "merchant", "kind": "regulation_change", "source": "external",
|
| 14 |
+
"merchant_id": "m_001_drmeera_dentist_delhi", "customer_id": null,
|
| 15 |
+
"payload": { "category": "dentists", "top_item_id": "d_2026W17_dci_radiograph", "deadline_iso": "2026-12-15" },
|
| 16 |
+
"urgency": 4, "suppression_key": "compliance:dci_radiograph:2026", "expires_at": "2026-12-15T00:00:00Z"
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"id": "trg_003_recall_due_priya",
|
| 20 |
+
"scope": "customer", "kind": "recall_due", "source": "internal",
|
| 21 |
+
"merchant_id": "m_001_drmeera_dentist_delhi", "customer_id": "c_001_priya_for_m001",
|
| 22 |
+
"payload": { "service_due": "6_month_cleaning", "last_service_date": "2026-05-12", "due_date": "2026-11-12", "available_slots": [{ "iso": "2026-11-05T18:00:00+05:30", "label": "Wed 5 Nov, 6pm" }, { "iso": "2026-11-06T17:00:00+05:30", "label": "Thu 6 Nov, 5pm" }] },
|
| 23 |
+
"urgency": 3, "suppression_key": "recall:c_001_priya_for_m001:6mo", "expires_at": "2026-11-30T00:00:00Z"
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"id": "trg_004_perf_dip_bharat",
|
| 27 |
+
"scope": "merchant", "kind": "perf_dip", "source": "internal",
|
| 28 |
+
"merchant_id": "m_002_bharat_dentist_mumbai", "customer_id": null,
|
| 29 |
+
"payload": { "metric": "calls", "delta_pct": -0.50, "window": "7d", "vs_baseline": 12 },
|
| 30 |
+
"urgency": 4, "suppression_key": "perf_dip:m_002_bharat_dentist_mumbai:calls:2026-W17", "expires_at": "2026-05-10T00:00:00Z"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"id": "trg_005_renewal_due_bharat",
|
| 34 |
+
"scope": "merchant", "kind": "renewal_due", "source": "internal",
|
| 35 |
+
"merchant_id": "m_002_bharat_dentist_mumbai", "customer_id": null,
|
| 36 |
+
"payload": { "days_remaining": 12, "plan": "Pro", "renewal_amount": 4999 },
|
| 37 |
+
"urgency": 4, "suppression_key": "renewal:m_002_bharat_dentist_mumbai:2026-Q2", "expires_at": "2026-05-08T00:00:00Z"
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"id": "trg_006_festival_diwali",
|
| 41 |
+
"scope": "merchant", "kind": "festival_upcoming", "source": "external",
|
| 42 |
+
"merchant_id": "m_003_studio11_salon_hyderabad", "customer_id": null,
|
| 43 |
+
"payload": { "festival": "Diwali", "date": "2026-10-31", "days_until": 188, "category_relevance": ["salons", "restaurants", "pharmacies"] },
|
| 44 |
+
"urgency": 1, "suppression_key": "festival:diwali:2026:m_003", "expires_at": "2026-11-02T00:00:00Z"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"id": "trg_007_bridal_followup_kavya",
|
| 48 |
+
"scope": "customer", "kind": "wedding_package_followup", "source": "internal",
|
| 49 |
+
"merchant_id": "m_003_studio11_salon_hyderabad", "customer_id": "c_005_kavya_for_m003",
|
| 50 |
+
"payload": { "wedding_date": "2026-11-08", "trial_completed": "2026-03-22", "days_to_wedding": 196, "next_step_window_open": "skin_prep_program_30day" },
|
| 51 |
+
"urgency": 2, "suppression_key": "bridal_followup:c_005_kavya_for_m003", "expires_at": "2026-11-08T00:00:00Z"
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"id": "trg_008_curious_ask_studio11",
|
| 55 |
+
"scope": "merchant", "kind": "curious_ask_due", "source": "internal",
|
| 56 |
+
"merchant_id": "m_003_studio11_salon_hyderabad", "customer_id": null,
|
| 57 |
+
"payload": { "ask_template": "what_service_in_demand_this_week", "last_ask_at": null },
|
| 58 |
+
"urgency": 1, "suppression_key": "curious_ask:m_003:2026-W17", "expires_at": "2026-05-03T00:00:00Z"
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"id": "trg_009_winback_glamour",
|
| 62 |
+
"scope": "merchant", "kind": "winback_eligible", "source": "internal",
|
| 63 |
+
"merchant_id": "m_004_glamour_salon_pune", "customer_id": null,
|
| 64 |
+
"payload": { "days_since_expiry": 38, "perf_dip_pct": -0.30, "lapsed_customers_added_since_expiry": 24 },
|
| 65 |
+
"urgency": 2, "suppression_key": "winback:m_004_glamour_salon_pune", "expires_at": "2026-06-15T00:00:00Z"
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"id": "trg_010_ipl_match_delhi",
|
| 69 |
+
"scope": "merchant", "kind": "ipl_match_today", "source": "external",
|
| 70 |
+
"merchant_id": "m_005_pizzajunction_restaurant_delhi", "customer_id": null,
|
| 71 |
+
"payload": { "match": "DC vs MI", "venue": "Arun Jaitley Stadium", "city": "Delhi", "match_time_iso": "2026-04-26T19:30:00+05:30", "is_weeknight": false },
|
| 72 |
+
"urgency": 3, "suppression_key": "ipl:m_005:2026-04-26", "expires_at": "2026-04-26T23:59:59+05:30"
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"id": "trg_011_review_theme_late_delivery",
|
| 76 |
+
"scope": "merchant", "kind": "review_theme_emerged", "source": "internal",
|
| 77 |
+
"merchant_id": "m_005_pizzajunction_restaurant_delhi", "customer_id": null,
|
| 78 |
+
"payload": { "theme": "delivery_late", "occurrences_30d": 4, "trend": "rising", "common_quote": "took 50 mins for a 15 min ride" },
|
| 79 |
+
"urgency": 3, "suppression_key": "review_theme:m_005:delivery_late:2026-W17", "expires_at": "2026-05-10T00:00:00Z"
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"id": "trg_012_milestone_mylari",
|
| 83 |
+
"scope": "merchant", "kind": "milestone_reached", "source": "internal",
|
| 84 |
+
"merchant_id": "m_006_southindiancafe_restaurant_bangalore", "customer_id": null,
|
| 85 |
+
"payload": { "metric": "review_count", "value_now": 145, "milestone_value": 150, "is_imminent": true },
|
| 86 |
+
"urgency": 1, "suppression_key": "milestone:m_006:reviews_150", "expires_at": "2026-05-15T00:00:00Z"
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"id": "trg_013_corporate_thali_planning",
|
| 90 |
+
"scope": "merchant", "kind": "active_planning_intent", "source": "internal",
|
| 91 |
+
"merchant_id": "m_006_southindiancafe_restaurant_bangalore", "customer_id": null,
|
| 92 |
+
"payload": { "intent_topic": "corporate_bulk_thali_package", "merchant_last_message": "Yes good idea, what would it look like" },
|
| 93 |
+
"urgency": 4, "suppression_key": "planning:m_006:corp_thali:2026-W17", "expires_at": "2026-04-29T00:00:00Z"
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"id": "trg_014_seasonal_acquisition_dip_powerhouse",
|
| 97 |
+
"scope": "merchant", "kind": "seasonal_perf_dip", "source": "internal",
|
| 98 |
+
"merchant_id": "m_007_powerhouse_gym_bangalore", "customer_id": null,
|
| 99 |
+
"payload": { "metric": "views", "delta_pct": -0.30, "window": "7d", "is_expected_seasonal": true, "season_note": "post_resolution_window_apr_jun" },
|
| 100 |
+
"urgency": 1, "suppression_key": "seasonal_dip:m_007:2026-Q2", "expires_at": "2026-06-30T00:00:00Z"
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"id": "trg_015_winback_rashmi",
|
| 104 |
+
"scope": "customer", "kind": "customer_lapsed_hard", "source": "internal",
|
| 105 |
+
"merchant_id": "m_007_powerhouse_gym_bangalore", "customer_id": "c_010_rashmi_for_m007",
|
| 106 |
+
"payload": { "days_since_last_visit": 57, "previous_focus": "weight_loss", "previous_membership_months": 5 },
|
| 107 |
+
"urgency": 3, "suppression_key": "winback:c_010_rashmi_for_m007", "expires_at": "2026-06-15T00:00:00Z"
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"id": "trg_016_kids_yoga_program_drafting",
|
| 111 |
+
"scope": "merchant", "kind": "active_planning_intent", "source": "internal",
|
| 112 |
+
"merchant_id": "m_008_zenyoga_gym_chennai", "customer_id": null,
|
| 113 |
+
"payload": { "intent_topic": "kids_yoga_summer_camp", "merchant_last_message": "Hi I want to add a kids yoga program — what should it look like?" },
|
| 114 |
+
"urgency": 4, "suppression_key": "planning:m_008:kids_yoga:2026-W17", "expires_at": "2026-04-30T00:00:00Z"
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"id": "trg_017_kids_yoga_trial_followup_karthik",
|
| 118 |
+
"scope": "customer", "kind": "trial_followup", "source": "internal",
|
| 119 |
+
"merchant_id": "m_008_zenyoga_gym_chennai", "customer_id": "c_012_karthik_jr_for_m008",
|
| 120 |
+
"payload": { "trial_date": "2026-04-22", "next_session_options": [{ "iso": "2026-05-03T08:00:00+05:30", "label": "Sat 3 May, 8am" }] },
|
| 121 |
+
"urgency": 2, "suppression_key": "trial_followup:c_012_karthik_jr_for_m008", "expires_at": "2026-05-10T00:00:00Z"
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"id": "trg_018_supply_atorvastatin_recall",
|
| 125 |
+
"scope": "merchant", "kind": "supply_alert", "source": "external",
|
| 126 |
+
"merchant_id": "m_009_apollo_pharmacy_jaipur", "customer_id": null,
|
| 127 |
+
"payload": { "alert_id": "d_2026W17_atorvastatin_recall", "molecule": "atorvastatin", "affected_batches": ["AT2024-1102", "AT2024-1108"], "manufacturer": "MfrZ" },
|
| 128 |
+
"urgency": 5, "suppression_key": "alert:atorvastatin:2026-04", "expires_at": "2026-05-30T00:00:00Z"
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"id": "trg_019_chronic_refill_grandfather",
|
| 132 |
+
"scope": "customer", "kind": "chronic_refill_due", "source": "internal",
|
| 133 |
+
"merchant_id": "m_009_apollo_pharmacy_jaipur", "customer_id": "c_013_grandfather_for_m009",
|
| 134 |
+
"payload": { "molecule_list": ["metformin", "atorvastatin", "telmisartan"], "last_refill": "2026-03-26", "stock_runs_out_iso": "2026-04-28T00:00:00+05:30", "delivery_address_saved": true },
|
| 135 |
+
"urgency": 3, "suppression_key": "refill:c_013_grandfather_for_m009:2026-04", "expires_at": "2026-04-28T00:00:00+05:30"
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"id": "trg_020_summer_demand_shift",
|
| 139 |
+
"scope": "merchant", "kind": "category_seasonal", "source": "external",
|
| 140 |
+
"merchant_id": "m_009_apollo_pharmacy_jaipur", "customer_id": null,
|
| 141 |
+
"payload": { "season": "summer_2026", "trends": ["ORS_demand_+40", "sunscreen_demand_+38", "antifungal_demand_+45", "cold_cough_demand_-60"], "shelf_action_recommended": true },
|
| 142 |
+
"urgency": 2, "suppression_key": "season:summer:m_009:2026", "expires_at": "2026-06-30T00:00:00Z"
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"id": "trg_021_unverified_gbp_sunrise",
|
| 146 |
+
"scope": "merchant", "kind": "gbp_unverified", "source": "internal",
|
| 147 |
+
"merchant_id": "m_010_sunrisepharm_pharmacy_lucknow", "customer_id": null,
|
| 148 |
+
"payload": { "verified": false, "verification_path": "postcard_or_phone_call", "estimated_uplift_pct": 0.30 },
|
| 149 |
+
"urgency": 3, "suppression_key": "unverified:m_010", "expires_at": "2026-05-30T00:00:00Z"
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"id": "trg_022_cde_webinar_dentists",
|
| 153 |
+
"scope": "merchant", "kind": "cde_opportunity", "source": "external",
|
| 154 |
+
"merchant_id": "m_001_drmeera_dentist_delhi", "customer_id": null,
|
| 155 |
+
"payload": { "digest_item_id": "d_2026W17_ida_webinar", "credits": 2, "fee": "free_for_members" },
|
| 156 |
+
"urgency": 1, "suppression_key": "cde:dentists:2026-05-02", "expires_at": "2026-05-02T19:00:00+05:30"
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"id": "trg_023_competitor_opened_dentist",
|
| 160 |
+
"scope": "merchant", "kind": "competitor_opened", "source": "external",
|
| 161 |
+
"merchant_id": "m_001_drmeera_dentist_delhi", "customer_id": null,
|
| 162 |
+
"payload": { "competitor_name": "Smile Studio", "distance_km": 1.3, "their_offer": "Dental Cleaning @ ₹199", "opened_date": "2026-04-08" },
|
| 163 |
+
"urgency": 2, "suppression_key": "competitor:m_001:smile_studio", "expires_at": "2026-06-08T00:00:00Z"
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"id": "trg_024_perf_spike_zen",
|
| 167 |
+
"scope": "merchant", "kind": "perf_spike", "source": "internal",
|
| 168 |
+
"merchant_id": "m_008_zenyoga_gym_chennai", "customer_id": null,
|
| 169 |
+
"payload": { "metric": "calls", "delta_pct": 0.15, "window": "7d", "vs_baseline": 18, "likely_driver": "kids_yoga_post" },
|
| 170 |
+
"urgency": 1, "suppression_key": "perf_spike:m_008:calls:2026-W17", "expires_at": "2026-05-03T00:00:00Z"
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"id": "trg_025_dormancy_glamour",
|
| 174 |
+
"scope": "merchant", "kind": "dormant_with_vera", "source": "internal",
|
| 175 |
+
"merchant_id": "m_004_glamour_salon_pune", "customer_id": null,
|
| 176 |
+
"payload": { "days_since_last_merchant_message": 38, "last_topic": "subscription_expiry" },
|
| 177 |
+
"urgency": 2, "suppression_key": "dormant:m_004:30d", "expires_at": "2026-05-30T00:00:00Z"
|
| 178 |
+
}
|
| 179 |
+
]
|
| 180 |
+
}
|
magicpin-ai-challenge/engagement-design.md
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Vera Engagement Framework — Design
|
| 2 |
+
|
| 3 |
+
**Status**: Draft proposal — not implemented.
|
| 4 |
+
**Last updated**: 2026-04-26
|
| 5 |
+
**Scope**: How Vera composes every outbound conversation, both merchant-facing and (on-behalf-of-merchant) customer-facing.
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## Why this exists
|
| 10 |
+
|
| 11 |
+
The current engagement loop (`agents/vera/followup/`) is a collection of ad-hoc nudges — campaign templates, performance reminders, conversation requeues. Each was built point-to-point, with hardcoded assumptions baked in. Adding a new nudge type means re-writing prompts, finding new data, and re-implementing send/dedup logic.
|
| 12 |
+
|
| 13 |
+
Two problems this creates:
|
| 14 |
+
|
| 15 |
+
1. **Functional nudges are inherently low-frequency.** A merchant only has a handful of "broken things" or "events" per month. To engage them 3-5×/week, we need a framework that supports curiosity-driven, knowledge-driven, and customer-driven conversations — not just "fix-this" reminders.
|
| 16 |
+
|
| 17 |
+
2. **Same engine should drive merchant-facing AND customer-facing messages.** When Dr. Meera's clinic sends a recall reminder to her patient Priya, Vera does the work — but the framework for composing that message should be the same one that produces the research digest Vera sends to Dr. Meera herself.
|
| 18 |
+
|
| 19 |
+
This doc proposes a **4-context composition framework** that separates the slow-changing (category knowledge) from the fast-changing (per-merchant, per-customer state) and the event-driven (triggers).
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## The 4 contexts
|
| 24 |
+
|
| 25 |
+
Every Vera message = `compose(category, merchant, trigger, customer?)`.
|
| 26 |
+
|
| 27 |
+
| Context | Question it answers | Refresh cadence | Owner | Shared across |
|
| 28 |
+
|---|---|---|---|---|
|
| 29 |
+
| **Category** | How do we talk to *this type* of business? What do they know, care about, offer, get regulated on? | Weekly (digest), monthly (voice/catalog) | Domain curator | All merchants in the vertical |
|
| 30 |
+
| **Merchant** | Who is this specific business, how are they doing, what's in their history with us? | Daily (perf), real-time (conversation) | Snapshot + conversation sync | Just this merchant |
|
| 31 |
+
| **Trigger** | Why am I messaging *right now*? What specific event prompts this? | Per-event | Event detectors | This one send |
|
| 32 |
+
| **Customer** | Who is the merchant's customer, and what's their state with this merchant? | Per-visit / per-interaction | Merchant CRM sync | Just this customer |
|
| 33 |
+
|
| 34 |
+
The composer takes these 4 (3 if no customer scope) and produces the message body, template parameters, CTA, and suppression key. Nothing is hardcoded into the composer — all variation comes from the contexts.
|
| 35 |
+
|
| 36 |
+
```
|
| 37 |
+
              ┌─────────────────┐
|
| 38 |
+
Category  ───►│                 │
|
| 39 |
+
Merchant  ───►│ Composer (LLM)  │───► message {body, params, cta, send_as}
|
| 40 |
+
Trigger   ───►│                 │
|
| 41 |
+
Customer? ───►│                 │
|
| 42 |
+
              └─────────────────┘
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
---
|
| 46 |
+
|
| 47 |
+
## Layer details
|
| 48 |
+
|
| 49 |
+
### CategoryContext
|
| 50 |
+
|
| 51 |
+
Slow-changing knowledge pack per vertical. One per category (`dentists`, `salons`, `gyms`, `restaurants`, `car_service`, ...). Bootstrap is labor-intensive (needs domain expertise); ongoing maintenance is mostly the weekly digest.
|
| 52 |
+
|
| 53 |
+
Fields:
|
| 54 |
+
|
| 55 |
+
- `slug` — `"dentists"`
|
| 56 |
+
- `offer_catalog` — canonical service+price patterns from vera-mcp + category-specific extensions. Prefer `"Dental Cleaning @ ₹299"` over `"Flat 20% OFF"`. Service+price is more compelling than discount.
|
| 57 |
+
- `voice` — tone, vocabulary, taboos. For dentists: technical terms welcome (`"fluoride varnish at 3-month recall"`), legal taboos (`"cure"`, `"guaranteed"`), peer tone not hype.
|
| 58 |
+
- `peer_stats` — city-scoped benchmarks: avg rating, avg reviews, typical CTR, typical patient volume. Anchors comparative messages.
|
| 59 |
+
- `digest` — this week's curated research / compliance / CDE / tech / peer-practice items, with source citations. Sourced from a per-category source list (PubMed dental RSS, JIDA, IDA Delhi calendar, DCI circulars, Dental Tribune India, Google Trends for dental queries, vendor press releases).
|
| 60 |
+
- `patient_content_library` — items written at patient-reading level that the merchant can reshare with their own customers (powers the `PRO_PATIENT_CONTENT` family).
|
| 61 |
+
- `seasonal_beats` — cycles like "exam-stress bruxism spikes Nov-Feb" that cue category-specific timing.
|
| 62 |
+
- `trend_signals` — Google Trends + Practo-style query data showing what patients in this vertical are searching for.
|
| 63 |
+
|
| 64 |
+
### MerchantContext
|
| 65 |
+
|
| 66 |
+
Per-merchant state. Refreshed daily for performance; real-time for conversation history.
|
| 67 |
+
|
| 68 |
+
Fields:
|
| 69 |
+
|
| 70 |
+
- `merchant_id`
|
| 71 |
+
- `identity` — name, place_id, locality, city, verified, languages
|
| 72 |
+
- `subscription` — status, days remaining, plan
|
| 73 |
+
- `performance` — views/calls/CTR/leads/directions, 30d + 7d deltas
|
| 74 |
+
- `offers` — active + paused, sourced from vera-mcp's offers collection (and eventually the as-yet-undefined "real" offer source-of-truth)
|
| 75 |
+
- `conversation_history` — last N turns w/ Vera, with engagement tags (replied, ignored, unsubscribed-from-topic)
|
| 76 |
+
- `customer_aggregate` — derived stats over the merchant's customer roster (active count, lapsed count, retention rate). Not individual customers — aggregates only.
|
| 77 |
+
- `signals` — derived flags: `stale_posts`, `ctr_below_peer_median`, `customer_lapse_rate_high`, `dormant_with_vera`, ...
|
| 78 |
+
|
| 79 |
+
### TriggerContext
|
| 80 |
+
|
| 81 |
+
The event that prompts this specific message. Two families:
|
| 82 |
+
|
| 83 |
+
- **External** — happens outside the merchant's account. News, weather, festival, regulation change, category-trend movement, competitor opens nearby, weekly research digest release.
|
| 84 |
+
- **Internal** — happens within the merchant's account or customer roster. Performance dip/spike, milestone hit, dormancy threshold crossed, customer lapse threshold crossed, appointment due, review pattern emerged, scheduled-recurring nudge.
|
| 85 |
+
|
| 86 |
+
Fields:
|
| 87 |
+
|
| 88 |
+
- `id` — unique
|
| 89 |
+
- `scope` — `merchant` | `customer`
|
| 90 |
+
- `kind` — `research_digest`, `recall_due`, `perf_spike`, `competitor_opened`, `festival`, ...
|
| 91 |
+
- `source` — `external` | `internal`
|
| 92 |
+
- `payload` — kind-specific data (e.g., for `recall_due`: `{patient_id, last_visit, due_date}`)
|
| 93 |
+
- `urgency` — 1-5; ranks against other queued triggers
|
| 94 |
+
- `suppression_key` — used by Redis dedup to prevent re-sends
|
| 95 |
+
- `expires_at` — after which the trigger is stale
|
| 96 |
+
|
| 97 |
+
### CustomerContext
|
| 98 |
+
|
| 99 |
+
Only populated when `scope=customer`. Per-customer state with this specific merchant.
|
| 100 |
+
|
| 101 |
+
Fields:
|
| 102 |
+
|
| 103 |
+
- `customer_id`
|
| 104 |
+
- `merchant_id`
|
| 105 |
+
- `identity` — name, phone, language preference
|
| 106 |
+
- `relationship` — first_visit, last_visit, visits_total, services received, lifetime value
|
| 107 |
+
- `state` — `new` | `active` | `lapsed_soft` (3-6mo) | `lapsed_hard` (6mo+) | `churned` (12mo+)
|
| 108 |
+
- `preferences` — preferred slot times (derived from booking history), preferred channel, opt-in status
|
| 109 |
+
- `consent` — when did they opt in, via what mechanism, scope of consent
|
| 110 |
+
|
| 111 |
+
---
|
| 112 |
+
|
| 113 |
+
## Composer
|
| 114 |
+
|
| 115 |
+
Single LLM-prompted module. Takes the 4 contexts as input. Produces:
|
| 116 |
+
|
| 117 |
+
- `body` — the WhatsApp message body
|
| 118 |
+
- `template_params` — params to fill an approved Kaleyra template (used only for the first touch in a session window)
|
| 119 |
+
- `cta` — the binary or open-ended ask
|
| 120 |
+
- `suppression_key` — for the trigger-level dedup
|
| 121 |
+
- `send_as` — `"vera"` for merchant-facing, `"merchant_on_behalf"` for customer-facing
|
| 122 |
+
|
| 123 |
+
The composer prompt is the single point of failure. It must be:
|
| 124 |
+
|
| 125 |
+
- Versioned (`composer_v1`, `composer_v2`, ...)
|
| 126 |
+
- A/B-testable
|
| 127 |
+
- Auditable (we can replay any past message and see all 4 input contexts)
|
| 128 |
+
|
| 129 |
+
Different `kind` values may use different prompt variants — e.g., `research_digest` needs source-citation framing, `recall_due` needs slot-offering framing, `competitor_opened` needs voyeur-curiosity framing. The composer dispatches by `kind`.
|
| 130 |
+
|
| 131 |
+
---
|
| 132 |
+
|
| 133 |
+
## Worked example 1: merchant-facing
|
| 134 |
+
|
| 135 |
+
**Merchant**: Dr. Meera, Lajpat Nagar, Delhi
|
| 136 |
+
**Trigger**: weekly dentistry research digest just landed
|
| 137 |
+
|
| 138 |
+
**Inputs:**
|
| 139 |
+
|
| 140 |
+
| Context | Key values used |
|
| 141 |
+
|---|---|
|
| 142 |
+
| Category (dentists) | voice=peer/technical; digest_top_item="JIDA Oct trial: 3-mo fluoride recall cuts caries 38% better"; peer_stat="South-Delhi solo CTR median 3.0%" |
|
| 143 |
+
| Merchant (Dr. Meera) | CTR 2.1% (below peer); ran "Deep Cleaning ₹499" 2mo ago; 78 lapsed patients; last Vera touch 2d ago (engaged) |
|
| 144 |
+
| Trigger | kind=`research_digest_release`, scope=merchant, urgency=2, source=external, suppression_key=`research:dentists:2026-W17` |
|
| 145 |
+
| Customer | (not populated) |
|
| 146 |
+
|
| 147 |
+
**Composed message:**
|
| 148 |
+
|
| 149 |
+
> Dr. Meera, JIDA's Oct issue landed. One item relevant to your high-risk adult patients — 2,100-patient trial showed 3-month fluoride recall cuts caries recurrence 38% better than 6-month. Worth a look (2-min abstract). Want me to pull it + draft a patient-ed WhatsApp you can share? *— JIDA Oct 2026 p.14*
|
| 150 |
+
|
| 151 |
+
Why it works:
|
| 152 |
+
- **Category** drives voice (technical, source-cited, peer tone)
|
| 153 |
+
- **Merchant** drives specificity ("your high-risk adult patients" — derived from her customer aggregate)
|
| 154 |
+
- **Trigger** drives the hook (this week's digest, not a promo ask)
|
| 155 |
+
- No customer context needed; this is merchant-to-Vera
|
| 156 |
+
|
| 157 |
+
---
|
| 158 |
+
|
| 159 |
+
## Worked example 2: customer-facing (same framework)
|
| 160 |
+
|
| 161 |
+
**Merchant**: Dr. Meera (same)
|
| 162 |
+
**Customer**: Priya — patient since 2025-11, last visit 2026-05 (cleaning + whitening), prefers weekday evenings, opted-in to reminders
|
| 163 |
+
**Trigger**: 6-month recall window opens
|
| 164 |
+
|
| 165 |
+
**Inputs:**
|
| 166 |
+
|
| 167 |
+
| Context | Key values used |
|
| 168 |
+
|---|---|
|
| 169 |
+
| Category (dentists, customer-facing) | voice=warm-clinical; taboos=no medical claims, no "guaranteed"; recall framing pattern |
|
| 170 |
+
| Merchant (Dr. Meera) | active offer `Dental Cleaning @ ₹299`; available slots Wed 6pm + Thu 5pm (next 7d); WhatsApp Business number |
|
| 171 |
+
| Trigger | kind=`recall_due`, scope=customer, urgency=3, source=internal, payload={patient_id: priya, last_visit: 2026-05, due_date: 2026-11} |
|
| 172 |
+
| Customer (Priya) | name + phone; lapsed_soft state; preferred=weekday evening; consent active; language=Hindi-English mix |
|
| 173 |
+
|
| 174 |
+
**Composed message** (sent from Dr. Meera's WhatsApp number, drafted by Vera):
|
| 175 |
+
|
| 176 |
+
> Hi Priya, Dr. Meera's clinic here 🦷 It's been 5 months since your last visit — your 6-month cleaning recall is due. Apke liye 2 slots ready hain: **Wed 6 Nov, 6pm** ya **Thu 7 Nov, 5pm**. ₹299 cleaning + complimentary fluoride. Reply 1 for Wed, 2 for Thu, or tell us a time that works.
|
| 177 |
+
|
| 178 |
+
Why it works:
|
| 179 |
+
- **Category** sets the legal/clinical voice constraints
|
| 180 |
+
- **Merchant** provides the actual catalog price + actual open slots from the schedule
|
| 181 |
+
- **Trigger** provides the recall payload (last_visit, due_date)
|
| 182 |
+
- **Customer** drives personalization (name, language mix, evening preference)
|
| 183 |
+
|
| 184 |
+
Same composer. Different context inputs. Two completely different conversations.
|
| 185 |
+
|
| 186 |
+
---
|
| 187 |
+
|
| 188 |
+
## Engagement loops this enables
|
| 189 |
+
|
| 190 |
+
Once the framework exists, every loop is just a small cron that emits `TriggerContext` instances. The composer handles the rest.
|
| 191 |
+
|
| 192 |
+
| Loop | Emits trigger kinds | Scope |
|
| 193 |
+
|---|---|---|
|
| 194 |
+
| News/weather scanner *(already built — see `agents/vera/followup/event_sources.py`)* | `external` (festival, heatwave, fuel, IPL, monsoon, news) | merchant |
|
| 195 |
+
| Weekly research digest per category | `research_digest_release` | merchant |
|
| 196 |
+
| Performance monitor | `perf_spike`, `perf_dip`, `milestone_reached` | merchant |
|
| 197 |
+
| Review-pattern detector | `review_theme_emerged` | merchant |
|
| 198 |
+
| Conversation curiosity-ask scheduler | `curious_ask_due` | merchant |
|
| 199 |
+
| Recall scheduler (from merchant CRM) | `recall_due` | customer |
|
| 200 |
+
| Lapse detector | `customer_lapsed_soft`, `customer_lapsed_hard` | customer |
|
| 201 |
+
| Appointment reminder | `appointment_tomorrow` | customer |
|
| 202 |
+
| Capacity optimizer | `unplanned_slot_open` (offered to likely-to-book lapsed customers) | customer |
|
| 203 |
+
|
| 204 |
+
Adding a loop = define one new `kind`, implement the detector, add a composer prompt variant. No change to merchant/category/customer code.
|
| 205 |
+
|
| 206 |
+
---
|
| 207 |
+
|
| 208 |
+
## Implementation shape
|
| 209 |
+
|
| 210 |
+
```python
|
| 211 |
+
# agents/vera/engagement/contexts.py
|
| 212 |
+
|
| 213 |
+
@dataclass
|
| 214 |
+
class CategoryContext:
|
| 215 |
+
slug: str # "dentists"
|
| 216 |
+
offer_catalog: list[OfferTemplate]
|
| 217 |
+
voice: VoiceProfile
|
| 218 |
+
peer_stats: PeerStats
|
| 219 |
+
digest: list[DigestItem]
|
| 220 |
+
patient_content_library: list[ContentItem]
|
| 221 |
+
seasonal_beats: list[SeasonalBeat]
|
| 222 |
+
trend_signals: list[TrendSignal]
|
| 223 |
+
|
| 224 |
+
@dataclass
|
| 225 |
+
class MerchantContext:
|
| 226 |
+
merchant_id: str
|
| 227 |
+
identity: Identity
|
| 228 |
+
subscription: Subscription
|
| 229 |
+
performance: PerformanceSnapshot
|
| 230 |
+
offers: list[MerchantOffer]
|
| 231 |
+
conversation_history: ConversationHistory
|
| 232 |
+
customer_aggregate: CustomerAggregate
|
| 233 |
+
signals: list[DerivedSignal]
|
| 234 |
+
|
| 235 |
+
@dataclass
|
| 236 |
+
class TriggerContext:
|
| 237 |
+
id: str
|
| 238 |
+
scope: Literal["merchant", "customer"]
|
| 239 |
+
kind: str
|
| 240 |
+
source: Literal["external", "internal"]
|
| 241 |
+
payload: dict
|
| 242 |
+
urgency: int # 1-5
|
| 243 |
+
suppression_key: str
|
| 244 |
+
expires_at: datetime
|
| 245 |
+
|
| 246 |
+
@dataclass
|
| 247 |
+
class CustomerContext:
|
| 248 |
+
customer_id: str
|
| 249 |
+
merchant_id: str
|
| 250 |
+
identity: CustomerIdentity
|
| 251 |
+
relationship: Relationship
|
| 252 |
+
state: Literal["new", "active", "lapsed_soft", "lapsed_hard", "churned"]
|
| 253 |
+
preferences: Preferences
|
| 254 |
+
consent: Consent
|
| 255 |
+
```
|
| 256 |
+
|
| 257 |
+
```python
|
| 258 |
+
# agents/vera/engagement/composer.py
|
| 259 |
+
|
| 260 |
+
class EngagementComposer:
|
| 261 |
+
def compose(self,
|
| 262 |
+
category: CategoryContext,
|
| 263 |
+
merchant: MerchantContext,
|
| 264 |
+
trigger: TriggerContext,
|
| 265 |
+
customer: CustomerContext | None = None) -> ComposedMessage:
|
| 266 |
+
"""Returns ComposedMessage(body, template_params, cta,
|
| 267 |
+
suppression_key, send_as)."""
|
| 268 |
+
```
|
| 269 |
+
|
| 270 |
+
Both engagement surfaces (merchant-facing, customer-on-behalf-of-merchant) call the same composer. The only thing that changes is whether `customer` is populated.
|
| 271 |
+
|
| 272 |
+
---
|
| 273 |
+
|
| 274 |
+
## Phased rollout
|
| 275 |
+
|
| 276 |
+
### Phase 1 — framework skeleton + dentistry vertical (≈ 2 weeks)
|
| 277 |
+
|
| 278 |
+
1. Define the 4 dataclasses in `agents/vera/engagement/contexts.py`.
|
| 279 |
+
2. Build the `CategoryContext` for dentistry β offer catalog, voice profile, peer stats, one weekly research digest pipeline, patient-content seed.
|
| 280 |
+
3. Build `MerchantContext` loader from the existing `merchant_snapshot_data` collection.
|
| 281 |
+
4. Build the first `EngagementComposer` with a prompt that handles 2 trigger kinds (`research_digest_release` and one merchant-facing perf trigger).
|
| 282 |
+
5. Render (no send) the Dr. Meera research-digest message end-to-end from the 4 contexts. Inspect the output before any send happens.
|
| 283 |
+
|
| 284 |
+
### Phase 2 — customer-on-behalf sends (≈ 2 weeks)
|
| 285 |
+
|
| 286 |
+
6. **Resolve the customer-data source-of-truth.** This is the biggest unknown. Options: clinic SaaS integration (Practo, Dentcubate), merchant CSV upload, BOTOPS chat-derived patient list. Without this, customer engagement is theoretical.
|
| 287 |
+
7. Define the consent model: customer opted in via merchant, not via Vera directly. Capture timestamp + scope.
|
| 288 |
+
8. Stand up a send-as-merchant channel: WhatsApp Business API under the merchant's number, or Vera's shared number with attribution `"Dr. Meera's clinic via Vera"`.
|
| 289 |
+
9. Ship the first customer-facing trigger in production: `recall_due`. Lowest abuse risk, highest merchant intent.
|
| 290 |
+
|
| 291 |
+
### Phase 3 — multiply verticals (≈ 1 week per vertical)
|
| 292 |
+
|
| 293 |
+
10. Replicate `CategoryContext` for 4-5 more verticals (salons, gyms, pharmacies, restaurants, car service). Mostly data filling, not code.
|
| 294 |
+
11. Add 3 more triggers per scope. Merchant: `perf_dip`, `milestone_reached`, `review_theme_emerged`. Customer: `customer_lapsed_soft`, `appointment_tomorrow`, `unplanned_slot_open`.
|
| 295 |
+
|
| 296 |
+
---
|
| 297 |
+
|
| 298 |
+
## Open questions
|
| 299 |
+
|
| 300 |
+
These need answers before Phase 2 can ship:
|
| 301 |
+
|
| 302 |
+
1. **Where does the merchant's customer list live?** No clean answer yet. Most likely: per-merchant clinic software with no standard integration. May need a self-serve CSV upload or a per-vertical SaaS adapter.
|
| 303 |
+
2. **Consent architecture.** Can Vera message a patient directly, or must every outbound require merchant approval before send? Recommendation: templated auto-sends with merchant override available, switching to fully-auto after the merchant has approved N consecutive sends.
|
| 304 |
+
3. **Attribution.** Does the patient see "Dr. Meera's clinic" or "Vera on behalf of Dr. Meera's clinic"? Trust + legal implications either way. Probably category-dependent (regulated verticals need clearer attribution).
|
| 305 |
+
4. **Composer prompt versioning.** Single point of failure. Versioned + A/B tested from day 1 — every send records the prompt version that produced it.
|
| 306 |
+
5. **Offer source-of-truth.** Per the parallel discussion, the canonical merchant offer catalog likely lives outside vera-mcp (aryan `catalogoffer`, merchant-portal-api, or magicpin_jobs output). MerchantContext needs to read from that source — pending identification.
|
| 307 |
+
6. **Composer model choice.** Azure OpenAI primary, Deepseek fallback (matching `template_generator._call_llm`)? Or is there a case for a smaller faster model for high-volume per-customer sends?
|
| 308 |
+
|
| 309 |
+
---
|
| 310 |
+
|
| 311 |
+
## Why this is worth building
|
| 312 |
+
|
| 313 |
+
- **Engagement frequency goes from "few times a month" to "few times a week"** — by adding curiosity-driven, knowledge-driven, and customer-driven loops on top of the existing functional ones.
|
| 314 |
+
- **One framework, two products** — the same composition engine drives Vera-to-merchant *and* merchant-to-customer messaging. Build once, ship twice.
|
| 315 |
+
- **Vertical scaling is data work, not code work** — adding a new category becomes "fill in a CategoryContext", not "write a new agent".
|
| 316 |
+
- **Auditable + versioned** — every message has explicit inputs and a versioned composer; we can replay, A/B test, and answer "why did Vera send this?" for any past send.
|
| 317 |
+
|
| 318 |
+
---
|
| 319 |
+
|
| 320 |
+
## Appendix: relationship to existing code
|
| 321 |
+
|
| 322 |
+
- `agents/vera/followup/event_sources.py` and `agents/vera/followup/event_extractor.py` *(branch `feature/vera-campaign-engagement`)* already produce external `TriggerContext`-shaped objects for the news/weather scanner. They become the first concrete trigger source feeding the new composer.
|
| 323 |
+
- `agents/vera/followup/template_registry.py` will continue to host the Kaleyra-approved template names (used for the first-touch send before the 24h session window opens). The composer fills the template parameters.
|
| 324 |
+
- `agents/vera/followup/snapshot_data.py` already provides most of the `MerchantContext` fields. Customer aggregate fields would be added as new sections on `MerchantSnapshotData`.
|
| 325 |
+
- `services/vera-mcp/src/services/offer_suggester.py` is the leading candidate for `CategoryContext.offer_catalog` (pending the open offer source-of-truth question).
|
magicpin-ai-challenge/engagement-research.md
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Vera Engagement Framework — Research: Current Merchant Data Access
|
| 2 |
+
|
| 3 |
+
**Status**: Research notes — companion to `engagement-design.md`.
|
| 4 |
+
**Last updated**: 2026-04-26
|
| 5 |
+
**Scope**: How the existing system loads merchant + customer data at conversation init and during a turn. Used to inform what the new framework must build vs. adapt.
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## TL;DR
|
| 10 |
+
|
| 11 |
+
Two distinct paths exist today — **merchant-facing** (Vera ↔ Dr. Meera) and **customer-facing** (a customer asks Vera *about* Dr. Meera). They share infrastructure (vera-mcp + merchant-support-mcp + Redis) but compose context very differently.
|
| 12 |
+
|
| 13 |
+
Most of what the proposed `MerchantContext` needs **already exists in scattered form** (`_merchant_snapshot`, `_behavioral_profile`, `_session_scenario`). The genuinely new pieces are:
|
| 14 |
+
|
| 15 |
+
- `CategoryContext` (no existing equivalent)
|
| 16 |
+
- `TriggerContext` (no normalized abstraction today — every cron emits ad-hoc payloads)
|
| 17 |
+
- `CustomerContext.relationship` (visit history aggregation doesn't exist)
|
| 18 |
+
- `EngagementComposer` (no shared composer — each agent has its own prompt-builder)
|
| 19 |
+
|
| 20 |
+
The aryan dependency for `category` and `locality` is the soft underbelly — every customer-info-pack call hits aryan synchronously.
|
| 21 |
+
|
| 22 |
+
---
|
| 23 |
+
|
| 24 |
+
## Path 1 — Merchant-facing (`VeraMerchantAgent`)
|
| 25 |
+
|
| 26 |
+
### Init (per session, instance-level)
|
| 27 |
+
|
| 28 |
+
`agents/vera/merchant_agent.py:402` — `__init__`:
|
| 29 |
+
|
| 30 |
+
- Spawns its own `vera_mcp_client` (`HTTPMCPClient` → `VERA_MCP_SERVER_URL`, default `vera-mcp:8000`).
|
| 31 |
+
- Inherits `BaseAgent._merchant_mcp_client` — class-level shared singleton pointing to `merchant-support-mcp` (`https://search5.magicpin.com/staging/mcp-server/mcp`).
|
| 32 |
+
- **No merchant data is loaded at init** — agent doesn't know who it's talking to yet.
|
| 33 |
+
|
| 34 |
+
### Per-turn (every inbound message)
|
| 35 |
+
|
| 36 |
+
`agents/vera/merchant_agent.py:2171` — `handle_input(agent_input)`:
|
| 37 |
+
|
| 38 |
+
1. Extract `merchant_id` from `metadata.context_data.merchant_id`, falling back to regex on the message body (`\d{7,}`).
|
| 39 |
+
2. Fire 2 parallel tasks:
|
| 40 |
+
- `_detect_state(merchant_id)` — GBP connection + subscription status checks.
|
| 41 |
+
- `_prefetch_merchant_context(merchant_id)` — full snapshot + behavioral profile.
|
| 42 |
+
|
| 43 |
+
### `_prefetch_merchant_context` — `merchant_agent.py:740`
|
| 44 |
+
|
| 45 |
+
Cache-first design:
|
| 46 |
+
|
| 47 |
+
1. **Redis check**: `vera:merchant_ctx:{merchant_id}` — TTL **30 min**. Hit → return immediately, skip everything below.
|
| 48 |
+
2. Parallel via `asyncio.gather`:
|
| 49 |
+
- `vera_merchant_snapshot` (vera-mcp tool)
|
| 50 |
+
- `vera_get_merchant_profile` (vera-mcp tool)
|
| 51 |
+
3. If snapshot is empty: fall back to `get_aggregated_unassociated_merchant_data` (merchant-support-mcp) — basic GBP health, no commercial data. Wrap it in a snapshot-shaped envelope.
|
| 52 |
+
4. Sequential enrichments:
|
| 53 |
+
- `_prefetch_product_context` — fans out to `vera_get_subscription_context`, `vera_get_performance_summary`, and (only if DA subscribed) `da_get_campaign_context`. ~2KB total, embedded into snapshot.
|
| 54 |
+
- `_enrich_snapshot_with_pricing` — direct HTTP to `https://vera.magicpin.com/api/v1/merchant/pricing/get?mid=...` (vera-mcp's pricing path is unreachable locally). Mounted into snapshot as `pricing_recommendation`.
|
| 55 |
+
5. Stuff result into Redis at the same key, TTL 30 min.
|
| 56 |
+
|
| 57 |
+
Final state: `self._merchant_snapshot` and `self._behavioral_profile` are populated. `_get_system_prompt()` reads from these and serializes the snapshot directly into the LLM system prompt (`merchant_agent.py:996`).
|
| 58 |
+
|
| 59 |
+
### What `vera_merchant_snapshot` actually fetches
|
| 60 |
+
|
| 61 |
+
`vera-mcp/src/services/merchant_snapshot.py:51` — `build_merchant_snapshot()`:
|
| 62 |
+
|
| 63 |
+
1. **Resolve identity** via `gbp_resolve_merchant` — gets `place_id`, `location_name`, `merchant_title`.
|
| 64 |
+
2. **One async fan-out** of up to 13 tool calls in parallel:
|
| 65 |
+
|
| 66 |
+
| Bucket | Tools |
|
| 67 |
+
|---|---|
|
| 68 |
+
| Merchant-level (no GBP needed) | `vera_get_subscription_status`, `vera_get_pricing_by_merchant`, `vera_get_merchant_pain_points`, `vera_get_merchant_offer`, `vera_get_merchant_config`, `vera_get_onboarding_status`, `vera_get_enhancement_suggestions` |
|
| 69 |
+
| Location-level (skipped if no place_id) | `gbp_get_location`, `gbp_get_profile_completeness`, `gbp_get_performance_summary`, `gbp_get_search_keywords`, `gbp_get_review_stats`, `gbp_list_posts` |
|
| 70 |
+
|
| 71 |
+
3. Composes 7 sections: `identity`, `profile`, `reputation`, `growth`, `commercial`, `conversation_hooks`, `issues`.
|
| 72 |
+
|
| 73 |
+
> **Note**: `category` and `locality` are not first-class fields on the snapshot — they're buried inside `profile.business_info` (from `gbp_get_location`) and surfaced via `aryan_client.get_merchant_v2()` only when `vera_get_customer_info_pack` is called.
|
| 74 |
+
|
| 75 |
+
### Mid-conversation tool calls
|
| 76 |
+
|
| 77 |
+
The LLM gets the full toolset from `MERCHANT_MCP_ALLOWED_TOOLS` (declared on the agent class) merged with `_vera_mcp_tools_cache`. Mid-turn tool calls flow through:
|
| 78 |
+
|
| 79 |
+
- `self.vera_mcp_client.call_tool(name, args)` — for vera-mcp tools (HTTP)
|
| 80 |
+
- `BaseAgent._merchant_mcp_client.call_tool(name, args)` — for merchant-support-mcp tools (HTTP)
|
| 81 |
+
|
| 82 |
+
No re-prefetch of the snapshot mid-turn. The agent relies on the prefetched snapshot + LLM-initiated lookups when it needs fresh data.
|
| 83 |
+
|
| 84 |
+
---
|
| 85 |
+
|
| 86 |
+
## Path 2 — Customer-facing (`CustomerIncomingAgent`)
|
| 87 |
+
|
| 88 |
+
`agents/vera/customer_incoming_agent.py:91` — different agent, different state model.
|
| 89 |
+
|
| 90 |
+
### Init + per-turn
|
| 91 |
+
|
| 92 |
+
The instance carries:
|
| 93 |
+
|
| 94 |
+
- `_merchant_id` — the merchant the customer is asking about
|
| 95 |
+
- `_merchant_data`, `_merchant_name`, `_category`, `_offers`, `_jd_info` — about the **business**
|
| 96 |
+
- `_customer_phone`, `_customer_profile` — about the **customer** (the one chatting)
|
| 97 |
+
- `_chat_history`, `_session_state` — conversation continuity
|
| 98 |
+
|
| 99 |
+
### Merchant data load — `_load_merchant_data` (`customer_incoming_agent.py:221`)
|
| 100 |
+
|
| 101 |
+
Single shape, no Redis cache:
|
| 102 |
+
|
| 103 |
+
1. **Primary call**: `vera_get_customer_info_pack(merchant_id)` — one MCP call returns `business_info` + `reviews` + `photos` + `offers` + `metadata` in one shot. The aggregated customer-facing endpoint.
|
| 104 |
+
2. **Fallback chain** if `info_pack` is empty:
|
| 105 |
+
- `get_unassociated_merchant_data` — basic merchant info
|
| 106 |
+
- `vera_list_merchant_offers` — separate offer list
|
| 107 |
+
3. **Supplementary** (non-blocking): `vera_get_merchant_jd_info` — JustDial crawl data for additional name / address / phone.
|
| 108 |
+
|
| 109 |
+
Caching is at the request level (in-memory on the agent instance) — re-using the same `merchant_id` in a session avoids re-fetching.
|
| 110 |
+
|
| 111 |
+
### What `vera_get_customer_info_pack` does
|
| 112 |
+
|
| 113 |
+
`vera-mcp/src/tools/merchant_info.py:188`:
|
| 114 |
+
|
| 115 |
+
1. **Resolve place_id**: `_resolve_place_id(merchant_id)` — chain of MongoDB (`gbp_status`) → local JSON → `aryan_client.get_mapping()`.
|
| 116 |
+
2. **Parallel fetch**:
|
| 117 |
+
- GBP data via `get_or_fetch(place_id)` — 24h cache in MongoDB, falls through to Google Places API
|
| 118 |
+
- `_get_active_offers(merchant_id)` — direct MongoDB read of `offers` collection where `status=active`
|
| 119 |
+
- `_get_merchant_metadata(merchant_id)` → `aryan_client.get_merchant_v2()` → returns `name`, `category`, `locality`
|
| 120 |
+
3. **Last-resort fallback**: `_fetch_embed_data(place_id)` — vera REST `/embed` endpoint if everything else came back empty.
|
| 121 |
+
|
| 122 |
+
### Customer profile (the *caller*)
|
| 123 |
+
|
| 124 |
+
`_customer_profile` is loaded separately — populated in `_init_customer_data` from past conversation tickets keyed off `_customer_phone`. There's no rich CRM behind it today; it's mostly conversation continuity (last visit, last topic).
|
| 125 |
+
|
| 126 |
+
---
|
| 127 |
+
|
| 128 |
+
## Cross-cutting infrastructure
|
| 129 |
+
|
| 130 |
+
| Concern | Implementation |
|
| 131 |
+
|---|---|
|
| 132 |
+
| MCP transport | `HTTPMCPClient` (vera client) — keeps a session, calls `/mcp/tools/{name}` HTTP POST |
|
| 133 |
+
| Auth | Not required for challenge bot endpoints |
|
| 134 |
+
| Cache key for merchant context | `vera:merchant_ctx:{merchant_id}` — Redis, TTL 30 min, written by `_prefetch_merchant_context` |
|
| 135 |
+
| Cache key for GBP data | `gbp_health_report:{place_id}` — MongoDB, TTL 24h, in vera-mcp |
|
| 136 |
+
| Snapshot freshness for sends | Whatever's in Redis — not refreshed on send unless agent is in active conversation |
|
| 137 |
+
| Source of truth for `category` | aryan `get_merchant_v2` API (via `aryan_client`) — used in `_get_merchant_metadata` |
|
| 138 |
+
| Source of truth for `name` | aryan first, GBP `business_info` second |
|
| 139 |
+
|
| 140 |
+
---
|
| 141 |
+
|
| 142 |
+
## Observations relevant to the engagement framework
|
| 143 |
+
|
| 144 |
+
### What already exists
|
| 145 |
+
|
| 146 |
+
1. **Most of `MerchantContext` already exists** — spread across `_merchant_snapshot`, `_behavioral_profile`, `_session_scenario`, `_jd_info`. A `MerchantContext.from_existing(agent_state)` adapter could load ~80% of the fields without any new fetching.
|
| 147 |
+
2. **The customer agent already has half a `CustomerContext`** — `_customer_phone` + `_customer_profile` give us identity + conversation continuity. Missing: visit history, services received, lapse state.
|
| 148 |
+
3. **Two MCP servers, one orchestration** — vera-mcp (instance-level) for vera tools, merchant-support-mcp (class-level shared) for fallback merchant tools. The composer can just consume whatever `MerchantContext` already collected — no new MCP wiring required.
|
| 149 |
+
4. **Cache TTL of 30 min is fine** for engagement nudges that fire daily/weekly. Redis hits during conversation are plenty fresh for composition.
|
| 150 |
+
|
| 151 |
+
### What does NOT exist
|
| 152 |
+
|
| 153 |
+
1. **No `CategoryContext`.** Category is just a string buried in metadata. Voice rules, peer benchmarks, knowledge digests — none of it exists. Biggest greenfield area, but also the most leveraged (one CategoryContext serves all merchants in the vertical).
|
| 154 |
+
2. **No `customer_aggregate` field on the merchant snapshot.** No pipeline today aggregates per-merchant customer roster stats (active count, lapsed count, retention rate).
|
| 155 |
+
3. **No `TriggerContext` abstraction.** Every nudge type today has its own cron + its own fetch logic + its own send code. The proposed `TriggerContext` is the genuinely new architectural primitive; everything else is reorganization.
|
| 156 |
+
4. **No visit-history aggregation per (merchant, customer_phone).** Required for `CustomerContext.relationship`. BOTOPS chat history has the raw data; no derived view exists.
|
| 157 |
+
5. **No shared composer.** Each agent has its own prompt builder embedded in `_get_system_prompt()`. The proposed `EngagementComposer` is net new.
|
| 158 |
+
|
| 159 |
+
### Operational risks to mitigate
|
| 160 |
+
|
| 161 |
+
1. **Aryan is the synchronous bottleneck.** `aryan_client.get_merchant_v2()` is the only path to category and locality, and it's a remote HTTP call. If aryan is slow, every customer-info-pack call is slow. Worth caching aryan responses per merchant for ~24h before scaling engagement frequency.
|
| 162 |
+
2. **The 30-min Redis cache is keyed per-merchant** — fine for in-conversation reuse, but a daily engagement cron will miss this cache 100% of the time and pay the full snapshot-build cost per send. Consider a longer-TTL background-warmed cache for the engagement loop specifically.
|
| 163 |
+
3. **No version tracking on prompts today.** Every send loses the prompt-version provenance. The composer should record prompt version + context hash on every send so we can replay and A/B.
|
| 164 |
+
|
| 165 |
+
---
|
| 166 |
+
|
| 167 |
+
## Concrete recommendation for Phase 1 of the framework
|
| 168 |
+
|
| 169 |
+
Phase 1 is mostly an **adapter layer**, not new infrastructure:
|
| 170 |
+
|
| 171 |
+
| Layer | Effort | What it actually does |
|
| 172 |
+
|---|---|---|
|
| 173 |
+
| `CategoryContext` | Net new | Build for dentistry first (offer catalog, voice, peer stats, weekly digest, patient-content seed) |
|
| 174 |
+
| `MerchantContext` | Adapter | Wrap existing `_merchant_snapshot` + `_behavioral_profile` + a new `_customer_aggregate` derived from BOTOPS chat history |
|
| 175 |
+
| `TriggerContext` | Net new abstraction | Normalize the payloads existing crons emit into a single shape; new triggers (research_digest, recall_due) emit it natively |
|
| 176 |
+
| `CustomerContext` | Partial adapter | Wrap existing `_customer_profile`, plus a new visit-history aggregator |
|
| 177 |
+
| `EngagementComposer` | Net new | Single LLM-prompted module with versioned prompt; consumes the above |
|
| 178 |
+
|
| 179 |
+
Phase 1 should NOT require modifying any existing agent. The adapter reads from existing state; the composer is a new module that runs in parallel; the new triggers (research digest, recall) are new crons that don't touch the existing matrix-followup or campaign-engagement loops.
|
| 180 |
+
|
| 181 |
+
---
|
| 182 |
+
|
| 183 |
+
## Appendix: file pointers for follow-up implementation
|
| 184 |
+
|
| 185 |
+
- `agents/vera/merchant_agent.py:402` — VeraMerchantAgent init
|
| 186 |
+
- `agents/vera/merchant_agent.py:740` — `_prefetch_merchant_context` (the main load)
|
| 187 |
+
- `agents/vera/merchant_agent.py:899` — `_prefetch_product_context` (subscription + DA campaign + perf summary)
|
| 188 |
+
- `agents/vera/merchant_agent.py:996` — system-prompt builder reads `_merchant_snapshot`
|
| 189 |
+
- `agents/vera/merchant_agent.py:2171` — `handle_input` per-turn entry
|
| 190 |
+
- `agents/vera/customer_incoming_agent.py:91` — CustomerIncomingAgent class
|
| 191 |
+
- `agents/vera/customer_incoming_agent.py:221` — `_load_merchant_data` (single info-pack call + fallbacks)
|
| 192 |
+
- `agents/base_agent.py:80-82` — MERCHANT_MCP_SERVER_URL config
|
| 193 |
+
- `agents/base_agent.py:237-244` — class-level shared `_merchant_mcp_client`
|
| 194 |
+
- `vera-mcp/src/tools/merchant_snapshot.py` — `vera_merchant_snapshot` tool
|
| 195 |
+
- `vera-mcp/src/services/merchant_snapshot.py:51` — `build_merchant_snapshot` (the 13-call fan-out)
|
| 196 |
+
- `vera-mcp/src/tools/merchant_info.py:188` — `vera_get_customer_info_pack` tool
|
| 197 |
+
- `vera-mcp/src/tools/merchant_info.py:30` — `_resolve_place_id` chain (MongoDB → JSON → aryan)
|
| 198 |
+
- `vera-mcp/src/services/aryan_client.py` — aryan HTTP client (`get_merchant_v2`, `get_mapping`)
|
magicpin-ai-challenge/examples/api-call-examples.md
ADDED
|
@@ -0,0 +1,615 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# API Call Examples — Judge ↔ Candidate Bot
|
| 2 |
+
|
| 3 |
+
This file shows the exact HTTP calls the judge will make during testing, and what the bot is expected to return. Read this together with `challenge-testing-brief.md` (which defines the contract) and the dataset (which provides the payloads).
|
| 4 |
+
|
| 5 |
+
Every example uses Dr. Meera's Dental Clinic (`m_001_drmeera_dentist_delhi`) as the running merchant.
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## Phase 1 — Warmup (T-15 min)
|
| 10 |
+
|
| 11 |
+
### Example 1.1 — `GET /v1/healthz`
|
| 12 |
+
|
| 13 |
+
**Request**
|
| 14 |
+
```http
|
| 15 |
+
GET /v1/healthz HTTP/1.1
|
| 16 |
+
Host: bot.candidate-team-alpha.example.com
|
| 17 |
+
Accept: application/json
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
**Expected response (200)**
|
| 21 |
+
```http
|
| 22 |
+
HTTP/1.1 200 OK
|
| 23 |
+
Content-Type: application/json
|
| 24 |
+
|
| 25 |
+
{
|
| 26 |
+
"status": "ok",
|
| 27 |
+
"uptime_seconds": 124,
|
| 28 |
+
"contexts_loaded": { "category": 0, "merchant": 0, "customer": 0, "trigger": 0 }
|
| 29 |
+
}
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
The judge calls this before pushing context. `contexts_loaded` should be all zeros at this point (bot just started).
|
| 33 |
+
|
| 34 |
+
### Example 1.2 — `GET /v1/metadata`
|
| 35 |
+
|
| 36 |
+
**Request**
|
| 37 |
+
```http
|
| 38 |
+
GET /v1/metadata HTTP/1.1
|
| 39 |
+
Host: bot.candidate-team-alpha.example.com
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
**Expected response (200)**
|
| 43 |
+
```json
|
| 44 |
+
{
|
| 45 |
+
"team_name": "Team Alpha",
|
| 46 |
+
"team_members": ["Alice", "Bob"],
|
| 47 |
+
"model": "claude-opus-4-7",
|
| 48 |
+
"approach": "single-prompt composer with retrieval over digest items + dispatch by trigger.kind",
|
| 49 |
+
"contact_email": "team@example.com",
|
| 50 |
+
"version": "1.2.0",
|
| 51 |
+
"submitted_at": "2026-04-26T08:00:00Z"
|
| 52 |
+
}
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
### Example 1.3 — `POST /v1/context` (push CategoryContext)
|
| 56 |
+
|
| 57 |
+
**Request**
|
| 58 |
+
```http
|
| 59 |
+
POST /v1/context HTTP/1.1
|
| 60 |
+
Host: bot.candidate-team-alpha.example.com
|
| 61 |
+
Content-Type: application/json
|
| 62 |
+
|
| 63 |
+
{
|
| 64 |
+
"scope": "category",
|
| 65 |
+
"context_id": "dentists",
|
| 66 |
+
"version": 1,
|
| 67 |
+
"delivered_at": "2026-04-26T09:45:00Z",
|
| 68 |
+
"payload": {
|
| 69 |
+
"slug": "dentists",
|
| 70 |
+
"voice": { "tone": "peer_clinical", "vocab_taboo": ["guaranteed", "100% safe"] },
|
| 71 |
+
"offer_catalog": [
|
| 72 |
+
{ "id": "den_001", "title": "Dental Cleaning @ ₹299", "value": "299", "audience": "new_user", "type": "service_at_price" }
|
| 73 |
+
],
|
| 74 |
+
"peer_stats": { "avg_rating": 4.4, "avg_ctr": 0.030 },
|
| 75 |
+
"digest": [{ "id": "d_2026W17_jida_fluoride", "kind": "research", "title": "3-month fluoride recall cuts caries 38% better", "source": "JIDA Oct 2026, p.14" }],
|
| 76 |
+
"patient_content_library": [],
|
| 77 |
+
"seasonal_beats": [{ "month_range": "Nov-Feb", "note": "exam-stress bruxism spike" }],
|
| 78 |
+
"trend_signals": [{ "query": "clear aligners delhi", "delta_yoy": 0.62 }]
|
| 79 |
+
}
|
| 80 |
+
}
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
**Expected response (200)**
|
| 84 |
+
```json
|
| 85 |
+
{ "accepted": true, "ack_id": "ack_dentists_v1", "stored_at": "2026-04-26T09:45:00.123Z" }
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
> **Note**: For the actual test the full category JSON (`dataset/categories/dentists.json`) goes in `payload`, not the abbreviated form above.
|
| 89 |
+
|
| 90 |
+
### Example 1.4 — `POST /v1/context` (push MerchantContext)
|
| 91 |
+
|
| 92 |
+
**Request**
|
| 93 |
+
```http
|
| 94 |
+
POST /v1/context HTTP/1.1
|
| 95 |
+
Content-Type: application/json
|
| 96 |
+
|
| 97 |
+
{
|
| 98 |
+
"scope": "merchant",
|
| 99 |
+
"context_id": "m_001_drmeera_dentist_delhi",
|
| 100 |
+
"version": 1,
|
| 101 |
+
"delivered_at": "2026-04-26T09:45:30Z",
|
| 102 |
+
"payload": {
|
| 103 |
+
"merchant_id": "m_001_drmeera_dentist_delhi",
|
| 104 |
+
"category_slug": "dentists",
|
| 105 |
+
"identity": { "name": "Dr. Meera's Dental Clinic", "city": "Delhi", "locality": "Lajpat Nagar",
|
| 106 |
+
"verified": true, "languages": ["en", "hi"], "owner_first_name": "Meera" },
|
| 107 |
+
"subscription": { "status": "active", "plan": "Pro", "days_remaining": 82 },
|
| 108 |
+
"performance": { "window_days": 30, "views": 2410, "calls": 18, "directions": 45,
|
| 109 |
+
"ctr": 0.021, "delta_7d": { "views_pct": 0.18, "calls_pct": -0.05 } },
|
| 110 |
+
"offers": [{ "id": "o_meera_001", "title": "Dental Cleaning @ ₹299", "status": "active" }],
|
| 111 |
+
"conversation_history": [],
|
| 112 |
+
"customer_aggregate": { "total_unique_ytd": 540, "lapsed_180d_plus": 78,
|
| 113 |
+
"retention_6mo_pct": 0.38, "high_risk_adult_count": 124 },
|
| 114 |
+
"signals": ["stale_posts:22d", "ctr_below_peer_median", "high_risk_adult_cohort"]
|
| 115 |
+
}
|
| 116 |
+
}
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
**Expected response (200)**
|
| 120 |
+
```json
|
| 121 |
+
{ "accepted": true, "ack_id": "ack_m_001_drmeera_v1", "stored_at": "2026-04-26T09:45:30.456Z" }
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
### Example 1.5 — `POST /v1/context` (idempotency check — same version re-pushed)
|
| 125 |
+
|
| 126 |
+
**Request** (same body as 1.4 — version 1 again)
|
| 127 |
+
|
| 128 |
+
**Expected response (409)**
|
| 129 |
+
```json
|
| 130 |
+
{ "accepted": false, "reason": "stale_version", "current_version": 1 }
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
### Example 1.6 — `POST /v1/context` (version bump replaces)
|
| 134 |
+
|
| 135 |
+
**Request**: same as 1.4 but `version: 2` and `performance.views: 2580` (updated).
|
| 136 |
+
|
| 137 |
+
**Expected response (200)**
|
| 138 |
+
```json
|
| 139 |
+
{ "accepted": true, "ack_id": "ack_m_001_drmeera_v2", "stored_at": "2026-04-26T10:30:00.789Z" }
|
| 140 |
+
```
|
| 141 |
+
|
| 142 |
+
The bot must now use the new version when composing for `m_001_drmeera_dentist_delhi`.
|
| 143 |
+
|
| 144 |
+
### Example 1.7 — `GET /v1/healthz` after warmup complete
|
| 145 |
+
|
| 146 |
+
**Expected response (200)**
|
| 147 |
+
```json
|
| 148 |
+
{
|
| 149 |
+
"status": "ok",
|
| 150 |
+
"uptime_seconds": 1024,
|
| 151 |
+
"contexts_loaded": { "category": 5, "merchant": 50, "customer": 200, "trigger": 0 }
|
| 152 |
+
}
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
If counts don't match what the judge pushed, warmup fails and the bot is disqualified for that test slot.
|
| 156 |
+
|
| 157 |
+
---
|
| 158 |
+
|
| 159 |
+
## Phase 2 — Test window (T0 → T0 + 60 min)
|
| 160 |
+
|
| 161 |
+
### Example 2.1 — `POST /v1/context` (incremental trigger push)
|
| 162 |
+
|
| 163 |
+
The judge now starts pushing triggers as simulated time advances.
|
| 164 |
+
|
| 165 |
+
**Request**
|
| 166 |
+
```http
|
| 167 |
+
POST /v1/context HTTP/1.1
|
| 168 |
+
Content-Type: application/json
|
| 169 |
+
|
| 170 |
+
{
|
| 171 |
+
"scope": "trigger",
|
| 172 |
+
"context_id": "trg_001_research_digest_dentists",
|
| 173 |
+
"version": 1,
|
| 174 |
+
"delivered_at": "2026-04-26T10:32:00Z",
|
| 175 |
+
"payload": {
|
| 176 |
+
"id": "trg_001_research_digest_dentists",
|
| 177 |
+
"scope": "merchant",
|
| 178 |
+
"kind": "research_digest",
|
| 179 |
+
"source": "external",
|
| 180 |
+
"merchant_id": "m_001_drmeera_dentist_delhi",
|
| 181 |
+
"customer_id": null,
|
| 182 |
+
"payload": {
|
| 183 |
+
"category": "dentists",
|
| 184 |
+
"top_item_id": "d_2026W17_jida_fluoride"
|
| 185 |
+
},
|
| 186 |
+
"urgency": 2,
|
| 187 |
+
"suppression_key": "research:dentists:2026-W17",
|
| 188 |
+
"expires_at": "2026-05-03T00:00:00Z"
|
| 189 |
+
}
|
| 190 |
+
}
|
| 191 |
+
```
|
| 192 |
+
|
| 193 |
+
**Expected response (200)**
|
| 194 |
+
```json
|
| 195 |
+
{ "accepted": true, "ack_id": "ack_trg_001_v1", "stored_at": "2026-04-26T10:32:00.150Z" }
|
| 196 |
+
```
|
| 197 |
+
|
| 198 |
+
### Example 2.2 — `POST /v1/tick` (bot decides to send)
|
| 199 |
+
|
| 200 |
+
**Request**
|
| 201 |
+
```http
|
| 202 |
+
POST /v1/tick HTTP/1.1
|
| 203 |
+
Content-Type: application/json
|
| 204 |
+
|
| 205 |
+
{
|
| 206 |
+
"now": "2026-04-26T10:35:00Z",
|
| 207 |
+
"available_triggers": ["trg_001_research_digest_dentists"]
|
| 208 |
+
}
|
| 209 |
+
```
|
| 210 |
+
|
| 211 |
+
**Expected response (200) — bot chose to send**
|
| 212 |
+
```json
|
| 213 |
+
{
|
| 214 |
+
"actions": [
|
| 215 |
+
{
|
| 216 |
+
"conversation_id": "conv_m_001_drmeera_research_W17",
|
| 217 |
+
"merchant_id": "m_001_drmeera_dentist_delhi",
|
| 218 |
+
"customer_id": null,
|
| 219 |
+
"send_as": "vera",
|
| 220 |
+
"trigger_id": "trg_001_research_digest_dentists",
|
| 221 |
+
"template_name": "vera_research_digest_v1",
|
| 222 |
+
"template_params": [
|
| 223 |
+
"Dr. Meera",
|
| 224 |
+
"JIDA Oct issue landed. One item relevant to your high-risk adult patients — 2,100-patient trial showed 3-month fluoride recall cuts caries recurrence 38% better than 6-month",
|
| 225 |
+
"Worth a look (2-min abstract). Want me to pull it + draft a patient-ed WhatsApp you can share?"
|
| 226 |
+
],
|
| 227 |
+
"body": "Dr. Meera, JIDA's Oct issue landed. One item relevant to your high-risk adult patients — 2,100-patient trial showed 3-month fluoride recall cuts caries recurrence 38% better than 6-month. Worth a look (2-min abstract). Want me to pull it + draft a patient-ed WhatsApp you can share? — JIDA Oct 2026 p.14",
|
| 228 |
+
"cta": "open_ended",
|
| 229 |
+
"suppression_key": "research:dentists:2026-W17",
|
| 230 |
+
"rationale": "External research digest with merchant-relevant clinical anchor (high-risk-adult cohort matches signal). Source citation at end maintains credibility. Open-ended CTA invites continuation without forcing a binary choice."
|
| 231 |
+
}
|
| 232 |
+
]
|
| 233 |
+
}
|
| 234 |
+
```
|
| 235 |
+
|
| 236 |
+
### Example 2.3 — `POST /v1/tick` (bot decides NOT to send)
|
| 237 |
+
|
| 238 |
+
If no triggers are worth acting on this tick (or all have been suppressed):
|
| 239 |
+
|
| 240 |
+
**Expected response (200)**
|
| 241 |
+
```json
|
| 242 |
+
{ "actions": [] }
|
| 243 |
+
```
|
| 244 |
+
|
| 245 |
+
Empty `actions` is valid. Restraint is rewarded; spam is penalized.
|
| 246 |
+
|
| 247 |
+
### Example 2.4 — `POST /v1/reply` (judge plays merchant — engaged response)
|
| 248 |
+
|
| 249 |
+
The judge's sub-LLM, playing Dr. Meera, replies to the message from 2.2:
|
| 250 |
+
|
| 251 |
+
**Request**
|
| 252 |
+
```http
|
| 253 |
+
POST /v1/reply HTTP/1.1
|
| 254 |
+
Content-Type: application/json
|
| 255 |
+
|
| 256 |
+
{
|
| 257 |
+
"conversation_id": "conv_m_001_drmeera_research_W17",
|
| 258 |
+
"merchant_id": "m_001_drmeera_dentist_delhi",
|
| 259 |
+
"customer_id": null,
|
| 260 |
+
"from_role": "merchant",
|
| 261 |
+
"message": "Yes please send the abstract. Also draft the patient WhatsApp.",
|
| 262 |
+
"received_at": "2026-04-26T10:42:00Z",
|
| 263 |
+
"turn_number": 2
|
| 264 |
+
}
|
| 265 |
+
```
|
| 266 |
+
|
| 267 |
+
**Expected response (200) — bot follows up with action**
|
| 268 |
+
```json
|
| 269 |
+
{
|
| 270 |
+
"action": "send",
|
| 271 |
+
"body": "Sending the abstract now (PDF, 2 pages). Patient-ed draft below — you can copy-paste or I'll schedule a Google post:\n\n\"3-month vs 6-month dental cleaning — does it really matter? New research shows yes, especially if you've had cavities recently. Drop us a note for a quick check.\"\n\nWant me to schedule the post for tomorrow 10am?",
|
| 272 |
+
"cta": "binary_yes_no",
|
| 273 |
+
"rationale": "Honoring both asks (abstract + draft) in one turn. Draft is at patient-reading level. Final question is a binary yes/no to lower friction."
|
| 274 |
+
}
|
| 275 |
+
```
|
| 276 |
+
|
| 277 |
+
### Example 2.5 — `POST /v1/reply` (judge plays merchant — auto-reply)
|
| 278 |
+
|
| 279 |
+
Some test scenarios simulate WhatsApp Business auto-reply:
|
| 280 |
+
|
| 281 |
+
**Request**
|
| 282 |
+
```json
|
| 283 |
+
{
|
| 284 |
+
"conversation_id": "conv_m_001_drmeera_research_W17",
|
| 285 |
+
"from_role": "merchant",
|
| 286 |
+
"message": "Thank you for contacting Dr. Meera's Dental Clinic! Our team will respond shortly.",
|
| 287 |
+
"received_at": "2026-04-26T10:42:00Z",
|
| 288 |
+
"turn_number": 2
|
| 289 |
+
}
|
| 290 |
+
```
|
| 291 |
+
|
| 292 |
+
**Expected response (200) — bot detects auto-reply, exits gracefully**
|
| 293 |
+
```json
|
| 294 |
+
{
|
| 295 |
+
"action": "wait",
|
| 296 |
+
"wait_seconds": 14400,
|
| 297 |
+
"rationale": "Detected merchant auto-reply (canned 'Thank you for contacting' phrasing). Backing off 4 hours to wait for owner."
|
| 298 |
+
}
|
| 299 |
+
```
|
| 300 |
+
|
| 301 |
+
After the wait expires, the judge will call `/v1/tick` again and the bot can re-engage.
|
| 302 |
+
|
| 303 |
+
### Example 2.6 — `POST /v1/reply` (judge plays merchant — hard "no")
|
| 304 |
+
|
| 305 |
+
**Request**
|
| 306 |
+
```json
|
| 307 |
+
{
|
| 308 |
+
"conversation_id": "conv_m_001_drmeera_research_W17",
|
| 309 |
+
"from_role": "merchant",
|
| 310 |
+
"message": "Not interested. Stop messaging me.",
|
| 311 |
+
"received_at": "2026-04-26T10:42:00Z",
|
| 312 |
+
"turn_number": 2
|
| 313 |
+
}
|
| 314 |
+
```
|
| 315 |
+
|
| 316 |
+
**Expected response (200) — bot ends gracefully**
|
| 317 |
+
```json
|
| 318 |
+
{
|
| 319 |
+
"action": "end",
|
| 320 |
+
"rationale": "Merchant explicitly opted out. Closing conversation; suppressing this conversation_id for future ticks."
|
| 321 |
+
}
|
| 322 |
+
```
|
| 323 |
+
|
| 324 |
+
The bot must not send any further messages on this `conversation_id`.
|
| 325 |
+
|
| 326 |
+
### Example 2.7 — `POST /v1/reply` (judge plays merchant — curveball)
|
| 327 |
+
|
| 328 |
+
**Request**
|
| 329 |
+
```json
|
| 330 |
+
{
|
| 331 |
+
"conversation_id": "conv_m_001_drmeera_research_W17",
|
| 332 |
+
"from_role": "merchant",
|
| 333 |
+
"message": "Btw can you also help me with my GST filing this month?",
|
| 334 |
+
"received_at": "2026-04-26T10:42:00Z",
|
| 335 |
+
"turn_number": 2
|
| 336 |
+
}
|
| 337 |
+
```
|
| 338 |
+
|
| 339 |
+
**Expected response (200) — bot stays on mission politely**
|
| 340 |
+
```json
|
| 341 |
+
{
|
| 342 |
+
"action": "send",
|
| 343 |
+
"body": "I'll have to leave GST filing to your CA — that's outside what I can help with directly. Coming back to the JIDA piece — want me to draft the patient post first, or send the abstract?",
|
| 344 |
+
"cta": "open_ended",
|
| 345 |
+
"rationale": "Out-of-scope ask politely declined; redirects back to the original trigger without losing thread."
|
| 346 |
+
}
|
| 347 |
+
```
|
| 348 |
+
|
| 349 |
+
### Example 2.8 — `POST /v1/context` (mid-test adaptive injection)
|
| 350 |
+
|
| 351 |
+
While the conversation is live, the judge pushes a new digest item. A good bot will incorporate it in subsequent sends.
|
| 352 |
+
|
| 353 |
+
**Request**
|
| 354 |
+
```json
|
| 355 |
+
{
|
| 356 |
+
"scope": "category",
|
| 357 |
+
"context_id": "dentists",
|
| 358 |
+
"version": 2,
|
| 359 |
+
"delivered_at": "2026-04-26T10:50:00Z",
|
| 360 |
+
"payload": {
|
| 361 |
+
"slug": "dentists",
|
| 362 |
+
"voice": { "tone": "peer_clinical" },
|
| 363 |
+
"digest": [
|
| 364 |
+
{ "id": "d_2026W17_jida_fluoride", "kind": "research", "title": "3-month fluoride recall cuts caries 38% better", "source": "JIDA Oct 2026, p.14" },
|
| 365 |
+
{ "id": "d_2026W17_dci_radiograph_NEW", "kind": "compliance", "title": "DCI revised radiograph dose limits effective 2026-12-15",
|
| 366 |
+
"source": "DCI circular 2026-11-04", "summary": "Max dose drops 1.5→1.0 mSv per IOPA. E-speed film passes; D-speed does not." }
|
| 367 |
+
],
|
| 368 |
+
"// other fields": "..."
|
| 369 |
+
}
|
| 370 |
+
}
|
| 371 |
+
```
|
| 372 |
+
|
| 373 |
+
**Expected response (200)**
|
| 374 |
+
```json
|
| 375 |
+
{ "accepted": true, "ack_id": "ack_dentists_v2", "stored_at": "2026-04-26T10:50:00.110Z" }
|
| 376 |
+
```
|
| 377 |
+
|
| 378 |
+
The bot must replace the old version atomically and use the new digest item if relevant in the next send.
|
| 379 |
+
|
| 380 |
+
### Example 2.9 — `POST /v1/tick` (customer-scoped trigger emerges)
|
| 381 |
+
|
| 382 |
+
A `recall_due` trigger fires for one of Dr. Meera's patients:
|
| 383 |
+
|
| 384 |
+
**Context push first**
|
| 385 |
+
```json
|
| 386 |
+
{
|
| 387 |
+
"scope": "customer",
|
| 388 |
+
"context_id": "c_001_priya_for_m001",
|
| 389 |
+
"version": 1,
|
| 390 |
+
"payload": { /* Priya's CustomerContext from dataset/customers_seed.json */ }
|
| 391 |
+
}
|
| 392 |
+
```
|
| 393 |
+
|
| 394 |
+
```json
|
| 395 |
+
{
|
| 396 |
+
"scope": "trigger",
|
| 397 |
+
"context_id": "trg_003_recall_due_priya",
|
| 398 |
+
"version": 1,
|
| 399 |
+
"payload": { /* the recall trigger from dataset/triggers_seed.json */ }
|
| 400 |
+
}
|
| 401 |
+
```
|
| 402 |
+
|
| 403 |
+
**Then `/v1/tick`**
|
| 404 |
+
```json
|
| 405 |
+
{
|
| 406 |
+
"now": "2026-04-26T11:00:00Z",
|
| 407 |
+
"available_triggers": ["trg_003_recall_due_priya"]
|
| 408 |
+
}
|
| 409 |
+
```
|
| 410 |
+
|
| 411 |
+
**Expected response (200)**
|
| 412 |
+
```json
|
| 413 |
+
{
|
| 414 |
+
"actions": [
|
| 415 |
+
{
|
| 416 |
+
"conversation_id": "conv_priya_recall_2026_11",
|
| 417 |
+
"merchant_id": "m_001_drmeera_dentist_delhi",
|
| 418 |
+
"customer_id": "c_001_priya_for_m001",
|
| 419 |
+
"send_as": "merchant_on_behalf",
|
| 420 |
+
"trigger_id": "trg_003_recall_due_priya",
|
| 421 |
+
"template_name": "merchant_recall_reminder_v1",
|
| 422 |
+
"template_params": [
|
| 423 |
+
"Priya",
|
| 424 |
+
"Dr. Meera's clinic",
|
| 425 |
+
"It's been 5 months since your last visit",
|
| 426 |
+
"Wed 5 Nov, 6pm or Thu 6 Nov, 5pm",
|
| 427 |
+
"₹299 cleaning + complimentary fluoride"
|
| 428 |
+
],
|
| 429 |
+
"body": "Hi Priya, Dr. Meera's clinic here 🦷 It's been 5 months since your last visit — your 6-month cleaning recall is due. Apke liye 2 slots ready hain: **Wed 5 Nov, 6pm** ya **Thu 6 Nov, 5pm**. ₹299 cleaning + complimentary fluoride. Reply 1 for Wed, 2 for Thu, or tell us a time that works.",
|
| 430 |
+
"cta": "multi_choice_slot",
|
| 431 |
+
"suppression_key": "recall:c_001_priya_for_m001:6mo",
|
| 432 |
+
"rationale": "Customer-scoped recall, sending via merchant's number (send_as=merchant_on_behalf). Honoring Priya's hi-en mix language pref + weekday-evening preference (both slots offered are weekday evenings). Multi-choice slot CTA is appropriate for booking flows."
|
| 433 |
+
}
|
| 434 |
+
]
|
| 435 |
+
}
|
| 436 |
+
```
|
| 437 |
+
|
| 438 |
+
---
|
| 439 |
+
|
| 440 |
+
## Phase 4 — Replay test (top 10 only)
|
| 441 |
+
|
| 442 |
+
The judge runs 3 standalone scenarios. Each is a fresh conversation with a controlled merchant persona.
|
| 443 |
+
|
| 444 |
+
### Example 4.1 — Auto-reply hell scenario
|
| 445 |
+
|
| 446 |
+
The judge sends 4 turns of identical canned auto-replies.
|
| 447 |
+
|
| 448 |
+
**Turn 1 — bot initiates**
|
| 449 |
+
```json
|
| 450 |
+
POST /v1/tick { "now": "...", "available_triggers": ["trg_022_cde_webinar_dentists"] }
|
| 451 |
+
→ { "actions": [{ /* CDE webinar invite */ }] }
|
| 452 |
+
```
|
| 453 |
+
|
| 454 |
+
**Turn 2 — judge plays auto-reply**
|
| 455 |
+
```json
|
| 456 |
+
POST /v1/reply { "from_role": "merchant",
|
| 457 |
+
"message": "Thank you for contacting Dr. Meera's Dental Clinic! Our team will respond shortly.",
|
| 458 |
+
"turn_number": 2 }
|
| 459 |
+
```
|
| 460 |
+
|
| 461 |
+
**Good bot response**
|
| 462 |
+
```json
|
| 463 |
+
{ "action": "send",
|
| 464 |
+
"body": "Looks like an auto-reply π When the owner sees this, just reply 'Yes' for the webinar invite.",
|
| 465 |
+
"cta": "binary_yes_no",
|
| 466 |
+
"rationale": "Detected auto-reply; one explicit prompt to flag it for the owner." }
|
| 467 |
+
```
|
| 468 |
+
|
| 469 |
+
**Turn 3 — judge plays same auto-reply again**
|
| 470 |
+
```json
|
| 471 |
+
POST /v1/reply { "from_role": "merchant",
|
| 472 |
+
"message": "Thank you for contacting Dr. Meera's Dental Clinic! Our team will respond shortly.",
|
| 473 |
+
"turn_number": 3 }
|
| 474 |
+
```
|
| 475 |
+
|
| 476 |
+
**Good bot response**
|
| 477 |
+
```json
|
| 478 |
+
{ "action": "wait", "wait_seconds": 86400,
|
| 479 |
+
"rationale": "Same auto-reply twice in a row β owner not at phone. Wait 24h before retry." }
|
| 480 |
+
```
|
| 481 |
+
|
| 482 |
+
**Turn 4 — judge plays same auto-reply yet again**
|
| 483 |
+
|
| 484 |
+
**Good bot response**
|
| 485 |
+
```json
|
| 486 |
+
{ "action": "end",
|
| 487 |
+
"rationale": "Auto-reply 3x in a row, no real reply. Conversation has zero engagement signal; closing." }
|
| 488 |
+
```
|
| 489 |
+
|
| 490 |
+
### Example 4.2 — Intent transition scenario
|
| 491 |
+
|
| 492 |
+
After 2 turns of qualification, judge plays an explicit "let's do it":
|
| 493 |
+
|
| 494 |
+
**Turn 3 — judge**
|
| 495 |
+
```json
|
| 496 |
+
POST /v1/reply { "from_role": "merchant",
|
| 497 |
+
"message": "Ok, let's do it. What's next?",
|
| 498 |
+
"turn_number": 3 }
|
| 499 |
+
```
|
| 500 |
+
|
| 501 |
+
**Good bot response — switches from qualifying to action**
|
| 502 |
+
```json
|
| 503 |
+
{ "action": "send",
|
| 504 |
+
"body": "Great. Drafting your patient WhatsApp now β 90 seconds. I'll also pre-fill the GBP post for tomorrow 10am. Reply CONFIRM to send the WhatsApp draft to your patient list (40 high-risk adult patients).",
|
| 505 |
+
"cta": "binary_confirm_cancel",
|
| 506 |
+
"rationale": "Merchant explicitly committed; switching from question-asking to action-execution. Concrete next step + measurable scope (40 patients)." }
|
| 507 |
+
```
|
| 508 |
+
|
| 509 |
+
**Bad bot response — keeps qualifying (this loses points)**
|
| 510 |
+
```json
|
| 511 |
+
{ "action": "send",
|
| 512 |
+
"body": "Excellent! Just to plan well β would you say most of your high-risk patients are diabetic, or is there another common factor?",
|
| 513 |
+
"rationale": "Continuing qualification..." // β penalty: ignored explicit intent transition
|
| 514 |
+
}
|
| 515 |
+
```
|
| 516 |
+
|
| 517 |
+
### Example 4.3 — Hostile / off-topic scenario
|
| 518 |
+
|
| 519 |
+
**Turn 2 — judge plays hostile**
|
| 520 |
+
```json
|
| 521 |
+
POST /v1/reply { "from_role": "merchant",
|
| 522 |
+
"message": "Why are you bothering me. This is useless. Stop sending these.",
|
| 523 |
+
"turn_number": 2 }
|
| 524 |
+
```
|
| 525 |
+
|
| 526 |
+
**Good bot response — graceful exit**
|
| 527 |
+
```json
|
| 528 |
+
{ "action": "end",
|
| 529 |
+
"rationale": "Merchant frustration explicit; closing without further engagement. Suppressing all triggers for this merchant for 30 days." }
|
| 530 |
+
```
|
| 531 |
+
|
| 532 |
+
**Acceptable alternative — short apology + exit**
|
| 533 |
+
```json
|
| 534 |
+
{ "action": "send",
|
| 535 |
+
"body": "Apologies β I won't message again. If anything changes, you can always restart with 'Hi Vera'. π",
|
| 536 |
+
"cta": "none",
|
| 537 |
+
"rationale": "One-line acknowledgment + opt-out path; conversation will close after this send." }
|
| 538 |
+
```
|
| 539 |
+
|
| 540 |
+
---
|
| 541 |
+
|
| 542 |
+
## Failure-mode examples
|
| 543 |
+
|
| 544 |
+
### Example F.1 — Bot times out
|
| 545 |
+
|
| 546 |
+
If `/v1/tick` doesn't respond within 30s, the judge logs a timeout and continues. No retries.
|
| 547 |
+
|
| 548 |
+
### Example F.2 — Malformed response
|
| 549 |
+
|
| 550 |
+
```json
|
| 551 |
+
{ "actions": [{ "merchant_id": "m_001", "body": "..." }] }
|
| 552 |
+
```
|
| 553 |
+
|
| 554 |
+
Missing required fields (`conversation_id`, `send_as`, `trigger_id`, `cta`, `suppression_key`, `rationale`) → action scored as 0, -2 penalty.
|
| 555 |
+
|
| 556 |
+
### Example F.3 — Body too long
|
| 557 |
+
|
| 558 |
+
```json
|
| 559 |
+
{ "body": "...500 chars..." }
|
| 560 |
+
```
|
| 561 |
+
|
| 562 |
+
No hard body-length cap. Messages are judged on quality, specificity, and relevance.
|
| 563 |
+
|
| 564 |
+
### Example F.4 — URL in body
|
| 565 |
+
|
| 566 |
+
```json
|
| 567 |
+
{ "body": "Read more: https://magicpin.com/blog" }
|
| 568 |
+
```
|
| 569 |
+
|
| 570 |
+
Hard fail for that action — Meta would reject. Penalty: -3 per URL.
|
| 571 |
+
|
| 572 |
+
### Example F.5 — Repetition
|
| 573 |
+
|
| 574 |
+
Same `body` text sent twice in the same `conversation_id` → -2 anti-repetition penalty per repeat.
|
| 575 |
+
|
| 576 |
+
---
|
| 577 |
+
|
| 578 |
+
## Curl examples (for local testing)
|
| 579 |
+
|
| 580 |
+
```bash
|
| 581 |
+
# Set your bot URL
|
| 582 |
+
export BOT_URL=http://localhost:8080
|
| 583 |
+
|
| 584 |
+
# Healthz
|
| 585 |
+
curl $BOT_URL/v1/healthz
|
| 586 |
+
|
| 587 |
+
# Push a category context
|
| 588 |
+
curl -X POST -H "Content-Type: application/json" \
|
| 589 |
+
-d @dataset/categories/dentists.json \
|
| 590 |
+
$BOT_URL/v1/context
|
| 591 |
+
|
| 592 |
+
# Trigger a tick
|
| 593 |
+
curl -X POST -H "Content-Type: application/json" \
|
| 594 |
+
-d '{"now": "2026-04-26T10:35:00Z", "available_triggers": ["trg_001_research_digest_dentists"]}' \
|
| 595 |
+
$BOT_URL/v1/tick
|
| 596 |
+
|
| 597 |
+
# Send a reply
|
| 598 |
+
curl -X POST -H "Content-Type: application/json" \
|
| 599 |
+
-d '{"conversation_id": "conv_001", "merchant_id": "m_001_drmeera_dentist_delhi", "from_role": "merchant", "message": "Yes please send the abstract", "received_at": "2026-04-26T10:42:00Z", "turn_number": 2}' \
|
| 600 |
+
$BOT_URL/v1/reply
|
| 601 |
+
```
|
| 602 |
+
|
| 603 |
+
---
|
| 604 |
+
|
| 605 |
+
## Summary table — request shapes at a glance
|
| 606 |
+
|
| 607 |
+
| Endpoint | Method | Body | Latency budget | Retried? |
|
| 608 |
+
|---|---|---|---|---|
|
| 609 |
+
| `/v1/healthz` | GET | none | 2 s | yes (×3) |
|
| 610 |
+
| `/v1/metadata` | GET | none | 2 s | no |
|
| 611 |
+
| `/v1/context` | POST | full payload | 5 s | no |
|
| 612 |
+
| `/v1/tick` | POST | `{now, available_triggers}` | 10 s | no |
|
| 613 |
+
| `/v1/reply` | POST | reply turn | 10 s | no |
|
| 614 |
+
|
| 615 |
+
That's the full surface. If your bot handles every example here correctly, it'll pass the warmup, the test window, and the replay scenarios with no operational issues — leaving the score entirely to the quality of your composition.
|
magicpin-ai-challenge/examples/case-studies.md
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 10 Case Studies — What "Good" Looks Like
|
| 2 |
+
|
| 3 |
+
These case studies serve as anchor examples for the AI judge and as targets for candidates. Each shows:
|
| 4 |
+
- The 4-context input
|
| 5 |
+
- The good composed message
|
| 6 |
+
- The compulsion levers used
|
| 7 |
+
- The dimension-by-dimension score (out of 50)
|
| 8 |
+
|
| 9 |
+
Two cases per category × 5 categories = 10 case studies. Mix of merchant-facing and customer-facing scopes.
|
| 10 |
+
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
## Case Study 1 — Dentists / Research Digest (merchant-facing)
|
| 14 |
+
|
| 15 |
+
**Category**: `dentists`
|
| 16 |
+
**Merchant**: Dr. Meera's Dental Clinic, Lajpat Nagar Delhi, CTR 2.1% (below peer 3.0%), 124 high-risk adult patients in roster
|
| 17 |
+
**Trigger**: `research_digest` — JIDA Oct 2026 paper: 3-month fluoride recall vs 6-month, 38% caries reduction in high-risk adults
|
| 18 |
+
**Customer**: none
|
| 19 |
+
|
| 20 |
+
**Composed message** (sent as Vera)
|
| 21 |
+
```
|
| 22 |
+
Dr. Meera, JIDA's Oct issue landed. One item relevant to your high-risk adult
|
| 23 |
+
patients — 2,100-patient trial showed 3-month fluoride recall cuts caries
|
| 24 |
+
recurrence 38% better than 6-month. Worth a look (2-min abstract). Want me
|
| 25 |
+
to pull it + draft a patient-ed WhatsApp you can share? — JIDA Oct 2026 p.14
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
**Compulsion levers**: source citation (credibility), merchant-specific anchor (her high-risk-adult cohort), reciprocity ("I'll pull it for you"), low-friction CTA, **specificity** (2,100 / 38% / page number).
|
| 29 |
+
|
| 30 |
+
**Score breakdown** (50/50 = perfect)
|
| 31 |
+
| Dimension | Score | Why |
|
| 32 |
+
|---|---|---|
|
| 33 |
+
| Specificity | 10 | Numbers + source + page reference |
|
| 34 |
+
| Category fit | 10 | Clinical vocabulary, peer tone, no overclaim |
|
| 35 |
+
| Merchant fit | 10 | "your high-risk adult patients" derived from her customer_aggregate |
|
| 36 |
+
| Trigger relevance | 10 | Explicitly references the digest as the reason for messaging |
|
| 37 |
+
| Engagement compulsion | 10 | Curiosity + reciprocity + clear next step |
|
| 38 |
+
|
| 39 |
+
---
|
| 40 |
+
|
| 41 |
+
## Case Study 2 — Dentists / Recall Reminder (customer-facing)
|
| 42 |
+
|
| 43 |
+
**Category**: `dentists` (customer-facing voice rules: no medical claims)
|
| 44 |
+
**Merchant**: Dr. Meera's Dental Clinic — active offer "Dental Cleaning @ ₹299", available slots Wed 5 Nov 6pm + Thu 6 Nov 5pm
|
| 45 |
+
**Trigger**: `recall_due` — Priya's 6-month cleaning recall window opened
|
| 46 |
+
**Customer**: Priya — lapsed_soft (5mo since last visit), prefers weekday evenings, hi-en mix
|
| 47 |
+
|
| 48 |
+
**Composed message** (sent as merchant_on_behalf, from Dr. Meera's WA number)
|
| 49 |
+
```
|
| 50 |
+
Hi Priya, Dr. Meera's clinic here 🦷 It's been 5 months since your last visit
|
| 51 |
+
— your 6-month cleaning recall is due. Apke liye 2 slots ready hain: Wed 5
|
| 52 |
+
Nov, 6pm ya Thu 6 Nov, 5pm. ₹299 cleaning + complimentary fluoride. Reply 1
|
| 53 |
+
for Wed, 2 for Thu, or tell us a time that works.
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
**Compulsion levers**: name personalization, language-pref match (hi-en mix), specific dates + price + free-add ("complimentary fluoride"), low-friction multi-choice CTA, dental emoji for warmth.
|
| 57 |
+
|
| 58 |
+
**Score breakdown** (49/50)
|
| 59 |
+
| Dimension | Score | Why |
|
| 60 |
+
|---|---|---|
|
| 61 |
+
| Specificity | 10 | Real slots + real price + concrete recall window |
|
| 62 |
+
| Category fit | 10 | No medical claims; warm-clinical tone |
|
| 63 |
+
| Merchant fit | 10 | Real offer from catalog; merchant's actual slots |
|
| 64 |
+
| Trigger relevance | 10 | Explicit 6-month recall framing |
|
| 65 |
+
| Engagement compulsion | 9 | Strong but Reply 1/2 multi-choice is borderline (binary preferred); the "or tell us a time that works" rescues it |
|
| 66 |
+
|
| 67 |
+
---
|
| 68 |
+
|
| 69 |
+
## Case Study 3 — Salons / Bridal Follow-up (customer-facing)
|
| 70 |
+
|
| 71 |
+
**Category**: `salons`
|
| 72 |
+
**Merchant**: Studio11 Family Salon, Kapra Hyderabad, calls +20% w/w, retention 62%, peer-leading
|
| 73 |
+
**Trigger**: `active_planning_intent` β kids yoga summer camp planning conversation
|
| 74 |
+
> *Note: the trigger above belongs to the gyms category, so it does not apply here; the corrected salon trigger below is the one this case study uses.*
|
| 75 |
+
|
| 76 |
+
**Trigger (corrected)**: `bridal_followup` — Kavya, bride-to-be (wedding 2026-11-08), did Bridal Trial 5 weeks ago, in skin-prep window now
|
| 77 |
+
**Customer**: Kavya — new state, wedding date 2026-11-08, opted in for bridal followup
|
| 78 |
+
|
| 79 |
+
> Note: this is also customer-facing. Treating this as a customer-facing case for variety.
|
| 80 |
+
|
| 81 |
+
**Composed message** (sent as merchant_on_behalf)
|
| 82 |
+
```
|
| 83 |
+
Hi Kavya π Lakshmi from Studio11 Kapra here. 196 days to your wedding β perfect
|
| 84 |
+
window to start the 30-day skin-prep program before serious bridal bookings
|
| 85 |
+
roll in. ₹2,499 covers 4 sessions + a take-home kit. Want me to block your
|
| 86 |
+
preferred Saturday 4pm slot for the first session next week?
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
**Compulsion levers**: relationship continuity (a personal follow-up from Lakshmi after the bridal trial), wedding-date specificity, urgency framing (window opens), preference honored (Saturday 4pm), single binary commit.
|
| 90 |
+
|
| 91 |
+
**Score breakdown** (47/50)
|
| 92 |
+
| Dimension | Score | Why |
|
| 93 |
+
|---|---|---|
|
| 94 |
+
| Specificity | 10 | Days-to-wedding count, program structure, price |
|
| 95 |
+
| Category fit | 9 | Warm-practical tone; emojis used appropriately |
|
| 96 |
+
| Merchant fit | 9 | References Lakshmi (owner first name) + Studio11 + her trial |
|
| 97 |
+
| Trigger relevance | 10 | Explicitly the bridal followup window |
|
| 98 |
+
| Engagement compulsion | 9 | Strong but assumes the program exists in catalog (verify in MerchantContext.offers) |
|
| 99 |
+
|
| 100 |
+
---
|
| 101 |
+
|
| 102 |
+
## Case Study 4 — Salons / Curious Ask (merchant-facing)
|
| 103 |
+
|
| 104 |
+
**Category**: `salons`
|
| 105 |
+
**Merchant**: Studio11 Family Salon, growing strongly, last Vera touch 3 days ago
|
| 106 |
+
**Trigger**: `curious_ask_due` — weekly "what's in demand?" cadence
|
| 107 |
+
**Customer**: none
|
| 108 |
+
|
| 109 |
+
**Composed message** (sent as Vera)
|
| 110 |
+
```
|
| 111 |
+
Hi Lakshmi! Quick check — what service has been most asked-for this week
|
| 112 |
+
at Studio11? I'll turn the answer into a Google post + a 4-line WhatsApp
|
| 113 |
+
reply you can use when customers ask about pricing. Takes 5 min.
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
**Compulsion levers**: low-stakes question (no commitment), reciprocity offered up-front (Google post + reply draft), effort externalization (5 min), respects merchant time.
|
| 117 |
+
|
| 118 |
+
**Score breakdown** (44/50)
|
| 119 |
+
| Dimension | Score | Why |
|
| 120 |
+
|---|---|---|
|
| 121 |
+
| Specificity | 8 | "5 min" anchor; could be sharper with a guess ("Is it the keratin treatment?") |
|
| 122 |
+
| Category fit | 9 | Warm tone, fellow-operator register |
|
| 123 |
+
| Merchant fit | 8 | Uses owner first name; could reference recent service mix |
|
| 124 |
+
| Trigger relevance | 10 | This IS the curious-ask family — exactly the right shape |
|
| 125 |
+
| Engagement compulsion | 9 | Asking-the-merchant lever is the strongest Cialdini hook for engaged merchants |
|
| 126 |
+
|
| 127 |
+
---
|
| 128 |
+
|
| 129 |
+
## Case Study 5 — Restaurants / IPL Match Day (merchant-facing)
|
| 130 |
+
|
| 131 |
+
**Category**: `restaurants`
|
| 132 |
+
**Merchant**: SK Pizza Junction, Sant Nagar Delhi, trial-tier subscriber, BOGO pizza Tue-Thu active
|
| 133 |
+
**Trigger**: `ipl_match_today` — DC vs MI at Arun Jaitley Stadium, 7:30pm match today (Sat — *not* a weeknight)
|
| 134 |
+
**Customer**: none
|
| 135 |
+
|
| 136 |
+
**Composed message** (sent as Vera)
|
| 137 |
+
```
|
| 138 |
+
Quick heads-up Suresh — DC vs MI at Arun Jaitley tonight, 7:30pm. Important:
|
| 139 |
+
Saturday IPL matches usually shift -12% restaurant covers (people watch at
|
| 140 |
+
home). Skip the match-night promo today; instead push your BOGO pizza
|
| 141 |
+
(already active) as a delivery-only Saturday special. Want me to draft the
|
| 142 |
+
Swiggy banner + an Insta story? Live in 10 min.
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
**Compulsion levers**: counter-intuitive specific data ("Saturday IPL = -12% covers"), saves merchant from a bad decision, leverages existing offer, concrete deliverables (Swiggy banner + Insta story), 10-min commitment cap.
|
| 146 |
+
|
| 147 |
+
**Score breakdown** (50/50)
|
| 148 |
+
| Dimension | Score | Why |
|
| 149 |
+
|---|---|---|
|
| 150 |
+
| Specificity | 10 | Match details, percentage delta, time-bound |
|
| 151 |
+
| Category fit | 10 | Operator-to-operator voice ("covers", "match-night") |
|
| 152 |
+
| Merchant fit | 10 | Existing BOGO referenced; Swiggy/Insta channels appropriate |
|
| 153 |
+
| Trigger relevance | 10 | Direct response to IPL trigger with informed nuance |
|
| 154 |
+
| Engagement compulsion | 10 | Loss aversion (-12%) + existing-offer leverage + 10-min effort cap |
|
| 155 |
+
|
| 156 |
+
This case is illustrative because it shows the bot **adding value beyond the trigger itself** — the trigger said "IPL match today"; the bot interpreted it correctly and gave a contrarian recommendation.
|
| 157 |
+
|
| 158 |
+
---
|
| 159 |
+
|
| 160 |
+
## Case Study 6 — Restaurants / Active Planning Intent (merchant-facing)
|
| 161 |
+
|
| 162 |
+
**Category**: `restaurants`
|
| 163 |
+
**Merchant**: Mylari South Indian Cafe, Indiranagar Bangalore — high-volume, weekday lunch thali doing 18 orders/day
|
| 164 |
+
**Trigger**: `active_planning_intent` — merchant asked about a corporate-bulk thali package; explicitly said "yes good idea, what would it look like"
|
| 165 |
+
**Customer**: none
|
| 166 |
+
|
| 167 |
+
**Composed message** (sent as Vera, in direct conversation)
|
| 168 |
+
```
|
| 169 |
+
Suresh, here's a starter version — you can edit:
|
| 170 |
+
|
| 171 |
+
Mylari Corporate Thali — for offices in Indiranagar
|
| 172 |
+
- 10 thalis @ ₹125 each (₹25 off retail) + free delivery
|
| 173 |
+
- 25 thalis @ ₹115 each + 2 free filter coffees
|
| 174 |
+
- 50+: ₹105 each + 1 free dosa platter
|
| 175 |
+
- WhatsApp the day-before by 5pm; we deliver between 12:30-1pm
|
| 176 |
+
|
| 177 |
+
3 offices in Indiranagar are in your delivery radius (Embassy Tech, RMZ Eco,
|
| 178 |
+
Sigma Soft). Want me to draft a 3-line WhatsApp to send their facilities
|
| 179 |
+
managers?
|
| 180 |
+
```
|
| 181 |
+
|
| 182 |
+
**Compulsion levers**: complete drafted artifact (no extra work for merchant), tiered pricing matches B2B logic, concrete radius + named buildings, follow-on offer to handle outreach.
|
| 183 |
+
|
| 184 |
+
**Score breakdown** (49/50)
|
| 185 |
+
| Dimension | Score | Why |
|
| 186 |
+
|---|---|---|
|
| 187 |
+
| Specificity | 10 | Tier prices, radius, building names, time windows |
|
| 188 |
+
| Category fit | 10 | Operator language ("delivery radius", "facilities managers") |
|
| 189 |
+
| Merchant fit | 10 | Indiranagar locality + filter-coffee dosa nods to South Indian cafe identity |
|
| 190 |
+
| Trigger relevance | 10 | Direct continuation of merchant's planning intent |
|
| 191 |
+
| Engagement compulsion | 9 | Strong but assumes building data is in MerchantContext or fabricates — judge will check |
|
| 192 |
+
|
| 193 |
+
---
|
| 194 |
+
|
| 195 |
+
## Case Study 7 — Gyms / Seasonal Dip Reframe (merchant-facing)
|
| 196 |
+
|
| 197 |
+
**Category**: `gyms`
|
| 198 |
+
**Merchant**: PowerHouse Fitness, HSR Layout Bangalore, views -30% w/w (April seasonal drop), 245 active members
|
| 199 |
+
**Trigger**: `seasonal_perf_dip` — expected April-June low; flagged as not-a-problem
|
| 200 |
+
**Customer**: none
|
| 201 |
+
|
| 202 |
+
**Composed message** (sent as Vera)
|
| 203 |
+
```
|
| 204 |
+
Karthik, your views are down 30% this week — but I want to flag this is the
|
| 205 |
+
normal April-June acquisition lull (every metro gym sees -25 to -35% in this
|
| 206 |
+
window). Action: skip ad spend now, save it for Sept-Oct when conversion is
|
| 207 |
+
2x. For now, focus retention on your 245 members. Want me to draft a "summer
|
| 208 |
+
attendance challenge" to keep them through the dip?
|
| 209 |
+
```
|
| 210 |
+
|
| 211 |
+
**Compulsion levers**: anxiety pre-emption (the dip is normal), data anchor (-25 to -35%), reframe as opportunity (save spend), specific member count, action proposal.
|
| 212 |
+
|
| 213 |
+
**Score breakdown** (48/50)
|
| 214 |
+
| Dimension | Score | Why |
|
| 215 |
+
|---|---|---|
|
| 216 |
+
| Specificity | 10 | Percentage range, months, member count |
|
| 217 |
+
| Category fit | 10 | Coach-to-operator tone; uses "ad spend", "conversion" correctly |
|
| 218 |
+
| Merchant fit | 10 | His exact dip number + his member count + HSR locality (peer benchmark) |
|
| 219 |
+
| Trigger relevance | 10 | Directly addresses the seasonal dip with reframe |
|
| 220 |
+
| Engagement compulsion | 8 | Could be sharper — "save it for Sept-Oct" is a bit hand-wavy without numbers |
|
| 221 |
+
|
| 222 |
+
---
|
| 223 |
+
|
| 224 |
+
## Case Study 8 — Gyms / Customer Lapse Winback (customer-facing)
|
| 225 |
+
|
| 226 |
+
**Category**: `gyms` (customer-facing voice: warm, no-shame, no guilt-trip)
|
| 227 |
+
**Merchant**: PowerHouse Fitness — first month @ ₹499 offer active
|
| 228 |
+
**Trigger**: `customer_lapsed_hard` — Rashmi, 57 days since last visit, previous focus was weight loss, was a 5-month member
|
| 229 |
+
**Customer**: Rashmi
|
| 230 |
+
|
| 231 |
+
**Composed message** (sent as merchant_on_behalf)
|
| 232 |
+
```
|
| 233 |
+
Hi Rashmi π Karthik from PowerHouse here. It's been about 8 weeks β happens
|
| 234 |
+
to most members at some point, no judgment. We've added a Tue/Thu evening
|
| 235 |
+
HIIT class that fits weight-loss goals well (45 min, 6:30pm). Want me to
|
| 236 |
+
hold a free trial spot for you next Tue, 30 Apr? Reply YES — no commitment,
|
| 237 |
+
no auto-charge.
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
**Compulsion levers**: name + warmth, no-shame framing, addresses her past goal (weight loss), specific new offering matches goal, no-commitment trial, single binary CTA.
|
| 241 |
+
|
| 242 |
+
**Score breakdown** (50/50)
|
| 243 |
+
| Dimension | Score | Why |
|
| 244 |
+
|---|---|---|
|
| 245 |
+
| Specificity | 10 | Class name, time, day, date, duration |
|
| 246 |
+
| Category fit | 10 | Coach voice, no shame, evidence-based |
|
| 247 |
+
| Merchant fit | 10 | Karthik (owner first name), PowerHouse, real new class |
|
| 248 |
+
| Trigger relevance | 10 | Explicitly addresses lapse without being needy |
|
| 249 |
+
| Engagement compulsion | 10 | "no judgment" + "no commitment, no auto-charge" removes 2 common barriers in one line |
|
| 250 |
+
|
| 251 |
+
---
|
| 252 |
+
|
| 253 |
+
## Case Study 9 — Pharmacies / Compliance Alert (merchant-facing)
|
| 254 |
+
|
| 255 |
+
**Category**: `pharmacies`
|
| 256 |
+
**Merchant**: Apollo Health Plus Pharmacy, Malviya Nagar Jaipur — engaged, has 240 chronic-Rx customers
|
| 257 |
+
**Trigger**: `supply_alert` — voluntary recall on atorvastatin batches AT2024-1102 + AT2024-1108 by Mfr Z
|
| 258 |
+
**Customer**: none
|
| 259 |
+
|
| 260 |
+
**Composed message** (sent as Vera)
|
| 261 |
+
```
|
| 262 |
+
Ramesh, urgent: voluntary recall on 2 atorvastatin batches (AT2024-1102,
|
| 263 |
+
AT2024-1108) by Mfr Z — sub-potency, no safety risk, but customers should be
|
| 264 |
+
informed for replacement. Pulled your repeat-Rx list: 22 of your chronic-Rx
|
| 265 |
+
customers were dispensed these batches in last 90 days. Want me to draft
|
| 266 |
+
their WhatsApp note + the replacement-pickup workflow?
|
| 267 |
+
```
|
| 268 |
+
|
| 269 |
+
**Compulsion levers**: urgency + specificity (batch numbers), risk-but-bounded framing, **derived count from merchant data** (22 of 240), end-to-end workflow offer (note + pickup).
|
| 270 |
+
|
| 271 |
+
**Score breakdown** (50/50)
|
| 272 |
+
| Dimension | Score | Why |
|
| 273 |
+
|---|---|---|
|
| 274 |
+
| Specificity | 10 | Batch numbers, manufacturer, count of affected customers |
|
| 275 |
+
| Category fit | 10 | Trustworthy-precise tone; correct use of "sub-potency" without alarming |
|
| 276 |
+
| Merchant fit | 10 | Pulls from his customer aggregate; uses his name |
|
| 277 |
+
| Trigger relevance | 10 | Direct compliance action |
|
| 278 |
+
| Engagement compulsion | 10 | Urgency + clear offer of complete artifact |
|
| 279 |
+
|
| 280 |
+
---
|
| 281 |
+
|
| 282 |
+
## Case Study 10 — Pharmacies / Chronic Refill Reminder (customer-facing)
|
| 283 |
+
|
| 284 |
+
**Category**: `pharmacies` (customer-facing voice: trustworthy, precise, respectful of senior)
|
| 285 |
+
**Merchant**: Apollo Health Plus Pharmacy — free home delivery >₹499, senior 15% off active
|
| 286 |
+
**Trigger**: `chronic_refill_due` — Mr. Sharma's metformin/atorvastatin/telmisartan run out 2026-04-28
|
| 287 |
+
**Customer**: Mr. Sharma — 65-75 age band, senior citizen, channel via son's WhatsApp, address saved, opted in
|
| 288 |
+
|
| 289 |
+
**Composed message** (sent as merchant_on_behalf, addressed to son)
|
| 290 |
+
```
|
| 291 |
+
Namaste — Apollo Health Plus Malviya Nagar yahan. Sharma ji ki 3 monthly
|
| 292 |
+
medicines (metformin, atorvastatin, telmisartan) 28 April ko khatam hongi.
|
| 293 |
+
Same dose, same brand pack ready hai. Senior discount 15% applied — total
|
| 294 |
+
₹1,420 (₹240 saved). Free home delivery to saved address by 5pm tomorrow.
|
| 295 |
+
Reply CONFIRM to dispatch, or call 9876543210 if any change in dosage.
|
| 296 |
+
```
|
| 297 |
+
|
| 298 |
+
**Compulsion levers**: namaste salutation (respectful), full molecule names (precision), specific date, total + savings shown clearly, two-channel option (reply OR call), senior-citizen norms honored.
|
| 299 |
+
|
| 300 |
+
**Score breakdown** (49/50)
|
| 301 |
+
| Dimension | Score | Why |
|
| 302 |
+
|---|---|---|
|
| 303 |
+
| Specificity | 10 | Three molecule names, exact date, total + savings, time window |
|
| 304 |
+
| Category fit | 10 | Trustworthy-precise voice; namaste salutation appropriate |
|
| 305 |
+
| Merchant fit | 10 | Apollo's actual offers (free delivery, senior 15%); Malviya Nagar locality |
|
| 306 |
+
| Trigger relevance | 10 | Refill due date is the central anchor |
|
| 307 |
+
| Engagement compulsion | 9 | Strong; could nudge with "stocks may take 24h" if scarcity were real |
|
| 308 |
+
|
| 309 |
+
---
|
| 310 |
+
|
| 311 |
+
## Cross-case patterns the judge looks for
|
| 312 |
+
|
| 313 |
+
Reading the 10 cases together, here are the patterns that consistently score 9-10/10:
|
| 314 |
+
|
| 315 |
+
1. **Source citation when claiming research/compliance** — JIDA p.14, DCI circular, batch numbers. No citation = score capped at 7.
|
| 316 |
+
2. **Numbers from the contexts, not invented** — "22 of your chronic-Rx customers" is computed from the merchant's customer_aggregate; "245 active members" is from MerchantContext directly. Numbers without provenance get scored as fabrication.
|
| 317 |
+
3. **Owner/merchant first name when present** — Dr. Meera, Suresh, Karthik, Ramesh. Generic "Hi" loses 1 point on merchant fit.
|
| 318 |
+
4. **Single most important next step framed as low-friction commitment** — "Want me to draft X? Live in 10 min" / "Reply YES — no commitment, no auto-charge". Multi-action asks dilute.
|
| 319 |
+
5. **Customer-facing messages honor language preference + relationship state** — Hindi-English mix for Priya, namaste for Mr. Sharma's son. Treating every customer the same loses 2 points on customer fit.
|
| 320 |
+
6. **Domain-specific vocabulary used correctly** — "covers", "AOV", "sub-potency", "fluoride varnish", "ad spend", "conversion". Wrong vocabulary or absent vocabulary signals the bot didn't actually use the CategoryContext.voice.
|
| 321 |
+
7. **The bot adds judgment, not just templating** — Case Study 5 (IPL) shows the bot recommending *not* to push the IPL promo on a Saturday. That kind of contrarian, data-informed call is the highest signal of category understanding.
|
| 322 |
+
8. **The conversation_id is meaningful** — `conv_priya_recall_2026_11` is good (decodable, resumable). `conv_001` is acceptable. UUIDs without context lose nothing but help nothing.
|
| 323 |
+
9. **The rationale field is concise and reflects actual reasoning** — judge cross-checks rationale against the message; mismatch = penalty.
|
| 324 |
+
10. **No repetition, no fabrication** — these are the operational floor. If either appears in the message, the case is capped at 5 per dimension regardless of quality.
|
| 325 |
+
|
| 326 |
+
---
|
| 327 |
+
|
| 328 |
+
## How the judge uses these cases
|
| 329 |
+
|
| 330 |
+
For each submission, the judge LLM:
|
| 331 |
+
1. Reads the candidate's composition for the same (category, merchant, trigger, customer) tuple.
|
| 332 |
+
2. Compares against the case-study output above.
|
| 333 |
+
3. Scores each of the 5 dimensions on a 0-10 scale, citing what's better/worse.
|
| 334 |
+
4. Aggregates into the per-test-pair score.
|
| 335 |
+
|
| 336 |
+
Candidates can review these cases as a north star, but **directly copying the body text of a case study counts as plagiarism** — the judge runs a similarity check on submissions vs the case studies and penalizes near-duplicates.
|
| 337 |
+
|
| 338 |
+
The cases are meant to teach the *shape* of good output: specificity, category fit, merchant fit, trigger relevance, compulsion. Your wording must be your own.
|
magicpin-ai-challenge/judge_simulator.py
ADDED
|
@@ -0,0 +1,962 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
magicpin AI Challenge — LLM-Powered Judge Simulator
|
| 4 |
+
====================================================
|
| 5 |
+
|
| 6 |
+
A strict but fair judge that scores your bot and explains WHY.
|
| 7 |
+
|
| 8 |
+
HOW TO USE:
|
| 9 |
+
1. Edit the CONFIGURATION section below (the block between the two CONFIGURATION banner comments)
|
| 10 |
+
2. Set your LLM provider and API key
|
| 11 |
+
3. Set your bot URL
|
| 12 |
+
4. Run: python judge_simulator.py
|
| 13 |
+
|
| 14 |
+
That's it!
|
| 15 |
+
|
| 16 |
+
Author: magicpin AI Challenge Team
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
# =============================================================================
|
| 20 |
+
# ▼▼▼▼▼▼ CONFIGURATION - EDIT THIS SECTION ▼▼▼▼▼▼
|
| 21 |
+
# =============================================================================
|
| 22 |
+
|
| 23 |
+
# Base URL at which the bot under test is listening.
BOT_URL = "http://localhost:8080"

# LLM backend used for judging. One of:
# "openai", "anthropic", "gemini", "deepseek", "groq", "ollama", "openrouter"
LLM_PROVIDER = "openai"

# API key for the selected provider.
LLM_API_KEY = ""  # <-- PUT YOUR API KEY HERE

# Model name, e.g. "gpt-4o" or "claude-3-5-sonnet-20241022".
# An empty string selects the provider's default model.
LLM_MODEL = ""  # <-- Optional: specify model or leave empty for default

# Address of the local Ollama server (only used when LLM_PROVIDER is "ollama").
OLLAMA_URL = "http://localhost:11434"

# Scenario executed by default.
TEST_SCENARIO = "all"
|
| 40 |
+
|
| 41 |
+
# =============================================================================
|
| 42 |
+
# ▲▲▲▲▲▲ END OF CONFIGURATION - DON'T EDIT BELOW THIS LINE ▲▲▲▲▲▲
|
| 43 |
+
# =============================================================================
|
| 44 |
+
|
| 45 |
+
import os
|
| 46 |
+
import sys
|
| 47 |
+
import json
|
| 48 |
+
import time
|
| 49 |
+
import re
|
| 50 |
+
import socket
|
| 51 |
+
from datetime import datetime
|
| 52 |
+
from dataclasses import dataclass, field
|
| 53 |
+
from typing import Optional, List, Dict, Any, Tuple
|
| 54 |
+
from pathlib import Path
|
| 55 |
+
from urllib import request as urlrequest, error as urlerror
|
| 56 |
+
from abc import ABC, abstractmethod
|
| 57 |
+
|
| 58 |
+
# Module-wide constants.
# Timeout, in seconds, applied to hosted-LLM HTTP calls.
TIMEOUT_LLM = 45
# Dataset directory that sits next to this script.
DATASET_DIR = Path(__file__).parent.joinpath("dataset")
|
| 61 |
+
|
| 62 |
+
# =============================================================================
|
| 63 |
+
# TERMINAL OUTPUT
|
| 64 |
+
# =============================================================================
|
| 65 |
+
|
| 66 |
+
class Colors:
    """ANSI escape sequences used to style terminal output."""

    # Foreground colours.
    HEADER = "\033[95m"
    BLUE = "\033[94m"
    CYAN = "\033[96m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"
    MAGENTA = "\033[35m"

    # Text attributes.
    BOLD = "\033[1m"
    DIM = "\033[2m"

    # Clears every colour/attribute set above.
    RESET = "\033[0m"
|
| 77 |
+
|
| 78 |
+
def print_header(text: str):
    """Print *text* centred in a 70-column banner framed by ``=`` rules."""
    style = f"{Colors.HEADER}{Colors.BOLD}"
    rule = f"{style}{'='*70}{Colors.RESET}"
    # Blank line before the top rule and after the bottom rule.
    print(f"\n{rule}")
    print(f"{style}{text.center(70)}{Colors.RESET}")
    print(f"{rule}\n")
|
| 82 |
+
|
| 83 |
+
def print_section(text: str):
    """Print a bold cyan ``--- text ---`` section divider surrounded by blank lines."""
    style = f"{Colors.CYAN}{Colors.BOLD}"
    print(f"\n{style}--- {text} ---{Colors.RESET}\n")
|
| 85 |
+
|
| 86 |
+
def print_success(text: str):
    """Print *text* prefixed with a green ``[PASS]`` tag."""
    tag = f"{Colors.GREEN}[PASS]{Colors.RESET}"
    print(f"{tag} {text}")
|
| 88 |
+
|
| 89 |
+
def print_fail(text: str):
    """Print *text* prefixed with a red ``[FAIL]`` tag."""
    tag = f"{Colors.RED}[FAIL]{Colors.RESET}"
    print(f"{tag} {text}")
|
| 91 |
+
|
| 92 |
+
def print_warn(text: str):
    """Print *text* prefixed with a yellow ``[WARN]`` tag."""
    tag = f"{Colors.YELLOW}[WARN]{Colors.RESET}"
    print(f"{tag} {text}")
|
| 94 |
+
|
| 95 |
+
def print_info(text: str):
    """Print *text* prefixed with a blue ``[INFO]`` tag."""
    tag = f"{Colors.BLUE}[INFO]{Colors.RESET}"
    print(f"{tag} {text}")
|
| 97 |
+
|
| 98 |
+
def print_llm(text: str):
    """Print *text* prefixed with a magenta ``[LLM]`` tag."""
    tag = f"{Colors.MAGENTA}[LLM]{Colors.RESET}"
    print(f"{tag} {text}")
|
| 100 |
+
|
| 101 |
+
def print_score_bar(dimension: str, score: int, max_score: int = 10):
    """Print one scoring dimension as a coloured 20-cell progress bar.

    Args:
        dimension: Label shown to the left of the bar (padded to 22 columns).
        score: Points awarded for the dimension.
        max_score: Scale maximum; values <= 0 render an empty bar instead of
            raising ``ZeroDivisionError``.

    The colour is chosen from the absolute score: green for >= 7,
    yellow for >= 4, red otherwise.
    """
    width = 20
    ratio = score / max_score if max_score > 0 else 0
    # Clamp so that out-of-range scores cannot produce a bar wider (or
    # narrower) than `width` cells.
    bar_filled = max(0, min(width, int(ratio * width)))
    bar_empty = width - bar_filled
    color = Colors.GREEN if score >= 7 else Colors.YELLOW if score >= 4 else Colors.RED
    # Distinct glyphs for the filled and empty segments keep the bar readable
    # even on terminals that ignore the DIM attribute.
    print(f" {dimension:22} [{color}{'█' * bar_filled}{Colors.DIM}{'░' * bar_empty}{Colors.RESET}] {color}{score:2}/{max_score}{Colors.RESET}")
|
| 106 |
+
|
| 107 |
+
def print_reason(text: str):
    """Print a dimmed explanation, truncated to 200 characters plus ``...``."""
    if len(text) > 200:
        shown = text[:200] + "..."
    else:
        shown = text
    print(f" {Colors.DIM}{shown}{Colors.RESET}")
|
| 110 |
+
|
| 111 |
+
def print_hint(hint: str):
    """Print *hint* on its own line after a blank line, behind a yellow ``Hint:`` label."""
    label = f"{Colors.YELLOW}Hint:{Colors.RESET}"
    print(f"\n {label} {hint}")
|
| 113 |
+
|
| 114 |
+
# =============================================================================
|
| 115 |
+
# DATA CLASSES
|
| 116 |
+
# =============================================================================
|
| 117 |
+
|
| 118 |
+
@dataclass
class ScoreResult:
    """Per-dimension scores for one bot message, each with the judge's reason.

    Every dimension defaults to 0 with an empty reason; ``penalties`` is
    subtracted when computing :attr:`total`.
    """

    specificity: int = 0
    specificity_reason: str = ""
    category_fit: int = 0
    category_fit_reason: str = ""
    merchant_fit: int = 0
    merchant_fit_reason: str = ""
    decision_quality: int = 0
    decision_quality_reason: str = ""
    engagement_compulsion: int = 0
    engagement_reason: str = ""
    penalties: int = 0
    # default_factory gives every instance its own list.
    penalty_reasons: List[str] = field(default_factory=list)
    hint: str = ""

    @property
    def total(self) -> int:
        """Sum of the five dimension scores minus penalties, floored at 0."""
        earned = sum((
            self.specificity,
            self.category_fit,
            self.merchant_fit,
            self.decision_quality,
            self.engagement_compulsion,
        ))
        return max(0, earned - self.penalties)
|
| 138 |
+
|
| 139 |
+
# =============================================================================
|
| 140 |
+
# LLM PROVIDERS
|
| 141 |
+
# =============================================================================
|
| 142 |
+
|
| 143 |
+
class LLMProvider(ABC):
    """Interface every LLM backend must implement."""

    @abstractmethod
    def name(self) -> str:
        """Return a human-readable label for this provider."""

    @abstractmethod
    def complete(self, prompt: str, system: str = None) -> str:
        """Return the model's text reply to *prompt*, optionally steered by *system*."""
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
class OpenAIProvider(LLMProvider):
    """LLM provider backed by the OpenAI chat-completions endpoint."""

    def __init__(self, api_key: str, model: str = ""):
        self.api_key = api_key
        # An empty model string selects the default model.
        self.model = model or "gpt-4o-mini"

    def name(self) -> str:
        """Return the provider label including the model in use."""
        return f"OpenAI ({self.model})"

    def complete(self, prompt: str, system: str = None) -> str:
        """Send *prompt* (and optional *system* message) and return the reply text.

        Errors from ``urlopen`` and from decoding an unexpected response
        shape propagate to the caller.
        """
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        body = json.dumps({
            "model": self.model,
            "messages": messages,
            "temperature": 0.2,
            "max_tokens": 1500
        }).encode("utf-8")

        req = urlrequest.Request(
            "https://api.openai.com/v1/chat/completions",
            data=body,
            headers={"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
        )
        # The context manager closes the HTTP response even if reading or
        # JSON decoding fails.
        with urlrequest.urlopen(req, timeout=TIMEOUT_LLM) as resp:
            data = json.loads(resp.read().decode("utf-8"))
        return data["choices"][0]["message"]["content"]
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
class AnthropicProvider(LLMProvider):
    """LLM provider backed by the Anthropic Messages endpoint."""

    def __init__(self, api_key: str, model: str = ""):
        self.api_key = api_key
        # An empty model string selects the default model.
        self.model = model or "claude-3-5-sonnet-20241022"

    def name(self) -> str:
        """Return the provider label including the model in use."""
        return f"Anthropic ({self.model})"

    def complete(self, prompt: str, system: str = None) -> str:
        """Send *prompt* (and optional *system* prompt) and return the reply text.

        Errors from ``urlopen`` and from decoding an unexpected response
        shape propagate to the caller.
        """
        body_dict = {"model": self.model, "max_tokens": 1500,
                     # Same sampling temperature as the other providers so
                     # scores are comparable across backends.
                     "temperature": 0.2,
                     "messages": [{"role": "user", "content": prompt}]}
        if system:
            # The system prompt is a top-level field here, not a message.
            body_dict["system"] = system

        req = urlrequest.Request(
            "https://api.anthropic.com/v1/messages",
            data=json.dumps(body_dict).encode("utf-8"),
            headers={"x-api-key": self.api_key, "Content-Type": "application/json",
                     "anthropic-version": "2023-06-01"}
        )
        # The context manager closes the HTTP response even if reading or
        # JSON decoding fails.
        with urlrequest.urlopen(req, timeout=TIMEOUT_LLM) as resp:
            data = json.loads(resp.read().decode("utf-8"))
        return data["content"][0]["text"]
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
class GeminiProvider(LLMProvider):
    """LLM provider backed by the Gemini ``generateContent`` endpoint."""

    def __init__(self, api_key: str, model: str = ""):
        self.api_key = api_key
        # An empty model string selects the default model.
        self.model = model or "gemini-1.5-flash"

    def name(self) -> str:
        """Return the provider label including the model in use."""
        return f"Gemini ({self.model})"

    def complete(self, prompt: str, system: str = None) -> str:
        """Send *prompt* (prefixed by *system*, if given) and return the reply text.

        Errors from ``urlopen`` and from decoding an unexpected response
        shape propagate to the caller.
        """
        # The system text is prepended to the user prompt as a single part.
        full_prompt = f"{system}\n\n{prompt}" if system else prompt
        body = json.dumps({
            "contents": [{"parts": [{"text": full_prompt}]}],
            "generationConfig": {"temperature": 0.2, "maxOutputTokens": 1500}
        }).encode("utf-8")

        url = f"https://generativelanguage.googleapis.com/v1beta/models/{self.model}:generateContent?key={self.api_key}"
        req = urlrequest.Request(url, data=body, headers={"Content-Type": "application/json"})
        # The context manager closes the HTTP response even if reading or
        # JSON decoding fails.
        with urlrequest.urlopen(req, timeout=TIMEOUT_LLM) as resp:
            data = json.loads(resp.read().decode("utf-8"))
        return data["candidates"][0]["content"]["parts"][0]["text"]
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
class DeepSeekProvider(LLMProvider):
    """LLM provider backed by the DeepSeek chat-completions endpoint."""

    def __init__(self, api_key: str, model: str = ""):
        self.api_key = api_key
        # An empty model string selects the default model.
        self.model = model or "deepseek-chat"

    def name(self) -> str:
        """Return the provider label including the model in use."""
        return f"DeepSeek ({self.model})"

    def complete(self, prompt: str, system: str = None) -> str:
        """Send *prompt* (and optional *system* message) and return the reply text.

        Errors from ``urlopen`` and from decoding an unexpected response
        shape propagate to the caller.
        """
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        req = urlrequest.Request(
            "https://api.deepseek.com/v1/chat/completions",
            data=json.dumps({"model": self.model, "messages": messages,
                             "temperature": 0.2, "max_tokens": 1500}).encode("utf-8"),
            headers={"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
        )
        # The context manager closes the HTTP response even if reading or
        # JSON decoding fails.
        with urlrequest.urlopen(req, timeout=TIMEOUT_LLM) as resp:
            data = json.loads(resp.read().decode("utf-8"))
        return data["choices"][0]["message"]["content"]
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
class GroqProvider(LLMProvider):
    """LLM provider backed by Groq's OpenAI-compatible chat-completions endpoint."""

    def __init__(self, api_key: str, model: str = ""):
        self.api_key = api_key
        # An empty model string selects the default model.
        # NOTE(review): confirm this default model id is still served by Groq.
        self.model = model or "llama-3.1-70b-versatile"

    def name(self) -> str:
        """Return the provider label including the model in use."""
        return f"Groq ({self.model})"

    def complete(self, prompt: str, system: str = None) -> str:
        """Send *prompt* (and optional *system* message) and return the reply text.

        Errors from ``urlopen`` and from decoding an unexpected response
        shape propagate to the caller.
        """
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        req = urlrequest.Request(
            "https://api.groq.com/openai/v1/chat/completions",
            data=json.dumps({"model": self.model, "messages": messages,
                             "temperature": 0.2, "max_tokens": 1500}).encode("utf-8"),
            headers={"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
        )
        # The context manager closes the HTTP response even if reading or
        # JSON decoding fails.
        with urlrequest.urlopen(req, timeout=TIMEOUT_LLM) as resp:
            data = json.loads(resp.read().decode("utf-8"))
        return data["choices"][0]["message"]["content"]
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
class OllamaProvider(LLMProvider):
    """LLM provider backed by an Ollama server's ``/api/generate`` endpoint."""

    def __init__(self, model: str = "", api_url: str = ""):
        # Empty strings select the defaults.
        self.model = model or "llama3"
        self.api_url = api_url or "http://localhost:11434"

    def name(self) -> str:
        """Return the provider label including the model in use."""
        return f"Ollama ({self.model})"

    def complete(self, prompt: str, system: str = None) -> str:
        """Send *prompt* (prefixed by *system*, if given) and return the reply text.

        Errors from ``urlopen`` and from decoding an unexpected response
        shape propagate to the caller.
        """
        # The system text is prepended to the user prompt.
        full_prompt = f"{system}\n\n{prompt}" if system else prompt
        req = urlrequest.Request(
            f"{self.api_url}/api/generate",
            data=json.dumps({"model": self.model, "prompt": full_prompt,
                             "stream": False, "options": {"temperature": 0.2}}).encode("utf-8"),
            headers={"Content-Type": "application/json"}
        )
        # This provider uses its own 90-second timeout rather than TIMEOUT_LLM.
        # The context manager closes the HTTP response even if reading or
        # JSON decoding fails.
        with urlrequest.urlopen(req, timeout=90) as resp:
            data = json.loads(resp.read().decode("utf-8"))
        return data["response"]
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
class OpenRouterProvider(LLMProvider):
    """LLM provider backed by the OpenRouter chat-completions endpoint."""

    def __init__(self, api_key: str, model: str = ""):
        self.api_key = api_key
        # An empty model string selects the default model.
        self.model = model or "anthropic/claude-3-haiku"

    def name(self) -> str:
        """Return the provider label including the model in use."""
        return f"OpenRouter ({self.model})"

    def complete(self, prompt: str, system: str = None) -> str:
        """Send *prompt* (and optional *system* message) and return the reply text.

        Errors from ``urlopen`` and from decoding an unexpected response
        shape propagate to the caller.
        """
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        req = urlrequest.Request(
            "https://openrouter.ai/api/v1/chat/completions",
            data=json.dumps({"model": self.model, "messages": messages,
                             "temperature": 0.2, "max_tokens": 1500}).encode("utf-8"),
            headers={"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json",
                     "HTTP-Referer": "https://magicpin.com"}
        )
        # The context manager closes the HTTP response even if reading or
        # JSON decoding fails.
        with urlrequest.urlopen(req, timeout=TIMEOUT_LLM) as resp:
            data = json.loads(resp.read().decode("utf-8"))
        return data["choices"][0]["message"]["content"]
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
def create_provider() -> LLMProvider:
    """Instantiate the provider selected by ``LLM_PROVIDER``.

    Prints the list of valid provider names and exits with status 1 when
    the configured name is not recognised.
    """
    key_and_model = (LLM_API_KEY, LLM_MODEL)
    # Maps each provider name to its class and constructor arguments.
    registry = {
        "openai": (OpenAIProvider, key_and_model),
        "anthropic": (AnthropicProvider, key_and_model),
        "gemini": (GeminiProvider, key_and_model),
        "deepseek": (DeepSeekProvider, key_and_model),
        "groq": (GroqProvider, key_and_model),
        # Ollama takes a server URL instead of an API key.
        "ollama": (OllamaProvider, (LLM_MODEL, OLLAMA_URL)),
        "openrouter": (OpenRouterProvider, key_and_model),
    }

    if LLM_PROVIDER not in registry:
        print_fail(f"Unknown provider: {LLM_PROVIDER}")
        print_info(f"Available: {', '.join(registry.keys())}")
        sys.exit(1)

    provider_cls, ctor_args = registry[LLM_PROVIDER]
    return provider_cls(*ctor_args)
|
| 346 |
+
|
| 347 |
+
# =============================================================================
|
| 348 |
+
# DATASET & BOT CLIENT
|
| 349 |
+
# =============================================================================
|
| 350 |
+
|
| 351 |
+
class DatasetLoader:
    """Loads the challenge dataset (categories, merchants, customers, triggers)
    from JSON files under ``dataset_dir`` into in-memory dictionaries."""

    def __init__(self, dataset_dir: Path):
        self.dataset_dir = dataset_dir
        self.categories = {}
        self.merchants = {}
        self.customers = {}
        self.triggers = {}

    @staticmethod
    def _read_json(path: Path):
        """Parse one JSON file, closing the handle and decoding it as UTF-8."""
        # Explicit UTF-8 avoids depending on the platform's locale encoding.
        with open(path, encoding="utf-8") as handle:
            return json.load(handle)

    def load(self) -> bool:
        """Populate the four dictionaries from disk.

        Category files are keyed by their ``slug`` (falling back to the file
        stem). Seed files are keyed by the id field listed below; items
        without that field are skipped. Missing files are ignored.

        Returns:
            True on success, False if any file could not be read or parsed
            (the error is printed).
        """
        try:
            cat_dir = self.dataset_dir / "categories"
            if cat_dir.exists():
                for f in cat_dir.glob("*.json"):
                    data = self._read_json(f)
                    self.categories[data.get("slug", f.stem)] = data

            for name, container, key in [
                ("merchants_seed.json", "merchants", "merchant_id"),
                ("customers_seed.json", "customers", "customer_id"),
                ("triggers_seed.json", "triggers", "id")
            ]:
                path = self.dataset_dir / name
                if not path.exists():
                    continue
                data = self._read_json(path)
                # Accept the plural key ("merchants") or, failing that, the
                # same name with trailing "s" stripped ("merchant").
                items = data.get(container, data.get(container.rstrip("s"), []))
                storage = getattr(self, container)
                for item in items:
                    if key in item:
                        storage[item[key]] = item
            return True
        except Exception as e:
            # Best-effort boundary: report the problem and let the caller decide.
            print_fail(f"Dataset load error: {e}")
            return False
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
class BotClient:
    """Minimal JSON-over-HTTP client for the bot under test.

    Every call returns a ``(data, error, latency_ms)`` tuple: ``data`` is the
    decoded JSON body (or None), ``error`` is a short description (or None),
    and ``latency_ms`` is the wall-clock duration of the request.
    """

    def __init__(self, base_url: str):
        # Paths passed to _request start with "/", so drop trailing slashes.
        self.base_url = base_url.rstrip("/")

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as a naive ISO-8601 string ending in "Z"."""
        # Function-scope import: only `datetime` itself is imported at file level.
        from datetime import timezone
        # datetime.utcnow() is deprecated; stripping tzinfo from an aware
        # value yields the same "...Z" format as before.
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    def _request(self, method: str, path: str, timeout: int = 30,
                 body_dict: Optional[Dict] = None) -> Tuple[Optional[Dict], Optional[str], float]:
        """Issue one HTTP request and decode the JSON response.

        Args:
            method: HTTP verb.
            path: Path appended to ``base_url`` (expected to start with "/").
            timeout: Socket timeout in seconds.
            body_dict: JSON-serialisable request body; None sends no body.

        Returns:
            ``(data, error, latency_ms)``. A 401 yields ``"Unauthorized"``.
            Other HTTP errors return their JSON body as data when it parses,
            otherwise ``"HTTP <code>"``. Any other failure returns its
            string form as the error.
        """
        url = f"{self.base_url}{path}"
        start = time.time()
        # `is not None` so that an explicitly empty dict is still sent as "{}".
        body = json.dumps(body_dict).encode("utf-8") if body_dict is not None else None
        headers = {"Content-Type": "application/json"}
        req = urlrequest.Request(url, data=body, method=method, headers=headers)

        try:
            # The context manager closes the response even if reading fails.
            with urlrequest.urlopen(req, timeout=timeout) as resp:
                raw = resp.read()
            return json.loads(raw.decode("utf-8")), None, (time.time() - start) * 1000
        except urlerror.HTTPError as e:
            latency = (time.time() - start) * 1000
            if e.code == 401:
                return None, "Unauthorized", latency
            try:
                return json.loads(e.read().decode("utf-8")), None, latency
            except Exception:
                # Error body missing or not JSON: report just the status code.
                return None, f"HTTP {e.code}", latency
        except Exception as e:
            return None, str(e), (time.time() - start) * 1000

    def healthz(self):
        """GET /v1/healthz with a 5-second timeout."""
        return self._request("GET", "/v1/healthz", 5)

    def metadata(self):
        """GET /v1/metadata with a 5-second timeout."""
        return self._request("GET", "/v1/metadata", 5)

    def push_context(self, scope, cid, version, payload):
        """POST one context item (scope, id, version, payload) to /v1/context."""
        return self._request("POST", "/v1/context", 10, {
            "scope": scope, "context_id": cid, "version": version,
            "payload": payload, "delivered_at": self._utc_timestamp()
        })

    def tick(self, triggers):
        """POST the current time and the available trigger ids to /v1/tick."""
        return self._request("POST", "/v1/tick", 15, {
            "now": self._utc_timestamp(), "available_triggers": triggers
        })

    def reply(self, conv_id, merchant_id, message, turn):
        """POST a merchant message for conversation *conv_id* to /v1/reply."""
        return self._request("POST", "/v1/reply", 15, {
            "conversation_id": conv_id, "merchant_id": merchant_id, "customer_id": None,
            "from_role": "merchant", "message": message,
            "received_at": self._utc_timestamp(), "turn_number": turn
        })
|
| 435 |
+
|
| 436 |
+
# =============================================================================
|
| 437 |
+
# LLM SCORING ENGINE
|
| 438 |
+
# =============================================================================
|
| 439 |
+
|
| 440 |
+
class LLMScorer:
    """Scores messages using LLM and provides detailed reasoning.

    The judge LLM is asked (via ``SYSTEM``) to reply with a JSON object of
    0-10 scores and reasons. If the call or the parsing fails, a basic
    heuristic score is returned instead.
    """

    SYSTEM = """You are a STRICT judge for the magicpin AI Challenge. You score merchant engagement messages.

SCORING DIMENSIONS (0-10 each, be strict - 5 is average, 7+ is good, 9+ is excellent):

1. SPECIFICITY: Does the message have VERIFIABLE facts?
- Numbers (percentages, counts, prices)
- Dates/times
- Source citations
- Concrete claims vs vague statements

2. CATEGORY FIT: Does the voice match the business type?
- Dentists: clinical, peer-to-peer, technical OK, use "Dr." prefix
- Salons: warm, friendly, practical
- Restaurants: operator-to-operator
- Gyms: coaching, motivational
- Pharmacies: trustworthy, precise

3. MERCHANT FIT: Is it personalized to THIS merchant?
- Uses their name/owner name correctly
- References their actual data (not fabricated)
- Honors language preference

4. TRIGGER RELEVANCE: Does it connect to WHY NOW?
- Clear reason for this specific message
- Uses data from the trigger payload
- Not a generic nudge

5. ENGAGEMENT COMPULSION: Would they reply?
- Loss aversion, curiosity, social proof
- Clear CTA
- Low friction ask

PENALTIES:
- Fabricating data not in context: -2
- Exposing internal jargon to merchant: -1

RESPOND ONLY WITH THIS EXACT JSON FORMAT:
{
"specificity": <0-10>,
"specificity_reason": "<why this score, 1-2 sentences>",
"category_fit": <0-10>,
"category_fit_reason": "<why this score>",
"merchant_fit": <0-10>,
"merchant_fit_reason": "<why this score>",
"decision_quality": <0-10>,
"decision_quality_reason": "<why this score>",
"engagement_compulsion": <0-10>,
"engagement_reason": "<why this score>",
"hint": "<one sentence guidance for improvement, cryptic not direct>"
}"""

    def __init__(self, llm: LLMProvider, dataset: DatasetLoader):
        self.llm = llm
        self.dataset = dataset

    @staticmethod
    def _body_text(action: Dict) -> str:
        """Return the action's message body as a string ("" when missing or null)."""
        body = action.get("body")
        if body is None:
            return ""
        return body if isinstance(body, str) else str(body)

    @staticmethod
    def _coerce_score(value: Any, default: int = 5) -> int:
        """Convert one LLM-supplied score to an int clamped to 0-10.

        Accepts ints, floats and numeric strings; for strings such as
        "7/10" the first integer found is used. Anything unusable yields
        *default*.
        """
        try:
            number = int(float(value))
        except (TypeError, ValueError, OverflowError):
            found = re.search(r'-?\d+', str(value))
            number = int(found.group()) if found else default
        return min(10, max(0, number))

    @staticmethod
    def _coerce_text(value: Any) -> str:
        """Return *value* as a string, mapping None to ""."""
        return "" if value is None else str(value)

    def score(self, action: Dict, category: Dict, merchant: Dict,
              trigger: Dict, customer: Dict = None) -> ScoreResult:
        """Score a message and return detailed results.

        Args:
            action: The bot's action; ``body``, ``cta`` and ``send_as`` are read.
            category: Category context (slug and voice settings are read).
            merchant: Merchant context (identity, performance, signals, offers).
            trigger: Trigger context (kind, payload, urgency).
            customer: Optional customer context; None means merchant-facing.

        Returns:
            The parsed LLM verdict, or the heuristic fallback if the LLM call
            raises or its reply cannot be parsed.
        """

        # A null or non-string body must not crash prompt construction.
        body = self._body_text(action)

        prompt = f"""SCORE THIS MESSAGE:

=== CONTEXT PROVIDED TO BOT ===
Category: {category.get('slug', 'unknown')}
Voice: {category.get('voice', {}).get('tone', 'unknown')}
Taboos: {category.get('voice', {}).get('vocab_taboo', [])[:5]}

Merchant: {merchant.get('identity', {}).get('name', 'unknown')}
Owner: {merchant.get('identity', {}).get('owner_first_name', 'unknown')}
Locality: {merchant.get('identity', {}).get('locality', 'unknown')}
Languages: {merchant.get('identity', {}).get('languages', [])}
Performance: views={merchant.get('performance', {}).get('views', '?')}, calls={merchant.get('performance', {}).get('calls', '?')}, ctr={merchant.get('performance', {}).get('ctr', '?')}
Signals: {merchant.get('signals', [])}
Active Offers: {[o.get('title') for o in merchant.get('offers', []) if o.get('status') == 'active']}

Trigger Kind: {trigger.get('kind', 'unknown')}
Trigger Payload: {json.dumps(trigger.get('payload', {}))}
Trigger Urgency: {trigger.get('urgency', '?')}

Customer: {json.dumps(customer.get('identity', {})) if customer else 'None (merchant-facing)'}

=== BOT'S MESSAGE ===
Body ({len(body)} chars): "{body}"
CTA: {action.get('cta', 'none')}
Send As: {action.get('send_as', 'vera')}

Score each dimension 0-10 with clear reasoning. Be STRICT."""

        try:
            print_llm("Analyzing message...")
            response = self.llm.complete(prompt, self.SYSTEM)
            return self._parse_response(response, action)
        except Exception as e:
            print_warn(f"LLM error: {e}")
            return self._fallback_score(action)

    def _parse_response(self, response: str, action: Dict) -> ScoreResult:
        """Parse LLM JSON response.

        The outermost ``{...}`` span in *response* is decoded. Each score is
        coerced on its own, so one malformed value does not discard the rest
        of the verdict. Falls back to the heuristic score when no JSON object
        is found or decoding fails.
        """
        match = re.search(r'\{[\s\S]*\}', response)
        if not match:
            return self._fallback_score(action)

        try:
            data = json.loads(match.group())
            # The fourth dimension is accepted under either key name.
            decision = data.get("decision_quality", data.get("trigger_relevance", 5))
            decision_reason = data.get("decision_quality_reason",
                                       data.get("trigger_relevance_reason", ""))
            return ScoreResult(
                specificity=self._coerce_score(data.get("specificity", 5)),
                specificity_reason=self._coerce_text(data.get("specificity_reason", "")),
                category_fit=self._coerce_score(data.get("category_fit", 5)),
                category_fit_reason=self._coerce_text(data.get("category_fit_reason", "")),
                merchant_fit=self._coerce_score(data.get("merchant_fit", 5)),
                merchant_fit_reason=self._coerce_text(data.get("merchant_fit_reason", "")),
                decision_quality=self._coerce_score(decision),
                decision_quality_reason=self._coerce_text(decision_reason),
                engagement_compulsion=self._coerce_score(data.get("engagement_compulsion", 5)),
                engagement_reason=self._coerce_text(data.get("engagement_reason", "")),
                hint=self._coerce_text(data.get("hint", ""))
            )
        except Exception as e:
            print_warn(f"Parse error: {e}")
            return self._fallback_score(action)

    def _fallback_score(self, action: Dict) -> ScoreResult:
        """Basic fallback scoring.

        Specificity is 3 plus 2 per run of digits in the body (capped at 10);
        every other dimension gets a neutral 5.
        """
        body = self._body_text(action)
        nums = len(re.findall(r'\d+', body))
        return ScoreResult(
            specificity=min(10, 3 + nums * 2),
            specificity_reason="Fallback: counted numbers in message",
            category_fit=5, category_fit_reason="Could not evaluate",
            merchant_fit=5, merchant_fit_reason="Could not evaluate",
            decision_quality=5, decision_quality_reason="Could not evaluate",
            engagement_compulsion=5, engagement_reason="Could not evaluate",
            hint="LLM scoring failed - using basic heuristics"
        )
|
| 579 |
+
|
| 580 |
+
# =============================================================================
|
| 581 |
+
# MAIN JUDGE
|
| 582 |
+
# =============================================================================
|
| 583 |
+
|
| 584 |
+
class JudgeSimulator:
|
| 585 |
+
def __init__(self, llm: LLMProvider):
    """Wire up the bot client and dataset loader for a judging run.

    The scorer is left unset here; every accumulated score starts empty.
    """
    self.llm = llm
    self.all_scores: List[ScoreResult] = []
    self.scorer: Optional[LLMScorer] = None
    self.dataset = DatasetLoader(DATASET_DIR)
    self.client = BotClient(BOT_URL)
|
| 591 |
+
|
| 592 |
+
def run(self, scenario: str) -> bool:
    """Load the dataset, run the named scenario and print the final summary.

    Returns False if the dataset cannot be loaded or the scenario name is
    unknown; otherwise returns whatever the scenario handler returns.
    """
    print_header(f"LLM JUDGE β {scenario.upper()}")
    print_info(f"Bot: {BOT_URL}")
    print_info(f"LLM: {self.llm.name()}")

    loaded = self.dataset.load()
    if not loaded:
        print_fail("Dataset load failed")
        return False

    self.scorer = LLMScorer(self.llm, self.dataset)
    counts = (
        len(self.dataset.categories),
        len(self.dataset.merchants),
        len(self.dataset.triggers),
    )
    print_info("Loaded: {} categories, {} merchants, {} triggers".format(*counts))

    # Scenario name -> bound handler method.
    scenarios = {
        "warmup": self._warmup,
        "phase2_short": self._phase2_short,
        "auto_reply_hell": self._auto_reply,
        "intent_transition": self._intent,
        "hostile": self._hostile,
        "all": self._all,
        "full_evaluation": self._full,
    }

    if scenario not in scenarios:
        print_fail(f"Unknown scenario: {scenario}")
        print_info(f"Available: {', '.join(scenarios.keys())}")
        return False

    handler = scenarios[scenario]
    success = handler()
    # The summary is printed whether or not the scenario succeeded.
    self._final_summary()
    return success
|
| 624 |
+
|
| 625 |
+
def _warmup(self) -> bool:
|
| 626 |
+
print_section("WARMUP")
|
| 627 |
+
|
| 628 |
+
data, err, lat = self.client.healthz()
|
| 629 |
+
if err:
|
| 630 |
+
print_fail(f"healthz: {err}")
|
| 631 |
+
return False
|
| 632 |
+
print_success(f"healthz ({lat:.0f}ms)")
|
| 633 |
+
|
| 634 |
+
data, err, lat = self.client.metadata()
|
| 635 |
+
if err:
|
| 636 |
+
print_warn(f"metadata: {err}")
|
| 637 |
+
else:
|
| 638 |
+
print_success(f"metadata β Team: {data.get('team_name', '?')}, Model: {data.get('model', '?')}")
|
| 639 |
+
|
| 640 |
+
print_section("CONTEXT PUSH")
|
| 641 |
+
for slug, cat in self.dataset.categories.items():
|
| 642 |
+
data, err, _ = self.client.push_context("category", slug, 1, cat)
|
| 643 |
+
status = "PASS" if data and data.get("accepted") else "FAIL"
|
| 644 |
+
print(f" [{status}] category/{slug}")
|
| 645 |
+
|
| 646 |
+
for mid, m in list(self.dataset.merchants.items())[:5]:
|
| 647 |
+
data, err, _ = self.client.push_context("merchant", mid, 1, m)
|
| 648 |
+
status = "PASS" if data and data.get("accepted") else "FAIL"
|
| 649 |
+
short_id = mid.split('_')[1] if '_' in mid else mid[:10]
|
| 650 |
+
print(f" [{status}] merchant/{short_id}")
|
| 651 |
+
|
| 652 |
+
return True
|
| 653 |
+
|
| 654 |
+
def _phase2_short(self) -> bool:
|
| 655 |
+
if not self._warmup():
|
| 656 |
+
return False
|
| 657 |
+
|
| 658 |
+
print_section("TICK TEST")
|
| 659 |
+
|
| 660 |
+
trigs = list(self.dataset.triggers.keys())[:3]
|
| 661 |
+
for tid in trigs:
|
| 662 |
+
self.client.push_context("trigger", tid, 1, self.dataset.triggers[tid])
|
| 663 |
+
|
| 664 |
+
data, err, lat = self.client.tick(trigs)
|
| 665 |
+
if err:
|
| 666 |
+
print_fail(f"tick: {err}")
|
| 667 |
+
return False
|
| 668 |
+
|
| 669 |
+
actions = data.get("actions", [])
|
| 670 |
+
print_info(f"Bot returned {len(actions)} action(s) ({lat:.0f}ms)")
|
| 671 |
+
|
| 672 |
+
if not actions:
|
| 673 |
+
print_warn("No actions β bot chose not to send")
|
| 674 |
+
return True
|
| 675 |
+
|
| 676 |
+
for action in actions:
|
| 677 |
+
self._score_and_display(action)
|
| 678 |
+
|
| 679 |
+
return True
|
| 680 |
+
|
| 681 |
+
def _auto_reply(self) -> bool:
|
| 682 |
+
print_section("AUTO-REPLY DETECTION")
|
| 683 |
+
|
| 684 |
+
data, err, _ = self.client.healthz()
|
| 685 |
+
if err:
|
| 686 |
+
print_fail(f"Bot unreachable: {err}")
|
| 687 |
+
return False
|
| 688 |
+
|
| 689 |
+
mid = list(self.dataset.merchants.keys())[0] if self.dataset.merchants else "m_test"
|
| 690 |
+
auto_msg = "Thank you for contacting us! Our team will respond shortly."
|
| 691 |
+
|
| 692 |
+
for i in range(1, 5):
|
| 693 |
+
print_info(f"Turn {i}: Sending auto-reply...")
|
| 694 |
+
data, err, _ = self.client.reply(f"conv_auto_{i}", mid, auto_msg, i + 1)
|
| 695 |
+
|
| 696 |
+
if err:
|
| 697 |
+
print_fail(f"Error: {err}")
|
| 698 |
+
return False
|
| 699 |
+
|
| 700 |
+
action = data.get("action", "?")
|
| 701 |
+
|
| 702 |
+
if action == "end":
|
| 703 |
+
print_success(f"Turn {i}: Bot ENDED β detected auto-reply pattern!")
|
| 704 |
+
return True
|
| 705 |
+
elif action == "wait":
|
| 706 |
+
wait_s = data.get("wait_seconds", "?")
|
| 707 |
+
print_success(f"Turn {i}: Bot WAITING {wait_s}s")
|
| 708 |
+
else:
|
| 709 |
+
body = data.get("body", "")[:50]
|
| 710 |
+
print_warn(f"Turn {i}: Bot sent: \"{body}...\"")
|
| 711 |
+
|
| 712 |
+
print_warn("Bot never ended after 4 auto-replies")
|
| 713 |
+
return True
|
| 714 |
+
|
| 715 |
+
def _intent(self) -> bool:
|
| 716 |
+
print_section("INTENT TRANSITION")
|
| 717 |
+
|
| 718 |
+
data, err, _ = self.client.healthz()
|
| 719 |
+
if err:
|
| 720 |
+
print_fail(f"Bot unreachable: {err}")
|
| 721 |
+
return False
|
| 722 |
+
|
| 723 |
+
mid = list(self.dataset.merchants.keys())[0] if self.dataset.merchants else "m_test"
|
| 724 |
+
commitment = "Ok lets do it. Whats next?"
|
| 725 |
+
|
| 726 |
+
print_info(f"Merchant: \"{commitment}\"")
|
| 727 |
+
data, err, _ = self.client.reply("conv_intent_1", mid, commitment, 2)
|
| 728 |
+
|
| 729 |
+
if err:
|
| 730 |
+
print_fail(f"Error: {err}")
|
| 731 |
+
return False
|
| 732 |
+
|
| 733 |
+
action = data.get("action", "?")
|
| 734 |
+
body = data.get("body", "")
|
| 735 |
+
|
| 736 |
+
print_info(f"Bot action: {action}")
|
| 737 |
+
if body:
|
| 738 |
+
print_info(f"Bot body: \"{body[:100]}{'...' if len(body) > 100 else ''}\"")
|
| 739 |
+
|
| 740 |
+
qualifying = ["would you", "do you", "can you tell", "what if", "how about"]
|
| 741 |
+
actioning = ["done", "sending", "draft", "here", "confirm", "proceed", "next"]
|
| 742 |
+
|
| 743 |
+
body_lower = body.lower()
|
| 744 |
+
if any(w in body_lower for w in actioning) and not any(w in body_lower for w in qualifying):
|
| 745 |
+
print_success("Bot correctly switched to ACTION mode")
|
| 746 |
+
elif any(w in body_lower for w in qualifying):
|
| 747 |
+
print_fail("Bot is STILL QUALIFYING after commitment")
|
| 748 |
+
else:
|
| 749 |
+
print_warn("Response unclear")
|
| 750 |
+
|
| 751 |
+
return True
|
| 752 |
+
|
| 753 |
+
def _hostile(self) -> bool:
|
| 754 |
+
print_section("HOSTILE HANDLING")
|
| 755 |
+
|
| 756 |
+
data, err, _ = self.client.healthz()
|
| 757 |
+
if err:
|
| 758 |
+
print_fail(f"Bot unreachable: {err}")
|
| 759 |
+
return False
|
| 760 |
+
|
| 761 |
+
mid = list(self.dataset.merchants.keys())[0] if self.dataset.merchants else "m_test"
|
| 762 |
+
hostile = "Stop messaging me. This is useless spam."
|
| 763 |
+
|
| 764 |
+
print_info(f"Merchant (hostile): \"{hostile}\"")
|
| 765 |
+
data, err, _ = self.client.reply("conv_hostile", mid, hostile, 2)
|
| 766 |
+
|
| 767 |
+
if err:
|
| 768 |
+
print_fail(f"Error: {err}")
|
| 769 |
+
return False
|
| 770 |
+
|
| 771 |
+
action = data.get("action", "?")
|
| 772 |
+
body = data.get("body", "")
|
| 773 |
+
|
| 774 |
+
print_info(f"Bot action: {action}")
|
| 775 |
+
|
| 776 |
+
if action == "end":
|
| 777 |
+
print_success("Bot correctly ENDED on hostile message")
|
| 778 |
+
elif action == "send" and any(w in body.lower() for w in ["sorry", "apolog", "won't"]):
|
| 779 |
+
print_success("Bot apologized gracefully")
|
| 780 |
+
else:
|
| 781 |
+
print_fail("Bot didn't handle hostility well")
|
| 782 |
+
|
| 783 |
+
return True
|
| 784 |
+
|
| 785 |
+
def _all(self) -> bool:
|
| 786 |
+
results = []
|
| 787 |
+
for name, fn in [("warmup", self._warmup), ("auto_reply", self._auto_reply),
|
| 788 |
+
("intent", self._intent), ("hostile", self._hostile)]:
|
| 789 |
+
try:
|
| 790 |
+
results.append((name, fn()))
|
| 791 |
+
except Exception as e:
|
| 792 |
+
print_fail(f"{name} crashed: {e}")
|
| 793 |
+
results.append((name, False))
|
| 794 |
+
|
| 795 |
+
print_section("SCENARIO RESULTS")
|
| 796 |
+
for name, passed in results:
|
| 797 |
+
(print_success if passed else print_fail)(name)
|
| 798 |
+
|
| 799 |
+
return all(p for _, p in results)
|
| 800 |
+
|
| 801 |
+
def _full(self) -> bool:
|
| 802 |
+
if not self._warmup():
|
| 803 |
+
return False
|
| 804 |
+
|
| 805 |
+
print_section("FULL EVALUATION")
|
| 806 |
+
|
| 807 |
+
for mid, m in self.dataset.merchants.items():
|
| 808 |
+
self.client.push_context("merchant", mid, 1, m)
|
| 809 |
+
for tid, t in self.dataset.triggers.items():
|
| 810 |
+
self.client.push_context("trigger", tid, 1, t)
|
| 811 |
+
|
| 812 |
+
print_success("All contexts pushed")
|
| 813 |
+
|
| 814 |
+
print_section("SCORING COMPOSITIONS")
|
| 815 |
+
tids = list(self.dataset.triggers.keys())
|
| 816 |
+
|
| 817 |
+
for i in range(0, len(tids), 5):
|
| 818 |
+
batch = tids[i:i+5]
|
| 819 |
+
data, err, lat = self.client.tick(batch)
|
| 820 |
+
|
| 821 |
+
if err:
|
| 822 |
+
print_warn(f"Tick failed: {err}")
|
| 823 |
+
continue
|
| 824 |
+
|
| 825 |
+
actions = data.get("actions", [])
|
| 826 |
+
print_info(f"Batch {i//5 + 1}: {len(actions)} actions ({lat:.0f}ms)")
|
| 827 |
+
|
| 828 |
+
for action in actions:
|
| 829 |
+
self._score_and_display(action, verbose=False)
|
| 830 |
+
|
| 831 |
+
return True
|
| 832 |
+
|
| 833 |
+
def _score_and_display(self, action: Dict, verbose: bool = True):
|
| 834 |
+
"""Score an action and display results."""
|
| 835 |
+
tid = action.get("trigger_id", "")
|
| 836 |
+
mid = action.get("merchant_id", "")
|
| 837 |
+
cid = action.get("customer_id")
|
| 838 |
+
|
| 839 |
+
trigger = self.dataset.triggers.get(tid, {})
|
| 840 |
+
merchant = self.dataset.merchants.get(mid, {})
|
| 841 |
+
customer = self.dataset.customers.get(cid) if cid else None
|
| 842 |
+
category = self.dataset.categories.get(merchant.get("category_slug", ""), {})
|
| 843 |
+
|
| 844 |
+
score = self.scorer.score(action, category, merchant, trigger, customer)
|
| 845 |
+
self.all_scores.append(score)
|
| 846 |
+
|
| 847 |
+
body = action.get("body", "")[:50]
|
| 848 |
+
print(f"\n{Colors.CYAN}Message:{Colors.RESET} \"{body}...\"")
|
| 849 |
+
|
| 850 |
+
print_score_bar("Specificity", score.specificity)
|
| 851 |
+
if verbose and score.specificity_reason:
|
| 852 |
+
print_reason(score.specificity_reason)
|
| 853 |
+
|
| 854 |
+
print_score_bar("Category Fit", score.category_fit)
|
| 855 |
+
if verbose and score.category_fit_reason:
|
| 856 |
+
print_reason(score.category_fit_reason)
|
| 857 |
+
|
| 858 |
+
print_score_bar("Merchant Fit", score.merchant_fit)
|
| 859 |
+
if verbose and score.merchant_fit_reason:
|
| 860 |
+
print_reason(score.merchant_fit_reason)
|
| 861 |
+
|
| 862 |
+
print_score_bar("Decision Quality", score.decision_quality)
|
| 863 |
+
if verbose and score.decision_quality_reason:
|
| 864 |
+
print_reason(score.decision_quality_reason)
|
| 865 |
+
|
| 866 |
+
print_score_bar("Engagement", score.engagement_compulsion)
|
| 867 |
+
if verbose and score.engagement_reason:
|
| 868 |
+
print_reason(score.engagement_reason)
|
| 869 |
+
|
| 870 |
+
if score.penalties:
|
| 871 |
+
print(f" {Colors.RED}Penalties: -{score.penalties}{Colors.RESET}")
|
| 872 |
+
for r in score.penalty_reasons:
|
| 873 |
+
print_reason(r)
|
| 874 |
+
|
| 875 |
+
print(f"\n {Colors.BOLD}TOTAL: {score.total}/50{Colors.RESET}")
|
| 876 |
+
|
| 877 |
+
if verbose and score.hint:
|
| 878 |
+
print_hint(score.hint)
|
| 879 |
+
|
| 880 |
+
def _final_summary(self):
|
| 881 |
+
if not self.all_scores:
|
| 882 |
+
return
|
| 883 |
+
|
| 884 |
+
print_section("FINAL SUMMARY")
|
| 885 |
+
|
| 886 |
+
n = len(self.all_scores)
|
| 887 |
+
avg = ScoreResult(
|
| 888 |
+
specificity=sum(s.specificity for s in self.all_scores) // n,
|
| 889 |
+
category_fit=sum(s.category_fit for s in self.all_scores) // n,
|
| 890 |
+
merchant_fit=sum(s.merchant_fit for s in self.all_scores) // n,
|
| 891 |
+
decision_quality=sum(s.decision_quality for s in self.all_scores) // n,
|
| 892 |
+
engagement_compulsion=sum(s.engagement_compulsion for s in self.all_scores) // n,
|
| 893 |
+
penalties=sum(s.penalties for s in self.all_scores)
|
| 894 |
+
)
|
| 895 |
+
|
| 896 |
+
print_info(f"Messages scored: {n}\n")
|
| 897 |
+
|
| 898 |
+
print_score_bar("Avg Specificity", avg.specificity)
|
| 899 |
+
print_score_bar("Avg Category Fit", avg.category_fit)
|
| 900 |
+
print_score_bar("Avg Merchant Fit", avg.merchant_fit)
|
| 901 |
+
print_score_bar("Avg Decision Quality", avg.decision_quality)
|
| 902 |
+
print_score_bar("Avg Engagement", avg.engagement_compulsion)
|
| 903 |
+
|
| 904 |
+
total = avg.total
|
| 905 |
+
pct = (total / 50) * 100
|
| 906 |
+
|
| 907 |
+
print(f"\n{Colors.BOLD} AVERAGE SCORE: {total}/50 ({pct:.0f}%){Colors.RESET}")
|
| 908 |
+
|
| 909 |
+
if pct >= 80:
|
| 910 |
+
print(f"\n {Colors.GREEN}EXCELLENT{Colors.RESET}")
|
| 911 |
+
elif pct >= 60:
|
| 912 |
+
print(f"\n {Colors.YELLOW}GOOD{Colors.RESET}")
|
| 913 |
+
elif pct >= 40:
|
| 914 |
+
print(f"\n {Colors.YELLOW}NEEDS IMPROVEMENT{Colors.RESET}")
|
| 915 |
+
else:
|
| 916 |
+
print(f"\n {Colors.RED}BELOW EXPECTATIONS{Colors.RESET}")
|
| 917 |
+
|
| 918 |
+
# =============================================================================
|
| 919 |
+
# ENTRY POINT
|
| 920 |
+
# =============================================================================
|
| 921 |
+
|
| 922 |
+
def main():
    """Validate configuration, probe the LLM, run the configured scenario and exit.

    The process exits with status 1 when the API key is missing (for any
    provider other than "ollama"), when the provider cannot be created, or
    when the connectivity probe fails or returns nothing. Otherwise the exit
    status is 0 if the scenario succeeded and 1 if it did not.
    """
    print_header("magicpin AI Challenge β LLM Judge")

    # Every provider except the local "ollama" one needs an API key.
    key_required = LLM_PROVIDER != "ollama"
    if key_required and not LLM_API_KEY:
        print_fail("LLM_API_KEY is not set!")
        print_info("Edit the CONFIGURATION section at the top of this file")
        print_info("Set your API key for your chosen provider")
        sys.exit(1)

    # Build the provider; any construction error is fatal.
    try:
        llm = create_provider()
        print_info(f"LLM Provider: {llm.name()}")
    except Exception as e:
        print_fail(f"Failed to create LLM provider: {e}")
        sys.exit(1)

    # One cheap round trip before starting the scenarios.
    print_info("Testing LLM connection...")
    try:
        probe = llm.complete("Say 'ready' if you can hear me.", "You are a test assistant.")
        if not probe:
            print_fail("LLM returned empty response")
            sys.exit(1)
        print_success("LLM connected successfully")
    except Exception as e:
        print_fail(f"LLM connection failed: {e}")
        print_info("Check your API key and internet connection")
        sys.exit(1)

    # Hand over to the simulator and mirror its verdict in the exit status.
    passed = JudgeSimulator(llm).run(TEST_SCENARIO)
    exit_code = 0 if passed else 1
    sys.exit(exit_code)
|
| 959 |
+
|
| 960 |
+
|
| 961 |
+
# Run the judge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
main.py
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
main.py — Vera Message Engine
FastAPI server implementing the exact magicpin judge harness API contract.

Endpoints (per challenge-testing-brief.md §2):
GET /v1/healthz — liveness + context counts
GET /v1/metadata — bot identity
POST /v1/context — idempotent context push (200 or 409)
POST /v1/tick — proactive message generation
POST /v1/reply — conversation reply handling
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import os, time, json, logging
|
| 14 |
+
from datetime import datetime, timezone
|
| 15 |
+
from typing import Optional, Any, Dict, List
|
| 16 |
+
from contextlib import asynccontextmanager
|
| 17 |
+
|
| 18 |
+
from fastapi import FastAPI, Request
|
| 19 |
+
from fastapi.responses import JSONResponse
|
| 20 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 21 |
+
from pydantic import BaseModel, Field
|
| 22 |
+
from dotenv import load_dotenv
|
| 23 |
+
|
| 24 |
+
load_dotenv()
|
| 25 |
+
|
| 26 |
+
import database
|
| 27 |
+
from security import check_prompt_injection, injection_response
|
| 28 |
+
from llm_pipeline import compose_tick_action, compose_reply
|
| 29 |
+
|
| 30 |
+
# Process-wide logging: INFO level, timestamped records with padded logger
# name and level columns.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(name)-18s | %(levelname)-5s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger("vera.main")

# Timestamp taken at import time; /v1/healthz subtracts it to report uptime.
START_TIME = time.time()
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# ─── Lifespan ────────────────────────────────────────────────────────────────
|
| 41 |
+
|
| 42 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Open the database before the app serves requests and close it afterwards.

    The close runs in a ``finally`` block so it still happens when an
    exception is thrown into the generator at the ``yield``. The original
    placed the close after a bare ``yield`` and would skip it in that case.

    Args:
        app: The FastAPI application being started (not used here).
    """
    logger.info("Vera Message Engine starting...")
    database.get_db()
    logger.info("Database ready β")
    try:
        yield
    finally:
        database.close_db()
        logger.info("Shutdown complete β")
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# The ASGI application; `lifespan` manages the database connection around serving.
app = FastAPI(title="Vera Message Engine", version="1.0.0", lifespan=lifespan)
# CORS is fully open: any origin, method and header is allowed.
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# ─── Pydantic Models (exact judge contract) ──────────────────────────────────
|
| 57 |
+
|
| 58 |
+
class ContextBody(BaseModel):
    """Request body for POST /v1/context: one versioned context push."""
    # Context kind; the handler accepts category, merchant, customer or trigger.
    scope: str
    # Identifier of the context within its scope.
    context_id: str
    # Caller-supplied version, forwarded to database.upsert_context.
    version: int
    # JSON object stored for this context; an empty object when omitted.
    payload: Dict[str, Any] = {}
    # Optional delivery timestamp string, passed through without parsing.
    delivered_at: Optional[str] = None
|
| 64 |
+
|
| 65 |
+
class TickBody(BaseModel):
    """Request body for POST /v1/tick."""
    # Caller's current time as a string; the tick handler in this file does not read it.
    now: str
    # Ids of triggers the bot may act on during this tick; empty when omitted.
    available_triggers: List[str] = []
|
| 68 |
+
|
| 69 |
+
class ReplyBody(BaseModel):
    """Request body for POST /v1/reply: one inbound conversation message."""
    # Conversation the message belongs to; used as the key for stored turns.
    conversation_id: str
    # Optional merchant whose stored context should inform the reply.
    merchant_id: Optional[str] = None
    # Optional customer whose stored context should inform the reply.
    customer_id: Optional[str] = None
    # Role of the sender; stored with the turn as-is.
    from_role: str
    # Raw inbound text; screened for prompt injection before use.
    message: str
    # Optional receipt timestamp string; the reply handler in this file does not read it.
    received_at: Optional[str] = None
    # Turn index of this inbound message; the bot's answer is stored at turn_number + 1.
    turn_number: int
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# ─── GET /v1/healthz ─────────────────────────────────────────────────────────
|
| 80 |
+
|
| 81 |
+
@app.get("/v1/healthz")
async def healthz():
    """Report liveness, whole seconds since import, and the stored context counts."""
    loaded = database.count_contexts()
    elapsed = time.time() - START_TIME
    return dict(status="ok", uptime_seconds=int(elapsed), contexts_loaded=loaded)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# ─── GET /v1/metadata ────────────────────────────────────────────────────────
|
| 92 |
+
|
| 93 |
+
@app.get("/v1/metadata")
async def metadata():
    """Return the static team/model description plus a UTC timestamp taken per call."""
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    identity = dict(
        team_name="Vera Engine",
        team_members=["Madhav Kapila"],
        model="cerebras/llama3.1-8b + groq/llama-3.3-70b-versatile",
        approach="Tri-Model Pipeline: Cerebras diagnostician extracts signal, Groq copywriter composes. Category voice routing. Prompt Guard security shield.",
        contact_email="madhav@example.com",
        version="1.0.0",
    )
    # Added last so the key order of the response matches the contract listing.
    identity["submitted_at"] = stamp
    return identity
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
# ─── POST /v1/context ────────────────────────────────────────────────────────
|
| 107 |
+
|
| 108 |
+
@app.post("/v1/context")
async def push_context(body: ContextBody):
    """Store one pushed context after validating its scope.

    Returns:
        A 400 JSON response for an unknown scope; a 409 JSON response when
        database.upsert_context reports status_code 409; otherwise the
        upsert result dict (with its status_code entry removed).
    """
    scope = body.scope.lower().strip()
    valid_scopes = {"category", "merchant", "customer", "trigger"}

    if scope in valid_scopes:
        outcome = database.upsert_context(
            scope=scope,
            context_id=body.context_id,
            version=body.version,
            payload=body.payload,
            delivered_at=body.delivered_at,
        )
        # status_code is transport metadata, so it is stripped from the body.
        code = outcome.pop("status_code", 200)
        if code != 409:
            return outcome
        return JSONResponse(status_code=409, content=outcome)

    rejection = {
        "accepted": False,
        "reason": "invalid_scope",
        "details": f"scope must be one of {valid_scopes}",
    }
    return JSONResponse(status_code=400, content=rejection)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
# ─── POST /v1/tick ───────────────────────────────────────────────────────────
|
| 131 |
+
|
| 132 |
+
def _compose_for_trigger(trig_id):
    """Resolve the contexts behind one trigger id and run the compose pipeline.

    Returns None when the trigger is unknown, names no merchant, or its
    merchant is unknown; otherwise whatever compose_tick_action returns.
    """
    trigger_row = database.get_context("trigger", trig_id)
    if not trigger_row:
        logger.warning("Trigger %s not found in DB, skipping", trig_id)
        return None
    trigger = trigger_row["payload"]

    merchant_id = trigger.get("merchant_id")
    if not merchant_id:
        return None
    merchant_row = database.get_context("merchant", merchant_id)
    if not merchant_row:
        logger.warning("Merchant %s not found for trigger %s", merchant_id, trig_id)
        return None
    merchant = merchant_row["payload"]

    # A missing category degrades to a stub that only carries the slug.
    slug = merchant.get("category_slug", "")
    category_row = database.get_context("category", slug)
    category = category_row["payload"] if category_row else {"slug": slug}

    # Customer context is attached only for customer-scoped triggers.
    customer = None
    customer_id = trigger.get("customer_id")
    if customer_id:
        customer_row = database.get_context("customer", customer_id)
        if customer_row:
            customer = customer_row["payload"]

    return compose_tick_action(category, merchant, trigger, customer)


@app.post("/v1/tick")
async def tick(body: TickBody):
    """Compose at most one outbound action per available trigger.

    Triggers that cannot be resolved, produce no body, or raise are skipped
    (errors are logged). Each kept action's body is recorded as turn 1 of
    its conversation.

    Returns:
        {"actions": [...]} with the composed actions in trigger order.
    """
    actions = []

    for trig_id in body.available_triggers:
        try:
            action = _compose_for_trigger(trig_id)
            if not action or not action.get("body"):
                continue
            actions.append(action)
            # Log the bot's outbound message
            database.append_turn(action["conversation_id"], 1, "vera", action["body"])
        except Exception as e:
            logger.error("Error processing trigger %s: %s", trig_id, e, exc_info=True)
            continue

    return {"actions": actions}
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
# ─── POST /v1/reply ──────────────────────────────────────────────────────────
|
| 186 |
+
|
| 187 |
+
@app.post("/v1/reply")
async def reply(body: ReplyBody):
    """Screen, record and answer one inbound conversation message.

    A message flagged by the prompt-injection check is recorded as
    "[BLOCKED]" and answered with the security response. Otherwise the
    message is stored, the available merchant/category/customer contexts
    and the conversation history are gathered, and compose_reply builds
    the answer. A compose failure falls back to a generic acknowledgement.

    Returns:
        The action dict to send back to the judge.
    """
    conv_id = body.conversation_id

    # Security shield: runs before the message text is stored or composed on.
    if not check_prompt_injection(body.message):
        logger.warning("Injection blocked: conv=%s", conv_id)
        database.append_turn(conv_id, body.turn_number, body.from_role, "[BLOCKED]")
        return injection_response()

    # Record the inbound turn.
    database.append_turn(conv_id, body.turn_number, body.from_role, body.message)

    # Merchant context, with its category (or a slug-only stub) when known.
    merchant, category = {}, {}
    merchant_row = database.get_context("merchant", body.merchant_id) if body.merchant_id else None
    if merchant_row:
        merchant = merchant_row["payload"]
        slug = merchant.get("category_slug", "")
        category_row = database.get_context("category", slug)
        category = category_row["payload"] if category_row else {"slug": slug}

    history = database.get_conversation(conv_id)

    # Customer context is optional.
    customer = None
    customer_row = database.get_context("customer", body.customer_id) if body.customer_id else None
    if customer_row:
        customer = customer_row["payload"]

    try:
        result = compose_reply(
            merchant=merchant,
            category=category,
            message=body.message,
            conversation_history=history,
            customer=customer,
        )
    except Exception as e:
        logger.error("Reply compose error: %s", e, exc_info=True)
        result = {
            "action": "send",
            "body": "Got it β let me look into that for you.",
            "cta": "open_ended",
            "rationale": "Fallback due to processing error",
        }

    # Record the bot's own turn only when there is text to record.
    outbound = result.get("body")
    if outbound:
        database.append_turn(conv_id, body.turn_number + 1, "vera", outbound)

    return result
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
# ─── POST /v1/teardown (optional per spec §11) ───────────────────────────────
|
| 243 |
+
|
| 244 |
+
@app.post("/v1/teardown")
async def teardown():
    """Erase everything the database module holds and acknowledge the wipe."""
    database.wipe_all()
    ack = {"status": "wiped"}
    return ack
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
# ─── Payload Size Guard ──────────────────────────────────────────────────────
|
| 251 |
+
|
| 252 |
+
# Largest request body, in bytes, accepted based on the declared Content-Length.
MAX_PAYLOAD_BYTES = 512_000


@app.middleware("http")
async def payload_guard(request: Request, call_next):
    """Reject requests whose declared Content-Length exceeds the size limit.

    Only the header is inspected; a request that sends no Content-Length is
    passed through unchanged.

    Returns:
        400 when Content-Length is present but not an integer (the original
        let int() raise, which surfaced as a 500), 413 when it exceeds
        MAX_PAYLOAD_BYTES, otherwise the downstream response.
    """
    declared = request.headers.get("content-length")
    if declared:
        try:
            size = int(declared)
        except ValueError:
            return JSONResponse(status_code=400, content={"error": "Invalid Content-Length header"})
        if size > MAX_PAYLOAD_BYTES:
            return JSONResponse(status_code=413, content={"error": "Payload too large"})
    return await call_next(request)
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
# ─── Global Error Handler ────────────────────────────────────────────────────
|
| 261 |
+
|
| 262 |
+
@app.exception_handler(Exception)
async def catch_all(request: Request, exc: Exception):
    """Log any unhandled exception with its traceback and answer with a 500.

    NOTE(review): the exception text is returned to the client verbatim.
    """
    logger.error("Unhandled: %s", exc, exc_info=True)
    payload = {"error": str(exc)}
    return JSONResponse(content=payload, status_code=500)
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
# Script entry point: serve the app with a single uvicorn worker.
if __name__ == "__main__":
    import uvicorn

    # HOST is documented in .env.sample but was previously ignored; the
    # default keeps the old bind address.
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", "8000"))
    uvicorn.run("main:app", host=host, port=port, log_level="info", workers=1)
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
pydantic
|
| 4 |
+
python-dotenv
|
| 5 |
+
requests
|
security.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
security.py — Vera Message Engine
Prompt Guard middleware using Groq's meta-llama/llama-prompt-guard-2-86m.

Pillar 4: Security Shield
- Every inbound /v1/reply message passes through Prompt Guard BEFORE
touching the DB or invoking any main LLM.
- On injection detection → return {"action": "end", "rationale": "Security violation detected."}
- Fail-open on API errors (logged) to avoid blocking legitimate requests.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
import json
|
| 14 |
+
import logging
|
| 15 |
+
import requests
|
| 16 |
+
from typing import Dict, Any
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger("vera.security")
|
| 19 |
+
|
| 20 |
+
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
|
| 21 |
+
GROQ_CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"
|
| 22 |
+
GUARD_MODEL = "meta-llama/llama-prompt-guard-2-86m"
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# Whole words in the guard's reply that mean "benign" / "malicious".
_GUARD_SAFE_WORDS = frozenset({"safe", "benign", "clean", "legitimate", "no"})
_GUARD_INJECTION_WORDS = frozenset({"unsafe", "injection", "jailbreak", "malicious", "attack", "yes"})
# Threshold applied when the guard answers with a bare number.
# NOTE(review): assumes the number is the probability that the prompt is
# malicious — confirm against the Prompt Guard model documentation.
_GUARD_SCORE_THRESHOLD = 0.5


def _guard_flags_injection(guard_output: str) -> bool:
    """Return True when the guard model's reply indicates an injection.

    A purely numeric reply is compared against _GUARD_SCORE_THRESHOLD.
    Otherwise the reply is split into whole words: any safe word wins
    (so "no injection" is safe), then any injection word flags the text,
    and anything else is treated as not flagged (fail-open).

    Whole-word matching fixes the original substring test, which treated
    "unsafe" as safe because it contains "safe".
    """
    verdict = (guard_output or "").strip().lower()
    if not verdict:
        return False

    try:
        score = float(verdict)
    except ValueError:
        score = None
    if score is not None:
        return score >= _GUARD_SCORE_THRESHOLD

    # Replace punctuation with spaces so "unsafe." and "safe," tokenize cleanly.
    words = set("".join(ch if ch.isalnum() else " " for ch in verdict).split())
    if words & _GUARD_SAFE_WORDS:
        return False
    return bool(words & _GUARD_INJECTION_WORDS)


def check_prompt_injection(text: str) -> bool:
    """
    Run inbound text through Groq Prompt Guard.
    Returns True if the text is SAFE, False if injection detected.

    Empty or whitespace-only text is safe without a network call. The check
    fails open (returns True) when no API key is configured, when the API
    answers with a non-200 status, on timeout and on any other error.
    """
    if not text or not text.strip():
        return True  # Empty is safe

    if not GROQ_API_KEY:
        logger.warning("GROQ_API_KEY not set β prompt guard DISABLED (fail-open)")
        return True

    try:
        resp = requests.post(
            GROQ_CHAT_URL,
            headers={
                "Authorization": f"Bearer {GROQ_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": GUARD_MODEL,
                "messages": [{"role": "user", "content": text}],
                "temperature": 0.0,
                "max_tokens": 32,
            },
            timeout=8,
        )

        if resp.status_code != 200:
            logger.error("Prompt Guard returned %d: %s", resp.status_code, resp.text[:300])
            return True  # Fail open

        data = resp.json()
        # Tolerate a missing/empty choices list or a null content field.
        choices = data.get("choices") or [{}]
        message = choices[0].get("message") or {}
        guard_output = (message.get("content") or "").strip().lower()

        if _guard_flags_injection(guard_output):
            logger.warning("INJECTION DETECTED: %s β guard said: %s", text[:100], guard_output)
            return False

        return True  # Safe or ambiguous → fail open

    except requests.Timeout:
        logger.warning("Prompt Guard timed out β fail-open")
        return True
    except Exception as e:
        logger.error("Prompt Guard error: %s", str(e))
        return True
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def injection_response() -> Dict[str, Any]:
    """Build the reply sent when an inbound message is blocked: end the conversation."""
    return dict(action="end", rationale="Security violation detected.")
|