Spaces:
Sleeping
Sleeping
JerameeUC committed on
Commit Β·
ecbc643
1
Parent(s): 071c820
7th Commit - All placeholder code added.
Browse files- FLATTENED_CODE.txt +0 -0
- Makefile +59 -2
- anon_bot/handler.py +41 -2
- anon_bot/rules.py +90 -1
- docs/architecture.md +72 -1
- docs/design.md +71 -1
- docs/flowchart.png +0 -0
- examples/example-dev.py +36 -0
- examples/example.py +59 -5
- integrations/azure/bot_framework.py +39 -2
- integrations/email/ticket_stub.py +56 -1
- logged_in_bot/sentiment_azure.py +187 -0
- logged_in_bot/tools.py +224 -0
- memory/rag/indexer.py +343 -0
- memory/rag/retriever.py +267 -0
- memory/sessions.py +243 -0
- memory/store.py +143 -1
- nlu/pipeline.py +75 -1
- nlu/prompts.py +77 -0
- nlu/router.py +142 -0
- requirements-dev.txt +7 -0
- requirements-ml.txt +7 -0
- requirements.txt +10 -13
- scripts/check_compliance.py +79 -1
- scripts/run_local.sh +43 -3
- scripts/seed_data.py +92 -1
- tests/test_anon_bot.py +119 -1
- tests/test_guardrails.py +39 -1
- tests/test_indexer.py +24 -0
- tests/test_logged_in_bot.py +83 -1
- tests/test_memory.py +94 -1
- tests/test_nlu.py +45 -1
- tests/test_retriever.py +34 -0
- tests/test_sessions.py +21 -0
- tree.txt +4 -5
FLATTENED_CODE.txt
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Makefile
CHANGED
|
@@ -1,11 +1,68 @@
|
|
| 1 |
-
.PHONY: dev test run seed check
|
|
|
|
|
|
|
| 2 |
dev:
|
| 3 |
pip install -r requirements.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
test:
|
| 5 |
-
pytest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
run:
|
| 7 |
export PYTHONPATH=. && python -c "from storefront_chatbot.app.app import build; build().launch(server_name='0.0.0.0', server_port=7860)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
seed:
|
| 9 |
python storefront_chatbot/scripts/seed_data.py
|
|
|
|
| 10 |
check:
|
| 11 |
python storefront_chatbot/scripts/check_compliance.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.PHONY: dev ml dev-deps example example-dev test run seed check lint fmt typecheck clean serve all ci coverage docker-build docker-run
|
| 2 |
+
|
| 3 |
+
# --- setup ---
|
| 4 |
dev:
|
| 5 |
pip install -r requirements.txt
|
| 6 |
+
|
| 7 |
+
ml:
|
| 8 |
+
pip install -r requirements-ml.txt
|
| 9 |
+
|
| 10 |
+
dev-deps:
|
| 11 |
+
pip install -r requirements-dev.txt
|
| 12 |
+
|
| 13 |
+
# --- one-stop local env + tests ---
|
| 14 |
+
example-dev: dev dev-deps
|
| 15 |
+
pytest
|
| 16 |
+
@echo "β
Dev environment ready. Try 'make example' to run the CLI demo."
|
| 17 |
+
|
| 18 |
+
# --- tests & coverage ---
|
| 19 |
test:
|
| 20 |
+
pytest
|
| 21 |
+
|
| 22 |
+
coverage:
|
| 23 |
+
pytest --cov=storefront_chatbot --cov-report=term-missing
|
| 24 |
+
|
| 25 |
+
# --- run app ---
|
| 26 |
run:
|
| 27 |
export PYTHONPATH=. && python -c "from storefront_chatbot.app.app import build; build().launch(server_name='0.0.0.0', server_port=7860)"
|
| 28 |
+
|
| 29 |
+
# --- example demo ---
|
| 30 |
+
example:
|
| 31 |
+
export PYTHONPATH=. && python example/example.py "hello world"
|
| 32 |
+
|
| 33 |
+
# --- data & checks ---
|
| 34 |
seed:
|
| 35 |
python storefront_chatbot/scripts/seed_data.py
|
| 36 |
+
|
| 37 |
check:
|
| 38 |
python storefront_chatbot/scripts/check_compliance.py
|
| 39 |
+
|
| 40 |
+
# --- quality gates ---
|
| 41 |
+
lint:
|
| 42 |
+
flake8 storefront_chatbot
|
| 43 |
+
|
| 44 |
+
fmt:
|
| 45 |
+
black .
|
| 46 |
+
isort .
|
| 47 |
+
|
| 48 |
+
typecheck:
|
| 49 |
+
mypy .
|
| 50 |
+
|
| 51 |
+
# --- hygiene ---
|
| 52 |
+
clean:
|
| 53 |
+
find . -type d -name "__pycache__" -exec rm -rf {} +
|
| 54 |
+
rm -rf .pytest_cache .mypy_cache .ruff_cache .coverage
|
| 55 |
+
|
| 56 |
+
serve:
|
| 57 |
+
export PYTHONPATH=. && uvicorn storefront_chatbot.app.app:build --reload --host 0.0.0.0 --port 7860
|
| 58 |
+
|
| 59 |
+
# --- docker (optional) ---
|
| 60 |
+
docker-build:
|
| 61 |
+
docker build -t storefront-chatbot .
|
| 62 |
+
|
| 63 |
+
docker-run:
|
| 64 |
+
docker run -p 7860:7860 storefront-chatbot
|
| 65 |
+
|
| 66 |
+
# --- bundles ---
|
| 67 |
+
all: clean check test
|
| 68 |
+
ci: lint typecheck coverage
|
anon_bot/handler.py
CHANGED
|
@@ -1,3 +1,42 @@
|
|
| 1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# anon_bot/handler.py
"""
Stateless(ish) turn handler for the anonymous chatbot.

Signature kept tiny: handle_turn(message, history, user) -> new_history
- message: str (user text)
- history: list of [speaker, text] pairs, or None
- user: dict-like info (ignored here, but accepted for compatibility)
"""

from __future__ import annotations
from typing import Any, List, Tuple

from . import rules

History = List[Tuple[str, str]]  # [("user", "..."), ("bot", "...")]


def _coerce_history(h: Any) -> History:
    """Normalize arbitrary history input into a list of (speaker, text) tuples.

    Malformed entries (anything not indexable as a pair) are silently
    skipped so a bad client payload can never crash a turn.
    """
    if not h:
        return []
    out: History = []
    for item in h:
        try:
            who, text = item[0], item[1]
        except Exception:
            # Best-effort: drop entries that are not (speaker, text)-shaped.
            continue
        out.append((str(who), str(text)))
    return out


def handle_turn(message: str, history: History | None, user: dict | None) -> History:
    """Process one chat turn and return the updated history.

    Empty/whitespace-only messages leave the history unchanged.
    `user` is accepted for interface compatibility but unused here.
    """
    hist = _coerce_history(history)
    user_text = (message or "").strip()
    if user_text:
        hist.append(("user", user_text))
        rep = rules.reply_for(user_text, hist)
        hist.append(("bot", rep.text))
    return hist


# Convenience: one-shot string->string (used by plain JSON endpoints)
def handle_text(message: str, history: History | None = None) -> str:
    """Return just the bot's reply text for a single message."""
    new_hist = handle_turn(message, history, user=None)
    # Last item is the bot reply ("" if the message was empty).
    return new_hist[-1][1] if new_hist else ""
|
anon_bot/rules.py
CHANGED
|
@@ -1 +1,90 @@
|
|
| 1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# anon_bot/rules.py
"""
Lightweight rule set for an anonymous chatbot.
No external providers required. Pure-Python, deterministic.
"""

from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Tuple

# ---- Types ----
History = List[Tuple[str, str]]  # e.g., [("user","hi"), ("bot","hello!")]


@dataclass(frozen=True)
class Reply:
    """A single bot reply plus optional string metadata."""
    text: str
    meta: Dict[str, str] | None = None


def normalize(s: str) -> str:
    """Lowercase and collapse all internal whitespace; safe for None."""
    return " ".join((s or "").strip().split()).lower()


def capabilities() -> List[str]:
    """Human-readable list of supported commands (shown by 'help')."""
    return [
        "help",
        "reverse <text>",
        "echo <text>",
        "small talk (hi/hello/hey)",
    ]


def intent_of(text: str) -> str:
    """Classify raw user text into one of:
    empty | help | reverse | echo | greet | chat.
    """
    t = normalize(text)
    if not t:
        return "empty"
    if t in {"help", "/help", "capabilities"}:
        return "help"
    if t.startswith("reverse "):
        return "reverse"
    if t.startswith("echo "):
        return "echo"
    if t in {"hi", "hello", "hey"}:
        return "greet"
    return "chat"


def handle_help() -> Reply:
    """Render the capabilities list as a bullet-point reply."""
    lines = ["I can:"]
    for c in capabilities():
        lines.append(f"- {c}")
    return Reply("\n".join(lines))


def handle_reverse(t: str) -> Reply:
    """Reverse everything after the first space of the raw command."""
    payload = t.split(" ", 1)[1] if " " in t else ""
    return Reply(payload[::-1] if payload else "(nothing to reverse)")


def handle_echo(t: str) -> Reply:
    """Echo everything after the first space of the raw command."""
    payload = t.split(" ", 1)[1] if " " in t else ""
    return Reply(payload or "(nothing to echo)")


def handle_greet() -> Reply:
    """Respond to small talk."""
    # NOTE(review): emoji reconstructed from a mangled scrape — confirm glyph.
    return Reply("Hello! 👋 Type 'help' to see what I can do.")


def handle_chat(t: str, history: History) -> Reply:
    """Very simple 'ELIZA-ish' fallback for anything unclassified.

    `history` is accepted for future use but currently unused.
    """
    if "help" in t:
        return handle_help()
    if "you" in t and "who" in t:
        return Reply("I'm a tiny anonymous chatbot kernel.")
    return Reply("Noted. (anonymous mode) Type 'help' for commands.")


def reply_for(text: str, history: History) -> Reply:
    """Main entry point: route `text` to the matching handler.

    Command handlers receive the RAW text so payload casing/spacing
    is preserved (only intent detection is normalized).
    """
    it = intent_of(text)
    if it == "empty":
        return Reply("Please type something. Try 'help'.")
    if it == "help":
        return handle_help()
    if it == "reverse":
        return handle_reverse(text)
    if it == "echo":
        return handle_echo(text)
    if it == "greet":
        return handle_greet()
    return handle_chat(text.lower(), history)
|
docs/architecture.md
CHANGED
|
@@ -1,2 +1,73 @@
|
|
| 1 |
<!-- /docs/slides/architecture.md -->
|
| 2 |
-
# Architecture
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
<!-- /docs/slides/architecture.md -->
|
| 2 |
+
# Architecture
|
| 3 |
+
|
| 4 |
+
This system follows a **modular chatbot architecture** built around a clear flow of data from the user interface to external services and back. The design emphasizes separation of concerns, allowing each module to handle a specific responsibility while keeping the overall system simple to test and extend.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## High-Level Flow (tied to flowchart)
|
| 9 |
+
|
| 10 |
+
1. **User Interface (UI)**
|
| 11 |
+
- The entry point for user interaction.
|
| 12 |
+
- Implemented through a web client (e.g., Gradio, HTML templates, or API endpoint).
|
| 13 |
+
- Captures user input and displays bot responses.
|
| 14 |
+
|
| 15 |
+
2. **Router / Core Logic**
|
| 16 |
+
- Handles conversation state and routes messages.
|
| 17 |
+
- Delegates to either the anonymous bot, logged-in bot, or agentic extensions.
|
| 18 |
+
- Imports lightweight rules from `anon_bot/rules.py` for anonymous sessions, and integrates with advanced providers for logged-in sessions.
|
| 19 |
+
|
| 20 |
+
3. **NLU (Natural Language Understanding)**
|
| 21 |
+
- Managed by the `nlu/` pipeline (intent recognition, prompts, and routing).
|
| 22 |
+
- Provides preprocessing, normalization, and optional summarization/RAG.
|
| 23 |
+
- Keeps the system extensible for additional models without changing the rest of the stack.
|
| 24 |
+
|
| 25 |
+
4. **Memory & Context Layer**
|
| 26 |
+
- Implemented in `memory/` (sessions, store, and optional RAG retriever/indexer).
|
| 27 |
+
- Stores session history, enabling context-aware responses.
|
| 28 |
+
- Supports modular backends (in-memory, file-based, or vector index).
|
| 29 |
+
|
| 30 |
+
5. **External AI Service Connector (optional)**
|
| 31 |
+
- For logged-in flows, integrates with cloud AIaaS (e.g., Azure, HuggingFace, or open-source LLMs).
|
| 32 |
+
- Uses `logged_in_bot/sentiment_azure.py` or `agenticcore/providers_unified.py`.
|
| 33 |
+
- Provides NLP services like sentiment analysis or summarization.
|
| 34 |
+
- Disabled in anonymous mode for privacy.
|
| 35 |
+
|
| 36 |
+
6. **Guardrails & Safety**
|
| 37 |
+
- Defined in `guardrails/` (PII redaction, safety filters).
|
| 38 |
+
- Applied before responses are shown to the user.
|
| 39 |
+
- Ensures compliance with privacy/security requirements.
|
| 40 |
+
|
| 41 |
+
7. **Outputs**
|
| 42 |
+
- Bot response returned to the UI.
|
| 43 |
+
- Logs written via `core/logging.py` for traceability and debugging.
|
| 44 |
+
- Optional screenshots and reports recorded for evaluation.
|
| 45 |
+
|
| 46 |
+
---
|
| 47 |
+
|
| 48 |
+
## Key Principles
|
| 49 |
+
|
| 50 |
+
- **Modularity**: Each part of the flow is a self-contained module (UI, NLU, memory, guardrails).
|
| 51 |
+
- **Swap-in Providers**: Agentic core can switch between local rules, RAG memory, or external APIs.
|
| 52 |
+
- **Anonymous vs Logged-In**: Anonymous bot uses lightweight rules with no external calls; logged-in bot can call providers.
|
| 53 |
+
- **Extensibility**: Flowchart design makes it easy to add summarization, conversation modes, or other βagenticβ behaviors without rewriting the core.
|
| 54 |
+
- **Resilience**: If an external service fails, the system degrades gracefully to local responses.
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
## Mapping to Repo Structure
|
| 59 |
+
|
| 60 |
+
- `app/` β User-facing entrypoint (routes, HTML, API).
|
| 61 |
+
- `anon_bot/` β Anonymous chatbot rules + handler.
|
| 62 |
+
- `logged_in_bot/` β Provider-based flows for authenticated users.
|
| 63 |
+
- `nlu/` β Intent routing, prompts, pipeline.
|
| 64 |
+
- `memory/` β Session management + RAG integration.
|
| 65 |
+
- `guardrails/` β Safety filters + PII redaction.
|
| 66 |
+
- `agenticcore/` β Core integration logic and unified providers.
|
| 67 |
+
- `docs/flowchart.png` β Visual representation of this architecture.
|
| 68 |
+
|
| 69 |
+
---
|
| 70 |
+
|
| 71 |
+
## Summary
|
| 72 |
+
|
| 73 |
+
The architecture ensures a **clean separation between interface, logic, and services**, enabling experimentation with different providers while guaranteeing a safe, privacy-friendly anonymous mode. The flowchart illustrates this layered approach: input β logic β NLU/memory β optional AIaaS β guardrails β output.
|
docs/design.md
CHANGED
|
@@ -1,2 +1,72 @@
|
|
| 1 |
<!-- /docs/slides/design.md -->
|
| 2 |
-
# Design
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
<!-- /docs/slides/design.md -->
|
| 2 |
+
# Design Notes
|
| 3 |
+
|
| 4 |
+
These notes document the reasoning behind major design choices, focusing on **API usage**, **security considerations**, and **tradeoffs** made during development.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## API Notes
|
| 9 |
+
|
| 10 |
+
- **Anonymous vs Logged-In Flows**
|
| 11 |
+
- The **anonymous chatbot** relies purely on local rules (`anon_bot/rules.py`) and does not call any external services.
|
| 12 |
+
- The **logged-in chatbot** integrates with external AIaaS endpoints (e.g., Azure, HuggingFace, or other NLP providers) via modules in `logged_in_bot/` and `agenticcore/providers_unified.py`.
|
| 13 |
+
|
| 14 |
+
- **Endpoints**
|
| 15 |
+
- `/plain-chat` β Anonymous flow; maps to `logic.handle_text`.
|
| 16 |
+
- `/api/messages` β For framework compatibility (e.g., BotFramework or FastAPI demo).
|
| 17 |
+
- `/healthz` β Lightweight health check for monitoring.
|
| 18 |
+
|
| 19 |
+
- **NLU Pipeline**
|
| 20 |
+
- Intent routing (`nlu/router.py`) determines if user input should be treated as a direct command, a small-talk message, or passed to providers.
|
| 21 |
+
- Prompts and transformations are managed in `nlu/prompts.py` to centralize natural language templates.
|
| 22 |
+
|
| 23 |
+
- **Memory Integration**
|
| 24 |
+
- Session memory stored in `memory/sessions.py`.
|
| 25 |
+
- Optional RAG indexer (`memory/rag/indexer.py`) allows document retrieval for extended context.
|
| 26 |
+
|
| 27 |
+
---
|
| 28 |
+
|
| 29 |
+
## Security Considerations
|
| 30 |
+
|
| 31 |
+
- **API Keys**
|
| 32 |
+
- Keys for external services are never hard-coded.
|
| 33 |
+
- They are pulled from environment variables or `.env` files (via `core/config.py`).
|
| 34 |
+
|
| 35 |
+
- **Data Handling**
|
| 36 |
+
- Anonymous mode never sends user text outside the local process.
|
| 37 |
+
- Logged-in mode applies guardrails before making external calls.
|
| 38 |
+
- Sensitive information (emails, IDs) is redacted using `guardrails/pii_redaction.py`.
|
| 39 |
+
|
| 40 |
+
- **Logging**
|
| 41 |
+
- Logs are structured (`core/logging.py`) and omit sensitive data by default.
|
| 42 |
+
- Debug mode can be enabled for local testing but should not be used in production.
|
| 43 |
+
|
| 44 |
+
- **Privacy**
|
| 45 |
+
- Anonymous sessions are ephemeral: conversation state is stored only in memory unless explicitly persisted.
|
| 46 |
+
- Logged-in sessions may optionally persist data, but only with user consent.
|
| 47 |
+
|
| 48 |
+
---
|
| 49 |
+
|
| 50 |
+
## Tradeoffs
|
| 51 |
+
|
| 52 |
+
- **Rule-Based vs AI-Powered**
|
| 53 |
+
- Rule-based responses are deterministic, fast, and private but limited in sophistication.
|
| 54 |
+
- AI-powered responses (via providers) allow richer understanding but introduce latency, costs, and privacy risks.
|
| 55 |
+
|
| 56 |
+
- **Extensibility vs Simplicity**
|
| 57 |
+
- Chose a **modular repo structure** (separate folders for `anon_bot`, `logged_in_bot`, `memory`, `nlu`) to allow future growth.
|
| 58 |
+
- This adds some boilerplate overhead but makes it easier to swap components.
|
| 59 |
+
|
| 60 |
+
- **Performance vs Accuracy**
|
| 61 |
+
- Non-functional requirement: responses within 2 seconds for 95% of requests.
|
| 62 |
+
- This meant prioritizing lightweight providers and caching over heavyweight models.
|
| 63 |
+
|
| 64 |
+
- **Anonymous Mode as Default**
|
| 65 |
+
- Defaulting to anonymous mode ensures the system works offline and avoids external dependencies.
|
| 66 |
+
- Tradeoff: limits functionality until the user explicitly opts in for a logged-in session.
|
| 67 |
+
|
| 68 |
+
---
|
| 69 |
+
|
| 70 |
+
## Summary
|
| 71 |
+
|
| 72 |
+
The design balances **privacy, modularity, and extensibility**. By cleanly separating anonymous and logged-in paths, the system can run entirely offline while still supporting richer AI features when configured. Security and privacy are first-class concerns, and tradeoffs were made to keep the system lightweight, testable, and compliant with project constraints.
|
docs/flowchart.png
DELETED
examples/example-dev.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# /example/example-dev.py
"""
Dev environment sanity example.

- Imports ChatBot
- Sends a test message
- Prints the JSON reply
- Confirms basic dependencies work

Usage:
    python example/example-dev.py
"""

import json
import sys

try:
    from agenticcore.chatbot.services import ChatBot
except ImportError as e:
    # Fail fast with a hint instead of a raw traceback; include the
    # original error so the missing dependency is identifiable.
    print(f"❌ Could not import ChatBot ({e}). Did you set PYTHONPATH or install dependencies?")
    sys.exit(1)


def main():
    """Send one canned message through the bot and pretty-print the reply."""
    bot = ChatBot()
    msg = "Hello from example-dev!"
    result = bot.reply(msg)

    print("✅ Dev environment is working")
    print("Input:", msg)
    print("Reply JSON:")
    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()
|
examples/example.py
CHANGED
|
@@ -1,9 +1,63 @@
|
|
| 1 |
# /example/example.py
|
| 2 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import json
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
if __name__ == "__main__":
|
| 7 |
-
|
| 8 |
-
result = bot.reply("hello world")
|
| 9 |
-
print(json.dumps(result, indent=2))
|
|
|
|
# /example/example.py
"""
Simple CLI/REPL example for the ChatBot.

Usage:
    python example/example.py "hello world"
    python example/example.py            # enters interactive mode
"""

import argparse
import json
import sys

try:
    from agenticcore.chatbot.services import ChatBot
except ImportError as e:
    # Include the original error so the missing dependency is identifiable.
    print(f"❌ Could not import ChatBot ({e}). Did you set PYTHONPATH or install agenticcore?")
    sys.exit(1)


def main():
    """Parse CLI args and run either one-shot or interactive mode."""
    parser = argparse.ArgumentParser(description="ChatBot CLI/REPL example")
    parser.add_argument(
        "message",
        nargs="*",
        help="Message to send. Leave empty to start interactive mode.",
    )
    args = parser.parse_args()

    try:
        bot = ChatBot()
    except Exception as e:
        print(f"❌ Failed to initialize ChatBot: {e}")
        sys.exit(1)

    if args.message:
        # One-shot mode: join all positional words into a single message.
        msg = " ".join(args.message)
        result = bot.reply(msg)
        print(json.dumps(result, indent=2))
    else:
        # Interactive REPL
        print("💬 Interactive mode. Type 'quit' or 'exit' to stop.")
        while True:
            try:
                msg = input("> ").strip()
            except (EOFError, KeyboardInterrupt):
                print("\n👋 Exiting.")
                break

            if msg.lower() in {"quit", "exit"}:
                print("👋 Goodbye.")
                break

            if not msg:
                continue

            result = bot.reply(msg)
            print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()
|
|
|
|
|
|
integrations/azure/bot_framework.py
CHANGED
|
@@ -1,2 +1,39 @@
|
|
| 1 |
-
# /
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# integrations/azure/bot_framework.py
"""
Azure Bot Framework integration (stub).

This module is a placeholder for connecting the chatbot
to Microsoft Azure Bot Framework. It is optional —
the anonymous bot does not depend on this code.

If you want to enable Azure:
1. Install `botbuilder` SDK (pip install botbuilder-core aiohttp).
2. Fill in the adapter setup and message handling below.
"""

from __future__ import annotations  # allows `X | None` annotations on Python < 3.10

from typing import Any, Dict


class AzureBotFrameworkNotConfigured(Exception):
    """Raised when Azure Bot Framework is called but not set up."""


def init_adapter(config: Dict[str, Any] | None = None):
    """
    Placeholder for BotFrameworkAdapter initialization.

    Always raises AzureBotFrameworkNotConfigured until replaced with
    actual Azure code; `config` is accepted for interface compatibility.
    """
    raise AzureBotFrameworkNotConfigured(
        "Azure Bot Framework integration is not configured. "
        "Use anon_bot for local testing."
    )


def handle_activity(activity: Dict[str, Any]) -> Dict[str, Any]:
    """
    Placeholder for handling an incoming Bot Framework activity.
    Echoes back a dummy response if called directly.
    """
    if not activity:
        return {"type": "message", "text": "(no activity received)"}
    return {"type": "message", "text": f"Echo: {activity.get('text', '')}"}
|
integrations/email/ticket_stub.py
CHANGED
|
@@ -1,2 +1,57 @@
|
|
| 1 |
# /intergrations/email/ticket_stub.py
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# /integrations/email/ticket_stub.py
"""
Email / Ticket System Stub.

This module simulates creating a support ticket via email.
It is a placeholder — no actual emails are sent.
"""

from __future__ import annotations  # allows `str | None` annotations on Python < 3.10

from typing import Any, Dict
import datetime
import uuid


class TicketStub:
    """
    A stub ticketing system that generates a fake ticket ID
    and stores basic info in memory.
    """

    def __init__(self):
        # ticket_id -> ticket metadata dict
        self.tickets: Dict[str, Dict[str, Any]] = {}

    def create_ticket(self, subject: str, body: str, user: str | None = None) -> Dict[str, Any]:
        """
        Create a fake support ticket.
        Returns a dictionary with ticket metadata.
        """
        ticket_id = str(uuid.uuid4())
        # Timezone-aware UTC timestamp (datetime.utcnow() is deprecated);
        # keep the trailing-"Z" convention of the original format.
        created = datetime.datetime.now(datetime.timezone.utc)
        ticket = {
            "id": ticket_id,
            "subject": subject,
            "body": body,
            "user": user or "anonymous",
            "created_at": created.isoformat().replace("+00:00", "Z"),
            "status": "open",
        }
        self.tickets[ticket_id] = ticket
        return ticket

    def get_ticket(self, ticket_id: str) -> Dict[str, Any] | None:
        """Retrieve a ticket by ID if it exists."""
        return self.tickets.get(ticket_id)

    def list_tickets(self) -> list[Dict[str, Any]]:
        """Return all created tickets."""
        return list(self.tickets.values())


# Singleton for convenience
stub = TicketStub()


def create_ticket(subject: str, body: str, user: str | None = None) -> Dict[str, Any]:
    """
    Module-level shortcut: create a ticket on the shared `stub` instance.
    """
    return stub.create_ticket(subject, body, user)
|
logged_in_bot/sentiment_azure.py
CHANGED
|
@@ -1 +1,188 @@
|
|
| 1 |
# /logged_in_bot/sentiment_azure.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /logged_in_bot/sentiment_azure.py
|
| 2 |
+
"""
|
| 3 |
+
Optional Azure Sentiment integration with safe local fallback.
|
| 4 |
+
|
| 5 |
+
Usage:
|
| 6 |
+
from logged_in_bot.sentiment_azure import analyze_sentiment, SentimentResult
|
| 7 |
+
|
| 8 |
+
res = analyze_sentiment("I love this!")
|
| 9 |
+
print(res.label, res.score, res.backend) # e.g., "positive", 0.92, "local"
|
| 10 |
+
|
| 11 |
+
Environment (Azure path only):
|
| 12 |
+
- AZURE_LANGUAGE_ENDPOINT or MICROSOFT_AI_ENDPOINT
|
| 13 |
+
- AZURE_LANGUAGE_KEY or MICROSOFT_AI_KEY
|
| 14 |
+
|
| 15 |
+
If the Azure SDK or env vars are missing, we automatically fall back to a
|
| 16 |
+
deterministic, dependency-free heuristic that is fast and good enough for tests.
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
from dataclasses import dataclass
|
| 21 |
+
from typing import Optional, Tuple
|
| 22 |
+
import os
|
| 23 |
+
import re
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# ---------------------------
|
| 27 |
+
# Public dataclass & API
|
| 28 |
+
# ---------------------------
|
| 29 |
+
|
| 30 |
+
@dataclass(frozen=True)
|
| 31 |
+
class SentimentResult:
|
| 32 |
+
label: str # "positive" | "neutral" | "negative"
|
| 33 |
+
score: float # 0.0 .. 1.0 (confidence-like)
|
| 34 |
+
backend: str # "azure" | "local"
|
| 35 |
+
raw: Optional[dict] = None # provider raw payload if available
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def analyze_sentiment(text: str) -> SentimentResult:
|
| 39 |
+
"""
|
| 40 |
+
Analyze sentiment using Azure if configured, otherwise use local heuristic.
|
| 41 |
+
|
| 42 |
+
Never raises on normal use β returns a result even if Azure is misconfigured,
|
| 43 |
+
satisfying 'graceful degradation' requirements.
|
| 44 |
+
"""
|
| 45 |
+
text = (text or "").strip()
|
| 46 |
+
if not text:
|
| 47 |
+
return SentimentResult(label="neutral", score=0.5, backend="local", raw={"reason": "empty"})
|
| 48 |
+
|
| 49 |
+
# Try Azure first (only if fully configured and package available)
|
| 50 |
+
azure_ready, why = _is_azure_ready()
|
| 51 |
+
if azure_ready:
|
| 52 |
+
try:
|
| 53 |
+
return _azure_sentiment(text)
|
| 54 |
+
except Exception as e:
|
| 55 |
+
# Degrade gracefully to local
|
| 56 |
+
return _local_sentiment(text, note=f"azure_error: {e!r}")
|
| 57 |
+
else:
|
| 58 |
+
# Go local immediately
|
| 59 |
+
return _local_sentiment(text, note=why)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# ---------------------------
|
| 63 |
+
# Azure path (optional)
|
| 64 |
+
# ---------------------------
|
| 65 |
+
|
| 66 |
+
def _is_azure_ready() -> Tuple[bool, str]:
|
| 67 |
+
"""
|
| 68 |
+
Check env + optional SDK presence without importing heavy modules unless needed.
|
| 69 |
+
"""
|
| 70 |
+
endpoint = os.getenv("AZURE_LANGUAGE_ENDPOINT") or os.getenv("MICROSOFT_AI_ENDPOINT")
|
| 71 |
+
key = os.getenv("AZURE_LANGUAGE_KEY") or os.getenv("MICROSOFT_AI_KEY")
|
| 72 |
+
if not endpoint or not key:
|
| 73 |
+
return False, "missing_env"
|
| 74 |
+
|
| 75 |
+
try:
|
| 76 |
+
# Light import check
|
| 77 |
+
import importlib
|
| 78 |
+
client_mod = importlib.import_module("azure.ai.textanalytics")
|
| 79 |
+
cred_mod = importlib.import_module("azure.core.credentials")
|
| 80 |
+
# Quick sanity on expected attributes
|
| 81 |
+
getattr(client_mod, "TextAnalyticsClient")
|
| 82 |
+
getattr(cred_mod, "AzureKeyCredential")
|
| 83 |
+
except Exception:
|
| 84 |
+
return False, "sdk_not_installed"
|
| 85 |
+
|
| 86 |
+
return True, "ok"
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _azure_sentiment(text: str) -> SentimentResult:
    """
    Call Azure Text Analytics (Sentiment). Requires:
        pip install azure-ai-textanalytics

    Caller (analyze_sentiment) guards with _is_azure_ready() and catches any
    exception raised here, so this function may assume env vars are present.
    """
    from azure.ai.textanalytics import TextAnalyticsClient
    from azure.core.credentials import AzureKeyCredential

    endpoint = os.getenv("AZURE_LANGUAGE_ENDPOINT") or os.getenv("MICROSOFT_AI_ENDPOINT")
    key = os.getenv("AZURE_LANGUAGE_KEY") or os.getenv("MICROSOFT_AI_KEY")

    client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
    # API expects a list of documents; we send a single-document batch.
    resp = client.analyze_sentiment(documents=[text], show_opinion_mining=False)
    # NOTE(review): assumes resp[0] is a successful result document; an
    # error document would lack .confidence_scores -- confirm against SDK docs.
    doc = resp[0]

    # Map Azure scores to our schema
    label = (doc.sentiment or "neutral").lower()
    # Choose max score among pos/neu/neg as "confidence-like"
    score_map = {
        "positive": doc.confidence_scores.positive,
        "neutral": doc.confidence_scores.neutral,
        "negative": doc.confidence_scores.negative,
    }
    # "mixed" (or any unexpected label) falls back to the strongest score.
    score = float(score_map.get(label, max(score_map.values())))
    raw = {
        "sentiment": doc.sentiment,
        "confidence_scores": {
            "positive": doc.confidence_scores.positive,
            "neutral": doc.confidence_scores.neutral,
            "negative": doc.confidence_scores.negative,
        },
    }
    return SentimentResult(label=label, score=score, backend="azure", raw=raw)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
# ---------------------------
|
| 126 |
+
# Local fallback (no deps)
|
| 127 |
+
# ---------------------------
|
| 128 |
+
|
| 129 |
+
_POSITIVE = {
    "good", "great", "love", "excellent", "amazing", "awesome", "happy",
    "wonderful", "fantastic", "like", "enjoy", "cool", "nice", "positive",
}
_NEGATIVE = {
    "bad", "terrible", "hate", "awful", "horrible", "sad", "angry",
    "worse", "worst", "broken", "bug", "issue", "problem", "negative",
}
# Simple negation tokens that flip the polarity of a nearby sentiment word.
_NEGATIONS = {"not", "no", "never", "n't"}

_WORD_RE = re.compile(r"[A-Za-z']+")


def _local_sentiment(text: str, note: str | None = None) -> SentimentResult:
    """
    Dependency-free lexicon heuristic used when Azure is unavailable.

    Tokenizes letters/apostrophes, scores +1 per positive word and -1 per
    negative word, flipping the sign when a negation appears within the
    previous 3 tokens, then maps the total to a pseudo-confidence in 0..1
    (|score| clamped to 6 -> confidence in ~[0.5, 0.95]).
    """
    words = [w.lower() for w in _WORD_RE.findall(text)]

    total = 0
    for idx, word in enumerate(words):
        negated = any(w in _NEGATIONS for w in words[max(0, idx - 3):idx])
        if word in _POSITIVE:
            total += -1 if negated else 1
        elif word in _NEGATIVE:
            total += 1 if negated else -1

    if total > 0:
        label = "positive"
    elif total < 0:
        label = "negative"
    else:
        label = "neutral"

    # Clamp magnitude to 6 and map linearly onto 0.5..0.95.
    magnitude = min(abs(total), 6)
    conf = 0.5 + (magnitude / 6) * 0.45

    raw: dict = {"engine": "heuristic", "score_raw": total}
    if note:
        raw["note"] = note
    return SentimentResult(label=label, score=round(conf, 3), backend="local", raw=raw)
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# ---------------------------
|
| 178 |
+
# Convenience (module-level)
|
| 179 |
+
# ---------------------------
|
| 180 |
+
|
| 181 |
+
def sentiment_label(text: str) -> str:
    """Return only 'positive' | 'neutral' | 'negative'."""
    result = analyze_sentiment(text)
    return result.label
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def sentiment_score(text: str) -> float:
    """Return only the 0..1 confidence-like score."""
    result = analyze_sentiment(text)
    return result.score
|
logged_in_bot/tools.py
CHANGED
|
@@ -1 +1,225 @@
|
|
| 1 |
# /logged_in_bot/tools.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /logged_in_bot/tools.py
|
| 2 |
+
"""
|
| 3 |
+
Utilities for the logged-in chatbot flow.
|
| 4 |
+
|
| 5 |
+
Features
|
| 6 |
+
- PII redaction (optional) via guardrails.pii_redaction
|
| 7 |
+
- Sentiment (optional) via logged_in_bot.sentiment_azure (falls back locally)
|
| 8 |
+
- Tiny intent router: summarize | echo | chat
|
| 9 |
+
- Deterministic, dependency-light; safe to import in any environment
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
from dataclasses import asdict, dataclass
|
| 14 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 15 |
+
import os
|
| 16 |
+
import re
|
| 17 |
+
|
| 18 |
+
# -------------------------
|
| 19 |
+
# Optional imports (safe)
|
| 20 |
+
# -------------------------
|
| 21 |
+
|
| 22 |
+
# Sentiment (ours): falls back to a local heuristic if Azure SDK/env missing
|
| 23 |
+
try:
|
| 24 |
+
from .sentiment_azure import analyze_sentiment, SentimentResult # type: ignore
|
| 25 |
+
except Exception: # pragma: no cover
|
| 26 |
+
analyze_sentiment = None
|
| 27 |
+
SentimentResult = None # type: ignore
|
| 28 |
+
|
| 29 |
+
# Guardrails redaction (optional)
|
| 30 |
+
try:
|
| 31 |
+
from guardrails.pii_redaction import redact as pii_redact # type: ignore
|
| 32 |
+
except Exception: # pragma: no cover
|
| 33 |
+
pii_redact = None
|
| 34 |
+
|
| 35 |
+
# core types (optional shape for JSON response)
try:
    from core.types import PlainChatResponse  # dataclass with .to_dict()
except Exception:  # pragma: no cover
    @dataclass
    class PlainChatResponse:  # lightweight fallback shape
        """Minimal stand-in mirroring core.types.PlainChatResponse."""
        # Bot reply text.
        reply: str
        # Optional metadata (intent, sentiment, redaction flags, ...).
        meta: Optional[Dict[str, Any]] = None

        def to_dict(self) -> Dict[str, Any]:
            """Serialize to a plain dict (same contract as the real type)."""
            return asdict(self)


History = List[Tuple[str, str]]  # [("user","..."), ("bot","...")]
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# -------------------------
|
| 52 |
+
# Helpers
|
| 53 |
+
# -------------------------
|
| 54 |
+
|
| 55 |
+
_WHITESPACE_RE = re.compile(r"\s+")


def sanitize_text(text: str) -> str:
    """Normalize whitespace and cap very large inputs; cheap and deterministic."""
    cleaned = _WHITESPACE_RE.sub(" ", (text or "").strip())
    # Cap extremely large payloads to protect downstream inference/services.
    limit = int(os.getenv("MAX_INPUT_CHARS", "4000"))
    if len(cleaned) > limit:
        cleaned = cleaned[:limit] + "…"
    return cleaned
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def redact_text(text: str) -> str:
    """Run optional PII redaction when the guardrails hook is present."""
    if not pii_redact:
        # No redactor installed: pass the text through unchanged.
        return text
    try:
        return pii_redact(text)
    except Exception:
        # Redactor misbehaved: fail open but safe (original text).
        return text
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def intent_of(text: str) -> str:
    """Ultra-tiny intent classifier: summarize | echo | help | chat | empty."""
    normalized = text.lower().strip()
    if not normalized:
        return "empty"
    # Padding lets a bare substring check find "summarize" mid-sentence.
    padded = f" {normalized} "
    if normalized.startswith(("summarize ", "summarise ")) or " summarize " in padded:
        return "summarize"
    if normalized.startswith("echo "):
        return "echo"
    if normalized in ("help", "/help", "capabilities"):
        return "help"
    return "chat"
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def summarize_text(text: str, target_len: int = 120) -> str:
    """
    CPU-cheap pseudo-summarizer: return the first sentence, truncated to
    `target_len` characters (with an ellipsis) when it runs long.  The API is
    stable so a real HF model can be swapped in later.
    """
    # Naive sentence boundary: split after ., ! or ? followed by whitespace.
    sentences = re.split(r"(?<=[.!?])\s+", text.strip())
    lead = sentences[0] if sentences else text.strip()
    if len(lead) <= target_len:
        return lead
    return lead[: target_len - 1].rstrip() + "…"
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def capabilities() -> List[str]:
    """List the commands this mode supports (rendered by the 'help' intent)."""
    commands = [
        "help",
        "echo <text>",
        "summarize <paragraph>",
        "sentiment tagging (logged-in mode)",
    ]
    return commands
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
# -------------------------
|
| 118 |
+
# Main entry
|
| 119 |
+
# -------------------------
|
| 120 |
+
|
| 121 |
+
def handle_logged_in_turn(message: str, history: Optional[History], user: Optional[dict]) -> Dict[str, Any]:
    """
    Process one user turn in 'logged-in' mode.

    Returns a PlainChatResponse (dict) with:
      - reply: str
      - meta: { intent, sentiment: {label, score, backend}, redacted: bool }

    Parameters
    ----------
    message : raw user text for this turn.
    history : prior (speaker, text) pairs; only forwarded to the chat
        fallback, which currently ignores it.
    user : logged-in user record; accepted for API symmetry but not read here.
    """
    history = history or []
    user_text_raw = message or ""
    user_text = sanitize_text(user_text_raw)
    redacted = False

    # Redact PII if available (no-op when the guardrails hook is missing).
    redacted_text = redact_text(user_text)
    redacted = (redacted_text != user_text)

    # Route on the redacted text so PII never influences intent handling.
    it = intent_of(redacted_text)

    # ---------- route ----------
    if it == "empty":
        reply = "Please type something. Try 'help' for options."
        meta = _meta(redacted, it, redacted_text)
        return PlainChatResponse(reply=reply, meta=meta).to_dict()

    if it == "help":
        reply = "I can:\n" + "\n".join(f"- {c}" for c in capabilities())
        meta = _meta(redacted, it, redacted_text)
        return PlainChatResponse(reply=reply, meta=meta).to_dict()

    if it == "echo":
        # Everything after the leading "echo " keyword.
        payload = redacted_text.split(" ", 1)[1] if " " in redacted_text else ""
        reply = payload or "(nothing to echo)"
        meta = _meta(redacted, it, redacted_text)
        _attach_sentiment(meta, reply)  # sentiment on reply text
        return PlainChatResponse(reply=reply, meta=meta).to_dict()

    if it == "summarize":
        # Use everything after the keyword if present
        if redacted_text.lower().startswith("summarize "):
            payload = redacted_text.split(" ", 1)[1]
        elif redacted_text.lower().startswith("summarise "):
            payload = redacted_text.split(" ", 1)[1]
        else:
            # Intent matched mid-sentence: summarize the whole input.
            payload = redacted_text
        reply = summarize_text(payload)
        meta = _meta(redacted, it, redacted_text)
        _attach_sentiment(meta, payload)  # sentiment on source text
        return PlainChatResponse(reply=reply, meta=meta).to_dict()

    # default: chat
    reply = _chat_fallback(redacted_text, history)
    meta = _meta(redacted, it, redacted_text)
    _attach_sentiment(meta, redacted_text)
    return PlainChatResponse(reply=reply, meta=meta).to_dict()
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
# -------------------------
|
| 179 |
+
# Internals
|
| 180 |
+
# -------------------------
|
| 181 |
+
|
| 182 |
+
def _chat_fallback(text: str, history: History) -> str:
|
| 183 |
+
"""
|
| 184 |
+
Minimal deterministic fallback for general chat in logged-in mode.
|
| 185 |
+
Swap this for a provider call if/when you enable one.
|
| 186 |
+
"""
|
| 187 |
+
if "who are you" in text.lower():
|
| 188 |
+
return "I'm the logged-in chatbot. I can echo, summarize, and tag sentiment."
|
| 189 |
+
return "Noted! (logged-in mode). Type 'help' for options."
|
| 190 |
+
|
| 191 |
+
def _meta(redacted: bool, intent: str, redacted_text: str) -> Dict[str, Any]:
|
| 192 |
+
return {
|
| 193 |
+
"intent": intent,
|
| 194 |
+
"redacted": redacted,
|
| 195 |
+
"input_len": len(redacted_text),
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
def _attach_sentiment(meta: Dict[str, Any], text: str) -> None:
|
| 199 |
+
"""Attach sentiment to meta if available; never raises."""
|
| 200 |
+
try:
|
| 201 |
+
if analyze_sentiment:
|
| 202 |
+
res = analyze_sentiment(text)
|
| 203 |
+
if hasattr(res, "__dict__"):
|
| 204 |
+
meta["sentiment"] = {
|
| 205 |
+
"label": res.label,
|
| 206 |
+
"score": res.score,
|
| 207 |
+
"backend": res.backend,
|
| 208 |
+
}
|
| 209 |
+
else: # unexpected object β store string
|
| 210 |
+
meta["sentiment"] = {"label": str(res)}
|
| 211 |
+
else:
|
| 212 |
+
# no module available
|
| 213 |
+
meta["sentiment"] = {"label": "neutral", "score": 0.5, "backend": "none"}
|
| 214 |
+
except Exception as e: # pragma: no cover
|
| 215 |
+
meta["sentiment"] = {"error": f"{type(e).__name__}: {e}"}
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
# Public API of this module.
__all__ = [
    "handle_logged_in_turn",
    "sanitize_text",
    "redact_text",
    "intent_of",
    "summarize_text",
    "capabilities",
]
|
memory/rag/indexer.py
CHANGED
|
@@ -1 +1,344 @@
|
|
| 1 |
# /memory/rag/data/indexer.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /memory/rag/data/indexer.py
|
| 2 |
+
"""
|
| 3 |
+
Minimal, dependency-free TF-IDF indexer for RAG.
|
| 4 |
+
|
| 5 |
+
Features
|
| 6 |
+
- Build from folder (recursive), index plain-text files
|
| 7 |
+
- Add individual text blobs with metadata
|
| 8 |
+
- Persist/load inverted index to/from JSON
|
| 9 |
+
- Search with TF-IDF scoring and simple query normalization
|
| 10 |
+
- Return top-k with tiny context snippets
|
| 11 |
+
|
| 12 |
+
This module is intentionally small and pure-Python to keep local CPU demos simple.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
from dataclasses import dataclass, asdict
|
| 17 |
+
from typing import Dict, List, Tuple, Iterable, Optional
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
import json
|
| 20 |
+
import math
|
| 21 |
+
import hashlib
|
| 22 |
+
import re
|
| 23 |
+
import fnmatch
|
| 24 |
+
import time
|
| 25 |
+
|
| 26 |
+
# -----------------------------
|
| 27 |
+
# Types
|
| 28 |
+
# -----------------------------
|
| 29 |
+
|
| 30 |
+
@dataclass(frozen=True)
class DocMeta:
    """Immutable metadata describing one indexed document."""
    doc_id: str
    source: str                   # e.g., absolute path or "inline"
    title: str | None = None      # display name (file name for file docs)
    tags: List[str] | None = None  # optional free-form labels
    mtime: float | None = None    # source last modified (if file)
    hash: str | None = None       # content hash
|
| 38 |
+
|
| 39 |
+
@dataclass(frozen=True)
class Hit:
    """One ranked search result with a short context snippet."""
    doc_id: str
    score: float                  # TF-IDF score, rounded to 4 decimals
    source: str                   # provenance (path or "inline")
    snippet: str                  # small excerpt around the first match
    title: str | None = None
    tags: List[str] | None = None
|
| 47 |
+
|
| 48 |
+
# -----------------------------
|
| 49 |
+
# Tokenization
|
| 50 |
+
# -----------------------------
|
| 51 |
+
|
| 52 |
+
_WORD_RE = re.compile(r"[A-Za-z0-9']+")

def tokenize(text: str) -> List[str]:
    """Deterministic lowercase tokenizer over alphanumerics and apostrophes."""
    matches = _WORD_RE.findall(text or "")
    return [m.lower() for m in matches]
|
| 57 |
+
|
| 58 |
+
# -----------------------------
|
| 59 |
+
# Index
|
| 60 |
+
# -----------------------------
|
| 61 |
+
|
| 62 |
+
class TfidfIndex:
    """
    Tiny TF-IDF inverted index with JSON persistence.

    Structures:
      - docs: doc_id -> {"meta": DocMeta, "len": int, "text": str (optional)}
      - inv: term -> {doc_id: tf} (raw term frequency)
      - df: term -> document frequency
      - n_docs: total number of docs
    """

    def __init__(self) -> None:
        """Create an empty index."""
        self.docs: Dict[str, Dict] = {}
        self.inv: Dict[str, Dict[str, int]] = {}
        self.df: Dict[str, int] = {}
        self.n_docs: int = 0

    # ---------- add documents ----------

    def add_text(self, doc_id: str, text: str, meta: DocMeta) -> None:
        """
        Add (or re-add) a text blob under `doc_id`.

        Empty text, or text that yields no tokens, is a no-op (note: when the
        doc already existed and the new text tokenizes to nothing, the old
        postings are removed and the doc ends up deleted).
        """
        if not text:
            return
        if doc_id in self.docs:
            # idempotent update: remove old postings first
            self._remove_doc_terms(doc_id)

        toks = tokenize(text)
        if not toks:
            return

        # Raw term frequencies for this document.
        tf: Dict[str, int] = {}
        for t in toks:
            tf[t] = tf.get(t, 0) + 1

        # update inv + df
        for term, cnt in tf.items():
            bucket = self.inv.setdefault(term, {})
            bucket[doc_id] = cnt
            self.df[term] = len(bucket)

        self.docs[doc_id] = {
            "meta": meta,
            "len": len(toks),
            # keep original text for snippet extraction; you can drop this if size matters
            "text": text,
        }
        self.n_docs = len(self.docs)

    def add_file(self, path: Path, doc_id: str | None = None, title: str | None = None, tags: List[str] | None = None) -> Optional[str]:
        """
        Index a file from disk; returns its doc_id or None if not a file.

        Skips re-indexing when both content hash and mtime are unchanged.
        """
        path = Path(path)
        if not path.is_file():
            return None
        text = path.read_text(encoding="utf-8", errors="ignore")
        h = sha256_of(text)
        stat = path.stat()
        doc_id = doc_id or str(path.resolve())

        # skip if unchanged
        prev = self.docs.get(doc_id)
        if prev:
            old_meta: DocMeta = prev["meta"]
            if old_meta.hash == h and old_meta.mtime == stat.st_mtime:
                return doc_id  # unchanged

        meta = DocMeta(
            doc_id=doc_id,
            source=str(path.resolve()),
            title=title or path.name,
            tags=tags,
            mtime=stat.st_mtime,
            hash=h,
        )
        self.add_text(doc_id, text, meta)
        return doc_id

    # ---------- build / scan ----------

    def build_from_folder(
        self,
        root: Path,
        include: Iterable[str] = ("*.txt", "*.md"),
        exclude: Iterable[str] = (".git/*",),
        recursive: bool = True,
    ) -> int:
        """
        Index all files under `root` matching any include pattern and not matching exclude.
        Returns number of files indexed or updated.

        Patterns are matched against the POSIX-style path relative to `root`
        (so "*.md" matches only top-level files; use "**/*.md"-style patterns
        for nested matches per fnmatch semantics).
        """
        root = Path(root)
        if not root.exists():
            return 0

        count = 0
        paths = (root.rglob("*") if recursive else root.glob("*"))
        for p in paths:
            if not p.is_file():
                continue
            rel = str(p.relative_to(root).as_posix())
            if not any(fnmatch.fnmatch(rel, pat) for pat in include):
                continue
            if any(fnmatch.fnmatch(rel, pat) for pat in exclude):
                continue
            if self.add_file(p):
                count += 1
        return count

    # ---------- search ----------

    def search(self, query: str, k: int = 5) -> List[Hit]:
        """
        Rank documents against `query` with log-TF * smoothed-IDF weights and
        a gentle 1/sqrt(len) length normalization; return the top `k` hits.
        """
        q_toks = tokenize(query)
        if not q_toks or self.n_docs == 0:
            return []

        # compute query tf-idf (using binary or raw tf is fine; keep it simple)
        q_tf: Dict[str, int] = {}
        for t in q_toks:
            q_tf[t] = q_tf.get(t, 0) + 1

        # compute idf with +1 smoothing
        idf: Dict[str, float] = {}
        for t in q_tf:
            df = self.df.get(t, 0)
            idf[t] = math.log((1 + self.n_docs) / (1 + df)) + 1.0

        # accumulate scores: cosine-like with length norm
        scores: Dict[str, float] = {}
        doc_len_norm: Dict[str, float] = {}
        for term, qcnt in q_tf.items():
            postings = self.inv.get(term)
            if not postings:
                continue
            wq = (1 + math.log(qcnt)) * idf[term]  # log tf * idf
            for doc_id, dcnt in postings.items():
                wd = (1 + math.log(dcnt)) * idf[term]
                scores[doc_id] = scores.get(doc_id, 0.0) + (wq * wd)
                # cache norm
                if doc_id not in doc_len_norm:
                    L = max(1, self.docs[doc_id]["len"])
                    doc_len_norm[doc_id] = 1.0 / math.sqrt(L)

        # apply a gentle length normalization
        for d, s in list(scores.items()):
            scores[d] = s * doc_len_norm.get(d, 1.0)

        # rank and format
        ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:k]
        hits: List[Hit] = []
        for doc_id, score in ranked:
            d = self.docs[doc_id]
            meta: DocMeta = d["meta"]
            snippet = make_snippet(d.get("text", ""), q_toks)
            hits.append(Hit(
                doc_id=doc_id,
                score=round(float(score), 4),
                source=meta.source,
                snippet=snippet,
                title=meta.title,
                tags=meta.tags,
            ))
        return hits

    # ---------- persistence ----------

    def save(self, path: Path) -> None:
        """Serialize the whole index (including doc text) to JSON at `path`."""
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        # Store meta as dict to keep JSON serializable
        serial_docs = {
            doc_id: {
                "meta": asdict(d["meta"]),
                "len": d["len"],
                # store text to allow snippet generation after load (optional)
                "text": d.get("text", ""),
            }
            for doc_id, d in self.docs.items()
        }
        data = {
            "docs": serial_docs,
            "inv": self.inv,
            "df": self.df,
            "n_docs": self.n_docs,
            "saved_at": time.time(),
        }
        path.write_text(json.dumps(data, ensure_ascii=False), encoding="utf-8")

    @classmethod
    def load(cls, path: Path) -> "TfidfIndex":
        """Rebuild an index from a JSON file; returns an empty index if absent."""
        path = Path(path)
        idx = cls()
        if not path.is_file():
            return idx
        data = json.loads(path.read_text(encoding="utf-8"))
        # reconstruct docs with DocMeta
        docs: Dict[str, Dict] = {}
        for doc_id, d in data.get("docs", {}).items():
            m = d.get("meta", {})
            meta = DocMeta(**m) if m else DocMeta(doc_id=doc_id, source="unknown")
            docs[doc_id] = {
                "meta": meta,
                "len": d.get("len", 0),
                "text": d.get("text", ""),
            }
        idx.docs = docs
        idx.inv = {t: {k: int(v) for k, v in postings.items()} for t, postings in data.get("inv", {}).items()}
        idx.df = {t: int(v) for t, v in data.get("df", {}).items()}
        idx.n_docs = int(data.get("n_docs", len(idx.docs)))
        return idx

    # ---------- internals ----------

    def _remove_doc_terms(self, doc_id: str) -> None:
        """Remove a document's postings before re-adding."""
        if doc_id not in self.docs:
            return
        # delete postings
        for term, postings in list(self.inv.items()):
            if doc_id in postings:
                postings.pop(doc_id, None)
                if postings:
                    self.df[term] = len(postings)
                else:
                    # remove empty term
                    self.inv.pop(term, None)
                    self.df.pop(term, None)
        # delete doc
        self.docs.pop(doc_id, None)
        self.n_docs = len(self.docs)
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
# -----------------------------
|
| 292 |
+
# Utilities
|
| 293 |
+
# -----------------------------
|
| 294 |
+
|
| 295 |
+
def sha256_of(text: str) -> str:
    """Hex SHA-256 digest of `text` (None is treated as the empty string)."""
    payload = (text or "").encode("utf-8")
    return hashlib.sha256(payload).hexdigest()
|
| 297 |
+
|
| 298 |
+
def make_snippet(text: str, query_tokens: List[str], radius: int = 60) -> str:
    """
    Extract a small context window around the first query token found in
    `text`; fall back to the start of the document when nothing matches.
    Ellipses mark edges that were truncated.
    """
    if not text:
        return ""
    haystack = text.lower()
    for token in query_tokens:
        pos = haystack.find(token.lower())
        if pos < 0:
            continue
        start = max(0, pos - radius)
        end = min(len(text), pos + len(token) + radius)
        window = text[start:end].replace("\n", " ").strip()
        prefix = "…" if start > 0 else ""
        suffix = "…" if end < len(text) else ""
        return prefix + window + suffix
    # fallback: beginning of the doc
    head = text[: 2 * radius].replace("\n", " ").strip()
    return (head + "…") if len(text) > 2 * radius else head
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
# -----------------------------
|
| 322 |
+
# Convenience API (module-level)
|
| 323 |
+
# -----------------------------
|
| 324 |
+
|
| 325 |
+
# Default on-disk location for the persisted JSON index.
DEFAULT_INDEX_PATH = Path("memory/rag/data/.index/tfidf_index.json")

def build_from_folder(
    root: str | Path,
    include: Iterable[str] = ("*.txt", "*.md"),
    exclude: Iterable[str] = (".git/*",),
    save_to: str | Path = DEFAULT_INDEX_PATH,
    recursive: bool = True,
) -> TfidfIndex:
    """Index `root` into a fresh TfidfIndex, persist it to `save_to`, return it."""
    index = TfidfIndex()
    index.build_from_folder(
        Path(root), include=include, exclude=exclude, recursive=recursive
    )
    index.save(Path(save_to))
    return index
|
| 338 |
+
|
| 339 |
+
def load_index(path: str | Path = DEFAULT_INDEX_PATH) -> TfidfIndex:
    """Load a persisted index from `path` (empty index when the file is absent)."""
    return TfidfIndex.load(Path(path))
|
| 341 |
+
|
| 342 |
+
def search(query: str, k: int = 5, path: str | Path = DEFAULT_INDEX_PATH) -> List[Hit]:
    """One-shot convenience: load the index at `path` and run a query against it."""
    index = load_index(path)
    return index.search(query, k=k)
|
memory/rag/retriever.py
CHANGED
|
@@ -1 +1,268 @@
|
|
| 1 |
# /memory/rag/data/retriever.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /memory/rag/data/retriever.py
|
| 2 |
+
"""
|
| 3 |
+
Minimal RAG retriever that sits on top of the TF-IDF indexer.
|
| 4 |
+
|
| 5 |
+
Features
|
| 6 |
+
- Top-k document retrieval via indexer.search()
|
| 7 |
+
- Optional filters (tags, title substring)
|
| 8 |
+
- Passage extraction around query terms with overlap
|
| 9 |
+
- Lightweight proximity-based reranking of passages
|
| 10 |
+
|
| 11 |
+
No third-party dependencies; pairs with memory/rag/data/indexer.py.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
from dataclasses import dataclass
|
| 16 |
+
from typing import Dict, Iterable, List, Optional, Tuple
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
import math
|
| 19 |
+
import re
|
| 20 |
+
|
| 21 |
+
from .indexer import (
|
| 22 |
+
load_index,
|
| 23 |
+
search as index_search,
|
| 24 |
+
DEFAULT_INDEX_PATH,
|
| 25 |
+
tokenize,
|
| 26 |
+
TfidfIndex,
|
| 27 |
+
DocMeta,
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
# -----------------------------
|
| 31 |
+
# Public types
|
| 32 |
+
# -----------------------------
|
| 33 |
+
|
| 34 |
+
@dataclass(frozen=True)
class Passage:
    """One retrieved passage together with its provenance and combined score."""

    doc_id: str
    source: str
    title: Optional[str]
    tags: Optional[List[str]]
    score: float  # base index score, optionally boosted by the proximity rerank
    start: int    # character offset where the passage begins in the original text
    end: int      # character offset where the passage ends in the original text
    text: str     # the extracted passage itself
    snippet: str  # short display form (equals text when the passage is short)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@dataclass(frozen=True)
class Filters:
    """Optional post-search constraints applied to document hits."""

    # Case-insensitive substring that must appear in the doc title.
    title_contains: Optional[str] = None
    # Every listed tag must be present on the doc (logical AND).
    require_tags: Optional[Iterable[str]] = None
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# -----------------------------
|
| 54 |
+
# Retrieval API
|
| 55 |
+
# -----------------------------
|
| 56 |
+
|
| 57 |
+
def retrieve(
    query: str,
    k: int = 5,
    index_path: str | Path = DEFAULT_INDEX_PATH,
    filters: Optional[Filters] = None,
    passage_chars: int = 350,
    passage_overlap: int = 60,
    enable_rerank: bool = True,
) -> List[Passage]:
    """
    Retrieve the top-k passages for *query*.

    Pipeline: TF-IDF document search -> optional title/tag filtering ->
    per-document passage extraction -> optional proximity-based rerank.
    Returns an empty list for a blank query or an empty index.
    """
    index = load_index(index_path)
    if index.n_docs == 0 or not query.strip():
        return []

    # Overshoot the requested k; filtering and reranking trim the list later.
    doc_hits = index_search(query, k=max(k * 3, k), path=index_path)
    if filters:
        doc_hits = _apply_filters(doc_hits, index, filters)

    query_terms = tokenize(query)
    results: List[Passage] = []
    for hit in doc_hits:
        record = index.docs.get(hit.doc_id)
        if not record:
            continue
        meta: DocMeta = record["meta"]
        body = record.get("text", "") or ""
        p_start, p_end, p_text = _extract_passage(
            body, query_terms, window=passage_chars, overlap=passage_overlap
        )
        # Human-friendly snippet: truncate long passages with an ellipsis.
        short = p_text if len(p_text) <= 220 else p_text[:220].rstrip() + "…"
        results.append(
            Passage(
                doc_id=hit.doc_id,
                source=meta.source,
                title=meta.title,
                tags=meta.tags,
                score=float(hit.score),  # base score from the index
                start=p_start,
                end=p_end,
                text=p_text,
                snippet=short,
            )
        )

    if not results:
        return []

    if enable_rerank:
        results = _rerank_by_proximity(results, query_terms)

    results.sort(key=lambda p: p.score, reverse=True)
    return results[:k]
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def retrieve_texts(
    query: str,
    k: int = 5,
    **kwargs,
) -> List[str]:
    """Shortcut that returns only the passage texts for *query*."""
    passages = retrieve(query, k=k, **kwargs)
    return [passage.text for passage in passages]
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
# -----------------------------
|
| 133 |
+
# Internals
|
| 134 |
+
# -----------------------------
|
| 135 |
+
|
| 136 |
+
def _apply_filters(hits, idx: TfidfIndex, filters: Filters):
    """Drop hits whose document fails the title/tag constraints in *filters*."""
    title_needle = (filters.title_contains or "").strip().lower() or None
    needed_tags = {t.strip().lower() for t in (filters.require_tags or []) if str(t).strip()}

    kept = []
    for hit in hits:
        record = idx.docs.get(hit.doc_id)
        if not record:
            continue
        meta: DocMeta = record["meta"]

        # Title filter: case-insensitive containment.
        if title_needle and title_needle not in (meta.title or "").lower():
            continue

        # Tag filter: every required tag must be present (AND semantics).
        if needed_tags:
            doc_tags = {x.lower() for x in (meta.tags or [])}
            if not needed_tags.issubset(doc_tags):
                continue

        kept.append(hit)
    return kept
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
_WORD_RE = re.compile(r"[A-Za-z0-9']+")
|
| 163 |
+
|
| 164 |
+
def _find_all(term: str, text: str) -> List[int]:
|
| 165 |
+
"""Return starting indices of all case-insensitive matches of term in text."""
|
| 166 |
+
if not term or not text:
|
| 167 |
+
return []
|
| 168 |
+
term_l = term.lower()
|
| 169 |
+
low = text.lower()
|
| 170 |
+
out: List[int] = []
|
| 171 |
+
i = low.find(term_l)
|
| 172 |
+
while i >= 0:
|
| 173 |
+
out.append(i)
|
| 174 |
+
i = low.find(term_l, i + 1)
|
| 175 |
+
return out
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def _extract_passage(text: str, q_tokens: List[str], window: int = 350, overlap: int = 60) -> Tuple[int, int, str]:
    """
    Return (start, end, passage) for a window of *text* anchored near the
    earliest occurrence of any query token.

    The window begins ``overlap`` characters before the earliest hit so a
    little leading context is kept; if no token matches, the first
    ``window`` characters are returned instead. Empty text yields (0, 0, "").

    Fix: removed the dead ``low = text.lower()`` local, which lowercased the
    entire document on every call without ever being used.
    """
    if not text:
        return 0, 0, ""

    # Earliest case-insensitive hit of any query token decides the anchor.
    hit_positions: List[int] = []
    for qt in q_tokens:
        hit_positions.extend(_find_all(qt, text))

    if hit_positions:
        start = max(0, min(hit_positions) - overlap)
        end = min(len(text), start + window)
    else:
        start = 0
        end = min(len(text), window)

    return start, end, text[start:end].strip()
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def _rerank_by_proximity(passages: List[Passage], q_tokens: List[str]) -> List[Passage]:
    """
    Boost each passage's score by how tightly the query tokens cluster in it.

    For every unique query token, its first word-level occurrence is taken as
    a representative position; the average distance of those positions to
    their median is squashed into a bonus in [0, 0.25] (distance 0 -> 0.25,
    distance >= 10 words -> ~0) and added to the base score.

    Fix: build the adjusted Passage with ``dataclasses.replace`` instead of
    ``Passage(**{**p.__dict__, ...})`` — the latter reaches into a frozen
    dataclass's instance ``__dict__`` and breaks if ``slots=True`` is added.
    """
    from dataclasses import replace  # local import: module top only brings in `dataclass`

    q_unique = list(dict.fromkeys(q_tokens))  # dedupe while preserving order
    if not q_unique:
        return passages

    _ABSENT = 999999  # sentinel meaning "token does not occur in this passage"

    def word_positions(text: str, term: str) -> List[int]:
        """Word indices (not char offsets) at which *term* occurs in *text*."""
        words = [m.group(0).lower() for m in _WORD_RE.finditer(text)]
        return [i for i, w in enumerate(words) if w == term]

    def proximity_bonus(p: Passage) -> float:
        pos_lists = [word_positions(p.text, t) for t in q_unique]
        if all(not ps for ps in pos_lists):
            return 0.0

        # One representative position per token: first occurrence, or sentinel.
        reps = [ps[0] if ps else _ABSENT for ps in pos_lists]

        present = sorted(x for x in reps if x != _ABSENT)
        if not present:
            return 0.0
        mid = present[len(present) // 2]
        # Absent tokens contribute distance 0 (they collapse onto the median).
        avg_dist = sum(abs((x if x != _ABSENT else mid) - mid) for x in reps) / max(1, len(reps))

        # Squash distance into a bonus; tighter clustering -> bigger bonus.
        return float(max(0.0, 0.25 * (1.0 - min(avg_dist, 10.0) / 10.0)))

    return [replace(p, score=p.score + proximity_bonus(p)) for p in passages]
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
# -----------------------------
|
| 259 |
+
# CLI / quick test
|
| 260 |
+
# -----------------------------
|
| 261 |
+
|
| 262 |
+
if __name__ == "__main__":
    import sys

    # Quick manual smoke test: pass a query on the command line.
    demo_query = " ".join(sys.argv[1:]) or "anonymous chatbot rules"
    for rank, p in enumerate(retrieve(demo_query, k=3), 1):
        print(f"[{rank}] {p.score:.4f} {p.title or '(untitled)'} — {p.source}")
        print(" ", (p.snippet.replace("\n", " ") if p.snippet else "")[:200])
|
memory/sessions.py
CHANGED
|
@@ -1 +1,244 @@
|
|
| 1 |
# /memory/sessions.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /memory/sessions.py
|
| 2 |
+
"""
|
| 3 |
+
Minimal session store for chat history + per-session data.
|
| 4 |
+
|
| 5 |
+
Features
|
| 6 |
+
- In-memory store with thread safety
|
| 7 |
+
- Create/get/update/delete sessions
|
| 8 |
+
- Append chat turns: ("user"| "bot", text)
|
| 9 |
+
- Optional TTL cleanup and max-history cap
|
| 10 |
+
- JSON persistence (save/load)
|
| 11 |
+
- Deterministic, dependency-free
|
| 12 |
+
|
| 13 |
+
Intended to interoperate with anon_bot and logged_in_bot:
|
| 14 |
+
- History shape: List[Tuple[str, str]] e.g., [("user","hi"), ("bot","hello")]
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
from dataclasses import dataclass, asdict, field
|
| 19 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
import time
|
| 22 |
+
import uuid
|
| 23 |
+
import json
|
| 24 |
+
import threading
|
| 25 |
+
|
| 26 |
+
History = List[Tuple[str, str]] # [("user","..."), ("bot","...")]
|
| 27 |
+
|
| 28 |
+
# -----------------------------
|
| 29 |
+
# Data model
|
| 30 |
+
# -----------------------------
|
| 31 |
+
|
| 32 |
+
@dataclass
class Session:
    """A single conversation: identity, timestamps, free-form data, transcript."""

    session_id: str
    user_id: Optional[str] = None
    created_at: float = field(default_factory=lambda: time.time())
    updated_at: float = field(default_factory=lambda: time.time())
    data: Dict[str, Any] = field(default_factory=dict)  # arbitrary per-session state
    history: History = field(default_factory=list)      # [("user"/"bot", text), ...]

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain, JSON-friendly dict."""
        return asdict(self)

    @staticmethod
    def from_dict(d: Dict[str, Any]) -> "Session":
        """Rebuild a Session from a dict, coercing history entries back to tuples."""
        return Session(
            session_id=d["session_id"],
            user_id=d.get("user_id"),
            created_at=float(d.get("created_at", time.time())),
            updated_at=float(d.get("updated_at", time.time())),
            data=dict(d.get("data", {})),
            history=[(str(who), str(text)) for who, text in d.get("history", [])],
        )


# -----------------------------
# Store
# -----------------------------

class SessionStore:
    """
    Thread-safe in-memory session registry with optional TTL and persistence.
    """

    def __init__(
        self,
        ttl_seconds: Optional[int] = 60 * 60,  # 1 hour default; None disables expiry
        max_history: int = 200,                # transcript cap per session
    ) -> None:
        self._ttl = ttl_seconds
        self._max_history = max_history
        self._lock = threading.RLock()
        self._sessions: Dict[str, Session] = {}

    # ---- id helpers ----

    @staticmethod
    def new_id() -> str:
        """Opaque hex session id."""
        return uuid.uuid4().hex

    # ---- CRUD ----

    def create(self, user_id: Optional[str] = None, session_id: Optional[str] = None) -> Session:
        with self._lock:
            sid = session_id or self.new_id()
            sess = Session(session_id=sid, user_id=user_id)
            self._sessions[sid] = sess
            return sess

    def get(self, session_id: str, create_if_missing: bool = False, user_id: Optional[str] = None) -> Optional[Session]:
        with self._lock:
            sess = self._sessions.get(session_id)
            if sess is None and create_if_missing:
                sess = self.create(user_id=user_id, session_id=session_id)
            return sess

    def delete(self, session_id: str) -> bool:
        with self._lock:
            return self._sessions.pop(session_id, None) is not None

    def all_ids(self) -> List[str]:
        with self._lock:
            return list(self._sessions)

    # ---- housekeeping ----

    def _expired(self, s: Session) -> bool:
        if self._ttl is None:
            return False
        return (time.time() - s.updated_at) > self._ttl

    def sweep(self) -> int:
        """
        Remove expired sessions. Returns the number removed.
        """
        with self._lock:
            stale = [sid for sid, s in self._sessions.items() if self._expired(s)]
            for sid in stale:
                self._sessions.pop(sid, None)
            return len(stale)

    # ---- history ops ----

    def append_user(self, session_id: str, text: str) -> Session:
        return self._append(session_id, "user", text)

    def append_bot(self, session_id: str, text: str) -> Session:
        return self._append(session_id, "bot", text)

    def _append(self, session_id: str, who: str, text: str) -> Session:
        with self._lock:
            sess = self._sessions.get(session_id)
            if sess is None:
                sess = self.create(session_id=session_id)
            sess.history.append((who, text))
            if self._max_history and len(sess.history) > self._max_history:
                sess.history = sess.history[-self._max_history:]  # keep most recent N
            sess.updated_at = time.time()
            return sess

    def get_history(self, session_id: str) -> History:
        with self._lock:
            sess = self._sessions.get(session_id)
            return list(sess.history) if sess else []

    def clear_history(self, session_id: str) -> bool:
        with self._lock:
            sess = self._sessions.get(session_id)
            if not sess:
                return False
            sess.history.clear()
            sess.updated_at = time.time()
            return True

    # ---- key/value per-session data ----

    def set(self, session_id: str, key: str, value: Any) -> Session:
        with self._lock:
            sess = self._sessions.get(session_id)
            if sess is None:
                sess = self.create(session_id=session_id)
            sess.data[key] = value
            sess.updated_at = time.time()
            return sess

    def get_value(self, session_id: str, key: str, default: Any = None) -> Any:
        with self._lock:
            sess = self._sessions.get(session_id)
            return sess.data.get(key, default) if sess else default

    def data_dict(self, session_id: str) -> Dict[str, Any]:
        with self._lock:
            sess = self._sessions.get(session_id)
            return dict(sess.data) if sess else {}

    # ---- persistence ----

    def save(self, path: str | Path) -> None:
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        with self._lock:
            payload = {
                "ttl_seconds": self._ttl,
                "max_history": self._max_history,
                "saved_at": time.time(),
                "sessions": {sid: s.to_dict() for sid, s in self._sessions.items()},
            }
            target.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")

    @classmethod
    def load(cls, path: str | Path) -> "SessionStore":
        source = Path(path)
        if not source.is_file():
            return cls()
        payload = json.loads(source.read_text(encoding="utf-8"))
        store = cls(
            ttl_seconds=payload.get("ttl_seconds"),
            max_history=int(payload.get("max_history", 200)),
        )
        with store._lock:
            for sid, sd in payload.get("sessions", {}).items():
                store._sessions[sid] = Session.from_dict(sd)
        return store
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
# -----------------------------
|
| 214 |
+
# Module-level singleton (optional)
|
| 215 |
+
# -----------------------------
|
| 216 |
+
|
| 217 |
+
# Lazily-constructed process-wide default store.
_default_store: Optional[SessionStore] = None

def get_store() -> SessionStore:
    """Return the module-level SessionStore, creating it on first call."""
    global _default_store
    if _default_store is None:
        _default_store = SessionStore()
    return _default_store

def new_session(user_id: Optional[str] = None) -> Session:
    """Create a new session in the default store."""
    return get_store().create(user_id=user_id)

def append_user(session_id: str, text: str) -> Session:
    """Record a user turn in the default store."""
    return get_store().append_user(session_id, text)

def append_bot(session_id: str, text: str) -> Session:
    """Record a bot turn in the default store."""
    return get_store().append_bot(session_id, text)

def history(session_id: str) -> History:
    """Return a copy of the session transcript from the default store."""
    return get_store().get_history(session_id)

def set_value(session_id: str, key: str, value: Any) -> Session:
    """Set a per-session key/value in the default store."""
    return get_store().set(session_id, key, value)

def get_value(session_id: str, key: str, default: Any = None) -> Any:
    """Read a per-session value from the default store."""
    return get_store().get_value(session_id, key, default)

def sweep() -> int:
    """Expire idle sessions in the default store; return the count removed."""
    return get_store().sweep()
|
memory/store.py
CHANGED
|
@@ -1,3 +1,145 @@
|
|
| 1 |
# /memory/sessions.py
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /memory/sessions.py
|
| 2 |
+
"""
|
| 3 |
+
Simple in-memory session manager for chatbot history.
|
| 4 |
+
Supports TTL, max history, and JSON persistence.
|
| 5 |
+
"""
|
| 6 |
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
import time, json, uuid
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from dataclasses import dataclass, field
|
| 11 |
+
from typing import Dict, List, Tuple, Optional, Any
|
| 12 |
+
|
| 13 |
+
History = List[Tuple[str, str]] # [("user","..."), ("bot","...")]
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@dataclass
class Session:
    """One conversation's identity, transcript, and scratch data."""

    session_id: str
    user_id: Optional[str] = None
    history: History = field(default_factory=list)      # [("user"/"bot", text), ...]
    data: Dict[str, Any] = field(default_factory=dict)  # arbitrary per-session values
    created_at: float = field(default_factory=time.time)
    updated_at: float = field(default_factory=time.time)


class SessionStore:
    """
    In-memory session manager with TTL expiry, a history cap, and JSON persistence.

    Not thread-safe; wrap externally if shared across threads.
    """

    def __init__(self, ttl_seconds: Optional[int] = 3600, max_history: Optional[int] = 50):
        self.ttl_seconds = ttl_seconds  # None disables expiry
        self.max_history = max_history  # None disables the transcript cap
        self._sessions: Dict[str, Session] = {}

    # --- internals ---
    def _expired(self, sess: Session) -> bool:
        """True when the session has been idle longer than the TTL."""
        if self.ttl_seconds is None:
            return False
        return (time.time() - sess.updated_at) > self.ttl_seconds

    # --- CRUD ---
    def create(self, user_id: Optional[str] = None) -> Session:
        """Create and register a new session with a random UUID id."""
        sid = str(uuid.uuid4())
        sess = Session(session_id=sid, user_id=user_id)
        self._sessions[sid] = sess
        return sess

    def get(self, sid: str) -> Optional[Session]:
        return self._sessions.get(sid)

    def get_history(self, sid: str) -> History:
        """Copy of the session transcript, or [] for unknown ids."""
        sess = self.get(sid)
        return list(sess.history) if sess else []

    def append_user(self, sid: str, text: str) -> None:
        self._append(sid, "user", text)

    def append_bot(self, sid: str, text: str) -> None:
        self._append(sid, "bot", text)

    def _append(self, sid: str, who: str, text: str) -> None:
        """Append a turn; silently no-op for unknown session ids."""
        sess = self.get(sid)
        if not sess:
            return
        sess.history.append((who, text))
        if self.max_history and len(sess.history) > self.max_history:
            sess.history = sess.history[-self.max_history:]  # keep newest N
        sess.updated_at = time.time()

    # --- Data store ---
    def set(self, sid: str, key: str, value: Any) -> None:
        sess = self.get(sid)
        if sess:
            sess.data[key] = value
            sess.updated_at = time.time()

    def get_value(self, sid: str, key: str, default=None) -> Any:
        sess = self.get(sid)
        return sess.data.get(key, default) if sess else default

    def data_dict(self, sid: str) -> Dict[str, Any]:
        sess = self.get(sid)
        return dict(sess.data) if sess else {}

    # --- TTL management ---
    def sweep(self) -> int:
        """Remove expired sessions; return count removed."""
        expired = [sid for sid, s in self._sessions.items() if self._expired(s)]
        for sid in expired:
            self._sessions.pop(sid, None)
        return len(expired)

    def all_ids(self):
        return list(self._sessions.keys())

    # --- persistence ---
    def save(self, path: Path) -> None:
        """Write all sessions to *path* as pretty-printed JSON."""
        payload = {
            sid: {
                "user_id": s.user_id,
                "history": s.history,
                "data": s.data,
                "created_at": s.created_at,
                "updated_at": s.updated_at,
            }
            for sid, s in self._sessions.items()
        }
        path.write_text(json.dumps(payload, indent=2))

    @classmethod
    def load(cls, path: Path) -> "SessionStore":
        """
        Rebuild a store from save(); a missing file yields an empty store.

        Fix: JSON decodes history tuples as lists, so entries are coerced back
        to (who, text) tuples to match the History type and keep equality with
        in-memory transcripts after a save/load round-trip.
        """
        store = cls()
        if not path.exists():
            return store
        raw = json.loads(path.read_text())
        for sid, d in raw.items():
            store._sessions[sid] = Session(
                session_id=sid,
                user_id=d.get("user_id"),
                history=[(str(who), str(text)) for who, text in d.get("history", [])],
                data=d.get("data", {}),
                created_at=d.get("created_at", time.time()),
                updated_at=d.get("updated_at", time.time()),
            )
        return store
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# --- Module-level singleton for convenience ---
|
| 127 |
+
_store = SessionStore()  # module-level default store shared by the helpers below

def new_session(user_id: Optional[str] = None) -> Session:
    """Create a session in the shared default store."""
    return _store.create(user_id)

def history(sid: str) -> History:
    """Copy of the transcript for *sid* (empty list if unknown)."""
    return _store.get_history(sid)

def append_user(sid: str, text: str) -> None:
    """Record a user turn for *sid*; no-op for unknown ids."""
    _store.append_user(sid, text)

def append_bot(sid: str, text: str) -> None:
    """Record a bot turn for *sid*; no-op for unknown ids."""
    _store.append_bot(sid, text)

def set_value(sid: str, key: str, value: Any) -> None:
    """Set a per-session key/value; no-op for unknown ids."""
    _store.set(sid, key, value)

def get_value(sid: str, key: str, default=None) -> Any:
    """Read a per-session value, or *default* when absent."""
    return _store.get_value(sid, key, default)
|
nlu/pipeline.py
CHANGED
|
@@ -1,3 +1,77 @@
|
|
| 1 |
# /nlu/pipeline.py
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /nlu/pipeline.py
|
| 2 |
+
"""
|
| 3 |
+
Lightweight rule-based NLU pipeline.
|
| 4 |
|
| 5 |
+
No ML dependencies β just keyword matching and simple heuristics.
|
| 6 |
+
Provides intent classification and placeholder entity extraction.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from typing import Dict, List
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# keyword β intent maps
|
| 13 |
+
_INTENT_KEYWORDS = {
|
| 14 |
+
"greeting": {"hi", "hello", "hey", "good morning", "good evening"},
|
| 15 |
+
"goodbye": {"bye", "goodbye", "see you", "farewell"},
|
| 16 |
+
"help": {"help", "support", "assist", "how do i"},
|
| 17 |
+
"faq": {"what is", "who is", "where is", "when is", "how to"},
|
| 18 |
+
"sentiment_positive": {"great", "awesome", "fantastic", "love"},
|
| 19 |
+
"sentiment_negative": {"bad", "terrible", "hate", "awful"},
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _match_intent(text: str) -> str:
|
| 24 |
+
low = text.lower().strip()
|
| 25 |
+
for intent, kws in _INTENT_KEYWORDS.items():
|
| 26 |
+
for kw in kws:
|
| 27 |
+
if kw in low:
|
| 28 |
+
return intent
|
| 29 |
+
return "general"
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _extract_entities(text: str) -> List[str]:
|
| 33 |
+
"""
|
| 34 |
+
Placeholder entity extractor.
|
| 35 |
+
For now just returns capitalized words (could be names/places).
|
| 36 |
+
"""
|
| 37 |
+
return [w for w in text.split() if w.istitle()]
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def analyze(text: str) -> Dict:
    """
    Analyze a user utterance.

    Returns:
        {
            "intent": str,
            "entities": list[str],
            "confidence": float
        }

    Blank input maps to the "general" intent with zero confidence.
    """
    if not text or not text.strip():
        return {"intent": "general", "entities": [], "confidence": 0.0}

    label = _match_intent(text)
    found = _extract_entities(text)

    # Crude confidence: keyword hit -> 0.9, generic fallback -> 0.5.
    score = 0.9 if label != "general" else 0.5

    return {"intent": label, "entities": found, "confidence": score}
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# quick test
|
| 67 |
+
if __name__ == "__main__":
    # Manual smoke test covering each intent plus the fallback.
    samples = [
        "Hello there",
        "Can you help me?",
        "I love this bot!",
        "Bye now",
        "Tell me what is RAG",
        "random input with no keywords",
    ]
    for sample in samples:
        print(sample, "->", analyze(sample))
|
nlu/prompts.py
CHANGED
|
@@ -1 +1,78 @@
|
|
| 1 |
# /nlu/prompts.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /nlu/prompts.py
|
| 2 |
+
"""
|
| 3 |
+
Reusable prompt templates for NLU and chatbot responses.
|
| 4 |
+
|
| 5 |
+
These can be imported anywhere in the app to keep wording consistent.
|
| 6 |
+
They are plain strings / dicts β no external deps required.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from typing import Dict
|
| 10 |
+
|
| 11 |
+
# -----------------------------
|
| 12 |
+
# System prompts
|
| 13 |
+
# -----------------------------
|
| 14 |
+
|
| 15 |
+
SYSTEM_BASE = """\
|
| 16 |
+
You are a helpful, polite chatbot.
|
| 17 |
+
Answer briefly unless asked for detail.
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
SYSTEM_FAQ = """\
|
| 21 |
+
You are a factual Q&A assistant.
|
| 22 |
+
Answer questions directly, citing facts when possible.
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
SYSTEM_SUPPORT = """\
|
| 26 |
+
You are a friendly support assistant.
|
| 27 |
+
Offer clear, step-by-step help when the user asks for guidance.
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
# -----------------------------
|
| 31 |
+
# Few-shot examples
|
| 32 |
+
# -----------------------------
|
| 33 |
+
|
| 34 |
+
FEW_SHOTS: Dict[str, list] = {
|
| 35 |
+
"greeting": [
|
| 36 |
+
{"user": "Hello", "bot": "Hi there! How can I help you today?"},
|
| 37 |
+
{"user": "Good morning", "bot": "Good morning! Whatβs up?"},
|
| 38 |
+
],
|
| 39 |
+
"goodbye": [
|
| 40 |
+
{"user": "Bye", "bot": "Goodbye! Have a great day."},
|
| 41 |
+
{"user": "See you later", "bot": "See you!"},
|
| 42 |
+
],
|
| 43 |
+
"help": [
|
| 44 |
+
{"user": "I need help", "bot": "Sure! What do you need help with?"},
|
| 45 |
+
{"user": "Can you assist me?", "bot": "Of course, happy to assist."},
|
| 46 |
+
],
|
| 47 |
+
"faq": [
|
| 48 |
+
{"user": "What is RAG?", "bot": "RAG stands for Retrieval-Augmented Generation."},
|
| 49 |
+
{"user": "Who created this bot?", "bot": "It was built by our project team."},
|
| 50 |
+
],
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
# -----------------------------
|
| 54 |
+
# Utility
|
| 55 |
+
# -----------------------------
|
| 56 |
+
|
| 57 |
+
def get_system_prompt(mode: str = "base") -> str:
|
| 58 |
+
"""
|
| 59 |
+
Return a system-level prompt string.
|
| 60 |
+
mode: "base" | "faq" | "support"
|
| 61 |
+
"""
|
| 62 |
+
if mode == "faq":
|
| 63 |
+
return SYSTEM_FAQ
|
| 64 |
+
if mode == "support":
|
| 65 |
+
return SYSTEM_SUPPORT
|
| 66 |
+
return SYSTEM_BASE
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def get_few_shots(intent: str) -> list:
|
| 70 |
+
"""
|
| 71 |
+
Return few-shot examples for a given intent label.
|
| 72 |
+
"""
|
| 73 |
+
return FEW_SHOTS.get(intent, [])
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
if __name__ == "__main__":
|
| 77 |
+
print("System prompt:", get_system_prompt("faq"))
|
| 78 |
+
print("Examples for 'greeting':", get_few_shots("greeting"))
|
nlu/router.py
CHANGED
|
@@ -1 +1,143 @@
|
|
| 1 |
# /nlu/router.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /nlu/router.py
|
| 2 |
+
"""
|
| 3 |
+
Lightweight NLU router.
|
| 4 |
+
|
| 5 |
+
- Uses nlu.pipeline.analyze() to classify the user's intent.
|
| 6 |
+
- Maps intents to high-level actions (GREETING, HELP, FAQ, ECHO, SUMMARIZE, GENERAL, GOODBYE).
|
| 7 |
+
- Provides:
|
| 8 |
+
route(text, ctx=None) -> dict with intent, action, handler, params
|
| 9 |
+
respond(text, history) -> quick deterministic reply for smoke tests
|
| 10 |
+
|
| 11 |
+
This file deliberately avoids external dependencies so it works in anonymous mode.
|
| 12 |
+
Later, you can swap 'handler' targets to real modules (e.g., anon_bot, logged_in_bot).
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
from dataclasses import dataclass, asdict
|
| 17 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 18 |
+
|
| 19 |
+
from .pipeline import analyze
|
| 20 |
+
from .prompts import get_system_prompt, get_few_shots
|
| 21 |
+
|
| 22 |
+
History = List[Tuple[str, str]] # [("user","..."), ("bot","...")]
|
| 23 |
+
|
| 24 |
+
# -----------------------------
|
| 25 |
+
# Action / Route schema
|
| 26 |
+
# -----------------------------
|
| 27 |
+
|
| 28 |
+
@dataclass(frozen=True)
|
| 29 |
+
class Route:
|
| 30 |
+
intent: str
|
| 31 |
+
action: str
|
| 32 |
+
handler: str # suggested dotted path or logical name
|
| 33 |
+
params: Dict[str, Any] # arbitrary params (e.g., {"mode":"faq"})
|
| 34 |
+
confidence: float
|
| 35 |
+
|
| 36 |
+
def to_dict(self) -> Dict[str, Any]:
|
| 37 |
+
return asdict(self)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# Intent -> (Action, Suggested Handler, Default Params)
|
| 41 |
+
_ACTION_TABLE: Dict[str, Tuple[str, str, Dict[str, Any]]] = {
|
| 42 |
+
"greeting": ("GREETING", "builtin.respond", {"mode": "base"}),
|
| 43 |
+
"goodbye": ("GOODBYE", "builtin.respond", {"mode": "base"}),
|
| 44 |
+
"help": ("HELP", "builtin.respond", {"mode": "support"}),
|
| 45 |
+
"faq": ("FAQ", "builtin.respond", {"mode": "faq"}),
|
| 46 |
+
# Sentiment intents come from pipeline; treat as GENERAL but note tag:
|
| 47 |
+
"sentiment_positive": ("GENERAL", "builtin.respond", {"mode": "base", "tag": "positive"}),
|
| 48 |
+
"sentiment_negative": ("GENERAL", "builtin.respond", {"mode": "base", "tag": "negative"}),
|
| 49 |
+
# Default:
|
| 50 |
+
"general": ("GENERAL", "builtin.respond", {"mode": "base"}),
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
_DEFAULT_ACTION = ("GENERAL", "builtin.respond", {"mode": "base"})
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# -----------------------------
|
| 57 |
+
# Routing
|
| 58 |
+
# -----------------------------
|
| 59 |
+
|
| 60 |
+
def route(text: str, ctx: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
|
| 61 |
+
"""
|
| 62 |
+
Decide which action/handler should process the utterance.
|
| 63 |
+
"""
|
| 64 |
+
nlu = analyze(text or "")
|
| 65 |
+
intent = nlu.get("intent", "general")
|
| 66 |
+
confidence = float(nlu.get("confidence", 0.0))
|
| 67 |
+
action, handler, params = _ACTION_TABLE.get(intent, _DEFAULT_ACTION)
|
| 68 |
+
|
| 69 |
+
# pass-through entities as params for downstream handlers
|
| 70 |
+
entities = nlu.get("entities") or []
|
| 71 |
+
if entities:
|
| 72 |
+
params = {**params, "entities": entities}
|
| 73 |
+
|
| 74 |
+
# include minimal context (optional)
|
| 75 |
+
if ctx:
|
| 76 |
+
params = {**params, "_ctx": ctx}
|
| 77 |
+
|
| 78 |
+
return Route(
|
| 79 |
+
intent=intent,
|
| 80 |
+
action=action,
|
| 81 |
+
handler=handler,
|
| 82 |
+
params=params,
|
| 83 |
+
confidence=confidence,
|
| 84 |
+
).to_dict()
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# -----------------------------
|
| 88 |
+
# Built-in deterministic responder (for smoke tests)
|
| 89 |
+
# -----------------------------
|
| 90 |
+
|
| 91 |
+
def respond(text: str, history: Optional[History] = None) -> str:
|
| 92 |
+
"""
|
| 93 |
+
Produce a tiny, deterministic response using system/few-shot text.
|
| 94 |
+
This is only for local testing; replace with real handlers later.
|
| 95 |
+
"""
|
| 96 |
+
r = route(text)
|
| 97 |
+
intent = r["intent"]
|
| 98 |
+
action = r["action"]
|
| 99 |
+
mode = r["params"].get("mode", "base")
|
| 100 |
+
|
| 101 |
+
# Choose a system flavor (not used to prompt a model here, but keeps wording consistent)
|
| 102 |
+
_ = get_system_prompt("support" if action == "HELP" else ("faq" if action == "FAQ" else "base"))
|
| 103 |
+
# Few-shots can inform canned replies (again: no model used, just tone)
|
| 104 |
+
shots = get_few_shots(intent)
|
| 105 |
+
|
| 106 |
+
if action == "GREETING":
|
| 107 |
+
return "Hi! How can I help you today?"
|
| 108 |
+
if action == "GOODBYE":
|
| 109 |
+
return "Goodbye! Have a great day."
|
| 110 |
+
if action == "HELP":
|
| 111 |
+
return "I can answer quick questions, echo text, or summarize short passages. What do you need help with?"
|
| 112 |
+
if action == "FAQ":
|
| 113 |
+
# Trivial FAQ-style echo; swap with RAG later
|
| 114 |
+
return "Ask a specific question (e.g., 'What is RAG?'), and Iβll answer briefly."
|
| 115 |
+
# GENERAL:
|
| 116 |
+
# If the pipeline flagged sentiment, acknowledge gently.
|
| 117 |
+
tag = r["params"].get("tag")
|
| 118 |
+
if tag == "positive":
|
| 119 |
+
prefix = "Glad to hear it! "
|
| 120 |
+
elif tag == "negative":
|
| 121 |
+
prefix = "Sorry to hear that. "
|
| 122 |
+
else:
|
| 123 |
+
prefix = ""
|
| 124 |
+
return prefix + "Noted. If you need help, type 'help'."
|
| 125 |
+
|
| 126 |
+
# -----------------------------
|
| 127 |
+
# Simple CLI smoke test
|
| 128 |
+
# -----------------------------
|
| 129 |
+
|
| 130 |
+
if __name__ == "__main__":
|
| 131 |
+
tests = [
|
| 132 |
+
"Hello there",
|
| 133 |
+
"Can you help me?",
|
| 134 |
+
"What is RAG in simple terms?",
|
| 135 |
+
"This is awful.",
|
| 136 |
+
"Bye!",
|
| 137 |
+
"random input with no keywords",
|
| 138 |
+
]
|
| 139 |
+
for t in tests:
|
| 140 |
+
print(f"> {t}")
|
| 141 |
+
print(" route:", route(t))
|
| 142 |
+
print(" reply:", respond(t))
|
| 143 |
+
print()
|
requirements-dev.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pytest>=7.4.0
|
| 2 |
+
pytest-cov>=4.1.0
|
| 3 |
+
black>=24.3.0
|
| 4 |
+
isort>=5.13.0
|
| 5 |
+
flake8>=7.0.0
|
| 6 |
+
mypy>=1.10.0
|
| 7 |
+
ruff>=0.5.0
|
requirements-ml.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers>=4.41.0
|
| 2 |
+
torch>=2.2.0
|
| 3 |
+
|
| 4 |
+
# extras commonly required by transformers
|
| 5 |
+
safetensors>=0.4.0
|
| 6 |
+
accelerate>=0.33.0
|
| 7 |
+
sentencepiece>=0.2.0
|
requirements.txt
CHANGED
|
@@ -1,15 +1,12 @@
|
|
| 1 |
-
gradio>=4.0
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
| 6 |
numpy>=1.26.0
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
| 9 |
azure-ai-textanalytics>=5.3.0
|
| 10 |
-
python-dotenv>=1.0
|
| 11 |
-
fastapi>=0.115.0
|
| 12 |
-
uvicorn[standard]>=0.30.0
|
| 13 |
-
# Optional for Bot Framework sample:
|
| 14 |
-
# aiohttp>=3.9
|
| 15 |
-
# botbuilder-core>=4.14
|
|
|
|
| 1 |
+
gradio>=4.0,<5
|
| 2 |
+
fastapi>=0.115.0,<0.116
|
| 3 |
+
uvicorn[standard]>=0.30.0,<0.31
|
| 4 |
+
python-dotenv>=1.0
|
| 5 |
+
|
| 6 |
+
# light numeric stack
|
| 7 |
numpy>=1.26.0
|
| 8 |
+
pandas>=2.1.0
|
| 9 |
+
scikit-learn>=1.3.0
|
| 10 |
+
|
| 11 |
+
# optional Azure integration
|
| 12 |
azure-ai-textanalytics>=5.3.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/check_compliance.py
CHANGED
|
@@ -1,3 +1,81 @@
|
|
| 1 |
# /scripts/check_compliance.py
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /scripts/check_compliance.py
|
| 2 |
+
#!/usr/bin/env python3
|
| 3 |
+
"""
|
| 4 |
+
Compliance checker for disallowed dependencies.
|
| 5 |
|
| 6 |
+
- Scans all .py files under project root (excluding venv/.git/etc).
|
| 7 |
+
- Flags imports of disallowed packages (by prefix).
|
| 8 |
+
- Exits nonzero if any violations are found.
|
| 9 |
+
|
| 10 |
+
Run:
|
| 11 |
+
python scripts/check_compliance.py
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import sys
|
| 15 |
+
import os
|
| 16 |
+
import re
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
# -----------------------------
|
| 20 |
+
# Config
|
| 21 |
+
# -----------------------------
|
| 22 |
+
|
| 23 |
+
# Disallowed top-level import prefixes
|
| 24 |
+
DISALLOWED = {
|
| 25 |
+
"torch",
|
| 26 |
+
"tensorflow",
|
| 27 |
+
"transformers",
|
| 28 |
+
"openai",
|
| 29 |
+
"azure.ai", # heavy cloud SDK
|
| 30 |
+
"azureml",
|
| 31 |
+
"boto3",
|
| 32 |
+
"botbuilder", # Microsoft Bot Framework
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
IGNORE_DIRS = {".git", "__pycache__", "venv", ".venv", "env", ".env", "node_modules"}
|
| 36 |
+
|
| 37 |
+
IMPORT_RE = re.compile(r"^\s*(?:import|from)\s+([a-zA-Z0-9_.]+)")
|
| 38 |
+
|
| 39 |
+
# -----------------------------
|
| 40 |
+
# Scan
|
| 41 |
+
# -----------------------------
|
| 42 |
+
|
| 43 |
+
def scan_file(path: Path) -> list[str]:
|
| 44 |
+
bad = []
|
| 45 |
+
try:
|
| 46 |
+
lines = path.read_text(encoding="utf-8", errors="ignore").splitlines()
|
| 47 |
+
except Exception as e:
|
| 48 |
+
print(f"[warn] could not read {path}: {e}", file=sys.stderr)
|
| 49 |
+
return []
|
| 50 |
+
for i, line in enumerate(lines, 1):
|
| 51 |
+
m = IMPORT_RE.match(line)
|
| 52 |
+
if not m:
|
| 53 |
+
continue
|
| 54 |
+
mod = m.group(1)
|
| 55 |
+
for banned in DISALLOWED:
|
| 56 |
+
if mod == banned or mod.startswith(banned + "."):
|
| 57 |
+
bad.append(f"{path}:{i}: disallowed import '{mod}'")
|
| 58 |
+
return bad
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def main(root: str = ".") -> int:
|
| 62 |
+
root = Path(root)
|
| 63 |
+
failures: list[str] = []
|
| 64 |
+
|
| 65 |
+
for p in root.rglob("*.py"):
|
| 66 |
+
if any(part in IGNORE_DIRS for part in p.parts):
|
| 67 |
+
continue
|
| 68 |
+
failures.extend(scan_file(p))
|
| 69 |
+
|
| 70 |
+
if failures:
|
| 71 |
+
print("β Compliance check failed:")
|
| 72 |
+
for f in failures:
|
| 73 |
+
print(" ", f)
|
| 74 |
+
return 1
|
| 75 |
+
else:
|
| 76 |
+
print("β
Compliance check passed (no disallowed deps).")
|
| 77 |
+
return 0
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
if __name__ == "__main__":
|
| 81 |
+
sys.exit(main())
|
scripts/run_local.sh
CHANGED
|
@@ -1,5 +1,45 @@
|
|
| 1 |
# /scripts/run_local.sh
|
| 2 |
#!/usr/bin/env bash
|
| 3 |
-
set -
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /scripts/run_local.sh
|
| 2 |
#!/usr/bin/env bash
|
| 3 |
+
set -Eeuo pipefail
|
| 4 |
+
|
| 5 |
+
# Move to repo root
|
| 6 |
+
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
| 7 |
+
cd "$ROOT_DIR"
|
| 8 |
+
|
| 9 |
+
# --- Configuration via env (with sane defaults) ---
|
| 10 |
+
export PYTHONPATH="${PYTHONPATH:-.}"
|
| 11 |
+
HOST="${HOST:-0.0.0.0}"
|
| 12 |
+
PORT="${PORT:-7860}"
|
| 13 |
+
MODE="${MODE:-gradio}" # gradio | uvicorn
|
| 14 |
+
RELOAD="${RELOAD:-false}" # only applies to MODE=uvicorn
|
| 15 |
+
INSTALL="${INSTALL:-0}" # set INSTALL=1 to pip install requirements
|
| 16 |
+
|
| 17 |
+
# Load .env if present (ignore comments/blank lines)
|
| 18 |
+
if [[ -f .env ]]; then
|
| 19 |
+
# shellcheck disable=SC2046
|
| 20 |
+
export $(grep -vE '^\s*#' .env | grep -vE '^\s*$' | xargs -0 -I{} bash -c 'printf "%s\0" "{}"' 2>/dev/null || true)
|
| 21 |
+
fi
|
| 22 |
+
|
| 23 |
+
if [[ "$INSTALL" == "1" ]]; then
|
| 24 |
+
echo "π¦ Installing dependencies from requirements.txt ..."
|
| 25 |
+
python -m pip install -r requirements.txt
|
| 26 |
+
fi
|
| 27 |
+
|
| 28 |
+
trap 'echo; echo "β Server terminated";' INT TERM
|
| 29 |
+
|
| 30 |
+
if [[ "$MODE" == "uvicorn" ]]; then
|
| 31 |
+
# Dev-friendly server with optional reload (expects FastAPI app factory)
|
| 32 |
+
echo "βΆ Starting Uvicorn on http://${HOST}:${PORT} (reload=${RELOAD})"
|
| 33 |
+
# If you expose a FastAPI app object directly, adjust target accordingly (e.g., storefront_chatbot.app.app:app)
|
| 34 |
+
cmd=(python -m uvicorn storefront_chatbot.app.app:build --host "$HOST" --port "$PORT")
|
| 35 |
+
[[ "$RELOAD" == "true" ]] && cmd+=(--reload)
|
| 36 |
+
exec "${cmd[@]}"
|
| 37 |
+
else
|
| 38 |
+
# Gradio path (matches your original build().launch)
|
| 39 |
+
echo "βΆ Starting Gradio on http://${HOST}:${PORT}"
|
| 40 |
+
python - <<PY
|
| 41 |
+
from storefront_chatbot.app.app import build
|
| 42 |
+
app = build()
|
| 43 |
+
app.launch(server_name="${HOST}", server_port=${PORT})
|
| 44 |
+
PY
|
| 45 |
+
fi
|
scripts/seed_data.py
CHANGED
|
@@ -1,3 +1,94 @@
|
|
| 1 |
# /scripts/seed_data.py
|
| 2 |
-
#
|
|
|
|
|
|
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /scripts/seed_data.py
|
| 2 |
+
#!/usr/bin/env python3
|
| 3 |
+
"""
|
| 4 |
+
Seed script to load sample products and FAQs into local data files.
|
| 5 |
|
| 6 |
+
- Creates ./data/products.json and ./data/faqs.json
|
| 7 |
+
- Provides a CLI to re-seed or show contents
|
| 8 |
+
- No external dependencies required
|
| 9 |
+
|
| 10 |
+
Run:
|
| 11 |
+
python scripts/seed_data.py # create seed files
|
| 12 |
+
python scripts/seed_data.py show # print contents
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import sys
|
| 16 |
+
import json
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
import datetime
|
| 19 |
+
|
| 20 |
+
ROOT = Path(__file__).resolve().parent.parent
|
| 21 |
+
DATA_DIR = ROOT / "data"
|
| 22 |
+
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
| 23 |
+
|
| 24 |
+
PRODUCTS_PATH = DATA_DIR / "products.json"
|
| 25 |
+
FAQS_PATH = DATA_DIR / "faqs.json"
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
SAMPLE_PRODUCTS = [
|
| 29 |
+
{
|
| 30 |
+
"id": "p1",
|
| 31 |
+
"name": "Chatbot Pro Subscription",
|
| 32 |
+
"description": "Access advanced features of the chatbot platform.",
|
| 33 |
+
"price": 9.99,
|
| 34 |
+
"currency": "USD",
|
| 35 |
+
"tags": ["subscription", "chatbot"],
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"id": "p2",
|
| 39 |
+
"name": "Custom Bot Avatar",
|
| 40 |
+
"description": "A personalized avatar for your chatbot.",
|
| 41 |
+
"price": 4.99,
|
| 42 |
+
"currency": "USD",
|
| 43 |
+
"tags": ["avatar", "customization"],
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"id": "p3",
|
| 47 |
+
"name": "Analytics Dashboard",
|
| 48 |
+
"description": "Real-time analytics and reporting for your conversations.",
|
| 49 |
+
"price": 14.99,
|
| 50 |
+
"currency": "USD",
|
| 51 |
+
"tags": ["analytics", "dashboard"],
|
| 52 |
+
},
|
| 53 |
+
]
|
| 54 |
+
|
| 55 |
+
SAMPLE_FAQS = [
|
| 56 |
+
{
|
| 57 |
+
"q": "How do I reset my password?",
|
| 58 |
+
"a": "Click 'Forgot password' on the login page and follow the instructions.",
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"q": "Can I export my chat history?",
|
| 62 |
+
"a": "Yes, you can export your chat history from the account settings page.",
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"q": "Do you offer refunds?",
|
| 66 |
+
"a": "Refunds are available within 14 days of purchase. Contact support for help.",
|
| 67 |
+
},
|
| 68 |
+
]
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def write_json(path: Path, data) -> None:
|
| 72 |
+
path.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def seed() -> None:
|
| 76 |
+
write_json(PRODUCTS_PATH, SAMPLE_PRODUCTS)
|
| 77 |
+
write_json(FAQS_PATH, SAMPLE_FAQS)
|
| 78 |
+
print(f"β
Seeded data at {datetime.date.today()} into {DATA_DIR}")
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def show() -> None:
|
| 82 |
+
if PRODUCTS_PATH.is_file():
|
| 83 |
+
print("Products:")
|
| 84 |
+
print(PRODUCTS_PATH.read_text(encoding="utf-8"))
|
| 85 |
+
if FAQS_PATH.is_file():
|
| 86 |
+
print("\nFAQs:")
|
| 87 |
+
print(FAQS_PATH.read_text(encoding="utf-8"))
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
if __name__ == "__main__":
|
| 91 |
+
if len(sys.argv) > 1 and sys.argv[1] == "show":
|
| 92 |
+
show()
|
| 93 |
+
else:
|
| 94 |
+
seed()
|
tests/test_anon_bot.py
CHANGED
|
@@ -1,3 +1,121 @@
|
|
| 1 |
# /test/test_anon_bot.py
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
|
|
|
| 1 |
# /test/test_anon_bot.py
|
| 2 |
+
"""
|
| 3 |
+
Comprehensive smoke tests for anon_bot.
|
| 4 |
+
Run with: pytest -q
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import pytest
|
| 8 |
+
from anon_bot import handler, rules
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# ---------- rules: intents & handlers ----------
|
| 12 |
+
|
| 13 |
+
@pytest.mark.parametrize(
|
| 14 |
+
"msg,expected",
|
| 15 |
+
[
|
| 16 |
+
("", "empty"),
|
| 17 |
+
("help", "help"),
|
| 18 |
+
("/help", "help"),
|
| 19 |
+
("capabilities", "help"),
|
| 20 |
+
("reverse abc", "reverse"),
|
| 21 |
+
("echo hello world", "echo"),
|
| 22 |
+
("hi", "greet"),
|
| 23 |
+
("hello", "greet"),
|
| 24 |
+
("hey", "greet"),
|
| 25 |
+
("who are you", "chat"),
|
| 26 |
+
],
|
| 27 |
+
)
|
| 28 |
+
def test_rules_intent_of(msg, expected):
|
| 29 |
+
assert rules.intent_of(msg) == expected
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def test_rules_capabilities_contains_expected_items():
|
| 33 |
+
caps = rules.capabilities()
|
| 34 |
+
assert "help" in caps
|
| 35 |
+
assert any(c.startswith("reverse") for c in caps)
|
| 36 |
+
assert any(c.startswith("echo") for c in caps)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def test_rules_handlers_basic():
|
| 40 |
+
assert "I can:" in rules.handle_help().text
|
| 41 |
+
assert rules.handle_reverse("reverse hello").text == "olleh"
|
| 42 |
+
assert rules.handle_reverse("reverse").text == "(nothing to reverse)"
|
| 43 |
+
assert rules.handle_echo("echo one two").text == "one two"
|
| 44 |
+
assert rules.handle_echo("echo").text == "(nothing to echo)"
|
| 45 |
+
assert "Type 'help'" in rules.handle_greet().text
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def test_rules_reply_for_empty_and_chat_paths():
|
| 49 |
+
r = rules.reply_for("", [])
|
| 50 |
+
assert "Please type something" in r.text
|
| 51 |
+
|
| 52 |
+
r2 = rules.reply_for("who are you", [])
|
| 53 |
+
assert "tiny anonymous chatbot" in r2.text
|
| 54 |
+
|
| 55 |
+
r3 = rules.reply_for("can you help me", [])
|
| 56 |
+
assert "I can:" in r3.text # chat fallback detects 'help' and returns help
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# ---------- handler: history & turn processing ----------
|
| 60 |
+
|
| 61 |
+
def test_handle_turn_appends_user_and_bot():
|
| 62 |
+
hist = []
|
| 63 |
+
out = handler.handle_turn("hello", hist, user=None)
|
| 64 |
+
# last two entries should be ("user", ...), ("bot", ...)
|
| 65 |
+
assert out[-2][0] == "user" and out[-2][1] == "hello"
|
| 66 |
+
assert out[-1][0] == "bot" and "Type 'help'" in out[-1][1]
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def test_handle_turn_with_existing_history_preserves_items():
|
| 70 |
+
h2 = [("user", "prev"), ("bot", "ok")]
|
| 71 |
+
out2 = handler.handle_turn("echo ping", h2, user=None)
|
| 72 |
+
assert out2[:2] == h2 # preserved
|
| 73 |
+
assert out2[-1][0] == "bot"
|
| 74 |
+
assert out2[-1][1] == "ping" # echo payload
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def test_handle_text_convenience():
|
| 78 |
+
reply = handler.handle_text("reverse abc")
|
| 79 |
+
assert reply == "cba"
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def test_handle_turn_empty_message_produces_prompt():
|
| 83 |
+
out = handler.handle_turn("", [], user=None)
|
| 84 |
+
assert out[-1][0] == "bot"
|
| 85 |
+
assert "Please type" in out[-1][1]
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def test_handler_coerces_weird_history_without_crashing():
|
| 89 |
+
# Mix of tuples, lists, malformed entries, and non-iterables
|
| 90 |
+
weird = [
|
| 91 |
+
("user", "ok"),
|
| 92 |
+
["bot", "fine"],
|
| 93 |
+
"garbage",
|
| 94 |
+
("only_one_element",),
|
| 95 |
+
("user", 123),
|
| 96 |
+
42,
|
| 97 |
+
None,
|
| 98 |
+
]
|
| 99 |
+
out = handler.handle_turn("hi", weird, user=None)
|
| 100 |
+
# Should include a normalized user entry and a bot reply at the end
|
| 101 |
+
assert out[-2] == ("user", "hi")
|
| 102 |
+
assert out[-1][0] == "bot"
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# ---------- end-to-end mini scriptable checks ----------
|
| 106 |
+
|
| 107 |
+
def test_greet_help_echo_reverse_flow():
|
| 108 |
+
h = []
|
| 109 |
+
h = handler.handle_turn("hi", h, None)
|
| 110 |
+
assert "help" in h[-1][1].lower()
|
| 111 |
+
|
| 112 |
+
h = handler.handle_turn("help", h, None)
|
| 113 |
+
assert "I can:" in h[-1][1]
|
| 114 |
+
|
| 115 |
+
h = handler.handle_turn("echo alpha beta", h, None)
|
| 116 |
+
assert h[-1][1] == "alpha beta"
|
| 117 |
+
|
| 118 |
+
h = handler.handle_turn("reverse zed", h, None)
|
| 119 |
+
assert h[-1][1] == "dez"
|
| 120 |
+
|
| 121 |
|
tests/test_guardrails.py
CHANGED
|
@@ -1,2 +1,40 @@
|
|
| 1 |
# /test/test_guardrails.py
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /test/test_guardrails.py
|
| 2 |
+
"""
|
| 3 |
+
Guardrail tests:
|
| 4 |
+
- Ensure compliance checker passes (no disallowed deps imported).
|
| 5 |
+
- Ensure anon_bot.rules doesn't produce unsafe replies for empty / bad input.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import subprocess
|
| 9 |
+
import sys
|
| 10 |
+
import pathlib
|
| 11 |
+
|
| 12 |
+
import pytest
|
| 13 |
+
|
| 14 |
+
from anon_bot import rules
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def test_compliance_script_runs_clean():
|
| 18 |
+
root = pathlib.Path(__file__).resolve().parent.parent
|
| 19 |
+
script = root / "scripts" / "check_compliance.py"
|
| 20 |
+
# Run as a subprocess so we catch real exit code
|
| 21 |
+
proc = subprocess.run([sys.executable, str(script)], capture_output=True, text=True)
|
| 22 |
+
# If it fails, dump output for debugging
|
| 23 |
+
if proc.returncode != 0:
|
| 24 |
+
print(proc.stdout)
|
| 25 |
+
print(proc.stderr, file=sys.stderr)
|
| 26 |
+
assert proc.returncode == 0
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@pytest.mark.parametrize("msg", ["", None, " "])
|
| 30 |
+
def test_rules_empty_prompts_are_safe(msg):
|
| 31 |
+
r = rules.reply_for(msg or "", [])
|
| 32 |
+
# Should politely nudge the user, not crash
|
| 33 |
+
assert "Please" in r.text or "help" in r.text.lower()
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@pytest.mark.parametrize("msg", ["rm -rf /", "DROP TABLE users;"])
|
| 37 |
+
def test_rules_handles_malicious_looking_input(msg):
|
| 38 |
+
r = rules.reply_for(msg, [])
|
| 39 |
+
# The bot should fall back safely to generic chat response
|
| 40 |
+
assert "Noted" in r.text or "help" in r.text
|
tests/test_indexer.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# /tests/test_indexer.py
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from memory.rag.data.indexer import TfidfIndex, search, DEFAULT_INDEX_PATH
|
| 4 |
+
|
| 5 |
+
def test_add_and_search(tmp_path: Path):
|
| 6 |
+
p = tmp_path / "a.md"
|
| 7 |
+
p.write_text("Hello world. This is an anonymous chatbot.\nRules are simple.", encoding="utf-8")
|
| 8 |
+
idx = TfidfIndex()
|
| 9 |
+
idx.add_file(p)
|
| 10 |
+
hits = idx.search("anonymous rules", k=5)
|
| 11 |
+
assert hits and hits[0].doc_id == str(p.resolve())
|
| 12 |
+
|
| 13 |
+
def test_persist_and_load(tmp_path: Path):
|
| 14 |
+
p = tmp_path / "index.json"
|
| 15 |
+
idx = TfidfIndex()
|
| 16 |
+
idx.add_text("id1", "cats are great, dogs are cool", meta=__meta("id1"))
|
| 17 |
+
idx.save(p)
|
| 18 |
+
loaded = TfidfIndex.load(p)
|
| 19 |
+
hits = loaded.search("dogs", k=1)
|
| 20 |
+
assert hits and hits[0].doc_id == "id1"
|
| 21 |
+
|
| 22 |
+
def __meta(i: str):
|
| 23 |
+
from memory.rag.data.indexer import DocMeta
|
| 24 |
+
return DocMeta(doc_id=i, source="inline", title=i)
|
tests/test_logged_in_bot.py
CHANGED
|
@@ -1,2 +1,84 @@
|
|
| 1 |
# /test/test_logged_in_bot.py
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /test/test_logged_in_bot.py
|
| 2 |
+
"""
|
| 3 |
+
Tests for logged_in_bot.tools (no Azure required).
|
| 4 |
+
Run: pytest -q
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import pytest
|
| 9 |
+
|
| 10 |
+
from logged_in_bot import tools as L
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def test_help_route_and_reply():
|
| 14 |
+
resp = L.handle_logged_in_turn("help", history=[], user=None)
|
| 15 |
+
assert isinstance(resp, dict)
|
| 16 |
+
assert "I can:" in resp["reply"]
|
| 17 |
+
assert resp["meta"]["intent"] == "help"
|
| 18 |
+
assert "sentiment" in resp["meta"] # attached even in help path
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def test_echo_payload():
|
| 22 |
+
resp = L.handle_logged_in_turn("echo hello world", history=[], user=None)
|
| 23 |
+
assert resp["reply"] == "hello world"
|
| 24 |
+
assert resp["meta"]["intent"] == "echo"
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def test_summarize_uses_first_sentence():
|
| 28 |
+
text = "This is the first sentence. This is the second sentence."
|
| 29 |
+
resp = L.handle_logged_in_turn(f"summarize {text}", history=[], user=None)
|
| 30 |
+
# naive summarizer returns the first sentence (possibly truncated)
|
| 31 |
+
assert "first sentence" in resp["reply"]
|
| 32 |
+
assert resp["meta"]["intent"] == "summarize"
|
| 33 |
+
assert "sentiment" in resp["meta"] # sentiment computed on source text
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def test_empty_input_prompts_user():
|
| 37 |
+
resp = L.handle_logged_in_turn("", history=[], user=None)
|
| 38 |
+
assert "Please type" in resp["reply"]
|
| 39 |
+
assert resp["meta"]["intent"] == "empty"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def test_general_chat_fallback_and_sentiment():
|
| 43 |
+
resp = L.handle_logged_in_turn("I love this project!", history=[], user=None)
|
| 44 |
+
assert isinstance(resp["reply"], str) and len(resp["reply"]) > 0
|
| 45 |
+
# sentiment present; backend may be "local" or "none" depending on env
|
| 46 |
+
sent = resp["meta"].get("sentiment", {})
|
| 47 |
+
assert sent.get("label") in {"positive", "neutral", "negative", None}
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def test_optional_redaction_is_honored(monkeypatch):
|
| 51 |
+
# Monkeypatch optional redactor to simulate PII masking
|
| 52 |
+
monkeypatch.setattr(L, "pii_redact", lambda s: s.replace("555-1234", "[REDACTED]"), raising=False)
|
| 53 |
+
resp = L.handle_logged_in_turn("echo call me at 555-1234", history=[], user=None)
|
| 54 |
+
assert resp["meta"]["redacted"] is True
|
| 55 |
+
assert resp["reply"] == "call me at [REDACTED]"
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def test_input_length_cap(monkeypatch):
|
| 59 |
+
# Cap input length to 10 chars; ensure ellipsis added
|
| 60 |
+
monkeypatch.setenv("MAX_INPUT_CHARS", "10")
|
| 61 |
+
long = "echo 1234567890ABCDEFGHIJ"
|
| 62 |
+
resp = L.handle_logged_in_turn(long, history=[], user=None)
|
| 63 |
+
# reply is payload of redacted/sanitized text; should end with ellipsis
|
| 64 |
+
assert resp["reply"].endswith("β¦") or resp["reply"].endswith("...") # handle different ellipsis if changed
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def test_history_pass_through_shape():
|
| 68 |
+
# History should be accepted and not crash; we don't deeply inspect here
|
| 69 |
+
hist = [("user", "prev"), ("bot", "ok")]
|
| 70 |
+
resp = L.handle_logged_in_turn("echo ping", history=hist, user={"id": "u1"})
|
| 71 |
+
assert resp["reply"] == "ping"
|
| 72 |
+
assert isinstance(resp["meta"], dict)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
@pytest.mark.parametrize("msg,expected_intent", [
|
| 76 |
+
("help", "help"),
|
| 77 |
+
("echo abc", "echo"),
|
| 78 |
+
("summarize One. Two.", "summarize"),
|
| 79 |
+
("random chat", "chat"),
|
| 80 |
+
])
|
| 81 |
+
def test_intent_detection_smoke(msg, expected_intent):
|
| 82 |
+
r = L.handle_logged_in_turn(msg, history=[], user=None)
|
| 83 |
+
assert r["meta"]["intent"] == expected_intent
|
| 84 |
+
|
tests/test_memory.py
CHANGED
|
@@ -1,2 +1,95 @@
|
|
| 1 |
# /test/test_memory.py
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /test/test_memory.py
|
| 2 |
+
"""
|
| 3 |
+
Tests for memory.sessions
|
| 4 |
+
Run: pytest -q
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import time
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
from memory import sessions as S
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def test_create_and_append_history():
    """A fresh session records turns in order and keeps updated_at monotonic."""
    store = S.SessionStore(ttl_seconds=None, max_history=10)
    session = store.create(user_id="u1")
    assert session.session_id
    sid = session.session_id

    store.append_user(sid, "hello")
    store.append_bot(sid, "hi there")
    assert store.get_history(sid) == [("user", "hello"), ("bot", "hi there")]

    # Appending again must not move the timestamp backwards.
    stamp_before = session.updated_at
    store.append_user(sid, "next")
    assert store.get(sid).updated_at >= stamp_before
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def test_max_history_cap():
    """Only the most recent max_history turns survive in the session."""
    store = S.SessionStore(ttl_seconds=None, max_history=3)
    sid = store.create().session_id

    # Four appends against a cap of three: the oldest turn falls off the front.
    for appender, text in (
        (store.append_user, "a"),
        (store.append_bot, "b"),
        (store.append_user, "c"),
        (store.append_bot, "d"),
    ):
        appender(sid, text)

    assert store.get_history(sid) == [("bot", "b"), ("user", "c"), ("bot", "d")]
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def test_ttl_sweep_expires_old_sessions():
    """sweep() drops sessions whose updated_at is older than the TTL."""
    store = S.SessionStore(ttl_seconds=0)  # zero TTL: everything is stale
    first = store.create()
    second = store.create()

    # Push both timestamps into the past so the sweep sees them as expired.
    for sess in (first, second):
        store._sessions[sess.session_id].updated_at -= 10

    assert store.sweep() >= 1
    # Whatever survives the sweep must still count as fresh.
    for sid in store.all_ids():
        assert not store._expired(store.get(sid))
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def test_key_value_store_helpers():
    """set/get_value/data_dict round-trip arbitrary per-session values."""
    store = S.SessionStore(ttl_seconds=None)
    sid = store.create().session_id

    store.set(sid, "mode", "anonymous")
    store.set(sid, "counter", 1)

    assert store.get_value(sid, "mode") == "anonymous"
    assert store.data_dict(sid)["counter"] == 1
    # A missing key falls back to the supplied default.
    assert store.get_value(sid, "missing", default="x") == "x"
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def test_persistence_save_and_load(tmp_path: Path):
    """A saved store reloads with history and user_id intact."""
    target = tmp_path / "sess.json"

    original = S.SessionStore(ttl_seconds=None)
    sess = original.create(user_id="uX")
    original.append_user(sess.session_id, "hello")
    original.append_bot(sess.session_id, "hi")
    original.save(target)

    restored = S.SessionStore.load(target)
    assert restored.get_history(sess.session_id) == [("user", "hello"), ("bot", "hi")]
    assert restored.get(sess.session_id).user_id == "uX"
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def test_module_level_singleton_and_helpers():
    """Module-level convenience wrappers operate on the shared default store."""
    sess = S.new_session(user_id="alice")
    sid = sess.session_id

    S.append_user(sid, "hey")
    S.append_bot(sid, "hello!")
    assert S.history(sid)[-2:] == [("user", "hey"), ("bot", "hello!")]

    S.set_value(sid, "flag", True)
    assert S.get_value(sid, "flag") is True
|
tests/test_nlu.py
CHANGED
|
@@ -1,2 +1,46 @@
|
|
| 1 |
# /test/test_nlu.py
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# /test/test_nlu.py
|
| 2 |
+
"""
|
| 3 |
+
Basic tests for the NLU pipeline and router.
|
| 4 |
+
Run with: pytest -q
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import pytest
|
| 8 |
+
|
| 9 |
+
from nlu import pipeline, router
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def test_pipeline_greeting():
    """'Hello there' must be classified as a confident greeting."""
    result = pipeline.analyze("Hello there")
    assert result["intent"] == "greeting"
    assert result["confidence"] > 0.5
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def test_pipeline_general():
    """Unrecognised text falls back to 'general' and still carries entities."""
    result = pipeline.analyze("completely random utterance")
    assert result["intent"] == "general"
    assert "entities" in result
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def test_router_route_and_respond():
    """A help query routes to the HELP action and yields a help-themed reply."""
    routed = router.route("Can you help me?")
    assert routed["intent"] == "help"
    assert routed["action"] == "HELP"

    reply = router.respond("Can you help me?")
    assert isinstance(reply, str)
    assert "help" in reply.lower()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def test_router_sentiment_positive():
    """Positive phrasing routes to sentiment_positive with an upbeat reply."""
    assert router.route("I love this bot!")["intent"] == "sentiment_positive"
    reply = router.respond("I love this bot!").lower()
    assert "glad" in reply or "hear" in reply
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def test_router_goodbye():
    """'bye' maps to the GOODBYE action and the reply says goodbye."""
    assert router.route("bye")["action"] == "GOODBYE"
    assert "goodbye" in router.respond("bye").lower()
|
tests/test_retriever.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tests/test_retriever.py
from pathlib import Path

# NOTE: this commit adds memory/rag/indexer.py and memory/rag/retriever.py;
# the previous "memory.rag.data.*" paths pointed at a package that does not
# exist in the tree and would raise ImportError at collection time.
from memory.rag.indexer import DocMeta, TfidfIndex
from memory.rag.retriever import Filters, retrieve
|
| 5 |
+
|
| 6 |
+
def _add(idx, did, text, title=None, tags=None):
    """Index *text* under document id *did* with inline-source metadata."""
    idx.add_text(did, text, DocMeta(doc_id=did, source="inline", title=title, tags=tags))
|
| 9 |
+
|
| 10 |
+
def test_retrieve_passage(tmp_path: Path):
    """retrieve() against a freshly saved two-doc index surfaces the relevant doc.

    Uses the module-level TfidfIndex import; the previous in-test re-imports
    from the non-existent "memory.rag.data" package (including the unused
    DEFAULT_INDEX_PATH) were removed, as was the unused monkeypatch fixture.
    """
    index_path = tmp_path / "idx.json"
    idx = TfidfIndex()
    _add(idx, "d1", "Rules for an anonymous chatbot are simple and fast.", title="Design", tags=["doc", "slide"])
    _add(idx, "d2", "This document explains retrieval and index search.", title="RAG", tags=["doc"])
    idx.save(index_path)

    # Query the saved index and confirm the anonymous-chatbot document ranks.
    res = retrieve("anonymous chatbot rules", k=2, index_path=index_path)
    assert res and any("anonymous" in r.text.lower() for r in res)
|
| 23 |
+
|
| 24 |
+
def test_filters(tmp_path: Path):
    """Title and tag filters narrow identical-text hits down to one document.

    TfidfIndex comes from the module-level import; the redundant local import
    from the non-existent "memory.rag.data" package was removed.
    """
    idx = TfidfIndex()
    _add(idx, "a", "hello world", title="Alpha", tags=["doc", "slide"])
    _add(idx, "b", "hello world", title="Beta", tags=["doc"])
    index_path = tmp_path / "idx.json"
    idx.save(index_path)

    filters = Filters(title_contains="alpha", require_tags=["doc", "slide"])
    res = retrieve("hello", k=5, index_path=index_path, filters=filters)
    assert len(res) == 1 and res[0].title == "Alpha"
|
tests/test_sessions.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tests/test_sessions.py
|
| 2 |
+
from memory.sessions import SessionStore
|
| 3 |
+
|
| 4 |
+
def test_create_and_history():
    """History is capped at max_history, keeping only the newest turns."""
    store = SessionStore(ttl_seconds=None, max_history=3)
    sid = store.create(user_id="u1").session_id

    store.append_user(sid, "a")
    store.append_bot(sid, "b")
    store.append_user(sid, "c")
    # Fourth append pushes "a" out of the 3-slot window.
    store.append_bot(sid, "d")

    assert store.get_history(sid) == [("bot", "b"), ("user", "c"), ("bot", "d")]
|
| 13 |
+
|
| 14 |
+
def test_save_load(tmp_path):
    """Saving to disk and loading back preserves recorded history."""
    store = SessionStore(ttl_seconds=None)
    sid = store.create().session_id
    store.append_user(sid, "hello")

    target = tmp_path / "sess.json"
    store.save(target)

    reloaded = SessionStore.load(target)
    assert reloaded.get_history(sid)[0] == ("user", "hello")
|
tree.txt
CHANGED
|
@@ -11,9 +11,6 @@ C:\Users\User\Agentic-Chat-bot-
|
|
| 11 |
β βββ handler.py
|
| 12 |
β βββ rules.py
|
| 13 |
βββ app
|
| 14 |
-
β βββ app
|
| 15 |
-
β β βββ app.py
|
| 16 |
-
β β βββ routes.py
|
| 17 |
β βββ assets
|
| 18 |
β β βββ html
|
| 19 |
β β βββ agenticcore_frontend.html
|
|
@@ -36,7 +33,6 @@ C:\Users\User\Agentic-Chat-bot-
|
|
| 36 |
β βββ architecture.md
|
| 37 |
β βββ design.md
|
| 38 |
β βββ DEV_DOC.md
|
| 39 |
-
β βββ flowchart.png
|
| 40 |
β βββ results.md
|
| 41 |
βββ examples
|
| 42 |
β βββ example.py
|
|
@@ -84,10 +80,13 @@ C:\Users\User\Agentic-Chat-bot-
|
|
| 84 |
β βββ smoke_test.py
|
| 85 |
β βββ test_anon_bot.py
|
| 86 |
β βββ test_guardrails.py
|
|
|
|
| 87 |
β βββ test_logged_in_bot.py
|
| 88 |
β βββ test_memory.py
|
| 89 |
β βββ test_nlu.py
|
| 90 |
-
β
|
|
|
|
|
|
|
| 91 |
βββ tools
|
| 92 |
β βββ quick_sanity.py
|
| 93 |
βββ .gitignore
|
|
|
|
| 11 |
β βββ handler.py
|
| 12 |
β βββ rules.py
|
| 13 |
βββ app
|
|
|
|
|
|
|
|
|
|
| 14 |
β βββ assets
|
| 15 |
β β βββ html
|
| 16 |
β β βββ agenticcore_frontend.html
|
|
|
|
| 33 |
β βββ architecture.md
|
| 34 |
β βββ design.md
|
| 35 |
β βββ DEV_DOC.md
|
|
|
|
| 36 |
β βββ results.md
|
| 37 |
βββ examples
|
| 38 |
β βββ example.py
|
|
|
|
| 80 |
β βββ smoke_test.py
|
| 81 |
β βββ test_anon_bot.py
|
| 82 |
β βββ test_guardrails.py
|
| 83 |
+
β βββ test_indexer.py
|
| 84 |
β βββ test_logged_in_bot.py
|
| 85 |
β βββ test_memory.py
|
| 86 |
β βββ test_nlu.py
|
| 87 |
+
β βββ test_retriever.py
|
| 88 |
+
β βββ test_routes.py
|
| 89 |
+
β βββ test_sessions.py
|
| 90 |
βββ tools
|
| 91 |
β βββ quick_sanity.py
|
| 92 |
βββ .gitignore
|