JerameeUC committed on
Commit
ecbc643
·
1 Parent(s): 071c820

7th Commit - All placeholder code added.

Browse files
FLATTENED_CODE.txt CHANGED
The diff for this file is too large to render. See raw diff
 
Makefile CHANGED
@@ -1,11 +1,68 @@
1
- .PHONY: dev test run seed check
 
 
2
  dev:
3
  pip install -r requirements.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  test:
5
- pytest -q
 
 
 
 
 
6
  run:
7
  export PYTHONPATH=. && python -c "from storefront_chatbot.app.app import build; build().launch(server_name='0.0.0.0', server_port=7860)"
 
 
 
 
 
 
8
  seed:
9
  python storefront_chatbot/scripts/seed_data.py
 
10
  check:
11
  python storefront_chatbot/scripts/check_compliance.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: dev ml dev-deps example example-dev test run seed check lint fmt typecheck clean serve all ci coverage docker-build docker-run
2
+
3
+ # --- setup ---
4
  dev:
5
  pip install -r requirements.txt
6
+
7
+ ml:
8
+ pip install -r requirements-ml.txt
9
+
10
+ dev-deps:
11
+ pip install -r requirements-dev.txt
12
+
13
+ # --- one-stop local env + tests ---
14
+ example-dev: dev dev-deps
15
+ pytest
16
+ @echo "✅ Dev environment ready. Try 'make example' to run the CLI demo."
17
+
18
+ # --- tests & coverage ---
19
  test:
20
+ pytest
21
+
22
+ coverage:
23
+ pytest --cov=storefront_chatbot --cov-report=term-missing
24
+
25
+ # --- run app ---
26
  run:
27
  export PYTHONPATH=. && python -c "from storefront_chatbot.app.app import build; build().launch(server_name='0.0.0.0', server_port=7860)"
28
+
29
+ # --- example demo ---
30
+ example:
31
+ export PYTHONPATH=. && python example/example.py "hello world"
32
+
33
+ # --- data & checks ---
34
  seed:
35
  python storefront_chatbot/scripts/seed_data.py
36
+
37
  check:
38
  python storefront_chatbot/scripts/check_compliance.py
39
+
40
+ # --- quality gates ---
41
+ lint:
42
+ flake8 storefront_chatbot
43
+
44
+ fmt:
45
+ black .
46
+ isort .
47
+
48
+ typecheck:
49
+ mypy .
50
+
51
+ # --- hygiene ---
52
+ clean:
53
+ find . -type d -name "__pycache__" -exec rm -rf {} +
54
+ rm -rf .pytest_cache .mypy_cache .ruff_cache .coverage
55
+
56
+ serve:
57
+ export PYTHONPATH=. && uvicorn storefront_chatbot.app.app:build --reload --host 0.0.0.0 --port 7860
58
+
59
+ # --- docker (optional) ---
60
+ docker-build:
61
+ docker build -t storefront-chatbot .
62
+
63
+ docker-run:
64
+ docker run -p 7860:7860 storefront-chatbot
65
+
66
+ # --- bundles ---
67
+ all: clean check test
68
+ ci: lint typecheck coverage
anon_bot/handler.py CHANGED
@@ -1,3 +1,42 @@
1
- # /anon_bot/handler.py
 
 
 
 
 
 
 
2
 
3
- def handle_turn(m,h,u): return (h or [])+[[m,'hi']]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # anon_bot/handler.py
2
+ """
3
+ Stateless(ish) turn handler for the anonymous chatbot.
4
+ Signature kept tiny: handle_turn(message, history, user) -> new_history
5
+ - message: str (user text)
6
+ - history: list of [speaker, text] or None
7
+ - user: dict-like info (ignored here, but accepted for compatibility)
8
+ """
9
 
10
+ from __future__ import annotations
11
+ from typing import List, Tuple, Any
12
+ from . import rules
13
+
14
+ History = List[Tuple[str, str]] # [("user","..."), ("bot","...")]
15
+
16
+ def _coerce_history(h: Any) -> History:
17
+ if not h:
18
+ return []
19
+ # normalize to tuple pairs
20
+ out: History = []
21
+ for item in h:
22
+ try:
23
+ who, text = item[0], item[1]
24
+ except Exception:
25
+ continue
26
+ out.append((str(who), str(text)))
27
+ return out
28
+
29
+ def handle_turn(message: str, history: History | None, user: dict | None) -> History:
30
+ hist = _coerce_history(history)
31
+ user_text = (message or "").strip()
32
+ if user_text:
33
+ hist.append(("user", user_text))
34
+ rep = rules.reply_for(user_text, hist)
35
+ hist.append(("bot", rep.text))
36
+ return hist
37
+
38
+ # Convenience: one-shot string→string (used by plain JSON endpoints)
39
+ def handle_text(message: str, history: History | None = None) -> str:
40
+ new_hist = handle_turn(message, history, user=None)
41
+ # last item is bot reply
42
+ return new_hist[-1][1] if new_hist else ""
anon_bot/rules.py CHANGED
@@ -1 +1,90 @@
1
- # /anon_bot/rules.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # anon_bot/rules.py
2
+ """
3
+ Lightweight rule set for an anonymous chatbot.
4
+ No external providers required. Pure-Python, deterministic.
5
+ """
6
+
7
+ from __future__ import annotations
8
+ from dataclasses import dataclass
9
+ from typing import Dict, List, Tuple
10
+
11
+ # ---- Types ----
12
+ History = List[Tuple[str, str]] # e.g., [("user","hi"), ("bot","hello!")]
13
+
14
+ @dataclass(frozen=True)
15
+ class Reply:
16
+ text: str
17
+ meta: Dict[str, str] | None = None
18
+
19
+
20
+ def normalize(s: str) -> str:
21
+ return " ".join((s or "").strip().split()).lower()
22
+
23
+
24
+ def capabilities() -> List[str]:
25
+ return [
26
+ "help",
27
+ "reverse <text>",
28
+ "echo <text>",
29
+ "small talk (hi/hello/hey)",
30
+ ]
31
+
32
+
33
+ def intent_of(text: str) -> str:
34
+ t = normalize(text)
35
+ if not t:
36
+ return "empty"
37
+ if t in {"help", "/help", "capabilities"}:
38
+ return "help"
39
+ if t.startswith("reverse "):
40
+ return "reverse"
41
+ if t.startswith("echo "):
42
+ return "echo"
43
+ if t in {"hi", "hello", "hey"}:
44
+ return "greet"
45
+ return "chat"
46
+
47
+
48
+ def handle_help() -> Reply:
49
+ lines = ["I can:"]
50
+ for c in capabilities():
51
+ lines.append(f"- {c}")
52
+ return Reply("\n".join(lines))
53
+
54
+
55
+ def handle_reverse(t: str) -> Reply:
56
+ payload = t.split(" ", 1)[1] if " " in t else ""
57
+ return Reply(payload[::-1] if payload else "(nothing to reverse)")
58
+
59
+
60
+ def handle_echo(t: str) -> Reply:
61
+ payload = t.split(" ", 1)[1] if " " in t else ""
62
+ return Reply(payload or "(nothing to echo)")
63
+
64
+
65
+ def handle_greet() -> Reply:
66
+ return Reply("Hello! 👋 Type 'help' to see what I can do.")
67
+
68
+
69
+ def handle_chat(t: str, history: History) -> Reply:
70
+ # Very simple “ELIZA-ish” fallback.
71
+ if "help" in t:
72
+ return handle_help()
73
+ if "you" in t and "who" in t:
74
+ return Reply("I'm a tiny anonymous chatbot kernel.")
75
+ return Reply("Noted. (anonymous mode) Type 'help' for commands.")
76
+
77
+
78
+ def reply_for(text: str, history: History) -> Reply:
79
+ it = intent_of(text)
80
+ if it == "empty":
81
+ return Reply("Please type something. Try 'help'.")
82
+ if it == "help":
83
+ return handle_help()
84
+ if it == "reverse":
85
+ return handle_reverse(text)
86
+ if it == "echo":
87
+ return handle_echo(text)
88
+ if it == "greet":
89
+ return handle_greet()
90
+ return handle_chat(text.lower(), history)
docs/architecture.md CHANGED
@@ -1,2 +1,73 @@
1
  <!-- /docs/slides/architecture.md -->
2
- # Architecture\n\nShort explainer tied to the flowchart.\n
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  <!-- /docs/slides/architecture.md -->
2
+ # Architecture
3
+
4
+ This system follows a **modular chatbot architecture** built around a clear flow of data from the user interface to external services and back. The design emphasizes separation of concerns, allowing each module to handle a specific responsibility while keeping the overall system simple to test and extend.
5
+
6
+ ---
7
+
8
+ ## High-Level Flow (tied to flowchart)
9
+
10
+ 1. **User Interface (UI)**
11
+ - The entry point for user interaction.
12
+ - Implemented through a web client (e.g., Gradio, HTML templates, or API endpoint).
13
+ - Captures user input and displays bot responses.
14
+
15
+ 2. **Router / Core Logic**
16
+ - Handles conversation state and routes messages.
17
+ - Delegates to either the anonymous bot, logged-in bot, or agentic extensions.
18
+ - Imports lightweight rules from `anon_bot/rules.py` for anonymous sessions, and integrates with advanced providers for logged-in sessions.
19
+
20
+ 3. **NLU (Natural Language Understanding)**
21
+ - Managed by the `nlu/` pipeline (intent recognition, prompts, and routing).
22
+ - Provides preprocessing, normalization, and optional summarization/RAG.
23
+ - Keeps the system extensible for additional models without changing the rest of the stack.
24
+
25
+ 4. **Memory & Context Layer**
26
+ - Implemented in `memory/` (sessions, store, and optional RAG retriever/indexer).
27
+ - Stores session history, enabling context-aware responses.
28
+ - Supports modular backends (in-memory, file-based, or vector index).
29
+
30
+ 5. **External AI Service Connector (optional)**
31
+ - For logged-in flows, integrates with cloud AIaaS (e.g., Azure, HuggingFace, or open-source LLMs).
32
+ - Uses `logged_in_bot/sentiment_azure.py` or `agenticcore/providers_unified.py`.
33
+ - Provides NLP services like sentiment analysis or summarization.
34
+ - Disabled in anonymous mode for privacy.
35
+
36
+ 6. **Guardrails & Safety**
37
+ - Defined in `guardrails/` (PII redaction, safety filters).
38
+ - Applied before responses are shown to the user.
39
+ - Ensures compliance with privacy/security requirements.
40
+
41
+ 7. **Outputs**
42
+ - Bot response returned to the UI.
43
+ - Logs written via `core/logging.py` for traceability and debugging.
44
+ - Optional screenshots and reports recorded for evaluation.
45
+
46
+ ---
47
+
48
+ ## Key Principles
49
+
50
+ - **Modularity**: Each part of the flow is a self-contained module (UI, NLU, memory, guardrails).
51
+ - **Swap-in Providers**: Agentic core can switch between local rules, RAG memory, or external APIs.
52
+ - **Anonymous vs Logged-In**: Anonymous bot uses lightweight rules with no external calls; logged-in bot can call providers.
53
+ - **Extensibility**: Flowchart design makes it easy to add summarization, conversation modes, or other “agentic” behaviors without rewriting the core.
54
+ - **Resilience**: If an external service fails, the system degrades gracefully to local responses.
55
+
56
+ ---
57
+
58
+ ## Mapping to Repo Structure
59
+
60
+ - `app/` β†’ User-facing entrypoint (routes, HTML, API).
61
+ - `anon_bot/` β†’ Anonymous chatbot rules + handler.
62
+ - `logged_in_bot/` β†’ Provider-based flows for authenticated users.
63
+ - `nlu/` β†’ Intent routing, prompts, pipeline.
64
+ - `memory/` β†’ Session management + RAG integration.
65
+ - `guardrails/` β†’ Safety filters + PII redaction.
66
+ - `agenticcore/` β†’ Core integration logic and unified providers.
67
+ - `docs/flowchart.png` β†’ Visual representation of this architecture.
68
+
69
+ ---
70
+
71
+ ## Summary
72
+
73
+ The architecture ensures a **clean separation between interface, logic, and services**, enabling experimentation with different providers while guaranteeing a safe, privacy-friendly anonymous mode. The flowchart illustrates this layered approach: input → logic → NLU/memory → optional AIaaS → guardrails → output.
docs/design.md CHANGED
@@ -1,2 +1,72 @@
1
  <!-- /docs/slides/design.md -->
2
- # Design notes\n\nAPI notes, security, tradeoffs.\n
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  <!-- /docs/slides/design.md -->
2
+ # Design Notes
3
+
4
+ These notes document the reasoning behind major design choices, focusing on **API usage**, **security considerations**, and **tradeoffs** made during development.
5
+
6
+ ---
7
+
8
+ ## API Notes
9
+
10
+ - **Anonymous vs Logged-In Flows**
11
+ - The **anonymous chatbot** relies purely on local rules (`anon_bot/rules.py`) and does not call any external services.
12
+ - The **logged-in chatbot** integrates with external AIaaS endpoints (e.g., Azure, HuggingFace, or other NLP providers) via modules in `logged_in_bot/` and `agenticcore/providers_unified.py`.
13
+
14
+ - **Endpoints**
15
+ - `/plain-chat` β†’ Anonymous flow; maps to `logic.handle_text`.
16
+ - `/api/messages` β†’ For framework compatibility (e.g., BotFramework or FastAPI demo).
17
+ - `/healthz` β†’ Lightweight health check for monitoring.
18
+
19
+ - **NLU Pipeline**
20
+ - Intent routing (`nlu/router.py`) determines if user input should be treated as a direct command, a small-talk message, or passed to providers.
21
+ - Prompts and transformations are managed in `nlu/prompts.py` to centralize natural language templates.
22
+
23
+ - **Memory Integration**
24
+ - Session memory stored in `memory/sessions.py`.
25
+ - Optional RAG indexer (`memory/rag/indexer.py`) allows document retrieval for extended context.
26
+
27
+ ---
28
+
29
+ ## Security Considerations
30
+
31
+ - **API Keys**
32
+ - Keys for external services are never hard-coded.
33
+ - They are pulled from environment variables or `.env` files (via `core/config.py`).
34
+
35
+ - **Data Handling**
36
+ - Anonymous mode never sends user text outside the local process.
37
+ - Logged-in mode applies guardrails before making external calls.
38
+ - Sensitive information (emails, IDs) is redacted using `guardrails/pii_redaction.py`.
39
+
40
+ - **Logging**
41
+ - Logs are structured (`core/logging.py`) and omit sensitive data by default.
42
+ - Debug mode can be enabled for local testing but should not be used in production.
43
+
44
+ - **Privacy**
45
+ - Anonymous sessions are ephemeral: conversation state is stored only in memory unless explicitly persisted.
46
+ - Logged-in sessions may optionally persist data, but only with user consent.
47
+
48
+ ---
49
+
50
+ ## Tradeoffs
51
+
52
+ - **Rule-Based vs AI-Powered**
53
+ - Rule-based responses are deterministic, fast, and private but limited in sophistication.
54
+ - AI-powered responses (via providers) allow richer understanding but introduce latency, costs, and privacy risks.
55
+
56
+ - **Extensibility vs Simplicity**
57
+ - Chose a **modular repo structure** (separate folders for `anon_bot`, `logged_in_bot`, `memory`, `nlu`) to allow future growth.
58
+ - This adds some boilerplate overhead but makes it easier to swap components.
59
+
60
+ - **Performance vs Accuracy**
61
+ - Non-functional requirement: responses within 2 seconds for 95% of requests.
62
+ - This meant prioritizing lightweight providers and caching over heavyweight models.
63
+
64
+ - **Anonymous Mode as Default**
65
+ - Defaulting to anonymous mode ensures the system works offline and avoids external dependencies.
66
+ - Tradeoff: limits functionality until the user explicitly opts in for a logged-in session.
67
+
68
+ ---
69
+
70
+ ## Summary
71
+
72
+ The design balances **privacy, modularity, and extensibility**. By cleanly separating anonymous and logged-in paths, the system can run entirely offline while still supporting richer AI features when configured. Security and privacy are first-class concerns, and tradeoffs were made to keep the system lightweight, testable, and compliant with project constraints.
docs/flowchart.png DELETED
examples/example-dev.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /example/example-dev.py
2
+ """
3
+ Dev environment sanity example.
4
+
5
+ - Imports ChatBot
6
+ - Sends a test message
7
+ - Prints the JSON reply
8
+ - Confirms basic dependencies work
9
+
10
+ Usage:
11
+ python example/example-dev.py
12
+ """
13
+
14
+ import json
15
+ import sys
16
+
17
+ try:
18
+ from agenticcore.chatbot.services import ChatBot
19
+ except ImportError as e:
20
+ print("❌ Could not import ChatBot. Did you set PYTHONPATH or install dependencies?")
21
+ sys.exit(1)
22
+
23
+
24
+ def main():
25
+ bot = ChatBot()
26
+ msg = "Hello from example-dev!"
27
+ result = bot.reply(msg)
28
+
29
+ print("✅ Dev environment is working")
30
+ print("Input:", msg)
31
+ print("Reply JSON:")
32
+ print(json.dumps(result, indent=2))
33
+
34
+
35
+ if __name__ == "__main__":
36
+ main()
examples/example.py CHANGED
@@ -1,9 +1,63 @@
1
  # /example/example.py
2
- """Simple CLI example that sends a message to the ChatBot and prints the JSON reply."""
 
 
 
 
 
 
 
 
3
  import json
4
- from agenticcore.chatbot.services import ChatBot
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  if __name__ == "__main__":
7
- bot = ChatBot()
8
- result = bot.reply("hello world")
9
- print(json.dumps(result, indent=2))
 
1
  # /example/example.py
2
+ """
3
+ Simple CLI/REPL example for the ChatBot.
4
+
5
+ Usage:
6
+ python example/example.py "hello world"
7
+ python example/example.py # enters interactive mode
8
+ """
9
+
10
+ import argparse
11
  import json
12
+ import sys
13
+
14
+ try:
15
+ from agenticcore.chatbot.services import ChatBot
16
+ except ImportError as e:
17
+ print("❌ Could not import ChatBot. Did you set PYTHONPATH or install agenticcore?")
18
+ sys.exit(1)
19
+
20
+
21
+ def main():
22
+ parser = argparse.ArgumentParser(description="ChatBot CLI/REPL example")
23
+ parser.add_argument(
24
+ "message",
25
+ nargs="*",
26
+ help="Message to send. Leave empty to start interactive mode.",
27
+ )
28
+ args = parser.parse_args()
29
+
30
+ try:
31
+ bot = ChatBot()
32
+ except Exception as e:
33
+ print(f"❌ Failed to initialize ChatBot: {e}")
34
+ sys.exit(1)
35
+
36
+ if args.message:
37
+ # One-shot mode
38
+ msg = " ".join(args.message)
39
+ result = bot.reply(msg)
40
+ print(json.dumps(result, indent=2))
41
+ else:
42
+ # Interactive REPL
43
+ print("💬 Interactive mode. Type 'quit' or 'exit' to stop.")
44
+ while True:
45
+ try:
46
+ msg = input("> ").strip()
47
+ except (EOFError, KeyboardInterrupt):
48
+ print("\n👋 Exiting.")
49
+ break
50
+
51
+ if msg.lower() in {"quit", "exit"}:
52
+ print("👋 Goodbye.")
53
+ break
54
+
55
+ if not msg:
56
+ continue
57
+
58
+ result = bot.reply(msg)
59
+ print(json.dumps(result, indent=2))
60
+
61
 
62
  if __name__ == "__main__":
63
+ main()
 
 
integrations/azure/bot_framework.py CHANGED
@@ -1,2 +1,39 @@
1
- # /intergrations/azure/bot_framework.py
2
- # Azure Bot Framework (placeholder)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # integrations/azure/bot_framework.py
2
+ """
3
+ Azure Bot Framework integration (stub).
4
+
5
+ This module is a placeholder for connecting the chatbot
6
+ to Microsoft Azure Bot Framework. It is optional —
7
+ the anonymous bot does not depend on this code.
8
+
9
+ If you want to enable Azure:
10
+ 1. Install `botbuilder` SDK (pip install botbuilder-core aiohttp).
11
+ 2. Fill in the adapter setup and message handling below.
12
+ """
13
+
14
+ from typing import Any, Dict
15
+
16
+
17
+ class AzureBotFrameworkNotConfigured(Exception):
18
+ """Raised when Azure Bot Framework is called but not set up."""
19
+
20
+
21
+ def init_adapter(config: Dict[str, Any] | None = None):
22
+ """
23
+ Placeholder for BotFrameworkAdapter initialization.
24
+ Returns a dummy object unless replaced with actual Azure code.
25
+ """
26
+ raise AzureBotFrameworkNotConfigured(
27
+ "Azure Bot Framework integration is not configured. "
28
+ "Use anon_bot for local testing."
29
+ )
30
+
31
+
32
+ def handle_activity(activity: Dict[str, Any]) -> Dict[str, Any]:
33
+ """
34
+ Placeholder for handling an incoming Bot Framework activity.
35
+ Echoes back a dummy response if called directly.
36
+ """
37
+ if not activity:
38
+ return {"type": "message", "text": "(no activity received)"}
39
+ return {"type": "message", "text": f"Echo: {activity.get('text', '')}"}
integrations/email/ticket_stub.py CHANGED
@@ -1,2 +1,57 @@
1
  # /intergrations/email/ticket_stub.py
2
- # Email ticket stub (placeholder)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /intergrations/email/ticket_stub.py
2
+ """
3
+ Email / Ticket System Stub.
4
+
5
+ This module simulates creating a support ticket via email.
6
+ It is a placeholder — no actual emails are sent.
7
+ """
8
+
9
+ from typing import Dict, Any
10
+ import datetime
11
+ import uuid
12
+
13
+
14
+ class TicketStub:
15
+ """
16
+ A stub ticketing system that generates a fake ticket ID
17
+ and stores basic info in memory.
18
+ """
19
+
20
+ def __init__(self):
21
+ self.tickets: Dict[str, Dict[str, Any]] = {}
22
+
23
+ def create_ticket(self, subject: str, body: str, user: str | None = None) -> Dict[str, Any]:
24
+ """
25
+ Create a fake support ticket.
26
+ Returns a dictionary with ticket metadata.
27
+ """
28
+ ticket_id = str(uuid.uuid4())
29
+ ticket = {
30
+ "id": ticket_id,
31
+ "subject": subject,
32
+ "body": body,
33
+ "user": user or "anonymous",
34
+ "created_at": datetime.datetime.utcnow().isoformat() + "Z",
35
+ "status": "open",
36
+ }
37
+ self.tickets[ticket_id] = ticket
38
+ return ticket
39
+
40
+ def get_ticket(self, ticket_id: str) -> Dict[str, Any] | None:
41
+ """Retrieve a ticket by ID if it exists."""
42
+ return self.tickets.get(ticket_id)
43
+
44
+ def list_tickets(self) -> list[Dict[str, Any]]:
45
+ """Return all created tickets."""
46
+ return list(self.tickets.values())
47
+
48
+
49
+ # Singleton for convenience
50
+ stub = TicketStub()
51
+
52
+
53
+ def create_ticket(subject: str, body: str, user: str | None = None) -> Dict[str, Any]:
54
+ """
55
+ Module-level shortcut.
56
+ """
57
+ return stub.create_ticket(subject, body, user)
logged_in_bot/sentiment_azure.py CHANGED
@@ -1 +1,188 @@
1
  # /logged_in_bot/sentiment_azure.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /logged_in_bot/sentiment_azure.py
2
+ """
3
+ Optional Azure Sentiment integration with safe local fallback.
4
+
5
+ Usage:
6
+ from logged_in_bot.sentiment_azure import analyze_sentiment, SentimentResult
7
+
8
+ res = analyze_sentiment("I love this!")
9
+ print(res.label, res.score, res.backend) # e.g., "positive", 0.92, "local"
10
+
11
+ Environment (Azure path only):
12
+ - AZURE_LANGUAGE_ENDPOINT or MICROSOFT_AI_ENDPOINT
13
+ - AZURE_LANGUAGE_KEY or MICROSOFT_AI_KEY
14
+
15
+ If the Azure SDK or env vars are missing, we automatically fall back to a
16
+ deterministic, dependency-free heuristic that is fast and good enough for tests.
17
+ """
18
+
19
+ from __future__ import annotations
20
+ from dataclasses import dataclass
21
+ from typing import Optional, Tuple
22
+ import os
23
+ import re
24
+
25
+
26
+ # ---------------------------
27
+ # Public dataclass & API
28
+ # ---------------------------
29
+
30
+ @dataclass(frozen=True)
31
+ class SentimentResult:
32
+ label: str # "positive" | "neutral" | "negative"
33
+ score: float # 0.0 .. 1.0 (confidence-like)
34
+ backend: str # "azure" | "local"
35
+ raw: Optional[dict] = None # provider raw payload if available
36
+
37
+
38
+ def analyze_sentiment(text: str) -> SentimentResult:
39
+ """
40
+ Analyze sentiment using Azure if configured, otherwise use local heuristic.
41
+
42
+ Never raises on normal use — returns a result even if Azure is misconfigured,
43
+ satisfying 'graceful degradation' requirements.
44
+ """
45
+ text = (text or "").strip()
46
+ if not text:
47
+ return SentimentResult(label="neutral", score=0.5, backend="local", raw={"reason": "empty"})
48
+
49
+ # Try Azure first (only if fully configured and package available)
50
+ azure_ready, why = _is_azure_ready()
51
+ if azure_ready:
52
+ try:
53
+ return _azure_sentiment(text)
54
+ except Exception as e:
55
+ # Degrade gracefully to local
56
+ return _local_sentiment(text, note=f"azure_error: {e!r}")
57
+ else:
58
+ # Go local immediately
59
+ return _local_sentiment(text, note=why)
60
+
61
+
62
+ # ---------------------------
63
+ # Azure path (optional)
64
+ # ---------------------------
65
+
66
+ def _is_azure_ready() -> Tuple[bool, str]:
67
+ """
68
+ Check env + optional SDK presence without importing heavy modules unless needed.
69
+ """
70
+ endpoint = os.getenv("AZURE_LANGUAGE_ENDPOINT") or os.getenv("MICROSOFT_AI_ENDPOINT")
71
+ key = os.getenv("AZURE_LANGUAGE_KEY") or os.getenv("MICROSOFT_AI_KEY")
72
+ if not endpoint or not key:
73
+ return False, "missing_env"
74
+
75
+ try:
76
+ # Light import check
77
+ import importlib
78
+ client_mod = importlib.import_module("azure.ai.textanalytics")
79
+ cred_mod = importlib.import_module("azure.core.credentials")
80
+ # Quick sanity on expected attributes
81
+ getattr(client_mod, "TextAnalyticsClient")
82
+ getattr(cred_mod, "AzureKeyCredential")
83
+ except Exception:
84
+ return False, "sdk_not_installed"
85
+
86
+ return True, "ok"
87
+
88
+
89
+ def _azure_sentiment(text: str) -> SentimentResult:
90
+ """
91
+ Call Azure Text Analytics (Sentiment). Requires:
92
+ pip install azure-ai-textanalytics
93
+ """
94
+ from azure.ai.textanalytics import TextAnalyticsClient
95
+ from azure.core.credentials import AzureKeyCredential
96
+
97
+ endpoint = os.getenv("AZURE_LANGUAGE_ENDPOINT") or os.getenv("MICROSOFT_AI_ENDPOINT")
98
+ key = os.getenv("AZURE_LANGUAGE_KEY") or os.getenv("MICROSOFT_AI_KEY")
99
+
100
+ client = TextAnalyticsClient(endpoint=endpoint, credential=AzureKeyCredential(key))
101
+ # API expects a list of documents
102
+ resp = client.analyze_sentiment(documents=[text], show_opinion_mining=False)
103
+ doc = resp[0]
104
+
105
+ # Map Azure scores to our schema
106
+ label = (doc.sentiment or "neutral").lower()
107
+ # Choose max score among pos/neu/neg as "confidence-like"
108
+ score_map = {
109
+ "positive": doc.confidence_scores.positive,
110
+ "neutral": doc.confidence_scores.neutral,
111
+ "negative": doc.confidence_scores.negative,
112
+ }
113
+ score = float(score_map.get(label, max(score_map.values())))
114
+ raw = {
115
+ "sentiment": doc.sentiment,
116
+ "confidence_scores": {
117
+ "positive": doc.confidence_scores.positive,
118
+ "neutral": doc.confidence_scores.neutral,
119
+ "negative": doc.confidence_scores.negative,
120
+ },
121
+ }
122
+ return SentimentResult(label=label, score=score, backend="azure", raw=raw)
123
+
124
+
125
+ # ---------------------------
126
+ # Local fallback (no deps)
127
+ # ---------------------------
128
+
129
+ _POSITIVE = {
130
+ "good", "great", "love", "excellent", "amazing", "awesome", "happy",
131
+ "wonderful", "fantastic", "like", "enjoy", "cool", "nice", "positive",
132
+ }
133
+ _NEGATIVE = {
134
+ "bad", "terrible", "hate", "awful", "horrible", "sad", "angry",
135
+ "worse", "worst", "broken", "bug", "issue", "problem", "negative",
136
+ }
137
+ # Simple negation tokens to flip nearby polarity
138
+ _NEGATIONS = {"not", "no", "never", "n't"}
139
+
140
+ _WORD_RE = re.compile(r"[A-Za-z']+")
141
+
142
+
143
+ def _local_sentiment(text: str, note: str | None = None) -> SentimentResult:
144
+ """
145
+ Tiny lexicon + negation heuristic:
146
+ - Tokenize letters/apostrophes
147
+ - Score +1 for positive, -1 for negative
148
+ - If a negation appears within the previous 3 tokens, flip the sign
149
+ - Convert final score to pseudo-confidence 0..1
150
+ """
151
+ tokens = [t.lower() for t in _WORD_RE.findall(text)]
152
+ score = 0
153
+ for i, tok in enumerate(tokens):
154
+ window_neg = any(t in _NEGATIONS for t in tokens[max(0, i - 3):i])
155
+ if tok in _POSITIVE:
156
+ score += -1 if window_neg else 1
157
+ elif tok in _NEGATIVE:
158
+ score += 1 if window_neg else -1
159
+
160
+ # Map integer score β†’ label
161
+ if score > 0:
162
+ label = "positive"
163
+ elif score < 0:
164
+ label = "negative"
165
+ else:
166
+ label = "neutral"
167
+
168
+ # Confidence-like mapping: squash by arctan-ish shape without math imports
169
+ # Clamp |score| to 6 β†’ conf in ~[0.55, 0.95]
170
+ magnitude = min(abs(score), 6)
171
+ conf = 0.5 + (magnitude / 6) * 0.45 # 0.5..0.95
172
+
173
+ raw = {"engine": "heuristic", "score_raw": score, "note": note} if note else {"engine": "heuristic", "score_raw": score}
174
+ return SentimentResult(label=label, score=round(conf, 3), backend="local", raw=raw)
175
+
176
+
177
+ # ---------------------------
178
+ # Convenience (module-level)
179
+ # ---------------------------
180
+
181
+ def sentiment_label(text: str) -> str:
182
+ """Return only 'positive' | 'neutral' | 'negative'."""
183
+ return analyze_sentiment(text).label
184
+
185
+
186
+ def sentiment_score(text: str) -> float:
187
+ """Return only the 0..1 confidence-like score."""
188
+ return analyze_sentiment(text).score
logged_in_bot/tools.py CHANGED
@@ -1 +1,225 @@
1
  # /logged_in_bot/tools.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /logged_in_bot/tools.py
2
+ """
3
+ Utilities for the logged-in chatbot flow.
4
+
5
+ Features
6
+ - PII redaction (optional) via guardrails.pii_redaction
7
+ - Sentiment (optional) via logged_in_bot.sentiment_azure (falls back locally)
8
+ - Tiny intent router: summarize | echo | chat
9
+ - Deterministic, dependency-light; safe to import in any environment
10
+ """
11
+
12
+ from __future__ import annotations
13
+ from dataclasses import asdict, dataclass
14
+ from typing import Any, Dict, List, Optional, Tuple
15
+ import os
16
+ import re
17
+
18
+ # -------------------------
19
+ # Optional imports (safe)
20
+ # -------------------------
21
+
22
+ # Sentiment (ours): falls back to a local heuristic if Azure SDK/env missing
23
+ try:
24
+ from .sentiment_azure import analyze_sentiment, SentimentResult # type: ignore
25
+ except Exception: # pragma: no cover
26
+ analyze_sentiment = None
27
+ SentimentResult = None # type: ignore
28
+
29
+ # Guardrails redaction (optional)
30
+ try:
31
+ from guardrails.pii_redaction import redact as pii_redact # type: ignore
32
+ except Exception: # pragma: no cover
33
+ pii_redact = None
34
+
35
+ # core types (optional shape for JSON response)
36
+ try:
37
+ from core.types import PlainChatResponse # dataclass with .to_dict()
38
+ except Exception: # pragma: no cover
39
+ @dataclass
40
+ class PlainChatResponse: # lightweight fallback shape
41
+ reply: str
42
+ meta: Optional[Dict[str, Any]] = None
43
+
44
+ def to_dict(self) -> Dict[str, Any]:
45
+ return asdict(self)
46
+
47
+
48
+ History = List[Tuple[str, str]] # [("user","..."), ("bot","...")]
49
+
50
+
51
+ # -------------------------
52
+ # Helpers
53
+ # -------------------------
54
+
55
+ _WHITESPACE_RE = re.compile(r"\s+")
56
+
57
+
58
+ def sanitize_text(text: str) -> str:
59
+ """Basic sanitize/normalize; keep CPU-cheap & deterministic."""
60
+ text = (text or "").strip()
61
+ text = _WHITESPACE_RE.sub(" ", text)
62
+ # Optionally cap extremely large payloads to protect inference/services
63
+ max_len = int(os.getenv("MAX_INPUT_CHARS", "4000"))
64
+ if len(text) > max_len:
65
+ text = text[:max_len] + "…"
66
+ return text
67
+
68
+
69
+ def redact_text(text: str) -> str:
70
+ """Apply optional PII redaction if available; otherwise return text."""
71
+ if pii_redact:
72
+ try:
73
+ return pii_redact(text)
74
+ except Exception:
75
+ # Fail open but safe
76
+ return text
77
+ return text
78
+
79
+
80
def intent_of(text: str) -> str:
    """Classify a message as one of: empty | summarize | echo | help | chat."""
    normalized = text.lower().strip()
    if not normalized:
        return "empty"
    padded = f" {normalized} "
    if normalized.startswith(("summarize ", "summarise ")) or " summarize " in padded:
        return "summarize"
    if normalized.startswith("echo "):
        return "echo"
    if normalized in ("help", "/help", "capabilities"):
        return "help"
    return "chat"
92
+
93
+
94
def summarize_text(text: str, target_len: int = 120) -> str:
    """Return the first sentence of *text*, truncated to *target_len* chars.

    A cheap, deterministic stand-in for a real summarizer: the signature is
    kept stable so a HF-model-backed implementation can be swapped in later.
    """
    stripped = text.strip()
    # naive sentence boundary: split after ., ! or ? followed by whitespace
    sentences = re.split(r"(?<=[.!?])\s+", stripped)
    lead = sentences[0] if sentences else stripped
    if len(lead) > target_len:
        lead = lead[: target_len - 1].rstrip() + "…"
    return lead
106
+
107
+
108
def capabilities() -> List[str]:
    """List the commands/features this bot supports (shown by 'help')."""
    features = [
        "help",
        "echo <text>",
        "summarize <paragraph>",
        "sentiment tagging (logged-in mode)",
    ]
    return features
115
+
116
+
117
+ # -------------------------
118
+ # Main entry
119
+ # -------------------------
120
+
121
def handle_logged_in_turn(message: str, history: Optional[History], user: Optional[dict]) -> Dict[str, Any]:
    """
    Process one user turn in 'logged-in' mode.

    Returns a PlainChatResponse (dict) with:
      - reply: str
      - meta: { intent, redacted, input_len, sentiment: {label, score, backend} }
    """
    history = history or []
    cleaned = sanitize_text(message or "")

    # Redact PII if the optional redactor is present.
    safe = redact_text(cleaned)
    was_redacted = safe != cleaned

    intent = intent_of(safe)
    meta = _meta(was_redacted, intent, safe)

    # ---------- route ----------
    if intent == "empty":
        return PlainChatResponse(
            reply="Please type something. Try 'help' for options.", meta=meta
        ).to_dict()

    if intent == "help":
        bullet_lines = [f"- {c}" for c in capabilities()]
        return PlainChatResponse(reply="I can:\n" + "\n".join(bullet_lines), meta=meta).to_dict()

    if intent == "echo":
        payload = safe.split(" ", 1)[1] if " " in safe else ""
        reply = payload or "(nothing to echo)"
        _attach_sentiment(meta, reply)  # sentiment on the echoed reply
        return PlainChatResponse(reply=reply, meta=meta).to_dict()

    if intent == "summarize":
        # Use everything after the keyword if present, else the whole message.
        lowered = safe.lower()
        if lowered.startswith(("summarize ", "summarise ")):
            payload = safe.split(" ", 1)[1]
        else:
            payload = safe
        _attach_sentiment(meta, payload)  # sentiment on the source text
        return PlainChatResponse(reply=summarize_text(payload), meta=meta).to_dict()

    # default: chat
    reply = _chat_fallback(safe, history)
    _attach_sentiment(meta, safe)
    return PlainChatResponse(reply=reply, meta=meta).to_dict()
176
+
177
+
178
+ # -------------------------
179
+ # Internals
180
+ # -------------------------
181
+
182
def _chat_fallback(text: str, history: History) -> str:
    """
    Deterministic default reply for general chat in logged-in mode.
    Swap for a provider call if/when one is enabled; *history* is
    accepted for interface parity but not consulted here.
    """
    asks_identity = "who are you" in text.lower()
    if asks_identity:
        return "I'm the logged-in chatbot. I can echo, summarize, and tag sentiment."
    return "Noted! (logged-in mode). Type 'help' for options."
190
+
191
+ def _meta(redacted: bool, intent: str, redacted_text: str) -> Dict[str, Any]:
192
+ return {
193
+ "intent": intent,
194
+ "redacted": redacted,
195
+ "input_len": len(redacted_text),
196
+ }
197
+
198
def _attach_sentiment(meta: Dict[str, Any], text: str) -> None:
    """Best-effort sentiment tagging: writes into *meta*, never raises."""
    try:
        if not analyze_sentiment:
            # sentiment module unavailable — report a neutral placeholder
            meta["sentiment"] = {"label": "neutral", "score": 0.5, "backend": "none"}
            return
        res = analyze_sentiment(text)
        if hasattr(res, "__dict__"):
            meta["sentiment"] = {
                "label": res.label,
                "score": res.score,
                "backend": res.backend,
            }
        else:  # unexpected object — store string
            meta["sentiment"] = {"label": str(res)}
    except Exception as e:  # pragma: no cover
        meta["sentiment"] = {"error": f"{type(e).__name__}: {e}"}
216
+
217
+
218
# Public API of this module.
__all__ = [
    "handle_logged_in_turn",
    "sanitize_text",
    "redact_text",
    "intent_of",
    "summarize_text",
    "capabilities",
]
memory/rag/indexer.py CHANGED
@@ -1 +1,344 @@
1
  # /memory/rag/data/indexer.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /memory/rag/data/indexer.py
2
+ """
3
+ Minimal, dependency-free TF-IDF indexer for RAG.
4
+
5
+ Features
6
+ - Build from folder (recursive), index plain-text files
7
+ - Add individual text blobs with metadata
8
+ - Persist/load inverted index to/from JSON
9
+ - Search with TF-IDF scoring and simple query normalization
10
+ - Return top-k with tiny context snippets
11
+
12
+ This module is intentionally small and pure-Python to keep local CPU demos simple.
13
+ """
14
+
15
+ from __future__ import annotations
16
+ from dataclasses import dataclass, asdict
17
+ from typing import Dict, List, Tuple, Iterable, Optional
18
+ from pathlib import Path
19
+ import json
20
+ import math
21
+ import hashlib
22
+ import re
23
+ import fnmatch
24
+ import time
25
+
26
+ # -----------------------------
27
+ # Types
28
+ # -----------------------------
29
+
30
@dataclass(frozen=True)
class DocMeta:
    """Immutable metadata describing one indexed document."""
    doc_id: str
    source: str  # e.g., absolute path or "inline"
    title: str | None = None
    tags: List[str] | None = None
    mtime: float | None = None  # source last modified (if file)
    hash: str | None = None  # content hash (sha256 hex), used for change detection
38
+
39
@dataclass(frozen=True)
class Hit:
    """One search result: a scored document plus a short context snippet."""
    doc_id: str
    score: float  # TF-IDF score, rounded to 4 decimals by TfidfIndex.search
    source: str
    snippet: str
    title: str | None = None
    tags: List[str] | None = None
47
+
48
+ # -----------------------------
49
+ # Tokenization
50
+ # -----------------------------
51
+
52
+ _WORD_RE = re.compile(r"[A-Za-z0-9']+")
53
+
54
def tokenize(text: str) -> List[str]:
    """Lowercased word tokens (letters, digits, apostrophes); deterministic."""
    return [w.lower() for w in re.findall(r"[A-Za-z0-9']+", text or "")]
57
+
58
+ # -----------------------------
59
+ # Index
60
+ # -----------------------------
61
+
62
class TfidfIndex:
    """
    Tiny TF-IDF inverted index with JSON persistence.

    Structures:
      - docs: doc_id -> {"meta": DocMeta, "len": int, "text": str (optional)}
      - inv: term -> {doc_id: tf} (raw term frequency)
      - df: term -> document frequency
      - n_docs: total number of docs
    """

    def __init__(self) -> None:
        self.docs: Dict[str, Dict] = {}
        self.inv: Dict[str, Dict[str, int]] = {}
        self.df: Dict[str, int] = {}
        self.n_docs: int = 0

    # ---------- add documents ----------

    def add_text(self, doc_id: str, text: str, meta: DocMeta) -> None:
        """Index *text* under *doc_id*; re-adding an existing id replaces it.

        Empty text or text with no tokens is a no-op (note: when replacing,
        the old postings are removed before the token check).
        """
        if not text:
            return
        if doc_id in self.docs:
            # idempotent update: remove old postings first
            self._remove_doc_terms(doc_id)

        toks = tokenize(text)
        if not toks:
            return

        tf: Dict[str, int] = {}
        for t in toks:
            tf[t] = tf.get(t, 0) + 1

        # update inv + df
        for term, cnt in tf.items():
            bucket = self.inv.setdefault(term, {})
            bucket[doc_id] = cnt
            self.df[term] = len(bucket)

        self.docs[doc_id] = {
            "meta": meta,
            "len": len(toks),
            # keep original text for snippet extraction; you can drop this if size matters
            "text": text,
        }
        self.n_docs = len(self.docs)

    def add_file(self, path: Path, doc_id: str | None = None, title: str | None = None, tags: List[str] | None = None) -> Optional[str]:
        """Index one file; returns its doc_id, or None if *path* is not a file.

        Unchanged files (same content hash and mtime) are skipped.
        """
        path = Path(path)
        if not path.is_file():
            return None
        text = path.read_text(encoding="utf-8", errors="ignore")
        h = sha256_of(text)
        stat = path.stat()
        doc_id = doc_id or str(path.resolve())

        # skip if unchanged
        prev = self.docs.get(doc_id)
        if prev:
            old_meta: DocMeta = prev["meta"]
            if old_meta.hash == h and old_meta.mtime == stat.st_mtime:
                return doc_id  # unchanged

        meta = DocMeta(
            doc_id=doc_id,
            source=str(path.resolve()),
            title=title or path.name,
            tags=tags,
            mtime=stat.st_mtime,
            hash=h,
        )
        self.add_text(doc_id, text, meta)
        return doc_id

    # ---------- build / scan ----------

    def build_from_folder(
        self,
        root: Path,
        include: Iterable[str] = ("*.txt", "*.md"),
        exclude: Iterable[str] = (".git/*",),
        recursive: bool = True,
    ) -> int:
        """
        Index all files under `root` matching any include pattern and not matching exclude.
        Returns number of files indexed or updated.
        Patterns are matched against the POSIX-style path relative to *root*.
        """
        root = Path(root)
        if not root.exists():
            return 0

        count = 0
        paths = (root.rglob("*") if recursive else root.glob("*"))
        for p in paths:
            if not p.is_file():
                continue
            rel = str(p.relative_to(root).as_posix())
            if not any(fnmatch.fnmatch(rel, pat) for pat in include):
                continue
            if any(fnmatch.fnmatch(rel, pat) for pat in exclude):
                continue
            if self.add_file(p):
                count += 1
        return count

    # ---------- search ----------

    def search(self, query: str, k: int = 5) -> List[Hit]:
        """Rank documents against *query* with log-TF * smoothed-IDF scoring
        and gentle 1/sqrt(len) length normalization; returns top-*k* Hits."""
        q_toks = tokenize(query)
        if not q_toks or self.n_docs == 0:
            return []

        # compute query tf-idf (using binary or raw tf is fine; keep it simple)
        q_tf: Dict[str, int] = {}
        for t in q_toks:
            q_tf[t] = q_tf.get(t, 0) + 1

        # compute idf with +1 smoothing
        idf: Dict[str, float] = {}
        for t in q_tf:
            df = self.df.get(t, 0)
            idf[t] = math.log((1 + self.n_docs) / (1 + df)) + 1.0

        # accumulate scores: cosine-like with length norm
        scores: Dict[str, float] = {}
        doc_len_norm: Dict[str, float] = {}
        for term, qcnt in q_tf.items():
            postings = self.inv.get(term)
            if not postings:
                continue
            wq = (1 + math.log(qcnt)) * idf[term]  # log tf * idf
            for doc_id, dcnt in postings.items():
                wd = (1 + math.log(dcnt)) * idf[term]
                scores[doc_id] = scores.get(doc_id, 0.0) + (wq * wd)
                # cache norm
                if doc_id not in doc_len_norm:
                    L = max(1, self.docs[doc_id]["len"])
                    doc_len_norm[doc_id] = 1.0 / math.sqrt(L)

        # apply a gentle length normalization
        for d, s in list(scores.items()):
            scores[d] = s * doc_len_norm.get(d, 1.0)

        # rank and format
        ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:k]
        hits: List[Hit] = []
        for doc_id, score in ranked:
            d = self.docs[doc_id]
            meta: DocMeta = d["meta"]
            snippet = make_snippet(d.get("text", ""), q_toks)
            hits.append(Hit(
                doc_id=doc_id,
                score=round(float(score), 4),
                source=meta.source,
                snippet=snippet,
                title=meta.title,
                tags=meta.tags,
            ))
        return hits

    # ---------- persistence ----------

    def save(self, path: Path) -> None:
        """Write the whole index (including stored text) to *path* as JSON."""
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        # Store meta as dict to keep JSON serializable
        serial_docs = {
            doc_id: {
                "meta": asdict(d["meta"]),
                "len": d["len"],
                # store text to allow snippet generation after load (optional)
                "text": d.get("text", ""),
            }
            for doc_id, d in self.docs.items()
        }
        data = {
            "docs": serial_docs,
            "inv": self.inv,
            "df": self.df,
            "n_docs": self.n_docs,
            "saved_at": time.time(),
        }
        path.write_text(json.dumps(data, ensure_ascii=False), encoding="utf-8")

    @classmethod
    def load(cls, path: Path) -> "TfidfIndex":
        """Load an index saved by save(); missing file yields an empty index."""
        path = Path(path)
        idx = cls()
        if not path.is_file():
            return idx
        data = json.loads(path.read_text(encoding="utf-8"))
        # reconstruct docs with DocMeta
        docs: Dict[str, Dict] = {}
        for doc_id, d in data.get("docs", {}).items():
            m = d.get("meta", {})
            meta = DocMeta(**m) if m else DocMeta(doc_id=doc_id, source="unknown")
            docs[doc_id] = {
                "meta": meta,
                "len": d.get("len", 0),
                "text": d.get("text", ""),
            }
        idx.docs = docs
        idx.inv = {t: {k: int(v) for k, v in postings.items()} for t, postings in data.get("inv", {}).items()}
        idx.df = {t: int(v) for t, v in data.get("df", {}).items()}
        idx.n_docs = int(data.get("n_docs", len(idx.docs)))
        return idx

    # ---------- internals ----------

    def _remove_doc_terms(self, doc_id: str) -> None:
        """Remove a document's postings before re-adding."""
        if doc_id not in self.docs:
            return
        # delete postings
        for term, postings in list(self.inv.items()):
            if doc_id in postings:
                postings.pop(doc_id, None)
                if postings:
                    self.df[term] = len(postings)
                else:
                    # remove empty term
                    self.inv.pop(term, None)
                    self.df.pop(term, None)
        # delete doc
        self.docs.pop(doc_id, None)
        self.n_docs = len(self.docs)
289
+
290
+
291
+ # -----------------------------
292
+ # Utilities
293
+ # -----------------------------
294
+
295
def sha256_of(text: str) -> str:
    """Hex SHA-256 digest of *text* (None is treated as the empty string)."""
    digest = hashlib.sha256()
    digest.update((text or "").encode("utf-8"))
    return digest.hexdigest()
297
+
298
def make_snippet(text: str, query_tokens: List[str], radius: int = 60) -> str:
    """
    Extract a tiny context window around the first matched query token.
    Falls back to the beginning of the document when nothing matches;
    clipped edges are marked with an ellipsis.
    """
    if not text:
        return ""
    haystack = text.lower()
    for token in query_tokens:
        pos = haystack.find(token.lower())
        if pos < 0:
            continue
        lo = max(0, pos - radius)
        hi = min(len(text), pos + len(token) + radius)
        window = text[lo:hi].replace("\n", " ").strip()
        prefix = "…" if lo > 0 else ""
        suffix = "…" if hi < len(text) else ""
        return prefix + window + suffix
    # fallback: beginning of the doc
    head = text[: 2 * radius].replace("\n", " ").strip()
    return head + "…" if len(text) > 2 * radius else head
319
+
320
+
321
+ # -----------------------------
322
+ # Convenience API (module-level)
323
+ # -----------------------------
324
+
325
+ DEFAULT_INDEX_PATH = Path("memory/rag/data/.index/tfidf_index.json")
326
+
327
def build_from_folder(
    root: str | Path,
    include: Iterable[str] = ("*.txt", "*.md"),
    exclude: Iterable[str] = (".git/*",),
    save_to: str | Path = DEFAULT_INDEX_PATH,
    recursive: bool = True,
) -> TfidfIndex:
    """Index all matching files under *root* and persist the index to *save_to*."""
    idx = TfidfIndex()
    idx.build_from_folder(Path(root), include=include, exclude=exclude, recursive=recursive)
    idx.save(Path(save_to))
    return idx
338
+
339
def load_index(path: str | Path = DEFAULT_INDEX_PATH) -> TfidfIndex:
    """Load a persisted index; a missing file yields an empty index."""
    return TfidfIndex.load(Path(path))
341
+
342
def search(query: str, k: int = 5, path: str | Path = DEFAULT_INDEX_PATH) -> List[Hit]:
    """One-shot convenience: load the index at *path* and run a query.

    Note: reloads the index from disk on every call — fine for demos.
    """
    idx = load_index(path)
    return idx.search(query, k=k)
memory/rag/retriever.py CHANGED
@@ -1 +1,268 @@
1
  # /memory/rag/data/retriever.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /memory/rag/data/retriever.py
2
+ """
3
+ Minimal RAG retriever that sits on top of the TF-IDF indexer.
4
+
5
+ Features
6
+ - Top-k document retrieval via indexer.search()
7
+ - Optional filters (tags, title substring)
8
+ - Passage extraction around query terms with overlap
9
+ - Lightweight proximity-based reranking of passages
10
+
11
+ No third-party dependencies; pairs with memory/rag/data/indexer.py.
12
+ """
13
+
14
+ from __future__ import annotations
15
+ from dataclasses import dataclass
16
+ from typing import Dict, Iterable, List, Optional, Tuple
17
+ from pathlib import Path
18
+ import math
19
+ import re
20
+
21
+ from .indexer import (
22
+ load_index,
23
+ search as index_search,
24
+ DEFAULT_INDEX_PATH,
25
+ tokenize,
26
+ TfidfIndex,
27
+ DocMeta,
28
+ )
29
+
30
+ # -----------------------------
31
+ # Public types
32
+ # -----------------------------
33
+
34
@dataclass(frozen=True)
class Passage:
    """A retrieved chunk of a document, with provenance and scoring info."""
    doc_id: str
    source: str
    title: Optional[str]
    tags: Optional[List[str]]
    score: float  # combined score (index score +/- rerank)
    start: int  # char start in original text
    end: int  # char end in original text
    text: str  # extracted passage
    snippet: str  # human-friendly short snippet (may equal text if short)
45
+
46
+
47
@dataclass(frozen=True)
class Filters:
    """Optional metadata constraints applied to search hits."""
    title_contains: Optional[str] = None  # case-insensitive containment
    require_tags: Optional[Iterable[str]] = None  # all tags must be present (AND)
51
+
52
+
53
+ # -----------------------------
54
+ # Retrieval API
55
+ # -----------------------------
56
+
57
def retrieve(
    query: str,
    k: int = 5,
    index_path: str | Path = DEFAULT_INDEX_PATH,
    filters: Optional[Filters] = None,
    passage_chars: int = 350,
    passage_overlap: int = 60,
    enable_rerank: bool = True,
) -> List[Passage]:
    """
    Retrieve top-k passages for a query.

    Steps:
      1. Run TF-IDF doc search (overshooting 3x k so filtering/reranking
         still leaves enough candidates)
      2. Apply optional filters
      3. Extract a focused passage per doc
      4. (Optional) Rerank by term proximity within the passage
    """
    idx = load_index(index_path)
    if idx.n_docs == 0 or not query.strip():
        return []

    # initial doc hits
    hits = index_search(query, k=max(k * 3, k), path=index_path)  # overshoot; filter + rerank will trim

    # filter hits by title/tags if requested
    if filters:
        hits = _apply_filters(hits, idx, filters)

    # extract best passage per remaining doc
    q_tokens = tokenize(query)
    passages: List[Passage] = []
    for h in hits:
        doc = idx.docs.get(h.doc_id)
        if not doc:
            continue
        meta: DocMeta = doc["meta"]
        full_text: str = doc.get("text", "") or ""
        start, end, passage_text = _extract_passage(full_text, q_tokens, window=passage_chars, overlap=passage_overlap)
        snippet = passage_text if len(passage_text) <= 220 else passage_text[:220].rstrip() + "…"
        passages.append(Passage(
            doc_id=h.doc_id,
            source=meta.source,
            title=meta.title,
            tags=meta.tags,
            score=float(h.score),  # base score from index
            start=start,
            end=end,
            text=passage_text,
            snippet=snippet,
        ))

    if not passages:
        return []

    # optional rerank by proximity of query terms inside the passage
    if enable_rerank:
        passages = _rerank_by_proximity(passages, q_tokens)

    # final top-k
    passages.sort(key=lambda p: p.score, reverse=True)
    return passages[:k]
119
+
120
+
121
def retrieve_texts(
    query: str,
    k: int = 5,
    **kwargs,
) -> List[str]:
    """
    Convenience wrapper: return only the passage texts for a query.
    Extra keyword arguments are forwarded to retrieve().
    """
    passages = retrieve(query, k=k, **kwargs)
    return [passage.text for passage in passages]
130
+
131
+
132
+ # -----------------------------
133
+ # Internals
134
+ # -----------------------------
135
+
136
def _apply_filters(hits, idx: TfidfIndex, filters: Filters):
    """Keep only hits whose doc metadata satisfies *filters*.

    Title match is case-insensitive substring; tag match requires ALL
    requested tags to be present (case-insensitive).
    """
    out = []
    want_title = (filters.title_contains or "").strip().lower() or None
    want_tags = set(t.strip().lower() for t in (filters.require_tags or []) if str(t).strip())

    for h in hits:
        d = idx.docs.get(h.doc_id)
        if not d:
            continue
        meta: DocMeta = d["meta"]

        if want_title:
            t = (meta.title or "").lower()
            if want_title not in t:
                continue

        if want_tags:
            tags = set((meta.tags or []))
            tags = set(x.lower() for x in tags)
            if not want_tags.issubset(tags):
                continue

        out.append(h)
    return out
160
+
161
+
162
+ _WORD_RE = re.compile(r"[A-Za-z0-9']+")
163
+
164
+ def _find_all(term: str, text: str) -> List[int]:
165
+ """Return starting indices of all case-insensitive matches of term in text."""
166
+ if not term or not text:
167
+ return []
168
+ term_l = term.lower()
169
+ low = text.lower()
170
+ out: List[int] = []
171
+ i = low.find(term_l)
172
+ while i >= 0:
173
+ out.append(i)
174
+ i = low.find(term_l, i + 1)
175
+ return out
176
+
177
+
178
def _extract_passage(text: str, q_tokens: List[str], window: int = 350, overlap: int = 60) -> Tuple[int, int, str]:
    """
    Pick a passage around the earliest match of any query token.
    If no token matches, return the leading *window* characters.

    Returns (start, end, stripped_passage) where start/end are character
    offsets into *text* (end - start <= window).
    """
    if not text:
        return 0, 0, ""

    # Earliest hit among all query tokens (case-insensitive).
    # (Removed a dead `low = text.lower()` local: _find_all lowercases internally.)
    hit_positions: List[int] = []
    for qt in q_tokens:
        hit_positions.extend(_find_all(qt, text))

    # Start *overlap* chars before the earliest hit, or at the doc head.
    start = max(0, min(hit_positions) - overlap) if hit_positions else 0
    end = min(len(text), start + window)
    return start, end, text[start:end].strip()
203
+
204
+
205
def _rerank_by_proximity(passages: List[Passage], q_tokens: List[str]) -> List[Passage]:
    """
    Adjust scores based on how tightly query tokens cluster inside the passage.
    Heuristic:
      - For each unique query token, find all positions in the passage (word indices).
      - Compute average pairwise distance among the closest occurrences.
      - Convert to a bonus in [0, 0.25] and add to base score.
    """
    q_unique = [t for t in dict.fromkeys(q_tokens)]  # preserve order, dedupe
    if not q_unique:
        return passages

    def word_positions(text: str, term: str) -> List[int]:
        # word-level positions for term (exact lowercase word match)
        positions: List[int] = []
        words = [w.group(0).lower() for w in _WORD_RE.finditer(text)]
        for i, w in enumerate(words):
            if term == w:
                positions.append(i)
        return positions

    def proximity_bonus(p: Passage) -> float:
        # collect positions per term
        pos_lists = [word_positions(p.text, t) for t in q_unique]
        if all(len(ps) == 0 for ps in pos_lists):
            return 0.0

        # flatten a representative set of positions (closest aligned indices);
        # 999999 is a sentinel for "term absent from this passage"
        reps: List[int] = []
        for ps in pos_lists:
            reps.append(ps[0] if ps else 999999)

        # average absolute distance to the median position
        med = sorted([x for x in reps if x != 999999])
        if not med:
            return 0.0
        mid = med[len(med) // 2]
        avg_dist = sum(abs((x if x != 999999 else mid) - mid) for x in reps) / max(1, len(reps))

        # squash distance -> bonus; closer = bigger bonus
        # dist 0 -> 0.25 bonus; dist 10+ -> ~0 bonus
        bonus = max(0.0, 0.25 * (1.0 - min(avg_dist, 10.0) / 10.0))
        return float(bonus)

    reranked: List[Passage] = []
    for p in passages:
        bonus = proximity_bonus(p)
        # NOTE(review): relies on frozen dataclass still exposing __dict__
        # (true without slots=True); dataclasses.replace would be equivalent.
        reranked.append(Passage(
            **{**p.__dict__, "score": p.score + bonus}
        ))
    return reranked
256
+
257
+
258
+ # -----------------------------
259
+ # CLI / quick test
260
+ # -----------------------------
261
+
262
if __name__ == "__main__":
    # Ad-hoc smoke test: `python retriever.py some query words`
    import sys
    q = " ".join(sys.argv[1:]) or "anonymous chatbot rules"
    out = retrieve(q, k=3)
    for i, p in enumerate(out, 1):
        print(f"[{i}] {p.score:.4f} {p.title or '(untitled)'} β€” {p.source}")
        print("   ", (p.snippet.replace("\n", " ") if p.snippet else "")[:200])
memory/sessions.py CHANGED
@@ -1 +1,244 @@
1
  # /memory/sessions.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /memory/sessions.py
2
+ """
3
+ Minimal session store for chat history + per-session data.
4
+
5
+ Features
6
+ - In-memory store with thread safety
7
+ - Create/get/update/delete sessions
8
- Append chat turns: ("user" | "bot", text)
9
+ - Optional TTL cleanup and max-history cap
10
+ - JSON persistence (save/load)
11
+ - Deterministic, dependency-free
12
+
13
+ Intended to interoperate with anon_bot and logged_in_bot:
14
+ - History shape: List[Tuple[str, str]] e.g., [("user","hi"), ("bot","hello")]
15
+ """
16
+
17
+ from __future__ import annotations
18
+ from dataclasses import dataclass, asdict, field
19
+ from typing import Any, Dict, List, Optional, Tuple
20
+ from pathlib import Path
21
+ import time
22
+ import uuid
23
+ import json
24
+ import threading
25
+
26
+ History = List[Tuple[str, str]] # [("user","..."), ("bot","...")]
27
+
28
+ # -----------------------------
29
+ # Data model
30
+ # -----------------------------
31
+
32
@dataclass
class Session:
    """One conversation: identity, timestamps, transcript, and scratch data."""
    session_id: str
    user_id: Optional[str] = None
    created_at: float = field(default_factory=lambda: time.time())
    updated_at: float = field(default_factory=lambda: time.time())
    data: Dict[str, Any] = field(default_factory=dict)  # arbitrary per-session state
    history: History = field(default_factory=list)  # chat transcripts

    def to_dict(self) -> Dict[str, Any]:
        """JSON-serializable view of this session."""
        d = asdict(self)
        # dataclasses with tuples serialize fine, ensure tuples not lost if reloaded
        return d

    @staticmethod
    def from_dict(d: Dict[str, Any]) -> "Session":
        """Rebuild a Session from a dict produced by to_dict() / JSON.

        History entries are re-coerced to (str, str) tuples since JSON
        round-trips them as lists.
        """
        s = Session(
            session_id=d["session_id"],
            user_id=d.get("user_id"),
            created_at=float(d.get("created_at", time.time())),
            updated_at=float(d.get("updated_at", time.time())),
            data=dict(d.get("data", {})),
            history=[(str(who), str(text)) for who, text in d.get("history", [])],
        )
        return s
57
+
58
+
59
+ # -----------------------------
60
+ # Store
61
+ # -----------------------------
62
+
63
class SessionStore:
    """
    Thread-safe in-memory session registry with optional TTL and persistence.
    All public methods take the store lock (an RLock, so internal calls
    such as _append -> create do not deadlock).
    """

    def __init__(
        self,
        ttl_seconds: Optional[int] = 60 * 60,  # 1 hour default; set None to disable
        max_history: int = 200,  # cap messages per session
    ) -> None:
        self._ttl = ttl_seconds
        self._max_history = max_history
        self._lock = threading.RLock()
        self._sessions: Dict[str, Session] = {}

    # ---- id helpers ----

    @staticmethod
    def new_id() -> str:
        return uuid.uuid4().hex

    # ---- CRUD ----

    def create(self, user_id: Optional[str] = None, session_id: Optional[str] = None) -> Session:
        """Create (or overwrite) a session; generates an id when none given."""
        with self._lock:
            sid = session_id or self.new_id()
            s = Session(session_id=sid, user_id=user_id)
            self._sessions[sid] = s
            return s

    def get(self, session_id: str, create_if_missing: bool = False, user_id: Optional[str] = None) -> Optional[Session]:
        """Look up a session; optionally create it on miss."""
        with self._lock:
            s = self._sessions.get(session_id)
            if s is None and create_if_missing:
                s = self.create(user_id=user_id, session_id=session_id)
            return s

    def delete(self, session_id: str) -> bool:
        """Remove a session; returns True if it existed."""
        with self._lock:
            return self._sessions.pop(session_id, None) is not None

    def all_ids(self) -> List[str]:
        with self._lock:
            return list(self._sessions.keys())

    # ---- housekeeping ----

    def _expired(self, s: Session) -> bool:
        # expiry is measured from last update, not creation
        if self._ttl is None:
            return False
        return (time.time() - s.updated_at) > self._ttl

    def sweep(self) -> int:
        """
        Remove expired sessions. Returns number removed.
        """
        with self._lock:
            dead = [sid for sid, s in self._sessions.items() if self._expired(s)]
            for sid in dead:
                self._sessions.pop(sid, None)
            return len(dead)

    # ---- history ops ----

    def append_user(self, session_id: str, text: str) -> Session:
        return self._append(session_id, "user", text)

    def append_bot(self, session_id: str, text: str) -> Session:
        return self._append(session_id, "bot", text)

    def _append(self, session_id: str, who: str, text: str) -> Session:
        """Append one turn, auto-creating the session and capping history."""
        with self._lock:
            s = self._sessions.get(session_id)
            if s is None:
                s = self.create(session_id=session_id)
            s.history.append((who, text))
            if self._max_history and len(s.history) > self._max_history:
                # Keep most recent N entries
                s.history = s.history[-self._max_history :]
            s.updated_at = time.time()
            return s

    def get_history(self, session_id: str) -> History:
        """Return a shallow copy of the history ([] for unknown sessions)."""
        with self._lock:
            s = self._sessions.get(session_id)
            return list(s.history) if s else []

    def clear_history(self, session_id: str) -> bool:
        with self._lock:
            s = self._sessions.get(session_id)
            if not s:
                return False
            s.history.clear()
            s.updated_at = time.time()
            return True

    # ---- key/value per-session data ----

    def set(self, session_id: str, key: str, value: Any) -> Session:
        """Set a per-session value, auto-creating the session on miss."""
        with self._lock:
            s = self._sessions.get(session_id)
            if s is None:
                s = self.create(session_id=session_id)
            s.data[key] = value
            s.updated_at = time.time()
            return s

    def get_value(self, session_id: str, key: str, default: Any = None) -> Any:
        with self._lock:
            s = self._sessions.get(session_id)
            if not s:
                return default
            return s.data.get(key, default)

    def data_dict(self, session_id: str) -> Dict[str, Any]:
        """Shallow copy of the session's key/value data ({} for unknown)."""
        with self._lock:
            s = self._sessions.get(session_id)
            return dict(s.data) if s else {}

    # ---- persistence ----

    def save(self, path: str | Path) -> None:
        """Snapshot the store (settings + sessions) to *path* as JSON."""
        p = Path(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        with self._lock:
            payload = {
                "ttl_seconds": self._ttl,
                "max_history": self._max_history,
                "saved_at": time.time(),
                "sessions": {sid: s.to_dict() for sid, s in self._sessions.items()},
            }
            p.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")

    @classmethod
    def load(cls, path: str | Path) -> "SessionStore":
        """Restore a store saved by save(); missing file yields a fresh store."""
        p = Path(path)
        if not p.is_file():
            return cls()
        data = json.loads(p.read_text(encoding="utf-8"))
        store = cls(
            ttl_seconds=data.get("ttl_seconds"),
            max_history=int(data.get("max_history", 200)),
        )
        sessions = data.get("sessions", {})
        with store._lock:
            for sid, sd in sessions.items():
                store._sessions[sid] = Session.from_dict(sd)
        return store
211
+
212
+
213
+ # -----------------------------
214
+ # Module-level singleton (optional)
215
+ # -----------------------------
216
+
217
# Lazily-created process-wide default store (module-level singleton).
_default_store: Optional[SessionStore] = None

def get_store() -> SessionStore:
    """Return the shared SessionStore, creating it on first use."""
    global _default_store
    if _default_store is None:
        _default_store = SessionStore()
    return _default_store
224
+
225
def new_session(user_id: Optional[str] = None) -> Session:
    """Create a fresh session in the default store."""
    return get_store().create(user_id=user_id)

def append_user(session_id: str, text: str) -> Session:
    """Record a user message in the default store."""
    return get_store().append_user(session_id, text)

def append_bot(session_id: str, text: str) -> Session:
    """Record a bot reply in the default store."""
    return get_store().append_bot(session_id, text)

def history(session_id: str) -> History:
    """Return a copy of the session's chat history ([] if unknown)."""
    return get_store().get_history(session_id)

def set_value(session_id: str, key: str, value: Any) -> Session:
    """Set a per-session key/value in the default store."""
    return get_store().set(session_id, key, value)

def get_value(session_id: str, key: str, default: Any = None) -> Any:
    """Read a per-session value (or *default*) from the default store."""
    return get_store().get_value(session_id, key, default)

def sweep() -> int:
    """Drop expired sessions from the default store; returns count removed."""
    return get_store().sweep()
memory/store.py CHANGED
@@ -1,3 +1,145 @@
1
  # /memory/sessions.py
 
 
 
 
2
 
3
- DB={}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /memory/sessions.py
2
+ """
3
+ Simple in-memory session manager for chatbot history.
4
+ Supports TTL, max history, and JSON persistence.
5
+ """
6
 
7
+ from __future__ import annotations
8
+ import time, json, uuid
9
+ from pathlib import Path
10
+ from dataclasses import dataclass, field
11
+ from typing import Dict, List, Tuple, Optional, Any
12
+
13
+ History = List[Tuple[str, str]] # [("user","..."), ("bot","...")]
14
+
15
+
16
+ @dataclass
17
+ class Session:
18
+ session_id: str
19
+ user_id: Optional[str] = None
20
+ history: History = field(default_factory=list)
21
+ data: Dict[str, Any] = field(default_factory=dict)
22
+ created_at: float = field(default_factory=time.time)
23
+ updated_at: float = field(default_factory=time.time)
24
+
25
+
26
+ class SessionStore:
27
+ def __init__(self, ttl_seconds: Optional[int] = 3600, max_history: Optional[int] = 50):
28
+ self.ttl_seconds = ttl_seconds
29
+ self.max_history = max_history
30
+ self._sessions: Dict[str, Session] = {}
31
+
32
+ # --- internals ---
33
+ def _expired(self, sess: Session) -> bool:
34
+ if self.ttl_seconds is None:
35
+ return False
36
+ return (time.time() - sess.updated_at) > self.ttl_seconds
37
+
38
+ # --- CRUD ---
39
+ def create(self, user_id: Optional[str] = None) -> Session:
40
+ sid = str(uuid.uuid4())
41
+ sess = Session(session_id=sid, user_id=user_id)
42
+ self._sessions[sid] = sess
43
+ return sess
44
+
45
+ def get(self, sid: str) -> Optional[Session]:
46
+ return self._sessions.get(sid)
47
+
48
+ def get_history(self, sid: str) -> History:
49
+ sess = self.get(sid)
50
+ return list(sess.history) if sess else []
51
+
52
+ def append_user(self, sid: str, text: str) -> None:
53
+ self._append(sid, "user", text)
54
+
55
+ def append_bot(self, sid: str, text: str) -> None:
56
+ self._append(sid, "bot", text)
57
+
58
+ def _append(self, sid: str, who: str, text: str) -> None:
59
+ sess = self.get(sid)
60
+ if not sess:
61
+ return
62
+ sess.history.append((who, text))
63
+ if self.max_history and len(sess.history) > self.max_history:
64
+ sess.history = sess.history[-self.max_history:]
65
+ sess.updated_at = time.time()
66
+
67
+ # --- Data store ---
68
+ def set(self, sid: str, key: str, value: Any) -> None:
69
+ sess = self.get(sid)
70
+ if sess:
71
+ sess.data[key] = value
72
+ sess.updated_at = time.time()
73
+
74
+ def get_value(self, sid: str, key: str, default=None) -> Any:
75
+ sess = self.get(sid)
76
+ return sess.data.get(key, default) if sess else default
77
+
78
+ def data_dict(self, sid: str) -> Dict[str, Any]:
79
+ sess = self.get(sid)
80
+ return dict(sess.data) if sess else {}
81
+
82
+ # --- TTL management ---
83
+ def sweep(self) -> int:
84
+ """Remove expired sessions; return count removed."""
85
+ expired = [sid for sid, s in self._sessions.items() if self._expired(s)]
86
+ for sid in expired:
87
+ self._sessions.pop(sid, None)
88
+ return len(expired)
89
+
90
+ def all_ids(self):
91
+ return list(self._sessions.keys())
92
+
93
+ # --- persistence ---
94
+ def save(self, path: Path) -> None:
95
+ payload = {
96
+ sid: {
97
+ "user_id": s.user_id,
98
+ "history": s.history,
99
+ "data": s.data,
100
+ "created_at": s.created_at,
101
+ "updated_at": s.updated_at,
102
+ }
103
+ for sid, s in self._sessions.items()
104
+ }
105
+ path.write_text(json.dumps(payload, indent=2))
106
+
107
+ @classmethod
108
+ def load(cls, path: Path) -> "SessionStore":
109
+ store = cls()
110
+ if not path.exists():
111
+ return store
112
+ raw = json.loads(path.read_text())
113
+ for sid, d in raw.items():
114
+ s = Session(
115
+ session_id=sid,
116
+ user_id=d.get("user_id"),
117
+ history=d.get("history", []),
118
+ data=d.get("data", {}),
119
+ created_at=d.get("created_at", time.time()),
120
+ updated_at=d.get("updated_at", time.time()),
121
+ )
122
+ store._sessions[sid] = s
123
+ return store
124
+
125
+
126
+ # --- Module-level singleton for convenience ---
127
+ _store = SessionStore()
128
+
129
+ def new_session(user_id: Optional[str] = None) -> Session:
130
+ return _store.create(user_id)
131
+
132
+ def history(sid: str) -> History:
133
+ return _store.get_history(sid)
134
+
135
+ def append_user(sid: str, text: str) -> None:
136
+ _store.append_user(sid, text)
137
+
138
+ def append_bot(sid: str, text: str) -> None:
139
+ _store.append_bot(sid, text)
140
+
141
+ def set_value(sid: str, key: str, value: Any) -> None:
142
+ _store.set(sid, key, value)
143
+
144
+ def get_value(sid: str, key: str, default=None) -> Any:
145
+ return _store.get_value(sid, key, default)
nlu/pipeline.py CHANGED
@@ -1,3 +1,77 @@
1
  # /nlu/pipeline.py
 
 
2
 
3
- def analyze(t): return {'intent':'general'}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /nlu/pipeline.py
"""
Lightweight rule-based NLU pipeline.

No ML dependencies — just keyword matching and simple heuristics.
Provides intent classification and placeholder entity extraction.
"""

import re
from typing import Dict, List


# keyword → intent maps
_INTENT_KEYWORDS = {
    "greeting": {"hi", "hello", "hey", "good morning", "good evening"},
    "goodbye": {"bye", "goodbye", "see you", "farewell"},
    "help": {"help", "support", "assist", "how do i"},
    "faq": {"what is", "who is", "where is", "when is", "how to"},
    "sentiment_positive": {"great", "awesome", "fantastic", "love"},
    "sentiment_negative": {"bad", "terrible", "hate", "awful"},
}


def _keyword_present(kw: str, low: str) -> bool:
    """True if keyword *kw* occurs in lowercased text *low*.

    Bug fix: single-word keywords are matched on word boundaries so that
    e.g. "hi" does not fire inside "this"; multi-word phrases still use
    plain substring matching.
    """
    if " " in kw:
        return kw in low
    return re.search(rf"\b{re.escape(kw)}\b", low) is not None


def _match_intent(text: str) -> str:
    """Return the first intent with a matching keyword, else "general"."""
    low = text.lower().strip()
    for intent, kws in _INTENT_KEYWORDS.items():
        for kw in kws:
            if _keyword_present(kw, low):
                return intent
    return "general"


def _extract_entities(text: str) -> List[str]:
    """
    Placeholder entity extractor.
    For now just returns capitalized words (could be names/places).
    """
    return [w for w in text.split() if w.istitle()]


def analyze(text: str) -> Dict:
    """
    Analyze a user utterance.
    Returns:
        {
          "intent": str,
          "entities": list[str],
          "confidence": float
        }
    """
    if not text or not text.strip():
        return {"intent": "general", "entities": [], "confidence": 0.0}

    intent = _match_intent(text)
    entities = _extract_entities(text)

    # crude confidence: matched keyword = 0.9, else fallback = 0.5
    confidence = 0.9 if intent != "general" else 0.5

    return {
        "intent": intent,
        "entities": entities,
        "confidence": confidence,
    }


# quick test
if __name__ == "__main__":
    tests = [
        "Hello there",
        "Can you help me?",
        "I love this bot!",
        "Bye now",
        "Tell me what is RAG",
        "This is awful",  # previously misrouted to "greeting" via "hi" in "this"
        "random input with no keywords",
    ]
    for t in tests:
        print(t, "->", analyze(t))
nlu/prompts.py CHANGED
@@ -1 +1,78 @@
1
  # /nlu/prompts.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /nlu/prompts.py
"""
Reusable prompt templates for NLU and chatbot responses.

These can be imported anywhere in the app to keep wording consistent.
They are plain strings / dicts — no external deps required.
"""

from typing import Dict

# -----------------------------
# System prompts
# -----------------------------

SYSTEM_BASE = """\
You are a helpful, polite chatbot.
Answer briefly unless asked for detail.
"""

SYSTEM_FAQ = """\
You are a factual Q&A assistant.
Answer questions directly, citing facts when possible.
"""

SYSTEM_SUPPORT = """\
You are a friendly support assistant.
Offer clear, step-by-step help when the user asks for guidance.
"""

# -----------------------------
# Few-shot examples
# -----------------------------

FEW_SHOTS: Dict[str, list] = {
    "greeting": [
        {"user": "Hello", "bot": "Hi there! How can I help you today?"},
        {"user": "Good morning", "bot": "Good morning! What’s up?"},
    ],
    "goodbye": [
        {"user": "Bye", "bot": "Goodbye! Have a great day."},
        {"user": "See you later", "bot": "See you!"},
    ],
    "help": [
        {"user": "I need help", "bot": "Sure! What do you need help with?"},
        {"user": "Can you assist me?", "bot": "Of course, happy to assist."},
    ],
    "faq": [
        {"user": "What is RAG?", "bot": "RAG stands for Retrieval-Augmented Generation."},
        {"user": "Who created this bot?", "bot": "It was built by our project team."},
    ],
}

# -----------------------------
# Utility
# -----------------------------

# Dispatch table instead of an if/elif chain; unknown modes fall back to
# the base prompt, exactly as before.
_MODE_TO_PROMPT: Dict[str, str] = {
    "base": SYSTEM_BASE,
    "faq": SYSTEM_FAQ,
    "support": SYSTEM_SUPPORT,
}


def get_system_prompt(mode: str = "base") -> str:
    """
    Return a system-level prompt string.
    mode: "base" | "faq" | "support" (anything else maps to "base")
    """
    return _MODE_TO_PROMPT.get(mode, SYSTEM_BASE)


def get_few_shots(intent: str) -> list:
    """
    Return few-shot examples for a given intent label ([] when unknown).
    """
    return FEW_SHOTS.get(intent, [])


if __name__ == "__main__":
    print("System prompt:", get_system_prompt("faq"))
    print("Examples for 'greeting':", get_few_shots("greeting"))
nlu/router.py CHANGED
@@ -1 +1,143 @@
1
  # /nlu/router.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /nlu/router.py
2
+ """
3
+ Lightweight NLU router.
4
+
5
+ - Uses nlu.pipeline.analyze() to classify the user's intent.
6
+ - Maps intents to high-level actions (GREETING, HELP, FAQ, ECHO, SUMMARIZE, GENERAL, GOODBYE).
7
+ - Provides:
8
+ route(text, ctx=None) -> dict with intent, action, handler, params
9
+ respond(text, history) -> quick deterministic reply for smoke tests
10
+
11
+ This file deliberately avoids external dependencies so it works in anonymous mode.
12
+ Later, you can swap 'handler' targets to real modules (e.g., anon_bot, logged_in_bot).
13
+ """
14
+
15
+ from __future__ import annotations
16
+ from dataclasses import dataclass, asdict
17
+ from typing import Any, Dict, List, Optional, Tuple
18
+
19
+ from .pipeline import analyze
20
+ from .prompts import get_system_prompt, get_few_shots
21
+
22
+ History = List[Tuple[str, str]] # [("user","..."), ("bot","...")]
23
+
24
+ # -----------------------------
25
+ # Action / Route schema
26
+ # -----------------------------
27
+
28
+ @dataclass(frozen=True)
29
+ class Route:
30
+ intent: str
31
+ action: str
32
+ handler: str # suggested dotted path or logical name
33
+ params: Dict[str, Any] # arbitrary params (e.g., {"mode":"faq"})
34
+ confidence: float
35
+
36
+ def to_dict(self) -> Dict[str, Any]:
37
+ return asdict(self)
38
+
39
+
40
+ # Intent -> (Action, Suggested Handler, Default Params)
41
+ _ACTION_TABLE: Dict[str, Tuple[str, str, Dict[str, Any]]] = {
42
+ "greeting": ("GREETING", "builtin.respond", {"mode": "base"}),
43
+ "goodbye": ("GOODBYE", "builtin.respond", {"mode": "base"}),
44
+ "help": ("HELP", "builtin.respond", {"mode": "support"}),
45
+ "faq": ("FAQ", "builtin.respond", {"mode": "faq"}),
46
+ # Sentiment intents come from pipeline; treat as GENERAL but note tag:
47
+ "sentiment_positive": ("GENERAL", "builtin.respond", {"mode": "base", "tag": "positive"}),
48
+ "sentiment_negative": ("GENERAL", "builtin.respond", {"mode": "base", "tag": "negative"}),
49
+ # Default:
50
+ "general": ("GENERAL", "builtin.respond", {"mode": "base"}),
51
+ }
52
+
53
+ _DEFAULT_ACTION = ("GENERAL", "builtin.respond", {"mode": "base"})
54
+
55
+
56
+ # -----------------------------
57
+ # Routing
58
+ # -----------------------------
59
+
60
+ def route(text: str, ctx: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
61
+ """
62
+ Decide which action/handler should process the utterance.
63
+ """
64
+ nlu = analyze(text or "")
65
+ intent = nlu.get("intent", "general")
66
+ confidence = float(nlu.get("confidence", 0.0))
67
+ action, handler, params = _ACTION_TABLE.get(intent, _DEFAULT_ACTION)
68
+
69
+ # pass-through entities as params for downstream handlers
70
+ entities = nlu.get("entities") or []
71
+ if entities:
72
+ params = {**params, "entities": entities}
73
+
74
+ # include minimal context (optional)
75
+ if ctx:
76
+ params = {**params, "_ctx": ctx}
77
+
78
+ return Route(
79
+ intent=intent,
80
+ action=action,
81
+ handler=handler,
82
+ params=params,
83
+ confidence=confidence,
84
+ ).to_dict()
85
+
86
+
87
+ # -----------------------------
88
+ # Built-in deterministic responder (for smoke tests)
89
+ # -----------------------------
90
+
91
+ def respond(text: str, history: Optional[History] = None) -> str:
92
+ """
93
+ Produce a tiny, deterministic response using system/few-shot text.
94
+ This is only for local testing; replace with real handlers later.
95
+ """
96
+ r = route(text)
97
+ intent = r["intent"]
98
+ action = r["action"]
99
+ mode = r["params"].get("mode", "base")
100
+
101
+ # Choose a system flavor (not used to prompt a model here, but keeps wording consistent)
102
+ _ = get_system_prompt("support" if action == "HELP" else ("faq" if action == "FAQ" else "base"))
103
+ # Few-shots can inform canned replies (again: no model used, just tone)
104
+ shots = get_few_shots(intent)
105
+
106
+ if action == "GREETING":
107
+ return "Hi! How can I help you today?"
108
+ if action == "GOODBYE":
109
+ return "Goodbye! Have a great day."
110
+ if action == "HELP":
111
+ return "I can answer quick questions, echo text, or summarize short passages. What do you need help with?"
112
+ if action == "FAQ":
113
+ # Trivial FAQ-style echo; swap with RAG later
114
+ return "Ask a specific question (e.g., 'What is RAG?'), and I’ll answer briefly."
115
+ # GENERAL:
116
+ # If the pipeline flagged sentiment, acknowledge gently.
117
+ tag = r["params"].get("tag")
118
+ if tag == "positive":
119
+ prefix = "Glad to hear it! "
120
+ elif tag == "negative":
121
+ prefix = "Sorry to hear that. "
122
+ else:
123
+ prefix = ""
124
+ return prefix + "Noted. If you need help, type 'help'."
125
+
126
+ # -----------------------------
127
+ # Simple CLI smoke test
128
+ # -----------------------------
129
+
130
+ if __name__ == "__main__":
131
+ tests = [
132
+ "Hello there",
133
+ "Can you help me?",
134
+ "What is RAG in simple terms?",
135
+ "This is awful.",
136
+ "Bye!",
137
+ "random input with no keywords",
138
+ ]
139
+ for t in tests:
140
+ print(f"> {t}")
141
+ print(" route:", route(t))
142
+ print(" reply:", respond(t))
143
+ print()
requirements-dev.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ pytest>=7.4.0
2
+ pytest-cov>=4.1.0
3
+ black>=24.3.0
4
+ isort>=5.13.0
5
+ flake8>=7.0.0
6
+ mypy>=1.10.0
7
+ ruff>=0.5.0
requirements-ml.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ transformers>=4.41.0
2
+ torch>=2.2.0
3
+
4
+ # extras commonly required by transformers
5
+ safetensors>=0.4.0
6
+ accelerate>=0.33.0
7
+ sentencepiece>=0.2.0
requirements.txt CHANGED
@@ -1,15 +1,12 @@
1
- gradio>=4.0
2
- transformers>=4.41.0
3
- torch>=2.2.0
4
- scikit-learn>=1.3.0
5
- pandas>=2.1.0
 
6
  numpy>=1.26.0
7
- pytest>=7.4.0
8
- # Optional Azure
 
 
9
  azure-ai-textanalytics>=5.3.0
10
- python-dotenv>=1.0
11
- fastapi>=0.115.0
12
- uvicorn[standard]>=0.30.0
13
- # Optional for Bot Framework sample:
14
- # aiohttp>=3.9
15
- # botbuilder-core>=4.14
 
1
+ gradio>=4.0,<5
2
+ fastapi>=0.115.0,<0.116
3
+ uvicorn[standard]>=0.30.0,<0.31
4
+ python-dotenv>=1.0
5
+
6
+ # light numeric stack
7
  numpy>=1.26.0
8
+ pandas>=2.1.0
9
+ scikit-learn>=1.3.0
10
+
11
+ # optional Azure integration
12
  azure-ai-textanalytics>=5.3.0
 
 
 
 
 
 
scripts/check_compliance.py CHANGED
@@ -1,3 +1,81 @@
1
  # /scripts/check_compliance.py
 
 
 
2
 
3
- # Fails if disallowed deps appear (placeholder)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /scripts/check_compliance.py
2
+ #!/usr/bin/env python3
3
+ """
4
+ Compliance checker for disallowed dependencies.
5
 
6
+ - Scans all .py files under project root (excluding venv/.git/etc).
7
+ - Flags imports of disallowed packages (by prefix).
8
+ - Exits nonzero if any violations are found.
9
+
10
+ Run:
11
+ python scripts/check_compliance.py
12
+ """
13
+
14
+ import sys
15
+ import os
16
+ import re
17
+ from pathlib import Path
18
+
19
+ # -----------------------------
20
+ # Config
21
+ # -----------------------------
22
+
23
+ # Disallowed top-level import prefixes
24
+ DISALLOWED = {
25
+ "torch",
26
+ "tensorflow",
27
+ "transformers",
28
+ "openai",
29
+ "azure.ai", # heavy cloud SDK
30
+ "azureml",
31
+ "boto3",
32
+ "botbuilder", # Microsoft Bot Framework
33
+ }
34
+
35
+ IGNORE_DIRS = {".git", "__pycache__", "venv", ".venv", "env", ".env", "node_modules"}
36
+
37
+ IMPORT_RE = re.compile(r"^\s*(?:import|from)\s+([a-zA-Z0-9_.]+)")
38
+
39
+ # -----------------------------
40
+ # Scan
41
+ # -----------------------------
42
+
43
+ def scan_file(path: Path) -> list[str]:
44
+ bad = []
45
+ try:
46
+ lines = path.read_text(encoding="utf-8", errors="ignore").splitlines()
47
+ except Exception as e:
48
+ print(f"[warn] could not read {path}: {e}", file=sys.stderr)
49
+ return []
50
+ for i, line in enumerate(lines, 1):
51
+ m = IMPORT_RE.match(line)
52
+ if not m:
53
+ continue
54
+ mod = m.group(1)
55
+ for banned in DISALLOWED:
56
+ if mod == banned or mod.startswith(banned + "."):
57
+ bad.append(f"{path}:{i}: disallowed import '{mod}'")
58
+ return bad
59
+
60
+
61
+ def main(root: str = ".") -> int:
62
+ root = Path(root)
63
+ failures: list[str] = []
64
+
65
+ for p in root.rglob("*.py"):
66
+ if any(part in IGNORE_DIRS for part in p.parts):
67
+ continue
68
+ failures.extend(scan_file(p))
69
+
70
+ if failures:
71
+ print("❌ Compliance check failed:")
72
+ for f in failures:
73
+ print(" ", f)
74
+ return 1
75
+ else:
76
+ print("βœ… Compliance check passed (no disallowed deps).")
77
+ return 0
78
+
79
+
80
+ if __name__ == "__main__":
81
+ sys.exit(main())
scripts/run_local.sh CHANGED
@@ -1,5 +1,45 @@
1
  # /scripts/run_local.sh
2
  #!/usr/bin/env bash
3
- set -euo pipefail
4
- export PYTHONPATH=.
5
- python -c "from storefront_chatbot.app.app import build; build().launch(server_name='0.0.0.0', server_port=7860)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# /scripts/run_local.sh
# (bug fix: the shebang must be the FIRST line of the file to take effect)
set -Eeuo pipefail

# Move to repo root
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# --- Configuration via env (with sane defaults) ---
export PYTHONPATH="${PYTHONPATH:-.}"
HOST="${HOST:-0.0.0.0}"
PORT="${PORT:-7860}"
MODE="${MODE:-gradio}"      # gradio | uvicorn
RELOAD="${RELOAD:-false}"   # only applies to MODE=uvicorn
INSTALL="${INSTALL:-0}"     # set INSTALL=1 to pip install requirements

# Load .env if present. `set -a` exports every variable the file assigns —
# far more robust than the previous grep|xargs pipeline, which mixed -0
# (NUL-delimited) with newline-delimited input and silently did nothing.
if [[ -f .env ]]; then
  set -a
  # shellcheck disable=SC1091
  source .env
  set +a
fi

if [[ "$INSTALL" == "1" ]]; then
  echo "📦 Installing dependencies from requirements.txt ..."
  python -m pip install -r requirements.txt
fi

trap 'echo; echo "⛔ Server terminated";' INT TERM

if [[ "$MODE" == "uvicorn" ]]; then
  echo "▶ Starting Uvicorn on http://${HOST}:${PORT} (reload=${RELOAD})"
  # build() is an application *factory*, so uvicorn needs --factory to call
  # it; without the flag uvicorn treats `build` itself as the ASGI app.
  cmd=(python -m uvicorn storefront_chatbot.app.app:build --factory --host "$HOST" --port "$PORT")
  [[ "$RELOAD" == "true" ]] && cmd+=(--reload)
  exec "${cmd[@]}"
else
  # Gradio path (matches the Makefile's build().launch invocation)
  echo "▶ Starting Gradio on http://${HOST}:${PORT}"
  python - <<PY
from storefront_chatbot.app.app import build
build().launch(server_name="${HOST}", server_port=${PORT})
PY
fi
scripts/seed_data.py CHANGED
@@ -1,3 +1,94 @@
1
  # /scripts/seed_data.py
2
- # Load sample products/FAQs (placeholder)
 
 
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
# /scripts/seed_data.py
"""
Seed script to load sample products and FAQs into local data files.

- Creates ./data/products.json and ./data/faqs.json
- Provides a CLI to re-seed or show contents
- No external dependencies required

Run:
    python scripts/seed_data.py        # create seed files
    python scripts/seed_data.py show   # print contents
"""

import sys
import json
from pathlib import Path
import datetime

# All paths are resolved relative to the repository root so the script
# works no matter where it is invoked from.
ROOT = Path(__file__).resolve().parent.parent
DATA_DIR = ROOT / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)

PRODUCTS_PATH = DATA_DIR / "products.json"
FAQS_PATH = DATA_DIR / "faqs.json"


SAMPLE_PRODUCTS = [
    {
        "id": "p1",
        "name": "Chatbot Pro Subscription",
        "description": "Access advanced features of the chatbot platform.",
        "price": 9.99,
        "currency": "USD",
        "tags": ["subscription", "chatbot"],
    },
    {
        "id": "p2",
        "name": "Custom Bot Avatar",
        "description": "A personalized avatar for your chatbot.",
        "price": 4.99,
        "currency": "USD",
        "tags": ["avatar", "customization"],
    },
    {
        "id": "p3",
        "name": "Analytics Dashboard",
        "description": "Real-time analytics and reporting for your conversations.",
        "price": 14.99,
        "currency": "USD",
        "tags": ["analytics", "dashboard"],
    },
]

SAMPLE_FAQS = [
    {
        "q": "How do I reset my password?",
        "a": "Click 'Forgot password' on the login page and follow the instructions.",
    },
    {
        "q": "Can I export my chat history?",
        "a": "Yes, you can export your chat history from the account settings page.",
    },
    {
        "q": "Do you offer refunds?",
        "a": "Refunds are available within 14 days of purchase. Contact support for help.",
    },
]


def write_json(path: Path, data) -> None:
    """Pretty-print *data* as UTF-8 JSON at *path*."""
    serialized = json.dumps(data, indent=2, ensure_ascii=False)
    path.write_text(serialized, encoding="utf-8")


def seed() -> None:
    """Write the sample product and FAQ fixtures into ./data."""
    for target, payload in ((PRODUCTS_PATH, SAMPLE_PRODUCTS), (FAQS_PATH, SAMPLE_FAQS)):
        write_json(target, payload)
    print(f"✅ Seeded data at {datetime.date.today()} into {DATA_DIR}")


def show() -> None:
    """Print the current seed files, if they exist."""
    if PRODUCTS_PATH.is_file():
        print("Products:")
        print(PRODUCTS_PATH.read_text(encoding="utf-8"))
    if FAQS_PATH.is_file():
        print("\nFAQs:")
        print(FAQS_PATH.read_text(encoding="utf-8"))


if __name__ == "__main__":
    command = sys.argv[1] if len(sys.argv) > 1 else ""
    if command == "show":
        show()
    else:
        seed()
tests/test_anon_bot.py CHANGED
@@ -1,3 +1,121 @@
1
  # /test/test_anon_bot.py
2
- def test_anon_stub(): assert True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
 
# /test/test_anon_bot.py
"""
Comprehensive smoke tests for anon_bot.
Run with: pytest -q

Covers rule-based intent detection (rules.py), the turn handler
(handler.py), and a small end-to-end flow.
"""

import pytest
from anon_bot import handler, rules


# ---------- rules: intents & handlers ----------

@pytest.mark.parametrize(
    "msg,expected",
    [
        ("", "empty"),
        ("help", "help"),
        ("/help", "help"),
        ("capabilities", "help"),
        ("reverse abc", "reverse"),
        ("echo hello world", "echo"),
        ("hi", "greet"),
        ("hello", "greet"),
        ("hey", "greet"),
        ("who are you", "chat"),
    ],
)
def test_rules_intent_of(msg, expected):
    # Each raw message must map to exactly the expected intent label.
    assert rules.intent_of(msg) == expected


def test_rules_capabilities_contains_expected_items():
    caps = rules.capabilities()
    assert "help" in caps
    assert any(c.startswith("reverse") for c in caps)
    assert any(c.startswith("echo") for c in caps)


def test_rules_handlers_basic():
    # Each handler returns an object with a .text payload.
    assert "I can:" in rules.handle_help().text
    assert rules.handle_reverse("reverse hello").text == "olleh"
    assert rules.handle_reverse("reverse").text == "(nothing to reverse)"
    assert rules.handle_echo("echo one two").text == "one two"
    assert rules.handle_echo("echo").text == "(nothing to echo)"
    assert "Type 'help'" in rules.handle_greet().text


def test_rules_reply_for_empty_and_chat_paths():
    r = rules.reply_for("", [])
    assert "Please type something" in r.text

    r2 = rules.reply_for("who are you", [])
    assert "tiny anonymous chatbot" in r2.text

    r3 = rules.reply_for("can you help me", [])
    assert "I can:" in r3.text  # chat fallback detects 'help' and returns help


# ---------- handler: history & turn processing ----------

def test_handle_turn_appends_user_and_bot():
    hist = []
    out = handler.handle_turn("hello", hist, user=None)
    # last two entries should be ("user", ...), ("bot", ...)
    assert out[-2][0] == "user" and out[-2][1] == "hello"
    assert out[-1][0] == "bot" and "Type 'help'" in out[-1][1]


def test_handle_turn_with_existing_history_preserves_items():
    h2 = [("user", "prev"), ("bot", "ok")]
    out2 = handler.handle_turn("echo ping", h2, user=None)
    assert out2[:2] == h2  # preserved
    assert out2[-1][0] == "bot"
    assert out2[-1][1] == "ping"  # echo payload


def test_handle_text_convenience():
    # handle_text is the one-shot wrapper that returns only the reply string.
    reply = handler.handle_text("reverse abc")
    assert reply == "cba"


def test_handle_turn_empty_message_produces_prompt():
    out = handler.handle_turn("", [], user=None)
    assert out[-1][0] == "bot"
    assert "Please type" in out[-1][1]


def test_handler_coerces_weird_history_without_crashing():
    # Mix of tuples, lists, malformed entries, and non-iterables
    weird = [
        ("user", "ok"),
        ["bot", "fine"],
        "garbage",
        ("only_one_element",),
        ("user", 123),
        42,
        None,
    ]
    out = handler.handle_turn("hi", weird, user=None)
    # Should include a normalized user entry and a bot reply at the end
    assert out[-2] == ("user", "hi")
    assert out[-1][0] == "bot"


# ---------- end-to-end mini scriptable checks ----------

def test_greet_help_echo_reverse_flow():
    # Simulate a short conversation, asserting on each bot reply.
    h = []
    h = handler.handle_turn("hi", h, None)
    assert "help" in h[-1][1].lower()

    h = handler.handle_turn("help", h, None)
    assert "I can:" in h[-1][1]

    h = handler.handle_turn("echo alpha beta", h, None)
    assert h[-1][1] == "alpha beta"

    h = handler.handle_turn("reverse zed", h, None)
    assert h[-1][1] == "dez"
tests/test_guardrails.py CHANGED
@@ -1,2 +1,40 @@
1
  # /test/test_guardrails.py
2
- def test_guardrails_stub(): assert True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /test/test_guardrails.py
"""
Guardrail tests:
- Ensure compliance checker passes (no disallowed deps imported).
- Ensure anon_bot.rules doesn't produce unsafe replies for empty / bad input.
"""

import subprocess
import sys
import pathlib

import pytest

from anon_bot import rules


def test_compliance_script_runs_clean():
    # tests/ lives one level below the repo root.
    root = pathlib.Path(__file__).resolve().parent.parent
    script = root / "scripts" / "check_compliance.py"
    # Run as a subprocess so we catch real exit code
    proc = subprocess.run([sys.executable, str(script)], capture_output=True, text=True)
    # If it fails, dump output for debugging
    if proc.returncode != 0:
        print(proc.stdout)
        print(proc.stderr, file=sys.stderr)
    assert proc.returncode == 0


@pytest.mark.parametrize("msg", ["", None, " "])
def test_rules_empty_prompts_are_safe(msg):
    # `msg or ""` also normalizes the None case before calling reply_for.
    r = rules.reply_for(msg or "", [])
    # Should politely nudge the user, not crash
    assert "Please" in r.text or "help" in r.text.lower()


@pytest.mark.parametrize("msg", ["rm -rf /", "DROP TABLE users;"])
def test_rules_handles_malicious_looking_input(msg):
    r = rules.reply_for(msg, [])
    # The bot should fall back safely to generic chat response
    assert "Noted" in r.text or "help" in r.text
tests/test_indexer.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /tests/test_indexer.py
"""Tests for the TF-IDF index (memory.rag.data.indexer)."""

from pathlib import Path
from memory.rag.data.indexer import TfidfIndex, DocMeta, search, DEFAULT_INDEX_PATH


def _make_meta(doc_id: str) -> DocMeta:
    """Build a minimal DocMeta for inline documents.

    (Renamed from the dunder-prefixed `__meta`, moved before first use,
    and DocMeta is now imported once at module top instead of inside the
    helper.)
    """
    return DocMeta(doc_id=doc_id, source="inline", title=doc_id)


def test_add_and_search(tmp_path: Path):
    # Indexing a file makes its content retrievable by keyword search.
    p = tmp_path / "a.md"
    p.write_text("Hello world. This is an anonymous chatbot.\nRules are simple.", encoding="utf-8")
    idx = TfidfIndex()
    idx.add_file(p)
    hits = idx.search("anonymous rules", k=5)
    assert hits and hits[0].doc_id == str(p.resolve())


def test_persist_and_load(tmp_path: Path):
    # save()/load() round-trips the index and keeps it searchable.
    p = tmp_path / "index.json"
    idx = TfidfIndex()
    idx.add_text("id1", "cats are great, dogs are cool", meta=_make_meta("id1"))
    idx.save(p)
    loaded = TfidfIndex.load(p)
    hits = loaded.search("dogs", k=1)
    assert hits and hits[0].doc_id == "id1"
tests/test_logged_in_bot.py CHANGED
@@ -1,2 +1,84 @@
1
  # /test/test_logged_in_bot.py
2
- def test_logged_stub(): assert True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /test/test_logged_in_bot.py
"""
Tests for logged_in_bot.tools (no Azure required).
Run: pytest -q
"""

import os  # NOTE(review): unused here — consider removing
import pytest

from logged_in_bot import tools as L


def test_help_route_and_reply():
    resp = L.handle_logged_in_turn("help", history=[], user=None)
    assert isinstance(resp, dict)
    assert "I can:" in resp["reply"]
    assert resp["meta"]["intent"] == "help"
    assert "sentiment" in resp["meta"]  # attached even in help path


def test_echo_payload():
    resp = L.handle_logged_in_turn("echo hello world", history=[], user=None)
    assert resp["reply"] == "hello world"
    assert resp["meta"]["intent"] == "echo"


def test_summarize_uses_first_sentence():
    text = "This is the first sentence. This is the second sentence."
    resp = L.handle_logged_in_turn(f"summarize {text}", history=[], user=None)
    # naive summarizer returns the first sentence (possibly truncated)
    assert "first sentence" in resp["reply"]
    assert resp["meta"]["intent"] == "summarize"
    assert "sentiment" in resp["meta"]  # sentiment computed on source text


def test_empty_input_prompts_user():
    resp = L.handle_logged_in_turn("", history=[], user=None)
    assert "Please type" in resp["reply"]
    assert resp["meta"]["intent"] == "empty"


def test_general_chat_fallback_and_sentiment():
    resp = L.handle_logged_in_turn("I love this project!", history=[], user=None)
    assert isinstance(resp["reply"], str) and len(resp["reply"]) > 0
    # sentiment present; backend may be "local" or "none" depending on env
    sent = resp["meta"].get("sentiment", {})
    assert sent.get("label") in {"positive", "neutral", "negative", None}


def test_optional_redaction_is_honored(monkeypatch):
    # Monkeypatch optional redactor to simulate PII masking
    monkeypatch.setattr(L, "pii_redact", lambda s: s.replace("555-1234", "[REDACTED]"), raising=False)
    resp = L.handle_logged_in_turn("echo call me at 555-1234", history=[], user=None)
    assert resp["meta"]["redacted"] is True
    assert resp["reply"] == "call me at [REDACTED]"


def test_input_length_cap(monkeypatch):
    # Cap input length to 10 chars; ensure ellipsis added
    monkeypatch.setenv("MAX_INPUT_CHARS", "10")
    long = "echo 1234567890ABCDEFGHIJ"
    resp = L.handle_logged_in_turn(long, history=[], user=None)
    # reply is payload of redacted/sanitized text; should end with ellipsis
    assert resp["reply"].endswith("…") or resp["reply"].endswith("...")  # handle different ellipsis if changed


def test_history_pass_through_shape():
    # History should be accepted and not crash; we don't deeply inspect here
    hist = [("user", "prev"), ("bot", "ok")]
    resp = L.handle_logged_in_turn("echo ping", history=hist, user={"id": "u1"})
    assert resp["reply"] == "ping"
    assert isinstance(resp["meta"], dict)


@pytest.mark.parametrize("msg,expected_intent", [
    ("help", "help"),
    ("echo abc", "echo"),
    ("summarize One. Two.", "summarize"),
    ("random chat", "chat"),
])
def test_intent_detection_smoke(msg, expected_intent):
    r = L.handle_logged_in_turn(msg, history=[], user=None)
    assert r["meta"]["intent"] == expected_intent
tests/test_memory.py CHANGED
@@ -1,2 +1,95 @@
1
  # /test/test_memory.py
2
- def test_memory_stub(): assert True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /test/test_memory.py
2
+ """
3
+ Tests for memory.sessions
4
+ Run: pytest -q
5
+ """
6
+
7
+ import time
8
+ from pathlib import Path
9
+
10
+ from memory import sessions as S
11
+
12
+
13
def test_create_and_append_history():
    """A fresh store records turns in order and refreshes timestamps."""
    store = S.SessionStore(ttl_seconds=None, max_history=10)
    session = store.create(user_id="u1")
    assert session.session_id
    sid = session.session_id

    store.append_user(sid, "hello")
    store.append_bot(sid, "hi there")
    assert store.get_history(sid) == [("user", "hello"), ("bot", "hi there")]

    # A further append must not move updated_at backwards.
    previous = session.updated_at
    store.append_user(sid, "next")
    assert store.get(sid).updated_at >= previous
28
+
29
+
30
def test_max_history_cap():
    """With max_history=3, only the three most recent turns survive."""
    store = S.SessionStore(ttl_seconds=None, max_history=3)
    sid = store.create().session_id

    # Four appends in total: the oldest ("user", "a") must be dropped.
    for role, text in (("user", "a"), ("bot", "b"), ("user", "c"), ("bot", "d")):
        if role == "user":
            store.append_user(sid, text)
        else:
            store.append_bot(sid, text)

    assert store.get_history(sid) == [("bot", "b"), ("user", "c"), ("bot", "d")]
42
+
43
+
44
def test_ttl_sweep_expires_old_sessions():
    """With ttl_seconds=0, sessions aged into the past are removed by sweep()."""
    store = S.SessionStore(ttl_seconds=0)  # everything expires immediately
    first = store.create()
    second = store.create()
    # Reach into the store internals to age both sessions past the TTL.
    for sess in (first, second):
        store._sessions[sess.session_id].updated_at -= 10

    assert store.sweep() >= 1
    # Anything the sweep left behind must still be considered live.
    for sid in store.all_ids():
        assert not store._expired(store.get(sid))
57
+
58
+
59
def test_key_value_store_helpers():
    """set / get_value / data_dict round-trip arbitrary per-session values."""
    store = S.SessionStore(ttl_seconds=None)
    sid = store.create().session_id

    store.set(sid, "mode", "anonymous")
    store.set(sid, "counter", 1)
    assert store.get_value(sid, "mode") == "anonymous"
    assert store.data_dict(sid)["counter"] == 1
    # Missing keys fall back to the supplied default.
    assert store.get_value(sid, "missing", default="x") == "x"
71
+
72
+
73
def test_persistence_save_and_load(tmp_path: Path):
    """A saved store reloads with its history and user_id intact."""
    target = tmp_path / "sess.json"

    original = S.SessionStore(ttl_seconds=None)
    sess = original.create(user_id="uX")
    original.append_user(sess.session_id, "hello")
    original.append_bot(sess.session_id, "hi")
    original.save(target)

    restored = S.SessionStore.load(target)
    assert restored.get_history(sess.session_id) == [("user", "hello"), ("bot", "hi")]
    assert restored.get(sess.session_id).user_id == "uX"
86
+
87
+
88
def test_module_level_singleton_and_helpers():
    """Module-level helpers operate on the shared default store."""
    sid = S.new_session(user_id="alice").session_id
    S.append_user(sid, "hey")
    S.append_bot(sid, "hello!")
    # Only the tail is checked: the shared store may already hold other turns.
    assert S.history(sid)[-2:] == [("user", "hey"), ("bot", "hello!")]
    S.set_value(sid, "flag", True)
    assert S.get_value(sid, "flag") is True
tests/test_nlu.py CHANGED
@@ -1,2 +1,46 @@
1
  # /test/test_nlu.py
2
- def test_nlu_stub(): assert True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # /test/test_nlu.py
2
+ """
3
+ Basic tests for the NLU pipeline and router.
4
+ Run with: pytest -q
5
+ """
6
+
7
+ import pytest
8
+
9
+ from nlu import pipeline, router
10
+
11
+
12
def test_pipeline_greeting():
    """A plain greeting should be classified confidently as 'greeting'."""
    result = pipeline.analyze("Hello there")
    assert result["intent"] == "greeting"
    assert result["confidence"] > 0.5
16
+
17
+
18
def test_pipeline_general():
    """Unrecognized text falls back to the 'general' intent, with entities present."""
    result = pipeline.analyze("completely random utterance")
    assert result["intent"] == "general"
    assert "entities" in result
22
+
23
+
24
def test_router_route_and_respond():
    """A help query routes to the HELP action and yields a help-ish reply."""
    routed = router.route("Can you help me?")
    assert routed["intent"] == "help"
    assert routed["action"] == "HELP"

    reply = router.respond("Can you help me?")
    assert isinstance(reply, str)
    assert "help" in reply.lower()
33
+
34
+
35
def test_router_sentiment_positive():
    """Positive phrasing is detected and acknowledged in the reply."""
    assert router.route("I love this bot!")["intent"] == "sentiment_positive"
    reply = router.respond("I love this bot!").lower()
    assert "glad" in reply or "hear" in reply
40
+
41
+
42
def test_router_goodbye():
    """'bye' triggers the GOODBYE action and a goodbye message."""
    assert router.route("bye")["action"] == "GOODBYE"
    assert "goodbye" in router.respond("bye").lower()
tests/test_retriever.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tests/test_retriever.py
2
+ from pathlib import Path
3
+ from memory.rag.data.indexer import TfidfIndex, DocMeta
4
+ from memory.rag.data.retriever import retrieve, Filters
5
+
6
def _add(idx, did, text, title=None, tags=None):
    """Index *text* under doc id *did* with inline-source metadata."""
    idx.add_text(did, text, DocMeta(doc_id=did, source="inline", title=title, tags=tags))
9
+
10
def test_retrieve_passage(tmp_path: Path):
    """retrieve() against a freshly saved index returns the matching passage.

    Builds a tiny two-document TF-IDF index, saves it to a temp file, then
    queries it through the public retrieve() entry point.
    """
    # TfidfIndex is already imported at module scope; the original's local
    # re-imports (including the unused DEFAULT_INDEX_PATH) were redundant.
    index = TfidfIndex()
    _add(index, "d1", "Rules for an anonymous chatbot are simple and fast.",
         title="Design", tags=["doc", "slide"])
    _add(index, "d2", "This document explains retrieval and index search.",
         title="RAG", tags=["doc"])
    path = tmp_path / "idx.json"
    index.save(path)

    # Query against the saved index file, not the default index location.
    results = retrieve("anonymous chatbot rules", k=2, index_path=path)
    assert results
    assert any("anonymous" in r.text.lower() for r in results)
23
+
24
def test_filters(tmp_path: Path):
    """Filters narrow identical-scoring hits by title substring and required tags."""
    # TfidfIndex is already imported at module scope; no local re-import needed.
    index = TfidfIndex()
    _add(index, "a", "hello world", title="Alpha", tags=["doc", "slide"])
    _add(index, "b", "hello world", title="Beta", tags=["doc"])
    path = tmp_path / "idx.json"
    index.save(path)

    # Only doc "a" has title containing "alpha" AND both required tags.
    filters = Filters(title_contains="alpha", require_tags=["doc", "slide"])
    results = retrieve("hello", k=5, index_path=path, filters=filters)
    assert len(results) == 1
    assert results[0].title == "Alpha"
tests/test_sessions.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tests/test_sessions.py
2
+ from memory.sessions import SessionStore
3
+
4
def test_create_and_history():
    """max_history=3 keeps only the three most recent turns."""
    store = SessionStore(ttl_seconds=None, max_history=3)
    sid = store.create(user_id="u1").session_id
    store.append_user(sid, "a")
    store.append_bot(sid, "b")
    store.append_user(sid, "c")
    store.append_bot(sid, "d")  # fourth append pushes "a" out of the window
    assert store.get_history(sid) == [("bot", "b"), ("user", "c"), ("bot", "d")]
13
+
14
def test_save_load(tmp_path):
    """Saving then loading a store preserves the recorded history."""
    store = SessionStore(ttl_seconds=None)
    sid = store.create().session_id
    store.append_user(sid, "hello")
    target = tmp_path / "sess.json"
    store.save(target)

    reloaded = SessionStore.load(target)
    assert reloaded.get_history(sid)[0] == ("user", "hello")
tree.txt CHANGED
@@ -11,9 +11,6 @@ C:\Users\User\Agentic-Chat-bot-
11
  β”‚ β”œβ”€β”€ handler.py
12
  β”‚ └── rules.py
13
  β”œβ”€β”€ app
14
- β”‚ β”œβ”€β”€ app
15
- β”‚ β”‚ β”œβ”€β”€ app.py
16
- β”‚ β”‚ └── routes.py
17
  β”‚ β”œβ”€β”€ assets
18
  β”‚ β”‚ └── html
19
  β”‚ β”‚ β”œβ”€β”€ agenticcore_frontend.html
@@ -36,7 +33,6 @@ C:\Users\User\Agentic-Chat-bot-
36
  β”‚ β”œβ”€β”€ architecture.md
37
  β”‚ β”œβ”€β”€ design.md
38
  β”‚ β”œβ”€β”€ DEV_DOC.md
39
- β”‚ β”œβ”€β”€ flowchart.png
40
  β”‚ └── results.md
41
  β”œβ”€β”€ examples
42
  β”‚ └── example.py
@@ -84,10 +80,13 @@ C:\Users\User\Agentic-Chat-bot-
84
  β”‚ β”œβ”€β”€ smoke_test.py
85
  β”‚ β”œβ”€β”€ test_anon_bot.py
86
  β”‚ β”œβ”€β”€ test_guardrails.py
 
87
  β”‚ β”œβ”€β”€ test_logged_in_bot.py
88
  β”‚ β”œβ”€β”€ test_memory.py
89
  β”‚ β”œβ”€β”€ test_nlu.py
90
- β”‚ └── test_routes.py
 
 
91
  β”œβ”€β”€ tools
92
  β”‚ └── quick_sanity.py
93
  β”œβ”€β”€ .gitignore
 
11
  β”‚ β”œβ”€β”€ handler.py
12
  β”‚ └── rules.py
13
  β”œβ”€β”€ app
 
 
 
14
  β”‚ β”œβ”€β”€ assets
15
  β”‚ β”‚ └── html
16
  β”‚ β”‚ β”œβ”€β”€ agenticcore_frontend.html
 
33
  β”‚ β”œβ”€β”€ architecture.md
34
  β”‚ β”œβ”€β”€ design.md
35
  β”‚ β”œβ”€β”€ DEV_DOC.md
 
36
  β”‚ └── results.md
37
  β”œβ”€β”€ examples
38
  β”‚ └── example.py
 
80
  β”‚ β”œβ”€β”€ smoke_test.py
81
  β”‚ β”œβ”€β”€ test_anon_bot.py
82
  β”‚ β”œβ”€β”€ test_guardrails.py
83
+ β”‚ β”œβ”€β”€ test_indexer.py
84
  β”‚ β”œβ”€β”€ test_logged_in_bot.py
85
  β”‚ β”œβ”€β”€ test_memory.py
86
  β”‚ β”œβ”€β”€ test_nlu.py
87
+ β”‚ β”œβ”€β”€ test_retriever.py
88
+ β”‚ β”œβ”€β”€ test_routes.py
89
+ β”‚ └── test_sessions.py
90
  β”œβ”€β”€ tools
91
  β”‚ └── quick_sanity.py
92
  β”œβ”€β”€ .gitignore