Spaces:

ruslanmv
/

matrix-ai

Sleeping

App Files Files Community

ruslanmv committed on Sep 27, 2025

Commit

6338f31

1 Parent(s): 478dbbd

First version rag

Browse files

Files changed (6) hide show

app/core/rag/__init__.py +0 -0
app/core/rag/retriever.py +42 -0
app/main.py +13 -14
app/routers/chat.py +34 -26
app/services/chat_service.py +70 -10
data/kb.jsonl +70 -0

app/core/rag/__init__.py ADDED Viewed

File without changes

app/core/rag/retriever.py ADDED Viewed

	@@ -0,0 +1,42 @@

+# app/core/rag/retriever.py
+from __future__ import annotations
+import json, logging
+from pathlib import Path
+from typing import List, Dict, Optional
+import numpy as np
+import faiss
+from sentence_transformers import SentenceTransformer
+log = logging.getLogger(__name__)
class Retriever:
    """In-memory semantic search over a JSONL knowledge base.

    Every non-blank line of the KB file must be a JSON object with a string
    ``text`` field and, optionally, a ``source`` field. All texts are embedded
    once at construction time and stored in a FAISS inner-product index; the
    embeddings are L2-normalized, so the inner product is the cosine
    similarity.
    """

    def __init__(self, kb_path: str = "data/kb.jsonl",
                 model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
                 top_k: int = 4):
        """Load the KB, embed every record and build the search index.

        Args:
            kb_path: Path to the JSONL knowledge base.
            model_name: SentenceTransformer model used for both documents
                and queries.
            top_k: Default number of hits returned by :meth:`retrieve`.

        Raises:
            FileNotFoundError: If ``kb_path`` does not exist.
            ValueError: If the KB contains no records, a line is not valid
                JSON, or a record lacks a string ``text`` field.
        """
        self.kb_path = Path(kb_path)
        self.top_k = top_k
        if not self.kb_path.exists():
            raise FileNotFoundError(f"KB file not found: {self.kb_path} (jsonl with {{text,source}})")
        # Parse and validate the KB before loading the embedding model, so a
        # bad or empty file fails fast with a message that names the problem
        # instead of an IndexError on the embedding matrix shape.
        self.docs: List[Dict[str, str]] = self._load_docs(self.kb_path)
        if not self.docs:
            raise ValueError(f"KB file contains no records: {self.kb_path}")
        self.model = SentenceTransformer(model_name)
        texts = [d["text"] for d in self.docs]
        emb = self.model.encode(texts, convert_to_numpy=True, normalize_embeddings=True, show_progress_bar=False)
        self.dim = int(emb.shape[1])
        self.index = faiss.IndexFlatIP(self.dim)   # cosine via normalized vectors = dot product
        self.index.add(emb.astype("float32"))

    @staticmethod
    def _load_docs(path: Path) -> List[Dict[str, str]]:
        """Read the JSONL file at *path*, skipping blank lines and validating each record."""
        docs: List[Dict[str, str]] = []
        with path.open("r", encoding="utf-8") as f:
            for lineno, raw in enumerate(f, start=1):
                line = raw.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError as err:
                    raise ValueError(f"{path}:{lineno}: invalid JSON ({err.msg})") from err
                if not isinstance(record, dict) or not isinstance(record.get("text"), str):
                    raise ValueError(f"{path}:{lineno}: expected a JSON object with a string 'text' field")
                docs.append(record)
        return docs

    def retrieve(self, query: str, k: Optional[int] = None) -> List[Dict]:
        """Return up to *k* KB records most similar to *query*, best first.

        Args:
            query: Free-text query to embed and search for.
            k: Number of hits wanted; ``None`` or ``0`` falls back to the
                instance's ``top_k``. Values larger than the corpus are
                clamped to the corpus size.

        Returns:
            A list of ``{"text", "source", "score"}`` dicts. ``source`` falls
            back to ``kb:<row index>`` when the record has none. The list is
            empty when there is nothing to search or the effective *k* is
            below 1.
        """
        k = min(k or self.top_k, len(self.docs))
        if k < 1:
            return []
        vec = self.model.encode([query], convert_to_numpy=True, normalize_embeddings=True)
        scores, indices = self.index.search(vec.astype("float32"), k)
        out: List[Dict] = []
        for idx, score in zip(indices[0], scores[0]):
            pos = int(idx)
            if pos < 0:
                # Negative labels mark "no neighbour" slots in the result.
                continue
            doc = self.docs[pos]
            out.append({"text": doc["text"], "source": doc.get("source", f"kb:{pos}"), "score": float(score)})
        return out

app/main.py CHANGED Viewed

@@ -9,6 +9,10 @@ from typing import Any, Dict
 from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
 # -----------------------------------------------------------------------------
 # Early: load .env (so HF_TOKEN, ADMIN_TOKEN, etc. are available locally)
 # -----------------------------------------------------------------------------
@@ -65,7 +69,6 @@ _load_env_file([".env", "configs/.env", ".env.local", "configs/.env.local"])
 # -----------------------------------------------------------------------------
 # Middlewares
 # -----------------------------------------------------------------------------
-# Prefer the canonical package name; if your repo uses "middlewares/", this tries both.
 try:
     from .middleware import attach_middlewares  # singular
 except Exception:
@@ -82,7 +85,6 @@ except Exception:
 # -----------------------------------------------------------------------------
 from .routers import health, plan, chat
-# Optional UI (Home/Chat/Dev). If missing, we gracefully fall back to a JSON root.
 try:
     from .ui import router as ui_router  # type: ignore
     HAS_UI = True
@@ -103,9 +105,14 @@ async def lifespan(app: FastAPI):
     app.state.started_at = time.time()
     app.state.version = os.getenv("APP_VERSION", "1.0.0")
-    # Minimal diagnostics; HF_TOKEN presence matters for inference
     hf_token_present = bool(os.getenv("HF_TOKEN"))
-    logging.getLogger("uvicorn.error").info(
         "matrix-ai starting (version=%s, port=%s, hf_token_present=%s)",
         app.state.version,
         os.getenv("PORT", "7860"),
@@ -115,7 +122,7 @@ async def lifespan(app: FastAPI):
         yield
     finally:
         uptime = time.time() - getattr(app.state, "started_at", time.time())
-        logging.getLogger("uvicorn.error").info(
             "matrix-ai shutting down (uptime=%.2fs)", uptime
         )
@@ -131,19 +138,14 @@ def create_app() -> FastAPI:
         lifespan=lifespan,
     )
-    # Middlewares (request-id, gzip, rate-limit, etc.)
     attach_middlewares(app)
-    # Core routers
     app.include_router(health.router, tags=["Health"])
     app.include_router(plan.router, prefix="/v1", tags=["Planning"])
     app.include_router(chat.router, prefix="/v1", tags=["Chat"])
-    # Optional UI (adds '/', '/chat', '/dev')
     if HAS_UI:
         app.include_router(ui_router, tags=["UI"])
     else:
-        # Minimal root so HF root probes pass even without UI
         @app.get("/", include_in_schema=False)
         async def root() -> Dict[str, Any]:
             return {
@@ -153,12 +155,9 @@ def create_app() -> FastAPI:
                 "docs": "/docs",
                 "endpoints": {"plan": "/v1/plan", "chat": "/v1/chat", "healthz": "/healthz"},
             }
         @app.get("/home", include_in_schema=False)
         async def home_redirect():
             return RedirectResponse(url="/docs", status_code=302)
     return app
-app = create_app()

 from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
+# --- ADDED: Import dependencies needed for pre-loading ---
+from .deps import get_settings
+from .services.chat_service import get_retriever
 # -----------------------------------------------------------------------------
 # Early: load .env (so HF_TOKEN, ADMIN_TOKEN, etc. are available locally)
 # -----------------------------------------------------------------------------
 # -----------------------------------------------------------------------------
 # Middlewares
 # -----------------------------------------------------------------------------
 try:
     from .middleware import attach_middlewares  # singular
 except Exception:
 # -----------------------------------------------------------------------------
 from .routers import health, plan, chat
 try:
     from .ui import router as ui_router  # type: ignore
     HAS_UI = True
     app.state.started_at = time.time()
     app.state.version = os.getenv("APP_VERSION", "1.0.0")
+    # --- ADDED: Pre-load the RAG model and index on startup ---
+    logger = logging.getLogger("uvicorn.error")
+    logger.info("Warming up RAG retriever...")
+    get_retriever(get_settings())
+    logger.info("RAG retriever is ready.")
     hf_token_present = bool(os.getenv("HF_TOKEN"))
+    logger.info(
         "matrix-ai starting (version=%s, port=%s, hf_token_present=%s)",
         app.state.version,
         os.getenv("PORT", "7860"),
         yield
     finally:
         uptime = time.time() - getattr(app.state, "started_at", time.time())
+        logger.info(
             "matrix-ai shutting down (uptime=%.2fs)", uptime
         )
         lifespan=lifespan,
     )
     attach_middlewares(app)
     app.include_router(health.router, tags=["Health"])
     app.include_router(plan.router, prefix="/v1", tags=["Planning"])
     app.include_router(chat.router, prefix="/v1", tags=["Chat"])
     if HAS_UI:
         app.include_router(ui_router, tags=["UI"])
     else:
         @app.get("/", include_in_schema=False)
         async def root() -> Dict[str, Any]:
             return {
                 "docs": "/docs",
                 "endpoints": {"plan": "/v1/plan", "chat": "/v1/chat", "healthz": "/healthz"},
             }
         @app.get("/home", include_in_schema=False)
         async def home_redirect():
             return RedirectResponse(url="/docs", status_code=302)
     return app
+app = create_app()

app/routers/chat.py CHANGED Viewed

@@ -1,8 +1,12 @@
 from fastapi import APIRouter, Depends, HTTPException, Query
 from starlette.responses import StreamingResponse
-from pydantic import BaseModel
-from typing import List, Optional, Any, Iterator
-import json, time
 from ..deps import get_settings
 from ..core.config import Settings
@@ -10,10 +14,12 @@ from ..services.chat_service import ChatService
 router = APIRouter()
 class ChatMessage(BaseModel):
     role: str
     content: str
 class ChatRequest(BaseModel):
     query: Optional[str] = None
     question: Optional[str] = None
@@ -26,13 +32,15 @@ class ChatRequest(BaseModel):
         if self.prompt: return self.prompt
         if self.messages:
             for m in reversed(self.messages):
-                if m.role.lower() == "user":
-                    return m.content
             return self.messages[-1].content
         raise ValueError("Body must include 'query'/'question'/'prompt' or 'messages'")
 class ChatResponse(BaseModel):
     answer: str
 @router.post("/chat", response_model=ChatResponse)
 async def chat(req: ChatRequest, settings: Settings = Depends(get_settings)):
@@ -42,71 +50,71 @@ async def chat(req: ChatRequest, settings: Settings = Depends(get_settings)):
         raise HTTPException(status_code=422, detail=str(e))
     svc = ChatService(settings)
     try:
-        answer = await svc.answer(text)
-        return ChatResponse(answer=answer)
     except PermissionError as e:
         raise HTTPException(status_code=403, detail=str(e))
     except Exception as e:
         raise HTTPException(status_code=502, detail=f"Inference error: {e}")
 @router.get("/chat", response_model=ChatResponse)
 async def chat_get(query: str = Query(...), settings: Settings = Depends(get_settings)):
     svc = ChatService(settings)
     try:
-        answer = await svc.answer(query)
-        return ChatResponse(answer=answer)
     except PermissionError as e:
         raise HTTPException(status_code=403, detail=str(e))
     except Exception as e:
         raise HTTPException(status_code=502, detail=f"Inference error: {e}")
 # ---------- Streaming (SSE) ----------
 def _sse_line(obj: Any) -> str:
     payload = obj if isinstance(obj, str) else json.dumps(obj, ensure_ascii=False)
     return f"data: {payload}\n\n"
 @router.get("/chat/stream")
 async def chat_stream(query: str = Query(...), settings: Settings = Depends(get_settings)):
-    """
-    SSE stream of token deltas: emits {"delta": "..."} chunks, then final [DONE].
-    """
     svc = ChatService(settings)
-    def gen() -> Iterator[str]:
-        # Anti-buffer padding & immediate ping to force first paint
         yield ":" + (" " * 2048) + "\n\n"
         yield "event: ping\ndata: 0\n\n"
-        any_tokens = False
         try:
-            for token in svc.stream_answer(query):
                 if token:
                     any_tokens = True
                     yield _sse_line({"delta": token})
             if not any_tokens:
                 yield _sse_line({"delta": ""})
             yield _sse_line("[DONE]")
-        except GeneratorExit:
-            return
         except Exception as e:
-            try:
-                yield _sse_line({"error": str(e)})
-            except Exception:
-                return
     headers = {
-        # Critical for proxies/browsers
         "Cache-Control": "no-cache, no-transform",
-        "X-Accel-Buffering": "no",          # Nginx
         "Connection": "keep-alive",
-        "Content-Encoding": "identity",     # Prevents Starlette gzip from buffering SSE
     }
     return StreamingResponse(gen(), media_type="text/event-stream; charset=utf-8", headers=headers)
 @router.post("/chat/stream")
 async def chat_stream_post(req: ChatRequest, settings: Settings = Depends(get_settings)):
     try:
         q = req.as_text()
     except ValueError as e:
         raise HTTPException(status_code=422, detail=str(e))
-    # Reuse GET logic to keep one code path
     return await chat_stream(query=q, settings=settings)

+from __future__ import annotations
+import json
+from typing import Any, AsyncIterator, List, Optional
 from fastapi import APIRouter, Depends, HTTPException, Query
+from pydantic import BaseModel, Field
+from starlette.concurrency import run_in_threadpool
 from starlette.responses import StreamingResponse
 from ..deps import get_settings
 from ..core.config import Settings
 router = APIRouter()
 # One conversation turn: who spoke (role) and what was said (content).
 class ChatMessage(BaseModel):
     role: str
     content: str
 class ChatRequest(BaseModel):
     query: Optional[str] = None
     question: Optional[str] = None
         if self.prompt: return self.prompt
         if self.messages:
             for m in reversed(self.messages):
+                if m.role.lower() == "user": return m.content
             return self.messages[-1].content
         raise ValueError("Body must include 'query'/'question'/'prompt' or 'messages'")
 # Response body of the /chat routes: the answer text plus the list of source strings returned with it.
 class ChatResponse(BaseModel):
     answer: str
     # Each response gets its own empty list when no sources are supplied.
+    sources: List[str] = Field(default_factory=list)
 @router.post("/chat", response_model=ChatResponse)
 async def chat(req: ChatRequest, settings: Settings = Depends(get_settings)):
         raise HTTPException(status_code=422, detail=str(e))
     svc = ChatService(settings)
     try:
+        # Run the blocking call in a thread pool to avoid freezing the server
+        answer, sources = await run_in_threadpool(svc.answer_with_sources, text)
+        return ChatResponse(answer=answer, sources=sources)
     except PermissionError as e:
         raise HTTPException(status_code=403, detail=str(e))
     except Exception as e:
         raise HTTPException(status_code=502, detail=f"Inference error: {e}")
 @router.get("/chat", response_model=ChatResponse)
 async def chat_get(query: str = Query(...), settings: Settings = Depends(get_settings)):
     # GET variant of the chat endpoint: the question arrives as the required
     # `query` parameter. PermissionError maps to 403, any other failure to 502.
     service = ChatService(settings)
     try:
         # answer_with_sources is a synchronous call, so it runs in the thread pool.
         reply, cited = await run_in_threadpool(service.answer_with_sources, query)
         return ChatResponse(answer=reply, sources=cited)
     except PermissionError as denied:
         raise HTTPException(status_code=403, detail=str(denied))
     except Exception as failure:
         raise HTTPException(status_code=502, detail=f"Inference error: {failure}")
 # ---------- Streaming (SSE) ----------
 def _sse_line(obj: Any) -> str:
     """Frame *obj* as one SSE ``data:`` event; strings pass through, anything else is JSON-encoded."""
     if isinstance(obj, str):
         body = obj
     else:
         body = json.dumps(obj, ensure_ascii=False)
     return "data: " + body + "\n\n"
 @router.get("/chat/stream")
 async def chat_stream(query: str = Query(...), settings: Settings = Depends(get_settings)):
     # SSE endpoint: emits {"delta": "..."} events for each token, then a final
     # [DONE] event; a failure is reported in-band as an {"error": "..."} event.
     # Documented with comments rather than a docstring so the generated API
     # description of this route stays as it was.
     #
     # Imported here because only this handler needs it.
     from starlette.concurrency import iterate_in_threadpool
     svc = ChatService(settings)
     async def gen() -> AsyncIterator[str]:
         # Anti-buffer padding and initial ping
         yield ":" + (" " * 2048) + "\n\n"
         yield "event: ping\ndata: 0\n\n"
         try:
             # Building the stream (retrieval + request setup) is blocking, so do it off the loop.
             stream_generator = await run_in_threadpool(svc.stream_answer, query)
             any_tokens = False
             # The stream is a synchronous iterable, so each step is also pulled
             # in the thread pool; a plain `for` here would block the event
             # loop for the whole duration of the stream.
             async for token in iterate_in_threadpool(iter(stream_generator)):
                 if token:
                     any_tokens = True
                     yield _sse_line({"delta": token})
             if not any_tokens:
                 # Always send at least one delta so clients see a data event before [DONE].
                 yield _sse_line({"delta": ""})
             yield _sse_line("[DONE]")
         except Exception as e:
             yield _sse_line({"error": str(e)})
     headers = {
         "Cache-Control": "no-cache, no-transform",
         "X-Accel-Buffering": "no",
         "Connection": "keep-alive",
         "Content-Encoding": "identity",
     }
     return StreamingResponse(gen(), media_type="text/event-stream; charset=utf-8", headers=headers)
 @router.post("/chat/stream")
 async def chat_stream_post(req: ChatRequest, settings: Settings = Depends(get_settings)):
     # POST variant of the SSE endpoint: extract the question from the body
     # (422 when the body carries none), then delegate to the GET handler.
     try:
         question = req.as_text()
     except ValueError as invalid:
         raise HTTPException(status_code=422, detail=str(invalid))
     return await chat_stream(query=question, settings=settings)

app/services/chat_service.py CHANGED Viewed

@@ -1,36 +1,96 @@
 from __future__ import annotations
 from ..core.config import Settings
 from ..core.inference.client import RouterRequestsClient
 SYSTEM_PROMPT = (
     "You are MATRIX-AI, a concise, helpful assistant for the Matrix EcoSystem. "
     "Answer clearly and briefly. If unsure, say so."
 )
 class ChatService:
     def __init__(self, settings: Settings):
         self.settings = settings
         self.client = RouterRequestsClient(
             model=settings.model.name,
             fallback=settings.model.fallback,
-            provider=settings.model.provider,
             max_retries=2,
-            connect_timeout=10.0,
-            read_timeout=60.0,
         )
-    async def answer(self, query: str) -> str:
-        # non-stream (compatible with current UI)
-        return self.client.chat_nonstream(
-            SYSTEM_PROMPT, query,
             max_tokens=self.settings.model.max_new_tokens,
             temperature=self.settings.model.temperature,
         )
-    # Expose a generator for streaming endpoints
     def stream_answer(self, query: str):
         return self.client.chat_stream(
-            SYSTEM_PROMPT, query,
             max_tokens=self.settings.model.max_new_tokens,
             temperature=self.settings.model.temperature,
-        )

 from __future__ import annotations
+import logging
+import os
+from pathlib import Path
+from typing import List, Tuple
 from ..core.config import Settings
 from ..core.inference.client import RouterRequestsClient
+from ..core.rag.retriever import Retriever
+logger = logging.getLogger(__name__)
 SYSTEM_PROMPT = (
     "You are MATRIX-AI, a concise, helpful assistant for the Matrix EcoSystem. "
     "Answer clearly and briefly. If unsure, say so."
 )
+# --- Singleton instance for the expensive Retriever class ---
+_retriever_instance: Retriever | None = None
def get_retriever(settings: Settings) -> Retriever | None:
    """Return the shared Retriever, building it on first use.

    The KB path is read from the ``RAG_KB_PATH`` environment variable
    (default ``data/kb.jsonl``). When the file is absent or construction
    fails, the situation is logged and ``None`` is returned so callers can
    run without retrieval. Nothing is cached in that case, so a later call
    tries again.
    """
    global _retriever_instance
    if _retriever_instance is None:
        kb_path = os.getenv("RAG_KB_PATH", "data/kb.jsonl")
        try:
            if not Path(kb_path).exists():
                logger.info("RAG KB not found at %s — running LLM-only.", kb_path)
            else:
                _retriever_instance = Retriever(kb_path=kb_path, top_k=settings.rag.top_k)
                logger.info("RAG enabled with KB at %s (top_k=%d)", kb_path, settings.rag.top_k)
        except Exception as exc:
            logger.warning("RAG disabled (failed to initialize Retriever: %s)", exc)
    return _retriever_instance
 class ChatService:
     """Answer chat queries through the inference client, grounding the prompt in retrieved KB passages when a retriever is available."""
     def __init__(self, settings: Settings):
         """Create the inference client from *settings* and attach the shared retriever."""
         self.settings = settings
         self.client = RouterRequestsClient(
             model=settings.model.name,
             fallback=settings.model.fallback,
             # Tolerate settings objects whose model section has no provider field.
             provider=getattr(settings.model, "provider", None),
             max_retries=2,
         )
         # Module-level singleton; None when RAG could not be enabled.
         self.retriever = get_retriever(settings)
     def _build_context(self, query: str) -> Tuple[str, List[str]]:
         """Retrieve passages for *query* and format them as a numbered CONTEXT block.

         Returns:
             ``(context, sources)``; both are empty when there is no retriever
             or nothing was retrieved. Hits with the same text and source are
             collapsed to one entry, so a KB that repeats a record does not
             produce duplicate blocks or duplicate sources. ``sources[i]``
             belongs to block ``[i+1]``.
         """
         if not self.retriever:
             return "", []
         hits = self.retriever.retrieve(query, self.settings.rag.top_k)
         docs = []
         seen = set()
         for hit in hits or []:
             key = (hit["text"], hit["source"])
             if key in seen:
                 continue
             seen.add(key)
             docs.append(hit)
         if not docs:
             return "", []
         blocks = [f"[{i+1}] {d['text']} (source: {d['source']})" for i, d in enumerate(docs)]
         context = "CONTEXT (use only these facts; if missing, say you don't know):\n" + "\n\n".join(blocks)
         sources = [d["source"] for d in docs]
         return context, sources
     def _augment(self, query: str) -> Tuple[str, List[str]]:
         """Build the final user message (with CONTEXT when available) and return it with the sources."""
         ctx, sources = self._build_context(query)
         if ctx:
             augmented = f"{ctx}\n\nBased only on the context provided above, answer the following question.\nQuestion: {query}"
         else:
             # No context available: pass the original query through unchanged.
             augmented = query
         return augmented, sources
     # Note: these methods are synchronous; the router calls them from a thread pool.
     def answer_with_sources(self, query: str) -> Tuple[str, List[str]]:
         """Return the complete answer for *query* together with the sources placed in its context."""
         user_msg, sources = self._augment(query)
         text = self.client.chat_nonstream(
             SYSTEM_PROMPT, user_msg,
             max_tokens=self.settings.model.max_new_tokens,
             temperature=self.settings.model.temperature,
         )
         return text, sources
     def stream_answer(self, query: str):
         """Return the client's streaming result for *query*; the sources are not part of the stream."""
         user_msg, _ = self._augment(query)
         return self.client.chat_stream(
             SYSTEM_PROMPT, user_msg,
             max_tokens=self.settings.model.max_new_tokens,
             temperature=self.settings.model.temperature,
         )

data/kb.jsonl ADDED Viewed

	@@ -0,0 +1,70 @@

+{"text":"MatrixHub is the API and registry for Matrix apps, tracking health and LKG metadata.","source":"kb:matrixhub"}
+{"text":"Matrix Guardian coordinates probes, writes /status to MatrixHub, and asks matrix-ai for plans.","source":"kb:guardian"}
+{"text":"Matrix System 1.0 turns a static registry into an alive, policy-governed, self-healing platform (observe → plan → approve/execute → audit).","source":"kb:overview"}
+{"text":"Core components: Matrix-Hub (API/registry), MatrixDB (Postgres schema), Matrix-Guardian (control plane), Matrix-AI (planning service on Hugging Face).","source":"kb:overview:components"}
+{"text":"End-to-end loop: Guardian probes targets → POST /status to Hub → AI returns a low-risk JSON plan → policy gate decides HITL or Autopilot → actions recorded as events.","source":"kb:overview:loop"}
+{"text":"MatrixHub is the API and registry for Matrix apps, tracking health and LKG metadata.","source":"kb:matrixhub"}
+{"text":"Matrix-Hub Stage-1 routes: GET /apps, GET /apps/{id}, GET /apps/{id}/status, GET /apps/{id}/bundle, POST /apps, POST /status, POST /guardian/approve|reject.","source":"kb:hub:routes:stage1"}
+{"text":"Matrix-Hub Stage-2 (flagged) routes: POST /bundles, GET /advisories, POST /patches/proposals, POST /patches/accept.","source":"kb:hub:routes:stage2"}
+{"text":"New middlewares (additive): in-memory RateLimit (fixed window per IP/route) and Idempotency-Key propagation to request.state.","source":"kb:hub:middleware"}
+{"text":"Security: optional JWT RS256 role checks via JWT_PUBLIC_KEY_PEM; falls back to API token dependency if configured; otherwise dev-mode allow.","source":"kb:hub:security"}
+{"text":"ETag behavior: GET endpoints return weak ETags on stable payloads; If-None-Match yields 304 to save bandwidth and enable client caching.","source":"kb:http:etag"}
+{"text":"Idempotency: POST endpoints accept Idempotency-Key; server persists request body hash and response to avoid duplicate side effects on retries.","source":"kb:http:idempotency"}
+{"text":"Entity additive fields: health_score (float), health_status (text), health_last_checked (timestamptz), lkg_version (text), lkg_digest (text).","source":"kb:hub:models:entity"}
+{"text":"Tables (Hub models): HealthCheck(id, entity_uid, check, result, latency_ms, reasons, ts), Event(id, type, entity_uid, payload, ts), IdempotencyKey(key, route, body_hash, status_code, response_json, created_at), Advisory(id, entity_uid, source, severity, title, description, cve, cvss, detected_at, status, created_at).","source":"kb:hub:models:tables"}
+{"text":"CAS redirection: GET /apps/{id}/bundle issues 307 to CAS_BASE_URL/sha256/{lkg_digest}.","source":"kb:hub:cas"}
+{"text":"Environment flags (Hub): ADVISORIES_ENABLED, PATCHES_FACADE_ENABLED, RATE_LIMIT_PER_MIN, IDEMPOTENCY_ENABLED, CAS_BASE_URL, JWT_PUBLIC_KEY_PEM.","source":"kb:hub:env"}
+{"text":"Bundles push: POST /bundles accepts {entity_uid, digest, version, lkg?}; optionally updates Entity.lkg_* when lkg=true; records event=bundle.push; idempotent.","source":"kb:hub:bundles"}
+{"text":"Patches façade: POST /patches/proposals and /patches/accept record audit events; /patches/accept may set new lkg_version/lkg_digest.","source":"kb:hub:patches:facade"}
+{"text":"Advisories (optional): GET /advisories supports filter by q, entity_uid, severity; paginated with X-Total-Count and ETag.","source":"kb:hub:advisories"}
+{"text":"Matrix Guardian coordinates probes, writes /status to MatrixHub, and asks matrix-ai for plans.","source":"kb:guardian"}
+{"text":"Guardian probes: safe HTTP checks, optional MCP echo handshake; short timeouts, retries, and sandboxed execution by default.","source":"kb:guardian:probes"}
+{"text":"Guardian policy gate: evaluates AI plan risk vs policy thresholds and allowlists; enforces HITL (human-in-the-loop) by default; supports Autopilot when enabled.","source":"kb:guardian:policy"}
+{"text":"Autonomy levels: A0 Manual (observe/plan only), A1 Suggest (propose; auto re-probe/metadata), A2 Safe Autopilot (LKG pin/rollback, cache warm-ups), A3 Extended (sandboxed patchers).","source":"kb:guardian:autonomy"}
+{"text":"Autopilot (LangGraph): multi-agent orchestration with nodes for sensing (probes), planning (matrix-ai), critic, and execution; always emits audit events.","source":"kb:guardian:autopilot"}
+{"text":"Guardian REST (core): GET /healthz, GET /readyz; HITL resume endpoint POST /threads/{thread_id}/resume; optional Autopilot API when AUTOPILOT_API_ENABLED=true.","source":"kb:guardian:api"}
+{"text":"Guardian configuration (key vars): DATABASE_URL, MATRIXHUB_API_BASE, MATRIX_AI_BASE, API_TOKEN, AUTOPILOT_ENABLED, AUTOPILOT_INTERVAL_SEC, AUTOPILOT_POLICY, AUTOPILOT_SAFE_MODE.","source":"kb:guardian:env"}
+{"text":"Matrix-AI service (HF): POST /v1/plan returns short, low-risk JSON plan; POST /v1/chat reserved for RAG Q&A (optional).","source":"kb:ai:overview"}
+{"text":"Matrix-AI safety: PII redaction on prompts, strict JSON schema validation, exponential backoff to HF, structured logging, in-memory rate limiting.","source":"kb:ai:safety"}
+{"text":"Deployment Matrix-AI: recommend Hugging Face Spaces with HF_TOKEN secret; CPU for tests, GPU for larger models.","source":"kb:ai:deploy"}
+{"text":"MatrixDB (Postgres) additive schema: Stage‑1 tables versions, artifacts, health, checks, events; Stage‑2 tables bundles, proposals, optional jobs.","source":"kb:db:tables"}
+{"text":"Indexes (DB): time-series and lookup indexes such as idx_*_app_id_ts, idx_artifacts_sha256, idx_proposals_state; all created IF NOT EXISTS.","source":"kb:db:indexes"}
+{"text":"Why additive patches: zero-downtime upgrades; no destructive ALTER/DROP; feature flags hide Stage‑2 until enabled.","source":"kb:upgrade:rationale"}
+{"text":"Hub patch script: apply_matrixhub_patch_v2.sh creates new routers/middleware and appends guarded blocks to app.py, models.py, config.py, utils/jwt_helper.py, and optionally reqlog.","source":"kb:patch:hub"}
+{"text":"DB patch script: apply_matrixhub_db_patch_v2a.sh drops SQL init files for Stage‑1/2 into db/docker-entrypoint-initdb.d; safe to apply to running clusters via psql.","source":"kb:patch:db"}
+{"text":"Alembic migrations (Hub repo): stage1_health_lkg (entity health/LKG, tables health_check/event/idempotency_key), stage2_addons (advisory).","source":"kb:hub:migrations"}
+{"text":"Events audit trail: every plan/proposal/approval is appended to event(type, entity_uid, payload, ts) for compliance and explainability.","source":"kb:audit:events"}
+{"text":"Checks timeline: HealthCheck captures rolling probe history per entity; use for SLOs and anomaly detection.","source":"kb:observability:checks"}
+{"text":"LKG semantics: last-known-good digest/version allow quick rollback or pin; GET /apps/{id}/bundle redirects to CAS for the pinned artifact.","source":"kb:lkg"}
+{"text":"CAS integration: CAS_BASE_URL points to content-addressed storage; URL format /sha256/{digest}; digest is lowercased.","source":"kb:cas"}
+{"text":"Security posture: JWT roles for write operations (when JWT_PUBLIC_KEY_PEM set); otherwise API tokens or dev-mode; minimal logging of sensitive data.","source":"kb:security:auth"}
+{"text":"Rate limiting: simple in-memory fixed window per (client_ip, route) with default RATE_LIMIT_PER_MIN=600 (override via env).","source":"kb:security:ratelimit"}
+{"text":"Reqlog trace propagation: optional adapter appends trace_id from request.state.request_id into logging context for correlation.","source":"kb:observability:trace"}
+{"text":"Feature flags: Stage‑2 routes guarded by ADVISORIES_ENABLED and PATCHES_FACADE_ENABLED to allow gradual rollout.","source":"kb:featureflags"}
+{"text":"HITL approvals: POST /guardian/approve or /guardian/reject record decision events (202 Accepted) and allow workflows to resume.","source":"kb:hitl"}
+{"text":"Jobs table (optional): lightweight visibility/queue for background workers; not required for Stage‑1; helpful for Autopilot executors.","source":"kb:jobs"}
+{"text":"Deployment topologies: local Docker Compose (Hub+DB+Guardian), cloud k8s for Hub/Guardian with DBaaS, Matrix-AI on HF Space.","source":"kb:deploy:topologies"}
+{"text":"Rollout order: Patch DB → deploy Matrix-AI → patch & redeploy Hub → deploy Guardian → validate flow → enable Stage‑2 flags → optionally enable Autopilot.","source":"kb:deploy:rollout"}
+{"text":"Client contracts: GETs support ETag and X-Total-Count; POSTs support Idempotency-Key; responses are JSON and append-only state updates.","source":"kb:contracts"}
+{"text":"Guardian Autopilot policy file: YAML defining risk thresholds, allowed actions, protected entities, and autonomy level; referenced by AUTOPILOT_POLICY.","source":"kb:guardian:policy:file"}
+{"text":"Safety rails for Autopilot: dry-run mode, global kill-switch, blast-radius caps, rate limits, and human override at any step.","source":"kb:guardian:autopilot:safety"}
+{"text":"Adoption of orphaned apps: detect dead upstreams, pin LKG, mirror artifacts to CAS, open proposal of type 'orphan-adopt' based on policy.","source":"kb:ecosystem:orphanage"}
+{"text":"Consulting value: blueprint for AI-assisted SRE with policy gates, auditability, and staged autonomy; reduces MTTR and operational toil.","source":"kb:value:consulting"}
+{"text":"Community value: portable OSS pattern (HF + FastAPI + Postgres) that runs locally and scales to k8s; additive upgrades encourage safe contribution.","source":"kb:value:community"}
+{"text":"Matrix-AI request contract (/v1/plan): input context summarizing health/last checks; output JSON with {plan_id, steps[], risk, rationale}; strictly validated.","source":"kb:ai:contract"}
+{"text":"Guardian → AI interaction: Guardian sanitizes context, calls /v1/plan with retries/backoff, verifies schema, then emits guardian.plan event.","source":"kb:ai:interaction"}
+{"text":"Hub advisories feed: list potential risks or CVEs associated with entities; consumers can filter by severity and entity_uid.","source":"kb:advisories:semantics"}
+{"text":"Patch proposals workflow: create proposal with diff; on acceptance, optionally update Entity.lkg_* or create jobs; everything audited.","source":"kb:patches:workflow"}
+{"text":"Performance notes: DB indexes optimized for app_uid+ts scans; API supports pagination and caching via ETag to scale reads.","source":"kb:performance"}
+{"text":"Testing strategy: smoke tests for routers; schema migrations idempotent; Autopilot smoke test validates graph assembly and no-ops in safe mode.","source":"kb:testing"}
+{"text":"Observability checklist: ensure health lag SLO, events/sec, 4xx/5xx on APIs, probe latency, plan generation timing, Autopilot actions count.","source":"kb:observability:slo"}
+{"text":"Backup/DR: periodic backups of MatrixDB; versioned CAS artifacts; ability to roll back to previous Hub/Guardian images safely.","source":"kb:operations:dr"}
+{"text":"Compliance: append-only events and idempotent writes help satisfy change-management and audit requirements in regulated environments.","source":"kb:compliance"}
+{"text":"Security hardening: secrets in vault/Space Secrets, minimal scopes, locked egress for probes, signed artifacts, statement timeouts in DB.","source":"kb:security:hardening"}
+{"text":"Known limitations: in-memory rate limiting is single-instance only; for multi-node deploys prefer Redis-based limiter.","source":"kb:limitations:ratelimit"}
+{"text":"Known limitations: Autopilot should start in A1/A2 with conservative policies; sandboxed patchers require extra isolation and tests.","source":"kb:limitations:autopilot"}
+{"text":"API compatibility: patches are additive and use guarded markers; re-running patch scripts is idempotent and safe.","source":"kb:compatibility"}
+{"text":"Versioning: use semantic versioning for entities (versions table) and link artifacts by foreign key; LKG marks last-known-good release.","source":"kb:versioning"}
+{"text":"Data retention suggestion: keep detailed checks for N days, aggregate beyond; events retained longer for audits; configurable per org.","source":"kb:data:retention"}
+{"text":"CLI/SDK consumers: Matrix clients rely on stable Hub contracts, ETags, and LKG semantics to deliver reliable local installs.","source":"kb:clients"}
+{"text":"SRE workflow: measure, propose, approve, execute safe steps, validate via re-probe; everything tracked in events and health.","source":"kb:sre:workflow"}