ruslanmv committed on
Commit
6338f31
·
1 Parent(s): 478dbbd

First version of RAG

Browse files
app/core/rag/__init__.py ADDED
File without changes
app/core/rag/retriever.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app/core/rag/retriever.py
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Dict, List, Optional

import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

log = logging.getLogger(__name__)


class Retriever:
    """Dense FAISS retriever over a JSONL knowledge base.

    Each KB line is a JSON object with at least a ``"text"`` field and an
    optional ``"source"`` field. Embeddings are L2-normalized so the inner
    product computed by ``IndexFlatIP`` equals cosine similarity.
    """

    def __init__(self, kb_path: str = "data/kb.jsonl",
                 model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
                 top_k: int = 4):
        """Load the KB, embed every document, and build the FAISS index.

        Raises:
            FileNotFoundError: if *kb_path* does not exist.
            ValueError: if the KB contains no usable documents (an empty
                batch would otherwise fail later on ``emb.shape[1]``).
        """
        self.kb_path = Path(kb_path)
        self.top_k = top_k
        if not self.kb_path.exists():
            raise FileNotFoundError(f"KB file not found: {self.kb_path} (jsonl with {{text,source}})")
        self.model = SentenceTransformer(model_name)
        self.docs: List[Dict[str, str]] = []
        with self.kb_path.open("r", encoding="utf-8") as f:
            for lineno, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                # A single malformed line should not abort the whole index build.
                try:
                    doc = json.loads(line)
                except json.JSONDecodeError as e:
                    log.warning("Skipping malformed KB line %d in %s: %s", lineno, self.kb_path, e)
                    continue
                if "text" not in doc:
                    log.warning("Skipping KB line %d in %s: missing 'text' field", lineno, self.kb_path)
                    continue
                self.docs.append(doc)
        if not self.docs:
            raise ValueError(f"KB file contains no usable documents: {self.kb_path}")
        texts = [d["text"] for d in self.docs]
        emb = self.model.encode(texts, convert_to_numpy=True, normalize_embeddings=True, show_progress_bar=False)
        self.dim = int(emb.shape[1])
        self.index = faiss.IndexFlatIP(self.dim)  # cosine via normalized vectors = dot product
        self.index.add(emb.astype("float32"))

    def retrieve(self, query: str, k: Optional[int] = None) -> List[Dict]:
        """Return up to *k* best-matching docs as ``{"text", "source", "score"}`` dicts.

        Falls back to ``self.top_k`` when *k* is None (or 0).
        """
        k = k or self.top_k
        vec = self.model.encode([query], convert_to_numpy=True, normalize_embeddings=True)
        D, I = self.index.search(vec.astype("float32"), k)
        out: List[Dict] = []
        for idx, score in zip(I[0], D[0]):
            if int(idx) < 0:  # FAISS pads with -1 when fewer than k hits exist
                continue
            d = self.docs[int(idx)]
            out.append({"text": d["text"], "source": d.get("source", f"kb:{idx}"), "score": float(score)})
        return out
app/main.py CHANGED
@@ -9,6 +9,10 @@ from typing import Any, Dict
9
  from fastapi import FastAPI
10
  from fastapi.responses import RedirectResponse
11
 
 
 
 
 
12
  # -----------------------------------------------------------------------------
13
  # Early: load .env (so HF_TOKEN, ADMIN_TOKEN, etc. are available locally)
14
  # -----------------------------------------------------------------------------
@@ -65,7 +69,6 @@ _load_env_file([".env", "configs/.env", ".env.local", "configs/.env.local"])
65
  # -----------------------------------------------------------------------------
66
  # Middlewares
67
  # -----------------------------------------------------------------------------
68
- # Prefer the canonical package name; if your repo uses "middlewares/", this tries both.
69
  try:
70
  from .middleware import attach_middlewares # singular
71
  except Exception:
@@ -82,7 +85,6 @@ except Exception:
82
  # -----------------------------------------------------------------------------
83
  from .routers import health, plan, chat
84
 
85
- # Optional UI (Home/Chat/Dev). If missing, we gracefully fall back to a JSON root.
86
  try:
87
  from .ui import router as ui_router # type: ignore
88
  HAS_UI = True
@@ -103,9 +105,14 @@ async def lifespan(app: FastAPI):
103
  app.state.started_at = time.time()
104
  app.state.version = os.getenv("APP_VERSION", "1.0.0")
105
 
106
- # Minimal diagnostics; HF_TOKEN presence matters for inference
 
 
 
 
 
107
  hf_token_present = bool(os.getenv("HF_TOKEN"))
108
- logging.getLogger("uvicorn.error").info(
109
  "matrix-ai starting (version=%s, port=%s, hf_token_present=%s)",
110
  app.state.version,
111
  os.getenv("PORT", "7860"),
@@ -115,7 +122,7 @@ async def lifespan(app: FastAPI):
115
  yield
116
  finally:
117
  uptime = time.time() - getattr(app.state, "started_at", time.time())
118
- logging.getLogger("uvicorn.error").info(
119
  "matrix-ai shutting down (uptime=%.2fs)", uptime
120
  )
121
 
@@ -131,19 +138,14 @@ def create_app() -> FastAPI:
131
  lifespan=lifespan,
132
  )
133
 
134
- # Middlewares (request-id, gzip, rate-limit, etc.)
135
  attach_middlewares(app)
136
-
137
- # Core routers
138
  app.include_router(health.router, tags=["Health"])
139
  app.include_router(plan.router, prefix="/v1", tags=["Planning"])
140
  app.include_router(chat.router, prefix="/v1", tags=["Chat"])
141
 
142
- # Optional UI (adds '/', '/chat', '/dev')
143
  if HAS_UI:
144
  app.include_router(ui_router, tags=["UI"])
145
  else:
146
- # Minimal root so HF root probes pass even without UI
147
  @app.get("/", include_in_schema=False)
148
  async def root() -> Dict[str, Any]:
149
  return {
@@ -153,12 +155,9 @@ def create_app() -> FastAPI:
153
  "docs": "/docs",
154
  "endpoints": {"plan": "/v1/plan", "chat": "/v1/chat", "healthz": "/healthz"},
155
  }
156
-
157
  @app.get("/home", include_in_schema=False)
158
  async def home_redirect():
159
  return RedirectResponse(url="/docs", status_code=302)
160
-
161
  return app
162
 
163
-
164
- app = create_app()
 
9
  from fastapi import FastAPI
10
  from fastapi.responses import RedirectResponse
11
 
12
+ # --- ADDED: Import dependencies needed for pre-loading ---
13
+ from .deps import get_settings
14
+ from .services.chat_service import get_retriever
15
+
16
  # -----------------------------------------------------------------------------
17
  # Early: load .env (so HF_TOKEN, ADMIN_TOKEN, etc. are available locally)
18
  # -----------------------------------------------------------------------------
 
69
  # -----------------------------------------------------------------------------
70
  # Middlewares
71
  # -----------------------------------------------------------------------------
 
72
  try:
73
  from .middleware import attach_middlewares # singular
74
  except Exception:
 
85
  # -----------------------------------------------------------------------------
86
  from .routers import health, plan, chat
87
 
 
88
  try:
89
  from .ui import router as ui_router # type: ignore
90
  HAS_UI = True
 
105
  app.state.started_at = time.time()
106
  app.state.version = os.getenv("APP_VERSION", "1.0.0")
107
 
108
+ # --- ADDED: Pre-load the RAG model and index on startup ---
109
+ logger = logging.getLogger("uvicorn.error")
110
+ logger.info("Warming up RAG retriever...")
111
+ get_retriever(get_settings())
112
+ logger.info("RAG retriever is ready.")
113
+
114
  hf_token_present = bool(os.getenv("HF_TOKEN"))
115
+ logger.info(
116
  "matrix-ai starting (version=%s, port=%s, hf_token_present=%s)",
117
  app.state.version,
118
  os.getenv("PORT", "7860"),
 
122
  yield
123
  finally:
124
  uptime = time.time() - getattr(app.state, "started_at", time.time())
125
+ logger.info(
126
  "matrix-ai shutting down (uptime=%.2fs)", uptime
127
  )
128
 
 
138
  lifespan=lifespan,
139
  )
140
 
 
141
  attach_middlewares(app)
 
 
142
  app.include_router(health.router, tags=["Health"])
143
  app.include_router(plan.router, prefix="/v1", tags=["Planning"])
144
  app.include_router(chat.router, prefix="/v1", tags=["Chat"])
145
 
 
146
  if HAS_UI:
147
  app.include_router(ui_router, tags=["UI"])
148
  else:
 
149
  @app.get("/", include_in_schema=False)
150
  async def root() -> Dict[str, Any]:
151
  return {
 
155
  "docs": "/docs",
156
  "endpoints": {"plan": "/v1/plan", "chat": "/v1/chat", "healthz": "/healthz"},
157
  }
 
158
  @app.get("/home", include_in_schema=False)
159
  async def home_redirect():
160
  return RedirectResponse(url="/docs", status_code=302)
 
161
  return app
162
 
163
+ app = create_app()
 
app/routers/chat.py CHANGED
@@ -1,8 +1,12 @@
 
 
 
 
 
1
  from fastapi import APIRouter, Depends, HTTPException, Query
 
 
2
  from starlette.responses import StreamingResponse
3
- from pydantic import BaseModel
4
- from typing import List, Optional, Any, Iterator
5
- import json, time
6
 
7
  from ..deps import get_settings
8
  from ..core.config import Settings
@@ -10,10 +14,12 @@ from ..services.chat_service import ChatService
10
 
11
  router = APIRouter()
12
 
 
13
  class ChatMessage(BaseModel):
14
  role: str
15
  content: str
16
 
 
17
  class ChatRequest(BaseModel):
18
  query: Optional[str] = None
19
  question: Optional[str] = None
@@ -26,13 +32,15 @@ class ChatRequest(BaseModel):
26
  if self.prompt: return self.prompt
27
  if self.messages:
28
  for m in reversed(self.messages):
29
- if m.role.lower() == "user":
30
- return m.content
31
  return self.messages[-1].content
32
  raise ValueError("Body must include 'query'/'question'/'prompt' or 'messages'")
33
 
 
34
  class ChatResponse(BaseModel):
35
  answer: str
 
 
36
 
37
  @router.post("/chat", response_model=ChatResponse)
38
  async def chat(req: ChatRequest, settings: Settings = Depends(get_settings)):
@@ -42,71 +50,71 @@ async def chat(req: ChatRequest, settings: Settings = Depends(get_settings)):
42
  raise HTTPException(status_code=422, detail=str(e))
43
  svc = ChatService(settings)
44
  try:
45
- answer = await svc.answer(text)
46
- return ChatResponse(answer=answer)
 
47
  except PermissionError as e:
48
  raise HTTPException(status_code=403, detail=str(e))
49
  except Exception as e:
50
  raise HTTPException(status_code=502, detail=f"Inference error: {e}")
51
 
 
52
  @router.get("/chat", response_model=ChatResponse)
53
  async def chat_get(query: str = Query(...), settings: Settings = Depends(get_settings)):
54
  svc = ChatService(settings)
55
  try:
56
- answer = await svc.answer(query)
57
- return ChatResponse(answer=answer)
 
58
  except PermissionError as e:
59
  raise HTTPException(status_code=403, detail=str(e))
60
  except Exception as e:
61
  raise HTTPException(status_code=502, detail=f"Inference error: {e}")
62
 
 
63
  # ---------- Streaming (SSE) ----------
64
  def _sse_line(obj: Any) -> str:
65
  payload = obj if isinstance(obj, str) else json.dumps(obj, ensure_ascii=False)
66
  return f"data: {payload}\n\n"
67
 
 
68
  @router.get("/chat/stream")
69
  async def chat_stream(query: str = Query(...), settings: Settings = Depends(get_settings)):
70
- """
71
- SSE stream of token deltas: emits {"delta": "..."} chunks, then final [DONE].
72
- """
73
  svc = ChatService(settings)
74
 
75
- def gen() -> Iterator[str]:
76
- # Anti-buffer padding & immediate ping to force first paint
77
  yield ":" + (" " * 2048) + "\n\n"
78
  yield "event: ping\ndata: 0\n\n"
79
- any_tokens = False
80
  try:
81
- for token in svc.stream_answer(query):
 
 
 
82
  if token:
83
  any_tokens = True
84
  yield _sse_line({"delta": token})
 
85
  if not any_tokens:
86
  yield _sse_line({"delta": ""})
87
  yield _sse_line("[DONE]")
88
- except GeneratorExit:
89
- return
90
  except Exception as e:
91
- try:
92
- yield _sse_line({"error": str(e)})
93
- except Exception:
94
- return
95
 
96
  headers = {
97
- # Critical for proxies/browsers
98
  "Cache-Control": "no-cache, no-transform",
99
- "X-Accel-Buffering": "no", # Nginx
100
  "Connection": "keep-alive",
101
- "Content-Encoding": "identity", # Prevents Starlette gzip from buffering SSE
102
  }
103
  return StreamingResponse(gen(), media_type="text/event-stream; charset=utf-8", headers=headers)
104
 
 
105
  @router.post("/chat/stream")
106
  async def chat_stream_post(req: ChatRequest, settings: Settings = Depends(get_settings)):
107
  try:
108
  q = req.as_text()
109
  except ValueError as e:
110
  raise HTTPException(status_code=422, detail=str(e))
111
- # Reuse GET logic to keep one code path
112
  return await chat_stream(query=q, settings=settings)
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from typing import Any, AsyncIterator, List, Optional
5
+
6
  from fastapi import APIRouter, Depends, HTTPException, Query
7
+ from pydantic import BaseModel, Field
8
+ from starlette.concurrency import run_in_threadpool
9
  from starlette.responses import StreamingResponse
 
 
 
10
 
11
  from ..deps import get_settings
12
  from ..core.config import Settings
 
14
 
15
  router = APIRouter()
16
 
17
+
18
  class ChatMessage(BaseModel):
19
  role: str
20
  content: str
21
 
22
+
23
  class ChatRequest(BaseModel):
24
  query: Optional[str] = None
25
  question: Optional[str] = None
 
32
  if self.prompt: return self.prompt
33
  if self.messages:
34
  for m in reversed(self.messages):
35
+ if m.role.lower() == "user": return m.content
 
36
  return self.messages[-1].content
37
  raise ValueError("Body must include 'query'/'question'/'prompt' or 'messages'")
38
 
39
+
40
  class ChatResponse(BaseModel):
41
  answer: str
42
+ sources: List[str] = Field(default_factory=list)
43
+
44
 
45
  @router.post("/chat", response_model=ChatResponse)
46
  async def chat(req: ChatRequest, settings: Settings = Depends(get_settings)):
 
50
  raise HTTPException(status_code=422, detail=str(e))
51
  svc = ChatService(settings)
52
  try:
53
+ # Run the blocking call in a thread pool to avoid freezing the server
54
+ answer, sources = await run_in_threadpool(svc.answer_with_sources, text)
55
+ return ChatResponse(answer=answer, sources=sources)
56
  except PermissionError as e:
57
  raise HTTPException(status_code=403, detail=str(e))
58
  except Exception as e:
59
  raise HTTPException(status_code=502, detail=f"Inference error: {e}")
60
 
61
+
62
  @router.get("/chat", response_model=ChatResponse)
63
  async def chat_get(query: str = Query(...), settings: Settings = Depends(get_settings)):
64
  svc = ChatService(settings)
65
  try:
66
+ # Run the blocking call in a thread pool
67
+ answer, sources = await run_in_threadpool(svc.answer_with_sources, query)
68
+ return ChatResponse(answer=answer, sources=sources)
69
  except PermissionError as e:
70
  raise HTTPException(status_code=403, detail=str(e))
71
  except Exception as e:
72
  raise HTTPException(status_code=502, detail=f"Inference error: {e}")
73
 
74
+
75
  # ---------- Streaming (SSE) ----------
76
  def _sse_line(obj: Any) -> str:
77
  payload = obj if isinstance(obj, str) else json.dumps(obj, ensure_ascii=False)
78
  return f"data: {payload}\n\n"
79
 
80
+
81
  @router.get("/chat/stream")
82
  async def chat_stream(query: str = Query(...), settings: Settings = Depends(get_settings)):
 
 
 
83
  svc = ChatService(settings)
84
 
85
+ async def gen() -> AsyncIterator[str]:
86
+ # Anti-buffer padding and initial ping
87
  yield ":" + (" " * 2048) + "\n\n"
88
  yield "event: ping\ndata: 0\n\n"
89
+
90
  try:
91
+ # Run the blocking retrieval part in a thread pool, then stream the results
92
+ stream_generator = await run_in_threadpool(svc.stream_answer, query)
93
+ any_tokens = False
94
+ for token in stream_generator:
95
  if token:
96
  any_tokens = True
97
  yield _sse_line({"delta": token})
98
+
99
  if not any_tokens:
100
  yield _sse_line({"delta": ""})
101
  yield _sse_line("[DONE]")
 
 
102
  except Exception as e:
103
+ yield _sse_line({"error": str(e)})
 
 
 
104
 
105
  headers = {
 
106
  "Cache-Control": "no-cache, no-transform",
107
+ "X-Accel-Buffering": "no",
108
  "Connection": "keep-alive",
109
+ "Content-Encoding": "identity",
110
  }
111
  return StreamingResponse(gen(), media_type="text/event-stream; charset=utf-8", headers=headers)
112
 
113
+
114
  @router.post("/chat/stream")
115
  async def chat_stream_post(req: ChatRequest, settings: Settings = Depends(get_settings)):
116
  try:
117
  q = req.as_text()
118
  except ValueError as e:
119
  raise HTTPException(status_code=422, detail=str(e))
 
120
  return await chat_stream(query=q, settings=settings)
app/services/chat_service.py CHANGED
@@ -1,36 +1,96 @@
1
  from __future__ import annotations
 
 
 
 
 
 
2
  from ..core.config import Settings
3
  from ..core.inference.client import RouterRequestsClient
 
 
 
4
 
5
  SYSTEM_PROMPT = (
6
  "You are MATRIX-AI, a concise, helpful assistant for the Matrix EcoSystem. "
7
  "Answer clearly and briefly. If unsure, say so."
8
  )
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  class ChatService:
11
  def __init__(self, settings: Settings):
12
  self.settings = settings
13
  self.client = RouterRequestsClient(
14
  model=settings.model.name,
15
  fallback=settings.model.fallback,
16
- provider=settings.model.provider,
17
  max_retries=2,
18
- connect_timeout=10.0,
19
- read_timeout=60.0,
20
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- async def answer(self, query: str) -> str:
23
- # non-stream (compatible with current UI)
24
- return self.client.chat_nonstream(
25
- SYSTEM_PROMPT, query,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  max_tokens=self.settings.model.max_new_tokens,
27
  temperature=self.settings.model.temperature,
28
  )
 
29
 
30
- # Expose a generator for streaming endpoints
31
  def stream_answer(self, query: str):
 
32
  return self.client.chat_stream(
33
- SYSTEM_PROMPT, query,
34
  max_tokens=self.settings.model.max_new_tokens,
35
  temperature=self.settings.model.temperature,
36
- )
 
1
  from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ from pathlib import Path
6
+ from typing import List, Tuple
7
+
8
  from ..core.config import Settings
9
  from ..core.inference.client import RouterRequestsClient
10
+ from ..core.rag.retriever import Retriever
11
+
12
+ logger = logging.getLogger(__name__)
13
 
14
  SYSTEM_PROMPT = (
15
  "You are MATRIX-AI, a concise, helpful assistant for the Matrix EcoSystem. "
16
  "Answer clearly and briefly. If unsure, say so."
17
  )
18
 
19
# --- Singleton instance for the expensive Retriever class ---
_retriever_instance: Retriever | None = None


def get_retriever(settings: Settings) -> Retriever | None:
    """Lazily build and cache the process-wide Retriever.

    Returns None (RAG disabled) when the KB file is absent or the
    Retriever fails to initialize; the service then runs LLM-only.
    """
    global _retriever_instance
    if _retriever_instance is None:
        kb_path = os.getenv("RAG_KB_PATH", "data/kb.jsonl")
        try:
            if Path(kb_path).exists():
                _retriever_instance = Retriever(kb_path=kb_path, top_k=settings.rag.top_k)
                logger.info("RAG enabled with KB at %s (top_k=%d)", kb_path, settings.rag.top_k)
            else:
                logger.info("RAG KB not found at %s — running LLM-only.", kb_path)
        except Exception as e:
            # Best-effort: a broken KB must not prevent the app from serving.
            logger.warning("RAG disabled (failed to initialize Retriever: %s)", e)
    return _retriever_instance
39
+
40
+
41
class ChatService:
    """RAG-augmented chat: retrieves KB context, prepends it to the user
    message, and delegates generation to the router client."""

    def __init__(self, settings: Settings):
        self.settings = settings
        self.client = RouterRequestsClient(
            model=settings.model.name,
            fallback=settings.model.fallback,
            provider=getattr(settings.model, "provider", None),
            max_retries=2,
        )
        # Shared process-wide retriever; None when RAG is disabled.
        self.retriever = get_retriever(settings)

    def _build_context(self, query: str) -> Tuple[str, List[str]]:
        """Retrieve KB docs for *query*; return (context block, source ids).

        Both are empty when RAG is off or nothing was retrieved.
        """
        if not self.retriever:
            return "", []
        docs = self.retriever.retrieve(query, self.settings.rag.top_k)
        if not docs:
            return "", []
        numbered = [
            f"[{pos}] {doc['text']} (source: {doc['source']})"
            for pos, doc in enumerate(docs, start=1)
        ]
        header = "CONTEXT (use only these facts; if missing, say you don't know):\n"
        return header + "\n\n".join(numbered), [doc["source"] for doc in docs]

    def _augment(self, query: str) -> Tuple[str, List[str]]:
        """Build the final user message (with optional CONTEXT) and return sources."""
        ctx, sources = self._build_context(query)
        if not ctx:
            # No context available: pass the original query through untouched.
            return query, sources
        augmented = f"{ctx}\n\nBased only on the context provided above, answer the following question.\nQuestion: {query}"
        return augmented, sources

    # Note: these methods block; callers run them in a thread pool.
    def answer_with_sources(self, query: str) -> Tuple[str, List[str]]:
        """Non-streaming answer plus the KB sources that informed it."""
        user_msg, sources = self._augment(query)
        reply = self.client.chat_nonstream(
            SYSTEM_PROMPT,
            user_msg,
            max_tokens=self.settings.model.max_new_tokens,
            temperature=self.settings.model.temperature,
        )
        return reply, sources

    def stream_answer(self, query: str):
        """Return the client's token-stream generator for *query* (sources dropped)."""
        user_msg, _ = self._augment(query)
        return self.client.chat_stream(
            SYSTEM_PROMPT,
            user_msg,
            max_tokens=self.settings.model.max_new_tokens,
            temperature=self.settings.model.temperature,
        )
data/kb.jsonl ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"text":"MatrixHub is the API and registry for Matrix apps, tracking health and LKG metadata.","source":"kb:matrixhub"}
2
+ {"text":"Matrix Guardian coordinates probes, writes /status to MatrixHub, and asks matrix-ai for plans.","source":"kb:guardian"}
3
+ {"text":"Matrix System 1.0 turns a static registry into an alive, policy-governed, self-healing platform (observe → plan → approve/execute → audit).","source":"kb:overview"}
4
+ {"text":"Core components: Matrix-Hub (API/registry), MatrixDB (Postgres schema), Matrix-Guardian (control plane), Matrix-AI (planning service on Hugging Face).","source":"kb:overview:components"}
5
+ {"text":"End-to-end loop: Guardian probes targets → POST /status to Hub → AI returns a low-risk JSON plan → policy gate decides HITL or Autopilot → actions recorded as events.","source":"kb:overview:loop"}
6
+ {"text":"MatrixHub is the API and registry for Matrix apps, tracking health and LKG metadata.","source":"kb:matrixhub"}
7
+ {"text":"Matrix-Hub Stage-1 routes: GET /apps, GET /apps/{id}, GET /apps/{id}/status, GET /apps/{id}/bundle, POST /apps, POST /status, POST /guardian/approve|reject.","source":"kb:hub:routes:stage1"}
8
+ {"text":"Matrix-Hub Stage-2 (flagged) routes: POST /bundles, GET /advisories, POST /patches/proposals, POST /patches/accept.","source":"kb:hub:routes:stage2"}
9
+ {"text":"New middlewares (additive): in-memory RateLimit (fixed window per IP/route) and Idempotency-Key propagation to request.state.","source":"kb:hub:middleware"}
10
+ {"text":"Security: optional JWT RS256 role checks via JWT_PUBLIC_KEY_PEM; falls back to API token dependency if configured; otherwise dev-mode allow.","source":"kb:hub:security"}
11
+ {"text":"ETag behavior: GET endpoints return weak ETags on stable payloads; If-None-Match yields 304 to save bandwidth and enable client caching.","source":"kb:http:etag"}
12
+ {"text":"Idempotency: POST endpoints accept Idempotency-Key; server persists request body hash and response to avoid duplicate side effects on retries.","source":"kb:http:idempotency"}
13
+ {"text":"Entity additive fields: health_score (float), health_status (text), health_last_checked (timestamptz), lkg_version (text), lkg_digest (text).","source":"kb:hub:models:entity"}
14
+ {"text":"Tables (Hub models): HealthCheck(id, entity_uid, check, result, latency_ms, reasons, ts), Event(id, type, entity_uid, payload, ts), IdempotencyKey(key, route, body_hash, status_code, response_json, created_at), Advisory(id, entity_uid, source, severity, title, description, cve, cvss, detected_at, status, created_at).","source":"kb:hub:models:tables"}
15
+ {"text":"CAS redirection: GET /apps/{id}/bundle issues 307 to CAS_BASE_URL/sha256/{lkg_digest}.","source":"kb:hub:cas"}
16
+ {"text":"Environment flags (Hub): ADVISORIES_ENABLED, PATCHES_FACADE_ENABLED, RATE_LIMIT_PER_MIN, IDEMPOTENCY_ENABLED, CAS_BASE_URL, JWT_PUBLIC_KEY_PEM.","source":"kb:hub:env"}
17
+ {"text":"Bundles push: POST /bundles accepts {entity_uid, digest, version, lkg?}; optionally updates Entity.lkg_* when lkg=true; records event=bundle.push; idempotent.","source":"kb:hub:bundles"}
18
+ {"text":"Patches façade: POST /patches/proposals and /patches/accept record audit events; /patches/accept may set new lkg_version/lkg_digest.","source":"kb:hub:patches:facade"}
19
+ {"text":"Advisories (optional): GET /advisories supports filter by q, entity_uid, severity; paginated with X-Total-Count and ETag.","source":"kb:hub:advisories"}
20
+ {"text":"Matrix Guardian coordinates probes, writes /status to MatrixHub, and asks matrix-ai for plans.","source":"kb:guardian"}
21
+ {"text":"Guardian probes: safe HTTP checks, optional MCP echo handshake; short timeouts, retries, and sandboxed execution by default.","source":"kb:guardian:probes"}
22
+ {"text":"Guardian policy gate: evaluates AI plan risk vs policy thresholds and allowlists; enforces HITL (human-in-the-loop) by default; supports Autopilot when enabled.","source":"kb:guardian:policy"}
23
+ {"text":"Autonomy levels: A0 Manual (observe/plan only), A1 Suggest (propose; auto re-probe/metadata), A2 Safe Autopilot (LKG pin/rollback, cache warm-ups), A3 Extended (sandboxed patchers).","source":"kb:guardian:autonomy"}
24
+ {"text":"Autopilot (LangGraph): multi-agent orchestration with nodes for sensing (probes), planning (matrix-ai), critic, and execution; always emits audit events.","source":"kb:guardian:autopilot"}
25
+ {"text":"Guardian REST (core): GET /healthz, GET /readyz; HITL resume endpoint POST /threads/{thread_id}/resume; optional Autopilot API when AUTOPILOT_API_ENABLED=true.","source":"kb:guardian:api"}
26
+ {"text":"Guardian configuration (key vars): DATABASE_URL, MATRIXHUB_API_BASE, MATRIX_AI_BASE, API_TOKEN, AUTOPILOT_ENABLED, AUTOPILOT_INTERVAL_SEC, AUTOPILOT_POLICY, AUTOPILOT_SAFE_MODE.","source":"kb:guardian:env"}
27
+ {"text":"Matrix-AI service (HF): POST /v1/plan returns short, low-risk JSON plan; POST /v1/chat reserved for RAG Q&A (optional).","source":"kb:ai:overview"}
28
+ {"text":"Matrix-AI safety: PII redaction on prompts, strict JSON schema validation, exponential backoff to HF, structured logging, in-memory rate limiting.","source":"kb:ai:safety"}
29
+ {"text":"Deployment Matrix-AI: recommend Hugging Face Spaces with HF_TOKEN secret; CPU for tests, GPU for larger models.","source":"kb:ai:deploy"}
30
+ {"text":"MatrixDB (Postgres) additive schema: Stage‑1 tables versions, artifacts, health, checks, events; Stage‑2 tables bundles, proposals, optional jobs.","source":"kb:db:tables"}
31
+ {"text":"Indexes (DB): time-series and lookup indexes such as idx_*_app_id_ts, idx_artifacts_sha256, idx_proposals_state; all created IF NOT EXISTS.","source":"kb:db:indexes"}
32
+ {"text":"Why additive patches: zero-downtime upgrades; no destructive ALTER/DROP; feature flags hide Stage‑2 until enabled.","source":"kb:upgrade:rationale"}
33
+ {"text":"Hub patch script: apply_matrixhub_patch_v2.sh creates new routers/middleware and appends guarded blocks to app.py, models.py, config.py, utils/jwt_helper.py, and optionally reqlog.","source":"kb:patch:hub"}
34
+ {"text":"DB patch script: apply_matrixhub_db_patch_v2a.sh drops SQL init files for Stage‑1/2 into db/docker-entrypoint-initdb.d; safe to apply to running clusters via psql.","source":"kb:patch:db"}
35
+ {"text":"Alembic migrations (Hub repo): stage1_health_lkg (entity health/LKG, tables health_check/event/idempotency_key), stage2_addons (advisory).","source":"kb:hub:migrations"}
36
+ {"text":"Events audit trail: every plan/proposal/approval is appended to event(type, entity_uid, payload, ts) for compliance and explainability.","source":"kb:audit:events"}
37
+ {"text":"Checks timeline: HealthCheck captures rolling probe history per entity; use for SLOs and anomaly detection.","source":"kb:observability:checks"}
38
+ {"text":"LKG semantics: last-known-good digest/version allow quick rollback or pin; GET /apps/{id}/bundle redirects to CAS for the pinned artifact.","source":"kb:lkg"}
39
+ {"text":"CAS integration: CAS_BASE_URL points to content-addressed storage; URL format /sha256/{digest}; digest is lowercased.","source":"kb:cas"}
40
+ {"text":"Security posture: JWT roles for write operations (when JWT_PUBLIC_KEY_PEM set); otherwise API tokens or dev-mode; minimal logging of sensitive data.","source":"kb:security:auth"}
41
+ {"text":"Rate limiting: simple in-memory fixed window per (client_ip, route) with default RATE_LIMIT_PER_MIN=600 (override via env).","source":"kb:security:ratelimit"}
42
+ {"text":"Reqlog trace propagation: optional adapter appends trace_id from request.state.request_id into logging context for correlation.","source":"kb:observability:trace"}
43
+ {"text":"Feature flags: Stage‑2 routes guarded by ADVISORIES_ENABLED and PATCHES_FACADE_ENABLED to allow gradual rollout.","source":"kb:featureflags"}
44
+ {"text":"HITL approvals: POST /guardian/approve or /guardian/reject record decision events (202 Accepted) and allow workflows to resume.","source":"kb:hitl"}
45
+ {"text":"Jobs table (optional): lightweight visibility/queue for background workers; not required for Stage‑1; helpful for Autopilot executors.","source":"kb:jobs"}
46
+ {"text":"Deployment topologies: local Docker Compose (Hub+DB+Guardian), cloud k8s for Hub/Guardian with DBaaS, Matrix-AI on HF Space.","source":"kb:deploy:topologies"}
47
+ {"text":"Rollout order: Patch DB → deploy Matrix-AI → patch & redeploy Hub → deploy Guardian → validate flow → enable Stage‑2 flags → optionally enable Autopilot.","source":"kb:deploy:rollout"}
48
+ {"text":"Client contracts: GETs support ETag and X-Total-Count; POSTs support Idempotency-Key; responses are JSON and append-only state updates.","source":"kb:contracts"}
49
+ {"text":"Guardian Autopilot policy file: YAML defining risk thresholds, allowed actions, protected entities, and autonomy level; referenced by AUTOPILOT_POLICY.","source":"kb:guardian:policy:file"}
50
+ {"text":"Safety rails for Autopilot: dry-run mode, global kill-switch, blast-radius caps, rate limits, and human override at any step.","source":"kb:guardian:autopilot:safety"}
51
+ {"text":"Adoption of orphaned apps: detect dead upstreams, pin LKG, mirror artifacts to CAS, open proposal of type 'orphan-adopt' based on policy.","source":"kb:ecosystem:orphanage"}
52
+ {"text":"Consulting value: blueprint for AI-assisted SRE with policy gates, auditability, and staged autonomy; reduces MTTR and operational toil.","source":"kb:value:consulting"}
53
+ {"text":"Community value: portable OSS pattern (HF + FastAPI + Postgres) that runs locally and scales to k8s; additive upgrades encourage safe contribution.","source":"kb:value:community"}
54
+ {"text":"Matrix-AI request contract (/v1/plan): input context summarizing health/last checks; output JSON with {plan_id, steps[], risk, rationale}; strictly validated.","source":"kb:ai:contract"}
55
+ {"text":"Guardian → AI interaction: Guardian sanitizes context, calls /v1/plan with retries/backoff, verifies schema, then emits guardian.plan event.","source":"kb:ai:interaction"}
56
+ {"text":"Hub advisories feed: list potential risks or CVEs associated with entities; consumers can filter by severity and entity_uid.","source":"kb:advisories:semantics"}
57
+ {"text":"Patch proposals workflow: create proposal with diff; on acceptance, optionally update Entity.lkg_* or create jobs; everything audited.","source":"kb:patches:workflow"}
58
+ {"text":"Performance notes: DB indexes optimized for app_uid+ts scans; API supports pagination and caching via ETag to scale reads.","source":"kb:performance"}
59
+ {"text":"Testing strategy: smoke tests for routers; schema migrations idempotent; Autopilot smoke test validates graph assembly and no-ops in safe mode.","source":"kb:testing"}
60
+ {"text":"Observability checklist: ensure health lag SLO, events/sec, 4xx/5xx on APIs, probe latency, plan generation timing, Autopilot actions count.","source":"kb:observability:slo"}
61
+ {"text":"Backup/DR: periodic backups of MatrixDB; versioned CAS artifacts; ability to roll back to previous Hub/Guardian images safely.","source":"kb:operations:dr"}
62
+ {"text":"Compliance: append-only events and idempotent writes help satisfy change-management and audit requirements in regulated environments.","source":"kb:compliance"}
63
+ {"text":"Security hardening: secrets in vault/Space Secrets, minimal scopes, locked egress for probes, signed artifacts, statement timeouts in DB.","source":"kb:security:hardening"}
64
+ {"text":"Known limitations: in-memory rate limiting is single-instance only; for multi-node deploys prefer Redis-based limiter.","source":"kb:limitations:ratelimit"}
65
+ {"text":"Known limitations: Autopilot should start in A1/A2 with conservative policies; sandboxed patchers require extra isolation and tests.","source":"kb:limitations:autopilot"}
66
+ {"text":"API compatibility: patches are additive and use guarded markers; re-running patch scripts is idempotent and safe.","source":"kb:compatibility"}
67
+ {"text":"Versioning: use semantic versioning for entities (versions table) and link artifacts by foreign key; LKG marks last-known-good release.","source":"kb:versioning"}
68
+ {"text":"Data retention suggestion: keep detailed checks for N days, aggregate beyond; events retained longer for audits; configurable per org.","source":"kb:data:retention"}
69
+ {"text":"CLI/SDK consumers: Matrix clients rely on stable Hub contracts, ETags, and LKG semantics to deliver reliable local installs.","source":"kb:clients"}
70
+ {"text":"SRE workflow: measure, propose, approve, execute safe steps, validate via re-probe; everything tracked in events and health.","source":"kb:sre:workflow"}