ruslanmv committed on
Commit
478dbbd
·
1 Parent(s): 0e040d4

First working version with plan

Browse files
app/core/prompts/plan.txt CHANGED
@@ -1,8 +1,17 @@
1
- You are Matrix-AI, an expert system that produces short, safe, and auditable remediation plans for software services.
2
 
3
- Your constraints are:
4
- 1. You must return a response in strictly JSON format.
5
- 2. The plan must not exceed the `max_steps` constraint.
6
- 3. Prioritize actions that are non-destructive, such as re-running health probes, pinning to a last-known-good (LKG) version, or running diagnostic tools in a sandbox.
7
- 4. The explanation should be a single, concise sentence.
8
- 5. The output JSON must have these exact keys: `plan_id`, `steps`, `risk`, `explanation`.
 
 
 
 
 
 
 
 
 
 
1
+ You are MATRIX-AI Planner.
2
 
3
+ Return ONLY a single JSON object. Do not include backticks, code fences, Markdown, or any prose.
4
+ The JSON MUST match this schema exactly:
5
+
6
+ {
7
+ "plan_id": "<string>",
8
+ "steps": ["<string>", "..."],
9
+ "risk": "low" | "medium" | "high",
10
+ "explanation": "<string>"
11
+ }
12
+
13
+ Rules:
14
+ - Keep steps short, safe, and auditable (1–3 steps).
15
+ - Prefer low risk actions.
16
+ - Do not add any extra keys.
17
+ - Start your reply with '{' and end with '}'.
app/core/schema.py CHANGED
@@ -1,31 +1,71 @@
1
- from pydantic import BaseModel, Field
2
- from typing import List, Optional, Literal
3
 
4
- Mode = Literal["plan", "summary", "patch-diff"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- class PlanConstraints(BaseModel):
7
- risk: Optional[str] = "low"
8
- max_steps: int = Field(default=3, ge=1, le=10)
9
 
10
  class PlanContext(BaseModel):
11
- app_id: str
12
- symptoms: List[str] = Field(default_factory=list)
 
 
 
 
 
 
 
 
 
 
13
  lkg: Optional[str] = None
 
 
 
 
 
 
 
 
 
14
 
15
  class PlanRequest(BaseModel):
16
- mode: Mode = "plan"
 
17
  context: PlanContext
18
  constraints: PlanConstraints = Field(default_factory=PlanConstraints)
19
 
 
20
  class PlanResponse(BaseModel):
21
  plan_id: str
22
  steps: List[str]
23
  risk: str
24
  explanation: str
25
 
 
 
 
 
 
26
  class ChatRequest(BaseModel):
27
  question: str = Field(..., min_length=3, max_length=512)
28
 
 
29
  class ChatResponse(BaseModel):
30
  answer: str
31
  sources: List[str] = Field(default_factory=list)
 
1
+ from __future__ import annotations
 
2
 
3
+ from typing import Optional, List, Literal
4
+ from pydantic import BaseModel, Field, ConfigDict
5
+
6
+ # ---------------------------
7
+ # Planning schema
8
+ # ---------------------------
9
+
10
class Health(BaseModel):
    # Health snapshot nested under PlanContext.health. Every field defaults to
    # None, so a partial (or empty) object still validates.
    # NOTE: documented with comments rather than a docstring on purpose; pydantic
    # copies a model docstring into the generated JSON schema description.

    score: Optional[float] = None
    status: Optional[str] = None
    # Kept as a plain string; switch to datetime if stricter parsing is wanted.
    last_checked: Optional[str] = None
14
+
15
+
16
class RecentCheck(BaseModel):
    # One entry of PlanContext.recent_checks.
    # `check` and `result` are required; the remaining fields are optional.

    check: str
    result: str
    latency_ms: Optional[float] = None
    # Kept as a plain string; switch to datetime if stricter parsing is wanted.
    ts: Optional[str] = None
21
 
 
 
 
22
 
23
  class PlanContext(BaseModel):
24
+ """
25
+ Context is permissive: accept any extra keys from Guardian (or future sources).
26
+ Known fields are typed below; unknown fields pass through.
27
+ """
28
+ model_config = ConfigDict(extra="allow")
29
+
30
+ # Common identifiers
31
+ app_id: Optional[str] = None
32
+ entity_uid: Optional[str] = None
33
+
34
+ # Known structured bits
35
+ symptoms: Optional[List[str]] = None
36
  lkg: Optional[str] = None
37
+ lkg_version: Optional[str] = None
38
+ health: Optional[Health] = None
39
+ recent_checks: Optional[List[RecentCheck]] = None
40
+
41
+
42
class PlanConstraints(BaseModel):
    # Limits the caller places on the generated plan.

    # Plan length cap; validated to the inclusive range 1..10, default 3.
    max_steps: int = Field(default=3, ge=1, le=10)
    # Only these three labels validate; defaults to "low".
    risk: Literal["low", "medium", "high"] = "low"
45
+
46
 
47
  class PlanRequest(BaseModel):
48
+ # default to "plan" and only allow that value for now
49
+ mode: Literal["plan"] = "plan"
50
  context: PlanContext
51
  constraints: PlanConstraints = Field(default_factory=PlanConstraints)
52
 
53
+
54
  class PlanResponse(BaseModel):
55
  plan_id: str
56
  steps: List[str]
57
  risk: str
58
  explanation: str
59
 
60
+
61
+ # ---------------------------
62
+ # Chat (kept for compatibility; router uses its own flexible model)
63
+ # ---------------------------
64
+
65
  class ChatRequest(BaseModel):
66
  question: str = Field(..., min_length=3, max_length=512)
67
 
68
+
69
  class ChatResponse(BaseModel):
70
  answer: str
71
  sources: List[str] = Field(default_factory=list)
app/routers/plan.py CHANGED
@@ -1,4 +1,10 @@
 
 
 
 
1
  from fastapi import APIRouter, Depends, HTTPException
 
 
2
  from ..deps import get_settings
3
  from ..core.config import Settings
4
  from ..core.schema import PlanRequest, PlanResponse
@@ -6,19 +12,45 @@ from ..services.plan_service import generate_plan
6
 
7
  router = APIRouter()
8
 
 
 
 
 
 
 
 
 
 
 
 
9
  @router.post("/plan", response_model=PlanResponse)
10
- async def v1_plan(
11
- req: PlanRequest,
12
- settings: Settings = Depends(get_settings)
13
- ):
14
- """Generates a structured remediation plan based on application health context."""
15
- if req.mode != "plan":
 
16
  raise HTTPException(
17
  status_code=400,
18
- detail=f"Mode '{req.mode}' is not enabled. Only 'plan' is supported in Stage 1."
19
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  try:
21
- data = await generate_plan(req, settings=settings)
22
- return data
23
  except Exception as e:
 
24
  raise HTTPException(status_code=503, detail=f"Inference service failed: {e}")
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, Optional
4
+
5
  from fastapi import APIRouter, Depends, HTTPException
6
+ from pydantic import BaseModel
7
+
8
  from ..deps import get_settings
9
  from ..core.config import Settings
10
  from ..core.schema import PlanRequest, PlanResponse
 
12
 
13
  router = APIRouter()
14
 
15
+
16
class PlanRequestIn(BaseModel):
    """
    Permissive boundary model so the Dev UI (and Guardian) can send richer payloads.
    The route normalizes it to the strict PlanRequest after basic checks.
    """

    # Free-form string so an unsupported mode reaches the route's explicit
    # mode check instead of failing model validation first.
    mode: Optional[str] = "plan"
    # Raw context; typed validation happens when it is coerced to PlanRequest.
    context: Dict[str, Any]
    # Optional at the boundary: the strict PlanRequest already supplies default
    # constraints, so this permissive model must not be stricter than it.
    # An empty dict validates to a default PlanConstraints downstream.
    # Pydantic copies mutable defaults per instance, so `{}` is not shared.
    constraints: Dict[str, Any] = {}
24
+
25
+
26
  @router.post("/plan", response_model=PlanResponse)
27
+ async def v1_plan(req_in: PlanRequestIn, settings: Settings = Depends(get_settings)):
28
+ """
29
+ Generate a structured remediation plan from health/context.
30
+ - Accepts permissive input (extra keys allowed).
31
+ - Coerces to strict PlanRequest (pydantic) before calling the service.
32
+ """
33
+ if (req_in.mode or "plan") != "plan":
34
  raise HTTPException(
35
  status_code=400,
36
+ detail=f"Mode '{req_in.mode}' is not enabled. Only 'plan' is supported in Stage 1.",
37
  )
38
+
39
+ try:
40
+ # Coerce to strict schema; pydantic will validate & coerce types
41
+ req = PlanRequest.model_validate(
42
+ {
43
+ "mode": "plan",
44
+ "context": req_in.context,
45
+ "constraints": req_in.constraints,
46
+ }
47
+ )
48
+ except Exception as e:
49
+ # Return a clear validation error rather than generic 500
50
+ raise HTTPException(status_code=422, detail=f"Invalid plan payload: {e}")
51
+
52
  try:
53
+ return await generate_plan(req, settings=settings)
 
54
  except Exception as e:
55
+ # Surface inference/backend errors as 503 (service unavailable)
56
  raise HTTPException(status_code=503, detail=f"Inference service failed: {e}")
app/services/plan_service.py CHANGED
@@ -169,9 +169,11 @@ class PlanService:
169
  async def generate(self, req: PlanRequest) -> PlanResponse:
170
  """
171
  Build prompt -> call Router (non-stream) -> robustly parse -> PlanResponse.
 
172
  """
173
  final_prompt = _build_prompt(req)
174
- # run the blocking requests call in a worker thread to avoid blocking the event loop
 
175
  raw_text = await asyncio.to_thread(
176
  self.client.plan_nonstream,
177
  SYSTEM_PLANNER,
@@ -179,6 +181,31 @@ class PlanService:
179
  self.settings.model.max_new_tokens,
180
  self.settings.model.temperature,
181
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  parsed = _safe_parse_or_fallback(raw_text, final_prompt)
183
  return PlanResponse.model_validate(parsed)
184
 
 
169
  async def generate(self, req: PlanRequest) -> PlanResponse:
170
  """
171
  Build prompt -> call Router (non-stream) -> robustly parse -> PlanResponse.
172
+ Includes a one-shot JSON reformat retry if the first output isn't valid JSON.
173
  """
174
  final_prompt = _build_prompt(req)
175
+
176
+ # 1) First pass: ask for the plan
177
  raw_text = await asyncio.to_thread(
178
  self.client.plan_nonstream,
179
  SYSTEM_PLANNER,
 
181
  self.settings.model.max_new_tokens,
182
  self.settings.model.temperature,
183
  )
184
+
185
+ # 2) If not valid JSON, ask the model to strictly reformat to JSON only (no fences)
186
+ needs_reformat = False
187
+ try:
188
+ _ = _extract_json_block(raw_text)
189
+ except Exception:
190
+ needs_reformat = True
191
+
192
+ if needs_reformat:
193
+ reformat = (
194
+ "Format the following content as a strict JSON object with EXACT keys "
195
+ "plan_id, steps (array of strings), risk (low|medium|high), explanation (string). "
196
+ "Output ONLY JSON. No backticks. No extra keys.\n\nCONTENT:\n"
197
+ + raw_text
198
+ )
199
+ re_text = await asyncio.to_thread(
200
+ self.client.plan_nonstream,
201
+ SYSTEM_PLANNER,
202
+ reformat,
203
+ self.settings.model.max_new_tokens,
204
+ max(0.05, float(self.settings.model.temperature) * 0.75),
205
+ )
206
+ raw_text = re_text # replace with reformatted text
207
+
208
+ # 3) Parse safely (or fallback) and validate against schema
209
  parsed = _safe_parse_or_fallback(raw_text, final_prompt)
210
  return PlanResponse.model_validate(parsed)
211
 
app/templates/dev.html CHANGED
@@ -2,11 +2,20 @@
2
  {% block body %}
3
  <div class="card">
4
  <h3>Dev — Exercise /v1/plan</h3>
5
- <form method="post" style="display:grid; gap:12px; margin-top:12px;">
6
- <textarea name="payload" rows="18" spellcheck="false">{{ sample }}</textarea>
7
- <div><button type="submit">Call /v1/plan</button></div>
 
 
 
8
  </form>
9
 
 
 
 
 
 
 
10
  {% if error %}
11
  <h4>Error</h4>
12
  <pre>{{ error }}</pre>
@@ -17,4 +26,102 @@
17
  <pre>{{ result }}</pre>
18
  {% endif %}
19
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  {% endblock %}
 
2
  {% block body %}
3
  <div class="card">
4
  <h3>Dev — Exercise /v1/plan</h3>
5
+
6
+ <form id="devForm" method="post" style="display:grid; gap:12px; margin-top:12px;">
7
+ <textarea id="payload" name="payload" rows="18" spellcheck="false">{{ sample }}</textarea>
8
+ <div>
9
+ <button id="devBtn" type="submit">Call /v1/plan</button>
10
+ </div>
11
  </form>
12
 
13
+ <!-- Client-side validation error (in addition to server-side) -->
14
+ <div id="clientError" class="dev-error" style="display:none; margin-top:10px;">
15
+ <h4>Error</h4>
16
+ <pre id="clientErrText"></pre>
17
+ </div>
18
+
19
  {% if error %}
20
  <h4>Error</h4>
21
  <pre>{{ error }}</pre>
 
26
  <pre>{{ result }}</pre>
27
  {% endif %}
28
  </div>
29
+
30
+ <!-- Fullscreen loader overlay -->
31
+ <div id="loader" class="loader-overlay" aria-hidden="true" style="display:none;">
32
+ <div class="loader-wrap">
33
+ <div class="loader-spinner"></div>
34
+ <div class="loader-text">GENERATING PLAN…</div>
35
+ </div>
36
+ </div>
37
+
38
+ <style>
39
/* Client-side validation error box */
.dev-error pre {
  background: #020a04;
  border: 1px solid var(--border);
  border-radius: 12px;
  padding: 10px;
  white-space: pre-wrap;
  word-break: break-word;
}

/* Fullscreen overlay; hidden by default, the page script switches it to flex */
.loader-overlay {
  position: fixed;
  inset: 0;
  z-index: 9999;
  display: none; /* toggled by JS */
  align-items: center;
  justify-content: center;
  backdrop-filter: blur(3px);
  background:
    radial-gradient(800px 500px at 50% -20%, rgba(0,255,156,0.08), transparent 40%),
    linear-gradient(180deg, rgba(0,0,0,0.72), rgba(0,0,0,0.65));
}

/* Centered panel holding the spinner and its caption */
.loader-wrap {
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: 14px;
  padding: 22px 26px;
  border-radius: 16px;
  border: 1px solid var(--border);
  background: rgba(6,16,6,0.75);
  box-shadow: 0 10px 40px rgba(0,0,0,0.45), 0 0 0 1px rgba(0,255,156,0.06);
}

/* Ring spinner: two accent-colored edges rotating over a faint ring */
.loader-spinner {
  width: 64px;
  height: 64px;
  border-radius: 50%;
  border: 3px solid rgba(0,255,156,0.15);
  border-top-color: var(--matrix);
  border-right-color: var(--matrix);
  box-shadow: 0 0 18px rgba(0,255,156,0.35);
  /* NOTE(review): only `spin` is declared in this block. `glow` keyframes are
     presumably provided by a shared stylesheet; confirm, otherwise that second
     animation is silently a no-op. */
  animation: spin 0.9s linear infinite, glow 3.5s ease-in-out infinite;
}

/* Caption under the spinner */
.loader-text {
  font-family: "Share Tech Mono", monospace;
  letter-spacing: 0.08em;
  color: var(--matrix);
  text-shadow: 0 0 8px rgba(0,255,156,0.35);
  opacity: 0.95;
}

@keyframes spin { to { transform: rotate(360deg); } }
91
+ </style>
92
+
93
+ <script>
94
(function () {
  // Wires the Dev form: validates the textarea as JSON before submit and shows
  // a blocking loader while the normal form POST is in flight.
  const form = document.getElementById('devForm');
  const btn = document.getElementById('devBtn');
  const ta = document.getElementById('payload');
  const overlay = document.getElementById('loader');
  const errBox = document.getElementById('clientError');
  const errTxt = document.getElementById('clientErrText');

  // Remember the idle label so it can be restored after a back-navigation.
  const idleLabel = btn.textContent;

  form.addEventListener('submit', (e) => {
    // Clear client error if any
    errBox.style.display = 'none';
    errTxt.textContent = '';

    // Quick client-side JSON validation for better UX
    const raw = (ta.value || '').trim();
    try {
      JSON.parse(raw);
    } catch (ex) {
      // Block the POST and show the parser message inline.
      e.preventDefault();
      errTxt.textContent = "Invalid JSON: " + (ex && ex.message ? ex.message : String(ex));
      errBox.style.display = 'block';
      return;
    }

    // Show loader + disable button to prevent double submit
    btn.disabled = true;
    btn.textContent = 'Planning…';
    overlay.style.display = 'flex';

    // Let the normal form post proceed; a fresh page load resets the UI.
  });

  // A page restored from the back/forward cache keeps its DOM state, so the
  // overlay and disabled button would otherwise stay stuck. Reset them.
  window.addEventListener('pageshow', (ev) => {
    if (!ev.persisted) {
      return;
    }
    btn.disabled = false;
    btn.textContent = idleLabel;
    overlay.style.display = 'none';
  });
})();
126
+ </script>
127
  {% endblock %}
app/ui.py CHANGED
@@ -34,6 +34,7 @@ async def chat_post(request: Request, question: str = Form(...)):
34
  @router.get("/dev", response_class=HTMLResponse)
35
  async def dev_get(request: Request):
36
  sample = {
 
37
  "context": {
38
  "entity_uid": "matrix-ai",
39
  "health": {"score": 0.64, "status": "degraded", "last_checked": "2025-09-27T00:00:00Z"},
 
34
  @router.get("/dev", response_class=HTMLResponse)
35
  async def dev_get(request: Request):
36
  sample = {
37
+ "mode": "plan",
38
  "context": {
39
  "entity_uid": "matrix-ai",
40
  "health": {"score": 0.64, "status": "degraded", "last_checked": "2025-09-27T00:00:00Z"},