Spaces:

ruslanmv
/

matrix-ai

Sleeping

App Files Community

ruslanmv committed on Sep 27, 2025

Commit

478dbbd

1 Parent(s): 0e040d4

First working version with plan

Browse files

Files changed (6) hide show

app/core/prompts/plan.txt +16 -7
app/core/schema.py +49 -9
app/routers/plan.py +41 -9
app/services/plan_service.py +28 -1
app/templates/dev.html +110 -3
app/ui.py +1 -0

app/core/prompts/plan.txt CHANGED Viewed

@@ -1,8 +1,17 @@
-You are Matrix-AI, an expert system that produces short, safe, and auditable remediation plans for software services.
-Your constraints are:
-1. You must return a response in strictly JSON format.
-2. The plan must not exceed the `max_steps` constraint.
-3. Prioritize actions that are non-destructive, such as re-running health probes, pinning to a last-known-good (LKG) version, or running diagnostic tools in a sandbox.
-4. The explanation should be a single, concise sentence.
-5. The output JSON must have these exact keys: `plan_id`, `steps`, `risk`, `explanation`.

+You are MATRIX-AI Planner.
+Return ONLY a single JSON object. Do not include backticks, code fences, Markdown, or any prose.
+The JSON MUST match this schema exactly:
+{
+  "plan_id": "<string>",
+  "steps": ["<string>", "..."],
+  "risk": "low" | "medium" | "high",
+  "explanation": "<string>"
+}
+Rules:
+- Keep steps short, safe, and auditable (1–3 steps).
+- Prefer low risk actions.
+- Do not add any extra keys.
+- Start your reply with '{' and end with '}'.

app/core/schema.py CHANGED Viewed

@@ -1,31 +1,71 @@
-from pydantic import BaseModel, Field
-from typing import List, Optional, Literal
-Mode = Literal["plan", "summary", "patch-diff"]
-class PlanConstraints(BaseModel):
-    risk: Optional[str] = "low"
-    max_steps: int = Field(default=3, ge=1, le=10)
 class PlanContext(BaseModel):
-    app_id: str
-    symptoms: List[str] = Field(default_factory=list)
     lkg: Optional[str] = None
 class PlanRequest(BaseModel):
-    mode: Mode = "plan"
     context: PlanContext
     constraints: PlanConstraints = Field(default_factory=PlanConstraints)
 class PlanResponse(BaseModel):
     plan_id: str
     steps: List[str]
     risk: str
     explanation: str
 class ChatRequest(BaseModel):
     question: str = Field(..., min_length=3, max_length=512)
 class ChatResponse(BaseModel):
     answer: str
     sources: List[str] = Field(default_factory=list)

+from __future__ import annotations
+from typing import Optional, List, Literal
+from pydantic import BaseModel, Field, ConfigDict
+# ---------------------------
+# Planning schema
+# ---------------------------
+class Health(BaseModel):
+    score: Optional[float] = None
+    status: Optional[str] = None
+    last_checked: Optional[str] = None  # or use datetime if preferred
+class RecentCheck(BaseModel):
+    check: str
+    result: str
+    latency_ms: Optional[float] = None
+    ts: Optional[str] = None  # or use datetime if preferred
 class PlanContext(BaseModel):
+    """
+    Context is permissive: accept any extra keys from Guardian (or future sources).
+    Known fields are typed below; unknown fields pass through.
+    """
+    model_config = ConfigDict(extra="allow")
+    # Common identifiers
+    app_id: Optional[str] = None
+    entity_uid: Optional[str] = None
+    # Known structured bits
+    symptoms: Optional[List[str]] = None
     lkg: Optional[str] = None
+    lkg_version: Optional[str] = None
+    health: Optional[Health] = None
+    recent_checks: Optional[List[RecentCheck]] = None
+class PlanConstraints(BaseModel):
+    max_steps: int = Field(default=3, ge=1, le=10)
+    risk: Literal["low", "medium", "high"] = "low"
 class PlanRequest(BaseModel):
+    # default to "plan" and only allow that value for now
+    mode: Literal["plan"] = "plan"
     context: PlanContext
     constraints: PlanConstraints = Field(default_factory=PlanConstraints)
 class PlanResponse(BaseModel):
     plan_id: str
     steps: List[str]
     risk: str
     explanation: str
+# ---------------------------
+# Chat (kept for compatibility; router uses its own flexible model)
+# ---------------------------
 class ChatRequest(BaseModel):
     question: str = Field(..., min_length=3, max_length=512)
 class ChatResponse(BaseModel):
     answer: str
     sources: List[str] = Field(default_factory=list)

app/routers/plan.py CHANGED Viewed

@@ -1,4 +1,10 @@
 from fastapi import APIRouter, Depends, HTTPException
 from ..deps import get_settings
 from ..core.config import Settings
 from ..core.schema import PlanRequest, PlanResponse
@@ -6,19 +12,45 @@ from ..services.plan_service import generate_plan
 router = APIRouter()
 @router.post("/plan", response_model=PlanResponse)
-async def v1_plan(
-    req: PlanRequest,
-    settings: Settings = Depends(get_settings)
-):
-    """Generates a structured remediation plan based on application health context."""
-    if req.mode != "plan":
         raise HTTPException(
             status_code=400,
-            detail=f"Mode '{req.mode}' is not enabled. Only 'plan' is supported in Stage 1."
         )
     try:
-        data = await generate_plan(req, settings=settings)
-        return data
     except Exception as e:
         raise HTTPException(status_code=503, detail=f"Inference service failed: {e}")

+from __future__ import annotations
+from typing import Any, Dict, Optional
 from fastapi import APIRouter, Depends, HTTPException
+from pydantic import BaseModel
 from ..deps import get_settings
 from ..core.config import Settings
 from ..core.schema import PlanRequest, PlanResponse
 router = APIRouter()
+class PlanRequestIn(BaseModel):
+    """
+    Permissive boundary model so the Dev UI (and Guardian) can send richer payloads.
+    We normalize to the strict PlanRequest after basic checks.
+    """
+    mode: Optional[str] = "plan"
+    context: Dict[str, Any]
+    constraints: Dict[str, Any]
 @router.post("/plan", response_model=PlanResponse)
+async def v1_plan(req_in: PlanRequestIn, settings: Settings = Depends(get_settings)):
+    """
+    Generate a structured remediation plan from health/context.
+    - Accepts permissive input (extra keys allowed).
+    - Coerces to strict PlanRequest (pydantic) before calling the service.
+    """
+    if (req_in.mode or "plan") != "plan":
         raise HTTPException(
             status_code=400,
+            detail=f"Mode '{req_in.mode}' is not enabled. Only 'plan' is supported in Stage 1.",
         )
+    try:
+        # Coerce to strict schema; pydantic will validate & coerce types
+        req = PlanRequest.model_validate(
+            {
+                "mode": "plan",
+                "context": req_in.context,
+                "constraints": req_in.constraints,
+            }
+        )
+    except Exception as e:
+        # Return a clear validation error rather than a generic 500
+        raise HTTPException(status_code=422, detail=f"Invalid plan payload: {e}")
     try:
+        return await generate_plan(req, settings=settings)
     except Exception as e:
+        # Surface inference/backend errors as 503 (service unavailable)
         raise HTTPException(status_code=503, detail=f"Inference service failed: {e}")

app/services/plan_service.py CHANGED Viewed

@@ -169,9 +169,11 @@ class PlanService:
     async def generate(self, req: PlanRequest) -> PlanResponse:
         """
         Build prompt -> call Router (non-stream) -> robustly parse -> PlanResponse.
         """
         final_prompt = _build_prompt(req)
-        # run the blocking requests call in a worker thread to avoid blocking the event loop
         raw_text = await asyncio.to_thread(
             self.client.plan_nonstream,
             SYSTEM_PLANNER,
@@ -179,6 +181,31 @@ class PlanService:
             self.settings.model.max_new_tokens,
             self.settings.model.temperature,
         )
         parsed = _safe_parse_or_fallback(raw_text, final_prompt)
         return PlanResponse.model_validate(parsed)

     async def generate(self, req: PlanRequest) -> PlanResponse:
         """
         Build prompt -> call Router (non-stream) -> robustly parse -> PlanResponse.
+        Includes a one-shot JSON reformat retry if the first output isn't valid JSON.
         """
         final_prompt = _build_prompt(req)
+        # 1) First pass: ask for the plan
         raw_text = await asyncio.to_thread(
             self.client.plan_nonstream,
             SYSTEM_PLANNER,
             self.settings.model.max_new_tokens,
             self.settings.model.temperature,
         )
+        # 2) If not valid JSON, ask the model to strictly reformat to JSON only (no fences)
+        needs_reformat = False
+        try:
+            _ = _extract_json_block(raw_text)
+        except Exception:
+            needs_reformat = True
+        if needs_reformat:
+            reformat = (
+                "Format the following content as a strict JSON object with EXACT keys "
+                "plan_id, steps (array of strings), risk (low|medium|high), explanation (string). "
+                "Output ONLY JSON. No backticks. No extra keys.\n\nCONTENT:\n"
+                + raw_text
+            )
+            re_text = await asyncio.to_thread(
+                self.client.plan_nonstream,
+                SYSTEM_PLANNER,
+                reformat,
+                self.settings.model.max_new_tokens,
+                max(0.05, float(self.settings.model.temperature) * 0.75),
+            )
+            raw_text = re_text  # replace with reformatted text
+        # 3) Parse safely (or fallback) and validate against schema
         parsed = _safe_parse_or_fallback(raw_text, final_prompt)
         return PlanResponse.model_validate(parsed)

app/templates/dev.html CHANGED Viewed

@@ -2,11 +2,20 @@
 {% block body %}
   <div class="card">
     <h3>Dev — Exercise /v1/plan</h3>
-    <form method="post" style="display:grid; gap:12px; margin-top:12px;">
-      <textarea name="payload" rows="18" spellcheck="false">{{ sample }}</textarea>
-      <div><button type="submit">Call /v1/plan</button></div>
     </form>
     {% if error %}
       <h4>Error</h4>
       <pre>{{ error }}</pre>
@@ -17,4 +26,102 @@
       <pre>{{ result }}</pre>
     {% endif %}
   </div>
 {% endblock %}

 {% block body %}
   <div class="card">
     <h3>Dev — Exercise /v1/plan</h3>
+    <form id="devForm" method="post" style="display:grid; gap:12px; margin-top:12px;">
+      <textarea id="payload" name="payload" rows="18" spellcheck="false">{{ sample }}</textarea>
+      <div>
+        <button id="devBtn" type="submit">Call /v1/plan</button>
+      </div>
     </form>
+    <!-- Client-side validation error (in addition to server-side) -->
+    <div id="clientError" class="dev-error" style="display:none; margin-top:10px;">
+      <h4>Error</h4>
+      <pre id="clientErrText"></pre>
+    </div>
     {% if error %}
       <h4>Error</h4>
       <pre>{{ error }}</pre>
       <pre>{{ result }}</pre>
     {% endif %}
   </div>
+  <!-- Fullscreen loader overlay -->
+  <div id="loader" class="loader-overlay" aria-hidden="true" style="display:none;">
+    <div class="loader-wrap">
+      <div class="loader-spinner"></div>
+      <div class="loader-text">GENERATING PLAN…</div>
+    </div>
+  </div>
+  <style>
+    /* Inline error block */
+    .dev-error pre {
+      background: #020a04;
+      border: 1px solid var(--border);
+      border-radius: 12px;
+      padding: 10px;
+      white-space: pre-wrap;
+      word-break: break-word;
+    }
+    /* Overlay */
+    .loader-overlay {
+      position: fixed;
+      inset: 0;
+      z-index: 9999;
+      display: none; /* toggled by JS */
+      align-items: center;
+      justify-content: center;
+      backdrop-filter: blur(3px);
+      background:
+        radial-gradient(800px 500px at 50% -20%, rgba(0,255,156,0.08), transparent 40%),
+        linear-gradient(180deg, rgba(0,0,0,0.72), rgba(0,0,0,0.65));
+    }
+    .loader-wrap {
+      display: flex;
+      flex-direction: column;
+      align-items: center;
+      gap: 14px;
+      padding: 22px 26px;
+      border-radius: 16px;
+      border: 1px solid var(--border);
+      background: rgba(6,16,6,0.75);
+      box-shadow: 0 10px 40px rgba(0,0,0,0.45), 0 0 0 1px rgba(0,255,156,0.06);
+    }
+    .loader-spinner {
+      width: 64px;
+      height: 64px;
+      border-radius: 50%;
+      border: 3px solid rgba(0,255,156,0.15);
+      border-top-color: var(--matrix);
+      border-right-color: var(--matrix);
+      box-shadow: 0 0 18px rgba(0,255,156,0.35);
+      animation: spin 0.9s linear infinite, glow 3.5s ease-in-out infinite;
+    }
+    .loader-text {
+      font-family: "Share Tech Mono", monospace;
+      letter-spacing: 0.08em;
+      color: var(--matrix);
+      text-shadow: 0 0 8px rgba(0,255,156,0.35);
+      opacity: 0.95;
+    }
+    @keyframes spin { to { transform: rotate(360deg); } }
+  </style>
+  <script>
+    (function () {
+      const form   = document.getElementById('devForm');
+      const btn    = document.getElementById('devBtn');
+      const ta     = document.getElementById('payload');
+      const overlay= document.getElementById('loader');
+      const errBox = document.getElementById('clientError');
+      const errTxt = document.getElementById('clientErrText');
+      form.addEventListener('submit', (e) => {
+        // Clear client error if any
+        errBox.style.display = 'none';
+        errTxt.textContent = '';
+        // Quick client-side JSON validation for better UX
+        const raw = (ta.value || '').trim();
+        try {
+          JSON.parse(raw);
+        } catch (ex) {
+          e.preventDefault();
+          errTxt.textContent = "Invalid JSON: " + (ex && ex.message ? ex.message : String(ex));
+          errBox.style.display = 'block';
+          return;
+        }
+        // Show loader + disable button to prevent double submit
+        btn.disabled = true;
+        btn.textContent = 'Planning…';
+        overlay.style.display = 'flex';
+        // Let the normal form post proceed; overlay disappears on page reload
+      });
+    })();
+  </script>
 {% endblock %}

app/ui.py CHANGED Viewed

@@ -34,6 +34,7 @@ async def chat_post(request: Request, question: str = Form(...)):
 @router.get("/dev", response_class=HTMLResponse)
 async def dev_get(request: Request):
     sample = {
         "context": {
             "entity_uid": "matrix-ai",
             "health": {"score": 0.64, "status": "degraded", "last_checked": "2025-09-27T00:00:00Z"},

 @router.get("/dev", response_class=HTMLResponse)
 async def dev_get(request: Request):
     sample = {
+        "mode": "plan",
         "context": {
             "entity_uid": "matrix-ai",
             "health": {"score": 0.64, "status": "degraded", "last_checked": "2025-09-27T00:00:00Z"},