MavareeSwimmingPool committed on
Commit
079f062
·
verified ·
1 Parent(s): ae1347c

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +29 -0
  2. app.py +652 -0
  3. requirements.txt +15 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image
FROM python:3.11-slim

# Prevent Python from writing pyc files and buffering stdout
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Set working directory
WORKDIR /app

# System dependencies (minimal set)
# The apt package lists are deleted in the same RUN step so they do not end up
# in the resulting image layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
# requirements.txt is copied on its own, before the rest of the sources, so the
# dependency-install layer can be reused when only application code changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r /app/requirements.txt

# Copy application code
COPY . /app

# Hugging Face Spaces uses port 7860 by default
ENV PORT=7860
EXPOSE 7860

# Start FastAPI app
# The command is wrapped in a shell because exec-form CMD performs no variable
# substitution on its own; bash expands ${PORT} when the container starts.
CMD ["bash", "-lc", "python -m uvicorn app:app --host 0.0.0.0 --port ${PORT}"]
app.py ADDED
@@ -0,0 +1,652 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import html
4
+ from typing import Any, Dict, List, Optional, Tuple
5
+
6
+ import requests
7
+ from dotenv import load_dotenv
8
+ from fastapi import FastAPI
9
+ from fastapi.responses import HTMLResponse, JSONResponse
10
+ from pydantic import BaseModel
11
+
12
+ from openai import OpenAI
13
+
14
# ===============================
# ENV / CONFIG (PROD-like)
# ===============================
# Load variables through python-dotenv before any setting below is read.
load_dotenv()


def _env_text(name: str, default: str) -> str:
    """Return the environment value for *name* (or *default*), whitespace-trimmed."""
    return os.getenv(name, default).strip()


def _env_flag(name: str, default: str) -> bool:
    """Return True when the trimmed, lower-cased value is "1", "true" or "yes"."""
    return _env_text(name, default).lower() in ("1", "true", "yes")


DEBUG_STARTUP_LOGS = _env_flag("DEBUG_STARTUP_LOGS", "0")

# The service cannot do anything without an OpenAI key, so fail at import time.
OPENAI_API_KEY = _env_text("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    raise RuntimeError("OPENAI_API_KEY is missing. Put it into .env")

# Trailing slashes are dropped so URL paths can be appended with a single "/".
QDRANT_URL = _env_text("QDRANT_URL", "http://127.0.0.1:6333").rstrip("/")
QDRANT_COLLECTION = _env_text("QDRANT_COLLECTION", "pms_equipment")
QDRANT_API_KEY = _env_text("QDRANT_API_KEY", "")

EMBED_MODEL = _env_text("EMBED_MODEL", "text-embedding-3-small")
VECTOR_SIZE = int(_env_text("VECTOR_SIZE", "1536"))
TOP_K = int(_env_text("TOP_K", "5"))

# ===============================
# Evidence gate (PROD)
# ===============================
# The LLM is only consulted when at least MIN_STRONG_HITS search results
# score >= SCORE_THRESHOLD.
SCORE_THRESHOLD = float(os.getenv("SCORE_THRESHOLD", "0.62"))
MIN_STRONG_HITS = int(os.getenv("MIN_STRONG_HITS", "2"))

# ===============================
# Payload / token hygiene
# ===============================
MAX_QUERY_CHARS = int(_env_text("MAX_QUERY_CHARS", "800"))
MIN_QUERY_CHARS = int(_env_text("MIN_QUERY_CHARS", "3"))
MAX_EVIDENCE_CHARS = int(_env_text("MAX_EVIDENCE_CHARS", "12000"))
RETURN_RAW_HITS = _env_flag("RETURN_RAW_HITS", "1")

# ===============================
# LLM
# ===============================
LLM_MODEL = _env_text("LLM_MODEL", "gpt-4o-mini")  # JSON-only audit answer

if DEBUG_STARTUP_LOGS:
    # The API key itself is never printed, only whether it is configured.
    for _label, _value in (
        ("QDRANT_URL =", QDRANT_URL),
        ("QDRANT_COLLECTION =", QDRANT_COLLECTION),
        ("QDRANT_API_KEY =", "SET" if QDRANT_API_KEY else "MISSING"),
        ("EMBED_MODEL =", EMBED_MODEL),
        ("VECTOR_SIZE =", VECTOR_SIZE),
        ("TOP_K =", TOP_K),
        ("LLM_MODEL =", LLM_MODEL),
    ):
        print(_label, _value)

# ===============================
# CLIENTS
# ===============================
oai = OpenAI(api_key=OPENAI_API_KEY)

# ===============================
# APP
# ===============================
app = FastAPI(title="PMS Copilot — RAG MVP")
70
+
71
+
72
+ # ============================================================
73
+ # SCHEMAS
74
+ # ============================================================
75
# Request body accepted by POST /ask.
class AskRequest(BaseModel):
    # Free-text user question; the /ask handler trims it and enforces the
    # MIN_QUERY_CHARS / MAX_QUERY_CHARS limits.
    q: str
77
+
78
+
79
+ # ============================================================
80
+ # HELPERS
81
+ # ============================================================
82
def embed(text: str) -> List[float]:
    """Embed *text* with the configured OpenAI model and return the vector.

    Raises:
        RuntimeError: if the returned vector length differs from VECTOR_SIZE.
    """
    response = oai.embeddings.create(model=EMBED_MODEL, input=text)
    vector = response.data[0].embedding
    got = len(vector)
    if got == VECTOR_SIZE:
        return vector
    # A size mismatch means the model and the configured dimension disagree.
    raise RuntimeError(
        f"Embedding dim mismatch: got {got} but VECTOR_SIZE={VECTOR_SIZE}. "
        f"Check EMBED_MODEL / VECTOR_SIZE in .env"
    )
92
+
93
+
94
def qdrant_search_rest(query_vec: List[float], limit: int) -> List[Dict[str, Any]]:
    """
    Run a vector search against Qdrant over plain REST (no qdrant_client).

    Args:
        query_vec: query embedding.
        limit: maximum number of points to return.

    Returns:
        The "result" list of the response, i.e. points shaped like
        {"id": ..., "score": ..., "payload": {...}}; [] when the key is absent.

    Raises:
        requests.HTTPError: on a non-2xx response (via raise_for_status).
    """
    endpoint = f"{QDRANT_URL}/collections/{QDRANT_COLLECTION}/points/search"

    # The "api-key" header is only sent when a key is configured.
    auth_headers: Dict[str, str] = {"api-key": QDRANT_API_KEY} if QDRANT_API_KEY else {}

    body = {
        "vector": query_vec,
        "limit": limit,
        "with_payload": True,
        "with_vectors": False,
    }

    response = requests.post(endpoint, json=body, headers=auth_headers, timeout=30)
    response.raise_for_status()
    return response.json().get("result", [])
116
+
117
+
118
def pick_text_from_payload(payload: Dict[str, Any]) -> Optional[str]:
    """Return human-readable text for a payload, or None if nothing usable exists.

    The first non-blank string found under a known text field wins (returned
    stripped). Otherwise the known PMS columns that hold a value are rendered
    as "NAME: value" and joined with " | ".
    """
    text_fields = ("text", "chunk", "content", "page_content", "body", "passage", "PROCEDURE")
    for field in text_fields:
        candidate = payload.get(field)
        if not isinstance(candidate, str):
            continue
        cleaned = candidate.strip()
        if cleaned:
            return cleaned

    # Fallback: stitch together whichever structured columns are populated.
    columns = ("GROUPS", "FREQUENCY TYPE", "MAINTENANCE HEAD", "RESPONSIBILITY", "PROCEDURE")
    labelled = [
        f"{name}: {payload[name]}"
        for name in columns
        if name in payload and payload[name] not in (None, "")
    ]
    return " | ".join(labelled) if labelled else None
135
+
136
+
137
def build_evidence_blocks(hits: List[Dict[str, Any]]) -> Tuple[str, List[Dict[str, Any]]]:
    """
    Turn search hits into LLM evidence plus a compact source table.

    Returns:
        (evidence_text, sources) where evidence_text has one "[n] ..." line per
        hit (cut to MAX_EVIDENCE_CHARS with a truncation marker appended) and
        sources holds per-hit metadata for the UI.
    """
    numbered_lines: List[str] = []
    source_rows: List[Dict[str, Any]] = []
    meta_columns = ("GROUPS", "FREQUENCY TYPE", "MAINTENANCE HEAD", "RESPONSIBILITY")

    for n, hit in enumerate(hits, start=1):
        meta = hit.get("payload") or {}

        # Flatten line breaks so each hit occupies exactly one evidence line.
        snippet = (pick_text_from_payload(meta) or "").replace("\r", " ").replace("\n", " ").strip()
        if not snippet:
            # No readable text: fall back to the raw payload as JSON.
            snippet = json.dumps(meta, ensure_ascii=False)
        numbered_lines.append(f"[{n}] {snippet}")

        row: Dict[str, Any] = {"n": n, "id": hit.get("id"), "score": hit.get("score")}
        row.update((column, meta.get(column)) for column in meta_columns)
        source_rows.append(row)

    joined = "\n".join(numbered_lines)
    if len(joined) > MAX_EVIDENCE_CHARS:
        joined = joined[:MAX_EVIDENCE_CHARS] + "\n...[TRUNCATED]"

    return joined, source_rows
172
+
173
+
174
+ def _extract_first_json_object(s: str) -> str:
175
+ """
176
+ Best-effort recovery if LLM outputs extra text.
177
+ Returns substring from first '{' to last '}'.
178
+ """
179
+ if not s:
180
+ return s
181
+ start = s.find("{")
182
+ end = s.rfind("}")
183
+ if start == -1 or end == -1 or end <= start:
184
+ return s
185
+ return s[start : end + 1]
186
+
187
+
188
def run_llm_audit_json(query: str, evidence_text: str) -> Dict[str, Any]:
    """
    Ask the LLM for an audit-style answer and return it as a parsed JSON object.

    STRICT JSON ONLY (enforced by system contract + JSON parse).

    Args:
        query: the user's audit question.
        evidence_text: numbered evidence lines the model must rely on.

    Returns:
        The parsed JSON object produced by the model.

    Raises:
        RuntimeError: if the model output cannot be parsed as JSON, or parses
            to something other than a JSON object. The raw output is included
            in the message.
    """
    # Prompt text is runtime content sent to the model; keep it verbatim.
    system_prompt = """
You are a maritime audit assistant.

RULES (MANDATORY):
- Output MUST be valid JSON
- NO markdown
- NO explanations
- NO text outside JSON
- Use ONLY the provided evidence
- If information is missing, use "Not found in provided records"

JSON SCHEMA (exact):
{
"summary": string,
"findings": [
{
"topic": string,
"requirement": string,
"observation": string,
"risk": string,
"evidence_refs": [number]
}
],
"conclusion": string
}
""".strip()

    user_prompt = f"""
AUDIT QUESTION:
{query}

EVIDENCE:
{evidence_text}
""".strip()

    resp = oai.responses.create(
        model=LLM_MODEL,
        input=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0,
    )

    raw = resp.output_text or ""
    # Tolerate stray text around the JSON answer before parsing.
    candidate = _extract_first_json_object(raw)

    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError as e:
        # Chain the cause so the original parse position stays in the traceback.
        raise RuntimeError(f"LLM returned invalid JSON: {e}\n\nRAW OUTPUT:\n{raw}") from e

    # Valid JSON that is not an object (list, null, number, ...) breaks the
    # declared contract just as much as unparseable text does.
    if not isinstance(parsed, dict):
        raise RuntimeError(
            f"LLM returned JSON that is not an object: {type(parsed).__name__}"
            f"\n\nRAW OUTPUT:\n{raw}"
        )
    return parsed
244
+
245
+
246
+ # ============================================================
247
+ # API: HEALTH
248
+ # ============================================================
249
@app.get("/health")
def health():
    # Liveness probe: a constant body, no downstream service is contacted.
    status_body = {"status": "ok"}
    return status_body
252
+
253
+
254
+ # ============================================================
255
+ # UI (HTML)
256
+ # ============================================================
257
@app.get("/", response_class=HTMLResponse)
def home():
    # Serves the single-page UI as one f-string template. Literal CSS/JS braces
    # are doubled ({{ }}) so that only the Python values below are interpolated.

    # HTML-escape the config-derived strings before placing them in the markup.
    qdrant_url_html = html.escape(QDRANT_URL)
    coll_html = html.escape(QDRANT_COLLECTION)
    embed_html = html.escape(EMBED_MODEL)
    llm_html = html.escape(LLM_MODEL)

    # The embedded script POSTs {q} to /ask, then renders the audit, the evidence
    # table and an optional raw-JSON view. Values taken from the response are
    # passed through esc() before being written via innerHTML.
    return f"""
<!doctype html>
<html>
<head>
<meta charset="utf-8" />
<title>PMS Copilot — RAG MVP</title>
<style>
body {{
font-family: Arial, sans-serif;
max-width: 1200px;
margin: 34px auto;
padding: 0 16px;
}}
h1 {{ margin: 0 0 14px 0; font-size: 44px; letter-spacing: -0.5px; }}
.meta {{
color:#666; font-size: 13px; margin: 8px 0 18px 0;
}}
.row {{ display:flex; gap:10px; margin: 14px 0; align-items: stretch; }}
input {{
flex:1; padding: 14px; font-size: 16px;
border: 1px solid #bbb; border-radius: 6px;
}}
button {{
padding: 14px 18px; font-size: 16px; cursor: pointer;
border: 2px solid #222; background: #eee; border-radius: 6px;
min-width: 88px;
}}
.panel {{
background: #f6f6f6;
border-radius: 12px;
padding: 16px;
margin-top: 14px;
border: 1px solid #e2e2e2;
}}
.error {{
background: #fdecec;
border: 1px solid #f3b6b6;
}}
.title {{ font-size: 18px; font-weight: 700; margin: 0 0 10px 0; }}
.sub {{ color:#333; margin: 0 0 10px 0; }}
.kv {{ margin: 0; color:#111; }}
.kv b {{ display:inline-block; min-width: 140px; }}
.findings {{
margin-top: 14px;
display: grid;
grid-template-columns: 1fr;
gap: 10px;
}}
.card {{
background: #fff;
border-radius: 10px;
border: 1px solid #e5e5e5;
padding: 14px;
}}
.card h3 {{
margin: 0 0 8px 0;
font-size: 16px;
}}
.muted {{ color:#666; font-size: 13px; }}
.evidence {{
margin-top: 14px;
}}
table {{
width: 100%;
border-collapse: collapse;
background: #fff;
border-radius: 10px;
overflow: hidden;
border: 1px solid #e5e5e5;
}}
th, td {{
padding: 10px;
border-bottom: 1px solid #eee;
font-size: 13px;
vertical-align: top;
}}
th {{ text-align: left; background: #fafafa; }}
.row2 {{
display:flex; justify-content: space-between; align-items: center;
gap: 12px; margin-top: 10px;
}}
pre {{
margin: 0;
white-space: pre-wrap;
background: #111;
color: #eee;
padding: 12px;
border-radius: 10px;
overflow: auto;
font-size: 12px;
}}
.right {{
display:flex; gap: 10px; align-items: center;
}}
.checkbox {{
display:flex; gap: 8px; align-items: center;
font-size: 13px; color:#333;
}}
</style>
</head>
<body>
<h1>PMS Copilot — RAG MVP</h1>
<div class="meta">
Qdrant: <b>{qdrant_url_html}</b> · Collection: <b>{coll_html}</b> ·
Embed: <b>{embed_html}</b> · TopK: <b>{TOP_K}</b> · LLM: <b>{llm_html}</b>
</div>

<div class="row">
<input id="q" placeholder="Введите запрос..." />
<button onclick="send()">Ask</button>
</div>

<div id="result" class="panel" style="display:none;"></div>

<script>
function esc(s) {{
return String(s ?? "").replaceAll("&", "&amp;").replaceAll("<","&lt;").replaceAll(">","&gt;");
}}

function renderAudit(audit) {{
const summary = audit?.summary ?? "";
const findings = Array.isArray(audit?.findings) ? audit.findings : [];
const conclusion = audit?.conclusion ?? "";

let html = '';
html += `<div class="title">Summary</div>`;
html += `<div class="sub">${{esc(summary)}}</div>`;

html += `<div class="title" style="margin-top:14px;">Findings</div>`;
if (!findings.length) {{
html += `<div class="muted">No findings returned.</div>`;
}} else {{
html += `<div class="findings">`;
for (const f of findings) {{
const refs = Array.isArray(f?.evidence_refs) ? f.evidence_refs.join(", ") : "";
html += `
<div class="card">
<h3>${{esc(f?.topic ?? "Finding")}}</h3>
<p class="kv"><b>Requirement:</b> ${{esc(f?.requirement ?? "")}}</p>
<p class="kv"><b>Observation:</b> ${{esc(f?.observation ?? "")}}</p>
<p class="kv"><b>Risk:</b> ${{esc(f?.risk ?? "")}}</p>
<p class="muted"><b>Evidence refs:</b> ${{esc(refs)}}</p>
</div>
`;
}}
html += `</div>`;
}}

html += `<div class="title" style="margin-top:14px;">Conclusion</div>`;
html += `<div class="sub">${{esc(conclusion)}}</div>`;
return html;
}}

function renderEvidenceTable(sources) {{
if (!Array.isArray(sources) || !sources.length) return '';
let rows = '';
for (const s of sources) {{
rows += `
<tr>
<td>${{esc(s.n)}}</td>
<td>${{esc(s.id)}}</td>
<td>${{esc(s.score)}}</td>
<td>${{esc(s["GROUPS"])}}</td>
<td>${{esc(s["FREQUENCY TYPE"])}}</td>
<td>${{esc(s["RESPONSIBILITY"])}}</td>
</tr>
`;
}}
return `
<div class="evidence">
<div class="title">Evidence</div>
<table>
<thead>
<tr>
<th>#</th>
<th>ID</th>
<th>Score</th>
<th>GROUPS</th>
<th>FREQUENCY</th>
<th>RESPONSIBILITY</th>
</tr>
</thead>
<tbody>${{rows}}</tbody>
</table>
</div>
`;
}}

async function send() {{
const q = document.getElementById('q').value;
const panel = document.getElementById('result');
panel.style.display = 'block';
panel.className = 'panel';
panel.innerHTML = `<div class="title">Working...</div><div class="muted">Embedding → Qdrant → LLM</div>`;

try {{
const r = await fetch('/ask', {{
method: 'POST',
headers: {{ 'Content-Type': 'application/json' }},
body: JSON.stringify({{ q }})
}});

const data = await r.json();

if (!data.ok) {{
panel.className = 'panel error';
panel.innerHTML = `
<div class="title">Error</div>
<div class="sub">${{esc(data.error ?? "Request failed")}}</div>
<pre>${{esc(JSON.stringify(data, null, 2))}}</pre>
`;
return;
}}

const audit = data.audit;
const sources = data.sources;

const auditHtml = renderAudit(audit);
const evidenceHtml = renderEvidenceTable(sources);

panel.innerHTML = `
${{auditHtml}}
${{evidenceHtml}}
<div class="row2">
<div class="checkbox">
<input id="rawToggle" type="checkbox" onchange="toggleRaw()" />
<label for="rawToggle">Show raw JSON</label>
</div>
<div class="right muted">TopK: ${{esc(data.debug?.top_k)}}</div>
</div>
<div id="rawBlock" style="display:none; margin-top:10px;">
<pre>${{esc(JSON.stringify(data, null, 2))}}</pre>
</div>
`;

}} catch (e) {{
panel.className = 'panel error';
panel.innerHTML = `<div class="title">Error</div><pre>${{esc(String(e))}}</pre>`;
}}
}}

function toggleRaw() {{
const cb = document.getElementById('rawToggle');
const block = document.getElementById('rawBlock');
if (!cb || !block) return;
block.style.display = cb.checked ? 'block' : 'none';
}}
</script>
</body>
</html>
""".strip()
515
+
516
+
517
+ # ============================================================
518
+ # API
519
+ # ============================================================
520
@app.post("/ask")
def ask(req: AskRequest):
    # Pipeline: validate query -> embed -> Qdrant search -> evidence gate -> LLM.
    # Failures are reported as {"ok": False, ...} JSON with an HTTP error status;
    # successful answers (including the "insufficient evidence" one) are plain dicts.

    def reject(status: int, **fields: Any) -> JSONResponse:
        # Build the uniform error envelope; keyword order defines key order.
        return JSONResponse({"ok": False, **fields}, status_code=status)

    q = (req.q or "").strip()
    if not q:
        return reject(400, error="Empty query")
    if len(q) < MIN_QUERY_CHARS:
        return reject(400, error=f"Query too short (min {MIN_QUERY_CHARS} chars)")

    # Over-long queries are cut down rather than rejected (no-op for short ones).
    q = q[:MAX_QUERY_CHARS]

    # 1) Embedding
    try:
        query_vec = embed(q)
    except Exception as exc:
        return reject(
            500,
            error="Embedding failed",
            details=str(exc),
            debug={
                "embed_model": EMBED_MODEL,
                "vector_size": VECTOR_SIZE,
            },
        )

    # 2) Qdrant search (REST)
    try:
        raw_points = qdrant_search_rest(query_vec, TOP_K)
    except Exception as exc:
        return reject(
            500,
            error="Qdrant search failed",
            details=str(exc),
            debug={
                "qdrant_url": QDRANT_URL,
                "collection": QDRANT_COLLECTION,
                "qdrant_api_key_set": bool(QDRANT_API_KEY),
            },
        )

    # Normalize hits for downstream: keep only id / score / payload.
    hits: List[Dict[str, Any]] = [
        {
            "id": point.get("id"),
            "score": point.get("score"),
            "payload": point.get("payload") or {},
        }
        for point in raw_points
    ]

    # Evidence gate: count hits whose score reaches the threshold (a missing
    # score counts as 0).
    strong_hits = len([h for h in hits if (h.get("score") or 0) >= SCORE_THRESHOLD])
    evidence_text, sources = build_evidence_blocks(hits)

    def debug_info(llm_called: bool) -> Dict[str, Any]:
        # Diagnostics block shared by both successful response shapes.
        return {
            "qdrant_url": QDRANT_URL,
            "collection": QDRANT_COLLECTION,
            "top_k": TOP_K,
            "embed_model": EMBED_MODEL,
            "vector_size": VECTOR_SIZE,
            "llm_model": LLM_MODEL,
            "strong_hits": strong_hits,
            "score_threshold": SCORE_THRESHOLD,
            "min_strong_hits": MIN_STRONG_HITS,
            "llm_called": llm_called,
        }

    if strong_hits < MIN_STRONG_HITS:
        # Too little strong evidence: answer with a canned audit, skip the LLM.
        gated_audit = {
            "summary": "Insufficient evidence found in PMS data for a grounded audit answer.",
            "findings": [
                {
                    "topic": "Evidence gating",
                    "requirement": f"At least {MIN_STRONG_HITS} hits with score >= {SCORE_THRESHOLD}",
                    "observation": f"Only {strong_hits} strong hits were retrieved.",
                    "risk": "Answer may be speculative without sufficient PMS evidence.",
                    "evidence_refs": [],
                }
            ],
            "conclusion": "Please refine the question or ensure the relevant PMS/manual records exist in the collection.",
        }
        return {
            "ok": True,
            "query": q,
            "audit": gated_audit,
            "sources": sources,
            "hits": hits if RETURN_RAW_HITS else [],
            "debug": debug_info(False),
        }

    # 3) LLM audit JSON (strict)
    try:
        audit = run_llm_audit_json(q, evidence_text)
    except Exception as exc:
        return reject(
            500,
            error="LLM failed",
            details=str(exc),
            debug={"llm_model": LLM_MODEL},
            sources=sources,
            hits=hits if RETURN_RAW_HITS else [],
        )

    return {
        "ok": True,
        "query": q,
        "audit": audit,  # STRICT JSON (parsed)
        "sources": sources,  # compact evidence table for UI
        "hits": hits if RETURN_RAW_HITS else [],
        "debug": debug_info(True),
    }
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.110
2
+ uvicorn[standard]>=0.27
3
+
4
+ openai>=2.0
5
+ requests>=2.31
6
+ python-dotenv>=1.0
7
+
8
+ pydantic>=2,<3
9
+ jinja2
10
+
11
+ qdrant-client>=1.7
12
+
13
+ # Optional (used for PMS Excel ingestion scripts)
14
+ pandas>=2.0
15
+ xlrd==2.0.1