Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -161,24 +161,138 @@ def _ensure_numbering(text: str) -> str:
|
|
| 161 |
out.append(f"{marker} {seg}")
|
| 162 |
return "\n".join(out)
|
| 163 |
|
|
|
|
| 164 |
def _filter_error_lines_by_query(text: str, query: str, max_lines: int = 4) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
def _norm(s: str) -> str:
|
| 166 |
s = (s or "").lower()
|
| 167 |
s = re.sub(r"[^\w\s]", " ", s)
|
| 168 |
s = re.sub(r"\s+", " ", s).strip()
|
| 169 |
return s
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
q = _norm(query)
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
ln_norm = _norm(ln)
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
def _friendly_permission_reply(raw: str) -> str:
|
| 184 |
line = (raw or "").strip()
|
|
|
|
| 161 |
out.append(f"{marker} {seg}")
|
| 162 |
return "\n".join(out)
|
| 163 |
|
| 164 |
+
|
| 165 |
def _filter_error_lines_by_query(text: str, query: str, max_lines: int = 4) -> str:
    """
    Pick the most relevant 'Common Errors & Resolution' bullets for the user's message.
    Generic across SOPs via error families + phrase overlap.

    Prioritization:
      1) error-family match (NOT_FOUND/MISMATCH/LOCKED/PERMISSION/TIMEOUT/SYNC),
      2) anchored starts (line begins with the error phrase/heading),
      3) multi-word overlap (bigrams/trigrams),
      4) token overlap,
      5) bullet/heading formatting bonus.

    If no line matches positively, falls back to the first few lines.

    Args:
        text: Raw SOP error-section text; split into candidate lines via
            `_normalize_lines` (defined elsewhere in this module).
        query: The user's message used to rank candidate lines.
        max_lines: Maximum number of lines returned (default 4).

    Returns:
        Newline-joined top-scoring lines, or the stripped original text when
        `_normalize_lines` yields nothing.
    """
    import re
    from typing import List, Tuple

    # --- Generic error families (SOP-wide) ---
    # Each family maps to a tuple of synonym phrases; a string belongs to a
    # family when any synonym is a substring of its normalized form.
    ERROR_FAMILIES = {
        "NOT_FOUND": (
            "not found", "missing", "does not exist", "doesn't exist",
            "unavailable", "not available", "cannot find", "no such", "not present", "absent"
        ),
        "MISMATCH": (
            "mismatch", "doesn't match", "does not match", "variance",
            "difference", "discrepancy", "not equal"
        ),
        "LOCKED": (
            "locked", "status locked", "blocked", "read only", "read-only", "frozen", "freeze"
        ),
        "PERMISSION": (
            "permission", "permissions", "access denied", "not authorized",
            "not authorised", "insufficient privileges", "no access", "authorization", "authorisation"
        ),
        "TIMEOUT": (
            "timeout", "timed out", "network", "connection", "unable to connect",
            "disconnected", "no network"
        ),
        "SYNC": (
            "sync", "synchronization", "synchronisation", "replication",
            "refresh", "out of sync", "stale", "delay", "lag"
        ),
    }

    # Normalizer: lowercase, strip punctuation, collapse whitespace.
    # NOTE(review): intentionally shadows any module-level _norm so this
    # function stays self-contained.
    def _norm(s: str) -> str:
        s = (s or "").lower()
        s = re.sub(r"[^\w\s]", " ", s)
        s = re.sub(r"\s+", " ", s).strip()
        return s

    # Detect error families mentioned in a string.
    def _families_for(s: str) -> List[str]:
        out = []
        low = _norm(s)
        for fam, syns in ERROR_FAMILIES.items():
            if any(k in low for k in syns):
                out.append(fam)
        return out

    # Contiguous n-grams over a token list.
    def _ngrams(tokens: List[str], n: int) -> List[str]:
        return [" ".join(tokens[i:i+n]) for i in range(len(tokens) - n + 1)]

    # Normalize query once; single-character tokens carry no signal.
    q = _norm(query)
    q_tokens = [t for t in q.split() if len(t) > 1]
    q_bi = _ngrams(q_tokens, 2)
    q_tri = _ngrams(q_tokens, 3)
    q_families = set(_families_for(query))

    # Loop invariants (fix: previously recomputed per candidate line):
    # leading 2-/3-token prefixes of the query, used for anchored starts.
    first2 = " ".join(q_tokens[:2]) if len(q_tokens) >= 2 else ""
    first3 = " ".join(q_tokens[:3]) if len(q_tokens) >= 3 else ""

    # Candidate lines
    lines = _normalize_lines(text)
    if not lines:
        return (text or "").strip()

    scored: List[Tuple[float, str]] = []
    for ln in lines:
        ln_norm = _norm(ln)
        ln_families = set(_families_for(ln))

        # --- Signals ---
        # Family match (strong): any overlap between query families and line families
        fam_overlap = len(q_families & ln_families)
        fam_score = 1.60 * fam_overlap  # strong boost when families line up

        # Exact phrase (medium-strong)
        exact_phrase = 1.00 if (q and q in ln_norm) else 0.0

        # Anchored start (strong for bullet headings like "ASN not found: ...")
        anchored = 1.00 if (first3 and ln_norm.startswith(first3)) or (first2 and ln_norm.startswith(first2)) else 0.0

        # Multi-word phrase overlap
        bigram_hits = sum(1 for bg in q_bi if bg and bg in ln_norm)
        trigram_hits = sum(1 for tg in q_tri if tg and tg in ln_norm)

        # Token overlap (fallback)
        token_overlap = sum(1 for t in q_tokens if t and t in ln_norm)

        # --- Score composition (tuned for generic SOPs) ---
        score = (
            fam_score +
            0.90 * anchored +
            0.80 * trigram_hits +
            0.55 * bigram_hits +
            0.45 * exact_phrase +
            0.30 * token_overlap
        )

        # Small bonuses for bullets/heading-like lines
        if re.match(r"^\s*[\-\*\u2022]\s*", ln):  # bullet dot
            score += 0.10
        # Heading before ':' matches some part of the query
        heading = ln_norm.split(":")[0].strip()
        if heading and (heading in q or (first2 and first2 in heading)):
            score += 0.15

        scored.append((score, ln))

    # Sort by score desc and take top max_lines with positive score.
    scored.sort(key=lambda x: x[0], reverse=True)
    top = [ln for s, ln in scored[:max_lines] if s > 0.0]

    # Fallback if everything scored zero
    if not top:
        top = lines[:max_lines]

    return "\n".join(top).strip()
|
| 296 |
|
| 297 |
def _friendly_permission_reply(raw: str) -> str:
|
| 298 |
line = (raw or "").strip()
|