srilakshu012456 committed on
Commit
46344cb
·
verified ·
1 Parent(s): dd5e496

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +124 -10
main.py CHANGED
@@ -161,24 +161,138 @@ def _ensure_numbering(text: str) -> str:
161
  out.append(f"{marker} {seg}")
162
  return "\n".join(out)
163
 
 
164
  def _filter_error_lines_by_query(text: str, query: str, max_lines: int = 4) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  def _norm(s: str) -> str:
166
  s = (s or "").lower()
167
  s = re.sub(r"[^\w\s]", " ", s)
168
  s = re.sub(r"\s+", " ", s).strip()
169
  return s
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  q = _norm(query)
171
- q_terms = [t for t in q.split() if len(t) > 2]
172
- if not q_terms:
173
- return text or ""
174
- kept: List[str] = []
175
- for ln in _normalize_lines(text):
 
 
 
 
 
 
 
176
  ln_norm = _norm(ln)
177
- if any(t in ln_norm for t in q_terms):
178
- kept.append(ln)
179
- if len(kept) >= max_lines:
180
- break
181
- return "\n".join(kept).strip() if kept else (text or "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  def _friendly_permission_reply(raw: str) -> str:
184
  line = (raw or "").strip()
 
161
  out.append(f"{marker} {seg}")
162
  return "\n".join(out)
163
 
164
+
165
  def _filter_error_lines_by_query(text: str, query: str, max_lines: int = 4) -> str:
166
+ """
167
+ Pick the most relevant 'Common Errors & Resolution' bullets for the user's message.
168
+ Generic across SOPs via error families + phrase overlap.
169
+
170
+ Prioritization:
171
+ 1) error-family match (NOT_FOUND/MISMATCH/LOCKED/PERMISSION/TIMEOUT/SYNC),
172
+ 2) anchored starts (line begins with the error phrase/heading),
173
+ 3) multi-word overlap (bigrams/trigrams),
174
+ 4) token overlap,
175
+ 5) bullet/heading formatting bonus.
176
+
177
+ If no line matches positively, falls back to the first few lines.
178
+ """
179
+
180
+ import re
181
+ from typing import List, Tuple
182
+
183
+ # --- Generic error families (SOP-wide) ---
184
+ ERROR_FAMILIES = {
185
+ "NOT_FOUND": (
186
+ "not found", "missing", "does not exist", "doesn't exist",
187
+ "unavailable", "not available", "cannot find", "no such", "not present", "absent"
188
+ ),
189
+ "MISMATCH": (
190
+ "mismatch", "doesn't match", "does not match", "variance",
191
+ "difference", "discrepancy", "not equal"
192
+ ),
193
+ "LOCKED": (
194
+ "locked", "status locked", "blocked", "read only", "read-only", "frozen", "freeze"
195
+ ),
196
+ "PERMISSION": (
197
+ "permission", "permissions", "access denied", "not authorized",
198
+ "not authorised", "insufficient privileges", "no access", "authorization", "authorisation"
199
+ ),
200
+ "TIMEOUT": (
201
+ "timeout", "timed out", "network", "connection", "unable to connect",
202
+ "disconnected", "no network"
203
+ ),
204
+ "SYNC": (
205
+ "sync", "synchronization", "synchronisation", "replication",
206
+ "refresh", "out of sync", "stale", "delay", "lag"
207
+ ),
208
+ }
209
+
210
+ # Normalizer
211
  def _norm(s: str) -> str:
212
  s = (s or "").lower()
213
  s = re.sub(r"[^\w\s]", " ", s)
214
  s = re.sub(r"\s+", " ", s).strip()
215
  return s
216
+
217
+ # Detect error families mentioned in a string
218
+ def _families_for(s: str) -> List[str]:
219
+ out = []
220
+ low = _norm(s)
221
+ for fam, syns in ERROR_FAMILIES.items():
222
+ if any(k in low for k in syns):
223
+ out.append(fam)
224
+ return out
225
+
226
+ # N-grams
227
+ def _ngrams(tokens: List[str], n: int) -> List[str]:
228
+ return [" ".join(tokens[i:i+n]) for i in range(len(tokens) - n + 1)]
229
+
230
+ # Normalize query
231
  q = _norm(query)
232
+ q_tokens = [t for t in q.split() if len(t) > 1]
233
+ q_bi = _ngrams(q_tokens, 2)
234
+ q_tri = _ngrams(q_tokens, 3)
235
+ q_families = set(_families_for(query))
236
+
237
+ # Candidate lines
238
+ lines = _normalize_lines(text)
239
+ if not lines:
240
+ return (text or "").strip()
241
+
242
+ scored: List[Tuple[float, str]] = []
243
+ for ln in lines:
244
  ln_norm = _norm(ln)
245
+ ln_families = set(_families_for(ln))
246
+
247
+ # --- Signals ---
248
+ # Family match (strong): any overlap between query families and line families
249
+ fam_overlap = len(q_families & ln_families)
250
+ fam_score = 1.60 * fam_overlap # strong boost when families line up
251
+
252
+ # Exact phrase (medium-strong)
253
+ exact_phrase = 1.00 if (q and q in ln_norm) else 0.0
254
+
255
+ # Anchored start (strong for bullet headings like "ASN not found: ...")
256
+ first2 = " ".join(q_tokens[:2]) if len(q_tokens) >= 2 else ""
257
+ first3 = " ".join(q_tokens[:3]) if len(q_tokens) >= 3 else ""
258
+ anchored = 1.00 if (first3 and ln_norm.startswith(first3)) or (first2 and ln_norm.startswith(first2)) else 0.0
259
+
260
+ # Multi-word phrase overlap
261
+ bigram_hits = sum(1 for bg in q_bi if bg and bg in ln_norm)
262
+ trigram_hits = sum(1 for tg in q_tri if tg and tg in ln_norm)
263
+
264
+ # Token overlap (fallback)
265
+ token_overlap = sum(1 for t in q_tokens if t and t in ln_norm)
266
+
267
+ # --- Score composition (tuned for generic SOPs) ---
268
+ score = (
269
+ fam_score +
270
+ 0.90 * anchored +
271
+ 0.80 * trigram_hits +
272
+ 0.55 * bigram_hits +
273
+ 0.45 * exact_phrase +
274
+ 0.30 * token_overlap
275
+ )
276
+
277
+ # Small bonuses for bullets/heading-like lines
278
+ if re.match(r"^\s*[\-\*\u2022]\s*", ln): # bullet dot
279
+ score += 0.10
280
+ # Heading before ':' matches some part of the query
281
+ heading = ln_norm.split(":")[0].strip()
282
+ if heading and (heading in q or (first2 and first2 in heading)):
283
+ score += 0.15
284
+
285
+ scored.append((score, ln))
286
+
287
+ # Sort by score desc and take top max_lines
288
+ scored.sort(key=lambda x: x[0], reverse=True)
289
+ top = [ln for s, ln in scored[:max_lines] if s > 0.0]
290
+
291
+ # Fallback if everything scored zero
292
+ if not top:
293
+ top = lines[:max_lines]
294
+
295
+ return "\n".join(top).strip()
296
 
297
  def _friendly_permission_reply(raw: str) -> str:
298
  line = (raw or "").strip()