Spaces:
Running
Running
Align n-gram selection rules across summary and optimizer.
Browse files
Apply stage candidate rules for bi/tri-grams and keyword-derived unigrams, add n-gram target rotation to avoid single-term stalls, and sync summary n-gram logic with optimizer tolerance policy.
Made-with: Cursor
- docs/TEXT_OPTIMIZER_PRINCIPLES.md +17 -0
- optimizer.py +63 -8
- templates/index.html +40 -11
docs/TEXT_OPTIMIZER_PRINCIPLES.md
CHANGED
|
@@ -41,6 +41,23 @@ Update it whenever optimization policy changes.
|
|
| 41 |
- if `avg >= 4`: acceptable range is `avg +/- 20%`
|
| 42 |
- if `avg < 4`: acceptable range is `avg +/- 50%`
|
| 43 |
- N-gram signal is counted only when term is outside tolerance and present in enough competitors.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
## 6) Local acceptance and batch accumulation
|
| 46 |
|
|
|
|
| 41 |
- if `avg >= 4`: acceptable range is `avg +/- 20%`
|
| 42 |
- if `avg < 4`: acceptable range is `avg +/- 50%`
|
| 43 |
- N-gram signal is counted only when term is outside tolerance and present in enough competitors.
|
| 44 |
+
- Selection rules (multi-competitor mode, `competitors > 1`):
|
| 45 |
+
- bi-grams and tri-grams are eligible when present in `>= 2` competitors;
|
| 46 |
+
- unigrams are eligible only if they are part of user keyword phrases and present in `>= 2` competitors.
|
| 47 |
+
- Iteration behavior:
|
| 48 |
+
- optimizer may work on one n-gram at a time per step,
|
| 49 |
+
- if there is no primary progress on the current n-gram target, it rotates to the next eligible n-gram candidate.
|
| 50 |
+
|
| 51 |
+
## 5.1 Summary logic memory (current)
|
| 52 |
+
|
| 53 |
+
- Summary recommendation triggers:
|
| 54 |
+
- BERT warning when phrase score `< 0.70`;
|
| 55 |
+
- BM25 warning when `REMOVE >= 4`;
|
| 56 |
+
- N-gram warning when an eligible term's count falls outside the tolerance range around the competitor average (too low or too high);
|
| 57 |
+
- Title warning when Title BERT `< 0.65`;
|
| 58 |
+
- Semantic warning when keyword terms are weaker than competitor average.
|
| 59 |
+
- For N-grams in summary:
|
| 60 |
+
- summary renders only the top 10 rows for readability, while the optimizer runs against the full eligible candidate set.
|
| 61 |
|
| 62 |
## 6) Local acceptance and batch accumulation
|
| 63 |
|
optimizer.py
CHANGED
|
@@ -254,6 +254,38 @@ def _ngram_deviation_ratio(target_count: float, competitor_avg: float) -> float:
|
|
| 254 |
return abs(target_count - competitor_avg) / max(competitor_avg, 1e-6)
|
| 255 |
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
def _chunk_ngram_count(text: str, ngram_label: str, language: str) -> int:
|
| 258 |
toks = _filter_stopwords(_tokenize(text), language)
|
| 259 |
phrase_toks = _filter_stopwords(_tokenize(ngram_label), language)
|
|
@@ -293,11 +325,15 @@ def _compute_metrics(
|
|
| 293 |
|
| 294 |
ngram_signal_count = 0
|
| 295 |
ngram_gap_sum = 0.0
|
|
|
|
| 296 |
ngrams = analysis.get("ngram_stats", {}) or {}
|
| 297 |
-
for
|
| 298 |
-
|
|
|
|
|
|
|
| 299 |
comp_occ = int(item.get("comp_occurrence", 0))
|
| 300 |
-
|
|
|
|
| 301 |
continue
|
| 302 |
target = float(item.get("target_count", 0))
|
| 303 |
comp_avg = float(item.get("competitor_avg", 0))
|
|
@@ -393,6 +429,7 @@ def _choose_optimization_goal(
|
|
| 393 |
language: str,
|
| 394 |
stage: str = "bert",
|
| 395 |
bert_stage_target: float = BERT_TARGET_THRESHOLD,
|
|
|
|
| 396 |
) -> Dict[str, Any]:
|
| 397 |
candidates: Dict[str, Dict[str, Any]] = {}
|
| 398 |
bert_details = analysis.get("bert_analysis", {}).get("detailed", []) or []
|
|
@@ -434,6 +471,8 @@ def _choose_optimization_goal(
|
|
| 434 |
# N-gram balancing (toward competitor average with tolerance policy).
|
| 435 |
ngram_rows: List[Tuple[str, float, float, float, int, float]] = []
|
| 436 |
ngram_stats = analysis.get("ngram_stats", {}) or {}
|
|
|
|
|
|
|
| 437 |
for bucket_name, bucket in ngram_stats.items():
|
| 438 |
if not isinstance(bucket, list):
|
| 439 |
continue
|
|
@@ -444,7 +483,7 @@ def _choose_optimization_goal(
|
|
| 444 |
target = float(item.get("target_count", 0))
|
| 445 |
comp_avg = float(item.get("competitor_avg", 0))
|
| 446 |
comp_occ = int(item.get("comp_occurrence", 0))
|
| 447 |
-
if comp_occ
|
| 448 |
continue
|
| 449 |
if not _is_ngram_outside_tolerance(target, comp_avg):
|
| 450 |
continue
|
|
@@ -453,7 +492,8 @@ def _choose_optimization_goal(
|
|
| 453 |
ngram_rows.append((ngram_label, target, comp_avg, tol, comp_occ, dev_ratio))
|
| 454 |
if ngram_rows:
|
| 455 |
ngram_rows.sort(key=lambda x: (x[5], x[4], x[2]), reverse=True)
|
| 456 |
-
|
|
|
|
| 457 |
candidates["ngram"] = {
|
| 458 |
"type": "ngram",
|
| 459 |
"label": label,
|
|
@@ -465,6 +505,8 @@ def _choose_optimization_goal(
|
|
| 465 |
"ngram_lower_bound": round(comp_avg * (1.0 - tol), 3),
|
| 466 |
"ngram_upper_bound": round(comp_avg * (1.0 + tol), 3),
|
| 467 |
"ngram_direction": "increase" if target < comp_avg else "decrease",
|
|
|
|
|
|
|
| 468 |
}
|
| 469 |
|
| 470 |
title_bert = analysis.get("title_analysis", {}).get("bert", {}) or {}
|
|
@@ -1204,6 +1246,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1204 |
queued_candidates: List[Dict[str, Any]] = []
|
| 1205 |
stage_idx = 0
|
| 1206 |
stage_no_progress_steps = 0
|
|
|
|
| 1207 |
|
| 1208 |
for step in range(max_iterations):
|
| 1209 |
while stage_idx < len(STAGE_ORDER) and _is_stage_complete(
|
|
@@ -1223,6 +1266,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1223 |
language,
|
| 1224 |
stage=active_stage,
|
| 1225 |
bert_stage_target=bert_stage_target,
|
|
|
|
| 1226 |
)
|
| 1227 |
if goal["type"] == "none":
|
| 1228 |
stage_idx += 1
|
|
@@ -1545,10 +1589,13 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1545 |
current_analysis = best_local["analysis"]
|
| 1546 |
current_semantic = best_local["semantic"]
|
| 1547 |
current_metrics = best_local["metrics"]
|
| 1548 |
-
|
|
|
|
| 1549 |
stage_no_progress_steps = 0
|
| 1550 |
else:
|
| 1551 |
stage_no_progress_steps += 1
|
|
|
|
|
|
|
| 1552 |
applied_changes += 1
|
| 1553 |
queued_candidates = []
|
| 1554 |
|
|
@@ -1687,10 +1734,13 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1687 |
current_analysis = best_batch["batch_analysis"]
|
| 1688 |
current_semantic = best_batch["batch_semantic"]
|
| 1689 |
current_metrics = best_batch["batch_metrics"]
|
| 1690 |
-
|
|
|
|
| 1691 |
stage_no_progress_steps = 0
|
| 1692 |
else:
|
| 1693 |
stage_no_progress_steps += 1
|
|
|
|
|
|
|
| 1694 |
applied_changes += 1
|
| 1695 |
batch_applied = True
|
| 1696 |
batch_info = {
|
|
@@ -1809,6 +1859,8 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1809 |
}
|
| 1810 |
)
|
| 1811 |
stage_no_progress_steps += 1
|
|
|
|
|
|
|
| 1812 |
if stage_no_progress_steps >= 3 and stage_idx < len(STAGE_ORDER) - 1:
|
| 1813 |
stage_idx += 1
|
| 1814 |
stage_no_progress_steps = 0
|
|
@@ -1838,10 +1890,13 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1838 |
current_analysis = best["analysis"]
|
| 1839 |
current_semantic = best["semantic"]
|
| 1840 |
current_metrics = best["metrics"]
|
| 1841 |
-
|
|
|
|
| 1842 |
stage_no_progress_steps = 0
|
| 1843 |
else:
|
| 1844 |
stage_no_progress_steps += 1
|
|
|
|
|
|
|
| 1845 |
applied_changes += 1
|
| 1846 |
queued_candidates = []
|
| 1847 |
|
|
|
|
| 254 |
return abs(target_count - competitor_avg) / max(competitor_avg, 1e-6)
|
| 255 |
|
| 256 |
|
| 257 |
+
def _keyword_unigram_set(keywords: List[str], language: str) -> set:
|
| 258 |
+
out = set()
|
| 259 |
+
for kw in keywords:
|
| 260 |
+
toks = _filter_stopwords(_tokenize(kw), language)
|
| 261 |
+
for t in toks:
|
| 262 |
+
out.add(t)
|
| 263 |
+
return out
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
def _is_ngram_stage_candidate(
|
| 267 |
+
ngram_label: str,
|
| 268 |
+
comp_occurrence: int,
|
| 269 |
+
competitor_count: int,
|
| 270 |
+
keyword_unigrams: set,
|
| 271 |
+
) -> bool:
|
| 272 |
+
ngram = (ngram_label or "").strip().lower()
|
| 273 |
+
if not ngram:
|
| 274 |
+
return False
|
| 275 |
+
tokens = _tokenize(ngram)
|
| 276 |
+
n = len(tokens)
|
| 277 |
+
if competitor_count > 1:
|
| 278 |
+
if comp_occurrence < 2:
|
| 279 |
+
return False
|
| 280 |
+
if n >= 2:
|
| 281 |
+
# For multi-competitor mode, bi/tri-grams with K>=2 are always candidates.
|
| 282 |
+
return True
|
| 283 |
+
# Unigrams are candidates only if they belong to key phrases.
|
| 284 |
+
return n == 1 and tokens[0] in keyword_unigrams
|
| 285 |
+
# Single-competitor mode: keep broader eligibility.
|
| 286 |
+
return comp_occurrence >= 1
|
| 287 |
+
|
| 288 |
+
|
| 289 |
def _chunk_ngram_count(text: str, ngram_label: str, language: str) -> int:
|
| 290 |
toks = _filter_stopwords(_tokenize(text), language)
|
| 291 |
phrase_toks = _filter_stopwords(_tokenize(ngram_label), language)
|
|
|
|
| 325 |
|
| 326 |
ngram_signal_count = 0
|
| 327 |
ngram_gap_sum = 0.0
|
| 328 |
+
keyword_unigrams = _keyword_unigram_set(keywords, language)
|
| 329 |
ngrams = analysis.get("ngram_stats", {}) or {}
|
| 330 |
+
for bucket in ngrams.values():
|
| 331 |
+
if not isinstance(bucket, list):
|
| 332 |
+
continue
|
| 333 |
+
for item in bucket:
|
| 334 |
comp_occ = int(item.get("comp_occurrence", 0))
|
| 335 |
+
ngram_label = str(item.get("ngram", ""))
|
| 336 |
+
if not _is_ngram_stage_candidate(ngram_label, comp_occ, competitor_count, keyword_unigrams):
|
| 337 |
continue
|
| 338 |
target = float(item.get("target_count", 0))
|
| 339 |
comp_avg = float(item.get("competitor_avg", 0))
|
|
|
|
| 429 |
language: str,
|
| 430 |
stage: str = "bert",
|
| 431 |
bert_stage_target: float = BERT_TARGET_THRESHOLD,
|
| 432 |
+
stage_cursor: int = 0,
|
| 433 |
) -> Dict[str, Any]:
|
| 434 |
candidates: Dict[str, Dict[str, Any]] = {}
|
| 435 |
bert_details = analysis.get("bert_analysis", {}).get("detailed", []) or []
|
|
|
|
| 471 |
# N-gram balancing (toward competitor average with tolerance policy).
|
| 472 |
ngram_rows: List[Tuple[str, float, float, float, int, float]] = []
|
| 473 |
ngram_stats = analysis.get("ngram_stats", {}) or {}
|
| 474 |
+
competitor_count = len((analysis.get("word_counts", {}) or {}).get("competitors", []) or [])
|
| 475 |
+
keyword_unigrams = _keyword_unigram_set(keywords, language)
|
| 476 |
for bucket_name, bucket in ngram_stats.items():
|
| 477 |
if not isinstance(bucket, list):
|
| 478 |
continue
|
|
|
|
| 483 |
target = float(item.get("target_count", 0))
|
| 484 |
comp_avg = float(item.get("competitor_avg", 0))
|
| 485 |
comp_occ = int(item.get("comp_occurrence", 0))
|
| 486 |
+
if not _is_ngram_stage_candidate(ngram_label, comp_occ, competitor_count, keyword_unigrams):
|
| 487 |
continue
|
| 488 |
if not _is_ngram_outside_tolerance(target, comp_avg):
|
| 489 |
continue
|
|
|
|
| 492 |
ngram_rows.append((ngram_label, target, comp_avg, tol, comp_occ, dev_ratio))
|
| 493 |
if ngram_rows:
|
| 494 |
ngram_rows.sort(key=lambda x: (x[5], x[4], x[2]), reverse=True)
|
| 495 |
+
pick = max(0, int(stage_cursor)) % len(ngram_rows)
|
| 496 |
+
label, target, comp_avg, tol, _, _ = ngram_rows[pick]
|
| 497 |
candidates["ngram"] = {
|
| 498 |
"type": "ngram",
|
| 499 |
"label": label,
|
|
|
|
| 505 |
"ngram_lower_bound": round(comp_avg * (1.0 - tol), 3),
|
| 506 |
"ngram_upper_bound": round(comp_avg * (1.0 + tol), 3),
|
| 507 |
"ngram_direction": "increase" if target < comp_avg else "decrease",
|
| 508 |
+
"ngram_rank_index": pick,
|
| 509 |
+
"ngram_candidates_total": len(ngram_rows),
|
| 510 |
}
|
| 511 |
|
| 512 |
title_bert = analysis.get("title_analysis", {}).get("bert", {}) or {}
|
|
|
|
| 1246 |
queued_candidates: List[Dict[str, Any]] = []
|
| 1247 |
stage_idx = 0
|
| 1248 |
stage_no_progress_steps = 0
|
| 1249 |
+
stage_goal_cursor: Dict[str, int] = {}
|
| 1250 |
|
| 1251 |
for step in range(max_iterations):
|
| 1252 |
while stage_idx < len(STAGE_ORDER) and _is_stage_complete(
|
|
|
|
| 1266 |
language,
|
| 1267 |
stage=active_stage,
|
| 1268 |
bert_stage_target=bert_stage_target,
|
| 1269 |
+
stage_cursor=int(stage_goal_cursor.get(active_stage, 0)),
|
| 1270 |
)
|
| 1271 |
if goal["type"] == "none":
|
| 1272 |
stage_idx += 1
|
|
|
|
| 1589 |
current_analysis = best_local["analysis"]
|
| 1590 |
current_semantic = best_local["semantic"]
|
| 1591 |
current_metrics = best_local["metrics"]
|
| 1592 |
+
progressed_stage = _stage_primary_progress(active_stage, prev_metrics, current_metrics)
|
| 1593 |
+
if progressed_stage:
|
| 1594 |
stage_no_progress_steps = 0
|
| 1595 |
else:
|
| 1596 |
stage_no_progress_steps += 1
|
| 1597 |
+
if active_stage == "ngram":
|
| 1598 |
+
stage_goal_cursor[active_stage] = int(stage_goal_cursor.get(active_stage, 0)) + 1
|
| 1599 |
applied_changes += 1
|
| 1600 |
queued_candidates = []
|
| 1601 |
|
|
|
|
| 1734 |
current_analysis = best_batch["batch_analysis"]
|
| 1735 |
current_semantic = best_batch["batch_semantic"]
|
| 1736 |
current_metrics = best_batch["batch_metrics"]
|
| 1737 |
+
progressed_stage = _stage_primary_progress(active_stage, prev_metrics, current_metrics)
|
| 1738 |
+
if progressed_stage:
|
| 1739 |
stage_no_progress_steps = 0
|
| 1740 |
else:
|
| 1741 |
stage_no_progress_steps += 1
|
| 1742 |
+
if active_stage == "ngram":
|
| 1743 |
+
stage_goal_cursor[active_stage] = int(stage_goal_cursor.get(active_stage, 0)) + 1
|
| 1744 |
applied_changes += 1
|
| 1745 |
batch_applied = True
|
| 1746 |
batch_info = {
|
|
|
|
| 1859 |
}
|
| 1860 |
)
|
| 1861 |
stage_no_progress_steps += 1
|
| 1862 |
+
if active_stage == "ngram":
|
| 1863 |
+
stage_goal_cursor[active_stage] = int(stage_goal_cursor.get(active_stage, 0)) + 1
|
| 1864 |
if stage_no_progress_steps >= 3 and stage_idx < len(STAGE_ORDER) - 1:
|
| 1865 |
stage_idx += 1
|
| 1866 |
stage_no_progress_steps = 0
|
|
|
|
| 1890 |
current_analysis = best["analysis"]
|
| 1891 |
current_semantic = best["semantic"]
|
| 1892 |
current_metrics = best["metrics"]
|
| 1893 |
+
progressed_stage = _stage_primary_progress(active_stage, prev_metrics, current_metrics)
|
| 1894 |
+
if progressed_stage:
|
| 1895 |
stage_no_progress_steps = 0
|
| 1896 |
else:
|
| 1897 |
stage_no_progress_steps += 1
|
| 1898 |
+
if active_stage == "ngram":
|
| 1899 |
+
stage_goal_cursor[active_stage] = int(stage_goal_cursor.get(active_stage, 0)) + 1
|
| 1900 |
applied_changes += 1
|
| 1901 |
queued_candidates = []
|
| 1902 |
|
templates/index.html
CHANGED
|
@@ -1412,22 +1412,51 @@
|
|
| 1412 |
|
| 1413 |
// === 3) N-grams: сигнал только если 2+ конкурента ===
|
| 1414 |
const ngramSignals = [];
|
| 1415 |
-
const
|
| 1416 |
-
|
| 1417 |
-
|
| 1418 |
-
|
| 1419 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1420 |
const compOcc = Number(item.comp_occurrence || 0);
|
| 1421 |
const targetCount = Number(item.target_count || 0);
|
| 1422 |
const compAvg = Number(item.competitor_avg || 0);
|
| 1423 |
-
if (
|
| 1424 |
-
|
| 1425 |
-
|
| 1426 |
ngramSignals.push({
|
| 1427 |
ngram: item.ngram,
|
| 1428 |
compOcc,
|
| 1429 |
targetCount,
|
| 1430 |
-
compAvg
|
|
|
|
| 1431 |
});
|
| 1432 |
});
|
| 1433 |
});
|
|
@@ -1439,7 +1468,7 @@
|
|
| 1439 |
if (ngramSignals.length > 0) {
|
| 1440 |
const topSignals = ngramSignals
|
| 1441 |
.slice()
|
| 1442 |
-
.sort((a, b) => (b.compOcc - a.compOcc) || (b.compAvg - a.compAvg))
|
| 1443 |
.slice(0, 10)
|
| 1444 |
.map(x => ({
|
| 1445 |
ngram: x.ngram,
|
|
@@ -1661,7 +1690,7 @@
|
|
| 1661 |
container.innerHTML = `
|
| 1662 |
<div class="stat-card">
|
| 1663 |
<h5 class="card-title mb-3">Итоговые рекомендации (что сделать в первую очередь)</h5>
|
| 1664 |
-
<p class="text-muted small mb-3">Сводка формируется по правилам: BERT < 0.70, BM25 remove ≥ 4, n-граммы
|
| 1665 |
${recCards}
|
| 1666 |
</div>
|
| 1667 |
<div class="stat-card">
|
|
|
|
| 1412 |
|
| 1413 |
// === 3) N-grams: сигнал только если 2+ конкурента ===
|
| 1414 |
const ngramSignals = [];
|
| 1415 |
+
const ngramStats = analysisData.ngram_stats || {};
|
| 1416 |
+
const kwUnigrams = new Set();
|
| 1417 |
+
keywordsRaw.forEach(kw => {
|
| 1418 |
+
String(kw || '')
|
| 1419 |
+
.toLowerCase()
|
| 1420 |
+
.replace(/[^\p{L}\p{N}\s-]+/gu, ' ')
|
| 1421 |
+
.split(/\s+/)
|
| 1422 |
+
.map(v => v.trim())
|
| 1423 |
+
.filter(v => v.length >= 2)
|
| 1424 |
+
.forEach(t => kwUnigrams.add(t));
|
| 1425 |
+
});
|
| 1426 |
+
const isOutsideTolerance = (targetCount, compAvg) => {
|
| 1427 |
+
if (compAvg <= 0) return false;
|
| 1428 |
+
const tol = compAvg >= 4 ? 0.20 : 0.50;
|
| 1429 |
+
return targetCount < compAvg * (1 - tol) || targetCount > compAvg * (1 + tol);
|
| 1430 |
+
};
|
| 1431 |
+
const isEligibleNgram = (ngram, compOcc) => {
|
| 1432 |
+
const toks = String(ngram || '')
|
| 1433 |
+
.toLowerCase()
|
| 1434 |
+
.replace(/[^\p{L}\p{N}\s-]+/gu, ' ')
|
| 1435 |
+
.split(/\s+/)
|
| 1436 |
+
.map(v => v.trim())
|
| 1437 |
+
.filter(v => v.length >= 2);
|
| 1438 |
+
if (!toks.length) return false;
|
| 1439 |
+
if (competitorCount > 1) {
|
| 1440 |
+
if (compOcc < 2) return false;
|
| 1441 |
+
if (toks.length >= 2) return true; // bi/tri-grams
|
| 1442 |
+
return kwUnigrams.has(toks[0]); // unigram from keyword phrase
|
| 1443 |
+
}
|
| 1444 |
+
return compOcc >= 1;
|
| 1445 |
+
};
|
| 1446 |
+
Object.values(ngramStats).forEach(bucket => {
|
| 1447 |
+
(Array.isArray(bucket) ? bucket : []).forEach(item => {
|
| 1448 |
const compOcc = Number(item.comp_occurrence || 0);
|
| 1449 |
const targetCount = Number(item.target_count || 0);
|
| 1450 |
const compAvg = Number(item.competitor_avg || 0);
|
| 1451 |
+
if (!isEligibleNgram(item.ngram, compOcc)) return;
|
| 1452 |
+
if (!isOutsideTolerance(targetCount, compAvg)) return;
|
| 1453 |
+
const devRatio = compAvg > 0 ? Math.abs(targetCount - compAvg) / compAvg : 0;
|
| 1454 |
ngramSignals.push({
|
| 1455 |
ngram: item.ngram,
|
| 1456 |
compOcc,
|
| 1457 |
targetCount,
|
| 1458 |
+
compAvg,
|
| 1459 |
+
devRatio
|
| 1460 |
});
|
| 1461 |
});
|
| 1462 |
});
|
|
|
|
| 1468 |
if (ngramSignals.length > 0) {
|
| 1469 |
const topSignals = ngramSignals
|
| 1470 |
.slice()
|
| 1471 |
+
.sort((a, b) => (b.devRatio - a.devRatio) || (b.compOcc - a.compOcc) || (b.compAvg - a.compAvg))
|
| 1472 |
.slice(0, 10)
|
| 1473 |
.map(x => ({
|
| 1474 |
ngram: x.ngram,
|
|
|
|
| 1690 |
container.innerHTML = `
|
| 1691 |
<div class="stat-card">
|
| 1692 |
<h5 class="card-title mb-3">Итоговые рекомендации (что сделать в первую очередь)</h5>
|
| 1693 |
+
<p class="text-muted small mb-3">Сводка формируется по правилам: BERT < 0.70, BM25 remove ≥ 4, n-граммы по допускам (±20% при Avg≥4, ±50% при Avg<4) с фильтром K>=2 для multi-competitor, Title BERT < 0.65, Semantic Core-разрыв по словам из ключей.</p>
|
| 1694 |
${recCards}
|
| 1695 |
</div>
|
| 1696 |
<div class="stat-card">
|