Spaces:
Running
Running
Enforce per-term n-gram loop and underrepresented-only targeting.
Browse files
Limit n-gram stage targets to underrepresented terms, run three attempts per n-gram before advancing, and document the per-term attempt policy in optimizer principles.
Made-with: Cursor
- docs/TEXT_OPTIMIZER_PRINCIPLES.md +3 -2
- optimizer.py +43 -25
docs/TEXT_OPTIMIZER_PRINCIPLES.md
CHANGED
|
@@ -45,8 +45,9 @@ Update it whenever optimization policy changes.
|
|
| 45 |
- bi-grams and tri-grams are eligible when present in `>= 2` competitors;
|
| 46 |
- unigrams are eligible only if they are part of user keyword phrases and present in `>= 2` competitors.
|
| 47 |
- Iteration behavior:
|
| 48 |
-
- optimizer
|
| 49 |
-
-
|
|
|
|
| 50 |
|
| 51 |
## 5.1 Summary logic memory (current)
|
| 52 |
|
|
|
|
| 45 |
- bi-grams and tri-grams are eligible when present in `>= 2` competitors;
|
| 46 |
- unigrams are eligible only if they are part of user keyword phrases and present in `>= 2` competitors.
|
| 47 |
- Iteration behavior:
|
| 48 |
+
- optimizer works on one n-gram target at a time per step;
|
| 49 |
+
- per eligible n-gram target it allocates `3` attempts, then moves to the next target;
|
| 50 |
+
- if target list ends, stage advances to the next optimization stage.
|
| 51 |
|
| 52 |
## 5.1 Summary logic memory (current)
|
| 53 |
|
optimizer.py
CHANGED
|
@@ -26,6 +26,7 @@ TITLE_TARGET_THRESHOLD = 0.65
|
|
| 26 |
SEMANTIC_GAP_TOLERANCE_PCT = 0.15
|
| 27 |
SEMANTIC_GAP_MIN_ABS = 3.0
|
| 28 |
STAGE_ORDER = ["bert", "bm25", "ngram", "semantic", "title"]
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
def _tokenize(text: str) -> List[str]:
|
|
@@ -487,27 +488,34 @@ def _choose_optimization_goal(
|
|
| 487 |
continue
|
| 488 |
if not _is_ngram_outside_tolerance(target, comp_avg):
|
| 489 |
continue
|
|
|
|
|
|
|
|
|
|
| 490 |
tol = _ngram_tolerance_pct(comp_avg)
|
| 491 |
dev_ratio = _ngram_deviation_ratio(target, comp_avg)
|
| 492 |
ngram_rows.append((ngram_label, target, comp_avg, tol, comp_occ, dev_ratio))
|
| 493 |
if ngram_rows:
|
| 494 |
ngram_rows.sort(key=lambda x: (x[5], x[4], x[2]), reverse=True)
|
| 495 |
-
pick = max(0, int(stage_cursor))
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
"
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
|
| 512 |
title_bert = analysis.get("title_analysis", {}).get("bert", {}) or {}
|
| 513 |
title_target_score = title_bert.get("target_score")
|
|
@@ -1110,6 +1118,16 @@ def _is_stage_complete(stage: str, metrics: Dict[str, Any], bert_stage_target: f
|
|
| 1110 |
return True
|
| 1111 |
|
| 1112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1113 |
def _candidate_utility(
|
| 1114 |
*,
|
| 1115 |
prev_metrics: Dict[str, Any],
|
|
@@ -1246,7 +1264,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1246 |
queued_candidates: List[Dict[str, Any]] = []
|
| 1247 |
stage_idx = 0
|
| 1248 |
stage_no_progress_steps = 0
|
| 1249 |
-
stage_goal_cursor: Dict[str, int] = {}
|
| 1250 |
|
| 1251 |
for step in range(max_iterations):
|
| 1252 |
while stage_idx < len(STAGE_ORDER) and _is_stage_complete(
|
|
@@ -1266,7 +1284,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1266 |
language,
|
| 1267 |
stage=active_stage,
|
| 1268 |
bert_stage_target=bert_stage_target,
|
| 1269 |
-
stage_cursor=int(stage_goal_cursor.get(active_stage, 0)),
|
| 1270 |
)
|
| 1271 |
if goal["type"] == "none":
|
| 1272 |
stage_idx += 1
|
|
@@ -1594,8 +1612,8 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1594 |
stage_no_progress_steps = 0
|
| 1595 |
else:
|
| 1596 |
stage_no_progress_steps += 1
|
| 1597 |
-
|
| 1598 |
-
|
| 1599 |
applied_changes += 1
|
| 1600 |
queued_candidates = []
|
| 1601 |
|
|
@@ -1739,8 +1757,8 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1739 |
stage_no_progress_steps = 0
|
| 1740 |
else:
|
| 1741 |
stage_no_progress_steps += 1
|
| 1742 |
-
|
| 1743 |
-
|
| 1744 |
applied_changes += 1
|
| 1745 |
batch_applied = True
|
| 1746 |
batch_info = {
|
|
@@ -1860,7 +1878,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1860 |
)
|
| 1861 |
stage_no_progress_steps += 1
|
| 1862 |
if active_stage == "ngram":
|
| 1863 |
-
|
| 1864 |
if stage_no_progress_steps >= 3 and stage_idx < len(STAGE_ORDER) - 1:
|
| 1865 |
stage_idx += 1
|
| 1866 |
stage_no_progress_steps = 0
|
|
@@ -1895,8 +1913,8 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1895 |
stage_no_progress_steps = 0
|
| 1896 |
else:
|
| 1897 |
stage_no_progress_steps += 1
|
| 1898 |
-
|
| 1899 |
-
|
| 1900 |
applied_changes += 1
|
| 1901 |
queued_candidates = []
|
| 1902 |
|
|
|
|
| 26 |
SEMANTIC_GAP_TOLERANCE_PCT = 0.15
|
| 27 |
SEMANTIC_GAP_MIN_ABS = 3.0
|
| 28 |
STAGE_ORDER = ["bert", "bm25", "ngram", "semantic", "title"]
|
| 29 |
+
NGRAM_ATTEMPTS_PER_TERM = 3
|
| 30 |
|
| 31 |
|
| 32 |
def _tokenize(text: str) -> List[str]:
|
|
|
|
| 488 |
continue
|
| 489 |
if not _is_ngram_outside_tolerance(target, comp_avg):
|
| 490 |
continue
|
| 491 |
+
# N-gram stage is for underrepresented terms only.
|
| 492 |
+
if target >= comp_avg:
|
| 493 |
+
continue
|
| 494 |
tol = _ngram_tolerance_pct(comp_avg)
|
| 495 |
dev_ratio = _ngram_deviation_ratio(target, comp_avg)
|
| 496 |
ngram_rows.append((ngram_label, target, comp_avg, tol, comp_occ, dev_ratio))
|
| 497 |
if ngram_rows:
|
| 498 |
ngram_rows.sort(key=lambda x: (x[5], x[4], x[2]), reverse=True)
|
| 499 |
+
pick = max(0, int(stage_cursor))
|
| 500 |
+
if pick >= len(ngram_rows):
|
| 501 |
+
# No more n-gram targets in current stage cursor window.
|
| 502 |
+
pass
|
| 503 |
+
else:
|
| 504 |
+
label, target, comp_avg, tol, _, _ = ngram_rows[pick]
|
| 505 |
+
candidates["ngram"] = {
|
| 506 |
+
"type": "ngram",
|
| 507 |
+
"label": label,
|
| 508 |
+
"focus_terms": [label],
|
| 509 |
+
"avoid_terms": [],
|
| 510 |
+
"ngram_target_count": target,
|
| 511 |
+
"ngram_comp_avg": comp_avg,
|
| 512 |
+
"ngram_tolerance_pct": tol,
|
| 513 |
+
"ngram_lower_bound": round(comp_avg * (1.0 - tol), 3),
|
| 514 |
+
"ngram_upper_bound": round(comp_avg * (1.0 + tol), 3),
|
| 515 |
+
"ngram_direction": "increase" if target < comp_avg else "decrease",
|
| 516 |
+
"ngram_rank_index": pick,
|
| 517 |
+
"ngram_candidates_total": len(ngram_rows),
|
| 518 |
+
}
|
| 519 |
|
| 520 |
title_bert = analysis.get("title_analysis", {}).get("bert", {}) or {}
|
| 521 |
title_target_score = title_bert.get("target_score")
|
|
|
|
| 1118 |
return True
|
| 1119 |
|
| 1120 |
|
| 1121 |
+
def _advance_ngram_term_cursor(cursor_state: Dict[str, Dict[str, int]], stage_key: str) -> None:
|
| 1122 |
+
state = cursor_state.get(stage_key) or {"term_index": 0, "attempt_count": 0}
|
| 1123 |
+
attempts = int(state.get("attempt_count", 0)) + 1
|
| 1124 |
+
term_index = int(state.get("term_index", 0))
|
| 1125 |
+
if attempts >= NGRAM_ATTEMPTS_PER_TERM:
|
| 1126 |
+
term_index += 1
|
| 1127 |
+
attempts = 0
|
| 1128 |
+
cursor_state[stage_key] = {"term_index": term_index, "attempt_count": attempts}
|
| 1129 |
+
|
| 1130 |
+
|
| 1131 |
def _candidate_utility(
|
| 1132 |
*,
|
| 1133 |
prev_metrics: Dict[str, Any],
|
|
|
|
| 1264 |
queued_candidates: List[Dict[str, Any]] = []
|
| 1265 |
stage_idx = 0
|
| 1266 |
stage_no_progress_steps = 0
|
| 1267 |
+
stage_goal_cursor: Dict[str, Dict[str, int]] = {}
|
| 1268 |
|
| 1269 |
for step in range(max_iterations):
|
| 1270 |
while stage_idx < len(STAGE_ORDER) and _is_stage_complete(
|
|
|
|
| 1284 |
language,
|
| 1285 |
stage=active_stage,
|
| 1286 |
bert_stage_target=bert_stage_target,
|
| 1287 |
+
stage_cursor=int((stage_goal_cursor.get(active_stage) or {}).get("term_index", 0)),
|
| 1288 |
)
|
| 1289 |
if goal["type"] == "none":
|
| 1290 |
stage_idx += 1
|
|
|
|
| 1612 |
stage_no_progress_steps = 0
|
| 1613 |
else:
|
| 1614 |
stage_no_progress_steps += 1
|
| 1615 |
+
if active_stage == "ngram":
|
| 1616 |
+
_advance_ngram_term_cursor(stage_goal_cursor, active_stage)
|
| 1617 |
applied_changes += 1
|
| 1618 |
queued_candidates = []
|
| 1619 |
|
|
|
|
| 1757 |
stage_no_progress_steps = 0
|
| 1758 |
else:
|
| 1759 |
stage_no_progress_steps += 1
|
| 1760 |
+
if active_stage == "ngram":
|
| 1761 |
+
_advance_ngram_term_cursor(stage_goal_cursor, active_stage)
|
| 1762 |
applied_changes += 1
|
| 1763 |
batch_applied = True
|
| 1764 |
batch_info = {
|
|
|
|
| 1878 |
)
|
| 1879 |
stage_no_progress_steps += 1
|
| 1880 |
if active_stage == "ngram":
|
| 1881 |
+
_advance_ngram_term_cursor(stage_goal_cursor, active_stage)
|
| 1882 |
if stage_no_progress_steps >= 3 and stage_idx < len(STAGE_ORDER) - 1:
|
| 1883 |
stage_idx += 1
|
| 1884 |
stage_no_progress_steps = 0
|
|
|
|
| 1913 |
stage_no_progress_steps = 0
|
| 1914 |
else:
|
| 1915 |
stage_no_progress_steps += 1
|
| 1916 |
+
if active_stage == "ngram":
|
| 1917 |
+
_advance_ngram_term_cursor(stage_goal_cursor, active_stage)
|
| 1918 |
applied_changes += 1
|
| 1919 |
queued_candidates = []
|
| 1920 |
|