Spaces:

lsdf
/

ai-seo-analyzer

Running

App Files Community

ываваы

by coingimp - opened 24 days ago

base: refs/heads/main

←

from: refs/pr/4

Discussion Files changed

+25

-99

This PR is in draft mode

Files changed (3) hide show

app.py +1 -1
main.py +1 -1
optimizer.py +23 -97

app.py CHANGED Viewed

@@ -58,7 +58,7 @@ async def startup_event():
 @app.get("/", response_class=HTMLResponse)
 async def read_root(request: Request):
-    return templates.TemplateResponse(request, "index.html")
 @app.post("/analyze", response_model=AnalysisResponse)
 async def analyze_text(request: AnalysisRequest):

 @app.get("/", response_class=HTMLResponse)
 async def read_root(request: Request):
+    return templates.TemplateResponse("index.html", {"request": request})
 @app.post("/analyze", response_model=AnalysisResponse)
 async def analyze_text(request: AnalysisRequest):

main.py CHANGED Viewed

@@ -25,7 +25,7 @@ async def startup_event():
 @app.get("/", response_class=HTMLResponse)
 async def read_root(request: Request):
     # Рендерим файл index.html
-    return templates.TemplateResponse(request, "index.html")
 @app.post("/analyze", response_model=AnalysisResponse)
 async def analyze_text(request: AnalysisRequest):

 @app.get("/", response_class=HTMLResponse)
 async def read_root(request: Request):
     # Рендерим файл index.html
+    return templates.TemplateResponse("index.html", {"request": request})
 @app.post("/analyze", response_model=AnalysisResponse)
 async def analyze_text(request: AnalysisRequest):

optimizer.py CHANGED Viewed

@@ -1129,46 +1129,6 @@ def _choose_edit_span(
     return operation, span_start, span_end, sent_idx, variant_pick
-def _portfolio_span_trials(goal_type: str, cascade_level: int, eff_candidates: int) -> int:
-    """Try more distinct spans for hard local goals without exploding candidate count."""
-    t = (goal_type or "").strip().lower()
-    base = 2 if cascade_level <= 2 else 3
-    if t in {"bert", "ngram"}:
-        base += 1
-    if cascade_level >= 3 and t in {"bert", "ngram", "semantic"}:
-        base += 1
-    return max(1, min(5, max(base, min(3, int(eff_candidates or 1)))))
-def _local_prefilter_reasons(
-    *,
-    goal_type: str,
-    focus_terms: List[str],
-    edited_text: str,
-    chunk_delta: float,
-    before_rel: float,
-    after_rel: float,
-) -> List[str]:
-    """
-    Cheap local gate before the expensive full document metric rebuild.
-    It rejects only candidates that cannot plausibly help the current goal.
-    """
-    t = (goal_type or "").strip().lower()
-    reasons: List[str] = []
-    if t == "bm25" and chunk_delta <= 0:
-        reasons.append("local_bm25_term_not_reduced")
-    elif t == "ngram" and chunk_delta <= 0:
-        reasons.append("local_ngram_not_closer_to_target")
-    elif t == "bert" and after_rel < (before_rel - 0.003):
-        reasons.append("local_bert_relevance_dropped")
-    elif t == "semantic":
-        edited_l = (edited_text or "").lower()
-        has_focus = any(str(t).strip().lower() in edited_l for t in (focus_terms or []) if str(t).strip())
-        if chunk_delta <= 0 and not has_focus:
-            reasons.append("local_semantic_focus_missing")
-    return reasons
 def _is_noise_like_sentence(text: str) -> bool:
     s = (text or "").strip()
     if not s:
@@ -1982,19 +1942,6 @@ def optimize_text(
     def _cancelled() -> bool:
         return cancel_event is not None and getattr(cancel_event, "is_set", lambda: False)()
-    candidate_eval_cache: Dict[Tuple[str, str], Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]] = {}
-    def _evaluate_candidate_state(body_text: str, title_text: str) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
-        key = ((body_text or "").strip(), (title_text or "").strip())
-        cached = candidate_eval_cache.get(key)
-        if cached is not None:
-            return cached
-        analysis = _build_analysis_snapshot(key[0], competitors, keywords, language, key[1], competitor_titles)
-        semantic = _build_semantic_snapshot(key[0], competitors, language)
-        metrics = _compute_metrics(analysis, semantic, keywords, language, bert_stage_target=bert_stage_target)
-        candidate_eval_cache[key] = (analysis, semantic, metrics)
-        return analysis, semantic, metrics
     def _pack_result(stopped_early: bool = False, stop_reason: str = "") -> Dict[str, Any]:
         # Title string must match what last metrics used (title_analysis.target_title), not only the mutable var.
         _ta = (current_analysis or {}).get("title_analysis") or {}
@@ -2230,7 +2177,13 @@ def optimize_text(
                         continue
                     quality_issues = _validate_title_candidate(edited_text)
-                    cand_analysis, cand_semantic, cand_metrics = _evaluate_candidate_state(current_text, edited_text)
                     before_rel = float(current_metrics.get("title_bert_score") or 0.0)
                     after_rel = float(cand_metrics.get("title_bert_score") or 0.0)
                     chunk_delta = round(after_rel - before_rel, 4)
@@ -2386,10 +2339,8 @@ def optimize_text(
                 logs.append({"step": step + 1, "status": "stopped", "reason": "No sentences available for editing."})
                 break
-            span_trials = _portfolio_span_trials(str(goal.get("type", "")), cascade_level, eff_cand)
-            local_candidates = max(eff_cand, span_trials)
-            if cascade_level > 2:
-                local_candidates = min(6, max(local_candidates, eff_cand + 1))
             span_trials_eff = span_trials
             for st in range(span_trials):
@@ -2531,43 +2482,6 @@ def optimize_text(
                             )
                             continue
-                        prefilter_reasons = _local_prefilter_reasons(
-                            goal_type=str(goal.get("type", "")),
-                            focus_terms=goal.get("focus_terms", []) or [],
-                            edited_text=edited_text,
-                            chunk_delta=chunk_delta,
-                            before_rel=before_rel,
-                            after_rel=after_rel,
-                        )
-                        if prefilter_reasons:
-                            candidates.append(
-                                {
-                                    "candidate_index": candidate_idx,
-                                    "error": "local_prefilter_rejected",
-                                    "valid": False,
-                                    "goal_improved": False,
-                                    "local_chunk_improved": local_chunk_improved,
-                                    "chunk_goal_delta": chunk_delta,
-                                    "invalid_reasons": prefilter_reasons,
-                                    "delta_score": -999.0,
-                                    "candidate_score": None,
-                                    "sentence_after": edited_text,
-                                    "chunk_relevance_before": before_rel,
-                                    "chunk_relevance_after": after_rel,
-                                    "term_diff": _term_diff(original_span_text, edited_text, language),
-                                    "llm_prompt_debug": prompt_debug,
-                                    "llm_rationale": llm_rationale,
-                                    "operation": operation,
-                                    "sentence_index": sent_idx,
-                                    "span_start": span_start,
-                                    "span_end": span_end,
-                                    "span_variant": span_variant,
-                                    "phrase_strategy_used": strategy_variant,
-                                    "sentence_before": original_span_text,
-                                }
-                            )
-                            continue
                         candidate_key = (operation, span_start, span_end, edited_text.strip().lower())
                         if candidate_key in seen_candidate_rewrites:
                             candidates.append(
@@ -2604,7 +2518,13 @@ def optimize_text(
                             candidate_sentences = _replace_span(sentences, span_start, span_end, edited_text)
                         candidate_text = " ".join(candidate_sentences).strip()
-                        cand_analysis, cand_semantic, cand_metrics = _evaluate_candidate_state(candidate_text, current_title)
                         valid, invalid_reasons, goal_improved = _is_candidate_valid(
                             current_metrics, cand_metrics, goal["type"], goal["label"], optimization_mode
                         )
@@ -2840,7 +2760,13 @@ def optimize_text(
                         continue
                     batch_sentences = _apply_edits_to_sentences(sentences, edits)
                     batch_text = " ".join(batch_sentences).strip()
-                    batch_analysis, batch_semantic, batch_metrics = _evaluate_candidate_state(batch_text, current_title)
                     b_valid, b_reasons, b_goal = _is_candidate_valid(
                         current_metrics, batch_metrics, goal["type"], goal["label"], optimization_mode
                     )

     return operation, span_start, span_end, sent_idx, variant_pick
 def _is_noise_like_sentence(text: str) -> bool:
     s = (text or "").strip()
     if not s:
     def _cancelled() -> bool:
         return cancel_event is not None and getattr(cancel_event, "is_set", lambda: False)()
     def _pack_result(stopped_early: bool = False, stop_reason: str = "") -> Dict[str, Any]:
         # Title string must match what last metrics used (title_analysis.target_title), not only the mutable var.
         _ta = (current_analysis or {}).get("title_analysis") or {}
                         continue
                     quality_issues = _validate_title_candidate(edited_text)
+                    cand_analysis = _build_analysis_snapshot(
+                        current_text, competitors, keywords, language, edited_text, competitor_titles
+                    )
+                    cand_semantic = _build_semantic_snapshot(current_text, competitors, language)
+                    cand_metrics = _compute_metrics(
+                        cand_analysis, cand_semantic, keywords, language, bert_stage_target=bert_stage_target
+                    )
                     before_rel = float(current_metrics.get("title_bert_score") or 0.0)
                     after_rel = float(cand_metrics.get("title_bert_score") or 0.0)
                     chunk_delta = round(after_rel - before_rel, 4)
                 logs.append({"step": step + 1, "status": "stopped", "reason": "No sentences available for editing."})
                 break
+            span_trials = 2 if cascade_level <= 2 else 3
+            local_candidates = eff_cand if cascade_level <= 2 else min(6, eff_cand + 1)
             span_trials_eff = span_trials
             for st in range(span_trials):
                             )
                             continue
                         candidate_key = (operation, span_start, span_end, edited_text.strip().lower())
                         if candidate_key in seen_candidate_rewrites:
                             candidates.append(
                             candidate_sentences = _replace_span(sentences, span_start, span_end, edited_text)
                         candidate_text = " ".join(candidate_sentences).strip()
+                        cand_analysis = _build_analysis_snapshot(
+                            candidate_text, competitors, keywords, language, current_title, competitor_titles
+                        )
+                        cand_semantic = _build_semantic_snapshot(candidate_text, competitors, language)
+                        cand_metrics = _compute_metrics(
+                            cand_analysis, cand_semantic, keywords, language, bert_stage_target=bert_stage_target
+                        )
                         valid, invalid_reasons, goal_improved = _is_candidate_valid(
                             current_metrics, cand_metrics, goal["type"], goal["label"], optimization_mode
                         )
                         continue
                     batch_sentences = _apply_edits_to_sentences(sentences, edits)
                     batch_text = " ".join(batch_sentences).strip()
+                    batch_analysis = _build_analysis_snapshot(
+                        batch_text, competitors, keywords, language, current_title, competitor_titles
+                    )
+                    batch_semantic = _build_semantic_snapshot(batch_text, competitors, language)
+                    batch_metrics = _compute_metrics(
+                        batch_analysis, batch_semantic, keywords, language, bert_stage_target=bert_stage_target
+                    )
                     b_valid, b_reasons, b_goal = _is_candidate_valid(
                         current_metrics, batch_metrics, goal["type"], goal["label"], optimization_mode
                     )