ываваы

#4
by coingimp - opened
Files changed (3) hide show
  1. app.py +1 -1
  2. main.py +1 -1
  3. optimizer.py +23 -97
app.py CHANGED
@@ -58,7 +58,7 @@ async def startup_event():
58
 
59
  @app.get("/", response_class=HTMLResponse)
60
  async def read_root(request: Request):
61
- return templates.TemplateResponse(request, "index.html")
62
 
63
  @app.post("/analyze", response_model=AnalysisResponse)
64
  async def analyze_text(request: AnalysisRequest):
 
58
 
59
  @app.get("/", response_class=HTMLResponse)
60
  async def read_root(request: Request):
61
+ return templates.TemplateResponse("index.html", {"request": request})
62
 
63
  @app.post("/analyze", response_model=AnalysisResponse)
64
  async def analyze_text(request: AnalysisRequest):
main.py CHANGED
@@ -25,7 +25,7 @@ async def startup_event():
25
  @app.get("/", response_class=HTMLResponse)
26
  async def read_root(request: Request):
27
  # Рендерим файл index.html
28
- return templates.TemplateResponse(request, "index.html")
29
 
30
  @app.post("/analyze", response_model=AnalysisResponse)
31
  async def analyze_text(request: AnalysisRequest):
 
25
  @app.get("/", response_class=HTMLResponse)
26
  async def read_root(request: Request):
27
  # Рендерим файл index.html
28
+ return templates.TemplateResponse("index.html", {"request": request})
29
 
30
  @app.post("/analyze", response_model=AnalysisResponse)
31
  async def analyze_text(request: AnalysisRequest):
optimizer.py CHANGED
@@ -1129,46 +1129,6 @@ def _choose_edit_span(
1129
  return operation, span_start, span_end, sent_idx, variant_pick
1130
 
1131
 
1132
- def _portfolio_span_trials(goal_type: str, cascade_level: int, eff_candidates: int) -> int:
1133
- """Try more distinct spans for hard local goals without exploding candidate count."""
1134
- t = (goal_type or "").strip().lower()
1135
- base = 2 if cascade_level <= 2 else 3
1136
- if t in {"bert", "ngram"}:
1137
- base += 1
1138
- if cascade_level >= 3 and t in {"bert", "ngram", "semantic"}:
1139
- base += 1
1140
- return max(1, min(5, max(base, min(3, int(eff_candidates or 1)))))
1141
-
1142
-
1143
- def _local_prefilter_reasons(
1144
- *,
1145
- goal_type: str,
1146
- focus_terms: List[str],
1147
- edited_text: str,
1148
- chunk_delta: float,
1149
- before_rel: float,
1150
- after_rel: float,
1151
- ) -> List[str]:
1152
- """
1153
- Cheap local gate before the expensive full document metric rebuild.
1154
- It rejects only candidates that cannot plausibly help the current goal.
1155
- """
1156
- t = (goal_type or "").strip().lower()
1157
- reasons: List[str] = []
1158
- if t == "bm25" and chunk_delta <= 0:
1159
- reasons.append("local_bm25_term_not_reduced")
1160
- elif t == "ngram" and chunk_delta <= 0:
1161
- reasons.append("local_ngram_not_closer_to_target")
1162
- elif t == "bert" and after_rel < (before_rel - 0.003):
1163
- reasons.append("local_bert_relevance_dropped")
1164
- elif t == "semantic":
1165
- edited_l = (edited_text or "").lower()
1166
- has_focus = any(str(t).strip().lower() in edited_l for t in (focus_terms or []) if str(t).strip())
1167
- if chunk_delta <= 0 and not has_focus:
1168
- reasons.append("local_semantic_focus_missing")
1169
- return reasons
1170
-
1171
-
1172
  def _is_noise_like_sentence(text: str) -> bool:
1173
  s = (text or "").strip()
1174
  if not s:
@@ -1982,19 +1942,6 @@ def optimize_text(
1982
  def _cancelled() -> bool:
1983
  return cancel_event is not None and getattr(cancel_event, "is_set", lambda: False)()
1984
 
1985
- candidate_eval_cache: Dict[Tuple[str, str], Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]] = {}
1986
-
1987
- def _evaluate_candidate_state(body_text: str, title_text: str) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
1988
- key = ((body_text or "").strip(), (title_text or "").strip())
1989
- cached = candidate_eval_cache.get(key)
1990
- if cached is not None:
1991
- return cached
1992
- analysis = _build_analysis_snapshot(key[0], competitors, keywords, language, key[1], competitor_titles)
1993
- semantic = _build_semantic_snapshot(key[0], competitors, language)
1994
- metrics = _compute_metrics(analysis, semantic, keywords, language, bert_stage_target=bert_stage_target)
1995
- candidate_eval_cache[key] = (analysis, semantic, metrics)
1996
- return analysis, semantic, metrics
1997
-
1998
  def _pack_result(stopped_early: bool = False, stop_reason: str = "") -> Dict[str, Any]:
1999
  # Title string must match what last metrics used (title_analysis.target_title), not only the mutable var.
2000
  _ta = (current_analysis or {}).get("title_analysis") or {}
@@ -2230,7 +2177,13 @@ def optimize_text(
2230
  continue
2231
 
2232
  quality_issues = _validate_title_candidate(edited_text)
2233
- cand_analysis, cand_semantic, cand_metrics = _evaluate_candidate_state(current_text, edited_text)
 
 
 
 
 
 
2234
  before_rel = float(current_metrics.get("title_bert_score") or 0.0)
2235
  after_rel = float(cand_metrics.get("title_bert_score") or 0.0)
2236
  chunk_delta = round(after_rel - before_rel, 4)
@@ -2386,10 +2339,8 @@ def optimize_text(
2386
  logs.append({"step": step + 1, "status": "stopped", "reason": "No sentences available for editing."})
2387
  break
2388
 
2389
- span_trials = _portfolio_span_trials(str(goal.get("type", "")), cascade_level, eff_cand)
2390
- local_candidates = max(eff_cand, span_trials)
2391
- if cascade_level > 2:
2392
- local_candidates = min(6, max(local_candidates, eff_cand + 1))
2393
  span_trials_eff = span_trials
2394
 
2395
  for st in range(span_trials):
@@ -2531,43 +2482,6 @@ def optimize_text(
2531
  )
2532
  continue
2533
 
2534
- prefilter_reasons = _local_prefilter_reasons(
2535
- goal_type=str(goal.get("type", "")),
2536
- focus_terms=goal.get("focus_terms", []) or [],
2537
- edited_text=edited_text,
2538
- chunk_delta=chunk_delta,
2539
- before_rel=before_rel,
2540
- after_rel=after_rel,
2541
- )
2542
- if prefilter_reasons:
2543
- candidates.append(
2544
- {
2545
- "candidate_index": candidate_idx,
2546
- "error": "local_prefilter_rejected",
2547
- "valid": False,
2548
- "goal_improved": False,
2549
- "local_chunk_improved": local_chunk_improved,
2550
- "chunk_goal_delta": chunk_delta,
2551
- "invalid_reasons": prefilter_reasons,
2552
- "delta_score": -999.0,
2553
- "candidate_score": None,
2554
- "sentence_after": edited_text,
2555
- "chunk_relevance_before": before_rel,
2556
- "chunk_relevance_after": after_rel,
2557
- "term_diff": _term_diff(original_span_text, edited_text, language),
2558
- "llm_prompt_debug": prompt_debug,
2559
- "llm_rationale": llm_rationale,
2560
- "operation": operation,
2561
- "sentence_index": sent_idx,
2562
- "span_start": span_start,
2563
- "span_end": span_end,
2564
- "span_variant": span_variant,
2565
- "phrase_strategy_used": strategy_variant,
2566
- "sentence_before": original_span_text,
2567
- }
2568
- )
2569
- continue
2570
-
2571
  candidate_key = (operation, span_start, span_end, edited_text.strip().lower())
2572
  if candidate_key in seen_candidate_rewrites:
2573
  candidates.append(
@@ -2604,7 +2518,13 @@ def optimize_text(
2604
  candidate_sentences = _replace_span(sentences, span_start, span_end, edited_text)
2605
  candidate_text = " ".join(candidate_sentences).strip()
2606
 
2607
- cand_analysis, cand_semantic, cand_metrics = _evaluate_candidate_state(candidate_text, current_title)
 
 
 
 
 
 
2608
  valid, invalid_reasons, goal_improved = _is_candidate_valid(
2609
  current_metrics, cand_metrics, goal["type"], goal["label"], optimization_mode
2610
  )
@@ -2840,7 +2760,13 @@ def optimize_text(
2840
  continue
2841
  batch_sentences = _apply_edits_to_sentences(sentences, edits)
2842
  batch_text = " ".join(batch_sentences).strip()
2843
- batch_analysis, batch_semantic, batch_metrics = _evaluate_candidate_state(batch_text, current_title)
 
 
 
 
 
 
2844
  b_valid, b_reasons, b_goal = _is_candidate_valid(
2845
  current_metrics, batch_metrics, goal["type"], goal["label"], optimization_mode
2846
  )
 
1129
  return operation, span_start, span_end, sent_idx, variant_pick
1130
 
1131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1132
  def _is_noise_like_sentence(text: str) -> bool:
1133
  s = (text or "").strip()
1134
  if not s:
 
1942
  def _cancelled() -> bool:
1943
  return cancel_event is not None and getattr(cancel_event, "is_set", lambda: False)()
1944
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1945
  def _pack_result(stopped_early: bool = False, stop_reason: str = "") -> Dict[str, Any]:
1946
  # Title string must match what last metrics used (title_analysis.target_title), not only the mutable var.
1947
  _ta = (current_analysis or {}).get("title_analysis") or {}
 
2177
  continue
2178
 
2179
  quality_issues = _validate_title_candidate(edited_text)
2180
+ cand_analysis = _build_analysis_snapshot(
2181
+ current_text, competitors, keywords, language, edited_text, competitor_titles
2182
+ )
2183
+ cand_semantic = _build_semantic_snapshot(current_text, competitors, language)
2184
+ cand_metrics = _compute_metrics(
2185
+ cand_analysis, cand_semantic, keywords, language, bert_stage_target=bert_stage_target
2186
+ )
2187
  before_rel = float(current_metrics.get("title_bert_score") or 0.0)
2188
  after_rel = float(cand_metrics.get("title_bert_score") or 0.0)
2189
  chunk_delta = round(after_rel - before_rel, 4)
 
2339
  logs.append({"step": step + 1, "status": "stopped", "reason": "No sentences available for editing."})
2340
  break
2341
 
2342
+ span_trials = 2 if cascade_level <= 2 else 3
2343
+ local_candidates = eff_cand if cascade_level <= 2 else min(6, eff_cand + 1)
 
 
2344
  span_trials_eff = span_trials
2345
 
2346
  for st in range(span_trials):
 
2482
  )
2483
  continue
2484
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2485
  candidate_key = (operation, span_start, span_end, edited_text.strip().lower())
2486
  if candidate_key in seen_candidate_rewrites:
2487
  candidates.append(
 
2518
  candidate_sentences = _replace_span(sentences, span_start, span_end, edited_text)
2519
  candidate_text = " ".join(candidate_sentences).strip()
2520
 
2521
+ cand_analysis = _build_analysis_snapshot(
2522
+ candidate_text, competitors, keywords, language, current_title, competitor_titles
2523
+ )
2524
+ cand_semantic = _build_semantic_snapshot(candidate_text, competitors, language)
2525
+ cand_metrics = _compute_metrics(
2526
+ cand_analysis, cand_semantic, keywords, language, bert_stage_target=bert_stage_target
2527
+ )
2528
  valid, invalid_reasons, goal_improved = _is_candidate_valid(
2529
  current_metrics, cand_metrics, goal["type"], goal["label"], optimization_mode
2530
  )
 
2760
  continue
2761
  batch_sentences = _apply_edits_to_sentences(sentences, edits)
2762
  batch_text = " ".join(batch_sentences).strip()
2763
+ batch_analysis = _build_analysis_snapshot(
2764
+ batch_text, competitors, keywords, language, current_title, competitor_titles
2765
+ )
2766
+ batch_semantic = _build_semantic_snapshot(batch_text, competitors, language)
2767
+ batch_metrics = _compute_metrics(
2768
+ batch_analysis, batch_semantic, keywords, language, bert_stage_target=bert_stage_target
2769
+ )
2770
  b_valid, b_reasons, b_goal = _is_candidate_valid(
2771
  current_metrics, batch_metrics, goal["type"], goal["label"], optimization_mode
2772
  )