Spaces:
Running
Running
ываваы
#4
by coingimp - opened
- app.py +1 -1
- main.py +1 -1
- optimizer.py +23 -97
app.py
CHANGED
|
@@ -58,7 +58,7 @@ async def startup_event():
|
|
| 58 |
|
| 59 |
@app.get("/", response_class=HTMLResponse)
|
| 60 |
async def read_root(request: Request):
|
| 61 |
-
return templates.TemplateResponse(
|
| 62 |
|
| 63 |
@app.post("/analyze", response_model=AnalysisResponse)
|
| 64 |
async def analyze_text(request: AnalysisRequest):
|
|
|
|
| 58 |
|
| 59 |
@app.get("/", response_class=HTMLResponse)
|
| 60 |
async def read_root(request: Request):
|
| 61 |
+
return templates.TemplateResponse("index.html", {"request": request})
|
| 62 |
|
| 63 |
@app.post("/analyze", response_model=AnalysisResponse)
|
| 64 |
async def analyze_text(request: AnalysisRequest):
|
main.py
CHANGED
|
@@ -25,7 +25,7 @@ async def startup_event():
|
|
| 25 |
@app.get("/", response_class=HTMLResponse)
|
| 26 |
async def read_root(request: Request):
|
| 27 |
# Рендерим файл index.html
|
| 28 |
-
return templates.TemplateResponse(
|
| 29 |
|
| 30 |
@app.post("/analyze", response_model=AnalysisResponse)
|
| 31 |
async def analyze_text(request: AnalysisRequest):
|
|
|
|
| 25 |
@app.get("/", response_class=HTMLResponse)
|
| 26 |
async def read_root(request: Request):
|
| 27 |
# Рендерим файл index.html
|
| 28 |
+
return templates.TemplateResponse("index.html", {"request": request})
|
| 29 |
|
| 30 |
@app.post("/analyze", response_model=AnalysisResponse)
|
| 31 |
async def analyze_text(request: AnalysisRequest):
|
optimizer.py
CHANGED
|
@@ -1129,46 +1129,6 @@ def _choose_edit_span(
|
|
| 1129 |
return operation, span_start, span_end, sent_idx, variant_pick
|
| 1130 |
|
| 1131 |
|
| 1132 |
-
def _portfolio_span_trials(goal_type: str, cascade_level: int, eff_candidates: int) -> int:
|
| 1133 |
-
"""Try more distinct spans for hard local goals without exploding candidate count."""
|
| 1134 |
-
t = (goal_type or "").strip().lower()
|
| 1135 |
-
base = 2 if cascade_level <= 2 else 3
|
| 1136 |
-
if t in {"bert", "ngram"}:
|
| 1137 |
-
base += 1
|
| 1138 |
-
if cascade_level >= 3 and t in {"bert", "ngram", "semantic"}:
|
| 1139 |
-
base += 1
|
| 1140 |
-
return max(1, min(5, max(base, min(3, int(eff_candidates or 1)))))
|
| 1141 |
-
|
| 1142 |
-
|
| 1143 |
-
def _local_prefilter_reasons(
|
| 1144 |
-
*,
|
| 1145 |
-
goal_type: str,
|
| 1146 |
-
focus_terms: List[str],
|
| 1147 |
-
edited_text: str,
|
| 1148 |
-
chunk_delta: float,
|
| 1149 |
-
before_rel: float,
|
| 1150 |
-
after_rel: float,
|
| 1151 |
-
) -> List[str]:
|
| 1152 |
-
"""
|
| 1153 |
-
Cheap local gate before the expensive full document metric rebuild.
|
| 1154 |
-
It rejects only candidates that cannot plausibly help the current goal.
|
| 1155 |
-
"""
|
| 1156 |
-
t = (goal_type or "").strip().lower()
|
| 1157 |
-
reasons: List[str] = []
|
| 1158 |
-
if t == "bm25" and chunk_delta <= 0:
|
| 1159 |
-
reasons.append("local_bm25_term_not_reduced")
|
| 1160 |
-
elif t == "ngram" and chunk_delta <= 0:
|
| 1161 |
-
reasons.append("local_ngram_not_closer_to_target")
|
| 1162 |
-
elif t == "bert" and after_rel < (before_rel - 0.003):
|
| 1163 |
-
reasons.append("local_bert_relevance_dropped")
|
| 1164 |
-
elif t == "semantic":
|
| 1165 |
-
edited_l = (edited_text or "").lower()
|
| 1166 |
-
has_focus = any(str(t).strip().lower() in edited_l for t in (focus_terms or []) if str(t).strip())
|
| 1167 |
-
if chunk_delta <= 0 and not has_focus:
|
| 1168 |
-
reasons.append("local_semantic_focus_missing")
|
| 1169 |
-
return reasons
|
| 1170 |
-
|
| 1171 |
-
|
| 1172 |
def _is_noise_like_sentence(text: str) -> bool:
|
| 1173 |
s = (text or "").strip()
|
| 1174 |
if not s:
|
|
@@ -1982,19 +1942,6 @@ def optimize_text(
|
|
| 1982 |
def _cancelled() -> bool:
|
| 1983 |
return cancel_event is not None and getattr(cancel_event, "is_set", lambda: False)()
|
| 1984 |
|
| 1985 |
-
candidate_eval_cache: Dict[Tuple[str, str], Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]] = {}
|
| 1986 |
-
|
| 1987 |
-
def _evaluate_candidate_state(body_text: str, title_text: str) -> Tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
|
| 1988 |
-
key = ((body_text or "").strip(), (title_text or "").strip())
|
| 1989 |
-
cached = candidate_eval_cache.get(key)
|
| 1990 |
-
if cached is not None:
|
| 1991 |
-
return cached
|
| 1992 |
-
analysis = _build_analysis_snapshot(key[0], competitors, keywords, language, key[1], competitor_titles)
|
| 1993 |
-
semantic = _build_semantic_snapshot(key[0], competitors, language)
|
| 1994 |
-
metrics = _compute_metrics(analysis, semantic, keywords, language, bert_stage_target=bert_stage_target)
|
| 1995 |
-
candidate_eval_cache[key] = (analysis, semantic, metrics)
|
| 1996 |
-
return analysis, semantic, metrics
|
| 1997 |
-
|
| 1998 |
def _pack_result(stopped_early: bool = False, stop_reason: str = "") -> Dict[str, Any]:
|
| 1999 |
# Title string must match what last metrics used (title_analysis.target_title), not only the mutable var.
|
| 2000 |
_ta = (current_analysis or {}).get("title_analysis") or {}
|
|
@@ -2230,7 +2177,13 @@ def optimize_text(
|
|
| 2230 |
continue
|
| 2231 |
|
| 2232 |
quality_issues = _validate_title_candidate(edited_text)
|
| 2233 |
-
cand_analysis, cand_semantic, cand_metrics = _evaluate_candidate_state(current_text, edited_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2234 |
before_rel = float(current_metrics.get("title_bert_score") or 0.0)
|
| 2235 |
after_rel = float(cand_metrics.get("title_bert_score") or 0.0)
|
| 2236 |
chunk_delta = round(after_rel - before_rel, 4)
|
|
@@ -2386,10 +2339,8 @@ def optimize_text(
|
|
| 2386 |
logs.append({"step": step + 1, "status": "stopped", "reason": "No sentences available for editing."})
|
| 2387 |
break
|
| 2388 |
|
| 2389 |
-
span_trials =
|
| 2390 |
-
local_candidates =
|
| 2391 |
-
if cascade_level > 2:
|
| 2392 |
-
local_candidates = min(6, max(local_candidates, eff_cand + 1))
|
| 2393 |
span_trials_eff = span_trials
|
| 2394 |
|
| 2395 |
for st in range(span_trials):
|
|
@@ -2531,43 +2482,6 @@ def optimize_text(
|
|
| 2531 |
)
|
| 2532 |
continue
|
| 2533 |
|
| 2534 |
-
prefilter_reasons = _local_prefilter_reasons(
|
| 2535 |
-
goal_type=str(goal.get("type", "")),
|
| 2536 |
-
focus_terms=goal.get("focus_terms", []) or [],
|
| 2537 |
-
edited_text=edited_text,
|
| 2538 |
-
chunk_delta=chunk_delta,
|
| 2539 |
-
before_rel=before_rel,
|
| 2540 |
-
after_rel=after_rel,
|
| 2541 |
-
)
|
| 2542 |
-
if prefilter_reasons:
|
| 2543 |
-
candidates.append(
|
| 2544 |
-
{
|
| 2545 |
-
"candidate_index": candidate_idx,
|
| 2546 |
-
"error": "local_prefilter_rejected",
|
| 2547 |
-
"valid": False,
|
| 2548 |
-
"goal_improved": False,
|
| 2549 |
-
"local_chunk_improved": local_chunk_improved,
|
| 2550 |
-
"chunk_goal_delta": chunk_delta,
|
| 2551 |
-
"invalid_reasons": prefilter_reasons,
|
| 2552 |
-
"delta_score": -999.0,
|
| 2553 |
-
"candidate_score": None,
|
| 2554 |
-
"sentence_after": edited_text,
|
| 2555 |
-
"chunk_relevance_before": before_rel,
|
| 2556 |
-
"chunk_relevance_after": after_rel,
|
| 2557 |
-
"term_diff": _term_diff(original_span_text, edited_text, language),
|
| 2558 |
-
"llm_prompt_debug": prompt_debug,
|
| 2559 |
-
"llm_rationale": llm_rationale,
|
| 2560 |
-
"operation": operation,
|
| 2561 |
-
"sentence_index": sent_idx,
|
| 2562 |
-
"span_start": span_start,
|
| 2563 |
-
"span_end": span_end,
|
| 2564 |
-
"span_variant": span_variant,
|
| 2565 |
-
"phrase_strategy_used": strategy_variant,
|
| 2566 |
-
"sentence_before": original_span_text,
|
| 2567 |
-
}
|
| 2568 |
-
)
|
| 2569 |
-
continue
|
| 2570 |
-
|
| 2571 |
candidate_key = (operation, span_start, span_end, edited_text.strip().lower())
|
| 2572 |
if candidate_key in seen_candidate_rewrites:
|
| 2573 |
candidates.append(
|
|
@@ -2604,7 +2518,13 @@ def optimize_text(
|
|
| 2604 |
candidate_sentences = _replace_span(sentences, span_start, span_end, edited_text)
|
| 2605 |
candidate_text = " ".join(candidate_sentences).strip()
|
| 2606 |
|
| 2607 |
-
cand_analysis, cand_semantic, cand_metrics = _evaluate_candidate_state(candidate_text, current_title)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2608 |
valid, invalid_reasons, goal_improved = _is_candidate_valid(
|
| 2609 |
current_metrics, cand_metrics, goal["type"], goal["label"], optimization_mode
|
| 2610 |
)
|
|
@@ -2840,7 +2760,13 @@ def optimize_text(
|
|
| 2840 |
continue
|
| 2841 |
batch_sentences = _apply_edits_to_sentences(sentences, edits)
|
| 2842 |
batch_text = " ".join(batch_sentences).strip()
|
| 2843 |
-
batch_analysis, batch_semantic, batch_metrics = _evaluate_candidate_state(batch_text, current_title)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2844 |
b_valid, b_reasons, b_goal = _is_candidate_valid(
|
| 2845 |
current_metrics, batch_metrics, goal["type"], goal["label"], optimization_mode
|
| 2846 |
)
|
|
|
|
| 1129 |
return operation, span_start, span_end, sent_idx, variant_pick
|
| 1130 |
|
| 1131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1132 |
def _is_noise_like_sentence(text: str) -> bool:
|
| 1133 |
s = (text or "").strip()
|
| 1134 |
if not s:
|
|
|
|
| 1942 |
def _cancelled() -> bool:
|
| 1943 |
return cancel_event is not None and getattr(cancel_event, "is_set", lambda: False)()
|
| 1944 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1945 |
def _pack_result(stopped_early: bool = False, stop_reason: str = "") -> Dict[str, Any]:
|
| 1946 |
# Title string must match what last metrics used (title_analysis.target_title), not only the mutable var.
|
| 1947 |
_ta = (current_analysis or {}).get("title_analysis") or {}
|
|
|
|
| 2177 |
continue
|
| 2178 |
|
| 2179 |
quality_issues = _validate_title_candidate(edited_text)
|
| 2180 |
+
cand_analysis = _build_analysis_snapshot(
|
| 2181 |
+
current_text, competitors, keywords, language, edited_text, competitor_titles
|
| 2182 |
+
)
|
| 2183 |
+
cand_semantic = _build_semantic_snapshot(current_text, competitors, language)
|
| 2184 |
+
cand_metrics = _compute_metrics(
|
| 2185 |
+
cand_analysis, cand_semantic, keywords, language, bert_stage_target=bert_stage_target
|
| 2186 |
+
)
|
| 2187 |
before_rel = float(current_metrics.get("title_bert_score") or 0.0)
|
| 2188 |
after_rel = float(cand_metrics.get("title_bert_score") or 0.0)
|
| 2189 |
chunk_delta = round(after_rel - before_rel, 4)
|
|
|
|
| 2339 |
logs.append({"step": step + 1, "status": "stopped", "reason": "No sentences available for editing."})
|
| 2340 |
break
|
| 2341 |
|
| 2342 |
+
span_trials = 2 if cascade_level <= 2 else 3
|
| 2343 |
+
local_candidates = eff_cand if cascade_level <= 2 else min(6, eff_cand + 1)
|
|
|
|
|
|
|
| 2344 |
span_trials_eff = span_trials
|
| 2345 |
|
| 2346 |
for st in range(span_trials):
|
|
|
|
| 2482 |
)
|
| 2483 |
continue
|
| 2484 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2485 |
candidate_key = (operation, span_start, span_end, edited_text.strip().lower())
|
| 2486 |
if candidate_key in seen_candidate_rewrites:
|
| 2487 |
candidates.append(
|
|
|
|
| 2518 |
candidate_sentences = _replace_span(sentences, span_start, span_end, edited_text)
|
| 2519 |
candidate_text = " ".join(candidate_sentences).strip()
|
| 2520 |
|
| 2521 |
+
cand_analysis = _build_analysis_snapshot(
|
| 2522 |
+
candidate_text, competitors, keywords, language, current_title, competitor_titles
|
| 2523 |
+
)
|
| 2524 |
+
cand_semantic = _build_semantic_snapshot(candidate_text, competitors, language)
|
| 2525 |
+
cand_metrics = _compute_metrics(
|
| 2526 |
+
cand_analysis, cand_semantic, keywords, language, bert_stage_target=bert_stage_target
|
| 2527 |
+
)
|
| 2528 |
valid, invalid_reasons, goal_improved = _is_candidate_valid(
|
| 2529 |
current_metrics, cand_metrics, goal["type"], goal["label"], optimization_mode
|
| 2530 |
)
|
|
|
|
| 2760 |
continue
|
| 2761 |
batch_sentences = _apply_edits_to_sentences(sentences, edits)
|
| 2762 |
batch_text = " ".join(batch_sentences).strip()
|
| 2763 |
+
batch_analysis = _build_analysis_snapshot(
|
| 2764 |
+
batch_text, competitors, keywords, language, current_title, competitor_titles
|
| 2765 |
+
)
|
| 2766 |
+
batch_semantic = _build_semantic_snapshot(batch_text, competitors, language)
|
| 2767 |
+
batch_metrics = _compute_metrics(
|
| 2768 |
+
batch_analysis, batch_semantic, keywords, language, bert_stage_target=bert_stage_target
|
| 2769 |
+
)
|
| 2770 |
b_valid, b_reasons, b_goal = _is_candidate_valid(
|
| 2771 |
current_metrics, batch_metrics, goal["type"], goal["label"], optimization_mode
|
| 2772 |
)
|