Spaces:
Running
Running
Add multi-objective candidate utility for optimizer decisions.
Browse files
Introduce a dynamic utility function that balances BERT phrase gains with cross-metric regression penalties, and use it in candidate selection/ranking to preserve future optimization capacity. Update full documentation to describe the utility-driven acceptance logic.
Made-with: Cursor
- docs/FULL_FUNCTIONAL_DOCUMENTATION.md +6 -0
- optimizer.py +72 -0
docs/FULL_FUNCTIONAL_DOCUMENTATION.md
CHANGED
|
@@ -439,6 +439,11 @@ HTML extraction pipeline:
|
|
| 439 |
- `_goal_improved`:
|
| 440 |
- для BERT: улучшение score целевой фразы минимум на `BERT_GOAL_DELTA_MIN=0.005` **или** снижение `bert_low_count`;
|
| 441 |
- для других целей: профильные метрики улучшения.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
- `_is_candidate_valid`:
|
| 443 |
- hard constraints (не ухудшать критичные метрики сверх допустимого);
|
| 444 |
- режимы `conservative/balanced/aggressive` задают пороги регрессии;
|
|
@@ -466,6 +471,7 @@ HTML extraction pipeline:
|
|
| 466 |
- если локально улучшает чанк, но глобально не проходит — кандидат кладется в queue.
|
| 467 |
- для BERT учитывается прямой документный `bert_phrase_delta` по целевой фразе: даже небольшой положительный рост считается полезным шагом при отсутствии регрессий по guardrails.
|
| 468 |
- если нет `promotable` кандидата, но есть guardrail-valid кандидат с `local_chunk_improved`, применяется режим `applied_local_progress`: правка принимается локально и оптимизация переходит к следующему чанку (накопительная стратегия).
|
|
|
|
| 469 |
9. batch-логика queue:
|
| 470 |
- optimizer пробует совместно применить комбинации из 2..4 локально сильных не конфликтующих правок;
|
| 471 |
- batch принимается только при прохождении глобальных ограничений и положительном совокупном локальном приросте.
|
|
|
|
| 439 |
- `_goal_improved`:
|
| 440 |
- для BERT: улучшение score целевой фразы минимум на `BERT_GOAL_DELTA_MIN=0.005` **или** снижение `bert_low_count`;
|
| 441 |
- для других целей: профильные метрики улучшения.
|
| 442 |
+
- `_candidate_utility`:
|
| 443 |
+
- многоцелевая функция полезности кандидата с динамическими весами;
|
| 444 |
+
- учитывает одновременно `bert_phrase_delta`, `chunk_goal_delta`, `score_delta`;
|
| 445 |
+
- добавляет мягкие штрафы за регрессии по BM25/BERT-low/N-gram/SemanticGap/Title;
|
| 446 |
+
- в BERT-push режиме (когда фраза ниже порога) усиливает вес phrase-level прогресса.
|
| 447 |
- `_is_candidate_valid`:
|
| 448 |
- hard constraints (не ухудшать критичные метрики сверх допустимого);
|
| 449 |
- режимы `conservative/balanced/aggressive` задают пороги регрессии;
|
|
|
|
| 471 |
- если локально улучшает чанк, но глобально не проходит — кандидат кладется в queue.
|
| 472 |
- для BERT учитывается прямой документный `bert_phrase_delta` по целевой фразе: даже небольшой положительный рост считается полезным шагом при отсутствии регрессий по guardrails.
|
| 473 |
- если нет `promotable` кандидата, но есть guardrail-valid кандидат с `local_chunk_improved`, применяется режим `applied_local_progress`: правка принимается локально и оптимизация переходит к следующему чанку (накопительная стратегия).
|
| 474 |
+
- ранжирование и выбор best-кандидата дополнительно учитывают `candidate_utility`, чтобы BERT-оптимизация не вредила следующим этапам по другим метрикам.
|
| 475 |
9. batch-логика queue:
|
| 476 |
- optimizer пробует совместно применить комбинации из 2..4 локально сильных не конфликтующих правок;
|
| 477 |
- batch принимается только при прохождении глобальных ограничений и положительном совокупном локальном приросте.
|
optimizer.py
CHANGED
|
@@ -815,6 +815,58 @@ def _bert_phrase_delta(goal_label: str, prev_metrics: Dict[str, Any], next_metri
|
|
| 815 |
return round(next_phrase - prev_phrase, 4)
|
| 816 |
|
| 817 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
def _is_candidate_valid(
|
| 819 |
prev_metrics: Dict[str, Any],
|
| 820 |
next_metrics: Dict[str, Any],
|
|
@@ -1069,6 +1121,15 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1069 |
)
|
| 1070 |
delta_score = round(cand_metrics["score"] - current_metrics["score"], 3)
|
| 1071 |
bert_phrase_delta = _bert_phrase_delta(goal["label"], current_metrics, cand_metrics) if goal.get("type") == "bert" else 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1072 |
md = _metrics_delta(current_metrics, cand_metrics)
|
| 1073 |
candidates.append(
|
| 1074 |
{
|
|
@@ -1097,6 +1158,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1097 |
"invalid_reasons": invalid_reasons,
|
| 1098 |
"delta_score": delta_score,
|
| 1099 |
"candidate_score": cand_metrics.get("score"),
|
|
|
|
| 1100 |
"metrics_delta": md,
|
| 1101 |
"edit_payload": {
|
| 1102 |
"operation": operation,
|
|
@@ -1146,6 +1208,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1146 |
c.get("goal_improved")
|
| 1147 |
or (goal.get("type") == "bert" and float(c.get("bert_phrase_delta") or 0.0) > 0.0)
|
| 1148 |
or float(c.get("candidate_score") or -1) > float(current_metrics["score"])
|
|
|
|
| 1149 |
)
|
| 1150 |
and (
|
| 1151 |
goal.get("type") != "bert"
|
|
@@ -1166,6 +1229,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1166 |
best_local = sorted(
|
| 1167 |
local_progress_candidates,
|
| 1168 |
key=lambda c: (
|
|
|
|
| 1169 |
float(c.get("chunk_goal_delta") or 0.0),
|
| 1170 |
float(c.get("bert_phrase_delta") or 0.0),
|
| 1171 |
float(c.get("candidate_score") or -999.0),
|
|
@@ -1202,6 +1266,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1202 |
"chosen_candidate_index": best_local.get("candidate_index"),
|
| 1203 |
"chosen_chunk_goal_delta": best_local.get("chunk_goal_delta"),
|
| 1204 |
"chosen_bert_phrase_delta": best_local.get("bert_phrase_delta"),
|
|
|
|
| 1205 |
"chosen_metrics_delta": best_local.get("metrics_delta"),
|
| 1206 |
"candidates": [
|
| 1207 |
{
|
|
@@ -1217,6 +1282,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1217 |
"llm_prompt_debug": c.get("llm_prompt_debug"),
|
| 1218 |
"llm_rationale": c.get("llm_rationale"),
|
| 1219 |
"metrics_delta": c.get("metrics_delta"),
|
|
|
|
| 1220 |
"invalid_reasons": c.get("invalid_reasons", []),
|
| 1221 |
"delta_score": c.get("delta_score"),
|
| 1222 |
"candidate_score": c.get("candidate_score"),
|
|
@@ -1241,6 +1307,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1241 |
]
|
| 1242 |
local_pool.sort(
|
| 1243 |
key=lambda c: (
|
|
|
|
| 1244 |
float(c.get("chunk_goal_delta") or -999.0),
|
| 1245 |
float(c.get("candidate_score") or -999.0),
|
| 1246 |
),
|
|
@@ -1364,6 +1431,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1364 |
"llm_prompt_debug": c.get("llm_prompt_debug"),
|
| 1365 |
"llm_rationale": c.get("llm_rationale"),
|
| 1366 |
"metrics_delta": c.get("metrics_delta"),
|
|
|
|
| 1367 |
"invalid_reasons": c.get("invalid_reasons", []),
|
| 1368 |
"delta_score": c.get("delta_score"),
|
| 1369 |
"candidate_score": c.get("candidate_score"),
|
|
@@ -1413,6 +1481,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1413 |
"llm_prompt_debug": c.get("llm_prompt_debug"),
|
| 1414 |
"llm_rationale": c.get("llm_rationale"),
|
| 1415 |
"metrics_delta": c.get("metrics_delta"),
|
|
|
|
| 1416 |
"invalid_reasons": c.get("invalid_reasons", []),
|
| 1417 |
"delta_score": c.get("delta_score"),
|
| 1418 |
"candidate_score": c.get("candidate_score"),
|
|
@@ -1434,6 +1503,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1434 |
valid_candidates,
|
| 1435 |
key=lambda c: (
|
| 1436 |
1 if c.get("goal_improved") else 0,
|
|
|
|
| 1437 |
float(c.get("bert_phrase_delta") or 0.0),
|
| 1438 |
float(c.get("chunk_goal_delta") or 0.0),
|
| 1439 |
c["metrics"]["score"],
|
|
@@ -1467,6 +1537,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1467 |
"metrics_after": current_metrics,
|
| 1468 |
"delta_score": round(current_metrics["score"] - prev_metrics["score"], 3),
|
| 1469 |
"chosen_candidate_index": best.get("candidate_index"),
|
|
|
|
| 1470 |
"candidates": [
|
| 1471 |
{
|
| 1472 |
"candidate_index": c.get("candidate_index"),
|
|
@@ -1481,6 +1552,7 @@ def optimize_text(request_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 1481 |
"llm_prompt_debug": c.get("llm_prompt_debug"),
|
| 1482 |
"llm_rationale": c.get("llm_rationale"),
|
| 1483 |
"metrics_delta": c.get("metrics_delta"),
|
|
|
|
| 1484 |
"invalid_reasons": c.get("invalid_reasons", []),
|
| 1485 |
"delta_score": c.get("delta_score"),
|
| 1486 |
"candidate_score": c.get("candidate_score"),
|
|
|
|
| 815 |
return round(next_phrase - prev_phrase, 4)
|
| 816 |
|
| 817 |
|
| 818 |
+
def _safe_delta(prev_metrics: Dict[str, Any], next_metrics: Dict[str, Any], key: str) -> float:
|
| 819 |
+
try:
|
| 820 |
+
return float(next_metrics.get(key, 0.0)) - float(prev_metrics.get(key, 0.0))
|
| 821 |
+
except Exception:
|
| 822 |
+
return 0.0
|
| 823 |
+
|
| 824 |
+
|
| 825 |
+
def _candidate_utility(
    *,
    prev_metrics: Dict[str, Any],
    next_metrics: Dict[str, Any],
    goal_type: str,
    goal_label: str,
    bert_phrase_delta: float,
    chunk_goal_delta: float,
    local_chunk_improved: bool,
) -> float:
    """Score a candidate edit with a weighted multi-objective utility.

    The utility is a weighted sum of the phrase-level BERT delta, the chunk
    goal delta and the overall ``score`` delta, plus a fixed bonus when the
    chunk improved locally, minus soft penalties for regressions in other
    metrics.

    Args:
        prev_metrics: Metrics mapping before the candidate edit.
        next_metrics: Metrics mapping after the candidate edit.
        goal_type: Goal type; ``"bert"`` enables the BERT-push weighting.
        goal_label: Target phrase; looked up (stripped, lower-cased) in
            ``prev_metrics["bert_phrase_scores"]``.
        bert_phrase_delta: Change of the target phrase score; ``None`` is
            treated as ``0.0``.
        chunk_goal_delta: Change of the chunk-level goal metric; ``None`` is
            treated as ``0.0``.
        local_chunk_improved: Whether the edit improved the chunk locally.

    Returns:
        The utility rounded to 4 decimal places; higher is better.
    """
    score_delta = _safe_delta(prev_metrics, next_metrics, "score")
    bm25_delta = _safe_delta(prev_metrics, next_metrics, "bm25_remove_count")
    bert_low_delta = _safe_delta(prev_metrics, next_metrics, "bert_low_count")
    ngram_delta = _safe_delta(prev_metrics, next_metrics, "ngram_signal_count")
    sem_gap_delta = _safe_delta(prev_metrics, next_metrics, "semantic_gap_count")
    title_delta = _safe_delta(prev_metrics, next_metrics, "title_bert_score")

    # Dynamic emphasis:
    # - if target phrase is still far from threshold, prioritize phrase-level BERT gains
    # - but keep non-BERT regressions as penalties to preserve future optimization capacity
    key = (goal_label or "").strip().lower()
    try:
        prev_phrase = float((prev_metrics.get("bert_phrase_scores") or {}).get(key, 0.0))
    except (AttributeError, TypeError, ValueError):
        # A malformed or None stored score is handled like a missing phrase
        # instead of aborting the candidate evaluation.
        prev_phrase = 0.0
    # BERT_TARGET_THRESHOLD is expected to be a module-level constant defined
    # elsewhere in this file.
    bert_push_mode = (goal_type == "bert" and prev_phrase < BERT_TARGET_THRESHOLD)

    w_phrase = 7.5 if bert_push_mode else 3.0
    w_chunk = 1.6 if bert_push_mode else 1.0
    w_score = 1.0

    # `or 0.0` mirrors how the ranking code reads these fields from candidates,
    # so a None delta contributes nothing instead of raising TypeError.
    utility = (
        (w_phrase * float(bert_phrase_delta or 0.0))
        + (w_chunk * float(chunk_goal_delta or 0.0))
        + (w_score * float(score_delta))
    )
    if local_chunk_improved:
        utility += 0.3

    # Cross-metric guardrails as soft penalties (in addition to hard validity checks).
    # Only increases of the count metrics are penalised; decreases earn no bonus.
    utility -= max(0.0, bm25_delta) * 1.8
    utility -= max(0.0, bert_low_delta) * 2.4
    utility -= max(0.0, ngram_delta) * 0.6
    utility -= max(0.0, sem_gap_delta) * 1.5
    # title_bert_score is penalised when it drops (negative delta).
    utility += min(0.0, title_delta) * 1.2
    return round(float(utility), 4)
|
| 868 |
+
|
| 869 |
+
|
| 870 |
def _is_candidate_valid(
|
| 871 |
prev_metrics: Dict[str, Any],
|
| 872 |
next_metrics: Dict[str, Any],
|
|
|
|
| 1121 |
)
|
| 1122 |
delta_score = round(cand_metrics["score"] - current_metrics["score"], 3)
|
| 1123 |
bert_phrase_delta = _bert_phrase_delta(goal["label"], current_metrics, cand_metrics) if goal.get("type") == "bert" else 0.0
|
| 1124 |
+
candidate_utility = _candidate_utility(
|
| 1125 |
+
prev_metrics=current_metrics,
|
| 1126 |
+
next_metrics=cand_metrics,
|
| 1127 |
+
goal_type=str(goal.get("type", "")),
|
| 1128 |
+
goal_label=str(goal.get("label", "")),
|
| 1129 |
+
bert_phrase_delta=bert_phrase_delta,
|
| 1130 |
+
chunk_goal_delta=chunk_delta,
|
| 1131 |
+
local_chunk_improved=local_chunk_improved,
|
| 1132 |
+
)
|
| 1133 |
md = _metrics_delta(current_metrics, cand_metrics)
|
| 1134 |
candidates.append(
|
| 1135 |
{
|
|
|
|
| 1158 |
"invalid_reasons": invalid_reasons,
|
| 1159 |
"delta_score": delta_score,
|
| 1160 |
"candidate_score": cand_metrics.get("score"),
|
| 1161 |
+
"candidate_utility": candidate_utility,
|
| 1162 |
"metrics_delta": md,
|
| 1163 |
"edit_payload": {
|
| 1164 |
"operation": operation,
|
|
|
|
| 1208 |
c.get("goal_improved")
|
| 1209 |
or (goal.get("type") == "bert" and float(c.get("bert_phrase_delta") or 0.0) > 0.0)
|
| 1210 |
or float(c.get("candidate_score") or -1) > float(current_metrics["score"])
|
| 1211 |
+
or float(c.get("candidate_utility") or -999.0) > 0.0
|
| 1212 |
)
|
| 1213 |
and (
|
| 1214 |
goal.get("type") != "bert"
|
|
|
|
| 1229 |
best_local = sorted(
|
| 1230 |
local_progress_candidates,
|
| 1231 |
key=lambda c: (
|
| 1232 |
+
float(c.get("candidate_utility") or -999.0),
|
| 1233 |
float(c.get("chunk_goal_delta") or 0.0),
|
| 1234 |
float(c.get("bert_phrase_delta") or 0.0),
|
| 1235 |
float(c.get("candidate_score") or -999.0),
|
|
|
|
| 1266 |
"chosen_candidate_index": best_local.get("candidate_index"),
|
| 1267 |
"chosen_chunk_goal_delta": best_local.get("chunk_goal_delta"),
|
| 1268 |
"chosen_bert_phrase_delta": best_local.get("bert_phrase_delta"),
|
| 1269 |
+
"chosen_candidate_utility": best_local.get("candidate_utility"),
|
| 1270 |
"chosen_metrics_delta": best_local.get("metrics_delta"),
|
| 1271 |
"candidates": [
|
| 1272 |
{
|
|
|
|
| 1282 |
"llm_prompt_debug": c.get("llm_prompt_debug"),
|
| 1283 |
"llm_rationale": c.get("llm_rationale"),
|
| 1284 |
"metrics_delta": c.get("metrics_delta"),
|
| 1285 |
+
"candidate_utility": c.get("candidate_utility"),
|
| 1286 |
"invalid_reasons": c.get("invalid_reasons", []),
|
| 1287 |
"delta_score": c.get("delta_score"),
|
| 1288 |
"candidate_score": c.get("candidate_score"),
|
|
|
|
| 1307 |
]
|
| 1308 |
local_pool.sort(
|
| 1309 |
key=lambda c: (
|
| 1310 |
+
float(c.get("candidate_utility") or -999.0),
|
| 1311 |
float(c.get("chunk_goal_delta") or -999.0),
|
| 1312 |
float(c.get("candidate_score") or -999.0),
|
| 1313 |
),
|
|
|
|
| 1431 |
"llm_prompt_debug": c.get("llm_prompt_debug"),
|
| 1432 |
"llm_rationale": c.get("llm_rationale"),
|
| 1433 |
"metrics_delta": c.get("metrics_delta"),
|
| 1434 |
+
"candidate_utility": c.get("candidate_utility"),
|
| 1435 |
"invalid_reasons": c.get("invalid_reasons", []),
|
| 1436 |
"delta_score": c.get("delta_score"),
|
| 1437 |
"candidate_score": c.get("candidate_score"),
|
|
|
|
| 1481 |
"llm_prompt_debug": c.get("llm_prompt_debug"),
|
| 1482 |
"llm_rationale": c.get("llm_rationale"),
|
| 1483 |
"metrics_delta": c.get("metrics_delta"),
|
| 1484 |
+
"candidate_utility": c.get("candidate_utility"),
|
| 1485 |
"invalid_reasons": c.get("invalid_reasons", []),
|
| 1486 |
"delta_score": c.get("delta_score"),
|
| 1487 |
"candidate_score": c.get("candidate_score"),
|
|
|
|
| 1503 |
valid_candidates,
|
| 1504 |
key=lambda c: (
|
| 1505 |
1 if c.get("goal_improved") else 0,
|
| 1506 |
+
float(c.get("candidate_utility") or -999.0),
|
| 1507 |
float(c.get("bert_phrase_delta") or 0.0),
|
| 1508 |
float(c.get("chunk_goal_delta") or 0.0),
|
| 1509 |
c["metrics"]["score"],
|
|
|
|
| 1537 |
"metrics_after": current_metrics,
|
| 1538 |
"delta_score": round(current_metrics["score"] - prev_metrics["score"], 3),
|
| 1539 |
"chosen_candidate_index": best.get("candidate_index"),
|
| 1540 |
+
"chosen_candidate_utility": best.get("candidate_utility"),
|
| 1541 |
"candidates": [
|
| 1542 |
{
|
| 1543 |
"candidate_index": c.get("candidate_index"),
|
|
|
|
| 1552 |
"llm_prompt_debug": c.get("llm_prompt_debug"),
|
| 1553 |
"llm_rationale": c.get("llm_rationale"),
|
| 1554 |
"metrics_delta": c.get("metrics_delta"),
|
| 1555 |
+
"candidate_utility": c.get("candidate_utility"),
|
| 1556 |
"invalid_reasons": c.get("invalid_reasons", []),
|
| 1557 |
"delta_score": c.get("delta_score"),
|
| 1558 |
"candidate_score": c.get("candidate_score"),
|