Spaces:
Running
Running
Food Desert committed on
Commit ·
82fe126
1
Parent(s): 33fc1b0
Polish Gradio UI layout/tooltips and remove dead helper code
Browse files- app.py +486 -241
- data/runtime_metrics/ui_pipeline_timings.jsonl +44 -0
- mascotimages/transparentsquirrel.png +3 -0
- psq_rag/parsing/prompt_grammar.py +23 -60
- psq_rag/retrieval/psq_retrieval.py +0 -26
- psq_rag/retrieval/state.py +5 -49
- psq_rag/tagging/categorized_suggestions.py +0 -22
- psq_rag/ui/group_ranked_display.py +0 -39
app.py
CHANGED
|
@@ -1,21 +1,27 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import os
|
| 3 |
-
import logging
|
| 4 |
-
import time
|
| 5 |
-
import json
|
| 6 |
-
import csv
|
| 7 |
-
|
| 8 |
-
from
|
| 9 |
-
from
|
| 10 |
-
from
|
| 11 |
-
from
|
|
|
|
| 12 |
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
|
| 13 |
|
| 14 |
from psq_rag.pipeline.preproc import extract_user_provided_tags_upto_3_words
|
| 15 |
from psq_rag.llm.rewrite import llm_rewrite_prompt
|
| 16 |
from psq_rag.retrieval.psq_retrieval import psq_candidates_from_rewrite_phrases, _norm_tag_for_lookup
|
| 17 |
from psq_rag.llm.select import llm_select_indices, llm_infer_structural_tags, llm_infer_probe_tags
|
| 18 |
-
from psq_rag.retrieval.state import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
from psq_rag.ui.group_ranked_display import rank_groups_from_tfidf, _load_enabled_groups
|
| 20 |
|
| 21 |
|
|
@@ -46,6 +52,15 @@ def _display_tag_text(tag: str) -> str:
|
|
| 46 |
return tag.replace("_", " ")
|
| 47 |
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
def _normalize_selection_origin(origin: str) -> str:
|
| 50 |
o = (origin or "").strip().lower()
|
| 51 |
if o in {"rewrite", "selection", "probe", "structural", "user", "candidate"}:
|
|
@@ -53,11 +68,53 @@ def _normalize_selection_origin(origin: str) -> str:
|
|
| 53 |
return "selection"
|
| 54 |
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
def _choice_label_with_source_meta(tag: str, *, origin: str, preselected: bool) -> str:
|
| 57 |
-
# Marker is stripped client-side and converted into data attributes for CSS-driven colors.
|
| 58 |
origin_norm = _normalize_selection_origin(origin)
|
| 59 |
pre = "1" if preselected else "0"
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
def _selection_source_rank(origin: str) -> int:
|
|
@@ -246,6 +303,7 @@ def _build_toggle_rows(
|
|
| 246 |
*,
|
| 247 |
seed_terms: List[str],
|
| 248 |
selected_tags: List[str],
|
|
|
|
| 249 |
tag_selection_origins: Dict[str, str],
|
| 250 |
implied_parent_map: Dict[str, str],
|
| 251 |
top_groups: int,
|
|
@@ -267,18 +325,72 @@ def _build_toggle_rows(
|
|
| 267 |
)
|
| 268 |
)
|
| 269 |
selected_index: Dict[str, int] = {t: i for i, t in enumerate(selected_active)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
selected_other = _order_selected_tags_for_row(
|
| 283 |
row_selected_tags=selected_other_raw,
|
| 284 |
selected_index=selected_index,
|
|
@@ -295,7 +407,7 @@ def _build_toggle_rows(
|
|
| 295 |
row_defs.append(
|
| 296 |
{
|
| 297 |
"name": "selected_other",
|
| 298 |
-
"label":
|
| 299 |
"tags": selected_other,
|
| 300 |
"tag_meta": selected_other_meta,
|
| 301 |
}
|
|
@@ -329,16 +441,20 @@ def _build_toggle_rows(
|
|
| 329 |
row_defs.append(
|
| 330 |
{
|
| 331 |
"name": group_name,
|
| 332 |
-
"label":
|
| 333 |
"tags": merged,
|
| 334 |
"tag_meta": tag_meta,
|
| 335 |
}
|
| 336 |
)
|
| 337 |
-
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
|
| 340 |
|
| 341 |
-
def _build_display_audit_line(
|
| 342 |
row_defs: List[Dict[str, Any]],
|
| 343 |
*,
|
| 344 |
active_selected_tags: List[str],
|
|
@@ -366,12 +482,14 @@ def _build_display_audit_line(
|
|
| 366 |
row_name = row.get("name", "")
|
| 367 |
row_label = row.get("label", row_name)
|
| 368 |
for tag in row.get("tags", []):
|
| 369 |
-
rec = info_by_tag.setdefault(tag, {"rows": [], "sources": set()})
|
| 370 |
-
rec["rows"].append(row_label)
|
| 371 |
-
if row_name == "selected_other":
|
| 372 |
-
rec["sources"].add("selected_other_row")
|
| 373 |
-
|
| 374 |
-
rec["sources"].add("
|
|
|
|
|
|
|
| 375 |
if tag in active_set:
|
| 376 |
rec["sources"].add("selected_active")
|
| 377 |
if tag in direct_set:
|
|
@@ -390,12 +508,12 @@ def _build_display_audit_line(
|
|
| 390 |
for tag, rec in sorted(info_by_tag.items())
|
| 391 |
],
|
| 392 |
}
|
| 393 |
-
return "Display Tag Audit: " + json.dumps(payload, ensure_ascii=True)
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
def _build_row_component_updates(
|
| 397 |
-
row_defs: List[Dict[str, Any]],
|
| 398 |
-
selected_tags: List[str],
|
| 399 |
max_rows: int,
|
| 400 |
):
|
| 401 |
selected = {t for t in (selected_tags or []) if t}
|
|
@@ -410,7 +528,7 @@ def _build_row_component_updates(
|
|
| 410 |
values = [t for t in tags if t in selected]
|
| 411 |
row_values_state.append(values)
|
| 412 |
visible = bool(tags)
|
| 413 |
-
header_updates.append(gr.update(value=
|
| 414 |
tag_meta = row.get("tag_meta", {}) if isinstance(row.get("tag_meta", {}), dict) else {}
|
| 415 |
choices = []
|
| 416 |
for t in tags:
|
|
@@ -442,8 +560,9 @@ def _on_toggle_row(
|
|
| 442 |
max_rows: int,
|
| 443 |
):
|
| 444 |
row_defs = row_defs_state or []
|
|
|
|
| 445 |
selected = set(selected_tags_state or [])
|
| 446 |
-
row =
|
| 447 |
row_tags = list(dict.fromkeys(row.get("tags", [])))
|
| 448 |
row_tag_set = set(row_tags)
|
| 449 |
row_tag_by_norm = {_norm_tag_for_lookup(t): t for t in row_tags}
|
|
@@ -459,16 +578,32 @@ def _on_toggle_row(
|
|
| 459 |
if mapped:
|
| 460 |
new_set.add(mapped)
|
| 461 |
|
| 462 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
selected.difference_update(row_tag_set)
|
| 464 |
selected.update(new_set)
|
| 465 |
toggled_tags = prev_row_selected ^ new_set
|
| 466 |
|
| 467 |
-
# Recompute row selections, but only push UI updates to rows touched by the toggled tags.
|
| 468 |
new_row_values_state: List[List[str]] = []
|
| 469 |
affected_rows: Set[int] = {row_idx}
|
| 470 |
-
for idx,
|
| 471 |
-
tags = list(dict.fromkeys(
|
| 472 |
values = [t for t in tags if t in selected]
|
| 473 |
new_row_values_state.append(values)
|
| 474 |
if toggled_tags and any(t in toggled_tags for t in tags):
|
|
@@ -476,40 +611,60 @@ def _on_toggle_row(
|
|
| 476 |
|
| 477 |
checkbox_updates = []
|
| 478 |
for idx in range(max_rows):
|
| 479 |
-
if idx
|
|
|
|
|
|
|
|
|
|
| 480 |
checkbox_updates.append(gr.update(value=new_row_values_state[idx]))
|
| 481 |
else:
|
| 482 |
-
checkbox_updates.append(gr.
|
| 483 |
|
| 484 |
-
prompt_text = _compose_toggle_prompt_text(sorted(selected),
|
| 485 |
return [sorted(selected), new_row_values_state, prompt_text, *checkbox_updates]
|
| 486 |
|
| 487 |
|
| 488 |
-
def _build_ui_payload(
|
| 489 |
-
*,
|
| 490 |
-
console_text: str,
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
):
|
| 495 |
prompt_text, row_values_state, header_updates, checkbox_updates = _build_row_component_updates(
|
| 496 |
row_defs=row_defs,
|
| 497 |
selected_tags=selected_tags,
|
| 498 |
max_rows=display_max_rows_default,
|
| 499 |
)
|
| 500 |
-
return [
|
| 501 |
-
console_text,
|
| 502 |
-
|
| 503 |
-
prompt_text,
|
| 504 |
-
sorted(set(selected_tags or [])),
|
| 505 |
-
row_defs,
|
| 506 |
-
row_values_state,
|
| 507 |
-
*header_updates,
|
| 508 |
*checkbox_updates,
|
| 509 |
-
]
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
prompt_in: str,
|
| 514 |
rewritten: str,
|
| 515 |
structural_tags: List[str],
|
|
@@ -799,42 +954,112 @@ css = """
|
|
| 799 |
.source-legend {
|
| 800 |
display: flex;
|
| 801 |
flex-wrap: wrap;
|
|
|
|
| 802 |
gap: 8px;
|
| 803 |
margin: 4px 0 10px 0;
|
| 804 |
}
|
| 805 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 806 |
.source-legend .chip {
|
| 807 |
display: inline-flex;
|
| 808 |
align-items: center;
|
| 809 |
-
|
| 810 |
-
border
|
| 811 |
-
|
| 812 |
-
padding: 5px 10px;
|
| 813 |
font-size: 0.85rem;
|
| 814 |
-
font-weight:
|
| 815 |
-
color: #
|
| 816 |
background: #f3f6fb;
|
| 817 |
}
|
| 818 |
|
| 819 |
-
.source-legend .
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
| 823 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 824 |
}
|
| 825 |
|
| 826 |
-
.
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
|
| 830 |
-
|
| 831 |
-
|
| 832 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 833 |
"""
|
| 834 |
|
| 835 |
client_js = """
|
| 836 |
() => {
|
| 837 |
-
const markerRe = /\\s*\\[\\[psq:([a-z_]+):(0|1)\\]\\]\\s*$/;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 838 |
const applyTagMeta = () => {
|
| 839 |
const labels = document.querySelectorAll(".lego-tags label");
|
| 840 |
labels.forEach((label) => {
|
|
@@ -845,6 +1070,14 @@ client_js = """
|
|
| 845 |
if (!match) return;
|
| 846 |
label.dataset.psqOrigin = match[1];
|
| 847 |
label.dataset.psqPreselected = match[2];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 848 |
span.textContent = text.replace(markerRe, "");
|
| 849 |
});
|
| 850 |
};
|
|
@@ -939,12 +1172,11 @@ def rag_pipeline_ui(
|
|
| 939 |
log("Start: received prompt")
|
| 940 |
prompt_in = (user_prompt or "").strip()
|
| 941 |
if not prompt_in:
|
| 942 |
-
return _build_ui_payload(
|
| 943 |
-
console_text="Error: empty prompt",
|
| 944 |
-
|
| 945 |
-
|
| 946 |
-
|
| 947 |
-
)
|
| 948 |
|
| 949 |
log("Input:")
|
| 950 |
log(prompt_in)
|
|
@@ -1001,10 +1233,10 @@ def rag_pipeline_ui(
|
|
| 1001 |
|
| 1002 |
|
| 1003 |
log("Step 2: Prompt Squirrel retrieval (hidden)")
|
| 1004 |
-
try:
|
| 1005 |
-
t0 = time.perf_counter()
|
| 1006 |
-
retrieval_context_tags = list(dict.fromkeys((structural_tags or []) + (probe_tags or [])))
|
| 1007 |
-
rewrite_phrases = [p.strip() for p in (rewrite_for_retrieval or "").split(",") if p.strip()]
|
| 1008 |
retrieval_result = psq_candidates_from_rewrite_phrases(
|
| 1009 |
rewrite_phrases=rewrite_phrases,
|
| 1010 |
allow_nsfw_tags=allow_nsfw_tags,
|
|
@@ -1021,10 +1253,10 @@ def rag_pipeline_ui(
|
|
| 1021 |
if selection_candidate_cap > 0 and len(candidates) > selection_candidate_cap:
|
| 1022 |
candidates = candidates[:selection_candidate_cap]
|
| 1023 |
log(f"Selection candidate cap applied: {selection_candidate_cap}")
|
| 1024 |
-
dt = time.perf_counter()-t0
|
| 1025 |
-
_record_timing("retrieval", dt)
|
| 1026 |
-
log(f"Retrieval: {dt:.2f}s")
|
| 1027 |
-
log(f"Retrieved {len(candidates)} candidate tags")
|
| 1028 |
if verbose_retrieval:
|
| 1029 |
log(f"Total unique candidates: {len(candidates)}")
|
| 1030 |
limit = None if verbose_retrieval_all else max(1, int(verbose_retrieval_limit))
|
|
@@ -1061,11 +1293,19 @@ def rag_pipeline_ui(
|
|
| 1061 |
f" {tag}{alias_part} | fasttext={fasttext_str} context={context_str} "
|
| 1062 |
f"combined={combined_str} count={count}"
|
| 1063 |
)
|
| 1064 |
-
if limit is not None and len(rows) > limit:
|
| 1065 |
-
log(f" ... ({len(rows) - limit} more)")
|
| 1066 |
-
except Exception as e:
|
| 1067 |
-
log(f"Retrieval fallback: {type(e).__name__}: {e}")
|
| 1068 |
-
candidates = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1069 |
|
| 1070 |
log("Step 3: LLM index selection (uses rewrite + structural/probe context)")
|
| 1071 |
selection_query = _build_selection_query(
|
|
@@ -1204,6 +1444,7 @@ def rag_pipeline_ui(
|
|
| 1204 |
toggle_rows = _build_toggle_rows(
|
| 1205 |
seed_terms=seed_terms,
|
| 1206 |
selected_tags=active_selected_tags,
|
|
|
|
| 1207 |
tag_selection_origins=tag_selection_origins,
|
| 1208 |
implied_parent_map=implied_parent_map,
|
| 1209 |
top_groups=max(1, int(display_top_groups)),
|
|
@@ -1213,131 +1454,115 @@ def rag_pipeline_ui(
|
|
| 1213 |
dt = time.perf_counter()-t0
|
| 1214 |
_record_timing("group_display", dt)
|
| 1215 |
log(f"Ranked group display: {dt:.2f}s ({len(toggle_rows)} rows)")
|
| 1216 |
-
log(
|
| 1217 |
-
_build_display_audit_line(
|
| 1218 |
-
toggle_rows,
|
| 1219 |
-
active_selected_tags=active_selected_tags,
|
| 1220 |
-
direct_selected_tags=direct_selected_tags,
|
| 1221 |
-
implied_selected_tags=implied_selected_tags,
|
| 1222 |
-
)
|
| 1223 |
-
)
|
| 1224 |
-
|
| 1225 |
-
total_dt = time.perf_counter()-t_total0
|
| 1226 |
-
_emit_timing_summary(total_dt)
|
| 1227 |
_append_timing_jsonl(total_dt)
|
| 1228 |
log("Done: final prompt ready")
|
| 1229 |
-
return _build_ui_payload(
|
| 1230 |
-
console_text="\n".join(logs),
|
| 1231 |
-
|
| 1232 |
-
|
| 1233 |
-
|
| 1234 |
-
)
|
| 1235 |
|
| 1236 |
except Exception as e:
|
| 1237 |
log(f"Error: {type(e).__name__}: {e}")
|
| 1238 |
-
return _build_ui_payload(
|
| 1239 |
-
console_text="\n".join(logs),
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
)
|
| 1244 |
|
| 1245 |
|
| 1246 |
|
| 1247 |
with gr.Blocks(css=css, js=client_js) as app:
|
| 1248 |
-
with gr.Row():
|
| 1249 |
-
with gr.Column(scale=3, elem_classes=["prompt-col"]):
|
| 1250 |
-
|
| 1251 |
-
|
| 1252 |
-
|
| 1253 |
-
|
| 1254 |
-
)
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
|
| 1258 |
-
|
| 1259 |
-
|
| 1260 |
-
|
| 1261 |
-
|
| 1262 |
-
|
| 1263 |
-
|
| 1264 |
-
|
| 1265 |
-
|
| 1266 |
-
|
| 1267 |
-
|
| 1268 |
-
|
| 1269 |
-
|
| 1270 |
-
|
| 1271 |
-
|
| 1272 |
-
|
| 1273 |
-
|
| 1274 |
-
|
| 1275 |
-
|
| 1276 |
-
|
| 1277 |
-
|
| 1278 |
-
|
| 1279 |
-
|
| 1280 |
-
|
| 1281 |
-
|
| 1282 |
-
|
| 1283 |
-
|
| 1284 |
-
|
| 1285 |
-
suggested_prompt = gr.Textbox(
|
| 1286 |
-
label="Suggested Prompt (From Toggled Tags)",
|
| 1287 |
-
lines=3,
|
| 1288 |
-
interactive=False,
|
| 1289 |
-
show_copy_button=True,
|
| 1290 |
-
placeholder="Comma-separated tags selected in the rows below."
|
| 1291 |
-
)
|
| 1292 |
-
|
| 1293 |
-
with gr.Accordion("Legacy Pipeline Prompt (for reference)", open=False):
|
| 1294 |
-
legacy_final_prompt = gr.Textbox(
|
| 1295 |
-
label="Legacy Final Prompt",
|
| 1296 |
-
lines=3,
|
| 1297 |
-
interactive=False,
|
| 1298 |
-
show_copy_button=True,
|
| 1299 |
-
)
|
| 1300 |
|
| 1301 |
selected_tags_state = gr.State([])
|
| 1302 |
row_defs_state = gr.State([])
|
| 1303 |
row_values_state = gr.State([])
|
| 1304 |
|
| 1305 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1306 |
gr.HTML(
|
| 1307 |
"""
|
| 1308 |
<div class="source-legend">
|
| 1309 |
-
<span class="
|
| 1310 |
-
<span class="chip
|
| 1311 |
-
<span class="chip
|
| 1312 |
-
<span class="chip
|
| 1313 |
-
<span class="chip
|
| 1314 |
-
<span class="chip
|
| 1315 |
-
<span class="chip
|
|
|
|
| 1316 |
</div>
|
| 1317 |
"""
|
| 1318 |
)
|
| 1319 |
-
gr.Markdown(
|
| 1320 |
-
"Rows are ranked by expected tag count (E). Within each row: structural -> probe -> selected, "
|
| 1321 |
-
"implied tags follow their triggering selected tag when possible, then unselected tags in confidence order."
|
| 1322 |
-
)
|
| 1323 |
-
row_headers: List[gr.Markdown] = []
|
| 1324 |
-
row_checkboxes: List[gr.CheckboxGroup] = []
|
| 1325 |
-
for _ in range(display_max_rows_default):
|
| 1326 |
-
row_headers.append(gr.Markdown(value="", visible=False))
|
| 1327 |
-
row_checkboxes.append(
|
| 1328 |
-
gr.CheckboxGroup(
|
| 1329 |
-
choices=[],
|
| 1330 |
-
value=[],
|
| 1331 |
-
visible=False,
|
| 1332 |
-
interactive=True,
|
| 1333 |
-
container=False,
|
| 1334 |
-
elem_classes=["lego-tags"],
|
| 1335 |
-
)
|
| 1336 |
-
)
|
| 1337 |
-
|
| 1338 |
-
gr.Markdown(
|
| 1339 |
-
"Toggling a tag in any row toggles it everywhere else that tag appears."
|
| 1340 |
-
)
|
| 1341 |
|
| 1342 |
with gr.Accordion("Display Settings", open=False):
|
| 1343 |
with gr.Row():
|
|
@@ -1353,43 +1578,63 @@ then returns a cleaned, model-friendly prompt.
|
|
| 1353 |
label="Top Tags Shown Per Row",
|
| 1354 |
minimum=1,
|
| 1355 |
)
|
| 1356 |
-
display_rank_top_k = gr.Number(
|
| 1357 |
-
value=display_rank_top_k_default,
|
| 1358 |
-
precision=0,
|
| 1359 |
-
label="Top Tags Used for Row Ranking",
|
| 1360 |
-
minimum=1,
|
| 1361 |
-
)
|
| 1362 |
-
|
| 1363 |
-
|
| 1364 |
-
console
|
| 1365 |
-
|
| 1366 |
-
|
| 1367 |
-
|
| 1368 |
-
|
| 1369 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1370 |
*row_headers,
|
| 1371 |
*row_checkboxes,
|
| 1372 |
]
|
| 1373 |
|
| 1374 |
-
submit_button.click(
|
| 1375 |
-
|
| 1376 |
-
inputs=[
|
| 1377 |
-
outputs=run_outputs
|
| 1378 |
-
|
| 1379 |
-
|
| 1380 |
-
|
| 1381 |
-
rag_pipeline_ui,
|
| 1382 |
-
inputs=[image_tags, display_top_groups, display_top_tags_per_group, display_rank_top_k],
|
| 1383 |
-
outputs=run_outputs
|
| 1384 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1385 |
|
| 1386 |
for idx, row_cb in enumerate(row_checkboxes):
|
| 1387 |
-
row_cb.
|
| 1388 |
fn=lambda changed_values, selected_state, row_defs, row_values, i=idx: _on_toggle_row(
|
| 1389 |
i,
|
| 1390 |
changed_values,
|
| 1391 |
-
selected_state,
|
| 1392 |
-
row_defs,
|
| 1393 |
row_values,
|
| 1394 |
display_max_rows_default,
|
| 1395 |
),
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import logging
|
| 4 |
+
import time
|
| 5 |
+
import json
|
| 6 |
+
import csv
|
| 7 |
+
import base64
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from functools import lru_cache
|
| 10 |
+
from PIL import Image
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from typing import Any, Dict, List, Set, Tuple
|
| 13 |
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
|
| 14 |
|
| 15 |
from psq_rag.pipeline.preproc import extract_user_provided_tags_upto_3_words
|
| 16 |
from psq_rag.llm.rewrite import llm_rewrite_prompt
|
| 17 |
from psq_rag.retrieval.psq_retrieval import psq_candidates_from_rewrite_phrases, _norm_tag_for_lookup
|
| 18 |
from psq_rag.llm.select import llm_select_indices, llm_infer_structural_tags, llm_infer_probe_tags
|
| 19 |
+
from psq_rag.retrieval.state import (
|
| 20 |
+
expand_tags_via_implications,
|
| 21 |
+
get_tag_type_name,
|
| 22 |
+
get_tag_implications,
|
| 23 |
+
get_tag_counts,
|
| 24 |
+
)
|
| 25 |
from psq_rag.ui.group_ranked_display import rank_groups_from_tfidf, _load_enabled_groups
|
| 26 |
|
| 27 |
|
|
|
|
| 52 |
return tag.replace("_", " ")
|
| 53 |
|
| 54 |
|
| 55 |
+
def _display_row_label(name: str) -> str:
|
| 56 |
+
n = (name or "").strip()
|
| 57 |
+
if not n:
|
| 58 |
+
return ""
|
| 59 |
+
if n == "selected_other":
|
| 60 |
+
return "Selected (Other)"
|
| 61 |
+
return n.replace("_", " ").title()
|
| 62 |
+
|
| 63 |
+
|
| 64 |
def _normalize_selection_origin(origin: str) -> str:
|
| 65 |
o = (origin or "").strip().lower()
|
| 66 |
if o in {"rewrite", "selection", "probe", "structural", "user", "candidate"}:
|
|
|
|
| 68 |
return "selection"
|
| 69 |
|
| 70 |
|
| 71 |
+
@lru_cache(maxsize=1)
|
| 72 |
+
def _load_tag_wiki_defs() -> Dict[str, str]:
|
| 73 |
+
p = Path("data/tag_wiki_defs.json")
|
| 74 |
+
if not p.exists():
|
| 75 |
+
return {}
|
| 76 |
+
try:
|
| 77 |
+
with p.open("r", encoding="utf-8") as f:
|
| 78 |
+
data = json.load(f)
|
| 79 |
+
out: Dict[str, str] = {}
|
| 80 |
+
if isinstance(data, dict):
|
| 81 |
+
for k, v in data.items():
|
| 82 |
+
tag = _norm_tag_for_lookup(str(k))
|
| 83 |
+
text = " ".join(str(v or "").split())
|
| 84 |
+
if tag and text:
|
| 85 |
+
out[tag] = text
|
| 86 |
+
return out
|
| 87 |
+
except Exception:
|
| 88 |
+
return {}
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _tooltip_text_for_tag(tag: str) -> str:
    """Build the hover text for *tag*.

    The text has up to two lines, in this order: a ``Count: N`` line (with
    thousands separators) when the tag-count lookup returns an ``int`` for
    the normalized tag, followed by the tag's definition when one is known.
    Returns an empty string when neither is available.
    """
    key = _norm_tag_for_lookup(tag)

    try:
        occurrences = get_tag_counts().get(key)
    except Exception:
        # Best-effort: a failing count lookup only omits the count line.
        occurrences = None

    definition = _load_tag_wiki_defs().get(key, "")

    lines: List[str] = []
    if isinstance(occurrences, int):
        lines.append(f"Count: {occurrences:,}")
    if definition:
        lines.append(definition)
    return "\n".join(lines).strip()
|
| 107 |
+
|
| 108 |
+
|
| 109 |
def _choice_label_with_source_meta(tag: str, *, origin: str, preselected: bool) -> str:
    """Return the checkbox label for *tag* with a trailing metadata marker.

    The label is the display form of the tag followed by
    ``[[psq:<origin>:<0|1>:<tooltip>]]``, where ``<origin>`` is the
    normalized selection origin, the flag is ``1`` when *preselected* is
    truthy, and ``<tooltip>`` is the tag's tooltip text encoded as URL-safe
    base64 of its UTF-8 bytes (empty when there is no tooltip).
    """
    # Marker is stripped client-side and converted into data attributes for CSS-driven colors/tooltips.
    source = _normalize_selection_origin(origin)
    flag = "1" if preselected else "0"
    tip_text = _tooltip_text_for_tag(tag)
    encoded_tip = (
        base64.urlsafe_b64encode(tip_text.encode("utf-8")).decode("ascii")
        if tip_text
        else ""
    )
    visible_text = _display_tag_text(tag)
    return f"{visible_text} [[psq:{source}:{flag}:{encoded_tip}]]"
|
| 118 |
|
| 119 |
|
| 120 |
def _selection_source_rank(origin: str) -> int:
|
|
|
|
| 303 |
*,
|
| 304 |
seed_terms: List[str],
|
| 305 |
selected_tags: List[str],
|
| 306 |
+
retrieved_candidate_tags: List[str],
|
| 307 |
tag_selection_origins: Dict[str, str],
|
| 308 |
implied_parent_map: Dict[str, str],
|
| 309 |
top_groups: int,
|
|
|
|
| 325 |
)
|
| 326 |
)
|
| 327 |
selected_index: Dict[str, int] = {t: i for i, t in enumerate(selected_active)}
|
| 328 |
+
|
| 329 |
+
row_defs: List[Dict[str, Any]] = []
|
| 330 |
+
enabled_group_tag_sets: Dict[str, Set[str]] = {
|
| 331 |
+
name: {t for t in tags if not _is_artist_tag(t)}
|
| 332 |
+
for name, tags in groups_map.items()
|
| 333 |
+
}
|
| 334 |
+
tags_in_any_enabled_group: Set[str] = set()
|
| 335 |
+
for tag_set in enabled_group_tag_sets.values():
|
| 336 |
+
tags_in_any_enabled_group.update(tag_set)
|
| 337 |
+
|
| 338 |
+
displayed_group_names = [r.group_name for r in ranked_rows]
|
| 339 |
+
displayed_group_tag_sets: Dict[str, Set[str]] = {
|
| 340 |
+
name: enabled_group_tag_sets.get(name, set())
|
| 341 |
+
for name in displayed_group_names
|
| 342 |
+
}
|
| 343 |
+
tags_in_any_displayed_group: Set[str] = set()
|
| 344 |
+
for tag_set in displayed_group_tag_sets.values():
|
| 345 |
+
tags_in_any_displayed_group.update(tag_set)
|
| 346 |
|
| 347 |
+
retrieved_uncategorized_ranked = list(
|
| 348 |
+
dict.fromkeys(
|
| 349 |
+
_norm_tag_for_lookup(t)
|
| 350 |
+
for t in (retrieved_candidate_tags or [])
|
| 351 |
+
if t
|
| 352 |
+
and not _is_artist_tag(t)
|
| 353 |
+
and not _is_excluded_recommendation_tag(t)
|
| 354 |
+
and _norm_tag_for_lookup(t) not in tags_in_any_enabled_group
|
| 355 |
+
)
|
| 356 |
+
)
|
| 357 |
+
retrieved_other_row: Dict[str, Any] | None = None
|
| 358 |
+
if retrieved_uncategorized_ranked:
|
| 359 |
+
retrieved_uncategorized_set = set(retrieved_uncategorized_ranked)
|
| 360 |
+
selected_in_retrieved_other_raw = [
|
| 361 |
+
t for t in selected_active if t in retrieved_uncategorized_set
|
| 362 |
+
]
|
| 363 |
+
selected_in_retrieved_other = _order_selected_tags_for_row(
|
| 364 |
+
row_selected_tags=selected_in_retrieved_other_raw,
|
| 365 |
+
selected_index=selected_index,
|
| 366 |
+
tag_selection_origins=tag_selection_origins,
|
| 367 |
+
implied_parent_map=implied_parent_map,
|
| 368 |
+
)
|
| 369 |
+
merged_retrieved_other = selected_in_retrieved_other + [
|
| 370 |
+
t for t in retrieved_uncategorized_ranked if t not in selected_in_retrieved_other
|
| 371 |
+
]
|
| 372 |
+
keep_n = max(max(1, int(top_tags_per_group)), len(selected_in_retrieved_other))
|
| 373 |
+
merged_retrieved_other = merged_retrieved_other[:keep_n]
|
| 374 |
+
retrieved_other_meta = {
|
| 375 |
+
t: {
|
| 376 |
+
"origin": _normalize_selection_origin(tag_selection_origins.get(t, "selection")),
|
| 377 |
+
"preselected": t in selected_active,
|
| 378 |
+
}
|
| 379 |
+
for t in merged_retrieved_other
|
| 380 |
+
}
|
| 381 |
+
retrieved_other_row = {
|
| 382 |
+
"name": "other_retrieved",
|
| 383 |
+
"label": "Other (Retrieved)",
|
| 384 |
+
"tags": merged_retrieved_other,
|
| 385 |
+
"tag_meta": retrieved_other_meta,
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
# "Selected (Other)" should contain selected tags not already shown in any displayed row.
|
| 389 |
+
# Include "Other (Retrieved)" in that displayed-row set to avoid duplicates across those rows.
|
| 390 |
+
tags_in_displayed_rows = set(tags_in_any_displayed_group)
|
| 391 |
+
if retrieved_other_row:
|
| 392 |
+
tags_in_displayed_rows.update(retrieved_other_row.get("tags", []))
|
| 393 |
+
selected_other_raw = [t for t in selected_active if t not in tags_in_displayed_rows]
|
| 394 |
selected_other = _order_selected_tags_for_row(
|
| 395 |
row_selected_tags=selected_other_raw,
|
| 396 |
selected_index=selected_index,
|
|
|
|
| 407 |
row_defs.append(
|
| 408 |
{
|
| 409 |
"name": "selected_other",
|
| 410 |
+
"label": _display_row_label("selected_other"),
|
| 411 |
"tags": selected_other,
|
| 412 |
"tag_meta": selected_other_meta,
|
| 413 |
}
|
|
|
|
| 441 |
row_defs.append(
|
| 442 |
{
|
| 443 |
"name": group_name,
|
| 444 |
+
"label": _display_row_label(group_name),
|
| 445 |
"tags": merged,
|
| 446 |
"tag_meta": tag_meta,
|
| 447 |
}
|
| 448 |
)
|
| 449 |
+
|
| 450 |
+
# Keep this row at the bottom so category/group rows remain contiguous.
|
| 451 |
+
if retrieved_other_row:
|
| 452 |
+
row_defs.append(retrieved_other_row)
|
| 453 |
+
|
| 454 |
+
return row_defs
|
| 455 |
|
| 456 |
|
| 457 |
+
def _build_display_audit_line(
|
| 458 |
row_defs: List[Dict[str, Any]],
|
| 459 |
*,
|
| 460 |
active_selected_tags: List[str],
|
|
|
|
| 482 |
row_name = row.get("name", "")
|
| 483 |
row_label = row.get("label", row_name)
|
| 484 |
for tag in row.get("tags", []):
|
| 485 |
+
rec = info_by_tag.setdefault(tag, {"rows": [], "sources": set()})
|
| 486 |
+
rec["rows"].append(row_label)
|
| 487 |
+
if row_name == "selected_other":
|
| 488 |
+
rec["sources"].add("selected_other_row")
|
| 489 |
+
elif row_name == "other_retrieved":
|
| 490 |
+
rec["sources"].add("other_retrieved_row")
|
| 491 |
+
else:
|
| 492 |
+
rec["sources"].add("ranked_group_row")
|
| 493 |
if tag in active_set:
|
| 494 |
rec["sources"].add("selected_active")
|
| 495 |
if tag in direct_set:
|
|
|
|
| 508 |
for tag, rec in sorted(info_by_tag.items())
|
| 509 |
],
|
| 510 |
}
|
| 511 |
+
return "Display Tag Audit: " + json.dumps(payload, ensure_ascii=True)
|
| 512 |
+
|
| 513 |
+
|
| 514 |
+
def _build_row_component_updates(
|
| 515 |
+
row_defs: List[Dict[str, Any]],
|
| 516 |
+
selected_tags: List[str],
|
| 517 |
max_rows: int,
|
| 518 |
):
|
| 519 |
selected = {t for t in (selected_tags or []) if t}
|
|
|
|
| 528 |
values = [t for t in tags if t in selected]
|
| 529 |
row_values_state.append(values)
|
| 530 |
visible = bool(tags)
|
| 531 |
+
header_updates.append(gr.update(value=row.get("label", ""), visible=visible))
|
| 532 |
tag_meta = row.get("tag_meta", {}) if isinstance(row.get("tag_meta", {}), dict) else {}
|
| 533 |
choices = []
|
| 534 |
for t in tags:
|
|
|
|
| 560 |
max_rows: int,
|
| 561 |
):
|
| 562 |
row_defs = row_defs_state or []
|
| 563 |
+
row_defs_ui = row_defs[: max(0, int(max_rows))]
|
| 564 |
selected = set(selected_tags_state or [])
|
| 565 |
+
row = row_defs_ui[row_idx] if 0 <= row_idx < len(row_defs_ui) else {}
|
| 566 |
row_tags = list(dict.fromkeys(row.get("tags", [])))
|
| 567 |
row_tag_set = set(row_tags)
|
| 568 |
row_tag_by_norm = {_norm_tag_for_lookup(t): t for t in row_tags}
|
|
|
|
| 578 |
if mapped:
|
| 579 |
new_set.add(mapped)
|
| 580 |
|
| 581 |
+
prev_values = list(row_values_state or [])
|
| 582 |
+
prev_row_values = prev_values[row_idx] if 0 <= row_idx < len(prev_values) else []
|
| 583 |
+
prev_row_selected = set()
|
| 584 |
+
for raw in (prev_row_values or []):
|
| 585 |
+
if raw in row_tag_set:
|
| 586 |
+
prev_row_selected.add(raw)
|
| 587 |
+
continue
|
| 588 |
+
raw_norm = _norm_tag_for_lookup(str(raw))
|
| 589 |
+
mapped = row_tag_by_norm.get(raw_norm)
|
| 590 |
+
if mapped:
|
| 591 |
+
prev_row_selected.add(mapped)
|
| 592 |
+
|
| 593 |
+
# Ignore non-user/no-op events (e.g., programmatic value re-sets) deterministically.
|
| 594 |
+
if new_set == prev_row_selected:
|
| 595 |
+
prompt_text = _compose_toggle_prompt_text(sorted(selected), row_defs_ui)
|
| 596 |
+
checkbox_updates = [gr.skip() for _ in range(max_rows)]
|
| 597 |
+
return [sorted(selected), prev_values, prompt_text, *checkbox_updates]
|
| 598 |
+
|
| 599 |
selected.difference_update(row_tag_set)
|
| 600 |
selected.update(new_set)
|
| 601 |
toggled_tags = prev_row_selected ^ new_set
|
| 602 |
|
|
|
|
| 603 |
new_row_values_state: List[List[str]] = []
|
| 604 |
affected_rows: Set[int] = {row_idx}
|
| 605 |
+
for idx, row_item in enumerate(row_defs_ui):
|
| 606 |
+
tags = list(dict.fromkeys(row_item.get("tags", [])))
|
| 607 |
values = [t for t in tags if t in selected]
|
| 608 |
new_row_values_state.append(values)
|
| 609 |
if toggled_tags and any(t in toggled_tags for t in tags):
|
|
|
|
| 611 |
|
| 612 |
checkbox_updates = []
|
| 613 |
for idx in range(max_rows):
|
| 614 |
+
if idx >= len(row_defs_ui):
|
| 615 |
+
checkbox_updates.append(gr.skip())
|
| 616 |
+
continue
|
| 617 |
+
if idx in affected_rows:
|
| 618 |
checkbox_updates.append(gr.update(value=new_row_values_state[idx]))
|
| 619 |
else:
|
| 620 |
+
checkbox_updates.append(gr.skip())
|
| 621 |
|
| 622 |
+
prompt_text = _compose_toggle_prompt_text(sorted(selected), row_defs_ui)
|
| 623 |
return [sorted(selected), new_row_values_state, prompt_text, *checkbox_updates]
|
| 624 |
|
| 625 |
|
| 626 |
+
def _build_ui_payload(
    *,
    console_text: str,
    row_defs: List[Dict[str, Any]],
    selected_tags: List[str],
):
    """Assemble the flat list of output values for one pipeline run.

    The list is ordered as: console text, a visibility update that is
    visible only when *row_defs* is non-empty, the composed prompt text, the
    sorted unique selected tags, *row_defs* itself, the per-row selected
    values, then every header update followed by every checkbox update.
    Row-level values come from ``_build_row_component_updates`` with
    ``display_max_rows_default`` rows.
    """
    # NOTE(review): the order presumably mirrors the Gradio `outputs` list
    # wired to the run button -- confirm against the event binding.
    prompt_text, row_values_state, header_updates, checkbox_updates = (
        _build_row_component_updates(
            row_defs=row_defs,
            selected_tags=selected_tags,
            max_rows=display_max_rows_default,
        )
    )
    payload = [
        console_text,
        gr.update(visible=bool(row_defs)),
        prompt_text,
        sorted(set(selected_tags or [])),
        row_defs,
        row_values_state,
    ]
    payload.extend(header_updates)
    payload.extend(checkbox_updates)
    return payload
|
| 647 |
+
|
| 648 |
+
|
| 649 |
+
def _prepare_run_ui() -> List[Any]:
    """Return the output values that put the UI into its "running" state.

    Both text slots receive a "Running..." message, the second slot is left
    untouched via ``gr.skip()``, the three state slots are reset to empty
    lists, and each of the ``display_max_rows_default`` rows has its header
    blanked and hidden and its checkbox group emptied and hidden.
    """
    row_count = display_max_rows_default
    # One independent update object per row slot.
    hidden_headers = [gr.update(value="", visible=False) for _ in range(row_count)]
    cleared_checkboxes = [
        gr.update(choices=[], value=[], visible=False) for _ in range(row_count)
    ]
    leading_slots = [
        "Running...",
        gr.skip(),
        "Running... usually completes in about 20 seconds.",
        [],
        [],
        [],
    ]
    return leading_slots + hidden_headers + cleared_checkboxes
|
| 665 |
+
|
| 666 |
+
|
| 667 |
+
def _build_selection_query(
|
| 668 |
prompt_in: str,
|
| 669 |
rewritten: str,
|
| 670 |
structural_tags: List[str],
|
|
|
|
| 954 |
.source-legend {
|
| 955 |
display: flex;
|
| 956 |
flex-wrap: wrap;
|
| 957 |
+
align-items: center;
|
| 958 |
gap: 8px;
|
| 959 |
margin: 4px 0 10px 0;
|
| 960 |
}
|
| 961 |
|
| 962 |
+
.source-legend .legend-title {
|
| 963 |
+
font-size: 0.92rem;
|
| 964 |
+
font-weight: 900;
|
| 965 |
+
color: #334155;
|
| 966 |
+
margin-right: 4px;
|
| 967 |
+
}
|
| 968 |
+
|
| 969 |
.source-legend .chip {
|
| 970 |
display: inline-flex;
|
| 971 |
align-items: center;
|
| 972 |
+
border-radius: 10px;
|
| 973 |
+
border: 1px solid #6c7788;
|
| 974 |
+
padding: 6px 12px;
|
|
|
|
| 975 |
font-size: 0.85rem;
|
| 976 |
+
font-weight: 800;
|
| 977 |
+
color: #111827;
|
| 978 |
background: #f3f6fb;
|
| 979 |
}
|
| 980 |
|
| 981 |
+
.source-legend .chip.rewrite { background: #26b9a3; color: #062923; border-color: #187869; }
|
| 982 |
+
.source-legend .chip.selection { background: #f0a93c; color: #382206; border-color: #a66f1f; }
|
| 983 |
+
.source-legend .chip.probe { background: #9a6cff; color: #ffffff; border-color: #6745b0; }
|
| 984 |
+
.source-legend .chip.structural { background: #53c368; color: #102d17; border-color: #2f8442; }
|
| 985 |
+
.source-legend .chip.implied { background: #a8b3c4; color: #1d2633; border-color: #6f7e95; }
|
| 986 |
+
.source-legend .chip.user { background: #4f86ff; color: #ffffff; border-color: #2f5fbf; }
|
| 987 |
+
.source-legend .chip.unselected { background: #c7ced8; color: #2d3440; border-color: #7d8897; }
|
| 988 |
+
|
| 989 |
+
.row-heading p {
|
| 990 |
+
margin: 8px 0 0 0 !important;
|
| 991 |
+
font-size: 1.18rem !important;
|
| 992 |
+
font-weight: 850 !important;
|
| 993 |
+
line-height: 1.2 !important;
|
| 994 |
+
}
|
| 995 |
+
|
| 996 |
+
.row-instruction {
|
| 997 |
+
text-align: center;
|
| 998 |
+
margin: 8px 0 12px 0;
|
| 999 |
+
}
|
| 1000 |
+
|
| 1001 |
+
.row-instruction p {
|
| 1002 |
+
margin: 0 !important;
|
| 1003 |
+
font-size: 1.02rem !important;
|
| 1004 |
+
font-style: italic !important;
|
| 1005 |
+
font-weight: 800 !important;
|
| 1006 |
+
color: #1d4ed8 !important;
|
| 1007 |
+
}
|
| 1008 |
+
|
| 1009 |
+
.top-instruction {
|
| 1010 |
+
text-align: center;
|
| 1011 |
+
margin: 2px 0 6px 0;
|
| 1012 |
+
}
|
| 1013 |
+
|
| 1014 |
+
.top-instruction p {
|
| 1015 |
+
margin: 0 !important;
|
| 1016 |
+
font-size: 1.02rem !important;
|
| 1017 |
+
font-style: italic !important;
|
| 1018 |
+
font-weight: 800 !important;
|
| 1019 |
+
color: #1d4ed8 !important;
|
| 1020 |
+
}
|
| 1021 |
+
|
| 1022 |
+
.run-hint {
|
| 1023 |
+
margin-top: 6px;
|
| 1024 |
+
text-align: center;
|
| 1025 |
}
|
| 1026 |
|
| 1027 |
+
.run-hint p {
|
| 1028 |
+
margin: 0 !important;
|
| 1029 |
+
font-size: 0.9rem !important;
|
| 1030 |
+
font-style: italic !important;
|
| 1031 |
+
color: #475569 !important;
|
| 1032 |
+
}
|
| 1033 |
+
|
| 1034 |
+
.prompt-card {
|
| 1035 |
+
background: transparent !important;
|
| 1036 |
+
border: none !important;
|
| 1037 |
+
box-shadow: none !important;
|
| 1038 |
+
padding: 0 !important;
|
| 1039 |
+
}
|
| 1040 |
+
|
| 1041 |
+
.suggested-prompt-box {
|
| 1042 |
+
margin-top: 2px !important;
|
| 1043 |
+
}
|
| 1044 |
+
|
| 1045 |
+
.suggested-prompt-card {
|
| 1046 |
+
margin-top: 10px !important;
|
| 1047 |
+
}
|
| 1048 |
"""
|
| 1049 |
|
| 1050 |
client_js = """
|
| 1051 |
() => {
|
| 1052 |
+
const markerRe = /\\s*\\[\\[psq:([a-z_]+):(0|1):([A-Za-z0-9_\\-=]*)\\]\\]\\s*$/;
|
| 1053 |
+
const decodeTip = (b64) => {
|
| 1054 |
+
if (!b64) return "";
|
| 1055 |
+
try {
|
| 1056 |
+
const binary = atob((b64 || "").replace(/-/g, "+").replace(/_/g, "/"));
|
| 1057 |
+
const bytes = Uint8Array.from(binary, (c) => c.charCodeAt(0));
|
| 1058 |
+
return new TextDecoder("utf-8").decode(bytes);
|
| 1059 |
+
} catch (_) {
|
| 1060 |
+
return "";
|
| 1061 |
+
}
|
| 1062 |
+
};
|
| 1063 |
const applyTagMeta = () => {
|
| 1064 |
const labels = document.querySelectorAll(".lego-tags label");
|
| 1065 |
labels.forEach((label) => {
|
|
|
|
| 1070 |
if (!match) return;
|
| 1071 |
label.dataset.psqOrigin = match[1];
|
| 1072 |
label.dataset.psqPreselected = match[2];
|
| 1073 |
+
const tip = decodeTip(match[3] || "");
|
| 1074 |
+
if (tip) {
|
| 1075 |
+
label.title = tip;
|
| 1076 |
+
span.title = tip;
|
| 1077 |
+
} else {
|
| 1078 |
+
label.removeAttribute("title");
|
| 1079 |
+
span.removeAttribute("title");
|
| 1080 |
+
}
|
| 1081 |
span.textContent = text.replace(markerRe, "");
|
| 1082 |
});
|
| 1083 |
};
|
|
|
|
| 1172 |
log("Start: received prompt")
|
| 1173 |
prompt_in = (user_prompt or "").strip()
|
| 1174 |
if not prompt_in:
|
| 1175 |
+
return _build_ui_payload(
|
| 1176 |
+
console_text="Error: empty prompt",
|
| 1177 |
+
row_defs=[],
|
| 1178 |
+
selected_tags=[],
|
| 1179 |
+
)
|
|
|
|
| 1180 |
|
| 1181 |
log("Input:")
|
| 1182 |
log(prompt_in)
|
|
|
|
| 1233 |
|
| 1234 |
|
| 1235 |
log("Step 2: Prompt Squirrel retrieval (hidden)")
|
| 1236 |
+
try:
|
| 1237 |
+
t0 = time.perf_counter()
|
| 1238 |
+
retrieval_context_tags = list(dict.fromkeys((structural_tags or []) + (probe_tags or [])))
|
| 1239 |
+
rewrite_phrases = [p.strip() for p in (rewrite_for_retrieval or "").split(",") if p.strip()]
|
| 1240 |
retrieval_result = psq_candidates_from_rewrite_phrases(
|
| 1241 |
rewrite_phrases=rewrite_phrases,
|
| 1242 |
allow_nsfw_tags=allow_nsfw_tags,
|
|
|
|
| 1253 |
if selection_candidate_cap > 0 and len(candidates) > selection_candidate_cap:
|
| 1254 |
candidates = candidates[:selection_candidate_cap]
|
| 1255 |
log(f"Selection candidate cap applied: {selection_candidate_cap}")
|
| 1256 |
+
dt = time.perf_counter()-t0
|
| 1257 |
+
_record_timing("retrieval", dt)
|
| 1258 |
+
log(f"Retrieval: {dt:.2f}s")
|
| 1259 |
+
log(f"Retrieved {len(candidates)} candidate tags")
|
| 1260 |
if verbose_retrieval:
|
| 1261 |
log(f"Total unique candidates: {len(candidates)}")
|
| 1262 |
limit = None if verbose_retrieval_all else max(1, int(verbose_retrieval_limit))
|
|
|
|
| 1293 |
f" {tag}{alias_part} | fasttext={fasttext_str} context={context_str} "
|
| 1294 |
f"combined={combined_str} count={count}"
|
| 1295 |
)
|
| 1296 |
+
if limit is not None and len(rows) > limit:
|
| 1297 |
+
log(f" ... ({len(rows) - limit} more)")
|
| 1298 |
+
except Exception as e:
|
| 1299 |
+
log(f"Retrieval fallback: {type(e).__name__}: {e}")
|
| 1300 |
+
candidates = []
|
| 1301 |
+
|
| 1302 |
+
retrieved_candidate_tags = list(
|
| 1303 |
+
dict.fromkeys(
|
| 1304 |
+
_norm_tag_for_lookup(c.tag)
|
| 1305 |
+
for c in (candidates or [])
|
| 1306 |
+
if getattr(c, "tag", None)
|
| 1307 |
+
)
|
| 1308 |
+
)
|
| 1309 |
|
| 1310 |
log("Step 3: LLM index selection (uses rewrite + structural/probe context)")
|
| 1311 |
selection_query = _build_selection_query(
|
|
|
|
| 1444 |
toggle_rows = _build_toggle_rows(
|
| 1445 |
seed_terms=seed_terms,
|
| 1446 |
selected_tags=active_selected_tags,
|
| 1447 |
+
retrieved_candidate_tags=retrieved_candidate_tags,
|
| 1448 |
tag_selection_origins=tag_selection_origins,
|
| 1449 |
implied_parent_map=implied_parent_map,
|
| 1450 |
top_groups=max(1, int(display_top_groups)),
|
|
|
|
| 1454 |
dt = time.perf_counter()-t0
|
| 1455 |
_record_timing("group_display", dt)
|
| 1456 |
log(f"Ranked group display: {dt:.2f}s ({len(toggle_rows)} rows)")
|
| 1457 |
+
log(
|
| 1458 |
+
_build_display_audit_line(
|
| 1459 |
+
toggle_rows,
|
| 1460 |
+
active_selected_tags=active_selected_tags,
|
| 1461 |
+
direct_selected_tags=direct_selected_tags,
|
| 1462 |
+
implied_selected_tags=implied_selected_tags,
|
| 1463 |
+
)
|
| 1464 |
+
)
|
| 1465 |
+
|
| 1466 |
+
total_dt = time.perf_counter()-t_total0
|
| 1467 |
+
_emit_timing_summary(total_dt)
|
| 1468 |
_append_timing_jsonl(total_dt)
|
| 1469 |
log("Done: final prompt ready")
|
| 1470 |
+
return _build_ui_payload(
|
| 1471 |
+
console_text="\n".join(logs),
|
| 1472 |
+
row_defs=toggle_rows,
|
| 1473 |
+
selected_tags=active_selected_tags,
|
| 1474 |
+
)
|
|
|
|
| 1475 |
|
| 1476 |
except Exception as e:
|
| 1477 |
log(f"Error: {type(e).__name__}: {e}")
|
| 1478 |
+
return _build_ui_payload(
|
| 1479 |
+
console_text="\n".join(logs),
|
| 1480 |
+
row_defs=[],
|
| 1481 |
+
selected_tags=[],
|
| 1482 |
+
)
|
|
|
|
| 1483 |
|
| 1484 |
|
| 1485 |
|
| 1486 |
with gr.Blocks(css=css, js=client_js) as app:
|
| 1487 |
+
with gr.Row():
|
| 1488 |
+
with gr.Column(scale=3, elem_classes=["prompt-col"]):
|
| 1489 |
+
gr.Markdown(
|
| 1490 |
+
'Describe your image under "Enter Prompt" and click "Run". '
|
| 1491 |
+
'Prompt Squirrel will translate it into image board tags.',
|
| 1492 |
+
elem_classes=["top-instruction"],
|
| 1493 |
+
)
|
| 1494 |
+
with gr.Group(elem_classes=["prompt-card"]):
|
| 1495 |
+
image_tags = gr.Textbox(
|
| 1496 |
+
label="Enter Prompt",
|
| 1497 |
+
placeholder="e.g. fox, outside, detailed background, .",
|
| 1498 |
+
lines=1,
|
| 1499 |
+
elem_classes=["enter-prompt-box"],
|
| 1500 |
+
)
|
| 1501 |
+
with gr.Group(elem_classes=["prompt-card", "suggested-prompt-card"]):
|
| 1502 |
+
suggested_prompt = gr.Textbox(
|
| 1503 |
+
label="Suggested Prompt",
|
| 1504 |
+
lines=2,
|
| 1505 |
+
interactive=False,
|
| 1506 |
+
show_copy_button=True,
|
| 1507 |
+
placeholder="Comma-separated tags selected in the rows below.",
|
| 1508 |
+
elem_classes=["suggested-prompt-box"],
|
| 1509 |
+
)
|
| 1510 |
+
with gr.Column(scale=1):
|
| 1511 |
+
_mascot_pil = _load_mascot_image()
|
| 1512 |
+
if _mascot_pil is not None:
|
| 1513 |
+
mascot_img = gr.Image(
|
| 1514 |
+
value=_mascot_pil,
|
| 1515 |
+
show_label=False,
|
| 1516 |
+
interactive=False,
|
| 1517 |
+
height=240,
|
| 1518 |
+
elem_id="mascot"
|
| 1519 |
+
)
|
| 1520 |
+
else:
|
| 1521 |
+
mascot_img = gr.Markdown("`(mascot image unavailable)`")
|
| 1522 |
+
submit_button = gr.Button("Run", variant="primary")
|
| 1523 |
+
gr.Markdown("Typical runtime: up to ~20 seconds.", elem_classes=["run-hint"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1524 |
|
| 1525 |
selected_tags_state = gr.State([])
|
| 1526 |
row_defs_state = gr.State([])
|
| 1527 |
row_values_state = gr.State([])
|
| 1528 |
|
| 1529 |
+
toggle_instruction = gr.Markdown(
|
| 1530 |
+
"Click tag buttons to add or remove tags from the suggested prompt.",
|
| 1531 |
+
elem_classes=["row-instruction"],
|
| 1532 |
+
visible=False,
|
| 1533 |
+
)
|
| 1534 |
+
row_headers: List[gr.Markdown] = []
|
| 1535 |
+
row_checkboxes: List[gr.CheckboxGroup] = []
|
| 1536 |
+
for _ in range(display_max_rows_default):
|
| 1537 |
+
with gr.Row():
|
| 1538 |
+
with gr.Column(scale=2, min_width=170):
|
| 1539 |
+
row_headers.append(gr.Markdown(value="", visible=False, elem_classes=["row-heading"]))
|
| 1540 |
+
with gr.Column(scale=10):
|
| 1541 |
+
row_checkboxes.append(
|
| 1542 |
+
gr.CheckboxGroup(
|
| 1543 |
+
choices=[],
|
| 1544 |
+
value=[],
|
| 1545 |
+
visible=False,
|
| 1546 |
+
interactive=True,
|
| 1547 |
+
container=False,
|
| 1548 |
+
elem_classes=["lego-tags"],
|
| 1549 |
+
)
|
| 1550 |
+
)
|
| 1551 |
+
|
| 1552 |
gr.HTML(
|
| 1553 |
"""
|
| 1554 |
<div class="source-legend">
|
| 1555 |
+
<span class="legend-title">Legend:</span>
|
| 1556 |
+
<span class="chip rewrite">Rewrite phrase</span>
|
| 1557 |
+
<span class="chip selection">General selection</span>
|
| 1558 |
+
<span class="chip probe">Probe query</span>
|
| 1559 |
+
<span class="chip structural">Structural query</span>
|
| 1560 |
+
<span class="chip implied">Implied</span>
|
| 1561 |
+
<span class="chip user">User-toggled</span>
|
| 1562 |
+
<span class="chip unselected">Unselected</span>
|
| 1563 |
</div>
|
| 1564 |
"""
|
| 1565 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1566 |
|
| 1567 |
with gr.Accordion("Display Settings", open=False):
|
| 1568 |
with gr.Row():
|
|
|
|
| 1578 |
label="Top Tags Shown Per Row",
|
| 1579 |
minimum=1,
|
| 1580 |
)
|
| 1581 |
+
display_rank_top_k = gr.Number(
|
| 1582 |
+
value=display_rank_top_k_default,
|
| 1583 |
+
precision=0,
|
| 1584 |
+
label="Top Tags Used for Row Ranking",
|
| 1585 |
+
minimum=1,
|
| 1586 |
+
)
|
| 1587 |
+
|
| 1588 |
+
with gr.Accordion("Console", open=False):
|
| 1589 |
+
console = gr.Textbox(
|
| 1590 |
+
label="Console",
|
| 1591 |
+
lines=10,
|
| 1592 |
+
interactive=False,
|
| 1593 |
+
placeholder="Progress logs will appear here."
|
| 1594 |
+
)
|
| 1595 |
+
|
| 1596 |
+
run_outputs = [
|
| 1597 |
+
console,
|
| 1598 |
+
toggle_instruction,
|
| 1599 |
+
suggested_prompt,
|
| 1600 |
+
selected_tags_state,
|
| 1601 |
+
row_defs_state,
|
| 1602 |
+
row_values_state,
|
| 1603 |
*row_headers,
|
| 1604 |
*row_checkboxes,
|
| 1605 |
]
|
| 1606 |
|
| 1607 |
+
submit_button.click(
|
| 1608 |
+
_prepare_run_ui,
|
| 1609 |
+
inputs=[],
|
| 1610 |
+
outputs=run_outputs,
|
| 1611 |
+
queue=False,
|
| 1612 |
+
show_progress="hidden",
|
| 1613 |
+
).then(
|
| 1614 |
+
rag_pipeline_ui,
|
| 1615 |
+
inputs=[image_tags, display_top_groups, display_top_tags_per_group, display_rank_top_k],
|
| 1616 |
+
outputs=run_outputs,
|
| 1617 |
+
)
|
| 1618 |
+
|
| 1619 |
+
image_tags.submit(
|
| 1620 |
+
_prepare_run_ui,
|
| 1621 |
+
inputs=[],
|
| 1622 |
+
outputs=run_outputs,
|
| 1623 |
+
queue=False,
|
| 1624 |
+
show_progress="hidden",
|
| 1625 |
+
).then(
|
| 1626 |
+
rag_pipeline_ui,
|
| 1627 |
+
inputs=[image_tags, display_top_groups, display_top_tags_per_group, display_rank_top_k],
|
| 1628 |
+
outputs=run_outputs,
|
| 1629 |
+
)
|
| 1630 |
|
| 1631 |
for idx, row_cb in enumerate(row_checkboxes):
|
| 1632 |
+
row_cb.select(
|
| 1633 |
fn=lambda changed_values, selected_state, row_defs, row_values, i=idx: _on_toggle_row(
|
| 1634 |
i,
|
| 1635 |
changed_values,
|
| 1636 |
+
selected_state,
|
| 1637 |
+
row_defs,
|
| 1638 |
row_values,
|
| 1639 |
display_max_rows_default,
|
| 1640 |
),
|
data/runtime_metrics/ui_pipeline_timings.jsonl
CHANGED
|
@@ -14,3 +14,47 @@
|
|
| 14 |
{"timestamp_utc": "2026-03-07T03:01:39Z", "stages_s": {"preprocess": 1.6900012269616127e-05, "rewrite": 1.713694000034593, "structural": 5.799985956400633e-06, "probe": 0.049874700023792684, "retrieval": 0.35970670002279803, "selection": 0.9267913000076078, "implication_expansion": 3.909994848072529e-05, "prompt_composition": 3.7299992982298136e-05, "group_display": 0.026757099956739694}, "total_s": 3.089661000005435, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 15 |
{"timestamp_utc": "2026-03-07T03:09:53Z", "stages_s": {"preprocess": 0.00012510002125054598, "rewrite": 2.249713899975177, "structural": 0.5107482000021264, "probe": 3.300025127828121e-06, "retrieval": 2.3757353999535553, "selection": 2.9089593999669887, "implication_expansion": 0.2682994999922812, "prompt_composition": 3.070000093430281e-05, "group_display": 0.07982710003852844}, "total_s": 8.42714020004496, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 16 |
{"timestamp_utc": "2026-03-07T03:37:54Z", "stages_s": {"preprocess": 0.00011760002234950662, "rewrite": 1.968222199997399, "structural": 1.1845426999498159, "probe": 2.214354399999138, "retrieval": 2.452574900002219, "selection": 0.8585481999907643, "implication_expansion": 0.27041040000040084, "prompt_composition": 3.319996176287532e-05, "group_display": 0.07736879994627088}, "total_s": 9.059251800004859, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
{"timestamp_utc": "2026-03-07T03:01:39Z", "stages_s": {"preprocess": 1.6900012269616127e-05, "rewrite": 1.713694000034593, "structural": 5.799985956400633e-06, "probe": 0.049874700023792684, "retrieval": 0.35970670002279803, "selection": 0.9267913000076078, "implication_expansion": 3.909994848072529e-05, "prompt_composition": 3.7299992982298136e-05, "group_display": 0.026757099956739694}, "total_s": 3.089661000005435, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 15 |
{"timestamp_utc": "2026-03-07T03:09:53Z", "stages_s": {"preprocess": 0.00012510002125054598, "rewrite": 2.249713899975177, "structural": 0.5107482000021264, "probe": 3.300025127828121e-06, "retrieval": 2.3757353999535553, "selection": 2.9089593999669887, "implication_expansion": 0.2682994999922812, "prompt_composition": 3.070000093430281e-05, "group_display": 0.07982710003852844}, "total_s": 8.42714020004496, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 16 |
{"timestamp_utc": "2026-03-07T03:37:54Z", "stages_s": {"preprocess": 0.00011760002234950662, "rewrite": 1.968222199997399, "structural": 1.1845426999498159, "probe": 2.214354399999138, "retrieval": 2.452574900002219, "selection": 0.8585481999907643, "implication_expansion": 0.27041040000040084, "prompt_composition": 3.319996176287532e-05, "group_display": 0.07736879994627088}, "total_s": 9.059251800004859, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 17 |
+
{"timestamp_utc": "2026-03-07T14:46:02Z", "stages_s": {"preprocess": 0.00026489997981116176, "rewrite": 1.9126524000312202, "structural": 4.675470399961341, "probe": 2.1218294000136666, "retrieval": 12.559957500023302, "selection": 1.0550536999944597, "implication_expansion": 0.37385119998361915, "prompt_composition": 2.589996438473463e-05, "group_display": 0.1274346000282094}, "total_s": 22.92461790004745, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 18 |
+
{"timestamp_utc": "2026-03-07T15:03:07Z", "stages_s": {"preprocess": 0.0002395000192336738, "rewrite": 5.185094600019511, "structural": 0.7354515999904834, "probe": 3.3999676816165447e-06, "retrieval": 7.362056100042537, "selection": 4.4499055000487715, "implication_expansion": 0.40566330001456663, "prompt_composition": 5.230004899203777e-05, "group_display": 0.13085790001787245}, "total_s": 18.33150090003619, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 19 |
+
{"timestamp_utc": "2026-03-07T15:24:07Z", "stages_s": {"preprocess": 0.00018759997328743339, "rewrite": 12.691507300012745, "structural": 6.099988240748644e-06, "probe": 3.3999676816165447e-06, "retrieval": 2.92752389999805, "selection": 3.720078700047452, "implication_expansion": 0.3129685999592766, "prompt_composition": 2.6300025638192892e-05, "group_display": 0.09746540000196546}, "total_s": 19.792910400021356, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 20 |
+
{"timestamp_utc": "2026-03-07T15:25:37Z", "stages_s": {"preprocess": 1.71000137925148e-05, "rewrite": 7.169413700001314, "structural": 4.699977580457926e-06, "probe": 2.300017513334751e-06, "retrieval": 0.5495104999863543, "selection": 1.0878360999631695, "implication_expansion": 2.940004924312234e-05, "prompt_composition": 2.9200047720223665e-05, "group_display": 0.03320260002510622}, "total_s": 8.852556700003333, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 21 |
+
{"timestamp_utc": "2026-03-07T15:34:51Z", "stages_s": {"preprocess": 8.460000390186906e-05, "rewrite": 20.807663900020998, "structural": 5.00003807246685e-06, "probe": 2.600019797682762e-06, "retrieval": 2.5111192999756895, "selection": 15.921769299951848, "implication_expansion": 0.27366950002033263, "prompt_composition": 3.250001464039087e-05, "group_display": 0.08310980000533164}, "total_s": 39.63195960002486, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 22 |
+
{"timestamp_utc": "2026-03-07T15:35:52Z", "stages_s": {"preprocess": 2.47000134550035e-05, "rewrite": 1.7436033999547362, "structural": 4.400033503770828e-06, "probe": 2.600019797682762e-06, "retrieval": 0.5218271000194363, "selection": 4.346306200022809, "implication_expansion": 3.0399998649954796e-05, "prompt_composition": 2.5800021830946207e-05, "group_display": 0.03283849998842925}, "total_s": 6.659919600002468, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 23 |
+
{"timestamp_utc": "2026-03-07T15:41:27Z", "stages_s": {"preprocess": 9.679998038336635e-05, "rewrite": 3.4287550000008196, "structural": 0.9089086999883875, "probe": 1.9048012000275776, "retrieval": 2.3640123999794014, "selection": 5.8771228000405245, "implication_expansion": 0.2728748000226915, "prompt_composition": 2.9499991796910763e-05, "group_display": 0.08227899996563792}, "total_s": 14.873475200030953, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 24 |
+
{"timestamp_utc": "2026-03-07T15:41:53Z", "stages_s": {"preprocess": 1.7300015315413475e-05, "rewrite": 1.8870358999702148, "structural": 5.099980626255274e-06, "probe": 0.31994039996061474, "retrieval": 0.407905500032939, "selection": 1.6604228000505827, "implication_expansion": 3.070000093430281e-05, "prompt_composition": 2.5900022592395544e-05, "group_display": 0.027770799992140383}, "total_s": 4.315684599976521, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 25 |
+
{"timestamp_utc": "2026-03-07T15:49:25Z", "stages_s": {"preprocess": 0.0001020999625325203, "rewrite": 14.022166899987496, "structural": 4.699977580457926e-06, "probe": 2.4999608285725117e-06, "retrieval": 3.3183021000004373, "selection": 11.949675500043668, "implication_expansion": 0.32175860001007095, "prompt_composition": 4.040001658722758e-05, "group_display": 0.08752110000932589}, "total_s": 29.732599700044375, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 26 |
+
{"timestamp_utc": "2026-03-07T15:50:22Z", "stages_s": {"preprocess": 1.7600017599761486e-05, "rewrite": 1.7429223000071943, "structural": 3.700028173625469e-06, "probe": 0.005793399992398918, "retrieval": 0.4191273999749683, "selection": 1.2126603999640793, "implication_expansion": 3.060000017285347e-05, "prompt_composition": 2.659996971487999e-05, "group_display": 0.027827800018712878}, "total_s": 3.4225500999600627, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 27 |
+
{"timestamp_utc": "2026-03-07T16:00:38Z", "stages_s": {"preprocess": 0.00010899995686486363, "rewrite": 8.79864729999099, "structural": 3.5999692045152187e-06, "probe": 2.100015990436077e-06, "retrieval": 2.8054729999857955, "selection": 1.2764754999661818, "implication_expansion": 0.3174371999921277, "prompt_composition": 2.6399968191981316e-05, "group_display": 0.08990299998549744}, "total_s": 13.324789500038605, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 28 |
+
{"timestamp_utc": "2026-03-07T16:02:59Z", "stages_s": {"preprocess": 2.5200017262250185e-05, "rewrite": 1.991041800007224, "structural": 5.099980626255274e-06, "probe": 17.524970299971756, "retrieval": 0.4314843000029214, "selection": 1.7358130000066012, "implication_expansion": 3.0499999411404133e-05, "prompt_composition": 2.8199981898069382e-05, "group_display": 0.027282700000796467}, "total_s": 21.72257919999538, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 29 |
+
{"timestamp_utc": "2026-03-07T16:04:30Z", "stages_s": {"preprocess": 2.559996210038662e-05, "rewrite": 2.176026900007855, "structural": 5.400041118264198e-06, "probe": 0.19614840002031997, "retrieval": 0.4781980999978259, "selection": 1.6205251999781467, "implication_expansion": 3.789999755099416e-05, "prompt_composition": 3.749999450519681e-05, "group_display": 0.03069300000788644}, "total_s": 4.5152849000296555, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 30 |
+
{"timestamp_utc": "2026-03-07T16:10:49Z", "stages_s": {"preprocess": 9.689998114481568e-05, "rewrite": 2.410708999959752, "structural": 0.8642730999854393, "probe": 0.4224375000339933, "retrieval": 2.802454399992712, "selection": 2.783213499991689, "implication_expansion": 0.31982660002540797, "prompt_composition": 2.7100031729787588e-05, "group_display": 0.09723489999305457}, "total_s": 9.736253200040665, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 31 |
+
{"timestamp_utc": "2026-03-07T16:11:31Z", "stages_s": {"preprocess": 1.71000137925148e-05, "rewrite": 1.7746342000318691, "structural": 0.2111163000226952, "probe": 4.599976819008589e-06, "retrieval": 0.5392439999850467, "selection": 2.572300500003621, "implication_expansion": 3.130000550299883e-05, "prompt_composition": 3.15000070258975e-05, "group_display": 0.03570860001491383}, "total_s": 5.145930600003339, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 32 |
+
{"timestamp_utc": "2026-03-07T16:17:15Z", "stages_s": {"preprocess": 9.919999865815043e-05, "rewrite": 2.202809799986426, "structural": 1.5271345999790356, "probe": 4.593681500002276, "retrieval": 2.751306999998633, "selection": 8.643455000012182, "implication_expansion": 0.35227009997470304, "prompt_composition": 3.160000778734684e-05, "group_display": 0.11034630000358447}, "total_s": 20.237214900029358, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 33 |
+
{"timestamp_utc": "2026-03-07T16:17:43Z", "stages_s": {"preprocess": 2.7300033252686262e-05, "rewrite": 1.0030765000265092, "structural": 2.59619329997804, "probe": 1.0295192999765277, "retrieval": 0.43122639995999634, "selection": 19.92651720001595, "implication_expansion": 2.989999484270811e-05, "prompt_composition": 3.0199997127056122e-05, "group_display": 0.028948699997272342}, "total_s": 25.028504199988674, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 34 |
+
{"timestamp_utc": "2026-03-07T16:30:10Z", "stages_s": {"preprocess": 8.229998638853431e-05, "rewrite": 6.530854899960104, "structural": 0.5428495000232942, "probe": 3.5999692045152187e-06, "retrieval": 2.2112261000438593, "selection": 37.76459150004666, "implication_expansion": 0.26712879998376593, "prompt_composition": 3.42000275850296e-05, "group_display": 0.07745989999966696}, "total_s": 47.426328199973796, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 35 |
+
{"timestamp_utc": "2026-03-07T16:30:23Z", "stages_s": {"preprocess": 1.71000137925148e-05, "rewrite": 0.929964899958577, "structural": 0.0898478000308387, "probe": 1.6804130000527948, "retrieval": 0.41350249998504296, "selection": 9.11415430001216, "implication_expansion": 3.300001844763756e-05, "prompt_composition": 3.2299954909831285e-05, "group_display": 0.027016500011086464}, "total_s": 12.266514900024049, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 36 |
+
{"timestamp_utc": "2026-03-07T16:30:38Z", "stages_s": {"preprocess": 3.239995567128062e-05, "rewrite": 7.538256199972238, "structural": 5.300040356814861e-06, "probe": 2.300017513334751e-06, "retrieval": 0.40969980001682416, "selection": 2.6479469999903813, "implication_expansion": 2.829998265951872e-05, "prompt_composition": 2.4800014216452837e-05, "group_display": 0.027821999974548817}, "total_s": 10.63382319995435, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 37 |
+
{"timestamp_utc": "2026-03-07T16:39:56Z", "stages_s": {"preprocess": 9.499996667727828e-05, "rewrite": 2.1227241000160575, "structural": 0.8582198000513017, "probe": 4.121821600012481, "retrieval": 3.0876275000046007, "selection": 1.8579011999536306, "implication_expansion": 0.3362734999973327, "prompt_composition": 4.47999918833375e-05, "group_display": 0.09885080001549795}, "total_s": 12.54350390000036, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 38 |
+
{"timestamp_utc": "2026-03-07T16:42:46Z", "stages_s": {"preprocess": 1.650000922381878e-05, "rewrite": 1.8503554000053555, "structural": 4.999979864805937e-06, "probe": 0.1099030000041239, "retrieval": 0.4038925000349991, "selection": 3.962773200008087, "implication_expansion": 4.18000272475183e-05, "prompt_composition": 3.519997699186206e-05, "group_display": 0.0322487999801524}, "total_s": 6.370800500037149, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 39 |
+
{"timestamp_utc": "2026-03-07T16:45:56Z", "stages_s": {"preprocess": 3.549997927621007e-05, "rewrite": 2.710496100015007, "structural": 0.1680390000110492, "probe": 4.699977580457926e-06, "retrieval": 1.825858500029426, "selection": 3.9926271999720484, "implication_expansion": 3.15000070258975e-05, "prompt_composition": 5.5100012104958296e-05, "group_display": 0.03422769997268915}, "total_s": 8.741921000008006, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 40 |
+
{"timestamp_utc": "2026-03-07T16:46:30Z", "stages_s": {"preprocess": 3.660004585981369e-05, "rewrite": 3.5675273000379093, "structural": 1.2902518999762833, "probe": 4.299974534660578e-06, "retrieval": 0.6466077999793924, "selection": 0.9123659000033513, "implication_expansion": 2.010003663599491e-05, "prompt_composition": 2.239999594166875e-05, "group_display": 0.031055399973411113}, "total_s": 6.457136899989564, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 41 |
+
{"timestamp_utc": "2026-03-07T16:52:11Z", "stages_s": {"preprocess": 9.279994992539287e-05, "rewrite": 2.4995197000098415, "structural": 0.650494700006675, "probe": 0.13954110001213849, "retrieval": 2.8580662000458688, "selection": 4.111845000006724, "implication_expansion": 0.2978370999917388, "prompt_composition": 2.719997428357601e-05, "group_display": 0.08794649998890236}, "total_s": 10.679650100006256, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 42 |
+
{"timestamp_utc": "2026-03-07T16:54:21Z", "stages_s": {"preprocess": 2.6500027161091566e-05, "rewrite": 3.775604799971916, "structural": 4.799978341907263e-06, "probe": 2.7999631129205227e-06, "retrieval": 0.660035600012634, "selection": 2.061849199992139, "implication_expansion": 2.629996743053198e-05, "prompt_composition": 2.95999925583601e-05, "group_display": 0.03182149998610839}, "total_s": 6.542931500007398, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 43 |
+
{"timestamp_utc": "2026-03-07T16:56:48Z", "stages_s": {"preprocess": 1.1800031643360853e-05, "rewrite": 1.6858424000092782, "structural": 0.3115612000110559, "probe": 3.600027412176132e-06, "retrieval": 0.18196690001059324, "selection": 0.8783706999965943, "implication_expansion": 1.3899989426136017e-05, "prompt_composition": 1.1400028597563505e-05, "group_display": 0.026653499982785434}, "total_s": 3.09556500002509, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 44 |
+
{"timestamp_utc": "2026-03-07T17:00:41Z", "stages_s": {"preprocess": 7.049995474517345e-05, "rewrite": 2.107173199998215, "structural": 0.7115459000342526, "probe": 0.7592908999649808, "retrieval": 2.065408500027843, "selection": 0.881550399994012, "implication_expansion": 0.26592110004276037, "prompt_composition": 1.4399993233382702e-05, "group_display": 0.07751559995813295}, "total_s": 6.901116500026546, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 45 |
+
{"timestamp_utc": "2026-03-07T17:29:51Z", "stages_s": {"preprocess": 8.860003435984254e-05, "rewrite": 2.1579742000321858, "structural": 0.4430788999889046, "probe": 1.0411412000539713, "retrieval": 2.0500706000020728, "selection": 1.186861299967859, "implication_expansion": 0.26447719999123365, "prompt_composition": 1.5300000086426735e-05, "group_display": 0.08299089997308329}, "total_s": 7.258497399976477, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 46 |
+
{"timestamp_utc": "2026-03-07T17:35:31Z", "stages_s": {"preprocess": 1.009996049106121e-05, "rewrite": 2.5867222999804653, "structural": 1.5706295000272803, "probe": 0.5591535000130534, "retrieval": 0.09234020003350452, "selection": 17.545875800016802, "implication_expansion": 3.190001007169485e-05, "prompt_composition": 1.3300043065100908e-05, "group_display": 0.0324972000089474}, "total_s": 22.399744599999394, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 47 |
+
{"timestamp_utc": "2026-03-07T17:38:38Z", "stages_s": {"preprocess": 7.449998520314693e-05, "rewrite": 2.332632600038778, "structural": 0.7940433000330813, "probe": 0.8119671999593265, "retrieval": 2.234404999995604, "selection": 0.7451644000248052, "implication_expansion": 0.27489820000482723, "prompt_composition": 1.6200006939470768e-05, "group_display": 0.07717659999616444}, "total_s": 7.304320800001733, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 48 |
+
{"timestamp_utc": "2026-03-07T17:42:01Z", "stages_s": {"preprocess": 8.510000770911574e-05, "rewrite": 1.867464300012216, "structural": 1.647069400001783, "probe": 4.299974534660578e-06, "retrieval": 2.395426100003533, "selection": 1.200732600002084, "implication_expansion": 0.33177699998486787, "prompt_composition": 2.2199994418770075e-05, "group_display": 0.09814279997954145}, "total_s": 7.579292399983387, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 49 |
+
{"timestamp_utc": "2026-03-07T17:48:44Z", "stages_s": {"preprocess": 8.590001380071044e-05, "rewrite": 2.0132066000369377, "structural": 1.060188300034497, "probe": 0.4243395999656059, "retrieval": 2.132219100021757, "selection": 1.1945027000037953, "implication_expansion": 0.2687906000064686, "prompt_composition": 1.9000028260052204e-05, "group_display": 0.07895809999899939}, "total_s": 7.206461600027978, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 50 |
+
{"timestamp_utc": "2026-03-07T17:54:46Z", "stages_s": {"preprocess": 0.00010950001887977123, "rewrite": 1.3990224999724887, "structural": 2.04190930002369, "probe": 1.069002999982331, "retrieval": 2.1156343999900855, "selection": 1.9406172999879345, "implication_expansion": 0.2861632999847643, "prompt_composition": 1.6000005416572094e-05, "group_display": 0.09219529997790232}, "total_s": 8.978390700009186, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 51 |
+
{"timestamp_utc": "2026-03-07T17:54:54Z", "stages_s": {"preprocess": 2.6899971999228e-05, "rewrite": 1.5525400000042282, "structural": 3.999972250312567e-06, "probe": 2.1999585442245007e-06, "retrieval": 0.10463370004436001, "selection": 1.4364217999973334, "implication_expansion": 1.999997766688466e-05, "prompt_composition": 1.4999997802078724e-05, "group_display": 0.032557499944232404}, "total_s": 3.138574599986896, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 52 |
+
{"timestamp_utc": "2026-03-07T17:58:13Z", "stages_s": {"preprocess": 0.00011359999189153314, "rewrite": 2.2580789999919944, "structural": 1.3124472000054084, "probe": 3.5999692045152187e-06, "retrieval": 2.127778899972327, "selection": 1.0589646000298671, "implication_expansion": 0.29374579997966066, "prompt_composition": 1.4899997040629387e-05, "group_display": 0.08566429995698854}, "total_s": 7.171340400003828, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 53 |
+
{"timestamp_utc": "2026-03-07T18:01:07Z", "stages_s": {"preprocess": 7.969996659085155e-05, "rewrite": 1.745156999968458, "structural": 2.802765399974305, "probe": 0.9890876000281423, "retrieval": 2.023351099982392, "selection": 0.5886470000259578, "implication_expansion": 0.2654718999983743, "prompt_composition": 1.8899969290941954e-05, "group_display": 0.07822809997014701}, "total_s": 8.527449000044726, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 54 |
+
{"timestamp_utc": "2026-03-07T18:03:23Z", "stages_s": {"preprocess": 8.140003774315119e-05, "rewrite": 1.4975244000088423, "structural": 1.186394400021527, "probe": 0.6484065999975428, "retrieval": 2.452991199970711, "selection": 4.813816999958362, "implication_expansion": 0.1623875999939628, "prompt_composition": 4.579999949783087e-05, "group_display": 0.0797012000111863}, "total_s": 10.879897099977825, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 55 |
+
{"timestamp_utc": "2026-03-07T18:06:46Z", "stages_s": {"preprocess": 9.800016414374113e-06, "rewrite": 1.3867014999850653, "structural": 0.39007520000450313, "probe": 5.211147500027437, "retrieval": 0.17631519999122247, "selection": 6.730482600047253, "implication_expansion": 1.7700018361210823e-05, "prompt_composition": 1.2499978765845299e-05, "group_display": 0.031001800030935556}, "total_s": 13.936744500009809, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 56 |
+
{"timestamp_utc": "2026-03-07T18:07:41Z", "stages_s": {"preprocess": 1.0100018698722124e-05, "rewrite": 1.0803023999906145, "structural": 0.15050079999491572, "probe": 4.7244069000007585, "retrieval": 0.1667132000438869, "selection": 0.9119019000208937, "implication_expansion": 1.8200022168457508e-05, "prompt_composition": 1.4199991710484028e-05, "group_display": 0.027934999961871654}, "total_s": 7.071552800014615, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 57 |
+
{"timestamp_utc": "2026-03-07T18:09:40Z", "stages_s": {"preprocess": 1.5700003132224083e-05, "rewrite": 1.418132099963259, "structural": 1.423503800004255, "probe": 4.57354850001866, "retrieval": 0.10137270000996068, "selection": 1.4030677999835461, "implication_expansion": 2.789997961372137e-05, "prompt_composition": 1.8000020645558834e-05, "group_display": 0.03112399997189641}, "total_s": 8.961162999970838, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 58 |
+
{"timestamp_utc": "2026-03-07T18:11:33Z", "stages_s": {"preprocess": 8.610001532360911e-05, "rewrite": 1.9479332999908365, "structural": 1.865794699988328, "probe": 0.06017700000666082, "retrieval": 2.0440989999915473, "selection": 2.4227961000287905, "implication_expansion": 0.28120840003248304, "prompt_composition": 2.069998299703002e-05, "group_display": 0.0792113000061363}, "total_s": 8.734649899997748, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 59 |
+
{"timestamp_utc": "2026-03-07T18:17:24Z", "stages_s": {"preprocess": 0.00012939999578520656, "rewrite": 3.181579700030852, "structural": 0.45629230001941323, "probe": 4.243250800005626, "retrieval": 2.128536299976986, "selection": 4.2171271000406705, "implication_expansion": 0.29443830001400784, "prompt_composition": 2.110004425048828e-05, "group_display": 0.0817057000240311}, "total_s": 14.642313299991656, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 60 |
+
{"timestamp_utc": "2026-03-07T18:20:15Z", "stages_s": {"preprocess": 9.400013368576765e-06, "rewrite": 0.9211662000161596, "structural": 0.49053100001765415, "probe": 0.4501308999606408, "retrieval": 0.08053859998472035, "selection": 1.9059181000338867, "implication_expansion": 3.290001768618822e-05, "prompt_composition": 1.8200022168457508e-05, "group_display": 0.0299964000005275}, "total_s": 3.888701299962122, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
mascotimages/transparentsquirrel.png
ADDED
|
Git LFS Details
|
psq_rag/parsing/prompt_grammar.py
CHANGED
|
@@ -1,60 +1,23 @@
|
|
| 1 |
-
import
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
grammar
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
parser = Lark(grammar, start=
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
def
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
if isinstance(node, Token) and node.type == '__ANON_1':
|
| 25 |
-
tag_position = node.start_pos
|
| 26 |
-
tag_text = node.value
|
| 27 |
-
tags_with_positions.append((tag_text, tag_position, "tag"))
|
| 28 |
-
elif not isinstance(node, Token):
|
| 29 |
-
for child in node.children:
|
| 30 |
-
_traverse(child)
|
| 31 |
-
_traverse(tree)
|
| 32 |
-
return tags_with_positions
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
def build_tag_offsets_dicts(new_image_tags_with_positions):
|
| 37 |
-
# Structure the data for HighlightedText
|
| 38 |
-
tag_data = []
|
| 39 |
-
for tag_text, start_pos, nodetype in new_image_tags_with_positions:
|
| 40 |
-
# Modify the tag
|
| 41 |
-
modified_tag = tag_text.replace('_', ' ').replace('\\(', '(').replace('\\)', ')').strip()
|
| 42 |
-
artist_matrix_tag = tag_text.replace('_', ' ').replace('\\(', '\(').replace('\\)', '\)').strip()
|
| 43 |
-
tf_idf_matrix_tag = re.sub(r'\\([()])', r'\1', re.sub(r' ', '_', tag_text.strip().removeprefix('by ').removeprefix('by_')))
|
| 44 |
-
# Calculate the end position based on the original tag length
|
| 45 |
-
end_pos = start_pos + len(tag_text)
|
| 46 |
-
# Append the structured data for each tag
|
| 47 |
-
tag_data.append({
|
| 48 |
-
"original_tag": tag_text,
|
| 49 |
-
"start_pos": start_pos,
|
| 50 |
-
"end_pos": end_pos,
|
| 51 |
-
"modified_tag": modified_tag,
|
| 52 |
-
"artist_matrix_tag": artist_matrix_tag,
|
| 53 |
-
"tf_idf_matrix_tag": tf_idf_matrix_tag,
|
| 54 |
-
"node_type": nodetype
|
| 55 |
-
})
|
| 56 |
-
return tag_data
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
if __name__ == "__main__":
|
| 60 |
-
print("prompt_grammar.py imports ok")
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from lark import Lark
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# Minimal prompt grammar kept for import compatibility and simple parsing use.
|
| 7 |
+
grammar = r"""
|
| 8 |
+
!start: (prompt | /[][():]/+)*
|
| 9 |
+
prompt: (emphasized | plain | comma | WHITESPACE)*
|
| 10 |
+
!emphasized: "(" prompt ")"
|
| 11 |
+
| "(" prompt ":" [WHITESPACE] NUMBER [WHITESPACE] ")"
|
| 12 |
+
comma: ","
|
| 13 |
+
WHITESPACE: /\s+/
|
| 14 |
+
plain: /([^,\\\[\]():|]|\\.)+/
|
| 15 |
+
%import common.SIGNED_NUMBER -> NUMBER
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
parser = Lark(grammar, start="start")
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def parse_prompt(text: str):
|
| 22 |
+
return parser.parse(text or "")
|
| 23 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
psq_rag/retrieval/psq_retrieval.py
CHANGED
|
@@ -8,7 +8,6 @@ import pathlib
|
|
| 8 |
import re
|
| 9 |
from collections import Counter, OrderedDict
|
| 10 |
from dataclasses import dataclass
|
| 11 |
-
from itertools import islice
|
| 12 |
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Union
|
| 13 |
|
| 14 |
import numpy as np
|
|
@@ -42,14 +41,6 @@ def _norm_tag_for_lookup(s: str) -> str:
|
|
| 42 |
return s.replace(' ', '_').replace('\\(', '(').replace('\\)', ')')
|
| 43 |
|
| 44 |
|
| 45 |
-
special_tags = ["score:0", "score:1", "score:2", "score:3", "score:4", "score:5", "score:6", "score:7", "score:8", "score:9", "rating:s", "rating:q", "rating:e"]
|
| 46 |
-
def remove_special_tags(original_string):
|
| 47 |
-
tags = [tag.strip() for tag in original_string.split(",")]
|
| 48 |
-
remaining_tags = [tag for tag in tags if tag not in special_tags]
|
| 49 |
-
removed_tags = [tag for tag in tags if tag in special_tags]
|
| 50 |
-
return ", ".join(remaining_tags), removed_tags
|
| 51 |
-
|
| 52 |
-
|
| 53 |
def construct_pseudo_vector(pseudo_doc_terms, idf, term_to_column_index):
|
| 54 |
cols, data = [], []
|
| 55 |
for term, w in pseudo_doc_terms.items():
|
|
@@ -121,23 +112,6 @@ def get_tfidf_reduced_similar_tags(pseudo_doc_terms, allow_nsfw_tags):
|
|
| 121 |
return transformed_sorted_tag_similarity_dict
|
| 122 |
|
| 123 |
|
| 124 |
-
def psq_candidates_from_terms(terms: Sequence[str], *, allow_nsfw_tags: bool, k: int = 300):
|
| 125 |
-
cand_dict = get_tfidf_reduced_similar_tags(dict(Counter(terms)), allow_nsfw_tags)
|
| 126 |
-
candidates = list(islice(cand_dict.items(), k))
|
| 127 |
-
tag_counts = get_tag_counts()
|
| 128 |
-
return [
|
| 129 |
-
Candidate(
|
| 130 |
-
tag=tag,
|
| 131 |
-
score_combined=float(score),
|
| 132 |
-
score_fasttext=None,
|
| 133 |
-
score_context=None,
|
| 134 |
-
count=tag_counts.get(tag),
|
| 135 |
-
sources=[],
|
| 136 |
-
)
|
| 137 |
-
for tag, score in candidates
|
| 138 |
-
]
|
| 139 |
-
|
| 140 |
-
|
| 141 |
def psq_candidates_from_rewrite_phrases(
|
| 142 |
rewrite_phrases: Sequence[str],
|
| 143 |
*,
|
|
|
|
| 8 |
import re
|
| 9 |
from collections import Counter, OrderedDict
|
| 10 |
from dataclasses import dataclass
|
|
|
|
| 11 |
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Union
|
| 12 |
|
| 13 |
import numpy as np
|
|
|
|
| 41 |
return s.replace(' ', '_').replace('\\(', '(').replace('\\)', ')')
|
| 42 |
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
def construct_pseudo_vector(pseudo_doc_terms, idf, term_to_column_index):
|
| 45 |
cols, data = [], []
|
| 46 |
for term, w in pseudo_doc_terms.items():
|
|
|
|
| 112 |
return transformed_sorted_tag_similarity_dict
|
| 113 |
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
def psq_candidates_from_rewrite_phrases(
|
| 116 |
rewrite_phrases: Sequence[str],
|
| 117 |
*,
|
psq_rag/retrieval/state.py
CHANGED
|
@@ -18,7 +18,6 @@ TFIDF_PATH = pathlib.Path("tf_idf_files_420.joblib")
|
|
| 18 |
NSFW_CSV_PATH = pathlib.Path("word_rating_probabilities.csv")
|
| 19 |
NSFW_THRESHOLD = 0.95
|
| 20 |
|
| 21 |
-
HNSW_ART_PATH = pathlib.Path("tfidf_hnsw_artists.bin")
|
| 22 |
HNSW_TAG_PATH = pathlib.Path("tfidf_hnsw_tags.bin")
|
| 23 |
FASTTEXT_MODEL_PATH = pathlib.Path("e621FastTextModel010Replacement_small.bin")
|
| 24 |
TAG_ALIASES_PATH = pathlib.Path("fluffyrock_3m.csv")
|
|
@@ -37,9 +36,7 @@ _tag_implications: Optional[Dict[str, List[str]]] = None
|
|
| 37 |
|
| 38 |
|
| 39 |
_hnsw_tag_index: Optional["hnswlib.Index"] = None
|
| 40 |
-
_hnsw_artist_index: Optional["hnswlib.Index"] = None
|
| 41 |
_hnsw_tag_count: int = 0
|
| 42 |
-
_hnsw_artist_count: int = 0
|
| 43 |
|
| 44 |
# Tag type names inferred from e621 wiki documentation.
|
| 45 |
# Numeric IDs come from fluffyrock_3m.csv column 1; mapping is heuristic but
|
|
@@ -167,10 +164,6 @@ def get_artist_set() -> Set[str]:
|
|
| 167 |
return _artist_set
|
| 168 |
|
| 169 |
|
| 170 |
-
def is_artist(name: str) -> bool:
|
| 171 |
-
return name in get_artist_set()
|
| 172 |
-
|
| 173 |
-
|
| 174 |
def get_fasttext_model() -> Any:
|
| 175 |
global _fasttext_model
|
| 176 |
if _fasttext_model is not None:
|
|
@@ -380,18 +373,6 @@ def get_tfidf_tag_vectors() -> Dict[str, Any]:
|
|
| 380 |
}
|
| 381 |
return _tfidf_tag_vectors
|
| 382 |
|
| 383 |
-
|
| 384 |
-
def retrieval_assets_status() -> Dict[str, bool]:
|
| 385 |
-
return {
|
| 386 |
-
"tfidf": TFIDF_PATH.is_file(),
|
| 387 |
-
"nsfw_csv": NSFW_CSV_PATH.is_file(),
|
| 388 |
-
"fasttext_model": FASTTEXT_MODEL_PATH.is_file(),
|
| 389 |
-
"tag_aliases_csv": TAG_ALIASES_PATH.is_file(),
|
| 390 |
-
"hnsw_tags": HNSW_TAG_PATH.is_file(),
|
| 391 |
-
"hnsw_artists": HNSW_ART_PATH.is_file(),
|
| 392 |
-
}
|
| 393 |
-
|
| 394 |
-
|
| 395 |
def _build_or_load_index(path: pathlib.Path, rows: list[int], rm: np.ndarray, dim: int) -> "hnswlib.Index":
|
| 396 |
idx = hnswlib.Index(space="cosine", dim=dim)
|
| 397 |
need_build = True
|
|
@@ -425,51 +406,26 @@ def _build_or_load_index(path: pathlib.Path, rows: list[int], rm: np.ndarray, di
|
|
| 425 |
return idx
|
| 426 |
|
| 427 |
|
| 428 |
-
def _ensure_hnsw_indexes(
|
| 429 |
-
global _hnsw_tag_index,
|
| 430 |
|
| 431 |
if hnswlib is None:
|
| 432 |
return
|
| 433 |
|
| 434 |
-
if _hnsw_tag_index is not None
|
| 435 |
return
|
| 436 |
|
| 437 |
components = get_tfidf_components()
|
| 438 |
reduced_matrix = components["reduced_matrix"]
|
| 439 |
-
row_to_tag = components["row_to_tag"]
|
| 440 |
rm = _l2_normalize_rows(reduced_matrix).astype(np.float32)
|
| 441 |
n_items, dim = rm.shape
|
| 442 |
|
| 443 |
-
|
| 444 |
-
artist_rows: list[int] = []
|
| 445 |
-
tag_rows: list[int] = []
|
| 446 |
-
|
| 447 |
-
for i in range(n_items):
|
| 448 |
-
tag = row_to_tag.get(i, "")
|
| 449 |
-
base = tag[3:] if tag.startswith("by_") else tag
|
| 450 |
-
|
| 451 |
-
if tag in {"by_unknown_artist", "by_conditional_dnp"}:
|
| 452 |
-
tag_rows.append(i)
|
| 453 |
-
continue
|
| 454 |
-
|
| 455 |
-
if artist_set and is_artist(base):
|
| 456 |
-
artist_rows.append(i)
|
| 457 |
-
else:
|
| 458 |
-
tag_rows.append(i)
|
| 459 |
|
| 460 |
_hnsw_tag_index = _build_or_load_index(HNSW_TAG_PATH, tag_rows, rm, dim)
|
| 461 |
_hnsw_tag_count = len(tag_rows)
|
| 462 |
|
| 463 |
-
if need_artists:
|
| 464 |
-
_hnsw_artist_index = _build_or_load_index(HNSW_ART_PATH, artist_rows, rm, dim)
|
| 465 |
-
_hnsw_artist_count = len(artist_rows)
|
| 466 |
-
|
| 467 |
|
| 468 |
def get_hnsw_tag_index() -> Tuple[Optional["hnswlib.Index"], int]:
|
| 469 |
-
_ensure_hnsw_indexes(
|
| 470 |
return _hnsw_tag_index, _hnsw_tag_count
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
def get_hnsw_artist_index() -> Tuple[Optional["hnswlib.Index"], int]:
|
| 474 |
-
_ensure_hnsw_indexes(need_artists=True)
|
| 475 |
-
return _hnsw_artist_index, _hnsw_artist_count
|
|
|
|
| 18 |
NSFW_CSV_PATH = pathlib.Path("word_rating_probabilities.csv")
|
| 19 |
NSFW_THRESHOLD = 0.95
|
| 20 |
|
|
|
|
| 21 |
HNSW_TAG_PATH = pathlib.Path("tfidf_hnsw_tags.bin")
|
| 22 |
FASTTEXT_MODEL_PATH = pathlib.Path("e621FastTextModel010Replacement_small.bin")
|
| 23 |
TAG_ALIASES_PATH = pathlib.Path("fluffyrock_3m.csv")
|
|
|
|
| 36 |
|
| 37 |
|
| 38 |
_hnsw_tag_index: Optional["hnswlib.Index"] = None
|
|
|
|
| 39 |
_hnsw_tag_count: int = 0
|
|
|
|
| 40 |
|
| 41 |
# Tag type names inferred from e621 wiki documentation.
|
| 42 |
# Numeric IDs come from fluffyrock_3m.csv column 1; mapping is heuristic but
|
|
|
|
| 164 |
return _artist_set
|
| 165 |
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
def get_fasttext_model() -> Any:
|
| 168 |
global _fasttext_model
|
| 169 |
if _fasttext_model is not None:
|
|
|
|
| 373 |
}
|
| 374 |
return _tfidf_tag_vectors
|
| 375 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
def _build_or_load_index(path: pathlib.Path, rows: list[int], rm: np.ndarray, dim: int) -> "hnswlib.Index":
|
| 377 |
idx = hnswlib.Index(space="cosine", dim=dim)
|
| 378 |
need_build = True
|
|
|
|
| 406 |
return idx
|
| 407 |
|
| 408 |
|
| 409 |
+
def _ensure_hnsw_indexes() -> None:
|
| 410 |
+
global _hnsw_tag_index, _hnsw_tag_count
|
| 411 |
|
| 412 |
if hnswlib is None:
|
| 413 |
return
|
| 414 |
|
| 415 |
+
if _hnsw_tag_index is not None:
|
| 416 |
return
|
| 417 |
|
| 418 |
components = get_tfidf_components()
|
| 419 |
reduced_matrix = components["reduced_matrix"]
|
|
|
|
| 420 |
rm = _l2_normalize_rows(reduced_matrix).astype(np.float32)
|
| 421 |
n_items, dim = rm.shape
|
| 422 |
|
| 423 |
+
tag_rows = list(range(n_items))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
|
| 425 |
_hnsw_tag_index = _build_or_load_index(HNSW_TAG_PATH, tag_rows, rm, dim)
|
| 426 |
_hnsw_tag_count = len(tag_rows)
|
| 427 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
|
| 429 |
def get_hnsw_tag_index() -> Tuple[Optional["hnswlib.Index"], int]:
|
| 430 |
+
_ensure_hnsw_indexes()
|
| 431 |
return _hnsw_tag_index, _hnsw_tag_count
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
psq_rag/tagging/categorized_suggestions.py
CHANGED
|
@@ -205,25 +205,3 @@ def generate_categorized_suggestions(
|
|
| 205 |
categories=categories,
|
| 206 |
)
|
| 207 |
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
def get_category_suggestions_dict(
|
| 211 |
-
categorized: CategorizedTagSuggestions
|
| 212 |
-
) -> Dict[str, List[str]]:
|
| 213 |
-
"""
|
| 214 |
-
Get simple dict of category -> suggested tags (without scores).
|
| 215 |
-
|
| 216 |
-
Args:
|
| 217 |
-
categorized: The categorized suggestions
|
| 218 |
-
|
| 219 |
-
Returns:
|
| 220 |
-
Dict mapping category_name -> [tag1, tag2, ...]
|
| 221 |
-
"""
|
| 222 |
-
result = {}
|
| 223 |
-
|
| 224 |
-
for cat_name, cat_sugg in categorized.by_category.items():
|
| 225 |
-
result[cat_name] = [tag for tag, _ in cat_sugg.suggestions]
|
| 226 |
-
|
| 227 |
-
result['other'] = [tag for tag, _ in categorized.other_suggestions]
|
| 228 |
-
|
| 229 |
-
return result
|
|
|
|
| 205 |
categories=categories,
|
| 206 |
)
|
| 207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
psq_rag/ui/group_ranked_display.py
CHANGED
|
@@ -304,42 +304,3 @@ def rank_groups_from_tfidf(
|
|
| 304 |
|
| 305 |
rows_out.sort(key=lambda r: r.expected_count, reverse=True)
|
| 306 |
return rows_out[: max(1, int(top_groups))]
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
def _fmt_tag_cell(tag: str, p: float) -> str:
|
| 310 |
-
safe_tag = tag.replace("|", "\\|")
|
| 311 |
-
return f"`{safe_tag}` (p={p:.2f}, E={p:.2f})"
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
def render_group_rankings_markdown(
|
| 315 |
-
seed_terms: Sequence[str],
|
| 316 |
-
*,
|
| 317 |
-
top_groups: int,
|
| 318 |
-
top_tags_per_group: int,
|
| 319 |
-
group_rank_top_k: int,
|
| 320 |
-
) -> str:
|
| 321 |
-
rows = rank_groups_from_tfidf(
|
| 322 |
-
seed_terms,
|
| 323 |
-
top_groups=top_groups,
|
| 324 |
-
top_tags_per_group=top_tags_per_group,
|
| 325 |
-
group_rank_top_k=group_rank_top_k,
|
| 326 |
-
)
|
| 327 |
-
if not rows:
|
| 328 |
-
return "No ranked group display available (insufficient TF-IDF context)."
|
| 329 |
-
|
| 330 |
-
k = max(1, int(top_tags_per_group))
|
| 331 |
-
headers = ["Group/Category", f"Expected Tags (top {max(1, int(group_rank_top_k))})"]
|
| 332 |
-
headers.extend([f"Tag {i}" for i in range(1, k + 1)])
|
| 333 |
-
table = [
|
| 334 |
-
"| " + " | ".join(headers) + " |",
|
| 335 |
-
"| " + " | ".join(["---"] * len(headers)) + " |",
|
| 336 |
-
]
|
| 337 |
-
|
| 338 |
-
for row in rows:
|
| 339 |
-
cells = [row.group_name, f"{row.expected_count:.2f}"]
|
| 340 |
-
tag_cells = [_fmt_tag_cell(tag, p) for tag, p in row.tags]
|
| 341 |
-
if len(tag_cells) < k:
|
| 342 |
-
tag_cells.extend([""] * (k - len(tag_cells)))
|
| 343 |
-
cells.extend(tag_cells)
|
| 344 |
-
table.append("| " + " | ".join(cells) + " |")
|
| 345 |
-
return "\n".join(table)
|
|
|
|
| 304 |
|
| 305 |
rows_out.sort(key=lambda r: r.expected_count, reverse=True)
|
| 306 |
return rows_out[: max(1, int(top_groups))]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|