Food Desert committed on
Commit
82fe126
·
1 Parent(s): 33fc1b0

Polish Gradio UI layout/tooltips and remove dead helper code

Browse files
app.py CHANGED
@@ -1,21 +1,27 @@
1
- import gradio as gr
2
- import os
3
- import logging
4
- import time
5
- import json
6
- import csv
7
- from datetime import datetime
8
- from functools import lru_cache
9
- from PIL import Image
10
- from pathlib import Path
11
- from typing import Any, Dict, List, Set, Tuple
 
12
  from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
13
 
14
  from psq_rag.pipeline.preproc import extract_user_provided_tags_upto_3_words
15
  from psq_rag.llm.rewrite import llm_rewrite_prompt
16
  from psq_rag.retrieval.psq_retrieval import psq_candidates_from_rewrite_phrases, _norm_tag_for_lookup
17
  from psq_rag.llm.select import llm_select_indices, llm_infer_structural_tags, llm_infer_probe_tags
18
- from psq_rag.retrieval.state import expand_tags_via_implications, get_tag_type_name, get_tag_implications
 
 
 
 
 
19
  from psq_rag.ui.group_ranked_display import rank_groups_from_tfidf, _load_enabled_groups
20
 
21
 
@@ -46,6 +52,15 @@ def _display_tag_text(tag: str) -> str:
46
  return tag.replace("_", " ")
47
 
48
 
 
 
 
 
 
 
 
 
 
49
  def _normalize_selection_origin(origin: str) -> str:
50
  o = (origin or "").strip().lower()
51
  if o in {"rewrite", "selection", "probe", "structural", "user", "candidate"}:
@@ -53,11 +68,53 @@ def _normalize_selection_origin(origin: str) -> str:
53
  return "selection"
54
 
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  def _choice_label_with_source_meta(tag: str, *, origin: str, preselected: bool) -> str:
57
- # Marker is stripped client-side and converted into data attributes for CSS-driven colors.
58
  origin_norm = _normalize_selection_origin(origin)
59
  pre = "1" if preselected else "0"
60
- return f"{_display_tag_text(tag)} [[psq:{origin_norm}:{pre}]]"
 
 
 
 
61
 
62
 
63
  def _selection_source_rank(origin: str) -> int:
@@ -246,6 +303,7 @@ def _build_toggle_rows(
246
  *,
247
  seed_terms: List[str],
248
  selected_tags: List[str],
 
249
  tag_selection_origins: Dict[str, str],
250
  implied_parent_map: Dict[str, str],
251
  top_groups: int,
@@ -267,18 +325,72 @@ def _build_toggle_rows(
267
  )
268
  )
269
  selected_index: Dict[str, int] = {t: i for i, t in enumerate(selected_active)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
- row_defs: List[Dict[str, Any]] = []
272
- displayed_group_names = [r.group_name for r in ranked_rows]
273
- displayed_group_tag_sets: Dict[str, Set[str]] = {
274
- name: {t for t in groups_map.get(name, []) if not _is_artist_tag(t)}
275
- for name in displayed_group_names
276
- }
277
- tags_in_any_displayed_group: Set[str] = set()
278
- for tag_set in displayed_group_tag_sets.values():
279
- tags_in_any_displayed_group.update(tag_set)
280
-
281
- selected_other_raw = [t for t in selected_active if t not in tags_in_any_displayed_group]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  selected_other = _order_selected_tags_for_row(
283
  row_selected_tags=selected_other_raw,
284
  selected_index=selected_index,
@@ -295,7 +407,7 @@ def _build_toggle_rows(
295
  row_defs.append(
296
  {
297
  "name": "selected_other",
298
- "label": "Selected (Other)",
299
  "tags": selected_other,
300
  "tag_meta": selected_other_meta,
301
  }
@@ -329,16 +441,20 @@ def _build_toggle_rows(
329
  row_defs.append(
330
  {
331
  "name": group_name,
332
- "label": f"{group_name} (E={row.expected_count:.2f})",
333
  "tags": merged,
334
  "tag_meta": tag_meta,
335
  }
336
  )
337
-
338
- return row_defs
 
 
 
 
339
 
340
 
341
- def _build_display_audit_line(
342
  row_defs: List[Dict[str, Any]],
343
  *,
344
  active_selected_tags: List[str],
@@ -366,12 +482,14 @@ def _build_display_audit_line(
366
  row_name = row.get("name", "")
367
  row_label = row.get("label", row_name)
368
  for tag in row.get("tags", []):
369
- rec = info_by_tag.setdefault(tag, {"rows": [], "sources": set()})
370
- rec["rows"].append(row_label)
371
- if row_name == "selected_other":
372
- rec["sources"].add("selected_other_row")
373
- else:
374
- rec["sources"].add("ranked_group_row")
 
 
375
  if tag in active_set:
376
  rec["sources"].add("selected_active")
377
  if tag in direct_set:
@@ -390,12 +508,12 @@ def _build_display_audit_line(
390
  for tag, rec in sorted(info_by_tag.items())
391
  ],
392
  }
393
- return "Display Tag Audit: " + json.dumps(payload, ensure_ascii=True)
394
-
395
-
396
- def _build_row_component_updates(
397
- row_defs: List[Dict[str, Any]],
398
- selected_tags: List[str],
399
  max_rows: int,
400
  ):
401
  selected = {t for t in (selected_tags or []) if t}
@@ -410,7 +528,7 @@ def _build_row_component_updates(
410
  values = [t for t in tags if t in selected]
411
  row_values_state.append(values)
412
  visible = bool(tags)
413
- header_updates.append(gr.update(value=f"**{row.get('label', '')}**", visible=visible))
414
  tag_meta = row.get("tag_meta", {}) if isinstance(row.get("tag_meta", {}), dict) else {}
415
  choices = []
416
  for t in tags:
@@ -442,8 +560,9 @@ def _on_toggle_row(
442
  max_rows: int,
443
  ):
444
  row_defs = row_defs_state or []
 
445
  selected = set(selected_tags_state or [])
446
- row = row_defs[row_idx] if 0 <= row_idx < len(row_defs) else {}
447
  row_tags = list(dict.fromkeys(row.get("tags", [])))
448
  row_tag_set = set(row_tags)
449
  row_tag_by_norm = {_norm_tag_for_lookup(t): t for t in row_tags}
@@ -459,16 +578,32 @@ def _on_toggle_row(
459
  if mapped:
460
  new_set.add(mapped)
461
 
462
- prev_row_selected = {t for t in selected if t in row_tag_set}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
  selected.difference_update(row_tag_set)
464
  selected.update(new_set)
465
  toggled_tags = prev_row_selected ^ new_set
466
 
467
- # Recompute row selections, but only push UI updates to rows touched by the toggled tags.
468
  new_row_values_state: List[List[str]] = []
469
  affected_rows: Set[int] = {row_idx}
470
- for idx, row in enumerate(row_defs):
471
- tags = list(dict.fromkeys(row.get("tags", [])))
472
  values = [t for t in tags if t in selected]
473
  new_row_values_state.append(values)
474
  if toggled_tags and any(t in toggled_tags for t in tags):
@@ -476,40 +611,60 @@ def _on_toggle_row(
476
 
477
  checkbox_updates = []
478
  for idx in range(max_rows):
479
- if idx < len(row_defs) and idx in affected_rows:
 
 
 
480
  checkbox_updates.append(gr.update(value=new_row_values_state[idx]))
481
  else:
482
- checkbox_updates.append(gr.update())
483
 
484
- prompt_text = _compose_toggle_prompt_text(sorted(selected), row_defs)
485
  return [sorted(selected), new_row_values_state, prompt_text, *checkbox_updates]
486
 
487
 
488
- def _build_ui_payload(
489
- *,
490
- console_text: str,
491
- legacy_prompt_text: str,
492
- row_defs: List[Dict[str, Any]],
493
- selected_tags: List[str],
494
- ):
495
  prompt_text, row_values_state, header_updates, checkbox_updates = _build_row_component_updates(
496
  row_defs=row_defs,
497
  selected_tags=selected_tags,
498
  max_rows=display_max_rows_default,
499
  )
500
- return [
501
- console_text,
502
- legacy_prompt_text,
503
- prompt_text,
504
- sorted(set(selected_tags or [])),
505
- row_defs,
506
- row_values_state,
507
- *header_updates,
508
  *checkbox_updates,
509
- ]
510
-
511
-
512
- def _build_selection_query(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
  prompt_in: str,
514
  rewritten: str,
515
  structural_tags: List[str],
@@ -799,42 +954,112 @@ css = """
799
  .source-legend {
800
  display: flex;
801
  flex-wrap: wrap;
 
802
  gap: 8px;
803
  margin: 4px 0 10px 0;
804
  }
805
 
 
 
 
 
 
 
 
806
  .source-legend .chip {
807
  display: inline-flex;
808
  align-items: center;
809
- gap: 8px;
810
- border-radius: 999px;
811
- border: 1px solid #8792a2;
812
- padding: 5px 10px;
813
  font-size: 0.85rem;
814
- font-weight: 700;
815
- color: #1f2430;
816
  background: #f3f6fb;
817
  }
818
 
819
- .source-legend .swatch {
820
- width: 12px;
821
- height: 12px;
822
- border-radius: 50%;
823
- border: 1px solid rgba(0,0,0,0.2);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
824
  }
825
 
826
- .source-legend .rewrite { background: #26b9a3; }
827
- .source-legend .selection { background: #f0a93c; }
828
- .source-legend .probe { background: #9a6cff; }
829
- .source-legend .structural { background: #53c368; }
830
- .source-legend .implied { background: #a8b3c4; }
831
- .source-legend .user { background: #4f86ff; }
832
- .source-legend .unselected { background: #c7ced8; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
833
  """
834
 
835
  client_js = """
836
  () => {
837
- const markerRe = /\\s*\\[\\[psq:([a-z_]+):(0|1)\\]\\]\\s*$/;
 
 
 
 
 
 
 
 
 
 
838
  const applyTagMeta = () => {
839
  const labels = document.querySelectorAll(".lego-tags label");
840
  labels.forEach((label) => {
@@ -845,6 +1070,14 @@ client_js = """
845
  if (!match) return;
846
  label.dataset.psqOrigin = match[1];
847
  label.dataset.psqPreselected = match[2];
 
 
 
 
 
 
 
 
848
  span.textContent = text.replace(markerRe, "");
849
  });
850
  };
@@ -939,12 +1172,11 @@ def rag_pipeline_ui(
939
  log("Start: received prompt")
940
  prompt_in = (user_prompt or "").strip()
941
  if not prompt_in:
942
- return _build_ui_payload(
943
- console_text="Error: empty prompt",
944
- legacy_prompt_text="",
945
- row_defs=[],
946
- selected_tags=[],
947
- )
948
 
949
  log("Input:")
950
  log(prompt_in)
@@ -1001,10 +1233,10 @@ def rag_pipeline_ui(
1001
 
1002
 
1003
  log("Step 2: Prompt Squirrel retrieval (hidden)")
1004
- try:
1005
- t0 = time.perf_counter()
1006
- retrieval_context_tags = list(dict.fromkeys((structural_tags or []) + (probe_tags or [])))
1007
- rewrite_phrases = [p.strip() for p in (rewrite_for_retrieval or "").split(",") if p.strip()]
1008
  retrieval_result = psq_candidates_from_rewrite_phrases(
1009
  rewrite_phrases=rewrite_phrases,
1010
  allow_nsfw_tags=allow_nsfw_tags,
@@ -1021,10 +1253,10 @@ def rag_pipeline_ui(
1021
  if selection_candidate_cap > 0 and len(candidates) > selection_candidate_cap:
1022
  candidates = candidates[:selection_candidate_cap]
1023
  log(f"Selection candidate cap applied: {selection_candidate_cap}")
1024
- dt = time.perf_counter()-t0
1025
- _record_timing("retrieval", dt)
1026
- log(f"Retrieval: {dt:.2f}s")
1027
- log(f"Retrieved {len(candidates)} candidate tags")
1028
  if verbose_retrieval:
1029
  log(f"Total unique candidates: {len(candidates)}")
1030
  limit = None if verbose_retrieval_all else max(1, int(verbose_retrieval_limit))
@@ -1061,11 +1293,19 @@ def rag_pipeline_ui(
1061
  f" {tag}{alias_part} | fasttext={fasttext_str} context={context_str} "
1062
  f"combined={combined_str} count={count}"
1063
  )
1064
- if limit is not None and len(rows) > limit:
1065
- log(f" ... ({len(rows) - limit} more)")
1066
- except Exception as e:
1067
- log(f"Retrieval fallback: {type(e).__name__}: {e}")
1068
- candidates = []
 
 
 
 
 
 
 
 
1069
 
1070
  log("Step 3: LLM index selection (uses rewrite + structural/probe context)")
1071
  selection_query = _build_selection_query(
@@ -1204,6 +1444,7 @@ def rag_pipeline_ui(
1204
  toggle_rows = _build_toggle_rows(
1205
  seed_terms=seed_terms,
1206
  selected_tags=active_selected_tags,
 
1207
  tag_selection_origins=tag_selection_origins,
1208
  implied_parent_map=implied_parent_map,
1209
  top_groups=max(1, int(display_top_groups)),
@@ -1213,131 +1454,115 @@ def rag_pipeline_ui(
1213
  dt = time.perf_counter()-t0
1214
  _record_timing("group_display", dt)
1215
  log(f"Ranked group display: {dt:.2f}s ({len(toggle_rows)} rows)")
1216
- log(
1217
- _build_display_audit_line(
1218
- toggle_rows,
1219
- active_selected_tags=active_selected_tags,
1220
- direct_selected_tags=direct_selected_tags,
1221
- implied_selected_tags=implied_selected_tags,
1222
- )
1223
- )
1224
-
1225
- total_dt = time.perf_counter()-t_total0
1226
- _emit_timing_summary(total_dt)
1227
  _append_timing_jsonl(total_dt)
1228
  log("Done: final prompt ready")
1229
- return _build_ui_payload(
1230
- console_text="\n".join(logs),
1231
- legacy_prompt_text=final_prompt,
1232
- row_defs=toggle_rows,
1233
- selected_tags=active_selected_tags,
1234
- )
1235
 
1236
  except Exception as e:
1237
  log(f"Error: {type(e).__name__}: {e}")
1238
- return _build_ui_payload(
1239
- console_text="\n".join(logs),
1240
- legacy_prompt_text="",
1241
- row_defs=[],
1242
- selected_tags=[],
1243
- )
1244
 
1245
 
1246
 
1247
  with gr.Blocks(css=css, js=client_js) as app:
1248
- with gr.Row():
1249
- with gr.Column(scale=3, elem_classes=["prompt-col"]):
1250
- image_tags = gr.Textbox(
1251
- label="Enter Prompt",
1252
- placeholder="e.g. fox, outside, detailed background, .",
1253
- lines=1
1254
- )
1255
- with gr.Column(scale=1):
1256
- _mascot_pil = _load_mascot_image()
1257
- if _mascot_pil is not None:
1258
- mascot_img = gr.Image(
1259
- value=_mascot_pil,
1260
- show_label=False,
1261
- interactive=False,
1262
- height=220,
1263
- elem_id="mascot"
1264
- )
1265
- else:
1266
- mascot_img = gr.Markdown("`(mascot image unavailable)`")
1267
- submit_button = gr.Button("Run", variant="primary")
1268
-
1269
- gr.Markdown(
1270
- """
1271
- ### Prompt Squirrel RAG (pipeline version)
1272
-
1273
- Type a rough prompt. This tool rewrites it and aligns it to an e621-style tag vocabulary using Prompt Squirrel internally,
1274
- then returns a cleaned, model-friendly prompt.
1275
- """.strip()
1276
- )
1277
-
1278
- console = gr.Textbox(
1279
- label="Console",
1280
- lines=10,
1281
- interactive=False,
1282
- placeholder="Progress logs will appear here."
1283
- )
1284
-
1285
- suggested_prompt = gr.Textbox(
1286
- label="Suggested Prompt (From Toggled Tags)",
1287
- lines=3,
1288
- interactive=False,
1289
- show_copy_button=True,
1290
- placeholder="Comma-separated tags selected in the rows below."
1291
- )
1292
-
1293
- with gr.Accordion("Legacy Pipeline Prompt (for reference)", open=False):
1294
- legacy_final_prompt = gr.Textbox(
1295
- label="Legacy Final Prompt",
1296
- lines=3,
1297
- interactive=False,
1298
- show_copy_button=True,
1299
- )
1300
 
1301
  selected_tags_state = gr.State([])
1302
  row_defs_state = gr.State([])
1303
  row_values_state = gr.State([])
1304
 
1305
- gr.Markdown("### Toggle Tag Rows")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1306
  gr.HTML(
1307
  """
1308
  <div class="source-legend">
1309
- <span class="chip"><span class="swatch rewrite"></span>Rewrite phrase</span>
1310
- <span class="chip"><span class="swatch selection"></span>General selection</span>
1311
- <span class="chip"><span class="swatch probe"></span>Probe query</span>
1312
- <span class="chip"><span class="swatch structural"></span>Structural query</span>
1313
- <span class="chip"><span class="swatch implied"></span>Implied</span>
1314
- <span class="chip"><span class="swatch user"></span>User-toggled</span>
1315
- <span class="chip"><span class="swatch unselected"></span>Unselected</span>
 
1316
  </div>
1317
  """
1318
  )
1319
- gr.Markdown(
1320
- "Rows are ranked by expected tag count (E). Within each row: structural -> probe -> selected, "
1321
- "implied tags follow their triggering selected tag when possible, then unselected tags in confidence order."
1322
- )
1323
- row_headers: List[gr.Markdown] = []
1324
- row_checkboxes: List[gr.CheckboxGroup] = []
1325
- for _ in range(display_max_rows_default):
1326
- row_headers.append(gr.Markdown(value="", visible=False))
1327
- row_checkboxes.append(
1328
- gr.CheckboxGroup(
1329
- choices=[],
1330
- value=[],
1331
- visible=False,
1332
- interactive=True,
1333
- container=False,
1334
- elem_classes=["lego-tags"],
1335
- )
1336
- )
1337
-
1338
- gr.Markdown(
1339
- "Toggling a tag in any row toggles it everywhere else that tag appears."
1340
- )
1341
 
1342
  with gr.Accordion("Display Settings", open=False):
1343
  with gr.Row():
@@ -1353,43 +1578,63 @@ then returns a cleaned, model-friendly prompt.
1353
  label="Top Tags Shown Per Row",
1354
  minimum=1,
1355
  )
1356
- display_rank_top_k = gr.Number(
1357
- value=display_rank_top_k_default,
1358
- precision=0,
1359
- label="Top Tags Used for Row Ranking",
1360
- minimum=1,
1361
- )
1362
-
1363
- run_outputs = [
1364
- console,
1365
- legacy_final_prompt,
1366
- suggested_prompt,
1367
- selected_tags_state,
1368
- row_defs_state,
1369
- row_values_state,
 
 
 
 
 
 
 
 
1370
  *row_headers,
1371
  *row_checkboxes,
1372
  ]
1373
 
1374
- submit_button.click(
1375
- rag_pipeline_ui,
1376
- inputs=[image_tags, display_top_groups, display_top_tags_per_group, display_rank_top_k],
1377
- outputs=run_outputs
1378
- )
1379
-
1380
- image_tags.submit(
1381
- rag_pipeline_ui,
1382
- inputs=[image_tags, display_top_groups, display_top_tags_per_group, display_rank_top_k],
1383
- outputs=run_outputs
1384
- )
 
 
 
 
 
 
 
 
 
 
 
 
1385
 
1386
  for idx, row_cb in enumerate(row_checkboxes):
1387
- row_cb.change(
1388
  fn=lambda changed_values, selected_state, row_defs, row_values, i=idx: _on_toggle_row(
1389
  i,
1390
  changed_values,
1391
- selected_state,
1392
- row_defs,
1393
  row_values,
1394
  display_max_rows_default,
1395
  ),
 
1
+ import gradio as gr
2
+ import os
3
+ import logging
4
+ import time
5
+ import json
6
+ import csv
7
+ import base64
8
+ from datetime import datetime
9
+ from functools import lru_cache
10
+ from PIL import Image
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Set, Tuple
13
  from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
14
 
15
  from psq_rag.pipeline.preproc import extract_user_provided_tags_upto_3_words
16
  from psq_rag.llm.rewrite import llm_rewrite_prompt
17
  from psq_rag.retrieval.psq_retrieval import psq_candidates_from_rewrite_phrases, _norm_tag_for_lookup
18
  from psq_rag.llm.select import llm_select_indices, llm_infer_structural_tags, llm_infer_probe_tags
19
+ from psq_rag.retrieval.state import (
20
+ expand_tags_via_implications,
21
+ get_tag_type_name,
22
+ get_tag_implications,
23
+ get_tag_counts,
24
+ )
25
  from psq_rag.ui.group_ranked_display import rank_groups_from_tfidf, _load_enabled_groups
26
 
27
 
 
52
  return tag.replace("_", " ")
53
 
54
 
55
def _display_row_label(name: str) -> str:
    """Turn an internal row/group name into the heading text shown in the UI.

    The special row name ``"selected_other"`` maps to ``"Selected (Other)"``.
    Any other non-empty name has underscores replaced by spaces and is
    title-cased. ``None``, empty, or whitespace-only input yields ``""``.
    """
    cleaned = (name or "").strip()
    if cleaned == "selected_other":
        return "Selected (Other)"
    return cleaned.replace("_", " ").title() if cleaned else ""
62
+
63
+
64
  def _normalize_selection_origin(origin: str) -> str:
65
  o = (origin or "").strip().lower()
66
  if o in {"rewrite", "selection", "probe", "structural", "user", "candidate"}:
 
68
  return "selection"
69
 
70
 
71
@lru_cache(maxsize=1)
def _load_tag_wiki_defs() -> Dict[str, str]:
    """Load tag definitions from ``data/tag_wiki_defs.json`` for use in tooltips.

    Returns:
        A mapping of normalized tag (via ``_norm_tag_for_lookup``) to its
        definition text with all runs of whitespace collapsed to single
        spaces. Entries whose tag or text end up empty are dropped. An empty
        dict is returned when the file is missing, is not a JSON object, or
        cannot be read/parsed.

    Notes:
        The result is memoized by ``lru_cache``, so the file is read at most
        once per process and a failed load is also cached as ``{}``. The
        returned dict is the cached object itself; callers must not mutate it.
    """
    p = Path("data/tag_wiki_defs.json")
    # The definitions file is optional; its absence is not worth a warning.
    if not p.exists():
        return {}
    try:
        with p.open("r", encoding="utf-8") as f:
            data = json.load(f)
        out: Dict[str, str] = {}
        if isinstance(data, dict):
            for k, v in data.items():
                tag = _norm_tag_for_lookup(str(k))
                # Collapse newlines/tabs/multiple spaces into single spaces.
                text = " ".join(str(v or "").split())
                if tag and text:
                    out[tag] = text
        return out
    except Exception as exc:
        # Tooltips are best-effort, so never let a bad data file break the UI,
        # but leave a trace: the empty result is cached for the whole process.
        logging.getLogger(__name__).warning(
            "Could not load tag wiki definitions from %s: %s: %s",
            p,
            type(exc).__name__,
            exc,
        )
        return {}
89
+
90
+
91
def _tooltip_text_for_tag(tag: str) -> str:
    """Compose the hover text for a tag.

    The text has up to two lines: a ``Count: N`` line (thousands-separated)
    when ``get_tag_counts()`` holds an ``int`` for the normalized tag, followed
    by the tag's entry from ``_load_tag_wiki_defs()`` when one exists. Returns
    ``""`` when neither piece is available.
    """
    key = _norm_tag_for_lookup(tag)

    # The count lookup is best-effort: any failure simply omits the count line.
    try:
        usage = get_tag_counts().get(key)
    except Exception:
        usage = None

    definition = _load_tag_wiki_defs().get(key, "")

    lines: List[str] = []
    if isinstance(usage, int):
        lines.append(f"Count: {usage:,}")
    if definition:
        lines.append(definition)
    return "\n".join(lines).strip()
107
+
108
+
109
  def _choice_label_with_source_meta(tag: str, *, origin: str, preselected: bool) -> str:
110
+ # Marker is stripped client-side and converted into data attributes for CSS-driven colors/tooltips.
111
  origin_norm = _normalize_selection_origin(origin)
112
  pre = "1" if preselected else "0"
113
+ tooltip = _tooltip_text_for_tag(tag)
114
+ tip_b64 = ""
115
+ if tooltip:
116
+ tip_b64 = base64.urlsafe_b64encode(tooltip.encode("utf-8")).decode("ascii")
117
+ return f"{_display_tag_text(tag)} [[psq:{origin_norm}:{pre}:{tip_b64}]]"
118
 
119
 
120
  def _selection_source_rank(origin: str) -> int:
 
303
  *,
304
  seed_terms: List[str],
305
  selected_tags: List[str],
306
+ retrieved_candidate_tags: List[str],
307
  tag_selection_origins: Dict[str, str],
308
  implied_parent_map: Dict[str, str],
309
  top_groups: int,
 
325
  )
326
  )
327
  selected_index: Dict[str, int] = {t: i for i, t in enumerate(selected_active)}
328
+
329
+ row_defs: List[Dict[str, Any]] = []
330
+ enabled_group_tag_sets: Dict[str, Set[str]] = {
331
+ name: {t for t in tags if not _is_artist_tag(t)}
332
+ for name, tags in groups_map.items()
333
+ }
334
+ tags_in_any_enabled_group: Set[str] = set()
335
+ for tag_set in enabled_group_tag_sets.values():
336
+ tags_in_any_enabled_group.update(tag_set)
337
+
338
+ displayed_group_names = [r.group_name for r in ranked_rows]
339
+ displayed_group_tag_sets: Dict[str, Set[str]] = {
340
+ name: enabled_group_tag_sets.get(name, set())
341
+ for name in displayed_group_names
342
+ }
343
+ tags_in_any_displayed_group: Set[str] = set()
344
+ for tag_set in displayed_group_tag_sets.values():
345
+ tags_in_any_displayed_group.update(tag_set)
346
 
347
+ retrieved_uncategorized_ranked = list(
348
+ dict.fromkeys(
349
+ _norm_tag_for_lookup(t)
350
+ for t in (retrieved_candidate_tags or [])
351
+ if t
352
+ and not _is_artist_tag(t)
353
+ and not _is_excluded_recommendation_tag(t)
354
+ and _norm_tag_for_lookup(t) not in tags_in_any_enabled_group
355
+ )
356
+ )
357
+ retrieved_other_row: Dict[str, Any] | None = None
358
+ if retrieved_uncategorized_ranked:
359
+ retrieved_uncategorized_set = set(retrieved_uncategorized_ranked)
360
+ selected_in_retrieved_other_raw = [
361
+ t for t in selected_active if t in retrieved_uncategorized_set
362
+ ]
363
+ selected_in_retrieved_other = _order_selected_tags_for_row(
364
+ row_selected_tags=selected_in_retrieved_other_raw,
365
+ selected_index=selected_index,
366
+ tag_selection_origins=tag_selection_origins,
367
+ implied_parent_map=implied_parent_map,
368
+ )
369
+ merged_retrieved_other = selected_in_retrieved_other + [
370
+ t for t in retrieved_uncategorized_ranked if t not in selected_in_retrieved_other
371
+ ]
372
+ keep_n = max(max(1, int(top_tags_per_group)), len(selected_in_retrieved_other))
373
+ merged_retrieved_other = merged_retrieved_other[:keep_n]
374
+ retrieved_other_meta = {
375
+ t: {
376
+ "origin": _normalize_selection_origin(tag_selection_origins.get(t, "selection")),
377
+ "preselected": t in selected_active,
378
+ }
379
+ for t in merged_retrieved_other
380
+ }
381
+ retrieved_other_row = {
382
+ "name": "other_retrieved",
383
+ "label": "Other (Retrieved)",
384
+ "tags": merged_retrieved_other,
385
+ "tag_meta": retrieved_other_meta,
386
+ }
387
+
388
+ # "Selected (Other)" should contain selected tags not already shown in any displayed row.
389
+ # Include "Other (Retrieved)" in that displayed-row set to avoid duplicates across those rows.
390
+ tags_in_displayed_rows = set(tags_in_any_displayed_group)
391
+ if retrieved_other_row:
392
+ tags_in_displayed_rows.update(retrieved_other_row.get("tags", []))
393
+ selected_other_raw = [t for t in selected_active if t not in tags_in_displayed_rows]
394
  selected_other = _order_selected_tags_for_row(
395
  row_selected_tags=selected_other_raw,
396
  selected_index=selected_index,
 
407
  row_defs.append(
408
  {
409
  "name": "selected_other",
410
+ "label": _display_row_label("selected_other"),
411
  "tags": selected_other,
412
  "tag_meta": selected_other_meta,
413
  }
 
441
  row_defs.append(
442
  {
443
  "name": group_name,
444
+ "label": _display_row_label(group_name),
445
  "tags": merged,
446
  "tag_meta": tag_meta,
447
  }
448
  )
449
+
450
+ # Keep this row at the bottom so category/group rows remain contiguous.
451
+ if retrieved_other_row:
452
+ row_defs.append(retrieved_other_row)
453
+
454
+ return row_defs
455
 
456
 
457
+ def _build_display_audit_line(
458
  row_defs: List[Dict[str, Any]],
459
  *,
460
  active_selected_tags: List[str],
 
482
  row_name = row.get("name", "")
483
  row_label = row.get("label", row_name)
484
  for tag in row.get("tags", []):
485
+ rec = info_by_tag.setdefault(tag, {"rows": [], "sources": set()})
486
+ rec["rows"].append(row_label)
487
+ if row_name == "selected_other":
488
+ rec["sources"].add("selected_other_row")
489
+ elif row_name == "other_retrieved":
490
+ rec["sources"].add("other_retrieved_row")
491
+ else:
492
+ rec["sources"].add("ranked_group_row")
493
  if tag in active_set:
494
  rec["sources"].add("selected_active")
495
  if tag in direct_set:
 
508
  for tag, rec in sorted(info_by_tag.items())
509
  ],
510
  }
511
+ return "Display Tag Audit: " + json.dumps(payload, ensure_ascii=True)
512
+
513
+
514
+ def _build_row_component_updates(
515
+ row_defs: List[Dict[str, Any]],
516
+ selected_tags: List[str],
517
  max_rows: int,
518
  ):
519
  selected = {t for t in (selected_tags or []) if t}
 
528
  values = [t for t in tags if t in selected]
529
  row_values_state.append(values)
530
  visible = bool(tags)
531
+ header_updates.append(gr.update(value=row.get("label", ""), visible=visible))
532
  tag_meta = row.get("tag_meta", {}) if isinstance(row.get("tag_meta", {}), dict) else {}
533
  choices = []
534
  for t in tags:
 
560
  max_rows: int,
561
  ):
562
  row_defs = row_defs_state or []
563
+ row_defs_ui = row_defs[: max(0, int(max_rows))]
564
  selected = set(selected_tags_state or [])
565
+ row = row_defs_ui[row_idx] if 0 <= row_idx < len(row_defs_ui) else {}
566
  row_tags = list(dict.fromkeys(row.get("tags", [])))
567
  row_tag_set = set(row_tags)
568
  row_tag_by_norm = {_norm_tag_for_lookup(t): t for t in row_tags}
 
578
  if mapped:
579
  new_set.add(mapped)
580
 
581
+ prev_values = list(row_values_state or [])
582
+ prev_row_values = prev_values[row_idx] if 0 <= row_idx < len(prev_values) else []
583
+ prev_row_selected = set()
584
+ for raw in (prev_row_values or []):
585
+ if raw in row_tag_set:
586
+ prev_row_selected.add(raw)
587
+ continue
588
+ raw_norm = _norm_tag_for_lookup(str(raw))
589
+ mapped = row_tag_by_norm.get(raw_norm)
590
+ if mapped:
591
+ prev_row_selected.add(mapped)
592
+
593
+ # Ignore non-user/no-op events (e.g., programmatic value re-sets) deterministically.
594
+ if new_set == prev_row_selected:
595
+ prompt_text = _compose_toggle_prompt_text(sorted(selected), row_defs_ui)
596
+ checkbox_updates = [gr.skip() for _ in range(max_rows)]
597
+ return [sorted(selected), prev_values, prompt_text, *checkbox_updates]
598
+
599
  selected.difference_update(row_tag_set)
600
  selected.update(new_set)
601
  toggled_tags = prev_row_selected ^ new_set
602
 
 
603
  new_row_values_state: List[List[str]] = []
604
  affected_rows: Set[int] = {row_idx}
605
+ for idx, row_item in enumerate(row_defs_ui):
606
+ tags = list(dict.fromkeys(row_item.get("tags", [])))
607
  values = [t for t in tags if t in selected]
608
  new_row_values_state.append(values)
609
  if toggled_tags and any(t in toggled_tags for t in tags):
 
611
 
612
  checkbox_updates = []
613
  for idx in range(max_rows):
614
+ if idx >= len(row_defs_ui):
615
+ checkbox_updates.append(gr.skip())
616
+ continue
617
+ if idx in affected_rows:
618
  checkbox_updates.append(gr.update(value=new_row_values_state[idx]))
619
  else:
620
+ checkbox_updates.append(gr.skip())
621
 
622
+ prompt_text = _compose_toggle_prompt_text(sorted(selected), row_defs_ui)
623
  return [sorted(selected), new_row_values_state, prompt_text, *checkbox_updates]
624
 
625
 
626
def _build_ui_payload(
    *,
    console_text: str,
    row_defs: List[Dict[str, Any]],
    selected_tags: List[str],
):
    """Assemble the flat list of output values returned to Gradio after a run.

    Order of the returned list: console text, a visibility update that is
    visible only when ``row_defs`` is non-empty, the prompt text composed from
    the rows, the sorted de-duplicated selected tags, ``row_defs`` itself, the
    per-row selected values, then one header update and one checkbox update per
    row slot (``display_max_rows_default`` slots).

    NOTE(review): which component the visibility update targets is defined by
    the caller's outputs list, which is not visible here - confirm there.
    """
    suggested_prompt, per_row_values, headers, checkboxes = _build_row_component_updates(
        row_defs=row_defs,
        selected_tags=selected_tags,
        max_rows=display_max_rows_default,
    )
    payload: List[Any] = [
        console_text,
        gr.update(visible=bool(row_defs)),
        suggested_prompt,
        sorted(set(selected_tags or [])),
        row_defs,
        per_row_values,
    ]
    payload.extend(headers)
    payload.extend(checkboxes)
    return payload
647
+
648
+
649
def _prepare_run_ui() -> List[Any]:
    """Return the output values that put the UI into its "running" state.

    The list mirrors the layout produced by ``_build_ui_payload``: a
    "Running..." console message, ``gr.skip()`` for the second output, a
    progress hint as the prompt text, three empty lists for the state values,
    then updates that blank and hide every row header and every row checkbox
    group (``display_max_rows_default`` of each).
    """
    slot_count = display_max_rows_default
    hidden_headers = [gr.update(value="", visible=False) for _ in range(slot_count)]
    cleared_checkboxes = [
        gr.update(choices=[], value=[], visible=False) for _ in range(slot_count)
    ]

    outputs: List[Any] = [
        "Running...",
        gr.skip(),
        "Running... usually completes in about 20 seconds.",
        [],
        [],
        [],
    ]
    outputs.extend(hidden_headers)
    outputs.extend(cleared_checkboxes)
    return outputs
665
+
666
+
667
+ def _build_selection_query(
668
  prompt_in: str,
669
  rewritten: str,
670
  structural_tags: List[str],
 
954
  .source-legend {
955
  display: flex;
956
  flex-wrap: wrap;
957
+ align-items: center;
958
  gap: 8px;
959
  margin: 4px 0 10px 0;
960
  }
961
 
962
+ .source-legend .legend-title {
963
+ font-size: 0.92rem;
964
+ font-weight: 900;
965
+ color: #334155;
966
+ margin-right: 4px;
967
+ }
968
+
969
  .source-legend .chip {
970
  display: inline-flex;
971
  align-items: center;
972
+ border-radius: 10px;
973
+ border: 1px solid #6c7788;
974
+ padding: 6px 12px;
 
975
  font-size: 0.85rem;
976
+ font-weight: 800;
977
+ color: #111827;
978
  background: #f3f6fb;
979
  }
980
 
981
+ .source-legend .chip.rewrite { background: #26b9a3; color: #062923; border-color: #187869; }
982
+ .source-legend .chip.selection { background: #f0a93c; color: #382206; border-color: #a66f1f; }
983
+ .source-legend .chip.probe { background: #9a6cff; color: #ffffff; border-color: #6745b0; }
984
+ .source-legend .chip.structural { background: #53c368; color: #102d17; border-color: #2f8442; }
985
+ .source-legend .chip.implied { background: #a8b3c4; color: #1d2633; border-color: #6f7e95; }
986
+ .source-legend .chip.user { background: #4f86ff; color: #ffffff; border-color: #2f5fbf; }
987
+ .source-legend .chip.unselected { background: #c7ced8; color: #2d3440; border-color: #7d8897; }
988
+
989
+ .row-heading p {
990
+ margin: 8px 0 0 0 !important;
991
+ font-size: 1.18rem !important;
992
+ font-weight: 850 !important;
993
+ line-height: 1.2 !important;
994
+ }
995
+
996
+ .row-instruction {
997
+ text-align: center;
998
+ margin: 8px 0 12px 0;
999
+ }
1000
+
1001
+ .row-instruction p {
1002
+ margin: 0 !important;
1003
+ font-size: 1.02rem !important;
1004
+ font-style: italic !important;
1005
+ font-weight: 800 !important;
1006
+ color: #1d4ed8 !important;
1007
+ }
1008
+
1009
+ .top-instruction {
1010
+ text-align: center;
1011
+ margin: 2px 0 6px 0;
1012
+ }
1013
+
1014
+ .top-instruction p {
1015
+ margin: 0 !important;
1016
+ font-size: 1.02rem !important;
1017
+ font-style: italic !important;
1018
+ font-weight: 800 !important;
1019
+ color: #1d4ed8 !important;
1020
+ }
1021
+
1022
+ .run-hint {
1023
+ margin-top: 6px;
1024
+ text-align: center;
1025
  }
1026
 
1027
+ .run-hint p {
1028
+ margin: 0 !important;
1029
+ font-size: 0.9rem !important;
1030
+ font-style: italic !important;
1031
+ color: #475569 !important;
1032
+ }
1033
+
1034
+ .prompt-card {
1035
+ background: transparent !important;
1036
+ border: none !important;
1037
+ box-shadow: none !important;
1038
+ padding: 0 !important;
1039
+ }
1040
+
1041
+ .suggested-prompt-box {
1042
+ margin-top: 2px !important;
1043
+ }
1044
+
1045
+ .suggested-prompt-card {
1046
+ margin-top: 10px !important;
1047
+ }
1048
  """
1049
 
1050
  client_js = """
1051
  () => {
1052
+ const markerRe = /\\s*\\[\\[psq:([a-z_]+):(0|1):([A-Za-z0-9_\\-=]*)\\]\\]\\s*$/;
1053
+ const decodeTip = (b64) => {
1054
+ if (!b64) return "";
1055
+ try {
1056
+ const binary = atob((b64 || "").replace(/-/g, "+").replace(/_/g, "/"));
1057
+ const bytes = Uint8Array.from(binary, (c) => c.charCodeAt(0));
1058
+ return new TextDecoder("utf-8").decode(bytes);
1059
+ } catch (_) {
1060
+ return "";
1061
+ }
1062
+ };
1063
  const applyTagMeta = () => {
1064
  const labels = document.querySelectorAll(".lego-tags label");
1065
  labels.forEach((label) => {
 
1070
  if (!match) return;
1071
  label.dataset.psqOrigin = match[1];
1072
  label.dataset.psqPreselected = match[2];
1073
+ const tip = decodeTip(match[3] || "");
1074
+ if (tip) {
1075
+ label.title = tip;
1076
+ span.title = tip;
1077
+ } else {
1078
+ label.removeAttribute("title");
1079
+ span.removeAttribute("title");
1080
+ }
1081
  span.textContent = text.replace(markerRe, "");
1082
  });
1083
  };
 
1172
  log("Start: received prompt")
1173
  prompt_in = (user_prompt or "").strip()
1174
  if not prompt_in:
1175
+ return _build_ui_payload(
1176
+ console_text="Error: empty prompt",
1177
+ row_defs=[],
1178
+ selected_tags=[],
1179
+ )
 
1180
 
1181
  log("Input:")
1182
  log(prompt_in)
 
1233
 
1234
 
1235
  log("Step 2: Prompt Squirrel retrieval (hidden)")
1236
+ try:
1237
+ t0 = time.perf_counter()
1238
+ retrieval_context_tags = list(dict.fromkeys((structural_tags or []) + (probe_tags or [])))
1239
+ rewrite_phrases = [p.strip() for p in (rewrite_for_retrieval or "").split(",") if p.strip()]
1240
  retrieval_result = psq_candidates_from_rewrite_phrases(
1241
  rewrite_phrases=rewrite_phrases,
1242
  allow_nsfw_tags=allow_nsfw_tags,
 
1253
  if selection_candidate_cap > 0 and len(candidates) > selection_candidate_cap:
1254
  candidates = candidates[:selection_candidate_cap]
1255
  log(f"Selection candidate cap applied: {selection_candidate_cap}")
1256
+ dt = time.perf_counter()-t0
1257
+ _record_timing("retrieval", dt)
1258
+ log(f"Retrieval: {dt:.2f}s")
1259
+ log(f"Retrieved {len(candidates)} candidate tags")
1260
  if verbose_retrieval:
1261
  log(f"Total unique candidates: {len(candidates)}")
1262
  limit = None if verbose_retrieval_all else max(1, int(verbose_retrieval_limit))
 
1293
  f" {tag}{alias_part} | fasttext={fasttext_str} context={context_str} "
1294
  f"combined={combined_str} count={count}"
1295
  )
1296
+ if limit is not None and len(rows) > limit:
1297
+ log(f" ... ({len(rows) - limit} more)")
1298
+ except Exception as e:
1299
+ log(f"Retrieval fallback: {type(e).__name__}: {e}")
1300
+ candidates = []
1301
+
1302
+ retrieved_candidate_tags = list(
1303
+ dict.fromkeys(
1304
+ _norm_tag_for_lookup(c.tag)
1305
+ for c in (candidates or [])
1306
+ if getattr(c, "tag", None)
1307
+ )
1308
+ )
1309
 
1310
  log("Step 3: LLM index selection (uses rewrite + structural/probe context)")
1311
  selection_query = _build_selection_query(
 
1444
  toggle_rows = _build_toggle_rows(
1445
  seed_terms=seed_terms,
1446
  selected_tags=active_selected_tags,
1447
+ retrieved_candidate_tags=retrieved_candidate_tags,
1448
  tag_selection_origins=tag_selection_origins,
1449
  implied_parent_map=implied_parent_map,
1450
  top_groups=max(1, int(display_top_groups)),
 
1454
  dt = time.perf_counter()-t0
1455
  _record_timing("group_display", dt)
1456
  log(f"Ranked group display: {dt:.2f}s ({len(toggle_rows)} rows)")
1457
+ log(
1458
+ _build_display_audit_line(
1459
+ toggle_rows,
1460
+ active_selected_tags=active_selected_tags,
1461
+ direct_selected_tags=direct_selected_tags,
1462
+ implied_selected_tags=implied_selected_tags,
1463
+ )
1464
+ )
1465
+
1466
+ total_dt = time.perf_counter()-t_total0
1467
+ _emit_timing_summary(total_dt)
1468
  _append_timing_jsonl(total_dt)
1469
  log("Done: final prompt ready")
1470
+ return _build_ui_payload(
1471
+ console_text="\n".join(logs),
1472
+ row_defs=toggle_rows,
1473
+ selected_tags=active_selected_tags,
1474
+ )
 
1475
 
1476
  except Exception as e:
1477
  log(f"Error: {type(e).__name__}: {e}")
1478
+ return _build_ui_payload(
1479
+ console_text="\n".join(logs),
1480
+ row_defs=[],
1481
+ selected_tags=[],
1482
+ )
 
1483
 
1484
 
1485
 
1486
  with gr.Blocks(css=css, js=client_js) as app:
1487
+ with gr.Row():
1488
+ with gr.Column(scale=3, elem_classes=["prompt-col"]):
1489
+ gr.Markdown(
1490
+ 'Describe your image under "Enter Prompt" and click "Run". '
1491
+ 'Prompt Squirrel will translate it into image board tags.',
1492
+ elem_classes=["top-instruction"],
1493
+ )
1494
+ with gr.Group(elem_classes=["prompt-card"]):
1495
+ image_tags = gr.Textbox(
1496
+ label="Enter Prompt",
1497
+ placeholder="e.g. fox, outside, detailed background, .",
1498
+ lines=1,
1499
+ elem_classes=["enter-prompt-box"],
1500
+ )
1501
+ with gr.Group(elem_classes=["prompt-card", "suggested-prompt-card"]):
1502
+ suggested_prompt = gr.Textbox(
1503
+ label="Suggested Prompt",
1504
+ lines=2,
1505
+ interactive=False,
1506
+ show_copy_button=True,
1507
+ placeholder="Comma-separated tags selected in the rows below.",
1508
+ elem_classes=["suggested-prompt-box"],
1509
+ )
1510
+ with gr.Column(scale=1):
1511
+ _mascot_pil = _load_mascot_image()
1512
+ if _mascot_pil is not None:
1513
+ mascot_img = gr.Image(
1514
+ value=_mascot_pil,
1515
+ show_label=False,
1516
+ interactive=False,
1517
+ height=240,
1518
+ elem_id="mascot"
1519
+ )
1520
+ else:
1521
+ mascot_img = gr.Markdown("`(mascot image unavailable)`")
1522
+ submit_button = gr.Button("Run", variant="primary")
1523
+ gr.Markdown("Typical runtime: up to ~20 seconds.", elem_classes=["run-hint"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1524
 
1525
  selected_tags_state = gr.State([])
1526
  row_defs_state = gr.State([])
1527
  row_values_state = gr.State([])
1528
 
1529
+ toggle_instruction = gr.Markdown(
1530
+ "Click tag buttons to add or remove tags from the suggested prompt.",
1531
+ elem_classes=["row-instruction"],
1532
+ visible=False,
1533
+ )
1534
+ row_headers: List[gr.Markdown] = []
1535
+ row_checkboxes: List[gr.CheckboxGroup] = []
1536
+ for _ in range(display_max_rows_default):
1537
+ with gr.Row():
1538
+ with gr.Column(scale=2, min_width=170):
1539
+ row_headers.append(gr.Markdown(value="", visible=False, elem_classes=["row-heading"]))
1540
+ with gr.Column(scale=10):
1541
+ row_checkboxes.append(
1542
+ gr.CheckboxGroup(
1543
+ choices=[],
1544
+ value=[],
1545
+ visible=False,
1546
+ interactive=True,
1547
+ container=False,
1548
+ elem_classes=["lego-tags"],
1549
+ )
1550
+ )
1551
+
1552
  gr.HTML(
1553
  """
1554
  <div class="source-legend">
1555
+ <span class="legend-title">Legend:</span>
1556
+ <span class="chip rewrite">Rewrite phrase</span>
1557
+ <span class="chip selection">General selection</span>
1558
+ <span class="chip probe">Probe query</span>
1559
+ <span class="chip structural">Structural query</span>
1560
+ <span class="chip implied">Implied</span>
1561
+ <span class="chip user">User-toggled</span>
1562
+ <span class="chip unselected">Unselected</span>
1563
  </div>
1564
  """
1565
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1566
 
1567
  with gr.Accordion("Display Settings", open=False):
1568
  with gr.Row():
 
1578
  label="Top Tags Shown Per Row",
1579
  minimum=1,
1580
  )
1581
+ display_rank_top_k = gr.Number(
1582
+ value=display_rank_top_k_default,
1583
+ precision=0,
1584
+ label="Top Tags Used for Row Ranking",
1585
+ minimum=1,
1586
+ )
1587
+
1588
+ with gr.Accordion("Console", open=False):
1589
+ console = gr.Textbox(
1590
+ label="Console",
1591
+ lines=10,
1592
+ interactive=False,
1593
+ placeholder="Progress logs will appear here."
1594
+ )
1595
+
1596
+ run_outputs = [
1597
+ console,
1598
+ toggle_instruction,
1599
+ suggested_prompt,
1600
+ selected_tags_state,
1601
+ row_defs_state,
1602
+ row_values_state,
1603
  *row_headers,
1604
  *row_checkboxes,
1605
  ]
1606
 
1607
+ submit_button.click(
1608
+ _prepare_run_ui,
1609
+ inputs=[],
1610
+ outputs=run_outputs,
1611
+ queue=False,
1612
+ show_progress="hidden",
1613
+ ).then(
1614
+ rag_pipeline_ui,
1615
+ inputs=[image_tags, display_top_groups, display_top_tags_per_group, display_rank_top_k],
1616
+ outputs=run_outputs,
1617
+ )
1618
+
1619
+ image_tags.submit(
1620
+ _prepare_run_ui,
1621
+ inputs=[],
1622
+ outputs=run_outputs,
1623
+ queue=False,
1624
+ show_progress="hidden",
1625
+ ).then(
1626
+ rag_pipeline_ui,
1627
+ inputs=[image_tags, display_top_groups, display_top_tags_per_group, display_rank_top_k],
1628
+ outputs=run_outputs,
1629
+ )
1630
 
1631
  for idx, row_cb in enumerate(row_checkboxes):
1632
+ row_cb.select(
1633
  fn=lambda changed_values, selected_state, row_defs, row_values, i=idx: _on_toggle_row(
1634
  i,
1635
  changed_values,
1636
+ selected_state,
1637
+ row_defs,
1638
  row_values,
1639
  display_max_rows_default,
1640
  ),
data/runtime_metrics/ui_pipeline_timings.jsonl CHANGED
@@ -14,3 +14,47 @@
14
  {"timestamp_utc": "2026-03-07T03:01:39Z", "stages_s": {"preprocess": 1.6900012269616127e-05, "rewrite": 1.713694000034593, "structural": 5.799985956400633e-06, "probe": 0.049874700023792684, "retrieval": 0.35970670002279803, "selection": 0.9267913000076078, "implication_expansion": 3.909994848072529e-05, "prompt_composition": 3.7299992982298136e-05, "group_display": 0.026757099956739694}, "total_s": 3.089661000005435, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
15
  {"timestamp_utc": "2026-03-07T03:09:53Z", "stages_s": {"preprocess": 0.00012510002125054598, "rewrite": 2.249713899975177, "structural": 0.5107482000021264, "probe": 3.300025127828121e-06, "retrieval": 2.3757353999535553, "selection": 2.9089593999669887, "implication_expansion": 0.2682994999922812, "prompt_composition": 3.070000093430281e-05, "group_display": 0.07982710003852844}, "total_s": 8.42714020004496, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
16
  {"timestamp_utc": "2026-03-07T03:37:54Z", "stages_s": {"preprocess": 0.00011760002234950662, "rewrite": 1.968222199997399, "structural": 1.1845426999498159, "probe": 2.214354399999138, "retrieval": 2.452574900002219, "selection": 0.8585481999907643, "implication_expansion": 0.27041040000040084, "prompt_composition": 3.319996176287532e-05, "group_display": 0.07736879994627088}, "total_s": 9.059251800004859, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  {"timestamp_utc": "2026-03-07T03:01:39Z", "stages_s": {"preprocess": 1.6900012269616127e-05, "rewrite": 1.713694000034593, "structural": 5.799985956400633e-06, "probe": 0.049874700023792684, "retrieval": 0.35970670002279803, "selection": 0.9267913000076078, "implication_expansion": 3.909994848072529e-05, "prompt_composition": 3.7299992982298136e-05, "group_display": 0.026757099956739694}, "total_s": 3.089661000005435, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
15
  {"timestamp_utc": "2026-03-07T03:09:53Z", "stages_s": {"preprocess": 0.00012510002125054598, "rewrite": 2.249713899975177, "structural": 0.5107482000021264, "probe": 3.300025127828121e-06, "retrieval": 2.3757353999535553, "selection": 2.9089593999669887, "implication_expansion": 0.2682994999922812, "prompt_composition": 3.070000093430281e-05, "group_display": 0.07982710003852844}, "total_s": 8.42714020004496, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
16
  {"timestamp_utc": "2026-03-07T03:37:54Z", "stages_s": {"preprocess": 0.00011760002234950662, "rewrite": 1.968222199997399, "structural": 1.1845426999498159, "probe": 2.214354399999138, "retrieval": 2.452574900002219, "selection": 0.8585481999907643, "implication_expansion": 0.27041040000040084, "prompt_composition": 3.319996176287532e-05, "group_display": 0.07736879994627088}, "total_s": 9.059251800004859, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
17
+ {"timestamp_utc": "2026-03-07T14:46:02Z", "stages_s": {"preprocess": 0.00026489997981116176, "rewrite": 1.9126524000312202, "structural": 4.675470399961341, "probe": 2.1218294000136666, "retrieval": 12.559957500023302, "selection": 1.0550536999944597, "implication_expansion": 0.37385119998361915, "prompt_composition": 2.589996438473463e-05, "group_display": 0.1274346000282094}, "total_s": 22.92461790004745, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
18
+ {"timestamp_utc": "2026-03-07T15:03:07Z", "stages_s": {"preprocess": 0.0002395000192336738, "rewrite": 5.185094600019511, "structural": 0.7354515999904834, "probe": 3.3999676816165447e-06, "retrieval": 7.362056100042537, "selection": 4.4499055000487715, "implication_expansion": 0.40566330001456663, "prompt_composition": 5.230004899203777e-05, "group_display": 0.13085790001787245}, "total_s": 18.33150090003619, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
19
+ {"timestamp_utc": "2026-03-07T15:24:07Z", "stages_s": {"preprocess": 0.00018759997328743339, "rewrite": 12.691507300012745, "structural": 6.099988240748644e-06, "probe": 3.3999676816165447e-06, "retrieval": 2.92752389999805, "selection": 3.720078700047452, "implication_expansion": 0.3129685999592766, "prompt_composition": 2.6300025638192892e-05, "group_display": 0.09746540000196546}, "total_s": 19.792910400021356, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
20
+ {"timestamp_utc": "2026-03-07T15:25:37Z", "stages_s": {"preprocess": 1.71000137925148e-05, "rewrite": 7.169413700001314, "structural": 4.699977580457926e-06, "probe": 2.300017513334751e-06, "retrieval": 0.5495104999863543, "selection": 1.0878360999631695, "implication_expansion": 2.940004924312234e-05, "prompt_composition": 2.9200047720223665e-05, "group_display": 0.03320260002510622}, "total_s": 8.852556700003333, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
21
+ {"timestamp_utc": "2026-03-07T15:34:51Z", "stages_s": {"preprocess": 8.460000390186906e-05, "rewrite": 20.807663900020998, "structural": 5.00003807246685e-06, "probe": 2.600019797682762e-06, "retrieval": 2.5111192999756895, "selection": 15.921769299951848, "implication_expansion": 0.27366950002033263, "prompt_composition": 3.250001464039087e-05, "group_display": 0.08310980000533164}, "total_s": 39.63195960002486, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
22
+ {"timestamp_utc": "2026-03-07T15:35:52Z", "stages_s": {"preprocess": 2.47000134550035e-05, "rewrite": 1.7436033999547362, "structural": 4.400033503770828e-06, "probe": 2.600019797682762e-06, "retrieval": 0.5218271000194363, "selection": 4.346306200022809, "implication_expansion": 3.0399998649954796e-05, "prompt_composition": 2.5800021830946207e-05, "group_display": 0.03283849998842925}, "total_s": 6.659919600002468, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
23
+ {"timestamp_utc": "2026-03-07T15:41:27Z", "stages_s": {"preprocess": 9.679998038336635e-05, "rewrite": 3.4287550000008196, "structural": 0.9089086999883875, "probe": 1.9048012000275776, "retrieval": 2.3640123999794014, "selection": 5.8771228000405245, "implication_expansion": 0.2728748000226915, "prompt_composition": 2.9499991796910763e-05, "group_display": 0.08227899996563792}, "total_s": 14.873475200030953, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
24
+ {"timestamp_utc": "2026-03-07T15:41:53Z", "stages_s": {"preprocess": 1.7300015315413475e-05, "rewrite": 1.8870358999702148, "structural": 5.099980626255274e-06, "probe": 0.31994039996061474, "retrieval": 0.407905500032939, "selection": 1.6604228000505827, "implication_expansion": 3.070000093430281e-05, "prompt_composition": 2.5900022592395544e-05, "group_display": 0.027770799992140383}, "total_s": 4.315684599976521, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
25
+ {"timestamp_utc": "2026-03-07T15:49:25Z", "stages_s": {"preprocess": 0.0001020999625325203, "rewrite": 14.022166899987496, "structural": 4.699977580457926e-06, "probe": 2.4999608285725117e-06, "retrieval": 3.3183021000004373, "selection": 11.949675500043668, "implication_expansion": 0.32175860001007095, "prompt_composition": 4.040001658722758e-05, "group_display": 0.08752110000932589}, "total_s": 29.732599700044375, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
26
+ {"timestamp_utc": "2026-03-07T15:50:22Z", "stages_s": {"preprocess": 1.7600017599761486e-05, "rewrite": 1.7429223000071943, "structural": 3.700028173625469e-06, "probe": 0.005793399992398918, "retrieval": 0.4191273999749683, "selection": 1.2126603999640793, "implication_expansion": 3.060000017285347e-05, "prompt_composition": 2.659996971487999e-05, "group_display": 0.027827800018712878}, "total_s": 3.4225500999600627, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
27
+ {"timestamp_utc": "2026-03-07T16:00:38Z", "stages_s": {"preprocess": 0.00010899995686486363, "rewrite": 8.79864729999099, "structural": 3.5999692045152187e-06, "probe": 2.100015990436077e-06, "retrieval": 2.8054729999857955, "selection": 1.2764754999661818, "implication_expansion": 0.3174371999921277, "prompt_composition": 2.6399968191981316e-05, "group_display": 0.08990299998549744}, "total_s": 13.324789500038605, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
28
+ {"timestamp_utc": "2026-03-07T16:02:59Z", "stages_s": {"preprocess": 2.5200017262250185e-05, "rewrite": 1.991041800007224, "structural": 5.099980626255274e-06, "probe": 17.524970299971756, "retrieval": 0.4314843000029214, "selection": 1.7358130000066012, "implication_expansion": 3.0499999411404133e-05, "prompt_composition": 2.8199981898069382e-05, "group_display": 0.027282700000796467}, "total_s": 21.72257919999538, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
29
+ {"timestamp_utc": "2026-03-07T16:04:30Z", "stages_s": {"preprocess": 2.559996210038662e-05, "rewrite": 2.176026900007855, "structural": 5.400041118264198e-06, "probe": 0.19614840002031997, "retrieval": 0.4781980999978259, "selection": 1.6205251999781467, "implication_expansion": 3.789999755099416e-05, "prompt_composition": 3.749999450519681e-05, "group_display": 0.03069300000788644}, "total_s": 4.5152849000296555, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
30
+ {"timestamp_utc": "2026-03-07T16:10:49Z", "stages_s": {"preprocess": 9.689998114481568e-05, "rewrite": 2.410708999959752, "structural": 0.8642730999854393, "probe": 0.4224375000339933, "retrieval": 2.802454399992712, "selection": 2.783213499991689, "implication_expansion": 0.31982660002540797, "prompt_composition": 2.7100031729787588e-05, "group_display": 0.09723489999305457}, "total_s": 9.736253200040665, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
31
+ {"timestamp_utc": "2026-03-07T16:11:31Z", "stages_s": {"preprocess": 1.71000137925148e-05, "rewrite": 1.7746342000318691, "structural": 0.2111163000226952, "probe": 4.599976819008589e-06, "retrieval": 0.5392439999850467, "selection": 2.572300500003621, "implication_expansion": 3.130000550299883e-05, "prompt_composition": 3.15000070258975e-05, "group_display": 0.03570860001491383}, "total_s": 5.145930600003339, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
32
+ {"timestamp_utc": "2026-03-07T16:17:15Z", "stages_s": {"preprocess": 9.919999865815043e-05, "rewrite": 2.202809799986426, "structural": 1.5271345999790356, "probe": 4.593681500002276, "retrieval": 2.751306999998633, "selection": 8.643455000012182, "implication_expansion": 0.35227009997470304, "prompt_composition": 3.160000778734684e-05, "group_display": 0.11034630000358447}, "total_s": 20.237214900029358, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
33
+ {"timestamp_utc": "2026-03-07T16:17:43Z", "stages_s": {"preprocess": 2.7300033252686262e-05, "rewrite": 1.0030765000265092, "structural": 2.59619329997804, "probe": 1.0295192999765277, "retrieval": 0.43122639995999634, "selection": 19.92651720001595, "implication_expansion": 2.989999484270811e-05, "prompt_composition": 3.0199997127056122e-05, "group_display": 0.028948699997272342}, "total_s": 25.028504199988674, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
34
+ {"timestamp_utc": "2026-03-07T16:30:10Z", "stages_s": {"preprocess": 8.229998638853431e-05, "rewrite": 6.530854899960104, "structural": 0.5428495000232942, "probe": 3.5999692045152187e-06, "retrieval": 2.2112261000438593, "selection": 37.76459150004666, "implication_expansion": 0.26712879998376593, "prompt_composition": 3.42000275850296e-05, "group_display": 0.07745989999966696}, "total_s": 47.426328199973796, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
35
+ {"timestamp_utc": "2026-03-07T16:30:23Z", "stages_s": {"preprocess": 1.71000137925148e-05, "rewrite": 0.929964899958577, "structural": 0.0898478000308387, "probe": 1.6804130000527948, "retrieval": 0.41350249998504296, "selection": 9.11415430001216, "implication_expansion": 3.300001844763756e-05, "prompt_composition": 3.2299954909831285e-05, "group_display": 0.027016500011086464}, "total_s": 12.266514900024049, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
36
+ {"timestamp_utc": "2026-03-07T16:30:38Z", "stages_s": {"preprocess": 3.239995567128062e-05, "rewrite": 7.538256199972238, "structural": 5.300040356814861e-06, "probe": 2.300017513334751e-06, "retrieval": 0.40969980001682416, "selection": 2.6479469999903813, "implication_expansion": 2.829998265951872e-05, "prompt_composition": 2.4800014216452837e-05, "group_display": 0.027821999974548817}, "total_s": 10.63382319995435, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
37
+ {"timestamp_utc": "2026-03-07T16:39:56Z", "stages_s": {"preprocess": 9.499996667727828e-05, "rewrite": 2.1227241000160575, "structural": 0.8582198000513017, "probe": 4.121821600012481, "retrieval": 3.0876275000046007, "selection": 1.8579011999536306, "implication_expansion": 0.3362734999973327, "prompt_composition": 4.47999918833375e-05, "group_display": 0.09885080001549795}, "total_s": 12.54350390000036, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
38
+ {"timestamp_utc": "2026-03-07T16:42:46Z", "stages_s": {"preprocess": 1.650000922381878e-05, "rewrite": 1.8503554000053555, "structural": 4.999979864805937e-06, "probe": 0.1099030000041239, "retrieval": 0.4038925000349991, "selection": 3.962773200008087, "implication_expansion": 4.18000272475183e-05, "prompt_composition": 3.519997699186206e-05, "group_display": 0.0322487999801524}, "total_s": 6.370800500037149, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
39
+ {"timestamp_utc": "2026-03-07T16:45:56Z", "stages_s": {"preprocess": 3.549997927621007e-05, "rewrite": 2.710496100015007, "structural": 0.1680390000110492, "probe": 4.699977580457926e-06, "retrieval": 1.825858500029426, "selection": 3.9926271999720484, "implication_expansion": 3.15000070258975e-05, "prompt_composition": 5.5100012104958296e-05, "group_display": 0.03422769997268915}, "total_s": 8.741921000008006, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
40
+ {"timestamp_utc": "2026-03-07T16:46:30Z", "stages_s": {"preprocess": 3.660004585981369e-05, "rewrite": 3.5675273000379093, "structural": 1.2902518999762833, "probe": 4.299974534660578e-06, "retrieval": 0.6466077999793924, "selection": 0.9123659000033513, "implication_expansion": 2.010003663599491e-05, "prompt_composition": 2.239999594166875e-05, "group_display": 0.031055399973411113}, "total_s": 6.457136899989564, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
41
+ {"timestamp_utc": "2026-03-07T16:52:11Z", "stages_s": {"preprocess": 9.279994992539287e-05, "rewrite": 2.4995197000098415, "structural": 0.650494700006675, "probe": 0.13954110001213849, "retrieval": 2.8580662000458688, "selection": 4.111845000006724, "implication_expansion": 0.2978370999917388, "prompt_composition": 2.719997428357601e-05, "group_display": 0.08794649998890236}, "total_s": 10.679650100006256, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
42
+ {"timestamp_utc": "2026-03-07T16:54:21Z", "stages_s": {"preprocess": 2.6500027161091566e-05, "rewrite": 3.775604799971916, "structural": 4.799978341907263e-06, "probe": 2.7999631129205227e-06, "retrieval": 0.660035600012634, "selection": 2.061849199992139, "implication_expansion": 2.629996743053198e-05, "prompt_composition": 2.95999925583601e-05, "group_display": 0.03182149998610839}, "total_s": 6.542931500007398, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
43
+ {"timestamp_utc": "2026-03-07T16:56:48Z", "stages_s": {"preprocess": 1.1800031643360853e-05, "rewrite": 1.6858424000092782, "structural": 0.3115612000110559, "probe": 3.600027412176132e-06, "retrieval": 0.18196690001059324, "selection": 0.8783706999965943, "implication_expansion": 1.3899989426136017e-05, "prompt_composition": 1.1400028597563505e-05, "group_display": 0.026653499982785434}, "total_s": 3.09556500002509, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
44
+ {"timestamp_utc": "2026-03-07T17:00:41Z", "stages_s": {"preprocess": 7.049995474517345e-05, "rewrite": 2.107173199998215, "structural": 0.7115459000342526, "probe": 0.7592908999649808, "retrieval": 2.065408500027843, "selection": 0.881550399994012, "implication_expansion": 0.26592110004276037, "prompt_composition": 1.4399993233382702e-05, "group_display": 0.07751559995813295}, "total_s": 6.901116500026546, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
45
+ {"timestamp_utc": "2026-03-07T17:29:51Z", "stages_s": {"preprocess": 8.860003435984254e-05, "rewrite": 2.1579742000321858, "structural": 0.4430788999889046, "probe": 1.0411412000539713, "retrieval": 2.0500706000020728, "selection": 1.186861299967859, "implication_expansion": 0.26447719999123365, "prompt_composition": 1.5300000086426735e-05, "group_display": 0.08299089997308329}, "total_s": 7.258497399976477, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
46
+ {"timestamp_utc": "2026-03-07T17:35:31Z", "stages_s": {"preprocess": 1.009996049106121e-05, "rewrite": 2.5867222999804653, "structural": 1.5706295000272803, "probe": 0.5591535000130534, "retrieval": 0.09234020003350452, "selection": 17.545875800016802, "implication_expansion": 3.190001007169485e-05, "prompt_composition": 1.3300043065100908e-05, "group_display": 0.0324972000089474}, "total_s": 22.399744599999394, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
47
+ {"timestamp_utc": "2026-03-07T17:38:38Z", "stages_s": {"preprocess": 7.449998520314693e-05, "rewrite": 2.332632600038778, "structural": 0.7940433000330813, "probe": 0.8119671999593265, "retrieval": 2.234404999995604, "selection": 0.7451644000248052, "implication_expansion": 0.27489820000482723, "prompt_composition": 1.6200006939470768e-05, "group_display": 0.07717659999616444}, "total_s": 7.304320800001733, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
48
+ {"timestamp_utc": "2026-03-07T17:42:01Z", "stages_s": {"preprocess": 8.510000770911574e-05, "rewrite": 1.867464300012216, "structural": 1.647069400001783, "probe": 4.299974534660578e-06, "retrieval": 2.395426100003533, "selection": 1.200732600002084, "implication_expansion": 0.33177699998486787, "prompt_composition": 2.2199994418770075e-05, "group_display": 0.09814279997954145}, "total_s": 7.579292399983387, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
49
+ {"timestamp_utc": "2026-03-07T17:48:44Z", "stages_s": {"preprocess": 8.590001380071044e-05, "rewrite": 2.0132066000369377, "structural": 1.060188300034497, "probe": 0.4243395999656059, "retrieval": 2.132219100021757, "selection": 1.1945027000037953, "implication_expansion": 0.2687906000064686, "prompt_composition": 1.9000028260052204e-05, "group_display": 0.07895809999899939}, "total_s": 7.206461600027978, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
50
+ {"timestamp_utc": "2026-03-07T17:54:46Z", "stages_s": {"preprocess": 0.00010950001887977123, "rewrite": 1.3990224999724887, "structural": 2.04190930002369, "probe": 1.069002999982331, "retrieval": 2.1156343999900855, "selection": 1.9406172999879345, "implication_expansion": 0.2861632999847643, "prompt_composition": 1.6000005416572094e-05, "group_display": 0.09219529997790232}, "total_s": 8.978390700009186, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
51
+ {"timestamp_utc": "2026-03-07T17:54:54Z", "stages_s": {"preprocess": 2.6899971999228e-05, "rewrite": 1.5525400000042282, "structural": 3.999972250312567e-06, "probe": 2.1999585442245007e-06, "retrieval": 0.10463370004436001, "selection": 1.4364217999973334, "implication_expansion": 1.999997766688466e-05, "prompt_composition": 1.4999997802078724e-05, "group_display": 0.032557499944232404}, "total_s": 3.138574599986896, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
52
+ {"timestamp_utc": "2026-03-07T17:58:13Z", "stages_s": {"preprocess": 0.00011359999189153314, "rewrite": 2.2580789999919944, "structural": 1.3124472000054084, "probe": 3.5999692045152187e-06, "retrieval": 2.127778899972327, "selection": 1.0589646000298671, "implication_expansion": 0.29374579997966066, "prompt_composition": 1.4899997040629387e-05, "group_display": 0.08566429995698854}, "total_s": 7.171340400003828, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
53
+ {"timestamp_utc": "2026-03-07T18:01:07Z", "stages_s": {"preprocess": 7.969996659085155e-05, "rewrite": 1.745156999968458, "structural": 2.802765399974305, "probe": 0.9890876000281423, "retrieval": 2.023351099982392, "selection": 0.5886470000259578, "implication_expansion": 0.2654718999983743, "prompt_composition": 1.8899969290941954e-05, "group_display": 0.07822809997014701}, "total_s": 8.527449000044726, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
54
+ {"timestamp_utc": "2026-03-07T18:03:23Z", "stages_s": {"preprocess": 8.140003774315119e-05, "rewrite": 1.4975244000088423, "structural": 1.186394400021527, "probe": 0.6484065999975428, "retrieval": 2.452991199970711, "selection": 4.813816999958362, "implication_expansion": 0.1623875999939628, "prompt_composition": 4.579999949783087e-05, "group_display": 0.0797012000111863}, "total_s": 10.879897099977825, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
55
+ {"timestamp_utc": "2026-03-07T18:06:46Z", "stages_s": {"preprocess": 9.800016414374113e-06, "rewrite": 1.3867014999850653, "structural": 0.39007520000450313, "probe": 5.211147500027437, "retrieval": 0.17631519999122247, "selection": 6.730482600047253, "implication_expansion": 1.7700018361210823e-05, "prompt_composition": 1.2499978765845299e-05, "group_display": 0.031001800030935556}, "total_s": 13.936744500009809, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
56
+ {"timestamp_utc": "2026-03-07T18:07:41Z", "stages_s": {"preprocess": 1.0100018698722124e-05, "rewrite": 1.0803023999906145, "structural": 0.15050079999491572, "probe": 4.7244069000007585, "retrieval": 0.1667132000438869, "selection": 0.9119019000208937, "implication_expansion": 1.8200022168457508e-05, "prompt_composition": 1.4199991710484028e-05, "group_display": 0.027934999961871654}, "total_s": 7.071552800014615, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
57
+ {"timestamp_utc": "2026-03-07T18:09:40Z", "stages_s": {"preprocess": 1.5700003132224083e-05, "rewrite": 1.418132099963259, "structural": 1.423503800004255, "probe": 4.57354850001866, "retrieval": 0.10137270000996068, "selection": 1.4030677999835461, "implication_expansion": 2.789997961372137e-05, "prompt_composition": 1.8000020645558834e-05, "group_display": 0.03112399997189641}, "total_s": 8.961162999970838, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
58
+ {"timestamp_utc": "2026-03-07T18:11:33Z", "stages_s": {"preprocess": 8.610001532360911e-05, "rewrite": 1.9479332999908365, "structural": 1.865794699988328, "probe": 0.06017700000666082, "retrieval": 2.0440989999915473, "selection": 2.4227961000287905, "implication_expansion": 0.28120840003248304, "prompt_composition": 2.069998299703002e-05, "group_display": 0.0792113000061363}, "total_s": 8.734649899997748, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
59
+ {"timestamp_utc": "2026-03-07T18:17:24Z", "stages_s": {"preprocess": 0.00012939999578520656, "rewrite": 3.181579700030852, "structural": 0.45629230001941323, "probe": 4.243250800005626, "retrieval": 2.128536299976986, "selection": 4.2171271000406705, "implication_expansion": 0.29443830001400784, "prompt_composition": 2.110004425048828e-05, "group_display": 0.0817057000240311}, "total_s": 14.642313299991656, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
60
+ {"timestamp_utc": "2026-03-07T18:20:15Z", "stages_s": {"preprocess": 9.400013368576765e-06, "rewrite": 0.9211662000161596, "structural": 0.49053100001765415, "probe": 0.4501308999606408, "retrieval": 0.08053859998472035, "selection": 1.9059181000338867, "implication_expansion": 3.290001768618822e-05, "prompt_composition": 1.8200022168457508e-05, "group_display": 0.0299964000005275}, "total_s": 3.888701299962122, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
mascotimages/transparentsquirrel.png ADDED

Git LFS Details

  • SHA256: 8e18321c9051b82ab18932ef9ed4052915659b83ef2065050600d0c06bddb9e7
  • Pointer size: 131 Bytes
  • Size of remote file: 257 kB
psq_rag/parsing/prompt_grammar.py CHANGED
@@ -1,60 +1,23 @@
1
- import re
2
- from lark import Lark, Token
3
-
4
-
5
- #Parser
6
- grammar=r"""
7
- !start: (prompt | /[][():]/+)*
8
- prompt: (emphasized | plain | comma | WHITESPACE)*
9
- !emphasized: "(" prompt ")"
10
- | "(" prompt ":" [WHITESPACE] NUMBER [WHITESPACE] ")"
11
- comma: ","
12
- WHITESPACE: /\s+/
13
- plain: /([^,\\\[\]():|]|\\.)+/
14
- %import common.SIGNED_NUMBER -> NUMBER
15
- """
16
-
17
- # Initialize the parser
18
- parser = Lark(grammar, start='start')
19
-
20
- # Function to extract tags
21
- def extract_tags(tree):
22
- tags_with_positions = []
23
- def _traverse(node):
24
- if isinstance(node, Token) and node.type == '__ANON_1':
25
- tag_position = node.start_pos
26
- tag_text = node.value
27
- tags_with_positions.append((tag_text, tag_position, "tag"))
28
- elif not isinstance(node, Token):
29
- for child in node.children:
30
- _traverse(child)
31
- _traverse(tree)
32
- return tags_with_positions
33
-
34
-
35
-
36
- def build_tag_offsets_dicts(new_image_tags_with_positions):
37
- # Structure the data for HighlightedText
38
- tag_data = []
39
- for tag_text, start_pos, nodetype in new_image_tags_with_positions:
40
- # Modify the tag
41
- modified_tag = tag_text.replace('_', ' ').replace('\\(', '(').replace('\\)', ')').strip()
42
- artist_matrix_tag = tag_text.replace('_', ' ').replace('\\(', '\(').replace('\\)', '\)').strip()
43
- tf_idf_matrix_tag = re.sub(r'\\([()])', r'\1', re.sub(r' ', '_', tag_text.strip().removeprefix('by ').removeprefix('by_')))
44
- # Calculate the end position based on the original tag length
45
- end_pos = start_pos + len(tag_text)
46
- # Append the structured data for each tag
47
- tag_data.append({
48
- "original_tag": tag_text,
49
- "start_pos": start_pos,
50
- "end_pos": end_pos,
51
- "modified_tag": modified_tag,
52
- "artist_matrix_tag": artist_matrix_tag,
53
- "tf_idf_matrix_tag": tf_idf_matrix_tag,
54
- "node_type": nodetype
55
- })
56
- return tag_data
57
-
58
-
59
- if __name__ == "__main__":
60
- print("prompt_grammar.py imports ok")
 
1
+ from __future__ import annotations
2
+
3
+ from lark import Lark
4
+
5
+
6
+ # Minimal prompt grammar kept for import compatibility and simple parsing use.
7
+ grammar = r"""
8
+ !start: (prompt | /[][():]/+)*
9
+ prompt: (emphasized | plain | comma | WHITESPACE)*
10
+ !emphasized: "(" prompt ")"
11
+ | "(" prompt ":" [WHITESPACE] NUMBER [WHITESPACE] ")"
12
+ comma: ","
13
+ WHITESPACE: /\s+/
14
+ plain: /([^,\\\[\]():|]|\\.)+/
15
+ %import common.SIGNED_NUMBER -> NUMBER
16
+ """
17
+
18
+ parser = Lark(grammar, start="start")
19
+
20
+
21
+ def parse_prompt(text: str):
22
+ return parser.parse(text or "")
23
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
psq_rag/retrieval/psq_retrieval.py CHANGED
@@ -8,7 +8,6 @@ import pathlib
8
  import re
9
  from collections import Counter, OrderedDict
10
  from dataclasses import dataclass
11
- from itertools import islice
12
  from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Union
13
 
14
  import numpy as np
@@ -42,14 +41,6 @@ def _norm_tag_for_lookup(s: str) -> str:
42
  return s.replace(' ', '_').replace('\\(', '(').replace('\\)', ')')
43
 
44
 
45
- special_tags = ["score:0", "score:1", "score:2", "score:3", "score:4", "score:5", "score:6", "score:7", "score:8", "score:9", "rating:s", "rating:q", "rating:e"]
46
- def remove_special_tags(original_string):
47
- tags = [tag.strip() for tag in original_string.split(",")]
48
- remaining_tags = [tag for tag in tags if tag not in special_tags]
49
- removed_tags = [tag for tag in tags if tag in special_tags]
50
- return ", ".join(remaining_tags), removed_tags
51
-
52
-
53
  def construct_pseudo_vector(pseudo_doc_terms, idf, term_to_column_index):
54
  cols, data = [], []
55
  for term, w in pseudo_doc_terms.items():
@@ -121,23 +112,6 @@ def get_tfidf_reduced_similar_tags(pseudo_doc_terms, allow_nsfw_tags):
121
  return transformed_sorted_tag_similarity_dict
122
 
123
 
124
- def psq_candidates_from_terms(terms: Sequence[str], *, allow_nsfw_tags: bool, k: int = 300):
125
- cand_dict = get_tfidf_reduced_similar_tags(dict(Counter(terms)), allow_nsfw_tags)
126
- candidates = list(islice(cand_dict.items(), k))
127
- tag_counts = get_tag_counts()
128
- return [
129
- Candidate(
130
- tag=tag,
131
- score_combined=float(score),
132
- score_fasttext=None,
133
- score_context=None,
134
- count=tag_counts.get(tag),
135
- sources=[],
136
- )
137
- for tag, score in candidates
138
- ]
139
-
140
-
141
  def psq_candidates_from_rewrite_phrases(
142
  rewrite_phrases: Sequence[str],
143
  *,
 
8
  import re
9
  from collections import Counter, OrderedDict
10
  from dataclasses import dataclass
 
11
  from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Union
12
 
13
  import numpy as np
 
41
  return s.replace(' ', '_').replace('\\(', '(').replace('\\)', ')')
42
 
43
 
 
 
 
 
 
 
 
 
44
  def construct_pseudo_vector(pseudo_doc_terms, idf, term_to_column_index):
45
  cols, data = [], []
46
  for term, w in pseudo_doc_terms.items():
 
112
  return transformed_sorted_tag_similarity_dict
113
 
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  def psq_candidates_from_rewrite_phrases(
116
  rewrite_phrases: Sequence[str],
117
  *,
psq_rag/retrieval/state.py CHANGED
@@ -18,7 +18,6 @@ TFIDF_PATH = pathlib.Path("tf_idf_files_420.joblib")
18
  NSFW_CSV_PATH = pathlib.Path("word_rating_probabilities.csv")
19
  NSFW_THRESHOLD = 0.95
20
 
21
- HNSW_ART_PATH = pathlib.Path("tfidf_hnsw_artists.bin")
22
  HNSW_TAG_PATH = pathlib.Path("tfidf_hnsw_tags.bin")
23
  FASTTEXT_MODEL_PATH = pathlib.Path("e621FastTextModel010Replacement_small.bin")
24
  TAG_ALIASES_PATH = pathlib.Path("fluffyrock_3m.csv")
@@ -37,9 +36,7 @@ _tag_implications: Optional[Dict[str, List[str]]] = None
37
 
38
 
39
  _hnsw_tag_index: Optional["hnswlib.Index"] = None
40
- _hnsw_artist_index: Optional["hnswlib.Index"] = None
41
  _hnsw_tag_count: int = 0
42
- _hnsw_artist_count: int = 0
43
 
44
  # Tag type names inferred from e621 wiki documentation.
45
  # Numeric IDs come from fluffyrock_3m.csv column 1; mapping is heuristic but
@@ -167,10 +164,6 @@ def get_artist_set() -> Set[str]:
167
  return _artist_set
168
 
169
 
170
- def is_artist(name: str) -> bool:
171
- return name in get_artist_set()
172
-
173
-
174
  def get_fasttext_model() -> Any:
175
  global _fasttext_model
176
  if _fasttext_model is not None:
@@ -380,18 +373,6 @@ def get_tfidf_tag_vectors() -> Dict[str, Any]:
380
  }
381
  return _tfidf_tag_vectors
382
 
383
-
384
- def retrieval_assets_status() -> Dict[str, bool]:
385
- return {
386
- "tfidf": TFIDF_PATH.is_file(),
387
- "nsfw_csv": NSFW_CSV_PATH.is_file(),
388
- "fasttext_model": FASTTEXT_MODEL_PATH.is_file(),
389
- "tag_aliases_csv": TAG_ALIASES_PATH.is_file(),
390
- "hnsw_tags": HNSW_TAG_PATH.is_file(),
391
- "hnsw_artists": HNSW_ART_PATH.is_file(),
392
- }
393
-
394
-
395
  def _build_or_load_index(path: pathlib.Path, rows: list[int], rm: np.ndarray, dim: int) -> "hnswlib.Index":
396
  idx = hnswlib.Index(space="cosine", dim=dim)
397
  need_build = True
@@ -425,51 +406,26 @@ def _build_or_load_index(path: pathlib.Path, rows: list[int], rm: np.ndarray, di
425
  return idx
426
 
427
 
428
- def _ensure_hnsw_indexes(need_artists: bool) -> None:
429
- global _hnsw_tag_index, _hnsw_artist_index, _hnsw_tag_count, _hnsw_artist_count
430
 
431
  if hnswlib is None:
432
  return
433
 
434
- if _hnsw_tag_index is not None and (not need_artists or _hnsw_artist_index is not None):
435
  return
436
 
437
  components = get_tfidf_components()
438
  reduced_matrix = components["reduced_matrix"]
439
- row_to_tag = components["row_to_tag"]
440
  rm = _l2_normalize_rows(reduced_matrix).astype(np.float32)
441
  n_items, dim = rm.shape
442
 
443
- artist_set = get_artist_set() if need_artists else set()
444
- artist_rows: list[int] = []
445
- tag_rows: list[int] = []
446
-
447
- for i in range(n_items):
448
- tag = row_to_tag.get(i, "")
449
- base = tag[3:] if tag.startswith("by_") else tag
450
-
451
- if tag in {"by_unknown_artist", "by_conditional_dnp"}:
452
- tag_rows.append(i)
453
- continue
454
-
455
- if artist_set and is_artist(base):
456
- artist_rows.append(i)
457
- else:
458
- tag_rows.append(i)
459
 
460
  _hnsw_tag_index = _build_or_load_index(HNSW_TAG_PATH, tag_rows, rm, dim)
461
  _hnsw_tag_count = len(tag_rows)
462
 
463
- if need_artists:
464
- _hnsw_artist_index = _build_or_load_index(HNSW_ART_PATH, artist_rows, rm, dim)
465
- _hnsw_artist_count = len(artist_rows)
466
-
467
 
468
  def get_hnsw_tag_index() -> Tuple[Optional["hnswlib.Index"], int]:
469
- _ensure_hnsw_indexes(need_artists=False)
470
  return _hnsw_tag_index, _hnsw_tag_count
471
-
472
-
473
- def get_hnsw_artist_index() -> Tuple[Optional["hnswlib.Index"], int]:
474
- _ensure_hnsw_indexes(need_artists=True)
475
- return _hnsw_artist_index, _hnsw_artist_count
 
18
  NSFW_CSV_PATH = pathlib.Path("word_rating_probabilities.csv")
19
  NSFW_THRESHOLD = 0.95
20
 
 
21
  HNSW_TAG_PATH = pathlib.Path("tfidf_hnsw_tags.bin")
22
  FASTTEXT_MODEL_PATH = pathlib.Path("e621FastTextModel010Replacement_small.bin")
23
  TAG_ALIASES_PATH = pathlib.Path("fluffyrock_3m.csv")
 
36
 
37
 
38
  _hnsw_tag_index: Optional["hnswlib.Index"] = None
 
39
  _hnsw_tag_count: int = 0
 
40
 
41
  # Tag type names inferred from e621 wiki documentation.
42
  # Numeric IDs come from fluffyrock_3m.csv column 1; mapping is heuristic but
 
164
  return _artist_set
165
 
166
 
 
 
 
 
167
  def get_fasttext_model() -> Any:
168
  global _fasttext_model
169
  if _fasttext_model is not None:
 
373
  }
374
  return _tfidf_tag_vectors
375
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  def _build_or_load_index(path: pathlib.Path, rows: list[int], rm: np.ndarray, dim: int) -> "hnswlib.Index":
377
  idx = hnswlib.Index(space="cosine", dim=dim)
378
  need_build = True
 
406
  return idx
407
 
408
 
409
+ def _ensure_hnsw_indexes() -> None:
410
+ global _hnsw_tag_index, _hnsw_tag_count
411
 
412
  if hnswlib is None:
413
  return
414
 
415
+ if _hnsw_tag_index is not None:
416
  return
417
 
418
  components = get_tfidf_components()
419
  reduced_matrix = components["reduced_matrix"]
 
420
  rm = _l2_normalize_rows(reduced_matrix).astype(np.float32)
421
  n_items, dim = rm.shape
422
 
423
+ tag_rows = list(range(n_items))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
 
425
  _hnsw_tag_index = _build_or_load_index(HNSW_TAG_PATH, tag_rows, rm, dim)
426
  _hnsw_tag_count = len(tag_rows)
427
 
 
 
 
 
428
 
429
  def get_hnsw_tag_index() -> Tuple[Optional["hnswlib.Index"], int]:
430
+ _ensure_hnsw_indexes()
431
  return _hnsw_tag_index, _hnsw_tag_count
 
 
 
 
 
psq_rag/tagging/categorized_suggestions.py CHANGED
@@ -205,25 +205,3 @@ def generate_categorized_suggestions(
205
  categories=categories,
206
  )
207
 
208
-
209
-
210
- def get_category_suggestions_dict(
211
- categorized: CategorizedTagSuggestions
212
- ) -> Dict[str, List[str]]:
213
- """
214
- Get simple dict of category -> suggested tags (without scores).
215
-
216
- Args:
217
- categorized: The categorized suggestions
218
-
219
- Returns:
220
- Dict mapping category_name -> [tag1, tag2, ...]
221
- """
222
- result = {}
223
-
224
- for cat_name, cat_sugg in categorized.by_category.items():
225
- result[cat_name] = [tag for tag, _ in cat_sugg.suggestions]
226
-
227
- result['other'] = [tag for tag, _ in categorized.other_suggestions]
228
-
229
- return result
 
205
  categories=categories,
206
  )
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
psq_rag/ui/group_ranked_display.py CHANGED
@@ -304,42 +304,3 @@ def rank_groups_from_tfidf(
304
 
305
  rows_out.sort(key=lambda r: r.expected_count, reverse=True)
306
  return rows_out[: max(1, int(top_groups))]
307
-
308
-
309
- def _fmt_tag_cell(tag: str, p: float) -> str:
310
- safe_tag = tag.replace("|", "\\|")
311
- return f"`{safe_tag}` (p={p:.2f}, E={p:.2f})"
312
-
313
-
314
- def render_group_rankings_markdown(
315
- seed_terms: Sequence[str],
316
- *,
317
- top_groups: int,
318
- top_tags_per_group: int,
319
- group_rank_top_k: int,
320
- ) -> str:
321
- rows = rank_groups_from_tfidf(
322
- seed_terms,
323
- top_groups=top_groups,
324
- top_tags_per_group=top_tags_per_group,
325
- group_rank_top_k=group_rank_top_k,
326
- )
327
- if not rows:
328
- return "No ranked group display available (insufficient TF-IDF context)."
329
-
330
- k = max(1, int(top_tags_per_group))
331
- headers = ["Group/Category", f"Expected Tags (top {max(1, int(group_rank_top_k))})"]
332
- headers.extend([f"Tag {i}" for i in range(1, k + 1)])
333
- table = [
334
- "| " + " | ".join(headers) + " |",
335
- "| " + " | ".join(["---"] * len(headers)) + " |",
336
- ]
337
-
338
- for row in rows:
339
- cells = [row.group_name, f"{row.expected_count:.2f}"]
340
- tag_cells = [_fmt_tag_cell(tag, p) for tag, p in row.tags]
341
- if len(tag_cells) < k:
342
- tag_cells.extend([""] * (k - len(tag_cells)))
343
- cells.extend(tag_cells)
344
- table.append("| " + " | ".join(cells) + " |")
345
- return "\n".join(table)
 
304
 
305
  rows_out.sort(key=lambda r: r.expected_count, reverse=True)
306
  return rows_out[: max(1, int(top_groups))]