Claude committed on
Commit
eeada1d
·
1 Parent(s): de8b5a3

Add tag implication expansion (fox→canine→canid→mammal)

Browse files

Walks the e621 implication graph upward from each selected tag to add
ancestor taxonomy tags that were structurally unreachable by retrieval.

- state.py: get_tag_implications() loads+caches the directed graph,
expand_tags_via_implications() BFS-walks ancestors from a tag set
- app.py: expansion runs between Stage 3 and Stage 4 (always on)
- eval_pipeline.py: --expand-implications flag for controlled eval

https://claude.ai/code/session_019PY5TEXTWGtToUbowunSRG

Files changed (3) hide show
  1. app.py +10 -0
  2. psq_rag/retrieval/state.py +54 -0
  3. scripts/eval_pipeline.py +27 -4
app.py CHANGED
@@ -9,6 +9,7 @@ from psq_rag.pipeline.preproc import extract_user_provided_tags_upto_3_words
9
  from psq_rag.llm.rewrite import llm_rewrite_prompt
10
  from psq_rag.retrieval.psq_retrieval import psq_candidates_from_rewrite_phrases, _norm_tag_for_lookup
11
  from psq_rag.llm.select import llm_select_indices
 
12
 
13
 
14
  def _split_prompt_commas(s: str) -> List[str]:
@@ -223,6 +224,15 @@ def rag_pipeline_ui(user_prompt: str):
223
 
224
  selected_tags = [candidates[i].tag for i in picked_indices] if picked_indices else []
225
 
 
 
 
 
 
 
 
 
 
226
  log("Step 4: Compose final prompt")
227
  final_prompt = compose_final_prompt(rewritten, selected_tags)
228
 
 
9
  from psq_rag.llm.rewrite import llm_rewrite_prompt
10
  from psq_rag.retrieval.psq_retrieval import psq_candidates_from_rewrite_phrases, _norm_tag_for_lookup
11
  from psq_rag.llm.select import llm_select_indices
12
+ from psq_rag.retrieval.state import expand_tags_via_implications
13
 
14
 
15
  def _split_prompt_commas(s: str) -> List[str]:
 
224
 
225
  selected_tags = [candidates[i].tag for i in picked_indices] if picked_indices else []
226
 
227
+ log("Step 3b: Expand via tag implications")
228
+ tag_set = set(selected_tags)
229
+ expanded, implied_only = expand_tags_via_implications(tag_set)
230
+ if implied_only:
231
+ selected_tags.extend(sorted(implied_only))
232
+ log(f" Added {len(implied_only)} implied tags: {', '.join(sorted(implied_only))}")
233
+ else:
234
+ log(" No additional implied tags")
235
+
236
  log("Step 4: Compose final prompt")
237
  final_prompt = compose_final_prompt(rewritten, selected_tags)
238
 
psq_rag/retrieval/state.py CHANGED
@@ -22,6 +22,7 @@ HNSW_ART_PATH = pathlib.Path("tfidf_hnsw_artists.bin")
22
  HNSW_TAG_PATH = pathlib.Path("tfidf_hnsw_tags.bin")
23
  FASTTEXT_MODEL_PATH = pathlib.Path("e621FastTextModel010Replacement_small.bin")
24
  TAG_ALIASES_PATH = pathlib.Path("fluffyrock_3m.csv")
 
25
 
26
  _tfidf_components: Optional[Dict[str, Any]] = None
27
  _nsfw_tags: Optional[Set[str]] = None
@@ -32,6 +33,7 @@ _tfidf_tag_vectors: Optional[Dict[str, Any]] = None
32
  _alias_to_tags: Optional[Dict[str, List[str]]] = None
33
  _tag_to_aliases: Optional[Dict[str, List[str]]] = None
34
  _tag_type_id: Optional[Dict[str, int]] = None
 
35
 
36
 
37
  _hnsw_tag_index: Optional["hnswlib.Index"] = None
@@ -273,6 +275,58 @@ def get_tag2aliases() -> Dict[str, List[str]]:
273
  return _tag_to_aliases
274
 
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  def get_tfidf_tag_vectors() -> Dict[str, Any]:
277
  global _tfidf_tag_vectors
278
  if _tfidf_tag_vectors is not None:
 
22
  HNSW_TAG_PATH = pathlib.Path("tfidf_hnsw_tags.bin")
23
  FASTTEXT_MODEL_PATH = pathlib.Path("e621FastTextModel010Replacement_small.bin")
24
  TAG_ALIASES_PATH = pathlib.Path("fluffyrock_3m.csv")
25
+ TAG_IMPLICATIONS_PATH = pathlib.Path("tag_implications-2023-07-20.csv")
26
 
27
  _tfidf_components: Optional[Dict[str, Any]] = None
28
  _nsfw_tags: Optional[Set[str]] = None
 
33
  _alias_to_tags: Optional[Dict[str, List[str]]] = None
34
  _tag_to_aliases: Optional[Dict[str, List[str]]] = None
35
  _tag_type_id: Optional[Dict[str, int]] = None
36
+ _tag_implications: Optional[Dict[str, List[str]]] = None
37
 
38
 
39
  _hnsw_tag_index: Optional["hnswlib.Index"] = None
 
275
  return _tag_to_aliases
276
 
277
 
278
+ def get_tag_implications() -> Dict[str, List[str]]:
279
+ """Return antecedent_tag -> [consequent_tags] from the implications CSV.
280
+
281
+ Only active implications where both tags exist in the tag database are kept.
282
+ """
283
+ global _tag_implications
284
+ if _tag_implications is not None:
285
+ return _tag_implications
286
+
287
+ if not TAG_IMPLICATIONS_PATH.is_file():
288
+ logging.warning("Tag implications CSV not found: %s", TAG_IMPLICATIONS_PATH)
289
+ _tag_implications = {}
290
+ return _tag_implications
291
+
292
+ known_tags = set(get_tag_type_ids().keys())
293
+ impl: Dict[str, List[str]] = {}
294
+ with TAG_IMPLICATIONS_PATH.open("r", newline="", encoding="utf-8") as csvfile:
295
+ reader = csv.reader(csvfile)
296
+ next(reader, None) # skip header
297
+ for row in reader:
298
+ if len(row) < 5 or row[4] != "active":
299
+ continue
300
+ antecedent = clean_tag(row[1])
301
+ consequent = clean_tag(row[2])
302
+ if antecedent in known_tags and consequent in known_tags:
303
+ impl.setdefault(antecedent, []).append(consequent)
304
+
305
+ _tag_implications = impl
306
+ logging.info("Loaded %d tag implications", sum(len(v) for v in impl.values()))
307
+ return _tag_implications
308
+
309
+
310
+ def expand_tags_via_implications(tags: Set[str]) -> Tuple[Set[str], Set[str]]:
311
+ """Walk the implication graph upward from each tag, collecting ancestors.
312
+
313
+ Returns (all_tags, implied_only) where:
314
+ - all_tags = original tags + implied ancestors
315
+ - implied_only = tags that were added (not in the original set)
316
+ """
317
+ impl = get_tag_implications()
318
+ expanded = set(tags)
319
+ queue = list(tags)
320
+ while queue:
321
+ tag = queue.pop()
322
+ for parent in impl.get(tag, ()):
323
+ if parent not in expanded:
324
+ expanded.add(parent)
325
+ queue.append(parent)
326
+ implied_only = expanded - tags
327
+ return expanded, implied_only
328
+
329
+
330
  def get_tfidf_tag_vectors() -> Dict[str, Any]:
331
  global _tfidf_tag_vectors
332
  if _tfidf_tag_vectors is not None:
scripts/eval_pipeline.py CHANGED
@@ -133,6 +133,8 @@ class SampleResult:
133
  over_selection_ratio: float = 0.0 # |selected| / |gt|
134
  # Why distribution (from Stage 3 LLM)
135
  why_counts: Dict[str, int] = field(default_factory=dict)
 
 
136
  # Timing
137
  stage1_time: float = 0.0
138
  stage2_time: float = 0.0
@@ -171,12 +173,13 @@ def _process_one_sample(
171
  verbose: bool,
172
  print_lock: threading.Lock,
173
  min_why: Optional[str] = None,
 
174
  ) -> SampleResult:
175
  """Process a single eval sample through the full pipeline. Thread-safe."""
176
  from psq_rag.llm.rewrite import llm_rewrite_prompt
177
  from psq_rag.retrieval.psq_retrieval import psq_candidates_from_rewrite_phrases
178
  from psq_rag.llm.select import llm_select_indices
179
- from psq_rag.retrieval.state import get_tag_type_name
180
 
181
  def log(msg: str) -> None:
182
  if verbose:
@@ -263,6 +266,13 @@ def _process_one_sample(
263
  why_counts[w] = why_counts.get(w, 0) + 1
264
  result.why_counts = why_counts
265
 
 
 
 
 
 
 
 
266
  # Overall selection metrics
267
  p, r, f1 = _compute_metrics(result.selected_tags, gt_tags)
268
  result.selection_precision = p
@@ -308,11 +318,12 @@ def _process_one_sample(
308
  char_info = ""
309
  if gt_char:
310
  char_info = f" char[gt={len(gt_char)} sel={len(sel_char)} P={cp:.2f} R={cr:.2f}]"
 
311
  with print_lock:
312
  print(
313
  f" [{index+1}] retrieval_recall={result.retrieval_recall:.3f} "
314
  f"sel_P={p:.3f} sel_R={r:.3f} sel_F1={f1:.3f} "
315
- f"selected={len(result.selected_tags)}{char_info} "
316
  f"t1={result.stage1_time:.1f}s t2={result.stage2_time:.1f}s t3={result.stage3_time:.1f}s"
317
  )
318
 
@@ -330,12 +341,14 @@ def _prewarm_retrieval_assets() -> None:
330
  get_tfidf_components,
331
  get_tag2aliases,
332
  get_tag_type_name,
 
333
  )
334
  print("Pre-warming retrieval assets (TF-IDF, FastText, HNSW, aliases)...")
335
  t0 = time.time()
336
  get_tfidf_components() # loads joblib, HNSW indexes, FastText model
337
  get_tag2aliases() # loads CSV alias dict
338
  get_tag_type_name("_warmup_") # ensures tag type dict is built
 
339
  print(f" Assets loaded in {time.time() - t0:.1f}s")
340
 
341
 
@@ -354,6 +367,7 @@ def run_eval(
354
  seed: int = 42,
355
  workers: int = 1,
356
  min_why: Optional[str] = None,
 
357
  ) -> List[SampleResult]:
358
 
359
  # Load eval samples
@@ -403,7 +417,7 @@ def run_eval(
403
  sample, i, total,
404
  skip_rewrite, allow_nsfw, mode, chunk_size,
405
  per_phrase_k, temperature, max_tokens, verbose,
406
- print_lock, min_why,
407
  )
408
  results.append(result)
409
  else:
@@ -419,7 +433,7 @@ def run_eval(
419
  sample, i, total,
420
  skip_rewrite, allow_nsfw, mode, chunk_size,
421
  per_phrase_k, temperature, max_tokens, verbose,
422
- print_lock, min_why,
423
  ): i
424
  for i, sample in enumerate(samples)
425
  }
@@ -487,12 +501,16 @@ def print_summary(results: List[SampleResult]) -> None:
487
  if (r.retrieved_tags & r.ground_truth_tags)])
488
  avg_over_sel = _safe_avg([r.over_selection_ratio for r in valid])
489
 
 
 
490
  print()
491
  print("Stage 3 - Selection (ALL tags):")
492
  print(f" Avg precision: {avg_sel_precision:.4f}")
493
  print(f" Avg recall: {avg_sel_recall:.4f}")
494
  print(f" Avg F1: {avg_sel_f1:.4f}")
495
  print(f" Avg selected tags: {avg_selected:.1f}")
 
 
496
  print(f" Avg ground-truth tags:{avg_gt:.1f}")
497
  print()
498
  print("Diagnostic Metrics:")
@@ -653,6 +671,8 @@ def main(argv=None) -> int:
653
  ap.add_argument("--min-why", default=None,
654
  choices=["explicit", "strong_implied", "weak_implied", "style_or_meta", "other"],
655
  help="Minimum 'why' confidence to keep (e.g. 'explicit' keeps only explicit matches)")
 
 
656
 
657
  args = ap.parse_args(list(argv) if argv is not None else None)
658
 
@@ -671,6 +691,7 @@ def main(argv=None) -> int:
671
  seed=args.seed,
672
  workers=args.workers,
673
  min_why=args.min_why,
 
674
  )
675
 
676
  print_summary(results)
@@ -702,6 +723,7 @@ def main(argv=None) -> int:
702
  "seed": args.seed,
703
  "workers": args.workers,
704
  "min_why": args.min_why,
 
705
  "n_errors": sum(1 for r in results if r.error),
706
  }
707
 
@@ -738,6 +760,7 @@ def main(argv=None) -> int:
738
  "selection_given_retrieval": round(r.selection_given_retrieval, 4),
739
  "over_selection_ratio": round(r.over_selection_ratio, 2),
740
  "why_counts": r.why_counts,
 
741
  # Timing
742
  "stage1_time": round(r.stage1_time, 3),
743
  "stage2_time": round(r.stage2_time, 3),
 
133
  over_selection_ratio: float = 0.0 # |selected| / |gt|
134
  # Why distribution (from Stage 3 LLM)
135
  why_counts: Dict[str, int] = field(default_factory=dict)
136
+ # Tag implications
137
+ implied_tags: Set[str] = field(default_factory=set) # tags added via implications (not LLM-selected)
138
  # Timing
139
  stage1_time: float = 0.0
140
  stage2_time: float = 0.0
 
173
  verbose: bool,
174
  print_lock: threading.Lock,
175
  min_why: Optional[str] = None,
176
+ expand_implications: bool = False,
177
  ) -> SampleResult:
178
  """Process a single eval sample through the full pipeline. Thread-safe."""
179
  from psq_rag.llm.rewrite import llm_rewrite_prompt
180
  from psq_rag.retrieval.psq_retrieval import psq_candidates_from_rewrite_phrases
181
  from psq_rag.llm.select import llm_select_indices
182
+ from psq_rag.retrieval.state import get_tag_type_name, expand_tags_via_implications
183
 
184
  def log(msg: str) -> None:
185
  if verbose:
 
266
  why_counts[w] = why_counts.get(w, 0) + 1
267
  result.why_counts = why_counts
268
 
269
+ # Tag implication expansion (post-Stage 3)
270
+ if expand_implications and result.selected_tags:
271
+ expanded, implied_only = expand_tags_via_implications(result.selected_tags)
272
+ result.implied_tags = implied_only
273
+ result.selected_tags = expanded
274
+ log(f"Implications: +{len(implied_only)} tags")
275
+
276
  # Overall selection metrics
277
  p, r, f1 = _compute_metrics(result.selected_tags, gt_tags)
278
  result.selection_precision = p
 
318
  char_info = ""
319
  if gt_char:
320
  char_info = f" char[gt={len(gt_char)} sel={len(sel_char)} P={cp:.2f} R={cr:.2f}]"
321
+ impl_info = f" (+{len(result.implied_tags)} implied)" if result.implied_tags else ""
322
  with print_lock:
323
  print(
324
  f" [{index+1}] retrieval_recall={result.retrieval_recall:.3f} "
325
  f"sel_P={p:.3f} sel_R={r:.3f} sel_F1={f1:.3f} "
326
+ f"selected={len(result.selected_tags)}{impl_info}{char_info} "
327
  f"t1={result.stage1_time:.1f}s t2={result.stage2_time:.1f}s t3={result.stage3_time:.1f}s"
328
  )
329
 
 
341
  get_tfidf_components,
342
  get_tag2aliases,
343
  get_tag_type_name,
344
+ get_tag_implications,
345
  )
346
  print("Pre-warming retrieval assets (TF-IDF, FastText, HNSW, aliases)...")
347
  t0 = time.time()
348
  get_tfidf_components() # loads joblib, HNSW indexes, FastText model
349
  get_tag2aliases() # loads CSV alias dict
350
  get_tag_type_name("_warmup_") # ensures tag type dict is built
351
+ get_tag_implications() # loads implication graph
352
  print(f" Assets loaded in {time.time() - t0:.1f}s")
353
 
354
 
 
367
  seed: int = 42,
368
  workers: int = 1,
369
  min_why: Optional[str] = None,
370
+ expand_implications: bool = False,
371
  ) -> List[SampleResult]:
372
 
373
  # Load eval samples
 
417
  sample, i, total,
418
  skip_rewrite, allow_nsfw, mode, chunk_size,
419
  per_phrase_k, temperature, max_tokens, verbose,
420
+ print_lock, min_why, expand_implications,
421
  )
422
  results.append(result)
423
  else:
 
433
  sample, i, total,
434
  skip_rewrite, allow_nsfw, mode, chunk_size,
435
  per_phrase_k, temperature, max_tokens, verbose,
436
+ print_lock, min_why, expand_implications,
437
  ): i
438
  for i, sample in enumerate(samples)
439
  }
 
501
  if (r.retrieved_tags & r.ground_truth_tags)])
502
  avg_over_sel = _safe_avg([r.over_selection_ratio for r in valid])
503
 
504
+ avg_implied = sum(len(r.implied_tags) for r in valid) / n
505
+
506
  print()
507
  print("Stage 3 - Selection (ALL tags):")
508
  print(f" Avg precision: {avg_sel_precision:.4f}")
509
  print(f" Avg recall: {avg_sel_recall:.4f}")
510
  print(f" Avg F1: {avg_sel_f1:.4f}")
511
  print(f" Avg selected tags: {avg_selected:.1f}")
512
+ if avg_implied > 0:
513
+ print(f" Avg implied tags: {avg_implied:.1f} (added via tag implications)")
514
  print(f" Avg ground-truth tags:{avg_gt:.1f}")
515
  print()
516
  print("Diagnostic Metrics:")
 
671
  ap.add_argument("--min-why", default=None,
672
  choices=["explicit", "strong_implied", "weak_implied", "style_or_meta", "other"],
673
  help="Minimum 'why' confidence to keep (e.g. 'explicit' keeps only explicit matches)")
674
+ ap.add_argument("--expand-implications", action="store_true", default=False,
675
+ help="Expand selected tags via tag implication chains (e.g. fox→canine→canid→mammal)")
676
 
677
  args = ap.parse_args(list(argv) if argv is not None else None)
678
 
 
691
  seed=args.seed,
692
  workers=args.workers,
693
  min_why=args.min_why,
694
+ expand_implications=args.expand_implications,
695
  )
696
 
697
  print_summary(results)
 
723
  "seed": args.seed,
724
  "workers": args.workers,
725
  "min_why": args.min_why,
726
+ "expand_implications": args.expand_implications,
727
  "n_errors": sum(1 for r in results if r.error),
728
  }
729
 
 
760
  "selection_given_retrieval": round(r.selection_given_retrieval, 4),
761
  "over_selection_ratio": round(r.over_selection_ratio, 2),
762
  "why_counts": r.why_counts,
763
+ "implied_tags": sorted(r.implied_tags),
764
  # Timing
765
  "stage1_time": round(r.stage1_time, 3),
766
  "stage2_time": round(r.stage2_time, 3),