from typing import Any, Dict, List, Tuple

import pandas as pd
from langchain_community.vectorstores import faiss


def multi_column(
    db: faiss.FAISS,
    df: pd.DataFrame,
    qc_pairs: Dict[str, str],
    threshold: float,
) -> List[Tuple[int, float, Dict[str, Any]]]:
    """Perform semantic search across multiple columns and aggregate by row.

    For every ``(column, query)`` pair the vector store is searched with a
    metadata filter ``{'column': column}``. Hits whose score is
    ``>= threshold`` are kept, keyed by ``doc.metadata['row']``. Each row that
    was kept for at least one column gets the mean of its kept scores; columns
    in which the row did not pass the threshold do not contribute to the mean.

    Args:
        db: FAISS vector database for search. Documents are expected to carry
            ``'column'`` and ``'row'`` entries in their metadata.
        df: Original DataFrame containing the data; ``row`` metadata values
            are used as labels for ``df.loc``.
        qc_pairs: Dictionary mapping column names to query fragments.
        threshold: Minimum score a hit must reach to be included.

    Returns:
        List of ``(row_id, avg_score, row_dict)`` tuples sorted by
        ``avg_score`` in descending order. Rows with equal scores keep the
        order in which they were first encountered. Empty when ``qc_pairs``
        is empty or the index holds no vectors.

    Raises:
        KeyError: If a document lacks ``'row'`` metadata or the row label is
            not present in ``df``'s index.
    """
    total = db.index.ntotal
    # Nothing to search for / nothing to search in: avoid issuing a k=0 query.
    if not qc_pairs or total == 0:
        return []

    # NOTE(review): the comparison below treats larger scores as better. The
    # store returns whatever metric it was built with, and the per-call
    # `distance_strategy` keyword does not appear to change that — confirm the
    # index was created with a similarity-style (higher-is-better) strategy.
    per_column_scores: List[Dict[Any, float]] = []
    for column, query in qc_pairs.items():
        hits = db.similarity_search_with_score(
            query,
            k=total,
            filter={'column': column},
            # The metadata filter is applied after `fetch_k` candidates are
            # retrieved (library default: 20). Fetch the whole index so the
            # filter sees every document of this column, not just the 20
            # nearest vectors overall.
            fetch_k=total,
            distance_strategy=faiss.DistanceStrategy.COSINE,
        )
        # float(...) turns NumPy scalars into plain Python floats so the
        # returned values match the annotated type.
        per_column_scores.append({
            doc.metadata['row']: float(score)
            for doc, score in hits
            if score >= threshold
        })

    # Insertion-ordered union of row ids: unlike a set, this makes the order
    # of tied rows reproducible (the final sort is stable).
    all_rows = dict.fromkeys(
        rid for score_map in per_column_scores for rid in score_map
    )

    results: List[Tuple[int, float, Dict[str, Any]]] = []
    for rid in all_rows:
        # Every rid comes from at least one score map, so `scores` is never
        # empty and the division is safe.
        scores = [
            score_map[rid]
            for score_map in per_column_scores
            if rid in score_map
        ]
        avg_score = sum(scores) / len(scores)
        results.append((rid, avg_score, df.loc[rid].to_dict()))

    results.sort(key=lambda item: item[1], reverse=True)
    return results