Spaces:
Build error
Build error
Tiffany Degbotse committed on
Commit ·
2ae10e0
1
Parent(s): 6e3858e
query with your model
Browse files- app/__pycache__/api_fastapi.cpython-313.pyc +0 -0
- app/api_fastapi.py +101 -0
- core/__init__.py +0 -0
- core/__pycache__/__init__.cpython-313.pyc +0 -0
- core/__pycache__/explain.cpython-313.pyc +0 -0
- core/__pycache__/model_loader.cpython-313.pyc +0 -0
- core/__pycache__/retrieval.cpython-313.pyc +0 -0
- core/__pycache__/schemas.cpython-313.pyc +0 -0
- core/__pycache__/storage.cpython-313.pyc +0 -0
- core/__pycache__/utils.cpython-313.pyc +0 -0
- core/explain.py +106 -0
- core/model_loader.py +19 -0
- core/retrieval.py +77 -0
- core/schemas.py +37 -0
- core/storage.py +95 -0
- core/utils.py +20 -0
- data/base_indices/iris_global/features.npy +0 -0
- data/base_indices/iris_global/index.jsonl +100 -0
- data/base_indices/iris_global/meta.jsonl +100 -0
- data/base_indices/iris_global/shap.npy +0 -0
- model_data/data.csv +151 -0
- model_data/model.pkl +0 -0
- requirements.txt +11 -0
- scripts/__pycache__/build_base_index.cpython-313.pyc +0 -0
- scripts/add_user_model.py +0 -0
- scripts/build_base_index.py +62 -0
- scripts/build_iris.bat +13 -0
- scripts/demo_predict.py +0 -0
- tests/__pycache__/test_similarity.cpython-313.pyc +0 -0
- tests/test_similarity.py +28 -0
app/__pycache__/api_fastapi.cpython-313.pyc
ADDED
|
Binary file (4.17 kB). View file
|
|
|
app/api_fastapi.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Optional, List
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
| 6 |
+
from Query_Your_Model.core.schemas import RetrievalConfig, ExplainResponse
|
| 7 |
+
from Query_Your_Model.core.model_loader import load_model
|
| 8 |
+
from Query_Your_Model.core.explain import explain_instance
|
| 9 |
+
from Query_Your_Model.core.retrieval import retrieve_topk
|
| 10 |
+
from Query_Your_Model.core.utils import safe_proba_to_scalar
|
| 11 |
+
|
# FastAPI application serving the SHAP-explanation endpoint.
app = FastAPI(title="Reasoning-RAG XAI API")

# Cached globals, reused across requests by explain(): the loaded model,
# the feature-name schema it was loaded for, and optional SHAP background data.
MODEL = None
FEATURE_NAMES: Optional[List[str]] = None
BACKGROUND = None
# Default retrieval namespace directory (a request may override it).
NAMESPACE = "Query_Your_Model/data/base_indices/iris_global"

# --- Target name mappings (extend per dataset/model) ---
# Maps a dataset key to human-readable class labels, ordered by class index.
TARGET_NAMES = {
    "iris": ["setosa", "versicolor", "virginica"],
    # add more datasets here if needed
}
| 25 |
+
|
| 26 |
+
|
class ExplainRequest(BaseModel):
    """Request payload for POST /explain.

    Attributes:
        model_path: Filesystem path to the pickled model to load.
        feature_names: Ordered feature names matching ``features``.
        features: Ordered feature vector for one instance.
        namespace: Optional retrieval namespace directory; defaults to NAMESPACE.
        retrieval: Optional retrieval configuration; retrieval is skipped when absent.
        background_path: Optional path to SHAP background data
            (not read by explain() in this module — presumably for future use).
    """
    model_path: str
    feature_names: List[str]
    features: List[float]
    namespace: Optional[str] = None
    retrieval: Optional[RetrievalConfig] = None
    background_path: Optional[str] = None
| 34 |
+
|
| 35 |
+
|
@app.post("/explain", response_model=ExplainResponse)
def explain(req: ExplainRequest):
    """Predict, explain via SHAP, and retrieve similar cases for one instance.

    The model is cached at module level and reloaded whenever the requested
    model path or feature schema changes.
    """
    global MODEL, FEATURE_NAMES, BACKGROUND, _MODEL_PATH

    # Reload when the model path OR the feature schema changes. The original
    # compared only feature names, which kept serving a stale model when a
    # different model_path arrived with the same feature names.
    if (
        MODEL is None
        or FEATURE_NAMES != req.feature_names
        or globals().get("_MODEL_PATH") != req.model_path
    ):
        MODEL = load_model(req.model_path)
        FEATURE_NAMES = req.feature_names
        _MODEL_PATH = req.model_path
        BACKGROUND = None  # optionally load background data

    # Convert input features to the (1, n_features) shape models expect.
    x = np.asarray(req.features, dtype="float32").reshape(1, -1)

    # Prediction & probability (best-effort: failures fall back to class 0).
    y_class = 0
    proba_scalar = None
    try:
        y_pred = MODEL.predict(x)
        y_class = int(y_pred[0])

        if hasattr(MODEL, "predict_proba"):
            proba = MODEL.predict_proba(x)
            proba_scalar = float(proba[0][y_class])
    except Exception as e:
        print("Prediction error:", e)

    # --- Map class ID -> human-readable label ---
    model_key = "iris" if "iris" in req.model_path.lower() else None
    if model_key and model_key in TARGET_NAMES:
        y_label = TARGET_NAMES[model_key][y_class]
    else:
        y_label = str(y_class)

    # SHAP explanation (the instance itself serves as background when none cached).
    exp = explain_instance(
        MODEL,
        x[0],
        FEATURE_NAMES,
        background_X=(BACKGROUND if BACKGROUND is not None else x),
    )

    # Retrieval of similar cases
    similar = None
    ns = req.namespace or NAMESPACE
    if req.retrieval and req.retrieval.use_retrieval:
        shap_q = np.array(exp["shap_values"], dtype="float32")
        similar = retrieve_topk(ns, shap_q, x[0], alpha=req.retrieval.alpha, k=req.retrieval.k)

        # Also map labels for retrieved cases; guard against an empty result
        # (the original iterated unconditionally and could hit a falsy value).
        if model_key and model_key in TARGET_NAMES and similar:
            for case in similar:
                if case.get("y_pred") is not None:
                    try:
                        case["y_pred"] = TARGET_NAMES[model_key][int(case["y_pred"])]
                    except Exception:
                        case["y_pred"] = str(case["y_pred"])

    return ExplainResponse(
        prediction={
            # NOTE(review): y_label is a str ("setosa", ...); PredictionResult
            # must accept str as well as float — verify schema agreement.
            "y_pred": y_label,
            "proba": proba_scalar,
        },
        explanation=exp,
        similar_cases=similar or [],
        ood_flag=False
    )
core/__init__.py
ADDED
|
File without changes
|
core/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (183 Bytes). View file
|
|
|
core/__pycache__/explain.cpython-313.pyc
ADDED
|
Binary file (4.5 kB). View file
|
|
|
core/__pycache__/model_loader.cpython-313.pyc
ADDED
|
Binary file (1.24 kB). View file
|
|
|
core/__pycache__/retrieval.cpython-313.pyc
ADDED
|
Binary file (3.52 kB). View file
|
|
|
core/__pycache__/schemas.cpython-313.pyc
ADDED
|
Binary file (2.6 kB). View file
|
|
|
core/__pycache__/storage.cpython-313.pyc
ADDED
|
Binary file (6.2 kB). View file
|
|
|
core/__pycache__/utils.cpython-313.pyc
ADDED
|
Binary file (1.47 kB). View file
|
|
|
core/explain.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Dict, Any
|
| 2 |
+
import numpy as np
|
| 3 |
+
import shap
|
| 4 |
+
|
| 5 |
+
|
def _pick_explainer(model, X_background: np.ndarray):
    """Select a SHAP explainer suited to *model*.

    Preference order: TreeExplainer for tree ensembles, LinearExplainer for
    linear models, KernelExplainer (slow but model-agnostic) otherwise.
    """
    model_name = type(model).__name__.lower()

    # XGBoost detection needs the optional xgboost import to be guarded.
    try:
        import xgboost  # noqa: F401
        tree_like = hasattr(model, "get_booster") or "xgb" in model_name
    except Exception:
        tree_like = False

    # Other common tree-ensemble families, recognised by class name.
    tree_markers = ("randomforest", "gradientboost", "gbm", "lightgbm", "catboost")
    tree_like = tree_like or any(marker in model_name for marker in tree_markers)

    if tree_like:
        return shap.TreeExplainer(model, feature_perturbation="tree_path_dependent")

    # Linear models: named "linear" or exposing fitted coefficients.
    if "linear" in model_name or hasattr(model, "coef_"):
        return shap.LinearExplainer(model, X_background)

    # Fallback for anything else
    return shap.KernelExplainer(model.predict, X_background)
| 33 |
+
|
| 34 |
+
|
def explain_instance(
    model,
    x: np.ndarray,
    feature_names: List[str],
    background_X: np.ndarray,
    top_k: int = 8,
) -> Dict[str, Any]:
    """
    Compute SHAP values for a single instance x (shape: (n_features,)).

    Always reduces the SHAP output to a vector of length n_features,
    averaging across classes for multiclass models.

    Returns a dict with keys "shap_values", "base_value" and "topk".
    """
    x = x.reshape(1, -1)
    explainer = _pick_explainer(model, background_X)

    values = explainer.shap_values(x)

    # SHAP's output shape varies with version and model type:
    #   - list of per-class arrays            (older multiclass API)
    #   - (n_samples, n_features, n_classes)  (newer multiclass API)
    #   - (n_samples, n_features)             (binary / regression)
    if isinstance(values, list):  # multiclass -> list of arrays
        # stack to (n_classes, n_samples, n_features), average across classes
        values_arr = np.mean(np.stack(values, axis=0), axis=0)
    else:
        values_arr = np.asarray(values)
        if values_arr.ndim == 3:
            # Average over the trailing class axis. Previously the 3-D case
            # fell through to a flat truncation, keeping a meaningless prefix
            # of features*classes values instead of per-feature attributions.
            values_arr = values_arr.mean(axis=2)

    # Always flatten to a 1-D float vector
    shap_vec = np.asarray(values_arr[0], dtype=float).reshape(-1)

    # Last-resort guard if the length still disagrees with feature_names
    n_features = len(feature_names)
    if len(shap_vec) != n_features:
        shap_vec = shap_vec[:n_features]

    base_value = explainer.expected_value
    if isinstance(base_value, (list, np.ndarray)):
        base_value = float(np.mean(base_value))

    # Top-k features by absolute impact
    abs_imp = np.abs(shap_vec)
    idx = np.argsort(-abs_imp)[:top_k].ravel()

    top = []
    for i in idx:
        i = int(i)
        if i >= n_features:  # safety check
            continue
        top.append({
            "feature": feature_names[i],
            "value": float(x[0, i]),
            "shap": float(shap_vec[i]),
            "abs_impact": float(abs_imp[i]),
        })

    return {
        "shap_values": shap_vec.tolist(),
        "base_value": float(base_value),
        "topk": top,
    }
|
core/model_loader.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import joblib
|
| 2 |
+
from typing import Any, Tuple, Optional
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
def load_model(path: str) -> Any:
    """Load and return a pickled sklearn-compatible model from *path*."""
    return joblib.load(path)
| 9 |
+
|
def predict(model: Any, X: np.ndarray) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """Return (pred, proba_or_none). Handles regressors & classifiers."""
    predictions = model.predict(X)
    probabilities = None
    proba_fn = getattr(model, "predict_proba", None)
    if proba_fn is not None:
        try:
            probabilities = proba_fn(X)
        except Exception:
            # Some classifiers expose predict_proba but fail at call time
            # (e.g. not fitted for probabilities) — treat as "no proba".
            probabilities = None
    return predictions, probabilities
core/retrieval.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Any, List
|
| 2 |
+
import numpy as np
|
| 3 |
+
from .storage import load_matrices
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def _cosine(a: np.ndarray, b: np.ndarray) -> float:
|
| 7 |
+
na = np.linalg.norm(a) + 1e-12
|
| 8 |
+
nb = np.linalg.norm(b) + 1e-12
|
| 9 |
+
return float(np.dot(a, b) / (na * nb))
|
| 10 |
+
|
| 11 |
+
|
def combined_similarity(
    shap_q: np.ndarray,
    feat_q: np.ndarray,
    shap_i: np.ndarray,
    feat_i: np.ndarray,
    alpha: float
) -> float:
    """Blend of SHAP-space and feature-space cosine similarities.

    similarity = alpha * cos(SHAP) + (1 - alpha) * cos(features)
    """
    shap_term = _cosine(shap_q, shap_i)
    feat_term = _cosine(feat_q, feat_i)
    return alpha * shap_term + (1.0 - alpha) * feat_term
| 21 |
+
|
| 22 |
+
|
def retrieve_topk(
    namespace: str,
    shap_q: np.ndarray,
    x_q: np.ndarray,
    alpha: float = 0.5,
    k: int = 5
) -> List[Dict[str, Any]]:
    """
    Retrieve the k stored cases most similar to the query.

    Returns dicts with case_id, similarity, y_pred, shap_values, features, meta.
    """
    # Stored matrices plus metadata for this namespace
    X, SHAP, metas, case_ids = load_matrices(namespace)

    # Each meta row is stored as {case_id: meta}; merge into one lookup table.
    meta_by_case: Dict[str, Dict[str, Any]] = {}
    for row in metas:
        if isinstance(row, dict):
            meta_by_case.update(row)

    scored: List[Dict[str, Any]] = []
    for row_idx, case_id in enumerate(case_ids):
        case_features = X[row_idx]
        case_shap = SHAP[row_idx]

        similarity = combined_similarity(shap_q, x_q, case_shap, case_features, alpha=alpha)
        meta = meta_by_case.get(case_id, {})  # safe fallback for missing meta

        scored.append({
            "case_id": case_id,
            "similarity": float(similarity),
            "y_pred": meta.get("y_pred"),
            "shap_values": case_shap.tolist(),
            "features": case_features.tolist(),
            "meta": meta
        })

    # Highest similarity first, keep the best k
    scored.sort(key=lambda entry: entry["similarity"], reverse=True)
    return scored[:k]
| 66 |
+
|
| 67 |
+
|
def ood_score(shap_query: np.ndarray, shaps_matrix: np.ndarray) -> float:
    """Simple OOD heuristic: 1 - max cosine similarity against corpus SHAPs.

    Returns 1.0 (maximally out-of-distribution) when the corpus is empty.
    """
    if shaps_matrix.size == 0:
        return 1.0
    # Vectorized cosine against every corpus row at once — replaces the
    # original per-row Python loop with one NumPy expression (same values).
    q = np.asarray(shap_query, dtype=float).reshape(-1)
    M = np.asarray(shaps_matrix, dtype=float)
    q_norm = np.linalg.norm(q) + 1e-12
    row_norms = np.linalg.norm(M, axis=1) + 1e-12
    cosines = (M @ q) / (row_norms * q_norm)
    return float(1.0 - cosines.max())
|
core/schemas.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel
| 3 |
+
|
class Instance(BaseModel):
    """A single model input: ordered feature vector plus its feature names."""
    features: List[float]
    feature_names: List[str]


class PredictionResult(BaseModel):
    """Model output for one instance.

    y_pred accepts either the raw numeric class/target or a human-readable
    label string — the API maps class ids to names such as "setosa", which
    the previous `float` annotation rejected at validation time.
    """
    y_pred: Union[float, str]
    proba: Optional[float] = None


class Explanation(BaseModel):
    """SHAP explanation for one instance."""
    shap_values: List[float]  # reasoning vector, one value per feature
    base_value: float
    topk: List[Dict[str, Any]]


class RetrievalConfig(BaseModel):
    """Knobs controlling similar-case retrieval."""
    alpha: float = 0.7  # weight for SHAP cosine vs feature cosine
    k: int = 5
    use_retrieval: bool = True
    namespace: str = "global_default"


class RetrievedCase(BaseModel):
    """One stored case returned by retrieval."""
    case_id: str
    similarity: float
    y_pred: Optional[Union[float, str]] = None  # numeric class or mapped label
    shap_values: Optional[List[float]] = None
    features: Optional[List[float]] = None
    meta: Optional[Dict[str, Any]] = None


class ExplainResponse(BaseModel):
    """Full response body of the /explain endpoint."""
    prediction: PredictionResult
    explanation: Explanation
    similar_cases: Optional[List[RetrievedCase]] = None
    ood_flag: bool = False
    ood_reason: Optional[str] = None
core/storage.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
from typing import List, Dict, Any, Tuple
|
| 4 |
+
import numpy as np
|
| 5 |
+
|
# Per-namespace store layout: a JSONL index mapping case_id -> matrix row,
# two dense .npy matrices (features and SHAP values, row-aligned), and a
# JSONL metadata file with one {case_id: meta} object per line.
INDEX_FILE = "index.jsonl"
FEATURE_FILE = "features.npy"
SHAP_FILE = "shap.npy"
META_FILE = "meta.jsonl"
| 10 |
+
|
| 11 |
+
|
def ensure_dir(path: str) -> None:
    """Create *path* (including parents) if it does not already exist."""
    os.makedirs(path, exist_ok=True)
| 14 |
+
|
| 15 |
+
|
def append_jsonl(path: str, row: Dict[str, Any]) -> None:
    """Append *row* to the JSONL file at *path* as one JSON-encoded line."""
    encoded = json.dumps(row)
    with open(path, "a", encoding="utf-8") as handle:
        handle.write(encoded + "\n")
| 19 |
+
|
| 20 |
+
|
def load_index(path: str) -> List[Dict[str, Any]]:
    """Read every non-blank line of a JSONL file into a list of dicts."""
    with open(path, "r", encoding="utf-8") as handle:
        return [json.loads(line) for line in handle if line.strip()]
| 28 |
+
|
| 29 |
+
|
def init_matrix_files(namespace_dir: str, feature_dim: int, shap_dim: int) -> None:
    """Create empty (0, dim) .npy matrices for a namespace if absent."""
    for filename, dim in ((FEATURE_FILE, feature_dim), (SHAP_FILE, shap_dim)):
        matrix_path = os.path.join(namespace_dir, filename)
        # Existing matrices are left untouched so stored cases survive re-init.
        if not os.path.exists(matrix_path):
            np.save(matrix_path, np.zeros((0, dim), dtype="float32"))
| 38 |
+
|
| 39 |
+
|
def append_case(namespace_dir: str, case_id: str, features: np.ndarray, shap_vec: np.ndarray, meta: Dict[str, Any]) -> None:
    """Append one case (features, SHAP vector, metadata) to the namespace store."""
    ensure_dir(namespace_dir)

    # Grow the row-aligned feature and SHAP matrices by one row each.
    feat_path = os.path.join(namespace_dir, FEATURE_FILE)
    shap_path = os.path.join(namespace_dir, SHAP_FILE)
    feats = np.vstack([np.load(feat_path), features.reshape(1, -1).astype("float32")])
    shaps = np.vstack([np.load(shap_path), shap_vec.reshape(1, -1).astype("float32")])
    np.save(feat_path, feats)
    np.save(shap_path, shaps)

    # Record the case_id -> matrix-row mapping and the case metadata.
    new_row = feats.shape[0] - 1
    append_jsonl(os.path.join(namespace_dir, INDEX_FILE), {"case_id": case_id, "row": new_row})
    append_jsonl(os.path.join(namespace_dir, META_FILE), {case_id: meta})
| 59 |
+
|
| 60 |
+
|
def load_matrices(namespace_dir: str) -> Tuple[np.ndarray, np.ndarray, List[Dict[str, Any]], List[str]]:
    """
    Load everything retrieval needs from a namespace directory.

    Returns:
        X (np.ndarray)     : features matrix, one row per stored case
        SHAP (np.ndarray)  : SHAP-values matrix, row-aligned with X
        metas (list[dict]) : metadata rows ({case_id: meta} dicts)
        case_ids (list[str]): case ids in matrix-row order
    """
    X = np.load(os.path.join(namespace_dir, FEATURE_FILE))
    SHAP = np.load(os.path.join(namespace_dir, SHAP_FILE))

    def _read_jsonl(path: str) -> List[Dict[str, Any]]:
        # A missing file is treated as empty (e.g. a freshly created namespace).
        if not os.path.exists(path):
            return []
        with open(path, "r", encoding="utf-8") as handle:
            return [json.loads(line) for line in handle if line.strip()]

    metas = _read_jsonl(os.path.join(namespace_dir, META_FILE))
    index_rows = _read_jsonl(os.path.join(namespace_dir, INDEX_FILE))
    case_ids = [entry.get("case_id") for entry in index_rows]

    return X, SHAP, metas, case_ids
|
core/utils.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import hashlib
|
| 2 |
+
from typing import List
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
def case_id_from_vector(x: np.ndarray, prefix: str = "case") -> str:
    """Derive a short, deterministic case id from a vector's raw bytes."""
    digest = hashlib.md5(x.tobytes()).hexdigest()
    return f"{prefix}_{digest[:10]}"
|
| 8 |
+
|
def to_numpy(lst, dtype="float32"):
    """Convert a (possibly nested) sequence to an ndarray of *dtype*."""
    return np.asarray(lst, dtype=dtype)
|
| 11 |
+
|
def safe_proba_to_scalar(proba, positive_index: int = 1):
    """Return a single probability for binary classifiers when possible.

    For the usual (n_samples, n_classes) matrix, returns the first sample's
    probability at *positive_index*; any other shape falls back to the mean.
    Returns None for None input.
    """
    if proba is None:
        return None
    arr = np.asarray(proba)
    looks_like_class_matrix = arr.ndim == 2 and arr.shape[1] >= 2
    if looks_like_class_matrix:
        return float(arr[0, positive_index])
    # fallback: average
    return float(arr.mean())
|
data/base_indices/iris_global/features.npy
ADDED
|
Binary file (1.73 kB). View file
|
|
|
data/base_indices/iris_global/index.jsonl
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"case_id": "iris_00bbac633c", "row": 0}
|
| 2 |
+
{"case_id": "iris_b2a2c274fa", "row": 1}
|
| 3 |
+
{"case_id": "iris_f79fee902c", "row": 2}
|
| 4 |
+
{"case_id": "iris_9a1a194bc4", "row": 3}
|
| 5 |
+
{"case_id": "iris_d5f6d63eb7", "row": 4}
|
| 6 |
+
{"case_id": "iris_fb322465b8", "row": 5}
|
| 7 |
+
{"case_id": "iris_751758f9a1", "row": 6}
|
| 8 |
+
{"case_id": "iris_2a967bb0c8", "row": 7}
|
| 9 |
+
{"case_id": "iris_e7d76ce04e", "row": 8}
|
| 10 |
+
{"case_id": "iris_7be756b6c3", "row": 9}
|
| 11 |
+
{"case_id": "iris_2a8b6920ad", "row": 10}
|
| 12 |
+
{"case_id": "iris_95f30553cb", "row": 11}
|
| 13 |
+
{"case_id": "iris_190a3c83bf", "row": 12}
|
| 14 |
+
{"case_id": "iris_dddf43f88f", "row": 13}
|
| 15 |
+
{"case_id": "iris_be9c18be01", "row": 14}
|
| 16 |
+
{"case_id": "iris_0f92794e2b", "row": 15}
|
| 17 |
+
{"case_id": "iris_41d43e7e9a", "row": 16}
|
| 18 |
+
{"case_id": "iris_8f66265fc7", "row": 17}
|
| 19 |
+
{"case_id": "iris_d3945eb482", "row": 18}
|
| 20 |
+
{"case_id": "iris_4b1e78fdc5", "row": 19}
|
| 21 |
+
{"case_id": "iris_401ec0e4cd", "row": 20}
|
| 22 |
+
{"case_id": "iris_408e8d870d", "row": 21}
|
| 23 |
+
{"case_id": "iris_9cafb2d428", "row": 22}
|
| 24 |
+
{"case_id": "iris_e50ca52202", "row": 23}
|
| 25 |
+
{"case_id": "iris_2babca4f93", "row": 24}
|
| 26 |
+
{"case_id": "iris_306decaed1", "row": 25}
|
| 27 |
+
{"case_id": "iris_8925772bb8", "row": 26}
|
| 28 |
+
{"case_id": "iris_16f2c8a614", "row": 27}
|
| 29 |
+
{"case_id": "iris_affabb42bd", "row": 28}
|
| 30 |
+
{"case_id": "iris_cd147f78d3", "row": 29}
|
| 31 |
+
{"case_id": "iris_60ceafb3b7", "row": 30}
|
| 32 |
+
{"case_id": "iris_971bb14551", "row": 31}
|
| 33 |
+
{"case_id": "iris_3c46aadfa8", "row": 32}
|
| 34 |
+
{"case_id": "iris_8949d2093a", "row": 33}
|
| 35 |
+
{"case_id": "iris_54db69a5ef", "row": 34}
|
| 36 |
+
{"case_id": "iris_553603a759", "row": 35}
|
| 37 |
+
{"case_id": "iris_1fbd72f69e", "row": 36}
|
| 38 |
+
{"case_id": "iris_1aa1718647", "row": 37}
|
| 39 |
+
{"case_id": "iris_4e47b9e277", "row": 38}
|
| 40 |
+
{"case_id": "iris_0b3fb6e054", "row": 39}
|
| 41 |
+
{"case_id": "iris_afb9f3ce89", "row": 40}
|
| 42 |
+
{"case_id": "iris_d964678b78", "row": 41}
|
| 43 |
+
{"case_id": "iris_d5afa1ffc3", "row": 42}
|
| 44 |
+
{"case_id": "iris_8d176d6739", "row": 43}
|
| 45 |
+
{"case_id": "iris_b3b9231f82", "row": 44}
|
| 46 |
+
{"case_id": "iris_948f3351ef", "row": 45}
|
| 47 |
+
{"case_id": "iris_cf7d9336af", "row": 46}
|
| 48 |
+
{"case_id": "iris_1d9428989e", "row": 47}
|
| 49 |
+
{"case_id": "iris_ca1177d767", "row": 48}
|
| 50 |
+
{"case_id": "iris_7435ef9308", "row": 49}
|
| 51 |
+
{"case_id": "iris_187546a192", "row": 50}
|
| 52 |
+
{"case_id": "iris_f67c61b994", "row": 51}
|
| 53 |
+
{"case_id": "iris_12ca8c3bc8", "row": 52}
|
| 54 |
+
{"case_id": "iris_e883f0a96b", "row": 53}
|
| 55 |
+
{"case_id": "iris_5d30ef01ab", "row": 54}
|
| 56 |
+
{"case_id": "iris_06713bd1b2", "row": 55}
|
| 57 |
+
{"case_id": "iris_cdea50b849", "row": 56}
|
| 58 |
+
{"case_id": "iris_9a7d15fcb5", "row": 57}
|
| 59 |
+
{"case_id": "iris_aa4ec334d1", "row": 58}
|
| 60 |
+
{"case_id": "iris_1753b1a603", "row": 59}
|
| 61 |
+
{"case_id": "iris_bd16db5e4c", "row": 60}
|
| 62 |
+
{"case_id": "iris_45e9c6b8be", "row": 61}
|
| 63 |
+
{"case_id": "iris_90355b0853", "row": 62}
|
| 64 |
+
{"case_id": "iris_29f5ab1fcc", "row": 63}
|
| 65 |
+
{"case_id": "iris_ba49dde13f", "row": 64}
|
| 66 |
+
{"case_id": "iris_938819d7e3", "row": 65}
|
| 67 |
+
{"case_id": "iris_ced4f5a163", "row": 66}
|
| 68 |
+
{"case_id": "iris_a0555b0006", "row": 67}
|
| 69 |
+
{"case_id": "iris_245849f78c", "row": 68}
|
| 70 |
+
{"case_id": "iris_0315cdedea", "row": 69}
|
| 71 |
+
{"case_id": "iris_678b362b66", "row": 70}
|
| 72 |
+
{"case_id": "iris_495ee2afb0", "row": 71}
|
| 73 |
+
{"case_id": "iris_ab99322692", "row": 72}
|
| 74 |
+
{"case_id": "iris_afb9f3ce89", "row": 73}
|
| 75 |
+
{"case_id": "iris_f873cbc152", "row": 74}
|
| 76 |
+
{"case_id": "iris_63c413d1a9", "row": 75}
|
| 77 |
+
{"case_id": "iris_42ca7166cd", "row": 76}
|
| 78 |
+
{"case_id": "iris_31d4a40847", "row": 77}
|
| 79 |
+
{"case_id": "iris_d458f158e0", "row": 78}
|
| 80 |
+
{"case_id": "iris_373dfcf880", "row": 79}
|
| 81 |
+
{"case_id": "iris_2d037a30fc", "row": 80}
|
| 82 |
+
{"case_id": "iris_3954a347b6", "row": 81}
|
| 83 |
+
{"case_id": "iris_7c437b3319", "row": 82}
|
| 84 |
+
{"case_id": "iris_519f77cbe0", "row": 83}
|
| 85 |
+
{"case_id": "iris_01fe6fa830", "row": 84}
|
| 86 |
+
{"case_id": "iris_d0b253b7b8", "row": 85}
|
| 87 |
+
{"case_id": "iris_0a27e6c142", "row": 86}
|
| 88 |
+
{"case_id": "iris_46a04f9bf1", "row": 87}
|
| 89 |
+
{"case_id": "iris_ef474c84d7", "row": 88}
|
| 90 |
+
{"case_id": "iris_07a988927f", "row": 89}
|
| 91 |
+
{"case_id": "iris_c93d7a8c57", "row": 90}
|
| 92 |
+
{"case_id": "iris_3af34eed28", "row": 91}
|
| 93 |
+
{"case_id": "iris_be68e3ed79", "row": 92}
|
| 94 |
+
{"case_id": "iris_55c1ce18c8", "row": 93}
|
| 95 |
+
{"case_id": "iris_0393b6cfa4", "row": 94}
|
| 96 |
+
{"case_id": "iris_70d7d9f959", "row": 95}
|
| 97 |
+
{"case_id": "iris_da32c2c5cb", "row": 96}
|
| 98 |
+
{"case_id": "iris_44888f11a6", "row": 97}
|
| 99 |
+
{"case_id": "iris_feaabdd51f", "row": 98}
|
| 100 |
+
{"case_id": "iris_c6b9d16895", "row": 99}
|
data/base_indices/iris_global/meta.jsonl
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"iris_00bbac633c": {"y_pred": 1.0}}
|
| 2 |
+
{"iris_b2a2c274fa": {"y_pred": 0.0}}
|
| 3 |
+
{"iris_f79fee902c": {"y_pred": 2.0}}
|
| 4 |
+
{"iris_9a1a194bc4": {"y_pred": 1.0}}
|
| 5 |
+
{"iris_d5f6d63eb7": {"y_pred": 1.0}}
|
| 6 |
+
{"iris_fb322465b8": {"y_pred": 0.0}}
|
| 7 |
+
{"iris_751758f9a1": {"y_pred": 1.0}}
|
| 8 |
+
{"iris_2a967bb0c8": {"y_pred": 2.0}}
|
| 9 |
+
{"iris_e7d76ce04e": {"y_pred": 1.0}}
|
| 10 |
+
{"iris_7be756b6c3": {"y_pred": 1.0}}
|
| 11 |
+
{"iris_2a8b6920ad": {"y_pred": 2.0}}
|
| 12 |
+
{"iris_95f30553cb": {"y_pred": 0.0}}
|
| 13 |
+
{"iris_190a3c83bf": {"y_pred": 0.0}}
|
| 14 |
+
{"iris_dddf43f88f": {"y_pred": 0.0}}
|
| 15 |
+
{"iris_be9c18be01": {"y_pred": 0.0}}
|
| 16 |
+
{"iris_0f92794e2b": {"y_pred": 1.0}}
|
| 17 |
+
{"iris_41d43e7e9a": {"y_pred": 2.0}}
|
| 18 |
+
{"iris_8f66265fc7": {"y_pred": 1.0}}
|
| 19 |
+
{"iris_d3945eb482": {"y_pred": 1.0}}
|
| 20 |
+
{"iris_4b1e78fdc5": {"y_pred": 2.0}}
|
| 21 |
+
{"iris_401ec0e4cd": {"y_pred": 0.0}}
|
| 22 |
+
{"iris_408e8d870d": {"y_pred": 2.0}}
|
| 23 |
+
{"iris_9cafb2d428": {"y_pred": 0.0}}
|
| 24 |
+
{"iris_e50ca52202": {"y_pred": 2.0}}
|
| 25 |
+
{"iris_2babca4f93": {"y_pred": 2.0}}
|
| 26 |
+
{"iris_306decaed1": {"y_pred": 2.0}}
|
| 27 |
+
{"iris_8925772bb8": {"y_pred": 2.0}}
|
| 28 |
+
{"iris_16f2c8a614": {"y_pred": 2.0}}
|
| 29 |
+
{"iris_affabb42bd": {"y_pred": 0.0}}
|
| 30 |
+
{"iris_cd147f78d3": {"y_pred": 0.0}}
|
| 31 |
+
{"iris_60ceafb3b7": {"y_pred": 0.0}}
|
| 32 |
+
{"iris_971bb14551": {"y_pred": 0.0}}
|
| 33 |
+
{"iris_3c46aadfa8": {"y_pred": 1.0}}
|
| 34 |
+
{"iris_8949d2093a": {"y_pred": 0.0}}
|
| 35 |
+
{"iris_54db69a5ef": {"y_pred": 0.0}}
|
| 36 |
+
{"iris_553603a759": {"y_pred": 2.0}}
|
| 37 |
+
{"iris_1fbd72f69e": {"y_pred": 1.0}}
|
| 38 |
+
{"iris_1aa1718647": {"y_pred": 0.0}}
|
| 39 |
+
{"iris_4e47b9e277": {"y_pred": 0.0}}
|
| 40 |
+
{"iris_0b3fb6e054": {"y_pred": 0.0}}
|
| 41 |
+
{"iris_afb9f3ce89": {"y_pred": 2.0}}
|
| 42 |
+
{"iris_d964678b78": {"y_pred": 1.0}}
|
| 43 |
+
{"iris_d5afa1ffc3": {"y_pred": 1.0}}
|
| 44 |
+
{"iris_8d176d6739": {"y_pred": 0.0}}
|
| 45 |
+
{"iris_b3b9231f82": {"y_pred": 0.0}}
|
| 46 |
+
{"iris_948f3351ef": {"y_pred": 1.0}}
|
| 47 |
+
{"iris_cf7d9336af": {"y_pred": 2.0}}
|
| 48 |
+
{"iris_1d9428989e": {"y_pred": 2.0}}
|
| 49 |
+
{"iris_ca1177d767": {"y_pred": 1.0}}
|
| 50 |
+
{"iris_7435ef9308": {"y_pred": 2.0}}
|
| 51 |
+
{"iris_187546a192": {"y_pred": 1.0}}
|
| 52 |
+
{"iris_f67c61b994": {"y_pred": 2.0}}
|
| 53 |
+
{"iris_12ca8c3bc8": {"y_pred": 1.0}}
|
| 54 |
+
{"iris_e883f0a96b": {"y_pred": 0.0}}
|
| 55 |
+
{"iris_5d30ef01ab": {"y_pred": 2.0}}
|
| 56 |
+
{"iris_06713bd1b2": {"y_pred": 1.0}}
|
| 57 |
+
{"iris_cdea50b849": {"y_pred": 0.0}}
|
| 58 |
+
{"iris_9a7d15fcb5": {"y_pred": 0.0}}
|
| 59 |
+
{"iris_aa4ec334d1": {"y_pred": 0.0}}
|
| 60 |
+
{"iris_1753b1a603": {"y_pred": 1.0}}
|
| 61 |
+
{"iris_bd16db5e4c": {"y_pred": 2.0}}
|
| 62 |
+
{"iris_45e9c6b8be": {"y_pred": 0.0}}
|
| 63 |
+
{"iris_90355b0853": {"y_pred": 0.0}}
|
| 64 |
+
{"iris_29f5ab1fcc": {"y_pred": 0.0}}
|
| 65 |
+
{"iris_ba49dde13f": {"y_pred": 1.0}}
|
| 66 |
+
{"iris_938819d7e3": {"y_pred": 0.0}}
|
| 67 |
+
{"iris_ced4f5a163": {"y_pred": 1.0}}
|
| 68 |
+
{"iris_a0555b0006": {"y_pred": 2.0}}
|
| 69 |
+
{"iris_245849f78c": {"y_pred": 0.0}}
|
| 70 |
+
{"iris_0315cdedea": {"y_pred": 1.0}}
|
| 71 |
+
{"iris_678b362b66": {"y_pred": 2.0}}
|
| 72 |
+
{"iris_495ee2afb0": {"y_pred": 0.0}}
|
| 73 |
+
{"iris_ab99322692": {"y_pred": 2.0}}
|
| 74 |
+
{"iris_afb9f3ce89": {"y_pred": 2.0}}
|
| 75 |
+
{"iris_f873cbc152": {"y_pred": 1.0}}
|
| 76 |
+
{"iris_63c413d1a9": {"y_pred": 1.0}}
|
| 77 |
+
{"iris_42ca7166cd": {"y_pred": 2.0}}
|
| 78 |
+
{"iris_31d4a40847": {"y_pred": 1.0}}
|
| 79 |
+
{"iris_d458f158e0": {"y_pred": 0.0}}
|
| 80 |
+
{"iris_373dfcf880": {"y_pred": 1.0}}
|
| 81 |
+
{"iris_2d037a30fc": {"y_pred": 2.0}}
|
| 82 |
+
{"iris_3954a347b6": {"y_pred": 0.0}}
|
| 83 |
+
{"iris_7c437b3319": {"y_pred": 0.0}}
|
| 84 |
+
{"iris_519f77cbe0": {"y_pred": 1.0}}
|
| 85 |
+
{"iris_01fe6fa830": {"y_pred": 1.0}}
|
| 86 |
+
{"iris_d0b253b7b8": {"y_pred": 0.0}}
|
| 87 |
+
{"iris_0a27e6c142": {"y_pred": 2.0}}
|
| 88 |
+
{"iris_46a04f9bf1": {"y_pred": 0.0}}
|
| 89 |
+
{"iris_ef474c84d7": {"y_pred": 0.0}}
|
| 90 |
+
{"iris_07a988927f": {"y_pred": 1.0}}
|
| 91 |
+
{"iris_c93d7a8c57": {"y_pred": 1.0}}
|
| 92 |
+
{"iris_3af34eed28": {"y_pred": 2.0}}
|
| 93 |
+
{"iris_be68e3ed79": {"y_pred": 1.0}}
|
| 94 |
+
{"iris_55c1ce18c8": {"y_pred": 2.0}}
|
| 95 |
+
{"iris_0393b6cfa4": {"y_pred": 2.0}}
|
| 96 |
+
{"iris_70d7d9f959": {"y_pred": 1.0}}
|
| 97 |
+
{"iris_da32c2c5cb": {"y_pred": 0.0}}
|
| 98 |
+
{"iris_44888f11a6": {"y_pred": 0.0}}
|
| 99 |
+
{"iris_feaabdd51f": {"y_pred": 2.0}}
|
| 100 |
+
{"iris_c6b9d16895": {"y_pred": 2.0}}
|
data/base_indices/iris_global/shap.npy
ADDED
|
Binary file (1.73 kB). View file
|
|
|
model_data/data.csv
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
|
| 2 |
+
5.1,3.5,1.4,0.2,0
|
| 3 |
+
4.9,3.0,1.4,0.2,0
|
| 4 |
+
4.7,3.2,1.3,0.2,0
|
| 5 |
+
4.6,3.1,1.5,0.2,0
|
| 6 |
+
5.0,3.6,1.4,0.2,0
|
| 7 |
+
5.4,3.9,1.7,0.4,0
|
| 8 |
+
4.6,3.4,1.4,0.3,0
|
| 9 |
+
5.0,3.4,1.5,0.2,0
|
| 10 |
+
4.4,2.9,1.4,0.2,0
|
| 11 |
+
4.9,3.1,1.5,0.1,0
|
| 12 |
+
5.4,3.7,1.5,0.2,0
|
| 13 |
+
4.8,3.4,1.6,0.2,0
|
| 14 |
+
4.8,3.0,1.4,0.1,0
|
| 15 |
+
4.3,3.0,1.1,0.1,0
|
| 16 |
+
5.8,4.0,1.2,0.2,0
|
| 17 |
+
5.7,4.4,1.5,0.4,0
|
| 18 |
+
5.4,3.9,1.3,0.4,0
|
| 19 |
+
5.1,3.5,1.4,0.3,0
|
| 20 |
+
5.7,3.8,1.7,0.3,0
|
| 21 |
+
5.1,3.8,1.5,0.3,0
|
| 22 |
+
5.4,3.4,1.7,0.2,0
|
| 23 |
+
5.1,3.7,1.5,0.4,0
|
| 24 |
+
4.6,3.6,1.0,0.2,0
|
| 25 |
+
5.1,3.3,1.7,0.5,0
|
| 26 |
+
4.8,3.4,1.9,0.2,0
|
| 27 |
+
5.0,3.0,1.6,0.2,0
|
| 28 |
+
5.0,3.4,1.6,0.4,0
|
| 29 |
+
5.2,3.5,1.5,0.2,0
|
| 30 |
+
5.2,3.4,1.4,0.2,0
|
| 31 |
+
4.7,3.2,1.6,0.2,0
|
| 32 |
+
4.8,3.1,1.6,0.2,0
|
| 33 |
+
5.4,3.4,1.5,0.4,0
|
| 34 |
+
5.2,4.1,1.5,0.1,0
|
| 35 |
+
5.5,4.2,1.4,0.2,0
|
| 36 |
+
4.9,3.1,1.5,0.2,0
|
| 37 |
+
5.0,3.2,1.2,0.2,0
|
| 38 |
+
5.5,3.5,1.3,0.2,0
|
| 39 |
+
4.9,3.6,1.4,0.1,0
|
| 40 |
+
4.4,3.0,1.3,0.2,0
|
| 41 |
+
5.1,3.4,1.5,0.2,0
|
| 42 |
+
5.0,3.5,1.3,0.3,0
|
| 43 |
+
4.5,2.3,1.3,0.3,0
|
| 44 |
+
4.4,3.2,1.3,0.2,0
|
| 45 |
+
5.0,3.5,1.6,0.6,0
|
| 46 |
+
5.1,3.8,1.9,0.4,0
|
| 47 |
+
4.8,3.0,1.4,0.3,0
|
| 48 |
+
5.1,3.8,1.6,0.2,0
|
| 49 |
+
4.6,3.2,1.4,0.2,0
|
| 50 |
+
5.3,3.7,1.5,0.2,0
|
| 51 |
+
5.0,3.3,1.4,0.2,0
|
| 52 |
+
7.0,3.2,4.7,1.4,1
|
| 53 |
+
6.4,3.2,4.5,1.5,1
|
| 54 |
+
6.9,3.1,4.9,1.5,1
|
| 55 |
+
5.5,2.3,4.0,1.3,1
|
| 56 |
+
6.5,2.8,4.6,1.5,1
|
| 57 |
+
5.7,2.8,4.5,1.3,1
|
| 58 |
+
6.3,3.3,4.7,1.6,1
|
| 59 |
+
4.9,2.4,3.3,1.0,1
|
| 60 |
+
6.6,2.9,4.6,1.3,1
|
| 61 |
+
5.2,2.7,3.9,1.4,1
|
| 62 |
+
5.0,2.0,3.5,1.0,1
|
| 63 |
+
5.9,3.0,4.2,1.5,1
|
| 64 |
+
6.0,2.2,4.0,1.0,1
|
| 65 |
+
6.1,2.9,4.7,1.4,1
|
| 66 |
+
5.6,2.9,3.6,1.3,1
|
| 67 |
+
6.7,3.1,4.4,1.4,1
|
| 68 |
+
5.6,3.0,4.5,1.5,1
|
| 69 |
+
5.8,2.7,4.1,1.0,1
|
| 70 |
+
6.2,2.2,4.5,1.5,1
|
| 71 |
+
5.6,2.5,3.9,1.1,1
|
| 72 |
+
5.9,3.2,4.8,1.8,1
|
| 73 |
+
6.1,2.8,4.0,1.3,1
|
| 74 |
+
6.3,2.5,4.9,1.5,1
|
| 75 |
+
6.1,2.8,4.7,1.2,1
|
| 76 |
+
6.4,2.9,4.3,1.3,1
|
| 77 |
+
6.6,3.0,4.4,1.4,1
|
| 78 |
+
6.8,2.8,4.8,1.4,1
|
| 79 |
+
6.7,3.0,5.0,1.7,1
|
| 80 |
+
6.0,2.9,4.5,1.5,1
|
| 81 |
+
5.7,2.6,3.5,1.0,1
|
| 82 |
+
5.5,2.4,3.8,1.1,1
|
| 83 |
+
5.5,2.4,3.7,1.0,1
|
| 84 |
+
5.8,2.7,3.9,1.2,1
|
| 85 |
+
6.0,2.7,5.1,1.6,1
|
| 86 |
+
5.4,3.0,4.5,1.5,1
|
| 87 |
+
6.0,3.4,4.5,1.6,1
|
| 88 |
+
6.7,3.1,4.7,1.5,1
|
| 89 |
+
6.3,2.3,4.4,1.3,1
|
| 90 |
+
5.6,3.0,4.1,1.3,1
|
| 91 |
+
5.5,2.5,4.0,1.3,1
|
| 92 |
+
5.5,2.6,4.4,1.2,1
|
| 93 |
+
6.1,3.0,4.6,1.4,1
|
| 94 |
+
5.8,2.6,4.0,1.2,1
|
| 95 |
+
5.0,2.3,3.3,1.0,1
|
| 96 |
+
5.6,2.7,4.2,1.3,1
|
| 97 |
+
5.7,3.0,4.2,1.2,1
|
| 98 |
+
5.7,2.9,4.2,1.3,1
|
| 99 |
+
6.2,2.9,4.3,1.3,1
|
| 100 |
+
5.1,2.5,3.0,1.1,1
|
| 101 |
+
5.7,2.8,4.1,1.3,1
|
| 102 |
+
6.3,3.3,6.0,2.5,2
|
| 103 |
+
5.8,2.7,5.1,1.9,2
|
| 104 |
+
7.1,3.0,5.9,2.1,2
|
| 105 |
+
6.3,2.9,5.6,1.8,2
|
| 106 |
+
6.5,3.0,5.8,2.2,2
|
| 107 |
+
7.6,3.0,6.6,2.1,2
|
| 108 |
+
4.9,2.5,4.5,1.7,2
|
| 109 |
+
7.3,2.9,6.3,1.8,2
|
| 110 |
+
6.7,2.5,5.8,1.8,2
|
| 111 |
+
7.2,3.6,6.1,2.5,2
|
| 112 |
+
6.5,3.2,5.1,2.0,2
|
| 113 |
+
6.4,2.7,5.3,1.9,2
|
| 114 |
+
6.8,3.0,5.5,2.1,2
|
| 115 |
+
5.7,2.5,5.0,2.0,2
|
| 116 |
+
5.8,2.8,5.1,2.4,2
|
| 117 |
+
6.4,3.2,5.3,2.3,2
|
| 118 |
+
6.5,3.0,5.5,1.8,2
|
| 119 |
+
7.7,3.8,6.7,2.2,2
|
| 120 |
+
7.7,2.6,6.9,2.3,2
|
| 121 |
+
6.0,2.2,5.0,1.5,2
|
| 122 |
+
6.9,3.2,5.7,2.3,2
|
| 123 |
+
5.6,2.8,4.9,2.0,2
|
| 124 |
+
7.7,2.8,6.7,2.0,2
|
| 125 |
+
6.3,2.7,4.9,1.8,2
|
| 126 |
+
6.7,3.3,5.7,2.1,2
|
| 127 |
+
7.2,3.2,6.0,1.8,2
|
| 128 |
+
6.2,2.8,4.8,1.8,2
|
| 129 |
+
6.1,3.0,4.9,1.8,2
|
| 130 |
+
6.4,2.8,5.6,2.1,2
|
| 131 |
+
7.2,3.0,5.8,1.6,2
|
| 132 |
+
7.4,2.8,6.1,1.9,2
|
| 133 |
+
7.9,3.8,6.4,2.0,2
|
| 134 |
+
6.4,2.8,5.6,2.2,2
|
| 135 |
+
6.3,2.8,5.1,1.5,2
|
| 136 |
+
6.1,2.6,5.6,1.4,2
|
| 137 |
+
7.7,3.0,6.1,2.3,2
|
| 138 |
+
6.3,3.4,5.6,2.4,2
|
| 139 |
+
6.4,3.1,5.5,1.8,2
|
| 140 |
+
6.0,3.0,4.8,1.8,2
|
| 141 |
+
6.9,3.1,5.4,2.1,2
|
| 142 |
+
6.7,3.1,5.6,2.4,2
|
| 143 |
+
6.9,3.1,5.1,2.3,2
|
| 144 |
+
5.8,2.7,5.1,1.9,2
|
| 145 |
+
6.8,3.2,5.9,2.3,2
|
| 146 |
+
6.7,3.3,5.7,2.5,2
|
| 147 |
+
6.7,3.0,5.2,2.3,2
|
| 148 |
+
6.3,2.5,5.0,1.9,2
|
| 149 |
+
6.5,3.0,5.2,2.0,2
|
| 150 |
+
6.2,3.4,5.4,2.3,2
|
| 151 |
+
5.9,3.0,5.1,1.8,2
|
model_data/model.pkl
ADDED
|
Binary file (94.9 kB). View file
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy==1.26.4
|
| 2 |
+
pandas==2.2.2
|
| 3 |
+
scikit-learn==1.4.2
|
| 4 |
+
shap==0.45.0
|
| 5 |
+
fastapi==0.115.0
|
| 6 |
+
uvicorn==0.30.6
|
| 7 |
+
python-multipart==0.0.9
|
| 8 |
+
streamlit==1.39.0
|
| 9 |
+
pydantic==2.9.2
|
| 10 |
+
joblib==1.4.2
|
| 11 |
+
matplotlib==3.9.2
|
scripts/__pycache__/build_base_index.cpython-313.pyc
ADDED
|
Binary file (3.03 kB). View file
|
|
|
scripts/add_user_model.py
ADDED
|
File without changes
|
scripts/build_base_index.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Precompute a 'global' reasoning space from a baseline model + dataset.
|
| 3 |
+
|
| 4 |
+
Usage:
|
| 5 |
+
python scripts/build_base_index.py \
|
| 6 |
+
--model_path path/to/model.pkl \
|
| 7 |
+
--csv path/to/data.csv \
|
| 8 |
+
--features col1,col2,col3 \
|
| 9 |
+
--target target_col \
|
| 10 |
+
--namespace data/base_indices/recidivism_global \
|
| 11 |
+
--sample 2000
|
| 12 |
+
"""
|
| 13 |
+
# Query_Your_Model/scripts/build_base_index.py
|
| 14 |
+
import sys, os
|
| 15 |
+
#sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
import os
|
| 19 |
+
import pandas as pd
|
| 20 |
+
import numpy as np
|
| 21 |
+
from ..core.model_loader import load_model, predict
|
| 22 |
+
from ..core.explain import explain_instance
|
| 23 |
+
from ..core.storage import ensure_dir, init_matrix_files, append_case
|
| 24 |
+
from ..core.utils import case_id_from_vector
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Hardcoded defaults for Iris demo
|
| 28 |
+
MODEL_PATH = "Query_Your_Model/model_data/model.pkl"
|
| 29 |
+
CSV_PATH = "Query_Your_Model/model_data/data.csv"
|
| 30 |
+
FEATURES = ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]
|
| 31 |
+
TARGET = "target"
|
| 32 |
+
NAMESPACE = "Query_Your_Model/data/base_indices/iris_global"
|
| 33 |
+
SAMPLE = 100 # how many rows to sample
|
| 34 |
+
|
| 35 |
+
def main():
|
| 36 |
+
print("Building reasoning index...")
|
| 37 |
+
df = pd.read_csv(CSV_PATH)
|
| 38 |
+
if SAMPLE and SAMPLE < len(df):
|
| 39 |
+
df = df.sample(SAMPLE, random_state=42)
|
| 40 |
+
|
| 41 |
+
X = df[FEATURES].values
|
| 42 |
+
model = load_model(MODEL_PATH)
|
| 43 |
+
|
| 44 |
+
ensure_dir(NAMESPACE)
|
| 45 |
+
init_matrix_files(NAMESPACE, feature_dim=len(FEATURES), shap_dim=len(FEATURES))
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
bg = df[FEATURES].sample(min(100, len(df)), random_state=0).values.astype("float32")
|
| 49 |
+
|
| 50 |
+
for i, row in df.iterrows():
|
| 51 |
+
x = row[FEATURES].values.astype("float32")
|
| 52 |
+
y_pred, _ = predict(model, x.reshape(1, -1))
|
| 53 |
+
exp = explain_instance(model, x, FEATURES, background_X=bg, top_k=8)
|
| 54 |
+
shap_vec = np.array(exp["shap_values"], dtype="float32")
|
| 55 |
+
cid = case_id_from_vector(x, prefix="iris")
|
| 56 |
+
meta = {"y_pred": float(y_pred[0])}
|
| 57 |
+
append_case(NAMESPACE, cid, x, shap_vec, meta)
|
| 58 |
+
|
| 59 |
+
print(f"Done! Index saved to {NAMESPACE}")
|
| 60 |
+
|
| 61 |
+
if __name__ == "__main__":
|
| 62 |
+
main()
|
scripts/build_iris.bat
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@echo off
REM Build the Iris reasoning index. Run from the repository root.
REM NOTE(review): the repo tree in this commit has scripts/, model_data/ and
REM data/ at the root (there is no Query_Your_Model/ folder), so all paths
REM below are root-relative — confirm against your checkout layout.
echo Building Iris reasoning index...

python scripts/build_base_index.py ^
  --model_path model_data/model.pkl ^
  --csv model_data/data.csv ^
  --features "sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)" ^
  --target target ^
  --namespace data/base_indices/iris_global ^
  --sample 100

echo Done! Index saved to data/base_indices/iris_global
pause
|
scripts/demo_predict.py
ADDED
|
File without changes
|
tests/__pycache__/test_similarity.cpython-313.pyc
ADDED
|
Binary file (1.48 kB). View file
|
|
|
tests/test_similarity.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tests/test_similarity.py
import os
import sys

import numpy as np

# Allow running this file directly (`python tests/test_similarity.py`):
# put the repo root on sys.path so `core.*` resolves as an absolute import.
# NOTE(review): the repo tree has core/ at the root (no Query_Your_Model
# package), so the old `from Query_Your_Model.core...` import cannot resolve.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from core.retrieval import combined_similarity


def _similarities():
    """Return (identical-pair, orthogonal-pair) combined similarities.

    Uses a fixed 3-d feature/SHAP fixture: vector `a` compared with itself,
    and `a` compared with an orthogonal vector `b` whose SHAP sign flips.
    """
    a = np.array([1, 0, 0], dtype="float32")        # feature vector 1
    b = np.array([0, 1, 0], dtype="float32")        # feature vector 2 (orthogonal)
    shap_a = np.array([0.5, 0.2, 0.1], dtype="float32")   # shap for a
    shap_b = np.array([-0.5, 0.0, 0.0], dtype="float32")  # shap for b
    s_same = combined_similarity(a, shap_a, a, shap_a, alpha=0.5)
    s_diff = combined_similarity(a, shap_a, b, shap_b, alpha=0.5)
    return s_same, s_diff


def test_combined_similarity_basic():
    # pytest treats a non-None return from a test as an error in recent
    # versions, so this test only asserts; printing lives in __main__ below.
    s_same, s_diff = _similarities()
    assert s_same > 0.99, "Expected similarity close to 1 for identical vectors"
    assert s_diff < s_same, "Expected orthogonal similarity to be smaller"


if __name__ == "__main__":
    print("Running combined_similarity tests...\n")
    s1, s2 = _similarities()
    print(f"Similarity (identical): {s1:.4f}")
    print(f"Similarity (different/orthogonal): {s2:.4f}")
    test_combined_similarity_basic()
    print("\n Test passed!")