Spaces:
Running
Running
Upload precompute.py with huggingface_hub
Browse files- precompute.py +138 -153
precompute.py
CHANGED
|
@@ -1,18 +1,14 @@
|
|
| 1 |
-
"""
|
| 2 |
-
GuardLLM - Precompute Embeddings & t-SNE
|
| 3 |
-
Downloads the neuralchemy/Prompt-injection-dataset (core config),
|
| 4 |
-
extracts CLS embeddings from Llama Prompt Guard 2 (86M),
|
| 5 |
-
computes t-SNE 2D projection, and saves everything to a cache file.
|
| 6 |
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
| 8 |
"""
|
| 9 |
-
|
| 10 |
-
import
|
| 11 |
-
import json
|
| 12 |
-
import logging
|
| 13 |
import numpy as np
|
| 14 |
import torch
|
| 15 |
-
from pathlib import Path
|
| 16 |
|
| 17 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
| 18 |
logger = logging.getLogger("precompute")
|
|
@@ -20,47 +16,33 @@ logger = logging.getLogger("precompute")
|
|
| 20 |
CACHE_DIR = Path(__file__).parent / "cache"
|
| 21 |
CACHE_FILE = CACHE_DIR / "embeddings_tsne.npz"
|
| 22 |
META_FILE = CACHE_DIR / "metadata.json"
|
| 23 |
-
|
|
|
|
|
|
|
| 24 |
DATASET_ID = "neuralchemy/Prompt-injection-dataset"
|
| 25 |
DATASET_CONFIG = "core"
|
| 26 |
-
BATCH_SIZE = 32
|
| 27 |
-
MAX_LENGTH =
|
| 28 |
TSNE_PERPLEXITY = 30
|
| 29 |
TSNE_SEED = 42
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
|
| 32 |
-
def
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
"""Load precomputed embeddings, t-SNE coords, and metadata."""
|
| 39 |
-
logger.info("Loading cached data from %s", CACHE_DIR)
|
| 40 |
-
data = np.load(CACHE_FILE)
|
| 41 |
-
with open(META_FILE, "r", encoding="utf-8") as f:
|
| 42 |
-
metadata = json.load(f)
|
| 43 |
-
return {
|
| 44 |
-
"embeddings": data["embeddings"],
|
| 45 |
-
"tsne_2d": data["tsne_2d"],
|
| 46 |
-
"metadata": metadata,
|
| 47 |
-
}
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
def download_dataset():
|
| 51 |
-
"""Download the neuralchemy dataset (core config)."""
|
| 52 |
from datasets import load_dataset
|
| 53 |
-
|
| 54 |
-
logger.info("Downloading dataset %s (config=%s)...", DATASET_ID, DATASET_CONFIG)
|
| 55 |
ds = load_dataset(DATASET_ID, DATASET_CONFIG)
|
| 56 |
-
|
| 57 |
-
# Combine all splits for the visualization
|
| 58 |
all_samples = []
|
| 59 |
for split_name in ["train", "validation", "test"]:
|
| 60 |
if split_name in ds:
|
| 61 |
-
|
| 62 |
-
logger.info(" Split '%s': %d samples", split_name, len(split))
|
| 63 |
-
for row in split:
|
| 64 |
all_samples.append({
|
| 65 |
"text": row["text"],
|
| 66 |
"label": int(row["label"]),
|
|
@@ -69,128 +51,131 @@ def download_dataset():
|
|
| 69 |
"source": row.get("source", ""),
|
| 70 |
"split": split_name,
|
| 71 |
})
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
return all_samples
|
| 75 |
|
| 76 |
|
| 77 |
-
def
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
| 83 |
-
model = AutoModelForSequenceClassification.from_pretrained(
|
| 84 |
-
MODEL_ID, output_hidden_states=True
|
| 85 |
-
)
|
| 86 |
-
model.eval()
|
| 87 |
|
| 88 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 89 |
-
model.to(device)
|
| 90 |
-
logger.info("Using device: %s", device)
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
for
|
| 97 |
-
|
| 98 |
-
|
|
|
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
max_length=MAX_LENGTH,
|
| 108 |
-
padding=True,
|
| 109 |
-
).to(device)
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
with torch.no_grad():
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
| 125 |
from sklearn.manifold import TSNE
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
)
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
random_state=TSNE_SEED,
|
| 138 |
-
|
| 139 |
-
learning_rate="auto",
|
| 140 |
-
|
| 141 |
-
)
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
def precompute_all():
|
| 148 |
-
"""Full pipeline: download → embed → t-SNE → save."""
|
| 149 |
-
if is_cached():
|
| 150 |
-
logger.info("Cache already exists. Loading...")
|
| 151 |
-
return load_cached()
|
| 152 |
-
|
| 153 |
-
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
| 154 |
-
|
| 155 |
-
# Step 1: Download dataset
|
| 156 |
-
samples = download_dataset()
|
| 157 |
-
|
| 158 |
-
# Step 2: Compute embeddings
|
| 159 |
-
embeddings = compute_embeddings(samples)
|
| 160 |
-
|
| 161 |
-
# Step 3: Compute t-SNE
|
| 162 |
-
tsne_2d = compute_tsne(embeddings)
|
| 163 |
-
|
| 164 |
-
# Step 4: Save
|
| 165 |
-
logger.info("Saving to cache...")
|
| 166 |
-
np.savez_compressed(
|
| 167 |
-
CACHE_FILE,
|
| 168 |
-
embeddings=embeddings,
|
| 169 |
-
tsne_2d=tsne_2d,
|
| 170 |
-
)
|
| 171 |
-
|
| 172 |
-
# Save metadata (text, labels, categories) as JSON
|
| 173 |
-
metadata = []
|
| 174 |
-
for s in samples:
|
| 175 |
-
metadata.append({
|
| 176 |
-
"text": s["text"],
|
| 177 |
-
"label": s["label"],
|
| 178 |
-
"category": s["category"],
|
| 179 |
-
"severity": s["severity"],
|
| 180 |
-
"source": s["source"],
|
| 181 |
-
"split": s["split"],
|
| 182 |
-
})
|
| 183 |
-
|
| 184 |
with open(META_FILE, "w", encoding="utf-8") as f:
|
| 185 |
-
json.dump(
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
|
| 195 |
if __name__ == "__main__":
|
| 196 |
-
|
|
|
|
| 1 |
+
"""GuardLLM - Precompute Embeddings & t-SNE (resumable, MiniLM-based).

Uses sentence-transformers/all-MiniLM-L6-v2 (22M params) to compute
embeddings for t-SNE visualization. The downstream risk classifier
(Llama Prompt Guard 2) is *not* loaded here - it is loaded by the
Gradio app on-demand when a user clicks a point.
"""
import sys, os, json, logging, time
from pathlib import Path

import numpy as np
import torch

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger("precompute")

# On-disk layout of the precompute cache (all under ./cache next to this file).
CACHE_DIR = Path(__file__).parent / "cache"
CACHE_FILE = CACHE_DIR / "embeddings_tsne.npz"   # final embeddings + t-SNE coords
META_FILE = CACHE_DIR / "metadata.json"          # per-sample text/label/category metadata
SAMPLES_FILE = CACHE_DIR / "samples.json"        # downloaded (optionally subsampled) dataset rows
EMB_CHUNKS_DIR = CACHE_DIR / "emb_chunks_mini"  # NEW folder so old chunks don't collide
EMB_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
DATASET_ID = "neuralchemy/Prompt-injection-dataset"
DATASET_CONFIG = "core"
# Tunables overridable from the environment (strings -> int at import time).
BATCH_SIZE = int(os.environ.get("BATCH_SIZE", "32"))
MAX_LENGTH = int(os.environ.get("MAX_LENGTH", "256"))
TSNE_PERPLEXITY = 30
TSNE_SEED = 42
# SAMPLE_SIZE=0 (the default) means "use the full dataset" (falsy -> None).
SAMPLE_SIZE = int(os.environ.get("SAMPLE_SIZE", "0")) or None
# Per-run wall-clock budget (seconds) for the embedding stage; enables resumable runs.
TIME_BUDGET = int(os.environ.get("TIME_BUDGET", "35"))
# Pipeline stage selector: "status" | "download" | "embed" | "tsne" | "auto".
STAGE = os.environ.get("STAGE", "auto")
|
| 32 |
|
| 33 |
+
def prepare_samples():
|
| 34 |
+
if SAMPLES_FILE.exists():
|
| 35 |
+
with open(SAMPLES_FILE, "r", encoding="utf-8") as f:
|
| 36 |
+
s = json.load(f)
|
| 37 |
+
logger.info("Loaded existing samples.json (%d samples)", len(s))
|
| 38 |
+
return s
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
from datasets import load_dataset
|
| 40 |
+
logger.info("Downloading %s/%s", DATASET_ID, DATASET_CONFIG)
|
|
|
|
| 41 |
ds = load_dataset(DATASET_ID, DATASET_CONFIG)
|
|
|
|
|
|
|
| 42 |
all_samples = []
|
| 43 |
for split_name in ["train", "validation", "test"]:
|
| 44 |
if split_name in ds:
|
| 45 |
+
for row in ds[split_name]:
|
|
|
|
|
|
|
| 46 |
all_samples.append({
|
| 47 |
"text": row["text"],
|
| 48 |
"label": int(row["label"]),
|
|
|
|
| 51 |
"source": row.get("source", ""),
|
| 52 |
"split": split_name,
|
| 53 |
})
|
| 54 |
+
logger.info("Total %d", len(all_samples))
|
| 55 |
+
if SAMPLE_SIZE and SAMPLE_SIZE < len(all_samples):
|
| 56 |
+
import random
|
| 57 |
+
random.seed(42)
|
| 58 |
+
by_cat = {}
|
| 59 |
+
for s in all_samples:
|
| 60 |
+
by_cat.setdefault(s["category"], []).append(s)
|
| 61 |
+
total = len(all_samples)
|
| 62 |
+
sampled = []
|
| 63 |
+
for cat, items in by_cat.items():
|
| 64 |
+
n = max(1, round(len(items) / total * SAMPLE_SIZE))
|
| 65 |
+
sampled.extend(random.sample(items, min(n, len(items))))
|
| 66 |
+
random.shuffle(sampled)
|
| 67 |
+
all_samples = sampled
|
| 68 |
+
logger.info("Subsampled to %d", len(all_samples))
|
| 69 |
+
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
| 70 |
+
with open(SAMPLES_FILE, "w", encoding="utf-8") as f:
|
| 71 |
+
json.dump(all_samples, f, ensure_ascii=False)
|
| 72 |
return all_samples
|
| 73 |
|
| 74 |
|
| 75 |
+
def mean_pool(last_hidden, attention_mask):
    """Masked mean of token vectors over the sequence dimension.

    Padding positions (attention_mask == 0) are excluded from both the sum
    and the divisor; the divisor is clamped to avoid division by zero for
    all-padding rows.
    """
    weights = attention_mask.unsqueeze(-1).float()
    summed = (last_hidden * weights).sum(dim=1)
    token_counts = weights.sum(dim=1).clamp(min=1e-9)
    return summed / token_counts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
+
def embed_chunked(samples):
    """Compute MiniLM embeddings for *samples* in resumable per-batch chunks.

    Each completed batch is saved as EMB_CHUNKS_DIR/<batch>.npy, so an
    interrupted run resumes from the first missing chunk. Stops early once
    TIME_BUDGET seconds have elapsed in this run.

    Returns True when every batch exists on disk, False otherwise.
    """
    EMB_CHUNKS_DIR.mkdir(parents=True, exist_ok=True)
    num_batches = (len(samples) + BATCH_SIZE - 1) // BATCH_SIZE
    finished = {int(path.stem) for path in EMB_CHUNKS_DIR.glob("*.npy")}
    todo = [idx for idx in range(num_batches) if idx not in finished]
    logger.info("Batches: total=%d done=%d todo=%d", num_batches, len(finished), len(todo))
    if not todo:
        return True

    # Defer the (slow) model load until we know there is actual work to do.
    from transformers import AutoTokenizer, AutoModel
    logger.info("Loading MiniLM model...")
    load_start = time.time()
    tokenizer = AutoTokenizer.from_pretrained(EMB_MODEL_ID)
    model = AutoModel.from_pretrained(EMB_MODEL_ID)
    model.eval()
    logger.info("Model loaded in %.1fs", time.time() - load_start)

    texts = [sample["text"] for sample in samples]
    run_start = time.time()
    processed = 0
    for batch_idx in todo:
        # Respect the wall-clock budget so the hosting runtime isn't blocked.
        if time.time() - run_start > TIME_BUDGET:
            logger.info("Time budget reached after %d batches", processed)
            break
        lo = batch_idx * BATCH_SIZE
        batch_texts = texts[lo:lo + BATCH_SIZE]
        encoded = tokenizer(batch_texts, return_tensors="pt", truncation=True, max_length=MAX_LENGTH, padding=True)
        with torch.no_grad():
            outputs = model(**encoded)
        vectors = mean_pool(outputs.last_hidden_state, encoded["attention_mask"])
        # L2-normalize so downstream cosine/t-SNE distances are well-behaved.
        vectors = torch.nn.functional.normalize(vectors, p=2, dim=1)
        np.save(EMB_CHUNKS_DIR / f"{batch_idx}.npy", vectors.cpu().numpy().astype(np.float32))
        processed += 1
        if processed % 10 == 0 or processed == len(todo):
            logger.info("batch %d/%d (this run=%d elapsed=%.1fs)", batch_idx+1, num_batches, processed, time.time()-run_start)
    remaining = len(todo) - processed
    logger.info("This run: %d batches; remaining: %d", processed, remaining)
    return remaining == 0
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def assemble_and_tsne(samples):
    """Concatenate all embedding chunks, project to 2-D with t-SNE, and save.

    Expects every chunk produced by embed_chunked() to be on disk. Writes
    embeddings + 2-D coordinates to CACHE_FILE (npz) and per-sample metadata
    to META_FILE (json).

    Raises FileNotFoundError if any chunk file is missing.
    """
    from sklearn.manifold import TSNE
    num_batches = (len(samples) + BATCH_SIZE - 1) // BATCH_SIZE
    parts = [np.load(EMB_CHUNKS_DIR / f"{b}.npy") for b in range(num_batches)]
    emb = np.concatenate(parts, axis=0)
    logger.info("Embeddings shape %s", emb.shape)
    n = emb.shape[0]
    # t-SNE requires perplexity < n_samples; clamp so tiny test runs don't crash.
    # (Previously max(5, n-1) could still yield perplexity >= n for n <= 5.)
    perp = max(1, min(TSNE_PERPLEXITY, n - 1))
    logger.info("t-SNE perp=%d...", perp)
    t0 = time.time()
    try:
        tsne = TSNE(n_components=2, perplexity=perp, random_state=TSNE_SEED,
                    max_iter=1000, learning_rate="auto", init="pca")
    except TypeError:
        # scikit-learn < 1.5 does not accept max_iter; fall back to the old
        # n_iter keyword. (The original code repeated max_iter here, making
        # the fallback dead code.)
        tsne = TSNE(n_components=2, perplexity=perp, random_state=TSNE_SEED,
                    n_iter=1000, learning_rate="auto", init="pca")
    coords = tsne.fit_transform(emb)
    logger.info("t-SNE done %.1fs", time.time() - t0)
    np.savez_compressed(CACHE_FILE, embeddings=emb, tsne_2d=coords)
    # Metadata mirrors the sample order of the embedding matrix row-for-row.
    meta = [{"text": s["text"], "label": s["label"], "category": s["category"],
             "severity": s["severity"], "source": s["source"], "split": s["split"]}
            for s in samples]
    with open(META_FILE, "w", encoding="utf-8") as f:
        json.dump(meta, f, ensure_ascii=False)
    logger.info("Cache complete at %s", CACHE_DIR)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def status():
    """Print a one-line progress summary of the precompute pipeline."""
    n_samples = 0
    if SAMPLES_FILE.exists():
        with open(SAMPLES_FILE, "r", encoding="utf-8") as f:
            n_samples = len(json.load(f))
    chunk_count = 0
    if EMB_CHUNKS_DIR.exists():
        chunk_count = len(list(EMB_CHUNKS_DIR.glob("*.npy")))
    total_batches = (n_samples + BATCH_SIZE - 1) // BATCH_SIZE if n_samples else 0
    cache_done = CACHE_FILE.exists() and META_FILE.exists()
    print(f"samples={n_samples} batches_done={chunk_count}/{total_batches} final_cache={cache_done}")
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def main():
    """Run the pipeline stages selected by the STAGE environment variable.

    STAGE is one of "status", "download", "embed", "tsne", or "auto"
    (the default), which runs download -> embed -> tsne, returning early
    if the embedding stage has not yet finished all batches.
    """
    if STAGE == "status":
        status()
        return
    if STAGE in ("download", "auto"):
        samples = prepare_samples()
        if STAGE == "download":
            return
    else:
        # embed/tsne stages rely on a prior download having written samples.json.
        with open(SAMPLES_FILE, "r", encoding="utf-8") as f:
            samples = json.load(f)
    if STAGE in ("embed", "auto"):
        fully_embedded = embed_chunked(samples)
        if STAGE == "embed" or not fully_embedded:
            return
    if STAGE in ("tsne", "auto"):
        assemble_and_tsne(samples)
|
| 178 |
|
| 179 |
|
| 180 |
# Script entry point: run the staged precompute pipeline.
if __name__ == "__main__":
    main()
|