""" TrueLens Forensic Suite - Batch Evaluation Script ================================================ Run this to evaluate the local forensic engine on your test dataset. Usage: python evaluate.py --fake-dir "data/v1_fake" --real-dir "data/v1_real" """ import os import sys import argparse import time from pathlib import Path from PIL import Image from src.engine.core_engine import ForensicScanner def load_images_from_dir(directory: str): """Load all image files from a directory.""" exts = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'} files = [] for f in sorted(os.listdir(directory)): if Path(f).suffix.lower() in exts: files.append(os.path.join(directory, f)) return files def evaluate(scanner: ForensicScanner, fake_dir: str, real_dir: str): fake_images = load_images_from_dir(fake_dir) real_images = load_images_from_dir(real_dir) print(f"\n{'='*70}") print("TRUELENS FORENSIC SUITE — BATCH EVALUATION") print(f"{'='*70}") print(f"Mode: Local Forensic Engine") print(f"Fake samples: {len(fake_images)}") print(f"Real samples: {len(real_images)}") print(f"{'='*70}\n") results = { "tp": 0, "fp": 0, "tn": 0, "fn": 0, "uncertain_fake": 0, "uncertain_real": 0, "errors": 0, "details": [] } def process_batch(images, ground_truth): gt = ground_truth.lower() for path in images: fname = os.path.basename(path) try: start = time.time() label, confidence, status, margin, detail = scanner.predict_image(path) elapsed = time.time() - start pred = label.lower() record = { "file": fname, "ground_truth": gt, "prediction": pred, "confidence": confidence, "status": status, "latency": f"{elapsed:.2f}s" } if pred == "fake": if gt == "fake": results["tp"] += 1 record["result"] = "CORRECT" else: results["fp"] += 1 record["result"] = "FALSE POSITIVE" elif pred == "real": if gt == "real": results["tn"] += 1 record["result"] = "CORRECT" else: results["fn"] += 1 record["result"] = "FALSE NEGATIVE" else: # uncertain record["result"] = "UNCERTAIN" if gt == "fake": results["uncertain_fake"] += 1 else: results["uncertain_real"] += 1 results["details"].append(record) # Print live result symbol = "OK" if record["result"] == "CORRECT" else "XX" print(f" [{symbol}] {fname:50s} => {pred.upper():10s} (conf={confidence:.3f}) [{record['result']}]") except Exception as e: results["errors"] += 1 print(f" [ERR] {fname:50s} => ERROR: {e}") results["details"].append({ "file": fname, "ground_truth": gt, "prediction": "ERROR", "error": str(e) }) print("[1/2] Processing FAKE images...") process_batch(fake_images, "fake") print("\n[2/2] Processing REAL images...") process_batch(real_images, "real") # Calculate metrics total_fake = len(fake_images) total_real = len(real_images) total = total_fake + total_real tp, fp, tn, fn = results["tp"], results["fp"], results["tn"], results["fn"] uncertain = results["uncertain_fake"] + results["uncertain_real"] accuracy = (tp + tn) / total if total > 0 else 0 precision = tp / (tp + fp) if (tp + fp) > 0 else 0 recall = tp / (tp + fn) if (tp + fn) > 0 else 0 specificity = tn / (tn + fp) if (tn + fp) > 0 else 0 f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0 print(f"\n{'='*70}") print("EVALUATION RESULTS") print(f"{'='*70}") print(f" Total Samples: {total}") print(f" Correct: {tp + tn} ({(tp+tn)/total*100:.1f}%)") print(f" Errors: {results['errors']}") print(f" Uncertain: {uncertain}") print() print(f" True Positives: {tp}") print(f" True Negatives: {tn}") print(f" False Positives: {fp}") print(f" False Negatives: {fn}") print() print(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") print(f" Precision: {precision:.4f}") print(f" Recall/Sensitivity: {recall:.4f}") print(f" Specificity: {specificity:.4f}") print(f" F1 Score: {f1:.4f}") print(f"{'='*70}\n") # Per-class breakdown print("PER-CLASS BREAKDOWN:") fake_correct = results["tp"] fake_wrong = results["fn"] + results["uncertain_fake"] real_correct = results["tn"] real_wrong = results["fp"] + results["uncertain_real"] print(f" Fake Detection Rate: {fake_correct}/{total_fake} = {fake_correct/total_fake*100:.1f}%") print(f" Real Detection Rate: {real_correct}/{total_real} = {real_correct/total_real*100:.1f}%") print() return results def main(): parser = argparse.ArgumentParser(description="Evaluate TrueLens Forensic Suite") parser.add_argument("--fake-dir", default="data/v1_fake", help="Directory with fake images") parser.add_argument("--real-dir", default="data/v1_real", help="Directory with real images") args = parser.parse_args() print("Initializing Forensic Scanner...") scanner = ForensicScanner(mode="CPU") if not os.path.exists(args.fake_dir): print(f"ERROR: Fake directory not found: {args.fake_dir}") sys.exit(1) if not os.path.exists(args.real_dir): print(f"ERROR: Real directory not found: {args.real_dir}") sys.exit(1) evaluate(scanner, args.fake_dir, args.real_dir) if __name__ == "__main__": main()