Spaces:
Sleeping
Sleeping
| """ | |
| TrueLens Forensic Suite - Batch Evaluation Script | |
| ================================================ | |
| Run this to evaluate the local forensic engine on your test dataset. | |
| Usage: | |
| python evaluate.py --fake-dir "data/v1_fake" --real-dir "data/v1_real" | |
| """ | |
| import os | |
| import sys | |
| import argparse | |
| import time | |
| from pathlib import Path | |
| from PIL import Image | |
| from src.engine.core_engine import ForensicScanner | |
def load_images_from_dir(directory: str):
    """Return sorted paths of supported image files found in *directory*.

    Files are matched by extension (case-insensitive) against a fixed set
    of image suffixes; anything else is skipped.
    """
    allowed_suffixes = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'}
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if Path(name).suffix.lower() in allowed_suffixes
    ]
def evaluate(scanner: ForensicScanner, fake_dir: str, real_dir: str):
    """Run the scanner over labeled image sets and print a metrics report.

    Args:
        scanner: Engine exposing ``predict_image(path)`` returning a
            ``(label, confidence, status, margin, detail)`` tuple.
        fake_dir: Directory of images whose ground truth is "fake".
        real_dir: Directory of images whose ground truth is "real".

    Returns:
        dict: confusion counts ("tp"/"fp"/"tn"/"fn"), uncertain counts,
        an "errors" count, and a "details" list with one record per file.
    """
    fake_images = load_images_from_dir(fake_dir)
    real_images = load_images_from_dir(real_dir)

    print(f"\n{'='*70}")
    print("TRUELENS FORENSIC SUITE — BATCH EVALUATION")
    print(f"{'='*70}")
    print("Mode: Local Forensic Engine")  # fixed: was an f-string with no placeholders
    print(f"Fake samples: {len(fake_images)}")
    print(f"Real samples: {len(real_images)}")
    print(f"{'='*70}\n")

    results = {
        "tp": 0, "fp": 0, "tn": 0, "fn": 0,
        "uncertain_fake": 0, "uncertain_real": 0,
        "errors": 0,
        "details": []
    }

    def process_batch(images, ground_truth):
        # Score every image, tallying each prediction against the
        # batch-wide ground-truth label ("fake" or "real").
        gt = ground_truth.lower()
        for path in images:
            fname = os.path.basename(path)
            try:
                start = time.time()
                label, confidence, status, margin, detail = scanner.predict_image(path)
                elapsed = time.time() - start
                pred = label.lower()
                record = {
                    "file": fname,
                    "ground_truth": gt,
                    "prediction": pred,
                    "confidence": confidence,
                    "status": status,
                    "latency": f"{elapsed:.2f}s"
                }
                if pred == "fake":
                    if gt == "fake":
                        results["tp"] += 1
                        record["result"] = "CORRECT"
                    else:
                        results["fp"] += 1
                        record["result"] = "FALSE POSITIVE"
                elif pred == "real":
                    if gt == "real":
                        results["tn"] += 1
                        record["result"] = "CORRECT"
                    else:
                        results["fn"] += 1
                        record["result"] = "FALSE NEGATIVE"
                else:  # any other label is treated as an abstention
                    record["result"] = "UNCERTAIN"
                    if gt == "fake":
                        results["uncertain_fake"] += 1
                    else:
                        results["uncertain_real"] += 1
                results["details"].append(record)
                # Print live result
                symbol = "OK" if record["result"] == "CORRECT" else "XX"
                print(f"  [{symbol}] {fname:50s} => {pred.upper():10s} (conf={confidence:.3f}) [{record['result']}]")
            except Exception as e:
                # Best-effort batch run: record the failure and keep going
                # so one corrupt file does not abort the whole evaluation.
                results["errors"] += 1
                print(f"  [ERR] {fname:50s} => ERROR: {e}")
                results["details"].append({
                    "file": fname,
                    "ground_truth": gt,
                    "prediction": "ERROR",
                    "error": str(e)
                })

    print("[1/2] Processing FAKE images...")
    process_batch(fake_images, "fake")
    print("\n[2/2] Processing REAL images...")
    process_batch(real_images, "real")

    # Calculate metrics. Every ratio is guarded against an empty
    # denominator so empty directories report 0 instead of raising
    # ZeroDivisionError (the original crashed on an empty class at the
    # per-class breakdown and on total == 0 in the "Correct" line).
    total_fake = len(fake_images)
    total_real = len(real_images)
    total = total_fake + total_real
    tp, fp, tn, fn = results["tp"], results["fp"], results["tn"], results["fn"]
    uncertain = results["uncertain_fake"] + results["uncertain_real"]

    accuracy = (tp + tn) / total if total > 0 else 0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

    print(f"\n{'='*70}")
    print("EVALUATION RESULTS")
    print(f"{'='*70}")
    print(f"  Total Samples: {total}")
    # Reuse the already-guarded accuracy instead of re-dividing by total.
    print(f"  Correct: {tp + tn} ({accuracy * 100:.1f}%)")
    print(f"  Errors: {results['errors']}")
    print(f"  Uncertain: {uncertain}")
    print()
    print(f"  True Positives: {tp}")
    print(f"  True Negatives: {tn}")
    print(f"  False Positives: {fp}")
    print(f"  False Negatives: {fn}")
    print()
    print(f"  Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"  Precision: {precision:.4f}")
    print(f"  Recall/Sensitivity: {recall:.4f}")
    print(f"  Specificity: {specificity:.4f}")
    print(f"  F1 Score: {f1:.4f}")
    print(f"{'='*70}\n")

    # Per-class breakdown (removed the unused fake_wrong/real_wrong locals).
    print("PER-CLASS BREAKDOWN:")
    fake_rate = tp / total_fake * 100 if total_fake > 0 else 0.0
    real_rate = tn / total_real * 100 if total_real > 0 else 0.0
    print(f"  Fake Detection Rate: {tp}/{total_fake} = {fake_rate:.1f}%")
    print(f"  Real Detection Rate: {tn}/{total_real} = {real_rate:.1f}%")
    print()
    return results
def main():
    """CLI entry point: parse args, validate directories, run the evaluation."""
    parser = argparse.ArgumentParser(description="Evaluate TrueLens Forensic Suite")
    parser.add_argument("--fake-dir", default="data/v1_fake", help="Directory with fake images")
    parser.add_argument("--real-dir", default="data/v1_real", help="Directory with real images")
    args = parser.parse_args()

    # Fail fast: validate the input directories BEFORE constructing the
    # scanner, so a typo'd path does not pay for model initialization.
    # isdir (not exists) also rejects a plain file given as a directory.
    if not os.path.isdir(args.fake_dir):
        print(f"ERROR: Fake directory not found: {args.fake_dir}")
        sys.exit(1)
    if not os.path.isdir(args.real_dir):
        print(f"ERROR: Real directory not found: {args.real_dir}")
        sys.exit(1)

    print("Initializing Forensic Scanner...")
    scanner = ForensicScanner(mode="CPU")
    evaluate(scanner, args.fake_dir, args.real_dir)


if __name__ == "__main__":
    main()