TrueLens-Forensics / evaluate.py
abhisonu01's picture
Upload folder using huggingface_hub
5f1d750 verified
"""
TrueLens Forensic Suite - Batch Evaluation Script
================================================
Run this to evaluate the local forensic engine on your test dataset.
Usage:
python evaluate.py --fake-dir "data/v1_fake" --real-dir "data/v1_real"
"""
import os
import sys
import argparse
import time
from pathlib import Path
from PIL import Image
from src.engine.core_engine import ForensicScanner
def load_images_from_dir(directory: str):
"""Load all image files from a directory."""
exts = {'.jpg', '.jpeg', '.png', '.webp', '.bmp'}
files = []
for f in sorted(os.listdir(directory)):
if Path(f).suffix.lower() in exts:
files.append(os.path.join(directory, f))
return files
def evaluate(scanner: ForensicScanner, fake_dir: str, real_dir: str):
fake_images = load_images_from_dir(fake_dir)
real_images = load_images_from_dir(real_dir)
print(f"\n{'='*70}")
print("TRUELENS FORENSIC SUITE — BATCH EVALUATION")
print(f"{'='*70}")
print(f"Mode: Local Forensic Engine")
print(f"Fake samples: {len(fake_images)}")
print(f"Real samples: {len(real_images)}")
print(f"{'='*70}\n")
results = {
"tp": 0, "fp": 0, "tn": 0, "fn": 0,
"uncertain_fake": 0, "uncertain_real": 0,
"errors": 0,
"details": []
}
def process_batch(images, ground_truth):
gt = ground_truth.lower()
for path in images:
fname = os.path.basename(path)
try:
start = time.time()
label, confidence, status, margin, detail = scanner.predict_image(path)
elapsed = time.time() - start
pred = label.lower()
record = {
"file": fname,
"ground_truth": gt,
"prediction": pred,
"confidence": confidence,
"status": status,
"latency": f"{elapsed:.2f}s"
}
if pred == "fake":
if gt == "fake":
results["tp"] += 1
record["result"] = "CORRECT"
else:
results["fp"] += 1
record["result"] = "FALSE POSITIVE"
elif pred == "real":
if gt == "real":
results["tn"] += 1
record["result"] = "CORRECT"
else:
results["fn"] += 1
record["result"] = "FALSE NEGATIVE"
else: # uncertain
record["result"] = "UNCERTAIN"
if gt == "fake":
results["uncertain_fake"] += 1
else:
results["uncertain_real"] += 1
results["details"].append(record)
# Print live result
symbol = "OK" if record["result"] == "CORRECT" else "XX"
print(f" [{symbol}] {fname:50s} => {pred.upper():10s} (conf={confidence:.3f}) [{record['result']}]")
except Exception as e:
results["errors"] += 1
print(f" [ERR] {fname:50s} => ERROR: {e}")
results["details"].append({
"file": fname,
"ground_truth": gt,
"prediction": "ERROR",
"error": str(e)
})
print("[1/2] Processing FAKE images...")
process_batch(fake_images, "fake")
print("\n[2/2] Processing REAL images...")
process_batch(real_images, "real")
# Calculate metrics
total_fake = len(fake_images)
total_real = len(real_images)
total = total_fake + total_real
tp, fp, tn, fn = results["tp"], results["fp"], results["tn"], results["fn"]
uncertain = results["uncertain_fake"] + results["uncertain_real"]
accuracy = (tp + tn) / total if total > 0 else 0
precision = tp / (tp + fp) if (tp + fp) > 0 else 0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
print(f"\n{'='*70}")
print("EVALUATION RESULTS")
print(f"{'='*70}")
print(f" Total Samples: {total}")
print(f" Correct: {tp + tn} ({(tp+tn)/total*100:.1f}%)")
print(f" Errors: {results['errors']}")
print(f" Uncertain: {uncertain}")
print()
print(f" True Positives: {tp}")
print(f" True Negatives: {tn}")
print(f" False Positives: {fp}")
print(f" False Negatives: {fn}")
print()
print(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
print(f" Precision: {precision:.4f}")
print(f" Recall/Sensitivity: {recall:.4f}")
print(f" Specificity: {specificity:.4f}")
print(f" F1 Score: {f1:.4f}")
print(f"{'='*70}\n")
# Per-class breakdown
print("PER-CLASS BREAKDOWN:")
fake_correct = results["tp"]
fake_wrong = results["fn"] + results["uncertain_fake"]
real_correct = results["tn"]
real_wrong = results["fp"] + results["uncertain_real"]
print(f" Fake Detection Rate: {fake_correct}/{total_fake} = {fake_correct/total_fake*100:.1f}%")
print(f" Real Detection Rate: {real_correct}/{total_real} = {real_correct/total_real*100:.1f}%")
print()
return results
def main():
parser = argparse.ArgumentParser(description="Evaluate TrueLens Forensic Suite")
parser.add_argument("--fake-dir", default="data/v1_fake", help="Directory with fake images")
parser.add_argument("--real-dir", default="data/v1_real", help="Directory with real images")
args = parser.parse_args()
print("Initializing Forensic Scanner...")
scanner = ForensicScanner(mode="CPU")
if not os.path.exists(args.fake_dir):
print(f"ERROR: Fake directory not found: {args.fake_dir}")
sys.exit(1)
if not os.path.exists(args.real_dir):
print(f"ERROR: Real directory not found: {args.real_dir}")
sys.exit(1)
evaluate(scanner, args.fake_dir, args.real_dir)
if __name__ == "__main__":
main()