Authentica / detree /utils /detectors /binoculars_evaluation.py
MAS-AI-0000's picture
Upload 6 files
3cdaafb verified
import argparse
import json
import logging
import random
from pathlib import Path
from typing import Sequence
import numpy as np
import torch
from tqdm import tqdm
from .binoculars_detector import Binoculars
from ..utils import evaluate_metrics
# Configure the root logger once at import: INFO level, with every record
# prefixed by its timestamp and level name.
logging.basicConfig(format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO)

# Results are appended to runs/val-other_detector.txt, where runs/ sits in the
# directory three levels above the one containing this file.
_LOG_PATH = Path(__file__).resolve().parents[3].joinpath("runs", "val-other_detector.txt")

# Create runs/ (and any missing ancestors) up front so later appends cannot
# fail on a missing directory; an already existing directory is not an error.
_LOG_PATH.parent.mkdir(exist_ok=True, parents=True)
def load_jsonl(file_path):
    """Read a JSON Lines file into a list of parsed records.

    Every non-blank line is decoded with ``json.loads``. Blank or
    whitespace-only lines (for example an empty line at the end of the file)
    are skipped rather than raising ``json.JSONDecodeError``.

    Args:
        file_path: Path of the UTF-8 encoded JSONL file to read.

    Returns:
        A list holding one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, mode='r', encoding='utf-8') as jsonl_file:
        # ``line.strip()`` is falsy for empty / whitespace-only lines, which
        # carry no record and would otherwise make ``json.loads`` fail.
        out = [json.loads(line) for line in jsonl_file if line.strip()]
    print(f"Loaded {len(out)} examples from {file_path}")
    return out
def dict2str(metrics):
    """Flatten a metrics mapping into one line of ``key:value`` pairs.

    Each pair is followed by a single space (so a non-empty result ends with
    a trailing space); an empty mapping yields an empty string.

    Args:
        metrics: Mapping from metric name to metric value.

    Returns:
        The concatenated ``"key:value "`` fragments in the mapping's
        iteration order.
    """
    return ''.join(f"{name}:{value} " for name, value in metrics.items())
def experiment(args):
    """Score a JSONL test set with Binoculars and record the metrics.

    Loads the records from ``args.test_data_path``, shuffles them with a
    fixed seed, computes one score per record, evaluates the scores against
    labels derived from each record's ``src`` field, prints the metrics and
    appends them to the run log at ``_LOG_PATH``.

    Args:
        args: Parsed command-line namespace. Reads ``tokens_seen``,
            ``test_data_path`` and ``seed``.

    Raises:
        KeyError: If a record lacks the ``"text"`` or ``"src"`` field.
    """
    # Initialize Binoculars (experiments in paper use the "accuracy" mode threshold wherever applicable)
    bino = Binoculars(mode="accuracy", max_token_observed=args.tokens_seen)
    logging.info("Test in %s", args.test_data_path)
    test_data = load_jsonl(args.test_data_path)

    # Seed every RNG in use before shuffling so runs are repeatable.
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.shuffle(test_data)

    predictions = []
    labels = []
    for item in tqdm(test_data, total=len(test_data)):
        text = item["text"]
        src = item["src"]

        # Validate the raw score BEFORE negating it: negating ``None`` would
        # raise ``TypeError`` and the fallback below would never be reached.
        raw_score = bino.compute_score(text)
        if raw_score is None or np.isnan(raw_score) or np.isinf(raw_score):
            bino_score = 0  # neutral fallback for unusable scores
        else:
            # The sign is flipped so that larger predictions pair with
            # label 1 -- presumably because lower Binoculars scores indicate
            # machine text; confirm against the detector implementation.
            bino_score = -raw_score

        # Ground truth comes from the source tag: any source containing
        # "human" is class 0, everything else is class 1.
        labels.append(0 if 'human' in src else 1)
        predictions.append(bino_score)

    metric = evaluate_metrics(labels, predictions)
    print(dict2str(metric))
    with _LOG_PATH.open("a+", encoding="utf-8") as f:
        f.write(f"binoculars {args.test_data_path}\n")
        f.write(f"{dict2str(metric)}\n")
def build_argument_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the Binoculars evaluation script.

    Returns:
        A parser exposing ``--test_data_path``, ``--tokens_seen``,
        ``--DEVICE`` and ``--seed``, all optional with defaults.
    """
    # One (flag, keyword-arguments) entry per option, registered in order.
    option_table = (
        (
            '--test_data_path',
            {
                "type": str,
                "default": '/path/to/RealBench/Deepfake/no_attack/test.jsonl',
                "help": "Path to the test data. could be several files with ','. Note that the data should have been perturbed.",
            },
        ),
        (
            "--tokens_seen",
            {"type": int, "default": 512, "help": "Number of tokens seen by the model"},
        ),
        ('--DEVICE', {"default": "cuda", "type": str, "required": False}),
        ('--seed', {"default": 2023, "type": int, "required": False}),
    )

    cli = argparse.ArgumentParser()
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli
def main(argv: Sequence[str] | None = None) -> None:
    """Parse command-line arguments and run the evaluation.

    Args:
        argv: Argument strings to parse. ``None`` is passed through to
            ``parse_args`` unchanged, in which case argparse reads the
            process's own command line.
    """
    experiment(build_argument_parser().parse_args(argv))
# Run the evaluation only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()