Spaces:

MAS-AI-0000
/

Authentica

Sleeping

File size: 2,943 Bytes

3cdaafb

import argparse
import json
import logging
import random
from pathlib import Path
from typing import Sequence

import numpy as np
import torch
from tqdm import tqdm

from .binoculars_detector import Binoculars
from ..utils import evaluate_metrics

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

_LOG_PATH = Path(__file__).resolve().parents[3] / "runs" / "val-other_detector.txt"
_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)

def load_jsonl(file_path):
    out = []
    with open(file_path, mode='r', encoding='utf-8') as jsonl_file:
        for line in jsonl_file:
            item = json.loads(line)
            out.append(item)
    print(f"Loaded {len(out)} examples from {file_path}")
    return out

def dict2str(metrics):
    out_str=''
    for key in metrics.keys():
        out_str+=f"{key}:{metrics[key]} "
    return out_str

def experiment(args):
    # Initialize Binoculars (experiments in paper use the "accuracy" mode threshold wherever applicable)
    bino = Binoculars(mode="accuracy", max_token_observed=args.tokens_seen)

    logging.info(f"Test in {args.test_data_path}")
    test_data = load_jsonl(args.test_data_path)

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.shuffle(test_data)
    predictions = []
    labels = []
    for i, item in tqdm(enumerate(test_data), total=len(test_data)):
        text = item["text"]
        label = item["label"]
        src = item["src"]
        bino_score = -bino.compute_score(text)
        
        if bino_score is None or np.isnan(bino_score) or np.isinf(bino_score):
            bino_score = 0
        if 'human' in src:
            labels.append(0)
        else:
            labels.append(1)
        predictions.append(bino_score)
    metric = evaluate_metrics(labels, predictions)
    print(dict2str(metric))
    with _LOG_PATH.open("a+", encoding="utf-8") as f:
        f.write(f"binoculars  {args.test_data_path}\n")
        f.write(f"{dict2str(metric)}\n")


def build_argument_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--test_data_path',
        type=str,
        default='/path/to/RealBench/Deepfake/no_attack/test.jsonl',
        help="Path to the test data. could be several files with ','. Note that the data should have been perturbed.",
    )
    parser.add_argument("--tokens_seen", type=int, default=512, help="Number of tokens seen by the model")
    parser.add_argument('--DEVICE', default="cuda", type=str, required=False)
    parser.add_argument('--seed', default=2023, type=int, required=False)
    return parser


def main(argv: Sequence[str] | None = None) -> None:
    parser = build_argument_parser()
    args = parser.parse_args(argv)
    experiment(args)


if __name__ == '__main__':
    main()