#!/usr/bin/env python """EC-SimToken standalone evaluation: score distribution + threshold sweep. Loads a saved checkpoint and reports: 1. p_exist distribution per split (mean/median/p10/p25/p75/p90) 2. AUC-ROC (test_n as null class vs test_s+test_u as positive class) 3. Threshold sweep 0.05→0.95: J&F, Null_S, null_tp_rate, positive_fnr Usage: cd /workspace/SimToken python tools/ec_simtoken_eval.py \ --checkpoint checkpoints/ec_simtoken/ec_simtoken_v1_ep2.pth \ --out_dir runs/ec_simtoken/eval_ep2 """ from __future__ import annotations import argparse, os, sys from functools import partial import numpy as np import torch import transformers from peft import LoraConfig, get_peft_model from torch.utils.data import DataLoader from transformers import AutoConfig from tqdm import tqdm ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, ROOT) os.environ["CUDA_VISIBLE_DEVICES"] = "0" from datasets.dataset_refavs import REFAVS from models.ec_simtoken_model import ECSimtoken_ForCausalLM from utils import utility # ── Defaults (match training command) ──────────────────────────────────────── MLLM = "/workspace/hf_models/Chat-UniVi-7B-v1.5" SAM_CKPT = "/workspace/SimToken/models/segment_anything/sam_vit_h_4b8939.pth" VISION_TOWER = "/workspace/hf_models/clip-vit-large-patch14" DATA_DIR = "data" IGNORE_INDEX = -100 IMAGE_TOKEN_INDEX = -200 AUDIO_TOKEN_INDEX = -300 import re def tokenizer_image_audio_token(prompt, tokenizer, image_token_index=IMAGE_TOKEN_INDEX, audio_token_index=AUDIO_TOKEN_INDEX, num_frames=10, return_tensors=None): prompt_chunks = re.split(r'(|