|
|
|
|
|
import os |
|
|
import json |
|
|
from tqdm import tqdm |
|
|
import torch |
|
|
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction |
|
|
from src.utils import count_encoder_decoder_params, load_experiment |
|
|
from src.inference import load_image, generate_caption |
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
|
|
|
# Optional metrics backend: CIDEr and ROUGE-L come from pycocoevalcap.
# HAS_COCOEVAL records whether the import succeeded so that callers can
# skip those metrics when the package is unavailable.
try:
    from pycocoevalcap.cider.cider import Cider
    from pycocoevalcap.rouge.rouge import Rouge
except ImportError:
    HAS_COCOEVAL = False
    print("WARNING: pycocoevalcap not installed → CIDEr/ROUGE disabled.")
else:
    HAS_COCOEVAL = True
|
|
|
|
|
|
|
|
def evaluate(model, tokenizer, preprocess, image_size=None, data_dir="data/processed", save_dir="checkpoints", device="cuda"):
    """Caption every validation image, score the captions, and save the results.

    Reads ``captions.json`` and ``splits.json`` from ``data_dir``, generates one
    caption for each image id in the ``"val"`` split, computes corpus-level
    BLEU-1 and BLEU-4 (plus CIDEr and ROUGE-L when ``HAS_COCOEVAL`` is true),
    prints a preview of the predictions and the scores, and writes everything
    to ``<save_dir>/eval_results.json``.

    Args:
        model: Captioning model; passed to ``generate_caption`` and
            ``count_encoder_decoder_params``.
        tokenizer: Tokenizer passed to ``generate_caption``.
        preprocess: Image transform passed to ``load_image``.
        image_size: Not read by this function. Kept in the signature for
            backward compatibility and optional so callers may omit it.
        data_dir: Directory holding ``captions.json``, ``splits.json`` and an
            ``images/`` folder with files named ``<12-digit id>.jpg``.
        save_dir: Directory that receives ``eval_results.json``; created if it
            does not exist.
        device: Device string the image tensor is moved to and that is
            forwarded to ``generate_caption``.

    Raises:
        KeyError: If ``splits.json`` has no ``"val"`` entry or a validation id
            has no entry in ``captions.json``.
    """
    captions_path = os.path.join(data_dir, "captions.json")
    splits_path = os.path.join(data_dir, "splits.json")

    # Context managers so the file handles are closed deterministically.
    with open(captions_path, encoding="utf-8") as f:
        captions = json.load(f)
    with open(splits_path, encoding="utf-8") as f:
        splits = json.load(f)
    val_ids = splits["val"]

    preds = []           # one generated caption per validation image
    refs_tokenized = []  # per image: list of whitespace-tokenized references (BLEU input)
    refs_strings = []    # per image: list of raw reference strings

    print(f"Running evaluation on {len(val_ids)} images…\n")
    with torch.no_grad():
        for img_id in tqdm(val_ids, desc="Evaluating"):
            img_path = os.path.join(data_dir, "images", f"{int(img_id):012d}.jpg")
            img_tensor = load_image(img_path, preprocess).to(device)

            pred_caption = generate_caption(model, tokenizer, img_tensor, device=device)
            gt_caps = captions[str(img_id)]["captions"]

            refs_tokenized.append([c.split() for c in gt_caps])
            refs_strings.append(gt_caps)
            preds.append(pred_caption)

    # Number of examples printed to the console and stored as the preview.
    num_examples = 20

    print("\nSample Predictions:\n")
    for img_id, pred, refs in zip(val_ids[:num_examples], preds, refs_strings):
        print(f"Image ID: {img_id}")
        print(f"Prediction: {pred}")
        print("Ground Truths:")
        for ref in refs:
            print(f" - {ref}")
        print("-" * 60)

    smoothie = SmoothingFunction().method3
    # Tokenize the hypotheses once and reuse them for both BLEU variants.
    hyps_tokenized = [p.split() for p in preds]

    bleu1 = corpus_bleu(
        refs_tokenized,
        hyps_tokenized,
        weights=(1, 0, 0, 0),
        smoothing_function=smoothie,
    )
    bleu4 = corpus_bleu(
        refs_tokenized,
        hyps_tokenized,
        weights=(0.25, 0.25, 0.25, 0.25),
        smoothing_function=smoothie,
    )

    scores = {"BLEU-1": bleu1, "BLEU-4": bleu4}

    if HAS_COCOEVAL:
        cider_refs = {}
        cider_preds = {}
        for img_id, pred, refs in zip(val_ids, preds, refs_strings):
            cid = str(int(img_id))
            # Reuse the references collected above instead of indexing
            # `captions` again with a differently normalized key, which could
            # raise KeyError for zero-padded string ids.
            cider_refs[cid] = refs
            cider_preds[cid] = [pred]

        cider = Cider()
        cider_score, _ = cider.compute_score(cider_refs, cider_preds)
        scores["CIDEr"] = cider_score

        rouge = Rouge()
        rouge_score, _ = rouge.compute_score(cider_refs, cider_preds)
        scores["ROUGE-L"] = rouge_score

    samples_full = [
        {
            "id": int(img_id),
            "prediction": pred,
            "references": refs,
            "image": f"{int(img_id):012d}.jpg",
        }
        for img_id, pred, refs in zip(val_ids, preds, refs_strings)
    ]
    samples_preview = samples_full[:num_examples]

    param_info = count_encoder_decoder_params(model)

    # Make sure the output directory exists before writing the results.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    out_path = os.path.join(save_dir, "eval_results.json")

    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(
            {
                "scores": scores,
                "derived_params": param_info,
                "samples_preview": samples_preview,
                "samples_full": samples_full,
            },
            f,
            indent=2,
        )

    print("\nEvaluation Scores:")
    for k, v in scores.items():
        print(f"{k}: {v:.4f}")

    print(f"\nSaved detailed results to: {out_path}")
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: load a saved experiment and evaluate it on the
    # validation split, writing the results next to the checkpoint.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--checkpoint", type=str, default="checkpoints/vision_t5")
    parser.add_argument("--data_dir", type=str, default="data/processed")
    args = parser.parse_args()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")

    # The third returned value (metadata) is not needed here.
    model, tokenizer, _meta, config = load_experiment(args.checkpoint, device=device)
    image_size = config["model"].get("image_size", 224)
    # NOTE(review): build_coco_transform is not imported in the visible part of
    # this file; confirm which module defines it and import it at the top.
    preprocess = build_coco_transform(image_size=image_size)

    evaluate(
        model,
        tokenizer,
        preprocess=preprocess,
        # evaluate() declares image_size in its signature; omitting it raised
        # TypeError for a missing required argument.
        image_size=image_size,
        data_dir=args.data_dir,
        save_dir=args.checkpoint,
        device=device,
    )
|
|
|
|
|
|