Spaces:

evanec
/

coco-demo

Running

File size: 6,136 Bytes
# eval.py
import os
import json
from tqdm import tqdm
import torch
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from src.utils import count_encoder_decoder_params, load_experiment
from src.inference import load_image, generate_caption
from PIL import Image


# Optional COCO caption metrics: CIDEr and ROUGE-L are only computed when
# pycocoevalcap can be imported; HAS_COCOEVAL records the outcome.
try:
    from pycocoevalcap.cider.cider import Cider
    from pycocoevalcap.rouge.rouge import Rouge
except ImportError:
    HAS_COCOEVAL = False
    print("WARNING: pycocoevalcap not installed → CIDEr/ROUGE disabled.")
else:
    HAS_COCOEVAL = True


def evaluate(model, tokenizer, preprocess, image_size=None, data_dir="data/processed", save_dir="checkpoints", device="cuda"):
    """Caption every validation image, score the captions, and save a JSON report.

    Reads ``captions.json`` and ``splits.json`` from ``data_dir``, generates one
    caption per image id listed under ``splits["val"]``, computes corpus-level
    BLEU-1 and BLEU-4 (plus CIDEr and ROUGE-L when ``HAS_COCOEVAL`` is true),
    prints the first 20 predictions with their references, and writes
    ``eval_results.json`` into ``save_dir``.

    Args:
        model: Captioning model, forwarded to ``generate_caption`` and
            ``count_encoder_decoder_params``.
        tokenizer: Tokenizer forwarded to ``generate_caption``.
        preprocess: Image transform forwarded to ``load_image``.
        image_size: Not used by this function. Kept (now optional) so that
            existing positional callers keep working.
        data_dir: Directory holding ``captions.json``, ``splits.json`` and an
            ``images/`` folder with files named ``<id as 12 digits>.jpg``.
        save_dir: Directory that receives ``eval_results.json``; created if
            it does not exist.
        device: Device the image tensor is moved to and that is forwarded to
            ``generate_caption``.

    Returns:
        dict mapping metric name (``"BLEU-1"``, ``"BLEU-4"`` and, when
        available, ``"CIDEr"`` and ``"ROUGE-L"``) to its score.
    """
    captions_path = os.path.join(data_dir, "captions.json")
    splits_path = os.path.join(data_dir, "splits.json")

    # Context managers close the files deterministically instead of leaving
    # the handles to the garbage collector.
    with open(captions_path, encoding="utf-8") as f:
        captions = json.load(f)
    with open(splits_path, encoding="utf-8") as f:
        splits = json.load(f)
    val_ids = splits["val"]

    preds = []
    refs_strings = []  # raw reference captions, one list per image

    print(f"Running evaluation on {len(val_ids)} images…\n")
    # Only inference needs gradient tracking disabled; scoring and file I/O
    # below do not touch the model's forward pass.
    with torch.no_grad():
        for img_id in tqdm(val_ids, desc="Evaluating"):
            img_path = os.path.join(data_dir, "images", f"{int(img_id):012d}.jpg")
            img_tensor = load_image(img_path, preprocess).to(device)

            preds.append(generate_caption(model, tokenizer, img_tensor, device=device))
            refs_strings.append(captions[str(img_id)]["captions"])

    # Whitespace tokenization for BLEU, done once and shared by both scores.
    refs_tokenized = [[ref.split() for ref in refs] for refs in refs_strings]
    hyps_tokenized = [pred.split() for pred in preds]

    # Print 20 sample predictions
    num_examples = 20
    print("\nSample Predictions:\n")
    for img_id, pred, refs in zip(val_ids[:num_examples], preds, refs_strings):
        print(f"Image ID: {img_id}")
        print(f"Prediction: {pred}")
        print("Ground Truths:")
        for ref in refs:
            print(f"  - {ref}")
        print("-" * 60)

    # BLEU
    smoothie = SmoothingFunction().method3
    scores = {
        "BLEU-1": corpus_bleu(
            refs_tokenized, hyps_tokenized,
            weights=(1, 0, 0, 0),
            smoothing_function=smoothie,
        ),
        "BLEU-4": corpus_bleu(
            refs_tokenized, hyps_tokenized,
            weights=(0.25, 0.25, 0.25, 0.25),
            smoothing_function=smoothie,
        ),
    }

    # CIDEr / ROUGE
    if HAS_COCOEVAL:
        cider_refs = {}
        cider_preds = {}
        for img_id, pred, refs in zip(val_ids, preds, refs_strings):
            cid = str(int(img_id))
            # Reuse the references gathered during inference so the metric
            # sees exactly the captions that were logged, rather than doing a
            # second lookup under a differently formatted key.
            cider_refs[cid] = refs
            cider_preds[cid] = [pred]

        cider_score, _ = Cider().compute_score(cider_refs, cider_preds)
        scores["CIDEr"] = cider_score

        rouge_score, _ = Rouge().compute_score(cider_refs, cider_preds)
        scores["ROUGE-L"] = rouge_score

    # Save all samples
    samples_full = [
        {
            "id": int(img_id),
            "prediction": pred,
            "references": refs,
            "image": f"{int(img_id):012d}.jpg",
        }
        for img_id, pred, refs in zip(val_ids, preds, refs_strings)
    ]

    # Save a preview subset (first 20)
    samples_preview = samples_full[:num_examples]

    param_info = count_encoder_decoder_params(model)

    # Make sure the output directory exists; an empty save_dir means the
    # current working directory, which needs no creation.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    out_path = os.path.join(save_dir, "eval_results.json")

    with open(out_path, "w", encoding="utf-8") as f:
        json.dump({
            "scores": scores,
            "derived_params": param_info,
            "samples_preview": samples_preview,
            "samples_full": samples_full,
        }, f, indent=2)

    # Print final scores
    print("\nEvaluation Scores:")
    for name, value in scores.items():
        print(f"{name}: {value:.4f}")

    print(f"\nSaved detailed results to: {out_path}")

    return scores


if __name__ == "__main__":
    # Command-line entry point: load a saved experiment and evaluate it on the
    # validation split, writing the report into the checkpoint directory.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--checkpoint", type=str, default="checkpoints/vision_t5",
        help="Experiment directory to load; eval_results.json is written here.",
    )
    parser.add_argument(
        "--data_dir", type=str, default="data/processed",
        help="Directory containing captions.json, splits.json and images/.",
    )
    args = parser.parse_args()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Device: {device}")

    # The second-to-last value returned by load_experiment is not needed here.
    model, tokenizer, _meta, config = load_experiment(args.checkpoint, device=device)
    image_size = config["model"].get("image_size", 224)
    # NOTE(review): build_coco_transform is not imported anywhere in the
    # visible part of this file, so this line raises NameError unless the name
    # is brought into scope elsewhere. Add the import from the module that
    # defines it.
    preprocess = build_coco_transform(image_size=image_size)

    evaluate(
        model,
        tokenizer,
        preprocess=preprocess,
        # Passed explicitly: evaluate() declares image_size in its signature
        # and the previous call omitted it.
        image_size=image_size,
        data_dir=args.data_dir,
        save_dir=args.checkpoint,
        device=device,
    )

    # python evaluate.py --checkpoint checkpoints/vision_t5/20251117_171912