import os
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from dataset_advanced import COCODataset
from torch.utils.data import random_split
from tqdm import tqdm
from PIL import Image
from pycocoevalcap.cider.cider import Cider
def generate_caption(model, processor, image, device,
                     num_beams=5,
                     max_length=20,
                     length_penalty=1.0):
    """Produce one beam-search caption for a single PIL image.

    Args:
        model: BLIP conditional-generation model (already on *device*).
        processor: matching BlipProcessor for encoding/decoding.
        image: PIL image to caption.
        device: torch device the model lives on.
        num_beams: beam width for beam search.
        max_length: maximum generated token length.
        length_penalty: exponential penalty applied to sequence length.

    Returns:
        The decoded caption string, special tokens stripped.
    """
    batch = processor(images=image, return_tensors="pt").to(device)
    # Inference only — no gradients needed.
    with torch.no_grad():
        output_ids = model.generate(
            **batch,
            num_beams=num_beams,
            max_length=max_length,
            length_penalty=length_penalty,
        )
    return processor.decode(output_ids[0], skip_special_tokens=True)
def evaluate_config(model, processor, val_dataset, device,
                    num_beams, max_length, length_penalty,
                    max_samples=200):
    """Score one decoding configuration with CIDEr on a validation subset.

    Generates a caption for up to *max_samples* validation images and
    compares against the reference captions.

    Args:
        model: BLIP conditional-generation model (already on *device*).
        processor: matching BlipProcessor.
        val_dataset: torch Subset over COCODataset (uses .indices and
            .dataset.annotations — assumes each annotation has "image"
            and "captions" keys).
        device: torch device for generation.
        num_beams / max_length / length_penalty: decoding parameters
            forwarded to generate_caption.
        max_samples: cap on the number of images evaluated.

    Returns:
        The corpus-level CIDEr score (float).
    """
    model.eval()
    cider_scorer = Cider()
    ground_truth = {}
    predictions = {}
    print(f"\nTesting: beams={num_beams}, "
          f"max_len={max_length}, "
          f"len_penalty={length_penalty}")
    for idx in tqdm(range(min(max_samples, len(val_dataset)))):
        # Map the Subset index back to the underlying dataset annotation.
        real_idx = val_dataset.indices[idx]
        ann = val_dataset.dataset.annotations[real_idx]
        image_path = os.path.join("train2017", ann["image"])
        # Context manager closes the file handle; the original leaked
        # one handle per image (PIL keeps the file open lazily).
        with Image.open(image_path) as img:
            image = img.convert("RGB")
        pred_caption = generate_caption(
            model,
            processor,
            image,
            device,
            num_beams=num_beams,
            max_length=max_length,
            length_penalty=length_penalty
        )
        ground_truth[idx] = ann["captions"]
        predictions[idx] = [pred_caption]
    score, _ = cider_scorer.compute_score(ground_truth, predictions)
    print(f"CIDEr: {score:.4f}")
    # Restore training mode for any caller that continues fine-tuning.
    model.train()
    return score
def main():
    """Grid-search BLIP decoding hyperparameters and report CIDEr scores.

    Loads the Phase-2 fine-tuned model, rebuilds the 90/10 train/val
    split, evaluates every (num_beams, max_length, length_penalty)
    combination on the validation subset, and prints a summary table.

    Raises:
        RuntimeError: if the Apple MPS backend is unavailable.
    """
    if not torch.backends.mps.is_available():
        raise RuntimeError("MPS not available.")
    device = torch.device("mps")
    print("Using device:", device)
    # Load best Phase 2 model
    model_dir = "saved_model_phase2"
    processor = BlipProcessor.from_pretrained(model_dir)
    model = BlipForConditionalGeneration.from_pretrained(model_dir)
    model.to(device)
    # Load validation split
    full_dataset = COCODataset(
        "annotations/subset_10k.jsonl",
        "train2017",
        processor
    )
    train_size = int(0.9 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    # Seed the split so the validation set is identical across runs;
    # otherwise CIDEr scores from different runs are not comparable.
    _, val_dataset = random_split(
        full_dataset,
        [train_size, val_size],
        generator=torch.Generator().manual_seed(42)
    )
    # =========================
    # Experiment Grid
    # =========================
    beam_sizes = [5]
    max_lengths = [20]
    length_penalties = [1.0]
    results = []
    for beams in beam_sizes:
        for max_len in max_lengths:
            for lp in length_penalties:
                score = evaluate_config(
                    model,
                    processor,
                    val_dataset,
                    device,
                    num_beams=beams,
                    max_length=max_len,
                    length_penalty=lp
                )
                results.append((beams, max_len, lp, score))
    print("\n===== FINAL RESULTS =====")
    for r in results:
        print(f"Beams={r[0]}, MaxLen={r[1]}, "
              f"LenPenalty={r[2]} -> CIDEr={r[3]:.4f}")
# Script entry point; the stray trailing "|" (scraping residue after
# main()) was a syntax error and has been removed.
if __name__ == "__main__":
    main()