MuhammedKsee committed on
Commit
e1af245
·
verified ·
1 Parent(s): a11ccb2

Upload utils.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. utils.py +118 -0
utils.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import matplotlib.pyplot as plt
4
+ from PIL import Image
5
+ from torchvision import transforms
6
+ from transformers import CLIPTokenizer
7
+ from config import HParams
8
+ from model import CLIP
9
+
10
# Tokenizer used to encode the candidate texts; it has to be the same one the
# model was trained with, because the model's vocabulary size is taken from it.
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch16")

# Inference-time image preprocessing: resize, center-crop to
# HParams.IMAGE_SIZE, convert to a tensor, then normalize each RGB channel
# with the fixed per-channel mean / std below.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=HParams.IMAGE_SIZE),
        transforms.CenterCrop(size=HParams.IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.481, 0.457, 0.408),
            std=(0.268, 0.261, 0.275),
        ),
    ]
)
20
+
21
def load_model(model_path=HParams.MODEL_PATH, device=HParams.DEVICE):
    """Instantiate ``CLIP`` and load checkpoint weights for inference.

    Args:
        model_path: Path to the checkpoint file. The checkpoint is either a
            bare state dict or a dict holding the state dict under ``"model"``.
        device: Device the model is moved to and the checkpoint is mapped onto.

    Returns:
        The model in eval mode (converted to FP16 when ``device == "cuda"``),
        or ``None`` when the checkpoint file does not exist or applying the
        weights fails. Failures are reported with ``print``.
    """
    print(f"⚙️ Device: {device}")
    model = CLIP(len(tokenizer)).to(device)

    if not os.path.exists(model_path):
        print(f"❌ Model file not found: {model_path}")
        print(" -> Please download 'best_model.pt' from Hugging Face and place it in the root directory.")
        return None

    print(f"📂 Loading model weights from: {model_path}")
    # NOTE(security): torch.load unpickles the file, so only checkpoints from
    # a trusted source should be loaded here.
    ckpt = torch.load(model_path, map_location=device)

    # Clean '_orig_mod.' prefix if the model was compiled during training.
    state_dict = ckpt['model'] if 'model' in ckpt else ckpt
    cleaned_state = {
        key.replace("_orig_mod.", ""): tensor for key, tensor in state_dict.items()
    }

    try:
        load_result = model.load_state_dict(cleaned_state, strict=False)

        # strict=False never raises on key mismatches, so surface them
        # explicitly; otherwise a checkpoint that matches nothing would still
        # be reported as a successful load.
        missing = list(getattr(load_result, "missing_keys", ()))
        unexpected = list(getattr(load_result, "unexpected_keys", ()))
        if missing:
            print(f"⚠️ {len(missing)} model parameter(s) missing from checkpoint, e.g. {missing[:5]}")
        if unexpected:
            print(f"⚠️ {len(unexpected)} unexpected checkpoint key(s) ignored, e.g. {unexpected[:5]}")

        model.eval()

        # OPTIMIZATION: Enable FP16 (Half Precision) if on CUDA
        if device == "cuda":
            model = model.half()

        print("✅ Model loaded successfully!")
        return model
    except Exception as exc:
        print(f"❌ Error loading weights: {exc}")
        return None
50
+
51
def predict(model, image_path, text_options):
    """Rank candidate texts against one image and plot the result.

    The image is preprocessed with ``test_transform``, the texts are encoded
    with the module-level ``tokenizer``, and both are passed through
    ``model.visual`` / ``model.text``. The softmax over the scaled
    image-text similarities is shown as a horizontal bar chart next to the
    input image.

    Args:
        model: Model returned by ``load_model``. ``None`` is reported and
            ignored instead of raising.
        image_path: Path of the image file to classify.
        text_options: Candidate texts; a single string is treated as one
            candidate.

    Returns:
        None. Results are shown with matplotlib; problems are reported with
        ``print``.
    """
    if model is None:
        print("❌ No model provided (did load_model() fail?).")
        return

    # A bare string would otherwise be measured in characters by
    # len(text_options) below, asking topk() for more entries than exist.
    if isinstance(text_options, str):
        text_options = [text_options]
    else:
        text_options = list(text_options)
    if not text_options:
        print("❌ No text options provided.")
        return

    if not os.path.exists(image_path):
        print(f"❌ Image file not found: {image_path}")
        return

    # 1. Prepare Image
    try:
        img_pil = Image.open(image_path).convert("RGB")
    except Exception:
        # 'except Exception' rather than a bare 'except' so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        print("❌ Failed to open image file.")
        return

    # Follow the model's own device and dtype instead of assuming
    # HParams.DEVICE, so a model loaded onto another device (or left in FP32)
    # still receives matching inputs.
    try:
        reference = next(model.parameters())
        device, dtype = reference.device, reference.dtype
    except (AttributeError, StopIteration):
        device = HParams.DEVICE
        dtype = torch.float16 if HParams.DEVICE == "cuda" else torch.float32

    img_tensor = test_transform(img_pil).unsqueeze(0).to(device=device, dtype=dtype)

    # 2. Prepare Text
    text_inputs = tokenizer(
        text_options,
        padding="max_length",
        max_length=HParams.MAX_TOKENS,
        truncation=True,
        return_tensors="pt",
    ).to(device)

    # 3. Inference
    with torch.no_grad():
        # Cast to FP32 before normalizing: the squared norm can overflow in
        # FP16 and the softmax loses precision.
        img_features = model.visual(img_tensor).float()
        text_features = model.text(
            text_inputs["input_ids"], text_inputs["attention_mask"]
        ).float()

        # Normalization
        img_features = img_features / img_features.norm(dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

        # Calculate Similarity
        similarity = (100.0 * img_features @ text_features.T).softmax(dim=-1)
        values, indices = similarity[0].topk(len(text_options))

    # 4. Visualize Results
    plt.figure(figsize=(12, 6))

    # Show Image
    plt.subplot(1, 2, 1)
    plt.imshow(img_pil)
    plt.axis("off")
    plt.title("Input Image")

    # Show Chart
    plt.subplot(1, 2, 2)
    scores = values.cpu().numpy() * 100
    labels = [text_options[idx] for idx in indices.tolist()]
    positions = range(len(labels))

    # Color logic: Green for >50%, Blue for others
    colors = ['#4CAF50' if score > 50 else '#2196F3' for score in scores]
    plt.barh(positions, scores, color=colors)
    plt.yticks(positions, labels)
    plt.xlabel('Confidence Score (%)')
    plt.xlim(0, 100)
    plt.gca().invert_yaxis()  # Display highest score at top

    # Add labels to bars
    for row, score in enumerate(scores):
        plt.text(score + 1, row, f"{score:.1f}%", va='center', fontweight='bold')

    plt.tight_layout()
    plt.show()