|
|
import torch |
|
|
import numpy as np |
|
|
import kagglehub |
|
|
import glob |
|
|
import os |
|
|
import pandas as pd |
|
|
from PIL import Image |
|
|
from mapminer import models |
|
|
from tqdm import tqdm |
|
|
from sklearn.preprocessing import LabelEncoder |
|
|
|
|
|
from sklearn.linear_model import LogisticRegression |
|
|
from sklearn.model_selection import StratifiedKFold |
|
|
from sklearn.metrics import accuracy_score |
|
|
|
|
|
|
|
|
def load_models():
    """Build the two backbones being compared.

    Returns:
        tuple: ``(dinov3, lejepa)`` — two DINOv3 ``vit-l-sat`` models in
        eval mode on CUDA; the second has the LeJEPA checkpoint
        (``lejepa-l.pt`` in the working directory) loaded over it.
    """
    dinov3 = models.DINOv3(architecture="vit-l-sat", pretrained=True).cuda()
    dinov3.eval()

    # LeJEPA reuses the same architecture; its weights are swapped in below.
    lejepa = models.DINOv3(architecture="vit-l-sat", pretrained=True).cuda()

    weight_path = "lejepa-l.pt"
    # map_location="cpu" keeps the load working regardless of which device
    # the checkpoint was saved on; load_state_dict copies the values onto
    # the model's CUDA parameters afterwards.
    state_dict = torch.load(weight_path, map_location="cpu")

    # Checkpoint keys are prefixed 'encoder.model.*'; strip the 'encoder.'
    # part so they line up with this wrapper's 'model.*' parameter names.
    # A plain dict preserves insertion order (Python 3.7+), so no
    # OrderedDict is needed.
    state_dict = {
        (k.replace('encoder.model.', 'model.')
         if k.startswith('encoder.model.') else k): v
        for k, v in state_dict.items()
    }

    # strict=False tolerates heads that exist on only one side, but report
    # mismatches so a silently-unloaded backbone doesn't go unnoticed.
    incompatible = lejepa.load_state_dict(state_dict, strict=False)
    if incompatible.missing_keys or incompatible.unexpected_keys:
        print("load_state_dict mismatches:", incompatible)

    lejepa.eval()
    return dinov3, lejepa
|
|
|
|
|
def load_dataset():
    """Fetch the RSSCN7 dataset via kagglehub and return its local path."""
    dataset_path = kagglehub.dataset_download("nifulislam/rsscn7-dataset")
    print("Path to dataset files:", dataset_path)
    return dataset_path
|
|
|
|
|
def get_embedding(model, img_path, device="cuda"):
    """Compute the CLS-token embedding of a single image.

    Args:
        model: DINOv3-style wrapper exposing ``normalize()`` and
            ``model.forward_features()`` returning a dict with key
            ``'x_norm_clstoken'``.
        img_path: path to an image readable by PIL.
        device: torch device string for inference (default ``"cuda"``).

    Returns:
        np.ndarray: 1-D embedding vector (CLS token) on the CPU.
    """
    # Use a context manager so PIL's lazily-opened file handle is closed
    # promptly instead of lingering until garbage collection.
    with Image.open(img_path) as im:
        img = im.convert("RGB").resize((256, 256))

    # HWC uint8 -> CHW float32, plus a leading batch dimension.
    arr = np.asarray(img).transpose(2, 0, 1)
    x = torch.tensor(arr, dtype=torch.float32)[None].to(device)

    x = model.normalize(x)

    with torch.no_grad():
        out = model.model.forward_features(x)
    emb = out['x_norm_clstoken'].squeeze().cpu().numpy()
    return emb
|
|
|
|
|
|
|
|
def evaluate():
    """Benchmark DINOv3 vs LeJEPA embeddings on RSSCN7 by linear probing.

    Downloads the dataset, extracts a CLS embedding per image from both
    models, then reports 5-fold stratified logistic-regression accuracy
    for each embedding space.
    """
    dataset_path = load_dataset()
    # BUG FIX: the downloaded path was previously ignored in favor of a
    # hard-coded /root/.cache/... location, which breaks whenever the
    # kagglehub cache dir or dataset version differs. Derive it instead.
    root = os.path.join(dataset_path, "gamma-correct")

    # Collect (file, class) pairs; the class label is the subdirectory name.
    classes = sorted(os.listdir(root))
    records = []
    for cls in classes:
        for f in glob.glob(f"{root}/{cls}/*.jpg"):
            records.append((f, cls))

    files = pd.DataFrame(records, columns=['path', 'class'])
    # Deterministic shuffle so iteration order isn't class-blocked.
    files = files.sample(frac=1.0, random_state=42)

    dinov3, lejepa = load_models()
    X_dino_, X_jepa_, y_ = [], [], []

    for index in tqdm(range(len(files))):
        row = files.iloc[index]
        img_path = row['path']  # renamed: previously shadowed the dataset path
        X_dino_.append(get_embedding(dinov3, img_path))
        X_jepa_.append(get_embedding(lejepa, img_path))
        y_.append(row['class'])

    X_dino = np.array(X_dino_)
    X_jepa = np.array(X_jepa_)

    le = LabelEncoder()
    y_enc = le.fit_transform(y_)

    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    accs_dino = []
    accs_jepa = []

    for train_idx, test_idx in kf.split(X_dino, y_enc):
        Xtr_d, Xte_d = X_dino[train_idx], X_dino[test_idx]
        Xtr_j, Xte_j = X_jepa[train_idx], X_jepa[test_idx]
        ytr, yte = y_enc[train_idx], y_enc[test_idx]

        # Fresh classifiers per fold so no state leaks across splits.
        clf_dino = LogisticRegression(max_iter=8000, n_jobs=-1)
        clf_jepa = LogisticRegression(max_iter=8000, n_jobs=-1)

        clf_dino.fit(Xtr_d, ytr)
        clf_jepa.fit(Xtr_j, ytr)

        accs_dino.append(accuracy_score(yte, clf_dino.predict(Xte_d)))
        accs_jepa.append(accuracy_score(yte, clf_jepa.predict(Xte_j)))

    print("DINOv3 K-fold Accuracy:", round(np.mean(accs_dino)*100,1))
    print("LeJEPA K-fold Accuracy:", round(np.mean(accs_jepa)*100,1))
|
|
|
|
|
|
|
|
|