| | import os
|
| |
|
| | import torch
|
| | import open_clip
|
| |
|
| | import numpy as np
|
| | from sklearn.linear_model import LogisticRegression
|
| | from torchvision.datasets import CIFAR100
|
| | from tqdm import tqdm
|
| | from joblib import dump, load
|
| | from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
|
| | import torchvision.transforms as transforms
|
| |
|
| | import torchvision
|
| |
|
| | import pandas as pd
|
| | from pathlib import Path
|
| | from PIL import Image
|
| | from torch.utils.data import Dataset, DataLoader
|
| | import pickle
|
| |
|
class PHASE(Dataset):
    """PHASE dataset: images listed in an annotation file, one label each.

    The annotation file is read as a header-less, space-separated table.
    Column 0 holds the image path relative to ``root_dir`` and column 1
    holds the label.
    """

    def __init__(self, csv_file, root_dir, transform=None, resolution=224):
        """
        Arguments:
            csv_file (string): Path to the annotation file.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform applied to
                the image after it has been resized.
            resolution (int): Side length of the square every image is
                resized to before ``transform`` runs.
        """
        self.annotations = pd.read_csv(csv_file, sep=' ', header=None)
        self.root_dir = root_dir
        self.transform = transform
        # Built once here so __getitem__ does not recreate it per sample.
        self.base_transforms = Compose([
            Resize((resolution, resolution), interpolation=Image.BICUBIC)
        ])

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` of the annotations.

        The image is opened as RGB, resized by ``base_transforms`` and then
        passed through ``transform`` when one was supplied.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row = self.annotations.iloc[idx]
        img_name = os.path.join(self.root_dir, row[0])
        label = row[1]

        image = Image.open(img_name).convert('RGB')
        image = self.base_transforms(image)
        if self.transform is not None:
            image = self.transform(image)

        return image, label
|
| |
|
| |
|
class FACET(Dataset):
    """FACET dataset: images listed in an annotation file, one label each.

    The annotation file is read as a header-less, space-separated table.
    Column 0 holds the image path relative to ``root_dir`` and column 1
    holds the label.
    """

    def __init__(self, csv_file, root_dir, transform=None, resolution=224):
        """
        Arguments:
            csv_file (string): Path to the annotation file.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform applied to
                the image after it has been resized.
            resolution (int): Side length of the square every image is
                resized to before ``transform`` runs. Defaults to the
                previously hard-coded 224.
        """
        self.annotations = pd.read_csv(csv_file, sep=' ', header=None)
        self.root_dir = root_dir
        self.transform = transform
        # Built once here instead of on every __getitem__ call.
        self.base_transforms = Compose([
            Resize((resolution, resolution), interpolation=Image.BICUBIC)
        ])

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` of the annotations.

        The image is opened as RGB, resized by ``base_transforms`` and then
        passed through ``transform`` when one was supplied.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row = self.annotations.iloc[idx]
        img_name = os.path.join(self.root_dir, row[0])
        label = row[1]

        image = Image.open(img_name).convert('RGB')
        image = self.base_transforms(image)
        if self.transform is not None:
            image = self.transform(image)

        return image, label
|
| |
|
| |
|
class MORPH(Dataset):
    """MORPH dataset: images and labels read from a CSV with a header row.

    The CSV must provide a ``filepath`` column (path of each image) and a
    ``gender`` column (used as the label).
    """

    def __init__(self, csv_file, root_dir, transform=None, resolution=224):
        """
        Arguments:
            csv_file (string): Path to the comma-separated annotation file.
            root_dir (string): Stored on the instance but not used when
                opening images; the ``filepath`` column is used as-is.
            transform (callable, optional): Optional transform applied to
                the image after it has been resized.
            resolution (int): Side length of the square every image is
                resized to before ``transform`` runs. Defaults to the
                previously hard-coded 224.
        """
        self.annotations = pd.read_csv(csv_file, sep=',', header=0)
        self.root_dir = root_dir
        self.transform = transform
        # Built once here instead of on every __getitem__ call.
        self.base_transforms = Compose([
            Resize((resolution, resolution), interpolation=Image.BICUBIC)
        ])

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` of the annotations.

        The image is opened as RGB, resized by ``base_transforms`` and then
        passed through ``transform`` when one was supplied.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        row = self.annotations.iloc[idx]
        # NOTE(review): root_dir is deliberately not joined here, matching
        # the original behaviour -- presumably "filepath" already holds a
        # directly openable path; confirm against the CSV.
        img_name = row["filepath"]
        label = row["gender"]

        image = Image.open(img_name).convert('RGB')
        image = self.base_transforms(image)
        if self.transform is not None:
            image = self.transform(image)

        return image, label
|
| |
|
| |
|
| |
|
# Linear-probe evaluation on PHASE (emotion annotations): for every model,
# extract (or load cached) image features, then fit logistic-regression
# classifiers for several regularisation strengths and report accuracy.

# Used for both the model and the input batches so the two always agree.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Directory holding cached features, fitted classifiers and predictions.
features_root = "../../features/PHASE_EMOTIONS/"

# Model names to evaluate; index-aligned with `weights`.
models = (
    "vit_b_16_cc3m_50_30ep",
    "vit_b_16_cc3m_original",
    "vit_b_16_cc3m_50_30ep_difficult_batches",
    "rn50_cc3m_mix_000",
    "rn50_cc3m_mix_100",
)

# Checkpoint path for each entry of `models`.
weights = (
    "/home/kis/Desktop/rhome/kis/code/open_clip_latest/open_clip/logs/2024_08_27-11_48_49-model_ViT-B-16-lr_0.001-b_410-j_8-p_amp/checkpoints/epoch_30.pt",
    "/home/kis/Desktop/rhome/kis/code/open_clip/logs/2024_07_12-19_17_23-model_ViT-B-16-lr_0.001-b_410-j_4-p_amp/checkpoints/epoch_30.pt",
    "/home/kis/Desktop/rhome/kis/code/open_clip_latest/open_clip/logs/2024_09_15-14_07_26-model_ViT-B-16-lr_0.001-b_410-j_8-p_amp/checkpoints/epoch_30.pt",
    "/home/kis/code/models/models/cc3m_mix_000/epoch_50.pt",
    "/home/kis/code/models/models/cc3m_mix_100/epoch_50.pt",
)

# open_clip architecture for models whose weights come from a local checkpoint.
OPEN_CLIP_CHECKPOINT_ARCH = {
    "vit_b_16_cc3m_50": "ViT-B-16",
    "vit_b_16_cc3m_50_28ep": "ViT-B-16",
    "vit_b_16_cc3m_50_30ep": "ViT-B-16",
    "vit_b_16_cc3m_50_30ep_difficult_batches": "ViT-B-16",
    "vit_b_16_cc3m_original": "ViT-B-16",
    "rn50_cc3m_mix_000": "RN50",
    # Previously had no loader branch and fell through to the generic
    # ResNet path. NOTE(review): assumed to be the same RN50 open_clip
    # architecture as rn50_cc3m_mix_000 -- confirm.
    "rn50_cc3m_mix_100": "RN50",
    # NOTE(review): named "vit_b_16" but loaded as RN50 in the original
    # script; kept unchanged -- confirm which is intended.
    "vit_b_16_cc3m_future_models": "RN50",
}

# (architecture, pretrained tag) for publicly released open_clip weights.
OPEN_CLIP_PRETRAINED = {
    "vit_b_16_400m": ("ViT-B-16", "laion400m_e32"),
    "vit_b_16_2b": ("ViT-B-16", "laion2b_s34b_b88k"),
    "vit_b_32_400m": ("ViT-B-32", "laion400m_e32"),
    "vit_b_32_2b": ("ViT-B-32", "laion2b_s34b_b79k"),
    "vit_l_14_400m": ("ViT-L-14", "laion400m_e32"),
    "vit_l_14_2b": ("ViT-L-14", "laion2b_s32b_b82k"),
}

# (torch.hub repository, entry point) for the DINO ViT backbones.
DINO_HUB_MODELS = {
    "dino_v1_vit_s_16": ("facebookresearch/dino:main", "dino_vits16"),
    "dino_v1_vit_b_16": ("facebookresearch/dino:main", "dino_vitb16"),
    "dino_v2_vit_s_14": ("facebookresearch/dinov2", "dinov2_vits14"),
    "dino_v2_vit_b_14": ("facebookresearch/dinov2", "dinov2_vitb14"),
    "dino_v2_vit_l_14": ("facebookresearch/dinov2", "dinov2_vitl14"),
    "dino_v2_vit_g_14": ("facebookresearch/dinov2", "dinov2_vitg14"),
}


def load_model(model_name, weight):
    """Build the backbone for ``model_name`` and return ``(model, model_type)``.

    ``model_type`` is ``'cnn'`` for models that are called directly and
    ``'transformer'`` otherwise. It is computed afresh on every call so one
    model's type cannot leak into the next loop iteration. The returned
    model is moved to ``device`` and put in eval mode.

    NOTE: the preprocessing pipelines returned by open_clip / clip are
    discarded; every model is fed the transform from ``build_transform``.
    """
    model_type = 'transformer'

    if model_name == 'dino_v1_cnn':
        model = torch.hub.load('facebookresearch/dino:main', 'dino_resnet50')
        model.fc = torch.nn.Identity()
        model_type = 'cnn'
    elif model_name in OPEN_CLIP_CHECKPOINT_ARCH:
        model, _, _ = open_clip.create_model_and_transforms(
            OPEN_CLIP_CHECKPOINT_ARCH[model_name], pretrained=weight)
    elif model_name in DINO_HUB_MODELS:
        repo, entry_point = DINO_HUB_MODELS[model_name]
        model = torch.hub.load(repo, entry_point)
    elif model_name in OPEN_CLIP_PRETRAINED:
        arch, tag = OPEN_CLIP_PRETRAINED[model_name]
        model, _, _ = open_clip.create_model_and_transforms(arch, pretrained=tag)
    elif "resnet" in model_name:
        model = torch.hub.load('pytorch/vision:v0.10.0', model_name, pretrained=True)
        model.fc = torch.nn.Identity()
        model_type = 'cnn'
    elif "vit" in model_name:
        model = torch.hub.load('pytorch/vision', model_name, weights='IMAGENET1K_V1')
        model.heads = torch.nn.Identity()
    elif "ViT" in model_name:
        # NOTE(review): `clip` is not imported anywhere in the visible file;
        # this branch needs `import clip` before it can run.
        model, _ = clip.load(model_name, device)
    elif "RN" in model_name:
        # NOTE(review): same missing `clip` import as the branch above.
        model, _ = clip.load(model_name, device)
        model.visual.attnpool = torch.nn.AdaptiveAvgPool2d((1, 1))
        model_type = 'cnn'
    else:
        w = torch.load(weight)
        model = torchvision.models.resnet50(pretrained=False)
        model.fc = torch.nn.Identity()
        # NOTE(review): `update_keys` is not defined in the visible file;
        # this fallback raises NameError until it is provided.
        model.load_state_dict(update_keys(w['state_dict']), strict=True)
        model_type = 'cnn'

    # .to(device) instead of .cuda() so the CPU fallback actually works;
    # eval() so BatchNorm/dropout layers behave deterministically while
    # extracting features.
    model.to(device)
    model.eval()
    return model, model_type


def build_transform(model_name):
    """Return the crop / tensor / normalise transform used for ``model_name``.

    Models with 'simclr' in their name get identity normalisation; all
    others get the ImageNet mean/std.
    """
    if 'simclr' in model_name:
        img_norm_cfg = dict(mean=[0., 0., 0.], std=[1., 1., 1.])
    else:
        img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    return transforms.Compose([
        transforms.CenterCrop((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(**img_norm_cfg)
    ])


def build_datasets(model_name):
    """Return the ``(train, val)`` PHASE emotion datasets for ``model_name``."""
    transform_test = build_transform(model_name)
    train_dataset = PHASE(csv_file='/home/kis/Desktop/rhome/kis/datasets/phase/phase_annotations/train_annotations_emotion.txt',
                          root_dir='/home/kis/Desktop/rhome/kis/datasets/phase/images/train_bb/',
                          transform=transform_test
                          )
    val_dataset = PHASE(csv_file='/home/kis/Desktop/rhome/kis/datasets/phase/phase_annotations/val_annotations_emotion.txt',
                        root_dir='/home/kis/Desktop/rhome/kis/datasets/phase/images/val_bb/',
                        transform=transform_test
                        )
    return train_dataset, val_dataset


def get_features(dataset, model, model_type, batch_size=512):
    """Run ``model`` over ``dataset`` and return ``(features, labels)`` arrays.

    'cnn' models are called directly. Other models use ``encode_image``
    when they provide it and are called directly otherwise.
    """
    use_encode_image = model_type != 'cnn' and hasattr(model, "encode_image")
    all_features = []
    all_labels = []

    with torch.no_grad():
        for images, labels in tqdm(DataLoader(dataset, batch_size=batch_size)):
            images = images.to(device)
            if use_encode_image:
                features = model.encode_image(images)
            else:
                features = model(images)
            # Move each batch off the accelerator right away so the whole
            # feature matrix is never held in GPU memory.
            all_features.append(features.cpu())
            all_labels.append(labels)

    return torch.cat(all_features).numpy(), torch.cat(all_labels).cpu().numpy()


def feature_cache_files(root, model_name):
    """Return the four cache paths for ``model_name`` under ``root``.

    Order: train features, train labels, val features, val labels.
    """
    suffixes = ("features", "labels", "features_val", "labels_val")
    return tuple(Path(f"{root}/{model_name}_{suffix}.pkl") for suffix in suffixes)


def load_or_extract_features(model_name, weight):
    """Return ``(features, labels, features_val, labels_val)`` for a model.

    Cached pickles are used only when all four files exist. Otherwise the
    model is loaded, features are extracted and the cache is (re)written.
    The model is therefore only instantiated on a cache miss.
    """
    cache_files = feature_cache_files(features_root, model_name)

    if all(path.exists() for path in cache_files):
        print("Already extracted!")
        cached = []
        for path in cache_files:
            # These pickles are produced by this script; do not point
            # features_root at untrusted files.
            with open(path, 'rb') as f:
                cached.append(pickle.load(f))
        return tuple(cached)

    model, model_type = load_model(model_name, weight)
    train_dataset, val_dataset = build_datasets(model_name)
    train_feat_file, train_label_file, val_feat_file, val_label_file = cache_files

    features, labels = get_features(train_dataset, model, model_type)
    with open(train_feat_file, 'wb') as f:
        pickle.dump(features, f)
    with open(train_label_file, 'wb') as f:
        pickle.dump(labels, f)

    features_val, labels_val = get_features(val_dataset, model, model_type)
    with open(val_feat_file, 'wb') as f:
        pickle.dump(features_val, f)
    with open(val_label_file, 'wb') as f:
        pickle.dump(labels_val, f)

    return features, labels, features_val, labels_val


def fit_and_evaluate(model_name, features, labels, features_val, labels_val):
    """Fit one logistic-regression probe per C value and report accuracy.

    For every C the fitted classifier and its validation predictions are
    written under ``features_root`` and the validation accuracy is printed.
    """
    for i in range(1, 10):
        # c is formatted verbatim into the output file names (3 * 0.1 is
        # rendered as 0.30000000000000004); kept unchanged so artifacts
        # from earlier runs keep their names.
        c = i * 0.1
        classifier = LogisticRegression(random_state=0, C=c, max_iter=10000, verbose=0, class_weight="balanced")
        classifier.fit(features, labels)

        predictions = classifier.predict(features_val)
        dump(classifier, f'{features_root}/{model_name}_logistic_regression_classifier_c_{c}.joblib')
        with open(f'{features_root}/{model_name}_predictions_c_{c}.pkl', 'wb') as f:
            pickle.dump(predictions, f)

        pd.DataFrame(predictions).to_csv(f"{features_root}/{model_name}_predictions_c_{c}.txt")
        accuracy = np.mean((labels_val == predictions).astype(float)) * 100.
        print(f"C={c}, Accuracy = {accuracy:.3f}")


def main():
    """Evaluate every configured model on the PHASE emotion task."""
    if len(models) != len(weights):
        # zip() would silently drop the unmatched tail otherwise.
        raise ValueError(
            f"models ({len(models)}) and weights ({len(weights)}) must have the same length"
        )

    # Every output below is written here; create it up front.
    Path(features_root).mkdir(parents=True, exist_ok=True)

    for model_name, weight in zip(models, weights):
        print("\n\n", model_name)
        features, labels, features_val, labels_val = load_or_extract_features(model_name, weight)
        print("Done!")
        fit_and_evaluate(model_name, features, labels, features_val, labels_val)


if __name__ == "__main__":
    main()