justhariharan committed on
Commit
26c2a4a
·
verified ·
1 Parent(s): 128ec79

Upload 22 files

Browse files
models_saved/dinov2_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f27538747e42e7e90e493c782aad5500ebb7573ed738aa183d3173747ac8514c
3
+ size 88696519
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (141 Bytes). View file
 
src/api/__init__.py ADDED
File without changes
src/data/__init__.py ADDED
File without changes
src/data/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (146 Bytes). View file
 
src/data/__pycache__/data_loader.cpython-311.pyc ADDED
Binary file (3.45 kB). View file
 
src/data/data_loader.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torchvision import datasets, transforms
3
+ from torch.utils.data import DataLoader, random_split
4
+ import yaml
5
+ import os
6
+
7
def get_transforms(cfg):
    """Build the preprocessing pipelines used for training and evaluation.

    Both pipelines resize to a square of ``cfg['data']['image_size']`` pixels,
    convert the image to a tensor and normalize it with the ImageNet
    mean/std statistics (the normalization the DINOv2 backbone is fed with
    throughout this project). The training pipeline additionally applies a
    random horizontal flip and a mild brightness/contrast jitter as light
    augmentation against overfitting.

    Args:
        cfg: Parsed configuration mapping; only ``cfg['data']['image_size']``
            is read.

    Returns:
        A ``(train_transform, val_transform)`` tuple of ``transforms.Compose``
        objects.
    """
    side = cfg['data']['image_size']
    target_size = (side, side)

    def tensor_and_normalize():
        # Fresh instances per pipeline so the two Compose objects share no state.
        return [
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ]

    # Augmentation is applied on the PIL image, i.e. before ToTensor.
    augmentation = [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ColorJitter(brightness=0.1, contrast=0.1),
    ]

    train_transform = transforms.Compose(
        [transforms.Resize(target_size), *augmentation, *tensor_and_normalize()]
    )
    val_transform = transforms.Compose(
        [transforms.Resize(target_size), *tensor_and_normalize()]
    )
    return train_transform, val_transform
37
+
38
def create_dataloaders(config_path="configs/config.yaml", seed=None):
    """Create the training and validation ``DataLoader`` objects.

    The image folder named by ``cfg['data']['train_dir']`` is split 80/20 into
    a training and a validation subset. Each subset gets its own transform
    pipeline: augmentation for training, plain resize/normalize for
    validation.

    Args:
        config_path: Path to the YAML configuration file. The keys read are
            ``data.image_size`` (via ``get_transforms``), ``data.train_dir``,
            ``data.batch_size`` and ``data.num_workers``.
        seed: Optional integer seed for the train/validation shuffle. With the
            default ``None`` the split uses torch's global RNG, so it differs
            between runs unless the caller seeded torch beforehand.

    Returns:
        A ``(train_loader, val_loader)`` tuple.
    """
    # Local import so this block does not depend on a file-level import change.
    from torch.utils.data import Subset

    with open(config_path, 'r', encoding='utf-8') as f:
        cfg = yaml.safe_load(f)

    data_cfg = cfg['data']
    train_transform, val_transform = get_transforms(cfg)
    data_dir = data_cfg['train_dir']  # Should be "data/raw"

    # 1. Two views over the same folder, one per transform pipeline.
    #    Splitting a single ImageFolder and then assigning
    #    `subset.dataset.transform` twice does not work: both subsets wrap the
    #    SAME dataset object, so the last assignment wins and training would
    #    silently run with the validation transform (no augmentation).
    #    The cost is that the directory is scanned twice.
    train_view = datasets.ImageFolder(root=data_dir, transform=train_transform)
    val_view = datasets.ImageFolder(root=data_dir, transform=val_transform)

    # 2. Split: 80% Train, 20% Validation, using one shared permutation so the
    #    two subsets are disjoint and together cover every sample.
    total_size = len(train_view)
    train_size = int(0.8 * total_size)
    if seed is None:
        shuffled = torch.randperm(total_size).tolist()
    else:
        rng = torch.Generator().manual_seed(seed)
        shuffled = torch.randperm(total_size, generator=rng).tolist()

    train_dataset = Subset(train_view, shuffled[:train_size])
    val_dataset = Subset(val_view, shuffled[train_size:])

    # 3. Create Loaders
    # NOTE(review): the classifier head in src/models/model.py contains
    # BatchNorm1d, which raises in training mode on a batch of size 1. If
    # len(train_dataset) % batch_size == 1 the final training batch will hit
    # that; consider drop_last=True for the training loader.
    train_loader = DataLoader(
        train_dataset,
        batch_size=data_cfg['batch_size'],
        shuffle=True,
        num_workers=data_cfg['num_workers'],
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=data_cfg['batch_size'],
        shuffle=False,
        num_workers=data_cfg['num_workers'],
    )

    print("✅ Data Ready:")
    print(f" - Train: {len(train_dataset)} images")
    print(f" - Val: {len(val_dataset)} images")
    print(f" - Classes: {train_view.class_to_idx}")

    return train_loader, val_loader


if __name__ == "__main__":
    create_dataloaders()
src/inference/__init__.py ADDED
File without changes
src/inference/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (151 Bytes). View file
 
src/inference/__pycache__/predictor.cpython-311.pyc ADDED
Binary file (5.45 kB). View file
 
src/inference/predictor.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from torchvision import transforms
4
+ from PIL import Image
5
+ import numpy as np
6
+ import cv2
7
+ import os
8
+
9
+ # Import GradCAM tools
10
+ from pytorch_grad_cam import GradCAM
11
+ from pytorch_grad_cam.utils.image import show_cam_on_image
12
+
13
+ from src.models.model import VisionGuardModel
14
+
15
class VisionGuardPredictor:
    """Single-image inference wrapper around ``VisionGuardModel``.

    Loads a trained checkpoint, classifies an image as FAKE or REAL and
    produces a Grad-CAM heatmap overlay for the decision.
    """

    # Side length (pixels) of the square image fed to the model and used for
    # the heatmap overlay. With the /14 patch size of dinov2_vits14 this gives
    # a 16x16 patch grid (224 / 14 = 16).
    IMAGE_SIZE = 224

    def __init__(self, model_path, config_path="configs/config.yaml"):
        """Load the model weights and set up Grad-CAM and preprocessing.

        Args:
            model_path: Path to a ``state_dict`` checkpoint for
                ``VisionGuardModel``.
            config_path: Accepted for interface compatibility; it is not read
                by this class, and the input size is fixed by ``IMAGE_SIZE``.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"🚀 Loading Inference Engine on: {self.device}")

        # 1. Load Model
        # NOTE(review): torch.load unpickles the file, so only load
        # checkpoints from trusted sources (or pass weights_only=True where
        # the installed torch version supports it).
        self.model = VisionGuardModel(num_classes=2, pretrained=False)
        checkpoint = torch.load(model_path, map_location=self.device)
        self.model.load_state_dict(checkpoint)
        self.model.to(self.device)
        self.model.eval()

        # 2. Setup GradCAM (The Explainability Tool)
        # The hook sits on the first normalization layer (norm1) of the last
        # transformer block of the backbone.
        target_layers = [self.model.backbone.blocks[-1].norm1]
        self.cam = GradCAM(
            model=self.model,
            target_layers=target_layers,
            reshape_transform=self._reshape_transform,
        )

        # 3. Preprocessing (same resize + ImageNet normalization as validation)
        self.transform = transforms.Compose([
            transforms.Resize((self.IMAGE_SIZE, self.IMAGE_SIZE)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        # Index -> label. Presumably mirrors ImageFolder's alphabetical
        # class_to_idx for folders named FAKE and REAL; verify against the
        # classes printed by the data loader.
        self.labels = ['FAKE', 'REAL']

    @staticmethod
    def _reshape_transform(tensor):
        """Convert ViT token activations into a 2D feature map for Grad-CAM.

        Args:
            tensor: Activations of shape ``[batch, 1 + num_patches, channels]``
                where index 0 along dim 1 is the CLS token.

        Returns:
            A ``[batch, channels, grid, grid]`` tensor with
            ``grid * grid == num_patches``.

        Raises:
            ValueError: If the number of patch tokens is not a perfect square.
        """
        # Drop the CLS token; only patch tokens have a spatial position.
        patch_tokens = tensor[:, 1:, :]

        # Derive the grid from the token count instead of hard-coding 16, so
        # other square input sizes keep working.
        num_patches = patch_tokens.size(1)
        grid_size = int(round(num_patches ** 0.5))
        if grid_size * grid_size != num_patches:
            raise ValueError(
                f"Cannot arrange {num_patches} patch tokens into a square grid"
            )

        grid = patch_tokens.reshape(
            tensor.size(0), grid_size, grid_size, tensor.size(2)
        )
        # Bring channels first: [Batch, Channels, Height, Width]
        return grid.permute(0, 3, 1, 2)

    def predict(self, image_path):
        """Classify one image and build its Grad-CAM overlay.

        Args:
            image_path: Path (or file object) accepted by ``PIL.Image.open``.

        Returns:
            A dict with keys ``"verdict"`` (label string), ``"confidence"``
            (percentage rounded to 2 decimals), ``"probabilities"`` (per-label
            softmax values rounded to 4 decimals) and ``"heatmap"`` (PIL image
            with the CAM overlaid on the resized input).
        """
        # 1. Load Image; the context manager closes the underlying file as
        # soon as the pixel data has been converted.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        # Keep a clean copy for visualization, scaled to [0, 1] floats as
        # show_cam_on_image expects.
        vis_image = image.resize((self.IMAGE_SIZE, self.IMAGE_SIZE))
        vis_image = np.float32(vis_image) / 255.0

        # 2. Transform for Model
        input_tensor = self.transform(image).unsqueeze(0).to(self.device)

        # 3. Inference
        with torch.no_grad():
            outputs = self.model(input_tensor)
            probs = F.softmax(outputs, dim=1)
            confidence, predicted_class = torch.max(probs, 1)

        # 4. Generate Heatmap
        # This runs outside no_grad because Grad-CAM needs gradients.
        # targets=None leaves the class choice to the library, which per its
        # documentation explains the highest-scoring (predicted) class.
        grayscale_cam = self.cam(input_tensor=input_tensor, targets=None)
        grayscale_cam = grayscale_cam[0, :]

        # Overlay heatmap on image
        visualization = show_cam_on_image(vis_image, grayscale_cam, use_rgb=True)

        # Convert back to PIL for Gradio
        heatmap_pil = Image.fromarray(visualization)

        # 5. Format Output
        idx = predicted_class.item()
        return {
            "verdict": self.labels[idx],
            "confidence": round(float(confidence.item()) * 100, 2),
            "probabilities": {
                "FAKE": round(float(probs[0][0].item()), 4),
                "REAL": round(float(probs[0][1].item()), 4),
            },
            "heatmap": heatmap_pil,
        }
src/models/__init__.py ADDED
File without changes
src/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (148 Bytes). View file
 
src/models/__pycache__/model.cpython-311.pyc ADDED
Binary file (1.7 kB). View file
 
src/models/model.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
class VisionGuardModel(nn.Module):
    """DINOv2 ViT-S/14 backbone followed by a small MLP classification head."""

    def __init__(self, num_classes=2, pretrained=True):
        """Build the backbone and the classification head.

        Args:
            num_classes: Number of output logits.
            pretrained: Whether the hub entry point should load the published
                DINOv2 weights. Pass ``False`` when a full checkpoint is
                loaded right afterwards, to skip the weight download.
        """
        super().__init__()
        # Load DINOv2 (The "Eye"). The flag is forwarded to the hub entry
        # point; previously it was ignored and pretrained weights were always
        # fetched.
        self.backbone = torch.hub.load(
            'facebookresearch/dinov2', 'dinov2_vits14', pretrained=pretrained
        )

        # Feature width of the backbone output; 384 for ViT-S/14, used as the
        # fallback if the attribute is missing.
        embed_dim = getattr(self.backbone, 'embed_dim', 384)

        # Classification Head (The "Brain")
        self.head = nn.Sequential(
            nn.Linear(embed_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        features = self.backbone(x)
        return self.head(features)
src/modules/__init__.py ADDED
File without changes
src/training/__init__.py ADDED
File without changes
src/training/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (150 Bytes). View file
 
src/training/__pycache__/trainer.cpython-311.pyc ADDED
Binary file (5.84 kB). View file
 
src/training/trainer.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ from tqdm import tqdm
5
+ import yaml
6
+ import os
7
+
8
+ # Import our project modules
9
+ from src.data.data_loader import create_dataloaders
10
+ from src.models.model import VisionGuardModel
11
+
12
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimization pass over ``loader``.

    Args:
        model: Network to train; switched to training mode.
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(mean_batch_loss, accuracy_percent)`` where the loss is the sum of
        per-batch losses divided by the number of batches.
    """
    model.train()

    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    progress = tqdm(loader, leave=False)
    for images, labels in progress:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Running statistics (detached so no graph is kept alive)
        predicted = torch.max(outputs.detach(), 1)[1]
        num_seen += labels.size(0)
        num_correct += (predicted == labels).sum().item()
        batch_loss = loss.item()
        loss_sum += batch_loss

        progress.set_description(f"Loss: {batch_loss:.4f}")

    return loss_sum / len(loader), 100 * num_correct / num_seen
41
+
42
def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean_batch_loss, accuracy_percent)`` where the loss is the sum of
        per-batch losses divided by the number of batches.
    """
    model.eval()

    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            batch_loss = criterion(outputs, labels)

            predicted = torch.max(outputs.detach(), 1)[1]
            num_seen += labels.size(0)
            num_correct += (predicted == labels).sum().item()
            loss_sum += batch_loss.item()

    return loss_sum / len(loader), 100 * num_correct / num_seen
60
+
61
def main():
    """Train the classification head and keep the best checkpoint.

    Reads ``configs/config.yaml`` (keys ``model.learning_rate_head`` and
    ``model.epochs`` here, plus the data keys used by ``create_dataloaders``),
    trains for the configured number of epochs and saves the model's
    ``state_dict`` whenever validation accuracy improves.
    """
    config_path = "configs/config.yaml"

    # 1. Config & Device
    with open(config_path, "r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"🚀 Training on: {device}")

    # 2. Save Path (Google Drive)
    # NOTE(review): this is a Colab-specific location; outside Colab the
    # directory is simply created at this absolute path.
    save_dir = "/content/drive/MyDrive/VisionGuard_Models"
    os.makedirs(save_dir, exist_ok=True)
    save_path = f"{save_dir}/dinov2_best.pt"

    # 3. Load Data & Model
    train_loader, val_loader = create_dataloaders(config_path)
    model = VisionGuardModel(num_classes=2).to(device)

    # 4. Optimizer (Only training the Head)
    # Actually freeze the backbone. Handing only the head parameters to the
    # optimizer stops backbone updates, but autograd would still compute and
    # store gradients for the whole ViT on every step. Disabling requires_grad
    # removes that cost without changing the training result.
    for param in model.backbone.parameters():
        param.requires_grad_(False)

    optimizer = optim.AdamW(
        model.head.parameters(),
        lr=float(cfg['model']['learning_rate_head']),
    )
    criterion = nn.CrossEntropyLoss()

    # 5. Training Loop
    best_acc = 0.0
    epochs = cfg['model']['epochs']

    print(f"\n🔥 Starting Training for {epochs} Epochs...")
    print(f"💾 Best model will be saved to: {save_path}")

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")

        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate(model, val_loader, criterion, device)

        print(f" Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f" Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")

        # Save Best (full state_dict: backbone + head, as the predictor expects)
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), save_path)
            print(f" ⭐ Saved New Best Model ({best_acc:.2f}%)")


if __name__ == "__main__":
    main()
src/utils/__init__.py ADDED
File without changes