File size: 2,880 Bytes
5787f5d
 
a8449c1
 
5787f5d
 
 
 
 
a8449c1
5787f5d
 
 
a8449c1
 
5787f5d
 
 
 
 
 
 
 
a8449c1
5787f5d
 
 
 
 
 
 
 
 
 
 
a8449c1
5787f5d
 
 
 
 
 
 
a8449c1
5787f5d
 
 
 
a8449c1
5787f5d
 
a8449c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import torch
import torch.nn as nn
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader
from transformers import ViTForImageClassification, ViTFeatureExtractor

class ToxicImageClassifier(nn.Module):
    """Image classifier that bundles a ResNet50 and a Vision Transformer.

    Both backbones are loaded with pretrained weights and given an output
    layer with ``num_classes`` outputs. ``forward`` runs exactly one of them
    per call, chosen by ``model_type``.

    Args:
        num_classes: Number of output classes for both backbones.
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # ResNet50: swap the final fully connected layer for a fresh one
        # with `num_classes` outputs.
        backbone = models.resnet50(pretrained=True)
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.resnet = backbone

        # Vision Transformer: ignore_mismatched_sizes is passed so loading
        # is not rejected when the checkpoint's classification head does not
        # match `num_labels`.
        vit_checkpoint = 'google/vit-base-patch16-224'
        self.vit = ViTForImageClassification.from_pretrained(
            vit_checkpoint,
            num_labels=num_classes,
            ignore_mismatched_sizes=True,
        )
        # Loaded alongside the ViT but not used by forward().
        self.feature_extractor = ViTFeatureExtractor.from_pretrained(vit_checkpoint)

    def forward(self, x, model_type='resnet'):
        """Return class logits for ``x`` from the selected backbone.

        Args:
            x: Input batch handed unchanged to the chosen backbone.
            model_type: ``'resnet'`` selects the ResNet50; any other value
                selects the ViT.

        Returns:
            The ResNet50 output, or the ``logits`` attribute of the ViT output.
        """
        if model_type != 'resnet':
            # Everything that is not 'resnet' is routed to the ViT.
            return self.vit(x).logits
        return self.resnet(x)

def train_model(model, train_loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Module called as ``model(inputs)``; its output is scored with
            ``criterion`` and must have the class scores along dim 1.
        train_loader: Iterable yielding ``(inputs, labels)`` batches. It does
            not need to support ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose gradients are reset and stepped per batch.
        device: Device the inputs and labels are moved to before the
            forward pass.

    Returns:
        ``(epoch_loss, epoch_acc)`` where ``epoch_loss`` is the mean of the
        per-batch losses and ``epoch_acc`` is the accuracy in percent. Each
        value is ``0.0`` when there was nothing to average over (for example
        an empty loader).
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0  # counted here so loaders without __len__ also work

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # detach() keeps the accuracy bookkeeping out of the autograd graph.
        _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        num_batches += 1

    # Guard both divisions: an empty loader used to raise ZeroDivisionError.
    epoch_loss = running_loss / num_batches if num_batches else 0.0
    epoch_acc = 100 * correct / total if total else 0.0
    return epoch_loss, epoch_acc

def get_data_loaders(batch_size=32, train_dir='data/train', test_dir='data/test'):
    """Build the training and test ``DataLoader`` objects.

    Both datasets are read with ``datasets.ImageFolder`` and share one
    preprocessing pipeline: resize to 224x224, convert to a tensor, then
    normalize each channel.

    Args:
        batch_size: Number of samples per batch for both loaders.
        train_dir: Root directory of the training images.
        test_dir: Root directory of the test images.

    Returns:
        ``(train_loader, test_loader)``. Only the training loader shuffles.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # NOTE(review): these look like the ImageNet channel statistics
        # commonly used with pretrained torchvision models; confirm.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    train_dataset = datasets.ImageFolder(train_dir, transform=transform)
    test_dataset = datasets.ImageFolder(test_dir, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    # No shuffle argument is passed for the test loader.
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    return train_loader, test_loader

# Example training script. It is wrapped in a module-level string literal, so
# importing this file does not run it; copy it into a separate script (or
# remove the surrounding triple quotes) to train.
# NOTE(review): the optimizer is given model.parameters(), which includes the
# ViT weights, while train_model() calls model(inputs) and so only exercises
# the default 'resnet' branch of forward(). Confirm this is intended.
"""
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ToxicImageClassifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
train_loader, test_loader = get_data_loaders()

for epoch in range(10):  # Adjust epochs as needed
    loss, acc = train_model(model, train_loader, criterion, optimizer, device)
    print(f"Epoch {epoch+1}: Loss = {loss:.4f}, Accuracy = {acc:.2f}%")

torch.save(model.state_dict(), "toxic_classifier.pth")
"""