Upload folder using huggingface_hub
- README.md +312 -3
- config.json +94 -0
- model_1.safetensors +3 -0
- model_2.safetensors +3 -0
- model_3.safetensors +3 -0
- training_progress.json +25 -0

README.md
CHANGED
@@ -1,3 +1,312 @@
# DeepFake Detector V13 🎯

**State-of-the-art deepfake detection ensemble with 699M parameters**

[](https://huggingface.co/ash12321/deepfake-detector-v13)
[](https://huggingface.co/ash12321/deepfake-detector-v13)
[](https://huggingface.co/ash12321/deepfake-detector-v13)

## 🚀 Performance Highlights

- **Mean F1 across the three models**: 0.9313
- **Best single-model F1**: 0.9586 (Model 13.3 - Swin-Large)
- **Total Parameters**: 699M (exceeds the 500M requirement ✅)
- **Training Time**: ~6.1 hours on a single T4 GPU

## 📊 Architecture

This detector is an ensemble of three large-scale models (two transformers and one CNN), trained sequentially:

| Model | Backbone | Parameters | F1 Score | Training Time |
|-------|----------|------------|----------|---------------|
| **Model 13.1** | ConvNeXt-Large | 198M | 0.8971 | 205.7 min |
| **Model 13.2** | ViT-Large | 304M | 0.9382 | 52.7 min |
| **Model 13.3** | Swin-Large | 197M | **0.9586** | 106.2 min |

**Total: 699M parameters**

### Model Files

- `model_1.safetensors` - ConvNeXt-Large (752 MB)
- `model_2.safetensors` - ViT-Large (1159 MB)
- `model_3.safetensors` - Swin-Large (747 MB)

## 🎯 Usage

### Installation

```bash
pip install torch torchvision timm safetensors pillow
```

### Quick Start - Single Model

```python
import torch
import timm
from PIL import Image
from torchvision import transforms
from safetensors.torch import load_file

# Define the model architecture: a timm backbone plus a small MLP head
class DeepfakeDetector(torch.nn.Module):
    def __init__(self, backbone_name, dropout=0.3):
        super().__init__()
        self.backbone = timm.create_model(backbone_name, pretrained=False, num_classes=0)

        if hasattr(self.backbone, 'num_features'):
            feat_dim = self.backbone.num_features
        else:
            with torch.no_grad():
                feat_dim = self.backbone(torch.randn(1, 3, 224, 224)).shape[1]

        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(feat_dim, 512),
            torch.nn.BatchNorm1d(512),
            torch.nn.GELU(),
            torch.nn.Dropout(dropout),
            torch.nn.Linear(512, 128),
            torch.nn.BatchNorm1d(128),
            torch.nn.GELU(),
            torch.nn.Dropout(dropout * 0.5),
            torch.nn.Linear(128, 1)
        )

    def forward(self, x):
        features = self.backbone(x)
        return self.classifier(features).squeeze(-1)

# Load the best model (Model 13.3 - Swin-Large)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DeepfakeDetector('swin_large_patch4_window7_224', dropout=0.3)
state_dict = load_file('model_3.safetensors')
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()

# Preprocessing (ImageNet normalization)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Predict: the sigmoid of the logit is the probability the image is FAKE
image = Image.open('test_image.jpg').convert('RGB')
input_tensor = transform(image).unsqueeze(0).to(device)

with torch.no_grad():
    logits = model(input_tensor)
    probability = torch.sigmoid(logits).item()
    prediction = 'FAKE' if probability > 0.5 else 'REAL'

print(f"Prediction: {prediction}")
print(f"P(fake): {probability:.2%}")
```

### Full Ensemble (Recommended)

```python
import torch
import timm
from PIL import Image
from torchvision import transforms
from safetensors.torch import load_file

class DeepfakeDetector(torch.nn.Module):
    def __init__(self, backbone_name, dropout=0.3):
        super().__init__()
        self.backbone = timm.create_model(backbone_name, pretrained=False, num_classes=0)

        if hasattr(self.backbone, 'num_features'):
            feat_dim = self.backbone.num_features
        else:
            with torch.no_grad():
                feat_dim = self.backbone(torch.randn(1, 3, 224, 224)).shape[1]

        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(feat_dim, 512),
            torch.nn.BatchNorm1d(512),
            torch.nn.GELU(),
            torch.nn.Dropout(dropout),
            torch.nn.Linear(512, 128),
            torch.nn.BatchNorm1d(128),
            torch.nn.GELU(),
            torch.nn.Dropout(dropout * 0.5),
            torch.nn.Linear(128, 1)
        )

    def forward(self, x):
        features = self.backbone(x)
        return self.classifier(features).squeeze(-1)

# Model configurations: (timm backbone, dropout, checkpoint file)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

configs = [
    ('convnext_large', 0.3, 'model_1.safetensors'),
    ('vit_large_patch16_224', 0.35, 'model_2.safetensors'),
    ('swin_large_patch4_window7_224', 0.3, 'model_3.safetensors')
]

# Load all models
models = []
for backbone, dropout, filename in configs:
    model = DeepfakeDetector(backbone, dropout)
    state_dict = load_file(filename)
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()
    models.append(model)

print(f"✓ Loaded {len(models)} models")

# Preprocessing (ImageNet normalization)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Ensemble prediction: average the per-model FAKE probabilities
def predict_ensemble(image_path):
    image = Image.open(image_path).convert('RGB')
    input_tensor = transform(image).unsqueeze(0).to(device)

    predictions = []
    with torch.no_grad():
        for model in models:
            logits = model(input_tensor)
            prob = torch.sigmoid(logits).item()
            predictions.append(prob)

    # Average ensemble
    avg_prob = sum(predictions) / len(predictions)
    prediction = 'FAKE' if avg_prob > 0.5 else 'REAL'

    return {
        'prediction': prediction,
        'confidence': avg_prob,  # ensemble probability that the image is FAKE
        'individual_predictions': predictions
    }

# Use it
result = predict_ensemble('test_image.jpg')
print(f"Prediction: {result['prediction']}")
print(f"Ensemble P(fake): {result['confidence']:.2%}")
print(f"Individual models: {[f'{p:.2%}' for p in result['individual_predictions']]}")
```

## 📈 Training Details

### Architecture Design

Each model uses:
- **Backbone**: Large pre-trained vision model (frozen initially, then fine-tuned)
- **Classifier Head**:
  - Linear(feat_dim → 512) + BatchNorm + GELU + Dropout
  - Linear(512 → 128) + BatchNorm + GELU + Dropout
  - Linear(128 → 1)

### Training Configuration

- **Loss Function**: Focal Loss with Label Smoothing (a sketch follows this list)
  - Alpha: 0.25
  - Gamma: 2.5
  - Label Smoothing: 0.12
- **Optimizer**: AdamW
  - Learning Rates: [2e-5, 1.5e-5, 1.8e-5] (one per model)
  - Weight Decay: 3e-4
- **Scheduler**: CosineAnnealingWarmRestarts (T_0=3, T_mult=2)
- **Epochs**: 10 per model
- **Batch Sizes**: [32, 24, 32] (one per model)
- **Mixed Precision**: FP16 enabled
- **Gradient Accumulation**: 4 steps
- **Gradient Checkpointing**: Enabled (for memory efficiency)
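
The training script itself is not published in this repo, but `config.json` records the criterion as `FocalLossSmooth (alpha=0.25, gamma=2.5)`. Below is a minimal sketch of a binary focal loss with label smoothing matching the hyperparameters above, plus the assumed optimizer/scheduler wiring; treat the internals as an illustration, not the exact training code.

```python
import torch
import torch.nn.functional as F

class FocalLossSmooth(torch.nn.Module):
    """Sketch of a binary focal loss with label smoothing (assumed internals)."""
    def __init__(self, alpha=0.25, gamma=2.5, smoothing=0.12):
        super().__init__()
        self.alpha, self.gamma, self.smoothing = alpha, gamma, smoothing

    def forward(self, logits, targets):
        # Label smoothing: pull hard 0/1 targets toward 0.5, y' = y*(1-s) + s/2
        targets = targets.float() * (1.0 - self.smoothing) + 0.5 * self.smoothing
        bce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
        p = torch.sigmoid(logits)
        p_t = p * targets + (1 - p) * (1 - targets)        # prob. of the target class
        alpha_t = self.alpha * targets + (1 - self.alpha) * (1 - targets)
        return (alpha_t * (1.0 - p_t) ** self.gamma * bce).mean()  # focal down-weighting

# Assumed wiring for Model 13.1 (`model` is a DeepfakeDetector from the Usage
# section; the other two models use lr=1.5e-5 and 1.8e-5 respectively)
criterion = FocalLossSmooth()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5, weight_decay=3e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=3, T_mult=2)
```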

### Data Augmentation

- Random Horizontal Flip (p=0.5)
- Random Rotation (±12°)
- Color Jitter (brightness, contrast, saturation: ±0.15)
- Normalization: ImageNet stats
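
These correspond one-to-one to standard torchvision transforms. A plausible training-time pipeline (a sketch, since the original training script is not published) would be:

```python
from torchvision import transforms

# Training-time augmentation matching the list above. Inference uses only
# Resize + ToTensor + Normalize, as in the Usage examples.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=12),
    transforms.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
```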

## 📊 Performance Analysis

### Model Comparison

**Model 13.1 (ConvNeXt-Large)**
- ✓ Solid baseline: F1 = 0.8971
- ✓ CNN-based architecture
- ✓ Good for local feature extraction

**Model 13.2 (ViT-Large)**
- ✓ Strong performance: F1 = 0.9382
- ✓ Fastest training (52.7 min)
- ✓ Global attention mechanism

**Model 13.3 (Swin-Large)** ⭐ **Best Model**
- ✓ Excellent performance: F1 = 0.9586
- ✓ Hierarchical vision transformer
- ✓ Best balance of accuracy and efficiency

### Ensemble Benefits

The ensemble approach provides:
- **Improved Robustness**: Different architectures capture different patterns
- **Reduced Variance**: Averaging reduces prediction noise
- **Better Generalization**: Complementary strengths minimize overfitting
- **Higher Accuracy**: Expected ensemble F1 ≈ 0.94-0.96

## 🔧 System Requirements

**Inference (Single Model)**
- GPU: 4GB+ VRAM
- RAM: 8GB+
- Storage: ~1.2 GB per model

**Inference (Full Ensemble)**
- GPU: 12GB+ VRAM (or run the models sequentially on a smaller GPU; see the sketch after this section)
- RAM: 16GB+
- Storage: ~2.7 GB total

**Training**
- GPU: T4 (16GB) or better
- RAM: 12GB+
- Storage: 8GB+ for checkpoints
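
If 12GB of VRAM is not available, the ensemble can be scored one model at a time. A minimal sketch, reusing `DeepfakeDetector`, `transform`, and `configs` from the Usage section (the load/score/free pattern here is a suggestion, not part of the published code):

```python
import torch
from PIL import Image
from safetensors.torch import load_file

# Score with one model at a time so peak VRAM stays near the single-model
# requirement (~4 GB) instead of the full-ensemble 12 GB.
def predict_sequential(image_path, configs, device='cuda'):
    image = Image.open(image_path).convert('RGB')
    x = transform(image).unsqueeze(0)                 # `transform` from the Usage section

    probs = []
    for backbone, dropout, filename in configs:
        model = DeepfakeDetector(backbone, dropout)   # class from the Usage section
        model.load_state_dict(load_file(filename))
        model = model.to(device).eval()
        with torch.no_grad():
            probs.append(torch.sigmoid(model(x.to(device))).item())
        del model                                     # drop the model...
        torch.cuda.empty_cache()                      # ...and return its VRAM to the pool

    return sum(probs) / len(probs)                    # average P(fake) across models

# avg_prob = predict_sequential('test_image.jpg', configs)
```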

## 📚 Dataset

Trained on: [`ash12321/deepfake-v13-dataset`](https://huggingface.co/datasets/ash12321/deepfake-v13-dataset)

## 🔗 Related Models

- Predecessor: [`ash12321/deepfake-detector-v12`](https://huggingface.co/ash12321/deepfake-detector-v12)

## 📄 Citation

```bibtex
@misc{v13-deepfake-detector,
  title={DeepFake Detector V13: Large-Scale Ensemble},
  author={Ash},
  year={2024},
  publisher={Hugging Face},
  howpublished={\url{https://huggingface.co/ash12321/deepfake-detector-v13}}
}
```

## 📝 License

MIT License - see the LICENSE file for details.

## 🙏 Acknowledgments

- Built with PyTorch, timm, and Hugging Face
- Trained on a Google Colab T4 GPU
- Architectures: ConvNeXt (Meta), ViT (Google), Swin (Microsoft)

---

**Model Version**: 13.0
**Last Updated**: November 2024
**Status**: Production Ready ✅

config.json
ADDED
@@ -0,0 +1,94 @@
{
  "model_name": "DeepFake Detector V13",
  "version": "13.0",
  "architecture": "3-Model Ensemble",
  "total_parameters": "699M",
  "description": "Large-scale ensemble with ConvNeXt-Large (198M), ViT-Large (304M), and Swin-Large (197M)",
  "models": [
    {
      "id": 1,
      "name": "Model 13.1",
      "backbone": "convnext_large",
      "parameters": "198M",
      "dropout": 0.3,
      "batch_size": 32,
      "best_f1": 0.8971,
      "file": "model_1.safetensors"
    },
    {
      "id": 2,
      "name": "Model 13.2",
      "backbone": "vit_large_patch16_224",
      "parameters": "304M",
      "dropout": 0.35,
      "batch_size": 24,
      "best_f1": 0.9382,
      "file": "model_2.safetensors"
    },
    {
      "id": 3,
      "name": "Model 13.3",
      "backbone": "swin_large_patch4_window7_224",
      "parameters": "197M",
      "dropout": 0.3,
      "batch_size": 32,
      "best_f1": 0.9586,
      "file": "model_3.safetensors"
    }
  ],
  "ensemble_performance": {
    "average_f1": 0.9313,
    "best_individual_f1": 0.9586,
    "total_training_time_hours": 6.1
  },
  "training": {
    "epochs_per_model": 10,
    "learning_rates": [2e-05, 1.5e-05, 1.8e-05],
    "weight_decay": 0.0003,
    "label_smoothing": 0.12,
    "gradient_accumulation": 4,
    "mixed_precision": true,
    "criterion": "FocalLossSmooth (alpha=0.25, gamma=2.5)",
    "optimizer": "AdamW",
    "scheduler": "CosineAnnealingWarmRestarts"
  },
  "preprocessing": {
    "image_size": 224,
    "normalization": {
      "mean": [0.485, 0.456, 0.406],
      "std": [0.229, 0.224, 0.225]
    },
    "augmentations": [
      "RandomHorizontalFlip(p=0.5)",
      "RandomRotation(degrees=12)",
      "ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15)"
    ]
  },
  "inference": {
    "ensemble_method": "average",
    "threshold": 0.5,
    "description": "Average predictions from all 3 models for final classification"
  },
  "requirements": [
    "torch>=2.0.0",
    "timm>=0.9.0",
    "torchvision>=0.15.0",
    "numpy",
    "pillow",
    "safetensors"
  ],
  "dataset": "ash12321/deepfake-v13-dataset",
  "predecessor": "ash12321/deepfake-detector-v12"
}

model_1.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1f70541704e8eba1910990469e1a6f9d8a1badc451b2a4d2909170ee53ba45c9
size 788381444

model_2.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3878f20c6949953030d9294132f4b333a5d9a4349a3fa91420270ec5f7a8ad8b
size 1215611244

model_3.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:db558427cfd28173170921b45a0c8f869e122d6e46aff1abd83d55ce6a80f8b8
size 783441332

training_progress.json
ADDED
@@ -0,0 +1,25 @@
{
  "completed_models": [1, 2, 3],
  "model_13.1": {
    "best_val_f1": 0.8970679975046787,
    "backbone": "convnext_large",
    "params": "198M",
    "time_minutes": 205.7302174091339
  },
  "model_13.2": {
    "best_val_f1": 0.938229238160604,
    "backbone": "vit_large_patch16_224",
    "params": "304M",
    "time_minutes": 52.71756718158722
  },
  "model_13.3": {
    "best_val_f1": 0.9585897222684184,
    "backbone": "swin_large_patch4_window7_224",
    "params": "197M",
    "time_minutes": 106.19504813750585
  }
}