ash12321's picture
Upload complete model package with all files
6ecece2 verified
{
"model_name": "EnhancedTimeSformer",
"architecture": "TimeSformer with Optical Flow",
"task": "Video Anomaly Detection / Deepfake Detection",
"approach": "One-Class Classification (trained on real data only)",
"model_config": {
"img_size": 224,
"patch_size": 16,
"in_channels": 3,
"num_frames": 16,
"embedding_dim": 768,
"depth": 12,
"num_heads": 12,
"mlp_ratio": 4.0,
"dropout": 0.1
},
"training": {
"dataset": "WebVid-10M",
"training_type": "Self-supervised reconstruction",
"epochs": 15,
"final_epoch": 14,
"final_val_loss": 0.1821
},
"detection": {
"method": "Reconstruction error (MSE)",
"optimal_threshold": 0.3137,
"threshold_info": "Determined via grid search on ultra-extreme synthetic fakes"
},
"performance": {
"real_videos_mse_mean": 0.1445,
"real_videos_mse_std": 0.0846,
"fake_videos_mse_mean": 0.5559,
"fake_videos_mse_std": 0.0949,
"separation_ratio": 3.85,
"accuracy_on_extreme_fakes": 1.0,
"precision": 1.0,
"recall": 1.0,
"f1_score": 1.0,
"false_positive_rate": 0.0
},
"important_notes": [
"Model trained ONLY on real videos (one-class learning)",
"100% accuracy achieved on ultra-extreme synthetic deepfakes",
"Real deepfakes are more subtle and may have lower detection rates",
"Threshold of 0.3137 provides perfect separation for extreme cases",
"Model shows better generalization than supervised approaches",
"For production use, consider ensemble with other detectors"
],
"usage": {
"input_format": "Video tensor [B, C, T, H, W]",
"input_shape": [
1,
3,
16,
224,
224
],
"input_range": "[-1, 1] (normalized)",
"output": "Reconstructed middle frame + optical flow",
"detection_metric": "MSE between prediction and target frame"
},
"dependencies": [
"torch>=2.0.0",
"einops>=0.6.0",
"opencv-python>=4.8.0",
"numpy>=1.24.0"
],
"created_date": "2025-12-22T23:15:27.803422",
"checkpoint_name": "loss=0.1821.ckpt"
}