|
|
""" |
|
|
운전자 이상행동 감지 모델 |
|
|
|
|
|
- 백본: TorchVision Video Swin-T (Kinetics-400 사전학습) |
|
|
- 입력: [B, 3, 30, 224, 224] (배치, 채널, 프레임, 높이, 너비) |
|
|
- 출력: 5클래스 분류 (정상, 졸음운전, 물건찾기, 휴대폰 사용, 운전자 폭행) |
|
|
""" |
|
|
|
|
|
import torch |
|
|
import torch.nn as nn |
|
|
import torch.nn.functional as F |
|
|
from torchvision.models.video import swin3d_t, Swin3D_T_Weights |
|
|
from typing import Dict, Optional |
|
|
|
|
|
|
|
|
class DriverBehaviorModel(nn.Module):
    """Driver abnormal-behavior classifier on a TorchVision Video Swin-T backbone.

    The backbone's original classification head is replaced with
    ``LayerNorm -> Dropout(p=0.3) -> Linear(num_classes)``.

    Args:
        num_classes: Number of output classes (default 5, the full label set).
        pretrained: Whether to load the Kinetics-400 pretrained weights.
        freeze_backbone: Whether to freeze every backbone parameter except the
            replacement head (useful for transfer learning).
    """

    # Display labels indexed by class id. These are runtime strings returned to
    # callers, so they are kept verbatim: normal, drowsy driving, searching for
    # an object, phone use, assaulting the driver.
    CLASS_NAMES = ["정상", "졸음운전", "물건찾기", "휴대폰 사용", "운전자 폭행"]

    def __init__(
        self,
        num_classes: int = 5,
        pretrained: bool = True,
        freeze_backbone: bool = False,
    ):
        super().__init__()

        self.num_classes = num_classes

        if pretrained:
            print("Loading Kinetics-400 pretrained weights...")
            self.backbone = swin3d_t(weights=Swin3D_T_Weights.KINETICS400_V1)
        else:
            self.backbone = swin3d_t(weights=None)

        # Swap the stock head for one sized to our label set.
        in_features = self.backbone.head.in_features
        self.backbone.head = nn.Sequential(
            nn.LayerNorm(in_features),
            nn.Dropout(p=0.3),
            nn.Linear(in_features, num_classes),
        )

        if freeze_backbone:
            self._freeze_backbone()

        self._init_head()

    def _freeze_backbone(self):
        """Freeze all backbone parameters except those of the head."""
        for name, param in self.backbone.named_parameters():
            # Compare the first path component rather than searching for the
            # substring "head", so an unrelated parameter whose name merely
            # contains "head" can never be left trainable by accident.
            if name.split(".", 1)[0] != "head":
                param.requires_grad = False
        print("Backbone parameters frozen (head trainable)")

    def _init_head(self):
        """Initialise the head's linear layers (truncated normal, zero bias)."""
        for m in self.backbone.head.modules():
            if isinstance(m, nn.Linear):
                nn.init.trunc_normal_(m.weight, std=0.02)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the backbone (including the replacement head) on a video batch.

        Args:
            x: Video tensor of shape [B, C, T, H, W]; the documented input is
                C=3 channels, T=30 frames and H=W=224.

        Returns:
            Logits of shape [B, num_classes].
        """
        return self.backbone(x)

    def _class_name(self, class_idx: int) -> str:
        """Return the label for ``class_idx``, or a generic one if none exists.

        ``num_classes`` is configurable while ``CLASS_NAMES`` is fixed, so an
        index past the end of the list gets ``"class_<idx>"`` instead of
        raising ``IndexError``.
        """
        if 0 <= class_idx < len(self.CLASS_NAMES):
            return self.CLASS_NAMES[class_idx]
        return f"class_{class_idx}"

    def _predict_probs(self, x: torch.Tensor) -> torch.Tensor:
        """Return softmax probabilities for the first sample of ``x``.

        Inference runs in eval mode without gradients. The train/eval flag of
        every sub-module is restored afterwards, so calling an inference helper
        in the middle of training does not leave dropout disabled.
        """
        previous_modes = [(module, module.training) for module in self.modules()]
        self.eval()
        try:
            with torch.no_grad():
                logits = self.forward(x)
                return F.softmax(logits, dim=-1)[0]
        finally:
            for module, was_training in previous_modes:
                module.training = was_training

    def predict(self, x: torch.Tensor) -> Dict:
        """Predict the most likely class for a single sample.

        Only the first sample of the batch is used.

        Args:
            x: Video tensor, documented as [1, 3, 30, 224, 224].

        Returns:
            A dict with:
                "class": int index of the top class,
                "confidence": float softmax probability of that class,
                "class_name": str label of that class.
        """
        probs = self._predict_probs(x)
        class_idx = int(probs.argmax().item())

        return {
            "class": class_idx,
            "confidence": probs[class_idx].item(),
            "class_name": self._class_name(class_idx),
        }

    def get_all_probs(self, x: torch.Tensor) -> Dict:
        """Return the probability of every class for a single sample.

        Only the first sample of the batch is used.

        Args:
            x: Video tensor, documented as [1, 3, 30, 224, 224].

        Returns:
            A dict with:
                "predictions": list of
                    {"class": int, "class_name": str, "probability": float},
                    sorted by descending probability,
                "top_class": int index of the most likely class,
                "top_confidence": float probability of that class.
        """
        probs = self._predict_probs(x).tolist()

        predictions = [
            {
                "class": idx,
                "class_name": self._class_name(idx),
                "probability": prob,
            }
            for idx, prob in enumerate(probs)
        ]
        # Stable sort: ties keep ascending class-index order.
        predictions.sort(key=lambda entry: entry["probability"], reverse=True)

        return {
            "predictions": predictions,
            "top_class": predictions[0]["class"],
            "top_confidence": predictions[0]["probability"],
        }
|
|
|
|
|
|
|
|
def create_model(
    num_classes: int = 3,
    pretrained: bool = True,
    freeze_backbone: bool = False,
    checkpoint_path: Optional[str] = None,
) -> DriverBehaviorModel:
    """Build a ``DriverBehaviorModel`` and optionally load trained weights.

    Note that the default ``num_classes`` here is 3, whereas
    ``DriverBehaviorModel`` itself defaults to 5; pass ``num_classes``
    explicitly so it matches the checkpoint being loaded.

    Args:
        num_classes: Number of output classes.
        pretrained: Whether to load the pretrained backbone weights.
        freeze_backbone: Whether to freeze the backbone (head stays trainable).
        checkpoint_path: Optional path to a checkpoint with trained weights.
            The file may hold the state dict under a ``"model"``,
            ``"model_state_dict"`` or ``"state_dict"`` key, or be a bare
            state dict.

    Returns:
        The constructed ``DriverBehaviorModel`` instance.
    """
    model = DriverBehaviorModel(
        num_classes=num_classes,
        pretrained=pretrained,
        freeze_backbone=freeze_backbone,
    )

    if checkpoint_path:
        print(f"Loading checkpoint from {checkpoint_path}...")
        # SECURITY NOTE(review): torch.load unpickles the file, which can
        # execute arbitrary code. Only load checkpoints from trusted sources;
        # consider weights_only=True if the checkpoints contain plain tensors.
        checkpoint = torch.load(checkpoint_path, map_location="cpu")

        # Accept the common wrapper layouts as well as a bare state dict.
        state_dict = checkpoint
        if isinstance(checkpoint, dict):
            for key in ("model", "model_state_dict", "state_dict"):
                if key in checkpoint:
                    state_dict = checkpoint[key]
                    break

        model.load_state_dict(state_dict)
        print("Checkpoint loaded successfully")

    return model
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke test: build the model, run a dummy batch through it and exercise
    # both inference helpers.
    num_classes = 5

    print("=" * 60)
    # The banner reports the class count actually used to build the model.
    print(f"Model Test ({num_classes} classes)")
    print("=" * 60)

    model = DriverBehaviorModel(num_classes=num_classes, pretrained=True)

    # Parameter counts.
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")

    # Batched forward pass on random input.
    dummy_input = torch.randn(2, 3, 30, 224, 224)
    print(f"\nInput shape: {dummy_input.shape}")

    model.eval()
    with torch.no_grad():
        output = model(dummy_input)
    print(f"Output shape: {output.shape}")

    # Fail loudly instead of printing "passed" for a wrongly shaped output.
    expected_shape = (dummy_input.shape[0], num_classes)
    if tuple(output.shape) != expected_shape:
        raise RuntimeError(
            f"Unexpected output shape {tuple(output.shape)}, expected {expected_shape}"
        )

    # Single-sample prediction.
    single_input = torch.randn(1, 3, 30, 224, 224)
    prediction = model.predict(single_input)
    print(f"\nPrediction: {prediction}")

    # Per-class probabilities, highest first.
    all_probs = model.get_all_probs(single_input)
    print("\nAll probabilities:")
    for pred in all_probs["predictions"]:
        print(f" {pred['class_name']}: {pred['probability']:.4f}")

    print("\nModel test passed!")
|
|
|