koreashin committed on
Commit
eb02aa3
·
verified ·
1 Parent(s): 1c4dc0b

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +535 -3
  2. config.json +52 -0
  3. pytorch_model.bin +3 -0
README.md CHANGED
@@ -1,3 +1,535 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - ko
5
+ tags:
6
+ - video-classification
7
+ - driver-behavior
8
+ - video-swin-transformer
9
+ - pytorch
10
+ - safety
11
+ - autonomous-driving
12
+ metrics:
13
+ - accuracy
14
+ - f1
15
+ pipeline_tag: video-classification
16
+ datasets:
17
+ - custom
18
+ ---
19
+
20
+ # 🚗 Driver Abnormal Behavior Detection Model
21
+
22
+ **운전자 이상행동 탐지 모델** - Video Swin Transformer 기반
23
+
24
+ 차량 내 카메라 영상에서 운전자의 이상행동을 실시간으로 탐지하는 딥러닝 모델입니다.
25
+
26
+ ## 📊 Model Performance
27
+
28
+ | Metric | Score |
29
+ |--------|-------|
30
+ | **Accuracy** | 95.51% |
31
+ | **Macro F1** | 0.9436 |
32
+ | **Inference Speed** | ~30 FPS (RTX 3090) |
33
+
34
+ ### Per-Class Performance
35
+
36
+ | Class | Korean | Precision | Recall | F1-Score | Support |
37
+ |-------|--------|-----------|--------|----------|---------|
38
+ | 0 | 정상 (Normal) | 0.93 | 0.92 | 0.92 | 159,224 |
39
+ | 1 | 졸음운전 (Drowsy) | 0.99 | 0.98 | 0.98 | 619,450 |
40
+ | 2 | 물건찾기 (Searching) | 0.90 | 0.94 | 0.92 | 261,435 |
41
+ | 3 | 휴대폰 사용 (Phone) | 0.91 | 0.88 | 0.90 | 150,981 |
42
+ | 4 | 운전자 폭행 (Assault) | 1.00 | 1.00 | 1.00 | 179,972 |
43
+
44
+ ---
45
+
46
+ ## 🛠️ Installation
47
+
48
+ ```bash
49
+ # PyTorch 2.0+ 필요
50
+ pip install torch torchvision
51
+
52
+ # 추가 dependencies
53
+ pip install opencv-python numpy
54
+
55
+ # (선택) HuggingFace에서 다운로드
56
+ pip install huggingface_hub
57
+ ```
58
+
59
+ ---
60
+
61
+ ## 🚀 Quick Start
62
+
63
+ ### 1. 모델 다운로드 및 로드
64
+
65
+ ```python
66
+ import torch
67
+ from torchvision.models.video import swin3d_t
68
+
69
+ # ===== 방법 1: 로컬 파일에서 로드 =====
70
+ model = swin3d_t(weights=None)
71
+ model.head = torch.nn.Linear(model.head.in_features, 5) # 5 classes
72
+
73
+ state_dict = torch.load("pytorch_model.bin", map_location="cpu", weights_only=True)
74
+ model.load_state_dict(state_dict)
75
+ model.eval()
76
+
77
+ # ===== 방법 2: HuggingFace Hub에서 로드 =====
78
+ from huggingface_hub import hf_hub_download
79
+
80
+ model_path = hf_hub_download(
81
+ repo_id="YOUR_USERNAME/driver-behavior-swin-t",
82
+ filename="pytorch_model.bin"
83
+ )
84
+ state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
85
+
86
+ model = swin3d_t(weights=None)
87
+ model.head = torch.nn.Linear(model.head.in_features, 5)
88
+ model.load_state_dict(state_dict)
89
+ model.eval()
90
+ ```
91
+
92
+ ### 2. 단일 비디오 추론
93
+
94
+ ```python
95
+ import cv2
96
+ import torch
97
+ import numpy as np
98
+
99
+ # 클래스 정의
100
+ CLASS_NAMES = ["정상", "졸음운전", "물건찾기", "휴대폰 사용", "운전자 폭행"]
101
+ CLASS_NAMES_EN = ["Normal", "Drowsy Driving", "Searching Objects", "Phone Usage", "Driver Assault"]
102
+
103
+ def load_video_frames(video_path, num_frames=30, size=(224, 224)):
104
+ """비디오에서 프레임 추출 및 전처리"""
105
+ cap = cv2.VideoCapture(video_path)
106
+ frames = []
107
+
108
+ while len(frames) < num_frames:
109
+ ret, frame = cap.read()
110
+ if not ret:
111
+ break
112
+ # BGR -> RGB
113
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
114
+ # Resize
115
+ frame = cv2.resize(frame, size)
116
+ frames.append(frame)
117
+
118
+ cap.release()
119
+
120
+ # 프레임 부족 시 마지막 프레임 복제
121
+ while len(frames) < num_frames:
122
+ frames.append(frames[-1] if frames else np.zeros((*size, 3), dtype=np.uint8))
123
+
124
+ # [T, H, W, C] -> [C, T, H, W]
125
+ frames = np.array(frames[:num_frames], dtype=np.float32)
126
+ frames = frames.transpose(3, 0, 1, 2) # [C, T, H, W]
127
+
128
+ # Normalize to [0, 1]
129
+ frames = frames / 255.0
130
+
131
+ # ImageNet normalization
132
+ mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1, 1)
133
+ std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1, 1)
134
+ frames = (frames - mean) / std
135
+
136
+ return torch.FloatTensor(frames)
137
+
138
+ def predict(model, video_path, device="cuda"):
139
+ """단일 비디오 추론"""
140
+ model = model.to(device)
141
+ model.eval()
142
+
143
+ # 프레임 로드
144
+ frames = load_video_frames(video_path)
145
+ frames = frames.unsqueeze(0).to(device) # [1, C, T, H, W]
146
+
147
+ # 추론
148
+ with torch.no_grad():
149
+ outputs = model(frames)
150
+ probs = torch.softmax(outputs, dim=1)
151
+ pred_idx = torch.argmax(probs, dim=1).item()
152
+ confidence = probs[0, pred_idx].item()
153
+
154
+ return {
155
+ "class_id": pred_idx,
156
+ "class_name_ko": CLASS_NAMES[pred_idx],
157
+ "class_name_en": CLASS_NAMES_EN[pred_idx],
158
+ "confidence": confidence,
159
+ "all_probabilities": {
160
+ CLASS_NAMES[i]: probs[0, i].item()
161
+ for i in range(len(CLASS_NAMES))
162
+ }
163
+ }
164
+
165
+ # 사용 예시
166
+ result = predict(model, "test_video.mp4")
167
+ print(f"예측: {result['class_name_ko']} ({result['confidence']:.2%})")
168
+ ```
169
+
170
+ ---
171
+
172
+ ## 📹 Real-time Inference (실시간 추론)
173
+
174
+ ```python
175
+ import cv2
176
+ import torch
177
+ import numpy as np
178
+ from collections import deque
179
+
180
+ class RealtimeDriverBehaviorDetector:
181
+ """실시간 운전자 이상행동 탐지기"""
182
+
183
+ CLASS_NAMES = ["정상", "졸음운전", "물건찾기", "휴대폰 사용", "운전자 폭행"]
184
+
185
+ def __init__(self, model_path, device="cuda", window_size=30, stride=15):
186
+ """
187
+ Args:
188
+ model_path: pytorch_model.bin 경로
189
+ device: 'cuda' 또는 'cpu'
190
+ window_size: 분석할 프레임 수 (기본 30 = 1초 @30fps)
191
+ stride: 슬라이딩 윈도우 간격 (기본 15 = 0.5초)
192
+ """
193
+ self.device = device
194
+ self.window_size = window_size
195
+ self.stride = stride
196
+
197
+ # 모델 로드
198
+ from torchvision.models.video import swin3d_t
199
+ self.model = swin3d_t(weights=None)
200
+ self.model.head = torch.nn.Linear(self.model.head.in_features, 5)
201
+
202
+ state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
203
+ self.model.load_state_dict(state_dict)
204
+ self.model.to(device)
205
+ self.model.eval()
206
+
207
+ # 프레임 버퍼
208
+ self.frame_buffer = deque(maxlen=window_size)
209
+ self.frame_count = 0
210
+
211
+ # Normalization 파라미터
212
+ self.mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1, 1)
213
+ self.std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1, 1)
214
+
215
+ def preprocess_frame(self, frame):
216
+ """단일 프레임 전처리"""
217
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
218
+ frame = cv2.resize(frame, (224, 224))
219
+ return frame
220
+
221
+ def predict(self):
222
+ """현재 버퍼의 프레임으로 추론"""
223
+ if len(self.frame_buffer) < self.window_size:
224
+ return None
225
+
226
+ # [T, H, W, C] -> [C, T, H, W]
227
+ frames = np.array(list(self.frame_buffer), dtype=np.float32)
228
+ frames = frames.transpose(3, 0, 1, 2) / 255.0
229
+ frames = (frames - self.mean) / self.std
230
+
231
+ # 추론
232
+ with torch.no_grad():
233
+ inputs = torch.FloatTensor(frames).unsqueeze(0).to(self.device)
234
+ outputs = self.model(inputs)
235
+ probs = torch.softmax(outputs, dim=1)
236
+ pred_idx = torch.argmax(probs, dim=1).item()
237
+ confidence = probs[0, pred_idx].item()
238
+
239
+ return {
240
+ "class_id": pred_idx,
241
+ "class_name": self.CLASS_NAMES[pred_idx],
242
+ "confidence": confidence,
243
+ "is_abnormal": pred_idx != 0, # 0 = 정상
244
+ "probabilities": probs[0].cpu().numpy()
245
+ }
246
+
247
+ def process_frame(self, frame):
248
+ """프레임 처리 (stride마다 추론)"""
249
+ processed = self.preprocess_frame(frame)
250
+ self.frame_buffer.append(processed)
251
+ self.frame_count += 1
252
+
253
+ # stride마다 추론
254
+ if self.frame_count % self.stride == 0:
255
+ return self.predict()
256
+ return None
257
+
258
+ def run_on_video(self, video_source=0, show_display=True):
259
+ """
260
+ 비디오 소스에서 실시간 추론
261
+
262
+ Args:
263
+ video_source: 웹캠(0) 또는 비디오 파일 경로
264
+ show_display: 화면 출력 여부
265
+ """
266
+ cap = cv2.VideoCapture(video_source)
267
+
268
+ # 색상 정의 (BGR)
269
+ colors = {
270
+ "정상": (0, 255, 0), # 초록
271
+ "졸음운전": (0, 165, 255), # 주황
272
+ "물건찾기": (0, 255, 255), # 노랑
273
+ "휴대폰 사용": (0, 0, 255), # 빨강
274
+ "운전자 폭행": (255, 0, 255) # 보라
275
+ }
276
+
277
+ current_result = None
278
+
279
+ while True:
280
+ ret, frame = cap.read()
281
+ if not ret:
282
+ break
283
+
284
+ # 추론
285
+ result = self.process_frame(frame)
286
+ if result:
287
+ current_result = result
288
+
289
+ # 화면 출력
290
+ if show_display and current_result:
291
+ label = current_result["class_name"]
292
+ conf = current_result["confidence"]
293
+ color = colors.get(label, (255, 255, 255))
294
+
295
+ # 상태 표시
296
+ text = f"{label}: {conf:.1%}"
297
+ cv2.putText(frame, text, (10, 40),
298
+ cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 3)
299
+
300
+ # 경고 (이상행동 탐지 시)
301
+ if current_result["is_abnormal"]:
302
+ cv2.putText(frame, "WARNING!", (10, 80),
303
+ cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2)
304
+
305
+ cv2.imshow("Driver Behavior Detection", frame)
306
+
307
+ if cv2.waitKey(1) & 0xFF == ord('q'):
308
+ break
309
+
310
+ cap.release()
311
+ cv2.destroyAllWindows()
312
+
313
+
314
+ # ===== 사용 예시 =====
315
+
316
+ # 1. 웹캠 실시간 추론
317
+ detector = RealtimeDriverBehaviorDetector("pytorch_model.bin", device="cuda")
318
+ detector.run_on_video(video_source=0) # 웹캠
319
+
320
+ # 2. 비디오 파일 추론
321
+ detector.run_on_video(video_source="test_video.mp4")
322
+ ```
323
+
324
+ ---
325
+
326
+ ## 🔧 Batch Inference (배치 추론)
327
+
328
+ ```python
329
+ import torch
330
+ from pathlib import Path
331
+ from torch.utils.data import Dataset, DataLoader
332
+
333
+ class VideoDataset(Dataset):
334
+ """비디오 파일 배치 처리용 Dataset"""
335
+
336
+ def __init__(self, video_paths, num_frames=30):
337
+ self.video_paths = video_paths
338
+ self.num_frames = num_frames
339
+ self.mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1, 1)
340
+ self.std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1, 1)
341
+
342
+ def __len__(self):
343
+ return len(self.video_paths)
344
+
345
+ def __getitem__(self, idx):
346
+ video_path = self.video_paths[idx]
347
+
348
+ cap = cv2.VideoCapture(str(video_path))
349
+ frames = []
350
+
351
+ while len(frames) < self.num_frames:
352
+ ret, frame = cap.read()
353
+ if not ret:
354
+ break
355
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
356
+ frame = cv2.resize(frame, (224, 224))
357
+ frames.append(frame)
358
+
359
+ cap.release()
360
+
361
+ while len(frames) < self.num_frames:
362
+ frames.append(frames[-1] if frames else np.zeros((224, 224, 3), dtype=np.uint8))
363
+
364
+ frames = np.array(frames[:self.num_frames], dtype=np.float32)
365
+ frames = frames.transpose(3, 0, 1, 2) / 255.0
366
+ frames = (frames - self.mean) / self.std
367
+
368
+ return torch.FloatTensor(frames), str(video_path)
369
+
370
+
371
+ def batch_inference(model, video_folder, batch_size=8, device="cuda"):
372
+ """
373
+ 폴더 내 모든 비디오 배치 추론
374
+
375
+ Args:
376
+ model: 로드된 모델
377
+ video_folder: 비디오 폴더 경로
378
+ batch_size: 배치 크기
379
+ device: 'cuda' 또는 'cpu'
380
+
381
+ Returns:
382
+ List of (video_path, prediction) tuples
383
+ """
384
+ CLASS_NAMES = ["정상", "졸음운전", "물건찾기", "휴대폰 사용", "운전자 폭행"]
385
+
386
+ video_folder = Path(video_folder)
387
+ video_paths = list(video_folder.glob("*.mp4")) + list(video_folder.glob("*.avi"))
388
+
389
+ dataset = VideoDataset(video_paths)
390
+ dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=4)
391
+
392
+ model = model.to(device)
393
+ model.eval()
394
+
395
+ results = []
396
+
397
+ with torch.no_grad():
398
+ for frames, paths in dataloader:
399
+ frames = frames.to(device)
400
+ outputs = model(frames)
401
+ probs = torch.softmax(outputs, dim=1)
402
+ preds = torch.argmax(probs, dim=1)
403
+
404
+ for path, pred_idx, prob in zip(paths, preds, probs):
405
+ results.append({
406
+ "video_path": path,
407
+ "class_id": pred_idx.item(),
408
+ "class_name": CLASS_NAMES[pred_idx.item()],
409
+ "confidence": prob[pred_idx].item()
410
+ })
411
+
412
+ return results
413
+
414
+ # 사용 예시
415
+ results = batch_inference(model, "./videos/", batch_size=16)
416
+ for r in results:
417
+ print(f"{r['video_path']}: {r['class_name']} ({r['confidence']:.2%})")
418
+ ```
419
+
420
+ ---
421
+
422
+ ## 📐 Input/Output Specification
423
+
424
+ ### Input Format
425
+
426
+ | Parameter | Value |
427
+ |-----------|-------|
428
+ | **Shape** | `[batch, 3, 30, 224, 224]` |
429
+ | **Format** | `[B, C, T, H, W]` (Batch, Channel, Time, Height, Width) |
430
+ | **Channels** | RGB (not BGR) |
431
+ | **Normalization** | ImageNet (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) |
432
+ | **Value Range** | After normalization: approximately [-2.1, 2.6] |
433
+
434
+ ### Output Format
435
+
436
+ | Parameter | Value |
437
+ |-----------|-------|
438
+ | **Shape** | `[batch, 5]` |
439
+ | **Format** | Raw logits (use softmax for probabilities) |
440
+ | **Classes** | 0=정상, 1=졸음운전, 2=물건찾기, 3=휴대폰 사용, 4=운전자 폭행 |
441
+
442
+ ---
443
+
444
+ ## ⚙️ Model Architecture
445
+
446
+ ```
447
+ VideoSwinTransformer (swin3d_t)
448
+ ├── patch_embed: PatchEmbed3d
449
+ │ └── proj: Conv3d(3, 96, kernel_size=(2,4,4), stride=(2,4,4))
450
+ ├── layers: Sequential
451
+ │ ├── BasicLayer (depth=2, heads=3, dim=96)
452
+ │ ├── BasicLayer (depth=2, heads=6, dim=192)
453
+ │ ├── BasicLayer (depth=6, heads=12, dim=384)
454
+ │ └── BasicLayer (depth=2, heads=24, dim=768)
455
+ ├── norm: LayerNorm(768)
456
+ ├── avgpool: AdaptiveAvgPool3d(1)
457
+ └── head: Linear(768, 5) # Modified for 5 classes
458
+
459
+ Total Parameters: 27,855,851
460
+ Trainable Parameters: 27,855,851
461
+ ```
462
+
463
+ ---
464
+
465
+ ## 🏋️ Training Details
466
+
467
+ | Parameter | Value |
468
+ |-----------|-------|
469
+ | **Base Model** | swin3d_t (Kinetics-400 pretrained) |
470
+ | **Framework** | PyTorch 2.0+ |
471
+ | **GPUs** | 2x NVIDIA A6000 (48GB each) |
472
+ | **Training Method** | DistributedDataParallel (DDP) |
473
+ | **Batch Size** | 128 effective (16 per GPU × 2 GPUs × 4 accumulation) |
474
+ | **Optimizer** | AdamW (lr=1e-3, weight_decay=1e-4) |
475
+ | **Scheduler** | OneCycleLR (pct_start=0.2, anneal=cosine) |
476
+ | **Mixed Precision** | FP16 (torch.amp) |
477
+ | **Epochs** | 1 (of 5 total) |
478
+
479
+ ---
480
+
481
+ ## 📁 Dataset Information
482
+
483
+ | Property | Value |
484
+ |----------|-------|
485
+ | **Name** | Korean Driver Behavior Dataset |
486
+ | **Total Videos** | 243,979 |
487
+ | **Total Samples** | 1,371,062 (sliding window) |
488
+ | **Window Size** | 30 frames |
489
+ | **Stride** | 15 frames |
490
+ | **Resolution** | Various (resized to 224×224) |
491
+ | **FPS** | 30 |
492
+
493
+ ### Class Distribution
494
+
495
+ | Class | Samples | Percentage |
496
+ |-------|---------|------------|
497
+ | 정상 | 159,224 | 11.6% |
498
+ | 졸음운전 | 619,450 | 45.2% |
499
+ | 물건찾기 | 261,435 | 19.1% |
500
+ | 휴대폰 사용 | 150,981 | 11.0% |
501
+ | 운전자 폭행 | 179,972 | 13.1% |
502
+
503
+ ---
504
+
505
+ ## ⚠️ Limitations & Considerations
506
+
507
+ 1. **카메라 위치**: 운전석 정면 또는 측면 카메라에 최적화됨
508
+ 2. **조명 조건**: 야간/터널 등 저조도 환경에서 성능 저하 가능
509
+ 3. **가림 현상**: 선글라스, 마스크 착용 시 정확도 감소 가능
510
+ 4. **실시간 요구사항**: GPU 필요 (CPU에서는 느림)
511
+
512
+ ---
513
+
514
+ ## 📜 License
515
+
516
+ Apache 2.0
517
+
518
+ ---
519
+
520
+ ## 🔗 Citation
521
+
522
+ ```bibtex
523
+ @misc{driver-behavior-detection-2025,
524
+ title={Driver Abnormal Behavior Detection using Video Swin Transformer},
525
+ author={C-Team},
526
+ year={2025},
527
+ howpublished={\url{https://huggingface.co/YOUR_USERNAME/driver-behavior-swin-t}}
528
+ }
529
+ ```
530
+
531
+ ---
532
+
533
+ ## 📞 Contact
534
+
535
+ Issues and questions: [GitHub Issues](https://github.com/YOUR_USERNAME/driver-behavior-detection/issues)
config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "VideoSwinTransformer"
4
+ ],
5
+ "model_type": "video-swin-transformer",
6
+ "backbone": "swin3d_t",
7
+ "pretrained_source": "kinetics400",
8
+ "num_classes": 5,
9
+ "class_names": [
10
+ "정상",
11
+ "졸음운전",
12
+ "물건찾기",
13
+ "휴대폰 사용",
14
+ "운전자 폭행"
15
+ ],
16
+ "input_size": {
17
+ "frames": 30,
18
+ "height": 224,
19
+ "width": 224,
20
+ "channels": 3
21
+ },
22
+ "input_format": "CTHW",
23
+ "training": {
24
+ "epochs_trained": 1,
25
+ "total_epochs": 5,
26
+ "batch_size": 16,
27
+ "effective_batch_size": 128,
28
+ "learning_rate": 0.001,
29
+ "optimizer": "AdamW",
30
+ "scheduler": "OneCycleLR",
31
+ "mixed_precision": true,
32
+ "gradient_accumulation_steps": 4
33
+ },
34
+ "metrics": {
35
+ "accuracy": 0.9551,
36
+ "macro_f1": 0.9436,
37
+ "per_class_f1": {
38
+ "정상": 0.92,
39
+ "졸음운전": 0.98,
40
+ "물건찾기": 0.92,
41
+ "휴대폰 사용": 0.9,
42
+ "운전자 폭행": 1.0
43
+ }
44
+ },
45
+ "dataset": {
46
+ "name": "Korean Driver Behavior Dataset",
47
+ "total_samples": 1371062,
48
+ "num_videos": 243979,
49
+ "sliding_window": 30,
50
+ "stride": 15
51
+ }
52
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc7eb66a00e43a79a4db83cad13a36dc97b87d500a1a6f0bcec72779d22fdaf9
3
+ size 126244047