NAGA commited on
Commit
6748ade
·
1 Parent(s): 1298564

Upload 182 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. .gitattributes +4 -0
  3. .gitignore +5 -0
  4. Readme.md +85 -0
  5. __pycache__/yolo_v7.cpython-310.pyc +0 -0
  6. __pycache__/yolo_v7.cpython-39.pyc +0 -0
  7. __pycache__/yolor.cpython-39.pyc +0 -0
  8. byte_track/__init__.py +3 -0
  9. byte_track/__pycache__/__init__.cpython-310.pyc +0 -0
  10. byte_track/__pycache__/__init__.cpython-39.pyc +0 -0
  11. byte_track/__pycache__/bytetracker.cpython-310.pyc +0 -0
  12. byte_track/__pycache__/bytetracker.cpython-39.pyc +0 -0
  13. byte_track/bytetracker.py +106 -0
  14. byte_track/tracker/__init__.py +4 -0
  15. byte_track/tracker/__pycache__/__init__.cpython-310.pyc +0 -0
  16. byte_track/tracker/__pycache__/__init__.cpython-39.pyc +0 -0
  17. byte_track/tracker/__pycache__/basetrack.cpython-310.pyc +0 -0
  18. byte_track/tracker/__pycache__/basetrack.cpython-39.pyc +0 -0
  19. byte_track/tracker/__pycache__/byte_tracker.cpython-310.pyc +0 -0
  20. byte_track/tracker/__pycache__/byte_tracker.cpython-39.pyc +0 -0
  21. byte_track/tracker/__pycache__/kalman_filter.cpython-310.pyc +0 -0
  22. byte_track/tracker/__pycache__/kalman_filter.cpython-39.pyc +0 -0
  23. byte_track/tracker/__pycache__/matching.cpython-310.pyc +0 -0
  24. byte_track/tracker/__pycache__/matching.cpython-39.pyc +0 -0
  25. byte_track/tracker/basetrack.py +52 -0
  26. byte_track/tracker/byte_tracker.py +326 -0
  27. byte_track/tracker/kalman_filter.py +270 -0
  28. byte_track/tracker/matching.py +178 -0
  29. cfg/yolor_csp.cfg +1376 -0
  30. cfg/yolor_csp_x.cfg +1576 -0
  31. cfg/yolor_p6.cfg +1760 -0
  32. cfg/yolor_w6.cfg +1760 -0
  33. cfg/yolov4_csp.cfg +1334 -0
  34. cfg/yolov4_csp_x.cfg +1534 -0
  35. cfg/yolov4_p6.cfg +2260 -0
  36. cfg/yolov4_p7.cfg +2714 -0
  37. data/coco.names +80 -0
  38. data/coco.yaml +18 -0
  39. data/hyp.finetune.1280.yaml +28 -0
  40. data/hyp.scratch.1280.yaml +28 -0
  41. data/hyp.scratch.640.yaml +28 -0
  42. deep_sort_pytorch/.DS_Store +0 -0
  43. deep_sort_pytorch/.gitignore +13 -0
  44. deep_sort_pytorch/LICENSE +21 -0
  45. deep_sort_pytorch/README.md +137 -0
  46. deep_sort_pytorch/configs/deep_sort.yaml +10 -0
  47. deep_sort_pytorch/deep_sort/.DS_Store +0 -0
  48. deep_sort_pytorch/deep_sort/README.md +3 -0
  49. deep_sort_pytorch/deep_sort/__init__.py +21 -0
  50. deep_sort_pytorch/deep_sort/__pycache__/__init__.cpython-310.pyc +0 -0
.DS_Store ADDED
Binary file (10.2 kB). View file
 
.gitattributes CHANGED
@@ -32,3 +32,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ deep_sort_pytorch/deep_sort/deep/checkpoint/ckpt.t7 filter=lfs diff=lfs merge=lfs -text
36
+ inference/output/results.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ results.mp4 filter=lfs diff=lfs merge=lfs -text
38
+ test.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ .env/
2
+ **__pycache__**
3
+ *.onnx
4
+ *.pt
5
+ yolov7/weights/
Readme.md ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Yolov7 with ByteTrack
2
+
3
+ 1. Clone repo.
4
+
5
+ ```
6
+ git clone https://github.com/axcelerateai/yolov7-bytetrack-streamlit.git
7
+ cd yolov7-bytetrack-streamlit
8
+ ```
9
+
10
+ 2. Install requirements.
11
+
12
+ ### Pip
13
+
14
+ ```
15
+ python3 -m venv .env
16
+ source .env/bin/activate
17
+ ```
18
+ ```
19
+ pip install Cython numpy
20
+ ```
21
+ ```
22
+ pip install -r requirements.txt
23
+ ```
24
+
25
+ - [Note]: `cython_bbox` has no Windows distribution on PyPI. If you're a Windows user, run the following command to install `cython_bbox` from source.
26
+
27
+ ```
28
+ # for windows
29
+ pip install -e git+https://github.com/samson-wang/cython_bbox.git#egg=cython-bbox
30
+
31
+ # for linux
32
+ pip install cython-bbox
33
+
34
+ ```
35
+
36
+ ### conda
37
+
38
+ ```
39
+ conda env create -f environment.yml
40
+ ```
41
+
42
+ ```
43
+ conda activate yolov7_bytetrack
44
+ ```
45
+
46
+ - [Note]: `cython_bbox` has no Windows distribution on PyPI. If you're a Windows user, run the following command to install `cython_bbox` from source.
47
+
48
+ ```
49
+ # for windows
50
+ pip install -e git+https://github.com/samson-wang/cython_bbox.git#egg=cython-bbox
51
+
52
+ # for linux
53
+ pip install cython-bbox
54
+
55
+ ```
56
+
57
+
58
+ 3. Download weights.
59
+
60
+ ```
61
+ python download_weights.py
62
+ ```
63
+
64
+ 4. Run the Streamlit server.
65
+
66
+ ```
67
+ streamlit run yolov7-tiny-demo.py --server.port [LPORT]
68
+ ```
69
+ - `LPORT` = Local port of system
70
+
71
+ ### Test yolov7-tiny
72
+
73
+ - To run Yolov7-Tiny
74
+ ```
75
+ streamlit run yolov7-tiny-demo.py --server.port 2085
76
+ ```
77
+
78
+ ### Test yolov7
79
+ ```
80
+ streamlit run yolov7-demo.py --server.port 2085
81
+ ```
82
+ ### Test yolor
83
+ ```
84
+ streamlit run yolor-demo.py --server.port 2085
85
+ ```
__pycache__/yolo_v7.cpython-310.pyc ADDED
Binary file (13.8 kB). View file
 
__pycache__/yolo_v7.cpython-39.pyc ADDED
Binary file (13.7 kB). View file
 
__pycache__/yolor.cpython-39.pyc ADDED
Binary file (12.4 kB). View file
 
byte_track/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ import sys
2
+ import os
3
+ sys.path.append(os.path.dirname(__file__))
byte_track/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (259 Bytes). View file
 
byte_track/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (247 Bytes). View file
 
byte_track/__pycache__/bytetracker.cpython-310.pyc ADDED
Binary file (2.76 kB). View file
 
byte_track/__pycache__/bytetracker.cpython-39.pyc ADDED
Binary file (2.74 kB). View file
 
byte_track/bytetracker.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .tracker.byte_tracker import BYTETracker
2
+ import cv2
3
+ import numpy as np
4
+
5
class ByteTrack(object):
    """Run a detector on an image and feed its detections to a BYTETracker.

    The detector is expected to expose ``detect(...)`` returning
    ``(dets, image_info)`` and a ``model.get_inputs()`` accessor; both are
    used exactly as called below (their implementation is not visible here).
    """

    def __init__(self, detector, min_box_area=10):
        """Store the detector and create the tracker.

        :param detector: object providing ``detect`` and ``model.get_inputs``.
        :param min_box_area: tracks whose box area is not above this value
            are dropped from the output of ``_tracker_update``.
        """
        self.detector = detector
        self.min_box_area = min_box_area

        # Normalisation constants; kept as attributes, not read by this class.
        self.rgb_means = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)

        # Trailing dimensions of the model's first input (everything after
        # the first two axes).
        first_input = detector.model.get_inputs()[0]
        self.input_shape = tuple(first_input.shape[2:])
        self.tracker = BYTETracker(frame_rate=30)

    def inference(self, image, conf_thresh=0.25, classes=None):
        """Detect on ``image`` and update the tracker.

        :return: ``(bboxes, ids, scores, class_ids)``. ``class_ids`` is the
            last column of the raw detections, so it is NOT filtered like the
            other three lists. All four are empty lists when the detector
            returns no ndarray or an empty one.
        """
        dets, image_info = self.detector.detect(
            image,
            conf_thres=conf_thresh,
            input_shape=self.input_shape,
            classes=classes,
        )

        has_detections = isinstance(dets, np.ndarray) and len(dets) > 0
        if not has_detections:
            return [], [], [], []

        class_ids = dets[:, -1].tolist()
        bboxes, ids, scores = self._tracker_update(dets, image_info)
        return bboxes, ids, scores, class_ids

    def get_id_color(self, index):
        """Map a track id to a deterministic 3-tuple of channel values."""
        scaled = abs(int(index)) * 3
        return tuple((factor * scaled) % 255 for factor in (37, 17, 29))

    def draw_tracking_info(
        self,
        image,
        tlwhs,
        ids,
        scores,
        frame_id=0,
        elapsed_time=0.,
    ):
        """Draw each box and its id on ``image`` (in place) and return it.

        ``scores``, ``frame_id`` and ``elapsed_time`` are accepted but unused.
        """
        text_scale = 1.5
        text_thickness = 2
        line_thickness = 2

        for index, tlwh in enumerate(tlwhs):
            left, top = int(tlwh[0]), int(tlwh[1])
            right = left + int(tlwh[2])
            bottom = top + int(tlwh[3])

            track_id = ids[index]
            box_color = self.get_id_color(track_id)
            cv2.rectangle(image, (left, top), (right, bottom), box_color,
                          line_thickness)

            label = str(track_id)
            anchor = (left, top - 5)
            # Thick black pass first, thinner white pass on top, so the
            # label stays readable on any background.
            passes = (
                ((0, 0, 0), text_thickness + 3),
                ((255, 255, 255), text_thickness),
            )
            for text_color, thickness in passes:
                cv2.putText(image, label, anchor, cv2.FONT_HERSHEY_PLAIN,
                            text_scale, text_color, thickness)
        return image

    def _tracker_update(self, dets, image_info):
        """Advance the tracker by one frame and filter its output.

        The last column of ``dets`` is stripped before the tracker sees it.
        The image size is passed as both ``img_info`` and ``img_size``.

        :return: ``(tlwhs, ids, scores)`` of the tracks that pass the area
            and aspect-ratio filters.
        """
        if dets is None:
            targets = []
        else:
            frame_size = [image_info['height'], image_info['width']]
            targets = self.tracker.update(
                dets[:, :-1],
                frame_size,
                [image_info['height'], image_info['width']],
            )

        kept_tlwhs, kept_ids, kept_scores = [], [], []
        for target in targets:
            tlwh = target.tlwh
            track_id = target.track_id
            # Width/height ratio above 1.6 is rejected (named "vertical" in
            # the original code).
            too_wide = tlwh[2] / tlwh[3] > 1.6
            # Written as `not (a > b)` so a NaN area is still rejected.
            if not (tlwh[2] * tlwh[3] > self.min_box_area) or too_wide:
                continue
            kept_tlwhs.append(tlwh)
            kept_ids.append(track_id)
            kept_scores.append(target.score)

        return kept_tlwhs, kept_ids, kept_scores
byte_track/tracker/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ sys.path.append(os.path.dirname(__file__))
4
+
byte_track/tracker/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (267 Bytes). View file
 
byte_track/tracker/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (255 Bytes). View file
 
byte_track/tracker/__pycache__/basetrack.cpython-310.pyc ADDED
Binary file (1.84 kB). View file
 
byte_track/tracker/__pycache__/basetrack.cpython-39.pyc ADDED
Binary file (1.87 kB). View file
 
byte_track/tracker/__pycache__/byte_tracker.cpython-310.pyc ADDED
Binary file (9.43 kB). View file
 
byte_track/tracker/__pycache__/byte_tracker.cpython-39.pyc ADDED
Binary file (9.43 kB). View file
 
byte_track/tracker/__pycache__/kalman_filter.cpython-310.pyc ADDED
Binary file (8.11 kB). View file
 
byte_track/tracker/__pycache__/kalman_filter.cpython-39.pyc ADDED
Binary file (8.09 kB). View file
 
byte_track/tracker/__pycache__/matching.cpython-310.pyc ADDED
Binary file (6.12 kB). View file
 
byte_track/tracker/__pycache__/matching.cpython-39.pyc ADDED
Binary file (6.36 kB). View file
 
byte_track/tracker/basetrack.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from collections import OrderedDict
3
+
4
+
5
class TrackState(object):
    """Integer codes for the lifecycle stage of a track."""

    # New = 0, Tracked = 1, Lost = 2, Removed = 3
    New, Tracked, Lost, Removed = range(4)
10
+
11
+
12
class BaseTrack(object):
    """Common state and id allocation for track objects.

    Subclasses are expected to implement ``activate``, ``predict`` and
    ``update``; the base versions only raise ``NotImplementedError``.
    """

    # Global counter behind next_id(); shared by every subclass because it
    # is always read and written through the BaseTrack name.
    _count = 0

    # Per-track defaults. These are class attributes, so they act as
    # fallbacks until an instance assigns its own value.
    track_id = 0
    is_activated = False
    state = TrackState.New

    # NOTE(review): `history` and `features` are mutable objects created once
    # on the class; any in-place mutation would be visible to all tracks.
    history = OrderedDict()
    features = []
    curr_feature = None
    score = 0
    start_frame = 0
    frame_id = 0
    time_since_update = 0

    # multi-camera
    location = (np.inf, np.inf)

    @property
    def end_frame(self):
        """Frame index stored in ``frame_id``."""
        return self.frame_id

    @staticmethod
    def next_id():
        """Increment the shared counter and return the new value."""
        issued = BaseTrack._count + 1
        BaseTrack._count = issued
        return issued

    def activate(self, *args):
        """To be provided by subclasses."""
        raise NotImplementedError

    def predict(self):
        """To be provided by subclasses."""
        raise NotImplementedError

    def update(self, *args, **kwargs):
        """To be provided by subclasses."""
        raise NotImplementedError

    def mark_lost(self):
        """Switch this track to ``TrackState.Lost``."""
        self.state = TrackState.Lost

    def mark_removed(self):
        """Switch this track to ``TrackState.Removed``."""
        self.state = TrackState.Removed
byte_track/tracker/byte_tracker.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from .kalman_filter import KalmanFilter
3
+ import matching
4
+ from .basetrack import BaseTrack, TrackState
5
+
6
class STrack(BaseTrack):
    """One tracked object: a bounding box plus its Kalman filter state.

    A freshly constructed instance only holds the detection box and score;
    ``mean``/``covariance`` stay ``None`` until ``activate`` is called.
    """

    # Filter instance used by the batched `multi_predict` step for all tracks.
    shared_kalman = KalmanFilter()

    def __init__(self, tlwh, score):
        """Create an inactive track from a detection.

        :param tlwh: box as (top-left x, top-left y, width, height).
        :param score: detection score stored on the track.
        """
        # wait activate
        # `np.float` was removed in NumPy 1.24 and raises AttributeError
        # there; float64 is the dtype that alias resolved to.
        self._tlwh = np.asarray(tlwh, dtype=np.float64)
        self.kalman_filter = None
        self.mean, self.covariance = None, None
        self.is_activated = False

        self.score = score
        self.tracklet_len = 0

    def predict(self):
        """Advance this track's state by one filter prediction step."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            # Index 7 is the last component of the 8-value state (the height
            # velocity in an x, y, a, h, vx, vy, va, vh layout); it is zeroed
            # for tracks that are not currently tracked.
            mean_state[7] = 0
        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)

    @staticmethod
    def multi_predict(stracks):
        """Run the prediction step for all ``stracks`` in one batched call.

        Each track's ``mean`` and ``covariance`` are replaced in place.
        Does nothing for an empty list.
        """
        if len(stracks) > 0:
            multi_mean = np.asarray([st.mean.copy() for st in stracks])
            multi_covariance = np.asarray([st.covariance for st in stracks])
            for i, st in enumerate(stracks):
                if st.state != TrackState.Tracked:
                    multi_mean[i][7] = 0
            multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
            for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
                stracks[i].mean = mean
                stracks[i].covariance = cov

    def activate(self, kalman_filter, frame_id):
        """Start a new tracklet.

        Assigns a fresh track id and initialises the filter state from the
        stored detection box. The track is marked activated immediately only
        when ``frame_id`` is 1.
        """
        self.kalman_filter = kalman_filter
        self.track_id = self.next_id()
        self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh))

        self.tracklet_len = 0
        self.state = TrackState.Tracked
        if frame_id == 1:
            self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track, frame_id, new_id=False):
        """Resume this track using the box and score of ``new_track``.

        :param new_track: STrack holding the matched detection.
        :param frame_id: current frame index.
        :param new_id: when true, a new track id is allocated.
        """
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)
        )
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score

    def update(self, new_track, frame_id):
        """
        Update a matched track

        :type new_track: STrack
        :type frame_id: int
        :return:
        """
        self.frame_id = frame_id
        self.tracklet_len += 1

        new_tlwh = new_track.tlwh
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
        self.state = TrackState.Tracked
        self.is_activated = True

        self.score = new_track.score

    @property
    def tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.

        Falls back to the original detection box while the filter state has
        not been initialised.
        """
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        # State stores (center x, center y, aspect ratio, height):
        # width = aspect * height, then shift the centre to the top-left.
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    @property
    def tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    def tlwh_to_xyah(tlwh):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def to_xyah(self):
        """Return the current box as `(center x, center y, aspect ratio, height)`."""
        return self.tlwh_to_xyah(self.tlwh)

    @staticmethod
    def tlbr_to_tlwh(tlbr):
        """Convert `(min x, min y, max x, max y)` to `(x, y, width, height)`."""
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    @staticmethod
    def tlwh_to_tlbr(tlwh):
        """Convert `(x, y, width, height)` to `(min x, min y, max x, max y)`."""
        ret = np.asarray(tlwh).copy()
        ret[2:] += ret[:2]
        return ret

    def __repr__(self):
        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
136
+
137
+
138
class BYTETracker(object):
    """Multi-object tracker that associates detections to tracks in two
    passes: first the high-score detections, then the low-score ones.
    """

    def __init__(self, track_thresh=0.5,match_thresh=0.8, track_buffer=30, mot20=False, frame_rate=30):
        """
        :param track_thresh: score above which a detection joins the first
            association pass.
        :param match_thresh: cost limit for the first association pass.
        :param track_buffer: number of frames (at 30 fps) a lost track is
            kept; scaled by ``frame_rate / 30``.
        :param mot20: when true, detection scores are not fused into the
            IoU cost.
        :param frame_rate: frame rate used to scale ``track_buffer``.
        """
        self.tracked_stracks = []  # type: list[STrack]
        self.lost_stracks = []  # type: list[STrack]
        self.removed_stracks = []  # type: list[STrack]

        self.track_thresh = track_thresh
        self.track_buffer = track_buffer
        self.mot20 = mot20
        self.match_thresh = match_thresh

        self.frame_id = 0
        # New tracks are only started from detections at or above this score.
        self.det_thresh = track_thresh + 0.1
        self.buffer_size = int(frame_rate / 30.0 * self.track_buffer)
        self.max_time_lost = self.buffer_size
        self.kalman_filter = KalmanFilter()

    def update(self, output_results, img_info, img_size):
        """Process the detections of one frame.

        :param output_results: 2-D array of detections. With 5 columns the
            layout is (x1, y1, x2, y2, score); otherwise columns 4 and 5 are
            multiplied to form the score.
        :param img_info: (height, width) used as the denominator of the
            rescale factor.
        :param img_size: (height, width) used as the numerator of the
            rescale factor.
        :return: list of the activated tracks currently being tracked.
        """
        self.frame_id += 1
        activated_stracks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        if output_results.shape[1] == 5:
            scores = output_results[:, 4]
            bboxes = output_results[:, :4]
        else:
            # Only objects exposing `.cpu()` (presumably torch tensors) need
            # conversion; a plain ndarray is used as-is instead of failing.
            if hasattr(output_results, 'cpu'):
                output_results = output_results.cpu().numpy()
            scores = output_results[:, 4] * output_results[:, 5]
            bboxes = output_results[:, :4]  # x1y1x2y2
        img_h, img_w = img_info[0], img_info[1]
        scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w))
        # Out-of-place on purpose: `bboxes` is a view into the caller's
        # array, so `/=` would rewrite the caller's data (and fail outright
        # on integer arrays).
        bboxes = bboxes / scale

        remain_inds = scores > self.track_thresh
        inds_low = scores > 0.1
        inds_high = scores < self.track_thresh

        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]
        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            # High-score detections
            detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for
                          (tlbr, s) in zip(dets, scores_keep)]
        else:
            detections = []

        # Split existing tracks into unconfirmed (not yet activated) and
        # confirmed ones.
        unconfirmed = []
        tracked_stracks = []  # type: list[STrack]
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        # Step 2: First association, with high score detection boxes
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict the current location with KF
        STrack.multi_predict(strack_pool)
        dists = matching.iou_distance(strack_pool, detections)
        if not self.mot20:
            dists = matching.fuse_score(dists, detections)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.match_thresh)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(detections[idet], self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # Step 3: Second association, with low score detection boxes.
        # Only tracks that were unmatched above and are still in the
        # Tracked state take part.
        if len(dets_second) > 0:
            detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for
                                 (tlbr, s) in zip(dets_second, scores_second)]
        else:
            detections_second = []
        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
        dists = matching.iou_distance(r_tracked_stracks, detections_second)
        matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # Tracks still unmatched after both passes become lost.
        for it in u_track:
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        # Deal with unconfirmed tracks, usually tracks with only one
        # beginning frame: match them against the leftover high-score
        # detections; unmatched ones are removed.
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        if not self.mot20:
            dists = matching.fuse_score(dists, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for itracked, idet in matches:
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_stracks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        # Step 4: Init new stracks from detections nobody claimed.
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_stracks.append(track)

        # Step 5: Update state - drop tracks that stayed lost too long.
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_stracks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
        # Only activated tracks are reported.
        output_stracks = [track for track in self.tracked_stracks if track.is_activated]

        return output_stracks
286
+
287
+
288
def joint_stracks(tlista, tlistb):
    """Return the union of two track lists, de-duplicated by ``track_id``.

    Order is preserved: every element of ``tlista`` first, then the elements
    of ``tlistb`` whose id has not been seen yet. A new list is returned;
    the inputs are not modified.
    """
    seen = set()
    res = []
    for t in tlista:
        seen.add(t.track_id)
        res.append(t)
    for t in tlistb:
        tid = t.track_id
        if tid not in seen:
            seen.add(tid)
            res.append(t)
    return res
300
+
301
+
302
def sub_stracks(tlista, tlistb):
    """Return the tracks of ``tlista`` whose ``track_id`` is not in ``tlistb``.

    Tracks are keyed by id, so if ``tlista`` holds several tracks with the
    same id only the last one is kept (at the position of the first).
    """
    remaining = {t.track_id: t for t in tlista}
    for t in tlistb:
        # pop() removes by key presence; the previous `if stracks.get(tid, 0)`
        # depended on the truthiness of the stored track object.
        remaining.pop(t.track_id, None)
    return list(remaining.values())
311
+
312
+
313
def remove_duplicate_stracks(stracksa, stracksb):
    """Drop near-identical tracks that appear in both lists.

    Pairs whose IoU distance is below 0.15 are treated as duplicates. Of
    each pair, the track with the longer lifetime
    (``frame_id - start_frame``) is kept; on a tie the one from
    ``stracksb`` is kept.

    :return: ``(resa, resb)`` - the two filtered lists.
    """
    pdist = matching.iou_distance(stracksa, stracksb)
    pairs = np.where(pdist < 0.15)
    # Sets give constant-time membership tests in the filters below.
    dupa, dupb = set(), set()
    for p, q in zip(*pairs):
        timep = stracksa[p].frame_id - stracksa[p].start_frame
        timeq = stracksb[q].frame_id - stracksb[q].start_frame
        if timep > timeq:
            dupb.add(q)
        else:
            dupa.add(p)
    resa = [t for i, t in enumerate(stracksa) if i not in dupa]
    resb = [t for i, t in enumerate(stracksb) if i not in dupb]
    return resa, resb
byte_track/tracker/kalman_filter.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+ import scipy.linalg
4
+
5
+
6
+ """
7
+ Table for the 0.95 quantile of the chi-square distribution with N degrees of
8
+ freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
9
+ function and used as Mahalanobis gating threshold.
10
+ """
11
+ chi2inv95 = {
12
+ 1: 3.8415,
13
+ 2: 5.9915,
14
+ 3: 7.8147,
15
+ 4: 9.4877,
16
+ 5: 11.070,
17
+ 6: 12.592,
18
+ 7: 14.067,
19
+ 8: 15.507,
20
+ 9: 16.919}
21
+
22
+
23
class KalmanFilter(object):
    """
    Kalman filter for bounding boxes in image coordinates.

    The state has 8 components

        x, y, a, h, vx, vy, va, vh

    i.e. box centre (x, y), aspect ratio a, height h, followed by the
    velocity of each of those four values.

    The motion model is constant velocity. The observation is the box
    (x, y, a, h) itself, taken linearly from the state.

    """

    def __init__(self):
        ndim, dt = 4, 1.

        # Transition matrix: identity plus dt on the position/velocity
        # coupling entries, so each position advances by dt * velocity.
        transition = np.eye(2 * ndim, 2 * ndim)
        for axis in range(ndim):
            transition[axis, ndim + axis] = dt
        self._motion_mat = transition
        # Observation matrix: selects the first ndim state components.
        self._update_mat = np.eye(ndim, 2 * ndim)

        # Noise levels are expressed relative to the current box height;
        # these two weights scale position and velocity uncertainty.
        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initiate(self, measurement):
        """Build the initial state for a new track.

        Parameters
        ----------
        measurement : ndarray
            Box as (x, y, a, h): centre position, aspect ratio, height.

        Returns
        -------
        (ndarray, ndarray)
            8-dimensional mean (velocities set to zero) and the 8x8
            diagonal covariance of the new track.

        """
        height = measurement[3]
        pos_std = 2 * self._std_weight_position * height
        vel_std = 10 * self._std_weight_velocity * height
        std = [pos_std, pos_std, 1e-2, pos_std,
               vel_std, vel_std, 1e-5, vel_std]

        mean = np.r_[measurement, np.zeros_like(measurement)]
        covariance = np.diag(np.square(std))
        return mean, covariance

    def predict(self, mean, covariance):
        """Run the prediction step for a single track.

        Parameters
        ----------
        mean : ndarray
            8-dimensional state mean at the previous time step.
        covariance : ndarray
            8x8 state covariance at the previous time step.

        Returns
        -------
        (ndarray, ndarray)
            Mean and covariance of the predicted state.

        """
        height = mean[3]
        pos_std = self._std_weight_position * height
        vel_std = self._std_weight_velocity * height
        noise_std = np.array([pos_std, pos_std, 1e-2, pos_std,
                              vel_std, vel_std, 1e-5, vel_std])
        motion_cov = np.diag(np.square(noise_std))

        predicted_mean = np.dot(mean, self._motion_mat.T)
        propagated = np.linalg.multi_dot((
            self._motion_mat, covariance, self._motion_mat.T))
        return predicted_mean, propagated + motion_cov

    def project(self, mean, covariance):
        """Map a state distribution into measurement space.

        Parameters
        ----------
        mean : ndarray
            8-dimensional state mean.
        covariance : ndarray
            8x8 state covariance.

        Returns
        -------
        (ndarray, ndarray)
            4-dimensional projected mean and 4x4 projected covariance,
            the latter including the measurement noise.

        """
        pos_std = self._std_weight_position * mean[3]
        innovation_cov = np.diag(np.square([pos_std, pos_std, 1e-1, pos_std]))

        projected_mean = np.dot(self._update_mat, mean)
        projected_cov = np.linalg.multi_dot((
            self._update_mat, covariance, self._update_mat.T))
        return projected_mean, projected_cov + innovation_cov

    def multi_predict(self, mean, covariance):
        """Run the prediction step for N tracks at once.

        Parameters
        ----------
        mean : ndarray
            Nx8 matrix of state means at the previous time step.
        covariance : ndarray
            Nx8x8 stack of state covariances at the previous time step.

        Returns
        -------
        (ndarray, ndarray)
            Nx8 predicted means and Nx8x8 predicted covariances.
        """
        heights = mean[:, 3]
        pos_std = self._std_weight_position * heights
        vel_std = self._std_weight_velocity * heights
        aspect_pos_std = 1e-2 * np.ones_like(heights)
        aspect_vel_std = 1e-5 * np.ones_like(heights)

        # One row per track, one column per state component.
        sqr = np.square(np.stack([
            pos_std, pos_std, aspect_pos_std, pos_std,
            vel_std, vel_std, aspect_vel_std, vel_std])).T
        motion_cov = np.asarray([np.diag(sqr[i]) for i in range(len(mean))])

        predicted_mean = np.dot(mean, self._motion_mat.T)
        # np.dot puts the track axis second; move it back to the front.
        left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
        predicted_cov = np.dot(left, self._motion_mat.T) + motion_cov

        return predicted_mean, predicted_cov

    def update(self, mean, covariance, measurement):
        """Run the correction step.

        Parameters
        ----------
        mean : ndarray
            Predicted 8-dimensional state mean.
        covariance : ndarray
            8x8 state covariance.
        measurement : ndarray
            4-dimensional measurement (x, y, a, h): centre position,
            aspect ratio and height of the box.

        Returns
        -------
        (ndarray, ndarray)
            Measurement-corrected mean and covariance.

        """
        projected_mean, projected_cov = self.project(mean, covariance)

        # Solve for the gain through a Cholesky factorisation of the
        # projected covariance instead of inverting it.
        factorisation = scipy.linalg.cho_factor(
            projected_cov, lower=True, check_finite=False)
        cross_cov_t = np.dot(covariance, self._update_mat.T).T
        kalman_gain = scipy.linalg.cho_solve(
            factorisation, cross_cov_t, check_finite=False).T

        innovation = measurement - projected_mean
        new_mean = mean + np.dot(innovation, kalman_gain.T)
        new_covariance = covariance - np.linalg.multi_dot((
            kalman_gain, projected_cov, kalman_gain.T))
        return new_mean, new_covariance

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False, metric='maha'):
        """Distance between a state distribution and N measurements.

        Parameters
        ----------
        mean : ndarray
            8-dimensional state mean.
        covariance : ndarray
            8x8 state covariance.
        measurements : ndarray
            Nx4 matrix of measurements (x, y, a, h).
        only_position : Optional[bool]
            If True, only the centre position (x, y) is compared.
        metric : str
            'maha' for the squared Mahalanobis distance, 'gaussian' for
            the squared Euclidean distance.

        Returns
        -------
        ndarray
            Length-N array of distances, one per measurement.

        Raises
        ------
        ValueError
            If ``metric`` is neither 'gaussian' nor 'maha'.
        """
        mean, covariance = self.project(mean, covariance)
        if only_position:
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]

        d = measurements - mean
        if metric == 'gaussian':
            return np.sum(d * d, axis=1)
        if metric == 'maha':
            cholesky_factor = np.linalg.cholesky(covariance)
            z = scipy.linalg.solve_triangular(
                cholesky_factor, d.T, lower=True, check_finite=False,
                overwrite_b=True)
            return np.sum(z * z, axis=0)
        raise ValueError('invalid distance metric')
byte_track/tracker/matching.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import scipy
3
+ import lap
4
+ from scipy.spatial.distance import cdist
5
+ from cython_bbox import bbox_overlaps as bbox_ious
6
+ import kalman_filter
7
+
8
def merge_matches(m1, m2, shape):
    """Chain two match lists through their shared middle index.

    `m1` pairs indices of a first set (size O) with a middle set (size P);
    `m2` pairs the middle set with a last set (size Q). An (o, q) pair is
    reported when some middle index p appears as (o, p) in `m1` and as
    (p, q) in `m2`.

    :param m1: sequence of (o, p) index pairs; may be empty
    :param m2: sequence of (p, q) index pairs; may be empty
    :param shape: tuple (O, P, Q) with the sizes of the three sets
    :return: (match, unmatched_O, unmatched_Q) where `match` is a list of
        (o, q) pairs and the other two are tuples of the first-set and
        last-set indices that take part in no chained match
    """
    # Imported here so the function does not depend on `import scipy`
    # having loaded the `sparse` submodule as a side effect.
    from scipy.sparse import coo_matrix

    n_first, n_mid, n_last = shape

    # reshape(-1, 2) keeps the column slicing below valid for empty input,
    # which np.asarray alone would turn into a 1-D array.
    m1 = np.asarray(m1, dtype=int).reshape(-1, 2)
    m2 = np.asarray(m2, dtype=int).reshape(-1, 2)

    first_to_mid = coo_matrix(
        (np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(n_first, n_mid))
    mid_to_last = coo_matrix(
        (np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(n_mid, n_last))

    # Matrix product: entry (o, q) is non-zero iff a shared p links them.
    chained = first_to_mid @ mid_to_last
    rows, cols = chained.nonzero()

    match = list(zip(rows, cols))
    unmatched_O = tuple(set(range(n_first)) - set(rows))
    unmatched_Q = tuple(set(range(n_last)) - set(cols))

    return match, unmatched_O, unmatched_Q
23
+
24
+
25
+ def _indices_to_matches(cost_matrix, indices, thresh):
26
+ matched_cost = cost_matrix[tuple(zip(*indices))]
27
+ matched_mask = (matched_cost <= thresh)
28
+
29
+ matches = indices[matched_mask]
30
+ unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
31
+ unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
32
+
33
+ return matches, unmatched_a, unmatched_b
34
+
35
+
36
def linear_assignment(cost_matrix, thresh):
    """Solve the assignment problem for `cost_matrix` with a cost limit.

    The work is delegated to `lap.lapjv` with `extend_cost=True` and
    `cost_limit=thresh`. Rows and columns for which the solver reports a
    negative partner index are returned as unmatched.

    :param cost_matrix: 2-D np.ndarray of pairwise costs
    :param thresh: cost limit handed to the solver
    :return: (matches, unmatched_a, unmatched_b); `matches` is an (m, 2)
        integer array of (row, col) pairs (shape (0, 2) when nothing is
        matched), the other two hold the unmatched row / column indices
    """
    if cost_matrix.size == 0:
        return (
            np.empty((0, 2), dtype=int),
            tuple(range(cost_matrix.shape[0])),
            tuple(range(cost_matrix.shape[1])),
        )

    _, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    x = np.asarray(x)
    y = np.asarray(y)

    # x[i] >= 0 is the column assigned to row i. Stacking keeps the result
    # two-dimensional even when no row was assigned.
    assigned_rows = np.flatnonzero(x >= 0)
    matches = np.stack((assigned_rows, x[assigned_rows]), axis=1)

    unmatched_a = np.flatnonzero(x < 0)
    unmatched_b = np.flatnonzero(y < 0)
    return matches, unmatched_a, unmatched_b
48
+
49
+
50
def ious(atlbrs, btlbrs):
    """
    Compute the pairwise IoU matrix between two sets of boxes
    :type atlbrs: list[tlbr] | np.ndarray
    :type btlbrs: list[tlbr] | np.ndarray

    :rtype ious np.ndarray of shape (len(atlbrs), len(btlbrs))
    """
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; np.float64 is the same dtype spelled explicitly.
    overlaps = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64)
    if overlaps.size == 0:
        # Nothing to compare: return the empty matrix without calling
        # the compiled overlap routine.
        return overlaps

    return bbox_ious(
        np.ascontiguousarray(atlbrs, dtype=np.float64),
        np.ascontiguousarray(btlbrs, dtype=np.float64),
    )
68
+
69
+
70
def iou_distance(atracks, btracks):
    """
    Build an IoU-based cost matrix (1 - IoU) between two track lists.

    If the first element of either argument is an np.ndarray, both
    arguments are used directly as boxes; otherwise each element's `tlbr`
    attribute is read.
    :type atracks: list[STrack]
    :type btracks: list[STrack]

    :rtype cost_matrix np.ndarray
    """

    def _holds_raw_boxes(seq):
        return len(seq) > 0 and isinstance(seq[0], np.ndarray)

    if _holds_raw_boxes(atracks) or _holds_raw_boxes(btracks):
        boxes_a, boxes_b = atracks, btracks
    else:
        boxes_a = [trk.tlbr for trk in atracks]
        boxes_b = [trk.tlbr for trk in btracks]

    overlap = ious(boxes_a, boxes_b)
    return 1 - overlap
89
+
90
def v_iou_distance(atracks, btracks):
    """
    Build an IoU-based cost matrix (1 - IoU) from the tracks' `pred_bbox`.

    If the first element of either argument is an np.ndarray, both
    arguments are used directly as boxes; otherwise each track's
    `pred_bbox` is converted with its own `tlwh_to_tlbr` method.
    :type atracks: list[STrack]
    :type btracks: list[STrack]

    :rtype cost_matrix np.ndarray
    """

    def _holds_raw_boxes(seq):
        return len(seq) > 0 and isinstance(seq[0], np.ndarray)

    if _holds_raw_boxes(atracks) or _holds_raw_boxes(btracks):
        boxes_a, boxes_b = atracks, btracks
    else:
        boxes_a = [trk.tlwh_to_tlbr(trk.pred_bbox) for trk in atracks]
        boxes_b = [trk.tlwh_to_tlbr(trk.pred_bbox) for trk in btracks]

    overlap = ious(boxes_a, boxes_b)
    return 1 - overlap
109
+
110
def embedding_distance(tracks, detections, metric='cosine'):
    """
    Pairwise appearance distance between tracks and detections.

    Each track's `smooth_feat` is compared with each detection's
    `curr_feat` using scipy's `cdist`; results are clipped at 0 from below.
    :param tracks: list[STrack]
    :param detections: list[BaseTrack]
    :param metric: distance metric name passed to `cdist`
    :return: cost_matrix np.ndarray of shape (len(tracks), len(detections))
    """
    # np.float was removed from NumPy; np.float64 is the dtype it aliased.
    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float64)
    if cost_matrix.size == 0:
        return cost_matrix

    det_features = np.asarray(
        [det.curr_feat for det in detections], dtype=np.float64)
    track_features = np.asarray(
        [trk.smooth_feat for trk in tracks], dtype=np.float64)

    # Clamp at zero so tiny negative values cannot reach the caller.
    return np.maximum(0.0, cdist(track_features, det_features, metric))
127
+
128
+
129
def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
    """
    Invalidate track/detection pairs that fail the Kalman gating test.

    For every track, `kf.gating_distance` is evaluated against all
    detections (in `to_xyah()` form); entries whose distance exceeds the
    `kalman_filter.chi2inv95` threshold are set to infinity.
    `cost_matrix` is modified in place and also returned.
    :param kf: object providing `gating_distance`
    :param cost_matrix: np.ndarray with one row per track, one column per detection
    :param tracks: objects exposing `mean` and `covariance`
    :param detections: objects exposing `to_xyah()`
    :param only_position: gate on 2 degrees of freedom instead of 4
    :return: the gated cost_matrix
    """
    if cost_matrix.size == 0:
        return cost_matrix

    if only_position:
        dof = 2
    else:
        dof = 4
    limit = kalman_filter.chi2inv95[dof]

    xyah = np.asarray([det.to_xyah() for det in detections])
    for idx, trk in enumerate(tracks):
        dist = kf.gating_distance(
            trk.mean, trk.covariance, xyah, only_position)
        too_far = dist > limit
        cost_matrix[idx, too_far] = np.inf
    return cost_matrix
140
+
141
+
142
def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
    """
    Gate the cost matrix and blend in the Kalman gating distance.

    For every track the 'maha' gating distance to all detections is
    computed. Entries beyond the `kalman_filter.chi2inv95` threshold are
    set to infinity, then the row becomes
    `lambda_ * cost + (1 - lambda_) * gating_distance`.
    `cost_matrix` is modified in place and also returned.
    :param kf: object providing `gating_distance`
    :param cost_matrix: np.ndarray with one row per track, one column per detection
    :param tracks: objects exposing `mean` and `covariance`
    :param detections: objects exposing `to_xyah()`
    :param only_position: gate on 2 degrees of freedom instead of 4
    :param lambda_: weight kept on the original cost
    :return: the fused cost_matrix
    """
    if cost_matrix.size == 0:
        return cost_matrix

    if only_position:
        dof = 2
    else:
        dof = 4
    limit = kalman_filter.chi2inv95[dof]

    xyah = np.asarray([det.to_xyah() for det in detections])
    motion_weight = 1 - lambda_
    for idx, trk in enumerate(tracks):
        dist = kf.gating_distance(
            trk.mean, trk.covariance, xyah, only_position, metric='maha')
        # Gate first so rejected pairs stay infinite after blending.
        cost_matrix[idx, dist > limit] = np.inf
        cost_matrix[idx] = lambda_ * cost_matrix[idx] + motion_weight * dist
    return cost_matrix
154
+
155
+
156
def fuse_iou(cost_matrix, tracks, detections):
    """
    Combine an appearance cost matrix with IoU similarity.

    With `reid_sim = 1 - cost_matrix` and
    `iou_sim = 1 - iou_distance(tracks, detections)`, the fused similarity
    is `reid_sim * (1 + iou_sim) / 2` and the returned cost is one minus
    that value. Detection scores are not used here, so inputs that
    `iou_distance` accepts (including raw box arrays) work unchanged.
    :param cost_matrix: np.ndarray with one row per track, one column per detection
    :param tracks: tracks (or boxes) accepted by `iou_distance`
    :param detections: detections (or boxes) accepted by `iou_distance`
    :return: fused cost np.ndarray; the input itself when it is empty
    """
    if cost_matrix.size == 0:
        return cost_matrix

    reid_sim = 1 - cost_matrix
    iou_sim = 1 - iou_distance(tracks, detections)
    fuse_sim = reid_sim * (1 + iou_sim) / 2
    return 1 - fuse_sim
168
+
169
+
170
def fuse_score(cost_matrix, detections):
    """
    Weight an IoU cost matrix by the detection scores.

    The similarity `1 - cost_matrix` is multiplied column-wise by each
    detection's `score`, and one minus the product is returned.
    :param cost_matrix: np.ndarray with one row per track, one column per detection
    :param detections: objects exposing a `score` attribute
    :return: fused cost np.ndarray; the input itself when it is empty
    """
    if cost_matrix.size == 0:
        return cost_matrix

    scores = np.array([det.score for det in detections])
    # One copy of the score row per track so shapes match the cost matrix.
    score_grid = np.repeat(scores[np.newaxis], cost_matrix.shape[0], axis=0)

    similarity = (1 - cost_matrix) * score_grid
    return 1 - similarity
cfg/yolor_csp.cfg ADDED
@@ -0,0 +1,1376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [net]
2
+ # Testing
3
+ #batch=1
4
+ #subdivisions=1
5
+ # Training
6
+ batch=64
7
+ subdivisions=8
8
+ width=512
9
+ height=512
10
+ channels=3
11
+ momentum=0.949
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.00261
19
+ burn_in=1000
20
+ max_batches = 500500
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ #cutmix=1
26
+ mosaic=1
27
+
28
+
29
+ # ============ Backbone ============ #
30
+
31
+ # Stem
32
+
33
+ # 0
34
+ [convolutional]
35
+ batch_normalize=1
36
+ filters=32
37
+ size=3
38
+ stride=1
39
+ pad=1
40
+ activation=silu
41
+
42
+ # P1
43
+
44
+ # Downsample
45
+
46
+ [convolutional]
47
+ batch_normalize=1
48
+ filters=64
49
+ size=3
50
+ stride=2
51
+ pad=1
52
+ activation=silu
53
+
54
+ # Residual Block
55
+
56
+ [convolutional]
57
+ batch_normalize=1
58
+ filters=32
59
+ size=1
60
+ stride=1
61
+ pad=1
62
+ activation=silu
63
+
64
+ [convolutional]
65
+ batch_normalize=1
66
+ filters=64
67
+ size=3
68
+ stride=1
69
+ pad=1
70
+ activation=silu
71
+
72
+ # 4 (previous+1+3k)
73
+ [shortcut]
74
+ from=-3
75
+ activation=linear
76
+
77
+ # P2
78
+
79
+ # Downsample
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=128
84
+ size=3
85
+ stride=2
86
+ pad=1
87
+ activation=silu
88
+
89
+ # Split
90
+
91
+ [convolutional]
92
+ batch_normalize=1
93
+ filters=64
94
+ size=1
95
+ stride=1
96
+ pad=1
97
+ activation=silu
98
+
99
+ [route]
100
+ layers = -2
101
+
102
+ [convolutional]
103
+ batch_normalize=1
104
+ filters=64
105
+ size=1
106
+ stride=1
107
+ pad=1
108
+ activation=silu
109
+
110
+ # Residual Block
111
+
112
+ [convolutional]
113
+ batch_normalize=1
114
+ filters=64
115
+ size=1
116
+ stride=1
117
+ pad=1
118
+ activation=silu
119
+
120
+ [convolutional]
121
+ batch_normalize=1
122
+ filters=64
123
+ size=3
124
+ stride=1
125
+ pad=1
126
+ activation=silu
127
+
128
+ [shortcut]
129
+ from=-3
130
+ activation=linear
131
+
132
+ [convolutional]
133
+ batch_normalize=1
134
+ filters=64
135
+ size=1
136
+ stride=1
137
+ pad=1
138
+ activation=silu
139
+
140
+ [convolutional]
141
+ batch_normalize=1
142
+ filters=64
143
+ size=3
144
+ stride=1
145
+ pad=1
146
+ activation=silu
147
+
148
+ [shortcut]
149
+ from=-3
150
+ activation=linear
151
+
152
+ # Transition first
153
+
154
+ [convolutional]
155
+ batch_normalize=1
156
+ filters=64
157
+ size=1
158
+ stride=1
159
+ pad=1
160
+ activation=silu
161
+
162
+ # Merge [-1, -(3k+4)]
163
+
164
+ [route]
165
+ layers = -1,-10
166
+
167
+ # Transition last
168
+
169
+ # 17 (previous+7+3k)
170
+ [convolutional]
171
+ batch_normalize=1
172
+ filters=128
173
+ size=1
174
+ stride=1
175
+ pad=1
176
+ activation=silu
177
+
178
+ # P3
179
+
180
+ # Downsample
181
+
182
+ [convolutional]
183
+ batch_normalize=1
184
+ filters=256
185
+ size=3
186
+ stride=2
187
+ pad=1
188
+ activation=silu
189
+
190
+ # Split
191
+
192
+ [convolutional]
193
+ batch_normalize=1
194
+ filters=128
195
+ size=1
196
+ stride=1
197
+ pad=1
198
+ activation=silu
199
+
200
+ [route]
201
+ layers = -2
202
+
203
+ [convolutional]
204
+ batch_normalize=1
205
+ filters=128
206
+ size=1
207
+ stride=1
208
+ pad=1
209
+ activation=silu
210
+
211
+ # Residual Block
212
+
213
+ [convolutional]
214
+ batch_normalize=1
215
+ filters=128
216
+ size=1
217
+ stride=1
218
+ pad=1
219
+ activation=silu
220
+
221
+ [convolutional]
222
+ batch_normalize=1
223
+ filters=128
224
+ size=3
225
+ stride=1
226
+ pad=1
227
+ activation=silu
228
+
229
+ [shortcut]
230
+ from=-3
231
+ activation=linear
232
+
233
+ [convolutional]
234
+ batch_normalize=1
235
+ filters=128
236
+ size=1
237
+ stride=1
238
+ pad=1
239
+ activation=silu
240
+
241
+ [convolutional]
242
+ batch_normalize=1
243
+ filters=128
244
+ size=3
245
+ stride=1
246
+ pad=1
247
+ activation=silu
248
+
249
+ [shortcut]
250
+ from=-3
251
+ activation=linear
252
+
253
+ [convolutional]
254
+ batch_normalize=1
255
+ filters=128
256
+ size=1
257
+ stride=1
258
+ pad=1
259
+ activation=silu
260
+
261
+ [convolutional]
262
+ batch_normalize=1
263
+ filters=128
264
+ size=3
265
+ stride=1
266
+ pad=1
267
+ activation=silu
268
+
269
+ [shortcut]
270
+ from=-3
271
+ activation=linear
272
+
273
+ [convolutional]
274
+ batch_normalize=1
275
+ filters=128
276
+ size=1
277
+ stride=1
278
+ pad=1
279
+ activation=silu
280
+
281
+ [convolutional]
282
+ batch_normalize=1
283
+ filters=128
284
+ size=3
285
+ stride=1
286
+ pad=1
287
+ activation=silu
288
+
289
+ [shortcut]
290
+ from=-3
291
+ activation=linear
292
+
293
+ [convolutional]
294
+ batch_normalize=1
295
+ filters=128
296
+ size=1
297
+ stride=1
298
+ pad=1
299
+ activation=silu
300
+
301
+ [convolutional]
302
+ batch_normalize=1
303
+ filters=128
304
+ size=3
305
+ stride=1
306
+ pad=1
307
+ activation=silu
308
+
309
+ [shortcut]
310
+ from=-3
311
+ activation=linear
312
+
313
+ [convolutional]
314
+ batch_normalize=1
315
+ filters=128
316
+ size=1
317
+ stride=1
318
+ pad=1
319
+ activation=silu
320
+
321
+ [convolutional]
322
+ batch_normalize=1
323
+ filters=128
324
+ size=3
325
+ stride=1
326
+ pad=1
327
+ activation=silu
328
+
329
+ [shortcut]
330
+ from=-3
331
+ activation=linear
332
+
333
+ [convolutional]
334
+ batch_normalize=1
335
+ filters=128
336
+ size=1
337
+ stride=1
338
+ pad=1
339
+ activation=silu
340
+
341
+ [convolutional]
342
+ batch_normalize=1
343
+ filters=128
344
+ size=3
345
+ stride=1
346
+ pad=1
347
+ activation=silu
348
+
349
+ [shortcut]
350
+ from=-3
351
+ activation=linear
352
+
353
+ [convolutional]
354
+ batch_normalize=1
355
+ filters=128
356
+ size=1
357
+ stride=1
358
+ pad=1
359
+ activation=silu
360
+
361
+ [convolutional]
362
+ batch_normalize=1
363
+ filters=128
364
+ size=3
365
+ stride=1
366
+ pad=1
367
+ activation=silu
368
+
369
+ [shortcut]
370
+ from=-3
371
+ activation=linear
372
+
373
+ # Transition first
374
+
375
+ [convolutional]
376
+ batch_normalize=1
377
+ filters=128
378
+ size=1
379
+ stride=1
380
+ pad=1
381
+ activation=silu
382
+
383
+ # Merge [-1 -(4+3k)]
384
+
385
+ [route]
386
+ layers = -1,-28
387
+
388
+ # Transition last
389
+
390
+ # 48 (previous+7+3k)
391
+ [convolutional]
392
+ batch_normalize=1
393
+ filters=256
394
+ size=1
395
+ stride=1
396
+ pad=1
397
+ activation=silu
398
+
399
+ # P4
400
+
401
+ # Downsample
402
+
403
+ [convolutional]
404
+ batch_normalize=1
405
+ filters=512
406
+ size=3
407
+ stride=2
408
+ pad=1
409
+ activation=silu
410
+
411
+ # Split
412
+
413
+ [convolutional]
414
+ batch_normalize=1
415
+ filters=256
416
+ size=1
417
+ stride=1
418
+ pad=1
419
+ activation=silu
420
+
421
+ [route]
422
+ layers = -2
423
+
424
+ [convolutional]
425
+ batch_normalize=1
426
+ filters=256
427
+ size=1
428
+ stride=1
429
+ pad=1
430
+ activation=silu
431
+
432
+ # Residual Block
433
+
434
+ [convolutional]
435
+ batch_normalize=1
436
+ filters=256
437
+ size=1
438
+ stride=1
439
+ pad=1
440
+ activation=silu
441
+
442
+ [convolutional]
443
+ batch_normalize=1
444
+ filters=256
445
+ size=3
446
+ stride=1
447
+ pad=1
448
+ activation=silu
449
+
450
+ [shortcut]
451
+ from=-3
452
+ activation=linear
453
+
454
+ [convolutional]
455
+ batch_normalize=1
456
+ filters=256
457
+ size=1
458
+ stride=1
459
+ pad=1
460
+ activation=silu
461
+
462
+ [convolutional]
463
+ batch_normalize=1
464
+ filters=256
465
+ size=3
466
+ stride=1
467
+ pad=1
468
+ activation=silu
469
+
470
+ [shortcut]
471
+ from=-3
472
+ activation=linear
473
+
474
+ [convolutional]
475
+ batch_normalize=1
476
+ filters=256
477
+ size=1
478
+ stride=1
479
+ pad=1
480
+ activation=silu
481
+
482
+ [convolutional]
483
+ batch_normalize=1
484
+ filters=256
485
+ size=3
486
+ stride=1
487
+ pad=1
488
+ activation=silu
489
+
490
+ [shortcut]
491
+ from=-3
492
+ activation=linear
493
+
494
+ [convolutional]
495
+ batch_normalize=1
496
+ filters=256
497
+ size=1
498
+ stride=1
499
+ pad=1
500
+ activation=silu
501
+
502
+ [convolutional]
503
+ batch_normalize=1
504
+ filters=256
505
+ size=3
506
+ stride=1
507
+ pad=1
508
+ activation=silu
509
+
510
+ [shortcut]
511
+ from=-3
512
+ activation=linear
513
+
514
+ [convolutional]
515
+ batch_normalize=1
516
+ filters=256
517
+ size=1
518
+ stride=1
519
+ pad=1
520
+ activation=silu
521
+
522
+ [convolutional]
523
+ batch_normalize=1
524
+ filters=256
525
+ size=3
526
+ stride=1
527
+ pad=1
528
+ activation=silu
529
+
530
+ [shortcut]
531
+ from=-3
532
+ activation=linear
533
+
534
+ [convolutional]
535
+ batch_normalize=1
536
+ filters=256
537
+ size=1
538
+ stride=1
539
+ pad=1
540
+ activation=silu
541
+
542
+ [convolutional]
543
+ batch_normalize=1
544
+ filters=256
545
+ size=3
546
+ stride=1
547
+ pad=1
548
+ activation=silu
549
+
550
+ [shortcut]
551
+ from=-3
552
+ activation=linear
553
+
554
+ [convolutional]
555
+ batch_normalize=1
556
+ filters=256
557
+ size=1
558
+ stride=1
559
+ pad=1
560
+ activation=silu
561
+
562
+ [convolutional]
563
+ batch_normalize=1
564
+ filters=256
565
+ size=3
566
+ stride=1
567
+ pad=1
568
+ activation=silu
569
+
570
+ [shortcut]
571
+ from=-3
572
+ activation=linear
573
+
574
+ [convolutional]
575
+ batch_normalize=1
576
+ filters=256
577
+ size=1
578
+ stride=1
579
+ pad=1
580
+ activation=silu
581
+
582
+ [convolutional]
583
+ batch_normalize=1
584
+ filters=256
585
+ size=3
586
+ stride=1
587
+ pad=1
588
+ activation=silu
589
+
590
+ [shortcut]
591
+ from=-3
592
+ activation=linear
593
+
594
+ # Transition first
595
+
596
+ [convolutional]
597
+ batch_normalize=1
598
+ filters=256
599
+ size=1
600
+ stride=1
601
+ pad=1
602
+ activation=silu
603
+
604
+ # Merge [-1 -(3k+4)]
605
+
606
+ [route]
607
+ layers = -1,-28
608
+
609
+ # Transition last
610
+
611
+ # 79 (previous+7+3k)
612
+ [convolutional]
613
+ batch_normalize=1
614
+ filters=512
615
+ size=1
616
+ stride=1
617
+ pad=1
618
+ activation=silu
619
+
620
+ # P5
621
+
622
+ # Downsample
623
+
624
+ [convolutional]
625
+ batch_normalize=1
626
+ filters=1024
627
+ size=3
628
+ stride=2
629
+ pad=1
630
+ activation=silu
631
+
632
+ # Split
633
+
634
+ [convolutional]
635
+ batch_normalize=1
636
+ filters=512
637
+ size=1
638
+ stride=1
639
+ pad=1
640
+ activation=silu
641
+
642
+ [route]
643
+ layers = -2
644
+
645
+ [convolutional]
646
+ batch_normalize=1
647
+ filters=512
648
+ size=1
649
+ stride=1
650
+ pad=1
651
+ activation=silu
652
+
653
+ # Residual Block
654
+
655
+ [convolutional]
656
+ batch_normalize=1
657
+ filters=512
658
+ size=1
659
+ stride=1
660
+ pad=1
661
+ activation=silu
662
+
663
+ [convolutional]
664
+ batch_normalize=1
665
+ filters=512
666
+ size=3
667
+ stride=1
668
+ pad=1
669
+ activation=silu
670
+
671
+ [shortcut]
672
+ from=-3
673
+ activation=linear
674
+
675
+ [convolutional]
676
+ batch_normalize=1
677
+ filters=512
678
+ size=1
679
+ stride=1
680
+ pad=1
681
+ activation=silu
682
+
683
+ [convolutional]
684
+ batch_normalize=1
685
+ filters=512
686
+ size=3
687
+ stride=1
688
+ pad=1
689
+ activation=silu
690
+
691
+ [shortcut]
692
+ from=-3
693
+ activation=linear
694
+
695
+ [convolutional]
696
+ batch_normalize=1
697
+ filters=512
698
+ size=1
699
+ stride=1
700
+ pad=1
701
+ activation=silu
702
+
703
+ [convolutional]
704
+ batch_normalize=1
705
+ filters=512
706
+ size=3
707
+ stride=1
708
+ pad=1
709
+ activation=silu
710
+
711
+ [shortcut]
712
+ from=-3
713
+ activation=linear
714
+
715
+ [convolutional]
716
+ batch_normalize=1
717
+ filters=512
718
+ size=1
719
+ stride=1
720
+ pad=1
721
+ activation=silu
722
+
723
+ [convolutional]
724
+ batch_normalize=1
725
+ filters=512
726
+ size=3
727
+ stride=1
728
+ pad=1
729
+ activation=silu
730
+
731
+ [shortcut]
732
+ from=-3
733
+ activation=linear
734
+
735
+ # Transition first
736
+
737
+ [convolutional]
738
+ batch_normalize=1
739
+ filters=512
740
+ size=1
741
+ stride=1
742
+ pad=1
743
+ activation=silu
744
+
745
+ # Merge [-1 -(3k+4)]
746
+
747
+ [route]
748
+ layers = -1,-16
749
+
750
+ # Transition last
751
+
752
+ # 98 (previous+7+3k)
753
+ [convolutional]
754
+ batch_normalize=1
755
+ filters=1024
756
+ size=1
757
+ stride=1
758
+ pad=1
759
+ activation=silu
760
+
761
+ # ============ End of Backbone ============ #
762
+
763
+ # ============ Neck ============ #
764
+
765
+ # CSPSPP
766
+
767
+ [convolutional]
768
+ batch_normalize=1
769
+ filters=512
770
+ size=1
771
+ stride=1
772
+ pad=1
773
+ activation=silu
774
+
775
+ [route]
776
+ layers = -2
777
+
778
+ [convolutional]
779
+ batch_normalize=1
780
+ filters=512
781
+ size=1
782
+ stride=1
783
+ pad=1
784
+ activation=silu
785
+
786
+ [convolutional]
787
+ batch_normalize=1
788
+ size=3
789
+ stride=1
790
+ pad=1
791
+ filters=512
792
+ activation=silu
793
+
794
+ [convolutional]
795
+ batch_normalize=1
796
+ filters=512
797
+ size=1
798
+ stride=1
799
+ pad=1
800
+ activation=silu
801
+
802
+ ### SPP ###
803
+ [maxpool]
804
+ stride=1
805
+ size=5
806
+
807
+ [route]
808
+ layers=-2
809
+
810
+ [maxpool]
811
+ stride=1
812
+ size=9
813
+
814
+ [route]
815
+ layers=-4
816
+
817
+ [maxpool]
818
+ stride=1
819
+ size=13
820
+
821
+ [route]
822
+ layers=-1,-3,-5,-6
823
+ ### End SPP ###
824
+
825
+ [convolutional]
826
+ batch_normalize=1
827
+ filters=512
828
+ size=1
829
+ stride=1
830
+ pad=1
831
+ activation=silu
832
+
833
+ [convolutional]
834
+ batch_normalize=1
835
+ size=3
836
+ stride=1
837
+ pad=1
838
+ filters=512
839
+ activation=silu
840
+
841
+ [route]
842
+ layers = -1, -13
843
+
844
+ # 113 (previous+6+5+2k)
845
+ [convolutional]
846
+ batch_normalize=1
847
+ filters=512
848
+ size=1
849
+ stride=1
850
+ pad=1
851
+ activation=silu
852
+
853
+ # End of CSPSPP
854
+
855
+
856
+ # FPN-4
857
+
858
+ [convolutional]
859
+ batch_normalize=1
860
+ filters=256
861
+ size=1
862
+ stride=1
863
+ pad=1
864
+ activation=silu
865
+
866
+ [upsample]
867
+ stride=2
868
+
869
+ [route]
870
+ layers = 79
871
+
872
+ [convolutional]
873
+ batch_normalize=1
874
+ filters=256
875
+ size=1
876
+ stride=1
877
+ pad=1
878
+ activation=silu
879
+
880
+ [route]
881
+ layers = -1, -3
882
+
883
+ [convolutional]
884
+ batch_normalize=1
885
+ filters=256
886
+ size=1
887
+ stride=1
888
+ pad=1
889
+ activation=silu
890
+
891
+ # Split
892
+
893
+ [convolutional]
894
+ batch_normalize=1
895
+ filters=256
896
+ size=1
897
+ stride=1
898
+ pad=1
899
+ activation=silu
900
+
901
+ [route]
902
+ layers = -2
903
+
904
+ # Plain Block
905
+
906
+ [convolutional]
907
+ batch_normalize=1
908
+ filters=256
909
+ size=1
910
+ stride=1
911
+ pad=1
912
+ activation=silu
913
+
914
+ [convolutional]
915
+ batch_normalize=1
916
+ size=3
917
+ stride=1
918
+ pad=1
919
+ filters=256
920
+ activation=silu
921
+
922
+ [convolutional]
923
+ batch_normalize=1
924
+ filters=256
925
+ size=1
926
+ stride=1
927
+ pad=1
928
+ activation=silu
929
+
930
+ [convolutional]
931
+ batch_normalize=1
932
+ size=3
933
+ stride=1
934
+ pad=1
935
+ filters=256
936
+ activation=silu
937
+
938
+ # Merge [-1, -(2k+2)]
939
+
940
+ [route]
941
+ layers = -1, -6
942
+
943
+ # Transition last
944
+
945
+ # 127 (previous+6+4+2k)
946
+ [convolutional]
947
+ batch_normalize=1
948
+ filters=256
949
+ size=1
950
+ stride=1
951
+ pad=1
952
+ activation=silu
953
+
954
+
955
+ # FPN-3
956
+
957
+ [convolutional]
958
+ batch_normalize=1
959
+ filters=128
960
+ size=1
961
+ stride=1
962
+ pad=1
963
+ activation=silu
964
+
965
+ [upsample]
966
+ stride=2
967
+
968
+ [route]
969
+ layers = 48
970
+
971
+ [convolutional]
972
+ batch_normalize=1
973
+ filters=128
974
+ size=1
975
+ stride=1
976
+ pad=1
977
+ activation=silu
978
+
979
+ [route]
980
+ layers = -1, -3
981
+
982
+ [convolutional]
983
+ batch_normalize=1
984
+ filters=128
985
+ size=1
986
+ stride=1
987
+ pad=1
988
+ activation=silu
989
+
990
+ # Split
991
+
992
+ [convolutional]
993
+ batch_normalize=1
994
+ filters=128
995
+ size=1
996
+ stride=1
997
+ pad=1
998
+ activation=silu
999
+
1000
+ [route]
1001
+ layers = -2
1002
+
1003
+ # Plain Block
1004
+
1005
+ [convolutional]
1006
+ batch_normalize=1
1007
+ filters=128
1008
+ size=1
1009
+ stride=1
1010
+ pad=1
1011
+ activation=silu
1012
+
1013
+ [convolutional]
1014
+ batch_normalize=1
1015
+ size=3
1016
+ stride=1
1017
+ pad=1
1018
+ filters=128
1019
+ activation=silu
1020
+
1021
+ [convolutional]
1022
+ batch_normalize=1
1023
+ filters=128
1024
+ size=1
1025
+ stride=1
1026
+ pad=1
1027
+ activation=silu
1028
+
1029
+ [convolutional]
1030
+ batch_normalize=1
1031
+ size=3
1032
+ stride=1
1033
+ pad=1
1034
+ filters=128
1035
+ activation=silu
1036
+
1037
+ # Merge [-1, -(2k+2)]
1038
+
1039
+ [route]
1040
+ layers = -1, -6
1041
+
1042
+ # Transition last
1043
+
1044
+ # 141 (previous+6+4+2k)
1045
+ [convolutional]
1046
+ batch_normalize=1
1047
+ filters=128
1048
+ size=1
1049
+ stride=1
1050
+ pad=1
1051
+ activation=silu
1052
+
1053
+
1054
+ # PAN-4
1055
+
1056
+ [convolutional]
1057
+ batch_normalize=1
1058
+ size=3
1059
+ stride=2
1060
+ pad=1
1061
+ filters=256
1062
+ activation=silu
1063
+
1064
+ [route]
1065
+ layers = -1, 127
1066
+
1067
+ [convolutional]
1068
+ batch_normalize=1
1069
+ filters=256
1070
+ size=1
1071
+ stride=1
1072
+ pad=1
1073
+ activation=silu
1074
+
1075
+ # Split
1076
+
1077
+ [convolutional]
1078
+ batch_normalize=1
1079
+ filters=256
1080
+ size=1
1081
+ stride=1
1082
+ pad=1
1083
+ activation=silu
1084
+
1085
+ [route]
1086
+ layers = -2
1087
+
1088
+ # Plain Block
1089
+
1090
+ [convolutional]
1091
+ batch_normalize=1
1092
+ filters=256
1093
+ size=1
1094
+ stride=1
1095
+ pad=1
1096
+ activation=silu
1097
+
1098
+ [convolutional]
1099
+ batch_normalize=1
1100
+ size=3
1101
+ stride=1
1102
+ pad=1
1103
+ filters=256
1104
+ activation=silu
1105
+
1106
+ [convolutional]
1107
+ batch_normalize=1
1108
+ filters=256
1109
+ size=1
1110
+ stride=1
1111
+ pad=1
1112
+ activation=silu
1113
+
1114
+ [convolutional]
1115
+ batch_normalize=1
1116
+ size=3
1117
+ stride=1
1118
+ pad=1
1119
+ filters=256
1120
+ activation=silu
1121
+
1122
+ [route]
1123
+ layers = -1,-6
1124
+
1125
+ # Transition last
1126
+
1127
+ # 152 (previous+3+4+2k)
1128
+ [convolutional]
1129
+ batch_normalize=1
1130
+ filters=256
1131
+ size=1
1132
+ stride=1
1133
+ pad=1
1134
+ activation=silu
1135
+
1136
+
1137
+ # PAN-5
1138
+
1139
+ [convolutional]
1140
+ batch_normalize=1
1141
+ size=3
1142
+ stride=2
1143
+ pad=1
1144
+ filters=512
1145
+ activation=silu
1146
+
1147
+ [route]
1148
+ layers = -1, 113
1149
+
1150
+ [convolutional]
1151
+ batch_normalize=1
1152
+ filters=512
1153
+ size=1
1154
+ stride=1
1155
+ pad=1
1156
+ activation=silu
1157
+
1158
+ # Split
1159
+
1160
+ [convolutional]
1161
+ batch_normalize=1
1162
+ filters=512
1163
+ size=1
1164
+ stride=1
1165
+ pad=1
1166
+ activation=silu
1167
+
1168
+ [route]
1169
+ layers = -2
1170
+
1171
+ # Plain Block
1172
+
1173
+ [convolutional]
1174
+ batch_normalize=1
1175
+ filters=512
1176
+ size=1
1177
+ stride=1
1178
+ pad=1
1179
+ activation=silu
1180
+
1181
+ [convolutional]
1182
+ batch_normalize=1
1183
+ size=3
1184
+ stride=1
1185
+ pad=1
1186
+ filters=512
1187
+ activation=silu
1188
+
1189
+ [convolutional]
1190
+ batch_normalize=1
1191
+ filters=512
1192
+ size=1
1193
+ stride=1
1194
+ pad=1
1195
+ activation=silu
1196
+
1197
+ [convolutional]
1198
+ batch_normalize=1
1199
+ size=3
1200
+ stride=1
1201
+ pad=1
1202
+ filters=512
1203
+ activation=silu
1204
+
1205
+ [route]
1206
+ layers = -1,-6
1207
+
1208
+ # Transition last
1209
+
1210
+ # 163 (previous+3+4+2k)
1211
+ [convolutional]
1212
+ batch_normalize=1
1213
+ filters=512
1214
+ size=1
1215
+ stride=1
1216
+ pad=1
1217
+ activation=silu
1218
+
1219
+ # ============ End of Neck ============ #
1220
+
1221
+ # 164
1222
+ [implicit_add]
1223
+ filters=256
1224
+
1225
+ # 165
1226
+ [implicit_add]
1227
+ filters=512
1228
+
1229
+ # 166
1230
+ [implicit_add]
1231
+ filters=1024
1232
+
1233
+ # 167
1234
+ [implicit_mul]
1235
+ filters=255
1236
+
1237
+ # 168
1238
+ [implicit_mul]
1239
+ filters=255
1240
+
1241
+ # 169
1242
+ [implicit_mul]
1243
+ filters=255
1244
+
1245
+ # ============ Head ============ #
1246
+
1247
+ # YOLO-3
1248
+
1249
+ [route]
1250
+ layers = 141
1251
+
1252
+ [convolutional]
1253
+ batch_normalize=1
1254
+ size=3
1255
+ stride=1
1256
+ pad=1
1257
+ filters=256
1258
+ activation=silu
1259
+
1260
+ [shift_channels]
1261
+ from=164
1262
+
1263
+ [convolutional]
1264
+ size=1
1265
+ stride=1
1266
+ pad=1
1267
+ filters=255
1268
+ activation=linear
1269
+
1270
+ [control_channels]
1271
+ from=167
1272
+
1273
+ [yolo]
1274
+ mask = 0,1,2
1275
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1276
+ classes=80
1277
+ num=9
1278
+ jitter=.3
1279
+ ignore_thresh = .7
1280
+ truth_thresh = 1
1281
+ random=1
1282
+ scale_x_y = 1.05
1283
+ iou_thresh=0.213
1284
+ cls_normalizer=1.0
1285
+ iou_normalizer=0.07
1286
+ iou_loss=ciou
1287
+ nms_kind=greedynms
1288
+ beta_nms=0.6
1289
+
1290
+
1291
+ # YOLO-4
1292
+
1293
+ [route]
1294
+ layers = 152
1295
+
1296
+ [convolutional]
1297
+ batch_normalize=1
1298
+ size=3
1299
+ stride=1
1300
+ pad=1
1301
+ filters=512
1302
+ activation=silu
1303
+
1304
+ [shift_channels]
1305
+ from=165
1306
+
1307
+ [convolutional]
1308
+ size=1
1309
+ stride=1
1310
+ pad=1
1311
+ filters=255
1312
+ activation=linear
1313
+
1314
+ [control_channels]
1315
+ from=168
1316
+
1317
+ [yolo]
1318
+ mask = 3,4,5
1319
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1320
+ classes=80
1321
+ num=9
1322
+ jitter=.3
1323
+ ignore_thresh = .7
1324
+ truth_thresh = 1
1325
+ random=1
1326
+ scale_x_y = 1.05
1327
+ iou_thresh=0.213
1328
+ cls_normalizer=1.0
1329
+ iou_normalizer=0.07
1330
+ iou_loss=ciou
1331
+ nms_kind=greedynms
1332
+ beta_nms=0.6
1333
+
1334
+
1335
+ # YOLO-5
1336
+
1337
+ [route]
1338
+ layers = 163
1339
+
1340
+ [convolutional]
1341
+ batch_normalize=1
1342
+ size=3
1343
+ stride=1
1344
+ pad=1
1345
+ filters=1024
1346
+ activation=silu
1347
+
1348
+ [shift_channels]
1349
+ from=166
1350
+
1351
+ [convolutional]
1352
+ size=1
1353
+ stride=1
1354
+ pad=1
1355
+ filters=255
1356
+ activation=linear
1357
+
1358
+ [control_channels]
1359
+ from=169
1360
+
1361
+ [yolo]
1362
+ mask = 6,7,8
1363
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1364
+ classes=80
1365
+ num=9
1366
+ jitter=.3
1367
+ ignore_thresh = .7
1368
+ truth_thresh = 1
1369
+ random=1
1370
+ scale_x_y = 1.05
1371
+ iou_thresh=0.213
1372
+ cls_normalizer=1.0
1373
+ iou_normalizer=0.07
1374
+ iou_loss=ciou
1375
+ nms_kind=greedynms
1376
+ beta_nms=0.6
cfg/yolor_csp_x.cfg ADDED
@@ -0,0 +1,1576 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [net]
2
+ # Testing
3
+ #batch=1
4
+ #subdivisions=1
5
+ # Training
6
+ batch=64
7
+ subdivisions=8
8
+ width=512
9
+ height=512
10
+ channels=3
11
+ momentum=0.949
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.00261
19
+ burn_in=1000
20
+ max_batches = 500500
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ #cutmix=1
26
+ mosaic=1
27
+
28
+
29
+ # ============ Backbone ============ #
30
+
31
+ # Stem
32
+
33
+ # 0
34
+ [convolutional]
35
+ batch_normalize=1
36
+ filters=32
37
+ size=3
38
+ stride=1
39
+ pad=1
40
+ activation=silu
41
+
42
+ # P1
43
+
44
+ # Downsample
45
+
46
+ [convolutional]
47
+ batch_normalize=1
48
+ filters=80
49
+ size=3
50
+ stride=2
51
+ pad=1
52
+ activation=silu
53
+
54
+ # Residual Block
55
+
56
+ [convolutional]
57
+ batch_normalize=1
58
+ filters=40
59
+ size=1
60
+ stride=1
61
+ pad=1
62
+ activation=silu
63
+
64
+ [convolutional]
65
+ batch_normalize=1
66
+ filters=80
67
+ size=3
68
+ stride=1
69
+ pad=1
70
+ activation=silu
71
+
72
+ # 4 (previous+1+3k)
73
+ [shortcut]
74
+ from=-3
75
+ activation=linear
76
+
77
+ # P2
78
+
79
+ # Downsample
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=160
84
+ size=3
85
+ stride=2
86
+ pad=1
87
+ activation=silu
88
+
89
+ # Split
90
+
91
+ [convolutional]
92
+ batch_normalize=1
93
+ filters=80
94
+ size=1
95
+ stride=1
96
+ pad=1
97
+ activation=silu
98
+
99
+ [route]
100
+ layers = -2
101
+
102
+ [convolutional]
103
+ batch_normalize=1
104
+ filters=80
105
+ size=1
106
+ stride=1
107
+ pad=1
108
+ activation=silu
109
+
110
+ # Residual Block
111
+
112
+ [convolutional]
113
+ batch_normalize=1
114
+ filters=80
115
+ size=1
116
+ stride=1
117
+ pad=1
118
+ activation=silu
119
+
120
+ [convolutional]
121
+ batch_normalize=1
122
+ filters=80
123
+ size=3
124
+ stride=1
125
+ pad=1
126
+ activation=silu
127
+
128
+ [shortcut]
129
+ from=-3
130
+ activation=linear
131
+
132
+ [convolutional]
133
+ batch_normalize=1
134
+ filters=80
135
+ size=1
136
+ stride=1
137
+ pad=1
138
+ activation=silu
139
+
140
+ [convolutional]
141
+ batch_normalize=1
142
+ filters=80
143
+ size=3
144
+ stride=1
145
+ pad=1
146
+ activation=silu
147
+
148
+ [shortcut]
149
+ from=-3
150
+ activation=linear
151
+
152
+ [convolutional]
153
+ batch_normalize=1
154
+ filters=80
155
+ size=1
156
+ stride=1
157
+ pad=1
158
+ activation=silu
159
+
160
+ [convolutional]
161
+ batch_normalize=1
162
+ filters=80
163
+ size=3
164
+ stride=1
165
+ pad=1
166
+ activation=silu
167
+
168
+ [shortcut]
169
+ from=-3
170
+ activation=linear
171
+
172
+ # Transition first
173
+
174
+ [convolutional]
175
+ batch_normalize=1
176
+ filters=80
177
+ size=1
178
+ stride=1
179
+ pad=1
180
+ activation=silu
181
+
182
+ # Merge [-1, -(3k+4)]
183
+
184
+ [route]
185
+ layers = -1,-13
186
+
187
+ # Transition last
188
+
189
+ # 20 (previous+7+3k)
190
+ [convolutional]
191
+ batch_normalize=1
192
+ filters=160
193
+ size=1
194
+ stride=1
195
+ pad=1
196
+ activation=silu
197
+
198
+ # P3
199
+
200
+ # Downsample
201
+
202
+ [convolutional]
203
+ batch_normalize=1
204
+ filters=320
205
+ size=3
206
+ stride=2
207
+ pad=1
208
+ activation=silu
209
+
210
+ # Split
211
+
212
+ [convolutional]
213
+ batch_normalize=1
214
+ filters=160
215
+ size=1
216
+ stride=1
217
+ pad=1
218
+ activation=silu
219
+
220
+ [route]
221
+ layers = -2
222
+
223
+ [convolutional]
224
+ batch_normalize=1
225
+ filters=160
226
+ size=1
227
+ stride=1
228
+ pad=1
229
+ activation=silu
230
+
231
+ # Residual Block
232
+
233
+ [convolutional]
234
+ batch_normalize=1
235
+ filters=160
236
+ size=1
237
+ stride=1
238
+ pad=1
239
+ activation=silu
240
+
241
+ [convolutional]
242
+ batch_normalize=1
243
+ filters=160
244
+ size=3
245
+ stride=1
246
+ pad=1
247
+ activation=silu
248
+
249
+ [shortcut]
250
+ from=-3
251
+ activation=linear
252
+
253
+ [convolutional]
254
+ batch_normalize=1
255
+ filters=160
256
+ size=1
257
+ stride=1
258
+ pad=1
259
+ activation=silu
260
+
261
+ [convolutional]
262
+ batch_normalize=1
263
+ filters=160
264
+ size=3
265
+ stride=1
266
+ pad=1
267
+ activation=silu
268
+
269
+ [shortcut]
270
+ from=-3
271
+ activation=linear
272
+
273
+ [convolutional]
274
+ batch_normalize=1
275
+ filters=160
276
+ size=1
277
+ stride=1
278
+ pad=1
279
+ activation=silu
280
+
281
+ [convolutional]
282
+ batch_normalize=1
283
+ filters=160
284
+ size=3
285
+ stride=1
286
+ pad=1
287
+ activation=silu
288
+
289
+ [shortcut]
290
+ from=-3
291
+ activation=linear
292
+
293
+ [convolutional]
294
+ batch_normalize=1
295
+ filters=160
296
+ size=1
297
+ stride=1
298
+ pad=1
299
+ activation=silu
300
+
301
+ [convolutional]
302
+ batch_normalize=1
303
+ filters=160
304
+ size=3
305
+ stride=1
306
+ pad=1
307
+ activation=silu
308
+
309
+ [shortcut]
310
+ from=-3
311
+ activation=linear
312
+
313
+ [convolutional]
314
+ batch_normalize=1
315
+ filters=160
316
+ size=1
317
+ stride=1
318
+ pad=1
319
+ activation=silu
320
+
321
+ [convolutional]
322
+ batch_normalize=1
323
+ filters=160
324
+ size=3
325
+ stride=1
326
+ pad=1
327
+ activation=silu
328
+
329
+ [shortcut]
330
+ from=-3
331
+ activation=linear
332
+
333
+ [convolutional]
334
+ batch_normalize=1
335
+ filters=160
336
+ size=1
337
+ stride=1
338
+ pad=1
339
+ activation=silu
340
+
341
+ [convolutional]
342
+ batch_normalize=1
343
+ filters=160
344
+ size=3
345
+ stride=1
346
+ pad=1
347
+ activation=silu
348
+
349
+ [shortcut]
350
+ from=-3
351
+ activation=linear
352
+
353
+ [convolutional]
354
+ batch_normalize=1
355
+ filters=160
356
+ size=1
357
+ stride=1
358
+ pad=1
359
+ activation=silu
360
+
361
+ [convolutional]
362
+ batch_normalize=1
363
+ filters=160
364
+ size=3
365
+ stride=1
366
+ pad=1
367
+ activation=silu
368
+
369
+ [shortcut]
370
+ from=-3
371
+ activation=linear
372
+
373
+ [convolutional]
374
+ batch_normalize=1
375
+ filters=160
376
+ size=1
377
+ stride=1
378
+ pad=1
379
+ activation=silu
380
+
381
+ [convolutional]
382
+ batch_normalize=1
383
+ filters=160
384
+ size=3
385
+ stride=1
386
+ pad=1
387
+ activation=silu
388
+
389
+ [shortcut]
390
+ from=-3
391
+ activation=linear
392
+
393
+ [convolutional]
394
+ batch_normalize=1
395
+ filters=160
396
+ size=1
397
+ stride=1
398
+ pad=1
399
+ activation=silu
400
+
401
+ [convolutional]
402
+ batch_normalize=1
403
+ filters=160
404
+ size=3
405
+ stride=1
406
+ pad=1
407
+ activation=silu
408
+
409
+ [shortcut]
410
+ from=-3
411
+ activation=linear
412
+
413
+ [convolutional]
414
+ batch_normalize=1
415
+ filters=160
416
+ size=1
417
+ stride=1
418
+ pad=1
419
+ activation=silu
420
+
421
+ [convolutional]
422
+ batch_normalize=1
423
+ filters=160
424
+ size=3
425
+ stride=1
426
+ pad=1
427
+ activation=silu
428
+
429
+ [shortcut]
430
+ from=-3
431
+ activation=linear
432
+
433
+ # Transition first
434
+
435
+ [convolutional]
436
+ batch_normalize=1
437
+ filters=160
438
+ size=1
439
+ stride=1
440
+ pad=1
441
+ activation=silu
442
+
443
+ # Merge [-1, -(4+3k)]
444
+
445
+ [route]
446
+ layers = -1,-34
447
+
448
+ # Transition last
449
+
450
+ # 57 (previous+7+3k)
451
+ [convolutional]
452
+ batch_normalize=1
453
+ filters=320
454
+ size=1
455
+ stride=1
456
+ pad=1
457
+ activation=silu
458
+
459
+ # P4
460
+
461
+ # Downsample
462
+
463
+ [convolutional]
464
+ batch_normalize=1
465
+ filters=640
466
+ size=3
467
+ stride=2
468
+ pad=1
469
+ activation=silu
470
+
471
+ # Split
472
+
473
+ [convolutional]
474
+ batch_normalize=1
475
+ filters=320
476
+ size=1
477
+ stride=1
478
+ pad=1
479
+ activation=silu
480
+
481
+ [route]
482
+ layers = -2
483
+
484
+ [convolutional]
485
+ batch_normalize=1
486
+ filters=320
487
+ size=1
488
+ stride=1
489
+ pad=1
490
+ activation=silu
491
+
492
+ # Residual Block
493
+
494
+ [convolutional]
495
+ batch_normalize=1
496
+ filters=320
497
+ size=1
498
+ stride=1
499
+ pad=1
500
+ activation=silu
501
+
502
+ [convolutional]
503
+ batch_normalize=1
504
+ filters=320
505
+ size=3
506
+ stride=1
507
+ pad=1
508
+ activation=silu
509
+
510
+ [shortcut]
511
+ from=-3
512
+ activation=linear
513
+
514
+ [convolutional]
515
+ batch_normalize=1
516
+ filters=320
517
+ size=1
518
+ stride=1
519
+ pad=1
520
+ activation=silu
521
+
522
+ [convolutional]
523
+ batch_normalize=1
524
+ filters=320
525
+ size=3
526
+ stride=1
527
+ pad=1
528
+ activation=silu
529
+
530
+ [shortcut]
531
+ from=-3
532
+ activation=linear
533
+
534
+ [convolutional]
535
+ batch_normalize=1
536
+ filters=320
537
+ size=1
538
+ stride=1
539
+ pad=1
540
+ activation=silu
541
+
542
+ [convolutional]
543
+ batch_normalize=1
544
+ filters=320
545
+ size=3
546
+ stride=1
547
+ pad=1
548
+ activation=silu
549
+
550
+ [shortcut]
551
+ from=-3
552
+ activation=linear
553
+
554
+ [convolutional]
555
+ batch_normalize=1
556
+ filters=320
557
+ size=1
558
+ stride=1
559
+ pad=1
560
+ activation=silu
561
+
562
+ [convolutional]
563
+ batch_normalize=1
564
+ filters=320
565
+ size=3
566
+ stride=1
567
+ pad=1
568
+ activation=silu
569
+
570
+ [shortcut]
571
+ from=-3
572
+ activation=linear
573
+
574
+ [convolutional]
575
+ batch_normalize=1
576
+ filters=320
577
+ size=1
578
+ stride=1
579
+ pad=1
580
+ activation=silu
581
+
582
+ [convolutional]
583
+ batch_normalize=1
584
+ filters=320
585
+ size=3
586
+ stride=1
587
+ pad=1
588
+ activation=silu
589
+
590
+ [shortcut]
591
+ from=-3
592
+ activation=linear
593
+
594
+ [convolutional]
595
+ batch_normalize=1
596
+ filters=320
597
+ size=1
598
+ stride=1
599
+ pad=1
600
+ activation=silu
601
+
602
+ [convolutional]
603
+ batch_normalize=1
604
+ filters=320
605
+ size=3
606
+ stride=1
607
+ pad=1
608
+ activation=silu
609
+
610
+ [shortcut]
611
+ from=-3
612
+ activation=linear
613
+
614
+ [convolutional]
615
+ batch_normalize=1
616
+ filters=320
617
+ size=1
618
+ stride=1
619
+ pad=1
620
+ activation=silu
621
+
622
+ [convolutional]
623
+ batch_normalize=1
624
+ filters=320
625
+ size=3
626
+ stride=1
627
+ pad=1
628
+ activation=silu
629
+
630
+ [shortcut]
631
+ from=-3
632
+ activation=linear
633
+
634
+ [convolutional]
635
+ batch_normalize=1
636
+ filters=320
637
+ size=1
638
+ stride=1
639
+ pad=1
640
+ activation=silu
641
+
642
+ [convolutional]
643
+ batch_normalize=1
644
+ filters=320
645
+ size=3
646
+ stride=1
647
+ pad=1
648
+ activation=silu
649
+
650
+ [shortcut]
651
+ from=-3
652
+ activation=linear
653
+
654
+ [convolutional]
655
+ batch_normalize=1
656
+ filters=320
657
+ size=1
658
+ stride=1
659
+ pad=1
660
+ activation=silu
661
+
662
+ [convolutional]
663
+ batch_normalize=1
664
+ filters=320
665
+ size=3
666
+ stride=1
667
+ pad=1
668
+ activation=silu
669
+
670
+ [shortcut]
671
+ from=-3
672
+ activation=linear
673
+
674
+ [convolutional]
675
+ batch_normalize=1
676
+ filters=320
677
+ size=1
678
+ stride=1
679
+ pad=1
680
+ activation=silu
681
+
682
+ [convolutional]
683
+ batch_normalize=1
684
+ filters=320
685
+ size=3
686
+ stride=1
687
+ pad=1
688
+ activation=silu
689
+
690
+ [shortcut]
691
+ from=-3
692
+ activation=linear
693
+
694
+ # Transition first
695
+
696
+ [convolutional]
697
+ batch_normalize=1
698
+ filters=320
699
+ size=1
700
+ stride=1
701
+ pad=1
702
+ activation=silu
703
+
704
+ # Merge [-1, -(3k+4)]
705
+
706
+ [route]
707
+ layers = -1,-34
708
+
709
+ # Transition last
710
+
711
+ # 94 (previous+7+3k)
712
+ [convolutional]
713
+ batch_normalize=1
714
+ filters=640
715
+ size=1
716
+ stride=1
717
+ pad=1
718
+ activation=silu
719
+
720
+ # P5
721
+
722
+ # Downsample
723
+
724
+ [convolutional]
725
+ batch_normalize=1
726
+ filters=1280
727
+ size=3
728
+ stride=2
729
+ pad=1
730
+ activation=silu
731
+
732
+ # Split
733
+
734
+ [convolutional]
735
+ batch_normalize=1
736
+ filters=640
737
+ size=1
738
+ stride=1
739
+ pad=1
740
+ activation=silu
741
+
742
+ [route]
743
+ layers = -2
744
+
745
+ [convolutional]
746
+ batch_normalize=1
747
+ filters=640
748
+ size=1
749
+ stride=1
750
+ pad=1
751
+ activation=silu
752
+
753
+ # Residual Block
754
+
755
+ [convolutional]
756
+ batch_normalize=1
757
+ filters=640
758
+ size=1
759
+ stride=1
760
+ pad=1
761
+ activation=silu
762
+
763
+ [convolutional]
764
+ batch_normalize=1
765
+ filters=640
766
+ size=3
767
+ stride=1
768
+ pad=1
769
+ activation=silu
770
+
771
+ [shortcut]
772
+ from=-3
773
+ activation=linear
774
+
775
+ [convolutional]
776
+ batch_normalize=1
777
+ filters=640
778
+ size=1
779
+ stride=1
780
+ pad=1
781
+ activation=silu
782
+
783
+ [convolutional]
784
+ batch_normalize=1
785
+ filters=640
786
+ size=3
787
+ stride=1
788
+ pad=1
789
+ activation=silu
790
+
791
+ [shortcut]
792
+ from=-3
793
+ activation=linear
794
+
795
+ [convolutional]
796
+ batch_normalize=1
797
+ filters=640
798
+ size=1
799
+ stride=1
800
+ pad=1
801
+ activation=silu
802
+
803
+ [convolutional]
804
+ batch_normalize=1
805
+ filters=640
806
+ size=3
807
+ stride=1
808
+ pad=1
809
+ activation=silu
810
+
811
+ [shortcut]
812
+ from=-3
813
+ activation=linear
814
+
815
+ [convolutional]
816
+ batch_normalize=1
817
+ filters=640
818
+ size=1
819
+ stride=1
820
+ pad=1
821
+ activation=silu
822
+
823
+ [convolutional]
824
+ batch_normalize=1
825
+ filters=640
826
+ size=3
827
+ stride=1
828
+ pad=1
829
+ activation=silu
830
+
831
+ [shortcut]
832
+ from=-3
833
+ activation=linear
834
+
835
+ [convolutional]
836
+ batch_normalize=1
837
+ filters=640
838
+ size=1
839
+ stride=1
840
+ pad=1
841
+ activation=silu
842
+
843
+ [convolutional]
844
+ batch_normalize=1
845
+ filters=640
846
+ size=3
847
+ stride=1
848
+ pad=1
849
+ activation=silu
850
+
851
+ [shortcut]
852
+ from=-3
853
+ activation=linear
854
+
855
+ # Transition first
856
+
857
+ [convolutional]
858
+ batch_normalize=1
859
+ filters=640
860
+ size=1
861
+ stride=1
862
+ pad=1
863
+ activation=silu
864
+
865
+ # Merge [-1, -(3k+4)]
866
+
867
+ [route]
868
+ layers = -1,-19
869
+
870
+ # Transition last
871
+
872
+ # 116 (previous+7+3k)
873
+ [convolutional]
874
+ batch_normalize=1
875
+ filters=1280
876
+ size=1
877
+ stride=1
878
+ pad=1
879
+ activation=silu
880
+
881
+ # ============ End of Backbone ============ #
882
+
883
+ # ============ Neck ============ #
884
+
885
+ # CSPSPP
886
+
887
+ [convolutional]
888
+ batch_normalize=1
889
+ filters=640
890
+ size=1
891
+ stride=1
892
+ pad=1
893
+ activation=silu
894
+
895
+ [route]
896
+ layers = -2
897
+
898
+ [convolutional]
899
+ batch_normalize=1
900
+ filters=640
901
+ size=1
902
+ stride=1
903
+ pad=1
904
+ activation=silu
905
+
906
+ [convolutional]
907
+ batch_normalize=1
908
+ size=3
909
+ stride=1
910
+ pad=1
911
+ filters=640
912
+ activation=silu
913
+
914
+ [convolutional]
915
+ batch_normalize=1
916
+ filters=640
917
+ size=1
918
+ stride=1
919
+ pad=1
920
+ activation=silu
921
+
922
+ ### SPP ###
923
+ [maxpool]
924
+ stride=1
925
+ size=5
926
+
927
+ [route]
928
+ layers=-2
929
+
930
+ [maxpool]
931
+ stride=1
932
+ size=9
933
+
934
+ [route]
935
+ layers=-4
936
+
937
+ [maxpool]
938
+ stride=1
939
+ size=13
940
+
941
+ [route]
942
+ layers=-1,-3,-5,-6
943
+ ### End SPP ###
944
+
945
+ [convolutional]
946
+ batch_normalize=1
947
+ filters=640
948
+ size=1
949
+ stride=1
950
+ pad=1
951
+ activation=silu
952
+
953
+ [convolutional]
954
+ batch_normalize=1
955
+ size=3
956
+ stride=1
957
+ pad=1
958
+ filters=640
959
+ activation=silu
960
+
961
+ [convolutional]
962
+ batch_normalize=1
963
+ filters=640
964
+ size=1
965
+ stride=1
966
+ pad=1
967
+ activation=silu
968
+
969
+ [convolutional]
970
+ batch_normalize=1
971
+ size=3
972
+ stride=1
973
+ pad=1
974
+ filters=640
975
+ activation=silu
976
+
977
+ [route]
978
+ layers = -1, -15
979
+
980
+ # 133 (previous+6+5+2k)
981
+ [convolutional]
982
+ batch_normalize=1
983
+ filters=640
984
+ size=1
985
+ stride=1
986
+ pad=1
987
+ activation=silu
988
+
989
+ # End of CSPSPP
990
+
991
+
992
+ # FPN-4
993
+
994
+ [convolutional]
995
+ batch_normalize=1
996
+ filters=320
997
+ size=1
998
+ stride=1
999
+ pad=1
1000
+ activation=silu
1001
+
1002
+ [upsample]
1003
+ stride=2
1004
+
1005
+ [route]
1006
+ layers = 94
1007
+
1008
+ [convolutional]
1009
+ batch_normalize=1
1010
+ filters=320
1011
+ size=1
1012
+ stride=1
1013
+ pad=1
1014
+ activation=silu
1015
+
1016
+ [route]
1017
+ layers = -1, -3
1018
+
1019
+ [convolutional]
1020
+ batch_normalize=1
1021
+ filters=320
1022
+ size=1
1023
+ stride=1
1024
+ pad=1
1025
+ activation=silu
1026
+
1027
+ # Split
1028
+
1029
+ [convolutional]
1030
+ batch_normalize=1
1031
+ filters=320
1032
+ size=1
1033
+ stride=1
1034
+ pad=1
1035
+ activation=silu
1036
+
1037
+ [route]
1038
+ layers = -2
1039
+
1040
+ # Plain Block
1041
+
1042
+ [convolutional]
1043
+ batch_normalize=1
1044
+ filters=320
1045
+ size=1
1046
+ stride=1
1047
+ pad=1
1048
+ activation=silu
1049
+
1050
+ [convolutional]
1051
+ batch_normalize=1
1052
+ size=3
1053
+ stride=1
1054
+ pad=1
1055
+ filters=320
1056
+ activation=silu
1057
+
1058
+ [convolutional]
1059
+ batch_normalize=1
1060
+ filters=320
1061
+ size=1
1062
+ stride=1
1063
+ pad=1
1064
+ activation=silu
1065
+
1066
+ [convolutional]
1067
+ batch_normalize=1
1068
+ size=3
1069
+ stride=1
1070
+ pad=1
1071
+ filters=320
1072
+ activation=silu
1073
+
1074
+ [convolutional]
1075
+ batch_normalize=1
1076
+ filters=320
1077
+ size=1
1078
+ stride=1
1079
+ pad=1
1080
+ activation=silu
1081
+
1082
+ [convolutional]
1083
+ batch_normalize=1
1084
+ size=3
1085
+ stride=1
1086
+ pad=1
1087
+ filters=320
1088
+ activation=silu
1089
+
1090
+ # Merge [-1, -(2k+2)]
1091
+
1092
+ [route]
1093
+ layers = -1, -8
1094
+
1095
+ # Transition last
1096
+
1097
+ # 149 (previous+6+4+2k)
1098
+ [convolutional]
1099
+ batch_normalize=1
1100
+ filters=320
1101
+ size=1
1102
+ stride=1
1103
+ pad=1
1104
+ activation=silu
1105
+
1106
+
1107
+ # FPN-3
1108
+
1109
+ [convolutional]
1110
+ batch_normalize=1
1111
+ filters=160
1112
+ size=1
1113
+ stride=1
1114
+ pad=1
1115
+ activation=silu
1116
+
1117
+ [upsample]
1118
+ stride=2
1119
+
1120
+ [route]
1121
+ layers = 57
1122
+
1123
+ [convolutional]
1124
+ batch_normalize=1
1125
+ filters=160
1126
+ size=1
1127
+ stride=1
1128
+ pad=1
1129
+ activation=silu
1130
+
1131
+ [route]
1132
+ layers = -1, -3
1133
+
1134
+ [convolutional]
1135
+ batch_normalize=1
1136
+ filters=160
1137
+ size=1
1138
+ stride=1
1139
+ pad=1
1140
+ activation=silu
1141
+
1142
+ # Split
1143
+
1144
+ [convolutional]
1145
+ batch_normalize=1
1146
+ filters=160
1147
+ size=1
1148
+ stride=1
1149
+ pad=1
1150
+ activation=silu
1151
+
1152
+ [route]
1153
+ layers = -2
1154
+
1155
+ # Plain Block
1156
+
1157
+ [convolutional]
1158
+ batch_normalize=1
1159
+ filters=160
1160
+ size=1
1161
+ stride=1
1162
+ pad=1
1163
+ activation=silu
1164
+
1165
+ [convolutional]
1166
+ batch_normalize=1
1167
+ size=3
1168
+ stride=1
1169
+ pad=1
1170
+ filters=160
1171
+ activation=silu
1172
+
1173
+ [convolutional]
1174
+ batch_normalize=1
1175
+ filters=160
1176
+ size=1
1177
+ stride=1
1178
+ pad=1
1179
+ activation=silu
1180
+
1181
+ [convolutional]
1182
+ batch_normalize=1
1183
+ size=3
1184
+ stride=1
1185
+ pad=1
1186
+ filters=160
1187
+ activation=silu
1188
+
1189
+ [convolutional]
1190
+ batch_normalize=1
1191
+ filters=160
1192
+ size=1
1193
+ stride=1
1194
+ pad=1
1195
+ activation=silu
1196
+
1197
+ [convolutional]
1198
+ batch_normalize=1
1199
+ size=3
1200
+ stride=1
1201
+ pad=1
1202
+ filters=160
1203
+ activation=silu
1204
+
1205
+ # Merge [-1, -(2k+2)]
1206
+
1207
+ [route]
1208
+ layers = -1, -8
1209
+
1210
+ # Transition last
1211
+
1212
+ # 165 (previous+6+4+2k)
1213
+ [convolutional]
1214
+ batch_normalize=1
1215
+ filters=160
1216
+ size=1
1217
+ stride=1
1218
+ pad=1
1219
+ activation=silu
1220
+
1221
+
1222
+ # PAN-4
1223
+
1224
+ [convolutional]
1225
+ batch_normalize=1
1226
+ size=3
1227
+ stride=2
1228
+ pad=1
1229
+ filters=320
1230
+ activation=silu
1231
+
1232
+ [route]
1233
+ layers = -1, 149
1234
+
1235
+ [convolutional]
1236
+ batch_normalize=1
1237
+ filters=320
1238
+ size=1
1239
+ stride=1
1240
+ pad=1
1241
+ activation=silu
1242
+
1243
+ # Split
1244
+
1245
+ [convolutional]
1246
+ batch_normalize=1
1247
+ filters=320
1248
+ size=1
1249
+ stride=1
1250
+ pad=1
1251
+ activation=silu
1252
+
1253
+ [route]
1254
+ layers = -2
1255
+
1256
+ # Plain Block
1257
+
1258
+ [convolutional]
1259
+ batch_normalize=1
1260
+ filters=320
1261
+ size=1
1262
+ stride=1
1263
+ pad=1
1264
+ activation=silu
1265
+
1266
+ [convolutional]
1267
+ batch_normalize=1
1268
+ size=3
1269
+ stride=1
1270
+ pad=1
1271
+ filters=320
1272
+ activation=silu
1273
+
1274
+ [convolutional]
1275
+ batch_normalize=1
1276
+ filters=320
1277
+ size=1
1278
+ stride=1
1279
+ pad=1
1280
+ activation=silu
1281
+
1282
+ [convolutional]
1283
+ batch_normalize=1
1284
+ size=3
1285
+ stride=1
1286
+ pad=1
1287
+ filters=320
1288
+ activation=silu
1289
+
1290
+ [convolutional]
1291
+ batch_normalize=1
1292
+ filters=320
1293
+ size=1
1294
+ stride=1
1295
+ pad=1
1296
+ activation=silu
1297
+
1298
+ [convolutional]
1299
+ batch_normalize=1
1300
+ size=3
1301
+ stride=1
1302
+ pad=1
1303
+ filters=320
1304
+ activation=silu
1305
+
1306
+ [route]
1307
+ layers = -1,-8
1308
+
1309
+ # Transition last
1310
+
1311
+ # 178 (previous+3+4+2k)
1312
+ [convolutional]
1313
+ batch_normalize=1
1314
+ filters=320
1315
+ size=1
1316
+ stride=1
1317
+ pad=1
1318
+ activation=silu
1319
+
1320
+
1321
+ # PAN-5
1322
+
1323
+ [convolutional]
1324
+ batch_normalize=1
1325
+ size=3
1326
+ stride=2
1327
+ pad=1
1328
+ filters=640
1329
+ activation=silu
1330
+
1331
+ [route]
1332
+ layers = -1, 133
1333
+
1334
+ [convolutional]
1335
+ batch_normalize=1
1336
+ filters=640
1337
+ size=1
1338
+ stride=1
1339
+ pad=1
1340
+ activation=silu
1341
+
1342
+ # Split
1343
+
1344
+ [convolutional]
1345
+ batch_normalize=1
1346
+ filters=640
1347
+ size=1
1348
+ stride=1
1349
+ pad=1
1350
+ activation=silu
1351
+
1352
+ [route]
1353
+ layers = -2
1354
+
1355
+ # Plain Block
1356
+
1357
+ [convolutional]
1358
+ batch_normalize=1
1359
+ filters=640
1360
+ size=1
1361
+ stride=1
1362
+ pad=1
1363
+ activation=silu
1364
+
1365
+ [convolutional]
1366
+ batch_normalize=1
1367
+ size=3
1368
+ stride=1
1369
+ pad=1
1370
+ filters=640
1371
+ activation=silu
1372
+
1373
+ [convolutional]
1374
+ batch_normalize=1
1375
+ filters=640
1376
+ size=1
1377
+ stride=1
1378
+ pad=1
1379
+ activation=silu
1380
+
1381
+ [convolutional]
1382
+ batch_normalize=1
1383
+ size=3
1384
+ stride=1
1385
+ pad=1
1386
+ filters=640
1387
+ activation=silu
1388
+
1389
+ [convolutional]
1390
+ batch_normalize=1
1391
+ filters=640
1392
+ size=1
1393
+ stride=1
1394
+ pad=1
1395
+ activation=silu
1396
+
1397
+ [convolutional]
1398
+ batch_normalize=1
1399
+ size=3
1400
+ stride=1
1401
+ pad=1
1402
+ filters=640
1403
+ activation=silu
1404
+
1405
+ [route]
1406
+ layers = -1,-8
1407
+
1408
+ # Transition last
1409
+
1410
+ # 191 (previous+3+4+2k)
1411
+ [convolutional]
1412
+ batch_normalize=1
1413
+ filters=640
1414
+ size=1
1415
+ stride=1
1416
+ pad=1
1417
+ activation=silu
1418
+
1419
+ # ============ End of Neck ============ #
1420
+
1421
+ # 192
1422
+ [implicit_add]
1423
+ filters=320
1424
+
1425
+ # 193
1426
+ [implicit_add]
1427
+ filters=640
1428
+
1429
+ # 194
1430
+ [implicit_add]
1431
+ filters=1280
1432
+
1433
+ # 195
1434
+ [implicit_mul]
1435
+ filters=255
1436
+
1437
+ # 196
1438
+ [implicit_mul]
1439
+ filters=255
1440
+
1441
+ # 197
1442
+ [implicit_mul]
1443
+ filters=255
1444
+
1445
+ # ============ Head ============ #
1446
+
1447
+ # YOLO-3
1448
+
1449
+ [route]
1450
+ layers = 165
1451
+
1452
+ [convolutional]
1453
+ batch_normalize=1
1454
+ size=3
1455
+ stride=1
1456
+ pad=1
1457
+ filters=320
1458
+ activation=silu
1459
+
1460
+ [shift_channels]
1461
+ from=192
1462
+
1463
+ [convolutional]
1464
+ size=1
1465
+ stride=1
1466
+ pad=1
1467
+ filters=255
1468
+ activation=linear
1469
+
1470
+ [control_channels]
1471
+ from=195
1472
+
1473
+ [yolo]
1474
+ mask = 0,1,2
1475
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1476
+ classes=80
1477
+ num=9
1478
+ jitter=.3
1479
+ ignore_thresh = .7
1480
+ truth_thresh = 1
1481
+ random=1
1482
+ scale_x_y = 1.05
1483
+ iou_thresh=0.213
1484
+ cls_normalizer=1.0
1485
+ iou_normalizer=0.07
1486
+ iou_loss=ciou
1487
+ nms_kind=greedynms
1488
+ beta_nms=0.6
1489
+
1490
+
1491
+ # YOLO-4
1492
+
1493
+ [route]
1494
+ layers = 178
1495
+
1496
+ [convolutional]
1497
+ batch_normalize=1
1498
+ size=3
1499
+ stride=1
1500
+ pad=1
1501
+ filters=640
1502
+ activation=silu
1503
+
1504
+ [shift_channels]
1505
+ from=193
1506
+
1507
+ [convolutional]
1508
+ size=1
1509
+ stride=1
1510
+ pad=1
1511
+ filters=255
1512
+ activation=linear
1513
+
1514
+ [control_channels]
1515
+ from=196
1516
+
1517
+ [yolo]
1518
+ mask = 3,4,5
1519
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1520
+ classes=80
1521
+ num=9
1522
+ jitter=.3
1523
+ ignore_thresh = .7
1524
+ truth_thresh = 1
1525
+ random=1
1526
+ scale_x_y = 1.05
1527
+ iou_thresh=0.213
1528
+ cls_normalizer=1.0
1529
+ iou_normalizer=0.07
1530
+ iou_loss=ciou
1531
+ nms_kind=greedynms
1532
+ beta_nms=0.6
1533
+
1534
+
1535
+ # YOLO-5
1536
+
1537
+ [route]
1538
+ layers = 191
1539
+
1540
+ [convolutional]
1541
+ batch_normalize=1
1542
+ size=3
1543
+ stride=1
1544
+ pad=1
1545
+ filters=1280
1546
+ activation=silu
1547
+
1548
+ [shift_channels]
1549
+ from=194
1550
+
1551
+ [convolutional]
1552
+ size=1
1553
+ stride=1
1554
+ pad=1
1555
+ filters=255
1556
+ activation=linear
1557
+
1558
+ [control_channels]
1559
+ from=197
1560
+
1561
+ [yolo]
1562
+ mask = 6,7,8
1563
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1564
+ classes=80
1565
+ num=9
1566
+ jitter=.3
1567
+ ignore_thresh = .7
1568
+ truth_thresh = 1
1569
+ random=1
1570
+ scale_x_y = 1.05
1571
+ iou_thresh=0.213
1572
+ cls_normalizer=1.0
1573
+ iou_normalizer=0.07
1574
+ iou_loss=ciou
1575
+ nms_kind=greedynms
1576
+ beta_nms=0.6
cfg/yolor_p6.cfg ADDED
@@ -0,0 +1,1760 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [net]
2
+ batch=64
3
+ subdivisions=8
4
+ width=1280
5
+ height=1280
6
+ channels=3
7
+ momentum=0.949
8
+ decay=0.0005
9
+ angle=0
10
+ saturation = 1.5
11
+ exposure = 1.5
12
+ hue=.1
13
+
14
+ learning_rate=0.00261
15
+ burn_in=1000
16
+ max_batches = 500500
17
+ policy=steps
18
+ steps=400000,450000
19
+ scales=.1,.1
20
+
21
+ mosaic=1
22
+
23
+
24
+ # ============ Backbone ============ #
25
+
26
+ # Stem
27
+
28
+ # P1
29
+
30
+ # Downsample
31
+
32
+ # 0
33
+ [reorg]
34
+
35
+ [convolutional]
36
+ batch_normalize=1
37
+ filters=64
38
+ size=3
39
+ stride=1
40
+ pad=1
41
+ activation=silu
42
+
43
+
44
+ # P2
45
+
46
+ # Downsample
47
+
48
+ [convolutional]
49
+ batch_normalize=1
50
+ filters=128
51
+ size=3
52
+ stride=2
53
+ pad=1
54
+ activation=silu
55
+
56
+ # Split
57
+
58
+ [convolutional]
59
+ batch_normalize=1
60
+ filters=64
61
+ size=1
62
+ stride=1
63
+ pad=1
64
+ activation=silu
65
+
66
+ [route]
67
+ layers = -2
68
+
69
+ [convolutional]
70
+ batch_normalize=1
71
+ filters=64
72
+ size=1
73
+ stride=1
74
+ pad=1
75
+ activation=silu
76
+
77
+ # Residual Block
78
+
79
+ [convolutional]
80
+ batch_normalize=1
81
+ filters=64
82
+ size=1
83
+ stride=1
84
+ pad=1
85
+ activation=silu
86
+
87
+ [convolutional]
88
+ batch_normalize=1
89
+ filters=64
90
+ size=3
91
+ stride=1
92
+ pad=1
93
+ activation=silu
94
+
95
+ [shortcut]
96
+ from=-3
97
+ activation=linear
98
+
99
+ [convolutional]
100
+ batch_normalize=1
101
+ filters=64
102
+ size=1
103
+ stride=1
104
+ pad=1
105
+ activation=silu
106
+
107
+ [convolutional]
108
+ batch_normalize=1
109
+ filters=64
110
+ size=3
111
+ stride=1
112
+ pad=1
113
+ activation=silu
114
+
115
+ [shortcut]
116
+ from=-3
117
+ activation=linear
118
+
119
+ [convolutional]
120
+ batch_normalize=1
121
+ filters=64
122
+ size=1
123
+ stride=1
124
+ pad=1
125
+ activation=silu
126
+
127
+ [convolutional]
128
+ batch_normalize=1
129
+ filters=64
130
+ size=3
131
+ stride=1
132
+ pad=1
133
+ activation=silu
134
+
135
+ [shortcut]
136
+ from=-3
137
+ activation=linear
138
+
139
+ # Transition first
140
+ #
141
+ #[convolutional]
142
+ #batch_normalize=1
143
+ #filters=64
144
+ #size=1
145
+ #stride=1
146
+ #pad=1
147
+ #activation=silu
148
+
149
+ # Merge [-1, -(3k+3)]
150
+
151
+ [route]
152
+ layers = -1,-12
153
+
154
+ # Transition last
155
+
156
+ # 16 (previous+6+3k)
157
+ [convolutional]
158
+ batch_normalize=1
159
+ filters=128
160
+ size=1
161
+ stride=1
162
+ pad=1
163
+ activation=silu
164
+
165
+
166
+ # P3
167
+
168
+ # Downsample
169
+
170
+ [convolutional]
171
+ batch_normalize=1
172
+ filters=256
173
+ size=3
174
+ stride=2
175
+ pad=1
176
+ activation=silu
177
+
178
+ # Split
179
+
180
+ [convolutional]
181
+ batch_normalize=1
182
+ filters=128
183
+ size=1
184
+ stride=1
185
+ pad=1
186
+ activation=silu
187
+
188
+ [route]
189
+ layers = -2
190
+
191
+ [convolutional]
192
+ batch_normalize=1
193
+ filters=128
194
+ size=1
195
+ stride=1
196
+ pad=1
197
+ activation=silu
198
+
199
+ # Residual Block
200
+
201
+ [convolutional]
202
+ batch_normalize=1
203
+ filters=128
204
+ size=1
205
+ stride=1
206
+ pad=1
207
+ activation=silu
208
+
209
+ [convolutional]
210
+ batch_normalize=1
211
+ filters=128
212
+ size=3
213
+ stride=1
214
+ pad=1
215
+ activation=silu
216
+
217
+ [shortcut]
218
+ from=-3
219
+ activation=linear
220
+
221
+ [convolutional]
222
+ batch_normalize=1
223
+ filters=128
224
+ size=1
225
+ stride=1
226
+ pad=1
227
+ activation=silu
228
+
229
+ [convolutional]
230
+ batch_normalize=1
231
+ filters=128
232
+ size=3
233
+ stride=1
234
+ pad=1
235
+ activation=silu
236
+
237
+ [shortcut]
238
+ from=-3
239
+ activation=linear
240
+
241
+ [convolutional]
242
+ batch_normalize=1
243
+ filters=128
244
+ size=1
245
+ stride=1
246
+ pad=1
247
+ activation=silu
248
+
249
+ [convolutional]
250
+ batch_normalize=1
251
+ filters=128
252
+ size=3
253
+ stride=1
254
+ pad=1
255
+ activation=silu
256
+
257
+ [shortcut]
258
+ from=-3
259
+ activation=linear
260
+
261
+ [convolutional]
262
+ batch_normalize=1
263
+ filters=128
264
+ size=1
265
+ stride=1
266
+ pad=1
267
+ activation=silu
268
+
269
+ [convolutional]
270
+ batch_normalize=1
271
+ filters=128
272
+ size=3
273
+ stride=1
274
+ pad=1
275
+ activation=silu
276
+
277
+ [shortcut]
278
+ from=-3
279
+ activation=linear
280
+
281
+ [convolutional]
282
+ batch_normalize=1
283
+ filters=128
284
+ size=1
285
+ stride=1
286
+ pad=1
287
+ activation=silu
288
+
289
+ [convolutional]
290
+ batch_normalize=1
291
+ filters=128
292
+ size=3
293
+ stride=1
294
+ pad=1
295
+ activation=silu
296
+
297
+ [shortcut]
298
+ from=-3
299
+ activation=linear
300
+
301
+ [convolutional]
302
+ batch_normalize=1
303
+ filters=128
304
+ size=1
305
+ stride=1
306
+ pad=1
307
+ activation=silu
308
+
309
+ [convolutional]
310
+ batch_normalize=1
311
+ filters=128
312
+ size=3
313
+ stride=1
314
+ pad=1
315
+ activation=silu
316
+
317
+ [shortcut]
318
+ from=-3
319
+ activation=linear
320
+
321
+ [convolutional]
322
+ batch_normalize=1
323
+ filters=128
324
+ size=1
325
+ stride=1
326
+ pad=1
327
+ activation=silu
328
+
329
+ [convolutional]
330
+ batch_normalize=1
331
+ filters=128
332
+ size=3
333
+ stride=1
334
+ pad=1
335
+ activation=silu
336
+
337
+ [shortcut]
338
+ from=-3
339
+ activation=linear
340
+
341
+ # Transition first
342
+ #
343
+ #[convolutional]
344
+ #batch_normalize=1
345
+ #filters=128
346
+ #size=1
347
+ #stride=1
348
+ #pad=1
349
+ #activation=silu
350
+
351
+ # Merge [-1, -(3k+3)]
352
+
353
+ [route]
354
+ layers = -1,-24
355
+
356
+ # Transition last
357
+
358
+ # 43 (previous+6+3k)
359
+ [convolutional]
360
+ batch_normalize=1
361
+ filters=256
362
+ size=1
363
+ stride=1
364
+ pad=1
365
+ activation=silu
366
+
367
+
368
+ # P4
369
+
370
+ # Downsample
371
+
372
+ [convolutional]
373
+ batch_normalize=1
374
+ filters=384
375
+ size=3
376
+ stride=2
377
+ pad=1
378
+ activation=silu
379
+
380
+ # Split
381
+
382
+ [convolutional]
383
+ batch_normalize=1
384
+ filters=192
385
+ size=1
386
+ stride=1
387
+ pad=1
388
+ activation=silu
389
+
390
+ [route]
391
+ layers = -2
392
+
393
+ [convolutional]
394
+ batch_normalize=1
395
+ filters=192
396
+ size=1
397
+ stride=1
398
+ pad=1
399
+ activation=silu
400
+
401
+ # Residual Block
402
+
403
+ [convolutional]
404
+ batch_normalize=1
405
+ filters=192
406
+ size=1
407
+ stride=1
408
+ pad=1
409
+ activation=silu
410
+
411
+ [convolutional]
412
+ batch_normalize=1
413
+ filters=192
414
+ size=3
415
+ stride=1
416
+ pad=1
417
+ activation=silu
418
+
419
+ [shortcut]
420
+ from=-3
421
+ activation=linear
422
+
423
+ [convolutional]
424
+ batch_normalize=1
425
+ filters=192
426
+ size=1
427
+ stride=1
428
+ pad=1
429
+ activation=silu
430
+
431
+ [convolutional]
432
+ batch_normalize=1
433
+ filters=192
434
+ size=3
435
+ stride=1
436
+ pad=1
437
+ activation=silu
438
+
439
+ [shortcut]
440
+ from=-3
441
+ activation=linear
442
+
443
+ [convolutional]
444
+ batch_normalize=1
445
+ filters=192
446
+ size=1
447
+ stride=1
448
+ pad=1
449
+ activation=silu
450
+
451
+ [convolutional]
452
+ batch_normalize=1
453
+ filters=192
454
+ size=3
455
+ stride=1
456
+ pad=1
457
+ activation=silu
458
+
459
+ [shortcut]
460
+ from=-3
461
+ activation=linear
462
+
463
+ [convolutional]
464
+ batch_normalize=1
465
+ filters=192
466
+ size=1
467
+ stride=1
468
+ pad=1
469
+ activation=silu
470
+
471
+ [convolutional]
472
+ batch_normalize=1
473
+ filters=192
474
+ size=3
475
+ stride=1
476
+ pad=1
477
+ activation=silu
478
+
479
+ [shortcut]
480
+ from=-3
481
+ activation=linear
482
+
483
+ [convolutional]
484
+ batch_normalize=1
485
+ filters=192
486
+ size=1
487
+ stride=1
488
+ pad=1
489
+ activation=silu
490
+
491
+ [convolutional]
492
+ batch_normalize=1
493
+ filters=192
494
+ size=3
495
+ stride=1
496
+ pad=1
497
+ activation=silu
498
+
499
+ [shortcut]
500
+ from=-3
501
+ activation=linear
502
+
503
+ [convolutional]
504
+ batch_normalize=1
505
+ filters=192
506
+ size=1
507
+ stride=1
508
+ pad=1
509
+ activation=silu
510
+
511
+ [convolutional]
512
+ batch_normalize=1
513
+ filters=192
514
+ size=3
515
+ stride=1
516
+ pad=1
517
+ activation=silu
518
+
519
+ [shortcut]
520
+ from=-3
521
+ activation=linear
522
+
523
+ [convolutional]
524
+ batch_normalize=1
525
+ filters=192
526
+ size=1
527
+ stride=1
528
+ pad=1
529
+ activation=silu
530
+
531
+ [convolutional]
532
+ batch_normalize=1
533
+ filters=192
534
+ size=3
535
+ stride=1
536
+ pad=1
537
+ activation=silu
538
+
539
+ [shortcut]
540
+ from=-3
541
+ activation=linear
542
+
543
+ # Transition first
544
+ #
545
+ #[convolutional]
546
+ #batch_normalize=1
547
+ #filters=192
548
+ #size=1
549
+ #stride=1
550
+ #pad=1
551
+ #activation=silu
552
+
553
+ # Merge [-1, -(3k+3)]
554
+
555
+ [route]
556
+ layers = -1,-24
557
+
558
+ # Transition last
559
+
560
+ # 70 (previous+6+3k)
561
+ [convolutional]
562
+ batch_normalize=1
563
+ filters=384
564
+ size=1
565
+ stride=1
566
+ pad=1
567
+ activation=silu
568
+
569
+
570
+ # P5
571
+
572
+ # Downsample
573
+
574
+ [convolutional]
575
+ batch_normalize=1
576
+ filters=512
577
+ size=3
578
+ stride=2
579
+ pad=1
580
+ activation=silu
581
+
582
+ # Split
583
+
584
+ [convolutional]
585
+ batch_normalize=1
586
+ filters=256
587
+ size=1
588
+ stride=1
589
+ pad=1
590
+ activation=silu
591
+
592
+ [route]
593
+ layers = -2
594
+
595
+ [convolutional]
596
+ batch_normalize=1
597
+ filters=256
598
+ size=1
599
+ stride=1
600
+ pad=1
601
+ activation=silu
602
+
603
+ # Residual Block
604
+
605
+ [convolutional]
606
+ batch_normalize=1
607
+ filters=256
608
+ size=1
609
+ stride=1
610
+ pad=1
611
+ activation=silu
612
+
613
+ [convolutional]
614
+ batch_normalize=1
615
+ filters=256
616
+ size=3
617
+ stride=1
618
+ pad=1
619
+ activation=silu
620
+
621
+ [shortcut]
622
+ from=-3
623
+ activation=linear
624
+
625
+ [convolutional]
626
+ batch_normalize=1
627
+ filters=256
628
+ size=1
629
+ stride=1
630
+ pad=1
631
+ activation=silu
632
+
633
+ [convolutional]
634
+ batch_normalize=1
635
+ filters=256
636
+ size=3
637
+ stride=1
638
+ pad=1
639
+ activation=silu
640
+
641
+ [shortcut]
642
+ from=-3
643
+ activation=linear
644
+
645
+ [convolutional]
646
+ batch_normalize=1
647
+ filters=256
648
+ size=1
649
+ stride=1
650
+ pad=1
651
+ activation=silu
652
+
653
+ [convolutional]
654
+ batch_normalize=1
655
+ filters=256
656
+ size=3
657
+ stride=1
658
+ pad=1
659
+ activation=silu
660
+
661
+ [shortcut]
662
+ from=-3
663
+ activation=linear
664
+
665
+ # Transition first
666
+ #
667
+ #[convolutional]
668
+ #batch_normalize=1
669
+ #filters=256
670
+ #size=1
671
+ #stride=1
672
+ #pad=1
673
+ #activation=silu
674
+
675
+ # Merge [-1, -(3k+3)]
676
+
677
+ [route]
678
+ layers = -1,-12
679
+
680
+ # Transition last
681
+
682
+ # 85 (previous+6+3k)
683
+ [convolutional]
684
+ batch_normalize=1
685
+ filters=512
686
+ size=1
687
+ stride=1
688
+ pad=1
689
+ activation=silu
690
+
691
+
692
+ # P6
693
+
694
+ # Downsample
695
+
696
+ [convolutional]
697
+ batch_normalize=1
698
+ filters=640
699
+ size=3
700
+ stride=2
701
+ pad=1
702
+ activation=silu
703
+
704
+ # Split
705
+
706
+ [convolutional]
707
+ batch_normalize=1
708
+ filters=320
709
+ size=1
710
+ stride=1
711
+ pad=1
712
+ activation=silu
713
+
714
+ [route]
715
+ layers = -2
716
+
717
+ [convolutional]
718
+ batch_normalize=1
719
+ filters=320
720
+ size=1
721
+ stride=1
722
+ pad=1
723
+ activation=silu
724
+
725
+ # Residual Block
726
+
727
+ [convolutional]
728
+ batch_normalize=1
729
+ filters=320
730
+ size=1
731
+ stride=1
732
+ pad=1
733
+ activation=silu
734
+
735
+ [convolutional]
736
+ batch_normalize=1
737
+ filters=320
738
+ size=3
739
+ stride=1
740
+ pad=1
741
+ activation=silu
742
+
743
+ [shortcut]
744
+ from=-3
745
+ activation=linear
746
+
747
+ [convolutional]
748
+ batch_normalize=1
749
+ filters=320
750
+ size=1
751
+ stride=1
752
+ pad=1
753
+ activation=silu
754
+
755
+ [convolutional]
756
+ batch_normalize=1
757
+ filters=320
758
+ size=3
759
+ stride=1
760
+ pad=1
761
+ activation=silu
762
+
763
+ [shortcut]
764
+ from=-3
765
+ activation=linear
766
+
767
+ [convolutional]
768
+ batch_normalize=1
769
+ filters=320
770
+ size=1
771
+ stride=1
772
+ pad=1
773
+ activation=silu
774
+
775
+ [convolutional]
776
+ batch_normalize=1
777
+ filters=320
778
+ size=3
779
+ stride=1
780
+ pad=1
781
+ activation=silu
782
+
783
+ [shortcut]
784
+ from=-3
785
+ activation=linear
786
+
787
+ # Transition first
788
+ #
789
+ #[convolutional]
790
+ #batch_normalize=1
791
+ #filters=320
792
+ #size=1
793
+ #stride=1
794
+ #pad=1
795
+ #activation=silu
796
+
797
+ # Merge [-1, -(3k+3)]
798
+
799
+ [route]
800
+ layers = -1,-12
801
+
802
+ # Transition last
803
+
804
+ # 100 (previous+6+3k)
805
+ [convolutional]
806
+ batch_normalize=1
807
+ filters=640
808
+ size=1
809
+ stride=1
810
+ pad=1
811
+ activation=silu
812
+
813
+ # ============ End of Backbone ============ #
814
+
815
+ # ============ Neck ============ #
816
+
817
+ # CSPSPP
818
+
819
+ [convolutional]
820
+ batch_normalize=1
821
+ filters=320
822
+ size=1
823
+ stride=1
824
+ pad=1
825
+ activation=silu
826
+
827
+ [route]
828
+ layers = -2
829
+
830
+ [convolutional]
831
+ batch_normalize=1
832
+ filters=320
833
+ size=1
834
+ stride=1
835
+ pad=1
836
+ activation=silu
837
+
838
+ [convolutional]
839
+ batch_normalize=1
840
+ size=3
841
+ stride=1
842
+ pad=1
843
+ filters=320
844
+ activation=silu
845
+
846
+ [convolutional]
847
+ batch_normalize=1
848
+ filters=320
849
+ size=1
850
+ stride=1
851
+ pad=1
852
+ activation=silu
853
+
854
+ ### SPP ###
855
+ [maxpool]
856
+ stride=1
857
+ size=5
858
+
859
+ [route]
860
+ layers=-2
861
+
862
+ [maxpool]
863
+ stride=1
864
+ size=9
865
+
866
+ [route]
867
+ layers=-4
868
+
869
+ [maxpool]
870
+ stride=1
871
+ size=13
872
+
873
+ [route]
874
+ layers=-1,-3,-5,-6
875
+ ### End SPP ###
876
+
877
+ [convolutional]
878
+ batch_normalize=1
879
+ filters=320
880
+ size=1
881
+ stride=1
882
+ pad=1
883
+ activation=silu
884
+
885
+ [convolutional]
886
+ batch_normalize=1
887
+ size=3
888
+ stride=1
889
+ pad=1
890
+ filters=320
891
+ activation=silu
892
+
893
+ [route]
894
+ layers = -1, -13
895
+
896
+ # 115 (previous+6+5+2k)
897
+ [convolutional]
898
+ batch_normalize=1
899
+ filters=320
900
+ size=1
901
+ stride=1
902
+ pad=1
903
+ activation=silu
904
+
905
+ # End of CSPSPP
906
+
907
+
908
+ # FPN-5
909
+
910
+ [convolutional]
911
+ batch_normalize=1
912
+ filters=256
913
+ size=1
914
+ stride=1
915
+ pad=1
916
+ activation=silu
917
+
918
+ [upsample]
919
+ stride=2
920
+
921
+ [route]
922
+ layers = 85
923
+
924
+ [convolutional]
925
+ batch_normalize=1
926
+ filters=256
927
+ size=1
928
+ stride=1
929
+ pad=1
930
+ activation=silu
931
+
932
+ [route]
933
+ layers = -1, -3
934
+
935
+ [convolutional]
936
+ batch_normalize=1
937
+ filters=256
938
+ size=1
939
+ stride=1
940
+ pad=1
941
+ activation=silu
942
+
943
+ # Split
944
+
945
+ [convolutional]
946
+ batch_normalize=1
947
+ filters=256
948
+ size=1
949
+ stride=1
950
+ pad=1
951
+ activation=silu
952
+
953
+ [route]
954
+ layers = -2
955
+
956
+ # Plain Block
957
+
958
+ [convolutional]
959
+ batch_normalize=1
960
+ filters=256
961
+ size=1
962
+ stride=1
963
+ pad=1
964
+ activation=silu
965
+
966
+ [convolutional]
967
+ batch_normalize=1
968
+ size=3
969
+ stride=1
970
+ pad=1
971
+ filters=256
972
+ activation=silu
973
+
974
+ [convolutional]
975
+ batch_normalize=1
976
+ filters=256
977
+ size=1
978
+ stride=1
979
+ pad=1
980
+ activation=silu
981
+
982
+ [convolutional]
983
+ batch_normalize=1
984
+ size=3
985
+ stride=1
986
+ pad=1
987
+ filters=256
988
+ activation=silu
989
+
990
+ [convolutional]
991
+ batch_normalize=1
992
+ filters=256
993
+ size=1
994
+ stride=1
995
+ pad=1
996
+ activation=silu
997
+
998
+ [convolutional]
999
+ batch_normalize=1
1000
+ size=3
1001
+ stride=1
1002
+ pad=1
1003
+ filters=256
1004
+ activation=silu
1005
+
1006
+ # Merge [-1, -(2k+2)]
1007
+
1008
+ [route]
1009
+ layers = -1, -8
1010
+
1011
+ # Transition last
1012
+
1013
+ # 131 (previous+6+4+2k)
1014
+ [convolutional]
1015
+ batch_normalize=1
1016
+ filters=256
1017
+ size=1
1018
+ stride=1
1019
+ pad=1
1020
+ activation=silu
1021
+
1022
+
1023
+ # FPN-4
1024
+
1025
+ [convolutional]
1026
+ batch_normalize=1
1027
+ filters=192
1028
+ size=1
1029
+ stride=1
1030
+ pad=1
1031
+ activation=silu
1032
+
1033
+ [upsample]
1034
+ stride=2
1035
+
1036
+ [route]
1037
+ layers = 70
1038
+
1039
+ [convolutional]
1040
+ batch_normalize=1
1041
+ filters=192
1042
+ size=1
1043
+ stride=1
1044
+ pad=1
1045
+ activation=silu
1046
+
1047
+ [route]
1048
+ layers = -1, -3
1049
+
1050
+ [convolutional]
1051
+ batch_normalize=1
1052
+ filters=192
1053
+ size=1
1054
+ stride=1
1055
+ pad=1
1056
+ activation=silu
1057
+
1058
+ # Split
1059
+
1060
+ [convolutional]
1061
+ batch_normalize=1
1062
+ filters=192
1063
+ size=1
1064
+ stride=1
1065
+ pad=1
1066
+ activation=silu
1067
+
1068
+ [route]
1069
+ layers = -2
1070
+
1071
+ # Plain Block
1072
+
1073
+ [convolutional]
1074
+ batch_normalize=1
1075
+ filters=192
1076
+ size=1
1077
+ stride=1
1078
+ pad=1
1079
+ activation=silu
1080
+
1081
+ [convolutional]
1082
+ batch_normalize=1
1083
+ size=3
1084
+ stride=1
1085
+ pad=1
1086
+ filters=192
1087
+ activation=silu
1088
+
1089
+ [convolutional]
1090
+ batch_normalize=1
1091
+ filters=192
1092
+ size=1
1093
+ stride=1
1094
+ pad=1
1095
+ activation=silu
1096
+
1097
+ [convolutional]
1098
+ batch_normalize=1
1099
+ size=3
1100
+ stride=1
1101
+ pad=1
1102
+ filters=192
1103
+ activation=silu
1104
+
1105
+ [convolutional]
1106
+ batch_normalize=1
1107
+ filters=192
1108
+ size=1
1109
+ stride=1
1110
+ pad=1
1111
+ activation=silu
1112
+
1113
+ [convolutional]
1114
+ batch_normalize=1
1115
+ size=3
1116
+ stride=1
1117
+ pad=1
1118
+ filters=192
1119
+ activation=silu
1120
+
1121
+ # Merge [-1, -(2k+2)]
1122
+
1123
+ [route]
1124
+ layers = -1, -8
1125
+
1126
+ # Transition last
1127
+
1128
+ # 147 (previous+6+4+2k)
1129
+ [convolutional]
1130
+ batch_normalize=1
1131
+ filters=192
1132
+ size=1
1133
+ stride=1
1134
+ pad=1
1135
+ activation=silu
1136
+
1137
+
1138
+ # FPN-3
1139
+
1140
+ [convolutional]
1141
+ batch_normalize=1
1142
+ filters=128
1143
+ size=1
1144
+ stride=1
1145
+ pad=1
1146
+ activation=silu
1147
+
1148
+ [upsample]
1149
+ stride=2
1150
+
1151
+ [route]
1152
+ layers = 43
1153
+
1154
+ [convolutional]
1155
+ batch_normalize=1
1156
+ filters=128
1157
+ size=1
1158
+ stride=1
1159
+ pad=1
1160
+ activation=silu
1161
+
1162
+ [route]
1163
+ layers = -1, -3
1164
+
1165
+ [convolutional]
1166
+ batch_normalize=1
1167
+ filters=128
1168
+ size=1
1169
+ stride=1
1170
+ pad=1
1171
+ activation=silu
1172
+
1173
+ # Split
1174
+
1175
+ [convolutional]
1176
+ batch_normalize=1
1177
+ filters=128
1178
+ size=1
1179
+ stride=1
1180
+ pad=1
1181
+ activation=silu
1182
+
1183
+ [route]
1184
+ layers = -2
1185
+
1186
+ # Plain Block
1187
+
1188
+ [convolutional]
1189
+ batch_normalize=1
1190
+ filters=128
1191
+ size=1
1192
+ stride=1
1193
+ pad=1
1194
+ activation=silu
1195
+
1196
+ [convolutional]
1197
+ batch_normalize=1
1198
+ size=3
1199
+ stride=1
1200
+ pad=1
1201
+ filters=128
1202
+ activation=silu
1203
+
1204
+ [convolutional]
1205
+ batch_normalize=1
1206
+ filters=128
1207
+ size=1
1208
+ stride=1
1209
+ pad=1
1210
+ activation=silu
1211
+
1212
+ [convolutional]
1213
+ batch_normalize=1
1214
+ size=3
1215
+ stride=1
1216
+ pad=1
1217
+ filters=128
1218
+ activation=silu
1219
+
1220
+ [convolutional]
1221
+ batch_normalize=1
1222
+ filters=128
1223
+ size=1
1224
+ stride=1
1225
+ pad=1
1226
+ activation=silu
1227
+
1228
+ [convolutional]
1229
+ batch_normalize=1
1230
+ size=3
1231
+ stride=1
1232
+ pad=1
1233
+ filters=128
1234
+ activation=silu
1235
+
1236
+ # Merge [-1, -(2k+2)]
1237
+
1238
+ [route]
1239
+ layers = -1, -8
1240
+
1241
+ # Transition last
1242
+
1243
+ # 163 (previous+6+4+2k)
1244
+ [convolutional]
1245
+ batch_normalize=1
1246
+ filters=128
1247
+ size=1
1248
+ stride=1
1249
+ pad=1
1250
+ activation=silu
1251
+
1252
+
1253
+ # PAN-4
1254
+
1255
+ [convolutional]
1256
+ batch_normalize=1
1257
+ size=3
1258
+ stride=2
1259
+ pad=1
1260
+ filters=192
1261
+ activation=silu
1262
+
1263
+ [route]
1264
+ layers = -1, 147
1265
+
1266
+ [convolutional]
1267
+ batch_normalize=1
1268
+ filters=192
1269
+ size=1
1270
+ stride=1
1271
+ pad=1
1272
+ activation=silu
1273
+
1274
+ # Split
1275
+
1276
+ [convolutional]
1277
+ batch_normalize=1
1278
+ filters=192
1279
+ size=1
1280
+ stride=1
1281
+ pad=1
1282
+ activation=silu
1283
+
1284
+ [route]
1285
+ layers = -2
1286
+
1287
+ # Plain Block
1288
+
1289
+ [convolutional]
1290
+ batch_normalize=1
1291
+ filters=192
1292
+ size=1
1293
+ stride=1
1294
+ pad=1
1295
+ activation=silu
1296
+
1297
+ [convolutional]
1298
+ batch_normalize=1
1299
+ size=3
1300
+ stride=1
1301
+ pad=1
1302
+ filters=192
1303
+ activation=silu
1304
+
1305
+ [convolutional]
1306
+ batch_normalize=1
1307
+ filters=192
1308
+ size=1
1309
+ stride=1
1310
+ pad=1
1311
+ activation=silu
1312
+
1313
+ [convolutional]
1314
+ batch_normalize=1
1315
+ size=3
1316
+ stride=1
1317
+ pad=1
1318
+ filters=192
1319
+ activation=silu
1320
+
1321
+ [convolutional]
1322
+ batch_normalize=1
1323
+ filters=192
1324
+ size=1
1325
+ stride=1
1326
+ pad=1
1327
+ activation=silu
1328
+
1329
+ [convolutional]
1330
+ batch_normalize=1
1331
+ size=3
1332
+ stride=1
1333
+ pad=1
1334
+ filters=192
1335
+ activation=silu
1336
+
1337
+ [route]
1338
+ layers = -1,-8
1339
+
1340
+ # Transition last
1341
+
1342
+ # 176 (previous+3+4+2k)
1343
+ [convolutional]
1344
+ batch_normalize=1
1345
+ filters=192
1346
+ size=1
1347
+ stride=1
1348
+ pad=1
1349
+ activation=silu
1350
+
1351
+
1352
+ # PAN-5
1353
+
1354
+ [convolutional]
1355
+ batch_normalize=1
1356
+ size=3
1357
+ stride=2
1358
+ pad=1
1359
+ filters=256
1360
+ activation=silu
1361
+
1362
+ [route]
1363
+ layers = -1, 131
1364
+
1365
+ [convolutional]
1366
+ batch_normalize=1
1367
+ filters=256
1368
+ size=1
1369
+ stride=1
1370
+ pad=1
1371
+ activation=silu
1372
+
1373
+ # Split
1374
+
1375
+ [convolutional]
1376
+ batch_normalize=1
1377
+ filters=256
1378
+ size=1
1379
+ stride=1
1380
+ pad=1
1381
+ activation=silu
1382
+
1383
+ [route]
1384
+ layers = -2
1385
+
1386
+ # Plain Block
1387
+
1388
+ [convolutional]
1389
+ batch_normalize=1
1390
+ filters=256
1391
+ size=1
1392
+ stride=1
1393
+ pad=1
1394
+ activation=silu
1395
+
1396
+ [convolutional]
1397
+ batch_normalize=1
1398
+ size=3
1399
+ stride=1
1400
+ pad=1
1401
+ filters=256
1402
+ activation=silu
1403
+
1404
+ [convolutional]
1405
+ batch_normalize=1
1406
+ filters=256
1407
+ size=1
1408
+ stride=1
1409
+ pad=1
1410
+ activation=silu
1411
+
1412
+ [convolutional]
1413
+ batch_normalize=1
1414
+ size=3
1415
+ stride=1
1416
+ pad=1
1417
+ filters=256
1418
+ activation=silu
1419
+
1420
+ [convolutional]
1421
+ batch_normalize=1
1422
+ filters=256
1423
+ size=1
1424
+ stride=1
1425
+ pad=1
1426
+ activation=silu
1427
+
1428
+ [convolutional]
1429
+ batch_normalize=1
1430
+ size=3
1431
+ stride=1
1432
+ pad=1
1433
+ filters=256
1434
+ activation=silu
1435
+
1436
+ [route]
1437
+ layers = -1,-8
1438
+
1439
+ # Transition last
1440
+
1441
+ # 189 (previous+3+4+2k)
1442
+ [convolutional]
1443
+ batch_normalize=1
1444
+ filters=256
1445
+ size=1
1446
+ stride=1
1447
+ pad=1
1448
+ activation=silu
1449
+
1450
+
1451
+ # PAN-6
1452
+
1453
+ [convolutional]
1454
+ batch_normalize=1
1455
+ size=3
1456
+ stride=2
1457
+ pad=1
1458
+ filters=320
1459
+ activation=silu
1460
+
1461
+ [route]
1462
+ layers = -1, 115
1463
+
1464
+ [convolutional]
1465
+ batch_normalize=1
1466
+ filters=320
1467
+ size=1
1468
+ stride=1
1469
+ pad=1
1470
+ activation=silu
1471
+
1472
+ # Split
1473
+
1474
+ [convolutional]
1475
+ batch_normalize=1
1476
+ filters=320
1477
+ size=1
1478
+ stride=1
1479
+ pad=1
1480
+ activation=silu
1481
+
1482
+ [route]
1483
+ layers = -2
1484
+
1485
+ # Plain Block
1486
+
1487
+ [convolutional]
1488
+ batch_normalize=1
1489
+ filters=320
1490
+ size=1
1491
+ stride=1
1492
+ pad=1
1493
+ activation=silu
1494
+
1495
+ [convolutional]
1496
+ batch_normalize=1
1497
+ size=3
1498
+ stride=1
1499
+ pad=1
1500
+ filters=320
1501
+ activation=silu
1502
+
1503
+ [convolutional]
1504
+ batch_normalize=1
1505
+ filters=320
1506
+ size=1
1507
+ stride=1
1508
+ pad=1
1509
+ activation=silu
1510
+
1511
+ [convolutional]
1512
+ batch_normalize=1
1513
+ size=3
1514
+ stride=1
1515
+ pad=1
1516
+ filters=320
1517
+ activation=silu
1518
+
1519
+ [convolutional]
1520
+ batch_normalize=1
1521
+ filters=320
1522
+ size=1
1523
+ stride=1
1524
+ pad=1
1525
+ activation=silu
1526
+
1527
+ [convolutional]
1528
+ batch_normalize=1
1529
+ size=3
1530
+ stride=1
1531
+ pad=1
1532
+ filters=320
1533
+ activation=silu
1534
+
1535
+ [route]
1536
+ layers = -1,-8
1537
+
1538
+ # Transition last
1539
+
1540
+ # 202 (previous+3+4+2k)
1541
+ [convolutional]
1542
+ batch_normalize=1
1543
+ filters=320
1544
+ size=1
1545
+ stride=1
1546
+ pad=1
1547
+ activation=silu
1548
+
1549
+ # ============ End of Neck ============ #
1550
+
1551
+ # 203
1552
+ [implicit_add]
1553
+ filters=256
1554
+
1555
+ # 204
1556
+ [implicit_add]
1557
+ filters=384
1558
+
1559
+ # 205
1560
+ [implicit_add]
1561
+ filters=512
1562
+
1563
+ # 206
1564
+ [implicit_add]
1565
+ filters=640
1566
+
1567
+ # 207
1568
+ [implicit_mul]
1569
+ filters=255
1570
+
1571
+ # 208
1572
+ [implicit_mul]
1573
+ filters=255
1574
+
1575
+ # 209
1576
+ [implicit_mul]
1577
+ filters=255
1578
+
1579
+ # 210
1580
+ [implicit_mul]
1581
+ filters=255
1582
+
1583
+ # ============ Head ============ #
1584
+
1585
+ # YOLO-3
1586
+
1587
+ [route]
1588
+ layers = 163
1589
+
1590
+ [convolutional]
1591
+ batch_normalize=1
1592
+ size=3
1593
+ stride=1
1594
+ pad=1
1595
+ filters=256
1596
+ activation=silu
1597
+
1598
+ [shift_channels]
1599
+ from=203
1600
+
1601
+ [convolutional]
1602
+ size=1
1603
+ stride=1
1604
+ pad=1
1605
+ filters=255
1606
+ activation=linear
1607
+
1608
+ [control_channels]
1609
+ from=207
1610
+
1611
+ [yolo]
1612
+ mask = 0,1,2
1613
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1614
+ classes=80
1615
+ num=12
1616
+ jitter=.3
1617
+ ignore_thresh = .7
1618
+ truth_thresh = 1
1619
+ random=1
1620
+ scale_x_y = 1.05
1621
+ iou_thresh=0.213
1622
+ cls_normalizer=1.0
1623
+ iou_normalizer=0.07
1624
+ iou_loss=ciou
1625
+ nms_kind=greedynms
1626
+ beta_nms=0.6
1627
+
1628
+
1629
+ # YOLO-4
1630
+
1631
+ [route]
1632
+ layers = 176
1633
+
1634
+ [convolutional]
1635
+ batch_normalize=1
1636
+ size=3
1637
+ stride=1
1638
+ pad=1
1639
+ filters=384
1640
+ activation=silu
1641
+
1642
+ [shift_channels]
1643
+ from=204
1644
+
1645
+ [convolutional]
1646
+ size=1
1647
+ stride=1
1648
+ pad=1
1649
+ filters=255
1650
+ activation=linear
1651
+
1652
+ [control_channels]
1653
+ from=208
1654
+
1655
+ [yolo]
1656
+ mask = 3,4,5
1657
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1658
+ classes=80
1659
+ num=12
1660
+ jitter=.3
1661
+ ignore_thresh = .7
1662
+ truth_thresh = 1
1663
+ random=1
1664
+ scale_x_y = 1.05
1665
+ iou_thresh=0.213
1666
+ cls_normalizer=1.0
1667
+ iou_normalizer=0.07
1668
+ iou_loss=ciou
1669
+ nms_kind=greedynms
1670
+ beta_nms=0.6
1671
+
1672
+
1673
+ # YOLO-5
1674
+
1675
+ [route]
1676
+ layers = 189
1677
+
1678
+ [convolutional]
1679
+ batch_normalize=1
1680
+ size=3
1681
+ stride=1
1682
+ pad=1
1683
+ filters=512
1684
+ activation=silu
1685
+
1686
+ [shift_channels]
1687
+ from=205
1688
+
1689
+ [convolutional]
1690
+ size=1
1691
+ stride=1
1692
+ pad=1
1693
+ filters=255
1694
+ activation=linear
1695
+
1696
+ [control_channels]
1697
+ from=209
1698
+
1699
+ [yolo]
1700
+ mask = 6,7,8
1701
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1702
+ classes=80
1703
+ num=12
1704
+ jitter=.3
1705
+ ignore_thresh = .7
1706
+ truth_thresh = 1
1707
+ random=1
1708
+ scale_x_y = 1.05
1709
+ iou_thresh=0.213
1710
+ cls_normalizer=1.0
1711
+ iou_normalizer=0.07
1712
+ iou_loss=ciou
1713
+ nms_kind=greedynms
1714
+ beta_nms=0.6
1715
+
1716
+
1717
+ # YOLO-6
1718
+
1719
+ [route]
1720
+ layers = 202
1721
+
1722
+ [convolutional]
1723
+ batch_normalize=1
1724
+ size=3
1725
+ stride=1
1726
+ pad=1
1727
+ filters=640
1728
+ activation=silu
1729
+
1730
+ [shift_channels]
1731
+ from=206
1732
+
1733
+ [convolutional]
1734
+ size=1
1735
+ stride=1
1736
+ pad=1
1737
+ filters=255
1738
+ activation=linear
1739
+
1740
+ [control_channels]
1741
+ from=210
1742
+
1743
+ [yolo]
1744
+ mask = 9,10,11
1745
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1746
+ classes=80
1747
+ num=12
1748
+ jitter=.3
1749
+ ignore_thresh = .7
1750
+ truth_thresh = 1
1751
+ random=1
1752
+ scale_x_y = 1.05
1753
+ iou_thresh=0.213
1754
+ cls_normalizer=1.0
1755
+ iou_normalizer=0.07
1756
+ iou_loss=ciou
1757
+ nms_kind=greedynms
1758
+ beta_nms=0.6
1759
+
1760
+ # ============ End of Head ============ #
cfg/yolor_w6.cfg ADDED
@@ -0,0 +1,1760 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [net]
2
+ batch=64
3
+ subdivisions=8
4
+ width=1280
5
+ height=1280
6
+ channels=3
7
+ momentum=0.949
8
+ decay=0.0005
9
+ angle=0
10
+ saturation = 1.5
11
+ exposure = 1.5
12
+ hue=.1
13
+
14
+ learning_rate=0.00261
15
+ burn_in=1000
16
+ max_batches = 500500
17
+ policy=steps
18
+ steps=400000,450000
19
+ scales=.1,.1
20
+
21
+ mosaic=1
22
+
23
+
24
+ # ============ Backbone ============ #
25
+
26
+ # Stem
27
+
28
+ # P1
29
+
30
+ # Downsample
31
+
32
+ # 0
33
+ [reorg]
34
+
35
+ [convolutional]
36
+ batch_normalize=1
37
+ filters=64
38
+ size=3
39
+ stride=1
40
+ pad=1
41
+ activation=silu
42
+
43
+
44
+ # P2
45
+
46
+ # Downsample
47
+
48
+ [convolutional]
49
+ batch_normalize=1
50
+ filters=128
51
+ size=3
52
+ stride=2
53
+ pad=1
54
+ activation=silu
55
+
56
+ # Split
57
+
58
+ [convolutional]
59
+ batch_normalize=1
60
+ filters=64
61
+ size=1
62
+ stride=1
63
+ pad=1
64
+ activation=silu
65
+
66
+ [route]
67
+ layers = -2
68
+
69
+ [convolutional]
70
+ batch_normalize=1
71
+ filters=64
72
+ size=1
73
+ stride=1
74
+ pad=1
75
+ activation=silu
76
+
77
+ # Residual Block
78
+
79
+ [convolutional]
80
+ batch_normalize=1
81
+ filters=64
82
+ size=1
83
+ stride=1
84
+ pad=1
85
+ activation=silu
86
+
87
+ [convolutional]
88
+ batch_normalize=1
89
+ filters=64
90
+ size=3
91
+ stride=1
92
+ pad=1
93
+ activation=silu
94
+
95
+ [shortcut]
96
+ from=-3
97
+ activation=linear
98
+
99
+ [convolutional]
100
+ batch_normalize=1
101
+ filters=64
102
+ size=1
103
+ stride=1
104
+ pad=1
105
+ activation=silu
106
+
107
+ [convolutional]
108
+ batch_normalize=1
109
+ filters=64
110
+ size=3
111
+ stride=1
112
+ pad=1
113
+ activation=silu
114
+
115
+ [shortcut]
116
+ from=-3
117
+ activation=linear
118
+
119
+ [convolutional]
120
+ batch_normalize=1
121
+ filters=64
122
+ size=1
123
+ stride=1
124
+ pad=1
125
+ activation=silu
126
+
127
+ [convolutional]
128
+ batch_normalize=1
129
+ filters=64
130
+ size=3
131
+ stride=1
132
+ pad=1
133
+ activation=silu
134
+
135
+ [shortcut]
136
+ from=-3
137
+ activation=linear
138
+
139
+ # Transition first
140
+ #
141
+ #[convolutional]
142
+ #batch_normalize=1
143
+ #filters=64
144
+ #size=1
145
+ #stride=1
146
+ #pad=1
147
+ #activation=silu
148
+
149
+ # Merge [-1, -(3k+3)]
150
+
151
+ [route]
152
+ layers = -1,-12
153
+
154
+ # Transition last
155
+
156
+ # 16 (previous+6+3k)
157
+ [convolutional]
158
+ batch_normalize=1
159
+ filters=128
160
+ size=1
161
+ stride=1
162
+ pad=1
163
+ activation=silu
164
+
165
+
166
+ # P3
167
+
168
+ # Downsample
169
+
170
+ [convolutional]
171
+ batch_normalize=1
172
+ filters=256
173
+ size=3
174
+ stride=2
175
+ pad=1
176
+ activation=silu
177
+
178
+ # Split
179
+
180
+ [convolutional]
181
+ batch_normalize=1
182
+ filters=128
183
+ size=1
184
+ stride=1
185
+ pad=1
186
+ activation=silu
187
+
188
+ [route]
189
+ layers = -2
190
+
191
+ [convolutional]
192
+ batch_normalize=1
193
+ filters=128
194
+ size=1
195
+ stride=1
196
+ pad=1
197
+ activation=silu
198
+
199
+ # Residual Block
200
+
201
+ [convolutional]
202
+ batch_normalize=1
203
+ filters=128
204
+ size=1
205
+ stride=1
206
+ pad=1
207
+ activation=silu
208
+
209
+ [convolutional]
210
+ batch_normalize=1
211
+ filters=128
212
+ size=3
213
+ stride=1
214
+ pad=1
215
+ activation=silu
216
+
217
+ [shortcut]
218
+ from=-3
219
+ activation=linear
220
+
221
+ [convolutional]
222
+ batch_normalize=1
223
+ filters=128
224
+ size=1
225
+ stride=1
226
+ pad=1
227
+ activation=silu
228
+
229
+ [convolutional]
230
+ batch_normalize=1
231
+ filters=128
232
+ size=3
233
+ stride=1
234
+ pad=1
235
+ activation=silu
236
+
237
+ [shortcut]
238
+ from=-3
239
+ activation=linear
240
+
241
+ [convolutional]
242
+ batch_normalize=1
243
+ filters=128
244
+ size=1
245
+ stride=1
246
+ pad=1
247
+ activation=silu
248
+
249
+ [convolutional]
250
+ batch_normalize=1
251
+ filters=128
252
+ size=3
253
+ stride=1
254
+ pad=1
255
+ activation=silu
256
+
257
+ [shortcut]
258
+ from=-3
259
+ activation=linear
260
+
261
+ [convolutional]
262
+ batch_normalize=1
263
+ filters=128
264
+ size=1
265
+ stride=1
266
+ pad=1
267
+ activation=silu
268
+
269
+ [convolutional]
270
+ batch_normalize=1
271
+ filters=128
272
+ size=3
273
+ stride=1
274
+ pad=1
275
+ activation=silu
276
+
277
+ [shortcut]
278
+ from=-3
279
+ activation=linear
280
+
281
+ [convolutional]
282
+ batch_normalize=1
283
+ filters=128
284
+ size=1
285
+ stride=1
286
+ pad=1
287
+ activation=silu
288
+
289
+ [convolutional]
290
+ batch_normalize=1
291
+ filters=128
292
+ size=3
293
+ stride=1
294
+ pad=1
295
+ activation=silu
296
+
297
+ [shortcut]
298
+ from=-3
299
+ activation=linear
300
+
301
+ [convolutional]
302
+ batch_normalize=1
303
+ filters=128
304
+ size=1
305
+ stride=1
306
+ pad=1
307
+ activation=silu
308
+
309
+ [convolutional]
310
+ batch_normalize=1
311
+ filters=128
312
+ size=3
313
+ stride=1
314
+ pad=1
315
+ activation=silu
316
+
317
+ [shortcut]
318
+ from=-3
319
+ activation=linear
320
+
321
+ [convolutional]
322
+ batch_normalize=1
323
+ filters=128
324
+ size=1
325
+ stride=1
326
+ pad=1
327
+ activation=silu
328
+
329
+ [convolutional]
330
+ batch_normalize=1
331
+ filters=128
332
+ size=3
333
+ stride=1
334
+ pad=1
335
+ activation=silu
336
+
337
+ [shortcut]
338
+ from=-3
339
+ activation=linear
340
+
341
+ # Transition first
342
+ #
343
+ #[convolutional]
344
+ #batch_normalize=1
345
+ #filters=128
346
+ #size=1
347
+ #stride=1
348
+ #pad=1
349
+ #activation=silu
350
+
351
+ # Merge [-1, -(3k+3)]
352
+
353
+ [route]
354
+ layers = -1,-24
355
+
356
+ # Transition last
357
+
358
+ # 43 (previous+6+3k)
359
+ [convolutional]
360
+ batch_normalize=1
361
+ filters=256
362
+ size=1
363
+ stride=1
364
+ pad=1
365
+ activation=silu
366
+
367
+
368
+ # P4
369
+
370
+ # Downsample
371
+
372
+ [convolutional]
373
+ batch_normalize=1
374
+ filters=512
375
+ size=3
376
+ stride=2
377
+ pad=1
378
+ activation=silu
379
+
380
+ # Split
381
+
382
+ [convolutional]
383
+ batch_normalize=1
384
+ filters=256
385
+ size=1
386
+ stride=1
387
+ pad=1
388
+ activation=silu
389
+
390
+ [route]
391
+ layers = -2
392
+
393
+ [convolutional]
394
+ batch_normalize=1
395
+ filters=256
396
+ size=1
397
+ stride=1
398
+ pad=1
399
+ activation=silu
400
+
401
+ # Residual Block
402
+
403
+ [convolutional]
404
+ batch_normalize=1
405
+ filters=256
406
+ size=1
407
+ stride=1
408
+ pad=1
409
+ activation=silu
410
+
411
+ [convolutional]
412
+ batch_normalize=1
413
+ filters=256
414
+ size=3
415
+ stride=1
416
+ pad=1
417
+ activation=silu
418
+
419
+ [shortcut]
420
+ from=-3
421
+ activation=linear
422
+
423
+ [convolutional]
424
+ batch_normalize=1
425
+ filters=256
426
+ size=1
427
+ stride=1
428
+ pad=1
429
+ activation=silu
430
+
431
+ [convolutional]
432
+ batch_normalize=1
433
+ filters=256
434
+ size=3
435
+ stride=1
436
+ pad=1
437
+ activation=silu
438
+
439
+ [shortcut]
440
+ from=-3
441
+ activation=linear
442
+
443
+ [convolutional]
444
+ batch_normalize=1
445
+ filters=256
446
+ size=1
447
+ stride=1
448
+ pad=1
449
+ activation=silu
450
+
451
+ [convolutional]
452
+ batch_normalize=1
453
+ filters=256
454
+ size=3
455
+ stride=1
456
+ pad=1
457
+ activation=silu
458
+
459
+ [shortcut]
460
+ from=-3
461
+ activation=linear
462
+
463
+ [convolutional]
464
+ batch_normalize=1
465
+ filters=256
466
+ size=1
467
+ stride=1
468
+ pad=1
469
+ activation=silu
470
+
471
+ [convolutional]
472
+ batch_normalize=1
473
+ filters=256
474
+ size=3
475
+ stride=1
476
+ pad=1
477
+ activation=silu
478
+
479
+ [shortcut]
480
+ from=-3
481
+ activation=linear
482
+
483
+ [convolutional]
484
+ batch_normalize=1
485
+ filters=256
486
+ size=1
487
+ stride=1
488
+ pad=1
489
+ activation=silu
490
+
491
+ [convolutional]
492
+ batch_normalize=1
493
+ filters=256
494
+ size=3
495
+ stride=1
496
+ pad=1
497
+ activation=silu
498
+
499
+ [shortcut]
500
+ from=-3
501
+ activation=linear
502
+
503
+ [convolutional]
504
+ batch_normalize=1
505
+ filters=256
506
+ size=1
507
+ stride=1
508
+ pad=1
509
+ activation=silu
510
+
511
+ [convolutional]
512
+ batch_normalize=1
513
+ filters=256
514
+ size=3
515
+ stride=1
516
+ pad=1
517
+ activation=silu
518
+
519
+ [shortcut]
520
+ from=-3
521
+ activation=linear
522
+
523
+ [convolutional]
524
+ batch_normalize=1
525
+ filters=256
526
+ size=1
527
+ stride=1
528
+ pad=1
529
+ activation=silu
530
+
531
+ [convolutional]
532
+ batch_normalize=1
533
+ filters=256
534
+ size=3
535
+ stride=1
536
+ pad=1
537
+ activation=silu
538
+
539
+ [shortcut]
540
+ from=-3
541
+ activation=linear
542
+
543
+ # Transition first
544
+ #
545
+ #[convolutional]
546
+ #batch_normalize=1
547
+ #filters=256
548
+ #size=1
549
+ #stride=1
550
+ #pad=1
551
+ #activation=silu
552
+
553
+ # Merge [-1, -(3k+3)]
554
+
555
+ [route]
556
+ layers = -1,-24
557
+
558
+ # Transition last
559
+
560
+ # 70 (previous+6+3k)
561
+ [convolutional]
562
+ batch_normalize=1
563
+ filters=512
564
+ size=1
565
+ stride=1
566
+ pad=1
567
+ activation=silu
568
+
569
+
570
+ # P5
571
+
572
+ # Downsample
573
+
574
+ [convolutional]
575
+ batch_normalize=1
576
+ filters=768
577
+ size=3
578
+ stride=2
579
+ pad=1
580
+ activation=silu
581
+
582
+ # Split
583
+
584
+ [convolutional]
585
+ batch_normalize=1
586
+ filters=384
587
+ size=1
588
+ stride=1
589
+ pad=1
590
+ activation=silu
591
+
592
+ [route]
593
+ layers = -2
594
+
595
+ [convolutional]
596
+ batch_normalize=1
597
+ filters=384
598
+ size=1
599
+ stride=1
600
+ pad=1
601
+ activation=silu
602
+
603
+ # Residual Block
604
+
605
+ [convolutional]
606
+ batch_normalize=1
607
+ filters=384
608
+ size=1
609
+ stride=1
610
+ pad=1
611
+ activation=silu
612
+
613
+ [convolutional]
614
+ batch_normalize=1
615
+ filters=384
616
+ size=3
617
+ stride=1
618
+ pad=1
619
+ activation=silu
620
+
621
+ [shortcut]
622
+ from=-3
623
+ activation=linear
624
+
625
+ [convolutional]
626
+ batch_normalize=1
627
+ filters=384
628
+ size=1
629
+ stride=1
630
+ pad=1
631
+ activation=silu
632
+
633
+ [convolutional]
634
+ batch_normalize=1
635
+ filters=384
636
+ size=3
637
+ stride=1
638
+ pad=1
639
+ activation=silu
640
+
641
+ [shortcut]
642
+ from=-3
643
+ activation=linear
644
+
645
+ [convolutional]
646
+ batch_normalize=1
647
+ filters=384
648
+ size=1
649
+ stride=1
650
+ pad=1
651
+ activation=silu
652
+
653
+ [convolutional]
654
+ batch_normalize=1
655
+ filters=384
656
+ size=3
657
+ stride=1
658
+ pad=1
659
+ activation=silu
660
+
661
+ [shortcut]
662
+ from=-3
663
+ activation=linear
664
+
665
+ # Transition first
666
+ #
667
+ #[convolutional]
668
+ #batch_normalize=1
669
+ #filters=384
670
+ #size=1
671
+ #stride=1
672
+ #pad=1
673
+ #activation=silu
674
+
675
+ # Merge [-1, -(3k+3)]
676
+
677
+ [route]
678
+ layers = -1,-12
679
+
680
+ # Transition last
681
+
682
+ # 85 (previous+6+3k)
683
+ [convolutional]
684
+ batch_normalize=1
685
+ filters=768
686
+ size=1
687
+ stride=1
688
+ pad=1
689
+ activation=silu
690
+
691
+
692
+ # P6
693
+
694
+ # Downsample
695
+
696
+ [convolutional]
697
+ batch_normalize=1
698
+ filters=1024
699
+ size=3
700
+ stride=2
701
+ pad=1
702
+ activation=silu
703
+
704
+ # Split
705
+
706
+ [convolutional]
707
+ batch_normalize=1
708
+ filters=512
709
+ size=1
710
+ stride=1
711
+ pad=1
712
+ activation=silu
713
+
714
+ [route]
715
+ layers = -2
716
+
717
+ [convolutional]
718
+ batch_normalize=1
719
+ filters=512
720
+ size=1
721
+ stride=1
722
+ pad=1
723
+ activation=silu
724
+
725
+ # Residual Block
726
+
727
+ [convolutional]
728
+ batch_normalize=1
729
+ filters=512
730
+ size=1
731
+ stride=1
732
+ pad=1
733
+ activation=silu
734
+
735
+ [convolutional]
736
+ batch_normalize=1
737
+ filters=512
738
+ size=3
739
+ stride=1
740
+ pad=1
741
+ activation=silu
742
+
743
+ [shortcut]
744
+ from=-3
745
+ activation=linear
746
+
747
+ [convolutional]
748
+ batch_normalize=1
749
+ filters=512
750
+ size=1
751
+ stride=1
752
+ pad=1
753
+ activation=silu
754
+
755
+ [convolutional]
756
+ batch_normalize=1
757
+ filters=512
758
+ size=3
759
+ stride=1
760
+ pad=1
761
+ activation=silu
762
+
763
+ [shortcut]
764
+ from=-3
765
+ activation=linear
766
+
767
+ [convolutional]
768
+ batch_normalize=1
769
+ filters=512
770
+ size=1
771
+ stride=1
772
+ pad=1
773
+ activation=silu
774
+
775
+ [convolutional]
776
+ batch_normalize=1
777
+ filters=512
778
+ size=3
779
+ stride=1
780
+ pad=1
781
+ activation=silu
782
+
783
+ [shortcut]
784
+ from=-3
785
+ activation=linear
786
+
787
+ # Transition first
788
+ #
789
+ #[convolutional]
790
+ #batch_normalize=1
791
+ #filters=512
792
+ #size=1
793
+ #stride=1
794
+ #pad=1
795
+ #activation=silu
796
+
797
+ # Merge [-1, -(3k+3)]
798
+
799
+ [route]
800
+ layers = -1,-12
801
+
802
+ # Transition last
803
+
804
+ # 100 (previous+6+3k)
805
+ [convolutional]
806
+ batch_normalize=1
807
+ filters=1024
808
+ size=1
809
+ stride=1
810
+ pad=1
811
+ activation=silu
812
+
813
+ # ============ End of Backbone ============ #
814
+
815
+ # ============ Neck ============ #
816
+
817
+ # CSPSPP
818
+
819
+ [convolutional]
820
+ batch_normalize=1
821
+ filters=512
822
+ size=1
823
+ stride=1
824
+ pad=1
825
+ activation=silu
826
+
827
+ [route]
828
+ layers = -2
829
+
830
+ [convolutional]
831
+ batch_normalize=1
832
+ filters=512
833
+ size=1
834
+ stride=1
835
+ pad=1
836
+ activation=silu
837
+
838
+ [convolutional]
839
+ batch_normalize=1
840
+ size=3
841
+ stride=1
842
+ pad=1
843
+ filters=512
844
+ activation=silu
845
+
846
+ [convolutional]
847
+ batch_normalize=1
848
+ filters=512
849
+ size=1
850
+ stride=1
851
+ pad=1
852
+ activation=silu
853
+
854
+ ### SPP ###
855
+ [maxpool]
856
+ stride=1
857
+ size=5
858
+
859
+ [route]
860
+ layers=-2
861
+
862
+ [maxpool]
863
+ stride=1
864
+ size=9
865
+
866
+ [route]
867
+ layers=-4
868
+
869
+ [maxpool]
870
+ stride=1
871
+ size=13
872
+
873
+ [route]
874
+ layers=-1,-3,-5,-6
875
+ ### End SPP ###
876
+
877
+ [convolutional]
878
+ batch_normalize=1
879
+ filters=512
880
+ size=1
881
+ stride=1
882
+ pad=1
883
+ activation=silu
884
+
885
+ [convolutional]
886
+ batch_normalize=1
887
+ size=3
888
+ stride=1
889
+ pad=1
890
+ filters=512
891
+ activation=silu
892
+
893
+ [route]
894
+ layers = -1, -13
895
+
896
+ # 115 (previous+6+5+2k)
897
+ [convolutional]
898
+ batch_normalize=1
899
+ filters=512
900
+ size=1
901
+ stride=1
902
+ pad=1
903
+ activation=silu
904
+
905
+ # End of CSPSPP
906
+
907
+
908
+ # FPN-5
909
+
910
+ [convolutional]
911
+ batch_normalize=1
912
+ filters=384
913
+ size=1
914
+ stride=1
915
+ pad=1
916
+ activation=silu
917
+
918
+ [upsample]
919
+ stride=2
920
+
921
+ [route]
922
+ layers = 85
923
+
924
+ [convolutional]
925
+ batch_normalize=1
926
+ filters=384
927
+ size=1
928
+ stride=1
929
+ pad=1
930
+ activation=silu
931
+
932
+ [route]
933
+ layers = -1, -3
934
+
935
+ [convolutional]
936
+ batch_normalize=1
937
+ filters=384
938
+ size=1
939
+ stride=1
940
+ pad=1
941
+ activation=silu
942
+
943
+ # Split
944
+
945
+ [convolutional]
946
+ batch_normalize=1
947
+ filters=384
948
+ size=1
949
+ stride=1
950
+ pad=1
951
+ activation=silu
952
+
953
+ [route]
954
+ layers = -2
955
+
956
+ # Plain Block
957
+
958
+ [convolutional]
959
+ batch_normalize=1
960
+ filters=384
961
+ size=1
962
+ stride=1
963
+ pad=1
964
+ activation=silu
965
+
966
+ [convolutional]
967
+ batch_normalize=1
968
+ size=3
969
+ stride=1
970
+ pad=1
971
+ filters=384
972
+ activation=silu
973
+
974
+ [convolutional]
975
+ batch_normalize=1
976
+ filters=384
977
+ size=1
978
+ stride=1
979
+ pad=1
980
+ activation=silu
981
+
982
+ [convolutional]
983
+ batch_normalize=1
984
+ size=3
985
+ stride=1
986
+ pad=1
987
+ filters=384
988
+ activation=silu
989
+
990
+ [convolutional]
991
+ batch_normalize=1
992
+ filters=384
993
+ size=1
994
+ stride=1
995
+ pad=1
996
+ activation=silu
997
+
998
+ [convolutional]
999
+ batch_normalize=1
1000
+ size=3
1001
+ stride=1
1002
+ pad=1
1003
+ filters=384
1004
+ activation=silu
1005
+
1006
+ # Merge [-1, -(2k+2)]
1007
+
1008
+ [route]
1009
+ layers = -1, -8
1010
+
1011
+ # Transition last
1012
+
1013
+ # 131 (previous+6+4+2k)
1014
+ [convolutional]
1015
+ batch_normalize=1
1016
+ filters=384
1017
+ size=1
1018
+ stride=1
1019
+ pad=1
1020
+ activation=silu
1021
+
1022
+
1023
+ # FPN-4
1024
+
1025
+ [convolutional]
1026
+ batch_normalize=1
1027
+ filters=256
1028
+ size=1
1029
+ stride=1
1030
+ pad=1
1031
+ activation=silu
1032
+
1033
+ [upsample]
1034
+ stride=2
1035
+
1036
+ [route]
1037
+ layers = 70
1038
+
1039
+ [convolutional]
1040
+ batch_normalize=1
1041
+ filters=256
1042
+ size=1
1043
+ stride=1
1044
+ pad=1
1045
+ activation=silu
1046
+
1047
+ [route]
1048
+ layers = -1, -3
1049
+
1050
+ [convolutional]
1051
+ batch_normalize=1
1052
+ filters=256
1053
+ size=1
1054
+ stride=1
1055
+ pad=1
1056
+ activation=silu
1057
+
1058
+ # Split
1059
+
1060
+ [convolutional]
1061
+ batch_normalize=1
1062
+ filters=256
1063
+ size=1
1064
+ stride=1
1065
+ pad=1
1066
+ activation=silu
1067
+
1068
+ [route]
1069
+ layers = -2
1070
+
1071
+ # Plain Block
1072
+
1073
+ [convolutional]
1074
+ batch_normalize=1
1075
+ filters=256
1076
+ size=1
1077
+ stride=1
1078
+ pad=1
1079
+ activation=silu
1080
+
1081
+ [convolutional]
1082
+ batch_normalize=1
1083
+ size=3
1084
+ stride=1
1085
+ pad=1
1086
+ filters=256
1087
+ activation=silu
1088
+
1089
+ [convolutional]
1090
+ batch_normalize=1
1091
+ filters=256
1092
+ size=1
1093
+ stride=1
1094
+ pad=1
1095
+ activation=silu
1096
+
1097
+ [convolutional]
1098
+ batch_normalize=1
1099
+ size=3
1100
+ stride=1
1101
+ pad=1
1102
+ filters=256
1103
+ activation=silu
1104
+
1105
+ [convolutional]
1106
+ batch_normalize=1
1107
+ filters=256
1108
+ size=1
1109
+ stride=1
1110
+ pad=1
1111
+ activation=silu
1112
+
1113
+ [convolutional]
1114
+ batch_normalize=1
1115
+ size=3
1116
+ stride=1
1117
+ pad=1
1118
+ filters=256
1119
+ activation=silu
1120
+
1121
+ # Merge [-1, -(2k+2)]
1122
+
1123
+ [route]
1124
+ layers = -1, -8
1125
+
1126
+ # Transition last
1127
+
1128
+ # 147 (previous+6+4+2k)
1129
+ [convolutional]
1130
+ batch_normalize=1
1131
+ filters=256
1132
+ size=1
1133
+ stride=1
1134
+ pad=1
1135
+ activation=silu
1136
+
1137
+
1138
+ # FPN-3
1139
+
1140
+ [convolutional]
1141
+ batch_normalize=1
1142
+ filters=128
1143
+ size=1
1144
+ stride=1
1145
+ pad=1
1146
+ activation=silu
1147
+
1148
+ [upsample]
1149
+ stride=2
1150
+
1151
+ [route]
1152
+ layers = 43
1153
+
1154
+ [convolutional]
1155
+ batch_normalize=1
1156
+ filters=128
1157
+ size=1
1158
+ stride=1
1159
+ pad=1
1160
+ activation=silu
1161
+
1162
+ [route]
1163
+ layers = -1, -3
1164
+
1165
+ [convolutional]
1166
+ batch_normalize=1
1167
+ filters=128
1168
+ size=1
1169
+ stride=1
1170
+ pad=1
1171
+ activation=silu
1172
+
1173
+ # Split
1174
+
1175
+ [convolutional]
1176
+ batch_normalize=1
1177
+ filters=128
1178
+ size=1
1179
+ stride=1
1180
+ pad=1
1181
+ activation=silu
1182
+
1183
+ [route]
1184
+ layers = -2
1185
+
1186
+ # Plain Block
1187
+
1188
+ [convolutional]
1189
+ batch_normalize=1
1190
+ filters=128
1191
+ size=1
1192
+ stride=1
1193
+ pad=1
1194
+ activation=silu
1195
+
1196
+ [convolutional]
1197
+ batch_normalize=1
1198
+ size=3
1199
+ stride=1
1200
+ pad=1
1201
+ filters=128
1202
+ activation=silu
1203
+
1204
+ [convolutional]
1205
+ batch_normalize=1
1206
+ filters=128
1207
+ size=1
1208
+ stride=1
1209
+ pad=1
1210
+ activation=silu
1211
+
1212
+ [convolutional]
1213
+ batch_normalize=1
1214
+ size=3
1215
+ stride=1
1216
+ pad=1
1217
+ filters=128
1218
+ activation=silu
1219
+
1220
+ [convolutional]
1221
+ batch_normalize=1
1222
+ filters=128
1223
+ size=1
1224
+ stride=1
1225
+ pad=1
1226
+ activation=silu
1227
+
1228
+ [convolutional]
1229
+ batch_normalize=1
1230
+ size=3
1231
+ stride=1
1232
+ pad=1
1233
+ filters=128
1234
+ activation=silu
1235
+
1236
+ # Merge [-1, -(2k+2)]
1237
+
1238
+ [route]
1239
+ layers = -1, -8
1240
+
1241
+ # Transition last
1242
+
1243
+ # 163 (previous+6+4+2k)
1244
+ [convolutional]
1245
+ batch_normalize=1
1246
+ filters=128
1247
+ size=1
1248
+ stride=1
1249
+ pad=1
1250
+ activation=silu
1251
+
1252
+
1253
+ # PAN-4
1254
+
1255
+ [convolutional]
1256
+ batch_normalize=1
1257
+ size=3
1258
+ stride=2
1259
+ pad=1
1260
+ filters=256
1261
+ activation=silu
1262
+
1263
+ [route]
1264
+ layers = -1, 147
1265
+
1266
+ [convolutional]
1267
+ batch_normalize=1
1268
+ filters=256
1269
+ size=1
1270
+ stride=1
1271
+ pad=1
1272
+ activation=silu
1273
+
1274
+ # Split
1275
+
1276
+ [convolutional]
1277
+ batch_normalize=1
1278
+ filters=256
1279
+ size=1
1280
+ stride=1
1281
+ pad=1
1282
+ activation=silu
1283
+
1284
+ [route]
1285
+ layers = -2
1286
+
1287
+ # Plain Block
1288
+
1289
+ [convolutional]
1290
+ batch_normalize=1
1291
+ filters=256
1292
+ size=1
1293
+ stride=1
1294
+ pad=1
1295
+ activation=silu
1296
+
1297
+ [convolutional]
1298
+ batch_normalize=1
1299
+ size=3
1300
+ stride=1
1301
+ pad=1
1302
+ filters=256
1303
+ activation=silu
1304
+
1305
+ [convolutional]
1306
+ batch_normalize=1
1307
+ filters=256
1308
+ size=1
1309
+ stride=1
1310
+ pad=1
1311
+ activation=silu
1312
+
1313
+ [convolutional]
1314
+ batch_normalize=1
1315
+ size=3
1316
+ stride=1
1317
+ pad=1
1318
+ filters=256
1319
+ activation=silu
1320
+
1321
+ [convolutional]
1322
+ batch_normalize=1
1323
+ filters=256
1324
+ size=1
1325
+ stride=1
1326
+ pad=1
1327
+ activation=silu
1328
+
1329
+ [convolutional]
1330
+ batch_normalize=1
1331
+ size=3
1332
+ stride=1
1333
+ pad=1
1334
+ filters=256
1335
+ activation=silu
1336
+
1337
+ [route]
1338
+ layers = -1,-8
1339
+
1340
+ # Transition last
1341
+
1342
+ # 176 (previous+3+4+2k)
1343
+ [convolutional]
1344
+ batch_normalize=1
1345
+ filters=256
1346
+ size=1
1347
+ stride=1
1348
+ pad=1
1349
+ activation=silu
1350
+
1351
+
1352
+ # PAN-5
1353
+
1354
+ [convolutional]
1355
+ batch_normalize=1
1356
+ size=3
1357
+ stride=2
1358
+ pad=1
1359
+ filters=384
1360
+ activation=silu
1361
+
1362
+ [route]
1363
+ layers = -1, 131
1364
+
1365
+ [convolutional]
1366
+ batch_normalize=1
1367
+ filters=384
1368
+ size=1
1369
+ stride=1
1370
+ pad=1
1371
+ activation=silu
1372
+
1373
+ # Split
1374
+
1375
+ [convolutional]
1376
+ batch_normalize=1
1377
+ filters=384
1378
+ size=1
1379
+ stride=1
1380
+ pad=1
1381
+ activation=silu
1382
+
1383
+ [route]
1384
+ layers = -2
1385
+
1386
+ # Plain Block
1387
+
1388
+ [convolutional]
1389
+ batch_normalize=1
1390
+ filters=384
1391
+ size=1
1392
+ stride=1
1393
+ pad=1
1394
+ activation=silu
1395
+
1396
+ [convolutional]
1397
+ batch_normalize=1
1398
+ size=3
1399
+ stride=1
1400
+ pad=1
1401
+ filters=384
1402
+ activation=silu
1403
+
1404
+ [convolutional]
1405
+ batch_normalize=1
1406
+ filters=384
1407
+ size=1
1408
+ stride=1
1409
+ pad=1
1410
+ activation=silu
1411
+
1412
+ [convolutional]
1413
+ batch_normalize=1
1414
+ size=3
1415
+ stride=1
1416
+ pad=1
1417
+ filters=384
1418
+ activation=silu
1419
+
1420
+ [convolutional]
1421
+ batch_normalize=1
1422
+ filters=384
1423
+ size=1
1424
+ stride=1
1425
+ pad=1
1426
+ activation=silu
1427
+
1428
+ [convolutional]
1429
+ batch_normalize=1
1430
+ size=3
1431
+ stride=1
1432
+ pad=1
1433
+ filters=384
1434
+ activation=silu
1435
+
1436
+ [route]
1437
+ layers = -1,-8
1438
+
1439
+ # Transition last
1440
+
1441
+ # 189 (previous+3+4+2k)
1442
+ [convolutional]
1443
+ batch_normalize=1
1444
+ filters=384
1445
+ size=1
1446
+ stride=1
1447
+ pad=1
1448
+ activation=silu
1449
+
1450
+
1451
+ # PAN-6
1452
+
1453
+ [convolutional]
1454
+ batch_normalize=1
1455
+ size=3
1456
+ stride=2
1457
+ pad=1
1458
+ filters=512
1459
+ activation=silu
1460
+
1461
+ [route]
1462
+ layers = -1, 115
1463
+
1464
+ [convolutional]
1465
+ batch_normalize=1
1466
+ filters=512
1467
+ size=1
1468
+ stride=1
1469
+ pad=1
1470
+ activation=silu
1471
+
1472
+ # Split
1473
+
1474
+ [convolutional]
1475
+ batch_normalize=1
1476
+ filters=512
1477
+ size=1
1478
+ stride=1
1479
+ pad=1
1480
+ activation=silu
1481
+
1482
+ [route]
1483
+ layers = -2
1484
+
1485
+ # Plain Block
1486
+
1487
+ [convolutional]
1488
+ batch_normalize=1
1489
+ filters=512
1490
+ size=1
1491
+ stride=1
1492
+ pad=1
1493
+ activation=silu
1494
+
1495
+ [convolutional]
1496
+ batch_normalize=1
1497
+ size=3
1498
+ stride=1
1499
+ pad=1
1500
+ filters=512
1501
+ activation=silu
1502
+
1503
+ [convolutional]
1504
+ batch_normalize=1
1505
+ filters=512
1506
+ size=1
1507
+ stride=1
1508
+ pad=1
1509
+ activation=silu
1510
+
1511
+ [convolutional]
1512
+ batch_normalize=1
1513
+ size=3
1514
+ stride=1
1515
+ pad=1
1516
+ filters=512
1517
+ activation=silu
1518
+
1519
+ [convolutional]
1520
+ batch_normalize=1
1521
+ filters=512
1522
+ size=1
1523
+ stride=1
1524
+ pad=1
1525
+ activation=silu
1526
+
1527
+ [convolutional]
1528
+ batch_normalize=1
1529
+ size=3
1530
+ stride=1
1531
+ pad=1
1532
+ filters=512
1533
+ activation=silu
1534
+
1535
+ [route]
1536
+ layers = -1,-8
1537
+
1538
+ # Transition last
1539
+
1540
+ # 202 (previous+3+4+2k)
1541
+ [convolutional]
1542
+ batch_normalize=1
1543
+ filters=512
1544
+ size=1
1545
+ stride=1
1546
+ pad=1
1547
+ activation=silu
1548
+
1549
+ # ============ End of Neck ============ #
1550
+
1551
+ # 203
1552
+ [implicit_add]
1553
+ filters=256
1554
+
1555
+ # 204
1556
+ [implicit_add]
1557
+ filters=512
1558
+
1559
+ # 205
1560
+ [implicit_add]
1561
+ filters=768
1562
+
1563
+ # 206
1564
+ [implicit_add]
1565
+ filters=1024
1566
+
1567
+ # 207
1568
+ [implicit_mul]
1569
+ filters=255
1570
+
1571
+ # 208
1572
+ [implicit_mul]
1573
+ filters=255
1574
+
1575
+ # 209
1576
+ [implicit_mul]
1577
+ filters=255
1578
+
1579
+ # 210
1580
+ [implicit_mul]
1581
+ filters=255
1582
+
1583
+ # ============ Head ============ #
1584
+
1585
+ # YOLO-3
1586
+
1587
+ [route]
1588
+ layers = 163
1589
+
1590
+ [convolutional]
1591
+ batch_normalize=1
1592
+ size=3
1593
+ stride=1
1594
+ pad=1
1595
+ filters=256
1596
+ activation=silu
1597
+
1598
+ [shift_channels]
1599
+ from=203
1600
+
1601
+ [convolutional]
1602
+ size=1
1603
+ stride=1
1604
+ pad=1
1605
+ filters=255
1606
+ activation=linear
1607
+
1608
+ [control_channels]
1609
+ from=207
1610
+
1611
+ [yolo]
1612
+ mask = 0,1,2
1613
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1614
+ classes=80
1615
+ num=12
1616
+ jitter=.3
1617
+ ignore_thresh = .7
1618
+ truth_thresh = 1
1619
+ random=1
1620
+ scale_x_y = 1.05
1621
+ iou_thresh=0.213
1622
+ cls_normalizer=1.0
1623
+ iou_normalizer=0.07
1624
+ iou_loss=ciou
1625
+ nms_kind=greedynms
1626
+ beta_nms=0.6
1627
+
1628
+
1629
+ # YOLO-4
1630
+
1631
+ [route]
1632
+ layers = 176
1633
+
1634
+ [convolutional]
1635
+ batch_normalize=1
1636
+ size=3
1637
+ stride=1
1638
+ pad=1
1639
+ filters=512
1640
+ activation=silu
1641
+
1642
+ [shift_channels]
1643
+ from=204
1644
+
1645
+ [convolutional]
1646
+ size=1
1647
+ stride=1
1648
+ pad=1
1649
+ filters=255
1650
+ activation=linear
1651
+
1652
+ [control_channels]
1653
+ from=208
1654
+
1655
+ [yolo]
1656
+ mask = 3,4,5
1657
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1658
+ classes=80
1659
+ num=12
1660
+ jitter=.3
1661
+ ignore_thresh = .7
1662
+ truth_thresh = 1
1663
+ random=1
1664
+ scale_x_y = 1.05
1665
+ iou_thresh=0.213
1666
+ cls_normalizer=1.0
1667
+ iou_normalizer=0.07
1668
+ iou_loss=ciou
1669
+ nms_kind=greedynms
1670
+ beta_nms=0.6
1671
+
1672
+
1673
+ # YOLO-5
1674
+
1675
+ [route]
1676
+ layers = 189
1677
+
1678
+ [convolutional]
1679
+ batch_normalize=1
1680
+ size=3
1681
+ stride=1
1682
+ pad=1
1683
+ filters=768
1684
+ activation=silu
1685
+
1686
+ [shift_channels]
1687
+ from=205
1688
+
1689
+ [convolutional]
1690
+ size=1
1691
+ stride=1
1692
+ pad=1
1693
+ filters=255
1694
+ activation=linear
1695
+
1696
+ [control_channels]
1697
+ from=209
1698
+
1699
+ [yolo]
1700
+ mask = 6,7,8
1701
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1702
+ classes=80
1703
+ num=12
1704
+ jitter=.3
1705
+ ignore_thresh = .7
1706
+ truth_thresh = 1
1707
+ random=1
1708
+ scale_x_y = 1.05
1709
+ iou_thresh=0.213
1710
+ cls_normalizer=1.0
1711
+ iou_normalizer=0.07
1712
+ iou_loss=ciou
1713
+ nms_kind=greedynms
1714
+ beta_nms=0.6
1715
+
1716
+
1717
+ # YOLO-6
1718
+
1719
+ [route]
1720
+ layers = 202
1721
+
1722
+ [convolutional]
1723
+ batch_normalize=1
1724
+ size=3
1725
+ stride=1
1726
+ pad=1
1727
+ filters=1024
1728
+ activation=silu
1729
+
1730
+ [shift_channels]
1731
+ from=206
1732
+
1733
+ [convolutional]
1734
+ size=1
1735
+ stride=1
1736
+ pad=1
1737
+ filters=255
1738
+ activation=linear
1739
+
1740
+ [control_channels]
1741
+ from=210
1742
+
1743
+ [yolo]
1744
+ mask = 9,10,11
1745
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1746
+ classes=80
1747
+ num=12
1748
+ jitter=.3
1749
+ ignore_thresh = .7
1750
+ truth_thresh = 1
1751
+ random=1
1752
+ scale_x_y = 1.05
1753
+ iou_thresh=0.213
1754
+ cls_normalizer=1.0
1755
+ iou_normalizer=0.07
1756
+ iou_loss=ciou
1757
+ nms_kind=greedynms
1758
+ beta_nms=0.6
1759
+
1760
+ # ============ End of Head ============ #
cfg/yolov4_csp.cfg ADDED
@@ -0,0 +1,1334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [net]
2
+ # Testing
3
+ #batch=1
4
+ #subdivisions=1
5
+ # Training
6
+ batch=64
7
+ subdivisions=8
8
+ width=512
9
+ height=512
10
+ channels=3
11
+ momentum=0.949
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.00261
19
+ burn_in=1000
20
+ max_batches = 500500
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ #cutmix=1
26
+ mosaic=1
27
+
28
+
29
+ # ============ Backbone ============ #
30
+
31
+ # Stem
32
+
33
+ # 0
34
+ [convolutional]
35
+ batch_normalize=1
36
+ filters=32
37
+ size=3
38
+ stride=1
39
+ pad=1
40
+ activation=silu
41
+
42
+ # P1
43
+
44
+ # Downsample
45
+
46
+ [convolutional]
47
+ batch_normalize=1
48
+ filters=64
49
+ size=3
50
+ stride=2
51
+ pad=1
52
+ activation=silu
53
+
54
+ # Residual Block
55
+
56
+ [convolutional]
57
+ batch_normalize=1
58
+ filters=32
59
+ size=1
60
+ stride=1
61
+ pad=1
62
+ activation=silu
63
+
64
+ [convolutional]
65
+ batch_normalize=1
66
+ filters=64
67
+ size=3
68
+ stride=1
69
+ pad=1
70
+ activation=silu
71
+
72
+ # 4 (previous+1+3k)
73
+ [shortcut]
74
+ from=-3
75
+ activation=linear
76
+
77
+ # P2
78
+
79
+ # Downsample
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=128
84
+ size=3
85
+ stride=2
86
+ pad=1
87
+ activation=silu
88
+
89
+ # Split
90
+
91
+ [convolutional]
92
+ batch_normalize=1
93
+ filters=64
94
+ size=1
95
+ stride=1
96
+ pad=1
97
+ activation=silu
98
+
99
+ [route]
100
+ layers = -2
101
+
102
+ [convolutional]
103
+ batch_normalize=1
104
+ filters=64
105
+ size=1
106
+ stride=1
107
+ pad=1
108
+ activation=silu
109
+
110
+ # Residual Block
111
+
112
+ [convolutional]
113
+ batch_normalize=1
114
+ filters=64
115
+ size=1
116
+ stride=1
117
+ pad=1
118
+ activation=silu
119
+
120
+ [convolutional]
121
+ batch_normalize=1
122
+ filters=64
123
+ size=3
124
+ stride=1
125
+ pad=1
126
+ activation=silu
127
+
128
+ [shortcut]
129
+ from=-3
130
+ activation=linear
131
+
132
+ [convolutional]
133
+ batch_normalize=1
134
+ filters=64
135
+ size=1
136
+ stride=1
137
+ pad=1
138
+ activation=silu
139
+
140
+ [convolutional]
141
+ batch_normalize=1
142
+ filters=64
143
+ size=3
144
+ stride=1
145
+ pad=1
146
+ activation=silu
147
+
148
+ [shortcut]
149
+ from=-3
150
+ activation=linear
151
+
152
+ # Transition first
153
+
154
+ [convolutional]
155
+ batch_normalize=1
156
+ filters=64
157
+ size=1
158
+ stride=1
159
+ pad=1
160
+ activation=silu
161
+
162
+ # Merge [-1, -(3k+4)]
163
+
164
+ [route]
165
+ layers = -1,-10
166
+
167
+ # Transition last
168
+
169
+ # 17 (previous+7+3k)
170
+ [convolutional]
171
+ batch_normalize=1
172
+ filters=128
173
+ size=1
174
+ stride=1
175
+ pad=1
176
+ activation=silu
177
+
178
+ # P3
179
+
180
+ # Downsample
181
+
182
+ [convolutional]
183
+ batch_normalize=1
184
+ filters=256
185
+ size=3
186
+ stride=2
187
+ pad=1
188
+ activation=silu
189
+
190
+ # Split
191
+
192
+ [convolutional]
193
+ batch_normalize=1
194
+ filters=128
195
+ size=1
196
+ stride=1
197
+ pad=1
198
+ activation=silu
199
+
200
+ [route]
201
+ layers = -2
202
+
203
+ [convolutional]
204
+ batch_normalize=1
205
+ filters=128
206
+ size=1
207
+ stride=1
208
+ pad=1
209
+ activation=silu
210
+
211
+ # Residual Block
212
+
213
+ [convolutional]
214
+ batch_normalize=1
215
+ filters=128
216
+ size=1
217
+ stride=1
218
+ pad=1
219
+ activation=silu
220
+
221
+ [convolutional]
222
+ batch_normalize=1
223
+ filters=128
224
+ size=3
225
+ stride=1
226
+ pad=1
227
+ activation=silu
228
+
229
+ [shortcut]
230
+ from=-3
231
+ activation=linear
232
+
233
+ [convolutional]
234
+ batch_normalize=1
235
+ filters=128
236
+ size=1
237
+ stride=1
238
+ pad=1
239
+ activation=silu
240
+
241
+ [convolutional]
242
+ batch_normalize=1
243
+ filters=128
244
+ size=3
245
+ stride=1
246
+ pad=1
247
+ activation=silu
248
+
249
+ [shortcut]
250
+ from=-3
251
+ activation=linear
252
+
253
+ [convolutional]
254
+ batch_normalize=1
255
+ filters=128
256
+ size=1
257
+ stride=1
258
+ pad=1
259
+ activation=silu
260
+
261
+ [convolutional]
262
+ batch_normalize=1
263
+ filters=128
264
+ size=3
265
+ stride=1
266
+ pad=1
267
+ activation=silu
268
+
269
+ [shortcut]
270
+ from=-3
271
+ activation=linear
272
+
273
+ [convolutional]
274
+ batch_normalize=1
275
+ filters=128
276
+ size=1
277
+ stride=1
278
+ pad=1
279
+ activation=silu
280
+
281
+ [convolutional]
282
+ batch_normalize=1
283
+ filters=128
284
+ size=3
285
+ stride=1
286
+ pad=1
287
+ activation=silu
288
+
289
+ [shortcut]
290
+ from=-3
291
+ activation=linear
292
+
293
+ [convolutional]
294
+ batch_normalize=1
295
+ filters=128
296
+ size=1
297
+ stride=1
298
+ pad=1
299
+ activation=silu
300
+
301
+ [convolutional]
302
+ batch_normalize=1
303
+ filters=128
304
+ size=3
305
+ stride=1
306
+ pad=1
307
+ activation=silu
308
+
309
+ [shortcut]
310
+ from=-3
311
+ activation=linear
312
+
313
+ [convolutional]
314
+ batch_normalize=1
315
+ filters=128
316
+ size=1
317
+ stride=1
318
+ pad=1
319
+ activation=silu
320
+
321
+ [convolutional]
322
+ batch_normalize=1
323
+ filters=128
324
+ size=3
325
+ stride=1
326
+ pad=1
327
+ activation=silu
328
+
329
+ [shortcut]
330
+ from=-3
331
+ activation=linear
332
+
333
+ [convolutional]
334
+ batch_normalize=1
335
+ filters=128
336
+ size=1
337
+ stride=1
338
+ pad=1
339
+ activation=silu
340
+
341
+ [convolutional]
342
+ batch_normalize=1
343
+ filters=128
344
+ size=3
345
+ stride=1
346
+ pad=1
347
+ activation=silu
348
+
349
+ [shortcut]
350
+ from=-3
351
+ activation=linear
352
+
353
+ [convolutional]
354
+ batch_normalize=1
355
+ filters=128
356
+ size=1
357
+ stride=1
358
+ pad=1
359
+ activation=silu
360
+
361
+ [convolutional]
362
+ batch_normalize=1
363
+ filters=128
364
+ size=3
365
+ stride=1
366
+ pad=1
367
+ activation=silu
368
+
369
+ [shortcut]
370
+ from=-3
371
+ activation=linear
372
+
373
+ # Transition first
374
+
375
+ [convolutional]
376
+ batch_normalize=1
377
+ filters=128
378
+ size=1
379
+ stride=1
380
+ pad=1
381
+ activation=silu
382
+
383
+ # Merge [-1 -(4+3k)]
384
+
385
+ [route]
386
+ layers = -1,-28
387
+
388
+ # Transition last
389
+
390
+ # 48 (previous+7+3k)
391
+ [convolutional]
392
+ batch_normalize=1
393
+ filters=256
394
+ size=1
395
+ stride=1
396
+ pad=1
397
+ activation=silu
398
+
399
+ # P4
400
+
401
+ # Downsample
402
+
403
+ [convolutional]
404
+ batch_normalize=1
405
+ filters=512
406
+ size=3
407
+ stride=2
408
+ pad=1
409
+ activation=silu
410
+
411
+ # Split
412
+
413
+ [convolutional]
414
+ batch_normalize=1
415
+ filters=256
416
+ size=1
417
+ stride=1
418
+ pad=1
419
+ activation=silu
420
+
421
+ [route]
422
+ layers = -2
423
+
424
+ [convolutional]
425
+ batch_normalize=1
426
+ filters=256
427
+ size=1
428
+ stride=1
429
+ pad=1
430
+ activation=silu
431
+
432
+ # Residual Block
433
+
434
+ [convolutional]
435
+ batch_normalize=1
436
+ filters=256
437
+ size=1
438
+ stride=1
439
+ pad=1
440
+ activation=silu
441
+
442
+ [convolutional]
443
+ batch_normalize=1
444
+ filters=256
445
+ size=3
446
+ stride=1
447
+ pad=1
448
+ activation=silu
449
+
450
+ [shortcut]
451
+ from=-3
452
+ activation=linear
453
+
454
+ [convolutional]
455
+ batch_normalize=1
456
+ filters=256
457
+ size=1
458
+ stride=1
459
+ pad=1
460
+ activation=silu
461
+
462
+ [convolutional]
463
+ batch_normalize=1
464
+ filters=256
465
+ size=3
466
+ stride=1
467
+ pad=1
468
+ activation=silu
469
+
470
+ [shortcut]
471
+ from=-3
472
+ activation=linear
473
+
474
+ [convolutional]
475
+ batch_normalize=1
476
+ filters=256
477
+ size=1
478
+ stride=1
479
+ pad=1
480
+ activation=silu
481
+
482
+ [convolutional]
483
+ batch_normalize=1
484
+ filters=256
485
+ size=3
486
+ stride=1
487
+ pad=1
488
+ activation=silu
489
+
490
+ [shortcut]
491
+ from=-3
492
+ activation=linear
493
+
494
+ [convolutional]
495
+ batch_normalize=1
496
+ filters=256
497
+ size=1
498
+ stride=1
499
+ pad=1
500
+ activation=silu
501
+
502
+ [convolutional]
503
+ batch_normalize=1
504
+ filters=256
505
+ size=3
506
+ stride=1
507
+ pad=1
508
+ activation=silu
509
+
510
+ [shortcut]
511
+ from=-3
512
+ activation=linear
513
+
514
+ [convolutional]
515
+ batch_normalize=1
516
+ filters=256
517
+ size=1
518
+ stride=1
519
+ pad=1
520
+ activation=silu
521
+
522
+ [convolutional]
523
+ batch_normalize=1
524
+ filters=256
525
+ size=3
526
+ stride=1
527
+ pad=1
528
+ activation=silu
529
+
530
+ [shortcut]
531
+ from=-3
532
+ activation=linear
533
+
534
+ [convolutional]
535
+ batch_normalize=1
536
+ filters=256
537
+ size=1
538
+ stride=1
539
+ pad=1
540
+ activation=silu
541
+
542
+ [convolutional]
543
+ batch_normalize=1
544
+ filters=256
545
+ size=3
546
+ stride=1
547
+ pad=1
548
+ activation=silu
549
+
550
+ [shortcut]
551
+ from=-3
552
+ activation=linear
553
+
554
+ [convolutional]
555
+ batch_normalize=1
556
+ filters=256
557
+ size=1
558
+ stride=1
559
+ pad=1
560
+ activation=silu
561
+
562
+ [convolutional]
563
+ batch_normalize=1
564
+ filters=256
565
+ size=3
566
+ stride=1
567
+ pad=1
568
+ activation=silu
569
+
570
+ [shortcut]
571
+ from=-3
572
+ activation=linear
573
+
574
+ [convolutional]
575
+ batch_normalize=1
576
+ filters=256
577
+ size=1
578
+ stride=1
579
+ pad=1
580
+ activation=silu
581
+
582
+ [convolutional]
583
+ batch_normalize=1
584
+ filters=256
585
+ size=3
586
+ stride=1
587
+ pad=1
588
+ activation=silu
589
+
590
+ [shortcut]
591
+ from=-3
592
+ activation=linear
593
+
594
+ # Transition first
595
+
596
+ [convolutional]
597
+ batch_normalize=1
598
+ filters=256
599
+ size=1
600
+ stride=1
601
+ pad=1
602
+ activation=silu
603
+
604
+ # Merge [-1 -(3k+4)]
605
+
606
+ [route]
607
+ layers = -1,-28
608
+
609
+ # Transition last
610
+
611
+ # 79 (previous+7+3k)
612
+ [convolutional]
613
+ batch_normalize=1
614
+ filters=512
615
+ size=1
616
+ stride=1
617
+ pad=1
618
+ activation=silu
619
+
620
+ # P5
621
+
622
+ # Downsample
623
+
624
+ [convolutional]
625
+ batch_normalize=1
626
+ filters=1024
627
+ size=3
628
+ stride=2
629
+ pad=1
630
+ activation=silu
631
+
632
+ # Split
633
+
634
+ [convolutional]
635
+ batch_normalize=1
636
+ filters=512
637
+ size=1
638
+ stride=1
639
+ pad=1
640
+ activation=silu
641
+
642
+ [route]
643
+ layers = -2
644
+
645
+ [convolutional]
646
+ batch_normalize=1
647
+ filters=512
648
+ size=1
649
+ stride=1
650
+ pad=1
651
+ activation=silu
652
+
653
+ # Residual Block
654
+
655
+ [convolutional]
656
+ batch_normalize=1
657
+ filters=512
658
+ size=1
659
+ stride=1
660
+ pad=1
661
+ activation=silu
662
+
663
+ [convolutional]
664
+ batch_normalize=1
665
+ filters=512
666
+ size=3
667
+ stride=1
668
+ pad=1
669
+ activation=silu
670
+
671
+ [shortcut]
672
+ from=-3
673
+ activation=linear
674
+
675
+ [convolutional]
676
+ batch_normalize=1
677
+ filters=512
678
+ size=1
679
+ stride=1
680
+ pad=1
681
+ activation=silu
682
+
683
+ [convolutional]
684
+ batch_normalize=1
685
+ filters=512
686
+ size=3
687
+ stride=1
688
+ pad=1
689
+ activation=silu
690
+
691
+ [shortcut]
692
+ from=-3
693
+ activation=linear
694
+
695
+ [convolutional]
696
+ batch_normalize=1
697
+ filters=512
698
+ size=1
699
+ stride=1
700
+ pad=1
701
+ activation=silu
702
+
703
+ [convolutional]
704
+ batch_normalize=1
705
+ filters=512
706
+ size=3
707
+ stride=1
708
+ pad=1
709
+ activation=silu
710
+
711
+ [shortcut]
712
+ from=-3
713
+ activation=linear
714
+
715
+ [convolutional]
716
+ batch_normalize=1
717
+ filters=512
718
+ size=1
719
+ stride=1
720
+ pad=1
721
+ activation=silu
722
+
723
+ [convolutional]
724
+ batch_normalize=1
725
+ filters=512
726
+ size=3
727
+ stride=1
728
+ pad=1
729
+ activation=silu
730
+
731
+ [shortcut]
732
+ from=-3
733
+ activation=linear
734
+
735
+ # Transition first
736
+
737
+ [convolutional]
738
+ batch_normalize=1
739
+ filters=512
740
+ size=1
741
+ stride=1
742
+ pad=1
743
+ activation=silu
744
+
745
+ # Merge [-1 -(3k+4)]
746
+
747
+ [route]
748
+ layers = -1,-16
749
+
750
+ # Transition last
751
+
752
+ # 98 (previous+7+3k)
753
+ [convolutional]
754
+ batch_normalize=1
755
+ filters=1024
756
+ size=1
757
+ stride=1
758
+ pad=1
759
+ activation=silu
760
+
761
+ # ============ End of Backbone ============ #
762
+
763
+ # ============ Neck ============ #
764
+
765
+ # CSPSPP
766
+
767
+ [convolutional]
768
+ batch_normalize=1
769
+ filters=512
770
+ size=1
771
+ stride=1
772
+ pad=1
773
+ activation=silu
774
+
775
+ [route]
776
+ layers = -2
777
+
778
+ [convolutional]
779
+ batch_normalize=1
780
+ filters=512
781
+ size=1
782
+ stride=1
783
+ pad=1
784
+ activation=silu
785
+
786
+ [convolutional]
787
+ batch_normalize=1
788
+ size=3
789
+ stride=1
790
+ pad=1
791
+ filters=512
792
+ activation=silu
793
+
794
+ [convolutional]
795
+ batch_normalize=1
796
+ filters=512
797
+ size=1
798
+ stride=1
799
+ pad=1
800
+ activation=silu
801
+
802
+ ### SPP ###
803
+ [maxpool]
804
+ stride=1
805
+ size=5
806
+
807
+ [route]
808
+ layers=-2
809
+
810
+ [maxpool]
811
+ stride=1
812
+ size=9
813
+
814
+ [route]
815
+ layers=-4
816
+
817
+ [maxpool]
818
+ stride=1
819
+ size=13
820
+
821
+ [route]
822
+ layers=-1,-3,-5,-6
823
+ ### End SPP ###
824
+
825
+ [convolutional]
826
+ batch_normalize=1
827
+ filters=512
828
+ size=1
829
+ stride=1
830
+ pad=1
831
+ activation=silu
832
+
833
+ [convolutional]
834
+ batch_normalize=1
835
+ size=3
836
+ stride=1
837
+ pad=1
838
+ filters=512
839
+ activation=silu
840
+
841
+ [route]
842
+ layers = -1, -13
843
+
844
+ # 113 (previous+6+5+2k)
845
+ [convolutional]
846
+ batch_normalize=1
847
+ filters=512
848
+ size=1
849
+ stride=1
850
+ pad=1
851
+ activation=silu
852
+
853
+ # End of CSPSPP
854
+
855
+
856
+ # FPN-4
857
+
858
+ [convolutional]
859
+ batch_normalize=1
860
+ filters=256
861
+ size=1
862
+ stride=1
863
+ pad=1
864
+ activation=silu
865
+
866
+ [upsample]
867
+ stride=2
868
+
869
+ [route]
870
+ layers = 79
871
+
872
+ [convolutional]
873
+ batch_normalize=1
874
+ filters=256
875
+ size=1
876
+ stride=1
877
+ pad=1
878
+ activation=silu
879
+
880
+ [route]
881
+ layers = -1, -3
882
+
883
+ [convolutional]
884
+ batch_normalize=1
885
+ filters=256
886
+ size=1
887
+ stride=1
888
+ pad=1
889
+ activation=silu
890
+
891
+ # Split
892
+
893
+ [convolutional]
894
+ batch_normalize=1
895
+ filters=256
896
+ size=1
897
+ stride=1
898
+ pad=1
899
+ activation=silu
900
+
901
+ [route]
902
+ layers = -2
903
+
904
+ # Plain Block
905
+
906
+ [convolutional]
907
+ batch_normalize=1
908
+ filters=256
909
+ size=1
910
+ stride=1
911
+ pad=1
912
+ activation=silu
913
+
914
+ [convolutional]
915
+ batch_normalize=1
916
+ size=3
917
+ stride=1
918
+ pad=1
919
+ filters=256
920
+ activation=silu
921
+
922
+ [convolutional]
923
+ batch_normalize=1
924
+ filters=256
925
+ size=1
926
+ stride=1
927
+ pad=1
928
+ activation=silu
929
+
930
+ [convolutional]
931
+ batch_normalize=1
932
+ size=3
933
+ stride=1
934
+ pad=1
935
+ filters=256
936
+ activation=silu
937
+
938
+ # Merge [-1, -(2k+2)]
939
+
940
+ [route]
941
+ layers = -1, -6
942
+
943
+ # Transition last
944
+
945
+ # 127 (previous+6+4+2k)
946
+ [convolutional]
947
+ batch_normalize=1
948
+ filters=256
949
+ size=1
950
+ stride=1
951
+ pad=1
952
+ activation=silu
953
+
954
+
955
+ # FPN-3
956
+
957
+ [convolutional]
958
+ batch_normalize=1
959
+ filters=128
960
+ size=1
961
+ stride=1
962
+ pad=1
963
+ activation=silu
964
+
965
+ [upsample]
966
+ stride=2
967
+
968
+ [route]
969
+ layers = 48
970
+
971
+ [convolutional]
972
+ batch_normalize=1
973
+ filters=128
974
+ size=1
975
+ stride=1
976
+ pad=1
977
+ activation=silu
978
+
979
+ [route]
980
+ layers = -1, -3
981
+
982
+ [convolutional]
983
+ batch_normalize=1
984
+ filters=128
985
+ size=1
986
+ stride=1
987
+ pad=1
988
+ activation=silu
989
+
990
+ # Split
991
+
992
+ [convolutional]
993
+ batch_normalize=1
994
+ filters=128
995
+ size=1
996
+ stride=1
997
+ pad=1
998
+ activation=silu
999
+
1000
+ [route]
1001
+ layers = -2
1002
+
1003
+ # Plain Block
1004
+
1005
+ [convolutional]
1006
+ batch_normalize=1
1007
+ filters=128
1008
+ size=1
1009
+ stride=1
1010
+ pad=1
1011
+ activation=silu
1012
+
1013
+ [convolutional]
1014
+ batch_normalize=1
1015
+ size=3
1016
+ stride=1
1017
+ pad=1
1018
+ filters=128
1019
+ activation=silu
1020
+
1021
+ [convolutional]
1022
+ batch_normalize=1
1023
+ filters=128
1024
+ size=1
1025
+ stride=1
1026
+ pad=1
1027
+ activation=silu
1028
+
1029
+ [convolutional]
1030
+ batch_normalize=1
1031
+ size=3
1032
+ stride=1
1033
+ pad=1
1034
+ filters=128
1035
+ activation=silu
1036
+
1037
+ # Merge [-1, -(2k+2)]
1038
+
1039
+ [route]
1040
+ layers = -1, -6
1041
+
1042
+ # Transition last
1043
+
1044
+ # 141 (previous+6+4+2k)
1045
+ [convolutional]
1046
+ batch_normalize=1
1047
+ filters=128
1048
+ size=1
1049
+ stride=1
1050
+ pad=1
1051
+ activation=silu
1052
+
1053
+
1054
+ # PAN-4
1055
+
1056
+ [convolutional]
1057
+ batch_normalize=1
1058
+ size=3
1059
+ stride=2
1060
+ pad=1
1061
+ filters=256
1062
+ activation=silu
1063
+
1064
+ [route]
1065
+ layers = -1, 127
1066
+
1067
+ [convolutional]
1068
+ batch_normalize=1
1069
+ filters=256
1070
+ size=1
1071
+ stride=1
1072
+ pad=1
1073
+ activation=silu
1074
+
1075
+ # Split
1076
+
1077
+ [convolutional]
1078
+ batch_normalize=1
1079
+ filters=256
1080
+ size=1
1081
+ stride=1
1082
+ pad=1
1083
+ activation=silu
1084
+
1085
+ [route]
1086
+ layers = -2
1087
+
1088
+ # Plain Block
1089
+
1090
+ [convolutional]
1091
+ batch_normalize=1
1092
+ filters=256
1093
+ size=1
1094
+ stride=1
1095
+ pad=1
1096
+ activation=silu
1097
+
1098
+ [convolutional]
1099
+ batch_normalize=1
1100
+ size=3
1101
+ stride=1
1102
+ pad=1
1103
+ filters=256
1104
+ activation=silu
1105
+
1106
+ [convolutional]
1107
+ batch_normalize=1
1108
+ filters=256
1109
+ size=1
1110
+ stride=1
1111
+ pad=1
1112
+ activation=silu
1113
+
1114
+ [convolutional]
1115
+ batch_normalize=1
1116
+ size=3
1117
+ stride=1
1118
+ pad=1
1119
+ filters=256
1120
+ activation=silu
1121
+
1122
+ [route]
1123
+ layers = -1,-6
1124
+
1125
+ # Transition last
1126
+
1127
+ # 152 (previous+3+4+2k)
1128
+ [convolutional]
1129
+ batch_normalize=1
1130
+ filters=256
1131
+ size=1
1132
+ stride=1
1133
+ pad=1
1134
+ activation=silu
1135
+
1136
+
1137
+ # PAN-5
1138
+
1139
+ [convolutional]
1140
+ batch_normalize=1
1141
+ size=3
1142
+ stride=2
1143
+ pad=1
1144
+ filters=512
1145
+ activation=silu
1146
+
1147
+ [route]
1148
+ layers = -1, 113
1149
+
1150
+ [convolutional]
1151
+ batch_normalize=1
1152
+ filters=512
1153
+ size=1
1154
+ stride=1
1155
+ pad=1
1156
+ activation=silu
1157
+
1158
+ # Split
1159
+
1160
+ [convolutional]
1161
+ batch_normalize=1
1162
+ filters=512
1163
+ size=1
1164
+ stride=1
1165
+ pad=1
1166
+ activation=silu
1167
+
1168
+ [route]
1169
+ layers = -2
1170
+
1171
+ # Plain Block
1172
+
1173
+ [convolutional]
1174
+ batch_normalize=1
1175
+ filters=512
1176
+ size=1
1177
+ stride=1
1178
+ pad=1
1179
+ activation=silu
1180
+
1181
+ [convolutional]
1182
+ batch_normalize=1
1183
+ size=3
1184
+ stride=1
1185
+ pad=1
1186
+ filters=512
1187
+ activation=silu
1188
+
1189
+ [convolutional]
1190
+ batch_normalize=1
1191
+ filters=512
1192
+ size=1
1193
+ stride=1
1194
+ pad=1
1195
+ activation=silu
1196
+
1197
+ [convolutional]
1198
+ batch_normalize=1
1199
+ size=3
1200
+ stride=1
1201
+ pad=1
1202
+ filters=512
1203
+ activation=silu
1204
+
1205
+ [route]
1206
+ layers = -1,-6
1207
+
1208
+ # Transition last
1209
+
1210
+ # 163 (previous+3+4+2k)
1211
+ [convolutional]
1212
+ batch_normalize=1
1213
+ filters=512
1214
+ size=1
1215
+ stride=1
1216
+ pad=1
1217
+ activation=silu
1218
+
1219
+ # ============ End of Neck ============ #
1220
+
1221
+ # ============ Head ============ #
1222
+
1223
+ # YOLO-3
1224
+
1225
+ [route]
1226
+ layers = 141
1227
+
1228
+ [convolutional]
1229
+ batch_normalize=1
1230
+ size=3
1231
+ stride=1
1232
+ pad=1
1233
+ filters=256
1234
+ activation=silu
1235
+
1236
+ [convolutional]
1237
+ size=1
1238
+ stride=1
1239
+ pad=1
1240
+ filters=255
1241
+ activation=linear
1242
+
1243
+ [yolo]
1244
+ mask = 0,1,2
1245
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1246
+ classes=80
1247
+ num=9
1248
+ jitter=.3
1249
+ ignore_thresh = .7
1250
+ truth_thresh = 1
1251
+ random=1
1252
+ scale_x_y = 1.05
1253
+ iou_thresh=0.213
1254
+ cls_normalizer=1.0
1255
+ iou_normalizer=0.07
1256
+ iou_loss=ciou
1257
+ nms_kind=greedynms
1258
+ beta_nms=0.6
1259
+
1260
+
1261
+ # YOLO-4
1262
+
1263
+ [route]
1264
+ layers = 152
1265
+
1266
+ [convolutional]
1267
+ batch_normalize=1
1268
+ size=3
1269
+ stride=1
1270
+ pad=1
1271
+ filters=512
1272
+ activation=silu
1273
+
1274
+ [convolutional]
1275
+ size=1
1276
+ stride=1
1277
+ pad=1
1278
+ filters=255
1279
+ activation=linear
1280
+
1281
+ [yolo]
1282
+ mask = 3,4,5
1283
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1284
+ classes=80
1285
+ num=9
1286
+ jitter=.3
1287
+ ignore_thresh = .7
1288
+ truth_thresh = 1
1289
+ random=1
1290
+ scale_x_y = 1.05
1291
+ iou_thresh=0.213
1292
+ cls_normalizer=1.0
1293
+ iou_normalizer=0.07
1294
+ iou_loss=ciou
1295
+ nms_kind=greedynms
1296
+ beta_nms=0.6
1297
+
1298
+
1299
+ # YOLO-5
1300
+
1301
+ [route]
1302
+ layers = 163
1303
+
1304
+ [convolutional]
1305
+ batch_normalize=1
1306
+ size=3
1307
+ stride=1
1308
+ pad=1
1309
+ filters=1024
1310
+ activation=silu
1311
+
1312
+ [convolutional]
1313
+ size=1
1314
+ stride=1
1315
+ pad=1
1316
+ filters=255
1317
+ activation=linear
1318
+
1319
+ [yolo]
1320
+ mask = 6,7,8
1321
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1322
+ classes=80
1323
+ num=9
1324
+ jitter=.3
1325
+ ignore_thresh = .7
1326
+ truth_thresh = 1
1327
+ random=1
1328
+ scale_x_y = 1.05
1329
+ iou_thresh=0.213
1330
+ cls_normalizer=1.0
1331
+ iou_normalizer=0.07
1332
+ iou_loss=ciou
1333
+ nms_kind=greedynms
1334
+ beta_nms=0.6
cfg/yolov4_csp_x.cfg ADDED
@@ -0,0 +1,1534 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [net]
2
+ # Testing
3
+ #batch=1
4
+ #subdivisions=1
5
+ # Training
6
+ batch=64
7
+ subdivisions=8
8
+ width=512
9
+ height=512
10
+ channels=3
11
+ momentum=0.949
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.00261
19
+ burn_in=1000
20
+ max_batches = 500500
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ #cutmix=1
26
+ mosaic=1
27
+
28
+
29
+ # ============ Backbone ============ #
30
+
31
+ # Stem
32
+
33
+ # 0
34
+ [convolutional]
35
+ batch_normalize=1
36
+ filters=32
37
+ size=3
38
+ stride=1
39
+ pad=1
40
+ activation=silu
41
+
42
+ # P1
43
+
44
+ # Downsample
45
+
46
+ [convolutional]
47
+ batch_normalize=1
48
+ filters=80
49
+ size=3
50
+ stride=2
51
+ pad=1
52
+ activation=silu
53
+
54
+ # Residual Block
55
+
56
+ [convolutional]
57
+ batch_normalize=1
58
+ filters=40
59
+ size=1
60
+ stride=1
61
+ pad=1
62
+ activation=silu
63
+
64
+ [convolutional]
65
+ batch_normalize=1
66
+ filters=80
67
+ size=3
68
+ stride=1
69
+ pad=1
70
+ activation=silu
71
+
72
+ # 4 (previous+1+3k)
73
+ [shortcut]
74
+ from=-3
75
+ activation=linear
76
+
77
+ # P2
78
+
79
+ # Downsample
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=160
84
+ size=3
85
+ stride=2
86
+ pad=1
87
+ activation=silu
88
+
89
+ # Split
90
+
91
+ [convolutional]
92
+ batch_normalize=1
93
+ filters=80
94
+ size=1
95
+ stride=1
96
+ pad=1
97
+ activation=silu
98
+
99
+ [route]
100
+ layers = -2
101
+
102
+ [convolutional]
103
+ batch_normalize=1
104
+ filters=80
105
+ size=1
106
+ stride=1
107
+ pad=1
108
+ activation=silu
109
+
110
+ # Residual Block
111
+
112
+ [convolutional]
113
+ batch_normalize=1
114
+ filters=80
115
+ size=1
116
+ stride=1
117
+ pad=1
118
+ activation=silu
119
+
120
+ [convolutional]
121
+ batch_normalize=1
122
+ filters=80
123
+ size=3
124
+ stride=1
125
+ pad=1
126
+ activation=silu
127
+
128
+ [shortcut]
129
+ from=-3
130
+ activation=linear
131
+
132
+ [convolutional]
133
+ batch_normalize=1
134
+ filters=80
135
+ size=1
136
+ stride=1
137
+ pad=1
138
+ activation=silu
139
+
140
+ [convolutional]
141
+ batch_normalize=1
142
+ filters=80
143
+ size=3
144
+ stride=1
145
+ pad=1
146
+ activation=silu
147
+
148
+ [shortcut]
149
+ from=-3
150
+ activation=linear
151
+
152
+ [convolutional]
153
+ batch_normalize=1
154
+ filters=80
155
+ size=1
156
+ stride=1
157
+ pad=1
158
+ activation=silu
159
+
160
+ [convolutional]
161
+ batch_normalize=1
162
+ filters=80
163
+ size=3
164
+ stride=1
165
+ pad=1
166
+ activation=silu
167
+
168
+ [shortcut]
169
+ from=-3
170
+ activation=linear
171
+
172
+ # Transition first
173
+
174
+ [convolutional]
175
+ batch_normalize=1
176
+ filters=80
177
+ size=1
178
+ stride=1
179
+ pad=1
180
+ activation=silu
181
+
182
+ # Merge [-1, -(3k+4)]
183
+
184
+ [route]
185
+ layers = -1,-13
186
+
187
+ # Transition last
188
+
189
+ # 20 (previous+7+3k)
190
+ [convolutional]
191
+ batch_normalize=1
192
+ filters=160
193
+ size=1
194
+ stride=1
195
+ pad=1
196
+ activation=silu
197
+
198
+ # P3
199
+
200
+ # Downsample
201
+
202
+ [convolutional]
203
+ batch_normalize=1
204
+ filters=320
205
+ size=3
206
+ stride=2
207
+ pad=1
208
+ activation=silu
209
+
210
+ # Split
211
+
212
+ [convolutional]
213
+ batch_normalize=1
214
+ filters=160
215
+ size=1
216
+ stride=1
217
+ pad=1
218
+ activation=silu
219
+
220
+ [route]
221
+ layers = -2
222
+
223
+ [convolutional]
224
+ batch_normalize=1
225
+ filters=160
226
+ size=1
227
+ stride=1
228
+ pad=1
229
+ activation=silu
230
+
231
+ # Residual Block
232
+
233
+ [convolutional]
234
+ batch_normalize=1
235
+ filters=160
236
+ size=1
237
+ stride=1
238
+ pad=1
239
+ activation=silu
240
+
241
+ [convolutional]
242
+ batch_normalize=1
243
+ filters=160
244
+ size=3
245
+ stride=1
246
+ pad=1
247
+ activation=silu
248
+
249
+ [shortcut]
250
+ from=-3
251
+ activation=linear
252
+
253
+ [convolutional]
254
+ batch_normalize=1
255
+ filters=160
256
+ size=1
257
+ stride=1
258
+ pad=1
259
+ activation=silu
260
+
261
+ [convolutional]
262
+ batch_normalize=1
263
+ filters=160
264
+ size=3
265
+ stride=1
266
+ pad=1
267
+ activation=silu
268
+
269
+ [shortcut]
270
+ from=-3
271
+ activation=linear
272
+
273
+ [convolutional]
274
+ batch_normalize=1
275
+ filters=160
276
+ size=1
277
+ stride=1
278
+ pad=1
279
+ activation=silu
280
+
281
+ [convolutional]
282
+ batch_normalize=1
283
+ filters=160
284
+ size=3
285
+ stride=1
286
+ pad=1
287
+ activation=silu
288
+
289
+ [shortcut]
290
+ from=-3
291
+ activation=linear
292
+
293
+ [convolutional]
294
+ batch_normalize=1
295
+ filters=160
296
+ size=1
297
+ stride=1
298
+ pad=1
299
+ activation=silu
300
+
301
+ [convolutional]
302
+ batch_normalize=1
303
+ filters=160
304
+ size=3
305
+ stride=1
306
+ pad=1
307
+ activation=silu
308
+
309
+ [shortcut]
310
+ from=-3
311
+ activation=linear
312
+
313
+ [convolutional]
314
+ batch_normalize=1
315
+ filters=160
316
+ size=1
317
+ stride=1
318
+ pad=1
319
+ activation=silu
320
+
321
+ [convolutional]
322
+ batch_normalize=1
323
+ filters=160
324
+ size=3
325
+ stride=1
326
+ pad=1
327
+ activation=silu
328
+
329
+ [shortcut]
330
+ from=-3
331
+ activation=linear
332
+
333
+ [convolutional]
334
+ batch_normalize=1
335
+ filters=160
336
+ size=1
337
+ stride=1
338
+ pad=1
339
+ activation=silu
340
+
341
+ [convolutional]
342
+ batch_normalize=1
343
+ filters=160
344
+ size=3
345
+ stride=1
346
+ pad=1
347
+ activation=silu
348
+
349
+ [shortcut]
350
+ from=-3
351
+ activation=linear
352
+
353
+ [convolutional]
354
+ batch_normalize=1
355
+ filters=160
356
+ size=1
357
+ stride=1
358
+ pad=1
359
+ activation=silu
360
+
361
+ [convolutional]
362
+ batch_normalize=1
363
+ filters=160
364
+ size=3
365
+ stride=1
366
+ pad=1
367
+ activation=silu
368
+
369
+ [shortcut]
370
+ from=-3
371
+ activation=linear
372
+
373
+ [convolutional]
374
+ batch_normalize=1
375
+ filters=160
376
+ size=1
377
+ stride=1
378
+ pad=1
379
+ activation=silu
380
+
381
+ [convolutional]
382
+ batch_normalize=1
383
+ filters=160
384
+ size=3
385
+ stride=1
386
+ pad=1
387
+ activation=silu
388
+
389
+ [shortcut]
390
+ from=-3
391
+ activation=linear
392
+
393
+ [convolutional]
394
+ batch_normalize=1
395
+ filters=160
396
+ size=1
397
+ stride=1
398
+ pad=1
399
+ activation=silu
400
+
401
+ [convolutional]
402
+ batch_normalize=1
403
+ filters=160
404
+ size=3
405
+ stride=1
406
+ pad=1
407
+ activation=silu
408
+
409
+ [shortcut]
410
+ from=-3
411
+ activation=linear
412
+
413
+ [convolutional]
414
+ batch_normalize=1
415
+ filters=160
416
+ size=1
417
+ stride=1
418
+ pad=1
419
+ activation=silu
420
+
421
+ [convolutional]
422
+ batch_normalize=1
423
+ filters=160
424
+ size=3
425
+ stride=1
426
+ pad=1
427
+ activation=silu
428
+
429
+ [shortcut]
430
+ from=-3
431
+ activation=linear
432
+
433
+ # Transition first
434
+
435
+ [convolutional]
436
+ batch_normalize=1
437
+ filters=160
438
+ size=1
439
+ stride=1
440
+ pad=1
441
+ activation=silu
442
+
443
+ # Merge [-1, -(3k+4)]
444
+
445
+ [route]
446
+ layers = -1,-34
447
+
448
+ # Transition last
449
+
450
+ # 57 (previous+7+3k)
451
+ [convolutional]
452
+ batch_normalize=1
453
+ filters=320
454
+ size=1
455
+ stride=1
456
+ pad=1
457
+ activation=silu
458
+
459
+ # P4
460
+
461
+ # Downsample
462
+
463
+ [convolutional]
464
+ batch_normalize=1
465
+ filters=640
466
+ size=3
467
+ stride=2
468
+ pad=1
469
+ activation=silu
470
+
471
+ # Split
472
+
473
+ [convolutional]
474
+ batch_normalize=1
475
+ filters=320
476
+ size=1
477
+ stride=1
478
+ pad=1
479
+ activation=silu
480
+
481
+ [route]
482
+ layers = -2
483
+
484
+ [convolutional]
485
+ batch_normalize=1
486
+ filters=320
487
+ size=1
488
+ stride=1
489
+ pad=1
490
+ activation=silu
491
+
492
+ # Residual Block
493
+
494
+ [convolutional]
495
+ batch_normalize=1
496
+ filters=320
497
+ size=1
498
+ stride=1
499
+ pad=1
500
+ activation=silu
501
+
502
+ [convolutional]
503
+ batch_normalize=1
504
+ filters=320
505
+ size=3
506
+ stride=1
507
+ pad=1
508
+ activation=silu
509
+
510
+ [shortcut]
511
+ from=-3
512
+ activation=linear
513
+
514
+ [convolutional]
515
+ batch_normalize=1
516
+ filters=320
517
+ size=1
518
+ stride=1
519
+ pad=1
520
+ activation=silu
521
+
522
+ [convolutional]
523
+ batch_normalize=1
524
+ filters=320
525
+ size=3
526
+ stride=1
527
+ pad=1
528
+ activation=silu
529
+
530
+ [shortcut]
531
+ from=-3
532
+ activation=linear
533
+
534
+ [convolutional]
535
+ batch_normalize=1
536
+ filters=320
537
+ size=1
538
+ stride=1
539
+ pad=1
540
+ activation=silu
541
+
542
+ [convolutional]
543
+ batch_normalize=1
544
+ filters=320
545
+ size=3
546
+ stride=1
547
+ pad=1
548
+ activation=silu
549
+
550
+ [shortcut]
551
+ from=-3
552
+ activation=linear
553
+
554
+ [convolutional]
555
+ batch_normalize=1
556
+ filters=320
557
+ size=1
558
+ stride=1
559
+ pad=1
560
+ activation=silu
561
+
562
+ [convolutional]
563
+ batch_normalize=1
564
+ filters=320
565
+ size=3
566
+ stride=1
567
+ pad=1
568
+ activation=silu
569
+
570
+ [shortcut]
571
+ from=-3
572
+ activation=linear
573
+
574
+ [convolutional]
575
+ batch_normalize=1
576
+ filters=320
577
+ size=1
578
+ stride=1
579
+ pad=1
580
+ activation=silu
581
+
582
+ [convolutional]
583
+ batch_normalize=1
584
+ filters=320
585
+ size=3
586
+ stride=1
587
+ pad=1
588
+ activation=silu
589
+
590
+ [shortcut]
591
+ from=-3
592
+ activation=linear
593
+
594
+ [convolutional]
595
+ batch_normalize=1
596
+ filters=320
597
+ size=1
598
+ stride=1
599
+ pad=1
600
+ activation=silu
601
+
602
+ [convolutional]
603
+ batch_normalize=1
604
+ filters=320
605
+ size=3
606
+ stride=1
607
+ pad=1
608
+ activation=silu
609
+
610
+ [shortcut]
611
+ from=-3
612
+ activation=linear
613
+
614
+ [convolutional]
615
+ batch_normalize=1
616
+ filters=320
617
+ size=1
618
+ stride=1
619
+ pad=1
620
+ activation=silu
621
+
622
+ [convolutional]
623
+ batch_normalize=1
624
+ filters=320
625
+ size=3
626
+ stride=1
627
+ pad=1
628
+ activation=silu
629
+
630
+ [shortcut]
631
+ from=-3
632
+ activation=linear
633
+
634
+ [convolutional]
635
+ batch_normalize=1
636
+ filters=320
637
+ size=1
638
+ stride=1
639
+ pad=1
640
+ activation=silu
641
+
642
+ [convolutional]
643
+ batch_normalize=1
644
+ filters=320
645
+ size=3
646
+ stride=1
647
+ pad=1
648
+ activation=silu
649
+
650
+ [shortcut]
651
+ from=-3
652
+ activation=linear
653
+
654
+ [convolutional]
655
+ batch_normalize=1
656
+ filters=320
657
+ size=1
658
+ stride=1
659
+ pad=1
660
+ activation=silu
661
+
662
+ [convolutional]
663
+ batch_normalize=1
664
+ filters=320
665
+ size=3
666
+ stride=1
667
+ pad=1
668
+ activation=silu
669
+
670
+ [shortcut]
671
+ from=-3
672
+ activation=linear
673
+
674
+ [convolutional]
675
+ batch_normalize=1
676
+ filters=320
677
+ size=1
678
+ stride=1
679
+ pad=1
680
+ activation=silu
681
+
682
+ [convolutional]
683
+ batch_normalize=1
684
+ filters=320
685
+ size=3
686
+ stride=1
687
+ pad=1
688
+ activation=silu
689
+
690
+ [shortcut]
691
+ from=-3
692
+ activation=linear
693
+
694
+ # Transition first
695
+
696
+ [convolutional]
697
+ batch_normalize=1
698
+ filters=320
699
+ size=1
700
+ stride=1
701
+ pad=1
702
+ activation=silu
703
+
704
+ # Merge [-1, -(3k+4)]
705
+
706
+ [route]
707
+ layers = -1,-34
708
+
709
+ # Transition last
710
+
711
+ # 94 (previous+7+3k)
712
+ [convolutional]
713
+ batch_normalize=1
714
+ filters=640
715
+ size=1
716
+ stride=1
717
+ pad=1
718
+ activation=silu
719
+
720
+ # P5
721
+
722
+ # Downsample
723
+
724
+ [convolutional]
725
+ batch_normalize=1
726
+ filters=1280
727
+ size=3
728
+ stride=2
729
+ pad=1
730
+ activation=silu
731
+
732
+ # Split
733
+
734
+ [convolutional]
735
+ batch_normalize=1
736
+ filters=640
737
+ size=1
738
+ stride=1
739
+ pad=1
740
+ activation=silu
741
+
742
+ [route]
743
+ layers = -2
744
+
745
+ [convolutional]
746
+ batch_normalize=1
747
+ filters=640
748
+ size=1
749
+ stride=1
750
+ pad=1
751
+ activation=silu
752
+
753
+ # Residual Block
754
+
755
+ [convolutional]
756
+ batch_normalize=1
757
+ filters=640
758
+ size=1
759
+ stride=1
760
+ pad=1
761
+ activation=silu
762
+
763
+ [convolutional]
764
+ batch_normalize=1
765
+ filters=640
766
+ size=3
767
+ stride=1
768
+ pad=1
769
+ activation=silu
770
+
771
+ [shortcut]
772
+ from=-3
773
+ activation=linear
774
+
775
+ [convolutional]
776
+ batch_normalize=1
777
+ filters=640
778
+ size=1
779
+ stride=1
780
+ pad=1
781
+ activation=silu
782
+
783
+ [convolutional]
784
+ batch_normalize=1
785
+ filters=640
786
+ size=3
787
+ stride=1
788
+ pad=1
789
+ activation=silu
790
+
791
+ [shortcut]
792
+ from=-3
793
+ activation=linear
794
+
795
+ [convolutional]
796
+ batch_normalize=1
797
+ filters=640
798
+ size=1
799
+ stride=1
800
+ pad=1
801
+ activation=silu
802
+
803
+ [convolutional]
804
+ batch_normalize=1
805
+ filters=640
806
+ size=3
807
+ stride=1
808
+ pad=1
809
+ activation=silu
810
+
811
+ [shortcut]
812
+ from=-3
813
+ activation=linear
814
+
815
+ [convolutional]
816
+ batch_normalize=1
817
+ filters=640
818
+ size=1
819
+ stride=1
820
+ pad=1
821
+ activation=silu
822
+
823
+ [convolutional]
824
+ batch_normalize=1
825
+ filters=640
826
+ size=3
827
+ stride=1
828
+ pad=1
829
+ activation=silu
830
+
831
+ [shortcut]
832
+ from=-3
833
+ activation=linear
834
+
835
+ [convolutional]
836
+ batch_normalize=1
837
+ filters=640
838
+ size=1
839
+ stride=1
840
+ pad=1
841
+ activation=silu
842
+
843
+ [convolutional]
844
+ batch_normalize=1
845
+ filters=640
846
+ size=3
847
+ stride=1
848
+ pad=1
849
+ activation=silu
850
+
851
+ [shortcut]
852
+ from=-3
853
+ activation=linear
854
+
855
+ # Transition first
856
+
857
+ [convolutional]
858
+ batch_normalize=1
859
+ filters=640
860
+ size=1
861
+ stride=1
862
+ pad=1
863
+ activation=silu
864
+
865
+ # Merge [-1, -(3k+4)]
866
+
867
+ [route]
868
+ layers = -1,-19
869
+
870
+ # Transition last
871
+
872
+ # 116 (previous+7+3k)
873
+ [convolutional]
874
+ batch_normalize=1
875
+ filters=1280
876
+ size=1
877
+ stride=1
878
+ pad=1
879
+ activation=silu
880
+
881
+ # ============ End of Backbone ============ #
882
+
883
+ # ============ Neck ============ #
884
+
885
+ # CSPSPP
886
+
887
+ [convolutional]
888
+ batch_normalize=1
889
+ filters=640
890
+ size=1
891
+ stride=1
892
+ pad=1
893
+ activation=silu
894
+
895
+ [route]
896
+ layers = -2
897
+
898
+ [convolutional]
899
+ batch_normalize=1
900
+ filters=640
901
+ size=1
902
+ stride=1
903
+ pad=1
904
+ activation=silu
905
+
906
+ [convolutional]
907
+ batch_normalize=1
908
+ size=3
909
+ stride=1
910
+ pad=1
911
+ filters=640
912
+ activation=silu
913
+
914
+ [convolutional]
915
+ batch_normalize=1
916
+ filters=640
917
+ size=1
918
+ stride=1
919
+ pad=1
920
+ activation=silu
921
+
922
+ ### SPP ###
923
+ [maxpool]
924
+ stride=1
925
+ size=5
926
+
927
+ [route]
928
+ layers=-2
929
+
930
+ [maxpool]
931
+ stride=1
932
+ size=9
933
+
934
+ [route]
935
+ layers=-4
936
+
937
+ [maxpool]
938
+ stride=1
939
+ size=13
940
+
941
+ [route]
942
+ layers=-1,-3,-5,-6
943
+ ### End SPP ###
944
+
945
+ [convolutional]
946
+ batch_normalize=1
947
+ filters=640
948
+ size=1
949
+ stride=1
950
+ pad=1
951
+ activation=silu
952
+
953
+ [convolutional]
954
+ batch_normalize=1
955
+ size=3
956
+ stride=1
957
+ pad=1
958
+ filters=640
959
+ activation=silu
960
+
961
+ [convolutional]
962
+ batch_normalize=1
963
+ filters=640
964
+ size=1
965
+ stride=1
966
+ pad=1
967
+ activation=silu
968
+
969
+ [convolutional]
970
+ batch_normalize=1
971
+ size=3
972
+ stride=1
973
+ pad=1
974
+ filters=640
975
+ activation=silu
976
+
977
+ [route]
978
+ layers = -1, -15
979
+
980
+ # 133 (previous+6+5+2k)
981
+ [convolutional]
982
+ batch_normalize=1
983
+ filters=640
984
+ size=1
985
+ stride=1
986
+ pad=1
987
+ activation=silu
988
+
989
+ # End of CSPSPP
990
+
991
+
992
+ # FPN-4
993
+
994
+ [convolutional]
995
+ batch_normalize=1
996
+ filters=320
997
+ size=1
998
+ stride=1
999
+ pad=1
1000
+ activation=silu
1001
+
1002
+ [upsample]
1003
+ stride=2
1004
+
1005
+ [route]
1006
+ layers = 94
1007
+
1008
+ [convolutional]
1009
+ batch_normalize=1
1010
+ filters=320
1011
+ size=1
1012
+ stride=1
1013
+ pad=1
1014
+ activation=silu
1015
+
1016
+ [route]
1017
+ layers = -1, -3
1018
+
1019
+ [convolutional]
1020
+ batch_normalize=1
1021
+ filters=320
1022
+ size=1
1023
+ stride=1
1024
+ pad=1
1025
+ activation=silu
1026
+
1027
+ # Split
1028
+
1029
+ [convolutional]
1030
+ batch_normalize=1
1031
+ filters=320
1032
+ size=1
1033
+ stride=1
1034
+ pad=1
1035
+ activation=silu
1036
+
1037
+ [route]
1038
+ layers = -2
1039
+
1040
+ # Plain Block
1041
+
1042
+ [convolutional]
1043
+ batch_normalize=1
1044
+ filters=320
1045
+ size=1
1046
+ stride=1
1047
+ pad=1
1048
+ activation=silu
1049
+
1050
+ [convolutional]
1051
+ batch_normalize=1
1052
+ size=3
1053
+ stride=1
1054
+ pad=1
1055
+ filters=320
1056
+ activation=silu
1057
+
1058
+ [convolutional]
1059
+ batch_normalize=1
1060
+ filters=320
1061
+ size=1
1062
+ stride=1
1063
+ pad=1
1064
+ activation=silu
1065
+
1066
+ [convolutional]
1067
+ batch_normalize=1
1068
+ size=3
1069
+ stride=1
1070
+ pad=1
1071
+ filters=320
1072
+ activation=silu
1073
+
1074
+ [convolutional]
1075
+ batch_normalize=1
1076
+ filters=320
1077
+ size=1
1078
+ stride=1
1079
+ pad=1
1080
+ activation=silu
1081
+
1082
+ [convolutional]
1083
+ batch_normalize=1
1084
+ size=3
1085
+ stride=1
1086
+ pad=1
1087
+ filters=320
1088
+ activation=silu
1089
+
1090
+ # Merge [-1, -(2k+2)]
1091
+
1092
+ [route]
1093
+ layers = -1, -8
1094
+
1095
+ # Transition last
1096
+
1097
+ # 149 (previous+6+4+2k)
1098
+ [convolutional]
1099
+ batch_normalize=1
1100
+ filters=320
1101
+ size=1
1102
+ stride=1
1103
+ pad=1
1104
+ activation=silu
1105
+
1106
+
1107
+ # FPN-3
1108
+
1109
+ [convolutional]
1110
+ batch_normalize=1
1111
+ filters=160
1112
+ size=1
1113
+ stride=1
1114
+ pad=1
1115
+ activation=silu
1116
+
1117
+ [upsample]
1118
+ stride=2
1119
+
1120
+ [route]
1121
+ layers = 57
1122
+
1123
+ [convolutional]
1124
+ batch_normalize=1
1125
+ filters=160
1126
+ size=1
1127
+ stride=1
1128
+ pad=1
1129
+ activation=silu
1130
+
1131
+ [route]
1132
+ layers = -1, -3
1133
+
1134
+ [convolutional]
1135
+ batch_normalize=1
1136
+ filters=160
1137
+ size=1
1138
+ stride=1
1139
+ pad=1
1140
+ activation=silu
1141
+
1142
+ # Split
1143
+
1144
+ [convolutional]
1145
+ batch_normalize=1
1146
+ filters=160
1147
+ size=1
1148
+ stride=1
1149
+ pad=1
1150
+ activation=silu
1151
+
1152
+ [route]
1153
+ layers = -2
1154
+
1155
+ # Plain Block
1156
+
1157
+ [convolutional]
1158
+ batch_normalize=1
1159
+ filters=160
1160
+ size=1
1161
+ stride=1
1162
+ pad=1
1163
+ activation=silu
1164
+
1165
+ [convolutional]
1166
+ batch_normalize=1
1167
+ size=3
1168
+ stride=1
1169
+ pad=1
1170
+ filters=160
1171
+ activation=silu
1172
+
1173
+ [convolutional]
1174
+ batch_normalize=1
1175
+ filters=160
1176
+ size=1
1177
+ stride=1
1178
+ pad=1
1179
+ activation=silu
1180
+
1181
+ [convolutional]
1182
+ batch_normalize=1
1183
+ size=3
1184
+ stride=1
1185
+ pad=1
1186
+ filters=160
1187
+ activation=silu
1188
+
1189
+ [convolutional]
1190
+ batch_normalize=1
1191
+ filters=160
1192
+ size=1
1193
+ stride=1
1194
+ pad=1
1195
+ activation=silu
1196
+
1197
+ [convolutional]
1198
+ batch_normalize=1
1199
+ size=3
1200
+ stride=1
1201
+ pad=1
1202
+ filters=160
1203
+ activation=silu
1204
+
1205
+ # Merge [-1, -(2k+2)]
1206
+
1207
+ [route]
1208
+ layers = -1, -8
1209
+
1210
+ # Transition last
1211
+
1212
+ # 165 (previous+6+4+2k)
1213
+ [convolutional]
1214
+ batch_normalize=1
1215
+ filters=160
1216
+ size=1
1217
+ stride=1
1218
+ pad=1
1219
+ activation=silu
1220
+
1221
+
1222
+ # PAN-4
1223
+
1224
+ [convolutional]
1225
+ batch_normalize=1
1226
+ size=3
1227
+ stride=2
1228
+ pad=1
1229
+ filters=320
1230
+ activation=silu
1231
+
1232
+ [route]
1233
+ layers = -1, 149
1234
+
1235
+ [convolutional]
1236
+ batch_normalize=1
1237
+ filters=320
1238
+ size=1
1239
+ stride=1
1240
+ pad=1
1241
+ activation=silu
1242
+
1243
+ # Split
1244
+
1245
+ [convolutional]
1246
+ batch_normalize=1
1247
+ filters=320
1248
+ size=1
1249
+ stride=1
1250
+ pad=1
1251
+ activation=silu
1252
+
1253
+ [route]
1254
+ layers = -2
1255
+
1256
+ # Plain Block
1257
+
1258
+ [convolutional]
1259
+ batch_normalize=1
1260
+ filters=320
1261
+ size=1
1262
+ stride=1
1263
+ pad=1
1264
+ activation=silu
1265
+
1266
+ [convolutional]
1267
+ batch_normalize=1
1268
+ size=3
1269
+ stride=1
1270
+ pad=1
1271
+ filters=320
1272
+ activation=silu
1273
+
1274
+ [convolutional]
1275
+ batch_normalize=1
1276
+ filters=320
1277
+ size=1
1278
+ stride=1
1279
+ pad=1
1280
+ activation=silu
1281
+
1282
+ [convolutional]
1283
+ batch_normalize=1
1284
+ size=3
1285
+ stride=1
1286
+ pad=1
1287
+ filters=320
1288
+ activation=silu
1289
+
1290
+ [convolutional]
1291
+ batch_normalize=1
1292
+ filters=320
1293
+ size=1
1294
+ stride=1
1295
+ pad=1
1296
+ activation=silu
1297
+
1298
+ [convolutional]
1299
+ batch_normalize=1
1300
+ size=3
1301
+ stride=1
1302
+ pad=1
1303
+ filters=320
1304
+ activation=silu
1305
+
1306
+ [route]
1307
+ layers = -1,-8
1308
+
1309
+ # Transition last
1310
+
1311
+ # 178 (previous+3+4+2k)
1312
+ [convolutional]
1313
+ batch_normalize=1
1314
+ filters=320
1315
+ size=1
1316
+ stride=1
1317
+ pad=1
1318
+ activation=silu
1319
+
1320
+
1321
+ # PAN-5
1322
+
1323
+ [convolutional]
1324
+ batch_normalize=1
1325
+ size=3
1326
+ stride=2
1327
+ pad=1
1328
+ filters=640
1329
+ activation=silu
1330
+
1331
+ [route]
1332
+ layers = -1, 133
1333
+
1334
+ [convolutional]
1335
+ batch_normalize=1
1336
+ filters=640
1337
+ size=1
1338
+ stride=1
1339
+ pad=1
1340
+ activation=silu
1341
+
1342
+ # Split
1343
+
1344
+ [convolutional]
1345
+ batch_normalize=1
1346
+ filters=640
1347
+ size=1
1348
+ stride=1
1349
+ pad=1
1350
+ activation=silu
1351
+
1352
+ [route]
1353
+ layers = -2
1354
+
1355
+ # Plain Block
1356
+
1357
+ [convolutional]
1358
+ batch_normalize=1
1359
+ filters=640
1360
+ size=1
1361
+ stride=1
1362
+ pad=1
1363
+ activation=silu
1364
+
1365
+ [convolutional]
1366
+ batch_normalize=1
1367
+ size=3
1368
+ stride=1
1369
+ pad=1
1370
+ filters=640
1371
+ activation=silu
1372
+
1373
+ [convolutional]
1374
+ batch_normalize=1
1375
+ filters=640
1376
+ size=1
1377
+ stride=1
1378
+ pad=1
1379
+ activation=silu
1380
+
1381
+ [convolutional]
1382
+ batch_normalize=1
1383
+ size=3
1384
+ stride=1
1385
+ pad=1
1386
+ filters=640
1387
+ activation=silu
1388
+
1389
+ [convolutional]
1390
+ batch_normalize=1
1391
+ filters=640
1392
+ size=1
1393
+ stride=1
1394
+ pad=1
1395
+ activation=silu
1396
+
1397
+ [convolutional]
1398
+ batch_normalize=1
1399
+ size=3
1400
+ stride=1
1401
+ pad=1
1402
+ filters=640
1403
+ activation=silu
1404
+
1405
+ [route]
1406
+ layers = -1,-8
1407
+
1408
+ # Transition last
1409
+
1410
+ # 191 (previous+3+4+2k)
1411
+ [convolutional]
1412
+ batch_normalize=1
1413
+ filters=640
1414
+ size=1
1415
+ stride=1
1416
+ pad=1
1417
+ activation=silu
1418
+
1419
+ # ============ End of Neck ============ #
1420
+
1421
+ # ============ Head ============ #
1422
+
1423
+ # YOLO-3
1424
+
1425
+ [route]
1426
+ layers = 165
1427
+
1428
+ [convolutional]
1429
+ batch_normalize=1
1430
+ size=3
1431
+ stride=1
1432
+ pad=1
1433
+ filters=320
1434
+ activation=silu
1435
+
1436
+ [convolutional]
1437
+ size=1
1438
+ stride=1
1439
+ pad=1
1440
+ filters=255
1441
+ activation=linear
1442
+
1443
+ [yolo]
1444
+ mask = 0,1,2
1445
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1446
+ classes=80
1447
+ num=9
1448
+ jitter=.3
1449
+ ignore_thresh = .7
1450
+ truth_thresh = 1
1451
+ random=1
1452
+ scale_x_y = 1.05
1453
+ iou_thresh=0.213
1454
+ cls_normalizer=1.0
1455
+ iou_normalizer=0.07
1456
+ iou_loss=ciou
1457
+ nms_kind=greedynms
1458
+ beta_nms=0.6
1459
+
1460
+
1461
+ # YOLO-4
1462
+
1463
+ [route]
1464
+ layers = 178
1465
+
1466
+ [convolutional]
1467
+ batch_normalize=1
1468
+ size=3
1469
+ stride=1
1470
+ pad=1
1471
+ filters=640
1472
+ activation=silu
1473
+
1474
+ [convolutional]
1475
+ size=1
1476
+ stride=1
1477
+ pad=1
1478
+ filters=255
1479
+ activation=linear
1480
+
1481
+ [yolo]
1482
+ mask = 3,4,5
1483
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1484
+ classes=80
1485
+ num=9
1486
+ jitter=.3
1487
+ ignore_thresh = .7
1488
+ truth_thresh = 1
1489
+ random=1
1490
+ scale_x_y = 1.05
1491
+ iou_thresh=0.213
1492
+ cls_normalizer=1.0
1493
+ iou_normalizer=0.07
1494
+ iou_loss=ciou
1495
+ nms_kind=greedynms
1496
+ beta_nms=0.6
1497
+
1498
+
1499
+ # YOLO-5
1500
+
1501
+ [route]
1502
+ layers = 191
1503
+
1504
+ [convolutional]
1505
+ batch_normalize=1
1506
+ size=3
1507
+ stride=1
1508
+ pad=1
1509
+ filters=1280
1510
+ activation=silu
1511
+
1512
+ [convolutional]
1513
+ size=1
1514
+ stride=1
1515
+ pad=1
1516
+ filters=255
1517
+ activation=linear
1518
+
1519
+ [yolo]
1520
+ mask = 6,7,8
1521
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1522
+ classes=80
1523
+ num=9
1524
+ jitter=.3
1525
+ ignore_thresh = .7
1526
+ truth_thresh = 1
1527
+ random=1
1528
+ scale_x_y = 1.05
1529
+ iou_thresh=0.213
1530
+ cls_normalizer=1.0
1531
+ iou_normalizer=0.07
1532
+ iou_loss=ciou
1533
+ nms_kind=greedynms
1534
+ beta_nms=0.6
cfg/yolov4_p6.cfg ADDED
@@ -0,0 +1,2260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [net]
2
+ batch=64
3
+ subdivisions=8
4
+ width=1280
5
+ height=1280
6
+ channels=3
7
+ momentum=0.949
8
+ decay=0.0005
9
+ angle=0
10
+ saturation = 1.5
11
+ exposure = 1.5
12
+ hue=.1
13
+
14
+ learning_rate=0.00261
15
+ burn_in=1000
16
+ max_batches = 500500
17
+ policy=steps
18
+ steps=400000,450000
19
+ scales=.1,.1
20
+
21
+ mosaic=1
22
+
23
+
24
+ # ============ Backbone ============ #
25
+
26
+ # Stem
27
+
28
+ # 0
29
+ [convolutional]
30
+ batch_normalize=1
31
+ filters=32
32
+ size=3
33
+ stride=1
34
+ pad=1
35
+ activation=mish
36
+
37
+
38
+ # P1
39
+
40
+ # Downsample
41
+
42
+ [convolutional]
43
+ batch_normalize=1
44
+ filters=64
45
+ size=3
46
+ stride=2
47
+ pad=1
48
+ activation=mish
49
+
50
+ # Split
51
+
52
+ [convolutional]
53
+ batch_normalize=1
54
+ filters=32
55
+ size=1
56
+ stride=1
57
+ pad=1
58
+ activation=mish
59
+
60
+ [route]
61
+ layers = -2
62
+
63
+ [convolutional]
64
+ batch_normalize=1
65
+ filters=32
66
+ size=1
67
+ stride=1
68
+ pad=1
69
+ activation=mish
70
+
71
+ # Residual Block
72
+
73
+ [convolutional]
74
+ batch_normalize=1
75
+ filters=32
76
+ size=1
77
+ stride=1
78
+ pad=1
79
+ activation=mish
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=32
84
+ size=3
85
+ stride=1
86
+ pad=1
87
+ activation=mish
88
+
89
+ [shortcut]
90
+ from=-3
91
+ activation=linear
92
+
93
+ # Transition first
94
+
95
+ [convolutional]
96
+ batch_normalize=1
97
+ filters=32
98
+ size=1
99
+ stride=1
100
+ pad=1
101
+ activation=mish
102
+
103
+ # Merge [-1, -(3k+4)]
104
+
105
+ [route]
106
+ layers = -1,-7
107
+
108
+ # Transition last
109
+
110
+ # 10 (previous+7+3k)
111
+ [convolutional]
112
+ batch_normalize=1
113
+ filters=64
114
+ size=1
115
+ stride=1
116
+ pad=1
117
+ activation=mish
118
+
119
+
120
+ # P2
121
+
122
+ # Downsample
123
+
124
+ [convolutional]
125
+ batch_normalize=1
126
+ filters=128
127
+ size=3
128
+ stride=2
129
+ pad=1
130
+ activation=mish
131
+
132
+ # Split
133
+
134
+ [convolutional]
135
+ batch_normalize=1
136
+ filters=64
137
+ size=1
138
+ stride=1
139
+ pad=1
140
+ activation=mish
141
+
142
+ [route]
143
+ layers = -2
144
+
145
+ [convolutional]
146
+ batch_normalize=1
147
+ filters=64
148
+ size=1
149
+ stride=1
150
+ pad=1
151
+ activation=mish
152
+
153
+ # Residual Block
154
+
155
+ [convolutional]
156
+ batch_normalize=1
157
+ filters=64
158
+ size=1
159
+ stride=1
160
+ pad=1
161
+ activation=mish
162
+
163
+ [convolutional]
164
+ batch_normalize=1
165
+ filters=64
166
+ size=3
167
+ stride=1
168
+ pad=1
169
+ activation=mish
170
+
171
+ [shortcut]
172
+ from=-3
173
+ activation=linear
174
+
175
+ [convolutional]
176
+ batch_normalize=1
177
+ filters=64
178
+ size=1
179
+ stride=1
180
+ pad=1
181
+ activation=mish
182
+
183
+ [convolutional]
184
+ batch_normalize=1
185
+ filters=64
186
+ size=3
187
+ stride=1
188
+ pad=1
189
+ activation=mish
190
+
191
+ [shortcut]
192
+ from=-3
193
+ activation=linear
194
+
195
+ [convolutional]
196
+ batch_normalize=1
197
+ filters=64
198
+ size=1
199
+ stride=1
200
+ pad=1
201
+ activation=mish
202
+
203
+ [convolutional]
204
+ batch_normalize=1
205
+ filters=64
206
+ size=3
207
+ stride=1
208
+ pad=1
209
+ activation=mish
210
+
211
+ [shortcut]
212
+ from=-3
213
+ activation=linear
214
+
215
+ # Transition first
216
+
217
+ [convolutional]
218
+ batch_normalize=1
219
+ filters=64
220
+ size=1
221
+ stride=1
222
+ pad=1
223
+ activation=mish
224
+
225
+ # Merge [-1, -(3k+4)]
226
+
227
+ [route]
228
+ layers = -1,-13
229
+
230
+ # Transition last
231
+
232
+ # 26 (previous+7+3k)
233
+ [convolutional]
234
+ batch_normalize=1
235
+ filters=128
236
+ size=1
237
+ stride=1
238
+ pad=1
239
+ activation=mish
240
+
241
+
242
+ # P3
243
+
244
+ # Downsample
245
+
246
+ [convolutional]
247
+ batch_normalize=1
248
+ filters=256
249
+ size=3
250
+ stride=2
251
+ pad=1
252
+ activation=mish
253
+
254
+ # Split
255
+
256
+ [convolutional]
257
+ batch_normalize=1
258
+ filters=128
259
+ size=1
260
+ stride=1
261
+ pad=1
262
+ activation=mish
263
+
264
+ [route]
265
+ layers = -2
266
+
267
+ [convolutional]
268
+ batch_normalize=1
269
+ filters=128
270
+ size=1
271
+ stride=1
272
+ pad=1
273
+ activation=mish
274
+
275
+ # Residual Block
276
+
277
+ [convolutional]
278
+ batch_normalize=1
279
+ filters=128
280
+ size=1
281
+ stride=1
282
+ pad=1
283
+ activation=mish
284
+
285
+ [convolutional]
286
+ batch_normalize=1
287
+ filters=128
288
+ size=3
289
+ stride=1
290
+ pad=1
291
+ activation=mish
292
+
293
+ [shortcut]
294
+ from=-3
295
+ activation=linear
296
+
297
+ [convolutional]
298
+ batch_normalize=1
299
+ filters=128
300
+ size=1
301
+ stride=1
302
+ pad=1
303
+ activation=mish
304
+
305
+ [convolutional]
306
+ batch_normalize=1
307
+ filters=128
308
+ size=3
309
+ stride=1
310
+ pad=1
311
+ activation=mish
312
+
313
+ [shortcut]
314
+ from=-3
315
+ activation=linear
316
+
317
+ [convolutional]
318
+ batch_normalize=1
319
+ filters=128
320
+ size=1
321
+ stride=1
322
+ pad=1
323
+ activation=mish
324
+
325
+ [convolutional]
326
+ batch_normalize=1
327
+ filters=128
328
+ size=3
329
+ stride=1
330
+ pad=1
331
+ activation=mish
332
+
333
+ [shortcut]
334
+ from=-3
335
+ activation=linear
336
+
337
+ [convolutional]
338
+ batch_normalize=1
339
+ filters=128
340
+ size=1
341
+ stride=1
342
+ pad=1
343
+ activation=mish
344
+
345
+ [convolutional]
346
+ batch_normalize=1
347
+ filters=128
348
+ size=3
349
+ stride=1
350
+ pad=1
351
+ activation=mish
352
+
353
+ [shortcut]
354
+ from=-3
355
+ activation=linear
356
+
357
+ [convolutional]
358
+ batch_normalize=1
359
+ filters=128
360
+ size=1
361
+ stride=1
362
+ pad=1
363
+ activation=mish
364
+
365
+ [convolutional]
366
+ batch_normalize=1
367
+ filters=128
368
+ size=3
369
+ stride=1
370
+ pad=1
371
+ activation=mish
372
+
373
+ [shortcut]
374
+ from=-3
375
+ activation=linear
376
+
377
+ [convolutional]
378
+ batch_normalize=1
379
+ filters=128
380
+ size=1
381
+ stride=1
382
+ pad=1
383
+ activation=mish
384
+
385
+ [convolutional]
386
+ batch_normalize=1
387
+ filters=128
388
+ size=3
389
+ stride=1
390
+ pad=1
391
+ activation=mish
392
+
393
+ [shortcut]
394
+ from=-3
395
+ activation=linear
396
+
397
+ [convolutional]
398
+ batch_normalize=1
399
+ filters=128
400
+ size=1
401
+ stride=1
402
+ pad=1
403
+ activation=mish
404
+
405
+ [convolutional]
406
+ batch_normalize=1
407
+ filters=128
408
+ size=3
409
+ stride=1
410
+ pad=1
411
+ activation=mish
412
+
413
+ [shortcut]
414
+ from=-3
415
+ activation=linear
416
+
417
+ [convolutional]
418
+ batch_normalize=1
419
+ filters=128
420
+ size=1
421
+ stride=1
422
+ pad=1
423
+ activation=mish
424
+
425
+ [convolutional]
426
+ batch_normalize=1
427
+ filters=128
428
+ size=3
429
+ stride=1
430
+ pad=1
431
+ activation=mish
432
+
433
+ [shortcut]
434
+ from=-3
435
+ activation=linear
436
+
437
+ [convolutional]
438
+ batch_normalize=1
439
+ filters=128
440
+ size=1
441
+ stride=1
442
+ pad=1
443
+ activation=mish
444
+
445
+ [convolutional]
446
+ batch_normalize=1
447
+ filters=128
448
+ size=3
449
+ stride=1
450
+ pad=1
451
+ activation=mish
452
+
453
+ [shortcut]
454
+ from=-3
455
+ activation=linear
456
+
457
+ [convolutional]
458
+ batch_normalize=1
459
+ filters=128
460
+ size=1
461
+ stride=1
462
+ pad=1
463
+ activation=mish
464
+
465
+ [convolutional]
466
+ batch_normalize=1
467
+ filters=128
468
+ size=3
469
+ stride=1
470
+ pad=1
471
+ activation=mish
472
+
473
+ [shortcut]
474
+ from=-3
475
+ activation=linear
476
+
477
+ [convolutional]
478
+ batch_normalize=1
479
+ filters=128
480
+ size=1
481
+ stride=1
482
+ pad=1
483
+ activation=mish
484
+
485
+ [convolutional]
486
+ batch_normalize=1
487
+ filters=128
488
+ size=3
489
+ stride=1
490
+ pad=1
491
+ activation=mish
492
+
493
+ [shortcut]
494
+ from=-3
495
+ activation=linear
496
+
497
+ [convolutional]
498
+ batch_normalize=1
499
+ filters=128
500
+ size=1
501
+ stride=1
502
+ pad=1
503
+ activation=mish
504
+
505
+ [convolutional]
506
+ batch_normalize=1
507
+ filters=128
508
+ size=3
509
+ stride=1
510
+ pad=1
511
+ activation=mish
512
+
513
+ [shortcut]
514
+ from=-3
515
+ activation=linear
516
+
517
+ [convolutional]
518
+ batch_normalize=1
519
+ filters=128
520
+ size=1
521
+ stride=1
522
+ pad=1
523
+ activation=mish
524
+
525
+ [convolutional]
526
+ batch_normalize=1
527
+ filters=128
528
+ size=3
529
+ stride=1
530
+ pad=1
531
+ activation=mish
532
+
533
+ [shortcut]
534
+ from=-3
535
+ activation=linear
536
+
537
+ [convolutional]
538
+ batch_normalize=1
539
+ filters=128
540
+ size=1
541
+ stride=1
542
+ pad=1
543
+ activation=mish
544
+
545
+ [convolutional]
546
+ batch_normalize=1
547
+ filters=128
548
+ size=3
549
+ stride=1
550
+ pad=1
551
+ activation=mish
552
+
553
+ [shortcut]
554
+ from=-3
555
+ activation=linear
556
+
557
+ [convolutional]
558
+ batch_normalize=1
559
+ filters=128
560
+ size=1
561
+ stride=1
562
+ pad=1
563
+ activation=mish
564
+
565
+ [convolutional]
566
+ batch_normalize=1
567
+ filters=128
568
+ size=3
569
+ stride=1
570
+ pad=1
571
+ activation=mish
572
+
573
+ [shortcut]
574
+ from=-3
575
+ activation=linear
576
+
577
+ # Transition first
578
+
579
+ [convolutional]
580
+ batch_normalize=1
581
+ filters=128
582
+ size=1
583
+ stride=1
584
+ pad=1
585
+ activation=mish
586
+
587
+ # Merge [-1, -(3k+4)]
588
+
589
+ [route]
590
+ layers = -1,-49
591
+
592
+ # Transition last
593
+
594
+ # 78 (previous+7+3k)
595
+ [convolutional]
596
+ batch_normalize=1
597
+ filters=256
598
+ size=1
599
+ stride=1
600
+ pad=1
601
+ activation=mish
602
+
603
+
604
+ # P4
605
+
606
+ # Downsample
607
+
608
+ [convolutional]
609
+ batch_normalize=1
610
+ filters=512
611
+ size=3
612
+ stride=2
613
+ pad=1
614
+ activation=mish
615
+
616
+ # Split
617
+
618
+ [convolutional]
619
+ batch_normalize=1
620
+ filters=256
621
+ size=1
622
+ stride=1
623
+ pad=1
624
+ activation=mish
625
+
626
+ [route]
627
+ layers = -2
628
+
629
+ [convolutional]
630
+ batch_normalize=1
631
+ filters=256
632
+ size=1
633
+ stride=1
634
+ pad=1
635
+ activation=mish
636
+
637
+ # Residual Block
638
+
639
+ [convolutional]
640
+ batch_normalize=1
641
+ filters=256
642
+ size=1
643
+ stride=1
644
+ pad=1
645
+ activation=mish
646
+
647
+ [convolutional]
648
+ batch_normalize=1
649
+ filters=256
650
+ size=3
651
+ stride=1
652
+ pad=1
653
+ activation=mish
654
+
655
+ [shortcut]
656
+ from=-3
657
+ activation=linear
658
+
659
+ [convolutional]
660
+ batch_normalize=1
661
+ filters=256
662
+ size=1
663
+ stride=1
664
+ pad=1
665
+ activation=mish
666
+
667
+ [convolutional]
668
+ batch_normalize=1
669
+ filters=256
670
+ size=3
671
+ stride=1
672
+ pad=1
673
+ activation=mish
674
+
675
+ [shortcut]
676
+ from=-3
677
+ activation=linear
678
+
679
+ [convolutional]
680
+ batch_normalize=1
681
+ filters=256
682
+ size=1
683
+ stride=1
684
+ pad=1
685
+ activation=mish
686
+
687
+ [convolutional]
688
+ batch_normalize=1
689
+ filters=256
690
+ size=3
691
+ stride=1
692
+ pad=1
693
+ activation=mish
694
+
695
+ [shortcut]
696
+ from=-3
697
+ activation=linear
698
+
699
+ [convolutional]
700
+ batch_normalize=1
701
+ filters=256
702
+ size=1
703
+ stride=1
704
+ pad=1
705
+ activation=mish
706
+
707
+ [convolutional]
708
+ batch_normalize=1
709
+ filters=256
710
+ size=3
711
+ stride=1
712
+ pad=1
713
+ activation=mish
714
+
715
+ [shortcut]
716
+ from=-3
717
+ activation=linear
718
+
719
+ [convolutional]
720
+ batch_normalize=1
721
+ filters=256
722
+ size=1
723
+ stride=1
724
+ pad=1
725
+ activation=mish
726
+
727
+ [convolutional]
728
+ batch_normalize=1
729
+ filters=256
730
+ size=3
731
+ stride=1
732
+ pad=1
733
+ activation=mish
734
+
735
+ [shortcut]
736
+ from=-3
737
+ activation=linear
738
+
739
+ [convolutional]
740
+ batch_normalize=1
741
+ filters=256
742
+ size=1
743
+ stride=1
744
+ pad=1
745
+ activation=mish
746
+
747
+ [convolutional]
748
+ batch_normalize=1
749
+ filters=256
750
+ size=3
751
+ stride=1
752
+ pad=1
753
+ activation=mish
754
+
755
+ [shortcut]
756
+ from=-3
757
+ activation=linear
758
+
759
+ [convolutional]
760
+ batch_normalize=1
761
+ filters=256
762
+ size=1
763
+ stride=1
764
+ pad=1
765
+ activation=mish
766
+
767
+ [convolutional]
768
+ batch_normalize=1
769
+ filters=256
770
+ size=3
771
+ stride=1
772
+ pad=1
773
+ activation=mish
774
+
775
+ [shortcut]
776
+ from=-3
777
+ activation=linear
778
+
779
+ [convolutional]
780
+ batch_normalize=1
781
+ filters=256
782
+ size=1
783
+ stride=1
784
+ pad=1
785
+ activation=mish
786
+
787
+ [convolutional]
788
+ batch_normalize=1
789
+ filters=256
790
+ size=3
791
+ stride=1
792
+ pad=1
793
+ activation=mish
794
+
795
+ [shortcut]
796
+ from=-3
797
+ activation=linear
798
+
799
+ [convolutional]
800
+ batch_normalize=1
801
+ filters=256
802
+ size=1
803
+ stride=1
804
+ pad=1
805
+ activation=mish
806
+
807
+ [convolutional]
808
+ batch_normalize=1
809
+ filters=256
810
+ size=3
811
+ stride=1
812
+ pad=1
813
+ activation=mish
814
+
815
+ [shortcut]
816
+ from=-3
817
+ activation=linear
818
+
819
+ [convolutional]
820
+ batch_normalize=1
821
+ filters=256
822
+ size=1
823
+ stride=1
824
+ pad=1
825
+ activation=mish
826
+
827
+ [convolutional]
828
+ batch_normalize=1
829
+ filters=256
830
+ size=3
831
+ stride=1
832
+ pad=1
833
+ activation=mish
834
+
835
+ [shortcut]
836
+ from=-3
837
+ activation=linear
838
+
839
+ [convolutional]
840
+ batch_normalize=1
841
+ filters=256
842
+ size=1
843
+ stride=1
844
+ pad=1
845
+ activation=mish
846
+
847
+ [convolutional]
848
+ batch_normalize=1
849
+ filters=256
850
+ size=3
851
+ stride=1
852
+ pad=1
853
+ activation=mish
854
+
855
+ [shortcut]
856
+ from=-3
857
+ activation=linear
858
+
859
+ [convolutional]
860
+ batch_normalize=1
861
+ filters=256
862
+ size=1
863
+ stride=1
864
+ pad=1
865
+ activation=mish
866
+
867
+ [convolutional]
868
+ batch_normalize=1
869
+ filters=256
870
+ size=3
871
+ stride=1
872
+ pad=1
873
+ activation=mish
874
+
875
+ [shortcut]
876
+ from=-3
877
+ activation=linear
878
+
879
+ [convolutional]
880
+ batch_normalize=1
881
+ filters=256
882
+ size=1
883
+ stride=1
884
+ pad=1
885
+ activation=mish
886
+
887
+ [convolutional]
888
+ batch_normalize=1
889
+ filters=256
890
+ size=3
891
+ stride=1
892
+ pad=1
893
+ activation=mish
894
+
895
+ [shortcut]
896
+ from=-3
897
+ activation=linear
898
+
899
+ [convolutional]
900
+ batch_normalize=1
901
+ filters=256
902
+ size=1
903
+ stride=1
904
+ pad=1
905
+ activation=mish
906
+
907
+ [convolutional]
908
+ batch_normalize=1
909
+ filters=256
910
+ size=3
911
+ stride=1
912
+ pad=1
913
+ activation=mish
914
+
915
+ [shortcut]
916
+ from=-3
917
+ activation=linear
918
+
919
+ [convolutional]
920
+ batch_normalize=1
921
+ filters=256
922
+ size=1
923
+ stride=1
924
+ pad=1
925
+ activation=mish
926
+
927
+ [convolutional]
928
+ batch_normalize=1
929
+ filters=256
930
+ size=3
931
+ stride=1
932
+ pad=1
933
+ activation=mish
934
+
935
+ [shortcut]
936
+ from=-3
937
+ activation=linear
938
+
939
+ # Transition first
940
+
941
+ [convolutional]
942
+ batch_normalize=1
943
+ filters=256
944
+ size=1
945
+ stride=1
946
+ pad=1
947
+ activation=mish
948
+
949
+ # Merge [-1, -(3k+4)]
950
+
951
+ [route]
952
+ layers = -1,-49
953
+
954
+ # Transition last
955
+
956
+ # 130 (previous+7+3k)
957
+ [convolutional]
958
+ batch_normalize=1
959
+ filters=512
960
+ size=1
961
+ stride=1
962
+ pad=1
963
+ activation=mish
964
+
965
+
966
+ # P5
967
+
968
+ # Downsample
969
+
970
+ [convolutional]
971
+ batch_normalize=1
972
+ filters=1024
973
+ size=3
974
+ stride=2
975
+ pad=1
976
+ activation=mish
977
+
978
+ # Split
979
+
980
+ [convolutional]
981
+ batch_normalize=1
982
+ filters=512
983
+ size=1
984
+ stride=1
985
+ pad=1
986
+ activation=mish
987
+
988
+ [route]
989
+ layers = -2
990
+
991
+ [convolutional]
992
+ batch_normalize=1
993
+ filters=512
994
+ size=1
995
+ stride=1
996
+ pad=1
997
+ activation=mish
998
+
999
+ # Residual Block
1000
+
1001
+ [convolutional]
1002
+ batch_normalize=1
1003
+ filters=512
1004
+ size=1
1005
+ stride=1
1006
+ pad=1
1007
+ activation=mish
1008
+
1009
+ [convolutional]
1010
+ batch_normalize=1
1011
+ filters=512
1012
+ size=3
1013
+ stride=1
1014
+ pad=1
1015
+ activation=mish
1016
+
1017
+ [shortcut]
1018
+ from=-3
1019
+ activation=linear
1020
+
1021
+ [convolutional]
1022
+ batch_normalize=1
1023
+ filters=512
1024
+ size=1
1025
+ stride=1
1026
+ pad=1
1027
+ activation=mish
1028
+
1029
+ [convolutional]
1030
+ batch_normalize=1
1031
+ filters=512
1032
+ size=3
1033
+ stride=1
1034
+ pad=1
1035
+ activation=mish
1036
+
1037
+ [shortcut]
1038
+ from=-3
1039
+ activation=linear
1040
+
1041
+ [convolutional]
1042
+ batch_normalize=1
1043
+ filters=512
1044
+ size=1
1045
+ stride=1
1046
+ pad=1
1047
+ activation=mish
1048
+
1049
+ [convolutional]
1050
+ batch_normalize=1
1051
+ filters=512
1052
+ size=3
1053
+ stride=1
1054
+ pad=1
1055
+ activation=mish
1056
+
1057
+ [shortcut]
1058
+ from=-3
1059
+ activation=linear
1060
+
1061
+ [convolutional]
1062
+ batch_normalize=1
1063
+ filters=512
1064
+ size=1
1065
+ stride=1
1066
+ pad=1
1067
+ activation=mish
1068
+
1069
+ [convolutional]
1070
+ batch_normalize=1
1071
+ filters=512
1072
+ size=3
1073
+ stride=1
1074
+ pad=1
1075
+ activation=mish
1076
+
1077
+ [shortcut]
1078
+ from=-3
1079
+ activation=linear
1080
+
1081
+ [convolutional]
1082
+ batch_normalize=1
1083
+ filters=512
1084
+ size=1
1085
+ stride=1
1086
+ pad=1
1087
+ activation=mish
1088
+
1089
+ [convolutional]
1090
+ batch_normalize=1
1091
+ filters=512
1092
+ size=3
1093
+ stride=1
1094
+ pad=1
1095
+ activation=mish
1096
+
1097
+ [shortcut]
1098
+ from=-3
1099
+ activation=linear
1100
+
1101
+ [convolutional]
1102
+ batch_normalize=1
1103
+ filters=512
1104
+ size=1
1105
+ stride=1
1106
+ pad=1
1107
+ activation=mish
1108
+
1109
+ [convolutional]
1110
+ batch_normalize=1
1111
+ filters=512
1112
+ size=3
1113
+ stride=1
1114
+ pad=1
1115
+ activation=mish
1116
+
1117
+ [shortcut]
1118
+ from=-3
1119
+ activation=linear
1120
+
1121
+ [convolutional]
1122
+ batch_normalize=1
1123
+ filters=512
1124
+ size=1
1125
+ stride=1
1126
+ pad=1
1127
+ activation=mish
1128
+
1129
+ [convolutional]
1130
+ batch_normalize=1
1131
+ filters=512
1132
+ size=3
1133
+ stride=1
1134
+ pad=1
1135
+ activation=mish
1136
+
1137
+ [shortcut]
1138
+ from=-3
1139
+ activation=linear
1140
+
1141
+ # Transition first
1142
+
1143
+ [convolutional]
1144
+ batch_normalize=1
1145
+ filters=512
1146
+ size=1
1147
+ stride=1
1148
+ pad=1
1149
+ activation=mish
1150
+
1151
+ # Merge [-1, -(3k+4)]
1152
+
1153
+ [route]
1154
+ layers = -1,-25
1155
+
1156
+ # Transition last
1157
+
1158
+ # 158 (previous+7+3k)
1159
+ [convolutional]
1160
+ batch_normalize=1
1161
+ filters=1024
1162
+ size=1
1163
+ stride=1
1164
+ pad=1
1165
+ activation=mish
1166
+
1167
+
1168
+ # P6
1169
+
1170
+ # Downsample
1171
+
1172
+ [convolutional]
1173
+ batch_normalize=1
1174
+ filters=1024
1175
+ size=3
1176
+ stride=2
1177
+ pad=1
1178
+ activation=mish
1179
+
1180
+ # Split
1181
+
1182
+ [convolutional]
1183
+ batch_normalize=1
1184
+ filters=512
1185
+ size=1
1186
+ stride=1
1187
+ pad=1
1188
+ activation=mish
1189
+
1190
+ [route]
1191
+ layers = -2
1192
+
1193
+ [convolutional]
1194
+ batch_normalize=1
1195
+ filters=512
1196
+ size=1
1197
+ stride=1
1198
+ pad=1
1199
+ activation=mish
1200
+
1201
+ # Residual Block
1202
+
1203
+ [convolutional]
1204
+ batch_normalize=1
1205
+ filters=512
1206
+ size=1
1207
+ stride=1
1208
+ pad=1
1209
+ activation=mish
1210
+
1211
+ [convolutional]
1212
+ batch_normalize=1
1213
+ filters=512
1214
+ size=3
1215
+ stride=1
1216
+ pad=1
1217
+ activation=mish
1218
+
1219
+ [shortcut]
1220
+ from=-3
1221
+ activation=linear
1222
+
1223
+ [convolutional]
1224
+ batch_normalize=1
1225
+ filters=512
1226
+ size=1
1227
+ stride=1
1228
+ pad=1
1229
+ activation=mish
1230
+
1231
+ [convolutional]
1232
+ batch_normalize=1
1233
+ filters=512
1234
+ size=3
1235
+ stride=1
1236
+ pad=1
1237
+ activation=mish
1238
+
1239
+ [shortcut]
1240
+ from=-3
1241
+ activation=linear
1242
+
1243
+ [convolutional]
1244
+ batch_normalize=1
1245
+ filters=512
1246
+ size=1
1247
+ stride=1
1248
+ pad=1
1249
+ activation=mish
1250
+
1251
+ [convolutional]
1252
+ batch_normalize=1
1253
+ filters=512
1254
+ size=3
1255
+ stride=1
1256
+ pad=1
1257
+ activation=mish
1258
+
1259
+ [shortcut]
1260
+ from=-3
1261
+ activation=linear
1262
+
1263
+ [convolutional]
1264
+ batch_normalize=1
1265
+ filters=512
1266
+ size=1
1267
+ stride=1
1268
+ pad=1
1269
+ activation=mish
1270
+
1271
+ [convolutional]
1272
+ batch_normalize=1
1273
+ filters=512
1274
+ size=3
1275
+ stride=1
1276
+ pad=1
1277
+ activation=mish
1278
+
1279
+ [shortcut]
1280
+ from=-3
1281
+ activation=linear
1282
+
1283
+ [convolutional]
1284
+ batch_normalize=1
1285
+ filters=512
1286
+ size=1
1287
+ stride=1
1288
+ pad=1
1289
+ activation=mish
1290
+
1291
+ [convolutional]
1292
+ batch_normalize=1
1293
+ filters=512
1294
+ size=3
1295
+ stride=1
1296
+ pad=1
1297
+ activation=mish
1298
+
1299
+ [shortcut]
1300
+ from=-3
1301
+ activation=linear
1302
+
1303
+ [convolutional]
1304
+ batch_normalize=1
1305
+ filters=512
1306
+ size=1
1307
+ stride=1
1308
+ pad=1
1309
+ activation=mish
1310
+
1311
+ [convolutional]
1312
+ batch_normalize=1
1313
+ filters=512
1314
+ size=3
1315
+ stride=1
1316
+ pad=1
1317
+ activation=mish
1318
+
1319
+ [shortcut]
1320
+ from=-3
1321
+ activation=linear
1322
+
1323
+ [convolutional]
1324
+ batch_normalize=1
1325
+ filters=512
1326
+ size=1
1327
+ stride=1
1328
+ pad=1
1329
+ activation=mish
1330
+
1331
+ [convolutional]
1332
+ batch_normalize=1
1333
+ filters=512
1334
+ size=3
1335
+ stride=1
1336
+ pad=1
1337
+ activation=mish
1338
+
1339
+ [shortcut]
1340
+ from=-3
1341
+ activation=linear
1342
+
1343
+ # Transition first
1344
+
1345
+ [convolutional]
1346
+ batch_normalize=1
1347
+ filters=512
1348
+ size=1
1349
+ stride=1
1350
+ pad=1
1351
+ activation=mish
1352
+
1353
+ # Merge [-1, -(3k+4)]
1354
+
1355
+ [route]
1356
+ layers = -1,-25
1357
+
1358
+ # Transition last
1359
+
1360
+ # 186 (previous+7+3k)
1361
+ [convolutional]
1362
+ batch_normalize=1
1363
+ filters=1024
1364
+ size=1
1365
+ stride=1
1366
+ pad=1
1367
+ activation=mish
1368
+
1369
+ # ============ End of Backbone ============ #
1370
+
1371
+ # ============ Neck ============ #
1372
+
1373
+ # CSPSPP
1374
+
1375
+ [convolutional]
1376
+ batch_normalize=1
1377
+ filters=512
1378
+ size=1
1379
+ stride=1
1380
+ pad=1
1381
+ activation=mish
1382
+
1383
+ [route]
1384
+ layers = -2
1385
+
1386
+ [convolutional]
1387
+ batch_normalize=1
1388
+ filters=512
1389
+ size=1
1390
+ stride=1
1391
+ pad=1
1392
+ activation=mish
1393
+
1394
+ [convolutional]
1395
+ batch_normalize=1
1396
+ size=3
1397
+ stride=1
1398
+ pad=1
1399
+ filters=512
1400
+ activation=mish
1401
+
1402
+ [convolutional]
1403
+ batch_normalize=1
1404
+ filters=512
1405
+ size=1
1406
+ stride=1
1407
+ pad=1
1408
+ activation=mish
1409
+
1410
+ ### SPP ###
1411
+ [maxpool]
1412
+ stride=1
1413
+ size=5
1414
+
1415
+ [route]
1416
+ layers=-2
1417
+
1418
+ [maxpool]
1419
+ stride=1
1420
+ size=9
1421
+
1422
+ [route]
1423
+ layers=-4
1424
+
1425
+ [maxpool]
1426
+ stride=1
1427
+ size=13
1428
+
1429
+ [route]
1430
+ layers=-1,-3,-5,-6
1431
+ ### End SPP ###
1432
+
1433
+ [convolutional]
1434
+ batch_normalize=1
1435
+ filters=512
1436
+ size=1
1437
+ stride=1
1438
+ pad=1
1439
+ activation=mish
1440
+
1441
+ [convolutional]
1442
+ batch_normalize=1
1443
+ size=3
1444
+ stride=1
1445
+ pad=1
1446
+ filters=512
1447
+ activation=mish
1448
+
1449
+ [route]
1450
+ layers = -1, -13
1451
+
1452
+ # 201 (previous+6+5+2k)
1453
+ [convolutional]
1454
+ batch_normalize=1
1455
+ filters=512
1456
+ size=1
1457
+ stride=1
1458
+ pad=1
1459
+ activation=mish
1460
+
1461
+ # End of CSPSPP
1462
+
1463
+
1464
+ # FPN-5
1465
+
1466
+ [convolutional]
1467
+ batch_normalize=1
1468
+ filters=512
1469
+ size=1
1470
+ stride=1
1471
+ pad=1
1472
+ activation=mish
1473
+
1474
+ [upsample]
1475
+ stride=2
1476
+
1477
+ [route]
1478
+ layers = 158
1479
+
1480
+ [convolutional]
1481
+ batch_normalize=1
1482
+ filters=512
1483
+ size=1
1484
+ stride=1
1485
+ pad=1
1486
+ activation=mish
1487
+
1488
+ [route]
1489
+ layers = -1, -3
1490
+
1491
+ [convolutional]
1492
+ batch_normalize=1
1493
+ filters=512
1494
+ size=1
1495
+ stride=1
1496
+ pad=1
1497
+ activation=mish
1498
+
1499
+ # Split
1500
+
1501
+ [convolutional]
1502
+ batch_normalize=1
1503
+ filters=512
1504
+ size=1
1505
+ stride=1
1506
+ pad=1
1507
+ activation=mish
1508
+
1509
+ [route]
1510
+ layers = -2
1511
+
1512
+ # Plain Block
1513
+
1514
+ [convolutional]
1515
+ batch_normalize=1
1516
+ filters=512
1517
+ size=1
1518
+ stride=1
1519
+ pad=1
1520
+ activation=mish
1521
+
1522
+ [convolutional]
1523
+ batch_normalize=1
1524
+ size=3
1525
+ stride=1
1526
+ pad=1
1527
+ filters=512
1528
+ activation=mish
1529
+
1530
+ [convolutional]
1531
+ batch_normalize=1
1532
+ filters=512
1533
+ size=1
1534
+ stride=1
1535
+ pad=1
1536
+ activation=mish
1537
+
1538
+ [convolutional]
1539
+ batch_normalize=1
1540
+ size=3
1541
+ stride=1
1542
+ pad=1
1543
+ filters=512
1544
+ activation=mish
1545
+
1546
+ [convolutional]
1547
+ batch_normalize=1
1548
+ filters=512
1549
+ size=1
1550
+ stride=1
1551
+ pad=1
1552
+ activation=mish
1553
+
1554
+ [convolutional]
1555
+ batch_normalize=1
1556
+ size=3
1557
+ stride=1
1558
+ pad=1
1559
+ filters=512
1560
+ activation=mish
1561
+
1562
+ # Merge [-1, -(2k+2)]
1563
+
1564
+ [route]
1565
+ layers = -1, -8
1566
+
1567
+ # Transition last
1568
+
1569
+ # 217 (previous+6+4+2k)
1570
+ [convolutional]
1571
+ batch_normalize=1
1572
+ filters=512
1573
+ size=1
1574
+ stride=1
1575
+ pad=1
1576
+ activation=mish
1577
+
1578
+
1579
+ # FPN-4
1580
+
1581
+ [convolutional]
1582
+ batch_normalize=1
1583
+ filters=256
1584
+ size=1
1585
+ stride=1
1586
+ pad=1
1587
+ activation=mish
1588
+
1589
+ [upsample]
1590
+ stride=2
1591
+
1592
+ [route]
1593
+ layers = 130
1594
+
1595
+ [convolutional]
1596
+ batch_normalize=1
1597
+ filters=256
1598
+ size=1
1599
+ stride=1
1600
+ pad=1
1601
+ activation=mish
1602
+
1603
+ [route]
1604
+ layers = -1, -3
1605
+
1606
+ [convolutional]
1607
+ batch_normalize=1
1608
+ filters=256
1609
+ size=1
1610
+ stride=1
1611
+ pad=1
1612
+ activation=mish
1613
+
1614
+ # Split
1615
+
1616
+ [convolutional]
1617
+ batch_normalize=1
1618
+ filters=256
1619
+ size=1
1620
+ stride=1
1621
+ pad=1
1622
+ activation=mish
1623
+
1624
+ [route]
1625
+ layers = -2
1626
+
1627
+ # Plain Block
1628
+
1629
+ [convolutional]
1630
+ batch_normalize=1
1631
+ filters=256
1632
+ size=1
1633
+ stride=1
1634
+ pad=1
1635
+ activation=mish
1636
+
1637
+ [convolutional]
1638
+ batch_normalize=1
1639
+ size=3
1640
+ stride=1
1641
+ pad=1
1642
+ filters=256
1643
+ activation=mish
1644
+
1645
+ [convolutional]
1646
+ batch_normalize=1
1647
+ filters=256
1648
+ size=1
1649
+ stride=1
1650
+ pad=1
1651
+ activation=mish
1652
+
1653
+ [convolutional]
1654
+ batch_normalize=1
1655
+ size=3
1656
+ stride=1
1657
+ pad=1
1658
+ filters=256
1659
+ activation=mish
1660
+
1661
+ [convolutional]
1662
+ batch_normalize=1
1663
+ filters=256
1664
+ size=1
1665
+ stride=1
1666
+ pad=1
1667
+ activation=mish
1668
+
1669
+ [convolutional]
1670
+ batch_normalize=1
1671
+ size=3
1672
+ stride=1
1673
+ pad=1
1674
+ filters=256
1675
+ activation=mish
1676
+
1677
+ # Merge [-1, -(2k+2)]
1678
+
1679
+ [route]
1680
+ layers = -1, -8
1681
+
1682
+ # Transition last
1683
+
1684
+ # 233 (previous+6+4+2k)
1685
+ [convolutional]
1686
+ batch_normalize=1
1687
+ filters=256
1688
+ size=1
1689
+ stride=1
1690
+ pad=1
1691
+ activation=mish
1692
+
1693
+
1694
+ # FPN-3
1695
+
1696
+ [convolutional]
1697
+ batch_normalize=1
1698
+ filters=128
1699
+ size=1
1700
+ stride=1
1701
+ pad=1
1702
+ activation=mish
1703
+
1704
+ [upsample]
1705
+ stride=2
1706
+
1707
+ [route]
1708
+ layers = 78
1709
+
1710
+ [convolutional]
1711
+ batch_normalize=1
1712
+ filters=128
1713
+ size=1
1714
+ stride=1
1715
+ pad=1
1716
+ activation=mish
1717
+
1718
+ [route]
1719
+ layers = -1, -3
1720
+
1721
+ [convolutional]
1722
+ batch_normalize=1
1723
+ filters=128
1724
+ size=1
1725
+ stride=1
1726
+ pad=1
1727
+ activation=mish
1728
+
1729
+ # Split
1730
+
1731
+ [convolutional]
1732
+ batch_normalize=1
1733
+ filters=128
1734
+ size=1
1735
+ stride=1
1736
+ pad=1
1737
+ activation=mish
1738
+
1739
+ [route]
1740
+ layers = -2
1741
+
1742
+ # Plain Block
1743
+
1744
+ [convolutional]
1745
+ batch_normalize=1
1746
+ filters=128
1747
+ size=1
1748
+ stride=1
1749
+ pad=1
1750
+ activation=mish
1751
+
1752
+ [convolutional]
1753
+ batch_normalize=1
1754
+ size=3
1755
+ stride=1
1756
+ pad=1
1757
+ filters=128
1758
+ activation=mish
1759
+
1760
+ [convolutional]
1761
+ batch_normalize=1
1762
+ filters=128
1763
+ size=1
1764
+ stride=1
1765
+ pad=1
1766
+ activation=mish
1767
+
1768
+ [convolutional]
1769
+ batch_normalize=1
1770
+ size=3
1771
+ stride=1
1772
+ pad=1
1773
+ filters=128
1774
+ activation=mish
1775
+
1776
+ [convolutional]
1777
+ batch_normalize=1
1778
+ filters=128
1779
+ size=1
1780
+ stride=1
1781
+ pad=1
1782
+ activation=mish
1783
+
1784
+ [convolutional]
1785
+ batch_normalize=1
1786
+ size=3
1787
+ stride=1
1788
+ pad=1
1789
+ filters=128
1790
+ activation=mish
1791
+
1792
+ # Merge [-1, -(2k+2)]
1793
+
1794
+ [route]
1795
+ layers = -1, -8
1796
+
1797
+ # Transition last
1798
+
1799
+ # 249 (previous+6+4+2k)
1800
+ [convolutional]
1801
+ batch_normalize=1
1802
+ filters=128
1803
+ size=1
1804
+ stride=1
1805
+ pad=1
1806
+ activation=mish
1807
+
1808
+
1809
+ # PAN-4
1810
+
1811
+ [convolutional]
1812
+ batch_normalize=1
1813
+ size=3
1814
+ stride=2
1815
+ pad=1
1816
+ filters=256
1817
+ activation=mish
1818
+
1819
+ [route]
1820
+ layers = -1, 233
1821
+
1822
+ [convolutional]
1823
+ batch_normalize=1
1824
+ filters=256
1825
+ size=1
1826
+ stride=1
1827
+ pad=1
1828
+ activation=mish
1829
+
1830
+ # Split
1831
+
1832
+ [convolutional]
1833
+ batch_normalize=1
1834
+ filters=256
1835
+ size=1
1836
+ stride=1
1837
+ pad=1
1838
+ activation=mish
1839
+
1840
+ [route]
1841
+ layers = -2
1842
+
1843
+ # Plain Block
1844
+
1845
+ [convolutional]
1846
+ batch_normalize=1
1847
+ filters=256
1848
+ size=1
1849
+ stride=1
1850
+ pad=1
1851
+ activation=mish
1852
+
1853
+ [convolutional]
1854
+ batch_normalize=1
1855
+ size=3
1856
+ stride=1
1857
+ pad=1
1858
+ filters=256
1859
+ activation=mish
1860
+
1861
+ [convolutional]
1862
+ batch_normalize=1
1863
+ filters=256
1864
+ size=1
1865
+ stride=1
1866
+ pad=1
1867
+ activation=mish
1868
+
1869
+ [convolutional]
1870
+ batch_normalize=1
1871
+ size=3
1872
+ stride=1
1873
+ pad=1
1874
+ filters=256
1875
+ activation=mish
1876
+
1877
+ [convolutional]
1878
+ batch_normalize=1
1879
+ filters=256
1880
+ size=1
1881
+ stride=1
1882
+ pad=1
1883
+ activation=mish
1884
+
1885
+ [convolutional]
1886
+ batch_normalize=1
1887
+ size=3
1888
+ stride=1
1889
+ pad=1
1890
+ filters=256
1891
+ activation=mish
1892
+
1893
+ [route]
1894
+ layers = -1,-8
1895
+
1896
+ # Transition last
1897
+
1898
+ # 262 (previous+3+4+2k)
1899
+ [convolutional]
1900
+ batch_normalize=1
1901
+ filters=256
1902
+ size=1
1903
+ stride=1
1904
+ pad=1
1905
+ activation=mish
1906
+
1907
+
1908
+ # PAN-5
1909
+
1910
+ [convolutional]
1911
+ batch_normalize=1
1912
+ size=3
1913
+ stride=2
1914
+ pad=1
1915
+ filters=512
1916
+ activation=mish
1917
+
1918
+ [route]
1919
+ layers = -1, 217
1920
+
1921
+ [convolutional]
1922
+ batch_normalize=1
1923
+ filters=512
1924
+ size=1
1925
+ stride=1
1926
+ pad=1
1927
+ activation=mish
1928
+
1929
+ # Split
1930
+
1931
+ [convolutional]
1932
+ batch_normalize=1
1933
+ filters=512
1934
+ size=1
1935
+ stride=1
1936
+ pad=1
1937
+ activation=mish
1938
+
1939
+ [route]
1940
+ layers = -2
1941
+
1942
+ # Plain Block
1943
+
1944
+ [convolutional]
1945
+ batch_normalize=1
1946
+ filters=512
1947
+ size=1
1948
+ stride=1
1949
+ pad=1
1950
+ activation=mish
1951
+
1952
+ [convolutional]
1953
+ batch_normalize=1
1954
+ size=3
1955
+ stride=1
1956
+ pad=1
1957
+ filters=512
1958
+ activation=mish
1959
+
1960
+ [convolutional]
1961
+ batch_normalize=1
1962
+ filters=512
1963
+ size=1
1964
+ stride=1
1965
+ pad=1
1966
+ activation=mish
1967
+
1968
+ [convolutional]
1969
+ batch_normalize=1
1970
+ size=3
1971
+ stride=1
1972
+ pad=1
1973
+ filters=512
1974
+ activation=mish
1975
+
1976
+ [convolutional]
1977
+ batch_normalize=1
1978
+ filters=512
1979
+ size=1
1980
+ stride=1
1981
+ pad=1
1982
+ activation=mish
1983
+
1984
+ [convolutional]
1985
+ batch_normalize=1
1986
+ size=3
1987
+ stride=1
1988
+ pad=1
1989
+ filters=512
1990
+ activation=mish
1991
+
1992
+ [route]
1993
+ layers = -1,-8
1994
+
1995
+ # Transition last
1996
+
1997
+ # 275 (previous+3+4+2k)
1998
+ [convolutional]
1999
+ batch_normalize=1
2000
+ filters=512
2001
+ size=1
2002
+ stride=1
2003
+ pad=1
2004
+ activation=mish
2005
+
2006
+
2007
+ # PAN-6
2008
+
2009
+ [convolutional]
2010
+ batch_normalize=1
2011
+ size=3
2012
+ stride=2
2013
+ pad=1
2014
+ filters=512
2015
+ activation=mish
2016
+
2017
+ [route]
2018
+ layers = -1, 201
2019
+
2020
+ [convolutional]
2021
+ batch_normalize=1
2022
+ filters=512
2023
+ size=1
2024
+ stride=1
2025
+ pad=1
2026
+ activation=mish
2027
+
2028
+ # Split
2029
+
2030
+ [convolutional]
2031
+ batch_normalize=1
2032
+ filters=512
2033
+ size=1
2034
+ stride=1
2035
+ pad=1
2036
+ activation=mish
2037
+
2038
+ [route]
2039
+ layers = -2
2040
+
2041
+ # Plain Block
2042
+
2043
+ [convolutional]
2044
+ batch_normalize=1
2045
+ filters=512
2046
+ size=1
2047
+ stride=1
2048
+ pad=1
2049
+ activation=mish
2050
+
2051
+ [convolutional]
2052
+ batch_normalize=1
2053
+ size=3
2054
+ stride=1
2055
+ pad=1
2056
+ filters=512
2057
+ activation=mish
2058
+
2059
+ [convolutional]
2060
+ batch_normalize=1
2061
+ filters=512
2062
+ size=1
2063
+ stride=1
2064
+ pad=1
2065
+ activation=mish
2066
+
2067
+ [convolutional]
2068
+ batch_normalize=1
2069
+ size=3
2070
+ stride=1
2071
+ pad=1
2072
+ filters=512
2073
+ activation=mish
2074
+
2075
+ [convolutional]
2076
+ batch_normalize=1
2077
+ filters=512
2078
+ size=1
2079
+ stride=1
2080
+ pad=1
2081
+ activation=mish
2082
+
2083
+ [convolutional]
2084
+ batch_normalize=1
2085
+ size=3
2086
+ stride=1
2087
+ pad=1
2088
+ filters=512
2089
+ activation=mish
2090
+
2091
+ [route]
2092
+ layers = -1,-8
2093
+
2094
+ # Transition last
2095
+
2096
+ # 288 (previous+3+4+2k)
2097
+ [convolutional]
2098
+ batch_normalize=1
2099
+ filters=512
2100
+ size=1
2101
+ stride=1
2102
+ pad=1
2103
+ activation=mish
2104
+
2105
+ # ============ End of Neck ============ #
2106
+
2107
+ # ============ Head ============ #
2108
+
2109
+ # YOLO-3
2110
+
2111
+ [route]
2112
+ layers = 249
2113
+
2114
+ [convolutional]
2115
+ batch_normalize=1
2116
+ size=3
2117
+ stride=1
2118
+ pad=1
2119
+ filters=256
2120
+ activation=mish
2121
+
2122
+ [convolutional]
2123
+ size=1
2124
+ stride=1
2125
+ pad=1
2126
+ filters=340
2127
+ activation=linear
2128
+
2129
+ [yolo]
2130
+ mask = 0,1,2,3
2131
+ anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024
2132
+ classes=80
2133
+ num=16
2134
+ jitter=.3
2135
+ ignore_thresh = .7
2136
+ truth_thresh = 1
2137
+ random=1
2138
+ scale_x_y = 1.05
2139
+ iou_thresh=0.213
2140
+ cls_normalizer=1.0
2141
+ iou_normalizer=0.07
2142
+ iou_loss=ciou
2143
+ nms_kind=greedynms
2144
+ beta_nms=0.6
2145
+
2146
+
2147
+ # YOLO-4
2148
+
2149
+ [route]
2150
+ layers = 262
2151
+
2152
+ [convolutional]
2153
+ batch_normalize=1
2154
+ size=3
2155
+ stride=1
2156
+ pad=1
2157
+ filters=512
2158
+ activation=mish
2159
+
2160
+ [convolutional]
2161
+ size=1
2162
+ stride=1
2163
+ pad=1
2164
+ filters=340
2165
+ activation=linear
2166
+
2167
+ [yolo]
2168
+ mask = 4,5,6,7
2169
+ anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024
2170
+ classes=80
2171
+ num=16
2172
+ jitter=.3
2173
+ ignore_thresh = .7
2174
+ truth_thresh = 1
2175
+ random=1
2176
+ scale_x_y = 1.05
2177
+ iou_thresh=0.213
2178
+ cls_normalizer=1.0
2179
+ iou_normalizer=0.07
2180
+ iou_loss=ciou
2181
+ nms_kind=greedynms
2182
+ beta_nms=0.6
2183
+
2184
+
2185
+ # YOLO-5
2186
+
2187
+ [route]
2188
+ layers = 275
2189
+
2190
+ [convolutional]
2191
+ batch_normalize=1
2192
+ size=3
2193
+ stride=1
2194
+ pad=1
2195
+ filters=1024
2196
+ activation=mish
2197
+
2198
+ [convolutional]
2199
+ size=1
2200
+ stride=1
2201
+ pad=1
2202
+ filters=340
2203
+ activation=linear
2204
+
2205
+ [yolo]
2206
+ mask = 8,9,10,11
2207
+ anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024
2208
+ classes=80
2209
+ num=16
2210
+ jitter=.3
2211
+ ignore_thresh = .7
2212
+ truth_thresh = 1
2213
+ random=1
2214
+ scale_x_y = 1.05
2215
+ iou_thresh=0.213
2216
+ cls_normalizer=1.0
2217
+ iou_normalizer=0.07
2218
+ iou_loss=ciou
2219
+ nms_kind=greedynms
2220
+ beta_nms=0.6
2221
+
2222
+
2223
+ # YOLO-6
2224
+
2225
+ [route]
2226
+ layers = 288
2227
+
2228
+ [convolutional]
2229
+ batch_normalize=1
2230
+ size=3
2231
+ stride=1
2232
+ pad=1
2233
+ filters=1024
2234
+ activation=mish
2235
+
2236
+ [convolutional]
2237
+ size=1
2238
+ stride=1
2239
+ pad=1
2240
+ filters=340
2241
+ activation=linear
2242
+
2243
+ [yolo]
2244
+ mask = 12,13,14,15
2245
+ anchors = 13,17, 31,25, 24,51, 61,45, 61,45, 48,102, 119,96, 97,189, 97,189, 217,184, 171,384, 324,451, 324,451, 545,357, 616,618, 1024,1024
2246
+ classes=80
2247
+ num=16
2248
+ jitter=.3
2249
+ ignore_thresh = .7
2250
+ truth_thresh = 1
2251
+ random=1
2252
+ scale_x_y = 1.05
2253
+ iou_thresh=0.213
2254
+ cls_normalizer=1.0
2255
+ iou_normalizer=0.07
2256
+ iou_loss=ciou
2257
+ nms_kind=greedynms
2258
+ beta_nms=0.6
2259
+
2260
+ # ============ End of Head ============ #
cfg/yolov4_p7.cfg ADDED
@@ -0,0 +1,2714 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [net]
2
+ batch=64
3
+ subdivisions=8
4
+ width=1536
5
+ height=1536
6
+ channels=3
7
+ momentum=0.949
8
+ decay=0.0005
9
+ angle=0
10
+ saturation = 1.5
11
+ exposure = 1.5
12
+ hue=.1
13
+
14
+ learning_rate=0.00261
15
+ burn_in=1000
16
+ max_batches = 500500
17
+ policy=steps
18
+ steps=400000,450000
19
+ scales=.1,.1
20
+
21
+ mosaic=1
22
+
23
+
24
+ # ============ Backbone ============ #
25
+
26
+ # Stem
27
+
28
+ # 0
29
+ [convolutional]
30
+ batch_normalize=1
31
+ filters=40
32
+ size=3
33
+ stride=1
34
+ pad=1
35
+ activation=mish
36
+
37
+
38
+ # P1
39
+
40
+ # Downsample
41
+
42
+ [convolutional]
43
+ batch_normalize=1
44
+ filters=80
45
+ size=3
46
+ stride=2
47
+ pad=1
48
+ activation=mish
49
+
50
+ # Split
51
+
52
+ [convolutional]
53
+ batch_normalize=1
54
+ filters=40
55
+ size=1
56
+ stride=1
57
+ pad=1
58
+ activation=mish
59
+
60
+ [route]
61
+ layers = -2
62
+
63
+ [convolutional]
64
+ batch_normalize=1
65
+ filters=40
66
+ size=1
67
+ stride=1
68
+ pad=1
69
+ activation=mish
70
+
71
+ # Residual Block
72
+
73
+ [convolutional]
74
+ batch_normalize=1
75
+ filters=40
76
+ size=1
77
+ stride=1
78
+ pad=1
79
+ activation=mish
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=40
84
+ size=3
85
+ stride=1
86
+ pad=1
87
+ activation=mish
88
+
89
+ [shortcut]
90
+ from=-3
91
+ activation=linear
92
+
93
+ # Transition first
94
+
95
+ [convolutional]
96
+ batch_normalize=1
97
+ filters=40
98
+ size=1
99
+ stride=1
100
+ pad=1
101
+ activation=mish
102
+
103
+ # Merge [-1, -(3k+4)]
104
+
105
+ [route]
106
+ layers = -1,-7
107
+
108
+ # Transition last
109
+
110
+ # 10 (previous+7+3k)
111
+ [convolutional]
112
+ batch_normalize=1
113
+ filters=80
114
+ size=1
115
+ stride=1
116
+ pad=1
117
+ activation=mish
118
+
119
+
120
+ # P2
121
+
122
+ # Downsample
123
+
124
+ [convolutional]
125
+ batch_normalize=1
126
+ filters=160
127
+ size=3
128
+ stride=2
129
+ pad=1
130
+ activation=mish
131
+
132
+ # Split
133
+
134
+ [convolutional]
135
+ batch_normalize=1
136
+ filters=80
137
+ size=1
138
+ stride=1
139
+ pad=1
140
+ activation=mish
141
+
142
+ [route]
143
+ layers = -2
144
+
145
+ [convolutional]
146
+ batch_normalize=1
147
+ filters=80
148
+ size=1
149
+ stride=1
150
+ pad=1
151
+ activation=mish
152
+
153
+ # Residual Block
154
+
155
+ [convolutional]
156
+ batch_normalize=1
157
+ filters=80
158
+ size=1
159
+ stride=1
160
+ pad=1
161
+ activation=mish
162
+
163
+ [convolutional]
164
+ batch_normalize=1
165
+ filters=80
166
+ size=3
167
+ stride=1
168
+ pad=1
169
+ activation=mish
170
+
171
+ [shortcut]
172
+ from=-3
173
+ activation=linear
174
+
175
+ [convolutional]
176
+ batch_normalize=1
177
+ filters=80
178
+ size=1
179
+ stride=1
180
+ pad=1
181
+ activation=mish
182
+
183
+ [convolutional]
184
+ batch_normalize=1
185
+ filters=80
186
+ size=3
187
+ stride=1
188
+ pad=1
189
+ activation=mish
190
+
191
+ [shortcut]
192
+ from=-3
193
+ activation=linear
194
+
195
+ [convolutional]
196
+ batch_normalize=1
197
+ filters=80
198
+ size=1
199
+ stride=1
200
+ pad=1
201
+ activation=mish
202
+
203
+ [convolutional]
204
+ batch_normalize=1
205
+ filters=80
206
+ size=3
207
+ stride=1
208
+ pad=1
209
+ activation=mish
210
+
211
+ [shortcut]
212
+ from=-3
213
+ activation=linear
214
+
215
+ # Transition first
216
+
217
+ [convolutional]
218
+ batch_normalize=1
219
+ filters=80
220
+ size=1
221
+ stride=1
222
+ pad=1
223
+ activation=mish
224
+
225
+ # Merge [-1, -(3k+4)]
226
+
227
+ [route]
228
+ layers = -1,-13
229
+
230
+ # Transition last
231
+
232
+ # 26 (previous+7+3k)
233
+ [convolutional]
234
+ batch_normalize=1
235
+ filters=160
236
+ size=1
237
+ stride=1
238
+ pad=1
239
+ activation=mish
240
+
241
+
242
+ # P3
243
+
244
+ # Downsample
245
+
246
+ [convolutional]
247
+ batch_normalize=1
248
+ filters=320
249
+ size=3
250
+ stride=2
251
+ pad=1
252
+ activation=mish
253
+
254
+ # Split
255
+
256
+ [convolutional]
257
+ batch_normalize=1
258
+ filters=160
259
+ size=1
260
+ stride=1
261
+ pad=1
262
+ activation=mish
263
+
264
+ [route]
265
+ layers = -2
266
+
267
+ [convolutional]
268
+ batch_normalize=1
269
+ filters=160
270
+ size=1
271
+ stride=1
272
+ pad=1
273
+ activation=mish
274
+
275
+ # Residual Block
276
+
277
+ [convolutional]
278
+ batch_normalize=1
279
+ filters=160
280
+ size=1
281
+ stride=1
282
+ pad=1
283
+ activation=mish
284
+
285
+ [convolutional]
286
+ batch_normalize=1
287
+ filters=160
288
+ size=3
289
+ stride=1
290
+ pad=1
291
+ activation=mish
292
+
293
+ [shortcut]
294
+ from=-3
295
+ activation=linear
296
+
297
+ [convolutional]
298
+ batch_normalize=1
299
+ filters=160
300
+ size=1
301
+ stride=1
302
+ pad=1
303
+ activation=mish
304
+
305
+ [convolutional]
306
+ batch_normalize=1
307
+ filters=160
308
+ size=3
309
+ stride=1
310
+ pad=1
311
+ activation=mish
312
+
313
+ [shortcut]
314
+ from=-3
315
+ activation=linear
316
+
317
+ [convolutional]
318
+ batch_normalize=1
319
+ filters=160
320
+ size=1
321
+ stride=1
322
+ pad=1
323
+ activation=mish
324
+
325
+ [convolutional]
326
+ batch_normalize=1
327
+ filters=160
328
+ size=3
329
+ stride=1
330
+ pad=1
331
+ activation=mish
332
+
333
+ [shortcut]
334
+ from=-3
335
+ activation=linear
336
+
337
+ [convolutional]
338
+ batch_normalize=1
339
+ filters=160
340
+ size=1
341
+ stride=1
342
+ pad=1
343
+ activation=mish
344
+
345
+ [convolutional]
346
+ batch_normalize=1
347
+ filters=160
348
+ size=3
349
+ stride=1
350
+ pad=1
351
+ activation=mish
352
+
353
+ [shortcut]
354
+ from=-3
355
+ activation=linear
356
+
357
+ [convolutional]
358
+ batch_normalize=1
359
+ filters=160
360
+ size=1
361
+ stride=1
362
+ pad=1
363
+ activation=mish
364
+
365
+ [convolutional]
366
+ batch_normalize=1
367
+ filters=160
368
+ size=3
369
+ stride=1
370
+ pad=1
371
+ activation=mish
372
+
373
+ [shortcut]
374
+ from=-3
375
+ activation=linear
376
+
377
+ [convolutional]
378
+ batch_normalize=1
379
+ filters=160
380
+ size=1
381
+ stride=1
382
+ pad=1
383
+ activation=mish
384
+
385
+ [convolutional]
386
+ batch_normalize=1
387
+ filters=160
388
+ size=3
389
+ stride=1
390
+ pad=1
391
+ activation=mish
392
+
393
+ [shortcut]
394
+ from=-3
395
+ activation=linear
396
+
397
+ [convolutional]
398
+ batch_normalize=1
399
+ filters=160
400
+ size=1
401
+ stride=1
402
+ pad=1
403
+ activation=mish
404
+
405
+ [convolutional]
406
+ batch_normalize=1
407
+ filters=160
408
+ size=3
409
+ stride=1
410
+ pad=1
411
+ activation=mish
412
+
413
+ [shortcut]
414
+ from=-3
415
+ activation=linear
416
+
417
+ [convolutional]
418
+ batch_normalize=1
419
+ filters=160
420
+ size=1
421
+ stride=1
422
+ pad=1
423
+ activation=mish
424
+
425
+ [convolutional]
426
+ batch_normalize=1
427
+ filters=160
428
+ size=3
429
+ stride=1
430
+ pad=1
431
+ activation=mish
432
+
433
+ [shortcut]
434
+ from=-3
435
+ activation=linear
436
+
437
+ [convolutional]
438
+ batch_normalize=1
439
+ filters=160
440
+ size=1
441
+ stride=1
442
+ pad=1
443
+ activation=mish
444
+
445
+ [convolutional]
446
+ batch_normalize=1
447
+ filters=160
448
+ size=3
449
+ stride=1
450
+ pad=1
451
+ activation=mish
452
+
453
+ [shortcut]
454
+ from=-3
455
+ activation=linear
456
+
457
+ [convolutional]
458
+ batch_normalize=1
459
+ filters=160
460
+ size=1
461
+ stride=1
462
+ pad=1
463
+ activation=mish
464
+
465
+ [convolutional]
466
+ batch_normalize=1
467
+ filters=160
468
+ size=3
469
+ stride=1
470
+ pad=1
471
+ activation=mish
472
+
473
+ [shortcut]
474
+ from=-3
475
+ activation=linear
476
+
477
+ [convolutional]
478
+ batch_normalize=1
479
+ filters=160
480
+ size=1
481
+ stride=1
482
+ pad=1
483
+ activation=mish
484
+
485
+ [convolutional]
486
+ batch_normalize=1
487
+ filters=160
488
+ size=3
489
+ stride=1
490
+ pad=1
491
+ activation=mish
492
+
493
+ [shortcut]
494
+ from=-3
495
+ activation=linear
496
+
497
+ [convolutional]
498
+ batch_normalize=1
499
+ filters=160
500
+ size=1
501
+ stride=1
502
+ pad=1
503
+ activation=mish
504
+
505
+ [convolutional]
506
+ batch_normalize=1
507
+ filters=160
508
+ size=3
509
+ stride=1
510
+ pad=1
511
+ activation=mish
512
+
513
+ [shortcut]
514
+ from=-3
515
+ activation=linear
516
+
517
+ [convolutional]
518
+ batch_normalize=1
519
+ filters=160
520
+ size=1
521
+ stride=1
522
+ pad=1
523
+ activation=mish
524
+
525
+ [convolutional]
526
+ batch_normalize=1
527
+ filters=160
528
+ size=3
529
+ stride=1
530
+ pad=1
531
+ activation=mish
532
+
533
+ [shortcut]
534
+ from=-3
535
+ activation=linear
536
+
537
+ [convolutional]
538
+ batch_normalize=1
539
+ filters=160
540
+ size=1
541
+ stride=1
542
+ pad=1
543
+ activation=mish
544
+
545
+ [convolutional]
546
+ batch_normalize=1
547
+ filters=160
548
+ size=3
549
+ stride=1
550
+ pad=1
551
+ activation=mish
552
+
553
+ [shortcut]
554
+ from=-3
555
+ activation=linear
556
+
557
+ [convolutional]
558
+ batch_normalize=1
559
+ filters=160
560
+ size=1
561
+ stride=1
562
+ pad=1
563
+ activation=mish
564
+
565
+ [convolutional]
566
+ batch_normalize=1
567
+ filters=160
568
+ size=3
569
+ stride=1
570
+ pad=1
571
+ activation=mish
572
+
573
+ [shortcut]
574
+ from=-3
575
+ activation=linear
576
+
577
+ # Transition first
578
+
579
+ [convolutional]
580
+ batch_normalize=1
581
+ filters=160
582
+ size=1
583
+ stride=1
584
+ pad=1
585
+ activation=mish
586
+
587
+ # Merge [-1, -(3k+4)]
588
+
589
+ [route]
590
+ layers = -1,-49
591
+
592
+ # Transition last
593
+
594
+ # 78 (previous+7+3k)
595
+ [convolutional]
596
+ batch_normalize=1
597
+ filters=320
598
+ size=1
599
+ stride=1
600
+ pad=1
601
+ activation=mish
602
+
603
+
604
+ # P4
605
+
606
+ # Downsample
607
+
608
+ [convolutional]
609
+ batch_normalize=1
610
+ filters=640
611
+ size=3
612
+ stride=2
613
+ pad=1
614
+ activation=mish
615
+
616
+ # Split
617
+
618
+ [convolutional]
619
+ batch_normalize=1
620
+ filters=320
621
+ size=1
622
+ stride=1
623
+ pad=1
624
+ activation=mish
625
+
626
+ [route]
627
+ layers = -2
628
+
629
+ [convolutional]
630
+ batch_normalize=1
631
+ filters=320
632
+ size=1
633
+ stride=1
634
+ pad=1
635
+ activation=mish
636
+
637
+ # Residual Block
638
+
639
+ [convolutional]
640
+ batch_normalize=1
641
+ filters=320
642
+ size=1
643
+ stride=1
644
+ pad=1
645
+ activation=mish
646
+
647
+ [convolutional]
648
+ batch_normalize=1
649
+ filters=320
650
+ size=3
651
+ stride=1
652
+ pad=1
653
+ activation=mish
654
+
655
+ [shortcut]
656
+ from=-3
657
+ activation=linear
658
+
659
+ [convolutional]
660
+ batch_normalize=1
661
+ filters=320
662
+ size=1
663
+ stride=1
664
+ pad=1
665
+ activation=mish
666
+
667
+ [convolutional]
668
+ batch_normalize=1
669
+ filters=320
670
+ size=3
671
+ stride=1
672
+ pad=1
673
+ activation=mish
674
+
675
+ [shortcut]
676
+ from=-3
677
+ activation=linear
678
+
679
+ [convolutional]
680
+ batch_normalize=1
681
+ filters=320
682
+ size=1
683
+ stride=1
684
+ pad=1
685
+ activation=mish
686
+
687
+ [convolutional]
688
+ batch_normalize=1
689
+ filters=320
690
+ size=3
691
+ stride=1
692
+ pad=1
693
+ activation=mish
694
+
695
+ [shortcut]
696
+ from=-3
697
+ activation=linear
698
+
699
+ [convolutional]
700
+ batch_normalize=1
701
+ filters=320
702
+ size=1
703
+ stride=1
704
+ pad=1
705
+ activation=mish
706
+
707
+ [convolutional]
708
+ batch_normalize=1
709
+ filters=320
710
+ size=3
711
+ stride=1
712
+ pad=1
713
+ activation=mish
714
+
715
+ [shortcut]
716
+ from=-3
717
+ activation=linear
718
+
719
+ [convolutional]
720
+ batch_normalize=1
721
+ filters=320
722
+ size=1
723
+ stride=1
724
+ pad=1
725
+ activation=mish
726
+
727
+ [convolutional]
728
+ batch_normalize=1
729
+ filters=320
730
+ size=3
731
+ stride=1
732
+ pad=1
733
+ activation=mish
734
+
735
+ [shortcut]
736
+ from=-3
737
+ activation=linear
738
+
739
+ [convolutional]
740
+ batch_normalize=1
741
+ filters=320
742
+ size=1
743
+ stride=1
744
+ pad=1
745
+ activation=mish
746
+
747
+ [convolutional]
748
+ batch_normalize=1
749
+ filters=320
750
+ size=3
751
+ stride=1
752
+ pad=1
753
+ activation=mish
754
+
755
+ [shortcut]
756
+ from=-3
757
+ activation=linear
758
+
759
+ [convolutional]
760
+ batch_normalize=1
761
+ filters=320
762
+ size=1
763
+ stride=1
764
+ pad=1
765
+ activation=mish
766
+
767
+ [convolutional]
768
+ batch_normalize=1
769
+ filters=320
770
+ size=3
771
+ stride=1
772
+ pad=1
773
+ activation=mish
774
+
775
+ [shortcut]
776
+ from=-3
777
+ activation=linear
778
+
779
+ [convolutional]
780
+ batch_normalize=1
781
+ filters=320
782
+ size=1
783
+ stride=1
784
+ pad=1
785
+ activation=mish
786
+
787
+ [convolutional]
788
+ batch_normalize=1
789
+ filters=320
790
+ size=3
791
+ stride=1
792
+ pad=1
793
+ activation=mish
794
+
795
+ [shortcut]
796
+ from=-3
797
+ activation=linear
798
+
799
+ [convolutional]
800
+ batch_normalize=1
801
+ filters=320
802
+ size=1
803
+ stride=1
804
+ pad=1
805
+ activation=mish
806
+
807
+ [convolutional]
808
+ batch_normalize=1
809
+ filters=320
810
+ size=3
811
+ stride=1
812
+ pad=1
813
+ activation=mish
814
+
815
+ [shortcut]
816
+ from=-3
817
+ activation=linear
818
+
819
+ [convolutional]
820
+ batch_normalize=1
821
+ filters=320
822
+ size=1
823
+ stride=1
824
+ pad=1
825
+ activation=mish
826
+
827
+ [convolutional]
828
+ batch_normalize=1
829
+ filters=320
830
+ size=3
831
+ stride=1
832
+ pad=1
833
+ activation=mish
834
+
835
+ [shortcut]
836
+ from=-3
837
+ activation=linear
838
+
839
+ [convolutional]
840
+ batch_normalize=1
841
+ filters=320
842
+ size=1
843
+ stride=1
844
+ pad=1
845
+ activation=mish
846
+
847
+ [convolutional]
848
+ batch_normalize=1
849
+ filters=320
850
+ size=3
851
+ stride=1
852
+ pad=1
853
+ activation=mish
854
+
855
+ [shortcut]
856
+ from=-3
857
+ activation=linear
858
+
859
+ [convolutional]
860
+ batch_normalize=1
861
+ filters=320
862
+ size=1
863
+ stride=1
864
+ pad=1
865
+ activation=mish
866
+
867
+ [convolutional]
868
+ batch_normalize=1
869
+ filters=320
870
+ size=3
871
+ stride=1
872
+ pad=1
873
+ activation=mish
874
+
875
+ [shortcut]
876
+ from=-3
877
+ activation=linear
878
+
879
+ [convolutional]
880
+ batch_normalize=1
881
+ filters=320
882
+ size=1
883
+ stride=1
884
+ pad=1
885
+ activation=mish
886
+
887
+ [convolutional]
888
+ batch_normalize=1
889
+ filters=320
890
+ size=3
891
+ stride=1
892
+ pad=1
893
+ activation=mish
894
+
895
+ [shortcut]
896
+ from=-3
897
+ activation=linear
898
+
899
+ [convolutional]
900
+ batch_normalize=1
901
+ filters=320
902
+ size=1
903
+ stride=1
904
+ pad=1
905
+ activation=mish
906
+
907
+ [convolutional]
908
+ batch_normalize=1
909
+ filters=320
910
+ size=3
911
+ stride=1
912
+ pad=1
913
+ activation=mish
914
+
915
+ [shortcut]
916
+ from=-3
917
+ activation=linear
918
+
919
+ [convolutional]
920
+ batch_normalize=1
921
+ filters=320
922
+ size=1
923
+ stride=1
924
+ pad=1
925
+ activation=mish
926
+
927
+ [convolutional]
928
+ batch_normalize=1
929
+ filters=320
930
+ size=3
931
+ stride=1
932
+ pad=1
933
+ activation=mish
934
+
935
+ [shortcut]
936
+ from=-3
937
+ activation=linear
938
+
939
+ # Transition first
940
+
941
+ [convolutional]
942
+ batch_normalize=1
943
+ filters=320
944
+ size=1
945
+ stride=1
946
+ pad=1
947
+ activation=mish
948
+
949
+ # Merge [-1, -(3k+4)]
950
+
951
+ [route]
952
+ layers = -1,-49
953
+
954
+ # Transition last
955
+
956
+ # 130 (previous+7+3k)
957
+ [convolutional]
958
+ batch_normalize=1
959
+ filters=640
960
+ size=1
961
+ stride=1
962
+ pad=1
963
+ activation=mish
964
+
965
+
966
+ # P5
967
+
968
+ # Downsample
969
+
970
+ [convolutional]
971
+ batch_normalize=1
972
+ filters=1280
973
+ size=3
974
+ stride=2
975
+ pad=1
976
+ activation=mish
977
+
978
+ # Split
979
+
980
+ [convolutional]
981
+ batch_normalize=1
982
+ filters=640
983
+ size=1
984
+ stride=1
985
+ pad=1
986
+ activation=mish
987
+
988
+ [route]
989
+ layers = -2
990
+
991
+ [convolutional]
992
+ batch_normalize=1
993
+ filters=640
994
+ size=1
995
+ stride=1
996
+ pad=1
997
+ activation=mish
998
+
999
+ # Residual Block
1000
+
1001
+ [convolutional]
1002
+ batch_normalize=1
1003
+ filters=640
1004
+ size=1
1005
+ stride=1
1006
+ pad=1
1007
+ activation=mish
1008
+
1009
+ [convolutional]
1010
+ batch_normalize=1
1011
+ filters=640
1012
+ size=3
1013
+ stride=1
1014
+ pad=1
1015
+ activation=mish
1016
+
1017
+ [shortcut]
1018
+ from=-3
1019
+ activation=linear
1020
+
1021
+ [convolutional]
1022
+ batch_normalize=1
1023
+ filters=640
1024
+ size=1
1025
+ stride=1
1026
+ pad=1
1027
+ activation=mish
1028
+
1029
+ [convolutional]
1030
+ batch_normalize=1
1031
+ filters=640
1032
+ size=3
1033
+ stride=1
1034
+ pad=1
1035
+ activation=mish
1036
+
1037
+ [shortcut]
1038
+ from=-3
1039
+ activation=linear
1040
+
1041
+ [convolutional]
1042
+ batch_normalize=1
1043
+ filters=640
1044
+ size=1
1045
+ stride=1
1046
+ pad=1
1047
+ activation=mish
1048
+
1049
+ [convolutional]
1050
+ batch_normalize=1
1051
+ filters=640
1052
+ size=3
1053
+ stride=1
1054
+ pad=1
1055
+ activation=mish
1056
+
1057
+ [shortcut]
1058
+ from=-3
1059
+ activation=linear
1060
+
1061
+ [convolutional]
1062
+ batch_normalize=1
1063
+ filters=640
1064
+ size=1
1065
+ stride=1
1066
+ pad=1
1067
+ activation=mish
1068
+
1069
+ [convolutional]
1070
+ batch_normalize=1
1071
+ filters=640
1072
+ size=3
1073
+ stride=1
1074
+ pad=1
1075
+ activation=mish
1076
+
1077
+ [shortcut]
1078
+ from=-3
1079
+ activation=linear
1080
+
1081
+ [convolutional]
1082
+ batch_normalize=1
1083
+ filters=640
1084
+ size=1
1085
+ stride=1
1086
+ pad=1
1087
+ activation=mish
1088
+
1089
+ [convolutional]
1090
+ batch_normalize=1
1091
+ filters=640
1092
+ size=3
1093
+ stride=1
1094
+ pad=1
1095
+ activation=mish
1096
+
1097
+ [shortcut]
1098
+ from=-3
1099
+ activation=linear
1100
+
1101
+ [convolutional]
1102
+ batch_normalize=1
1103
+ filters=640
1104
+ size=1
1105
+ stride=1
1106
+ pad=1
1107
+ activation=mish
1108
+
1109
+ [convolutional]
1110
+ batch_normalize=1
1111
+ filters=640
1112
+ size=3
1113
+ stride=1
1114
+ pad=1
1115
+ activation=mish
1116
+
1117
+ [shortcut]
1118
+ from=-3
1119
+ activation=linear
1120
+
1121
+ [convolutional]
1122
+ batch_normalize=1
1123
+ filters=640
1124
+ size=1
1125
+ stride=1
1126
+ pad=1
1127
+ activation=mish
1128
+
1129
+ [convolutional]
1130
+ batch_normalize=1
1131
+ filters=640
1132
+ size=3
1133
+ stride=1
1134
+ pad=1
1135
+ activation=mish
1136
+
1137
+ [shortcut]
1138
+ from=-3
1139
+ activation=linear
1140
+
1141
+ # Transition first
1142
+
1143
+ [convolutional]
1144
+ batch_normalize=1
1145
+ filters=640
1146
+ size=1
1147
+ stride=1
1148
+ pad=1
1149
+ activation=mish
1150
+
1151
+ # Merge [-1, -(3k+4)]
1152
+
1153
+ [route]
1154
+ layers = -1,-25
1155
+
1156
+ # Transition last
1157
+
1158
+ # 158 (previous+7+3k)
1159
+ [convolutional]
1160
+ batch_normalize=1
1161
+ filters=1280
1162
+ size=1
1163
+ stride=1
1164
+ pad=1
1165
+ activation=mish
1166
+
1167
+
1168
+ # P6
1169
+
1170
+ # Downsample
1171
+
1172
+ [convolutional]
1173
+ batch_normalize=1
1174
+ filters=1280
1175
+ size=3
1176
+ stride=2
1177
+ pad=1
1178
+ activation=mish
1179
+
1180
+ # Split
1181
+
1182
+ [convolutional]
1183
+ batch_normalize=1
1184
+ filters=640
1185
+ size=1
1186
+ stride=1
1187
+ pad=1
1188
+ activation=mish
1189
+
1190
+ [route]
1191
+ layers = -2
1192
+
1193
+ [convolutional]
1194
+ batch_normalize=1
1195
+ filters=640
1196
+ size=1
1197
+ stride=1
1198
+ pad=1
1199
+ activation=mish
1200
+
1201
+ # Residual Block
1202
+
1203
+ [convolutional]
1204
+ batch_normalize=1
1205
+ filters=640
1206
+ size=1
1207
+ stride=1
1208
+ pad=1
1209
+ activation=mish
1210
+
1211
+ [convolutional]
1212
+ batch_normalize=1
1213
+ filters=640
1214
+ size=3
1215
+ stride=1
1216
+ pad=1
1217
+ activation=mish
1218
+
1219
+ [shortcut]
1220
+ from=-3
1221
+ activation=linear
1222
+
1223
+ [convolutional]
1224
+ batch_normalize=1
1225
+ filters=640
1226
+ size=1
1227
+ stride=1
1228
+ pad=1
1229
+ activation=mish
1230
+
1231
+ [convolutional]
1232
+ batch_normalize=1
1233
+ filters=640
1234
+ size=3
1235
+ stride=1
1236
+ pad=1
1237
+ activation=mish
1238
+
1239
+ [shortcut]
1240
+ from=-3
1241
+ activation=linear
1242
+
1243
+ [convolutional]
1244
+ batch_normalize=1
1245
+ filters=640
1246
+ size=1
1247
+ stride=1
1248
+ pad=1
1249
+ activation=mish
1250
+
1251
+ [convolutional]
1252
+ batch_normalize=1
1253
+ filters=640
1254
+ size=3
1255
+ stride=1
1256
+ pad=1
1257
+ activation=mish
1258
+
1259
+ [shortcut]
1260
+ from=-3
1261
+ activation=linear
1262
+
1263
+ [convolutional]
1264
+ batch_normalize=1
1265
+ filters=640
1266
+ size=1
1267
+ stride=1
1268
+ pad=1
1269
+ activation=mish
1270
+
1271
+ [convolutional]
1272
+ batch_normalize=1
1273
+ filters=640
1274
+ size=3
1275
+ stride=1
1276
+ pad=1
1277
+ activation=mish
1278
+
1279
+ [shortcut]
1280
+ from=-3
1281
+ activation=linear
1282
+
1283
+ [convolutional]
1284
+ batch_normalize=1
1285
+ filters=640
1286
+ size=1
1287
+ stride=1
1288
+ pad=1
1289
+ activation=mish
1290
+
1291
+ [convolutional]
1292
+ batch_normalize=1
1293
+ filters=640
1294
+ size=3
1295
+ stride=1
1296
+ pad=1
1297
+ activation=mish
1298
+
1299
+ [shortcut]
1300
+ from=-3
1301
+ activation=linear
1302
+
1303
+ [convolutional]
1304
+ batch_normalize=1
1305
+ filters=640
1306
+ size=1
1307
+ stride=1
1308
+ pad=1
1309
+ activation=mish
1310
+
1311
+ [convolutional]
1312
+ batch_normalize=1
1313
+ filters=640
1314
+ size=3
1315
+ stride=1
1316
+ pad=1
1317
+ activation=mish
1318
+
1319
+ [shortcut]
1320
+ from=-3
1321
+ activation=linear
1322
+
1323
+ [convolutional]
1324
+ batch_normalize=1
1325
+ filters=640
1326
+ size=1
1327
+ stride=1
1328
+ pad=1
1329
+ activation=mish
1330
+
1331
+ [convolutional]
1332
+ batch_normalize=1
1333
+ filters=640
1334
+ size=3
1335
+ stride=1
1336
+ pad=1
1337
+ activation=mish
1338
+
1339
+ [shortcut]
1340
+ from=-3
1341
+ activation=linear
1342
+
1343
+ # Transition first
1344
+
1345
+ [convolutional]
1346
+ batch_normalize=1
1347
+ filters=640
1348
+ size=1
1349
+ stride=1
1350
+ pad=1
1351
+ activation=mish
1352
+
1353
+ # Merge [-1, -(3k+4)]
1354
+
1355
+ [route]
1356
+ layers = -1,-25
1357
+
1358
+ # Transition last
1359
+
1360
+ # 186 (previous+7+3k)
1361
+ [convolutional]
1362
+ batch_normalize=1
1363
+ filters=1280
1364
+ size=1
1365
+ stride=1
1366
+ pad=1
1367
+ activation=mish
1368
+
1369
+
1370
+ # P7
1371
+
1372
+ # Downsample
1373
+
1374
+ [convolutional]
1375
+ batch_normalize=1
1376
+ filters=1280
1377
+ size=3
1378
+ stride=2
1379
+ pad=1
1380
+ activation=mish
1381
+
1382
+ # Split
1383
+
1384
+ [convolutional]
1385
+ batch_normalize=1
1386
+ filters=640
1387
+ size=1
1388
+ stride=1
1389
+ pad=1
1390
+ activation=mish
1391
+
1392
+ [route]
1393
+ layers = -2
1394
+
1395
+ [convolutional]
1396
+ batch_normalize=1
1397
+ filters=640
1398
+ size=1
1399
+ stride=1
1400
+ pad=1
1401
+ activation=mish
1402
+
1403
+ # Residual Block
1404
+
1405
+ [convolutional]
1406
+ batch_normalize=1
1407
+ filters=640
1408
+ size=1
1409
+ stride=1
1410
+ pad=1
1411
+ activation=mish
1412
+
1413
+ [convolutional]
1414
+ batch_normalize=1
1415
+ filters=640
1416
+ size=3
1417
+ stride=1
1418
+ pad=1
1419
+ activation=mish
1420
+
1421
+ [shortcut]
1422
+ from=-3
1423
+ activation=linear
1424
+
1425
+ [convolutional]
1426
+ batch_normalize=1
1427
+ filters=640
1428
+ size=1
1429
+ stride=1
1430
+ pad=1
1431
+ activation=mish
1432
+
1433
+ [convolutional]
1434
+ batch_normalize=1
1435
+ filters=640
1436
+ size=3
1437
+ stride=1
1438
+ pad=1
1439
+ activation=mish
1440
+
1441
+ [shortcut]
1442
+ from=-3
1443
+ activation=linear
1444
+
1445
+ [convolutional]
1446
+ batch_normalize=1
1447
+ filters=640
1448
+ size=1
1449
+ stride=1
1450
+ pad=1
1451
+ activation=mish
1452
+
1453
+ [convolutional]
1454
+ batch_normalize=1
1455
+ filters=640
1456
+ size=3
1457
+ stride=1
1458
+ pad=1
1459
+ activation=mish
1460
+
1461
+ [shortcut]
1462
+ from=-3
1463
+ activation=linear
1464
+
1465
+ [convolutional]
1466
+ batch_normalize=1
1467
+ filters=640
1468
+ size=1
1469
+ stride=1
1470
+ pad=1
1471
+ activation=mish
1472
+
1473
+ [convolutional]
1474
+ batch_normalize=1
1475
+ filters=640
1476
+ size=3
1477
+ stride=1
1478
+ pad=1
1479
+ activation=mish
1480
+
1481
+ [shortcut]
1482
+ from=-3
1483
+ activation=linear
1484
+
1485
+ [convolutional]
1486
+ batch_normalize=1
1487
+ filters=640
1488
+ size=1
1489
+ stride=1
1490
+ pad=1
1491
+ activation=mish
1492
+
1493
+ [convolutional]
1494
+ batch_normalize=1
1495
+ filters=640
1496
+ size=3
1497
+ stride=1
1498
+ pad=1
1499
+ activation=mish
1500
+
1501
+ [shortcut]
1502
+ from=-3
1503
+ activation=linear
1504
+
1505
+ [convolutional]
1506
+ batch_normalize=1
1507
+ filters=640
1508
+ size=1
1509
+ stride=1
1510
+ pad=1
1511
+ activation=mish
1512
+
1513
+ [convolutional]
1514
+ batch_normalize=1
1515
+ filters=640
1516
+ size=3
1517
+ stride=1
1518
+ pad=1
1519
+ activation=mish
1520
+
1521
+ [shortcut]
1522
+ from=-3
1523
+ activation=linear
1524
+
1525
+ [convolutional]
1526
+ batch_normalize=1
1527
+ filters=640
1528
+ size=1
1529
+ stride=1
1530
+ pad=1
1531
+ activation=mish
1532
+
1533
+ [convolutional]
1534
+ batch_normalize=1
1535
+ filters=640
1536
+ size=3
1537
+ stride=1
1538
+ pad=1
1539
+ activation=mish
1540
+
1541
+ [shortcut]
1542
+ from=-3
1543
+ activation=linear
1544
+
1545
+ # Transition first
1546
+
1547
+ [convolutional]
1548
+ batch_normalize=1
1549
+ filters=640
1550
+ size=1
1551
+ stride=1
1552
+ pad=1
1553
+ activation=mish
1554
+
1555
+ # Merge [-1, -(3k+4)]
1556
+
1557
+ [route]
1558
+ layers = -1,-25
1559
+
1560
+ # Transition last
1561
+
1562
+ # 214 (previous+7+3k)
1563
+ [convolutional]
1564
+ batch_normalize=1
1565
+ filters=1280
1566
+ size=1
1567
+ stride=1
1568
+ pad=1
1569
+ activation=mish
1570
+
1571
+ # ============ End of Backbone ============ #
1572
+
1573
+ # ============ Neck ============ #
1574
+
1575
+ # CSPSPP
1576
+
1577
+ [convolutional]
1578
+ batch_normalize=1
1579
+ filters=640
1580
+ size=1
1581
+ stride=1
1582
+ pad=1
1583
+ activation=mish
1584
+
1585
+ [route]
1586
+ layers = -2
1587
+
1588
+ [convolutional]
1589
+ batch_normalize=1
1590
+ filters=640
1591
+ size=1
1592
+ stride=1
1593
+ pad=1
1594
+ activation=mish
1595
+
1596
+ [convolutional]
1597
+ batch_normalize=1
1598
+ size=3
1599
+ stride=1
1600
+ pad=1
1601
+ filters=640
1602
+ activation=mish
1603
+
1604
+ [convolutional]
1605
+ batch_normalize=1
1606
+ filters=640
1607
+ size=1
1608
+ stride=1
1609
+ pad=1
1610
+ activation=mish
1611
+
1612
+ ### SPP ###
1613
+ [maxpool]
1614
+ stride=1
1615
+ size=5
1616
+
1617
+ [route]
1618
+ layers=-2
1619
+
1620
+ [maxpool]
1621
+ stride=1
1622
+ size=9
1623
+
1624
+ [route]
1625
+ layers=-4
1626
+
1627
+ [maxpool]
1628
+ stride=1
1629
+ size=13
1630
+
1631
+ [route]
1632
+ layers=-1,-3,-5,-6
1633
+ ### End SPP ###
1634
+
1635
+ [convolutional]
1636
+ batch_normalize=1
1637
+ filters=640
1638
+ size=1
1639
+ stride=1
1640
+ pad=1
1641
+ activation=mish
1642
+
1643
+ [convolutional]
1644
+ batch_normalize=1
1645
+ size=3
1646
+ stride=1
1647
+ pad=1
1648
+ filters=640
1649
+ activation=mish
1650
+
1651
+ [route]
1652
+ layers = -1, -13
1653
+
1654
+ # 229 (previous+6+5+2k)
1655
+ [convolutional]
1656
+ batch_normalize=1
1657
+ filters=640
1658
+ size=1
1659
+ stride=1
1660
+ pad=1
1661
+ activation=mish
1662
+
1663
+ # End of CSPSPP
1664
+
1665
+
1666
+ # FPN-6
1667
+
1668
+ [convolutional]
1669
+ batch_normalize=1
1670
+ filters=640
1671
+ size=1
1672
+ stride=1
1673
+ pad=1
1674
+ activation=mish
1675
+
1676
+ [upsample]
1677
+ stride=2
1678
+
1679
+ [route]
1680
+ layers = 186
1681
+
1682
+ [convolutional]
1683
+ batch_normalize=1
1684
+ filters=640
1685
+ size=1
1686
+ stride=1
1687
+ pad=1
1688
+ activation=mish
1689
+
1690
+ [route]
1691
+ layers = -1, -3
1692
+
1693
+ [convolutional]
1694
+ batch_normalize=1
1695
+ filters=640
1696
+ size=1
1697
+ stride=1
1698
+ pad=1
1699
+ activation=mish
1700
+
1701
+ # Split
1702
+
1703
+ [convolutional]
1704
+ batch_normalize=1
1705
+ filters=640
1706
+ size=1
1707
+ stride=1
1708
+ pad=1
1709
+ activation=mish
1710
+
1711
+ [route]
1712
+ layers = -2
1713
+
1714
+ # Plain Block
1715
+
1716
+ [convolutional]
1717
+ batch_normalize=1
1718
+ filters=640
1719
+ size=1
1720
+ stride=1
1721
+ pad=1
1722
+ activation=mish
1723
+
1724
+ [convolutional]
1725
+ batch_normalize=1
1726
+ size=3
1727
+ stride=1
1728
+ pad=1
1729
+ filters=640
1730
+ activation=mish
1731
+
1732
+ [convolutional]
1733
+ batch_normalize=1
1734
+ filters=640
1735
+ size=1
1736
+ stride=1
1737
+ pad=1
1738
+ activation=mish
1739
+
1740
+ [convolutional]
1741
+ batch_normalize=1
1742
+ size=3
1743
+ stride=1
1744
+ pad=1
1745
+ filters=640
1746
+ activation=mish
1747
+
1748
+ [convolutional]
1749
+ batch_normalize=1
1750
+ filters=640
1751
+ size=1
1752
+ stride=1
1753
+ pad=1
1754
+ activation=mish
1755
+
1756
+ [convolutional]
1757
+ batch_normalize=1
1758
+ size=3
1759
+ stride=1
1760
+ pad=1
1761
+ filters=640
1762
+ activation=mish
1763
+
1764
+ # Merge [-1, -(2k+2)]
1765
+
1766
+ [route]
1767
+ layers = -1, -8
1768
+
1769
+ # Transition last
1770
+
1771
+ # 245 (previous+6+4+2k)
1772
+ [convolutional]
1773
+ batch_normalize=1
1774
+ filters=640
1775
+ size=1
1776
+ stride=1
1777
+ pad=1
1778
+ activation=mish
1779
+
1780
+
1781
+ # FPN-5
1782
+
1783
+ [convolutional]
1784
+ batch_normalize=1
1785
+ filters=640
1786
+ size=1
1787
+ stride=1
1788
+ pad=1
1789
+ activation=mish
1790
+
1791
+ [upsample]
1792
+ stride=2
1793
+
1794
+ [route]
1795
+ layers = 158
1796
+
1797
+ [convolutional]
1798
+ batch_normalize=1
1799
+ filters=640
1800
+ size=1
1801
+ stride=1
1802
+ pad=1
1803
+ activation=mish
1804
+
1805
+ [route]
1806
+ layers = -1, -3
1807
+
1808
+ [convolutional]
1809
+ batch_normalize=1
1810
+ filters=640
1811
+ size=1
1812
+ stride=1
1813
+ pad=1
1814
+ activation=mish
1815
+
1816
+ # Split
1817
+
1818
+ [convolutional]
1819
+ batch_normalize=1
1820
+ filters=640
1821
+ size=1
1822
+ stride=1
1823
+ pad=1
1824
+ activation=mish
1825
+
1826
+ [route]
1827
+ layers = -2
1828
+
1829
+ # Plain Block
1830
+
1831
+ [convolutional]
1832
+ batch_normalize=1
1833
+ filters=640
1834
+ size=1
1835
+ stride=1
1836
+ pad=1
1837
+ activation=mish
1838
+
1839
+ [convolutional]
1840
+ batch_normalize=1
1841
+ size=3
1842
+ stride=1
1843
+ pad=1
1844
+ filters=640
1845
+ activation=mish
1846
+
1847
+ [convolutional]
1848
+ batch_normalize=1
1849
+ filters=640
1850
+ size=1
1851
+ stride=1
1852
+ pad=1
1853
+ activation=mish
1854
+
1855
+ [convolutional]
1856
+ batch_normalize=1
1857
+ size=3
1858
+ stride=1
1859
+ pad=1
1860
+ filters=640
1861
+ activation=mish
1862
+
1863
+ [convolutional]
1864
+ batch_normalize=1
1865
+ filters=640
1866
+ size=1
1867
+ stride=1
1868
+ pad=1
1869
+ activation=mish
1870
+
1871
+ [convolutional]
1872
+ batch_normalize=1
1873
+ size=3
1874
+ stride=1
1875
+ pad=1
1876
+ filters=640
1877
+ activation=mish
1878
+
1879
+ # Merge [-1, -(2k+2)]
1880
+
1881
+ [route]
1882
+ layers = -1, -8
1883
+
1884
+ # Transition last
1885
+
1886
+ # 261 (previous+6+4+2k)
1887
+ [convolutional]
1888
+ batch_normalize=1
1889
+ filters=640
1890
+ size=1
1891
+ stride=1
1892
+ pad=1
1893
+ activation=mish
1894
+
1895
+
1896
+ # FPN-4
1897
+
1898
+ [convolutional]
1899
+ batch_normalize=1
1900
+ filters=320
1901
+ size=1
1902
+ stride=1
1903
+ pad=1
1904
+ activation=mish
1905
+
1906
+ [upsample]
1907
+ stride=2
1908
+
1909
+ [route]
1910
+ layers = 130
1911
+
1912
+ [convolutional]
1913
+ batch_normalize=1
1914
+ filters=320
1915
+ size=1
1916
+ stride=1
1917
+ pad=1
1918
+ activation=mish
1919
+
1920
+ [route]
1921
+ layers = -1, -3
1922
+
1923
+ [convolutional]
1924
+ batch_normalize=1
1925
+ filters=320
1926
+ size=1
1927
+ stride=1
1928
+ pad=1
1929
+ activation=mish
1930
+
1931
+ # Split
1932
+
1933
+ [convolutional]
1934
+ batch_normalize=1
1935
+ filters=320
1936
+ size=1
1937
+ stride=1
1938
+ pad=1
1939
+ activation=mish
1940
+
1941
+ [route]
1942
+ layers = -2
1943
+
1944
+ # Plain Block
1945
+
1946
+ [convolutional]
1947
+ batch_normalize=1
1948
+ filters=320
1949
+ size=1
1950
+ stride=1
1951
+ pad=1
1952
+ activation=mish
1953
+
1954
+ [convolutional]
1955
+ batch_normalize=1
1956
+ size=3
1957
+ stride=1
1958
+ pad=1
1959
+ filters=320
1960
+ activation=mish
1961
+
1962
+ [convolutional]
1963
+ batch_normalize=1
1964
+ filters=320
1965
+ size=1
1966
+ stride=1
1967
+ pad=1
1968
+ activation=mish
1969
+
1970
+ [convolutional]
1971
+ batch_normalize=1
1972
+ size=3
1973
+ stride=1
1974
+ pad=1
1975
+ filters=320
1976
+ activation=mish
1977
+
1978
+ [convolutional]
1979
+ batch_normalize=1
1980
+ filters=320
1981
+ size=1
1982
+ stride=1
1983
+ pad=1
1984
+ activation=mish
1985
+
1986
+ [convolutional]
1987
+ batch_normalize=1
1988
+ size=3
1989
+ stride=1
1990
+ pad=1
1991
+ filters=320
1992
+ activation=mish
1993
+
1994
+ # Merge [-1, -(2k+2)]
1995
+
1996
+ [route]
1997
+ layers = -1, -8
1998
+
1999
+ # Transition last
2000
+
2001
+ # 277 (previous+6+4+2k)
2002
+ [convolutional]
2003
+ batch_normalize=1
2004
+ filters=320
2005
+ size=1
2006
+ stride=1
2007
+ pad=1
2008
+ activation=mish
2009
+
2010
+
2011
+ # FPN-3
2012
+
2013
+ [convolutional]
2014
+ batch_normalize=1
2015
+ filters=160
2016
+ size=1
2017
+ stride=1
2018
+ pad=1
2019
+ activation=mish
2020
+
2021
+ [upsample]
2022
+ stride=2
2023
+
2024
+ [route]
2025
+ layers = 78
2026
+
2027
+ [convolutional]
2028
+ batch_normalize=1
2029
+ filters=160
2030
+ size=1
2031
+ stride=1
2032
+ pad=1
2033
+ activation=mish
2034
+
2035
+ [route]
2036
+ layers = -1, -3
2037
+
2038
+ [convolutional]
2039
+ batch_normalize=1
2040
+ filters=160
2041
+ size=1
2042
+ stride=1
2043
+ pad=1
2044
+ activation=mish
2045
+
2046
+ # Split
2047
+
2048
+ [convolutional]
2049
+ batch_normalize=1
2050
+ filters=160
2051
+ size=1
2052
+ stride=1
2053
+ pad=1
2054
+ activation=mish
2055
+
2056
+ [route]
2057
+ layers = -2
2058
+
2059
+ # Plain Block
2060
+
2061
+ [convolutional]
2062
+ batch_normalize=1
2063
+ filters=160
2064
+ size=1
2065
+ stride=1
2066
+ pad=1
2067
+ activation=mish
2068
+
2069
+ [convolutional]
2070
+ batch_normalize=1
2071
+ size=3
2072
+ stride=1
2073
+ pad=1
2074
+ filters=160
2075
+ activation=mish
2076
+
2077
+ [convolutional]
2078
+ batch_normalize=1
2079
+ filters=160
2080
+ size=1
2081
+ stride=1
2082
+ pad=1
2083
+ activation=mish
2084
+
2085
+ [convolutional]
2086
+ batch_normalize=1
2087
+ size=3
2088
+ stride=1
2089
+ pad=1
2090
+ filters=160
2091
+ activation=mish
2092
+
2093
+ [convolutional]
2094
+ batch_normalize=1
2095
+ filters=160
2096
+ size=1
2097
+ stride=1
2098
+ pad=1
2099
+ activation=mish
2100
+
2101
+ [convolutional]
2102
+ batch_normalize=1
2103
+ size=3
2104
+ stride=1
2105
+ pad=1
2106
+ filters=160
2107
+ activation=mish
2108
+
2109
+ # Merge [-1, -(2k+2)]
2110
+
2111
+ [route]
2112
+ layers = -1, -8
2113
+
2114
+ # Transition last
2115
+
2116
+ # 293 (previous+6+4+2k)
2117
+ [convolutional]
2118
+ batch_normalize=1
2119
+ filters=160
2120
+ size=1
2121
+ stride=1
2122
+ pad=1
2123
+ activation=mish
2124
+
2125
+
2126
+ # PAN-4
2127
+
2128
+ [convolutional]
2129
+ batch_normalize=1
2130
+ size=3
2131
+ stride=2
2132
+ pad=1
2133
+ filters=320
2134
+ activation=mish
2135
+
2136
+ [route]
2137
+ layers = -1, 277
2138
+
2139
+ [convolutional]
2140
+ batch_normalize=1
2141
+ filters=320
2142
+ size=1
2143
+ stride=1
2144
+ pad=1
2145
+ activation=mish
2146
+
2147
+ # Split
2148
+
2149
+ [convolutional]
2150
+ batch_normalize=1
2151
+ filters=320
2152
+ size=1
2153
+ stride=1
2154
+ pad=1
2155
+ activation=mish
2156
+
2157
+ [route]
2158
+ layers = -2
2159
+
2160
+ # Plain Block
2161
+
2162
+ [convolutional]
2163
+ batch_normalize=1
2164
+ filters=320
2165
+ size=1
2166
+ stride=1
2167
+ pad=1
2168
+ activation=mish
2169
+
2170
+ [convolutional]
2171
+ batch_normalize=1
2172
+ size=3
2173
+ stride=1
2174
+ pad=1
2175
+ filters=320
2176
+ activation=mish
2177
+
2178
+ [convolutional]
2179
+ batch_normalize=1
2180
+ filters=320
2181
+ size=1
2182
+ stride=1
2183
+ pad=1
2184
+ activation=mish
2185
+
2186
+ [convolutional]
2187
+ batch_normalize=1
2188
+ size=3
2189
+ stride=1
2190
+ pad=1
2191
+ filters=320
2192
+ activation=mish
2193
+
2194
+ [convolutional]
2195
+ batch_normalize=1
2196
+ filters=320
2197
+ size=1
2198
+ stride=1
2199
+ pad=1
2200
+ activation=mish
2201
+
2202
+ [convolutional]
2203
+ batch_normalize=1
2204
+ size=3
2205
+ stride=1
2206
+ pad=1
2207
+ filters=320
2208
+ activation=mish
2209
+
2210
+ [route]
2211
+ layers = -1,-8
2212
+
2213
+ # Transition last
2214
+
2215
+ # 306 (previous+3+4+2k)
2216
+ [convolutional]
2217
+ batch_normalize=1
2218
+ filters=320
2219
+ size=1
2220
+ stride=1
2221
+ pad=1
2222
+ activation=mish
2223
+
2224
+
2225
+ # PAN-5
2226
+
2227
+ [convolutional]
2228
+ batch_normalize=1
2229
+ size=3
2230
+ stride=2
2231
+ pad=1
2232
+ filters=640
2233
+ activation=mish
2234
+
2235
+ [route]
2236
+ layers = -1, 261
2237
+
2238
+ [convolutional]
2239
+ batch_normalize=1
2240
+ filters=640
2241
+ size=1
2242
+ stride=1
2243
+ pad=1
2244
+ activation=mish
2245
+
2246
+ # Split
2247
+
2248
+ [convolutional]
2249
+ batch_normalize=1
2250
+ filters=640
2251
+ size=1
2252
+ stride=1
2253
+ pad=1
2254
+ activation=mish
2255
+
2256
+ [route]
2257
+ layers = -2
2258
+
2259
+ # Plain Block
2260
+
2261
+ [convolutional]
2262
+ batch_normalize=1
2263
+ filters=640
2264
+ size=1
2265
+ stride=1
2266
+ pad=1
2267
+ activation=mish
2268
+
2269
+ [convolutional]
2270
+ batch_normalize=1
2271
+ size=3
2272
+ stride=1
2273
+ pad=1
2274
+ filters=640
2275
+ activation=mish
2276
+
2277
+ [convolutional]
2278
+ batch_normalize=1
2279
+ filters=640
2280
+ size=1
2281
+ stride=1
2282
+ pad=1
2283
+ activation=mish
2284
+
2285
+ [convolutional]
2286
+ batch_normalize=1
2287
+ size=3
2288
+ stride=1
2289
+ pad=1
2290
+ filters=640
2291
+ activation=mish
2292
+
2293
+ [convolutional]
2294
+ batch_normalize=1
2295
+ filters=640
2296
+ size=1
2297
+ stride=1
2298
+ pad=1
2299
+ activation=mish
2300
+
2301
+ [convolutional]
2302
+ batch_normalize=1
2303
+ size=3
2304
+ stride=1
2305
+ pad=1
2306
+ filters=640
2307
+ activation=mish
2308
+
2309
+ [route]
2310
+ layers = -1,-8
2311
+
2312
+ # Transition last
2313
+
2314
+ # 319 (previous+3+4+2k)
2315
+ [convolutional]
2316
+ batch_normalize=1
2317
+ filters=640
2318
+ size=1
2319
+ stride=1
2320
+ pad=1
2321
+ activation=mish
2322
+
2323
+
2324
+ # PAN-6
2325
+
2326
+ [convolutional]
2327
+ batch_normalize=1
2328
+ size=3
2329
+ stride=2
2330
+ pad=1
2331
+ filters=640
2332
+ activation=mish
2333
+
2334
+ [route]
2335
+ layers = -1, 245
2336
+
2337
+ [convolutional]
2338
+ batch_normalize=1
2339
+ filters=640
2340
+ size=1
2341
+ stride=1
2342
+ pad=1
2343
+ activation=mish
2344
+
2345
+ # Split
2346
+
2347
+ [convolutional]
2348
+ batch_normalize=1
2349
+ filters=640
2350
+ size=1
2351
+ stride=1
2352
+ pad=1
2353
+ activation=mish
2354
+
2355
+ [route]
2356
+ layers = -2
2357
+
2358
+ # Plain Block
2359
+
2360
+ [convolutional]
2361
+ batch_normalize=1
2362
+ filters=640
2363
+ size=1
2364
+ stride=1
2365
+ pad=1
2366
+ activation=mish
2367
+
2368
+ [convolutional]
2369
+ batch_normalize=1
2370
+ size=3
2371
+ stride=1
2372
+ pad=1
2373
+ filters=640
2374
+ activation=mish
2375
+
2376
+ [convolutional]
2377
+ batch_normalize=1
2378
+ filters=640
2379
+ size=1
2380
+ stride=1
2381
+ pad=1
2382
+ activation=mish
2383
+
2384
+ [convolutional]
2385
+ batch_normalize=1
2386
+ size=3
2387
+ stride=1
2388
+ pad=1
2389
+ filters=640
2390
+ activation=mish
2391
+
2392
+ [convolutional]
2393
+ batch_normalize=1
2394
+ filters=640
2395
+ size=1
2396
+ stride=1
2397
+ pad=1
2398
+ activation=mish
2399
+
2400
+ [convolutional]
2401
+ batch_normalize=1
2402
+ size=3
2403
+ stride=1
2404
+ pad=1
2405
+ filters=640
2406
+ activation=mish
2407
+
2408
+ [route]
2409
+ layers = -1,-8
2410
+
2411
+ # Transition last
2412
+
2413
+ # 332 (previous+3+4+2k)
2414
+ [convolutional]
2415
+ batch_normalize=1
2416
+ filters=640
2417
+ size=1
2418
+ stride=1
2419
+ pad=1
2420
+ activation=mish
2421
+
2422
+
2423
+ # PAN-7
2424
+
2425
+ [convolutional]
2426
+ batch_normalize=1
2427
+ size=3
2428
+ stride=2
2429
+ pad=1
2430
+ filters=640
2431
+ activation=mish
2432
+
2433
+ [route]
2434
+ layers = -1, 229
2435
+
2436
+ [convolutional]
2437
+ batch_normalize=1
2438
+ filters=640
2439
+ size=1
2440
+ stride=1
2441
+ pad=1
2442
+ activation=mish
2443
+
2444
+ # Split
2445
+
2446
+ [convolutional]
2447
+ batch_normalize=1
2448
+ filters=640
2449
+ size=1
2450
+ stride=1
2451
+ pad=1
2452
+ activation=mish
2453
+
2454
+ [route]
2455
+ layers = -2
2456
+
2457
+ # Plain Block
2458
+
2459
+ [convolutional]
2460
+ batch_normalize=1
2461
+ filters=640
2462
+ size=1
2463
+ stride=1
2464
+ pad=1
2465
+ activation=mish
2466
+
2467
+ [convolutional]
2468
+ batch_normalize=1
2469
+ size=3
2470
+ stride=1
2471
+ pad=1
2472
+ filters=640
2473
+ activation=mish
2474
+
2475
+ [convolutional]
2476
+ batch_normalize=1
2477
+ filters=640
2478
+ size=1
2479
+ stride=1
2480
+ pad=1
2481
+ activation=mish
2482
+
2483
+ [convolutional]
2484
+ batch_normalize=1
2485
+ size=3
2486
+ stride=1
2487
+ pad=1
2488
+ filters=640
2489
+ activation=mish
2490
+
2491
+ [convolutional]
2492
+ batch_normalize=1
2493
+ filters=640
2494
+ size=1
2495
+ stride=1
2496
+ pad=1
2497
+ activation=mish
2498
+
2499
+ [convolutional]
2500
+ batch_normalize=1
2501
+ size=3
2502
+ stride=1
2503
+ pad=1
2504
+ filters=640
2505
+ activation=mish
2506
+
2507
+ [route]
2508
+ layers = -1,-8
2509
+
2510
+ # Transition last
2511
+
2512
+ # 345 (previous+3+4+2k)
2513
+ [convolutional]
2514
+ batch_normalize=1
2515
+ filters=640
2516
+ size=1
2517
+ stride=1
2518
+ pad=1
2519
+ activation=mish
2520
+
2521
+ # ============ End of Neck ============ #
2522
+
2523
+ # ============ Head ============ #
2524
+
2525
+ # YOLO-3
2526
+
2527
+ [route]
2528
+ layers = 293
2529
+
2530
+ [convolutional]
2531
+ batch_normalize=1
2532
+ size=3
2533
+ stride=1
2534
+ pad=1
2535
+ filters=320
2536
+ activation=mish
2537
+
2538
+ [convolutional]
2539
+ size=1
2540
+ stride=1
2541
+ pad=1
2542
+ filters=340
2543
+ activation=linear
2544
+
2545
+ [yolo]
2546
+ mask = 0,1,2,3
2547
+ anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408
2548
+ classes=80
2549
+ num=20
2550
+ jitter=.3
2551
+ ignore_thresh = .7
2552
+ truth_thresh = 1
2553
+ random=1
2554
+ scale_x_y = 1.05
2555
+ iou_thresh=0.213
2556
+ cls_normalizer=1.0
2557
+ iou_normalizer=0.07
2558
+ iou_loss=ciou
2559
+ nms_kind=greedynms
2560
+ beta_nms=0.6
2561
+
2562
+
2563
+ # YOLO-4
2564
+
2565
+ [route]
2566
+ layers = 306
2567
+
2568
+ [convolutional]
2569
+ batch_normalize=1
2570
+ size=3
2571
+ stride=1
2572
+ pad=1
2573
+ filters=640
2574
+ activation=mish
2575
+
2576
+ [convolutional]
2577
+ size=1
2578
+ stride=1
2579
+ pad=1
2580
+ filters=340
2581
+ activation=linear
2582
+
2583
+ [yolo]
2584
+ mask = 4,5,6,7
2585
+ anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408
2586
+ classes=80
2587
+ num=20
2588
+ jitter=.3
2589
+ ignore_thresh = .7
2590
+ truth_thresh = 1
2591
+ random=1
2592
+ scale_x_y = 1.05
2593
+ iou_thresh=0.213
2594
+ cls_normalizer=1.0
2595
+ iou_normalizer=0.07
2596
+ iou_loss=ciou
2597
+ nms_kind=greedynms
2598
+ beta_nms=0.6
2599
+
2600
+
2601
+ # YOLO-5
2602
+
2603
+ [route]
2604
+ layers = 319
2605
+
2606
+ [convolutional]
2607
+ batch_normalize=1
2608
+ size=3
2609
+ stride=1
2610
+ pad=1
2611
+ filters=1280
2612
+ activation=mish
2613
+
2614
+ [convolutional]
2615
+ size=1
2616
+ stride=1
2617
+ pad=1
2618
+ filters=340
2619
+ activation=linear
2620
+
2621
+ [yolo]
2622
+ mask = 8,9,10,11
2623
+ anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408
2624
+ classes=80
2625
+ num=20
2626
+ jitter=.3
2627
+ ignore_thresh = .7
2628
+ truth_thresh = 1
2629
+ random=1
2630
+ scale_x_y = 1.05
2631
+ iou_thresh=0.213
2632
+ cls_normalizer=1.0
2633
+ iou_normalizer=0.07
2634
+ iou_loss=ciou
2635
+ nms_kind=greedynms
2636
+ beta_nms=0.6
2637
+
2638
+
2639
+ # YOLO-6
2640
+
2641
+ [route]
2642
+ layers = 332
2643
+
2644
+ [convolutional]
2645
+ batch_normalize=1
2646
+ size=3
2647
+ stride=1
2648
+ pad=1
2649
+ filters=1280
2650
+ activation=mish
2651
+
2652
+ [convolutional]
2653
+ size=1
2654
+ stride=1
2655
+ pad=1
2656
+ filters=340
2657
+ activation=linear
2658
+
2659
+ [yolo]
2660
+ mask = 12,13,14,15
2661
+ anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408
2662
+ classes=80
2663
+ num=20
2664
+ jitter=.3
2665
+ ignore_thresh = .7
2666
+ truth_thresh = 1
2667
+ random=1
2668
+ scale_x_y = 1.05
2669
+ iou_thresh=0.213
2670
+ cls_normalizer=1.0
2671
+ iou_normalizer=0.07
2672
+ iou_loss=ciou
2673
+ nms_kind=greedynms
2674
+ beta_nms=0.6
2675
+
2676
+
2677
+ # YOLO-7
2678
+
2679
+ [route]
2680
+ layers = 345
2681
+
2682
+ [convolutional]
2683
+ batch_normalize=1
2684
+ size=3
2685
+ stride=1
2686
+ pad=1
2687
+ filters=1280
2688
+ activation=mish
2689
+
2690
+ [convolutional]
2691
+ size=1
2692
+ stride=1
2693
+ pad=1
2694
+ filters=340
2695
+ activation=linear
2696
+
2697
+ [yolo]
2698
+ mask = 16,17,18,19
2699
+ anchors = 13,17, 22,25, 27,66, 55,41, 57,88, 112,69, 69,177, 136,138, 136,138, 287,114, 134,275, 268,248, 268,248, 232,504, 445,416, 640,640, 812,393, 477,808, 1070,908, 1408,1408
2700
+ classes=80
2701
+ num=20
2702
+ jitter=.3
2703
+ ignore_thresh = .7
2704
+ truth_thresh = 1
2705
+ random=1
2706
+ scale_x_y = 1.05
2707
+ iou_thresh=0.213
2708
+ cls_normalizer=1.0
2709
+ iou_normalizer=0.07
2710
+ iou_loss=ciou
2711
+ nms_kind=greedynms
2712
+ beta_nms=0.6
2713
+
2714
+ # ============ End of Head ============ #
data/coco.names ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ person
2
+ bicycle
3
+ car
4
+ motorcycle
5
+ airplane
6
+ bus
7
+ train
8
+ truck
9
+ boat
10
+ traffic light
11
+ fire hydrant
12
+ stop sign
13
+ parking meter
14
+ bench
15
+ bird
16
+ cat
17
+ dog
18
+ horse
19
+ sheep
20
+ cow
21
+ elephant
22
+ bear
23
+ zebra
24
+ giraffe
25
+ backpack
26
+ umbrella
27
+ handbag
28
+ tie
29
+ suitcase
30
+ frisbee
31
+ skis
32
+ snowboard
33
+ sports ball
34
+ kite
35
+ baseball bat
36
+ baseball glove
37
+ skateboard
38
+ surfboard
39
+ tennis racket
40
+ bottle
41
+ wine glass
42
+ cup
43
+ fork
44
+ knife
45
+ spoon
46
+ bowl
47
+ banana
48
+ apple
49
+ sandwich
50
+ orange
51
+ broccoli
52
+ carrot
53
+ hot dog
54
+ pizza
55
+ donut
56
+ cake
57
+ chair
58
+ couch
59
+ potted plant
60
+ bed
61
+ dining table
62
+ toilet
63
+ tv
64
+ laptop
65
+ mouse
66
+ remote
67
+ keyboard
68
+ cell phone
69
+ microwave
70
+ oven
71
+ toaster
72
+ sink
73
+ refrigerator
74
+ book
75
+ clock
76
+ vase
77
+ scissors
78
+ teddy bear
79
+ hair drier
80
+ toothbrush
data/coco.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # train and val datasets (image directory or *.txt file with image paths)
2
+ train: ../coco/train2017.txt # 118k images
3
+ val: ../coco/val2017.txt # 5k images
4
+ test: ../coco/test-dev2017.txt # 20k images for submission to https://competitions.codalab.org/competitions/20794
5
+
6
+ # number of classes
7
+ nc: 80
8
+
9
+ # class names
10
+ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
11
+ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
12
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
13
+ 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
14
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
15
+ 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
16
+ 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
17
+ 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
18
+ 'hair drier', 'toothbrush']
data/hyp.finetune.1280.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 0.05 # box loss gain
9
+ cls: 0.5 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 1.0 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ iou_t: 0.20 # IoU training threshold
14
+ anchor_t: 4.0 # anchor-multiple threshold
15
+ # anchors: 3 # anchors per output layer (0 to ignore)
16
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
+ degrees: 0.0 # image rotation (+/- deg)
21
+ translate: 0.5 # image translation (+/- fraction)
22
+ scale: 0.8 # image scale (+/- gain)
23
+ shear: 0.0 # image shear (+/- deg)
24
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
+ flipud: 0.0 # image flip up-down (probability)
26
+ fliplr: 0.5 # image flip left-right (probability)
27
+ mosaic: 1.0 # image mosaic (probability)
28
+ mixup: 0.2 # image mixup (probability)
data/hyp.scratch.1280.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 0.05 # box loss gain
9
+ cls: 0.5 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 1.0 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ iou_t: 0.20 # IoU training threshold
14
+ anchor_t: 4.0 # anchor-multiple threshold
15
+ # anchors: 3 # anchors per output layer (0 to ignore)
16
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
+ degrees: 0.0 # image rotation (+/- deg)
21
+ translate: 0.5 # image translation (+/- fraction)
22
+ scale: 0.5 # image scale (+/- gain)
23
+ shear: 0.0 # image shear (+/- deg)
24
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
+ flipud: 0.0 # image flip up-down (probability)
26
+ fliplr: 0.5 # image flip left-right (probability)
27
+ mosaic: 1.0 # image mosaic (probability)
28
+ mixup: 0.0 # image mixup (probability)
data/hyp.scratch.640.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
2
+ lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
3
+ momentum: 0.937 # SGD momentum/Adam beta1
4
+ weight_decay: 0.0005 # optimizer weight decay 5e-4
5
+ warmup_epochs: 3.0 # warmup epochs (fractions ok)
6
+ warmup_momentum: 0.8 # warmup initial momentum
7
+ warmup_bias_lr: 0.1 # warmup initial bias lr
8
+ box: 0.05 # box loss gain
9
+ cls: 0.3 # cls loss gain
10
+ cls_pw: 1.0 # cls BCELoss positive_weight
11
+ obj: 0.7 # obj loss gain (scale with pixels)
12
+ obj_pw: 1.0 # obj BCELoss positive_weight
13
+ iou_t: 0.20 # IoU training threshold
14
+ anchor_t: 4.0 # anchor-multiple threshold
15
+ # anchors: 3 # anchors per output layer (0 to ignore)
16
+ fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
+ hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
+ hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
+ hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
+ degrees: 0.0 # image rotation (+/- deg)
21
+ translate: 0.1 # image translation (+/- fraction)
22
+ scale: 0.9 # image scale (+/- gain)
23
+ shear: 0.0 # image shear (+/- deg)
24
+ perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
+ flipud: 0.0 # image flip up-down (probability)
26
+ fliplr: 0.5 # image flip left-right (probability)
27
+ mosaic: 1.0 # image mosaic (probability)
28
+ mixup: 0.0 # image mixup (probability)
deep_sort_pytorch/.DS_Store ADDED
Binary file (6.15 kB). View file
 
deep_sort_pytorch/.gitignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Folders
2
+ __pycache__/
3
+ build/
4
+ *.egg-info
5
+
6
+
7
+ # Files
8
+ *.weights
9
+ *.t7
10
+ *.mp4
11
+ *.avi
12
+ *.so
13
+ *.txt
deep_sort_pytorch/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2020 Ziqiang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
deep_sort_pytorch/README.md ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deep Sort with PyTorch
2
+
3
+ ![](demo/demo.gif)
4
+
5
+ ## Update(1-1-2020)
6
+ Changes
7
+ - fix bugs
8
+ - refactor code
9
+ - accelerate detection by adding nms on gpu
10
+
11
+ ## Latest Update(07-22)
12
+ Changes
13
+ - bug fix (Thanks @JieChen91 and @yingsen1 for bug reporting).
14
+ - use batching for feature extraction on each frame, which leads to a small speed-up.
15
+ - code improvement.
16
+
17
+ Further improvement directions
18
+ - Train detector on specific dataset rather than the official one.
19
+ - Retrain REID model on pedestrian dataset for better performance.
20
+ - Replace YOLOv3 detector with advanced ones.
21
+
22
+ **Any contributions to this repository are welcome!**
23
+
24
+
25
+ ## Introduction
26
+ This is an implementation of the MOT tracking algorithm Deep SORT. Deep SORT is basically the same as SORT, but adds a CNN model to extract features from the image region of each person bounded by a detector. This CNN model is in fact a RE-ID model; the detector used in the [PAPER](https://arxiv.org/abs/1703.07402) is FasterRCNN, and the original source code is [HERE](https://github.com/nwojke/deep_sort).
27
+ However, in the original code the CNN model is implemented with TensorFlow, which I'm not familiar with. So I re-implemented the CNN feature extraction model with PyTorch, and changed the CNN model a little bit. Also, I use **YOLOv3** to generate bboxes instead of FasterRCNN.
28
+
29
+ ## Dependencies
30
+ - python 3 (python2 not sure)
31
+ - numpy
32
+ - scipy
33
+ - opencv-python
34
+ - sklearn
35
+ - torch >= 0.4
36
+ - torchvision >= 0.1
37
+ - pillow
38
+ - vizer
39
+ - edict
40
+
41
+ ## Quick Start
42
+ 0. Check all dependencies installed
43
+ ```bash
44
+ pip install -r requirements.txt
45
+ ```
46
+ For users in China, you can specify a PyPI mirror to speed up installation, like:
47
+ ```bash
48
+ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
49
+ ```
50
+
51
+ 1. Clone this repository
52
+ ```
53
+ git clone git@github.com:ZQPei/deep_sort_pytorch.git
54
+ ```
55
+
56
+ 2. Download YOLOv3 parameters
57
+ ```
58
+ cd detector/YOLOv3/weight/
59
+ wget https://pjreddie.com/media/files/yolov3.weights
60
+ wget https://pjreddie.com/media/files/yolov3-tiny.weights
61
+ cd ../../../
62
+ ```
63
+
64
+ 3. Download deepsort parameters ckpt.t7
65
+ ```
66
+ cd deep_sort/deep/checkpoint
67
+ # download ckpt.t7 from
68
+ https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6 to this folder
69
+ cd ../../../
70
+ ```
71
+
72
+ 4. Compile nms module
73
+ ```bash
74
+ cd detector/YOLOv3/nms
75
+ sh build.sh
76
+ cd ../../..
77
+ ```
78
+
79
+ Notice:
80
+ If compiling fails, the simplest way is to **upgrade your pytorch to >= 1.1 and torchvision to >= 0.3**, which avoids the troublesome compilation problems that are most likely caused by either `gcc version too low` or `libraries missing`.
81
+
82
+ 5. Run demo
83
+ ```
84
+ usage: python yolov3_deepsort.py VIDEO_PATH
85
+ [--help]
86
+ [--frame_interval FRAME_INTERVAL]
87
+ [--config_detection CONFIG_DETECTION]
88
+ [--config_deepsort CONFIG_DEEPSORT]
89
+ [--display]
90
+ [--display_width DISPLAY_WIDTH]
91
+ [--display_height DISPLAY_HEIGHT]
92
+ [--save_path SAVE_PATH]
93
+ [--cpu]
94
+
95
+ # yolov3 + deepsort
96
+ python yolov3_deepsort.py [VIDEO_PATH]
97
+
98
+ # yolov3_tiny + deepsort
99
+ python yolov3_deepsort.py [VIDEO_PATH] --config_detection ./configs/yolov3_tiny.yaml
100
+
101
+ # yolov3 + deepsort on webcam
102
+ python3 yolov3_deepsort.py /dev/video0 --camera 0
103
+
104
+ # yolov3_tiny + deepsort on webcam
105
+ python3 yolov3_deepsort.py /dev/video0 --config_detection ./configs/yolov3_tiny.yaml --camera 0
106
+ ```
107
+ Use `--display` to enable display.
108
+ Results will be saved to `./output/results.avi` and `./output/results.txt`.
109
+
110
+ All files above can also be accessed from BaiduDisk!
111
+ linker:[BaiduDisk](https://pan.baidu.com/s/1YJ1iPpdFTlUyLFoonYvozg)
112
+ passwd:fbuw
113
+
114
+ ## Training the RE-ID model
115
+ The original model used in the paper is in original_model.py, and its parameters are available here: [original_ckpt.t7](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6).
116
+
117
+ To train the model, first you need to download the [Market1501](http://www.liangzheng.com.cn/Project/project_reid.html) dataset or the [Mars](http://www.liangzheng.com.cn/Project/project_mars.html) dataset.
118
+
119
+ Then you can try [train.py](deep_sort/deep/train.py) to train your own parameters and evaluate them using [test.py](deep_sort/deep/test.py) and [evaluate.py](deep_sort/deep/evalute.py).
120
+ ![train.jpg](deep_sort/deep/train.jpg)
121
+
122
+ ## Demo videos and images
123
+ [demo.avi](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6)
124
+ [demo2.avi](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6)
125
+
126
+ ![1.jpg](demo/1.jpg)
127
+ ![2.jpg](demo/2.jpg)
128
+
129
+
130
+ ## References
131
+ - paper: [Simple Online and Realtime Tracking with a Deep Association Metric](https://arxiv.org/abs/1703.07402)
132
+
133
+ - code: [nwojke/deep_sort](https://github.com/nwojke/deep_sort)
134
+
135
+ - paper: [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf)
136
+
137
+ - code: [Joseph Redmon/yolov3](https://pjreddie.com/darknet/yolo/)
deep_sort_pytorch/configs/deep_sort.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ DEEPSORT:
2
+ REID_CKPT: "deep_sort_pytorch/deep_sort/deep/checkpoint/ckpt.t7"
3
+ MAX_DIST: 0.2
4
+ MIN_CONFIDENCE: 0.3
5
+ NMS_MAX_OVERLAP: 0.5
6
+ MAX_IOU_DISTANCE: 0.7
7
+ MAX_AGE: 70
8
+ N_INIT: 3
9
+ NN_BUDGET: 100
10
+
deep_sort_pytorch/deep_sort/.DS_Store ADDED
Binary file (8.2 kB). View file
 
deep_sort_pytorch/deep_sort/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Deep Sort
2
+
3
+ This is the implementation of deep sort with pytorch.
deep_sort_pytorch/deep_sort/__init__.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .deep_sort import DeepSort
2
+
3
+
4
+ __all__ = ['DeepSort', 'build_tracker']
5
+
6
+
7
+ def build_tracker(cfg, use_cuda):
8
+ return DeepSort(cfg.DEEPSORT.REID_CKPT,
9
+ max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
10
+ nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
11
+ max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda)
12
+
13
+
14
+
15
+
16
+
17
+
18
+
19
+
20
+
21
+
deep_sort_pytorch/deep_sort/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (648 Bytes). View file