kamcio1989 commited on
Commit
c289710
·
verified ·
1 Parent(s): e5ad597

Upload 8 files

Browse files
Files changed (8) hide show
  1. .gitattributes +35 -35
  2. MobileNetSSD_deploy.caffemodel +0 -0
  3. MobileNetSSD_deploy.prototxt +1912 -0
  4. README.md +14 -14
  5. app.py +234 -246
  6. models.py +3 -3
  7. requirements.txt +7 -7
  8. utils.py +117 -6
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
MobileNetSSD_deploy.caffemodel ADDED
The diff for this file is too large to render. See raw diff
 
MobileNetSSD_deploy.prototxt ADDED
@@ -0,0 +1,1912 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "MobileNet-SSD"
2
+ input: "data"
3
+ input_shape {
4
+ dim: 1
5
+ dim: 3
6
+ dim: 300
7
+ dim: 300
8
+ }
9
+ layer {
10
+ name: "conv0"
11
+ type: "Convolution"
12
+ bottom: "data"
13
+ top: "conv0"
14
+ param {
15
+ lr_mult: 1.0
16
+ decay_mult: 1.0
17
+ }
18
+ param {
19
+ lr_mult: 2.0
20
+ decay_mult: 0.0
21
+ }
22
+ convolution_param {
23
+ num_output: 32
24
+ pad: 1
25
+ kernel_size: 3
26
+ stride: 2
27
+ weight_filler {
28
+ type: "msra"
29
+ }
30
+ bias_filler {
31
+ type: "constant"
32
+ value: 0.0
33
+ }
34
+ }
35
+ }
36
+ layer {
37
+ name: "conv0/relu"
38
+ type: "ReLU"
39
+ bottom: "conv0"
40
+ top: "conv0"
41
+ }
42
+ layer {
43
+ name: "conv1/dw"
44
+ type: "Convolution"
45
+ bottom: "conv0"
46
+ top: "conv1/dw"
47
+ param {
48
+ lr_mult: 1.0
49
+ decay_mult: 1.0
50
+ }
51
+ param {
52
+ lr_mult: 2.0
53
+ decay_mult: 0.0
54
+ }
55
+ convolution_param {
56
+ num_output: 32
57
+ pad: 1
58
+ kernel_size: 3
59
+ group: 32
60
+ engine: CAFFE
61
+ weight_filler {
62
+ type: "msra"
63
+ }
64
+ bias_filler {
65
+ type: "constant"
66
+ value: 0.0
67
+ }
68
+ }
69
+ }
70
+ layer {
71
+ name: "conv1/dw/relu"
72
+ type: "ReLU"
73
+ bottom: "conv1/dw"
74
+ top: "conv1/dw"
75
+ }
76
+ layer {
77
+ name: "conv1"
78
+ type: "Convolution"
79
+ bottom: "conv1/dw"
80
+ top: "conv1"
81
+ param {
82
+ lr_mult: 1.0
83
+ decay_mult: 1.0
84
+ }
85
+ param {
86
+ lr_mult: 2.0
87
+ decay_mult: 0.0
88
+ }
89
+ convolution_param {
90
+ num_output: 64
91
+ kernel_size: 1
92
+ weight_filler {
93
+ type: "msra"
94
+ }
95
+ bias_filler {
96
+ type: "constant"
97
+ value: 0.0
98
+ }
99
+ }
100
+ }
101
+ layer {
102
+ name: "conv1/relu"
103
+ type: "ReLU"
104
+ bottom: "conv1"
105
+ top: "conv1"
106
+ }
107
+ layer {
108
+ name: "conv2/dw"
109
+ type: "Convolution"
110
+ bottom: "conv1"
111
+ top: "conv2/dw"
112
+ param {
113
+ lr_mult: 1.0
114
+ decay_mult: 1.0
115
+ }
116
+ param {
117
+ lr_mult: 2.0
118
+ decay_mult: 0.0
119
+ }
120
+ convolution_param {
121
+ num_output: 64
122
+ pad: 1
123
+ kernel_size: 3
124
+ stride: 2
125
+ group: 64
126
+ engine: CAFFE
127
+ weight_filler {
128
+ type: "msra"
129
+ }
130
+ bias_filler {
131
+ type: "constant"
132
+ value: 0.0
133
+ }
134
+ }
135
+ }
136
+ layer {
137
+ name: "conv2/dw/relu"
138
+ type: "ReLU"
139
+ bottom: "conv2/dw"
140
+ top: "conv2/dw"
141
+ }
142
+ layer {
143
+ name: "conv2"
144
+ type: "Convolution"
145
+ bottom: "conv2/dw"
146
+ top: "conv2"
147
+ param {
148
+ lr_mult: 1.0
149
+ decay_mult: 1.0
150
+ }
151
+ param {
152
+ lr_mult: 2.0
153
+ decay_mult: 0.0
154
+ }
155
+ convolution_param {
156
+ num_output: 128
157
+ kernel_size: 1
158
+ weight_filler {
159
+ type: "msra"
160
+ }
161
+ bias_filler {
162
+ type: "constant"
163
+ value: 0.0
164
+ }
165
+ }
166
+ }
167
+ layer {
168
+ name: "conv2/relu"
169
+ type: "ReLU"
170
+ bottom: "conv2"
171
+ top: "conv2"
172
+ }
173
+ layer {
174
+ name: "conv3/dw"
175
+ type: "Convolution"
176
+ bottom: "conv2"
177
+ top: "conv3/dw"
178
+ param {
179
+ lr_mult: 1.0
180
+ decay_mult: 1.0
181
+ }
182
+ param {
183
+ lr_mult: 2.0
184
+ decay_mult: 0.0
185
+ }
186
+ convolution_param {
187
+ num_output: 128
188
+ pad: 1
189
+ kernel_size: 3
190
+ group: 128
191
+ engine: CAFFE
192
+ weight_filler {
193
+ type: "msra"
194
+ }
195
+ bias_filler {
196
+ type: "constant"
197
+ value: 0.0
198
+ }
199
+ }
200
+ }
201
+ layer {
202
+ name: "conv3/dw/relu"
203
+ type: "ReLU"
204
+ bottom: "conv3/dw"
205
+ top: "conv3/dw"
206
+ }
207
+ layer {
208
+ name: "conv3"
209
+ type: "Convolution"
210
+ bottom: "conv3/dw"
211
+ top: "conv3"
212
+ param {
213
+ lr_mult: 1.0
214
+ decay_mult: 1.0
215
+ }
216
+ param {
217
+ lr_mult: 2.0
218
+ decay_mult: 0.0
219
+ }
220
+ convolution_param {
221
+ num_output: 128
222
+ kernel_size: 1
223
+ weight_filler {
224
+ type: "msra"
225
+ }
226
+ bias_filler {
227
+ type: "constant"
228
+ value: 0.0
229
+ }
230
+ }
231
+ }
232
+ layer {
233
+ name: "conv3/relu"
234
+ type: "ReLU"
235
+ bottom: "conv3"
236
+ top: "conv3"
237
+ }
238
+ layer {
239
+ name: "conv4/dw"
240
+ type: "Convolution"
241
+ bottom: "conv3"
242
+ top: "conv4/dw"
243
+ param {
244
+ lr_mult: 1.0
245
+ decay_mult: 1.0
246
+ }
247
+ param {
248
+ lr_mult: 2.0
249
+ decay_mult: 0.0
250
+ }
251
+ convolution_param {
252
+ num_output: 128
253
+ pad: 1
254
+ kernel_size: 3
255
+ stride: 2
256
+ group: 128
257
+ engine: CAFFE
258
+ weight_filler {
259
+ type: "msra"
260
+ }
261
+ bias_filler {
262
+ type: "constant"
263
+ value: 0.0
264
+ }
265
+ }
266
+ }
267
+ layer {
268
+ name: "conv4/dw/relu"
269
+ type: "ReLU"
270
+ bottom: "conv4/dw"
271
+ top: "conv4/dw"
272
+ }
273
+ layer {
274
+ name: "conv4"
275
+ type: "Convolution"
276
+ bottom: "conv4/dw"
277
+ top: "conv4"
278
+ param {
279
+ lr_mult: 1.0
280
+ decay_mult: 1.0
281
+ }
282
+ param {
283
+ lr_mult: 2.0
284
+ decay_mult: 0.0
285
+ }
286
+ convolution_param {
287
+ num_output: 256
288
+ kernel_size: 1
289
+ weight_filler {
290
+ type: "msra"
291
+ }
292
+ bias_filler {
293
+ type: "constant"
294
+ value: 0.0
295
+ }
296
+ }
297
+ }
298
+ layer {
299
+ name: "conv4/relu"
300
+ type: "ReLU"
301
+ bottom: "conv4"
302
+ top: "conv4"
303
+ }
304
+ layer {
305
+ name: "conv5/dw"
306
+ type: "Convolution"
307
+ bottom: "conv4"
308
+ top: "conv5/dw"
309
+ param {
310
+ lr_mult: 1.0
311
+ decay_mult: 1.0
312
+ }
313
+ param {
314
+ lr_mult: 2.0
315
+ decay_mult: 0.0
316
+ }
317
+ convolution_param {
318
+ num_output: 256
319
+ pad: 1
320
+ kernel_size: 3
321
+ group: 256
322
+ engine: CAFFE
323
+ weight_filler {
324
+ type: "msra"
325
+ }
326
+ bias_filler {
327
+ type: "constant"
328
+ value: 0.0
329
+ }
330
+ }
331
+ }
332
+ layer {
333
+ name: "conv5/dw/relu"
334
+ type: "ReLU"
335
+ bottom: "conv5/dw"
336
+ top: "conv5/dw"
337
+ }
338
+ layer {
339
+ name: "conv5"
340
+ type: "Convolution"
341
+ bottom: "conv5/dw"
342
+ top: "conv5"
343
+ param {
344
+ lr_mult: 1.0
345
+ decay_mult: 1.0
346
+ }
347
+ param {
348
+ lr_mult: 2.0
349
+ decay_mult: 0.0
350
+ }
351
+ convolution_param {
352
+ num_output: 256
353
+ kernel_size: 1
354
+ weight_filler {
355
+ type: "msra"
356
+ }
357
+ bias_filler {
358
+ type: "constant"
359
+ value: 0.0
360
+ }
361
+ }
362
+ }
363
+ layer {
364
+ name: "conv5/relu"
365
+ type: "ReLU"
366
+ bottom: "conv5"
367
+ top: "conv5"
368
+ }
369
+ layer {
370
+ name: "conv6/dw"
371
+ type: "Convolution"
372
+ bottom: "conv5"
373
+ top: "conv6/dw"
374
+ param {
375
+ lr_mult: 1.0
376
+ decay_mult: 1.0
377
+ }
378
+ param {
379
+ lr_mult: 2.0
380
+ decay_mult: 0.0
381
+ }
382
+ convolution_param {
383
+ num_output: 256
384
+ pad: 1
385
+ kernel_size: 3
386
+ stride: 2
387
+ group: 256
388
+ engine: CAFFE
389
+ weight_filler {
390
+ type: "msra"
391
+ }
392
+ bias_filler {
393
+ type: "constant"
394
+ value: 0.0
395
+ }
396
+ }
397
+ }
398
+ layer {
399
+ name: "conv6/dw/relu"
400
+ type: "ReLU"
401
+ bottom: "conv6/dw"
402
+ top: "conv6/dw"
403
+ }
404
+ layer {
405
+ name: "conv6"
406
+ type: "Convolution"
407
+ bottom: "conv6/dw"
408
+ top: "conv6"
409
+ param {
410
+ lr_mult: 1.0
411
+ decay_mult: 1.0
412
+ }
413
+ param {
414
+ lr_mult: 2.0
415
+ decay_mult: 0.0
416
+ }
417
+ convolution_param {
418
+ num_output: 512
419
+ kernel_size: 1
420
+ weight_filler {
421
+ type: "msra"
422
+ }
423
+ bias_filler {
424
+ type: "constant"
425
+ value: 0.0
426
+ }
427
+ }
428
+ }
429
+ layer {
430
+ name: "conv6/relu"
431
+ type: "ReLU"
432
+ bottom: "conv6"
433
+ top: "conv6"
434
+ }
435
+ layer {
436
+ name: "conv7/dw"
437
+ type: "Convolution"
438
+ bottom: "conv6"
439
+ top: "conv7/dw"
440
+ param {
441
+ lr_mult: 1.0
442
+ decay_mult: 1.0
443
+ }
444
+ param {
445
+ lr_mult: 2.0
446
+ decay_mult: 0.0
447
+ }
448
+ convolution_param {
449
+ num_output: 512
450
+ pad: 1
451
+ kernel_size: 3
452
+ group: 512
453
+ engine: CAFFE
454
+ weight_filler {
455
+ type: "msra"
456
+ }
457
+ bias_filler {
458
+ type: "constant"
459
+ value: 0.0
460
+ }
461
+ }
462
+ }
463
+ layer {
464
+ name: "conv7/dw/relu"
465
+ type: "ReLU"
466
+ bottom: "conv7/dw"
467
+ top: "conv7/dw"
468
+ }
469
+ layer {
470
+ name: "conv7"
471
+ type: "Convolution"
472
+ bottom: "conv7/dw"
473
+ top: "conv7"
474
+ param {
475
+ lr_mult: 1.0
476
+ decay_mult: 1.0
477
+ }
478
+ param {
479
+ lr_mult: 2.0
480
+ decay_mult: 0.0
481
+ }
482
+ convolution_param {
483
+ num_output: 512
484
+ kernel_size: 1
485
+ weight_filler {
486
+ type: "msra"
487
+ }
488
+ bias_filler {
489
+ type: "constant"
490
+ value: 0.0
491
+ }
492
+ }
493
+ }
494
+ layer {
495
+ name: "conv7/relu"
496
+ type: "ReLU"
497
+ bottom: "conv7"
498
+ top: "conv7"
499
+ }
500
+ layer {
501
+ name: "conv8/dw"
502
+ type: "Convolution"
503
+ bottom: "conv7"
504
+ top: "conv8/dw"
505
+ param {
506
+ lr_mult: 1.0
507
+ decay_mult: 1.0
508
+ }
509
+ param {
510
+ lr_mult: 2.0
511
+ decay_mult: 0.0
512
+ }
513
+ convolution_param {
514
+ num_output: 512
515
+ pad: 1
516
+ kernel_size: 3
517
+ group: 512
518
+ engine: CAFFE
519
+ weight_filler {
520
+ type: "msra"
521
+ }
522
+ bias_filler {
523
+ type: "constant"
524
+ value: 0.0
525
+ }
526
+ }
527
+ }
528
+ layer {
529
+ name: "conv8/dw/relu"
530
+ type: "ReLU"
531
+ bottom: "conv8/dw"
532
+ top: "conv8/dw"
533
+ }
534
+ layer {
535
+ name: "conv8"
536
+ type: "Convolution"
537
+ bottom: "conv8/dw"
538
+ top: "conv8"
539
+ param {
540
+ lr_mult: 1.0
541
+ decay_mult: 1.0
542
+ }
543
+ param {
544
+ lr_mult: 2.0
545
+ decay_mult: 0.0
546
+ }
547
+ convolution_param {
548
+ num_output: 512
549
+ kernel_size: 1
550
+ weight_filler {
551
+ type: "msra"
552
+ }
553
+ bias_filler {
554
+ type: "constant"
555
+ value: 0.0
556
+ }
557
+ }
558
+ }
559
+ layer {
560
+ name: "conv8/relu"
561
+ type: "ReLU"
562
+ bottom: "conv8"
563
+ top: "conv8"
564
+ }
565
+ layer {
566
+ name: "conv9/dw"
567
+ type: "Convolution"
568
+ bottom: "conv8"
569
+ top: "conv9/dw"
570
+ param {
571
+ lr_mult: 1.0
572
+ decay_mult: 1.0
573
+ }
574
+ param {
575
+ lr_mult: 2.0
576
+ decay_mult: 0.0
577
+ }
578
+ convolution_param {
579
+ num_output: 512
580
+ pad: 1
581
+ kernel_size: 3
582
+ group: 512
583
+ engine: CAFFE
584
+ weight_filler {
585
+ type: "msra"
586
+ }
587
+ bias_filler {
588
+ type: "constant"
589
+ value: 0.0
590
+ }
591
+ }
592
+ }
593
+ layer {
594
+ name: "conv9/dw/relu"
595
+ type: "ReLU"
596
+ bottom: "conv9/dw"
597
+ top: "conv9/dw"
598
+ }
599
+ layer {
600
+ name: "conv9"
601
+ type: "Convolution"
602
+ bottom: "conv9/dw"
603
+ top: "conv9"
604
+ param {
605
+ lr_mult: 1.0
606
+ decay_mult: 1.0
607
+ }
608
+ param {
609
+ lr_mult: 2.0
610
+ decay_mult: 0.0
611
+ }
612
+ convolution_param {
613
+ num_output: 512
614
+ kernel_size: 1
615
+ weight_filler {
616
+ type: "msra"
617
+ }
618
+ bias_filler {
619
+ type: "constant"
620
+ value: 0.0
621
+ }
622
+ }
623
+ }
624
+ layer {
625
+ name: "conv9/relu"
626
+ type: "ReLU"
627
+ bottom: "conv9"
628
+ top: "conv9"
629
+ }
630
+ layer {
631
+ name: "conv10/dw"
632
+ type: "Convolution"
633
+ bottom: "conv9"
634
+ top: "conv10/dw"
635
+ param {
636
+ lr_mult: 1.0
637
+ decay_mult: 1.0
638
+ }
639
+ param {
640
+ lr_mult: 2.0
641
+ decay_mult: 0.0
642
+ }
643
+ convolution_param {
644
+ num_output: 512
645
+ pad: 1
646
+ kernel_size: 3
647
+ group: 512
648
+ engine: CAFFE
649
+ weight_filler {
650
+ type: "msra"
651
+ }
652
+ bias_filler {
653
+ type: "constant"
654
+ value: 0.0
655
+ }
656
+ }
657
+ }
658
+ layer {
659
+ name: "conv10/dw/relu"
660
+ type: "ReLU"
661
+ bottom: "conv10/dw"
662
+ top: "conv10/dw"
663
+ }
664
+ layer {
665
+ name: "conv10"
666
+ type: "Convolution"
667
+ bottom: "conv10/dw"
668
+ top: "conv10"
669
+ param {
670
+ lr_mult: 1.0
671
+ decay_mult: 1.0
672
+ }
673
+ param {
674
+ lr_mult: 2.0
675
+ decay_mult: 0.0
676
+ }
677
+ convolution_param {
678
+ num_output: 512
679
+ kernel_size: 1
680
+ weight_filler {
681
+ type: "msra"
682
+ }
683
+ bias_filler {
684
+ type: "constant"
685
+ value: 0.0
686
+ }
687
+ }
688
+ }
689
+ layer {
690
+ name: "conv10/relu"
691
+ type: "ReLU"
692
+ bottom: "conv10"
693
+ top: "conv10"
694
+ }
695
+ layer {
696
+ name: "conv11/dw"
697
+ type: "Convolution"
698
+ bottom: "conv10"
699
+ top: "conv11/dw"
700
+ param {
701
+ lr_mult: 1.0
702
+ decay_mult: 1.0
703
+ }
704
+ param {
705
+ lr_mult: 2.0
706
+ decay_mult: 0.0
707
+ }
708
+ convolution_param {
709
+ num_output: 512
710
+ pad: 1
711
+ kernel_size: 3
712
+ group: 512
713
+ engine: CAFFE
714
+ weight_filler {
715
+ type: "msra"
716
+ }
717
+ bias_filler {
718
+ type: "constant"
719
+ value: 0.0
720
+ }
721
+ }
722
+ }
723
+ layer {
724
+ name: "conv11/dw/relu"
725
+ type: "ReLU"
726
+ bottom: "conv11/dw"
727
+ top: "conv11/dw"
728
+ }
729
+ layer {
730
+ name: "conv11"
731
+ type: "Convolution"
732
+ bottom: "conv11/dw"
733
+ top: "conv11"
734
+ param {
735
+ lr_mult: 1.0
736
+ decay_mult: 1.0
737
+ }
738
+ param {
739
+ lr_mult: 2.0
740
+ decay_mult: 0.0
741
+ }
742
+ convolution_param {
743
+ num_output: 512
744
+ kernel_size: 1
745
+ weight_filler {
746
+ type: "msra"
747
+ }
748
+ bias_filler {
749
+ type: "constant"
750
+ value: 0.0
751
+ }
752
+ }
753
+ }
754
+ layer {
755
+ name: "conv11/relu"
756
+ type: "ReLU"
757
+ bottom: "conv11"
758
+ top: "conv11"
759
+ }
760
+ layer {
761
+ name: "conv12/dw"
762
+ type: "Convolution"
763
+ bottom: "conv11"
764
+ top: "conv12/dw"
765
+ param {
766
+ lr_mult: 1.0
767
+ decay_mult: 1.0
768
+ }
769
+ param {
770
+ lr_mult: 2.0
771
+ decay_mult: 0.0
772
+ }
773
+ convolution_param {
774
+ num_output: 512
775
+ pad: 1
776
+ kernel_size: 3
777
+ stride: 2
778
+ group: 512
779
+ engine: CAFFE
780
+ weight_filler {
781
+ type: "msra"
782
+ }
783
+ bias_filler {
784
+ type: "constant"
785
+ value: 0.0
786
+ }
787
+ }
788
+ }
789
+ layer {
790
+ name: "conv12/dw/relu"
791
+ type: "ReLU"
792
+ bottom: "conv12/dw"
793
+ top: "conv12/dw"
794
+ }
795
+ layer {
796
+ name: "conv12"
797
+ type: "Convolution"
798
+ bottom: "conv12/dw"
799
+ top: "conv12"
800
+ param {
801
+ lr_mult: 1.0
802
+ decay_mult: 1.0
803
+ }
804
+ param {
805
+ lr_mult: 2.0
806
+ decay_mult: 0.0
807
+ }
808
+ convolution_param {
809
+ num_output: 1024
810
+ kernel_size: 1
811
+ weight_filler {
812
+ type: "msra"
813
+ }
814
+ bias_filler {
815
+ type: "constant"
816
+ value: 0.0
817
+ }
818
+ }
819
+ }
820
+ layer {
821
+ name: "conv12/relu"
822
+ type: "ReLU"
823
+ bottom: "conv12"
824
+ top: "conv12"
825
+ }
826
+ layer {
827
+ name: "conv13/dw"
828
+ type: "Convolution"
829
+ bottom: "conv12"
830
+ top: "conv13/dw"
831
+ param {
832
+ lr_mult: 1.0
833
+ decay_mult: 1.0
834
+ }
835
+ param {
836
+ lr_mult: 2.0
837
+ decay_mult: 0.0
838
+ }
839
+ convolution_param {
840
+ num_output: 1024
841
+ pad: 1
842
+ kernel_size: 3
843
+ group: 1024
844
+ engine: CAFFE
845
+ weight_filler {
846
+ type: "msra"
847
+ }
848
+ bias_filler {
849
+ type: "constant"
850
+ value: 0.0
851
+ }
852
+ }
853
+ }
854
+ layer {
855
+ name: "conv13/dw/relu"
856
+ type: "ReLU"
857
+ bottom: "conv13/dw"
858
+ top: "conv13/dw"
859
+ }
860
+ layer {
861
+ name: "conv13"
862
+ type: "Convolution"
863
+ bottom: "conv13/dw"
864
+ top: "conv13"
865
+ param {
866
+ lr_mult: 1.0
867
+ decay_mult: 1.0
868
+ }
869
+ param {
870
+ lr_mult: 2.0
871
+ decay_mult: 0.0
872
+ }
873
+ convolution_param {
874
+ num_output: 1024
875
+ kernel_size: 1
876
+ weight_filler {
877
+ type: "msra"
878
+ }
879
+ bias_filler {
880
+ type: "constant"
881
+ value: 0.0
882
+ }
883
+ }
884
+ }
885
+ layer {
886
+ name: "conv13/relu"
887
+ type: "ReLU"
888
+ bottom: "conv13"
889
+ top: "conv13"
890
+ }
891
+ layer {
892
+ name: "conv14_1"
893
+ type: "Convolution"
894
+ bottom: "conv13"
895
+ top: "conv14_1"
896
+ param {
897
+ lr_mult: 1.0
898
+ decay_mult: 1.0
899
+ }
900
+ param {
901
+ lr_mult: 2.0
902
+ decay_mult: 0.0
903
+ }
904
+ convolution_param {
905
+ num_output: 256
906
+ kernel_size: 1
907
+ weight_filler {
908
+ type: "msra"
909
+ }
910
+ bias_filler {
911
+ type: "constant"
912
+ value: 0.0
913
+ }
914
+ }
915
+ }
916
+ layer {
917
+ name: "conv14_1/relu"
918
+ type: "ReLU"
919
+ bottom: "conv14_1"
920
+ top: "conv14_1"
921
+ }
922
+ layer {
923
+ name: "conv14_2"
924
+ type: "Convolution"
925
+ bottom: "conv14_1"
926
+ top: "conv14_2"
927
+ param {
928
+ lr_mult: 1.0
929
+ decay_mult: 1.0
930
+ }
931
+ param {
932
+ lr_mult: 2.0
933
+ decay_mult: 0.0
934
+ }
935
+ convolution_param {
936
+ num_output: 512
937
+ pad: 1
938
+ kernel_size: 3
939
+ stride: 2
940
+ weight_filler {
941
+ type: "msra"
942
+ }
943
+ bias_filler {
944
+ type: "constant"
945
+ value: 0.0
946
+ }
947
+ }
948
+ }
949
+ layer {
950
+ name: "conv14_2/relu"
951
+ type: "ReLU"
952
+ bottom: "conv14_2"
953
+ top: "conv14_2"
954
+ }
955
+ layer {
956
+ name: "conv15_1"
957
+ type: "Convolution"
958
+ bottom: "conv14_2"
959
+ top: "conv15_1"
960
+ param {
961
+ lr_mult: 1.0
962
+ decay_mult: 1.0
963
+ }
964
+ param {
965
+ lr_mult: 2.0
966
+ decay_mult: 0.0
967
+ }
968
+ convolution_param {
969
+ num_output: 128
970
+ kernel_size: 1
971
+ weight_filler {
972
+ type: "msra"
973
+ }
974
+ bias_filler {
975
+ type: "constant"
976
+ value: 0.0
977
+ }
978
+ }
979
+ }
980
+ layer {
981
+ name: "conv15_1/relu"
982
+ type: "ReLU"
983
+ bottom: "conv15_1"
984
+ top: "conv15_1"
985
+ }
986
+ layer {
987
+ name: "conv15_2"
988
+ type: "Convolution"
989
+ bottom: "conv15_1"
990
+ top: "conv15_2"
991
+ param {
992
+ lr_mult: 1.0
993
+ decay_mult: 1.0
994
+ }
995
+ param {
996
+ lr_mult: 2.0
997
+ decay_mult: 0.0
998
+ }
999
+ convolution_param {
1000
+ num_output: 256
1001
+ pad: 1
1002
+ kernel_size: 3
1003
+ stride: 2
1004
+ weight_filler {
1005
+ type: "msra"
1006
+ }
1007
+ bias_filler {
1008
+ type: "constant"
1009
+ value: 0.0
1010
+ }
1011
+ }
1012
+ }
1013
+ layer {
1014
+ name: "conv15_2/relu"
1015
+ type: "ReLU"
1016
+ bottom: "conv15_2"
1017
+ top: "conv15_2"
1018
+ }
1019
+ layer {
1020
+ name: "conv16_1"
1021
+ type: "Convolution"
1022
+ bottom: "conv15_2"
1023
+ top: "conv16_1"
1024
+ param {
1025
+ lr_mult: 1.0
1026
+ decay_mult: 1.0
1027
+ }
1028
+ param {
1029
+ lr_mult: 2.0
1030
+ decay_mult: 0.0
1031
+ }
1032
+ convolution_param {
1033
+ num_output: 128
1034
+ kernel_size: 1
1035
+ weight_filler {
1036
+ type: "msra"
1037
+ }
1038
+ bias_filler {
1039
+ type: "constant"
1040
+ value: 0.0
1041
+ }
1042
+ }
1043
+ }
1044
+ layer {
1045
+ name: "conv16_1/relu"
1046
+ type: "ReLU"
1047
+ bottom: "conv16_1"
1048
+ top: "conv16_1"
1049
+ }
1050
+ layer {
1051
+ name: "conv16_2"
1052
+ type: "Convolution"
1053
+ bottom: "conv16_1"
1054
+ top: "conv16_2"
1055
+ param {
1056
+ lr_mult: 1.0
1057
+ decay_mult: 1.0
1058
+ }
1059
+ param {
1060
+ lr_mult: 2.0
1061
+ decay_mult: 0.0
1062
+ }
1063
+ convolution_param {
1064
+ num_output: 256
1065
+ pad: 1
1066
+ kernel_size: 3
1067
+ stride: 2
1068
+ weight_filler {
1069
+ type: "msra"
1070
+ }
1071
+ bias_filler {
1072
+ type: "constant"
1073
+ value: 0.0
1074
+ }
1075
+ }
1076
+ }
1077
+ layer {
1078
+ name: "conv16_2/relu"
1079
+ type: "ReLU"
1080
+ bottom: "conv16_2"
1081
+ top: "conv16_2"
1082
+ }
1083
+ layer {
1084
+ name: "conv17_1"
1085
+ type: "Convolution"
1086
+ bottom: "conv16_2"
1087
+ top: "conv17_1"
1088
+ param {
1089
+ lr_mult: 1.0
1090
+ decay_mult: 1.0
1091
+ }
1092
+ param {
1093
+ lr_mult: 2.0
1094
+ decay_mult: 0.0
1095
+ }
1096
+ convolution_param {
1097
+ num_output: 64
1098
+ kernel_size: 1
1099
+ weight_filler {
1100
+ type: "msra"
1101
+ }
1102
+ bias_filler {
1103
+ type: "constant"
1104
+ value: 0.0
1105
+ }
1106
+ }
1107
+ }
1108
+ layer {
1109
+ name: "conv17_1/relu"
1110
+ type: "ReLU"
1111
+ bottom: "conv17_1"
1112
+ top: "conv17_1"
1113
+ }
1114
+ layer {
1115
+ name: "conv17_2"
1116
+ type: "Convolution"
1117
+ bottom: "conv17_1"
1118
+ top: "conv17_2"
1119
+ param {
1120
+ lr_mult: 1.0
1121
+ decay_mult: 1.0
1122
+ }
1123
+ param {
1124
+ lr_mult: 2.0
1125
+ decay_mult: 0.0
1126
+ }
1127
+ convolution_param {
1128
+ num_output: 128
1129
+ pad: 1
1130
+ kernel_size: 3
1131
+ stride: 2
1132
+ weight_filler {
1133
+ type: "msra"
1134
+ }
1135
+ bias_filler {
1136
+ type: "constant"
1137
+ value: 0.0
1138
+ }
1139
+ }
1140
+ }
1141
+ layer {
1142
+ name: "conv17_2/relu"
1143
+ type: "ReLU"
1144
+ bottom: "conv17_2"
1145
+ top: "conv17_2"
1146
+ }
1147
+ layer {
1148
+ name: "conv11_mbox_loc"
1149
+ type: "Convolution"
1150
+ bottom: "conv11"
1151
+ top: "conv11_mbox_loc"
1152
+ param {
1153
+ lr_mult: 1.0
1154
+ decay_mult: 1.0
1155
+ }
1156
+ param {
1157
+ lr_mult: 2.0
1158
+ decay_mult: 0.0
1159
+ }
1160
+ convolution_param {
1161
+ num_output: 12
1162
+ kernel_size: 1
1163
+ weight_filler {
1164
+ type: "msra"
1165
+ }
1166
+ bias_filler {
1167
+ type: "constant"
1168
+ value: 0.0
1169
+ }
1170
+ }
1171
+ }
1172
+ layer {
1173
+ name: "conv11_mbox_loc_perm"
1174
+ type: "Permute"
1175
+ bottom: "conv11_mbox_loc"
1176
+ top: "conv11_mbox_loc_perm"
1177
+ permute_param {
1178
+ order: 0
1179
+ order: 2
1180
+ order: 3
1181
+ order: 1
1182
+ }
1183
+ }
1184
+ layer {
1185
+ name: "conv11_mbox_loc_flat"
1186
+ type: "Flatten"
1187
+ bottom: "conv11_mbox_loc_perm"
1188
+ top: "conv11_mbox_loc_flat"
1189
+ flatten_param {
1190
+ axis: 1
1191
+ }
1192
+ }
1193
+ layer {
1194
+ name: "conv11_mbox_conf"
1195
+ type: "Convolution"
1196
+ bottom: "conv11"
1197
+ top: "conv11_mbox_conf"
1198
+ param {
1199
+ lr_mult: 1.0
1200
+ decay_mult: 1.0
1201
+ }
1202
+ param {
1203
+ lr_mult: 2.0
1204
+ decay_mult: 0.0
1205
+ }
1206
+ convolution_param {
1207
+ num_output: 63
1208
+ kernel_size: 1
1209
+ weight_filler {
1210
+ type: "msra"
1211
+ }
1212
+ bias_filler {
1213
+ type: "constant"
1214
+ value: 0.0
1215
+ }
1216
+ }
1217
+ }
1218
+ layer {
1219
+ name: "conv11_mbox_conf_perm"
1220
+ type: "Permute"
1221
+ bottom: "conv11_mbox_conf"
1222
+ top: "conv11_mbox_conf_perm"
1223
+ permute_param {
1224
+ order: 0
1225
+ order: 2
1226
+ order: 3
1227
+ order: 1
1228
+ }
1229
+ }
1230
+ layer {
1231
+ name: "conv11_mbox_conf_flat"
1232
+ type: "Flatten"
1233
+ bottom: "conv11_mbox_conf_perm"
1234
+ top: "conv11_mbox_conf_flat"
1235
+ flatten_param {
1236
+ axis: 1
1237
+ }
1238
+ }
1239
+ layer {
1240
+ name: "conv11_mbox_priorbox"
1241
+ type: "PriorBox"
1242
+ bottom: "conv11"
1243
+ bottom: "data"
1244
+ top: "conv11_mbox_priorbox"
1245
+ prior_box_param {
1246
+ min_size: 60.0
1247
+ aspect_ratio: 2.0
1248
+ flip: true
1249
+ clip: false
1250
+ variance: 0.1
1251
+ variance: 0.1
1252
+ variance: 0.2
1253
+ variance: 0.2
1254
+ offset: 0.5
1255
+ }
1256
+ }
1257
+ layer {
1258
+ name: "conv13_mbox_loc"
1259
+ type: "Convolution"
1260
+ bottom: "conv13"
1261
+ top: "conv13_mbox_loc"
1262
+ param {
1263
+ lr_mult: 1.0
1264
+ decay_mult: 1.0
1265
+ }
1266
+ param {
1267
+ lr_mult: 2.0
1268
+ decay_mult: 0.0
1269
+ }
1270
+ convolution_param {
1271
+ num_output: 24
1272
+ kernel_size: 1
1273
+ weight_filler {
1274
+ type: "msra"
1275
+ }
1276
+ bias_filler {
1277
+ type: "constant"
1278
+ value: 0.0
1279
+ }
1280
+ }
1281
+ }
1282
+ layer {
1283
+ name: "conv13_mbox_loc_perm"
1284
+ type: "Permute"
1285
+ bottom: "conv13_mbox_loc"
1286
+ top: "conv13_mbox_loc_perm"
1287
+ permute_param {
1288
+ order: 0
1289
+ order: 2
1290
+ order: 3
1291
+ order: 1
1292
+ }
1293
+ }
1294
+ layer {
1295
+ name: "conv13_mbox_loc_flat"
1296
+ type: "Flatten"
1297
+ bottom: "conv13_mbox_loc_perm"
1298
+ top: "conv13_mbox_loc_flat"
1299
+ flatten_param {
1300
+ axis: 1
1301
+ }
1302
+ }
1303
+ layer {
1304
+ name: "conv13_mbox_conf"
1305
+ type: "Convolution"
1306
+ bottom: "conv13"
1307
+ top: "conv13_mbox_conf"
1308
+ param {
1309
+ lr_mult: 1.0
1310
+ decay_mult: 1.0
1311
+ }
1312
+ param {
1313
+ lr_mult: 2.0
1314
+ decay_mult: 0.0
1315
+ }
1316
+ convolution_param {
1317
+ num_output: 126
1318
+ kernel_size: 1
1319
+ weight_filler {
1320
+ type: "msra"
1321
+ }
1322
+ bias_filler {
1323
+ type: "constant"
1324
+ value: 0.0
1325
+ }
1326
+ }
1327
+ }
1328
+ layer {
1329
+ name: "conv13_mbox_conf_perm"
1330
+ type: "Permute"
1331
+ bottom: "conv13_mbox_conf"
1332
+ top: "conv13_mbox_conf_perm"
1333
+ permute_param {
1334
+ order: 0
1335
+ order: 2
1336
+ order: 3
1337
+ order: 1
1338
+ }
1339
+ }
1340
+ layer {
1341
+ name: "conv13_mbox_conf_flat"
1342
+ type: "Flatten"
1343
+ bottom: "conv13_mbox_conf_perm"
1344
+ top: "conv13_mbox_conf_flat"
1345
+ flatten_param {
1346
+ axis: 1
1347
+ }
1348
+ }
1349
+ layer {
1350
+ name: "conv13_mbox_priorbox"
1351
+ type: "PriorBox"
1352
+ bottom: "conv13"
1353
+ bottom: "data"
1354
+ top: "conv13_mbox_priorbox"
1355
+ prior_box_param {
1356
+ min_size: 105.0
1357
+ max_size: 150.0
1358
+ aspect_ratio: 2.0
1359
+ aspect_ratio: 3.0
1360
+ flip: true
1361
+ clip: false
1362
+ variance: 0.1
1363
+ variance: 0.1
1364
+ variance: 0.2
1365
+ variance: 0.2
1366
+ offset: 0.5
1367
+ }
1368
+ }
1369
+ layer {
1370
+ name: "conv14_2_mbox_loc"
1371
+ type: "Convolution"
1372
+ bottom: "conv14_2"
1373
+ top: "conv14_2_mbox_loc"
1374
+ param {
1375
+ lr_mult: 1.0
1376
+ decay_mult: 1.0
1377
+ }
1378
+ param {
1379
+ lr_mult: 2.0
1380
+ decay_mult: 0.0
1381
+ }
1382
+ convolution_param {
1383
+ num_output: 24
1384
+ kernel_size: 1
1385
+ weight_filler {
1386
+ type: "msra"
1387
+ }
1388
+ bias_filler {
1389
+ type: "constant"
1390
+ value: 0.0
1391
+ }
1392
+ }
1393
+ }
1394
+ layer {
1395
+ name: "conv14_2_mbox_loc_perm"
1396
+ type: "Permute"
1397
+ bottom: "conv14_2_mbox_loc"
1398
+ top: "conv14_2_mbox_loc_perm"
1399
+ permute_param {
1400
+ order: 0
1401
+ order: 2
1402
+ order: 3
1403
+ order: 1
1404
+ }
1405
+ }
1406
+ layer {
1407
+ name: "conv14_2_mbox_loc_flat"
1408
+ type: "Flatten"
1409
+ bottom: "conv14_2_mbox_loc_perm"
1410
+ top: "conv14_2_mbox_loc_flat"
1411
+ flatten_param {
1412
+ axis: 1
1413
+ }
1414
+ }
1415
+ layer {
1416
+ name: "conv14_2_mbox_conf"
1417
+ type: "Convolution"
1418
+ bottom: "conv14_2"
1419
+ top: "conv14_2_mbox_conf"
1420
+ param {
1421
+ lr_mult: 1.0
1422
+ decay_mult: 1.0
1423
+ }
1424
+ param {
1425
+ lr_mult: 2.0
1426
+ decay_mult: 0.0
1427
+ }
1428
+ convolution_param {
1429
+ num_output: 126
1430
+ kernel_size: 1
1431
+ weight_filler {
1432
+ type: "msra"
1433
+ }
1434
+ bias_filler {
1435
+ type: "constant"
1436
+ value: 0.0
1437
+ }
1438
+ }
1439
+ }
1440
+ layer {
1441
+ name: "conv14_2_mbox_conf_perm"
1442
+ type: "Permute"
1443
+ bottom: "conv14_2_mbox_conf"
1444
+ top: "conv14_2_mbox_conf_perm"
1445
+ permute_param {
1446
+ order: 0
1447
+ order: 2
1448
+ order: 3
1449
+ order: 1
1450
+ }
1451
+ }
1452
+ layer {
1453
+ name: "conv14_2_mbox_conf_flat"
1454
+ type: "Flatten"
1455
+ bottom: "conv14_2_mbox_conf_perm"
1456
+ top: "conv14_2_mbox_conf_flat"
1457
+ flatten_param {
1458
+ axis: 1
1459
+ }
1460
+ }
1461
+ layer {
1462
+ name: "conv14_2_mbox_priorbox"
1463
+ type: "PriorBox"
1464
+ bottom: "conv14_2"
1465
+ bottom: "data"
1466
+ top: "conv14_2_mbox_priorbox"
1467
+ prior_box_param {
1468
+ min_size: 150.0
1469
+ max_size: 195.0
1470
+ aspect_ratio: 2.0
1471
+ aspect_ratio: 3.0
1472
+ flip: true
1473
+ clip: false
1474
+ variance: 0.1
1475
+ variance: 0.1
1476
+ variance: 0.2
1477
+ variance: 0.2
1478
+ offset: 0.5
1479
+ }
1480
+ }
1481
+ layer {
1482
+ name: "conv15_2_mbox_loc"
1483
+ type: "Convolution"
1484
+ bottom: "conv15_2"
1485
+ top: "conv15_2_mbox_loc"
1486
+ param {
1487
+ lr_mult: 1.0
1488
+ decay_mult: 1.0
1489
+ }
1490
+ param {
1491
+ lr_mult: 2.0
1492
+ decay_mult: 0.0
1493
+ }
1494
+ convolution_param {
1495
+ num_output: 24
1496
+ kernel_size: 1
1497
+ weight_filler {
1498
+ type: "msra"
1499
+ }
1500
+ bias_filler {
1501
+ type: "constant"
1502
+ value: 0.0
1503
+ }
1504
+ }
1505
+ }
1506
+ layer {
1507
+ name: "conv15_2_mbox_loc_perm"
1508
+ type: "Permute"
1509
+ bottom: "conv15_2_mbox_loc"
1510
+ top: "conv15_2_mbox_loc_perm"
1511
+ permute_param {
1512
+ order: 0
1513
+ order: 2
1514
+ order: 3
1515
+ order: 1
1516
+ }
1517
+ }
1518
+ layer {
1519
+ name: "conv15_2_mbox_loc_flat"
1520
+ type: "Flatten"
1521
+ bottom: "conv15_2_mbox_loc_perm"
1522
+ top: "conv15_2_mbox_loc_flat"
1523
+ flatten_param {
1524
+ axis: 1
1525
+ }
1526
+ }
1527
+ layer {
1528
+ name: "conv15_2_mbox_conf"
1529
+ type: "Convolution"
1530
+ bottom: "conv15_2"
1531
+ top: "conv15_2_mbox_conf"
1532
+ param {
1533
+ lr_mult: 1.0
1534
+ decay_mult: 1.0
1535
+ }
1536
+ param {
1537
+ lr_mult: 2.0
1538
+ decay_mult: 0.0
1539
+ }
1540
+ convolution_param {
1541
+ num_output: 126
1542
+ kernel_size: 1
1543
+ weight_filler {
1544
+ type: "msra"
1545
+ }
1546
+ bias_filler {
1547
+ type: "constant"
1548
+ value: 0.0
1549
+ }
1550
+ }
1551
+ }
1552
+ layer {
1553
+ name: "conv15_2_mbox_conf_perm"
1554
+ type: "Permute"
1555
+ bottom: "conv15_2_mbox_conf"
1556
+ top: "conv15_2_mbox_conf_perm"
1557
+ permute_param {
1558
+ order: 0
1559
+ order: 2
1560
+ order: 3
1561
+ order: 1
1562
+ }
1563
+ }
1564
+ layer {
1565
+ name: "conv15_2_mbox_conf_flat"
1566
+ type: "Flatten"
1567
+ bottom: "conv15_2_mbox_conf_perm"
1568
+ top: "conv15_2_mbox_conf_flat"
1569
+ flatten_param {
1570
+ axis: 1
1571
+ }
1572
+ }
1573
+ layer {
1574
+ name: "conv15_2_mbox_priorbox"
1575
+ type: "PriorBox"
1576
+ bottom: "conv15_2"
1577
+ bottom: "data"
1578
+ top: "conv15_2_mbox_priorbox"
1579
+ prior_box_param {
1580
+ min_size: 195.0
1581
+ max_size: 240.0
1582
+ aspect_ratio: 2.0
1583
+ aspect_ratio: 3.0
1584
+ flip: true
1585
+ clip: false
1586
+ variance: 0.1
1587
+ variance: 0.1
1588
+ variance: 0.2
1589
+ variance: 0.2
1590
+ offset: 0.5
1591
+ }
1592
+ }
1593
+ layer {
1594
+ name: "conv16_2_mbox_loc"
1595
+ type: "Convolution"
1596
+ bottom: "conv16_2"
1597
+ top: "conv16_2_mbox_loc"
1598
+ param {
1599
+ lr_mult: 1.0
1600
+ decay_mult: 1.0
1601
+ }
1602
+ param {
1603
+ lr_mult: 2.0
1604
+ decay_mult: 0.0
1605
+ }
1606
+ convolution_param {
1607
+ num_output: 24
1608
+ kernel_size: 1
1609
+ weight_filler {
1610
+ type: "msra"
1611
+ }
1612
+ bias_filler {
1613
+ type: "constant"
1614
+ value: 0.0
1615
+ }
1616
+ }
1617
+ }
1618
+ layer {
1619
+ name: "conv16_2_mbox_loc_perm"
1620
+ type: "Permute"
1621
+ bottom: "conv16_2_mbox_loc"
1622
+ top: "conv16_2_mbox_loc_perm"
1623
+ permute_param {
1624
+ order: 0
1625
+ order: 2
1626
+ order: 3
1627
+ order: 1
1628
+ }
1629
+ }
1630
+ layer {
1631
+ name: "conv16_2_mbox_loc_flat"
1632
+ type: "Flatten"
1633
+ bottom: "conv16_2_mbox_loc_perm"
1634
+ top: "conv16_2_mbox_loc_flat"
1635
+ flatten_param {
1636
+ axis: 1
1637
+ }
1638
+ }
1639
+ layer {
1640
+ name: "conv16_2_mbox_conf"
1641
+ type: "Convolution"
1642
+ bottom: "conv16_2"
1643
+ top: "conv16_2_mbox_conf"
1644
+ param {
1645
+ lr_mult: 1.0
1646
+ decay_mult: 1.0
1647
+ }
1648
+ param {
1649
+ lr_mult: 2.0
1650
+ decay_mult: 0.0
1651
+ }
1652
+ convolution_param {
1653
+ num_output: 126
1654
+ kernel_size: 1
1655
+ weight_filler {
1656
+ type: "msra"
1657
+ }
1658
+ bias_filler {
1659
+ type: "constant"
1660
+ value: 0.0
1661
+ }
1662
+ }
1663
+ }
1664
+ layer {
1665
+ name: "conv16_2_mbox_conf_perm"
1666
+ type: "Permute"
1667
+ bottom: "conv16_2_mbox_conf"
1668
+ top: "conv16_2_mbox_conf_perm"
1669
+ permute_param {
1670
+ order: 0
1671
+ order: 2
1672
+ order: 3
1673
+ order: 1
1674
+ }
1675
+ }
1676
+ layer {
1677
+ name: "conv16_2_mbox_conf_flat"
1678
+ type: "Flatten"
1679
+ bottom: "conv16_2_mbox_conf_perm"
1680
+ top: "conv16_2_mbox_conf_flat"
1681
+ flatten_param {
1682
+ axis: 1
1683
+ }
1684
+ }
1685
+ layer {
1686
+ name: "conv16_2_mbox_priorbox"
1687
+ type: "PriorBox"
1688
+ bottom: "conv16_2"
1689
+ bottom: "data"
1690
+ top: "conv16_2_mbox_priorbox"
1691
+ prior_box_param {
1692
+ min_size: 240.0
1693
+ max_size: 285.0
1694
+ aspect_ratio: 2.0
1695
+ aspect_ratio: 3.0
1696
+ flip: true
1697
+ clip: false
1698
+ variance: 0.1
1699
+ variance: 0.1
1700
+ variance: 0.2
1701
+ variance: 0.2
1702
+ offset: 0.5
1703
+ }
1704
+ }
1705
+ layer {
1706
+ name: "conv17_2_mbox_loc"
1707
+ type: "Convolution"
1708
+ bottom: "conv17_2"
1709
+ top: "conv17_2_mbox_loc"
1710
+ param {
1711
+ lr_mult: 1.0
1712
+ decay_mult: 1.0
1713
+ }
1714
+ param {
1715
+ lr_mult: 2.0
1716
+ decay_mult: 0.0
1717
+ }
1718
+ convolution_param {
1719
+ num_output: 24
1720
+ kernel_size: 1
1721
+ weight_filler {
1722
+ type: "msra"
1723
+ }
1724
+ bias_filler {
1725
+ type: "constant"
1726
+ value: 0.0
1727
+ }
1728
+ }
1729
+ }
1730
+ layer {
1731
+ name: "conv17_2_mbox_loc_perm"
1732
+ type: "Permute"
1733
+ bottom: "conv17_2_mbox_loc"
1734
+ top: "conv17_2_mbox_loc_perm"
1735
+ permute_param {
1736
+ order: 0
1737
+ order: 2
1738
+ order: 3
1739
+ order: 1
1740
+ }
1741
+ }
1742
+ layer {
1743
+ name: "conv17_2_mbox_loc_flat"
1744
+ type: "Flatten"
1745
+ bottom: "conv17_2_mbox_loc_perm"
1746
+ top: "conv17_2_mbox_loc_flat"
1747
+ flatten_param {
1748
+ axis: 1
1749
+ }
1750
+ }
1751
+ layer {
1752
+ name: "conv17_2_mbox_conf"
1753
+ type: "Convolution"
1754
+ bottom: "conv17_2"
1755
+ top: "conv17_2_mbox_conf"
1756
+ param {
1757
+ lr_mult: 1.0
1758
+ decay_mult: 1.0
1759
+ }
1760
+ param {
1761
+ lr_mult: 2.0
1762
+ decay_mult: 0.0
1763
+ }
1764
+ convolution_param {
1765
+ num_output: 126
1766
+ kernel_size: 1
1767
+ weight_filler {
1768
+ type: "msra"
1769
+ }
1770
+ bias_filler {
1771
+ type: "constant"
1772
+ value: 0.0
1773
+ }
1774
+ }
1775
+ }
1776
+ layer {
1777
+ name: "conv17_2_mbox_conf_perm"
1778
+ type: "Permute"
1779
+ bottom: "conv17_2_mbox_conf"
1780
+ top: "conv17_2_mbox_conf_perm"
1781
+ permute_param {
1782
+ order: 0
1783
+ order: 2
1784
+ order: 3
1785
+ order: 1
1786
+ }
1787
+ }
1788
+ layer {
1789
+ name: "conv17_2_mbox_conf_flat"
1790
+ type: "Flatten"
1791
+ bottom: "conv17_2_mbox_conf_perm"
1792
+ top: "conv17_2_mbox_conf_flat"
1793
+ flatten_param {
1794
+ axis: 1
1795
+ }
1796
+ }
1797
+ layer {
1798
+ name: "conv17_2_mbox_priorbox"
1799
+ type: "PriorBox"
1800
+ bottom: "conv17_2"
1801
+ bottom: "data"
1802
+ top: "conv17_2_mbox_priorbox"
1803
+ prior_box_param {
1804
+ min_size: 285.0
1805
+ max_size: 300.0
1806
+ aspect_ratio: 2.0
1807
+ aspect_ratio: 3.0
1808
+ flip: true
1809
+ clip: false
1810
+ variance: 0.1
1811
+ variance: 0.1
1812
+ variance: 0.2
1813
+ variance: 0.2
1814
+ offset: 0.5
1815
+ }
1816
+ }
1817
+ layer {
1818
+ name: "mbox_loc"
1819
+ type: "Concat"
1820
+ bottom: "conv11_mbox_loc_flat"
1821
+ bottom: "conv13_mbox_loc_flat"
1822
+ bottom: "conv14_2_mbox_loc_flat"
1823
+ bottom: "conv15_2_mbox_loc_flat"
1824
+ bottom: "conv16_2_mbox_loc_flat"
1825
+ bottom: "conv17_2_mbox_loc_flat"
1826
+ top: "mbox_loc"
1827
+ concat_param {
1828
+ axis: 1
1829
+ }
1830
+ }
1831
+ layer {
1832
+ name: "mbox_conf"
1833
+ type: "Concat"
1834
+ bottom: "conv11_mbox_conf_flat"
1835
+ bottom: "conv13_mbox_conf_flat"
1836
+ bottom: "conv14_2_mbox_conf_flat"
1837
+ bottom: "conv15_2_mbox_conf_flat"
1838
+ bottom: "conv16_2_mbox_conf_flat"
1839
+ bottom: "conv17_2_mbox_conf_flat"
1840
+ top: "mbox_conf"
1841
+ concat_param {
1842
+ axis: 1
1843
+ }
1844
+ }
1845
+ layer {
1846
+ name: "mbox_priorbox"
1847
+ type: "Concat"
1848
+ bottom: "conv11_mbox_priorbox"
1849
+ bottom: "conv13_mbox_priorbox"
1850
+ bottom: "conv14_2_mbox_priorbox"
1851
+ bottom: "conv15_2_mbox_priorbox"
1852
+ bottom: "conv16_2_mbox_priorbox"
1853
+ bottom: "conv17_2_mbox_priorbox"
1854
+ top: "mbox_priorbox"
1855
+ concat_param {
1856
+ axis: 2
1857
+ }
1858
+ }
1859
+ layer {
1860
+ name: "mbox_conf_reshape"
1861
+ type: "Reshape"
1862
+ bottom: "mbox_conf"
1863
+ top: "mbox_conf_reshape"
1864
+ reshape_param {
1865
+ shape {
1866
+ dim: 0
1867
+ dim: -1
1868
+ dim: 21
1869
+ }
1870
+ }
1871
+ }
1872
+ layer {
1873
+ name: "mbox_conf_softmax"
1874
+ type: "Softmax"
1875
+ bottom: "mbox_conf_reshape"
1876
+ top: "mbox_conf_softmax"
1877
+ softmax_param {
1878
+ axis: 2
1879
+ }
1880
+ }
1881
+ layer {
1882
+ name: "mbox_conf_flatten"
1883
+ type: "Flatten"
1884
+ bottom: "mbox_conf_softmax"
1885
+ top: "mbox_conf_flatten"
1886
+ flatten_param {
1887
+ axis: 1
1888
+ }
1889
+ }
1890
+ layer {
1891
+ name: "detection_out"
1892
+ type: "DetectionOutput"
1893
+ bottom: "mbox_loc"
1894
+ bottom: "mbox_conf_flatten"
1895
+ bottom: "mbox_priorbox"
1896
+ top: "detection_out"
1897
+ include {
1898
+ phase: TEST
1899
+ }
1900
+ detection_output_param {
1901
+ num_classes: 21
1902
+ share_location: true
1903
+ background_label_id: 0
1904
+ nms_param {
1905
+ nms_threshold: 0.45
1906
+ top_k: 100
1907
+ }
1908
+ code_type: CENTER_SIZE
1909
+ keep_top_k: 100
1910
+ confidence_threshold: 0.25
1911
+ }
1912
+ }
README.md CHANGED
@@ -1,14 +1,14 @@
1
- ---
2
- title: Anycoder 5932b618
3
- emoji: 💻
4
- colorFrom: green
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 6.0.0
8
- app_file: app.py
9
- pinned: false
10
- tags:
11
- - anycoder
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Anycoder 5932b618
3
+ emoji: 💻
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 6.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ tags:
11
+ - anycoder
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -4,237 +4,58 @@ from PIL import Image, ImageDraw
4
  import json
5
  from typing import Tuple, List, Dict, Any
6
  import time
 
 
7
 
8
- # Try to import cv2, but make it optional
9
- try:
10
- import cv2
11
- CV2_AVAILABLE = True
12
- except ImportError:
13
- CV2_AVAILABLE = False
14
- print("Warning: OpenCV (cv2) not available. Using fallback image processing.")
15
 
16
def load_detection_models():
    """Load the Haar face cascade and the MobileNet-SSD object detector.

    Returns:
        A ``(face_cascade, object_net, object_classes)`` tuple.

        * ``(None, None, [])`` when OpenCV is not available or the face
          cascade cannot be created.
        * ``object_net`` is ``None`` when the Caffe model files cannot be
          read; the class names are still returned in that case.
    """
    if not CV2_AVAILABLE:
        # Without OpenCV the callers fall back to the PIL-based simulation.
        return None, None, []

    # 21 labels: "background" plus 20 object classes.  The object detector
    # code looks labels up by the class index reported for each detection.
    object_classes = [
        "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
        "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
        "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
        "train", "tvmonitor"
    ]

    try:
        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
    except Exception as e:
        print(f"Error loading models: {e}")
        return None, None, []

    try:
        object_net = cv2.dnn.readNetFromCaffe(
            "MobileNetSSD_deploy.prototxt",
            "MobileNetSSD_deploy.caffemodel",
        )
    except Exception as e:
        # Missing or unreadable model files are not fatal: report the
        # problem instead of swallowing it, and run without object detection.
        print(f"Warning: could not load MobileNet-SSD model: {e}")
        object_net = None

    return face_cascade, object_net, object_classes
52
-
53
def detect_faces_pil(image: np.ndarray, confidence: float) -> List[Dict[str, Any]]:
    """Simulate face detection (fallback used when OpenCV is not available).

    No real detector runs here.  Between one and three random boxes are
    generated inside the image bounds, each with a random score in
    ``[0.5, 0.95)``; boxes whose score does not exceed ``confidence`` are
    dropped.

    Args:
        image: Image array; only its height and width (first two axes) are used.
        confidence: Minimum score a simulated detection must exceed to be
            reported.

    Returns:
        A list of dicts with keys ``"bbox"`` (``[x, y, w, h]``),
        ``"confidence"`` and ``"label"`` (always ``"face"``).  Empty when the
        image has no usable size.
    """
    try:
        height, width = image.shape[:2]
    except (AttributeError, ValueError) as e:
        print(f"Error in face detection: {e}")
        return []
    if width < 1 or height < 1:
        return []

    def random_span(limit):
        """Pick a random (start, length) pair that fits inside [0, limit)."""
        start = np.random.randint(0, max(1, limit - 100))
        longest = min(150, limit - start)
        shortest = min(50, longest)
        # randint requires low < high; on small images there is no room to
        # vary the size, so the span simply fills the remaining space.
        if longest > shortest:
            length = np.random.randint(shortest, longest)
        else:
            length = longest
        return int(start), int(length)

    faces = []
    for _ in range(np.random.randint(1, 4)):  # 1-3 simulated faces
        x, w = random_span(width)
        y, h = random_span(height)
        score = round(float(np.random.uniform(0.5, 0.95)), 3)
        if score > confidence:
            faces.append({
                "bbox": [x, y, w, h],
                "confidence": score,
                "label": "face"
            })
    return faces
93
-
94
def detect_objects_pil(image: np.ndarray, confidence: float) -> List[Dict[str, Any]]:
    """Simulate object detection (fallback used when OpenCV is not available).

    No real detector runs here.  Between one and five random boxes are
    generated inside the image bounds, each with a random label and a random
    score in ``[0.4, 0.9)``; boxes whose score does not exceed ``confidence``
    are dropped.

    Args:
        image: Image array; only its height and width (first two axes) are used.
        confidence: Minimum score a simulated detection must exceed to be
            reported.

    Returns:
        A list of dicts with keys ``"bbox"`` (``[x, y, w, h]``),
        ``"confidence"`` and ``"label"``.  Empty when the image has no usable
        size.
    """
    try:
        height, width = image.shape[:2]
    except (AttributeError, ValueError) as e:
        print(f"Error in object detection: {e}")
        return []
    if width < 1 or height < 1:
        return []

    object_classes = ["person", "car", "dog", "cat", "bottle", "chair", "laptop", "phone"]

    def random_span(limit):
        """Pick a random (start, length) pair that fits inside [0, limit)."""
        start = np.random.randint(0, max(1, limit - 100))
        longest = min(150, limit - start)
        shortest = min(50, longest)
        # randint requires low < high; on small images there is no room to
        # vary the size, so the span simply fills the remaining space.
        if longest > shortest:
            length = np.random.randint(shortest, longest)
        else:
            length = longest
        return int(start), int(length)

    objects = []
    for _ in range(np.random.randint(1, 6)):  # 1-5 simulated objects
        x, w = random_span(width)
        y, h = random_span(height)
        # str() keeps the label a plain Python string for JSON output.
        obj_class = str(np.random.choice(object_classes))
        score = round(float(np.random.uniform(0.4, 0.9)), 3)
        if score > confidence:
            objects.append({
                "bbox": [x, y, w, h],
                "confidence": score,
                "label": obj_class
            })
    return objects
123
-
124
def detect_faces_cv2(image: np.ndarray, face_cascade, confidence: float) -> List[Dict[str, Any]]:
    """Detect faces with an OpenCV Haar cascade.

    The image is converted from RGB to grayscale and passed to
    ``face_cascade.detectMultiScale``.  The ``confidence`` argument is not
    used: the cascade call here yields boxes only, so each reported
    ``"confidence"`` is a random placeholder in ``[0.7, 0.95)``.

    Returns:
        A list of dicts with ``"bbox"`` (``[x, y, w, h]``), ``"confidence"``
        and ``"label"``; an empty list if anything fails.
    """
    try:
        grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        boxes = face_cascade.detectMultiScale(grayscale, scaleFactor=1.1, minNeighbors=5)
        return [
            {
                "bbox": [int(left), int(top), int(box_w), int(box_h)],
                # Placeholder score: Haar cascade doesn't provide confidence.
                "confidence": round(np.random.uniform(0.7, 0.95), 3),
                "label": "face",
            }
            for (left, top, box_w, box_h) in boxes
        ]
    except Exception as e:
        print(f"Error in face detection: {e}")
        return []
142
 
143
def detect_objects_cv2(image: np.ndarray, net, classes, confidence: float) -> List[Dict[str, Any]]:
    """Detect objects with an OpenCV DNN network (MobileNet-SSD).

    Args:
        image: Input image array.  Assumed to be RGB, as elsewhere in this
            file (the face path converts with ``COLOR_RGB2GRAY``).
        net: Network with ``setInput``/``forward``, or ``None`` when the
            model could not be loaded.
        classes: Class labels, looked up by the class index of each detection.
        confidence: Minimum score a detection must exceed to be reported.

    Returns:
        A list of dicts with ``"bbox"`` (``[x, y, w, h]`` in pixels, clamped
        to the image), ``"confidence"`` and ``"label"``.  Empty when ``net``
        is ``None`` or inference fails.
    """
    if net is None:
        return []

    try:
        h, w = image.shape[:2]

        # Scale/mean follow the usual MobileNet-SSD preprocessing.  swapRB
        # converts the RGB input into the BGR channel order the Caffe model
        # was trained with.
        blob = cv2.dnn.blobFromImage(image, 0.007843, (300, 300), 127.5, swapRB=True)
        net.setInput(blob)
        detections = net.forward()

        objects = []
        for i in range(detections.shape[2]):
            score = float(detections[0, 0, i, 2])
            if score <= confidence:
                continue

            idx = int(detections[0, 0, i, 1])
            if not 0 <= idx < len(classes):
                continue

            # Coordinates are fractions of the image size and may fall
            # slightly outside [0, 1]; clamp them to the image.
            x1 = min(max(int(detections[0, 0, i, 3] * w), 0), w)
            y1 = min(max(int(detections[0, 0, i, 4] * h), 0), h)
            x2 = min(max(int(detections[0, 0, i, 5] * w), 0), w)
            y2 = min(max(int(detections[0, 0, i, 6] * h), 0), h)
            if x2 <= x1 or y2 <= y1:
                continue  # nothing left of the box inside the image

            objects.append({
                "bbox": [x1, y1, x2 - x1, y2 - y1],
                "confidence": round(score, 3),
                "label": classes[idx]
            })

        return objects
    except Exception as e:
        print(f"Error in object detection: {e}")
        return []
178
 
179
def process_image(image, face_cascade, object_net, object_classes, enable_face, enable_objects, face_conf, object_conf):
    """Run the enabled detectors on ``image``.

    Each detector uses its OpenCV implementation when OpenCV is available
    and the corresponding model was loaded, and the PIL-based fallback
    otherwise.  A disabled detector contributes an empty list.

    Returns:
        ``(copy of image, face results, object results)``.
    """
    faces = []
    objects = []

    if enable_face:
        cascade_ready = CV2_AVAILABLE and face_cascade is not None
        faces = (
            detect_faces_cv2(image, face_cascade, face_conf)
            if cascade_ready
            else detect_faces_pil(image, face_conf)
        )

    if enable_objects:
        net_ready = CV2_AVAILABLE and object_net is not None
        objects = (
            detect_objects_cv2(image, object_net, object_classes, object_conf)
            if net_ready
            else detect_objects_pil(image, object_conf)
        )

    return image.copy(), faces, objects
197
-
198
def draw_detections(image, face_results, object_results, show_labels, box_color):
    """Draw detection boxes (and optionally labels) on a copy of ``image``.

    Args:
        image: Image array to annotate.
        face_results: Detections with a ``"bbox"`` of ``[x, y, w, h]``.
        object_results: Detections with ``"bbox"`` and ``"label"``.
        show_labels: Whether to write a text label next to each box.
        box_color: Colour name; unknown names fall back to red.

    Returns:
        The annotated image as a NumPy array, or the original ``image``
        unchanged if drawing fails.
    """
    color_map = {
        "red": (255, 0, 0),
        "green": (0, 255, 0),
        "blue": (0, 0, 255),
        "yellow": (255, 255, 0),
        "purple": (128, 0, 128),
        "orange": (255, 165, 0)
    }

    try:
        pil_image = Image.fromarray(image)
        draw = ImageDraw.Draw(pil_image)
        color = color_map.get(box_color, (255, 0, 0))

        def draw_one(detection, make_label):
            """Draw one box; the label is only built when it is shown."""
            x, y, w, h = detection["bbox"]
            draw.rectangle([x, y, x + w, y + h], outline=color, width=3)
            if show_labels:
                # Labels normally sit above the box.  For boxes near the top
                # edge that position is off-canvas, so place the label just
                # inside the box instead.
                label_y = y - 20 if y >= 20 else y + 4
                draw.text((x, label_y), make_label(detection), fill=color)

        for face in face_results:
            draw_one(face, lambda d: f"Face {d.get('confidence', '')}")

        for obj in object_results:
            draw_one(obj, lambda d: f"{d['label']} {d.get('confidence', '')}")

        return np.array(pil_image)
    except Exception as e:
        print(f"Error drawing detections: {e}")
        return image
235
-
236
- # Load models at startup
237
- face_cascade, object_net, object_classes = load_detection_models()
238
 
239
  def recognize_face_and_objects(
240
  image: np.ndarray,
@@ -244,18 +65,34 @@ def recognize_face_and_objects(
244
  object_confidence: float,
245
  draw_boxes: bool,
246
  show_labels: bool,
247
- box_color: str
248
- ) -> Tuple[np.ndarray, str, str]:
 
 
 
 
 
 
249
  """
250
- Perform face and object detection on the input image.
251
  """
252
  if image is None:
253
- return None, "No image provided", "No image provided"
254
-
255
  # Convert PIL to numpy if needed
256
  if isinstance(image, Image.Image):
257
  image = np.array(image)
258
 
 
 
 
 
 
 
 
 
 
 
259
  # Process image
260
  processed_image, face_results, object_results = process_image(
261
  image,
@@ -268,6 +105,9 @@ def recognize_face_and_objects(
268
  object_confidence
269
  )
270
 
 
 
 
271
  # Draw detections if requested
272
  if draw_boxes:
273
  processed_image = draw_detections(
@@ -279,10 +119,10 @@ def recognize_face_and_objects(
279
  )
280
 
281
  # Convert results to JSON
282
- face_json = json.dumps(face_results, indent=2) if face_results else "No faces detected"
283
- object_json = json.dumps(object_results, indent=2) if object_results else "No objects detected"
284
 
285
- return processed_image, face_json, object_json
286
 
287
  def webcam_recognition(
288
  image: np.ndarray,
@@ -292,13 +132,28 @@ def webcam_recognition(
292
  object_confidence: float,
293
  draw_boxes: bool,
294
  show_labels: bool,
295
- box_color: str
 
 
 
 
 
 
296
  ) -> np.ndarray:
297
- """Real-time webcam recognition."""
298
  if image is None:
299
  return None
300
 
301
- processed_image, _, _ = recognize_face_and_objects(
 
 
 
 
 
 
 
 
 
302
  image,
303
  enable_face_detection,
304
  enable_object_detection,
@@ -306,7 +161,13 @@ def webcam_recognition(
306
  object_confidence,
307
  draw_boxes,
308
  show_labels,
309
- box_color
 
 
 
 
 
 
310
  )
311
 
312
  return processed_image
@@ -349,6 +210,28 @@ def get_detection_statistics() -> str:
349
  }
350
  return json.dumps(stats, indent=2)
351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  # Create custom CSS for better styling
353
  custom_css = """
354
  .main-container {
@@ -377,17 +260,31 @@ custom_css = """
377
  padding: 15px;
378
  margin-bottom: 20px;
379
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  """
381
 
382
- with gr.Blocks(css=custom_css, title="Face & Object Recognition Platform") as demo:
 
383
  gr.Markdown("""
384
- # 🔍 Face & Object Recognition Platform
385
  Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
386
 
387
- Advanced computer vision platform for real-time face and object detection with customizable settings.
388
  """)
389
 
390
- # Show warning if OpenCV is not available
391
  if not CV2_AVAILABLE:
392
  with gr.Row():
393
  gr.Markdown("""
@@ -397,6 +294,17 @@ with gr.Blocks(css=custom_css, title="Face & Object Recognition Platform") as de
397
  </div>
398
  """)
399
 
 
 
 
 
 
 
 
 
 
 
 
400
  with gr.Row():
401
  with gr.Column(scale=2):
402
  gr.Markdown("### 📤 Input Source")
@@ -417,7 +325,7 @@ with gr.Blocks(css=custom_css, title="Face & Object Recognition Platform") as de
417
  streaming=True,
418
  height=400
419
  )
420
- gr.Markdown("*Webcam provides real-time detection (may have slight delay)*")
421
 
422
  with gr.Column(scale=1):
423
  gr.Markdown("### ⚙️ Detection Settings")
@@ -463,6 +371,14 @@ with gr.Blocks(css=custom_css, title="Face & Object Recognition Platform") as de
463
  height=400,
464
  elem_classes=["image-container"]
465
  )
 
 
 
 
 
 
 
 
466
 
467
  with gr.Column():
468
  with gr.Tabs():
@@ -478,6 +394,44 @@ with gr.Blocks(css=custom_css, title="Face & Object Recognition Platform") as de
478
  elem_classes=["result-panel"]
479
  )
480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  with gr.TabItem("ℹ️ Model Info"):
482
  model_info = gr.JSON(
483
  label="Detection Models Information",
@@ -486,6 +440,9 @@ with gr.Blocks(css=custom_css, title="Face & Object Recognition Platform") as de
486
  )
487
 
488
  # Event handlers
 
 
 
489
  analyze_btn.click(
490
  fn=recognize_face_and_objects,
491
  inputs=[
@@ -496,9 +453,16 @@ with gr.Blocks(css=custom_css, title="Face & Object Recognition Platform") as de
496
  object_conf,
497
  draw_boxes,
498
  show_labels,
499
- box_color
 
 
 
 
 
 
 
500
  ],
501
- outputs=[output_image, face_results, object_results]
502
  )
503
 
504
  # Real-time webcam processing
@@ -512,29 +476,53 @@ with gr.Blocks(css=custom_css, title="Face & Object Recognition Platform") as de
512
  object_conf,
513
  draw_boxes,
514
  show_labels,
515
- box_color
 
 
 
 
 
 
 
516
  ],
517
  outputs=[output_image],
518
  time_limit=30,
519
  stream_every=0.5
520
  )
521
 
 
 
 
 
 
 
 
522
  gr.Markdown("""
523
  ---
524
  ### 📚 Usage Instructions
525
  1. **Upload Image**: Select an image from your device for analysis
526
- 2. **Webcam**: Use your webcam for real-time detection
527
  3. **Adjust Settings**: Customize confidence thresholds and display options
528
- 4. **View Results**: See detections overlayed on the image with detailed JSON data
 
 
 
 
 
 
 
 
529
 
530
  ### 🎯 Features
531
  - **Face Detection**: Identifies faces in images using Haar Cascade classifiers (or simulation mode)
532
  - **Object Detection**: Recognizes object classes using MobileNet-SSD (or simulation mode)
533
- - **Real-time Processing**: Webcam support with live detection
534
  - **Customizable**: Adjustable confidence thresholds and visual settings
535
  - **Detailed Output**: JSON formatted results with coordinates and confidence scores
 
536
  ### ⚙️ Installation Notes
537
- Install OpenCV for full functionality: `pip install opencv-python`
 
538
  """)
539
 
540
  if __name__ == "__main__":
 
4
  import json
5
  from typing import Tuple, List, Dict, Any
6
  import time
7
+ import threading
8
+ import queue
9
 
10
+ from models import load_detection_models, CV2_AVAILABLE # CV2_AVAILABLE needs to come from models.py
11
+ from utils import draw_detections, process_image, generate_tone, play_sound, AlarmSystem, AUDIO_AVAILABLE
 
 
 
 
 
12
 
13
+ # Global alarm system
14
+ alarm_system = AlarmSystem()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ # Load models at startup
17
+ face_cascade, object_net, object_classes = load_detection_models()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
def check_and_trigger_alarm(face_results, object_results, alarm_settings):
    """Check detection results and trigger the alarm if conditions are met.

    The face condition is checked first; the object condition is only
    evaluated when the face condition did not fire.  When target objects are
    configured, only detections with one of those labels count; otherwise
    any detected object counts.

    Args:
        face_results: List of face detections.
        object_results: List of object detections, each with a ``"label"``.
        alarm_settings: Dict read for the keys ``"alarm_enabled"``,
            ``"face_alarm"``, ``"object_alarm"``, ``"target_objects"``,
            ``"alarm_sound"`` and ``"custom_alarm_sound"``.

    Returns:
        ``(triggered, message)``.  ``triggered`` is ``True`` only when
        ``alarm_system.trigger_alarm`` accepted the request.
    """
    if not alarm_settings.get("alarm_enabled", False):
        return False, "Alarm disabled"

    alarm_triggered = False
    alarm_reason = ""

    if alarm_settings.get("face_alarm", False) and face_results:
        alarm_triggered = True
        alarm_reason = f"Face detected ({len(face_results)} faces)"
    elif alarm_settings.get("object_alarm", False) and object_results:
        target_objects = alarm_settings.get("target_objects", [])
        if target_objects:
            # Only the configured labels are allowed to raise the alarm.
            detected_objects = [obj["label"] for obj in object_results if obj["label"] in target_objects]
            if detected_objects:
                alarm_triggered = True
                alarm_reason = f"Target object detected: {', '.join(set(detected_objects))}"
        else:
            alarm_triggered = True
            alarm_reason = f"Object detected ({len(object_results)} objects)"

    if not alarm_triggered:
        return False, "No alarm conditions met"

    sound_type = alarm_settings.get("alarm_sound", "Beep")
    if sound_type == "Custom":
        # "Custom" may be selected without a file (missing key or None);
        # fall back to the default sound rather than passing None along.
        sound_to_play = alarm_settings.get("custom_alarm_sound") or "Beep"
    else:
        sound_to_play = sound_type

    if alarm_system.trigger_alarm(sound_to_play):
        return True, f"🚨 ALARM TRIGGERED: {alarm_reason}"
    return False, "Alarm cooldown active"
 
59
 
60
  def recognize_face_and_objects(
61
  image: np.ndarray,
 
65
  object_confidence: float,
66
  draw_boxes: bool,
67
  show_labels: bool,
68
+ box_color: str,
69
+ alarm_enabled_val: bool, # New parameter for alarm_enabled
70
+ face_alarm_val: bool, # New parameter for face_alarm
71
+ object_alarm_val: bool, # New parameter for object_alarm
72
+ alarm_sound_val: str, # New parameter for alarm_sound
73
+ target_objects_val: List[str], # New parameter for target_objects
74
+ custom_alarm_sound_val: str
75
+ ) -> Tuple[np.ndarray, str, str, str]:
76
  """
77
+ Perform face and object detection on the input image with alarm support.
78
  """
79
  if image is None:
80
+ return None, "[]", "[]", "No image provided" # Changed this line to return empty JSON arrays for face and object results
81
+
82
  # Convert PIL to numpy if needed
83
  if isinstance(image, Image.Image):
84
  image = np.array(image)
85
 
86
+ # Construct alarm_settings dictionary from the passed values
87
+ alarm_settings = {
88
+ "alarm_enabled": alarm_enabled_val,
89
+ "face_alarm": face_alarm_val,
90
+ "object_alarm": object_alarm_val,
91
+ "alarm_sound": alarm_sound_val,
92
+ "target_objects": target_objects_val,
93
+ "custom_alarm_sound": custom_alarm_sound_val
94
+ }
95
+
96
  # Process image
97
  processed_image, face_results, object_results = process_image(
98
  image,
 
105
  object_confidence
106
  )
107
 
108
+ # Check alarm conditions
109
+ alarm_status, alarm_message = check_and_trigger_alarm(face_results, object_results, alarm_settings)
110
+
111
  # Draw detections if requested
112
  if draw_boxes:
113
  processed_image = draw_detections(
 
119
  )
120
 
121
  # Convert results to JSON
122
+ face_json = json.dumps(face_results, indent=2) if face_results else "[]"
123
+ object_json = json.dumps(object_results, indent=2) if object_results else "[]"
124
 
125
+ return processed_image, face_json, object_json, alarm_message
126
 
127
  def webcam_recognition(
128
  image: np.ndarray,
 
132
  object_confidence: float,
133
  draw_boxes: bool,
134
  show_labels: bool,
135
+ box_color: str,
136
+ alarm_enabled_val: bool, # New parameter for alarm_enabled
137
+ face_alarm_val: bool, # New parameter for face_alarm
138
+ object_alarm_val: bool, # New parameter for object_alarm
139
+ alarm_sound_val: str, # New parameter for alarm_sound
140
+ target_objects_val: List[str], # New parameter for target_objects
141
+ custom_alarm_sound_val: str
142
  ) -> np.ndarray:
143
+ """Real-time webcam recognition with alarm."""
144
  if image is None:
145
  return None
146
 
147
+ # Construct alarm_settings dictionary from the passed values
148
+ alarm_settings = {
149
+ "alarm_enabled": alarm_enabled_val,
150
+ "face_alarm": face_alarm_val,
151
+ "object_alarm": object_alarm_val,
152
+ "alarm_sound": alarm_sound_val,
153
+ "target_objects": target_objects_val
154
+ }
155
+
156
+ processed_image, _, _, _ = recognize_face_and_objects(
157
  image,
158
  enable_face_detection,
159
  enable_object_detection,
 
161
  object_confidence,
162
  draw_boxes,
163
  show_labels,
164
+ box_color,
165
+ alarm_enabled_val, # Pass these directly
166
+ face_alarm_val,
167
+ object_alarm_val,
168
+ alarm_sound_val,
169
+ target_objects_val,
170
+ custom_alarm_sound_val
171
  )
172
 
173
  return processed_image
 
210
  }
211
  return json.dumps(stats, indent=2)
212
 
213
def test_alarm_sound(sound_type, custom_sound_file):
    """Play the selected alarm sound once and report the outcome.

    Args:
        sound_type: Name chosen in the alarm-sound dropdown; the value
            "Custom" selects the uploaded file instead of a built-in sound.
        custom_sound_file: The uploaded custom sound, or None. May be a
            file path string or a file-like object exposing the path as
            ``.name``.

    Returns:
        A human-readable status message for the "Sound Test Result" box.
    """
    if not AUDIO_AVAILABLE:
        return "⚠️ Audio not available. Install pyaudio for sound support."

    is_custom = sound_type == "Custom"
    if is_custom:
        if custom_sound_file is None:
            return "Custom sound selected, but no file uploaded."
        # play_sound only recognises a custom sound given as a path string,
        # so unwrap file-like upload objects to their on-disk path.
        sound_to_play = getattr(custom_sound_file, "name", custom_sound_file)
    else:
        sound_to_play = sound_type

    # Keep the try body minimal: only playback itself can fail here.
    try:
        play_sound(sound_to_play)
    except Exception as e:
        return f"❌ Error playing sound: {str(e)}"

    if is_custom:
        return "✅ Played custom sound"
    return f"✅ Played {sound_type} sound"
234
+
235
  # Create custom CSS for better styling
236
  custom_css = """
237
  .main-container {
 
260
  padding: 15px;
261
  margin-bottom: 20px;
262
  }
263
+ .alarm-box {
264
+ background-color: #f8d7da;
265
+ border: 2px solid #f5c6cb;
266
+ border-radius: 8px;
267
+ padding: 15px;
268
+ margin-bottom: 20px;
269
+ animation: pulse 1s infinite;
270
+ }
271
+ @keyframes pulse {
272
+ 0% { opacity: 1; }
273
+ 50% { opacity: 0.7; }
274
+ 100% { opacity: 1; }
275
+ }
276
  """
277
 
278
+ with gr.Blocks(title="Face & Object Recognition Platform") as demo:
279
+ gr.HTML(f"<style>{custom_css}</style>")
280
  gr.Markdown("""
281
+ # 🔍 Face & Object Recognition Platform with Alarm System
282
  Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
283
 
284
+ Advanced computer vision platform for real-time face and object detection with customizable settings and alarm notifications.
285
  """)
286
 
287
+ # Show warnings if dependencies are not available
288
  if not CV2_AVAILABLE:
289
  with gr.Row():
290
  gr.Markdown("""
 
294
  </div>
295
  """)
296
 
297
+ if not AUDIO_AVAILABLE:
298
+ with gr.Row():
299
+ gr.Markdown("""
300
+ <div class="warning-box">
301
+ ⚠️ **Audio Not Available**: Install audio libraries for alarm sounds: `pip install pyaudio`
302
+ </div>
303
+ """)
304
+
305
+ # Alarm state
306
+ alarm_status = gr.Textbox(label="Alarm Status", visible=False, interactive=False)
307
+
308
  with gr.Row():
309
  with gr.Column(scale=2):
310
  gr.Markdown("### 📤 Input Source")
 
325
  streaming=True,
326
  height=400
327
  )
328
+ gr.Markdown("*Webcam provides real-time detection with alarm system*")
329
 
330
  with gr.Column(scale=1):
331
  gr.Markdown("### ⚙️ Detection Settings")
 
371
  height=400,
372
  elem_classes=["image-container"]
373
  )
374
+
375
+ # Alarm status display
376
+ alarm_display = gr.Textbox(
377
+ label="🚨 Alarm Status",
378
+ value="Ready",
379
+ interactive=False,
380
+ elem_classes=["alarm-box" if False else ""]
381
+ )
382
 
383
  with gr.Column():
384
  with gr.Tabs():
 
394
  elem_classes=["result-panel"]
395
  )
396
 
397
+ with gr.TabItem("🚨 Alarm Settings"):
398
+ gr.Markdown("#### Configure Alarm System")
399
+
400
+ alarm_enabled = gr.Checkbox(label="🔔 Enable Alarm System", value=False)
401
+ face_alarm = gr.Checkbox(label="👤 Alarm on Face Detection", value=True)
402
+ object_alarm = gr.Checkbox(label="📦 Alarm on Object Detection", value=True)
403
+
404
+ alarm_sound = gr.Dropdown(
405
+ label="🔊 Alarm Sound",
406
+ choices=["Beep", "Siren", "Chime", "Alert", "Buzzer", "Ring", "Custom"],
407
+ value="Beep",
408
+ info="Select alarm sound type"
409
+ )
410
+
411
+ custom_alarm_sound = gr.File(
412
+ label="Upload Custom Alarm Sound (.wav)",
413
+ file_types=[".wav"],
414
+ visible=False
415
+ )
416
+
417
+ def toggle_custom_sound(sound_choice):
418
+ return gr.update(visible=sound_choice == "Custom")
419
+
420
+ alarm_sound.change(
421
+ fn=toggle_custom_sound,
422
+ inputs=alarm_sound,
423
+ outputs=custom_alarm_sound
424
+ )
425
+
426
+ target_objects = gr.CheckboxGroup(
427
+ label="🎯 Specific Objects to Trigger Alarm (optional)",
428
+ choices=["person", "car", "dog", "cat", "bottle", "chair", "laptop", "phone"],
429
+ info="Leave empty to alarm on any object"
430
+ )
431
+
432
+ test_sound_btn = gr.Button("🔊 Test Sound", variant="secondary")
433
+ sound_test_result = gr.Textbox(label="Sound Test Result", interactive=False)
434
+
435
  with gr.TabItem("ℹ️ Model Info"):
436
  model_info = gr.JSON(
437
  label="Detection Models Information",
 
440
  )
441
 
442
  # Event handlers
443
+ # NOTE: The components listed in `inputs` are only references. When the event
444
+ # fires, Gradio reads each component's current value and passes those values
445
+ # to `recognize_face_and_objects` as positional arguments, in the order listed.
446
  analyze_btn.click(
447
  fn=recognize_face_and_objects,
448
  inputs=[
 
453
  object_conf,
454
  draw_boxes,
455
  show_labels,
456
+ box_color,
457
+ # Pass the Gradio components, not their values
458
+ alarm_enabled,
459
+ face_alarm,
460
+ object_alarm,
461
+ alarm_sound,
462
+ target_objects,
463
+ custom_alarm_sound
464
  ],
465
+ outputs=[output_image, face_results, object_results, alarm_display]
466
  )
467
 
468
  # Real-time webcam processing
 
476
  object_conf,
477
  draw_boxes,
478
  show_labels,
479
+ box_color,
480
+ # Pass the Gradio components, not their values
481
+ alarm_enabled,
482
+ face_alarm,
483
+ object_alarm,
484
+ alarm_sound,
485
+ target_objects,
486
+ custom_alarm_sound
487
  ],
488
  outputs=[output_image],
489
  time_limit=30,
490
  stream_every=0.5
491
  )
492
 
493
+ # Test sound button
494
+ test_sound_btn.click(
495
+ fn=test_alarm_sound,
496
+ inputs=[alarm_sound, custom_alarm_sound],
497
+ outputs=[sound_test_result]
498
+ )
499
+
500
  gr.Markdown("""
501
  ---
502
  ### 📚 Usage Instructions
503
  1. **Upload Image**: Select an image from your device for analysis
504
+ 2. **Webcam**: Use your webcam for real-time detection with alarms
505
  3. **Adjust Settings**: Customize confidence thresholds and display options
506
+ 4. **Configure Alarm**: Set up alarm conditions and sounds in the Alarm Settings tab
507
+ 5. **View Results**: See detections overlayed on the image with detailed JSON data
508
+
509
+ ### 🚨 Alarm Features
510
+ - **Face Detection Alarm**: Triggers when faces are detected
511
+ - **Object Detection Alarm**: Triggers when objects are detected (all or specific types)
512
+ - **Multiple Sounds**: Choose from 6 different alarm sounds
513
+ - **Cooldown Period**: Prevents alarm spam (2-second cooldown)
514
+ - **Real-time Monitoring**: Works with webcam for continuous monitoring
515
 
516
  ### 🎯 Features
517
  - **Face Detection**: Identifies faces in images using Haar Cascade classifiers (or simulation mode)
518
  - **Object Detection**: Recognizes object classes using MobileNet-SSD (or simulation mode)
519
+ - **Real-time Processing**: Webcam support with live detection and alarms
520
  - **Customizable**: Adjustable confidence thresholds and visual settings
521
  - **Detailed Output**: JSON formatted results with coordinates and confidence scores
522
+
523
  ### ⚙️ Installation Notes
524
+ - Install OpenCV for full functionality: `pip install opencv-python`
525
+ - Install audio support for alarms: `pip install pyaudio`
526
  """)
527
 
528
  if __name__ == "__main__":
models.py CHANGED
@@ -14,7 +14,7 @@ def load_detection_models():
14
  if CV2_AVAILABLE:
15
  try:
16
  # Load face cascade
17
- face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
18
 
19
  # Load object detection model (MobileNet SSD)
20
  model_path = "MobileNetSSD_deploy.prototxt"
@@ -29,7 +29,8 @@ def load_detection_models():
29
  "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
30
  "train", "tvmonitor"
31
  ]
32
- except:
 
33
  object_net = None
34
  object_classes = [
35
  "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
@@ -124,7 +125,6 @@ def detect_faces_pil(image, confidence):
124
  # Simulate face detection with random bounding boxes
125
  faces = []
126
 
127
- # For demonstration, detect faces based on basic heuristics
128
  for i in range(0, min(3, np.random.randint(0, 3) + 1)): # Random 0-3 faces
129
  x = np.random.randint(0, max(1, width - 100))
130
  y = np.random.randint(0, max(1, height - 100))
 
14
  if CV2_AVAILABLE:
15
  try:
16
  # Load face cascade
17
+ face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
18
 
19
  # Load object detection model (MobileNet SSD)
20
  model_path = "MobileNetSSD_deploy.prototxt"
 
29
  "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
30
  "train", "tvmonitor"
31
  ]
32
+ except Exception as e:
33
+ print(f"Error loading object detection model: {e}")
34
  object_net = None
35
  object_classes = [
36
  "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
 
125
  # Simulate face detection with random bounding boxes
126
  faces = []
127
 
 
128
  for i in range(0, min(3, np.random.randint(0, 3) + 1)): # Random 0-3 faces
129
  x = np.random.randint(0, max(1, width - 100))
130
  y = np.random.randint(0, max(1, height - 100))
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
- opencv-python
2
- Pillow
3
- gradio
4
- numpy
5
- requests
6
- matplotlib
7
- scipy
 
1
+ opencv-python
2
+ Pillow
3
+ gradio
4
+ numpy
5
+ requests
6
+ matplotlib
7
+ scipy
utils.py CHANGED
@@ -1,6 +1,122 @@
1
  import numpy as np
2
  from PIL import Image, ImageDraw
 
 
3
  import json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  def draw_detections(image, face_results, object_results, show_labels, box_color):
6
  """Draw detection boxes on image using PIL."""
@@ -53,9 +169,4 @@ def process_image(image, face_cascade, object_net, object_classes, enable_face,
53
  if enable_objects:
54
  object_results = detect_objects(image, object_net, object_classes, object_conf)
55
 
56
- return image.copy(), face_results, object_results
57
-
58
- def load_detection_models():
59
- """Load detection models."""
60
- from models import load_detection_models as load_models
61
- return load_models()
 
1
  import numpy as np
2
  from PIL import Image, ImageDraw
3
+ import wave
4
+ import os
5
  import json
6
+ import time
7
+ import threading
8
+ import queue
9
+
10
+ # Try to import cv2, but make it optional
11
+ try:
12
+ import cv2
13
+ CV2_AVAILABLE = True
14
+ except ImportError:
15
+ CV2_AVAILABLE = False
16
+
17
+ # Try to import sound libraries
18
+ try:
19
+ import pyaudio
20
+ import numpy as np
21
+ AUDIO_AVAILABLE = True
22
+ except ImportError:
23
+ AUDIO_AVAILABLE = False
24
+
25
def generate_tone(frequency, duration, sample_rate=44100, volume=0.5):
    """Generate a sine-wave tone as float32 samples.

    Args:
        frequency: Tone frequency in cycles per second (Hz).
        duration: Tone length in seconds.
        sample_rate: Number of samples per second.
        volume: Peak amplitude of the generated wave.

    Returns:
        A 1-D ``np.float32`` array holding ``int(duration * sample_rate)``
        samples (empty for a non-positive duration), or ``None`` when audio
        support is unavailable.
    """
    if not AUDIO_AVAILABLE:
        return None

    frames = int(duration * sample_rate)
    # Evaluate the sine for every sample index in one vectorized expression
    # instead of a per-sample Python loop.
    sample_index = np.arange(frames)
    wave_samples = volume * np.sin(2 * np.pi * frequency * sample_index / sample_rate)
    return wave_samples.astype(np.float32)
35
+
36
# Built-in alarm sounds, each a sequence of (frequency in Hz, duration in s).
_SOUND_PATTERNS = {
    "Beep": [(440, 0.2), (440, 0.2)],
    "Siren": [(600, 0.1), (800, 0.1), (600, 0.1), (800, 0.1)],
    "Chime": [(523, 0.3), (659, 0.3), (784, 0.5)],
    "Alert": [(1000, 0.1), (1500, 0.1), (2000, 0.1)],
    "Buzzer": [(200, 0.5)],
    "Ring": [(800, 0.2), (600, 0.2), (800, 0.2), (600, 0.2)],
}

# Sample rate shared by tone generation and the output stream.
_TONE_SAMPLE_RATE = 44100


def play_sound(sound_type):
    """Play a built-in alarm sound or a custom ``.wav`` file.

    Args:
        sound_type: Either the name of a built-in sound (a key of
            ``_SOUND_PATTERNS``) or the path to an existing ``.wav`` file.
            Any other value plays nothing.

    Playback errors are printed rather than raised. A failure to initialise
    PyAudio itself still propagates to the caller. When audio support is
    unavailable the request is only printed.
    """
    if not AUDIO_AVAILABLE:
        print(f"Alarm: {sound_type} (audio not available)")
        return

    p = pyaudio.PyAudio()
    try:
        stream = None
        try:
            is_wav_file = (
                isinstance(sound_type, str)
                and sound_type.lower().endswith('.wav')
                and os.path.exists(sound_type)
            )
            if is_wav_file:
                with wave.open(sound_type, 'rb') as wf:
                    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                                    channels=wf.getnchannels(),
                                    rate=wf.getframerate(),
                                    output=True)
                    data = wf.readframes(1024)
                    while data:
                        stream.write(data)
                        data = wf.readframes(1024)
            elif sound_type in _SOUND_PATTERNS:
                # Only open an output stream when there is something to play.
                stream = p.open(format=pyaudio.paFloat32,
                                channels=1,
                                rate=_TONE_SAMPLE_RATE,
                                output=True)
                for freq, duration in _SOUND_PATTERNS[sound_type]:
                    tone = generate_tone(freq, duration, sample_rate=_TONE_SAMPLE_RATE)
                    if tone is not None:
                        stream.write(tone.tobytes())
        finally:
            # Release the stream even if a write failed part-way through.
            if stream is not None:
                stream.stop_stream()
                stream.close()
    except Exception as e:
        print(f"Error playing sound: {e}")
    finally:
        p.terminate()
89
+
90
class AlarmSystem:
    """Queue alarm sounds for background playback, limited by a cooldown.

    Requests accepted by ``trigger_alarm`` are placed on a queue and played
    one at a time by a daemon worker thread, so callers do not wait for the
    sound to finish.
    """

    def __init__(self):
        # Monotonic timestamp of the last accepted alarm; -inf guarantees the
        # first alarm is never suppressed by the cooldown.
        self.last_alarm_time = float("-inf")
        self.alarm_cooldown = 2  # seconds between alarms
        self.alarm_queue = queue.Queue()
        # Start the worker last so every attribute exists before it runs.
        self.alarm_thread = threading.Thread(target=self._alarm_worker, daemon=True)
        self.alarm_thread.start()

    def _alarm_worker(self):
        """Worker thread: play queued sounds one by one, forever."""
        while True:
            # Block until a request arrives; the thread is a daemon, so it
            # does not keep the process alive.
            sound_type = self.alarm_queue.get()
            try:
                if sound_type:
                    play_sound(sound_type)
            except Exception as e:
                print(f"Alarm worker error: {e}")
            finally:
                # Always balance get() so Queue.join() cannot hang after a
                # failed or empty request.
                self.alarm_queue.task_done()

    def trigger_alarm(self, sound_type):
        """Queue ``sound_type`` for playback unless the cooldown is active.

        Args:
            sound_type: Value handed to ``play_sound`` by the worker thread.

        Returns:
            True if the alarm was queued, False if it was suppressed because
            less than ``alarm_cooldown`` seconds have passed since the last
            accepted alarm.
        """
        # Monotonic clock: unaffected by wall-clock adjustments.
        current_time = time.monotonic()
        if current_time - self.last_alarm_time > self.alarm_cooldown:
            self.alarm_queue.put(sound_type)
            self.last_alarm_time = current_time
            return True
        return False
120
 
121
  def draw_detections(image, face_results, object_results, show_labels, box_color):
122
  """Draw detection boxes on image using PIL."""
 
169
  if enable_objects:
170
  object_results = detect_objects(image, object_net, object_classes, object_conf)
171
 
172
+ return image.copy(), face_results, object_results