Anuj-Panthri committed on
Commit
8f1e9c2
·
1 Parent(s): a8514bd

yolo_v2_gui using gradio blocks

Browse files
Files changed (7) hide show
  1. app.py +91 -0
  2. config.py +17 -0
  3. decode_yolo_v2.py +109 -0
  4. load_model.py +137 -0
  5. requirements.txt +0 -0
  6. test.py +38 -0
  7. yolo_v2(iou_70.5945).h5 +3 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ from matplotlib.patches import Rectangle
5
+ from load_model import load_model
6
+ from decode_yolo_v2 import *
7
+ from config import *
8
+ import gradio as gr
9
+ import json
10
+ from glob import glob
11
+
12
+
13
+
14
# Build the network and load the trained weights once, at import time,
# so every prediction call below reuses the same model instance.
model = load_model("yolo_v2(iou_70.5945).h5")
15
+
16
+
17
def get_preds(test_img, p=0.2, iou_threshold=0.3):
    """Run detection on an image file and show the annotated result with matplotlib.

    Args:
        test_img: Path of the image file to read.
        p: Minimum confidence forwarded to ``get_objects``.
        iou_threshold: IoU threshold forwarded to ``nms``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``test_img``.
    """
    bgr = cv2.imread(test_img)
    if bgr is None:
        # cv2.imread reports an unreadable/missing file by returning None;
        # fail here with a clear message instead of deep inside cvtColor.
        raise FileNotFoundError(f"could not read image: {test_img!r}")

    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    img = cv2.resize(rgb, (image_size, image_size))
    img = np.expand_dims(img, axis=0)  # add the batch axis expected by predict()
    y_pred = model.predict(img)

    objs_found = get_objects(y_pred[0], p=p)
    objs_found = nms(objs_found, iou_threshold=iou_threshold)
    print("objs_found:", objs_found)

    plt.axis('off')
    img = pred_image(img[0], objs_found)
    plt.imshow(img)
    plt.show()
31
+
32
+ # get_preds(input("Enter image path:"))
33
+ # get_preds("C:/Users/Home/Downloads/image_2.jpeg")
34
+
35
def get_output(img, p, iou_threshold):
    """Detect objects in an image array and return the annotated image plus JSON.

    Args:
        img: Image array supplied by the UI, or ``None`` when no image is set.
        p: Minimum confidence forwarded to ``get_objects``.
        iou_threshold: IoU threshold forwarded to ``nms``.

    Returns:
        A ``(image, json_string)`` pair. The image is resized back to the
        input's original width/height; the JSON string has a single
        ``objects_found`` key. With no input image the pair is
        ``(None, '{"objects_found": []}')``.
    """
    if img is None:
        # Submit was pressed without an image: return an empty result
        # instead of crashing on ``img.shape``.
        return None, json.dumps({'objects_found': []})

    h, w = img.shape[:2]
    batch = np.expand_dims(cv2.resize(img, (image_size, image_size)), axis=0)
    y_pred = model.predict(batch, verbose=0)

    objs_found = get_objects(y_pred[0], p=p)
    objs_found = nms(objs_found, iou_threshold=iou_threshold)

    annotated = pred_image(batch[0], objs_found)
    annotated = cv2.resize(annotated, (w, h))  # restore the caller's resolution
    return annotated, json.dumps({'objects_found': objs_found})
48
+
49
+
50
def toggle_webcam(x):
    """Switch the input image component between file upload and webcam streaming.

    ``x`` is the selected radio value; anything other than ``'upload'`` is
    treated as webcam. The module-level ``input_image.source`` is updated and
    an update payload (a dict tagged with ``'__type__': 'update'``) is returned.
    """
    use_webcam = x != 'upload'
    source = 'webcam' if use_webcam else 'upload'
    input_image.source = source
    return {'value': None, 'streaming': use_webcam, 'source': source, '__type__': 'update'}
57
+
58
def clear_all(x):
    """Unrender the output image and return an update payload that clears the value.

    ``x`` is accepted for the event-handler signature and is not used.
    """
    out_img.unrender()
    cleared = {'value': None, '__type__': 'update'}
    return cleared
61
+
62
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation was lost — confirm the layout against the running app.
with gr.Blocks(title="Yolo V2 Object detection") as app:

    with gr.Column():
        gr.Markdown("<h1 style=\"text-align: center;\">Yolo V2 Object detection</h1>")
        # Derive the count from class_names so the text cannot drift from the config.
        gr.Markdown(f"We can detect {len(class_names)} types of objects: {', '.join(class_names)}")
        with gr.Row():
            # Left panel: input image, source selector and thresholds.
            with gr.Box():
                input_image = gr.Image(value=None, interactive=True)
                with gr.Row():
                    radio_btn = gr.Radio(['upload', 'webcam'], value='upload', interactive=True)

                conf_slider = gr.Slider(0, 1, value=0.2, label='min_confidence', interactive=True)
                nms_slider = gr.Slider(0, 1, value=0.3, label='nms_iou_threshold', interactive=True)

                with gr.Row():
                    reset_btn = gr.Button('reset', visible=False)
                    submit = gr.Button('submit')
            # Right panel: annotated image and the detections as JSON.
            with gr.Box():
                with gr.Column():
                    out_img = gr.Image(label='objects_found', interactive=False)
                    out_labels = gr.JSON(label='objects_found', interactive=False)

    # reset_btn.click(clear_all,reset_btn,out_img)
    radio_btn.change(toggle_webcam, radio_btn, input_image)
    submit.click(get_output, [input_image, conf_slider, nms_slider], [out_img, out_labels])

app.launch()
config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
# Network input resolution (square) and number of anchor boxes per grid cell.
image_size = 416
num_anchors = 4

# The network downsamples by 32, so this is the side length of the output grid.
output_size = image_size / 32

class_names = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

# One random RGB triple in [0, 1) per class, drawn at import time.
class_colors = {name: np.random.rand(3) for name in class_names}

# Anchor boxes as (width, height) pairs, one row per anchor.
anchor_boxes = np.array(
    [
        [1.07709888, 1.78171903],    # anchor box 1
        [2.71054693, 5.12469308],    # anchor box 2
        [10.47181473, 10.09646365],  # anchor box 3
        [5.48531347, 8.11011331],    # anchor box 4
    ],
    dtype='float32',
)

# Size of one grid cell in input-image pixels.
cell_size = image_size / output_size

# Lookups between class names and their positions in class_names.
class_to_idx = {name: position for position, name in enumerate(class_names)}
idx_to_class = dict(enumerate(class_names))
decode_yolo_v2.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ from matplotlib.patches import Rectangle
4
+ import copy
5
+ import tensorflow.keras.backend as K
6
+ from config import *
7
+ import cv2
8
+
9
+
10
# Anchor (width, height) pairs reshaped to [1, 1, 1, num_anchors, 2] so they
# broadcast against the width/height channels of a prediction grid.
tf_anchors = K.reshape(
    K.variable(anchor_boxes),
    [1, 1, 1, num_anchors, 2],
)
11
+
12
def get_objects(y_pred, p=0.5, decode_preds=True, idx=None):
    """Turn one prediction grid into a list of detections sorted by confidence.

    The last axis of ``y_pred`` is read as
    ``(p, x, y, w, h, c_1, ..., c_n)``. The input is deep-copied, so the
    caller's array is left untouched.

    Args:
        y_pred: Prediction grid for a single image.
        p: Minimum confidence a cell/anchor needs to be reported.
        decode_preds: When true, apply sigmoid to confidence and x/y, and
            ``exp(...) * anchors`` (clipped to ``[0, output_size]``) to w/h.
        idx: Optional value stored under ``'idx'`` in every detection.

    Returns:
        A list of dicts with keys ``'p'``, ``'xywh'`` (ints: xmin, ymin,
        width, height in input-image pixels), ``'class_idx'``, ``'class'``
        and, when ``idx`` is given, ``'idx'``; highest confidence first.
    """
    preds = copy.deepcopy(y_pred)
    if decode_preds:
        preds[..., 0] = K.sigmoid(preds[..., 0])
        anchored_wh = (K.exp(preds[..., 3:5]) * tf_anchors).numpy()
        preds[..., 3:5] = np.clip(anchored_wh, 0, output_size)

    detections = []
    hits = np.where(preds[..., 0] >= p)
    if np.size(hits):
        # Fancy indexing copies the selected rows, so edits below stay local.
        candidates = preds[hits[0], hits[1], hits[2], :]
        for n, cell in enumerate(candidates):
            # cell is (p, x, y, w, h, c_1, c_2, ..., c_n)
            if decode_preds:
                cell[1:3] = K.sigmoid(cell[1:3])  # x, y offsets inside the cell

            confidence = cell[0]
            box = cell[1:]
            box[4] = np.argmax(box[4:])  # replace the class scores by the best class index
            box = box[:5]                # (x, y, w, h, class_idx)
            box[:-1] *= cell_size        # grid units -> input-image pixels

            box[0] = (hits[1][n] * cell_size) + box[0]  # center x
            box[1] = (hits[0][n] * cell_size) + box[1]  # center y

            box[0] = np.clip(box[0] - (box[2] / 2), 0, image_size)  # xmin
            box[1] = np.clip(box[1] - (box[3] / 2), 0, image_size)  # ymin

            label = idx_to_class[box[4]]
            entry = {
                'p': float(confidence),
                'xywh': box[:-1].astype(int).tolist(),
                'class_idx': int(box[4]),
                'class': label,
            }
            if idx is not None:
                entry['idx'] = idx
            detections.append(entry)

    detections.sort(key=lambda found: found['p'], reverse=True)
    return detections
51
+
52
def list_get_iou(bboxes1, bboxes2):
    """Return the intersection-over-union of two ``[x, y, w, h]`` boxes.

    ``x``/``y`` are the top-left corner. A small epsilon in the denominator
    avoids division by zero for degenerate boxes.
    """
    # Convert both boxes from (x, y, w, h) to corner coordinates.
    a_x1, a_y1 = bboxes1[0], bboxes1[1]
    a_x2, a_y2 = a_x1 + bboxes1[2], a_y1 + bboxes1[3]
    b_x1, b_y1 = bboxes2[0], bboxes2[1]
    b_x2, b_y2 = b_x1 + bboxes2[2], b_y1 + bboxes2[3]

    # Overlap extents are clamped at zero for disjoint boxes.
    overlap_w = max(0, min(a_x2, b_x2) - max(a_x1, b_x1))
    overlap_h = max(0, min(a_y2, b_y2) - max(a_y1, b_y1))
    intersection_area = overlap_w * overlap_h

    area_a = (a_x2 - a_x1) * (a_y2 - a_y1)
    area_b = (b_x2 - b_x1) * (b_y2 - b_y1)

    union_area = float(area_a + area_b - intersection_area + 1e-6)
    return intersection_area / union_area
70
+
71
def nms(objs_found, iou_threshold=0.2):
    """Apply per-class non-maximum suppression to a list of detections.

    Detections are processed in the order given: the first remaining one is
    kept, and every later detection of the same ``'class_idx'`` whose IoU
    with it is at least ``iou_threshold`` is dropped. The input list is
    deep-copied and not modified.

    Returns:
        The list of kept detection dicts.
    """
    remaining = list(copy.deepcopy(objs_found))
    best_boxes = []
    while remaining:
        obj, rest = remaining[0], remaining[1:]
        # Keep only the candidates that do not overlap ``obj`` too much
        # (different classes never suppress each other).
        remaining = [
            other for other in rest
            if not (
                obj['class_idx'] == other['class_idx']
                and list_get_iou(obj['xywh'], other['xywh']) >= iou_threshold
            )
        ]
        best_boxes.append(obj)
    return best_boxes
88
+
89
def show_objects(img, objs_found, return_img=False):
    """Draw ``img`` with matplotlib and overlay a labelled rectangle per detection.

    Each detection supplies ``'xywh'`` (top-left corner, width, height) and
    ``'class'``; the outline colour comes from ``class_colors``.
    ``return_img`` is accepted but not used.
    """
    plt.imshow(img)
    for found in objs_found:
        box = found['xywh']
        label = found['class']
        outline = Rectangle(
            (box[0], box[1]),
            box[2],
            box[3],
            linewidth=4,
            edgecolor=class_colors[label],
            facecolor='none',
        )
        plt.gca().add_patch(outline)
        plt.text(box[0], box[1], label)
97
+
98
+
99
+
100
def pred_image(img, objs_found):
    """Draw a coloured box and class label on ``img`` for every detection.

    Each detection supplies ``'xywh'`` (top-left corner, width, height) and
    ``'class'``. Returns the image produced by the OpenCV drawing calls.
    """
    for found in objs_found:
        box = found['xywh']
        label = found['class']

        top_left = (int(box[0]), int(box[1]))
        bottom_right = (int(box[0] + box[2]), int(box[1] + box[3]))

        # class_colors holds values in [0, 1); scale to the 0-255 range.
        img = cv2.rectangle(img, top_left, bottom_right, (class_colors[label] * 255), 4)
        img = cv2.putText(img, label, top_left, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, lineType=cv2.LINE_AA)
    return img
load_model.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tensorflow as tf
3
+ from config import *
4
+ from tensorflow.keras import Model,layers
5
+
6
# Expose no CUDA devices to this process (presumably to force CPU inference).
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
7
# custom layer for reshaping last layer
class yolo_dynamic_reshape(tf.keras.layers.Layer):
    """Reshape the final feature map to ``(grid_h, grid_w, num_anchors, 5 + classes)``.

    The grid size is derived from the model input's height/width divided by
    the network stride (32).
    """

    def __init__(self, **kwargs):
        # Forward kwargs (name, dtype, ...) to the base Layer; dropping them
        # silently ignores caller-supplied options and breaks from_config().
        super().__init__(**kwargs)
        self.scaler = 32
        self.last_item = 5 + len(class_names)

    def call(self, input_layer, output_layer):
        """Reshape ``output_layer`` using the spatial size of ``input_layer``.

        Args:
            input_layer: The model input tensor; only its static shape is read.
            output_layer: The tensor to reshape.
        """
        if input_layer.shape[1] is not None:
            grid_h = input_layer.shape[1] // self.scaler
            grid_w = input_layer.shape[2] // self.scaler
            target = (grid_h, grid_w, num_anchors, self.last_item)
            return layers.Reshape(target)(output_layer)
        # Input height is unknown at graph-build time: keep the original placeholder shape.
        return layers.Reshape((0, 0, num_anchors, self.last_item))(output_layer)
19
+
20
def create_model():
    """Build the YOLO v2 Keras model for 416x416x3 inputs.

    The network is a stack of Conv2D (no bias) -> BatchNormalization ->
    LeakyReLU(0.1) blocks named ``conv_N`` / ``norm_N``, with a
    space-to-depth skip connection taken after block 13 and a final 1x1
    convolution (``conv_23``) reshaped by ``yolo_dynamic_reshape``.
    Layers are created in the same order and with the same names as the
    saved weights expect.
    """

    def space_to_depth_x2(x):
        return tf.nn.space_to_depth(x, block_size=2)

    def conv_bn_leaky(tensor, filters, kernel, index):
        # One Conv2D -> BatchNormalization -> LeakyReLU block, numbered ``index``.
        tensor = layers.Conv2D(
            filters,
            (kernel, kernel),
            strides=(1, 1),
            padding='same',
            name=f'conv_{index}',
            use_bias=False,
        )(tensor)
        tensor = layers.BatchNormalization(name=f'norm_{index}')(tensor)
        return layers.LeakyReLU(alpha=0.1)(tensor)

    def run_stage(tensor, first_index, stage):
        # Apply consecutive blocks; each entry is (filters, kernel, pool_after).
        for offset, (filters, kernel, pool_after) in enumerate(stage):
            tensor = conv_bn_leaky(tensor, filters, kernel, first_index + offset)
            if pool_after:
                tensor = layers.MaxPooling2D(pool_size=(2, 2))(tensor)
        return tensor

    # Blocks 1-13, up to the skip connection.
    front_stage = [
        (32, 3, True),
        (64, 3, True),
        (128, 3, False),
        (64, 1, False),
        (128, 3, True),
        (256, 3, False),
        (128, 1, False),
        (256, 3, True),
        (512, 3, False),
        (256, 1, False),
        (512, 3, False),
        (256, 1, False),
        (512, 3, False),
    ]
    # Blocks 14-20, after the last pooling step.
    back_stage = [
        (1024, 3, False),
        (512, 1, False),
        (1024, 3, False),
        (512, 1, False),
        (1024, 3, False),
        (1024, 3, False),
        (1024, 3, False),
    ]

    x_input = layers.Input(shape=(416, 416, 3))
    x = layers.Lambda(lambda x: x / 255.)(x_input)  # scale pixels to [0, 1]

    x = run_stage(x, 1, front_stage)
    skip_connection = x

    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = run_stage(x, 14, back_stage)

    skip_connection = conv_bn_leaky(skip_connection, 64, 1, 21)
    skip_connection = layers.Lambda(space_to_depth_x2)(skip_connection)  # halves the resolution and adds more depth

    x = layers.concatenate([skip_connection, x])
    x = conv_bn_leaky(x, 1024, 3, 22)

    x = layers.Conv2D(
        (num_anchors * (5 + len(class_names))),
        (1, 1),
        strides=(1, 1),
        padding='same',
        name='conv_23',
    )(x)
    out = yolo_dynamic_reshape()(x_input, x)

    return Model(x_input, out, name='yolo_v2_model')
131
+
132
def load_model(path):
    """Build the network with ``create_model`` and load its weights from ``path``.

    The architecture is rebuilt in code and only the weights are read from
    the file. Returns the model with the weights loaded.
    """
    network = create_model()
    network.load_weights(path)
    return network
requirements.txt ADDED
Binary file (2.3 kB). View file
 
test.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import h5py
2
+
3
def print_structure(weight_file_path):
    """
    Prints out the structure of HDF5 file.

    Args:
        weight_file_path (str) : Path to the file to analyze
    """
    # Open read-only: this is a pure inspection helper, and an explicit mode
    # prevents h5py versions that default to append from creating/modifying
    # the file. The context manager closes the file on every exit path.
    with h5py.File(weight_file_path, "r") as f:
        if len(f.attrs.items()):
            print("{} contains: ".format(weight_file_path))
            print("Root attributes:")
        for key, value in f.attrs.items():
            print(" {}: {}".format(key, value))

        if len(f.items()) == 0:
            return

        for layer, g in f.items():
            print(" {}".format(layer))
            print(" Attributes:")
            for key, value in g.attrs.items():
                print(" {}: {}".format(key, value))

            print(" Dataset:")
            for p_name in g.keys():
                param = g[p_name]
                for k_name in param.keys():
                    print(" {}/{}: {}".format(p_name, k_name, param.get(k_name)[:]))
34
+
35
try:
    print_structure("yolo_v2(iou_70.5945).h5")
except Exception as err:
    # Inspection is best-effort, but report why it failed instead of
    # silently swallowing every error.
    print("could not inspect weights file: {}".format(err))
yolo_v2(iou_70.5945).h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f9495daa9752653b365f652eb7f904b4d5b58ef26b3896f14f5077f8767a6a8
3
+ size 608026904