Spaces:
Sleeping
Sleeping
#-------------------------------------------------------------------------------
# Name:        pySaliencyMap
# Purpose:     Extracting a saliency map from a single still image
#
# Author:      Akisato Kimura <akisato@ieee.org>
#
# Created:     April 24, 2014
# Copyright:   (c) Akisato Kimura 2014-
# Licence:     All rights reserved
#-------------------------------------------------------------------------------
| import cv2 | |
| import numpy as np | |
| import SaRa.pySaliencyMapDefs as pySaliencyMapDefs | |
| import time | |
class pySaliencyMap:
    """Itti-style visual saliency map extractor for a single BGR image.

    Intensity, color-opponent (RG/BY), orientation (Gabor) and motion
    (Farneback optical flow) feature channels are computed over a Gaussian
    pyramid, combined via center-surround differences into conspicuity maps,
    and summed with per-channel weights into one normalized saliency map.

    Stateful across calls: ``prev_frame`` feeds the motion channel and
    ``SM`` caches the last computed saliency map.
    """

    def __init__(self, width, height):
        """Set the working size (width, height) at which feature maps are
        normalized and combined; load the Gabor kernels from the defs module."""
        self.width = width
        self.height = height
        self.prev_frame = None  # previous uint8 intensity frame, for optical flow
        self.SM = None          # cached saliency map from the last SMGetSM() call
        self.GaborKernel0 = np.array(pySaliencyMapDefs.GaborKernel_0)
        self.GaborKernel45 = np.array(pySaliencyMapDefs.GaborKernel_45)
        self.GaborKernel90 = np.array(pySaliencyMapDefs.GaborKernel_90)
        self.GaborKernel135 = np.array(pySaliencyMapDefs.GaborKernel_135)

    # ---------------------------------------------------------------- channels
    def SMExtractRGBI(self, inputImage):
        """Split a BGR uint8 image into float32 R, G, B and intensity planes
        scaled to [0, 1]."""
        src = np.float32(inputImage) * 1. / 255
        (B, G, R) = cv2.split(src)
        I = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        return R, G, B, I

    # ------------------------------------------------------------ feature maps
    def FMCreateGaussianPyr(self, src):
        """Build a 9-level Gaussian pyramid; level 0 is ``src`` itself."""
        dst = [src]
        for i in range(1, 9):
            dst.append(cv2.pyrDown(dst[i - 1]))
        return dst

    def FMCenterSurroundDiff(self, GaussianMaps):
        """Center-surround differences |c - s| for centers c in {2, 3, 4} and
        surrounds s = c+3 and c+4; surround levels are upsampled to the center
        level's size. Returns six difference maps."""
        dst = list()
        for s in range(2, 5):
            now_size = GaussianMaps[s].shape
            now_size = (now_size[1], now_size[0])  # cv2.resize wants (width, height)
            tmp = cv2.resize(GaussianMaps[s + 3], now_size, interpolation=cv2.INTER_LINEAR)
            dst.append(cv2.absdiff(GaussianMaps[s], tmp))
            tmp = cv2.resize(GaussianMaps[s + 4], now_size, interpolation=cv2.INTER_LINEAR)
            dst.append(cv2.absdiff(GaussianMaps[s], tmp))
        return dst

    def FMGaussianPyrCSD(self, src):
        """Gaussian pyramid followed by center-surround differences."""
        GaussianMaps = self.FMCreateGaussianPyr(src)
        return self.FMCenterSurroundDiff(GaussianMaps)

    def IFMGetFM(self, I):
        """Intensity feature maps."""
        return self.FMGaussianPyrCSD(I)

    def CFMGetFM(self, R, G, B):
        """Color feature maps for the RG and BY opponent channels."""
        # max(R, G, B), floored away from zero to avoid division by zero
        tmp1 = cv2.max(R, G)
        RGBMax = cv2.max(B, tmp1)
        RGBMax[RGBMax <= 0] = 0.0001
        RGMin = cv2.min(R, G)
        RG = (R - G) / RGBMax   # RG = (R - G) / max(R, G, B)
        BY = (B - RGMin) / RGBMax  # BY = (B - min(R, G)) / max(R, G, B)
        # clamp negative values to 0
        RG[RG < 0] = 0
        BY[BY < 0] = 0
        # obtain feature maps in the same way as intensity
        RGFM = self.FMGaussianPyrCSD(RG)
        BYFM = self.FMGaussianPyrCSD(BY)
        return RGFM, BYFM

    def OFMGetFM(self, src):
        """Orientation feature maps: Gabor responses at 0/45/90/135 degrees,
        each reduced by center-surround differences (24 maps total)."""
        GaussianI = self.FMCreateGaussianPyr(src)
        # Pyramid levels 0 and 1 are never read by FMCenterSurroundDiff (it
        # starts at index 2), so pad with dummy arrays to keep indices aligned.
        GaborOutput0 = [np.empty((1, 1)), np.empty((1, 1))]
        GaborOutput45 = [np.empty((1, 1)), np.empty((1, 1))]
        GaborOutput90 = [np.empty((1, 1)), np.empty((1, 1))]
        GaborOutput135 = [np.empty((1, 1)), np.empty((1, 1))]
        for j in range(2, 9):
            GaborOutput0.append(cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel0))
            GaborOutput45.append(cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel45))
            GaborOutput90.append(cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel90))
            GaborOutput135.append(cv2.filter2D(GaussianI[j], cv2.CV_32F, self.GaborKernel135))
        # center-surround differences for every orientation
        CSD0 = self.FMCenterSurroundDiff(GaborOutput0)
        CSD45 = self.FMCenterSurroundDiff(GaborOutput45)
        CSD90 = self.FMCenterSurroundDiff(GaborOutput90)
        CSD135 = self.FMCenterSurroundDiff(GaborOutput135)
        # concatenate in fixed angle order: 0, 45, 90, 135
        dst = list(CSD0)
        dst.extend(CSD45)
        dst.extend(CSD90)
        dst.extend(CSD135)
        return dst

    def MFMGetFM(self, src):
        """Motion feature maps from Farneback optical flow against the
        previous frame; zero flow on the first call. Updates
        ``self.prev_frame`` as a side effect."""
        I8U = np.uint8(255 * src)  # Farneback expects 8-bit input
        if self.prev_frame is not None:
            flow = cv2.calcOpticalFlowFarneback(
                prev=self.prev_frame,
                next=I8U,
                pyr_scale=pySaliencyMapDefs.farne_pyr_scale,
                levels=pySaliencyMapDefs.farne_levels,
                winsize=pySaliencyMapDefs.farne_winsize,
                iterations=pySaliencyMapDefs.farne_iterations,
                poly_n=pySaliencyMapDefs.farne_poly_n,
                poly_sigma=pySaliencyMapDefs.farne_poly_sigma,
                flags=pySaliencyMapDefs.farne_flags,
                flow=None,
            )
            flowx = flow[..., 0]
            flowy = flow[..., 1]
        else:
            flowx = np.zeros(I8U.shape)
            flowy = np.zeros(I8U.shape)
        dst_x = self.FMGaussianPyrCSD(flowx)
        dst_y = self.FMGaussianPyrCSD(flowy)
        self.prev_frame = np.uint8(I8U)  # store a copy for the next call
        return dst_x, dst_y

    # -------------------------------------------------------- conspicuity maps
    def SMRangeNormalize(self, src):
        """Normalize ``src`` to the [0, 1] range; a constant map collapses
        to all zeros."""
        minn, maxx, dummy1, dummy2 = cv2.minMaxLoc(src)
        if maxx != minn:
            # algebraically equal to (src - minn) / (maxx - minn)
            dst = src / (maxx - minn) + minn / (minn - maxx)
        else:
            dst = src - minn
        return dst

    def SMAvgLocalMax(self, src):
        """Average of the per-tile maxima over a grid of stepsize x stepsize
        tiles; 0 when the image is too small to hold a single tile."""
        stepsize = pySaliencyMapDefs.default_step_local
        width = src.shape[1]
        height = src.shape[0]
        numlocal = 0
        lmaxmean = 0
        for y in range(0, height - stepsize, stepsize):
            for x in range(0, width - stepsize, stepsize):
                localimg = src[y:y + stepsize, x:x + stepsize]
                lmin, lmax, dummy1, dummy2 = cv2.minMaxLoc(localimg)
                lmaxmean += lmax
                numlocal += 1
        if numlocal == 0:
            return 0
        else:
            return lmaxmean / numlocal

    def SMNormalization(self, src):
        """Itti-style map normalization: range-normalize, then weight by
        (1 - mean local maximum)^2 to promote maps with few strong peaks."""
        dst = self.SMRangeNormalize(src)
        lmaxmean = self.SMAvgLocalMax(dst)
        normcoeff = (1 - lmaxmean) * (1 - lmaxmean)
        return dst * normcoeff

    def normalizeFeatureMaps(self, FM):
        """Normalize the six feature maps in ``FM`` and resize each to the
        working (width, height)."""
        NFM = list()
        for i in range(0, 6):
            normalizedImage = self.SMNormalization(FM[i])
            nownfm = cv2.resize(normalizedImage, (self.width, self.height),
                                interpolation=cv2.INTER_LINEAR)
            NFM.append(nownfm)
        return NFM

    def ICMGetCM(self, IFM):
        """Intensity conspicuity map: sum of the normalized feature maps."""
        NIFM = self.normalizeFeatureMaps(IFM)
        return sum(NIFM)

    def CCMGetCM(self, CFM_RG, CFM_BY):
        """Color conspicuity map: sum of the RG and BY opponent maps."""
        CCM_RG = self.ICMGetCM(CFM_RG)
        CCM_BY = self.ICMGetCM(CFM_BY)
        return CCM_RG + CCM_BY

    def OCMGetCM(self, OFM):
        """Orientation conspicuity map: per-angle conspicuity maps (6 feature
        maps each, angle = i*45 degrees), re-normalized and accumulated."""
        OCM = np.zeros((self.height, self.width))
        for i in range(0, 4):
            nowofm = OFM[i * 6:(i + 1) * 6]
            NOFM = self.ICMGetCM(nowofm)
            NOFM2 = self.SMNormalization(NOFM)
            OCM += NOFM2
        return OCM

    def MCMGetCM(self, MFM_X, MFM_Y):
        """Motion conspicuity map: x/y flow channels combined like an
        opponent-color pair."""
        return self.CCMGetCM(MFM_X, MFM_Y)

    # --------------------------------------------------------------------- core
    def SMGetSM(self, src):
        """Compute the saliency map of ``src`` (BGR image), cache it in
        ``self.SM`` and return it at the input image's size."""
        size = src.shape
        width = size[1]
        height = size[0]
        # extract individual color channels
        R, G, B, I = self.SMExtractRGBI(src)
        # feature maps
        IFM = self.IFMGetFM(I)
        CFM_RG, CFM_BY = self.CFMGetFM(R, G, B)
        OFM = self.OFMGetFM(I)
        MFM_X, MFM_Y = self.MFMGetFM(I)
        # conspicuity maps
        ICM = self.ICMGetCM(IFM)
        CCM = self.CCMGetCM(CFM_RG, CFM_BY)
        OCM = self.OCMGetCM(OFM)
        MCM = self.MCMGetCM(MFM_X, MFM_Y)
        # weighted sum of the conspicuity maps forms the saliency map
        wi = pySaliencyMapDefs.weight_intensity
        wc = pySaliencyMapDefs.weight_color
        wo = pySaliencyMapDefs.weight_orientation
        wm = pySaliencyMapDefs.weight_motion
        SMMat = wi * ICM + wc * CCM + wo * OCM + wm * MCM
        # normalize, smooth, and resize back to the input size
        normalizedSM = self.SMRangeNormalize(SMMat)
        normalizedSM2 = normalizedSM.astype(np.float32)
        smoothedSM = cv2.bilateralFilter(normalizedSM2, 7, 3, 1.55)
        self.SM = cv2.resize(smoothedSM, (width, height), interpolation=cv2.INTER_NEAREST)
        return self.SM

    def SMGetBinarizedSM(self, src):
        """Otsu-binarized saliency map as uint8 {0, 255}.

        NOTE(review): if ``self.SM`` is already cached from an earlier call,
        that cached map is reused and ``src`` is ignored — confirm this
        caching is intended rather than recomputing per frame.
        """
        if self.SM is None:
            self.SM = self.SMGetSM(src)
        SM_I8U = np.uint8(255 * self.SM)
        thresh, binarized_SM = cv2.threshold(SM_I8U, thresh=0, maxval=255,
                                             type=cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return binarized_SM

    def SMGetSalientRegion(self, src):
        """Extract the salient region of ``src`` with GrabCut, seeded by the
        binarized saliency map (salient pixels = probable foreground)."""
        binarized_SM = self.SMGetBinarizedSM(src)
        img = src.copy()
        mask = np.where((binarized_SM != 0), cv2.GC_PR_FGD, cv2.GC_PR_BGD).astype('uint8')
        bgdmodel = np.zeros((1, 65), np.float64)
        fgdmodel = np.zeros((1, 65), np.float64)
        rect = (0, 0, 1, 1)  # dummy; ignored in GC_INIT_WITH_MASK mode
        iterCount = 1
        cv2.grabCut(img, mask=mask, rect=rect, bgdModel=bgdmodel,
                    fgdModel=fgdmodel, iterCount=iterCount,
                    mode=cv2.GC_INIT_WITH_MASK)
        # keep pixels marked (probable) foreground
        mask_out = np.where((mask == cv2.GC_FGD) + (mask == cv2.GC_PR_FGD), 255, 0).astype('uint8')
        output = cv2.bitwise_and(img, img, mask=mask_out)
        return output