require 'torch'
require 'image'
require 'paths'
require 'nn'
require 'inn'
require 'xlua'
require 'cudnn'
require 'loadcaffe'

local gp = require 'gpath'

local nn_utils = {}

-- Per-channel RGB means (0-255 scale) and their [0, 1]-scale counterparts.
nn_utils.mean = torch.Tensor({129.67, 114.43, 107.26})
nn_utils.nmean = nn_utils.mean:clone():div(255)

-------------------------------------------------------------------
-- Generate an image of a particular size with values scaled to [0, 1] and
-- the mean subtracted.
function nn_utils.normalize(im, width, height, nmean)
   nmean = nmean or nn_utils.nmean
   assert((#im)[1] == (#nmean)[1])
   -- scale the image
   local normalized = image.scale(im, width, height)
   -- bring it into [0, 1]
   if normalized:max() > 1 then
      normalized:div(255)
   end
   -- mean subtraction
   for i = 1, (#nmean)[1] do
      normalized[i]:csub(nmean[i])
   end
   return normalized
end

-- Add the mean value back.
function nn_utils.unnormalize(im, nmean)
   nmean = nmean or nn_utils.nmean
   assert((#im)[1] == (#nmean)[1])
   local unnorm = im:clone()
   for i = 1, (#nmean)[1] do
      unnorm[i]:add(nmean[i])
   end
   return unnorm
end

function nn_utils.loadNormalizeIm(imName, numChn, width, height)
   numChn = numChn or 3
   local im = image.load(imName)
   -- replicate grayscale images to three channels
   if im:size(1) == 1 and numChn == 3 then
      im = torch.repeatTensor(im, 3, 1, 1)
   end
   -- normalize the image with the default mean
   im = nn_utils.normalize(im, width, height)
   return im
end

-- Convert a 3D image or 4D batch to Caffe-style input: resized, scaled to
-- [0, 255] (or [0, 1] when fullScale is false), mean-subtracted, RGB->BGR.
function nn_utils.toCaffeInput(input, fullScale, swapChn, width, height, mean)
   assert(input:dim() == 4 and input:size(2) == 3 or
          input:dim() == 3 and input:size(1) == 3)
   -- `x or true` would override an explicit false, so compare against nil
   if fullScale == nil then fullScale = true end
   if swapChn == nil then swapChn = true end
   width = width or 227
   height = height or 227
   mean = mean or nn_utils.mean
   if input:dim() == 4 then
      local bs = input:size(1)
      local ch = input:size(2)
      local ht = input:size(3)
      local wd = input:size(4)
      input = image.scale(input:view(bs * ch, ht, wd), width, height)
                 :view(bs, ch, height, width)
   else
      input = image.scale(input, width, height)
   end
   local maxV = input:max()
   local minV = input:min()
   if fullScale then
      if math.abs(maxV) <= 1 and math.abs(minV) <= 1 then
         input:mul(255)
      end
      -- negative values suggest the mean was already subtracted
      if maxV >= 0.5 and minV >= 0 then
         if input:dim() == 4 then
            for i = 1, 3 do input[{{}, i, {}, {}}]:csub(mean[i]) end
         else
            for i = 1, 3 do input[{i, {}, {}}]:csub(mean[i]) end
         end
      end
   else
      if math.abs(maxV) > 1 or math.abs(minV) > 1 then
         input:div(255)
      end
      if maxV >= 0.5 and minV >= 0 then
         if input:dim() == 4 then
            for i = 1, 3 do input[{{}, i, {}, {}}]:csub(mean[i] / 255) end
         else
            for i = 1, 3 do input[{i, {}, {}}]:csub(mean[i] / 255) end
         end
      end
   end
   -- swap RGB to BGR
   if swapChn then
      if input:dim() == 4 then
         local tmp = input[{{}, 1, {}, {}}]:clone()
         input[{{}, 1, {}, {}}] = input[{{}, 3, {}, {}}]
         input[{{}, 3, {}, {}}] = tmp
      else
         local tmp = input[{1, {}, {}}]:clone()
         input[{1, {}, {}}] = input[{3, {}, {}}]
         input[{3, {}, {}}] = tmp
      end
   end
   return input
end
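-- A minimal sketch of how the helpers above compose; the image path and the
-- 224x224 target size are illustrative, not part of the module.
function nn_utils.demoNormalize(imName)
   imName = imName or '/path/to/image.jpg'  -- hypothetical path
   -- load, resize to 224x224 and subtract the default mean
   local im = nn_utils.loadNormalizeIm(imName, 3, 224, 224)
   -- unnormalize inverts the mean subtraction done by normalize
   local restored = nn_utils.unnormalize(im)
   return im, restored
end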
-----------------------------------------------------------------------------
-- Pretrained model loaders
function nn_utils.loadLeNet(net)
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'lenet')
   return loadcaffe.load(paths.concat(modelPath, 'lenet.prototxt'),
      paths.concat(modelPath, 'lenet_iter_10000.caffemodel'), net)
end

function nn_utils.loadAlexNet(net)
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'bvlc_alexnet')
   return loadcaffe.load(paths.concat(modelPath, 'deploy.prototxt'),
      paths.concat(modelPath, 'bvlc_alexnet.caffemodel'), net)
end

-- not working
function nn_utils.loadPlacesAlexNet(net)
   print('Warning: loadPlacesAlexNet is not working')
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'places205_alexnet')
   return loadcaffe.load(paths.concat(modelPath, 'places205CNN_deploy_torch.prototxt'),
      paths.concat(modelPath, 'places205CNN_iter_300000.caffemodel'), net)
end

-- not working
function nn_utils.loadHybridAlexNet(net)
   print('Warning: loadHybridAlexNet is not working')
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'hybrid_alexnet')
   return loadcaffe.load(paths.concat(modelPath, 'hybridCNN_deploy.prototxt'),
      paths.concat(modelPath, 'hybridCNN_iter_700000.caffemodel'), net)
end

function nn_utils.loadCaffeNet(net)
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'bvlc_reference_caffenet')
   return loadcaffe.load(paths.concat(modelPath, 'deploy.prototxt'),
      paths.concat(modelPath, 'bvlc_reference_caffenet.caffemodel'), net)
end

function nn_utils.loadVGG16(net)
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'vgg_16')
   return loadcaffe.load(paths.concat(modelPath, 'VGG_ILSVRC_16_layers_deploy.prototxt'),
      paths.concat(modelPath, 'VGG_ILSVRC_16_layers.caffemodel'), net)
end

function nn_utils.loadVGG19(net)
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'vgg_19')
   return loadcaffe.load(paths.concat(modelPath, 'VGG_ILSVRC_19_layers_deploy.prototxt'),
      paths.concat(modelPath, 'VGG_ILSVRC_19_layers.caffemodel'), net)
end

function nn_utils.loadGoogleNet()
   local modelPath = paths.concat(gp.caffe_model, 'googlenet')
   return torch.load(paths.concat(modelPath, 'inceptionv3.net'))
end

function nn_utils.loadResNet18()
   local modelPath = paths.concat(gp.caffe_model, 'resnet_18')
   return torch.load(paths.concat(modelPath, 'resnet-18.t7'))
end

function nn_utils.loadResNet34()
   local modelPath = paths.concat(gp.caffe_model, 'resnet_34')
   return torch.load(paths.concat(modelPath, 'resnet-34.t7'))
end

function nn_utils.loadResNet50()
   local modelPath = paths.concat(gp.caffe_model, 'resnet_50')
   return torch.load(paths.concat(modelPath, 'resnet-50.t7'))
end

function nn_utils.loadResNet101()
   local modelPath = paths.concat(gp.caffe_model, 'resnet_101')
   return torch.load(paths.concat(modelPath, 'resnet-101.t7'))
end

function nn_utils.loadResNet152()
   local modelPath = paths.concat(gp.caffe_model, 'resnet_152')
   return torch.load(paths.concat(modelPath, 'resnet-152.t7'))
end

function nn_utils.loadResNet200()
   local modelPath = paths.concat(gp.caffe_model, 'resnet_200')
   return torch.load(paths.concat(modelPath, 'resnet-200.t7'))
end

function nn_utils.loadRCNN(net)
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'bvlc_reference_rcnn_ilsvrc13')
   return loadcaffe.load(paths.concat(modelPath, 'deploy.prototxt'),
      paths.concat(modelPath, 'bvlc_reference_rcnn_ilsvrc13.caffemodel'), net)
end

function nn_utils.loadFastRCNNCaffeNet()
   local modelPath = paths.concat(gp.caffe_model, 'fastrcnn')
   return torch.load(paths.concat(modelPath, 'caffenet_fast_rcnn_iter_40000.t7')):unpack()
end

function nn_utils.loadFastRCNNVGG16()
   local modelPath = paths.concat(gp.caffe_model, 'fastrcnn')
   return torch.load(paths.concat(modelPath, 'vgg16_fast_rcnn_iter_40000.t7')):unpack()
end

function nn_utils.loadFCN32s()
   local modelPath = paths.concat(gp.caffe_model, 'fcn_32s_pascal')
   return torch.load(paths.concat(modelPath, 'fcn_32s_pascal.t7'))
end

function nn_utils.loadFCN32sRaw(net)
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'fcn_32s_pascal')
   return loadcaffe.load(paths.concat(modelPath, 'fcn-32s-pascal-deploy.prototxt'),
      paths.concat(modelPath, 'fcn-32s-pascal.caffemodel'), net)
end

function nn_utils.loadNIN(net)
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'nin')
   return loadcaffe.load(paths.concat(modelPath, 'train_val.prototxt'),
      paths.concat(modelPath, 'nin_imagenet_conv.caffemodel'), net)
end

-- not working
function nn_utils.loadFasterRCNNZF(net)
   print('Warning: loadFasterRCNNZF is not working')
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'faster_rcnn_VOC0712_ZF')
   return loadcaffe.load(paths.concat(modelPath, 'deploy.prototxt'),
      paths.concat(modelPath, 'ZF_faster_rcnn_final.caffemodel'), net)
end

-- not working
function nn_utils.loadFasterRCNNVGG(net)
   print('Warning: loadFasterRCNNVGG is not working')
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'faster_rcnn_VOC0712_vgg_16layers')
   return loadcaffe.load(paths.concat(modelPath, 'deploy.prototxt'),
      paths.concat(modelPath, 'VGG16_faster_rcnn_final.caffemodel'), net)
end

-- not working
function nn_utils.loadHED(net)
   print('Warning: loadHED is not working')
   net = net or 'cudnn'
   local modelPath = paths.concat(gp.caffe_model, 'hed')
   return loadcaffe.load(paths.concat(modelPath, 'hed.prototxt'),
      paths.concat(modelPath, 'hed_bsds.caffemodel'), net)
end
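-- Illustrative sketch tying the loaders to toCaffeInput: classify one image
-- with VGG16. The image path is hypothetical, and VGG16 is assumed to take
-- 224x224 mean-subtracted BGR input in the 0-255 range.
function nn_utils.demoClassify(imName)
   local model = nn_utils.loadVGG16()
   local im = image.load(imName or '/path/to/image.jpg')  -- hypothetical path
   local input = nn_utils.toCaffeInput(im, true, true, 224, 224)
   -- loadcaffe returns a float net; move both to the GPU for the cudnn backend
   model:cuda()
   local scores = model:forward(input:cuda())
   local _, class = scores:max(scores:dim())
   return class
end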
------------------------------------------------------------------------
-- Remove temporary buffers and GPU-specific fields so a network can be
-- serialized compactly.
function nn_utils.sanitize(net)
   local list = net:listModules()
   for _, val in ipairs(list) do
      for name, field in pairs(val) do
         if torch.type(field) == 'cdata' then val[name] = nil end
         if name == 'homeGradBuffers' then val[name] = nil end
         if name == 'input_gpu' then val['input_gpu'] = {} end
         if name == 'gradOutput_gpu' then val['gradOutput_gpu'] = {} end
         if name == 'gradInput_gpu' then val['gradInput_gpu'] = {} end
         --if (name == 'output' or name == 'gradInput' or
         --    name == 'fgradInput' or name == 'finput' or
         --    name == 'gradWeight' or name == 'gradBias') then
         if (name == 'output' or name == 'gradInput' or
             name == 'fgradInput' or name == 'finput') then
            if torch.type(field) == 'table' then
               val[name] = {}
            else
               val[name] = field.new()
            end
         end
         if name == 'buffer' or name == 'buffer2' or name == 'normalized' or
            name == 'centered' or name == 'addBuffer' then
            val[name] = nil
         end
      end
   end
   return net
end

-------------------------------------------------------------------------
-- Pretty-print the dimensions of a tensor, or of a (possibly nested) table
-- of tensors.
function nn_utils.tensorDimsStr(A)
   if torch.isTensor(A) then
      local tmp = A:size(1)
      for iDim = 2, A:nDimension() do
         tmp = tmp .. ' x ' .. A:size(iDim)
      end
      return tmp
   else
      local tmp = 'Length ' .. #A .. ' Table\n'
      for i = 1, #A do
         tmp = tmp .. 'Table[' .. i .. ']: ' .. nn_utils.tensorDimsStr(A[i]) .. '\n'
      end
      return tmp
   end
end

-- A multi-concat function.
-- Replaces the 'concat' in torch, which can't deal with cuda tensors.
function nn_utils.concatTensors(tensors, outputDimension)
   local nTensors = table.getn(tensors)
   local sumOutputSizes = 0
   for iTensor = 1, nTensors do
      sumOutputSizes = sumOutputSizes + tensors[iTensor]:size(outputDimension)
   end
   local outputSize = tensors[1]:size()
   outputSize[outputDimension] = sumOutputSizes
   -- We clone and then resize to make sure it's the right kind of tensor.
   -- TODO is there a better way to do this?
   local res = tensors[1]:clone()
   res:resize(outputSize)
   local curOutputOffset = 1
   for iTensor = 1, nTensors do
      local accessor = {}
      for j = 1, outputSize:size() do
         accessor[j] = {}
      end
      local outputDimSize = tensors[iTensor]:size(outputDimension)
      accessor[outputDimension] = {curOutputOffset, curOutputOffset + outputDimSize - 1}
      res[accessor]:copy(tensors[iTensor])
      curOutputOffset = curOutputOffset + outputDimSize
   end
   return res
end
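-- Small self-contained check of concatTensors: join two 4D tensors along the
-- channel dimension (dim 2), as torch.cat would for CPU tensors.
function nn_utils.demoConcat()
   local a = torch.randn(2, 3, 8, 8)
   local b = torch.randn(2, 5, 8, 8)
   local c = nn_utils.concatTensors({a, b}, 2)
   assert(c:size(2) == 8)  -- 3 + 5 channels
   return c
end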
-- Recursively forward inputData through layer, printing each sub-layer's
-- type and input/output dimensions, and flagging NaNs in the output.
function nn_utils.dumpNetwork(layer, inputData, prefix)
   prefix = prefix or ''
   local prefixExtension = '   '
   local output
   local strLayer = tostring(layer)
   if strLayer:sub(1, 13) == 'nn.Sequential' then
      local nLayers = layer:size()
      print(prefix .. 'Layer type: nn.Sequential (' .. nLayers .. ')')
      print(prefix .. 'Input: ' .. nn_utils.tensorDimsStr(inputData))
      local layerInput = inputData
      for iLayer = 1, nLayers do
         print(prefix .. 'Sequential layer ' .. iLayer)
         local curLayer = layer:get(iLayer)
         local res = nn_utils.dumpNetwork(curLayer, layerInput, prefix .. prefixExtension)
         layerInput = res
      end
      output = layerInput
   elseif strLayer:sub(1, 16) ~= 'nn.ParallelTable' and
          strLayer:sub(1, 11) == 'nn.Parallel' then
      local nLayers = table.getn(layer.modules)
      print(prefix .. 'Layer type: nn.Parallel (' .. nLayers .. ')')
      local inputDimension = layer.inputDimension
      local outputDimension = layer.outputDimension
      print(prefix .. 'Split on ' .. inputDimension)
      print(prefix .. 'Input: ' .. nn_utils.tensorDimsStr(inputData))
      local layerRes = {}
      for iLayer = 1, nLayers do
         print(prefix .. 'Parallel layer ' .. iLayer)
         local curLayer = layer:get(iLayer)
         local curInput = inputData:select(inputDimension, iLayer)
         local res = nn_utils.dumpNetwork(curLayer, curInput, prefix .. prefixExtension)
         layerRes[iLayer] = res
      end
      output = nn_utils.concatTensors(layerRes, outputDimension)
   else
      print(prefix .. 'Layer type: ' .. strLayer)
      print(prefix .. 'Input: ' .. nn_utils.tensorDimsStr(inputData))
      output = layer:forward(inputData)
   end
   if torch.isTensor(output) and output:ne(output):sum() > 0 then
      print(prefix .. '!!!!!!!!!!!!!!!!!!!!!!! Found NaN in output !!!!!!!!!!!!!!!!!!!!!!!')
   end
   print(prefix .. 'Output: ' .. nn_utils.tensorDimsStr(output))
   return output
end
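-- Quick sketch: trace a tiny two-layer net with dumpNetwork; the layer
-- shapes here are arbitrary.
function nn_utils.demoDumpNetwork()
   local net = nn.Sequential()
   net:add(nn.SpatialConvolution(3, 16, 3, 3, 1, 1, 1, 1))
   net:add(nn.ReLU())
   return nn_utils.dumpNetwork(net, torch.randn(1, 3, 32, 32))
end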
local function appendToPrefix(oldPrefix, newStuff)
   if oldPrefix and oldPrefix ~= '' then
      return oldPrefix .. '_' .. newStuff
   else
      return newStuff
   end
end

-- Assumes that the data matrix is set up as
--   level 1, channel 1
--   level 1, channel 2
--   ...
--   level 2, channel 1
--   level 2, channel 2
--   ...
function nn_utils.dumpIntermediateWeights(layer, inputData, pyramidLevelSizes,
                                          channelNames, outputImagesDir, filePrefix)
   local output
   local strLayer = tostring(layer)
   if strLayer:sub(1, 13) == 'nn.Sequential' then
      local nLayers = layer:size()
      local layerInput = inputData
      for iLayer = 1, nLayers do
         local curLayer = layer:get(iLayer)
         local newPrefix = appendToPrefix(filePrefix, 'layer' .. iLayer)
         local res = nn_utils.dumpIntermediateWeights(curLayer, layerInput,
            pyramidLevelSizes, channelNames, outputImagesDir, newPrefix)
         layerInput = res
      end
      output = layerInput
   elseif strLayer:sub(1, 11) == 'nn.Parallel' then
      local nLayers = table.getn(layer.modules)
      local inputDimension = layer.inputDimension
      local outputDimension = layer.outputDimension
      local nPyramidLevels = table.getn(pyramidLevelSizes)
      local nChannels = table.getn(channelNames)
      local layerRes = {}
      assert(nLayers == nPyramidLevels * nChannels)
      for iLevel = 1, nPyramidLevels do
         for jChannel = 1, nChannels do
            local iLayer = (iLevel - 1) * nChannels + jChannel
            local curLayer = layer:get(iLayer)
            local curInput = inputData:select(inputDimension, iLayer)
            local newPrefix = appendToPrefix(filePrefix,
               'level' .. iLevel .. '_' .. channelNames[jChannel])
            local res = nn_utils.dumpIntermediateWeights(curLayer, curInput,
               pyramidLevelSizes, channelNames, outputImagesDir, newPrefix)
            layerRes[iLayer] = res
         end
      end
      output = nn_utils.concatTensors(layerRes, outputDimension)
   -- use torch.type here: tostring() of these modules includes their
   -- parameters, so a plain string comparison would never match
   elseif torch.type(layer) == 'nn.SpatialConvolution' or
          torch.type(layer) == 'nn.SpatialConvolutionMM' then
      -- For convolution layers, save out the weights and activations.
      local nInputPlane = layer.nInputPlane
      local nOutputPlane = layer.nOutputPlane
      local kw = layer.kW
      local kh = layer.kH
      local weightOrig = layer.weight
      -- Torch stores convolution weights as nOutputPlane x nInputPlane x kH x kW
      local w = torch.reshape(weightOrig,
         torch.LongStorage{nOutputPlane, nInputPlane, kh, kw})
      local nChannels = table.getn(channelNames)
      -- Only do this for the first layer:
      if w:size(2) == nChannels then
         local filename = appendToPrefix(filePrefix, 'weights.png')
         image.save(paths.concat(outputImagesDir, filename),
            image.toDisplayTensor{input = w:select(2, 1), padding = 3})
      end
      -- Only show the first 10 activations:
      local nActivationImages = math.min(nOutputPlane, 10)
      output = layer:forward(inputData)
      for iOutputPlane = 1, nActivationImages do
         local filename = appendToPrefix(filePrefix,
            'activations_plane' .. iOutputPlane .. '.png')
         image.save(paths.concat(outputImagesDir, filename),
            image.toDisplayTensor{input = output[{{}, iOutputPlane, {}, {}}], padding = 3})
      end
   elseif torch.type(layer) == 'nn.View' then
      output = layer:forward(inputData)
      if output:nDimension() == 4 and output:size(2) == 1 then
         local filename = appendToPrefix(filePrefix, 'view.png')
         image.save(paths.concat(outputImagesDir, filename),
            image.toDisplayTensor{input = output[{{}, 1, {}, {}}], padding = 0})
      end
   else
      output = layer:forward(inputData)
   end
   return output
end
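-- Hedged usage sketch for dumpIntermediateWeights on a plain Sequential net
-- (one pyramid level, one channel); the output directory is hypothetical and
-- must exist before calling.
function nn_utils.demoDumpWeights(net, input, outDir)
   return nn_utils.dumpIntermediateWeights(net, input, {1}, {'gray'},
      outDir or '/tmp/activations', '')
end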
-- Local contrast normalization built from separable 1D convolutions:
-- subtract a Gaussian-weighted local mean, then divide by the thresholded
-- Gaussian-weighted local standard deviation.
function nn_utils.customLCN(inputs, kernel, threshold, thresval)
   assert(inputs:dim() == 4,
      "Input should be of the form nSamples x nChannels x width x height")
   local padH = math.floor(kernel:size(1) / 2)
   local padW = padH
   -- normalize the kernel
   kernel:div(kernel:sum())
   -- mean estimator: pad, then convolve horizontally and vertically
   local meanestimator = nn.Sequential()
   meanestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH))
   meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(1), kernel:size(1), 1))
   meanestimator:add(nn.SpatialConvolution(1, 1, 1, kernel:size(1), 1))
   -- std estimator: same structure applied to the squared inputs
   local stdestimator = nn.Sequential()
   stdestimator:add(nn.Square())
   stdestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH))
   stdestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(1), kernel:size(1), 1))
   stdestimator:add(nn.SpatialConvolution(1, 1, 1, kernel:size(1)))
   stdestimator:add(nn.Sqrt())
   -- fill both estimators with the normalized kernel and zero the biases
   for i = 1, 1 do
      meanestimator.modules[2].weight[i]:copy(kernel)
      meanestimator.modules[3].weight[1][i]:copy(kernel)
      stdestimator.modules[3].weight[i]:copy(kernel)
      stdestimator.modules[4].weight[1][i]:copy(kernel)
   end
   meanestimator.modules[2].bias:zero()
   meanestimator.modules[3].bias:zero()
   stdestimator.modules[3].bias:zero()
   stdestimator.modules[4].bias:zero()
   -- Run the meanestimator on a bunch of ones to figure out the sum of the kernel.
   -- (This is pretty wasteful for a large number of samples N of Nx1xKxK.)
   local coef = meanestimator:updateOutput(inputs.new():resizeAs(inputs):fill(1))
   coef = coef:clone()
   -- Take the kernel-weighted local sums
   local localSums = meanestimator:updateOutput(inputs)
   -- Divide by the response of the kernel on ones (effectively, dividing by the kernel sum)
   local adjustedSums = nn.CDivTable():updateOutput{localSums, coef}
   -- Subtract out the kernel-weighted adjusted sums
   local meanSubtracted = nn.CSubTable():updateOutput{inputs, adjustedSums}
   -- Divide the mean-subtracted output by the kernel-weighted standard deviation
   local localStds = stdestimator:updateOutput(meanSubtracted)
   local adjustedStds = nn.CDivTable():updateOutput{localStds, coef}
   local thresholdedStds = nn.Threshold(threshold, thresval):updateOutput(adjustedStds)
   local outputs = nn.CDivTable():updateOutput{meanSubtracted, thresholdedStds}
   return outputs
end

-- Reference implementation using nn.SpatialContrastiveNormalization, applied
-- sample by sample.
function nn_utils.originalLCN(inputs, kernel, threshold, thresval)
   local normalization = nn.SpatialContrastiveNormalization(1, kernel, threshold, thresval)
   local outputs = inputs:clone()
   for i = 1, inputs:size(1) do
      outputs[i] = normalization:forward(inputs[i])
      xlua.progress(i, inputs:size(1))
   end
   return outputs
end

-- Compare the two LCN implementations on random data, timing both.
function nn_utils.testLCN()
   local neighborhood = image.gaussian1D(7)
   local inputs = torch.randn(100, 1, 50, 50)
   local timer = torch.Timer()
   timer:reset()
   local originalOutputs = nn_utils.originalLCN(inputs, neighborhood, 1, 1)
   print('Original LCN took : ' .. timer:time().real .. ' seconds')
   timer:reset()
   local customOutputs = nn_utils.customLCN(inputs, neighborhood, 1, 1)
   print('  Custom LCN took : ' .. timer:time().real .. ' seconds')
   local norm = (customOutputs - originalOutputs):norm()
   print('Difference between original and custom LCN implementations : ' .. norm)
end
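-- Minimal sketch: run the custom LCN on a batch of grayscale patches with a
-- 9-tap Gaussian; the threshold values are illustrative.
function nn_utils.demoLCN()
   local kernel = image.gaussian1D(9)
   local patches = torch.randn(4, 1, 32, 32)
   return nn_utils.customLCN(patches, kernel, 1, 1)
end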
-- He-style initialization for convolution layers.
local function ConvInit(model, name)
   for k, v in pairs(model:findModules(name)) do
      local n = v.kW * v.kH * v.nOutputPlane
      v.weight:normal(0, math.sqrt(2 / n))
      if cudnn.version >= 4000 then
         v.bias = nil
         v.gradBias = nil
      else
         v.bias:zero()
      end
   end
end

-- Standard batch-normalization initialization: unit scale, zero shift.
local function BNInit(model, name)
   for k, v in pairs(model:findModules(name)) do
      v.weight:fill(1)
      v.bias:zero()
   end
end

function nn_utils.init(model, opt)
   ConvInit(model, 'cudnn.SpatialConvolution')
   ConvInit(model, 'nn.SpatialConvolution')
   BNInit(model, 'fbnn.SpatialBatchNormalization')
   BNInit(model, 'cudnn.SpatialBatchNormalization')
   BNInit(model, 'nn.SpatialBatchNormalization')
   for k, v in pairs(model:findModules('nn.Linear')) do
      v.bias:zero()
   end
end

function nn_utils.cudnnize(model, opt)
   model:cuda()
   cudnn.convert(model, cudnn)
   if opt.cudnn == 'deterministic' then
      model:apply(function(m)
         if m.setMode then m:setMode(1, 1, 1) end
      end)
   end
end

return nn_utils
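-- Usage sketch (hypothetical constructor and module name):
--   local nn_utils = require 'nn_utils'
--   local model = buildModel()                          -- hypothetical
--   nn_utils.init(model)                                -- He init for convs, BN reset
--   nn_utils.cudnnize(model, {cudnn = 'deterministic'})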