UvmPinAsync / workloads /realworld /async /backprop /backprop_cuda_kernel.cu.bp

lrh12580

first commit

5cb6c4b about 2 years ago

2.29 kB



	#ifndef _BACKPROP_CUDA_KERNEL_H_
	#define _BACKPROP_CUDA_KERNEL_H_

	#include <stdio.h>
	#include "backprop.h"
	#include "math.h"
	#include "cuda.h"


	// Per-block partial forward pass over a HEIGHT x WIDTH tile of the weight array.
	//
	// Each thread (tx, ty) multiplies one element of input_hidden_cuda by the
	// input value selected by ty, then the block sums the products over ty in
	// shared memory. Thread row ty == 0 ends up holding one sum per tx, and those
	// sums are written to hidden_partial_sum.
	//
	// Parameters:
	//   input_cuda          read at HEIGHT * blockIdx.y + ty + 1 (element 0 is never read).
	//   output_hidden_cuda  not referenced by this kernel.
	//   input_hidden_cuda   flat array addressed with a row stride of (hid + 1); the
	//                       element this thread read is OVERWRITTEN with that thread's
	//                       value after the reduction (an intermediate sum, not the
	//                       original weight).
	//   hidden_partial_sum  receives the block's sums at blockIdx.y * hid + ty.
	//   in                  not referenced by this kernel.
	//   hid                 (hid + 1) is the row stride of input_hidden_cuda; hid is
	//                       also the per-block stride of hidden_partial_sum.
	//
	// Launch preconditions visible from the indexing below (no bounds checks are done):
	//   - only blockIdx.y is used to select the tile;
	//   - blockDim.x <= WIDTH and blockDim.y <= HEIGHT (shared array extents);
	//   - the reduction reads row ty + stride/2, so every row below HEIGHT must have
	//     been written, i.e. blockDim.y == HEIGHT, and HEIGHT must be a power of two;
	//   - the final store reads weight_matrix[0][ty], so blockDim.y <= WIDTH.
	//   NOTE(review): presumably launched with dim3(WIDTH, HEIGHT) blocks and
	//   WIDTH == HEIGHT -- confirm against the host launch code.
	__global__ void
	bpnn_layerforward_CUDA(float *input_cuda,
	                       float *output_hidden_cuda,
	                       float *input_hidden_cuda,
	                       float *hidden_partial_sum,
	                       int in,
	                       int hid)
	{
		int by = blockIdx.y;
		int tx = threadIdx.x;
		int ty = threadIdx.y;

		// Flat offset of this thread's weight: skips the first row (the trailing
		// "+ ( hid + 1 )") and the first column (the "+ 1").
		int index = ( hid + 1 ) * HEIGHT * by + ( hid + 1 ) * ty + tx + 1 + ( hid + 1 ) ;

		int index_in = HEIGHT * by + ty + 1;

		__shared__ float input_node[HEIGHT];
		__shared__ float weight_matrix[HEIGHT][WIDTH];

		// One thread per row stages the input value shared by that whole row.
		if ( tx == 0 )
			input_node[ty] = input_cuda[index_in] ;

		// input_node[ty] is written by thread (0, ty) and read by every tx below.
		__syncthreads();

		// Load and scale in one step: each thread touches only its own element here,
		// so no barrier is needed between the load and the multiply.
		weight_matrix[ty][tx] = input_hidden_cuda[index] * input_node[ty];

		__syncthreads();

		// Tree reduction over ty. The stride is kept as an exact integer power of
		// two; deriving it from __powf/__log2f (reduced-accuracy intrinsics) and
		// truncating to int can yield a wrong stride or a wrong trip count.
		for ( int power_two = 2 ; power_two <= HEIGHT ; power_two <<= 1 ) {

			if ( ty % power_two == 0 )
				weight_matrix[ty][tx] = weight_matrix[ty][tx] + weight_matrix[ty + power_two/2][tx];

			// Outside the branch so every thread in the block reaches the barrier.
			__syncthreads();

		}

		// Writes this thread's post-reduction value back over the element it loaded.
		input_hidden_cuda[index] = weight_matrix[ty][tx];

		// Barrier kept from the original code; the shared data read below was already
		// published by the barrier that ends the reduction.
		__syncthreads();

		// Row 0 holds the block's sums. Threads with tx == 0 copy them out, using ty
		// as the column index (weight_matrix[tx][ty] is weight_matrix[0][ty] here).
		if ( tx == 0 ) {
			hidden_partial_sum[by * hid + ty] = weight_matrix[tx][ty];
		}

	}


	// Applies one update step to the weight array w and stores the step in oldw.
	//
	// For the element owned by thread (tx, ty) of block blockIdx.y:
	//   step = ETA * delta[tx + 1] * ly[HEIGHT * blockIdx.y + ty + 1] + MOMENTUM * oldw[i]
	//   w[i] += step;  oldw[i] = step
	// where i skips the first row and first column of an array whose row stride is
	// (hid + 1). Threads with ty == 0 in block blockIdx.y == 0 additionally update
	// the first row, w[tx + 1], using the same formula without the ly factor.
	//
	// Parameters:
	//   delta  read at tx + 1.
	//   hid    (hid + 1) is the row stride of w and oldw.
	//   ly     read at HEIGHT * blockIdx.y + ty + 1.
	//   in     not referenced by this kernel.
	//   w      weights, updated in place.
	//   oldw   previous step on entry, current step on exit.
	//
	// ETA and MOMENTUM are macros defined outside this file. No bounds checks are
	// performed; only blockIdx.y is used to pick the block's rows.
	__global__ void bpnn_adjust_weights_cuda(float * delta,
	                                         int hid,
	                                         float * ly,
	                                         int in,
	                                         float * w,
	                                         float * oldw)
	{
		const int block_row = blockIdx.y;
		const int col = threadIdx.x;
		const int row = threadIdx.y;

		const int row_stride = hid + 1;
		const int delta_idx = col + 1;
		const int ly_idx = HEIGHT * block_row + row + 1;
		// Same offset as the element-wise form: block rows, then (row + 1) full
		// rows to skip the first one, then the 1-based column.
		const int w_idx = row_stride * HEIGHT * block_row + row_stride * (row + 1) + delta_idx;

		// The step expression is evaluated twice on purpose: the second evaluation
		// re-reads oldw after w has been written, exactly as the update is defined.
		w[w_idx] += ((ETA * delta[delta_idx] * ly[ly_idx]) + (MOMENTUM * oldw[w_idx]));
		oldw[w_idx] = ((ETA * delta[delta_idx] * ly[ly_idx]) + (MOMENTUM * oldw[w_idx]));

		// Reached by every thread of the block before any thread leaves.
		__syncthreads();

		// Only the first thread row of the first block touches the first weight row.
		if (row != 0 || block_row != 0)
			return;

		w[delta_idx] += ((ETA * delta[delta_idx]) + (MOMENTUM * oldw[delta_idx]));
		oldw[delta_idx] = ((ETA * delta[delta_idx]) + (MOMENTUM * oldw[delta_idx]));
	}
	#endif