| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #if defined(CUDA_SIFTGPU_ENABLED) |
| |
|
| | #include "GL/glew.h" |
| | #include <iostream> |
| | #include <vector> |
| | #include <algorithm> |
| | #include <stdlib.h> |
| | #include <math.h> |
| | using namespace std; |
| |
|
| |
|
| | #include <cuda.h> |
| | #include <cuda_runtime_api.h> |
| | #include <cuda_gl_interop.h> |
| |
|
| | #include "GlobalUtil.h" |
| | #include "GLTexImage.h" |
| | #include "CuTexImage.h" |
| | #include "ProgramCU.h" |
| |
|
| | #if CUDA_VERSION <= 2010 && defined(SIFTGPU_ENABLE_LINEAR_TEX2D) |
| | #error "Require CUDA 2.2 or higher" |
| | #endif |
| |
|
| |
|
| | CuTexImage::CuTexImage() |
| | { |
| | _cuData = NULL; |
| | _cuData2D = NULL; |
| | _fromPBO = 0; |
| | _numChannel = _numBytes = 0; |
| | _imgWidth = _imgHeight = _texWidth = _texHeight = 0; |
| | } |
| |
|
| | CuTexImage::CuTexImage(int width, int height, int nchannel, GLuint pbo) |
| | { |
| | _cuData = NULL; |
| |
|
| | |
| | GLint bsize, esize = width * height * nchannel * sizeof(float); |
| | glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo); |
| | glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); |
| | if(bsize < esize) |
| | { |
| | glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize, NULL, GL_STATIC_DRAW_ARB); |
| | glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); |
| | } |
| | glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0); |
| | if(bsize >=esize) |
| | { |
| |
|
| | cudaGLRegisterBufferObject(pbo); |
| | cudaGLMapBufferObject(&_cuData, pbo); |
| | ProgramCU::CheckErrorCUDA("cudaGLMapBufferObject"); |
| | _fromPBO = pbo; |
| | }else |
| | { |
| | _cuData = NULL; |
| | _fromPBO = 0; |
| | } |
| | if(_cuData) |
| | { |
| | _numBytes = bsize; |
| | _imgWidth = width; |
| | _imgHeight = height; |
| | _numChannel = nchannel; |
| | }else |
| | { |
| | _numBytes = 0; |
| | _imgWidth = 0; |
| | _imgHeight = 0; |
| | _numChannel = 0; |
| | } |
| |
|
| | _texWidth = _texHeight =0; |
| |
|
| | _cuData2D = NULL; |
| | } |
| |
|
| | CuTexImage::~CuTexImage() |
| | { |
| |
|
| |
|
| | if(_fromPBO) |
| | { |
| | cudaGLUnmapBufferObject(_fromPBO); |
| | cudaGLUnregisterBufferObject(_fromPBO); |
| | }else if(_cuData) |
| | { |
| | cudaFree(_cuData); |
| | } |
| | if(_cuData2D) cudaFreeArray(_cuData2D); |
| | } |
| |
|
| | void CuTexImage::SetImageSize(int width, int height) |
| | { |
| | _imgWidth = width; |
| | _imgHeight = height; |
| | } |
| |
|
| | bool CuTexImage::InitTexture(int width, int height, int nchannel) |
| | { |
| | _imgWidth = width; |
| | _imgHeight = height; |
| | _numChannel = min(max(nchannel, 1), 4); |
| |
|
| | const size_t size = width * height * _numChannel * sizeof(float); |
| |
|
| | if (size < 0) { |
| | return false; |
| | } |
| |
|
| | |
| | |
| | if (size >= INT_MAX * sizeof(float)) { |
| | return false; |
| | } |
| |
|
| | if(size <= _numBytes) return true; |
| |
|
| | if(_cuData) cudaFree(_cuData); |
| |
|
| | |
| | const cudaError_t status = cudaMalloc(&_cuData, _numBytes = size); |
| |
|
| | if (status != cudaSuccess) { |
| | _cuData = NULL; |
| | _numBytes = 0; |
| | return false; |
| | } |
| |
|
| | return true; |
| | } |
| |
|
| | void CuTexImage::CopyFromHost(const void * buf) |
| | { |
| | if(_cuData == NULL) return; |
| | cudaMemcpy( _cuData, buf, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyHostToDevice); |
| | } |
| |
|
| | void CuTexImage::CopyToHost(void * buf) |
| | { |
| | if(_cuData == NULL) return; |
| | cudaMemcpy(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost); |
| | } |
| |
|
| | void CuTexImage::CopyToHost(void * buf, int stream) |
| | { |
| | if(_cuData == NULL) return; |
| | cudaMemcpyAsync(buf, _cuData, _imgWidth * _imgHeight * _numChannel * sizeof(float), cudaMemcpyDeviceToHost, (cudaStream_t)stream); |
| | } |
| |
|
| | void CuTexImage::InitTexture2D() |
| | { |
| | #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D) |
| | if(_cuData2D && (_texWidth < _imgWidth || _texHeight < _imgHeight)) |
| | { |
| | cudaFreeArray(_cuData2D); |
| | _cuData2D = NULL; |
| | } |
| |
|
| | if(_cuData2D == NULL) |
| | { |
| | _texWidth = max(_texWidth, _imgWidth); |
| | _texHeight = max(_texHeight, _imgHeight); |
| | cudaChannelFormatDesc desc; |
| | desc.f = cudaChannelFormatKindFloat; |
| | desc.x = sizeof(float) * 8; |
| | desc.y = _numChannel >=2 ? sizeof(float) * 8 : 0; |
| | desc.z = _numChannel >=3 ? sizeof(float) * 8 : 0; |
| | desc.w = _numChannel >=4 ? sizeof(float) * 8 : 0; |
| | const cudaError_t status = cudaMallocArray(&_cuData2D, &desc, _texWidth, _texHeight); |
| |
|
| | if (status != cudaSuccess) { |
| | _cuData = NULL; |
| | _numBytes = 0; |
| | } |
| |
|
| | ProgramCU::CheckErrorCUDA("CuTexImage::InitTexture2D"); |
| | } |
| | #endif |
| | } |
| |
|
| | void CuTexImage::CopyToTexture2D() |
| | { |
| | #if !defined(SIFTGPU_ENABLE_LINEAR_TEX2D) |
| | InitTexture2D(); |
| |
|
| | if(_cuData2D) |
| | { |
| | cudaMemcpy2DToArray(_cuData2D, 0, 0, _cuData, _imgWidth* _numChannel* sizeof(float) , |
| | _imgWidth * _numChannel*sizeof(float), _imgHeight, cudaMemcpyDeviceToDevice); |
| | ProgramCU::CheckErrorCUDA("cudaMemcpy2DToArray"); |
| | } |
| | #endif |
| |
|
| | } |
| |
|
| | int CuTexImage::DebugCopyToTexture2D() |
| | { |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | return 1; |
| | } |
| |
|
| |
|
| |
|
| | void CuTexImage::CopyFromPBO(int width, int height, GLuint pbo) |
| | { |
| | void* pbuf =NULL; |
| | GLint esize = width * height * sizeof(float); |
| | cudaGLRegisterBufferObject(pbo); |
| | cudaGLMapBufferObject(&pbuf, pbo); |
| |
|
| | cudaMemcpy(_cuData, pbuf, esize, cudaMemcpyDeviceToDevice); |
| |
|
| | cudaGLUnmapBufferObject(pbo); |
| | cudaGLUnregisterBufferObject(pbo); |
| | } |
| |
|
| | int CuTexImage::CopyToPBO(GLuint pbo) |
| | { |
| | void* pbuf =NULL; |
| | GLint bsize, esize = _imgWidth * _imgHeight * sizeof(float) * _numChannel; |
| | glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo); |
| | glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); |
| | if(bsize < esize) |
| | { |
| | glBufferData(GL_PIXEL_PACK_BUFFER_ARB, esize*3/2, NULL, GL_STATIC_DRAW_ARB); |
| | glGetBufferParameteriv(GL_PIXEL_PACK_BUFFER_ARB, GL_BUFFER_SIZE, &bsize); |
| | } |
| | glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0); |
| |
|
| | if(bsize >= esize) |
| | { |
| | cudaGLRegisterBufferObject(pbo); |
| | cudaGLMapBufferObject(&pbuf, pbo); |
| | cudaMemcpy(pbuf, _cuData, esize, cudaMemcpyDeviceToDevice); |
| | cudaGLUnmapBufferObject(pbo); |
| | cudaGLUnregisterBufferObject(pbo); |
| | return 1; |
| | }else |
| | { |
| | return 0; |
| | } |
| | } |
| |
|
| | #endif |
| |
|
| |
|