/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
*
* NVIDIA CORPORATION and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA CORPORATION is strictly prohibited.
*/
/** @file tinyexr_wrapper.cu
* @author Thomas Müller, NVIDIA
* @brief Wrapper around the tinyexr library, providing a simple interface
* to load and store EXR images.
*/
#include <neural-graphics-primitives/common.h>
#include <neural-graphics-primitives/common_device.cuh>
#include <neural-graphics-primitives/tinyexr_wrapper.h>
#include <tiny-cuda-nn/gpu_memory.h>
#ifdef __NVCC__
# ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
# pragma nv_diag_suppress 174
# pragma nv_diag_suppress 550
# else
# pragma diag_suppress 174
# pragma diag_suppress 550
# endif
#endif
#define TINYEXR_IMPLEMENTATION
#include <tinyexr/tinyexr.h>
using namespace tcnn;
NGP_NAMESPACE_BEGIN
template <typename T>
__global__ void interleave_and_cast_kernel(const uint32_t num_pixels, bool has_alpha, const T* __restrict__ in, __half* __restrict__ out, bool fix_pre_mult) {
	// One thread per pixel: converts planar (channel-major) RGBA input `in`
	// into interleaved half-precision RGBA `out`. When `has_alpha` is false,
	// alpha is treated as 1. When `fix_pre_mult` is set, RGB is multiplied by
	// alpha. Expects a 1D launch covering at least `num_pixels` threads.
	const uint32_t pixel = threadIdx.x + blockIdx.x * blockDim.x;
	if (pixel >= num_pixels) {
		return;
	}

	const float alpha = has_alpha ? (float)in[3 * num_pixels + pixel] : 1.0f;
	const float premult = fix_pre_mult ? alpha : 1.0f;

	__half interleaved[4] = {
		(__half)((float)in[0 * num_pixels + pixel] * premult),
		(__half)((float)in[1 * num_pixels + pixel] * premult),
		(__half)((float)in[2 * num_pixels + pixel] * premult),
		(__half)alpha,
	};

	// Emit all four half values (8 bytes) as a single 64-bit store.
	// `out` comes from cudaMalloc, so `&out[pixel*4]` is 8-byte aligned.
	*(uint64_t*)&out[pixel * 4] = *(uint64_t*)&interleaved[0];
}
void save_exr(const float* data, int width, int height, int nChannels, int channelStride, const char* outfilename) {
	// Saves `data` (interleaved, `channelStride` floats per pixel, of which the
	// first `nChannels` are written) as an EXR image, converting pixels to half
	// precision on disk. Throws std::runtime_error if tinyexr fails to write.
	EXRHeader header;
	InitEXRHeader(&header);

	EXRImage image;
	InitEXRImage(&image);

	image.num_channels = nChannels;

	// De-interleave into per-channel planes. `image_ptr` references the planes
	// in reverse ((A)BGR) order, since most EXR viewers expect that order.
	std::vector<std::vector<float>> images(nChannels);
	std::vector<float*> image_ptr(nChannels);
	for (int i = 0; i < nChannels; ++i) {
		images[i].resize((size_t)width * height);
		image_ptr[i] = images[nChannels - i - 1].data();
	}

	for (size_t i = 0; i < (size_t)width * height; i++) {
		for (int c = 0; c < nChannels; ++c) {
			images[c][i] = data[channelStride*i+c];
		}
	}

	image.images = (unsigned char**)image_ptr.data();
	image.width = width;
	image.height = height;

	header.num_channels = nChannels;
	// calloc (rather than malloc) so channel names are zero-terminated even if
	// nChannels > 4 and a channel gets no explicit name below.
	header.channels = (EXRChannelInfo*)calloc(header.num_channels, sizeof(EXRChannelInfo));
	header.pixel_types = (int*)malloc(sizeof(int) * header.num_channels);
	header.requested_pixel_types = (int*)malloc(sizeof(int) * header.num_channels);

	// Must be (A)BGR order, since most EXR viewers expect this channel order.
	static const char* channel_names[4] = { "B", "G", "R", "A" };
	for (int i = 0; i < nChannels && i < 4; ++i) {
		strncpy(header.channels[i].name, channel_names[i], 255);
		header.channels[i].name[strlen(channel_names[i])] = '\0';
	}

	for (int i = 0; i < header.num_channels; i++) {
		header.pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; // pixel type of input image
		header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_HALF; // pixel type of output image to be stored in .EXR
	}

	const char* err = nullptr;
	int ret = SaveEXRImageToFile(&image, &header, outfilename, &err);

	// Free header allocations before any throw so the error path doesn't leak.
	free(header.channels);
	free(header.pixel_types);
	free(header.requested_pixel_types);

	if (ret != TINYEXR_SUCCESS) {
		std::string error_message = std::string("Failed to save EXR image: ") + (err ? err : "unknown error");
		if (err) {
			FreeEXRErrorMessage(err); // frees the buffer holding the error message
		}
		throw std::runtime_error(error_message);
	}

	tlog::info() << "Saved exr file: " << outfilename;
}
void load_exr(float** data, int* width, int* height, const char* filename) {
	// Loads an EXR file via tinyexr into a newly allocated interleaved RGBA
	// float buffer (`*data`), writing its dimensions to `*width`/`*height`.
	// Throws std::runtime_error on failure.
	const char* err = nullptr;
	if (LoadEXR(data, width, height, filename, &err) == TINYEXR_SUCCESS) {
		return;
	}

	if (!err) {
		// tinyexr failed without producing a message.
		throw std::runtime_error("Failed to load EXR image");
	}

	std::string error_message = std::string("Failed to load EXR image: ") + err;
	FreeEXRErrorMessage(err); // frees the buffer holding the error message
	throw std::runtime_error(error_message);
}
__half* load_exr_to_gpu(int* width, int* height, const char* filename, bool fix_premult) {
	// Loads a singlepart EXR file straight into GPU memory as interleaved
	// half-precision RGBA. Channels absent from the file read as 0; when
	// `fix_premult` is set, RGB is multiplied by alpha on the GPU.
	// Returns a device pointer (cudaMalloc) owned by the caller.

	// 1. Read EXR version.
	EXRVersion exr_version;
	int ret = ParseEXRVersionFromFile(&exr_version, filename);
	if (ret != 0) {
		throw std::runtime_error("Failed to parse EXR image version");
	}

	if (exr_version.multipart) {
		throw std::runtime_error("EXR file must be singlepart");
	}

	// 2. Read EXR header
	EXRHeader exr_header;
	InitEXRHeader(&exr_header);

	const char* err = nullptr;
	ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, &err);
	if (ret != 0) {
		// `err` may be null; guard before concatenating (matches load_exr).
		std::string error_message = std::string("Failed to parse EXR image header: ") + (err ? err : "unknown error");
		if (err) {
			FreeEXRErrorMessage(err); // frees the buffer holding the error message
		}
		throw std::runtime_error(error_message);
	}

	// All channels must share one pixel type; mixed FLOAT/HALF is unsupported.
	bool full_precision = exr_header.pixel_types[0] == TINYEXR_PIXELTYPE_FLOAT;
	for (int i = 0; i < exr_header.num_channels; i++) {
		bool local_fp = exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_FLOAT;
		if (local_fp != full_precision) {
			FreeEXRHeader(&exr_header); // don't leak the parsed header on throw
			throw std::runtime_error("Can't handle EXR images with mixed channel types");
		}
	}

	EXRImage exr_image;
	InitEXRImage(&exr_image);

	ret = LoadEXRImageFromFile(&exr_image, &exr_header, filename, &err);
	if (ret != 0) {
		std::string error_message = std::string("Failed to load EXR image: ") + (err ? err : "unknown error");
		FreeEXRHeader(&exr_header);
		if (err) {
			FreeEXRErrorMessage(err); // frees the buffer holding the error message
		}
		throw std::runtime_error(error_message);
	}

	// 3. Access image data
	// `exr_image.images` will be filled when EXR is scanline format.
	// `exr_image.tiled` will be filled when EXR is tiled format.
	*width = exr_image.width;
	*height = exr_image.height;

	// Widen BEFORE multiplying: int*int would overflow 32 bits for very large images.
	size_t n_pixels = (size_t)exr_image.width * (size_t)exr_image.height;
	size_t bytes_per_sample = full_precision ? sizeof(float) : sizeof(__half); // per channel sample

	// Planar (channel-major) staging buffer on the GPU; zero-initialized so
	// channels missing from the file stay 0.
	GPUMemory<uint8_t> tmp{n_pixels*4*bytes_per_sample};
	CUDA_CHECK_THROW(cudaMemset(tmp.data(), 0, n_pixels*4*bytes_per_sample));

	bool has_alpha = false;
	for (int c = 0; c < exr_header.num_channels; c++) {
		const char* name = exr_header.channels[c].name;
		int plane = -1; // destination plane in `tmp`; -1 = unrecognized channel, skipped
		if (strcmp(name, "R") == 0) {
			plane = 0;
		} else if (strcmp(name, "G") == 0) {
			plane = 1;
		} else if (strcmp(name, "B") == 0) {
			plane = 2;
		} else if (strcmp(name, "A") == 0) {
			plane = 3;
			has_alpha = true;
		}

		if (plane >= 0) {
			CUDA_CHECK_THROW(cudaMemcpy(tmp.data() + n_pixels*plane*bytes_per_sample, exr_image.images[c], bytes_per_sample * n_pixels, cudaMemcpyHostToDevice));
		}
	}

	// Result is always half precision: size by sizeof(__half), not by the
	// input sample size (the old code over-allocated 2x for float EXRs).
	// Allocated after the host->device copies to shrink the window in which an
	// exception would leak this raw allocation.
	uint8_t* rawptr = nullptr;
	CUDA_CHECK_THROW(cudaMalloc(&rawptr, n_pixels*4*sizeof(__half)));
	__half* result = (__half*)rawptr;

	if (full_precision) {
		linear_kernel(interleave_and_cast_kernel<float>, 0, nullptr, n_pixels, has_alpha, (float*)tmp.data(), result, fix_premult);
	} else {
		linear_kernel(interleave_and_cast_kernel<__half>, 0, nullptr, n_pixels, has_alpha, (__half*)tmp.data(), result, fix_premult);
	}

	// 4. Free image data
	FreeEXRImage(&exr_image);
	FreeEXRHeader(&exr_header);

	return result;
}
NGP_NAMESPACE_END
|