| | |
| | |
| | |
| |
|
| | #include <algorithm> |
| | #include <array> |
| | #include <cstddef> |
| | #include <memory> |
| | #include "common/assert.h" |
| | #include "common/color.h" |
| | #include "common/common_types.h" |
| | #include "common/microprofile.h" |
| | #include "common/vector_math.h" |
| | #include "core/core.h" |
| | #include "core/hle/service/cam/y2r_u.h" |
| | #include "core/hw/y2r.h" |
| | #include "core/memory.h" |
| |
|
| | namespace HW::Y2R { |
| |
|
| | using namespace Service::Y2R; |
| |
|
| | static const std::size_t MAX_TILES = 1024 / 8; |
| | static const std::size_t TILE_SIZE = 8 * 8; |
| | using ImageTile = std::array<u32, TILE_SIZE>; |
| |
|
| | |
| | template <InputFormat input_format> |
| | static void ConvertYUVToRGB(const u8* input_Y, const u8* input_U, const u8* input_V, |
| | ImageTile output[], unsigned int width, unsigned int height, |
| | const CoefficientSet& coefficients) { |
| |
|
| | for (unsigned int y = 0; y < height; ++y) { |
| | for (unsigned int x = 0; x < width; ++x) { |
| | s32 Y; |
| | s32 U; |
| | s32 V; |
| | if constexpr (input_format == InputFormat::YUV422_Indiv8 || |
| | input_format == InputFormat::YUV422_Indiv16) { |
| | Y = input_Y[y * width + x]; |
| | U = input_U[(y * width + x) / 2]; |
| | V = input_V[(y * width + x) / 2]; |
| | } else if constexpr (input_format == InputFormat::YUV420_Indiv8 || |
| | input_format == InputFormat::YUV420_Indiv16) { |
| | Y = input_Y[y * width + x]; |
| | U = input_U[((y / 2) * width + x) / 2]; |
| | V = input_V[((y / 2) * width + x) / 2]; |
| | } else if constexpr (input_format == InputFormat::YUYV422_Interleaved) { |
| | Y = input_Y[(y * width + x) * 2]; |
| | U = input_Y[(y * width + (x / 2) * 2) * 2 + 1]; |
| | V = input_Y[(y * width + (x / 2) * 2) * 2 + 3]; |
| | } else { |
| | UNREACHABLE_MSG("Unknown Y2R input format {}", input_format); |
| | return; |
| | } |
| |
|
| | |
| | auto& c = coefficients; |
| | s32 cY = c[0] * Y; |
| |
|
| | s32 r = cY + c[1] * V; |
| | s32 g = cY - c[2] * V - c[3] * U; |
| | s32 b = cY + c[4] * U; |
| |
|
| | const s32 rounding_offset = 0x18; |
| | r = (r >> 3) + c[5] + rounding_offset; |
| | g = (g >> 3) + c[6] + rounding_offset; |
| | b = (b >> 3) + c[7] + rounding_offset; |
| |
|
| | unsigned int tile = x / 8; |
| | unsigned int tile_x = x % 8; |
| | u32* out = &output[tile][y * 8 + tile_x]; |
| | *out = ((u32)std::clamp(r >> 5, 0, 0xFF) << 24) | |
| | ((u32)std::clamp(g >> 5, 0, 0xFF) << 16) | |
| | ((u32)std::clamp(b >> 5, 0, 0xFF) << 8); |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | template <std::size_t N> |
| | static void ReceiveData(Memory::MemorySystem& memory, u8* output, ConversionBuffer& buf, |
| | std::size_t amount_of_data) { |
| | const u8* input = memory.GetPointer(buf.address); |
| |
|
| | std::size_t output_unit = buf.transfer_unit / N; |
| | ASSERT(amount_of_data % output_unit == 0); |
| |
|
| | while (amount_of_data > 0) { |
| | for (std::size_t i = 0; i < output_unit; ++i) { |
| | output[i] = input[i * N]; |
| | } |
| |
|
| | output += output_unit; |
| | input += buf.transfer_unit + buf.gap; |
| |
|
| | buf.address += buf.transfer_unit + buf.gap; |
| | buf.image_size -= buf.transfer_unit; |
| | amount_of_data -= output_unit; |
| | } |
| | } |
| |
|
| | |
| | |
| | template <OutputFormat output_format> |
| | static void SendData(Memory::MemorySystem& memory, const u32* input, ConversionBuffer& buf, |
| | int amount_of_data, u8 alpha) { |
| |
|
| | u8* output = memory.GetPointer(buf.address); |
| |
|
| | while (amount_of_data > 0) { |
| | u8* unit_end = output + buf.transfer_unit; |
| | while (output < unit_end) { |
| | u32 color = *input++; |
| | Common::Vec4<u8> col_vec{(u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha}; |
| |
|
| | if constexpr (output_format == OutputFormat::RGBA8) { |
| | Common::Color::EncodeRGBA8(col_vec, output); |
| | output += 4; |
| | } else if constexpr (output_format == OutputFormat::RGB8) { |
| | Common::Color::EncodeRGB8(col_vec, output); |
| | output += 3; |
| | } else if constexpr (output_format == OutputFormat::RGB5A1) { |
| | Common::Color::EncodeRGB5A1(col_vec, output); |
| | output += 2; |
| | } else if constexpr (output_format == OutputFormat::RGB565) { |
| | Common::Color::EncodeRGB565(col_vec, output); |
| | output += 2; |
| | } else { |
| | UNREACHABLE_MSG("Unknown Y2R output format {}", output_format); |
| | } |
| |
|
| | amount_of_data -= 1; |
| | } |
| |
|
| | output += buf.gap; |
| | buf.address += buf.transfer_unit + buf.gap; |
| | buf.image_size -= buf.transfer_unit; |
| | } |
| | } |
| |
|
| | static const u8 linear_lut[TILE_SIZE] = { |
| | |
| | 0, 1, 2, 3, 4, 5, 6, 7, |
| | 8, 9, 10, 11, 12, 13, 14, 15, |
| | 16, 17, 18, 19, 20, 21, 22, 23, |
| | 24, 25, 26, 27, 28, 29, 30, 31, |
| | 32, 33, 34, 35, 36, 37, 38, 39, |
| | 40, 41, 42, 43, 44, 45, 46, 47, |
| | 48, 49, 50, 51, 52, 53, 54, 55, |
| | 56, 57, 58, 59, 60, 61, 62, 63, |
| | |
| | }; |
| |
|
| | static const u8 morton_lut[TILE_SIZE] = { |
| | |
| | 0, 1, 4, 5, 16, 17, 20, 21, |
| | 2, 3, 6, 7, 18, 19, 22, 23, |
| | 8, 9, 12, 13, 24, 25, 28, 29, |
| | 10, 11, 14, 15, 26, 27, 30, 31, |
| | 32, 33, 36, 37, 48, 49, 52, 53, |
| | 34, 35, 38, 39, 50, 51, 54, 55, |
| | 40, 41, 44, 45, 56, 57, 60, 61, |
| | 42, 43, 46, 47, 58, 59, 62, 63, |
| | |
| | }; |
| |
|
| | static void RotateTile0(const ImageTile& input, ImageTile& output, int height, |
| | const u8 out_map[64]) { |
| | for (int i = 0; i < height * 8; ++i) { |
| | output[out_map[i]] = input[i]; |
| | } |
| | } |
| |
|
| | static void RotateTile90(const ImageTile& input, ImageTile& output, int height, |
| | const u8 out_map[64]) { |
| | int out_i = 0; |
| | for (int x = 0; x < 8; ++x) { |
| | for (int y = height - 1; y >= 0; --y) { |
| | output[out_map[out_i++]] = input[y * 8 + x]; |
| | } |
| | } |
| | } |
| |
|
| | static void RotateTile180(const ImageTile& input, ImageTile& output, int height, |
| | const u8 out_map[64]) { |
| | int out_i = 0; |
| | for (int i = height * 8 - 1; i >= 0; --i) { |
| | output[out_map[out_i++]] = input[i]; |
| | } |
| | } |
| |
|
| | static void RotateTile270(const ImageTile& input, ImageTile& output, int height, |
| | const u8 out_map[64]) { |
| | int out_i = 0; |
| | for (int x = 8 - 1; x >= 0; --x) { |
| | for (int y = 0; y < height; ++y) { |
| | output[out_map[out_i++]] = input[y * 8 + x]; |
| | } |
| | } |
| | } |
| |
|
| | static void WriteTileToOutput(u32* output, const ImageTile& tile, int height, int line_stride) { |
| | for (int y = 0; y < height; ++y) { |
| | for (int x = 0; x < 8; ++x) { |
| | output[y * line_stride + x] = tile[y * 8 + x]; |
| | } |
| | } |
| | } |
| |
|
| | MICROPROFILE_DEFINE(Y2R_PerformConversion, "Y2R", "PerformConversion", MP_RGB(185, 66, 245)); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void PerformConversion(Memory::MemorySystem& memory, ConversionConfiguration cvt) { |
| | MICROPROFILE_SCOPE(Y2R_PerformConversion); |
| |
|
| | ASSERT(cvt.input_line_width % 8 == 0); |
| | ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0); |
| | |
| | std::size_t num_tiles = cvt.input_line_width / 8; |
| | ASSERT(num_tiles <= MAX_TILES); |
| |
|
| | |
| | std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]); |
| | |
| | std::unique_ptr<ImageTile[]> tiles(new ImageTile[num_tiles]); |
| | ImageTile tmp_tile; |
| |
|
| | |
| | |
| | const u8* tile_remap = nullptr; |
| | switch (cvt.block_alignment) { |
| | case BlockAlignment::Linear: |
| | tile_remap = linear_lut; |
| | break; |
| | case BlockAlignment::Block8x8: |
| | tile_remap = morton_lut; |
| | break; |
| | } |
| |
|
| | for (unsigned int y = 0; y < cvt.input_lines; y += 8) { |
| | unsigned int row_height = std::min(cvt.input_lines - y, 8u); |
| |
|
| | |
| | const std::size_t row_data_size = row_height * cvt.input_line_width; |
| |
|
| | u8* input_Y = data_buffer.get(); |
| | u8* input_U = input_Y + 8 * cvt.input_line_width; |
| | u8* input_V = input_U + 8 * cvt.input_line_width / 2; |
| |
|
| | switch (cvt.input_format) { |
| | case InputFormat::YUV422_Indiv8: |
| | ReceiveData<1>(memory, input_Y, cvt.src_Y, row_data_size); |
| | ReceiveData<1>(memory, input_U, cvt.src_U, row_data_size / 2); |
| | ReceiveData<1>(memory, input_V, cvt.src_V, row_data_size / 2); |
| | ConvertYUVToRGB<InputFormat::YUV422_Indiv8>(input_Y, input_U, input_V, tiles.get(), |
| | cvt.input_line_width, row_height, |
| | cvt.coefficients); |
| | break; |
| | case InputFormat::YUV420_Indiv8: |
| | ReceiveData<1>(memory, input_Y, cvt.src_Y, row_data_size); |
| | ReceiveData<1>(memory, input_U, cvt.src_U, row_data_size / 4); |
| | ReceiveData<1>(memory, input_V, cvt.src_V, row_data_size / 4); |
| | ConvertYUVToRGB<InputFormat::YUV420_Indiv8>(input_Y, input_U, input_V, tiles.get(), |
| | cvt.input_line_width, row_height, |
| | cvt.coefficients); |
| | break; |
| | case InputFormat::YUV422_Indiv16: |
| | ReceiveData<2>(memory, input_Y, cvt.src_Y, row_data_size); |
| | ReceiveData<2>(memory, input_U, cvt.src_U, row_data_size / 2); |
| | ReceiveData<2>(memory, input_V, cvt.src_V, row_data_size / 2); |
| | ConvertYUVToRGB<InputFormat::YUV422_Indiv16>(input_Y, input_U, input_V, tiles.get(), |
| | cvt.input_line_width, row_height, |
| | cvt.coefficients); |
| | break; |
| | case InputFormat::YUV420_Indiv16: |
| | ReceiveData<2>(memory, input_Y, cvt.src_Y, row_data_size); |
| | ReceiveData<2>(memory, input_U, cvt.src_U, row_data_size / 4); |
| | ReceiveData<2>(memory, input_V, cvt.src_V, row_data_size / 4); |
| | ConvertYUVToRGB<InputFormat::YUV420_Indiv16>(input_Y, input_U, input_V, tiles.get(), |
| | cvt.input_line_width, row_height, |
| | cvt.coefficients); |
| | break; |
| | case InputFormat::YUYV422_Interleaved: |
| | input_U = nullptr; |
| | input_V = nullptr; |
| | ReceiveData<1>(memory, input_Y, cvt.src_YUYV, row_data_size * 2); |
| | ConvertYUVToRGB<InputFormat::YUYV422_Interleaved>(input_Y, input_U, input_V, |
| | tiles.get(), cvt.input_line_width, |
| | row_height, cvt.coefficients); |
| | break; |
| | default: |
| | UNREACHABLE_MSG("Unknown Y2R input format {}", cvt.input_format); |
| | return; |
| | } |
| |
|
| | u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); |
| |
|
| | for (std::size_t i = 0; i < num_tiles; ++i) { |
| | int image_strip_width = 0; |
| | int output_stride = 0; |
| |
|
| | switch (cvt.rotation) { |
| | case Rotation::None: |
| | RotateTile0(tiles[i], tmp_tile, row_height, tile_remap); |
| | image_strip_width = cvt.input_line_width; |
| | output_stride = 8; |
| | break; |
| | case Rotation::Clockwise_90: |
| | RotateTile90(tiles[i], tmp_tile, row_height, tile_remap); |
| | image_strip_width = 8; |
| | output_stride = 8 * row_height; |
| | break; |
| | case Rotation::Clockwise_180: |
| | |
| | |
| | RotateTile180(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap); |
| | image_strip_width = cvt.input_line_width; |
| | output_stride = 8; |
| | break; |
| | case Rotation::Clockwise_270: |
| | RotateTile270(tiles[num_tiles - i - 1], tmp_tile, row_height, tile_remap); |
| | image_strip_width = 8; |
| | output_stride = 8 * row_height; |
| | break; |
| | } |
| |
|
| | switch (cvt.block_alignment) { |
| | case BlockAlignment::Linear: |
| | WriteTileToOutput(output_buffer, tmp_tile, row_height, image_strip_width); |
| | output_buffer += output_stride; |
| | break; |
| | case BlockAlignment::Block8x8: |
| | WriteTileToOutput(output_buffer, tmp_tile, 8, 8); |
| | output_buffer += TILE_SIZE; |
| | break; |
| | } |
| | } |
| |
|
| | switch (cvt.output_format) { |
| | case OutputFormat::RGBA8: |
| | SendData<OutputFormat::RGBA8>(memory, reinterpret_cast<u32*>(data_buffer.get()), |
| | cvt.dst, static_cast<int>(row_data_size), |
| | static_cast<u8>(cvt.alpha)); |
| | break; |
| | case OutputFormat::RGB8: |
| | SendData<OutputFormat::RGB8>(memory, reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, |
| | static_cast<int>(row_data_size), |
| | static_cast<u8>(cvt.alpha)); |
| | break; |
| | case OutputFormat::RGB5A1: |
| | SendData<OutputFormat::RGB5A1>(memory, reinterpret_cast<u32*>(data_buffer.get()), |
| | cvt.dst, static_cast<int>(row_data_size), |
| | static_cast<u8>(cvt.alpha)); |
| | break; |
| | case OutputFormat::RGB565: |
| | SendData<OutputFormat::RGB565>(memory, reinterpret_cast<u32*>(data_buffer.get()), |
| | cvt.dst, static_cast<int>(row_data_size), |
| | static_cast<u8>(cvt.alpha)); |
| | break; |
| | default: |
| | UNREACHABLE_MSG("Unknown Y2R output format {}", cvt.output_format); |
| | return; |
| | } |
| | } |
| | } |
| | } |
| |
|