| | |
| | |
| |
|
| | #include <algorithm> |
| | #include <memory> |
| | #include <vector> |
| | #include <queue> |
| |
|
| | #include "common/settings.h" |
| | #include "shader_recompiler/exception.h" |
| | #include "shader_recompiler/frontend/ir/basic_block.h" |
| | #include "shader_recompiler/frontend/ir/ir_emitter.h" |
| | #include "shader_recompiler/frontend/ir/post_order.h" |
| | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" |
| | #include "shader_recompiler/frontend/maxwell/translate/translate.h" |
| | #include "shader_recompiler/frontend/maxwell/translate_program.h" |
| | #include "shader_recompiler/host_translate_info.h" |
| | #include "shader_recompiler/ir_opt/passes.h" |
| |
|
| | namespace Shader::Maxwell { |
| | namespace { |
| | IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { |
| | size_t num_syntax_blocks{}; |
| | for (const auto& node : syntax_list) { |
| | if (node.type == IR::AbstractSyntaxNode::Type::Block) { |
| | ++num_syntax_blocks; |
| | } |
| | } |
| | IR::BlockList blocks; |
| | blocks.reserve(num_syntax_blocks); |
| | u32 order_index{}; |
| | for (const auto& node : syntax_list) { |
| | if (node.type == IR::AbstractSyntaxNode::Type::Block) { |
| | blocks.push_back(node.data.block); |
| | blocks.back()->SetOrder(order_index++); |
| | } |
| | } |
| | return blocks; |
| | } |
| |
|
| | void RemoveUnreachableBlocks(IR::Program& program) { |
| | |
| | |
| | if (program.blocks.size() == program.post_order_blocks.size()) { |
| | return; |
| | } |
| | const auto begin{program.blocks.begin() + 1}; |
| | const auto end{program.blocks.end()}; |
| | const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; |
| | program.blocks.erase(std::remove_if(begin, end, pred), end); |
| | } |
| |
|
| | void CollectInterpolationInfo(Environment& env, IR::Program& program) { |
| | if (program.stage != Stage::Fragment) { |
| | return; |
| | } |
| | const ProgramHeader& sph{env.SPH()}; |
| | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { |
| | std::optional<PixelImap> imap; |
| | for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) { |
| | if (value == PixelImap::Unused) { |
| | continue; |
| | } |
| | if (imap && imap != value) { |
| | throw NotImplementedException("Per component interpolation"); |
| | } |
| | imap = value; |
| | } |
| | if (!imap) { |
| | continue; |
| | } |
| | program.info.interpolation[index] = [&] { |
| | switch (*imap) { |
| | case PixelImap::Unused: |
| | case PixelImap::Perspective: |
| | return Interpolation::Smooth; |
| | case PixelImap::Constant: |
| | return Interpolation::Flat; |
| | case PixelImap::ScreenLinear: |
| | return Interpolation::NoPerspective; |
| | } |
| | throw NotImplementedException("Unknown interpolation {}", *imap); |
| | }(); |
| | } |
| | } |
| |
|
| | void AddNVNStorageBuffers(IR::Program& program) { |
| | if (!program.info.uses_global_memory) { |
| | return; |
| | } |
| | const u32 driver_cbuf{0}; |
| | const u32 descriptor_size{0x10}; |
| | const u32 num_buffers{16}; |
| | const u32 base{[&] { |
| | switch (program.stage) { |
| | case Stage::VertexA: |
| | case Stage::VertexB: |
| | return 0x110u; |
| | case Stage::TessellationControl: |
| | return 0x210u; |
| | case Stage::TessellationEval: |
| | return 0x310u; |
| | case Stage::Geometry: |
| | return 0x410u; |
| | case Stage::Fragment: |
| | return 0x510u; |
| | case Stage::Compute: |
| | return 0x310u; |
| | } |
| | throw InvalidArgument("Invalid stage {}", program.stage); |
| | }()}; |
| | auto& descs{program.info.storage_buffers_descriptors}; |
| | for (u32 index = 0; index < num_buffers; ++index) { |
| | if (!program.info.nvn_buffer_used[index]) { |
| | continue; |
| | } |
| | const u32 offset{base + index * descriptor_size}; |
| | const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; |
| | if (it != descs.end()) { |
| | it->is_written |= program.info.stores_global_memory; |
| | continue; |
| | } |
| | descs.push_back({ |
| | .cbuf_index = driver_cbuf, |
| | .cbuf_offset = offset, |
| | .count = 1, |
| | .is_written = program.info.stores_global_memory, |
| | }); |
| | } |
| | } |
| |
|
| | bool IsLegacyAttribute(IR::Attribute attribute) { |
| | return (attribute >= IR::Attribute::ColorFrontDiffuseR && |
| | attribute <= IR::Attribute::ColorBackSpecularA) || |
| | attribute == IR::Attribute::FogCoordinate || |
| | (attribute >= IR::Attribute::FixedFncTexture0S && |
| | attribute <= IR::Attribute::FixedFncTexture9Q); |
| | } |
| |
|
| | std::map<IR::Attribute, IR::Attribute> GenerateLegacyToGenericMappings( |
| | const VaryingState& state, std::queue<IR::Attribute> unused_generics, |
| | const std::map<IR::Attribute, IR::Attribute>& previous_stage_mapping) { |
| | std::map<IR::Attribute, IR::Attribute> mapping; |
| | auto update_mapping = [&mapping, &unused_generics, previous_stage_mapping](IR::Attribute attr, |
| | size_t count) { |
| | if (previous_stage_mapping.find(attr) != previous_stage_mapping.end()) { |
| | for (size_t i = 0; i < count; ++i) { |
| | mapping.insert({attr + i, previous_stage_mapping.at(attr + i)}); |
| | } |
| | } else { |
| | for (size_t i = 0; i < count; ++i) { |
| | mapping.insert({attr + i, unused_generics.front() + i}); |
| | } |
| | unused_generics.pop(); |
| | } |
| | }; |
| | for (size_t index = 0; index < 4; ++index) { |
| | auto attr = IR::Attribute::ColorFrontDiffuseR + index * 4; |
| | if (state.AnyComponent(attr)) { |
| | update_mapping(attr, 4); |
| | } |
| | } |
| | if (state[IR::Attribute::FogCoordinate]) { |
| | update_mapping(IR::Attribute::FogCoordinate, 1); |
| | } |
| | for (size_t index = 0; index < IR::NUM_FIXEDFNCTEXTURE; ++index) { |
| | auto attr = IR::Attribute::FixedFncTexture0S + index * 4; |
| | if (state.AnyComponent(attr)) { |
| | update_mapping(attr, 4); |
| | } |
| | } |
| | return mapping; |
| | } |
| |
|
| | void EmitGeometryPassthrough(IR::IREmitter& ir, const IR::Program& program, |
| | const Shader::VaryingState& passthrough_mask, |
| | bool passthrough_position, |
| | std::optional<IR::Attribute> passthrough_layer_attr) { |
| | for (u32 i = 0; i < program.output_vertices; i++) { |
| | |
| | for (u32 j = 0; j < 32; j++) { |
| | if (!passthrough_mask.Generic(j)) { |
| | continue; |
| | } |
| |
|
| | const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4); |
| | ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); |
| | ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); |
| | ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); |
| | ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); |
| | } |
| |
|
| | if (passthrough_position) { |
| | |
| | const IR::Attribute attr = IR::Attribute::PositionX; |
| | ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0)); |
| | ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0)); |
| | ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0)); |
| | ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0)); |
| | } |
| |
|
| | if (passthrough_layer_attr) { |
| | |
| | ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(*passthrough_layer_attr), |
| | ir.Imm32(0)); |
| | } |
| |
|
| | |
| | ir.EmitVertex(ir.Imm32(0)); |
| | } |
| | ir.EndPrimitive(ir.Imm32(0)); |
| | } |
| |
|
| | u32 GetOutputTopologyVertices(OutputTopology output_topology) { |
| | switch (output_topology) { |
| | case OutputTopology::PointList: |
| | return 1; |
| | case OutputTopology::LineStrip: |
| | return 2; |
| | default: |
| | return 3; |
| | } |
| | } |
| |
|
| | void LowerGeometryPassthrough(const IR::Program& program, const HostTranslateInfo& host_info) { |
| | for (IR::Block* const block : program.blocks) { |
| | for (IR::Inst& inst : block->Instructions()) { |
| | if (inst.GetOpcode() == IR::Opcode::Epilogue) { |
| | IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)}; |
| | EmitGeometryPassthrough( |
| | ir, program, program.info.passthrough, |
| | program.info.passthrough.AnyComponent(IR::Attribute::PositionX), {}); |
| | } |
| | } |
| | } |
| | } |
| |
|
| | } |
| |
|
| | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, |
| | Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { |
| | IR::Program program; |
| | program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info); |
| | program.blocks = GenerateBlocks(program.syntax_list); |
| | program.post_order_blocks = PostOrder(program.syntax_list.front()); |
| | program.stage = env.ShaderStage(); |
| | program.local_memory_size = env.LocalMemorySize(); |
| | switch (program.stage) { |
| | case Stage::TessellationControl: { |
| | const ProgramHeader& sph{env.SPH()}; |
| | program.invocations = sph.common2.threads_per_input_primitive; |
| | break; |
| | } |
| | case Stage::Geometry: { |
| | const ProgramHeader& sph{env.SPH()}; |
| | program.output_topology = sph.common3.output_topology; |
| | program.output_vertices = sph.common4.max_output_vertices; |
| | program.invocations = sph.common2.threads_per_input_primitive; |
| | program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; |
| | if (program.is_geometry_passthrough) { |
| | const auto& mask{env.GpPassthroughMask()}; |
| | for (size_t i = 0; i < mask.size() * 32; ++i) { |
| | program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; |
| | } |
| |
|
| | if (!host_info.support_geometry_shader_passthrough) { |
| | program.output_vertices = GetOutputTopologyVertices(program.output_topology); |
| | LowerGeometryPassthrough(program, host_info); |
| | } |
| | } |
| | break; |
| | } |
| | case Stage::Compute: |
| | program.workgroup_size = env.WorkgroupSize(); |
| | program.shared_memory_size = env.SharedMemorySize(); |
| | break; |
| | default: |
| | break; |
| | } |
| | RemoveUnreachableBlocks(program); |
| |
|
| | |
| | if (!host_info.support_float64) { |
| | Optimization::LowerFp64ToFp32(program); |
| | } |
| | if (!host_info.support_float16) { |
| | Optimization::LowerFp16ToFp32(program); |
| | } |
| | if (!host_info.support_int64) { |
| | Optimization::LowerInt64ToInt32(program); |
| | } |
| | if (!host_info.support_conditional_barrier) { |
| | Optimization::ConditionalBarrierPass(program); |
| | } |
| | Optimization::SsaRewritePass(program); |
| |
|
| | Optimization::ConstantPropagationPass(env, program); |
| |
|
| | Optimization::PositionPass(env, program); |
| |
|
| | Optimization::GlobalMemoryToStorageBufferPass(program, host_info); |
| | Optimization::TexturePass(env, program, host_info); |
| |
|
| | if (Settings::values.resolution_info.active) { |
| | Optimization::RescalingPass(program); |
| | } |
| | Optimization::DeadCodeEliminationPass(program); |
| | if (Settings::values.renderer_debug) { |
| | Optimization::VerificationPass(program); |
| | } |
| | Optimization::CollectShaderInfoPass(env, program); |
| | Optimization::LayerPass(program, host_info); |
| | Optimization::VendorWorkaroundPass(program); |
| |
|
| | CollectInterpolationInfo(env, program); |
| | AddNVNStorageBuffers(program); |
| | return program; |
| | } |
| |
|
| | IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, |
| | Environment& env_vertex_b) { |
| | IR::Program result{}; |
| | Optimization::VertexATransformPass(vertex_a); |
| | Optimization::VertexBTransformPass(vertex_b); |
| | for (const auto& term : vertex_a.syntax_list) { |
| | if (term.type != IR::AbstractSyntaxNode::Type::Return) { |
| | result.syntax_list.push_back(term); |
| | } |
| | } |
| | result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(), |
| | vertex_b.syntax_list.end()); |
| | result.blocks = GenerateBlocks(result.syntax_list); |
| | result.post_order_blocks = vertex_b.post_order_blocks; |
| | for (const auto& block : vertex_a.post_order_blocks) { |
| | result.post_order_blocks.push_back(block); |
| | } |
| | result.stage = Stage::VertexB; |
| | result.info = vertex_a.info; |
| | result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); |
| | result.info.loads.mask |= vertex_b.info.loads.mask; |
| | result.info.stores.mask |= vertex_b.info.stores.mask; |
| |
|
| | Optimization::JoinTextureInfo(result.info, vertex_b.info); |
| | Optimization::JoinStorageInfo(result.info, vertex_b.info); |
| | Optimization::DeadCodeEliminationPass(result); |
| | if (Settings::values.renderer_debug) { |
| | Optimization::VerificationPass(result); |
| | } |
| | Optimization::CollectShaderInfoPass(env_vertex_b, result); |
| | return result; |
| | } |
| |
|
| | void ConvertLegacyToGeneric(IR::Program& program, const Shader::RuntimeInfo& runtime_info) { |
| | auto& stores = program.info.stores; |
| | if (stores.Legacy()) { |
| | std::queue<IR::Attribute> unused_output_generics{}; |
| | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { |
| | if (!stores.Generic(index)) { |
| | unused_output_generics.push(IR::Attribute::Generic0X + index * 4); |
| | } |
| | } |
| | program.info.legacy_stores_mapping = |
| | GenerateLegacyToGenericMappings(stores, unused_output_generics, {}); |
| | for (IR::Block* const block : program.post_order_blocks) { |
| | for (IR::Inst& inst : block->Instructions()) { |
| | switch (inst.GetOpcode()) { |
| | case IR::Opcode::SetAttribute: { |
| | const auto attr = inst.Arg(0).Attribute(); |
| | if (IsLegacyAttribute(attr)) { |
| | stores.Set(program.info.legacy_stores_mapping[attr], true); |
| | inst.SetArg(0, Shader::IR::Value(program.info.legacy_stores_mapping[attr])); |
| | } |
| | break; |
| | } |
| | default: |
| | break; |
| | } |
| | } |
| | } |
| | } |
| |
|
| | auto& loads = program.info.loads; |
| | if (loads.Legacy()) { |
| | std::queue<IR::Attribute> unused_input_generics{}; |
| | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { |
| | const AttributeType input_type{runtime_info.generic_input_types[index]}; |
| | if (!runtime_info.previous_stage_stores.Generic(index) || !loads.Generic(index) || |
| | input_type == AttributeType::Disabled) { |
| | unused_input_generics.push(IR::Attribute::Generic0X + index * 4); |
| | } |
| | } |
| | auto mappings = GenerateLegacyToGenericMappings( |
| | loads, unused_input_generics, runtime_info.previous_stage_legacy_stores_mapping); |
| | for (IR::Block* const block : program.post_order_blocks) { |
| | for (IR::Inst& inst : block->Instructions()) { |
| | switch (inst.GetOpcode()) { |
| | case IR::Opcode::GetAttribute: { |
| | const auto attr = inst.Arg(0).Attribute(); |
| | if (IsLegacyAttribute(attr)) { |
| | loads.Set(mappings[attr], true); |
| | inst.SetArg(0, Shader::IR::Value(mappings[attr])); |
| | } |
| | break; |
| | } |
| | default: |
| | break; |
| | } |
| | } |
| | } |
| | } |
| | } |
| |
|
| | IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, |
| | ObjectPool<IR::Block>& block_pool, |
| | const HostTranslateInfo& host_info, |
| | IR::Program& source_program, |
| | Shader::OutputTopology output_topology) { |
| | IR::Program program; |
| | program.stage = Stage::Geometry; |
| | program.output_topology = output_topology; |
| | program.output_vertices = GetOutputTopologyVertices(output_topology); |
| |
|
| | program.is_geometry_passthrough = false; |
| | program.info.loads.mask = source_program.info.stores.mask; |
| | program.info.stores.mask = source_program.info.stores.mask; |
| | program.info.stores.Set(IR::Attribute::Layer, true); |
| | program.info.stores.Set(source_program.info.emulated_layer, false); |
| |
|
| | IR::Block* current_block = block_pool.Create(inst_pool); |
| | auto& node{program.syntax_list.emplace_back()}; |
| | node.type = IR::AbstractSyntaxNode::Type::Block; |
| | node.data.block = current_block; |
| |
|
| | IR::IREmitter ir{*current_block}; |
| | EmitGeometryPassthrough(ir, program, program.info.stores, true, |
| | source_program.info.emulated_layer); |
| |
|
| | IR::Block* return_block{block_pool.Create(inst_pool)}; |
| | IR::IREmitter{*return_block}.Epilogue(); |
| | current_block->AddBranch(return_block); |
| |
|
| | auto& merge{program.syntax_list.emplace_back()}; |
| | merge.type = IR::AbstractSyntaxNode::Type::Block; |
| | merge.data.block = return_block; |
| | program.syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; |
| |
|
| | program.blocks = GenerateBlocks(program.syntax_list); |
| | program.post_order_blocks = PostOrder(program.syntax_list.front()); |
| | Optimization::SsaRewritePass(program); |
| |
|
| | return program; |
| | } |
| |
|
| | } |
| |
|