| | |
| | |
| |
|
| | #include <algorithm> |
| | #include <string> |
| | #include <tuple> |
| |
|
| | #include "common/div_ceil.h" |
| | #include "common/settings.h" |
| | #include "shader_recompiler/backend/bindings.h" |
| | #include "shader_recompiler/backend/glasm/emit_glasm.h" |
| | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" |
| | #include "shader_recompiler/backend/glasm/glasm_emit_context.h" |
| | #include "shader_recompiler/frontend/ir/ir_emitter.h" |
| | #include "shader_recompiler/frontend/ir/program.h" |
| | #include "shader_recompiler/profile.h" |
| | #include "shader_recompiler/runtime_info.h" |
| |
|
| | namespace Shader::Backend::GLASM { |
| | namespace { |
| | template <class Func> |
| | struct FuncTraits {}; |
| |
|
| | template <class ReturnType_, class... Args> |
| | struct FuncTraits<ReturnType_ (*)(Args...)> { |
| | using ReturnType = ReturnType_; |
| |
|
| | static constexpr size_t NUM_ARGS = sizeof...(Args); |
| |
|
| | template <size_t I> |
| | using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; |
| | }; |
| |
|
| | template <typename T> |
| | struct Identity { |
| | Identity(T data_) : data{data_} {} |
| |
|
| | T Extract() { |
| | return data; |
| | } |
| |
|
| | T data; |
| | }; |
| |
|
| | template <bool scalar> |
| | class RegWrapper { |
| | public: |
| | RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} { |
| | const Value value{reg_alloc.Peek(ir_value)}; |
| | if (value.type == Type::Register) { |
| | inst = ir_value.InstRecursive(); |
| | reg = Register{value}; |
| | } else { |
| | reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); |
| | } |
| | switch (value.type) { |
| | case Type::Register: |
| | case Type::Void: |
| | break; |
| | case Type::U32: |
| | ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32); |
| | break; |
| | case Type::U64: |
| | ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64); |
| | break; |
| | } |
| | } |
| |
|
| | auto Extract() { |
| | if (inst) { |
| | reg_alloc.Unref(*inst); |
| | } else { |
| | reg_alloc.FreeReg(reg); |
| | } |
| | return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}}; |
| | } |
| |
|
| | private: |
| | RegAlloc& reg_alloc; |
| | IR::Inst* inst{}; |
| | Register reg{}; |
| | }; |
| |
|
| | template <typename ArgType> |
| | class ValueWrapper { |
| | public: |
| | ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_) |
| | : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {} |
| |
|
| | ArgType Extract() { |
| | if (!ir_value.IsImmediate()) { |
| | reg_alloc.Unref(*ir_value.InstRecursive()); |
| | } |
| | return value; |
| | } |
| |
|
| | private: |
| | RegAlloc& reg_alloc; |
| | const IR::Value& ir_value; |
| | ArgType value; |
| | }; |
| |
|
| | template <typename ArgType> |
| | auto Arg(EmitContext& ctx, const IR::Value& arg) { |
| | if constexpr (std::is_same_v<ArgType, Register>) { |
| | return RegWrapper<false>{ctx, arg}; |
| | } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) { |
| | return RegWrapper<true>{ctx, arg}; |
| | } else if constexpr (std::is_base_of_v<Value, ArgType>) { |
| | return ValueWrapper<ArgType>{ctx, arg}; |
| | } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { |
| | return Identity<const IR::Value&>{arg}; |
| | } else if constexpr (std::is_same_v<ArgType, u32>) { |
| | return Identity{arg.U32()}; |
| | } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { |
| | return Identity{arg.Attribute()}; |
| | } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { |
| | return Identity{arg.Patch()}; |
| | } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { |
| | return Identity{arg.Reg()}; |
| | } |
| | } |
| |
|
| | template <auto func, bool is_first_arg_inst> |
| | struct InvokeCall { |
| | template <typename... Args> |
| | InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) { |
| | if constexpr (is_first_arg_inst) { |
| | func(ctx, *inst, args.Extract()...); |
| | } else { |
| | func(ctx, args.Extract()...); |
| | } |
| | } |
| | }; |
| |
|
| | template <auto func, bool is_first_arg_inst, size_t... I> |
| | void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { |
| | using Traits = FuncTraits<decltype(func)>; |
| | if constexpr (is_first_arg_inst) { |
| | InvokeCall<func, is_first_arg_inst>{ |
| | ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...}; |
| | } else { |
| | InvokeCall<func, is_first_arg_inst>{ |
| | ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...}; |
| | } |
| | } |
| |
|
| | template <auto func> |
| | void Invoke(EmitContext& ctx, IR::Inst* inst) { |
| | using Traits = FuncTraits<decltype(func)>; |
| | static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); |
| | if constexpr (Traits::NUM_ARGS == 1) { |
| | Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); |
| | } else { |
| | using FirstArgType = typename Traits::template ArgType<1>; |
| | static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; |
| | using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; |
| | Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); |
| | } |
| | } |
| |
|
| | void EmitInst(EmitContext& ctx, IR::Inst* inst) { |
| | switch (inst->GetOpcode()) { |
| | #define OPCODE(name, result_type, ...) \ |
| | case IR::Opcode::name: \ |
| | return Invoke<&Emit##name>(ctx, inst); |
| | #include "shader_recompiler/frontend/ir/opcodes.inc" |
| | #undef OPCODE |
| | } |
| | throw LogicError("Invalid opcode {}", inst->GetOpcode()); |
| | } |
| |
|
| | bool IsReference(IR::Inst& inst) { |
| | return inst.GetOpcode() == IR::Opcode::Reference; |
| | } |
| |
|
| | void PrecolorInst(IR::Inst& phi) { |
| | |
| | const size_t num_args{phi.NumArgs()}; |
| | for (size_t i = 0; i < num_args; ++i) { |
| | IR::Block& phi_block{*phi.PhiBlock(i)}; |
| | auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; |
| | IR::IREmitter ir{phi_block, it}; |
| | const IR::Value arg{phi.Arg(i)}; |
| | if (arg.IsImmediate()) { |
| | ir.PhiMove(phi, arg); |
| | } else { |
| | ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); |
| | } |
| | } |
| | for (size_t i = 0; i < num_args; ++i) { |
| | IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); |
| | } |
| | } |
| |
|
| | void Precolor(const IR::Program& program) { |
| | for (IR::Block* const block : program.blocks) { |
| | for (IR::Inst& phi : block->Instructions()) { |
| | if (!IR::IsPhi(phi)) { |
| | break; |
| | } |
| | PrecolorInst(phi); |
| | } |
| | } |
| | } |
| |
|
| | void EmitCode(EmitContext& ctx, const IR::Program& program) { |
| | const auto eval{ |
| | [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }}; |
| | for (const IR::AbstractSyntaxNode& node : program.syntax_list) { |
| | switch (node.type) { |
| | case IR::AbstractSyntaxNode::Type::Block: |
| | for (IR::Inst& inst : node.data.block->Instructions()) { |
| | EmitInst(ctx, &inst); |
| | } |
| | break; |
| | case IR::AbstractSyntaxNode::Type::If: |
| | ctx.Add("MOV.S.CC RC,{};" |
| | "IF NE.x;", |
| | eval(node.data.if_node.cond)); |
| | break; |
| | case IR::AbstractSyntaxNode::Type::EndIf: |
| | ctx.Add("ENDIF;"); |
| | break; |
| | case IR::AbstractSyntaxNode::Type::Loop: |
| | ctx.Add("REP;"); |
| | break; |
| | case IR::AbstractSyntaxNode::Type::Repeat: |
| | if (!Settings::values.disable_shader_loop_safety_checks) { |
| | const u32 loop_index{ctx.num_safety_loop_vars++}; |
| | const u32 vector_index{loop_index / 4}; |
| | const char component{"xyzw"[loop_index % 4]}; |
| | ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;" |
| | "BRK(LT.{});", |
| | vector_index, component, vector_index, component, component); |
| | } |
| | if (node.data.repeat.cond.IsImmediate()) { |
| | if (node.data.repeat.cond.U1()) { |
| | ctx.Add("ENDREP;"); |
| | } else { |
| | ctx.Add("BRK;" |
| | "ENDREP;"); |
| | } |
| | } else { |
| | ctx.Add("MOV.S.CC RC,{};" |
| | "BRK(EQ.x);" |
| | "ENDREP;", |
| | eval(node.data.repeat.cond)); |
| | } |
| | break; |
| | case IR::AbstractSyntaxNode::Type::Break: |
| | if (node.data.break_node.cond.IsImmediate()) { |
| | if (node.data.break_node.cond.U1()) { |
| | ctx.Add("BRK;"); |
| | } |
| | } else { |
| | ctx.Add("MOV.S.CC RC,{};" |
| | "BRK (NE.x);", |
| | eval(node.data.break_node.cond)); |
| | } |
| | break; |
| | case IR::AbstractSyntaxNode::Type::Return: |
| | case IR::AbstractSyntaxNode::Type::Unreachable: |
| | ctx.Add("RET;"); |
| | break; |
| | } |
| | } |
| | if (!ctx.reg_alloc.IsEmpty()) { |
| | LOG_WARNING(Shader_GLASM, "Register leak after generating code"); |
| | } |
| | } |
| |
|
| | void SetupOptions(const IR::Program& program, const Profile& profile, |
| | const RuntimeInfo& runtime_info, std::string& header) { |
| | const Info& info{program.info}; |
| | const Stage stage{program.stage}; |
| |
|
| | |
| | header += "OPTION NV_internal;" |
| | "OPTION NV_shader_storage_buffer;" |
| | "OPTION NV_gpu_program_fp64;"; |
| | |
| | header += "OPTION NV_texture_multisample;"; |
| | if (info.uses_int64_bit_atomics) { |
| | header += "OPTION NV_shader_atomic_int64;"; |
| | } |
| | if (info.uses_atomic_f32_add) { |
| | header += "OPTION NV_shader_atomic_float;"; |
| | } |
| | if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { |
| | header += "OPTION NV_shader_atomic_fp16_vector;"; |
| | } |
| | if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || |
| | info.uses_fswzadd) { |
| | header += "OPTION NV_shader_thread_group;"; |
| | } |
| | if (info.uses_subgroup_shuffles) { |
| | header += "OPTION NV_shader_thread_shuffle;"; |
| | } |
| | if (info.uses_sparse_residency) { |
| | header += "OPTION EXT_sparse_texture2;"; |
| | } |
| | const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || |
| | info.stores[IR::Attribute::Layer]}; |
| | if ((stage != Stage::Geometry && stores_viewport_layer) || |
| | info.stores[IR::Attribute::ViewportMask]) { |
| | if (profile.support_viewport_index_layer_non_geometry) { |
| | header += "OPTION NV_viewport_array2;"; |
| | } |
| | } |
| | if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) { |
| | header += "OPTION NV_geometry_shader_passthrough;"; |
| | } |
| | if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { |
| | header += "OPTION EXT_shader_image_load_formatted;"; |
| | } |
| | if (profile.support_derivative_control) { |
| | header += "OPTION ARB_derivative_control;"; |
| | } |
| | if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { |
| | header += "OPTION NV_early_fragment_tests;"; |
| | } |
| | if (stage == Stage::Fragment) { |
| | header += "OPTION ARB_draw_buffers;"; |
| | header += "OPTION ARB_fragment_layer_viewport;"; |
| | } |
| | } |
| |
|
| | std::string_view StageHeader(Stage stage) { |
| | switch (stage) { |
| | case Stage::VertexA: |
| | case Stage::VertexB: |
| | return "!!NVvp5.0\n"; |
| | case Stage::TessellationControl: |
| | return "!!NVtcp5.0\n"; |
| | case Stage::TessellationEval: |
| | return "!!NVtep5.0\n"; |
| | case Stage::Geometry: |
| | return "!!NVgp5.0\n"; |
| | case Stage::Fragment: |
| | return "!!NVfp5.0\n"; |
| | case Stage::Compute: |
| | return "!!NVcp5.0\n"; |
| | } |
| | throw InvalidArgument("Invalid stage {}", stage); |
| | } |
| |
|
| | std::string_view InputPrimitive(InputTopology topology) { |
| | switch (topology) { |
| | case InputTopology::Points: |
| | return "POINTS"; |
| | case InputTopology::Lines: |
| | return "LINES"; |
| | case InputTopology::LinesAdjacency: |
| | return "LINES_ADJACENCY"; |
| | case InputTopology::Triangles: |
| | return "TRIANGLES"; |
| | case InputTopology::TrianglesAdjacency: |
| | return "TRIANGLES_ADJACENCY"; |
| | } |
| | throw InvalidArgument("Invalid input topology {}", topology); |
| | } |
| |
|
| | std::string_view OutputPrimitive(OutputTopology topology) { |
| | switch (topology) { |
| | case OutputTopology::PointList: |
| | return "POINTS"; |
| | case OutputTopology::LineStrip: |
| | return "LINE_STRIP"; |
| | case OutputTopology::TriangleStrip: |
| | return "TRIANGLE_STRIP"; |
| | } |
| | throw InvalidArgument("Invalid output topology {}", topology); |
| | } |
| |
|
| | std::string_view GetTessMode(TessPrimitive primitive) { |
| | switch (primitive) { |
| | case TessPrimitive::Triangles: |
| | return "TRIANGLES"; |
| | case TessPrimitive::Quads: |
| | return "QUADS"; |
| | case TessPrimitive::Isolines: |
| | return "ISOLINES"; |
| | } |
| | throw InvalidArgument("Invalid tessellation primitive {}", primitive); |
| | } |
| |
|
| | std::string_view GetTessSpacing(TessSpacing spacing) { |
| | switch (spacing) { |
| | case TessSpacing::Equal: |
| | return "EQUAL"; |
| | case TessSpacing::FractionalOdd: |
| | return "FRACTIONAL_ODD"; |
| | case TessSpacing::FractionalEven: |
| | return "FRACTIONAL_EVEN"; |
| | } |
| | throw InvalidArgument("Invalid tessellation spacing {}", spacing); |
| | } |
| | } |
| |
|
| | std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, |
| | Bindings& bindings) { |
| | EmitContext ctx{program, bindings, profile, runtime_info}; |
| | Precolor(program); |
| | EmitCode(ctx, program); |
| | std::string header{StageHeader(program.stage)}; |
| | SetupOptions(program, profile, runtime_info, header); |
| | switch (program.stage) { |
| | case Stage::TessellationControl: |
| | header += fmt::format("VERTICES_OUT {};", program.invocations); |
| | break; |
| | case Stage::TessellationEval: |
| | header += fmt::format("TESS_MODE {};" |
| | "TESS_SPACING {};" |
| | "TESS_VERTEX_ORDER {};", |
| | GetTessMode(runtime_info.tess_primitive), |
| | GetTessSpacing(runtime_info.tess_spacing), |
| | runtime_info.tess_clockwise ? "CW" : "CCW"); |
| | break; |
| | case Stage::Geometry: |
| | header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology)); |
| | if (program.is_geometry_passthrough) { |
| | if (profile.support_geometry_shader_passthrough) { |
| | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { |
| | if (program.info.passthrough.Generic(index)) { |
| | header += fmt::format("PASSTHROUGH result.attrib[{}];", index); |
| | } |
| | } |
| | if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) { |
| | header += "PASSTHROUGH result.position;"; |
| | } |
| | } else { |
| | LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported"); |
| | } |
| | } else { |
| | header += |
| | fmt::format("VERTICES_OUT {};" |
| | "PRIMITIVE_OUT {};", |
| | program.output_vertices, OutputPrimitive(program.output_topology)); |
| | } |
| | break; |
| | case Stage::Compute: |
| | header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], |
| | program.workgroup_size[1], program.workgroup_size[2]); |
| | break; |
| | default: |
| | break; |
| | } |
| | if (program.shared_memory_size > 0) { |
| | header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size); |
| | header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};"); |
| | } |
| | if (program.info.uses_rescaling_uniform) { |
| | header += "PARAM scaling[1]={program.local[0..0]};"; |
| | } |
| | if (program.info.uses_render_area) { |
| | header += "PARAM render_area[1]={program.local[1..1]};"; |
| | } |
| | header += "TEMP "; |
| | for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { |
| | header += fmt::format("R{},", index); |
| | } |
| | if (program.local_memory_size > 0) { |
| | header += fmt::format("lmem[{}],", Common::DivCeil(program.local_memory_size, 4U)); |
| | } |
| | if (program.info.uses_fswzadd) { |
| | header += "FSWZA[4],FSWZB[4],"; |
| | } |
| | const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)}; |
| | for (u32 index = 0; index < num_safety_loop_vectors; ++index) { |
| | header += fmt::format("loop{},", index); |
| | } |
| | header += "RC;" |
| | "LONG TEMP "; |
| | for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { |
| | header += fmt::format("D{},", index); |
| | } |
| | header += "DC;"; |
| | if (program.info.uses_fswzadd) { |
| | header += "MOV.F FSWZA[0],-1;" |
| | "MOV.F FSWZA[1],1;" |
| | "MOV.F FSWZA[2],-1;" |
| | "MOV.F FSWZA[3],0;" |
| | "MOV.F FSWZB[0],-1;" |
| | "MOV.F FSWZB[1],-1;" |
| | "MOV.F FSWZB[2],1;" |
| | "MOV.F FSWZB[3],-1;"; |
| | } |
| | for (u32 index = 0; index < num_safety_loop_vectors; ++index) { |
| | header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index); |
| | } |
| | if (ctx.uses_y_direction) { |
| | header += "PARAM y_direction[1]={state.material.front.ambient};"; |
| | } |
| | ctx.code.insert(0, header); |
| | ctx.code += "END"; |
| | return ctx.code; |
| | } |
| |
|
| | } |
| |
|