/* * par2serial-cc: optimize.h - Optimization pass manager */ #ifndef P2S_OPTIMIZE_H #define P2S_OPTIMIZE_H #include "ir.h" /* ── Optimization levels ─────────────────────────────────── */ typedef enum { OPT_O0 = 0, /* No optimization, just serial lowering */ OPT_O1 = 1, /* Basic scalar optimizations */ OPT_O2 = 2, /* + loop optimizations + SIMD */ OPT_O3 = 3, /* + memory layout + aggressive */ } OptLevel; /* ── SIMD target ─────────────────────────────────────────── */ typedef enum { SIMD_NONE = 0, SIMD_SSE = 1, /* 128-bit, 4 floats */ SIMD_AVX = 2, /* 256-bit, 8 floats */ SIMD_AVX2 = 3, /* 256-bit + FMA */ SIMD_AVX512 = 4, /* 512-bit, 16 floats */ SIMD_NEON = 5, /* ARM 128-bit */ } SIMDTarget; /* ── Optimization context ────────────────────────────────── */ typedef struct { OptLevel level; SIMDTarget simd; bool report; /* generate optimization report */ int tile_size; /* default tile size (0 = auto) */ int unroll_factor; /* default unroll (0 = auto) */ Arena *arena; } OptContext; /* ── Parallel loop info (detected by analysis) ───────────── */ typedef struct { int start_inst; /* index of PAR_FOR_BEGIN */ int end_inst; /* index of PAR_FOR_END */ const char *iter_var; int iter_reg; int lo_reg; int hi_reg; int step_reg; ReduceOp reduce_op; /* for reductions */ int accum_reg; /* for reductions */ bool is_reduce; bool is_scan; bool can_vectorize; bool has_dependency; int trip_count; /* estimated trip count, -1 if unknown */ TypeKind elem_type; /* element type for SIMD */ } ParLoopInfo; VEC_TYPEDEF(ParLoopInfo, ParLoopVec); /* ── Optimization report entry ───────────────────────────── */ typedef struct { const char *pass_name; const char *description; SourceLoc loc; bool applied; } OptReport; VEC_TYPEDEF(OptReport, OptReportVec); /* ── Main optimization pipeline ──────────────────────────── */ void optimize_module(IRModule *mod, OptContext *ctx); /* ── Individual passes ───────────────────────────────────── */ /* Pass 1: Parallel pattern detection & analysis */ ParLoopVec detect_parallel_patterns(IRBlock *bb); /* Pass 2: Scalar optimizations */ void opt_constant_fold(IRBlock *bb, Arena *arena); void opt_dead_code_eliminate(IRBlock *bb); void opt_strength_reduce(IRBlock *bb, Arena *arena); void opt_cse(IRBlock *bb, Arena *arena); /* Pass 3: Loop optimizations */ void opt_loop_tile(IRBlock *bb, ParLoopInfo *loop, int tile_size, Arena *arena); void opt_loop_unroll(IRBlock *bb, ParLoopInfo *loop, int factor, Arena *arena); void opt_loop_interchange(IRBlock *bb, Arena *arena); void opt_loop_fuse(IRBlock *bb, Arena *arena); /* Pass 4: Vectorization */ void opt_vectorize(IRBlock *bb, ParLoopInfo *loop, SIMDTarget target, Arena *arena); void opt_vectorize_reduce(IRBlock *bb, ParLoopInfo *loop, SIMDTarget target, Arena *arena); /* Pass 5: Memory optimizations */ void opt_insert_prefetch(IRBlock *bb, Arena *arena); void opt_align_data(IRBlock *bb, Arena *arena); /* ── Utility ─────────────────────────────────────────────── */ int simd_width(SIMDTarget target, TypeKind elem_type); const char *simd_target_str(SIMDTarget t); #endif /* P2S_OPTIMIZE_H */