par2serial-cc / src /optimize.h
clarenceleo's picture
Add optimization pass headers
b06720f verified
Raw
History Blame Contribute Delete
4.02 kB
/*
* par2serial-cc: optimize.h - Optimization pass manager
*/
#ifndef P2S_OPTIMIZE_H
#define P2S_OPTIMIZE_H
#include "ir.h"
/* ── Optimization levels ─────────────────────────────────── */
/*
 * Optimization level requested for a run. The numeric value of each
 * enumerator matches the digit in its name (OPT_O2 == 2), so levels can be
 * compared ordinally.
 */
typedef enum {
    OPT_O0 = 0,  /* no optimization; serial lowering only */
    OPT_O1 = 1,  /* basic scalar optimizations */
    OPT_O2 = 2,  /* adds loop optimizations and SIMD */
    OPT_O3 = 3,  /* adds memory-layout and aggressive optimizations */
} OptLevel;
/* ── SIMD target ─────────────────────────────────────────── */
/*
 * SIMD instruction-set target. The per-enumerator notes give the vector
 * width in bits; the float counts correspond to 32-bit floats
 * (e.g. 128 / 32 = 4).
 */
typedef enum {
    SIMD_NONE   = 0,  /* no SIMD */
    SIMD_SSE    = 1,  /* 128-bit, 4 floats */
    SIMD_AVX    = 2,  /* 256-bit, 8 floats */
    SIMD_AVX2   = 3,  /* 256-bit + FMA */
    SIMD_AVX512 = 4,  /* 512-bit, 16 floats */
    SIMD_NEON   = 5,  /* ARM 128-bit */
} SIMDTarget;
/* ── Optimization context ────────────────────────────────── */
/*
 * Settings for one optimization run; optimize_module() takes a pointer to
 * one of these.
 *
 * NOTE(review): this header uses `bool` but does not include <stdbool.h>
 * itself -- presumably it arrives through "ir.h"; confirm.
 */
typedef struct {
    OptLevel    level;          /* requested optimization level */
    SIMDTarget  simd;           /* SIMD target */
    bool        report;         /* generate an optimization report */
    int         tile_size;      /* default tile size; 0 = auto */
    int         unroll_factor;  /* default unroll factor; 0 = auto */
    Arena      *arena;          /* Arena handle; the type is declared outside this header */
} OptContext;
/* ── Parallel loop info (detected by analysis) ───────────── */
/*
 * Description of one parallel loop found by analysis.
 *
 * Fields that carried no comment originally are annotated from their names
 * only -- verify against the analysis code before relying on them.
 */
typedef struct {
    /* Position of the loop in the instruction stream. */
    int          start_inst;      /* index of PAR_FOR_BEGIN */
    int          end_inst;        /* index of PAR_FOR_END */

    /* Iteration space. The *_reg fields are presumably IR register numbers
     * for the iterator, bounds and step -- TODO confirm. */
    const char  *iter_var;        /* presumably the iteration variable's name */
    int          iter_reg;
    int          lo_reg;
    int          hi_reg;
    int          step_reg;

    /* Reduction data. */
    ReduceOp     reduce_op;       /* used for reductions */
    int          accum_reg;       /* used for reductions */

    /* Classification flags. */
    bool         is_reduce;
    bool         is_scan;
    bool         can_vectorize;
    bool         has_dependency;

    int          trip_count;      /* estimated trip count; -1 if unknown */
    TypeKind     elem_type;       /* element type used for SIMD */
} ParLoopInfo;

/* Declares ParLoopVec through VEC_TYPEDEF (macro defined outside this
 * header); presumably a vector whose elements are ParLoopInfo. */
VEC_TYPEDEF(ParLoopInfo, ParLoopVec);
/* ── Optimization report entry ───────────────────────────── */
/*
 * One entry of the optimization report.
 *
 * The fields had no comments originally; the notes below are inferred from
 * names and types -- confirm against the code that fills them in.
 */
typedef struct {
    const char  *pass_name;    /* presumably the name of the reporting pass */
    const char  *description;  /* presumably human-readable detail text */
    SourceLoc    loc;          /* source location the entry refers to */
    bool         applied;      /* presumably true when the transformation was applied */
} OptReport;

/* Declares OptReportVec through VEC_TYPEDEF (macro defined outside this
 * header); presumably a vector whose elements are OptReport. */
VEC_TYPEDEF(OptReport, OptReportVec);
/* ── Main optimization pipeline ──────────────────────────── */
/*
 * Entry point of the optimization pipeline: processes module `mod` using
 * the settings in `ctx`.
 *
 * NOTE(review): which passes run at each OptLevel, and whether `mod` is
 * rewritten in place, is decided by the implementation, which is not
 * visible from this header -- confirm before relying on either.
 */
void optimize_module(IRModule *mod, OptContext *ctx);
/* ── Individual passes ───────────────────────────────────── */
/* Pass 1: Parallel pattern detection & analysis */
/*
 * Analyzes block `bb` and returns a ParLoopVec by value; presumably one
 * ParLoopInfo per parallel loop detected.
 *
 * NOTE(review): no Arena is passed in, so where the returned vector's
 * storage comes from and who releases it cannot be told from this header --
 * confirm in the implementation.
 */
ParLoopVec detect_parallel_patterns(IRBlock *bb);
/* Pass 2: Scalar optimizations */
/*
 * Each scalar pass takes a single IRBlock. The one-line descriptions are
 * inferred from the function names; the implementations are not visible
 * from this header, so treat them as assumptions to confirm.
 */

/* Presumably constant folding over `bb`. */
void opt_constant_fold(IRBlock *bb, Arena *arena);
/* Presumably dead-code elimination over `bb`; the only scalar pass that takes no Arena. */
void opt_dead_code_eliminate(IRBlock *bb);
/* Presumably strength reduction over `bb`. */
void opt_strength_reduce(IRBlock *bb, Arena *arena);
/* Presumably common-subexpression elimination ("cse") over `bb`. */
void opt_cse(IRBlock *bb, Arena *arena);
/* Pass 3: Loop optimizations */
/*
 * Tiling and unrolling act on one specific loop (`loop`) inside `bb`;
 * interchange and fusion take only the block. Descriptions are inferred
 * from the names -- the implementations are not visible from this header.
 *
 * NOTE(review): OptContext documents 0 as "auto" for its tile_size and
 * unroll_factor fields; whether `tile_size` / `factor` here accept 0 with
 * the same meaning is not shown -- confirm.
 */

/* Presumably tiles `loop` with tiles of `tile_size`. */
void opt_loop_tile(IRBlock *bb, ParLoopInfo *loop, int tile_size, Arena *arena);
/* Presumably unrolls `loop` by `factor`. */
void opt_loop_unroll(IRBlock *bb, ParLoopInfo *loop, int factor, Arena *arena);
/* Presumably loop interchange within `bb`. */
void opt_loop_interchange(IRBlock *bb, Arena *arena);
/* Presumably loop fusion within `bb`. */
void opt_loop_fuse(IRBlock *bb, Arena *arena);
/* Pass 4: Vectorization */
/*
 * Both vectorization passes take the block, one loop description and the
 * SIMD target to generate for. Descriptions are inferred from the names --
 * the implementations are not visible from this header.
 *
 * NOTE(review): whether callers must check loop->can_vectorize (or
 * loop->is_reduce for the reduce variant) before calling is not shown --
 * confirm.
 */

/* Presumably vectorizes `loop` for `target`. */
void opt_vectorize(IRBlock *bb, ParLoopInfo *loop, SIMDTarget target, Arena *arena);
/* Presumably the variant of vectorization for reduction loops. */
void opt_vectorize_reduce(IRBlock *bb, ParLoopInfo *loop, SIMDTarget target, Arena *arena);
/* Pass 5: Memory optimizations */
/*
 * Memory passes; both take the block and an Arena. Descriptions are
 * inferred from the names -- the implementations are not visible from this
 * header.
 */

/* Presumably inserts prefetch operations into `bb`. */
void opt_insert_prefetch(IRBlock *bb, Arena *arena);
/* Presumably adjusts data alignment for accesses in `bb`. */
void opt_align_data(IRBlock *bb, Arena *arena);
/* ── Utility ─────────────────────────────────────────────── */
/*
 * Returns an int width for the given SIMD target and element type.
 * NOTE(review): presumably the number of `elem_type` lanes per vector
 * (e.g. 4 floats for SSE), but the unit (lanes vs. bits/bytes) and the
 * result for SIMD_NONE are not visible from this header -- confirm.
 */
int simd_width(SIMDTarget target, TypeKind elem_type);
/*
 * Returns a string for SIMD target `t`; presumably a printable name.
 * NOTE(review): the lifetime of the returned string and the result for
 * out-of-range values are not shown here -- confirm.
 */
const char *simd_target_str(SIMDTarget t);
#endif /* P2S_OPTIMIZE_H */