"""Names, ordering, and grouping of the ``qohlc_v2`` feature vector.

The feature groups are declared exactly once (``FEATURE_GROUPS``) and the flat
``FEATURE_NAMES`` list is derived from them, so the two views cannot drift
apart.  The position of a name in ``FEATURE_NAMES`` is its index in the
vectors produced by :func:`feature_dict_to_vector`.
"""

from collections import OrderedDict
from typing import Dict, Iterable, List

FEATURE_VERSION = "qohlc_v2"
FEATURE_VERSION_ID = 2

WINDOW_SECONDS = 5
SEGMENT_SECONDS = 300
# Number of whole windows that fit into one segment.
TOKENS_PER_SEGMENT = SEGMENT_SECONDS // WINDOW_SECONDS
LOOKBACK_SECONDS = [15, 30, 60, 120]

# Suffixes emitted once per entry in LOOKBACK_SECONDS, as "lb_<N>s_<suffix>".
_LOOKBACK_SUFFIXES = (
    "dist_high",
    "dist_low",
    "drawdown_high",
    "rebound_low",
    "pos_in_range",
    "range_width",
    "compression_ratio",
    "breakout_high",
    "breakdown_low",
    "reclaim_breakdown",
    "rejection_breakout",
)

_RELATIVE_STRUCTURE_FEATURES: List[str] = []
for lookback in LOOKBACK_SECONDS:
    prefix = f"lb_{lookback}s"
    _RELATIVE_STRUCTURE_FEATURES.extend(
        f"{prefix}_{suffix}" for suffix in _LOOKBACK_SUFFIXES
    )

# Single source of truth.  Group order and the order of names inside each
# group together define the layout of the feature vector.
FEATURE_GROUPS = OrderedDict([
    ("price_path", [
        "cum_log_return",
        "mean_log_return_1s",
        "std_log_return_1s",
        "max_up_1s",
        "max_down_1s",
        "realized_vol",
        "window_range_frac",
        "close_to_close_slope",
        "accel_proxy",
        "frac_pos_1s",
        "frac_neg_1s",
    ]),
    ("relative_structure", list(_RELATIVE_STRUCTURE_FEATURES)),
    ("levels_breaks", [
        "nearest_support_dist",
        "nearest_resistance_dist",
        "support_touch_count",
        "resistance_touch_count",
        "support_age_sec",
        "resistance_age_sec",
        "support_strength",
        "resistance_strength",
        "inside_support_zone",
        "inside_resistance_zone",
        "support_swept",
        "resistance_swept",
        "support_reclaim",
        "resistance_reject",
        "keylevel_breakout_up",
        "keylevel_breakout_down",
        "keylevel_hold_above",
        "keylevel_hold_below",
        "keylevel_failed_breakout_up",
        "keylevel_failed_breakout_down",
        "keylevel_flip_to_support",
        "keylevel_flip_to_resistance",
        "keylevel_upper_distance",
        "keylevel_lower_distance",
        "keylevel_zone_width_frac",
        "keylevel_density",
    ]),
    ("trendlines", [
        "lower_trendline_slope",
        "upper_trendline_slope",
        "dist_to_lower_line",
        "dist_to_upper_line",
        "trend_channel_width",
        "trend_convergence",
        "trend_breakout_upper",
        "trend_breakdown_lower",
        "trend_reentry",
    ]),
    ("rolling_quant", [
        "ema_fast",
        "ema_medium",
        "sma_fast",
        "sma_medium",
        "price_minus_ema_fast",
        "price_minus_ema_medium",
        "ema_spread",
        "price_zscore",
        "mean_reversion_score",
        "rolling_vol_zscore",
    ]),
    ("availability", [
        "sr_available",
        "trendline_available",
    ]),
])

# Flat, ordered list of every feature name: the groups concatenated in
# declaration order.  This is a fresh list, independent of the group lists.
FEATURE_NAMES: List[str] = [
    name for members in FEATURE_GROUPS.values() for name in members
]

# Feature name -> position in FEATURE_NAMES (and in the output vector).
FEATURE_INDEX = {name: idx for idx, name in enumerate(FEATURE_NAMES)}
NUM_QUANT_OHLC_FEATURES = len(FEATURE_NAMES)


def empty_feature_dict() -> Dict[str, float]:
    """Return a new dict mapping every feature name to ``0.0``."""
    return dict.fromkeys(FEATURE_NAMES, 0.0)


def feature_dict_to_vector(features: Dict[str, float]) -> List[float]:
    """Flatten *features* into a list ordered like ``FEATURE_NAMES``.

    Args:
        features: Mapping from feature name to value.  Keys that are not in
            ``FEATURE_NAMES`` are ignored.

    Returns:
        A list of ``NUM_QUANT_OHLC_FEATURES`` floats.  A feature that is
        missing from *features*, or whose value ``float()`` cannot convert
        (for example ``None`` or a non-numeric string), is emitted as ``0.0``.
    """
    out: List[float] = []
    for name in FEATURE_NAMES:
        raw = features.get(name, 0.0)
        try:
            converted = float(raw)
        except Exception:
            # Best-effort by design: one unconvertible value must not abort
            # the whole vector, so it falls back to the neutral default.
            converted = 0.0
        out.append(converted)
    return out


def group_feature_indices(group_names: Iterable[str]) -> List[int]:
    """Return the sorted, de-duplicated vector indices of the given groups.

    Args:
        group_names: Keys of ``FEATURE_GROUPS``.  A single group name passed
            as a bare string is treated as a one-element collection instead
            of being iterated character by character.

    Returns:
        Ascending list of unique indices into the feature vector.

    Raises:
        KeyError: If a group name is not a key of ``FEATURE_GROUPS``.
    """
    if isinstance(group_names, str):
        group_names = [group_names]

    selected = set()
    for group_name in group_names:
        try:
            members = FEATURE_GROUPS[group_name]
        except KeyError:
            raise KeyError(
                f"unknown feature group {group_name!r}; "
                f"expected one of {list(FEATURE_GROUPS)}"
            ) from None
        selected.update(FEATURE_INDEX[name] for name in members)
    return sorted(selected)