from collections import OrderedDict
from typing import Dict, Iterable, List
|
|
|
|
# Version tag of this feature layout, in string and integer form.
FEATURE_VERSION = "qohlc_v2"
FEATURE_VERSION_ID = 2

# Window and segment lengths in seconds. TOKENS_PER_SEGMENT is the number of
# whole windows that fit into one segment (300 // 5 == 60).
WINDOW_SECONDS = 5
SEGMENT_SECONDS = 300
TOKENS_PER_SEGMENT = SEGMENT_SECONDS // WINDOW_SECONDS

# Lookback horizons in seconds. Each horizon contributes one complete set of
# "lb_<N>s_<suffix>" features (see _LOOKBACK_FEATURE_SUFFIXES).
LOOKBACK_SECONDS = [15, 30, 60, 120]


# Per-lookback feature suffixes, in the order they appear in the vector.
_LOOKBACK_FEATURE_SUFFIXES = (
    "dist_high",
    "dist_low",
    "drawdown_high",
    "rebound_low",
    "pos_in_range",
    "range_width",
    "compression_ratio",
    "breakout_high",
    "breakdown_low",
    "reclaim_breakdown",
    "rejection_breakout",
)

# Expand the suffixes for every lookback horizon, horizon-major, so that all
# features of "lb_15s" come first, then "lb_30s", and so on.
_RELATIVE_STRUCTURE_FEATURES: List[str] = []
for lookback in LOOKBACK_SECONDS:
    prefix = f"lb_{lookback}s"
    _RELATIVE_STRUCTURE_FEATURES.extend(
        f"{prefix}_{suffix}" for suffix in _LOOKBACK_FEATURE_SUFFIXES
    )


# Single source of truth for the feature schema: group name -> feature names.
# Both the order of the groups and the order of the names inside each group
# are significant, because FEATURE_NAMES (and therefore every vector index)
# is derived from them below. Append new names rather than reordering.
FEATURE_GROUPS = OrderedDict([
    ("price_path", [
        "cum_log_return",
        "mean_log_return_1s",
        "std_log_return_1s",
        "max_up_1s",
        "max_down_1s",
        "realized_vol",
        "window_range_frac",
        "close_to_close_slope",
        "accel_proxy",
        "frac_pos_1s",
        "frac_neg_1s",
    ]),
    ("relative_structure", _RELATIVE_STRUCTURE_FEATURES),
    ("levels_breaks", [
        "nearest_support_dist",
        "nearest_resistance_dist",
        "support_touch_count",
        "resistance_touch_count",
        "support_age_sec",
        "resistance_age_sec",
        "support_strength",
        "resistance_strength",
        "inside_support_zone",
        "inside_resistance_zone",
        "support_swept",
        "resistance_swept",
        "support_reclaim",
        "resistance_reject",
        "keylevel_breakout_up",
        "keylevel_breakout_down",
        "keylevel_hold_above",
        "keylevel_hold_below",
        "keylevel_failed_breakout_up",
        "keylevel_failed_breakout_down",
        "keylevel_flip_to_support",
        "keylevel_flip_to_resistance",
        "keylevel_upper_distance",
        "keylevel_lower_distance",
        "keylevel_zone_width_frac",
        "keylevel_density",
    ]),
    ("trendlines", [
        "lower_trendline_slope",
        "upper_trendline_slope",
        "dist_to_lower_line",
        "dist_to_upper_line",
        "trend_channel_width",
        "trend_convergence",
        "trend_breakout_upper",
        "trend_breakdown_lower",
        "trend_reentry",
    ]),
    ("rolling_quant", [
        "ema_fast",
        "ema_medium",
        "sma_fast",
        "sma_medium",
        "price_minus_ema_fast",
        "price_minus_ema_medium",
        "ema_spread",
        "price_zscore",
        "mean_reversion_score",
        "rolling_vol_zscore",
    ]),
    ("availability", [
        "sr_available",
        "trendline_available",
    ]),
])


# Flat, ordered list of every feature name: the groups concatenated in
# declaration order. This is a separate list object from the group lists.
FEATURE_NAMES: List[str] = [
    name for names in FEATURE_GROUPS.values() for name in names
]

# Position of each feature name inside FEATURE_NAMES / a feature vector.
FEATURE_INDEX = {name: idx for idx, name in enumerate(FEATURE_NAMES)}
NUM_QUANT_OHLC_FEATURES = len(FEATURE_NAMES)

# A duplicated name would silently collapse onto one FEATURE_INDEX entry and
# leave an unreachable vector slot, so fail loudly at import time instead.
if len(FEATURE_INDEX) != NUM_QUANT_OHLC_FEATURES:
    raise ValueError("duplicate feature names in FEATURE_GROUPS")
|
|
|
|
def empty_feature_dict() -> Dict[str, float]:
    """Return a fresh dict that maps every name in FEATURE_NAMES to 0.0.

    A new dict is built on each call, so callers may mutate the result
    freely. Keys follow FEATURE_NAMES order.
    """
    return dict.fromkeys(FEATURE_NAMES, 0.0)
|
|
|
|
def feature_dict_to_vector(features: Dict[str, float]) -> List[float]:
    """Flatten a feature dict into a list ordered like FEATURE_NAMES.

    Args:
        features: Mapping of feature name to value. Names not listed in
            FEATURE_NAMES are ignored.

    Returns:
        A list with one float per entry of FEATURE_NAMES, in that order.
        A name missing from ``features`` yields 0.0, and so does any value
        that ``float()`` cannot convert.
    """
    vector: List[float] = []
    for name in FEATURE_NAMES:
        raw = features.get(name, 0.0)
        try:
            converted = float(raw)
        except Exception:
            # Deliberately broad best-effort fallback: whatever float()
            # raises for this value, the slot is filled with 0.0 so the
            # vector always has the full length.
            converted = 0.0
        vector.append(converted)
    return vector
|
|
|
|
def group_feature_indices(group_names: Iterable[str]) -> List[int]:
    """Return the sorted, de-duplicated vector indices of the given groups.

    Args:
        group_names: Keys of FEATURE_GROUPS whose features should be selected.

    Returns:
        Ascending list of FEATURE_INDEX positions covering every feature of
        every requested group; an index appears once even if a group is
        named more than once. An empty iterable yields an empty list.

    Raises:
        KeyError: If a group name is not a key of FEATURE_GROUPS, or a
            group lists a feature that is not in FEATURE_INDEX.
    """
    selected = {
        FEATURE_INDEX[feature_name]
        for group_name in group_names
        for feature_name in FEATURE_GROUPS[group_name]
    }
    return sorted(selected)
|
|