# oracle/data/quant_ohlc_feature_schema.py
# Author: zirobtc
# Commit a547253 (verified): "Upload folder using huggingface_hub"
from collections import OrderedDict
from typing import Dict, Iterable, List
# ---------------------------------------------------------------------------
# Schema identity. The string tag and the integer id both carry the version
# number 2; bump them together when the feature layout changes.
# ---------------------------------------------------------------------------
FEATURE_VERSION = "qohlc_v2"
FEATURE_VERSION_ID = 2

# ---------------------------------------------------------------------------
# Time layout, expressed in seconds.
# ---------------------------------------------------------------------------
WINDOW_SECONDS = 5
SEGMENT_SECONDS = 300
# Whole number of windows that fit in one segment (300 // 5 == 60).
TOKENS_PER_SEGMENT = SEGMENT_SECONDS // WINDOW_SECONDS
# Each lookback horizon contributes one "lb_<seconds>s_*" family of features.
LOOKBACK_SECONDS = [15, 30, 60, 120]

# Per-lookback feature suffixes; the full name is "lb_<seconds>s_<suffix>".
_LOOKBACK_FEATURE_SUFFIXES = (
    "dist_high",
    "dist_low",
    "drawdown_high",
    "rebound_low",
    "pos_in_range",
    "range_width",
    "compression_ratio",
    "breakout_high",
    "breakdown_low",
    "reclaim_breakdown",
    "rejection_breakout",
)

# Expand the lookback families in LOOKBACK_SECONDS order, suffixes innermost.
_relative_structure_names: List[str] = []
for lookback in LOOKBACK_SECONDS:
    prefix = f"lb_{lookback}s"
    _relative_structure_names.extend(
        f"{prefix}_{suffix}" for suffix in _LOOKBACK_FEATURE_SUFFIXES
    )

# Single source of truth for the schema: every feature name is declared exactly
# once, inside its group. Group order and the order of names within each group
# define the flat feature-vector layout below, so do not reorder casually.
FEATURE_GROUPS = OrderedDict([
    ("price_path", [
        "cum_log_return",
        "mean_log_return_1s",
        "std_log_return_1s",
        "max_up_1s",
        "max_down_1s",
        "realized_vol",
        "window_range_frac",
        "close_to_close_slope",
        "accel_proxy",
        "frac_pos_1s",
        "frac_neg_1s",
    ]),
    ("relative_structure", _relative_structure_names),
    ("levels_breaks", [
        "nearest_support_dist",
        "nearest_resistance_dist",
        "support_touch_count",
        "resistance_touch_count",
        "support_age_sec",
        "resistance_age_sec",
        "support_strength",
        "resistance_strength",
        "inside_support_zone",
        "inside_resistance_zone",
        "support_swept",
        "resistance_swept",
        "support_reclaim",
        "resistance_reject",
        "keylevel_breakout_up",
        "keylevel_breakout_down",
        "keylevel_hold_above",
        "keylevel_hold_below",
        "keylevel_failed_breakout_up",
        "keylevel_failed_breakout_down",
        "keylevel_flip_to_support",
        "keylevel_flip_to_resistance",
        "keylevel_upper_distance",
        "keylevel_lower_distance",
        "keylevel_zone_width_frac",
        "keylevel_density",
    ]),
    ("trendlines", [
        "lower_trendline_slope",
        "upper_trendline_slope",
        "dist_to_lower_line",
        "dist_to_upper_line",
        "trend_channel_width",
        "trend_convergence",
        "trend_breakout_upper",
        "trend_breakdown_lower",
        "trend_reentry",
    ]),
    ("rolling_quant", [
        "ema_fast",
        "ema_medium",
        "sma_fast",
        "sma_medium",
        "price_minus_ema_fast",
        "price_minus_ema_medium",
        "ema_spread",
        "price_zscore",
        "mean_reversion_score",
        "rolling_vol_zscore",
    ]),
    ("availability", [
        "sr_available",
        "trendline_available",
    ]),
])

# Flat, ordered list of all feature names: the groups concatenated in their
# declaration order. Built as a fresh list so it shares no list object with
# FEATURE_GROUPS.
FEATURE_NAMES: List[str] = [
    name for group in FEATURE_GROUPS.values() for name in group
]

# Name -> position in FEATURE_NAMES (and therefore in any feature vector).
FEATURE_INDEX = {name: idx for idx, name in enumerate(FEATURE_NAMES)}
NUM_QUANT_OHLC_FEATURES = len(FEATURE_NAMES)
def empty_feature_dict() -> Dict[str, float]:
    """Return a new dict holding every name in FEATURE_NAMES mapped to 0.0.

    Keys are inserted in FEATURE_NAMES order. Each call builds a separate
    dict, so callers may mutate the result freely.
    """
    return dict.fromkeys(FEATURE_NAMES, 0.0)
def feature_dict_to_vector(features: Dict[str, float]) -> List[float]:
    """Flatten a feature mapping into a list ordered like FEATURE_NAMES.

    Args:
        features: Mapping from feature name to value. Keys that are not in
            FEATURE_NAMES are ignored.

    Returns:
        One float per entry of FEATURE_NAMES, in the same order. A name that
        is absent from ``features`` yields 0.0, and so does any value for
        which ``float()`` raises.
    """

    def _to_float(raw: object) -> float:
        # Best-effort conversion: anything float() rejects becomes 0.0.
        try:
            return float(raw)
        except Exception:
            return 0.0

    return [_to_float(features.get(name, 0.0)) for name in FEATURE_NAMES]
def group_feature_indices(group_names: Iterable[str]) -> List[int]:
    """Return the sorted, de-duplicated feature indices of the given groups.

    Args:
        group_names: Keys of FEATURE_GROUPS to collect.

    Returns:
        Ascending list of FEATURE_INDEX positions for every feature in the
        requested groups; a group named more than once contributes only once.

    Raises:
        KeyError: If a group name is not a key of FEATURE_GROUPS.
    """
    selected = {
        FEATURE_INDEX[feature_name]
        for group_name in group_names
        for feature_name in FEATURE_GROUPS[group_name]
    }
    return sorted(selected)