# File: stocks/core/utils/stats_helpers.py
# Source-page residue: author "Arrechenash", commit "Initial Commit" (b2a37ab).
"""Shared statistical helper functions extracted from scanner_service and backtester."""
import pandas as pd
def offset_days(daily_df, day_idx, offset):
    """Return the date found ``offset`` rows before ``day_idx``.

    The lookup is positional: the target row is ``day_idx - offset``, so the
    offset counts rows of ``daily_df`` rather than calendar days.

    Args:
        daily_df: DataFrame with a 'timestamp' column whose values provide
            ``strftime`` (e.g. pandas Timestamps).
        day_idx: Current positional row index in the DataFrame.
        offset: Number of rows to look back (positive) or ahead (negative).

    Returns:
        Date string in '%Y-%m-%d' format, or None when the frame is None,
        has no 'timestamp' column, the target row is out of bounds, or the
        timestamp stored at that row is missing (NaT/NaN).
    """
    # Same defensive guard that avg_vol10 applies to its input frame.
    if daily_df is None or "timestamp" not in daily_df.columns:
        return None

    target_idx = day_idx - offset
    # Explicit bounds check: a negative position must not wrap around to the
    # end of the frame the way plain .iloc indexing would.
    if not 0 <= target_idx < len(daily_df):
        return None

    stamp = daily_df["timestamp"].iloc[target_idx]
    # NaT does not support strftime (raises ValueError), so treat it as
    # "no date available".
    if pd.isna(stamp):
        return None
    return stamp.strftime("%Y-%m-%d")
def avg_vol10(daily_df, day_idx):
    """Return the average volume of the (up to) 10 rows preceding ``day_idx``.

    The row at ``day_idx`` itself is excluded, so the value only uses data
    that comes before that row. Missing (NaN) volumes inside the window are
    ignored; at least one valid value is required.

    Args:
        daily_df: DataFrame with a 'volume' column.
        day_idx: Positional row index to get the average at.

    Returns:
        Average volume truncated to int, or None when the frame is None, has
        no 'volume' column, ``day_idx`` is out of bounds, or no valid volume
        precedes ``day_idx`` (e.g. ``day_idx == 0``).
    """
    if daily_df is None or "volume" not in daily_df.columns:
        return None

    volumes = daily_df["volume"]
    if not 0 <= day_idx < len(volumes):
        return None

    # Equivalent to volumes.shift(1).rolling(10, min_periods=1).mean() read at
    # day_idx, but only the needed window is averaged instead of building the
    # rolling mean for the whole column on every call.
    window = volumes.iloc[max(0, day_idx - 10):day_idx]
    mean_volume = window.mean()  # NaN for an empty or all-NaN window
    if pd.notna(mean_volume):
        return int(mean_volume)
    return None
def gap_pct(prev_close, open_price):
    """Return the opening gap as a percentage of the previous close.

    Computed as ``(open_price - prev_close) / prev_close * 100``.

    Args:
        prev_close: Previous day's closing price.
        open_price: Current day's opening price.

    Returns:
        Gap percentage as float, or None if the calculation is not possible:
        either input is None/NaN, or ``prev_close`` is zero or negative.
    """
    # pd.isna covers None as well as NaN, which would otherwise slip past the
    # `<= 0` check and propagate as a NaN result.
    if pd.isna(prev_close) or pd.isna(open_price):
        return None
    if prev_close <= 0:
        return None
    return float((open_price - prev_close) / prev_close * 100)
def bucket_gap(gap_pct_value):
    """Bucket a gap percentage into one of 5 categories.

    Buckets (lower bound inclusive, upper bound exclusive):
        < -5        -> "gap_n5_p"
        [-5, 0)     -> "gap_n5_0"
        [0, 5)      -> "gap_0_5"
        [5, 10)     -> "gap_5_10"
        >= 10       -> "gap_10_p"

    Args:
        gap_pct_value: The gap percentage as float.

    Returns:
        Bucket name string, or None if the value is None or NaN.
    """
    # NaN compares False against every threshold, so without this guard it
    # would fall through to the top bucket.
    if gap_pct_value is None or pd.isna(gap_pct_value):
        return None
    if gap_pct_value < -5:
        return "gap_n5_p"
    if gap_pct_value < 0:
        return "gap_n5_0"
    if gap_pct_value < 5:
        return "gap_0_5"
    if gap_pct_value < 10:
        return "gap_5_10"
    return "gap_10_p"
def bucket_range(pct_value):
    """Bucket an intraday range percentage into categories.

    Buckets (upper bound exclusive):
        < 2         -> "rng_0_2"
        [2, 5)      -> "rng_2_5"
        [5, 10)     -> "rng_5_10"
        >= 10       -> "rng_10_p"

    Args:
        pct_value: The percentage value as float.

    Returns:
        Bucket name string, or None if the value is None or NaN.
    """
    # NaN compares False against every threshold, so without this guard it
    # would fall through to the top bucket.
    if pct_value is None or pd.isna(pct_value):
        return None
    if pct_value < 2:
        return "rng_0_2"
    if pct_value < 5:
        return "rng_2_5"
    if pct_value < 10:
        return "rng_5_10"
    return "rng_10_p"
def bucket_price(price):
    """Bucket a stock price into categories.

    Buckets (upper bound exclusive):
        < 5         -> "prc_lt_5"
        [5, 20)     -> "prc_5_20"
        [20, 50)    -> "prc_20_50"
        [50, 100)   -> "prc_50_100"
        >= 100      -> "prc_gt_100"

    Args:
        price: Stock price as float.

    Returns:
        Bucket name string, or None if the value is None or NaN.
    """
    # NaN compares False against every threshold, so without this guard it
    # would fall through to the top bucket.
    if price is None or pd.isna(price):
        return None
    if price < 5:
        return "prc_lt_5"
    if price < 20:
        return "prc_5_20"
    if price < 50:
        return "prc_20_50"
    if price < 100:
        return "prc_50_100"
    return "prc_gt_100"
def bucket_rel_vol(rel_vol):
    """Bucket relative volume into categories.

    Buckets (upper bound exclusive):
        < 2         -> "rvol_lt_2"
        [2, 5)      -> "rvol_2_5"
        [5, 10)     -> "rvol_5_10"
        >= 10       -> "rvol_gt_10"

    Args:
        rel_vol: Relative volume as float.

    Returns:
        Bucket name string, or None if the value is None or NaN.
    """
    # NaN compares False against every threshold, so without this guard it
    # would fall through to the top bucket.
    if rel_vol is None or pd.isna(rel_vol):
        return None
    if rel_vol < 2:
        return "rvol_lt_2"
    if rel_vol < 5:
        return "rvol_2_5"
    if rel_vol < 10:
        return "rvol_5_10"
    return "rvol_gt_10"
def bucket_premium_pct(pm_value, day_value):
    """Bucket a premarket value expressed as a percentage of the day's value.

    The percentage is ``pm_value / day_value * 100`` and is bucketed as
    (upper bound exclusive):
        < 50        -> "pm_pct_0_50"
        [50, 75)    -> "pm_pct_50_75"
        [75, 90)    -> "pm_pct_75_90"
        [90, 100)   -> "pm_pct_90_100"
        >= 100      -> "pm_pct_100_p"

    Args:
        pm_value: Premarket high or low.
        day_value: Day's high or low (corresponding).

    Returns:
        Bucket name string, or None if either value is None/NaN or
        ``day_value`` is zero or negative.
    """
    # pd.isna covers None as well as NaN; a NaN in either input would yield a
    # NaN percentage that fails every comparison and lands in the top bucket.
    if pd.isna(pm_value) or pd.isna(day_value):
        return None
    if day_value <= 0:
        return None

    pct = (pm_value / day_value) * 100
    if pct < 50:
        return "pm_pct_0_50"
    if pct < 75:
        return "pm_pct_50_75"
    if pct < 90:
        return "pm_pct_75_90"
    if pct < 100:
        return "pm_pct_90_100"
    return "pm_pct_100_p"