runner-ai-intelligence / src /ingestion /weekly_features.py
avfranco's picture
HF Space deploy snapshot (minimal allow-list)
d64fd55
# src/ingestion/weekly_features.py
from typing import List, Dict, Any
from datetime import datetime, timedelta
from collections import defaultdict
import math
from tools.runner_ai import sec_to_min_km
def week_start(date: datetime) -> datetime:
    """Shift ``date`` back to the Monday of its week.

    Only the calendar day is moved; the time-of-day component of ``date`` is
    carried over unchanged (a Monday input is returned as-is).
    """
    # datetime.weekday() is 0 for Monday ... 6 for Sunday, i.e. exactly the
    # number of days elapsed since the most recent Monday.
    days_since_monday = date.weekday()
    offset = timedelta(days=days_since_monday)
    return date - offset
def summarize_runs(runs: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Aggregate features from a list of runs into a single summary.

    Args:
        runs: list of run dicts, read through the keys 'start_time',
            'total_distance_m', 'total_duration_s' and 'avg_hr_bpm'.
            Missing or ``None`` values are tolerated. Entries that are not
            dicts are replaced by an empty dict (they still count towards
            'num_runs').

    Returns:
        ``{}`` when ``runs`` is empty, otherwise a dict with:
            start_time / end_time: earliest and latest 'start_time' among the
                runs that have one (``None`` if no run has a start time).
            total_distance_m / total_duration_s: sums over all runs.
            avg_pace_s_per_km: total duration divided by total distance in
                km, or ``None`` when the total distance is zero.
            avg_pace_min_per_km: ``sec_to_min_km(avg_pace_s_per_km)``, or
                ``None`` when the pace is ``None``.
            avg_hr_bpm: unweighted mean of the per-run 'avg_hr_bpm' values
                that are not ``None``, or ``None`` if there are none.
            num_runs: number of entries in ``runs``.
            runs: the runs, undated ones first, then dated ones in
                chronological order.
    """
    if not runs:
        return {}

    # Ensure all runs are dicts so that .get() is always available.
    clean_runs = [r if isinstance(r, dict) else {} for r in runs]

    # Sort only the runs that actually carry a start time. Substituting
    # datetime.min for missing values (as done previously) made the reported
    # window start None whenever a single run lacked a start time, and raised
    # TypeError when the other runs used timezone-aware datetimes.
    # NOTE: mixing naive and aware start times among dated runs still raises.
    dated = [r for r in clean_runs if r.get("start_time")]
    undated = [r for r in clean_runs if not r.get("start_time")]
    dated.sort(key=lambda r: r["start_time"])
    # Keep the historical output order: undated runs first, then chronological.
    clean_runs = undated + dated

    # Totals; `or 0` treats both missing keys and explicit None as zero.
    total_distance = sum(r.get("total_distance_m") or 0 for r in clean_runs)
    total_duration = sum(r.get("total_duration_s") or 0 for r in clean_runs)

    # Average pace (sec/km); undefined when no distance was covered.
    avg_pace = None
    avg_pace_min_per_km = None
    if total_distance > 0:
        avg_pace = total_duration / (total_distance / 1000)  # seconds per km
        avg_pace_min_per_km = sec_to_min_km(avg_pace)

    # HR metrics: mean over the runs that report a heart rate.
    hr_values = [
        r["avg_hr_bpm"] for r in clean_runs if r.get("avg_hr_bpm") is not None
    ]
    avg_hr = sum(hr_values) / len(hr_values) if hr_values else None

    # Time window, derived from dated runs only.
    start_time = dated[0]["start_time"] if dated else None
    end_time = dated[-1]["start_time"] if dated else None

    return {
        "start_time": start_time,
        "end_time": end_time,
        "total_distance_m": total_distance,
        "total_duration_s": total_duration,
        "avg_pace_s_per_km": avg_pace,
        "avg_pace_min_per_km": avg_pace_min_per_km,
        "avg_hr_bpm": avg_hr,
        "num_runs": len(clean_runs),
        "runs": clean_runs,
    }
# Backward-compatible alias: the older name `aggregate_weekly_features` is bound
# to the same function object as `summarize_runs`, so both names behave
# identically while usages are being refactored to the new name.
aggregate_weekly_features = summarize_runs
def aggregate_runs_by_week(runs: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """Group runs by ISO week and compute summaries for each week.

    Args:
        runs: list of run dicts. Each run is bucketed by the ISO calendar
            week of its 'start_time' (via ``isocalendar()``). Entries that
            are not dicts, or that have no 'start_time', are skipped.

    Returns:
        Dict mapping "YYYY-WW" -> weekly summary dict, where YYYY is the ISO
        year, WW the zero-padded ISO week number, and each summary is the
        result of ``summarize_runs`` for that week's runs. Empty input yields
        an empty dict.
    """
    if not runs:
        return {}

    weekly_runs = defaultdict(list)
    for run in runs:
        # summarize_runs tolerates non-dict entries, so this function must not
        # crash on them either; they cannot be assigned to a week, so skip.
        if not isinstance(run, dict):
            continue
        started = run.get("start_time")
        if not started:
            continue
        iso_year, iso_week, _ = started.isocalendar()
        weekly_runs[f"{iso_year}-{iso_week:02d}"].append(run)

    return {
        week_key: summarize_runs(week_runs)
        for week_key, week_runs in weekly_runs.items()
    }
def compute_trends(weekly_data: Dict[str, Dict[str, Any]], weeks_back: int = 4) -> Dict[str, Any]:
    """Compute basic trends over the last N weeks from a dict of weekly summaries.

    Args:
        weekly_data: Dict of "YYYY-WW" -> weekly summary dict. The keys
            'total_distance_m', 'avg_pace_s_per_km' and 'num_runs' are read
            from each summary; missing or ``None`` distance / run counts are
            treated as 0.
        weeks_back: Number of most recent weeks (by sorted key) to consider.
            ``0``, ``None`` or a negative value means "use every week".

    Returns:
        ``{}`` when ``weekly_data`` is empty, otherwise a dict with:
            pace_trend_s_per_km: last minus first available weekly pace
                (negative = faster), or ``None`` with fewer than two paces.
            distance_trend_m: last minus first weekly distance, or ``None``
                with fewer than two weeks.
            avg_runs_per_week: mean number of runs per selected week.
            run_monotony: population standard deviation of the weekly run
                counts (0.0 means the same number of runs every week).
    """
    if not weekly_data:
        return {}

    # Zero-padded "YYYY-WW" keys sort lexicographically in chronological order.
    sorted_keys = sorted(weekly_data.keys())
    # Guard the slice: a negative count would turn `[-weeks_back:]` into
    # `[k:]` and silently drop the *oldest* weeks instead of keeping the
    # newest ones.
    if weeks_back is not None and weeks_back > 0:
        selected_keys = sorted_keys[-weeks_back:]
    else:
        selected_keys = sorted_keys
    recent_weeks = [weekly_data[k] for k in selected_keys]

    # `or 0` also covers summaries that store an explicit None.
    distances = [w.get("total_distance_m") or 0 for w in recent_weeks]
    run_counts = [w.get("num_runs") or 0 for w in recent_weeks]
    # Weeks without a pace (e.g. zero distance) are skipped entirely.
    paces = [
        w.get("avg_pace_s_per_km")
        for w in recent_weeks
        if w.get("avg_pace_s_per_km") is not None
    ]

    trends: Dict[str, Any] = {}

    # Pace trend (negative = faster, positive = slower)
    trends["pace_trend_s_per_km"] = paces[-1] - paces[0] if len(paces) >= 2 else None

    # Distance trend
    trends["distance_trend_m"] = (
        distances[-1] - distances[0] if len(distances) >= 2 else None
    )

    # Consistency: average number of runs per week. At least one week is
    # always selected here because weekly_data is non-empty.
    week_count = len(run_counts)
    mean_runs = sum(run_counts) / week_count
    trends["avg_runs_per_week"] = mean_runs

    # Spread of the weekly run counts (population standard deviation).
    variance = sum((count - mean_runs) ** 2 for count in run_counts) / week_count
    trends["run_monotony"] = math.sqrt(variance)

    return trends