Spaces:
Running
Running
| # src/ingestion/weekly_features.py | |
| from typing import List, Dict, Any | |
| from datetime import datetime, timedelta | |
| from collections import defaultdict | |
| import math | |
| from tools.runner_ai import sec_to_min_km | |
def week_start(date: datetime) -> datetime:
    """Shift ``date`` back to the Monday of the week it falls in.

    Only whole days are subtracted, so the time-of-day component of
    ``date`` is carried over unchanged.
    """
    # datetime.weekday() numbers Monday as 0 and Sunday as 6.
    days_since_monday = date.weekday()
    return date - timedelta(days=days_since_monday)
def summarize_runs(runs: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Aggregate features from a list of runs into a single summary.

    Args:
        runs: list of run dicts. The keys read here are 'start_time',
            'total_distance_m', 'total_duration_s' and 'avg_hr_bpm'; any of
            them may be missing or None. Entries that are not dicts are
            replaced by empty dicts (they still count toward 'num_runs').

    Returns:
        An empty dict when ``runs`` is empty. Otherwise a dict with:
            start_time / end_time: earliest and latest 'start_time' among the
                runs that have one (None when no run has a start time).
            total_distance_m / total_duration_s: sums over all runs, with
                missing values counted as 0.
            avg_pace_s_per_km: total duration divided by total distance in
                km, or None when the total distance is not positive.
            avg_pace_min_per_km: ``sec_to_min_km(avg_pace_s_per_km)``, or
                None when no pace could be computed.
            avg_hr_bpm: unweighted mean of the non-None 'avg_hr_bpm' values,
                or None when there are none.
            num_runs: number of entries in ``runs``.
            runs: the cleaned run dicts, sorted by 'start_time'.
    """
    if not runs:
        return {}

    # Work on a new list so the caller's list is neither reordered nor
    # modified; non-dict entries become empty dicts so ``.get`` is safe.
    clean_runs = [r if isinstance(r, dict) else {} for r in runs]

    # Runs without a start time sort to the front (datetime.min fallback).
    clean_runs.sort(key=lambda r: r.get("start_time") or datetime.min)

    total_distance = sum(r.get("total_distance_m") or 0 for r in clean_runs)
    total_duration = sum(r.get("total_duration_s") or 0 for r in clean_runs)

    # Average pace in seconds per km; left as None for zero distance to
    # avoid dividing by zero.
    avg_pace = None
    avg_pace_min_per_km = None
    if total_distance > 0:
        avg_pace = total_duration / (total_distance / 1000)
        avg_pace_min_per_km = sec_to_min_km(avg_pace)

    hr_values = [
        r.get("avg_hr_bpm") for r in clean_runs if r.get("avg_hr_bpm") is not None
    ]
    avg_hr = sum(hr_values) / len(hr_values) if hr_values else None

    # Time window: take it only from runs that actually carry a start time.
    # Because timeless runs sort first, reading clean_runs[0] directly would
    # report a None window start even when other runs have valid timestamps.
    timestamps = [r["start_time"] for r in clean_runs if r.get("start_time")]
    start_time = timestamps[0] if timestamps else None
    end_time = timestamps[-1] if timestamps else None

    return {
        "start_time": start_time,
        "end_time": end_time,
        "total_distance_m": total_distance,
        "total_duration_s": total_duration,
        "avg_pace_s_per_km": avg_pace,
        "avg_pace_min_per_km": avg_pace_min_per_km,
        "avg_hr_bpm": avg_hr,
        "num_runs": len(clean_runs),
        "runs": clean_runs,
    }


# Backward-compatible alias: the old name refers to the same function object.
aggregate_weekly_features = summarize_runs
def aggregate_runs_by_week(runs: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """Group runs by ISO week and compute a summary for each week.

    Entries that are not dicts, and runs whose 'start_time' is missing or
    falsy, are skipped; this matches the tolerance ``summarize_runs`` already
    shows for malformed entries. A ``None`` or empty ``runs`` yields ``{}``.

    Args:
        runs: list of run dicts; 'start_time' must provide ``isocalendar()``
            (e.g. a ``datetime``) for the run to be grouped.

    Returns:
        Dict mapping "YYYY-WW" -> weekly summary dict, where YYYY is the ISO
        year and WW the zero-padded ISO week number from ``isocalendar()``.
    """
    weekly_runs = defaultdict(list)
    for run in runs or []:
        if not isinstance(run, dict):
            continue
        started = run.get("start_time")
        if not started:
            continue
        # Use the ISO year (not the calendar year) so days around New Year
        # land in the same bucket as the rest of their ISO week.
        iso_year, iso_week, _ = started.isocalendar()
        weekly_runs[f"{iso_year}-{iso_week:02d}"].append(run)

    return {
        week_key: summarize_runs(week_runs)
        for week_key, week_runs in weekly_runs.items()
    }
def compute_trends(weekly_data: Dict[str, Dict[str, Any]], weeks_back: int = 4) -> Dict[str, Any]:
    """Derive simple trend metrics from the most recent weekly summaries.

    Args:
        weekly_data: mapping of "YYYY-WW" -> weekly summary dict. Keys are
            sorted as strings to establish the week order.
        weeks_back: how many of the latest weeks to look at; a falsy value
            (0 or None) means every week is used.

    Returns:
        An empty dict when ``weekly_data`` is empty. Otherwise a dict with:
            pace_trend_s_per_km: last minus first available weekly pace
                (negative = faster), None with fewer than two paces.
            distance_trend_m: last minus first weekly distance, None with
                fewer than two weeks.
            avg_runs_per_week: mean of the weekly 'num_runs' values.
            run_monotony: population standard deviation of the weekly
                'num_runs' values.
    """
    if not weekly_data:
        return {}

    def _first_to_last(series):
        # Change between the oldest and newest entry; undefined below two points.
        if len(series) < 2:
            return None
        return series[-1] - series[0]

    ordered_keys = sorted(weekly_data)
    if weeks_back:
        ordered_keys = ordered_keys[-weeks_back:]
    window = [weekly_data[key] for key in ordered_keys]

    distance_series = [week.get("total_distance_m", 0) for week in window]
    # Weeks without a pace are dropped rather than treated as zero.
    pace_series = [
        pace
        for pace in (week.get("avg_pace_s_per_km") for week in window)
        if pace is not None
    ]
    run_counts = [week.get("num_runs", 0) for week in window]

    if run_counts:
        week_count = len(run_counts)
        mean_count = sum(run_counts) / week_count
        variance = sum([(count - mean_count) ** 2 for count in run_counts]) / week_count
        spread = math.sqrt(variance)
    else:
        mean_count = None
        spread = None

    return {
        "pace_trend_s_per_km": _first_to_last(pace_series),
        "distance_trend_m": _first_to_last(distance_series),
        "avg_runs_per_week": mean_count,
        "run_monotony": spread,
    }