Spaces:

avfranco
/

runner-ai-intelligence

Running

App Files Files Community

runner-ai-intelligence / src /ingestion /weekly_features.py

avfranco

HF Space deploy snapshot (minimal allow-list)

d64fd55 5 days ago

raw

history blame contribute delete

4.66 kB

	# src/ingestion/weekly_features.py
	from typing import List, Dict, Any
	from datetime import datetime, timedelta
	from collections import defaultdict
	import math

	from tools.runner_ai import sec_to_min_km


	def week_start(date: datetime) -> datetime:
	    """Return the Monday of the week that contains ``date``.

	    Only whole days are subtracted, so the time-of-day (and any tzinfo)
	    carried by ``date`` is left unchanged in the result.
	    """
	    days_since_monday = date.weekday()  # Monday == 0 ... Sunday == 6
	    return date - timedelta(days=days_since_monday)


	def summarize_runs(runs: List[Dict[str, Any]]) -> Dict[str, Any]:
	    """Aggregate a list of runs into a single summary dict.

	    Args:
	        runs: Run dicts. The keys read are 'start_time', 'total_distance_m',
	            'total_duration_s' and 'avg_hr_bpm'; each may be missing or None.
	            Entries that are not dicts are replaced by an empty dict, so they
	            still count towards 'num_runs' but contribute nothing else.

	    Returns:
	        An empty dict when ``runs`` is empty. Otherwise a dict with:
	            start_time / end_time: earliest and latest 'start_time' among the
	                runs that have one (both None if no run has one).
	            total_distance_m / total_duration_s: sums over all runs, with
	                missing values counted as 0.
	            avg_pace_s_per_km: total duration divided by total distance in
	                km, or None when the total distance is 0.
	            avg_pace_min_per_km: ``sec_to_min_km`` applied to the pace above,
	                or None when the pace is None.
	            avg_hr_bpm: unweighted mean of the per-run 'avg_hr_bpm' values
	                that are not None, or None if there are none.
	            num_runs: number of entries in ``runs``.
	            runs: the cleaned runs, sorted by start time with runs lacking a
	                start time placed first.
	    """
	    if not runs:
	        return {}

	    # Replace anything that is not a dict so the .get() calls below are safe.
	    clean_runs = [r if isinstance(r, dict) else {} for r in runs]

	    def _sort_key(run: Dict[str, Any]):
	        # (False, None) sorts before (True, <datetime>), so runs without a
	        # start time come first without ever being compared to a datetime.
	        # A datetime.min fallback would raise TypeError against tz-aware values.
	        started = run.get("start_time") or None
	        return (started is not None, started)

	    clean_runs.sort(key=_sort_key)

	    # Totals; `or 0` also covers keys that are present but set to None.
	    total_distance = sum(r.get("total_distance_m") or 0 for r in clean_runs)
	    total_duration = sum(r.get("total_duration_s") or 0 for r in clean_runs)

	    # Average pace in seconds per km; undefined when nothing was covered.
	    avg_pace = None
	    avg_pace_min_per_km = None
	    if total_distance > 0:
	        avg_pace = total_duration / (total_distance / 1000)
	        avg_pace_min_per_km = sec_to_min_km(avg_pace)

	    # Heart rate: simple mean over the runs that report one.
	    hr_values = [r.get("avg_hr_bpm") for r in clean_runs if r.get("avg_hr_bpm") is not None]
	    avg_hr = sum(hr_values) / len(hr_values) if hr_values else None

	    # Time window: taken only from runs that actually have a start time, so a
	    # single run without a timestamp no longer turns start_time into None.
	    start_times = [r["start_time"] for r in clean_runs if r.get("start_time")]
	    start_time = start_times[0] if start_times else None
	    end_time = start_times[-1] if start_times else None

	    return {
	        "start_time": start_time,
	        "end_time": end_time,
	        "total_distance_m": total_distance,
	        "total_duration_s": total_duration,
	        "avg_pace_s_per_km": avg_pace,
	        "avg_pace_min_per_km": avg_pace_min_per_km,
	        "avg_hr_bpm": avg_hr,
	        "num_runs": len(clean_runs),
	        "runs": clean_runs,
	    }


	# Backward-compatible alias: `aggregate_weekly_features` is bound to the very
	# same function object as `summarize_runs`, so code still using the old name
	# keeps working while usages are being refactored.
	aggregate_weekly_features = summarize_runs


	def aggregate_runs_by_week(runs: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
	    """Group runs by ISO week and compute a summary for each week.

	    Args:
	        runs: Run dicts. A run is assigned to a week through its 'start_time',
	            which must provide ``isocalendar()`` (e.g. a ``datetime``). Entries
	            that are not dicts, or that have no 'start_time', are skipped.

	    Returns:
	        Dict mapping "YYYY-WW" (ISO year and zero-padded ISO week number) to
	        the ``summarize_runs`` result for the runs of that week. Empty when no
	        run could be assigned to a week.
	    """
	    weekly_runs = defaultdict(list)
	    for run in runs:
	        # summarize_runs tolerates non-dict entries; here they carry no start
	        # time and cannot be placed in a week, so skip them instead of
	        # crashing on .get().
	        if not isinstance(run, dict):
	            continue
	        started = run.get("start_time")
	        if not started:
	            continue
	        iso_year, iso_week, _ = started.isocalendar()
	        weekly_runs[f"{iso_year}-{iso_week:02d}"].append(run)

	    return {week: summarize_runs(week_runs) for week, week_runs in weekly_runs.items()}


	def compute_trends(weekly_data: Dict[str, Dict[str, Any]], weeks_back: int = 4) -> Dict[str, Any]:
	    """Compute basic trends over the most recent weeks of weekly summaries.

	    Args:
	        weekly_data: Dict of "YYYY-WW" -> weekly summary dict. The keys read
	            from each summary are 'total_distance_m', 'avg_pace_s_per_km' and
	            'num_runs'; missing or None distance / run-count values count as 0.
	        weeks_back: Number of most recent weeks to consider. 0, None or a
	            negative value means "use every week in ``weekly_data``".

	    Returns:
	        An empty dict when ``weekly_data`` is empty. Otherwise a dict with:
	            pace_trend_s_per_km: last minus first available weekly pace in the
	                window (negative = faster, positive = slower); None when
	                fewer than two weeks in the window have a pace.
	            distance_trend_m: last minus first weekly distance in the window;
	                None when the window holds fewer than two weeks.
	            avg_runs_per_week: mean number of runs per week in the window.
	            run_monotony: population standard deviation of the weekly run
	                counts in the window (0.0 means the same count every week).
	    """
	    if not weekly_data:
	        return {}

	    # Keys are "YYYY-WW" strings, so sorting them orders the weeks in time.
	    selected_keys = sorted(weekly_data)
	    # Only a positive window trims the list; a negative value would otherwise
	    # slice off the *oldest* weeks (keys[abs(n):]) and could leave no weeks.
	    if weeks_back and weeks_back > 0:
	        selected_keys = selected_keys[-weeks_back:]
	    recent_weeks = [weekly_data[k] for k in selected_keys]

	    # `or 0` also covers keys that are present but explicitly None.
	    distances = [w.get("total_distance_m") or 0 for w in recent_weeks]
	    run_counts = [w.get("num_runs") or 0 for w in recent_weeks]
	    # Weeks without a pace (e.g. zero distance) are left out of the pace trend.
	    paces = [
	        w["avg_pace_s_per_km"] for w in recent_weeks if w.get("avg_pace_s_per_km") is not None
	    ]

	    # recent_weeks is never empty here, so the divisions below are safe.
	    mean_runs = sum(run_counts) / len(run_counts)
	    variance = sum((count - mean_runs) ** 2 for count in run_counts) / len(run_counts)

	    return {
	        "pace_trend_s_per_km": paces[-1] - paces[0] if len(paces) >= 2 else None,
	        "distance_trend_m": distances[-1] - distances[0] if len(distances) >= 2 else None,
	        "avg_runs_per_week": mean_runs,
	        "run_monotony": math.sqrt(variance),
	    }