avfranco's picture
HF Space deploy snapshot (minimal allow-list)
d64fd55
import gzip
import xml.etree.ElementTree as ET
from datetime import datetime
from typing import List, Dict, Any, Optional
from pathlib import Path
import logging
from .distance import compute_total_distance
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Prefix -> namespace URI mapping handed to ElementTree's find()/findall()
# so element paths can be written as "tcx:<Tag>" instead of "{uri}<Tag>".
NS = {
"tcx": "http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2",
}
def _open_tcx(path: str) -> str:
path = str(path)
lower = path.lower()
# loader.py now decompresses .tcx.gz beforehand,
# but we keep this for backward compatibility or direct CLI usage.
if lower.endswith(".gz"):
with gzip.open(path, "rb") as f:
raw_bytes = f.read()
else:
with open(path, "rb") as f:
raw_bytes = f.read()
# debug: fallback if empty read
if not raw_bytes:
print(f"⚠️ Warning: TCX read returned empty bytes for {path}")
decoded = raw_bytes.decode("utf-8", errors="replace").lstrip("\ufeff").strip()
return decoded
def _tcx_text(parent: Optional[ET.Element], tag: str) -> Optional[str]:
    """Return the stripped text of the ``tcx:<tag>`` child of *parent*, or None."""
    if parent is None:
        return None
    child = parent.find(f"tcx:{tag}", NS)
    if child is None or not child.text:
        return None
    return child.text.strip() or None


def _tcx_float(text: Optional[str]) -> Optional[float]:
    """Convert *text* to float; return None when it is missing or malformed."""
    if text is None:
        return None
    try:
        return float(text)
    except ValueError:
        return None


def _tcx_int(text: Optional[str]) -> Optional[int]:
    """Convert *text* to int; return None when it is missing or malformed."""
    if text is None:
        return None
    try:
        return int(text)
    except ValueError:
        return None


def _tcx_datetime(text: Optional[str]) -> Optional[datetime]:
    """Parse an ISO-8601 timestamp; return None when missing or unparsable."""
    if text is None:
        return None
    try:
        # Rewrite a "Z" suffix to an explicit UTC offset before parsing, for
        # fromisoformat() versions that reject the "Z" form.
        return datetime.fromisoformat(text.replace("Z", "+00:00"))
    except ValueError:
        return None


def _tcx_trackpoint_record(tp: ET.Element) -> Dict[str, Any]:
    """Build the record dict for a single ``<Trackpoint>`` element."""
    position = tp.find("tcx:Position", NS)
    heart_rate = tp.find("tcx:HeartRateBpm", NS)
    return {
        "time": _tcx_datetime(_tcx_text(tp, "Time")),
        "lat": _tcx_float(_tcx_text(position, "LatitudeDegrees")),
        "lon": _tcx_float(_tcx_text(position, "LongitudeDegrees")),
        "altitude_m": _tcx_float(_tcx_text(tp, "AltitudeMeters")),
        "distance_m": _tcx_float(_tcx_text(tp, "DistanceMeters")),
        "hr_bpm": _tcx_int(_tcx_text(heart_rate, "Value")),
        "cadence_rpm": _tcx_int(_tcx_text(tp, "Cadence")),
    }


def parse_tcx_file(path: str, allow_nonrunning: bool = False) -> Optional[Dict[str, Any]]:
    """Parse the first activity of a TCX file into a plain dictionary.

    Only the first ``<Activity>`` under the first ``<Activities>`` element is
    read. Individual trackpoint values that are missing or malformed become
    ``None`` instead of aborting the whole file.

    Args:
        path: Location of the ``.tcx`` or ``.tcx.gz`` file.
        allow_nonrunning: Currently has no effect; the running-only sport
            filter is always applied. Kept so existing callers keep working.

    Returns:
        ``None`` when the XML cannot be parsed, no activity is present, the
        sport is not ``running``/``trailrunning`` (case-insensitive), there
        are no trackpoints, or the computed distance is not positive.
        Otherwise a dict with keys ``id`` (file name without ``.tcx`` /
        ``.tcx.gz``), ``sport``, ``start_time``, ``total_distance_m``,
        ``total_duration_s``, ``records`` (one dict per trackpoint) and
        ``source_path``.
    """
    raw = _open_tcx(path)
    # NOTE(review): xml.etree is not hardened against maliciously crafted
    # XML; if these files can come from untrusted uploads, confirm whether a
    # hardened parser is required.
    try:
        root = ET.fromstring(raw)
    except ET.ParseError as e:
        # Reported through the module logger, like every other skip reason.
        logger.warning("⚠️ Failed to parse TCX file %s: %s", path, e)
        return None

    activities = root.find("tcx:Activities", NS)
    if activities is None:
        return None
    activity = activities.find("tcx:Activity", NS)
    if activity is None:
        return None

    sport = activity.attrib.get("Sport", "Unknown")
    # Strict running-only domain.
    if sport.lower() not in ("running", "trailrunning"):
        logger.info("Skipping TCX activity: sport=%s (not running)", sport)
        return None

    # The activity <Id> element carries the start timestamp.
    start_time = _tcx_datetime(_tcx_text(activity, "Id"))

    # Walks Lap -> Track -> Trackpoint in document order.
    records: List[Dict[str, Any]] = [
        _tcx_trackpoint_record(tp)
        for tp in activity.findall("tcx:Lap/tcx:Track/tcx:Trackpoint", NS)
    ]
    if not records:
        return None

    # Path.stem only drops the last suffix, so "run.tcx.gz" leaves "run.tcx".
    # Match case-insensitively, as _open_tcx does for ".gz".
    file_stem = Path(path).stem
    if file_stem.lower().endswith(".tcx"):
        file_stem = file_stem[:-4]

    # A falsy result from the helper (e.g. None) is treated as zero distance.
    total_distance = compute_total_distance(records) or 0.0
    if total_distance <= 0:
        logger.info("Skipping TCX activity %s: zero distance", file_stem)
        return None

    # Duration spans the first and last trackpoints; it stays 0.0 when either
    # one has no usable timestamp.
    total_duration = 0.0
    first_time = records[0]["time"]
    last_time = records[-1]["time"]
    if first_time is not None and last_time is not None:
        try:
            total_duration = (last_time - first_time).total_seconds()
        except TypeError:
            # Subtracting a naive datetime from an offset-aware one (or vice
            # versa) raises TypeError; leave the duration at 0.0.
            logger.info(
                "TCX activity %s: mixed naive/aware timestamps, duration unknown",
                file_stem,
            )

    return {
        "id": file_stem,
        "sport": sport,
        "start_time": start_time,
        "total_distance_m": total_distance,
        "total_duration_s": total_duration,
        "records": records,
        "source_path": str(path),
    }