Spaces:
Runtime error
Runtime error
| """Cache de matriz de features para retreino WC (evita recomputar ~5k jogos).""" | |
| from __future__ import annotations | |
| import hashlib | |
| import json | |
| from pathlib import Path | |
| import pandas as pd | |
| from config import settings | |
| CACHE_NAME = "logistic_features_cache.json" | |
def _fingerprint(train_df: pd.DataFrame) -> str:
    """Return a short hex digest identifying the training frame.

    The digest is the first 16 hex characters of the SHA-256 of a JSON
    summary holding the row count plus the stringified maximum and minimum
    of the ``match_date`` column.  For an empty frame both dates are ``""``
    and the column is not accessed.
    """
    if train_df.empty:
        latest = ""
        earliest = ""
    else:
        latest = str(train_df["match_date"].max())
        earliest = str(train_df["match_date"].min())

    summary = {"rows": len(train_df), "max_date": latest, "min_date": earliest}
    # sort_keys keeps the serialized form (and thus the hash) independent of
    # the order in which the summary keys were written.
    encoded = json.dumps(summary, sort_keys=True).encode()
    digest = hashlib.sha256(encoded).hexdigest()
    return digest[:16]
def cache_path() -> Path:
    """Return the location of the feature cache file.

    The file is named ``CACHE_NAME`` and sits directly under
    ``settings.wc_artifact_dir``.
    """
    artifact_dir = settings.wc_artifact_dir
    return artifact_dir / CACHE_NAME
def load_cached_features(
    train_df: pd.DataFrame,
) -> tuple[list[list[float]], list[str]] | None:
    """Return cached ``(x_rows, y_rows)`` for *train_df*, or ``None``.

    ``None`` (a cache miss) is returned when the cache file is missing or
    unreadable, is not valid UTF-8 JSON, does not contain a JSON object,
    was written for a different fingerprint than *train_df*, lacks the
    ``x_rows``/``y_rows`` entries, or stores row lists that are not lists
    of equal length.  A damaged cache file is therefore treated as a miss
    rather than raised to the caller.

    Args:
        train_df: Training frame whose fingerprint must match the one
            stored alongside the cached rows.

    Returns:
        The ``(x_rows, y_rows)`` pair read from the cache, or ``None``.
    """
    path = cache_path()
    try:
        # Read directly instead of checking exists() first: a missing file
        # shows up as OSError, and there is no window for the file to vanish
        # between the check and the read.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return None

    # Valid JSON that is not an object (e.g. a bare list) has no .get().
    if not isinstance(data, dict):
        return None

    try:
        if data.get("fingerprint") != _fingerprint(train_df):
            return None
        x_rows = data["x_rows"]
        y_rows = data["y_rows"]
    except (KeyError, TypeError):
        return None

    # Guard against hand-edited or otherwise inconsistent payloads: features
    # and labels must both be lists and line up one-to-one.
    if not isinstance(x_rows, list) or not isinstance(y_rows, list):
        return None
    if len(x_rows) != len(y_rows):
        return None
    return x_rows, y_rows
def save_cached_features(
    train_df: pd.DataFrame,
    x_rows: list[list[float]],
    y_rows: list[str],
) -> None:
    """Write *x_rows* and *y_rows* to the feature cache file.

    The payload stores the fingerprint of *train_df*, the number of labels
    (``train_size``) and the two row lists.  It is serialized before the
    filesystem is touched, written to a sibling ``.tmp`` file and then moved
    over the real cache path, so an interrupted write cannot leave a
    truncated cache or destroy a previously valid one.

    Args:
        train_df: Training frame the rows were computed from; only its
            fingerprint is stored.
        x_rows: Feature rows to cache.
        y_rows: Labels to cache.

    Raises:
        OSError: If the cache directory or file cannot be written.
        TypeError: If the rows contain values ``json.dumps`` cannot encode.
    """
    path = cache_path()
    serialized = json.dumps(
        {
            "fingerprint": _fingerprint(train_df),
            "train_size": len(y_rows),
            "x_rows": x_rows,
            "y_rows": y_rows,
        },
        ensure_ascii=False,
    )

    path.parent.mkdir(parents=True, exist_ok=True)
    # Same directory as the target so the final rename stays on one
    # filesystem.
    tmp_path = path.with_name(f"{path.name}.tmp")
    try:
        tmp_path.write_text(serialized, encoding="utf-8")
        tmp_path.replace(path)
    except OSError:
        # Best-effort cleanup of the partial temp file; the original error
        # is the one worth reporting.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise