amarorn / models /wc_feature_cache.py
beAnalytic's picture
feat: sync main with feature/superbet-live-inplay
16c19b8 verified
Raw
History Blame Contribute Delete
1.68 kB
"""Feature-matrix cache for WC retraining (avoids recomputing ~5k matches)."""
from __future__ import annotations
import hashlib
import json
from pathlib import Path
import pandas as pd
from config import settings
# File name of the JSON cache; cache_path() joins it onto settings.wc_artifact_dir.
CACHE_NAME = "logistic_features_cache.json"
def _fingerprint(train_df: pd.DataFrame) -> str:
    """Return a short hex digest summarising *train_df*.

    The digest covers only the row count and the smallest/largest value of
    the ``match_date`` column (both rendered with ``str``); an empty frame
    contributes empty strings for the two dates. The result is the first
    16 hex characters of the SHA-256 of that summary serialised as JSON
    with sorted keys.
    """
    if train_df.empty:
        newest = ""
        oldest = ""
    else:
        dates = train_df["match_date"]
        newest = str(dates.max())
        oldest = str(dates.min())

    summary = {
        "rows": len(train_df),
        "max_date": newest,
        "min_date": oldest,
    }
    encoded = json.dumps(summary, sort_keys=True).encode()
    digest = hashlib.sha256(encoded).hexdigest()
    return digest[:16]
def cache_path() -> Path:
    """Return the path of the feature cache: ``settings.wc_artifact_dir / CACHE_NAME``."""
    artifact_dir = settings.wc_artifact_dir
    return artifact_dir / CACHE_NAME
def load_cached_features(
    train_df: pd.DataFrame,
) -> tuple[list[list[float]], list[str]] | None:
    """Return the cached ``(x_rows, y_rows)`` pair if it matches *train_df*.

    The cache file at ``cache_path()`` is read as UTF-8 JSON. Its stored
    ``"fingerprint"`` must equal ``_fingerprint(train_df)``; otherwise the
    cache is treated as stale.

    Args:
        train_df: Training frame the cached features must correspond to.

    Returns:
        ``(data["x_rows"], data["y_rows"])`` on a cache hit, or ``None`` when
        the file is missing or unreadable, is not valid UTF-8 JSON, does not
        contain a JSON object, lacks the expected keys, or carries a
        different fingerprint.
    """
    path = cache_path()
    try:
        # Read directly instead of checking exists() first: the file may
        # disappear between the check and the read, and any OSError
        # (missing file, permissions, path is a directory) is a cache miss.
        data = json.loads(path.read_text(encoding="utf-8"))
        # Valid JSON that is not an object (e.g. a list or string) has no
        # .get(); treat it as a corrupt cache rather than crashing.
        if not isinstance(data, dict):
            return None
        if data.get("fingerprint") != _fingerprint(train_df):
            return None
        return data["x_rows"], data["y_rows"]
    except (
        OSError,
        UnicodeDecodeError,
        json.JSONDecodeError,
        KeyError,
        TypeError,
    ):
        # Best-effort cache: any unreadable or malformed file means "recompute".
        return None
def save_cached_features(
    train_df: pd.DataFrame,
    x_rows: list[list[float]],
    y_rows: list[str],
) -> None:
    """Write the feature rows and labels to the cache file as JSON.

    The parent directory of ``cache_path()`` is created if needed. The
    stored object holds the fingerprint of *train_df*, the number of labels
    (``train_size``), and the ``x_rows`` / ``y_rows`` lists themselves.

    Args:
        train_df: Training frame the features were derived from; only its
            fingerprint is stored.
        x_rows: Feature rows to persist.
        y_rows: Labels to persist; its length is recorded as ``train_size``.
    """
    target = cache_path()
    # Create the directory before serialising, matching the original ordering.
    target.parent.mkdir(parents=True, exist_ok=True)

    record = {
        "fingerprint": _fingerprint(train_df),
        "train_size": len(y_rows),
        "x_rows": x_rows,
        "y_rows": y_rows,
    }
    serialized = json.dumps(record, ensure_ascii=False)
    target.write_text(serialized, encoding="utf-8")