import asyncio import json import threading from contextlib import asynccontextmanager from dataclasses import asdict from datetime import datetime, timezone from pathlib import Path from typing import TYPE_CHECKING, Any from fastapi import FastAPI, File, HTTPException, Query, UploadFile from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel, Field from api.auth import ApiKeyMiddleware, api_key_enabled from api.data_pulse import ( DataPulseMiddleware, build_pulse_snapshot, invalidate_pulse_meta_cache, ) from api.lake_cache import get_lake_counts, invalidate_lake_counts from config import settings from ingest.fixtures.brasileirao import load_fixtures from ingest.odds.the_odds_api import fetch_live_h2h_odds, merge_schedule_with_odds, save_odds_file from ingest.meta import collection_stats from models.corners_predictor import CornersPredictor from models.ev_value import MatchValueReport, evaluate_match from models.baseline import predict_baseline, predict_baseline_probs if TYPE_CHECKING: from models.wc_predictor import WcPrediction, WcPredictor from schemas.wc_kxl_dynamic import WcKxlMatchInput from pipelines.gold import build_gold_for_match from ingest.news_sync import sync_news_sources from pipelines.news_feed import ( build_news_all, build_news_cards, build_news_feed, resolve_news_teams, ) from pipelines.silver import load_silver from pipelines.wc_squads import get_squad_by_team, list_squad_teams, load_wc_squads from pipelines.wc_schedule import build_schedule_response, load_wc_schedule, official_match_exists from pipelines.wc_group_pressure import lookup_2026_group from pipelines.wc_group_standings import build_group_standings from schemas.national_teams import normalize_national_team from schemas.user_bet import UserOpenBetRequest WC_ROUND_FILE = Path("data/rounds/wc_2026.json") _wc_models_ready = False _wc_predictor: Any = None _wc_artifact_meta: dict = {} _wc_train_lock = threading.Lock() _wc_train_thread: threading.Thread | None = None def _wc_round_cache(): from api import wc_round_cache return wc_round_cache def _warm_sofascore_imports() -> None: """Carrega módulos Sofascore no thread principal (evita deadlock no thread pool).""" try: import ingest.sofascore.client # noqa: F401 import ingest.sofascore.fept_ingest # noqa: F401 import ingest.sofascore.stats_ingest # noqa: F401 except ImportError: pass def _warm_wc_models() -> None: global _wc_models_ready _warm_sofascore_imports() try: CornersPredictor() get_wc_predictor() _wc_round_cache().warm_from_disk() _wc_models_ready = True except ValueError: _wc_models_ready = False @asynccontextmanager async def lifespan(app: FastAPI): """Carrega modelos WC em background para a API aceitar tráfego imediatamente (deploy/health).""" loop = asyncio.get_event_loop() loop.run_in_executor(None, _warm_wc_models) yield app = FastAPI( title="Bolão News API", description="API de contexto e previsão baseada em notícias esportivas", version="0.2.0", lifespan=lifespan, ) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], expose_headers=[ "X-Data-Pulse-At", "X-Articles-Silver", "X-Fixtures", "X-WC-Models-Ready", "X-Collections-Last-Run", "X-Latest-Silver-At", ], ) app.add_middleware( DataPulseMiddleware, wc_models_ready=lambda: _wc_models_ready, ) app.add_middleware(ApiKeyMiddleware) def _custom_openapi(): if app.openapi_schema: return app.openapi_schema from fastapi.openapi.utils import get_openapi schema = get_openapi( title=app.title, version=app.version, description=app.description, routes=app.routes, ) if api_key_enabled(): schema.setdefault("components", {})["securitySchemes"] = { "ApiKeyHeader": { "type": "apiKey", "in": "header", "name": "X-API-Key", }, "BearerAuth": { "type": "http", "scheme": "bearer", }, } schema["security"] = [{"ApiKeyHeader": []}, {"BearerAuth": []}] app.openapi_schema = schema return app.openapi_schema app.openapi = _custom_openapi def get_wc_predictor(*, force: bool = False) -> "WcPredictor": global _wc_predictor, _wc_artifact_meta from models.wc_artifact import load_or_train_wc_predictor if force or _wc_predictor is None: _wc_predictor, _wc_artifact_meta = load_or_train_wc_predictor( force=force or settings.wc_artifact_force_retrain, allow_train=force or settings.wc_artifact_force_retrain, ) return _wc_predictor def _get_wc_predictor() -> "WcPredictor": return get_wc_predictor() class MatchRequest(BaseModel): home_team: str = Field(..., examples=["Flamengo"]) away_team: str = Field(..., examples=["Palmeiras"]) round_number: int = Field(1, ge=1) competition: str = Field("Brasileirão", examples=["Brasileirão"]) season: int | None = None class MatchContextResponse(BaseModel): match_id: str home_team: str away_team: str context_text: str news_count_home: int news_count_away: int injury_mentions_home: int injury_mentions_away: int sentiment_home: float | None sentiment_away: float | None home_position: int | None = None away_position: int | None = None home_form: str | None = None away_form: str | None = None prediction: str | None = None confidence: float | None = None reason: str | None = None model_source: str | None = None probabilities: dict[str, float] | None = None class RoundPrediction(BaseModel): home_team: str away_team: str prediction: str confidence: float reason: str news_count: int class RoundResponse(BaseModel): round_number: int competition: str predictions: list[RoundPrediction] class WcValueRequest(BaseModel): schedule_file: str = Field("data/rounds/wc_2026.json", examples=["data/rounds/wc_2026.json"]) output_odds_file: str = Field( "data/rounds/wc_2026_odds.json", examples=["data/rounds/wc_2026_odds.json"], ) sport_key: str | None = Field(None, examples=["soccer_fifa_world_cup"]) bookmaker: str | None = Field(None, examples=["bet365"]) regions: str | None = Field(None, examples=["eu"]) min_edge: float = Field(0.03, ge=0.0, le=1.0) save_odds_file: bool = True class WcOutcomeValue(BaseModel): outcome: str odd: float model_prob: float implied_prob: float expected_value: float fair_odd: float kelly_quarter: float class WcMatchValueResponse(BaseModel): home_team: str away_team: str best: WcOutcomeValue | None = None outcomes: list[WcOutcomeValue] class WcValueResponse(BaseModel): matched_games: int total_schedule_games: int source: str captured_at: str | None = None edges: list[WcMatchValueResponse] class WcCornersPredictRequest(BaseModel): home_team: str = Field(..., examples=["Brasil"]) away_team: str = Field(..., examples=["Marrocos"]) phase: str = Field("group", examples=["group"]) class WcCornerFactors(BaseModel): league_avg: float home_attack: float away_attack: float home_defense: float away_defense: float home_advantage: float elo_factor_home: float elo_factor_away: float lambda_home: float lambda_away: float training_matches: int blend_with_goal_proxy: float class WcCornersPredictResponse(BaseModel): home_team: str away_team: str data_source: str expected_corners: str expected_total_corners: float most_likely_corners: str prob_home_more_corners: float prob_draw_corners: float prob_away_more_corners: float line_probs: dict[str, float] factors: WcCornerFactors training_summary: dict class WcPredictRequest(BaseModel): home_team: str = Field(..., examples=["Brasil"]) away_team: str = Field(..., examples=["Marrocos"]) phase: str = Field("group", examples=["group"]) match_date: str | None = Field( None, description="Data do confronto (ISO); usada em /simulate e busca Sofascore", examples=["2026-06-06"], ) fifa_match_id: str | None = Field( None, description="IdMatch FIFA; evita busca na janela quando conhecido", examples=["400123456"], ) sofascore_event_id: int | None = Field( None, description="ID do evento Sofascore; preenche FEPT automaticamente se kxl_match.fept ausente", examples=[11774480], ) kxl_match: WcKxlMatchInput | None = Field( None, description="Entrada dinâmica KXL (FECL, FEJU, FEDE, FEPT, FEEM) — opcional", ) class WcInPlayRequest(BaseModel): home_team: str = Field(..., examples=["Brasil"]) away_team: str = Field(..., examples=["Egito"]) home_score: int = Field(..., ge=0, examples=[1]) away_score: int = Field(..., ge=0, examples=[1]) minute: int = Field(..., ge=0, le=120, description="Minuto de jogo (0–120)", examples=[17]) phase: str = Field("group", examples=["group"]) match_minutes: int = Field(90, ge=45, le=120, examples=[90]) ht_home_score: int | None = Field( None, ge=0, description="Placar no intervalo (casa). Recomendado quando minute > 45.", ) ht_away_score: int | None = Field( None, ge=0, description="Placar no intervalo (fora). Recomendado quando minute > 45.", ) superbet_event_id: int | None = Field( None, description="ID Superbet: preenche placar/minuto ao vivo e benchmark de mercado", examples=[13247229], ) merge_superbet_odds: bool = Field( False, description="Salva snapshot nas odds de mercado (superbet_odds.json) para treino", ) class WcInPlayResponse(BaseModel): home_team: str away_team: str current_score: str minute: int match_minutes: int remaining_fraction: float lambda_full_home: float lambda_full_away: float lambda_remaining_home: float lambda_remaining_away: float rho_used: float prob_final_home: float prob_final_draw: float prob_final_away: float prob_ht_home: float prob_ht_draw: float prob_ht_away: float prob_no_more_goals: float prob_next_goal_home: float prob_next_goal_away: float final_line_probs: dict[str, float] remainder_line_probs: dict[str, float] ht_line_probs: dict[str, float] second_half_line_probs: dict[str, float] team_final_line_probs: dict[str, float] top_final_scores: dict[str, float] top_ht_ft: dict[str, float] combo_markets: dict[str, float] btts_final: float n_simulations: int market_benchmark: dict | None = None superbet: dict | None = None class UserBetRequest(BaseModel): market: str = Field(..., examples=["h2h"], description="h2h, over_2_5, btts, next_goal, combo_btts_over_3_5") outcome: str = Field(..., examples=["1"], description="1, X, 2, yes, no, home, away") stake: float = Field(..., gt=0, examples=[100]) odds_placed: float = Field(..., gt=1, examples=[2.1]) class WcBetAdviceRequest(BaseModel): home_team: str = Field(..., examples=["Brasil"]) away_team: str = Field(..., examples=["Egito"]) superbet_event_id: int = Field(..., examples=[13247229]) phase: str = Field("friendly", examples=["friendly"]) bankroll: float = Field(1000, gt=0, examples=[1000]) user_bet: UserBetRequest | None = None class WcBetAdviceResponse(BaseModel): home_team: str away_team: str minute: int current_score: str | None cashout: dict | None aportes: list[dict] inplay_summary: dict superbet_event_id: int confidence: dict | None = None class WcSuperbetLiveAdviceResponse(WcBetAdviceResponse): period_label: str | None = None status: str | None = None is_finished: bool = False is_live: bool = True h2h_odds: dict[str, float] = Field(default_factory=dict) h2h_implied: dict[str, float] = Field(default_factory=dict) h2h_overround: float | None = None generosity_probs: dict[str, float] = Field(default_factory=dict) market_benchmark: dict | None = None strategy: dict | None = None captured_at: str | None = None betradar_id: str | None = None raw_market_count: int = 0 btts_odds: dict[str, float] = Field(default_factory=dict) next_goal_odds: dict[str, float] = Field(default_factory=dict) analysis_coverage: dict[str, bool | list[str]] | None = None class WcSuperbetLiveEventResponse(BaseModel): event_id: int home_team: str away_team: str event_name: str sport_id: int tournament_id: int | None utc_date: str | None betradar_id: str | None minute: int home_score: int away_score: int period_label: str | None status: str | None market_count: int h2h_odds: dict[str, float] captured_at: str class WcSuperbetLiveResponse(BaseModel): count: int sport_id: int | None events: list[WcSuperbetLiveEventResponse] captured_at: str class WcSuperbetEventResponse(BaseModel): event_id: int home_team: str away_team: str event_name: str utc_date: str | None betradar_id: str | None is_live: bool inplay: dict | None h2h_odds: dict[str, float] h2h_implied: dict[str, float] totals_implied: dict[str, dict[str, float]] corners_implied: dict[str, dict[str, float]] combo_markets: dict[str, dict[str, float]] generosity_probs: dict[str, float] raw_market_count: int captured_at: str class WcGoalFactors(BaseModel): league_avg: float home_attack: float away_attack: float home_defense: float away_defense: float home_advantage: float elo_factor_home: float elo_factor_away: float lambda_home: float lambda_away: float rho: float class WcMonteCarloBreakdown(BaseModel): prob_home: float prob_draw: float prob_away: float expected_goals_home: float expected_goals_away: float over_2_5: float under_2_5: float both_teams_score: float clean_sheet_home: float clean_sheet_away: float top_scores: dict[str, float] n_simulations: int rho_used: float class WcModelBreakdown(BaseModel): dixon_coles: dict[str, float] logistic: dict[str, float] dixon_coles_rho: float | None = None poisson_factors: WcGoalFactors | None = None holdout_2022_accuracy: float | None = None ensemble_weights: dict[str, float] ensemble_brier: float | None = None kxl_baseline: dict | None = None kxl_collision: dict | None = None kxl_dynamic: dict | None = None kxl_fept: dict | None = None monte_carlo: WcMonteCarloBreakdown | None = None class WcPredictionResponse(BaseModel): home_team: str away_team: str prediction: str confidence: float prob_home: float prob_draw: float prob_away: float poisson_score: str expected_goals: str context: str h2h_summary: str model_breakdown: WcModelBreakdown class WcSimulationScore(BaseModel): score: str prob: float class WcSimulationScenario(BaseModel): name: str description: str prob: float class WcSimulationResponse(BaseModel): home_team: str away_team: str match_date: str | None prediction: str confidence: float prob_home: float prob_draw: float prob_away: float poisson_score: str | None = None expected_goals: str | None = None # Dados reais da FIFA fifa_home_lineup: list[dict[str, Any]] | None = None fifa_away_lineup: list[dict[str, Any]] | None = None fifa_home_bench: list[dict[str, Any]] | None = None fifa_away_bench: list[dict[str, Any]] | None = None fifa_home_goals: list[dict[str, Any]] | None = None fifa_away_goals: list[dict[str, Any]] | None = None fifa_home_tactics: str | None = None fifa_away_tactics: str | None = None fifa_home_coach: str | None = None fifa_away_coach: str | None = None fifa_stadium: str | None = None fifa_attendance: int | None = None fifa_home_points: float | None = None fifa_away_points: float | None = None fifa_points_diff: float | None = None lineup_source: str | None = Field( None, description="Origem das escalações exibidas: fifa ou sofascore", ) # Dados enriquecidos enrich_features: dict[str, Any] | None = None stats_features: dict[str, Any] | None = None model_breakdown: dict[str, Any] warnings: list[str] class WcRoundResponse(BaseModel): season: int competition: str phase: str round: int predictions: list[WcPredictionResponse] class WcGroupStandingRow(BaseModel): position: int team: str played: int won: int drawn: int lost: int gf: int ga: int gd: int points: int class WcGroupStandingsBlock(BaseModel): group: str standings: list[WcGroupStandingRow] class WcGroupStandingsResponse(BaseModel): season: int competition: str simulated: bool = True note: str groups: list[WcGroupStandingsBlock] class WcTeamsResponse(BaseModel): teams: list[str] count: int class WcFriendlyItem(BaseModel): event_id: int | None = None fifa_match_id: str | None = None sources: list[str] = Field(default_factory=lambda: ["sofascore"]) home_team: str away_team: str match_date: str | None = None status: str home_score: int | None = None away_score: int | None = None tournament: str is_home: bool class WcFriendliesResponse(BaseModel): team: str year: int count: int friendlies: list[WcFriendlyItem] source: str = "sofascore+fifa" class WcScheduleGroup(BaseModel): id: str teams: list[str] class WcScheduleMatchItem(BaseModel): match_id: str home_team: str away_team: str group: str | None = None round: int phase: str kickoff: str | None = None venue: str | None = None city: str | None = None class WcScheduleResponse(BaseModel): season: int competition: str phase: str groups: list[WcScheduleGroup] matchdays: list[int] matches: list[WcScheduleMatchItem] total_matches: int class WcSquadPlayerItem(BaseModel): name: str club: str | None = None class WcSquadSectionItem(BaseModel): role: str position: str players: list[WcSquadPlayerItem] class WcSquadTeamItem(BaseModel): team: str player_count: int sections: list[WcSquadSectionItem] class WcSquadTeamsResponse(BaseModel): season: int competition: str source_url: str updated_at: str team_count: int teams: list[dict] class WcSquadDetailResponse(BaseModel): season: int competition: str source_url: str updated_at: str squad: WcSquadTeamItem class WcEditionItem(BaseModel): season: int label: str match_count: int class WcEditionsResponse(BaseModel): editions: list[WcEditionItem] class WcHistoricalMatchItem(BaseModel): match_id: str season: int home_team: str away_team: str match_date: str phase: str phase_label: str group_name: str | None = None home_score: int away_score: int result: str result_label: str score: str class WcEditionMatchesResponse(BaseModel): season: int matches: list[WcHistoricalMatchItem] class WcValidateRequest(BaseModel): season: int = Field(..., ge=1930, le=2022) match_id: str | None = None home_team: str | None = None away_team: str | None = None class WcValidateMatchInfo(BaseModel): match_id: str season: int home_team: str away_team: str match_date: str phase: str phase_label: str group_name: str | None = None home_score: int away_score: int actual_result: str actual_result_label: str actual_score: str class NewsArticleItem(BaseModel): id: str source: str source_name: str source_url: str title: str summary: str | None = None body_preview: str published_at: str | None = None scraped_at: str | None = None teams_mentioned: list[str] = Field(default_factory=list) national_teams_mentioned: list[str] = Field(default_factory=list) categories: list[str] = Field(default_factory=list) sentiment_score: float | None = None sentiment_label: str class NewsSourceItem(BaseModel): id: str name: str count: int class NewsFeedResponse(BaseModel): total: int limit: int offset: int sources: list[NewsSourceItem] articles: list[NewsArticleItem] class NewsCardsResponse(BaseModel): """Notícias formatadas para NewsArticleCard no frontend.""" total: int limit: int offset: int teams: list[str] = Field(default_factory=list) cards: list[NewsArticleItem] class NewsSyncResponse(BaseModel): collected: int by_source: dict[str, int] silver_updated: bool silver_path: str | None = None articles_silver: int synced_at: str class WcValidateResponse(BaseModel): match: WcValidateMatchInfo prediction: str confidence: float prob_home: float prob_draw: float prob_away: float poisson_score: str expected_goals: str correct: bool context: str h2h_summary: str model_breakdown: WcModelBreakdown cutoff_date: str cutoff_note: str def _context_to_response(context, include_prediction: bool = False) -> MatchContextResponse: resp = MatchContextResponse( match_id=context.match_id, home_team=context.home_team, away_team=context.away_team, context_text=context.context_text, news_count_home=context.features.news_count_home, news_count_away=context.features.news_count_away, injury_mentions_home=context.features.injury_mentions_home, injury_mentions_away=context.features.injury_mentions_away, sentiment_home=context.features.sentiment_home, sentiment_away=context.features.sentiment_away, home_position=context.features.home_position, away_position=context.features.away_position, home_form=context.features.home_form, away_form=context.features.away_form, ) if include_prediction: pred, conf, reason = predict_baseline(context.features) resp.prediction = pred resp.confidence = conf resp.reason = reason resp.model_source = "baseline" resp.probabilities = predict_baseline_probs(context.features) return resp def _breakdown_to_response(breakdown: dict) -> WcModelBreakdown: pf = breakdown.get("poisson_factors") mc = breakdown.get("monte_carlo") return WcModelBreakdown( dixon_coles=breakdown["dixon_coles"], logistic=breakdown["logistic"], dixon_coles_rho=breakdown.get("dixon_coles_rho"), poisson_factors=WcGoalFactors(**pf) if pf else None, holdout_2022_accuracy=breakdown.get("holdout_2022_accuracy"), ensemble_weights=breakdown["ensemble_weights"], ensemble_brier=breakdown.get("ensemble_brier"), kxl_baseline=breakdown.get("kxl_baseline"), kxl_collision=breakdown.get("kxl_collision"), kxl_dynamic=breakdown.get("kxl_dynamic"), kxl_fept=breakdown.get("kxl_fept"), monte_carlo=WcMonteCarloBreakdown(**mc) if mc else None, ) def _wc_prediction_to_response(pred: "WcPrediction") -> WcPredictionResponse: breakdown = pred.model_breakdown return WcPredictionResponse( home_team=pred.home_team, away_team=pred.away_team, prediction=pred.prediction, confidence=round(pred.confidence, 4), prob_home=round(pred.prob_home, 4), prob_draw=round(pred.prob_draw, 4), prob_away=round(pred.prob_away, 4), poisson_score=pred.poisson_score, expected_goals=pred.expected_goals, context=pred.context, h2h_summary=pred.h2h_summary, model_breakdown=_breakdown_to_response(breakdown), ) def _load_wc_round(path: Path = WC_ROUND_FILE) -> dict: if not path.exists(): raise HTTPException(status_code=404, detail=f"Rodada WC não encontrada: {path}") try: return json.loads(path.read_text(encoding="utf-8")) except Exception as exc: raise HTTPException(status_code=400, detail=f"Falha ao ler rodada WC: {exc}") from exc def _match_value_to_response(report: MatchValueReport) -> WcMatchValueResponse: best = None if report.best: best = WcOutcomeValue( outcome=report.best.outcome, odd=report.best.odd, model_prob=report.best.model_prob, implied_prob=report.best.implied_prob, expected_value=report.best.expected_value, fair_odd=report.best.fair_odd, kelly_quarter=report.best.kelly_quarter, ) outcomes = [ WcOutcomeValue( outcome=item.outcome, odd=item.odd, model_prob=item.model_prob, implied_prob=item.implied_prob, expected_value=item.expected_value, fair_odd=item.fair_odd, kelly_quarter=item.kelly_quarter, ) for item in report.outcomes ] return WcMatchValueResponse( home_team=report.home_team, away_team=report.away_team, best=best, outcomes=outcomes, ) def _sanitize_match_item(data: dict) -> dict: import math out = dict(data) group = out.get("group_name") if group is None or (isinstance(group, float) and math.isnan(group)): out["group_name"] = None else: out["group_name"] = str(group) return out @app.get("/health/live") def health_live(): """Liveness para o proxy Fly — sem I/O no lake (sobe antes do warm de modelos).""" return {"status": "ok"} @app.get("/health") async def health(): stats = collection_stats() articles_silver, fixtures = await asyncio.to_thread(get_lake_counts) return { "status": "ok", "lake_root": str(settings.lake_root), "articles_silver": articles_silver, "fixtures": fixtures, "collections": stats, "wc_models_ready": _wc_models_ready, "wc_artifact": _wc_artifact_meta if _wc_models_ready else None, } @app.get("/data/pulse") async def data_pulse(): """Heartbeat do datalake (GET) — mesmo snapshot anexado via headers em cada requisição.""" return await asyncio.to_thread( build_pulse_snapshot, wc_models_ready=_wc_models_ready, force_lake_counts=True, ) @app.get("/") def root(): return { "name": "api-noticia", "status": "running", "auth_required": api_key_enabled(), "docs": "/docs", "health": "/health", "data_pulse": "/data/pulse", "endpoints": [ "/data/pulse", "/news/feed", "/news/cards", "/news/all", "/news/sync", "/context", "/predict", "/round/predict", "/worldcup/predict", "/worldcup/inplay", "/worldcup/superbet/live", "/worldcup/superbet/live/{event_id}/advice", "/worldcup/superbet/events/{event_id}", "/worldcup/bet/advice", "/worldcup/round", "/worldcup/schedule", "/worldcup/squads", "/worldcup/squads/{team}", "/worldcup/teams", "/worldcup/friendlies", "/worldcup/value/live", "/worldcup/editions", "/worldcup/editions/{season}/matches", "/worldcup/validate", "/worldcup/walkforward", "/worldcup/retrain", "/worldcup/group-standings", ], } @app.post("/news/sync", response_model=NewsSyncResponse) async def news_sync( full_rebuild: bool = Query( False, description="Reprocessa todo o bronze no silver (use após purge-news)", ), fetch_body: bool | None = Query( None, description="Baixa o HTML de cada URL (texto completo no body_preview; bem mais lento)", ), ): try: do_fetch = ( settings.news_sync_fetch_body if fetch_body is None else fetch_body ) result = await sync_news_sources( fetch_body=do_fetch, run_silver=True, full_silver_rebuild=full_rebuild, ) except Exception as exc: raise HTTPException(status_code=502, detail=f"Falha ao sincronizar fontes: {exc}") from exc invalidate_lake_counts() invalidate_pulse_meta_cache() return NewsSyncResponse(**result) @app.get("/news/feed", response_model=NewsFeedResponse) async def news_feed( limit: int = 24, offset: int = 0, source: str | None = None, q: str | None = None, days: int | None = 30, ): limit = min(max(limit, 1), 100) offset = max(offset, 0) if days is not None: days = min(max(days, 1), 365) silver_df = await asyncio.to_thread(load_silver) payload = await asyncio.to_thread( build_news_feed, silver_df, limit=limit, offset=offset, source=source, query=q, days=days, ) return NewsFeedResponse( total=payload["total"], limit=payload["limit"], offset=payload["offset"], sources=[NewsSourceItem(**s) for s in payload["sources"]], articles=[NewsArticleItem(**a) for a in payload["articles"]], ) @app.get("/news/all", response_model=NewsFeedResponse) async def news_all( offset: int = 0, source: str | None = None, q: str | None = None, days: int | None = Query( None, description="Janela em dias; omita para trazer todo o histórico no lake", ), team: str | None = None, home_team: str | None = None, away_team: str | None = None, teams: str | None = Query(None, description="Brasil,Marrocos"), ): offset = max(offset, 0) if days is not None: days = min(max(days, 1), 3650) team_list: list[str] | None = None if teams: team_list = [t.strip() for t in teams.split(",") if t.strip()] resolved_teams = None if team_list or team or home_team or away_team: resolved_teams = resolve_news_teams( team=normalize_national_team(team) if team else None, home_team=normalize_national_team(home_team) if home_team else None, away_team=normalize_national_team(away_team) if away_team else None, teams=team_list, ) silver_df = await asyncio.to_thread(load_silver) payload = await asyncio.to_thread( build_news_all, silver_df, offset=offset, source=source, query=q, days=days, teams=resolved_teams, ) return NewsFeedResponse( total=payload["total"], limit=len(payload["articles"]), offset=payload["offset"], sources=[NewsSourceItem(**s) for s in payload["sources"]], articles=[NewsArticleItem(**a) for a in payload["articles"]], ) @app.get("/news/cards", response_model=NewsCardsResponse) async def news_cards( limit: int = 12, offset: int = 0, source: str | None = None, q: str | None = None, days: int | None = 14, team: str | None = Query(None, description="Filtrar por um time/seleção"), home_team: str | None = Query(None, description="Mandante (usa com away_team)"), away_team: str | None = Query(None, description="Visitante"), teams: str | None = Query( None, description="Lista separada por vírgula, ex: Brasil,Marrocos", ), ): limit = min(max(limit, 1), 48) offset = max(offset, 0) if days is not None: days = min(max(days, 1), 90) team_list: list[str] | None = None if teams: team_list = [t.strip() for t in teams.split(",") if t.strip()] home = normalize_national_team(home_team) if home_team else None away = normalize_national_team(away_team) if away_team else None single = normalize_national_team(team) if team else None silver_df = await asyncio.to_thread(load_silver) payload = await asyncio.to_thread( build_news_cards, silver_df, limit=limit, offset=offset, source=source, query=q, days=days, team=single, home_team=home, away_team=away, teams=team_list, ) return NewsCardsResponse( total=payload["total"], limit=payload["limit"], offset=payload["offset"], teams=payload["teams"], cards=[NewsArticleItem(**c) for c in payload["cards"]], ) @app.post("/context", response_model=MatchContextResponse) def get_match_context(req: MatchRequest): silver_df = load_silver() fixtures_df = load_fixtures() match_id = f"{req.home_team}_{req.away_team}_{req.round_number}".lower().replace(" ", "_") context = build_gold_for_match( match_id=match_id, home_team=req.home_team, away_team=req.away_team, round_number=req.round_number, competition=req.competition, match_date=datetime.now(timezone.utc), silver_df=silver_df, season=req.season, fixtures_df=fixtures_df if not fixtures_df.empty else None, live_mode=True, ) return _context_to_response(context) @app.post("/predict", response_model=MatchContextResponse) def predict_match(req: MatchRequest): resp = get_match_context(req) context = build_gold_for_match( match_id=resp.match_id, home_team=resp.home_team, away_team=resp.away_team, round_number=req.round_number, competition=req.competition, match_date=datetime.now(timezone.utc), silver_df=load_silver(), season=req.season, fixtures_df=load_fixtures(), live_mode=True, ) pred, conf, reason = predict_baseline(context.features) resp.prediction = pred resp.confidence = conf resp.reason = reason resp.model_source = "baseline" resp.probabilities = predict_baseline_probs(context.features) return resp @app.get("/round/predict", response_model=RoundResponse) def predict_current_round(): from pipelines.current_round import load_round_schedule, predict_round try: schedule = load_round_schedule() except FileNotFoundError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc results = predict_round(save=False) return RoundResponse( round_number=schedule["round"], competition=schedule.get("competition", "Brasileirão"), predictions=[ RoundPrediction( home_team=r["home_team"], away_team=r["away_team"], prediction=r["prediction"], confidence=r["confidence"], reason=r["reason"], news_count=r["news_count"], ) for r in results ], ) class WcSofascoreResolveResponse(BaseModel): event_id: int home_team: str away_team: str match_date: str sofascore_home: str | None = None sofascore_away: str | None = None class WcSofascoreStatsResponse(BaseModel): event_id: int home_team: str away_team: str match_date: str | None = None stats: dict[str, float | int | str | None] fetched_at: str source: str = "sofascore" cached: bool = False @app.get("/worldcup/sofascore/resolve", response_model=WcSofascoreResolveResponse) def worldcup_sofascore_resolve( home_team: str = Query(...), away_team: str = Query(...), date: str = Query(..., description="Data do jogo (YYYY-MM-DD)"), ): from datetime import date as date_type from ingest.sofascore.client import SofascoreClient, SofascoreClientError from ingest.sofascore.event_helpers import find_event_id from ingest.sofascore.teams import event_team_names home = normalize_national_team(home_team) away = normalize_national_team(away_team) try: match_date = date_type.fromisoformat(date) except ValueError as exc: raise HTTPException(status_code=400, detail="Data inválida; use YYYY-MM-DD") from exc try: client = SofascoreClient() event = find_event_id( client, home_team=home, away_team=away, match_date=match_date, ) except LookupError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc except SofascoreClientError as exc: raise HTTPException(status_code=502, detail=str(exc)) from exc event_home, event_away = event_team_names(event) return WcSofascoreResolveResponse( event_id=int(event["id"]), home_team=home, away_team=away, match_date=match_date.isoformat(), sofascore_home=event_home or None, sofascore_away=event_away or None, ) @app.get( "/worldcup/sofascore/{event_id}/statistics", response_model=WcSofascoreStatsResponse, ) def worldcup_sofascore_statistics( event_id: int, refresh: bool = Query(False, description="Força nova coleta no Sofascore"), ): from datetime import datetime, timezone from ingest.sofascore.client import SofascoreClientError from ingest.sofascore.stats_ingest import ingest_match_stats, load_match_stats if not refresh: cached = load_match_stats(event_id) if cached: fetched_at = cached.get("fetched_at") if not isinstance(fetched_at, str): fetched_at = datetime.now(timezone.utc).isoformat() stats = { k: v for k, v in cached.items() if k not in ( "event_id", "home_team", "away_team", "match_date", "source", "fetched_at", ) } return WcSofascoreStatsResponse( event_id=int(cached["event_id"]), home_team=str(cached["home_team"]), away_team=str(cached["away_team"]), match_date=cached.get("match_date"), stats=stats, fetched_at=fetched_at, cached=True, ) try: result = ingest_match_stats(event_id=event_id, save=True) except LookupError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc except SofascoreClientError as exc: raise HTTPException(status_code=502, detail=str(exc)) from exc except ValueError as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc payload = result.to_payload() stats = { k: v for k, v in payload.items() if k not in ( "event_id", "home_team", "away_team", "match_date", "source", "fetched_at", ) } return WcSofascoreStatsResponse( event_id=result.event_id, home_team=result.home_team, away_team=result.away_team, match_date=result.match_date, stats=stats, fetched_at=str(payload["fetched_at"]), cached=False, ) @app.post("/worldcup/corners/predict", response_model=WcCornersPredictResponse) def worldcup_corners_predict(req: WcCornersPredictRequest): home = normalize_national_team(req.home_team) away = normalize_national_team(req.away_team) if req.phase == "group" and not official_match_exists(home, away, phase="group"): raise HTTPException( status_code=400, detail=f"Confronto {home} x {away} não consta na tabela oficial da fase de grupos.", ) result = CornersPredictor().predict(home, away, phase=req.phase) pred = result.prediction factors = result.factors.as_dict() return WcCornersPredictResponse( home_team=result.home_team, away_team=result.away_team, data_source=result.data_source, expected_corners=f"{pred.expected_home_corners:.1f}x{pred.expected_away_corners:.1f}", expected_total_corners=round(pred.expected_total_corners, 2), most_likely_corners=pred.most_likely_score, prob_home_more_corners=round(pred.prob_home_more, 4), prob_draw_corners=round(pred.prob_draw_corners, 4), prob_away_more_corners=round(pred.prob_away_more, 4), line_probs={k: round(v, 4) for k, v in pred.line_probs.items()}, factors=WcCornerFactors(**factors), training_summary=result.training_summary, ) @app.get("/worldcup/superbet/live", response_model=WcSuperbetLiveResponse) def worldcup_superbet_live( sport_id: int = Query(5, description="Filtra por esporte (5=futebol)."), all_sports: bool = Query(False, description="Ignora sport_id e retorna todos os esportes."), ): """Lista jogos ao vivo na Superbet (feed /live).""" from datetime import datetime, timezone from ingest.superbet.client import SuperbetClient, SuperbetClientError filter_sport = None if all_sports else sport_id try: events = SuperbetClient().fetch_live_events(sport_id=filter_sport) except SuperbetClientError as exc: raise HTTPException(status_code=502, detail=str(exc)) from exc captured_at = datetime.now(timezone.utc).isoformat() return WcSuperbetLiveResponse( count=len(events), sport_id=filter_sport, events=[WcSuperbetLiveEventResponse(**event.to_dict()) for event in events], captured_at=captured_at, ) @app.get("/worldcup/superbet/live/{event_id}/advice", response_model=WcSuperbetLiveAdviceResponse) def worldcup_superbet_live_advice( event_id: int, phase: str = Query("friendly", description="Fase do modelo (friendly para amistosos)"), bankroll: float = Query(1000, gt=0), market: str | None = Query(None, description="Mercado da aposta ativa (h2h, over_2_5, btts, next_goal)"), outcome: str | None = Query(None, description="Palpite da aposta (1, X, 2, yes, home, away)"), stake: float | None = Query(None, gt=0), odds_placed: float | None = Query(None, gt=1), ): """Captura evento Superbet ao vivo, roda modelo e retorna cash-out / aportes.""" from ingest.superbet.advice import run_live_advice from ingest.superbet.client import SuperbetClientError from models.wc_bet_advice import UserBetInput try: predictor = _get_wc_predictor() except ValueError as exc: raise HTTPException(status_code=503, detail=str(exc)) from exc user_bet = None if market and outcome and stake is not None and odds_placed is not None: user_bet = UserBetInput( market=market, outcome=outcome, stake=stake, odds_placed=odds_placed, ) try: payload = run_live_advice( event_id, predictor, phase=phase, bankroll=bankroll, user_bet=user_bet, ) except SuperbetClientError as exc: raise HTTPException(status_code=502, detail=str(exc)) from exc return WcSuperbetLiveAdviceResponse(**payload) @app.get("/worldcup/superbet/events/{event_id}", response_model=WcSuperbetEventResponse) def worldcup_superbet_event( event_id: int, merge_odds: bool = False, save_bronze: bool = True, ): """Snapshot Superbet: estado ao vivo, odds e probabilidades implícitas.""" from ingest.superbet.client import SuperbetClient, SuperbetClientError from ingest.superbet.store import merge_snapshot_into_odds_file, save_event_snapshot try: snapshot = SuperbetClient().fetch_event(event_id) except SuperbetClientError as exc: raise HTTPException(status_code=502, detail=str(exc)) from exc if save_bronze: save_event_snapshot(snapshot) if merge_odds and snapshot.h2h_odds: merge_snapshot_into_odds_file(snapshot) from pipelines.wc_market_features import load_match_odds_index load_match_odds_index.cache_clear() return WcSuperbetEventResponse(**snapshot.to_dict()) @app.post("/worldcup/bet/advice", response_model=WcBetAdviceResponse) def worldcup_bet_advice(req: WcBetAdviceRequest): """Captura jogo ao vivo (Superbet), roda modelo e recomenda cash-out / aporte.""" from ingest.superbet.advice import run_live_advice from ingest.superbet.client import SuperbetClientError from models.wc_bet_advice import UserBetInput try: predictor = _get_wc_predictor() except ValueError as exc: raise HTTPException(status_code=503, detail=str(exc)) from exc user_bet = None if req.user_bet is not None: user_bet = UserBetInput( market=req.user_bet.market, outcome=req.user_bet.outcome, stake=req.user_bet.stake, odds_placed=req.user_bet.odds_placed, ) try: payload = run_live_advice( req.superbet_event_id, predictor, phase=req.phase, bankroll=req.bankroll, user_bet=user_bet, ) except SuperbetClientError as exc: raise HTTPException(status_code=502, detail=str(exc)) from exc return WcBetAdviceResponse( home_team=payload["home_team"], away_team=payload["away_team"], minute=payload["minute"], current_score=payload.get("current_score"), cashout=payload.get("cashout"), aportes=payload.get("aportes", []), inplay_summary=payload.get("inplay_summary", {}), superbet_event_id=req.superbet_event_id, ) @app.post("/user/open-bets", response_model=dict) def register_open_bet(req: UserOpenBetRequest): """Recebe apostas abertas capturadas da Superbet (extensão ou script).""" import uuid from api.user_bets_store import add_open_bet bet_id = req.id or str(uuid.uuid4()) pick_dicts = [p.model_dump() for p in req.picks] ub = add_open_bet( { "id": bet_id, "superbet_event_id": req.superbet_event_id, "event_name": req.event_name, "home_team": req.home_team, "away_team": req.away_team, "picks": pick_dicts, "stake": req.stake, "odds_placed": req.odds_placed, "potential_return": req.potential_return, "cashout_value": req.cashout_value, "ticket_code": req.ticket_code, "status": "open", "source": req.source, "captured_at": req.captured_at or __import__("datetime", fromlist=["datetime"]).datetime.now().isoformat(), } ) return { "id": ub.id, "message": "Aposta cadastrada com sucesso", "event_name": ub.event_name, "picks_count": len(ub.picks), "stake": ub.stake, "odds_placed": ub.odds_placed, } @app.get("/user/open-bets", response_model=dict) def list_user_open_bets(): """Lista apostas abertas do usuário.""" from api.user_bets_store import list_open_bets bets = list_open_bets() return { "count": len(bets), "bets": [ { "id": b.id, "event_name": b.event_name, "home_team": b.home_team, "away_team": b.away_team, "picks": [p.model_dump() if hasattr(p, "model_dump") else dict(p) for p in b.picks], "stake": b.stake, "odds_placed": b.odds_placed, "potential_return": b.potential_return, "cashout_value": b.cashout_value, "ticket_code": b.ticket_code, "status": b.status, "source": b.source, "captured_at": b.captured_at, } for b in bets ], } # --------------------------------------------------------------------------- # Fase 4 — Carteira & Reconciliação (CSV Superbet) # --------------------------------------------------------------------------- @app.post("/user/transactions/upload", response_model=dict) async def upload_user_transactions( user_id: str = Query("default", description="ID do usuário dono do CSV"), file: UploadFile = File(...), ): """Recebe CSV de transações da Superbet e persiste no bronze do datalake.""" import uuid as _uuid from ingest.user_transactions.parser import parse_user_csv from ingest.user_transactions.store import save_transactions_bronze if not file.filename or not file.filename.lower().endswith(".csv"): raise HTTPException(status_code=400, detail="Arquivo deve ser .csv") content = await file.read() if len(content) > 10 * 1024 * 1024: # 10 MB raise HTTPException(status_code=413, detail="CSV maior que 10MB") upload_id = str(_uuid.uuid4()) rows = parse_user_csv(content, user_id, upload_id) if not rows: raise HTTPException(status_code=422, detail="Nenhuma linha válida no CSV") out_path = save_transactions_bronze(rows, user_id, upload_id) n_inplay_placed = sum(1 for r in rows if r.is_inplay_bet and r.is_bet_placed) n_wins = sum(1 for r in rows if r.is_win) total_staked = sum(r.amount for r in rows if r.is_bet_placed) total_won = sum(r.amount for r in rows if r.is_win) return { "upload_id": upload_id, "user_id": user_id, "n_rows": len(rows), "n_inplay_bets_placed": n_inplay_placed, "n_wins": n_wins, "total_staked": round(total_staked, 2), "total_won": round(total_won, 2), "pnl": round(total_won - total_staked, 2), "file_path": str(out_path), "message": "ok", } @app.get("/user/transactions/summary", response_model=dict) def get_user_wallet_summary(user_id: str = Query("default")): """KPIs agregados da carteira do usuário.""" from pipelines.user_bet_analytics import compute_wallet_summary return compute_wallet_summary(user_id) @app.post("/user/transactions/reconcile", response_model=dict) def reconcile_user_bets(user_id: str = Query("default")): """Reconcilia transações com snapshots de eventos. Persiste silver.""" from pipelines.user_bet_reconciliation import ( reconcile_user_transactions, save_reconciliation, ) df = reconcile_user_transactions(user_id) if df.empty: return {"status": "empty", "n_pairs": 0} path = save_reconciliation(df, user_id) n_matched = int((df["match_confidence"].fillna(0) >= 0.5).sum()) return { "status": "ok", "n_pairs": len(df), "n_matched_high_confidence": n_matched, "match_rate": round(n_matched / len(df), 3), "file_path": str(path), } @app.get("/user/transactions/reconciliation", response_model=dict) def get_user_reconciliation( user_id: str = Query("default"), limit: int = Query(50, ge=1, le=500), offset: int = Query(0, ge=0), ): """Tabela paginada da reconciliação aposta-modelo.""" from pipelines.user_bet_analytics import compute_reconciliation_table return compute_reconciliation_table(user_id, limit=limit, offset=offset) @app.get("/user/transactions/model-errors", response_model=dict) def get_user_model_errors(user_id: str = Query("default")): """Heatmap de erros do modelo por bucket de minuto/score.""" from pipelines.user_bet_analytics import compute_model_errors_heatmap return compute_model_errors_heatmap(user_id) @app.post("/worldcup/inplay", response_model=WcInPlayResponse) def worldcup_inplay(req: WcInPlayRequest): """Mercados ao vivo condicionados ao placar e minuto (Monte Carlo no tempo restante).""" from ingest.superbet.benchmark import market_benchmark from ingest.superbet.client import SuperbetClient, SuperbetClientError from ingest.superbet.store import merge_snapshot_into_odds_file, save_event_snapshot from models.wc_inplay import inplay_from_predictor try: predictor = _get_wc_predictor() except ValueError as exc: raise HTTPException(status_code=503, detail=str(exc)) from exc home = normalize_national_team(req.home_team) away = normalize_national_team(req.away_team) home_score = req.home_score away_score = req.away_score minute = req.minute ht_home = req.ht_home_score ht_away = req.ht_away_score superbet_payload = None benchmark = None superbet_snapshot = None if req.superbet_event_id is not None: try: superbet_snapshot = SuperbetClient().fetch_event(req.superbet_event_id) save_event_snapshot(superbet_snapshot) if req.merge_superbet_odds and superbet_snapshot.h2h_odds: merge_snapshot_into_odds_file(superbet_snapshot) from pipelines.wc_market_features import load_match_odds_index load_match_odds_index.cache_clear() superbet_payload = superbet_snapshot.to_dict() if superbet_snapshot.inplay: home_score = superbet_snapshot.inplay.home_score away_score = superbet_snapshot.inplay.away_score minute = superbet_snapshot.inplay.minute if superbet_snapshot.inplay.ht_home_score is not None: ht_home = superbet_snapshot.inplay.ht_home_score ht_away = superbet_snapshot.inplay.ht_away_score except SuperbetClientError as exc: raise HTTPException(status_code=502, detail=str(exc)) from exc result = inplay_from_predictor( predictor, home_team=home, away_team=away, home_score=home_score, away_score=away_score, minute=minute, phase=req.phase, is_neutral=True, match_minutes=req.match_minutes, ht_home_score=ht_home, ht_away_score=ht_away, ) payload = result.to_dict() if superbet_snapshot and superbet_snapshot.h2h_implied: benchmark = market_benchmark( superbet_snapshot, model_h2h={ "1": payload["prob_final_home"], "X": payload["prob_final_draw"], "2": payload["prob_final_away"], }, model_totals=payload.get("final_line_probs"), ) payload["market_benchmark"] = benchmark payload["superbet"] = superbet_payload return WcInPlayResponse(**payload) @app.post("/worldcup/predict", response_model=WcPredictionResponse) def worldcup_predict(req: WcPredictRequest): from ingest.sofascore.client import SofascoreClientError from ingest.sofascore.kxl_merge import merge_sofascore_fept try: predictor = _get_wc_predictor() except ValueError as exc: raise HTTPException(status_code=503, detail=str(exc)) from exc home = normalize_national_team(req.home_team) away = normalize_national_team(req.away_team) if req.phase == "group" and not official_match_exists(home, away, phase="group"): raise HTTPException( status_code=400, detail=f"Confronto {home} x {away} não consta na tabela oficial da fase de grupos.", ) try: kxl_match, fept_meta = merge_sofascore_fept( kxl_match=req.kxl_match, sofascore_event_id=req.sofascore_event_id, home_team=home, away_team=away, ) except LookupError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc except SofascoreClientError as exc: raise HTTPException(status_code=502, detail=str(exc)) from exc except ValueError as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc try: pred = predictor.predict( home, away, phase=req.phase, kxl_match=kxl_match, season=2026, group_name=lookup_2026_group(home, away) if req.phase == "group" else None, ) except Exception as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc response = _wc_prediction_to_response(pred) if fept_meta: response.model_breakdown.kxl_fept = fept_meta return response @app.post("/worldcup/simulate", response_model=WcSimulationResponse) def worldcup_simulate(req: WcPredictRequest): """Analisa um confronto entre duas seleções com dados reais da FIFA. Diferente de /worldcup/predict, este endpoint: - Busca escalações oficiais da FIFA (se jogo constar na janela atual) - Busca pontos FIFA ao vivo (atualizados a cada jogo) - Busca dados enriquecidos do Sofascore (forma, séries, H2H) - Retorna predição dos modelos + dados brutos reais """ from models.wc_match_simulator import simulate_match try: predictor = _get_wc_predictor() except ValueError: predictor = None from datetime import date as date_type home = normalize_national_team(req.home_team) away = normalize_national_team(req.away_team) parsed_date: date_type | None = None if req.match_date: try: parsed_date = date_type.fromisoformat(req.match_date[:10]) except ValueError: raise HTTPException(status_code=400, detail="match_date inválida") from None try: result = simulate_match( home_team=home, away_team=away, match_date=parsed_date, phase=req.phase, is_neutral=True, season=2026, group_name=lookup_2026_group(home, away) if req.phase == "group" else None, predictor=predictor, fifa_match_id=req.fifa_match_id, sofascore_event_id=req.sofascore_event_id, ) except Exception as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc return WcSimulationResponse( home_team=result.home_team, away_team=result.away_team, match_date=result.match_date, prediction=result.prediction, confidence=result.confidence, prob_home=result.prob_home, prob_draw=result.prob_draw, prob_away=result.prob_away, poisson_score=result.poisson_score, expected_goals=result.expected_goals, fifa_home_lineup=result.fifa_home_lineup, fifa_away_lineup=result.fifa_away_lineup, fifa_home_bench=result.fifa_home_bench, fifa_away_bench=result.fifa_away_bench, fifa_home_goals=result.fifa_home_goals, fifa_away_goals=result.fifa_away_goals, fifa_home_tactics=result.fifa_home_tactics, fifa_away_tactics=result.fifa_away_tactics, fifa_home_coach=result.fifa_home_coach, fifa_away_coach=result.fifa_away_coach, fifa_stadium=result.fifa_stadium, fifa_attendance=result.fifa_attendance, fifa_home_points=result.fifa_home_points, fifa_away_points=result.fifa_away_points, fifa_points_diff=result.fifa_points_diff, lineup_source=result.lineup_source, enrich_features=result.enrich_features, stats_features=result.stats_features, model_breakdown=result.model_breakdown, warnings=result.warnings, ) def _build_wc_round_predictions( predictor: "WcPredictor", round_data: dict, *, matchday: int | None = None, ) -> list[WcPredictionResponse]: cache = _wc_round_cache() phase_default = round_data.get("phase", "group") matches = round_data.get("matches", []) if matchday is not None: matches = [m for m in matches if m.get("round") == matchday] predictions: list[WcPredictionResponse] = [] dirty = False for match in matches: home = normalize_national_team(match["home_team"]) away = normalize_national_team(match["away_team"]) match_phase = match.get("phase", phase_default) key = cache.match_key(home, away, match_phase) cached = cache.get_cached(key) if cached is not None: predictions.append(WcPredictionResponse(**cached)) continue try: pred = predictor.predict( home, away, phase=match_phase, season=round_data.get("season", 2026), group_name=match.get("group"), ) resp = _wc_prediction_to_response(pred) cache.set_cached(key, resp.model_dump()) predictions.append(resp) dirty = True except Exception as exc: raise HTTPException( status_code=400, detail=f"Erro ao prever {home} x {away}: {exc}", ) from exc if dirty: cache.persist_to_disk() return predictions @app.get("/worldcup/round", response_model=WcRoundResponse) def worldcup_round( matchday: int | None = Query(None, alias="round", ge=1, le=3), ): try: predictor = _get_wc_predictor() except ValueError as exc: raise HTTPException(status_code=503, detail=str(exc)) from exc round_data = _load_wc_round() phase_default = round_data.get("phase", "group") predictions = _build_wc_round_predictions( predictor, round_data, matchday=matchday, ) return WcRoundResponse( season=round_data.get("season", 2026), competition=round_data.get("competition", "Copa do Mundo"), phase=phase_default, round=matchday if matchday is not None else round_data.get("round", 0), predictions=predictions, ) @app.get("/worldcup/group-standings", response_model=WcGroupStandingsResponse) def worldcup_group_standings(): try: predictor = _get_wc_predictor() except ValueError as exc: raise HTTPException(status_code=503, detail=str(exc)) from exc round_data = _load_wc_round() predictions = _build_wc_round_predictions(predictor, round_data, matchday=None) pair_group: dict[tuple[str, str], str] = {} for match in round_data.get("matches", []): home = normalize_national_team(match["home_team"]) away = normalize_national_team(match["away_team"]) g = match.get("group") if g: pair_group[(home, away)] = str(g) pred_rows: list[dict] = [] for pred in predictions: key = (pred.home_team, pred.away_team) pred_rows.append( { "home_team": pred.home_team, "away_team": pred.away_team, "prediction": pred.prediction, "group": pair_group.get(key), } ) groups_meta = round_data.get("groups", []) blocks = build_group_standings(groups_meta, pred_rows) return WcGroupStandingsResponse( season=int(round_data.get("season", 2026)), competition=round_data.get("competition", "Copa do Mundo FIFA 2026"), simulated=True, note="Pontos simulados pelos palpites do modelo (3 vitória, 1 empate, 0 derrota).", groups=[WcGroupStandingsBlock(**block) for block in blocks], ) @app.get("/worldcup/teams", response_model=WcTeamsResponse) def worldcup_teams(): from ingest.fixtures.world_cup import load_wc_fixtures fixtures = load_wc_fixtures() teams: set[str] = set() if not fixtures.empty: teams.update(fixtures["home_team"].dropna().unique()) teams.update(fixtures["away_team"].dropna().unique()) round_data = _load_wc_round() for match in round_data.get("matches", []): teams.add(normalize_national_team(match["home_team"])) teams.add(normalize_national_team(match["away_team"])) sorted_teams = sorted(teams, key=str.casefold) return WcTeamsResponse(teams=sorted_teams, count=len(sorted_teams)) @app.get("/worldcup/friendlies", response_model=WcFriendliesResponse) def worldcup_friendlies( team: str = Query(..., description="Seleção (nome canônico em português)"), pages: int = Query(2, ge=1, le=5, description="Páginas de histórico Sofascore por seleção"), year: int | None = Query( None, ge=2000, le=2100, description="Ano do calendário; padrão: ano corrente (UTC)", ), include_finished: bool = Query(True, description="Incluir amistosos já disputados"), include_upcoming: bool = Query(True, description="Incluir amistosos futuros/agendados"), ): from datetime import datetime, timezone from ingest.sofascore.client import SofascoreClient, SofascoreClientError from ingest.sofascore.friendlies import list_team_friendlies, save_friendlies_snapshot canonical = normalize_national_team(team) filter_year = year if year is not None else datetime.now(timezone.utc).year try: friendlies = list_team_friendlies( canonical, pages=pages, year=filter_year, include_finished=include_finished, include_upcoming=include_upcoming, client=SofascoreClient(), ) except LookupError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc except SofascoreClientError as exc: raise HTTPException(status_code=502, detail=str(exc)) from exc try: save_friendlies_snapshot(canonical, filter_year, friendlies) except OSError: pass items = [WcFriendlyItem(**row.to_dict()) for row in friendlies] return WcFriendliesResponse( team=canonical, year=filter_year, count=len(items), friendlies=items, ) @app.get("/worldcup/schedule", response_model=WcScheduleResponse) def worldcup_schedule(): try: data = load_wc_schedule() except FileNotFoundError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc except json.JSONDecodeError as exc: raise HTTPException(status_code=500, detail=f"Calendário WC inválido: {exc}") from exc payload = build_schedule_response(data) return WcScheduleResponse(**payload) @app.get("/worldcup/squads", response_model=WcSquadTeamsResponse) def worldcup_squads(): try: data = load_wc_squads() except FileNotFoundError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc except json.JSONDecodeError as exc: raise HTTPException(status_code=500, detail=f"Convocações WC inválidas: {exc}") from exc return WcSquadTeamsResponse( season=data.get("season", 2026), competition=data.get("competition", "Copa do Mundo FIFA 2026"), source_url=data.get("source_url", ""), updated_at=data.get("updated_at", ""), team_count=data.get("team_count", len(data.get("squads", []))), teams=list_squad_teams(data), ) @app.get("/worldcup/squads/{team}", response_model=WcSquadDetailResponse) def worldcup_squad_detail(team: str): try: data = load_wc_squads() except FileNotFoundError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc squad = get_squad_by_team(data, team) if squad is None: raise HTTPException(status_code=404, detail=f"Convocação não encontrada: {team}") return WcSquadDetailResponse( season=data.get("season", 2026), competition=data.get("competition", "Copa do Mundo FIFA 2026"), source_url=data.get("source_url", ""), updated_at=data.get("updated_at", ""), squad=WcSquadTeamItem(**squad), ) @app.get("/worldcup/editions", response_model=WcEditionsResponse) def worldcup_editions(): from pipelines.wc_validate import list_wc_editions try: predictor = _get_wc_predictor() except ValueError as exc: raise HTTPException(status_code=503, detail=str(exc)) from exc editions = list_wc_editions(predictor.fixtures) return WcEditionsResponse( editions=[WcEditionItem(**e) for e in editions], ) @app.get("/worldcup/editions/{season}/matches", response_model=WcEditionMatchesResponse) def worldcup_edition_matches(season: int): from pipelines.wc_validate import list_edition_matches try: predictor = _get_wc_predictor() except ValueError as exc: raise HTTPException(status_code=503, detail=str(exc)) from exc matches = list_edition_matches(predictor.fixtures, season) if not matches: raise HTTPException( status_code=404, detail=f"Nenhum jogo encontrado para a edição {season}", ) return WcEditionMatchesResponse( season=season, matches=[WcHistoricalMatchItem(**_sanitize_match_item(m)) for m in matches], ) @app.post("/worldcup/validate", response_model=WcValidateResponse) def worldcup_validate(req: WcValidateRequest): if not req.match_id and (not req.home_team or not req.away_team): raise HTTPException( status_code=400, detail="Informe match_id ou home_team e away_team", ) try: predictor = _get_wc_predictor() except ValueError as exc: raise HTTPException(status_code=503, detail=str(exc)) from exc home = normalize_national_team(req.home_team) if req.home_team else None away = normalize_national_team(req.away_team) if req.away_team else None from pipelines.wc_validate import validate_historical_match try: result = validate_historical_match( predictor, predictor.fixtures, req.season, match_id=req.match_id, home_team=home, away_team=away, ) except ValueError as exc: raise HTTPException(status_code=404, detail=str(exc)) from exc except Exception as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc breakdown = result["model_breakdown"] return WcValidateResponse( match=WcValidateMatchInfo(**result["match"]), prediction=result["prediction"], confidence=round(result["confidence"], 4), prob_home=round(result["prob_home"], 4), prob_draw=round(result["prob_draw"], 4), prob_away=round(result["prob_away"], 4), poisson_score=result["poisson_score"], expected_goals=result["expected_goals"], correct=result["correct"], context=result["context"], h2h_summary=result["h2h_summary"], model_breakdown=_breakdown_to_response(breakdown), cutoff_date=result["cutoff_date"], cutoff_note=result["cutoff_note"], ) @app.get("/worldcup/walkforward") def worldcup_walkforward(): report_path = settings.lake_root / "reports" / "wc_walkforward_report.json" if not report_path.exists(): raise HTTPException( status_code=404, detail="Relatório ausente. Execute: walkforward-wc-models", ) return json.loads(report_path.read_text(encoding="utf-8")) def _run_wc_retrain_background(*, enable_mlflow: bool = False) -> None: global _wc_predictor, _wc_artifact_meta, _wc_models_ready, _wc_train_thread from models.wc_artifact import load_or_train_wc_predictor from models.wc_train_progress import WcTrainProgressReporter reporter = WcTrainProgressReporter(console=False) try: predictor, manifest = load_or_train_wc_predictor( force=True, progress=reporter, enable_mlflow=enable_mlflow, ) with _wc_train_lock: _wc_predictor = predictor _wc_artifact_meta = manifest _wc_models_ready = True _wc_round_cache().invalidate_wc_round_cache() except Exception as exc: with _wc_train_lock: _wc_models_ready = False reporter.fail(str(exc)) finally: with _wc_train_lock: _wc_train_thread = None @app.get("/worldcup/train/status") def worldcup_train_status(): from models.wc_train_progress import read_train_progress state = read_train_progress() with _wc_train_lock: thread_alive = _wc_train_thread is not None and _wc_train_thread.is_alive() if state is None: return { "status": "running" if thread_alive else "idle", "running": thread_alive, } payload = asdict(state) payload["running"] = thread_alive or state.status == "running" return payload @app.post("/worldcup/retrain") def worldcup_retrain( background: bool = Query(False), mlflow: bool = Query(False, description="Registra o treino no MLflow"), ): global _wc_predictor, _wc_artifact_meta, _wc_models_ready, _wc_train_thread if background: with _wc_train_lock: if _wc_train_thread is not None and _wc_train_thread.is_alive(): raise HTTPException(status_code=409, detail="Treino WC já em andamento") _wc_train_thread = threading.Thread( target=_run_wc_retrain_background, kwargs={"enable_mlflow": mlflow}, name="wc-retrain", daemon=True, ) _wc_train_thread.start() return {"status": "started", "poll": "/worldcup/train/status", "mlflow": mlflow} try: from models.wc_artifact import load_or_train_wc_predictor from models.wc_train_progress import WcTrainProgressReporter reporter = WcTrainProgressReporter(console=False) _wc_predictor, _wc_artifact_meta = load_or_train_wc_predictor( force=True, progress=reporter, enable_mlflow=mlflow, ) _wc_models_ready = True _wc_round_cache().invalidate_wc_round_cache() except ValueError as exc: _wc_models_ready = False raise HTTPException(status_code=503, detail=str(exc)) from exc return { "status": "ok", "artifact": _wc_artifact_meta, } @app.post("/worldcup/value/live", response_model=WcValueResponse) def worldcup_live_value(req: WcValueRequest): schedule_path = Path(req.schedule_file) if not schedule_path.exists(): raise HTTPException(status_code=404, detail=f"Schedule não encontrado: {schedule_path}") try: schedule = json.loads(schedule_path.read_text(encoding="utf-8")) except Exception as exc: raise HTTPException(status_code=400, detail=f"Falha ao ler schedule: {exc}") from exc try: live_odds = fetch_live_h2h_odds( sport_key=req.sport_key, regions=req.regions, preferred_bookmaker=req.bookmaker, ) except ValueError as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc except Exception as exc: raise HTTPException(status_code=502, detail=f"Erro ao consultar Odds API: {exc}") from exc merged, matched = merge_schedule_with_odds(schedule, live_odds) if req.save_odds_file: save_odds_file(merged, Path(req.output_odds_file)) try: predictor = _get_wc_predictor() except ValueError as exc: raise HTTPException(status_code=503, detail=str(exc)) from exc phase_default = merged.get("phase", "group") reports: list[WcMatchValueResponse] = [] for match in merged.get("matches", []): phase = match.get("phase", phase_default) pred = predictor.predict(match["home_team"], match["away_team"], phase=phase) probabilities = {"1": pred.prob_home, "X": pred.prob_draw, "2": pred.prob_away} value = evaluate_match( home_team=match["home_team"], away_team=match["away_team"], probabilities=probabilities, odds=match["odds"], min_edge=req.min_edge, ) reports.append(_match_value_to_response(value)) return WcValueResponse( matched_games=matched, total_schedule_games=len(schedule.get("matches", [])), source=merged.get("source", "the-odds-api"), captured_at=merged.get("captured_at"), edges=reports, )