GitHub Actions committed on
Commit
0d73bd2
1 Parent(s): c662f80

Auto-deploy from GitHub

Browse files
app.py CHANGED
@@ -9,7 +9,8 @@ import numpy as np
9
  import json
10
 
11
  from detect import DengueDetector
12
- from predict import DenguePredictor
 
13
 
14
  def default_json_serializer(obj):
15
  if isinstance(obj, np.integer):
@@ -22,16 +23,23 @@ def default_json_serializer(obj):
22
 
23
  detector: DengueDetector = None
24
  predictor: DenguePredictor = None
 
25
 
26
  app = FastAPI()
27
 
28
  # --- evento de startup para carregar os modelos ---
29
  @app.on_event("startup")
30
  async def startup_event():
31
- global detector, predictor
32
  print("Executando evento de startup: Carregando os m贸dulos de IA...")
33
  detector = DengueDetector()
34
  predictor = DenguePredictor()
 
 
 
 
 
 
35
  print("M贸dulos de IA carregados com sucesso. API pronta.")
36
 
37
  # --- CORS ---
@@ -71,6 +79,7 @@ async def predict_dengue_route(payload: dict = Body(...)):
71
  raise ValueError("O campo 'ibge_code' 茅 obrigat贸rio.")
72
 
73
  ibge_code = int(ibge_code_str)
 
74
  result = predictor.predict(ibge_code)
75
 
76
  json_content = json.dumps(result, default=default_json_serializer)
@@ -83,4 +92,40 @@ async def predict_dengue_route(payload: dict = Body(...)):
83
  return JSONResponse(status_code=500, content={
84
  "error": str(e),
85
  "traceback": tb_str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  })
 
9
  import json
10
 
11
  from detect import DengueDetector
12
+ from municipal_predictor import DenguePredictor
13
+ from state_predictor import StatePredictor
14
 
15
  def default_json_serializer(obj):
16
  if isinstance(obj, np.integer):
 
23
 
24
  detector: DengueDetector = None
25
  predictor: DenguePredictor = None
26
+ state_predictor: StatePredictor = None
27
 
28
  app = FastAPI()
29
 
30
# --- startup event: load the ML modules ---
@app.on_event("startup")
async def startup_event():
    """Load the detection/prediction modules once, when the API boots."""
    global detector, predictor, state_predictor
    print("Executando evento de startup: Carregando os módulos de IA...")
    detector = DengueDetector()
    predictor = DenguePredictor()
    # The state-level model is optional: a failure here must not block the
    # API; the /predict/state/ route will answer 503 until it is available.
    try:
        state_predictor = StatePredictor()
    except Exception as exc:
        print("[WARN] StatePredictor não inicializado:", str(exc))
        state_predictor = None
    print("Módulos de IA carregados com sucesso. API pronta.")
44
 
45
  # --- CORS ---
 
79
  raise ValueError("O campo 'ibge_code' 茅 obrigat贸rio.")
80
 
81
  ibge_code = int(ibge_code_str)
82
+ # Sempre retorna hist贸rico completo; frontend controla a janela vis铆vel
83
  result = predictor.predict(ibge_code)
84
 
85
  json_content = json.dumps(result, default=default_json_serializer)
 
92
  return JSONResponse(status_code=500, content={
93
  "error": str(e),
94
  "traceback": tb_str
95
+ })
96
+
97
+
98
@app.post("/predict/state/")
async def predict_dengue_state_route(payload: dict = Body(...)):
    """Forecast state-level dengue cases for the UF given in the payload.

    Accepts the state abbreviation under any of the keys ``state``,
    ``state_sigla`` or ``uf``; ``year``/``week`` are optional anchors.
    """
    global state_predictor

    # Lazy initialization: retry building the predictor on first use.
    if state_predictor is None:
        try:
            state_predictor = StatePredictor()
        except Exception as exc:
            msg = f"Preditor estadual ainda não foi inicializado: {str(exc)}"
            return JSONResponse(status_code=503, content={"error": msg})

    try:
        uf = payload.get("state") or payload.get("state_sigla") or payload.get("uf")
        if not uf:
            raise ValueError("O campo 'state' (sigla) é obrigatório.")

        year_raw = payload.get("year")
        week_raw = payload.get("week")

        # year/week are optional; when omitted the predictor forecasts past
        # the last known point. Full history is always returned — the
        # frontend controls the visible window.
        result = state_predictor.predict(
            str(uf).upper(),
            year=None if year_raw is None else int(year_raw),
            week=None if week_raw is None else int(week_raw),
        )

        body = json.dumps(result, default=default_json_serializer)
        return Response(content=body, media_type="application/json")

    except Exception as e:
        tb_str = traceback.format_exc()
        print(tb_str)
        return JSONResponse(status_code=500, content={
            "error": str(e),
            "traceback": tb_str
        })
models/model_state.keras CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d23ef65f526f0a2c26a4ad4163d7400bc32c47d83abd9d46bce862b6114ba9af
3
- size 2534633
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4c86d8e99e3779125ec864816e0fbf96f72a8e324e40a5e170182168a617b30
3
+ size 2536309
models/scalers/scaler_dyn_global_state.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bb2cfb7f78d33fbf9242461bdef7783f31fbbb35a8114b75c341da36b07fa33
3
  size 1303
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc7972df5abd0302686c2d6ff16962ff31a13c5ca5346cbe57633de1ec34f1c1
3
  size 1303
models/scalers/scaler_target_global_state.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86da4e22650d62cc4806750659f8c83bbd924404800d818015716f751c7e2947
3
  size 719
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a4e97671eeabf05f39cb9a6b53130816103d263c6bfffd9fc7fbee5f9c77178
3
  size 719
models/state_peak.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"AC": 2036.0, "AL": 2338.0, "AM": 388.0, "AP": 594.0, "BA": 7520.0, "CE": 2754.0, "DF": 6456.0, "ES": 2598.0, "GO": 13984.0, "MA": 1139.0, "MG": 68685.0, "MS": 3781.0, "MT": 1923.0, "PA": 810.0, "PB": 1613.0, "PE": 2249.0, "PI": 2913.0, "PR": 39913.0, "RJ": 12162.0, "RN": 2868.0, "RO": 631.0, "RR": 63.0, "RS": 16798.0, "SC": 26832.0, "SE": 408.0, "SP": 129817.0, "TO": 1289.0}
municipal_predictor.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import joblib
4
+ import numpy as np
5
+ import pandas as pd
6
+ from pathlib import Path
7
+ from datetime import timedelta
8
+ from io import BytesIO
9
+ import base64
10
+ import tensorflow as tf
11
+ from tensorflow.keras.utils import register_keras_serializable
12
+ import matplotlib
13
+ matplotlib.use('Agg')
14
+ import matplotlib.pyplot as plt
15
+ from huggingface_hub import hf_hub_download
16
+
17
+ plt.style.use('seaborn-v0_8-darkgrid')
18
+
19
@register_keras_serializable(package="Custom", name="asymmetric_mse")
def asymmetric_mse(y_true, y_pred):
    """Mean squared error with an extra penalty on under-prediction.

    When the model predicts below the true value (positive residual) the
    squared error is scaled up in proportion to the relative error, pushing
    the model to avoid missing case surges.
    """
    penalty_factor = 10.0
    residual = y_true - y_pred
    # Guard against division by ~0 for weeks with (near) zero true cases.
    relative = tf.abs(residual) / tf.maximum(tf.abs(y_true), 1.0)
    weight = tf.where(residual > 0, 1.0 + penalty_factor * relative, 1.0)
    return tf.reduce_mean(tf.square(residual) * weight)
28
+
29
class DenguePredictor:
    """Municipal-level dengue forecaster.

    Loads a Keras sequence model and its fitted scalers from ``models/``,
    pulls the latest inference dataset from the Hugging Face Hub, and
    produces a ``horizon``-week case forecast plus lag-correlation insights
    for a municipality identified by its IBGE code.
    """

    def __init__(self, project_root=None):
        # Directory that contains models/ and models/scalers/.
        self.project_root = Path(project_root) if project_root else Path(__file__).resolve().parent
        self.sequence_length = 12   # weeks of history fed to the model
        self.horizon = 6            # weeks forecast ahead
        # Bounds used to normalize the year feature at training time.
        self.year_min_train = 2014
        self.year_max_train = 2025
        # Must match (order included) the feature set the dynamic scaler was fitted on.
        self.dynamic_features = [
            "numero_casos", "casos_velocidade", "casos_aceleracao", "casos_mm_4_semanas",
            "T2M", "T2M_MAX", "T2M_MIN", "PRECTOTCORR", "RH2M", "ALLSKY_SFC_SW_DWN",
            "week_sin", "week_cos", "year_norm", "notificacao"
        ]
        self.static_features = ["latitude", "longitude"]
        # Portuguese display names for selected features.
        self.feature_names_pt = {
            "numero_casos": "Nº de Casos de Dengue",
            "T2M": "Temperatura Média (°C)",
            "PRECTOTCORR": "Precipitação (mm)"
        }
        self._loaded = False
        self.load_assets()

    def load_assets(self):
        """Load scalers, city mapping, inference data and the Keras model.

        Raises:
            FileNotFoundError: if the scalers directory or model file is missing.
        """
        models_dir = self.project_root / "models"
        scalers_dir = models_dir / "scalers"
        model_path = models_dir / "model.keras"
        city_map_path = models_dir / "city_to_idx.json"

        if not scalers_dir.exists():
            raise FileNotFoundError(str(scalers_dir) + " not found")

        self.scaler_dyn = joblib.load(scalers_dir / "scaler_dyn_global.pkl")
        self.scaler_static = joblib.load(scalers_dir / "scaler_static_global.pkl")
        self.scaler_target = joblib.load(scalers_dir / "scaler_target_global.pkl")

        # Optional IBGE-code -> embedding-index map; unknown cities fall back to 0.
        if city_map_path.exists():
            with open(city_map_path, "r", encoding="utf-8") as fh:
                self.city_to_idx = {int(k): int(v) for k, v in json.load(fh).items()}
        else:
            self.city_to_idx = {}

        hf_token = os.environ.get("HF_TOKEN")
        inference_path = hf_hub_download(
            repo_id="previdengue/predict_inference_data",
            filename="inference_data.parquet",
            repo_type="dataset",
            token=hf_token
        )

        df = pd.read_parquet(inference_path)
        df["codigo_ibge"] = df["codigo_ibge"].astype(int)
        df["ano"] = df["ano"].astype(int)
        df["semana"] = df["semana"].astype(int)
        try:
            # BUGFIX: zero-pad the week so "%Y%W%w" parses weeks 1-9 correctly
            # (e.g. 2024 week 5 must read "2024050", not the ambiguous "202450").
            df["date"] = pd.to_datetime(
                df["ano"].astype(str) + df["semana"].astype(str).str.zfill(2) + "0",
                format="%Y%W%w", errors="coerce"
            )
        except Exception:
            df["date"] = pd.NaT

        df = df.sort_values(by=["codigo_ibge", "date"]).reset_index(drop=True)
        # Cyclic encoding of the epidemiological week.
        df["week_sin"] = np.sin(2 * np.pi * df["semana"] / 52)
        df["week_cos"] = np.cos(2 * np.pi * df["semana"] / 52)
        df["year_norm"] = (df["ano"] - self.year_min_train) / (self.year_max_train - self.year_min_train)
        # Flag for the 2021/2022 notification-regime years.
        df["notificacao"] = df["ano"].isin([2021, 2022]).astype(float)

        self.df_master = df
        self.municipios = df[["codigo_ibge", "municipio"]].drop_duplicates().sort_values("codigo_ibge")

        if not model_path.exists():
            raise FileNotFoundError(str(model_path) + " not found")

        self.model = tf.keras.models.load_model(model_path, custom_objects={"asymmetric_mse": asymmetric_mse}, compile=False)
        self._loaded = True

    def plot_to_base64(self, fig):
        """Render a matplotlib figure to a base64-encoded PNG and close it."""
        buf = BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight", facecolor=fig.get_facecolor())
        buf.seek(0)
        img_str = base64.b64encode(buf.read()).decode("utf-8")
        plt.close(fig)  # avoid leaking figures between requests
        return img_str

    def _prepare_sequence(self, df_mun):
        """Build the last ``sequence_length`` rows with derived model features."""
        df_seq = df_mun.tail(self.sequence_length).copy()
        df_seq["casos_velocidade"] = df_seq["numero_casos"].diff().fillna(0)
        df_seq["casos_aceleracao"] = df_seq["casos_velocidade"].diff().fillna(0)
        df_seq["casos_mm_4_semanas"] = df_seq["numero_casos"].rolling(4, min_periods=1).mean()
        df_seq["week_sin"] = np.sin(2 * np.pi * df_seq["semana"] / 52)
        df_seq["week_cos"] = np.cos(2 * np.pi * df_seq["semana"] / 52)
        df_seq["year_norm"] = (df_seq["ano"] - self.year_min_train) / (self.year_max_train - self.year_min_train)
        if "notificacao" not in df_seq.columns:
            df_seq["notificacao"] = df_seq["ano"].isin([2021, 2022]).astype(float)
        else:
            df_seq["notificacao"] = df_seq["notificacao"].astype(float)
        return df_seq

    def predict(self, ibge_code: int, show_plot=False, display_history_weeks=None):
        """Forecast the next ``horizon`` weeks of cases for one municipality.

        Args:
            ibge_code: IBGE municipality code.
            show_plot: kept for interface compatibility; not used here.
            display_history_weeks: if a positive number, limit the returned
                history to that many trailing weeks; otherwise return all.

        Returns:
            dict with municipality metadata, historic series, predicted
            series and lag-analysis insights.

        Raises:
            RuntimeError: if assets have not been loaded.
            ValueError: on missing/insufficient data or feature mismatches.
        """
        if not self._loaded:
            raise RuntimeError("assets not loaded")

        df_mun = self.df_master[self.df_master["codigo_ibge"] == int(ibge_code)].copy().reset_index(drop=True)
        if df_mun.empty or len(df_mun) < self.sequence_length:
            raise ValueError(f"No data or insufficient history for ibge {ibge_code}")

        municipio_row = self.municipios[self.municipios["codigo_ibge"] == int(ibge_code)]
        municipality_name = municipio_row.iloc[0]["municipio"] if not municipio_row.empty else str(ibge_code)

        df_mun_clean = df_mun.dropna(subset=["numero_casos"]).reset_index(drop=True)
        if len(df_mun_clean) < self.sequence_length:
            raise ValueError(f"Insufficient known-case history for {ibge_code}")

        seq_df = self._prepare_sequence(df_mun_clean)
        if len(seq_df) < self.sequence_length:
            raise ValueError(f"Insufficient sequence length for {ibge_code}")

        # BUGFIX: validate the feature set BEFORE indexing, so callers get a
        # clear ValueError instead of a bare pandas KeyError.
        missing_feats = [c for c in self.dynamic_features if c not in seq_df.columns]
        if missing_feats:
            raise ValueError(f"Missing dynamic features in dataframe: {missing_feats}")
        if hasattr(self.scaler_dyn, "n_features_in_") and self.scaler_dyn.n_features_in_ != len(self.dynamic_features):
            raise ValueError(
                f"Dynamic scaler expects {getattr(self.scaler_dyn, 'n_features_in_', 'unknown')} features, "
                f"but predictor assembled {len(self.dynamic_features)}. Ensure training and inference feature sets match."
            )

        dynamic_raw = seq_df[self.dynamic_features].values
        static_raw = seq_df[self.static_features].iloc[-1].values.reshape(1, -1)

        dynamic_scaled = self.scaler_dyn.transform(dynamic_raw).reshape(1, self.sequence_length, -1)
        static_scaled = self.scaler_static.transform(static_raw)

        city_idx = int(self.city_to_idx.get(int(ibge_code), 0))
        city_input = np.array([[city_idx]], dtype=np.int32)

        y_pred = self.model.predict([dynamic_scaled, static_scaled, city_input], verbose=0)
        # Multi-output models return a list; the regression head comes first.
        y_pred_reg = y_pred[0] if isinstance(y_pred, (list, tuple)) else y_pred

        y_pred_flat = y_pred_reg.reshape(-1, 1)
        y_pred_inv_flat = self.scaler_target.inverse_transform(y_pred_flat)
        y_pred_inv = y_pred_inv_flat.reshape(y_pred_reg.shape)
        # Case counts cannot be negative.
        pred_values = np.maximum(y_pred_inv.flatten(), 0.0)

        last_real_date = seq_df["date"].iloc[-1] if "date" in seq_df.columns else None
        predicted_data = []
        for i, val in enumerate(pred_values):
            pred_date = (last_real_date + timedelta(weeks=i + 1)).strftime("%Y-%m-%d") if pd.notna(last_real_date) else None
            predicted_data.append({"date": pred_date, "predicted_cases": int(round(float(val)))})

        # History: return everything by default; a positive display_history_weeks
        # limits the window.
        if display_history_weeks is None or (isinstance(display_history_weeks, (int, float)) and display_history_weeks <= 0):
            hist_tail = df_mun.copy()
        else:
            hist_tail = df_mun.tail(min(len(df_mun), int(display_history_weeks))).copy()
        historic_data = []
        for _, row in hist_tail.iterrows():
            historic_data.append({
                "date": row["date"].strftime("%Y-%m-%d") if pd.notna(row.get("date")) else None,
                "cases": int(row["numero_casos"]) if pd.notna(row.get("numero_casos")) else None
            })

        # Insights: lag correlation analysis and strategic summary.
        lag_plot_b64, strategic_summary, tipping_points = self.generate_lag_insights(df_mun)

        insights = {
            "lag_analysis_plot_base64": lag_plot_b64,
            "strategic_summary": strategic_summary,
            "tipping_points": tipping_points
        }

        return {
            "municipality_name": municipality_name,
            "ibge": int(ibge_code),
            "last_known_index": int(df_mun.index[-1]),
            "historic_data": historic_data,
            "predicted_data": predicted_data,
            "insights": insights,
        }

    def generate_lag_insights(self, df_mun: pd.DataFrame):
        """Correlate cases with lagged climate drivers (1..12 weeks).

        Returns:
            (plot_base64, summary_text, tipping_points) where the plot shows
            lag-vs-correlation for temperature and precipitation.
        """
        # Rename to analysis-friendly column names.
        df_analysis = df_mun.rename(columns={
            "T2M": "Temperature_C",
            "PRECTOTCORR": "Precipitation_mm"
        })
        max_lag = 12
        cases_col = "numero_casos"
        lag_features = ["Temperature_C", "Precipitation_mm"]
        lag_correlations = {}

        for col in lag_features:
            if col in df_analysis.columns:
                corrs = []
                for lag in range(1, max_lag + 1):
                    try:
                        corr = df_analysis[cases_col].corr(df_analysis[col].shift(lag))
                    except Exception:
                        corr = np.nan
                    corrs.append(corr)
                lag_correlations[col] = corrs
            else:
                lag_correlations[col] = [np.nan] * max_lag

        # Plot (dark theme matching the frontend).
        fig, ax = plt.subplots(figsize=(10, 6), facecolor="#18181b")
        ax.set_facecolor("#18181b")
        for feature_name, corrs in lag_correlations.items():
            ax.plot(range(1, max_lag + 1), corrs, marker="o", linestyle="-", label=feature_name)
        ax.set_title("Lag Analysis", color="white")
        ax.set_xlabel("Lag (weeks)", color="white")
        ax.set_ylabel("Correlation with cases", color="white")
        ax.tick_params(colors="white")
        ax.legend(facecolor="#27272a", edgecolor="gray", labelcolor="white")
        ax.grid(True, which="both", linestyle="--", linewidth=0.5, color="#444")
        lag_plot_b64 = self.plot_to_base64(fig)

        # Summaries: lag with the strongest absolute correlation per feature.
        lag_peaks = {}
        for feature, corrs in lag_correlations.items():
            if corrs and not all(pd.isna(corrs)):
                peak = int(np.nanargmax(np.abs(np.array(corrs))) + 1)
            else:
                peak = "N/A"
            lag_peaks[feature] = peak

        temp_lag = lag_peaks.get("Temperature_C", "N/A")
        rain_lag = lag_peaks.get("Precipitation_mm", "N/A")
        summary = (
            f"O modelo identifica Temperatura e Precipitação como fatores climáticos chave. "
            f"Temperatura mostra impacto máximo após {temp_lag} semanas e precipitação após {rain_lag} semanas. "
            "Ações preventivas devem ser intensificadas nessas janelas após eventos climáticos extremos."
        )

        tipping_points = [
            {"factor": "Temperatura", "value": f"Maior impacto em {temp_lag} semanas"},
            {"factor": "Precipitação", "value": f"Maior impacto em {rain_lag} semanas"},
            {"factor": "Umidade", "value": "Aumenta a sobrevivência de mosquitos adultos"}
        ]

        return lag_plot_b64, summary, tipping_points
state_predictor.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import joblib
4
+ import numpy as np
5
+ import pandas as pd
6
+ from pathlib import Path
7
+ from datetime import timedelta
8
+ import tensorflow as tf
9
+ from tensorflow.keras.utils import register_keras_serializable
10
+ from huggingface_hub import hf_hub_download
11
+
12
@register_keras_serializable(package="Custom", name="asymmetric_mse")
def asymmetric_mse(y_true, y_pred):
    """Asymmetric MSE: under-predictions get a ~10x relative-error penalty."""
    factor = 10.0
    diff = y_true - y_pred
    # Clamp the denominator so near-zero targets do not explode the ratio.
    safe_true = tf.maximum(tf.abs(y_true), 1.0)
    rel_err = tf.abs(diff) / safe_true
    scale = tf.where(diff > 0, 1.0 + factor * rel_err, tf.ones_like(diff))
    return tf.reduce_mean(tf.square(diff) * scale)
21
+
22
+ class StatePredictor:
23
+ def __init__(self, project_root=None):
24
+ self.project_root = Path(project_root) if project_root else Path(__file__).resolve().parent
25
+ self.sequence_length = 12
26
+ self.horizon = 6
27
+ self.dynamic_features = [
28
+ "casos_norm_log",
29
+ "casos_velocidade", "casos_aceleracao", "casos_mm_4_semanas",
30
+ "T2M_mean","T2M_std","PRECTOTCORR_mean","PRECTOTCORR_std",
31
+ "RH2M_mean","RH2M_std","ALLSKY_SFC_SW_DWN_mean","ALLSKY_SFC_SW_DWN_std",
32
+ "week_sin","week_cos","year_norm","notificacao"
33
+ ]
34
+ self.static_features = ["populacao_total"]
35
+ self._loaded = False
36
+ self.load_assets()
37
+
38
+ def load_assets(self):
39
+ models_dir = self.project_root / "models"
40
+ scalers_dir = models_dir / "scalers"
41
+ model_path = models_dir / "model_state.keras"
42
+ state_map_path = models_dir / "state_to_idx.json"
43
+ state_peak_path = models_dir / "state_peak.json"
44
+
45
+ # scalers
46
+ dyn_state = scalers_dir / "scaler_dyn_global_state.pkl"
47
+ static_state = scalers_dir / "scaler_static_global_state.pkl"
48
+ target_state = scalers_dir / "scaler_target_global_state.pkl"
49
+ if not dyn_state.exists() or not static_state.exists() or not target_state.exists():
50
+ raise FileNotFoundError("State scalers not found under models/scalers. Expected *_state.pkl files.")
51
+ self.scaler_dyn = joblib.load(dyn_state)
52
+ self.scaler_static = joblib.load(static_state)
53
+ self.scaler_target = joblib.load(target_state)
54
+
55
+ # mappings
56
+ if state_map_path.exists():
57
+ with open(state_map_path, "r", encoding="utf-8") as fh:
58
+ self.state_to_idx = json.load(fh)
59
+ else:
60
+ self.state_to_idx = {}
61
+ if state_peak_path.exists():
62
+ with open(state_peak_path, "r", encoding="utf-8") as fh:
63
+ self.state_peak_map = json.load(fh)
64
+ else:
65
+ self.state_peak_map = {}
66
+
67
+ # inference dataset (HF only)
68
+ hf_token = os.environ.get("HF_TOKEN")
69
+ hf_repo = "previdengue/predict_inference_data_estadual"
70
+ hf_filename = "inference_data_estadual.parquet"
71
+ try:
72
+ hf_path = hf_hub_download(
73
+ repo_id=hf_repo,
74
+ filename=hf_filename,
75
+ repo_type="dataset",
76
+ token=hf_token,
77
+ )
78
+ df_loaded = pd.read_parquet(hf_path)
79
+ except Exception as e:
80
+ raise FileNotFoundError(
81
+ "Could not download 'inference_data_estadual.parquet' from HF repo 'previdengue/predict_inference_data_estadual'. "
82
+ "Ensure the dataset exists and set HF_TOKEN if the repo requires authentication."
83
+ ) from e
84
+
85
+ # normalize
86
+ df = df_loaded.copy()
87
+ required = ["estado_sigla", "year", "week", "casos_soma"]
88
+ if any(col not in df.columns for col in required):
89
+ raise ValueError("State dataset missing required columns: ['estado_sigla','year','week','casos_soma']")
90
+ df["estado_sigla"] = df["estado_sigla"].astype(str)
91
+ df = df.sort_values(["estado_sigla", "year", "week"]).reset_index(drop=True)
92
+ if "date" not in df.columns:
93
+ try:
94
+ df["date"] = pd.to_datetime(df["year"].astype(str) + df["week"].astype(str) + "0", format="%Y%W%w", errors="coerce")
95
+ except Exception:
96
+ pass
97
+ if "week_sin" not in df.columns:
98
+ df["week_sin"] = np.sin(2*np.pi*df["week"]/52)
99
+ if "week_cos" not in df.columns:
100
+ df["week_cos"] = np.cos(2*np.pi*df["week"]/52)
101
+ if "year_norm" not in df.columns:
102
+ year_min, year_max = df["year"].min(), df["year"].max()
103
+ df["year_norm"] = (df["year"] - year_min) / max(1.0, (year_max - year_min))
104
+ df["notificacao"] = df["year"].isin([2021, 2022]).astype(float)
105
+
106
+ self.df_state = df
107
+ if not model_path.exists():
108
+ raise FileNotFoundError(str(model_path) + " not found")
109
+ self.model = tf.keras.models.load_model(model_path, custom_objects={"asymmetric_mse": asymmetric_mse}, compile=False)
110
+ self._loaded = True
111
+
112
+ def _prepare_state_sequence(self, df_st: pd.DataFrame, state_sigla: str):
113
+ df_st = df_st.copy()
114
+ df_st['casos_velocidade'] = df_st['casos_soma'].diff().fillna(0)
115
+ df_st['casos_aceleracao'] = df_st['casos_velocidade'].diff().fillna(0)
116
+ df_st['casos_mm_4_semanas'] = df_st['casos_soma'].rolling(4, min_periods=1).mean()
117
+ if "notificacao" not in df_st.columns:
118
+ df_st["notificacao"] = df_st["year"].isin([2021, 2022]).astype(float)
119
+ peak = float(self.state_peak_map.get(state_sigla, 1.0))
120
+ if peak <= 0:
121
+ peak = 1.0
122
+ df_st["casos_norm"] = df_st["casos_soma"] / peak
123
+ df_st["casos_norm_log"] = np.log1p(df_st["casos_norm"])
124
+ return df_st
125
+
126
+ def predict(self, state_sigla: str, year: int = None, week: int = None, display_history_weeks: int | None = None):
127
+ if not self._loaded:
128
+ raise RuntimeError("state assets not loaded")
129
+ st = str(state_sigla).upper()
130
+ df_st = self.df_state[self.df_state["estado_sigla"] == st].copy().sort_values(["year","week"]).reset_index(drop=True)
131
+ if df_st.empty or len(df_st) < self.sequence_length:
132
+ raise ValueError(f"No data or insufficient history for state {st}")
133
+ df_st = self._prepare_state_sequence(df_st, st)
134
+ if year is not None and week is not None:
135
+ idx_list = df_st.index[(df_st['year'] == int(year)) & (df_st['week'] == int(week))].tolist()
136
+ if not idx_list:
137
+ raise ValueError("Prediction point (year/week) not found in state series")
138
+ pred_point_idx = idx_list[0]
139
+ else:
140
+ pred_point_idx = len(df_st)
141
+ last_known_idx = pred_point_idx - 1
142
+ if last_known_idx < self.sequence_length - 1:
143
+ raise ValueError("Insufficient sequence window before prediction point")
144
+ start_idx = last_known_idx - self.sequence_length + 1
145
+ input_seq = df_st.iloc[start_idx:last_known_idx+1].copy()
146
+ for col in self.static_features:
147
+ if col not in input_seq.columns:
148
+ input_seq[col] = 0.0
149
+ static_raw = input_seq[self.static_features].iloc[0].values.reshape(1, -1)
150
+ missing_dyn = [c for c in self.dynamic_features if c not in input_seq.columns]
151
+ if missing_dyn:
152
+ raise ValueError(f"Missing dynamic state features: {missing_dyn}")
153
+ dyn_raw = input_seq[self.dynamic_features].values
154
+ if hasattr(self.scaler_dyn, "n_features_in_") and self.scaler_dyn.n_features_in_ != len(self.dynamic_features):
155
+ raise ValueError(
156
+ f"State dynamic scaler expects {self.scaler_dyn.n_features_in_} features, got {len(self.dynamic_features)}."
157
+ )
158
+ dyn_scaled = self.scaler_dyn.transform(dyn_raw).reshape(1, self.sequence_length, len(self.dynamic_features))
159
+ static_scaled = self.scaler_static.transform(static_raw)
160
+ state_idx = int(self.state_to_idx.get(st, 0))
161
+ state_input = np.array([[state_idx]], dtype=np.int32)
162
+ y_pred = self.model.predict([dyn_scaled, static_scaled, state_input], verbose=0)
163
+ y_pred_reg = y_pred[0] if isinstance(y_pred, (list, tuple)) else y_pred
164
+ y_pred_log_norm = self.scaler_target.inverse_transform(y_pred_reg.reshape(-1,1)).reshape(y_pred_reg.shape)
165
+ y_pred_norm = np.expm1(y_pred_log_norm)
166
+ peak = float(self.state_peak_map.get(st, 1.0))
167
+ if peak <= 0:
168
+ peak = 1.0
169
+ prediction_counts = np.maximum(y_pred_norm.flatten() * peak, 0.0)
170
+ last_known_date = df_st.iloc[last_known_idx]['date'] if 'date' in df_st.columns and last_known_idx < len(df_st) else None
171
+ predicted_data = []
172
+ for i, val in enumerate(prediction_counts):
173
+ if pd.notna(last_known_date):
174
+ pred_date = (last_known_date + timedelta(weeks=i+1)).strftime("%Y-%m-%d")
175
+ else:
176
+ pred_date = None
177
+ predicted_data.append({"date": pred_date, "predicted_cases": int(round(float(val)))})
178
+ if display_history_weeks is None or display_history_weeks <= 0:
179
+ hist_tail = df_st.iloc[:last_known_idx+1].copy()
180
+ else:
181
+ hist_tail = df_st.iloc[max(0, last_known_idx - display_history_weeks): last_known_idx+1].copy()
182
+ historic_data = []
183
+ for _, row in hist_tail.iterrows():
184
+ historic_data.append({
185
+ "date": row["date"].strftime("%Y-%m-%d") if pd.notna(row.get("date")) else None,
186
+ "cases": int(row["casos_soma"]) if pd.notna(row.get("casos_soma")) else None
187
+ })
188
+ return {
189
+ "state": st,
190
+ "last_known_index": int(last_known_idx),
191
+ "historic_data": historic_data,
192
+ "predicted_data": predicted_data,
193
+ }