GitHub Actions committed on
Commit
195d91a
·
1 Parent(s): b5c878a

Auto-deploy from GitHub

Browse files
app.py CHANGED
@@ -50,6 +50,8 @@ async def startup_event():
50
  )
51
  except Exception as e:
52
  print("[WARN] DenguePredictor (municipal) não inicializado:", str(e))
 
 
53
  predictor = None
54
  try:
55
  state_predictor = StatePredictor(
@@ -58,6 +60,7 @@ async def startup_event():
58
  )
59
  except Exception as e:
60
  print("[WARN] StatePredictor não inicializado:", str(e))
 
61
  state_predictor = None
62
  print("Módulos de IA carregados com sucesso. API pronta. Modo:", "online" if ONLINE else "offline")
63
 
 
50
  )
51
  except Exception as e:
52
  print("[WARN] DenguePredictor (municipal) não inicializado:", str(e))
53
+ # print full traceback to help debugging (was previously only printing str(e))
54
+ traceback.print_exc()
55
  predictor = None
56
  try:
57
  state_predictor = StatePredictor(
 
60
  )
61
  except Exception as e:
62
  print("[WARN] StatePredictor não inicializado:", str(e))
63
+ traceback.print_exc()
64
  state_predictor = None
65
  print("Módulos de IA carregados com sucesso. API pronta. Modo:", "online" if ONLINE else "offline")
66
 
models/model_state.keras CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63f79e41cda69694029fe4b334aeb7663be08aa5cebbf309d35e6e4fd7f35bff
3
- size 2536258
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce46ac2a58f241e5ceaae51a4a9a5abc772018213860ef977989288f94f7f410
3
+ size 2534658
models/scalers/scaler_dyn_global_state.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc7972df5abd0302686c2d6ff16962ff31a13c5ca5346cbe57633de1ec34f1c1
3
  size 1303
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bb2cfb7f78d33fbf9242461bdef7783f31fbbb35a8114b75c341da36b07fa33
3
  size 1303
models/scalers/scaler_target_global_state.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a4e97671eeabf05f39cb9a6b53130816103d263c6bfffd9fc7fbee5f9c77178
3
  size 719
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86da4e22650d62cc4806750659f8c83bbd924404800d818015716f751c7e2947
3
  size 719
state_predictor.py CHANGED
@@ -11,7 +11,7 @@ from huggingface_hub import hf_hub_download
11
 
12
  @register_keras_serializable(package="Custom", name="asymmetric_mse")
13
  def asymmetric_mse(y_true, y_pred):
14
- penalty_factor = 10.0
15
  error = y_true - y_pred
16
  denom = tf.maximum(tf.abs(y_true), 1.0)
17
  rel = tf.abs(error) / denom
@@ -27,7 +27,7 @@ class StatePredictor:
27
  self.sequence_length = 12
28
  self.horizon = 6
29
  self.dynamic_features = [
30
- "casos_norm_log",
31
  "casos_velocidade", "casos_aceleracao", "casos_mm_4_semanas",
32
  "T2M_mean","T2M_std","PRECTOTCORR_mean","PRECTOTCORR_std",
33
  "RH2M_mean","RH2M_std","ALLSKY_SFC_SW_DWN_mean","ALLSKY_SFC_SW_DWN_std",
@@ -44,7 +44,6 @@ class StatePredictor:
44
  state_map_path = models_dir / "state_to_idx.json"
45
  state_peak_path = models_dir / "state_peak.json"
46
 
47
- # scalers
48
  dyn_state = scalers_dir / "scaler_dyn_global_state.pkl"
49
  static_state = scalers_dir / "scaler_static_global_state.pkl"
50
  target_state = scalers_dir / "scaler_target_global_state.pkl"
@@ -54,28 +53,24 @@ class StatePredictor:
54
  self.scaler_static = joblib.load(static_state)
55
  self.scaler_target = joblib.load(target_state)
56
 
57
- # mappings
58
  if state_map_path.exists():
59
  with open(state_map_path, "r", encoding="utf-8") as fh:
60
  self.state_to_idx = json.load(fh)
61
  else:
62
  self.state_to_idx = {}
 
63
  if state_peak_path.exists():
64
  with open(state_peak_path, "r", encoding="utf-8") as fh:
65
  self.state_peak_map = json.load(fh)
66
  else:
67
  self.state_peak_map = {}
68
 
69
- # inference dataset: HF online or local offline (.parquet only)
70
  if self.offline:
71
- # Somente .parquet é aceito no modo offline
72
  candidate_paths = []
73
  if self.local_inference_path:
74
  candidate_paths.append(self.local_inference_path)
75
- # Candidatos comuns no diretório de modelos
76
  candidate_paths.append(models_dir / "inference_data_state.parquet")
77
  candidate_paths.append(models_dir / "inference_data_estadual.parquet")
78
-
79
  found = None
80
  for p in candidate_paths:
81
  try:
@@ -86,12 +81,10 @@ class StatePredictor:
86
  continue
87
  if not found:
88
  raise FileNotFoundError(
89
- "Offline mode enabled but no local Parquet state dataset found. "
90
- "Place 'inference_data_state.parquet' or 'inference_data_estadual.parquet' under models/ or pass a valid 'local_inference_path' (.parquet)."
91
  )
92
  df = pd.read_parquet(found)
93
  else:
94
- # Tenta baixar do HF; se falhar, tenta arquivo local como fallback
95
  df = None
96
  try:
97
  inference_path = hf_hub_download(
@@ -101,19 +94,19 @@ class StatePredictor:
101
  )
102
  df = pd.read_parquet(inference_path)
103
  except Exception:
104
- # Fallback local
105
  for p in [models_dir / "inference_data_state.parquet", models_dir / "inference_data_estadual.parquet"]:
106
  if p.exists():
107
  df = pd.read_parquet(p)
108
  break
109
  if df is None:
110
  raise FileNotFoundError(
111
- "Online state dataset not available from HF and no local fallback found. "
112
- "Place 'inference_data_estadual.parquet' under models/ or switch APP_MODE to 'offline'."
113
  )
 
114
  required = ["estado_sigla", "year", "week", "casos_soma"]
115
  if any(col not in df.columns for col in required):
116
  raise ValueError("State dataset missing required columns: ['estado_sigla','year','week','casos_soma']")
 
117
  df["estado_sigla"] = df["estado_sigla"].astype(str)
118
  df = df.sort_values(["estado_sigla", "year", "week"]).reset_index(drop=True)
119
  if "date" not in df.columns:
@@ -131,23 +124,19 @@ class StatePredictor:
131
  df["notificacao"] = df["year"].isin([2021, 2022]).astype(float)
132
 
133
  self.df_state = df
 
134
  if not model_path.exists():
135
  raise FileNotFoundError(str(model_path) + " not found")
136
  self.model = tf.keras.models.load_model(model_path, custom_objects={"asymmetric_mse": asymmetric_mse}, compile=False)
137
  self._loaded = True
138
 
139
- def _prepare_state_sequence(self, df_st: pd.DataFrame, state_sigla: str):
140
  df_st = df_st.copy()
141
  df_st['casos_velocidade'] = df_st['casos_soma'].diff().fillna(0)
142
  df_st['casos_aceleracao'] = df_st['casos_velocidade'].diff().fillna(0)
143
  df_st['casos_mm_4_semanas'] = df_st['casos_soma'].rolling(4, min_periods=1).mean()
144
  if "notificacao" not in df_st.columns:
145
  df_st["notificacao"] = df_st["year"].isin([2021, 2022]).astype(float)
146
- peak = float(self.state_peak_map.get(state_sigla, 1.0))
147
- if peak <= 0:
148
- peak = 1.0
149
- df_st["casos_norm"] = df_st["casos_soma"] / peak
150
- df_st["casos_norm_log"] = np.log1p(df_st["casos_norm"])
151
  return df_st
152
 
153
  def predict(self, state_sigla: str, year: int = None, week: int = None, display_history_weeks: int | None = None):
@@ -157,7 +146,7 @@ class StatePredictor:
157
  df_st = self.df_state[self.df_state["estado_sigla"] == st].copy().sort_values(["year","week"]).reset_index(drop=True)
158
  if df_st.empty or len(df_st) < self.sequence_length:
159
  raise ValueError(f"No data or insufficient history for state {st}")
160
- df_st = self._prepare_state_sequence(df_st, st)
161
  if year is not None and week is not None:
162
  idx_list = df_st.index[(df_st['year'] == int(year)) & (df_st['week'] == int(week))].tolist()
163
  if not idx_list:
@@ -173,11 +162,11 @@ class StatePredictor:
173
  for col in self.static_features:
174
  if col not in input_seq.columns:
175
  input_seq[col] = 0.0
176
- static_raw = input_seq[self.static_features].iloc[0].values.reshape(1, -1)
177
  missing_dyn = [c for c in self.dynamic_features if c not in input_seq.columns]
178
  if missing_dyn:
179
  raise ValueError(f"Missing dynamic state features: {missing_dyn}")
180
  dyn_raw = input_seq[self.dynamic_features].values
 
181
  if hasattr(self.scaler_dyn, "n_features_in_") and self.scaler_dyn.n_features_in_ != len(self.dynamic_features):
182
  raise ValueError(
183
  f"State dynamic scaler expects {self.scaler_dyn.n_features_in_} features, got {len(self.dynamic_features)}."
@@ -188,15 +177,11 @@ class StatePredictor:
188
  state_input = np.array([[state_idx]], dtype=np.int32)
189
  y_pred = self.model.predict([dyn_scaled, static_scaled, state_input], verbose=0)
190
  y_pred_reg = y_pred[0] if isinstance(y_pred, (list, tuple)) else y_pred
191
- y_pred_log_norm = self.scaler_target.inverse_transform(y_pred_reg.reshape(-1,1)).reshape(y_pred_reg.shape)
192
- y_pred_norm = np.expm1(y_pred_log_norm)
193
- peak = float(self.state_peak_map.get(st, 1.0))
194
- if peak <= 0:
195
- peak = 1.0
196
- prediction_counts = np.maximum(y_pred_norm.flatten() * peak, 0.0)
197
  last_known_date = df_st.iloc[last_known_idx]['date'] if 'date' in df_st.columns and last_known_idx < len(df_st) else None
198
  predicted_data = []
199
- for i, val in enumerate(prediction_counts):
200
  if pd.notna(last_known_date):
201
  pred_date = (last_known_date + timedelta(weeks=i+1)).strftime("%Y-%m-%d")
202
  else:
 
11
 
12
  @register_keras_serializable(package="Custom", name="asymmetric_mse")
13
  def asymmetric_mse(y_true, y_pred):
14
+ penalty_factor = 5.0
15
  error = y_true - y_pred
16
  denom = tf.maximum(tf.abs(y_true), 1.0)
17
  rel = tf.abs(error) / denom
 
27
  self.sequence_length = 12
28
  self.horizon = 6
29
  self.dynamic_features = [
30
+ "casos_soma",
31
  "casos_velocidade", "casos_aceleracao", "casos_mm_4_semanas",
32
  "T2M_mean","T2M_std","PRECTOTCORR_mean","PRECTOTCORR_std",
33
  "RH2M_mean","RH2M_std","ALLSKY_SFC_SW_DWN_mean","ALLSKY_SFC_SW_DWN_std",
 
44
  state_map_path = models_dir / "state_to_idx.json"
45
  state_peak_path = models_dir / "state_peak.json"
46
 
 
47
  dyn_state = scalers_dir / "scaler_dyn_global_state.pkl"
48
  static_state = scalers_dir / "scaler_static_global_state.pkl"
49
  target_state = scalers_dir / "scaler_target_global_state.pkl"
 
53
  self.scaler_static = joblib.load(static_state)
54
  self.scaler_target = joblib.load(target_state)
55
 
 
56
  if state_map_path.exists():
57
  with open(state_map_path, "r", encoding="utf-8") as fh:
58
  self.state_to_idx = json.load(fh)
59
  else:
60
  self.state_to_idx = {}
61
+
62
  if state_peak_path.exists():
63
  with open(state_peak_path, "r", encoding="utf-8") as fh:
64
  self.state_peak_map = json.load(fh)
65
  else:
66
  self.state_peak_map = {}
67
 
 
68
  if self.offline:
 
69
  candidate_paths = []
70
  if self.local_inference_path:
71
  candidate_paths.append(self.local_inference_path)
 
72
  candidate_paths.append(models_dir / "inference_data_state.parquet")
73
  candidate_paths.append(models_dir / "inference_data_estadual.parquet")
 
74
  found = None
75
  for p in candidate_paths:
76
  try:
 
81
  continue
82
  if not found:
83
  raise FileNotFoundError(
84
+ "Offline mode enabled but no local Parquet state dataset found."
 
85
  )
86
  df = pd.read_parquet(found)
87
  else:
 
88
  df = None
89
  try:
90
  inference_path = hf_hub_download(
 
94
  )
95
  df = pd.read_parquet(inference_path)
96
  except Exception:
 
97
  for p in [models_dir / "inference_data_state.parquet", models_dir / "inference_data_estadual.parquet"]:
98
  if p.exists():
99
  df = pd.read_parquet(p)
100
  break
101
  if df is None:
102
  raise FileNotFoundError(
103
+ "Online state dataset not available and no local fallback found."
 
104
  )
105
+
106
  required = ["estado_sigla", "year", "week", "casos_soma"]
107
  if any(col not in df.columns for col in required):
108
  raise ValueError("State dataset missing required columns: ['estado_sigla','year','week','casos_soma']")
109
+
110
  df["estado_sigla"] = df["estado_sigla"].astype(str)
111
  df = df.sort_values(["estado_sigla", "year", "week"]).reset_index(drop=True)
112
  if "date" not in df.columns:
 
124
  df["notificacao"] = df["year"].isin([2021, 2022]).astype(float)
125
 
126
  self.df_state = df
127
+
128
  if not model_path.exists():
129
  raise FileNotFoundError(str(model_path) + " not found")
130
  self.model = tf.keras.models.load_model(model_path, custom_objects={"asymmetric_mse": asymmetric_mse}, compile=False)
131
  self._loaded = True
132
 
133
+ def _prepare_state_sequence(self, df_st: pd.DataFrame):
134
  df_st = df_st.copy()
135
  df_st['casos_velocidade'] = df_st['casos_soma'].diff().fillna(0)
136
  df_st['casos_aceleracao'] = df_st['casos_velocidade'].diff().fillna(0)
137
  df_st['casos_mm_4_semanas'] = df_st['casos_soma'].rolling(4, min_periods=1).mean()
138
  if "notificacao" not in df_st.columns:
139
  df_st["notificacao"] = df_st["year"].isin([2021, 2022]).astype(float)
 
 
 
 
 
140
  return df_st
141
 
142
  def predict(self, state_sigla: str, year: int = None, week: int = None, display_history_weeks: int | None = None):
 
146
  df_st = self.df_state[self.df_state["estado_sigla"] == st].copy().sort_values(["year","week"]).reset_index(drop=True)
147
  if df_st.empty or len(df_st) < self.sequence_length:
148
  raise ValueError(f"No data or insufficient history for state {st}")
149
+ df_st = self._prepare_state_sequence(df_st)
150
  if year is not None and week is not None:
151
  idx_list = df_st.index[(df_st['year'] == int(year)) & (df_st['week'] == int(week))].tolist()
152
  if not idx_list:
 
162
  for col in self.static_features:
163
  if col not in input_seq.columns:
164
  input_seq[col] = 0.0
 
165
  missing_dyn = [c for c in self.dynamic_features if c not in input_seq.columns]
166
  if missing_dyn:
167
  raise ValueError(f"Missing dynamic state features: {missing_dyn}")
168
  dyn_raw = input_seq[self.dynamic_features].values
169
+ static_raw = input_seq[self.static_features].iloc[0].values.reshape(1, -1)
170
  if hasattr(self.scaler_dyn, "n_features_in_") and self.scaler_dyn.n_features_in_ != len(self.dynamic_features):
171
  raise ValueError(
172
  f"State dynamic scaler expects {self.scaler_dyn.n_features_in_} features, got {len(self.dynamic_features)}."
 
177
  state_input = np.array([[state_idx]], dtype=np.int32)
178
  y_pred = self.model.predict([dyn_scaled, static_scaled, state_input], verbose=0)
179
  y_pred_reg = y_pred[0] if isinstance(y_pred, (list, tuple)) else y_pred
180
+ y_pred_real_matrix = self.scaler_target.inverse_transform(y_pred_reg.reshape(-1,1)).reshape(y_pred_reg.shape)
181
+ y_pred_real_matrix = np.maximum(y_pred_real_matrix, 0.0)
 
 
 
 
182
  last_known_date = df_st.iloc[last_known_idx]['date'] if 'date' in df_st.columns and last_known_idx < len(df_st) else None
183
  predicted_data = []
184
+ for i, val in enumerate(y_pred_real_matrix.flatten()):
185
  if pd.notna(last_known_date):
186
  pred_date = (last_known_date + timedelta(weeks=i+1)).strftime("%Y-%m-%d")
187
  else: