COCODEDE04 committed on
Commit
e65910c
·
verified ·
1 Parent(s): d264e59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -188
app.py CHANGED
@@ -1,175 +1,119 @@
1
- # app.py
2
  import os
3
  import json
4
- from pathlib import Path
5
- from typing import Any, Dict, List, Tuple
6
 
7
  import numpy as np
8
  import tensorflow as tf
 
9
  from fastapi import FastAPI, Request
 
10
  from fastapi.middleware.cors import CORSMiddleware
11
 
12
- # ----------------- PATHS & CONFIG -----------------
13
- BASE_DIR = Path(__file__).resolve().parent
14
-
15
- # Prefer env vars, fall back to files next to app.py
16
- MODEL_PATH = os.getenv("MODEL_PATH") or str(BASE_DIR / "best_model.keras")
17
- if not Path(MODEL_PATH).exists():
18
- # fallback to .h5 if .keras not present
19
- alt = BASE_DIR / "best_model.h5"
20
- if alt.exists():
21
- MODEL_PATH = str(alt)
22
-
23
- STATS_PATH = os.getenv("STATS_PATH") or str(BASE_DIR / "means_std.json")
24
- IMPUTER_PATH = os.getenv("IMPUTER_PATH") or str(BASE_DIR / "imputer.joblib")
25
- SCALER_PATH = os.getenv("SCALER_PATH") or str(BASE_DIR / "scaler.joblib")
26
-
27
- CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"] # ordinal: 0..4
28
-
29
- # IMPORTANT — exact feature order used during training
30
- FEATURE_ORDER: List[str] = [
31
- "autosuf_oper",
32
- "improductiva",
33
- "gastos_fin_over_avg_cart",
34
- "_equity",
35
- "grado_absorcion",
36
- "_cartera_bruta",
37
- "gastos_oper_over_ing_oper",
38
- "cartera_vencida_ratio",
39
- "roe_pre_tax",
40
- "_assets",
41
- "_liab",
42
- "equity_over_assets",
43
- "_margen_bruto",
44
- "prov_over_cartera",
45
- "gastos_oper_over_cart",
46
- "ing_cartera_over_ing_total",
47
- "debt_to_equity",
48
- "prov_gasto_over_cart",
49
- "cov_improductiva",
50
- "rend_cart_over_avg_cart",
51
- "roa_pre_tax",
52
- ]
53
-
54
- print("Resolved paths:")
55
- print(" MODEL_PATH :", MODEL_PATH)
56
- print(" STATS_PATH :", STATS_PATH)
57
- print(" IMPUTER_PATH:", IMPUTER_PATH)
58
- print(" SCALER_PATH :", SCALER_PATH)
59
-
60
- # ----------------- LOAD ARTIFACTS -----------------
61
  print("Loading model / imputer / scaler...")
62
- # If the model used custom losses/metrics you’d pass custom_objects here.
 
63
  model = tf.keras.models.load_model(MODEL_PATH, compile=False)
64
 
65
- # Optional: imputer & scaler from training pipeline
66
- imputer = None
67
- scaler = None
 
 
 
 
68
  try:
69
- import joblib # in requirements
70
- if Path(IMPUTER_PATH).exists():
71
- imputer = joblib.load(IMPUTER_PATH)
72
- print("Loaded imputer:", IMPUTER_PATH)
73
- if Path(SCALER_PATH).exists():
74
- scaler = joblib.load(SCALER_PATH)
75
- print("Loaded scaler :", SCALER_PATH)
76
- except Exception as e:
77
- print("Imputer/scaler not loaded:", e)
78
-
79
- # Optional: stats fallback for manual z-scoring
80
- stats: Dict[str, Dict[str, float]] = {}
81
- if Path(STATS_PATH).exists():
82
- with open(STATS_PATH, "r") as f:
83
- stats = json.load(f)
84
- print("Loaded means/std from:", STATS_PATH)
85
-
86
- # ----------------- HELPERS -----------------
87
  def coerce_float(val: Any) -> float:
88
- """
89
- Accepts numeric or strings like:
90
- '49.709,14' -> 49709.14 ; '49,709.14' -> 49709.14 ; '0,005' -> 0.005
91
- """
92
- if isinstance(val, (int, float, np.number)):
93
  return float(val)
94
- s = str(val).strip()
95
  if s == "":
96
- raise ValueError("empty")
97
- s = s.replace(" ", "")
98
- has_dot, has_comma = "." in s, "," in s
99
- if has_dot and has_comma:
100
  if s.rfind(",") > s.rfind("."):
101
- s = s.replace(".", "")
102
- s = s.replace(",", ".")
103
  else:
104
  s = s.replace(",", "")
105
- elif has_comma and not has_dot:
106
  s = s.replace(",", ".")
107
- return float(s)
 
 
 
108
 
109
- def build_matrix_from_payload(payload: Dict[str, Any]) -> Tuple[np.ndarray, Dict[str, float], List[str]]:
110
- """
111
- Returns:
112
- X (1, 21) ready for model (imputed+scaled if artifacts exist; else z-scored via stats),
113
- z_detail (dict feature -> standardized value used),
114
- missing list (features not present in payload)
115
- """
116
- raw = []
117
- missing: List[str] = []
118
- for f in FEATURE_ORDER:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  if f in payload:
120
- try:
121
- raw.append(coerce_float(payload[f]))
122
- except Exception:
123
- raw.append(np.nan)
124
  else:
125
- raw.append(np.nan)
126
  missing.append(f)
 
 
127
 
128
- arr = np.array([raw], dtype=np.float32) # shape (1, 21)
129
 
130
- # primary path: use imputer + scaler if both available
131
- if imputer is not None and scaler is not None:
132
- arr_imp = imputer.transform(arr) # median impute
133
- arr_std = scaler.transform(arr_imp) # z-score to training distribution
134
- z_row = arr_std[0].tolist()
135
- z_detail = {f: float(z_row[i]) for i, f in enumerate(FEATURE_ORDER)}
136
- return arr_std.astype(np.float32), z_detail, missing
137
 
138
- # fallback path: manual z-score using means_std.json
139
- z_vals = []
140
- z_detail = {}
141
- for i, f in enumerate(FEATURE_ORDER):
142
- v = arr[0, i]
143
- if f in stats and "mean" in stats[f] and "std" in stats[f] and stats[f]["std"]:
144
- mean = float(stats[f]["mean"])
145
- std = float(stats[f]["std"])
146
- vv = 0.0 if np.isnan(v) else float(v)
147
- z = (vv - mean) / std
148
- else:
149
- z = 0.0 # safest fallback
150
- z_vals.append(z)
151
- z_detail[f] = float(z)
152
- return np.array([z_vals], dtype=np.float32), z_detail, missing
153
 
154
- def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
155
- """
156
- CORAL decode: (N, K-1) logits -> (N, K) probs.
157
- Adds a small monotonicity fix (non-increasing thresholds).
158
- """
159
- logits = tf.convert_to_tensor(logits_np, dtype=tf.float32) # (N, K-1)
160
- sig = tf.math.sigmoid(logits) # p(y>k)
161
- # Enforce non-increasing along thresholds (numerical guard)
162
- sig = tf.clip_by_value(sig, 1e-12, 1.0 - 1e-12)
163
- sig_sorted = tf.minimum(sig, tf.math.cummin(sig, axis=1, exclusive=False))
164
- left = tf.concat([tf.ones_like(sig_sorted[:, :1]), sig_sorted], axis=1)
165
- right = tf.concat([sig_sorted, tf.zeros_like(sig_sorted[:, :1])], axis=1)
166
- probs = tf.clip_by_value(left - right, 1e-12, 1.0)
167
- # Normalize row just in case
168
- probs = probs / tf.reduce_sum(probs, axis=1, keepdims=True)
169
- return probs.numpy()
170
 
171
- # ----------------- FASTAPI -----------------
172
- app = FastAPI(title="Static Fingerprint API", version="1.0.0")
173
 
174
  app.add_middleware(
175
  CORSMiddleware,
@@ -179,6 +123,7 @@ app.add_middleware(
179
  allow_headers=["*"],
180
  )
181
 
 
182
  @app.get("/")
183
  def root():
184
  return {
@@ -186,68 +131,78 @@ def root():
186
  "try": ["GET /health", "POST /predict"],
187
  }
188
 
 
189
  @app.get("/health")
190
  def health():
191
  return {
192
  "status": "ok",
 
 
 
193
  "classes": CLASSES,
194
- "feature_order": FEATURE_ORDER,
195
- "paths": {
196
- "model": MODEL_PATH,
197
- "stats": STATS_PATH if Path(STATS_PATH).exists() else None,
198
- "imputer": IMPUTER_PATH if Path(IMPUTER_PATH).exists() else None,
199
- "scaler": SCALER_PATH if Path(SCALER_PATH).exists() else None,
200
- "base_dir_files": [p.name for p in BASE_DIR.iterdir()],
201
- },
202
- "has_imputer": imputer is not None,
203
- "has_scaler": scaler is not None,
204
  }
205
 
 
206
  @app.post("/echo")
207
  async def echo(req: Request):
 
208
  payload = await req.json()
209
  return {"received": payload}
210
 
 
211
  @app.post("/predict")
212
  async def predict(req: Request):
213
  """
214
- Body: JSON dict mapping feature -> value (raw numbers). Example:
 
215
  {
216
  "autosuf_oper": 1.0,
217
  "cov_improductiva": 0.9,
218
  ...
219
  }
220
  """
221
- payload = await req.json()
222
- if not isinstance(payload, dict):
223
- return {"error": "Expected a JSON object mapping feature -> value."}
224
-
225
- X, z_detail, missing = build_matrix_from_payload(payload) # shape (1, 21)
226
-
227
- raw = model.predict(X, verbose=0)
228
-
229
- # Auto-detect output head: CORAL (K-1) or softmax (K)
230
- decode_mode = "auto_coral"
231
- if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
232
- probs = coral_probs_from_logits(raw)[0]
233
- decode_mode = "auto_coral_monotone"
234
- else:
235
- # assume logits for K classes
236
- logits = tf.convert_to_tensor(raw, dtype=tf.float32)
237
- probs = tf.nn.softmax(logits, axis=1).numpy()[0]
238
- decode_mode = "softmax"
239
-
240
- pred_idx = int(np.argmax(probs))
241
- out = {
242
- "input_ok": (len(missing) == 0),
243
- "missing": missing,
244
- "z_scores": z_detail,
245
- "probabilities": {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))},
246
- "predicted_state": CLASSES[pred_idx],
247
- "debug": {
248
- "raw_shape": list(raw.shape),
249
- "decode_mode": decode_mode,
250
- "raw_first_row": [float(x) for x in raw[0].tolist()],
251
- },
252
- }
253
- return out
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import json
3
+ import traceback
4
+ from typing import Any, Dict
5
 
6
  import numpy as np
7
  import tensorflow as tf
8
+ import joblib
9
  from fastapi import FastAPI, Request
10
+ from fastapi.responses import JSONResponse
11
  from fastapi.middleware.cors import CORSMiddleware
12
 
13
+ # -------------------- CONFIG --------------------
14
+ MODEL_PATH = os.getenv("MODEL_PATH", "best_model.h5")
15
+ STATS_PATH = os.getenv("STATS_PATH", "means_std.json")
16
+ IMPUTER_PATH = os.getenv("IMPUTER_PATH", "imputer.joblib")
17
+ SCALER_PATH = os.getenv("SCALER_PATH", "scaler.joblib")
18
+
19
+ CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]
20
+ # ------------------------------------------------
21
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
print("Loading model / imputer / scaler...")

# ---- Model ----
# Fail-fast: an unreadable model file aborts startup here (compile=False
# because the model is only used for inference).
model = tf.keras.models.load_model(MODEL_PATH, compile=False)

# ---- Stats ----
# means_std.json maps feature name -> {"mean": ..., "std": ...}; required.
with open(STATS_PATH, "r") as f:
    stats: Dict[str, Dict[str, float]] = json.load(f)

# NOTE(review): feature order is derived from the JSON key order. The
# previous revision pinned an explicit FEATURE_ORDER list — confirm that
# means_std.json preserves the exact training column order.
FEATURES = list(stats.keys())

# ---- Optional artifacts ----
# Best-effort loads: a missing imputer/scaler downgrades to the manual
# z-scoring fallback instead of crashing the service.
try:
    imputer = joblib.load(IMPUTER_PATH)
    print("Imputer loaded.")
except Exception:
    imputer = None
    print("⚠️ No imputer found — skipping median imputation.")

try:
    scaler = joblib.load(SCALER_PATH)
    print("Scaler loaded.")
except Exception:
    scaler = None
    print("⚠️ No scaler found using manual z-scoring.")
47
+
48
+ # -------------------- HELPERS --------------------
 
 
 
 
49
def coerce_float(val: Any) -> float:
    """Best-effort conversion of *val* to float.

    Accepts numbers directly, plus strings in European ('49.709,14',
    '0,005') or US ('49,709.14') formats. Empty or unparseable input
    yields 0.0 rather than raising.
    """
    if isinstance(val, (int, float)):
        return float(val)

    text = str(val).strip().replace(" ", "")
    if not text:
        return 0.0

    has_comma = "," in text
    has_dot = "." in text
    if has_comma and has_dot:
        # Whichever separator appears last is the decimal mark.
        if text.rfind(",") > text.rfind("."):
            text = text.replace(".", "").replace(",", ".")  # European style
        else:
            text = text.replace(",", "")                    # US style
    elif has_comma:
        text = text.replace(",", ".")                       # comma decimal

    try:
        return float(text)
    except (TypeError, ValueError):
        return 0.0
67
 
68
+
69
def _z(val: Any, mean: float, sd: float) -> float:
    """Return the z-score (val - mean) / sd.

    Falls back to 0.0 when the value cannot be coerced to a number or
    when *sd* is zero/falsy (avoids division by zero).
    """
    try:
        value = coerce_float(val)
    except Exception:
        return 0.0
    if not sd:
        return 0.0
    return (value - mean) / sd
77
+
78
+
79
def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
    """Decode CORAL ordinal-regression logits into class probabilities.

    Args:
        logits_np: array of shape (N, K-1), the K-1 cumulative threshold
            logits where sigmoid(logit_k) approximates P(y > k).

    Returns:
        float32 array of shape (N, K) whose rows are non-negative and sum
        to 1.

    Fixes vs. the previous version: (1) cumulative sigmoids are forced to
    be non-increasing across thresholds (a numerical guard the earlier
    revision had, which was dropped); (2) rows are renormalized, since
    clipping negative adjacent differences to 1e-12 otherwise leaves rows
    that do not sum to 1. Also computed in pure NumPy — the input and
    output are NumPy arrays, so the TensorFlow round-trip was unnecessary.
    """
    logits = np.asarray(logits_np, dtype=np.float32)
    sig = 1.0 / (1.0 + np.exp(-logits))          # sigmoid: P(y > k)
    sig = np.clip(sig, 1e-12, 1.0 - 1e-12)       # numerical guard
    sig = np.minimum.accumulate(sig, axis=1)     # enforce non-increasing

    ones = np.ones((sig.shape[0], 1), dtype=sig.dtype)
    zeros = np.zeros((sig.shape[0], 1), dtype=sig.dtype)
    left = np.concatenate([ones, sig], axis=1)   # P(y > k-1), with P(y > -1)=1
    right = np.concatenate([sig, zeros], axis=1) # P(y > k),   with P(y > K-1)=0
    probs = np.clip(left - right, 1e-12, 1.0)    # P(y = k) = P(y>k-1) - P(y>k)

    probs /= probs.sum(axis=1, keepdims=True)    # renormalize each row
    return probs
87
+
88
+
89
def build_matrix_from_payload(payload: Dict[str, Any]):
    """Build the (1, len(FEATURES)) model input from a feature->value dict.

    Returns:
        X:        float32 array of shape (1, n_features), ready for the model.
        z_detail: dict feature -> the standardized value actually fed in.
        missing:  list of features absent from the payload.

    BUG FIX: the previous version manually z-scored every value and THEN
    passed the result through the fitted imputer/scaler. Those artifacts
    were fit on RAW feature values, so the data ended up standardized
    twice. The artifacts must receive raw values; manual z-scoring via
    means_std.json is only the fallback when they are unavailable.
    """
    raw_vals = []
    missing = []
    for feat in FEATURES:
        if feat in payload:
            raw_vals.append(coerce_float(payload[feat]))
        else:
            raw_vals.append(np.nan)  # leave a hole for the imputer to fill
            missing.append(feat)

    arr = np.array([raw_vals], dtype=np.float32)

    # Preferred path: training-time imputer + scaler applied to RAW values.
    if imputer is not None and scaler is not None:
        X = scaler.transform(imputer.transform(arr)).astype(np.float32)
        z_detail = {f: float(X[0, i]) for i, f in enumerate(FEATURES)}
        return X, z_detail, missing

    # Fallback: manual z-scoring from means_std.json. Missing values are
    # treated as raw 0.0, matching the previous behavior.
    z_vals = []
    z_detail = {}
    for i, feat in enumerate(FEATURES):
        v = arr[0, i]
        zf = _z(0.0 if np.isnan(v) else float(v),
                stats[feat]["mean"], stats[feat]["std"])
        z_vals.append(zf)
        z_detail[feat] = zf
    return np.array([z_vals], dtype=np.float32), z_detail, missing
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
+ # -------------------- APP INIT --------------------
116
+ app = FastAPI(title="Static Fingerprint API", version="1.1.0")
117
 
118
  app.add_middleware(
119
  CORSMiddleware,
 
123
  allow_headers=["*"],
124
  )
125
 
126
+ # -------------------- ROUTES --------------------
127
  @app.get("/")
128
  def root():
129
  return {
 
131
  "try": ["GET /health", "POST /predict"],
132
  }
133
 
134
+
135
@app.get("/health")
def health():
    """Liveness/readiness probe: report status plus artifact and feature metadata."""
    report = dict(
        status="ok",
        model_file=MODEL_PATH,
        stats_file=STATS_PATH,
        features=FEATURES,
        classes=CLASSES,
        imputer_loaded=(imputer is not None),
        scaler_loaded=(scaler is not None),
    )
    return report
146
 
147
+
148
@app.post("/echo")
async def echo(req: Request):
    """Debug endpoint: parse the JSON body and return it unchanged."""
    body = await req.json()
    return {"received": body}
153
 
154
+
155
@app.post("/predict")
async def predict(req: Request):
    """
    POST JSON mapping each feature to a numeric value.
    Example:
    {
        "autosuf_oper": 1.0,
        "cov_improductiva": 0.9,
        ...
    }

    Returns the per-class probabilities, the argmax class label, the
    standardized inputs used, and a debug section; 400 on a non-dict body,
    500 (with traceback) on any internal failure.
    """
    try:
        payload = await req.json()
        if not isinstance(payload, dict):
            # Reject arrays/scalars early with a client error.
            return JSONResponse(
                status_code=400,
                content={"error": "Expected a JSON object mapping feature -> value."},
            )

        # X: (1, n_features); missing features are reported, not fatal.
        X, z_detail, missing = build_matrix_from_payload(payload)
        raw = model.predict(X, verbose=0)

        # Detect output type (CORAL or softmax)
        # A (N, K-1) head is treated as CORAL threshold logits; anything
        # else is assumed to be K class logits.
        if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
            probs = coral_probs_from_logits(raw)[0]
            decode_mode = "auto_coral_monotone"
        else:
            # NOTE(review): this assumes the model emits raw logits. If the
            # final layer already applies softmax, re-applying it here
            # distorts the distribution — confirm against the training code.
            logits = tf.convert_to_tensor(raw, dtype=tf.float32)
            probs = tf.nn.softmax(logits, axis=1).numpy()[0]
            decode_mode = "softmax"

        # Renormalize defensively so reported probabilities sum to 1.
        probs = probs / np.sum(probs)
        pred_idx = int(np.argmax(probs))

        return {
            "input_ok": (len(missing) == 0),
            "missing": missing,
            "z_scores": z_detail,
            "probabilities": {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))},
            "predicted_state": CLASSES[pred_idx],
            "debug": {
                "raw_shape": list(raw.shape),
                "decode_mode": decode_mode,
                "raw_first_row": [float(x) for x in raw[0].tolist()],
            },
        }

    except Exception as e:
        # Surface the traceback to the caller (debug-friendly; consider
        # hiding it in production deployments).
        tb = traceback.format_exc()
        print("🔥 ERROR in /predict:", tb)
        return JSONResponse(
            status_code=500,
            content={"error": str(e), "traceback": tb},
        )