COCODEDE04 committed on
Commit
40255e4
·
verified ·
1 Parent(s): e7666b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -97
app.py CHANGED
@@ -1,112 +1,108 @@
1
- import json
2
  import os
 
3
  from typing import Any, Dict, List
4
 
5
  import numpy as np
6
  import tensorflow as tf
7
  from fastapi import FastAPI, Request
8
  from fastapi.middleware.cors import CORSMiddleware
 
 
9
 
10
  # ----------------- CONFIG -----------------
11
- MODEL_PATH = os.getenv("MODEL_PATH", "best_model.keras")
12
- STATS_PATH = os.getenv("STATS_PATH", "means_std.json")
 
 
 
 
13
  CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]
14
- # IMPORTANT: Freeze the exact training order of features:
 
15
  FEATURES: List[str] = [
16
  "autosuf_oper",
17
- "cov_improductiva",
18
- "ing_cartera_over_ing_total",
19
- "gastos_oper_over_cart",
20
- "prov_over_cartera",
21
- "_margen_bruto",
22
- "equity_over_assets",
23
- "rend_cart_over_avg_cart",
24
- "_assets",
25
- "roa_pre_tax",
26
- "cartera_vencida_ratio",
27
- "gastos_oper_over_ing_oper",
28
- "_cartera_bruta",
29
- "grado_absorcion",
30
- "_equity",
31
- "gastos_fin_over_avg_cart",
32
  "improductiva",
 
 
 
 
 
 
33
  "roe_pre_tax",
34
- "debt_to_equity",
35
  "_liab",
 
 
 
 
 
 
36
  "prov_gasto_over_cart",
 
 
 
37
  ]
38
  # ------------------------------------------
39
 
40
- print("Loading model and stats...")
 
41
  model = tf.keras.models.load_model(MODEL_PATH, compile=False)
 
 
42
 
43
- with open(STATS_PATH, "r") as f:
44
- stats: Dict[str, Dict[str, float]] = json.load(f)
45
-
46
- # ---- Per-feature transforms used at training (make all 'higher = better') ----
47
- # If during dataset prep you flipped signs on some “bad” metrics, reflect it here.
48
- # This set is the typical choice for microfinance health where larger values are worse:
49
- NEGATE = {
50
- "gastos_oper_over_cart",
51
- "prov_over_cartera",
52
- "cartera_vencida_ratio",
53
- "gastos_oper_over_ing_oper",
54
- "gastos_fin_over_avg_cart",
55
- "improductiva",
56
- "debt_to_equity",
57
- "prov_gasto_over_cart",
58
- # If your training actually negated coverage too (to align “higher=better”),
59
- # include the next line. If not, comment it out.
60
- # "cov_improductiva",
61
- }
62
 
63
  def coerce_float(val: Any) -> float:
64
- """Coerce numbers from strings with either comma or dot decimal and thousands."""
65
- if isinstance(val, (int, float)):
 
 
 
 
 
 
 
66
  return float(val)
 
67
  s = str(val).strip()
68
  if s == "":
69
  raise ValueError("empty")
 
70
  s = s.replace(" ", "")
71
  has_dot = "." in s
72
  has_comma = "," in s
 
73
  if has_dot and has_comma:
74
- # pick last as decimal
75
- if s.rfind(",") > s.rfind("."):
76
- s = s.replace(".", "").replace(",", ".")
 
 
 
77
  else:
 
78
  s = s.replace(",", "")
79
  elif has_comma and not has_dot:
80
  s = s.replace(",", ".")
81
- return float(s)
82
-
83
- def transform_feature(name: str, raw_val: Any) -> float:
84
- v = coerce_float(raw_val)
85
- if name in NEGATE:
86
- return -v
87
- return v
88
 
89
- def zscore(x: float, mean: float, std: float) -> float:
90
- if not std:
91
- return 0.0
92
- return (x - mean) / std
93
 
94
  def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
95
- """(N, K-1) logits -> (N, K) probabilities (CORAL). Enforce monotonicity."""
96
- logits = tf.convert_to_tensor(logits_np, dtype=tf.float32) # (N, K-1)
97
- sig = tf.math.sigmoid(logits)
98
- # enforce monotone increasing cumulative (numerical guard)
99
- sig_sorted = tf.sort(sig, axis=1)
100
- left = tf.concat([tf.ones_like(sig_sorted[:, :1]), sig_sorted], axis=1)
101
- right = tf.concat([sig_sorted, tf.zeros_like(sig_sorted[:, :1])], axis=1)
102
  probs = tf.clip_by_value(left - right, 1e-12, 1.0)
103
- # re-normalize (safety)
104
- probs = probs / tf.reduce_sum(probs, axis=1, keepdims=True)
105
  return probs.numpy()
106
 
107
  # ------------- FastAPI app ----------------
108
- app = FastAPI(title="Static Fingerprint API", version="1.1.0")
109
 
 
110
  app.add_middleware(
111
  CORSMiddleware,
112
  allow_origins=["*"],
@@ -121,14 +117,13 @@ def root():
121
 
122
  @app.get("/health")
123
  def health():
124
- # show the frozen order and which transforms are active
125
  return {
126
  "status": "ok",
127
  "features": FEATURES,
128
- "negated_features": sorted(list(NEGATE)),
129
  "classes": CLASSES,
130
  "model_file": MODEL_PATH,
131
- "stats_file": STATS_PATH,
 
132
  }
133
 
134
  @app.post("/echo")
@@ -138,53 +133,68 @@ async def echo(req: Request):
138
 
139
  @app.post("/predict")
140
  async def predict(req: Request):
141
- payload = await req.json()
142
- if not isinstance(payload, dict):
143
- return {"error": "Expected a JSON object mapping feature -> value."}
 
 
 
 
 
144
 
145
- transformed: Dict[str, float] = {}
146
- z_detail: Dict[str, float] = {}
147
- missing: List[str] = []
148
 
149
- z_row: List[float] = []
 
 
150
  for f in FEATURES:
151
- mean = float(stats[f]["mean"])
152
- std = float(stats[f]["std"])
153
  if f in payload:
154
- tv = transform_feature(f, payload[f]) # apply the same transform as training
 
 
 
 
 
155
  else:
 
156
  missing.append(f)
157
- tv = transform_feature(f, 0.0) # treat missing as 0 before transform
158
- transformed[f] = tv
159
- zf = zscore(tv, mean, std)
160
- z_detail[f] = zf
161
- z_row.append(zf)
162
 
163
- X = np.array([z_row], dtype=np.float32)
164
- raw = model.predict(X, verbose=0)
 
 
 
 
165
 
166
- # Decode: CORAL (K-1) vs softmax (K)
167
  if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
168
- decode_mode = "auto_coral_monotone"
169
  probs = coral_probs_from_logits(raw)[0]
 
 
 
 
 
 
170
  else:
171
- decode_mode = "softmax_or_logits_norm"
172
- probs = raw[0]
173
- s = float(np.sum(probs))
174
- if s > 0:
175
- probs = probs / s
176
 
177
  pred_idx = int(np.argmax(probs))
 
 
 
 
178
  return {
179
  "input_ok": (len(missing) == 0),
180
- "missing": missing,
181
- "transformed": transformed, # post-transform, pre-z (should match training inputs)
182
- "z_scores": z_detail,
183
  "probabilities": {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))},
184
  "predicted_state": CLASSES[pred_idx],
185
  "debug": {
186
  "raw_shape": list(raw.shape),
187
  "decode_mode": decode_mode,
188
- "raw_first_row": [float(x) for x in raw[0].tolist()],
189
  },
190
  }
 
 
1
  import os
2
+ import json
3
  from typing import Any, Dict, List
4
 
5
  import numpy as np
6
  import tensorflow as tf
7
  from fastapi import FastAPI, Request
8
  from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.responses import JSONResponse
10
+ import joblib
11
 
12
  # ----------------- CONFIG -----------------
13
+ # Use your actual filenames here (from your training export)
14
+ MODEL_PATH = os.getenv("MODEL_PATH", "best_model.h5") # or "best_model.h5" if that's what you have
15
+ IMPUTER_PATH = os.getenv("IMPUTER_PATH", "imputer.joblib")
16
+ SCALER_PATH = os.getenv("SCALER_PATH", "scaler.joblib")
17
+
18
+ # Class order per training: 0=Top .. 4=Low
19
  CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]
20
+
21
+ # EXACT feature order used during training (from your Section 3.1)
22
  FEATURES: List[str] = [
23
  "autosuf_oper",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  "improductiva",
25
+ "gastos_fin_over_avg_cart",
26
+ "_equity",
27
+ "grado_absorcion",
28
+ "_cartera_bruta",
29
+ "gastos_oper_over_ing_oper",
30
+ "cartera_vencida_ratio",
31
  "roe_pre_tax",
32
+ "_assets",
33
  "_liab",
34
+ "equity_over_assets",
35
+ "_margen_bruto",
36
+ "prov_over_cartera",
37
+ "gastos_oper_over_cart",
38
+ "ing_cartera_over_ing_total",
39
+ "debt_to_equity",
40
  "prov_gasto_over_cart",
41
+ "cov_improductiva",
42
+ "rend_cart_over_avg_cart",
43
+ "roa_pre_tax",
44
  ]
45
  # ------------------------------------------
46
 
47
+ print("Loading model / imputer / scaler...")
48
+ # Loss is not needed for inference; compile=False avoids needing custom loss objects.
49
  model = tf.keras.models.load_model(MODEL_PATH, compile=False)
50
+ imputer = joblib.load(IMPUTER_PATH) # median imputation from training
51
+ scaler = joblib.load(SCALER_PATH) # StandardScaler from training
52
 
53
+ print("Model loaded. Feature order:", FEATURES)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
def coerce_float(val: Any) -> float:
    """
    Robust numeric parse supporting both decimal conventions:
        "49.709,14" -> 49709.14   (dot thousands, comma decimal)
        "49,709.14" -> 49709.14   (comma thousands, dot decimal)
        "1.234.567" -> 1234567.0  (dot-only thousands)
        "1,234,567" -> 1234567.0  (comma-only thousands)
        "0,005"     -> 0.005
        1.23        -> 1.23
    Raises ValueError on failure (callers treat that as a missing value).
    """
    if isinstance(val, (int, float, np.integer, np.floating)):
        return float(val)

    s = str(val).strip()
    if s == "":
        raise ValueError("empty")

    s = s.replace(" ", "")
    has_dot = "." in s
    has_comma = "," in s

    if has_dot and has_comma:
        # The right-most separator is the decimal mark; the other is thousands.
        if s.rfind(",") > s.rfind("."):
            s = s.replace(".", "").replace(",", ".")
        else:
            s = s.replace(",", "")
    elif has_comma:
        # A single comma is a decimal mark; several commas can only be
        # thousands separators (the original raised on the latter).
        s = s.replace(",", ".") if s.count(",") == 1 else s.replace(",", "")
    elif has_dot and s.count(".") > 1:
        # Multiple dots with no comma can only be thousands separators.
        s = s.replace(".", "")
    # else: single dot or pure digits -> float() handles it directly

    return float(s)
 
 
 
90
 
91
def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
    """
    Decode CORAL ordinal logits (N, K-1) into class probabilities (N, K).

        P(y = 0)   = 1 - sigmoid(l_0)
        P(y = k)   = sigmoid(l_{k-1}) - sigmoid(l_k)
        P(y = K-1) = sigmoid(l_{K-2})

    When the sigmoids are not monotone decreasing, adjacent differences can
    go negative; those are clipped, and the row is re-normalized so the
    output is always a valid probability distribution.
    """
    logits = np.asarray(logits_np, dtype=np.float64)
    sig = 1.0 / (1.0 + np.exp(-logits))  # (N, K-1)
    ones = np.ones((sig.shape[0], 1), dtype=sig.dtype)
    zeros = np.zeros((sig.shape[0], 1), dtype=sig.dtype)
    left = np.concatenate([ones, sig], axis=1)   # (N, K)
    right = np.concatenate([sig, zeros], axis=1)  # (N, K)
    probs = np.clip(left - right, 1e-12, 1.0)
    # Re-normalize: clipping (or non-monotone sigmoids) can break the row sum.
    probs = probs / probs.sum(axis=1, keepdims=True)
    return probs
101
 
102
  # ------------- FastAPI app ----------------
103
+ app = FastAPI(title="Static Fingerprint API", version="1.0.0")
104
 
105
+ # Allow Excel / local tools to call the API
106
  app.add_middleware(
107
  CORSMiddleware,
108
  allow_origins=["*"],
 
117
 
118
  @app.get("/health")
119
  def health():
 
120
  return {
121
  "status": "ok",
122
  "features": FEATURES,
 
123
  "classes": CLASSES,
124
  "model_file": MODEL_PATH,
125
+ "imputer_file": IMPUTER_PATH,
126
+ "scaler_file": SCALER_PATH,
127
  }
128
 
129
  @app.post("/echo")
 
133
 
134
  @app.post("/predict")
135
  async def predict(req: Request):
136
+ """
137
+ Body: JSON object mapping feature -> value (raw numbers; median+z will be applied here)
138
+ Missing features are imputed by the training imputer (median).
139
+ """
140
+ try:
141
+ payload = await req.json()
142
+ except Exception as e:
143
+ return JSONResponse(status_code=400, content={"error": f"Invalid JSON: {e}"})
144
 
145
+ if not isinstance(payload, dict):
146
+ return JSONResponse(status_code=400, content={"error": "Expected a JSON object mapping feature -> value."})
 
147
 
148
+ # Build raw vector in EXACT training order; use np.nan for missing so imputer handles it
149
+ x_raw = []
150
+ missing = []
151
  for f in FEATURES:
 
 
152
  if f in payload:
153
+ try:
154
+ x_raw.append(coerce_float(payload[f]))
155
+ except Exception:
156
+ # treat unparsable as missing -> np.nan (imputer will fill)
157
+ x_raw.append(np.nan)
158
+ missing.append(f)
159
  else:
160
+ x_raw.append(np.nan)
161
  missing.append(f)
 
 
 
 
 
162
 
163
+ X_raw = np.array([x_raw], dtype=np.float64) # (1, 21)
164
+ X_imp = imputer.transform(X_raw) # median imputation
165
+ X_std = scaler.transform(X_imp).astype(np.float32) # z-scores as per training
166
+
167
+ # Predict
168
+ raw = model.predict(X_std, verbose=0)
169
 
170
+ # CORAL vs softmax detection (your model is CORAL with 4 logits)
171
  if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
 
172
  probs = coral_probs_from_logits(raw)[0]
173
+ decode_mode = "coral"
174
+ elif raw.ndim == 2 and raw.shape[1] == len(CLASSES):
175
+ p = raw[0]
176
+ s = float(np.sum(p))
177
+ probs = (p / s) if s > 0 else p
178
+ decode_mode = "softmax"
179
  else:
180
+ # Fallback: try CORAL first
181
+ probs = coral_probs_from_logits(raw)[0]
182
+ decode_mode = "auto"
 
 
183
 
184
  pred_idx = int(np.argmax(probs))
185
+
186
+ # Build z-score dict for transparency
187
+ z_detail = {FEATURES[i]: float(X_std[0, i]) for i in range(len(FEATURES))}
188
+
189
  return {
190
  "input_ok": (len(missing) == 0),
191
+ "missing": missing, # features that were np.nan (imputed)
192
+ "z_scores": z_detail, # exactly what the model saw
 
193
  "probabilities": {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))},
194
  "predicted_state": CLASSES[pred_idx],
195
  "debug": {
196
  "raw_shape": list(raw.shape),
197
  "decode_mode": decode_mode,
198
+ "first_row_logits": [float(v) for v in (raw[0].tolist() if raw.ndim == 2 else np.atleast_1d(raw).tolist())],
199
  },
200
  }