COCODEDE04 committed on
Commit
6363de7
·
verified ·
1 Parent(s): acbe7ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -143
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import json
2
  import os
3
- from typing import Any, Dict
4
 
5
  import numpy as np
6
  import tensorflow as tf
@@ -11,120 +11,102 @@ from fastapi.middleware.cors import CORSMiddleware
11
  MODEL_PATH = os.getenv("MODEL_PATH", "best_model.h5")
12
  STATS_PATH = os.getenv("STATS_PATH", "means_std.json")
13
  CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # ------------------------------------------
15
 
16
- # Debug & decoding control
17
- FORCE_CORAL = os.getenv("FORCE_CORAL", "0") in ("1", "true", "True", "YES", "yes")
18
- RETURN_DEBUG = os.getenv("RETURN_DEBUG", "1") in ("1", "true", "True", "YES", "yes")
19
-
20
  print("Loading model and stats...")
21
  model = tf.keras.models.load_model(MODEL_PATH, compile=False)
22
 
23
  with open(STATS_PATH, "r") as f:
24
  stats: Dict[str, Dict[str, float]] = json.load(f)
25
 
26
- # IMPORTANT: FEATURES order must match training!
27
- FEATURES = list(stats.keys())
28
- print("Feature order:", FEATURES)
29
-
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- # ---------- robust numeric coercion ----------
32
  def coerce_float(val: Any) -> float:
33
- """
34
- Accepts numeric, or strings like:
35
- "49.709,14" -> 49709.14
36
- "49,709.14" -> 49709.14
37
- "0,005" -> 0.005
38
- " 1 234 " -> 1234
39
- Returns float, or raises ValueError if impossible.
40
- """
41
  if isinstance(val, (int, float)):
42
  return float(val)
43
-
44
  s = str(val).strip()
45
  if s == "":
46
  raise ValueError("empty")
47
-
48
  s = s.replace(" ", "")
49
  has_dot = "." in s
50
  has_comma = "," in s
51
-
52
  if has_dot and has_comma:
53
- last_dot = s.rfind(".")
54
- last_comma = s.rfind(",")
55
- if last_comma > last_dot:
56
- s = s.replace(".", "")
57
- s = s.replace(",", ".")
58
  else:
59
  s = s.replace(",", "")
60
  elif has_comma and not has_dot:
61
  s = s.replace(",", ".")
62
- # dots only or digits -> leave
63
  return float(s)
64
 
 
 
 
 
 
65
 
66
- def _z(val: Any, mean: float, sd: float) -> float:
67
- try:
68
- v = coerce_float(val)
69
- except Exception:
70
- return 0.0
71
- if not sd:
72
  return 0.0
73
- return (v - mean) / sd
74
-
75
-
76
- # ---------- CORAL utilities ----------
77
- def enforce_nonincreasing(sig_vec: np.ndarray) -> np.ndarray:
78
- """
79
- Given a 1D array of cumulative probs s (should be non-increasing for CORAL),
80
- enforce s[0] >= s[1] >= ... >= s[K-1] using a simple PAV algorithm.
81
- """
82
- s = sig_vec.astype(float).copy()
83
- n = len(s)
84
- blocks = [[i] for i in range(n)]
85
- vals = s.tolist()
86
-
87
- i = 0
88
- while i < len(vals) - 1:
89
- if vals[i] < vals[i + 1]: # violation: should be non-increasing
90
- merged_idx = blocks[i] + blocks[i + 1]
91
- avg = (
92
- (vals[i] * len(blocks[i]) + vals[i + 1] * len(blocks[i + 1]))
93
- / (len(blocks[i]) + len(blocks[i + 1]))
94
- )
95
- blocks[i] = merged_idx
96
- vals[i] = avg
97
- del blocks[i + 1]
98
- del vals[i + 1]
99
- if i > 0:
100
- i -= 1
101
- else:
102
- i += 1
103
-
104
- out = np.zeros(n, dtype=float)
105
- for v, idxs in zip(vals, blocks):
106
- for j in idxs:
107
- out[j] = v
108
- return np.clip(out, 1e-12, 1 - 1e-12)
109
-
110
-
111
- def coral_probs_from_logits_monotone(logits_np: np.ndarray) -> np.ndarray:
112
- """
113
- CORAL decoding with monotonicity enforcement so class probs are valid (sum=1, nonnegative).
114
- """
115
- sig = 1.0 / (1.0 + np.exp(-logits_np)) # sigmoid
116
- sig_m = enforce_nonincreasing(sig[0]) # enforce order
117
- left = np.concatenate([np.array([1.0], dtype=float), sig_m])
118
- right = np.concatenate([sig_m, np.array([0.0], dtype=float)])
119
- probs = np.clip(left - right, 1e-12, 1.0)
120
- probs = probs / probs.sum() # normalize
121
- return probs
122
-
123
 
124
  # ------------- FastAPI app ----------------
125
- app = FastAPI(title="Static Fingerprint API", version="1.0.0")
126
 
127
- # Allow Excel / local tools to call the API
128
  app.add_middleware(
129
  CORSMiddleware,
130
  allow_origins=["*"],
@@ -133,104 +115,76 @@ app.add_middleware(
133
  allow_headers=["*"],
134
  )
135
 
136
-
137
  @app.get("/")
138
  def root():
139
- return {
140
- "message": "Static Fingerprint API is running.",
141
- "try": ["GET /health", "POST /predict"],
142
- }
143
-
144
 
145
  @app.get("/health")
146
  def health():
 
147
  return {
148
  "status": "ok",
149
  "features": FEATURES,
 
150
  "classes": CLASSES,
151
  "model_file": MODEL_PATH,
152
  "stats_file": STATS_PATH,
153
  }
154
 
155
-
156
  @app.post("/echo")
157
  async def echo(req: Request):
158
  payload = await req.json()
159
  return {"received": payload}
160
 
161
-
162
  @app.post("/predict")
163
  async def predict(req: Request):
164
- """
165
- Body: a single JSON dict mapping feature -> numeric value.
166
- """
167
  payload = await req.json()
168
  if not isinstance(payload, dict):
169
  return {"error": "Expected a JSON object mapping feature -> value."}
170
 
171
- # --- Build z-scores in strict model order ---
172
- z = []
173
- z_detail = {}
174
- missing = []
 
175
  for f in FEATURES:
176
- mean = stats[f]["mean"]
177
- sd = stats[f]["std"]
178
  if f in payload:
179
- zf = _z(payload[f], mean, sd)
180
  else:
181
  missing.append(f)
182
- zf = _z(0.0, mean, sd)
183
- z.append(zf)
 
184
  z_detail[f] = zf
 
185
 
186
- X = np.array([z], dtype=np.float32)
187
  raw = model.predict(X, verbose=0)
188
- raw_shape = tuple(raw.shape)
189
-
190
- # --- Decode ---
191
- probs = None
192
- decode_mode = "auto"
193
- try:
194
- if FORCE_CORAL:
195
- decode_mode = "forced_coral_monotone"
196
- probs = coral_probs_from_logits_monotone(raw)
197
- else:
198
- if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
199
- decode_mode = "auto_coral_monotone"
200
- probs = coral_probs_from_logits_monotone(raw)
201
- else:
202
- decode_mode = "auto_softmax_or_logits"
203
- probs = raw[0]
204
- s = float(np.sum(probs))
205
- if s > 0:
206
- probs = probs / s
207
- except Exception:
208
- decode_mode = "fallback_raw_norm"
209
  probs = raw[0]
210
  s = float(np.sum(probs))
211
  if s > 0:
212
  probs = probs / s
213
 
214
  pred_idx = int(np.argmax(probs))
215
-
216
- # --- Response ---
217
- resp = {
218
  "input_ok": (len(missing) == 0),
219
  "missing": missing,
 
220
  "z_scores": z_detail,
221
  "probabilities": {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))},
222
  "predicted_state": CLASSES[pred_idx],
223
- }
224
-
225
- # --- Debug block ---
226
- if RETURN_DEBUG:
227
- resp["debug"] = {
228
- "raw_shape": raw_shape,
229
  "decode_mode": decode_mode,
230
- "raw_first_row": [
231
- float(x)
232
- for x in (raw[0].tolist() if raw.ndim >= 2 else [float(raw)])
233
- ],
234
- }
235
-
236
- return resp
 
1
  import json
2
  import os
3
+ from typing import Any, Dict, List
4
 
5
  import numpy as np
6
  import tensorflow as tf
 
11
  MODEL_PATH = os.getenv("MODEL_PATH", "best_model.h5")
12
  STATS_PATH = os.getenv("STATS_PATH", "means_std.json")
13
  CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]
14
+ # IMPORTANT: Freeze the exact training order of features:
15
+ FEATURES: List[str] = [
16
+ "autosuf_oper",
17
+ "cov_improductiva",
18
+ "ing_cartera_over_ing_total",
19
+ "gastos_oper_over_cart",
20
+ "prov_over_cartera",
21
+ "_margen_bruto",
22
+ "equity_over_assets",
23
+ "rend_cart_over_avg_cart",
24
+ "_assets",
25
+ "roa_pre_tax",
26
+ "cartera_vencida_ratio",
27
+ "gastos_oper_over_ing_oper",
28
+ "_cartera_bruta",
29
+ "grado_absorcion",
30
+ "_equity",
31
+ "gastos_fin_over_avg_cart",
32
+ "improductiva",
33
+ "roe_pre_tax",
34
+ "debt_to_equity",
35
+ "_liab",
36
+ "prov_gasto_over_cart",
37
+ ]
38
  # ------------------------------------------
39
 
 
 
 
 
40
  print("Loading model and stats...")
41
  model = tf.keras.models.load_model(MODEL_PATH, compile=False)
42
 
43
  with open(STATS_PATH, "r") as f:
44
  stats: Dict[str, Dict[str, float]] = json.load(f)
45
 
46
+ # ---- Per-feature transforms used at training (make all 'higher = better') ----
47
+ # If during dataset prep you flipped signs on some “bad” metrics, reflect it here.
48
+ # This set is the typical choice for microfinance health where larger values are worse:
49
+ NEGATE = {
50
+ "gastos_oper_over_cart",
51
+ "prov_over_cartera",
52
+ "cartera_vencida_ratio",
53
+ "gastos_oper_over_ing_oper",
54
+ "gastos_fin_over_avg_cart",
55
+ "improductiva",
56
+ "debt_to_equity",
57
+ "prov_gasto_over_cart",
58
+ # If your training actually negated coverage too (to align “higher=better”),
59
+ # include the next line. If not, comment it out.
60
+ # "cov_improductiva",
61
+ }
62
 
 
63
def coerce_float(val: Any) -> float:
    """Coerce numbers from strings with either comma or dot decimal and thousands.

    Accepts numeric values, or strings such as:
        "49.709,14" -> 49709.14   (European: dot thousands, comma decimal)
        "49,709.14" -> 49709.14   (US: comma thousands, dot decimal)
        "0,005"     -> 0.005      (single comma as decimal mark)
        " 1 234 "   -> 1234.0     (space-grouped digits)
        "1,234,567" -> 1234567.0  (multiple commas are thousands separators)

    Raises:
        ValueError: if the value is empty or cannot be parsed as a number.
    """
    if isinstance(val, (int, float)):
        return float(val)
    s = str(val).strip()
    if s == "":
        raise ValueError("empty")
    s = s.replace(" ", "")
    has_dot = "." in s
    has_comma = "," in s
    if has_dot and has_comma:
        # Both separators present: whichever appears LAST is the decimal mark.
        if s.rfind(",") > s.rfind("."):
            s = s.replace(".", "").replace(",", ".")
        else:
            s = s.replace(",", "")
    elif has_comma and not has_dot:
        # A single comma is a decimal mark; several commas can only be
        # thousands separators (e.g. "1,234,567") — previously this case
        # produced "1.234.567" and raised ValueError.
        if s.count(",") > 1:
            s = s.replace(",", "")
        else:
            s = s.replace(",", ".")
    return float(s)
82
 
83
def transform_feature(name: str, raw_val: Any) -> float:
    """Apply the training-time transform for *name*: parse the raw value and
    flip its sign when the feature is listed in NEGATE (higher = better)."""
    value = coerce_float(raw_val)
    return -value if name in NEGATE else value
88
 
89
def zscore(x: float, mean: float, std: float) -> float:
    """Standardize *x* with the given mean/std; a falsy std yields 0.0
    (guards against division by zero for constant features)."""
    return (x - mean) / std if std else 0.0
93
+
94
def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
    """Convert CORAL ordinal logits (N, K-1) into class probabilities (N, K).

    A CORAL head emits K-1 cumulative logits whose sigmoids should satisfy
    s[0] >= s[1] >= ... >= s[K-2]. Class probabilities are the adjacent
    differences of the padded cumulative vector [1, s..., 0].

    BUG FIX: the previous guard used an ASCENDING sort, which inverts the
    required non-increasing order and makes every difference negative
    (clipped to 1e-12 → degenerate uniform probabilities). The guard must
    sort in DESCENDING order.

    Implemented with NumPy (pure math on a NumPy input/output; no need to
    round-trip through TensorFlow tensors).
    """
    logits = np.asarray(logits_np, dtype=np.float32)  # (N, K-1)
    sig = 1.0 / (1.0 + np.exp(-logits))  # sigmoid -> cumulative probs
    # Enforce non-increasing cumulative probabilities (numerical guard).
    sig_sorted = -np.sort(-sig, axis=1)
    ones = np.ones((sig_sorted.shape[0], 1), dtype=sig_sorted.dtype)
    zeros = np.zeros((sig_sorted.shape[0], 1), dtype=sig_sorted.dtype)
    left = np.concatenate([ones, sig_sorted], axis=1)
    right = np.concatenate([sig_sorted, zeros], axis=1)
    probs = np.clip(left - right, 1e-12, 1.0)
    # Re-normalize (safety) so each row sums to exactly 1.
    probs = probs / probs.sum(axis=1, keepdims=True)
    return probs.astype(np.float32)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  # ------------- FastAPI app ----------------
108
+ app = FastAPI(title="Static Fingerprint API", version="1.1.0")
109
 
 
110
  app.add_middleware(
111
  CORSMiddleware,
112
  allow_origins=["*"],
 
115
  allow_headers=["*"],
116
  )
117
 
 
118
@app.get("/")
def root():
    """Liveness message plus pointers to the useful endpoints."""
    return {
        "message": "Static Fingerprint API is running.",
        "try": ["GET /health", "POST /predict"],
    }
 
 
 
 
121
 
122
@app.get("/health")
def health():
    """Report service status plus the frozen feature order and which
    sign-flip transforms are active."""
    info = {
        "status": "ok",
        "features": FEATURES,
        "negated_features": sorted(NEGATE),
        "classes": CLASSES,
        "model_file": MODEL_PATH,
        "stats_file": STATS_PATH,
    }
    return info
133
 
 
134
@app.post("/echo")
async def echo(req: Request):
    """Return the request body unchanged (connectivity check for clients)."""
    body = await req.json()
    return {"received": body}
138
 
 
139
@app.post("/predict")
async def predict(req: Request):
    """Score one observation.

    Body: a single JSON object mapping feature name -> numeric value
    (numbers, or comma/dot-formatted strings parsed by coerce_float).
    Missing or unparsable features are treated as 0.0 before the
    training-time transform and reported back under "missing".
    """
    payload = await req.json()
    if not isinstance(payload, dict):
        return {"error": "Expected a JSON object mapping feature -> value."}

    transformed: Dict[str, float] = {}
    z_detail: Dict[str, float] = {}
    missing: List[str] = []
    z_row: List[float] = []

    for f in FEATURES:
        mean = float(stats[f]["mean"])
        std = float(stats[f]["std"])
        if f in payload:
            try:
                # Apply the same transform as training (sign flips etc.).
                tv = transform_feature(f, payload[f])
            except ValueError:
                # Unparsable value: fall back to 0.0 like a missing field
                # instead of failing the whole request with a 500.
                missing.append(f)
                tv = transform_feature(f, 0.0)
        else:
            missing.append(f)
            tv = transform_feature(f, 0.0)  # treat missing as 0 before transform
        transformed[f] = tv
        zf = zscore(tv, mean, std)
        z_detail[f] = zf
        z_row.append(zf)

    X = np.array([z_row], dtype=np.float32)
    raw = model.predict(X, verbose=0)

    # Decode: a CORAL head emits K-1 cumulative logits; otherwise assume
    # K softmax/logit outputs and normalize defensively.
    if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
        decode_mode = "auto_coral_monotone"
        probs = coral_probs_from_logits(raw)[0]
    else:
        decode_mode = "softmax_or_logits_norm"
        probs = raw[0]
        s = float(np.sum(probs))
        if s > 0:
            probs = probs / s

    pred_idx = int(np.argmax(probs))
    return {
        "input_ok": (len(missing) == 0),
        "missing": missing,
        "transformed": transformed,  # post-transform, pre-z (should match training inputs)
        "z_scores": z_detail,
        "probabilities": {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))},
        "predicted_state": CLASSES[pred_idx],
        "debug": {
            "raw_shape": list(raw.shape),
            "decode_mode": decode_mode,
            "raw_first_row": [float(x) for x in raw[0].tolist()],
        },
    }