COCODEDE04 committed on
Commit
d264e59
·
verified ·
1 Parent(s): 73d7334

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -96
app.py CHANGED
@@ -1,25 +1,33 @@
 
1
  import os
2
  import json
3
- from typing import Any, Dict, List
 
4
 
5
  import numpy as np
6
  import tensorflow as tf
7
  from fastapi import FastAPI, Request
8
  from fastapi.middleware.cors import CORSMiddleware
9
- from fastapi.responses import JSONResponse
10
- import joblib
11
 
12
- # ----------------- CONFIG -----------------
13
- # Use your actual filenames here (from your training export)
14
- MODEL_PATH = os.getenv("MODEL_PATH", "best_model.h5") # or "best_model.h5" if that's what you have
15
- IMPUTER_PATH = os.getenv("IMPUTER_PATH", "imputer.joblib")
16
- SCALER_PATH = os.getenv("SCALER_PATH", "scaler.joblib")
17
 
18
- # Class order per training: 0=Top .. 4=Low
19
- CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"]
 
 
 
 
 
20
 
21
- # EXACT feature order used during training (from your Section 3.1)
22
- FEATURES: List[str] = [
 
 
 
 
 
 
23
  "autosuf_oper",
24
  "improductiva",
25
  "gastos_fin_over_avg_cart",
@@ -42,67 +50,127 @@ FEATURES: List[str] = [
42
  "rend_cart_over_avg_cart",
43
  "roa_pre_tax",
44
  ]
45
- # ------------------------------------------
46
 
 
 
 
 
 
 
 
47
  print("Loading model / imputer / scaler...")
48
- # Loss is not needed for inference; compile=False avoids needing custom loss objects.
49
  model = tf.keras.models.load_model(MODEL_PATH, compile=False)
50
- imputer = joblib.load(IMPUTER_PATH) # median imputation from training
51
- scaler = joblib.load(SCALER_PATH) # StandardScaler from training
52
-
53
- print("Model loaded. Feature order:", FEATURES)
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  def coerce_float(val: Any) -> float:
56
  """
57
- Robust numeric parse:
58
- "49.709,14" -> 49709.14
59
- "49,709.14" -> 49709.14
60
- "0,005" -> 0.005
61
- 1.23 -> 1.23
62
- Raises ValueError on failure.
63
  """
64
- if isinstance(val, (int, float, np.integer, np.floating)):
65
  return float(val)
66
-
67
  s = str(val).strip()
68
  if s == "":
69
  raise ValueError("empty")
70
-
71
  s = s.replace(" ", "")
72
- has_dot = "." in s
73
- has_comma = "," in s
74
-
75
  if has_dot and has_comma:
76
- last_dot = s.rfind(".")
77
- last_comma = s.rfind(",")
78
- if last_comma > last_dot:
79
- # decimal is comma, thousands is dot
80
  s = s.replace(".", "")
81
  s = s.replace(",", ".")
82
  else:
83
- # decimal is dot, thousands is comma
84
  s = s.replace(",", "")
85
  elif has_comma and not has_dot:
86
  s = s.replace(",", ".")
87
- # else: dots only or pure digits
88
-
89
  return float(s)
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
92
  """
93
- Decode CORAL logits (N, K-1) -> probabilities (N, K)
 
94
  """
95
- logits = tf.convert_to_tensor(logits_np, dtype=tf.float32)
96
- sig = tf.math.sigmoid(logits) # (N, K-1)
97
- left = tf.concat([tf.ones_like(sig[:, :1]), sig], axis=1)
98
- right = tf.concat([sig, tf.zeros_like(sig[:, :1])], axis=1)
 
 
 
99
  probs = tf.clip_by_value(left - right, 1e-12, 1.0)
 
 
100
  return probs.numpy()
101
 
102
- # ------------- FastAPI app ----------------
103
  app = FastAPI(title="Static Fingerprint API", version="1.0.0")
104
 
105
- # Allow Excel / local tools to call the API
106
  app.add_middleware(
107
  CORSMiddleware,
108
  allow_origins=["*"],
@@ -113,17 +181,26 @@ app.add_middleware(
113
 
114
  @app.get("/")
115
  def root():
116
- return {"message": "Static Fingerprint API is running.", "try": ["GET /health", "POST /predict"]}
 
 
 
117
 
118
  @app.get("/health")
119
  def health():
120
  return {
121
  "status": "ok",
122
- "features": FEATURES,
123
  "classes": CLASSES,
124
- "model_file": MODEL_PATH,
125
- "imputer_file": IMPUTER_PATH,
126
- "scaler_file": SCALER_PATH,
 
 
 
 
 
 
 
127
  }
128
 
129
  @app.post("/echo")
@@ -134,67 +211,43 @@ async def echo(req: Request):
134
  @app.post("/predict")
135
  async def predict(req: Request):
136
  """
137
- Body: JSON object mapping feature -> value (raw numbers; median+z will be applied here)
138
- Missing features are imputed by the training imputer (median).
 
 
 
 
139
  """
140
- try:
141
- payload = await req.json()
142
- except Exception as e:
143
- return JSONResponse(status_code=400, content={"error": f"Invalid JSON: {e}"})
144
-
145
  if not isinstance(payload, dict):
146
- return JSONResponse(status_code=400, content={"error": "Expected a JSON object mapping feature -> value."})
147
-
148
- # Build raw vector in EXACT training order; use np.nan for missing so imputer handles it
149
- x_raw = []
150
- missing = []
151
- for f in FEATURES:
152
- if f in payload:
153
- try:
154
- x_raw.append(coerce_float(payload[f]))
155
- except Exception:
156
- # treat unparsable as missing -> np.nan (imputer will fill)
157
- x_raw.append(np.nan)
158
- missing.append(f)
159
- else:
160
- x_raw.append(np.nan)
161
- missing.append(f)
162
 
163
- X_raw = np.array([x_raw], dtype=np.float64) # (1, 21)
164
- X_imp = imputer.transform(X_raw) # median imputation
165
- X_std = scaler.transform(X_imp).astype(np.float32) # z-scores as per training
166
 
167
- # Predict
168
- raw = model.predict(X_std, verbose=0)
169
 
170
- # CORAL vs softmax detection (your model is CORAL with 4 logits)
 
171
  if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
172
  probs = coral_probs_from_logits(raw)[0]
173
- decode_mode = "coral"
174
- elif raw.ndim == 2 and raw.shape[1] == len(CLASSES):
175
- p = raw[0]
176
- s = float(np.sum(p))
177
- probs = (p / s) if s > 0 else p
178
- decode_mode = "softmax"
179
  else:
180
- # Fallback: try CORAL first
181
- probs = coral_probs_from_logits(raw)[0]
182
- decode_mode = "auto"
 
183
 
184
  pred_idx = int(np.argmax(probs))
185
-
186
- # Build z-score dict for transparency
187
- z_detail = {FEATURES[i]: float(X_std[0, i]) for i in range(len(FEATURES))}
188
-
189
- return {
190
  "input_ok": (len(missing) == 0),
191
- "missing": missing, # features that were np.nan (imputed)
192
- "z_scores": z_detail, # exactly what the model saw
193
  "probabilities": {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))},
194
  "predicted_state": CLASSES[pred_idx],
195
  "debug": {
196
  "raw_shape": list(raw.shape),
197
  "decode_mode": decode_mode,
198
- "first_row_logits": [float(v) for v in (raw[0].tolist() if raw.ndim == 2 else np.atleast_1d(raw).tolist())],
199
  },
200
- }
 
 
1
+ # app.py
2
  import os
3
  import json
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List, Tuple
6
 
7
  import numpy as np
8
  import tensorflow as tf
9
  from fastapi import FastAPI, Request
10
  from fastapi.middleware.cors import CORSMiddleware
 
 
11
 
12
+ # ----------------- PATHS & CONFIG -----------------
13
+ BASE_DIR = Path(__file__).resolve().parent
 
 
 
14
 
15
+ # Prefer env vars, fall back to files next to app.py
16
+ MODEL_PATH = os.getenv("MODEL_PATH") or str(BASE_DIR / "best_model.keras")
17
+ if not Path(MODEL_PATH).exists():
18
+ # fallback to .h5 if .keras not present
19
+ alt = BASE_DIR / "best_model.h5"
20
+ if alt.exists():
21
+ MODEL_PATH = str(alt)
22
 
23
+ STATS_PATH = os.getenv("STATS_PATH") or str(BASE_DIR / "means_std.json")
24
+ IMPUTER_PATH = os.getenv("IMPUTER_PATH") or str(BASE_DIR / "imputer.joblib")
25
+ SCALER_PATH = os.getenv("SCALER_PATH") or str(BASE_DIR / "scaler.joblib")
26
+
27
+ CLASSES = ["Top", "Mid-Top", "Mid", "Mid-Low", "Low"] # ordinal: 0..4
28
+
29
+ # IMPORTANT — exact feature order used during training
30
+ FEATURE_ORDER: List[str] = [
31
  "autosuf_oper",
32
  "improductiva",
33
  "gastos_fin_over_avg_cart",
 
50
  "rend_cart_over_avg_cart",
51
  "roa_pre_tax",
52
  ]
 
53
 
54
+ print("Resolved paths:")
55
+ print(" MODEL_PATH :", MODEL_PATH)
56
+ print(" STATS_PATH :", STATS_PATH)
57
+ print(" IMPUTER_PATH:", IMPUTER_PATH)
58
+ print(" SCALER_PATH :", SCALER_PATH)
59
+
60
+ # ----------------- LOAD ARTIFACTS -----------------
61
  print("Loading model / imputer / scaler...")
62
+ # If the model used custom losses/metrics you’d pass custom_objects here.
63
  model = tf.keras.models.load_model(MODEL_PATH, compile=False)
 
 
 
 
64
 
65
+ # Optional: imputer & scaler from training pipeline
66
+ imputer = None
67
+ scaler = None
68
+ try:
69
+ import joblib # in requirements
70
+ if Path(IMPUTER_PATH).exists():
71
+ imputer = joblib.load(IMPUTER_PATH)
72
+ print("Loaded imputer:", IMPUTER_PATH)
73
+ if Path(SCALER_PATH).exists():
74
+ scaler = joblib.load(SCALER_PATH)
75
+ print("Loaded scaler :", SCALER_PATH)
76
+ except Exception as e:
77
+ print("Imputer/scaler not loaded:", e)
78
+
79
+ # Optional: stats fallback for manual z-scoring
80
+ stats: Dict[str, Dict[str, float]] = {}
81
+ if Path(STATS_PATH).exists():
82
+ with open(STATS_PATH, "r") as f:
83
+ stats = json.load(f)
84
+ print("Loaded means/std from:", STATS_PATH)
85
+
86
+ # ----------------- HELPERS -----------------
87
def coerce_float(val: Any) -> float:
    """
    Robust numeric parse for EU/US formatted numbers.

    Examples:
        '49.709,14' -> 49709.14   (EU: dot thousands, comma decimal)
        '49,709.14' -> 49709.14   (US: comma thousands, dot decimal)
        '0,005'     -> 0.005      (comma-only decimal)
        '1.234.567' -> 1234567.0  (dot-only thousands)
        1.23        -> 1.23       (numeric passthrough)

    Raises:
        ValueError: on empty or non-numeric input.
    """
    if isinstance(val, (int, float, np.number)):
        return float(val)
    s = str(val).strip()
    if s == "":
        raise ValueError("empty")
    s = s.replace(" ", "")
    has_dot, has_comma = "." in s, "," in s
    if has_dot and has_comma:
        # Whichever separator appears last is the decimal mark.
        if s.rfind(",") > s.rfind("."):
            s = s.replace(".", "").replace(",", ".")
        else:
            s = s.replace(",", "")
    elif has_comma and not has_dot:
        s = s.replace(",", ".")
    elif s.count(".") > 1:
        # BUGFIX: multiple dots with no comma ('1.234.567') are thousands
        # separators; previously this fell through and raised ValueError.
        s = s.replace(".", "")
    return float(s)
108
 
109
def build_matrix_from_payload(payload: Dict[str, Any]) -> Tuple[np.ndarray, Dict[str, float], List[str]]:
    """
    Build the (1, n_features) model input from a raw feature -> value mapping.

    Returns:
        X: (1, len(FEATURE_ORDER)) float32 array — imputed + scaled when the
           training imputer/scaler artifacts are loaded, else z-scored via
           the `stats` (means_std.json) fallback.
        z_detail: feature -> standardized value actually fed to the model.
        missing: features absent from the payload. NOTE(review): unparsable
           values become NaN but are not listed here — matches original API.
    """
    raw: List[float] = []
    missing: List[str] = []
    for feat in FEATURE_ORDER:
        if feat in payload:
            try:
                raw.append(coerce_float(payload[feat]))
            except Exception:
                # Unparsable value -> NaN so the imputer / fallback fills it.
                raw.append(np.nan)
        else:
            raw.append(np.nan)
            missing.append(feat)

    arr = np.array([raw], dtype=np.float32)  # shape (1, n_features)

    # Primary path: training-pipeline imputer + scaler.
    if imputer is not None and scaler is not None:
        arr_imp = imputer.transform(arr)      # median impute
        arr_std = scaler.transform(arr_imp)   # z-score to training distribution
        z_row = arr_std[0].tolist()
        z_detail = {f: float(z_row[i]) for i, f in enumerate(FEATURE_ORDER)}
        return arr_std.astype(np.float32), z_detail, missing

    # Fallback path: manual z-score using means_std.json stats.
    z_vals: List[float] = []
    z_detail = {}
    for i, feat in enumerate(FEATURE_ORDER):
        v = arr[0, i]
        st = stats.get(feat, {})
        if "mean" in st and "std" in st and st["std"]:
            if np.isnan(v):
                # BUGFIX: impute a missing value at the feature mean (z == 0).
                # Previously NaN was replaced by a literal raw 0.0, yielding
                # z = -mean/std — inconsistent with the imputer path.
                z = 0.0
            else:
                z = (float(v) - float(st["mean"])) / float(st["std"])
        else:
            z = 0.0  # no stats for this feature -> safest neutral value
        z_vals.append(z)
        z_detail[feat] = float(z)
    return np.array([z_vals], dtype=np.float32), z_detail, missing
153
+
154
def coral_probs_from_logits(logits_np: np.ndarray) -> np.ndarray:
    """
    CORAL decode: (N, K-1) threshold logits -> (N, K) class probabilities.

    sigmoid(logit_k) estimates P(y > k); adjacent differences give P(y == k).
    A cumulative-minimum pass enforces the non-increasing threshold property
    before differencing, and rows are renormalized as a numerical guard.

    BUGFIX: the previous implementation called tf.math.cummin, which does not
    exist in TensorFlow (only cumsum/cumprod/cumulative_logsumexp) and raised
    AttributeError at inference time. The decode is pure elementwise math, so
    it is done in NumPy here.
    """
    logits = np.asarray(logits_np, dtype=np.float64)      # (N, K-1)
    sig = 1.0 / (1.0 + np.exp(-logits))                   # p(y > k)
    sig = np.clip(sig, 1e-12, 1.0 - 1e-12)
    # Enforce non-increasing thresholds along axis 1 (numerical guard).
    sig = np.minimum.accumulate(sig, axis=1)
    left = np.concatenate([np.ones_like(sig[:, :1]), sig], axis=1)
    right = np.concatenate([sig, np.zeros_like(sig[:, :1])], axis=1)
    probs = np.clip(left - right, 1e-12, 1.0)
    # Normalize each row just in case clipping perturbed the sum.
    probs = probs / probs.sum(axis=1, keepdims=True)
    return probs.astype(np.float32)
170
 
171
+ # ----------------- FASTAPI -----------------
172
  app = FastAPI(title="Static Fingerprint API", version="1.0.0")
173
 
 
174
  app.add_middleware(
175
  CORSMiddleware,
176
  allow_origins=["*"],
 
181
 
182
  @app.get("/")
183
  def root():
184
+ return {
185
+ "message": "Static Fingerprint API is running.",
186
+ "try": ["GET /health", "POST /predict"],
187
+ }
188
 
189
  @app.get("/health")
190
  def health():
191
  return {
192
  "status": "ok",
 
193
  "classes": CLASSES,
194
+ "feature_order": FEATURE_ORDER,
195
+ "paths": {
196
+ "model": MODEL_PATH,
197
+ "stats": STATS_PATH if Path(STATS_PATH).exists() else None,
198
+ "imputer": IMPUTER_PATH if Path(IMPUTER_PATH).exists() else None,
199
+ "scaler": SCALER_PATH if Path(SCALER_PATH).exists() else None,
200
+ "base_dir_files": [p.name for p in BASE_DIR.iterdir()],
201
+ },
202
+ "has_imputer": imputer is not None,
203
+ "has_scaler": scaler is not None,
204
  }
205
 
206
  @app.post("/echo")
 
211
  @app.post("/predict")
212
  async def predict(req: Request):
213
  """
214
+ Body: JSON dict mapping feature -> value (raw numbers). Example:
215
+ {
216
+ "autosuf_oper": 1.0,
217
+ "cov_improductiva": 0.9,
218
+ ...
219
+ }
220
  """
221
+ payload = await req.json()
 
 
 
 
222
  if not isinstance(payload, dict):
223
+ return {"error": "Expected a JSON object mapping feature -> value."}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
+ X, z_detail, missing = build_matrix_from_payload(payload) # shape (1, 21)
 
 
226
 
227
+ raw = model.predict(X, verbose=0)
 
228
 
229
+ # Auto-detect output head: CORAL (K-1) or softmax (K)
230
+ decode_mode = "auto_coral"
231
  if raw.ndim == 2 and raw.shape[1] == (len(CLASSES) - 1):
232
  probs = coral_probs_from_logits(raw)[0]
233
+ decode_mode = "auto_coral_monotone"
 
 
 
 
 
234
  else:
235
+ # assume logits for K classes
236
+ logits = tf.convert_to_tensor(raw, dtype=tf.float32)
237
+ probs = tf.nn.softmax(logits, axis=1).numpy()[0]
238
+ decode_mode = "softmax"
239
 
240
  pred_idx = int(np.argmax(probs))
241
+ out = {
 
 
 
 
242
  "input_ok": (len(missing) == 0),
243
+ "missing": missing,
244
+ "z_scores": z_detail,
245
  "probabilities": {CLASSES[i]: float(probs[i]) for i in range(len(CLASSES))},
246
  "predicted_state": CLASSES[pred_idx],
247
  "debug": {
248
  "raw_shape": list(raw.shape),
249
  "decode_mode": decode_mode,
250
+ "raw_first_row": [float(x) for x in raw[0].tolist()],
251
  },
252
+ }
253
+ return out