Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# ================================================================
|
| 2 |
# Self-Sensing Concrete Assistant — Predictor (XGB) + Hybrid RAG
|
| 3 |
-
# -
|
| 4 |
-
# -
|
| 5 |
# - RAG page-extraction regex fixed for [[PAGE=...]]
|
| 6 |
# ================================================================
|
| 7 |
|
|
@@ -106,7 +106,14 @@ CATEGORICAL_COLS = {
|
|
| 106 |
"Current Type"
|
| 107 |
}
|
| 108 |
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
REQUIRED_FIELDS = {
|
| 111 |
"Filler 1 Type",
|
| 112 |
"Filler 1 Diameter (µm)",
|
|
@@ -155,21 +162,18 @@ def _canon_cat(v: Any) -> str:
|
|
| 155 |
return CANON_NA
|
| 156 |
return s
|
| 157 |
|
| 158 |
-
def _to_float_or_nan(v):
|
| 159 |
-
if v in ("", None):
|
| 160 |
-
return np.nan
|
| 161 |
-
try:
|
| 162 |
-
# allow "1,234.5" by stripping commas
|
| 163 |
-
return float(str(v).replace(",", ""))
|
| 164 |
-
except Exception:
|
| 165 |
-
return np.nan
|
| 166 |
-
|
| 167 |
def _coerce_to_row(form_dict: dict) -> pd.DataFrame:
|
| 168 |
row = {}
|
| 169 |
for col in MAIN_VARIABLES:
|
| 170 |
v = form_dict.get(col, None)
|
| 171 |
if col in NUMERIC_COLS:
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
elif col in CATEGORICAL_COLS:
|
| 174 |
row[col] = _canon_cat(v)
|
| 175 |
else:
|
|
@@ -177,12 +181,33 @@ def _coerce_to_row(form_dict: dict) -> pd.DataFrame:
|
|
| 177 |
row[col] = s if s else CANON_NA
|
| 178 |
return pd.DataFrame([row], columns=MAIN_VARIABLES)
|
| 179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
def _align_columns_to_model(df: pd.DataFrame, mdl) -> pd.DataFrame:
|
| 181 |
"""Align incoming dataframe columns to the model's expected feature order."""
|
| 182 |
try:
|
| 183 |
feat = getattr(mdl, "feature_names_in_", None)
|
| 184 |
if feat is not None and len(feat) > 0:
|
| 185 |
-
# add any missing columns as NaN, keep extras (model will ignore via transformer)
|
| 186 |
for c in feat:
|
| 187 |
if c not in df.columns:
|
| 188 |
df[c] = np.nan
|
|
@@ -193,13 +218,9 @@ def _align_columns_to_model(df: pd.DataFrame, mdl) -> pd.DataFrame:
|
|
| 193 |
return df
|
| 194 |
|
| 195 |
def predict_fn(**kwargs):
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
- Missing numerics -> NaN
|
| 200 |
-
- Categoricals -> canonical 'NA'
|
| 201 |
-
- If model missing or inference error -> return 0.0 (keeps UI stable)
|
| 202 |
-
"""
|
| 203 |
mdl = _load_model_or_error()
|
| 204 |
if mdl is None:
|
| 205 |
return 0.0
|
|
@@ -212,7 +233,9 @@ def predict_fn(**kwargs):
|
|
| 212 |
else:
|
| 213 |
y = y_raw
|
| 214 |
y = float(np.asarray(y).ravel()[0])
|
| 215 |
-
|
|
|
|
|
|
|
| 216 |
except Exception as e:
|
| 217 |
print(f"[Predict] {e}")
|
| 218 |
traceback.print_exc()
|
|
|
|
| 1 |
# ================================================================
|
| 2 |
# Self-Sensing Concrete Assistant — Predictor (XGB) + Hybrid RAG
|
| 3 |
+
# - Predictor tab: required fields marked with *
|
| 4 |
+
# - Prediction fixed: NA is accepted for required categoricals
|
| 5 |
# - RAG page-extraction regex fixed for [[PAGE=...]]
|
| 6 |
# ================================================================
|
| 7 |
|
|
|
|
| 106 |
"Current Type"
|
| 107 |
}
|
| 108 |
|
| 109 |
+
# Form fields describing the second filler; these are listed as optional.
OPTIONAL_FIELDS = {
    "Filler 2 Type",
    "Filler 2 Diameter (µm)",
    "Filler 2 Length (mm)",
    "Filler 2 Dimensionality",
}
|
| 115 |
+
|
| 116 |
+
# Fields that must be provided before a prediction is attempted
|
| 117 |
REQUIRED_FIELDS = {
|
| 118 |
"Filler 1 Type",
|
| 119 |
"Filler 1 Diameter (µm)",
|
|
|
|
| 162 |
return CANON_NA
|
| 163 |
return s
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
def _coerce_to_row(form_dict: dict) -> pd.DataFrame:
|
| 166 |
row = {}
|
| 167 |
for col in MAIN_VARIABLES:
|
| 168 |
v = form_dict.get(col, None)
|
| 169 |
if col in NUMERIC_COLS:
|
| 170 |
+
if v in ("", None):
|
| 171 |
+
row[col] = np.nan
|
| 172 |
+
else:
|
| 173 |
+
try:
|
| 174 |
+
row[col] = float(v)
|
| 175 |
+
except Exception:
|
| 176 |
+
row[col] = np.nan
|
| 177 |
elif col in CATEGORICAL_COLS:
|
| 178 |
row[col] = _canon_cat(v)
|
| 179 |
else:
|
|
|
|
| 181 |
row[col] = s if s else CANON_NA
|
| 182 |
return pd.DataFrame([row], columns=MAIN_VARIABLES)
|
| 183 |
|
| 184 |
+
def _is_complete(form_dict: dict) -> bool:
    """Return True when every field in ``REQUIRED_FIELDS`` has a usable value.

    Numeric required fields must convert to a non-NaN float: None, blank
    strings, NaN (of any float type) and unparseable text all count as
    missing. Required categorical/text fields are canonicalized with
    ``_canon_cat`` and rejected only if the result is empty; the canonical
    NA value is deliberately accepted, because NA can be a legitimate choice
    and users should not be blocked for selecting it.

    Args:
        form_dict: Mapping of field name to the raw value entered in the form.

    Returns:
        False as soon as one required field is missing, True otherwise.
    """
    for col in REQUIRED_FIELDS:
        v = form_dict.get(col)
        if col in NUMERIC_COLS:
            if v is None:
                return False
            try:
                number = float(v)
            except (TypeError, ValueError, OverflowError):
                # Blank or non-numeric input would become NaN when the row
                # is built, so treat it as not provided.
                return False
            if np.isnan(number):
                return False
        else:
            # Required categoricals/text: any non-empty canonical value is
            # accepted, including the canonical NA marker.
            s = _canon_cat(v)
            if s is None or s == "":
                return False
    return True
|
| 205 |
+
|
| 206 |
def _align_columns_to_model(df: pd.DataFrame, mdl) -> pd.DataFrame:
|
| 207 |
"""Align incoming dataframe columns to the model's expected feature order."""
|
| 208 |
try:
|
| 209 |
feat = getattr(mdl, "feature_names_in_", None)
|
| 210 |
if feat is not None and len(feat) > 0:
|
|
|
|
| 211 |
for c in feat:
|
| 212 |
if c not in df.columns:
|
| 213 |
df[c] = np.nan
|
|
|
|
| 218 |
return df
|
| 219 |
|
| 220 |
def predict_fn(**kwargs):
|
| 221 |
+
# Keep your contract: 0.0 if incomplete or on error
|
| 222 |
+
if not _is_complete(kwargs):
|
| 223 |
+
return 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
mdl = _load_model_or_error()
|
| 225 |
if mdl is None:
|
| 226 |
return 0.0
|
|
|
|
| 233 |
else:
|
| 234 |
y = y_raw
|
| 235 |
y = float(np.asarray(y).ravel()[0])
|
| 236 |
+
if y < 0:
|
| 237 |
+
y = 0.0
|
| 238 |
+
return y
|
| 239 |
except Exception as e:
|
| 240 |
print(f"[Predict] {e}")
|
| 241 |
traceback.print_exc()
|