Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
# ================================================================
|
| 2 |
# Self-Sensing Concrete Assistant — Predictor (XGB) + Hybrid RAG
|
| 3 |
-
# - Predictor tab:
|
| 4 |
-
# -
|
| 5 |
-
# -
|
| 6 |
# ================================================================
|
| 7 |
|
| 8 |
# ---------------------- Runtime flags (HF-safe) ----------------------
|
|
@@ -49,7 +49,7 @@ LLM_AVAILABLE = (OPENAI_API_KEY is not None and OPENAI_API_KEY.strip() != "" and
|
|
| 49 |
# ========================= Predictor (kept) =========================
|
| 50 |
CF_COL = "Conductive Filler Conc. (wt%)"
|
| 51 |
TARGET_COL = "Stress GF (MPa-1)"
|
| 52 |
-
CANON_NA = "NA" #
|
| 53 |
|
| 54 |
MAIN_VARIABLES = [
|
| 55 |
"Filler 1 Type",
|
|
@@ -113,7 +113,7 @@ OPTIONAL_FIELDS = {
|
|
| 113 |
"Filler 2 Dimensionality",
|
| 114 |
}
|
| 115 |
|
| 116 |
-
#
|
| 117 |
REQUIRED_FIELDS = {
|
| 118 |
"Filler 1 Type",
|
| 119 |
"Filler 1 Diameter (µm)",
|
|
@@ -177,12 +177,16 @@ def _coerce_to_row(form_dict: dict) -> pd.DataFrame:
|
|
| 177 |
elif col in CATEGORICAL_COLS:
|
| 178 |
row[col] = _canon_cat(v)
|
| 179 |
else:
|
| 180 |
-
# non-numeric, non-categorical (free text) — keep trimmed, but not empty
|
| 181 |
s = str(v).strip() if v is not None else ""
|
| 182 |
row[col] = s if s else CANON_NA
|
| 183 |
return pd.DataFrame([row], columns=MAIN_VARIABLES)
|
| 184 |
|
| 185 |
def _is_complete(form_dict: dict) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
for col in REQUIRED_FIELDS:
|
| 187 |
v = form_dict.get(col, None)
|
| 188 |
if col in NUMERIC_COLS:
|
|
@@ -192,8 +196,10 @@ def _is_complete(form_dict: dict) -> bool:
|
|
| 192 |
except Exception:
|
| 193 |
return False
|
| 194 |
else:
|
|
|
|
|
|
|
| 195 |
s = _canon_cat(v)
|
| 196 |
-
if s ==
|
| 197 |
return False
|
| 198 |
return True
|
| 199 |
|
|
@@ -212,7 +218,7 @@ def _align_columns_to_model(df: pd.DataFrame, mdl) -> pd.DataFrame:
|
|
| 212 |
return df
|
| 213 |
|
| 214 |
def predict_fn(**kwargs):
|
| 215 |
-
#
|
| 216 |
if not _is_complete(kwargs):
|
| 217 |
return 0.0
|
| 218 |
mdl = _load_model_or_error()
|
|
@@ -221,13 +227,14 @@ def predict_fn(**kwargs):
|
|
| 221 |
X_new = _coerce_to_row(kwargs)
|
| 222 |
X_new = _align_columns_to_model(X_new, mdl)
|
| 223 |
try:
|
| 224 |
-
y_raw = mdl.predict(X_new)
|
| 225 |
-
# If your model was trained on log1p, set mdl.target_is_log1p_ = True before saving.
|
| 226 |
if getattr(mdl, "target_is_log1p_", False):
|
| 227 |
y = np.expm1(y_raw)
|
| 228 |
else:
|
| 229 |
y = y_raw
|
| 230 |
-
y = float(np.
|
|
|
|
|
|
|
| 231 |
return y
|
| 232 |
except Exception as e:
|
| 233 |
print(f"[Predict] {e}")
|
|
@@ -404,6 +411,7 @@ bm25 = BM25Okapi(bm25_tokens) if (BM25Okapi is not None and bm25_tokens is not N
|
|
| 404 |
st_query_model = _safe_init_st_model(os.getenv("EMB_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2"))
|
| 405 |
|
| 406 |
def _extract_page(text_chunk: str) -> str:
|
|
|
|
| 407 |
m = list(re.finditer(r"\[\[PAGE=(\d+)\]\]", text_chunk or ""))
|
| 408 |
return (m[-1].group(1) if m else "?")
|
| 409 |
|
|
@@ -692,7 +700,6 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 692 |
with gr.Row():
|
| 693 |
with gr.Column(scale=7):
|
| 694 |
with gr.Accordion("Primary conductive filler", open=True, elem_classes=["card"]):
|
| 695 |
-
# * marks on required inputs only
|
| 696 |
f1_type = gr.Textbox(label="Filler 1 Type *", placeholder="e.g., CNT, Graphite, Steel fiber")
|
| 697 |
f1_diam = gr.Number(label="Filler 1 Diameter (µm) *")
|
| 698 |
f1_len = gr.Number(label="Filler 1 Length (mm) *")
|
|
@@ -776,11 +783,11 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 776 |
w_bm25 = gr.Slider(0.0, 1.0, value=W_BM25_DEFAULT, step=0.05, label="BM25 weight")
|
| 777 |
w_emb = gr.Slider(0.0, 1.0, value=(0.0 if not USE_DENSE else 0.40), step=0.05, label="Dense weight (set 0 if disabled)")
|
| 778 |
|
| 779 |
-
#
|
| 780 |
-
state_use_llm = gr.State(LLM_AVAILABLE)
|
| 781 |
state_model_name = gr.State(os.getenv("OPENAI_MODEL", OPENAI_MODEL))
|
| 782 |
state_temperature = gr.State(0.2)
|
| 783 |
-
state_strict = gr.State(False)
|
| 784 |
|
| 785 |
gr.ChatInterface(
|
| 786 |
fn=rag_chat_fn,
|
|
|
|
| 1 |
# ================================================================
|
| 2 |
# Self-Sensing Concrete Assistant — Predictor (XGB) + Hybrid RAG
|
| 3 |
+
# - Predictor tab: required fields marked with *
|
| 4 |
+
# - Prediction fixed: NA is accepted for required categoricals
|
| 5 |
+
# - RAG page-extraction regex fixed for [[PAGE=...]]
|
| 6 |
# ================================================================
|
| 7 |
|
| 8 |
# ---------------------- Runtime flags (HF-safe) ----------------------
|
|
|
|
| 49 |
# ========================= Predictor (kept) =========================
|
| 50 |
CF_COL = "Conductive Filler Conc. (wt%)"
|
| 51 |
TARGET_COL = "Stress GF (MPa-1)"
|
| 52 |
+
CANON_NA = "NA" # canonical placeholder for categoricals
|
| 53 |
|
| 54 |
MAIN_VARIABLES = [
|
| 55 |
"Filler 1 Type",
|
|
|
|
| 113 |
"Filler 2 Dimensionality",
|
| 114 |
}
|
| 115 |
|
| 116 |
+
# Required fields: _is_complete() rejects the form unless each of these is provided
|
| 117 |
REQUIRED_FIELDS = {
|
| 118 |
"Filler 1 Type",
|
| 119 |
"Filler 1 Diameter (µm)",
|
|
|
|
| 177 |
elif col in CATEGORICAL_COLS:
|
| 178 |
row[col] = _canon_cat(v)
|
| 179 |
else:
|
|
|
|
| 180 |
s = str(v).strip() if v is not None else ""
|
| 181 |
row[col] = s if s else CANON_NA
|
| 182 |
return pd.DataFrame([row], columns=MAIN_VARIABLES)
|
| 183 |
|
| 184 |
def _is_complete(form_dict: dict) -> bool:
|
| 185 |
+
"""
|
| 186 |
+
FIX: For required *categoricals*, NA counts as 'provided' (acceptable),
|
| 187 |
+
so users aren't blocked when NA is a legitimate choice.
|
| 188 |
+
Numeric required fields must be non-NaN.
|
| 189 |
+
"""
|
| 190 |
for col in REQUIRED_FIELDS:
|
| 191 |
v = form_dict.get(col, None)
|
| 192 |
if col in NUMERIC_COLS:
|
|
|
|
| 196 |
except Exception:
|
| 197 |
return False
|
| 198 |
else:
|
| 199 |
+
# Required categoricals/text: accept any non-empty after canonicalization,
|
| 200 |
+
# and accept CANON_NA as "provided".
|
| 201 |
s = _canon_cat(v)
|
| 202 |
+
if s == "" or s is None:
|
| 203 |
return False
|
| 204 |
return True
|
| 205 |
|
|
|
|
| 218 |
return df
|
| 219 |
|
| 220 |
def predict_fn(**kwargs):
|
| 221 |
+
# Contract: return 0.0 when required inputs are incomplete or when prediction fails
|
| 222 |
if not _is_complete(kwargs):
|
| 223 |
return 0.0
|
| 224 |
mdl = _load_model_or_error()
|
|
|
|
| 227 |
X_new = _coerce_to_row(kwargs)
|
| 228 |
X_new = _align_columns_to_model(X_new, mdl)
|
| 229 |
try:
|
| 230 |
+
y_raw = mdl.predict(X_new) # log1p or original scale depending on training
|
|
|
|
| 231 |
if getattr(mdl, "target_is_log1p_", False):
|
| 232 |
y = np.expm1(y_raw)
|
| 233 |
else:
|
| 234 |
y = y_raw
|
| 235 |
+
y = float(np.asarray(y).ravel()[0])
|
| 236 |
+
if y < 0:
|
| 237 |
+
y = 0.0
|
| 238 |
return y
|
| 239 |
except Exception as e:
|
| 240 |
print(f"[Predict] {e}")
|
|
|
|
| 411 |
st_query_model = _safe_init_st_model(os.getenv("EMB_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2"))
|
| 412 |
|
| 413 |
def _extract_page(text_chunk: str) -> str:
|
| 414 |
+
# FIXED: proper brackets; matches [[PAGE=123]]
|
| 415 |
m = list(re.finditer(r"\[\[PAGE=(\d+)\]\]", text_chunk or ""))
|
| 416 |
return (m[-1].group(1) if m else "?")
|
| 417 |
|
|
|
|
| 700 |
with gr.Row():
|
| 701 |
with gr.Column(scale=7):
|
| 702 |
with gr.Accordion("Primary conductive filler", open=True, elem_classes=["card"]):
|
|
|
|
| 703 |
f1_type = gr.Textbox(label="Filler 1 Type *", placeholder="e.g., CNT, Graphite, Steel fiber")
|
| 704 |
f1_diam = gr.Number(label="Filler 1 Diameter (µm) *")
|
| 705 |
f1_len = gr.Number(label="Filler 1 Length (mm) *")
|
|
|
|
| 783 |
w_bm25 = gr.Slider(0.0, 1.0, value=W_BM25_DEFAULT, step=0.05, label="BM25 weight")
|
| 784 |
w_emb = gr.Slider(0.0, 1.0, value=(0.0 if not USE_DENSE else 0.40), step=0.05, label="Dense weight (set 0 if disabled)")
|
| 785 |
|
| 786 |
+
# Hidden states: gr.State values carry settings without rendering a widget
|
| 787 |
+
state_use_llm = gr.State(LLM_AVAILABLE)
|
| 788 |
state_model_name = gr.State(os.getenv("OPENAI_MODEL", OPENAI_MODEL))
|
| 789 |
state_temperature = gr.State(0.2)
|
| 790 |
+
state_strict = gr.State(False)
|
| 791 |
|
| 792 |
gr.ChatInterface(
|
| 793 |
fn=rag_chat_fn,
|