Inframat-x committed on
Commit
80fa737
·
verified ·
1 Parent(s): a3b7322

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -15
app.py CHANGED
@@ -1,8 +1,8 @@
1
  # ================================================================
2
  # Self-Sensing Concrete Assistant — Predictor (XGB) + Hybrid RAG
3
- # - Predictor tab: identical behavior (kept) + * marks for required fields
4
- # - Literature tab: Hybrid RAG; LLM runs silently when available
5
- # - UX: no visible "LLM & Controls" window; prediction=0.0 if incomplete/error
6
  # ================================================================
7
 
8
  # ---------------------- Runtime flags (HF-safe) ----------------------
@@ -49,7 +49,7 @@ LLM_AVAILABLE = (OPENAI_API_KEY is not None and OPENAI_API_KEY.strip() != "" and
49
  # ========================= Predictor (kept) =========================
50
  CF_COL = "Conductive Filler Conc. (wt%)"
51
  TARGET_COL = "Stress GF (MPa-1)"
52
- CANON_NA = "NA" # <-- canonical placeholder for categoricals
53
 
54
  MAIN_VARIABLES = [
55
  "Filler 1 Type",
@@ -113,7 +113,7 @@ OPTIONAL_FIELDS = {
113
  "Filler 2 Dimensionality",
114
  }
115
 
116
- # Only these fields are required
117
  REQUIRED_FIELDS = {
118
  "Filler 1 Type",
119
  "Filler 1 Diameter (µm)",
@@ -177,12 +177,16 @@ def _coerce_to_row(form_dict: dict) -> pd.DataFrame:
177
  elif col in CATEGORICAL_COLS:
178
  row[col] = _canon_cat(v)
179
  else:
180
- # non-numeric, non-categorical (free text) — keep trimmed, but not empty
181
  s = str(v).strip() if v is not None else ""
182
  row[col] = s if s else CANON_NA
183
  return pd.DataFrame([row], columns=MAIN_VARIABLES)
184
 
185
  def _is_complete(form_dict: dict) -> bool:
 
 
 
 
 
186
  for col in REQUIRED_FIELDS:
187
  v = form_dict.get(col, None)
188
  if col in NUMERIC_COLS:
@@ -192,8 +196,10 @@ def _is_complete(form_dict: dict) -> bool:
192
  except Exception:
193
  return False
194
  else:
 
 
195
  s = _canon_cat(v)
196
- if s == CANON_NA:
197
  return False
198
  return True
199
 
@@ -212,7 +218,7 @@ def _align_columns_to_model(df: pd.DataFrame, mdl) -> pd.DataFrame:
212
  return df
213
 
214
  def predict_fn(**kwargs):
215
- # Return a NUMBER always (0.0 on incomplete or any error) to keep gr.Number happy
216
  if not _is_complete(kwargs):
217
  return 0.0
218
  mdl = _load_model_or_error()
@@ -221,13 +227,14 @@ def predict_fn(**kwargs):
221
  X_new = _coerce_to_row(kwargs)
222
  X_new = _align_columns_to_model(X_new, mdl)
223
  try:
224
- y_raw = mdl.predict(X_new) # model may predict log1p(target) or original scale
225
- # If your model was trained on log1p, set mdl.target_is_log1p_ = True before saving.
226
  if getattr(mdl, "target_is_log1p_", False):
227
  y = np.expm1(y_raw)
228
  else:
229
  y = y_raw
230
- y = float(np.maximum(np.asarray(y).ravel()[0], 0.0))
 
 
231
  return y
232
  except Exception as e:
233
  print(f"[Predict] {e}")
@@ -404,6 +411,7 @@ bm25 = BM25Okapi(bm25_tokens) if (BM25Okapi is not None and bm25_tokens is not N
404
  st_query_model = _safe_init_st_model(os.getenv("EMB_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2"))
405
 
406
  def _extract_page(text_chunk: str) -> str:
 
407
  m = list(re.finditer(r"\[\[PAGE=(\d+)\]\]", text_chunk or ""))
408
  return (m[-1].group(1) if m else "?")
409
 
@@ -692,7 +700,6 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
692
  with gr.Row():
693
  with gr.Column(scale=7):
694
  with gr.Accordion("Primary conductive filler", open=True, elem_classes=["card"]):
695
- # * marks on required inputs only
696
  f1_type = gr.Textbox(label="Filler 1 Type *", placeholder="e.g., CNT, Graphite, Steel fiber")
697
  f1_diam = gr.Number(label="Filler 1 Diameter (µm) *")
698
  f1_len = gr.Number(label="Filler 1 Length (mm) *")
@@ -776,11 +783,11 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
776
  w_bm25 = gr.Slider(0.0, 1.0, value=W_BM25_DEFAULT, step=0.05, label="BM25 weight")
777
  w_emb = gr.Slider(0.0, 1.0, value=(0.0 if not USE_DENSE else 0.40), step=0.05, label="Dense weight (set 0 if disabled)")
778
 
779
- # ---- Hidden states for LLM behavior (no visible controls) ----
780
- state_use_llm = gr.State(LLM_AVAILABLE) # True when key present; else False
781
  state_model_name = gr.State(os.getenv("OPENAI_MODEL", OPENAI_MODEL))
782
  state_temperature = gr.State(0.2)
783
- state_strict = gr.State(False) # hidden: default to not-strict
784
 
785
  gr.ChatInterface(
786
  fn=rag_chat_fn,
 
1
  # ================================================================
2
  # Self-Sensing Concrete Assistant — Predictor (XGB) + Hybrid RAG
3
+ # - Predictor tab: required fields marked with *
4
+ # - Prediction fixed: NA is accepted for required categoricals
5
+ # - RAG page-extraction regex fixed for [[PAGE=...]]
6
  # ================================================================
7
 
8
  # ---------------------- Runtime flags (HF-safe) ----------------------
 
49
  # ========================= Predictor (kept) =========================
50
  CF_COL = "Conductive Filler Conc. (wt%)"
51
  TARGET_COL = "Stress GF (MPa-1)"
52
+ CANON_NA = "NA" # canonical placeholder for categoricals
53
 
54
  MAIN_VARIABLES = [
55
  "Filler 1 Type",
 
113
  "Filler 2 Dimensionality",
114
  }
115
 
116
+ # Required fields (as you specified earlier)
117
  REQUIRED_FIELDS = {
118
  "Filler 1 Type",
119
  "Filler 1 Diameter (µm)",
 
177
  elif col in CATEGORICAL_COLS:
178
  row[col] = _canon_cat(v)
179
  else:
 
180
  s = str(v).strip() if v is not None else ""
181
  row[col] = s if s else CANON_NA
182
  return pd.DataFrame([row], columns=MAIN_VARIABLES)
183
 
184
  def _is_complete(form_dict: dict) -> bool:
185
+ """
186
+ FIX: For required *categoricals*, NA counts as 'provided' (acceptable),
187
+ so users aren't blocked when NA is a legitimate choice.
188
+ Numeric required fields must be non-NaN.
189
+ """
190
  for col in REQUIRED_FIELDS:
191
  v = form_dict.get(col, None)
192
  if col in NUMERIC_COLS:
 
196
  except Exception:
197
  return False
198
  else:
199
+ # Required categoricals/text: accept any non-empty after canonicalization,
200
+ # and accept CANON_NA as "provided".
201
  s = _canon_cat(v)
202
+ if s == "" or s is None:
203
  return False
204
  return True
205
 
 
218
  return df
219
 
220
  def predict_fn(**kwargs):
221
+ # Keep your contract: 0.0 if incomplete or on error
222
  if not _is_complete(kwargs):
223
  return 0.0
224
  mdl = _load_model_or_error()
 
227
  X_new = _coerce_to_row(kwargs)
228
  X_new = _align_columns_to_model(X_new, mdl)
229
  try:
230
+ y_raw = mdl.predict(X_new) # log1p or original scale depending on training
 
231
  if getattr(mdl, "target_is_log1p_", False):
232
  y = np.expm1(y_raw)
233
  else:
234
  y = y_raw
235
+ y = float(np.asarray(y).ravel()[0])
236
+ if y < 0:
237
+ y = 0.0
238
  return y
239
  except Exception as e:
240
  print(f"[Predict] {e}")
 
411
  st_query_model = _safe_init_st_model(os.getenv("EMB_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2"))
412
 
413
  def _extract_page(text_chunk: str) -> str:
414
+ # FIXED: proper brackets; matches [[PAGE=123]]
415
  m = list(re.finditer(r"\[\[PAGE=(\d+)\]\]", text_chunk or ""))
416
  return (m[-1].group(1) if m else "?")
417
 
 
700
  with gr.Row():
701
  with gr.Column(scale=7):
702
  with gr.Accordion("Primary conductive filler", open=True, elem_classes=["card"]):
 
703
  f1_type = gr.Textbox(label="Filler 1 Type *", placeholder="e.g., CNT, Graphite, Steel fiber")
704
  f1_diam = gr.Number(label="Filler 1 Diameter (µm) *")
705
  f1_len = gr.Number(label="Filler 1 Length (mm) *")
 
783
  w_bm25 = gr.Slider(0.0, 1.0, value=W_BM25_DEFAULT, step=0.05, label="BM25 weight")
784
  w_emb = gr.Slider(0.0, 1.0, value=(0.0 if not USE_DENSE else 0.40), step=0.05, label="Dense weight (set 0 if disabled)")
785
 
786
+ # Hidden states (unchanged)
787
+ state_use_llm = gr.State(LLM_AVAILABLE)
788
  state_model_name = gr.State(os.getenv("OPENAI_MODEL", OPENAI_MODEL))
789
  state_temperature = gr.State(0.2)
790
+ state_strict = gr.State(False)
791
 
792
  gr.ChatInterface(
793
  fn=rag_chat_fn,