Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
# ================================================================
|
| 2 |
# Self-Sensing Concrete Assistant — Predictor (XGB) + Hybrid RAG
|
| 3 |
-
# - Predictor tab: identical behavior to your "second code"
|
| 4 |
# - Literature tab: from your "first code" (Hybrid RAG + MMR)
|
| 5 |
-
# -
|
|
|
|
| 6 |
# ================================================================
|
| 7 |
|
| 8 |
# ---------------------- Runtime flags (HF-safe) ----------------------
|
|
@@ -102,6 +103,14 @@ CATEGORICAL_COLS = {
|
|
| 102 |
"Current Type"
|
| 103 |
}
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
DIM_CHOICES = ["0D", "1D", "2D", "3D", "NA"]
|
| 106 |
CURRENT_CHOICES = ["DC", "AC", "NA"]
|
| 107 |
|
|
@@ -137,7 +146,41 @@ def _coerce_to_row(form_dict: dict) -> pd.DataFrame:
|
|
| 137 |
row[col] = "" if v in (None, "NA") else str(v).strip()
|
| 138 |
return pd.DataFrame([row], columns=MAIN_VARIABLES)
|
| 139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
def predict_fn(**kwargs):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
mdl = _load_model_or_error()
|
| 142 |
if isinstance(mdl, str):
|
| 143 |
return mdl
|
|
@@ -209,7 +252,7 @@ USE_ONLINE_SOURCES = os.getenv("USE_ONLINE_SOURCES", "false").lower() == "true"
|
|
| 209 |
# Retrieval weights
|
| 210 |
W_TFIDF_DEFAULT = 0.50 if not USE_DENSE else 0.30
|
| 211 |
W_BM25_DEFAULT = 0.50 if not USE_DENSE else 0.30
|
| 212 |
-
W_EMB_DEFAULT = 0.00 if
|
| 213 |
|
| 214 |
# Simple text processing
|
| 215 |
_SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+|\n+")
|
|
@@ -220,7 +263,7 @@ def sent_split(text: str) -> List[str]:
|
|
| 220 |
def tokenize(text: str) -> List[str]:
|
| 221 |
return [t.lower() for t in TOKEN_RE.findall(text)]
|
| 222 |
|
| 223 |
-
# PDF text extraction
|
| 224 |
def _extract_pdf_text(pdf_path: Path) -> str:
|
| 225 |
try:
|
| 226 |
import fitz
|
|
@@ -287,13 +330,11 @@ def build_or_load_hybrid(pdf_dir: Path):
|
|
| 287 |
rows.append({"doc_path": str(pdf), "chunk_id": i, "text": ch})
|
| 288 |
all_tokens.append(tokenize(ch))
|
| 289 |
if not rows:
|
| 290 |
-
# create empty stub to avoid crashes; UI will message user to upload PDFs
|
| 291 |
meta = pd.DataFrame(columns=["doc_path", "chunk_id", "text"])
|
| 292 |
vectorizer = None; X_tfidf = None; emb = None; all_tokens = None
|
| 293 |
return vectorizer, X_tfidf, meta, all_tokens, emb
|
| 294 |
|
| 295 |
meta = pd.DataFrame(rows)
|
| 296 |
-
|
| 297 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 298 |
vectorizer = TfidfVectorizer(
|
| 299 |
ngram_range=(1,2),
|
|
@@ -334,7 +375,7 @@ def _extract_page(text_chunk: str) -> str:
|
|
| 334 |
m = list(re.finditer(r"\[\[PAGE=(\d+)\]\]", text_chunk or ""))
|
| 335 |
return (m[-1].group(1) if m else "?")
|
| 336 |
|
| 337 |
-
def hybrid_search(query: str, k=8, w_tfidf=
|
| 338 |
if rag_meta is None or rag_meta.empty:
|
| 339 |
return pd.DataFrame()
|
| 340 |
|
|
@@ -403,7 +444,6 @@ def mmr_select_sentences(question: str, hits: pd.DataFrame, top_n=4, pool_per_ch
|
|
| 403 |
|
| 404 |
sent_texts = [p["sent"] for p in pool]
|
| 405 |
|
| 406 |
-
# Embedding-based relevance if available, else TF-IDF
|
| 407 |
use_dense = USE_DENSE and st_query_model is not None
|
| 408 |
if use_dense:
|
| 409 |
try:
|
|
@@ -483,9 +523,9 @@ def rag_reply(
|
|
| 483 |
model: str = None,
|
| 484 |
temperature: float = 0.2,
|
| 485 |
strict_quotes_only: bool = False,
|
| 486 |
-
w_tfidf: float =
|
| 487 |
-
w_bm25: float =
|
| 488 |
-
w_emb: float =
|
| 489 |
) -> str:
|
| 490 |
hits = hybrid_search(question, k=k, w_tfidf=w_tfidf, w_bm25=w_bm25, w_emb=w_emb)
|
| 491 |
if hits is None or hits.empty:
|
|
@@ -547,101 +587,62 @@ def rag_chat_fn(message, history, top_k, n_sentences, include_passages,
|
|
| 547 |
|
| 548 |
# ========================= UI (predictor styling kept) =========================
|
| 549 |
CSS = """
|
| 550 |
-
/*
|
|
|
|
| 551 |
.gradio-container {
|
| 552 |
-
background: linear-gradient(135deg, #
|
| 553 |
}
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
background: linear-gradient(160deg, #1f2937 0%, #14532d 55%, #0b3b68 100%) !important; /* gray → green → blue */
|
| 561 |
-
border-radius: 0.75rem;
|
| 562 |
-
border: 1px solid rgba(255,255,255,0.12);
|
| 563 |
}
|
|
|
|
|
|
|
| 564 |
|
| 565 |
-
/*
|
| 566 |
-
#rag-tab
|
| 567 |
-
|
| 568 |
-
|
|
|
|
| 569 |
}
|
| 570 |
-
|
| 571 |
-
/* Inputs in RAG tab: blue-gray fields with clear borders */
|
| 572 |
#rag-tab input, #rag-tab textarea, #rag-tab select, #rag-tab .scroll-hide, #rag-tab .chatbot textarea {
|
| 573 |
-
background: rgba(17, 24, 39, 0.85) !important;
|
| 574 |
-
border: 1px solid #60a5fa !important;
|
| 575 |
color: #e5f2ff !important;
|
| 576 |
}
|
| 577 |
-
|
| 578 |
-
/* Sliders (track + thumb) for RAG controls */
|
| 579 |
-
#rag-tab input[type="range"] {
|
| 580 |
-
accent-color: #22c55e !important; /* green accent for track */
|
| 581 |
-
}
|
| 582 |
-
#rag-tab input[type="range"]::-webkit-slider-thumb {
|
| 583 |
-
background: #22c55e !important; /* green thumb */
|
| 584 |
-
}
|
| 585 |
-
#rag-tab input[type="range"]::-moz-range-thumb {
|
| 586 |
-
background: #22c55e !important;
|
| 587 |
-
}
|
| 588 |
-
|
| 589 |
-
/* Checkboxes and toggles */
|
| 590 |
-
#rag-tab input[type="checkbox"] {
|
| 591 |
-
accent-color: #60a5fa !important; /* blue checks */
|
| 592 |
-
}
|
| 593 |
-
|
| 594 |
-
/* Buttons in RAG: primary blue, secondary gray-green */
|
| 595 |
#rag-tab button {
|
| 596 |
-
border-radius:
|
| 597 |
font-weight: 600 !important;
|
| 598 |
}
|
| 599 |
-
#rag-tab button.primary, #rag-tab button[aria-label*="Send"] {
|
| 600 |
-
background: #2563eb !important; /* blue */
|
| 601 |
-
color: #ffffff !important;
|
| 602 |
-
border: 1px solid #93c5fd !important;
|
| 603 |
-
}
|
| 604 |
-
#rag-tab button.secondary {
|
| 605 |
-
background: #374151 !important; /* gray */
|
| 606 |
-
color: #e5e7eb !important;
|
| 607 |
-
}
|
| 608 |
-
|
| 609 |
-
/* Chat area */
|
| 610 |
#rag-tab .chatbot {
|
| 611 |
-
background: rgba(15, 23, 42, 0.6) !important;
|
| 612 |
border: 1px solid rgba(148, 163, 184, 0.35) !important;
|
| 613 |
}
|
| 614 |
#rag-tab .message.user {
|
| 615 |
-
background: rgba(34, 197, 94, 0.15) !important;
|
| 616 |
border-left: 3px solid #22c55e !important;
|
| 617 |
}
|
| 618 |
#rag-tab .message.bot {
|
| 619 |
-
background: rgba(59, 130, 246, 0.15) !important;
|
| 620 |
border-left: 3px solid #60a5fa !important;
|
| 621 |
color: #eef6ff !important;
|
| 622 |
}
|
| 623 |
|
| 624 |
-
/*
|
| 625 |
-
#
|
| 626 |
-
background: rgba(2, 6, 23, 0.7) !important; /* deep navy for code */
|
| 627 |
-
border: 1px solid rgba(99, 102, 241, 0.35) !important; /* indigo edge */
|
| 628 |
-
color: #e5f2ff !important;
|
| 629 |
-
}
|
| 630 |
-
|
| 631 |
-
/* Tiny helper: make small helper text readable */
|
| 632 |
-
#rag-tab .text-xs, #rag-tab .text-sm, #rag-tab .description, #rag-tab .caption, #rag-tab .info {
|
| 633 |
-
color: #d1fae5 !important; /* minty green for microcopy */
|
| 634 |
-
opacity: 0.95 !important;
|
| 635 |
-
}
|
| 636 |
"""
|
| 637 |
|
| 638 |
theme = gr.themes.Soft(
|
| 639 |
primary_hue="blue",
|
| 640 |
neutral_hue="green"
|
| 641 |
).set(
|
| 642 |
-
body_background_fill="#
|
| 643 |
body_text_color="#e0f2fe",
|
| 644 |
-
input_background_fill="#
|
| 645 |
input_border_color="#1e40af",
|
| 646 |
button_primary_background_fill="#2563eb",
|
| 647 |
button_primary_text_color="#ffffff",
|
|
@@ -653,9 +654,8 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 653 |
gr.Markdown(
|
| 654 |
"<h1 style='margin:0'>Self-Sensing Concrete Assistant</h1>"
|
| 655 |
"<p style='opacity:.9'>"
|
| 656 |
-
"Left
|
| 657 |
-
"Right
|
| 658 |
-
"Upload PDFs into <code>papers/</code> in your Space repo."
|
| 659 |
"</p>"
|
| 660 |
)
|
| 661 |
|
|
@@ -699,7 +699,7 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 699 |
|
| 700 |
with gr.Column(scale=5):
|
| 701 |
with gr.Group(elem_classes=["card"]):
|
| 702 |
-
out_pred = gr.Number(label="Predicted Stress GF (MPa-1)", precision=6)
|
| 703 |
with gr.Row():
|
| 704 |
btn_pred = gr.Button("Predict", variant="primary")
|
| 705 |
btn_clear = gr.Button("Clear")
|
|
@@ -707,11 +707,12 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 707 |
|
| 708 |
with gr.Accordion("About this model", open=False, elem_classes=["card"]):
|
| 709 |
gr.Markdown(
|
| 710 |
-
"- Pipeline: ColumnTransformer
|
| 711 |
-
"- Target: Stress GF (MPa
|
| 712 |
"- Missing values are safely imputed per-feature.\n"
|
| 713 |
"- Trained columns:\n"
|
| 714 |
-
f" `{', '.join(MAIN_VARIABLES)}`"
|
|
|
|
| 715 |
)
|
| 716 |
|
| 717 |
# Wire predictor buttons
|
|
@@ -729,7 +730,7 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 729 |
return predict_fn(**data)
|
| 730 |
|
| 731 |
btn_pred.click(_predict_wrapper, inputs=inputs_in_order, outputs=out_pred)
|
| 732 |
-
btn_clear.click(lambda: _clear_all(), inputs=None, outputs=inputs_in_order)
|
| 733 |
btn_demo.click(lambda: _fill_example(), inputs=None, outputs=inputs_in_order)
|
| 734 |
|
| 735 |
# ------------------------- Literature Tab -------------------------
|
|
@@ -741,14 +742,14 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 741 |
with gr.Row():
|
| 742 |
top_k = gr.Slider(5, 12, value=8, step=1, label="Top-K chunks")
|
| 743 |
n_sentences = gr.Slider(2, 6, value=4, step=1, label="Answer length (sentences)")
|
| 744 |
-
include_passages = gr.Checkbox(value=False, label="Include supporting passages")
|
| 745 |
with gr.Accordion("Retriever weights (advanced)", open=False):
|
| 746 |
w_tfidf = gr.Slider(0.0, 1.0, value=W_TFIDF_DEFAULT, step=0.05, label="TF-IDF weight")
|
| 747 |
w_bm25 = gr.Slider(0.0, 1.0, value=W_BM25_DEFAULT, step=0.05, label="BM25 weight")
|
| 748 |
-
w_emb = gr.Slider(0.0, 1.0, value=
|
| 749 |
with gr.Accordion("LLM & Controls", open=False):
|
| 750 |
-
strict_quotes_only = gr.Checkbox(value=False, label="Strict quotes only (no paraphrasing)")
|
| 751 |
-
use_llm = gr.Checkbox(value=False, label="Use LLM to paraphrase selected sentences")
|
| 752 |
model_name = gr.Textbox(value=os.getenv("OPENAI_MODEL", OPENAI_MODEL),
|
| 753 |
label="LLM model", placeholder="e.g., gpt-5 or gpt-5-mini")
|
| 754 |
temperature = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Temperature")
|
|
|
|
| 1 |
# ================================================================
|
| 2 |
# Self-Sensing Concrete Assistant — Predictor (XGB) + Hybrid RAG
|
| 3 |
+
# - Predictor tab: identical behavior to your "second code" (kept)
|
| 4 |
# - Literature tab: from your "first code" (Hybrid RAG + MMR)
|
| 5 |
+
# - UX: checkboxes clickable, science-oriented layout, and
|
| 6 |
+
# prediction=0.0 when required fields are incomplete
|
| 7 |
# ================================================================
|
| 8 |
|
| 9 |
# ---------------------- Runtime flags (HF-safe) ----------------------
|
|
|
|
| 103 |
"Current Type"
|
| 104 |
}
|
| 105 |
|
| 106 |
+
# Optional fields (allowed to be missing) — everything else is required
|
| 107 |
+
OPTIONAL_FIELDS = {
|
| 108 |
+
"Filler 2 Type",
|
| 109 |
+
"Filler 2 Diameter (µm)",
|
| 110 |
+
"Filler 2 Length (mm)",
|
| 111 |
+
"Filler 2 Dimensionality",
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
DIM_CHOICES = ["0D", "1D", "2D", "3D", "NA"]
|
| 115 |
CURRENT_CHOICES = ["DC", "AC", "NA"]
|
| 116 |
|
|
|
|
| 146 |
row[col] = "" if v in (None, "NA") else str(v).strip()
|
| 147 |
return pd.DataFrame([row], columns=MAIN_VARIABLES)
|
| 148 |
|
| 149 |
+
def _is_complete(form_dict: dict) -> bool:
    """Return True when every required predictor field holds a usable value.

    Completeness rule:
      - Columns listed in OPTIONAL_FIELDS are never checked.
      - Required NUMERIC_COLS must convert with ``float()`` and must not be NaN.
        This rejects None, empty/whitespace-only strings, non-numeric text and
        NaN of any float type (not only the built-in ``float``).
      - Required CATEGORICAL_COLS must be a non-empty string after stripping;
        None and the literal 'NA' both count as empty.
      - Any other required column must be non-empty after ``str().strip()``;
        'NA' is not special-cased for these.

    Args:
        form_dict: Mapping of column name to the raw form value. Columns that
            are absent from the mapping are treated as None.

    Returns:
        True if all required columns in MAIN_VARIABLES pass the rule above,
        otherwise False.
    """
    for col in MAIN_VARIABLES:
        if col in OPTIONAL_FIELDS:
            # Optional columns may be empty/NaN.
            continue

        v = form_dict.get(col)

        if col in NUMERIC_COLS:
            # Convert first so that NaN is detected regardless of the concrete
            # numeric type, and so unparseable input counts as missing.
            try:
                number = float(v)
            except (TypeError, ValueError):
                return False
            if np.isnan(number):
                return False
        elif col in CATEGORICAL_COLS:
            if v in (None, "NA") or not str(v).strip():
                return False
        else:
            # Generic non-numeric column: only emptiness is checked.
            if v is None or not str(v).strip():
                return False

    return True
|
| 178 |
+
|
| 179 |
def predict_fn(**kwargs):
|
| 180 |
+
# If incomplete, return 0.0 by spec
|
| 181 |
+
if not _is_complete(kwargs):
|
| 182 |
+
return 0.0
|
| 183 |
+
|
| 184 |
mdl = _load_model_or_error()
|
| 185 |
if isinstance(mdl, str):
|
| 186 |
return mdl
|
|
|
|
| 252 |
# Retrieval weights
|
| 253 |
W_TFIDF_DEFAULT = 0.50 if not USE_DENSE else 0.30
|
| 254 |
W_BM25_DEFAULT = 0.50 if not USE_DENSE else 0.30
|
| 255 |
+
# Dense weight uses the same truthiness test as the TF-IDF and BM25 defaults so
# the three always stay in step (0.5 + 0.5 + 0.0 without dense, 0.3 + 0.3 + 0.4
# with dense). An identity check against False would diverge from them for any
# falsy value that is not the bool False.
W_EMB_DEFAULT = 0.00 if not USE_DENSE else 0.40
|
| 256 |
|
| 257 |
# Simple text processing
|
| 258 |
_SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+|\n+")
|
|
|
|
| 263 |
def tokenize(text: str) -> List[str]:
|
| 264 |
return [t.lower() for t in TOKEN_RE.findall(text)]
|
| 265 |
|
| 266 |
+
# PDF text extraction
|
| 267 |
def _extract_pdf_text(pdf_path: Path) -> str:
|
| 268 |
try:
|
| 269 |
import fitz
|
|
|
|
| 330 |
rows.append({"doc_path": str(pdf), "chunk_id": i, "text": ch})
|
| 331 |
all_tokens.append(tokenize(ch))
|
| 332 |
if not rows:
|
|
|
|
| 333 |
meta = pd.DataFrame(columns=["doc_path", "chunk_id", "text"])
|
| 334 |
vectorizer = None; X_tfidf = None; emb = None; all_tokens = None
|
| 335 |
return vectorizer, X_tfidf, meta, all_tokens, emb
|
| 336 |
|
| 337 |
meta = pd.DataFrame(rows)
|
|
|
|
| 338 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 339 |
vectorizer = TfidfVectorizer(
|
| 340 |
ngram_range=(1,2),
|
|
|
|
| 375 |
m = list(re.finditer(r"\[\[PAGE=(\d+)\]\]", text_chunk or ""))
|
| 376 |
return (m[-1].group(1) if m else "?")
|
| 377 |
|
| 378 |
+
def hybrid_search(query: str, k=8, w_tfidf=0.5, w_bm25=0.5, w_emb=0.0):
|
| 379 |
if rag_meta is None or rag_meta.empty:
|
| 380 |
return pd.DataFrame()
|
| 381 |
|
|
|
|
| 444 |
|
| 445 |
sent_texts = [p["sent"] for p in pool]
|
| 446 |
|
|
|
|
| 447 |
use_dense = USE_DENSE and st_query_model is not None
|
| 448 |
if use_dense:
|
| 449 |
try:
|
|
|
|
| 523 |
model: str = None,
|
| 524 |
temperature: float = 0.2,
|
| 525 |
strict_quotes_only: bool = False,
|
| 526 |
+
w_tfidf: float = 0.5,
|
| 527 |
+
w_bm25: float = 0.5,
|
| 528 |
+
w_emb: float = 0.0
|
| 529 |
) -> str:
|
| 530 |
hits = hybrid_search(question, k=k, w_tfidf=w_tfidf, w_bm25=w_bm25, w_emb=w_emb)
|
| 531 |
if hits is None or hits.empty:
|
|
|
|
| 587 |
|
| 588 |
# ========================= UI (predictor styling kept) =========================
|
| 589 |
CSS = """
|
| 590 |
+
/* Science-oriented: crisp contrast + readable numerics */
|
| 591 |
+
* {font-family: ui-sans-serif, system-ui, -apple-system, 'Segoe UI', Roboto, 'Helvetica Neue', Arial;}
|
| 592 |
.gradio-container {
|
| 593 |
+
background: linear-gradient(135deg, #0b1020 0%, #0c2b1a 60%, #0a2b4d 100%) !important; /* deep science vibe */
|
| 594 |
}
|
| 595 |
+
.card {background: rgba(255,255,255,0.06) !important; border: 1px solid rgba(255,255,255,0.14); border-radius: 12px;}
|
| 596 |
+
label {color: #e8f7ff !important; text-shadow: 0 1px 0 rgba(0,0,0,0.35); cursor: pointer;}
|
| 597 |
+
input[type="number"] {font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;}
|
| 598 |
+
/* Checkbox clickability fixes (some themes overlay labels) */
|
| 599 |
+
input[type="checkbox"], .gr-checkbox, .gr-checkbox > * {
|
| 600 |
+
pointer-events: auto !important;
|
|
|
|
|
|
|
|
|
|
| 601 |
}
|
| 602 |
+
.gr-checkbox label, .gr-check-radio label { pointer-events: auto !important; cursor: pointer; }
|
| 603 |
+
#rag-tab input[type="checkbox"] { accent-color: #60a5fa !important; }
|
| 604 |
|
| 605 |
+
/* RAG tab background and elements */
|
| 606 |
+
#rag-tab .block, #rag-tab .group, #rag-tab .accordion {
|
| 607 |
+
background: linear-gradient(160deg, #1f2937 0%, #14532d 55%, #0b3b68 100%) !important;
|
| 608 |
+
border-radius: 12px;
|
| 609 |
+
border: 1px solid rgba(255,255,255,0.14);
|
| 610 |
}
|
|
|
|
|
|
|
| 611 |
#rag-tab input, #rag-tab textarea, #rag-tab select, #rag-tab .scroll-hide, #rag-tab .chatbot textarea {
|
| 612 |
+
background: rgba(17, 24, 39, 0.85) !important;
|
| 613 |
+
border: 1px solid #60a5fa !important;
|
| 614 |
color: #e5f2ff !important;
|
| 615 |
}
|
| 616 |
+
#rag-tab input[type="range"] { accent-color: #22c55e !important; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
#rag-tab button {
|
| 618 |
+
border-radius: 10px !important;
|
| 619 |
font-weight: 600 !important;
|
| 620 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 621 |
#rag-tab .chatbot {
|
| 622 |
+
background: rgba(15, 23, 42, 0.6) !important;
|
| 623 |
border: 1px solid rgba(148, 163, 184, 0.35) !important;
|
| 624 |
}
|
| 625 |
#rag-tab .message.user {
|
| 626 |
+
background: rgba(34, 197, 94, 0.15) !important;
|
| 627 |
border-left: 3px solid #22c55e !important;
|
| 628 |
}
|
| 629 |
#rag-tab .message.bot {
|
| 630 |
+
background: rgba(59, 130, 246, 0.15) !important;
|
| 631 |
border-left: 3px solid #60a5fa !important;
|
| 632 |
color: #eef6ff !important;
|
| 633 |
}
|
| 634 |
|
| 635 |
+
/* Predictor output emphasis */
|
| 636 |
+
#pred-out .wrap { font-size: 20px; font-weight: 700; color: #ecfdf5; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 637 |
"""
|
| 638 |
|
| 639 |
theme = gr.themes.Soft(
|
| 640 |
primary_hue="blue",
|
| 641 |
neutral_hue="green"
|
| 642 |
).set(
|
| 643 |
+
body_background_fill="#0b1020",
|
| 644 |
body_text_color="#e0f2fe",
|
| 645 |
+
input_background_fill="#0f172a",
|
| 646 |
input_border_color="#1e40af",
|
| 647 |
button_primary_background_fill="#2563eb",
|
| 648 |
button_primary_text_color="#ffffff",
|
|
|
|
| 654 |
gr.Markdown(
|
| 655 |
"<h1 style='margin:0'>Self-Sensing Concrete Assistant</h1>"
|
| 656 |
"<p style='opacity:.9'>"
|
| 657 |
+
"Left: ML prediction for Stress Gauge Factor (original scale, MPa<sup>-1</sup>). "
|
| 658 |
+
"Right: Literature Q&A via Hybrid RAG (BM25 + TF-IDF + optional dense) with MMR sentence selection."
|
|
|
|
| 659 |
"</p>"
|
| 660 |
)
|
| 661 |
|
|
|
|
| 699 |
|
| 700 |
with gr.Column(scale=5):
|
| 701 |
with gr.Group(elem_classes=["card"]):
|
| 702 |
+
out_pred = gr.Number(label="Predicted Stress GF (MPa-1)", value=0.0, precision=6, elem_id="pred-out")
|
| 703 |
with gr.Row():
|
| 704 |
btn_pred = gr.Button("Predict", variant="primary")
|
| 705 |
btn_clear = gr.Button("Clear")
|
|
|
|
| 707 |
|
| 708 |
with gr.Accordion("About this model", open=False, elem_classes=["card"]):
|
| 709 |
gr.Markdown(
|
| 710 |
+
"- Pipeline: ColumnTransformer → (RobustScaler + OneHot) → XGBoost\n"
|
| 711 |
+
"- Target: Stress GF (MPa<sup>-1</sup>) on original scale (model trains on log1p).\n"
|
| 712 |
"- Missing values are safely imputed per-feature.\n"
|
| 713 |
"- Trained columns:\n"
|
| 714 |
+
f" `{', '.join(MAIN_VARIABLES)}`",
|
| 715 |
+
elem_classes=["prose"]
|
| 716 |
)
|
| 717 |
|
| 718 |
# Wire predictor buttons
|
|
|
|
| 730 |
return predict_fn(**data)
|
| 731 |
|
| 732 |
btn_pred.click(_predict_wrapper, inputs=inputs_in_order, outputs=out_pred)
|
| 733 |
+
btn_clear.click(lambda: _clear_all(), inputs=None, outputs=inputs_in_order).then(lambda: 0.0, outputs=out_pred)
|
| 734 |
btn_demo.click(lambda: _fill_example(), inputs=None, outputs=inputs_in_order)
|
| 735 |
|
| 736 |
# ------------------------- Literature Tab -------------------------
|
|
|
|
| 742 |
with gr.Row():
|
| 743 |
top_k = gr.Slider(5, 12, value=8, step=1, label="Top-K chunks")
|
| 744 |
n_sentences = gr.Slider(2, 6, value=4, step=1, label="Answer length (sentences)")
|
| 745 |
+
include_passages = gr.Checkbox(value=False, label="Include supporting passages", interactive=True)
|
| 746 |
with gr.Accordion("Retriever weights (advanced)", open=False):
|
| 747 |
w_tfidf = gr.Slider(0.0, 1.0, value=W_TFIDF_DEFAULT, step=0.05, label="TF-IDF weight")
|
| 748 |
w_bm25 = gr.Slider(0.0, 1.0, value=W_BM25_DEFAULT, step=0.05, label="BM25 weight")
|
| 749 |
+
w_emb = gr.Slider(0.0, 1.0, value=(0.0 if not USE_DENSE else 0.40), step=0.05, label="Dense weight (set 0 if disabled)")
|
| 750 |
with gr.Accordion("LLM & Controls", open=False):
|
| 751 |
+
strict_quotes_only = gr.Checkbox(value=False, label="Strict quotes only (no paraphrasing)", interactive=True)
|
| 752 |
+
use_llm = gr.Checkbox(value=False, label="Use LLM to paraphrase selected sentences", interactive=True)
|
| 753 |
model_name = gr.Textbox(value=os.getenv("OPENAI_MODEL", OPENAI_MODEL),
|
| 754 |
label="LLM model", placeholder="e.g., gpt-5 or gpt-5-mini")
|
| 755 |
temperature = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Temperature")
|