Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
# ================================================================
|
| 2 |
# Self-Sensing Concrete Assistant — Predictor (XGB) + Hybrid RAG
|
| 3 |
-
# - Predictor tab: identical behavior
|
| 4 |
-
# - Literature tab:
|
| 5 |
-
# - UX:
|
| 6 |
-
# science-oriented styling, and prediction=0.0 if inputs incomplete
|
| 7 |
# ================================================================
|
| 8 |
|
| 9 |
# ---------------------- Runtime flags (HF-safe) ----------------------
|
|
@@ -44,10 +43,10 @@ try:
|
|
| 44 |
except Exception:
|
| 45 |
OpenAI = None
|
| 46 |
|
| 47 |
-
# LLM availability flag
|
| 48 |
LLM_AVAILABLE = (OPENAI_API_KEY is not None and OPENAI_API_KEY.strip() != "" and OpenAI is not None)
|
| 49 |
|
| 50 |
-
# ========================= Predictor (kept
|
| 51 |
CF_COL = "Conductive Filler Conc. (wt%)"
|
| 52 |
TARGET_COL = "Stress GF (MPa-1)"
|
| 53 |
|
|
@@ -106,7 +105,6 @@ CATEGORICAL_COLS = {
|
|
| 106 |
"Current Type"
|
| 107 |
}
|
| 108 |
|
| 109 |
-
# Optional fields (allowed to be missing). All others required for prediction.
|
| 110 |
OPTIONAL_FIELDS = {
|
| 111 |
"Filler 2 Type",
|
| 112 |
"Filler 2 Diameter (µm)",
|
|
@@ -150,12 +148,6 @@ def _coerce_to_row(form_dict: dict) -> pd.DataFrame:
|
|
| 150 |
return pd.DataFrame([row], columns=MAIN_VARIABLES)
|
| 151 |
|
| 152 |
def _is_complete(form_dict: dict) -> bool:
|
| 153 |
-
"""
|
| 154 |
-
Completeness rule:
|
| 155 |
-
- All fields except OPTIONAL_FIELDS must be present.
|
| 156 |
-
- For NUMERIC_COLS (except optional), require not-NaN.
|
| 157 |
-
- For CATEGORICAL_COLS (except optional), require non-empty string (no 'NA').
|
| 158 |
-
"""
|
| 159 |
for col in MAIN_VARIABLES:
|
| 160 |
if col in OPTIONAL_FIELDS:
|
| 161 |
continue
|
|
@@ -177,10 +169,8 @@ def _is_complete(form_dict: dict) -> bool:
|
|
| 177 |
return True
|
| 178 |
|
| 179 |
def predict_fn(**kwargs):
|
| 180 |
-
# If incomplete, return 0.0 per requirement
|
| 181 |
if not _is_complete(kwargs):
|
| 182 |
return 0.0
|
| 183 |
-
|
| 184 |
mdl = _load_model_or_error()
|
| 185 |
if isinstance(mdl, str):
|
| 186 |
return mdl
|
|
@@ -236,8 +226,7 @@ def _clear_all():
|
|
| 236 |
cleared.append("")
|
| 237 |
return cleared
|
| 238 |
|
| 239 |
-
# ========================= Hybrid RAG
|
| 240 |
-
# Configuration
|
| 241 |
ARTIFACT_DIR = Path("rag_artifacts"); ARTIFACT_DIR.mkdir(exist_ok=True)
|
| 242 |
TFIDF_VECT_PATH = ARTIFACT_DIR / "tfidf_vectorizer.joblib"
|
| 243 |
TFIDF_MAT_PATH = ARTIFACT_DIR / "tfidf_matrix.joblib"
|
|
@@ -245,16 +234,13 @@ BM25_TOK_PATH = ARTIFACT_DIR / "bm25_tokens.joblib"
|
|
| 245 |
EMB_NPY_PATH = ARTIFACT_DIR / "chunk_embeddings.npy"
|
| 246 |
RAG_META_PATH = ARTIFACT_DIR / "chunks.parquet"
|
| 247 |
|
| 248 |
-
# PDF source (HF-safe: rely on local /papers by default)
|
| 249 |
LOCAL_PDF_DIR = Path("papers"); LOCAL_PDF_DIR.mkdir(exist_ok=True)
|
| 250 |
USE_ONLINE_SOURCES = os.getenv("USE_ONLINE_SOURCES", "false").lower() == "true"
|
| 251 |
|
| 252 |
-
# Retrieval weights
|
| 253 |
W_TFIDF_DEFAULT = 0.50 if not USE_DENSE else 0.30
|
| 254 |
W_BM25_DEFAULT = 0.50 if not USE_DENSE else 0.30
|
| 255 |
W_EMB_DEFAULT = 0.00 if USE_DENSE is False else 0.40
|
| 256 |
|
| 257 |
-
# Simple text processing
|
| 258 |
_SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+|\n+")
|
| 259 |
TOKEN_RE = re.compile(r"[A-Za-z0-9_#+\-/\.%]+")
|
| 260 |
def sent_split(text: str) -> List[str]:
|
|
@@ -263,7 +249,6 @@ def sent_split(text: str) -> List[str]:
|
|
| 263 |
def tokenize(text: str) -> List[str]:
|
| 264 |
return [t.lower() for t in TOKEN_RE.findall(text)]
|
| 265 |
|
| 266 |
-
# PDF text extraction
|
| 267 |
def _extract_pdf_text(pdf_path: Path) -> str:
|
| 268 |
try:
|
| 269 |
import fitz
|
|
@@ -305,7 +290,6 @@ def _safe_init_st_model(name: str):
|
|
| 305 |
USE_DENSE = False
|
| 306 |
return None
|
| 307 |
|
| 308 |
-
# Build or load index
|
| 309 |
def build_or_load_hybrid(pdf_dir: Path):
|
| 310 |
have_cache = (TFIDF_VECT_PATH.exists() and TFIDF_MAT_PATH.exists()
|
| 311 |
and RAG_META_PATH.exists()
|
|
@@ -358,13 +342,11 @@ def build_or_load_hybrid(pdf_dir: Path):
|
|
| 358 |
print("Dense embedding failed:", e)
|
| 359 |
emb = None
|
| 360 |
|
| 361 |
-
# Save artifacts
|
| 362 |
joblib.dump(vectorizer, TFIDF_VECT_PATH)
|
| 363 |
-
joblib.dump(X_tfidf, TFIDF_MAT_PATH)
|
| 364 |
if BM25Okapi is not None:
|
| 365 |
joblib.dump(all_tokens, BM25_TOK_PATH)
|
| 366 |
meta.to_parquet(RAG_META_PATH, index=False)
|
| 367 |
-
|
| 368 |
return vectorizer, X_tfidf, meta, all_tokens, emb
|
| 369 |
|
| 370 |
tfidf_vectorizer, tfidf_matrix, rag_meta, bm25_tokens, emb_matrix = build_or_load_hybrid(LOCAL_PDF_DIR)
|
|
@@ -372,7 +354,7 @@ bm25 = BM25Okapi(bm25_tokens) if (BM25Okapi is not None and bm25_tokens is not N
|
|
| 372 |
st_query_model = _safe_init_st_model(os.getenv("EMB_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2"))
|
| 373 |
|
| 374 |
def _extract_page(text_chunk: str) -> str:
|
| 375 |
-
m = list(re.finditer(r"\[\[PAGE=(\d+)\]\]", text_chunk or ""))
|
| 376 |
return (m[-1].group(1) if m else "?")
|
| 377 |
|
| 378 |
def hybrid_search(query: str, k=8, w_tfidf=W_TFIDF_DEFAULT, w_bm25=W_BM25_DEFAULT, w_emb=W_EMB_DEFAULT):
|
|
@@ -401,7 +383,7 @@ def hybrid_search(query: str, k=8, w_tfidf=W_TFIDF_DEFAULT, w_bm25=W_BM25_DEFAUL
|
|
| 401 |
|
| 402 |
# BM25 scores
|
| 403 |
if bm25 is not None:
|
| 404 |
-
q_tokens = [t.lower() for t in re.findall(r"[A-Za-z0-9_#+\-/\.%]+", query)]
|
| 405 |
bm25_scores = np.array(bm25.get_scores(q_tokens), dtype=float)
|
| 406 |
else:
|
| 407 |
bm25_scores = np.zeros(len(rag_meta), dtype=float); w_bm25 = 0.0
|
|
@@ -536,6 +518,7 @@ def rag_reply(
|
|
| 536 |
srcs = {Path(r['doc_path']).name for _, r in hits.iterrows()}
|
| 537 |
coverage_note = "" if len(srcs) >= 3 else f"\n\n> Note: Only {len(srcs)} unique source(s) contributed. Add more PDFs or increase Top-K."
|
| 538 |
|
|
|
|
| 539 |
if strict_quotes_only:
|
| 540 |
if not selected:
|
| 541 |
return f"**Quoted Passages:**\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2]) + f"\n\n**Citations:** {header_cites}{coverage_note}"
|
|
@@ -585,15 +568,6 @@ def rag_chat_fn(message, history, top_k, n_sentences, include_passages,
|
|
| 585 |
except Exception as e:
|
| 586 |
return f"RAG error: {e}"
|
| 587 |
|
| 588 |
-
# ---------- Small helpers to keep checkboxes mutually exclusive ----------
|
| 589 |
-
def _strict_on(strict, use_llm):
|
| 590 |
-
# If strict is turned on, force LLM off; otherwise keep LLM as is.
|
| 591 |
-
return False if strict else use_llm
|
| 592 |
-
|
| 593 |
-
def _llm_on(use_llm, strict):
|
| 594 |
-
# If LLM is turned on, force strict off; otherwise keep strict as is.
|
| 595 |
-
return False if use_llm else strict
|
| 596 |
-
|
| 597 |
# ========================= UI (science-oriented styling) =========================
|
| 598 |
CSS = """
|
| 599 |
/* Science-oriented: crisp contrast + readable numerics */
|
|
@@ -605,7 +579,7 @@ CSS = """
|
|
| 605 |
label {color: #e8f7ff !important; text-shadow: 0 1px 0 rgba(0,0,0,0.35); cursor: pointer;}
|
| 606 |
input[type="number"] {font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;}
|
| 607 |
|
| 608 |
-
/* Checkbox clickability fixes
|
| 609 |
input[type="checkbox"], .gr-checkbox, .gr-checkbox > * { pointer-events: auto !important; }
|
| 610 |
.gr-checkbox label, .gr-check-radio label { pointer-events: auto !important; cursor: pointer; }
|
| 611 |
#rag-tab input[type="checkbox"] { accent-color: #60a5fa !important; }
|
|
@@ -720,7 +694,6 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 720 |
elem_classes=["prose"]
|
| 721 |
)
|
| 722 |
|
| 723 |
-
# Wire predictor buttons
|
| 724 |
inputs_in_order = [
|
| 725 |
f1_type, f1_diam, f1_len, cf_conc,
|
| 726 |
f1_dim, f2_type, f2_diam, f2_len,
|
|
@@ -742,7 +715,7 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 742 |
with gr.Tab("📚 Ask the Literature (Hybrid RAG + MMR)", elem_id="rag-tab"):
|
| 743 |
gr.Markdown(
|
| 744 |
"Upload PDFs into the repository folder <code>papers/</code> then reload the Space. "
|
| 745 |
-
"Answers cite (Doc.pdf, p.X).
|
| 746 |
)
|
| 747 |
with gr.Row():
|
| 748 |
top_k = gr.Slider(5, 12, value=8, step=1, label="Top-K chunks")
|
|
@@ -754,43 +727,21 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 754 |
w_bm25 = gr.Slider(0.0, 1.0, value=W_BM25_DEFAULT, step=0.05, label="BM25 weight")
|
| 755 |
w_emb = gr.Slider(0.0, 1.0, value=(0.0 if not USE_DENSE else 0.40), step=0.05, label="Dense weight (set 0 if disabled)")
|
| 756 |
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
use_llm = gr.Checkbox(
|
| 763 |
-
value=LLM_AVAILABLE, label="Use LLM to paraphrase selected sentences",
|
| 764 |
-
interactive=LLM_AVAILABLE
|
| 765 |
-
)
|
| 766 |
-
model_name = gr.Textbox(
|
| 767 |
-
value=os.getenv("OPENAI_MODEL", OPENAI_MODEL),
|
| 768 |
-
label="LLM model", placeholder="e.g., gpt-5 or gpt-5-mini"
|
| 769 |
-
)
|
| 770 |
-
temperature = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Temperature")
|
| 771 |
-
|
| 772 |
-
# Availability banner
|
| 773 |
-
llm_status = gr.Markdown(
|
| 774 |
-
"**LLM status:** " + (
|
| 775 |
-
"✅ Ready — paraphrasing enabled by default." if LLM_AVAILABLE
|
| 776 |
-
else "⚠️ Disabled — set `OPENAI_API_KEY` to enable paraphrasing."
|
| 777 |
-
)
|
| 778 |
-
)
|
| 779 |
-
|
| 780 |
-
# Make the two checkboxes mutually exclusive
|
| 781 |
-
strict_quotes_only.change(
|
| 782 |
-
_strict_on, inputs=[strict_quotes_only, use_llm], outputs=use_llm
|
| 783 |
-
)
|
| 784 |
-
use_llm.change(
|
| 785 |
-
_llm_on, inputs=[use_llm, strict_quotes_only], outputs=strict_quotes_only
|
| 786 |
-
)
|
| 787 |
|
| 788 |
gr.ChatInterface(
|
| 789 |
fn=rag_chat_fn,
|
| 790 |
-
additional_inputs=[
|
| 791 |
-
|
|
|
|
|
|
|
|
|
|
| 792 |
title="Literature Q&A",
|
| 793 |
-
description="Hybrid retrieval with diversity. Answers carry inline (Doc, p.X) citations.
|
| 794 |
)
|
| 795 |
|
| 796 |
# ------------- Launch -------------
|
|
|
|
| 1 |
# ================================================================
|
| 2 |
# Self-Sensing Concrete Assistant — Predictor (XGB) + Hybrid RAG
|
| 3 |
+
# - Predictor tab: identical behavior (kept)
|
| 4 |
+
# - Literature tab: Hybrid RAG; LLM runs silently when available
|
| 5 |
+
# - UX: no visible "LLM & Controls" window; prediction=0.0 if incomplete
|
|
|
|
| 6 |
# ================================================================
|
| 7 |
|
| 8 |
# ---------------------- Runtime flags (HF-safe) ----------------------
|
|
|
|
| 43 |
except Exception:
|
| 44 |
OpenAI = None
|
| 45 |
|
| 46 |
+
# LLM availability flag — used internally; UI remains hidden
|
| 47 |
LLM_AVAILABLE = (OPENAI_API_KEY is not None and OPENAI_API_KEY.strip() != "" and OpenAI is not None)
|
| 48 |
|
| 49 |
+
# ========================= Predictor (kept) =========================
|
| 50 |
CF_COL = "Conductive Filler Conc. (wt%)"
|
| 51 |
TARGET_COL = "Stress GF (MPa-1)"
|
| 52 |
|
|
|
|
| 105 |
"Current Type"
|
| 106 |
}
|
| 107 |
|
|
|
|
| 108 |
OPTIONAL_FIELDS = {
|
| 109 |
"Filler 2 Type",
|
| 110 |
"Filler 2 Diameter (µm)",
|
|
|
|
| 148 |
return pd.DataFrame([row], columns=MAIN_VARIABLES)
|
| 149 |
|
| 150 |
def _is_complete(form_dict: dict) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
for col in MAIN_VARIABLES:
|
| 152 |
if col in OPTIONAL_FIELDS:
|
| 153 |
continue
|
|
|
|
| 169 |
return True
|
| 170 |
|
| 171 |
def predict_fn(**kwargs):
|
|
|
|
| 172 |
if not _is_complete(kwargs):
|
| 173 |
return 0.0
|
|
|
|
| 174 |
mdl = _load_model_or_error()
|
| 175 |
if isinstance(mdl, str):
|
| 176 |
return mdl
|
|
|
|
| 226 |
cleared.append("")
|
| 227 |
return cleared
|
| 228 |
|
| 229 |
+
# ========================= Hybrid RAG =========================
|
|
|
|
| 230 |
ARTIFACT_DIR = Path("rag_artifacts"); ARTIFACT_DIR.mkdir(exist_ok=True)
|
| 231 |
TFIDF_VECT_PATH = ARTIFACT_DIR / "tfidf_vectorizer.joblib"
|
| 232 |
TFIDF_MAT_PATH = ARTIFACT_DIR / "tfidf_matrix.joblib"
|
|
|
|
| 234 |
EMB_NPY_PATH = ARTIFACT_DIR / "chunk_embeddings.npy"
|
| 235 |
RAG_META_PATH = ARTIFACT_DIR / "chunks.parquet"
|
| 236 |
|
|
|
|
| 237 |
LOCAL_PDF_DIR = Path("papers"); LOCAL_PDF_DIR.mkdir(exist_ok=True)
|
| 238 |
USE_ONLINE_SOURCES = os.getenv("USE_ONLINE_SOURCES", "false").lower() == "true"
|
| 239 |
|
|
|
|
| 240 |
W_TFIDF_DEFAULT = 0.50 if not USE_DENSE else 0.30
|
| 241 |
W_BM25_DEFAULT = 0.50 if not USE_DENSE else 0.30
|
| 242 |
W_EMB_DEFAULT = 0.00 if USE_DENSE is False else 0.40
|
| 243 |
|
|
|
|
| 244 |
_SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+|\n+")
|
| 245 |
TOKEN_RE = re.compile(r"[A-Za-z0-9_#+\-/\.%]+")
|
| 246 |
def sent_split(text: str) -> List[str]:
|
|
|
|
| 249 |
def tokenize(text: str) -> List[str]:
    """Split text into lowercase tokens using the module-level TOKEN_RE pattern."""
    lowered: List[str] = []
    for match in TOKEN_RE.finditer(text):
        # group(0) is the whole match, i.e. exactly what findall() would yield
        # for a pattern without capture groups.
        lowered.append(match.group(0).lower())
    return lowered
|
| 251 |
|
|
|
|
| 252 |
def _extract_pdf_text(pdf_path: Path) -> str:
|
| 253 |
try:
|
| 254 |
import fitz
|
|
|
|
| 290 |
USE_DENSE = False
|
| 291 |
return None
|
| 292 |
|
|
|
|
| 293 |
def build_or_load_hybrid(pdf_dir: Path):
|
| 294 |
have_cache = (TFIDF_VECT_PATH.exists() and TFIDF_MAT_PATH.exists()
|
| 295 |
and RAG_META_PATH.exists()
|
|
|
|
| 342 |
print("Dense embedding failed:", e)
|
| 343 |
emb = None
|
| 344 |
|
|
|
|
| 345 |
joblib.dump(vectorizer, TFIDF_VECT_PATH)
|
| 346 |
+
joblib.dump(X_tfidF:=X_tfidf, TFIDF_MAT_PATH) # assign + save
|
| 347 |
if BM25Okapi is not None:
|
| 348 |
joblib.dump(all_tokens, BM25_TOK_PATH)
|
| 349 |
meta.to_parquet(RAG_META_PATH, index=False)
|
|
|
|
| 350 |
return vectorizer, X_tfidf, meta, all_tokens, emb
|
| 351 |
|
| 352 |
tfidf_vectorizer, tfidf_matrix, rag_meta, bm25_tokens, emb_matrix = build_or_load_hybrid(LOCAL_PDF_DIR)
|
|
|
|
| 354 |
st_query_model = _safe_init_st_model(os.getenv("EMB_MODEL_NAME", "sentence-transformers/all-MiniLM-L6-v2"))
|
| 355 |
|
| 356 |
def _extract_page(text_chunk: str) -> str:
|
| 357 |
+
m = list(re.finditer(r"\\[\\[PAGE=(\\d+)\\]\\]", text_chunk or ""))
|
| 358 |
return (m[-1].group(1) if m else "?")
|
| 359 |
|
| 360 |
def hybrid_search(query: str, k=8, w_tfidf=W_TFIDF_DEFAULT, w_bm25=W_BM25_DEFAULT, w_emb=W_EMB_DEFAULT):
|
|
|
|
| 383 |
|
| 384 |
# BM25 scores
|
| 385 |
if bm25 is not None:
|
| 386 |
+
q_tokens = [t.lower() for t in re.findall(r"[A-Za-z0-9_#+\\-\\/\\.%%]+", query)]
|
| 387 |
bm25_scores = np.array(bm25.get_scores(q_tokens), dtype=float)
|
| 388 |
else:
|
| 389 |
bm25_scores = np.zeros(len(rag_meta), dtype=float); w_bm25 = 0.0
|
|
|
|
| 518 |
srcs = {Path(r['doc_path']).name for _, r in hits.iterrows()}
|
| 519 |
coverage_note = "" if len(srcs) >= 3 else f"\n\n> Note: Only {len(srcs)} unique source(s) contributed. Add more PDFs or increase Top-K."
|
| 520 |
|
| 521 |
+
# Hidden policy: if strict==True → no paraphrasing; else try LLM if available
|
| 522 |
if strict_quotes_only:
|
| 523 |
if not selected:
|
| 524 |
return f"**Quoted Passages:**\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2]) + f"\n\n**Citations:** {header_cites}{coverage_note}"
|
|
|
|
| 568 |
except Exception as e:
|
| 569 |
return f"RAG error: {e}"
|
| 570 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 571 |
# ========================= UI (science-oriented styling) =========================
|
| 572 |
CSS = """
|
| 573 |
/* Science-oriented: crisp contrast + readable numerics */
|
|
|
|
| 579 |
label {color: #e8f7ff !important; text-shadow: 0 1px 0 rgba(0,0,0,0.35); cursor: pointer;}
|
| 580 |
input[type="number"] {font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;}
|
| 581 |
|
| 582 |
+
/* Checkbox clickability fixes */
|
| 583 |
input[type="checkbox"], .gr-checkbox, .gr-checkbox > * { pointer-events: auto !important; }
|
| 584 |
.gr-checkbox label, .gr-check-radio label { pointer-events: auto !important; cursor: pointer; }
|
| 585 |
#rag-tab input[type="checkbox"] { accent-color: #60a5fa !important; }
|
|
|
|
| 694 |
elem_classes=["prose"]
|
| 695 |
)
|
| 696 |
|
|
|
|
| 697 |
inputs_in_order = [
|
| 698 |
f1_type, f1_diam, f1_len, cf_conc,
|
| 699 |
f1_dim, f2_type, f2_diam, f2_len,
|
|
|
|
| 715 |
with gr.Tab("📚 Ask the Literature (Hybrid RAG + MMR)", elem_id="rag-tab"):
|
| 716 |
gr.Markdown(
|
| 717 |
"Upload PDFs into the repository folder <code>papers/</code> then reload the Space. "
|
| 718 |
+
"Answers cite (Doc.pdf, p.X)."
|
| 719 |
)
|
| 720 |
with gr.Row():
|
| 721 |
top_k = gr.Slider(5, 12, value=8, step=1, label="Top-K chunks")
|
|
|
|
| 727 |
w_bm25 = gr.Slider(0.0, 1.0, value=W_BM25_DEFAULT, step=0.05, label="BM25 weight")
|
| 728 |
w_emb = gr.Slider(0.0, 1.0, value=(0.0 if not USE_DENSE else 0.40), step=0.05, label="Dense weight (set 0 if disabled)")
|
| 729 |
|
| 730 |
+
# ---- Hidden states for LLM behavior (no visible controls) ----
|
| 731 |
+
state_use_llm = gr.State(LLM_AVAILABLE) # True when key present; else False
|
| 732 |
+
state_model_name = gr.State(os.getenv("OPENAI_MODEL", OPENAI_MODEL))
|
| 733 |
+
state_temperature = gr.State(0.2)
|
| 734 |
+
state_strict = gr.State(False) # hidden: default to not-strict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 735 |
|
| 736 |
gr.ChatInterface(
|
| 737 |
fn=rag_chat_fn,
|
| 738 |
+
additional_inputs=[
|
| 739 |
+
top_k, n_sentences, include_passages,
|
| 740 |
+
state_use_llm, state_model_name, state_temperature, state_strict,
|
| 741 |
+
w_tfidf, w_bm25, w_emb
|
| 742 |
+
],
|
| 743 |
title="Literature Q&A",
|
| 744 |
+
description="Hybrid retrieval with diversity. Answers carry inline (Doc, p.X) citations."
|
| 745 |
)
|
| 746 |
|
| 747 |
# ------------- Launch -------------
|