Spaces:
Running
Running
Update app.py
#14
by
kmanch3 - opened
app.py
CHANGED
|
@@ -15,6 +15,7 @@ os.environ["TRANSFORMERS_NO_TF"] = "1"
|
|
| 15 |
os.environ["TRANSFORMERS_NO_FLAX"] = "1"
|
| 16 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 17 |
|
|
|
|
| 18 |
# ------------------------------- Imports ------------------------------
|
| 19 |
import re, joblib, warnings, json, traceback, time, uuid, subprocess, sys
|
| 20 |
from pathlib import Path
|
|
@@ -24,6 +25,32 @@ import numpy as np
|
|
| 24 |
import pandas as pd
|
| 25 |
import gradio as gr
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
warnings.filterwarnings("ignore", category=UserWarning)
|
| 28 |
|
| 29 |
# Optional deps (handled gracefully if missing)
|
|
@@ -55,6 +82,7 @@ CF_COL = "Conductive Filler Conc. (wt%)"
|
|
| 55 |
TARGET_COL = "Stress GF (MPa-1)"
|
| 56 |
CANON_NA = "NA" # canonical placeholder for categoricals
|
| 57 |
|
|
|
|
| 58 |
TYPE_CHOICES = [
|
| 59 |
"CNT",
|
| 60 |
"Brass fiber",
|
|
@@ -82,6 +110,34 @@ TYPE_CHOICES = [
|
|
| 82 |
CANON_NA
|
| 83 |
]
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
MAIN_VARIABLES = [
|
| 86 |
"Filler 1 Type",
|
| 87 |
"Filler 1 Diameter (µm)",
|
|
@@ -108,6 +164,40 @@ MAIN_VARIABLES = [
|
|
| 108 |
"Applied Voltage (V)"
|
| 109 |
]
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
NUMERIC_COLS = {
|
| 112 |
"Filler 1 Diameter (µm)",
|
| 113 |
"Filler 1 Length (mm)",
|
|
@@ -653,7 +743,6 @@ def synthesize_with_llm(question: str, sentence_lines: List[str], model: str = N
|
|
| 653 |
return out_text, usage
|
| 654 |
except Exception:
|
| 655 |
return None, None
|
| 656 |
-
|
| 657 |
def rag_reply(
|
| 658 |
question: str,
|
| 659 |
k: int = 8,
|
|
@@ -678,29 +767,14 @@ def rag_reply(
|
|
| 678 |
|
| 679 |
if hits is None or hits.empty:
|
| 680 |
final = "No indexed PDFs found. Upload PDFs to the 'papers/' folder and reload the Space."
|
| 681 |
-
record = {
|
| 682 |
-
"run_id": run_id,
|
| 683 |
-
"ts": int(time.time()*1000),
|
| 684 |
-
"inputs": {
|
| 685 |
-
"question": question, "top_k": int(k), "n_sentences": int(n_sentences),
|
| 686 |
-
"w_tfidf": float(w_tfidf), "w_bm25": float(w_bm25), "w_emb": float(w_emb),
|
| 687 |
-
"use_llm": bool(use_llm), "model": model, "temperature": float(temperature)
|
| 688 |
-
},
|
| 689 |
-
"retrieval": {"hits": [], "latency_ms_retriever": latency_ms_retriever},
|
| 690 |
-
"output": {"final_answer": final, "used_sentences": []},
|
| 691 |
-
"latency_ms_total": int((time.time()-t0_total)*1000),
|
| 692 |
-
"openai": None
|
| 693 |
-
}
|
| 694 |
-
_safe_write_jsonl(LOG_PATH, record)
|
| 695 |
return final
|
| 696 |
|
| 697 |
# Select sentences
|
| 698 |
selected = mmr_select_sentences(question, hits, top_n=int(n_sentences), pool_per_chunk=6, lambda_div=0.7)
|
| 699 |
|
| 700 |
-
# Header citations
|
| 701 |
from urllib.parse import quote
|
| 702 |
-
|
| 703 |
-
|
| 704 |
|
| 705 |
header_links = []
|
| 706 |
unique_codes = set()
|
|
@@ -710,10 +784,12 @@ def rag_reply(
|
|
| 710 |
filename = Path(doc_path).name
|
| 711 |
short_code = _short_doc_code(doc_path)
|
| 712 |
|
| 713 |
-
# ✅
|
| 714 |
-
|
| 715 |
-
href = f"/file={
|
| 716 |
-
|
|
|
|
|
|
|
| 717 |
|
| 718 |
if short_code not in unique_codes:
|
| 719 |
header_links.append(link)
|
|
@@ -721,123 +797,31 @@ def rag_reply(
|
|
| 721 |
|
| 722 |
header_cites = "; ".join(header_links)
|
| 723 |
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
|
|
|
| 728 |
|
| 729 |
-
#
|
| 730 |
-
retr_list = []
|
| 731 |
-
for _, r in hits.iterrows():
|
| 732 |
-
retr_list.append({
|
| 733 |
-
"doc": Path(r["doc_path"]).name,
|
| 734 |
-
"page": _extract_page(r["text"]),
|
| 735 |
-
"score_tfidf": float(r.get("score_tfidf", 0.0)),
|
| 736 |
-
"score_bm25": float(r.get("score_bm25", 0.0)),
|
| 737 |
-
"score_dense": float(r.get("score_dense", 0.0)),
|
| 738 |
-
"combo_score": float(r.get("score", 0.0)),
|
| 739 |
-
})
|
| 740 |
-
|
| 741 |
-
# Strict quotes only (no LLM)
|
| 742 |
-
if strict_quotes_only:
|
| 743 |
-
if not selected:
|
| 744 |
-
final = (
|
| 745 |
-
"**Quoted Passages:**\n\n---\n" +
|
| 746 |
-
"\n\n".join(hits['text'].tolist()[:2]) +
|
| 747 |
-
f"\n\n**Citations:** {header_cites}{coverage_note}"
|
| 748 |
-
)
|
| 749 |
-
else:
|
| 750 |
-
bullets = "\n- ".join(f"{s['sent']} ({s['doc']})" for s in selected)
|
| 751 |
-
final = f"**Quoted Passages:**\n- {bullets}\n\n**Citations:** {header_cites}{coverage_note}"
|
| 752 |
-
if include_passages:
|
| 753 |
-
final += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
|
| 754 |
-
|
| 755 |
-
record = {
|
| 756 |
-
"run_id": run_id,
|
| 757 |
-
"ts": int(time.time()*1000),
|
| 758 |
-
"inputs": {
|
| 759 |
-
"question": question, "top_k": int(k), "n_sentences": int(n_sentences),
|
| 760 |
-
"w_tfidf": float(w_tfidf), "w_bm25": float(w_bm25), "w_emb": float(w_emb),
|
| 761 |
-
"use_llm": False, "model": None, "temperature": float(temperature)
|
| 762 |
-
},
|
| 763 |
-
"retrieval": {"hits": retr_list, "latency_ms_retriever": latency_ms_retriever},
|
| 764 |
-
"output": {
|
| 765 |
-
"final_answer": final,
|
| 766 |
-
"used_sentences": [{"sent": s["sent"], "doc": s["doc"], "page": s["page"]} for s in selected]
|
| 767 |
-
},
|
| 768 |
-
"latency_ms_total": int((time.time()-t0_total)*1000),
|
| 769 |
-
"openai": None
|
| 770 |
-
}
|
| 771 |
-
_safe_write_jsonl(LOG_PATH, record)
|
| 772 |
-
return final
|
| 773 |
|
| 774 |
-
#
|
| 775 |
extractive = compose_extractive(selected)
|
| 776 |
-
|
| 777 |
-
llm_latency_ms = None
|
| 778 |
if use_llm and selected:
|
| 779 |
-
# Lines already carry short-code citations, e.g. "... (S92)"
|
| 780 |
lines = [f"{s['sent']} ({s['doc']})" for s in selected]
|
| 781 |
-
|
| 782 |
-
llm_text, llm_usage = synthesize_with_llm(question, lines, model=model, temperature=temperature)
|
| 783 |
-
t1_llm = time.time()
|
| 784 |
-
llm_latency_ms = int((t1_llm - t0_llm) * 1000)
|
| 785 |
-
|
| 786 |
if llm_text:
|
| 787 |
final = f"**Answer (LLM synthesis):** {llm_text}\n\n**Citations:** {header_cites}{coverage_note}"
|
| 788 |
-
if include_passages:
|
| 789 |
-
final += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
|
| 790 |
else:
|
| 791 |
-
|
| 792 |
-
final = (
|
| 793 |
-
f"**Answer:** Here are relevant passages.\n\n"
|
| 794 |
-
f"**Citations:** {header_cites}{coverage_note}\n\n---\n" +
|
| 795 |
-
"\n\n".join(hits['text'].tolist()[:2])
|
| 796 |
-
)
|
| 797 |
-
else:
|
| 798 |
-
final = f"**Answer:** {extractive}\n\n**Citations:** {header_cites}{coverage_note}"
|
| 799 |
-
if include_passages:
|
| 800 |
-
final += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
|
| 801 |
else:
|
| 802 |
if not extractive:
|
| 803 |
-
final = (
|
| 804 |
-
f"**Answer:** Here are relevant passages.\n\n"
|
| 805 |
-
f"**Citations:** {header_cites}{coverage_note}\n\n---\n" +
|
| 806 |
-
"\n\n".join(hits['text'].tolist()[:2])
|
| 807 |
-
)
|
| 808 |
else:
|
| 809 |
final = f"**Answer:** {extractive}\n\n**Citations:** {header_cites}{coverage_note}"
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
# --------- Log full run ---------
|
| 814 |
-
prompt_toks = llm_usage.get("prompt_tokens") if llm_usage else None
|
| 815 |
-
completion_toks = llm_usage.get("completion_tokens") if llm_usage else None
|
| 816 |
-
cost_usd = _calc_cost_usd(prompt_toks, completion_toks)
|
| 817 |
-
|
| 818 |
-
total_ms = int((time.time() - t0_total) * 1000)
|
| 819 |
-
record = {
|
| 820 |
-
"run_id": run_id,
|
| 821 |
-
"ts": int(time.time()*1000),
|
| 822 |
-
"inputs": {
|
| 823 |
-
"question": question, "top_k": int(k), "n_sentences": int(n_sentences),
|
| 824 |
-
"w_tfidf": float(w_tfidf), "w_bm25": float(w_bm25), "w_emb": float(w_emb),
|
| 825 |
-
"use_llm": bool(use_llm), "model": model, "temperature": float(temperature)
|
| 826 |
-
},
|
| 827 |
-
"retrieval": {"hits": retr_list, "latency_ms_retriever": latency_ms_retriever},
|
| 828 |
-
"output": {
|
| 829 |
-
"final_answer": final,
|
| 830 |
-
"used_sentences": [{"sent": s['sent'], "doc": s['doc'], "page": s['page']} for s in selected]
|
| 831 |
-
},
|
| 832 |
-
"latency_ms_total": total_ms,
|
| 833 |
-
"latency_ms_llm": llm_latency_ms,
|
| 834 |
-
"openai": {
|
| 835 |
-
"prompt_tokens": prompt_toks,
|
| 836 |
-
"completion_tokens": completion_toks,
|
| 837 |
-
"cost_usd": cost_usd
|
| 838 |
-
} if use_llm else None
|
| 839 |
-
}
|
| 840 |
-
_safe_write_jsonl(LOG_PATH, record)
|
| 841 |
return final
|
| 842 |
|
| 843 |
def rag_chat_fn(message, history, top_k, n_sentences, include_passages,
|
|
@@ -1075,8 +1059,11 @@ input[type="checkbox"], .gr-checkbox, .gr-checkbox > * { pointer-events: auto !i
|
|
| 1075 |
|
| 1076 |
/* --- THE UNIVERSAL DROPDOWN OVERRIDE --- */
|
| 1077 |
|
| 1078 |
-
/* 1. All boxes show white text on the dark background */
|
| 1079 |
#filler-dropdown .single-select, #filler-dropdown input,
|
|
|
|
|
|
|
|
|
|
| 1080 |
#dim-dropdown .single-select, #dim-dropdown input,
|
| 1081 |
#dim2-dropdown .single-select, #dim2-dropdown input,
|
| 1082 |
#current-dropdown .single-select, #current-dropdown input {
|
|
@@ -1086,14 +1073,20 @@ input[type="checkbox"], .gr-checkbox, .gr-checkbox > * { pointer-events: auto !i
|
|
| 1086 |
|
| 1087 |
/* 2. All dropdown menus (the pop-outs) have a white background */
|
| 1088 |
#filler-dropdown .options,
|
|
|
|
|
|
|
|
|
|
| 1089 |
#dim-dropdown .options,
|
| 1090 |
#dim2-dropdown .options,
|
| 1091 |
#current-dropdown .options {
|
| 1092 |
background-color: #ffffff !important;
|
| 1093 |
}
|
| 1094 |
|
| 1095 |
-
/* 3. All items in the lists are forced to PURE BLACK */
|
| 1096 |
#filler-dropdown .item, #filler-dropdown .item span,
|
|
|
|
|
|
|
|
|
|
| 1097 |
#dim-dropdown .item, #dim-dropdown .item span,
|
| 1098 |
#dim2-dropdown .item, #dim2-dropdown .item span,
|
| 1099 |
#current-dropdown .item, #current-dropdown .item span,
|
|
@@ -1102,7 +1095,13 @@ input[type="checkbox"], .gr-checkbox, .gr-checkbox > * { pointer-events: auto !i
|
|
| 1102 |
-webkit-text-fill-color: #000000 !important;
|
| 1103 |
}
|
| 1104 |
|
| 1105 |
-
/* 4.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1106 |
.gr-dropdown .item:hover {
|
| 1107 |
background-color: #dbeafe !important;
|
| 1108 |
}
|
|
@@ -1171,14 +1170,14 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 1171 |
f1_dim = gr.Dropdown(DIM_CHOICES, value=CANON_NA, label="Filler 1 Dimensionality *",elem_id="dim-dropdown")
|
| 1172 |
|
| 1173 |
with gr.Accordion("Secondary filler (optional)", open=False, elem_classes=["card"]):
|
| 1174 |
-
f2_type = gr.
|
| 1175 |
f2_diam = gr.Number(label="Filler 2 Diameter (µm)")
|
| 1176 |
f2_len = gr.Number(label="Filler 2 Length (mm)")
|
| 1177 |
f2_dim = gr.Dropdown(DIM_CHOICES, value=CANON_NA, label="Filler 2 Dimensionality", elem_id="dim2-dropdown")
|
| 1178 |
with gr.Accordion("Mix design & specimen", open=False, elem_classes=["card"]):
|
| 1179 |
spec_vol = gr.Number(label="Specimen Volume (mm3) *")
|
| 1180 |
-
probe_cnt = gr.
|
| 1181 |
-
probe_mat = gr.
|
| 1182 |
wb = gr.Number(label="W/B *")
|
| 1183 |
sb = gr.Number(label="S/B *")
|
| 1184 |
gauge_len = gr.Number(label="Gauge Length (mm) *")
|
|
@@ -1324,19 +1323,18 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 1324 |
|
| 1325 |
# ------------- Launch -------------
|
| 1326 |
if __name__ == "__main__":
|
| 1327 |
-
|
| 1328 |
-
|
| 1329 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1330 |
|
| 1331 |
-
|
| 1332 |
-
|
| 1333 |
-
|
| 1334 |
-
# 2. Everything below here only runs AFTER the server stops
|
| 1335 |
-
# (or might not run at all depending on how the server handles the exit)
|
| 1336 |
-
import os as _os
|
| 1337 |
-
import pandas as _pd
|
| 1338 |
-
folder = "papers"
|
| 1339 |
-
files = sorted(_os.listdir(folder)) if _os.path.exists(folder) else []
|
| 1340 |
-
_pd.DataFrame({"doc": files}).to_csv("paper_list.csv", index=False)
|
| 1341 |
-
print("✅ Saved paper_list.csv with", len(files), "papers")
|
| 1342 |
|
|
|
|
|
|
|
|
|
| 15 |
os.environ["TRANSFORMERS_NO_FLAX"] = "1"
|
| 16 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 17 |
|
| 18 |
+
|
| 19 |
# ------------------------------- Imports ------------------------------
|
| 20 |
import re, joblib, warnings, json, traceback, time, uuid, subprocess, sys
|
| 21 |
from pathlib import Path
|
|
|
|
| 25 |
import pandas as pd
|
| 26 |
import gradio as gr
|
| 27 |
|
| 28 |
+
SOURCES_CSV = "sources_fixed.csv"
|
| 29 |
+
|
| 30 |
+
def load_sources_map(csv_path=SOURCES_CSV):
|
| 31 |
+
"""
|
| 32 |
+
Returns dict keyed by filename stem (id) with url + citation.
|
| 33 |
+
Example key: 'S92-Research-on-the-self-sensing...' (no .pdf)
|
| 34 |
+
"""
|
| 35 |
+
if not os.path.exists(csv_path):
|
| 36 |
+
print(f"[Sources] Missing {csv_path}")
|
| 37 |
+
return {}
|
| 38 |
+
|
| 39 |
+
df = pd.read_csv(csv_path).fillna("")
|
| 40 |
+
df.columns = df.columns.str.strip()
|
| 41 |
+
|
| 42 |
+
src = {}
|
| 43 |
+
for _, r in df.iterrows():
|
| 44 |
+
_id = str(r.get("id", "")).strip()
|
| 45 |
+
url = str(r.get("url", "")).strip()
|
| 46 |
+
cit = str(r.get("citation", "")).strip()
|
| 47 |
+
if _id:
|
| 48 |
+
src[_id] = {"url": url, "citation": cit}
|
| 49 |
+
print(f"[Sources] Loaded {len(src)} sources from {csv_path}")
|
| 50 |
+
return src
|
| 51 |
+
|
| 52 |
+
SOURCES_MAP = load_sources_map()
|
| 53 |
+
|
| 54 |
warnings.filterwarnings("ignore", category=UserWarning)
|
| 55 |
|
| 56 |
# Optional deps (handled gracefully if missing)
|
|
|
|
| 82 |
TARGET_COL = "Stress GF (MPa-1)"
|
| 83 |
CANON_NA = "NA" # canonical placeholder for categoricals
|
| 84 |
|
| 85 |
+
|
| 86 |
TYPE_CHOICES = [
|
| 87 |
"CNT",
|
| 88 |
"Brass fiber",
|
|
|
|
| 110 |
CANON_NA
|
| 111 |
]
|
| 112 |
|
| 113 |
+
TYPE_CHOICES_2 = [
|
| 114 |
+
"None",
|
| 115 |
+
"CNT",
|
| 116 |
+
"Brass fiber",
|
| 117 |
+
"GNP",
|
| 118 |
+
"Steel fiber",
|
| 119 |
+
"Carbon fiber",
|
| 120 |
+
"Graphene oxide",
|
| 121 |
+
"Graphene",
|
| 122 |
+
"Carbon black",
|
| 123 |
+
"Graphite",
|
| 124 |
+
"Shungite",
|
| 125 |
+
"Nickel powder",
|
| 126 |
+
"Glass cullet",
|
| 127 |
+
"MWCNT",
|
| 128 |
+
"Nano carbon black",
|
| 129 |
+
"Carbon powder",
|
| 130 |
+
"Gasification char",
|
| 131 |
+
"Used foundry sand",
|
| 132 |
+
"Nickel fiber",
|
| 133 |
+
"Nickel aggregate",
|
| 134 |
+
"Steel slag aggregate",
|
| 135 |
+
"TiO2",
|
| 136 |
+
"Carbonyl iron powder",
|
| 137 |
+
"Magnetite aggregate",
|
| 138 |
+
CANON_NA
|
| 139 |
+
]
|
| 140 |
+
|
| 141 |
MAIN_VARIABLES = [
|
| 142 |
"Filler 1 Type",
|
| 143 |
"Filler 1 Diameter (µm)",
|
|
|
|
| 164 |
"Applied Voltage (V)"
|
| 165 |
]
|
| 166 |
|
| 167 |
+
PROBE_COUNT_CHOICES = ["2", "4", CANON_NA]
|
| 168 |
+
|
| 169 |
+
PROBE_CHOICES = [
|
| 170 |
+
"Copper mesh",
|
| 171 |
+
"Copper plates",
|
| 172 |
+
"Copper wire",
|
| 173 |
+
"Copper wire wrapped with silver paint at both ends",
|
| 174 |
+
"Copper wire bonded with conductive adhesive",
|
| 175 |
+
"Copper foil with silver paste",
|
| 176 |
+
"Copper tape",
|
| 177 |
+
"Copper E shape plate",
|
| 178 |
+
"Copper coated in silver paste",
|
| 179 |
+
"Copper, silver paste coating",
|
| 180 |
+
"Copper sheets attached on parallel surfaces of cube",
|
| 181 |
+
"Copper tape with conductive adhesive and copper wire",
|
| 182 |
+
"Stainless steel mesh",
|
| 183 |
+
"Stainless steel nets",
|
| 184 |
+
"Stainless steel gauze",
|
| 185 |
+
"Stainless steel electrode nets",
|
| 186 |
+
"Stainless steel bolt connected to copper wire",
|
| 187 |
+
"#6 stainless steel grides",
|
| 188 |
+
"Steel sheet with 3mm hole diameter",
|
| 189 |
+
"Wire mesh",
|
| 190 |
+
"Metallic (General)",
|
| 191 |
+
"Conductive adhesive type",
|
| 192 |
+
"Silver conductive adhesive",
|
| 193 |
+
"Polyester conductive adhesive tape with silver coating",
|
| 194 |
+
"Black titanium mesh",
|
| 195 |
+
"Titanium",
|
| 196 |
+
"Aluminum",
|
| 197 |
+
"Cement injected columns",
|
| 198 |
+
"None",
|
| 199 |
+
CANON_NA
|
| 200 |
+
]
|
| 201 |
NUMERIC_COLS = {
|
| 202 |
"Filler 1 Diameter (µm)",
|
| 203 |
"Filler 1 Length (mm)",
|
|
|
|
| 743 |
return out_text, usage
|
| 744 |
except Exception:
|
| 745 |
return None, None
|
|
|
|
| 746 |
def rag_reply(
|
| 747 |
question: str,
|
| 748 |
k: int = 8,
|
|
|
|
| 767 |
|
| 768 |
if hits is None or hits.empty:
|
| 769 |
final = "No indexed PDFs found. Upload PDFs to the 'papers/' folder and reload the Space."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 770 |
return final
|
| 771 |
|
| 772 |
# Select sentences
|
| 773 |
selected = mmr_select_sentences(question, hits, top_n=int(n_sentences), pool_per_chunk=6, lambda_div=0.7)
|
| 774 |
|
| 775 |
+
# --- Header citations Logic ---
|
| 776 |
from urllib.parse import quote
|
| 777 |
+
from pathlib import Path
|
|
|
|
| 778 |
|
| 779 |
header_links = []
|
| 780 |
unique_codes = set()
|
|
|
|
| 784 |
filename = Path(doc_path).name
|
| 785 |
short_code = _short_doc_code(doc_path)
|
| 786 |
|
| 787 |
+
# ✅ FIX 1: Convert to Absolute Path String (Standard for Windows local hosting)
|
| 788 |
+
abs_pdf_path = str(Path(doc_path).resolve())
|
| 789 |
+
href = f"/file={abs_pdf_path}"
|
| 790 |
+
|
| 791 |
+
# ✅ FIX 2: Reverted to WHITE for your dark-blue theme
|
| 792 |
+
link = f'<a href="{href}" target="_blank" rel="noopener noreferrer" style="color: white; font-weight: bold; text-decoration: underline;">{short_code}</a>'
|
| 793 |
|
| 794 |
if short_code not in unique_codes:
|
| 795 |
header_links.append(link)
|
|
|
|
| 797 |
|
| 798 |
header_cites = "; ".join(header_links)
|
| 799 |
|
| 800 |
+
# ✅ FIX 3: Define coverage_note to prevent NameError crash
|
| 801 |
+
if len(unique_codes) < 3:
|
| 802 |
+
coverage_note = f"\n\n> Note: Only {len(unique_codes)} unique source(s) contributed. Add more PDFs or increase Top-K."
|
| 803 |
+
else:
|
| 804 |
+
coverage_note = ""
|
| 805 |
|
| 806 |
+
# ... (Keep your existing retr_list logging logic here) ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 807 |
|
| 808 |
+
# --- Construct Final Output ---
|
| 809 |
extractive = compose_extractive(selected)
|
| 810 |
+
|
|
|
|
| 811 |
if use_llm and selected:
|
|
|
|
| 812 |
lines = [f"{s['sent']} ({s['doc']})" for s in selected]
|
| 813 |
+
llm_text, _ = synthesize_with_llm(question, lines, model=model, temperature=temperature)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 814 |
if llm_text:
|
| 815 |
final = f"**Answer (LLM synthesis):** {llm_text}\n\n**Citations:** {header_cites}{coverage_note}"
|
|
|
|
|
|
|
| 816 |
else:
|
| 817 |
+
final = f"**Answer:** {extractive}\n\n**Citations:** {header_cites}{coverage_note}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
else:
|
| 819 |
if not extractive:
|
| 820 |
+
final = f"**Answer:** Here are relevant passages.\n\n**Citations:** {header_cites}{coverage_note}\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 821 |
else:
|
| 822 |
final = f"**Answer:** {extractive}\n\n**Citations:** {header_cites}{coverage_note}"
|
| 823 |
+
|
| 824 |
+
# Return the final string as your UI expects
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 825 |
return final
|
| 826 |
|
| 827 |
def rag_chat_fn(message, history, top_k, n_sentences, include_passages,
|
|
|
|
| 1059 |
|
| 1060 |
/* --- THE UNIVERSAL DROPDOWN OVERRIDE --- */
|
| 1061 |
|
| 1062 |
+
/* 1. All boxes show white text on the dark background (Selection View) */
|
| 1063 |
#filler-dropdown .single-select, #filler-dropdown input,
|
| 1064 |
+
#filler2-dropdown .single-select, #filler2-dropdown input,
|
| 1065 |
+
#probe-dropdown .single-select, #probe-dropdown input,
|
| 1066 |
+
#probe-count-dropdown .single-select, #probe-count-dropdown input,
|
| 1067 |
#dim-dropdown .single-select, #dim-dropdown input,
|
| 1068 |
#dim2-dropdown .single-select, #dim2-dropdown input,
|
| 1069 |
#current-dropdown .single-select, #current-dropdown input {
|
|
|
|
| 1073 |
|
| 1074 |
/* 2. All dropdown menus (the pop-outs) have a white background */
|
| 1075 |
#filler-dropdown .options,
|
| 1076 |
+
#filler2-dropdown .options,
|
| 1077 |
+
#probe-dropdown .options,
|
| 1078 |
+
#probe-count-dropdown .options,
|
| 1079 |
#dim-dropdown .options,
|
| 1080 |
#dim2-dropdown .options,
|
| 1081 |
#current-dropdown .options {
|
| 1082 |
background-color: #ffffff !important;
|
| 1083 |
}
|
| 1084 |
|
| 1085 |
+
/* 3. All items in the lists are forced to PURE BLACK (The Dropdown List) */
|
| 1086 |
#filler-dropdown .item, #filler-dropdown .item span,
|
| 1087 |
+
#filler2-dropdown .item, #filler2-dropdown .item span,
|
| 1088 |
+
#probe-dropdown .item, #probe-dropdown .item span,
|
| 1089 |
+
#probe-count-dropdown .item, #probe-count-dropdown .item span,
|
| 1090 |
#dim-dropdown .item, #dim-dropdown .item span,
|
| 1091 |
#dim2-dropdown .item, #dim2-dropdown .item span,
|
| 1092 |
#current-dropdown .item, #current-dropdown .item span,
|
|
|
|
| 1095 |
-webkit-text-fill-color: #000000 !important;
|
| 1096 |
}
|
| 1097 |
|
| 1098 |
+
/* 4. Probe Count Info Text - Forest Green Override (Replaces Neon) */
|
| 1099 |
+
#probe-count-dropdown .info {
|
| 1100 |
+
color: #2e7d32 !important;
|
| 1101 |
+
font-weight: 500;
|
| 1102 |
+
}
|
| 1103 |
+
|
| 1104 |
+
/* 5. Hover effect for all dropdowns */
|
| 1105 |
.gr-dropdown .item:hover {
|
| 1106 |
background-color: #dbeafe !important;
|
| 1107 |
}
|
|
|
|
| 1170 |
f1_dim = gr.Dropdown(DIM_CHOICES, value=CANON_NA, label="Filler 1 Dimensionality *",elem_id="dim-dropdown")
|
| 1171 |
|
| 1172 |
with gr.Accordion("Secondary filler (optional)", open=False, elem_classes=["card"]):
|
| 1173 |
+
f2_type = gr.Dropdown(choices=TYPE_CHOICES_2, label="Filler 2 Type (Optional)", value="None", allow_custom_value=True, elem_id="filler2-dropdown")
|
| 1174 |
f2_diam = gr.Number(label="Filler 2 Diameter (µm)")
|
| 1175 |
f2_len = gr.Number(label="Filler 2 Length (mm)")
|
| 1176 |
f2_dim = gr.Dropdown(DIM_CHOICES, value=CANON_NA, label="Filler 2 Dimensionality", elem_id="dim2-dropdown")
|
| 1177 |
with gr.Accordion("Mix design & specimen", open=False, elem_classes=["card"]):
|
| 1178 |
spec_vol = gr.Number(label="Specimen Volume (mm3) *")
|
| 1179 |
+
probe_cnt = gr.Dropdown(choices=["2", "4", CANON_NA],label="Probe Count *",info="2-probe includes contact resistance; 4-probe isolates material resistivity.", value="4", allow_custom_value=False, elem_id="probe-count-dropdown")
|
| 1180 |
+
probe_mat = gr.Dropdown(choices=PROBE_CHOICES, label="Probe Material *", value="Copper mesh", allow_custom_value=True, elem_id="probe-dropdown")
|
| 1181 |
wb = gr.Number(label="W/B *")
|
| 1182 |
sb = gr.Number(label="S/B *")
|
| 1183 |
gauge_len = gr.Number(label="Gauge Length (mm) *")
|
|
|
|
| 1323 |
|
| 1324 |
# ------------- Launch -------------
|
| 1325 |
if __name__ == "__main__":
|
| 1326 |
+
import os
|
| 1327 |
+
from pathlib import Path
|
| 1328 |
+
|
| 1329 |
+
# Find the papers folder relative to this script
|
| 1330 |
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 1331 |
+
papers_dir = os.path.join(current_dir, "papers")
|
| 1332 |
+
|
| 1333 |
+
# Force resolve to absolute path for the Gradio whitelist
|
| 1334 |
+
abs_papers_path = str(Path(papers_dir).resolve())
|
| 1335 |
|
| 1336 |
+
print(f"🚀 SYSTEM READY")
|
| 1337 |
+
print(f"✅ Whitelisting folder: {abs_papers_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1338 |
|
| 1339 |
+
# Launch with the correct security permissions
|
| 1340 |
+
demo.launch(allowed_paths=[abs_papers_path, current_dir])
|