Spaces:

Inframat-x
/

ML-Chatbot

Running

App Files Files Community

Update app.py

#14

by kmanch3 - opened 2 days ago

base: refs/heads/main

←

from: refs/pr/14

Discussion Files changed

+145

-147

Files changed (1) hide show

app.py +145 -147

app.py CHANGED Viewed

@@ -15,6 +15,7 @@ os.environ["TRANSFORMERS_NO_TF"] = "1"
 os.environ["TRANSFORMERS_NO_FLAX"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # ------------------------------- Imports ------------------------------
 import re, joblib, warnings, json, traceback, time, uuid, subprocess, sys
 from pathlib import Path
@@ -24,6 +25,32 @@ import numpy as np
 import pandas as pd
 import gradio as gr
 warnings.filterwarnings("ignore", category=UserWarning)
 # Optional deps (handled gracefully if missing)
@@ -55,6 +82,7 @@ CF_COL     = "Conductive Filler Conc. (wt%)"
 TARGET_COL = "Stress GF (MPa-1)"
 CANON_NA   = "NA"  # canonical placeholder for categoricals
 TYPE_CHOICES = [
     "CNT",
     "Brass fiber",
@@ -82,6 +110,34 @@ TYPE_CHOICES = [
     CANON_NA
 ]
 MAIN_VARIABLES = [
     "Filler 1 Type",
     "Filler 1 Diameter (µm)",
@@ -108,6 +164,40 @@ MAIN_VARIABLES = [
     "Applied Voltage (V)"
 ]
 NUMERIC_COLS = {
     "Filler 1 Diameter (µm)",
     "Filler 1 Length (mm)",
@@ -653,7 +743,6 @@ def synthesize_with_llm(question: str, sentence_lines: List[str], model: str = N
         return out_text, usage
     except Exception:
         return None, None
 def rag_reply(
     question: str,
     k: int = 8,
@@ -678,29 +767,14 @@ def rag_reply(
     if hits is None or hits.empty:
         final = "No indexed PDFs found. Upload PDFs to the 'papers/' folder and reload the Space."
-        record = {
-            "run_id": run_id,
-            "ts": int(time.time()*1000),
-            "inputs": {
-                "question": question, "top_k": int(k), "n_sentences": int(n_sentences),
-                "w_tfidf": float(w_tfidf), "w_bm25": float(w_bm25), "w_emb": float(w_emb),
-                "use_llm": bool(use_llm), "model": model, "temperature": float(temperature)
-            },
-            "retrieval": {"hits": [], "latency_ms_retriever": latency_ms_retriever},
-            "output": {"final_answer": final, "used_sentences": []},
-            "latency_ms_total": int((time.time()-t0_total)*1000),
-            "openai": None
-        }
-        _safe_write_jsonl(LOG_PATH, record)
         return final
     # Select sentences
     selected = mmr_select_sentences(question, hits, top_n=int(n_sentences), pool_per_chunk=6, lambda_div=0.7)
-    # Header citations: short codes only, joined by '; ' (e.g., "S55; S71; S92")
     from urllib.parse import quote
     header_links = []
     unique_codes = set()
@@ -710,10 +784,12 @@ def rag_reply(
         filename = Path(doc_path).name
         short_code = _short_doc_code(doc_path)
-        # ✅ Correct Gradio route is /file= (NOT /file/)
-        abs_pdf = (LOCAL_PDF_DIR / filename).resolve()
-        href = f"/file={quote('papers/' + filename)}"
-        link = f'<a href="/file={quote("papers/" + filename)}" target="_blank" rel="noopener noreferrer">{short_code}</a>'
         if short_code not in unique_codes:
             header_links.append(link)
@@ -721,123 +797,31 @@ def rag_reply(
     header_cites = "; ".join(header_links)
-    coverage_note = "" if len(unique_codes) >= 3 else (
-    f"\n\n> Note: Only {len(unique_codes)} unique source(s) contributed. "
-    "Add more PDFs or increase Top-K."
-)
-    # Prepare retrieval list for logging (full filenames kept here)
-    retr_list = []
-    for _, r in hits.iterrows():
-        retr_list.append({
-            "doc": Path(r["doc_path"]).name,
-            "page": _extract_page(r["text"]),
-            "score_tfidf": float(r.get("score_tfidf", 0.0)),
-            "score_bm25": float(r.get("score_bm25", 0.0)),
-            "score_dense": float(r.get("score_dense", 0.0)),
-            "combo_score": float(r.get("score", 0.0)),
-        })
-    # Strict quotes only (no LLM)
-    if strict_quotes_only:
-        if not selected:
-            final = (
-                "**Quoted Passages:**\n\n---\n" +
-                "\n\n".join(hits['text'].tolist()[:2]) +
-                f"\n\n**Citations:** {header_cites}{coverage_note}"
-            )
-        else:
-            bullets = "\n- ".join(f"{s['sent']} ({s['doc']})" for s in selected)
-            final = f"**Quoted Passages:**\n- {bullets}\n\n**Citations:** {header_cites}{coverage_note}"
-            if include_passages:
-                final += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
-        record = {
-            "run_id": run_id,
-            "ts": int(time.time()*1000),
-            "inputs": {
-                "question": question, "top_k": int(k), "n_sentences": int(n_sentences),
-                "w_tfidf": float(w_tfidf), "w_bm25": float(w_bm25), "w_emb": float(w_emb),
-                "use_llm": False, "model": None, "temperature": float(temperature)
-            },
-            "retrieval": {"hits": retr_list, "latency_ms_retriever": latency_ms_retriever},
-            "output": {
-                "final_answer": final,
-                "used_sentences": [{"sent": s["sent"], "doc": s["doc"], "page": s["page"]} for s in selected]
-            },
-            "latency_ms_total": int((time.time()-t0_total)*1000),
-            "openai": None
-        }
-        _safe_write_jsonl(LOG_PATH, record)
-        return final
-    # Extractive or LLM synthesis
     extractive = compose_extractive(selected)
-    llm_usage = None
-    llm_latency_ms = None
     if use_llm and selected:
-        # Lines already carry short-code citations, e.g. "... (S92)"
         lines = [f"{s['sent']} ({s['doc']})" for s in selected]
-        t0_llm = time.time()
-        llm_text, llm_usage = synthesize_with_llm(question, lines, model=model, temperature=temperature)
-        t1_llm = time.time()
-        llm_latency_ms = int((t1_llm - t0_llm) * 1000)
         if llm_text:
             final = f"**Answer (LLM synthesis):** {llm_text}\n\n**Citations:** {header_cites}{coverage_note}"
-            if include_passages:
-                final += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
         else:
-            if not extractive:
-                final = (
-                    f"**Answer:** Here are relevant passages.\n\n"
-                    f"**Citations:** {header_cites}{coverage_note}\n\n---\n" +
-                    "\n\n".join(hits['text'].tolist()[:2])
-                )
-            else:
-                final = f"**Answer:** {extractive}\n\n**Citations:** {header_cites}{coverage_note}"
-                if include_passages:
-                    final += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
     else:
         if not extractive:
-            final = (
-                f"**Answer:** Here are relevant passages.\n\n"
-                f"**Citations:** {header_cites}{coverage_note}\n\n---\n" +
-                "\n\n".join(hits['text'].tolist()[:2])
-            )
         else:
             final = f"**Answer:** {extractive}\n\n**Citations:** {header_cites}{coverage_note}"
-            if include_passages:
-                final += "\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
-    # --------- Log full run ---------
-    prompt_toks = llm_usage.get("prompt_tokens") if llm_usage else None
-    completion_toks = llm_usage.get("completion_tokens") if llm_usage else None
-    cost_usd = _calc_cost_usd(prompt_toks, completion_toks)
-    total_ms = int((time.time() - t0_total) * 1000)
-    record = {
-        "run_id": run_id,
-        "ts": int(time.time()*1000),
-        "inputs": {
-            "question": question, "top_k": int(k), "n_sentences": int(n_sentences),
-            "w_tfidf": float(w_tfidf), "w_bm25": float(w_bm25), "w_emb": float(w_emb),
-            "use_llm": bool(use_llm), "model": model, "temperature": float(temperature)
-        },
-        "retrieval": {"hits": retr_list, "latency_ms_retriever": latency_ms_retriever},
-        "output": {
-            "final_answer": final,
-            "used_sentences": [{"sent": s['sent'], "doc": s['doc'], "page": s['page']} for s in selected]
-        },
-        "latency_ms_total": total_ms,
-        "latency_ms_llm": llm_latency_ms,
-        "openai": {
-            "prompt_tokens": prompt_toks,
-            "completion_tokens": completion_toks,
-            "cost_usd": cost_usd
-        } if use_llm else None
-    }
-    _safe_write_jsonl(LOG_PATH, record)
     return final
 def rag_chat_fn(message, history, top_k, n_sentences, include_passages,
@@ -1075,8 +1059,11 @@ input[type="checkbox"], .gr-checkbox, .gr-checkbox > * { pointer-events: auto !i
 /* --- THE UNIVERSAL DROPDOWN OVERRIDE --- */
-/* 1. All boxes show white text on the dark background */
 #filler-dropdown .single-select, #filler-dropdown input,
 #dim-dropdown .single-select, #dim-dropdown input,
 #dim2-dropdown .single-select, #dim2-dropdown input,
 #current-dropdown .single-select, #current-dropdown input {
@@ -1086,14 +1073,20 @@ input[type="checkbox"], .gr-checkbox, .gr-checkbox > * { pointer-events: auto !i
 /* 2. All dropdown menus (the pop-outs) have a white background */
 #filler-dropdown .options,
 #dim-dropdown .options,
 #dim2-dropdown .options,
 #current-dropdown .options {
     background-color: #ffffff !important;
 }
-/* 3. All items in the lists are forced to PURE BLACK */
 #filler-dropdown .item, #filler-dropdown .item span,
 #dim-dropdown .item, #dim-dropdown .item span,
 #dim2-dropdown .item, #dim2-dropdown .item span,
 #current-dropdown .item, #current-dropdown .item span,
@@ -1102,7 +1095,13 @@ input[type="checkbox"], .gr-checkbox, .gr-checkbox > * { pointer-events: auto !i
     -webkit-text-fill-color: #000000 !important;
 }
-/* 4. Hover effect for all dropdowns */
 .gr-dropdown .item:hover {
     background-color: #dbeafe !important;
 }
@@ -1171,14 +1170,14 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
                         f1_dim  = gr.Dropdown(DIM_CHOICES, value=CANON_NA, label="Filler 1 Dimensionality *",elem_id="dim-dropdown")
                     with gr.Accordion("Secondary filler (optional)", open=False, elem_classes=["card"]):
-                        f2_type = gr.Textbox(label="Filler 2 Type", placeholder="Optional")
                         f2_diam = gr.Number(label="Filler 2 Diameter (µm)")
                         f2_len  = gr.Number(label="Filler 2 Length (mm)")
                         f2_dim  = gr.Dropdown(DIM_CHOICES, value=CANON_NA, label="Filler 2 Dimensionality", elem_id="dim2-dropdown")
                     with gr.Accordion("Mix design & specimen", open=False, elem_classes=["card"]):
                         spec_vol  = gr.Number(label="Specimen Volume (mm3) *")
-                        probe_cnt = gr.Number(label="Probe Count *")
-                        probe_mat = gr.Textbox(label="Probe Material *", placeholder="e.g., Copper, Silver paste")
                         wb        = gr.Number(label="W/B *")
                         sb        = gr.Number(label="S/B *")
                         gauge_len = gr.Number(label="Gauge Length (mm) *")
@@ -1324,19 +1323,18 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
 # ------------- Launch -------------
 if __name__ == "__main__":
-    # 1. Start the Chatbot (This is what gives you the link)
-    # If using Gradio:
-    demo.launch()
-    # Or if using Flask:
-    # app.run(debug=True)
-    # 2. Everything below here only runs AFTER the server stops
-    # (or might not run at all depending on how the server handles the exit)
-    import os as _os
-    import pandas as _pd
-    folder = "papers"
-    files = sorted(_os.listdir(folder)) if _os.path.exists(folder) else []
-    _pd.DataFrame({"doc": files}).to_csv("paper_list.csv", index=False)
-    print("✅ Saved paper_list.csv with", len(files), "papers")

 os.environ["TRANSFORMERS_NO_FLAX"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # ------------------------------- Imports ------------------------------
 import re, joblib, warnings, json, traceback, time, uuid, subprocess, sys
 from pathlib import Path
 import pandas as pd
 import gradio as gr
+SOURCES_CSV = "sources_fixed.csv"
+def load_sources_map(csv_path=SOURCES_CSV):
+    """
+    Returns dict keyed by filename stem (id) with url + citation.
+    Example key: 'S92-Research-on-the-self-sensing...'  (no .pdf)
+    """
+    if not os.path.exists(csv_path):
+        print(f"[Sources] Missing {csv_path}")
+        return {}
+    df = pd.read_csv(csv_path).fillna("")
+    df.columns = df.columns.str.strip()
+    src = {}
+    for _, r in df.iterrows():
+        _id = str(r.get("id", "")).strip()
+        url = str(r.get("url", "")).strip()
+        cit = str(r.get("citation", "")).strip()
+        if _id:
+            src[_id] = {"url": url, "citation": cit}
+    print(f"[Sources] Loaded {len(src)} sources from {csv_path}")
+    return src
+SOURCES_MAP = load_sources_map()
 warnings.filterwarnings("ignore", category=UserWarning)
 # Optional deps (handled gracefully if missing)
 TARGET_COL = "Stress GF (MPa-1)"
 CANON_NA   = "NA"  # canonical placeholder for categoricals
 TYPE_CHOICES = [
     "CNT",
     "Brass fiber",
     CANON_NA
 ]
+TYPE_CHOICES_2 = [  # options for the "Filler 2 Type (Optional)" dropdown (custom values are also allowed there)
+    "None",  # the dropdown's default value
+    "CNT",
+    "Brass fiber",
+    "GNP",
+    "Steel fiber",
+    "Carbon fiber",
+    "Graphene oxide",
+    "Graphene",
+    "Carbon black",
+    "Graphite",
+    "Shungite",
+    "Nickel powder",
+    "Glass cullet",
+    "MWCNT",
+    "Nano carbon black",
+    "Carbon powder",
+    "Gasification char",
+    "Used foundry sand",
+    "Nickel fiber",
+    "Nickel aggregate",
+    "Steel slag aggregate",
+    "TiO2",
+    "Carbonyl iron powder",
+    "Magnetite aggregate",
+    CANON_NA  # canonical "NA" placeholder; NOTE(review): confirm "None" and CANON_NA are treated alike downstream
+]
 MAIN_VARIABLES = [
     "Filler 1 Type",
     "Filler 1 Diameter (µm)",
     "Applied Voltage (V)"
 ]
+PROBE_COUNT_CHOICES = ["2", "4", CANON_NA]  # probe-count options; NOTE(review): the visible "Probe Count" dropdown passes the same list inline instead of this constant
+PROBE_CHOICES = [  # options for the "Probe Material" dropdown (custom values are also allowed there)
+    "Copper mesh",  # the dropdown's default value
+    "Copper plates",
+    "Copper wire",
+    "Copper wire wrapped with silver paint at both ends",
+    "Copper wire bonded with conductive adhesive",
+    "Copper foil with silver paste",
+    "Copper tape",
+    "Copper E shape plate",
+    "Copper coated in silver paste",
+    "Copper, silver paste coating",
+    "Copper sheets attached on parallel surfaces of cube",
+    "Copper tape with conductive adhesive and copper wire",
+    "Stainless steel mesh",
+    "Stainless steel nets",
+    "Stainless steel gauze",
+    "Stainless steel electrode nets",
+    "Stainless steel bolt connected to copper wire",
+    "#6 stainless steel grides",  # NOTE(review): "grides" may be a typo for "grids"; left as-is in case it must match dataset labels
+    "Steel sheet with 3mm hole diameter",
+    "Wire mesh",
+    "Metallic (General)",
+    "Conductive adhesive type",
+    "Silver conductive adhesive",
+    "Polyester conductive adhesive tape with silver coating",
+    "Black titanium mesh",
+    "Titanium",
+    "Aluminum",
+    "Cement injected columns",
+    "None",  # NOTE(review): offered alongside CANON_NA; confirm the two are treated alike downstream
+    CANON_NA
+]
 NUMERIC_COLS = {
     "Filler 1 Diameter (µm)",
     "Filler 1 Length (mm)",
         return out_text, usage
     except Exception:
         return None, None
 def rag_reply(
     question: str,
     k: int = 8,
     if hits is None or hits.empty:
         final = "No indexed PDFs found. Upload PDFs to the 'papers/' folder and reload the Space."
         return final
     # Select sentences
     selected = mmr_select_sentences(question, hits, top_n=int(n_sentences), pool_per_chunk=6, lambda_div=0.7)
+    # --- Header citations Logic ---
     from urllib.parse import quote
+    from pathlib import Path
     header_links = []
     unique_codes = set()
         filename = Path(doc_path).name
         short_code = _short_doc_code(doc_path)
+        # ✅ FIX 1: Convert to Absolute Path String (Standard for Windows local hosting)
+        abs_pdf_path = str(Path(doc_path).resolve())
+        href = f"/file={abs_pdf_path}"
+        # ✅ FIX 2: Reverted to WHITE for your dark-blue theme
+        link = f'<a href="{href}" target="_blank" rel="noopener noreferrer" style="color: white; font-weight: bold; text-decoration: underline;">{short_code}</a>'
         if short_code not in unique_codes:
             header_links.append(link)
     header_cites = "; ".join(header_links)
+    # ✅ FIX 3: Define coverage_note to prevent NameError crash
+    if len(unique_codes) < 3:
+        coverage_note = f"\n\n> Note: Only {len(unique_codes)} unique source(s) contributed. Add more PDFs or increase Top-K."
+    else:
+        coverage_note = ""
+    # NOTE: the per-run logging that used to live here (retr_list and the _safe_write_jsonl record) was removed; restore it if run logs are still required.
+    # --- Construct Final Output ---
     extractive = compose_extractive(selected)
     if use_llm and selected:
         lines = [f"{s['sent']} ({s['doc']})" for s in selected]
+        llm_text, _ = synthesize_with_llm(question, lines, model=model, temperature=temperature)
         if llm_text:
             final = f"**Answer (LLM synthesis):** {llm_text}\n\n**Citations:** {header_cites}{coverage_note}"
         else:
+            final = f"**Answer:** {extractive}\n\n**Citations:** {header_cites}{coverage_note}"
     else:
         if not extractive:
+            final = f"**Answer:** Here are relevant passages.\n\n**Citations:** {header_cites}{coverage_note}\n\n---\n" + "\n\n".join(hits['text'].tolist()[:2])
         else:
             final = f"**Answer:** {extractive}\n\n**Citations:** {header_cites}{coverage_note}"
+    # Return the final string as your UI expects
     return final
 def rag_chat_fn(message, history, top_k, n_sentences, include_passages,
 /* --- THE UNIVERSAL DROPDOWN OVERRIDE --- */
+/* 1. All boxes show white text on the dark background (Selection View) */
 #filler-dropdown .single-select, #filler-dropdown input,
+#filler2-dropdown .single-select, #filler2-dropdown input,
+#probe-dropdown .single-select, #probe-dropdown input,
+#probe-count-dropdown .single-select, #probe-count-dropdown input,
 #dim-dropdown .single-select, #dim-dropdown input,
 #dim2-dropdown .single-select, #dim2-dropdown input,
 #current-dropdown .single-select, #current-dropdown input {
 /* 2. All dropdown menus (the pop-outs) have a white background */
 #filler-dropdown .options,
+#filler2-dropdown .options,
+#probe-dropdown .options,
+#probe-count-dropdown .options,
 #dim-dropdown .options,
 #dim2-dropdown .options,
 #current-dropdown .options {
     background-color: #ffffff !important;
 }
+/* 3. All items in the lists are forced to PURE BLACK (The Dropdown List) */
 #filler-dropdown .item, #filler-dropdown .item span,
+#filler2-dropdown .item, #filler2-dropdown .item span,
+#probe-dropdown .item, #probe-dropdown .item span,
+#probe-count-dropdown .item, #probe-count-dropdown .item span,
 #dim-dropdown .item, #dim-dropdown .item span,
 #dim2-dropdown .item, #dim2-dropdown .item span,
 #current-dropdown .item, #current-dropdown .item span,
     -webkit-text-fill-color: #000000 !important;
 }
+/* 4. Probe Count Info Text - Forest Green Override (Replaces Neon) */
+#probe-count-dropdown .info {
+    color: #2e7d32 !important;
+    font-weight: 500;
+}
+/* 5. Hover effect for all dropdowns */
 .gr-dropdown .item:hover {
     background-color: #dbeafe !important;
 }
                         f1_dim  = gr.Dropdown(DIM_CHOICES, value=CANON_NA, label="Filler 1 Dimensionality *",elem_id="dim-dropdown")
                     with gr.Accordion("Secondary filler (optional)", open=False, elem_classes=["card"]):
+                        f2_type = gr.Dropdown(choices=TYPE_CHOICES_2, label="Filler 2 Type (Optional)", value="None", allow_custom_value=True, elem_id="filler2-dropdown")
                         f2_diam = gr.Number(label="Filler 2 Diameter (µm)")
                         f2_len  = gr.Number(label="Filler 2 Length (mm)")
                         f2_dim  = gr.Dropdown(DIM_CHOICES, value=CANON_NA, label="Filler 2 Dimensionality", elem_id="dim2-dropdown")
                     with gr.Accordion("Mix design & specimen", open=False, elem_classes=["card"]):
                         spec_vol  = gr.Number(label="Specimen Volume (mm3) *")
+                        probe_cnt = gr.Dropdown(choices=["2", "4", CANON_NA],label="Probe Count *",info="2-probe includes contact resistance; 4-probe isolates material resistivity.", value="4", allow_custom_value=False, elem_id="probe-count-dropdown")
+                        probe_mat = gr.Dropdown(choices=PROBE_CHOICES, label="Probe Material *", value="Copper mesh", allow_custom_value=True, elem_id="probe-dropdown")
                         wb        = gr.Number(label="W/B *")
                         sb        = gr.Number(label="S/B *")
                         gauge_len = gr.Number(label="Gauge Length (mm) *")
 # ------------- Launch -------------
 if __name__ == "__main__":
+    import os
+    from pathlib import Path
+    # Find the papers folder relative to this script
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    papers_dir = os.path.join(current_dir, "papers")
+    # Force resolve to absolute path for the Gradio whitelist
+    abs_papers_path = str(Path(papers_dir).resolve())
+    print(f"🚀 SYSTEM READY")
+    print(f"✅ Whitelisting folder: {abs_papers_path}")
+    # Launch with the correct security permissions
+    demo.launch(allowed_paths=[abs_papers_path, current_dir])