from __future__ import annotations import json import os import re import shutil import threading from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, List, Optional, Tuple import gradio as gr import pandas as pd from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity from sklearn.neighbors import NearestNeighbors import numpy as np # ============================================================ # Configuration # ============================================================ APP_TITLE = "QuoteForge" APP_SUBTITLE = "Industrial Quote Intelligence Platform" DEFAULT_MODEL = os.getenv("CLAUDE_MODEL", "claude-sonnet-4-6") MAIN_SHEET = "Sheet1" NOTES_SHEET = "SME_Notes" HEADERS = ["Request", "Information Extracted", "Design"] DATA_LOCK = threading.Lock() ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin1234") FONTS = "https://fonts.googleapis.com/css2?family=Bebas+Neue&family=DM+Mono:ital,wght@0,300;0,400;0,500;1,300&family=DM+Sans:wght@300;400;500;600&display=swap" CUSTOM_CSS = f""" @import url('{FONTS}'); :root {{ --forge-black: #0a0a0b; --forge-dark: #111114; --forge-panel: #18181d; --forge-border: #2a2a35; --forge-border-bright: #3d3d50; --forge-amber: #f59e0b; --forge-amber-dim: #92610a; --forge-amber-glow: rgba(245,158,11,0.15); --forge-red: #ef4444; --forge-green: #22c55e; --forge-blue: #3b82f6; --forge-text: #e8e8f0; --forge-muted: #6b6b80; --forge-mono: 'DM Mono', monospace; --forge-display: 'Bebas Neue', sans-serif; --forge-body: 'DM Sans', sans-serif; }} /* ── Global reset ── */ *, *::before, *::after {{ box-sizing: border-box; }} .gradio-container {{ max-width: 100% !important; padding: 0 !important; margin: 0 !important; background: var(--forge-black) !important; font-family: var(--forge-body) !important; min-height: 100vh; }} body, .dark {{ background: var(--forge-black) !important; }} /* ── Hide default gradio chrome ── */ footer {{ display: none !important; }} 
.svelte-1ipelgc {{ display: none !important; }} /* ── Header ── */ .forge-header {{ background: var(--forge-dark); border-bottom: 1px solid var(--forge-border); padding: 0 2rem; display: flex; align-items: center; justify-content: space-between; height: 64px; position: sticky; top: 0; z-index: 100; }} .forge-logo {{ display: flex; align-items: baseline; gap: 0.75rem; }} .forge-logo-primary {{ font-family: var(--forge-display); font-size: 2rem; letter-spacing: 0.08em; color: var(--forge-amber); line-height: 1; }} .forge-logo-sub {{ font-family: var(--forge-mono); font-size: 0.7rem; color: var(--forge-muted); letter-spacing: 0.2em; text-transform: uppercase; }} .forge-badge {{ font-family: var(--forge-mono); font-size: 0.65rem; padding: 0.25rem 0.6rem; border: 1px solid var(--forge-amber-dim); color: var(--forge-amber); letter-spacing: 0.15em; text-transform: uppercase; background: var(--forge-amber-glow); }} /* ── Tab navigation override ── */ .tab-nav {{ background: var(--forge-dark) !important; border-bottom: 1px solid var(--forge-border) !important; padding: 0 2rem !important; gap: 0 !important; }} .tab-nav button {{ font-family: var(--forge-mono) !important; font-size: 0.72rem !important; letter-spacing: 0.12em !important; text-transform: uppercase !important; color: var(--forge-muted) !important; background: transparent !important; border: none !important; border-bottom: 2px solid transparent !important; padding: 1rem 1.5rem !important; margin: 0 !important; transition: all 0.2s !important; border-radius: 0 !important; }} .tab-nav button:hover {{ color: var(--forge-text) !important; background: transparent !important; }} .tab-nav button.selected {{ color: var(--forge-amber) !important; border-bottom-color: var(--forge-amber) !important; background: transparent !important; }} /* ── Page sections ── */ .forge-page {{ padding: 2.5rem 2rem; max-width: 1400px; margin: 0 auto; }} /* ── Section headers ── */ .forge-section-label {{ font-family: var(--forge-mono); 
font-size: 0.65rem; letter-spacing: 0.2em; text-transform: uppercase; color: var(--forge-amber); margin-bottom: 0.5rem; display: flex; align-items: center; gap: 0.5rem; }} .forge-section-label::after {{ content: ''; flex: 1; height: 1px; background: var(--forge-border); }} .forge-section-title {{ font-family: var(--forge-display); font-size: 3rem; color: var(--forge-text); letter-spacing: 0.05em; line-height: 1; margin-bottom: 0.75rem; }} .forge-section-desc {{ font-family: var(--forge-body); font-size: 0.95rem; color: var(--forge-muted); line-height: 1.7; max-width: 560px; margin-bottom: 2rem; }} /* ── Cards / panels ── */ .forge-card {{ background: var(--forge-panel); border: 1px solid var(--forge-border); padding: 1.5rem; position: relative; }} .forge-card::before {{ content: ''; position: absolute; top: 0; left: 0; width: 3px; height: 100%; background: var(--forge-amber); }} /* ── Inputs ── */ label {{ font-family: var(--forge-mono) !important; font-size: 0.68rem !important; letter-spacing: 0.14em !important; text-transform: uppercase !important; color: var(--forge-muted) !important; margin-bottom: 0.4rem !important; }} textarea, input[type=text], input[type=password], input[type=number] {{ font-family: var(--forge-mono) !important; font-size: 0.85rem !important; background: var(--forge-black) !important; border: 1px solid var(--forge-border) !important; color: var(--forge-text) !important; border-radius: 0 !important; transition: border-color 0.2s !important; }} textarea:focus, input:focus {{ border-color: var(--forge-amber) !important; outline: none !important; box-shadow: 0 0 0 1px var(--forge-amber-dim) !important; }} /* ── Buttons ── */ button.primary {{ font-family: var(--forge-mono) !important; font-size: 0.75rem !important; letter-spacing: 0.15em !important; text-transform: uppercase !important; background: var(--forge-amber) !important; color: var(--forge-black) !important; border: none !important; border-radius: 0 !important; padding: 0.75rem 1.5rem 
!important; font-weight: 600 !important; transition: all 0.2s !important; cursor: pointer !important; }} button.primary:hover {{ background: #fbbf24 !important; transform: translateY(-1px) !important; box-shadow: 0 4px 20px rgba(245,158,11,0.3) !important; }} button.secondary {{ font-family: var(--forge-mono) !important; font-size: 0.72rem !important; letter-spacing: 0.12em !important; text-transform: uppercase !important; background: transparent !important; color: var(--forge-text) !important; border: 1px solid var(--forge-border-bright) !important; border-radius: 0 !important; padding: 0.65rem 1.25rem !important; transition: all 0.2s !important; }} button.secondary:hover {{ border-color: var(--forge-amber) !important; color: var(--forge-amber) !important; }} /* ── Status / alert banners ── */ .forge-alert {{ border: 1px solid; padding: 1rem 1.25rem; font-family: var(--forge-mono); font-size: 0.78rem; letter-spacing: 0.06em; display: flex; align-items: flex-start; gap: 0.75rem; margin-bottom: 1.5rem; }} .forge-alert.warn {{ border-color: var(--forge-amber-dim); background: var(--forge-amber-glow); color: var(--forge-amber); }} .forge-alert.error {{ border-color: #7f1d1d; background: rgba(239,68,68,0.08); color: var(--forge-red); }} .forge-alert.success {{ border-color: #14532d; background: rgba(34,197,94,0.08); color: var(--forge-green); }} .forge-alert.info {{ border-color: var(--forge-border-bright); background: rgba(59,130,246,0.08); color: #93c5fd; }} /* ── API key prompt ── */ #api-key-banner {{ background: linear-gradient(135deg, rgba(245,158,11,0.12), rgba(245,158,11,0.04)); border: 1px solid var(--forge-amber-dim); padding: 1.5rem 2rem; margin-bottom: 2rem; display: flex; align-items: center; gap: 1.5rem; flex-wrap: wrap; }} /* ── Data tables ── */ .gradio-dataframe {{ background: var(--forge-panel) !important; border: 1px solid var(--forge-border) !important; border-radius: 0 !important; }} .gradio-dataframe table th {{ font-family: var(--forge-mono) 
!important; font-size: 0.65rem !important; letter-spacing: 0.15em !important; text-transform: uppercase !important; color: var(--forge-amber) !important; background: var(--forge-dark) !important; border-bottom: 1px solid var(--forge-border) !important; padding: 0.75rem 1rem !important; }} .gradio-dataframe table td {{ font-family: var(--forge-mono) !important; font-size: 0.8rem !important; color: var(--forge-text) !important; background: transparent !important; border-bottom: 1px solid var(--forge-border) !important; padding: 0.65rem 1rem !important; }} .gradio-dataframe table tr:hover td {{ background: rgba(245,158,11,0.04) !important; }} /* ── Sliders ── */ input[type=range] {{ accent-color: var(--forge-amber) !important; }} /* ── Dropdown ── */ .wrap-inner {{ background: var(--forge-black) !important; border: 1px solid var(--forge-border) !important; border-radius: 0 !important; font-family: var(--forge-mono) !important; font-size: 0.82rem !important; color: var(--forge-text) !important; }} /* ── File upload ── */ .upload-btn {{ border: 1px dashed var(--forge-border-bright) !important; background: var(--forge-black) !important; border-radius: 0 !important; color: var(--forge-muted) !important; font-family: var(--forge-mono) !important; font-size: 0.78rem !important; }} /* ── Stat grid (admin) ── */ .forge-stat-grid {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); gap: 1px; background: var(--forge-border); border: 1px solid var(--forge-border); margin-bottom: 2rem; }} .forge-stat {{ background: var(--forge-panel); padding: 1.25rem 1.5rem; display: flex; flex-direction: column; gap: 0.25rem; }} .forge-stat-value {{ font-family: var(--forge-display); font-size: 2.2rem; color: var(--forge-amber); letter-spacing: 0.04em; line-height: 1; }} .forge-stat-label {{ font-family: var(--forge-mono); font-size: 0.62rem; letter-spacing: 0.18em; text-transform: uppercase; color: var(--forge-muted); }} /* ── SML indicator ── */ .sml-badge {{ 
display: inline-flex; align-items: center; gap: 0.4rem; font-family: var(--forge-mono); font-size: 0.65rem; letter-spacing: 0.12em; text-transform: uppercase; padding: 0.3rem 0.7rem; border: 1px solid; }} .sml-badge.llm {{ border-color: #14532d; color: var(--forge-green); background: rgba(34,197,94,0.08); }} .sml-badge.sml {{ border-color: var(--forge-amber-dim); color: var(--forge-amber); background: var(--forge-amber-glow); }} /* ── Hero section ── */ .forge-hero {{ padding: 4rem 2rem 3rem; max-width: 1400px; margin: 0 auto; display: grid; grid-template-columns: 1fr 1fr; gap: 4rem; align-items: start; }} .forge-hero-visual {{ display: flex; flex-direction: column; gap: 1.5rem; padding-top: 1rem; }} .forge-metric-row {{ display: flex; gap: 1px; background: var(--forge-border); }} .forge-metric {{ flex: 1; background: var(--forge-panel); padding: 1rem 1.25rem; display: flex; flex-direction: column; gap: 0.2rem; }} .forge-metric-val {{ font-family: var(--forge-display); font-size: 1.8rem; color: var(--forge-amber); }} .forge-metric-key {{ font-family: var(--forge-mono); font-size: 0.6rem; color: var(--forge-muted); letter-spacing: 0.15em; text-transform: uppercase; }} .forge-divider {{ height: 1px; background: var(--forge-border); margin: 2rem 0; }} /* ── Admin terminal ── */ .forge-terminal-header {{ background: var(--forge-dark); border: 1px solid var(--forge-border); border-bottom: none; padding: 0.75rem 1rem; display: flex; align-items: center; gap: 0.5rem; }} .terminal-dot {{ width: 10px; height: 10px; border-radius: 50%; }} .forge-terminal-body {{ background: var(--forge-black); border: 1px solid var(--forge-border); padding: 1.25rem; font-family: var(--forge-mono); font-size: 0.8rem; color: var(--forge-text); min-height: 60px; line-height: 1.8; }} /* ── Responsive ── */ @media (max-width: 900px) {{ .forge-hero {{ grid-template-columns: 1fr; }} }} """ # ============================================================ # Paths # 
# ============================================================
REPO_ROOT = Path(__file__).resolve().parent
REPO_DATA_DIR = REPO_ROOT / "data"
REPO_DATA_DIR.mkdir(parents=True, exist_ok=True)
SEED_WORKBOOK = REPO_DATA_DIR / "quote_request_training.xlsx"


def _resolve_app_data_dir() -> Path:
    """Pick the directory that holds the live workbook and exports.

    A ``/data`` mount is preferred when it exists. If the application
    sub-directory cannot be created there (for example a read-only mount),
    fall back to the repository data directory instead of failing at import.
    """
    persistent_root = Path("/data")
    if not persistent_root.exists():
        return REPO_DATA_DIR
    candidate = persistent_root / "quote_request_handler"
    try:
        candidate.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        import warnings

        warnings.warn(
            f"Cannot use {candidate} ({exc}); falling back to {REPO_DATA_DIR}.",
            RuntimeWarning,
        )
        return REPO_DATA_DIR
    return candidate


APP_DATA_DIR = _resolve_app_data_dir()
APP_DATA_DIR.mkdir(parents=True, exist_ok=True)
EXPORT_DIR = APP_DATA_DIR / "exports"
EXPORT_DIR.mkdir(parents=True, exist_ok=True)
DATA_PATH = APP_DATA_DIR / "quote_request_training.xlsx"

DEFAULT_NOTES = [
    "fan curves and AI selects fans",
    "quote should call out unknowns clearly when application details are missing",
]


# ============================================================
# Utilities
# ============================================================
def clean_text(value: Any) -> str:
    """Return ``value`` as a stripped string; ``None`` and float NaN become ``""``."""
    if value is None:
        return ""
    if isinstance(value, float) and pd.isna(value):
        return ""
    return str(value).strip()


def summarize_text(text: str, limit: int = 90) -> str:
    """Flatten newlines to spaces and truncate to ``limit`` characters with ``...``."""
    text = clean_text(text).replace("\n", " ")
    return text if len(text) <= limit else text[: limit - 3] + "..."


def safe_bool_text(flag: bool) -> str:
    """Render a truthy/falsy flag as ``"Yes"`` / ``"No"``."""
    return "Yes" if flag else "No"


def strip_code_fences(text: str) -> str:
    """Remove a leading ```` ``` ```` / ```` ```json ```` fence and its closing fence, if present."""
    text = clean_text(text)
    if text.startswith("```"):
        text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
        text = re.sub(r"\s*```$", "", text)
    return text.strip()


def extract_first_balanced_json(text: str) -> str:
    """Return the first brace-balanced ``{...}`` substring of ``text``.

    Braces inside double-quoted strings (including escaped quotes) are ignored
    while counting depth.

    Raises:
        ValueError: if no ``{`` is present, or the object never closes.
    """
    text = strip_code_fences(text)
    start = text.find("{")
    if start == -1:
        raise ValueError(f"No JSON object found:\n{text}")

    depth = 0
    in_string = False
    escape = False
    for idx, char in enumerate(text[start:], start):
        if in_string:
            if escape:
                # Character after a backslash is consumed verbatim.
                escape = False
            elif char == "\\":
                escape = True
            elif char == '"':
                in_string = False
            continue
        if char == '"':
            in_string = True
        elif char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return text[start: idx + 1]
    raise ValueError(f"JSON truncated:\n{text}")


# Leading list markers only: "-", "*", "•" bullets or "1." / "2)" numbering,
# each followed by whitespace (or end of line). Plain leading numbers such as
# "15000 CFM" or "3.5 in." are content and must not be stripped.
_LIST_MARKER_RE = re.compile(r"^\s*(?:(?:[-*•]|\d+[.)])(?:\s+|$))+")


def normalize_list(value: Any) -> List[str]:
    """Coerce a list or a newline-separated string into a list of non-empty strings.

    List items are passed through ``clean_text``. For a string, each line has
    its bullet/numbering marker removed. Any other type yields ``[]``.
    """
    if isinstance(value, list):
        cleaned = (clean_text(item) for item in value)
        return [item for item in cleaned if item]
    if isinstance(value, str):
        lines = (_LIST_MARKER_RE.sub("", line).strip() for line in value.splitlines())
        return [line for line in lines if line]
    return []


def ensure_seed_exists(path: Path) -> None:
    """Write a two-row starter workbook (plus default notes sheet) if ``path`` is missing."""
    if path.exists():
        return
    # The writer below does not create missing parent directories.
    path.parent.mkdir(parents=True, exist_ok=True)
    seed_df = pd.DataFrame([
        {
            "Request": "15000 CFM pharmaceutical powder, corrosive dust, need fan and collector recommendation",
            "Information Extracted": "Pharmaceutical powder; corrosive dust; 15000 CFM; high-efficiency filtration, corrosion-resistant construction, combustibility review needed.",
            "Design": "Recommend cartridge/pulse-jet collector with PTFE media, stainless construction, fan review, NFPA combustibility confirmation before final quote.",
        },
        {
            "Request": "Need a dust collection upgrade for metal grinding line, 8000 CFM, sparks possible",
            "Information Extracted": "Metal grinding dust; 8000 CFM; spark risk; abrasion-resistant design, spark mitigation, combustible metal hazard review.",
            "Design": "Collector with spark control, abrasion-resistant internals, combustible metals safety review before quoting.",
        },
    ])
    notes_df = pd.DataFrame([[note] for note in DEFAULT_NOTES])
    with pd.ExcelWriter(path, engine="openpyxl") as writer:
        seed_df.to_excel(writer, sheet_name=MAIN_SHEET, index=False)
        notes_df.to_excel(writer, sheet_name=NOTES_SHEET, index=False, header=False)


# ============================================================
# Workbook store
# ============================================================
@dataclass
class WorkbookBundle:
    """In-memory copy of the workbook: the main table plus every other sheet."""

    # Main sheet restricted to the HEADERS columns, all values cleaned strings.
    dataset: pd.DataFrame
    # Remaining sheets keyed by sheet name, read without a header row.
    extra_sheets: Dict[str, pd.DataFrame]


class WorkbookStore:
    """Loads and saves the training workbook at ``data_path``."""

    def __init__(self, data_path: Path, seed_path: Optional[Path] = None):
        self.path = data_path
        self.seed_path = seed_path
        self.ensure_exists()

    def ensure_exists(self) -> None:
        """Create the workbook from the seed file, or from built-in rows, if missing."""
        if self.path.exists():
            return
        if self.seed_path and self.seed_path.exists() and self.seed_path.resolve() != self.path.resolve():
            shutil.copy2(self.seed_path, self.path)
            return
        ensure_seed_exists(self.path)

    @staticmethod
    def _read_bundle(source: Any) -> WorkbookBundle:
        """Read a workbook into a normalised ``WorkbookBundle``.

        The first sheet is the main table; missing HEADERS columns are added
        empty and all cells are cleaned. Every other sheet is kept as-is, and
        a default notes sheet is supplied when none is present. The file is
        opened once and closed before returning.
        """
        with pd.ExcelFile(source) as xls:
            sheet_names = list(xls.sheet_names)
            main = pd.read_excel(xls, sheet_name=sheet_names[0])
            extras: Dict[str, pd.DataFrame] = {
                sheet: pd.read_excel(xls, sheet_name=sheet, header=None)
                for sheet in sheet_names[1:]
            }

        main.columns = [clean_text(c) for c in main.columns]
        for col in HEADERS:
            if col not in main.columns:
                main[col] = ""
        main = main[HEADERS].copy()
        for col in HEADERS:
            main[col] = main[col].map(clean_text)

        if NOTES_SHEET not in extras:
            extras[NOTES_SHEET] = pd.DataFrame([[note] for note in DEFAULT_NOTES])
        return WorkbookBundle(dataset=main, extra_sheets=extras)

    def load_bundle(self) -> WorkbookBundle:
        """Load the stored workbook, creating it first if necessary."""
        self.ensure_exists()
        return self._read_bundle(self.path)

    def save_bundle(self, bundle: WorkbookBundle) -> None:
        """Write ``bundle`` to the workbook path, main table first.

        Note: ``bundle.dataset`` is replaced by its NaN-filled copy.
        """
        bundle.dataset = bundle.dataset.fillna("")
        with pd.ExcelWriter(self.path, engine="openpyxl") as writer:
            bundle.dataset.to_excel(writer, sheet_name=MAIN_SHEET, index=False)
            for sheet_name, df in bundle.extra_sheets.items():
                df.to_excel(writer, sheet_name=sheet_name, index=False, header=False)

    def replace_from_upload(self, uploaded_path: str) -> None:
        """Replace the stored workbook with the normalised content of an uploaded file."""
        self.save_bundle(self._read_bundle(uploaded_path))


# ============================================================
# SML (Small Machine Learning) Backend
# — Runs entirely locally, no API key required
# — Uses TF-IDF retrieval + rule-based extraction + template generation
# ============================================================
class SMLBackend:
    """
    Lightweight local inference engine.

    Extracts structured fields via regex + keyword heuristics, then generates
    quote guidance by template-blending the top-k nearest historical examples.
    """

    AIRFLOW_PATTERN = re.compile(r"(\d[\d,]*)\s*(?:cfm|acfm|scfm)", re.IGNORECASE)

    # NOTE(review): keywords are matched as plain lowercase substrings, so a
    # short entry such as "esp" also matches inside "response"/"especially".
    MATERIAL_KEYWORDS = {
        "pharmaceutical": ["pharma", "pharmaceutical", "drug", "api ", "gmp"],
        "metal grinding": ["grind", "metal grind", "steel grind", "aluminum grind"],
        "wood dust": ["wood", "sawdust", "lumber", "mdf", "plywood"],
        "chemical": ["chemical", "solvent", "acid", "caustic", "reactive"],
        "food": ["food", "grain", "flour", "sugar", "starch", "spice"],
        "plastic": ["plastic", "polymer", "pellet", "resin", "pvc"],
        "cement/mineral": ["cement", "concrete", "lime", "silica", "mineral"],
        "general industrial": [],
    }
    HAZARD_KEYWORDS = {
        "combustible": ["combustible", "flammable", "explosive", "deflagration", "nfpa 652", "nfpa 654"],
        "corrosive": ["corrosive", "corrosion", "acid", "caustic", "hcl", "h2so4", "stainless"],
        "spark risk": ["spark", "sparks", "ignition", "hot work", "grinding", "welding"],
        "toxic": ["toxic", "carcinogen", "hazmat", "osha", "pel ", "tlv "],
        "high humidity": ["humid", "moisture", "wet", "condensation", "steam"],
    }
    COLLECTOR_KEYWORDS = {
        "cartridge collector": ["cartridge", "nano", "nanofiber", "pleated"],
        "baghouse": ["baghouse", "bag house", "pulse jet", "pulse-jet", "shaker", "reverse air"],
        "cyclone": ["cyclone", "centrifugal", "pre-separator"],
        "wet scrubber": ["wet scrubber", "scrubber", "venturi", "wet collector"],
        "electrostatic": ["esp", "electrostatic", "precipitator"],
    }

    # Placeholder returned by _extract_cfm when no airflow figure is found.
    _CFM_MISSING = "Not specified — confirm with customer"
    _RESULT_COLUMNS = ["Request", "Information Extracted", "Design", "Similarity"]

    def __init__(self, dataset: pd.DataFrame, notes: List[str]):
        self.dataset = dataset
        self.notes = notes
        self.vectorizer: Optional[TfidfVectorizer] = None
        self.matrix = None
        # Usable examples: a request plus at least one of the two answer columns.
        self.examples = dataset[
            (dataset["Request"].map(clean_text) != "")
            & (
                (dataset["Information Extracted"].map(clean_text) != "")
                | (dataset["Design"].map(clean_text) != "")
            )
        ].reset_index(drop=True)
        self._build_index()

    def _build_index(self) -> None:
        """Fit the TF-IDF index over the examples; a no-op when there are none."""
        if self.examples.empty:
            return
        corpus = (
            self.examples["Request"].fillna("")
            + " || "
            + self.examples["Information Extracted"].fillna("")
            + " || "
            + self.examples["Design"].fillna("")
        ).tolist()
        self.vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words="english", max_features=5000)
        self.matrix = self.vectorizer.fit_transform(corpus)

    def _match_keywords(self, text: str, kw_dict: Dict[str, List[str]]) -> List[str]:
        """Return, in dict order, every category with a keyword found in ``text``."""
        text_lower = text.lower()
        return [
            category
            for category, keywords in kw_dict.items()
            if any(kw in text_lower for kw in keywords)
        ]

    def _extract_cfm(self, text: str) -> str:
        """Return the first airflow figure (upper-cased) or the missing-value placeholder."""
        m = self.AIRFLOW_PATTERN.search(text)
        return m.group(0).upper() if m else self._CFM_MISSING

    def _detect_material(self, text: str) -> str:
        """Return the first matching material category, else a generic label."""
        # "general industrial" has no keywords, so it can never be matched here.
        matches = self._match_keywords(text, self.MATERIAL_KEYWORDS)
        return matches[0] if matches else "General industrial dust"

    def _detect_hazards(self, text: str) -> List[str]:
        """Return matched hazard categories, or a single verify-with-SME placeholder."""
        return self._match_keywords(text, self.HAZARD_KEYWORDS) or [
            "No specific hazard keywords detected — verify with SME"
        ]

    def _suggest_collector(self, text: str, material: str, hazards: List[str]) -> str:
        """Suggest a collector type: explicit mention first, then a material heuristic.

        ``hazards`` is accepted for signature stability but not consulted.
        """
        mentioned = self._match_keywords(text, self.COLLECTOR_KEYWORDS)
        if mentioned:
            return mentioned[0]
        # heuristic fallback by material
        if "pharma" in material:
            return "cartridge collector (PTFE media recommended for pharma)"
        if "metal" in material:
            return "cartridge or baghouse with spark arrestor"
        if "wood" in material:
            return "baghouse or cartridge collector (check NFPA 652/664)"
        if "cement" in material or "mineral" in material:
            return "pulse-jet baghouse"
        return "pulse-jet cartridge collector (general recommendation)"

    def retrieve(self, request_text: str, sme_text: str, top_k: int = 4) -> pd.DataFrame:
        """Return up to ``top_k`` (at least one) examples ranked by cosine similarity.

        An empty frame with the result columns is returned when no index exists.
        """
        if self.vectorizer is None or self.matrix is None:
            return pd.DataFrame(columns=self._RESULT_COLUMNS)
        query = f"{clean_text(request_text)} || {clean_text(sme_text)}"
        qv = self.vectorizer.transform([query])
        scores = cosine_similarity(qv, self.matrix).ravel()
        # UI widgets may hand over a float; slicing needs an int.
        limit = max(1, min(int(top_k), len(scores)))
        top_idx = scores.argsort()[::-1][:limit]
        out = self.examples.iloc[top_idx].copy()
        out["Similarity"] = scores[top_idx]
        out = out[self._RESULT_COLUMNS].reset_index(drop=True)
        out["Similarity"] = out["Similarity"].map(lambda x: round(float(x), 4))
        return out

    def generate(self, request_text: str, sme_text: str = "", top_k: int = 4) -> Dict[str, Any]:
        """Produce draft quote guidance without calling an LLM.

        Returns a dict with ``information_extracted``, ``design``,
        ``quote_inputs``, ``assumptions``, ``retrieved_examples``,
        ``raw_model_output``, ``request``, ``sml_input`` and
        ``backend == "sml"``.
        """
        request_text = clean_text(request_text)
        sme_text = clean_text(sme_text)
        combined = f"{request_text} {sme_text}"

        cfm = self._extract_cfm(combined)
        material = self._detect_material(combined)
        hazards = self._detect_hazards(combined)
        collector = self._suggest_collector(combined, material, hazards)
        retrieved = self.retrieve(request_text, sme_text, top_k)
        # Only lean on the nearest example when it is at least weakly similar.
        best = retrieved.iloc[0] if not retrieved.empty else None
        best_is_relevant = best is not None and best["Similarity"] > 0.05

        # Build information_extracted by blending extraction + top example context
        info_parts = [
            f"Application: {material}.",
            f"Airflow: {cfm}.",
            f"Detected hazards: {'; '.join(hazards)}.",
        ]
        if sme_text:
            info_parts.append(f"SME notes: {sme_text.strip('.')}.")
        if self.notes:
            info_parts.append(f"Business context: {'; '.join(self.notes[:3])}.")
        if best_is_relevant and clean_text(best["Information Extracted"]):
            info_parts.append(f"Similar prior case: {summarize_text(best['Information Extracted'], 120)}")
        information_extracted = " ".join(info_parts)

        # Design guidance
        design_parts = [f"Recommend a {collector}."]
        if "combustible" in hazards:
            design_parts.append("Include NFPA combustibility review and explosion protection before quoting final scope.")
        if "corrosive" in hazards:
            design_parts.append("Specify corrosion-resistant construction (304/316 SS or coated carbon steel); confirm chemical compatibility.")
        if "spark risk" in hazards:
            design_parts.append("Add spark detection and suppression or pre-separator spark arrestor.")
        if "pharma" in material:
            design_parts.append("GMP cleanability, PTFE filter media, and cGMP documentation package required.")
        if best_is_relevant and clean_text(best["Design"]):
            design_parts.append(f"Informed by similar case: {summarize_text(best['Design'], 120)}")
        design_parts.append("Confirm all open questions with customer before issuing formal quote.")
        design = " ".join(design_parts)

        open_questions = [
            "Confirm airflow (CFM) if not specified",
            "Verify inlet concentration and particle size",
            "Confirm electrical classification (Class/Div or Zone)",
        ]
        if cfm == self._CFM_MISSING:
            open_questions.insert(0, "Airflow CFM not found in request — must be confirmed")

        assumptions = [
            "SML local inference used — no LLM API key configured.",
            f"Material classification: {material} (keyword-based, verify with SME).",
            f"Collector suggestion: {collector} (heuristic, review before quoting).",
            "All outputs are draft guidance only and require SME validation.",
        ]

        return {
            "information_extracted": information_extracted,
            "design": design,
            "quote_inputs": {
                "application": material,
                "airflow_cfm": cfm,
                "dust_or_material": material,
                "collector_type": collector,
                "fan_notes": "Fan selection pending CFM and static pressure confirmation.",
                "material_of_construction": "TBD — depends on hazard/corrosion review",
                "filter_media": "TBD — depends on application",
                "safety_notes": "; ".join(hazards),
                "open_questions": open_questions,
            },
            "assumptions": assumptions,
            "retrieved_examples": retrieved,
            "raw_model_output": f"[SML Backend] material={material}, cfm={cfm}, hazards={hazards}, collector={collector}",
            # Same bookkeeping keys the LLM path attaches to its result.
            "request": request_text,
            "sml_input": sme_text,
            "backend": "sml",
        }


# ============================================================
# LLM + Engine
# ============================================================
def _get_anthropic_client(api_key_override: str = ""):
    """Return an ``Anthropic`` client, or ``None`` when one cannot be built.

    The key comes from ``api_key_override`` if non-blank, else from the
    ``ANTHROPIC_API_KEY`` environment variable. ``None`` is returned when the
    ``anthropic`` package is missing, no key is available, or construction
    fails; callers treat ``None`` as "use the local backend".
    """
    try:
        from anthropic import Anthropic
    except ImportError:
        return None
    # clean_text tolerates a None override (e.g. an untouched UI field).
    key = clean_text(api_key_override) or os.getenv("ANTHROPIC_API_KEY", "").strip()
    if not key:
        return None
    try:
        return Anthropic(api_key=key)
    except Exception:
        # Deliberate best-effort: any construction failure means "no LLM".
        return None
class QuoteRequestEngine: def __init__(self, store: WorkbookStore): self.store = store self.reload() def reload(self) -> None: bundle = self.store.load_bundle() self.bundle = bundle self.dataset = bundle.dataset.copy().reset_index(drop=True) self.notes = self._flatten_notes(bundle.extra_sheets) self.examples = self.dataset[ (self.dataset["Request"].map(clean_text) != "") & ((self.dataset["Information Extracted"].map(clean_text) != "") | (self.dataset["Design"].map(clean_text) != "")) ].reset_index(drop=True) self.vectorizer: Optional[TfidfVectorizer] = None self.matrix = None if not self.examples.empty: corpus = ( self.examples["Request"].fillna("") + " || " + self.examples["Information Extracted"].fillna("") + " || " + self.examples["Design"].fillna("") ).tolist() self.vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words="english") self.matrix = self.vectorizer.fit_transform(corpus) self._sml = SMLBackend(self.dataset, self.notes) @staticmethod def _flatten_notes(extra_sheets: Dict[str, pd.DataFrame]) -> List[str]: notes: List[str] = [] for df in extra_sheets.values(): for item in df.fillna("").astype(str).values.ravel().tolist(): item = clean_text(item) if item and item.lower() != "nan": notes.append(item) return notes def retrieve_examples(self, request_text: str, sme_text: str, top_k: int = 4) -> pd.DataFrame: if self.vectorizer is None or self.matrix is None or self.examples.empty: return pd.DataFrame(columns=["Request", "Information Extracted", "Design", "Similarity"]) query = f"{clean_text(request_text)} || {clean_text(sme_text)}" qv = self.vectorizer.transform([query]) scores = cosine_similarity(qv, self.matrix).ravel() top_idx = scores.argsort()[::-1][:max(1, min(top_k, len(scores)))] out = self.examples.iloc[top_idx].copy() out["Similarity"] = scores[top_idx] out = out[["Request", "Information Extracted", "Design", "Similarity"]].reset_index(drop=True) out["Similarity"] = out["Similarity"].map(lambda x: round(float(x), 4)) return out def 
_build_messages(self, request_text: str, sme_text: str, retrieved: pd.DataFrame) -> Tuple[str, str]: system_prompt = """ You are an industrial quote-request handler for a future quote automation system. Return only valid JSON with this exact schema: { "information_extracted": "string", "design": "string", "quote_inputs": { "application": "string", "airflow_cfm": "string", "dust_or_material": "string", "collector_type": "string", "fan_notes": "string", "material_of_construction": "string", "filter_media": "string", "safety_notes": "string", "open_questions": ["string"] }, "assumptions": ["string"] } Rules: treat requests as customer language that may be incomplete. SME notes are authoritative. Make design output quote-ready. Do not invent pricing or lead times. Clearly state unknowns. Do not wrap in markdown. """.strip() if retrieved.empty: examples_text = "No prior examples available." else: blocks = [] for idx, row in retrieved.iterrows(): blocks.append(f"Example {idx + 1}\nRequest: {row['Request']}\nSME: {row['Information Extracted']}\nDesign: {row['Design']}\nSimilarity: {row['Similarity']}") examples_text = "\n\n".join(blocks) notes_block = "\n".join(f"- {n}" for n in self.notes[:30]) if self.notes else "- None" user_prompt = f""" Customer Request: {clean_text(request_text) or '[Not provided]'} SME Knowledge: {clean_text(sme_text) or '[Not provided]'} Global SME Notes:\n{notes_block} Historical Examples:\n{examples_text} Generate quote-ready response using the schema exactly. """.strip() return system_prompt, user_prompt def _repair_json(self, broken: str, client) -> Dict[str, Any]: response = client.messages.create( model=DEFAULT_MODEL, max_tokens=1600, temperature=0, system="Repair malformed JSON. 
Return only valid JSON.", messages=[{"role": "user", "content": f"Repair into valid JSON, no markdown:\n{broken}"}], ) raw = "".join(b.text for b in response.content if getattr(b, "type", None) == "text").strip() return self._parse_json(raw, client=client, allow_repair=False) def _parse_json(self, text: str, client=None, allow_repair: bool = True) -> Dict[str, Any]: text = strip_code_fences(text) try: data = json.loads(extract_first_balanced_json(text)) except Exception: if allow_repair and client: data = self._repair_json(text, client) else: raise data.setdefault("information_extracted", "") data.setdefault("design", "") data.setdefault("quote_inputs", {}) data.setdefault("assumptions", []) data["information_extracted"] = clean_text(data["information_extracted"]) data["design"] = clean_text(data["design"]) if not isinstance(data.get("quote_inputs"), dict): data["quote_inputs"] = {} data["assumptions"] = normalize_list(data["assumptions"]) return data def generate_quote( self, request_text: str, sme_text: str = "", top_k: int = 4, temperature: float = 0.1, api_key_override: str = "", ) -> Dict[str, Any]: request_text = clean_text(request_text) sme_text = clean_text(sme_text) if not request_text and not sme_text: raise ValueError("Provide a request or SME notes before generating.") client = _get_anthropic_client(api_key_override) # ── LLM path ── if client: retrieved = self.retrieve_examples(request_text, sme_text, top_k) system_prompt, user_prompt = self._build_messages(request_text, sme_text, retrieved) response = client.messages.create( model=DEFAULT_MODEL, max_tokens=1800, temperature=float(temperature), system=system_prompt, messages=[{"role": "user", "content": user_prompt}], ) raw = "".join(b.text for b in response.content if getattr(b, "type", None) == "text").strip() parsed = self._parse_json(raw, client=client, allow_repair=True) parsed["raw_model_output"] = raw parsed["retrieved_examples"] = retrieved parsed["request"] = request_text parsed["sml_input"] = 
sme_text parsed["backend"] = "llm" return parsed # ── SML fallback ── return self._sml.generate(request_text, sme_text, top_k) # ============================================================ # Global store + engine # ============================================================ store = WorkbookStore(DATA_PATH, seed_path=SEED_WORKBOOK if SEED_WORKBOOK.exists() else None) engine = QuoteRequestEngine(store) # ============================================================ # Helper functions for UI # ============================================================ def get_dataset_preview() -> pd.DataFrame: engine.reload() df = engine.dataset.copy().reset_index(drop=True) if df.empty: return pd.DataFrame(columns=["row_id"] + HEADERS) df.insert(0, "row_id", df.index + 1) return df def get_note_preview() -> pd.DataFrame: engine.reload() if not engine.notes: return pd.DataFrame({"note_id": [], "SME Note": []}) return pd.DataFrame({"note_id": list(range(1, len(engine.notes) + 1)), "SME Note": engine.notes}) def get_row_choices() -> List[Tuple[str, int]]: df = get_dataset_preview() if df.empty: return [] return [(f"{int(r.row_id)} | {summarize_text(r.Request, 80)}", int(r.row_id)) for r in df.itertuples(index=False)] def get_downloadable_path() -> str: store.ensure_exists() return str(DATA_PATH) def api_key_active(override: str = "") -> bool: return bool(_get_anthropic_client(override)) def backend_label(override: str = "") -> str: if api_key_active(override): return '⬤ LLM · Claude Active' return '⬤ SML · Local Inference' def status_html() -> str: rows = len(engine.dataset) notes = len(engine.notes) backend = "LLM (Claude)" if api_key_active() else "SML (Local)" return f"""