Spaces:
Running
Running
| from __future__ import annotations | |
| import json | |
| import os | |
| import re | |
| import shutil | |
| import threading | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| from typing import Any, Dict, List, Optional, Tuple | |
| import gradio as gr | |
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from sklearn.neighbors import NearestNeighbors | |
| import numpy as np | |
# ============================================================
# Configuration
# ============================================================
APP_TITLE = "QuoteForge"
APP_SUBTITLE = "Industrial Quote Intelligence Platform"
# Claude model id; overridable via the CLAUDE_MODEL environment variable.
DEFAULT_MODEL = os.getenv("CLAUDE_MODEL", "claude-sonnet-4-6")
# Workbook layout: first sheet holds the training rows, second holds SME notes.
MAIN_SHEET = "Sheet1"
NOTES_SHEET = "SME_Notes"
# Canonical column order for the main sheet; loaders normalize to exactly these.
HEADERS = ["Request", "Information Extracted", "Design"]
# Serializes workbook reads/writes across Gradio request threads.
DATA_LOCK = threading.Lock()
# SECURITY NOTE(review): weak hard-coded fallback password — deployments must
# set ADMIN_PASSWORD in the environment; consider failing closed when unset.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin1234")
# Google Fonts stylesheet URL interpolated into CUSTOM_CSS below.
FONTS = "https://fonts.googleapis.com/css2?family=Bebas+Neue&family=DM+Mono:ital,wght@0,300;0,400;0,500;1,300&family=DM+Sans:wght@300;400;500;600&display=swap"
# Theme stylesheet for the Gradio app. This is an f-string, so literal CSS
# braces are escaped as {{ }}; only {FONTS} is interpolated.
CUSTOM_CSS = f"""
@import url('{FONTS}');
:root {{
  --forge-black: #0a0a0b;
  --forge-dark: #111114;
  --forge-panel: #18181d;
  --forge-border: #2a2a35;
  --forge-border-bright: #3d3d50;
  --forge-amber: #f59e0b;
  --forge-amber-dim: #92610a;
  --forge-amber-glow: rgba(245,158,11,0.15);
  --forge-red: #ef4444;
  --forge-green: #22c55e;
  --forge-blue: #3b82f6;
  --forge-text: #e8e8f0;
  --forge-muted: #6b6b80;
  --forge-mono: 'DM Mono', monospace;
  --forge-display: 'Bebas Neue', sans-serif;
  --forge-body: 'DM Sans', sans-serif;
}}
/* ── Global reset ── */
*, *::before, *::after {{ box-sizing: border-box; }}
.gradio-container {{
  max-width: 100% !important;
  padding: 0 !important;
  margin: 0 !important;
  background: var(--forge-black) !important;
  font-family: var(--forge-body) !important;
  min-height: 100vh;
}}
body, .dark {{
  background: var(--forge-black) !important;
}}
/* ── Hide default gradio chrome ── */
footer {{ display: none !important; }}
.svelte-1ipelgc {{ display: none !important; }}
/* ── Header ── */
.forge-header {{
  background: var(--forge-dark);
  border-bottom: 1px solid var(--forge-border);
  padding: 0 2rem;
  display: flex;
  align-items: center;
  justify-content: space-between;
  height: 64px;
  position: sticky;
  top: 0;
  z-index: 100;
}}
.forge-logo {{
  display: flex;
  align-items: baseline;
  gap: 0.75rem;
}}
.forge-logo-primary {{
  font-family: var(--forge-display);
  font-size: 2rem;
  letter-spacing: 0.08em;
  color: var(--forge-amber);
  line-height: 1;
}}
.forge-logo-sub {{
  font-family: var(--forge-mono);
  font-size: 0.7rem;
  color: var(--forge-muted);
  letter-spacing: 0.2em;
  text-transform: uppercase;
}}
.forge-badge {{
  font-family: var(--forge-mono);
  font-size: 0.65rem;
  padding: 0.25rem 0.6rem;
  border: 1px solid var(--forge-amber-dim);
  color: var(--forge-amber);
  letter-spacing: 0.15em;
  text-transform: uppercase;
  background: var(--forge-amber-glow);
}}
/* ── Tab navigation override ── */
.tab-nav {{
  background: var(--forge-dark) !important;
  border-bottom: 1px solid var(--forge-border) !important;
  padding: 0 2rem !important;
  gap: 0 !important;
}}
.tab-nav button {{
  font-family: var(--forge-mono) !important;
  font-size: 0.72rem !important;
  letter-spacing: 0.12em !important;
  text-transform: uppercase !important;
  color: var(--forge-muted) !important;
  background: transparent !important;
  border: none !important;
  border-bottom: 2px solid transparent !important;
  padding: 1rem 1.5rem !important;
  margin: 0 !important;
  transition: all 0.2s !important;
  border-radius: 0 !important;
}}
.tab-nav button:hover {{
  color: var(--forge-text) !important;
  background: transparent !important;
}}
.tab-nav button.selected {{
  color: var(--forge-amber) !important;
  border-bottom-color: var(--forge-amber) !important;
  background: transparent !important;
}}
/* ── Page sections ── */
.forge-page {{
  padding: 2.5rem 2rem;
  max-width: 1400px;
  margin: 0 auto;
}}
/* ── Section headers ── */
.forge-section-label {{
  font-family: var(--forge-mono);
  font-size: 0.65rem;
  letter-spacing: 0.2em;
  text-transform: uppercase;
  color: var(--forge-amber);
  margin-bottom: 0.5rem;
  display: flex;
  align-items: center;
  gap: 0.5rem;
}}
.forge-section-label::after {{
  content: '';
  flex: 1;
  height: 1px;
  background: var(--forge-border);
}}
.forge-section-title {{
  font-family: var(--forge-display);
  font-size: 3rem;
  color: var(--forge-text);
  letter-spacing: 0.05em;
  line-height: 1;
  margin-bottom: 0.75rem;
}}
.forge-section-desc {{
  font-family: var(--forge-body);
  font-size: 0.95rem;
  color: var(--forge-muted);
  line-height: 1.7;
  max-width: 560px;
  margin-bottom: 2rem;
}}
/* ── Cards / panels ── */
.forge-card {{
  background: var(--forge-panel);
  border: 1px solid var(--forge-border);
  padding: 1.5rem;
  position: relative;
}}
.forge-card::before {{
  content: '';
  position: absolute;
  top: 0; left: 0;
  width: 3px; height: 100%;
  background: var(--forge-amber);
}}
/* ── Inputs ── */
label {{
  font-family: var(--forge-mono) !important;
  font-size: 0.68rem !important;
  letter-spacing: 0.14em !important;
  text-transform: uppercase !important;
  color: var(--forge-muted) !important;
  margin-bottom: 0.4rem !important;
}}
textarea, input[type=text], input[type=password], input[type=number] {{
  font-family: var(--forge-mono) !important;
  font-size: 0.85rem !important;
  background: var(--forge-black) !important;
  border: 1px solid var(--forge-border) !important;
  color: var(--forge-text) !important;
  border-radius: 0 !important;
  transition: border-color 0.2s !important;
}}
textarea:focus, input:focus {{
  border-color: var(--forge-amber) !important;
  outline: none !important;
  box-shadow: 0 0 0 1px var(--forge-amber-dim) !important;
}}
/* ── Buttons ── */
button.primary {{
  font-family: var(--forge-mono) !important;
  font-size: 0.75rem !important;
  letter-spacing: 0.15em !important;
  text-transform: uppercase !important;
  background: var(--forge-amber) !important;
  color: var(--forge-black) !important;
  border: none !important;
  border-radius: 0 !important;
  padding: 0.75rem 1.5rem !important;
  font-weight: 600 !important;
  transition: all 0.2s !important;
  cursor: pointer !important;
}}
button.primary:hover {{
  background: #fbbf24 !important;
  transform: translateY(-1px) !important;
  box-shadow: 0 4px 20px rgba(245,158,11,0.3) !important;
}}
button.secondary {{
  font-family: var(--forge-mono) !important;
  font-size: 0.72rem !important;
  letter-spacing: 0.12em !important;
  text-transform: uppercase !important;
  background: transparent !important;
  color: var(--forge-text) !important;
  border: 1px solid var(--forge-border-bright) !important;
  border-radius: 0 !important;
  padding: 0.65rem 1.25rem !important;
  transition: all 0.2s !important;
}}
button.secondary:hover {{
  border-color: var(--forge-amber) !important;
  color: var(--forge-amber) !important;
}}
/* ── Status / alert banners ── */
.forge-alert {{
  border: 1px solid;
  padding: 1rem 1.25rem;
  font-family: var(--forge-mono);
  font-size: 0.78rem;
  letter-spacing: 0.06em;
  display: flex;
  align-items: flex-start;
  gap: 0.75rem;
  margin-bottom: 1.5rem;
}}
.forge-alert.warn {{
  border-color: var(--forge-amber-dim);
  background: var(--forge-amber-glow);
  color: var(--forge-amber);
}}
.forge-alert.error {{
  border-color: #7f1d1d;
  background: rgba(239,68,68,0.08);
  color: var(--forge-red);
}}
.forge-alert.success {{
  border-color: #14532d;
  background: rgba(34,197,94,0.08);
  color: var(--forge-green);
}}
.forge-alert.info {{
  border-color: var(--forge-border-bright);
  background: rgba(59,130,246,0.08);
  color: #93c5fd;
}}
/* ── API key prompt ── */
#api-key-banner {{
  background: linear-gradient(135deg, rgba(245,158,11,0.12), rgba(245,158,11,0.04));
  border: 1px solid var(--forge-amber-dim);
  padding: 1.5rem 2rem;
  margin-bottom: 2rem;
  display: flex;
  align-items: center;
  gap: 1.5rem;
  flex-wrap: wrap;
}}
/* ── Data tables ── */
.gradio-dataframe {{
  background: var(--forge-panel) !important;
  border: 1px solid var(--forge-border) !important;
  border-radius: 0 !important;
}}
.gradio-dataframe table th {{
  font-family: var(--forge-mono) !important;
  font-size: 0.65rem !important;
  letter-spacing: 0.15em !important;
  text-transform: uppercase !important;
  color: var(--forge-amber) !important;
  background: var(--forge-dark) !important;
  border-bottom: 1px solid var(--forge-border) !important;
  padding: 0.75rem 1rem !important;
}}
.gradio-dataframe table td {{
  font-family: var(--forge-mono) !important;
  font-size: 0.8rem !important;
  color: var(--forge-text) !important;
  background: transparent !important;
  border-bottom: 1px solid var(--forge-border) !important;
  padding: 0.65rem 1rem !important;
}}
.gradio-dataframe table tr:hover td {{
  background: rgba(245,158,11,0.04) !important;
}}
/* ── Sliders ── */
input[type=range] {{
  accent-color: var(--forge-amber) !important;
}}
/* ── Dropdown ── */
.wrap-inner {{
  background: var(--forge-black) !important;
  border: 1px solid var(--forge-border) !important;
  border-radius: 0 !important;
  font-family: var(--forge-mono) !important;
  font-size: 0.82rem !important;
  color: var(--forge-text) !important;
}}
/* ── File upload ── */
.upload-btn {{
  border: 1px dashed var(--forge-border-bright) !important;
  background: var(--forge-black) !important;
  border-radius: 0 !important;
  color: var(--forge-muted) !important;
  font-family: var(--forge-mono) !important;
  font-size: 0.78rem !important;
}}
/* ── Stat grid (admin) ── */
.forge-stat-grid {{
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
  gap: 1px;
  background: var(--forge-border);
  border: 1px solid var(--forge-border);
  margin-bottom: 2rem;
}}
.forge-stat {{
  background: var(--forge-panel);
  padding: 1.25rem 1.5rem;
  display: flex;
  flex-direction: column;
  gap: 0.25rem;
}}
.forge-stat-value {{
  font-family: var(--forge-display);
  font-size: 2.2rem;
  color: var(--forge-amber);
  letter-spacing: 0.04em;
  line-height: 1;
}}
.forge-stat-label {{
  font-family: var(--forge-mono);
  font-size: 0.62rem;
  letter-spacing: 0.18em;
  text-transform: uppercase;
  color: var(--forge-muted);
}}
/* ── SML indicator ── */
.sml-badge {{
  display: inline-flex;
  align-items: center;
  gap: 0.4rem;
  font-family: var(--forge-mono);
  font-size: 0.65rem;
  letter-spacing: 0.12em;
  text-transform: uppercase;
  padding: 0.3rem 0.7rem;
  border: 1px solid;
}}
.sml-badge.llm {{
  border-color: #14532d;
  color: var(--forge-green);
  background: rgba(34,197,94,0.08);
}}
.sml-badge.sml {{
  border-color: var(--forge-amber-dim);
  color: var(--forge-amber);
  background: var(--forge-amber-glow);
}}
/* ── Hero section ── */
.forge-hero {{
  padding: 4rem 2rem 3rem;
  max-width: 1400px;
  margin: 0 auto;
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 4rem;
  align-items: start;
}}
.forge-hero-visual {{
  display: flex;
  flex-direction: column;
  gap: 1.5rem;
  padding-top: 1rem;
}}
.forge-metric-row {{
  display: flex;
  gap: 1px;
  background: var(--forge-border);
}}
.forge-metric {{
  flex: 1;
  background: var(--forge-panel);
  padding: 1rem 1.25rem;
  display: flex;
  flex-direction: column;
  gap: 0.2rem;
}}
.forge-metric-val {{
  font-family: var(--forge-display);
  font-size: 1.8rem;
  color: var(--forge-amber);
}}
.forge-metric-key {{
  font-family: var(--forge-mono);
  font-size: 0.6rem;
  color: var(--forge-muted);
  letter-spacing: 0.15em;
  text-transform: uppercase;
}}
.forge-divider {{
  height: 1px;
  background: var(--forge-border);
  margin: 2rem 0;
}}
/* ── Admin terminal ── */
.forge-terminal-header {{
  background: var(--forge-dark);
  border: 1px solid var(--forge-border);
  border-bottom: none;
  padding: 0.75rem 1rem;
  display: flex;
  align-items: center;
  gap: 0.5rem;
}}
.terminal-dot {{
  width: 10px; height: 10px;
  border-radius: 50%;
}}
.forge-terminal-body {{
  background: var(--forge-black);
  border: 1px solid var(--forge-border);
  padding: 1.25rem;
  font-family: var(--forge-mono);
  font-size: 0.8rem;
  color: var(--forge-text);
  min-height: 60px;
  line-height: 1.8;
}}
/* ── Responsive ── */
@media (max-width: 900px) {{
  .forge-hero {{
    grid-template-columns: 1fr;
  }}
}}
"""
# ============================================================
# Paths
# ============================================================
REPO_ROOT = Path(__file__).resolve().parent
REPO_DATA_DIR = REPO_ROOT / "data"
REPO_DATA_DIR.mkdir(parents=True, exist_ok=True)
# Seed workbook shipped with the repo; copied to persistent storage on first run.
SEED_WORKBOOK = REPO_DATA_DIR / "quote_request_training.xlsx"
# Prefer the persistent /data volume (e.g. HF Spaces persistent storage) when
# it exists; otherwise fall back to the repo-local data directory.
if Path("/data").exists():
    APP_DATA_DIR = Path("/data") / "quote_request_handler"
else:
    APP_DATA_DIR = REPO_DATA_DIR
APP_DATA_DIR.mkdir(parents=True, exist_ok=True)
EXPORT_DIR = APP_DATA_DIR / "exports"
EXPORT_DIR.mkdir(parents=True, exist_ok=True)
# Live workbook the app reads and writes.
DATA_PATH = APP_DATA_DIR / "quote_request_training.xlsx"
# Fallback SME notes used when the workbook has no notes sheet.
DEFAULT_NOTES = [
    "fan curves and AI selects fans",
    "quote should call out unknowns clearly when application details are missing",
]
# ============================================================
# Utilities
# ============================================================
def clean_text(value: Any) -> str:
    """Coerce any cell value into a stripped string; None and NaN become ""."""
    blank = value is None or (isinstance(value, float) and pd.isna(value))
    return "" if blank else str(value).strip()
def summarize_text(text: str, limit: int = 90) -> str:
    """Flatten newlines and truncate to *limit* characters with a "..." suffix."""
    # Same normalization as clean_text, inlined: None/NaN -> "", else stripped str.
    if text is None or (isinstance(text, float) and pd.isna(text)):
        flattened = ""
    else:
        flattened = str(text).strip()
    flattened = flattened.replace("\n", " ")
    if len(flattened) <= limit:
        return flattened
    return flattened[: limit - 3] + "..."
def safe_bool_text(flag: bool) -> str:
    """Render a truthy value as "Yes" and a falsy one as "No"."""
    return {True: "Yes", False: "No"}[bool(flag)]
def strip_code_fences(text: str) -> str:
    """Strip a leading ``` / ```json fence and a trailing ``` from *text*."""
    # Inline of clean_text: None/NaN -> "", anything else -> stripped string.
    if text is None or (isinstance(text, float) and pd.isna(text)):
        text = ""
    else:
        text = str(text).strip()
    if text.startswith("```"):
        text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
        text = re.sub(r"\s*```$", "", text)
    return text.strip()
def extract_first_balanced_json(text: str) -> str:
    """Return the first balanced ``{...}`` object found in *text*.

    Markdown code fences are stripped first. String literals are scanned
    with escape handling so braces inside quoted strings are ignored.
    Raises ValueError when no opening brace exists or the object never closes.
    """
    # Inline of strip_code_fences (which itself inlines clean_text).
    if text is None or (isinstance(text, float) and pd.isna(text)):
        text = ""
    else:
        text = str(text).strip()
    if text.startswith("```"):
        text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
        text = re.sub(r"\s*```$", "", text)
    text = text.strip()

    start = text.find("{")
    if start == -1:
        raise ValueError(f"No JSON object found:\n{text}")

    depth = 0
    in_string = False
    escape = False
    for offset, char in enumerate(text[start:]):
        if in_string:
            # Inside a JSON string: only track escapes and the closing quote.
            if escape:
                escape = False
            elif char == "\\":
                escape = True
            elif char == '"':
                in_string = False
            continue
        if char == '"':
            in_string = True
        elif char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return text[start: start + offset + 1]
    raise ValueError(f"JSON truncated:\n{text}")
def normalize_list(value: Any) -> List[str]:
    """Normalize a list or a bullet/numbered multiline string into clean strings.

    Lists are cleaned element-wise; strings are split on newlines with
    leading bullet/number markers removed. Anything else yields [].
    """
    def _clean(item: Any) -> str:
        # Inline of clean_text: None/NaN -> "", else stripped string.
        if item is None or (isinstance(item, float) and pd.isna(item)):
            return ""
        return str(item).strip()

    if isinstance(value, list):
        return [entry for entry in (_clean(v) for v in value) if entry]
    if isinstance(value, str):
        stripped = [re.sub(r"^[-*\d.)\s]+", "", line).strip() for line in value.splitlines()]
        return [line for line in stripped if line]
    return []
def ensure_seed_exists(path: Path) -> None:
    """Write a two-example starter workbook to *path* unless one already exists.

    Sheet layout matches the app's expectations: MAIN_SHEET holds the
    training rows under HEADERS, NOTES_SHEET holds one note per row.
    """
    if path.exists():
        return
    example_rows = [
        {
            "Request": "15000 CFM pharmaceutical powder, corrosive dust, need fan and collector recommendation",
            "Information Extracted": "Pharmaceutical powder; corrosive dust; 15000 CFM; high-efficiency filtration, corrosion-resistant construction, combustibility review needed.",
            "Design": "Recommend cartridge/pulse-jet collector with PTFE media, stainless construction, fan review, NFPA combustibility confirmation before final quote.",
        },
        {
            "Request": "Need a dust collection upgrade for metal grinding line, 8000 CFM, sparks possible",
            "Information Extracted": "Metal grinding dust; 8000 CFM; spark risk; abrasion-resistant design, spark mitigation, combustible metal hazard review.",
            "Design": "Collector with spark control, abrasion-resistant internals, combustible metals safety review before quoting.",
        },
    ]
    note_rows = [[note] for note in DEFAULT_NOTES]
    with pd.ExcelWriter(path, engine="openpyxl") as writer:
        pd.DataFrame(example_rows).to_excel(writer, sheet_name=MAIN_SHEET, index=False)
        pd.DataFrame(note_rows).to_excel(writer, sheet_name=NOTES_SHEET, index=False, header=False)
# ============================================================
# Workbook store
# ============================================================
# Fix: the original declared bare class-level annotations without @dataclass,
# so WorkbookBundle(dataset=..., extra_sheets=...) raised TypeError at runtime.
@dataclass
class WorkbookBundle:
    """In-memory view of the workbook: normalized main sheet plus extra sheets."""
    # First sheet, normalized to exactly the HEADERS columns.
    dataset: pd.DataFrame
    # Remaining sheets keyed by sheet name (read with header=None).
    extra_sheets: Dict[str, pd.DataFrame]
class WorkbookStore:
    """Owns the Excel workbook on disk: seeding, loading, saving, replacing."""

    def __init__(self, data_path: Path, seed_path: Optional[Path] = None):
        self.path = data_path
        self.seed_path = seed_path
        self.ensure_exists()

    def ensure_exists(self) -> None:
        """Create the workbook if missing: copy the seed file, else synthesize one."""
        if self.path.exists():
            return
        if self.seed_path and self.seed_path.exists() and self.seed_path.resolve() != self.path.resolve():
            shutil.copy2(self.seed_path, self.path)
            return
        ensure_seed_exists(self.path)

    @staticmethod
    def _read_normalized(source) -> Tuple[pd.DataFrame, Dict[str, pd.DataFrame]]:
        """Read a workbook path/file into (normalized main sheet, extra sheets).

        Shared by load_bundle and replace_from_upload, which previously
        duplicated this normalization logic line for line. The first sheet is
        coerced to exactly the HEADERS columns with cleaned string cells; the
        remaining sheets are read raw (header=None). A NOTES_SHEET with the
        default notes is injected when absent.
        """
        xls = pd.ExcelFile(source)
        main = pd.read_excel(source, sheet_name=xls.sheet_names[0])
        main.columns = [clean_text(c) for c in main.columns]
        for col in HEADERS:
            if col not in main.columns:
                main[col] = ""
        main = main[HEADERS].copy()
        for col in HEADERS:
            main[col] = main[col].map(clean_text)
        extras: Dict[str, pd.DataFrame] = {
            sheet: pd.read_excel(source, sheet_name=sheet, header=None)
            for sheet in xls.sheet_names[1:]
        }
        if NOTES_SHEET not in extras:
            extras[NOTES_SHEET] = pd.DataFrame([[note] for note in DEFAULT_NOTES])
        return main, extras

    def load_bundle(self) -> WorkbookBundle:
        """Load the on-disk workbook into a WorkbookBundle, creating it if needed."""
        self.ensure_exists()
        main, extras = self._read_normalized(self.path)
        return WorkbookBundle(dataset=main, extra_sheets=extras)

    def save_bundle(self, bundle: WorkbookBundle) -> None:
        """Persist the bundle back to disk, main sheet first, extras header-less."""
        bundle.dataset = bundle.dataset.fillna("")
        with pd.ExcelWriter(self.path, engine="openpyxl") as writer:
            bundle.dataset.to_excel(writer, sheet_name=MAIN_SHEET, index=False)
            for sheet_name, df in bundle.extra_sheets.items():
                df.to_excel(writer, sheet_name=sheet_name, index=False, header=False)

    def replace_from_upload(self, uploaded_path: str) -> None:
        """Replace the stored workbook with a normalized copy of an uploaded file."""
        main, extras = self._read_normalized(uploaded_path)
        self.save_bundle(WorkbookBundle(dataset=main, extra_sheets=extras))
# ============================================================
# SML (Small Machine Learning) Backend
# — Runs entirely locally, no API key required
# — Uses TF-IDF retrieval + rule-based extraction + template generation
# ============================================================
class SMLBackend:
    """Lightweight local inference engine used when no LLM API key is set.

    Extracts structured fields via regex + keyword heuristics, then
    generates quote guidance by template-blending the top-k nearest
    historical examples (TF-IDF cosine similarity).
    """

    # Canonical sentinel for an unknown airflow value. Hoisted to a class
    # constant because the original duplicated the literal in _extract_cfm and
    # in generate()'s equality check, where any drift would silently break the
    # open-question logic. (Mojibake "β" in the original repaired to "—".)
    CFM_UNKNOWN = "Not specified — confirm with customer"

    # Matches airflow values like "15,000 CFM", "8000 acfm", "500 SCFM".
    AIRFLOW_PATTERN = re.compile(r"(\d[\d,]*)\s*(?:cfm|acfm|scfm)", re.IGNORECASE)
    # Material category -> trigger keywords; "general industrial" is the fallback.
    MATERIAL_KEYWORDS = {
        "pharmaceutical": ["pharma", "pharmaceutical", "drug", "api ", "gmp"],
        "metal grinding": ["grind", "metal grind", "steel grind", "aluminum grind"],
        "wood dust": ["wood", "sawdust", "lumber", "mdf", "plywood"],
        "chemical": ["chemical", "solvent", "acid", "caustic", "reactive"],
        "food": ["food", "grain", "flour", "sugar", "starch", "spice"],
        "plastic": ["plastic", "polymer", "pellet", "resin", "pvc"],
        "cement/mineral": ["cement", "concrete", "lime", "silica", "mineral"],
        "general industrial": [],
    }
    HAZARD_KEYWORDS = {
        "combustible": ["combustible", "flammable", "explosive", "deflagration", "nfpa 652", "nfpa 654"],
        "corrosive": ["corrosive", "corrosion", "acid", "caustic", "hcl", "h2so4", "stainless"],
        "spark risk": ["spark", "sparks", "ignition", "hot work", "grinding", "welding"],
        "toxic": ["toxic", "carcinogen", "hazmat", "osha", "pel ", "tlv "],
        "high humidity": ["humid", "moisture", "wet", "condensation", "steam"],
    }
    COLLECTOR_KEYWORDS = {
        "cartridge collector": ["cartridge", "nano", "nanofiber", "pleated"],
        "baghouse": ["baghouse", "bag house", "pulse jet", "pulse-jet", "shaker", "reverse air"],
        "cyclone": ["cyclone", "centrifugal", "pre-separator"],
        "wet scrubber": ["wet scrubber", "scrubber", "venturi", "wet collector"],
        "electrostatic": ["esp", "electrostatic", "precipitator"],
    }

    def __init__(self, dataset: pd.DataFrame, notes: List[str]):
        """Index the rows of *dataset* that have a request plus at least one output."""
        self.dataset = dataset
        self.notes = notes
        self.vectorizer: Optional[TfidfVectorizer] = None
        self.matrix = None
        self.examples = dataset[
            (dataset["Request"].map(clean_text) != "") &
            ((dataset["Information Extracted"].map(clean_text) != "") |
             (dataset["Design"].map(clean_text) != ""))
        ].reset_index(drop=True)
        self._build_index()

    def _build_index(self) -> None:
        """Fit a TF-IDF index over request + extraction + design text per example."""
        if self.examples.empty:
            return
        corpus = (
            self.examples["Request"].fillna("") + " || " +
            self.examples["Information Extracted"].fillna("") + " || " +
            self.examples["Design"].fillna("")
        ).tolist()
        self.vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words="english", max_features=5000)
        self.matrix = self.vectorizer.fit_transform(corpus)

    def _match_keywords(self, text: str, kw_dict: Dict[str, List[str]]) -> List[str]:
        """Return every category whose keywords appear (case-insensitively) in *text*."""
        text_lower = text.lower()
        matches = []
        for category, keywords in kw_dict.items():
            if any(kw in text_lower for kw in keywords):
                matches.append(category)
        return matches

    def _extract_cfm(self, text: str) -> str:
        """Pull the first airflow figure out of *text*, or the unknown sentinel."""
        m = self.AIRFLOW_PATTERN.search(text)
        return m.group(0).upper() if m else self.CFM_UNKNOWN

    def _detect_material(self, text: str) -> str:
        """Classify the dust/material via keywords; fall back to a generic label."""
        text_lower = text.lower()
        for material, keywords in self.MATERIAL_KEYWORDS.items():
            if material == "general industrial":
                continue  # fallback category has no keywords; skip it
            if any(kw in text_lower for kw in keywords):
                return material
        return "General industrial dust"

    def _detect_hazards(self, text: str) -> List[str]:
        """List detected hazard categories, or a single 'none detected' notice."""
        return self._match_keywords(text, self.HAZARD_KEYWORDS) or ["No specific hazard keywords detected — verify with SME"]

    def _suggest_collector(self, text: str, material: str, hazards: List[str]) -> str:
        """Pick a collector type: explicit keyword match first, then by material.

        ``hazards`` is accepted for signature compatibility / future rules but
        is not consulted by the current heuristics.
        """
        text_lower = text.lower()
        for ctype, keywords in self.COLLECTOR_KEYWORDS.items():
            if any(kw in text_lower for kw in keywords):
                return ctype
        # heuristic fallback by material
        if "pharma" in material:
            return "cartridge collector (PTFE media recommended for pharma)"
        if "metal" in material:
            return "cartridge or baghouse with spark arrestor"
        if "wood" in material:
            return "baghouse or cartridge collector (check NFPA 652/664)"
        if "cement" in material or "mineral" in material:
            return "pulse-jet baghouse"
        return "pulse-jet cartridge collector (general recommendation)"

    def retrieve(self, request_text: str, sme_text: str, top_k: int = 4) -> pd.DataFrame:
        """Return the *top_k* most similar historical rows with a Similarity column."""
        if self.vectorizer is None or self.matrix is None:
            return pd.DataFrame(columns=["Request", "Information Extracted", "Design", "Similarity"])
        query = f"{clean_text(request_text)} || {clean_text(sme_text)}"
        qv = self.vectorizer.transform([query])
        scores = cosine_similarity(qv, self.matrix).ravel()
        top_idx = scores.argsort()[::-1][:max(1, min(top_k, len(scores)))]
        out = self.examples.iloc[top_idx].copy()
        out["Similarity"] = scores[top_idx]
        out = out[["Request", "Information Extracted", "Design", "Similarity"]].reset_index(drop=True)
        out["Similarity"] = out["Similarity"].map(lambda x: round(float(x), 4))
        return out

    def generate(self, request_text: str, sme_text: str = "", top_k: int = 4) -> Dict[str, Any]:
        """Produce draft extraction + design guidance for a quote request.

        Returns a dict mirroring the LLM backend's schema (information_extracted,
        design, quote_inputs, assumptions) plus the retrieved examples and a raw
        trace string, tagged with backend="sml".
        """
        combined = f"{request_text} {sme_text}"
        cfm = self._extract_cfm(combined)
        material = self._detect_material(combined)
        hazards = self._detect_hazards(combined)
        collector = self._suggest_collector(combined, material, hazards)
        retrieved = self.retrieve(request_text, sme_text, top_k)
        # Build information_extracted by blending extraction + top example context
        info_parts = [
            f"Application: {material}.",
            f"Airflow: {cfm}.",
            f"Detected hazards: {'; '.join(hazards)}.",
        ]
        if sme_text:
            info_parts.append(f"SME notes: {sme_text.strip('.')}.")
        if self.notes:
            info_parts.append(f"Business context: {'; '.join(self.notes[:3])}.")
        if not retrieved.empty:
            best = retrieved.iloc[0]
            # 0.05 similarity floor keeps unrelated rows out of the summary.
            if best["Similarity"] > 0.05 and clean_text(best["Information Extracted"]):
                info_parts.append(f"Similar prior case: {summarize_text(best['Information Extracted'], 120)}")
        information_extracted = " ".join(info_parts)
        # Design guidance: base recommendation plus hazard-specific riders.
        design_parts = [
            f"Recommend a {collector}.",
        ]
        if "combustible" in hazards:
            design_parts.append("Include NFPA combustibility review and explosion protection before quoting final scope.")
        if "corrosive" in hazards:
            design_parts.append("Specify corrosion-resistant construction (304/316 SS or coated carbon steel); confirm chemical compatibility.")
        if "spark risk" in hazards:
            design_parts.append("Add spark detection and suppression or pre-separator spark arrestor.")
        if "pharma" in material:
            design_parts.append("GMP cleanability, PTFE filter media, and cGMP documentation package required.")
        if not retrieved.empty:
            best = retrieved.iloc[0]
            if best["Similarity"] > 0.05 and clean_text(best["Design"]):
                design_parts.append(f"Informed by similar case: {summarize_text(best['Design'], 120)}")
        design_parts.append("Confirm all open questions with customer before issuing formal quote.")
        design = " ".join(design_parts)
        open_questions = ["Confirm airflow (CFM) if not specified", "Verify inlet concentration and particle size", "Confirm electrical classification (Class/Div or Zone)"]
        if cfm == self.CFM_UNKNOWN:
            open_questions.insert(0, "Airflow CFM not found in request — must be confirmed")
        assumptions = [
            "SML local inference used — no LLM API key configured.",
            f"Material classification: {material} (keyword-based, verify with SME).",
            f"Collector suggestion: {collector} (heuristic, review before quoting).",
            "All outputs are draft guidance only and require SME validation.",
        ]
        return {
            "information_extracted": information_extracted,
            "design": design,
            "quote_inputs": {
                "application": material,
                "airflow_cfm": cfm,
                "dust_or_material": material,
                "collector_type": collector,
                "fan_notes": "Fan selection pending CFM and static pressure confirmation.",
                "material_of_construction": "TBD — depends on hazard/corrosion review",
                "filter_media": "TBD — depends on application",
                "safety_notes": "; ".join(hazards),
                "open_questions": open_questions,
            },
            "assumptions": assumptions,
            "retrieved_examples": retrieved,
            "raw_model_output": f"[SML Backend] material={material}, cfm={cfm}, hazards={hazards}, collector={collector}",
            "backend": "sml",
        }
| # ============================================================ | |
| # LLM + Engine | |
| # ============================================================ | |
| def _get_anthropic_client(api_key_override: str = ""): | |
| try: | |
| from anthropic import Anthropic | |
| except ImportError: | |
| return None | |
| key = api_key_override.strip() or os.getenv("ANTHROPIC_API_KEY", "").strip() | |
| if not key: | |
| return None | |
| try: | |
| return Anthropic(api_key=key) | |
| except Exception: | |
| return None | |
class QuoteRequestEngine:
    """Retrieval-augmented quote-guidance engine over the workbook dataset.

    Loads training rows and SME notes from a :class:`WorkbookStore`, builds
    a TF-IDF index over completed examples, and generates quote guidance via
    the Anthropic API when a key is available, falling back to the local
    :class:`SMLBackend` otherwise.
    """

    def __init__(self, store: WorkbookStore):
        self.store = store
        self.reload()

    def reload(self) -> None:
        """Re-read the workbook from disk and rebuild the retrieval index."""
        bundle = self.store.load_bundle()
        self.bundle = bundle
        self.dataset = bundle.dataset.copy().reset_index(drop=True)
        self.notes = self._flatten_notes(bundle.extra_sheets)
        # A usable retrieval example needs a non-empty request plus at least
        # one of the two answer columns (SME extraction or design).
        self.examples = self.dataset[
            (self.dataset["Request"].map(clean_text) != "") &
            ((self.dataset["Information Extracted"].map(clean_text) != "") |
            (self.dataset["Design"].map(clean_text) != ""))
        ].reset_index(drop=True)
        self.vectorizer: Optional[TfidfVectorizer] = None
        self.matrix = None
        if not self.examples.empty:
            # One concatenated document per example so a query can match any column.
            corpus = (
                self.examples["Request"].fillna("") + " || " +
                self.examples["Information Extracted"].fillna("") + " || " +
                self.examples["Design"].fillna("")
            ).tolist()
            self.vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words="english")
            self.matrix = self.vectorizer.fit_transform(corpus)
        self._sml = SMLBackend(self.dataset, self.notes)

    @staticmethod
    def _flatten_notes(extra_sheets: Dict[str, pd.DataFrame]) -> List[str]:
        """Collect every non-empty cell from the extra sheets as one flat note list.

        BUGFIX: this was previously declared without ``self`` while being
        invoked as ``self._flatten_notes(...)``, so the bound call raised
        ``TypeError: takes 1 positional argument but 2 were given`` on every
        reload. Declared ``@staticmethod`` so the existing call site works.
        """
        notes: List[str] = []
        for df in extra_sheets.values():
            for item in df.fillna("").astype(str).values.ravel().tolist():
                item = clean_text(item)
                # astype(str) can turn missing cells into the literal "nan".
                if item and item.lower() != "nan":
                    notes.append(item)
        return notes

    def retrieve_examples(self, request_text: str, sme_text: str, top_k: int = 4) -> pd.DataFrame:
        """Return the ``top_k`` most similar historical examples via TF-IDF cosine.

        Returns an empty frame (with the expected columns) when no index exists.
        """
        if self.vectorizer is None or self.matrix is None or self.examples.empty:
            return pd.DataFrame(columns=["Request", "Information Extracted", "Design", "Similarity"])
        query = f"{clean_text(request_text)} || {clean_text(sme_text)}"
        qv = self.vectorizer.transform([query])
        scores = cosine_similarity(qv, self.matrix).ravel()
        # Clamp k to [1, n] so a bad slider value can never raise.
        top_idx = scores.argsort()[::-1][:max(1, min(top_k, len(scores)))]
        out = self.examples.iloc[top_idx].copy()
        out["Similarity"] = scores[top_idx]
        out = out[["Request", "Information Extracted", "Design", "Similarity"]].reset_index(drop=True)
        out["Similarity"] = out["Similarity"].map(lambda x: round(float(x), 4))
        return out

    def _build_messages(self, request_text: str, sme_text: str, retrieved: pd.DataFrame) -> Tuple[str, str]:
        """Build the (system, user) prompt pair for the LLM call."""
        system_prompt = """
You are an industrial quote-request handler for a future quote automation system.
Return only valid JSON with this exact schema:
{
"information_extracted": "string",
"design": "string",
"quote_inputs": {
"application": "string",
"airflow_cfm": "string",
"dust_or_material": "string",
"collector_type": "string",
"fan_notes": "string",
"material_of_construction": "string",
"filter_media": "string",
"safety_notes": "string",
"open_questions": ["string"]
},
"assumptions": ["string"]
}
Rules: treat requests as customer language that may be incomplete. SME notes are authoritative. Make design output quote-ready. Do not invent pricing or lead times. Clearly state unknowns. Do not wrap in markdown.
""".strip()
        if retrieved.empty:
            examples_text = "No prior examples available."
        else:
            blocks = []
            for idx, row in retrieved.iterrows():
                blocks.append(f"Example {idx + 1}\nRequest: {row['Request']}\nSME: {row['Information Extracted']}\nDesign: {row['Design']}\nSimilarity: {row['Similarity']}")
            examples_text = "\n\n".join(blocks)
        # Cap the global notes at 30 to keep the prompt bounded.
        notes_block = "\n".join(f"- {n}" for n in self.notes[:30]) if self.notes else "- None"
        user_prompt = f"""
Customer Request: {clean_text(request_text) or '[Not provided]'}
SME Knowledge: {clean_text(sme_text) or '[Not provided]'}
Global SME Notes:\n{notes_block}
Historical Examples:\n{examples_text}
Generate quote-ready response using the schema exactly.
""".strip()
        return system_prompt, user_prompt

    def _repair_json(self, broken: str, client) -> Dict[str, Any]:
        """Ask the model to repair malformed JSON (single attempt, no recursion)."""
        response = client.messages.create(
            model=DEFAULT_MODEL, max_tokens=1600, temperature=0,
            system="Repair malformed JSON. Return only valid JSON.",
            messages=[{"role": "user", "content": f"Repair into valid JSON, no markdown:\n{broken}"}],
        )
        raw = "".join(b.text for b in response.content if getattr(b, "type", None) == "text").strip()
        # allow_repair=False prevents an infinite repair loop.
        return self._parse_json(raw, client=client, allow_repair=False)

    def _parse_json(self, text: str, client=None, allow_repair: bool = True) -> Dict[str, Any]:
        """Parse model output into the schema dict, normalizing missing fields.

        Raises the underlying parse error when repair is disabled/unavailable.
        """
        text = strip_code_fences(text)
        try:
            data = json.loads(extract_first_balanced_json(text))
        except Exception:
            if allow_repair and client:
                data = self._repair_json(text, client)
            else:
                raise
        data.setdefault("information_extracted", "")
        data.setdefault("design", "")
        data.setdefault("quote_inputs", {})
        data.setdefault("assumptions", [])
        data["information_extracted"] = clean_text(data["information_extracted"])
        data["design"] = clean_text(data["design"])
        if not isinstance(data.get("quote_inputs"), dict):
            data["quote_inputs"] = {}
        data["assumptions"] = normalize_list(data["assumptions"])
        return data

    def generate_quote(
        self,
        request_text: str,
        sme_text: str = "",
        top_k: int = 4,
        temperature: float = 0.1,
        api_key_override: str = "",
    ) -> Dict[str, Any]:
        """Generate quote guidance for one request.

        Uses the Claude LLM path when a client can be built, otherwise the
        local SML backend. Raises ValueError when both inputs are empty.
        """
        request_text = clean_text(request_text)
        sme_text = clean_text(sme_text)
        if not request_text and not sme_text:
            raise ValueError("Provide a request or SME notes before generating.")
        client = _get_anthropic_client(api_key_override)
        # ββ LLM path ββ
        if client:
            retrieved = self.retrieve_examples(request_text, sme_text, top_k)
            system_prompt, user_prompt = self._build_messages(request_text, sme_text, retrieved)
            response = client.messages.create(
                model=DEFAULT_MODEL,
                max_tokens=1800,
                temperature=float(temperature),
                system=system_prompt,
                messages=[{"role": "user", "content": user_prompt}],
            )
            raw = "".join(b.text for b in response.content if getattr(b, "type", None) == "text").strip()
            parsed = self._parse_json(raw, client=client, allow_repair=True)
            parsed["raw_model_output"] = raw
            parsed["retrieved_examples"] = retrieved
            parsed["request"] = request_text
            parsed["sml_input"] = sme_text
            parsed["backend"] = "llm"
            return parsed
        # ββ SML fallback ββ (temperature is ignored by the local backend)
        return self._sml.generate(request_text, sme_text, top_k)
| # ============================================================ | |
| # Global store + engine | |
| # ============================================================ | |
# Module-level singletons shared by every Gradio session: one on-disk
# workbook store (seeded from SEED_WORKBOOK when present) and one engine
# built on top of it. Mutations elsewhere are serialized via DATA_LOCK.
store = WorkbookStore(DATA_PATH, seed_path=SEED_WORKBOOK if SEED_WORKBOOK.exists() else None)
engine = QuoteRequestEngine(store)
| # ============================================================ | |
| # Helper functions for UI | |
| # ============================================================ | |
def get_dataset_preview() -> pd.DataFrame:
    """Return the current training dataset with a 1-based ``row_id`` column."""
    engine.reload()
    frame = engine.dataset.copy().reset_index(drop=True)
    if frame.empty:
        # Keep the column layout stable even with no rows.
        return pd.DataFrame(columns=["row_id"] + HEADERS)
    frame.insert(0, "row_id", frame.index + 1)
    return frame
def get_note_preview() -> pd.DataFrame:
    """Return global SME notes as a two-column (note_id, SME Note) frame."""
    engine.reload()
    notes = engine.notes
    if not notes:
        return pd.DataFrame({"note_id": [], "SME Note": []})
    ids = list(range(1, len(notes) + 1))
    return pd.DataFrame({"note_id": ids, "SME Note": notes})
def get_row_choices() -> List[Tuple[str, int]]:
    """Build dropdown (label, row_id) pairs — one per dataset row."""
    preview = get_dataset_preview()
    choices: List[Tuple[str, int]] = []
    for record in preview.itertuples(index=False):
        rid = int(record.row_id)
        choices.append((f"{rid} | {summarize_text(record.Request, 80)}", rid))
    return choices
def get_downloadable_path() -> str:
    """Make sure the workbook exists on disk, then return its path as a string."""
    store.ensure_exists()
    return str(DATA_PATH)
def api_key_active(override: str = "") -> bool:
    """True when an Anthropic client can be built (LLM backend usable)."""
    client = _get_anthropic_client(override)
    return client is not None
def backend_label(override: str = "") -> str:
    """HTML badge naming the inference backend currently in effect."""
    if not api_key_active(override):
        return '<span class="sml-badge sml">⬀ SML · Local Inference</span>'
    return '<span class="sml-badge llm">⬀ LLM · Claude Active</span>'
def status_html() -> str:
    """Render the admin stat grid: row count, note count, key state, backend."""
    row_count = len(engine.dataset)
    note_count = len(engine.notes)
    key_ok = api_key_active()
    backend = "LLM (Claude)" if key_ok else "SML (Local)"
    return f"""<div class="forge-stat-grid">
<div class="forge-stat"><div class="forge-stat-value">{row_count}</div><div class="forge-stat-label">Training Rows</div></div>
<div class="forge-stat"><div class="forge-stat-value">{note_count}</div><div class="forge-stat-label">SME Notes</div></div>
<div class="forge-stat"><div class="forge-stat-value">{"β" if key_ok else "β"}</div><div class="forge-stat-label">API Key</div></div>
<div class="forge-stat"><div class="forge-stat-value" style="font-size:1.1rem;padding-top:0.5rem">{backend}</div><div class="forge-stat-label">Active Backend</div></div>
</div>"""
def format_quote_inputs(qi: Dict[str, Any]) -> str:
    """Format the structured quote-inputs dict into labeled text sections.

    Fields are rendered in a fixed order; list values become bullet lines
    and missing/blank values show a placeholder.
    """
    if not qi:
        return ""
    ordered_fields = ["application", "airflow_cfm", "dust_or_material", "collector_type",
                      "fan_notes", "material_of_construction", "filter_media", "safety_notes", "open_questions"]
    sections = []
    for field in ordered_fields:
        raw = qi.get(field, "")
        if isinstance(raw, list):
            rendered = "\n".join(f" Β· {clean_text(v)}" for v in raw if clean_text(v))
        else:
            rendered = clean_text(raw)
        label = field.replace('_', ' ').upper()
        sections.append(f"{label}\n{rendered or '[Not provided]'}")
    return "\n\n".join(sections)
def format_assumptions(items: List[str]) -> str:
    """Render assumption bullets, or a placeholder when the list is empty."""
    if items:
        bullets = [f"Β· {clean_text(i)}" for i in items if clean_text(i)]
        return "\n".join(bullets)
    return "[None listed]"
| # ============================================================ | |
| # Action handlers | |
| # ============================================================ | |
def generate_quote_action(request_text, sme_text, top_k, temperature, api_key_override):
    """Run the engine for one request and map the result onto the 7 UI outputs.

    Any failure (engine or formatting) is caught and surfaced as an inline
    error alert instead of a stack trace.
    """
    try:
        result = engine.generate_quote(
            request_text=request_text,
            sme_text=sme_text,
            top_k=int(top_k),
            temperature=float(temperature),
            api_key_override=api_key_override,
        )
        is_llm = result.get("backend", "sml") == "llm"
        badge_text = "⬀ LLM · Claude" if is_llm else "⬀ SML · Local Inference"
        badge_css = "success" if is_llm else "warn"
        return (
            result.get("information_extracted", ""),
            result.get("design", ""),
            format_quote_inputs(result.get("quote_inputs", {})),
            format_assumptions(result.get("assumptions", [])),
            result.get("retrieved_examples", pd.DataFrame()),
            result.get("raw_model_output", ""),
            f'<div class="forge-alert {badge_css}">{badge_text}</div>',
        )
    except Exception as e:
        blank = pd.DataFrame(columns=["Request", "Information Extracted", "Design", "Similarity"])
        return "", "", "", str(e), blank, "", f'<div class="forge-alert error">Error: {e}</div>'
def save_generated_row(request_text, info, design):
    """Persist a generated (request, extraction, design) triple as a new row."""
    cleaned = [clean_text(request_text), clean_text(info), clean_text(design)]
    if not any(cleaned):
        raise gr.Error("Nothing to save.")
    request_text, info, design = cleaned
    new_row = pd.DataFrame([{"Request": request_text, "Information Extracted": info, "Design": design}])
    with DATA_LOCK:
        bundle = store.load_bundle()
        bundle.dataset = pd.concat([bundle.dataset, new_row], ignore_index=True)
        store.save_bundle(bundle)
    engine.reload()
    return "β Row saved.", get_dataset_preview(), get_note_preview(), gr.Dropdown(choices=get_row_choices(), value=None), get_downloadable_path(), status_html()
def add_request_only(request_text):
    """Append a request-only row (columns B and C left blank)."""
    text = clean_text(request_text)
    if not text:
        raise gr.Error("Enter a request.")
    new_row = pd.DataFrame([{"Request": text, "Information Extracted": "", "Design": ""}])
    with DATA_LOCK:
        bundle = store.load_bundle()
        bundle.dataset = pd.concat([bundle.dataset, new_row], ignore_index=True)
        store.save_bundle(bundle)
    engine.reload()
    return "β Request appended.", get_dataset_preview(), get_note_preview(), gr.Dropdown(choices=get_row_choices(), value=None), get_downloadable_path(), status_html()
def add_full_training_row(req, sme, design):
    """Append a complete A/B/C row; only the request column is mandatory."""
    req = clean_text(req)
    if not req:
        raise gr.Error("Request field required.")
    new_row = pd.DataFrame([{"Request": req, "Information Extracted": clean_text(sme), "Design": clean_text(design)}])
    with DATA_LOCK:
        bundle = store.load_bundle()
        bundle.dataset = pd.concat([bundle.dataset, new_row], ignore_index=True)
        store.save_bundle(bundle)
    engine.reload()
    return "β Full row added.", get_dataset_preview(), get_note_preview(), gr.Dropdown(choices=get_row_choices(), value=None), get_downloadable_path(), status_html()
def add_global_sme_note(note_text):
    """Append one free-form note to the SME_Notes extra sheet."""
    text = clean_text(note_text)
    if not text:
        raise gr.Error("Enter a note.")
    with DATA_LOCK:
        bundle = store.load_bundle()
        # Create the notes sheet on first use (single unnamed column).
        existing = bundle.extra_sheets.get(NOTES_SHEET, pd.DataFrame(columns=[0]))
        bundle.extra_sheets[NOTES_SHEET] = pd.concat([existing, pd.DataFrame([[text]])], ignore_index=True)
        store.save_bundle(bundle)
    engine.reload()
    return "β Note added.", get_dataset_preview(), get_note_preview(), gr.Dropdown(choices=get_row_choices(), value=None), get_downloadable_path(), status_html()
def load_row_for_edit(row_id):
    """Fetch the A/B/C fields of the selected row; blanks when unset/missing."""
    if row_id is None:
        return "", "", ""
    preview = get_dataset_preview()
    match = preview[preview["row_id"] == int(row_id)]
    if match.empty:
        return "", "", ""
    record = match.iloc[0]
    return record["Request"], record["Information Extracted"], record["Design"]
def update_row_fields(row_id, req, sme, design):
    """Overwrite all three fields of the selected row with cleaned values."""
    if row_id is None:
        raise gr.Error("Select a row.")
    with DATA_LOCK:
        bundle = store.load_bundle()
        df = bundle.dataset.copy().reset_index(drop=True)
        # row_id is 1-based in the UI; convert to a 0-based index.
        idx = int(row_id) - 1
        if not (0 <= idx < len(df)):
            raise gr.Error("Row out of range.")
        df.at[idx, "Request"] = clean_text(req)
        df.at[idx, "Information Extracted"] = clean_text(sme)
        df.at[idx, "Design"] = clean_text(design)
        bundle.dataset = df
        store.save_bundle(bundle)
    engine.reload()
    return f"β Row {row_id} updated.", get_dataset_preview(), get_note_preview(), gr.Dropdown(choices=get_row_choices(), value=row_id), get_downloadable_path(), status_html()
def append_sme_to_row(row_id, sme_text):
    """Append SME knowledge onto the selected row's extraction column."""
    if row_id is None:
        raise gr.Error("Select a row.")
    sme_text = clean_text(sme_text)
    if not sme_text:
        raise gr.Error("Enter SME knowledge.")
    with DATA_LOCK:
        bundle = store.load_bundle()
        df = bundle.dataset.copy().reset_index(drop=True)
        # row_id is 1-based in the UI; convert to a 0-based index.
        idx = int(row_id) - 1
        if not (0 <= idx < len(df)):
            raise gr.Error("Row out of range.")
        existing = clean_text(df.at[idx, "Information Extracted"])
        # Append on a new line when the cell already has content.
        df.at[idx, "Information Extracted"] = f"{existing}\n{sme_text}".strip() if existing else sme_text
        bundle.dataset = df
        store.save_bundle(bundle)
    engine.reload()
    return f"β SME appended to row {row_id}.", get_dataset_preview(), get_note_preview(), gr.Dropdown(choices=get_row_choices(), value=row_id), get_downloadable_path(), status_html()
def replace_dataset(uploaded_file):
    """Swap the whole workbook for an uploaded .xlsx file."""
    if not uploaded_file:
        raise gr.Error("Upload a .xlsx file first.")
    with DATA_LOCK:
        store.replace_from_upload(uploaded_file)
    engine.reload()
    return "β Workbook replaced.", get_dataset_preview(), get_note_preview(), gr.Dropdown(choices=get_row_choices(), value=None), get_downloadable_path(), status_html()
def export_training_assets():
    """Write CSV and JSONL snapshots of the dataset into EXPORT_DIR."""
    engine.reload()
    df = engine.dataset.copy()
    csv_path = EXPORT_DIR / "quote_request_training.csv"
    jsonl_path = EXPORT_DIR / "quote_request_training.jsonl"
    df.to_csv(csv_path, index=False)
    with open(jsonl_path, "w", encoding="utf-8") as f:
        for _, row in df.iterrows():
            record = {
                "request": clean_text(row.get("Request", "")),
                "sme_knowledge": clean_text(row.get("Information Extracted", "")),
                "design": clean_text(row.get("Design", "")),
            }
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
    return f"β Exported {len(df)} rows.", str(csv_path), str(jsonl_path)
def admin_login(password):
    """Gate the admin workspace behind ADMIN_PASSWORD.

    Returns (login_panel_update, workspace_update, status_message).
    Uses a constant-time comparison (hmac.compare_digest) so the check does
    not leak password-prefix timing information on this untrusted input.
    """
    import hmac  # stdlib; local import keeps this fix self-contained
    if hmac.compare_digest(str(password or ""), ADMIN_PASSWORD):
        return gr.update(visible=False), gr.update(visible=True), "β Access granted."
    return gr.update(visible=True), gr.update(visible=False), "β Invalid password."
def set_api_key_session(key, override_state):
    """Validate a pasted API key; keep it in session state only when accepted.

    On rejection or blank input the previous state value is preserved.
    """
    candidate = key.strip()
    if not candidate:
        return override_state, '<div class="forge-alert error">β Enter an API key.</div>'
    if _get_anthropic_client(candidate):
        return candidate, '<div class="forge-alert success">β API key accepted β LLM backend active.</div>'
    return override_state, '<div class="forge-alert error">β Key rejected or Anthropic SDK not available.</div>'
def refresh_all():
    """Recompute every admin-panel view from the on-disk workbook."""
    return (
        get_dataset_preview(),
        get_note_preview(),
        gr.Dropdown(choices=get_row_choices(), value=None),
        get_downloadable_path(),
        status_html(),
    )
| # ============================================================ | |
| # UI | |
| # ============================================================ | |
# UI definition: two tabs (public REQUEST INTAKE, password-gated ADMIN
# TERMINAL) followed by all event wiring. Components created above are
# bound to the handler functions at the bottom of this `with` block.
with gr.Blocks(title=APP_TITLE, css=CUSTOM_CSS, theme=gr.themes.Base()) as demo:
    # Per-session Anthropic key override — held in browser session state only.
    api_key_state = gr.State("")
    # ββ Header ββ
    gr.HTML(f"""
<div class="forge-header">
<div class="forge-logo">
<span class="forge-logo-primary">{APP_TITLE}</span>
<span class="forge-logo-sub">{APP_SUBTITLE}</span>
</div>
<div style="display:flex;align-items:center;gap:1rem;">
<span class="forge-badge">MVP v2</span>
<span class="forge-badge" id="hdr-backend">{"LLM ACTIVE" if api_key_active() else "SML MODE"}</span>
</div>
</div>
""")
    with gr.Tabs(elem_classes="main-tabs"):
        # ββββββββββββββββββββββββββββββββββββββββββββββββββββ
        # TAB 1 Β· SUBMIT A REQUEST (public intake)
        # ββββββββββββββββββββββββββββββββββββββββββββββββββββ
        with gr.Tab("REQUEST INTAKE"):
            gr.HTML("""
<div class="forge-hero">
<div>
<div class="forge-section-label">Quote Intelligence</div>
<div class="forge-section-title">SUBMIT YOUR<br>QUOTE REQUEST</div>
<div class="forge-section-desc">
Paste your customer request below. Our engine β powered by Claude LLM or our local SML inference model β will extract application details, identify hazards, and generate quote-ready design guidance in seconds.
</div>
</div>
<div class="forge-hero-visual">
<div class="forge-metric-row">
<div class="forge-metric"><div class="forge-metric-val">AβC</div><div class="forge-metric-key">Request to Design</div></div>
<div class="forge-metric"><div class="forge-metric-val">SML</div><div class="forge-metric-key">Offline Fallback</div></div>
<div class="forge-metric"><div class="forge-metric-val">β</div><div class="forge-metric-key">Training Loop</div></div>
</div>
<div class="forge-card" style="font-family:var(--forge-mono);font-size:0.78rem;color:var(--forge-muted);line-height:1.9;">
<div style="color:var(--forge-amber);margin-bottom:0.5rem;font-size:0.65rem;letter-spacing:0.2em;">HOW IT WORKS</div>
01 Β· Paste customer request<br>
02 Β· Add optional SME context<br>
03 Β· Engine retrieves similar cases<br>
04 Β· LLM or SML generates guidance<br>
05 Β· Review β save to training set
</div>
</div>
</div>
<div style="max-width:1400px;margin:0 auto;padding:0 2rem;">
""")
            # API key banner (shown when no key in env)
            api_key_banner_visible = not api_key_active()
            with gr.Group(visible=api_key_banner_visible, elem_id="api-key-section") as api_key_section:
                gr.HTML("""
<div class="forge-alert warn" style="margin-bottom:0.75rem;">
β <strong>No Anthropic API key detected.</strong> Running in SML (local inference) mode.
Enter a key below to enable Claude LLM backend. Or continue β SML works offline.
</div>
""")
                with gr.Row():
                    api_key_input = gr.Textbox(
                        label="Anthropic API Key (session only β not stored)",
                        placeholder="sk-ant-...",
                        type="password",
                        scale=4,
                    )
                    api_key_btn = gr.Button("Activate LLM", variant="primary", scale=1)
                api_key_status = gr.HTML("")
            with gr.Row():
                with gr.Column(scale=3):
                    request_input = gr.Textbox(
                        label="Customer Request",
                        lines=7,
                        placeholder="e.g. 15000 CFM pharmaceutical powder, corrosive dust, need fan and collector recommendation",
                    )
                    sme_input = gr.Textbox(
                        label="SME Knowledge / Domain Notes (optional)",
                        lines=5,
                        placeholder="Add expert context that should influence design guidance...",
                    )
                    with gr.Row():
                        top_k_input = gr.Slider(1, 8, value=4, step=1, label="Historical Examples")
                        temperature_input = gr.Slider(0.0, 1.0, value=0.1, step=0.05, label="Temperature (LLM only)")
                    with gr.Row():
                        generate_btn = gr.Button("Generate Quote Guidance", variant="primary")
                        save_generated_btn = gr.Button("Save as Training Row", variant="secondary")
                    save_generated_status = gr.HTML("")
                with gr.Column(scale=3):
                    backend_indicator = gr.HTML(f'<div class="forge-alert info" style="margin-bottom:1rem;">Select backend and submit request to begin.</div>')
                    info_output = gr.Textbox(label="Information Extracted (Col B)", lines=7)
                    design_output = gr.Textbox(label="Design / Quote Guidance (Col C)", lines=8)
            with gr.Row():
                with gr.Column():
                    quote_inputs_output = gr.Textbox(label="Structured Quote Inputs", lines=14)
                with gr.Column():
                    assumptions_output = gr.Textbox(label="Assumptions & Unknowns", lines=10)
            retrieved_output = gr.Dataframe(
                headers=["Request", "Information Extracted", "Design", "Similarity"],
                datatype=["str", "str", "str", "number"],
                label="Retrieved Historical Examples",
                interactive=False,
                wrap=True,
            )
            raw_output = gr.Textbox(label="Raw Model Output (debug)", lines=5, visible=False)
            gr.HTML("</div>")  # close forge-page
        # ββββββββββββββββββββββββββββββββββββββββββββββββββββ
        # TAB 2 Β· ADMIN TERMINAL
        # ββββββββββββββββββββββββββββββββββββββββββββββββββββ
        with gr.Tab("ADMIN TERMINAL"):
            # ββ Login gate ββ
            with gr.Group(visible=True) as admin_login_panel:
                gr.HTML("""
<div class="forge-page" style="max-width:480px;">
<div class="forge-terminal-header">
<div class="terminal-dot" style="background:#ef4444"></div>
<div class="terminal-dot" style="background:#f59e0b"></div>
<div class="terminal-dot" style="background:#22c55e"></div>
<span style="font-family:var(--forge-mono);font-size:0.7rem;color:var(--forge-muted);margin-left:0.5rem;">QUOTEFORGE ADMIN TERMINAL β RESTRICTED ACCESS</span>
</div>
<div class="forge-terminal-body">
<span style="color:var(--forge-amber);">QuoteForge</span> <span style="color:var(--forge-muted);">v2.0</span> β Authentication required<br>
<span style="color:var(--forge-muted);">Set ADMIN_PASSWORD env var to change default (admin1234)</span>
</div>
</div>
""")
                with gr.Row(elem_classes="forge-page"):
                    admin_pw_input = gr.Textbox(label="Admin Password", type="password", placeholder="Enter password...", scale=3)
                    admin_login_btn = gr.Button("Authenticate", variant="primary", scale=1)
                admin_login_status = gr.HTML("")
            # ββ Admin workspace (hidden until auth) ββ
            with gr.Group(visible=False) as admin_workspace:
                gr.HTML("""
<div class="forge-page">
<div class="forge-terminal-header">
<div class="terminal-dot" style="background:#ef4444"></div>
<div class="terminal-dot" style="background:#f59e0b"></div>
<div class="terminal-dot" style="background:#22c55e"></div>
<span style="font-family:var(--forge-mono);font-size:0.7rem;color:var(--forge-muted);margin-left:0.5rem;">QUOTEFORGE ADMIN TERMINAL β SESSION ACTIVE</span>
</div>
""")
                with gr.Row(elem_classes="forge-page"):
                    admin_status_html = gr.HTML(status_html())
                    refresh_admin_btn = gr.Button("β» Refresh", variant="secondary")
                gr.HTML('<div class="forge-page"><div class="forge-section-label">Dataset Management</div></div>')
                with gr.Tabs(elem_classes="forge-page"):
                    with gr.Tab("Add Request Only"):
                        add_request_box = gr.Textbox(label="New Request (Column A)", lines=6)
                        add_request_btn = gr.Button("Append to Column A", variant="primary")
                        add_request_status = gr.HTML("")
                    with gr.Tab("Add Full A/B/C Row"):
                        full_request_box = gr.Textbox(label="Request (A)", lines=4)
                        full_sme_box = gr.Textbox(label="Information Extracted (B)", lines=5)
                        full_design_box = gr.Textbox(label="Design Guidance (C)", lines=6)
                        add_full_btn = gr.Button("Append Full Row", variant="primary")
                        add_full_status = gr.HTML("")
                    with gr.Tab("Append SME to Row"):
                        row_selector = gr.Dropdown(choices=get_row_choices(), label="Select Row", value=None)
                        append_sme_box = gr.Textbox(label="SME Knowledge to Append", lines=6)
                        append_sme_btn = gr.Button("Append to Selected Row", variant="primary")
                        append_sme_status = gr.HTML("")
                    with gr.Tab("Edit Row"):
                        load_row_btn = gr.Button("Load Selected Row", variant="secondary")
                        edit_request_box = gr.Textbox(label="Request (A)", lines=4)
                        edit_sme_box = gr.Textbox(label="Information Extracted (B)", lines=5)
                        edit_design_box = gr.Textbox(label="Design Guidance (C)", lines=6)
                        update_row_btn = gr.Button("Save Changes", variant="primary")
                        update_row_status = gr.HTML("")
                    with gr.Tab("Global SME Notes"):
                        global_note_box = gr.Textbox(label="New Global SME Note", lines=4)
                        add_note_btn = gr.Button("Save Note", variant="primary")
                        add_note_status = gr.HTML("")
                gr.HTML('<div class="forge-page"><div class="forge-section-label">Dataset Viewer & Export</div></div>')
                with gr.Row(elem_classes="forge-page"):
                    with gr.Column(scale=3):
                        dataset_preview = gr.Dataframe(
                            value=get_dataset_preview(),
                            headers=["row_id"] + HEADERS,
                            datatype=["number", "str", "str", "str"],
                            label="Training Dataset",
                            interactive=False,
                            wrap=True,
                        )
                        notes_preview = gr.Dataframe(
                            value=get_note_preview(),
                            headers=["note_id", "SME Note"],
                            datatype=["number", "str"],
                            label="Global SME Notes",
                            interactive=False,
                            wrap=True,
                        )
                    with gr.Column(scale=1):
                        upload_file = gr.File(label="Upload Replacement Workbook (.xlsx)", file_types=[".xlsx"], type="filepath")
                        replace_btn = gr.Button("Replace Dataset", variant="secondary")
                        replace_status = gr.HTML("")
                        dataset_download = gr.File(label="Download Current Workbook", value=get_downloadable_path(), interactive=False)
                        export_btn = gr.Button("Export ML Assets", variant="primary")
                        export_status = gr.HTML("")
                        export_csv_file = gr.File(label="CSV Export", interactive=False)
                        export_jsonl_file = gr.File(label="JSONL Export", interactive=False)
                gr.HTML("</div>")  # close terminal body div
    # ββββββββββββββββββββββββββββββββββββββββββββββββββββ
    # Event wiring
    # ββββββββββββββββββββββββββββββββββββββββββββββββββββ
    # API key activation
    api_key_btn.click(
        fn=set_api_key_session,
        inputs=[api_key_input, api_key_state],
        outputs=[api_key_state, api_key_status],
    )
    # Generate
    generate_btn.click(
        fn=generate_quote_action,
        inputs=[request_input, sme_input, top_k_input, temperature_input, api_key_state],
        outputs=[info_output, design_output, quote_inputs_output, assumptions_output, retrieved_output, raw_output, backend_indicator],
    )
    # Save generated row
    save_generated_btn.click(
        fn=save_generated_row,
        inputs=[request_input, info_output, design_output],
        outputs=[save_generated_status, dataset_preview, notes_preview, row_selector, dataset_download, admin_status_html],
    )
    # Admin login
    admin_login_btn.click(
        fn=admin_login,
        inputs=[admin_pw_input],
        outputs=[admin_login_panel, admin_workspace, admin_login_status],
    )
    # Admin actions
    add_request_btn.click(
        fn=add_request_only,
        inputs=[add_request_box],
        outputs=[add_request_status, dataset_preview, notes_preview, row_selector, dataset_download, admin_status_html],
    )
    add_full_btn.click(
        fn=add_full_training_row,
        inputs=[full_request_box, full_sme_box, full_design_box],
        outputs=[add_full_status, dataset_preview, notes_preview, row_selector, dataset_download, admin_status_html],
    )
    append_sme_btn.click(
        fn=append_sme_to_row,
        inputs=[row_selector, append_sme_box],
        outputs=[append_sme_status, dataset_preview, notes_preview, row_selector, dataset_download, admin_status_html],
    )
    load_row_btn.click(
        fn=load_row_for_edit,
        inputs=[row_selector],
        outputs=[edit_request_box, edit_sme_box, edit_design_box],
    )
    update_row_btn.click(
        fn=update_row_fields,
        inputs=[row_selector, edit_request_box, edit_sme_box, edit_design_box],
        outputs=[update_row_status, dataset_preview, notes_preview, row_selector, dataset_download, admin_status_html],
    )
    add_note_btn.click(
        fn=add_global_sme_note,
        inputs=[global_note_box],
        outputs=[add_note_status, dataset_preview, notes_preview, row_selector, dataset_download, admin_status_html],
    )
    replace_btn.click(
        fn=replace_dataset,
        inputs=[upload_file],
        outputs=[replace_status, dataset_preview, notes_preview, row_selector, dataset_download, admin_status_html],
    )
    export_btn.click(
        fn=export_training_assets,
        inputs=[],
        outputs=[export_status, export_csv_file, export_jsonl_file],
    )
    refresh_admin_btn.click(
        fn=refresh_all,
        inputs=[],
        outputs=[dataset_preview, notes_preview, row_selector, dataset_download, admin_status_html],
    )
def main() -> None:
    """Start the Gradio server on all interfaces, honoring the PORT env var."""
    port = int(os.getenv("PORT", "7860"))
    demo.queue(default_concurrency_limit=8).launch(
        server_name="0.0.0.0",
        server_port=port,
    )
# Script entry point — importing the module defines the UI without launching it.
if __name__ == "__main__":
    main()