Spaces:

SuriRaja
/

hf_tender

Sleeping

App Files Files Community

hf_tender / app.py

SuriRaja

Upload 3 files

a413527 verified 7 months ago

raw

history blame contribute delete

22.4 kB

	import os, io, random, numpy as np, pandas as pd, streamlit as st
	from datetime import datetime
	from sklearn.compose import ColumnTransformer
	from sklearn.pipeline import Pipeline
	from sklearn.metrics import roc_auc_score, average_precision_score, brier_score_loss, classification_report
	from sklearn.preprocessing import OneHotEncoder, StandardScaler
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
	from sklearn.isotonic import IsotonicRegression
	from sklearn.metrics.pairwise import cosine_similarity
	from PyPDF2 import PdfReader
	import joblib

	st.set_page_config(page_title="NTPC/SCCL Tender Win Intelligence – POC v3", layout="wide")
	st.title("NTPC/SCCL Tender Win Intelligence – POC v3")
	st.caption("Win probability (calibrated) • Compliance-gap • Batch scoring • Decision bands • Price-band guidance • RAG exemplars • Real CSV upload")

	SCHEMA = {...} # populated later at runtime for readability

	TEXT_HELP = """Jargons & Checks
	- PQ = Pre-Qualification (Turnover / Similar Work / OEM / Solvency / Key Personnel)
	- OEM Back-to-Back = Authorisation/MoU from OEM for supply/spares/warranty
	- QCBS vs L1 = Quality+Cost Based Selection vs Lowest Price
	- Corrigenda Watch = Multiple corrigenda → scope/terms changes; track closely
	- MSME / Make-in-India / Local Content = Preferential clauses; ensure declarations & CA certificates
	- Integrity Pact / EMD = Use correct formats and exemptions if eligible
	"""

	# ---------------- Synthetic data w/ price ratio target ----------------
	def synth_text(cat, strict_pq, oem, site_visit):
	bits = []
	if cat == "Solar EPC":
	bits.append("EPC for solar PV with SCADA and remote monitoring")
	elif cat == "OB Removal":
	bits.append("Overburden removal with HEMM deployment and production targets")
	elif cat == "CHP Maintenance":
	bits.append("Conveyor belt replacement, chute liners, idlers, pulleys, gearboxes")
	elif cat == "E&M Spares":
	bits.append("Supply of OEM-equivalent spares with QA/QC and test certificates")
	else:
	bits.append(f"{cat} scope including manpower, materials, QA/QC")
	if strict_pq:
	bits.append("Strict PQ: turnover > 1.5x estimate; similar work in last 7 years")
	else:
	bits.append("Moderate PQ with MSME preference applicable")
	if oem:
	bits.append("OEM authorization letter mandatory")
	if site_visit:
	bits.append("Pre-bid site visit is mandatory")
	return ". ".join(bits)

	def make_synth_df(N=900, seed=42):
	random.seed(seed); np.random.seed(seed)
	orgs = ["NTPC","SCCL"]
	cats = ["CHP Maintenance","OB Removal","Solar EPC","E&M Spares","BoP O&M","Overhaul","Ash Handling"]
	regions = ["ER","SR","WR","NR"]
	qcbs_l1 = ["L1","QCBS"]
	rows = []
	start_date = np.datetime64("2022-01-01")
	for i in range(N):
	issue = start_date + np.timedelta64(int(i*2), "D")
	org = np.random.choice(orgs, p=[0.6,0.4])
	cat = np.random.choice(cats, p=[0.22,0.18,0.15,0.15,0.1,0.1,0.1])
	region = np.random.choice(regions)
	est = np.round(np.random.lognormal(mean=1.8, sigma=0.6), 2)
	emd = np.round(np.clip(np.random.normal(2.0, 0.7), 0.5, 5.0), 2)
	ql = np.random.choice(qcbs_l1, p=[0.75,0.25])
	window = int(np.clip(np.random.normal(21, 7), 7, 45))
	corrig = int(np.clip(np.random.poisson(0.6), 0, 5))
	pq_turn = np.round(np.maximum(est*np.random.uniform(0.6,2.2),0.5),2)
	pq_sim = np.round(np.maximum(est*np.random.uniform(0.5,2.0),0.3),2)
	site_visit = np.random.binomial(1, 0.3 if cat!="Solar EPC" else 0.5)
	mii = np.random.binomial(1, 0.6 if org=="NTPC" else 0.4)
	lcr = np.round(np.clip(np.random.normal(30, 15), 0, 70), 1)
	msme = np.random.binomial(1, 0.35)
	retender = 1 if (corrig>=3 and random.random()<0.2) else 0
	exp_bidders = int(np.clip(np.random.poisson(3 + (corrig>0) + (ql=="L1")), 1, 12))

	hist = np.round(np.clip(np.random.beta(2,5)+(cat=="CHP Maintenance")*0.05,0,1),3)
	oem = np.random.binomial(1, 0.55 if cat in ["E&M Spares","CHP Maintenance","Overhaul"] else 0.35)
	bidder_sim = np.round(np.maximum(np.random.normal(pq_sim0.9, pq_sim0.3),0.2),2)
	bidder_turn = np.round(np.maximum(np.random.normal(pq_turn0.95, pq_turn0.35),0.3),2)
	safety = np.round(np.clip(np.random.normal(0.75,0.15),0.2,0.99),3)

	text = synth_text(cat, strict_pq=(pq_turn>est1.4 or pq_sim>est1.4), oem=oem==1, site_visit=site_visit==1)

	# win label
	logit = (0.3(oem) + 0.6(hist-0.5) +
	0.25np.tanh((bidder_turn - pq_turn)/max(0.5, pq_turn0.6)) +
	0.25np.tanh((bidder_sim - pq_sim)/max(0.5, pq_sim0.6)) -
	0.15(corrig) - 0.2(retender) - 0.1*(exp_bidders-3)/5 +
	0.3(safety-0.7) + (0.15 if ql=="QCBS" else 0.0) + (0.1 if msme and est<2.0 else 0.0) - 0.05(site_visit))
	p = 1/(1+np.exp(-logit))
	win = np.random.binomial(1, np.clip(p, 0.02, 0.9))

	# price-to-estimate ratio target (for L1-like behavior; QCBS slightly higher ratios)
	base_ratio = np.clip(np.random.normal(0.98 if ql=="L1" else 1.03, 0.05), 0.85, 1.15)
	# increase ratio with high corrigenda/retender; decrease with OEM tie-up (efficiency)
	ratio = base_ratio + 0.01corrig + 0.02retender - 0.01oem + 0.01np.random.randn()
	ratio = float(np.clip(ratio, 0.80, 1.20))

	rows.append({
	"tender_id": f"T{i:05d}", "issue_date": str(issue), "org": org,
	"package_name": f"{cat} – {region}", "category": cat, "region": region,
	"est_value_crore": est, "emd_percent": emd, "qcbs_or_l1": ql,
	"bid_window_days": window, "corrigenda_count": corrig,
	"pq_turnover_cr": pq_turn, "pq_similar_work_cr": pq_sim,
	"site_visit_required": site_visit, "make_in_india_clause": mii,
	"local_content_req_pct": lcr, "msme_pref": msme,
	"retender_flag": retender, "expected_bidders": exp_bidders,
	"tender_text": text, "bidder_hist_winrate_cat": hist, "bidder_oem_tieup": oem,
	"bidder_similar_work_cr": bidder_sim, "bidder_turnover_cr": bidder_turn,
	"bidder_safety_score": safety, "label_win": win, "price_ratio": ratio
	})
	return pd.DataFrame(rows)

	def temporal_split(df, frac=0.8):
	df = df.copy()
	df["issue_date"] = pd.to_datetime(df["issue_date"])
	df = df.sort_values("issue_date").reset_index(drop=True)
	cut = int(len(df)*frac)
	return df.iloc[:cut].reset_index(drop=True), df.iloc[cut:].reset_index(drop=True)

	# ---------------- Preprocess & models ----------------
	def build_preprocessor():
	num_cols = ["est_value_crore","emd_percent","bid_window_days","corrigenda_count",
	"pq_turnover_cr","pq_similar_work_cr","local_content_req_pct",
	"expected_bidders","bidder_hist_winrate_cat","bidder_similar_work_cr",
	"bidder_turnover_cr","bidder_safety_score"]
	cat_cols = ["org","category","region","qcbs_or_l1","site_visit_required","make_in_india_clause",
	"msme_pref","retender_flag","bidder_oem_tieup"]
	pre = ColumnTransformer([
	("num", StandardScaler(with_mean=False), num_cols),
	("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
	("txt", TfidfVectorizer(max_features=12000, ngram_range=(1,2)), "tender_text")
	])
	return pre

	def compliance_gap(payload):
	missing = []
	text = (payload.get("tender_text") or "").lower()
	if "oem" in text and int(payload.get("bidder_oem_tieup",0)) == 0:
	missing.append("OEM Authorisation / Back-to-Back")
	if float(payload.get("bidder_turnover_cr",0)) < float(payload.get("pq_turnover_cr",0)):
	missing.append("PQ Turnover (add JV partner or audited statements)")
	if float(payload.get("bidder_similar_work_cr",0)) < float(payload.get("pq_similar_work_cr",0)):
	missing.append("PQ Similar Work (JV/Consortium or additional work orders)")
	if "site visit" in text and int(payload.get("site_visit_required",0)) == 1:
	missing.append("Pre-bid Site Visit Proof/Declaration")
	if int(payload.get("msme_pref",0)) == 1:
	missing.append("MSME Certificate / Declaration if eligible")
	if int(payload.get("make_in_india_clause",0)) == 1:
	missing.append("Local Content Declaration (CA certified)")
	if float(payload.get("emd_percent",0)) > 0:
	missing.append("EMD/BG instrument per format (or exemption, if applicable)")
	return missing

	# training (classification + calibration + regression)
	@st.cache_resource(show_spinner=True)
	def train_all(df):
	df_lab = df.dropna(subset=["label_win"])
	if len(df_lab) < 50:
	df_lab = make_synth_df(N=900, seed=42)
	train_df, test_df = temporal_split(df_lab, frac=0.8)
	y_train = train_df["label_win"].values
	y_test = test_df["label_win"].values

	pre = build_preprocessor()
	clf = RandomForestClassifier(
	n_estimators=500, max_depth=None, min_samples_split=4, min_samples_leaf=2,
	n_jobs=-1, random_state=42, class_weight="balanced_subsample"
	)
	cls_pipe = Pipeline([("pre", pre), ("clf", clf)])
	cls_pipe.fit(train_df.drop(columns=["label_win","price_ratio"]), y_train)

	# calibration
	val_cut = int(len(train_df)*0.8)
	X_val = train_df.iloc[val_cut:].drop(columns=["label_win","price_ratio"])
	y_val = y_train[val_cut:]
	p_val = cls_pipe.predict_proba(X_val)[:,1]
	cal = IsotonicRegression(out_of_bounds="clip").fit(p_val, y_val)

	# metrics
	p_test = cls_pipe.predict_proba(test_df.drop(columns=["label_win","price_ratio"]))[:,1]
	p_test_cal = cal.transform(p_test)
	metrics = {
	"train_size": int(len(train_df)),
	"test_size": int(len(test_df)),
	"roc_auc": float(roc_auc_score(y_test, p_test)),
	"ap": float(average_precision_score(y_test, p_test)),
	"brier": float(brier_score_loss(y_test, p_test_cal)),
	"report": classification_report(y_test, (p_test_cal>=0.5).astype(int), digits=3)
	}

	# regression for price ratio
	y_ratio_train = train_df["price_ratio"].values
	reg = RandomForestRegressor(n_estimators=400, random_state=42, n_jobs=-1)
	reg_pipe = Pipeline([("pre", pre), ("reg", reg)])
	reg_pipe.fit(train_df.drop(columns=["label_win","price_ratio"]), y_ratio_train)

	return {"cls": cls_pipe, "cal": cal, "reg": reg_pipe}, metrics

	# ---------------- Data input ----------------
	st.sidebar.header("Training Data")
	mode = st.sidebar.radio("Choose source", ["Synthetic (default)","Upload CSV (real)"], index=0)
	up = None
	if mode == "Upload CSV (real)":
	up = st.sidebar.file_uploader("Upload CSV (schema-compatible)", type=["csv"])

	if up is not None:
	df = pd.read_csv(up)
	else:
	df = make_synth_df(N=900, seed=42)

	with st.spinner("Training classification, calibration, and price model..."):
	arts, metrics = train_all(df)

	# decision bands
	st.sidebar.header("Decision Bands")
	go_thr = st.sidebar.slider("GO threshold (≥)", 0.50, 0.95, 0.65, 0.01)
	cond_lo = st.sidebar.slider("Conditional band (low)", 0.30, go_thr-0.01, 0.45, 0.01)
	cond_hi = go_thr
	def band_label(p):
	if p >= go_thr: return "GO"
	if cond_lo <= p < cond_hi: return "CONDITIONAL (Fix Gaps/Partner)"
	return "NO-BID / PARTNER"

	# exemplar upload (RAG)
	st.sidebar.header("RAG Exemplars (winning proposal snippets)")
	ex_files = st.sidebar.file_uploader("Upload .txt/.md/.pdf (multi-file)", type=["txt","md","pdf"], accept_multiple_files=True)
	ex_docs = []
	if ex_files:
	for f in ex_files:
	if f.name.lower().endswith(".pdf"):
	try:
	reader = PdfReader(f)
	text = ""
	for page in reader.pages[:10]: # light parse
	text += page.extract_text() or ""
	except Exception:
	text = ""
	else:
	text = f.read().decode("utf-8", errors="ignore")
	if text.strip():
	ex_docs.append({"name": f.name, "text": text})

	# build exemplar index
	rag_vec = None; rag_corpus = None
	if ex_docs:
	rag_corpus = [d["text"] for d in ex_docs]
	rag_vec = TfidfVectorizer(max_features=20000, ngram_range=(1,2)).fit(rag_corpus)
	rag_matrix = rag_vec.transform(rag_corpus)

	# metrics panel
	left, right = st.columns([1,1])
	with left:
	st.subheader("Evaluation (Temporal Test Split)")
	st.markdown(f"Train: {metrics['train_size']}    Test: {metrics['test_size']}")
	st.markdown(f"ROC-AUC: {metrics['roc_auc']:.3f} \nAverage Precision: {metrics['ap']:.3f} \nBrier (calibrated): {metrics['brier']:.3f}")
	st.code(metrics["report"], language="text")
	with right:
	st.subheader("Jargons / Checks")
	st.markdown(TEXT_HELP)

	st.markdown("---")

	# ---------------- Single scoring ----------------
	st.header("Score a Single Tender")
	with st.form("score_one"):
	c1,c2,c3 = st.columns(3)
	org = c1.selectbox("Organisation", ["NTPC","SCCL"])
	category = c2.selectbox("Classification / Package", ["CHP Maintenance","OB Removal","Solar EPC","E&M Spares","BoP O&M","Overhaul","Ash Handling"])
	region = c3.selectbox("Region", ["ER","SR","WR","NR"])
	c4,c5,c6 = st.columns(3)
	est_value_crore = c4.number_input("Estimated Value (Cr)", min_value=0.1, value=3.2, step=0.1)
	emd_percent = c5.number_input("EMD (%)", min_value=0.0, value=2.0, step=0.1)
	qcbs_or_l1 = c6.selectbox("Evaluation Method", ["L1","QCBS"])
	c7,c8,c9 = st.columns(3)
	bid_window_days = c7.number_input("Bid Window (days)", 1, 60, 21)
	corrigenda_count = c8.number_input("Corrigenda Count", 0, 10, 1)
	expected_bidders = c9.number_input("Expected Bidders", 1, 30, 4)
	c10,c11,c12 = st.columns(3)
	pq_turnover_cr = c10.number_input("PQ Turnover (Cr)", min_value=0.0, value=5.0, step=0.1)
	pq_similar_work_cr = c11.number_input("PQ Similar Work (Cr)", min_value=0.0, value=4.0, step=0.1)
	local_content_req_pct = c12.number_input("Local Content Req. (%)", 0.0, 100.0, 30.0, 1.0)
	c13,c14,c15 = st.columns(3)
	site_visit_required = c13.selectbox("Pre-bid Site Visit?", [0,1])
	make_in_india_clause = c14.selectbox("Make in India?", [0,1])
	msme_pref = c15.selectbox("MSME Preference?", [0,1])
	c16,c17,c18 = st.columns(3)
	retender_flag = c16.selectbox("Re-Tender?", [0,1])
	bidder_hist_winrate_cat = c17.slider("Your Past Win-rate (0-1)", 0.0, 1.0, 0.58, 0.01)
	bidder_oem_tieup = c18.selectbox("OEM Tie-up?", [0,1])
	c19,c20,c21 = st.columns(3)
	bidder_similar_work_cr = c19.number_input("Your Similar Work (Cr)", 0.0, 1000.0, 4.5, 0.1)
	bidder_turnover_cr = c20.number_input("Your Avg Turnover (Cr)", 0.0, 1000.0, 6.0, 0.1)
	bidder_safety_score = c21.slider("HSE/ESG Safety Score (0-1)", 0.0, 1.0, 0.82, 0.01)
	tender_text = st.text_area("Scope/PQ Highlights", "Conveyor belt replacement and idler maintenance with OEM authorization preferred. Pre-bid site visit is mandatory. Turnover > 1.5x estimate.", height=110)
	tender_id = st.text_input("E-Tender Ref No.", "ADHOC001")
	issue_date = st.date_input("NIT Date", datetime.today().date())
	submitted = st.form_submit_button("Predict")

	if submitted:
	row = {"tender_id": tender_id, "issue_date": str(issue_date), "org": org,
	"package_name": f"{category} – {region}", "category": category, "region": region,
	"est_value_crore": est_value_crore, "emd_percent": emd_percent, "qcbs_or_l1": qcbs_or_l1,
	"bid_window_days": bid_window_days, "corrigenda_count": int(corrigenda_count),
	"pq_turnover_cr": pq_turnover_cr, "pq_similar_work_cr": pq_similar_work_cr,
	"site_visit_required": site_visit_required, "make_in_india_clause": make_in_india_clause,
	"local_content_req_pct": local_content_req_pct, "msme_pref": msme_pref,
	"retender_flag": retender_flag, "expected_bidders": int(expected_bidders),
	"tender_text": tender_text, "bidder_hist_winrate_cat": bidder_hist_winrate_cat,
	"bidder_oem_tieup": bidder_oem_tieup, "bidder_similar_work_cr": bidder_similar_work_cr,
	"bidder_turnover_cr": bidder_turnover_cr, "bidder_safety_score": bidder_safety_score}

	# win prob
	base = arts["cls"].predict_proba(pd.DataFrame([row]))[:,1][0]
	proba = float(arts["cal"].transform([base])[0])
	st.success(f"Calibrated win probability: {proba:.3f} \| Decision: {('GO' if proba>=0.65 else ('CONDITIONAL' if proba>=0.45 else 'NO-BID / PARTNER'))}")

	# compliance pointers
	tips = []
	gaps = compliance_gap(row)
	if gaps: tips.append("Compliance gaps: " + "; ".join(gaps))
	if bidder_oem_tieup == 0 and "oem" in tender_text.lower():
	tips.append("Secure OEM Back-to-Back / Authorisation letter.")
	if corrigenda_count >= 2:
	tips.append("High Corrigenda: monitor changes; tailor methodology and pricing accordingly.")
	if expected_bidders >= 6 and qcbs_or_l1 == "L1":
	tips.append("Crowded L1: tighten pricing, reduce optional scope, enhance delivery assurances.")
	if bidder_hist_winrate_cat < 0.45:
	tips.append("Strengthen past performance (completion certs, LD-free delivery, client references).")
	if tips:
	st.info("Improvement pointers:\n- " + "\n- ".join(tips))

	# price-band guidance
	ratio_pred = float(arts["reg"].predict(pd.DataFrame([row]))[0])
	# create a band around the prediction using heuristic ±IQR from training errors if available -> fallback ±3%
	# (Since cache doesn't keep residuals, use ±0.03 default)
	low = max(0.80, ratio_pred - 0.03)
	high = min(1.20, ratio_pred + 0.03)
	st.subheader("Suggested Price Band")
	st.markdown(f"- Predicted price-to-estimate ratio: `{ratio_pred:.3f}`")
	st.markdown(f"- Suggested band: {low:.3f} – {high:.3f} × Estimated Value")
	st.caption("Example: If estimate is ₹100.0 Cr, quote within ₹{:.1f} – ₹{:.1f} Cr.".format(100low, 100high))

	# RAG exemplar panel
	if 'rag_vec' in globals() and rag_vec is not None and ex_docs:
	st.subheader("RAG Exemplar Snippets (from your uploads)")
	query = (tender_text or "") + " " + category
	q_vec = rag_vec.transform([query])
	sims = cosine_similarity(q_vec, rag_matrix).ravel()
	topk = sims.argsort()[::-1][:3]
	for idx in topk:
	doc = ex_docs[idx]
	st.markdown(f"{doc['name']} — similarity `{sims[idx]:.3f}`")
	snippet = doc["text"][:800].strip().replace("\n","\n\n")
	st.code(snippet, language="markdown")
	else:
	st.caption("Upload winning proposal snippets in the sidebar (.txt/.md/.pdf) to enable exemplar retrieval.")

	st.markdown("---")

	# batch scoring
	st.header("Batch Scoring (CSV → Excel with price band)")
	batch = st.file_uploader("Upload CSV to score (same columns as schema, label_win optional)", type=["csv"])
	if batch is not None:
	df_in = pd.read_csv(batch)
	probs = arts["cal"].transform(arts["cls"].predict_proba(df_in)[:,1])
	ratio_preds = arts["reg"].predict(df_in)
	bands_lo = np.clip(ratio_preds - 0.03, 0.80, 1.20)
	bands_hi = np.clip(ratio_preds + 0.03, 0.80, 1.20)

	def band_label(p):
	if p >= 0.65: return "GO"
	if p >= 0.45: return "CONDITIONAL"
	return "NO-BID / PARTNER"

	out = df_in.copy()
	out["win_probability"] = probs
	out["decision_band"] = [band_label(p) for p in probs]
	out["pred_price_ratio"] = ratio_preds
	out["price_band_low"] = bands_lo
	out["price_band_high"] = bands_hi

	recs = []
	for _, r in df_in.iterrows():
	payload = r.to_dict()
	gaps = compliance_gap(payload)
	rec = []
	if gaps: rec.append("Gaps: " + "; ".join(gaps))
	if int(payload.get("bidder_oem_tieup",0))==0 and "oem" in str(payload.get("tender_text","")).lower():
	rec.append("Add OEM letter")
	if int(payload.get("corrigenda_count",0))>=2:
	rec.append("Monitor corrigenda closely")
	recs.append(" \| ".join(rec))
	out["recommendations"] = recs

	buf = io.BytesIO()
	with pd.ExcelWriter(buf, engine="openpyxl") as xlw:
	out.to_excel(xlw, index=False, sheet_name="Scored")
	st.download_button("Download Scored Excel", data=buf.getvalue(), file_name="tender_scored_with_price.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")

	# schema object shown for users
	SCHEMA = {
	"tender_id": "E-Tender Ref No.", "issue_date": "NIT Date (YYYY-MM-DD)",
	"org": "Organisation (NTPC / SCCL)", "package_name": "Package / Work Name (optional)",
	"category": "Classification / Package Type", "region": "Region / Location (ER/SR/WR/NR)",
	"est_value_crore": "Estimated Value (INR Cr)", "emd_percent": "EMD (%)",
	"qcbs_or_l1": "Evaluation Method (L1/QCBS)", "bid_window_days": "Bid Window (days)",
	"corrigenda_count": "Corrigenda Count", "pq_turnover_cr": "PQ: Avg Turnover required (Cr)",
	"pq_similar_work_cr": "PQ: Similar Work value required (Cr)", "site_visit_required": "Site Visit Required? (0/1)",
	"make_in_india_clause": "Make in India Clause (0/1)", "local_content_req_pct": "Local Content Requirement (%)",
	"msme_pref": "MSME Preference (0/1)", "retender_flag": "Re-Tender (0/1)",
	"expected_bidders": "Expected Bidders (best guess)", "tender_text": "Scope/PQ highlights (free text)",
	"bidder_hist_winrate_cat": "Your past win-rate (0-1)", "bidder_oem_tieup": "OEM Tie-up (0/1)",
	"bidder_similar_work_cr": "Your Similar Work (Cr)", "bidder_turnover_cr": "Your Avg Turnover (Cr)",
	"bidder_safety_score": "HSE/ESG / Safety Score (0-1)", "label_win": "Target: Win (1) / Lose (0)",
	"price_ratio": "Train-only (actual price / estimate)"
	}

	with st.expander("Schema (column → meaning)"):
	st.json(SCHEMA)

	st.header("Export trained models")
	if st.button("Download model bundle"):
	buf = io.BytesIO()
	joblib.dump(arts, buf) # contains cls, cal, reg
	st.download_button("Download model_bundle.pkl", data=buf.getvalue(), file_name="model_bundle.pkl")

	st.caption("Tip: Upload your own winning proposal snippets (txt/md/pdf) to enable exemplar retrieval for clause phrasing.")