# demoprep/tests/newvision_sample_runner.py
# NOTE(review): the three lines below are artifacts scraped from the hosting UI
# (author avatar caption, commit message, commit SHA). Comment-ified so the file
# parses; the shebang on the next line no longer sits on line 1 — confirm whether
# this header should be removed entirely so the shebang works when executed directly.
# mikeboone's picture
# feat: March 2026 sprint — new vision merge, pipeline improvements, settings refactor
# 5ac32c1
#!/usr/bin/env python3
"""Run the New Vision 4-case sample set without UI login flow.
Modes:
1) Full chat pipeline mode (uses configured default_llm and required settings)
2) Offline DDL mode (deterministic schema template, still validates settings up front)
Usage:
source ./demoprep/bin/activate
python tests/newvision_sample_runner.py
python tests/newvision_sample_runner.py --offline-ddl
python tests/newvision_sample_runner.py --skip-thoughtspot
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import yaml
# Make the project root importable when this script is run from tests/ directly.
PROJECT_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from dotenv import load_dotenv

# Load developer-local settings before anything else reads the environment.
load_dotenv(PROJECT_ROOT / ".env")
# Bypass the interactive UI login flow for headless runs (only sets if unset).
os.environ.setdefault("DEMOPREP_NO_AUTH", "true")
# Pull admin settings into environment when available.
try:
    from supabase_client import inject_admin_settings_to_env
    inject_admin_settings_to_env()
except Exception as exc:  # noqa: BLE001
    # Best-effort: the runner can still operate without Supabase-managed settings.
    print(f"[newvision_runner] Admin setting injection unavailable: {exc}")
OFFLINE_DEMO_DDL = """
CREATE TABLE DIM_DATE (
DATE_KEY INT PRIMARY KEY,
ORDER_DATE DATE,
MONTH_NAME VARCHAR(30),
QUARTER_NAME VARCHAR(10),
YEAR_NUM INT,
IS_WEEKEND BOOLEAN
);
CREATE TABLE DIM_LOCATION (
LOCATION_KEY INT PRIMARY KEY,
COUNTRY VARCHAR(100),
REGION VARCHAR(100),
STATE VARCHAR(100),
CITY VARCHAR(100),
SALES_CHANNEL VARCHAR(100),
CUSTOMER_SEGMENT VARCHAR(100)
);
CREATE TABLE DIM_PRODUCT (
PRODUCT_KEY INT PRIMARY KEY,
PRODUCT_NAME VARCHAR(200),
BRAND_NAME VARCHAR(100),
CATEGORY VARCHAR(100),
SUB_CATEGORY VARCHAR(100),
PRODUCT_TIER VARCHAR(50),
UNIT_PRICE DECIMAL(12,2)
);
CREATE TABLE FACT_RETAIL_DAILY (
TRANSACTION_KEY INT PRIMARY KEY,
DATE_KEY INT,
LOCATION_KEY INT,
PRODUCT_KEY INT,
ORDER_DATE DATE,
ORDER_COUNT INT,
UNITS_SOLD INT,
UNIT_PRICE DECIMAL(12,2),
GROSS_REVENUE DECIMAL(14,2),
NET_REVENUE DECIMAL(14,2),
SALES_AMOUNT DECIMAL(14,2),
DISCOUNT_PCT DECIMAL(5,2),
INVENTORY_ON_HAND INT,
LOST_SALES_USD DECIMAL(14,2),
IS_OOS BOOLEAN,
FOREIGN KEY (DATE_KEY) REFERENCES DIM_DATE(DATE_KEY),
FOREIGN KEY (LOCATION_KEY) REFERENCES DIM_LOCATION(LOCATION_KEY),
FOREIGN KEY (PRODUCT_KEY) REFERENCES DIM_PRODUCT(PRODUCT_KEY)
);
""".strip()
def _now_utc_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _load_cases(cases_file: Path) -> list[dict[str, Any]]:
    """Read the YAML case file and return its ``test_cases`` entries as a list."""
    raw_text = cases_file.read_text(encoding="utf-8")
    parsed = yaml.safe_load(raw_text) or {}
    return list(parsed.get("test_cases", []))
def _parse_quality_report_path(message: str) -> str | None:
for line in (message or "").splitlines():
if "Report:" in line:
return line.split("Report:", 1)[1].strip()
if "See report:" in line:
return line.split("See report:", 1)[1].strip()
return None
def _load_quality_report(report_path: str | None) -> dict[str, Any]:
if not report_path:
return {}
path = Path(report_path)
json_path = path.with_suffix(".json")
if json_path.exists():
try:
return json.loads(json_path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
return {}
return {}
def _build_quality_gate_stage(report_path: str | None) -> dict[str, Any]:
    """Assemble the quality-gate stage record from a report path.

    Loads the JSON report (if any) and flattens the metrics of interest into
    a fixed-shape stage dict. A missing report yields ok=False with all
    summary values set to None.
    """
    report = _load_quality_report(report_path)
    if isinstance(report, dict):
        summary_source = report.get("summary", {})
        passed = report.get("passed")
    else:
        summary_source = {}
        passed = None
    metric_keys = (
        "semantic_pass_ratio",
        "categorical_junk_count",
        "fk_orphan_count",
        "temporal_violations",
        "numeric_violations",
        "volatility_breaches",
        "smoothness_score",
        "outlier_explainability",
        "kpi_consistency",
    )
    return {
        "ok": bool(passed) if passed is not None else False,
        "report_path": report_path,
        "passed": passed,
        "summary": {key: summary_source.get(key) for key in metric_keys},
    }
def _resolve_runtime_settings() -> tuple[str, str]:
user_email = (
os.getenv("USER_EMAIL")
or os.getenv("INITIAL_USER")
or os.getenv("THOUGHTSPOT_ADMIN_USER")
or "default@user.com"
).strip()
default_llm = (os.getenv("DEFAULT_LLM") or os.getenv("OPENAI_MODEL") or "").strip()
if not default_llm:
raise ValueError("Missing required env var: DEFAULT_LLM or OPENAI_MODEL")
return user_email, default_llm
def _run_realism_sanity_checks(schema_name: str, case: dict[str, Any]) -> dict[str, Any]:
    """Fast, opinionated sanity checks for demo realism.

    These checks intentionally target user-visible demo breakages that can slip
    through structural quality gates (e.g., null dimensions in top-N charts).

    Args:
        schema_name: Snowflake schema holding the freshly populated demo data.
        case: Test-case definition; ``use_case`` selects which vertical's
            checks run, ``name`` gates narrative-specific checks.

    Returns:
        Dict with ``ok`` (True only when no failure was recorded), ``checks``
        (per-check records of name/value/threshold/ok), and ``failures``
        (human-readable failure strings).
    """
    checks: list[dict[str, Any]] = []
    failures: list[str] = []
    if not schema_name:
        return {"ok": False, "checks": checks, "failures": ["Missing schema name for sanity checks"]}
    use_case = str(case.get("use_case", "") or "")
    use_case_lower = (use_case or "").lower()
    case_name = str(case.get("name", "") or "").lower()
    # Vertical detection is keyword-based on the use-case description.
    is_legal = "legal" in use_case_lower
    is_private_equity = any(
        marker in use_case_lower
        for marker in ("private equity", "lp reporting", "state street")
    )
    is_saas_finance = any(
        marker in use_case_lower
        for marker in ("saas finance", "unit economics", "financial analytics", "fp&a", "fpa")
    )
    if not is_legal and not is_private_equity and not is_saas_finance:
        # Keep runtime fast by evaluating only scoped vertical checks.
        return {"ok": True, "checks": checks, "failures": []}
    # Deferred imports keep module load light when no checks run.
    from supabase_client import inject_admin_settings_to_env
    from snowflake_auth import get_snowflake_connection
    inject_admin_settings_to_env()
    conn = None
    cur = None
    try:
        db_name = (os.getenv("SNOWFLAKE_DATABASE") or "DEMOBUILD").strip()
        # Strip double quotes so the quoted identifier below stays valid.
        safe_schema = schema_name.replace('"', "")
        conn = get_snowflake_connection()
        cur = conn.cursor()
        cur.execute(f'USE DATABASE "{db_name}"')
        cur.execute(f'USE SCHEMA "{safe_schema}"')
        # ------------------------------------------------------------------
        # Legal vertical: two supported schema shapes (split tables vs. a
        # single spend-event table), each running the same four check themes.
        # ------------------------------------------------------------------
        if is_legal:
            cur.execute("SHOW TABLES")
            # Column 1 of SHOW TABLES output is the table name.
            legal_tables = {str(row[1]).upper() for row in cur.fetchall()}
            has_split_legal = {"LEGAL_MATTERS", "OUTSIDE_COUNSEL_INVOICES", "ATTORNEYS", "MATTER_TYPES"}.issubset(legal_tables)
            has_event_legal = "LEGAL_SPEND_EVENTS" in legal_tables
            if has_split_legal:
                # 1) Invoice -> matter -> attorney join coverage.
                cur.execute(
                    """
                    SELECT
                        COUNT(*) AS total_rows,
                        COUNT_IF(a.ATTORNEY_NAME IS NULL) AS null_rows
                    FROM OUTSIDE_COUNSEL_INVOICES oci
                    LEFT JOIN LEGAL_MATTERS lm ON oci.MATTER_ID = lm.MATTER_ID
                    LEFT JOIN ATTORNEYS a ON lm.ASSIGNED_ATTORNEY_ID = a.ATTORNEY_ID
                    """
                )
                total_rows, null_rows = cur.fetchone()
                # An empty table counts as 100% null so the check fails loudly.
                null_pct = (float(null_rows) * 100.0 / float(total_rows)) if total_rows else 100.0
                checks.append(
                    {
                        "name": "legal_attorney_join_null_pct",
                        "value": round(null_pct, 2),
                        "threshold": "<= 5.0",
                        "ok": null_pct <= 5.0,
                    }
                )
                if null_pct > 5.0:
                    failures.append(
                        f"Attorney join null rate too high: {null_pct:.2f}% (expected <= 5%)"
                    )
                # 1b) Invoice MATTER_ID linkage must be complete.
                cur.execute(
                    """
                    SELECT
                        COUNT(*) AS total_rows,
                        COUNT_IF(MATTER_ID IS NULL) AS null_rows
                    FROM OUTSIDE_COUNSEL_INVOICES
                    """
                )
                total_rows, null_rows = cur.fetchone()
                null_pct = (float(null_rows) * 100.0 / float(total_rows)) if total_rows else 100.0
                checks.append(
                    {
                        "name": "legal_invoice_matter_id_null_pct",
                        "value": round(null_pct, 2),
                        "threshold": "== 0.0",
                        "ok": null_pct == 0.0,
                    }
                )
                if null_pct != 0.0:
                    failures.append(
                        f"Invoice MATTER_ID null rate is {null_pct:.2f}% (expected 0%)"
                    )
                # 2) Region cardinality should be compact for legal executive demos.
                cur.execute("SELECT COUNT(DISTINCT REGION) FROM LEGAL_MATTERS WHERE REGION IS NOT NULL")
                region_cardinality = int(cur.fetchone()[0] or 0)
                checks.append(
                    {
                        "name": "legal_region_distinct_count",
                        "value": region_cardinality,
                        "threshold": "<= 6",
                        "ok": region_cardinality <= 6,
                    }
                )
                if region_cardinality > 6:
                    failures.append(
                        f"Region cardinality too high: {region_cardinality} distinct values (expected <= 6)"
                    )
                # 3) Firm names should not contain obvious cross-vertical banking/org jargon.
                cur.execute(
                    """
                    SELECT COUNT(*)
                    FROM OUTSIDE_COUNSEL
                    WHERE REGEXP_LIKE(
                        LOWER(FIRM_NAME),
                        'retail banking|consumer lending|digital channels|enterprise operations|regional service'
                    )
                    """
                )
                bad_firm_count = int(cur.fetchone()[0] or 0)
                checks.append(
                    {
                        "name": "legal_firm_name_cross_vertical_count",
                        "value": bad_firm_count,
                        "threshold": "== 0",
                        "ok": bad_firm_count == 0,
                    }
                )
                if bad_firm_count != 0:
                    failures.append(
                        f"Detected {bad_firm_count} cross-vertical/non-legal firm names"
                    )
                # 4) Matter type taxonomy should remain concise and demo-friendly.
                cur.execute(
                    """
                    SELECT COUNT(DISTINCT mt.MATTER_TYPE_NAME)
                    FROM LEGAL_MATTERS lm
                    LEFT JOIN MATTER_TYPES mt ON lm.MATTER_TYPE_ID = mt.MATTER_TYPE_ID
                    WHERE mt.MATTER_TYPE_NAME IS NOT NULL
                    """
                )
                matter_type_cardinality = int(cur.fetchone()[0] or 0)
                checks.append(
                    {
                        "name": "legal_matter_type_distinct_count",
                        "value": matter_type_cardinality,
                        "threshold": "<= 15",
                        "ok": matter_type_cardinality <= 15,
                    }
                )
                if matter_type_cardinality > 15:
                    failures.append(
                        f"Matter type cardinality too high: {matter_type_cardinality} distinct values (expected <= 15)"
                    )
            elif has_event_legal:
                # Event-table variant: same themes, sourced from LEGAL_SPEND_EVENTS.
                # 1) Attorney dimension join coverage (critical for "Top Attorney by Cost").
                cur.execute(
                    """
                    SELECT
                        COUNT(*) AS total_rows,
                        COUNT_IF(a.ATTORNEY_NAME IS NULL) AS null_rows
                    FROM LEGAL_SPEND_EVENTS lse
                    LEFT JOIN ATTORNEYS a ON lse.ATTORNEY_ID = a.ATTORNEY_ID
                    """
                )
                total_rows, null_rows = cur.fetchone()
                null_pct = (float(null_rows) * 100.0 / float(total_rows)) if total_rows else 100.0
                checks.append(
                    {
                        "name": "legal_attorney_join_null_pct",
                        "value": round(null_pct, 2),
                        "threshold": "<= 5.0",
                        "ok": null_pct <= 5.0,
                    }
                )
                if null_pct > 5.0:
                    failures.append(
                        f"Attorney join null rate too high: {null_pct:.2f}% (expected <= 5%)"
                    )
                # 2) Region cardinality should be compact for legal executive demos.
                cur.execute("SELECT COUNT(DISTINCT REGION) FROM LEGAL_SPEND_EVENTS WHERE REGION IS NOT NULL")
                region_cardinality = int(cur.fetchone()[0] or 0)
                checks.append(
                    {
                        "name": "legal_region_distinct_count",
                        "value": region_cardinality,
                        "threshold": "<= 6",
                        "ok": region_cardinality <= 6,
                    }
                )
                if region_cardinality > 6:
                    failures.append(
                        f"Region cardinality too high: {region_cardinality} distinct values (expected <= 6)"
                    )
                # 3) Firm names should not contain obvious cross-vertical banking/org jargon.
                cur.execute(
                    """
                    SELECT COUNT(*)
                    FROM OUTSIDE_COUNSEL_FIRMS
                    WHERE REGEXP_LIKE(
                        LOWER(FIRM_NAME),
                        'retail banking|consumer lending|digital channels|enterprise operations|regional service'
                    )
                    """
                )
                bad_firm_count = int(cur.fetchone()[0] or 0)
                checks.append(
                    {
                        "name": "legal_firm_name_cross_vertical_count",
                        "value": bad_firm_count,
                        "threshold": "== 0",
                        "ok": bad_firm_count == 0,
                    }
                )
                if bad_firm_count != 0:
                    failures.append(
                        f"Detected {bad_firm_count} cross-vertical/non-legal firm names"
                    )
                # 4) Matter type taxonomy should remain concise and demo-friendly.
                cur.execute(
                    """
                    SELECT COUNT(DISTINCT mt.MATTER_TYPE_NAME)
                    FROM LEGAL_SPEND_EVENTS lse
                    LEFT JOIN MATTER_TYPES mt ON lse.MATTER_TYPE_ID = mt.MATTER_TYPE_ID
                    WHERE mt.MATTER_TYPE_NAME IS NOT NULL
                    """
                )
                matter_type_cardinality = int(cur.fetchone()[0] or 0)
                checks.append(
                    {
                        "name": "legal_matter_type_distinct_count",
                        "value": matter_type_cardinality,
                        "threshold": "<= 15",
                        "ok": matter_type_cardinality <= 15,
                    }
                )
                if matter_type_cardinality > 15:
                    failures.append(
                        f"Matter type cardinality too high: {matter_type_cardinality} distinct values (expected <= 15)"
                    )
            else:
                failures.append("Could not find supported legal schema shape for realism checks")
        # ------------------------------------------------------------------
        # Private-equity vertical: dimension-labeling checks plus, for the
        # State Street case, narrative-specific invariants.
        # ------------------------------------------------------------------
        if is_private_equity:
            # Guard against semantic leakage where sector/strategy dimensions are
            # accidentally populated with company names.
            cur.execute(
                """
                WITH dim_companies AS (
                    SELECT DISTINCT COMPANY_NAME
                    FROM PORTFOLIO_COMPANIES
                    WHERE COMPANY_NAME IS NOT NULL
                ),
                dim_sectors AS (
                    SELECT DISTINCT SECTOR_NAME
                    FROM SECTORS
                    WHERE SECTOR_NAME IS NOT NULL
                ),
                dim_strategies AS (
                    SELECT DISTINCT FUND_STRATEGY
                    FROM FUNDS
                    WHERE FUND_STRATEGY IS NOT NULL
                )
                SELECT
                    (SELECT COUNT(*) FROM dim_sectors),
                    (SELECT COUNT(*) FROM dim_strategies),
                    (SELECT COUNT(*) FROM dim_sectors s JOIN dim_companies c ON s.SECTOR_NAME = c.COMPANY_NAME),
                    (SELECT COUNT(*) FROM dim_strategies f JOIN dim_companies c ON f.FUND_STRATEGY = c.COMPANY_NAME)
                """
            )
            sector_distinct, strategy_distinct, sector_overlap, strategy_overlap = cur.fetchone()
            sector_distinct = int(sector_distinct or 0)
            strategy_distinct = int(strategy_distinct or 0)
            sector_overlap = int(sector_overlap or 0)
            strategy_overlap = int(strategy_overlap or 0)
            checks.append(
                {
                    "name": "pe_sector_name_company_overlap_count",
                    "value": sector_overlap,
                    "threshold": "== 0",
                    "ok": sector_overlap == 0,
                }
            )
            if sector_overlap != 0:
                failures.append(
                    f"Sector names overlap company names ({sector_overlap} overlaps); likely mislabeled dimensions"
                )
            checks.append(
                {
                    "name": "pe_fund_strategy_company_overlap_count",
                    "value": strategy_overlap,
                    "threshold": "== 0",
                    "ok": strategy_overlap == 0,
                }
            )
            if strategy_overlap != 0:
                failures.append(
                    f"Fund strategy values overlap company names ({strategy_overlap} overlaps); likely mislabeled dimensions"
                )
            checks.append(
                {
                    "name": "pe_sector_distinct_count",
                    "value": sector_distinct,
                    "threshold": ">= 4 and <= 20",
                    "ok": 4 <= sector_distinct <= 20,
                }
            )
            if not (4 <= sector_distinct <= 20):
                failures.append(
                    f"Sector distinct count out of expected demo range: {sector_distinct} (expected 4-20)"
                )
            checks.append(
                {
                    "name": "pe_fund_strategy_distinct_count",
                    "value": strategy_distinct,
                    "threshold": ">= 4 and <= 20",
                    "ok": 4 <= strategy_distinct <= 20,
                }
            )
            if not (4 <= strategy_distinct <= 20):
                failures.append(
                    f"Fund strategy distinct count out of expected demo range: {strategy_distinct} (expected 4-20)"
                )
            # Narrative-specific invariants for the State Street LP-reporting case.
            if case_name == "statestreet_private_equity_lp_reporting":
                # TOTAL_VALUE must equal REPORTED_VALUE + DISTRIBUTIONS per row.
                cur.execute(
                    """
                    SELECT
                        COUNT(*) AS total_rows,
                        COUNT_IF(ABS(TOTAL_VALUE_USD - (REPORTED_VALUE_USD + DISTRIBUTIONS_USD)) > 0.01) AS bad_rows
                    FROM PORTFOLIO_PERFORMANCE
                    """
                )
                total_rows, bad_rows = cur.fetchone()
                total_rows = int(total_rows or 0)
                bad_rows = int(bad_rows or 0)
                identity_ok = total_rows > 0 and bad_rows == 0
                checks.append(
                    {
                        "name": "pe_total_value_identity_bad_rows",
                        "value": bad_rows,
                        "threshold": "== 0",
                        "ok": identity_ok,
                    }
                )
                if not identity_ok:
                    failures.append(
                        f"Total value identity broken in {bad_rows} PE fact rows"
                    )
                # Subscription-line impact must stay in band and match the IRR delta.
                cur.execute(
                    """
                    SELECT
                        COUNT(*) AS total_rows,
                        COUNT_IF(IRR_SUB_LINE_IMPACT_BPS BETWEEN 80 AND 210) AS in_band_rows,
                        COUNT_IF(ABS(IRR_SUB_LINE_IMPACT_BPS - ((GROSS_IRR - GROSS_IRR_WITHOUT_SUB_LINE) * 10000)) <= 5) AS identity_rows
                    FROM PORTFOLIO_PERFORMANCE
                    """
                )
                total_rows, in_band_rows, identity_rows = cur.fetchone()
                total_rows = int(total_rows or 0)
                in_band_rows = int(in_band_rows or 0)
                identity_rows = int(identity_rows or 0)
                irr_band_ok = total_rows > 0 and in_band_rows == total_rows and identity_rows == total_rows
                checks.append(
                    {
                        "name": "pe_subscription_line_impact_rows_valid",
                        "value": {"total": total_rows, "in_band": in_band_rows, "identity": identity_rows},
                        "threshold": "all rows in 80-210 bps band and identity holds",
                        "ok": irr_band_ok,
                    }
                )
                if not irr_band_ok:
                    failures.append("Subscription line impact rows do not consistently satisfy PE IRR delta rules")
                # Apex Industrial Solutions must be the scripted 210 bps outlier.
                cur.execute(
                    """
                    SELECT
                        COUNT(*) AS apex_rows,
                        MAX(pp.IRR_SUB_LINE_IMPACT_BPS) AS apex_max_bps,
                        (
                            SELECT MAX(IRR_SUB_LINE_IMPACT_BPS)
                            FROM PORTFOLIO_PERFORMANCE
                        ) AS overall_max_bps
                    FROM PORTFOLIO_PERFORMANCE pp
                    JOIN PORTFOLIO_COMPANIES pc ON pp.COMPANY_ID = pc.COMPANY_ID
                    WHERE LOWER(pc.COMPANY_NAME) = 'apex industrial solutions'
                    """
                )
                apex_rows, apex_max_bps, overall_max_bps = cur.fetchone()
                apex_ok = int(apex_rows or 0) > 0 and apex_max_bps is not None and abs(float(apex_max_bps) - 210.0) <= 1.0 and overall_max_bps is not None and abs(float(overall_max_bps) - 210.0) <= 1.0
                checks.append(
                    {
                        "name": "pe_apex_subscription_line_outlier",
                        "value": {"rows": int(apex_rows or 0), "apex_max_bps": apex_max_bps, "overall_max_bps": overall_max_bps},
                        "threshold": "Apex exists and max impact == 210 bps",
                        "ok": apex_ok,
                    }
                )
                if not apex_ok:
                    failures.append("Apex Industrial Solutions outlier is missing or not set to the expected 210 bps impact")
                # Meridian must be the only non-compliant covenant, with status 'waived'.
                cur.execute(
                    """
                    WITH covenant_exceptions AS (
                        SELECT
                            LOWER(pc.COMPANY_NAME) AS company_name,
                            LOWER(pp.COVENANT_STATUS) AS covenant_status,
                            COUNT(*) AS row_count
                        FROM PORTFOLIO_PERFORMANCE pp
                        JOIN PORTFOLIO_COMPANIES pc ON pp.COMPANY_ID = pc.COMPANY_ID
                        WHERE LOWER(pp.COVENANT_STATUS) <> 'compliant'
                        GROUP BY 1, 2
                    )
                    SELECT
                        COUNT_IF(company_name = 'meridian specialty chemicals' AND covenant_status = 'waived') AS meridian_waived_groups,
                        COUNT_IF(company_name <> 'meridian specialty chemicals' OR covenant_status <> 'waived') AS invalid_groups
                    FROM covenant_exceptions
                    """
                )
                meridian_groups, invalid_groups = cur.fetchone()
                meridian_ok = int(meridian_groups or 0) > 0 and int(invalid_groups or 0) == 0
                checks.append(
                    {
                        "name": "pe_meridian_covenant_exception",
                        "value": {"meridian_groups": int(meridian_groups or 0), "invalid_groups": int(invalid_groups or 0)},
                        "threshold": "Meridian only, status waived",
                        "ok": meridian_ok,
                    }
                )
                if not meridian_ok:
                    failures.append("Meridian Specialty Chemicals is not the sole waived/non-compliant covenant exception")
                # Technology sector must lead both entry and return multiples.
                cur.execute(
                    """
                    WITH sector_perf AS (
                        SELECT
                            s.SECTOR_NAME,
                            AVG(pp.ENTRY_EV_EBITDA_MULTIPLE) AS avg_entry_multiple,
                            AVG(pp.TOTAL_RETURN_MULTIPLE) AS avg_tvpi
                        FROM PORTFOLIO_PERFORMANCE pp
                        JOIN PORTFOLIO_COMPANIES pc ON pp.COMPANY_ID = pc.COMPANY_ID
                        JOIN SECTORS s ON pc.SECTOR_ID = s.SECTOR_ID
                        GROUP BY 1
                    )
                    SELECT
                        MAX(CASE WHEN LOWER(SECTOR_NAME) = 'technology' THEN avg_entry_multiple END) AS tech_entry,
                        MAX(CASE WHEN LOWER(SECTOR_NAME) = 'technology' THEN avg_tvpi END) AS tech_tvpi,
                        MAX(CASE WHEN LOWER(SECTOR_NAME) <> 'technology' THEN avg_entry_multiple END) AS other_entry_max,
                        MAX(CASE WHEN LOWER(SECTOR_NAME) <> 'technology' THEN avg_tvpi END) AS other_tvpi_max
                    FROM sector_perf
                    """
                )
                tech_entry, tech_tvpi, other_entry_max, other_tvpi_max = cur.fetchone()
                tech_sector_ok = (
                    tech_entry is not None
                    and tech_tvpi is not None
                    and other_entry_max is not None
                    and other_tvpi_max is not None
                    and float(tech_entry) >= float(other_entry_max)
                    and float(tech_tvpi) >= float(other_tvpi_max)
                )
                checks.append(
                    {
                        "name": "pe_technology_sector_leads_multiples",
                        "value": {
                            "tech_entry": tech_entry,
                            "tech_tvpi": tech_tvpi,
                            "other_entry_max": other_entry_max,
                            "other_tvpi_max": other_tvpi_max,
                        },
                        "threshold": "Technology leads average entry and return multiples",
                        "ok": tech_sector_ok,
                    }
                )
                if not tech_sector_ok:
                    failures.append("Technology sector does not lead entry and return multiples as required by the State Street narrative")
                # 2021 and 2022 must be the two highest reported-value vintages.
                cur.execute(
                    """
                    WITH vintage_rank AS (
                        SELECT
                            VINTAGE_YEAR,
                            SUM(REPORTED_VALUE_USD) AS total_reported_value,
                            DENSE_RANK() OVER (ORDER BY SUM(REPORTED_VALUE_USD) DESC) AS value_rank
                        FROM PORTFOLIO_PERFORMANCE
                        GROUP BY 1
                    )
                    SELECT LISTAGG(TO_VARCHAR(VINTAGE_YEAR), ',') WITHIN GROUP (ORDER BY VINTAGE_YEAR)
                    FROM vintage_rank
                    WHERE value_rank <= 2
                    """
                )
                top_vintages = cur.fetchone()[0] or ""
                top_vintage_set = {part.strip() for part in str(top_vintages).split(",") if part.strip()}
                vintage_ok = top_vintage_set == {"2021", "2022"}
                checks.append(
                    {
                        "name": "pe_top_vintages_reported_value",
                        "value": sorted(top_vintage_set),
                        "threshold": "top 2 vintages are 2021 and 2022",
                        "ok": vintage_ok,
                    }
                )
                if not vintage_ok:
                    failures.append("2021 and 2022 are not the top reported-value vintages")
                # Healthcare must show a scripted Q4 2024 dip versus Q3 2024.
                cur.execute(
                    """
                    WITH healthcare_quarters AS (
                        SELECT
                            DATE_TRUNC('quarter', pp.FULL_DATE) AS quarter_start,
                            AVG(pp.TOTAL_VALUE_USD) AS avg_total_value
                        FROM PORTFOLIO_PERFORMANCE pp
                        JOIN PORTFOLIO_COMPANIES pc ON pp.COMPANY_ID = pc.COMPANY_ID
                        JOIN SECTORS s ON pc.SECTOR_ID = s.SECTOR_ID
                        WHERE LOWER(s.SECTOR_NAME) = 'healthcare'
                        GROUP BY 1
                    )
                    SELECT
                        MAX(CASE WHEN quarter_start = DATE '2024-07-01' THEN avg_total_value END) AS q3_2024_value,
                        MAX(CASE WHEN quarter_start = DATE '2024-10-01' THEN avg_total_value END) AS q4_2024_value
                    FROM healthcare_quarters
                    """
                )
                q3_2024_value, q4_2024_value = cur.fetchone()
                healthcare_dip_ok = (
                    q3_2024_value is not None
                    and q4_2024_value is not None
                    and float(q4_2024_value) < float(q3_2024_value)
                )
                checks.append(
                    {
                        "name": "pe_healthcare_q4_2024_dip",
                        "value": {"q3_2024": q3_2024_value, "q4_2024": q4_2024_value},
                        "threshold": "Q4 2024 healthcare total value lower than Q3 2024",
                        "ok": healthcare_dip_ok,
                    }
                )
                if not healthcare_dip_ok:
                    failures.append("Healthcare Q4 2024 performance dip is missing")
                # At least one company must show revenue up while EBITDA margin is down.
                cur.execute(
                    """
                    WITH company_trends AS (
                        SELECT
                            pc.COMPANY_NAME,
                            FIRST_VALUE(pp.REVENUE_USD) OVER (PARTITION BY pc.COMPANY_NAME ORDER BY pp.FULL_DATE) AS first_revenue,
                            LAST_VALUE(pp.REVENUE_USD) OVER (
                                PARTITION BY pc.COMPANY_NAME ORDER BY pp.FULL_DATE
                                ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
                            ) AS last_revenue,
                            FIRST_VALUE(pp.EBITDA_MARGIN_PCT) OVER (PARTITION BY pc.COMPANY_NAME ORDER BY pp.FULL_DATE) AS first_margin,
                            LAST_VALUE(pp.EBITDA_MARGIN_PCT) OVER (
                                PARTITION BY pc.COMPANY_NAME ORDER BY pp.FULL_DATE
                                ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
                            ) AS last_margin
                        FROM PORTFOLIO_PERFORMANCE pp
                        JOIN PORTFOLIO_COMPANIES pc ON pp.COMPANY_ID = pc.COMPANY_ID
                    )
                    SELECT COUNT(DISTINCT COMPANY_NAME)
                    FROM company_trends
                    WHERE last_revenue > first_revenue AND last_margin < first_margin
                    """
                )
                trend_company_count = int(cur.fetchone()[0] or 0)
                trend_ok = trend_company_count >= 1
                checks.append(
                    {
                        "name": "pe_revenue_up_margin_down_company_count",
                        "value": trend_company_count,
                        "threshold": ">= 1",
                        "ok": trend_ok,
                    }
                )
                if not trend_ok:
                    failures.append("No portfolio company shows the required revenue-up / EBITDA-margin-down trend")
        # ------------------------------------------------------------------
        # SaaS finance vertical: history depth, dimension richness, and the
        # ARR/MRR and spend reconciliation identities.
        # ------------------------------------------------------------------
        if is_saas_finance:
            cur.execute("SELECT COUNT(DISTINCT MONTH_KEY) FROM DATES")
            month_count = int(cur.fetchone()[0] or 0)
            checks.append(
                {
                    "name": "saas_month_count",
                    "value": month_count,
                    "threshold": ">= 24",
                    "ok": month_count >= 24,
                }
            )
            if month_count < 24:
                failures.append(f"SaaS finance month count too low: {month_count} (expected >= 24)")
            cur.execute("SELECT COUNT(DISTINCT SEGMENT) FROM CUSTOMERS WHERE SEGMENT IS NOT NULL")
            segment_count = int(cur.fetchone()[0] or 0)
            checks.append(
                {
                    "name": "saas_segment_distinct_count",
                    "value": segment_count,
                    "threshold": ">= 3",
                    "ok": segment_count >= 3,
                }
            )
            if segment_count < 3:
                failures.append(f"SaaS finance segment count too low: {segment_count} (expected >= 3)")
            cur.execute("SELECT COUNT(DISTINCT REGION) FROM LOCATIONS WHERE REGION IS NOT NULL")
            region_count = int(cur.fetchone()[0] or 0)
            checks.append(
                {
                    "name": "saas_region_distinct_count",
                    "value": region_count,
                    "threshold": ">= 3",
                    "ok": region_count >= 3,
                }
            )
            if region_count < 3:
                failures.append(f"SaaS finance region count too low: {region_count} (expected >= 3)")
            # ARR roll-forward and MRR*12 == ARR identities (with small tolerances).
            cur.execute(
                """
                SELECT
                    COUNT(*) AS total_rows,
                    COUNT_IF(
                        ABS(
                            ENDING_ARR_USD - (
                                STARTING_ARR_USD + NEW_LOGO_ARR_USD + EXPANSION_ARR_USD
                                - CONTRACTION_ARR_USD - CHURNED_ARR_USD
                            )
                        ) > 1.0
                    ) AS bad_arr_rows,
                    COUNT_IF(ABS((MRR_USD * 12.0) - ENDING_ARR_USD) > 12.0) AS bad_mrr_rows
                FROM SAAS_CUSTOMER_MONTHLY
                """
            )
            total_rows, bad_arr_rows, bad_mrr_rows = cur.fetchone()
            total_rows = int(total_rows or 0)
            bad_arr_rows = int(bad_arr_rows or 0)
            bad_mrr_rows = int(bad_mrr_rows or 0)
            arr_identity_ok = total_rows > 0 and bad_arr_rows == 0 and bad_mrr_rows == 0
            checks.append(
                {
                    "name": "saas_arr_rollforward_bad_rows",
                    "value": {"total": total_rows, "bad_arr": bad_arr_rows, "bad_mrr": bad_mrr_rows},
                    "threshold": "all rows reconcile",
                    "ok": arr_identity_ok,
                }
            )
            if not arr_identity_ok:
                failures.append(
                    f"SaaS finance ARR identities broken (bad_arr={bad_arr_rows}, bad_mrr={bad_mrr_rows})"
                )
            # Customers should have dense month coverage, not sparse snapshots.
            cur.execute(
                """
                WITH month_counts AS (
                    SELECT CUSTOMER_KEY, COUNT(DISTINCT MONTH_KEY) AS active_months
                    FROM SAAS_CUSTOMER_MONTHLY
                    GROUP BY 1
                )
                SELECT AVG(active_months), MIN(active_months), MAX(active_months)
                FROM month_counts
                """
            )
            avg_active_months, min_active_months, max_active_months = cur.fetchone()
            avg_active_months = float(avg_active_months or 0.0)
            min_active_months = int(min_active_months or 0)
            max_active_months = int(max_active_months or 0)
            density_ok = avg_active_months >= 12.0 and max_active_months >= 20
            checks.append(
                {
                    "name": "saas_customer_month_density",
                    "value": {
                        "avg_active_months": round(avg_active_months, 2),
                        "min_active_months": min_active_months,
                        "max_active_months": max_active_months,
                    },
                    "threshold": "avg >= 12.0 and max >= 20",
                    "ok": density_ok,
                }
            )
            if not density_ok:
                failures.append(
                    f"SaaS finance customer-month density too sparse (avg={avg_active_months:.2f}, max={max_active_months})"
                )
            # Total S&M spend must equal sales + marketing components.
            cur.execute(
                """
                SELECT
                    COUNT(*) AS total_rows,
                    COUNT_IF(ABS(TOTAL_S_AND_M_SPEND_USD - (SALES_SPEND_USD + MARKETING_SPEND_USD)) > 1.0) AS bad_rows
                FROM SALES_MARKETING_SPEND_MONTHLY
                """
            )
            spend_total_rows, bad_spend_rows = cur.fetchone()
            spend_total_rows = int(spend_total_rows or 0)
            bad_spend_rows = int(bad_spend_rows or 0)
            spend_ok = spend_total_rows > 0 and bad_spend_rows == 0
            checks.append(
                {
                    "name": "saas_spend_identity_bad_rows",
                    "value": {"total": spend_total_rows, "bad_rows": bad_spend_rows},
                    "threshold": "== 0",
                    "ok": spend_ok,
                }
            )
            if not spend_ok:
                failures.append(f"SaaS finance spend identity broken in {bad_spend_rows} rows")
    except Exception as exc:  # noqa: BLE001
        # Any connection/SQL error fails the run visibly instead of crashing it.
        failures.append(f"Realism sanity checks failed to execute: {exc}")
    finally:
        # Close cursor then connection; swallow close errors so the result returns.
        try:
            if cur is not None:
                cur.close()
        except Exception:
            pass
        try:
            if conn is not None:
                conn.close()
        except Exception:
            pass
    return {"ok": len(failures) == 0, "checks": checks, "failures": failures}
def _run_case_chat(
    case: dict[str, Any],
    default_llm: str,
    user_email: str,
    skip_thoughtspot: bool = False,
) -> dict[str, Any]:
    """Run one sample case through the full chat pipeline.

    Stages: research -> DDL generation -> Snowflake deployment -> quality
    gate -> realism sanity checks -> (optionally) ThoughtSpot deployment.
    Each stage records ok/duration into ``result["stages"]``; the first
    failure sets ``result["error"]`` and later stages are skipped.

    Args:
        case: Case definition with ``company``, ``use_case`` and optional
            ``context``/``name`` keys.
        default_llm: Model identifier applied to the controller settings.
        user_email: Identity passed to ChatDemoInterface.
        skip_thoughtspot: When True, stop after data-side stages.

    Returns:
        Result dict with ``success`` True only when every recorded stage
        reported ok.
    """
    from chat_interface import ChatDemoInterface
    from demo_personas import get_use_case_config, parse_use_case
    company = case["company"]
    use_case = case["use_case"]
    model = default_llm
    context = case.get("context", "")
    controller = ChatDemoInterface(user_email=user_email)
    controller.settings["model"] = model
    controller.vertical, controller.function = parse_use_case(use_case or "")
    controller.use_case_config = get_use_case_config(
        controller.vertical or "Generic",
        controller.function or "Generic",
    )
    result: dict[str, Any] = {
        "name": case.get("name") or f"{company}_{use_case}",
        "company": company,
        "use_case": use_case,
        "mode": "chat",
        "started_at": _now_utc_iso(),
        "success": False,
        "stages": {},
    }
    # --- Research stage: drain the stream, keep the last update as preview.
    stage_start = datetime.now(timezone.utc)
    last_research = None
    for update in controller.run_research_streaming(company, use_case, generic_context=context):
        last_research = update
    result["stages"]["research"] = {
        "ok": bool(controller.demo_builder and controller.demo_builder.company_analysis_results),
        "duration_s": (datetime.now(timezone.utc) - stage_start).total_seconds(),
        "preview": str(last_research)[:500] if last_research else "",
    }
    # --- DDL stage: reuse research-produced schema when present.
    stage_start = datetime.now(timezone.utc)
    ddl_text = (controller.demo_builder.schema_generation_results or "") if controller.demo_builder else ""
    if not ddl_text:
        _, ddl_text = controller.run_ddl_creation()
    result["stages"]["ddl"] = {
        "ok": bool(ddl_text and "CREATE TABLE" in ddl_text.upper()),
        "duration_s": (datetime.now(timezone.utc) - stage_start).total_seconds(),
        "ddl_length": len(ddl_text or ""),
    }
    if not result["stages"]["ddl"]["ok"]:
        result["error"] = "DDL generation failed"
        result["finished_at"] = _now_utc_iso()
        return result
    # --- Snowflake deployment stage.
    stage_start = datetime.now(timezone.utc)
    deploy_error = None
    try:
        for _ in controller.run_deployment_streaming():
            pass
    except Exception as exc:  # noqa: BLE001
        deploy_error = str(exc)
    deployed_schema = getattr(controller, "_deployed_schema_name", None)
    schema_candidate = deployed_schema or getattr(controller, "_last_schema_name", None)
    result["stages"]["deploy_snowflake"] = {
        "ok": bool(deployed_schema) and deploy_error is None,
        "duration_s": (datetime.now(timezone.utc) - stage_start).total_seconds(),
        "schema": schema_candidate,
        "error": deploy_error,
    }
    # --- Quality gate: only evaluable when deployment produced a schema.
    if schema_candidate:
        quality_report_path = getattr(controller, "_last_population_quality_report_path", None)
        result["stages"]["quality_gate"] = _build_quality_gate_stage(quality_report_path)
        if not result["stages"]["quality_gate"]["ok"]:
            result["error"] = f"Quality gate failed: {quality_report_path or 'missing quality report'}"
    elif deploy_error and not result.get("error"):
        result["error"] = deploy_error
    # --- Realism sanity checks, gated on the quality gate passing.
    # BUG FIX: the quality_gate stage only exists when a schema candidate was
    # found; a direct ["quality_gate"] lookup raised KeyError on failed deploys.
    stage_start = datetime.now(timezone.utc)
    if result["stages"].get("quality_gate", {}).get("ok"):
        sanity = _run_realism_sanity_checks(schema_candidate, case)
        result["stages"]["realism_sanity"] = {
            "ok": bool(sanity.get("ok")),
            "duration_s": (datetime.now(timezone.utc) - stage_start).total_seconds(),
            "checks": sanity.get("checks", []),
            "failures": sanity.get("failures", []),
        }
        if not result["stages"]["realism_sanity"]["ok"] and not result.get("error"):
            result["error"] = "Realism sanity checks failed before ThoughtSpot deployment"
    # --- ThoughtSpot deployment (optional).
    if not skip_thoughtspot and deployed_schema and not result.get("error"):
        stage_start = datetime.now(timezone.utc)
        ts_ok = True
        ts_last = None
        try:
            for ts_update in controller._run_thoughtspot_deployment(deployed_schema, company, use_case):
                ts_last = ts_update
        except Exception as exc:  # noqa: BLE001
            ts_ok = False
            ts_last = str(exc)
        # Some deployment paths return a structured failure payload rather than
        # raising; treat those as failures so pass/fail reporting is accurate.
        ts_preview_text = str(ts_last) if ts_last is not None else ""
        if ts_ok and (
            "THOUGHTSPOT DEPLOYMENT FAILED" in ts_preview_text.upper()
            or "MODEL VALIDATION FAILED" in ts_preview_text.upper()
            or "LIVEBOARD CREATION FAILED" in ts_preview_text.upper()
        ):
            ts_ok = False
        result["stages"]["deploy_thoughtspot"] = {
            "ok": ts_ok,
            "duration_s": (datetime.now(timezone.utc) - stage_start).total_seconds(),
            "preview": ts_preview_text[:1000],
        }
    result["schema_name"] = schema_candidate
    result["success"] = all(stage.get("ok") for stage in result["stages"].values())
    result["finished_at"] = _now_utc_iso()
    return result
def _run_case_offline(
    case: dict[str, Any],
    default_llm: str,
    user_email: str,
    skip_thoughtspot: bool = False,
) -> dict[str, Any]:
    """Run one sample case in offline DDL mode using the fixed star schema.

    Stages: Snowflake connect -> DDL deploy -> data population -> quality
    gate -> realism sanity checks -> (optionally) ThoughtSpot deployment.
    Each stage short-circuits on failure with ``result["error"]`` set.

    Args:
        case: Case definition with ``company``/``use_case`` (and optional name).
        default_llm: Model identifier forwarded to data population/ThoughtSpot.
        user_email: Identity forwarded to the population service.
        skip_thoughtspot: When True, stop after the data-side stages.

    Returns:
        Result dict; ``success`` is True only when every recorded stage is ok.
    """
    from cdw_connector import SnowflakeDeployer
    from demo_prep import generate_demo_base_name
    from legitdata_bridge import populate_demo_data
    from thoughtspot_deployer import deploy_to_thoughtspot
    company = case["company"]
    use_case = case["use_case"]
    result: dict[str, Any] = {
        "name": case.get("name") or f"{company}_{use_case}",
        "company": company,
        "use_case": use_case,
        "mode": "offline_ddl",
        "started_at": _now_utc_iso(),
        "success": False,
        "stages": {},
        "ddl_template": "offline_star_schema_v1",
    }
    # NOTE(review): the deployer connection is never explicitly closed on the
    # success/skip paths — presumably SnowflakeDeployer manages its own
    # lifetime; confirm against cdw_connector.
    deployer = SnowflakeDeployer()
    # 1) Snowflake schema + DDL deploy
    stage_start = datetime.now(timezone.utc)
    ok, msg = deployer.connect()
    if not ok:
        result["stages"]["snowflake_connect"] = {
            "ok": False,
            "duration_s": (datetime.now(timezone.utc) - stage_start).total_seconds(),
            "message": msg,
        }
        result["error"] = msg
        result["finished_at"] = _now_utc_iso()
        return result
    base_name = generate_demo_base_name("", company)
    ok, schema_name, ddl_msg = deployer.create_demo_schema_and_deploy(base_name, OFFLINE_DEMO_DDL)
    result["stages"]["snowflake_ddl"] = {
        "ok": ok,
        "duration_s": (datetime.now(timezone.utc) - stage_start).total_seconds(),
        "schema": schema_name,
        "message": ddl_msg,
    }
    if not ok or not schema_name:
        result["error"] = ddl_msg
        result["finished_at"] = _now_utc_iso()
        return result
    # 2) Data population via LegitData
    stage_start = datetime.now(timezone.utc)
    pop_ok, pop_msg, pop_results = populate_demo_data(
        ddl_content=OFFLINE_DEMO_DDL,
        company_url=company,
        use_case=use_case,
        schema_name=schema_name,
        llm_model=default_llm,
        user_email=user_email,
        size="medium",
    )
    result["stages"]["populate_data"] = {
        "ok": pop_ok,
        "duration_s": (datetime.now(timezone.utc) - stage_start).total_seconds(),
        "rows": pop_results,
        "quality_report": _parse_quality_report_path(pop_msg),
    }
    if not pop_ok:
        result["error"] = pop_msg
        result["finished_at"] = _now_utc_iso()
        return result
    # Quality gate: the report path is parsed out of the population message.
    quality_report_path = _parse_quality_report_path(pop_msg)
    result["stages"]["quality_gate"] = _build_quality_gate_stage(quality_report_path)
    if not result["stages"]["quality_gate"]["ok"]:
        result["error"] = f"Quality gate failed: {quality_report_path or 'missing quality report'}"
        result["schema_name"] = schema_name
        result["finished_at"] = _now_utc_iso()
        return result
    # Realism sanity checks run only after the quality gate passes.
    stage_start = datetime.now(timezone.utc)
    sanity = _run_realism_sanity_checks(schema_name, case)
    result["stages"]["realism_sanity"] = {
        "ok": bool(sanity.get("ok")),
        "duration_s": (datetime.now(timezone.utc) - stage_start).total_seconds(),
        "checks": sanity.get("checks", []),
        "failures": sanity.get("failures", []),
    }
    if not result["stages"]["realism_sanity"]["ok"]:
        result["error"] = "Realism sanity checks failed before ThoughtSpot deployment"
        result["schema_name"] = schema_name
        result["finished_at"] = _now_utc_iso()
        return result
    # 3) ThoughtSpot model + liveboard
    if not skip_thoughtspot:
        stage_start = datetime.now(timezone.utc)
        ts_result = deploy_to_thoughtspot(
            ddl=OFFLINE_DEMO_DDL,
            database=os.getenv("SNOWFLAKE_DATABASE", "DEMOBUILD"),
            schema=schema_name,
            base_name=base_name,
            connection_name=f"{base_name}_conn",
            company_name=company,
            use_case=use_case,
            llm_model=default_llm,
        )
        result["stages"]["deploy_thoughtspot"] = {
            "ok": bool(ts_result and not ts_result.get("errors")),
            "duration_s": (datetime.now(timezone.utc) - stage_start).total_seconds(),
            "result": ts_result,
        }
    result["schema_name"] = schema_name
    result["success"] = all(stage.get("ok") for stage in result["stages"].values())
    result["finished_at"] = _now_utc_iso()
    return result
def _safe_artifact_filename(name: str) -> str:
    """Return *name* with path-hostile characters replaced so it is safe as a filename.

    Case names come from a user-edited YAML file and may contain slashes,
    spaces, or other characters that would break (or redirect) the artifact
    write. Alphanumerics plus ``-``, ``_`` and ``.`` pass through unchanged,
    so well-formed names keep their historical filenames.
    """
    cleaned = "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in name)
    return cleaned or "unnamed_case"


def main() -> None:
    """Run every New Vision sample case and write per-case + summary JSON artifacts.

    Parses CLI flags, validates required pipeline settings up front, then runs
    each case in either chat mode or offline-DDL mode. Artifacts land under
    ``results/newvision_samples/<run_id>/``.

    Raises:
        SystemExit: with status 1 when any case fails (CI-friendly exit code).
    """
    parser = argparse.ArgumentParser(description="Run New Vision sample set")
    parser.add_argument(
        "--cases-file",
        default="tests/newvision_test_cases.yaml",
        help="Path to YAML test case file",
    )
    parser.add_argument(
        "--skip-thoughtspot",
        action="store_true",
        help="Run through data generation only and skip ThoughtSpot object creation",
    )
    parser.add_argument(
        "--offline-ddl",
        action="store_true",
        help="Force offline DDL mode (no LLM dependency)",
    )
    args = parser.parse_args()

    user_email, default_llm = _resolve_runtime_settings()

    # Imported lazily so module import does not require validation side effects.
    from startup_validation import validate_required_pipeline_settings_or_raise

    # Fail fast on missing settings before doing any per-case work.
    validate_required_pipeline_settings_or_raise(
        default_llm=default_llm,
        require_thoughtspot=not args.skip_thoughtspot,
        require_snowflake=True,
    )

    cases_file = (PROJECT_ROOT / args.cases_file).resolve()
    cases = _load_cases(cases_file)

    run_id = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    out_dir = PROJECT_ROOT / "results" / "newvision_samples" / run_id
    out_dir.mkdir(parents=True, exist_ok=True)

    use_offline = bool(args.offline_ddl)
    print(f"Mode: {'offline_ddl' if use_offline else 'chat'}", flush=True)
    print(f"default_llm: {default_llm}", flush=True)

    results = []
    for idx, case in enumerate(cases, start=1):
        print(f"\n[{idx}/{len(cases)}] {case.get('name', case['company'])} -> {case['use_case']}", flush=True)
        # Both runners share the same keyword interface; pick one and call once.
        run_case = _run_case_offline if use_offline else _run_case_chat
        try:
            case_result = run_case(
                case,
                default_llm=default_llm,
                user_email=user_email,
                skip_thoughtspot=args.skip_thoughtspot,
            )
        except Exception as exc:  # noqa: BLE001
            # Keep the batch going: record the crash as a synthetic failed stage
            # so the summary still accounts for this case.
            case_result = {
                "name": case.get("name") or f"{case['company']}_{case['use_case']}",
                "company": case["company"],
                "use_case": case["use_case"],
                "mode": "offline_ddl" if use_offline else "chat",
                "started_at": _now_utc_iso(),
                "finished_at": _now_utc_iso(),
                "success": False,
                "error": f"Runner exception: {exc}",
                "stages": {
                    "runner_exception": {
                        "ok": False,
                        "message": str(exc),
                    }
                },
            }
        results.append(case_result)
        # Sanitize the case name: it is user-supplied and must not be able to
        # break the write or point outside out_dir.
        artifact_name = _safe_artifact_filename(case_result["name"])
        (out_dir / f"{artifact_name}.json").write_text(
            json.dumps(case_result, indent=2),
            encoding="utf-8",
        )
        print(f"  success={case_result['success']} schema={case_result.get('schema_name')}", flush=True)

    summary = {
        "run_id": run_id,
        "mode": "offline_ddl" if use_offline else "chat",
        "cases_file": str(cases_file),
        "total": len(results),
        "passed": sum(1 for r in results if r.get("success")),
        "failed": sum(1 for r in results if not r.get("success")),
        "results": results,
    }
    (out_dir / "summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")

    print("\nSaved sample artifacts:", out_dir)
    print(f"Passed: {summary['passed']} / {summary['total']}")
    if summary["failed"]:
        raise SystemExit(1)
# Script entry point: run the sample set only when executed directly,
# not when this module is imported (e.g. by other tests).
if __name__ == "__main__":
    main()