Spaces:

Flamehaven
/

stem-bio-ai

Running

stem-bio-ai / scripts /validate_release.ps1

Codex

sync hf space snapshot

6a1cba7 15 days ago

14 kB

	# Script parameters for the release validation run.
	param(
	# Version the release is expected to report. Later steps compare it against
	# the CLI banner, the built artifact file names, and stem_ai_version in the
	# audit JSON.
	[string]$ExpectedVersion = "1.6.0",
	# Directory, joined onto the repository root, under which a per-run
	# "<version slug>_<timestamp>" output folder is created.
	[string]$OutputRoot = "tmp\release_validation",
	# Directory of the external slop detector; the optional scan runs
	# `python -m slop_detector.cli` from inside it.
	# NOTE(review): the default is a machine-specific absolute path.
	[string]$SlopDetectorPath = "D:\Sanctum\ai-slop-detector",
	# Opt-in switch for the slop detector scan. The scan additionally requires
	# $SlopDetectorPath to exist.
	[switch]$WithSlop
	)

	# Fail fast: cmdlet errors become terminating, and strict mode rejects
	# references to uninitialized variables and missing properties.
	$ErrorActionPreference = "Stop"
	Set-StrictMode -Version Latest

	# The repository root is the parent of the folder that contains this script.
	$scriptDir = Split-Path -Path $MyInvocation.MyCommand.Path -Parent
	$repoRoot = (Join-Path -Path $scriptDir -ChildPath ".." | Resolve-Path).Path

	# Per-run output folder: <repoRoot>\<OutputRoot>\v<version with "_">_<timestamp>
	$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
	$versionSlug = "v" + $ExpectedVersion.Replace(".", "_")
	$outDir = Join-Path -Path $repoRoot -ChildPath (Join-Path -Path $OutputRoot -ChildPath ("{0}_{1}" -f $versionSlug, $timestamp))

	function Invoke-Step {
	    # Runs one named validation step: prints a "==> <name>" banner, invokes
	    # the script block, then prints "PASS: <name>". The PASS line is only
	    # reached when the block returns without throwing.
	    # NOTE(review): by default a non-zero exit code from a native program
	    # inside $Body does not throw, so on its own it does not prevent PASS.
	    param(
	        [string]$Name,
	        [scriptblock]$Body
	    )

	    Write-Host ""
	    Write-Host ("==> " + $Name)
	    & $Body
	    Write-Host ("PASS: " + $Name)
	}

	function Assert-True {
	    # Throws $Message as a terminating error unless $Condition is true.
	    param(
	        [bool]$Condition,
	        [string]$Message
	    )

	    if ($Condition) {
	        return
	    }
	    throw $Message
	}

	# Run every step from the repository root (relative paths such as
	# scripts\... and dist\... below are used from there); the matching
	# Pop-Location is in the finally block at the end of the script.
	Push-Location $repoRoot
	try {
	Invoke-Step "CLI version is $ExpectedVersion" {
	    # The CLI banner must be exactly "STEM BIO-AI <ExpectedVersion>".
	    $expectedBanner = "STEM BIO-AI $ExpectedVersion"
	    $reported = python -m stem_ai --version
	    Write-Host $reported
	    $bannerMatches = $reported -eq $expectedBanner
	    Assert-True $bannerMatches "Unexpected CLI version: $reported"
	}

	Invoke-Step "pytest regression suite" {
	    python -m pytest -q
	    # A non-zero exit code from a native program is not a terminating error
	    # by default, even with $ErrorActionPreference = "Stop". Without this
	    # check the step would print PASS when tests fail.
	    Assert-True ($LASTEXITCODE -eq 0) "pytest regression suite failed with exit code $LASTEXITCODE"
	}

	Invoke-Step "package build" {
	    python scripts\build_stdlib_package.py
	    # Check the build's exit code before looking for artifacts: dist\ is a
	    # fixed location, so Test-Path alone cannot tell freshly built files
	    # from leftovers of an earlier build of the same version.
	    Assert-True ($LASTEXITCODE -eq 0) "package build failed with exit code $LASTEXITCODE"

	    # Both the wheel and the sdist for the expected version must exist.
	    Assert-True (Test-Path -LiteralPath (Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion-py3-none-any.whl")) "Wheel artifact missing"
	    Assert-True (Test-Path -LiteralPath (Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion.tar.gz")) "sdist artifact missing"
	}

	Invoke-Step "local audit artifacts with --explain" {
	    # Create the per-run output folder. Out-Null discards the directory
	    # object New-Item returns so it is not emitted as step output.
	    # The pipeline operator must be a bare "|"; a backslash before it is
	    # parsed as a separate literal argument.
	    New-Item -ItemType Directory -Force -Path $outDir | Out-Null

	    # Produce the audit artifacts that the following steps inspect.
	    python -m stem_ai . --level 3 --format all --out $outDir --explain --advisory validate
	}

	Invoke-Step "audit JSON contract" {
	    # Exactly one *_experiment_results.json is expected in the output folder.
	    $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
	    Assert-True ($jsonFiles.Count -eq 1) "Expected one experiment_results JSON, found $($jsonFiles.Count)"
	    # Pipelines below use a bare "|"; "\|" is not a pipeline operator in PowerShell.
	    $result = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json

	    # Version and schema identifiers.
	    Assert-True ($result.stem_ai_version -eq $ExpectedVersion) "stem_ai_version mismatch: $($result.stem_ai_version)"
	    Assert-True ($result.schema_version -eq "stem-ai-local-cli-result-v1.6") "schema_version mismatch: $($result.schema_version)"

	    # Required top-level sections.
	    Assert-True ($null -ne $result.evidence_ledger -and $result.evidence_ledger.Count -gt 0) "evidence_ledger missing or empty"
	    Assert-True ($null -ne $result.detector_summary) "detector_summary missing"
	    Assert-True ($null -ne $result.ast_signal_summary) "ast_signal_summary missing"
	    Assert-True ($null -ne $result.stage_4_rubric) "stage_4_rubric missing"
	    Assert-True ($null -ne $result.replication_score) "replication_score missing"
	    Assert-True ([string]$result.replication_tier -match "^R[0-4]$") "replication_tier invalid: $($result.replication_tier)"

	    # Reasoning model section.
	    Assert-True ($null -ne $result.reasoning_model) "reasoning_model missing"
	    Assert-True ($result.reasoning_model.version -eq "stem-bio-ai-reasoning-v1.3.2") "reasoning_model version mismatch: $($result.reasoning_model.version)"
	    Assert-True ($result.reasoning_model.policy.final_score_override -eq $false) "reasoning_model must not override final score"
	    Assert-True ($null -ne $result.reasoning_model.lane_coherence) "reasoning_model.lane_coherence missing"
	    Assert-True ($null -ne $result.reasoning_model.uncertainty_budget) "reasoning_model.uncertainty_budget missing"
	    Assert-True ($null -ne $result.reasoning_model.evidence_risk_gate) "reasoning_model.evidence_risk_gate missing"

	    # AI advisory section.
	    Assert-True ($null -ne $result.ai_advisory) "ai_advisory missing"
	    Assert-True ($result.ai_advisory.schema_version -eq "stem-ai-advisory-v1.4") "ai_advisory schema mismatch: $($result.ai_advisory.schema_version)"
	    Assert-True ($result.ai_advisory.policy.final_score_override -eq $false) "ai_advisory must not override final score"
	    Assert-True ($result.ai_advisory.policy.requires_finding_id_citations -eq $true) "ai_advisory must require finding_id citations"
	    Assert-True ($result.ai_advisory.invalid_citations.Count -eq 0) "ai_advisory has invalid citations"

	    # finding_id values must not contain a backslash. "\\" is the regex for
	    # one literal backslash (PowerShell strings do not use "\" as an escape).
	    $badIds = @($result.evidence_ledger | Where-Object { [string]$_.finding_id -match "\\" })
	    Assert-True ($badIds.Count -eq 0) "finding_id contains Windows backslash"

	    # At least one ledger entry must come from a detector named S4_*.
	    $s4Findings = @($result.evidence_ledger | Where-Object { [string]$_.detector -like "S4_*" })
	    Assert-True ($s4Findings.Count -gt 0) "Stage 4 findings missing from evidence_ledger"

	    Write-Host "score=$($result.score.final_score) tier=$($result.score.formal_tier)"
	    Write-Host "replication_score=$($result.replication_score) replication_tier=$($result.replication_tier)"
	    Write-Host "evidence_ledger=$($result.evidence_ledger.Count)"
	}

	Invoke-Step "explain artifact contract" {
	    # Exactly one *_explain.txt is expected in the output folder.
	    $explainFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_explain.txt")
	    Assert-True ($explainFiles.Count -eq 1) "Expected one explain artifact, found $($explainFiles.Count)"
	    $explain = Get-Content -LiteralPath $explainFiles[0].FullName -Raw

	    # Text markers the report must contain, checked in this order; the
	    # first missing marker fails the step with its message.
	    $requiredMarkers = @(
	        @{ Marker = "STEM BIO-AI Explain Report"; Message = "Explain header missing" },
	        @{ Marker = "finding_id:"; Message = "Explain finding_id lines missing" },
	        @{ Marker = "AST Signal Summary"; Message = "Explain AST summary missing" },
	        @{ Marker = "Stage 4 Replication Rubric"; Message = "Explain Stage 4 rubric missing" },
	        @{ Marker = "DISCLAIMER:"; Message = "Explain disclaimer missing" }
	    )
	    foreach ($check in $requiredMarkers) {
	        Assert-True ($explain.Contains($check.Marker)) $check.Message
	    }
	}

	Invoke-Step "advisory packet export contract" {
	    # Export the advisory input packet into its own sub-folder.
	    # Pipelines use a bare "|"; "\|" is not a pipeline operator in PowerShell.
	    $packetDir = Join-Path $outDir "packet"
	    New-Item -ItemType Directory -Force -Path $packetDir | Out-Null
	    python -m stem_ai . --format json --out $packetDir --advisory packet

	    $packetFiles = @(Get-ChildItem -LiteralPath $packetDir -Filter "*_advisory_input.json")
	    Assert-True ($packetFiles.Count -eq 1) "Expected one advisory input packet, found $($packetFiles.Count)"

	    # Read the file once: the raw text feeds both the JSON parse and the
	    # raw-snippet check at the end of this step.
	    $packetText = Get-Content -LiteralPath $packetFiles[0].FullName -Raw
	    $packet = $packetText | ConvertFrom-Json

	    # Schema and policy flags.
	    Assert-True ($packet.schema_version -eq "stem-ai-advisory-input-v1.4") "advisory packet schema mismatch: $($packet.schema_version)"
	    Assert-True ($packet.policy.raw_repo_text_allowed -eq $false) "advisory packet must not allow raw repo text"
	    Assert-True ($packet.policy.requires_finding_id_citations -eq $true) "advisory packet must require finding_id citations"

	    # Provider request defaults and registry size.
	    Assert-True ($null -ne $packet.provider_request) "provider_request missing"
	    Assert-True ($packet.provider_request.provider -eq "none") "default provider should be none"
	    Assert-True ($packet.provider_request.registry.Count -ge 7) "provider registry too small"

	    # Evidence ledger: present, capped at 40, and matched one-to-one in
	    # count by allowed_finding_ids.
	    Assert-True ($null -ne $packet.evidence_ledger -and $packet.evidence_ledger.Count -gt 0) "packet evidence ledger missing"
	    Assert-True ($packet.packet_profile -eq "provider_budgeted") "packet profile should be provider_budgeted"
	    Assert-True ($packet.evidence_ledger.Count -le 40) "provider packet should be capped to 40 findings"
	    Assert-True ($packet.allowed_finding_ids.Count -eq $packet.evidence_ledger.Count) "allowed_finding_ids count mismatch"

	    # Prompt contract and provider request sub-sections.
	    Assert-True ($null -ne $packet.provider_prompt_contract) "provider_prompt_contract missing"
	    Assert-True ([string]$packet.provider_prompt_contract.citation_rule -match "allowed_finding_ids") "citation rule must mention allowed_finding_ids"
	    Assert-True ($null -ne $packet.provider_request.request_schema) "provider request schema missing"
	    Assert-True ($packet.provider_request.request_schema.schema_version -eq "stem-ai-provider-request-v1.4") "provider request schema version mismatch"
	    Assert-True ($packet.provider_request.args_validation.status -eq "valid") "provider request args should validate"
	    Assert-True ($null -ne $packet.provider_request.base_url_validation) "base_url_validation missing"
	    Assert-True ($null -ne $packet.provider_request.secret_policy) "secret_policy missing"
	    Assert-True ($null -ne $packet.provider_request.env_contract) "env_contract missing"

	    # Contract schema sections.
	    Assert-True ($null -ne $packet.contract_schemas) "contract_schemas missing"
	    Assert-True ($packet.contract_schemas.schema_version -eq "stem-ai-advisory-contracts-v1.4") "contract_schemas version mismatch"
	    Assert-True ($null -ne $packet.packet_contract) "packet_contract missing"
	    Assert-True ($packet.packet_contract.status -eq "valid") "packet_contract must validate"

	    # Textual check on the raw file: no "snippet" key anywhere in the packet.
	    Assert-True (-not $packetText.Contains('"snippet"')) "packet must not include raw snippets"
	}

	Invoke-Step "advisory response file validation contract" {
	    # Pipelines use a bare "|"; "\|" is not a pipeline operator in PowerShell.
	    $responseDir = Join-Path $outDir "response"
	    New-Item -ItemType Directory -Force -Path $responseDir | Out-Null

	    # Take a finding_id that really exists in the baseline audit so the
	    # synthetic response below cites valid evidence. $jsonFiles is looked up
	    # again because each step's script block runs in its own scope.
	    $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
	    $baseline = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json
	    $cite = [string]$baseline.evidence_ledger[0].finding_id

	    # Write a minimal provider response with one reviewer note citing it.
	    $responseFile = Join-Path $responseDir "provider_advisory.json"
	    $responsePayload = @{
	        provider = "external_response"
	        model = "release-validation-provider"
	        reviewer_notes = @(@{
	            claim = "Review the cited evidence before advisory use."
	            severity = "info"
	            cites = @($cite)
	            recommended_action = "Inspect the cited finding in the evidence ledger."
	        })
	        inspection_priorities = @()
	    }
	    # -Depth 8 keeps the nested reviewer_notes/cites structure from being
	    # truncated by ConvertTo-Json's shallow default depth.
	    $responsePayload | ConvertTo-Json -Depth 8 | Set-Content -LiteralPath $responseFile -Encoding UTF8

	    # Re-run the CLI against the response file and inspect its result JSON.
	    python -m stem_ai . --format json --out $responseDir --advisory-response $responseFile
	    $responseJson = @(Get-ChildItem -LiteralPath $responseDir -Filter "*_experiment_results.json")
	    Assert-True ($responseJson.Count -eq 1) "Expected one response validation result JSON, found $($responseJson.Count)"
	    $response = Get-Content -LiteralPath $responseJson[0].FullName -Raw | ConvertFrom-Json

	    Assert-True ($response.ai_advisory.status -eq "valid") "advisory response should validate"
	    Assert-True ($response.ai_advisory.response_contract.network_called -eq $false) "response validator must not call network"
	    Assert-True ($response.ai_advisory.response_contract.citation_repair_attempted -eq $false) "response validator must not repair citations"
	    Assert-True ($response.ai_advisory.invalid_citations.Count -eq 0) "response validator has invalid citations"
	}

	Invoke-Step "provider benchmark exporter contract" {
	    $benchmarkDir = Join-Path $outDir "provider_benchmark"
	    python scripts\provider_packet_benchmark.py --out $benchmarkDir

	    # The exporter must have written these three files.
	    $summaryPath = Join-Path $benchmarkDir "packet_summary.json"
	    Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "benchmark_manifest.json")) "provider benchmark manifest missing"
	    Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "packet_stats.jsonl")) "packet_stats.jsonl missing"
	    Assert-True (Test-Path -LiteralPath $summaryPath) "packet_summary.json missing"

	    # Pipelines use a bare "|"; "\|" is not a pipeline operator in PowerShell.
	    $summary = Get-Content -LiteralPath $summaryPath -Raw | ConvertFrom-Json
	    Assert-True ($summary.record_count -gt 0) "provider benchmark should include records"
	    Assert-True ($summary.all_citation_allowlists_exact -eq $true) "citation allowlists should be exact"
	    Assert-True ($summary.max_packet_finding_count -le 40) "provider packets should be capped to 40 findings"
	}

	Invoke-Step "markdown and PDF artifacts exist" {
	    # Exactly one Markdown report and one PDF must be in the output folder.
	    $markdownReports = @(Get-ChildItem -LiteralPath $outDir -Filter "*_report.md")
	    $pdfReports = @(Get-ChildItem -LiteralPath $outDir -Filter "*.pdf")

	    $markdownCount = $markdownReports.Count
	    $pdfCount = $pdfReports.Count
	    Assert-True ($markdownCount -eq 1) "Expected one Markdown report, found $markdownCount"
	    Assert-True ($pdfCount -eq 1) "Expected one PDF report, found $pdfCount"

	    # Size floor: the PDF must be larger than 1000 bytes.
	    $pdfBytes = $pdfReports[0].Length
	    Assert-True ($pdfBytes -gt 1000) "PDF report appears too small"
	}

	# Optional external scan: runs only when -WithSlop is given AND the
	# detector directory exists.
	if ($WithSlop -and (Test-Path -LiteralPath $SlopDetectorPath)) {
	    Invoke-Step "slop detector clean scan" {
	        $slopOut = Join-Path $outDir "slop_report.json"
	        $slopConfig = Join-Path $outDir "slop_config.yaml"

	        # Ignore list handed to the detector as YAML. The here-string body
	        # and its closing "@ start at column 0: PowerShell requires the
	        # terminator to be the first characters on its line, and YAML does
	        # not accept tab indentation.
	        # NOTE(review): some patterns (e.g. "/.git/", ".egg-info/*",
	        # "stem_output/*") look as if "*" characters were lost; confirm
	        # against the detector's ignore syntax.
	        $slopConfigText = @"
ignore:
- ".git/**"
- "/.git/"
- "__pycache__/**"
- "/__pycache__/"
- ".pytest_cache/**"
- "tmp/**"
- "/tmp/"
- "dist/**"
- "build/**"
- ".egg-info/*"
- "audits/**"
- "stem_output/*"
- ".venv/**"
- "venv/**"
- "node_modules/**"
"@
	        # Pipelines use a bare "|"; "\|" is not a pipeline operator in PowerShell.
	        $slopConfigText | Set-Content -LiteralPath $slopConfig -Encoding UTF8

	        # Run the detector from its own directory, then always return.
	        Push-Location -LiteralPath $SlopDetectorPath
	        try {
	            python -m slop_detector.cli --project $repoRoot --config $slopConfig --json --output $slopOut
	        }
	        finally {
	            Pop-Location
	        }

	        $slop = Get-Content -LiteralPath $slopOut -Raw | ConvertFrom-Json
	        Assert-True ($slop.overall_status -eq "clean") "Slop status is not clean: $($slop.overall_status)"
	        Assert-True ([int]$slop.deficit_files -eq 0) "Slop deficit_files is not zero: $($slop.deficit_files)"
	        Write-Host "slop overall_status=$($slop.overall_status) clean_files=$($slop.clean_files) deficit_files=$($slop.deficit_files)"
	    }
	}
	elseif ($WithSlop) {
	    # -WithSlop was requested but the detector directory is missing. Still
	    # skip, but say why instead of telling the user to pass a flag they
	    # already passed.
	    Write-Host ""
	    Write-Warning "SKIP: external slop detector clean scan (SlopDetectorPath not found: $SlopDetectorPath)"
	}
	else {
	    Write-Host ""
	    Write-Host "SKIP: external slop detector clean scan (pass -WithSlop to enable)"
	}

	# Reached only when every step above returned without throwing.
	Write-Host ""
	Write-Host "STEM BIO-AI v$ExpectedVersion validation PASSED"
	Write-Host "Artifacts: $outDir"
	}
	finally {
	# Undo the Push-Location done before the try block, whether the
	# validation passed or a step threw.
	Pop-Location
	}