<#
.SYNOPSIS
Runs the STEM BIO-AI release validation checks and fails on the first broken contract.

.DESCRIPTION
Executes, in order: CLI version check, pytest, package build, a local audit run,
and a series of assertions against the generated JSON / explain / advisory packet /
advisory response / provider benchmark / Markdown / PDF artifacts. Optionally runs
an external slop detector scan. All artifacts are written under a timestamped
directory below -OutputRoot (resolved relative to the repository root, which is
taken to be the parent of this script's directory).

.PARAMETER ExpectedVersion
Version string the CLI, the result JSON, and the built artifacts must report.

.PARAMETER OutputRoot
Directory (relative to the repository root) under which the run directory is created.

.PARAMETER SlopDetectorPath
Checkout location of the external slop detector; only used with -WithSlop.

.PARAMETER WithSlop
Enables the external slop detector scan when the detector path exists.
#>
param(
    [string]$ExpectedVersion = "1.6.0",
    [string]$OutputRoot = "tmp\release_validation",
    [string]$SlopDetectorPath = "D:\Sanctum\ai-slop-detector",
    [switch]$WithSlop
)

Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
$repoRoot = (Resolve-Path (Join-Path $scriptDir "..")).Path
$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
$versionSlug = "v" + ($ExpectedVersion -replace "\.", "_")
$outDir = Join-Path $repoRoot (Join-Path $OutputRoot "$($versionSlug)_$timestamp")

# Runs one named validation step: prints a banner, invokes the body, and prints
# PASS only if the body returned without throwing.
function Invoke-Step {
    param(
        [string]$Name,
        [scriptblock]$Body
    )

    Write-Host ""
    Write-Host "==> $Name"
    & $Body
    Write-Host "PASS: $Name"
}

# Throws $Message when $Condition is false.
function Assert-True {
    param(
        [bool]$Condition,
        [string]$Message
    )

    if (-not $Condition) {
        throw $Message
    }
}

# Throws when the most recent native command exited non-zero.
# $ErrorActionPreference = "Stop" does not, by default, convert a non-zero native
# exit code into a terminating error, so without this check a failing python
# invocation would still let the step print PASS.
# Must be called immediately after a native command so $LASTEXITCODE is set.
function Assert-NativeSuccess {
    param(
        [string]$Command
    )

    if ($LASTEXITCODE -ne 0) {
        throw "$Command failed with exit code $LASTEXITCODE"
    }
}

Push-Location $repoRoot
try {
    Invoke-Step "CLI version is $ExpectedVersion" {
        $version = python -m stem_ai --version
        Assert-NativeSuccess "python -m stem_ai --version"
        Write-Host $version
        Assert-True ($version -eq "STEM BIO-AI $ExpectedVersion") "Unexpected CLI version: $version"
    }

    Invoke-Step "pytest regression suite" {
        python -m pytest -q
        Assert-NativeSuccess "python -m pytest -q"
    }

    Invoke-Step "package build" {
        python scripts\build_stdlib_package.py
        # Checked before the artifact assertions so stale files in dist\ cannot mask a failed build.
        Assert-NativeSuccess "python scripts\build_stdlib_package.py"
        Assert-True (Test-Path -LiteralPath (Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion-py3-none-any.whl")) "Wheel artifact missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion.tar.gz")) "sdist artifact missing"
    }

    Invoke-Step "local audit artifacts with --explain" {
        New-Item -ItemType Directory -Force -Path $outDir | Out-Null
        python -m stem_ai . --level 3 --format all --out $outDir --explain --advisory validate
        Assert-NativeSuccess "python -m stem_ai (local audit)"
    }

    Invoke-Step "audit JSON contract" {
        $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        Assert-True ($jsonFiles.Count -eq 1) "Expected one experiment_results JSON, found $($jsonFiles.Count)"
        $result = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json

        Assert-True ($result.stem_ai_version -eq $ExpectedVersion) "stem_ai_version mismatch: $($result.stem_ai_version)"
        Assert-True ($result.schema_version -eq "stem-ai-local-cli-result-v1.6") "schema_version mismatch: $($result.schema_version)"
        Assert-True ($null -ne $result.evidence_ledger -and $result.evidence_ledger.Count -gt 0) "evidence_ledger missing or empty"
        Assert-True ($null -ne $result.detector_summary) "detector_summary missing"
        Assert-True ($null -ne $result.ast_signal_summary) "ast_signal_summary missing"
        Assert-True ($null -ne $result.stage_4_rubric) "stage_4_rubric missing"
        Assert-True ($null -ne $result.replication_score) "replication_score missing"
        Assert-True ([string]$result.replication_tier -match "^R[0-4]$") "replication_tier invalid: $($result.replication_tier)"

        Assert-True ($null -ne $result.reasoning_model) "reasoning_model missing"
        Assert-True ($result.reasoning_model.version -eq "stem-bio-ai-reasoning-v1.3.2") "reasoning_model version mismatch: $($result.reasoning_model.version)"
        Assert-True ($result.reasoning_model.policy.final_score_override -eq $false) "reasoning_model must not override final score"
        Assert-True ($null -ne $result.reasoning_model.lane_coherence) "reasoning_model.lane_coherence missing"
        Assert-True ($null -ne $result.reasoning_model.uncertainty_budget) "reasoning_model.uncertainty_budget missing"
        Assert-True ($null -ne $result.reasoning_model.evidence_risk_gate) "reasoning_model.evidence_risk_gate missing"

        Assert-True ($null -ne $result.ai_advisory) "ai_advisory missing"
        Assert-True ($result.ai_advisory.schema_version -eq "stem-ai-advisory-v1.4") "ai_advisory schema mismatch: $($result.ai_advisory.schema_version)"
        Assert-True ($result.ai_advisory.policy.final_score_override -eq $false) "ai_advisory must not override final score"
        Assert-True ($result.ai_advisory.policy.requires_finding_id_citations -eq $true) "ai_advisory must require finding_id citations"
        Assert-True ($result.ai_advisory.invalid_citations.Count -eq 0) "ai_advisory has invalid citations"

        # The regex "\\" matches a single literal backslash inside a finding_id.
        $badIds = @($result.evidence_ledger | Where-Object { [string]$_.finding_id -match "\\" })
        Assert-True ($badIds.Count -eq 0) "finding_id contains Windows backslash"

        $s4Findings = @($result.evidence_ledger | Where-Object { [string]$_.detector -like "S4_*" })
        Assert-True ($s4Findings.Count -gt 0) "Stage 4 findings missing from evidence_ledger"

        Write-Host "score=$($result.score.final_score) tier=$($result.score.formal_tier)"
        Write-Host "replication_score=$($result.replication_score) replication_tier=$($result.replication_tier)"
        Write-Host "evidence_ledger=$($result.evidence_ledger.Count)"
    }

    Invoke-Step "explain artifact contract" {
        $explainFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_explain.txt")
        Assert-True ($explainFiles.Count -eq 1) "Expected one explain artifact, found $($explainFiles.Count)"
        $explain = Get-Content -LiteralPath $explainFiles[0].FullName -Raw

        Assert-True ($explain.Contains("STEM BIO-AI Explain Report")) "Explain header missing"
        Assert-True ($explain.Contains("finding_id:")) "Explain finding_id lines missing"
        Assert-True ($explain.Contains("AST Signal Summary")) "Explain AST summary missing"
        Assert-True ($explain.Contains("Stage 4 Replication Rubric")) "Explain Stage 4 rubric missing"
        Assert-True ($explain.Contains("DISCLAIMER:")) "Explain disclaimer missing"
    }

    Invoke-Step "advisory packet export contract" {
        $packetDir = Join-Path $outDir "packet"
        New-Item -ItemType Directory -Force -Path $packetDir | Out-Null
        python -m stem_ai . --format json --out $packetDir --advisory packet
        Assert-NativeSuccess "python -m stem_ai (advisory packet export)"

        $packetFiles = @(Get-ChildItem -LiteralPath $packetDir -Filter "*_advisory_input.json")
        Assert-True ($packetFiles.Count -eq 1) "Expected one advisory input packet, found $($packetFiles.Count)"
        $packet = Get-Content -LiteralPath $packetFiles[0].FullName -Raw | ConvertFrom-Json

        Assert-True ($packet.schema_version -eq "stem-ai-advisory-input-v1.4") "advisory packet schema mismatch: $($packet.schema_version)"
        Assert-True ($packet.policy.raw_repo_text_allowed -eq $false) "advisory packet must not allow raw repo text"
        Assert-True ($packet.policy.requires_finding_id_citations -eq $true) "advisory packet must require finding_id citations"
        Assert-True ($null -ne $packet.provider_request) "provider_request missing"
        Assert-True ($packet.provider_request.provider -eq "none") "default provider should be none"
        Assert-True ($packet.provider_request.registry.Count -ge 7) "provider registry too small"
        Assert-True ($null -ne $packet.evidence_ledger -and $packet.evidence_ledger.Count -gt 0) "packet evidence ledger missing"
        Assert-True ($packet.packet_profile -eq "provider_budgeted") "packet profile should be provider_budgeted"
        Assert-True ($packet.evidence_ledger.Count -le 40) "provider packet should be capped to 40 findings"
        Assert-True ($packet.allowed_finding_ids.Count -eq $packet.evidence_ledger.Count) "allowed_finding_ids count mismatch"
        Assert-True ($null -ne $packet.provider_prompt_contract) "provider_prompt_contract missing"
        Assert-True ([string]$packet.provider_prompt_contract.citation_rule -match "allowed_finding_ids") "citation rule must mention allowed_finding_ids"
        Assert-True ($null -ne $packet.provider_request.request_schema) "provider request schema missing"
        Assert-True ($packet.provider_request.request_schema.schema_version -eq "stem-ai-provider-request-v1.4") "provider request schema version mismatch"
        Assert-True ($packet.provider_request.args_validation.status -eq "valid") "provider request args should validate"
        Assert-True ($null -ne $packet.provider_request.base_url_validation) "base_url_validation missing"
        Assert-True ($null -ne $packet.provider_request.secret_policy) "secret_policy missing"
        Assert-True ($null -ne $packet.provider_request.env_contract) "env_contract missing"
        Assert-True ($null -ne $packet.contract_schemas) "contract_schemas missing"
        Assert-True ($packet.contract_schemas.schema_version -eq "stem-ai-advisory-contracts-v1.4") "contract_schemas version mismatch"
        Assert-True ($null -ne $packet.packet_contract) "packet_contract missing"
        Assert-True ($packet.packet_contract.status -eq "valid") "packet_contract must validate"

        # Textual scan of the raw file: no "snippet" key may appear anywhere in the packet.
        $packetText = Get-Content -LiteralPath $packetFiles[0].FullName -Raw
        Assert-True (-not $packetText.Contains('"snippet"')) "packet must not include raw snippets"
    }

    Invoke-Step "advisory response file validation contract" {
        $responseDir = Join-Path $outDir "response"
        New-Item -ItemType Directory -Force -Path $responseDir | Out-Null

        # Cite a finding_id taken from the baseline audit result written earlier to $outDir.
        $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        $baseline = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json
        $cite = [string]$baseline.evidence_ledger[0].finding_id

        $responseFile = Join-Path $responseDir "provider_advisory.json"
        @{
            provider = "external_response"
            model = "release-validation-provider"
            reviewer_notes = @(@{
                claim = "Review the cited evidence before advisory use."
                severity = "info"
                cites = @($cite)
                recommended_action = "Inspect the cited finding in the evidence ledger."
            })
            inspection_priorities = @()
        } | ConvertTo-Json -Depth 8 | Set-Content -LiteralPath $responseFile -Encoding UTF8

        python -m stem_ai . --format json --out $responseDir --advisory-response $responseFile
        Assert-NativeSuccess "python -m stem_ai (advisory response validation)"

        $responseJson = @(Get-ChildItem -LiteralPath $responseDir -Filter "*_experiment_results.json")
        Assert-True ($responseJson.Count -eq 1) "Expected one response validation result JSON, found $($responseJson.Count)"
        $response = Get-Content -LiteralPath $responseJson[0].FullName -Raw | ConvertFrom-Json

        Assert-True ($response.ai_advisory.status -eq "valid") "advisory response should validate"
        Assert-True ($response.ai_advisory.response_contract.network_called -eq $false) "response validator must not call network"
        Assert-True ($response.ai_advisory.response_contract.citation_repair_attempted -eq $false) "response validator must not repair citations"
        Assert-True ($response.ai_advisory.invalid_citations.Count -eq 0) "response validator has invalid citations"
    }

    Invoke-Step "provider benchmark exporter contract" {
        $benchmarkDir = Join-Path $outDir "provider_benchmark"
        python scripts\provider_packet_benchmark.py --out $benchmarkDir
        Assert-NativeSuccess "python scripts\provider_packet_benchmark.py"

        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "benchmark_manifest.json")) "provider benchmark manifest missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "packet_stats.jsonl")) "packet_stats.jsonl missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "packet_summary.json")) "packet_summary.json missing"

        $summary = Get-Content -LiteralPath (Join-Path $benchmarkDir "packet_summary.json") -Raw | ConvertFrom-Json
        Assert-True ($summary.record_count -gt 0) "provider benchmark should include records"
        Assert-True ($summary.all_citation_allowlists_exact -eq $true) "citation allowlists should be exact"
        Assert-True ($summary.max_packet_finding_count -le 40) "provider packets should be capped to 40 findings"
    }

    Invoke-Step "markdown and PDF artifacts exist" {
        $mdFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_report.md")
        $pdfFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*.pdf")
        Assert-True ($mdFiles.Count -eq 1) "Expected one Markdown report, found $($mdFiles.Count)"
        Assert-True ($pdfFiles.Count -eq 1) "Expected one PDF report, found $($pdfFiles.Count)"
        Assert-True ($pdfFiles[0].Length -gt 1000) "PDF report appears too small"
    }

    if ($WithSlop -and (Test-Path -LiteralPath $SlopDetectorPath)) {
        Invoke-Step "slop detector clean scan" {
            $slopOut = Join-Path $outDir "slop_report.json"
            $slopConfig = Join-Path $outDir "slop_config.yaml"

            # Here-string terminator must start at column 0, hence the unindented block.
            @"
ignore:
  - ".git/**"
  - "**/.git/**"
  - "__pycache__/**"
  - "**/__pycache__/**"
  - ".pytest_cache/**"
  - "tmp/**"
  - "**/tmp/**"
  - "dist/**"
  - "build/**"
  - "*.egg-info/**"
  - "audits/**"
  - "stem_output*/**"
  - ".venv/**"
  - "venv/**"
  - "node_modules/**"
"@ | Set-Content -LiteralPath $slopConfig -Encoding UTF8

            Push-Location $SlopDetectorPath
            try {
                # NOTE(review): exit code deliberately not asserted here. The detector's
                # exit-code semantics are not visible from this script (it may exit
                # non-zero when findings exist); the JSON report below is authoritative.
                python -m slop_detector.cli --project $repoRoot --config $slopConfig --json --output $slopOut
            }
            finally {
                Pop-Location
            }

            $slop = Get-Content -LiteralPath $slopOut -Raw | ConvertFrom-Json
            Assert-True ($slop.overall_status -eq "clean") "Slop status is not clean: $($slop.overall_status)"
            Assert-True ([int]$slop.deficit_files -eq 0) "Slop deficit_files is not zero: $($slop.deficit_files)"
            Write-Host "slop overall_status=$($slop.overall_status) clean_files=$($slop.clean_files) deficit_files=$($slop.deficit_files)"
        }
    }
    elseif ($WithSlop) {
        # -WithSlop was requested but the detector checkout is absent: still a skip,
        # but say why instead of telling the user to pass a flag they already passed.
        Write-Host ""
        Write-Host "SKIP: external slop detector clean scan (detector not found at $SlopDetectorPath)"
    }
    else {
        Write-Host ""
        Write-Host "SKIP: external slop detector clean scan (pass -WithSlop to enable)"
    }

    Write-Host ""
    Write-Host "STEM BIO-AI v$ExpectedVersion validation PASSED"
    Write-Host "Artifacts: $outDir"
}
finally {
    Pop-Location
}