# stem-bio-ai/scripts/validate_release.ps1
# Codex
# sync hf space snapshot
# 6a1cba7
<#
.SYNOPSIS
Release validation driver for STEM BIO-AI.

.PARAMETER ExpectedVersion
Version string the release is expected to report. It is compared against the
CLI version output, the built artifact file names and the audit JSON, and it
is also used to name the output folder.

.PARAMETER OutputRoot
Folder, relative to the repository root, under which a per-run artifact
directory named "v<version>_<timestamp>" is placed.

.PARAMETER SlopDetectorPath
Directory of the external slop detector checkout used by the optional scan.

.PARAMETER WithSlop
Opt in to the external slop detector scan.
#>
param(
    [string]$ExpectedVersion = "1.6.0",
    [string]$OutputRoot = "tmp\release_validation",
    [string]$SlopDetectorPath = "D:\Sanctum\ai-slop-detector",
    [switch]$WithSlop
)

# Fail fast: strict variable/property checks and terminating cmdlet errors.
Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# The repository root is the parent of the directory that holds this script.
$scriptDir = Split-Path -Path $MyInvocation.MyCommand.Path -Parent
$repoRoot = (Resolve-Path -Path (Join-Path -Path $scriptDir -ChildPath "..")).Path

# Per-run output directory: <repoRoot>\<OutputRoot>\v<major>_<minor>_<patch>_<timestamp>
$timestamp = (Get-Date).ToString("yyyyMMdd_HHmmss")
$versionSlug = "v" + $ExpectedVersion.Replace(".", "_")
$outDir = Join-Path -Path $repoRoot -ChildPath (Join-Path -Path $OutputRoot -ChildPath ("{0}_{1}" -f $versionSlug, $timestamp))
function Invoke-Step {
    <#
    .SYNOPSIS
    Runs one named validation step and prints PASS only if it succeeded.

    .DESCRIPTION
    Prints a "==> <Name>" banner, invokes the script block, and prints
    "PASS: <Name>" afterwards. Any exception thrown by the body propagates to
    the caller. In addition, a non-zero exit code left behind by a native
    command inside the body is turned into a terminating error, because
    native executables do not raise PowerShell errors on failure by default.

    .PARAMETER Name
    Human-readable step name used in the banner, PASS line and error message.

    .PARAMETER Body
    Script block containing the work of the step.
    #>
    param(
        [string]$Name,
        [scriptblock]$Body
    )
    Write-Host ""
    Write-Host "==> $Name"

    # Reset the exit code so a stale value from an earlier step cannot leak in,
    # and so the variable is defined under Set-StrictMode even when the body
    # runs no native command at all.
    $global:LASTEXITCODE = 0

    & $Body

    # Without this check a failing native command (for example a failing test
    # run or build script) would still be reported as PASS.
    if ($global:LASTEXITCODE -ne 0) {
        throw "Step '$Name' failed: native command exited with code $($global:LASTEXITCODE)"
    }

    Write-Host "PASS: $Name"
}
function Assert-True {
    <#
    .SYNOPSIS
    Throws the given message when the condition is false.

    .PARAMETER Condition
    Boolean result of the check being asserted.

    .PARAMETER Message
    Text thrown as the error when Condition is false.
    #>
    param(
        [bool]$Condition,
        [string]$Message
    )
    # Guard clause: nothing to do when the check holds.
    if ($Condition) {
        return
    }
    throw $Message
}
# Run every step from the repository root so the relative paths used below
# (scripts\..., dist\..., ".") resolve. The finally block restores the
# caller's location whether validation passes or throws.
Push-Location $repoRoot
try {
    # The CLI must report exactly the expected version string.
    Invoke-Step "CLI version is $ExpectedVersion" {
        $version = python -m stem_ai --version
        Write-Host $version
        Assert-True ($version -eq "STEM BIO-AI $ExpectedVersion") "Unexpected CLI version: $version"
    }

    Invoke-Step "pytest regression suite" {
        python -m pytest -q
    }

    # Build the package and confirm both the wheel and the sdist exist in dist\.
    Invoke-Step "package build" {
        python scripts\build_stdlib_package.py
        Assert-True (Test-Path -LiteralPath (Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion-py3-none-any.whl")) "Wheel artifact missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion.tar.gz")) "sdist artifact missing"
    }

    # Audit the repository itself; the later steps inspect the files written to $outDir.
    Invoke-Step "local audit artifacts with --explain" {
        New-Item -ItemType Directory -Force -Path $outDir | Out-Null
        python -m stem_ai . --level 3 --format all --out $outDir --explain --advisory validate
    }

    # Check the structure and pinned schema/version strings of the result JSON.
    Invoke-Step "audit JSON contract" {
        $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        Assert-True ($jsonFiles.Count -eq 1) "Expected one experiment_results JSON, found $($jsonFiles.Count)"
        $result = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json

        # Top-level version fields and required sections.
        Assert-True ($result.stem_ai_version -eq $ExpectedVersion) "stem_ai_version mismatch: $($result.stem_ai_version)"
        Assert-True ($result.schema_version -eq "stem-ai-local-cli-result-v1.6") "schema_version mismatch: $($result.schema_version)"
        Assert-True ($null -ne $result.evidence_ledger -and $result.evidence_ledger.Count -gt 0) "evidence_ledger missing or empty"
        Assert-True ($null -ne $result.detector_summary) "detector_summary missing"
        Assert-True ($null -ne $result.ast_signal_summary) "ast_signal_summary missing"
        Assert-True ($null -ne $result.stage_4_rubric) "stage_4_rubric missing"
        Assert-True ($null -ne $result.replication_score) "replication_score missing"
        Assert-True ([string]$result.replication_tier -match "^R[0-4]$") "replication_tier invalid: $($result.replication_tier)"

        # Reasoning model section.
        Assert-True ($null -ne $result.reasoning_model) "reasoning_model missing"
        Assert-True ($result.reasoning_model.version -eq "stem-bio-ai-reasoning-v1.3.2") "reasoning_model version mismatch: $($result.reasoning_model.version)"
        Assert-True ($result.reasoning_model.policy.final_score_override -eq $false) "reasoning_model must not override final score"
        Assert-True ($null -ne $result.reasoning_model.lane_coherence) "reasoning_model.lane_coherence missing"
        Assert-True ($null -ne $result.reasoning_model.uncertainty_budget) "reasoning_model.uncertainty_budget missing"
        Assert-True ($null -ne $result.reasoning_model.evidence_risk_gate) "reasoning_model.evidence_risk_gate missing"

        # AI advisory section.
        Assert-True ($null -ne $result.ai_advisory) "ai_advisory missing"
        Assert-True ($result.ai_advisory.schema_version -eq "stem-ai-advisory-v1.4") "ai_advisory schema mismatch: $($result.ai_advisory.schema_version)"
        Assert-True ($result.ai_advisory.policy.final_score_override -eq $false) "ai_advisory must not override final score"
        Assert-True ($result.ai_advisory.policy.requires_finding_id_citations -eq $true) "ai_advisory must require finding_id citations"
        Assert-True ($result.ai_advisory.invalid_citations.Count -eq 0) "ai_advisory has invalid citations"

        # "\\" is the regex for one literal backslash: finding ids must not contain any.
        $badIds = @($result.evidence_ledger | Where-Object { [string]$_.finding_id -match "\\" })
        Assert-True ($badIds.Count -eq 0) "finding_id contains Windows backslash"

        # At least one ledger entry must come from a detector whose name starts with "S4_".
        $s4Findings = @($result.evidence_ledger | Where-Object { [string]$_.detector -like "S4_*" })
        Assert-True ($s4Findings.Count -gt 0) "Stage 4 findings missing from evidence_ledger"

        Write-Host "score=$($result.score.final_score) tier=$($result.score.formal_tier)"
        Write-Host "replication_score=$($result.replication_score) replication_tier=$($result.replication_tier)"
        Write-Host "evidence_ledger=$($result.evidence_ledger.Count)"
    }

    # The explain text file must contain each expected section marker.
    Invoke-Step "explain artifact contract" {
        $explainFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_explain.txt")
        Assert-True ($explainFiles.Count -eq 1) "Expected one explain artifact, found $($explainFiles.Count)"
        $explain = Get-Content -LiteralPath $explainFiles[0].FullName -Raw
        Assert-True ($explain.Contains("STEM BIO-AI Explain Report")) "Explain header missing"
        Assert-True ($explain.Contains("finding_id:")) "Explain finding_id lines missing"
        Assert-True ($explain.Contains("AST Signal Summary")) "Explain AST summary missing"
        Assert-True ($explain.Contains("Stage 4 Replication Rubric")) "Explain Stage 4 rubric missing"
        Assert-True ($explain.Contains("DISCLAIMER:")) "Explain disclaimer missing"
    }

    # Export an advisory input packet into its own sub-folder and check its contract.
    Invoke-Step "advisory packet export contract" {
        $packetDir = Join-Path $outDir "packet"
        New-Item -ItemType Directory -Force -Path $packetDir | Out-Null
        python -m stem_ai . --format json --out $packetDir --advisory packet
        $packetFiles = @(Get-ChildItem -LiteralPath $packetDir -Filter "*_advisory_input.json")
        Assert-True ($packetFiles.Count -eq 1) "Expected one advisory input packet, found $($packetFiles.Count)"
        $packet = Get-Content -LiteralPath $packetFiles[0].FullName -Raw | ConvertFrom-Json

        # Packet-level schema and policy.
        Assert-True ($packet.schema_version -eq "stem-ai-advisory-input-v1.4") "advisory packet schema mismatch: $($packet.schema_version)"
        Assert-True ($packet.policy.raw_repo_text_allowed -eq $false) "advisory packet must not allow raw repo text"
        Assert-True ($packet.policy.requires_finding_id_citations -eq $true) "advisory packet must require finding_id citations"

        # Provider request defaults and registry size.
        Assert-True ($null -ne $packet.provider_request) "provider_request missing"
        Assert-True ($packet.provider_request.provider -eq "none") "default provider should be none"
        Assert-True ($packet.provider_request.registry.Count -ge 7) "provider registry too small"

        # Evidence ledger is present, capped at 40 entries, and matches the citation allowlist in size.
        Assert-True ($null -ne $packet.evidence_ledger -and $packet.evidence_ledger.Count -gt 0) "packet evidence ledger missing"
        Assert-True ($packet.packet_profile -eq "provider_budgeted") "packet profile should be provider_budgeted"
        Assert-True ($packet.evidence_ledger.Count -le 40) "provider packet should be capped to 40 findings"
        Assert-True ($packet.allowed_finding_ids.Count -eq $packet.evidence_ledger.Count) "allowed_finding_ids count mismatch"

        # Prompt contract and provider request sub-sections.
        Assert-True ($null -ne $packet.provider_prompt_contract) "provider_prompt_contract missing"
        Assert-True ([string]$packet.provider_prompt_contract.citation_rule -match "allowed_finding_ids") "citation rule must mention allowed_finding_ids"
        Assert-True ($null -ne $packet.provider_request.request_schema) "provider request schema missing"
        Assert-True ($packet.provider_request.request_schema.schema_version -eq "stem-ai-provider-request-v1.4") "provider request schema version mismatch"
        Assert-True ($packet.provider_request.args_validation.status -eq "valid") "provider request args should validate"
        Assert-True ($null -ne $packet.provider_request.base_url_validation) "base_url_validation missing"
        Assert-True ($null -ne $packet.provider_request.secret_policy) "secret_policy missing"
        Assert-True ($null -ne $packet.provider_request.env_contract) "env_contract missing"

        # Embedded contract schemas and the packet's self-validation status.
        Assert-True ($null -ne $packet.contract_schemas) "contract_schemas missing"
        Assert-True ($packet.contract_schemas.schema_version -eq "stem-ai-advisory-contracts-v1.4") "contract_schemas version mismatch"
        Assert-True ($null -ne $packet.packet_contract) "packet_contract missing"
        Assert-True ($packet.packet_contract.status -eq "valid") "packet_contract must validate"

        # Textual check on the raw file: no "snippet" key may appear anywhere in the packet.
        $packetText = Get-Content -LiteralPath $packetFiles[0].FullName -Raw
        Assert-True (-not $packetText.Contains('"snippet"')) "packet must not include raw snippets"
    }

    # Feed a hand-built provider response that cites a real finding id from the
    # baseline audit, then check that the CLI validates it.
    Invoke-Step "advisory response file validation contract" {
        $responseDir = Join-Path $outDir "response"
        New-Item -ItemType Directory -Force -Path $responseDir | Out-Null

        # Take the first finding id of the baseline audit as the citation.
        $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        $baseline = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json
        $cite = [string]$baseline.evidence_ledger[0].finding_id

        $responseFile = Join-Path $responseDir "provider_advisory.json"
        @{
            provider = "external_response"
            model = "release-validation-provider"
            reviewer_notes = @(@{
                claim = "Review the cited evidence before advisory use."
                severity = "info"
                cites = @($cite)
                recommended_action = "Inspect the cited finding in the evidence ledger."
            })
            inspection_priorities = @()
        } | ConvertTo-Json -Depth 8 | Set-Content -LiteralPath $responseFile -Encoding UTF8

        python -m stem_ai . --format json --out $responseDir --advisory-response $responseFile
        $responseJson = @(Get-ChildItem -LiteralPath $responseDir -Filter "*_experiment_results.json")
        Assert-True ($responseJson.Count -eq 1) "Expected one response validation result JSON, found $($responseJson.Count)"
        $response = Get-Content -LiteralPath $responseJson[0].FullName -Raw | ConvertFrom-Json
        Assert-True ($response.ai_advisory.status -eq "valid") "advisory response should validate"
        Assert-True ($response.ai_advisory.response_contract.network_called -eq $false) "response validator must not call network"
        Assert-True ($response.ai_advisory.response_contract.citation_repair_attempted -eq $false) "response validator must not repair citations"
        Assert-True ($response.ai_advisory.invalid_citations.Count -eq 0) "response validator has invalid citations"
    }

    # Run the benchmark exporter and check its three output files and summary values.
    Invoke-Step "provider benchmark exporter contract" {
        $benchmarkDir = Join-Path $outDir "provider_benchmark"
        python scripts\provider_packet_benchmark.py --out $benchmarkDir
        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "benchmark_manifest.json")) "provider benchmark manifest missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "packet_stats.jsonl")) "packet_stats.jsonl missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "packet_summary.json")) "packet_summary.json missing"
        $summary = Get-Content -LiteralPath (Join-Path $benchmarkDir "packet_summary.json") -Raw | ConvertFrom-Json
        Assert-True ($summary.record_count -gt 0) "provider benchmark should include records"
        Assert-True ($summary.all_citation_allowlists_exact -eq $true) "citation allowlists should be exact"
        Assert-True ($summary.max_packet_finding_count -le 40) "provider packets should be capped to 40 findings"
    }

    # The audit output directory must also hold exactly one Markdown report and one non-trivial PDF.
    Invoke-Step "markdown and PDF artifacts exist" {
        $mdFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_report.md")
        $pdfFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*.pdf")
        Assert-True ($mdFiles.Count -eq 1) "Expected one Markdown report, found $($mdFiles.Count)"
        Assert-True ($pdfFiles.Count -eq 1) "Expected one PDF report, found $($pdfFiles.Count)"
        Assert-True ($pdfFiles[0].Length -gt 1000) "PDF report appears too small"
    }

    # Optional external scan. Three distinct outcomes: not requested, requested
    # but the detector directory is absent, or requested and run.
    if (-not $WithSlop) {
        Write-Host ""
        Write-Host "SKIP: external slop detector clean scan (pass -WithSlop to enable)"
    }
    elseif (-not (Test-Path -LiteralPath $SlopDetectorPath)) {
        # The scan was explicitly requested, so do not claim it merely needs
        # -WithSlop; report the real reason it could not run. Kept non-fatal so
        # the rest of the validation result is still reported.
        Write-Host ""
        Write-Warning "SKIP: external slop detector clean scan (-WithSlop was passed but the detector was not found at '$SlopDetectorPath')"
    }
    else {
        Invoke-Step "slop detector clean scan" {
            $slopOut = Join-Path $outDir "slop_report.json"
            $slopConfig = Join-Path $outDir "slop_config.yaml"

            # Ignore list written for the detector. The here-string content and
            # its closing marker must stay at column 0.
            @"
ignore:
- ".git/**"
- "**/.git/**"
- "__pycache__/**"
- "**/__pycache__/**"
- ".pytest_cache/**"
- "tmp/**"
- "**/tmp/**"
- "dist/**"
- "build/**"
- "*.egg-info/**"
- "audits/**"
- "stem_output*/**"
- ".venv/**"
- "venv/**"
- "node_modules/**"
"@ | Set-Content -LiteralPath $slopConfig -Encoding UTF8

            # Run the detector from its own directory (presumably so that
            # "python -m slop_detector.cli" resolves there), then return.
            Push-Location $SlopDetectorPath
            try {
                python -m slop_detector.cli --project $repoRoot --config $slopConfig --json --output $slopOut
            }
            finally {
                Pop-Location
            }

            $slop = Get-Content -LiteralPath $slopOut -Raw | ConvertFrom-Json
            Assert-True ($slop.overall_status -eq "clean") "Slop status is not clean: $($slop.overall_status)"
            Assert-True ([int]$slop.deficit_files -eq 0) "Slop deficit_files is not zero: $($slop.deficit_files)"
            Write-Host "slop overall_status=$($slop.overall_status) clean_files=$($slop.clean_files) deficit_files=$($slop.deficit_files)"
        }
    }

    Write-Host ""
    Write-Host "STEM BIO-AI v$ExpectedVersion validation PASSED"
    Write-Host "Artifacts: $outDir"
}
finally {
    Pop-Location
}