Spaces:
Running
Running
# Script parameters for the release-validation run.
param(
    # Version the CLI output, build artifact names, and audit JSON are compared against.
    [string] $ExpectedVersion = '1.6.0',

    # Folder, joined onto the repository root, under which the per-run output directory is created.
    [string] $OutputRoot = 'tmp\release_validation',

    # Location of the external slop detector; only consulted when -WithSlop is given.
    [string] $SlopDetectorPath = 'D:\Sanctum\ai-slop-detector',

    # Opt in to the external slop-detector scan.
    [switch] $WithSlop
)
# Strict mode plus "Stop" makes PowerShell-level errors terminating for the rest of the script.
Set-StrictMode -Version Latest
$ErrorActionPreference = 'Stop'

# The repository root is the parent of the directory that contains this script.
$scriptDir = Split-Path -Path $MyInvocation.MyCommand.Path -Parent
$repoRoot = (Resolve-Path -Path (Join-Path -Path $scriptDir -ChildPath '..')).Path

# Per-run output directory: <repoRoot>\<OutputRoot>\v<version, dots as underscores>_<timestamp>.
$timestamp = Get-Date -Format 'yyyyMMdd_HHmmss'
$versionSlug = 'v' + $ExpectedVersion.Replace('.', '_')
$outDir = Join-Path -Path $repoRoot -ChildPath (Join-Path -Path $OutputRoot -ChildPath ('{0}_{1}' -f $versionSlug, $timestamp))
function Invoke-Step {
    <#
    .SYNOPSIS
    Runs one named validation step and prints PASS when it completes.

    .DESCRIPTION
    Prints a blank line and "==> <Name>", invokes Body, then prints "PASS: <Name>".
    A terminating error raised inside Body propagates to the caller, so PASS is
    never printed for it.

    Native executables report failure only through their exit code, which
    $ErrorActionPreference = "Stop" does not turn into an error. To keep such a
    failure from being reported as PASS, $LASTEXITCODE is reset before Body runs
    and the step throws if it is non-zero afterwards. Only the exit code of the
    last native command run by Body is observed.

    .PARAMETER Name
    Label printed in the "==>" and "PASS:" lines.

    .PARAMETER Body
    Script block containing the step's work. Its pipeline output is passed through.
    #>
    param(
        [string]$Name,
        [scriptblock]$Body
    )
    Write-Host ""
    Write-Host "==> $Name"
    # Reset so a stale code from an earlier command cannot fail a step that runs
    # no native command; this also guarantees the variable exists under strict mode.
    $global:LASTEXITCODE = 0
    & $Body
    if ($LASTEXITCODE -ne 0) {
        throw "Step '$Name' failed: native command exited with code $LASTEXITCODE"
    }
    Write-Host "PASS: $Name"
}
function Assert-True {
    # Throws $Message as a terminating error unless $Condition is true;
    # does nothing when the condition holds.
    param(
        [bool]$Condition,
        [string]$Message
    )
    if ($Condition) {
        return
    }
    throw $Message
}
# Run every validation step from the repository root; the finally block restores
# the caller's location whether the run passes or a step throws.
Push-Location $repoRoot
try {
    # The CLI must report exactly the expected version string.
    Invoke-Step "CLI version is $ExpectedVersion" {
        $version = python -m stem_ai --version
        Write-Host $version
        Assert-True ($version -eq "STEM BIO-AI $ExpectedVersion") "Unexpected CLI version: $version"
    }

    Invoke-Step "pytest regression suite" {
        python -m pytest -q
    }

    # Build, then check that both distribution artifacts exist under dist\.
    Invoke-Step "package build" {
        python scripts\build_stdlib_package.py
        Assert-True (Test-Path -LiteralPath (Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion-py3-none-any.whl")) "Wheel artifact missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion.tar.gz")) "sdist artifact missing"
    }

    # Run the audit into the per-run output directory; later steps inspect what it wrote there.
    Invoke-Step "local audit artifacts with --explain" {
        New-Item -ItemType Directory -Force -Path $outDir | Out-Null
        python -m stem_ai . --level 3 --format all --out $outDir --explain --advisory validate
    }

    Invoke-Step "audit JSON contract" {
        $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        Assert-True ($jsonFiles.Count -eq 1) "Expected one experiment_results JSON, found $($jsonFiles.Count)"
        $result = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json

        # Top-level version and required sections.
        Assert-True ($result.stem_ai_version -eq $ExpectedVersion) "stem_ai_version mismatch: $($result.stem_ai_version)"
        Assert-True ($result.schema_version -eq "stem-ai-local-cli-result-v1.6") "schema_version mismatch: $($result.schema_version)"
        Assert-True ($null -ne $result.evidence_ledger -and $result.evidence_ledger.Count -gt 0) "evidence_ledger missing or empty"
        Assert-True ($null -ne $result.detector_summary) "detector_summary missing"
        Assert-True ($null -ne $result.ast_signal_summary) "ast_signal_summary missing"
        Assert-True ($null -ne $result.stage_4_rubric) "stage_4_rubric missing"
        Assert-True ($null -ne $result.replication_score) "replication_score missing"
        Assert-True ([string]$result.replication_tier -match "^R[0-4]$") "replication_tier invalid: $($result.replication_tier)"

        # Reasoning-model section.
        Assert-True ($null -ne $result.reasoning_model) "reasoning_model missing"
        Assert-True ($result.reasoning_model.version -eq "stem-bio-ai-reasoning-v1.3.2") "reasoning_model version mismatch: $($result.reasoning_model.version)"
        Assert-True ($result.reasoning_model.policy.final_score_override -eq $false) "reasoning_model must not override final score"
        Assert-True ($null -ne $result.reasoning_model.lane_coherence) "reasoning_model.lane_coherence missing"
        Assert-True ($null -ne $result.reasoning_model.uncertainty_budget) "reasoning_model.uncertainty_budget missing"
        Assert-True ($null -ne $result.reasoning_model.evidence_risk_gate) "reasoning_model.evidence_risk_gate missing"

        # Advisory section.
        Assert-True ($null -ne $result.ai_advisory) "ai_advisory missing"
        Assert-True ($result.ai_advisory.schema_version -eq "stem-ai-advisory-v1.4") "ai_advisory schema mismatch: $($result.ai_advisory.schema_version)"
        Assert-True ($result.ai_advisory.policy.final_score_override -eq $false) "ai_advisory must not override final score"
        Assert-True ($result.ai_advisory.policy.requires_finding_id_citations -eq $true) "ai_advisory must require finding_id citations"
        Assert-True ($result.ai_advisory.invalid_citations.Count -eq 0) "ai_advisory has invalid citations"

        # "\\" is a regex matching one literal backslash in a finding_id.
        $badIds = @($result.evidence_ledger | Where-Object { [string]$_.finding_id -match "\\" })
        Assert-True ($badIds.Count -eq 0) "finding_id contains Windows backslash"
        $s4Findings = @($result.evidence_ledger | Where-Object { [string]$_.detector -like "S4_*" })
        Assert-True ($s4Findings.Count -gt 0) "Stage 4 findings missing from evidence_ledger"

        Write-Host "score=$($result.score.final_score) tier=$($result.score.formal_tier)"
        Write-Host "replication_score=$($result.replication_score) replication_tier=$($result.replication_tier)"
        Write-Host "evidence_ledger=$($result.evidence_ledger.Count)"
    }

    # The explain text artifact must contain each expected section marker.
    Invoke-Step "explain artifact contract" {
        $explainFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_explain.txt")
        Assert-True ($explainFiles.Count -eq 1) "Expected one explain artifact, found $($explainFiles.Count)"
        $explain = Get-Content -LiteralPath $explainFiles[0].FullName -Raw
        Assert-True ($explain.Contains("STEM BIO-AI Explain Report")) "Explain header missing"
        Assert-True ($explain.Contains("finding_id:")) "Explain finding_id lines missing"
        Assert-True ($explain.Contains("AST Signal Summary")) "Explain AST summary missing"
        Assert-True ($explain.Contains("Stage 4 Replication Rubric")) "Explain Stage 4 rubric missing"
        Assert-True ($explain.Contains("DISCLAIMER:")) "Explain disclaimer missing"
    }

    Invoke-Step "advisory packet export contract" {
        $packetDir = Join-Path $outDir "packet"
        New-Item -ItemType Directory -Force -Path $packetDir | Out-Null
        python -m stem_ai . --format json --out $packetDir --advisory packet
        $packetFiles = @(Get-ChildItem -LiteralPath $packetDir -Filter "*_advisory_input.json")
        Assert-True ($packetFiles.Count -eq 1) "Expected one advisory input packet, found $($packetFiles.Count)"

        # Read the file once: the raw text feeds both the parsed checks and the
        # final raw-snippet scan.
        $packetText = Get-Content -LiteralPath $packetFiles[0].FullName -Raw
        $packet = $packetText | ConvertFrom-Json

        Assert-True ($packet.schema_version -eq "stem-ai-advisory-input-v1.4") "advisory packet schema mismatch: $($packet.schema_version)"
        Assert-True ($packet.policy.raw_repo_text_allowed -eq $false) "advisory packet must not allow raw repo text"
        Assert-True ($packet.policy.requires_finding_id_citations -eq $true) "advisory packet must require finding_id citations"
        Assert-True ($null -ne $packet.provider_request) "provider_request missing"
        Assert-True ($packet.provider_request.provider -eq "none") "default provider should be none"
        Assert-True ($packet.provider_request.registry.Count -ge 7) "provider registry too small"
        Assert-True ($null -ne $packet.evidence_ledger -and $packet.evidence_ledger.Count -gt 0) "packet evidence ledger missing"
        Assert-True ($packet.packet_profile -eq "provider_budgeted") "packet profile should be provider_budgeted"
        Assert-True ($packet.evidence_ledger.Count -le 40) "provider packet should be capped to 40 findings"
        Assert-True ($packet.allowed_finding_ids.Count -eq $packet.evidence_ledger.Count) "allowed_finding_ids count mismatch"
        Assert-True ($null -ne $packet.provider_prompt_contract) "provider_prompt_contract missing"
        Assert-True ([string]$packet.provider_prompt_contract.citation_rule -match "allowed_finding_ids") "citation rule must mention allowed_finding_ids"
        Assert-True ($null -ne $packet.provider_request.request_schema) "provider request schema missing"
        Assert-True ($packet.provider_request.request_schema.schema_version -eq "stem-ai-provider-request-v1.4") "provider request schema version mismatch"
        Assert-True ($packet.provider_request.args_validation.status -eq "valid") "provider request args should validate"
        Assert-True ($null -ne $packet.provider_request.base_url_validation) "base_url_validation missing"
        Assert-True ($null -ne $packet.provider_request.secret_policy) "secret_policy missing"
        Assert-True ($null -ne $packet.provider_request.env_contract) "env_contract missing"
        Assert-True ($null -ne $packet.contract_schemas) "contract_schemas missing"
        Assert-True ($packet.contract_schemas.schema_version -eq "stem-ai-advisory-contracts-v1.4") "contract_schemas version mismatch"
        Assert-True ($null -ne $packet.packet_contract) "packet_contract missing"
        Assert-True ($packet.packet_contract.status -eq "valid") "packet_contract must validate"
        Assert-True (-not $packetText.Contains('"snippet"')) "packet must not include raw snippets"
    }

    Invoke-Step "advisory response file validation contract" {
        $responseDir = Join-Path $outDir "response"
        New-Item -ItemType Directory -Force -Path $responseDir | Out-Null

        # Borrow a finding_id from the baseline audit result so the synthetic
        # response cites an id that exists in the evidence ledger.
        $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        Assert-True ($jsonFiles.Count -eq 1) "Expected one experiment_results JSON, found $($jsonFiles.Count)"
        $baseline = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json
        $cite = [string]$baseline.evidence_ledger[0].finding_id

        $responseFile = Join-Path $responseDir "provider_advisory.json"
        $responseBody = @{
            provider = "external_response"
            model = "release-validation-provider"
            reviewer_notes = @(@{
                claim = "Review the cited evidence before advisory use."
                severity = "info"
                cites = @($cite)
                recommended_action = "Inspect the cited finding in the evidence ledger."
            })
            inspection_priorities = @()
        } | ConvertTo-Json -Depth 8
        # Write UTF-8 without a BOM. Set-Content -Encoding UTF8 emits a BOM on
        # Windows PowerShell 5.1 but not on PowerShell 7, and a leading BOM can
        # be rejected by strict JSON readers; this keeps the bytes identical on both.
        [System.IO.File]::WriteAllText($responseFile, $responseBody, (New-Object System.Text.UTF8Encoding -ArgumentList $false))

        python -m stem_ai . --format json --out $responseDir --advisory-response $responseFile
        $responseJson = @(Get-ChildItem -LiteralPath $responseDir -Filter "*_experiment_results.json")
        Assert-True ($responseJson.Count -eq 1) "Expected one response validation result JSON, found $($responseJson.Count)"
        $response = Get-Content -LiteralPath $responseJson[0].FullName -Raw | ConvertFrom-Json
        Assert-True ($response.ai_advisory.status -eq "valid") "advisory response should validate"
        Assert-True ($response.ai_advisory.response_contract.network_called -eq $false) "response validator must not call network"
        Assert-True ($response.ai_advisory.response_contract.citation_repair_attempted -eq $false) "response validator must not repair citations"
        Assert-True ($response.ai_advisory.invalid_citations.Count -eq 0) "response validator has invalid citations"
    }

    Invoke-Step "provider benchmark exporter contract" {
        $benchmarkDir = Join-Path $outDir "provider_benchmark"
        python scripts\provider_packet_benchmark.py --out $benchmarkDir
        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "benchmark_manifest.json")) "provider benchmark manifest missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "packet_stats.jsonl")) "packet_stats.jsonl missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "packet_summary.json")) "packet_summary.json missing"
        $summary = Get-Content -LiteralPath (Join-Path $benchmarkDir "packet_summary.json") -Raw | ConvertFrom-Json
        Assert-True ($summary.record_count -gt 0) "provider benchmark should include records"
        Assert-True ($summary.all_citation_allowlists_exact -eq $true) "citation allowlists should be exact"
        Assert-True ($summary.max_packet_finding_count -le 40) "provider packets should be capped to 40 findings"
    }

    Invoke-Step "markdown and PDF artifacts exist" {
        $mdFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_report.md")
        $pdfFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*.pdf")
        Assert-True ($mdFiles.Count -eq 1) "Expected one Markdown report, found $($mdFiles.Count)"
        Assert-True ($pdfFiles.Count -eq 1) "Expected one PDF report, found $($pdfFiles.Count)"
        # Size floor in bytes: rejects an empty or truncated PDF.
        Assert-True ($pdfFiles[0].Length -gt 1000) "PDF report appears too small"
    }

    # Optional external scan. The two skip reasons are reported separately so a
    # run with -WithSlop and a missing detector path is not told to "pass -WithSlop".
    if (-not $WithSlop) {
        Write-Host ""
        Write-Host "SKIP: external slop detector clean scan (pass -WithSlop to enable)"
    }
    elseif (-not (Test-Path -LiteralPath $SlopDetectorPath)) {
        Write-Host ""
        Write-Host "SKIP: external slop detector clean scan (-WithSlop was passed, but SlopDetectorPath was not found: $SlopDetectorPath)"
    }
    else {
        Invoke-Step "slop detector clean scan" {
            $slopOut = Join-Path $outDir "slop_report.json"
            $slopConfig = Join-Path $outDir "slop_config.yaml"
            # Here-string content and its closing marker must stay at column 0.
            @"
ignore:
- ".git/**"
- "**/.git/**"
- "__pycache__/**"
- "**/__pycache__/**"
- ".pytest_cache/**"
- "tmp/**"
- "**/tmp/**"
- "dist/**"
- "build/**"
- "*.egg-info/**"
- "audits/**"
- "stem_output*/**"
- ".venv/**"
- "venv/**"
- "node_modules/**"
"@ | Set-Content -LiteralPath $slopConfig -Encoding UTF8
            # The detector is launched from its own directory; always return afterwards.
            Push-Location $SlopDetectorPath
            try {
                python -m slop_detector.cli --project $repoRoot --config $slopConfig --json --output $slopOut
            }
            finally {
                Pop-Location
            }
            $slop = Get-Content -LiteralPath $slopOut -Raw | ConvertFrom-Json
            Assert-True ($slop.overall_status -eq "clean") "Slop status is not clean: $($slop.overall_status)"
            Assert-True ([int]$slop.deficit_files -eq 0) "Slop deficit_files is not zero: $($slop.deficit_files)"
            Write-Host "slop overall_status=$($slop.overall_status) clean_files=$($slop.clean_files) deficit_files=$($slop.deficit_files)"
        }
    }

    Write-Host ""
    Write-Host "STEM BIO-AI v$ExpectedVersion validation PASSED"
    Write-Host "Artifacts: $outDir"
}
finally {
    Pop-Location
}