Spaces:
Running
Running
File size: 14,023 Bytes
<#
.SYNOPSIS
Release validation script for STEM BIO-AI.

.DESCRIPTION
Runs the CLI version check, the pytest suite, the package build, a local
audit with --explain, the advisory packet/response checks, the provider
benchmark exporter and (optionally) an external slop-detector scan, and
asserts on the artifacts each step produces.

.PARAMETER ExpectedVersion
Version string the CLI, the built artifacts and the result JSON must report.

.PARAMETER OutputRoot
Directory, joined onto the repository root, under which a per-run output
folder is created.

.PARAMETER SlopDetectorPath
Directory the script changes into before running `python -m slop_detector.cli`.
The scan only runs when this path exists.

.PARAMETER WithSlop
Opt-in switch for the external slop-detector scan.
#>
param(
    [string]$ExpectedVersion = "1.6.0",
    [string]$OutputRoot = "tmp\release_validation",
    [string]$SlopDetectorPath = "D:\Sanctum\ai-slop-detector",
    [switch]$WithSlop
)
# Strict mode plus "Stop" so that cmdlet errors and uninitialised variables
# terminate the run instead of being silently ignored.
Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# The repository root is the parent of the directory holding this script.
$scriptDir = Split-Path -Path $MyInvocation.MyCommand.Path -Parent
$repoRoot = (Join-Path -Path $scriptDir -ChildPath ".." | Resolve-Path).Path

# Each run writes into its own folder: <repoRoot>\<OutputRoot>\v<version with '_'>_<timestamp>.
$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
$versionSlug = "v" + $ExpectedVersion.Replace(".", "_")
$runFolder = "{0}_{1}" -f $versionSlug, $timestamp
$outDir = Join-Path -Path $repoRoot -ChildPath (Join-Path -Path $OutputRoot -ChildPath $runFolder)
function Invoke-Step {
    <#
    .SYNOPSIS
    Runs one named validation step and prints PASS only when it succeeded.

    .PARAMETER Name
    Human-readable step name used in the "==>" banner and the PASS/FAIL text.

    .PARAMETER Body
    Script block containing the step. Any terminating error it raises
    propagates to the caller unchanged.

    .NOTES
    Throws when the last native command executed by Body left a non-zero
    $LASTEXITCODE. Only the LAST native command of the step is visible this
    way; earlier ones in the same step are not re-checked.
    #>
    param(
        [string]$Name,
        [scriptblock]$Body
    )
    Write-Host ""
    Write-Host "==> $Name"
    # Reset before running: a stale exit code from an earlier step must not be
    # attributed to this one, and under Set-StrictMode reading $LASTEXITCODE
    # before any native command has run is itself an error.
    $global:LASTEXITCODE = 0
    & $Body
    # $ErrorActionPreference = "Stop" does not turn a non-zero exit code of a
    # native program (e.g. python) into a terminating error, so without this
    # check a failing command would still be reported as PASS.
    if ($LASTEXITCODE -ne 0) {
        throw "FAIL: $Name (native command exited with code $LASTEXITCODE)"
    }
    Write-Host "PASS: $Name"
}
function Assert-True {
    # Guard helper: returns silently when $Condition holds, otherwise throws
    # $Message as a terminating error.
    param(
        [bool]$Condition,
        [string]$Message
    )
    if ($Condition) {
        return
    }
    throw $Message
}
# Run every step from the repository root; the finally block restores the
# caller's location whether the validation passes or throws.
Push-Location $repoRoot
try {
    # The CLI must report exactly "STEM BIO-AI <ExpectedVersion>".
    Invoke-Step "CLI version is $ExpectedVersion" {
        $version = python -m stem_ai --version
        Write-Host $version
        Assert-True ($version -eq "STEM BIO-AI $ExpectedVersion") "Unexpected CLI version: $version"
    }
    Invoke-Step "pytest regression suite" {
        python -m pytest -q
    }
    # Build, then confirm both the wheel and the sdist for this version exist in dist\.
    Invoke-Step "package build" {
        python scripts\build_stdlib_package.py
        Assert-True (Test-Path -LiteralPath (Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion-py3-none-any.whl")) "Wheel artifact missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion.tar.gz")) "sdist artifact missing"
    }
    # Produces the artifacts that the JSON, explain and markdown/PDF steps below inspect.
    Invoke-Step "local audit artifacts with --explain" {
        New-Item -ItemType Directory -Force -Path $outDir | Out-Null
        python -m stem_ai . --level 3 --format all --out $outDir --explain --advisory validate
    }
    # Checks versions, required sections and policy flags of the single result JSON.
    Invoke-Step "audit JSON contract" {
        $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        Assert-True ($jsonFiles.Count -eq 1) "Expected one experiment_results JSON, found $($jsonFiles.Count)"
        $result = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json
        Assert-True ($result.stem_ai_version -eq $ExpectedVersion) "stem_ai_version mismatch: $($result.stem_ai_version)"
        Assert-True ($result.schema_version -eq "stem-ai-local-cli-result-v1.6") "schema_version mismatch: $($result.schema_version)"
        Assert-True ($null -ne $result.evidence_ledger -and $result.evidence_ledger.Count -gt 0) "evidence_ledger missing or empty"
        Assert-True ($null -ne $result.detector_summary) "detector_summary missing"
        Assert-True ($null -ne $result.ast_signal_summary) "ast_signal_summary missing"
        Assert-True ($null -ne $result.stage_4_rubric) "stage_4_rubric missing"
        Assert-True ($null -ne $result.replication_score) "replication_score missing"
        Assert-True ([string]$result.replication_tier -match "^R[0-4]$") "replication_tier invalid: $($result.replication_tier)"
        Assert-True ($null -ne $result.reasoning_model) "reasoning_model missing"
        Assert-True ($result.reasoning_model.version -eq "stem-bio-ai-reasoning-v1.3.2") "reasoning_model version mismatch: $($result.reasoning_model.version)"
        Assert-True ($result.reasoning_model.policy.final_score_override -eq $false) "reasoning_model must not override final score"
        Assert-True ($null -ne $result.reasoning_model.lane_coherence) "reasoning_model.lane_coherence missing"
        Assert-True ($null -ne $result.reasoning_model.uncertainty_budget) "reasoning_model.uncertainty_budget missing"
        Assert-True ($null -ne $result.reasoning_model.evidence_risk_gate) "reasoning_model.evidence_risk_gate missing"
        Assert-True ($null -ne $result.ai_advisory) "ai_advisory missing"
        Assert-True ($result.ai_advisory.schema_version -eq "stem-ai-advisory-v1.4") "ai_advisory schema mismatch: $($result.ai_advisory.schema_version)"
        Assert-True ($result.ai_advisory.policy.final_score_override -eq $false) "ai_advisory must not override final score"
        Assert-True ($result.ai_advisory.policy.requires_finding_id_citations -eq $true) "ai_advisory must require finding_id citations"
        Assert-True ($result.ai_advisory.invalid_citations.Count -eq 0) "ai_advisory has invalid citations"
        # "\\" is a regex for one literal backslash: no finding_id may contain one.
        $badIds = @($result.evidence_ledger | Where-Object { [string]$_.finding_id -match "\\" })
        Assert-True ($badIds.Count -eq 0) "finding_id contains Windows backslash"
        $s4Findings = @($result.evidence_ledger | Where-Object { [string]$_.detector -like "S4_*" })
        Assert-True ($s4Findings.Count -gt 0) "Stage 4 findings missing from evidence_ledger"
        Write-Host "score=$($result.score.final_score) tier=$($result.score.formal_tier)"
        Write-Host "replication_score=$($result.replication_score) replication_tier=$($result.replication_tier)"
        Write-Host "evidence_ledger=$($result.evidence_ledger.Count)"
    }
    # The explain text file must contain each of the expected section markers.
    Invoke-Step "explain artifact contract" {
        $explainFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_explain.txt")
        Assert-True ($explainFiles.Count -eq 1) "Expected one explain artifact, found $($explainFiles.Count)"
        $explain = Get-Content -LiteralPath $explainFiles[0].FullName -Raw
        Assert-True ($explain.Contains("STEM BIO-AI Explain Report")) "Explain header missing"
        Assert-True ($explain.Contains("finding_id:")) "Explain finding_id lines missing"
        Assert-True ($explain.Contains("AST Signal Summary")) "Explain AST summary missing"
        Assert-True ($explain.Contains("Stage 4 Replication Rubric")) "Explain Stage 4 rubric missing"
        Assert-True ($explain.Contains("DISCLAIMER:")) "Explain disclaimer missing"
    }
    # Exports an advisory input packet into its own sub-folder and validates its contract.
    Invoke-Step "advisory packet export contract" {
        $packetDir = Join-Path $outDir "packet"
        New-Item -ItemType Directory -Force -Path $packetDir | Out-Null
        python -m stem_ai . --format json --out $packetDir --advisory packet
        $packetFiles = @(Get-ChildItem -LiteralPath $packetDir -Filter "*_advisory_input.json")
        Assert-True ($packetFiles.Count -eq 1) "Expected one advisory input packet, found $($packetFiles.Count)"
        $packet = Get-Content -LiteralPath $packetFiles[0].FullName -Raw | ConvertFrom-Json
        Assert-True ($packet.schema_version -eq "stem-ai-advisory-input-v1.4") "advisory packet schema mismatch: $($packet.schema_version)"
        Assert-True ($packet.policy.raw_repo_text_allowed -eq $false) "advisory packet must not allow raw repo text"
        Assert-True ($packet.policy.requires_finding_id_citations -eq $true) "advisory packet must require finding_id citations"
        Assert-True ($null -ne $packet.provider_request) "provider_request missing"
        Assert-True ($packet.provider_request.provider -eq "none") "default provider should be none"
        Assert-True ($packet.provider_request.registry.Count -ge 7) "provider registry too small"
        Assert-True ($null -ne $packet.evidence_ledger -and $packet.evidence_ledger.Count -gt 0) "packet evidence ledger missing"
        Assert-True ($packet.packet_profile -eq "provider_budgeted") "packet profile should be provider_budgeted"
        Assert-True ($packet.evidence_ledger.Count -le 40) "provider packet should be capped to 40 findings"
        Assert-True ($packet.allowed_finding_ids.Count -eq $packet.evidence_ledger.Count) "allowed_finding_ids count mismatch"
        Assert-True ($null -ne $packet.provider_prompt_contract) "provider_prompt_contract missing"
        Assert-True ([string]$packet.provider_prompt_contract.citation_rule -match "allowed_finding_ids") "citation rule must mention allowed_finding_ids"
        Assert-True ($null -ne $packet.provider_request.request_schema) "provider request schema missing"
        Assert-True ($packet.provider_request.request_schema.schema_version -eq "stem-ai-provider-request-v1.4") "provider request schema version mismatch"
        Assert-True ($packet.provider_request.args_validation.status -eq "valid") "provider request args should validate"
        Assert-True ($null -ne $packet.provider_request.base_url_validation) "base_url_validation missing"
        Assert-True ($null -ne $packet.provider_request.secret_policy) "secret_policy missing"
        Assert-True ($null -ne $packet.provider_request.env_contract) "env_contract missing"
        Assert-True ($null -ne $packet.contract_schemas) "contract_schemas missing"
        Assert-True ($packet.contract_schemas.schema_version -eq "stem-ai-advisory-contracts-v1.4") "contract_schemas version mismatch"
        Assert-True ($null -ne $packet.packet_contract) "packet_contract missing"
        Assert-True ($packet.packet_contract.status -eq "valid") "packet_contract must validate"
        # Textual scan of the raw file: the key "snippet" must not appear anywhere in the packet.
        $packetText = Get-Content -LiteralPath $packetFiles[0].FullName -Raw
        Assert-True (-not $packetText.Contains('"snippet"')) "packet must not include raw snippets"
    }
    # Feeds a hand-built provider response, citing a real finding_id from the
    # baseline audit, back through the CLI and checks the validation outcome.
    Invoke-Step "advisory response file validation contract" {
        $responseDir = Join-Path $outDir "response"
        New-Item -ItemType Directory -Force -Path $responseDir | Out-Null
        $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        $baseline = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json
        $cite = [string]$baseline.evidence_ledger[0].finding_id
        $responseFile = Join-Path $responseDir "provider_advisory.json"
        # NOTE(review): on Windows PowerShell 5.1 "-Encoding UTF8" writes a BOM;
        # confirm the CLI's JSON reader accepts it.
        @{
            provider = "external_response"
            model = "release-validation-provider"
            reviewer_notes = @(@{
                claim = "Review the cited evidence before advisory use."
                severity = "info"
                cites = @($cite)
                recommended_action = "Inspect the cited finding in the evidence ledger."
            })
            inspection_priorities = @()
        } | ConvertTo-Json -Depth 8 | Set-Content -LiteralPath $responseFile -Encoding UTF8
        python -m stem_ai . --format json --out $responseDir --advisory-response $responseFile
        $responseJson = @(Get-ChildItem -LiteralPath $responseDir -Filter "*_experiment_results.json")
        Assert-True ($responseJson.Count -eq 1) "Expected one response validation result JSON, found $($responseJson.Count)"
        $response = Get-Content -LiteralPath $responseJson[0].FullName -Raw | ConvertFrom-Json
        Assert-True ($response.ai_advisory.status -eq "valid") "advisory response should validate"
        Assert-True ($response.ai_advisory.response_contract.network_called -eq $false) "response validator must not call network"
        Assert-True ($response.ai_advisory.response_contract.citation_repair_attempted -eq $false) "response validator must not repair citations"
        Assert-True ($response.ai_advisory.invalid_citations.Count -eq 0) "response validator has invalid citations"
    }
    # Runs the benchmark exporter and checks its three output files and summary fields.
    Invoke-Step "provider benchmark exporter contract" {
        $benchmarkDir = Join-Path $outDir "provider_benchmark"
        python scripts\provider_packet_benchmark.py --out $benchmarkDir
        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "benchmark_manifest.json")) "provider benchmark manifest missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "packet_stats.jsonl")) "packet_stats.jsonl missing"
        Assert-True (Test-Path -LiteralPath (Join-Path $benchmarkDir "packet_summary.json")) "packet_summary.json missing"
        $summary = Get-Content -LiteralPath (Join-Path $benchmarkDir "packet_summary.json") -Raw | ConvertFrom-Json
        Assert-True ($summary.record_count -gt 0) "provider benchmark should include records"
        Assert-True ($summary.all_citation_allowlists_exact -eq $true) "citation allowlists should be exact"
        Assert-True ($summary.max_packet_finding_count -le 40) "provider packets should be capped to 40 findings"
    }
    Invoke-Step "markdown and PDF artifacts exist" {
        $mdFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_report.md")
        $pdfFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*.pdf")
        Assert-True ($mdFiles.Count -eq 1) "Expected one Markdown report, found $($mdFiles.Count)"
        Assert-True ($pdfFiles.Count -eq 1) "Expected one PDF report, found $($pdfFiles.Count)"
        # Size floor (in bytes) as a cheap check that the PDF is not an empty stub.
        Assert-True ($pdfFiles[0].Length -gt 1000) "PDF report appears too small"
    }
    # Optional scan: runs only when requested AND the detector checkout exists.
    if ($WithSlop -and (Test-Path -LiteralPath $SlopDetectorPath)) {
        Invoke-Step "slop detector clean scan" {
            $slopOut = Join-Path $outDir "slop_report.json"
            $slopConfig = Join-Path $outDir "slop_config.yaml"
            # The here-string below is written to disk verbatim; do not indent it.
            @"
ignore:
- ".git/**"
- "**/.git/**"
- "__pycache__/**"
- "**/__pycache__/**"
- ".pytest_cache/**"
- "tmp/**"
- "**/tmp/**"
- "dist/**"
- "build/**"
- "*.egg-info/**"
- "audits/**"
- "stem_output*/**"
- ".venv/**"
- "venv/**"
- "node_modules/**"
"@ | Set-Content -LiteralPath $slopConfig -Encoding UTF8
            # The detector is invoked from inside its own directory; the inner
            # finally restores the location even if the scan throws.
            Push-Location $SlopDetectorPath
            try {
                python -m slop_detector.cli --project $repoRoot --config $slopConfig --json --output $slopOut
            }
            finally {
                Pop-Location
            }
            $slop = Get-Content -LiteralPath $slopOut -Raw | ConvertFrom-Json
            Assert-True ($slop.overall_status -eq "clean") "Slop status is not clean: $($slop.overall_status)"
            Assert-True ([int]$slop.deficit_files -eq 0) "Slop deficit_files is not zero: $($slop.deficit_files)"
            Write-Host "slop overall_status=$($slop.overall_status) clean_files=$($slop.clean_files) deficit_files=$($slop.deficit_files)"
        }
    }
    elseif ($WithSlop) {
        # -WithSlop was given but the detector directory is missing: say so,
        # rather than telling the user to pass a switch they already passed.
        Write-Host ""
        Write-Host "SKIP: external slop detector clean scan (SlopDetectorPath not found: $SlopDetectorPath)"
    }
    else {
        Write-Host ""
        Write-Host "SKIP: external slop detector clean scan (pass -WithSlop to enable)"
    }
    Write-Host ""
    Write-Host "STEM BIO-AI v$ExpectedVersion validation PASSED"
    Write-Host "Artifacts: $outDir"
}
finally {
    Pop-Location
}
|