File size: 14,023 Bytes
6a1cba7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# Release validation driver for STEM BIO-AI: runs the CLI, the test suite, the
# package build and a series of artifact contract checks, throwing on the first
# failed assertion.
param(
    # Version the CLI banner, the build artifact names and the result JSON must report.
    [string]$ExpectedVersion = "1.6.0",
    # Output folder, joined onto the repository root; each run creates its own subfolder in it.
    [string]$OutputRoot = "tmp\release_validation",
    # Directory from which the external slop detector is run; only consulted with -WithSlop.
    # NOTE(review): the default is a machine-specific absolute path - confirm before relying on it.
    [string]$SlopDetectorPath = "D:\Sanctum\ai-slop-detector",
    # Opt in to the external slop detector scan.
    [switch]$WithSlop
)

# Strict mode and a "Stop" error preference make cmdlet errors and references
# to undefined variables/properties terminate the run.
Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# The repository root is the parent of the folder that contains this script.
$scriptDir = Split-Path -Path $MyInvocation.MyCommand.Path -Parent
$repoRoot = (Join-Path -Path $scriptDir -ChildPath ".." | Resolve-Path).Path

# Each run writes into its own folder: v<version with dots as underscores>_<timestamp>.
$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
$versionSlug = "v" + $ExpectedVersion.Replace(".", "_")
$outDir = Join-Path -Path $repoRoot -ChildPath (Join-Path -Path $OutputRoot -ChildPath "${versionSlug}_${timestamp}")

function Invoke-Step {
    <#
    .SYNOPSIS
    Runs one named validation step and reports PASS only if it really succeeded.

    .DESCRIPTION
    Prints a "==> <Name>" banner, invokes the script block, and prints
    "PASS: <Name>" afterwards. Any output of the script block is passed through.

    A terminating error inside the script block propagates to the caller.
    In addition, the exit code of the last native command run by the script
    block is checked: $ErrorActionPreference = "Stop" does not by itself turn a
    non-zero native exit code into an error, so without this check a failing
    external command would still be reported as PASS.

    .PARAMETER Name
    Human-readable step name used in the banner and PASS/FAIL text.

    .PARAMETER Body
    Script block containing the step's work.

    .PARAMETER AllowedExitCodes
    Native exit codes that count as success. Defaults to 0 only.
    #>
    param(
        [string]$Name,
        [scriptblock]$Body,
        [int[]]$AllowedExitCodes = @(0)
    )
    Write-Host ""
    Write-Host "==> $Name"

    # Reset first: a stale code from an earlier command must not fail this step,
    # and under strict mode the variable has to exist before it is read.
    $global:LASTEXITCODE = 0

    & $Body

    $exitCode = $global:LASTEXITCODE
    if ($AllowedExitCodes -notcontains $exitCode) {
        throw "FAIL: $Name (native command exited with code $exitCode)"
    }
    Write-Host "PASS: $Name"
}

function Assert-True {
    <#
    .SYNOPSIS
    Throws the given message unless the condition holds.

    .PARAMETER Condition
    Boolean result of the check being asserted.

    .PARAMETER Message
    Text thrown as the error when Condition is false.
    #>
    param(
        [bool]$Condition,
        [string]$Message
    )
    if ($Condition) {
        return
    }
    throw $Message
}

Push-Location $repoRoot
try {
    Invoke-Step "CLI version is $ExpectedVersion" {
        # The CLI must print exactly the product banner followed by the expected version.
        $reported = python -m stem_ai --version
        Write-Host $reported
        $expectedBanner = "STEM BIO-AI $ExpectedVersion"
        Assert-True ($reported -eq $expectedBanner) "Unexpected CLI version: $reported"
    }

    # Step: run the repository's pytest suite in quiet mode (-q).
    # The exit status of python is not inspected inside this script block.
    Invoke-Step "pytest regression suite" {
        python -m pytest -q
    }

    Invoke-Step "package build" {
        python scripts\build_stdlib_package.py

        # Both distribution files for the expected version must exist under dist\.
        $expectedArtifacts = @(
            @{ RelativePath = "dist\stem_ai-$ExpectedVersion-py3-none-any.whl"; Failure = "Wheel artifact missing" },
            @{ RelativePath = "dist\stem_ai-$ExpectedVersion.tar.gz"; Failure = "sdist artifact missing" }
        )
        foreach ($artifact in $expectedArtifacts) {
            $artifactPath = Join-Path $repoRoot $artifact.RelativePath
            Assert-True (Test-Path -LiteralPath $artifactPath) $artifact.Failure
        }
    }

    Invoke-Step "local audit artifacts with --explain" {
        # Create the per-run output folder, then audit the repository itself into it.
        $null = New-Item -ItemType Directory -Force -Path $outDir
        $auditArgs = @(".", "--level", "3", "--format", "all", "--out", $outDir, "--explain", "--advisory", "validate")
        python -m stem_ai @auditArgs
    }

    Invoke-Step "audit JSON contract" {
        # Exactly one result document is expected in the run folder.
        $resultFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        Assert-True ($resultFiles.Count -eq 1) "Expected one experiment_results JSON, found $($resultFiles.Count)"
        $audit = Get-Content -LiteralPath $resultFiles[0].FullName -Raw | ConvertFrom-Json

        # Top-level identity and required sections.
        Assert-True ($audit.stem_ai_version -eq $ExpectedVersion) "stem_ai_version mismatch: $($audit.stem_ai_version)"
        Assert-True ($audit.schema_version -eq "stem-ai-local-cli-result-v1.6") "schema_version mismatch: $($audit.schema_version)"
        Assert-True ($null -ne $audit.evidence_ledger -and $audit.evidence_ledger.Count -gt 0) "evidence_ledger missing or empty"
        Assert-True ($null -ne $audit.detector_summary) "detector_summary missing"
        Assert-True ($null -ne $audit.ast_signal_summary) "ast_signal_summary missing"
        Assert-True ($null -ne $audit.stage_4_rubric) "stage_4_rubric missing"
        Assert-True ($null -ne $audit.replication_score) "replication_score missing"
        Assert-True ([string]$audit.replication_tier -match "^R[0-4]$") "replication_tier invalid: $($audit.replication_tier)"

        # Reasoning model section.
        Assert-True ($null -ne $audit.reasoning_model) "reasoning_model missing"
        Assert-True ($audit.reasoning_model.version -eq "stem-bio-ai-reasoning-v1.3.2") "reasoning_model version mismatch: $($audit.reasoning_model.version)"
        Assert-True ($audit.reasoning_model.policy.final_score_override -eq $false) "reasoning_model must not override final score"
        Assert-True ($null -ne $audit.reasoning_model.lane_coherence) "reasoning_model.lane_coherence missing"
        Assert-True ($null -ne $audit.reasoning_model.uncertainty_budget) "reasoning_model.uncertainty_budget missing"
        Assert-True ($null -ne $audit.reasoning_model.evidence_risk_gate) "reasoning_model.evidence_risk_gate missing"

        # AI advisory section.
        Assert-True ($null -ne $audit.ai_advisory) "ai_advisory missing"
        Assert-True ($audit.ai_advisory.schema_version -eq "stem-ai-advisory-v1.4") "ai_advisory schema mismatch: $($audit.ai_advisory.schema_version)"
        Assert-True ($audit.ai_advisory.policy.final_score_override -eq $false) "ai_advisory must not override final score"
        Assert-True ($audit.ai_advisory.policy.requires_finding_id_citations -eq $true) "ai_advisory must require finding_id citations"
        Assert-True ($audit.ai_advisory.invalid_citations.Count -eq 0) "ai_advisory has invalid citations"

        # No finding_id may contain a backslash.
        $backslashIds = @($audit.evidence_ledger | Where-Object { [string]$_.finding_id -match "\\" })
        Assert-True ($backslashIds.Count -eq 0) "finding_id contains Windows backslash"

        # At least one ledger entry must come from a detector named S4_*.
        $stage4Entries = @($audit.evidence_ledger | Where-Object { [string]$_.detector -like "S4_*" })
        Assert-True ($stage4Entries.Count -gt 0) "Stage 4 findings missing from evidence_ledger"

        Write-Host "score=$($audit.score.final_score) tier=$($audit.score.formal_tier)"
        Write-Host "replication_score=$($audit.replication_score) replication_tier=$($audit.replication_tier)"
        Write-Host "evidence_ledger=$($audit.evidence_ledger.Count)"
    }

    Invoke-Step "explain artifact contract" {
        $explainHits = @(Get-ChildItem -LiteralPath $outDir -Filter "*_explain.txt")
        Assert-True ($explainHits.Count -eq 1) "Expected one explain artifact, found $($explainHits.Count)"
        $explainText = Get-Content -LiteralPath $explainHits[0].FullName -Raw

        # Each marker must appear verbatim in the report; checked in this order.
        $requiredMarkers = @(
            @{ Text = "STEM BIO-AI Explain Report"; Failure = "Explain header missing" },
            @{ Text = "finding_id:"; Failure = "Explain finding_id lines missing" },
            @{ Text = "AST Signal Summary"; Failure = "Explain AST summary missing" },
            @{ Text = "Stage 4 Replication Rubric"; Failure = "Explain Stage 4 rubric missing" },
            @{ Text = "DISCLAIMER:"; Failure = "Explain disclaimer missing" }
        )
        foreach ($marker in $requiredMarkers) {
            Assert-True ($explainText.Contains($marker.Text)) $marker.Failure
        }
    }

    Invoke-Step "advisory packet export contract" {
        # Export an advisory input packet into its own subfolder of the run folder.
        $packetDir = Join-Path $outDir "packet"
        $null = New-Item -ItemType Directory -Force -Path $packetDir
        python -m stem_ai . --format json --out $packetDir --advisory packet

        $packetHits = @(Get-ChildItem -LiteralPath $packetDir -Filter "*_advisory_input.json")
        Assert-True ($packetHits.Count -eq 1) "Expected one advisory input packet, found $($packetHits.Count)"
        $pkt = Get-Content -LiteralPath $packetHits[0].FullName -Raw | ConvertFrom-Json

        # Packet identity and policy flags.
        Assert-True ($pkt.schema_version -eq "stem-ai-advisory-input-v1.4") "advisory packet schema mismatch: $($pkt.schema_version)"
        Assert-True ($pkt.policy.raw_repo_text_allowed -eq $false) "advisory packet must not allow raw repo text"
        Assert-True ($pkt.policy.requires_finding_id_citations -eq $true) "advisory packet must require finding_id citations"

        # Provider request defaults and registry.
        Assert-True ($null -ne $pkt.provider_request) "provider_request missing"
        Assert-True ($pkt.provider_request.provider -eq "none") "default provider should be none"
        Assert-True ($pkt.provider_request.registry.Count -ge 7) "provider registry too small"

        # Evidence ledger size cap and citation allowlist.
        Assert-True ($null -ne $pkt.evidence_ledger -and $pkt.evidence_ledger.Count -gt 0) "packet evidence ledger missing"
        Assert-True ($pkt.packet_profile -eq "provider_budgeted") "packet profile should be provider_budgeted"
        Assert-True ($pkt.evidence_ledger.Count -le 40) "provider packet should be capped to 40 findings"
        Assert-True ($pkt.allowed_finding_ids.Count -eq $pkt.evidence_ledger.Count) "allowed_finding_ids count mismatch"
        Assert-True ($null -ne $pkt.provider_prompt_contract) "provider_prompt_contract missing"
        Assert-True ([string]$pkt.provider_prompt_contract.citation_rule -match "allowed_finding_ids") "citation rule must mention allowed_finding_ids"

        # Provider request schema and validation sections.
        Assert-True ($null -ne $pkt.provider_request.request_schema) "provider request schema missing"
        Assert-True ($pkt.provider_request.request_schema.schema_version -eq "stem-ai-provider-request-v1.4") "provider request schema version mismatch"
        Assert-True ($pkt.provider_request.args_validation.status -eq "valid") "provider request args should validate"
        Assert-True ($null -ne $pkt.provider_request.base_url_validation) "base_url_validation missing"
        Assert-True ($null -ne $pkt.provider_request.secret_policy) "secret_policy missing"
        Assert-True ($null -ne $pkt.provider_request.env_contract) "env_contract missing"

        # Embedded contract schemas and the packet's own validation status.
        Assert-True ($null -ne $pkt.contract_schemas) "contract_schemas missing"
        Assert-True ($pkt.contract_schemas.schema_version -eq "stem-ai-advisory-contracts-v1.4") "contract_schemas version mismatch"
        Assert-True ($null -ne $pkt.packet_contract) "packet_contract missing"
        Assert-True ($pkt.packet_contract.status -eq "valid") "packet_contract must validate"

        # Textual check on the raw file: no "snippet" key anywhere in the packet.
        $rawPacket = Get-Content -LiteralPath $packetHits[0].FullName -Raw
        Assert-True (-not $rawPacket.Contains('"snippet"')) "packet must not include raw snippets"
    }

    Invoke-Step "advisory response file validation contract" {
        # Build a minimal external advisory response that cites one real finding
        # from the baseline audit, then have the CLI validate it.
        $responseDir = Join-Path $outDir "response"
        New-Item -ItemType Directory -Force -Path $responseDir | Out-Null
        $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        $baseline = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json
        $cite = [string]$baseline.evidence_ledger[0].finding_id
        $responseFile = Join-Path $responseDir "provider_advisory.json"
        $responsePayload = @{
            provider = "external_response"
            model = "release-validation-provider"
            reviewer_notes = @(@{
                claim = "Review the cited evidence before advisory use."
                severity = "info"
                cites = @($cite)
                recommended_action = "Inspect the cited finding in the evidence ledger."
            })
            inspection_priorities = @()
        } | ConvertTo-Json -Depth 8

        # Write UTF-8 without a byte-order mark. "Set-Content -Encoding UTF8" adds
        # a BOM on Windows PowerShell 5.1 but not on PowerShell 6+, so the file
        # would differ by edition; BOM-less UTF-8 is the form JSON readers accept
        # everywhere. The provider path is resolved first because .NET file APIs
        # do not understand PowerShell drive or provider-qualified paths.
        $responseFileNative = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($responseFile)
        $utf8NoBom = New-Object System.Text.UTF8Encoding($false)
        [System.IO.File]::WriteAllText($responseFileNative, $responsePayload, $utf8NoBom)

        python -m stem_ai . --format json --out $responseDir --advisory-response $responseFile
        $responseJson = @(Get-ChildItem -LiteralPath $responseDir -Filter "*_experiment_results.json")
        Assert-True ($responseJson.Count -eq 1) "Expected one response validation result JSON, found $($responseJson.Count)"
        $response = Get-Content -LiteralPath $responseJson[0].FullName -Raw | ConvertFrom-Json
        Assert-True ($response.ai_advisory.status -eq "valid") "advisory response should validate"
        Assert-True ($response.ai_advisory.response_contract.network_called -eq $false) "response validator must not call network"
        Assert-True ($response.ai_advisory.response_contract.citation_repair_attempted -eq $false) "response validator must not repair citations"
        Assert-True ($response.ai_advisory.invalid_citations.Count -eq 0) "response validator has invalid citations"
    }

    Invoke-Step "provider benchmark exporter contract" {
        $benchmarkDir = Join-Path $outDir "provider_benchmark"
        python scripts\provider_packet_benchmark.py --out $benchmarkDir

        # The exporter must leave these three files behind; checked in this order.
        $expectedOutputs = @(
            @{ Leaf = "benchmark_manifest.json"; Failure = "provider benchmark manifest missing" },
            @{ Leaf = "packet_stats.jsonl"; Failure = "packet_stats.jsonl missing" },
            @{ Leaf = "packet_summary.json"; Failure = "packet_summary.json missing" }
        )
        foreach ($output in $expectedOutputs) {
            $outputPath = Join-Path $benchmarkDir $output.Leaf
            Assert-True (Test-Path -LiteralPath $outputPath) $output.Failure
        }

        # Summary-level invariants.
        $summaryPath = Join-Path $benchmarkDir "packet_summary.json"
        $stats = Get-Content -LiteralPath $summaryPath -Raw | ConvertFrom-Json
        Assert-True ($stats.record_count -gt 0) "provider benchmark should include records"
        Assert-True ($stats.all_citation_allowlists_exact -eq $true) "citation allowlists should be exact"
        Assert-True ($stats.max_packet_finding_count -le 40) "provider packets should be capped to 40 findings"
    }

    Invoke-Step "markdown and PDF artifacts exist" {
        # Exactly one Markdown report and one PDF are expected in the run folder.
        $markdownReports = @(Get-ChildItem -LiteralPath $outDir -Filter "*_report.md")
        $pdfReports = @(Get-ChildItem -LiteralPath $outDir -Filter "*.pdf")
        Assert-True ($markdownReports.Count -eq 1) "Expected one Markdown report, found $($markdownReports.Count)"
        Assert-True ($pdfReports.Count -eq 1) "Expected one PDF report, found $($pdfReports.Count)"

        # Size floor (bytes) as a cheap guard against an empty or truncated PDF.
        $pdfBytes = $pdfReports[0].Length
        Assert-True ($pdfBytes -gt 1000) "PDF report appears too small"
    }

    # Optional external slop detector scan. Three outcomes:
    #   - -WithSlop not passed: skipped, with a hint on how to enable it.
    #   - -WithSlop passed but the detector folder is absent: skipped with a
    #     warning naming the missing path, so the skip is not mistaken for a
    #     clean scan.
    #   - otherwise: the scan runs and must report a clean status.
    if (-not $WithSlop) {
        Write-Host ""
        Write-Host "SKIP: external slop detector clean scan (pass -WithSlop to enable)"
    }
    elseif (-not (Test-Path -LiteralPath $SlopDetectorPath)) {
        Write-Host ""
        Write-Warning "SKIP: external slop detector clean scan (-WithSlop was passed but SlopDetectorPath does not exist: $SlopDetectorPath)"
    }
    else {
        Invoke-Step "slop detector clean scan" {
            $slopOut = Join-Path $outDir "slop_report.json"
            $slopConfig = Join-Path $outDir "slop_config.yaml"
            # Ignore list written next to the report; the here-string body must stay at column 0.
            @"
ignore:
  - ".git/**"
  - "**/.git/**"
  - "__pycache__/**"
  - "**/__pycache__/**"
  - ".pytest_cache/**"
  - "tmp/**"
  - "**/tmp/**"
  - "dist/**"
  - "build/**"
  - "*.egg-info/**"
  - "audits/**"
  - "stem_output*/**"
  - ".venv/**"
  - "venv/**"
  - "node_modules/**"
"@ | Set-Content -LiteralPath $slopConfig -Encoding UTF8
            # The detector is invoked from its own folder; always return afterwards.
            Push-Location $SlopDetectorPath
            try {
                python -m slop_detector.cli --project $repoRoot --config $slopConfig --json --output $slopOut
            }
            finally {
                Pop-Location
            }
            $slop = Get-Content -LiteralPath $slopOut -Raw | ConvertFrom-Json
            Assert-True ($slop.overall_status -eq "clean") "Slop status is not clean: $($slop.overall_status)"
            Assert-True ([int]$slop.deficit_files -eq 0) "Slop deficit_files is not zero: $($slop.deficit_files)"
            Write-Host "slop overall_status=$($slop.overall_status) clean_files=$($slop.clean_files) deficit_files=$($slop.deficit_files)"
        }
    }

    # Reached only when no step threw.
    Write-Host ""
    Write-Host "STEM BIO-AI v$ExpectedVersion validation PASSED"
    Write-Host "Artifacts: $outDir"
}
finally {
    Pop-Location
}