Spaces:

Flamehaven
/

stem-bio-ai

Running

File size: 14,023 Bytes

6a1cba7

# Parameters for the release validation run.
param(
    # Version the release is expected to report. It is compared against the CLI
    # --version output and the result JSON, and is used to build the expected
    # dist artifact names and the output folder name.
    [string]$ExpectedVersion = "1.6.0",
    # Directory under which a timestamped output folder is created for this run.
    # A relative value is resolved against the repository root.
    [string]$OutputRoot = "tmp\release_validation",
    # Directory the slop detector is launched from (python -m slop_detector.cli).
    # NOTE(review): the default is a machine-specific absolute path.
    [string]$SlopDetectorPath = "D:\Sanctum\ai-slop-detector",
    # Opt-in switch for the external slop detector scan; the scan only runs when
    # $SlopDetectorPath also exists.
    [switch]$WithSlop
)

# Fail fast: strict variable/property checks, and every PowerShell error is terminating.
Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# The repository root is the parent of the directory that contains this script.
$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
$repoRoot = (Resolve-Path (Join-Path $scriptDir "..")).Path
$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
# "1.6.0" -> "v1_6_0"; used as the prefix of the per-run output folder name.
$versionSlug = "v" + ($ExpectedVersion -replace "\.", "_")
$runFolder = "$($versionSlug)_$timestamp"

# Join-Path concatenates its arguments without checking for a root, so an
# absolute -OutputRoot must be used as given instead of being appended to the
# repository root (which would yield an invalid path like C:\repo\D:\out).
if ([System.IO.Path]::IsPathRooted($OutputRoot)) {
    $outDir = Join-Path $OutputRoot $runFolder
}
else {
    $outDir = Join-Path $repoRoot (Join-Path $OutputRoot $runFolder)
}

function Invoke-Step {
    <#
    .SYNOPSIS
    Runs one named validation step and prints PASS only when it succeeded.

    .DESCRIPTION
    Prints a "==> Name" banner, invokes the script block, and prints
    "PASS: Name" afterwards. Any output of the script block is passed through.

    A step fails (an exception propagates to the caller) when the script block
    throws, or when the last native command it ran (for example python) left a
    non-zero exit code. PowerShell does not treat a non-zero native exit code
    as a terminating error by default, so without this check a failing
    command would still be reported as PASS.

    .PARAMETER Name
    Human-readable step name used in the banner and PASS/FAIL messages.

    .PARAMETER Body
    Script block containing the step's commands and assertions.
    #>
    param(
        [string]$Name,
        [scriptblock]$Body
    )
    Write-Host ""
    Write-Host "==> $Name"

    # Reset first so an exit code left over from an earlier step is never
    # attributed to this one. This also guarantees the variable exists under
    # Set-StrictMode when the step runs no native command at all.
    $global:LASTEXITCODE = 0
    & $Body

    # Only the most recent native command's exit code is visible here.
    if ($LASTEXITCODE -ne 0) {
        throw "FAIL: $Name (native command exited with code $LASTEXITCODE)"
    }
    Write-Host "PASS: $Name"
}

function Assert-True {
    <#
    .SYNOPSIS
    Throws the given message when the condition is false; otherwise does nothing.

    .PARAMETER Condition
    Value to check, converted to [bool] by parameter binding.

    .PARAMETER Message
    Text thrown as the error when the condition does not hold.
    #>
    param(
        [bool]$Condition,
        [string]$Message
    )
    # Guard clause: nothing to report when the condition holds.
    if ($Condition) {
        return
    }
    throw $Message
}

Push-Location $repoRoot
try {
    Invoke-Step "CLI version is $ExpectedVersion" {
        # Capture what the CLI prints for --version and echo it for the log.
        $reportedVersion = python -m stem_ai --version
        Write-Host $reportedVersion

        # The CLI is expected to print the product name followed by the version.
        $expectedBanner = "STEM BIO-AI $ExpectedVersion"
        $bannerMatches = $reportedVersion -eq $expectedBanner
        Assert-True $bannerMatches "Unexpected CLI version: $reportedVersion"
    }

    Invoke-Step "pytest regression suite" {
        python -m pytest -q
        # A non-zero exit code from a native command is not a PowerShell error,
        # so the result has to be checked explicitly; otherwise failing tests
        # would still let this step be reported as passed.
        if ($LASTEXITCODE -ne 0) {
            throw "pytest regression suite failed (exit code $LASTEXITCODE)"
        }
    }

    Invoke-Step "package build" {
        python scripts\build_stdlib_package.py
        # Check the build's exit code explicitly: artifacts left in dist\ by an
        # earlier successful build would otherwise satisfy the existence checks
        # below even though this build failed.
        if ($LASTEXITCODE -ne 0) {
            throw "Package build failed (exit code $LASTEXITCODE)"
        }

        # Both the wheel and the sdist for the expected version must exist.
        $wheelPath = Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion-py3-none-any.whl"
        $sdistPath = Join-Path $repoRoot "dist\stem_ai-$ExpectedVersion.tar.gz"
        Assert-True (Test-Path -LiteralPath $wheelPath) "Wheel artifact missing"
        Assert-True (Test-Path -LiteralPath $sdistPath) "sdist artifact missing"
    }

    Invoke-Step "local audit artifacts with --explain" {
        # Create the per-run output directory (no error if it already exists).
        $null = New-Item -ItemType Directory -Force -Path $outDir

        # Audit the current directory and write every output format into $outDir.
        $auditArgs = @(
            "-m", "stem_ai", ".",
            "--level", "3",
            "--format", "all",
            "--out", $outDir,
            "--explain",
            "--advisory", "validate"
        )
        python @auditArgs
    }

    Invoke-Step "audit JSON contract" {
        # Exactly one *_experiment_results.json is expected in the output directory.
        $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        Assert-True ($jsonFiles.Count -eq 1) "Expected one experiment_results JSON, found $($jsonFiles.Count)"
        $result = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json

        # NOTE(review): the script enables Set-StrictMode -Version Latest, so reading a
        # property that is absent from the parsed JSON most likely raises an error
        # itself, before the "$null -ne" comparisons below are evaluated. The step
        # still fails in that case, just with a different message.

        # Top-level version and schema identifiers.
        Assert-True ($result.stem_ai_version -eq $ExpectedVersion) "stem_ai_version mismatch: $($result.stem_ai_version)"
        Assert-True ($result.schema_version -eq "stem-ai-local-cli-result-v1.6") "schema_version mismatch: $($result.schema_version)"
        # Required top-level sections.
        Assert-True ($null -ne $result.evidence_ledger -and $result.evidence_ledger.Count -gt 0) "evidence_ledger missing or empty"
        Assert-True ($null -ne $result.detector_summary) "detector_summary missing"
        Assert-True ($null -ne $result.ast_signal_summary) "ast_signal_summary missing"
        Assert-True ($null -ne $result.stage_4_rubric) "stage_4_rubric missing"
        Assert-True ($null -ne $result.replication_score) "replication_score missing"
        # replication_tier must be exactly one of R0..R4.
        Assert-True ([string]$result.replication_tier -match "^R[0-4]$") "replication_tier invalid: $($result.replication_tier)"
        # reasoning_model section: pinned version, no score override, required sub-sections.
        Assert-True ($null -ne $result.reasoning_model) "reasoning_model missing"
        Assert-True ($result.reasoning_model.version -eq "stem-bio-ai-reasoning-v1.3.2") "reasoning_model version mismatch: $($result.reasoning_model.version)"
        Assert-True ($result.reasoning_model.policy.final_score_override -eq $false) "reasoning_model must not override final score"
        Assert-True ($null -ne $result.reasoning_model.lane_coherence) "reasoning_model.lane_coherence missing"
        Assert-True ($null -ne $result.reasoning_model.uncertainty_budget) "reasoning_model.uncertainty_budget missing"
        Assert-True ($null -ne $result.reasoning_model.evidence_risk_gate) "reasoning_model.evidence_risk_gate missing"
        # ai_advisory section: pinned schema, policy flags, and no invalid citations.
        Assert-True ($null -ne $result.ai_advisory) "ai_advisory missing"
        Assert-True ($result.ai_advisory.schema_version -eq "stem-ai-advisory-v1.4") "ai_advisory schema mismatch: $($result.ai_advisory.schema_version)"
        Assert-True ($result.ai_advisory.policy.final_score_override -eq $false) "ai_advisory must not override final score"
        Assert-True ($result.ai_advisory.policy.requires_finding_id_citations -eq $true) "ai_advisory must require finding_id citations"
        Assert-True ($result.ai_advisory.invalid_citations.Count -eq 0) "ai_advisory has invalid citations"

        # The regex "\\" matches one literal backslash: no finding_id may contain one.
        $badIds = @($result.evidence_ledger | Where-Object { [string]$_.finding_id -match "\\" })
        Assert-True ($badIds.Count -eq 0) "finding_id contains Windows backslash"

        # At least one ledger entry must come from a detector whose name starts with "S4_".
        $s4Findings = @($result.evidence_ledger | Where-Object { [string]$_.detector -like "S4_*" })
        Assert-True ($s4Findings.Count -gt 0) "Stage 4 findings missing from evidence_ledger"

        # Summary lines for the validation log.
        Write-Host "score=$($result.score.final_score) tier=$($result.score.formal_tier)"
        Write-Host "replication_score=$($result.replication_score) replication_tier=$($result.replication_tier)"
        Write-Host "evidence_ledger=$($result.evidence_ledger.Count)"
    }

    Invoke-Step "explain artifact contract" {
        # Exactly one *_explain.txt is expected in the output directory.
        $explainFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_explain.txt")
        Assert-True ($explainFiles.Count -eq 1) "Expected one explain artifact, found $($explainFiles.Count)"
        $explainText = Get-Content -LiteralPath $explainFiles[0].FullName -Raw

        # Each entry pairs a marker that must appear in the report (case-sensitive)
        # with the failure message to use when it does not. Checked in order.
        $requiredMarkers = @(
            @("STEM BIO-AI Explain Report", "Explain header missing"),
            @("finding_id:", "Explain finding_id lines missing"),
            @("AST Signal Summary", "Explain AST summary missing"),
            @("Stage 4 Replication Rubric", "Explain Stage 4 rubric missing"),
            @("DISCLAIMER:", "Explain disclaimer missing")
        )
        foreach ($marker in $requiredMarkers) {
            Assert-True ($explainText.Contains($marker[0])) $marker[1]
        }
    }

    Invoke-Step "advisory packet export contract" {
        # Export an advisory input packet into its own sub-directory of the run output.
        $packetDir = Join-Path $outDir "packet"
        New-Item -ItemType Directory -Force -Path $packetDir | Out-Null
        python -m stem_ai . --format json --out $packetDir --advisory packet
        # Exactly one *_advisory_input.json is expected.
        $packetFiles = @(Get-ChildItem -LiteralPath $packetDir -Filter "*_advisory_input.json")
        Assert-True ($packetFiles.Count -eq 1) "Expected one advisory input packet, found $($packetFiles.Count)"
        $packet = Get-Content -LiteralPath $packetFiles[0].FullName -Raw | ConvertFrom-Json
        # Packet schema and policy flags.
        Assert-True ($packet.schema_version -eq "stem-ai-advisory-input-v1.4") "advisory packet schema mismatch: $($packet.schema_version)"
        Assert-True ($packet.policy.raw_repo_text_allowed -eq $false) "advisory packet must not allow raw repo text"
        Assert-True ($packet.policy.requires_finding_id_citations -eq $true) "advisory packet must require finding_id citations"
        # Provider request defaults: provider "none" and a registry of at least 7 entries.
        Assert-True ($null -ne $packet.provider_request) "provider_request missing"
        Assert-True ($packet.provider_request.provider -eq "none") "default provider should be none"
        Assert-True ($packet.provider_request.registry.Count -ge 7) "provider registry too small"
        # Evidence ledger must be present, capped at 40 entries, and matched
        # one-for-one in count by allowed_finding_ids.
        Assert-True ($null -ne $packet.evidence_ledger -and $packet.evidence_ledger.Count -gt 0) "packet evidence ledger missing"
        Assert-True ($packet.packet_profile -eq "provider_budgeted") "packet profile should be provider_budgeted"
        Assert-True ($packet.evidence_ledger.Count -le 40) "provider packet should be capped to 40 findings"
        Assert-True ($packet.allowed_finding_ids.Count -eq $packet.evidence_ledger.Count) "allowed_finding_ids count mismatch"
        # The prompt contract's citation rule must mention allowed_finding_ids.
        Assert-True ($null -ne $packet.provider_prompt_contract) "provider_prompt_contract missing"
        Assert-True ([string]$packet.provider_prompt_contract.citation_rule -match "allowed_finding_ids") "citation rule must mention allowed_finding_ids"
        # Provider request schema, argument validation, and required policy sections.
        Assert-True ($null -ne $packet.provider_request.request_schema) "provider request schema missing"
        Assert-True ($packet.provider_request.request_schema.schema_version -eq "stem-ai-provider-request-v1.4") "provider request schema version mismatch"
        Assert-True ($packet.provider_request.args_validation.status -eq "valid") "provider request args should validate"
        Assert-True ($null -ne $packet.provider_request.base_url_validation) "base_url_validation missing"
        Assert-True ($null -ne $packet.provider_request.secret_policy) "secret_policy missing"
        Assert-True ($null -ne $packet.provider_request.env_contract) "env_contract missing"
        # Contract schemas and the packet's own contract validation status.
        Assert-True ($null -ne $packet.contract_schemas) "contract_schemas missing"
        Assert-True ($packet.contract_schemas.schema_version -eq "stem-ai-advisory-contracts-v1.4") "contract_schemas version mismatch"
        Assert-True ($null -ne $packet.packet_contract) "packet_contract missing"
        Assert-True ($packet.packet_contract.status -eq "valid") "packet_contract must validate"
        # Re-read the file as raw text so the check covers every nesting level:
        # the literal text "snippet" (with quotes) must not appear anywhere.
        $packetText = Get-Content -LiteralPath $packetFiles[0].FullName -Raw
        Assert-True (-not $packetText.Contains('"snippet"')) "packet must not include raw snippets"
    }

    Invoke-Step "advisory response file validation contract" {
        $responseDir = Join-Path $outDir "response"
        New-Item -ItemType Directory -Force -Path $responseDir | Out-Null

        # Cite the first finding of the baseline audit so the synthetic provider
        # response references a finding_id that exists in the ledger.
        $jsonFiles = @(Get-ChildItem -LiteralPath $outDir -Filter "*_experiment_results.json")
        $baseline = Get-Content -LiteralPath $jsonFiles[0].FullName -Raw | ConvertFrom-Json
        $cite = [string]$baseline.evidence_ledger[0].finding_id

        # Use the fully resolved directory: the .NET file API below resolves
        # relative paths against the process working directory, not the
        # PowerShell location.
        $responseDirFull = (Resolve-Path -LiteralPath $responseDir).ProviderPath
        $responseFile = Join-Path $responseDirFull "provider_advisory.json"
        $responseText = @{
            provider = "external_response"
            model = "release-validation-provider"
            reviewer_notes = @(@{
                claim = "Review the cited evidence before advisory use."
                severity = "info"
                cites = @($cite)
                recommended_action = "Inspect the cited finding in the evidence ledger."
            })
            inspection_priorities = @()
        } | ConvertTo-Json -Depth 8

        # Write UTF-8 without a byte-order mark. On Windows PowerShell 5.1,
        # Set-Content -Encoding UTF8 prepends a BOM, which a JSON reader that
        # decodes as plain UTF-8 rejects.
        $utf8NoBom = New-Object System.Text.UTF8Encoding $false
        [System.IO.File]::WriteAllText($responseFile, $responseText, $utf8NoBom)

        # Validate the response file and inspect the resulting result JSON.
        python -m stem_ai . --format json --out $responseDir --advisory-response $responseFile
        $responseJson = @(Get-ChildItem -LiteralPath $responseDir -Filter "*_experiment_results.json")
        Assert-True ($responseJson.Count -eq 1) "Expected one response validation result JSON, found $($responseJson.Count)"
        $response = Get-Content -LiteralPath $responseJson[0].FullName -Raw | ConvertFrom-Json
        Assert-True ($response.ai_advisory.status -eq "valid") "advisory response should validate"
        Assert-True ($response.ai_advisory.response_contract.network_called -eq $false) "response validator must not call network"
        Assert-True ($response.ai_advisory.response_contract.citation_repair_attempted -eq $false) "response validator must not repair citations"
        Assert-True ($response.ai_advisory.invalid_citations.Count -eq 0) "response validator has invalid citations"
    }

    Invoke-Step "provider benchmark exporter contract" {
        # Run the benchmark exporter into its own sub-directory of the run output.
        $benchmarkDir = Join-Path $outDir "provider_benchmark"
        python scripts\provider_packet_benchmark.py --out $benchmarkDir

        # The exporter is expected to produce these three files.
        $manifestPath = Join-Path $benchmarkDir "benchmark_manifest.json"
        $statsPath = Join-Path $benchmarkDir "packet_stats.jsonl"
        $summaryPath = Join-Path $benchmarkDir "packet_summary.json"
        Assert-True (Test-Path -LiteralPath $manifestPath) "provider benchmark manifest missing"
        Assert-True (Test-Path -LiteralPath $statsPath) "packet_stats.jsonl missing"
        Assert-True (Test-Path -LiteralPath $summaryPath) "packet_summary.json missing"

        # Summary must contain records, exact citation allowlists, and at most 40 findings per packet.
        $summary = Get-Content -LiteralPath $summaryPath -Raw | ConvertFrom-Json
        Assert-True ($summary.record_count -gt 0) "provider benchmark should include records"
        Assert-True ($summary.all_citation_allowlists_exact -eq $true) "citation allowlists should be exact"
        Assert-True ($summary.max_packet_finding_count -le 40) "provider packets should be capped to 40 findings"
    }

    Invoke-Step "markdown and PDF artifacts exist" {
        # Collect the report files written by the audit step.
        $markdownReports = @(Get-ChildItem -LiteralPath $outDir -Filter "*_report.md")
        $pdfReports = @(Get-ChildItem -LiteralPath $outDir -Filter "*.pdf")

        # Exactly one of each is expected.
        Assert-True ($markdownReports.Count -eq 1) "Expected one Markdown report, found $($markdownReports.Count)"
        Assert-True ($pdfReports.Count -eq 1) "Expected one PDF report, found $($pdfReports.Count)"

        # Sanity check on size: the PDF must be larger than 1000 bytes.
        $pdfSizeBytes = $pdfReports[0].Length
        Assert-True ($pdfSizeBytes -gt 1000) "PDF report appears too small"
    }

    # Optional external slop detector scan. It runs only when -WithSlop is given
    # and $SlopDetectorPath exists; otherwise it is skipped and the reason is reported.
    if (-not $WithSlop) {
        Write-Host ""
        Write-Host "SKIP: external slop detector clean scan (pass -WithSlop to enable)"
    }
    elseif (-not (Test-Path -LiteralPath $SlopDetectorPath)) {
        # The scan was requested but cannot run. Say so explicitly instead of
        # telling the user to pass a switch they already passed.
        Write-Host ""
        Write-Warning "SKIP: external slop detector clean scan (-WithSlop was given but SlopDetectorPath was not found: $SlopDetectorPath)"
    }
    else {
        Invoke-Step "slop detector clean scan" {
            $slopOut = Join-Path $outDir "slop_report.json"
            $slopConfig = Join-Path $outDir "slop_config.yaml"
            # Ignore list written to a temporary config file for the detector.
            # The here-string content and terminator must stay at column 0.
            @"
ignore:
  - ".git/**"
  - "**/.git/**"
  - "__pycache__/**"
  - "**/__pycache__/**"
  - ".pytest_cache/**"
  - "tmp/**"
  - "**/tmp/**"
  - "dist/**"
  - "build/**"
  - "*.egg-info/**"
  - "audits/**"
  - "stem_output*/**"
  - ".venv/**"
  - "venv/**"
  - "node_modules/**"
"@ | Set-Content -LiteralPath $slopConfig -Encoding UTF8
            # Run the detector from its own directory, always restoring the location.
            Push-Location $SlopDetectorPath
            try {
                python -m slop_detector.cli --project $repoRoot --config $slopConfig --json --output $slopOut
            }
            finally {
                Pop-Location
            }
            # The report must declare a clean status with zero deficit files.
            $slop = Get-Content -LiteralPath $slopOut -Raw | ConvertFrom-Json
            Assert-True ($slop.overall_status -eq "clean") "Slop status is not clean: $($slop.overall_status)"
            Assert-True ([int]$slop.deficit_files -eq 0) "Slop deficit_files is not zero: $($slop.deficit_files)"
            Write-Host "slop overall_status=$($slop.overall_status) clean_files=$($slop.clean_files) deficit_files=$($slop.deficit_files)"
        }
    }

    # Reached only when no step above threw; prints the overall result and
    # where this run's artifacts were written.
    Write-Host ""
    Write-Host "STEM BIO-AI v$ExpectedVersion validation PASSED"
    Write-Host "Artifacts: $outDir"
}
finally {
    Pop-Location
}