# Atomic-VSA / src / vsa_paper_stats.jl
# marshad180's picture
# Update Atomic VSA deployment
# fa6bd30 verified
# ==============================================================================
# VSA PAPER STATS & ABLATION SUITE (Phase K)
# Empirical evidence for research publications
# ==============================================================================
using Printf
using Random
using Statistics
"""
    compute_separability(table::VDBTable, n_samples::Int=100;
                         n_noise::Int=10, rng::AbstractRNG=Random.default_rng())

Gather similarity scores for "Hits" (record vs itself) and "Noise" (record vs others).

Up to `n_samples` distinct records are drawn at random from `table.records`. For each
sampled record, `similarity(record, record)` is appended to the hits, and its similarity
against up to `n_noise` *other* randomly chosen records is appended to the noise.

# Keywords
- `n_noise`: number of other records compared against each sampled record (default 10,
  capped at `length(table.records) - 1`).
- `rng`: random number generator used for all sampling; pass a seeded generator to obtain
  reproducible samples.

# Returns
`(hits::Vector{Float64}, noise::Vector{Float64})`
"""
function compute_separability(table::VDBTable, n_samples::Int=100;
                              n_noise::Int=10,
                              rng::AbstractRNG=Random.default_rng())
    records = table.records
    n = length(records)
    hits = Float64[]
    noise = Float64[]

    # Non-positive requests yield empty results instead of an invalid range.
    n_queries = clamp(n_samples, 0, n)
    n_others = clamp(n_noise, 0, max(n - 1, 0))

    for i in randperm(rng, n)[1:n_queries]
        query = records[i]

        # Hit: the record compared with itself.
        push!(hits, Float64(similarity(query, query)))

        # Noise: drop the record's own index *before* truncating, so that every sampled
        # record contributes exactly `n_others` comparisons.
        others = Iterators.take(Iterators.filter(!=(i), randperm(rng, n)), n_others)
        for j in others
            push!(noise, Float64(similarity(query, records[j])))
        end
    end
    return hits, noise
end
"""
    bench_vsa_latency(table::VDBTable, n_queries::Int=100)

Measure latency quantiles for holographic similarity scans.

Runs `vdb_select_similar(table, id; top_k=5)` once as a warmup and then `n_queries` times
with ids picked uniformly at random from `table.record_ids`, timing each call with
`@elapsed`.

# Returns
A named tuple `(p50, p90, p99, mean)` of latencies in milliseconds. The quantiles are
order statistics of the sorted timings at rank `round(q * n_queries)`, clamped to the
valid index range.

# Throws
- `ArgumentError` if `n_queries < 1` or if `table.records` is empty.
"""
function bench_vsa_latency(table::VDBTable, n_queries::Int=100)
    n_queries >= 1 ||
        throw(ArgumentError("n_queries must be at least 1, got $n_queries"))
    n = length(table.records)
    n >= 1 || throw(ArgumentError("cannot benchmark an empty table"))

    query_indices = rand(1:n, n_queries)

    # Warmup, so compilation time does not pollute the first measurement.
    vdb_select_similar(table, table.record_ids[1]; top_k=5)

    latencies = Vector{Float64}(undef, n_queries)
    for (k, idx) in enumerate(query_indices)
        id = table.record_ids[idx]
        t = @elapsed vdb_select_similar(table, id; top_k=5)
        latencies[k] = t * 1000  # seconds -> milliseconds
    end
    sort!(latencies)

    # `round(Int, 0.5)` is 0 (ties round to even), so an unclamped rank falls outside the
    # array for very small `n_queries`.
    at_rank(q) = latencies[clamp(round(Int, q * n_queries), 1, n_queries)]
    return (p50=at_rank(0.5), p90=at_rank(0.9), p99=at_rank(0.99), mean=mean(latencies))
end
"""
    export_to_csv(filename::String, headers::Vector{String}, data::Vector{<:Vector})

Simple CSV exporter for paper plotting.

`data` is a list of columns. The header line is written first, followed by one line per
row index, where row `i` consists of the `i`-th element of every column. Fields that
contain a comma, a double quote or a line break are wrapped in double quotes with embedded
quotes doubled. When `data` is empty only the header line is written.

A confirmation line is printed to standard output after the file has been written.

# Throws
- `DimensionMismatch` if the columns do not all have the same length. The check happens
  before the file is opened, so no partial file is produced.
"""
function export_to_csv(filename::String, headers::Vector{String}, data::Vector{<:Vector})
    n_rows = isempty(data) ? 0 : length(first(data))
    for (k, column) in enumerate(data)
        length(column) == n_rows || throw(DimensionMismatch(
            "column $k has $(length(column)) rows, expected $n_rows"))
    end

    open(filename, "w") do io
        println(io, join((_csv_field(h) for h in headers), ","))
        for i in 1:n_rows
            println(io, join((_csv_field(column[i]) for column in data), ","))
        end
    end
    println(" ✓ Exported to $filename")
    return nothing
end

# Render one value as a CSV field, quoting it only when it contains a delimiter, a double
# quote or a line break.
function _csv_field(value)
    text = string(value)
    if any(c -> c in (',', '"', '\n', '\r'), text)
        return string('"', replace(text, "\"" => "\"\""), '"')
    end
    return text
end
"""
    ascii_hist(data::AbstractVector{<:Real}, bins::Int=20, title::String=""; io::IO=stdout)

Hand-rolled ASCII histogram for terminal proof.

The finite values of `data` are split into `bins` equal-width bins between their minimum
and maximum. One line is printed per bin, showing the bin's lower edge, a bar scaled so
that the fullest bin is 30 characters wide, and the bin's count. Non-finite values (`NaN`,
`Inf`) are ignored. Nothing is printed when there are no finite values.

# Keywords
- `io`: stream the histogram is written to (default `stdout`).

# Throws
- `ArgumentError` if `bins < 1` while there is data to plot.
"""
function ascii_hist(data::AbstractVector{<:Real}, bins::Int=20, title::String="";
                    io::IO=stdout)
    # Non-finite values cannot be assigned to a bin (`floor(Int, NaN)` throws).
    finite = Float64[v for v in data if isfinite(v)]
    isempty(finite) && return nothing
    bins >= 1 || throw(ArgumentError("bins must be at least 1, got $bins"))

    min_v, max_v = extrema(finite)
    if min_v == max_v
        # Widen a degenerate range so the bin width is non-zero.
        max_v += 0.0001
    end
    range_v = max_v - min_v
    # For very large magnitudes the widening above is lost to rounding; all values are
    # identical in that case, so any positive width puts them in the first bin.
    range_v > 0 || (range_v = 1.0)

    counts = zeros(Int, bins)
    for v in finite
        # The maximum lands exactly on the upper edge; fold it into the last bin.
        b = min(bins, floor(Int, (v - min_v) / range_v * bins) + 1)
        counts[b] += 1
    end
    max_count = maximum(counts)

    println(io, "\n $title")
    println(io, " " * "─"^40)
    for i in 1:bins
        bin_start = min_v + (i - 1) * (range_v / bins)
        bar_len = max_count == 0 ? 0 : round(Int, (counts[i] / max_count) * 30)
        @printf(io, " %5.2f | %s (%d)\n", bin_start, "█"^bar_len, counts[i])
    end
    println(io, " " * "─"^40)
    return nothing
end
"""
    blind_manifold_mining(table::VDBTable, sector::String; top_k::Int=5)

Rank the labels of `sector` by how strongly they resonate with the table's superposition,
without any user-supplied query.

The function calls `vdb_build_superposition!(table)`, fetches the element of every label
registered under `sector` in `table.reg`, and scores each one with `similarity` against
`table.superposition`.

# Returns
A vector of at most `top_k` named tuples `(label, resonance)` ordered from highest to
lowest resonance, or an empty vector when `sector` is not present in the registry.
"""
function blind_manifold_mining(table::VDBTable, sector::String; top_k::Int=5)
    # Make sure the superposition of the whole dataset is available before probing it.
    vdb_build_superposition!(table)
    memory = table.superposition

    registry = table.reg
    if !haskey(registry.sectors, sector)
        return []
    end

    # Identity atoms for every label known in this sector.
    sector_labels = collect(keys(registry.sectors[sector]))
    sector_atoms = [get_element(registry, sector, name, table.dim) for name in sector_labels]

    # Probe the superposition with each atom; no specific query is involved.
    resonances = [similarity(memory, atom) for atom in sector_atoms]

    # Keep the strongest peaks, best first.
    order = sortperm(resonances, rev=true)
    n_peaks = min(length(order), top_k)
    return Any[(label=sector_labels[k], resonance=resonances[k]) for k in order[1:n_peaks]]
end