# NOTE(review): removed web-capture residue ("Spaces: / Sleeping / Sleeping")
# that preceded the file header — it was not part of the source.
| # ============================================================================== | |
| # VSA PAPER STATS & ABLATION SUITE (Phase K) | |
| # Empirical evidence for research publications | |
| # ============================================================================== | |
using Printf
using Random
using Statistics
"""
    compute_separability(table::VDBTable, n_samples::Int=100)

Collect similarity scores for "Hits" (each sampled record against itself) and
"Noise" (each sampled record against up to 10 randomly chosen other records).

Returns `(hits::Vector{Float64}, noise::Vector{Float64})`.
"""
function compute_separability(table::VDBTable, n_samples::Int=100)
    total = length(table.records)
    hit_scores = Float64[]
    noise_scores = Float64[]
    for idx in randperm(total)[1:min(total, n_samples)]
        probe = table.records[idx]
        # Self-match: the "Hit" distribution.
        push!(hit_scores, Float64(similarity(probe, probe)))
        # Cross-match against up to 10 random others: the "Noise" distribution.
        for other in randperm(total)[1:min(total, 10)]
            other == idx && continue  # never count the self-pair as noise
            push!(noise_scores, Float64(similarity(probe, table.records[other])))
        end
    end
    return hit_scores, noise_scores
end
"""
    bench_vsa_latency(table::VDBTable, n_queries::Int=100)

Measure latency quantiles (in milliseconds) of holographic similarity scans
performed by `vdb_select_similar`.

Returns a named tuple `(p50, p90, p99, mean)`.

Throws `ArgumentError` when `n_queries` is not positive.
"""
function bench_vsa_latency(table::VDBTable, n_queries::Int=100)
    n_queries > 0 || throw(ArgumentError("n_queries must be positive, got $n_queries"))
    latencies = Float64[]
    n = length(table.records)
    query_indices = rand(1:n, n_queries)
    # Warmup call so JIT compilation does not pollute the first measurement.
    vdb_select_similar(table, table.record_ids[1]; top_k=5)
    for idx in query_indices
        id = table.record_ids[idx]
        # Assumes the return value is the elapsed time in seconds — TODO confirm.
        t = vdb_select_similar(table, id; top_k=5)
        push!(latencies, t * 1000)  # seconds → milliseconds
    end
    # Use Statistics.quantile instead of manual index arithmetic: the old
    # `latencies[round(Int, q * n_queries)]` could produce index 0 (BoundsError)
    # for small n_queries (round(Int, 0.5) == 0) and was a biased estimator.
    qs = quantile(latencies, [0.5, 0.9, 0.99])
    return (p50=qs[1], p90=qs[2], p99=qs[3], mean=mean(latencies))
end
"""
    export_to_csv(filename::String, headers::Vector{String}, data::Vector{<:Vector})

Write `headers` and the column vectors in `data` to `filename` as a simple CSV
for paper plotting. Column `i` of the output file is `data[i]`.

Throws `ArgumentError` when `data` is empty or when the columns have unequal
lengths (the original code raised a BoundsError mid-write, leaving a partial
file on disk).
"""
function export_to_csv(filename::String, headers::Vector{String}, data::Vector{<:Vector})
    # Validate before opening the file so a bad call cannot truncate/partially
    # overwrite an existing output file.
    isempty(data) && throw(ArgumentError("data must contain at least one column"))
    n_rows = length(first(data))
    all(col -> length(col) == n_rows, data) ||
        throw(ArgumentError("all columns must have the same length"))
    open(filename, "w") do io
        println(io, join(headers, ","))
        for i in 1:n_rows
            println(io, join((string(col[i]) for col in data), ","))
        end
    end
    println(" ✓ Exported to $filename")
end
"""
    ascii_hist(data::Vector{Float64}, bins::Int=20, title::String="")

Print a hand-rolled ASCII histogram of `data` to stdout (terminal proof plot).
Returns `nothing`; does nothing for empty input.
"""
function ascii_hist(data::Vector{Float64}, bins::Int=20, title::String="")
    isempty(data) && return
    min_v, max_v = minimum(data), maximum(data)
    if min_v == max_v
        max_v += 0.0001  # avoid a zero-width range when all values coincide
    end
    counts = zeros(Int, bins)
    range_v = max_v - min_v
    for v in data
        # Clamp so v == max_v lands in the last bin instead of bins + 1.
        b = min(bins, floor(Int, (v - min_v) / range_v * bins) + 1)
        counts[b] += 1
    end
    max_count = maximum(counts)
    println("\n $title")
    println(" " * "─"^40)
    for i in 1:bins
        bin_start = min_v + (i - 1) * (range_v / bins)
        bar_len = max_count == 0 ? 0 : round(Int, (counts[i] / max_count) * 30)
        # BUG FIX: the original built a bare tuple here and discarded it — the
        # @printf macro was missing, so no histogram rows were ever printed.
        @printf(" %5.2f | %s (%d)\n", bin_start, "█"^bar_len, counts[i])
    end
    println(" " * "─"^40)
end
"""
    blind_manifold_mining(table::VDBTable, sector::String; top_k::Int=5)

Extract semantic "Hubs" from the global superposition without any user cues:
probe the collective manifold against every identity atom registered under
`sector` and return the `top_k` strongest resonance peaks (unsupervised
identification). Returns an empty vector for an unknown sector.
"""
function blind_manifold_mining(table::VDBTable, sector::String; top_k::Int=5)
    # Ensure the superposition (memory of the entire dataset) is available.
    vdb_build_superposition!(table)
    memory = table.superposition
    registry = table.reg
    haskey(registry.sectors, sector) || return []
    # Identity atoms for every label registered under this sector.
    sector_labels = collect(keys(registry.sectors[sector]))
    identity_atoms = [get_element(registry, sector, lbl, table.dim) for lbl in sector_labels]
    # "Blind" probe: no specific query was given — we scan the sea of data
    # directly by resonating the collective memory against each atom.
    scores = [similarity(memory, atom) for atom in identity_atoms]
    # Rank by resonance, strongest first, and keep the top_k peaks.
    ranking = sortperm(scores, rev=true)
    peaks = []
    for rank in 1:min(length(ranking), top_k)
        idx = ranking[rank]
        push!(peaks, (label=sector_labels[idx], resonance=scores[idx]))
    end
    return peaks
end