File size: 4,479 Bytes
c30c476
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
-- =====================================================================
-- Sovereign Hive — Vault Migration 003
-- =====================================================================
-- Adds: kv_sensitivity_profile
--
-- Rationale: The HSAQ KV-cache profiler (kv_profiler.py) emits per-layer
--   drift measurements at each (k_bits, v_bits, quantizer) probe. These
--   feed the allocator (assignment_v2.assign_kv_bits) under a separate
--   KV-cache memory budget from weights. Caching the rows means re-paying
--   the profiling cost only when (model_hash, calibration_hash,
--   pipeline_version) changes.
--
-- Audit invariants:
--   - Every write carries originating agent_id + agent_tier + timestamp.
--   - quantizer / drift_metric / k_bits / v_bits constrained via CHECK
--     so storage layer rejects garbled rows before they hit the allocator.
--   - Schema is append-mostly; multiple options per layer all live as
--     separate rows under the same composite primary key.
--
-- Pipeline-version note:
--   pipeline_version is part of the primary key. Any change to the drift
--   metric implementation, hook semantics, or sweep contract bumps the
--   version and renders previous rows lookup-misses (silently ignored
--   rather than reused with stale semantics).
--
-- Migration policy: HUMAN-APPLIED ONLY. Do not auto-apply from agent code.
-- Assumes migration 001 created schema_migrations and migration 002 ran
-- successfully (we don't depend on 002's tables but enforce ordering).
-- =====================================================================

BEGIN;

-- ---------------------------------------------------------------------
-- kv_sensitivity_profile
-- ---------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS kv_sensitivity_profile (
    -- Invalidation key (matches sensitivity_profile pattern from 002)
    model_hash                          TEXT    NOT NULL,
    calibration_hash                    TEXT    NOT NULL,
    pipeline_version                    TEXT    NOT NULL,

    -- Per-layer identity
    layer_idx                           INTEGER NOT NULL,

    -- Probe definition
    k_bits                              INTEGER NOT NULL
        CHECK (k_bits IN (2, 3, 4, 8, 16)),
    v_bits                              INTEGER NOT NULL
        CHECK (v_bits IN (2, 3, 4, 8, 16)),
    quantizer                           TEXT    NOT NULL
        CHECK (quantizer IN ('hqq_g64', 'scaled_uniform', 'scaled_per_head', 'fp16_passthrough')),

    -- Measured drift (attention output, vs full-precision baseline)
    drift_attn_output                   REAL    NOT NULL,
    drift_metric                        TEXT    NOT NULL
        CHECK (drift_metric IN ('mse_normalised', 'kl_softmax')),

    -- Cost accounting — kept here so the allocator never recomputes
    bytes_per_kv_token                  REAL    NOT NULL,
    max_seq_len_observed                INTEGER NOT NULL,

    -- Per-layer architecture facts (for assemblers / cross-checks)
    num_kv_heads                        INTEGER NOT NULL,
    head_dim                            INTEGER NOT NULL,

    -- Provenance (audit chain — matches 002)
    profiled_at                         TEXT    NOT NULL,        -- ISO 8601 UTC
    profiled_by_agent_id                TEXT    NOT NULL,
    profiled_by_agent_tier              INTEGER NOT NULL,
    written_at                          TEXT    NOT NULL
        DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),

    PRIMARY KEY (model_hash, calibration_hash, pipeline_version,
                 layer_idx, k_bits, v_bits, quantizer)
);

-- The allocator's bulk-fetch pattern: "give me every option for every
-- layer for this (model, calibration, pipeline_version)." This index
-- covers that read.
CREATE INDEX IF NOT EXISTS idx_kv_profile_lookup
    ON kv_sensitivity_profile(model_hash, calibration_hash, pipeline_version);

-- For "show me everything we've ever measured for this model" queries.
CREATE INDEX IF NOT EXISTS idx_kv_profile_model
    ON kv_sensitivity_profile(model_hash, written_at DESC);


-- ---------------------------------------------------------------------
-- Record this migration
-- ---------------------------------------------------------------------
INSERT INTO schema_migrations (version, applied_at, description)
VALUES (
    '003',
    strftime('%Y-%m-%dT%H:%M:%fZ', 'now'),
    'Add kv_sensitivity_profile for HSAQ KV-cache profiler'
);

COMMIT;