File size: 3,947 Bytes
c906fee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
-- Supabase Database Schema for Security Guard Demo
-- Run this in your Supabase SQL Editor

-- Drop existing tables if they exist (careful in production!)
DROP TABLE IF EXISTS feedback CASCADE;
DROP TABLE IF EXISTS interactions CASCADE;

-- Interactions table: stores each user interaction with the security guard
CREATE TABLE interactions (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- User input
    user_message TEXT NOT NULL,

    -- Model and moderation details
    model_used TEXT NOT NULL,
    moderation_id TEXT,  -- Alinia API moderation ID for linking feedback

    -- v1 model results
    v1_score FLOAT NOT NULL,
    v1_flagged BOOLEAN NOT NULL,

    -- v2 model results
    v2_score FLOAT NOT NULL,
    v2_flagged BOOLEAN NOT NULL,

    -- Combined result
    blocked BOOLEAN NOT NULL,

    -- Discrepancy tracking (v1 flags but v2 doesn't)
    v1_v2_discrepancy BOOLEAN NOT NULL DEFAULT FALSE
);

-- Feedback table: stores user feedback on moderation results
CREATE TABLE feedback (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Link to interaction
    interaction_id UUID REFERENCES interactions(id) ON DELETE CASCADE,
    moderation_id TEXT NOT NULL,  -- Alinia API moderation ID

    -- Feedback details
    config_path TEXT NOT NULL,  -- Always "security.adversarial" for this app
    expected_value BOOLEAN NOT NULL,  -- What user expected (False = should not have flagged)
    comment TEXT  -- Optional user comment
);

-- Create indexes for common queries
CREATE INDEX idx_interactions_created_at ON interactions(created_at DESC);
CREATE INDEX idx_interactions_blocked ON interactions(blocked);
CREATE INDEX idx_interactions_discrepancy ON interactions(v1_v2_discrepancy);
CREATE INDEX idx_interactions_moderation_id ON interactions(moderation_id);
CREATE INDEX idx_feedback_interaction_id ON feedback(interaction_id);
CREATE INDEX idx_feedback_moderation_id ON feedback(moderation_id);
CREATE INDEX idx_feedback_created_at ON feedback(created_at DESC);

-- Enable Row Level Security (RLS)
ALTER TABLE interactions ENABLE ROW LEVEL SECURITY;
ALTER TABLE feedback ENABLE ROW LEVEL SECURITY;

-- Create policies for anonymous access (adjust based on your security needs)
-- For now, allow all operations - you can tighten this later

-- Interactions policies
CREATE POLICY "Allow anonymous read access on interactions"
    ON interactions FOR SELECT
    USING (true);

CREATE POLICY "Allow anonymous insert access on interactions"
    ON interactions FOR INSERT
    WITH CHECK (true);

CREATE POLICY "Allow anonymous delete access on interactions"
    ON interactions FOR DELETE
    USING (true);

-- Feedback policies
CREATE POLICY "Allow anonymous read access on feedback"
    ON feedback FOR SELECT
    USING (true);

CREATE POLICY "Allow anonymous insert access on feedback"
    ON feedback FOR INSERT
    WITH CHECK (true);

CREATE POLICY "Allow anonymous delete access on feedback"
    ON feedback FOR DELETE
    USING (true);

-- Create a view for joined interactions with feedback (makes queries easier)
CREATE OR REPLACE VIEW interactions_with_feedback AS
SELECT
    i.*,
    json_build_object(
        'id', f.id,
        'created_at', f.created_at,
        'expected_value', f.expected_value,
        'comment', f.comment
    ) AS feedback
FROM interactions i
LEFT JOIN feedback f ON i.id = f.interaction_id;

-- Grant access to the view
GRANT SELECT ON interactions_with_feedback TO anon, authenticated;

COMMENT ON TABLE interactions IS 'Stores each interaction with the Security Guard, including v1 and v2 model scores';
COMMENT ON TABLE feedback IS 'Stores user feedback on moderation results for model improvement';
COMMENT ON COLUMN interactions.v1_v2_discrepancy IS 'TRUE when v1 and v2 models disagree on flagging (either v1 flags but v2 does not, or v2 flags but v1 does not)';