File size: 6,752 Bytes
c25dcd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import 'dart:convert';
import 'package:flutter/services.dart';
import 'package:sqflite/sqflite.dart';

/// Outcome of a nearest-neighbour lookup in the `scam_embeddings` table.
///
/// Carries the matched row's category, its distance from the query vector and
/// the two risk flags the caller derived from that distance.
class VectorMatch {
  VectorMatch({
    required this.category,
    required this.distance,
    required this.isHighRisk,
    required this.isMediumRisk,
  });

  /// Category label stored alongside the matched embedding.
  final String category;

  /// Distance between the query vector and the matched embedding.
  final double distance;

  /// Whether the match was flagged as high risk.
  final bool isHighRisk;

  /// Whether the match was flagged as medium risk.
  final bool isMediumRisk;

  /// Whether either risk flag is set.
  bool get isScam => isHighRisk || isMediumRisk;

  /// `'HIGH'`, `'MEDIUM'` or `'LOW'`; the high-risk flag takes precedence
  /// when both flags are set.
  String get threatLevel {
    if (isHighRisk) return 'HIGH';
    if (isMediumRisk) return 'MEDIUM';
    return 'LOW';
  }
}

/// Encapsulates sqlite-vec Layer 2 of the Kavacha pipeline.
///
/// Design principle: this layer never throws to its callers. Every failure is
/// caught and logged, and the public methods fall back to a neutral result
/// (`null`, `0` or a no-op) so the pipeline can continue with layers 1, 3 and
/// 4 when this layer is unavailable.
class VectorService {
  static final VectorService _instance = VectorService._internal();

  /// Returns the shared singleton instance.
  factory VectorService() => _instance;
  VectorService._internal();

  /// Number of dimensions declared for the `embedding` column.
  static const int _embeddingDimension = 384;

  static const String _enableExtensionLoadingSql =
      'PRAGMA enable_load_extension = 1';
  static const String _disableExtensionLoadingSql =
      'PRAGMA enable_load_extension = 0';

  static const String _insertSql =
      'INSERT INTO scam_embeddings(embedding, category, source, created_at) '
      'VALUES (?, ?, ?, ?)';

  static const String _countSql = 'SELECT COUNT(*) as c FROM scam_embeddings';

  static bool _isReady = false;

  /// Whether [initialize] completed successfully and the layer can be used.
  static bool get isReady => _isReady;

  /// The sqlite-vec version string, available after successful [initialize].
  static String vecVersion = 'not loaded';

  /// Writes [message] to the console with the service prefix.
  static void _log(String message) => print('[VectorService] $message');

  /// Serialises [values] as the bracketed, comma-separated text form that is
  /// bound to the `embedding` column and to `MATCH` queries.
  static String _encodeVector(Iterable<double> values) =>
      '[${values.join(',')}]';

  /// Returns the number of rows in `scam_embeddings`; throws on query failure.
  static Future<int> _countEmbeddings(Database db) async {
    final rows = await db.rawQuery(_countSql);
    return rows.first['c'] as int;
  }

  /// Loads the `libsqlitevec` extension into [db].
  ///
  /// Extension loading is switched on only for the duration of the load. If
  /// the load fails, a best-effort attempt is still made to switch it off
  /// again before the original error is propagated.
  static Future<void> _loadExtension(Database db) async {
    // Loading must be enabled before load_extension is called.
    await db.execute(_enableExtensionLoadingSql);
    try {
      // load_extension is a SELECT and yields a row, so it goes through
      // rawQuery rather than execute.
      await db.rawQuery("SELECT load_extension('libsqlitevec')");
    } catch (_) {
      try {
        await db.execute(_disableExtensionLoadingSql);
      } catch (e) {
        _log('Could not disable extension loading: $e');
      }
      rethrow;
    }
    await db.execute(_disableExtensionLoadingSql);
  }

  /// Initialize the sqlite-vec extension and create the virtual table.
  ///
  /// Must be called once, after the database is opened, before the pipeline
  /// runs. Safe to call multiple times — once a call has succeeded, later
  /// calls are no-ops. On failure the error is logged, [isReady] stays
  /// `false` and nothing is thrown.
  static Future<void> initialize(Database db) async {
    if (_isReady) return;
    try {
      await _loadExtension(db);

      // Verify the extension loaded correctly.
      final version = await db.rawQuery('SELECT vec_version()');
      vecVersion = version.first.values.first?.toString() ?? 'unknown';
      _log('sqlite-vec version: $vecVersion');

      // Virtual vector table for scam embeddings; columns prefixed with `+`
      // carry the metadata stored next to each vector.
      await db.execute('''
        CREATE VIRTUAL TABLE IF NOT EXISTS scam_embeddings
        USING vec0(
          embedding FLOAT[$_embeddingDimension],
          +category TEXT,
          +source TEXT,
          +created_at INTEGER
        )
      ''');

      _isReady = true;
      _log('Layer 2 ready');
    } catch (e) {
      _isReady = false;
      vecVersion = 'not loaded';
      _log('Failed to load: $e');
      _log('Layer 2 disabled — pipeline continues without it');
      // Do NOT rethrow — Kavacha pipeline must continue with layers 1, 3, 4.
    }
  }

  /// Stores a confirmed scam as an embedding for future matching.
  ///
  /// The row is stamped with the current time in milliseconds since epoch.
  /// Does nothing when the layer is not ready; insert errors are logged and
  /// swallowed.
  Future<void> addScamEmbedding({
    required Database db,
    required List<double> embedding,
    required String category,
    required String source,
  }) async {
    if (!_isReady) return;
    try {
      await db.execute(_insertSql, [
        _encodeVector(embedding),
        category,
        source,
        DateTime.now().millisecondsSinceEpoch,
      ]);
    } catch (e) {
      _log('addScamEmbedding error: $e');
    }
  }

  /// Finds the closest scam pattern in the embedding store.
  ///
  /// Returns a [VectorMatch] for the nearest stored embedding, or `null` when
  /// the store is empty, the layer is disabled, or the query fails.
  ///
  /// [VectorMatch.isHighRisk] is set when the distance is below
  /// [highRiskThreshold] and [VectorMatch.isMediumRisk] when it is below
  /// [mediumRiskThreshold], so with the default thresholds a high-risk match
  /// has both flags set.
  Future<VectorMatch?> findClosest({
    required Database db,
    required List<double> queryEmbedding,
    double highRiskThreshold = 0.15,
    double mediumRiskThreshold = 0.35,
  }) async {
    if (!_isReady) return null;
    try {
      // `k = 1` is used instead of `LIMIT 1`: per the sqlite-vec KNN docs the
      // LIMIT form depends on a recent SQLite, while the `k` constraint does
      // not, and the SQLite bundled with the device may be old.
      final results = await db.rawQuery('''
        SELECT category, distance
        FROM scam_embeddings
        WHERE embedding MATCH ?
          AND k = 1
        ORDER BY distance
      ''', [_encodeVector(queryEmbedding)]);

      if (results.isEmpty) return null;

      final nearest = results.first;
      final distance = (nearest['distance'] as num).toDouble();

      return VectorMatch(
        category: nearest['category'] as String,
        distance: distance,
        isHighRisk: distance < highRiskThreshold,
        isMediumRisk: distance < mediumRiskThreshold,
      );
    } catch (e) {
      _log('findClosest error: $e');
      return null;
    }
  }

  /// Placeholder lookup that has no database handle and always yields `null`.
  ///
  /// A `null` result tells the caller to run [findClosest] with its own
  /// [Database] reference instead; [queryEmbedding] is currently unused.
  Future<VectorMatch?> findClosestScam(List<double> queryEmbedding) async {
    return null;
  }

  /// Seeds the vector store with pre-computed scam embeddings from a JSON
  /// asset.
  ///
  /// The asset at [assetPath] is read as a JSON array of objects with an
  /// `embedding` (list of numbers), a `category` and a `source`. All rows are
  /// inserted in one batch and share a single timestamp. Skips when the store
  /// already holds embeddings or the layer is not ready; any error is logged
  /// and swallowed.
  Future<void> seedEmbeddings(Database db, String assetPath) async {
    if (!_isReady) return;
    try {
      final existing = await _countEmbeddings(db);
      if (existing > 0) {
        _log('Already seeded ($existing embeddings), skipping');
        return;
      }

      _log('Seeding embeddings from $assetPath');
      final jsonString = await rootBundle.loadString(assetPath);
      final data = jsonDecode(jsonString) as List<dynamic>;

      // One timestamp for the whole seed run.
      final createdAt = DateTime.now().millisecondsSinceEpoch;
      final batch = db.batch();
      for (final item in data) {
        final rawEmbedding = item['embedding'] as List<dynamic>;
        batch.execute(_insertSql, [
          _encodeVector(rawEmbedding.map((v) => (v as num).toDouble())),
          item['category'] as String,
          item['source'] as String,
          createdAt,
        ]);
      }
      await batch.commit(noResult: true);
      _log('Seeded ${data.length} embeddings');
    } catch (e) {
      _log('Seed error: $e');
    }
  }

  /// Returns the count of embeddings currently in the store.
  ///
  /// Yields `0` when the layer is not ready or the count query fails; the
  /// failure is logged.
  Future<int> embeddingCount(Database db) async {
    if (!_isReady) return 0;
    try {
      return await _countEmbeddings(db);
    } catch (e) {
      _log('embeddingCount error: $e');
      return 0;
    }
  }
}