prem / lib /services /vector_service.dart
Nitishkumar-ai's picture
Deploy source code to Hugging Face without binaries
c25dcd7
import 'dart:convert';
import 'package:flutter/services.dart';
import 'package:sqflite/sqflite.dart';
/// Result of a vector-similarity search against the scam_embeddings table.
///
/// A plain immutable value: it stores the matched row's category, the
/// distance reported for that row, and the two risk flags supplied by
/// whoever constructs it.
class VectorMatch {
  /// Creates a match result. The constructor is `const`, so instances built
  /// from compile-time constants are canonicalized.
  const VectorMatch({
    required this.category,
    required this.distance,
    required this.isHighRisk,
    required this.isMediumRisk,
  });

  /// Category label stored alongside the matched embedding.
  final String category;

  /// Distance between the query vector and the matched embedding.
  final double distance;

  /// Whether the match was flagged as high risk.
  final bool isHighRisk;

  /// Whether the match was flagged as medium risk.
  final bool isMediumRisk;

  /// Whether either risk flag is set.
  bool get isScam => isMediumRisk || isHighRisk;

  /// `'HIGH'`, `'MEDIUM'` or `'LOW'`.
  ///
  /// The high-risk flag wins when both flags are set.
  String get threatLevel =>
      isHighRisk ? 'HIGH' : isMediumRisk ? 'MEDIUM' : 'LOW';
}
/// Encapsulates sqlite-vec Layer 2 of the Kavacha pipeline.
///
/// Design principle: NEVER throw out. All failures are caught and logged.
/// The pipeline must continue with layers 1, 3, 4 if this layer is unavailable.
///
/// Readiness is tracked in static state shared by every instance; all
/// instance methods return early (doing nothing, or yielding `null` / `0`)
/// until [initialize] has completed successfully.
class VectorService {
  /// Returns the single shared instance.
  factory VectorService() => _instance;

  VectorService._internal();

  static final VectorService _instance = VectorService._internal();

  /// Width of the `embedding` column declared by [initialize].
  static const int _embeddingDimension = 384;

  /// Parameterized insert shared by [addScamEmbedding] and [seedEmbeddings].
  static const String _insertSql =
      'INSERT INTO scam_embeddings(embedding, category, source, created_at) '
      'VALUES (?, ?, ?, ?)';

  static bool _isReady = false;

  /// Whether [initialize] has completed successfully.
  static bool get isReady => _isReady;

  /// The sqlite-vec version string, available after successful [initialize].
  static String vecVersion = 'not loaded';

  /// Initialize the sqlite-vec extension and create the virtual table.
  ///
  /// Must be called once, after the database is opened, before the pipeline
  /// runs. Safe to call multiple times — once a call has succeeded,
  /// subsequent calls are no-ops. A failed call leaves the layer disabled
  /// and may be retried.
  ///
  /// Never throws: any failure is logged and leaves [isReady] `false`.
  static Future<void> initialize(Database db) async {
    if (_isReady) return;
    try {
      // CRITICAL: enable_load_extension MUST come before load_extension.
      // SQLite on Android disables extension loading by default.
      // NOTE(review): upstream SQLite documents extension loading as a C API
      // switch (sqlite3_enable_load_extension), not a pragma, and unknown
      // pragmas are silently ignored — confirm the bundled SQLite build
      // actually honors this statement.
      await db.execute('PRAGMA enable_load_extension = 1');
      try {
        // load_extension() is a SELECT that yields a row, so it is issued
        // through rawQuery rather than execute (which is meant for statements
        // with no return value).
        await db.rawQuery("SELECT load_extension('libsqlitevec')");
      } finally {
        // Always switch extension loading back off, even when the load
        // fails, so the connection is not left able to load libraries.
        await db.execute('PRAGMA enable_load_extension = 0');
      }

      // Verify extension loaded correctly.
      final version = await db.rawQuery('SELECT vec_version()');
      vecVersion = version.first.values.first?.toString() ?? 'unknown';
      print('[VectorService] sqlite-vec version: $vecVersion');

      // Create virtual vector table for scam embeddings
      // ([_embeddingDimension]-dimension). The `+` columns are stored next to
      // each vector and returned with it by queries.
      await db.execute('''
CREATE VIRTUAL TABLE IF NOT EXISTS scam_embeddings
USING vec0(
embedding FLOAT[$_embeddingDimension],
+category TEXT,
+source TEXT,
+created_at INTEGER
)
''');

      _isReady = true;
      print('[VectorService] Layer 2 ready');
    } catch (e) {
      _isReady = false;
      vecVersion = 'not loaded';
      print('[VectorService] Failed to load: $e');
      print('[VectorService] Layer 2 disabled — pipeline continues without it');
      // Do NOT rethrow — Kavacha pipeline must continue with layers 1, 3, 4.
    }
  }

  /// Store a confirmed scam as an embedding for future matching.
  ///
  /// Inserts one row stamped with the current time in milliseconds since the
  /// epoch. Does nothing when the layer is not ready; insert failures are
  /// logged and swallowed.
  Future<void> addScamEmbedding({
    required Database db,
    required List<double> embedding,
    required String category,
    required String source,
  }) async {
    if (!_isReady) return;
    try {
      await db.execute(_insertSql, [
        _encodeVector(embedding),
        category,
        source,
        DateTime.now().millisecondsSinceEpoch,
      ]);
    } catch (e) {
      print('[VectorService] addScamEmbedding error: $e');
    }
  }

  /// Find the closest scam pattern in the embedding store.
  ///
  /// Returns a [VectorMatch] for the single nearest row, or `null` when the
  /// store yields no row, the layer is disabled, or the query fails.
  ///
  /// The match is flagged high risk when its distance is below
  /// [highRiskThreshold] and medium risk when below [mediumRiskThreshold];
  /// with the default thresholds every high-risk match is therefore also
  /// flagged medium risk.
  Future<VectorMatch?> findClosest({
    required Database db,
    required List<double> queryEmbedding,
    double highRiskThreshold = 0.15,
    double mediumRiskThreshold = 0.35,
  }) async {
    if (!_isReady) return null;
    try {
      final results = await db.rawQuery('''
SELECT category, source, distance
FROM scam_embeddings
WHERE embedding MATCH ?
ORDER BY distance
LIMIT 1
''', [_encodeVector(queryEmbedding)]);
      if (results.isEmpty) return null;

      final nearest = results.first;
      final distance = (nearest['distance'] as num).toDouble();
      return VectorMatch(
        category: nearest['category'] as String,
        distance: distance,
        isHighRisk: distance < highRiskThreshold,
        isMediumRisk: distance < mediumRiskThreshold,
      );
    } catch (e) {
      print('[VectorService] findClosest error: $e');
      return null;
    }
  }

  /// Placeholder that always completes with `null`.
  ///
  /// No database handle is available to this method, so it performs no
  /// lookup; [queryEmbedding] is ignored. Callers (such as
  /// layer2_vector.dart, which has its own db reference) should treat the
  /// `null` as a signal to call [findClosest] directly with their database.
  Future<VectorMatch?> findClosestScam(List<double> queryEmbedding) async {
    return null;
  }

  /// Seed the vector store with pre-computed scam embeddings from a JSON asset.
  ///
  /// The asset at [assetPath] is decoded as a JSON list whose items each
  /// provide `embedding` (a list of numbers), `category` and `source`.
  /// All rows are inserted in one batch and share a single timestamp.
  ///
  /// Call once on first install. Skips (with a log line) if embeddings
  /// already exist or the layer is not ready; failures are logged and
  /// swallowed.
  Future<void> seedEmbeddings(Database db, String assetPath) async {
    if (!_isReady) return;
    try {
      final existing = await _rowCount(db);
      if (existing > 0) {
        print('[VectorService] Already seeded ($existing embeddings), skipping');
        return;
      }

      print('[VectorService] Seeding embeddings from $assetPath');
      final jsonString = await rootBundle.loadString(assetPath);
      final List<dynamic> data = jsonDecode(jsonString);

      // One timestamp for the whole seed run instead of one clock read per row.
      final seededAt = DateTime.now().millisecondsSinceEpoch;
      final batch = db.batch();
      for (final item in data) {
        final rawEmbedding = item['embedding'] as List<dynamic>;
        batch.execute(_insertSql, [
          _encodeVector(rawEmbedding.map((v) => (v as num).toDouble())),
          item['category'] as String,
          item['source'] as String,
          seededAt,
        ]);
      }
      await batch.commit(noResult: true);
      print('[VectorService] Seeded ${data.length} embeddings');
    } catch (e) {
      print('[VectorService] Seed error: $e');
    }
  }

  /// Returns the count of embeddings currently in the store.
  ///
  /// Yields `0` when the layer is not ready or the count query fails.
  Future<int> embeddingCount(Database db) async {
    if (!_isReady) return 0;
    try {
      // Awaited inside the try so a failing query is caught here.
      return await _rowCount(db);
    } catch (_) {
      return 0;
    }
  }

  /// Renders [values] as the bracketed, comma-separated text literal that is
  /// bound to the `embedding` column in inserts and MATCH queries.
  static String _encodeVector(Iterable<num> values) => '[${values.join(',')}]';

  /// Counts the rows in `scam_embeddings`; lets query errors propagate so
  /// each caller can apply its own fallback.
  static Future<int> _rowCount(Database db) async {
    final rows = await db.rawQuery('SELECT COUNT(*) as c FROM scam_embeddings');
    return rows.first['c'] as int;
  }
}