agent01 / src /crawler /db.ts
Auto Deployer
Deploy compliance agent services
f39c319
import DatabaseConstructor, { Database } from 'better-sqlite3';
import path from 'path';
import fs from 'fs';
const dbDir = path.join(process.cwd(), 'data');
if (!fs.existsSync(dbDir)) {
fs.mkdirSync(dbDir, { recursive: true });
}
const db: Database = new DatabaseConstructor(path.join(dbDir, 'crawler.db'));
// Initialize database tables
export function initDB() {
db.exec(`
CREATE TABLE IF NOT EXISTS source_registry (
source_id VARCHAR(64) PRIMARY KEY,
source_name VARCHAR(255),
source_type VARCHAR(32),
domain VARCHAR(255),
entry_url TEXT,
url_pattern TEXT,
parser_type VARCHAR(64),
crawl_frequency VARCHAR(32),
priority VARCHAR(16),
enabled BOOLEAN,
topic_tags TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS crawl_job (
job_id VARCHAR(64) PRIMARY KEY,
source_id VARCHAR(64),
trigger_type VARCHAR(32),
status VARCHAR(32),
started_at DATETIME,
ended_at DATETIME,
error_code VARCHAR(32),
error_message TEXT,
retry_count INTEGER DEFAULT 0,
FOREIGN KEY(source_id) REFERENCES source_registry(source_id)
);
CREATE TABLE IF NOT EXISTS raw_snapshot (
snapshot_id VARCHAR(64) PRIMARY KEY,
source_id VARCHAR(64),
job_id VARCHAR(64),
fetched_at DATETIME,
content_type VARCHAR(64),
raw_body TEXT,
raw_hash VARCHAR(128),
http_status INTEGER,
final_url TEXT,
FOREIGN KEY(source_id) REFERENCES source_registry(source_id),
FOREIGN KEY(job_id) REFERENCES crawl_job(job_id)
);
CREATE TABLE IF NOT EXISTS normalized_document (
doc_id VARCHAR(64) PRIMARY KEY,
source_id VARCHAR(64),
snapshot_id VARCHAR(64),
title VARCHAR(500),
version_date DATE,
effective_date DATE,
normalized_text TEXT,
normalized_hash VARCHAR(128),
doc_status VARCHAR(32),
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY(source_id) REFERENCES source_registry(source_id),
FOREIGN KEY(snapshot_id) REFERENCES raw_snapshot(snapshot_id)
);
CREATE TABLE IF NOT EXISTS clause_chunk (
chunk_id VARCHAR(64) PRIMARY KEY,
doc_id VARCHAR(64),
section_path VARCHAR(500),
section_title VARCHAR(255),
clause_text TEXT,
topic_tags TEXT,
embedding_status VARCHAR(32),
chunk_order INTEGER,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY(doc_id) REFERENCES normalized_document(doc_id)
);
CREATE TABLE IF NOT EXISTS diff_event (
event_id VARCHAR(64) PRIMARY KEY,
source_id VARCHAR(64),
from_doc_id VARCHAR(64),
to_doc_id VARCHAR(64),
change_type VARCHAR(32),
section_title VARCHAR(255),
old_excerpt TEXT,
new_excerpt TEXT,
topic_tags TEXT,
impact_level VARCHAR(16),
detected_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY(source_id) REFERENCES source_registry(source_id)
);
`);
}
export default db;