import DatabaseConstructor, { Database } from 'better-sqlite3'; import path from 'path'; import fs from 'fs'; const dbDir = path.join(process.cwd(), 'data'); if (!fs.existsSync(dbDir)) { fs.mkdirSync(dbDir, { recursive: true }); } const db: Database = new DatabaseConstructor(path.join(dbDir, 'crawler.db')); // Initialize database tables export function initDB() { db.exec(` CREATE TABLE IF NOT EXISTS source_registry ( source_id VARCHAR(64) PRIMARY KEY, source_name VARCHAR(255), source_type VARCHAR(32), domain VARCHAR(255), entry_url TEXT, url_pattern TEXT, parser_type VARCHAR(64), crawl_frequency VARCHAR(32), priority VARCHAR(16), enabled BOOLEAN, topic_tags TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ); CREATE TABLE IF NOT EXISTS crawl_job ( job_id VARCHAR(64) PRIMARY KEY, source_id VARCHAR(64), trigger_type VARCHAR(32), status VARCHAR(32), started_at DATETIME, ended_at DATETIME, error_code VARCHAR(32), error_message TEXT, retry_count INTEGER DEFAULT 0, FOREIGN KEY(source_id) REFERENCES source_registry(source_id) ); CREATE TABLE IF NOT EXISTS raw_snapshot ( snapshot_id VARCHAR(64) PRIMARY KEY, source_id VARCHAR(64), job_id VARCHAR(64), fetched_at DATETIME, content_type VARCHAR(64), raw_body TEXT, raw_hash VARCHAR(128), http_status INTEGER, final_url TEXT, FOREIGN KEY(source_id) REFERENCES source_registry(source_id), FOREIGN KEY(job_id) REFERENCES crawl_job(job_id) ); CREATE TABLE IF NOT EXISTS normalized_document ( doc_id VARCHAR(64) PRIMARY KEY, source_id VARCHAR(64), snapshot_id VARCHAR(64), title VARCHAR(500), version_date DATE, effective_date DATE, normalized_text TEXT, normalized_hash VARCHAR(128), doc_status VARCHAR(32), created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY(source_id) REFERENCES source_registry(source_id), FOREIGN KEY(snapshot_id) REFERENCES raw_snapshot(snapshot_id) ); CREATE TABLE IF NOT EXISTS clause_chunk ( chunk_id VARCHAR(64) PRIMARY KEY, doc_id VARCHAR(64), section_path VARCHAR(500), section_title VARCHAR(255), clause_text TEXT, topic_tags TEXT, embedding_status VARCHAR(32), chunk_order INTEGER, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY(doc_id) REFERENCES normalized_document(doc_id) ); CREATE TABLE IF NOT EXISTS diff_event ( event_id VARCHAR(64) PRIMARY KEY, source_id VARCHAR(64), from_doc_id VARCHAR(64), to_doc_id VARCHAR(64), change_type VARCHAR(32), section_title VARCHAR(255), old_excerpt TEXT, new_excerpt TEXT, topic_tags TEXT, impact_level VARCHAR(16), detected_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY(source_id) REFERENCES source_registry(source_id) ); `); } export default db;