// agent01/src/crawler/scheduler.ts
// Auto Deployer
// Deploy compliance agent services
// f39c319
import cron from 'node-cron';
import db from './db';
import { v4 as uuidv4 } from 'uuid';
/**
 * Registers the crawler's recurring cron tasks.
 *
 * Three tasks are scheduled, in this order: the daily crawl trigger, the
 * four-hourly crawl trigger, and the ten-minute retry checker. Each task logs
 * a line when it fires and then delegates to `triggerJobs` or `handleRetries`.
 */
export function initScheduler() {
  console.log('Initializing crawler scheduler...');

  const plan: { expression: string; announcement: string; run: () => void }[] = [
    // Minute 0 of hour 2, every day: 'daily' sources (noted as peer_bank and sdk_vendor).
    {
      expression: '0 2 * * *',
      announcement: 'Running daily crawler job (0 2 * * *)...',
      run: () => {
        triggerJobs('daily');
      },
    },
    // Minute 0 of every fourth hour: '4h' sources (noted as regulator).
    {
      expression: '0 */4 * * *',
      announcement: 'Running 4h crawler job (0 */4 * * *)...',
      run: () => {
        triggerJobs('4h');
      },
    },
    // Every ten minutes: look for failed jobs that should be retried.
    {
      expression: '*/10 * * * *',
      announcement: 'Running retry checker job...',
      run: () => {
        handleRetries();
      },
    },
  ];

  for (const { expression, announcement, run } of plan) {
    cron.schedule(expression, () => {
      console.log(announcement);
      run();
    });
  }
}
/**
 * Queues one crawl job for every enabled source whose `crawl_frequency`
 * matches the given value.
 *
 * All `crawl_job` rows are written through a single `db.transaction` call and
 * share one `started_at` timestamp. Per-job log lines are emitted only after
 * that call has returned, so a batch that throws part-way never reports jobs
 * as queued. (This assumes `db` is a better-sqlite3 style handle whose
 * transaction wrapper rolls back when the callback throws; confirm against
 * `./db`.)
 *
 * Errors are logged and swallowed so that a failing run does not propagate
 * into the cron callback that invoked it.
 *
 * @param frequency - Value compared with `source_registry.crawl_frequency`;
 *   the scheduler in this file passes `'daily'` or `'4h'`.
 */
function triggerJobs(frequency: string) {
  try {
    const sources = db
      .prepare('SELECT source_id FROM source_registry WHERE enabled = 1 AND crawl_frequency = ?')
      .all(frequency) as { source_id: string }[];
    if (sources.length === 0) return;

    const now = new Date().toISOString();
    const stmt = db.prepare(`
      INSERT INTO crawl_job (
        job_id, source_id, trigger_type, status, started_at
      ) VALUES (
        @job_id, @source_id, @trigger_type, @status, @started_at
      )
    `);

    const insertMany = db.transaction((srcs: { source_id: string }[]) => {
      const inserted: { job_id: string; source_id: string }[] = [];
      for (const src of srcs) {
        // Job id is "job_" followed by the first 16 hex characters of a v4 UUID.
        const job_id = `job_${uuidv4().replace(/-/g, '').substring(0, 16)}`;
        stmt.run({
          job_id,
          source_id: src.source_id,
          trigger_type: 'schedule',
          status: 'queued',
          started_at: now
        });
        inserted.push({ job_id, source_id: src.source_id });
      }
      return inserted;
    });

    const queued: { job_id: string; source_id: string }[] = insertMany(sources);

    // Logged after the transaction returns: these rows are now actually stored.
    // Here we would normally run the crawler job asynchronously
    for (const { job_id, source_id } of queued) {
      console.log(`Job ${job_id} queued for source ${source_id}`);
    }
    console.log(`Triggered ${sources.length} jobs for frequency ${frequency}`);
  } catch (error) {
    console.error('Failed to trigger jobs:', error);
  }
}
/**
 * Retry checker hook. Currently a stub: it only logs that it ran.
 *
 * Planned behaviour, not implemented yet:
 *  1. Find jobs that failed and have not reached their maximum retries.
 *  2. If retry_count < 1 (the 10-minute retry), increment retry_count and requeue.
 *  3. If consecutive failures > 3, mark the source as degraded.
 *  4. If failures have continued for more than 7 days, move the source to the
 *     manual check queue.
 */
function handleRetries() {
  const placeholderNotice = 'Retry checker placeholder executed';
  console.log(placeholderNotice);
}