| import cron from 'node-cron'; |
| import db from './db'; |
| import { v4 as uuidv4 } from 'uuid'; |
|
|
/**
 * Registers the crawler's recurring cron tasks with node-cron.
 *
 * Three schedules are set up, in this order:
 *  1. daily at 02:00          -> triggerJobs('daily')
 *  2. every 4 hours, minute 0 -> triggerJobs('4h')
 *  3. every 10 minutes        -> handleRetries()
 *
 * Each handler logs a line before doing its work. Calling this function more
 * than once registers the schedules again; there is no guard against that here.
 */
export function initScheduler(): void {
  console.log('Initializing crawler scheduler...');

  // Handlers are named separately so the schedule table below reads at a glance.
  const runDailyCrawl = () => {
    console.log('Running daily crawler job (0 2 * * *)...');
    triggerJobs('daily');
  };

  const runFourHourlyCrawl = () => {
    console.log('Running 4h crawler job (0 */4 * * *)...');
    triggerJobs('4h');
  };

  const runRetryCheck = () => {
    console.log('Running retry checker job...');
    handleRetries();
  };

  cron.schedule('0 2 * * *', runDailyCrawl);
  cron.schedule('0 */4 * * *', runFourHourlyCrawl);
  cron.schedule('*/10 * * * *', runRetryCheck);
}
|
|
/**
 * Queues one crawl job for every enabled source with the given crawl frequency.
 *
 * Selects `source_id` from `source_registry` where `enabled = 1` and
 * `crawl_frequency` equals `frequency`, then inserts one `crawl_job` row per
 * source (trigger_type 'schedule', status 'queued', started_at = the time this
 * function ran) inside a single transaction. Returns early without touching
 * `crawl_job` when no source matches.
 *
 * Errors are caught and logged via console.error; nothing is rethrown.
 *
 * @param frequency - Value compared against `source_registry.crawl_frequency`
 *   (the scheduler in this file passes 'daily' and '4h').
 */
function triggerJobs(frequency: string) {
  try {
    const sources = db
      .prepare('SELECT source_id FROM source_registry WHERE enabled = 1 AND crawl_frequency = ?')
      .all(frequency) as { source_id: string }[];

    if (sources.length === 0) return;

    const now = new Date().toISOString();
    const stmt = db.prepare(`
      INSERT INTO crawl_job (
        job_id, source_id, trigger_type, status, started_at
      ) VALUES (
        @job_id, @source_id, @trigger_type, @status, @started_at
      )
    `);

    // Build every row up front so the transaction body does nothing but insert.
    // job_id is "job_" + the first 16 hex characters of a dash-stripped UUID v4.
    const jobs = sources.map((src) => ({
      job_id: `job_${uuidv4().replace(/-/g, '').substring(0, 16)}`,
      source_id: src.source_id,
      trigger_type: 'schedule',
      status: 'queued',
      started_at: now,
    }));

    const insertMany = db.transaction((rows: typeof jobs) => {
      for (const row of rows) {
        stmt.run(row);
      }
    });

    insertMany(jobs);

    // Log only after the transaction call has returned. Previously each
    // "queued" line was printed inside the transaction body, so a failure on a
    // later insert left log lines for jobs that were never persisted.
    // NOTE(review): assumes `db` is a better-sqlite3 database, whose
    // transaction wrapper rolls back when the wrapped function throws — confirm.
    for (const job of jobs) {
      console.log(`Job ${job.job_id} queued for source ${job.source_id}`);
    }
    console.log(`Triggered ${sources.length} jobs for frequency ${frequency}`);
  } catch (error) {
    console.error('Failed to trigger jobs:', error);
  }
}
|
|
/**
 * Retry checker stub.
 *
 * Currently this only writes a log line to show that it ran; no retry logic
 * is implemented in this function yet.
 */
function handleRetries(): void {
  const notice = 'Retry checker placeholder executed';
  console.log(notice);
}
|
|