File size: 2,320 Bytes
f39c319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import cron from 'node-cron';
import db from './db';
import { v4 as uuidv4 } from 'uuid';

/**
 * Registers the crawler's recurring cron tasks.
 *
 * Three tasks are scheduled, in this order: a daily job trigger, a
 * four-hourly job trigger, and a retry check every ten minutes. Each task
 * logs a line when it fires and then delegates to `triggerJobs` or
 * `handleRetries`. Calling this function only registers the tasks; nothing
 * is triggered immediately.
 */
export function initScheduler() {
  console.log('Initializing crawler scheduler...');

  const tasks: { expression: string; announcement: string; run: () => void }[] = [
    // Minute 0 of hour 2, every day: sources whose crawl_frequency is 'daily'
    // (original note: peer_bank and sdk_vendor).
    {
      expression: '0 2 * * *',
      announcement: 'Running daily crawler job (0 2 * * *)...',
      run: () => triggerJobs('daily'),
    },
    // Minute 0 of every fourth hour: sources whose crawl_frequency is '4h'
    // (original note: regulator).
    {
      expression: '0 */4 * * *',
      announcement: 'Running 4h crawler job (0 */4 * * *)...',
      run: () => triggerJobs('4h'),
    },
    // Every ten minutes: look for failed jobs that should be retried.
    {
      expression: '*/10 * * * *',
      announcement: 'Running retry checker job...',
      run: () => handleRetries(),
    },
  ];

  for (const task of tasks) {
    cron.schedule(task.expression, () => {
      console.log(task.announcement);
      task.run();
    });
  }
}

/**
 * Queues one crawl job for every enabled source registered with the given
 * crawl frequency.
 *
 * Reads the matching `source_id`s from `source_registry`, then inserts one
 * `crawl_job` row per source (status `queued`, trigger type `schedule`)
 * through a single `db.transaction` call. Every row in a batch shares the
 * same `started_at` timestamp, captured once before the inserts begin.
 *
 * Errors are caught and logged here and are never rethrown, so a failing
 * batch does not propagate into the caller.
 *
 * @param frequency - Value matched against `source_registry.crawl_frequency`
 *   (the scheduler in this file passes `'daily'` and `'4h'`).
 */
function triggerJobs(frequency: string): void {
  try {
    const sources = db
      .prepare('SELECT source_id FROM source_registry WHERE enabled = 1 AND crawl_frequency = ?')
      .all(frequency) as { source_id: string }[];

    // Nothing registered for this frequency: skip the insert and the summary log.
    if (sources.length === 0) return;

    const now = new Date().toISOString();
    const stmt = db.prepare(`
      INSERT INTO crawl_job (
        job_id, source_id, trigger_type, status, started_at
      ) VALUES (
        @job_id, @source_id, @trigger_type, @status, @started_at
      )
    `);

    // Filled while the transaction runs, but only reported after it returns.
    // Logging from inside the transaction body would announce jobs as queued
    // even when a later insert throws and the batch is rolled back
    // (assumes `db` follows better-sqlite3 transaction semantics — confirm).
    const queued: { job_id: string; source_id: string }[] = [];

    const insertMany = db.transaction((srcs: { source_id: string }[]) => {
      for (const src of srcs) {
        // "job_" followed by the first 16 hex characters of a dash-less UUID v4.
        const job_id = `job_${uuidv4().replace(/-/g, '').substring(0, 16)}`;
        stmt.run({
          job_id,
          source_id: src.source_id,
          trigger_type: 'schedule',
          status: 'queued',
          started_at: now
        });
        queued.push({ job_id, source_id: src.source_id });
      }
    });

    insertMany(sources);

    // Here we would normally run the crawler job asynchronously
    for (const { job_id, source_id } of queued) {
      console.log(`Job ${job_id} queued for source ${source_id}`);
    }
    console.log(`Triggered ${sources.length} jobs for frequency ${frequency}`);
  } catch (error) {
    console.error('Failed to trigger jobs:', error);
  }
}

/**
 * Placeholder for the periodic retry pass.
 *
 * No retry logic is implemented yet; the function only logs that it ran.
 */
function handleRetries() {
  // Planned behaviour, not implemented yet:
  //   - pick up failed jobs that are still below their retry limit;
  //   - for the 10-minute retry, requeue a job whose retry_count is below 1
  //     and bump its retry_count;
  //   - mark a source as degraded after more than 3 consecutive failures;
  //   - move a source to the manual check queue once failures have
  //     continued for more than 7 days.
  const notice = 'Retry checker placeholder executed';
  console.log(notice);
}