File size: 17,855 Bytes
da2e594
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
#!/usr/bin/env node
import { createDatabaseAdapter } from '../database/database-adapter';
import { TemplateService } from '../templates/template-service';
import * as fs from 'fs';
import * as path from 'path';
import * as zlib from 'zlib';
import * as dotenv from 'dotenv';
import type { MetadataRequest } from '../templates/metadata-generator';

// Load environment variables via dotenv before any code below reads
// process.env (OPENAI_API_KEY, OPENAI_MODEL, OPENAI_BATCH_SIZE, METADATA_LIMIT).
dotenv.config();

/**
 * Extract one configuration row per usable node from a template's workflow.
 *
 * The workflow is decoded from base64, gunzipped and parsed as JSON. Sticky
 * notes and nodes without `parameters` are skipped. Malformed entries (null,
 * or with a missing/non-string `type`) are skipped individually so that one
 * bad node does not discard every other config of the template.
 *
 * @param templateId - Id of the template the workflow belongs to.
 * @param templateName - Template name, copied onto every row.
 * @param templateViews - Template view count, copied onto every row.
 * @param workflowCompressed - Base64-encoded, gzip-compressed workflow JSON.
 * @param metadata - Optional parsed template metadata; `complexity` defaults
 *   to `'medium'` and `use_cases` to an empty list when absent.
 * @returns The extracted rows, or an empty array when the workflow cannot be
 *   decompressed or parsed (the error is logged, never thrown).
 */
function extractNodeConfigs(
  templateId: number,
  templateName: string,
  templateViews: number,
  workflowCompressed: string,
  metadata: any
): Array<{
  node_type: string;
  template_id: number;
  template_name: string;
  template_views: number;
  node_name: string;
  parameters_json: string;
  credentials_json: string | null;
  has_credentials: number;
  has_expressions: number;
  complexity: string;
  use_cases: string;
}> {
  try {
    // Decompress workflow
    const decompressed = zlib.gunzipSync(Buffer.from(workflowCompressed, 'base64'));
    const workflow = JSON.parse(decompressed.toString('utf-8'));

    // Anything other than an array of nodes yields no configs
    const nodes: any[] = Array.isArray(workflow?.nodes) ? workflow.nodes : [];
    const configs: ReturnType<typeof extractNodeConfigs> = [];

    for (const node of nodes) {
      // Skip malformed entries instead of letting them abort the whole template
      if (!node || typeof node.type !== 'string') {
        continue;
      }

      // Skip UI-only nodes (sticky notes, etc.)
      if (node.type.includes('stickyNote') || !node.parameters) {
        continue;
      }

      configs.push({
        node_type: node.type,
        template_id: templateId,
        template_name: templateName,
        template_views: templateViews,
        node_name: node.name,
        parameters_json: JSON.stringify(node.parameters),
        credentials_json: node.credentials ? JSON.stringify(node.credentials) : null,
        has_credentials: node.credentials ? 1 : 0,
        has_expressions: detectExpressions(node.parameters) ? 1 : 0,
        complexity: metadata?.complexity || 'medium',
        use_cases: JSON.stringify(metadata?.use_cases || [])
      });
    }

    return configs;
  } catch (error) {
    console.error(`Error extracting configs from template ${templateId}:`, error);
    return [];
  }
}

/**
 * Report whether a node's parameters appear to contain n8n expressions.
 *
 * The parameters are serialized to JSON and searched for the expression
 * prefix `={{` and the `$json` / `$node` variable references.
 *
 * @param params - Node parameters; any falsy value yields `false`.
 * @returns `true` when at least one marker occurs in the serialized form.
 */
function detectExpressions(params: any): boolean {
  if (!params) {
    return false;
  }

  const serialized = JSON.stringify(params);
  const expressionMarkers = ['={{', '$json', '$node'];
  return expressionMarkers.some(marker => serialized.includes(marker));
}

/**
 * Replace the stored node configs of the given templates, then re-rank and
 * prune the whole `template_node_configs` table.
 *
 * Steps:
 * 1. Delete existing rows for every template id present in `configs`.
 * 2. Insert all `configs`.
 * 3. Set `rank` to 1 + the number of rows of the same `node_type` that have
 *    strictly more `template_views` (rows with equal views share a rank).
 * 4. Delete every row whose rank is not within the top 10. Because ties share
 *    a rank, more than 10 rows per node type can remain.
 *
 * @param db - Database handle exposing `prepare(sql).run(...)` and `exec(sql)`.
 * @param configs - Rows as produced by `extractNodeConfigs`; an empty list is
 *   a no-op apart from a log line.
 */
function insertAndRankConfigs(db: any, configs: any[]) {
  if (configs.length === 0) {
    console.log('No configs to insert');
    return;
  }

  // Clear old configs for these templates. SQLite caps the number of bound
  // parameters per statement (999 in builds before 3.32.0), so the ids are
  // deleted in chunks rather than through one unbounded IN (...) list.
  const MAX_IDS_PER_DELETE = 500;
  const templateIds = [...new Set(configs.map(c => c.template_id))];
  for (let start = 0; start < templateIds.length; start += MAX_IDS_PER_DELETE) {
    const chunk = templateIds.slice(start, start + MAX_IDS_PER_DELETE);
    const placeholders = chunk.map(() => '?').join(',');
    db.prepare(`DELETE FROM template_node_configs WHERE template_id IN (${placeholders})`).run(...chunk);
  }

  // Insert new configs
  const insertStmt = db.prepare(`
    INSERT INTO template_node_configs (
      node_type, template_id, template_name, template_views,
      node_name, parameters_json, credentials_json,
      has_credentials, has_expressions, complexity, use_cases
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  for (const config of configs) {
    insertStmt.run(
      config.node_type,
      config.template_id,
      config.template_name,
      config.template_views,
      config.node_name,
      config.parameters_json,
      config.credentials_json,
      config.has_credentials,
      config.has_expressions,
      config.complexity,
      config.use_cases
    );
  }

  // Rank configs per node_type by template popularity
  db.exec(`
    UPDATE template_node_configs
    SET rank = (
      SELECT COUNT(*) + 1
      FROM template_node_configs AS t2
      WHERE t2.node_type = template_node_configs.node_type
        AND t2.template_views > template_node_configs.template_views
    )
  `);

  // Keep only top 10 per node_type
  db.exec(`
    DELETE FROM template_node_configs
    WHERE id NOT IN (
      SELECT id FROM template_node_configs
      WHERE rank <= 10
      ORDER BY node_type, rank
    )
  `);

  console.log(`✅ Extracted and ranked ${configs.length} node configurations`);
}

/**
 * Extract node configurations from every stored template and persist them.
 *
 * Templates are read through the service's repository. Only templates with a
 * compressed workflow are processed. A template whose `metadata_json` cannot
 * be parsed is still processed, just without metadata, so one corrupt row
 * does not abort the whole extraction. When at least one config was
 * extracted, the rows are inserted and ranked and summary statistics are
 * printed.
 *
 * @param db - Database handle used for inserting configs and reading stats.
 * @param service - Template service whose repository supplies the templates.
 */
async function extractTemplateConfigs(db: any, service: TemplateService) {
  console.log('📦 Extracting node configurations from templates...');
  // The repository is reached through an `any` cast, as elsewhere in this file
  const repository = (service as any).repository;
  const allTemplates = repository.getAllTemplates();

  const allConfigs: any[] = [];

  for (const template of allTemplates) {
    if (!template.workflow_json_compressed) {
      continue;
    }

    let metadata: any = null;
    if (template.metadata_json) {
      try {
        metadata = JSON.parse(template.metadata_json);
      } catch (error) {
        console.warn(`Failed to parse metadata for template ${template.id}:`, error);
      }
    }

    const configs = extractNodeConfigs(
      template.id,
      template.name,
      template.views,
      template.workflow_json_compressed,
      metadata
    );
    allConfigs.push(...configs);
  }

  if (allConfigs.length > 0) {
    insertAndRankConfigs(db, allConfigs);

    // Show stats
    const configStats = db.prepare(`
      SELECT
        COUNT(DISTINCT node_type) as node_types,
        COUNT(*) as total_configs,
        AVG(rank) as avg_rank
      FROM template_node_configs
    `).get() as any;

    console.log(`📊 Node config stats:`);
    console.log(`   - Unique node types: ${configStats.node_types}`);
    console.log(`   - Total configs stored: ${configStats.total_configs}`);
    console.log(`   - Average rank: ${configStats.avg_rank?.toFixed(1) || 'N/A'}`);
  } else {
    console.log('⚠️  No node configurations extracted');
  }
}

/**
 * Entry point of the template fetch script. Works on `./data/nodes.db`.
 *
 * Exactly one of three flows runs:
 * - `extractOnly`: ensure the `template_node_configs` table exists (applying
 *   the migration SQL if it does not) and extract node configs from the
 *   templates already stored. Takes precedence over `metadataOnly`.
 * - `metadataOnly`: generate metadata for stored templates; exits with code 1
 *   when `OPENAI_API_KEY` is not set.
 * - otherwise: prepare the schema according to `mode`, fetch templates,
 *   print statistics, extract node configs and, if requested and an API key
 *   is available, generate metadata.
 *
 * The database is closed on every path, including failures.
 *
 * @param mode - `'rebuild'` drops the `templates` and `templates_fts` tables
 *   and re-applies `schema.sql`; `'update'` keeps existing data and only adds
 *   the metadata columns when they are missing.
 * @param generateMetadata - Generate metadata after a successful fetch.
 * @param metadataOnly - Skip fetching and only generate metadata.
 * @param extractOnly - Skip fetching and only extract node configs.
 * @throws Re-throws schema setup errors in rebuild mode. Fetch errors and
 *   extract-only table setup errors terminate the process with exit code 1.
 */
async function fetchTemplates(
  mode: 'rebuild' | 'update' = 'rebuild',
  generateMetadata: boolean = false,
  metadataOnly: boolean = false,
  extractOnly: boolean = false
) {
  // If extract-only mode, skip template fetching and only extract configs
  if (extractOnly) {
    console.log('📦 Extract-only mode: Extracting node configurations from existing templates...\n');

    const db = await createDatabaseAdapter('./data/nodes.db');
    try {
      // Ensure template_node_configs table exists
      try {
        const tableExists = db.prepare(`
          SELECT name FROM sqlite_master
          WHERE type='table' AND name='template_node_configs'
        `).get();

        if (!tableExists) {
          console.log('📋 Creating template_node_configs table...');
          const migrationPath = path.join(__dirname, '../../src/database/migrations/add-template-node-configs.sql');
          const migration = fs.readFileSync(migrationPath, 'utf8');
          db.exec(migration);
          console.log('✅ Table created successfully\n');
        }
      } catch (error) {
        console.error('❌ Error checking/creating template_node_configs table:', error);
        // process.exit() does not run `finally` blocks, so close explicitly first
        closeDatabase(db);
        process.exit(1);
      }

      const service = new TemplateService(db);
      await extractTemplateConfigs(db, service);
    } finally {
      closeDatabase(db);
    }
    return;
  }

  // If metadata-only mode, skip template fetching entirely
  if (metadataOnly) {
    console.log('🤖 Metadata-only mode: Generating metadata for existing templates...\n');

    if (!process.env.OPENAI_API_KEY) {
      console.error('❌ OPENAI_API_KEY not set in environment');
      process.exit(1);
    }

    const db = await createDatabaseAdapter('./data/nodes.db');
    try {
      const service = new TemplateService(db);
      await generateTemplateMetadata(db, service);
    } finally {
      closeDatabase(db);
    }
    return;
  }

  const modeEmoji = mode === 'rebuild' ? '🔄' : '⬆️';
  const modeText = mode === 'rebuild' ? 'Rebuilding' : 'Updating';
  console.log(`${modeEmoji} ${modeText} n8n workflow templates...\n`);

  if (generateMetadata) {
    console.log('🤖 Metadata generation enabled (using OpenAI)\n');
  }

  // Ensure data directory exists
  const dataDir = './data';
  if (!fs.existsSync(dataDir)) {
    fs.mkdirSync(dataDir, { recursive: true });
  }

  // Initialize database
  const db = await createDatabaseAdapter('./data/nodes.db');

  try {
    // Handle database schema based on mode
    if (mode === 'rebuild') {
      try {
        // Drop existing tables in rebuild mode
        db.exec('DROP TABLE IF EXISTS templates');
        db.exec('DROP TABLE IF EXISTS templates_fts');
        console.log('🗑️  Dropped existing templates tables (rebuild mode)\n');

        // Apply fresh schema
        const schema = fs.readFileSync(path.join(__dirname, '../../src/database/schema.sql'), 'utf8');
        db.exec(schema);
        console.log('📋 Applied database schema\n');
      } catch (error) {
        console.error('❌ Error setting up database schema:', error);
        throw error;
      }
    } else {
      console.log('📊 Update mode: Keeping existing templates and schema\n');

      // In update mode, only ensure new columns exist (for migration)
      try {
        // Check if metadata columns exist, add them if not (migration support)
        const columns = db.prepare("PRAGMA table_info(templates)").all() as any[];
        const hasMetadataColumn = columns.some((col: any) => col.name === 'metadata_json');

        if (!hasMetadataColumn) {
          console.log('📋 Adding metadata columns to existing schema...');
          db.exec(`
            ALTER TABLE templates ADD COLUMN metadata_json TEXT;
            ALTER TABLE templates ADD COLUMN metadata_generated_at DATETIME;
          `);
          console.log('✅ Metadata columns added\n');
        }
      } catch (error) {
        // Best-effort migration: carry on with the existing schema, but show
        // the reason instead of claiming the schema is up to date
        console.warn('⚠️  Could not verify or migrate the templates schema, continuing:', error);
      }
    }

    // FTS5 initialization is handled by TemplateRepository
    // No need to duplicate the logic here

    // Create service
    const service = new TemplateService(db);

    // Progress tracking
    let lastMessage = '';
    const startTime = Date.now();

    try {
      await service.fetchAndUpdateTemplates((message, current, total) => {
        // Clear previous line
        if (lastMessage) {
          process.stdout.write('\r' + ' '.repeat(lastMessage.length) + '\r');
        }

        const progress = total > 0 ? Math.round((current / total) * 100) : 0;
        lastMessage = `📊 ${message}: ${current}/${total} (${progress}%)`;
        process.stdout.write(lastMessage);
      }, mode);  // Pass the mode parameter!

      console.log('\n'); // New line after progress

      // Get stats
      const stats = await service.getTemplateStats();
      const elapsed = Math.round((Date.now() - startTime) / 1000);

      console.log('✅ Template fetch complete!\n');
      console.log('📈 Statistics:');
      console.log(`   - Total templates: ${stats.totalTemplates}`);
      console.log(`   - Average views: ${stats.averageViews}`);
      console.log(`   - Time elapsed: ${elapsed} seconds`);
      console.log('\n🔝 Top used nodes:');

      stats.topUsedNodes.forEach((node: any, index: number) => {
        console.log(`   ${index + 1}. ${node.node} (${node.count} templates)`);
      });

      // Extract node configurations from templates
      console.log('');
      await extractTemplateConfigs(db, service);

      // Generate metadata if requested
      if (generateMetadata && process.env.OPENAI_API_KEY) {
        console.log('\n🤖 Generating metadata for templates...');
        await generateTemplateMetadata(db, service);
      } else if (generateMetadata && !process.env.OPENAI_API_KEY) {
        console.log('\n⚠️  Metadata generation requested but OPENAI_API_KEY not set');
      }
    } catch (error) {
      console.error('\n❌ Error fetching templates:', error);
      // process.exit() does not run `finally` blocks, so close explicitly first
      closeDatabase(db);
      process.exit(1);
    }
  } finally {
    // Close database on success and on thrown schema errors alike
    closeDatabase(db);
  }
}

/**
 * Close the database when the adapter exposes a `close()` method.
 */
function closeDatabase(db: any): void {
  if (db && typeof db.close === 'function') {
    db.close();
  }
}

/**
 * Generate metadata with OpenAI for templates that do not have any yet, and
 * store the successful results through the service's repository.
 *
 * Environment variables read here:
 * - `OPENAI_API_KEY`: passed to the batch processor (both call sites in this
 *   file check that it is set before calling).
 * - `OPENAI_MODEL`: model name, defaults to `gpt-4o-mini`.
 * - `OPENAI_BATCH_SIZE`: templates per batch, defaults to 50; a value that is
 *   not a positive integer is replaced by the default with a warning.
 * - `METADATA_LIMIT`: maximum number of templates to process; 0, unset or
 *   non-numeric means no limit.
 *
 * Any error is logged and swallowed, so this function never rejects.
 *
 * @param db - Database handle; not used directly by this function.
 * @param service - Template service whose repository is read and updated.
 */
async function generateTemplateMetadata(db: any, service: TemplateService) {
  try {
    const { BatchProcessor } = await import('../templates/batch-processor');
    const repository = (service as any).repository;

    // Get templates without metadata (0 = no limit)
    const limit = Number.parseInt(process.env.METADATA_LIMIT || '0', 10);
    const templatesWithoutMetadata = limit > 0
      ? repository.getTemplatesWithoutMetadata(limit)
      : repository.getTemplatesWithoutMetadata(999999); // Get all

    if (templatesWithoutMetadata.length === 0) {
      console.log('✅ All templates already have metadata');
      return;
    }

    console.log(`Found ${templatesWithoutMetadata.length} templates without metadata`);

    // Validate the batch size so NaN, zero or a negative value never reaches
    // the batch processor
    const DEFAULT_BATCH_SIZE = 50;
    const requestedBatchSize = Number.parseInt(
      process.env.OPENAI_BATCH_SIZE || String(DEFAULT_BATCH_SIZE),
      10
    );
    const batchSize = Number.isInteger(requestedBatchSize) && requestedBatchSize > 0
      ? requestedBatchSize
      : DEFAULT_BATCH_SIZE;
    if (batchSize !== requestedBatchSize) {
      console.warn(`⚠️  Invalid OPENAI_BATCH_SIZE "${process.env.OPENAI_BATCH_SIZE}", using ${DEFAULT_BATCH_SIZE}`);
    }
    console.log(`Processing in batches of ${batchSize} templates each`);

    // Warn if batch size is very large
    if (batchSize > 100) {
      console.log(`⚠️  Large batch size (${batchSize}) may take longer to process`);
      console.log(`   Consider using OPENAI_BATCH_SIZE=50 for faster results`);
    }

    const processor = new BatchProcessor({
      apiKey: process.env.OPENAI_API_KEY!,
      model: process.env.OPENAI_MODEL || 'gpt-4o-mini',
      batchSize: batchSize,
      outputDir: './temp/batch'
    });

    // Prepare metadata requests
    const requests: MetadataRequest[] = templatesWithoutMetadata.map((t: any) => {
      // A workflow that cannot be decoded is reported and left undefined
      let workflow = undefined;
      try {
        if (t.workflow_json_compressed) {
          const decompressed = zlib.gunzipSync(Buffer.from(t.workflow_json_compressed, 'base64'));
          workflow = JSON.parse(decompressed.toString());
        } else if (t.workflow_json) {
          workflow = JSON.parse(t.workflow_json);
        }
      } catch (error) {
        console.warn(`Failed to parse workflow for template ${t.id}:`, error);
      }

      // Parse nodes_used safely
      let nodes: string[] = [];
      try {
        if (t.nodes_used) {
          nodes = JSON.parse(t.nodes_used);
          // Ensure it's an array
          if (!Array.isArray(nodes)) {
            console.warn(`Template ${t.id} has invalid nodes_used (not an array), using empty array`);
            nodes = [];
          }
        }
      } catch (error) {
        console.warn(`Failed to parse nodes_used for template ${t.id}:`, error);
        nodes = [];
      }

      return {
        templateId: t.id,
        name: t.name,
        description: t.description,
        nodes: nodes,
        workflow
      };
    });

    // Process in batches
    const results = await processor.processTemplates(requests, (message, current, total) => {
      process.stdout.write(`\r📊 ${message}: ${current}/${total}`);
    });

    console.log('\n');

    // Update database with metadata; results that carry an error are skipped
    const metadataMap = new Map();
    for (const [templateId, result] of results) {
      if (!result.error) {
        metadataMap.set(templateId, result.metadata);
      }
    }

    if (metadataMap.size > 0) {
      repository.batchUpdateMetadata(metadataMap);
      console.log(`✅ Updated metadata for ${metadataMap.size} templates`);
    }

    // Show stats
    const stats = repository.getMetadataStats();
    console.log('\n📈 Metadata Statistics:');
    console.log(`   - Total templates: ${stats.total}`);
    console.log(`   - With metadata: ${stats.withMetadata}`);
    console.log(`   - Without metadata: ${stats.withoutMetadata}`);
    console.log(`   - Outdated (>30 days): ${stats.outdated}`);
  } catch (error) {
    console.error('\n❌ Error generating metadata:', error);
  }
}

/**
 * Parse the script's command line flags from `process.argv`.
 *
 * Recognized flags:
 * - `--mode=rebuild|update` or `--mode rebuild|update` (default: rebuild)
 * - `--update`: shorthand for `--mode=update`, wins over `--mode`
 * - `--generate-metadata` / `--metadata`
 * - `--metadata-only`
 * - `--extract-only` / `--extract`
 * - `--help` / `-h`: print usage and exit with code 0
 *
 * Only the exact `--mode` flag is treated as the mode switch, so unrelated
 * flags that merely start with the same letters are ignored. An unrecognized
 * mode value is reported with a warning and the default is kept.
 *
 * @returns The parsed options.
 */
function parseArgs(): { mode: 'rebuild' | 'update', generateMetadata: boolean, metadataOnly: boolean, extractOnly: boolean } {
  const args = process.argv.slice(2);

  let mode: 'rebuild' | 'update' = 'rebuild';
  let generateMetadata = false;
  let metadataOnly = false;
  let extractOnly = false;

  // Check for --mode flag (either "--mode=value" or "--mode value")
  const modeIndex = args.findIndex(arg => arg === '--mode' || arg.startsWith('--mode='));
  if (modeIndex !== -1) {
    const modeArg = args[modeIndex];
    const modeValue = modeArg.includes('=') ? modeArg.split('=')[1] : args[modeIndex + 1];

    if (modeValue === 'update') {
      mode = 'update';
    } else if (modeValue !== 'rebuild') {
      // Rebuild drops existing tables, so make a mistyped value visible
      console.warn(`Ignoring unrecognized --mode value "${modeValue}" (expected "rebuild" or "update")`);
    }
  }

  // Check for --update flag as shorthand
  if (args.includes('--update')) {
    mode = 'update';
  }

  // Check for --generate-metadata flag
  if (args.includes('--generate-metadata') || args.includes('--metadata')) {
    generateMetadata = true;
  }

  // Check for --metadata-only flag
  if (args.includes('--metadata-only')) {
    metadataOnly = true;
  }

  // Check for --extract-only flag
  if (args.includes('--extract-only') || args.includes('--extract')) {
    extractOnly = true;
  }

  // Show help if requested
  if (args.includes('--help') || args.includes('-h')) {
    console.log('Usage: npm run fetch:templates [options]\n');
    console.log('Options:');
    console.log('  --mode=rebuild|update  Rebuild from scratch or update existing (default: rebuild)');
    console.log('  --update               Shorthand for --mode=update');
    console.log('  --generate-metadata    Generate AI metadata after fetching templates');
    console.log('  --metadata             Shorthand for --generate-metadata');
    console.log('  --metadata-only        Only generate metadata, skip template fetching');
    console.log('  --extract-only         Only extract node configs, skip template fetching');
    console.log('  --extract              Shorthand for --extract-only');
    console.log('  --help, -h             Show this help message');
    process.exit(0);
  }

  return { mode, generateMetadata, metadataOnly, extractOnly };
}

// Run if called directly
if (require.main === module) {
  const { mode, generateMetadata, metadataOnly, extractOnly } = parseArgs();
  fetchTemplates(mode, generateMetadata, metadataOnly, extractOnly).catch((error: unknown) => {
    // Log the failure and make it visible to the caller through the exit
    // code; previously a rejected run still exited with code 0
    console.error(error);
    process.exitCode = 1;
  });
}

export { fetchTemplates };