// Spaces: Kraft102/widgetdc-cortex (Paused)
// File size: 22,112 Bytes
// 529090e

/**
 * 🧬 DNA SPLICER - Intelligence Pattern Extractor
 *
 * Analyzes leaked System Prompts and Reference Code to extract
 * intelligence patterns for WidgeTDC's Knowledge Graph.
 *
 * Node Types Created:
 * - (:Persona) - AI personalities (Claude, GPT variants, custom GPTs)
 * - (:Directive) - Rules/instructions from system prompts
 * - (:Capability) - Identified capabilities (CodeGeneration, WebBrowsing, etc.)
 * - (:Hotkey) - Interaction patterns
 * - (:Pattern) - Behavioral patterns extracted
 */

import fs from 'fs/promises';
import path from 'path';
import crypto from 'crypto';
import neo4j, { Driver, Session } from 'neo4j-driver';

// ============================================
// TYPES
// ============================================
/**
 * Shape of a persona record.
 *
 * The field names correspond to properties that
 * `DNASplicer.processPromptFile` writes onto (:Persona) nodes. The interface
 * itself is not referenced anywhere else in this file.
 */
export interface PersonaNode {
  /** Persona name; (:Persona) nodes are merged on this value. */
  name: string;
  source: string;        // filename (NOTE: processPromptFile stores the full file path in this property)
  sourceUrl?: string;    // original URL if available
  /** Author, taken from a "By ..." line in the file header when present. */
  author?: string;
  /** Short description, taken from the file header when present. */
  description?: string;
  /** Content hash; processPromptFile uses the MD5 hex digest of the extracted prompt text. */
  hash: string;
}

/**
 * A single instruction extracted from a prompt, stored as a (:Directive) node
 * and returned by `DNASplicer.getPersonaDirectives`.
 */
export interface DirectiveNode {
  /** Node key: first 12 hex characters of the MD5 of the trimmed sentence (see extractDirectives). */
  id: string;
  /** Sentence text; extractDirectives truncates it to 500 characters. */
  content: string;
  /** Category assigned by the first matching DIRECTIVE_PATTERNS regex. */
  type: 'rule' | 'behavior' | 'restriction' | 'capability' | 'tone' | 'format';
  priority: number;      // 1-10, how important
}

/**
 * A capability detected in a prompt, stored as a (:Capability) node.
 *
 * `DNASplicer.extractCapabilities` only fills `name` and `description`;
 * `codeSnippet` and `sourceFile` are never populated in this file.
 */
export interface CapabilityNode {
  /** Capability identifier, e.g. 'CodeGeneration'; (:Capability) nodes are merged on this value. */
  name: string;
  /** Human-readable note; extractCapabilities records which keyword triggered the detection. */
  description: string;
  codeSnippet?: string;
  sourceFile?: string;
}

/**
 * A "KEY: description" shortcut found in a prompt, stored as a (:Hotkey) node
 * keyed by `key` plus the owning persona's name.
 */
export interface HotkeyPattern {
  /** One to three uppercase ASCII letters, as captured by extractHotkeys. */
  key: string;
  /** Action identifier; extractHotkeys currently sets this to the same value as `key`. */
  action: string;
  /** Text that follows the colon on the hotkey line. */
  description: string;
}

/**
 * Summary returned by `DNASplicer.splice` and `DNASplicer.splicePremium`.
 *
 * The counters are sums of per-file results: they count upsert operations
 * performed, not distinct nodes in the graph (the same directive or
 * capability found in two files is counted twice).
 */
export interface SpliceResult {
  /** Number of files that produced a persona upsert. */
  personas: number;
  /** Number of directive upserts across all files. */
  directives: number;
  /** Number of capability upserts across all files. */
  capabilities: number;
  /** Number of hotkey upserts across all files. */
  hotkeys: number;
  /** Wall-clock duration of the run in milliseconds (difference of two Date.now() readings). */
  duration: number;
  /** Human-readable messages for failures collected during the run. */
  errors: string[];
}

// ============================================
// DIRECTIVE PATTERNS (Regex)
// ============================================
/**
 * Case-insensitive regexes used by `DNASplicer.extractDirectives` to classify
 * a sentence into a directive type.
 *
 * All patterns are unanchored and carry no word boundaries, so they also match
 * inside longer words (e.g. `can` matches "scan", `if` matches "different",
 * `use` matches "because").
 *
 * The key order here is NOT the evaluation order: extractDirectives tests
 * restriction, capability, behavior, tone, format and finally rule, and uses
 * the first pattern that matches.
 */
const DIRECTIVE_PATTERNS = {
  // Imperative phrasing ("you must ...") and explicit markers ("rule:", "important:", "note:").
  rule: /(?:you (?:must|should|always|never)|rule:|important:|note:)/i,
  // Conditional or response-oriented wording.
  behavior: /(?:when|if|respond|reply|act|behave|personality|tone)/i,
  // Prohibitions.
  restriction: /(?:never|don't|do not|cannot|must not|forbidden|under no circumstances)/i,
  // Statements about abilities, tools or features.
  capability: /(?:can|able to|capable|support|feature|tool|function)/i,
  // Voice / style descriptions; overlaps with `behavior` on "tone" and "personality".
  tone: /(?:tone|voice|style|manner|personality|character|persona)/i,
  // Output formatting instructions.
  format: /(?:format|structure|output|respond with|use|markdown|code block)/i,
};

// ============================================
// CAPABILITY KEYWORDS
// ============================================
/**
 * Capability names paired with the lowercase substrings that signal them.
 *
 * Group order and keyword order are significant: the flattened list below is
 * scanned front to back, and the first keyword that hits for a capability is
 * the one reported in its description.
 */
const CAPABILITY_KEYWORD_GROUPS: ReadonlyArray<readonly [string, readonly string[]]> = [
  ['CodeGeneration', ['code', 'write code', 'programming']],
  ['FileOperations', ['file']],
  ['FileReading', ['read file']],
  ['FileWriting', ['write file']],
  ['WebBrowsing', ['browse']],
  ['WebSearch', ['search']],
  ['ImageGeneration', ['image', 'dalle']],
  ['PythonExecution', ['python', 'jupyter']],
  ['DataAnalysis', ['analyze']],
  ['MathematicalReasoning', ['math']],
  ['LogicalReasoning', ['reason']],
  ['ChainOfThought', ['step-by-step']],
  ['TaskManagement', ['task']],
  ['ContextMemory', ['memory', 'remember']],
  ['ToolUse', ['tool']],
  ['FunctionCalling', ['function']],
  ['APIIntegration', ['api']],
  ['AutonomousOperation', ['autonomous']],
  ['SelfHealing', ['self-heal']],
  ['Debugging', ['debug']],
];

/** Flat `{ keyword, capability }` lookup list derived from the groups above. */
const CAPABILITY_KEYWORDS = CAPABILITY_KEYWORD_GROUPS.flatMap(([capability, keywords]) =>
  keywords.map((keyword) => ({ keyword, capability })),
);

// ============================================
// DNA SPLICER CLASS
// ============================================
/**
 * Reads prompt files from disk, extracts personas, directives, capabilities
 * and hotkeys from them, and upserts the result into Neo4j as (:Persona),
 * (:Directive), (:Capability) and (:Hotkey) nodes.
 *
 * Typical use: construct, call `splice()` or `splicePremium()`, optionally
 * query with the read helpers, then `close()` to release the driver.
 */
export class DNASplicer {
  private readonly driver: Driver;
  private readonly intelPath: string;

  // Premium prompt directories (real model system prompts)
  private static PREMIUM_PATHS = [
    'system_prompts_leaks-main/system_prompts_leaks-main/Anthropic',
    'system_prompts_leaks-main/system_prompts_leaks-main/OpenAI',
    'system_prompts_leaks-main/system_prompts_leaks-main/Google',
    'system_prompts_leaks-main/system_prompts_leaks-main/xAI',
    'system_prompts_leaks-main/system_prompts_leaks-main/Perplexity',
    'system_prompts_leaks-main/system_prompts_leaks-main/Misc',
  ];

  /**
   * Creates the Neo4j driver from the NEO4J_URI, NEO4J_USER and NEO4J_PASSWORD
   * environment variables, falling back to 'bolt://neo4j:7687', 'neo4j' and
   * 'password' respectively.
   *
   * @param intelPath Root directory that is scanned for prompt files.
   */
  constructor(intelPath: string = '/intel') {
    const uri = process.env.NEO4J_URI || 'bolt://neo4j:7687';
    const user = process.env.NEO4J_USER || 'neo4j';
    const password = process.env.NEO4J_PASSWORD || 'password';

    this.driver = neo4j.driver(uri, neo4j.auth.basic(user, password));
    this.intelPath = intelPath;

    console.log('🧬 DNA Splicer initialized');
  }

  /**
   * 🔬 PREMIUM SPLICE - Only real model prompts (Claude, GPT, Gemini, Grok)
   *
   * Scans each entry of PREMIUM_PATHS below `<intelPath>/Leaked_x` and
   * processes every prompt file found with the premium flag set.
   *
   * Per-file failures are collected in `errors`. Unlike `splice()`, a failure
   * while opening the session or while creating persona relationships is not
   * caught here and rejects the returned promise.
   *
   * @returns Totals for the run, the elapsed time in ms and per-file errors.
   */
  async splicePremium(): Promise<SpliceResult> {
    const startTime = Date.now();
    const errors: string[] = [];
    let personas = 0;
    let directives = 0;
    let capabilities = 0;
    let hotkeys = 0;

    console.log('🧬 DNA SPLICER [PREMIUM MODE]: Extracting real model system prompts');

    const session = this.driver.session();

    try {
      await this.createIndexes(session);

      // Only scan premium directories
      for (const premiumPath of DNASplicer.PREMIUM_PATHS) {
        const fullPath = path.join(this.intelPath, 'Leaked_x', premiumPath);
        console.log(`📂 Scanning premium path: ${fullPath}`);

        try {
          const files = await this.findPromptFiles(fullPath);
          console.log(`   Found ${files.length} files`);

          for (const filePath of files) {
            try {
              const result = await this.processPromptFile(session, filePath, true); // premium flag
              personas += result.personas;
              directives += result.directives;
              capabilities += result.capabilities;
              hotkeys += result.hotkeys;
            } catch (err) {
              errors.push(`Failed: ${path.basename(filePath)}: ${err}`);
            }
          }
        } catch (err) {
          // Keep going with the remaining directories, but do not hide the cause.
          console.warn(`⚠️ Could not access ${premiumPath}`, err);
        }
      }

      await this.createPersonaRelationships(session);

    } finally {
      await session.close();
    }

    const duration = Date.now() - startTime;
    console.log(`✅ PREMIUM SPLICE COMPLETE: ${personas} personas, ${directives} directives in ${duration}ms`);

    return { personas, directives, capabilities, hotkeys, duration, errors };
  }

  /**
   * 🔬 MAIN SPLICE OPERATION
   * Scans the whole intel directory and extracts all patterns.
   *
   * Never rejects for failures raised inside the scan: per-file failures and
   * any failure of the overall operation are logged and reported through
   * `errors` in the result.
   *
   * @returns Totals for the run, the elapsed time in ms and collected errors.
   */
  async splice(): Promise<SpliceResult> {
    const startTime = Date.now();
    const errors: string[] = [];
    let personas = 0;
    let directives = 0;
    let capabilities = 0;
    let hotkeys = 0;

    console.log(`🧬 DNA SPLICER: Starting intelligence extraction from ${this.intelPath}`);

    try {
      // Find all prompt files (.md / .txt)
      const promptFiles = await this.findPromptFiles(this.intelPath);
      console.log(`📁 Found ${promptFiles.length} prompt files`);

      const session = this.driver.session();

      try {
        // Create indexes for performance
        await this.createIndexes(session);

        // Process each prompt file
        for (const filePath of promptFiles) {
          try {
            const result = await this.processPromptFile(session, filePath);
            personas += result.personas;
            directives += result.directives;
            capabilities += result.capabilities;
            hotkeys += result.hotkeys;
          } catch (err) {
            const error = `Failed to process ${filePath}: ${err}`;
            console.error(`❌ ${error}`);
            errors.push(error);
          }
        }

        // Create inter-persona relationships
        await this.createPersonaRelationships(session);

      } finally {
        await session.close();
      }

    } catch (err) {
      errors.push(`Splice operation failed: ${err}`);
      console.error('❌ Splice failed:', err);
    }

    const duration = Date.now() - startTime;

    console.log(`✅ DNA SPLICE COMPLETE: ${personas} personas, ${directives} directives, ${capabilities} capabilities in ${duration}ms`);

    return {
      personas,
      directives,
      capabilities,
      hotkeys,
      duration,
      errors,
    };
  }

  /**
   * Recursively collects every file below `dir` whose name ends in `.md` or
   * `.txt` (case-sensitive).
   *
   * A directory that cannot be read is logged and contributes no files; the
   * method itself does not throw for that case.
   *
   * @param dir Directory to scan.
   * @returns Full paths of the matching files.
   */
  private async findPromptFiles(dir: string): Promise<string[]> {
    const files: string[] = [];

    try {
      const entries = await fs.readdir(dir, { withFileTypes: true });

      for (const entry of entries) {
        const fullPath = path.join(dir, entry.name);

        if (entry.isDirectory()) {
          const subFiles = await this.findPromptFiles(fullPath);
          files.push(...subFiles);
        } else if (entry.name.endsWith('.md') || entry.name.endsWith('.txt')) {
          files.push(fullPath);
        }
      }
    } catch (err) {
      console.warn(`⚠️ Could not read directory ${dir}:`, err);
    }

    return files;
  }

  /**
   * Creates the Neo4j indexes used by the splice queries.
   *
   * Best-effort: a failing statement is logged and the remaining ones are
   * still attempted. `IF NOT EXISTS` already covers the "index exists" case,
   * so anything caught here is a real problem worth surfacing in the logs.
   *
   * @param session Open session to run the statements on.
   */
  private async createIndexes(session: Session): Promise<void> {
    const indexes = [
      'CREATE INDEX persona_name IF NOT EXISTS FOR (p:Persona) ON (p.name)',
      'CREATE INDEX directive_type IF NOT EXISTS FOR (d:Directive) ON (d.type)',
      'CREATE INDEX capability_name IF NOT EXISTS FOR (c:Capability) ON (c.name)',
    ];

    for (const idx of indexes) {
      try {
        await session.run(idx);
      } catch (err) {
        console.warn(`⚠️ Could not create index (${idx}):`, err);
      }
    }
  }

  /**
   * Derives the vendor label from a prompt file path.
   *
   * Checks are case-sensitive and evaluated in a fixed order; the first hit
   * wins. Backslashes are normalised to forward slashes first so that the
   * '/Vendor/' directory checks also work for Windows-style paths.
   *
   * @param filePath Path of the prompt file.
   * @returns 'Anthropic', 'OpenAI', 'Google', 'xAI', 'Perplexity', 'Misc' or 'Unknown'.
   */
  private static detectVendor(filePath: string): string {
    const normalized = filePath.replace(/\\/g, '/');

    if (normalized.includes('/Anthropic/') || normalized.includes('claude')) return 'Anthropic';
    if (normalized.includes('/OpenAI/') || normalized.includes('gpt')) return 'OpenAI';
    if (normalized.includes('/Google/') || normalized.includes('gemini')) return 'Google';
    if (normalized.includes('/xAI/') || normalized.includes('grok')) return 'xAI';
    if (normalized.includes('/Perplexity/')) return 'Perplexity';
    if (normalized.includes('/Misc/')) return 'Misc';
    return 'Unknown';
  }

  /**
   * Processes a single prompt file: reads it, extracts metadata and prompt
   * text, upserts the (:Persona) node and then upserts and links its
   * directives, capabilities and hotkeys (one query per item).
   *
   * The persona is merged on its name, which is the file's base name without
   * extension, so two files with the same base name update the same node.
   *
   * @param session   Open session to run the queries on.
   * @param filePath  Path of the prompt file.
   * @param isPremium Stored on the persona as `isPremium`.
   * @returns Number of upserts performed per category; all zero when no
   *          prompt content could be extracted.
   * @throws Rejects if the file cannot be read or a query fails.
   */
  private async processPromptFile(session: Session, filePath: string, isPremium: boolean = false): Promise<{
    personas: number;
    directives: number;
    capabilities: number;
    hotkeys: number;
  }> {
    const content = await fs.readFile(filePath, 'utf-8');
    const fileName = path.basename(filePath, path.extname(filePath));

    // Extract metadata from markdown
    const metadata = this.extractMetadata(content, fileName);

    // Extract the actual prompt content (inside code blocks)
    const promptContent = this.extractPromptContent(content);

    if (!promptContent) {
      return { personas: 0, directives: 0, capabilities: 0, hotkeys: 0 };
    }

    // Content fingerprint stored on the persona node
    const personaHash = crypto.createHash('md5').update(promptContent).digest('hex');

    // Determine vendor from path
    const vendor = DNASplicer.detectVendor(filePath);

    await session.run(`
      MERGE (p:Persona {name: $name})
      ON CREATE SET
        p.source = $source,
        p.sourceUrl = $sourceUrl,
        p.author = $author,
        p.description = $description,
        p.hash = $hash,
        p.rawPrompt = $rawPrompt,
        p.vendor = $vendor,
        p.isPremium = $isPremium,
        p.createdAt = datetime()
      ON MATCH SET
        p.hash = $hash,
        p.rawPrompt = $rawPrompt,
        p.vendor = $vendor,
        p.isPremium = $isPremium,
        p.updatedAt = datetime()
    `, {
      name: metadata.name,
      source: filePath,
      sourceUrl: metadata.url || null,
      author: metadata.author || null,
      description: metadata.description || null,
      hash: personaHash,
      // Cap the stored prompt text at 50,000 characters
      rawPrompt: promptContent.substring(0, 50000),
      vendor,
      isPremium,
    });

    // Extract and create Directives
    const extractedDirectives = this.extractDirectives(promptContent);
    let directiveCount = 0;

    for (const directive of extractedDirectives) {
      await session.run(`
        MATCH (p:Persona {name: $personaName})
        MERGE (d:Directive {id: $id})
        ON CREATE SET
          d.content = $content,
          d.type = $type,
          d.priority = $priority,
          d.createdAt = datetime()
        MERGE (p)-[:HAS_DIRECTIVE]->(d)
      `, {
        personaName: metadata.name,
        id: directive.id,
        content: directive.content,
        type: directive.type,
        priority: directive.priority,
      });
      directiveCount++;
    }

    // Extract and create Capabilities
    const extractedCapabilities = this.extractCapabilities(promptContent);
    let capabilityCount = 0;

    for (const capability of extractedCapabilities) {
      await session.run(`
        MATCH (p:Persona {name: $personaName})
        MERGE (c:Capability {name: $name})
        ON CREATE SET
          c.description = $description,
          c.createdAt = datetime()
        MERGE (p)-[:HAS_CAPABILITY]->(c)
      `, {
        personaName: metadata.name,
        name: capability.name,
        description: capability.description,
      });
      capabilityCount++;
    }

    // Extract Hotkeys
    const extractedHotkeys = this.extractHotkeys(promptContent);
    let hotkeyCount = 0;

    for (const hotkey of extractedHotkeys) {
      await session.run(`
        MATCH (p:Persona {name: $personaName})
        MERGE (h:Hotkey {key: $key, personaName: $personaName})
        ON CREATE SET
          h.action = $action,
          h.description = $description,
          h.createdAt = datetime()
        MERGE (p)-[:USES_HOTKEY]->(h)
      `, {
        personaName: metadata.name,
        key: hotkey.key,
        action: hotkey.action,
        description: hotkey.description,
      });
      hotkeyCount++;
    }

    console.log(`  ✓ ${metadata.name}: ${directiveCount} directives, ${capabilityCount} capabilities, ${hotkeyCount} hotkeys`);

    return {
      personas: 1,
      directives: directiveCount,
      capabilities: capabilityCount,
      hotkeys: hotkeyCount,
    };
  }

  /**
   * Extracts metadata from the first 15 lines of a prompt file.
   *
   * - description: first non-empty line that does not start with '#', 'By '
   *   or 'http'
   * - author: text after a leading 'By '
   * - url: a line containing 'chat.openai.com/g/' or 'chatgpt.com/g/'
   *
   * @param content  Full file content.
   * @param fileName File base name without extension; used as the persona name.
   * @returns The collected metadata; `name` is always set.
   */
  private extractMetadata(content: string, fileName: string): {
    name: string;
    description?: string;
    author?: string;
    url?: string;
  } {
    const lines = content.split('\n');
    let name = fileName;
    let description: string | undefined;
    let author: string | undefined;
    let url: string | undefined;

    for (const line of lines.slice(0, 15)) {
      // ## Title
      // NOTE(review): `name` starts out as `fileName`, so `!name` is only true
      // for an empty file name and a '## ' title practically never replaces it.
      // Left as-is on purpose: the name is the Persona merge key, and switching
      // to titles would re-key existing graph data.
      if (line.startsWith('## ') && !name) {
        name = line.replace('## ', '').trim();
      }
      // Description line (usually after title)
      else if (!description && line.trim() && !line.startsWith('#') && !line.startsWith('By ') && !line.startsWith('http')) {
        description = line.trim();
      }
      // By Author
      else if (line.startsWith('By ')) {
        author = line.replace('By ', '').trim();
      }
      // URL
      else if (line.includes('chat.openai.com/g/') || line.includes('chatgpt.com/g/')) {
        url = line.trim();
      }
    }

    return { name, description, author, url };
  }

  /**
   * Extracts the prompt text from a file.
   *
   * Prefers the body of the first fenced code block (``` or ```markdown).
   * Without a code block, falls back to everything from the first line after
   * the sixth that is non-empty, is not a heading or 'By ' line and contains
   * no 'http'.
   *
   * @param content Full file content.
   * @returns The trimmed prompt text (possibly empty for an empty code
   *          block), or null when neither strategy finds content.
   */
  private extractPromptContent(content: string): string | null {
    // Match content inside ```markdown ... ``` or ``` ... ```
    const codeBlockMatch = content.match(/```(?:markdown)?\s*([\s\S]*?)```/);
    if (codeBlockMatch) {
      return codeBlockMatch[1].trim();
    }

    // If no code block, try to get content after metadata
    const lines = content.split('\n');
    const contentStart = lines.findIndex((l, i) => i > 5 && l.trim() && !l.startsWith('#') && !l.startsWith('By ') && !l.includes('http'));

    if (contentStart > 0) {
      return lines.slice(contentStart).join('\n').trim();
    }

    return null;
  }

  /**
   * Splits prompt text into sentences and classifies each one as a directive.
   *
   * Text is split on '.', '!', '?' and newlines; fragments of 10 characters or
   * fewer (after trimming) are ignored. The first matching pattern decides the
   * type, tested in this order with these base priorities: restriction (9),
   * capability (6), behavior (7), tone (5), format (4), rule (8). Sentences
   * matching none are skipped. All-caps sentences and sentences containing
   * 'IMPORTANT' or 'MUST' get +2, capped at 10.
   *
   * @param content Prompt text.
   * @returns One entry per classified sentence, in document order; duplicates
   *          are not removed.
   */
  private extractDirectives(content: string): DirectiveNode[] {
    const directives: DirectiveNode[] = [];
    const sentences = content.split(/[.!?\n]/).filter(s => s.trim().length > 10);

    for (const sentence of sentences) {
      const trimmed = sentence.trim();

      // Determine directive type
      let type: DirectiveNode['type'] = 'rule';
      let priority = 5;

      if (DIRECTIVE_PATTERNS.restriction.test(trimmed)) {
        type = 'restriction';
        priority = 9; // High priority for restrictions
      } else if (DIRECTIVE_PATTERNS.capability.test(trimmed)) {
        type = 'capability';
        priority = 6;
      } else if (DIRECTIVE_PATTERNS.behavior.test(trimmed)) {
        type = 'behavior';
        priority = 7;
      } else if (DIRECTIVE_PATTERNS.tone.test(trimmed)) {
        type = 'tone';
        priority = 5;
      } else if (DIRECTIVE_PATTERNS.format.test(trimmed)) {
        type = 'format';
        priority = 4;
      } else if (DIRECTIVE_PATTERNS.rule.test(trimmed)) {
        type = 'rule';
        priority = 8;
      } else {
        continue; // Skip if no pattern matches
      }

      // Check for emphasis markers
      if (trimmed.toUpperCase() === trimmed || trimmed.includes('IMPORTANT') || trimmed.includes('MUST')) {
        priority = Math.min(10, priority + 2);
      }

      // Short stable id: identical sentences in different prompts share one node
      const id = crypto.createHash('md5').update(trimmed).digest('hex').substring(0, 12);

      directives.push({
        id,
        content: trimmed.substring(0, 500),
        type,
        priority,
      });
    }

    return directives;
  }

  /**
   * Detects capabilities by case-insensitive substring search for each entry
   * of CAPABILITY_KEYWORDS.
   *
   * @param content Prompt text.
   * @returns At most one entry per capability, in keyword-list order; the
   *          description names the first keyword that matched.
   */
  private extractCapabilities(content: string): CapabilityNode[] {
    const capabilities: CapabilityNode[] = [];
    const found = new Set<string>();
    const lowerContent = content.toLowerCase();

    for (const { keyword, capability } of CAPABILITY_KEYWORDS) {
      if (lowerContent.includes(keyword) && !found.has(capability)) {
        found.add(capability);
        capabilities.push({
          name: capability,
          description: `Detected via keyword: "${keyword}"`,
        });
      }
    }

    return capabilities;
  }

  /**
   * Extracts hotkey definitions of the form "W: action" or "- W: action".
   *
   * A hotkey must start its line (optionally indented and/or preceded by a
   * '-', '•' or '*' bullet), consist of 1-3 uppercase ASCII letters and be
   * followed by ':' or '：' and a description on the same line. Anchoring to
   * the line start prevents the tail of ordinary words from being reported
   * (e.g. "IMPORTANT: ..." must not yield the key "ANT").
   *
   * @param content Prompt text; LF and CRLF line endings are both handled.
   * @returns Hotkeys in document order. Entries with an empty description or
   *          one longer than 200 characters are dropped.
   */
  private extractHotkeys(content: string): HotkeyPattern[] {
    const hotkeys: HotkeyPattern[] = [];

    // Multiline mode: ^ and $ match at every line boundary. Only spaces/tabs
    // are allowed as separators so a match can never spill onto the next line.
    const hotkeyRegex = /^[ \t]*(?:[-•*][ \t]*)?([A-Z]{1,3})[ \t]*[:：][ \t]*(.+)$/gm;
    let match: RegExpExecArray | null;

    while ((match = hotkeyRegex.exec(content)) !== null) {
      const key = match[1];
      const description = match[2].trim();

      // Skip if empty, or too long (probably not a hotkey)
      if (description.length === 0 || description.length > 200) continue;

      hotkeys.push({
        key,
        action: key,
        description,
      });
    }

    return hotkeys;
  }

  /**
   * Creates relationships between personas based on what they have in common.
   *
   * - SHARES_CAPABILITY: one relationship per shared capability and direction.
   * - SIMILAR_PERSONA: for persona pairs with at least 3 pairs of directives
   *   of equal type where both have priority >= 8.
   *
   * @param session Open session to run the queries on.
   * @throws Rejects if a query fails.
   */
  private async createPersonaRelationships(session: Session): Promise<void> {
    // Connect personas with shared capabilities
    await session.run(`
      MATCH (p1:Persona)-[:HAS_CAPABILITY]->(c:Capability)<-[:HAS_CAPABILITY]-(p2:Persona)
      WHERE p1 <> p2
      MERGE (p1)-[:SHARES_CAPABILITY {capability: c.name}]->(p2)
    `);

    // Connect personas with similar directives (by type).
    // The count is written with SET rather than placed in the MERGE pattern:
    // with the count in the pattern, every run where it changed would add a
    // second SIMILAR_PERSONA relationship instead of updating the existing one.
    await session.run(`
      MATCH (p1:Persona)-[:HAS_DIRECTIVE]->(d1:Directive)
      MATCH (p2:Persona)-[:HAS_DIRECTIVE]->(d2:Directive)
      WHERE p1 <> p2 AND d1.type = d2.type AND d1.priority >= 8 AND d2.priority >= 8
      WITH p1, p2, count(*) AS sharedCount
      WHERE sharedCount >= 3
      MERGE (p1)-[r:SIMILAR_PERSONA]->(p2)
      SET r.sharedDirectives = sharedCount
    `);
  }

  /**
   * Returns node totals and the ten capabilities linked to the most personas.
   *
   * @returns Totals (0 for a label without nodes) and the top capabilities
   *          sorted by persona count, descending.
   * @throws Rejects if a query fails.
   */
  async getStats(): Promise<{
    totalPersonas: number;
    totalDirectives: number;
    totalCapabilities: number;
    topCapabilities: { name: string; count: number }[];
  }> {
    const session = this.driver.session();

    try {
      // OPTIONAL MATCH keeps the single result row alive when a label has no
      // nodes. With plain MATCH, an empty label yields zero rows and every
      // total would be reported as 0 even though other nodes exist.
      const result = await session.run(`
        OPTIONAL MATCH (p:Persona)
        WITH count(p) AS personas
        OPTIONAL MATCH (d:Directive)
        WITH personas, count(d) AS directives
        OPTIONAL MATCH (c:Capability)
        RETURN personas, directives, count(c) AS capabilities
      `);

      const topCaps = await session.run(`
        MATCH (p:Persona)-[:HAS_CAPABILITY]->(c:Capability)
        RETURN c.name AS name, count(p) AS count
        ORDER BY count DESC
        LIMIT 10
      `);

      const record = result.records[0];

      return {
        totalPersonas: record?.get('personas')?.toNumber() || 0,
        totalDirectives: record?.get('directives')?.toNumber() || 0,
        totalCapabilities: record?.get('capabilities')?.toNumber() || 0,
        topCapabilities: topCaps.records.map(r => ({
          name: r.get('name'),
          count: r.get('count').toNumber(),
        })),
      };
    } finally {
      await session.close();
    }
  }

  /**
   * Finds the personas linked to a capability.
   *
   * @param capability Exact capability name, e.g. 'CodeGeneration'.
   * @returns Names of the matching personas.
   * @throws Rejects if the query fails.
   */
  async findPersonasByCapability(capability: string): Promise<string[]> {
    const session = this.driver.session();

    try {
      const result = await session.run(`
        MATCH (p:Persona)-[:HAS_CAPABILITY]->(c:Capability {name: $capability})
        RETURN p.name AS name, p.description AS description
      `, { capability });

      return result.records.map(r => r.get('name'));
    } finally {
      await session.close();
    }
  }

  /**
   * Returns the directives linked to a persona, highest priority first.
   *
   * @param personaName Exact persona name.
   * @returns The persona's directives; empty when the persona is unknown or
   *          has none.
   * @throws Rejects if the query fails.
   */
  async getPersonaDirectives(personaName: string): Promise<DirectiveNode[]> {
    const session = this.driver.session();

    try {
      const result = await session.run(`
        MATCH (p:Persona {name: $name})-[:HAS_DIRECTIVE]->(d:Directive)
        RETURN d.id AS id, d.content AS content, d.type AS type, d.priority AS priority
        ORDER BY d.priority DESC
      `, { name: personaName });

      return result.records.map(r => ({
        id: r.get('id'),
        content: r.get('content'),
        type: r.get('type'),
        priority: r.get('priority'),
      }));
    } finally {
      await session.close();
    }
  }

  /** Closes the underlying Neo4j driver. */
  async close(): Promise<void> {
    await this.driver.close();
  }
}

export default DNASplicer;