File size: 10,704 Bytes

40d7073

"use strict";
/**
 * Diff Embeddings - Semantic encoding of git diffs
 *
 * Generates embeddings for code changes to enable:
 * - Change classification (feature, bugfix, refactor)
 * - Similar change detection
 * - Risk assessment
 * - Review prioritization
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseDiff = parseDiff;
exports.classifyChange = classifyChange;
exports.calculateRiskScore = calculateRiskScore;
exports.analyzeFileDiff = analyzeFileDiff;
exports.getCommitDiff = getCommitDiff;
exports.getStagedDiff = getStagedDiff;
exports.getUnstagedDiff = getUnstagedDiff;
exports.analyzeCommit = analyzeCommit;
exports.findSimilarCommits = findSimilarCommits;
const child_process_1 = require("child_process");
const onnx_embedder_1 = require("./onnx-embedder");
/**
 * Parse a unified diff into hunks.
 *
 * Each `@@ -a,b +c,d @@` header opens a hunk that is attributed to the file
 * named by the most recent `diff --git a/... b/...` line ('' when no such
 * line has been seen). A hunk ends at the next hunk header, the next
 * `diff --git` line, or the end of input.
 *
 * @param {string} diff - Unified diff text (e.g. output of `git diff`).
 * @returns {Array<{file: string, oldStart: number, oldLines: number,
 *   newStart: number, newLines: number, content: string,
 *   additions: string[], deletions: string[]}>} Hunks in input order.
 *   `additions`/`deletions` hold the changed lines without their `+`/`-`
 *   marker; `content` keeps the markers and also includes context lines.
 */
function parseDiff(diff) {
    const hunks = [];
    let currentFile = '';
    let currentHunk = null;
    // Lines the open hunk's header still promises on each side. They are used
    // only to tell a `---`/`+++` file header apart from a changed line whose
    // own text starts with `--`/`++` (e.g. a deleted SQL comment).
    let oldRemaining = 0;
    let newRemaining = 0;
    const closeHunk = () => {
        if (currentHunk) {
            hunks.push(currentHunk);
            currentHunk = null;
        }
    };
    for (const line of diff.split('\n')) {
        // File header: everything that follows belongs to a new file.
        if (line.startsWith('diff --git')) {
            closeHunk();
            const match = line.match(/diff --git a\/(.+) b\/(.+)/);
            if (match) {
                currentFile = match[2];
            }
            continue;
        }
        // Hunk header. An unparseable header (such as a combined-diff `@@@`)
        // still terminates the previous hunk so it is never emitted twice.
        if (line.startsWith('@@')) {
            closeHunk();
            const match = line.match(/@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@/);
            if (match) {
                currentHunk = {
                    file: currentFile,
                    oldStart: Number.parseInt(match[1], 10),
                    // An omitted count means the range covers exactly one line.
                    oldLines: Number.parseInt(match[2] || '1', 10),
                    newStart: Number.parseInt(match[3], 10),
                    newLines: Number.parseInt(match[4] || '1', 10),
                    content: '',
                    additions: [],
                    deletions: [],
                };
                oldRemaining = currentHunk.oldLines;
                newRemaining = currentHunk.newLines;
            }
            continue;
        }
        if (!currentHunk) {
            continue;
        }
        if (line.startsWith('+')) {
            // Once the declared new-side lines are consumed, `+++` can only
            // be the next file's header.
            if (newRemaining <= 0 && line.startsWith('+++')) {
                continue;
            }
            currentHunk.additions.push(line.substring(1));
            currentHunk.content += `${line}\n`;
            newRemaining -= 1;
        }
        else if (line.startsWith('-')) {
            if (oldRemaining <= 0 && line.startsWith('---')) {
                continue;
            }
            currentHunk.deletions.push(line.substring(1));
            currentHunk.content += `${line}\n`;
            oldRemaining -= 1;
        }
        else if (line.startsWith(' ')) {
            currentHunk.content += `${line}\n`;
            oldRemaining -= 1;
            newRemaining -= 1;
        }
        else if (line === '') {
            // Possibly a context line whose single space was stripped. It is
            // not recorded, but it is counted so header detection stays aligned.
            oldRemaining -= 1;
            newRemaining -= 1;
        }
    }
    closeHunk();
    return hunks;
}
/**
 * Classify a change with keyword and path heuristics.
 *
 * Checks run in priority order and the first hit wins:
 *   1. keywords in the commit message,
 *   2. file extensions of the touched paths,
 *   3. keywords in the diff body.
 *
 * @param {string} diff - Unified diff text, or a bare file path.
 * @param {string} [message=''] - Commit message, if any.
 * @returns {'bugfix'|'feature'|'refactor'|'docs'|'test'|'config'|'unknown'}
 */
function classifyChange(diff, message = '') {
    const lowerMessage = message.toLowerCase();
    const lowerDiff = diff.toLowerCase();
    // Check message patterns
    if (/\b(fix|bug|issue|error|crash|patch)\b/.test(lowerMessage))
        return 'bugfix';
    if (/\b(feat|feature|add|new|implement)\b/.test(lowerMessage))
        return 'feature';
    if (/\b(refactor|clean|improve|optimize)\b/.test(lowerMessage))
        return 'refactor';
    if (/\b(doc|readme|comment|jsdoc)\b/.test(lowerMessage))
        return 'docs';
    if (/\b(test|spec|coverage)\b/.test(lowerMessage))
        return 'test';
    if (/\b(config|ci|cd|build|deps)\b/.test(lowerMessage))
        return 'config';
    // Check path patterns. The `$`-anchored extension patterns can only match
    // the very end of the string they are tested against, so on a multi-line
    // diff they must be applied to each git file-header line rather than to
    // the diff as a whole. Testing the raw input as well keeps bare file
    // paths (e.g. 'README.md') working.
    const headerLines = diff
        .split('\n')
        .filter((line) => /^(diff --git a\/|--- a\/|\+\+\+ b\/)/.test(line))
        .map((line) => line.trimEnd());
    const touchesPath = (pattern) => pattern.test(diff) || headerLines.some((line) => pattern.test(line));
    if (touchesPath(/\.(md|txt|rst)$/))
        return 'docs';
    if (/\.(test|spec)\.[jt]sx?/.test(diff))
        return 'test';
    if (touchesPath(/\.(json|ya?ml|toml|ini)$/))
        return 'config';
    // Check content patterns
    if (/\bcatch\b|\btry\b|\berror\b/.test(lowerDiff) && /\bfix\b/.test(lowerDiff))
        return 'bugfix';
    if (/\bfunction\b|\bclass\b|\bexport\b/.test(lowerDiff))
        return 'feature';
    return 'unknown';
}
/**
 * Calculate a heuristic risk score for a file analysis.
 *
 * The score is a sum of independent contributions, capped at 1:
 *   - change size (additions + deletions),
 *   - complexity count,
 *   - sensitive substrings in the file path,
 *   - deleted lines that look like error handling, validation, or null checks.
 *
 * @param {{file: string, totalAdditions: number, totalDeletions: number,
 *   complexity: number, hunks: Array<{deletions: string[]}>}} analysis
 * @returns {number} Risk in the range [0, 1].
 */
function calculateRiskScore(analysis) {
    let risk = 0;
    // Size risk
    const totalChanges = analysis.totalAdditions + analysis.totalDeletions;
    if (totalChanges > 500)
        risk += 0.3;
    else if (totalChanges > 200)
        risk += 0.2;
    else if (totalChanges > 50)
        risk += 0.1;
    // Complexity risk
    if (analysis.complexity > 20)
        risk += 0.2;
    else if (analysis.complexity > 10)
        risk += 0.1;
    // File type risk
    if (analysis.file.includes('auth') || analysis.file.includes('security'))
        risk += 0.2;
    if (analysis.file.includes('database') || analysis.file.includes('migration'))
        risk += 0.15;
    if (analysis.file.includes('api') || analysis.file.includes('endpoint'))
        risk += 0.1;
    // Pattern risk (deletions of error handling, etc.)
    for (const hunk of analysis.hunks) {
        for (const del of hunk.deletions) {
            if (/\bcatch\b|\berror\b|\bvalidat/.test(del))
                risk += 0.1;
            // The alternation is grouped so that `if` is required for both
            // `null` and `undefined`; ungrouped, any line that merely
            // mentioned `undefined` was counted as a removed guard.
            if (/\bif\b.*\b(?:null|undefined)\b/.test(del))
                risk += 0.05;
        }
    }
    return Math.min(1, risk);
}
/**
 * Build the analysis record for a single file of a diff.
 *
 * Keeps the hunks attributed to `file` (plus hunks with no file name), counts
 * added and deleted lines, counts added lines containing a branch keyword or
 * `?` as complexity, classifies the change and scores its risk. When the ONNX
 * embedder reports ready, an embedding of the first 500 characters of the
 * hunk content is attached as `embedding`.
 *
 * @param {string} file - Path of the file to analyze.
 * @param {string} diff - Unified diff text containing that file's changes.
 * @param {string} [message=''] - Commit message passed to the classifier.
 * @returns {Promise<object>} The analysis record.
 */
async function analyzeFileDiff(file, diff, message = '') {
    const branchPattern = /\bif\b|\belse\b|\bfor\b|\bwhile\b|\bswitch\b|\bcatch\b|\?/;
    const hunks = parseDiff(diff).filter((hunk) => hunk.file === file || hunk.file === '');
    // Single pass over the hunks for all three tallies.
    let totalAdditions = 0;
    let totalDeletions = 0;
    let complexity = 0;
    for (const { additions, deletions } of hunks) {
        totalAdditions += additions.length;
        totalDeletions += deletions.length;
        complexity += additions.filter((added) => branchPattern.test(added)).length;
    }
    const category = classifyChange(diff, message);
    const analysis = {
        file,
        hunks,
        totalAdditions,
        totalDeletions,
        complexity,
        riskScore: 0,
        category,
    };
    // The risk score is derived from the record itself, so fill it in afterwards.
    analysis.riskScore = calculateRiskScore(analysis);
    if (!(0, onnx_embedder_1.isReady)()) {
        return analysis;
    }
    const diffText = hunks.map((hunk) => hunk.content).join('\n');
    const { embedding } = await (0, onnx_embedder_1.embed)(`${category} change in ${file}: ${diffText.substring(0, 500)}`);
    analysis.embedding = embedding;
    return analysis;
}
/**
 * Get the diff introduced by a commit.
 *
 * Git is invoked directly (no shell) with the revision as a single argument,
 * so shell metacharacters in `commitHash` are never interpreted. Git's stderr
 * is discarded.
 *
 * @param {string} [commitHash='HEAD'] - Revision to show.
 * @returns {string} Output of `git show` with an empty commit-header format,
 *   or '' when git fails or the revision looks like a command-line option.
 */
function getCommitDiff(commitHash = 'HEAD') {
    const revision = String(commitHash);
    // A leading '-' would be parsed by git as an option (e.g. --output=...).
    if (revision.startsWith('-')) {
        return '';
    }
    try {
        return (0, child_process_1.execFileSync)('git', ['show', revision, '--format='], {
            encoding: 'utf8',
            maxBuffer: 10 * 1024 * 1024,
            stdio: ['ignore', 'pipe', 'ignore'],
        });
    }
    catch {
        // Best effort: not a repository, unknown revision, git missing, or
        // output larger than maxBuffer all yield an empty diff.
        return '';
    }
}
/**
 * Get the diff of staged changes (`git diff --cached`).
 *
 * Git is invoked directly rather than through a shell, and its stderr is
 * discarded via `stdio` instead of a `2>/dev/null` redirect, which only works
 * in POSIX shells.
 *
 * @returns {string} The diff text, or '' when git fails.
 */
function getStagedDiff() {
    try {
        return (0, child_process_1.execFileSync)('git', ['diff', '--cached'], {
            encoding: 'utf8',
            maxBuffer: 10 * 1024 * 1024,
            stdio: ['ignore', 'pipe', 'ignore'],
        });
    }
    catch {
        // Best effort: not a repository, git missing, or output larger than
        // maxBuffer all yield an empty diff.
        return '';
    }
}
/**
 * Get the diff of unstaged working-tree changes (`git diff`).
 *
 * Git is invoked directly rather than through a shell, and its stderr is
 * discarded via `stdio` instead of a `2>/dev/null` redirect, which only works
 * in POSIX shells.
 *
 * @returns {string} The diff text, or '' when git fails.
 */
function getUnstagedDiff() {
    try {
        return (0, child_process_1.execFileSync)('git', ['diff'], {
            encoding: 'utf8',
            maxBuffer: 10 * 1024 * 1024,
            stdio: ['ignore', 'pipe', 'ignore'],
        });
    }
    catch {
        // Best effort: not a repository, git missing, or output larger than
        // maxBuffer all yield an empty diff.
        return '';
    }
}
/**
 * Analyze a commit: metadata, per-file analyses, totals, and an embedding.
 *
 * Subject, author name, and author date are read with `git log`, using NUL
 * separators so a `|` (or any other printable character) in the subject
 * cannot corrupt the fields. Git is invoked without a shell. Each file with
 * at least one parsed hunk is analyzed with `analyzeFileDiff`; the commit
 * risk score is the mean of the per-file scores (0 when there are no files).
 * When the ONNX embedder reports ready, an embedding of the subject plus a
 * change summary is attached.
 *
 * @param {string} [commitHash='HEAD'] - Revision to analyze.
 * @returns {Promise<{hash: string, message: string, author: string,
 *   date: string, files: object[], totalAdditions: number,
 *   totalDeletions: number, riskScore: number, embedding: *}>}
 *   `embedding` is undefined when the embedder is not ready.
 */
async function analyzeCommit(commitHash = 'HEAD') {
    const diff = getCommitDiff(commitHash);
    // Get commit metadata
    let message = '';
    let author = '';
    let date = '';
    const revision = String(commitHash);
    // A leading '-' would be parsed by git as an option, so skip the lookup.
    if (!revision.startsWith('-')) {
        try {
            const info = (0, child_process_1.execFileSync)('git', ['log', '-1', '--format=%s%x00%an%x00%aI', revision], {
                encoding: 'utf8',
                stdio: ['ignore', 'pipe', 'ignore'],
            }).trim();
            [message = '', author = '', date = ''] = info.split('\0');
        }
        catch {
            // Best effort: metadata stays empty when git fails.
        }
    }
    // Distinct files in first-seen order. Each one is analyzed against the
    // full diff because analyzeFileDiff needs the file headers to attribute
    // hunks to files.
    const changedFiles = [...new Set(parseDiff(diff).map((hunk) => hunk.file))];
    const files = [];
    for (const file of changedFiles) {
        files.push(await analyzeFileDiff(file, diff, message));
    }
    const totalAdditions = files.reduce((sum, f) => sum + f.totalAdditions, 0);
    const totalDeletions = files.reduce((sum, f) => sum + f.totalDeletions, 0);
    const riskScore = files.length > 0
        ? files.reduce((sum, f) => sum + f.riskScore, 0) / files.length
        : 0;
    // Generate commit embedding
    let embedding;
    if ((0, onnx_embedder_1.isReady)()) {
        const commitText = `${message}\n\nFiles changed: ${files.map(f => f.file).join(', ')}\n+${totalAdditions} -${totalDeletions}`;
        const result = await (0, onnx_embedder_1.embed)(commitText);
        embedding = result.embedding;
    }
    return {
        hash: commitHash,
        message,
        author,
        date,
        files,
        totalAdditions,
        totalDeletions,
        riskScore,
        embedding,
    };
}
/**
 * Find recent commits whose embedding is most similar to a diff.
 *
 * Initializes the ONNX embedder if it is not ready, embeds the first 1000
 * characters of `currentDiff`, analyzes up to `recentCommits` commits from
 * `git log`, and ranks them by cosine similarity to the diff embedding.
 *
 * @param {string} currentDiff - Diff text to compare against history.
 * @param {number} [recentCommits=50] - How many recent commits to consider.
 *   Values that are not a positive integer yield an empty result.
 * @param {number} [topK=5] - Maximum number of matches to return.
 * @returns {Promise<Array<{hash: string, similarity: number, message: string}>>}
 *   Matches sorted by descending similarity; [] when git fails.
 */
async function findSimilarCommits(currentDiff, recentCommits = 50, topK = 5) {
    if (!(0, onnx_embedder_1.isReady)()) {
        await (0, onnx_embedder_1.initOnnxEmbedder)();
    }
    // Get current diff embedding
    const currentEmbedding = (await (0, onnx_embedder_1.embed)(currentDiff.substring(0, 1000))).embedding;
    // Only a positive integer may reach the git command line; anything else
    // would previously have been pasted into a shell command.
    const limit = Number(recentCommits);
    if (!Number.isInteger(limit) || limit <= 0) {
        return [];
    }
    // Get recent commits
    let commits;
    try {
        commits = (0, child_process_1.execFileSync)('git', ['log', `-${limit}`, '--format=%H'], {
            encoding: 'utf8',
            stdio: ['ignore', 'pipe', 'ignore'],
        })
            .split('\n')
            .map((hash) => hash.trim())
            // Empty output must not turn into a bogus '' revision.
            .filter((hash) => hash !== '');
    }
    catch {
        return [];
    }
    // Analyze and compare
    const results = [];
    for (const hash of commits) {
        const analysis = await analyzeCommit(hash);
        if (analysis.embedding) {
            const similarity = cosineSimilarity(currentEmbedding, analysis.embedding);
            results.push({ hash, similarity, message: analysis.message });
        }
    }
    return results
        .sort((a, b) => b.similarity - a.similarity)
        .slice(0, topK);
}
/**
 * Cosine similarity of two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Dot product divided by the product of the magnitudes;
 *   0 when the lengths differ or either magnitude is zero.
 */
function cosineSimilarity(a, b) {
    const size = a.length;
    if (b.length !== size) {
        return 0;
    }
    let dot = 0;
    let squaresA = 0;
    let squaresB = 0;
    let index = 0;
    while (index < size) {
        const x = a[index];
        const y = b[index];
        dot += x * y;
        squaresA += x * x;
        squaresB += y * y;
        index += 1;
    }
    const denominator = Math.sqrt(squaresA) * Math.sqrt(squaresB);
    if (denominator === 0) {
        return 0;
    }
    return dot / denominator;
}
// Default export: a single object bundling the same nine functions that are
// also exported by name above. The module-private cosineSimilarity helper is
// intentionally not part of it.
exports.default = {
    parseDiff,
    classifyChange,
    calculateRiskScore,
    analyzeFileDiff,
    analyzeCommit,
    getCommitDiff,
    getStagedDiff,
    getUnstagedDiff,
    findSimilarCommits,
};