Spaces:

safetrack
/

edtech

Running

File size: 6,396 Bytes

181ff6e
ef0913c
 
181ff6e
b6de1ea
ef0913c
 
 
181ff6e
 
 
 
ef0913c
b6de1ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef0913c
 
 
181ff6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef0913c
 
 
 
 
b6de1ea
 
ef0913c
 
 
181ff6e
 
 
 
ef0913c
181ff6e
 
 
 
 
 
 
b6de1ea
 
 
 
181ff6e
b6de1ea
 
181ff6e
 
 
 
 
 
 
 
 
 
 
 
 
b6de1ea
 
181ff6e
 
 
d9879cf
 
ef0913c
 
 
 
b6de1ea
 
 
 
 
 
 
 
 
ef0913c
 
 
 
b6de1ea
 
 
ef0913c
 
 
 
 
 
 
 
 
 
 
 
b6de1ea
 
ef0913c
 
 
 
181ff6e

import 'dotenv/config';
import fs from 'fs';
import path from 'path';
import { execSync } from 'child_process';
import levenshtein from 'fast-levenshtein';
import { aiService } from '../services/ai';
import { normalizeWolof } from './normalizeWolof';

// Directory holding calibration inputs and outputs, resolved two levels above this file's directory.
const DATA_DIR = path.join(__dirname, '../../data');
// JSON report written by runCalibration() at the end of a run.
const STATS_PATH = path.join(DATA_DIR, 'calibration_stats.json');
// Cached sample file read by runCalibration(); presumably produced by the Python fetch script — verify against fetch_hf_audio.py.
const HF_SAMPLES_PATH = path.join(DATA_DIR, 'hf_samples.json');
// Python script located next to this file; runCalibration() invokes it via `python3` when the sample cache is missing.
const PY_SCRIPT = path.join(__dirname, 'fetch_hf_audio.py');

/**
 * Characters stripped from both transcripts before they are split into words.
 * Covers ASCII sentence punctuation (including `?`, quotes and brackets) plus
 * common typographic quotes, the ellipsis and inverted ¿/¡ marks, so that
 * "def?" and "def" compare as the same word.
 */
const WER_PUNCTUATION = /[.,/#!$%^&*;:{}=\-_`~()?"'\[\]\u00AB\u00BB\u201C\u201D\u2018\u2019\u2026\u00BF\u00A1]/g;

/** Lower-cases `text`, removes punctuation and splits it into non-empty words. */
function tokenizeForWER(text: string): string[] {
    return text
        .toLowerCase()
        .replace(WER_PUNCTUATION, "")
        .split(/\s+/)
        .filter(word => word.length > 0);
}

/**
 * Computes Word Error Rate (WER) using Levenshtein distance on words.
 * WER = (Substitutions + Deletions + Insertions) / Total Reference Words
 *
 * Both inputs are lower-cased and stripped of punctuation first, so case and
 * punctuation differences never count as errors.
 *
 * @param reference - Ground-truth transcript.
 * @param hypothesis - Transcript to score against the reference.
 * @returns The error rate as a fraction (0 = identical word sequences). It can
 *   exceed 1 when the hypothesis contains many insertions. When the reference
 *   has no words the formula is undefined: the result is 0 if the hypothesis is
 *   empty too, and 1 (fully wrong) otherwise.
 */
function calculateWER(reference: string, hypothesis: string): number {
    const refWords = tokenizeForWER(reference);
    const hypWords = tokenizeForWER(hypothesis);

    if (refWords.length === 0) {
        // Avoid dividing by zero; any word produced for an empty reference is an error,
        // so it must not be reported as a perfect score.
        return hypWords.length === 0 ? 0 : 1;
    }

    // fast-levenshtein compares strings character by character, so each distinct
    // word is mapped to its own single character, starting in the Private Use Area.
    const wordToChar = new Map<string, string>();
    const encode = (words: string[]): string =>
        words
            .map(word => {
                let symbol = wordToChar.get(word);
                if (symbol === undefined) {
                    symbol = String.fromCharCode(0xE000 + wordToChar.size);
                    wordToChar.set(word, symbol);
                }
                return symbol;
            })
            .join('');

    const distance = levenshtein.get(encode(refWords), encode(hypWords));
    return distance / refWords.length;
}

/** Traffic-light bucket assigned to a transcription based on its confidence score. */
type CalibrationStatus = 'RED' | 'ORANGE' | 'GREEN';

/** Fields of a cached sample entry that the calibration run reads. */
interface CalibrationSample {
    source: string;
    audio_base64: string;
    original_text: string;
}

/** Per-sample record stored under `samples` in the stats file. */
interface CalibrationResult {
    source: string;
    index: number;
    hfOriginalText: string;
    transcribedText: string;
    normalizedText: string;
    confidenceScore: number;
    rawWER: number;
    normalizedWER: number;
    status: CalibrationStatus;
}

/** Maps a confidence percentage to its bucket: <=50 RED, 51-80 ORANGE, >80 GREEN. */
function classifyConfidence(confidence: number): CalibrationStatus {
    if (confidence <= 50) return 'RED';
    if (confidence <= 80) return 'ORANGE';
    return 'GREEN';
}

/** Extracts a printable message from a caught value of unknown type. */
function describeCalibrationError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
}

/**
 * Runs the Whisper confidence calibration over the cached Hugging Face samples.
 *
 * Steps:
 *  1. Ensures the data directory exists and, when the sample cache is missing,
 *     invokes the Python fetch script to create it.
 *  2. Transcribes every sample, normalizes the transcript and computes the WER
 *     of both the raw and the normalized text against the sample's original text.
 *  3. Aggregates averages and the RED/ORANGE/GREEN confidence distribution and
 *     writes them, together with the per-sample records, to the stats file.
 *
 * A sample that fails to process is logged and skipped; it is excluded from all
 * averages. If no sample file is available after the fetch attempt the function
 * logs an error and returns without writing stats.
 *
 * @throws If the sample file is not valid JSON or does not contain an array.
 */
export async function runCalibration(): Promise<void> {
    console.log("🚀 Starting Whisper Confidence Calibration Stress-Test...");

    // Ensure data dir exists
    if (!fs.existsSync(DATA_DIR)) {
        fs.mkdirSync(DATA_DIR, { recursive: true });
    }

    // Run Python fetching script if samples not already present
    if (!fs.existsSync(HF_SAMPLES_PATH)) {
        console.log("📥 Calling Python datasets library to download Hugging Face audio...");
        try {
            // Quote both paths so a checkout location containing spaces does not split the command.
            execSync(`python3 "${PY_SCRIPT}" --output "${DATA_DIR}"`, { stdio: 'inherit' });
        } catch (e: unknown) {
            // Best effort: the existence check below decides whether the run can continue.
            console.error("❌ Python script failed to fetch samples. Please check HF_TOKEN or network.", describeCalibrationError(e));
        }
    } else {
        console.log("♻️ Using cached Hugging Face samples...");
    }

    if (!fs.existsSync(HF_SAMPLES_PATH)) {
        console.error("❌ No samples mapped. Exiting calibration.");
        return;
    }

    const parsed: unknown = JSON.parse(fs.readFileSync(HF_SAMPLES_PATH, 'utf-8'));
    if (!Array.isArray(parsed)) {
        // Fail loudly instead of overwriting the stats file with an empty run.
        throw new Error(`Expected ${HF_SAMPLES_PATH} to contain a JSON array of samples.`);
    }
    const samples: CalibrationSample[] = parsed;
    console.log(`\n🎧 Processing ${samples.length} samples through Whisper STT...`);

    const results: CalibrationResult[] = [];
    const counts: Record<CalibrationStatus, number> = { RED: 0, ORANGE: 0, GREEN: 0 };
    let totalConfidence = 0;
    let totalRawWER = 0;
    let totalNormWER = 0;
    let totalProcessed = 0;

    // Samples are transcribed one at a time, in file order.
    for (const [i, sample] of samples.entries()) {
        console.log(`[${sample.source} ${i + 1}/${samples.length}] Transcribing...`);

        try {
            const audioBuffer = Buffer.from(sample.audio_base64, 'base64');
            // Pass to Whisper
            const { text, confidence } = await aiService.transcribeAudio(audioBuffer, `sample_${i}.wav`, 'WOLOF');

            // Normalize
            const normResult = normalizeWolof(text);

            // Compute WER
            const rawWER = calculateWER(sample.original_text, text);
            const normWER = calculateWER(sample.original_text, normResult.normalizedText);

            totalConfidence += confidence;
            totalRawWER += rawWER;
            totalNormWER += normWER;
            totalProcessed++;

            // Classify once so the counter and the stored status can never disagree.
            const status = classifyConfidence(confidence);
            counts[status]++;

            results.push({
                source: sample.source,
                index: i,
                hfOriginalText: sample.original_text,
                transcribedText: text,
                normalizedText: normResult.normalizedText,
                confidenceScore: confidence,
                rawWER,
                normalizedWER: normWER,
                status
            });

        } catch (err: unknown) {
            console.error(`Error processing sample ${i} from ${sample.source}: ${describeCalibrationError(err)}`);
        }
    }

    const averageConfidence = totalProcessed > 0 ? Math.round(totalConfidence / totalProcessed) : 0;
    const averageRawWER = totalProcessed > 0 ? totalRawWER / totalProcessed : 0;
    const averageNormalizedWER = totalProcessed > 0 ? totalNormWER / totalProcessed : 0;

    // Calculate Dictionary Efficiency (Improvement in WER relative to Raw WER)
    let dictionaryEfficiency = 0;
    if (averageRawWER > 0) {
        // If WER goes down, efficiency is positive.
        dictionaryEfficiency = ((averageRawWER - averageNormalizedWER) / averageRawWER) * 100;
    }

    // Share of successfully processed samples, as a whole percentage (0 when nothing was processed).
    const percentOfProcessed = (count: number): number =>
        totalProcessed ? Math.round((count / totalProcessed) * 100) : 0;

    const stats = {
        totalProcessed,
        averageConfidence,
        averageRawWER,
        averageNormalizedWER,
        dictionaryEfficiency,
        distribution: {
            red: { count: counts.RED, percentage: percentOfProcessed(counts.RED) },
            orange: { count: counts.ORANGE, percentage: percentOfProcessed(counts.ORANGE) },
            green: { count: counts.GREEN, percentage: percentOfProcessed(counts.GREEN) }
        },
        samples: results,
        updatedAt: new Date().toISOString()
    };

    fs.writeFileSync(STATS_PATH, JSON.stringify(stats, null, 2));
    console.log(`\n✅ Calibration finished! Stats saved to ${STATS_PATH}`);
    console.log(`Average Confidence: ${averageConfidence}%`);
    console.log(`Raw WER: ${(averageRawWER * 100).toFixed(2)}% | Normalized WER: ${(averageNormalizedWER * 100).toFixed(2)}%`);
    console.log(`Dictionary Efficiency Gain: ${dictionaryEfficiency.toFixed(2)}%`);
    console.log(`Red (<=50%): ${counts.RED} | Orange (51-80%): ${counts.ORANGE} | Green (>80%): ${counts.GREEN}`);
}

// Allow running directly from command line.
// The guard keeps the calibration (and its process.exit calls) from firing when
// another module merely imports runCalibration.
if (require.main === module) {
    runCalibration().then(() => process.exit(0)).catch((err: unknown) => {
        console.error(err);
        process.exit(1);
    });
}