import 'dotenv/config';
import fs from 'fs';
import path from 'path';
import { execFileSync } from 'child_process';
import levenshtein from 'fast-levenshtein';
import { aiService } from '../services/ai';
import { normalizeWolof } from './normalizeWolof';

const DATA_DIR = path.join(__dirname, '../../data');
const STATS_PATH = path.join(DATA_DIR, 'calibration_stats.json');
const HF_SAMPLES_PATH = path.join(DATA_DIR, 'hf_samples.json');
const PY_SCRIPT = path.join(__dirname, 'fetch_hf_audio.py');

/**
 * ASCII punctuation removed from both texts before they are split into words.
 * NOTE(review): `?`, `'` and `"` are not in this set, so "def?" and "def" count
 * as different words. Left unchanged to keep WER values comparable with earlier
 * runs; confirm whether that is intended.
 */
const PUNCTUATION = /[.,/#!$%^&*;:{}=\-_`~()]/g;

/** First code point of the BMP Private Use Area, used as a per-word placeholder. */
const FIRST_PLACEHOLDER_CODE = 0xe000;

/** Fields of a cached sample that this script reads. */
interface HfSample {
  source: string;
  audio_base64: string;
  original_text: string;
}

/** Confidence bucket: RED is <= 50, ORANGE is <= 80, GREEN is everything else. */
type ConfidenceStatus = 'RED' | 'ORANGE' | 'GREEN';

/** Per-sample record stored under `samples` in the stats file. */
interface SampleResult {
  source: string;
  index: number;
  hfOriginalText: string;
  transcribedText: string;
  normalizedText: string;
  confidenceScore: number;
  rawWER: number;
  normalizedWER: number;
  status: ConfidenceStatus;
}

/** Lower-cases `text`, strips punctuation and splits it into non-empty words. */
function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .replace(PUNCTUATION, '')
    .split(/\s+/)
    .filter((word) => word);
}

/** Maps a confidence score to its bucket. */
function classifyConfidence(confidence: number): ConfidenceStatus {
  if (confidence <= 50) return 'RED';
  if (confidence <= 80) return 'ORANGE';
  return 'GREEN';
}

/** Renders a caught value as a log-friendly message. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Computes Word Error Rate (WER) using Levenshtein distance on words.
 * WER = (Substitutions + Deletions + Insertions) / Total Reference Words
 *
 * @param reference - Ground-truth text.
 * @param hypothesis - Transcribed text to score against the reference.
 * @returns The edit distance in words divided by the number of reference words,
 *   or 0 when the reference contains no words (regardless of the hypothesis).
 *   The value can exceed 1 when the hypothesis is much longer than the reference.
 */
function calculateWER(reference: string, hypothesis: string): number {
  const refWords = tokenize(reference);
  const hypWords = tokenize(hypothesis);

  if (refWords.length === 0) return 0;

  // fast-levenshtein compares strings, so every distinct word is replaced by a
  // single placeholder character; the character-level distance then equals the
  // word-level distance. Codes start at 0xE000 and String.fromCharCode wraps at
  // 0x10000, so the mapping is unique for up to 8192 distinct words per call.
  const placeholders = new Map<string, string>();
  let nextCode = FIRST_PLACEHOLDER_CODE;
  const encode = (words: string[]): string =>
    words
      .map((word) => {
        let placeholder = placeholders.get(word);
        if (placeholder === undefined) {
          placeholder = String.fromCharCode(nextCode++);
          placeholders.set(word, placeholder);
        }
        return placeholder;
      })
      .join('');

  const distance = levenshtein.get(encode(refWords), encode(hypWords));
  return distance / refWords.length;
}

/**
 * Runs the calibration pass: makes sure the cached samples exist (invoking the
 * Python fetch script when they do not), transcribes every sample through
 * `aiService.transcribeAudio`, scores the raw and normalized transcripts with
 * WER, and writes the aggregated statistics to `calibration_stats.json`.
 *
 * A sample that throws while being processed is logged and left out of the
 * totals. The function returns without writing stats when the samples file is
 * missing or does not hold a JSON array; a file that is not valid JSON makes
 * `JSON.parse` throw and the returned promise reject.
 */
export async function runCalibration(): Promise<void> {
  console.log('🚀 Starting Whisper Confidence Calibration Stress-Test...');

  // Ensure data dir exists
  if (!fs.existsSync(DATA_DIR)) {
    fs.mkdirSync(DATA_DIR, { recursive: true });
  }

  // Run Python fetching script if samples not already present
  if (!fs.existsSync(HF_SAMPLES_PATH)) {
    console.log('📥 Calling Python datasets library to download Hugging Face audio...');
    try {
      // Arguments are passed as an array (no shell), so paths containing spaces
      // or shell metacharacters reach the script unmodified.
      execFileSync('python3', [PY_SCRIPT, '--output', DATA_DIR], { stdio: 'inherit' });
    } catch (err: unknown) {
      console.error('❌ Python script failed to fetch samples. Please check HF_TOKEN or network.');
      console.error(describeError(err));
    }
  } else {
    console.log('♻️ Using cached Hugging Face samples...');
  }

  if (!fs.existsSync(HF_SAMPLES_PATH)) {
    console.error('❌ No samples mapped. Exiting calibration.');
    return;
  }

  const parsed: unknown = JSON.parse(fs.readFileSync(HF_SAMPLES_PATH, 'utf-8'));
  if (!Array.isArray(parsed)) {
    // Bail out before the stats file is overwritten with an all-zero report.
    console.error(`❌ ${HF_SAMPLES_PATH} does not contain a JSON array of samples. Exiting calibration.`);
    return;
  }
  // Individual entries are not validated here; a malformed entry throws inside
  // the loop below and is reported by the per-sample error handler.
  const samples = parsed as HfSample[];

  console.log(`\n🎧 Processing ${samples.length} samples through Whisper STT...`);

  const results: SampleResult[] = [];
  const counts: Record<ConfidenceStatus, number> = { RED: 0, ORANGE: 0, GREEN: 0 };
  let totalConfidence = 0;
  let totalRawWER = 0;
  let totalNormWER = 0;
  let totalProcessed = 0;

  // Samples are transcribed one at a time, in file order.
  for (const [i, sample] of samples.entries()) {
    console.log(`[${sample.source} ${i + 1}/${samples.length}] Transcribing...`);
    try {
      const audioBuffer = Buffer.from(sample.audio_base64, 'base64');

      // Pass to Whisper
      const { text, confidence } = await aiService.transcribeAudio(audioBuffer, `sample_${i}.wav`, 'WOLOF');

      // Normalize
      const { normalizedText } = normalizeWolof(text);

      // Compute WER
      const rawWER = calculateWER(sample.original_text, text);
      const normWER = calculateWER(sample.original_text, normalizedText);

      totalConfidence += confidence;
      totalRawWER += rawWER;
      totalNormWER += normWER;
      totalProcessed++;

      const status = classifyConfidence(confidence);
      counts[status]++;

      results.push({
        source: sample.source,
        index: i,
        hfOriginalText: sample.original_text,
        transcribedText: text,
        normalizedText,
        confidenceScore: confidence,
        rawWER,
        normalizedWER: normWER,
        status,
      });
    } catch (err: unknown) {
      console.error(`Error processing sample ${i} from ${sample.source}: ${describeError(err)}`);
    }
  }

  const averageConfidence = totalProcessed > 0 ? Math.round(totalConfidence / totalProcessed) : 0;
  const averageRawWER = totalProcessed > 0 ? totalRawWER / totalProcessed : 0;
  const averageNormalizedWER = totalProcessed > 0 ? totalNormWER / totalProcessed : 0;

  // Calculate Dictionary Efficiency (Improvement in WER relative to Raw WER)
  let dictionaryEfficiency = 0;
  if (averageRawWER > 0) {
    // If WER goes down, efficiency is positive.
    dictionaryEfficiency = ((averageRawWER - averageNormalizedWER) / averageRawWER) * 100;
  }

  // Share of processed samples in a bucket, as a whole percent.
  const percentageOf = (count: number): number =>
    totalProcessed ? Math.round((count / totalProcessed) * 100) : 0;

  const stats = {
    totalProcessed,
    averageConfidence,
    averageRawWER,
    averageNormalizedWER,
    dictionaryEfficiency,
    distribution: {
      red: { count: counts.RED, percentage: percentageOf(counts.RED) },
      orange: { count: counts.ORANGE, percentage: percentageOf(counts.ORANGE) },
      green: { count: counts.GREEN, percentage: percentageOf(counts.GREEN) },
    },
    samples: results,
    updatedAt: new Date().toISOString(),
  };

  fs.writeFileSync(STATS_PATH, JSON.stringify(stats, null, 2));

  console.log(`\n✅ Calibration finished! Stats saved to ${STATS_PATH}`);
  console.log(`Average Confidence: ${averageConfidence}%`);
  console.log(`Raw WER: ${(averageRawWER * 100).toFixed(2)}% | Normalized WER: ${(averageNormalizedWER * 100).toFixed(2)}%`);
  console.log(`Dictionary Efficiency Gain: ${dictionaryEfficiency.toFixed(2)}%`);
  console.log(`Red (<=50%): ${counts.RED} | Orange (51-80%): ${counts.ORANGE} | Green (>80%): ${counts.GREEN}`);
}

// Allow running directly from command line
// NOTE(review): this call is unconditional, so importing the module for its
// `runCalibration` export also starts a run and then exits the process. Consider
// guarding it with `require.main === module` once callers have been checked.
runCalibration()
  .then(() => process.exit(0))
  .catch((err: unknown) => {
    console.error(err);
    process.exit(1);
  });