CognxSafeTrack
chore: execute Sprint 38 technical debt resolution (Type Safety, Zod validation, Vitest, Mock LLM extracted)
d9879cf | import 'dotenv/config'; | |
| import fs from 'fs'; | |
| import path from 'path'; | |
| import { execSync } from 'child_process'; | |
| import levenshtein from 'fast-levenshtein'; | |
| import { aiService } from '../services/ai'; | |
| import { normalizeWolof } from './normalizeWolof'; | |
| const DATA_DIR = path.join(__dirname, '../../data'); | |
| const STATS_PATH = path.join(DATA_DIR, 'calibration_stats.json'); | |
| const HF_SAMPLES_PATH = path.join(DATA_DIR, 'hf_samples.json'); | |
| const PY_SCRIPT = path.join(__dirname, 'fetch_hf_audio.py'); | |
/**
 * Matches every Unicode punctuation or symbol character except the ASCII
 * apostrophe, which is kept so that word-internal elisions stay intact.
 * Letters, digits and combining marks (diacritics) are never matched.
 */
const WER_STRIP_PATTERN = /(?!')[\p{P}\p{S}]/gu;

/** Lower-cases `text`, removes punctuation/symbols and splits it into non-empty words. */
function tokenizeForWER(text: string): string[] {
    return text
        .toLowerCase()
        .replace(WER_STRIP_PATTERN, '')
        .split(/\s+/)
        .filter((word) => word.length > 0);
}

/**
 * Computes Word Error Rate (WER) using Levenshtein distance on words.
 * WER = (Substitutions + Deletions + Insertions) / Total Reference Words
 *
 * Both inputs are lower-cased and stripped of punctuation before comparison,
 * so "Naka nga def?" and "naka nga def" are considered identical.
 *
 * @param reference  Ground-truth transcript.
 * @param hypothesis Transcript to score against the reference.
 * @returns The WER as a ratio (0 = perfect match; values above 1 are possible
 *          when the hypothesis contains many insertions). When the reference
 *          contains no words the ratio is undefined; this function then
 *          returns 0 if the hypothesis is empty as well and 1 otherwise, so
 *          that spurious output for a silent reference is not scored as perfect.
 */
function calculateWER(reference: string, hypothesis: string): number {
    const refWords = tokenizeForWER(reference);
    const hypWords = tokenizeForWER(hypothesis);

    if (refWords.length === 0) {
        return hypWords.length === 0 ? 0 : 1;
    }

    // fast-levenshtein compares strings character by character, so each distinct
    // word is mapped to one UTF-16 code unit. Numbering starts in the Private Use
    // Area; String.fromCharCode wraps modulo 0x10000, so the mapping stays
    // collision-free for up to 65,536 distinct words per call.
    const symbolByWord = new Map<string, string>();
    let nextCode = 0xe000;
    const encode = (words: string[]): string =>
        words
            .map((word) => {
                let symbol = symbolByWord.get(word);
                if (symbol === undefined) {
                    symbol = String.fromCharCode(nextCode++);
                    symbolByWord.set(word, symbol);
                }
                return symbol;
            })
            .join('');

    // The same map is shared by both sides so equal words get equal symbols.
    const distance = levenshtein.get(encode(refWords), encode(hypWords));
    return distance / refWords.length;
}
/** Confidence scores at or below this value are classified as RED. */
const RED_MAX_CONFIDENCE = 50;
/** Confidence scores above RED_MAX_CONFIDENCE and at or below this value are ORANGE; higher is GREEN. */
const ORANGE_MAX_CONFIDENCE = 80;

type ConfidenceStatus = 'RED' | 'ORANGE' | 'GREEN';

/** Maps a confidence score to its RED / ORANGE / GREEN band. */
function classifyConfidence(confidence: number): ConfidenceStatus {
    if (confidence <= RED_MAX_CONFIDENCE) return 'RED';
    if (confidence <= ORANGE_MAX_CONFIDENCE) return 'ORANGE';
    return 'GREEN';
}

/**
 * Runs the cached (or freshly downloaded) Hugging Face samples through
 * `aiService.transcribeAudio`, scores every transcript against its reference
 * text before and after `normalizeWolof`, and writes the aggregated statistics
 * to STATS_PATH.
 *
 * Steps:
 *  1. Ensure DATA_DIR exists.
 *  2. If HF_SAMPLES_PATH is missing, invoke the Python fetch script to create it.
 *  3. Transcribe each sample; a failure on one sample is logged and skipped.
 *  4. Aggregate confidence, raw/normalized WER and the RED/ORANGE/GREEN
 *     distribution, then persist and print the summary.
 *
 * Returns early (without writing stats) when no samples file is available or
 * when the file does not contain a JSON array. A samples file that is not
 * valid JSON makes the returned promise reject.
 */
export async function runCalibration(): Promise<void> {
    console.log("π Starting Whisper Confidence Calibration Stress-Test...");

    // Ensure data dir exists
    if (!fs.existsSync(DATA_DIR)) {
        fs.mkdirSync(DATA_DIR, { recursive: true });
    }

    // Run Python fetching script if samples not already present
    if (!fs.existsSync(HF_SAMPLES_PATH)) {
        console.log("π₯ Calling Python datasets library to download Hugging Face audio...");
        try {
            // Paths are quoted so that directories containing spaces survive shell word-splitting.
            execSync(`python3 "${PY_SCRIPT}" --output "${DATA_DIR}"`, { stdio: 'inherit' });
        } catch {
            // Deliberately non-fatal: the existence check below decides whether to continue.
            console.error("β Python script failed to fetch samples. Please check HF_TOKEN or network.");
        }
    } else {
        console.log("β»οΈ Using cached Hugging Face samples...");
    }

    if (!fs.existsSync(HF_SAMPLES_PATH)) {
        console.error("β No samples mapped. Exiting calibration.");
        return;
    }

    // Each element is expected to carry `source`, `audio_base64` and `original_text`,
    // which are the fields read in the loop below.
    const parsed: unknown = JSON.parse(fs.readFileSync(HF_SAMPLES_PATH, 'utf-8'));
    if (!Array.isArray(parsed)) {
        console.error(`Invalid samples file (expected a JSON array): ${HF_SAMPLES_PATH}`);
        return;
    }
    const samples = parsed;

    console.log(`\nπ§ Processing ${samples.length} samples through Whisper STT...`);

    const results: Array<{
        source: unknown;
        index: number;
        hfOriginalText: unknown;
        transcribedText: unknown;
        normalizedText: unknown;
        confidenceScore: number;
        rawWER: number;
        normalizedWER: number;
        status: ConfidenceStatus;
    }> = [];
    const statusCounts: Record<ConfidenceStatus, number> = { RED: 0, ORANGE: 0, GREEN: 0 };
    let totalConfidence = 0;
    let totalRawWER = 0;
    let totalNormWER = 0;
    let totalProcessed = 0;

    for (let i = 0; i < samples.length; i++) {
        const sample = samples[i];
        console.log(`[${sample.source} ${i + 1}/${samples.length}] Transcribing...`);
        try {
            const audioBuffer = Buffer.from(sample.audio_base64, 'base64');

            // Pass to Whisper
            const { text, confidence } = await aiService.transcribeAudio(audioBuffer, `sample_${i}.wav`, 'WOLOF');

            // Normalize
            const normResult = normalizeWolof(text);

            // Compute WER against the reference, before and after normalization
            const rawWER = calculateWER(sample.original_text, text);
            const normWER = calculateWER(sample.original_text, normResult.normalizedText);

            totalConfidence += confidence;
            totalRawWER += rawWER;
            totalNormWER += normWER;
            totalProcessed++;

            // Single source of truth for both the counters and the per-sample status.
            const status = classifyConfidence(confidence);
            statusCounts[status]++;

            results.push({
                source: sample.source,
                index: i,
                hfOriginalText: sample.original_text,
                transcribedText: text,
                normalizedText: normResult.normalizedText,
                confidenceScore: confidence,
                rawWER,
                normalizedWER: normWER,
                status
            });
        } catch (err: unknown) {
            // A failing sample is skipped; it does not count towards totalProcessed.
            const message = err instanceof Error ? err.message : String(err);
            console.error(`Error processing sample ${i} from ${sample.source}: ${message}`);
        }
    }

    const averageConfidence = totalProcessed > 0 ? Math.round(totalConfidence / totalProcessed) : 0;
    const averageRawWER = totalProcessed > 0 ? totalRawWER / totalProcessed : 0;
    const averageNormalizedWER = totalProcessed > 0 ? totalNormWER / totalProcessed : 0;

    // Calculate Dictionary Efficiency (Improvement in WER relative to Raw WER)
    let dictionaryEfficiency = 0;
    if (averageRawWER > 0) {
        // If WER goes down, efficiency is positive.
        dictionaryEfficiency = ((averageRawWER - averageNormalizedWER) / averageRawWER) * 100;
    }

    // Share of processed samples, as a whole-number percentage (0 when nothing was processed).
    const percentageOf = (count: number): number =>
        totalProcessed ? Math.round((count / totalProcessed) * 100) : 0;

    const redCount = statusCounts.RED;
    const orangeCount = statusCounts.ORANGE;
    const greenCount = statusCounts.GREEN;

    const stats = {
        totalProcessed,
        averageConfidence,
        averageRawWER,
        averageNormalizedWER,
        dictionaryEfficiency,
        distribution: {
            red: { count: redCount, percentage: percentageOf(redCount) },
            orange: { count: orangeCount, percentage: percentageOf(orangeCount) },
            green: { count: greenCount, percentage: percentageOf(greenCount) }
        },
        samples: results,
        updatedAt: new Date().toISOString()
    };

    fs.writeFileSync(STATS_PATH, JSON.stringify(stats, null, 2));

    console.log(`\nβ Calibration finished! Stats saved to ${STATS_PATH}`);
    console.log(`Average Confidence: ${averageConfidence}%`);
    console.log(`Raw WER: ${(averageRawWER * 100).toFixed(2)}% | Normalized WER: ${(averageNormalizedWER * 100).toFixed(2)}%`);
    console.log(`Dictionary Efficiency Gain: ${dictionaryEfficiency.toFixed(2)}%`);
    console.log(`Red (<=50%): ${redCount} | Orange (51-80%): ${orangeCount} | Green (>80%): ${greenCount}`);
}
// Allow running directly from command line.
// The guard ensures the calibration (and the process.exit that follows it) is
// triggered only when this file is the process entry point, so importing
// `runCalibration` from another module neither starts a run nor terminates
// the importing process.
if (require.main === module) {
    runCalibration()
        .then(() => process.exit(0))
        .catch((err: unknown) => {
            console.error(err);
            process.exit(1);
        });
}