CognxSafeTrack
chore: finalize Sprint P2 & P3 optimizations, baseline prisma migrations, and update technical audit docs
import 'dotenv/config';
import fs from 'fs';
import path from 'path';
import { execFileSync, execSync } from 'child_process';
import { aiService } from '../services/ai';
import { normalizeWolof } from './normalizeWolof';
import { calculateWER, formatError } from '../utils/metrics';
// Filesystem locations used by the calibration run, all derived from this module's directory.

/** Data directory, two levels above this module. */
const DATA_DIR = path.join(__dirname, '..', '..', 'data');

/** Cached Hugging Face samples (JSON) read by the calibration run. */
const HF_SAMPLES_PATH = path.join(DATA_DIR, 'hf_samples.json');

/** Destination of the aggregated calibration statistics (JSON). */
const STATS_PATH = path.join(DATA_DIR, 'calibration_stats.json');

/** Python helper, located next to this module, that downloads the audio samples. */
const PY_SCRIPT = path.join(__dirname, 'fetch_hf_audio.py');
/** Highest confidence score still classified as RED. */
const RED_MAX_CONFIDENCE = 50;

/** Highest confidence score still classified as ORANGE; anything above is GREEN. */
const ORANGE_MAX_CONFIDENCE = 80;

/** Traffic-light bucket assigned to a transcription based on its confidence score. */
type ConfidenceStatus = 'RED' | 'ORANGE' | 'GREEN';

/**
 * Fields this script reads from each entry of the cached samples file.
 * NOTE(review): field types are inferred from how the values are used in
 * runCalibration — confirm against the output of the Python fetch script.
 */
interface HfSample {
    source: string;
    audio_base64: string;
    original_text: string;
}

/** Maps a confidence score to its bucket: <=50 RED, <=80 ORANGE, otherwise GREEN. */
function classifyConfidence(confidence: number): ConfidenceStatus {
    if (confidence <= RED_MAX_CONFIDENCE) return 'RED';
    if (confidence <= ORANGE_MAX_CONFIDENCE) return 'ORANGE';
    return 'GREEN';
}

/** Share of `count` in `total` as a rounded whole percentage; 0 when `total` is 0. */
function percentageOf(count: number, total: number): number {
    return total ? Math.round((count / total) * 100) : 0;
}

/**
 * Runs the Whisper confidence calibration stress-test.
 *
 * Steps:
 *  1. Ensures the data directory exists.
 *  2. If no cached samples file is present, invokes the Python fetch script to create it.
 *  3. Transcribes every sample through `aiService.transcribeAudio`, normalizes the text
 *     with `normalizeWolof`, and computes the WER of both the raw and the normalized
 *     transcription against the sample's reference text.
 *  4. Aggregates averages and the RED/ORANGE/GREEN distribution, writes them to
 *     `STATS_PATH` as JSON, and prints a summary.
 *
 * A sample that fails (transcription error, undecodable audio, non-finite confidence)
 * is logged and skipped; it does not abort the run and is excluded from the aggregates.
 *
 * @returns A promise that resolves once the stats file has been written, or early
 *          (after logging an error) when no usable samples file is available.
 * @throws If the samples file is not valid JSON or the stats file cannot be written.
 */
export async function runCalibration(): Promise<void> {
    console.log("🚀 Starting Whisper Confidence Calibration Stress-Test...");

    // Ensure data dir exists
    if (!fs.existsSync(DATA_DIR)) {
        fs.mkdirSync(DATA_DIR, { recursive: true });
    }

    // Run Python fetching script if samples not already present
    if (!fs.existsSync(HF_SAMPLES_PATH)) {
        console.log("📥 Calling Python datasets library to download Hugging Face audio...");
        try {
            // Arguments are passed as an argv array (no shell), so paths containing
            // spaces or shell metacharacters reach the script intact.
            execFileSync('python3', [PY_SCRIPT, '--output', DATA_DIR], { stdio: 'inherit' });
        } catch (e: unknown) {
            console.error("❌ Python script failed to fetch samples. Please check HF_TOKEN or network.");
            console.error(`   Cause: ${formatError(e)}`);
            // Deliberately not rethrown: the existence check below decides whether to continue.
        }
    } else {
        console.log("♻️ Using cached Hugging Face samples...");
    }

    if (!fs.existsSync(HF_SAMPLES_PATH)) {
        console.error("❌ No samples mapped. Exiting calibration.");
        return;
    }

    const parsed: unknown = JSON.parse(fs.readFileSync(HF_SAMPLES_PATH, 'utf-8'));
    if (!Array.isArray(parsed)) {
        // Without this guard a non-array payload would silently yield an empty stats file.
        console.error(`❌ ${HF_SAMPLES_PATH} does not contain a JSON array of samples. Exiting calibration.`);
        return;
    }
    const samples = parsed as HfSample[];

    console.log(`\n🎧 Processing ${samples.length} samples through Whisper STT...`);

    // Element type is inferred from the objects pushed in the loop below.
    const results = [];
    const counts: Record<ConfidenceStatus, number> = { RED: 0, ORANGE: 0, GREEN: 0 };
    let totalConfidence = 0;
    let totalRawWER = 0;
    let totalNormWER = 0;
    let totalProcessed = 0;

    // Samples are transcribed one at a time, in file order.
    for (const [i, sample] of samples.entries()) {
        console.log(`[${sample.source} ${i + 1}/${samples.length}] Transcribing...`);
        try {
            const audioBuffer = Buffer.from(sample.audio_base64, 'base64');

            // Pass to Whisper
            const { text, confidence } = await aiService.transcribeAudio(audioBuffer, `sample_${i}.wav`, 'WOLOF');

            // A missing/NaN confidence would be bucketed as GREEN and turn every
            // average into NaN, so treat it as a failed sample instead.
            if (!Number.isFinite(confidence)) {
                throw new Error(`non-finite confidence score: ${String(confidence)}`);
            }

            // Normalize
            const normResult = normalizeWolof(text);

            // Compute WER of raw and normalized transcriptions against the reference text
            const rawWER = calculateWER(sample.original_text, text);
            const normWER = calculateWER(sample.original_text, normResult.normalizedText);

            const status = classifyConfidence(confidence);

            totalConfidence += confidence;
            totalRawWER += rawWER;
            totalNormWER += normWER;
            totalProcessed++;
            counts[status]++;

            results.push({
                source: sample.source,
                index: i,
                hfOriginalText: sample.original_text,
                transcribedText: text,
                normalizedText: normResult.normalizedText,
                confidenceScore: confidence,
                rawWER,
                normalizedWER: normWER,
                status
            });
        } catch (err: unknown) {
            console.error(`Error processing sample ${i} from ${sample.source}: ${formatError(err)}`);
        }
    }

    const averageConfidence = totalProcessed > 0 ? Math.round(totalConfidence / totalProcessed) : 0;
    const averageRawWER = totalProcessed > 0 ? totalRawWER / totalProcessed : 0;
    const averageNormalizedWER = totalProcessed > 0 ? totalNormWER / totalProcessed : 0;

    // Calculate Dictionary Efficiency (Improvement in WER relative to Raw WER).
    // Positive when normalization lowers the WER; left at 0 when the raw WER is 0.
    let dictionaryEfficiency = 0;
    if (averageRawWER > 0) {
        dictionaryEfficiency = ((averageRawWER - averageNormalizedWER) / averageRawWER) * 100;
    }

    const stats = {
        totalProcessed,
        averageConfidence,
        averageRawWER,
        averageNormalizedWER,
        dictionaryEfficiency,
        distribution: {
            red: { count: counts.RED, percentage: percentageOf(counts.RED, totalProcessed) },
            orange: { count: counts.ORANGE, percentage: percentageOf(counts.ORANGE, totalProcessed) },
            green: { count: counts.GREEN, percentage: percentageOf(counts.GREEN, totalProcessed) }
        },
        samples: results,
        updatedAt: new Date().toISOString()
    };

    fs.writeFileSync(STATS_PATH, JSON.stringify(stats, null, 2));

    console.log(`\n✅ Calibration finished! Stats saved to ${STATS_PATH}`);
    console.log(`Average Confidence: ${averageConfidence}%`);
    console.log(`Raw WER: ${(averageRawWER * 100).toFixed(2)}% | Normalized WER: ${(averageNormalizedWER * 100).toFixed(2)}%`);
    console.log(`Dictionary Efficiency Gain: ${dictionaryEfficiency.toFixed(2)}%`);
    console.log(`Red (<=50%): ${counts.RED} | Orange (51-80%): ${counts.ORANGE} | Green (>80%): ${counts.GREEN}`);
}
// Allow running directly from command line.
// The guard keeps this from firing when the module is merely imported for its
// `runCalibration` export; without it, any importer would trigger a full
// calibration run followed by `process.exit`.
if (require.main === module) {
    runCalibration()
        .then(() => process.exit(0))
        .catch((err: unknown) => {
            console.error(err);
            process.exit(1);
        });
}