// Source: edtech/apps/api/src/scripts/calibrate-whisper.ts (5.25 kB)
// CognxSafeTrack · commit cfbb685
// chore: finalize Sprint P2 & P3 optimizations, baseline prisma migrations, and update technical audit docs
import 'dotenv/config';
import fs from 'fs';
import path from 'path';
import { execFileSync, execSync } from 'child_process';
import { aiService } from '../services/ai';
import { normalizeWolof } from './normalizeWolof';
import { calculateWER, formatError } from '../utils/metrics';
/** Data folder, two levels above this script's directory, where calibration files are read and written. */
const DATA_DIR = path.join(__dirname, '..', '..', 'data');
/** JSON file that receives the aggregated calibration statistics. */
const STATS_PATH = path.join(DATA_DIR, 'calibration_stats.json');
/** Cached samples file; when it is missing, the Python fetcher is invoked to produce it. */
const HF_SAMPLES_PATH = path.join(DATA_DIR, 'hf_samples.json');
/** Python helper script that lives next to this file. */
const PY_SCRIPT = path.join(__dirname, 'fetch_hf_audio.py');
/**
 * One entry of `hf_samples.json` as consumed by the calibration loop.
 * NOTE(review): field types are inferred from how they are used below — confirm
 * against the output of `fetch_hf_audio.py`.
 */
interface HfSample {
  source: string;
  audio_base64: string;
  original_text: string;
}

/** Bucket assigned to a transcription based on its confidence score. */
type ConfidenceStatus = 'RED' | 'ORANGE' | 'GREEN';

/** Per-sample record written to the `samples` array of the stats file. */
interface CalibrationSampleResult {
  source: string;
  index: number;
  hfOriginalText: string;
  transcribedText: string;
  normalizedText: string;
  confidenceScore: number;
  rawWER: number;
  normalizedWER: number;
  status: ConfidenceStatus;
}

/** Maps a confidence score to its bucket: <=50 is RED, 51-80 is ORANGE, above 80 is GREEN. */
function classifyConfidence(confidence: number): ConfidenceStatus {
  if (confidence <= 50) return 'RED';
  if (confidence <= 80) return 'ORANGE';
  return 'GREEN';
}

/** Returns `count / total` as a rounded percentage, or 0 when `total` is 0. */
function toPercentage(count: number, total: number): number {
  return total > 0 ? Math.round((count / total) * 100) : 0;
}

/**
 * Runs the Whisper confidence calibration pass and writes the resulting statistics.
 *
 * Steps:
 * 1. Ensures the data directory exists and, if `hf_samples.json` is missing, runs the
 *    Python fetch script to create it.
 * 2. Transcribes every sample through `aiService.transcribeAudio`, normalizes the text
 *    with `normalizeWolof`, and computes the WER of both the raw and the normalized
 *    transcription against the sample's reference text.
 * 3. Aggregates averages and the RED/ORANGE/GREEN distribution, writes them to
 *    `calibration_stats.json`, and prints a summary.
 *
 * A sample that fails (transcription error, unusable confidence score, ...) is logged
 * and excluded from the statistics; it does not abort the run.
 *
 * @returns A promise that resolves once the stats file is written, or early (without
 *          writing anything) when no samples file is available.
 * @throws Error when the samples file does not contain a JSON array. Errors from
 *         `JSON.parse` and from the file-system calls also propagate.
 */
export async function runCalibration(): Promise<void> {
  console.log("🚀 Starting Whisper Confidence Calibration Stress-Test...");

  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync(DATA_DIR, { recursive: true });

  // Run Python fetching script if samples not already present
  if (!fs.existsSync(HF_SAMPLES_PATH)) {
    console.log("📥 Calling Python datasets library to download Hugging Face audio...");
    try {
      // Arguments are passed as an array and no shell is involved, so paths that
      // contain spaces or shell metacharacters reach the script unchanged.
      execFileSync('python3', [PY_SCRIPT, '--output', DATA_DIR], { stdio: 'inherit' });
    } catch (e: unknown) {
      console.error("❌ Python script failed to fetch samples. Please check HF_TOKEN or network.");
      console.error(`   ${formatError(e)}`);
    }
  } else {
    console.log("♻️ Using cached Hugging Face samples...");
  }

  // Re-check: the fetch above may have failed or produced nothing.
  if (!fs.existsSync(HF_SAMPLES_PATH)) {
    console.error("❌ No samples mapped. Exiting calibration.");
    return;
  }

  const parsed: unknown = JSON.parse(fs.readFileSync(HF_SAMPLES_PATH, 'utf-8'));
  if (!Array.isArray(parsed)) {
    // Fail loudly instead of silently producing an empty stats file.
    throw new Error(`Expected ${HF_SAMPLES_PATH} to contain a JSON array of samples.`);
  }
  const samples: HfSample[] = parsed;

  console.log(`\n🎧 Processing ${samples.length} samples through Whisper STT...`);

  const results: CalibrationSampleResult[] = [];
  const bucketCounts: Record<ConfidenceStatus, number> = { RED: 0, ORANGE: 0, GREEN: 0 };
  let totalConfidence = 0;
  let totalRawWER = 0;
  let totalNormWER = 0;
  let totalProcessed = 0;
  let failedCount = 0;

  for (const [i, sample] of samples.entries()) {
    console.log(`[${sample.source} ${i + 1}/${samples.length}] Transcribing...`);
    try {
      const audioBuffer = Buffer.from(sample.audio_base64, 'base64');

      // Pass to Whisper
      const { text, confidence } = await aiService.transcribeAudio(audioBuffer, `sample_${i}.wav`, 'WOLOF');

      // A NaN/undefined score would fail both `<=` checks, be counted as GREEN and
      // turn every average into NaN, so reject it before it reaches the totals.
      if (!Number.isFinite(confidence)) {
        throw new Error(`unusable confidence score: ${String(confidence)}`);
      }

      // Normalize
      const normResult = normalizeWolof(text);

      // Compute WER
      const rawWER = calculateWER(sample.original_text, text);
      const normWER = calculateWER(sample.original_text, normResult.normalizedText);

      const status = classifyConfidence(confidence);

      totalConfidence += confidence;
      totalRawWER += rawWER;
      totalNormWER += normWER;
      totalProcessed++;
      bucketCounts[status]++;

      results.push({
        source: sample.source,
        index: i,
        hfOriginalText: sample.original_text,
        transcribedText: text,
        normalizedText: normResult.normalizedText,
        confidenceScore: confidence,
        rawWER,
        normalizedWER: normWER,
        status
      });
    } catch (err: unknown) {
      failedCount++;
      console.error(`Error processing sample ${i} from ${sample.source}: ${formatError(err)}`);
    }
  }

  const averageConfidence = totalProcessed > 0 ? Math.round(totalConfidence / totalProcessed) : 0;
  const averageRawWER = totalProcessed > 0 ? totalRawWER / totalProcessed : 0;
  const averageNormalizedWER = totalProcessed > 0 ? totalNormWER / totalProcessed : 0;

  // Dictionary efficiency: relative WER improvement brought by normalization.
  // Positive when the normalized WER is lower than the raw WER; left at 0 when the
  // raw WER is 0 to avoid dividing by zero.
  const dictionaryEfficiency =
    averageRawWER > 0 ? ((averageRawWER - averageNormalizedWER) / averageRawWER) * 100 : 0;

  const stats = {
    totalProcessed,
    averageConfidence,
    averageRawWER,
    averageNormalizedWER,
    dictionaryEfficiency,
    distribution: {
      red: { count: bucketCounts.RED, percentage: toPercentage(bucketCounts.RED, totalProcessed) },
      orange: { count: bucketCounts.ORANGE, percentage: toPercentage(bucketCounts.ORANGE, totalProcessed) },
      green: { count: bucketCounts.GREEN, percentage: toPercentage(bucketCounts.GREEN, totalProcessed) }
    },
    samples: results,
    updatedAt: new Date().toISOString()
  };

  fs.writeFileSync(STATS_PATH, JSON.stringify(stats, null, 2));

  console.log(`\n✅ Calibration finished! Stats saved to ${STATS_PATH}`);
  if (failedCount > 0) {
    console.warn(`⚠️ ${failedCount} sample(s) failed and were excluded from the statistics.`);
  }
  console.log(`Average Confidence: ${averageConfidence}%`);
  console.log(`Raw WER: ${(averageRawWER * 100).toFixed(2)}% | Normalized WER: ${(averageNormalizedWER * 100).toFixed(2)}%`);
  console.log(`Dictionary Efficiency Gain: ${dictionaryEfficiency.toFixed(2)}%`);
  console.log(`Red (<=50%): ${bucketCounts.RED} | Orange (51-80%): ${bucketCounts.ORANGE} | Green (>80%): ${bucketCounts.GREEN}`);
}
// Allow running directly from command line.
// The guard restricts the run to the case where this file is the process entry point:
// without it, importing `runCalibration` from another module would start a calibration
// run and then terminate the importing process through `process.exit`.
if (require.main === module) {
  runCalibration()
    .then(() => process.exit(0))
    .catch((err: unknown) => {
      console.error(err);
      process.exit(1);
    });
}