Spaces:

safetrack
/

edtech

Running

edtech/apps/api/src/scripts/calibrate-whisper.ts

CognxSafeTrack

chore: finalize Sprint P2 & P3 optimizations, baseline prisma migrations, and update technical audit docs

cfbb685 about 1 month ago

5.25 kB

	import 'dotenv/config';
	import fs from 'fs';
	import path from 'path';
	import { execSync, execFileSync } from 'child_process';
	import { aiService } from '../services/ai';
	import { normalizeWolof } from './normalizeWolof';
	import { calculateWER, formatError } from '../utils/metrics';

	// Data directory located two levels above this script's folder.
	const DATA_DIR = path.join(__dirname, '..', '..', 'data');

	// Builds the path of a file that lives directly inside DATA_DIR.
	const dataFile = (fileName: string): string => path.join(DATA_DIR, fileName);

	// JSON file the calibration statistics are written to.
	const STATS_PATH = dataFile('calibration_stats.json');
	// JSON file holding the Hugging Face samples read by the calibration run.
	const HF_SAMPLES_PATH = dataFile('hf_samples.json');
	// Python helper script that sits next to this file.
	const PY_SCRIPT = path.join(__dirname, 'fetch_hf_audio.py');


	/** Traffic-light bucket a transcription falls into based on its confidence score. */
	type ConfidenceStatus = 'RED' | 'ORANGE' | 'GREEN';

	/**
	 * Buckets a confidence score: `<= 50` is RED, `<= 80` is ORANGE, anything else is GREEN.
	 * Single source of truth for the thresholds used by both the counters and the per-sample status.
	 */
	function classifyConfidence(confidence: number): ConfidenceStatus {
		if (confidence <= 50) return 'RED';
		if (confidence <= 80) return 'ORANGE';
		return 'GREEN';
	}

	/**
	 * Transcribes every sample in `HF_SAMPLES_PATH` with `aiService.transcribeAudio`, compares the
	 * raw and Wolof-normalized transcripts against each sample's `original_text` via `calculateWER`,
	 * and writes the aggregated statistics (plus per-sample results) to `STATS_PATH`.
	 *
	 * If the sample file is missing, the Python helper `PY_SCRIPT` is invoked first to produce it.
	 * A sample that throws while being processed is logged and excluded from every aggregate.
	 *
	 * @returns A promise that resolves once the stats file has been written, or earlier (after
	 *          logging an error) when no usable sample file is available.
	 * @throws If the sample file is not valid JSON or a filesystem operation fails.
	 */
	export async function runCalibration(): Promise<void> {
		console.log("🚀 Starting Whisper Confidence Calibration Stress-Test...");

		// `recursive: true` makes this a no-op when the directory already exists.
		fs.mkdirSync(DATA_DIR, { recursive: true });

		if (fs.existsSync(HF_SAMPLES_PATH)) {
			console.log("♻️ Using cached Hugging Face samples...");
		} else {
			console.log("📥 Calling Python datasets library to download Hugging Face audio...");
			try {
				// Argument array + no shell: paths containing spaces or shell metacharacters are passed verbatim.
				execFileSync('python3', [PY_SCRIPT, '--output', DATA_DIR], { stdio: 'inherit' });
			} catch (e: unknown) {
				console.error("❌ Python script failed to fetch samples. Please check HF_TOKEN or network.");
				console.error(`   Details: ${formatError(e)}`);
			}
		}

		// Re-check: the Python step may have failed or produced nothing.
		if (!fs.existsSync(HF_SAMPLES_PATH)) {
			console.error("❌ No samples mapped. Exiting calibration.");
			return;
		}

		const parsed: unknown = JSON.parse(fs.readFileSync(HF_SAMPLES_PATH, 'utf-8'));
		if (!Array.isArray(parsed)) {
			// Bail out instead of overwriting existing stats with an empty run.
			console.error(`❌ ${HF_SAMPLES_PATH} does not contain a JSON array of samples. Exiting calibration.`);
			return;
		}
		const samples = parsed;
		console.log(`\n🎧 Processing ${samples.length} samples through Whisper STT...`);

		const results = [];
		const counts: Record<ConfidenceStatus, number> = { RED: 0, ORANGE: 0, GREEN: 0 };
		let totalConfidence = 0;
		let totalRawWER = 0;
		let totalNormWER = 0;
		let totalProcessed = 0;

		for (let i = 0; i < samples.length; i++) {
			const sample = samples[i];
			// Optional chaining so a null/malformed entry is reported by the catch below
			// rather than aborting the whole run while building the log line.
			const source = sample?.source;
			console.log(`[${source} ${i + 1}/${samples.length}] Transcribing...`);

			try {
				const audioBuffer = Buffer.from(sample.audio_base64, 'base64');
				const { text, confidence } = await aiService.transcribeAudio(audioBuffer, `sample_${i}.wav`, 'WOLOF');

				const normResult = normalizeWolof(text);

				// WER of the raw transcript vs. the normalized one, both against the reference text.
				const rawWER = calculateWER(sample.original_text, text);
				const normWER = calculateWER(sample.original_text, normResult.normalizedText);

				const status = classifyConfidence(confidence);

				totalConfidence += confidence;
				totalRawWER += rawWER;
				totalNormWER += normWER;
				totalProcessed++;
				counts[status]++;

				results.push({
					source: sample.source,
					index: i,
					hfOriginalText: sample.original_text,
					transcribedText: text,
					normalizedText: normResult.normalizedText,
					confidenceScore: confidence,
					rawWER,
					normalizedWER: normWER,
					status
				});
			} catch (err: unknown) {
				console.error(`Error processing sample ${i} from ${source}: ${formatError(err)}`);
			}
		}

		// Averages fall back to 0 when every sample failed, avoiding division by zero.
		const averageConfidence = totalProcessed > 0 ? Math.round(totalConfidence / totalProcessed) : 0;
		const averageRawWER = totalProcessed > 0 ? totalRawWER / totalProcessed : 0;
		const averageNormalizedWER = totalProcessed > 0 ? totalNormWER / totalProcessed : 0;

		// Relative WER improvement from normalization, in percent; positive when normalized WER is lower.
		const dictionaryEfficiency = averageRawWER > 0
			? ((averageRawWER - averageNormalizedWER) / averageRawWER) * 100
			: 0;

		// Share of processed samples in a bucket, as a rounded percentage.
		const percentageOf = (count: number): number =>
			totalProcessed ? Math.round((count / totalProcessed) * 100) : 0;

		const stats = {
			totalProcessed,
			averageConfidence,
			averageRawWER,
			averageNormalizedWER,
			dictionaryEfficiency,
			distribution: {
				red: { count: counts.RED, percentage: percentageOf(counts.RED) },
				orange: { count: counts.ORANGE, percentage: percentageOf(counts.ORANGE) },
				green: { count: counts.GREEN, percentage: percentageOf(counts.GREEN) }
			},
			samples: results,
			updatedAt: new Date().toISOString()
		};

		fs.writeFileSync(STATS_PATH, JSON.stringify(stats, null, 2));
		console.log(`\n✅ Calibration finished! Stats saved to ${STATS_PATH}`);
		console.log(`Average Confidence: ${averageConfidence}%`);
		console.log(`Raw WER: ${(averageRawWER * 100).toFixed(2)}% | Normalized WER: ${(averageNormalizedWER * 100).toFixed(2)}%`);
		console.log(`Dictionary Efficiency Gain: ${dictionaryEfficiency.toFixed(2)}%`);
		console.log(`Red (<=50%): ${counts.RED} | Orange (51-80%): ${counts.ORANGE} | Green (>80%): ${counts.GREEN}`);
	}

	// Allow running directly from command line.
	// The guard keeps the run (and the process.exit calls) from firing when another module
	// merely imports the exported runCalibration function.
	if (require.main === module) {
		runCalibration()
			.then(() => process.exit(0))
			.catch((err: unknown) => {
				console.error(err);
				process.exit(1);
			});
	}