edtech / packages /database /src /translate-tracks.ts
CognxSafeTrack
feat(content): add AI track translation script for EN/ES/PT
697c637
/**
* translate-tracks.ts
*
* Translates all T*-FR.json track files into EN, ES, PT using GPT-4o.
* Preserves all non-text fields (IDs, URLs, dayNumber, weights, thresholds).
* Leaves audioUrl null so TTS is auto-generated on first lesson delivery.
*
* Usage:
* OPENAI_API_KEY=sk-... npx tsx packages/database/src/translate-tracks.ts
*
* Options:
* --tracks=T1-FR,T2-FR Translate specific tracks only (default: all)
* --langs=EN,ES Target languages only (default: EN,ES,PT)
* --dry-run Print first day only, no file writes
*/
import fs from 'fs';
import path from 'path';
import OpenAI from 'openai';
// Directory that holds the track JSON files, resolved relative to this script's own directory.
const CONTENT_DIR = path.join(__dirname, '../content/tracks');
/** Language codes this script can translate the French source tracks into. */
const TARGET_LANGS = ['EN', 'ES', 'PT'] as const;

/** Union of the codes listed in TARGET_LANGS. */
type TargetLang = (typeof TARGET_LANGS)[number];

/** Human-readable language name for each code, interpolated into the translation prompt. */
const LANG_LABELS: { [Code in TargetLang]: string } = {
  EN: 'English',
  ES: 'Spanish (Latin American)',
  PT: 'Portuguese (Brazilian)',
};

/** Suffix that replaces "FR" in track ids and file names for each target language. */
const LANG_SUFFIX: { [Code in TargetLang]: string } = {
  EN: 'EN',
  ES: 'ES',
  PT: 'PT',
};
/** String properties of a single day object that get translated. */
const TEXT_FIELDS_DAY = [
  'title',
  'lessonText',
  'exercisePrompt',
  'videoCaption',
] as const;

/** String properties of the track object itself that get translated. */
const TEXT_FIELDS_TRACK = [
  'title',
  'description',
] as const;
/**
 * One string queued for translation: the text plus the key used to put the
 * translated result back in the right place.
 */
interface TranslationBatch {
/** Dotted locator of the string inside the track, e.g. `track.title` or `day.0.btn.1.title`. */
key: string;
/** Source text to be translated. */
text: string;
}
/**
 * Sends one batch of strings to GPT-4o and returns the translations keyed by
 * each item's `key`.
 *
 * The model's reply is untrusted: it is parsed, checked to be a JSON object,
 * and reduced to the keys that were actually requested and that carry a
 * non-blank string. Requested keys the model dropped or mangled are reported
 * with a warning and left out of the result, so callers fall back to the
 * source text for them.
 *
 * @param client  OpenAI SDK client used for the chat completion call.
 * @param items   Strings to translate; an empty list short-circuits to `{}` without an API call.
 * @param lang    Target language code; its label is interpolated into the prompt.
 * @param context Free-text description of the content, passed to the model as context.
 * @returns Map from item key to translated text (only validated entries).
 * @throws Error when the reply is empty, truncated, not valid JSON, or not a JSON object.
 */
async function translateBatch(
  client: OpenAI,
  items: TranslationBatch[],
  lang: TargetLang,
  context: string,
): Promise<Record<string, string>> {
  if (items.length === 0) return {};

  const itemsJson = JSON.stringify(
    items.map(({ key, text }) => ({ key, text })),
    null,
    2,
  );

  // The prompt body is deliberately left unindented: every character inside
  // the template literal is sent to the model.
  const prompt = `You are a professional educational content translator for entrepreneurship training in West Africa.
Translate the following strings from French to ${LANG_LABELS[lang]}.
Context: ${context}
Rules:
- Keep the exact tone: direct, encouraging, coach-like
- Adapt examples to feel natural in ${LANG_LABELS[lang]}-speaking African contexts when appropriate
- Keep all formatting (✅, numbering, line breaks \\n\\n, bold asterisks *word*)
- Keep all placeholder patterns like [PRODUCT], [CLIENT], [LIEU] — translate the word inside brackets
- For "videoCaption" fields (Wolof text starting with emojis): translate to ${LANG_LABELS[lang]} keeping the emoji and structure
- Return ONLY a JSON object mapping each "key" to its translated "text"
- Do not add explanations
Items to translate:
${itemsJson}
Return format:
{"key1": "translated text 1", "key2": "translated text 2", ...}`;

  const response = await client.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: prompt }],
    response_format: { type: 'json_object' },
    temperature: 0.3,
  });

  const choice = response.choices[0];
  const content = choice?.message?.content;
  if (!content) throw new Error('Empty response from GPT-4o');

  // A reply cut off at the token limit is incomplete JSON; say so explicitly
  // instead of surfacing a bare SyntaxError from JSON.parse.
  if (choice?.finish_reason === 'length') {
    throw new Error('GPT-4o response was truncated at the token limit; use a smaller batch');
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`GPT-4o returned invalid JSON: ${reason}`);
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error('GPT-4o response is not a JSON object');
  }

  // Keep only what was asked for: unknown keys are ignored so a hallucinated
  // key can never overwrite a translation obtained from another batch.
  const byKey = parsed as Record<string, unknown>;
  const translations: Record<string, string> = {};
  const missing: string[] = [];
  for (const { key } of items) {
    const value = byKey[key];
    if (typeof value === 'string' && value.trim() !== '') {
      translations[key] = value;
    } else {
      missing.push(key);
    }
  }

  if (missing.length > 0) {
    console.warn(
      `\n  [${lang}] ${missing.length} item(s) came back untranslated and keep their source text: ${missing.join(', ')}`,
    );
  }
  return translations;
}
/**
 * Gathers every translatable string of one day into a flat list.
 *
 * Keys are built from `prefix` plus the location of the string, in this order:
 * the day's own text fields, the exercise-criteria texts (goal, examples,
 * hint, mustInclude descriptions), then the button titles.
 *
 * @param day    Day object as read from the track JSON.
 * @param prefix Key prefix identifying the day, e.g. `day.0`.
 * @returns One entry per non-empty text found.
 */
function collectDayTexts(day: any, prefix: string): TranslationBatch[] {
  // Plain text fields directly on the day (must be non-empty strings).
  const fieldTexts: TranslationBatch[] = TEXT_FIELDS_DAY
    .filter(name => day[name] && typeof day[name] === 'string')
    .map(name => ({ key: `${prefix}.${name}`, text: day[name] }));

  // Texts nested inside exerciseCriteria.
  const criteriaTexts: TranslationBatch[] = [];
  const criteria = day.exerciseCriteria;
  if (criteria) {
    const singleValues: Array<[string, any]> = [
      ['goal', criteria.goal],
      ['examples', criteria.evaluation?.examples],
      ['hint', criteria.remediation?.hint],
    ];
    for (const [slot, value] of singleValues) {
      if (value) criteriaTexts.push({ key: `${prefix}.criteria.${slot}`, text: value });
    }

    const required = criteria.success?.mustInclude;
    if (Array.isArray(required)) {
      criteriaTexts.push(
        ...required.flatMap((entry: any, position: number) =>
          entry.desc
            ? [{ key: `${prefix}.criteria.must.${position}.desc`, text: entry.desc }]
            : [],
        ),
      );
    }
  }

  // Button labels; the position in the array is part of the key.
  const buttonTexts: TranslationBatch[] = Array.isArray(day.buttonsJson)
    ? day.buttonsJson.flatMap((button: any, position: number) =>
        button.title
          ? [{ key: `${prefix}.btn.${position}.title`, text: button.title }]
          : [],
      )
    : [];

  return [...fieldTexts, ...criteriaTexts, ...buttonTexts];
}
/**
 * Returns a copy of `day` with its translatable strings replaced by the
 * entries found in `translations`.
 *
 * The input object is not mutated: the day and every nested object that is
 * written to are shallow-copied first. A translation is applied only when it
 * is a non-blank string; anything else (missing key, empty string, non-string
 * model output) leaves the source text in place. The same rule is used for
 * top-level fields, exercise criteria and buttons.
 *
 * @param day          Source day object from the track JSON.
 * @param prefix       Key prefix that was used when the day's texts were collected, e.g. `day.0`.
 * @param translations Map from key to translated text.
 * @returns The translated copy, with `audioUrl` set to `null`.
 */
function applyTranslations(day: any, prefix: string, translations: Record<string, string>): any {
  // The map originates from model output, so its values are not trusted to be
  // usable strings even though the type says so.
  const translationFor = (key: string): string | undefined => {
    const candidate: unknown = translations[key];
    return typeof candidate === 'string' && candidate.trim() !== '' ? candidate : undefined;
  };

  const d = { ...day };

  for (const field of TEXT_FIELDS_DAY) {
    const text = translationFor(`${prefix}.${field}`);
    if (text !== undefined) d[field] = text;
  }

  // exerciseCriteria: copy each nested level before writing into it.
  if (d.exerciseCriteria) {
    const criteria = { ...d.exerciseCriteria };
    d.exerciseCriteria = criteria;

    const goal = translationFor(`${prefix}.criteria.goal`);
    if (goal !== undefined) criteria.goal = goal;

    if (criteria.evaluation) {
      criteria.evaluation = { ...criteria.evaluation };
      const examples = translationFor(`${prefix}.criteria.examples`);
      if (examples !== undefined) criteria.evaluation.examples = examples;
    }

    if (criteria.remediation) {
      criteria.remediation = { ...criteria.remediation };
      const hint = translationFor(`${prefix}.criteria.hint`);
      if (hint !== undefined) criteria.remediation.hint = hint;
    }

    // Array check mirrors the collector: a non-array value is left untouched
    // instead of crashing on `.map`.
    if (Array.isArray(criteria.success?.mustInclude)) {
      criteria.success = {
        ...criteria.success,
        mustInclude: criteria.success.mustInclude.map((item: any, idx: number) => {
          const desc = translationFor(`${prefix}.criteria.must.${idx}.desc`);
          return desc !== undefined ? { ...item, desc } : item;
        }),
      };
    }
  }

  // buttonsJson: only the title changes, ids and other properties are kept.
  if (Array.isArray(d.buttonsJson)) {
    d.buttonsJson = d.buttonsJson.map((btn: any, idx: number) => {
      const title = translationFor(`${prefix}.btn.${idx}.title`);
      return title !== undefined ? { ...btn, title } : btn;
    });
  }

  // Always clear audioUrl: the source recording is French, and TTS is
  // regenerated on first delivery of the translated lesson.
  d.audioUrl = null;
  return d;
}
/**
 * Translates one French track file into `lang` and writes the result as a
 * sibling file in CONTENT_DIR (`-FR.json` becomes `-<LANG>.json`).
 *
 * Steps: read and sanity-check the source track, derive the target id and
 * file name, skip if the target file already exists (resume support), collect
 * all translatable strings, translate them in chunks, then rebuild the track
 * with the translated strings.
 *
 * In dry-run mode nothing is written and only the first day is translated and
 * printed, so a preview does not pay for translating the whole track.
 *
 * @param client     OpenAI SDK client passed through to `translateBatch`.
 * @param sourceFile File name (not path) of the French track inside CONTENT_DIR; must end in `-FR.json`.
 * @param lang       Target language code.
 * @param dryRun     When true, preview the first translated day instead of writing a file.
 * @throws Error when the file name or the track content does not look like a French source track,
 *         plus any error raised by file I/O or by `translateBatch`.
 */
async function translateTrack(
  client: OpenAI,
  sourceFile: string,
  lang: TargetLang,
  dryRun: boolean,
): Promise<void> {
  // If the name carries no "-FR.json" suffix, the target name would equal the
  // source name: the run would report the source as "already exists" or, worse,
  // aim the write at the source file. Fail loudly instead.
  const frenchFileSuffix = /-FR\.json$/;
  if (!frenchFileSuffix.test(sourceFile)) {
    throw new Error(`Source track file must be named *-FR.json, got "${sourceFile}"`);
  }

  const sourcePath = path.join(CONTENT_DIR, sourceFile);
  const source = JSON.parse(fs.readFileSync(sourcePath, 'utf-8'));
  if (typeof source?.trackId !== 'string' || !Array.isArray(source.days)) {
    throw new Error(`${sourceFile}: expected a string "trackId" and a "days" array`);
  }

  const sourceId: string = source.trackId; // e.g. T1-FR
  // Match "-FR" only as a whole token so an id such as "X-FRESH-FR" is not mangled.
  const targetId = sourceId.replace(/-FR\b/, `-${LANG_SUFFIX[lang]}`);
  if (targetId === sourceId) {
    // An unchanged id would make the translated track collide with the French one.
    throw new Error(`${sourceFile}: trackId "${sourceId}" has no "-FR" marker to replace`);
  }
  const targetFile = sourceFile.replace(frenchFileSuffix, `-${LANG_SUFFIX[lang]}.json`);
  const targetPath = path.join(CONTENT_DIR, targetFile);

  console.log(`\n[${lang}] ${sourceId} → ${targetId}`);

  // Skip if already exists (resume support)
  if (!dryRun && fs.existsSync(targetPath)) {
    console.log(` ⏭ ${targetFile} already exists — skipping`);
    return;
  }

  // A dry run only previews day 0, so only that day is sent to the API.
  const days: any[] = dryRun ? source.days.slice(0, 1) : source.days;

  // Collect track-level and day-level texts into one flat list.
  const allItems: TranslationBatch[] = [];
  for (const field of TEXT_FIELDS_TRACK) {
    const value = source[field];
    if (typeof value === 'string' && value) {
      allItems.push({ key: `track.${field}`, text: value });
    }
  }
  days.forEach((day, idx) => {
    allItems.push(...collectDayTexts(day, `day.${idx}`));
  });

  const context = `Entrepreneurship education track "${source.title}" for micro-entrepreneurs in Africa. Day-by-day coaching program on how to understand, build, and pitch a business.`;

  // Batch in chunks of 40 items to stay within token limits
  const CHUNK = 40;
  const totalBatches = Math.ceil(allItems.length / CHUNK);
  const translations: Record<string, string> = {};
  for (let i = 0; i < allItems.length; i += CHUNK) {
    const chunk = allItems.slice(i, i + CHUNK);
    process.stdout.write(` Translating batch ${Math.floor(i / CHUNK) + 1}/${totalBatches}...`);
    const result = await translateBatch(client, chunk, lang, context);
    Object.assign(translations, result);
    process.stdout.write(' ✓\n');
  }

  // Build translated track; untranslated track-level texts fall back to the source.
  const translated = {
    trackId: targetId,
    title: translations['track.title'] ?? source.title,
    language: lang,
    description: translations['track.description'] ?? source.description,
    totalDays: source.totalDays,
    version: source.version,
    days: days.map((day, idx) => applyTranslations(day, `day.${idx}`, translations)),
  };

  if (dryRun) {
    console.log('\n [DRY RUN] First day preview:');
    console.log(JSON.stringify(translated.days[0], null, 2));
    return;
  }

  fs.writeFileSync(targetPath, JSON.stringify(translated, null, 2), 'utf-8');
  console.log(` ✅ Written: ${targetFile}`);
}
/**
 * CLI entry point: parses the command-line options, resolves which tracks and
 * languages to process, and translates every (track, language) pair in
 * sequence.
 *
 * Options (both `--opt=value` and `--opt value` are accepted):
 *   --tracks  Comma-separated track names, with or without the `.json` extension
 *             (default: every `*-FR.json` file in CONTENT_DIR).
 *   --langs   Comma-separated target language codes, case-insensitive
 *             (default: all of TARGET_LANGS).
 *   --dry-run Preview instead of writing files.
 *
 * @throws Error on a malformed option, an unsupported language code, or a missing OPENAI_API_KEY.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');

  // Reads `--name=value` or `--name value`. An option that is silently ignored
  // would fall back to "translate everything", which is the expensive default.
  const readOption = (name: string): string | undefined => {
    const flag = `--${name}`;
    for (const [position, arg] of args.entries()) {
      if (arg.startsWith(`${flag}=`)) return arg.slice(flag.length + 1);
      if (arg === flag) {
        const value = args[position + 1];
        if (value === undefined || value.startsWith('--')) {
          throw new Error(`${flag} requires a value`);
        }
        return value;
      }
    }
    return undefined;
  };

  // Splits a comma-separated list, dropping blanks such as a trailing comma.
  const parseList = (raw: string): string[] =>
    raw.split(',').map(entry => entry.trim()).filter(entry => entry.length > 0);

  const isTargetLang = (code: string): code is TargetLang =>
    (TARGET_LANGS as readonly string[]).includes(code);

  const langsArg = readOption('langs');
  let targetLangs: TargetLang[] = [...TARGET_LANGS];
  if (langsArg !== undefined) {
    const requested = parseList(langsArg).map(code => code.toUpperCase());
    const unsupported = requested.filter(code => !isTargetLang(code));
    if (requested.length === 0 || unsupported.length > 0) {
      // An unknown code would otherwise reach the prompt and file name as "undefined".
      throw new Error(
        `Unsupported --langs value "${langsArg}" (supported: ${TARGET_LANGS.join(', ')})`,
      );
    }
    targetLangs = [...new Set(requested.filter(isTargetLang))];
  }

  const tracksArg = readOption('tracks');
  let sourceTracks: string[];
  if (tracksArg !== undefined) {
    sourceTracks = parseList(tracksArg).map(track =>
      track.endsWith('.json') ? track : `${track}.json`,
    );
    if (sourceTracks.length === 0) throw new Error('--tracks requires at least one track name');
  } else {
    sourceTracks = fs.readdirSync(CONTENT_DIR).filter(f => f.endsWith('-FR.json'));
  }

  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OPENAI_API_KEY is required');
  const client = new OpenAI({ apiKey });

  console.log(`🌍 Translating ${sourceTracks.length} tracks → ${targetLangs.join(', ')}${dryRun ? ' [DRY RUN]' : ''}`);
  console.log(`Source tracks: ${sourceTracks.join(', ')}\n`);

  // Sequential on purpose: one request in flight at a time keeps the progress
  // output readable and the API load low.
  for (const sourceFile of sourceTracks) {
    for (const lang of targetLangs) {
      await translateTrack(client, sourceFile, lang, dryRun);
    }
  }

  console.log('\n🎉 Translation complete!');
  if (!dryRun) {
    console.log('Next step: run the seeder to load translated tracks into DB');
    console.log(' pnpm --filter @repo/database seed');
  }
}
// Run the CLI; any failure is reported on stderr and ends the process with exit code 1.
main().catch((err: unknown) => {
  // Rejections are not guaranteed to be Error instances; avoid printing "undefined".
  const reason = err instanceof Error ? err.message : String(err);
  console.error('❌ Translation failed:', reason);
  process.exit(1);
});