/**
 * translate-tracks.ts
 *
 * Translates all T*-FR.json track files into EN, ES, PT using GPT-4o.
 * Every day-level field that is not translated is copied through unchanged.
 * Leaves audioUrl null so TTS is auto-generated on first lesson delivery.
 *
 * Usage:
 *   OPENAI_API_KEY=sk-... npx tsx packages/database/src/translate-tracks.ts
 *
 * Options (both `--flag value` and `--flag=value` are accepted):
 *   --tracks T1-FR,T2-FR   Translate specific tracks only (default: all)
 *   --langs EN,ES          Target languages only (default: EN,ES,PT)
 *   --dry-run              Print first day only, no file writes
 */

import fs from 'fs';
import path from 'path';
import OpenAI from 'openai';

const CONTENT_DIR = path.join(__dirname, '../content/tracks');

const TARGET_LANGS = ['EN', 'ES', 'PT'] as const;
type TargetLang = typeof TARGET_LANGS[number];

const LANG_LABELS: Record<TargetLang, string> = {
  EN: 'English',
  ES: 'Spanish (Latin American)',
  PT: 'Portuguese (Brazilian)',
};

const LANG_SUFFIX: Record<TargetLang, string> = {
  EN: 'EN',
  ES: 'ES',
  PT: 'PT',
};

// Fields to translate at TrackDay level
const TEXT_FIELDS_DAY = ['title', 'lessonText', 'exercisePrompt', 'videoCaption'] as const;

// Fields to translate at Track level
const TEXT_FIELDS_TRACK = ['title', 'description'] as const;

/** One string to translate, addressed by a dotted key such as `day.3.title`. */
interface TranslationBatch {
  key: string;
  text: string;
}

/**
 * Sends one batch of strings to GPT-4o and returns the translations by key.
 *
 * @param client  Configured OpenAI client.
 * @param items   Strings to translate; an empty list short-circuits to `{}`.
 * @param lang    Target language.
 * @param context Free-text description of the track, embedded in the prompt.
 * @returns Object mapping each item key to its translated text. Keys the model
 *          omitted are absent (a warning is logged), so callers fall back to
 *          the source text for them.
 * @throws If the response is empty, is not valid JSON, or is not a JSON object.
 */
async function translateBatch(
  client: OpenAI,
  items: TranslationBatch[],
  lang: TargetLang,
  context: string,
): Promise<Record<string, string>> {
  if (items.length === 0) return {};

  const itemsJson = JSON.stringify(
    items.map(i => ({ key: i.key, text: i.text })),
    null,
    2,
  );

  const prompt = `You are a professional educational content translator for entrepreneurship training in West Africa.
Translate the following strings from French to ${LANG_LABELS[lang]}.

Context: ${context}

Rules:
- Keep the exact tone: direct, encouraging, coach-like
- Adapt examples to feel natural in ${LANG_LABELS[lang]}-speaking African contexts when appropriate
- Keep all formatting (✅, numbering, line breaks \\n\\n, bold asterisks *word*)
- Keep all placeholder patterns like [PRODUCT], [CLIENT], [LIEU] — translate the word inside brackets
- For "videoCaption" fields (Wolof text starting with emojis): translate to ${LANG_LABELS[lang]} keeping the emoji and structure
- Return ONLY a JSON object mapping each "key" to its translated "text"
- Do not add explanations

Items to translate:
${itemsJson}

Return format:
{"key1": "translated text 1", "key2": "translated text 2", ...}`;

  const response = await client.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: prompt }],
    response_format: { type: 'json_object' },
    temperature: 0.3,
  });

  const content = response.choices[0]?.message?.content;
  if (!content) throw new Error('Empty response from GPT-4o');

  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    // Typically a truncated completion; surface it instead of a bare SyntaxError.
    throw new Error('GPT-4o returned invalid JSON (response may have been truncated)');
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error('GPT-4o response is not a JSON object');
  }

  const translations = parsed as Record<string, string>;
  const missing = items
    .map(item => item.key)
    .filter(key => !Object.prototype.hasOwnProperty.call(translations, key));
  if (missing.length > 0) {
    // Leading newline: the caller is mid-line on its progress indicator.
    console.warn(
      `\n ⚠ ${missing.length} item(s) missing from GPT-4o response, source text kept: ${missing.join(', ')}`,
    );
  }

  return translations;
}

/**
 * Gathers every translatable string of one day into keyed batch items.
 *
 * Covers the TEXT_FIELDS_DAY fields, the nested exerciseCriteria texts
 * (goal, evaluation.examples, remediation.hint, success.mustInclude[].desc)
 * and the titles of buttonsJson entries.
 *
 * @param day    Raw day object from the track JSON (shape is not validated here).
 * @param prefix Key prefix identifying the day, e.g. `day.0`.
 */
function collectDayTexts(day: any, prefix: string): TranslationBatch[] {
  const items: TranslationBatch[] = [];

  for (const field of TEXT_FIELDS_DAY) {
    if (day[field] && typeof day[field] === 'string') {
      items.push({ key: `${prefix}.${field}`, text: day[field] });
    }
  }

  // exerciseCriteria nested text
  const crit = day.exerciseCriteria;
  if (crit) {
    if (crit.goal) items.push({ key: `${prefix}.criteria.goal`, text: crit.goal });
    if (crit.evaluation?.examples) {
      items.push({ key: `${prefix}.criteria.examples`, text: crit.evaluation.examples });
    }
    if (crit.remediation?.hint) {
      items.push({ key: `${prefix}.criteria.hint`, text: crit.remediation.hint });
    }

    // mustInclude descriptions
    if (Array.isArray(crit.success?.mustInclude)) {
      crit.success.mustInclude.forEach((item: any, idx: number) => {
        if (item.desc) {
          items.push({ key: `${prefix}.criteria.must.${idx}.desc`, text: item.desc });
        }
      });
    }
  }

  // buttonsJson titles (keep IDs)
  if (Array.isArray(day.buttonsJson)) {
    day.buttonsJson.forEach((btn: any, idx: number) => {
      if (btn.title) {
        items.push({ key: `${prefix}.btn.${idx}.title`, text: btn.title });
      }
    });
  }

  return items;
}

/**
 * Returns a copy of `day` with translated strings substituted in.
 *
 * Nested objects that receive translations are shallow-copied first, so the
 * input day is never mutated. Keys absent from `translations` leave the source
 * text in place. `audioUrl` is always reset to null.
 *
 * @param day          Raw day object from the source track.
 * @param prefix       Key prefix used when the day's texts were collected.
 * @param translations Translated strings by key.
 */
function applyTranslations(day: any, prefix: string, translations: Record<string, string>): any {
  const d = { ...day };

  for (const field of TEXT_FIELDS_DAY) {
    const key = `${prefix}.${field}`;
    if (translations[key] !== undefined) d[field] = translations[key];
  }

  // exerciseCriteria
  if (d.exerciseCriteria) {
    d.exerciseCriteria = { ...d.exerciseCriteria };

    if (translations[`${prefix}.criteria.goal`]) {
      d.exerciseCriteria.goal = translations[`${prefix}.criteria.goal`];
    }

    if (d.exerciseCriteria.evaluation) {
      d.exerciseCriteria.evaluation = { ...d.exerciseCriteria.evaluation };
      if (translations[`${prefix}.criteria.examples`]) {
        d.exerciseCriteria.evaluation.examples = translations[`${prefix}.criteria.examples`];
      }
    }

    if (d.exerciseCriteria.remediation) {
      d.exerciseCriteria.remediation = { ...d.exerciseCriteria.remediation };
      if (translations[`${prefix}.criteria.hint`]) {
        d.exerciseCriteria.remediation.hint = translations[`${prefix}.criteria.hint`];
      }
    }

    if (d.exerciseCriteria.success?.mustInclude) {
      d.exerciseCriteria.success = { ...d.exerciseCriteria.success };
      d.exerciseCriteria.success.mustInclude = d.exerciseCriteria.success.mustInclude.map(
        (item: any, idx: number) => {
          const key = `${prefix}.criteria.must.${idx}.desc`;
          return translations[key] ? { ...item, desc: translations[key] } : item;
        },
      );
    }
  }

  // buttonsJson
  if (Array.isArray(d.buttonsJson)) {
    d.buttonsJson = d.buttonsJson.map((btn: any, idx: number) => {
      const key = `${prefix}.btn.${idx}.title`;
      return translations[key] ? { ...btn, title: translations[key] } : btn;
    });
  }

  // Always clear audioUrl — TTS auto-generates on first delivery
  d.audioUrl = null;

  return d;
}

/**
 * Translates one source track file into one target language and writes the
 * result next to the source as `<name>-<LANG>.json`.
 *
 * An existing target file is left untouched (resume support) unless this is a
 * dry run, in which case nothing is written and only the first translated day
 * is printed.
 *
 * @param client     Configured OpenAI client.
 * @param sourceFile File name inside CONTENT_DIR; must end with `-FR.json`.
 * @param lang       Target language.
 * @param dryRun     When true, print a preview instead of writing the file.
 * @throws If the file name is not a `-FR.json` track or the JSON lacks a
 *         string `trackId` or a `days` array.
 */
async function translateTrack(
  client: OpenAI,
  sourceFile: string,
  lang: TargetLang,
  dryRun: boolean,
): Promise<void> {
  // Without the -FR suffix the target path would equal the source path and the
  // resume check below would report a misleading "already exists".
  if (!sourceFile.endsWith('-FR.json')) {
    throw new Error(`Source track file must end with "-FR.json": ${sourceFile}`);
  }

  const sourcePath = path.join(CONTENT_DIR, sourceFile);
  const source = JSON.parse(fs.readFileSync(sourcePath, 'utf-8'));

  if (typeof source?.trackId !== 'string') {
    throw new Error(`${sourceFile}: missing string "trackId"`);
  }
  if (!Array.isArray(source.days)) {
    throw new Error(`${sourceFile}: missing "days" array`);
  }

  const sourceId: string = source.trackId; // e.g. T1-FR
  const targetId = sourceId.replace('-FR', `-${LANG_SUFFIX[lang]}`);
  const targetFile = sourceFile.replace('-FR.json', `-${LANG_SUFFIX[lang]}.json`);
  const targetPath = path.join(CONTENT_DIR, targetFile);

  console.log(`\n[${lang}] ${sourceId} → ${targetId}`);

  // Skip if already exists (resume support)
  if (!dryRun && fs.existsSync(targetPath)) {
    console.log(` ⏭ ${targetFile} already exists — skipping`);
    return;
  }

  // Collect all text from all days in one big batch
  const allItems: TranslationBatch[] = [];

  // Track-level texts
  for (const field of TEXT_FIELDS_TRACK) {
    if (source[field]) allItems.push({ key: `track.${field}`, text: source[field] });
  }

  // Day-level texts
  source.days.forEach((day: any, idx: number) => {
    allItems.push(...collectDayTexts(day, `day.${idx}`));
  });

  const context = `Entrepreneurship education track "${source.title}" for micro-entrepreneurs in Africa. Day-by-day coaching program on how to understand, build, and pitch a business.`;

  // Batch in chunks of 40 items to stay within token limits
  const CHUNK = 40;
  const totalBatches = Math.ceil(allItems.length / CHUNK);
  const translations: Record<string, string> = {};

  for (let i = 0; i < allItems.length; i += CHUNK) {
    const chunk = allItems.slice(i, i + CHUNK);
    process.stdout.write(` Translating batch ${Math.floor(i / CHUNK) + 1}/${totalBatches}...`);
    const result = await translateBatch(client, chunk, lang, context);
    Object.assign(translations, result);
    process.stdout.write(' ✓\n');
  }

  // Build translated track
  const translated = {
    trackId: targetId,
    title: translations['track.title'] ?? source.title,
    language: lang,
    description: translations['track.description'] ?? source.description,
    totalDays: source.totalDays,
    version: source.version,
    days: source.days.map((day: any, idx: number) =>
      applyTranslations(day, `day.${idx}`, translations),
    ),
  };

  if (dryRun) {
    console.log('\n [DRY RUN] First day preview:');
    console.log(JSON.stringify(translated.days[0], null, 2));
    return;
  }

  fs.writeFileSync(targetPath, JSON.stringify(translated, null, 2), 'utf-8');
  console.log(` ✅ Written: ${targetFile}`);
}

/**
 * Reads the value of a CLI option given as `--name value` or `--name=value`.
 *
 * @returns The raw value, or undefined when the option is absent.
 * @throws If the option is present without a value.
 */
function readOption(args: string[], name: string): string | undefined {
  const flag = `--${name}`;
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) continue;
    if (arg.startsWith(`${flag}=`)) return arg.slice(flag.length + 1);
    if (arg === flag) {
      const next = args[i + 1];
      if (next === undefined || next.startsWith('--')) {
        throw new Error(`${flag} requires a value`);
      }
      return next;
    }
  }
  return undefined;
}

/** Splits a comma-separated option value, dropping blanks; throws when nothing is left. */
function splitList(value: string, flag: string): string[] {
  const parts = value
    .split(',')
    .map(part => part.trim())
    .filter(part => part.length > 0);
  if (parts.length === 0) throw new Error(`${flag} requires at least one value`);
  return parts;
}

/** Type guard for the supported target language codes. */
function isTargetLang(value: string): value is TargetLang {
  return (TARGET_LANGS as readonly string[]).includes(value);
}

/** Parses `--langs`: case-insensitive, de-duplicated, rejects unsupported codes. */
function parseLangs(value: string): TargetLang[] {
  const langs: TargetLang[] = [];
  for (const raw of splitList(value, '--langs')) {
    const code = raw.toUpperCase();
    if (!isTargetLang(code)) {
      throw new Error(`Unsupported language "${raw}" (supported: ${TARGET_LANGS.join(', ')})`);
    }
    if (!langs.includes(code)) langs.push(code);
  }
  return langs;
}

/**
 * CLI entry point: parses options, then translates every selected source
 * track into every selected language, one request at a time.
 *
 * @throws If an option is malformed or OPENAI_API_KEY is not set.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');
  const tracksArg = readOption(args, 'tracks');
  const langsArg = readOption(args, 'langs');

  const targetLangs: TargetLang[] =
    langsArg === undefined ? [...TARGET_LANGS] : parseLangs(langsArg);

  const sourceTracks =
    tracksArg === undefined
      ? fs.readdirSync(CONTENT_DIR).filter(f => f.endsWith('-FR.json'))
      : // Accept both "T1-FR" and "T1-FR.json".
        splitList(tracksArg, '--tracks').map(t => (t.endsWith('.json') ? t : `${t}.json`));

  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OPENAI_API_KEY is required');

  const client = new OpenAI({ apiKey });

  console.log(
    `🌍 Translating ${sourceTracks.length} tracks → ${targetLangs.join(', ')}${dryRun ? ' [DRY RUN]' : ''}`,
  );
  console.log(`Source tracks: ${sourceTracks.join(', ')}\n`);

  for (const sourceFile of sourceTracks) {
    for (const lang of targetLangs) {
      await translateTrack(client, sourceFile, lang, dryRun);
    }
  }

  console.log('\n🎉 Translation complete!');
  if (!dryRun) {
    console.log('Next step: run the seeder to load translated tracks into DB');
    console.log(' pnpm --filter @repo/database seed');
  }
}

main().catch((err: unknown) => {
  console.error('❌ Translation failed:', err instanceof Error ? err.message : err);
  process.exit(1);
});