CognxSafeTrack Claude Sonnet 4.6 commited on
Commit
697c637
Β·
1 Parent(s): f786c37

feat(content): add AI track translation script for EN/ES/PT

Browse files

Adds translate-tracks.ts β€” a GPT-4o script that reads all T*-FR.json
track files and generates translated versions (T*-EN, T*-ES, T*-PT).

Translates: title, lessonText, exercisePrompt, videoCaption,
exerciseCriteria text fields, and buttonsJson titles.
Preserves: IDs, URLs, dayNumbers, weights, thresholds, badges.
Sets audioUrl=null so TTS auto-generates on first delivery.

Commands:
pnpm --filter @repo /database translate:tracks # run all
pnpm --filter @repo /database translate:tracks:dry # preview only

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

packages/database/package.json CHANGED
@@ -14,7 +14,9 @@
14
  "generate": "prisma generate",
15
  "migrate:deploy": "prisma migrate deploy",
16
  "migrate:dev": "prisma migrate dev",
17
- "validate:content": "ts-node scripts/validate-content.ts"
 
 
18
  },
19
  "prisma": {
20
  "seed": "tsx seed.ts"
@@ -27,6 +29,7 @@
27
  "@repo/tsconfig": "workspace:*",
28
  "@types/node": "^20.0.0",
29
  "dotenv": "^17.4.2",
 
30
  "prisma": "^5.0.0",
31
  "ts-node": "^10.9.2"
32
  }
 
14
  "generate": "prisma generate",
15
  "migrate:deploy": "prisma migrate deploy",
16
  "migrate:dev": "prisma migrate dev",
17
+ "validate:content": "ts-node scripts/validate-content.ts",
18
+ "translate:tracks": "tsx src/translate-tracks.ts",
19
+ "translate:tracks:dry": "tsx src/translate-tracks.ts --dry-run"
20
  },
21
  "prisma": {
22
  "seed": "tsx seed.ts"
 
29
  "@repo/tsconfig": "workspace:*",
30
  "@types/node": "^20.0.0",
31
  "dotenv": "^17.4.2",
32
+ "openai": "^4.0.0",
33
  "prisma": "^5.0.0",
34
  "ts-node": "^10.9.2"
35
  }
packages/database/src/translate-tracks.ts ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * translate-tracks.ts
3
+ *
4
+ * Translates all T*-FR.json track files into EN, ES, PT using GPT-4o.
5
+ * Preserves all non-text fields (IDs, URLs, dayNumber, weights, thresholds).
6
+ * Leaves audioUrl null so TTS is auto-generated on first lesson delivery.
7
+ *
8
+ * Usage:
9
+ * OPENAI_API_KEY=sk-... npx tsx packages/database/src/translate-tracks.ts
10
+ *
11
+ * Options:
12
+ * --tracks T1-FR,T2-FR Translate specific tracks only (default: all)
13
+ * --langs EN,ES Target languages only (default: EN,ES,PT)
14
+ * --dry-run Print first day only, no file writes
15
+ */
16
+
17
+ import fs from 'fs';
18
+ import path from 'path';
19
+ import OpenAI from 'openai';
20
+
21
/** Directory containing the track JSON files, resolved relative to this script. */
const CONTENT_DIR = path.join(__dirname, '../content/tracks');

/** Language codes a French source track can be translated into. */
const TARGET_LANGS = ['EN', 'ES', 'PT'] as const;
type TargetLang = (typeof TARGET_LANGS)[number];

/** Human-readable language names interpolated into the translation prompt. */
const LANG_LABELS: Record<TargetLang, string> = {
  EN: 'English',
  ES: 'Spanish (Latin American)',
  PT: 'Portuguese (Brazilian)',
};

/** Suffix that replaces "FR" in track IDs and file names for each target language. */
const LANG_SUFFIX: Record<TargetLang, string> = { EN: 'EN', ES: 'ES', PT: 'PT' };

/** String fields translated on each day entry. */
const TEXT_FIELDS_DAY = ['title', 'lessonText', 'exercisePrompt', 'videoCaption'] as const;

/** String fields translated on the track object itself. */
const TEXT_FIELDS_TRACK = ['title', 'description'] as const;

/** A single string queued for translation, identified by a key. */
interface TranslationBatch {
  key: string;
  text: string;
}
47
+
48
/**
 * Translates a batch of French strings into `lang` with one GPT-4o chat completion.
 *
 * @param client  OpenAI client used to issue the request.
 * @param items   Strings to translate, each tagged with a key.
 * @param lang    Target language code; its label from LANG_LABELS is put in the prompt.
 * @param context Short description of the track, embedded in the prompt.
 * @returns Map from item key to translated text. Empty when `items` is empty
 *          (no request is made). Only keys that were requested and whose value
 *          is a string are returned; anything else in the response is dropped.
 * @throws Error when the response has no content or is not a JSON object;
 *         SyntaxError (from JSON.parse) when the content is not valid JSON.
 */
async function translateBatch(
  client: OpenAI,
  items: TranslationBatch[],
  lang: TargetLang,
  context: string,
): Promise<Record<string, string>> {
  if (items.length === 0) return {};

  const itemsJson = JSON.stringify(
    items.map(i => ({ key: i.key, text: i.text })),
    null,
    2,
  );

  const prompt = `You are a professional educational content translator for entrepreneurship training in West Africa.

Translate the following strings from French to ${LANG_LABELS[lang]}.

Context: ${context}

Rules:
- Keep the exact tone: direct, encouraging, coach-like
- Adapt examples to feel natural in ${LANG_LABELS[lang]}-speaking African contexts when appropriate
- Keep all formatting (✅, numbering, line breaks \\n\\n, bold asterisks *word*)
- Keep all placeholder patterns like [PRODUCT], [CLIENT], [LIEU] — translate the word inside brackets
- For "videoCaption" fields (Wolof text starting with emojis): translate to ${LANG_LABELS[lang]} keeping the emoji and structure
- Return ONLY a JSON object mapping each "key" to its translated "text"
- Do not add explanations

Items to translate:
${itemsJson}

Return format:
{"key1": "translated text 1", "key2": "translated text 2", ...}`;

  const response = await client.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: prompt }],
    response_format: { type: 'json_object' },
    temperature: 0.3,
  });

  const content = response.choices[0]?.message?.content;
  if (!content) throw new Error('Empty response from GPT-4o');

  // The model output is untrusted: narrow it before treating it as a string map.
  const parsed: unknown = JSON.parse(content);
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error('GPT-4o response is not a JSON object');
  }

  // Keep only the keys we asked for, and only when the value is a string, so a
  // stray or malformed entry cannot overwrite an unrelated field downstream.
  const requestedKeys = new Set(items.map(i => i.key));
  const translations: Record<string, string> = {};
  for (const [key, value] of Object.entries(parsed)) {
    if (requestedKeys.has(key) && typeof value === 'string') {
      translations[key] = value;
    }
  }
  return translations;
}
94
+
95
/**
 * Gathers every translatable string of one day entry into a flat list.
 *
 * Keys are built as `${prefix}.<path>`, using the same paths that
 * applyTranslations reads back:
 *   - `<field>` for each name in TEXT_FIELDS_DAY
 *   - `criteria.goal`, `criteria.examples`, `criteria.hint`
 *   - `criteria.must.<idx>.desc` for entries of exerciseCriteria.success.mustInclude
 *   - `btn.<idx>.title` for entries of buttonsJson
 *
 * Only non-empty string values are collected, for nested fields as well as
 * top-level ones, so every item satisfies `TranslationBatch.text: string`.
 *
 * @param day    Day object read from a track JSON file.
 * @param prefix Key prefix identifying the day (e.g. `day.0`).
 * @returns Items to translate, in the order listed above.
 */
function collectDayTexts(day: any, prefix: string): TranslationBatch[] {
  const items: TranslationBatch[] = [];

  // Single gate shared by all fields: skip anything that is not a non-empty string.
  const queue = (suffix: string, value: unknown): void => {
    if (typeof value === 'string' && value !== '') {
      items.push({ key: `${prefix}.${suffix}`, text: value });
    }
  };

  for (const field of TEXT_FIELDS_DAY) {
    queue(field, day[field]);
  }

  // exerciseCriteria nested text
  const crit = day.exerciseCriteria;
  if (crit) {
    queue('criteria.goal', crit.goal);
    queue('criteria.examples', crit.evaluation?.examples);
    queue('criteria.hint', crit.remediation?.hint);

    // mustInclude descriptions; optional chaining tolerates null array entries
    if (Array.isArray(crit.success?.mustInclude)) {
      crit.success.mustInclude.forEach((item: any, idx: number) => {
        queue(`criteria.must.${idx}.desc`, item?.desc);
      });
    }
  }

  // buttonsJson titles (IDs are left untouched)
  if (Array.isArray(day.buttonsJson)) {
    day.buttonsJson.forEach((btn: any, idx: number) => {
      queue(`btn.${idx}.title`, btn?.title);
    });
  }

  return items;
}
132
+
133
/**
 * Returns a copy of `day` with translated strings substituted in.
 *
 * The input object is not mutated: the day and each nested object that is
 * modified (exerciseCriteria, evaluation, remediation, success, and the
 * mustInclude / buttonsJson entries) are shallow-copied before being changed.
 * A field keeps its original value when no non-empty string translation exists
 * for its key, so an empty or malformed translation never blanks a field.
 *
 * @param day          Source day object.
 * @param prefix       Key prefix for this day (e.g. `day.0`), matching collectDayTexts.
 * @param translations Map from key to translated text.
 * @returns The translated day; `audioUrl` is always set to null.
 */
function applyTranslations(day: any, prefix: string, translations: Record<string, string>): any {
  // One lookup rule for every field: a usable translation is a non-empty string.
  const lookup = (suffix: string): string | undefined => {
    const value: unknown = translations[`${prefix}.${suffix}`];
    return typeof value === 'string' && value !== '' ? value : undefined;
  };

  const d = { ...day };

  for (const field of TEXT_FIELDS_DAY) {
    const text = lookup(field);
    if (text !== undefined) d[field] = text;
  }

  // exerciseCriteria
  if (d.exerciseCriteria) {
    const crit = { ...d.exerciseCriteria };

    const goal = lookup('criteria.goal');
    if (goal !== undefined) crit.goal = goal;

    if (crit.evaluation) {
      crit.evaluation = { ...crit.evaluation };
      const examples = lookup('criteria.examples');
      if (examples !== undefined) crit.evaluation.examples = examples;
    }

    if (crit.remediation) {
      crit.remediation = { ...crit.remediation };
      const hint = lookup('criteria.hint');
      if (hint !== undefined) crit.remediation.hint = hint;
    }

    // Same Array.isArray guard as collectDayTexts, so a non-array value cannot throw on .map
    if (Array.isArray(crit.success?.mustInclude)) {
      crit.success = {
        ...crit.success,
        mustInclude: crit.success.mustInclude.map((item: any, idx: number) => {
          const desc = lookup(`criteria.must.${idx}.desc`);
          return desc !== undefined ? { ...item, desc } : item;
        }),
      };
    }

    d.exerciseCriteria = crit;
  }

  // buttonsJson
  if (Array.isArray(d.buttonsJson)) {
    d.buttonsJson = d.buttonsJson.map((btn: any, idx: number) => {
      const title = lookup(`btn.${idx}.title`);
      return title !== undefined ? { ...btn, title } : btn;
    });
  }

  // Always clear audioUrl — TTS auto-generates on first delivery
  d.audioUrl = null;

  return d;
}
183
+
184
/**
 * Translates one French track file into `lang` and writes the result next to it.
 *
 * Steps: read and parse the source JSON, derive the target ID and file name by
 * replacing "-FR", collect translatable strings, translate them in chunks via
 * translateBatch, rebuild the track with applyTranslations, then write the file.
 *
 * Outside dry-run mode an existing target file is never overwritten: the track
 * is skipped, which lets an interrupted run be resumed.
 *
 * In dry-run mode nothing is written and only the first day is previewed, so
 * only the first day's strings are sent for translation.
 *
 * @param client     OpenAI client passed through to translateBatch.
 * @param sourceFile File name (not path) of the source track inside CONTENT_DIR.
 * @param lang       Target language code.
 * @param dryRun     When true, print a preview of the first translated day instead of writing.
 * @throws Error when the source file lacks a string `trackId` or a `days` array;
 *         also propagates file-system, JSON.parse and translateBatch errors.
 */
async function translateTrack(
  client: OpenAI,
  sourceFile: string,
  lang: TargetLang,
  dryRun: boolean,
): Promise<void> {
  const sourcePath = path.join(CONTENT_DIR, sourceFile);
  const source = JSON.parse(fs.readFileSync(sourcePath, 'utf-8'));

  // Fail with a readable message instead of a TypeError deep in the function.
  if (typeof source?.trackId !== 'string' || !Array.isArray(source.days)) {
    throw new Error(`${sourceFile}: expected a string "trackId" and a "days" array`);
  }

  const sourceId: string = source.trackId; // e.g. T1-FR
  const targetId = sourceId.replace('-FR', `-${LANG_SUFFIX[lang]}`);
  const targetFile = sourceFile.replace('-FR.json', `-${LANG_SUFFIX[lang]}.json`);
  const targetPath = path.join(CONTENT_DIR, targetFile);

  console.log(`\n[${lang}] ${sourceId} → ${targetId}`);

  // Skip if already exists (resume support)
  if (!dryRun && fs.existsSync(targetPath)) {
    console.log(` ⏭ ${targetFile} already exists — skipping`);
    return;
  }

  const allItems: TranslationBatch[] = [];

  // Track-level texts are not shown in the dry-run preview, so skip them there.
  if (!dryRun) {
    for (const field of TEXT_FIELDS_TRACK) {
      if (source[field]) allItems.push({ key: `track.${field}`, text: source[field] });
    }
  }

  // Day-level texts. A dry run previews only day 0, so only that day is translated.
  const daysToCollect: any[] = dryRun ? source.days.slice(0, 1) : source.days;
  daysToCollect.forEach((day: any, idx: number) => {
    allItems.push(...collectDayTexts(day, `day.${idx}`));
  });

  const context = `Entrepreneurship education track "${source.title}" for micro-entrepreneurs in Africa. Day-by-day coaching program on how to understand, build, and pitch a business.`;

  // Batch in chunks of 40 items to stay within token limits
  const CHUNK = 40;
  const totalBatches = Math.ceil(allItems.length / CHUNK);
  const translations: Record<string, string> = {};

  for (let i = 0; i < allItems.length; i += CHUNK) {
    const chunk = allItems.slice(i, i + CHUNK);
    process.stdout.write(` Translating batch ${Math.floor(i / CHUNK) + 1}/${totalBatches}...`);
    const result = await translateBatch(client, chunk, lang, context);
    Object.assign(translations, result);
    process.stdout.write(' ✓\n');
  }

  // Strings the model did not return stay in French; report them rather than hide it.
  const missing = allItems
    .map(item => item.key)
    .filter(key => !Object.prototype.hasOwnProperty.call(translations, key));
  if (missing.length > 0) {
    console.warn(` Warning: ${missing.length} string(s) left untranslated: ${missing.join(', ')}`);
  }

  // Build translated track
  const translated = {
    trackId: targetId,
    title: translations['track.title'] ?? source.title,
    language: lang,
    description: translations['track.description'] ?? source.description,
    totalDays: source.totalDays,
    version: source.version,
    days: source.days.map((day: any, idx: number) =>
      applyTranslations(day, `day.${idx}`, translations),
    ),
  };

  if (dryRun) {
    console.log('\n [DRY RUN] First day preview:');
    console.log(JSON.stringify(translated.days[0], null, 2));
    return;
  }

  fs.writeFileSync(targetPath, JSON.stringify(translated, null, 2), 'utf-8');
  console.log(` ✅ Written: ${targetFile}`);
}
256
+
257
/**
 * CLI entry point: parses arguments, then translates every selected source
 * track into every selected language, one translateTrack call at a time.
 *
 * Recognised arguments:
 *   --dry-run            preview only, no files written
 *   --tracks <list>      comma-separated track names (default: every *-FR.json in CONTENT_DIR)
 *   --langs <list>       comma-separated target languages (default: all of TARGET_LANGS)
 * List options accept both `--name=value` and `--name value`. Language codes
 * are case-insensitive; track names may be given with or without `.json`.
 *
 * @throws Error when OPENAI_API_KEY is not set or a --langs value is not supported.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');

  // Reads a comma-separated option in either `--name=a,b` or `--name a,b` form.
  // Empty entries are dropped, so `--name=` behaves as if the option were absent.
  const readList = (name: string): string[] => {
    const flag = `--${name}`;
    const idx = args.findIndex(a => a === flag || a.startsWith(`${flag}=`));
    if (idx === -1) return [];
    const arg = args[idx];
    const next = args[idx + 1];
    const raw =
      arg === flag
        ? (next !== undefined && !next.startsWith('--') ? next : '')
        : arg.slice(flag.length + 1);
    return raw
      .split(',')
      .map(part => part.trim())
      .filter(part => part !== '');
  };

  const isTargetLang = (value: string): value is TargetLang =>
    (TARGET_LANGS as readonly string[]).includes(value);

  // Reject unknown languages up front; otherwise the prompt would ask for
  // a translation into "undefined" and write a file with a bogus suffix.
  const requestedLangs = readList('langs').map(l => l.toUpperCase());
  const unsupported = requestedLangs.filter(l => !isTargetLang(l));
  if (unsupported.length > 0) {
    throw new Error(
      `Unsupported --langs value(s): ${unsupported.join(', ')} (expected: ${TARGET_LANGS.join(', ')})`,
    );
  }
  const targetLangs: TargetLang[] =
    requestedLangs.length > 0 ? requestedLangs.filter(isTargetLang) : [...TARGET_LANGS];

  const requestedTracks = readList('tracks');
  const sourceTracks =
    requestedTracks.length > 0
      ? requestedTracks.map(t => (t.endsWith('.json') ? t : `${t}.json`))
      : fs.readdirSync(CONTENT_DIR).filter(f => f.endsWith('-FR.json'));

  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OPENAI_API_KEY is required');

  const client = new OpenAI({ apiKey });

  console.log(`🌍 Translating ${sourceTracks.length} tracks → ${targetLangs.join(', ')}${dryRun ? ' [DRY RUN]' : ''}`);
  console.log(`Source tracks: ${sourceTracks.join(', ')}\n`);

  // Sequential on purpose: one track/language pair at a time, in a stable order.
  for (const sourceFile of sourceTracks) {
    for (const lang of targetLangs) {
      await translateTrack(client, sourceFile, lang, dryRun);
    }
  }

  console.log('\n🎉 Translation complete!');
  if (!dryRun) {
    console.log('Next step: run the seeder to load translated tracks into DB');
    console.log(' pnpm --filter @repo/database seed');
  }
}
288
+
289
// Run the CLI. Any failure is reported on stderr and the process exits with status 1.
main().catch((err: unknown) => {
  // A rejection value is not guaranteed to be an Error, so narrow before reading `.message`.
  const message = err instanceof Error ? err.message : String(err);
  console.error('❌ Translation failed:', message);
  process.exit(1);
});
pnpm-lock.yaml CHANGED
@@ -416,6 +416,9 @@ importers:
416
  dotenv:
417
  specifier: ^17.4.2
418
  version: 17.4.2
 
 
 
419
  prisma:
420
  specifier: ^5.0.0
421
  version: 5.22.0
@@ -8730,6 +8733,21 @@ snapshots:
8730
  transitivePeerDependencies:
8731
  - encoding
8732
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8733
  p-limit@5.0.0:
8734
  dependencies:
8735
  yocto-queue: 1.2.2
 
416
  dotenv:
417
  specifier: ^17.4.2
418
  version: 17.4.2
419
+ openai:
420
+ specifier: ^4.0.0
421
+ version: 4.104.0(ws@8.19.0)(zod@4.3.6)
422
  prisma:
423
  specifier: ^5.0.0
424
  version: 5.22.0
 
8733
  transitivePeerDependencies:
8734
  - encoding
8735
 
8736
+ openai@4.104.0(ws@8.19.0)(zod@4.3.6):
8737
+ dependencies:
8738
+ '@types/node': 18.19.130
8739
+ '@types/node-fetch': 2.6.13
8740
+ abort-controller: 3.0.0
8741
+ agentkeepalive: 4.6.0
8742
+ form-data-encoder: 1.7.2
8743
+ formdata-node: 4.4.1
8744
+ node-fetch: 2.7.0
8745
+ optionalDependencies:
8746
+ ws: 8.19.0
8747
+ zod: 4.3.6
8748
+ transitivePeerDependencies:
8749
+ - encoding
8750
+
8751
  p-limit@5.0.0:
8752
  dependencies:
8753
  yocto-queue: 1.2.2