CognxSafeTrack commited on
Commit
abc4e24
·
1 Parent(s): 86262f4

feat: implement STT inbound audio, TTS fallback, and Interactive Lists

Browse files
apps/api/src/routes/ai.ts CHANGED
@@ -64,16 +64,22 @@ export async function aiRoutes(fastify: FastifyInstance) {
64
  });
65
 
66
  // 4. Generate TTS Audio for a lesson
67
- fastify.post('/tts', async (request) => {
68
  const bodySchema = z.object({ text: z.string() });
69
  const { text } = bodySchema.parse(request.body);
70
 
71
  console.log(`Generating TTS audio...`);
72
 
73
- const audioBuffer = await aiService.generateSpeech(text);
74
- const downloadUrl = await uploadFile(audioBuffer, `lesson-audio-${Date.now()}.mp3`, 'audio/mpeg');
75
-
76
- return { success: true, url: downloadUrl };
 
 
 
 
 
 
77
  });
78
 
79
  // 5. Transcribe audio (called by Railway worker after downloading media from Meta)
 
64
  });
65
 
66
  // 4. Generate TTS Audio for a lesson
67
+ fastify.post('/tts', async (request, reply) => {
68
  const bodySchema = z.object({ text: z.string() });
69
  const { text } = bodySchema.parse(request.body);
70
 
71
  console.log(`Generating TTS audio...`);
72
 
73
+ try {
74
+ const audioBuffer = await aiService.generateSpeech(text);
75
+ const downloadUrl = await uploadFile(audioBuffer, `lesson-audio-${Date.now()}.mp3`, 'audio/mpeg');
76
+ return { success: true, url: downloadUrl };
77
+ } catch (err: any) {
78
+ if (err?.name === 'QuotaExceededError') {
79
+ return reply.code(429).send({ error: 'quota_exceeded' });
80
+ }
81
+ throw err;
82
+ }
83
  });
84
 
85
  // 5. Transcribe audio (called by Railway worker after downloading media from Meta)
apps/api/src/routes/internal.ts CHANGED
@@ -7,7 +7,7 @@ import { WhatsAppService } from '../services/whatsapp';
7
  */
8
  export async function internalRoutes(fastify: FastifyInstance) {
9
  fastify.post<{
10
- Body: { phone: string; text: string }
11
  }>('/v1/internal/handle-message', {
12
  config: { requireAuth: true } as any,
13
  schema: {
@@ -16,18 +16,19 @@ export async function internalRoutes(fastify: FastifyInstance) {
16
  required: ['phone', 'text'],
17
  properties: {
18
  phone: { type: 'string' },
19
- text: { type: 'string' }
 
20
  }
21
  }
22
  }
23
  }, async (request, reply) => {
24
- const { phone, text } = request.body;
25
- if (!phone || !text) {
26
  return reply.code(400).send({ error: 'phone and text are required' });
27
  }
28
 
29
  // Fire and return — processing is async
30
- WhatsAppService.handleIncomingMessage(phone, text).catch((err: Error) => {
31
  request.log.error(`[INTERNAL] handleIncomingMessage error: ${err.message}`);
32
  });
33
 
 
7
  */
8
  export async function internalRoutes(fastify: FastifyInstance) {
9
  fastify.post<{
10
+ Body: { phone: string; text: string; audioUrl?: string }
11
  }>('/v1/internal/handle-message', {
12
  config: { requireAuth: true } as any,
13
  schema: {
 
16
  required: ['phone', 'text'],
17
  properties: {
18
  phone: { type: 'string' },
19
+ text: { type: 'string' },
20
+ audioUrl: { type: 'string' }
21
  }
22
  }
23
  }
24
  }, async (request, reply) => {
25
+ const { phone, text, audioUrl } = request.body;
26
+ if (!phone || text === undefined) {
27
  return reply.code(400).send({ error: 'phone and text are required' });
28
  }
29
 
30
  // Fire and return — processing is async
31
+ WhatsAppService.handleIncomingMessage(phone, text, audioUrl).catch((err: Error) => {
32
  request.log.error(`[INTERNAL] handleIncomingMessage error: ${err.message}`);
33
  });
34
 
apps/api/src/services/ai/openai-provider.ts CHANGED
@@ -79,13 +79,20 @@ export class OpenAIProvider implements LLMProvider {
79
  async generateSpeech(text: string): Promise<Buffer> {
80
  console.log('[OPENAI] Generating speech TTS...');
81
 
82
- const mp3 = await this.openai.audio.speech.create({
83
- model: 'tts-1',
84
- voice: 'alloy',
85
- input: text,
86
- });
87
-
88
- return Buffer.from(await mp3.arrayBuffer());
 
 
 
 
 
 
 
89
  }
90
  }
91
 
 
79
  async generateSpeech(text: string): Promise<Buffer> {
80
  console.log('[OPENAI] Generating speech TTS...');
81
 
82
+ try {
83
+ const mp3 = await this.openai.audio.speech.create({
84
+ model: 'tts-1',
85
+ voice: 'alloy',
86
+ input: text,
87
+ });
88
+ return Buffer.from(await mp3.arrayBuffer());
89
+ } catch (err: any) {
90
+ if (err?.status === 429 || err?.code === 'insufficient_quota') {
91
+ console.warn('[OPENAI] 429 on generateSpeech');
92
+ throw new QuotaExceededError();
93
+ }
94
+ throw err;
95
+ }
96
  }
97
  }
98
 
apps/api/src/services/whatsapp.ts CHANGED
@@ -3,9 +3,9 @@ import { scheduleMessage, enrollUser, whatsappQueue, scheduleTrackDay, scheduleI
3
  import { QuotaExceededError } from './ai/openai-provider';
4
 
5
  export class WhatsAppService {
6
- static async handleIncomingMessage(phone: string, text: string) {
7
  const normalizedText = text.trim().toUpperCase();
8
- console.log(`Received message from ${phone}: ${normalizedText}`);
9
 
10
  // 1. Find or Create User
11
  let user = await prisma.user.findUnique({ where: { phone } });
@@ -27,6 +27,20 @@ export class WhatsAppService {
27
  }
28
  }
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  // 1.5. Testing / Cheat Codes (Only for registered users)
31
  if (normalizedText === 'INSCRIPTION') {
32
  await prisma.enrollment.deleteMany({ where: { userId: user.id } });
@@ -64,8 +78,8 @@ export class WhatsAppService {
64
  }
65
 
66
  // ─── Interactive LIST action IDs ──────────────────────────────────────
67
- // Format: DAY{N}_EXERCISE | DAY{N}_REPLAY | DAY{N}_CONTINUE
68
- const dayActionMatch = normalizedText.match(/^DAY(\d+)_(EXERCISE|REPLAY|CONTINUE)$/);
69
  if (dayActionMatch) {
70
  const action = dayActionMatch[2];
71
 
@@ -87,10 +101,25 @@ export class WhatsAppService {
87
  return;
88
  } else if (action === 'EXERCISE') {
89
  await scheduleMessage(user.id, user.language === 'WOLOF'
90
- ? "🎙️ Yónnee sa tontu (audio walla texte) :"
91
  : "🎙️ Envoie ta réponse (audio ou texte) :"
92
  );
93
  return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  }
95
  // CONTINUE → fall through to SUITE logic below
96
  }
 
3
  import { QuotaExceededError } from './ai/openai-provider';
4
 
5
  export class WhatsAppService {
6
+ static async handleIncomingMessage(phone: string, text: string, audioUrl?: string) {
7
  const normalizedText = text.trim().toUpperCase();
8
+ console.log(`Received message from ${phone}: ${normalizedText} (Audio: ${audioUrl || 'N/A'})`);
9
 
10
  // 1. Find or Create User
11
  let user = await prisma.user.findUnique({ where: { phone } });
 
27
  }
28
  }
29
 
30
+ // 1.2 Log the incoming message in the DB
31
+ try {
32
+ await prisma.message.create({
33
+ data: {
34
+ content: text,
35
+ mediaUrl: audioUrl,
36
+ direction: 'INBOUND',
37
+ userId: user.id
38
+ }
39
+ });
40
+ } catch (err: any) {
41
+ console.error('[WhatsAppService] Failed to log incoming message:', err.message);
42
+ }
43
+
44
  // 1.5. Testing / Cheat Codes (Only for registered users)
45
  if (normalizedText === 'INSCRIPTION') {
46
  await prisma.enrollment.deleteMany({ where: { userId: user.id } });
 
78
  }
79
 
80
  // ─── Interactive LIST action IDs ──────────────────────────────────────
81
+ // Format: DAY{N}_EXERCISE | DAY{N}_REPLAY | DAY{N}_CONTINUE | DAY{N}_PROMPT
82
+ const dayActionMatch = normalizedText.match(/^DAY(\d+)_(EXERCISE|REPLAY|CONTINUE|PROMPT)$/);
83
  if (dayActionMatch) {
84
  const action = dayActionMatch[2];
85
 
 
101
  return;
102
  } else if (action === 'EXERCISE') {
103
  await scheduleMessage(user.id, user.language === 'WOLOF'
104
+ ? "🎙️ Yónnee sa tontu (audio walla bind) :"
105
  : "🎙️ Envoie ta réponse (audio ou texte) :"
106
  );
107
  return;
108
+ } else if (action === 'PROMPT') {
109
+ const enrollment = await prisma.enrollment.findFirst({
110
+ where: { userId: user.id, status: 'ACTIVE' }
111
+ });
112
+ if (enrollment) {
113
+ const trackDay = await prisma.trackDay.findFirst({
114
+ where: { trackId: enrollment.trackId, dayNumber: enrollment.currentDay }
115
+ });
116
+ if (trackDay?.exercisePrompt) {
117
+ await scheduleMessage(user.id, trackDay.exercisePrompt);
118
+ } else {
119
+ await scheduleMessage(user.id, user.language === 'WOLOF' ? "Amul lëjj" : "Pas d'exercice pour ce jour");
120
+ }
121
+ }
122
+ return;
123
  }
124
  // CONTINUE → fall through to SUITE logic below
125
  }
apps/whatsapp-worker/src/index.ts CHANGED
@@ -120,19 +120,26 @@ const worker = new Worker('whatsapp-queue', async (job: Job) => {
120
  console.log(`[WORKER] Downloading media ${mediaId} for ${phone}...`);
121
 
122
  let transcribedText = '';
 
123
  try {
124
  const { buffer } = await downloadMedia(mediaId, accessToken);
125
 
126
- // Best-effort: store audio on R2 via the API (non-blocking, silent if fails)
127
- // No direct require — that path doesn't exist in the worker Docker image
128
  const API_URL_STORE = process.env.API_URL || 'http://localhost:3001';
129
  const apiKeyStore = process.env.ADMIN_API_KEY || '';
130
- fetch(`${API_URL_STORE}/v1/ai/store-audio`, {
131
- method: 'POST',
132
- headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKeyStore}` },
133
- body: JSON.stringify({ audioBase64: buffer.toString('base64'), mimeType, phone })
134
- }).catch(() => { /* silent audio archiving is optional */ });
135
-
 
 
 
 
 
 
 
136
 
137
  // Transcribe with Whisper via API
138
  const API_URL = process.env.API_URL || 'http://localhost:3001';
@@ -148,17 +155,15 @@ const worker = new Worker('whatsapp-queue', async (job: Job) => {
148
  transcribedText = data.text || '';
149
  console.log(`[WORKER] ✅ Transcribed: "${transcribedText.substring(0, 80)}"`);
150
  } else if (transcribeRes.status === 429) {
151
- // OpenAI quota exceeded — send fallback and requeue
152
  const user = await prisma.user.findFirst({ where: { phone } });
153
  if (user) {
154
  await sendTextMessage(phone, user.language === 'WOLOF'
155
- ? "Bañ ma dégg sa kàddu léegi. Yónnee ci 2 minute."
156
- : "Je n'arrive pas à analyser l'audio maintenant. Réessaie dans 2 minutes ou envoie ta réponse en texte."
157
  );
158
  }
159
- // Requeue with 2-minute delay
160
- const requeueQ = new Queue('whatsapp-queue', { connection: connection as any });
161
- await requeueQ.add('download-media', job.data, { delay: 120_000, priority: 1 });
162
  return;
163
  }
164
  } catch (err: any) {
@@ -174,7 +179,7 @@ const worker = new Worker('whatsapp-queue', async (job: Job) => {
174
  await fetch(`${API_URL}/v1/internal/handle-message`, {
175
  method: 'POST',
176
  headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` },
177
- body: JSON.stringify({ phone, text: transcribedText })
178
  }).catch(e => console.error('[WORKER] handle-message call failed:', e.message));
179
  }
180
  }
 
120
  console.log(`[WORKER] Downloading media ${mediaId} for ${phone}...`);
121
 
122
  let transcribedText = '';
123
+ let audioUrl = '';
124
  try {
125
  const { buffer } = await downloadMedia(mediaId, accessToken);
126
 
127
+ // Store audio on R2 via the API
 
128
  const API_URL_STORE = process.env.API_URL || 'http://localhost:3001';
129
  const apiKeyStore = process.env.ADMIN_API_KEY || '';
130
+ try {
131
+ const storeRes = await fetch(`${API_URL_STORE}/v1/ai/store-audio`, {
132
+ method: 'POST',
133
+ headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKeyStore}` },
134
+ body: JSON.stringify({ audioBase64: buffer.toString('base64'), mimeType, phone })
135
+ });
136
+ if (storeRes.ok) {
137
+ const storeData = await storeRes.json() as any;
138
+ if (storeData.url) audioUrl = storeData.url;
139
+ }
140
+ } catch (err) {
141
+ console.error('[WORKER] store-audio failed to save inbound audio archive (non-blocking)');
142
+ }
143
 
144
  // Transcribe with Whisper via API
145
  const API_URL = process.env.API_URL || 'http://localhost:3001';
 
155
  transcribedText = data.text || '';
156
  console.log(`[WORKER] ✅ Transcribed: "${transcribedText.substring(0, 80)}"`);
157
  } else if (transcribeRes.status === 429) {
158
+ // OpenAI quota exceeded — send fallback and do NOT requeue (requested by user to not block)
159
  const user = await prisma.user.findFirst({ where: { phone } });
160
  if (user) {
161
  await sendTextMessage(phone, user.language === 'WOLOF'
162
+ ? "Bañ ma dégg sa kàddu léegi. Yónnee sa tontu ci bind (texte)."
163
+ : "Je n'arrive pas à analyser l'audio maintenant. Envoie ta réponse en texte (1 phrase)."
164
  );
165
  }
166
+ // Stop processing. The user will still be PENDING in DB, waiting for text reply.
 
 
167
  return;
168
  }
169
  } catch (err: any) {
 
179
  await fetch(`${API_URL}/v1/internal/handle-message`, {
180
  method: 'POST',
181
  headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` },
182
+ body: JSON.stringify({ phone, text: transcribedText, audioUrl })
183
  }).catch(e => console.error('[WORKER] handle-message call failed:', e.message));
184
  }
185
  }
apps/whatsapp-worker/src/pedagogy.ts CHANGED
@@ -101,16 +101,21 @@ export async function sendLessonDay(userId: string, trackId: string, dayNumber:
101
  [{
102
  title: isWolof ? "Jëfandikoo" : "Actions",
103
  rows: [
104
- {
105
- id: `DAY${dayNumber}_EXERCISE`,
106
- title: isWolof ? "🎙️ Yónni tontu" : "🎙️ Répondre",
107
- description: isWolof ? "Yónnee sa tontu ji" : "Envoyer ta réponse à l'exercice"
108
- },
109
  {
110
  id: `DAY${dayNumber}_REPLAY`,
111
- title: isWolof ? "🔁 Dégg ci kanam" : "🔁 Réécouter",
112
  description: isWolof ? "Waxtu bi ci kaw" : "Réécouter la leçon"
113
  },
 
 
 
 
 
 
 
 
 
 
114
  {
115
  id: `DAY${dayNumber}_CONTINUE`,
116
  title: isWolof ? "⏭️ Ci kanam" : "⏭️ Passer",
 
101
  [{
102
  title: isWolof ? "Jëfandikoo" : "Actions",
103
  rows: [
 
 
 
 
 
104
  {
105
  id: `DAY${dayNumber}_REPLAY`,
106
+ title: isWolof ? "🎧 Dégg ci kanam" : "🎧 Réécouter",
107
  description: isWolof ? "Waxtu bi ci kaw" : "Réécouter la leçon"
108
  },
109
+ {
110
+ id: `DAY${dayNumber}_PROMPT`,
111
+ title: isWolof ? "📝 Lëjj bi" : "📝 Faire l'exercice",
112
+ description: isWolof ? "Jàngaat laaj bi" : "Relire la question"
113
+ },
114
+ {
115
+ id: `DAY${dayNumber}_EXERCISE`,
116
+ title: isWolof ? "🎙️ Yónni tontu" : "🎙️ Répondre",
117
+ description: isWolof ? "Yónnee sa tontu ji" : "Envoyer ta réponse"
118
+ },
119
  {
120
  id: `DAY${dayNumber}_CONTINUE`,
121
  title: isWolof ? "⏭️ Ci kanam" : "⏭️ Passer",
packages/database/prisma/schema.prisma CHANGED
@@ -113,6 +113,8 @@ model Message {
113
  userId String
114
  direction Direction // INBOUND, OUTBOUND
115
  channel String @default("WHATSAPP")
 
 
116
  payload Json? // Raw payload from provider
117
  createdAt DateTime @default(now())
118
 
 
113
  userId String
114
  direction Direction // INBOUND, OUTBOUND
115
  channel String @default("WHATSAPP")
116
+ content String?
117
+ mediaUrl String?
118
  payload Json? // Raw payload from provider
119
  createdAt DateTime @default(now())
120