Talha812 committed on
Commit
12e9c1c
·
verified ·
1 Parent(s): 8c809a4

Update src/modules/voice/voice.service.ts

Browse files
Files changed (1) hide show
  1. src/modules/voice/voice.service.ts +139 -139
src/modules/voice/voice.service.ts CHANGED
@@ -1,137 +1,3 @@
1
- // import { config } from "@/config";
2
- // import * as googleTTS from 'google-tts-api';
3
- // import axios from 'axios';
4
- // import FormData from 'form-data';
5
-
6
- // export class VoiceService {
7
-
8
- // async transcribeAudio(audioBuffer: Buffer): Promise<any> {
9
- // try {
10
- // // Detect file type from magic bytes
11
- // const ext = this.detectAudioType(audioBuffer);
12
- // console.log(`[STT] Received ${audioBuffer.length} bytes, detected type: ${ext}`);
13
-
14
- // const form = new FormData();
15
- // form.append('file', audioBuffer, {
16
- // filename: `audio.${ext}`,
17
- // contentType: this.getMimeType(ext),
18
- // });
19
- // form.append('model', 'whisper-large-v3');
20
- // // Remove hardcoded 'ur' to allow auto-detection for English/Urdu bilingual support
21
- // // form.append('language', 'ur');
22
- // form.append('temperature', '0');
23
- // form.append('response_format', 'json');
24
-
25
- // const response = await axios.post(
26
- // 'https://api.groq.com/openai/v1/audio/transcriptions',
27
- // form,
28
- // {
29
- // headers: {
30
- // 'Authorization': `Bearer ${config.llmModel.groqApiKey}`,
31
- // ...form.getHeaders(),
32
- // },
33
- // maxContentLength: Infinity,
34
- // maxBodyLength: Infinity,
35
- // }
36
- // );
37
-
38
- // const text = response.data?.text || '';
39
-
40
- // if (!text.trim()) {
41
- // console.warn('[STT] Empty transcription returned');
42
- // return { error: "STT returned empty text" };
43
- // }
44
-
45
- // console.log(`\n\x1b[32m========== USER INPUT ==========\x1b[0m`);
46
- // console.log(`\x1b[32m[User]: ${text}\x1b[0m`);
47
- // console.log(`\x1b[32m================================\x1b[0m\n`);
48
-
49
- // return { text };
50
-
51
- // } catch (error: any) {
52
- // if (error.response) {
53
- // console.error(`[STT] Groq API Error ${error.response.status}:`, error.response.data);
54
- // return { error: "STT Failed", status: error.response.status, details: error.response.data };
55
- // }
56
- // console.error("Groq Transcribe Error:", error.message || error);
57
- // return { error: "STT Failed", details: error.message || String(error) };
58
- // }
59
- // }
60
-
61
- // private detectAudioType(buffer: Buffer): string {
62
- // // WebM magic bytes: 1A 45 DF A3
63
- // if (buffer.length >= 4 && buffer[0] === 0x1A && buffer[1] === 0x45 && buffer[2] === 0xDF && buffer[3] === 0xA3) {
64
- // return 'webm';
65
- // }
66
- // // WAV magic bytes: RIFF....WAVE
67
- // if (buffer.length >= 12 && buffer.toString('ascii', 0, 4) === 'RIFF' && buffer.toString('ascii', 8, 12) === 'WAVE') {
68
- // return 'wav';
69
- // }
70
- // // MP4/M4A magic bytes: ....ftyp
71
- // if (buffer.length >= 8 && buffer.toString('ascii', 4, 8) === 'ftyp') {
72
- // return 'm4a';
73
- // }
74
- // // OGG magic bytes: OggS
75
- // if (buffer.length >= 4 && buffer.toString('ascii', 0, 4) === 'OggS') {
76
- // return 'ogg';
77
- // }
78
- // // Default to webm (most common from browser)
79
- // return 'webm';
80
- // }
81
-
82
- // private getMimeType(ext: string): string {
83
- // const mimeTypes: Record<string, string> = {
84
- // 'webm': 'audio/webm',
85
- // 'wav': 'audio/wav',
86
- // 'm4a': 'audio/mp4',
87
- // 'ogg': 'audio/ogg',
88
- // 'mp3': 'audio/mpeg',
89
- // };
90
- // return mimeTypes[ext] || 'application/octet-stream';
91
- // }
92
-
93
- // async generateTTSAudio(text: string): Promise<Buffer> {
94
- // try {
95
- // console.log(`[TTS] Requesting Google TTS for: "${text.substring(0, 30)}..." (Length: ${text.length})`);
96
-
97
- // // Detect if Urdu script present to set language
98
- // const isUrdu = /[\u0600-\u06FF]/.test(text);
99
- // const lang = isUrdu ? 'ur' : 'en';
100
-
101
- // // Split text into chunks of 200 characters (Google TTS limit)
102
- // const chunks = googleTTS.getAllAudioUrls(text, {
103
- // lang: lang,
104
- // slow: false,
105
- // host: 'https://translate.google.com',
106
- // splitPunct: '. ',
107
- // });
108
-
109
- // console.log(`[TTS] Fetching ${chunks.length} audio chunks from Google...`);
110
-
111
- // const bufferChunks: Buffer[] = [];
112
- // // Run requests in parallel to reduce delay
113
- // const requests = chunks.map(chunk =>
114
- // axios.get(chunk.url, { responseType: 'arraybuffer' })
115
- // );
116
-
117
- // const responses = await Promise.all(requests);
118
- // for (const response of responses) {
119
- // bufferChunks.push(Buffer.from(response.data));
120
- // }
121
-
122
- // const finalBuffer = Buffer.concat(bufferChunks);
123
- // console.log(`[TTS] Successfully received and concatenated ${chunks.length} chunks (${finalBuffer.byteLength} bytes)`);
124
- // return finalBuffer;
125
-
126
- // } catch (error: any) {
127
- // console.error("Google TTS Service Error:", error.message || error);
128
- // throw error;
129
- // }
130
- // }
131
- // }
132
-
133
-
134
-
135
  import { config } from "@/config";
136
  import * as googleTTS from 'google-tts-api';
137
  import axios from 'axios';
@@ -150,7 +16,7 @@ export class VoiceService {
150
  filename: `audio.${ext}`,
151
  contentType: this.getMimeType(ext),
152
  });
153
- form.append('model', 'whisper-large-v3-turbo');
154
  // Remove hardcoded 'ur' to allow auto-detection for English/Urdu bilingual support
155
  // form.append('language', 'ur');
156
  form.append('temperature', '0');
@@ -224,14 +90,13 @@ export class VoiceService {
224
  return mimeTypes[ext] || 'application/octet-stream';
225
  }
226
 
227
- async generateTTSAudio(text: string, language?: string): Promise<Buffer> {
228
  try {
229
  console.log(`[TTS] Requesting Google TTS for: "${text.substring(0, 30)}..." (Length: ${text.length})`);
230
 
231
- // Use explicit language if provided, otherwise auto-detect from text
232
  const isUrdu = /[\u0600-\u06FF]/.test(text);
233
- const lang = language || (isUrdu ? 'ur' : 'en');
234
- console.log(`[TTS] Language: ${lang} (explicit: ${!!language}, urduScript: ${isUrdu})`);
235
 
236
  // Split text into chunks of 200 characters (Google TTS limit)
237
  const chunks = googleTTS.getAllAudioUrls(text, {
@@ -264,3 +129,138 @@ export class VoiceService {
264
  }
265
  }
266
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import { config } from "@/config";
2
  import * as googleTTS from 'google-tts-api';
3
  import axios from 'axios';
 
16
  filename: `audio.${ext}`,
17
  contentType: this.getMimeType(ext),
18
  });
19
+ form.append('model', 'whisper-large-v3');
20
  // Remove hardcoded 'ur' to allow auto-detection for English/Urdu bilingual support
21
  // form.append('language', 'ur');
22
  form.append('temperature', '0');
 
90
  return mimeTypes[ext] || 'application/octet-stream';
91
  }
92
 
93
+ async generateTTSAudio(text: string): Promise<Buffer> {
94
  try {
95
  console.log(`[TTS] Requesting Google TTS for: "${text.substring(0, 30)}..." (Length: ${text.length})`);
96
 
97
+ // Detect if Urdu script present to set language
98
  const isUrdu = /[\u0600-\u06FF]/.test(text);
99
+ const lang = isUrdu ? 'ur' : 'en';
 
100
 
101
  // Split text into chunks of 200 characters (Google TTS limit)
102
  const chunks = googleTTS.getAllAudioUrls(text, {
 
129
  }
130
  }
131
  }
132
+
133
+
134
+
135
+ // import { config } from "@/config";
136
+ // import * as googleTTS from 'google-tts-api';
137
+ // import axios from 'axios';
138
+ // import FormData from 'form-data';
139
+
140
+ // export class VoiceService {
141
+
142
+ // async transcribeAudio(audioBuffer: Buffer): Promise<any> {
143
+ // try {
144
+ // // Detect file type from magic bytes
145
+ // const ext = this.detectAudioType(audioBuffer);
146
+ // console.log(`[STT] Received ${audioBuffer.length} bytes, detected type: ${ext}`);
147
+
148
+ // const form = new FormData();
149
+ // form.append('file', audioBuffer, {
150
+ // filename: `audio.${ext}`,
151
+ // contentType: this.getMimeType(ext),
152
+ // });
153
+ // form.append('model', 'whisper-large-v3-turbo');
154
+ // // Remove hardcoded 'ur' to allow auto-detection for English/Urdu bilingual support
155
+ // // form.append('language', 'ur');
156
+ // form.append('temperature', '0');
157
+ // form.append('response_format', 'json');
158
+
159
+ // const response = await axios.post(
160
+ // 'https://api.groq.com/openai/v1/audio/transcriptions',
161
+ // form,
162
+ // {
163
+ // headers: {
164
+ // 'Authorization': `Bearer ${config.llmModel.groqApiKey}`,
165
+ // ...form.getHeaders(),
166
+ // },
167
+ // maxContentLength: Infinity,
168
+ // maxBodyLength: Infinity,
169
+ // }
170
+ // );
171
+
172
+ // const text = response.data?.text || '';
173
+
174
+ // if (!text.trim()) {
175
+ // console.warn('[STT] Empty transcription returned');
176
+ // return { error: "STT returned empty text" };
177
+ // }
178
+
179
+ // console.log(`\n\x1b[32m========== USER INPUT ==========\x1b[0m`);
180
+ // console.log(`\x1b[32m[User]: ${text}\x1b[0m`);
181
+ // console.log(`\x1b[32m================================\x1b[0m\n`);
182
+
183
+ // return { text };
184
+
185
+ // } catch (error: any) {
186
+ // if (error.response) {
187
+ // console.error(`[STT] Groq API Error ${error.response.status}:`, error.response.data);
188
+ // return { error: "STT Failed", status: error.response.status, details: error.response.data };
189
+ // }
190
+ // console.error("Groq Transcribe Error:", error.message || error);
191
+ // return { error: "STT Failed", details: error.message || String(error) };
192
+ // }
193
+ // }
194
+
195
+ // private detectAudioType(buffer: Buffer): string {
196
+ // // WebM magic bytes: 1A 45 DF A3
197
+ // if (buffer.length >= 4 && buffer[0] === 0x1A && buffer[1] === 0x45 && buffer[2] === 0xDF && buffer[3] === 0xA3) {
198
+ // return 'webm';
199
+ // }
200
+ // // WAV magic bytes: RIFF....WAVE
201
+ // if (buffer.length >= 12 && buffer.toString('ascii', 0, 4) === 'RIFF' && buffer.toString('ascii', 8, 12) === 'WAVE') {
202
+ // return 'wav';
203
+ // }
204
+ // // MP4/M4A magic bytes: ....ftyp
205
+ // if (buffer.length >= 8 && buffer.toString('ascii', 4, 8) === 'ftyp') {
206
+ // return 'm4a';
207
+ // }
208
+ // // OGG magic bytes: OggS
209
+ // if (buffer.length >= 4 && buffer.toString('ascii', 0, 4) === 'OggS') {
210
+ // return 'ogg';
211
+ // }
212
+ // // Default to webm (most common from browser)
213
+ // return 'webm';
214
+ // }
215
+
216
+ // private getMimeType(ext: string): string {
217
+ // const mimeTypes: Record<string, string> = {
218
+ // 'webm': 'audio/webm',
219
+ // 'wav': 'audio/wav',
220
+ // 'm4a': 'audio/mp4',
221
+ // 'ogg': 'audio/ogg',
222
+ // 'mp3': 'audio/mpeg',
223
+ // };
224
+ // return mimeTypes[ext] || 'application/octet-stream';
225
+ // }
226
+
227
+ // async generateTTSAudio(text: string, language?: string): Promise<Buffer> {
228
+ // try {
229
+ // console.log(`[TTS] Requesting Google TTS for: "${text.substring(0, 30)}..." (Length: ${text.length})`);
230
+
231
+ // // Use explicit language if provided, otherwise auto-detect from text
232
+ // const isUrdu = /[\u0600-\u06FF]/.test(text);
233
+ // const lang = language || (isUrdu ? 'ur' : 'en');
234
+ // console.log(`[TTS] Language: ${lang} (explicit: ${!!language}, urduScript: ${isUrdu})`);
235
+
236
+ // // Split text into chunks of 200 characters (Google TTS limit)
237
+ // const chunks = googleTTS.getAllAudioUrls(text, {
238
+ // lang: lang,
239
+ // slow: false,
240
+ // host: 'https://translate.google.com',
241
+ // splitPunct: '. ',
242
+ // });
243
+
244
+ // console.log(`[TTS] Fetching ${chunks.length} audio chunks from Google...`);
245
+
246
+ // const bufferChunks: Buffer[] = [];
247
+ // // Run requests in parallel to reduce delay
248
+ // const requests = chunks.map(chunk =>
249
+ // axios.get(chunk.url, { responseType: 'arraybuffer' })
250
+ // );
251
+
252
+ // const responses = await Promise.all(requests);
253
+ // for (const response of responses) {
254
+ // bufferChunks.push(Buffer.from(response.data));
255
+ // }
256
+
257
+ // const finalBuffer = Buffer.concat(bufferChunks);
258
+ // console.log(`[TTS] Successfully received and concatenated ${chunks.length} chunks (${finalBuffer.byteLength} bytes)`);
259
+ // return finalBuffer;
260
+
261
+ // } catch (error: any) {
262
+ // console.error("Google TTS Service Error:", error.message || error);
263
+ // throw error;
264
+ // }
265
+ // }
266
+ // }