INDEX / modules /AudioProcessor.js
akra35567's picture
Upload 18 files
7226ab4 verified
/**
* ═══════════════════════════════════════════════════════════════════════
* CLASSE: AudioProcessor
* ═══════════════════════════════════════════════════════════════════════
* Gerencia STT (Speech-to-Text), TTS (Text-to-Speech) e processamento de áudio
* Integração com Deepgram e Google TTS
* ═══════════════════════════════════════════════════════════════════════
*/
const axios = require('axios');
const fs = require('fs');
const path = require('path');
const ffmpeg = require('fluent-ffmpeg');
const googleTTS = require('google-tts-api');
const ConfigManager = require('./ConfigManager');
class AudioProcessor {
  /**
   * Builds a processor bound to the shared ConfigManager instance.
   *
   * @param {object|null} [logger=null] - Object exposing info/warn/error/debug;
   *   `console` is used when omitted.
   */
  constructor(logger = null) {
    this.config = ConfigManager.getInstance();
    this.logger = logger || console;
    this.tempFolder = this.config.TEMP_FOLDER;
    // sttCache is only cleared and counted in this class; nothing here writes to it.
    this.sttCache = new Map();
    this.ttsCache = new Map();
  }

  /**
   * Builds a pseudo-random file path inside the configured temp folder.
   *
   * @param {string} [ext=''] - Extension without the leading dot.
   * @returns {string} Path of the form `<tempFolder>/<timestamp>-<random>[.ext]`.
   */
  generateRandomFilename(ext = '') {
    const suffix = ext ? `.${ext}` : '';
    const randomPart = Math.random().toString(36).slice(2, 8);
    return path.join(this.tempFolder, `${Date.now()}-${randomPart}${suffix}`);
  }

  /**
   * Best-effort removal of a temp file. Never rejects: a missing file is
   * ignored and any other failure is only logged as a warning.
   *
   * @param {string|null|undefined} filePath - File to delete; falsy values are ignored.
   * @returns {Promise<void>}
   */
  async cleanupFile(filePath) {
    if (!filePath) return;
    try {
      await fs.promises.unlink(filePath);
    } catch (err) {
      // A file that is already gone is the desired end state.
      if (err.code !== 'ENOENT') {
        this.logger.warn(`⚠️ Erro ao limpar ${path.basename(String(filePath))}: ${err.code}`);
      }
    }
  }

  /**
   * Transcribes audio with Deepgram. The input is written to a temp `.ogg`
   * file, converted to MP3 with ffmpeg and posted to the configured endpoint.
   *
   * @param {Buffer} audioBuffer - Raw audio bytes.
   * @param {string} [language='pt'] - Language code; a falsy value falls back
   *   to `config.STT_LANGUAGE`.
   * @returns {Promise<object>} On success `{ sucesso: true, texto, fonte, confidence }`;
   *   on failure `{ sucesso: false, texto, erro[, mensagem] }`. Never rejects.
   */
  async speechToText(audioBuffer, language = 'pt') {
    let audioPath = null;
    let convertedPath = null;
    try {
      if (!this.config.DEEPGRAM_API_KEY) {
        this.logger.warn('⚠️ Deepgram API Key não configurada');
        return {
          sucesso: false,
          texto: '[Audio recebido mas Deepgram não configurado]',
          erro: 'API_KEY_MISSING'
        };
      }

      this.logger.info('🔊 Iniciando STT (Deepgram)...');

      audioPath = this.generateRandomFilename('ogg');
      convertedPath = this.generateRandomFilename('mp3');
      fs.writeFileSync(audioPath, audioBuffer);

      // fluent-ffmpeg is event based, so adapt it to a promise here.
      await new Promise((resolve, reject) => {
        ffmpeg(audioPath)
          .toFormat('mp3')
          .audioCodec('libmp3lame')
          .on('end', resolve)
          .on('error', reject)
          .save(convertedPath);
      });
      const convertedBuffer = fs.readFileSync(convertedPath);

      this.logger.info('📤 Enviando para Deepgram...');
      const response = await axios.post(
        this.config.DEEPGRAM_API_URL,
        convertedBuffer,
        {
          headers: {
            'Authorization': `Token ${this.config.DEEPGRAM_API_KEY}`,
            'Content-Type': 'audio/mpeg'
          },
          params: {
            model: this.config.DEEPGRAM_MODEL,
            language: language || this.config.STT_LANGUAGE,
            smart_format: true,
            punctuate: true,
            diarize: false,
            numerals: true
          },
          timeout: 30000
        }
      );

      const bestAlternative = response.data?.results?.channels?.[0]?.alternatives?.[0];
      let textoTranscrito = bestAlternative?.transcript ? bestAlternative.transcript.trim() : '';
      // Empty or one-character transcripts are replaced by a placeholder.
      if (!textoTranscrito || textoTranscrito.length < 2) {
        textoTranscrito = '[Não consegui entender claramente]';
      }

      this.logger.info(`📝 STT Completo: ${textoTranscrito.substring(0, 80)}...`);
      return {
        sucesso: true,
        texto: textoTranscrito,
        fonte: 'Deepgram STT',
        confidence: bestAlternative?.confidence || 0
      };
    } catch (error) {
      this.logger.error('❌ Erro STT:', error.message);
      let errorCode = 'UNKNOWN';
      if (error.response?.status === 401) {
        errorCode = 'INVALID_API_KEY';
      } else if (error.code === 'ECONNREFUSED') {
        errorCode = 'CONNECTION_FAILED';
      }
      return {
        sucesso: false,
        texto: '[Recebi seu áudio mas houve um erro na transcrição]',
        erro: errorCode,
        mensagem: error.message
      };
    } finally {
      // Runs on success and on failure so temp files never pile up.
      await Promise.all([
        this.cleanupFile(audioPath),
        this.cleanupFile(convertedPath)
      ]);
    }
  }

  /**
   * Synthesizes speech through the Google Translate TTS endpoint.
   * Results are cached (up to 50 entries, oldest evicted first) and the same
   * result object is returned on a cache hit.
   *
   * @param {string} text - Text to speak; only the first 500 characters are used.
   * @param {string} [language='pt'] - Language code; a falsy value falls back
   *   to `config.TTS_LANGUAGE`.
   * @returns {Promise<object>} On success `{ sucesso: true, buffer, fonte, size }`;
   *   on failure `{ sucesso: false, error }`. Never rejects.
   */
  async textToSpeech(text, language = 'pt') {
    try {
      if (!text || text.length === 0) {
        return {
          sucesso: false,
          error: 'Texto vazio'
        };
      }

      const maxChars = 500;
      // google-tts-api's getAudioUrl rejects texts longer than 200 characters.
      const singleRequestLimit = 200;
      const textTruncated = text.substring(0, maxChars);
      const lang = language || this.config.TTS_LANGUAGE;

      // Key on the exact text that gets synthesized; keying on a short prefix
      // would hand back the wrong audio for texts that merely start alike.
      const cacheKey = `${lang}_${textTruncated}`;
      if (this.ttsCache.has(cacheKey)) {
        this.logger.debug('💾 TTS from cache');
        return this.ttsCache.get(cacheKey);
      }

      this.logger.info('🔊 Iniciando TTS (Google)...');

      const ttsOptions = {
        lang,
        slow: this.config.TTS_SLOW,
        host: 'https://translate.google.com'
      };
      const urls = textTruncated.length <= singleRequestLimit
        ? [googleTTS.getAudioUrl(textTruncated, ttsOptions)]
        : googleTTS.getAllAudioUrls(textTruncated, ttsOptions).map(({ url }) => url);

      const downloadSegment = async (url) => {
        const response = await axios({
          url,
          method: 'GET',
          responseType: 'arraybuffer',
          timeout: 15000
        });
        return Buffer.from(response.data);
      };
      // Segments are joined in order into one buffer.
      const finalBuffer = Buffer.concat(await Promise.all(urls.map(downloadSegment)));

      if (finalBuffer.length === 0) {
        throw new Error('Audio buffer vazio');
      }
      if (finalBuffer.length > this.config.MAX_AUDIO_SIZE_MB * 1024 * 1024) {
        return {
          sucesso: false,
          error: 'Áudio TTS muito grande'
        };
      }

      const result = {
        sucesso: true,
        buffer: finalBuffer,
        fonte: 'Google TTS',
        size: finalBuffer.length
      };

      this.ttsCache.set(cacheKey, result);
      if (this.ttsCache.size > 50) {
        // Map iterates in insertion order, so the first key is the oldest.
        const oldestKey = this.ttsCache.keys().next().value;
        this.ttsCache.delete(oldestKey);
      }

      this.logger.info(`🔊 TTS Completo: ${textTruncated.substring(0, 50)}... (${finalBuffer.length} bytes)`);
      return result;
    } catch (error) {
      this.logger.error('❌ Erro TTS:', error.message);
      return {
        sucesso: false,
        error: 'Erro ao gerar TTS: ' + error.message
      };
    }
  }

  /**
   * Guesses the audio container/codec from the first bytes of a buffer.
   *
   * @param {Buffer|Uint8Array} buffer - Audio bytes; anything shorter than
   *   12 bytes or not byte-array-like yields `'unknown'`.
   * @returns {'ogg'|'wav'|'mp3'|'flac'|'aac'|'unknown'}
   */
  detectAudioType(buffer) {
    if (!buffer || buffer.length < 12) return 'unknown';
    if (!(buffer instanceof Uint8Array)) return 'unknown';

    // Signatures have different lengths, so match by prefix on the hex form
    // of the first four bytes instead of comparing the whole string.
    const header = Buffer.from(buffer.buffer, buffer.byteOffset, 4).toString('hex');
    const signatures = [
      ['4f676753', 'ogg'],  // "OggS" container
      ['52494646', 'wav'],  // "RIFF"
      ['664c6143', 'flac'], // "fLaC"
      ['494433', 'mp3'],    // "ID3" tag
      ['fffb', 'mp3'],      // MPEG-1 Layer III frame sync
      ['fffa', 'mp3'],      // MPEG-1 Layer III frame sync, CRC protected
      ['fff3', 'mp3'],      // MPEG-2 Layer III frame sync
      ['fff2', 'mp3'],      // MPEG-2 Layer III frame sync, CRC protected
      ['fff1', 'aac'],      // ADTS, MPEG-4
      ['fff9', 'aac']       // ADTS, MPEG-2
    ];
    const match = signatures.find(([prefix]) => header.startsWith(prefix));
    return match ? match[1] : 'unknown';
  }

  /**
   * Runs the audio through an ffmpeg audio filter.
   *
   * @param {Buffer} inputBuffer - Source audio bytes.
   * @param {string} [effect='normal'] - One of nightcore, slow, fast, bass,
   *   treble, echo (case-insensitive). Any other name returns the input untouched.
   * @returns {Promise<object>} `{ sucesso: true, buffer[, effect, size] }` or
   *   `{ sucesso: false, error }`. Never rejects.
   */
  async applyAudioEffect(inputBuffer, effect = 'normal') {
    let inputPath = null;
    let outputPath = null;
    try {
      const filters = new Map([
        // NOTE(review): atempo=1/1.5 appears to undo the speed-up introduced
        // by asetrate, leaving only the pitch shift — confirm this is intended.
        ['nightcore', 'asetrate=44100*1.5,atempo=1/1.5'],
        ['slow', 'atempo=0.7'],
        ['fast', 'atempo=1.3'],
        ['bass', 'bass=g=10'],
        ['treble', 'treble=g=10'],
        ['echo', 'aecho=0.8:0.9:1000:0.3']
      ]);
      const audioFilter = filters.get(effect.toLowerCase());

      if (!audioFilter) {
        // Unknown or "normal" effect: hand the input back without touching disk.
        return { sucesso: true, buffer: inputBuffer };
      }

      inputPath = this.generateRandomFilename('mp3');
      outputPath = this.generateRandomFilename('mp3');
      fs.writeFileSync(inputPath, inputBuffer);

      await new Promise((resolve, reject) => {
        ffmpeg(inputPath)
          .audioFilter(audioFilter)
          .outputOptions('-q:a 5')
          .on('end', resolve)
          .on('error', reject)
          .save(outputPath);
      });

      const resultBuffer = fs.readFileSync(outputPath);
      return {
        sucesso: true,
        buffer: resultBuffer,
        effect: effect,
        size: resultBuffer.length
      };
    } catch (error) {
      this.logger.error(`❌ Erro ao aplicar efeito ${effect}:`, error.message);
      return {
        sucesso: false,
        error: error.message
      };
    } finally {
      // Runs on success and on failure so temp files never pile up.
      await Promise.all([
        this.cleanupFile(inputPath),
        this.cleanupFile(outputPath)
      ]);
    }
  }

  /**
   * Empties both the STT and TTS caches.
   */
  clearCache() {
    this.sttCache.clear();
    this.ttsCache.clear();
    this.logger.info('💾 Caches de áudio limpos');
  }

  /**
   * Reports cache sizes and the relevant feature/config flags.
   *
   * @returns {{sttCacheSize: number, ttsCacheSize: number, deepgramConfigured: boolean, sttEnabled: *, ttsEnabled: *}}
   */
  getStats() {
    return {
      sttCacheSize: this.sttCache.size,
      ttsCacheSize: this.ttsCache.size,
      deepgramConfigured: !!this.config.DEEPGRAM_API_KEY,
      sttEnabled: this.config.FEATURE_STT_ENABLED,
      ttsEnabled: this.config.FEATURE_TTS_ENABLED
    };
  }
}
// Expose the class as this module's sole CommonJS export.
module.exports = AudioProcessor;