const express = require('express');
const WebSocket = require('ws');
const crypto = require('crypto');
const fs = require('fs');
const path = require('path');
const ffmpeg = require('fluent-ffmpeg');
// Express application instance; the routes below are registered on it.
const app = express();
// TCP port handed to app.listen().
const PORT = 7860;
// TTS configuration
// Token sent as the TrustedClientToken query parameter and also mixed into the Sec-MS-GEC hash.
const TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
// Value sent as the Sec-MS-GEC-Version query parameter.
const GEC_VERSION = "1-143.0.3650.75";
// Base WebSocket endpoint used for speech synthesis; query parameters are appended per request.
const WSS_URL = "wss://speech.text-to-speech.online/consumer/speech/synthesize/readaloud/edge/v1";
// Make sure the temp folder exists. With `recursive: true`, mkdirSync is a
// no-op when the directory is already present, so no separate existence check
// is needed (and there is no window between "check" and "create").
const tempDir = path.join(__dirname, 'temp');
fs.mkdirSync(tempDir, { recursive: true });
/**
 * RE Logics
 */

/**
 * Builds the value sent as the `Sec-MS-GEC` query parameter.
 *
 * The current Unix time (whole seconds) is shifted by a fixed offset of
 * 11644473600 seconds, rounded down to a multiple of 300 seconds, and scaled
 * by 10^7. The decimal form of that number is concatenated with
 * TRUSTED_CLIENT_TOKEN and hashed with SHA-256.
 *
 * @returns {string} Upper-case hexadecimal SHA-256 digest (64 characters).
 */
function generateMSHash() {
  const epochOffsetSeconds = 11644473600n;
  const windowSeconds = 300n;
  // 1e9 / 100 = 1e7 units per second.
  const unitsPerSecond = 1000000000n / 100n;

  const unixSeconds = BigInt(Math.floor(Date.now() / 1000));
  const shiftedSeconds = unixSeconds + epochOffsetSeconds;
  // Drop the remainder so every call inside the same 300-second window hashes the same value.
  const windowStart = shiftedSeconds - (shiftedSeconds % windowSeconds);
  const ticks = windowStart * unitsPerSecond;

  const hasher = crypto.createHash('sha256');
  hasher.update(`${ticks}${TRUSTED_CLIENT_TOKEN}`);
  return hasher.digest('hex').toUpperCase();
}
/**
 * Produces a random identifier from 16 cryptographically random bytes.
 *
 * @returns {string} 32 upper-case hexadecimal characters, without dashes.
 */
function createGuid() {
  const randomBytes = crypto.randomBytes(16);
  const lowerHex = randomBytes.toString('hex');
  return lowerHex.toUpperCase();
}
/**
 * Core TTS Synthesis
 */

/**
 * Escapes the five XML special characters so a value can be embedded in SSML
 * text or in a double-quoted attribute without changing the markup.
 *
 * @param {*} value - Value to embed; coerced to a string first.
 * @returns {string} The escaped string.
 */
function escapeXml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}

/**
 * Converts a speed multiplier (1 = unchanged) into a signed percentage string
 * for the SSML `prosody rate` attribute, e.g. 1.1 -> "+10%", 0.5 -> "-50%".
 *
 * @param {number} speed - Speed multiplier.
 * @returns {string} Signed percentage; "+0%" when `speed` is not a finite number.
 */
function formatProsodyRate(speed) {
  const multiplier = Number(speed);
  if (!Number.isFinite(multiplier)) return '+0%';
  // Round to two decimals so binary floating-point noise such as
  // (1.1 - 1) * 100 === 10.000000000000009 never reaches the SSML.
  const percent = Math.round((multiplier - 1) * 10000) / 100;
  return percent >= 0 ? `+${percent}%` : `${percent}%`;
}

/**
 * Synthesizes speech for `text` over a WebSocket connection to WSS_URL.
 *
 * After the socket opens, three text frames are sent (speech.config,
 * synthesis.context requesting "audio-24khz-48kbitrate-mono-mp3", and the
 * SSML). Binary frames are scanned for the "Path:audio\r\n" header and the
 * bytes after it are collected; a text frame containing "turn.end" finishes
 * the request.
 *
 * @param {string} text - Text to speak; XML-escaped before being placed in the SSML.
 * @param {string} voice - Voice name placed in the SSML `voice` element.
 * @param {number} speed - Speed multiplier, 1 = unchanged.
 * @returns {Promise<Buffer>} Resolves with the concatenated audio bytes.
 *   Rejects on a socket error, or if the socket closes before "turn.end"
 *   arrives (previously the promise would stay pending forever in that case).
 */
function synthesize(text, voice, speed) {
  return new Promise((resolve, reject) => {
    const requestId = createGuid();
    const connectionId = createGuid();
    const gec = generateMSHash();
    const fullUrl = `${WSS_URL}?TrustedClientToken=${TRUSTED_CLIENT_TOKEN}&Sec-MS-GEC=${gec}&Sec-MS-GEC-Version=${GEC_VERSION}&Authorization=bearer%20undefined&ConnectionId=${connectionId}`;

    const ws = new WebSocket(fullUrl, {
      origin: 'https://www.text-to-speech.online',
      headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36' }
    });

    const audioChunks = [];
    const audioHeader = Buffer.from('Path:audio\r\n');
    const ttsRate = formatProsodyRate(speed);
    const safeVoice = escapeXml(voice);
    const safeText = escapeXml(text);

    ws.on('open', () => {
      ws.send(`Path: speech.config\r\nX-RequestId: ${requestId}\r\nX-Timestamp: ${new Date().toISOString()}\r\nContent-Type: application/json\r\n\r\n{"context":{"system":{"name":"SpeechSDK","version":"1.19.0","build":"JavaScript","lang":"JavaScript"},"os":{"platform":"Browser/Win32","name":"Chrome","version":"120.0.0.0"}}}`);
      ws.send(`Path: synthesis.context\r\nX-RequestId: ${requestId}\r\nX-Timestamp: ${new Date().toISOString()}\r\nContent-Type: application/json\r\n\r\n{"synthesis":{"audio":{"metadataOptions":{"bookmarkEnabled":false,"sentenceBoundaryEnabled":false,"visemeEnabled":false,"wordBoundaryEnabled":false},"outputFormat":"audio-24khz-48kbitrate-mono-mp3"},"language":{"autoDetection":false}}}`);
      ws.send(`Path: ssml\r\nX-RequestId: ${requestId}\r\nX-Timestamp: ${new Date().toISOString()}\r\nContent-Type: application/ssml+xml\r\n\r\n<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US"><voice name="${safeVoice}"><prosody rate="${ttsRate}" pitch="0%">${safeText}</prosody></voice></speak>`);
    });

    ws.on('message', (data, isBinary) => {
      if (isBinary) {
        // Audio frames carry a textual header ending in "Path:audio\r\n";
        // everything after it is audio payload.
        const index = data.indexOf(audioHeader);
        if (index !== -1) audioChunks.push(data.subarray(index + audioHeader.length));
      } else if (data.toString().includes('turn.end')) {
        ws.close();
        resolve(Buffer.concat(audioChunks));
      }
    });

    ws.on('error', reject);

    // A promise settles only once, so this is a no-op after a normal
    // "turn.end" resolve; it only matters when the socket drops early.
    ws.on('close', (code) => {
      reject(new Error(`TTS connection closed before synthesis completed (code ${code})`));
    });
  });
}
/**
 * Express Routes
 */

// Markup of the landing page: a GET form that submits text, speed, use_bg and
// bg_vol to /generate.
const HOME_PAGE_HTML = `
<html>
<head><title>Edge TTS Mixer</title></head>
<body style="font-family:sans-serif; max-width:500px; margin:50px auto;">
<h2>Edge TTS Web UI</h2>
<form action="/generate" method="GET">
<textarea name="text" style="width:100%" rows="4" placeholder="Masukkan teks..."></textarea><br><br>
Kecepatan: <input type="number" name="speed" value="1.0" step="0.1" min="0.5" max="2.0"><br><br>
Pakai Backsound: <input type="checkbox" name="use_bg" value="true" checked><br><br>
Volume Backsound (0.1 - 1.0): <input type="number" name="bg_vol" value="0.3" step="0.1"><br><br>
<button type="submit" style="padding:10px 20px">Generate & Download</button>
</form>
</body>
</html>
`;

// Simple UI: serve the static form page.
app.get('/', (_req, res) => {
  res.send(HOME_PAGE_HTML);
});
// Generate endpoint.
//
// Query parameters:
//   text   - text to speak (defaults to "Halo selamat datang")
//   speed  - speed multiplier (defaults to 1.0)
//   use_bg - "true" to mix in backsound.mp3 when that file exists next to this script
//   bg_vol - volume applied to the backsound track (defaults to 0.3)
//
// Responds with an MP3 download. Temp files are removed on every exit path,
// and an error response is only written while headers are still unsent.
app.get('/generate', async (req, res) => {
  const text = req.query.text || "Halo selamat datang";
  const speed = parseFloat(req.query.speed) || 1.0;
  const useBg = req.query.use_bg === 'true';
  const bgVol = parseFloat(req.query.bg_vol) || 0.3;
  const voice = "id-ID-ArdiNeural";

  // Date.now() alone collides when two requests arrive in the same
  // millisecond, which would make them overwrite each other's temp files.
  const jobId = `${Date.now()}_${crypto.randomBytes(6).toString('hex')}`;
  const vocalPath = path.join(tempDir, `vocal_${jobId}.mp3`);
  const finalPath = path.join(tempDir, `final_${jobId}.mp3`);
  const backsoundPath = path.join(__dirname, 'backsound.mp3');

  // Best-effort removal of this job's temp files; a file that was never
  // created (ENOENT) is not an error.
  const cleanup = () => {
    for (const filePath of [vocalPath, finalPath]) {
      fs.unlink(filePath, (err) => {
        if (err && err.code !== 'ENOENT') {
          console.error(`Failed to remove ${filePath}: ${err.message}`);
        }
      });
    }
  };

  // Clean up and report a failure, unless a response has already been started.
  const fail = (prefix, err) => {
    cleanup();
    if (!res.headersSent) res.status(500).send(`${prefix}${err.message}`);
  };

  // Stream a file to the client, then clean up. When res.download is given a
  // callback, a transfer error has to be answered by the callback itself.
  const sendAndCleanup = (filePath, downloadName) => {
    res.download(filePath, downloadName, (err) => {
      cleanup();
      if (err && !res.headersSent) res.status(500).send(`Error: ${err.message}`);
    });
  };

  try {
    // 1. Fetch the vocal track from TTS
    const audioBuffer = await synthesize(text, voice, speed);
    await fs.promises.writeFile(vocalPath, audioBuffer);

    if (useBg && fs.existsSync(backsoundPath)) {
      // 2. Mix with the backsound: scale the backsound volume, then amix both
      // inputs, stopping when the first input (the vocal) ends.
      ffmpeg()
        .input(vocalPath)
        .input(backsoundPath)
        .complexFilter([
          { filter: 'volume', options: { volume: bgVol }, inputs: '1:a', outputs: 'bg' },
          { filter: 'amix', options: { inputs: 2, duration: 'first' }, inputs: ['0:a', 'bg'], outputs: 'out' }
        ])
        .map('out')
        .audioCodec('libmp3lame')
        .on('error', (err) => fail("FFmpeg Error: ", err))
        .on('end', () => sendAndCleanup(finalPath, "audio_result.mp3"))
        .save(finalPath);
    } else {
      // 3. Without backsound
      sendAndCleanup(vocalPath, "audio_vocal.mp3");
    }
  } catch (err) {
    fail("Error: ", err);
  }
});
// Start the HTTP server on PORT and log the local URL once it is listening.
app.listen(PORT, () => {
  console.log(`Server berjalan di http://localhost:${PORT}`);
});