File size: 6,539 Bytes
af750ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
const express = require('express');
const WebSocket = require('ws');
const crypto = require('crypto');
const fs = require('fs');
const path = require('path');
const ffmpeg = require('fluent-ffmpeg');

const app = express();
const PORT = 7860;

// TTS configuration: client token, GEC version string and WebSocket endpoint
// used to build the synthesis connection URL.
const TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
const GEC_VERSION = "1-143.0.3650.75";
const WSS_URL = "wss://speech.text-to-speech.online/consumer/speech/synthesize/readaloud/edge/v1";

// Make sure the temp folder exists. With `recursive: true` the call is a no-op
// when the directory is already present, so no separate existence check (and
// no check-then-create race between processes) is needed.
const tempDir = path.join(__dirname, 'temp');
fs.mkdirSync(tempDir, { recursive: true });

/** 
 * RE Logics 
 */
/**
 * Build the value sent as the `Sec-MS-GEC` query parameter.
 *
 * The current Unix time in seconds is shifted by a fixed 11644473600-second
 * offset, rounded down to a multiple of 300 seconds, and scaled to
 * 100-nanosecond units. The decimal form of that number is concatenated with
 * TRUSTED_CLIENT_TOKEN and hashed with SHA-256.
 *
 * @returns {string} Upper-case hexadecimal SHA-256 digest (64 characters).
 */
function generateMSHash() {
    const EPOCH_OFFSET_SECONDS = 11644473600n;
    const WINDOW_SECONDS = 300n;
    const TICKS_PER_SECOND = 1000000000n / 100n; // number of 100 ns units in one second

    const shiftedSeconds = BigInt(Math.floor(Date.now() / 1000)) + EPOCH_OFFSET_SECONDS;
    const windowStart = shiftedSeconds - (shiftedSeconds % WINDOW_SECONDS);
    const ticks = windowStart * TICKS_PER_SECOND;

    const hasher = crypto.createHash('sha256');
    hasher.update(String(ticks) + TRUSTED_CLIENT_TOKEN);
    return hasher.digest('hex').toUpperCase();
}

/**
 * Produce a random identifier from 16 cryptographically random bytes.
 *
 * @returns {string} 32 upper-case hexadecimal characters (no dashes).
 */
function createGuid() {
    const randomHex = crypto.randomBytes(16).toString('hex');
    return randomHex.toUpperCase();
}

/**
 * Core TTS Synthesis
 */

/**
 * Escape the five XML special characters so an arbitrary value can be embedded
 * in SSML element content or in a double-quoted attribute.
 *
 * @param {*} value - Value to embed; it is converted with String() first.
 * @returns {string} The escaped text.
 */
function escapeXml(value) {
    return String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
}

/**
 * Convert a speed multiplier into a signed percentage for the prosody `rate`
 * attribute, e.g. 1.1 -> "+10%", 0.7 -> "-30%".
 *
 * The percentage is rounded to an integer because the raw floating-point
 * result (for example (1.1 - 1) * 100 === 10.000000000000009) would otherwise
 * leak into the SSML. A non-numeric speed is treated as 1 (no change).
 *
 * @param {number} speed - Speed multiplier where 1 means normal speed.
 * @returns {string} Signed percentage string.
 */
function formatProsodyRate(speed) {
    const factor = Number(speed);
    const percent = Number.isFinite(factor) ? Math.round((factor - 1) * 100) : 0;
    return percent >= 0 ? `+${percent}%` : `${percent}%`;
}

/**
 * Synthesize speech over the TTS WebSocket endpoint.
 *
 * Opens a connection to WSS_URL, sends the speech config, the synthesis
 * context and the SSML request, collects the audio payload of every binary
 * frame, and resolves once a text frame containing "turn.end" arrives.
 *
 * @param {string} text - Text to speak; XML-escaped before being placed in the SSML.
 * @param {string} voice - Voice name for the SSML `<voice name="...">` attribute.
 * @param {number} speed - Speed multiplier (1 = normal), converted to a prosody rate.
 * @returns {Promise<Buffer>} The concatenated audio bytes (MP3, per the requested output format).
 *   Rejects on a WebSocket error, or if the connection closes before "turn.end" was received.
 */
function synthesize(text, voice, speed) {
    return new Promise((resolve, reject) => {
        const requestId = createGuid();
        const connectionId = createGuid();
        const gec = generateMSHash();
        const fullUrl = `${WSS_URL}?TrustedClientToken=${TRUSTED_CLIENT_TOKEN}&Sec-MS-GEC=${gec}&Sec-MS-GEC-Version=${GEC_VERSION}&Authorization=bearer%20undefined&ConnectionId=${connectionId}`;

        const ws = new WebSocket(fullUrl, {
            origin: 'https://www.text-to-speech.online',
            headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36' }
        });

        // Marker that precedes the audio payload inside each binary frame.
        const audioSeparator = Buffer.from('Path:audio\r\n');
        const audioChunks = [];
        const ttsRate = formatProsodyRate(speed);

        // Every outgoing message shares the same header layout; only the path,
        // content type and body differ. The timestamp is taken per message.
        const buildMessage = (messagePath, contentType, body) =>
            `Path: ${messagePath}\r\nX-RequestId: ${requestId}\r\nX-Timestamp: ${new Date().toISOString()}\r\nContent-Type: ${contentType}\r\n\r\n${body}`;

        ws.on('open', () => {
            ws.send(buildMessage(
                'speech.config',
                'application/json',
                '{"context":{"system":{"name":"SpeechSDK","version":"1.19.0","build":"JavaScript","lang":"JavaScript"},"os":{"platform":"Browser/Win32","name":"Chrome","version":"120.0.0.0"}}}'
            ));
            ws.send(buildMessage(
                'synthesis.context',
                'application/json',
                '{"synthesis":{"audio":{"metadataOptions":{"bookmarkEnabled":false,"sentenceBoundaryEnabled":false,"visemeEnabled":false,"wordBoundaryEnabled":false},"outputFormat":"audio-24khz-48kbitrate-mono-mp3"},"language":{"autoDetection":false}}}'
            ));
            // text and voice are escaped so characters such as & or < cannot
            // break (or inject markup into) the SSML document.
            ws.send(buildMessage(
                'ssml',
                'application/ssml+xml',
                `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US"><voice name="${escapeXml(voice)}"><prosody rate="${ttsRate}" pitch="0%">${escapeXml(text)}</prosody></voice></speak>`
            ));
        });

        ws.on('message', (data, isBinary) => {
            if (isBinary) {
                // Keep only the bytes after the "Path:audio" marker; frames
                // without the marker carry no audio and are skipped.
                const index = data.indexOf(audioSeparator);
                if (index !== -1) audioChunks.push(data.subarray(index + audioSeparator.length));
            } else if (data.toString().includes('turn.end')) {
                resolve(Buffer.concat(audioChunks));
                ws.close();
            }
        });

        ws.on('error', reject);

        // If the socket closes before "turn.end" the promise would otherwise
        // stay pending forever. After a successful resolve (or an earlier
        // reject) this call is a no-op because a promise settles only once.
        ws.on('close', () => {
            reject(new Error('TTS connection closed before synthesis completed'));
        });
    });
}

/**
 * Express Routes
 */

// Minimal web UI: a single form that submits its fields to /generate via GET.
app.get('/', (_req, res) => {
    const homePage = `
        <html>
            <head><title>Edge TTS Mixer</title></head>
            <body style="font-family:sans-serif; max-width:500px; margin:50px auto;">
                <h2>Edge TTS Web UI</h2>
                <form action="/generate" method="GET">
                    <textarea name="text" style="width:100%" rows="4" placeholder="Masukkan teks..."></textarea><br><br>
                    Kecepatan: <input type="number" name="speed" value="1.0" step="0.1" min="0.5" max="2.0"><br><br>
                    Pakai Backsound: <input type="checkbox" name="use_bg" value="true" checked><br><br>
                    Volume Backsound (0.1 - 1.0): <input type="number" name="bg_vol" value="0.3" step="0.1"><br><br>
                    <button type="submit" style="padding:10px 20px">Generate & Download</button>
                </form>
            </body>
        </html>
    `;

    res.send(homePage);
});

/**
 * Delete temporary files without blocking the event loop.
 * Files that do not exist are ignored; any other failure is logged.
 *
 * @param {string[]} filePaths - Paths to remove.
 */
function removeTempFiles(filePaths) {
    for (const filePath of filePaths) {
        fs.unlink(filePath, (err) => {
            if (err && err.code !== 'ENOENT') {
                console.error(`Failed to remove temp file ${filePath}: ${err.message}`);
            }
        });
    }
}

/**
 * Parse a numeric query parameter and clamp it to [min, max].
 * Missing, non-numeric, non-finite and zero values yield the fallback
 * (zero is included to keep the previous `parseFloat(x) || fallback` behavior).
 *
 * @param {*} raw - Raw query value.
 * @param {number} fallback - Value used when `raw` is unusable.
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number} The parsed, bounded number.
 */
function parseBoundedNumber(raw, fallback, min, max) {
    const value = Number.parseFloat(raw);
    if (!Number.isFinite(value) || value === 0) return fallback;
    return Math.min(max, Math.max(min, value));
}

// Generate endpoint: synthesizes the requested text, optionally mixes it with
// backsound.mp3 through ffmpeg, and sends the resulting MP3 as a download.
// Query parameters: text, speed (0.5-2.0), use_bg ('true' to mix), bg_vol (0.1-1.0).
app.get('/generate', async (req, res) => {
    const text = req.query.text || "Halo selamat datang";
    // Bounds mirror the ranges advertised by the form on the home page.
    const speed = parseBoundedNumber(req.query.speed, 1.0, 0.5, 2.0);
    const useBg = req.query.use_bg === 'true';
    const bgVol = parseBoundedNumber(req.query.bg_vol, 0.3, 0.1, 1.0);
    const voice = "id-ID-ArdiNeural";

    // A timestamp alone collides when two requests arrive in the same
    // millisecond, so a random suffix keeps the temp file names unique.
    const jobId = `${Date.now()}_${crypto.randomBytes(8).toString('hex')}`;
    const vocalPath = path.join(tempDir, `vocal_${jobId}.mp3`);
    const finalPath = path.join(tempDir, `final_${jobId}.mp3`);
    const backsoundPath = path.join(__dirname, 'backsound.mp3');

    const cleanup = () => removeTempFiles([vocalPath, finalPath]);

    // Report a failure once and always remove the temp files. If the response
    // has already started, a second status/body cannot be sent.
    const fail = (prefix, err) => {
        cleanup();
        if (!res.headersSent) res.status(500).send(prefix + err.message);
    };

    // When a callback is passed to res.download, transfer errors are handed to
    // it, so they must be answered here or the request is left hanging.
    const sendDownload = (filePath, downloadName) => {
        res.download(filePath, downloadName, (err) => {
            if (err) {
                fail("Error: ", err);
            } else {
                cleanup();
            }
        });
    };

    try {
        // 1. Fetch the vocal track from the TTS service
        const audioBuffer = await synthesize(text, voice, speed);
        await fs.promises.writeFile(vocalPath, audioBuffer);

        if (useBg && fs.existsSync(backsoundPath)) {
            // 2. Mix with the backsound: scale the background volume, then mix
            //    both inputs, ending when the first input (the vocal) ends.
            ffmpeg()
                .input(vocalPath)
                .input(backsoundPath)
                .complexFilter([
                    { filter: 'volume', options: { volume: bgVol }, inputs: '1:a', outputs: 'bg' },
                    { filter: 'amix', options: { inputs: 2, duration: 'first' }, inputs: ['0:a', 'bg'], outputs: 'out' }
                ])
                .map('out')
                .audioCodec('libmp3lame')
                .on('error', (err) => fail("FFmpeg Error: ", err))
                .on('end', () => sendDownload(finalPath, "audio_result.mp3"))
                .save(finalPath);
        } else {
            // 3. No backsound: send the vocal track as-is
            sendDownload(vocalPath, "audio_vocal.mp3");
        }
    } catch (err) {
        fail("Error: ", err);
    }
});

// Log the local URL once the HTTP server is accepting connections.
function announceServerReady() {
    console.log(`Server berjalan di http://localhost:${PORT}`);
}

app.listen(PORT, announceServerReady);