Spaces:
Running
A newer version of the Gradio SDK is available:
6.6.0
🎤 NuralVoiceSTT API Documentation
Developed by Blink Digital
Complete API documentation for integrating NuralVoiceSTT into your applications.
📡 API Endpoints
WebSocket API (Real-time Streaming)
Endpoint: wss://ashishkblink-NuralVoice.hf.space/ws/transcribe
Protocol: WebSocket (WSS for secure connection)
Best for: Real-time audio streaming, live transcription, low-latency applications
🎯 Quick Start
Prerequisites
- Node.js 14+ installed
- WebSocket library (`ws` package)
- Audio capture capability (microphone or audio file)
Installation
npm install ws
Node.js Examples
Example 1: Real-time Microphone Streaming
const WebSocket = require('ws');
const { spawn } = require('child_process');

// WebSocket endpoint for the NuralVoiceSTT streaming API.
const WS_URL = 'wss://ashishkblink-NuralVoice.hf.space/ws/transcribe';

// Connect to WebSocket
const ws = new WebSocket(WS_URL);

ws.on('open', () => {
  console.log('✅ Connected to NuralVoiceSTT API');

  // Start recording from microphone using arecord (Linux) or sox (macOS/Linux)
  // For macOS, you might need: brew install sox
  const recorder = spawn('sox', [
    '-d',                   // Default audio device (microphone)
    '-t', 'raw',            // Raw audio format
    '-r', '16000',          // Sample rate: 16kHz
    '-c', '1',              // Channels: mono
    '-b', '16',             // Bit depth: 16-bit
    '-e', 'signed-integer', // Encoding
    '-'                     // Output to stdout
  ]);

  // Send raw PCM chunks to the WebSocket as they arrive.
  recorder.stdout.on('data', (chunk) => {
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(chunk);
    }
  });

  recorder.on('error', (error) => {
    console.error('Recording error:', error);
  });

  // Stop recording after 10 seconds (example) and request the final result.
  // Guard with readyState: the socket may already be closed by then.
  setTimeout(() => {
    recorder.kill();
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(JSON.stringify({ action: 'stop' }));
    }
  }, 10000);
});

ws.on('message', (data) => {
  try {
    const message = JSON.parse(data.toString());
    if (message.status === 'connected') {
      console.log('📡 Ready:', message.message);
    } else if (message.text) {
      if (message.is_final) {
        console.log('✅ Final:', message.text);
      } else if (message.is_partial) {
        console.log('⏳ Partial:', message.text);
      } else {
        console.log('📝 Text:', message.text);
      }
    } else if (message.error) {
      console.error('❌ Error:', message.error);
    }
  } catch (e) {
    console.error('Parse error:', e);
  }
});

ws.on('error', (error) => {
  console.error('WebSocket error:', error);
});

ws.on('close', () => {
  console.log('🔌 Disconnected from API');
});
Example 2: Audio File Transcription
const WebSocket = require('ws');
const fs = require('fs');

const WS_URL = 'wss://ashishkblink-NuralVoice.hf.space/ws/transcribe';
const AUDIO_FILE = 'audio.wav'; // Your audio file path

// Connect to WebSocket
const ws = new WebSocket(WS_URL);
let transcription = '';

ws.on('open', () => {
  console.log('✅ Connected to NuralVoiceSTT API');

  // Read audio file.
  // Note: this assumes the file is already raw 16kHz, 16-bit, mono PCM.
  // Convert your audio file first using ffmpeg if needed:
  //   ffmpeg -i input.mp3 -ar 16000 -ac 1 -f s16le output.raw
  const audioBuffer = fs.readFileSync(AUDIO_FILE);

  // Send audio in chunks. 16 kHz * 2 bytes/sample * 1 channel = 32000 B/s,
  // so 4000 bytes is ~0.125 seconds of audio.
  const chunkSize = 4000;
  let offset = 0;

  const sendChunk = () => {
    if (offset < audioBuffer.length && ws.readyState === WebSocket.OPEN) {
      const chunk = audioBuffer.subarray(offset, offset + chunkSize);
      ws.send(chunk);
      offset += chunkSize;
      // Pace the stream: send the next chunk after a small delay.
      setTimeout(sendChunk, 100);
    } else if (ws.readyState === WebSocket.OPEN) {
      // All chunks sent - request the final result.
      // (Guarded: sending on a closed socket would throw.)
      ws.send(JSON.stringify({ action: 'stop' }));
    }
  };
  sendChunk();
});

ws.on('message', (data) => {
  try {
    const message = JSON.parse(data.toString());
    if (message.text) {
      if (message.is_final) {
        transcription += ' ' + message.text;
        console.log('✅ Final transcription:', transcription.trim());
      } else if (message.is_partial) {
        console.log('⏳ Partial:', message.text);
      }
    } else if (message.error) {
      console.error('❌ Error:', message.error);
    }
  } catch (e) {
    // Ignore binary data or other non-JSON frames.
  }
});

ws.on('close', () => {
  console.log('\n📝 Complete Transcription:');
  console.log(transcription.trim());
});
Example 3: Browser Audio Streaming (Node.js Server Proxy)
// server.js - Node.js server that proxies browser audio to HF Space
const express = require('express');
const WebSocket = require('ws');
const http = require('http');
const cors = require('cors');

const app = express();
app.use(cors());
app.use(express.json());

const server = http.createServer(app);
const wss = new WebSocket.Server({ server, path: '/ws' });
const HF_WS_URL = 'wss://ashishkblink-NuralVoice.hf.space/ws/transcribe';

wss.on('connection', (clientWs) => {
  console.log('✅ Client connected');

  // Open one dedicated upstream connection to the HF Space per client.
  const hfWs = new WebSocket(HF_WS_URL);

  hfWs.on('open', () => {
    console.log('✅ Connected to HF Space');
    clientWs.send(JSON.stringify({
      type: 'status',
      message: 'Connected to STT service'
    }));
  });

  // Forward audio from client to HF Space.
  clientWs.on('message', (data) => {
    if (hfWs.readyState !== WebSocket.OPEN) return;
    try {
      // JSON envelope from the browser (audio array or control message).
      const message = JSON.parse(data.toString());
      if (message.type === 'audio') {
        // Convert the numeric array back into raw PCM bytes.
        hfWs.send(Buffer.from(message.data));
      } else if (message.action === 'stop') {
        hfWs.send(JSON.stringify({ action: 'stop' }));
      }
    } catch (e) {
      // Binary data - forward directly.
      hfWs.send(data);
    }
  });

  // Forward transcription from HF Space to client.
  hfWs.on('message', (data) => {
    try {
      const message = JSON.parse(data.toString());
      // Guard: the client may have disconnected while upstream was replying.
      if (clientWs.readyState === WebSocket.OPEN) {
        clientWs.send(JSON.stringify({
          type: 'transcription',
          text: message.text || '',
          isFinal: message.is_final || false,
          isPartial: message.is_partial || false
        }));
      }
    } catch (e) {
      // Ignore non-JSON upstream frames.
    }
  });

  hfWs.on('error', (error) => {
    console.error('HF WebSocket error:', error);
    if (clientWs.readyState === WebSocket.OPEN) {
      clientWs.send(JSON.stringify({
        type: 'error',
        message: error.message
      }));
    }
  });

  // Always close the upstream socket when the client goes away,
  // otherwise upstream connections leak.
  clientWs.on('close', () => {
    hfWs.close();
    console.log('❌ Client disconnected');
  });
});

const PORT = process.env.PORT || 3001;
server.listen(PORT, () => {
  console.log(`🚀 Server running on port ${PORT}`);
  console.log(`📡 WebSocket: ws://localhost:${PORT}/ws`);
});
Example 4: Complete Client-Server Application
// client-example.js - Complete example with error handling
const WebSocket = require('ws');
/**
 * Thin client for the NuralVoiceSTT WebSocket API.
 * Streams 16-bit PCM audio and collects transcription results.
 */
class NuralVoiceClient {
  /**
   * @param {string} wsUrl - WebSocket endpoint of the STT service.
   */
  constructor(wsUrl = 'wss://ashishkblink-NuralVoice.hf.space/ws/transcribe') {
    this.wsUrl = wsUrl;
    this.ws = null;
    this.isConnected = false;
    this.transcription = '';     // Accumulated final transcript
    this.onTranscription = null; // Callback: (text, isFinal) => void
    this.onError = null;         // Callback: (message) => void
  }

  /**
   * Open the WebSocket connection.
   * @returns {Promise<void>} Resolves once the socket is open; rejects on error.
   */
  connect() {
    return new Promise((resolve, reject) => {
      this.ws = new WebSocket(this.wsUrl);

      this.ws.on('open', () => {
        this.isConnected = true;
        console.log('✅ Connected to NuralVoiceSTT');
        resolve();
      });

      this.ws.on('message', (data) => {
        try {
          const message = JSON.parse(data.toString());
          if (message.status === 'connected') {
            console.log('📡 Ready:', message.message);
          } else if (message.text) {
            if (message.is_final) {
              // Final segment: accumulate and notify.
              this.transcription += ' ' + message.text;
              if (this.onTranscription) {
                this.onTranscription(message.text, true);
              }
            } else if (message.is_partial) {
              // Partial segment: notify only, do not accumulate.
              if (this.onTranscription) {
                this.onTranscription(message.text, false);
              }
            }
          } else if (message.error) {
            console.error('❌ Error:', message.error);
            if (this.onError) {
              this.onError(message.error);
            }
          }
        } catch (e) {
          console.error('Parse error:', e);
        }
      });

      this.ws.on('error', (error) => {
        this.isConnected = false;
        if (this.onError) {
          this.onError(error.message);
        }
        reject(error);
      });

      this.ws.on('close', () => {
        this.isConnected = false;
        console.log('🔌 Disconnected');
      });
    });
  }

  /**
   * Send a chunk of raw 16-bit PCM audio (16kHz, mono).
   * @param {Buffer} audioBuffer
   * @returns {boolean} true if the chunk was sent, false if not connected.
   */
  sendAudio(audioBuffer) {
    if (this.ws && this.isConnected && this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(audioBuffer);
      return true;
    }
    return false;
  }

  /** Ask the server to flush and return the final transcription. */
  stop() {
    if (this.ws && this.isConnected) {
      this.ws.send(JSON.stringify({ action: 'stop' }));
    }
  }

  /** Close the underlying WebSocket connection. */
  close() {
    if (this.ws) {
      this.ws.close();
    }
  }

  /** @returns {string} The accumulated final transcript, trimmed. */
  getTranscription() {
    return this.transcription.trim();
  }
}
// Usage example
async function main() {
  const client = new NuralVoiceClient();

  // Print partial and final results as they arrive.
  client.onTranscription = (text, isFinal) => {
    if (isFinal) {
      console.log('✅ Final:', text);
    } else {
      console.log('⏳ Partial:', text);
    }
  };

  client.onError = (error) => {
    console.error('Error:', error);
  };

  try {
    await client.connect();

    // Send audio chunks (example).
    // In real usage, you'd get audio from a microphone or file.
    const audioChunk = Buffer.alloc(4000); // Example chunk (silence)
    client.sendAudio(audioChunk);

    // Stop after some time and print the accumulated transcript.
    setTimeout(() => {
      client.stop();
      console.log('📝 Complete:', client.getTranscription());
      client.close();
    }, 5000);
  } catch (error) {
    console.error('Connection failed:', error);
  }
}

// Uncomment to run
// main();
API Protocol
Connection
- Connect to `wss://ashishkblink-NuralVoice.hf.space/ws/transcribe`
- Wait for connection confirmation message
- Send audio data as binary (16-bit PCM, 16kHz, mono)
- Receive transcription results as JSON
Audio Format Requirements
- Sample Rate: 16,000 Hz (16kHz)
- Channels: Mono (1 channel)
- Bit Depth: 16-bit
- Encoding: Signed integer PCM
- Format: Raw binary data (no headers)
Converting Audio Files
Use ffmpeg to convert audio files to the required format:
# Convert MP3 to required format
ffmpeg -i input.mp3 -ar 16000 -ac 1 -f s16le output.raw
# Convert WAV to required format
ffmpeg -i input.wav -ar 16000 -ac 1 -f s16le output.raw
# Record from microphone directly
ffmpeg -f avfoundation -i ":0" -ar 16000 -ac 1 -f s16le output.raw
Message Format
Client β Server (Send Audio)
Send raw binary audio data (16-bit PCM):
ws.send(audioBuffer); // Buffer containing 16-bit PCM audio
Send stop command:
ws.send(JSON.stringify({ action: 'stop' }));
Server β Client (Receive Transcription)
Status Message:
{
"status": "connected",
"message": "Ready to receive audio. Send 16-bit PCM mono audio at 16kHz sample rate.",
"sample_rate": 16000
}
Partial Transcription:
{
"text": "hello world",
"is_final": false,
"is_partial": true
}
Final Transcription:
{
"text": "hello world",
"is_final": true,
"words": [
{
"word": "hello",
"start": 0.5,
"end": 1.2,
"conf": 0.95
},
{
"word": "world",
"start": 1.3,
"end": 2.0,
"conf": 0.92
}
]
}
Error Message:
{
"error": "Error description",
"status": "error"
}
🔧 Integration Examples
Express.js Server
const express = require('express');
const WebSocket = require('ws');
const http = require('http');

const app = express();
const server = http.createServer(app);

// WebSocket endpoint that proxies client audio to the NuralVoiceSTT Space.
const wss = new WebSocket.Server({ server, path: '/api/transcribe' });

wss.on('connection', (ws) => {
  // One upstream connection per connected client.
  const hfWs = new WebSocket('wss://ashishkblink-NuralVoice.hf.space/ws/transcribe');

  // Forward client audio upstream once the upstream socket is open.
  ws.on('message', (data) => {
    if (hfWs.readyState === WebSocket.OPEN) {
      hfWs.send(data);
    }
  });

  // Relay transcription results back to the client.
  hfWs.on('message', (data) => {
    if (ws.readyState === WebSocket.OPEN) {
      ws.send(data);
    }
  });

  // Close the paired socket when either side disconnects; without this,
  // upstream connections leak for every client that goes away.
  ws.on('close', () => hfWs.close());
  hfWs.on('close', () => ws.close());

  // An unhandled 'error' event would crash the Node process.
  hfWs.on('error', (err) => console.error('Upstream error:', err));
  ws.on('error', (err) => console.error('Client error:', err));
});

server.listen(3000, () => {
  console.log('Server running on port 3000');
});
React Integration
// In your React component
import { useEffect, useRef, useState } from 'react';
function SpeechToText() {
const [transcription, setTranscription] = useState('');
const wsRef = useRef(null);
useEffect(() => {
const ws = new WebSocket('wss://ashishkblink-NuralVoice.hf.space/ws/transcribe');
wsRef.current = ws;
ws.onmessage = (event) => {
const data = JSON.parse(event.data);
if (data.text) {
setTranscription(prev => prev + ' ' + data.text);
}
};
return () => ws.close();
}, []);
const sendAudio = (audioBuffer) => {
if (wsRef.current?.readyState === WebSocket.OPEN) {
wsRef.current.send(audioBuffer);
}
};
return <div>{transcription}</div>;
}
⚠️ Important Notes
Rate Limiting: Be mindful of API usage. Don't send too many requests simultaneously.
Connection Management: Always close WebSocket connections when done to free resources.
Error Handling: Implement proper error handling for network issues and API errors.
Audio Quality: Better audio quality = better transcription accuracy. Use noise reduction when possible.
Latency: WebSocket provides low-latency streaming. For best results, send audio in small chunks (2000-4000 bytes).
Troubleshooting
Connection Refused
- Check if the Space is running
- Verify the WebSocket URL is correct
- Ensure you're using `wss://` (secure WebSocket)
No Transcription
- Verify audio format (16kHz, 16-bit, mono PCM)
- Check if audio is being sent correctly
- Ensure WebSocket connection is open
Poor Accuracy
- Use better quality audio
- Reduce background noise
- Speak clearly and at moderate pace
Support
For issues or questions:
- Check the Space page
- Review error messages in WebSocket responses
- Ensure your audio format matches requirements
Developed by Blink Digital | Model Repository