# my-tts-player / server.py
# ssasio's picture
# Upload server.py
# ef4b556 verified
import asyncio
import base64
import hashlib
import http.client
import http.server
import importlib
import json
import os
import re
import shutil
import socketserver
import subprocess
import sys
import tempfile
import threading
import urllib.parse
import urllib.request
# Absolute path of the directory that contains this script; every data
# directory used below (audio_cache, ffmpeg-wasm, ...) is built from it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Make the script directory the working directory so that the static-file
# handler (SimpleHTTPRequestHandler serves the CWD by default) serves it.
os.chdir(SCRIPT_DIR)
# ── Check for system ffmpeg ──
def find_ffmpeg():
    """Locate a usable system ffmpeg executable.

    First tries to run ``ffmpeg -version`` through PATH; if that does not
    succeed, falls back to a couple of well-known install locations.

    Returns:
        ``'ffmpeg'`` when the PATH lookup works, the absolute path of the
        first existing fallback location otherwise, or ``None`` when no
        ffmpeg could be found.
    """
    try:
        result = subprocess.run(
            ['ffmpeg', '-version'],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=5
        )
        if result.returncode == 0:
            print(" [OK] System ffmpeg found")
            return 'ffmpeg'
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers FileNotFoundError (not installed) as well as
        # PermissionError (a non-executable "ffmpeg" on PATH); neither
        # should abort start-up, so continue with the fallback paths.
        pass
    # Fallback paths
    for path in ('/usr/bin/ffmpeg', '/usr/local/bin/ffmpeg'):
        if os.path.exists(path):
            print(f" [OK] ffmpeg found: {path}")
            return path
    print(" [!] ffmpeg not found — server-side conversion will not work")
    return None
# Resolved once at import time: 'ffmpeg', an absolute path, or None when no
# system ffmpeg was found (server-side conversion is then unavailable).
FFMPEG_BIN = find_ffmpeg()
# Local directory that holds the browser-side FFmpeg assets listed below.
FFMPEG_DIR = os.path.join(SCRIPT_DIR, 'ffmpeg-wasm')
# Local file name -> jsDelivr URL it is downloaded from
# (consumed by download_ffmpeg_files()).
FFMPEG_FILES = {
    'ffmpeg.js': 'https://cdn.jsdelivr.net/npm/@ffmpeg/ffmpeg@0.12.10/dist/umd/ffmpeg.js',
    'ffmpeg-util.js': 'https://cdn.jsdelivr.net/npm/@ffmpeg/util@0.12.1/dist/umd/index.js',
    'ffmpeg-core.js': 'https://cdn.jsdelivr.net/npm/@ffmpeg/core-mt@0.12.6/dist/esm/ffmpeg-core.js',
    'ffmpeg-core.wasm': 'https://cdn.jsdelivr.net/npm/@ffmpeg/core-mt@0.12.6/dist/esm/ffmpeg-core.wasm',
    'ffmpeg-core.worker.js': 'https://cdn.jsdelivr.net/npm/@ffmpeg/core-mt@0.12.6/dist/esm/ffmpeg-core.worker.js',
    '814.ffmpeg.js': 'https://cdn.jsdelivr.net/npm/@ffmpeg/ffmpeg@0.12.10/dist/umd/814.ffmpeg.js',
}
# ── Edge-TTS: Azure Neural voices without an API key ──
# Set by ensure_edge_tts(); read by the synthesis code and the HTTP handler.
EDGE_TTS_AVAILABLE = False


def ensure_edge_tts():
    """Make sure the ``edge_tts`` package can be imported, installing it if needed.

    Tries a plain import first. If that fails, runs
    ``<python> -m pip install edge-tts --quiet`` and retries the import.
    The module-level flag ``EDGE_TTS_AVAILABLE`` is updated to match the
    outcome.

    Returns:
        True when ``edge_tts`` is importable afterwards, False otherwise.
    """
    global EDGE_TTS_AVAILABLE
    try:
        import edge_tts  # noqa: F401 - imported only to probe availability
    except ImportError:
        pass
    else:
        EDGE_TTS_AVAILABLE = True
        print(" [OK] edge-tts is available")
        return True

    print(" [!] edge-tts not found — installing...")
    try:
        subprocess.run(
            [sys.executable, '-m', 'pip', 'install', 'edge-tts', '--quiet'],
            check=True, capture_output=True
        )
        # The package was created after the interpreter started; without
        # this the import system may keep using stale directory caches and
        # fail to see the freshly installed module.
        importlib.invalidate_caches()
        import edge_tts  # noqa: F401
    except (subprocess.SubprocessError, OSError, ImportError) as e:
        print(f" [!] Failed to install edge-tts: {e}")
        print(" [!] Run: pip install edge-tts")
        EDGE_TTS_AVAILABLE = False
        return False

    EDGE_TTS_AVAILABLE = True
    print(" [OK] edge-tts installed successfully")
    return True
# Voice map: Edge voice display name → edge-tts voice string
EDGE_VOICE_MAP = {
    # Voices the browser lists as "Multilingual"; several of them are mapped
    # to regular (non-multilingual) edge-tts voices.
    'Microsoft Ava Multilingual Online (Natural) - en-US': 'en-US-AvaMultilingualNeural',
    'Microsoft Andrew Multilingual Online (Natural) - en-US': 'en-US-AndrewMultilingualNeural',
    'Microsoft Emma Multilingual Online (Natural) - en-US': 'en-US-EmmaMultilingualNeural',
    'Microsoft Brian Multilingual Online (Natural) - en-US': 'en-US-BrianMultilingualNeural',
    'Microsoft Aria Multilingual Online (Natural) - en-US': 'en-US-AriaNeural',
    'Microsoft Guy Multilingual Online (Natural) - en-US': 'en-US-GuyNeural',
    'Microsoft Jenny Multilingual Online (Natural) - en-US': 'en-US-JennyNeural',
    'Microsoft Davis Multilingual Online (Natural) - en-US': 'en-US-DavisNeural',
    'Microsoft Jane Multilingual Online (Natural) - en-US': 'en-US-JaneNeural',
    'Microsoft Jason Multilingual Online (Natural) - en-US': 'en-US-JasonNeural',
    'Microsoft Sara Multilingual Online (Natural) - en-US': 'en-US-SaraNeural',
    'Microsoft Tony Multilingual Online (Natural) - en-US': 'en-US-TonyNeural',
    'Microsoft Nancy Multilingual Online (Natural) - en-US': 'en-US-NancyNeural',
    'Microsoft Ryan Multilingual Online (Natural) - en-GB': 'en-GB-RyanNeural',
    'Microsoft Sonia Multilingual Online (Natural) - en-GB': 'en-GB-SoniaNeural',
    'Microsoft Libby Multilingual Online (Natural) - en-GB': 'en-GB-LibbyNeural',
    # NOTE(review): "Thomas" resolves to the Henri voice — presumably a
    # deliberate stand-in; confirm.
    'Microsoft Thomas Multilingual Online (Natural) - fr-FR': 'fr-FR-HenriNeural',
    'Microsoft Henri Multilingual Online (Natural) - fr-FR': 'fr-FR-HenriNeural',
    'Microsoft Denise Multilingual Online (Natural) - fr-FR': 'fr-FR-DeniseNeural',
    'Microsoft Katja Multilingual Online (Natural) - de-DE': 'de-DE-KatjaNeural',
    'Microsoft Seraphina Multilingual Online (Natural) - de-DE': 'de-DE-SeraphinaMultilingualNeural',
    'Microsoft Serafina Online (Natural) - bg-BG': 'bg-BG-KalinaNeural',
    # Fallback for unknown Multilingual voices
    '_multilingual_default': 'en-US-AndrewMultilingualNeural',
    # Bulgarian
    'Microsoft Kalina Online (Natural) - bg-BG': 'bg-BG-KalinaNeural',
    'Microsoft Boris Online (Natural) - bg-BG': 'bg-BG-BorislavNeural',
}


def resolve_edge_voice(voice_name):
    """Return the edge-tts voice string for a Web Speech API voice name.

    Resolution order:
      1. exact key in ``EDGE_VOICE_MAP``;
      2. case-insensitive substring match in either direction against the
         real (non-sentinel) keys, first hit in map order wins;
      3. the ``_multilingual_default`` entry if the name mentions
         "multilingual";
      4. ``'bg-BG-KalinaNeural'`` as the absolute fallback.

    Args:
        voice_name: display name reported by the browser. ``None``, empty
            and whitespace-only names go straight to the absolute fallback.

    Returns:
        An edge-tts voice identifier string.
    """
    absolute_fallback = 'bg-BG-KalinaNeural'
    # An empty string is a substring of every key, so without this guard an
    # empty name would silently resolve to the first map entry.
    if not isinstance(voice_name, str) or not voice_name.strip():
        return absolute_fallback
    if voice_name in EDGE_VOICE_MAP:
        return EDGE_VOICE_MAP[voice_name]
    wanted = voice_name.lower()
    # Try partial match
    for key, val in EDGE_VOICE_MAP.items():
        if key.startswith('_'):
            continue  # sentinel entries are not display names
        lowered = key.lower()
        if lowered in wanted or wanted in lowered:
            return val
    # If Multilingual → use default multilingual voice
    if 'multilingual' in wanted:
        return EDGE_VOICE_MAP['_multilingual_default']
    return absolute_fallback
def tts_cache_key(text, voice_name, rate, index=-1):
    """Build the cache key for one synthesized utterance.

    The key is the first 16 hex digits of the SHA-1 of
    ``text + voice_name + rate`` (rate formatted with one decimal). It is
    meant to match the client-side ``hashText()``, so the input layout must
    not change.

    Args:
        text: the text to be spoken.
        voice_name: voice display name as sent by the client.
        rate: playback rate; anything ``float()`` accepts.
        index: subtitle position. When >= 0 the key is prefixed with the
            zero-padded index, e.g. ``'0042_<hash>'``.

    Returns:
        The cache key string (without file extension).
    """
    payload = ''.join((text, voice_name, format(float(rate), '.1f')))
    short_digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()[:16]
    if index < 0:
        return short_digest
    return '{:04d}_{}'.format(index, short_digest)
async def _synthesize_edge_tts(text, voice, rate_ratio, out_path):
    """Synthesize *text* with edge-tts and write the audio to *out_path*.

    Args:
        text: text to speak.
        voice: edge-tts voice identifier.
        rate_ratio: speed relative to normal (1.0 = unchanged, 1.3 = 30 % faster).
        out_path: destination file path handed to ``Communicate.save``.
    """
    import edge_tts

    # edge-tts wants a signed percentage relative to normal speed,
    # e.g. 1.3 → "+30%", 0.9 → "-10%", 1.0 → "+0%".
    percent = int(round((float(rate_ratio) - 1.0) * 100))
    signed_rate = f'{percent:+d}%'
    speaker = edge_tts.Communicate(text, voice, rate=signed_rate)
    await speaker.save(out_path)
def synthesize_to_cache(text, voice_name, rate, cache_dir, index=-1):
    """Synthesize *text* into ``<cache_dir>/<key>.mp3`` unless already cached.

    The audio is first written to a temporary ``.part`` file in the same
    directory and only renamed to its final name once synthesis finished
    and the result is larger than 500 bytes. A failed or concurrent
    synthesis can therefore never leave a truncated MP3 under the final
    name, where it would be treated as a valid cache hit forever.

    Args:
        text: text to speak.
        voice_name: browser voice display name (mapped via resolve_edge_voice).
        rate: speed ratio relative to normal.
        cache_dir: existing directory that holds the cached audio files.
        index: optional subtitle index used as key prefix (see tts_cache_key).

    Returns:
        Path of the cached MP3, or None when edge-tts is unavailable or
        synthesis failed.
    """
    if not EDGE_TTS_AVAILABLE:
        return None
    key = tts_cache_key(text, voice_name, rate, index)
    out_path = os.path.join(cache_dir, key + '.mp3')
    # Files of 500 bytes or less are treated as failed/empty output.
    if os.path.exists(out_path) and os.path.getsize(out_path) > 500:
        return out_path  # already cached
    edge_voice = resolve_edge_voice(voice_name)
    tmp_path = None
    try:
        # The ".part" suffix keeps the file out of cache listing/eviction,
        # which only look at .mp3/.wav names.
        fd, tmp_path = tempfile.mkstemp(prefix=key + '_', suffix='.part', dir=cache_dir)
        os.close(fd)
        # asyncio.run() is unreliable inside ThreadingTCPServer threads —
        # create a fresh event loop per thread instead
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(_synthesize_edge_tts(text, edge_voice, rate, tmp_path))
        finally:
            loop.close()
        if os.path.getsize(tmp_path) > 500:
            os.replace(tmp_path, out_path)  # atomic publish
            tmp_path = None
            return out_path
    except Exception as e:
        # Best effort: any synthesis/network/filesystem failure is reported
        # to the caller as "no result".
        print(f' [TTS] Synthesis error: {e}')
    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # nothing left to clean up
    return None
# ── Audio cache auto-eviction ──
def _read_cache_limit_mb(default=500):
    """Read AUDIO_CACHE_MAX_MB from the environment, falling back to *default*."""
    raw = os.environ.get('AUDIO_CACHE_MAX_MB')
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        # A typo in the env var should not prevent the server from starting.
        print(f' [Cache] Ignoring invalid AUDIO_CACHE_MAX_MB={raw!r}; using {default} MB')
        return default


# Maximum allowed cache size in MB. Override with env var AUDIO_CACHE_MAX_MB.
AUDIO_CACHE_MAX_MB = _read_cache_limit_mb()


def evict_cache_if_needed(cache_dir, max_mb=None):
    """Shrink the audio cache to at most *max_mb* megabytes.

    Only ``.mp3`` and ``.wav`` files directly inside *cache_dir* are
    counted. When their total size exceeds the limit, files are deleted
    oldest-first (by modification time) until the total is within the
    limit again. Files that vanish concurrently are ignored.

    Args:
        cache_dir: path to the audio cache directory; a missing directory
            is a no-op.
        max_mb: size limit in MB (defaults to ``AUDIO_CACHE_MAX_MB``).
    """
    if max_mb is None:
        max_mb = AUDIO_CACHE_MAX_MB
    if not os.path.isdir(cache_dir):
        return

    # Collect (mtime, size, path) for every cache file with a single stat each.
    entries = []
    total_bytes = 0
    with os.scandir(cache_dir) as listing:
        for entry in listing:
            if not entry.name.endswith(('.mp3', '.wav')):
                continue
            try:
                info = entry.stat()
            except OSError:
                continue  # file may have been deleted by a concurrent request
            entries.append((info.st_mtime, info.st_size, entry.path))
            total_bytes += info.st_size

    limit_bytes = max_mb * 1024 * 1024
    if total_bytes <= limit_bytes:
        return  # still within budget — nothing to do

    # Sort oldest-first so the least recently modified files go first.
    entries.sort(key=lambda e: e[0])
    freed = 0
    deleted = 0
    for _mtime, size, fpath in entries:
        if total_bytes - freed <= limit_bytes:
            break
        try:
            os.remove(fpath)
        except OSError:
            continue  # already gone
        freed += size
        deleted += 1
    if deleted:
        print(f' [Cache] Auto-evicted {deleted} file(s), '
              f'freed {freed / 1024 / 1024:.1f} MB '
              f'(limit: {max_mb} MB)')
# ── VLC Detection ──
# Well-known install locations probed by find_vlc(), in this order
# (Windows, Linux, macOS).
VLC_PATHS = [
    r'C:\Program Files\VideoLAN\VLC\vlc.exe',
    r'C:\Program Files (x86)\VideoLAN\VLC\vlc.exe',
    '/usr/bin/vlc',
    '/usr/local/bin/vlc',
    '/Applications/VLC.app/Contents/MacOS/VLC',
]
# Port and password handed to VLC's HTTP interface when it is launched
# (CORPHandler.start_vlc) and reused by CORPHandler.proxy_vlc to authenticate.
# NOTE(review): the password is hard-coded; VLC is bound to 127.0.0.1 only.
VLC_HTTP_PORT = 9090
VLC_PASSWORD = 'vlcpass'
# Handle of the VLC child process started by start_vlc(); None until the
# first launch.
vlc_process = None
def find_vlc():
    """Return the path of the VLC executable, or None when it is not installed.

    The fixed locations in ``VLC_PATHS`` are checked first; if none exists,
    the executable search path is consulted via ``shutil.which``.
    """
    for candidate in VLC_PATHS:
        if os.path.exists(candidate):
            return candidate
    # shutil.which replaces spawning `where`/`which`: same first-match-on-PATH
    # result, no subprocess and no blanket exception handler needed.
    return shutil.which('vlc')
# Resolved once at import time; None when VLC could not be located.
VLC_EXE = find_vlc()
def download_ffmpeg_files():
    """Download any missing FFmpeg browser assets into ``FFMPEG_DIR``.

    A file counts as present when it exists and is larger than 1 KiB.
    Each missing file is fetched into ``<name>.part`` and renamed to its
    final name only after the transfer completed and (when the server sent
    a Content-Length) the byte count matched. An interrupted download
    therefore never leaves a truncated file that a later run would accept
    as complete.

    Returns:
        True when every file in ``FFMPEG_FILES`` is present afterwards,
        False as soon as one download fails.
    """
    os.makedirs(FFMPEG_DIR, exist_ok=True)

    def is_present(path):
        return os.path.exists(path) and os.path.getsize(path) > 1024

    targets = {fname: os.path.join(FFMPEG_DIR, fname) for fname in FFMPEG_FILES}
    if all(is_present(path) for path in targets.values()):
        return True

    print(" Downloading FFmpeg files (one-time download, ~25 MB)...")
    for fname, url in FFMPEG_FILES.items():
        dest = targets[fname]
        if is_present(dest):
            print(f" [OK] {fname} (already downloaded)")
            continue
        print(f" Downloading: {fname} ...", end='', flush=True)
        part = dest + '.part'
        try:
            # The timeout keeps a stalled connection from hanging start-up.
            with urllib.request.urlopen(url, timeout=60) as resp, open(part, 'wb') as out:
                expected = resp.headers.get('Content-Length')
                shutil.copyfileobj(resp, out)
                written = out.tell()
            if expected is not None and written != int(expected):
                raise OSError(f'incomplete download ({written} of {expected} bytes)')
            os.replace(part, dest)
            size = os.path.getsize(dest)
            print(f" {size//1024}KB OK")
        except Exception as e:
            # Start-up boundary: report any network/filesystem failure and
            # let the caller decide how to proceed.
            print(f" ERROR: {e}")
            try:
                os.remove(part)
            except OSError:
                pass  # no partial file was created
            return False
    return True
class CORPHandler(http.server.SimpleHTTPRequestHandler):
    """Static file handler extended with the player's JSON/media endpoints.

    GET  /audio-cache/<file>   serve one cached audio file
    GET  /audio-cache-list     list cache keys and total size
    GET  /tts-voices           voices known to EDGE_VOICE_MAP
    GET  /tts-status           whether edge-tts is usable
    GET  /vlc/<path>           proxy to VLC's local HTTP interface
    GET  /vlc-check            whether VLC was found
    GET  /convert-check        whether system ffmpeg was found
    POST /convert              transcode the uploaded video to MP4
    POST /audio-cache-clear    delete all cached audio
    POST /audio-cache-save     store browser-generated audio in the cache
    POST /tts-synthesize       synthesize text via edge-tts into the cache
    POST /vlc-upload           store an uploaded video for VLC
    POST /vlc-open             launch VLC on a given path

    Everything else falls through to SimpleHTTPRequestHandler. All responses
    carry the COOP/COEP headers added in ``end_headers``.
    """

    # File suffixes that count as cached audio (same set used by eviction).
    _AUDIO_SUFFIXES = ('.mp3', '.wav')
    # Uploads for VLC go into their own sub-directory so that a client-chosen
    # file name can never overwrite the server script or the served pages.
    _UPLOAD_SUBDIR = 'vlc_uploads'
    # Last successful VLC status.xml payload, replayed when VLC is slow.
    _last_vlc_status = None

    # ── Request/response helpers ──
    def _route(self):
        """Return the request path without query string or fragment."""
        return self.path.split('?', 1)[0].split('#', 1)[0]

    def _cache_dir(self):
        """Return the audio cache directory path (not created here)."""
        return os.path.join(SCRIPT_DIR, 'audio_cache')

    def _content_length(self):
        """Return the declared body length, or 0 when missing or malformed."""
        try:
            return max(0, int(self.headers.get('Content-Length', 0)))
        except (TypeError, ValueError):
            return 0

    def _send_json(self, status, payload):
        """Send *payload* as a JSON response with the given status code."""
        body = json.dumps(payload).encode()
        self.send_response(status)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _send_status(self, status, body=b''):
        """Send a bare status response with an optional untyped body."""
        self.send_response(status)
        self.end_headers()
        if body:
            self.wfile.write(body)

    def _read_json_object(self):
        """Read the request body and return it as a dict, or None if invalid."""
        raw = self.rfile.read(self._content_length())
        try:
            data = json.loads(raw)
        except ValueError:  # includes JSONDecodeError and UnicodeDecodeError
            return None
        return data if isinstance(data, dict) else None

    def _stream_body_to(self, dest_path, length):
        """Copy *length* request-body bytes to *dest_path* in 64 KiB chunks."""
        remaining = length
        with open(dest_path, 'wb') as out:
            while remaining > 0:
                chunk = self.rfile.read(min(65536, remaining))
                if not chunk:
                    break  # client closed the connection early
                out.write(chunk)
                remaining -= len(chunk)

    # ── GET ──
    def do_GET(self):
        """Dispatch GET requests; unknown paths are served as static files."""
        route = self._route()
        if route.startswith('/audio-cache/'):
            self._serve_cached_audio(os.path.basename(route))
        elif route == '/audio-cache-list':
            self._send_cache_listing()
        elif route == '/tts-voices':
            # List available Azure voices (for Chrome without Microsoft voices)
            voices = [
                {'name': k, 'edgeVoice': v, 'lang': k.split(' - ')[-1] if ' - ' in k else 'en-US'}
                for k, v in EDGE_VOICE_MAP.items()
                if not k.startswith('_')
            ]
            self._send_json(200, {'voices': voices})
        elif route == '/tts-status':
            self._send_json(200, {'available': EDGE_TTS_AVAILABLE})
        elif self.path.startswith('/vlc/'):
            # Forward the raw remainder: VLC commands live in the query string.
            self.proxy_vlc(self.path[5:])
        elif route == '/vlc-check':
            self._send_json(200, {'installed': VLC_EXE is not None, 'path': VLC_EXE or ''})
        elif route == '/convert-check':
            self._send_json(200, {'available': FFMPEG_BIN is not None})
        else:
            super().do_GET()

    def _serve_cached_audio(self, fname):
        """Stream one file from the audio cache, or answer 404."""
        fpath = os.path.join(self._cache_dir(), fname)
        try:
            # Opening directly (instead of exists() + open()) also rejects an
            # empty name, which would otherwise point at the directory itself.
            audio = open(fpath, 'rb')
        except OSError:
            self._send_status(404)
            return
        with audio:
            ext = fname.rsplit('.', 1)[-1].lower()
            self.send_response(200)
            self.send_header('Content-Type', 'audio/mpeg' if ext == 'mp3' else 'audio/wav')
            self.send_header('Content-Length', str(os.fstat(audio.fileno()).st_size))
            self.end_headers()
            shutil.copyfileobj(audio, self.wfile)

    def _send_cache_listing(self):
        """Report cache keys (file names without extension), count and size."""
        cache_dir = self._cache_dir()
        os.makedirs(cache_dir, exist_ok=True)
        key_set = set()
        total_size = 0
        for fname in os.listdir(cache_dir):
            fpath = os.path.join(cache_dir, fname)
            try:
                if os.path.isfile(fpath):
                    total_size += os.path.getsize(fpath)
            except OSError:
                continue  # evicted by a concurrent request
            # Strip extensions so the browser can compare by key
            if fname.endswith(self._AUDIO_SUFFIXES):
                key_set.add(fname.rsplit('.', 1)[0])
        keys = sorted(key_set)
        # Parse index prefix from keys of the form "0042_a3f9c1de" and return
        # indexed_keys: { "42": "0042_a3f9c1de" } for O(1) lookup by position
        # (json.dumps turns the int keys into strings).
        indexed_keys = {}
        for key in keys:
            m = re.match(r'^(\d+)_', key)
            if m:
                indexed_keys[int(m.group(1))] = key
        self._send_json(200, {
            'keys': keys,
            'indexed_keys': indexed_keys,
            'count': len(keys),
            'size_mb': round(total_size / 1024 / 1024, 2)
        })

    # ── POST ──
    def do_POST(self):
        """Dispatch POST requests; unknown paths answer 404."""
        handlers = {
            '/convert': self._handle_convert,
            '/audio-cache-clear': self._handle_cache_clear,
            '/audio-cache-save': self._handle_cache_save,
            '/tts-synthesize': self._handle_tts_synthesize,
            '/vlc-upload': self._handle_vlc_upload,
            '/vlc-open': self._handle_vlc_open,
        }
        handler = handlers.get(self._route())
        if handler is None:
            self._send_status(404)
            return
        handler()

    def _handle_convert(self):
        """Transcode the uploaded video to H.264/AAC MP4 with system ffmpeg."""
        if not FFMPEG_BIN:
            self._send_json(503, {'error': 'ffmpeg not found on server'})
            return
        # Only the extension of the client-supplied name is kept: a fixed
        # base name cannot be empty and cannot collide with output.mp4.
        client_name = os.path.basename(self.headers.get('X-Filename', 'input.mkv'))
        ext = os.path.splitext(client_name)[1]
        if not re.fullmatch(r'\.[A-Za-z0-9]{1,8}', ext):
            ext = '.mkv'
        length = self._content_length()
        # Write the input file to a temp directory
        tmp_dir = tempfile.mkdtemp(prefix='convert_', dir=SCRIPT_DIR)
        in_path = os.path.join(tmp_dir, 'input' + ext)
        out_path = os.path.join(tmp_dir, 'output.mp4')
        response_started = False
        try:
            self._stream_body_to(in_path, length)
            cmd = [
                FFMPEG_BIN, '-y',
                '-i', in_path,
                '-c:v', 'libx264',
                '-preset', 'fast',
                '-crf', '23',
                '-c:a', 'aac',
                '-b:a', '128k',
                '-movflags', '+faststart',
                out_path
            ]
            result = subprocess.run(cmd, capture_output=True, timeout=600)
            if result.returncode != 0:
                # Return only the tail of ffmpeg's stderr.
                err = result.stderr.decode('utf-8', errors='replace')[-500:]
                response_started = True
                self._send_json(500, {'error': err})
                return
            out_size = os.path.getsize(out_path)
            response_started = True
            self.send_response(200)
            self.send_header('Content-Type', 'video/mp4')
            self.send_header('Content-Length', str(out_size))
            self.send_header('Content-Disposition', 'inline; filename="output.mp4"')
            self.end_headers()
            with open(out_path, 'rb') as f:
                shutil.copyfileobj(f, self.wfile, 65536)
        except subprocess.TimeoutExpired:
            self._send_json(504, {'error': 'timeout'})
        except Exception as e:
            if response_started:
                # Headers are already on the wire (typically the client went
                # away mid-download); a second response would only corrupt
                # the stream, so just drop the connection.
                self.close_connection = True
            else:
                self._send_json(500, {'error': str(e)})
        finally:
            # Clean up temporary files
            shutil.rmtree(tmp_dir, ignore_errors=True)

    def _handle_cache_clear(self):
        """Delete all .mp3/.wav files from audio_cache/ and report the result."""
        cache_dir = self._cache_dir()
        deleted = 0
        size_freed = 0
        if os.path.exists(cache_dir):
            for fname in os.listdir(cache_dir):
                if not fname.endswith(self._AUDIO_SUFFIXES):
                    continue
                fpath = os.path.join(cache_dir, fname)
                try:
                    size = os.path.getsize(fpath)
                    os.remove(fpath)
                except OSError:
                    continue  # already removed or not removable; skip it
                size_freed += size
                deleted += 1
        self._send_json(200, {
            'ok': True,
            'deleted': deleted,
            'freed_mb': round(size_freed / 1024 / 1024, 2)
        })

    def _handle_cache_save(self):
        """Store browser-generated audio under the name given in X-Cache-Key."""
        fname = os.path.basename(self.headers.get('X-Cache-Key', 'unknown.wav'))
        data = self.rfile.read(self._content_length())
        # The cache directory is reachable through the static file handler,
        # so only audio file names are accepted; anything else would also be
        # invisible to listing and eviction.
        if not fname.endswith(self._AUDIO_SUFFIXES):
            self._send_json(400, {'error': 'X-Cache-Key must end in .mp3 or .wav'})
            return
        cache_dir = self._cache_dir()
        os.makedirs(cache_dir, exist_ok=True)
        with open(os.path.join(cache_dir, fname), 'wb') as f:
            f.write(data)
        # Auto-evict oldest files if cache exceeds the size limit
        evict_cache_if_needed(cache_dir)
        self._send_json(200, {'ok': True})

    def _handle_tts_synthesize(self):
        """Synthesize the posted text via edge-tts and return its cache key."""
        body = self._read_json_object()
        if body is None:
            self._send_json(400, {'error': 'request body must be a JSON object'})
            return
        text = body.get('text', '')
        voice_name = body.get('voice', '')
        try:
            rate = float(body.get('rate', 1.3))
            sub_index = int(body.get('index', -1))
        except (TypeError, ValueError):
            self._send_json(400, {'error': 'rate and index must be numeric'})
            return
        if not EDGE_TTS_AVAILABLE:
            self._send_json(503, {'error': 'edge-tts not available'})
            return
        if not isinstance(text, str) or not text or not isinstance(voice_name, str):
            self._send_json(400, {'error': 'text must be a non-empty string'})
            return
        cache_dir = self._cache_dir()
        os.makedirs(cache_dir, exist_ok=True)
        # Cache key — with index prefix "0042_hash"
        key = tts_cache_key(text, voice_name, rate, sub_index)
        mp3_path = os.path.join(cache_dir, key + '.mp3')
        if not (os.path.exists(mp3_path) and os.path.getsize(mp3_path) > 500):
            result = synthesize_to_cache(text, voice_name, rate, cache_dir, sub_index)
            if not result:
                self._send_status(500, b'TTS synthesis failed')
                return
            # Auto-evict oldest files if cache exceeds the size limit
            evict_cache_if_needed(cache_dir)
        self._send_json(200, {'key': key, 'file': key + '.mp3'})

    def _handle_vlc_upload(self):
        """Store an uploaded video and return its absolute path as tmpPath."""
        fname = os.path.basename(self.headers.get('X-Filename', 'vlc_temp.mkv'))
        if fname in ('', '.', '..'):
            fname = 'vlc_temp.mkv'
        upload_dir = os.path.join(SCRIPT_DIR, self._UPLOAD_SUBDIR)
        os.makedirs(upload_dir, exist_ok=True)
        tmp_path = os.path.join(upload_dir, fname)
        # Stream to disk: videos are too large to buffer in memory.
        self._stream_body_to(tmp_path, self._content_length())
        self._send_json(200, {'tmpPath': tmp_path})

    def _handle_vlc_open(self):
        """Launch VLC on the path given in the JSON body."""
        body = self._read_json_object()
        if body is None:
            self._send_json(400, {'error': 'request body must be a JSON object'})
            return
        video_path = body.get('path', '')
        # A leading '-' would be parsed by VLC as a command-line option.
        if not isinstance(video_path, str) or not video_path or video_path.startswith('-'):
            self._send_json(400, {'error': 'invalid path'})
            return
        self.start_vlc(video_path)

    # ── VLC ──
    def proxy_vlc(self, vlc_path):
        """Forward a GET to VLC's local HTTP interface and relay the answer.

        When VLC does not answer within 0.5 s (or errors), the last
        successful ``status.xml`` payload is replayed for status requests;
        other requests get 503 with a placeholder document.
        """
        is_status = 'status.xml' in vlc_path
        credentials = base64.b64encode(f':{VLC_PASSWORD}'.encode()).decode()
        req = urllib.request.Request(
            f'http://localhost:{VLC_HTTP_PORT}/{vlc_path}',
            headers={'Authorization': f'Basic {credentials}'}
        )
        try:
            with urllib.request.urlopen(req, timeout=0.5) as resp:
                data = resp.read()
                content_type = resp.headers.get('Content-Type', 'text/xml')
        except (OSError, http.client.HTTPException, ValueError):
            # On timeout → return last cached status instead of an error
            if is_status and CORPHandler._last_vlc_status is not None:
                self.send_response(200)
                self.send_header('Content-Type', 'text/xml')
                self.end_headers()
                self.wfile.write(CORPHandler._last_vlc_status)
            else:
                self._send_status(503, b'<root><time>-1</time></root>')
            return
        # Cache the last successful response
        if is_status:
            CORPHandler._last_vlc_status = data
        self.send_response(200)
        self.send_header('Content-Type', content_type)
        self.end_headers()
        self.wfile.write(data)

    def start_vlc(self, video_path):
        """(Re)start VLC on *video_path* with its HTTP interface enabled."""
        global vlc_process
        if not VLC_EXE:
            self._send_status(404, b'VLC not found')
            return
        try:
            if vlc_process and vlc_process.poll() is None:
                vlc_process.terminate()
                # Wait for the old instance to exit so that the new one can
                # bind the HTTP port; force-kill if it does not comply.
                try:
                    vlc_process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    vlc_process.kill()
                    vlc_process.wait()
            vlc_process = subprocess.Popen([
                VLC_EXE, video_path,
                '--extraintf', 'http',
                '--http-host', '127.0.0.1',
                '--http-port', str(VLC_HTTP_PORT),
                '--http-password', VLC_PASSWORD,
                '--no-video-title-show',
            ])
        except (OSError, ValueError) as e:
            self._send_status(500, str(e).encode())
            return
        self._send_json(200, {'ok': True, 'vlc_port': VLC_HTTP_PORT})

    # ── Overrides ──
    def end_headers(self):
        """Add COOP/COEP and no-cache headers to every response."""
        # Presumably required for cross-origin isolation, which the
        # multi-threaded ffmpeg.wasm build needs — TODO confirm.
        self.send_header('Cross-Origin-Opener-Policy', 'same-origin')
        self.send_header('Cross-Origin-Embedder-Policy', 'require-corp')
        self.send_header('Cache-Control', 'no-cache')
        super().end_headers()

    def log_message(self, fmt, *args):
        """Silence the default per-request logging."""
        pass
class _ReusableThreadingServer(socketserver.ThreadingTCPServer):
    """Threading TCP server that can rebind its port right after a restart."""

    # Without SO_REUSEADDR a quick restart can fail with "Address already in
    # use" while the previous socket lingers in TIME_WAIT.
    allow_reuse_address = True
    # Request threads must not keep the process alive on shutdown.
    daemon_threads = True


print()
print(" ============================================")
print(" Voice Player - Starting up")
print(" ============================================")
print()
if VLC_EXE:
    print(f" [OK] VLC found: {VLC_EXE}")
else:
    print(" [!] VLC not found - unsupported formats will be converted with FFmpeg")
    print(" [!] Download VLC from: https://www.videolan.org/vlc/")
print()
# ── Initialise edge-tts for Azure Neural TTS cache ──
ensure_edge_tts()
print()
# The browser-side FFmpeg assets are mandatory: abort start-up without them.
if not download_ffmpeg_files():
    print()
    print(" [!] Failed to download FFmpeg files.")
    print(" [!] Check your internet connection and try again.")
    sys.exit(1)
print()
# Listening port; overridable through the PORT environment variable.
PORT = int(os.environ.get("PORT", 7860))
try:
    # The context manager closes the listening socket on every exit path.
    with _ReusableThreadingServer(('', PORT), CORPHandler) as server:
        print(f" Server running on port: {PORT}")
        print(f" Open: http://localhost:{PORT}/VIDEO_new.html")
        print(f" Audio cache limit: {AUDIO_CACHE_MAX_MB} MB (set AUDIO_CACHE_MAX_MB env var to change)")
        print()
        server.serve_forever()
except KeyboardInterrupt:
    # Ctrl+C is the normal way to stop the server; exit without a traceback.
    print()
    print(" Server stopped.")
except Exception as e:
    print(f" [!] Failed to start server: {e}")
    sys.exit(1)