"""Local HTTP server for the Voice Player page.

Serves the static files next to this script with cross-origin isolation
headers and adds a few JSON/binary endpoints:

* an on-disk audio cache (``audio_cache/``) filled by the browser or edge-tts,
* server-side video conversion through the system ``ffmpeg`` binary,
* a launcher and HTTP proxy for a local VLC instance.
"""
import asyncio
import base64
import hashlib
import http.server
import importlib
import json
import os
import re
import shutil
import socketserver
import subprocess
import sys
import tempfile
import threading
import urllib.parse
import urllib.request

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
os.chdir(SCRIPT_DIR)

# Directory holding cached audio clips (.mp3 from edge-tts, .wav from the browser).
AUDIO_CACHE_DIR = os.path.join(SCRIPT_DIR, 'audio_cache')
# Files at or below this size are treated as failed/empty syntheses.
MIN_AUDIO_BYTES = 500
# Chunk size used when streaming request/response bodies.
IO_CHUNK = 65536


# ── Check for system ffmpeg ──
def find_ffmpeg():
    """Search for ffmpeg in the system PATH and common install locations.

    Returns the command/path to use, or None when no ffmpeg was found.
    """
    try:
        result = subprocess.run(
            ['ffmpeg', '-version'],
            capture_output=True, text=True, timeout=5
        )
        if result.returncode == 0:
            print(" [OK] System ffmpeg found")
            return 'ffmpeg'
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers "not found" as well as "not executable".
        pass
    # Fallback paths
    for path in ['/usr/bin/ffmpeg', '/usr/local/bin/ffmpeg']:
        if os.path.exists(path):
            print(f" [OK] ffmpeg found: {path}")
            return path
    print(" [!] ffmpeg not found — server-side conversion will not work")
    return None


FFMPEG_BIN = find_ffmpeg()
FFMPEG_DIR = os.path.join(SCRIPT_DIR, 'ffmpeg-wasm')
FFMPEG_FILES = {
    'ffmpeg.js': 'https://cdn.jsdelivr.net/npm/@ffmpeg/ffmpeg@0.12.10/dist/umd/ffmpeg.js',
    'ffmpeg-util.js': 'https://cdn.jsdelivr.net/npm/@ffmpeg/util@0.12.1/dist/umd/index.js',
    'ffmpeg-core.js': 'https://cdn.jsdelivr.net/npm/@ffmpeg/core-mt@0.12.6/dist/esm/ffmpeg-core.js',
    'ffmpeg-core.wasm': 'https://cdn.jsdelivr.net/npm/@ffmpeg/core-mt@0.12.6/dist/esm/ffmpeg-core.wasm',
    'ffmpeg-core.worker.js': 'https://cdn.jsdelivr.net/npm/@ffmpeg/core-mt@0.12.6/dist/esm/ffmpeg-core.worker.js',
    '814.ffmpeg.js': 'https://cdn.jsdelivr.net/npm/@ffmpeg/ffmpeg@0.12.10/dist/umd/814.ffmpeg.js',
}

# ── Edge-TTS: Azure Neural voices without an API key ──
EDGE_TTS_AVAILABLE = False


def ensure_edge_tts():
    """Install edge-tts if not already available.

    Sets the module-level EDGE_TTS_AVAILABLE flag and returns its new value.
    """
    global EDGE_TTS_AVAILABLE
    try:
        import edge_tts  # noqa: F401 -- availability probe only
        EDGE_TTS_AVAILABLE = True
        print(" [OK] edge-tts is available")
        return True
    except ImportError:
        print(" [!] edge-tts not found — installing...")
    try:
        subprocess.run(
            [sys.executable, '-m', 'pip', 'install', 'edge-tts', '--quiet'],
            check=True, capture_output=True
        )
        # The package was installed while this process was running; the import
        # system must drop its cached directory listings to be sure to see it.
        importlib.invalidate_caches()
        import edge_tts  # noqa: F401
        EDGE_TTS_AVAILABLE = True
        print(" [OK] edge-tts installed successfully")
        return True
    except Exception as e:
        print(f" [!] Failed to install edge-tts: {e}")
        print(" [!] Run: pip install edge-tts")
        EDGE_TTS_AVAILABLE = False
        return False


# Voice map: Edge voice display name → edge-tts voice string
EDGE_VOICE_MAP = {
    # Multilingual Azure voices
    'Microsoft Ava Multilingual Online (Natural) - en-US': 'en-US-AvaMultilingualNeural',
    'Microsoft Andrew Multilingual Online (Natural) - en-US': 'en-US-AndrewMultilingualNeural',
    'Microsoft Emma Multilingual Online (Natural) - en-US': 'en-US-EmmaMultilingualNeural',
    'Microsoft Brian Multilingual Online (Natural) - en-US': 'en-US-BrianMultilingualNeural',
    'Microsoft Aria Multilingual Online (Natural) - en-US': 'en-US-AriaNeural',
    'Microsoft Guy Multilingual Online (Natural) - en-US': 'en-US-GuyNeural',
    'Microsoft Jenny Multilingual Online (Natural) - en-US': 'en-US-JennyNeural',
    'Microsoft Davis Multilingual Online (Natural) - en-US': 'en-US-DavisNeural',
    'Microsoft Jane Multilingual Online (Natural) - en-US': 'en-US-JaneNeural',
    'Microsoft Jason Multilingual Online (Natural) - en-US': 'en-US-JasonNeural',
    'Microsoft Sara Multilingual Online (Natural) - en-US': 'en-US-SaraNeural',
    'Microsoft Tony Multilingual Online (Natural) - en-US': 'en-US-TonyNeural',
    'Microsoft Nancy Multilingual Online (Natural) - en-US': 'en-US-NancyNeural',
    'Microsoft Ryan Multilingual Online (Natural) - en-GB': 'en-GB-RyanNeural',
    'Microsoft Sonia Multilingual Online (Natural) - en-GB': 'en-GB-SoniaNeural',
    'Microsoft Libby Multilingual Online (Natural) - en-GB': 'en-GB-LibbyNeural',
    'Microsoft Thomas Multilingual Online (Natural) - fr-FR': 'fr-FR-HenriNeural',
    'Microsoft Henri Multilingual Online (Natural) - fr-FR': 'fr-FR-HenriNeural',
    'Microsoft Denise Multilingual Online (Natural) - fr-FR': 'fr-FR-DeniseNeural',
    'Microsoft Katja Multilingual Online (Natural) - de-DE': 'de-DE-KatjaNeural',
    'Microsoft Seraphina Multilingual Online (Natural) - de-DE': 'de-DE-SeraphinaMultilingualNeural',
    'Microsoft Serafina Online (Natural) - bg-BG': 'bg-BG-KalinaNeural',
    # Fallback for unknown Multilingual voices
    '_multilingual_default': 'en-US-AndrewMultilingualNeural',
    # Bulgarian
    'Microsoft Kalina Online (Natural) - bg-BG': 'bg-BG-KalinaNeural',
    'Microsoft Boris Online (Natural) - bg-BG': 'bg-BG-BorislavNeural',
}

# Voice used when nothing in EDGE_VOICE_MAP matches.
_FALLBACK_EDGE_VOICE = 'bg-BG-KalinaNeural'


def resolve_edge_voice(voice_name):
    """Return the edge-tts voice string for the given Web Speech API voice name.

    Lookup order: exact key, case-insensitive substring match in either
    direction, the multilingual default when the name mentions
    "multilingual", and finally the absolute fallback voice.
    """
    if voice_name in EDGE_VOICE_MAP:
        return EDGE_VOICE_MAP[voice_name]
    # A blank name is a substring of every key, so without this guard it would
    # "match" the first map entry instead of reaching the fallback.
    if not voice_name or not voice_name.strip():
        return _FALLBACK_EDGE_VOICE
    # Try partial match
    wanted = voice_name.lower()
    for key, val in EDGE_VOICE_MAP.items():
        known = key.lower()
        if known in wanted or wanted in known:
            return val
    # If Multilingual → use default multilingual voice
    if 'multilingual' in wanted:
        return EDGE_VOICE_MAP['_multilingual_default']
    return _FALLBACK_EDGE_VOICE  # absolute fallback


def tts_cache_key(text, voice_name, rate, index=-1):
    """Build the cache key for a synthesized clip.

    The key is the first 16 hex digits of SHA-1 over text + voice + rate
    (rate formatted with one decimal); per the original note it mirrors the
    front-end's hashText(). If index >= 0, formats as '0042_hash'.
    """
    raw = text + voice_name + f'{float(rate):.1f}'
    hash_part = hashlib.sha1(raw.encode('utf-8')).hexdigest()[:16]
    if index >= 0:
        return f'{index:04d}_{hash_part}'
    return hash_part


def _is_cached(path):
    """Return True when *path* exists and is large enough to be a real clip."""
    try:
        return os.path.getsize(path) > MIN_AUDIO_BYTES
    except OSError:
        return False


async def _synthesize_edge_tts(text, voice, rate_ratio, out_path):
    """Synthesize speech with edge-tts and save as MP3 → out_path."""
    import edge_tts
    # edge-tts expects rate as "+20%" / "-10%" relative to normal (1.3 → +30%)
    pct = int(round((float(rate_ratio) - 1.0) * 100))
    rate_str = f'+{pct}%' if pct >= 0 else f'{pct}%'
    communicate = edge_tts.Communicate(text, voice, rate=rate_str)
    await communicate.save(out_path)


def synthesize_to_cache(text, voice_name, rate, cache_dir, index=-1):
    """Synthesize text → MP3 file in cache_dir.

    Returns the path or None on error. The audio is written to a private
    ``.part`` file first and renamed into place only when complete, so an
    interrupted synthesis can never be mistaken for a valid cache entry.
    """
    if not EDGE_TTS_AVAILABLE:
        return None
    key = tts_cache_key(text, voice_name, rate, index)
    out_path = os.path.join(cache_dir, key + '.mp3')
    if _is_cached(out_path):
        return out_path  # already cached
    edge_voice = resolve_edge_voice(voice_name)
    # Unique per process/thread so concurrent requests for the same key do not
    # write into each other's temporary file.
    tmp_path = f'{out_path}.{os.getpid()}.{threading.get_ident()}.part'
    try:
        # asyncio.run() is unreliable inside ThreadingTCPServer threads —
        # create a fresh event loop per thread instead
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(
                _synthesize_edge_tts(text, edge_voice, rate, tmp_path))
        finally:
            loop.close()
        if _is_cached(tmp_path):
            os.replace(tmp_path, out_path)
            return out_path
    except Exception as e:
        print(f' [TTS] Synthesis error: {e}')
    finally:
        # After a successful rename the temp file is gone; otherwise clean up.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
    return None


# ── Audio cache auto-eviction ──
# Maximum allowed cache size in MB. Override with env var AUDIO_CACHE_MAX_MB.
AUDIO_CACHE_MAX_MB = int(os.environ.get('AUDIO_CACHE_MAX_MB', 500))


def evict_cache_if_needed(cache_dir, max_mb=None):
    """
    Delete the oldest audio cache files (by modification time) until the
    total cache size is below max_mb.

    Called automatically after every write to audio_cache/.

    Parameters
    ----------
    cache_dir : str – path to the audio_cache directory
    max_mb : int – size limit in MB (defaults to AUDIO_CACHE_MAX_MB)
    """
    if max_mb is None:
        max_mb = AUDIO_CACHE_MAX_MB
    if not os.path.isdir(cache_dir):
        return
    # Collect all cache files with their sizes and modification times
    entries = []
    total_bytes = 0
    for fname in os.listdir(cache_dir):
        if not fname.endswith(('.mp3', '.wav')):
            continue
        fpath = os.path.join(cache_dir, fname)
        try:
            size = os.path.getsize(fpath)
            mtime = os.path.getmtime(fpath)
        except OSError:
            continue  # file may have been deleted by a concurrent request
        entries.append((mtime, size, fpath))
        total_bytes += size
    limit_bytes = max_mb * 1024 * 1024
    if total_bytes <= limit_bytes:
        return  # still within budget — nothing to do
    # Sort oldest-first so we remove the least-recently-modified files first
    entries.sort(key=lambda e: e[0])
    freed = 0
    deleted = 0
    for _mtime, size, fpath in entries:
        if total_bytes - freed <= limit_bytes:
            break
        try:
            os.remove(fpath)
        except OSError:
            continue  # already gone
        freed += size
        deleted += 1
    if deleted:
        print(f' [Cache] Auto-evicted {deleted} file(s), '
              f'freed {freed / 1024 / 1024:.1f} MB '
              f'(limit: {max_mb} MB)')


# ── VLC Detection ──
VLC_PATHS = [
    r'C:\Program Files\VideoLAN\VLC\vlc.exe',
    r'C:\Program Files (x86)\VideoLAN\VLC\vlc.exe',
    '/usr/bin/vlc',
    '/usr/local/bin/vlc',
    '/Applications/VLC.app/Contents/MacOS/VLC',
]
VLC_HTTP_PORT = 9090
VLC_PASSWORD = 'vlcpass'
vlc_process = None


def find_vlc():
    """Return the path of the VLC executable, or None when it cannot be found.

    Checks the well-known install locations first, then asks the platform's
    ``where``/``which`` command.
    """
    for p in VLC_PATHS:
        if os.path.exists(p):
            return p
    locator = 'where' if sys.platform == 'win32' else 'which'
    try:
        result = subprocess.run([locator, 'vlc'], capture_output=True, text=True)
    except (OSError, subprocess.SubprocessError):
        # The locator tool itself is missing or could not be run.
        return None
    if result.returncode == 0:
        found = result.stdout.strip().splitlines()
        if found:
            return found[0]
    return None


VLC_EXE = find_vlc()


def download_ffmpeg_files():
    """Make sure every file in FFMPEG_FILES is present in FFMPEG_DIR.

    Missing files are downloaded to a ``.part`` file and renamed when the
    transfer finished, so a broken download is retried on the next start
    instead of being accepted as complete. Returns True when all files are
    available, False after the first failed download.
    """
    os.makedirs(FFMPEG_DIR, exist_ok=True)

    def present(name):
        dest = os.path.join(FFMPEG_DIR, name)
        return os.path.exists(dest) and os.path.getsize(dest) > 1024

    if all(present(f) for f in FFMPEG_FILES):
        return True
    print(" Downloading FFmpeg files (one-time download, ~25 MB)...")
    for fname, url in FFMPEG_FILES.items():
        dest = os.path.join(FFMPEG_DIR, fname)
        if present(fname):
            print(f" [OK] {fname} (already downloaded)")
            continue
        print(f" Downloading: {fname} ...", end='', flush=True)
        part = dest + '.part'
        try:
            urllib.request.urlretrieve(url, part)
            os.replace(part, dest)
            size = os.path.getsize(dest)
            print(f" {size//1024}KB OK")
        except Exception as e:
            print(f" ERROR: {e}")
            try:
                os.remove(part)
            except OSError:
                pass
            return False
    return True


class CORPHandler(http.server.SimpleHTTPRequestHandler):
    """Static file handler plus the audio-cache, TTS, ffmpeg and VLC endpoints.

    Every response carries COOP/COEP headers (see end_headers); request
    logging is silenced (see log_message).
    """

    # Last successful VLC status.xml payload, replayed when VLC times out.
    _last_vlc_status = None

    # ── Small request/response helpers ──
    def _send_json(self, payload, status=200):
        """Send *payload* as a JSON response with the given status code."""
        self.send_response(status)
        self.send_header('Content-Type', 'application/json')
        self.end_headers()
        self.wfile.write(json.dumps(payload).encode())

    def _content_length(self):
        """Return Content-Length as a non-negative int, or None if malformed.

        Negative values are rejected because ``rfile.read(-1)`` would block
        until the client closes the connection.
        """
        try:
            length = int(self.headers.get('Content-Length', 0))
        except (TypeError, ValueError):
            return None
        return length if length >= 0 else None

    def _read_json_object(self):
        """Read the request body and return it as a dict, or None if invalid."""
        length = self._content_length()
        if length is None:
            return None
        try:
            body = json.loads(self.rfile.read(length))
        except ValueError:  # bad JSON or undecodable bytes
            return None
        return body if isinstance(body, dict) else None

    def _receive_to_file(self, dest_path, length):
        """Stream *length* bytes of request body into *dest_path*."""
        with open(dest_path, 'wb') as f:
            remaining = length
            while remaining > 0:
                chunk = self.rfile.read(min(IO_CHUNK, remaining))
                if not chunk:
                    break  # client closed the connection early
                f.write(chunk)
                remaining -= len(chunk)

    # ── GET ──
    def do_GET(self):
        """Route GET requests; anything unknown is served as a static file."""
        # Route on the path only so cache-busting query strings do not matter.
        route = urllib.parse.urlsplit(self.path).path

        # ── Serve an audio cache file (WAV or MP3) ──
        if route.startswith('/audio-cache/'):
            self._serve_cached_audio(route)
            return
        # ── List cache contents — returns keys (without extension) ──
        if route == '/audio-cache-list':
            self._list_audio_cache()
            return
        # ── List available Azure voices (for Chrome without Microsoft voices) ──
        if route == '/tts-voices':
            voices = [
                {'name': k, 'edgeVoice': v,
                 'lang': k.split(' - ')[-1] if ' - ' in k else 'en-US'}
                for k, v in EDGE_VOICE_MAP.items()
                if not k.startswith('_')
            ]
            self._send_json({'voices': voices})
            return
        # ── TTS status: check whether edge-tts is available ──
        if route == '/tts-status':
            self._send_json({'available': EDGE_TTS_AVAILABLE})
            return
        # ── Proxy for VLC HTTP API ──
        if self.path.startswith('/vlc/'):
            # Forward the raw remainder: VLC commands live in the query string.
            self.proxy_vlc(self.path[5:])
            return
        # ── VLC status ──
        if route == '/vlc-check':
            self._send_json({'installed': VLC_EXE is not None, 'path': VLC_EXE or ''})
            return
        # ── Check whether server-side conversion is available ──
        if route == '/convert-check':
            self._send_json({'available': FFMPEG_BIN is not None})
            return
        super().do_GET()

    def _serve_cached_audio(self, route):
        """Stream one file from the audio cache, or answer 404."""
        # basename() after unquoting keeps the lookup inside the cache dir.
        fname = os.path.basename(urllib.parse.unquote(route))
        fpath = os.path.join(AUDIO_CACHE_DIR, fname)
        try:
            # Open directly instead of exists()+open(): the file may be evicted
            # by a concurrent request in between.
            fobj = open(fpath, 'rb')
        except OSError:
            self.send_response(404)
            self.end_headers()
            return
        with fobj:
            ext = fname.rsplit('.', 1)[-1].lower()
            ctype = 'audio/mpeg' if ext == 'mp3' else 'audio/wav'
            self.send_response(200)
            self.send_header('Content-Type', ctype)
            self.send_header('Content-Length', str(os.fstat(fobj.fileno()).st_size))
            self.end_headers()
            shutil.copyfileobj(fobj, self.wfile, IO_CHUNK)

    def _list_audio_cache(self):
        """Report cache keys, index→key map, entry count and total size."""
        os.makedirs(AUDIO_CACHE_DIR, exist_ok=True)
        files = os.listdir(AUDIO_CACHE_DIR)
        # Strip extensions so the browser can compare by key
        keys = list({f.rsplit('.', 1)[0] for f in files if f.endswith(('.mp3', '.wav'))})
        total_size = 0
        for f in files:
            fpath = os.path.join(AUDIO_CACHE_DIR, f)
            try:
                if os.path.isfile(fpath):
                    total_size += os.path.getsize(fpath)
            except OSError:
                pass  # removed by a concurrent eviction/clear
        # Parse index prefix from keys of the form "0042_a3f9c1de"
        # Return indexed_keys: { "42": "0042_a3f9c1de" } for O(1) lookup by position
        indexed_keys = {}
        for key in keys:
            m = re.match(r'^(\d+)_', key)
            if m:
                indexed_keys[int(m.group(1))] = key
        self._send_json({
            'keys': keys,
            'indexed_keys': indexed_keys,
            'count': len(keys),
            'size_mb': round(total_size / 1024 / 1024, 2)
        })

    # ── POST ──
    def do_POST(self):
        """Route POST requests; unknown paths get a 404."""
        route = urllib.parse.urlsplit(self.path).path
        handlers = {
            '/convert': self._handle_convert,
            '/audio-cache-clear': self._handle_cache_clear,
            '/audio-cache-save': self._handle_cache_save,
            '/tts-synthesize': self._handle_tts_synthesize,
            '/vlc-upload': self._handle_vlc_upload,
            '/vlc-open': self._handle_vlc_open,
        }
        handler = handlers.get(route)
        if handler is None:
            self.send_response(404)
            self.end_headers()
            return
        handler()

    def _handle_convert(self):
        """Server-side video conversion via system ffmpeg (body in, MP4 out)."""
        if not FFMPEG_BIN:
            self._send_json({'error': 'ffmpeg not found on server'}, 503)
            return
        length = self._content_length()
        if length is None:
            self._send_json({'error': 'invalid Content-Length'}, 400)
            return
        fname = os.path.basename(self.headers.get('X-Filename', 'input.mkv')) or 'input.mkv'
        # Write the input file to a temp directory
        tmp_dir = tempfile.mkdtemp(prefix='convert_', dir=SCRIPT_DIR)
        # The "in_" prefix guarantees the input can never collide with
        # output.mp4 (an upload named "output.mp4" used to make ffmpeg fail).
        in_path = os.path.join(tmp_dir, 'in_' + fname)
        out_path = os.path.join(tmp_dir, 'output.mp4')
        responded = False  # True once a status line has been written
        try:
            self._receive_to_file(in_path, length)
            # Convert with ffmpeg
            cmd = [
                FFMPEG_BIN, '-y', '-i', in_path,
                '-c:v', 'libx264', '-preset', 'fast', '-crf', '23',
                '-c:a', 'aac', '-b:a', '128k',
                '-movflags', '+faststart',
                out_path
            ]
            result = subprocess.run(cmd, capture_output=True, timeout=600)
            if result.returncode != 0:
                err = result.stderr.decode('utf-8', errors='replace')[-500:]
                responded = True
                self._send_json({'error': err}, 500)
                return
            out_size = os.path.getsize(out_path)
            responded = True
            self.send_response(200)
            self.send_header('Content-Type', 'video/mp4')
            self.send_header('Content-Length', str(out_size))
            self.send_header('Content-Disposition', 'inline; filename="output.mp4"')
            self.end_headers()
            with open(out_path, 'rb') as f:
                shutil.copyfileobj(f, self.wfile, IO_CHUNK)
        except subprocess.TimeoutExpired:
            self._send_json({'error': 'timeout'}, 504)
        except Exception as e:
            if responded:
                # Headers are already on the wire (typically the client went
                # away mid-download); a second response would corrupt the stream.
                print(f' [Convert] Response aborted: {e}')
            else:
                self._send_json({'error': str(e)}, 500)
        finally:
            # Clean up temporary files
            shutil.rmtree(tmp_dir, ignore_errors=True)

    def _handle_cache_clear(self):
        """Clear audio cache (delete all .mp3/.wav from audio_cache/)."""
        deleted = 0
        size_freed = 0
        if os.path.exists(AUDIO_CACHE_DIR):
            for fname in os.listdir(AUDIO_CACHE_DIR):
                if not fname.endswith(('.mp3', '.wav')):
                    continue
                fpath = os.path.join(AUDIO_CACHE_DIR, fname)
                try:
                    size = os.path.getsize(fpath)
                    os.remove(fpath)
                except OSError:
                    continue  # best effort: skip files that vanished or are locked
                size_freed += size
                deleted += 1
        self._send_json({
            'ok': True,
            'deleted': deleted,
            'freed_mb': round(size_freed / 1024 / 1024, 2)
        })

    def _handle_cache_save(self):
        """Save audio to cache (WAV from browser) under the X-Cache-Key name."""
        fname = os.path.basename(self.headers.get('X-Cache-Key', 'unknown.wav'))
        length = self._content_length()
        if not fname or length is None:
            self._send_json({'error': 'invalid X-Cache-Key or Content-Length'}, 400)
            return
        os.makedirs(AUDIO_CACHE_DIR, exist_ok=True)
        self._receive_to_file(os.path.join(AUDIO_CACHE_DIR, fname), length)
        # Auto-evict oldest files if cache exceeds the size limit
        evict_cache_if_needed(AUDIO_CACHE_DIR)
        self._send_json({'ok': True})

    def _handle_tts_synthesize(self):
        """TTS synthesis via edge-tts (for online/Azure voices)."""
        body = self._read_json_object()
        if body is None:
            self._send_json({'error': 'invalid JSON body'}, 400)
            return
        text = body.get('text', '')
        voice_name = body.get('voice', '')
        try:
            rate = float(body.get('rate', 1.3))
            sub_index = int(body.get('index', -1))
        except (TypeError, ValueError):
            self._send_json({'error': 'invalid rate or index'}, 400)
            return
        if not isinstance(text, str) or not isinstance(voice_name, str):
            self._send_json({'error': 'text and voice must be strings'}, 400)
            return
        os.makedirs(AUDIO_CACHE_DIR, exist_ok=True)
        if not text or not EDGE_TTS_AVAILABLE:
            self._send_json({'error': 'edge-tts not available'}, 503)
            return
        # Cache key — with index prefix "0042_hash"
        key = tts_cache_key(text, voice_name, rate, sub_index)
        mp3_path = os.path.join(AUDIO_CACHE_DIR, key + '.mp3')
        if not _is_cached(mp3_path):
            result = synthesize_to_cache(text, voice_name, rate, AUDIO_CACHE_DIR, sub_index)
            if not result:
                self.send_response(500)
                self.end_headers()
                self.wfile.write(b'TTS synthesis failed')
                return
            # Auto-evict oldest files if cache exceeds the size limit
            evict_cache_if_needed(AUDIO_CACHE_DIR)
        self._send_json({'key': key, 'file': key + '.mp3'})

    def _handle_vlc_upload(self):
        """Upload a video file for VLC; answers with its absolute path."""
        fname = os.path.basename(self.headers.get('X-Filename', 'vlc_temp.mkv'))  # security
        fname = fname or 'vlc_temp.mkv'
        length = self._content_length()
        if length is None:
            self._send_json({'error': 'invalid Content-Length'}, 400)
            return
        tmp_path = os.path.join(SCRIPT_DIR, fname)
        # Never let an upload replace the running server script.
        # NOTE(review): uploads still land next to the served page and can
        # overwrite other files there; consider a dedicated upload directory
        # once the front-end's expectations about tmpPath are confirmed.
        this_script = os.path.normcase(os.path.abspath(__file__))
        if os.path.normcase(os.path.abspath(tmp_path)) == this_script:
            self._send_json({'error': 'invalid filename'}, 400)
            return
        # Stream to disk: videos are far too large to buffer in memory.
        self._receive_to_file(tmp_path, length)
        self._send_json({'tmpPath': tmp_path})

    def _handle_vlc_open(self):
        """Launch VLC on the path given in the JSON body."""
        body = self._read_json_object()
        if body is None:
            self._send_json({'error': 'invalid JSON body'}, 400)
            return
        self.start_vlc(body.get('path', ''))

    # ── VLC ──
    def proxy_vlc(self, vlc_path):
        """Forward a GET to VLC's HTTP interface and relay the answer.

        If VLC does not answer in time and the request was for status.xml,
        the last successful status is replayed; otherwise 503 is returned.
        """
        try:
            credentials = base64.b64encode(f':{VLC_PASSWORD}'.encode()).decode()
            req = urllib.request.Request(
                f'http://localhost:{VLC_HTTP_PORT}/{vlc_path}',
                headers={'Authorization': f'Basic {credentials}'}
            )
            with urllib.request.urlopen(req, timeout=0.5) as resp:
                data = resp.read()
                ctype = resp.headers.get('Content-Type', 'text/xml')
            # Cache the last successful response
            if 'status.xml' in vlc_path:
                CORPHandler._last_vlc_status = data
            self.send_response(200)
            self.send_header('Content-Type', ctype)
            self.end_headers()
            self.wfile.write(data)
        except Exception:
            # On timeout → return last cached status instead of an error
            if 'status.xml' in vlc_path and CORPHandler._last_vlc_status is not None:
                self.send_response(200)
                self.send_header('Content-Type', 'text/xml')
                self.end_headers()
                self.wfile.write(CORPHandler._last_vlc_status)
            else:
                self.send_response(503)
                self.end_headers()
                self.wfile.write(b'')

    def start_vlc(self, video_path):
        """(Re)start VLC with its HTTP interface enabled and open *video_path*."""
        global vlc_process
        if not VLC_EXE:
            self.send_response(404)
            self.end_headers()
            self.wfile.write(b'VLC not found')
            return
        # The path comes from the request body: a value starting with "-"
        # would be parsed by VLC as a command-line option, not as a file.
        if not isinstance(video_path, str) or video_path.startswith('-'):
            self._send_json({'error': 'invalid path'}, 400)
            return
        try:
            if vlc_process and vlc_process.poll() is None:
                vlc_process.terminate()
            vlc_process = subprocess.Popen([
                VLC_EXE, video_path,
                '--extraintf', 'http',
                '--http-host', '127.0.0.1',
                '--http-port', str(VLC_HTTP_PORT),
                '--http-password', VLC_PASSWORD,
                '--no-video-title-show',
            ])
            self._send_json({'ok': True, 'vlc_port': VLC_HTTP_PORT})
        except Exception as e:
            self.send_response(500)
            self.end_headers()
            self.wfile.write(str(e).encode())

    def end_headers(self):
        """Add the cross-origin isolation and no-cache headers to every response."""
        self.send_header('Cross-Origin-Opener-Policy', 'same-origin')
        self.send_header('Cross-Origin-Embedder-Policy', 'require-corp')
        self.send_header('Cache-Control', 'no-cache')
        super().end_headers()

    def log_message(self, fmt, *args):
        """Silence the default per-request logging."""
        pass


PORT = int(os.environ.get("PORT", 7860))


def main():
    """Print the startup banner, prepare dependencies and serve forever."""
    print()
    print(" ============================================")
    print(" Voice Player - Starting up")
    print(" ============================================")
    print()
    if VLC_EXE:
        print(f" [OK] VLC found: {VLC_EXE}")
    else:
        print(" [!] VLC not found - unsupported formats will be converted with FFmpeg")
        print(" [!] Download VLC from: https://www.videolan.org/vlc/")
    print()
    # ── Initialise edge-tts for Azure Neural TTS cache ──
    ensure_edge_tts()
    print()
    if not download_ffmpeg_files():
        print()
        print(" [!] Failed to download FFmpeg files.")
        print(" [!] Check your internet connection and try again.")
        sys.exit(1)
    print()
    try:
        server = socketserver.ThreadingTCPServer(('', PORT), CORPHandler)
        server.daemon_threads = True
        print(f" Server running on port: {PORT}")
        print(f" Open: http://localhost:{PORT}/VIDEO_new.html")
        print(f" Audio cache limit: {AUDIO_CACHE_MAX_MB} MB (set AUDIO_CACHE_MAX_MB env var to change)")
        print()
        server.serve_forever()
    except Exception as e:
        print(f" [!] Failed to start server: {e}")
        sys.exit(1)


if __name__ == '__main__':
    main()