Spaces:

hetchyy
/

quranic-universal-aligner

Running on Zero

App Files Files Community

quranic-universal-aligner / app.py

hetchyy

Upload folder using huggingface_hub

4a0777e verified about 17 hours ago

raw

history blame contribute delete

7.8 kB

	"""Quran Aligner — Automatic Quran recitation segmentation and alignment.
	Copyright 2026 Wider Community. Licensed under Apache 2.0.
	See LICENSE in the repository root."""
	import os
	import sys
	from pathlib import Path

	# Suppress HF model download progress bars (hundreds of lines on cold start)
	os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")


	def _load_env_file(env_path: Path) -> None:
	    """Copy ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

	    Variables that are already set in the environment are left untouched.
	    Blank lines, ``#`` comment lines, lines without ``=`` and lines with an
	    empty key are skipped. One pair of matching surrounding quotes is removed
	    from the value, so ``HF_TOKEN="abc"`` yields ``abc`` rather than ``"abc"``.

	    Args:
	        env_path: Location of the file. Anything that is not a regular file
	            (missing path, directory) is ignored silently.
	    """
	    if not env_path.is_file():
	        return
	    # Explicit encoding so parsing does not depend on the platform locale.
	    for raw_line in env_path.read_text(encoding="utf-8").splitlines():
	        entry = raw_line.strip()
	        if not entry or entry.startswith("#") or "=" not in entry:
	            continue
	        # Split on the first "=" only; values may themselves contain "=".
	        key, _, value = entry.partition("=")
	        key = key.strip()
	        value = value.strip()
	        if not key:
	            # An empty variable name cannot be set in the process environment.
	            continue
	        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
	            value = value[1:-1]
	        os.environ.setdefault(key, value)


	# Load .env file for local dev (HF_TOKEN for private model access)
	_env_path = Path(__file__).parent / ".env"
	_load_env_file(_env_path)

	# Put the application directory at the front of sys.path so the project
	# packages resolve before any other import runs.
	_app_path = Path(__file__).parent.resolve()
	_app_dir = str(_app_path)
	sys.path[:0] = [_app_dir]

	# Compile the Cython extensions in-place. The result is not inspected: when
	# the build fails the application falls back to pure Python.
	import subprocess
	_build_cmd = [sys.executable, str(_app_path / "setup.py"), "build_ext", "--inplace"]
	subprocess.run(_build_cmd, cwd=_app_dir, capture_output=True)

	# Report whether the phoneme matcher is using its Cython DP implementation
	try:
	    from src.alignment.phoneme_matcher import _USE_CYTHON_DP
	except ImportError:
	    print("Cython DP: disabled (import error)")
	else:
	    print(f"Cython DP: {'enabled' if _USE_CYTHON_DP else 'disabled (pure Python fallback)'}")

	# Start YouTube PO token server (needed for yt-dlp on datacenter IPs).
	# This is best-effort: the app must still start when the server cannot be
	# built or launched, so launch-level failures are reported, not raised.
	_pot_server_dir = _app_path / ".pot-server"
	_pot_main = _pot_server_dir / "server" / "build" / "main.js"
	try:
	    if not _pot_main.exists():
	        # First run: fetch the provider sources and compile them to build/main.js
	        print("Setting up PO token server...")
	        subprocess.run(["git", "clone", "--depth=1", "--single-branch",
	                        "https://github.com/Brainicism/bgutil-ytdlp-pot-provider.git",
	                        str(_pot_server_dir)], capture_output=True)
	        subprocess.run(["npm", "ci"], cwd=str(_pot_server_dir / "server"), capture_output=True)
	        subprocess.run(["npx", "tsc"], cwd=str(_pot_server_dir / "server"), capture_output=True)
	    if _pot_main.exists():
	        # Fire-and-forget background process; its output is discarded
	        subprocess.Popen(["node", str(_pot_main)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
	        print("PO token server started on port 4416")
	    else:
	        print("PO token server setup failed (YouTube downloads may not work)")
	except OSError as _pot_err:
	    # Raised when git/npm/npx/node is not installed, or when the clone failed
	    # and the "server" working directory therefore does not exist.
	    print(f"PO token server setup failed (YouTube downloads may not work): {_pot_err}")

	from src.ui.interface import build_interface

	# =============================================================================
	# `demo` lives at module level so Gradio hot-reload (`gradio app.py`) finds it
	# =============================================================================
	demo = build_interface()

	# Queue sizing. The main Space handles many requests in parallel: GPU requests
	# funnel into ZeroGPU's own queue and CPU dispatches are cheap I/O to worker
	# Spaces. Worker Spaces stay single-threaded, because 2 vCPUs would thrash
	# under concurrent ML inference.
	from src.core.zero_gpu import IS_CPU_WORKER
	if IS_CPU_WORKER:
	    _queue_concurrency = 1
	else:
	    _queue_concurrency = 20
	demo.queue(default_concurrency_limit=_queue_concurrency)

	# =============================================================================
	# Main
	# =============================================================================

	if __name__ == "__main__":
	    import multiprocessing
	    multiprocessing.freeze_support()

	    # =============================================================================
	    # Persistent CPU worker pool — spawn BEFORE any GPU use if enabled.
	    # Must be inside __main__ guard: spawn re-imports app.py as __mp_main__ during
	    # worker bootstrap; without the guard, module-level Process.start() would be
	    # called inside a spawned child, triggering the "bootstrapping phase" error.
	    # =============================================================================
	    try:
	        from config import (
	            CPU_STRATEGY as _CPU_STRATEGY,
	            CPU_WORKER_MODE as _CPU_WORKER_MODE,
	            CPU_SUBPROCESS_CONCURRENCY as _CPU_SUBPROCESS_CONCURRENCY,
	            CPU_POOL_PRELOAD_LARGE as _CPU_POOL_PRELOAD_LARGE,
	        )
	        from src.core.zero_gpu import IS_CPU_WORKER as _IS_CPU_WORKER
	        if (
	            _CPU_STRATEGY == "subprocess"
	            and _CPU_WORKER_MODE == "persistent"
	            and not _IS_CPU_WORKER
	        ):
	            print(f"[APP] Bootstrapping persistent CPU pool: {_CPU_SUBPROCESS_CONCURRENCY} worker(s), preload_large={_CPU_POOL_PRELOAD_LARGE}")
	            from src.core.cpu_worker_pool import start_pool as _start_pool
	            _start_pool(_CPU_SUBPROCESS_CONCURRENCY, preload_large=_CPU_POOL_PRELOAD_LARGE)
	    except Exception as _e:
	        # The pool is an optimisation; the app keeps starting without it.
	        print(f"[APP] Persistent CPU pool bootstrap failed (non-fatal): {_e}")

	    import argparse
	    import numpy as np
	    import librosa
	    from config import PORT, IS_HF_SPACE, RESAMPLE_TYPE
	    from src.core.zero_gpu import IS_CPU_WORKER, ZERO_GPU_AVAILABLE
	    from src.segmenter.segmenter_model import load_segmenter
	    from src.segmenter.segmenter_aoti import apply_aoti_compiled
	    from src.pipeline import test_aoti_compilation_gpu
	    from src.alignment.phoneme_asr import load_phoneme_asr
	    from src.alignment.ngram_index import get_ngram_index
	    from src.alignment.phoneme_matcher_cache import preload_all_chapters

	    parser = argparse.ArgumentParser()
	    parser.add_argument("--share", action="store_true", help="Create public link")
	    parser.add_argument("--port", type=int, default=PORT, help="Port to run on")
	    parser.add_argument("--dev", action="store_true", help="Dev mode: skip model preloading for fast startup")
	    args = parser.parse_args()

	    # Honour --port (default: config.PORT) for local runs. On a Hugging Face
	    # Space the port stays pinned to 7860, exactly as the deployed app has
	    # always bound it.
	    port = 7860 if IS_HF_SPACE else args.port

	    print(f"ZeroGPU available: {ZERO_GPU_AVAILABLE}")
	    print(f"Launching Gradio on port {port}")

	    if args.dev:
	        print("Dev mode: skipping model preloading (models load on first request)")
	    else:
	        # Preload models and caches at startup so first request is fast
	        print("Preloading models...")
	        load_segmenter()
	        load_phoneme_asr("Base")
	        load_phoneme_asr("Large")
	        print("Models preloaded.")
	        print("Preloading caches...")
	        get_ngram_index()
	        preload_all_chapters()
	        print("Caches preloaded.")

	        # Warm up soxr resampler so first request doesn't pay initialization cost
	        _dummy = librosa.resample(np.zeros(1600, dtype=np.float32),
	                                  orig_sr=44100, target_sr=16000, res_type=RESAMPLE_TYPE)
	        del _dummy
	        print("Resampler warmed up.")

	    # Telemetry sampler — daemon thread samples host + CPU pool every N seconds
	    # and flushes to the telemetry dataset. Skip on CPU workers (they have no
	    # pool of their own and don't host the main Space's schedulers).
	    if not IS_CPU_WORKER:
	        try:
	            from src.core.telemetry_sampler import start_sampler
	            start_sampler()
	        except Exception as _e:
	            print(f"[APP] Telemetry sampler start failed (non-fatal): {_e}")

	    # AoT compilation for VAD model (requires GPU lease — skip on CPU workers)
	    if IS_HF_SPACE and ZERO_GPU_AVAILABLE and not IS_CPU_WORKER:
	        print("Running AoT compilation for VAD model...")
	        try:
	            aoti_result = test_aoti_compilation_gpu()
	            print(f"AoT compile result: {aoti_result}")
	            # Apply compiled model OUTSIDE GPU lease (critical for persistence)
	            if aoti_result.get("compiled"):
	                apply_aoti_compiled(aoti_result["compiled"])
	        except Exception as e:
	            print(f"AoT compilation failed (non-fatal): {e}")

	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=port,
	        share=args.share,
	        allowed_paths=["/tmp"],
	        ssr_mode=False,  # Gradio 6.5.1 SSR probes localhost; fails on some HF container restarts
	    )