#!/usr/bin/env python3
"""Synthesize an audiobook WAV from a TXT or EPUB file.

The input text is normalized, split into paragraphs, synthesized paragraph
by paragraph with ``EspeechBackend``, joined with crossfades, loudness
normalized, and written to ``<outdir>/book.wav``.
"""
import argparse
import os
import re
import sys

import numpy as np
import soundfile as sf
from tqdm import tqdm

from backends.espeech_backend import EspeechBackend
from utils.audio import crossfade_concat, normalize_lufs, save_wav
from utils.text import maybe_ruaccent, normalize_text, split_into_paragraphs


def read_input(path: str) -> str:
    """Return the text content of a ``.txt`` or ``.epub`` file.

    Args:
        path: Path to the input file; the extension (case-insensitive)
            selects the reader.

    Returns:
        The file contents for ``.txt`` (decoded as UTF-8), or the text of
        all document items of the EPUB joined by blank lines.

    Raises:
        ValueError: If the extension is neither ``.txt`` nor ``.epub``.
        SystemExit: With status 2 if the optional EPUB dependencies are
            not installed.
    """
    lowered = path.lower()

    if lowered.endswith(".txt"):
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    if lowered.endswith(".epub"):
        # EPUB support is optional, so its dependencies are imported lazily.
        try:
            import ebooklib
            from bs4 import BeautifulSoup
            from ebooklib import epub
        except ImportError:
            print(
                "Для EPUB установите: pip install ebooklib beautifulsoup4 lxml",
                file=sys.stderr,
            )
            sys.exit(2)

        book = epub.read_epub(path)
        texts = []
        for item in book.get_items():
            # Only document items are read; other item types are skipped.
            if item.get_type() == ebooklib.ITEM_DOCUMENT:
                soup = BeautifulSoup(item.get_body_content(), "lxml")
                texts.append(soup.get_text(" ", strip=True))
        return "\n\n".join(texts)

    raise ValueError("Поддерживаются .txt и .epub")


def load_default_ref_text() -> str:
    """Load the default reference text from the local sample.

    Returns:
        The stripped contents of ``samples/001/sample.text``, or an empty
        string if that file does not exist.
    """
    try:
        with open("samples/001/sample.text", "r", encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        return ""


def main() -> None:
    """Parse CLI arguments and render the input file to ``book.wav``.

    Exits with status 1 when the input yields no paragraphs, because there
    would be no audio (and no sample rate) to assemble.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", required=True, help="Путь к TXT/EPUB")
    ap.add_argument("--outdir", required=True, help="Каталог для результата")
    ap.add_argument(
        "--ref-audio",
        required=False,
        default="samples/001/sample.mp3",
        help="Путь к референс-аудио (6–12с)",
    )
    ap.add_argument(
        "--ref-text",
        required=False,
        default=load_default_ref_text(),
        help="Референс-текст",
    )
    ap.add_argument(
        "--model-repo",
        default=os.getenv("MODEL_REPO", "ESpeech/ESpeech-TTS-1_RL-V2"),
    )
    ap.add_argument("--speed", type=float, default=1.0)
    ap.add_argument("--nfe-steps", type=int, default=48)
    ap.add_argument("--crossfade-ms", type=int, default=150)
    ap.add_argument("--target-lufs", type=float, default=-20.0)
    args = ap.parse_args()

    # Read and validate the input before constructing the backend, so a bad
    # or empty input fails fast.
    raw = read_input(args.input)
    text = normalize_text(raw)
    paragraphs = split_into_paragraphs(text)
    paragraphs = [maybe_ruaccent(p) for p in paragraphs]
    print(f"Абзацев: {len(paragraphs)}")

    if not paragraphs:
        # Without at least one synthesized piece the sample rate stays
        # unknown and there is nothing to concatenate.
        print("Во входном файле нет текста для озвучивания", file=sys.stderr)
        sys.exit(1)

    os.makedirs(args.outdir, exist_ok=True)
    backend = EspeechBackend(model_id=args.model_repo)

    pieces = []
    sr = None
    for para in tqdm(paragraphs, desc="Генерация"):
        audio, sr = backend.synthesize(
            text=para,
            ref_audio_path=args.ref_audio,
            ref_text=args.ref_text,
            speed=args.speed,
            nfe_steps=args.nfe_steps,
            seed=None,
        )
        pieces.append(audio)
        # Per-paragraph drafts could optionally be saved here (e.g. with
        # sf.write into args.outdir) for debugging.

    # The sample rate reported for the last paragraph is used for the whole
    # book. NOTE(review): assumes the backend returns the same rate for
    # every call - confirm.
    final = crossfade_concat(pieces, crossfade_ms=args.crossfade_ms, sample_rate=sr)
    final = normalize_lufs(final, sr, target_lufs=args.target_lufs)

    out_path = os.path.join(args.outdir, "book.wav")
    save_wav(out_path, final, sr)
    print(f"ГОТОВО: {out_path}")


if __name__ == "__main__":
    main()