Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| import argparse, os, sys, re | |
| import numpy as np | |
| import soundfile as sf | |
| from tqdm import tqdm | |
| from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent | |
| from utils.audio import crossfade_concat, normalize_lufs, save_wav | |
| from backends.espeech_backend import EspeechBackend | |
def read_input(path: str) -> str:
    """Return the text content of a ``.txt`` or ``.epub`` file.

    The format is chosen by the (case-insensitive) file extension.

    Args:
        path: Path to the input file.

    Returns:
        For ``.txt``: the file content decoded as UTF-8.
        For ``.epub``: the visible text of every document item, with items
        separated by a blank line.

    Raises:
        ValueError: If the extension is neither ``.txt`` nor ``.epub``.
        SystemExit: With code 2 if the optional EPUB dependencies are
            not installed.
    """
    lowered = path.lower()

    if lowered.endswith(".txt"):
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    if lowered.endswith(".epub"):
        # EPUB support is optional, so its dependencies are imported lazily.
        # Only a missing package should produce the "install" hint; any other
        # failure is a real error and must not be masked by it.
        try:
            import ebooklib
            from ebooklib import epub
            from bs4 import BeautifulSoup
        except ImportError:
            print("Для EPUB установите: pip install ebooklib beautifulsoup4 lxml", file=sys.stderr)
            sys.exit(2)

        book = epub.read_epub(path)
        # Keep only document items; the named constant replaces the former
        # magic number 9.
        texts = [
            BeautifulSoup(item.get_body_content(), "lxml").get_text(" ", strip=True)
            for item in book.get_items()
            if item.get_type() == ebooklib.ITEM_DOCUMENT
        ]
        return "\n\n".join(texts)

    raise ValueError("Поддерживаются .txt и .epub")
def load_default_ref_text(path: str = "samples/001/sample.text") -> str:
    """Load the default reference text from a local sample file.

    Args:
        path: Location of the reference-text file. A relative path is
            resolved against the current working directory. Defaults to
            the bundled sample.

    Returns:
        The file content decoded as UTF-8 with surrounding whitespace
        stripped, or an empty string if the file does not exist.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        # A missing sample is not an error: callers get an empty default.
        return ""
def main():
    """Command-line entry point: synthesize a TXT/EPUB file into ``book.wav``.

    Steps:
      1. Parse command-line arguments.
      2. Read the input, normalize it, split it into paragraphs and pass
         each paragraph through ``maybe_ruaccent``.
      3. Synthesize every paragraph with ``EspeechBackend``.
      4. Join the pieces with a crossfade, normalize loudness and write
         ``<outdir>/book.wav``.

    Raises:
        SystemExit: With code 1 if the input yields no paragraphs.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", required=True, help="Путь к TXT/EPUB")
    ap.add_argument("--outdir", required=True, help="Каталог для результата")
    ap.add_argument("--ref-audio", required=False, default="samples/001/sample.mp3", help="Путь к референс-аудио (6–12с)")
    ap.add_argument("--ref-text", required=False, default=load_default_ref_text(), help="Референс-текст")
    ap.add_argument("--model-repo", default=os.getenv("MODEL_REPO", "ESpeech/ESpeech-TTS-1_RL-V2"))
    ap.add_argument("--speed", type=float, default=1.0)
    ap.add_argument("--nfe-steps", type=int, default=48)
    ap.add_argument("--crossfade-ms", type=int, default=150)
    ap.add_argument("--target-lufs", type=float, default=-20.0)
    args = ap.parse_args()

    os.makedirs(args.outdir, exist_ok=True)

    # Read and prepare the text BEFORE constructing the backend, so that a
    # wrong path or unsupported format is reported without first paying for
    # the model load.
    raw = read_input(args.input)
    text = normalize_text(raw)
    paragraphs = [maybe_ruaccent(p) for p in split_into_paragraphs(text)]
    print(f"Абзацев: {len(paragraphs)}")

    # With no paragraphs the loop below never runs: the sample rate would
    # stay None and an empty piece list would reach the audio helpers.
    if not paragraphs:
        print("Во входном файле нет текста для озвучивания", file=sys.stderr)
        sys.exit(1)

    backend = EspeechBackend(model_id=args.model_repo)

    pieces = []
    sr = None
    for para in tqdm(paragraphs, desc="Генерация"):
        audio, sr = backend.synthesize(
            text=para,
            ref_audio_path=args.ref_audio,
            ref_text=args.ref_text,
            speed=args.speed,
            nfe_steps=args.nfe_steps,
            seed=None,
        )
        pieces.append(audio)
        # Optional: per-paragraph drafts could be written to args.outdir
        # here (e.g. with soundfile) for debugging.

    # The sample rate reported by the last synthesize() call is used for
    # the whole book.
    final = crossfade_concat(pieces, crossfade_ms=args.crossfade_ms, sample_rate=sr)
    final = normalize_lufs(final, sr, target_lufs=args.target_lufs)

    out_path = os.path.join(args.outdir, "book.wav")
    save_wav(out_path, final, sr)
    print(f"ГОТОВО: {out_path}")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()