File size: 4,621 Bytes
aa8e154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""
modules/tts.py — Text-to-Speech (Low-Latency)

Strategy:
  1. pyttsx3  — fully offline, zero network latency, engine warmed-up once.
  2. gTTS     — online fallback if pyttsx3 unavailable.
  3. os/wmplayer — last resort Windows fallback.
"""

import threading
import tempfile
import os

# ── pyttsx3 singleton ──────────────────────────────────────────────────────────
# Shared engine instance; stays None until _init_pyttsx3() has succeeded.
_pyttsx3_engine = None
# Held by speak() around say()/runAndWait() so that only one utterance drives
# the shared engine at a time. _init_pyttsx3() itself does not take this lock.
_pyttsx3_lock = threading.Lock()
# Set to True by _init_pyttsx3() once the engine is configured; while it is
# False, speak() skips the pyttsx3 path and goes straight to the gTTS fallback.
_pyttsx3_available = False

def _init_pyttsx3():
    """
    Build and configure the shared pyttsx3 engine.

    On success the engine is published through the module globals
    ``_pyttsx3_engine`` and ``_pyttsx3_available``. Any failure (pyttsx3 not
    installed, driver error, ...) is reported on stdout and leaves both
    globals untouched, so callers keep using the gTTS fallback.
    """
    global _pyttsx3_engine, _pyttsx3_available
    try:
        import pyttsx3

        tts_engine = pyttsx3.init()
        # 160 wpm feels natural for an interviewer; volume at maximum.
        tts_engine.setProperty('rate', 160)
        tts_engine.setProperty('volume', 1.0)

        # Pick the first installed voice whose name looks like a clear
        # English voice; keep the driver default when none matches.
        preferred_tags = ('english', 'zira', 'david')
        chosen_voice = next(
            (
                voice
                for voice in tts_engine.getProperty('voices')
                if any(tag in voice.name.lower() for tag in preferred_tags)
            ),
            None,
        )
        if chosen_voice is not None:
            tts_engine.setProperty('voice', chosen_voice.id)

        # Publish the engine before raising the availability flag.
        _pyttsx3_engine = tts_engine
        _pyttsx3_available = True
        print('[TTS] pyttsx3 engine ready (offline, low-latency).')
    except Exception as e:
        print(f'[TTS] pyttsx3 unavailable ({e}), will fall back to gTTS.')

# Warm-up in background at import time so first speak() has no init overhead.
# The thread is a daemon, so it never blocks interpreter shutdown. If speak()
# is called before initialization finishes, _pyttsx3_available is still False
# and that call takes the gTTS fallback path instead.
threading.Thread(target=_init_pyttsx3, daemon=True).start()

# ── pygame mixer singleton (for gTTS fallback) ─────────────────────────────────
# True once pygame.mixer.init() has succeeded; lets _ensure_pygame() skip
# re-initializing the mixer on later calls.
_pygame_ready = False

def _ensure_pygame():
    """
    Initialize the pygame mixer on first use.

    Returns:
        True when the mixer is ready (now or from an earlier call), False
        when pygame cannot be imported or the mixer fails to initialize.
        A failed attempt is not cached, so the next call tries again.
    """
    global _pygame_ready
    if not _pygame_ready:
        try:
            import pygame
            pygame.mixer.init()
        except Exception:
            return False
        _pygame_ready = True
    return True


# ── Public API ─────────────────────────────────────────────────────────────────

def speak(text: str) -> str:
    """
    Convert text to speech and play it synchronously.

    Backends are tried in order: pyttsx3 (offline), gTTS + pygame (online),
    and finally Windows Media Player on the MP3 that gTTS produced.

    Args:
        text: The text to speak. Empty or whitespace-only text is ignored.

    Returns:
        The path of the temporary MP3 file when gTTS produced one (the file
        is left on disk for the caller), or '' when pyttsx3 spoke the text,
        the text was empty, or no audio file could be synthesized.

    Latency profile:
      pyttsx3 : ~50-150 ms to first audio sample (fully offline).
      gTTS    : ~800-2000 ms (network + disk save) — only used as fallback.
    """
    if not text or not text.strip():
        return ''

    # ── Path 1: pyttsx3 (fast offline) ────────────────────────────────────────
    # The lock serializes access to the single shared engine.
    with _pyttsx3_lock:
        if _pyttsx3_available and _pyttsx3_engine is not None:
            try:
                _pyttsx3_engine.say(text)
                _pyttsx3_engine.runAndWait()
                return ''
            except Exception as e:
                print(f'[TTS] pyttsx3 speak failed ({e}), trying gTTS...')

    # ── Path 2: gTTS + pygame ─────────────────────────────────────────────────
    tmp_path = ''
    audio_saved = False
    try:
        from gtts import gTTS
        tts = gTTS(text=text, lang='en', slow=False)
        # mkstemp creates the file atomically with a unique name; the
        # deprecated mktemp only returned a name and was open to races.
        fd, tmp_path = tempfile.mkstemp(suffix='.mp3')
        # gTTS reopens the file by name, so release our handle first
        # (Windows refuses a second open while it is held).
        os.close(fd)
        tts.save(tmp_path)
        audio_saved = True

        if _ensure_pygame():
            import pygame
            pygame.mixer.music.load(tmp_path)
            pygame.mixer.music.play()
            # One clock for the whole wait loop: poll ~10 times per second.
            clock = pygame.time.Clock()
            while pygame.mixer.music.get_busy():
                clock.tick(10)
            pygame.mixer.music.unload()
            return tmp_path
    except Exception as e:
        print(f'[TTS] gTTS/pygame failed ({e}), trying wmplayer...')

    if tmp_path and not audio_saved:
        # Synthesis failed after the placeholder file was created: remove it
        # so an empty/partial MP3 is neither played nor handed to the caller.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        tmp_path = ''

    # ── Path 3: Windows wmplayer ───────────────────────────────────────────────
    # `start` is a cmd.exe builtin, so this path only makes sense on Windows.
    # os.system reports failure through its exit status, not by raising.
    if tmp_path and os.name == 'nt' and os.path.exists(tmp_path):
        if os.system(f'start /wait wmplayer "{tmp_path}"') == 0:
            return tmp_path

    print(f'[TTS] All playback methods failed. Text: {text[:60]}...')
    return tmp_path


def speak_async(text: str) -> threading.Thread:
    """
    Run speak(text) on a background daemon thread without blocking.

    Args:
        text: The text to hand to speak().

    Returns:
        The already-started thread; call join() on it to wait for the
        speech to finish.
    """
    worker = threading.Thread(
        target=speak,
        args=(text,),
        daemon=True,
    )
    worker.start()
    return worker