File size: 9,682 Bytes
c250141
25d8d89
9364d37
25d8d89
99eec13
4ce7bbc
25d8d89
 
a366e82
3387383
c2b7e8e
a366e82
25d8d89
ee03549
25d8d89
 
 
 
 
c2b7e8e
 
 
a33b605
 
72f1b9f
c2b7e8e
a33b605
6efc063
c2b7e8e
c5fe5fc
c2b7e8e
 
 
 
 
 
 
a33b605
 
c2b7e8e
a33b605
 
c2b7e8e
a33b605
6efc063
 
c2b7e8e
 
a366e82
4ce7bbc
 
 
 
a366e82
 
25d8d89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2b7e8e
 
 
a366e82
 
c2b7e8e
a366e82
b5549d7
25d8d89
c2b7e8e
 
b5549d7
c2b7e8e
 
 
ae4e7e6
c2b7e8e
ae4e7e6
c2b7e8e
 
 
 
b5549d7
ae4e7e6
25d8d89
b5549d7
3387383
d39f28b
b5549d7
3387383
b5549d7
ae4e7e6
b5549d7
 
 
 
 
 
f948caf
c2b7e8e
 
25d8d89
ae4e7e6
c2b7e8e
a366e82
4127e5e
a366e82
 
4ce7bbc
a366e82
4ce7bbc
a366e82
4ce7bbc
a366e82
4ce7bbc
 
 
 
a366e82
4ce7bbc
 
 
a366e82
4ce7bbc
a366e82
680e093
 
4ce7bbc
 
 
 
 
e904dcf
 
a366e82
 
 
 
4ce7bbc
 
a366e82
 
4ce7bbc
 
a366e82
4ce7bbc
 
 
 
 
 
 
 
 
 
a366e82
 
 
4ce7bbc
 
 
 
6d47d39
 
 
 
 
bf98578
6d47d39
 
 
 
 
 
 
 
 
bf98578
 
6d47d39
 
 
 
 
 
bf98578
 
 
 
 
 
 
 
 
 
6d47d39
bf98578
4ce7bbc
 
6d47d39
 
 
 
 
9364d37
6d47d39
 
9364d37
 
 
6d47d39
 
 
9364d37
6d47d39
 
9364d37
 
6d47d39
 
 
 
4ce7bbc
6d47d39
 
 
4ce7bbc
25d8d89
 
 
 
 
 
d39f28b
 
 
 
 
 
 
 
 
4ce7bbc
d39f28b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
from fastapi import FastAPI, Response, HTTPException
from fastapi.responses import StreamingResponse
import numpy as np
from piper import PiperVoice
import sherpa_onnx
import base64
import io
import os
import httpx
import wave
from pydantic import BaseModel
from typing import Optional, Literal

# ASGI application object; the route decorators further down register on it.
app = FastAPI(title="TTS App for my projects")

# Directory where model files are looked up. The path is relative, so it
# resolves against the process working directory. It is created at import
# time (no error if it already exists).
MODEL_DIR = "./models"
os.makedirs(MODEL_DIR, exist_ok=True)

# Languages that ship separate male and female models:
# (language code, male model, female model).
# NOTE(review): the male/female assignment is taken on trust here; confirm
# each pair against the published voice list (the "es" pair in particular).
_GENDERED_VOICES = (
    ("en", "en_GB-alan-medium", "en_GB-semaine-medium"),
    ("es", "es_ES-sharvard-medium", "es_ES-davefx-medium"),
    ("fr", "fr_FR-upmc-medium", "fr_FR-siwis-medium"),
    ("de", "de_DE-thorsten-medium", "de_DE-kerstin-low"),
    ("it", "it_IT-riccardo-x_low", "it_IT-paola-medium"),
    ("pl", "pl_PL-darkman-medium", "pl_PL-gosia-medium"),
    ("uk", "uk_UA-ukrainian_tts-medium", "uk_UA-lada-x_low"),
    ("nl", "nl_NL-ronnie-medium", "nl_NL-mls-medium"),
    ("eu", "eu_ES-antton-medium", "eu_ES-maider-medium"),
)

# Languages with a single model: (language code, model).
_SINGLE_VOICES = (
    ("bg", "bg_BG-dimitar-medium"),
    ("ca", "ca_ES-upc_ona-medium"),
    ("cs", "cs_CZ-jirka-medium"),
    ("da", "da_DK-talesyntese-medium"),
    ("fi", "fi_FI-harri-medium"),
    ("el", "el_GR-rapunzelina-low"),
    ("hu", "hu_HU-anna-medium"),
    ("is", "is_IS-ugla-medium"),
    ("lv", "lv_LV-aivars-medium"),
    ("ro", "ro_RO-mihai-medium"),
    ("sk", "sk_SK-lili-medium"),
    ("sl", "sl_SI-artur-medium"),
    ("sv", "sv_SE-lisa-medium"),
    ("cy", "cy_GB-gwryw_gogleddol-medium"),
)

# Language code -> voice entry. Gendered entries carry "male"/"female" model
# names; the rest carry a single "default" model name. Gendered languages
# come first, then single-voice ones, each in table order.
VOICE_MAP = {
    code: {"gendered": True, "male": male, "female": female}
    for code, male, female in _GENDERED_VOICES
}
VOICE_MAP.update(
    (code, {"gendered": False, "default": model})
    for code, model in _SINGLE_VOICES
)

# Irish dialect -> voice entry. Same entry layout as the other voice table in
# this file: gendered dialects carry "male"/"female" voice names, the others
# carry a single "default" voice name.
IRISH_MAP = {
    "Donegal": dict(gendered=True, male="ga_UL_doc_piper", female="ga_UL_anb_piper"),
    "Kerry": dict(gendered=True, male="ga_MU_cmg_piper", female="ga_MU_nnc_piper"),
    "Ring": dict(gendered=False, default="ga_MU_ar_fnm_piper"),
    "Connemara": dict(gendered=False, default="ga_CO_snc_piper"),
}

# Voices already loaded in this process, keyed by model name, so each model
# is read from disk at most once.
loaded_voices = {}

def get_voice(model_name: str):
    """Return the voice for ``model_name``, loading and caching it on first use.

    The model is expected at ``MODEL_DIR/<model_name>.onnx`` with its config
    next to it at ``<model_name>.onnx.json``.

    Raises:
        FileNotFoundError: if the ``.onnx`` file does not exist.
    """
    if model_name in loaded_voices:
        return loaded_voices[model_name]

    onnx_file = os.path.join(MODEL_DIR, f"{model_name}.onnx")
    if not os.path.exists(onnx_file):
        raise FileNotFoundError(f"Model {model_name} not found.")

    voice = PiperVoice.load(onnx_file, f"{onnx_file}.json")
    loaded_voices[model_name] = voice
    return voice

class TTSRequest(BaseModel):
    """Request body accepted by the /tts/piper, /tts/irish and /tts/breton handlers."""

    # Text to synthesise.
    text: str
    # Language code. Only the /tts/piper handler reads it (lower-cased, then
    # looked up in VOICE_MAP); the field is still required by the other two.
    language: str
    # Requested voice gender; defaults to "male" when omitted.
    gender: Literal["male","female"] = "male"
    # Irish dialect. Only the /tts/irish handler reads it, and it treats
    # None as "Donegal".
    dialect: Optional[Literal["Kerry", "Donegal", "Ring", "Connemara"]] = None

@app.post("/tts/piper")
async def tts_post(request: TTSRequest):
    """Synthesise ``request.text`` with a local Piper voice and return WAV audio.

    The language code is lower-cased and looked up in ``VOICE_MAP``. Gendered
    languages use ``request.gender`` (falling back to the male voice);
    single-voice languages always use their default model.
    ``request.dialect`` is not used by this endpoint.

    Returns:
        A ``Response`` with media type ``audio/wav`` holding a mono, 16-bit
        PCM WAV file at the voice's configured sample rate.

    Raises:
        HTTPException: 400 when the language is not in ``VOICE_MAP``;
            500 when loading the model or synthesising fails.
    """
    lang_code = request.language.lower()
    lang_entry = VOICE_MAP.get(lang_code)

    # Validate before entering the try block: raised inside it, this 400
    # would be caught by the generic handler below and reported as a 500.
    if not lang_entry:
        raise HTTPException(status_code=400, detail=f"Language '{lang_code}' not supported.")

    # Determine model name
    if lang_entry["gendered"]:
        model_name = lang_entry.get(request.gender.lower(), lang_entry["male"])
    else:
        model_name = lang_entry["default"]

    try:
        voice = get_voice(model_name)

        # Build the WAV file in memory.
        wav_buffer = io.BytesIO()
        with wave.open(wav_buffer, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)  # 16-bit audio
            wav_file.setframerate(voice.config.sample_rate)

            for chunk in voice.synthesize(request.text):
                # Clamp to [-1, 1] first so an out-of-range sample saturates
                # instead of overflowing in the int16 cast.
                clipped = np.clip(chunk.audio_float_array, -1.0, 1.0)
                wav_file.writeframes((clipped * 32767).astype("int16").tobytes())

        # getvalue() returns the whole buffer regardless of stream position.
        return Response(content=wav_buffer.getvalue(), media_type="audio/wav")

    except Exception as e:
        print(f"Error during TTS: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

# Endpoint of the remote ABAIR synthesis service used for Irish.
ABAIR_URL = "https://synthesis.abair.ie/api/synthesise"

@app.post("/tts/irish")
async def get_irish_tts(request: TTSRequest):
    """Fetch Irish speech from the ABAIR synthesis API and return it as audio.

    The voice is chosen from ``IRISH_MAP`` by ``request.dialect`` (defaulting
    to "Donegal") and, for gendered dialects, by ``request.gender`` (falling
    back to the male voice). ``request.language`` is not used here.

    The upstream response is expected to be JSON with a base64-encoded
    ``audioContent`` field; the decoded bytes are returned with media type
    ``audio/wav``.

    Raises:
        HTTPException: 502 when ABAIR answers with a non-200 status;
            503 when the request to ABAIR fails at the transport level;
            500 when the response lacks ``audioContent`` or any other
            unexpected error occurs.
    """
    dialect = request.dialect or "Donegal"

    # 1. Determine the correct voice string
    entry = IRISH_MAP.get(dialect, IRISH_MAP["Donegal"])
    if entry.get("gendered"):
        voice = entry.get(request.gender.lower(), entry["male"])
    else:
        voice = entry["default"]

    # 2. Query parameters and browser-like headers sent to ABAIR
    params = {
        "input": request.text,
        "voice": voice,
        "normalise": "true",
        "speed": 0.9
    }

    headers = {
        "Origin": "https://abair.ie",
        "Referer": "https://abair.ie/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "*/*"
    }

    async with httpx.AsyncClient() as client:
        try:
            # Note: ABAIR expects a GET request for this specific endpoint
            response = await client.get(ABAIR_URL, params=params, headers=headers, timeout=15.0)

            if response.status_code != 200:
                print(f"ABAIR Error: {response.status_code} - {response.text}")
                raise HTTPException(status_code=502, detail=f"ABAIR service error: {response.status_code}")

            data = response.json()

            # 3. Handle Base64 decoding
            if "audioContent" not in data:
                raise HTTPException(status_code=500, detail="Invalid response format from ABAIR")

            audio_bytes = base64.b64decode(data["audioContent"])

            # 4. Return the decoded audio bytes
            return Response(content=audio_bytes, media_type="audio/wav")

        except HTTPException:
            # Deliberate HTTP errors raised above must reach the client as-is;
            # without this clause the generic handler below would turn the
            # 502 into a 500.
            raise
        except httpx.RequestError as exc:
            raise HTTPException(status_code=503, detail=f"Could not connect to ABAIR: {exc}") from exc
        except Exception as e:
            print(f"Internal Error: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e)) from e

# Shared Breton TTS engine; stays None until the first call below builds it.
breton_engine = None

def get_breton_engine():
    """Return the shared Breton TTS engine, constructing it on first call.

    The engine is built from ``breton-model.onnx`` and ``breton-tokens.txt``
    under ``MODEL_DIR`` and then kept in the module-level ``breton_engine``.
    """
    global breton_engine
    if breton_engine is not None:
        return breton_engine

    # Innermost layer: the VITS model files and synthesis parameters.
    vits = sherpa_onnx.OfflineTtsVitsModelConfig(
        model=os.path.join(MODEL_DIR, "breton-model.onnx"),
        tokens=os.path.join(MODEL_DIR, "breton-tokens.txt"),
        data_dir="",
        noise_scale=0.667,
        noise_scale_w=0.8,
        length_scale=1.0,
    )

    # Middle layer: runtime settings wrapped around the VITS config.
    runtime = sherpa_onnx.OfflineTtsModelConfig(
        vits=vits,
        num_threads=1,
        debug=False,
        provider="cpu",
    )

    # Outer layer: the top-level config that the engine constructor takes.
    breton_engine = sherpa_onnx.OfflineTts(
        sherpa_onnx.OfflineTtsConfig(
            model=runtime,
            rule_fsts="",
            max_num_sentences=1,
        )
    )
    return breton_engine
    
@app.post("/tts/breton")
async def get_breton_tts(request: TTSRequest):
    """Synthesise ``request.text`` with the local Breton engine and return WAV audio.

    Only ``request.text`` and ``request.gender`` are used;
    ``request.language`` and ``request.dialect`` are ignored.

    Returns:
        A ``Response`` with media type ``audio/wav`` holding a mono, 16-bit
        PCM WAV file at the sample rate reported by the engine.

    Raises:
        HTTPException: 500 when engine creation or synthesis fails.
    """
    try:
        engine = get_breton_engine()

        # Speaker id passed to the engine: 0 for "female", 1 otherwise.
        # NOTE(review): which speaker id is which voice depends on the model; confirm.
        sid = 0 if request.gender.lower() == "female" else 1

        audio = engine.generate(request.text, sid=sid)

        # Convert to a float32 array and clamp to [-1, 1] so an out-of-range
        # sample saturates instead of overflowing in the int16 cast.
        samples_array = np.clip(np.asarray(audio.samples, dtype=np.float32), -1.0, 1.0)
        audio_int16 = (samples_array * 32767).astype("int16")

        wav_buffer = io.BytesIO()
        with wave.open(wav_buffer, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setframerate(audio.sample_rate)
            wav_file.writeframes(audio_int16.tobytes())

        # getvalue() returns the whole buffer regardless of stream position.
        return Response(content=wav_buffer.getvalue(), media_type="audio/wav")

    except Exception as e:
        print(f"Breton TTS Error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.get("/health")
def health():
    """Health check: return a fixed status payload."""
    # Named `health` rather than `home` so it no longer shares a function name
    # with the "/" handler defined later in this file.
    return {"status": "Piper TTS is running"}

@app.get("/")
def home():
    """Report service status, the contents of ``MODEL_DIR`` and the supported language codes.

    If the model directory cannot be listed, ``models_in_folder`` holds a
    single error string instead of file names.
    """
    try:
        model_files = os.listdir(MODEL_DIR)
    except Exception as err:
        model_files = ["Error reading directory: " + str(err)]

    # Every VOICE_MAP language, followed by the two codes appended here.
    languages = list(VOICE_MAP) + ["ga", "br"]

    return {
        "message": "Piper TTS API is running",
        "models_in_folder": model_files,
        "supported_languages": languages,
    }