| import os |
| import re |
| from fastapi import FastAPI, HTTPException, Body |
| from fastapi.middleware.cors import CORSMiddleware |
| from fastapi.responses import StreamingResponse, JSONResponse |
| import azure.cognitiveservices.speech as speechsdk |
| from typing import Optional |
|
|
| |
# Azure Speech credentials are read from the environment (HF Spaces secrets).
AZURE_SPEECH_KEY = os.getenv("AZURE_SPEECH_KEY")
AZURE_SPEECH_REGION = os.getenv("AZURE_SPEECH_REGION", "southeastasia")


# Warn but keep booting when the key is missing; endpoints return 500 instead.
if not AZURE_SPEECH_KEY:
    print("[WARN] AZURE_SPEECH_KEY is not set. Set it in HF Spaces (Settings → Repository secrets).")


app = FastAPI(title="Azure TTS API", version="1.0.0")


# Wide-open CORS: the API is meant to be called from arbitrary browser frontends.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Voice used when a request does not specify one (th-TH locale neural voice).
DEFAULT_VOICE = "th-TH-PremwadeeNeural"
|
|
| |
| |
# Character class covering the common emoji blocks: flags, pictographs,
# transport, symbols, supplemental ranges, and skin-tone modifiers.
EMOJI_RE = re.compile(
    "["
    "\U0001F1E6-\U0001F1FF"  # regional indicator symbols (flags)
    "\U0001F300-\U0001F5FF"  # misc symbols & pictographs
    "\U0001F3FB-\U0001F3FF"  # skin-tone modifiers
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F700-\U0001F77F"  # alchemical symbols
    "\U0001F780-\U0001F7FF"  # geometric shapes extended
    "\U0001F800-\U0001F8FF"  # supplemental arrows-C
    "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
    "\U0001FA00-\U0001FA6F"  # chess symbols etc.
    "\U0001FA70-\U0001FAFF"  # symbols & pictographs extended-A
    "\U000023F0-\U000023FA"  # clocks and media-control symbols
    "\U00002600-\U000026FF"  # misc symbols
    "\U00002700-\U000027BF"  # dingbats
    "]",
    flags=re.UNICODE,
)


def strip_emoji(s: str) -> str:
    """Remove emoji (plus ZWJ/variation selectors) from *s* and collapse the
    resulting runs of whitespace into single spaces."""
    cleaned = EMOJI_RE.sub("", s)
    # Invisible characters used to compose emoji sequences: zero-width joiner
    # and variation selector-16.
    for invisible in ("\u200d", "\ufe0f"):
        cleaned = cleaned.replace(invisible, "")
    return re.sub(r"\s{2,}", " ", cleaned).strip()
|
|
| |
# Maps the public `audio_format` request values to Azure SDK output formats.
# Keys double as the file extension and drive MIME-type selection downstream.
FORMAT_MAP = {
    "wav": speechsdk.SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm,
    "mp3": speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3,
}
|
|
@app.get("/")
def root():
    """Service banner: confirms the API is reachable and reports its region."""
    return {
        "ok": True,
        "service": "Azure TTS API",
        "region": AZURE_SPEECH_REGION,
    }
|
|
@app.get("/health")
def health():
    """Liveness probe endpoint."""
    return dict(status="healthy")
|
|
@app.post("/synthesize")
def synthesize(
    text: str = Body(..., embed=True, description="ข้อความที่จะสังเคราะห์เสียง"),
    voice: Optional[str] = Body(DEFAULT_VOICE, embed=True),
    audio_format: Optional[str] = Body("mp3", embed=True, description="mp3 หรือ wav"),
    strip_emoji_before_tts: Optional[bool] = Body(True, embed=True)
):
    """
    Synthesize speech via Azure TTS and stream the audio bytes back
    (Content-Type follows the requested format).

    Raises:
        HTTPException 400: unsupported audio_format, or text is empty after
            emoji sanitization.
        HTTPException 500: missing key, synthesis canceled, or SDK failure.
    """
    if not AZURE_SPEECH_KEY:
        raise HTTPException(status_code=500, detail="AZURE_SPEECH_KEY not set")

    # Tolerate an explicit JSON null by falling back to the default format.
    audio_format = (audio_format or "mp3").lower()
    if audio_format not in FORMAT_MAP:
        raise HTTPException(status_code=400, detail=f"Unsupported audio_format: {audio_format}. Use one of {list(FORMAT_MAP.keys())}")

    try:
        speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION)
        speech_config.speech_synthesis_voice_name = voice or DEFAULT_VOICE
        speech_config.set_speech_synthesis_output_format(FORMAT_MAP[audio_format])

        text_for_tts = strip_emoji(text) if strip_emoji_before_tts else text
        if not text_for_tts:
            raise HTTPException(status_code=400, detail="Text contains only emojis after sanitization")

        # audio_config=None keeps the audio in memory instead of playing it
        # on a local audio device.
        synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)

        # speak_text_async(...).get() blocks until synthesis finishes.
        result = synthesizer.speak_text_async(text_for_tts).get()

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            audio_bytes = result.audio_data
            if not audio_bytes:
                raise HTTPException(status_code=500, detail="No audio data produced")

            mime = "audio/mpeg" if audio_format == "mp3" else "audio/wav"
            filename = f"speech.{audio_format}"

            # Fix: the Content-Disposition header previously hard-coded
            # filename="(unknown)" and left the computed `filename` unused.
            return StreamingResponse(
                iter([audio_bytes]),
                media_type=mime,
                headers={"Content-Disposition": f'inline; filename="{filename}"'}
            )

        elif result.reason == speechsdk.ResultReason.Canceled:
            details = result.cancellation_details
            msg = f"Synthesis canceled: {details.reason}"
            if details.reason == speechsdk.CancellationReason.Error:
                msg += f" | error: {details.error_details}"
            raise HTTPException(status_code=500, detail=msg)

        else:
            raise HTTPException(status_code=500, detail=f"Unknown result: {result.reason}")

    except HTTPException:
        # Fix: re-raise intentional HTTP errors (e.g. the 400 above) instead of
        # letting the blanket handler below re-wrap them as generic 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
@app.get("/voices")
def list_voices():
    """
    Return the voices available in the configured Azure region.

    Raises:
        HTTPException 500: missing key, retrieval failure, or SDK error.
    """
    if not AZURE_SPEECH_KEY:
        raise HTTPException(status_code=500, detail="AZURE_SPEECH_KEY not set")

    try:
        speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION)
        # audio_config=None: no local playback device is involved.
        synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)
        voices_result = synthesizer.get_voices_async().get()

        if voices_result.reason == speechsdk.ResultReason.VoicesListRetrieved:
            voices = [
                {
                    "name": v.name,
                    "locale": v.locale,
                    "gender": v.gender.name if v.gender else None,
                    # short_name may be absent on older SDK versions.
                    "shortName": getattr(v, "short_name", None),
                }
                for v in voices_result.voices
            ]
            return JSONResponse(content={"count": len(voices), "voices": voices})
        else:
            raise HTTPException(status_code=500, detail=f"Failed to list voices: {voices_result.reason}")

    except HTTPException:
        # Fix: propagate the intentional HTTPException above unchanged instead
        # of re-wrapping it (which mangled its detail into str(e)).
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|