File size: 5,761 Bytes
975f9a3
 
 
 
 
 
 
4bef769
 
0d0b668
4bef769
 
0d0b668
975f9a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48c3b28
975f9a3
 
 
 
 
 
 
 
 
48c3b28
975f9a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48c3b28
975f9a3
 
 
 
 
48c3b28
975f9a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d0b668
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
975f9a3
 
 
b9dda80
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
from __future__ import annotations

import os
import tempfile

from fastapi import FastAPI, File, Form, HTTPException, UploadFile

try:
    from .config import VoiceRuntimeConfig
    from .service import process_diarization_only, process_voice
except ImportError:  # HF flat-root execution fallback
    from config import VoiceRuntimeConfig
    from service import process_diarization_only, process_voice

# ASGI application object; the route decorators in this module register on it.
app = FastAPI(
    title="Voice Intelligence Module",
    version="1.0.0",
)

# Upload filename extensions accepted by _save_upload_temp (lower-case, with
# the leading dot, because the check lower-cases the extension first).
_ALLOWED = {
    ".wav",
    ".mp3",
    ".m4a",
    ".ogg",
    ".flac",
    ".aac",
    ".mp4",
    ".mov",
    ".mkv",
    ".webm",
}


def _save_upload_temp(upload: UploadFile) -> str:
    """Copy an uploaded file to a fresh temporary file and return its path.

    The extension of the client-supplied filename is lower-cased, checked
    against ``_ALLOWED`` and reused as the temp file's suffix. A missing
    filename is treated as ``"input_audio"``, which has no extension and is
    therefore rejected.

    Args:
        upload: The incoming multipart upload.

    Returns:
        Path of the temporary file. The caller is responsible for deleting it.

    Raises:
        HTTPException: 400 if the extension is absent or not in ``_ALLOWED``.
    """
    filename = upload.filename or "input_audio"
    ext = os.path.splitext(filename)[1].lower()
    if ext not in _ALLOWED:
        allowed = ", ".join(sorted(_ALLOWED))
        raise HTTPException(status_code=400, detail=f"Unsupported input extension: {ext or 'unknown'}. Allowed: {allowed}")

    fd, tmp_path = tempfile.mkstemp(prefix="voice-intel-upload-", suffix=ext)
    try:
        # Reuse the descriptor mkstemp already opened rather than closing it
        # and reopening the path; the `with` block closes it for us.
        with os.fdopen(fd, "wb") as f:
            # Stream in 1 MiB chunks so large uploads are never held in memory.
            while True:
                chunk = upload.file.read(1024 * 1024)
                if not chunk:
                    break
                f.write(chunk)
    except BaseException:
        # The caller never receives the path on failure, so nobody else could
        # clean it up: remove the partial file here, then propagate the error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return tmp_path


@app.get("/health")
def health() -> dict:
    """Health-check endpoint: return a static payload identifying this module."""
    status = {"ok": True, "module": "voice-intelligence"}
    return status


@app.post("/v1/voice/align/trimmed")
def align_trimmed(
    file: UploadFile = File(...),
    language_hint: str = Form("auto"),
    silence_threshold_db: float = Form(-40.0),
    min_silence_sec: float = Form(0.30),
    keep_padding_sec: float = Form(0.05),
    analysis_window_ms: int = Form(10),
    diarization_enabled: str = Form("true"),
    diarization_min_speakers: int = Form(0),
    diarization_max_speakers: int = Form(0),
) -> dict:
    """Run the voice pipeline with silence trimming and audio payload enabled.

    The upload is saved to a temporary file, a ``VoiceRuntimeConfig`` is built
    from the environment and overridden with the form values, and
    ``process_voice`` is called with ``trim_silence_enabled=True``,
    ``include_audio_payload=True`` and ``minimal_output=False``.

    Args:
        file: Uploaded media file; its extension must be in ``_ALLOWED``.
        language_hint: Passed through to ``process_voice``.
        silence_threshold_db: Overrides ``config.silence_threshold_db``.
        min_silence_sec: Overrides ``config.min_silence_sec``.
        keep_padding_sec: Overrides ``config.keep_padding_sec``.
        analysis_window_ms: Overrides ``config.analysis_window_ms``.
        diarization_enabled: Textual flag; "1", "true", "yes" or "on"
            (case-insensitive, surrounding whitespace ignored) enable
            diarization, anything else disables it.
        diarization_min_speakers: Overrides ``config.diarization_min_speakers``.
        diarization_max_speakers: Overrides ``config.diarization_max_speakers``.

    Returns:
        The ``process_voice`` response with ``endpoint`` and ``description``
        keys added.

    Raises:
        HTTPException: 400 for an unsupported extension; 500 wrapping any
            other failure while configuring or processing.
    """
    tmp_path = _save_upload_temp(file)

    # Everything after the temp file exists runs inside try/finally so the
    # upload is removed even if building the config fails.
    try:
        config = VoiceRuntimeConfig.from_env()
        config.silence_threshold_db = silence_threshold_db
        config.min_silence_sec = min_silence_sec
        config.keep_padding_sec = keep_padding_sec
        config.analysis_window_ms = analysis_window_ms
        config.diarization_enabled = diarization_enabled.strip().lower() in ("1", "true", "yes", "on")
        config.diarization_min_speakers = diarization_min_speakers
        config.diarization_max_speakers = diarization_max_speakers

        response = process_voice(
            input_audio_path=tmp_path,
            config=config,
            language_hint=language_hint,
            trim_silence_enabled=True,
            include_audio_payload=True,
            minimal_output=False,
        )
        response["endpoint"] = "/v1/voice/align/trimmed"
        response["description"] = "Returns word-level data and silence-removed WAV payload."
        return response
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap it as a 500.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"trimmed endpoint failed: {exc}") from exc
    finally:
        # Best-effort cleanup: a missing or locked temp file must not mask
        # the real response or error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass


@app.post("/v1/voice/align/raw")
def align_raw(
    file: UploadFile = File(...),
    language_hint: str = Form("auto"),
    diarization_enabled: str = Form("true"),
    diarization_min_speakers: int = Form(0),
    diarization_max_speakers: int = Form(0),
) -> dict:
    """Run the voice pipeline without silence trimming or audio payload.

    The upload is saved to a temporary file, a ``VoiceRuntimeConfig`` is built
    from the environment and overridden with the diarization form values, and
    ``process_voice`` is called with ``trim_silence_enabled=False``,
    ``include_audio_payload=False`` and ``minimal_output=True``.

    Args:
        file: Uploaded media file; its extension must be in ``_ALLOWED``.
        language_hint: Passed through to ``process_voice``.
        diarization_enabled: Textual flag; "1", "true", "yes" or "on"
            (case-insensitive, surrounding whitespace ignored) enable
            diarization, anything else disables it.
        diarization_min_speakers: Overrides ``config.diarization_min_speakers``.
        diarization_max_speakers: Overrides ``config.diarization_max_speakers``.

    Returns:
        The ``process_voice`` response with ``endpoint`` and ``description``
        keys added.

    Raises:
        HTTPException: 400 for an unsupported extension; 500 wrapping any
            other failure while configuring or processing.
    """
    tmp_path = _save_upload_temp(file)

    # Everything after the temp file exists runs inside try/finally so the
    # upload is removed even if building the config fails.
    try:
        config = VoiceRuntimeConfig.from_env()
        config.diarization_enabled = diarization_enabled.strip().lower() in ("1", "true", "yes", "on")
        config.diarization_min_speakers = diarization_min_speakers
        config.diarization_max_speakers = diarization_max_speakers

        response = process_voice(
            input_audio_path=tmp_path,
            config=config,
            language_hint=language_hint,
            trim_silence_enabled=False,
            include_audio_payload=False,
            minimal_output=True,
        )
        response["endpoint"] = "/v1/voice/align/raw"
        response["description"] = "Returns only required alignment data; input audio remains unchanged."
        return response
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap it as a 500.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"raw endpoint failed: {exc}") from exc
    finally:
        # Best-effort cleanup: a missing or locked temp file must not mask
        # the real response or error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass


@app.post("/v1/voice/diarization")
def diarization_only(
    file: UploadFile = File(...),
    diarization_enabled: str = Form("true"),
    diarization_min_speakers: int = Form(0),
    diarization_max_speakers: int = Form(0),
) -> dict:
    """Run only the diarization step on an uploaded file.

    The upload is saved to a temporary file, a ``VoiceRuntimeConfig`` is built
    from the environment and overridden with the diarization form values, and
    ``process_diarization_only`` is called with that path and config.

    Args:
        file: Uploaded media file; its extension must be in ``_ALLOWED``.
        diarization_enabled: Textual flag; "1", "true", "yes" or "on"
            (case-insensitive, surrounding whitespace ignored) set
            ``config.diarization_enabled`` to True, anything else to False.
        diarization_min_speakers: Overrides ``config.diarization_min_speakers``.
        diarization_max_speakers: Overrides ``config.diarization_max_speakers``.

    Returns:
        The ``process_diarization_only`` response with ``endpoint`` and
        ``description`` keys added.

    Raises:
        HTTPException: 400 for an unsupported extension; 500 wrapping any
            other failure while configuring or processing.
    """
    tmp_path = _save_upload_temp(file)

    # Everything after the temp file exists runs inside try/finally so the
    # upload is removed even if building the config fails.
    try:
        config = VoiceRuntimeConfig.from_env()
        config.diarization_enabled = diarization_enabled.strip().lower() in ("1", "true", "yes", "on")
        config.diarization_min_speakers = diarization_min_speakers
        config.diarization_max_speakers = diarization_max_speakers

        response = process_diarization_only(
            input_audio_path=tmp_path,
            config=config,
        )
        response["endpoint"] = "/v1/voice/diarization"
        response["description"] = "Returns diarization only (speaker segments + summary), no transcription payload."
        return response
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap it as a 500.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"diarization endpoint failed: {exc}") from exc
    finally:
        # Best-effort cleanup: a missing or locked temp file must not mask
        # the real response or error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass


if __name__ == "__main__":
    # Imported lazily so that merely importing this module does not require
    # uvicorn to be installed.
    import uvicorn

    # Listening port comes from the PORT environment variable (default 8091).
    port = int(os.environ.get("PORT", "8091"))

    # Hand uvicorn the app object instead of the
    # "core.voice_intelligence.api:app" import string. That string only
    # resolves when the package layout is importable, so it breaks the
    # flat-root execution mode the import fallback at the top of this module
    # supports, and in package mode it imports this module a second time.
    # An app instance is accepted because reload is disabled.
    uvicorn.run(app, host="0.0.0.0", port=port, reload=False)