drrobot9 commited on
Commit
44ae209
·
verified ·
1 Parent(s): 5f555e8

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +43 -0
  2. app/main.py +179 -0
  3. requirements.txt +13 -0
Dockerfile ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# syntax=docker/dockerfile:1
FROM python:3.11-slim-bookworm

# All HF cache variables point at /opt/models so the model downloaded at
# build time (below) is found again at runtime by from_pretrained().
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/opt/models \
    TRANSFORMERS_CACHE=/opt/models \
    HUGGINGFACE_HUB_CACHE=/opt/models

WORKDIR /code

# libsndfile1/ffmpeg for audio I/O; gcc/g++/build-essential/python3-dev for
# any packages that compile native extensions at install time.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    wget \
    curl \
    libsndfile1 \
    ffmpeg \
    gcc \
    g++ \
    build-essential \
    python3-dev \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download the model into the hub cache.
# FIX: the original passed local_dir=/opt/models/LiquidAI/..., which writes a
# plain directory tree. The app loads with from_pretrained("LiquidAI/...")
# which searches the hub cache layout under HUGGINGFACE_HUB_CACHE — it would
# never see that directory and would re-download the model on every boot.
# Downloading without local_dir places it in the cache, where it is found.
RUN python - <<EOF
from huggingface_hub import snapshot_download
snapshot_download(repo_id="LiquidAI/LFM2.5-Audio-1.5B")
print("Model downloaded successfully.")
EOF

COPY . .

EXPOSE 7860

# FIX: the FastAPI instance lives in app/main.py, so the uvicorn import
# string is "app.main:app". The original "app:app" fails to import.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/main.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
import asyncio
import json
import torch
import numpy as np
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from pydantic import BaseModel  # NOTE(review): unused in this module — confirm before removing
from liquid_audio import (
    LFM2AudioModel,
    LFM2AudioProcessor,
    ChatState,
)


# Model repository and audio-streaming parameters.
HF_REPO = "LiquidAI/LFM2.5-Audio-1.5B"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SAMPLE_RATE = 24_000  # PCM sample rate advertised in the streaming WAV header
CHUNK_SIZE = 6        # audio tokens buffered before each decode-and-send

# Prefer bfloat16 on GPUs that support it; otherwise fall back to float32.
if DEVICE == "cuda" and torch.cuda.is_bf16_supported():
    DTYPE = torch.bfloat16
else:
    DTYPE = torch.float32

# Enable TF32 matmuls (takes effect only on CUDA builds/hardware).
torch.backends.cuda.matmul.allow_tf32 = True


# Load the processor and model once at import time; all WebSocket requests
# share these module-level singletons.
processor = LFM2AudioProcessor.from_pretrained(HF_REPO)
model = LFM2AudioModel.from_pretrained(
    HF_REPO,
    torch_dtype=DTYPE,
).to(DEVICE).eval()

print(f"[BOOT] LFM2.5 Loaded on {DEVICE}")


app = FastAPI(title="LFM2.5 WebSocket TTS", version="2.0.0")
45
+
46
+
47
+ # WAV HEADER
48
+
49
+
50
def wav_header(sample_rate: int, channels: int = 1, bits: int = 16) -> bytes:
    """Build a 44-byte PCM WAV header whose size fields are 0xFFFFFFFF.

    The bogus RIFF/data chunk sizes mark the payload length as unknown,
    which lets the header prefix an open-ended streaming audio feed.
    """
    bytes_per_frame = channels * bits // 8
    bytes_per_second = sample_rate * bytes_per_frame
    unknown_size = b"\xff\xff\xff\xff"

    header = bytearray()
    header += b"RIFF" + unknown_size + b"WAVE"
    header += b"fmt " + (16).to_bytes(4, "little")   # fmt chunk length
    header += (1).to_bytes(2, "little")              # format tag 1 = PCM
    header += channels.to_bytes(2, "little")
    header += sample_rate.to_bytes(4, "little")
    header += bytes_per_second.to_bytes(4, "little")
    header += bytes_per_frame.to_bytes(2, "little")  # block align
    header += bits.to_bytes(2, "little")
    header += b"data" + unknown_size
    return bytes(header)
67
+
68
+
69
+ # STREAM CORE
70
+
71
+
72
async def stream_lfm_tts(websocket: WebSocket, text: str):
    """Generate speech for *text* with LFM2.5 and stream it over *websocket*.

    Sends a streaming WAV header first, then raw 16-bit PCM chunks as the
    model emits audio tokens, and finally a JSON ``{"type": "done"}`` frame.
    A concurrent listener task watches for a client ``{"type": "stop"}``
    request and raises a flag that aborts generation.
    """
    chat = ChatState(processor)

    # Build the conversation: system prompt, user text, then an open
    # assistant turn that generate_interleaved() fills in.
    chat.new_turn("system")
    chat.add_text("Respond with interleaved text and audio.")
    chat.end_turn()

    chat.new_turn("user")
    chat.add_text(text)
    chat.end_turn()

    chat.new_turn("assistant")

    # Header with unknown-length size fields prefixes the open-ended stream.
    await websocket.send_bytes(wav_header(SAMPLE_RATE))

    audio_buffer = []
    stop_flag = False

    async def listen_for_stop():
        # Raise stop_flag on a client {"type": "stop"} frame or any receive
        # failure (e.g. disconnect). FIX: catch Exception rather than a bare
        # except so asyncio.CancelledError propagates and the task can
        # actually be cancelled in the finally block below.
        nonlocal stop_flag
        try:
            while True:
                msg = await websocket.receive_text()
                data = json.loads(msg)
                if data.get("type") == "stop":
                    stop_flag = True
                    break
        except Exception:
            stop_flag = True

    listener_task = asyncio.create_task(listen_for_stop())

    # NOTE(review): generate_interleaved() appears to be a synchronous
    # generator, so the event loop is blocked between tokens and the stop
    # listener can only run at the await points (the send calls). Offloading
    # generation to a worker thread would make stop handling more
    # responsive — TODO confirm against the liquid_audio API.
    try:
        with torch.inference_mode():
            for token in model.generate_interleaved(
                **chat,
                max_new_tokens=4096,
                audio_temperature=0.8,
                audio_top_k=4,
            ):
                if stop_flag:
                    break

                # Single-element tensors are text tokens; only multi-element
                # audio-code tokens are buffered for decoding.
                if token.numel() == 1:
                    continue

                audio_buffer.append(token)

                if len(audio_buffer) >= CHUNK_SIZE:
                    await _send_pcm_chunk(websocket, audio_buffer)
                    audio_buffer.clear()

        # Flush the remainder, dropping the final token (presumably an
        # end-of-audio marker — verify against liquid_audio docs).
        if not stop_flag and len(audio_buffer) > 1:
            await _send_pcm_chunk(websocket, audio_buffer[:-1])

        await websocket.send_text(json.dumps({"type": "done"}))

    finally:
        # Stop the listener and wait for it to unwind before returning, so
        # no orphan task keeps reading from the socket.
        listener_task.cancel()
        try:
            await listener_task
        except asyncio.CancelledError:
            pass


async def _send_pcm_chunk(websocket: WebSocket, tokens) -> None:
    """Decode buffered audio-code tokens and send them as 16-bit PCM bytes."""
    audio_codes = torch.stack(tokens, dim=1).unsqueeze(0).to(DEVICE)
    waveform = processor.decode(audio_codes)
    waveform = waveform.squeeze().cpu().numpy()
    waveform = np.clip(waveform, -1.0, 1.0)  # guard against clipping overflow
    audio_int16 = (waveform * 32767.0).astype(np.int16)
    await websocket.send_bytes(audio_int16.tobytes())
153
+
154
+
155
+ # WEBSOCKET ENDPOINT
156
+
157
+
158
@app.websocket("/ws/tts")
async def websocket_tts(websocket: WebSocket):
    """WebSocket endpoint: run one TTS stream per {"type": "start"} frame.

    Expects JSON text frames. A "start" frame with non-empty "text" triggers
    stream_lfm_tts(); empty text or malformed JSON yields a JSON error frame.
    The loop runs until the client disconnects.
    """
    await websocket.accept()

    try:
        while True:
            message = await websocket.receive_text()

            # FIX: a malformed frame used to raise JSONDecodeError out of
            # the handler and close the socket; report it and keep serving.
            try:
                payload = json.loads(message)
            except json.JSONDecodeError:
                await websocket.send_text(json.dumps({
                    "type": "error",
                    "message": "Invalid JSON"
                }))
                continue

            if payload.get("type") == "start":
                text = payload.get("text", "").strip()
                if not text:
                    await websocket.send_text(json.dumps({
                        "type": "error",
                        "message": "Text is empty"
                    }))
                    continue

                await stream_lfm_tts(websocket, text)

    except WebSocketDisconnect:
        print("Client disconnected")
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Audio I/O and model acceleration
torchaudio
soundfile
accelerate
# Hub client pinned; used by the Dockerfile pre-download step as well
huggingface_hub==0.23.2
# Tokenization backends for the HF model stack
sentencepiece
tokenizers
# Web serving stack
fastapi==0.110.0
uvicorn[standard]==0.27.1
# Core numerics / model runtime (pinned for reproducible images)
torch==2.1.2
numpy==1.26.4
pydantic==2.6.4
transformers==4.40.2
# LiquidAI LFM2.5 audio model package (unpinned — latest at build time)
liquid-audio