agkavin committed on
Commit
9400b83
·
1 Parent(s): a4cc15e
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +27 -10
  2. .gitignore +3 -3
  3. backend/api/pipeline.py +306 -289
  4. backend/api/server.py +157 -217
  5. backend/avatars/christine/coords.pkl +3 -0
  6. backend/avatars/christine/full_imgs/00000000.png +3 -0
  7. backend/avatars/christine/full_imgs/00000001.png +3 -0
  8. backend/avatars/christine/full_imgs/00000002.png +3 -0
  9. backend/avatars/christine/full_imgs/00000003.png +3 -0
  10. backend/avatars/christine/full_imgs/00000004.png +3 -0
  11. backend/avatars/christine/full_imgs/00000005.png +3 -0
  12. backend/avatars/christine/full_imgs/00000006.png +3 -0
  13. backend/avatars/christine/full_imgs/00000007.png +3 -0
  14. backend/avatars/christine/full_imgs/00000008.png +3 -0
  15. backend/avatars/christine/full_imgs/00000009.png +3 -0
  16. backend/avatars/christine/full_imgs/00000010.png +3 -0
  17. backend/avatars/christine/full_imgs/00000011.png +3 -0
  18. backend/avatars/christine/full_imgs/00000012.png +3 -0
  19. backend/avatars/christine/full_imgs/00000013.png +3 -0
  20. backend/avatars/christine/full_imgs/00000014.png +3 -0
  21. backend/avatars/christine/full_imgs/00000015.png +3 -0
  22. backend/avatars/christine/full_imgs/00000016.png +3 -0
  23. backend/avatars/christine/full_imgs/00000017.png +3 -0
  24. backend/avatars/christine/full_imgs/00000018.png +3 -0
  25. backend/avatars/christine/full_imgs/00000019.png +3 -0
  26. backend/avatars/christine/full_imgs/00000020.png +3 -0
  27. backend/avatars/christine/full_imgs/00000021.png +3 -0
  28. backend/avatars/christine/full_imgs/00000022.png +3 -0
  29. backend/avatars/christine/full_imgs/00000023.png +3 -0
  30. backend/avatars/christine/full_imgs/00000024.png +3 -0
  31. backend/avatars/christine/full_imgs/00000025.png +3 -0
  32. backend/avatars/christine/mask/00000000.png +3 -0
  33. backend/avatars/christine/mask/00000001.png +3 -0
  34. backend/avatars/christine/mask/00000002.png +3 -0
  35. backend/avatars/christine/mask/00000003.png +3 -0
  36. backend/avatars/christine/mask/00000004.png +3 -0
  37. backend/avatars/christine/mask/00000005.png +3 -0
  38. backend/avatars/christine/mask/00000006.png +3 -0
  39. backend/avatars/christine/mask/00000007.png +3 -0
  40. backend/avatars/christine/mask/00000008.png +3 -0
  41. backend/avatars/christine/mask/00000009.png +3 -0
  42. backend/avatars/christine/mask/00000010.png +3 -0
  43. backend/avatars/christine/mask/00000011.png +3 -0
  44. backend/avatars/christine/mask/00000012.png +3 -0
  45. backend/avatars/christine/mask/00000013.png +3 -0
  46. backend/avatars/christine/mask/00000014.png +3 -0
  47. backend/avatars/christine/mask/00000015.png +3 -0
  48. backend/avatars/christine/mask/00000016.png +3 -0
  49. backend/avatars/christine/mask/00000017.png +3 -0
  50. backend/avatars/christine/mask/00000018.png +3 -0
.gitattributes CHANGED
@@ -1,10 +1,28 @@
1
- *.pth filter=lfs diff=lfs merge=lfs -text
2
- *.pt filter=lfs diff=lfs merge=lfs -text
 
 
 
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.gguf filter=lfs diff=lfs merge=lfs -text
5
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
 
6
  *.safetensors filter=lfs diff=lfs merge=lfs -text
 
7
  *.pkl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
8
  *.jpg filter=lfs diff=lfs merge=lfs -text
9
  *.jpeg filter=lfs diff=lfs merge=lfs -text
10
  *.png filter=lfs diff=lfs merge=lfs -text
@@ -13,10 +31,9 @@
13
  *.mp3 filter=lfs diff=lfs merge=lfs -text
14
  *.mp4 filter=lfs diff=lfs merge=lfs -text
15
  *.webm filter=lfs diff=lfs merge=lfs -text
16
- *.zip filter=lfs diff=lfs merge=lfs -text
17
- *.tar filter=lfs diff=lfs merge=lfs -text
18
- *.gz filter=lfs diff=lfs merge=lfs -text
19
- *.npz filter=lfs diff=lfs merge=lfs -text
20
- *.npy filter=lfs diff=lfs merge=lfs -text
21
- *.h5 filter=lfs diff=lfs merge=lfs -text
22
- *.hdf5 filter=lfs diff=lfs merge=lfs -text
 
1
+ # Git LFS attributes for large binary files
2
+ # patterns matched with filter=lfs and -text to avoid diffing
3
+ # only include filetypes that are typically large or binary.
4
+
5
+ # common model formats
6
  *.bin filter=lfs diff=lfs merge=lfs -text
7
+ *.model filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
9
+ *.pth filter=lfs diff=lfs merge=lfs -text
10
+ *.pt filter=lfs diff=lfs merge=lfs -text
11
  *.safetensors filter=lfs diff=lfs merge=lfs -text
12
+ *.gguf filter=lfs diff=lfs merge=lfs -text
13
  *.pkl filter=lfs diff=lfs merge=lfs -text
14
+ *.h5 filter=lfs diff=lfs merge=lfs -text
15
+ *.hdf5 filter=lfs diff=lfs merge=lfs -text
16
+ *.npz filter=lfs diff=lfs merge=lfs -text
17
+ *.npy filter=lfs diff=lfs merge=lfs -text
18
+
19
+ # archives
20
+ *.zip filter=lfs diff=lfs merge=lfs -text
21
+ *.tar filter=lfs diff=lfs merge=lfs -text
22
+ *.gz filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+
25
+ # media assets
26
  *.jpg filter=lfs diff=lfs merge=lfs -text
27
  *.jpeg filter=lfs diff=lfs merge=lfs -text
28
  *.png filter=lfs diff=lfs merge=lfs -text
 
31
  *.mp3 filter=lfs diff=lfs merge=lfs -text
32
  *.mp4 filter=lfs diff=lfs merge=lfs -text
33
  *.webm filter=lfs diff=lfs merge=lfs -text
34
+
35
+ # project-specific large paths
36
+ backend/personaplex-7b-v1-bnb-4bit/model_bnb_4bit.pt filter=lfs diff=lfs merge=lfs -text
37
+ backend/avatars/**/* filter=lfs diff=lfs merge=lfs -text
38
+ src/musetalk/models/**/* filter=lfs diff=lfs merge=lfs -text
39
+
 
.gitignore CHANGED
@@ -7,9 +7,9 @@ __pycache__/
7
  # are tracked via Git LFS (see .gitattributes)
8
 
9
  # Avatar image frames (pre-computed, regenerated by precompute_avatar.py)
10
- backend/avatars/*/full_imgs/
11
- backend/avatars/*/mask/
12
- backend/avatars/*/*.pkl
13
 
14
  # Frontend dependencies
15
  frontend/node_modules/
 
7
  # are tracked via Git LFS (see .gitattributes)
8
 
9
  # Avatar image frames (pre-computed, regenerated by precompute_avatar.py)
10
+ # backend/avatars/*/full_imgs/
11
+ # backend/avatars/*/mask/
12
+ # backend/avatars/*/*.pkl
13
 
14
  # Frontend dependencies
15
  frontend/node_modules/
backend/api/pipeline.py CHANGED
@@ -1,8 +1,21 @@
1
  """
2
- Speech-to-Video Pipeline Orchestrator
3
- =====================================
4
- Coordinates TTS → MuseTalk → LiveKit publishing.
5
- Optimized for low latency (<200ms initial response).
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  """
7
  from __future__ import annotations
8
 
@@ -13,162 +26,38 @@ from typing import Optional
13
 
14
  import numpy as np
15
 
16
- # Use relative imports for standalone
17
  import sys
18
  from pathlib import Path
19
 
20
- _backend_dir = Path(__file__).parent.parent.parent
21
  if str(_backend_dir) not in sys.path:
22
  sys.path.insert(0, str(_backend_dir))
23
 
24
  from config import (
25
- CHUNK_DURATION,
26
  FRAMES_PER_CHUNK,
27
  TTS_SAMPLE_RATE,
28
- TTS_SAMPLES_PER_CHUNK,
29
  VIDEO_FPS,
30
- SYSTEM_PROMPT,
31
  )
32
  from tts.kokoro_tts import KokoroTTS
33
- from musetalk.worker import MuseTalkWorker, AVChunk
34
- from sync.av_sync import AVSyncGate, SimpleAVSync
35
  from publisher.livekit_publisher import AVPublisher, IdleFrameGenerator
36
 
37
  log = logging.getLogger(__name__)
38
 
39
-
40
- class SpeechToVideoPipeline:
41
- """
42
- Main pipeline: Text → TTS → MuseTalk → LiveKit
43
-
44
- Optimized for smooth, synchronized AV output.
45
- """
46
-
47
- def __init__(
48
- self,
49
- tts: KokoroTTS,
50
- musetalk: MuseTalkWorker,
51
- publisher: AVPublisher,
52
- avatar_assets,
53
- ):
54
- self._tts = tts
55
- self._musetalk = musetalk
56
- self._publisher = publisher
57
- self._avatar_assets = avatar_assets
58
-
59
- self._idle_generator = IdleFrameGenerator(
60
- avatar_assets,
61
- target_width=publisher._video_width,
62
- target_height=publisher._video_height,
63
- )
64
- self._av_sync = SimpleAVSync(video_fps=VIDEO_FPS)
65
-
66
- self._running = False
67
- self._idle_task: Optional[asyncio.Task] = None
68
-
69
- log.info("SpeechToVideoPipeline initialized")
70
-
71
- async def start(self):
72
- """Start the pipeline."""
73
- self._running = True
74
- self._idle_task = asyncio.create_task(self._idle_loop())
75
- log.info("Pipeline started")
76
-
77
- async def stop(self):
78
- """Stop the pipeline."""
79
- self._running = False
80
-
81
- if self._idle_task:
82
- self._idle_task.cancel()
83
- try:
84
- await self._idle_task
85
- except asyncio.CancelledError:
86
- pass
87
-
88
- log.info("Pipeline stopped")
89
-
90
- async def speak(self, text: str) -> float:
91
- """
92
- Process text and generate synchronized AV output.
93
-
94
- Args:
95
- text: Text to speak
96
-
97
- Returns:
98
- Start latency in seconds
99
- """
100
- start_time = time.monotonic()
101
-
102
- # Process in chunks for low latency
103
- chunk_id = 0
104
- current_pts = 0.0
105
-
106
- # Stream TTS and process through MuseTalk
107
- async for audio_chunk, pts_start, pts_end in self._tts.synthesize_stream(text):
108
- # Process through MuseTalk
109
- av_chunk = await self._musetalk.process_chunk(
110
- audio_pcm=audio_chunk,
111
- chunk_id=chunk_id,
112
- pts_start=pts_start,
113
- pts_end=pts_end,
114
- is_last=False,
115
- )
116
-
117
- # Publish synchronized AV
118
- await self._publisher.publish_av_chunk(
119
- audio=av_chunk.audio_pcm,
120
- video_frames=av_chunk.video_frames,
121
- pts_start=pts_start,
122
- )
123
-
124
- chunk_id += 1
125
- current_pts = pts_end
126
-
127
- latency = time.monotonic() - start_time
128
- log.info(f"Speech completed in {latency:.3f}s")
129
-
130
- return latency
131
-
132
- async def _idle_loop(self):
133
- """Idle animation loop when not speaking."""
134
- frame_interval = 1.0 / VIDEO_FPS
135
- session_start = time.monotonic()
136
-
137
- log.info("Idle loop started")
138
-
139
- try:
140
- while self._running:
141
- frame_start = time.monotonic()
142
-
143
- # Get idle frame
144
- idle_frame = self._idle_generator.next_frame()
145
-
146
- # Calculate PTS
147
- pts_us = int((frame_start - session_start) * 1_000_000)
148
-
149
- # Publish video frame
150
- await self._publisher.publish_video_frame(idle_frame, pts_us)
151
-
152
- # Maintain frame rate
153
- elapsed = time.monotonic() - frame_start
154
- sleep_time = frame_interval - elapsed
155
-
156
- if sleep_time > 0:
157
- await asyncio.sleep(sleep_time)
158
- elif sleep_time < -0.01:
159
- log.warning("Frame took too long: %.3fs", -sleep_time)
160
-
161
- except asyncio.CancelledError:
162
- log.info("Idle loop cancelled")
163
- raise
164
 
165
 
166
  class StreamingPipeline:
167
  """
168
- Streaming version of the pipeline for real-time text input.
169
- Processes text incrementally for lower latency.
 
 
 
 
170
  """
171
-
172
  def __init__(
173
  self,
174
  tts: KokoroTTS,
@@ -180,200 +69,328 @@ class StreamingPipeline:
180
  self._musetalk = musetalk
181
  self._publisher = publisher
182
  self._avatar_assets = avatar_assets
 
 
 
 
 
183
  self._idle_generator = IdleFrameGenerator(
184
  avatar_assets,
 
185
  target_width=publisher._video_width,
186
  target_height=publisher._video_height,
187
  )
188
-
189
  self._running = False
190
- self._processing = False
191
- self._speaking = False # True while _speak_text is active
192
- self._idle_task: Optional[asyncio.Task] = None
193
-
194
- # Queue holds (video_frame, audio_slice_or_None) tuples.
195
- # The idle loop drains at 25fps and publishes both in lockstep.
196
- # Size 256 ≈ ~10s of video at 25fps.
197
- self._video_queue: asyncio.Queue = asyncio.Queue(maxsize=256)
198
-
199
  self._text_queue: asyncio.Queue = asyncio.Queue()
200
-
201
- log.info("StreamingPipeline initialized")
202
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  async def start(self):
204
- """Start the pipeline."""
205
  self._running = True
206
- self._idle_task = asyncio.create_task(self._idle_loop())
 
 
 
 
 
 
 
207
  log.info("StreamingPipeline started")
208
-
209
  async def stop(self):
210
- """Stop the pipeline."""
211
  self._running = False
212
-
213
- if self._idle_task:
214
- self._idle_task.cancel()
215
- try:
216
- await self._idle_task
217
- except asyncio.CancelledError:
218
- pass
219
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  log.info("StreamingPipeline stopped")
221
-
 
 
222
  async def push_text(self, text: str):
 
 
 
 
 
 
223
  """
224
- Push text to be spoken.
225
- Non-blocking - starts processing immediately.
 
 
 
 
 
 
226
  """
227
- await self._text_queue.put(text)
228
-
229
- if not self._processing:
230
- self._processing = True # Set before task creation to prevent double-spawn
231
- asyncio.create_task(self._process_queue())
232
-
233
- async def _process_queue(self):
234
- """Process text queue."""
235
- self._processing = True
236
-
237
  try:
238
  while self._running:
239
  try:
240
- text = await asyncio.wait_for(
241
- self._text_queue.get(),
242
- timeout=0.1
243
- )
244
  except asyncio.TimeoutError:
245
- break
246
-
247
- await self._speak_text(text)
248
-
249
- finally:
250
- self._processing = False
251
-
252
- async def _speak_text(self, text: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  """
254
- Speak text with sub-batch streaming.
255
-
256
- Flow per Kokoro audio chunk:
257
- 1. Whisper encoder — once (~40 ms)
258
- 2. For each sub-batch of 4 frames:
259
- a. MuseTalk UNet (~100 ms)
260
- b. Chop audio into per-frame slices
261
- c. Push (frame, audio_slice) tuples to queue
262
- The idle loop drains at 25fps, publishing video + audio in lockstep.
263
  """
264
- start_time = time.monotonic()
265
- first_batch_logged = False
266
- chunk_id = 0
267
- BATCH = self._musetalk.BATCH_FRAMES # 4
268
-
269
- self._speaking = True
270
  try:
271
- async for audio_chunk, pts_start, pts_end in self._tts.synthesize_stream(text):
272
- # Flatten audio
273
- audio_flat = audio_chunk.flatten() if audio_chunk.ndim > 1 else audio_chunk
274
- audio_dur = len(audio_flat) / TTS_SAMPLE_RATE
275
- total_frames = max(1, round(audio_dur * VIDEO_FPS))
276
- samples_per_frame = len(audio_flat) / total_frames
277
-
278
- # Phase 1 Whisper (once for the whole Kokoro chunk)
279
  t0 = time.monotonic()
280
- feats, _ = await self._musetalk.extract_features(audio_flat)
281
- whisper_ms = (time.monotonic() - t0) * 1000
282
 
283
- # Phase 2 stream sub-batches of BATCH frames
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  for batch_start in range(0, total_frames, BATCH):
285
  n = min(BATCH, total_frames - batch_start)
 
286
 
287
- t1 = time.monotonic()
288
  frames = await self._musetalk.generate_batch(feats, batch_start, n)
289
- unet_ms = (time.monotonic() - t1) * 1000
290
-
291
- if not first_batch_logged:
292
- elapsed = (time.monotonic() - start_time) * 1000
293
- log.info("first batch: whisper %.0fms unet %.0fms total %.0fms "
294
- "(%d frames)", whisper_ms, unet_ms, elapsed, n)
295
- first_batch_logged = True
296
-
297
- # Slice matching audio for this sub-batch
298
- a_start = int(batch_start * samples_per_frame)
299
- a_end = int((batch_start + n) * samples_per_frame)
300
- batch_audio = audio_flat[a_start:a_end]
301
-
302
- # Chop into per-frame audio and push (frame, audio) tuples
303
- frame_samples = int(samples_per_frame)
304
- for fi, vf in enumerate(frames):
305
- fa_s = fi * frame_samples
306
- fa_e = min((fi + 1) * frame_samples, len(batch_audio))
307
- per_frame_audio = batch_audio[fa_s:fa_e] if fa_e > fa_s else None
308
- try:
309
- self._video_queue.put_nowait((vf, per_frame_audio))
310
- except asyncio.QueueFull:
311
- log.warning("Video queue full — dropping oldest frame (audio gap risk)")
312
- try:
313
- self._video_queue.get_nowait()
314
- except asyncio.QueueEmpty:
315
- pass
316
- self._video_queue.put_nowait((vf, per_frame_audio))
317
-
318
- chunk_id += 1
319
-
320
- finally:
321
- self._speaking = False
322
-
323
- latency = time.monotonic() - start_time
324
- log.info("Text spoken in %.3fs (%d tts chunks)", latency, chunk_id)
325
-
326
- async def _idle_loop(self):
327
- """Idle animation loop — drains video queue at 25fps.
328
-
329
- During speech (_speaking=True):
330
- - Pulls (frame, audio) tuples from queue.
331
- - If the queue is momentarily empty, block-waits up to 500ms
332
- for the next sub-batch instead of flashing to idle.
333
- - Publishes video + audio in lockstep.
334
-
335
- When idle:
336
- - Queue is empty → shows base.mp4 loop, no audio.
337
  """
338
- frame_interval = 1.0 / VIDEO_FPS
 
 
 
 
 
 
 
 
 
339
  session_start = time.monotonic()
340
-
 
 
 
 
341
  try:
342
  while self._running:
343
- frame_start = time.monotonic()
344
- frame = None
345
- audio_slice = None
346
 
347
- # --- pull next frame ---
348
  try:
349
- item = self._video_queue.get_nowait()
350
- frame, audio_slice = item
351
  except asyncio.QueueEmpty:
352
- if self._speaking:
353
- # UNet is still generating — wait for next batch
354
- try:
355
- item = await asyncio.wait_for(
356
- self._video_queue.get(), timeout=0.5,
357
- )
358
- frame, audio_slice = item
359
- except asyncio.TimeoutError:
360
- # Safety: if nothing arrived in 500ms, show idle
361
- frame = self._idle_generator.next_frame()
362
  else:
363
  frame = self._idle_generator.next_frame()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
365
- pts_us = int((frame_start - session_start) * 1_000_000)
 
366
  await self._publisher.publish_video_frame(frame, pts_us)
367
 
368
- # Publish per-frame audio if present (lockstep with video)
369
  if audio_slice is not None and len(audio_slice) > 0:
370
- await self._publisher.publish_audio_chunk(audio_slice, 0.0)
371
-
372
- elapsed = time.monotonic() - frame_start
 
 
 
373
  sleep_time = frame_interval - elapsed
374
-
375
  if sleep_time > 0:
376
  await asyncio.sleep(sleep_time)
377
-
 
 
 
 
 
378
  except asyncio.CancelledError:
379
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Three-Queue Parallel Pipeline (api/ canonical version)
3
+ ========================================================
4
+ Promoted from e2e/pipeline.py — this is now the default StreamingPipeline
5
+ used by api/server.py.
6
+
7
+ Architecture:
8
+ _tts_producer → _tts_queue(6) → _whisper_worker
9
+ _whisper_worker → _whisper_queue(3) → _unet_worker
10
+ _unet_worker → _frame_queue(64) → _publish_loop (VIDEO_FPS drain)
11
+
12
+ Key properties:
13
+ - TTS (CPU/ONNX) runs ahead of Whisper/UNet, absorbing inter-fragment
14
+ Kokoro reinit time in the bounded queue buffer. No inter-sentence stall.
15
+ - _publish_loop holds the last speech frame during inter-batch gaps instead
16
+ of flashing to idle — prevents LiveKit bitrate drops from irregular delivery.
17
+ - Audio PTS tracked via monotonic sample counter.
18
+ - stop() cancels all tasks and drains queues — safe to restart cleanly.
19
  """
20
  from __future__ import annotations
21
 
 
26
 
27
  import numpy as np
28
 
 
29
  import sys
30
  from pathlib import Path
31
 
32
+ _backend_dir = Path(__file__).parent.parent
33
  if str(_backend_dir) not in sys.path:
34
  sys.path.insert(0, str(_backend_dir))
35
 
36
  from config import (
 
37
  FRAMES_PER_CHUNK,
38
  TTS_SAMPLE_RATE,
 
39
  VIDEO_FPS,
 
40
  )
41
  from tts.kokoro_tts import KokoroTTS
42
+ from musetalk.worker import MuseTalkWorker
 
43
  from publisher.livekit_publisher import AVPublisher, IdleFrameGenerator
44
 
45
  log = logging.getLogger(__name__)
46
 
47
+ # Sentinel: distinguishes "queue was empty" from the None end-of-utterance marker
48
+ _QUEUE_EMPTY = object()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
 
51
  class StreamingPipeline:
52
  """
53
+ Three-queue parallel pipeline: text → TTS → Whisper → UNet → LiveKit.
54
+
55
+ Public interface:
56
+ await pipeline.start()
57
+ await pipeline.push_text("Hello world.")
58
+ await pipeline.stop()
59
  """
60
+
61
  def __init__(
62
  self,
63
  tts: KokoroTTS,
 
69
  self._musetalk = musetalk
70
  self._publisher = publisher
71
  self._avatar_assets = avatar_assets
72
+
73
+ # Use idle.png from the avatar folder if available (static frame, no flicker)
74
+ _avatar_idle_png = (
75
+ Path(__file__).parent.parent / "avatars" / avatar_assets.name / "idle.png"
76
+ )
77
  self._idle_generator = IdleFrameGenerator(
78
  avatar_assets,
79
+ image_path=str(_avatar_idle_png) if _avatar_idle_png.exists() else None,
80
  target_width=publisher._video_width,
81
  target_height=publisher._video_height,
82
  )
83
+
84
  self._running = False
85
+
86
+ # ── three-stage async queues ──────────────────────────────────────────
87
+ # Unbounded: holds raw text requests
 
 
 
 
 
 
88
  self._text_queue: asyncio.Queue = asyncio.Queue()
89
+
90
+ # Stage 1→2: Kokoro audio chunks. 6 slots ≈ ~2 full sentences of
91
+ # buffering — absorbs the Kokoro create_stream() reinit gap (~50-100ms)
92
+ # between sentence fragments so _whisper_worker never stalls.
93
+ self._tts_queue: asyncio.Queue = asyncio.Queue(maxsize=6)
94
+
95
+ # Stage 2→3: Whisper features. Small — GPU is the bottleneck here.
96
+ self._whisper_queue: asyncio.Queue = asyncio.Queue(maxsize=3)
97
+
98
+ # Stage 3→publish: composited RGBA frames + per-frame audio.
99
+ # 64 slots ≈ 2.56s of video at 25fps — publish loop never starves.
100
+ self._frame_queue: asyncio.Queue = asyncio.Queue(maxsize=64)
101
+
102
+ # ── worker task handles ───────────────────────────────────────────────
103
+ self._tts_task: Optional[asyncio.Task] = None
104
+ self._whisper_task: Optional[asyncio.Task] = None
105
+ self._unet_task: Optional[asyncio.Task] = None
106
+ self._publish_task: Optional[asyncio.Task] = None
107
+ self._log_task: Optional[asyncio.Task] = None
108
+
109
+ log.info("StreamingPipeline (3-queue) initialized")
110
+
111
+ # ── lifecycle ─────────────────────────────────────────────────────────────
112
+
113
+ def _task_done_cb(self, task: asyncio.Task):
114
+ """Log unhandled exceptions from worker tasks immediately."""
115
+ if task.cancelled():
116
+ return
117
+ exc = task.exception()
118
+ if exc is not None:
119
+ log.error("Worker task '%s' crashed: %s", task.get_name(), exc, exc_info=exc)
120
+
121
  async def start(self):
122
+ """Spawn all worker coroutines and start the pipeline."""
123
  self._running = True
124
+ self._tts_task = asyncio.create_task(self._tts_producer(), name="tts_producer")
125
+ self._whisper_task = asyncio.create_task(self._whisper_worker(), name="whisper_worker")
126
+ self._unet_task = asyncio.create_task(self._unet_worker(), name="unet_worker")
127
+ self._publish_task = asyncio.create_task(self._publish_loop(), name="publish_loop")
128
+ self._log_task = asyncio.create_task(self._log_queue_depths(), name="log_depths")
129
+ for t in (self._tts_task, self._whisper_task, self._unet_task,
130
+ self._publish_task, self._log_task):
131
+ t.add_done_callback(self._task_done_cb)
132
  log.info("StreamingPipeline started")
133
+
134
  async def stop(self):
135
+ """Cancel all workers, drain queues, and reset state."""
136
  self._running = False
137
+
138
+ for task in (
139
+ self._tts_task,
140
+ self._whisper_task,
141
+ self._unet_task,
142
+ self._publish_task,
143
+ self._log_task,
144
+ ):
145
+ if task and not task.done():
146
+ task.cancel()
147
+ try:
148
+ await task
149
+ except asyncio.CancelledError:
150
+ pass
151
+
152
+ # Drain all queues — no stale data on reconnect
153
+ for q in (
154
+ self._text_queue,
155
+ self._tts_queue,
156
+ self._whisper_queue,
157
+ self._frame_queue,
158
+ ):
159
+ while not q.empty():
160
+ try:
161
+ q.get_nowait()
162
+ except asyncio.QueueEmpty:
163
+ break
164
+
165
  log.info("StreamingPipeline stopped")
166
+
167
+ # ── public API ────────────────────────────────────────────────────────────
168
+
169
  async def push_text(self, text: str):
170
+ """Enqueue text to be spoken. Non-blocking; returns immediately."""
171
+ await self._text_queue.put(text)
172
+
173
+ # ── Stage 1: TTS producer ─────────────────────────────────────────────────
174
+
175
+ async def _tts_producer(self):
176
  """
177
+ Reads text from _text_queue, streams Kokoro audio into _tts_queue.
178
+
179
+ Sentinel convention: None is pushed after each utterance to signal
180
+ end-of-utterance to downstream workers.
181
+
182
+ NOTE: text is passed directly to synthesize_stream() — no outer
183
+ _split_to_fragments() call here. synthesize_stream() handles splitting
184
+ internally, preventing double-split and PTS reset at fragment boundaries.
185
  """
 
 
 
 
 
 
 
 
 
 
186
  try:
187
  while self._running:
188
  try:
189
+ text = await asyncio.wait_for(self._text_queue.get(), timeout=0.1)
 
 
 
190
  except asyncio.TimeoutError:
191
+ continue
192
+
193
+ log.debug("tts_producer: utterance (%d chars)", len(text))
194
+ first_chunk = True
195
+
196
+ async for audio, pts_s, pts_e in self._tts.synthesize_stream(text):
197
+ audio_flat = audio.flatten() if audio.ndim > 1 else audio
198
+ if first_chunk:
199
+ log.debug("tts_producer: first chunk pts=%.3f→%.3f len=%d",
200
+ pts_s, pts_e, len(audio_flat))
201
+ first_chunk = False
202
+ await self._tts_queue.put((audio_flat, pts_s, pts_e))
203
+
204
+ # End-of-utterance sentinel
205
+ await self._tts_queue.put(None)
206
+ log.debug("tts_producer: utterance done")
207
+ except asyncio.CancelledError:
208
+ raise
209
+ except Exception:
210
+ log.exception("tts_producer: unhandled exception — worker stopped")
211
+ raise
212
+
213
+ # ── Stage 2: Whisper worker ───────────────────────────────────────────────
214
+
215
+ async def _whisper_worker(self):
216
  """
217
+ Consumes audio chunks from _tts_queue, runs Whisper encoder, pushes
218
+ (feats, audio_flat, pts_s, pts_e, total_frames) into _whisper_queue.
219
+ Forwards None sentinel downstream on end-of-utterance.
 
 
 
 
 
 
220
  """
 
 
 
 
 
 
221
  try:
222
+ while self._running:
223
+ item = await self._tts_queue.get()
224
+
225
+ if item is None:
226
+ await self._whisper_queue.put(None)
227
+ continue
228
+
229
+ audio_flat, pts_s, pts_e = item
230
  t0 = time.monotonic()
 
 
231
 
232
+ feats, total_frames = await self._musetalk.extract_features(audio_flat)
233
+
234
+ log.debug(
235
+ "whisper_worker: %.0fms audio → %d frames (took %.0fms)",
236
+ len(audio_flat) / TTS_SAMPLE_RATE * 1000,
237
+ total_frames,
238
+ (time.monotonic() - t0) * 1000,
239
+ )
240
+ await self._whisper_queue.put((feats, audio_flat, pts_s, pts_e, total_frames))
241
+ except asyncio.CancelledError:
242
+ raise
243
+ except Exception:
244
+ log.exception("whisper_worker: unhandled exception — worker stopped")
245
+ raise
246
+
247
+ # ── Stage 3: UNet worker ──────────────────────────────────────────────────
248
+
249
+ async def _unet_worker(self):
250
+ """
251
+ Consumes Whisper features from _whisper_queue, runs MuseTalk UNet in
252
+ BATCH_FRAMES-sized batches, pushes (frame_rgba, audio_slice) into
253
+ _frame_queue. Forwards None sentinel downstream.
254
+ """
255
+ BATCH = self._musetalk.BATCH_FRAMES
256
+
257
+ try:
258
+ while self._running:
259
+ item = await self._whisper_queue.get()
260
+
261
+ if item is None:
262
+ await self._frame_queue.put(None)
263
+ continue
264
+
265
+ feats, audio_flat, pts_s, pts_e, total_frames = item
266
+ spf = len(audio_flat) / max(total_frames, 1)
267
+
268
+ first_batch = True
269
  for batch_start in range(0, total_frames, BATCH):
270
  n = min(BATCH, total_frames - batch_start)
271
+ t0 = time.monotonic()
272
 
 
273
  frames = await self._musetalk.generate_batch(feats, batch_start, n)
274
+
275
+ if first_batch:
276
+ log.debug("unet_worker: first batch %d frames (%.0fms)",
277
+ n, (time.monotonic() - t0) * 1000)
278
+ first_batch = False
279
+
280
+ for fi, frame in enumerate(frames):
281
+ a_s = int((batch_start + fi) * spf)
282
+ a_e = min(int((batch_start + fi + 1) * spf), len(audio_flat))
283
+ audio_slice = audio_flat[a_s:a_e] if a_e > a_s else None
284
+ await self._frame_queue.put((frame, audio_slice))
285
+ except asyncio.CancelledError:
286
+ raise
287
+ except Exception:
288
+ log.exception("unet_worker: unhandled exception — worker stopped")
289
+ raise
290
+
291
+ # ── Publish loop ──────────────────────────────────────────────────────────
292
+
293
+ async def _publish_loop(self):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  """
295
+ Ticks at exactly VIDEO_FPS — always.
296
+
297
+ Frame selection priority per tick:
298
+ 1. Next speech frame from _frame_queue (non-blocking get_nowait)
299
+ 2. If speaking and queue empty: hold last speech frame
300
+ (UNet is generating the next batch — freeze beats idle flash)
301
+ 3. Utterance sentinel (None): switch to idle immediately
302
+ 4. Truly idle: idle frame
303
+ """
304
+ frame_interval = 1.0 / VIDEO_FPS # e.g. 62.5ms @ 16fps
305
  session_start = time.monotonic()
306
+ audio_pts_samples = 0
307
+ is_speaking = False
308
+ last_speech_frame = None
309
+ hold_count = 0
310
+
311
  try:
312
  while self._running:
313
+ tick_start = time.monotonic()
 
 
314
 
315
+ # ── non-blocking frame pick ───────────────────────────────────
316
  try:
317
+ item = self._frame_queue.get_nowait()
 
318
  except asyncio.QueueEmpty:
319
+ item = _QUEUE_EMPTY
320
+
321
+ if item is _QUEUE_EMPTY:
322
+ if is_speaking and last_speech_frame is not None:
323
+ frame = last_speech_frame
324
+ audio_slice = None
325
+ hold_count += 1
 
 
 
326
  else:
327
  frame = self._idle_generator.next_frame()
328
+ audio_slice = None
329
+
330
+ elif item is None:
331
+ # End-of-utterance sentinel
332
+ if is_speaking:
333
+ log.info(
334
+ "publish_loop: utterance ended → idle"
335
+ " (held %d frames for inter-batch gaps)", hold_count
336
+ )
337
+ is_speaking = False
338
+ last_speech_frame = None
339
+ hold_count = 0
340
+ frame = self._idle_generator.next_frame()
341
+ audio_slice = None
342
+
343
+ else:
344
+ # Real speech frame
345
+ frame, audio_slice = item
346
+ last_speech_frame = frame
347
+ if not is_speaking:
348
+ log.info(
349
+ "publish_loop: speaking started (frame_q=%d)",
350
+ self._frame_queue.qsize(),
351
+ )
352
+ is_speaking = True
353
+ hold_count = 0
354
 
355
+ # ── video publish ─────────────────────────────────────────────
356
+ pts_us = int((tick_start - session_start) * 1_000_000)
357
  await self._publisher.publish_video_frame(frame, pts_us)
358
 
359
+ # ── audio publish ─────────────────────────────────────────────
360
  if audio_slice is not None and len(audio_slice) > 0:
361
+ audio_pts_sec = audio_pts_samples / TTS_SAMPLE_RATE
362
+ await self._publisher.publish_audio_chunk(audio_slice, audio_pts_sec)
363
+ audio_pts_samples += len(audio_slice)
364
+
365
+ # ── pace to VIDEO_FPS ─────────────────────────────────────────
366
+ elapsed = time.monotonic() - tick_start
367
  sleep_time = frame_interval - elapsed
 
368
  if sleep_time > 0:
369
  await asyncio.sleep(sleep_time)
370
+ elif sleep_time < -0.010:
371
+ log.warning(
372
+ "publish_loop: over budget by %.0fms",
373
+ -sleep_time * 1000,
374
+ )
375
+
376
  except asyncio.CancelledError:
377
  raise
378
+
379
+ # ── Debug helper ──────────────────────────────────────────────────────────
380
+
381
+ async def _log_queue_depths(self):
382
+ """Log queue depths every 2 seconds for pipeline health monitoring."""
383
+ while self._running:
384
+ tts_q = self._tts_queue.qsize()
385
+ whi_q = self._whisper_queue.qsize()
386
+ frm_q = self._frame_queue.qsize()
387
+ lvl = logging.INFO if (tts_q or whi_q or frm_q) else logging.DEBUG
388
+ log.log(
389
+ lvl,
390
+ "queues — text=%d tts=%d/%d whisper=%d/%d frame=%d/%d",
391
+ self._text_queue.qsize(),
392
+ tts_q, self._tts_queue.maxsize,
393
+ whi_q, self._whisper_queue.maxsize,
394
+ frm_q, self._frame_queue.maxsize,
395
+ )
396
+ await asyncio.sleep(2.0)
backend/api/server.py CHANGED
@@ -1,39 +1,40 @@
1
  """
2
- Speech-to-Video Server
3
- ====================
4
- FastAPI server for text-to-speech-to-video pipeline.
5
- Uses Kokoro TTS + MuseTalk + LiveKit.
 
 
 
 
 
 
 
 
 
6
  """
7
  from __future__ import annotations
8
 
9
  import asyncio
10
  import logging
11
- import os
12
  import sys
13
  import time
14
  from contextlib import asynccontextmanager
15
  from pathlib import Path
16
  from typing import Optional
17
 
18
- # NOTE: do NOT load root .env (parent project sets SPEECHX_AVATAR=christine).
19
- # speech_to_video/backend/config.py has all defaults we need.
20
-
21
- # Add local backend to path (PRIORITY over parent)
22
- import sys
23
- from pathlib import Path
24
-
25
- # Get the directory containing this file (backend/api/)
26
  _current_file = Path(__file__).resolve()
27
- _api_dir = _current_file.parent # backend/api/
28
- _backend_dir = _api_dir.parent # backend/
29
- _speech_to_video_dir = _backend_dir.parent # speech_to_video/
30
 
31
- # Add paths in order of priority
32
- for p in [_backend_dir, _speech_to_video_dir]:
33
  if str(p) not in sys.path:
34
  sys.path.insert(0, str(p))
35
 
36
- # Now import after path is set
 
37
  import uvicorn
38
  from fastapi import FastAPI, HTTPException
39
  from fastapi.middleware.cors import CORSMiddleware
@@ -48,20 +49,17 @@ from config import (
48
  LIVEKIT_API_KEY,
49
  LIVEKIT_API_SECRET,
50
  LIVEKIT_ROOM_NAME,
51
- VIDEO_WIDTH,
52
- VIDEO_HEIGHT,
53
  VIDEO_FPS,
54
  DEFAULT_AVATAR,
55
  DEVICE,
56
  )
57
  from tts.kokoro_tts import KokoroTTS
58
- from musetalk.worker import load_musetalk_models, MuseTalkWorker
59
  from publisher.livekit_publisher import AVPublisher
60
  from api.pipeline import StreamingPipeline
61
 
62
  import torch
63
- torch.set_float32_matmul_precision('high') # Use TF32 on Ampere+ for ~5-10% free speedup
64
- # If torch.compile Triton JIT fails (e.g. first-run slow compile, SIGINT), fall back to eager
65
  torch._dynamo.config.suppress_errors = True
66
 
67
  log = logging.getLogger(__name__)
@@ -70,51 +68,73 @@ logging.basicConfig(
70
  format="%(asctime)s %(levelname)-7s %(name)s %(message)s",
71
  )
72
 
73
- # Global state
74
- _pipeline: Optional[StreamingPipeline] = None
75
- _room: Optional[rtc.Room] = None
76
- _publisher: Optional[AVPublisher] = None
77
- _models_loaded = False
78
 
 
 
 
 
 
 
 
79
 
80
  @asynccontextmanager
81
  async def lifespan(app: FastAPI):
82
- """Load models at startup."""
83
- global _models_loaded
84
-
85
  log.info("=== Speech-to-Video Server Starting ===")
86
- log.info(f"Device: {DEVICE}")
87
- log.info(f"Avatar: {DEFAULT_AVATAR}")
88
-
89
- # Models are loaded lazily on first request
90
- _models_loaded = True
91
-
92
- log.info("=== Server Ready ===")
93
-
94
- yield
95
-
96
- # Cleanup
97
- global _pipeline, _room, _publisher
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
 
 
 
 
99
  if _pipeline:
100
  await _pipeline.stop()
101
- _pipeline = None
102
- # Stop publisher BEFORE disconnecting the room — unpublish_track requires an
103
- # active room connection; room.disconnect() tears down the session first.
104
  if _publisher:
105
  await _publisher.stop()
106
- _publisher = None
107
  if _room:
108
  await _room.disconnect()
109
- _room = None
110
-
111
  log.info("=== Server Shutdown ===")
112
 
113
 
 
 
114
  app = FastAPI(
115
- title="Speech-to-Video",
116
- description="Text → Kokoro TTS → MuseTalk → LiveKit pipeline",
117
- version="1.0.0",
118
  lifespan=lifespan,
119
  )
120
 
@@ -126,260 +146,180 @@ app.add_middleware(
126
  )
127
 
128
 
 
 
129
  class SpeakRequest(BaseModel):
130
  text: str
131
  voice: Optional[str] = None
132
  speed: Optional[float] = None
133
 
134
-
135
  class TokenRequest(BaseModel):
136
- room_name: str = "speech-to-video-room"
137
  identity: str = "user"
138
 
139
 
140
- # ──────────────────────────────────────────────────────────────────────────────
141
- # Endpoints
142
- # ──────────────────────────────────────────────────────────────────────────────
143
 
144
  @app.get("/health")
145
  async def health():
146
- """Liveness probe."""
147
  return {
148
  "status": "ok",
149
- "models_loaded": _models_loaded,
150
- "pipeline_active": _pipeline is not None and getattr(_pipeline, '_running', False),
151
  }
152
 
153
-
154
  @app.get("/status")
155
  async def status():
156
- """Get server status."""
157
- import torch
158
  vram = {}
159
  if torch.cuda.is_available():
160
  vram = {
161
  "allocated_gb": round(torch.cuda.memory_allocated() / 1024**3, 2),
162
- "reserved_gb": round(torch.cuda.memory_reserved() / 1024**3, 2),
163
  }
164
  return {
165
- "models_loaded": _models_loaded,
166
- "pipeline_active": _pipeline is not None and getattr(_pipeline, '_running', False),
 
167
  "avatar": DEFAULT_AVATAR,
168
  "device": DEVICE,
169
  "vram": vram,
170
  }
171
 
172
 
 
 
173
  @app.post("/connect")
174
  async def connect():
175
- """
176
- Connect to LiveKit room and start the pipeline.
177
- """
178
  global _room, _publisher, _pipeline
179
-
180
- if _pipeline is not None and getattr(_pipeline, '_running', False):
 
 
 
181
  raise HTTPException(status_code=400, detail="Already connected")
182
-
183
  log.info("Connecting to LiveKit room...")
184
-
 
185
  try:
186
- # Load models
187
- log.info("Loading MuseTalk models...")
188
- musetalk_bundle = load_musetalk_models(
189
- avatar_name=DEFAULT_AVATAR,
190
- device=DEVICE,
191
- )
192
-
193
- log.info("Loading Kokoro TTS...")
194
- tts = KokoroTTS()
195
-
196
- # Create LiveKit room
197
  room = rtc.Room()
198
-
199
- # Generate token for backend agent
200
- token = lk_api.AccessToken(
201
- LIVEKIT_API_KEY,
202
- LIVEKIT_API_SECRET,
203
- ).with_identity("backend-agent").with_name("Speech-to-Video Agent")
204
  token.with_grants(lk_api.VideoGrants(
205
  room_join=True,
206
  room=LIVEKIT_ROOM_NAME,
207
  can_publish=True,
208
  can_subscribe=True,
209
  ))
210
-
211
- # Determine actual video dimensions from precomputed avatar frames
212
- first_frame = musetalk_bundle.avatar_assets.frame_list[0]
213
- actual_h, actual_w = first_frame.shape[:2] # cv2 shape is (H, W, C)
214
- log.info(f"Avatar frame size: {actual_w}x{actual_h}")
215
 
216
- # Create publisher
217
  publisher = AVPublisher(
218
  room,
219
  video_width=actual_w,
220
  video_height=actual_h,
221
  video_fps=VIDEO_FPS,
222
  )
223
-
224
- # Create MuseTalk worker
225
- musetalk_worker = MuseTalkWorker(musetalk_bundle)
226
-
227
- # Create pipeline
228
  pipeline = StreamingPipeline(
229
- tts=tts,
230
  musetalk=musetalk_worker,
231
  publisher=publisher,
232
- avatar_assets=musetalk_bundle.avatar_assets,
233
- )
234
-
235
- # Connect to room
236
- await room.connect(
237
- url=LIVEKIT_URL,
238
- token=token.to_jwt(),
239
  )
240
- log.info(f"Connected to LiveKit: {LIVEKIT_ROOM_NAME}")
241
-
242
- # Start publishing
 
243
  await publisher.start()
244
-
245
- # Start pipeline
246
  await pipeline.start()
247
-
248
- # Warm up Whisper + Kokoro synchronously (~0.5s each, no Triton JIT).
249
- # UNet warm-up runs in background only when torch.compile is active.
250
- import os
251
- import numpy as np
252
- log.info("Warming up Whisper + TTS...")
253
- dummy_audio = np.zeros(int(0.32 * 24000), dtype=np.float32) # 320ms silence
254
- _feats, _ = await musetalk_worker.extract_features(dummy_audio)
255
- # Warm Kokoro ONNX session (first call initializes thread pool)
256
- tts.synthesize_full("Hello.")
257
- log.info("Whisper + TTS warm-up done")
258
-
259
- if os.environ.get("MUSETALK_TORCH_COMPILE", "0") == "1":
260
- log.info("torch.compile enabled — UNet JIT starting in background...")
261
-
262
- async def _background_unet_warmup():
263
- try:
264
- _batch_n = min(8, len(musetalk_bundle.avatar_assets.frame_list))
265
- await musetalk_worker.generate_batch(_feats, 0, _batch_n)
266
- log.info("UNet warm-up / torch.compile complete")
267
- except Exception as _e:
268
- log.warning("UNet background warm-up failed (non-fatal): %s", _e)
269
-
270
- asyncio.ensure_future(_background_unet_warmup())
271
- else:
272
- # Eager mode: run one synchronous warm-up pass to prime CUDA kernels
273
- log.info("Warming up UNet (eager mode)...")
274
- _batch_n = min(8, len(musetalk_bundle.avatar_assets.frame_list))
275
- await musetalk_worker.generate_batch(_feats, 0, _batch_n)
276
- log.info("UNet warm-up complete")
277
-
278
- # Store references
279
- _room = room
280
  _publisher = publisher
281
- _pipeline = pipeline
282
-
283
- return {
284
- "status": "connected",
285
- "room": LIVEKIT_ROOM_NAME,
286
- "url": LIVEKIT_URL,
287
- }
288
-
289
- except Exception as e:
290
- log.error(f"Connection failed: {e}", exc_info=True)
291
- raise HTTPException(status_code=500, detail=str(e))
292
 
293
 
 
 
294
  @app.post("/disconnect")
295
  async def disconnect():
296
- """Disconnect from LiveKit."""
297
  global _room, _publisher, _pipeline
298
-
299
  if _pipeline is None:
300
  raise HTTPException(status_code=400, detail="Not connected")
301
-
302
  log.info("Disconnecting...")
303
-
304
  if _pipeline:
305
  await _pipeline.stop()
306
  if _publisher:
307
  await _publisher.stop()
308
  if _room:
309
  await _room.disconnect()
310
-
311
- _room = None
312
- _publisher = None
313
- _pipeline = None
314
-
315
  return {"status": "disconnected"}
316
 
317
 
 
 
318
  @app.post("/speak")
319
  async def speak(request: SpeakRequest):
320
- """
321
- Speak text through the avatar.
322
-
323
- Returns latency metrics.
324
- """
325
- global _pipeline
326
-
327
- if _pipeline is None or not getattr(_pipeline, '_running', False):
328
  raise HTTPException(status_code=400, detail="Not connected")
329
-
330
- start_time = time.monotonic()
331
-
332
- # Push text to pipeline
333
  await _pipeline.push_text(request.text)
334
-
335
- # Calculate latency
336
- latency_ms = (time.monotonic() - start_time) * 1000
337
-
338
- return {
339
- "status": "processing",
340
- "latency_ms": round(latency_ms, 1),
341
- }
342
 
 
343
 
344
  @app.post("/get-token")
345
  @app.get("/livekit-token")
346
  async def get_token(request: TokenRequest = TokenRequest()):
347
- """Get LiveKit token for frontend.
348
-
349
- Does NOT require the pipeline to be connected — tokens are issued
350
- from the API key/secret alone, same pattern as Avatar_gen/backend/agent.py.
351
- The frontend passes roomName + identity in the POST body.
352
- """
353
- room = request.room_name or LIVEKIT_ROOM_NAME
354
- identity = request.identity or "frontend-user"
355
-
356
- token = lk_api.AccessToken(
357
- LIVEKIT_API_KEY,
358
- LIVEKIT_API_SECRET,
359
- ).with_identity(identity).with_name(identity)
360
  token.with_grants(lk_api.VideoGrants(
361
  room_join=True,
362
  room=room,
363
  can_publish=True,
364
  can_subscribe=True,
365
  ))
366
-
367
- return {
368
- "token": token.to_jwt(),
369
- "url": LIVEKIT_URL,
370
- "room": room,
371
- }
372
 
373
 
374
- # ──────────────────────────────────────────────────────────────────────────────
375
- # Entry point
376
- # ──────────────────────────────────────────────────────────────────────────────
377
 
378
  if __name__ == "__main__":
379
- uvicorn.run(
380
- app, # Direct app reference instead of string
381
- host=HOST,
382
- port=PORT,
383
- reload=False,
384
- log_level="info",
385
- )
 
1
  """
2
+ Speech-to-Video Server (api/ — warm-load version)
3
+ ====================================================
4
+ Models are loaded ONCE at server startup (lifespan), not at /connect.
5
+ This means /connect is instant for subsequent sessions.
6
+
7
+ Model loading split:
8
+ lifespan → MuseTalk bundle + Kokoro TTS + UNet warmup (stay in VRAM)
9
+ /connect → Room, Publisher, MuseTalkWorker, Pipeline (per-session)
10
+ /disconnect → session objects torn down; models stay loaded
11
+
12
+ Run:
13
+ cd backend && python api/server.py
14
+ # or: uvicorn api.server:app --host 0.0.0.0 --port 8767
15
  """
16
  from __future__ import annotations
17
 
18
  import asyncio
19
  import logging
 
20
  import sys
21
  import time
22
  from contextlib import asynccontextmanager
23
  from pathlib import Path
24
  from typing import Optional
25
 
26
+ # ── path setup ────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
27
  _current_file = Path(__file__).resolve()
28
+ _api_dir = _current_file.parent # backend/api/
29
+ _backend_dir = _api_dir.parent # backend/
30
+ _project_dir = _backend_dir.parent # speech_to_video/
31
 
32
+ for p in [_backend_dir, _project_dir]:
 
33
  if str(p) not in sys.path:
34
  sys.path.insert(0, str(p))
35
 
36
+ # ── imports ───────────────────────────────────────────────────────────────────
37
+ import numpy as np
38
  import uvicorn
39
  from fastapi import FastAPI, HTTPException
40
  from fastapi.middleware.cors import CORSMiddleware
 
49
  LIVEKIT_API_KEY,
50
  LIVEKIT_API_SECRET,
51
  LIVEKIT_ROOM_NAME,
 
 
52
  VIDEO_FPS,
53
  DEFAULT_AVATAR,
54
  DEVICE,
55
  )
56
  from tts.kokoro_tts import KokoroTTS
57
+ from musetalk.worker import load_musetalk_models, MuseTalkWorker, MuseTalkBundle
58
  from publisher.livekit_publisher import AVPublisher
59
  from api.pipeline import StreamingPipeline
60
 
61
  import torch
62
+ torch.set_float32_matmul_precision("high")
 
63
  torch._dynamo.config.suppress_errors = True
64
 
65
  log = logging.getLogger(__name__)
 
68
  format="%(asctime)s %(levelname)-7s %(name)s %(message)s",
69
  )
70
 
71
+ # ── global model state (loaded once, lives for server lifetime) ───────────────
72
+ _musetalk_bundle: Optional[MuseTalkBundle] = None
73
+ _tts: Optional[KokoroTTS] = None
 
 
74
 
75
+ # ── session state (created/destroyed on connect/disconnect) ──────────────────
76
+ _pipeline: Optional[StreamingPipeline] = None
77
+ _room: Optional[rtc.Room] = None
78
+ _publisher: Optional[AVPublisher] = None
79
+
80
+
81
+ # ── lifespan: load models once at startup ────────────────────────────────────
82
 
83
  @asynccontextmanager
84
  async def lifespan(app: FastAPI):
85
+ global _musetalk_bundle, _tts
86
+
87
+ t_start = time.monotonic()
88
  log.info("=== Speech-to-Video Server Starting ===")
89
+ log.info("Device: %s Avatar: %s", DEVICE, DEFAULT_AVATAR)
90
+
91
+ # 1. Load MuseTalk (VAE + UNet + Whisper + avatar latents)
92
+ log.info("Loading MuseTalk models...")
93
+ _musetalk_bundle = await asyncio.to_thread(
94
+ load_musetalk_models, DEFAULT_AVATAR, DEVICE
95
+ )
96
+ log.info("MuseTalk loaded (%.1fs)", time.monotonic() - t_start)
97
+
98
+ # 2. Load Kokoro TTS
99
+ log.info("Loading Kokoro TTS...")
100
+ _tts = await asyncio.to_thread(KokoroTTS)
101
+ log.info("Kokoro TTS loaded")
102
+
103
+ # 3. UNet warmup — prime GPU caches
104
+ worker_tmp = MuseTalkWorker(_musetalk_bundle)
105
+ dummy_audio = np.zeros(int(0.32 * 24_000), dtype=np.float32)
106
+ feats, _ = await worker_tmp.extract_features(dummy_audio)
107
+ t0 = time.monotonic()
108
+ n = min(8, len(_musetalk_bundle.avatar_assets.frame_list))
109
+ await worker_tmp.generate_batch(feats, 0, n)
110
+ log.info("UNet warm-up done (%.1fs)", time.monotonic() - t0)
111
+ worker_tmp.shutdown()
112
+
113
+ _tts.synthesize_full("Hello.")
114
+ log.info("TTS warm-up done")
115
+
116
+ log.info("=== Server ready in %.1fs — waiting for /connect (port %d) ===",
117
+ time.monotonic() - t_start, PORT)
118
 
119
+ yield # ── server running ────────────────────────────────────────────────
120
+
121
+ # ── shutdown ──────────────────────────────────────────────────────────────
122
+ global _pipeline, _room, _publisher
123
  if _pipeline:
124
  await _pipeline.stop()
 
 
 
125
  if _publisher:
126
  await _publisher.stop()
 
127
  if _room:
128
  await _room.disconnect()
 
 
129
  log.info("=== Server Shutdown ===")
130
 
131
 
132
+ # ── FastAPI app ───────────────────────────────────────────────────────────────
133
+
134
  app = FastAPI(
135
+ title="Speech-to-Video (api — 3-queue)",
136
+ description="Text → Kokoro TTS → Whisper → MuseTalk → LiveKit",
137
+ version="2.0.0",
138
  lifespan=lifespan,
139
  )
140
 
 
146
  )
147
 
148
 
149
+ # ── request models ────────────────────────────────────────────────────────────
150
+
151
  class SpeakRequest(BaseModel):
152
  text: str
153
  voice: Optional[str] = None
154
  speed: Optional[float] = None
155
 
 
156
  class TokenRequest(BaseModel):
157
+ room_name: str = LIVEKIT_ROOM_NAME
158
  identity: str = "user"
159
 
160
 
161
+ # ── /health and /status ───────────────────────────────────────────────────────
 
 
162
 
163
  @app.get("/health")
164
  async def health():
 
165
  return {
166
  "status": "ok",
167
+ "models_loaded": _musetalk_bundle is not None and _tts is not None,
168
+ "pipeline_active": _pipeline is not None and getattr(_pipeline, "_running", False),
169
  }
170
 
 
171
  @app.get("/status")
172
  async def status():
 
 
173
  vram = {}
174
  if torch.cuda.is_available():
175
  vram = {
176
  "allocated_gb": round(torch.cuda.memory_allocated() / 1024**3, 2),
177
+ "reserved_gb": round(torch.cuda.memory_reserved() / 1024**3, 2),
178
  }
179
  return {
180
+ "pipeline": "api-3-queue",
181
+ "models_loaded": _musetalk_bundle is not None,
182
+ "pipeline_active": _pipeline is not None and getattr(_pipeline, "_running", False),
183
  "avatar": DEFAULT_AVATAR,
184
  "device": DEVICE,
185
  "vram": vram,
186
  }
187
 
188
 
189
+ # ── /connect ──────────────────────────────────────────────────────────────────
190
+
191
  @app.post("/connect")
192
  async def connect():
 
 
 
193
  global _room, _publisher, _pipeline
194
+
195
+ if _musetalk_bundle is None or _tts is None:
196
+ raise HTTPException(status_code=503, detail="Server still loading models")
197
+
198
+ if _pipeline is not None and getattr(_pipeline, "_running", False):
199
  raise HTTPException(status_code=400, detail="Already connected")
200
+
201
  log.info("Connecting to LiveKit room...")
202
+ t0 = time.monotonic()
203
+
204
  try:
205
+ first_frame = _musetalk_bundle.avatar_assets.frame_list[0]
206
+ actual_h, actual_w = first_frame.shape[:2]
207
+
 
 
 
 
 
 
 
 
208
  room = rtc.Room()
209
+ token = (
210
+ lk_api.AccessToken(LIVEKIT_API_KEY, LIVEKIT_API_SECRET)
211
+ .with_identity("backend-agent")
212
+ .with_name("Speech-to-Video Agent")
213
+ )
 
214
  token.with_grants(lk_api.VideoGrants(
215
  room_join=True,
216
  room=LIVEKIT_ROOM_NAME,
217
  can_publish=True,
218
  can_subscribe=True,
219
  ))
 
 
 
 
 
220
 
 
221
  publisher = AVPublisher(
222
  room,
223
  video_width=actual_w,
224
  video_height=actual_h,
225
  video_fps=VIDEO_FPS,
226
  )
227
+
228
+ # MuseTalkWorker wraps the already-loaded bundle — no model reload
229
+ musetalk_worker = MuseTalkWorker(_musetalk_bundle)
230
+
 
231
  pipeline = StreamingPipeline(
232
+ tts=_tts,
233
  musetalk=musetalk_worker,
234
  publisher=publisher,
235
+ avatar_assets=_musetalk_bundle.avatar_assets,
 
 
 
 
 
 
236
  )
237
+
238
+ await room.connect(url=LIVEKIT_URL, token=token.to_jwt())
239
+ log.info("Connected to LiveKit: %s", LIVEKIT_ROOM_NAME)
240
+
241
  await publisher.start()
 
 
242
  await pipeline.start()
243
+
244
+ # Fast warmup (models already hot in VRAM)
245
+ dummy_audio = np.zeros(int(0.32 * 24_000), dtype=np.float32)
246
+ feats, _ = await musetalk_worker.extract_features(dummy_audio)
247
+ n = min(8, len(_musetalk_bundle.avatar_assets.frame_list))
248
+ await musetalk_worker.generate_batch(feats, 0, n)
249
+ log.info("Session warm-up done")
250
+
251
+ _room = room
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  _publisher = publisher
253
+ _pipeline = pipeline
254
+
255
+ log.info("/connect done in %.1fs", time.monotonic() - t0)
256
+ return {"status": "connected", "room": LIVEKIT_ROOM_NAME, "url": LIVEKIT_URL}
257
+
258
+ except Exception as exc:
259
+ log.error("Connection failed: %s", exc, exc_info=True)
260
+ raise HTTPException(status_code=500, detail=str(exc))
 
 
 
261
 
262
 
263
+ # ── /disconnect ───────────────────────────────────────────────────────────────
264
+
265
  @app.post("/disconnect")
266
  async def disconnect():
 
267
  global _room, _publisher, _pipeline
268
+
269
  if _pipeline is None:
270
  raise HTTPException(status_code=400, detail="Not connected")
271
+
272
  log.info("Disconnecting...")
273
+
274
  if _pipeline:
275
  await _pipeline.stop()
276
  if _publisher:
277
  await _publisher.stop()
278
  if _room:
279
  await _room.disconnect()
280
+
281
+ _room = _publisher = _pipeline = None
282
+ # NOTE: _musetalk_bundle and _tts are intentionally NOT cleared —
283
+ # models stay in VRAM so the next /connect is instant.
284
+ log.info("Disconnected — models remain loaded for next session")
285
  return {"status": "disconnected"}
286
 
287
 
288
+ # ── /speak ────────────────────────────────────────────────────────────────────
289
+
290
  @app.post("/speak")
291
  async def speak(request: SpeakRequest):
292
+ if _pipeline is None or not getattr(_pipeline, "_running", False):
 
 
 
 
 
 
 
293
  raise HTTPException(status_code=400, detail="Not connected")
294
+
295
+ t0 = time.monotonic()
 
 
296
  await _pipeline.push_text(request.text)
297
+ return {"status": "processing", "latency_ms": round((time.monotonic() - t0) * 1000, 1)}
298
+
 
 
 
 
 
 
299
 
300
+ # ── /get-token ────────────────────────────────────────────────────────────────
301
 
302
  @app.post("/get-token")
303
  @app.get("/livekit-token")
304
  async def get_token(request: TokenRequest = TokenRequest()):
305
+ room = request.room_name or LIVEKIT_ROOM_NAME
306
+ identity = request.identity or "frontend-user"
307
+
308
+ token = (
309
+ lk_api.AccessToken(LIVEKIT_API_KEY, LIVEKIT_API_SECRET)
310
+ .with_identity(identity)
311
+ .with_name(identity)
312
+ )
 
 
 
 
 
313
  token.with_grants(lk_api.VideoGrants(
314
  room_join=True,
315
  room=room,
316
  can_publish=True,
317
  can_subscribe=True,
318
  ))
319
+ return {"token": token.to_jwt(), "url": LIVEKIT_URL, "room": room}
 
 
 
 
 
320
 
321
 
322
+ # ── entry point ───────────────────────────────────────────────────────────────
 
 
323
 
324
  if __name__ == "__main__":
325
+ uvicorn.run(app, host=HOST, port=PORT, reload=False, log_level="info")
 
 
 
 
 
 
backend/avatars/christine/coords.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:842e96bc4fd963cc836e96b881dd1840cb00e7cd5f99a4ae9e707de32e6d6900
3
+ size 777
backend/avatars/christine/full_imgs/00000000.png ADDED

Git LFS Details

  • SHA256: 20ad0f6ffec2bd8c0d868307eb6541eb2d0c8f579d5076031d98c6f7446d56a5
  • Pointer size: 131 Bytes
  • Size of remote file: 293 kB
backend/avatars/christine/full_imgs/00000001.png ADDED

Git LFS Details

  • SHA256: b96d4d3f9807ddbbdb8311986254d8765fbd47a02bca260eb333630e977c2ffc
  • Pointer size: 131 Bytes
  • Size of remote file: 425 kB
backend/avatars/christine/full_imgs/00000002.png ADDED

Git LFS Details

  • SHA256: 17cfafc5adfa3e550a0fad050ecd992b54b4699ec80a5441404ad648c4ec73de
  • Pointer size: 131 Bytes
  • Size of remote file: 413 kB
backend/avatars/christine/full_imgs/00000003.png ADDED

Git LFS Details

  • SHA256: 74ebded05f02e73bb627e0f49887e96c9caa02d8a14894afd13e783f1ff2d835
  • Pointer size: 131 Bytes
  • Size of remote file: 415 kB
backend/avatars/christine/full_imgs/00000004.png ADDED

Git LFS Details

  • SHA256: 64ac29c4d48b902a889479eb67ce43ff01c73bbe50b4fb5d2d6ab6bd024be9c9
  • Pointer size: 131 Bytes
  • Size of remote file: 389 kB
backend/avatars/christine/full_imgs/00000005.png ADDED

Git LFS Details

  • SHA256: 39f220ca43019e4e35332d7a7b269452e05eaaa5ee1d199f9052ca03b6624de8
  • Pointer size: 131 Bytes
  • Size of remote file: 393 kB
backend/avatars/christine/full_imgs/00000006.png ADDED

Git LFS Details

  • SHA256: 372e49e19501c1be1168cdee4541a06d8d9eae9b76adb00977dc3f274acdf68d
  • Pointer size: 131 Bytes
  • Size of remote file: 422 kB
backend/avatars/christine/full_imgs/00000007.png ADDED

Git LFS Details

  • SHA256: c0d8cef00028e1e3d462ae252c2fd5ec17213db40059f09ab84d5d05f97420c4
  • Pointer size: 131 Bytes
  • Size of remote file: 421 kB
backend/avatars/christine/full_imgs/00000008.png ADDED

Git LFS Details

  • SHA256: 58ca18422db51815da990b756f2c0f95f9a8e4b6aa6fafcb489fca29dbcf07bd
  • Pointer size: 131 Bytes
  • Size of remote file: 424 kB
backend/avatars/christine/full_imgs/00000009.png ADDED

Git LFS Details

  • SHA256: a0b26e2a8a5477abeb8bb17e7cc484aa80dc965c91b2469833cf70166c6f5182
  • Pointer size: 131 Bytes
  • Size of remote file: 418 kB
backend/avatars/christine/full_imgs/00000010.png ADDED

Git LFS Details

  • SHA256: 8fbd75ddd14d2d3bf8b41b771d69719aceb37a53af434c13502d2c682812aea7
  • Pointer size: 131 Bytes
  • Size of remote file: 417 kB
backend/avatars/christine/full_imgs/00000011.png ADDED

Git LFS Details

  • SHA256: b025ba7e94d89e43b25a246aa84f1c29301b663dd6034e8e2ebc8b07ea24e44f
  • Pointer size: 131 Bytes
  • Size of remote file: 416 kB
backend/avatars/christine/full_imgs/00000012.png ADDED

Git LFS Details

  • SHA256: 13e50f1653dfcf717edcf4c7742f6440a443ba979c459534893ae9656db8349a
  • Pointer size: 131 Bytes
  • Size of remote file: 415 kB
backend/avatars/christine/full_imgs/00000013.png ADDED

Git LFS Details

  • SHA256: 13e50f1653dfcf717edcf4c7742f6440a443ba979c459534893ae9656db8349a
  • Pointer size: 131 Bytes
  • Size of remote file: 415 kB
backend/avatars/christine/full_imgs/00000014.png ADDED

Git LFS Details

  • SHA256: b025ba7e94d89e43b25a246aa84f1c29301b663dd6034e8e2ebc8b07ea24e44f
  • Pointer size: 131 Bytes
  • Size of remote file: 416 kB
backend/avatars/christine/full_imgs/00000015.png ADDED

Git LFS Details

  • SHA256: 8fbd75ddd14d2d3bf8b41b771d69719aceb37a53af434c13502d2c682812aea7
  • Pointer size: 131 Bytes
  • Size of remote file: 417 kB
backend/avatars/christine/full_imgs/00000016.png ADDED

Git LFS Details

  • SHA256: a0b26e2a8a5477abeb8bb17e7cc484aa80dc965c91b2469833cf70166c6f5182
  • Pointer size: 131 Bytes
  • Size of remote file: 418 kB
backend/avatars/christine/full_imgs/00000017.png ADDED

Git LFS Details

  • SHA256: 58ca18422db51815da990b756f2c0f95f9a8e4b6aa6fafcb489fca29dbcf07bd
  • Pointer size: 131 Bytes
  • Size of remote file: 424 kB
backend/avatars/christine/full_imgs/00000018.png ADDED

Git LFS Details

  • SHA256: c0d8cef00028e1e3d462ae252c2fd5ec17213db40059f09ab84d5d05f97420c4
  • Pointer size: 131 Bytes
  • Size of remote file: 421 kB
backend/avatars/christine/full_imgs/00000019.png ADDED

Git LFS Details

  • SHA256: 372e49e19501c1be1168cdee4541a06d8d9eae9b76adb00977dc3f274acdf68d
  • Pointer size: 131 Bytes
  • Size of remote file: 422 kB
backend/avatars/christine/full_imgs/00000020.png ADDED

Git LFS Details

  • SHA256: 39f220ca43019e4e35332d7a7b269452e05eaaa5ee1d199f9052ca03b6624de8
  • Pointer size: 131 Bytes
  • Size of remote file: 393 kB
backend/avatars/christine/full_imgs/00000021.png ADDED

Git LFS Details

  • SHA256: 64ac29c4d48b902a889479eb67ce43ff01c73bbe50b4fb5d2d6ab6bd024be9c9
  • Pointer size: 131 Bytes
  • Size of remote file: 389 kB
backend/avatars/christine/full_imgs/00000022.png ADDED

Git LFS Details

  • SHA256: 74ebded05f02e73bb627e0f49887e96c9caa02d8a14894afd13e783f1ff2d835
  • Pointer size: 131 Bytes
  • Size of remote file: 415 kB
backend/avatars/christine/full_imgs/00000023.png ADDED

Git LFS Details

  • SHA256: 17cfafc5adfa3e550a0fad050ecd992b54b4699ec80a5441404ad648c4ec73de
  • Pointer size: 131 Bytes
  • Size of remote file: 413 kB
backend/avatars/christine/full_imgs/00000024.png ADDED

Git LFS Details

  • SHA256: b96d4d3f9807ddbbdb8311986254d8765fbd47a02bca260eb333630e977c2ffc
  • Pointer size: 131 Bytes
  • Size of remote file: 425 kB
backend/avatars/christine/full_imgs/00000025.png ADDED

Git LFS Details

  • SHA256: 20ad0f6ffec2bd8c0d868307eb6541eb2d0c8f579d5076031d98c6f7446d56a5
  • Pointer size: 131 Bytes
  • Size of remote file: 293 kB
backend/avatars/christine/mask/00000000.png ADDED

Git LFS Details

  • SHA256: 06907c26c0d2193822d529c63a5afc1820e0d63bfeed0b44f2e19a7e3c20a55f
  • Pointer size: 129 Bytes
  • Size of remote file: 6.54 kB
backend/avatars/christine/mask/00000001.png ADDED

Git LFS Details

  • SHA256: 0de4852caf063fb7883deeecdaf982b5f269008c5b09bc2bf5a0d180fb097681
  • Pointer size: 129 Bytes
  • Size of remote file: 6.53 kB
backend/avatars/christine/mask/00000002.png ADDED

Git LFS Details

  • SHA256: 4b2374ae4f18f7eb6a44eda4de695597dbc0c0c8b71b1af20f12df571ed6ed02
  • Pointer size: 129 Bytes
  • Size of remote file: 6.53 kB
backend/avatars/christine/mask/00000003.png ADDED

Git LFS Details

  • SHA256: 8dba2d9b0f07297bd4dc74337f5a9cd3c13b260da42238060b87bcbbc96e268a
  • Pointer size: 129 Bytes
  • Size of remote file: 6.62 kB
backend/avatars/christine/mask/00000004.png ADDED

Git LFS Details

  • SHA256: 536155c0cffc81088945588fee7c689eb1a7123dbe275e1686dd4f112c09e4da
  • Pointer size: 129 Bytes
  • Size of remote file: 6.62 kB
backend/avatars/christine/mask/00000005.png ADDED

Git LFS Details

  • SHA256: 27ef32778029353be2bea68b3450edaf585e0c36c03a9a4bdbc779a2401228b1
  • Pointer size: 129 Bytes
  • Size of remote file: 6.53 kB
backend/avatars/christine/mask/00000006.png ADDED

Git LFS Details

  • SHA256: 79c58a39b70b5ba2809726a3aad860d3ee027da47e27f8d95a798cc6173057a3
  • Pointer size: 129 Bytes
  • Size of remote file: 6.62 kB
backend/avatars/christine/mask/00000007.png ADDED

Git LFS Details

  • SHA256: b23425f0f8c744f63c6db3ebf50b20b059758cb025deae9dfe02c7dff821b4c3
  • Pointer size: 129 Bytes
  • Size of remote file: 6.54 kB
backend/avatars/christine/mask/00000008.png ADDED

Git LFS Details

  • SHA256: 58e052ea940446e84561cf3478d710bada6b5f84bc0ec9803eff67b3555e8133
  • Pointer size: 129 Bytes
  • Size of remote file: 6.69 kB
backend/avatars/christine/mask/00000009.png ADDED

Git LFS Details

  • SHA256: 31e918689f9023049a23c20e29346285242c2da11e732bdc4ce7b45abcbe06b9
  • Pointer size: 129 Bytes
  • Size of remote file: 6.69 kB
backend/avatars/christine/mask/00000010.png ADDED

Git LFS Details

  • SHA256: 558de7129bf48f92104e2ad60035742aeb0a049e5de737b80d7349259b4a760d
  • Pointer size: 129 Bytes
  • Size of remote file: 6.71 kB
backend/avatars/christine/mask/00000011.png ADDED

Git LFS Details

  • SHA256: 7d6d82a045e8f52c55c9ad979cd8bb88da9b4fcafbecc1189b81a6f355041d1a
  • Pointer size: 129 Bytes
  • Size of remote file: 6.53 kB
backend/avatars/christine/mask/00000012.png ADDED

Git LFS Details

  • SHA256: 9b878013518acf76a8e5e52a74a7900d9bf8aa109b2d93d13b414532d613dedf
  • Pointer size: 129 Bytes
  • Size of remote file: 6.59 kB
backend/avatars/christine/mask/00000013.png ADDED

Git LFS Details

  • SHA256: 9b878013518acf76a8e5e52a74a7900d9bf8aa109b2d93d13b414532d613dedf
  • Pointer size: 129 Bytes
  • Size of remote file: 6.59 kB
backend/avatars/christine/mask/00000014.png ADDED

Git LFS Details

  • SHA256: 7d6d82a045e8f52c55c9ad979cd8bb88da9b4fcafbecc1189b81a6f355041d1a
  • Pointer size: 129 Bytes
  • Size of remote file: 6.53 kB
backend/avatars/christine/mask/00000015.png ADDED

Git LFS Details

  • SHA256: 558de7129bf48f92104e2ad60035742aeb0a049e5de737b80d7349259b4a760d
  • Pointer size: 129 Bytes
  • Size of remote file: 6.71 kB
backend/avatars/christine/mask/00000016.png ADDED

Git LFS Details

  • SHA256: 31e918689f9023049a23c20e29346285242c2da11e732bdc4ce7b45abcbe06b9
  • Pointer size: 129 Bytes
  • Size of remote file: 6.69 kB
backend/avatars/christine/mask/00000017.png ADDED

Git LFS Details

  • SHA256: 58e052ea940446e84561cf3478d710bada6b5f84bc0ec9803eff67b3555e8133
  • Pointer size: 129 Bytes
  • Size of remote file: 6.69 kB
backend/avatars/christine/mask/00000018.png ADDED

Git LFS Details

  • SHA256: b23425f0f8c744f63c6db3ebf50b20b059758cb025deae9dfe02c7dff821b4c3
  • Pointer size: 129 Bytes
  • Size of remote file: 6.54 kB