zirobtc commited on
Commit
478eeb0
·
verified ·
1 Parent(s): f3fe6da

Uploading DART folder into model repo

Browse files
Files changed (9) hide show
  1. aligner.py +70 -0
  2. main.py +244 -0
  3. orpheus_engine.py +127 -0
  4. readme.md +3 -0
  5. requirements.txt +1 -0
  6. setup.sh +19 -0
  7. temp.py +248 -0
  8. transcript.txt +356 -0
  9. util.py +58 -0
aligner.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # aligner.py
2
+
3
+ import os
4
+ import re
5
+ import tempfile
6
+ from typing import Dict, Any
7
+
8
+ # These imports are from your original script and are installed by your setup.sh
9
+ from aeneas.executetask import ExecuteTask
10
+ from aeneas.task import Task
11
+
12
def setup_aligner():
    """No-op initializer for the aeneas aligner.

    Aeneas works on files and needs no model warm-up; this hook exists only
    so main.py can initialize every engine through the same interface.
    """
    print("✅ Aeneas aligner is ready (no setup required).")
19
+
20
def align_words(audio_bytes: bytes, transcript: str) -> Dict[str, Any]:
    """Force-align *transcript* against *audio_bytes* with aeneas.

    The transcript is reduced to one word per line (alphabetic words and
    apostrophes only) so aeneas returns one sync-map fragment per word.
    Runs sequentially for stability.

    Args:
        audio_bytes: Raw WAV file contents.
        transcript: Free text; markup/punctuation is discarded.

    Returns:
        {"word": [str, ...], "startTime": [float, ...]} — parallel lists of
        aligned words and their start times in seconds.
    """
    config = (
        "task_language=eng|"
        "is_text_type=plain|"
        "os_task_file_format=json|"
        "task_adjust_boundary_algorithm=percent|"
        "task_adjust_boundary_percent_value=30"
    )

    audio_path = None
    text_path = None
    # FIX: the original created the temp files with delete=False and only
    # unlinked them inside a later try/finally, so an exception between
    # creation and that block leaked the files; cleanup errors could also
    # mask the real exception. Everything now sits under one try/finally.
    try:
        # Aeneas reads from paths, so the files must be closed (flushed)
        # before the task runs — hence delete=False + manual unlink.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as af:
            af.write(audio_bytes)
            audio_path = af.name

        with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".txt") as tf:
            words_only = re.findall(r"\b[a-zA-Z']+\b", transcript)
            tf.write("\n".join(words_only))
            text_path = tf.name

        task = Task(config_string=config)
        task.audio_file_path_absolute = audio_path
        task.text_file_path_absolute = text_path

        ExecuteTask(task).execute()

        words = []
        start_times = []
        if task.sync_map is not None:
            for fragment in task.sync_map.fragments:
                word = fragment.text.strip()
                if word:
                    words.append(word)
                    start_times.append(float(fragment.begin))

        return {"word": words, "startTime": start_times}

    finally:
        # Best-effort cleanup; never let unlink errors shadow the result.
        for path in (audio_path, text_path):
            if path is not None:
                try:
                    os.unlink(path)
                except OSError:
                    pass
main.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+
3
+ import socket
4
+ import struct
5
+ import json
6
+ import msgpack
7
+
8
+ import zlib
9
+ import re
10
+ from util import calculate_duration_from_bytes, update_motion_generator_duration,load_yaml
11
+
12
+ import base64
13
+ from typing import Dict, Any, List, Tuple
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ from aligner import align_words, setup_aligner
16
+
17
+ from orpheus_engine import synthesize_for_scene, setup_model
18
+
19
+
20
+ config = load_yaml()
21
+ HOST = config["HOST"]
22
+ PORT = config["PORT"]
23
+
24
+ print(f"Connecting to {HOST}:{PORT}")
25
+ MAGIC = 0x2333
26
+
27
def patch_socket_keepalive(sock: socket.socket) -> None:
    """Enable TCP keepalive and remove the recv timeout on *sock*.

    Keeps long-idle connections from halting or being dropped silently.
    """
    sock.settimeout(None)  # block on recv indefinitely rather than timing out
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)

    # Platform-specific probe tuning (present on Linux): first probe after
    # 10s idle, then every 5s, declare dead after 3 missed probes.
    for option_name, value in (
        ("TCP_KEEPIDLE", 10),
        ("TCP_KEEPINTVL", 5),
        ("TCP_KEEPCNT", 3),
    ):
        option = getattr(socket, option_name, None)
        if option is not None:
            sock.setsockopt(socket.IPPROTO_TCP, option, value)
39
+
40
def recv_exact(sock: socket.socket, n: int) -> bytes:
    """Read exactly *n* bytes from *sock*.

    Raises:
        EOFError: if the peer closes the connection before *n* bytes arrive.
    """
    pieces = []
    remaining = n
    while remaining > 0:
        piece = sock.recv(remaining)
        if not piece:
            raise EOFError("Connection closed prematurely")
        pieces.append(piece)
        remaining -= len(piece)
    return b"".join(pieces)
48
+
49
def send_frame(sock: socket.socket, event: str, payload: Any) -> None:
    """Serialize and send one outbound frame.

    The body is msgpack-encoded then zlib-compressed; the wire format is a
    12-byte header of three little-endian uint32s — <MAGIC><raw_len><comp_len> —
    followed by the compressed body.
    """
    raw = msgpack.packb({"event": event, "payload": payload}, use_bin_type=True)
    compressed = zlib.compress(raw)
    header = struct.pack("<III", MAGIC, len(raw), len(compressed))
    sock.sendall(header + compressed)
57
+
58
+ MAGIC_JSON = 0xDEADBEEF
59
+
60
def recv_frame(sock: socket.socket) -> Dict[str, Any]:
    """Receive one inbound frame and return it as a dict.

    Inbound frames are zlib-compressed JSON behind the same
    <magic><raw_len><comp_len> header used by send_frame, but validated
    against MAGIC_JSON.

    Raises:
        RuntimeError: on a magic-number mismatch (protocol desync).
        EOFError: via recv_exact if the peer closes mid-frame.
    """
    magic, _raw_len, comp_len = struct.unpack("<III", recv_exact(sock, 12))
    if magic != MAGIC_JSON:
        raise RuntimeError("Bad magic number – protocol mismatch")
    payload = zlib.decompress(recv_exact(sock, comp_len))
    return json.loads(payload.decode())
68
+
69
def strip_tags(text: str) -> str:
    """Remove <...> markup and return only the alphabetic words, space-joined."""
    without_markup = re.sub(r"<[^>]+>", "", text)
    return " ".join(re.findall(r"\b[a-zA-Z']+\b", without_markup)).strip()
73
+
74
def align_audio(audio_bytes: bytes, scene_text: str) -> Dict[str, Any]:
    """Run word-level alignment for a single scene's audio.

    Thin wrapper around align_words() so the threaded pipeline has a stable
    entry point. (The original carried a dead, commented-out dummy-file
    block as a no-op string expression, and a docstring claiming it also
    ran TTS — both removed; only alignment happens here.)

    Args:
        audio_bytes: Raw WAV contents for the scene.
        scene_text: Transcript text for the same scene.

    Returns:
        align_words()'s dict: {"word": [...], "startTime": [...]}.
    """
    return align_words(audio_bytes, scene_text)
96
+
97
def generate_audio(scene: Dict[str, Any]) -> Tuple[bytes, str]:
    """Synthesize speech for one scene with the Orpheus TTS engine.

    Reads the scene's required "txt" field plus optional sampling overrides
    ("voice", "temperature", "top_p", "repetition_penalty", "max_tokens").
    The dead dummy-file block the original left behind as a no-op string
    expression has been removed.

    Returns:
        (wav_bytes, wav_base64): raw WAV bytes and their base64 encoding,
        as produced by synthesize_for_scene().
    """
    audio_bytes, audio_base64 = synthesize_for_scene(
        prompt=scene["txt"],
        voice=scene.get("voice", "miko"),
        temperature=scene.get("temperature", 0.6),
        top_p=scene.get("top_p", 0.8),
        repetition_penalty=scene.get("repetition_penalty", 1.3),
        max_tokens=scene.get("max_tokens", 1200),
    )
    return audio_bytes, audio_base64
118
+
119
def handle_connection(sock: socket.socket) -> None:
    """Serve one server connection: announce our role, then loop on frames.

    For each "generate-voice" request:
      Stage 1 synthesizes audio for every scene in parallel and pushes clip
      durations to the motion generator as soon as each clip exists.
      Stage 2 runs the slower word alignment in parallel and sends the
      combined result back in a single "voice-generated" frame.
    """
    send_frame(sock, "hello", {"role": "tts"})
    print("→ hello (role=tts) sent")

    while True:
        try:
            frame = recv_frame(sock)
        except EOFError:
            print('[ "Connection closed by the other side" ]')
            break

        event = frame.get("event")
        payload = frame.get("payload")

        if event != "generate-voice":
            print(f"⚠️ unknown event {event}, ignored")
            continue

        scenes: List[dict] = payload.get("scenes", [])

        # --- STAGE 1: FAST audio generation & duration notification ---
        # Goal: get durations to the motion generator ASAP.
        generated_audio_data = []
        print("")
        print("--- Generating Audios Thread ---")
        with ThreadPoolExecutor(max_workers=10) as executor:
            future_to_scene = {
                executor.submit(generate_audio, scene): scene
                for scene in scenes if scene.get("txt")
            }

            for future in as_completed(future_to_scene):
                scene = future_to_scene[future]
                try:
                    scene_id = scene["sceneId"]
                    motion_index = scene.get("motionIndex", 0)
                    # 1. Get the generated audio
                    audio_bytes, audio_base64 = future.result()
                    print("")
                    print(f'[ "Generated Audio {scene_id}, Motion: {motion_index}" ]')

                    # 2. Calculate duration instantly
                    duration = calculate_duration_from_bytes(audio_bytes)

                    # 3. Notify motion generator IMMEDIATELY
                    if duration > 0:
                        update_motion_generator_duration(scene_id, motion_index, duration)

                    # 4. Store the results for the slow alignment stage
                    generated_audio_data.append({
                        "scene": scene,
                        "audio_bytes": audio_bytes,
                        "audio_base64": audio_base64
                    })
                except Exception as e:
                    # FIX: the original nested single quotes inside a
                    # single-quoted f-string ({scene['sceneId']}) — a
                    # SyntaxError before Python 3.12 — and re-indexing
                    # scene["sceneId"] could raise inside the handler.
                    safe_id = scene.get("sceneId", "?")
                    print(f'[ "Error during audio generation for {safe_id}: {e}" ]')

        # --- STAGE 2: SLOW word alignment in parallel ---
        # All duration notifications are out; now do the slow work.
        response_by_scene: Dict[str, Any] = {}
        print("")
        print("--- Word Alignments Thread ---")
        with ThreadPoolExecutor(max_workers=10) as executor:
            # Submit alignment tasks using the Stage 1 results.
            future_to_data = {
                executor.submit(align_words, data["audio_bytes"], strip_tags(data["scene"]["txt"])): data
                for data in generated_audio_data
            }

            for future in as_completed(future_to_data):
                data = future_to_data[future]
                scene = data["scene"]
                scene_id = scene["sceneId"]
                motion_index = scene.get("motionIndex", 0)

                try:
                    # 1. Get the alignment result
                    alignment = future.result()
                    print("")
                    print(f'[ "Aligned {scene_id}, Motion: {motion_index}" ]')

                    # 2. Build the final response object with all the data
                    voice_audio = {
                        "motion": motion_index,
                        "audio_base64": data["audio_base64"],  # From Stage 1
                        "alignment": alignment,                # From Stage 2
                    }
                    if scene_id not in response_by_scene:
                        response_by_scene[scene_id] = {"sceneId": scene_id, "audioEvents": []}
                    response_by_scene[scene_id]["audioEvents"].append(voice_audio)

                except Exception as e:
                    print(f"Error during alignment for scene {scene_id}: {e}")

        if response_by_scene:
            send_frame(sock, "voice-generated", list(response_by_scene.values()))
            print("")
            print(f"[ ← Audios ({len(response_by_scene)}) sent ]")
221
+
222
def main() -> None:
    """Initialize the engines, then connect to the orchestrator forever.

    Any connection failure or unhandled error triggers a 5-second backoff
    followed by a reconnect attempt; the loop never exits on its own.
    """
    import time  # FIX: hoisted — the original re-ran `import time` inside the finally of every iteration

    # Load the Orpheus TTS model on startup.
    setup_model()
    # Aeneas needs no setup; call kept for a consistent engine interface.
    setup_aligner()

    while True:
        try:
            with socket.create_connection((HOST, PORT), timeout=60) as sock:
                patch_socket_keepalive(sock)
                print(f'["Connected to server at {HOST}:{PORT}"]')
                handle_connection(sock)
        except (ConnectionRefusedError, OSError) as e:
            print(f"Connection error: {e}, retrying in 5s")
        except Exception as e:
            print(f"Unhandled error: {e}, reconnecting in 5s")
        finally:
            time.sleep(5)
241
+
242
+ if __name__ == "__main__":
243
+ main()
244
+
orpheus_engine.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tts_engine.py
2
+
3
+ import wave
4
+ import asyncio
5
+ import uuid # Import uuid to generate unique IDs
6
+ import threading
7
+ import queue
8
+ import base64
9
+ from io import BytesIO
10
+ from util import load_yaml
11
+
12
+ from orpheus_tts.engine_class import OrpheusModel
13
+ from vllm.outputs import RequestOutput
14
+ from vllm import SamplingParams
15
+
16
+ # --- Background loop to keep vLLM stable across requests ---
17
+ # This class is correct and does not need changes.
18
class BackgroundEventLoop:
    """Owns a dedicated asyncio event loop running on a daemon thread.

    Lets synchronous callers consume async generators (e.g. vLLM output
    streams) without starting and tearing down a loop per request.
    """

    def __init__(self):
        self._loop = asyncio.new_event_loop()
        self._thread = threading.Thread(target=self._run_loop, daemon=True)
        self._thread.start()

    def _run_loop(self):
        # Runs on the background thread for the lifetime of the process.
        asyncio.set_event_loop(self._loop)
        self._loop.run_forever()

    def run_generator(self, async_gen):
        """Drive *async_gen* on the background loop, yielding items synchronously.

        Any exception raised by the async generator is re-raised in the
        calling thread after the items produced so far.
        """
        handoff = queue.Queue()
        end_of_stream = object()  # unique marker; never equal to a real item

        async def pump():
            try:
                async for item in async_gen:
                    handoff.put(item)
            except Exception as exc:
                handoff.put(exc)
            finally:
                handoff.put(end_of_stream)

        asyncio.run_coroutine_threadsafe(pump(), self._loop)

        while True:
            item = handoff.get()
            if item is end_of_stream:
                return
            if isinstance(item, Exception):
                raise item
            yield item
50
+
51
+ # --- Patched Orpheus model using background loop ---
52
+ tts_event_loop = BackgroundEventLoop()
53
+
54
class PatchedOrpheusModel(OrpheusModel):
    """OrpheusModel whose sync token generator is safe for concurrent calls.

    Defaulting request_id to a fresh uuid4 per call avoids vLLM's
    "id already running" error when several scenes synthesize in parallel.
    """

    def generate_tokens_sync(self, prompt, voice=None, request_id=None,
                             temperature=0.6, top_p=0.8, max_tokens=1200,
                             stop_token_ids=None, repetition_penalty=1.3):
        """Yield generated text chunks for *prompt*, driving vLLM's async
        generator through the shared background event loop.

        FIX: the original used a mutable default argument
        (stop_token_ids=[49158]); the shared list could be mutated across
        calls. A None sentinel with the same effective default preserves
        behavior.
        """
        if stop_token_ids is None:
            stop_token_ids = [49158]

        # A unique id per call prevents the "id already running" error.
        if request_id is None:
            request_id = str(uuid.uuid4())

        prompt_string = self._format_prompt(prompt, voice)
        sampling_params = SamplingParams(
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
            stop_token_ids=stop_token_ids,
            repetition_penalty=repetition_penalty,
        )
        async_gen = self.engine.generate(
            prompt=prompt_string,
            sampling_params=sampling_params,
            request_id=request_id,  # use the unique ID
        )
        for result in tts_event_loop.run_generator(async_gen):
            if not isinstance(result, RequestOutput):
                raise TypeError(f"Unexpected result type: {type(result)}")
            yield result.outputs[0].text
80
+
81
+ # --- Persistent global model ---
82
+ # This section is correct and does not need changes.
83
+ model = None
84
+
85
+
86
+
87
def setup_model():
    """Lazily construct the global PatchedOrpheusModel (idempotent).

    Reads the model name from the YAML config; repeated calls are no-ops
    once the model exists.
    """
    global model
    if model is not None:
        return
    print("Loading TTS model...")
    config = load_yaml()
    model = PatchedOrpheusModel(model_name=config["tts"]["model_name"])
    print("✅ Model loaded and ready.")
94
+
95
def synthesize_for_scene(
    prompt: str,
    voice: str = "miko",
    temperature: float = 0.6,
    top_p: float = 0.9,
    repetition_penalty: float = 1.3,
    max_tokens: int = 1200,
):
    """Generate speech for *prompt* and return (wav_bytes, wav_base64).

    Safe to call from multiple worker threads: each call gets its own
    unique request_id inside PatchedOrpheusModel.
    """
    global model

    # Collect the streamed PCM chunks into one buffer.
    pcm = bytearray()
    for piece in model.generate_speech(
        prompt=prompt,
        voice=voice,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        repetition_penalty=repetition_penalty,
    ):
        pcm.extend(piece)

    # Wrap the PCM in a WAV container: mono, 16-bit samples, 24 kHz.
    wav_buffer = BytesIO()
    with wave.open(wav_buffer, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(24000)
        wf.writeframes(pcm)

    wav_bytes = wav_buffer.getvalue()
    return wav_bytes, base64.b64encode(wav_bytes).decode("utf-8")
readme.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Miko TTS

A fine-tuned version of Orpheus TTS.
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ orpheus-speech
setup.sh ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ apt update && apt install -y \
5
+ ffmpeg \
6
+ libespeak-dev \
7
+ python3.10-dev \
8
+ python3.10-distutils \
9
+ build-essential \
10
+ curl
11
+
12
+ curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
13
+
14
+ python3.10 -m pip install "numpy<2.0.0" "setuptools<60"
15
+ python3.10 -m pip install aeneas
16
+ python3.10 -m pip install orpheus-speech
17
+
18
+
19
+ echo "✅ Done. Aeneas and Orpheus-Speech are installed globally for Python 3.10."
temp.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+
3
+ import socket
4
+ import struct
5
+ import json
6
+ import msgpack
7
+
8
+ import zlib
9
+ import re
10
+ from util import calculate_duration_from_bytes, update_motion_generator_duration
11
+
12
+ import base64
13
+ from typing import Dict, Any, List, Tuple
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ from aligner import align_words, setup_aligner
16
+ import os
17
+
18
+ #from orpheus_engine import synthesize_for_scene, setup_model
19
+
20
+ # Config and basic networking functions
21
+ config_path = os.path.join(os.path.dirname(__file__), '..', 'config.json')
22
+ with open(config_path, 'r') as f:
23
+ config = json.load(f)
24
+ HOST = config["HOST"]
25
+ PORT = config["PORT"]
26
+
27
+ print(f"Connecting to {HOST}:{PORT}")
28
+ MAGIC = 0x2333
29
+
30
+ def patch_socket_keepalive(sock: socket.socket) -> None:
31
+ """Set keepalive + long timeout to prevent halts on idle."""
32
+ sock.settimeout(None) # Never timeout on recv
33
+ sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
34
+
35
+ # Platform-specific tuning
36
+ if hasattr(socket, 'TCP_KEEPIDLE'):
37
+ sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 10)
38
+ if hasattr(socket, 'TCP_KEEPINTVL'):
39
+ sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 5)
40
+ if hasattr(socket, 'TCP_KEEPCNT'):
41
+ sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3)
42
+
43
+ def recv_exact(sock: socket.socket, n: int) -> bytes:
44
+ buf = bytearray()
45
+ while len(buf) < n:
46
+ chunk = sock.recv(n - len(buf))
47
+ if not chunk:
48
+ raise EOFError("Connection closed prematurely")
49
+ buf.extend(chunk)
50
+ return bytes(buf)
51
+
52
+ def send_frame(sock: socket.socket, event: str, payload: Any) -> None:
53
+ # Use msgpack instead of JSON
54
+ raw = msgpack.packb({"event": event, "payload": payload}, use_bin_type=True)
55
+ comp = zlib.compress(raw)
56
+
57
+ # <MAGIC><raw_len><comp_len>
58
+ header = struct.pack("<III", MAGIC, len(raw), len(comp))
59
+ sock.sendall(header + comp)
60
+
61
+ MAGIC_JSON = 0xDEADBEEF
62
+
63
+ def recv_frame(sock: socket.socket) -> Dict[str, Any]:
64
+ header = recv_exact(sock, 12)
65
+ magic, raw_len, comp_len = struct.unpack("<III", header)
66
+ if magic != MAGIC_JSON:
67
+ raise RuntimeError("Bad magic number – protocol mismatch")
68
+ comp_bytes = recv_exact(sock, comp_len)
69
+ raw_bytes = zlib.decompress(comp_bytes)
70
+ return json.loads(raw_bytes.decode())
71
+
72
+ def strip_tags(text: str) -> str:
73
+ no_tags = re.sub(r"<[^>]+>", "", text)
74
+ words = re.findall(r"\b[a-zA-Z']+\b", no_tags)
75
+ return " ".join(words).strip()
76
+
77
+ def align_audio(audio_bytes: bytes, scene_text: str) -> Tuple:
78
+ """
79
+ Helper function that runs both TTS and alignment for a single scene.
80
+ This entire function will be executed in a parallel thread.
81
+ """
82
+
83
+ """
84
+ dummy_path = "output_0.wav"
85
+ if not os.path.exists(dummy_path):
86
+ raise FileNotFoundError("Dummy file 'output_0.wav' not found.")
87
+
88
+ # Read dummy WAV file as bytes
89
+ with open(dummy_path, "rb") as f:
90
+ audio_bytes = f.read()
91
+
92
+ # Strip tags from text (optional)
93
+ spoken_text = strip_tags(scene_text)
94
+ """
95
+ # Align
96
+ alignment = align_words(audio_bytes, scene_text)
97
+
98
+ return alignment
99
+
100
def generate_audio(scene: Dict[str, Any]) -> Tuple[bytes, str]:
    """Scratch-file stub (temp.py) — the real TTS call is disabled.

    NOTE(review): with the synthesize_for_scene call moved into this
    docstring and the dummy-file fallback below left as a dead string
    literal, `audio_bytes` and `audio_base64` are never assigned, so this
    function raises NameError when called. Re-enable one of the two
    disabled paths before using this module.
    """
    # In a real scenario, this would call your TTS engine.
    """
    dummy_path = "output_0.wav"
    if not os.path.exists(dummy_path):
        raise FileNotFoundError("Dummy file 'output_0.wav' not found.")

    with open(dummy_path, "rb") as f:
        audio_bytes = f.read()

    audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")"""
    return audio_bytes, audio_base64
122
+
123
+ def handle_connection(sock: socket.socket) -> None:
124
+ send_frame(sock, "hello", {"role": "tts"})
125
+ print("→ hello (role=tts) sent")
126
+
127
+ while True:
128
+ try:
129
+ frame = recv_frame(sock)
130
+ except EOFError:
131
+ print('[ "Connection closed by the other side" ]')
132
+ break
133
+
134
+ event = frame.get("event")
135
+ payload = frame.get("payload")
136
+
137
+ if event != "generate-voice":
138
+ print(f"⚠️ unknown event {event}, ignored")
139
+ continue
140
+
141
+ scenes: List[dict] = payload.get("scenes", [])
142
+
143
+ # --- STAGE 1: FAST Audio Generation & Duration Notification ---
144
+ # The goal here is to get durations to the motion generator ASAP.
145
+ generated_audio_data = []
146
+ print("")
147
+ print("--- Generating Audios Thread ---")
148
+ with ThreadPoolExecutor(max_workers=10) as executor:
149
+ # Submit all the FAST audio generation tasks
150
+ future_to_scene = {
151
+ executor.submit(generate_audio, scene): scene
152
+ for scene in scenes if scene.get("txt")
153
+ }
154
+
155
+ # As each FAST audio generation task completes...
156
+ for future in as_completed(future_to_scene):
157
+ scene = future_to_scene[future]
158
+ try:
159
+ scene_id = scene["sceneId"]
160
+ motion_index = scene.get("motionIndex", 0)
161
+ # 1. Get the generated audio
162
+ audio_bytes, audio_base64 = future.result()
163
+ print("")
164
+ print(f'[ "Generated Audio {scene_id}, Motion: {motion_index}" ]')
165
+
166
+ # 2. Calculate duration instantly
167
+ duration = calculate_duration_from_bytes(audio_bytes)
168
+
169
+ # 3. Notify motion generator IMMEDIATELY
170
+ if duration > 0:
171
+ update_motion_generator_duration(scene["sceneId"], scene.get("motionIndex", 0), duration)
172
+
173
+ # 4. Store the results to be used in the next (slow) stage
174
+ generated_audio_data.append({
175
+ "scene": scene,
176
+ "audio_bytes": audio_bytes,
177
+ "audio_base64": audio_base64
178
+ })
179
+ except Exception as e:
180
+ print(f"Error during audio generation for {scene['sceneId']}: {e}")
181
+
182
+ # --- STAGE 2: SLOW Word Alignment in Parallel ---
183
+ # Now that all notifications are sent, we can perform the slow alignment work.
184
+ response_by_scene: Dict[str, Any] = {}
185
+ print("")
186
+ print("--- Word Alignments Thread ---")
187
+ with ThreadPoolExecutor(max_workers=10) as executor:
188
+ # Use the data from Stage 1 to submit SLOW alignment tasks.
189
+ # We call `align_words` directly (your `align_audio` function is not needed).
190
+ future_to_data = {
191
+ executor.submit(align_words, data["audio_bytes"], strip_tags(data["scene"]["txt"])): data
192
+ for data in generated_audio_data
193
+ }
194
+
195
+ # As each SLOW alignment task completes...
196
+ for future in as_completed(future_to_data):
197
+ data = future_to_data[future]
198
+ scene = data["scene"]
199
+ scene_id = scene["sceneId"]
200
+ motion_index = scene.get("motionIndex", 0)
201
+
202
+ try:
203
+ # 1. Get the alignment result
204
+ alignment = future.result()
205
+ print("")
206
+ print(f'[ "Aligned {scene_id}, Motion: {motion_index}" ]')
207
+
208
+ # 2. Now, build the final response object with all the data
209
+ voice_audio = {
210
+ "motion": motion_index,
211
+ "audio_base64": data["audio_base64"], # From Stage 1
212
+ "alignment": alignment, # From Stage 2
213
+ }
214
+ if scene_id not in response_by_scene:
215
+ response_by_scene[scene_id] = {"sceneId": scene_id, "audioEvents": []}
216
+ response_by_scene[scene_id]["audioEvents"].append(voice_audio)
217
+
218
+ except Exception as e:
219
+ print(f"Error during alignment for scene {scene_id}: {e}")
220
+
221
+ if response_by_scene:
222
+ send_frame(sock, "voice-generated", list(response_by_scene.values()))
223
+ print("")
224
+ print(f"[ ← Audios ({len(response_by_scene)}) sent ]")
225
+
226
+ def main() -> None:
227
+ # Setup the Orpheus TTS model on startup.
228
+ #setup_model()
229
+ # Setup the aligner (does nothing for aeneas, but keeps pattern consistent)
230
+ setup_aligner()
231
+
232
+ while True:
233
+ try:
234
+ with socket.create_connection((HOST, PORT), timeout=60) as sock:
235
+ patch_socket_keepalive(sock)
236
+ print(f'["Connected to server at {HOST}:{PORT}"]')
237
+ handle_connection(sock)
238
+ except (ConnectionRefusedError, OSError) as e:
239
+ print(f"Connection error: {e}, retrying in 5s")
240
+ except Exception as e:
241
+ print(f"Unhandled error: {e}, reconnecting in 5s")
242
+ finally:
243
+ import time
244
+ time.sleep(5)
245
+
246
+ if __name__ == "__main__":
247
+ main()
248
+
transcript.txt ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ eye_blink_1
2
+ eye_blink_1_L
3
+ eye_blink_1_R
4
+ eye_blink_2
5
+ eye_blink_2_L
6
+ eye_blink_2_R
7
+ eye_smile_1
8
+ eye_smile_1_L
9
+ eye_smile_1_R
10
+ eye_smile_2
11
+ eye_smile_2_L
12
+ eye_smile_2_R
13
+ eye_nagomi
14
+ eye_nagomi_L
15
+ eye_nagomi_R
16
+ eye_happy
17
+ eye_happy_L
18
+ eye_happy_R
19
+ eye_angry
20
+ eye_angry_L
21
+ eye_angry_R
22
+ eye_sad
23
+ eye_sad_L
24
+ eye_sad_R
25
+ eye_surprise
26
+ eye_surprise_L
27
+ eye_surprise_R
28
+ eye_sleepy
29
+ eye_sleepy_L
30
+ eye_sleepy_R
31
+ eye_tsuri_1
32
+ eye_tsuri_1_L
33
+ eye_tsuri_1_R
34
+ eye_tsuri_2
35
+ eye_tsuri_2_L
36
+ eye_tsuri_2_R
37
+ eye_tsuri_3
38
+ eye_tsuri_3_L
39
+ eye_tsuri_3_R
40
+ eye_tare_1
41
+ eye_tare_1_L
42
+ eye_tare_1_R
43
+ eye_tare_2
44
+ eye_tare_2_L
45
+ eye_tare_2_R
46
+ eye_tare_3
47
+ eye_tare_3_L
48
+ eye_tare_3_R
49
+ eye_maru
50
+ eye_maru_L
51
+ eye_maru_R
52
+ eye_><
53
+ eye_><_L
54
+ eye_><_R
55
+ eye_marushiro
56
+ eye_dark
57
+ eye_OO
58
+ eye_big
59
+ eye_small
60
+ eye_○_1
61
+ eye_○_2
62
+ eye_cat
63
+ eye_uru
64
+ eye_heart
65
+ eye_shiitake
66
+ eye_guruguru
67
+ eye_x
68
+ eye_look_up_1
69
+ eye_look_up_2
70
+ eye_look_down
71
+ eye_look_L
72
+ eye_look_R
73
+ eye_look_inside
74
+ eye_look_outside
75
+ eye_back
76
+ eye_pupil_big
77
+ eye_pupil_small
78
+ eye_pupil_up
79
+ eye_pupil_down
80
+ eye_pupil_close
81
+ eye_pupil_far
82
+ eye_pupil_x
83
+ eye_pupil_cat
84
+ eye_pupil_heart
85
+ eye_pupil_heart_big
86
+ eye_pupil_star
87
+ eye_pupil_star_big
88
+ eye_pupil_0
89
+ eye_pupil_0_big
90
+ eye_pupil_○
91
+ eye_pupil_○_big
92
+ eye_pupil_□
93
+ eye_pupil_□_big
94
+ eye_pupil_+
95
+ eye_pupil_+_big
96
+ eye_pupil_X
97
+ eye_pupil_X_big
98
+ eye_up
99
+ eye_down
100
+ eye_close
101
+ eye_far
102
+ eye_size_big
103
+ eye_size_small
104
+ eye_width_up
105
+ eye_width_down
106
+ eye_height_up
107
+ eye_height_down
108
+
109
+ FULL EYE listen
110
+
111
+ eyebrow_smile
112
+ eyebrow_smile_L
113
+ eyebrow_smile_R
114
+ eyebrow_straight
115
+ eyebrow_straight_L
116
+ eyebrow_straight_R
117
+ eyebrow_angry_1
118
+ eyebrow_angry_1_L
119
+ eyebrow_angry_1_R
120
+ eyebrow_angry_2
121
+ eyebrow_angry_2_L
122
+ eyebrow_angry_2_R
123
+ eyebrow_sad_1
124
+ eyebrow_sad_1_L
125
+ eyebrow_sad_1_R
126
+ eyebrow_sad_2
127
+ eyebrow_sad_2_L
128
+ eyebrow_sad_2_R
129
+ eyebrow_trouble
130
+ eyebrow_trouble_L
131
+ eyebrow_trouble_R
132
+ eyebrow_up
133
+ eyebrow_up_L
134
+ eyebrow_down
135
+ eyebrow_down_L
136
+ eyebrow_down_R
137
+ eyebrow_tsuri
138
+ eyebrow_tsuri_L
139
+ eyebrow_tsuri_R
140
+ eyebrow_tare
141
+ eyebrow_tare_L
142
+ eyebrow_tare_R
143
+ eyebrow_close
144
+ eyebrow_far
145
+ eyebrow_thin
146
+ eyebrow_thick
147
+ eyebrow_wide
148
+ eyebrow_narrow
149
+ eyebrow_forward_1
150
+ eyebrow_forward_1_L
151
+ eyebrow_forward_1_R
152
+ eyebrow_forward_2
153
+ eyebrow_forward_2_L
154
+ eyebrow_forward_2_R
155
+ eyebrow_forward_3
156
+ eyebrow_forward_3_L
157
+ eyebrow_forward_3_R
158
+ eyebrow_back_1
159
+ eyebrow_back_1_L
160
+ eyebrow_back_1_R
161
+ eyebrow_back_2
162
+ eyebrow_back_2_L
163
+ eyebrow_back_2_R
164
+ eyebrow_back_3
165
+ eyebrow_back_3_L
166
+ eyebrow_back_3_R
167
+ eyebrow_maromayu
168
+ eyebrow_shadow_
169
+ eyebrow_×
170
+
171
+ FULL EYEBROW
172
+
173
+ MOUTH
174
+ mouth_straight
175
+ mouth_straight_L
176
+ mouth_straight_R
177
+ mouth_smile_1
178
+ mouth_smile_1_L
179
+ mouth_smile_1_R
180
+ mouth_smile_2
181
+ mouth_angry_1
182
+ mouth_angry_1_L
183
+ mouth_angry_1_R
184
+ mouth_angry_2
185
+ mouth_wide
186
+ mouth_wide_L
187
+ mouth_wide_R
188
+ mouth_narrow
189
+ mouth_narrow_L
190
+ mouth_narrow_R
191
+ mouth_V
192
+ mouth_∧_1
193
+ mouth_∧_2
194
+ mouth_n
195
+ mouth_pokan
196
+ mouth_hawa
197
+ mouth_nihe
198
+ mouth_o_big
199
+ mouth_o_small
200
+ mouth_△_1
201
+ mouth_△_2
202
+ mouth_□
203
+ mouth__
204
+ mouth___
205
+ mouth_wa
206
+ mouth_ω
207
+ mouth_ω_open_1
208
+ mouth_ω_open_2
209
+ mouth_yodare_L
210
+ mouth_yodare_R
211
+ mouth_grin_L
212
+ mouth_grin_L_open
213
+ mouth_grin_R
214
+ mouth_grin_R_open
215
+ mouth_puku-
216
+ mouth_puku-_L
217
+ mouth_puku-_R
218
+ mouth_pero_1
219
+ mouth_pero_2
220
+ mouth_pero_3
221
+ mouth_pero_4
222
+ mouth_pero_5
223
+ mouth_pero_6
224
+ mouth_tehepero_1
225
+ mouth_tehepero_2
226
+ mouth_tehepero_3
227
+ mouth_wide_ALL
228
+ mouth_narrow_ALL
229
+ mouth_up
230
+ mouth_down
231
+ mouth_forward
232
+ mouth_back
233
+
234
+ Current Pelvis Smplx Rot: (X: -0.021377, Y: 0.684438, Z: -0.728309, W: 0.025578)
235
+ Current Pelvis Smplx Rot: (X: -0.361679, Y: 0.580401, Z: -0.611046, W: 0.39868)
236
+ Current Pelvis Smplx Rot: (X: -0.692902, Y: -0.097601, Z: 0.098692, W: 0.707545)
237
+ Current Pelvis Smplx Rot: (X: -0.006667, Y: 0.70305, Z: -0.711109, W: 0.000571)
238
+ Current Pelvis Smplx Rot: (X: -0.493132, Y: 0.452919, Z: -0.492166, W: 0.556289)
239
+ Current Pelvis Smplx Rot: (X: -0.587168, Y: 0.351233, Z: -0.366381, W: 0.630582)
240
+ Current Pelvis Smplx Rot: (X: -0.668992, Y: 0.209959, Z: -0.24474, W: 0.669678)
241
+ Current Pelvis Smplx Rot: (X: -0.670558, Y: -0.093318, Z: 0.111714, W: 0.727436)
242
+ Current Pelvis Smplx Rot: (X: -0.57651, Y: -0.34049, Z: 0.389817, W: 0.632255)
243
+ Current Pelvis Smplx Rot: (X: -0.265756, Y: 0.609998, Z: -0.671782, W: 0.325552)
244
+
245
+ <chat_message> Hey Miko did you know that a banana is a berry, and strawberries aren't actually berries.
246
+ <normal> Hey Miko, did you know that a banana is a berry, and strawberries aren't actually berries.
247
+ <normal> Actually, I did know that
248
+ <normal> Bananas are a baby, a baby. <laugh> Bananas are a Berry.
249
+ <normal> Did you know that avocados are berry? Yeah for real.
250
+
251
+ <chat_message> We're eating babies?
252
+ <normal> We're eating babies?
253
+ <normal> No, you're not eating babies!
254
+ <normal> You're eating the reproductive organs of plants. <chuckle> That is true! That's what fruit are, literally! <laugh>
255
+
256
+
257
+ [00:26] <chat_message> Biboo do you eat your fruit with or without the shell? Ong no cap fr they got me geekin like im on a mission when i finna bust one open and go skrrrt skrrrt like im delulu flexin with the low taper fade
258
+ [00:26] <normal> Biboo do you eat your fruit with or without the shell? Ong no cap fr they got me geekin like I'm on a mission when I finna bust one open and go skrrt skrrt like I'm delulu flexin with the low taper fade.
259
+ [00:42] <normal> <chuckle> The bad thing is I understood all of that.
260
+ [00:45] <normal> Yeah, that got me geekin' for real! <laugh> No cap!
261
+ [00:50] <normal> I eat fruit with the shell! Unless it's kiwis! No shell for kiwis! But I will eat apple shell!
262
+
263
+ [00:57] <chat_message> Pineapples?
264
+ [00:58] <normal> Pineapples? have a shell? Yes? <chuckle> I've never... <laugh> I'm about to sound so spoiled. I've never had to eat a pineapple that wasn't cut up for me! Thanks, mom! <laugh> I've never had to eat fruit that wasn't cut up for me. Mom... <laugh>
265
+ [01:13] <normal> <laugh> that's a love language! That's a love language for Asian parents though! That's how you know your mom truly loves you is when she cuts up fruit for you! It's not 'I'm proud of you!', it's when she cuts up fruit after your meal. That's, That's how you know.
266
+ [01:27] <normal> So I've never had to eat pineapple with the shell, okay. I have seen a pineapple with the shell. Yes, it's spiky.
267
+
268
+ [01:36] <chat_message> What about Durian?
269
+ [01:36] <normal> What about Durian?
270
+ [01:37] <whisper> I've never had a Durian! oops, I've never had one! I've never eaten one! Sorry.
271
+ [01:43] <chat_message> What?
272
+ [01:43] <normal> What?
273
+ [01:44] <normal> Yeah. though I am willing to give it a chance. I know it smells like really-really bad. I think I smelled it before, but I'm willing to try it! <chuckle> Yeah, it's a really pungent fruit
274
+
275
+ [00:00] <chat_message> it's super easy to get a new wife, barely an inconvenience
276
+ [00:00] <normal> It's super easy to get a new wife, barely an inconvenience.
277
+ [00:05] <normal> Does that mean it's super easy to get a new Biboo? Huh?
278
+ [00:09] <angry> No! <normal> It's not super easy to get a new Biboo. Cause only Biboo is Biboo. Biboo is just Biboo. We got Momseki. <chuckle>
279
+ [00:19] <shout> NO!
280
+ [00:20] <normal> Biboo is Biboo, and Momseki is Momseki. I still don't hear the... the voice resemblance. Still. <chuckle>
281
+ [00:29] <normal> <sings> ...hitori de ikite ikesou, sore de ii no... <sings>
282
+ [00:37] <normal> I don't think I sound like her, but there were so many comments like: You sound like Momseki. I don't hear it! I don't hear it! She is cute, I guess. She is very cute when she's singing. I will agree with that.
283
+
284
+ [00:48] <chat_message> Cuter than you.
285
+ [00:48] <normal> Cuter than you. <gasp> Maybe. <chuckle>
286
+
287
+ [01:03] <chat_message> O Wise and Mighty Bojou, Master of Pronunciation, would you please do us the honor of enlightening us to the correct pronunciation of Werno?
288
+ [01:04] <normal> O Wise and Mighty Bijou, Master of Pronunciation, would you please do us the honor of enlightening us to the correct pronunciation of... <breath> I don't know that word, is it safe? Is this a trick? Is this a trap?
289
+ [01:19] <shout> Ohhh!
290
+ [01:25] <normal> Oh, I see... <chuckle> You know the rules and so do aah
291
+
292
+ [01:31] <chat_message> Mission failed we'll get her next time
293
+ [01:31] <normal> Mission failed we'll get her next time. You have to try harder! I'm always careful. You have to try harder, okay? If you wanna beat the master of memes, you need to... I'm always careful. I'm always careful because I don't wanna get Deez nutted. <laugh>
294
+
295
+ [01:46] <chat_message> Biboo! Can't wait to see you and the rest of Advent girls at conventions soon! ACEN (Anime Central) and AX (Anime Expo) have happened already, but it would be awesome if we can see you at SauCon!
296
+ [01:47] <normal> Biboo! Can't wait to see you and the rest of Advent girls at conventions soon! Oh yeah! That'd be cool! I- I would love to do a meet and greet someday. I would love to meet you guys at SauCon... Deez nutz. Try again.
297
+
298
+ [02:03] <chat_message> You should use the Stunseed powerup
299
+ [02:04] <shout> Stunseed is just Deez nutz backwards! Try again!
300
+
301
+ [02:08] <chat_message> Do you know the 2nd evolution for Ryhorn Biboo?
302
+ [02:08] <normal> Do you know the 2nd evolution for Ryhorn Biboo?
303
+ [02:11] <shout> Rhydon Deez Nutz! Jottem!
304
+ [02:15] <normal> Try better. Try better. I know all of the easy ones.
305
+ [02:19] <normal> He- hey FromSoft DLC people, if you're listening, I would really appreciate it if you made Ranni a summon. So she could fight with me. I feel like that would be the coolest thing ever and it make people buy the DLC more. So, I'm just saying, maybe make my wife a DLC summon... would be pretty, great.
306
+
307
+ [02:41] <chat_message> Does that make Ranni an i-Ladies?
308
+ [02:42] <shout> Does that make Ranni an i-Ladies? I Lay- I LayDeez Nutz on your face! That's EZ! You can't use the obvious ones I keep telling you!
309
+
310
+ [02:52] <chat_message> Does Ranni listen to Imagine Dragons?
311
+ [02:52] <normal> Does Ranni listen to Imagine Dragons?
312
+ [02:55] <shout> Imagine dragging Deez Nutz on your face! Again! Again, I warned you! I warned you!
313
+
314
+ [03:02] <chat_message> Does Ranni play Sea of Thieves?
315
+ [03:03] <shout> Sea of Thieves? See of- See of Deez Nutz on your face! Dang it! I know it! I know them all! Dang it!
316
+
317
+ [04:34] <chat_message> Biboo what do you think about ppl saying m'lady unironically in 2023?
318
+ [04:34] <normal> Biboo what do you think about people saying m'lady unironically in 2023?
319
+ [04:42] <normal> You mean m'layDeez Nutz in your mouth? <chuckle> Yeah! I think that's a Deez Nuts joke. I'm pretty sure. I'm pretty sure that's a deez- I'm pretty sure. <chuckle> I'm pretty sure. <chuckle> GOTTEM! If it's not, then uh... <chuckle> oops, my bad. Thank you! I can never be too careful.
320
+
321
+ [05:02] <chat_message> Biboo, have you played Metal Gear Rising: Revengeance? It's an action Metal Gear game where you play as a cyborg named Raidin
322
+ [05:02] <normal> Hey Biboo, have you played Metal Gear Rising: Revengeance? It's an action Metal Gear game where you play as a cyborg named Raidin-
323
+ [05:12] <normal> Raidin Deez Nutz! Good try! <chuckle> And yes I have played Revengeance before. Good try!
324
+
325
+ [05:20] <chat_message> They gave you a C because you still need CD's
326
+ [05:20] <normal> AKB, thank you for the super.
327
+ [05:22] <normal> They gave you a C because you still need CD's- <shout> CDEEZ NUTS! <normal> Nice try! <chuckle> Nice try!
328
+
329
+ [05:30] <chat_message> Fruit is looking good. Do you like fruit pudding?
330
+ [05:30] <normal> Fruit is looking good. Do you like fruit-
331
+ [05:33] <normal> pudding Deez Nutz in your mouth!
332
+ [05:35] <angry> What even is fruit pudding?! That doesn't exist, not really! <chuckle> Nice try!
333
+
334
+ [05:40] <chat_message> Hi Biboo, LOVED your Alice costume in the Advent Halloween cover! But did you know your senpai are also getting new costumes? Whose is your favourite and why is it Gawr Ghoul?
335
+ [05:41] <normal> Hi Biboo, LOVED your Alice costume in the Halloween oven co- <shout> Advent cover! <normal> But did you know your senpai are also getting new costumes?
336
+ [05:48] <normal> Are they?? <normal> Are they? I- that's the first I've heard of it. Whose your favorite and why is it Gawr Ghoul? <laugh> Ah, that's a- that's a funny pun. <laugh>
337
+ [05:58] <normal> Are they? Are they? I can't say who is my favorite in... in like EN Senpai because... It depends on like who is nice to me at the time. And they're all nice to me. So like, if they- I said this before, but... <chuckle>
338
+
339
+ [00:00] <chat_message> Hello Biboo, I just wanted to congratulate you for winning the Hololive Fan Discord Sever LEAST Sexiest hololive member Contest. Here's your prize! $100
340
+ [00:00] <normal> Hello Biboo, I just wanted to congratulate you for winning the hololive fan discord server 'Least Sexiest' hololive member contest.
341
+ [00:09] <sad> here's your prize...
342
+ [00:12] <sad> Kobo and Gura were second and third?
343
+ [00:15] <normal> But I can be sexy...
344
+ [00:17] <shout> Hey! Whatever!
345
+ [00:19] <normal> Least sexiest means all my points on the... on the cute are maxed out on cuteness, okay? So, that just means I am the most cute! Yes
346
+ [00:00] <happy> Peter thank you for the super rock rock <chuckle>
347
+
348
+ [00:03] <chat_message> Biboo. Do I Have Your Blessing to Lick the Candy Feet Slippers In the Comfort of My Own Home? Do You Know What They Taste Like? Are They Scented too?
349
+ [00:04] <normal> Biboo. Do I have your blessing- <gasp> what? Do I have your blessing to lick the... uh... candy feet slippers in the comfort of my own home? Do you know what they taste like? Are they scented too?
350
+ [00:21] <normal> Um... you don't need my permission to do anything. Um... what you do in the comfort of your own home... is uh up to you, but I will not tell you what... what... huh? <chuckle> <sigh>
351
+ [00:46] <normal> um... but also get some help. rock rock.
352
+
353
+ [00:49] <chat_message> That's a yes? haha
354
+ [00:50] <normal> That's a yes?
355
+ [00:51] <angry> no it's you do you <normal> I will be over here. I will be... I will be over here. You... you do you. I will be uh...
356
+ [01:08] <normal> Oh. <laugh>
util.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import wave
2
+ import io
3
+ import os
4
+ import json
5
+ import requests
6
+ import yaml
7
+
8
+
9
def load_yaml(file_path: str = "../config.yaml"):
    """
    Load a YAML configuration file and return its parsed contents.

    Note: the previous docstring claimed this "extracts the 'model_name'
    string", but the function has always returned the full parsed config.

    Args:
        file_path: Path to the YAML file. Defaults to ``../config.yaml``
            (relative to the process working directory), preserving the
            original hard-coded behavior.

    Returns:
        The parsed configuration (typically a dict), or None when the file
        is missing or cannot be parsed. Errors are reported via print.
    """
    try:
        with open(file_path, 'r') as file:
            return yaml.safe_load(file)
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file: {e}")
        return None
25
+
26
# Load the shared project configuration once at import time.
config = load_yaml()
if config is None:
    # Fail fast with an explicit message instead of the opaque
    # "'NoneType' object is not subscriptable" TypeError the subscript
    # below would otherwise raise when config.yaml is missing/invalid.
    raise RuntimeError("Failed to load ../config.yaml; SYNC_PORT is required.")

# Port of the motion generator's sync server, read from config.yaml.
SYNC_PORT = config["SYNC_PORT"]
# Endpoint used to report actual audio durations to the motion generator.
MOTION_SYNC_URL = f"http://localhost:{SYNC_PORT}/update_duration"
30
+
31
def update_motion_generator_duration(scene_id: str, motion_index: int, duration: float):
    """Sends the actual audio duration to the motion generator's sync server."""
    body = {
        "sceneId": scene_id,
        "motionIndex": motion_index,
        "duration": duration,
    }
    try:
        # POST to the sync server running inside the motion generator script.
        resp = requests.post(MOTION_SYNC_URL, json=body, timeout=5)
    except requests.exceptions.RequestException as e:
        print(f"⚠️ Error connecting to motion generator sync server: {e}")
        return

    if resp.status_code == 200:
        print(f"[ sent duration for {scene_id}:{motion_index} ]")
    else:
        print(f"⚠️ Failed to notify motion generator for {scene_id}_{motion_index}. Status: {resp.status_code}, Response: {resp.text}")
47
+
48
def calculate_duration_from_bytes(audio_bytes: bytes) -> float:
    """Calculates audio duration in seconds directly from WAV bytes in memory.

    Returns 0.0 when the bytes are not a parseable WAV stream or the
    reported sample rate is not positive.
    """
    try:
        with wave.open(io.BytesIO(audio_bytes), 'rb') as wav:
            sample_rate = wav.getframerate()
            # Guard against a zero/negative rate in a malformed header.
            if sample_rate <= 0:
                return 0.0
            return wav.getnframes() / float(sample_rate)
    except (wave.Error, ZeroDivisionError) as e:
        print(f"⚠️ Could not calculate duration from bytes: {e}")
        return 0.0
58
+