Spaces:

Remostartdev
/

STREAM_TTS

Runtime error

App Files Community

drrobot9 committed 20 days ago

Commit

c56c006

verified ·

1 Parent(s): 6b6a9ba

Update app/main.py

Browse files

Files changed (1) hide show

app/main.py +13 -18

app/main.py CHANGED Viewed

@@ -47,7 +47,6 @@ def wav_header(sr=24000, ch=1, bits=16):
 async def generate_response(websocket: WebSocket, audio_np: np.ndarray):
     chat = ChatState(processor)
     chat.new_turn("system")
@@ -56,7 +55,7 @@ async def generate_response(websocket: WebSocket, audio_np: np.ndarray):
     chat.new_turn("user")
     audio_tensor = torch.from_numpy(audio_np[np.newaxis, :]).to(dtype=torch.float32)
-    chat.add_audio(audio_tensor, sampling_rate=SAMPLE_RATE)
     chat.end_turn()
     chat.new_turn("assistant")
@@ -72,13 +71,12 @@ async def generate_response(websocket: WebSocket, audio_np: np.ndarray):
             audio_temperature=0.8,
             audio_top_k=4,
         ):
             if token.numel() == 1:
                 continue
-            token_id = token.item()
-            if processor.audio_token_start <= token_id <= processor.audio_token_end:
-                audio_buffer.append(token)
             if len(audio_buffer) >= CHUNK_SIZE:
                 audio_codes = (
@@ -86,19 +84,16 @@ async def generate_response(websocket: WebSocket, audio_np: np.ndarray):
                     .unsqueeze(0)
                     .to(DEVICE)
                 )
                 try:
                     waveform = processor.decode(audio_codes)
-                except Exception:
                     audio_buffer.clear()
-                    continue
-                waveform = waveform.squeeze().cpu().numpy()
-                waveform = np.clip(waveform, -1.0, 1.0)
-                audio_int16 = (waveform * 32767).astype(np.int16)
-                await websocket.send_bytes(audio_int16.tobytes())
-                audio_buffer.clear()
     # flush remaining
     if len(audio_buffer) > 1:
@@ -113,8 +108,8 @@ async def generate_response(websocket: WebSocket, audio_np: np.ndarray):
             waveform = np.clip(waveform, -1.0, 1.0)
             audio_int16 = (waveform * 32767).astype(np.int16)
             await websocket.send_bytes(audio_int16.tobytes())
-        except Exception:
-            pass
     await websocket.send_text(json.dumps({"type": "done"}))

 async def generate_response(websocket: WebSocket, audio_np: np.ndarray):
     chat = ChatState(processor)
     chat.new_turn("system")
     chat.new_turn("user")
     audio_tensor = torch.from_numpy(audio_np[np.newaxis, :]).to(dtype=torch.float32)
+    chat.add_audio(audio_tensor, sampling_rate=SAMPLE_RATE)
     chat.end_turn()
     chat.new_turn("assistant")
             audio_temperature=0.8,
             audio_top_k=4,
         ):
+            # numel()==1 means text token
             if token.numel() == 1:
                 continue
+            # multi-element tensor = audio codes chunk
+            audio_buffer.append(token)
             if len(audio_buffer) >= CHUNK_SIZE:
                 audio_codes = (
                     .unsqueeze(0)
                     .to(DEVICE)
                 )
                 try:
                     waveform = processor.decode(audio_codes)
+                    waveform = waveform.squeeze().cpu().numpy()
+                    waveform = np.clip(waveform, -1.0, 1.0)
+                    audio_int16 = (waveform * 32767).astype(np.int16)
+                    await websocket.send_bytes(audio_int16.tobytes())
+                except Exception as e:
+                    print(f"[WARN] decode error: {e}")
+                finally:
                     audio_buffer.clear()
     # flush remaining
     if len(audio_buffer) > 1:
             waveform = np.clip(waveform, -1.0, 1.0)
             audio_int16 = (waveform * 32767).astype(np.int16)
             await websocket.send_bytes(audio_int16.tobytes())
+        except Exception as e:
+            print(f"[WARN] flush decode error: {e}")
     await websocket.send_text(json.dumps({"type": "done"}))