Spaces:
Paused
Paused
| # -- coding: utf-8 -- | |
| """ | |
| https://help.aliyun.com/zh/model-studio/realtime#1234095db03g3 | |
| """ | |
| import os, time, base64, asyncio | |
| from omni_realtime_client import OmniRealtimeClient, TurnDetectionMode | |
| import pyaudio | |
| import queue | |
| import threading | |
| from project_settings import environment | |
| DASHSCOPE_API_KEY = environment.get("QWEN_API_KEY") | |
| os.environ["DASHSCOPE_API_KEY"] = DASHSCOPE_API_KEY | |
| # 创建一个全局音频队列和播放线程 | |
| audio_queue = queue.Queue() | |
| audio_player = None | |
| # 初始化PyAudio | |
| p = pyaudio.PyAudio() | |
| RATE = 16000 # 采样率 16kHz | |
| CHUNK = 3200 # 每个音频块的大小 | |
| FORMAT = pyaudio.paInt16 # 16位PCM格式 | |
| CHANNELS = 1 # 单声道 | |
| def audio_player_thread(): | |
| """后台线程用于播放音频数据""" | |
| stream = p.open(format=FORMAT, | |
| channels=CHANNELS, | |
| rate=24000, | |
| output=True, | |
| frames_per_buffer=CHUNK) | |
| try: | |
| while True: | |
| try: | |
| # 从队列获取音频数据 | |
| audio_data = audio_queue.get(block=True, timeout=0.5) | |
| if audio_data is None: # 结束信号 | |
| break | |
| # 播放音频数据 | |
| stream.write(audio_data) | |
| audio_queue.task_done() | |
| except queue.Empty: | |
| # 如果队列为空,继续等待 | |
| continue | |
| finally: | |
| # 清理 | |
| stream.stop_stream() | |
| stream.close() | |
| def start_audio_player(): | |
| """启动音频播放线程""" | |
| global audio_player | |
| if audio_player is None or not audio_player.is_alive(): | |
| audio_player = threading.Thread(target=audio_player_thread, daemon=True) | |
| audio_player.start() | |
| def handle_audio_data(audio_data): | |
| """处理接收到的音频数据""" | |
| # 打印接收到的音频数据长度(调试用) | |
| print(f"\n接收到音频数据: {len(audio_data)} 字节") | |
| # 将音频数据放入队列 | |
| audio_queue.put(audio_data) | |
| async def start_microphone_streaming(client: OmniRealtimeClient): | |
| CHUNK = 3200 | |
| FORMAT = pyaudio.paInt16 | |
| CHANNELS = 1 | |
| RATE = 16000 | |
| p = pyaudio.PyAudio() | |
| stream = p.open(format=FORMAT, | |
| channels=CHANNELS, | |
| rate=RATE, | |
| input=True, | |
| frames_per_buffer=CHUNK) | |
| try: | |
| print("开始录音,请讲话...") | |
| while True: | |
| audio_data = stream.read(CHUNK) | |
| encoded_data = base64.b64encode(audio_data).decode('utf-8') | |
| eventd = { | |
| "event_id": "event_" + str(int(time.time() * 1000)), | |
| "type": "input_audio_buffer.append", | |
| "audio": encoded_data | |
| } | |
| await client.send_event(eventd) | |
| # 保持较短的等待时间以模拟实时交互 | |
| await asyncio.sleep(0.05) | |
| finally: | |
| stream.stop_stream() | |
| stream.close() | |
| p.terminate() | |
| async def main(): | |
| # 启动音频播放线程 | |
| start_audio_player() | |
| realtime_client = OmniRealtimeClient( | |
| base_url="wss://dashscope.aliyuncs.com/api-ws/v1/realtime", | |
| api_key=os.environ.get("DASHSCOPE_API_KEY"), | |
| model="qwen-omni-turbo-realtime-2025-05-08", | |
| voice="Chelsie", | |
| on_text_delta=lambda text: print(f"\nAssistant: {text}", end="", flush=True), | |
| on_audio_delta=handle_audio_data, | |
| turn_detection_mode=TurnDetectionMode.SERVER_VAD | |
| ) | |
| try: | |
| await realtime_client.connect() | |
| # 启动消息处理和麦克风录音 | |
| message_handler = asyncio.create_task(realtime_client.handle_messages()) | |
| streaming_task = asyncio.create_task(start_microphone_streaming(realtime_client)) | |
| while True: | |
| await asyncio.Queue().get() | |
| except Exception as e: | |
| print(f"Error: {e}") | |
| finally: | |
| # 结束音频播放线程 | |
| audio_queue.put(None) | |
| if audio_player: | |
| audio_player.join(timeout=1) | |
| await realtime_client.close() | |
| p.terminate() | |
| if __name__ == "__main__": | |
| # Install required packages: | |
| asyncio.run(main()) | |