import base64
import io
import json
import os
import time
from pathlib import Path

import gradio as gr
import numpy as np
import openai
from fastrtc import (
    AdditionalOutputs,
    ReplyOnStopWords,
    Stream,
    get_stt_model,
    get_twilio_turn_credentials,
)
|
|
class SambanovaVoiceService:
    """Sambanova AI service with full voice streaming support.

    Wraps Sambanova's OpenAI-compatible chat API and combines it with a
    local speech-to-text model (fastrtc) and an optional text-to-speech
    backend to produce both text and voice responses.
    """

    def __init__(self, tts_service=None):
        """Initialize the chat client, STT model, and optional TTS backend.

        Args:
            tts_service: Optional TTS backend exposing
                ``text_to_speech(text, language) -> bytes`` and
                ``save_tts_audio(audio_bytes, filename) -> str``.
                When ``None``, voice output is disabled.
        """
        # OpenAI-compatible client pointed at Sambanova's endpoint; the key
        # comes from the environment so it is never hard-coded here.
        self.client = openai.OpenAI(
            api_key=os.environ.get("SAMBANOVA_API_KEY"),
            base_url="https://api.sambanova.ai/v1",
        )

        # Local speech-to-text model provided by fastrtc.
        self.stt_model = get_stt_model()

        self.tts_service = tts_service
        print("✅ Sambanova Voice Service initialized với TTS")

    def get_available_models(self):
        """Return the list of supported Sambanova chat model names."""
        return [
            "Meta-Llama-3.1-8B-Instruct",
            "Meta-Llama-3.1-70B-Instruct",
        ]

    def generate_response(self, messages, model="Meta-Llama-3.1-8B-Instruct", temperature=0.1, top_p=0.1):
        """Generate a complete (non-streaming) chat response.

        Args:
            messages: Chat history in OpenAI message-dict format.
            model: Sambanova model name (see ``get_available_models``).
            temperature: Sampling temperature.
            top_p: Nucleus-sampling cutoff.

        Returns:
            The assistant's reply text, or a user-facing error string on
            failure (this method never raises).
        """
        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                max_tokens=1024,
            )
            return response.choices[0].message.content
        except Exception as e:
            # Boundary handler: log and return a message string so callers
            # always receive text instead of an exception.
            print(f"❌ Sambanova API Error: {e}")
            return f"Xin lỗi, có lỗi xảy ra: {str(e)}"

    def stream_generate_response(self, messages, model="Meta-Llama-3.1-8B-Instruct", temperature=0.1, top_p=0.1):
        """Stream a chat response from the Sambanova API.

        Yields:
            ``(text_chunk, accumulated_text)`` tuples as tokens arrive.
            On failure a single ``(error_msg, error_msg)`` tuple is yielded.
        """
        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                max_tokens=1024,
                stream=True,
            )

            full_response = ""
            for chunk in response:
                # Guard against keep-alive chunks with no choices or empty
                # delta content (the original indexed choices[0] unguarded).
                if chunk.choices and chunk.choices[0].delta.content:
                    text_chunk = chunk.choices[0].delta.content
                    full_response += text_chunk
                    yield text_chunk, full_response

        except Exception as e:
            error_msg = f"❌ Lỗi: {str(e)}"
            yield error_msg, error_msg

    def speech_to_text(self, audio):
        """Transcribe audio to text with the local STT model.

        Returns the transcription, or ``""`` if transcription fails.
        """
        try:
            text = self.stt_model.stt(audio)
            print(f"🎤 STT Result: {text}")
            return text
        except Exception as e:
            print(f"❌ STT Error: {e}")
            return ""

    def text_to_speech(self, text, language='vi'):
        """Synthesize *text* to speech and save it to an audio file.

        Args:
            text: Text to synthesize.
            language: Language code passed to the TTS backend.

        Returns:
            Path of the saved audio file, or ``None`` when no TTS backend
            is configured, synthesis yields no audio, or an error occurs.
        """
        if self.tts_service is None:
            print("❌ TTS service chưa được khởi tạo")
            return None

        try:
            audio_bytes = self.tts_service.text_to_speech(text, language)
            if audio_bytes:
                # Timestamp-based filename; requires the `time` import
                # (missing in the original, causing a NameError here).
                filename = f"tts_{int(time.time())}.mp3"
                filepath = self.tts_service.save_tts_audio(audio_bytes, filename)
                return filepath
            return None
        except Exception as e:
            print(f"❌ TTS Error: {e}")
            return None

    def generate_response_with_voice(self, messages, model="Meta-Llama-3.1-8B-Instruct", language='vi'):
        """Generate a text response and synthesize it to audio.

        Returns:
            Dict with keys ``text`` (reply string), ``audio`` (saved file
            path or ``None``) and ``audio_bytes`` (raw audio or ``None``).
        """
        try:
            text_response = self.generate_response(messages, model)

            audio_filepath = None
            audio_bytes = None
            # Synthesize exactly once and reuse the bytes for both the file
            # and the "audio_bytes" field (the original invoked the TTS
            # backend twice for the same text, doubling cost and latency).
            if self.tts_service is None:
                print("❌ TTS service chưa được khởi tạo")
            else:
                try:
                    audio_bytes = self.tts_service.text_to_speech(text_response, language)
                    if audio_bytes:
                        filename = f"tts_{int(time.time())}.mp3"
                        audio_filepath = self.tts_service.save_tts_audio(audio_bytes, filename)
                except Exception as e:
                    print(f"❌ TTS Error: {e}")
                    audio_bytes = None
                    audio_filepath = None

            return {
                "text": text_response,
                "audio": audio_filepath,
                "audio_bytes": audio_bytes,
            }
        except Exception as e:
            print(f"❌ Error in generate_response_with_voice: {e}")
            return {
                "text": f"Xin lỗi, có lỗi xảy ra: {str(e)}",
                "audio": None,
                "audio_bytes": None,
            }