|
|
|
|
|
import os |
|
|
import numpy as np |
|
|
from fastapi import FastAPI, Form |
|
|
from fastapi.responses import PlainTextResponse |
|
|
from langchain_google_genai import ChatGoogleGenerativeAI |
|
|
from pydub import AudioSegment |
|
|
from gtts import gTTS |
|
|
|
|
|
|
|
|
# The Gemini credential must come from the process environment (or a secret
# manager that populates it), never from source control. The key that used to
# be hard-coded on this line should be treated as leaked and revoked.
# Previously the assignment also overwrote whatever key the deployment had
# supplied; now the deployment's value is the only source of truth.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before starting the service."
    )

# Shared chat-model client used by the request handlers below. No key is
# passed explicitly, so the client is expected to pick it up from the
# GOOGLE_API_KEY environment variable checked above.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

# ASGI application object that the route decorators attach to.
app = FastAPI()
|
|
|
|
|
|
|
|
def text_to_pcm_array(text: str) -> str:
    """Ask the LLM to answer *text* and return the spoken reply as PCM text.

    The prompt is sent to the module-level ``llm``; its reply is synthesised
    to MP3 with gTTS, decoded with pydub, converted to 8 kHz / mono / 8-bit
    samples, and rendered as a brace-delimited, comma-separated list of
    integers (for example ``"{ 128,127,130 }"``).

    Args:
        text: Prompt forwarded verbatim to the language model.

    Returns:
        A string of the form ``"{ v0,v1,...,vn }"`` where each value is one
        audio sample read as an unsigned byte (0-255).

    Raises:
        Whatever ``llm.invoke``, ``gTTS`` or ``AudioSegment`` raise on network,
        synthesis or decoding failures; nothing is caught here.
    """
    # Function-scope import so this block does not depend on an edit to the
    # file-level import list.
    import tempfile

    content = llm.invoke(text).content
    # Strip first so a whitespace-only reply also falls back to the default;
    # gTTS refuses to synthesise text that contains nothing speakable.
    if isinstance(content, str):
        content = content.strip()
    reply = content or "No response generated."

    # A private per-call directory replaces the fixed /tmp/response.* paths:
    # concurrent requests (or multiple workers) no longer overwrite each
    # other's audio, and the file is removed as soon as it has been decoded.
    with tempfile.TemporaryDirectory() as workdir:
        mp3_path = os.path.join(workdir, "response.mp3")
        gTTS(reply).save(mp3_path)
        # pydub decodes the whole file into memory here, so the segment stays
        # usable after the directory is deleted. The former export-to-WAV and
        # reload step is dropped: it rewrote the same decoded samples to disk
        # and read them straight back.
        audio = AudioSegment.from_mp3(mp3_path)

    # Down-convert to 8 kHz, single channel, one byte per sample.
    audio = audio.set_frame_rate(8000).set_channels(1).set_sample_width(1)

    # NOTE(review): pydub appears to keep 8-bit samples as *signed* bytes
    # internally; reading them as uint8 preserves the existing output but may
    # not be centred on 128 the way unsigned 8-bit PCM players expect.
    # Confirm against the consumer of this array before changing it.
    samples = np.frombuffer(audio.raw_data, dtype=np.uint8)

    # tolist() yields plain Python ints, so str() gives bare decimal digits.
    return "{ " + ",".join(map(str, samples.tolist())) + " }"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/send_message_recive_pcm", response_class=PlainTextResponse)
async def send_message_recive_pcm(message: str = Form(...)):
    """Return the PCM sample listing for the LLM's spoken reply to *message*.

    Args:
        message: Required form field holding the user's prompt.

    Returns:
        The string produced by ``text_to_pcm_array(message)``, sent back as a
        plain-text response body.
    """
    # NOTE(review): text_to_pcm_array is an ordinary synchronous function
    # called directly from this coroutine, so the event loop is presumably
    # held for the whole call. Consider offloading it to a worker thread once
    # the helper is safe to run concurrently.
    return text_to_pcm_array(message)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|