File size: 1,799 Bytes
3246bd9 a1559e8 3246bd9 6e3bced 3246bd9 6e3bced 3246bd9 6e3bced 3246bd9 6e3bced 3246bd9 6e3bced 3246bd9 4e1b108 a8040a5 4e1b108 a8040a5 4e1b108 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# app.py
import os
import numpy as np
from fastapi import FastAPI, Form
from fastapi.responses import PlainTextResponse
from langchain_google_genai import ChatGoogleGenerativeAI
from pydub import AudioSegment
from gtts import gTTS
# ---------- CONFIG ----------
# The Gemini API key must be supplied by the deployment environment through
# GOOGLE_API_KEY. Secrets must never be committed to source control: any key
# that has appeared in this file should be treated as leaked and rotated.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY environment variable is not set; "
        "export it before starting the server."
    )

# Shared chat-model client used by the helpers below.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

# ASGI application object that the route decorators below register against.
app = FastAPI()
# ---------- HELPERS ----------
def text_to_pcm_array(text: str) -> str:
    """Answer *text* with the LLM and return the spoken reply as a C array literal.

    The prompt is sent to the module-level ``llm``; the reply (or a fixed
    fallback sentence when the reply is empty) is synthesised to speech with
    gTTS, decoded with pydub, converted to 8 kHz / mono / 8-bit samples, and
    rendered as text of the form ``{ 12,34,56 }``.

    Args:
        text: Prompt forwarded verbatim to the LLM.

    Returns:
        A string containing a brace-delimited, comma-separated list of the
        sample bytes, each printed as an integer in the range 0-255.
    """
    import tempfile  # function-scope import keeps this block self-contained

    response = llm.invoke(text).content or "No response generated."

    # Use a private, per-call directory instead of fixed /tmp paths:
    # simultaneous calls (threads or worker processes) can no longer overwrite
    # each other's audio, and the file is removed when the block exits.
    with tempfile.TemporaryDirectory() as workdir:
        mp3_path = os.path.join(workdir, "response.mp3")

        # gTTS only produces MP3, so synthesise to disk and decode from there.
        gTTS(response).save(mp3_path)

        # from_mp3 already yields decoded PCM, so the former export-to-WAV and
        # reload step was a no-op round trip and has been dropped.
        audio = AudioSegment.from_mp3(mp3_path)

    # Downsample to 8 kHz, mono, one byte per sample.
    audio = audio.set_frame_rate(8000).set_channels(1).set_sample_width(1)

    # NOTE(review): the bytes are reinterpreted as unsigned here. pydub appears
    # to keep 8-bit audio signed internally - confirm the consumer expects this
    # byte layout and not WAV-style unsigned samples centred on 128.
    samples = np.frombuffer(audio.raw_data, dtype=np.uint8)

    # Render as a C-style initialiser list.
    return "{ " + ",".join(map(str, samples)) + " }"
# ---------- ENDPOINT ----------
@app.post("/send_message_recive_pcm", response_class=PlainTextResponse)
async def send_message_recive_pcm(message: str = Form(...)):
    """Handle a form-posted message and reply with its audio as plain text.

    The required ``message`` form field is handed to ``text_to_pcm_array``
    and the resulting C-style array string is returned as the response body.

    NOTE(review): ``text_to_pcm_array`` is an ordinary synchronous call, so
    this coroutine does not yield to the event loop while it runs.
    """
    return text_to_pcm_array(message)
# from fastapi.responses import StreamingResponse
# import io
# @app.post("/send_message_recive_pcm")
# async def send_message_recive_pcm(message: str = Form(...)):
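# pcm_array = text_to_pcm_array(message) # NOTE: the helper returns a str of 8-bit sample values, not an int16 numpy array, so .tobytes() below would need a raw-array variant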
# buf = io.BytesIO()
# buf.write(pcm_array.tobytes())
# buf.seek(0)
# return StreamingResponse(buf, media_type="application/octet-stream") |