pcm / app.py
1MR's picture
Update app.py
4e1b108 verified
# app.py
import asyncio
import os
import tempfile

import numpy as np
from fastapi import FastAPI, Form
from fastapi.responses import PlainTextResponse
from gtts import gTTS
from langchain_google_genai import ChatGoogleGenerativeAI
from pydub import AudioSegment
# ---------- CONFIG ----------
# The Gemini API key must be supplied through the GOOGLE_API_KEY environment
# variable (for example as a deployment secret). It is deliberately not stored
# in this file: a key committed to source is readable by anyone who can read
# the repository.
# NOTE(review): a key was previously hard-coded here; treat it as compromised
# and revoke/rotate it.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it (or configure it as a "
        "deployment secret) before starting the app."
    )

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")
app = FastAPI()
# ---------- HELPERS ----------
def text_to_pcm_array(text: str) -> str:
    """Answer *text* with the LLM and return the spoken reply as a C array.

    The LLM reply is synthesised to speech with gTTS (which writes MP3),
    decoded with pydub, converted to 8 kHz mono with one byte per sample, and
    formatted as a C-style initialiser list such as ``{ 12,200,7 }``.

    Args:
        text: Prompt forwarded to the LLM.

    Returns:
        The audio bytes as decimal values (0-255), comma-separated and
        wrapped in ``{ `` and `` }``.
    """
    response = llm.invoke(text).content or "No response generated."

    # gTTS can only save MP3, so the audio goes through a file that pydub then
    # decodes. A private temporary directory per call keeps concurrent
    # requests from overwriting each other's file and guarantees cleanup.
    with tempfile.TemporaryDirectory() as workdir:
        mp3_path = os.path.join(workdir, "response.mp3")
        gTTS(response).save(mp3_path)
        # pydub loads the decoded audio into memory, so the file can be
        # removed as soon as this returns.
        audio = AudioSegment.from_mp3(mp3_path)

    # Downsample directly; an intermediate WAV export/reload is lossless and
    # would not change the samples.
    audio = audio.set_frame_rate(8000).set_channels(1).set_sample_width(1)

    # NOTE(review): the bytes are reinterpreted as unsigned here. If pydub
    # holds 8-bit samples as signed values, silence maps to 0 rather than 128;
    # confirm which encoding the consumer of this array expects.
    samples = np.frombuffer(audio.raw_data, dtype=np.uint8)

    # tolist() yields plain Python ints, which format the same as the NumPy
    # scalars but avoid per-element NumPy scalar overhead.
    return "{ " + ",".join(map(str, samples.tolist())) + " }"
# ---------- ENDPOINT ----------
@app.post("/send_message_recive_pcm", response_class=PlainTextResponse)
async def send_message_recive_pcm(message: str = Form(...)):
    """Return the spoken LLM reply to *message* as a C-style PCM array.

    Args:
        message: Prompt text, read from the ``message`` form field.

    Returns:
        The string produced by ``text_to_pcm_array``, sent as plain text.
    """
    # text_to_pcm_array performs blocking work (LLM call, TTS request, audio
    # decoding). Running it in a worker thread keeps the event loop free to
    # serve other requests in the meantime.
    return await asyncio.to_thread(text_to_pcm_array, message)
# from fastapi.responses import StreamingResponse
# import io
# @app.post("/send_message_recive_pcm")
# async def send_message_recive_pcm(message: str = Form(...)):
# pcm_array = text_to_pcm_array(message) # numpy array dtype=int16
# buf = io.BytesIO()
# buf.write(pcm_array.tobytes())
# buf.seek(0)
# return StreamingResponse(buf, media_type="application/octet-stream")