# app.py
"""FastAPI service that answers a text prompt with synthesized speech.

The prompt is sent to a Gemini chat model, the reply is converted to speech
with gTTS, downsampled to 8 kHz mono 8-bit PCM, and returned as a C-style
array literal (``{ 12,34,... }``) in a plain-text response.
"""
import os
import tempfile

import numpy as np
from fastapi import FastAPI, Form
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import PlainTextResponse
from gtts import gTTS
from langchain_google_genai import ChatGoogleGenerativeAI
from pydub import AudioSegment

# ---------- CONFIG ----------
# SECURITY: this key is committed to source control and must be treated as
# compromised -- rotate it and provide GOOGLE_API_KEY through the environment.
# The literal is kept only as a fallback so existing deployments keep working;
# ``setdefault`` lets a key supplied by the environment take precedence.
os.environ.setdefault("GOOGLE_API_KEY", "AIzaSyD2DMFgcL0kWTQYhii8wseSHY3BRGWSebk")

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")
app = FastAPI()

# Output audio format.
PCM_FRAME_RATE_HZ = 8000
PCM_CHANNELS = 1
PCM_SAMPLE_WIDTH_BYTES = 1

# Spoken when the model returns an empty reply.
FALLBACK_RESPONSE = "No response generated."


# ---------- HELPERS ----------
def _synthesize_pcm(speech_text: str) -> np.ndarray:
    """Convert *speech_text* to speech and return the downsampled samples.

    gTTS only produces MP3, so the audio is written to a temporary file and
    decoded with pydub. A fresh temporary directory is used for every call so
    that concurrent requests never read or overwrite each other's audio, and
    the file is removed as soon as it has been decoded.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        mp3_path = os.path.join(work_dir, "response.mp3")
        gTTS(speech_text).save(mp3_path)
        # from_mp3 decodes the whole file into memory, so no intermediate WAV
        # file is needed and the MP3 can be deleted when the block exits.
        audio = AudioSegment.from_mp3(mp3_path)

    audio = (
        audio.set_frame_rate(PCM_FRAME_RATE_HZ)
        .set_channels(PCM_CHANNELS)
        .set_sample_width(PCM_SAMPLE_WIDTH_BYTES)
    )
    # NOTE(review): the bytes are reinterpreted as unsigned 8-bit values,
    # exactly as before. pydub appears to hold 8-bit samples as signed
    # internally -- confirm which representation the consumer expects.
    return np.frombuffer(audio.raw_data, dtype=np.uint8)


def _format_c_array(samples: np.ndarray) -> str:
    """Render *samples* as a C-style initializer list, e.g. ``{ 1,2,3 }``."""
    # tolist() yields plain Python ints, which stringify the same way as the
    # NumPy scalars but far faster for long clips.
    return "{ " + ",".join(map(str, samples.tolist())) + " }"


def text_to_pcm_array(text: str) -> str:
    """Ask the LLM about *text* and return its spoken reply as a C array.

    Args:
        text: Prompt forwarded to the chat model.

    Returns:
        A string of the form ``{ v0,v1,... }`` holding the reply's audio as
        8 kHz, mono, one-byte-per-sample values.
    """
    response = llm.invoke(text).content or FALLBACK_RESPONSE
    return _format_c_array(_synthesize_pcm(response))


# ---------- ENDPOINT ----------
@app.post("/send_message_recive_pcm", response_class=PlainTextResponse)
async def send_message_recive_pcm(message: str = Form(...)):
    """Return the PCM C-array text for the form field ``message``."""
    # The LLM call, the gTTS request and the audio decode are all blocking;
    # run them in the thread pool so the event loop can keep serving requests.
    return await run_in_threadpool(text_to_pcm_array, message)


# NOTE: unused alternative that streams raw bytes instead of text. It expects
# text_to_pcm_array to return a NumPy array, which it currently does not.
#
# from fastapi.responses import StreamingResponse
# import io
#
# @app.post("/send_message_recive_pcm")
# async def send_message_recive_pcm(message: str = Form(...)):
#     pcm_array = text_to_pcm_array(message)  # numpy array dtype=int16
#     buf = io.BytesIO()
#     buf.write(pcm_array.tobytes())
#     buf.seek(0)
#     return StreamingResponse(buf, media_type="application/octet-stream")