File size: 1,799 Bytes
3246bd9
 
 
 
 
 
 
a1559e8
3246bd9
 
 
 
 
 
 
 
 
6e3bced
 
 
 
 
 
3246bd9
6e3bced
3246bd9
6e3bced
 
 
3246bd9
6e3bced
 
3246bd9
 
 
 
 
 
6e3bced
 
3246bd9
 
 
4e1b108
 
 
 
a8040a5
4e1b108
 
a8040a5
4e1b108
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# app.py
import os
import tempfile

import numpy as np
from fastapi import FastAPI, Form
from fastapi.responses import PlainTextResponse
from gtts import gTTS
from langchain_google_genai import ChatGoogleGenerativeAI
from pydub import AudioSegment

# ---------- CONFIG ----------
# SECURITY: the Google API key must be supplied through the GOOGLE_API_KEY
# environment variable (deployment secret, .env loader, etc.) and never be
# written into source. A key was previously hard-coded on this line; because it
# was committed, it should be treated as compromised and revoked/rotated.
if not os.environ.get("GOOGLE_API_KEY"):
    # Fail fast at import time with an actionable message instead of an opaque
    # client error on the first request.
    raise RuntimeError(
        "GOOGLE_API_KEY environment variable is not set; "
        "export it before starting the application."
    )

# Gemini chat client shared by all requests.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

# ASGI application object that the route decorators below attach to.
app = FastAPI()

# ---------- HELPERS ----------
def _synthesize_speech(text: str) -> "AudioSegment":
    """Render *text* to speech with gTTS and return the decoded audio."""
    # gTTS can only write MP3, so it goes through a file. A per-call temporary
    # directory gives every invocation its own path (no clobbering between
    # concurrent workers) and removes the file once the audio is loaded.
    with tempfile.TemporaryDirectory() as workdir:
        mp3_path = os.path.join(workdir, "response.mp3")
        gTTS(text).save(mp3_path)
        # The decoded segment is already PCM; no intermediate WAV export and
        # re-import is needed before resampling.
        return AudioSegment.from_mp3(mp3_path)


def _format_c_array(raw_data: bytes) -> str:
    """Format each byte of *raw_data* as a decimal inside a C-style ``{ ... }`` list."""
    # NOTE(review): bytes are read as unsigned (uint8), exactly as before.
    # pydub may hold 8-bit samples as signed values - confirm which
    # representation the consumer of this array expects.
    samples = np.frombuffer(raw_data, dtype=np.uint8)
    return "{ " + ",".join(map(str, samples.tolist())) + " }"


def text_to_pcm_array(text: str) -> str:
    """Send *text* to the LLM and return its spoken reply as a C-style PCM array.

    The reply is synthesized with gTTS, converted to 8000 Hz, mono, 1-byte
    samples, and every byte of the resulting raw audio is written as a decimal
    number.

    Args:
        text: Prompt passed to ``llm.invoke``.

    Returns:
        A string of the form ``"{ 12,255,0 }"`` with one value per audio byte.
    """
    # Fall back to a fixed sentence when the model returns empty content.
    # NOTE(review): assumes ``content`` is a plain string - confirm.
    response = llm.invoke(text).content or "No response generated."

    audio = _synthesize_speech(response)
    audio = audio.set_frame_rate(8000).set_channels(1).set_sample_width(1)

    return _format_c_array(audio.raw_data)



# ---------- ENDPOINT ----------
@app.post("/send_message_recive_pcm", response_class=PlainTextResponse)
async def send_message_recive_pcm(message: str = Form(...)):
    """Return the PCM array string that ``text_to_pcm_array`` builds for ``message``.

    ``message`` is a required form field; the result is sent back as plain text.
    """
    # NOTE(review): text_to_pcm_array is an ordinary synchronous function, so
    # this coroutine does not yield to the event loop while it runs - confirm
    # that this is acceptable for the expected request load.
    return text_to_pcm_array(message)

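# NOTE: disabled draft of a binary-streaming variant of the endpoint. It assumes
# text_to_pcm_array returns a numpy array, whereas the active helper returns a str.
# from fastapi.responses import StreamingResponse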
# import io

# @app.post("/send_message_recive_pcm")
# async def send_message_recive_pcm(message: str = Form(...)):
#     pcm_array = text_to_pcm_array(message)  # numpy array dtype=int16
#     buf = io.BytesIO()
#     buf.write(pcm_array.tobytes())
#     buf.seek(0)
#     return StreamingResponse(buf, media_type="application/octet-stream")