Spaces:

1MR
/

pcm

Sleeping

App Files Files Community

pcm / app.py

1MR

Update app.py

4e1b108 verified 4 months ago

raw

history blame contribute delete

1.8 kB

	# app.py
	import os
	import tempfile

	import numpy as np
	from fastapi import FastAPI, Form
	from fastapi.responses import PlainTextResponse
	from gtts import gTTS
	from langchain_google_genai import ChatGoogleGenerativeAI
	from pydub import AudioSegment

	# ---------- CONFIG ----------
	# The Gemini client picks its credential up from the GOOGLE_API_KEY
	# environment variable. Supply it through the deployment's secret store;
	# credentials must never be committed to source.
	# NOTE: a key was previously hard-coded on this line and should be revoked.
	if not os.environ.get("GOOGLE_API_KEY"):
	    raise RuntimeError("GOOGLE_API_KEY environment variable is not set")
	llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

	app = FastAPI()

	# ---------- HELPERS ----------
	def text_to_pcm_array(text: str) -> str:
	    """Ask the LLM to answer *text*, speak the answer, and return it as a C-style array.

	    The answer is synthesised with gTTS, decoded with pydub, converted to
	    8 kHz mono with 1-byte samples, and rendered as ``"{ v0,v1,... }"``.

	    Args:
	        text: Prompt forwarded to the module-level ``llm``.

	    Returns:
	        The raw audio bytes as comma-separated integers (0-255) wrapped
	        in braces.
	    """
	    response = llm.invoke(text).content or "No response generated."

	    # gTTS only emits MP3. Write it into a private per-call directory instead
	    # of a fixed /tmp path so overlapping calls cannot overwrite each other's
	    # audio, and so the file is removed once it has been decoded.
	    with tempfile.TemporaryDirectory() as workdir:
	        mp3_path = os.path.join(workdir, "response.mp3")
	        gTTS(response).save(mp3_path)
	        # from_mp3 already yields decoded PCM, so no intermediate WAV
	        # export/reload is needed before resampling.
	        audio = AudioSegment.from_mp3(mp3_path)

	    # Downsample to 8 kHz, mono, one byte per sample.
	    audio = audio.set_frame_rate(8000).set_channels(1).set_sample_width(1)

	    # NOTE(review): the bytes are read as unsigned here; pydub appears to keep
	    # 8-bit samples signed internally, so silence may come out as 0 rather
	    # than 128 - confirm what the consumer of this array expects.
	    samples = np.frombuffer(audio.raw_data, dtype=np.uint8)

	    # Render as a C-style initializer list.
	    return "{ " + ",".join(map(str, samples)) + " }"



	# ---------- ENDPOINT ----------
	@app.post("/send_message_recive_pcm", response_class=PlainTextResponse)
	async def send_message_recive_pcm(message: str = Form(...)):
	    """Return the PCM array text produced for the form field *message*.

	    The response body is whatever string ``text_to_pcm_array`` returns,
	    served as plain text.
	    """
	    # NOTE(review): text_to_pcm_array is a regular (non-async) function and is
	    # called without await, so it occupies the event loop until it returns.
	    return text_to_pcm_array(message)

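	# Disabled alternative endpoint: stream the samples as raw bytes instead of text.
	# NOTE: not usable as written - text_to_pcm_array returns a str, not a numpy
	# array, so the .tobytes() call below would fail.
	# from fastapi.responses import StreamingResponse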
	# import io

	# @app.post("/send_message_recive_pcm")
	# async def send_message_recive_pcm(message: str = Form(...)):
	# pcm_array = text_to_pcm_array(message) # numpy array dtype=int16
	# buf = io.BytesIO()
	# buf.write(pcm_array.tobytes())
	# buf.seek(0)
	# return StreamingResponse(buf, media_type="application/octet-stream")