banao-tech's picture
Update backend/app.py
34de7da verified
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
import whisper
import soundfile as sf
import numpy as np
import uuid, os
from piper.voice import PiperVoice
# --- Flask application ---------------------------------------------------
# Static assets are read from ../frontend and exposed at the URL root
# (static_url_path=""), so the frontend and the API share one origin.
app = Flask(__name__, static_folder="../frontend", static_url_path="")
CORS(app)

# Directory that receives the synthesized WAV files; created on import.
OUTPUT = "backend/output"
os.makedirs(OUTPUT, exist_ok=True)

# --- Speech models (loaded once, at import time) --------------------------
# NOTE(review): these paths are relative to the process working directory,
# so the server is presumably started from the repository root - confirm.
_PIPER_MODEL_PATH = "backend/models/piper/en_US-lessac-medium.onnx"
_PIPER_CONFIG_PATH = "backend/models/piper/en_US-lessac-medium.onnx.json"

print("Loading Piper…")
voice = PiperVoice.load(_PIPER_MODEL_PATH, _PIPER_CONFIG_PATH)
print("Piper ready")

print("Loading Whisper…")
whisper_model = whisper.load_model("base")
print("Whisper ready")
# Viseme label -> phoneme symbols that share that mouth shape.
# NOTE(review): the symbols look like ARPAbet phoneme codes - confirm.
_VISEME_GROUPS = {
    "A": ("AA", "AE", "AH"),
    "E": ("EH", "IY"),
    "O": ("OW", "UH"),
    "BMP": ("M", "B", "P"),
    "FV": ("F", "V"),
    "SZ": ("S", "Z"),
    "L": ("L",),
}

# Flat phoneme -> viseme lookup table derived from the groups above.
PHONEME_TO_VISEME = {
    phoneme: viseme
    for viseme, phonemes in _VISEME_GROUPS.items()
    for phoneme in phonemes
}
@app.route("/")
def index():
    """Serve the frontend entry page (index.html) from the static folder."""
    landing_page = app.send_static_file("index.html")
    return landing_page
def _chunk_to_samples(chunk):
    """Return the PCM samples carried by one Piper output chunk.

    Piper's streaming output differs between releases, so several shapes
    are accepted: raw 16-bit PCM bytes, a NumPy array, or an object that
    exposes the samples as an attribute.

    Returns:
        A NumPy array of samples, or None when the chunk type is not
        recognized.
    """
    if isinstance(chunk, (bytes, bytearray)):
        return np.frombuffer(chunk, dtype=np.int16)
    if isinstance(chunk, np.ndarray):
        return chunk
    # "samples" is the attribute the original code handled;
    # "audio_int16_array" is presumably what newer Piper AudioChunk objects
    # expose - confirm against the installed piper version.
    for attr in ("samples", "audio_int16_array"):
        samples = getattr(chunk, attr, None)
        if samples is not None:
            return np.asarray(samples)
    return None


@app.route("/speak", methods=["POST"])
def speak():
    """Synthesize speech for the posted text and return a viseme timeline.

    Expects a JSON object body with a string field "text".

    Returns:
        200 with {"audio": "/audio/<uuid>.wav", "timeline": [...]}, where
        each timeline entry is {"t": <word start time>, "v": <viseme>}.
        400 with {"error": "Empty text"} when the body is not a JSON
        object or "text" is missing, blank, or not a string.
        500 with {"error": "Piper produced no audio"} when synthesis
        yields no usable chunks.
    """
    # silent=True yields None for a missing or malformed JSON body instead
    # of aborting the request, so all bad input funnels into the 400 below.
    payload = request.get_json(silent=True)
    text = payload.get("text", "") if isinstance(payload, dict) else ""
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "Empty text"}), 400
    text = text.strip()

    uid = str(uuid.uuid4())
    wav_path = os.path.join(OUTPUT, f"{uid}.wav")

    # Piper synthesis: collect every chunk that can be turned into samples.
    audio_chunks = []
    for chunk in voice.synthesize(text):
        samples = _chunk_to_samples(chunk)
        if samples is None:
            # Surface unexpected chunk types instead of dropping them
            # silently; the request still fails below if nothing usable
            # was produced.
            app.logger.warning(
                "Skipping unrecognized Piper chunk type: %s",
                type(chunk).__name__,
            )
            continue
        audio_chunks.append(samples)

    if not audio_chunks:
        return jsonify({"error": "Piper produced no audio"}), 500

    # Prefer the sample rate declared by the loaded voice; fall back to the
    # previously hard-coded 22050 Hz when the voice does not expose one.
    voice_config = getattr(voice, "config", None)
    sample_rate = getattr(voice_config, "sample_rate", None) or 22050
    sf.write(wav_path, np.concatenate(audio_chunks), sample_rate)

    # Whisper alignment: transcribe the generated audio to obtain per-word
    # start times for driving the mouth animation.
    result = whisper_model.transcribe(wav_path, word_timestamps=True)
    timeline = [
        {
            # float() keeps the value JSON-serializable even if Whisper
            # hands back a NumPy scalar.
            "t": float(word["start"]),
            "v": "A",  # simple viseme placeholder
        }
        for segment in result["segments"]
        for word in segment.get("words", [])
    ]

    return jsonify({
        "audio": f"/audio/{uid}.wav",
        "timeline": timeline,
    })
@app.route("/audio/<name>")
def audio(name):
    """Serve a previously synthesized WAV file from the OUTPUT directory.

    Args:
        name: File name taken from the URL, e.g. "<uuid>.wav".

    Returns:
        The file response produced by Flask's send_from_directory.
    """
    # Flask resolves a *relative* directory against the application's root
    # path (the package directory), not the working directory the files
    # were written under. Passing an absolute path makes the lookup hit the
    # same directory that os.makedirs() created and that sf.write() used.
    return send_from_directory(os.path.abspath(OUTPUT), name)
if __name__ == "__main__":
    # Development entry point: listen on every interface, port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    app.run(**server_options)