"""Flask backend that synthesizes speech with Piper and times words with Whisper.

Endpoints:
    GET  /              -> serves the frontend's index.html
    POST /speak         -> JSON {"text": ...}; returns audio URL + viseme timeline
    GET  /audio/<name>  -> serves a previously generated WAV file
"""
import os
import uuid

import numpy as np
import soundfile as sf
import whisper
from flask import Flask, jsonify, request, send_from_directory
from flask_cors import CORS
from piper.voice import PiperVoice

app = Flask(__name__, static_folder="../frontend", static_url_path="")
CORS(app)

OUTPUT = "backend/output"
os.makedirs(OUTPUT, exist_ok=True)

# Flask resolves a *relative* directory passed to send_from_directory against
# the application root, whereas the WAV files are written relative to the
# current working directory.  Pin one absolute path so both sides agree.
_OUTPUT_DIR = os.path.abspath(OUTPUT)

# Sample rate used when writing the WAV file.
# NOTE(review): presumably matches the en_US-lessac-medium voice config —
# confirm against the accompanying .onnx.json.
SAMPLE_RATE = 22050

print("Loading Piper…")
voice = PiperVoice.load(
    "backend/models/piper/en_US-lessac-medium.onnx",
    "backend/models/piper/en_US-lessac-medium.onnx.json",
)
print("Piper ready")

print("Loading Whisper…")
whisper_model = whisper.load_model("base")
print("Whisper ready")

# Phoneme -> viseme lookup.  Not consulted yet: /speak currently emits the
# placeholder viseme "A" for every word.
PHONEME_TO_VISEME = {
    "AA": "A", "AE": "A", "AH": "A",
    "EH": "E", "IY": "E",
    "OW": "O", "UH": "O",
    "M": "BMP", "B": "BMP", "P": "BMP",
    "F": "FV", "V": "FV",
    "S": "SZ", "Z": "SZ",
    "L": "L",
}


def _chunk_samples(chunk):
    """Return the audio samples carried by one Piper chunk, or None.

    Handles raw int16 bytes, numpy arrays, and objects that expose their
    samples as an attribute.
    """
    if isinstance(chunk, (bytes, bytearray)):
        return np.frombuffer(chunk, dtype=np.int16)
    if isinstance(chunk, np.ndarray):
        return chunk
    # NOTE(review): "audio_int16_array" is what newer Piper releases appear to
    # expose on their chunk objects — verify against the installed version.
    for attr in ("samples", "audio_int16_array"):
        samples = getattr(chunk, attr, None)
        if samples is not None:
            return samples
    return None


@app.route("/")
def index():
    """Serve the frontend entry page."""
    return app.send_static_file("index.html")


@app.route("/speak", methods=["POST"])
def speak():
    """Synthesize the posted text and return the audio URL plus a timeline.

    Expects a JSON object with a string ``text`` field.

    Returns:
        200 with ``{"audio": "/audio/<uuid>.wav", "timeline": [...]}`` where
        each timeline entry is ``{"t": <word start time>, "v": "A"}``;
        400 with ``{"error": "Empty text"}`` when the text is missing, blank,
        or not a string; 500 when Piper yields no usable audio.
    """
    # silent=True: a missing or malformed JSON body becomes None instead of
    # raising, so it is reported through the same 400 path as empty text.
    payload = request.get_json(silent=True)
    text = payload.get("text", "") if isinstance(payload, dict) else ""
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "Empty text"}), 400
    text = text.strip()

    uid = str(uuid.uuid4())
    wav_path = os.path.join(OUTPUT, f"{uid}.wav")

    # Piper synthesis (robust): collect samples from whatever chunk type
    # the installed Piper version yields.
    audio_chunks = []
    for chunk in voice.synthesize(text):
        samples = _chunk_samples(chunk)
        if samples is None:
            # Surface unknown chunk types instead of dropping them silently.
            app.logger.warning(
                "Skipping unsupported Piper chunk type: %s",
                type(chunk).__name__,
            )
            continue
        audio_chunks.append(samples)

    if not audio_chunks:
        return jsonify({"error": "Piper produced no audio"}), 500

    waveform = np.concatenate(audio_chunks)
    sf.write(wav_path, waveform, SAMPLE_RATE)

    # Whisper alignment: transcribe the generated file to obtain per-word
    # start times.
    result = whisper_model.transcribe(wav_path, word_timestamps=True)

    timeline = [
        {
            "t": word["start"],
            "v": "A",  # simple viseme placeholder
        }
        for seg in result["segments"]
        for word in seg.get("words", [])
    ]

    return jsonify({
        "audio": f"/audio/{uid}.wav",
        "timeline": timeline,
    })


@app.route("/audio/<name>")
def audio(name):
    """Serve the generated WAV file called ``name`` from the output directory."""
    return send_from_directory(_OUTPUT_DIR, name)
if __name__ == "__main__":
    # Script entry point: start the Flask server bound to every network
    # interface on port 7860.
    app.run(
        host="0.0.0.0",
        port=7860,
    )