"""Flask server pairing an Ollama chat persona with Kokoro ONNX text-to-speech.

Routes:
    GET  /      -- serve the chat UI (templates/index.html)
    POST /chat  -- forward the user message (plus rolling history) to Ollama
    POST /tts   -- synthesize the given text to a WAV stream via Kokoro
"""

from flask import Flask, render_template, request, jsonify, send_file
from kokoro_onnx import Kokoro
import numpy as np
import scipy.io.wavfile as wav
import io
import os
import ollama

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
app = Flask(__name__, template_folder=os.path.join(BASE_DIR, 'templates'))

# Load Kokoro model once at import time; kokoro.create() is reused by /tts.
model_path = os.path.join(BASE_DIR, "model.onnx")
voices_path = os.path.join(BASE_DIR, "voices.bin")
print(f"Loading Kokoro model from {model_path}...")
kokoro = Kokoro(model_path, voices_path)

# Chat history storage.
# NOTE(review): this is a single module-level list shared by ALL clients, and
# it is mutated from multiple request threads (app.run(threaded=True)) without
# a lock — confirm whether per-session history / synchronization is intended.
chat_sessions = []


@app.route('/')
def index():
    """Serve the chat front-end page."""
    return render_template('index.html')


@app.route('/chat', methods=['POST'])
def chat():
    """Relay the posted message to the Ollama model and return its reply.

    Expects JSON ``{"message": str}``; returns JSON ``{"response": str}``.
    On an Ollama failure, returns an in-character error message with HTTP 500.
    """
    data = request.get_json()
    user_msg = data.get('message', '')
    if not user_msg:
        return jsonify({"response": "Darling, silence is only for those who have nothing to say."})

    # Add user message to history.
    chat_sessions.append({"role": "user", "content": user_msg})

    # Trim history: drop the oldest entry once we exceed 10 messages.
    if len(chat_sessions) > 10:
        chat_sessions.pop(0)

    try:
        print(f"Ollama is thinking (Unfiltered Mode): {user_msg}")
        # Passing explicit options to override any 'tame' defaults.
        response = ollama.chat(
            model='willam-viv-peg',
            messages=chat_sessions,
            options={
                'temperature': 1.2,   # Slightly more heat
                'top_p': 0.9,
                'num_predict': 100,   # Force brevity for sharp wit
            }
        )
        ai_msg = response['message']['content']

        # Add AI response to history.
        chat_sessions.append({"role": "assistant", "content": ai_msg})
        return jsonify({"response": ai_msg})
    except Exception as e:
        # Boundary handler: log and return an in-character failure message.
        print(f"Ollama Error: {e}")
        # BUGFIX: the original string literal contained a raw newline splitting
        # it across two source lines (a syntax error); rejoined on one line.
        return jsonify({"response": "A minor technical glitch, darling. Even my perfection has its limits."}), 500


@app.route('/tts', methods=['POST'])
def tts():
    """Synthesize posted text to a WAV audio response.

    Expects JSON ``{"text": str, "voice": str}`` (voice defaults to
    ``bf_emma``). Returns ``audio/wav`` bytes, or plain text with HTTP
    400/500 on missing input / synthesis failure.
    """
    try:
        data = request.get_json()
        text = data.get('text', '')
        voice = data.get('voice', 'bf_emma')
        if not text:
            return "No text provided", 400

        # Determine language based on voice prefix ("bf_" = British English).
        lang = "en-gb" if voice.startswith("bf_") else "en-us"
        print(f"Synthesizing ({lang}, {voice}): {text[:50]}...")

        # Increased speed to 1.3 for that sharp, staccato drag queen delivery.
        samples, sample_rate = kokoro.create(text, voice=voice, speed=1.3, lang=lang)

        # Convert float samples to 16-bit PCM for the WAV container.
        # NOTE(review): assumes kokoro.create returns floats in [-1.0, 1.0];
        # values outside that range would wrap on the int16 cast — confirm.
        samples_16 = (samples * 32767).astype(np.int16)

        # Write the WAV into an in-memory buffer and stream it back.
        byte_io = io.BytesIO()
        wav.write(byte_io, sample_rate, samples_16)
        byte_io.seek(0)
        return send_file(byte_io, mimetype="audio/wav")
    except Exception as e:
        # Boundary handler: log and surface the error text to the client.
        print(f"TTS Error: {e}")
        return str(e), 500


if __name__ == '__main__':
    print("Server starting at http://0.0.0.0:5001")
    # Using threaded=True to handle long synthesis requests without blocking.
    app.run(debug=False, host='0.0.0.0', port=5001, threaded=True)