# willammonster/server.py — "Deploy Willam-viv-peg" (commit 6077461)
from flask import Flask, render_template, request, jsonify, send_file
from kokoro_onnx import Kokoro
import numpy as np
import scipy.io.wavfile as wav
import io
import os
import ollama
# Resolve all paths relative to this file so the server works from any CWD.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
app = Flask(__name__, template_folder=os.path.join(BASE_DIR, 'templates'))
# Load Kokoro model
model_path = os.path.join(BASE_DIR, "model.onnx")
voices_path = os.path.join(BASE_DIR, "voices.bin")
print(f"Loading Kokoro model from {model_path}...")
kokoro = Kokoro(model_path, voices_path)
# Chat history storage
# NOTE(review): a single module-level list is shared by ALL clients — with
# threaded=True, concurrent users interleave into one conversation. Looks
# intended for a single-user deployment; confirm.
chat_sessions = []
@app.route('/')
def index():
    """Serve the chat UI from templates/index.html."""
    return render_template('index.html')
@app.route('/chat', methods=['POST'])
def chat():
    """Relay a user message to the local Ollama model and return its reply.

    Expects JSON ``{"message": str}``; returns JSON ``{"response": str}``
    (HTTP 500 on model failure). Appends the exchange to the module-level
    ``chat_sessions`` history; on failure the pending user message is rolled
    back so a retry does not duplicate it.
    """
    # get_json() returns None for non-JSON bodies; silent=True avoids a 400
    # abort and the ``or {}`` avoids an AttributeError on .get().
    data = request.get_json(silent=True) or {}
    user_msg = data.get('message', '')
    if not user_msg:
        return jsonify({"response": "Darling, silence is only for those who have nothing to say."})
    # Add user message to history
    chat_sessions.append({"role": "user", "content": user_msg})
    # Trim history in whole user/assistant pairs. The old ``if ... pop(0)``
    # removed at most one message per request while each request appends two,
    # so the history grew without bound and could start on an assistant turn.
    while len(chat_sessions) > 10:
        del chat_sessions[:2]
    try:
        print(f"Ollama is thinking (Unfiltered Mode): {user_msg}")
        # Passing explicit options to override any 'tame' defaults
        response = ollama.chat(
            model='willam-viv-peg',
            messages=chat_sessions,
            options={
                'temperature': 1.2,  # Slightly more heat
                'top_p': 0.9,
                'num_predict': 100,  # Force brevity for sharp wit
            }
        )
        ai_msg = response['message']['content']
        # Add AI response to history
        chat_sessions.append({"role": "assistant", "content": ai_msg})
        return jsonify({"response": ai_msg})
    except Exception as e:
        # Roll back the unanswered user message so the history stays strictly
        # user/assistant alternating and a client retry isn't duplicated.
        chat_sessions.pop()
        print(f"Ollama Error: {e}")
        return jsonify({"response": "A minor technical glitch, darling. Even my perfection has its limits."}), 500
@app.route('/tts', methods=['POST'])
def tts():
    """Synthesize speech for posted text with Kokoro and stream back a WAV.

    Expects JSON ``{"text": str, "voice": str}``; ``voice`` defaults to
    ``"bf_emma"``. Returns ``audio/wav`` on success, a plain-text error with
    status 400 (no text) or 500 (synthesis failure) otherwise.
    """
    try:
        # silent=True + ``or {}``: a non-JSON body becomes a clean 400
        # "No text provided" instead of a 500 from data being None.
        data = request.get_json(silent=True) or {}
        text = data.get('text', '')
        voice = data.get('voice', 'bf_emma')
        if not text:
            return "No text provided", 400
        # Determine language based on voice prefix
        lang = "en-gb" if voice.startswith("bf_") else "en-us"
        print(f"Synthesizing ({lang}, {voice}): {text[:50]}...")
        # Increased speed to 1.3 for that sharp, staccato drag queen delivery
        samples, sample_rate = kokoro.create(text, voice=voice, speed=1.3, lang=lang)
        # Clip to [-1, 1] before scaling: float samples can overshoot full
        # scale, and casting an overshoot to int16 wraps around, producing
        # loud clicks in the output audio.
        samples_16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
        # Write the WAV into an in-memory buffer — no temp file on disk.
        byte_io = io.BytesIO()
        wav.write(byte_io, sample_rate, samples_16)
        byte_io.seek(0)
        return send_file(byte_io, mimetype="audio/wav")
    except Exception as e:
        print(f"TTS Error: {e}")
        return str(e), 500
if __name__ == '__main__':
    # Bind on all interfaces so the UI is reachable from the LAN.
    print("Server starting at http://0.0.0.0:5001")
    # Using threaded=True to handle long synthesis requests without blocking
    app.run(debug=False, host='0.0.0.0', port=5001, threaded=True)