File size: 3,157 Bytes
6077461
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from flask import Flask, render_template, request, jsonify, send_file
from kokoro_onnx import Kokoro
import numpy as np
import scipy.io.wavfile as wav
import io
import os
import ollama

# Resolve all paths relative to this file so the server works from any CWD.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
app = Flask(__name__, template_folder=os.path.join(BASE_DIR, 'templates'))

# Load Kokoro model
# Model weights and voice embeddings are expected to sit next to this file.
model_path = os.path.join(BASE_DIR, "model.onnx")
voices_path = os.path.join(BASE_DIR, "voices.bin")

# Loading happens once at import time; startup blocks until the model is ready.
print(f"Loading Kokoro model from {model_path}...")
kokoro = Kokoro(model_path, voices_path)

# Chat history storage
# NOTE(review): a single module-level list shared by ALL clients — every
# visitor sees one combined conversation, and concurrent appends race under
# app.run(threaded=True). Confirm single-user deployment is intended.
chat_sessions = []

@app.route('/')
def index():
    """Render and return the single-page chat UI."""
    page = render_template('index.html')
    return page

@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat turn: record the user message, query Ollama, reply.

    Expects JSON ``{"message": str}``; returns JSON ``{"response": str}``
    (HTTP 500 on model failure). History is kept in the module-level
    ``chat_sessions`` list, capped at the 10 most recent messages.
    """
    data = request.get_json()
    user_msg = data.get('message', '')

    if not user_msg:
        return jsonify({"response": "Darling, silence is only for those who have nothing to say."})

    # Add user message to history
    chat_sessions.append({"role": "user", "content": user_msg})

    try:
        print(f"Ollama is thinking (Unfiltered Mode): {user_msg}")
        # Passing explicit options to override any 'tame' defaults
        response = ollama.chat(
            model='willam-viv-peg',
            messages=chat_sessions,
            options={
                'temperature': 1.2,  # Slightly more heat
                'top_p': 0.9,
                'num_predict': 100,  # Force brevity for sharp wit
            }
        )
        ai_msg = response['message']['content']

        # Record the assistant reply, then trim. Trimming AFTER the append
        # (and to an even count) keeps user/assistant turns paired; the old
        # pop(0)-before-append could orphan an assistant message and let the
        # list grow to 11. Slice-delete mutates in place so the module-level
        # list object the rest of the file references is preserved.
        chat_sessions.append({"role": "assistant", "content": ai_msg})
        del chat_sessions[:-10]

        return jsonify({"response": ai_msg})
    except Exception as e:
        # Roll back the unanswered user message: leaving it in (as the old
        # code did) desyncs history and duplicates the message on retry.
        if chat_sessions and chat_sessions[-1].get("role") == "user":
            chat_sessions.pop()
        print(f"Ollama Error: {e}")
        return jsonify({"response": "A minor technical glitch, darling. Even my perfection has its limits."}), 500

@app.route('/tts', methods=['POST'])
def tts():
    """Synthesize speech for the posted text and stream it back as WAV.

    Expects JSON ``{"text": str, "voice": str}`` (voice defaults to
    'bf_emma'). Returns ``audio/wav`` on success, a plain-text error with
    HTTP 400 (empty text) or 500 (synthesis failure) otherwise.
    """
    try:
        data = request.get_json()
        text = data.get('text', '')
        voice = data.get('voice', 'bf_emma')

        if not text:
            return "No text provided", 400

        # Determine language based on voice prefix ('bf_' voices are British)
        lang = "en-gb" if voice.startswith("bf_") else "en-us"

        print(f"Synthesizing ({lang}, {voice}): {text[:50]}...")
        # Increased speed to 1.3 for that sharp, staccato drag queen delivery
        samples, sample_rate = kokoro.create(text, voice=voice, speed=1.3, lang=lang)

        # Clip to [-1, 1] BEFORE scaling: any sample outside that range would
        # wrap around on the int16 cast and produce loud pops in the output.
        samples_16 = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

        # Save to a memory buffer
        byte_io = io.BytesIO()
        wav.write(byte_io, sample_rate, samples_16)
        byte_io.seek(0)

        return send_file(byte_io, mimetype="audio/wav")
    except Exception as e:
        print(f"TTS Error: {e}")
        return str(e), 500

if __name__ == '__main__':
    # threaded=True lets long-running synthesis requests overlap instead of
    # blocking the single worker.
    print("Server starting at http://0.0.0.0:5001")
    app.run(debug=False, host='0.0.0.0', port=5001, threaded=True)