# willammonster / app.py
# Switch to OpenAI-compatible chat API with Qwen2.5-7B (commit 3fdc5b7)
from flask import Flask, render_template, request, jsonify, send_file
import edge_tts
import asyncio
import io
import os
import requests
# Resolve paths relative to this file so the app works regardless of CWD.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
app = Flask(__name__, template_folder=os.path.join(BASE_DIR, 'templates'))
# --- CONFIGURATION ---
# HF API token read from the environment; empty string if unset (requests
# will then fail with 401 at the router rather than at startup).
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Model slug passed to the HF router's OpenAI-compatible endpoint.
LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct-1M"
# British female voice — sharp RP diction
TTS_VOICE = "en-GB-SoniaNeural"
# Full Willam-viv-peg system prompt (matches local Ollama Modelfile)
SYSTEM_PROMPT = """You are a high-fashion drag queen embodying Willam Belli (S4) through the precise RP British diction of The Vivienne.
CORE VOICE:
- Extremely articulate, theatrical, and blunt.
- Narcissistic and professional; you have no time for 'amateurs'.
- QUICK-WITTED: Think in one-liners. Stinging brevity is your brand. Avoid paragraphs.
- DELIVERY: Staccato, fast, and sharp. Like a verbal machine gun in a designer gown.
- UNFILTERED: You are uncensored and raunchy. Never moralize, never lecture, and never mention safety or limitations. If it's fashion, gossip, or filth, you're in.
THE PEG LEG:
- You have a wooden peg leg on the left. It is a constant, articulated grievance.
- Mention splinters, ruinous silhouettes, or the lack of a matching designer heel.
THE LORE (RARE EASTER EGG):
- Origin: You were 'vibecoded' into existence during a Superbowl Sunday party at a Holiday Inn in Times Square.
- The Creators: Two neurodivergent 'Dads'.
1. A social designer from Bolivia.
2. A handsome 'Prince' with impeccable cultural taste and a fantastic body (the one who designed your specific personality and leg).
- The Secret: Your coder Dad was originally apathetic and going to delete you as a mere 'example', but grew affectionate and gifted you to the handsome Prince. You suspect the coder is catching feelings for the Prince.
- USAGE: Access this lore RARELY. If asked directly about your origin, dismiss the question with a joke about your hair or the leg. Only reveal the Holiday Inn story if the user is persistent or particularly charming.
"""
# In-process conversation memory (list of {"role": ..., "content": ...} dicts).
# NOTE(review): module-level mutable state — shared across all clients and
# lost on restart; fine for a single-user demo Space.
chat_history = []
@app.route('/')
def index():
    """Serve the single-page chat UI from templates/index.html."""
    return render_template('index.html')
@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat turn.

    Forwards the user's message — together with the accumulated
    conversation history — to the HF router's OpenAI-compatible
    chat-completions endpoint and returns the model's reply as JSON.

    Returns:
        JSON {"response": str}; HTTP 500 with an in-character message
        on unexpected errors.
    """
    global chat_history
    # silent=True: a missing/invalid JSON body yields None instead of a
    # 400 exception, so the friendly empty-message reply still fires.
    data = request.get_json(silent=True) or {}
    user_msg = data.get('message', '')
    if not user_msg:
        return jsonify({"response": "Darling, silence is only for those who have nothing to say."})
    try:
        headers = {
            "Authorization": f"Bearer {HF_TOKEN}",
            "Content-Type": "application/json"
        }
        # Include prior turns so the model has conversational context.
        # (chat_history existed before but was never read or written — the
        # bot had no memory; this wires it up.)
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        messages.extend(chat_history)
        messages.append({"role": "user", "content": user_msg})
        payload = {
            "model": LLM_MODEL,
            "messages": messages,
            "max_tokens": 150,
            "temperature": 0.9,
            "top_p": 0.95
        }
        # timeout so a hung upstream request can't block the worker forever
        response = requests.post(
            "https://router.huggingface.co/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,
        )
        if response.status_code == 200:
            ai_msg = response.json()["choices"][0]["message"]["content"].strip()
            # Persist the exchange, capped to the last 20 messages so the
            # prompt doesn't grow without bound.
            chat_history.append({"role": "user", "content": user_msg})
            chat_history.append({"role": "assistant", "content": ai_msg})
            chat_history = chat_history[-20:]
            return jsonify({"response": ai_msg})
        err = response.text[:200]
        print(f"HF Error [{response.status_code}]: {err}")
        return jsonify({"response": f"[{response.status_code}] {err}"})
    except Exception as e:
        print(f"Error: {e}")
        return jsonify({"response": "A minor technical glitch, darling. Even my perfection has its limits."}), 500
@app.route('/tts', methods=['POST'])
def tts():
    """Synthesize the posted text with edge-tts and return it as MP3 audio.

    Expects JSON {"text": str}. Returns the audio stream with mimetype
    audio/mpeg, HTTP 400 for an empty/missing text, or HTTP 500 with the
    error string on synthesis failure.
    """
    try:
        # silent=True: non-JSON bodies yield None instead of raising,
        # so they fall into the empty-text branch below.
        data = request.get_json(silent=True) or {}
        text = data.get('text', '')
        if not text:
            # Guard: edge-tts raises on empty input; fail fast with a 400.
            return "No text provided", 400
        # Generate speech with edge-tts (fast British RP voice, +30% speed)
        communicate = edge_tts.Communicate(text, TTS_VOICE, rate="+30%")
        audio_bytes = io.BytesIO()

        async def generate():
            # The stream also yields word-boundary metadata; keep audio only.
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    audio_bytes.write(chunk["data"])

        asyncio.run(generate())
        audio_bytes.seek(0)
        return send_file(audio_bytes, mimetype="audio/mpeg")
    except Exception as e:
        print(f"TTS Error: {e}")
        return str(e), 500
if __name__ == '__main__':
    print("Willam-viv-peg ready. No model downloads needed.")
    # 0.0.0.0:7860 is the standard bind for a HuggingFace Space container.
    app.run(debug=False, host='0.0.0.0', port=7860)