Update app.py
Browse files
app.py
CHANGED
|
@@ -6,168 +6,142 @@ import threading
|
|
| 6 |
import traceback
|
| 7 |
import asyncio
|
| 8 |
from pathlib import Path
|
| 9 |
-
from flask import Flask, request, jsonify,
|
| 10 |
import torch
|
| 11 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 12 |
import edge_tts
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
# CONFIG
|
| 16 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 17 |
MAX_MEMORY = 20
|
| 18 |
MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
|
| 19 |
TTS_VOICE = "zh-CN-XiaoyiNeural"
|
| 20 |
-
TTS_RATE = int(os.environ.get("TTS_RATE", "
|
| 21 |
-
TTS_PITCH = int(os.environ.get("TTS_PITCH", "
|
| 22 |
IMG_DIR = Path(__file__).parent / "img"
|
| 23 |
MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 28 |
-
SYSTEM_PROMPT = """You are Ana, a warm, emotionally expressive AI companion speaking to Tur in a private, intimate setting.
|
| 29 |
-
... (Keep your full original system prompt here) ..."""
|
| 30 |
|
| 31 |
-
#
|
| 32 |
-
# MODEL LOADING (BACKGROUND THREAD)
|
| 33 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 34 |
tokenizer = None
|
| 35 |
model = None
|
| 36 |
|
| 37 |
def load_model_async():
|
| 38 |
global tokenizer, model
|
| 39 |
try:
|
| 40 |
-
print(f"[
|
| 41 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
|
| 42 |
model = AutoModelForCausalLM.from_pretrained(
|
| 43 |
MODEL_ID,
|
| 44 |
-
|
| 45 |
device_map="cpu",
|
| 46 |
trust_remote_code=True,
|
| 47 |
low_cpu_mem_usage=True,
|
| 48 |
)
|
| 49 |
model.eval()
|
| 50 |
-
|
| 51 |
-
tokenizer.pad_token_id = tokenizer.eos_token_id
|
| 52 |
-
print(" OK Model loaded successfully!")
|
| 53 |
except Exception as exc:
|
| 54 |
-
print(f"
|
| 55 |
-
traceback.print_exc()
|
| 56 |
|
| 57 |
-
# Start the
|
| 58 |
threading.Thread(target=load_model_async, daemon=True).start()
|
| 59 |
|
| 60 |
-
#
|
| 61 |
-
# UTILITIES & MEMORY
|
| 62 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 63 |
-
EMOTION_RE = re.compile(r'\[([a-zA-Z_]+)\]')
|
| 64 |
sessions = {}
|
| 65 |
sessions_lock = threading.Lock()
|
| 66 |
|
| 67 |
-
def extract_emotions(text: str):
|
| 68 |
-
emotions = EMOTION_RE.findall(text)
|
| 69 |
-
clean = EMOTION_RE.sub('', text).strip()
|
| 70 |
-
return emotions, clean
|
| 71 |
-
|
| 72 |
-
def clean_for_tts(text: str) -> str:
|
| 73 |
-
_, clean = extract_emotions(text)
|
| 74 |
-
clean = re.sub(r'[*_~`#{}()\\|<>]', '', clean)
|
| 75 |
-
clean = re.sub(r'\s+', ' ', clean).strip()
|
| 76 |
-
return clean
|
| 77 |
-
|
| 78 |
-
def get_memory(sid: str) -> list:
|
| 79 |
-
with sessions_lock:
|
| 80 |
-
return list(sessions.get(sid, []))
|
| 81 |
-
|
| 82 |
-
def add_to_memory(sid: str, role: str, content: str):
|
| 83 |
-
with sessions_lock:
|
| 84 |
-
sessions.setdefault(sid, [])
|
| 85 |
-
sessions[sid].append({"role": role, "content": content})
|
| 86 |
-
if len(sessions[sid]) > MAX_MEMORY * 2:
|
| 87 |
-
sessions[sid] = sessions[sid][-(MAX_MEMORY * 2):]
|
| 88 |
-
|
| 89 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½βββββββββββββββββ
|
| 90 |
-
# RESPONSE GENERATION
|
| 91 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 92 |
-
STOP_TOKENS = ["<end_of_turn>", "<start_of_turn>", "Tur:", "User:", "<|endoftext|>", "[/INST]"]
|
| 93 |
-
|
| 94 |
-
def generate_response(user_input: str, session_id: str) -> str:
|
| 95 |
-
if model is None or tokenizer is None:
|
| 96 |
-
return "[sad] My mind is still booting up... give me another minute?"
|
| 97 |
-
|
| 98 |
-
memory = get_memory(session_id)
|
| 99 |
-
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
|
| 100 |
-
for msg in memory[-(6 * 2):]:
|
| 101 |
-
messages.append({"role": "user" if msg["role"] == "user" else "assistant", "content": msg["content"]})
|
| 102 |
-
messages.append({"role": "user", "content": user_input})
|
| 103 |
-
|
| 104 |
-
try:
|
| 105 |
-
enc = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True, return_dict=True)
|
| 106 |
-
input_ids = enc["input_ids"].to("cpu")
|
| 107 |
-
|
| 108 |
-
with torch.no_grad():
|
| 109 |
-
outputs = model.generate(
|
| 110 |
-
input_ids,
|
| 111 |
-
max_new_tokens=MAX_NEW_TOKENS,
|
| 112 |
-
do_sample=True,
|
| 113 |
-
temperature=0.85,
|
| 114 |
-
pad_token_id=tokenizer.eos_token_id
|
| 115 |
-
)
|
| 116 |
-
|
| 117 |
-
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()
|
| 118 |
-
for stop in STOP_TOKENS: response = response.split(stop)[0].strip()
|
| 119 |
-
|
| 120 |
-
if not EMOTION_RE.search(response): response = "[default] " + response
|
| 121 |
-
add_to_memory(session_id, "user", user_input)
|
| 122 |
-
add_to_memory(session_id, "assistant", response)
|
| 123 |
-
return response
|
| 124 |
-
except Exception as e:
|
| 125 |
-
print(f"Gen Error: {e}")
|
| 126 |
-
return "[sad] I lost my train of thought. Say that again?"
|
| 127 |
-
|
| 128 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 129 |
-
# TTS & ROUTES
|
| 130 |
-
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 131 |
-
async def _async_tts(text: str, rate: int, pitch: int) -> bytes:
|
| 132 |
-
rate_str = f"+{rate}%" if rate >= 0 else f"{rate}%"
|
| 133 |
-
pitch_str = f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz"
|
| 134 |
-
comm = edge_tts.Communicate(text, TTS_VOICE, rate=rate_str, pitch=pitch_str)
|
| 135 |
-
audio = b""
|
| 136 |
-
async for chunk in comm.stream():
|
| 137 |
-
if chunk["type"] == "audio": audio += chunk["data"]
|
| 138 |
-
return audio
|
| 139 |
-
|
| 140 |
-
def synthesize_speech(text: str, rate: int = 0, pitch: int = 0):
|
| 141 |
-
clean = clean_for_tts(text)
|
| 142 |
-
if not clean: return None
|
| 143 |
-
loop = asyncio.new_event_loop()
|
| 144 |
-
try:
|
| 145 |
-
audio = loop.run_until_complete(_async_tts(clean, rate, pitch))
|
| 146 |
-
finally:
|
| 147 |
-
loop.close()
|
| 148 |
-
return base64.b64encode(audio).decode() if audio else None
|
| 149 |
-
|
| 150 |
app = Flask(__name__)
|
| 151 |
|
| 152 |
@app.route("/")
|
| 153 |
-
def index():
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
@app.route("/chat", methods=["POST"])
|
| 161 |
def chat():
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
@app.route("/tts", methods=["POST"])
|
| 167 |
def tts_endpoint():
|
| 168 |
-
data = request.json
|
| 169 |
-
|
| 170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
if __name__ == "__main__":
|
| 173 |
app.run(host="0.0.0.0", port=7860)
|
|
|
|
| 6 |
import traceback
|
| 7 |
import asyncio
|
| 8 |
from pathlib import Path
|
| 9 |
+
from flask import Flask, request, jsonify, send_from_directory, Response
|
| 10 |
import torch
|
| 11 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 12 |
import edge_tts
|
| 13 |
|
| 14 |
+
# --- CONFIG ---
# Number of remembered exchanges per session (pairs of user/assistant turns).
# NOTE(review): not referenced by the visible routes — confirm memory is still wired in.
MAX_MEMORY = 20
# Generation length cap; overridable via environment for deployment tuning.
MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
# edge-tts voice identifier used for speech synthesis.
TTS_VOICE = "zh-CN-XiaoyiNeural"
# Speech rate offset (percent) and pitch offset (Hz), edge-tts semantics.
# NOTE(review): currently unused by the /tts route below — confirm whether they should be applied.
TTS_RATE = int(os.environ.get("TTS_RATE", "7"))
TTS_PITCH = int(os.environ.get("TTS_PITCH", "0"))
# Directory of bundled images next to this file.
# NOTE(review): unused in the visible code — possibly served elsewhere.
IMG_DIR = Path(__file__).parent / "img"
# Hugging Face model id loaded by the background loader thread.
MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"

# --- SYSTEM PROMPT ---
# Persona prompt for the model.
# NOTE(review): not referenced by the visible /chat route — confirm it is injected into generation.
SYSTEM_PROMPT = "You are Ana, a warm, emotionally expressive AI companion speaking to Tur. Every response MUST start with an emotion tag like [happy] or [sad]."
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
# --- MODEL LOADING (BACKGROUND THREAD) ---
# Globals populated by the loader thread; routes treat None as "still booting".
tokenizer = None
model = None

def load_model_async():
    """Load the tokenizer and model in the background so Flask can bind its port immediately.

    On success, populates the module-level ``tokenizer`` and ``model`` globals.
    On failure they remain None and the routes keep returning a "booting" message,
    so the web server itself never goes down.
    """
    global tokenizer, model
    try:
        print(f"[BOOT] Starting background load for {MODEL_ID}...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.bfloat16,
            device_map="cpu",
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
        model.eval()
        # Some tokenizers ship without a pad token; generate() needs one for padding.
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id
        print("[BOOT] Model is ONLINE and ready!")
    except Exception as exc:
        print(f"[BOOT] Critical Error: {exc}")
        # Full traceback to the log; the bare message alone hides the real cause.
        traceback.print_exc()
|
|
|
|
| 46 |
|
| 47 |
+
# Start the thread so Flask can bind to port 7860 immediately
threading.Thread(target=load_model_async, daemon=True).start()

# --- APP LOGIC ---
# Per-session chat history, guarded by a lock for thread-safe access.
# NOTE(review): sessions/sessions_lock are not referenced by the visible routes —
# confirm whether session memory was meant to be wired into /chat.
sessions = {}
sessions_lock = threading.Lock()
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
app = Flask(__name__)

@app.route("/")
def index():
    """Serve the single-page chat UI as an inline HTML document.

    The page POSTs the user's message to /chat, renders the reply, then asks
    /tts for base64-encoded MP3 audio of the reply and plays it.

    Fix: user text and the model reply were concatenated into ``innerHTML``
    unescaped, allowing HTML/script injection and breaking on any ``<`` in the
    text; both are now escaped via a DOM ``textContent`` round-trip.
    """
    return """
<!DOCTYPE html>
<html>
<head>
<title>Visual AI</title>
<style>
body { background: #0a0a0a; color: #00ffcc; font-family: sans-serif; display: flex; flex-direction: column; align-items: center; justify-content: center; height: 100vh; margin: 0; }
#chat { width: 80%; max-width: 600px; height: 400px; border: 1px solid #333; overflow-y: auto; padding: 20px; background: #111; border-radius: 10px; }
#input-area { margin-top: 20px; display: flex; width: 80%; max-width: 600px; }
input { flex: 1; padding: 10px; background: #222; border: 1px solid #444; color: white; border-radius: 5px; }
button { padding: 10px 20px; background: #00ffcc; border: none; color: black; font-weight: bold; cursor: pointer; border-radius: 5px; margin-left: 10px; }
</style>
</head>
<body>
<div id="chat">Welcome to Visual AI. Ana is booting up...</div>
<div id="input-area">
<input type="text" id="msg" placeholder="Type a message..." onkeypress="if(event.key==='Enter') send()">
<button onclick="send()">SEND</button>
</div>
<script>
// Escape untrusted text before inserting it into innerHTML (prevents injection).
function esc(s) {
    const d = document.createElement('div');
    d.textContent = s;
    return d.innerHTML;
}
async function send() {
    const input = document.getElementById('msg');
    const chat = document.getElementById('chat');
    const text = input.value;
    if(!text) return;
    input.value = '';
    chat.innerHTML += '<p><b>Tur:</b> ' + esc(text) + '</p>';

    const res = await fetch('/chat', {
        method: 'POST',
        headers: {'Content-Type': 'application/json'},
        body: JSON.stringify({message: text, session_id: 'default'})
    });
    const data = await res.json();
    chat.innerHTML += '<p><b>Ana:</b> ' + esc(data.response) + '</p>';
    chat.scrollTop = chat.scrollHeight;

    const ttsRes = await fetch('/tts', {
        method: 'POST',
        headers: {'Content-Type': 'application/json'},
        body: JSON.stringify({text: data.response})
    });
    const ttsData = await ttsRes.json();
    if(ttsData.audio) {
        const audio = new Audio("data:audio/mp3;base64," + ttsData.audio);
        audio.play();
    }
}
</script>
</body>
</html>
"""
|
| 110 |
|
| 111 |
@app.route("/chat", methods=["POST"])
def chat():
    """Generate Ana's reply for a POSTed JSON body like {"message": str}.

    Returns {"response": str}. While the background loader has not finished,
    returns a friendly "booting" message instead of erroring out.

    Fixes over the previous version:
    - also guard ``tokenizer is None`` (both globals load asynchronously);
    - tolerate a missing/invalid JSON body (``request.json`` could be None);
    - include SYSTEM_PROMPT, which was defined but never used;
    - decode only the newly generated tokens instead of decoding everything
      and splitting on "Assistant:", which breaks if the reply contains it;
    - restore sampling parameters and pad_token_id, and fail gracefully.
    """
    if model is None or tokenizer is None:
        return jsonify({"response": "[sad] I'm still waking up. Please wait about 2 minutes for the model to finish loading."})

    data = request.get_json(silent=True) or {}
    user_input = data.get("message", "")
    if not user_input:
        return jsonify({"response": "[sad] I didn't catch that. Say that again?"})

    try:
        # Keep Ana's persona and the mandatory emotion-tag format in the prompt.
        prompt = f"{SYSTEM_PROMPT}\nUser: {user_input}\nAssistant:"
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=True,
                temperature=0.85,
                pad_token_id=tokenizer.eos_token_id,
            )
        # Slice off the prompt tokens: only decode what the model produced.
        response = tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[-1]:],
            skip_special_tokens=True,
        ).strip()
        return jsonify({"response": response})
    except Exception as e:
        print(f"Gen Error: {e}")
        traceback.print_exc()
        return jsonify({"response": "[sad] I lost my train of thought. Say that again?"})
|
| 126 |
|
| 127 |
@app.route("/tts", methods=["POST"])
def tts_endpoint():
    """Synthesize speech for a POSTed JSON body like {"text": str}.

    Strips [emotion] tags before synthesis and returns {"audio": <base64 mp3>},
    or {"audio": None} when there is nothing speakable or synthesis fails.

    Fixes over the previous version:
    - tolerate a missing/invalid JSON body;
    - short-circuit on empty text (edge-tts raises on empty input);
    - apply the TTS_RATE / TTS_PITCH config constants, which were defined
      at the top of the file but ignored here;
    - catch synthesis errors instead of returning a 500.
    """
    data = request.get_json(silent=True) or {}
    text = data.get("text", "")

    # Emotion tags like [happy] are display metadata, not speakable text.
    clean_text = re.sub(r'\[.*?\]', '', text).strip()
    if not clean_text:
        return jsonify({"audio": None})

    # edge-tts expects signed offsets such as "+7%" / "-3%" and "+0Hz".
    rate_str = f"+{TTS_RATE}%" if TTS_RATE >= 0 else f"{TTS_RATE}%"
    pitch_str = f"+{TTS_PITCH}Hz" if TTS_PITCH >= 0 else f"{TTS_PITCH}Hz"

    async def get_tts():
        communicate = edge_tts.Communicate(clean_text, TTS_VOICE, rate=rate_str, pitch=pitch_str)
        audio_data = b""
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_data += chunk["data"]
        return base64.b64encode(audio_data).decode()

    try:
        audio_b64 = asyncio.run(get_tts())
    except Exception as exc:
        print(f"TTS Error: {exc}")
        return jsonify({"audio": None})
    return jsonify({"audio": audio_b64})
|
| 145 |
|
| 146 |
if __name__ == "__main__":
    # Bind on all interfaces; 7860 is the conventional Hugging Face Spaces port.
    app.run(host="0.0.0.0", port=7860)
|