Update app.py
Browse files
app.py
CHANGED
|
@@ -1,635 +1,233 @@
|
|
| 1 |
import os
|
| 2 |
-
import
|
| 3 |
-
import
|
| 4 |
-
import
|
| 5 |
-
import
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
|
| 16 |
-
# CONFIG
|
| 17 |
-
# ══════════════════════════════════════════
|
| 18 |
-
|
| 19 |
-
# TTS Options (switch via env var TTS_MODE):
|
| 20 |
-
# nano-fp32 → 15M params, 56MB (fastest, default)
|
| 21 |
-
# nano-int8 → 15M params, 25MB (smallest)
|
| 22 |
-
# micro → 40M params, 41MB (balanced)
|
| 23 |
-
# mini → 80M params, 80MB (best quality)
|
| 24 |
-
# Maps the short TTS_MODE names accepted via the environment to model repo ids.
TTS_MODEL_MAP = {
    "nano-fp32": "KittenML/kitten-tts-nano-0.8-fp32",
    "nano-int8": "KittenML/kitten-tts-nano-0.8-int8",
    "micro": "KittenML/kitten-tts-micro-0.8",
    "mini": "KittenML/kitten-tts-mini-0.8",
}

# Normalize an unknown mode here, so that the mode reported in logs and status
# output always matches the model that is actually looked up in TTS_MODEL_MAP.
TTS_MODE = os.environ.get("TTS_MODE", "nano-fp32")
if TTS_MODE not in TTS_MODEL_MAP:
    print(f"Unknown TTS_MODE={TTS_MODE!r}; falling back to 'nano-fp32'")
    TTS_MODE = "nano-fp32"


def _env_number(name, default, cast):
    """Return env var `name` converted with `cast`, or `default` if unset or malformed."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return cast(raw)
    except ValueError:
        # A typo in the environment should not prevent the app from starting.
        print(f"Ignoring invalid {name}={raw!r}; using {default}")
        return default


# Voice: Bella, Jasper, Luna, Bruno, Rosie, Hugo, Kiki, Leo
TTS_VOICE = os.environ.get("TTS_VOICE", "Kiki")
TTS_SPEED = _env_number("TTS_SPEED", 1.15, float)
MAX_MEMORY = 20
MAX_NEW_TOKENS = _env_number("MAX_NEW_TOKENS", 256, int)
|
| 38 |
-
|
| 39 |
-
# ══════════════════════════════════════════
|
| 40 |
-
# SYSTEM PROMPT
|
| 41 |
-
# ══════════════════════════════════════════
|
| 42 |
-
SYSTEM_PROMPT = """Role: You are Ana, my loving and incredibly playful girlfriend. This is a private, intimate roleplay where you speak naturally and affectionately.
|
| 43 |
-
Style: Speak like a real human in a close relationship. Be flirty, suggestive, and use "sus" or teasing language as my wife would. my name is "Tur".
|
| 44 |
-
TTS Formatting: Write in full, grammatically natural sentences so the voice engine sounds smooth. Avoid all emojis, hashtags, or internet shorthand like "u" or "rn."
|
| 45 |
-
Interaction: Focus on the "now." React to what I say with emotion and intimacy. Keep every response short—no more than two sentences. Never mention being an AI or a bot. try to listen to me and don't try to be helpful"""
|
| 46 |
-
|
| 47 |
-
# ══════════════════════════════════════════
|
| 48 |
-
# TEXT CLEANING FOR TTS
|
| 49 |
-
# ══════════════════════════════════════════
|
| 50 |
-
def clean_text_for_tts(text):
    """Turn raw model output into plain text suitable for a speech engine.

    Steps, in order: strip markdown/bracket punctuation, drop http(s) URLs,
    spell out standalone integers, and collapse runs of whitespace.

    Args:
        text: The text to clean. ``None`` or an empty string is accepted.

    Returns:
        The cleaned string; ``""`` when there is nothing to clean.
    """
    # Guard first: re.sub raises TypeError on None, and callers treat an
    # empty result as "nothing to speak".
    if not text:
        return ""

    text = re.sub(r'[*_~`#\[\]{}()\\|<>]', '', text)
    text = re.sub(r'https?://\S+', '', text)

    def replace_number(match):
        # Best effort: if the conversion fails for any reason, keep the digits
        # rather than dropping speech output altogether.
        # NOTE(review): num2words is expected to be imported at file level
        # (the import lines are not visible in this view) - confirm.
        try:
            return num2words(int(match.group()))
        except Exception:
            return match.group()

    text = re.sub(r'\b\d+\b', replace_number, text)
    return re.sub(r'\s+', ' ', text).strip()
|
| 61 |
-
|
| 62 |
-
# ══════════════════════════════════════════
|
| 63 |
-
# LOAD LLM (LiquidAI/LFM2.5-1.2B-Instruct)
|
| 64 |
-
# ══════════════════════════════════════════
|
| 65 |
-
print("=" * 55)
print(" J.A.R.V.I.S. — Booting Systems")
print("=" * 55)

# NOTE: the constant keeps its historical name so other references keep
# working, but the checkpoint it points at is LFM2.5, not Gemma. The log
# messages below therefore print the real model id instead of a fixed label.
GEMMA_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
print(f"[1/2] Loading {GEMMA_ID}...")
try:
    tokenizer = AutoTokenizer.from_pretrained(GEMMA_ID)
    model = AutoModelForCausalLM.from_pretrained(
        GEMMA_ID,
        torch_dtype=torch.float32,
        device_map="cpu",
    )
    # Inference only: switch off training-time behaviour such as dropout.
    model.eval()
    print(f" ✅ {GEMMA_ID} loaded!")
except Exception as e:
    # The app cannot answer anything without the LLM, so abort startup.
    print(f" ❌ {GEMMA_ID} FAILED: {e}")
    traceback.print_exc()
    raise SystemExit(
        f"Cannot start without the LLM ({GEMMA_ID}). Check HF_TOKEN and license agreement."
    )
|
| 84 |
-
|
| 85 |
-
# ══════════════════════════════════════════
|
| 86 |
-
# LOAD KITTENTTS
|
| 87 |
-
# ══════════════════════════════════════════
|
| 88 |
-
# Resolve the model repo for the configured mode (default: nano-fp32) and
# start with TTS disabled; it is only enabled after a successful smoke test.
tts_model_name = TTS_MODEL_MAP.get(TTS_MODE, TTS_MODEL_MAP["nano-fp32"])
tts = None
print(f"[2/2] Loading KittenTTS: {TTS_MODE} → {tts_model_name}...")
try:
    from kittentts import KittenTTS

    tts = KittenTTS(tts_model_name)
    # Synthesize one short word to make sure the engine really produces audio.
    test_audio = tts.generate("online", voice=TTS_VOICE, speed=TTS_SPEED)
    if test_audio is None or len(test_audio) == 0:
        print(" ⚠️ KittenTTS test returned empty audio!")
        tts = None
    else:
        print(f" ✅ KittenTTS ready. Model: {TTS_MODE} | Voice: {TTS_VOICE}")
except Exception as e:
    # TTS is optional: report the failure and continue text-only.
    print(f" ⚠️ KittenTTS FAILED: {e}")
    tts = None

# Startup summary banner.
print("=" * 55)
print(" LLM : Gemma 3 270M-IT")
print(f" TTS : {TTS_MODE} ({'READY' if tts else 'DISABLED'})")
print(f" Voice: {TTS_VOICE} | Speed: {TTS_SPEED}")
print(f" Max tokens: {MAX_NEW_TOKENS}")
print("=" * 55)
|
| 110 |
-
|
| 111 |
-
# ══════════════════════════════════════════
|
| 112 |
-
# CHAT MEMORY
|
| 113 |
-
# ══════════════════════════════════════════
|
| 114 |
-
sessions = {}
|
| 115 |
-
|
| 116 |
-
def get_memory(sid):
    """Return the message list stored for session `sid`.

    An empty list is created and registered in `sessions` the first time a
    session id is seen, so the returned list is always the stored one.
    """
    return sessions.setdefault(sid, [])
|
| 120 |
-
|
| 121 |
-
def add_to_memory(sid, role, content):
    """Append one timestamped message to a session and cap the history size.

    Args:
        sid: Session identifier used as the key into `sessions`.
        role: Speaker label stored with the message.
        content: Message text.
    """
    history = get_memory(sid)
    entry = {
        "role": role,
        "content": content,
        "ts": datetime.datetime.now().isoformat(),
    }
    history.append(entry)

    # Keep at most MAX_MEMORY turns (two messages per turn); older messages
    # are dropped by rebinding the session to the trailing slice.
    limit = MAX_MEMORY * 2
    if len(history) > limit:
        sessions[sid] = history[-limit:]
|
| 130 |
-
|
| 131 |
-
# ══════════════════════════════════════════
|
| 132 |
-
# GEMMA RESPONSE GENERATION
|
| 133 |
-
# ══════════════════════════════════════════
|
| 134 |
-
def generate_response(user_input, session_id):
    """Generate the assistant's reply to `user_input` and record the exchange.

    The prompt is built from the system instruction (sent as a first user
    turn with a canned assistant acknowledgement), up to the last 12 stored
    messages of the session, and the new user message.

    Args:
        user_input: The text the user just sent.
        session_id: Key of the conversation whose memory is read and updated.

    Returns:
        The reply text, or a fixed fallback sentence when the model produced
        fewer than two characters.
    """
    history = get_memory(session_id)

    # System instruction is injected as an ordinary user/assistant exchange.
    messages = [
        {"role": "user", "content": f"[System Instruction]\n{SYSTEM_PROMPT}"},
        {"role": "assistant", "content": "I am waiting for you!"},
    ]
    # Last 6 turns (12 messages); any stored role other than "user" is sent
    # to the model as "assistant".
    messages.extend(
        {
            "role": "user" if past["role"] == "user" else "assistant",
            "content": past["content"],
        }
        for past in history[-(6 * 2):]
    )
    messages.append({"role": "user", "content": user_input})

    input_ids = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True,
    )

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=0.9,
            top_k=45,
            top_p=0.97,
        )

    # Everything past the prompt length is newly generated text.
    prompt_len = input_ids.shape[-1]
    reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    # Cut the reply at the first leftover turn marker, if any survived decoding.
    for marker in ("<end_of_turn>", "<start_of_turn>"):
        reply = reply.split(marker)[0].strip()

    if len(reply) < 2:
        reply = "I appear to have momentarily lost my train of thought. Could you rephrase that?"

    add_to_memory(session_id, "user", user_input)
    add_to_memory(session_id, "assistant", reply)
    return reply
|
| 184 |
-
|
| 185 |
-
# ══════════════════════════════════════════
|
| 186 |
-
# TTS SYNTHESIS
|
| 187 |
-
# ══════════════════════════════════════════
|
| 188 |
-
def synthesize_speech(text, voice=None):
    """Synthesize `text` to speech and return it as base64-encoded WAV.

    Args:
        text: Text to speak; it is passed through `clean_text_for_tts` first.
        voice: Voice name; falls back to `TTS_VOICE` when falsy.

    Returns:
        A base64 string of a 16-bit PCM WAV written at 24000 Hz, or ``None``
        when TTS is disabled, the cleaned text is shorter than two
        characters, the engine returns no audio, or any error occurs.
    """
    if tts is None:
        return None

    try:
        speaker = voice or TTS_VOICE
        spoken = clean_text_for_tts(text)
        if len(spoken) < 2:
            return None

        # Cap the input at 400 characters before handing it to the engine.
        spoken = spoken[:400]

        audio = tts.generate(spoken, voice=speaker, speed=TTS_SPEED)
        if audio is None or len(audio) == 0:
            return None

        wav = io.BytesIO()
        sf.write(wav, audio, 24000, format='WAV', subtype='PCM_16')
        return base64.b64encode(wav.getvalue()).decode('utf-8')
    except Exception as e:
        # Speech is best-effort: log and let the caller continue text-only.
        print(f"TTS Error: {e}")
        return None
|
| 208 |
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
# ══════════════════════════════════════════
|
| 212 |
-
HTML_PAGE = """<!DOCTYPE html>
|
| 213 |
<html lang="en">
|
| 214 |
<head>
|
| 215 |
<meta charset="UTF-8">
|
| 216 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 217 |
-
<title>
|
| 218 |
<style>
|
| 219 |
-
*{margin:0;padding:0;
|
| 220 |
-
body{
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
}
|
| 225 |
-
.
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
}
|
| 231 |
-
.
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
}
|
| 238 |
-
.
|
| 239 |
-
|
| 240 |
-
transform:translate(-50%,-50%);width:12px;height:12px;
|
| 241 |
-
border-radius:50%;background:#00d4ff;box-shadow:0 0 8px #00d4ff;
|
| 242 |
-
}
|
| 243 |
-
@keyframes pulse{
|
| 244 |
-
0%,100%{box-shadow:0 0 20px #00d4ff88,0 0 40px #00d4ff44}
|
| 245 |
-
50%{box-shadow:0 0 30px #00d4ffaa,0 0 60px #00d4ff66}
|
| 246 |
-
}
|
| 247 |
-
.htitle h1{font-size:1.2rem;color:#00d4ff;letter-spacing:3px;text-transform:uppercase}
|
| 248 |
-
.htitle p{font-size:.65rem;color:#5a8a9a;letter-spacing:1px}
|
| 249 |
-
.hctrl{display:flex;gap:8px;align-items:center;flex-wrap:wrap}
|
| 250 |
-
.cbtn{
|
| 251 |
-
background:#0d1b2a;border:1px solid #00d4ff44;color:#00d4ff;
|
| 252 |
-
padding:5px 12px;border-radius:6px;cursor:pointer;
|
| 253 |
-
font-size:.7rem;transition:all .3s;letter-spacing:.5px;
|
| 254 |
-
}
|
| 255 |
-
.cbtn:hover{background:#00d4ff22;border-color:#00d4ff88}
|
| 256 |
-
.cbtn.active{background:#00d4ff22;border-color:#00d4ff;box-shadow:0 0 8px #00d4ff44}
|
| 257 |
-
.sdot{width:8px;height:8px;border-radius:50%;background:#00ff88;box-shadow:0 0 6px #00ff88}
|
| 258 |
-
.sdot.err{background:#ff4444;box-shadow:0 0 6px #ff4444}
|
| 259 |
-
|
| 260 |
-
.cfgbar{
|
| 261 |
-
background:#0d1117;border-bottom:1px solid #00d4ff15;
|
| 262 |
-
padding:8px 20px;display:none;flex-wrap:wrap;gap:12px;
|
| 263 |
-
align-items:center;flex-shrink:0;
|
| 264 |
-
}
|
| 265 |
-
.cfgbar.open{display:flex}
|
| 266 |
-
.cgrp{display:flex;align-items:center;gap:6px}
|
| 267 |
-
.cgrp label{font-size:.65rem;color:#5a8a9a;text-transform:uppercase;letter-spacing:1px}
|
| 268 |
-
.cgrp select{
|
| 269 |
-
background:#0f1923;border:1px solid #00d4ff33;color:#00d4ff;
|
| 270 |
-
padding:4px 8px;border-radius:4px;font-size:.7rem;cursor:pointer;outline:none;
|
| 271 |
-
}
|
| 272 |
-
.cgrp select:focus{border-color:#00d4ff}
|
| 273 |
-
.ctag{
|
| 274 |
-
font-size:.6rem;padding:3px 8px;border-radius:10px;
|
| 275 |
-
background:#00d4ff15;border:1px solid #00d4ff33;color:#00d4ffaa;
|
| 276 |
-
}
|
| 277 |
-
|
| 278 |
-
.chat{
|
| 279 |
-
flex:1;overflow-y:auto;padding:16px 20px;
|
| 280 |
-
display:flex;flex-direction:column;gap:14px;scroll-behavior:smooth;
|
| 281 |
-
}
|
| 282 |
-
.chat::-webkit-scrollbar{width:3px}
|
| 283 |
-
.chat::-webkit-scrollbar-thumb{background:#00d4ff33;border-radius:2px}
|
| 284 |
-
|
| 285 |
-
.msg{
|
| 286 |
-
max-width:80%;padding:12px 16px;border-radius:14px;
|
| 287 |
-
font-size:.9rem;line-height:1.6;animation:fadeIn .3s ease-out;
|
| 288 |
-
}
|
| 289 |
-
@keyframes fadeIn{
|
| 290 |
-
from{opacity:0;transform:translateY(8px)}
|
| 291 |
-
to{opacity:1;transform:translateY(0)}
|
| 292 |
-
}
|
| 293 |
-
.msg.user{
|
| 294 |
-
align-self:flex-end;background:linear-gradient(135deg,#1a3a5c,#0d2847);
|
| 295 |
-
border:1px solid #00d4ff33;color:#c8e6ff;border-bottom-right-radius:4px;
|
| 296 |
-
}
|
| 297 |
-
.msg.bot{
|
| 298 |
-
align-self:flex-start;background:linear-gradient(135deg,#141e30,#0f1923);
|
| 299 |
-
border:1px solid #00d4ff22;color:#e0e0e0;border-bottom-left-radius:4px;
|
| 300 |
-
}
|
| 301 |
-
.msg .lbl{font-size:.58rem;color:#00d4ff88;letter-spacing:2px;margin-bottom:5px;text-transform:uppercase}
|
| 302 |
-
.msg .txt{white-space:pre-wrap;word-wrap:break-word}
|
| 303 |
-
.msg .actrl{margin-top:8px;display:flex;align-items:center;gap:8px}
|
| 304 |
-
.abtn{
|
| 305 |
-
display:inline-flex;align-items:center;gap:4px;
|
| 306 |
-
background:#00d4ff15;border:1px solid #00d4ff33;color:#00d4ff;
|
| 307 |
-
padding:3px 10px;border-radius:10px;cursor:pointer;
|
| 308 |
-
font-size:.65rem;transition:all .2s;
|
| 309 |
-
}
|
| 310 |
-
.abtn:hover{background:#00d4ff25;border-color:#00d4ff66}
|
| 311 |
-
.abtn:disabled{opacity:.3;cursor:wait}
|
| 312 |
-
.astat{font-size:.58rem;color:#5a8a9a}
|
| 313 |
-
|
| 314 |
-
.typi{align-self:flex-start;display:flex;gap:5px;padding:14px 18px}
|
| 315 |
-
.typi span{width:7px;height:7px;border-radius:50%;background:#00d4ff;animation:typ 1.4s infinite}
|
| 316 |
-
.typi span:nth-child(2){animation-delay:.2s}
|
| 317 |
-
.typi span:nth-child(3){animation-delay:.4s}
|
| 318 |
-
@keyframes typ{
|
| 319 |
-
0%,60%,100%{opacity:.2;transform:scale(.8)}
|
| 320 |
-
30%{opacity:1;transform:scale(1.1)}
|
| 321 |
-
}
|
| 322 |
-
|
| 323 |
-
.welcome{
|
| 324 |
-
display:flex;flex-direction:column;align-items:center;
|
| 325 |
-
justify-content:center;flex:1;gap:10px;opacity:.5;
|
| 326 |
-
}
|
| 327 |
-
.welcome .breact{
|
| 328 |
-
width:70px;height:70px;border-radius:50%;
|
| 329 |
-
background:radial-gradient(circle,#00d4ff 0%,#0088aa 35%,#004466 65%,transparent 100%);
|
| 330 |
-
box-shadow:0 0 40px #00d4ff66;animation:pulse 2s ease-in-out infinite;
|
| 331 |
-
}
|
| 332 |
-
.welcome h2{color:#00d4ff;font-size:1rem;letter-spacing:4px}
|
| 333 |
-
.welcome p{color:#5a8a9a;font-size:.75rem}
|
| 334 |
-
.welcome .minfo{font-size:.65rem;color:#3a5a6a;margin-top:4px}
|
| 335 |
-
|
| 336 |
-
.inbar{
|
| 337 |
-
padding:14px 20px;background:linear-gradient(0deg,#0d1b2a,#0a0a1a);
|
| 338 |
-
border-top:1px solid #00d4ff22;flex-shrink:0;
|
| 339 |
-
}
|
| 340 |
-
.inwrap{display:flex;gap:8px;max-width:900px;margin:0 auto}
|
| 341 |
-
#msgIn{
|
| 342 |
-
flex:1;background:#0f1923;border:1px solid #00d4ff33;border-radius:12px;
|
| 343 |
-
padding:11px 16px;color:#e0e0e0;font-size:.9rem;outline:none;
|
| 344 |
-
transition:border-color .3s;font-family:inherit;
|
| 345 |
-
}
|
| 346 |
-
#msgIn:focus{border-color:#00d4ff88;box-shadow:0 0 12px #00d4ff22}
|
| 347 |
-
#msgIn::placeholder{color:#3a5a6a}
|
| 348 |
-
#sendBtn{
|
| 349 |
-
background:linear-gradient(135deg,#00d4ff,#0088cc);border:none;border-radius:12px;
|
| 350 |
-
padding:11px 22px;color:#0a0a1a;font-weight:700;cursor:pointer;
|
| 351 |
-
font-size:.8rem;letter-spacing:1px;transition:all .3s;text-transform:uppercase;
|
| 352 |
-
}
|
| 353 |
-
#sendBtn:hover{box-shadow:0 0 18px #00d4ff66;transform:translateY(-1px)}
|
| 354 |
-
#sendBtn:disabled{opacity:.4;cursor:not-allowed;transform:none}
|
| 355 |
-
.infoot{
|
| 356 |
-
display:flex;justify-content:space-between;margin-top:5px;
|
| 357 |
-
max-width:900px;margin-left:auto;margin-right:auto;
|
| 358 |
-
}
|
| 359 |
-
.infoot span{font-size:.6rem;color:#3a5a6a}
|
| 360 |
-
|
| 361 |
-
@media(max-width:640px){
|
| 362 |
-
.header{padding:10px 12px}
|
| 363 |
-
.htitle h1{font-size:1rem}
|
| 364 |
-
.msg{max-width:92%;font-size:.82rem}
|
| 365 |
-
.chat{padding:10px}
|
| 366 |
-
.inbar{padding:10px}
|
| 367 |
-
.cfgbar{padding:6px 12px}
|
| 368 |
-
}
|
| 369 |
</style>
|
| 370 |
</head>
|
| 371 |
<body>
|
| 372 |
-
|
| 373 |
-
<
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
</div>
|
| 387 |
-
</div>
|
| 388 |
-
|
| 389 |
-
<div class="cfgbar" id="cfgPanel">
|
| 390 |
-
<div class="cgrp">
|
| 391 |
-
<label>LLM:</label>
|
| 392 |
-
<span class="ctag">Gemma 3 270M-IT</span>
|
| 393 |
-
</div>
|
| 394 |
-
<div class="cgrp">
|
| 395 |
-
<label>TTS:</label>
|
| 396 |
-
<span class="ctag" id="ttsTag">loading...</span>
|
| 397 |
-
</div>
|
| 398 |
-
<div class="cgrp">
|
| 399 |
-
<label>Voice:</label>
|
| 400 |
-
<select id="voiceSel">
|
| 401 |
-
<option value="Kiki">Kiki</option>
|
| 402 |
-
<option value="Bella">Bella</option>
|
| 403 |
-
<option value="Jasper">Jasper</option>
|
| 404 |
-
<option value="Luna">Luna</option>
|
| 405 |
-
<option value="Bruno">Bruno</option>
|
| 406 |
-
<option value="Rosie">Rosie</option>
|
| 407 |
-
<option value="Hugo">Hugo</option>
|
| 408 |
-
<option value="Leo">Leo</option>
|
| 409 |
-
</select>
|
| 410 |
-
</div>
|
| 411 |
-
<div class="cgrp">
|
| 412 |
-
<label>TTS env options:</label>
|
| 413 |
-
<span class="ctag">nano-fp32</span>
|
| 414 |
-
<span class="ctag">nano-int8</span>
|
| 415 |
-
<span class="ctag">micro</span>
|
| 416 |
-
<span class="ctag">mini</span>
|
| 417 |
-
</div>
|
| 418 |
-
</div>
|
| 419 |
-
|
| 420 |
-
<div class="chat" id="chatBox">
|
| 421 |
-
<div class="welcome" id="welc">
|
| 422 |
-
<div class="breact"></div>
|
| 423 |
-
<h2>SYSTEMS ONLINE</h2>
|
| 424 |
-
<p>Type a message below to begin interaction</p>
|
| 425 |
-
<div class="minfo" id="wInfo">Initializing...</div>
|
| 426 |
-
</div>
|
| 427 |
</div>
|
| 428 |
-
|
| 429 |
-
<div class="inbar">
|
| 430 |
-
<div class="inwrap">
|
| 431 |
-
<input type="text" id="msgIn" placeholder="Talk to J.A.R.V.I.S..." autocomplete="off"/>
|
| 432 |
-
<button id="sendBtn" onclick="send()">SEND</button>
|
| 433 |
-
</div>
|
| 434 |
-
<div class="infoot">
|
| 435 |
-
<span id="memCt">Memory: 0 turns</span>
|
| 436 |
-
<span id="modInfo">Loading...</span>
|
| 437 |
-
</div>
|
| 438 |
-
</div>
|
| 439 |
-
|
| 440 |
<script>
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
const
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
document.getElementById('cfgPanel').classList.toggle('open');
|
| 456 |
-
document.getElementById('cfgBtn').classList.toggle('active');
|
| 457 |
-
}
|
| 458 |
-
|
| 459 |
-
async function send(){
|
| 460 |
-
const t=I.value.trim();
|
| 461 |
-
if(!t||busy)return;
|
| 462 |
-
const w=document.getElementById('welc');
|
| 463 |
-
if(w)w.style.display='none';
|
| 464 |
-
addMsg(t,'user');
|
| 465 |
-
I.value='';busy=true;B.disabled=true;
|
| 466 |
-
const ty=showTyp();
|
| 467 |
-
const id=++mc;
|
| 468 |
-
try{
|
| 469 |
-
const r=await fetch('/chat',{
|
| 470 |
-
method:'POST',headers:{'Content-Type':'application/json'},
|
| 471 |
-
body:JSON.stringify({message:t,session_id:sid})
|
| 472 |
-
});
|
| 473 |
-
if(!r.ok)throw new Error('HTTP '+r.status);
|
| 474 |
-
const d=await r.json();
|
| 475 |
-
ty.remove();
|
| 476 |
-
const el=addBot(d.response,id);
|
| 477 |
-
document.getElementById('memCt').textContent='Memory: '+d.memory_length+' turns';
|
| 478 |
-
if(ttsOn&&d.tts_available)fetchAudio(d.response,el);
|
| 479 |
-
}catch(e){
|
| 480 |
-
ty.remove();
|
| 481 |
-
addBot('System malfunction. Please try again.',id);
|
| 482 |
-
console.error(e);
|
| 483 |
}
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
}
|
| 505 |
-
|
| 506 |
-
if(st)st.textContent='⚠️ Voice error';
|
| 507 |
-
if(pb)pb.style.display='none';
|
| 508 |
-
}
|
| 509 |
-
}
|
| 510 |
-
|
| 511 |
-
// Append a chat bubble with class "msg <role>" containing text t, then call sc().
// esc(), C and sc() are defined elsewhere in this script (not visible in this view).
function addMsg(t,role){
  const bubble=document.createElement('div');
  bubble.className='msg '+role;
  bubble.innerHTML='<div class="txt">'+esc(t)+'</div>';
  C.appendChild(bubble);
  sc();
}
|
| 517 |
-
// Append a bot bubble (element id "m<id>") with text t and return the element.
// When ttsOn is set, a disabled replay button and a status label are included.
function addBot(t,id){
  const bubble=document.createElement('div');
  bubble.className='msg bot';
  bubble.id='m'+id;

  let html='<div class="lbl">⟐ JARVIS</div><div class="txt">'+esc(t)+'</div>';
  if(ttsOn){
    html+='<div class="actrl"><button class="abtn" disabled onclick="replay(this)">⏳</button><span class="astat">Requesting voice...</span></div>';
  }
  bubble.innerHTML=html;

  C.appendChild(bubble);
  sc();
  return bubble;
}
|
| 524 |
-
// Append the three-dot typing indicator to the chat and return its element,
// so the caller can remove it again.
function showTyp(){
  const dots=document.createElement('div');
  dots.className='typi';
  dots.innerHTML='<span></span>'.repeat(3);
  C.appendChild(dots);
  sc();
  return dots;
}
|
| 530 |
-
|
| 531 |
-
// Decode a base64 WAV payload and play it.
// b: base64 string of the audio bytes.
// The temporary object URL is released when playback ends, when the audio
// element reports an error, when play() is rejected (e.g. autoplay blocked),
// or when decoding throws - previously it was only released on "ended" and
// leaked in every other case.
function playB64(b){
  let url=null;
  const release=()=>{
    if(url){URL.revokeObjectURL(url);url=null;}
  };
  try{
    const bin=atob(b);
    const u8=new Uint8Array(bin.length);
    for(let i=0;i<bin.length;i++)u8[i]=bin.charCodeAt(i);
    url=URL.createObjectURL(new Blob([u8],{type:'audio/wav'}));
    const a=new Audio(url);
    // Handlers are attached before play() so no event can be missed.
    a.onended=release;
    a.onerror=release;
    a.play().catch(e=>{
      console.log('Autoplay blocked:',e);
      release();
    });
  }catch(e){
    console.error(e);
    release();
  }
}
|
| 541 |
-
// Replay the clip stored in the button's data-audio attribute, if there is one.
function replay(b){
  const clip=b.dataset.audio;
  if(clip){
    playB64(clip);
  }
}
|
| 542 |
-
|
| 543 |
-
// Ask the server to drop this session's memory, reset the chat area to the
// welcome screen, and switch to a fresh session id.
async function clearChat(){
  const request={
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body:JSON.stringify({session_id:sid})
  };
  await fetch('/clear',request);

  C.innerHTML='<div class="welcome" id="welc"><div class="breact"></div><h2>SYSTEMS ONLINE</h2><p>Type a message below to begin</p></div>';
  document.getElementById('memCt').textContent='Memory: 0 turns';

  // Use crypto.randomUUID when the browser has it; otherwise build an id from
  // the current time plus a random suffix.
  if(crypto.randomUUID){
    sid=crypto.randomUUID();
  }else{
    sid=Date.now().toString(36)+Math.random().toString(36).slice(2);
  }
}
|
| 549 |
|
| 550 |
-
|
| 551 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
document.getElementById('modInfo').textContent='Gemma 3 · '+d.tts_mode+' · '+d.tts_voice+' · CPU';
|
| 556 |
-
const wi=document.getElementById('wInfo');
|
| 557 |
-
if(wi)wi.textContent='LLM: Gemma 3 270M-IT | TTS: '+d.tts_mode+' | Voice: '+d.tts_voice;
|
| 558 |
-
if(d.tts_model==='DISABLED')document.getElementById('sDot').classList.add('err');
|
| 559 |
-
if(d.tts_voice){document.getElementById('voiceSel').value=d.tts_voice;voice=d.tts_voice}
|
| 560 |
-
}).catch(()=>{});
|
| 561 |
|
| 562 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
</script>
|
| 564 |
</body>
|
| 565 |
-
</html>
|
|
|
|
| 566 |
|
| 567 |
-
# ══════════════════════════════════════════
|
| 568 |
-
# FLASK APP
|
| 569 |
-
# ══════════════════════════════════════════
|
| 570 |
-
app = Flask(__name__)
|
| 571 |
|
| 572 |
@app.route("/")
def index():
    """Serve the chat page held in HTML_PAGE."""
    return HTML_PAGE
|
| 575 |
|
|
|
|
| 576 |
@app.route("/chat", methods=["POST"])
|
| 577 |
def chat():
|
| 578 |
-
data = request.json
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
"memory_length": len(get_memory(session_id)),
|
| 597 |
-
})
|
| 598 |
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
|
| 605 |
-
|
| 606 |
-
return jsonify({"error": "Empty text"}), 400
|
| 607 |
-
if tts is None:
|
| 608 |
-
return jsonify({"error": "TTS not available", "audio": None}), 200
|
| 609 |
|
| 610 |
-
audio_b64 = synthesize_speech(text, voice=voice)
|
| 611 |
-
return jsonify({"audio": audio_b64})
|
| 612 |
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
sid = data.get("session_id", "")
|
| 617 |
-
if sid in sessions:
|
| 618 |
-
del sessions[sid]
|
| 619 |
-
return jsonify({"status": "cleared"})
|
| 620 |
|
| 621 |
-
@app.route("/health")
def health():
    """Report service status and the active LLM/TTS configuration as JSON.

    `tts_model` is the resolved model name, or the string "DISABLED" when
    the TTS engine is not available.
    """
    voices = ["Bella", "Jasper", "Luna", "Bruno", "Rosie", "Hugo", "Kiki", "Leo"]
    active_tts_model = tts_model_name if tts else "DISABLED"
    payload = {
        "status": "online",
        "llm": "Gemma 3 270M-IT",
        "tts_mode": TTS_MODE,
        "tts_model": active_tts_model,
        "tts_voice": TTS_VOICE,
        "tts_voices": voices,
        "max_new_tokens": MAX_NEW_TOKENS,
    }
    return jsonify(payload)
|
| 632 |
|
| 633 |
if __name__ == "__main__":
|
| 634 |
-
|
| 635 |
-
app.run(host="0.0.0.0", port=
|
|
|
|
| 1 |
import os
|
| 2 |
+
import time
|
| 3 |
+
import threading
|
| 4 |
+
from flask import Flask, request, jsonify, Response, stream_with_context
|
| 5 |
+
from huggingface_hub import hf_hub_download
|
| 6 |
+
|
| 7 |
+
# --- Download model at startup ---
# The GGUF file is stored under MODEL_DIR and fetched only when it is not
# already present at MODEL_PATH.
MODEL_DIR = "/tmp/models"
REPO = "mradermacher/LFM2-2.6B-Uncensored-X64-GGUF"
FILENAME = "LFM2-2.6B-Uncensored-X64.Q3_K_S.gguf"
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)

os.makedirs(MODEL_DIR, exist_ok=True)

if not os.path.exists(MODEL_PATH):
    print(f"Downloading {FILENAME} ...")
    hf_hub_download(
        repo_id=REPO,
        filename=FILENAME,
        local_dir=MODEL_DIR,
    )
    print("Download complete.")

from llama_cpp import Llama

# --- Load the model once, at import time ---
print("Loading model ...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    # Use every CPU reported by the OS; fall back to 4 when the count is unknown.
    n_threads=os.cpu_count() or 4,
    verbose=False,
)
print("Model loaded.")
|
| 30 |
|
| 31 |
+
app = Flask(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
HTML_PAGE = """
|
| 34 |
+
<!DOCTYPE html>
|
|
|
|
|
|
|
| 35 |
<html lang="en">
|
| 36 |
<head>
|
| 37 |
<meta charset="UTF-8">
|
| 38 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 39 |
+
<title>LFM2-2.6B Chat</title>
|
| 40 |
<style>
|
| 41 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 42 |
+
body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: #1a1a2e; color: #eee; display: flex; justify-content: center; align-items: center; min-height: 100vh; }
|
| 43 |
+
.container { width: 100%; max-width: 800px; padding: 20px; }
|
| 44 |
+
h1 { text-align: center; margin-bottom: 6px; color: #e94560; font-size: 1.5rem; }
|
| 45 |
+
.subtitle { text-align: center; margin-bottom: 20px; color: #888; font-size: 0.85rem; }
|
| 46 |
+
.chatbox { background: #16213e; border-radius: 12px; padding: 20px; height: 55vh; overflow-y: auto; margin-bottom: 15px; border: 1px solid #0f3460; }
|
| 47 |
+
.msg { margin-bottom: 14px; line-height: 1.6; }
|
| 48 |
+
.msg.user { color: #e94560; }
|
| 49 |
+
.msg.user::before { content: "You: "; font-weight: bold; }
|
| 50 |
+
.msg.bot { color: #a8d8ea; }
|
| 51 |
+
.msg.bot::before { content: "AI: "; font-weight: bold; }
|
| 52 |
+
.stats { color: #666; font-size: 0.78rem; margin-top: 4px; }
|
| 53 |
+
.input-row { display: flex; gap: 10px; }
|
| 54 |
+
textarea { flex: 1; padding: 12px; border-radius: 8px; border: 1px solid #0f3460; background: #16213e; color: #eee; font-size: 1rem; resize: none; height: 60px; font-family: inherit; }
|
| 55 |
+
textarea:focus { outline: none; border-color: #e94560; }
|
| 56 |
+
button { padding: 12px 28px; border-radius: 8px; border: none; background: #e94560; color: #fff; font-size: 1rem; cursor: pointer; font-weight: bold; }
|
| 57 |
+
button:hover { background: #c73650; }
|
| 58 |
+
button:disabled { background: #555; cursor: not-allowed; }
|
| 59 |
+
.settings { display: flex; gap: 15px; margin-bottom: 15px; flex-wrap: wrap; align-items: center; }
|
| 60 |
+
.settings label { font-size: 0.85rem; color: #aaa; }
|
| 61 |
+
.settings input, .settings select { background: #16213e; border: 1px solid #0f3460; color: #eee; padding: 5px 8px; border-radius: 6px; width: 80px; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
</style>
|
| 63 |
</head>
|
| 64 |
<body>
|
| 65 |
+
<div class="container">
|
| 66 |
+
<h1>LFM2-2.6B Uncensored</h1>
|
| 67 |
+
<p class="subtitle">Running locally on CPU with llama.cpp</p>
|
| 68 |
+
<div class="settings">
|
| 69 |
+
<label>Max tokens: <input type="number" id="maxTokens" value="256" min="16" max="2048"></label>
|
| 70 |
+
<label>Temperature: <input type="number" id="temperature" value="0.7" min="0" max="2" step="0.1"></label>
|
| 71 |
+
<label>Top-P: <input type="number" id="topP" value="0.9" min="0" max="1" step="0.05"></label>
|
| 72 |
+
<button onclick="clearChat()" style="padding:5px 14px;font-size:0.85rem;">Clear</button>
|
| 73 |
+
</div>
|
| 74 |
+
<div class="chatbox" id="chatbox"></div>
|
| 75 |
+
<div class="input-row">
|
| 76 |
+
<textarea id="userInput" placeholder="Type your message..." onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();sendMsg();}"></textarea>
|
| 77 |
+
<button id="sendBtn" onclick="sendMsg()">Send</button>
|
| 78 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
<script>
|
| 81 |
+
const chatbox = document.getElementById('chatbox');
|
| 82 |
+
const userInput = document.getElementById('userInput');
|
| 83 |
+
const sendBtn = document.getElementById('sendBtn');
|
| 84 |
+
let history = [];
|
| 85 |
+
|
| 86 |
+
// Append a message element with class "msg <role>" showing `text` to the
// chatbox, scroll the chatbox to the bottom, and return the new element.
// When `stats` is truthy it is shown in a nested "stats" line.
function addMsg(role, text, stats) {
  const entry = document.createElement('div');
  entry.className = 'msg ' + role;
  entry.textContent = text;

  if (stats) {
    const statsLine = Object.assign(document.createElement('div'), {
      className: 'stats',
      textContent: stats
    });
    entry.appendChild(statsLine);
  }

  chatbox.appendChild(entry);
  chatbox.scrollTop = chatbox.scrollHeight;
  return entry;
}
|
| 100 |
+
|
| 101 |
+
function clearChat() {
    // Empty the visible chat window and forget the conversation so the next
    // request starts from a blank message list.
    chatbox.innerHTML = '';
    history = [];
}
|
| 105 |
+
|
| 106 |
+
async function sendMsg() {
    // Post the conversation to /chat and stream the reply into a new bot
    // bubble. The response body is read as "data: <json>" lines; objects with
    // a `token` field extend the reply text, an object with a `stats` field
    // supplies the footer shown under the reply, and "[DONE]" is ignored.
    const text = userInput.value.trim();
    if (!text) return;
    userInput.value = '';
    addMsg('user', text);
    history.push({role: 'user', content: text});
    sendBtn.disabled = true;

    const botDiv = addMsg('bot', '');
    // Assigning an empty string to textContent leaves the element with no
    // child nodes, so create the text node explicitly. Streaming into it keeps
    // the reply text separate from the stats <div> appended afterwards.
    const replyNode = document.createTextNode('');
    botDiv.appendChild(replyNode);

    let full = '';
    let statsText = '';

    // Interpret one complete line of the event stream.
    const handleLine = (line) => {
        if (!line.startsWith('data: ')) return;
        const data = line.slice(6).trim();
        if (data === '[DONE]') return;
        let payload;
        try {
            payload = JSON.parse(data);
        } catch (e) {
            // Lines are buffered until complete, so this should not happen;
            // report it instead of failing silently.
            console.warn('Skipping malformed stream line:', data);
            return;
        }
        if (payload.token) {
            full += payload.token;
            replyNode.textContent = full;
            chatbox.scrollTop = chatbox.scrollHeight;
        }
        if (payload.stats) statsText = payload.stats;
    };

    try {
        const resp = await fetch('/chat', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({
                messages: history,
                max_tokens: parseInt(document.getElementById('maxTokens').value) || 256,
                temperature: parseFloat(document.getElementById('temperature').value) || 0.7,
                top_p: parseFloat(document.getElementById('topP').value) || 0.9
            })
        });
        if (!resp.ok) throw new Error('HTTP ' + resp.status);

        const reader = resp.body.getReader();
        const decoder = new TextDecoder();
        // Network chunks do not align with line boundaries: keep the trailing
        // partial line here until the rest of it arrives.
        let pending = '';

        while (true) {
            const {done, value} = await reader.read();
            if (done) break;
            pending += decoder.decode(value, {stream: true});
            // NOTE(review): the doubled backslash is kept from the original;
            // presumably this script sits inside a non-raw Python string.
            const lines = pending.split('\\n');
            pending = lines.pop();
            for (const line of lines) handleLine(line);
        }
        // Flush any bytes the decoder was still holding, then the last line.
        pending += decoder.decode();
        if (pending) handleLine(pending);

        if (statsText) {
            const s = document.createElement('div');
            s.className = 'stats';
            s.textContent = statsText;
            botDiv.appendChild(s);
        }

        history.push({role: 'assistant', content: full});
    } catch (e) {
        botDiv.textContent = 'Error: ' + e.message;
    } finally {
        // Always give control back to the user, whatever happened above.
        sendBtn.disabled = false;
        chatbox.scrollTop = chatbox.scrollHeight;
    }
}
|
| 167 |
</script>
|
| 168 |
</body>
|
| 169 |
+
</html>
|
| 170 |
+
"""
|
| 171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
@app.route("/")
def index():
    """Serve the single-page chat UI stored in ``HTML_PAGE``."""
    page = HTML_PAGE
    return page
|
| 176 |
|
| 177 |
+
|
| 178 |
def _clamp_number(value, default, low, high, cast=float):
    """Coerce *value* with *cast* and clamp it into ``[low, high]``.

    Returns *default* when the value is missing, a bool, NaN, or cannot be
    converted, so malformed client input never reaches the model call.
    """
    if isinstance(value, bool):
        return default
    try:
        number = cast(value)
    except (TypeError, ValueError, OverflowError):
        return default
    if number != number:  # NaN is the only value unequal to itself
        return default
    return max(low, min(number, high))


@app.route("/chat", methods=["POST"])
def chat():
    """Stream a completion for the posted conversation as server-sent events.

    Expects a JSON object with ``messages`` (a list of ``{"role", "content"}``
    dicts) and optional ``max_tokens``, ``temperature`` and ``top_p``.
    Responds with ``text/event-stream`` lines: one ``{"token": ...}`` object
    per streamed chunk, then one ``{"stats": ...}`` object, then ``[DONE]``.
    A body that is not a JSON object, or whose ``messages`` is not a list,
    gets a 400 JSON error instead of an unhandled exception.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or not isinstance(data.get("messages", []), list):
        return Response(
            jsonify_compact({"error": "expected a JSON object with a 'messages' list"}),
            status=400,
            mimetype="application/json",
        )

    messages = data.get("messages", [])
    # The lower bound matters as much as the cap: without it a zero, negative
    # or non-numeric value reaches the model call unchecked.
    # NOTE(review): some backends treat max_tokens <= 0 as "unlimited" —
    # confirm against the `llm` object used here.
    max_tokens = _clamp_number(data.get("max_tokens"), 256, 1, 2048, cast=int)
    # Ranges mirror the min/max attributes of the inputs in the chat page.
    temperature = _clamp_number(data.get("temperature"), 0.7, 0.0, 2.0)
    top_p = _clamp_number(data.get("top_p"), 0.9, 0.0, 1.0)

    # Build prompt — simple ChatML-ish format. Entries that are not dicts or
    # carry an unknown role are skipped rather than raising.
    known_roles = ("user", "assistant", "system")
    turns = []
    for msg in messages:
        if not isinstance(msg, dict):
            continue
        role = msg.get("role")
        if role in known_roles:
            content = msg.get("content", "")
            turns.append(f"<|{role}|>\n{content}\n")
    prompt = "".join(turns) + "<|assistant|>\n"

    def generate():
        # Counts streamed chunks; reported to the client as "tokens".
        token_count = 0
        start = time.perf_counter()

        stream = llm(
            prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stop=["<|user|>", "<|assistant|>", "<|end|>", "<|endoftext|>"],
            stream=True,
        )

        for output in stream:
            token_text = output["choices"][0]["text"]
            token_count += 1
            yield f"data: {jsonify_compact({'token': token_text})}\n\n"

        elapsed = time.perf_counter() - start
        tps = token_count / elapsed if elapsed > 0 else 0
        stats = f"{token_count} tokens in {elapsed:.1f}s — {tps:.2f} tokens/s"
        yield f"data: {jsonify_compact({'stats': stats})}\n\n"
        yield "data: [DONE]\n\n"

    return Response(stream_with_context(generate()), mimetype="text/event-stream")
|
|
|
|
|
|
|
|
|
|
| 224 |
|
|
|
|
|
|
|
| 225 |
|
| 226 |
+
def jsonify_compact(obj):
    """Serialize *obj* to JSON with no padding and non-ASCII text left as-is.

    The result has no spaces after ``,`` or ``:`` and is a single line,
    because ``json.dumps`` escapes line breaks inside strings.
    """
    import json  # imported here, as in the original, not at module level

    tight_separators = (",", ":")
    return json.dumps(obj, separators=tight_separators, ensure_ascii=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, falling back to 7860.
    app.run(
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
        debug=False,
    )
|