Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,371 +1,470 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
| 2 |
import os
|
| 3 |
-
import re
|
| 4 |
import json
|
| 5 |
-
import time
|
| 6 |
-
from functools import lru_cache
|
| 7 |
-
from typing import List, Dict, Any, Tuple
|
| 8 |
-
|
| 9 |
-
import gradio as gr
|
| 10 |
import torch
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
- Return raw JSON only.
|
| 46 |
-
- Use this schema:
|
| 47 |
-
{{
|
| 48 |
-
"reply": "short natural assistant response",
|
| 49 |
-
"emotion": one of {EMOTIONS},
|
| 50 |
-
"mood_score": number from 0.0 to 1.0,
|
| 51 |
-
"memory_hint": "short note to save for later, or empty string"
|
| 52 |
-
}}
|
| 53 |
-
|
| 54 |
-
Style:
|
| 55 |
-
- Keep the reply clear and friendly.
|
| 56 |
-
- Use short sentences.
|
| 57 |
-
- Match the user's tone.
|
| 58 |
-
- If the user asks for memory, produce a useful memory_hint.
|
| 59 |
-
- If the user gives a preference or profile detail, include it in memory_hint.
|
| 60 |
-
""".strip()
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
MODEL = None
|
| 64 |
-
TOKENIZER = None
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
def normalize_messages(messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Return a trimmed, validated copy of the chat history.

    Each entry is kept only when its ``role`` is ``system``, ``user`` or
    ``assistant`` and its ``content`` is non-empty after stripping
    whitespace. Kept entries are rebuilt with exactly the ``role`` and
    ``content`` keys, and the result is sliced to the trailing
    ``MAX_HISTORY_MESSAGES`` entries.
    """
    allowed_roles = {"system", "user", "assistant"}
    # Lazily pair each role with its whitespace-stripped content.
    role_and_body = (
        (entry.get("role", ""), (entry.get("content") or "").strip())
        for entry in messages
    )
    kept = [
        {"role": role, "content": body}
        for role, body in role_and_body
        if role in allowed_roles and body
    ]
    return kept[-MAX_HISTORY_MESSAGES:]
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
def build_prompt(messages: List[Dict[str, str]]) -> str:
    """Render the system prompt plus normalized history as one prompt string.

    The tokenizer's chat template is used when the tokenizer exposes
    ``apply_chat_template``; otherwise the turns are written as
    ``ROLE: content`` lines followed by a trailing ``ASSISTANT:`` cue.
    """
    system_turn = {"role": "system", "content": SYSTEM_PROMPT}
    conversation = [system_turn] + normalize_messages(messages)
    tok = get_tokenizer()

    if not hasattr(tok, "apply_chat_template"):
        # Plain-text fallback for tokenizers without a chat template.
        lines = [f"{turn['role'].upper()}: {turn['content']}" for turn in conversation]
        lines.append("ASSISTANT:")
        return "\n".join(lines)

    return tok.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
def safe_json_from_text(text: str) -> Dict[str, Any]:
|
| 90 |
-
raw = (text or "").strip()
|
| 91 |
-
candidates = [
|
| 92 |
-
raw,
|
| 93 |
-
re.sub(r"^```(?:json)?\s*|\s*```$", "", raw, flags=re.I | re.S).strip(),
|
| 94 |
-
]
|
| 95 |
-
for candidate in candidates:
|
| 96 |
-
try:
|
| 97 |
-
data = json.loads(candidate)
|
| 98 |
-
if isinstance(data, dict):
|
| 99 |
-
return data
|
| 100 |
-
except Exception:
|
| 101 |
-
pass
|
| 102 |
-
|
| 103 |
-
start = raw.find("{")
|
| 104 |
-
end = raw.rfind("}")
|
| 105 |
-
if start != -1 and end != -1 and end > start:
|
| 106 |
-
chunk = raw[start : end + 1]
|
| 107 |
try:
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
| 111 |
except Exception:
|
| 112 |
pass
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
if MODEL is not None and TOKENIZER is not None:
|
| 136 |
-
return MODEL, TOKENIZER
|
| 137 |
-
|
| 138 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
|
| 139 |
-
|
| 140 |
-
load_kwargs = dict(low_cpu_mem_usage=True)
|
| 141 |
-
try:
|
| 142 |
-
load_kwargs["dtype"] = torch.float16
|
| 143 |
-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_kwargs)
|
| 144 |
-
except TypeError:
|
| 145 |
-
load_kwargs.pop("dtype", None)
|
| 146 |
-
load_kwargs["torch_dtype"] = torch.float16
|
| 147 |
-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_kwargs)
|
| 148 |
-
|
| 149 |
-
if torch.cuda.is_available():
|
| 150 |
-
model = model.to("cuda")
|
| 151 |
-
|
| 152 |
-
model.eval()
|
| 153 |
-
if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
|
| 154 |
-
tokenizer.pad_token_id = tokenizer.eos_token_id
|
| 155 |
-
|
| 156 |
-
MODEL = model
|
| 157 |
-
TOKENIZER = tokenizer
|
| 158 |
-
return MODEL, TOKENIZER
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
def _string_field(data: Dict[str, Any], key: str, default: str = "") -> str:
    """Return ``data[key]`` as stripped text; a missing key or ``None`` yields ``default``."""
    value = data.get(key)
    if value is None:
        # A JSON null must not be stringified into the literal text "None".
        return default
    return str(value).strip()


@spaces.GPU
def generate_reply(messages: List[Dict[str, str]]) -> Dict[str, Any]:
    """Generate one assistant turn and return it as a validated dict.

    Args:
        messages: Chat history, ending with the newest user message.

    Returns:
        A dict with the keys ``reply`` (non-empty text), ``emotion`` (a
        member of ``EMOTIONS``, else ``"neutral"``), ``mood_score`` (the
        clamped float, or ``0.5`` when the value cannot be used) and
        ``memory_hint`` (text, possibly empty).
    """
    model, tokenizer = load_model_once()
    prompt = build_prompt(messages)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    encoded = tokenizer(prompt, return_tensors="pt")
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=MAX_TOKENS,
            do_sample=True,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            repetition_penalty=REPETITION_PENALTY,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the tokens produced after the prompt.
    prompt_length = inputs["input_ids"].shape[1]
    generated = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True).strip()
    data = safe_json_from_text(generated)

    reply = _string_field(data, "reply") or "I’m here."

    emotion = _string_field(data, "emotion", "neutral").lower()
    if emotion not in EMOTIONS:
        emotion = "neutral"

    try:
        mood_score = clamp(float(data.get("mood_score", 0.5)))
    except Exception:
        # Deliberately broad: float() can raise TypeError, ValueError or
        # OverflowError on model-produced values; any failure falls back to
        # the midpoint.
        mood_score = 0.5

    return {
        "reply": reply,
        "emotion": emotion,
        "mood_score": mood_score,
        "memory_hint": _string_field(data, "memory_hint"),
    }
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
def extract_memory_candidate(user_text: str, assistant_text: str, memory_hint: str) -> str:
    """Pick a short fact worth remembering from one chat turn.

    The user text, assistant text and memory hint are scanned for phrases
    such as "my name is ..." or "remember that ...". The patterns are tried
    in order and the first one that matches supplies the result. When none
    matches, the memory hint itself is used.

    Args:
        user_text: The user's message; ``None`` is treated as empty.
        assistant_text: The assistant's reply; ``None`` is treated as empty.
        memory_hint: Model-suggested note; ``None`` is treated as empty.

    Returns:
        The captured phrase (stripped, at most 120 characters), else the
        first 120 characters of ``memory_hint``, else an empty string.
    """
    # Join with newlines rather than spaces: every capture group below
    # excludes "\n", so a phrase that starts in one text can no longer run on
    # into the next one (e.g. "my name is Bob" + "Nice to meet you").
    text = "\n".join([user_text or "", assistant_text or "", memory_hint or ""]).strip()
    if not text:
        return ""

    patterns = (
        r"\bmy name is ([^.!,?\n]+)",
        r"\bcall me ([^.!,?\n]+)",
        r"\bi work as ([^.!,?\n]+)",
        r"\bi like ([^.!,?\n]+)",
        r"\bi prefer ([^.!,?\n]+)",
        r"\bremember that ([^.!,?\n]+)",
    )
    for pattern in patterns:
        match = re.search(pattern, text, flags=re.I)
        if match:
            return match.group(1).strip()[:120]

    if memory_hint:
        return memory_hint[:120]
    return ""
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
def render_status(emotion: str, mood_score: float, memory_count: int) -> str:
    """Format the one-line Markdown status shown above the chat.

    Args:
        emotion: Emotion label to display.
        mood_score: Mood value; it is scaled by ten and rounded to choose
            how many of the ten bar cells are filled.
        memory_count: Number of saved memory items.

    Returns:
        A Markdown string with the mood, a ten-cell energy bar and the
        memory item count.
    """
    # At least one cell is always filled; the upper clamp keeps the bar at
    # exactly ten cells even when mood_score is above 1.0.
    filled = min(10, max(1, int(round(mood_score * 10))))
    bars = ("■" * filled).ljust(10, "□")
    return f"**Mood:** `{emotion}` | **Energy:** `{bars}` | **Memory items:** `{memory_count}`"
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
def add_turn(user_text: str, response: Dict[str, Any], chat: List[Dict[str, str]], memory: List[str]) -> Tuple[List[Dict[str, str]], List[str], str]:
    """Record one user/assistant exchange and refresh memory and status.

    The user message and the assistant reply are appended to ``chat`` in
    place. A memory candidate extracted from the turn is added to a new
    memory list when it is non-empty and not already stored, and that list
    is cut to the trailing ``MAX_MEMORY_ITEMS`` entries.

    Returns:
        ``(chat, memory, status)`` where ``status`` is the rendered status line.
    """
    chat.append(dict(role="user", content=user_text))
    chat.append(dict(role="assistant", content=response["reply"]))

    candidate = extract_memory_candidate(
        user_text,
        response["reply"],
        response.get("memory_hint", ""),
    )
    is_new = bool(candidate) and candidate not in memory
    if is_new:
        # Builds a new list, so the caller's memory list is left untouched.
        memory = (memory + [candidate])[-MAX_MEMORY_ITEMS:]

    status_line = render_status(response["emotion"], response["mood_score"], len(memory))
    return chat, memory, status_line
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
def clear_session():
    """Return reset values: three fresh empty lists, the "Ready." status, and empty text."""
    chat_reset = []
    memory_reset = []
    chatbot_reset = []
    status_reset = "Ready."
    memory_text_reset = ""
    return chat_reset, memory_reset, chatbot_reset, status_reset, memory_text_reset
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
def seed_examples():
    """Return the starter examples as ``[prompt, None]`` pairs."""
    prompts = (
        "Help me plan my day.",
        "Remember that I build apps with Hugging Face and Python.",
    )
    return [[prompt, None] for prompt in prompts]
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
# Gradio UI: header and status, chat column with text and voice input, and a
# side column with emotion, mood and session memory.
# NOTE(review): the source lost its indentation, so the layout nesting below
# is reconstructed from statement order - confirm against the running app.
with gr.Blocks(theme=gr.themes.Soft(), css="""
#app-wrap { max-width: 1200px; margin: 0 auto; }
#header-card { border-radius: 24px; }
#chatbox { min-height: 560px; }
#memory-box { min-height: 220px; }
""") as demo:
    # Session state: the message history and the saved memory notes.
    chat_state = gr.State([])
    memory_state = gr.State([])

    with gr.Column(elem_id="app-wrap"):
        with gr.Row():
            with gr.Column(scale=3):
                gr.Markdown(
                    f"# {APP_NAME}\nA local GPU companion built with Gradio and Qwen."
                )
                status_md = gr.Markdown("Ready.")
            with gr.Column(scale=1):
                clear_btn = gr.Button("Clear session", variant="secondary")

        with gr.Row():
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    value=[],
                    type="messages",
                    height=560,
                    elem_id="chatbox",
                    show_copy_button=True,
                )
                with gr.Row():
                    user_text = gr.Textbox(
                        placeholder="Message FastLLM...",
                        scale=6,
                        show_label=False,
                    )
                    send_btn = gr.Button("Send", variant="primary", scale=1)

                with gr.Accordion("Voice input", open=False):
                    audio_in = gr.Audio(
                        sources=["microphone", "upload"],
                        type="filepath",
                        label="Audio input",
                    )
                    transcribe_btn = gr.Button("Transcribe with local GPU model", variant="secondary")
                    transcript_box = gr.Textbox(label="Transcript", lines=3)

            with gr.Column(scale=1):
                emotion_box = gr.Textbox(label="Emotion", value="neutral", interactive=False)
                mood_box = gr.Slider(label="Mood score", minimum=0, maximum=1, value=0.5, step=0.01, interactive=False)
                memory_box = gr.Textbox(label="Session memory", lines=12, elem_id="memory-box")

    def _memory_as_text(items):
        """Format the memory list as bullet lines for the memory textbox."""
        return "\n".join(f"- {m}" for m in items) if items else "No saved memory yet."

    def respond(user_message, chat, memory):
        """Handle one submitted message.

        Returns nine values in the order of the outputs wired below: cleared
        input text, chat state, memory state, chatbot messages, memory text,
        status line, emotion, mood score and transcript text.
        """
        user_message = (user_message or "").strip()
        if not user_message:
            # The memory textbox takes text, so format the list here instead
            # of passing the raw list through.
            return "", chat, memory, chat, _memory_as_text(memory), "Ready.", "neutral", 0.5, ""

        current_messages = chat + [{"role": "user", "content": user_message}]
        result = generate_reply(current_messages)
        chat, memory, status = add_turn(user_message, result, chat, memory)

        return (
            "",
            chat,
            memory,
            chat,
            _memory_as_text(memory),
            status,
            result["emotion"],
            result["mood_score"],
            # NOTE(review): the reply text is routed to the transcript box;
            # kept as in the original - confirm this is intended.
            result["reply"],
        )

    def transcribe(audio_path):
        """Placeholder transcription: returns a fixed notice when audio is supplied."""
        if not audio_path:
            return ""
        # Stub kept local and simple. Add a Whisper GPU pipeline here when you want audio-to-text.
        return "Audio input connected. Add Whisper transcription in this slot."

    # The Send button and pressing Enter in the textbox share one handler.
    respond_inputs = [user_text, chat_state, memory_state]
    respond_outputs = [user_text, chat_state, memory_state, chatbot, memory_box, status_md, emotion_box, mood_box, transcript_box]

    send_btn.click(
        respond,
        inputs=respond_inputs,
        outputs=respond_outputs,
    )
    user_text.submit(
        respond,
        inputs=respond_inputs,
        outputs=respond_outputs,
    )
    clear_btn.click(
        clear_session,
        inputs=[],
        outputs=[chat_state, memory_state, chatbot, status_md, memory_box],
    )
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
if __name__ == "__main__":
|
| 367 |
-
|
| 368 |
-
server_name="0.0.0.0",
|
| 369 |
-
server_port=7860,
|
| 370 |
-
show_error=True,
|
| 371 |
-
)
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
# Production script for the FastLLM Space.
|
| 3 |
+
# Required dependencies: pip install gradio transformers torch spaces accelerate
|
| 4 |
import os
|
|
|
|
| 5 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import torch
|
| 7 |
+
import spaces
|
| 8 |
+
import gradio as gr
|
| 9 |
+
from threading import Thread
|
| 10 |
+
from typing import Generator
|
| 11 |
+
from fastapi.responses import HTMLResponse
|
| 12 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
| 13 |
+
|
| 14 |
+
# --- 1. LOCAL MODEL SPECIFICATION AND INITIAL CRITICAL CPU LOADING ---
|
| 15 |
+
# Selection of Qwen2.5-1.5B fits the <4B parameters Tiny Titan bracket
|
| 16 |
+
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"

# Load the tokenizer and the float16 weights at import time with the model
# placed on the CPU; run_inference moves the model to CUDA when it is called.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
_MODEL_LOAD_OPTIONS = {"torch_dtype": torch.float16, "device_map": "cpu"}
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **_MODEL_LOAD_OPTIONS)
|
| 25 |
+
|
| 26 |
+
# --- 2. THE QUEUED ASYNC SERVERLESS INFERENCE PIPELINE ---
|
| 27 |
+
@spaces.GPU(duration=30)
def run_inference(message: str, history_str: str) -> Generator[str, None, None]:
    """Stream the model's reply to ``message`` as growing text.

    Args:
        message: The new user message.
        history_str: JSON text encoding a list of two-item
            ``[user_text, assistant_text]`` lists. Empty or unparsable
            text means no history; entries of any other shape are ignored.

    Yields:
        The reply accumulated so far, once per chunk emitted by the streamer.
    """
    # Move the model parameters to the GPU inside the decorated function.
    model.to("cuda")

    # NOTE(review): the source had a bare `messages =` here (a syntax error),
    # so the original system message is unknown. This wording is a
    # placeholder - replace it with the intended system prompt.
    messages = [
        {
            "role": "system",
            "content": "You are a friendly companion. Keep replies clear and short.",
        },
    ]

    # Parse and append the past conversation.
    history = []
    if history_str:
        try:
            history = json.loads(history_str)
        except (TypeError, ValueError):
            # Malformed history is dropped rather than failing the request.
            history = []
    if not isinstance(history, list):
        history = []
    for turn in history:
        if isinstance(turn, list) and len(turn) == 2:
            user_turn, assistant_turn = turn
            messages.append({"role": "user", "content": user_turn})
            messages.append({"role": "assistant", "content": assistant_turn})

    # Append the current prompt.
    messages.append({"role": "user", "content": message})

    # return_dict=True yields a mapping that can be unpacked into generate().
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to("cuda")

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=192,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # generate() blocks, so it runs on a worker thread while this generator
    # drains the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    accumulated_text = ""
    for new_text in streamer:
        accumulated_text += new_text
        yield accumulated_text

    # Wait for the worker to finish so it is not left running after the stream ends.
    thread.join()
|
| 82 |
+
|
| 83 |
+
# --- 3. THE 98% CUSTOM FRONTEND SYSTEM (FRONTEND_HTML) ---
|
| 84 |
+
FRONTEND_HTML = """
|
| 85 |
+
<!DOCTYPE html>
|
| 86 |
+
<html lang="en">
|
| 87 |
+
<head>
|
| 88 |
+
<meta charset="UTF-8">
|
| 89 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
|
| 90 |
+
<title>FastLLM Companion</title>
|
| 91 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 92 |
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js"></script>
|
| 93 |
+
<style>
|
| 94 |
+
body {
|
| 95 |
+
background-color: #ffe082; /* Gold/yellow background from specifications */
|
| 96 |
+
margin: 0;
|
| 97 |
+
overflow: hidden;
|
| 98 |
+
font-family: system-ui, -apple-system, sans-serif;
|
| 99 |
+
-webkit-user-select: none;
|
| 100 |
+
user-select: none;
|
| 101 |
+
-webkit-tap-highlight-color: transparent;
|
| 102 |
+
}
|
| 103 |
+
#c {
|
| 104 |
+
position: fixed;
|
| 105 |
+
top: 0;
|
| 106 |
+
left: 0;
|
| 107 |
+
width: 100%;
|
| 108 |
+
height: 100%;
|
| 109 |
+
z-index: 1;
|
| 110 |
+
}
|
| 111 |
+
.glass-panel {
|
| 112 |
+
background: rgba(8, 10, 22, 0.93);
|
| 113 |
+
backdrop-filter: blur(12px);
|
| 114 |
+
-webkit-backdrop-filter: blur(12px);
|
| 115 |
+
border: 1px solid rgba(255, 255, 255, 0.08);
|
| 116 |
+
}
|
| 117 |
+
</style>
|
| 118 |
+
</head>
|
| 119 |
+
<body class="text-white relative w-screen h-screen">
|
| 120 |
+
|
| 121 |
+
<canvas id="c"></canvas>
|
| 122 |
+
|
| 123 |
+
<div id="drop" class="absolute inset-0 flex flex-col items-center justify-center border-4 border-dashed border-cyan-500/50 m-10 rounded-3xl z-10 pointer-events-none transition-opacity duration-300 opacity-0">
|
| 124 |
+
<h2 class="text-3xl font-extrabold text-[#6cf] mb-2">Drop VRM Model</h2>
|
| 125 |
+
<p class="text-sm text-gray-100 opacity-60">Upload custom characters directly into viewport</p>
|
| 126 |
+
</div>
|
| 127 |
+
|
| 128 |
+
<div id="vrmPanel" class="absolute top-20 left-4 w-80 glass-panel p-4 rounded-2xl z-20 hidden flex-col gap-3">
|
| 129 |
+
<h3 class="font-bold text-sm text-cyan-400 uppercase tracking-widest">Available Companions</h3>
|
| 130 |
+
<div id="vrmList" class="flex-grow overflow-y-auto max-h-48 pr-1">
|
| 131 |
+
<div class="vrmItem flex items-center justify-between p-2 hover:bg-slate-800/60 rounded-xl cursor-pointer">
|
| 132 |
+
<span class="text-xs">Procedural Cyber-Core v1.0</span>
|
| 133 |
+
<span class="dot w-2 h-2 rounded-full bg-emerald-500 shadow-md"></span>
|
| 134 |
+
</div>
|
| 135 |
+
</div>
|
| 136 |
+
<button id="vrmPanelClose" class="text-center text-xs py-2 bg-slate-800 hover:bg-slate-700 rounded-xl mt-2 transition-all">Close Panel</button>
|
| 137 |
+
</div>
|
| 138 |
+
|
| 139 |
+
<div id="speakDot" class="absolute top-1/2 left-1/2 -translate-x-1/2 -translate-y-1/2 z-10 hidden flex items-center gap-1.5 bg-cyan-950/90 px-5 py-2.5 rounded-full border border-cyan-500/30">
|
| 140 |
+
<span class="text-xs text-cyan-400 mr-2 font-mono uppercase tracking-widest">Active</span>
|
| 141 |
+
<div class="sdot w-2 h-2 rounded-full bg-cyan-400 animate-bounce"></div>
|
| 142 |
+
<div class="sdot w-2 h-2 rounded-full bg-cyan-400 animate-bounce" style="animation-delay: 0.15s"></div>
|
| 143 |
+
<div class="sdot w-2 h-2 rounded-full bg-cyan-400 animate-bounce" style="animation-delay: 0.3s"></div>
|
| 144 |
+
</div>
|
| 145 |
+
|
| 146 |
+
<div class="absolute bottom-24 left-1/2 -translate-x-1/2 z-10 w-[90%] max-w-2xl px-6 py-4 rounded-2xl glass-panel text-center hidden pointer-events-auto border-t-2 border-cyan-500/20 shadow-lg" id="subtitle-panel">
|
| 147 |
+
<p id="subtitle-text" class="text-sm text-gray-100 leading-relaxed text-left"></p>
|
| 148 |
+
</div>
|
| 149 |
+
|
| 150 |
+
<div id="vrmaQueue" class="absolute bottom-28 right-4 w-64 max-h-32 overflow-y-auto glass-panel p-2 rounded-xl text-[10px] font-mono text-gray-400 hidden flex flex-col gap-1 z-10">
|
| 151 |
+
<div class="qitem border-b border-gray-800/30 pb-1">Queue: Syncing bones...</div>
|
| 152 |
+
</div>
|
| 153 |
+
|
| 154 |
+
<div id="bar" class="absolute left-1/2 -translate-x-1/2 z-10 w-[95%] max-w-4xl p-2 rounded-3xl glass-panel flex items-center gap-2 pointer-events-auto shadow-2xl">
|
| 155 |
+
<button id="mb" class="p-3 rounded-2xl bg-slate-800/80 hover:bg-slate-700 border border-slate-700 text-cyan-400 transition-all flex-shrink-0 flex items-center justify-center" onclick="toggleMenu()">
|
| 156 |
+
<svg class="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 6V4m0 2a2 2 0 100 4m0-4a2 2 0 110 4m-6 8a2 2 0 100-4m0 4a2 2 0 110-4m0 4v2m0-6V4m6 6v10m6-2a2 2 0 100-4m0 4a2 2 0 110-4m0 4v2m0-6V4"></path></svg>
|
| 157 |
+
</button>
|
| 158 |
+
|
| 159 |
+
<input type="text" id="ti" class="flex-1 py-3 px-4 rounded-2xl bg-slate-900/95 border border-slate-700/60 text-white placeholder-slate-500 focus:outline-none focus:ring-2 focus:ring-cyan-500/50 transition-all" placeholder="Enter message to local AI...">
|
| 160 |
+
|
| 161 |
+
<button id="sb" class="p-3 rounded-2xl bg-gradient-to-r from-[#6cf] to-[#3ae] hover:opacity-95 text-white font-semibold transition-all flex-shrink-0" onclick="handleSend()">
|
| 162 |
+
<svg class="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14 5l7 7m0 0l-7 7m7-7H3"></path></svg>
|
| 163 |
+
</button>
|
| 164 |
+
</div>
|
| 165 |
+
|
| 166 |
+
<div id="load" class="absolute top-20 left-1/2 -translate-x-1/2 glass-panel px-4 py-2 rounded-full text-xs text-amber-400 font-mono tracking-widest uppercase transition-opacity duration-300 opacity-0 z-20">Syncing Local Engine...</div>
|
| 167 |
+
<div id="err" class="absolute top-4 left-4 right-4 bg-red-950/90 border border-red-500/30 text-red-200 px-4 py-2 rounded-xl text-xs font-mono text-center hidden z-30">GPU assignment latency detected. Retrying connection...</div>
|
| 168 |
+
<div id="info" class="absolute top-20 right-4 w-72 glass-panel p-3 rounded-xl border border-blue-500/20 text-xs text-blue-200 hidden z-20">Notice: Running local weights on serverless hardware.</div>
|
| 169 |
+
<div id="fps" class="absolute top-4 right-4 text-xs font-mono text-emerald-400 bg-slate-950/90 px-3 py-1.5 rounded border border-emerald-500/20 z-20">FPS: --</div>
|
| 170 |
+
|
| 171 |
+
<div id="menu" class="absolute bottom-0 left-0 right-0 glass-panel p-6 rounded-t-3xl z-30 transform translate-y-full transition-transform duration-300 max-h-[60vh] overflow-y-auto">
|
| 172 |
+
<div class="flex justify-between items-center mb-4 border-b border-gray-800 pb-2">
|
| 173 |
+
<h3 class="font-bold text-cyan-400 uppercase tracking-widest text-sm">Customization Panel</h3>
|
| 174 |
+
<button class="text-gray-500 hover:text-white text-xl" onclick="toggleMenu()">×</button>
|
| 175 |
+
</div>
|
| 176 |
+
<div class="flex flex-col gap-4">
|
| 177 |
+
<div class="row flex justify-between items-center text-sm">
|
| 178 |
+
<span class="text-gray-300">Ambient Lighting</span>
|
| 179 |
+
<input type="range" min="0.5" max="3" step="0.1" value="1.5" class="accent-cyan-500" oninput="updateGlowIntensity(this.value)">
|
| 180 |
+
</div>
|
| 181 |
+
<div class="row flex justify-between items-center text-sm">
|
| 182 |
+
<span class="text-gray-300">Companion Eye Tint</span>
|
| 183 |
+
<input type="color" value="#06b6d4" class="w-8 h-8 rounded border-none bg-transparent cursor-pointer" onchange="updateEyeColor(this.value)">
|
| 184 |
+
</div>
|
| 185 |
+
<div class="row flex justify-between items-center text-sm">
|
| 186 |
+
<span class="text-gray-300">Key Registration</span>
|
| 187 |
+
<input type="password" placeholder="Key token..." class="bg-slate-900 border border-slate-700 rounded px-2 py-1 text-xs text-white">
|
| 188 |
+
</div>
|
| 189 |
+
<div class="row flex justify-between items-center text-sm">
|
| 190 |
+
<span class="text-gray-300">Mesh Designation</span>
|
| 191 |
+
<input type="text" value="Aya-Companion" class="bg-slate-900 border border-slate-700 rounded px-2 py-1 text-xs text-white">
|
| 192 |
+
</div>
|
| 193 |
+
<div class="row flex justify-between items-center text-sm">
|
| 194 |
+
<span class="text-gray-300">Interaction Node</span>
|
| 195 |
+
<select class="bg-slate-900 border border-slate-700 rounded px-2 py-1 text-xs text-white">
|
| 196 |
+
<option>Empathetic</option>
|
| 197 |
+
<option>Analytical</option>
|
| 198 |
+
</select>
|
| 199 |
+
</div>
|
| 200 |
+
<div class="chips flex gap-2 flex-wrap">
|
| 201 |
+
<span class="chip bg-cyan-950 text-cyan-300 px-3 py-1 rounded-full text-xs cursor-pointer border border-cyan-500/20">Voice Sync</span>
|
| 202 |
+
<span class="chip bg-slate-800 text-slate-300 px-3 py-1 rounded-full text-xs cursor-pointer">Local Text</span>
|
| 203 |
+
</div>
|
| 204 |
+
<div class="fbtn flex gap-2 mt-2">
|
| 205 |
+
<button class="flex-1 py-2 bg-rose-950/50 hover:bg-rose-950 border border-rose-500/30 text-rose-300 rounded-xl text-xs font-semibold">Clear Profile</button>
|
| 206 |
+
<button class="flex-1 py-2 bg-cyan-950/50 hover:bg-cyan-950 border border-cyan-500/30 text-cyan-300 rounded-xl text-xs font-semibold">Save Profile</button>
|
| 207 |
+
</div>
|
| 208 |
+
</div>
|
| 209 |
+
</div>
|
| 210 |
+
|
| 211 |
+
<div class="voice-locked absolute top-4 left-1/2 -translate-x-1/2 bg-slate-950/90 border border-amber-500/30 text-amber-200 px-4 py-2 rounded-xl text-xs font-mono hidden flex items-center gap-2 z-30 shadow-lg">
|
| 212 |
+
<svg class="w-4 h-4 text-amber-500" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 15v2m-6 4h12a2 2 0 002-2v-6a2 2 0 00-2-2H6a2 2 0 00-2 2v6a2 2 0 002 2zm10-10V7a4 4 0 00-8 0v4h8z"></path></svg>
|
| 213 |
+
Voice function locked. Upgrade to premium.
|
| 214 |
+
</div>
|
| 215 |
+
|
| 216 |
+
<script type="module">
|
| 217 |
+
import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
|
| 218 |
+
|
| 219 |
+
let client;
|
| 220 |
+
let isGenerating = false;
|
| 221 |
+
let chatHistory =;
|
| 222 |
+
|
| 223 |
+
// Setup control bar positioning for mobile Apple safe area inset
|
| 224 |
+
const inputBar = document.getElementById('bar');
|
| 225 |
+
inputBar.style.bottom = `calc(16px + env(safe-area-inset-bottom, 0px))`;
|
| 226 |
+
|
| 227 |
+
async function connectEngine() {
|
| 228 |
+
const loader = document.getElementById('load');
|
| 229 |
+
loader.style.opacity = '1';
|
| 230 |
+
try {
|
| 231 |
+
// Connect utilizing the local window origin to ensure ZeroGPU token handshakes are verified
|
| 232 |
+
client = await Client.connect(window.location.origin);
|
| 233 |
+
loader.style.opacity = '0';
|
| 234 |
+
} catch (err) {
|
| 235 |
+
console.error("Gradio initialization failure:", err);
|
| 236 |
+
document.getElementById('err').classList.remove('hidden');
|
| 237 |
+
}
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
/**
 * Send the trimmed text of the 'ti' input to the "/chat" endpoint and stream
 * the reply into the subtitle panel.
 *
 * Does nothing when the input is blank or a previous reply is still being
 * generated. While a request is in flight `isGenerating` is true; it is
 * released on completion, on a reported error stage, and on a thrown error,
 * so a failed request can no longer leave the input permanently locked.
 */
window.handleSend = async function() {
    const inputField = document.getElementById('ti');
    const messageText = inputField.value.trim();
    if (!messageText || isGenerating) return;

    inputField.value = '';
    isGenerating = true;

    // Show subtitle panel and active speaking indicator
    const subtitlePanel = document.getElementById('subtitle-panel');
    const subtitleText = document.getElementById('subtitle-text');
    const speakIndicator = document.getElementById('speakDot');

    subtitlePanel.classList.remove('hidden');
    speakIndicator.classList.remove('hidden');
    subtitleText.textContent = "Processing message...";

    // Single failure path used by both the catch block and the "error" status
    // stage: report the problem and release the send lock.
    const failRequest = (err) => {
        console.error("Inference execution failure:", err);
        subtitleText.textContent = "Pipeline error. Retrying...";
        isGenerating = false;
        speakIndicator.classList.add('hidden');
    };

    // Last reply text received from the stream; stays empty if no data arrived.
    let latestReply = '';

    try {
        // The server-side chat(message, history_str) endpoint takes two string
        // arguments, so the history is serialised before sending.
        // NOTE(review): JSON is assumed as the history encoding; confirm
        // against what run_inference parses.
        const job = client.submit("/chat", [messageText, JSON.stringify(chatHistory)]);

        job.on("data", (event) => {
            // NOTE(review): the client is expected to deliver outputs as an
            // array with one entry per output; a bare value is also accepted.
            const latestChunk = Array.isArray(event.data) ? event.data[0] : event.data;
            latestReply = latestChunk;
            subtitleText.textContent = latestChunk;

            // Trigger character jaw scaling based on active streaming
            speakingIntensity = 1.0;
        });

        job.on("status", (status) => {
            if (status.stage === "error") {
                failRequest(status.message || status);
                return;
            }
            if (status.stage !== "complete") return;

            if (latestReply) {
                // NOTE(review): [user, assistant] pairs are assumed as the
                // history entry shape; confirm against run_inference.
                chatHistory.push([messageText, latestReply]);
                if (chatHistory.length > 8) chatHistory.shift();
            }

            isGenerating = false;
            speakIndicator.classList.add('hidden');

            // Hide the subtitle after 5 s unless a new request has started.
            setTimeout(() => {
                if (!isGenerating) subtitlePanel.classList.add('hidden');
            }, 5000);
        });
    } catch (err) {
        failRequest(err);
    }
};
|
| 290 |
+
|
| 291 |
+
// Submit on Enter. 'keydown' replaces the deprecated 'keypress' event, and
// the isComposing check keeps an Enter that only confirms an IME candidate
// from sending a half-composed message.
document.getElementById('ti').addEventListener('keydown', (e) => {
    if (e.key === 'Enter' && !e.isComposing) handleSend();
});
|
| 294 |
+
|
| 295 |
+
/**
 * Flip the inline transform of the element with id 'menu' between
 * 'translateY(0%)' and 'translateY(100%)'. Any other current value
 * (including an empty one) is treated like 'translateY(100%)'.
 */
window.toggleMenu = function() {
    const IN_PLACE = 'translateY(0%)';
    const SHIFTED_DOWN = 'translateY(100%)';

    const panel = document.getElementById('menu');
    panel.style.transform = panel.style.transform === IN_PLACE ? SHIFTED_DOWN : IN_PLACE;
};
|
| 303 |
+
|
| 304 |
+
// --- 4. PROCEDURAL THREE.JS WEBGL RENDER LOOP ---
|
| 305 |
+
// Core Three.js objects; assigned in initWebGLScene().
let scene;
let camera;
let renderer;

// Character meshes; assigned in initWebGLScene().
let headMesh;
let leftEye;
let rightEye;
let mouthMesh;

// Eye colours; assigned in initWebGLScene() and set by updateEyeColor().
let baseEyeColor;
let targetEyeColor;

// Pointer position mapped to [-1, 1] on each axis by the mousemove listener.
let mouseX = 0;
let mouseY = 0;

// Set to 1.0 when a streamed chunk arrives and decayed every frame in animate().
let speakingIntensity = 0;

// Animation clock and the counters behind the once-per-second FPS readout.
let clock = new THREE.Clock();
let fpsLastTime = performance.now();
let fpsFrames = 0;
|
| 313 |
+
|
| 314 |
+
/**
 * Build the Three.js scene on the canvas with id 'c': fog, camera,
 * transparent renderer, two lights, and the head mesh with its eyes, mouth
 * and halo ring as children. Then register the resize and mousemove
 * listeners, start the backend connection, and kick off the render loop.
 */
function initWebGLScene() {
    const viewWidth = window.innerWidth;
    const viewHeight = window.innerHeight;

    scene = new THREE.Scene();
    scene.fog = new THREE.FogExp2(0xffe082, 0.05);

    camera = new THREE.PerspectiveCamera(40, viewWidth / viewHeight, 0.1, 100);
    camera.position.set(0, 0.2, 4.5);

    renderer = new THREE.WebGLRenderer({
        canvas: document.getElementById('c'),
        antialias: true,
        alpha: true
    });
    renderer.setSize(viewWidth, viewHeight);
    renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
    renderer.setClearColor(0x000000, 0); // zero alpha: the page shows through the canvas

    // Lights: a warm ambient fill plus one cyan directional light.
    scene.add(new THREE.AmbientLight(0xfffbeb, 1.0));

    const keyLight = new THREE.DirectionalLight(0x06b6d4, 1.8);
    keyLight.position.set(5, 5, 5);
    scene.add(keyLight);

    // Head: an eight-sided tapered cylinder in dark, glossy metal.
    headMesh = new THREE.Mesh(
        new THREE.CylinderGeometry(0.8, 0.6, 1.2, 8),
        new THREE.MeshStandardMaterial({
            color: 0x1e293b,
            roughness: 0.12,
            metalness: 0.88
        })
    );
    headMesh.position.set(0, 0, 0);
    scene.add(headMesh);

    // Eyes: two spheres sharing one geometry and one unlit material.
    baseEyeColor = new THREE.Color(0x06b6d4);
    targetEyeColor = new THREE.Color(0x06b6d4);
    const sharedEyeMaterial = new THREE.MeshBasicMaterial({ color: baseEyeColor });
    const sharedEyeGeometry = new THREE.SphereGeometry(0.14, 32, 32);

    const attachEye = (offsetX) => {
        const eye = new THREE.Mesh(sharedEyeGeometry, sharedEyeMaterial);
        eye.position.set(offsetX, 0.15, 0.58);
        headMesh.add(eye);
        return eye;
    };
    leftEye = attachEye(-0.28);
    rightEye = attachEye(0.28);

    // Mouth: a thin bar whose Y scale is driven from animate().
    mouthMesh = new THREE.Mesh(
        new THREE.BoxGeometry(0.35, 0.04, 0.06),
        new THREE.MeshBasicMaterial({ color: 0x06b6d4 })
    );
    mouthMesh.position.set(0, -0.28, 0.61);
    headMesh.add(mouthMesh);

    // Halo: a torus rotated a quarter turn about X, placed above the head.
    const haloGeometry = new THREE.TorusGeometry(1.2, 0.03, 8, 48);
    haloGeometry.rotateX(Math.PI / 2);
    const haloMaterial = new THREE.MeshStandardMaterial({
        color: 0x06b6d4,
        emissive: 0x06b6d4,
        emissiveIntensity: 0.8
    });
    const halo = new THREE.Mesh(haloGeometry, haloMaterial);
    halo.position.y = 0.8;
    headMesh.add(halo);

    window.addEventListener('resize', onResize);
    window.addEventListener('mousemove', (pointer) => {
        // Map client coordinates to [-1, 1], with +Y pointing up.
        mouseX = (pointer.clientX / window.innerWidth) * 2 - 1;
        mouseY = -(pointer.clientY / window.innerHeight) * 2 + 1;
    });

    connectEngine();
    animate();
}
|
| 385 |
+
|
| 386 |
+
/** Refit the camera aspect ratio and the renderer size to the current window. */
function onResize() {
    const width = window.innerWidth;
    const height = window.innerHeight;

    camera.aspect = width / height;
    camera.updateProjectionMatrix(); // the aspect change only takes effect after this call
    renderer.setSize(width, height);
}
|
| 391 |
+
|
| 392 |
+
/**
 * Recolour the character's eyes and mouth.
 *
 * @param {string|number} colorHex Any value accepted by THREE.Color#set.
 */
window.updateEyeColor = function(colorHex) {
    baseEyeColor.set(colorHex);
    targetEyeColor.set(colorHex);

    // A three.js material copies the Color given to its constructor into its
    // own `color` instance, so changing baseEyeColor alone never reaches the
    // rendered eyes. Update the eye materials directly.
    for (const eye of [leftEye, rightEye]) {
        eye.material.color.set(colorHex);
    }

    mouthMesh.material.color.set(colorHex);
};
|
| 397 |
+
|
| 398 |
+
/**
 * Set the intensity of every directional light that is a direct child of
 * the scene.
 *
 * @param {string|number} val New intensity; parsed with parseFloat. Values
 *     that do not parse to a finite number are ignored, so a bad input can
 *     no longer write NaN into the lights.
 */
window.updateGlowIntensity = function(val) {
    const intensity = parseFloat(val); // parse once, not once per child
    if (!Number.isFinite(intensity)) return;

    for (const child of scene.children) {
        if (child.isDirectionalLight) child.intensity = intensity;
    }
};
|
| 403 |
+
|
| 404 |
+
/**
 * Per-frame render loop: reschedules itself, animates the head (bobbing,
 * pointer-following rotation, mouth, blinking), renders the scene, and
 * refreshes the FPS readout about once per second.
 */
function animate() {
    requestAnimationFrame(animate);
    const elapsed = clock.getElapsedTime();

    // Idle bobbing on a sine wave.
    headMesh.position.y = Math.sin(elapsed * 1.8) * 0.06;

    // Ease the head 10% of the remaining way toward the pointer each frame.
    const easeToward = (current, goal) => current + (goal - current) * 0.1;
    headMesh.rotation.y = easeToward(headMesh.rotation.y, mouseX * 0.35);
    headMesh.rotation.x = easeToward(headMesh.rotation.x, -mouseY * 0.18);

    // Mouth: oscillate while speakingIntensity is above the threshold,
    // decaying it by 10% per frame; otherwise rest at unit scale.
    let mouthScaleY = 1.0;
    if (speakingIntensity > 0.01) {
        speakingIntensity *= 0.90;
        mouthScaleY = 1 + Math.sin(elapsed * 28) * 3.0 * speakingIntensity;
    }
    mouthMesh.scale.set(1.0, mouthScaleY, 1.0);

    // Blink during the first 0.15 s of every sixth second.
    const wholeSeconds = Math.floor(elapsed);
    const blinking = wholeSeconds % 6 === 0 && (elapsed - wholeSeconds) < 0.15;
    const eyeScaleY = blinking ? 0.15 : 1.0;
    leftEye.scale.y = eyeScaleY;
    rightEye.scale.y = eyeScaleY;

    renderer.render(scene, camera);

    // FPS readout: count frames and publish the count once at least 1000 ms have passed.
    fpsFrames++;
    const stamp = performance.now();
    if (stamp >= fpsLastTime + 1000) {
        document.getElementById('fps').textContent = `FPS: ${fpsFrames}`;
        fpsFrames = 0;
        fpsLastTime = stamp;
    }
}
|
| 442 |
+
|
| 443 |
+
// Entry point: build the scene. initWebGLScene() itself goes on to call
// connectEngine() and animate().
initWebGLScene();
|
| 444 |
+
</script>
|
| 445 |
+
</body>
|
| 446 |
+
</html>
|
| 447 |
+
"""
|
| 448 |
+
|
| 449 |
+
# --- 5. SYSTEM REGISTRATION AND COMPOSITION ---
|
| 450 |
+
# Initialize the custom Server Mode FastAPI app
|
| 451 |
+
# Single server object: the "/" route and the "chat" API defined below are
# registered on it, and it is launched under the __main__ guard.
app = gr.Server()
|
| 452 |
+
|
| 453 |
+
@app.get("/", response_class=HTMLResponse)
async def homepage() -> HTMLResponse:
    """Return the single-page frontend stored in ``FRONTEND_HTML`` with HTTP 200."""
    page = HTMLResponse(content=FRONTEND_HTML, status_code=200)
    return page
|
| 459 |
+
|
| 460 |
+
@app.api(name="chat")
def chat(message: str, history_str: str) -> Generator[str, None, None]:
    """Stream the reply for *message* as the "chat" API endpoint.

    Both arguments are passed unchanged to ``run_inference`` and every chunk
    it produces is yielded to the caller in order.
    """
    yield from run_inference(message, history_str)
|
| 467 |
+
|
| 468 |
+
# Launch the unified server
|
| 469 |
# Only start the server when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
|
| 470 |
+
app.launch()
|
|
|
|
|
|
|
|
|
|
|
|