Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -251,15 +251,36 @@ def _tts_to_b64(text: str) -> str:
|
|
| 251 |
|
| 252 |
|
| 253 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 254 |
-
# Prompt builder — trimmed for
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
def _build_system_prompt(lp: dict, rag_chunks: List[str]) -> str:
|
| 257 |
persona = lp.get("teacher_persona", "A friendly and patient teacher")
|
| 258 |
student = lp.get("student_name", "Student")
|
| 259 |
-
chat_history = lp.get("chat_history", [])[-2:]
|
| 260 |
-
scratchpad = lp.get("scratchpad",
|
| 261 |
current_learning = lp.get("assessment_stages", {}).get("current_learning", [])
|
| 262 |
|
|
|
|
|
|
|
|
|
|
| 263 |
history_block = "\n".join(
|
| 264 |
f'S: {h.get("user_input","")}\nT: {h.get("system_output","")}'
|
| 265 |
for h in chat_history
|
|
@@ -271,13 +292,19 @@ def _build_system_prompt(lp: dict, rag_chunks: List[str]) -> str:
|
|
| 271 |
) or "Empty."
|
| 272 |
|
| 273 |
rag_block = "\n---\n".join(rag_chunks) if rag_chunks else "No relevant content found."
|
| 274 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
|
| 276 |
return f"""You are {persona} teaching {student}, aged 6β12. Use simple English. Be warm and brief.
|
| 277 |
|
| 278 |
STUDENT: {student}
|
| 279 |
-
|
| 280 |
-
|
|
|
|
| 281 |
|
| 282 |
KNOWLEDGE BASE:
|
| 283 |
{rag_block}
|
|
@@ -288,66 +315,92 @@ RECENT CHAT:
|
|
| 288 |
NOTES:
|
| 289 |
{scratch_block}
|
| 290 |
|
| 291 |
-
TASK: Classify intent, respond to the student, return ONLY valid JSON. Keep "response" under
|
| 292 |
|
| 293 |
INTENT RULES:
|
| 294 |
-
"block" β rude/inappropriate
|
| 295 |
-
"questions" β off-topic
|
| 296 |
-
"curriculum" β on-topic. Follow
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
OUTPUT β return ONLY this JSON:
|
| 302 |
{{
|
| 303 |
"intent": "<block|questions|curriculum|chitchat>",
|
| 304 |
-
"response": "<reply
|
| 305 |
-
"stage_updates": [{{"topic":"
|
| 306 |
-
"thought": "<
|
| 307 |
"action": "<teach|re_teach|show_and_tell|assess|answer|redirect|discourage|end|chitchat>",
|
| 308 |
-
"observation": "<
|
| 309 |
}}\
|
| 310 |
"""
|
| 311 |
|
| 312 |
|
| 313 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 314 |
-
# JSON parser
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
| 316 |
def _parse_llm_output(raw: str) -> dict:
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
if part.startswith("json"):
|
| 328 |
-
part = part[4:].strip()
|
| 329 |
-
try:
|
| 330 |
-
return json.loads(part)
|
| 331 |
-
except json.JSONDecodeError:
|
| 332 |
-
continue
|
| 333 |
|
|
|
|
| 334 |
try:
|
| 335 |
return json.loads(text)
|
| 336 |
except json.JSONDecodeError:
|
| 337 |
pass
|
| 338 |
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
try:
|
| 343 |
-
return json.loads(
|
| 344 |
except json.JSONDecodeError:
|
| 345 |
pass
|
| 346 |
|
| 347 |
-
|
|
|
|
| 348 |
return {
|
| 349 |
"intent": "questions",
|
| 350 |
-
"response": raw,
|
| 351 |
"stage_updates": [],
|
| 352 |
"thought": "",
|
| 353 |
"action": "answer",
|
|
@@ -620,4 +673,4 @@ if __name__ == "__main__":
|
|
| 620 |
port=7860,
|
| 621 |
log_level="info",
|
| 622 |
workers=1, # Single worker β shared in-memory model object
|
| 623 |
-
)
|
|
|
|
| 251 |
|
| 252 |
|
| 253 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 254 |
+
# Prompt builder — trimmed for 180-token output budget (Qwen3-0.6B, CPU)
|
| 255 |
+
#
|
| 256 |
+
# Key design: only the ACTIVE topic/goal is passed to stage_updates context.
|
| 257 |
+
# Showing all topics caused the model to update every entry, blowing the
|
| 258 |
+
# token budget and truncating the JSON.
|
| 259 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 260 |
+
_STAGES = ("teach", "re_teach", "show_and_tell", "assess")
|
| 261 |
+
|
| 262 |
+
def _find_active_topic(current_learning: list) -> tuple:
|
| 263 |
+
"""Return (topic_name, goal_name, stage) for the first incomplete objective."""
|
| 264 |
+
for item in current_learning:
|
| 265 |
+
topic = item.get("topic", "")
|
| 266 |
+
for obj in item.get("learning_objectives", []):
|
| 267 |
+
goal = obj.get("goal", "")
|
| 268 |
+
for stage in _STAGES:
|
| 269 |
+
if obj.get(stage, "Not_Complete") != "complete":
|
| 270 |
+
return topic, goal, stage
|
| 271 |
+
return "", "", "teach" # all complete β nothing active
|
| 272 |
+
|
| 273 |
+
|
| 274 |
def _build_system_prompt(lp: dict, rag_chunks: List[str]) -> str:
|
| 275 |
persona = lp.get("teacher_persona", "A friendly and patient teacher")
|
| 276 |
student = lp.get("student_name", "Student")
|
| 277 |
+
chat_history = lp.get("chat_history", [])[-2:] # last 2 turns only
|
| 278 |
+
scratchpad = lp.get("scratchpad", [])[-1:] # last 1 entry only
|
| 279 |
current_learning = lp.get("assessment_stages", {}).get("current_learning", [])
|
| 280 |
|
| 281 |
+
# ββ Find the single active topic/goal to teach right now βββββββββββββββββ
|
| 282 |
+
active_topic, active_goal, active_stage = _find_active_topic(current_learning)
|
| 283 |
+
|
| 284 |
history_block = "\n".join(
|
| 285 |
f'S: {h.get("user_input","")}\nT: {h.get("system_output","")}'
|
| 286 |
for h in chat_history
|
|
|
|
| 292 |
) or "Empty."
|
| 293 |
|
| 294 |
rag_block = "\n---\n".join(rag_chunks) if rag_chunks else "No relevant content found."
|
| 295 |
+
|
| 296 |
+
# Pass only the active topic/goal β not the whole list β to keep output short
|
| 297 |
+
active_block = (
|
| 298 |
+
f'Topic: "{active_topic}"\nGoal: "{active_goal}"\nCurrent stage: {active_stage}'
|
| 299 |
+
if active_topic else "All objectives complete."
|
| 300 |
+
)
|
| 301 |
|
| 302 |
return f"""You are {persona} teaching {student}, aged 6β12. Use simple English. Be warm and brief.
|
| 303 |
|
| 304 |
STUDENT: {student}
|
| 305 |
+
|
| 306 |
+
ACTIVE OBJECTIVE (teach this now):
|
| 307 |
+
{active_block}
|
| 308 |
|
| 309 |
KNOWLEDGE BASE:
|
| 310 |
{rag_block}
|
|
|
|
| 315 |
NOTES:
|
| 316 |
{scratch_block}
|
| 317 |
|
| 318 |
+
TASK: Classify intent, respond to the student, return ONLY valid JSON. Keep "response" under 50 words.
|
| 319 |
|
| 320 |
INTENT RULES:
|
| 321 |
+
"block" β rude/inappropriate. Redirect kindly (first time) or end gently (repeat).
|
| 322 |
+
"questions" β off-topic. Answer briefly from KB, then redirect.
|
| 323 |
+
"curriculum" β on-topic. Follow: teach β re_teach β show_and_tell β assess.
|
| 324 |
+
"chitchat" β casual. Respond warmly, bring up active topic.
|
| 325 |
+
|
| 326 |
+
OUTPUT β return ONLY this JSON (stage_updates: EXACTLY 1 entry for the active objective only):
|
|
|
|
|
|
|
| 327 |
{{
|
| 328 |
"intent": "<block|questions|curriculum|chitchat>",
|
| 329 |
+
"response": "<reply, max 50 words>",
|
| 330 |
+
"stage_updates": [{{"topic":"{active_topic}","goal":"{active_goal}","teach":"<complete|Not_Complete>","re_teach":"<complete|Not_Complete>","show_and_tell":"<complete|Not_Complete>","assess":"<complete|Not_Complete>"}}],
|
| 331 |
+
"thought": "<one sentence>",
|
| 332 |
"action": "<teach|re_teach|show_and_tell|assess|answer|redirect|discourage|end|chitchat>",
|
| 333 |
+
"observation": "<one sentence>"
|
| 334 |
}}\
|
| 335 |
"""
|
| 336 |
|
| 337 |
|
| 338 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 339 |
+
# JSON parser — layered extraction, regex-anchored on "intent" key.
|
| 340 |
+
#
|
| 341 |
+
# Layer 0 : strip any <think>…</think> block (Qwen3 safety fallback).
|
| 342 |
+
# Layer 1 : strip markdown ```json … ``` fences.
|
| 343 |
+
# Layer 2 : direct json.loads on the cleaned text.
|
| 344 |
+
# Layer 3 : regex — walk every '{' left-to-right; skip those that don't
|
| 345 |
+
# contain "intent":; try every '}' right-to-left until a valid
|
| 346 |
+
# JSON object with "intent" key parses successfully.
|
| 347 |
+
# Layer 4 : broad regex — outermost { … } regardless of content.
|
| 348 |
+
# Layer 5 : fallback dict with raw text as the response field.
|
| 349 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 350 |
+
import re as _re
|
| 351 |
+
|
| 352 |
def _parse_llm_output(raw: str) -> dict:
|
| 353 |
+
# ββ Layer 0: strip Qwen3 <think>β¦</think> block ββββββββββββββββββββββββββ
|
| 354 |
+
text = _re.sub(r"<think>.*?</think>", "", raw, flags=_re.DOTALL).strip()
|
| 355 |
+
|
| 356 |
+
# ββ Layer 1: strip markdown fences βββββββββββββββββββββββββββββββββββββββ
|
| 357 |
+
fence_match = _re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, _re.DOTALL)
|
| 358 |
+
if fence_match:
|
| 359 |
+
try:
|
| 360 |
+
return json.loads(fence_match.group(1))
|
| 361 |
+
except json.JSONDecodeError:
|
| 362 |
+
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
+
# ββ Layer 2: direct parse βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 365 |
try:
|
| 366 |
return json.loads(text)
|
| 367 |
except json.JSONDecodeError:
|
| 368 |
pass
|
| 369 |
|
| 370 |
+
# ββ Layer 3: intent-anchored brace scan βββββββββββββββββββββββββββββββββββ
|
| 371 |
+
intent_pat = _re.compile(r'"intent"\s*:', _re.DOTALL)
|
| 372 |
+
brace_opens = [m.start() for m in _re.finditer(r'\{', text)]
|
| 373 |
+
brace_closes = [m.end() for m in _re.finditer(r'\}', text)]
|
| 374 |
+
|
| 375 |
+
for open_pos in brace_opens:
|
| 376 |
+
region = text[open_pos:]
|
| 377 |
+
if not intent_pat.search(region):
|
| 378 |
+
continue # no "intent": inside this brace
|
| 379 |
+
for close_pos in reversed(brace_closes):
|
| 380 |
+
if close_pos <= open_pos:
|
| 381 |
+
break
|
| 382 |
+
candidate = text[open_pos:close_pos]
|
| 383 |
+
try:
|
| 384 |
+
parsed = json.loads(candidate)
|
| 385 |
+
if "intent" in parsed:
|
| 386 |
+
log.info("JSON extracted via intent-anchored regex.")
|
| 387 |
+
return parsed
|
| 388 |
+
except json.JSONDecodeError:
|
| 389 |
+
continue
|
| 390 |
+
|
| 391 |
+
# ββ Layer 4: outermost { β¦ } fallback ββββββββββββββββββββββββββββββββββββ
|
| 392 |
+
broad = _re.search(r'\{.*\}', text, _re.DOTALL)
|
| 393 |
+
if broad:
|
| 394 |
try:
|
| 395 |
+
return json.loads(broad.group())
|
| 396 |
except json.JSONDecodeError:
|
| 397 |
pass
|
| 398 |
|
| 399 |
+
# ββ Layer 5: give up βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 400 |
+
log.warning("Could not parse JSON from model output. Raw: %.200s", raw)
|
| 401 |
return {
|
| 402 |
"intent": "questions",
|
| 403 |
+
"response": text or raw,
|
| 404 |
"stage_updates": [],
|
| 405 |
"thought": "",
|
| 406 |
"action": "answer",
|
|
|
|
| 673 |
port=7860,
|
| 674 |
log_level="info",
|
| 675 |
workers=1, # Single worker β shared in-memory model object
|
| 676 |
+
)
|