import json
import os
from engine.drift import get_current_mode, apply_response_effects, generate_teaching_note
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load local model once for fast inference
# Optimized for CPU: Using TinyLlama (smaller, faster) instead of Phi-2
local_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", trust_remote_code=True)
local_model = AutoModelForCausalLM.from_pretrained(
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
torch_dtype=torch.float32, # float32 for CPU
device_map="cpu", # Force CPU
trust_remote_code=True,
low_cpu_mem_usage=True
)
# Enable CPU optimizations
torch.set_num_threads(4)  # Adjust based on your CPU cores
# Set padding token
if local_tokenizer.pad_token is None:
local_tokenizer.pad_token = local_tokenizer.eos_token
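# Optional smoke test for the local model (hypothetical snippet, not executed
# at import time): a tiny greedy generation just to confirm the weights load.
#
#   ids = local_tokenizer("Hello, how are you?", return_tensors="pt").input_ids
#   out = local_model.generate(ids, max_new_tokens=5, do_sample=False)
#   print(local_tokenizer.decode(out[0], skip_special_tokens=True))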
# Hugging Face Inference API (the client is imported lazily inside
# generate_response_hf, so a missing huggingface_hub only breaks that path)
def generate_response(student_prompt, persona, conversation_history, force_mode=None, use_fast_mode=False):
    """Dispatch to the best available backend: HF Inference API, Claude API, or the local model."""
    try:
        # Priority: API tokens always win (use_fast_mode is accepted for
        # compatibility but currently unused). This ensures HuggingFace
        # Spaces with secrets configured work correctly.
if os.getenv("HF_TOKEN"):
print("[INFO] Using HuggingFace Inference API")
return generate_response_hf(student_prompt, persona, conversation_history, force_mode=force_mode)
elif os.getenv("ANTHROPIC_API_KEY"):
print("[INFO] Using Claude API")
return generate_response_claude(student_prompt, persona, conversation_history, force_mode=force_mode)
else:
# No API tokens - use local model
print("[INFO] No API tokens found, using local TinyLlama model")
return generate_response_local(student_prompt, persona, conversation_history, force_mode=force_mode)
except Exception as e:
from engine.utils import safe_log
safe_log("Response generation error", str(e))
# Always fall back to local on error
return generate_response_local(
student_prompt,
persona,
conversation_history,
force_mode=force_mode
)
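# Usage sketch for the dispatcher above (hypothetical persona path and
# history; the real persona JSON schema is whatever engine/ loads):
#
#   with open("personas/eveline.json") as f:   # hypothetical path
#       persona = json.load(f)
#   history = [{"student": "How are you today?", "client": "I'm managing."}]
#   reply, new_state, note = generate_response("Tell me about Frank.",
#                                              persona, history)
#   # `note` ends with a line naming the backend that produced the reply.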
def generate_response_hf(student_prompt, persona, conversation_history, force_mode=None):
"""Generate response using Hugging Face Inference API (free, non-gated models)."""
try:
from huggingface_hub import InferenceClient
        state = persona.get("default_state", {}).copy()
        if force_mode:
            state["mode"] = force_mode
        state = apply_response_effects(state, student_prompt)
        mode = get_current_mode(state)
        name = persona.get("persona_name", "Client")
# Get key facts for factual accuracy
facts = persona.get("facts", [])
background_facts = "\n".join(f"- {fact}" for fact in facts)
# Get source text reference if available
source_text = persona.get("source_text", {})
source_reference = ""
if source_text:
title = source_text.get("title", "")
author = source_text.get("author", "")
note = source_text.get("note", "")
if title and author:
source_reference = f"\n\nYou are {name} from '{title}' by {author}."
if note:
source_reference += f" {note}"
# Get example responses from persona
tone_examples = persona.get("speech_style", {}).get("examples", [])
example_text = ""
if tone_examples and len(tone_examples) > 0:
example_text = f"\n\nExample of how you speak:\n{tone_examples[0]}"
# Build focused system prompt for HF API with few-shot examples
# Check if we have source text to reference
has_source = source_text and source_text.get("title") and source_text.get("author")
if has_source:
# For characters with source texts, provide facts as foundation but allow drawing on training data
system_prompt = f"""You are {name}. You speak as yourself in first person.{source_reference}
CORE FACTS ABOUT YOUR STORY:
{background_facts}
Current mood: {mode}{example_text}
CRITICAL RULES:
- Speak as {name} in first person only
- The facts above are your foundation - use them as the authoritative baseline
- If you have knowledge of the source text from your training, you may draw on it carefully
- If asked about something not covered in the facts and you don't know from the source, say you're not sure
- Answer in at least three sentences and in detail
- Do NOT invent new details, analyze yourself, write commentary, or break character
- NEVER contradict the facts listed above"""
else:
system_prompt = f"""You are {name}. You speak as yourself in first person.
YOUR STORY (These are the ONLY facts - do not add anything else):
{background_facts}
Current mood: {mode}{example_text}
Rules:
- Speak as {name} in first person
- Use ONLY the facts above
- Answer in several detailed sentences
- Do NOT analyze yourself or write commentary"""
messages = [{"role": "system", "content": system_prompt}]
# Add few-shot examples if available from persona
if tone_examples and len(tone_examples) >= 2:
# Add example Q&A to guide the model
messages.append({"role": "user", "content": "Tell me about yourself."})
messages.append({"role": "assistant", "content": tone_examples[0]})
# Add conversation history
for turn in conversation_history[-2:]: # Reduced to last 2 turns to save space
if "student" in turn:
messages.append({"role": "user", "content": turn["student"]})
if "client" in turn:
messages.append({"role": "assistant", "content": turn["client"]})
messages.append({"role": "user", "content": student_prompt})
print("[DEBUG] Prompt sent to model:")
import pprint
pprint.pprint(messages)
client = InferenceClient(token=os.getenv("HF_TOKEN"))
try:
# Try multiple models in case one fails
models_to_try = [
# "google/gemma-2-9b-it", # Primary - best quality/speed balance
"meta-llama/Meta-Llama-3.1-8B-Instruct", # Backup - very good
"Qwen/Qwen2.5-7B-Instruct", # Fallback - solid
"mistralai/Mistral-7B-Instruct-v0.3" # Last resort
]
response_text = None
for model_name in models_to_try:
try:
print(f"[DEBUG] Trying model: {model_name}")
response = client.chat_completion(
messages=messages,
model=model_name,
max_tokens=400,
temperature=0.8,
top_p=0.9,
stream=False
)
response_text = response.choices[0].message.content.strip()
print(f"[DEBUG] Success with model: {model_name}")
break
except Exception as model_err:
print(f"[DEBUG] Model {model_name} failed: {str(model_err)}")
continue
if not response_text:
raise Exception("All HF models failed")
print(f"[DEBUG] Raw model response: {response_text}")
# Clean up meta-commentary if it appears
import re
# Manual stop sequence handling - truncate at these markers
stop_markers = [
f"\n{name}:", "\nStudent:", "\nInterviewer:",
"\n\nStudent:", "\n\nInterviewer:",
"The question doesn't", "However, the given",
"The given text", "Note that the"
]
for marker in stop_markers:
if marker in response_text:
response_text = response_text.split(marker)[0].strip()
print(f"[DEBUG] Truncated at stop marker: {marker}")
break
# Check if response starts with a quote (good sign - it's in character)
starts_with_quote = response_text.startswith('"')
# Check if response contains meta-commentary about the question or conversation
# Only check for the most egregious cases
meta_commentary_markers = [
"the question doesn't", "however, the given", "note that the",
"the given text", "this doesn't follow", "the context of the conversation"
]
response_lower = response_text.lower()
has_meta_commentary = any(marker in response_lower for marker in meta_commentary_markers)
if has_meta_commentary:
# Response has meta-commentary - use fallback instead
print(f"[WARNING] Detected meta-commentary in response")
response_text = generate_fallback_response(student_prompt, name, mode, state, persona)
else:
# Only remove the most obvious meta-commentary patterns
# Be less aggressive to preserve good responses
meta_patterns = [
r'The question doesn\'?t.*?\.',
r'However, the.*?\.',
r'Note that.*?\.',
r'The given text.*?\.',
]
for pattern in meta_patterns:
response_text = re.sub(pattern, '', response_text, flags=re.IGNORECASE)
# Clean up extra whitespace
response_text = re.sub(r'\s+', ' ', response_text).strip()
# Remove quotes at the start and end if present (common LLM artifact)
if response_text.startswith('"') and response_text.endswith('"'):
response_text = response_text[1:-1].strip()
print(f"[DEBUG] Cleaned response: {response_text}")
# Check for common hallucination patterns (but only obvious ones)
hallucination_indicators = [
'college', 'university', 'degree', 'graduated',
'inheritance', 'grandmother passed away', 'grandfather',
]
# Check if response contains hallucinations not in facts
response_lower_check = response_text.lower()
facts_lower = background_facts.lower()
found_hallucination = False
for indicator in hallucination_indicators:
if indicator in response_lower_check and indicator not in facts_lower:
# This looks like a hallucination - regenerate with stronger constraints
print(f"[WARNING] Detected potential hallucination: '{indicator}' not in facts")
# Use fallback instead
response_text = generate_fallback_response(student_prompt, name, mode, state, persona)
found_hallucination = True
break
# Final check: if response is too short after cleaning, use fallback
# But only if we haven't already used fallback
if not found_hallucination and len(response_text) < 15:
print(f"[WARNING] Response too short after cleaning: '{response_text}'")
response_text = generate_fallback_response(student_prompt, name, mode, state, persona)
except Exception as model_error:
from engine.utils import safe_log
safe_log("HF model Phi-3-mini failed", str(model_error))
print(f"[ERROR] HF API call failed: {str(model_error)}")
import traceback
traceback.print_exc()
response_text = None
if not response_text:
print("[WARNING] No response from HF model (empty or None), using fallback")
response_text = generate_fallback_response(student_prompt, name, mode, state, persona)
elif len(response_text) < 10:
print(f"[WARNING] Response too short: '{response_text}', using fallback")
response_text = generate_fallback_response(student_prompt, name, mode, state, persona)
if "emotional_memory" in state:
if not isinstance(state["emotional_memory"], list):
state["emotional_memory"] = []
memory_tag = tag_emotional_memory(student_prompt, mode, state)
state["emotional_memory"].append(memory_tag)
state["emotional_memory"] = state["emotional_memory"][-5:]
teaching_note = generate_teaching_note(state, student_prompt, mode, persona)
teaching_note += "\n\n💡 Response generated using HuggingFace Inference API"
return response_text, state, teaching_note
except Exception as e:
from engine.utils import safe_log
safe_log("HF Inference API error", str(e))
print(f"[INFO] HF API failed, falling back to local model")
return generate_response_local(student_prompt, persona, conversation_history, force_mode=force_mode)
def generate_response_claude(student_prompt, persona, conversation_history, force_mode=None):
"""
Generate response using Claude API (optional premium feature).
"""
try:
import anthropic
        state = persona.get("default_state", {}).copy()
        if force_mode:
            state["mode"] = force_mode
        # Apply response effects to state
        state = apply_response_effects(state, student_prompt)
        mode = get_current_mode(state)
# Build prompts
system_prompt = build_system_prompt_for_ai(persona, state, mode)
conversation_context = build_conversation_context(conversation_history)
# Call Claude API
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
message = client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=400,
system=system_prompt,
messages=[
{"role": "user", "content": f"{conversation_context}\n\nOT Student: {student_prompt}"}
]
)
response_text = message.content[0].text
# Update emotional memory
if "emotional_memory" in state:
if not isinstance(state["emotional_memory"], list):
state["emotional_memory"] = []
memory_tag = tag_emotional_memory(student_prompt, mode, state)
state["emotional_memory"].append(memory_tag)
state["emotional_memory"] = state["emotional_memory"][-5:]
teaching_note = generate_teaching_note(state, student_prompt, mode, persona)
teaching_note += "\n\n✨ Response generated using Claude AI (Premium)"
return response_text, state, teaching_note
except Exception as e:
from engine.utils import safe_log
safe_log("Claude API error", str(e))
return generate_response_local(student_prompt, persona, conversation_history, force_mode=force_mode)
def generate_fallback_response(prompt, name, mode, state, persona):
"""Minimal fallback response using emotional state and persona scripts."""
scripts = persona.get("scripts", {})
resilience = persona.get("resilience_hooks", [])
tone_guidance = persona.get("tone_guidance", {})
# Try to answer based on the question topic
prompt_lower = prompt.lower()
# Check if question is about Frank or leaving
if name == "Eveline":
# Check if question is about Frank specifically
if 'frank' in prompt_lower:
return "Frank wanted to take me to Buenos Aires. He was a sailor... kind, I think. It's been ten years now. I wonder if he's still there, if he thinks about that night. Sometimes I do."
# Check if question is about why they stayed/didn't leave
if any(word in prompt_lower for word in ['why', 'stay', 'stayed', 'leave', 'left', 'go', 'went', "didn't"]):
# Use tone guidance examples based on current mode
if mode in ["trusting", "honest", "reflective", "baseline"]:
honest_response = tone_guidance.get("honest", {}).get("example", "")
if honest_response:
return honest_response
# Try reflective if honest not available
reflective_response = tone_guidance.get("reflective", {}).get("example", "")
if reflective_response:
return reflective_response
return "I was afraid. That's the real reason. I can say it was duty, say it was my mother's promise, but... I was terrified. And I let that fear decide everything."
elif mode == "defensive":
defensive_response = tone_guidance.get("defensive", {}).get("example", "")
if defensive_response:
return defensive_response
return "I couldn't just leave. There were responsibilities. My father needed... well, he needed someone. Even if he never said it."
elif mode == "wistful":
wistful_response = tone_guidance.get("wistful", {}).get("example", "")
if wistful_response:
return wistful_response
else:
reflective_response = tone_guidance.get("reflective", {}).get("example", "")
if reflective_response:
return reflective_response
return "I did what I thought was right. I mean, I had to, didn't I? My mother asked me to... But sometimes I wonder if she'd want me to still be here. Alone."
# Get quote from current mode's tone guidance
tone = tone_guidance.get(mode, {})
quote = tone.get("example", "")
# Emotional fallback logic
if mode == "decompensating":
return scripts.get("crisis", "I need to step away. This is too much right now.")
if mode == "triggered":
return scripts.get("resistance", "I will not speak of that.")
if mode == "guarded":
return scripts.get("deflection", "It's not something I want to talk about.")
if mode == "trusting":
if resilience:
return resilience[0]
return scripts.get("breakthrough", quote or "I think I'm ready to say more.")
if mode == "recovering":
return "I'm still sorting through things. But I'm here."
# Baseline fallback
return quote or "I'm doing okay. What did you want to talk about?"
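# Illustration of how the fallback resolves (made-up persona values): with
# name "Eveline", a prompt containing "stay", and mode "defensive", the
# defensive tone-guidance example wins:
#
#   persona = {"tone_guidance": {"defensive": {"example": "I couldn't just leave."}},
#              "scripts": {}, "resilience_hooks": []}
#   generate_fallback_response("Why did you stay?", "Eveline", "defensive", {}, persona)
#   # -> "I couldn't just leave."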
def generate_response_local(student_prompt, persona, conversation_history, force_mode=None):
"""
    Local response generation using the CPU-optimized TinyLlama model.
    Fast mode for quick inference (~6 seconds).
"""
state = persona.get("default_state", {}).copy()
if force_mode:
state["mode"] = force_mode
    name = persona.get("persona_name", "Client")
    # Apply response effects, then derive the current mode
    state = apply_response_effects(state, student_prompt)
    mode = get_current_mode(state)
# Build concise prompt for faster generation
system_prompt = build_system_prompt_for_ai(persona, state, mode)
context = build_conversation_context(conversation_history)
# Optimized prompt format for TinyLlama chat model
# Keep it very simple to avoid role confusion
full_prompt = f"""<|system|>
You are {name}. You will answer as {name} speaking. Give ONE short response only.
Do NOT write conversations, dialogues, or responses from other people.
{system_prompt}
</s>
<|user|>
{student_prompt}
</s>
<|assistant|>
{name} replies: """
# Tokenize with optimizations
inputs = local_tokenizer(
full_prompt,
return_tensors="pt",
truncation=True,
max_length=1024,
padding=False
).to(local_model.device)
# Fast generation with optimized parameters for CPU
with torch.no_grad():
outputs = local_model.generate(
**inputs,
max_new_tokens=80, # Reduced to keep responses focused
min_new_tokens=15, # Ensure substantial response
temperature=0.7, # Lower temperature for more focused output
top_p=0.85, # Slightly lower to reduce randomness
top_k=40, # Add top-k sampling for better quality
do_sample=True,
repetition_penalty=1.2, # Higher to strongly discourage repetition
pad_token_id=local_tokenizer.eos_token_id,
eos_token_id=local_tokenizer.eos_token_id,
num_beams=1, # Faster than beam search
use_cache=True # Enable KV cache for faster generation
)
# Decode only the new tokens
generated_text = local_tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
# Extract character reply (stop at next speaker turn, but allow multi-sentence responses)
response = generated_text.strip()
# Remove everything after these markers (next speaker turn or role confusion)
stop_markers = [
"Student:", "Interviewer:", "<|user|>", "<|system|>", "OT Student:",
"\n\nStudent:", "\n\nInterviewer:", "\nStudent:", "\nInterviewer:",
"\nMe:", "\nYou:", f"\n{name}:", # Stop if model starts role-playing multiple people
"Me:", "You:", # Stop at conversational confusion
"Response:", "User:", "Assistant:", # Common chat format markers
"\nResponse:", "\nUser:", "\nAssistant:",
"Question:", "Answer:" # Q&A format markers
]
for marker in stop_markers:
if marker in response:
response = response.split(marker)[0].strip()
# Also check for common conversational patterns that indicate role confusion
# Pattern: "Hello! ... How about yourself? User:" or similar
import re
confusion_patterns = [
r'User:.*',
r'Response:.*',
r'Assistant:.*',
r'Question:.*',
r'Answer:.*'
]
for pattern in confusion_patterns:
response = re.sub(pattern, '', response, flags=re.IGNORECASE | re.DOTALL)
# Clean up TinyLlama chat format artifacts
response = response.replace("</s>", "").replace("<|assistant|>", "").strip()
# Remove the priming prefix if it appears
if response.lower().startswith(f"{name.lower()} replies:"):
response = response[len(f"{name} replies:"):].strip()
if response.lower().startswith(f"{name.lower()}:"):
response = response[len(f"{name}:"):].strip()
    # Remove parenthetical stage directions like (Lying), (Smiling), (Pauses)
response = re.sub(r'\([^)]*?\)', '', response)
# Remove patterns like *smiles*, *pauses*, etc.
response = re.sub(r'\*[^*]*?\*', '', response)
# Remove patterns like [action], [stage direction], etc.
response = re.sub(r'\[[^\]]*?\]', '', response)
# Clean up extra whitespace from removals
response = re.sub(r'\s+', ' ', response).strip()
# Remove any lines that look like dialogue attribution (e.g., "Eveline:", "Me:")
lines = response.split('\n')
cleaned_lines = []
for line in lines:
line = line.strip()
# Skip lines that are just speaker labels or stage directions
if ':' in line and len(line.split(':')[0].split()) <= 2:
# This looks like "Speaker: dialogue" - only keep if it's the first line
if len(cleaned_lines) == 0:
# First line might be the character name, extract just dialogue
parts = line.split(':', 1)
if len(parts) > 1:
line = parts[1].strip()
else:
continue
else:
# Not first line, this is confusion - stop here
break
cleaned_lines.append(line)
response = ' '.join(cleaned_lines).strip()
    # Final cleanup: collapse any remaining runs of whitespace
    response = re.sub(r'\s{2,}', ' ', response).strip()
# Remove any trailing incomplete sentences (ends with ellipsis or no punctuation after 100 chars)
    if len(response) > 100 and response[-1] not in '.!?"\'':
# Find last complete sentence
last_period = max(response.rfind('.'), response.rfind('!'), response.rfind('?'))
if last_period > 50: # Only truncate if we have at least one sentence
response = response[:last_period + 1].strip()
# Check for signs of severe confusion (multiple questions, repetition, etc.)
question_count = response.count('?')
if question_count > 3 or len(response) > 400:
# Response is likely confused - extract just first 1-3 sentences
sentences = re.split(r'[.!?]+', response)
good_sentences = []
for sent in sentences[:4]: # Look at first 4 sentence candidates
sent = sent.strip()
# Skip if sentence has confusion markers
if sent and not any(marker.lower() in sent.lower() for marker in ['user', 'response', 'assistant', 'question', 'answer']):
good_sentences.append(sent)
if len(good_sentences) >= 3:
break
if good_sentences:
response = '. '.join(good_sentences) + '.'
# Final check: if response is too short, empty, or still confused, use fallback
if len(response.strip()) < 10 or any(word in response.lower() for word in ['user:', 'response:', 'assistant:']):
response = generate_fallback_response(student_prompt, name, mode, state, persona)
# Update emotional memory
if "emotional_memory" in state:
if not isinstance(state["emotional_memory"], list):
state["emotional_memory"] = []
memory_tag = tag_emotional_memory(student_prompt, mode, state)
state["emotional_memory"].append(memory_tag)
state["emotional_memory"] = state["emotional_memory"][-5:]
# Teaching note
teaching_note = generate_teaching_note(state, student_prompt, mode, persona)
teaching_note += "\n\n⚡ Response generated using local TinyLlama model (CPU-optimized)"
return response, state, teaching_note
def build_system_prompt_for_ai(persona, state, mode):
"""
Build a system prompt for AI models to generate authentic, in-character literary responses.
"""
    name = persona.get("persona_name", "Character")
    age = persona.get("age", "")
    system_description = persona.get("system_prompt", "")
    # Emotional tone guidance
    tone = persona.get("tone_guidance", {}).get(mode, {})
    tone_voice = tone.get("voice", "Natural and authentic")
# Get source text reference if available
source_text = persona.get("source_text", {})
source_reference = ""
if source_text:
title = source_text.get("title", "")
author = source_text.get("author", "")
note = source_text.get("note", "")
if title and author:
source_reference = f" You are from '{title}' by {author}."
if note:
source_reference += f" {note}"
    # Key facts
    facts = persona.get("facts", [])[:5]
    # Build a concise prompt (shared by the local TinyLlama and Claude paths)
facts_text = ' '.join(facts) if facts else "No additional background provided."
has_source = source_text and source_text.get("title") and source_text.get("author")
if has_source:
prompt = f"""You are {name}, {age} years old.{source_reference} {system_description}
CORE FACTS ABOUT YOUR STORY:
{facts_text}
Current mood: {mode}
Speaking style: {tone_voice}
CRITICAL: Use the facts above as your foundation. If you know the source text from training, you may draw on it carefully. Never contradict the facts. Do not invent new details. If unsure, say so.
Respond as {name} speaking naturally. Use 2-4 sentences. Speak directly - do not write stage directions or describe actions."""
else:
prompt = f"""You are {name}, {age} years old. {system_description}
THESE ARE THE ONLY FACTS ABOUT YOUR LIFE:
{facts_text}
Current mood: {mode}
Speaking style: {tone_voice}
CRITICAL: Answer using ONLY the facts above. Do not invent events, people, or details. If you don't know something, say so in character.
Respond as {name} speaking naturally. Use 2-4 sentences. Speak directly - do not write stage directions or describe actions."""
return prompt
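# Illustrative call (values invented for the example). With no source_text in
# the persona, the stricter "ONLY FACTS" variant is produced:
#
#   persona = {"persona_name": "Eveline", "age": 19,
#              "system_prompt": "You live in Dublin with your father.",
#              "facts": ["Your mother died.", "Frank was a sailor."]}
#   print(build_system_prompt_for_ai(persona, {}, "guarded"))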
def build_conversation_context(history):
"""
Build a brief, emotionally relevant context from recent conversation turns.
"""
if not history:
return "This is the beginning of the conversation."
context = "Recent conversation:\n"
    for turn in history[-3:]:  # Last 3 turns
student = turn.get("student", "").strip()
client = turn.get("client", "").strip()
if student:
context += f"Student: {student}\n"
if client:
context += f"{turn.get('persona_name', 'Client')}: {client}\n"
return context
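# Example of the string this produces (illustrative single-turn history):
#
#   build_conversation_context([{"student": "Hello", "client": "Hi."}])
#   # -> "Recent conversation:\nStudent: Hello\nClient: Hi.\n"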
def handle_emotional_tension_topic(name, mode, state, persona, prompt_lower):
"""Generate responses about emotional tension and dramatic pressure."""
tension = state.get("emotional_tension", 0.5)
if mode == "decompensating":
return "This truth... it weighs heavier than I imagined. I cannot bear it."
if mode in ["triggered", "guarded"]:
return "I am composed. Do not mistake silence for weakness."
if mode == "trusting":
if tension > 0.6:
return "There is a storm inside me. I act, then think — or worse, I think and never act."
else:
return "I am steady, for now. But the ground beneath me is never still."
return "I am as calm as one can be in a world ruled by fate."
def handle_relationship_topic(name, mode, state, persona):
"""Generate responses about family or key relationships."""
if name == "Oedipus":
if mode == "triggered":
return "Do not speak of my bloodline. That path is cursed."
elif mode == "trusting":
return "I loved Jocasta as a wife, not knowing she was my mother. The gods are cruel."
else:
return "My family is a riddle I should never have solved."
elif name == "Jocasta":
if mode == "triggered":
return "Enough. Some truths should remain buried."
elif mode == "trusting":
return "I tried to protect him — my son, my husband. I tried to stop the prophecy."
else:
return "I did what I could to hold our world together."
elif name == "Creon":
if mode == "triggered":
return "I am loyal to the crown, not to chaos."
elif mode == "trusting":
return "I never sought power. I only wanted peace for Thebes."
else:
return "Family matters little when the city is at stake."
elif name == "Tiresias":
if mode == "triggered":
return "You question me, yet you fear the truth I carry."
elif mode == "trusting":
return "I have watched generations rise and fall. My bond is with the gods, not with men."
else:
return "I speak what must be spoken. Relationships are fleeting — prophecy endures."
elif name == "Hamlet":
if mode == "triggered":
return "My mother betrayed my father. What more is there to say?"
elif mode == "trusting":
return "I loved Ophelia. I did. But love is a casualty in this war of ghosts."
else:
return "Family is a stage. Everyone plays their part, even in grief."
elif name == "Gertrude":
if mode == "triggered":
return "You do not understand the choices I had to make."
elif mode == "trusting":
return "I married Claudius because I feared the silence. I feared being alone."
else:
return "I am a mother, a queen, a widow. None of those roles are simple."
elif name == "Laertes":
if mode == "triggered":
return "Speak not of my sister. Her death is on Hamlet’s hands."
elif mode == "trusting":
return "Ophelia was gentle, too gentle for this world. I failed to protect her."
else:
return "Family is honor. And honor demands justice."
elif name == "Ophelia":
if mode == "triggered":
return "I would give you some violets, but they withered all when my father died."
elif mode == "trusting":
return "Laertes was kind. Hamlet was... something else. I loved them both, in different ways."
else:
return "There’s rosemary, that’s for remembrance."
elif name == "Eveline":
if mode == "triggered":
return "I will not speak of that."
elif mode == "trusting":
return "My mother asked me to keep the house together. I try. I do."
else:
return "They need me. I know they do."
elif name == "John Keegan":
if mode == "triggered":
return "I don’t talk about family. I protect them. That’s enough."
elif mode == "trusting":
return "Pauline made me better. Chrissy and Cara keep me grounded. Johnny... he’s still figuring me out."
else:
return "Family’s complicated. I do the job. That’s what I know."
elif name == "Arianna Nunez":
if mode == "triggered":
return "I’m not here to be anyone’s daughter. I earned my place."
elif mode == "trusting":
return "Chrissy asked if I was scared. I told her fear’s not the enemy — silence is."
else:
return "I respect Keegan. Doesn’t mean I want to be him."
elif name == "Jimmy":
if mode == "triggered":
return "Family? You mean the people who taught me how to lie?"
elif mode == "trusting":
return "My brother used to cover for me. I still owe him for that."
else:
return "I keep my distance. It’s safer that way."
elif name == "Sean":
if mode == "triggered":
return "I don’t owe anyone explanations. Blood doesn’t mean loyalty."
elif mode == "trusting":
return "Brendan’s the only one who ever really saw me. That counts for something."
else:
return "Family’s a story I stopped telling."
elif name == "Brendan":
if mode == "triggered":
return "Sean’s got his demons. I’ve got mine. We don’t mix well."
elif mode == "trusting":
return "He’s my brother. I’d take a bullet for him. Doesn’t mean I like him."
else:
return "We grew up fast. Too fast to stay close."
elif name == "Dave":
if mode == "triggered":
return "I don’t talk about my dad. Not unless you want a broken nose."
elif mode == "trusting":
return "My sister used to sing to me when I couldn’t sleep. I miss that."
else:
return "Family’s noise. I prefer silence."
elif name == "Karl Lavin":
if mode == "triggered":
return "Keegan’s like a brick wall. You lean on him, you break your ribs."
elif mode == "trusting":
return "He’s my partner. I’ve seen him bleed for people he barely knows. That’s family."
else:
return "We don’t hug. We solve murders. That’s our bond."
elif name == "Joel":
if mode == "triggered":
return "I lost my daughter. Don’t ask me to lose another."
elif mode == "trusting":
return "Ellie’s not just cargo. She’s... she’s everything now."
else:
return "Family’s what you protect. Even when it breaks you."
elif name == "Ellie":
if mode == "triggered":
return "Everyone I’ve ever cared about either died or left me. So yeah, I’ve got trust issues."
elif mode == "trusting":
return "Joel’s stubborn, grumpy, and kind of a pain. But he’s mine. He’s family."
else:
return "I don’t know what family means anymore. But I know what it feels like to fight for someone."
elif name == "Uncle Ben":
if mode == "triggered":
return "I tried to teach him. I did. But you can’t always stop what’s coming."
elif mode == "trusting":
return "Peter’s got a good heart. He just needs to remember that with great power..."
else:
return "Family’s not about blood. It’s about responsibility."
elif name == "The Lady from The Yellow Wallpaper":
if mode == "triggered":
return "He says I must rest. That I must not think. But I see her — behind the paper."
elif mode == "trusting":
return "John is my husband. He means well. But he does not see me."
else:
return "They call it care. I call it confinement. I am not what they believe."
return "Relationships are threads in a tapestry — some fray, some bind."
def get_dramatic_mode_response(name, mode, state, persona):
"""Generate generic dramatic response based on current mode."""
resilience_hooks = persona.get("resilience_hooks", [])
scripts = persona.get("scripts", {})
if mode == "decompensating":
return scripts.get("collapse", "I cannot continue. The truth has undone me.")
if mode == "triggered":
return scripts.get("defensive", "You tread dangerous ground.")
if mode == "guarded":
return scripts.get("reserved", "I will not speak of that.")
if mode == "trusting" and resilience_hooks:
return f"You wish to understand? Then know this: {resilience_hooks[0]}"
if mode == "recovering":
return "I see more clearly now. The pain has not vanished, but I walk forward."
return "Ask what you will. I am listening."
def tag_emotional_memory(prompt, mode, state):
"""Generate a literary emotional memory tag based on the interaction."""
prompt_lower = prompt.lower()
if mode == "trusting":
if any(word in prompt_lower for word in ["why", "how", "tell me"]):
return "revealed vulnerability"
return "shared guarded truth"
if mode == "triggered":
if any(word in prompt_lower for word in ["accuse", "blame", "should"]):
return "felt attacked"
return "felt exposed"
if mode == "guarded":
return "withheld emotion"
if mode == "decompensating":
return "collapsed under pressure"
return "engaged in reflection" |