Spaces:
Running
Running
Upload 5 files
Browse files- app.py +617 -0
- conversation_test_fixtures_v8.jsonl +51 -0
- conversation_test_fixtures_v9.jsonl +50 -0
- evaluate.py +300 -0
- nlu_training_examples.jsonl +23 -0
app.py
ADDED
|
@@ -0,0 +1,617 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import shutil
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import tempfile
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from typing import List, Dict, Any, Optional
|
| 8 |
+
from pytube import YouTube
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
import re
|
| 11 |
+
import pandas as pd
|
| 12 |
+
|
| 13 |
+
# --- Agent Imports ---
|
| 14 |
+
try:
    from alz_companion.agent import (
        bootstrap_vectorstore,
        make_rag_chain,
        answer_query,
        synthesize_tts,
        transcribe_audio,
        detect_tags_from_query,
        describe_image,
        build_or_load_vectorstore,
        _default_embeddings,
        route_query_type,
        call_llm,
    )
    from alz_companion.prompts import (
        BEHAVIOUR_TAGS,
        EMOTION_STYLES,
        FAITHFULNESS_JUDGE_PROMPT,
    )
    from langchain.schema import Document
    from langchain_community.vectorstores import FAISS

    AGENT_OK = True
except Exception as e:
    # Any failure while importing the agent stack switches the app into a
    # UI-only demo mode: every agent entry point is replaced by a stub below.
    AGENT_OK = False

    class Document:
        """Stand-in for the langchain Document: just content plus metadata."""

        def __init__(self, page_content, metadata):
            self.page_content = page_content
            self.metadata = metadata

    class _FallbackDocstore:
        """Holds the `_dict` mapping that the memory-management callbacks read."""

        def __init__(self):
            self._dict = {}

    class FAISS:
        """In-memory stand-in for the FAISS vector store (no embeddings, no search)."""

        def __init__(self):
            self.docstore = _FallbackDocstore()

        def add_documents(self, docs):
            # Keys continue from the current size, so documents are numbered 0, 1, 2, ...
            store = self.docstore._dict
            first_key = len(store)
            for offset, doc in enumerate(docs):
                store[first_key + offset] = doc

        def save_local(self, path):
            # Nothing is persisted in demo mode.
            pass

        @classmethod
        def from_documents(cls, docs, embeddings=None):
            instance = cls()
            instance.add_documents(docs)
            return instance

    def build_or_load_vectorstore(docs, index_path, is_personal=False):
        return FAISS.from_documents(docs or [], embeddings=None)

    def bootstrap_vectorstore(sample_paths=None, index_path="data/"):
        return object()

    def make_rag_chain(vs_general, vs_personal, **kwargs):
        def _demo_chain(q, **k):
            # Echo the question back; there are never any sources in demo mode.
            return {"answer": f"(Demo) You asked: {q}", "sources": []}

        return _demo_chain

    def answer_query(chain, q, **kwargs):
        return chain(q, **kwargs)

    def synthesize_tts(text: str, lang: str = "en"):
        return None

    def transcribe_audio(filepath: str, lang: str = "en"):
        return "This is a transcribed message."

    def detect_tags_from_query(*args, **kwargs):
        return {"detected_behavior": "None", "detected_emotion": "None"}

    def describe_image(image_path: str):
        return "This is a description of an image."

    def _default_embeddings():
        return None

    def route_query_type(query: str):
        return "general_conversation"

    def call_llm(messages, **kwargs):
        return "Cannot call LLM in fallback mode."

    BEHAVIOUR_TAGS = {"None": []}
    EMOTION_STYLES = {"None": {}}
    FAITHFULNESS_JUDGE_PROMPT = ""

    print(f"WARNING: Could not import from alz_companion ({e}). Running in UI-only demo mode.")
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# --- NEW: Import for Evaluation Logic ---
|
| 59 |
+
try:
    from evaluate import load_test_fixtures, run_comprehensive_evaluation
except ImportError:
    # evaluate.py is not importable: provide stubs with the same names so the
    # rest of the module can still reference them.

    def load_test_fixtures():
        """Stub: only reports that the evaluation module is missing."""
        print("WARNING: evaluate.py not found.")

    def run_comprehensive_evaluation(*args, **kwargs):
        """Stub: return a status message and an empty result list."""
        return "Evaluation module not found.", []
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# --- Centralized Configuration ---
|
| 68 |
+
# Central place for every option list shown in the UI.
# "behavior_tags" and "emotion_tags" always start with the "None" sentinel; any
# "None" key already present in the source mappings (as in the demo-mode
# fallbacks) is skipped so the choice lists never contain "None" twice.
CONFIG = {
    "themes": ["All", "The Father", "Still Alice", "Away from Her", "Alive Inside", "General Caregiving"],
    "roles": ["patient", "caregiver"],
    "disease_stages": ["Default: Mild Stage", "Moderate Stage", "Advanced Stage"],
    "behavior_tags": ["None"] + [tag for tag in BEHAVIOUR_TAGS if tag != "None"],
    "emotion_tags": ["None"] + [tag for tag in EMOTION_STYLES if tag != "None"],
    "topic_tags": [
        "None",
        "caregiving_advice",
        "medical_fact",
        "personal_story",
        "research_update",
        "treatment_option:home_safety",
        "treatment_option:long_term_care",
        "treatment_option:music_therapy",
        "treatment_option:reassurance",
        "treatment_option:routine_structuring",
        "treatment_option:validation_therapy",
    ],
    "context_tags": [
        "None",
        "disease_stage_mild",
        "disease_stage_moderate",
        "disease_stage_advanced",
        "disease_stage_unspecified",
        "interaction_mode_one_to_one",
        "interaction_mode_small_group",
        "interaction_mode_group_activity",
        "relationship_family",
        "relationship_spouse",
        "relationship_staff_or_caregiver",
        "relationship_unspecified",
        "setting_home_or_community",
        "setting_care_home",
        "setting_clinic_or_hospital",
    ],
    # Display name -> language code passed to transcription / TTS.
    "languages": {
        "English": "en",
        "Chinese": "zh",
        "Cantonese": "zh-yue",
        "Korean": "ko",
        "Japanese": "ja",
        "Malay": "ms",
        "French": "fr",
        "Spanish": "es",
        "Hindi": "hi",
        "Arabic": "ar",
    },
    "tones": ["warm", "empathetic", "caring", "reassuring", "calm", "optimistic", "motivating", "neutral", "formal", "humorous"],
}
|
| 79 |
+
|
| 80 |
+
# --- File Management & Vector Store Logic ---
|
| 81 |
+
def _storage_root() -> Path:
|
| 82 |
+
for p in [Path(os.getenv("SPACE_STORAGE", "")), Path("/data"), Path.home() / ".cache" / "alz_companion"]:
|
| 83 |
+
if not p: continue
|
| 84 |
+
try:
|
| 85 |
+
p.mkdir(parents=True, exist_ok=True)
|
| 86 |
+
(p / ".write_test").write_text("ok")
|
| 87 |
+
(p / ".write_test").unlink(missing_ok=True)
|
| 88 |
+
return p
|
| 89 |
+
except Exception: continue
|
| 90 |
+
tmp = Path(tempfile.gettempdir()) / "alz_companion"
|
| 91 |
+
tmp.mkdir(parents=True, exist_ok=True)
|
| 92 |
+
return tmp
|
| 93 |
+
# Resolve the writable storage root once at import time; every path below hangs off it.
STORAGE_ROOT = _storage_root()
INDEX_BASE = STORAGE_ROOT / "index"
PERSONAL_DATA_BASE = STORAGE_ROOT / "personal"
UPLOADS_BASE = INDEX_BASE / "uploads"
PERSONAL_INDEX_PATH = str(PERSONAL_DATA_BASE / "personal_faiss_index")
NLU_EXAMPLES_INDEX_PATH = str(INDEX_BASE / "nlu_examples_faiss_index")

# One index directory per theme; the theme name is lower-cased with spaces removed.
THEME_PATHS = {
    theme: str(INDEX_BASE / ("faiss_index_" + theme.replace(" ", "").lower()))
    for theme in CONFIG["themes"]
}

# Make sure every directory the app writes to exists before first use.
os.makedirs(UPLOADS_BASE, exist_ok=True)
os.makedirs(PERSONAL_DATA_BASE, exist_ok=True)
for p in THEME_PATHS.values():
    os.makedirs(p, exist_ok=True)

# Per-theme vector store cache (filled lazily by ensure_index) and the NLU example store.
vectorstores = {}
nlu_vectorstore = None

# Best effort: if the personal store cannot be built or loaded now, it stays
# None and is created on demand by the callbacks that need it.
try:
    personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
except Exception:
    personal_vectorstore = None
|
| 111 |
+
def bootstrap_nlu_vectorstore(example_file: str, index_path: str) -> FAISS:
    """Build the NLU example vector store from a JSONL file.

    Each usable line must be a JSON object with a "query" key; the query text
    becomes the document content and the whole object becomes its metadata.
    Lines that are not valid JSON, are not JSON objects, or lack "query" are
    skipped. Any existing index directory at ``index_path`` is removed first
    so the store is rebuilt from the current examples.

    Args:
        example_file: Path to the JSONL file of NLU examples.
        index_path: Directory where the index is stored.

    Returns:
        The vector store returned by ``build_or_load_vectorstore``; built from
        an empty document list when ``example_file`` does not exist.
    """
    if not os.path.exists(example_file):
        print(f"WARNING: NLU example file not found at {example_file}. NLU will be less accurate.")
        return build_or_load_vectorstore([], index_path)

    docs = []
    with open(example_file, "r", encoding="utf-8") as f:
        for line in f:
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue
            # A bare string, number or list is valid JSON but not a usable
            # example; indexing it with ["query"] would raise TypeError.
            if not isinstance(data, dict) or "query" not in data:
                continue
            docs.append(Document(page_content=data["query"], metadata=data))

    print(f"Found and loaded {len(docs)} NLU training examples.")
    if os.path.exists(index_path):
        shutil.rmtree(index_path)
    return build_or_load_vectorstore(docs, index_path)
|
| 126 |
+
def canonical_theme(tk: str) -> str:
    """Return ``tk`` when it is one of the configured themes, otherwise "All"."""
    if tk in CONFIG["themes"]:
        return tk
    return "All"
|
| 127 |
+
def theme_upload_dir(theme: str) -> str:
    """Return the upload directory for ``theme``, creating it when needed.

    The theme is canonicalised first, so unknown themes share the "All"
    directory. The directory name is "theme_" plus the lower-cased theme name
    with spaces removed.
    """
    folder_name = "theme_" + canonical_theme(theme).replace(" ", "").lower()
    path = UPLOADS_BASE / folder_name
    # parents=True keeps this working even if the uploads base was removed
    # after module import.
    path.mkdir(parents=True, exist_ok=True)
    return str(path)
|
| 131 |
+
def load_manifest(theme: str) -> Dict[str, Any]:
    """Load the manifest for ``theme`` from "manifest.json" in its upload dir.

    Returns:
        The parsed manifest, always a dict whose "files" entry is a dict
        (file name -> enabled flag). A missing, unreadable or malformed
        manifest yields ``{"files": {}}``.
    """
    empty = {"files": {}}
    manifest_path = os.path.join(theme_upload_dir(theme), "manifest.json")
    try:
        with open(manifest_path, "r", encoding="utf-8") as f:
            manifest = json.load(f)
    except FileNotFoundError:
        return empty
    except (OSError, ValueError) as err:
        # ValueError covers json.JSONDecodeError and decoding errors.
        print(f"WARNING: Could not read manifest at {manifest_path} ({err}). Using an empty manifest.")
        return empty

    # Callers index man["files"] directly, so guarantee that shape here.
    if not isinstance(manifest, dict):
        return empty
    if not isinstance(manifest.get("files"), dict):
        manifest["files"] = {}
    return manifest
|
| 138 |
+
def save_manifest(theme: str, man: Dict[str, Any]):
    """Write ``man`` as indented JSON to "manifest.json" in the theme's upload dir."""
    manifest_path = os.path.join(theme_upload_dir(theme), "manifest.json")
    with open(manifest_path, "w", encoding="utf-8") as out:
        json.dump(man, out, indent=2)
|
| 140 |
+
def list_theme_files(theme: str) -> List[tuple[str, bool]]:
    """List the knowledge files of ``theme`` with their enabled flags.

    Manifest entries whose file still exists come first, keeping their flag;
    files found in the upload directory but not yet in the manifest follow in
    sorted order, disabled. The manifest is rewritten to match the result.

    Returns:
        A list of ``(file_name, enabled)`` tuples.
    """
    # The manifest is stored inside the upload directory itself; it is
    # bookkeeping, not a knowledge file, and must never be listed.
    manifest_name = "manifest.json"

    man = load_manifest(theme)
    base = theme_upload_dir(theme)

    recorded = man.get("files", {})
    if not isinstance(recorded, dict):
        recorded = {}

    found = [
        (name, bool(enabled))
        for name, enabled in recorded.items()
        if name != manifest_name and os.path.exists(os.path.join(base, name))
    ]
    known = {name for name, _ in found}

    for name in sorted(os.listdir(base)):
        if name == manifest_name or name in known:
            continue
        if os.path.isfile(os.path.join(base, name)):
            found.append((name, False))

    man["files"] = dict(found)
    save_manifest(theme, man)
    return found
|
| 150 |
+
def copy_into_theme(theme: str, src_path: str) -> str:
    """Copy ``src_path`` into the theme's upload dir under its base name; return the new path."""
    target = os.path.join(theme_upload_dir(theme), os.path.basename(src_path))
    shutil.copy2(src_path, target)
    return target
|
| 155 |
+
def seed_files_into_theme(theme: str):
    """Copy the bundled sample files into the theme's upload dir.

    A seed file is copied only when it exists on disk and no file with the
    same name is already in the upload dir; each copied file is recorded in
    the manifest with its enabled flag. The manifest is saved only when at
    least one file was copied.
    """
    SEED_FILES = [
        ("sample_data/caregiving_tips.txt", True),
        ("sample_data/the_father_segments_enriched_harmonized_plus.jsonl", True),
        ("sample_data/still_alice_enriched_harmonized_plus.jsonl", True),
        ("sample_data/away_from_her_enriched_harmonized_plus.jsonl", True),
        ("sample_data/alive_inside_enriched_harmonized.jsonl", True),
    ]
    man, changed = load_manifest(theme), False
    # Tolerate a manifest that lacks a usable "files" mapping.
    if not isinstance(man.get("files"), dict):
        man["files"] = {}
    # Resolve the target directory once instead of on every iteration.
    target_dir = theme_upload_dir(theme)

    for path, enable in SEED_FILES:
        if not os.path.exists(path):
            continue
        fname = os.path.basename(path)
        if os.path.exists(os.path.join(target_dir, fname)):
            continue
        copy_into_theme(theme, path)
        man["files"][fname] = bool(enable)
        changed = True

    if changed:
        save_manifest(theme, man)
|
| 166 |
+
def ensure_index(theme='All'):
    """Return the vector store for ``theme``, building and caching it on first use.

    The store is bootstrapped from the theme's enabled files and kept in the
    module-level ``vectorstores`` cache under the canonical theme name.
    """
    theme = canonical_theme(theme)
    cached = vectorstores.get(theme) if theme in vectorstores else None
    if theme in vectorstores:
        return cached

    upload_dir = theme_upload_dir(theme)
    enabled_files = []
    for file_name, is_enabled in list_theme_files(theme):
        if is_enabled:
            enabled_files.append(os.path.join(upload_dir, file_name))

    store = bootstrap_vectorstore(sample_paths=enabled_files, index_path=THEME_PATHS.get(theme))
    vectorstores[theme] = store
    return store
|
| 174 |
+
|
| 175 |
+
# --- Gradio Callbacks ---
|
| 176 |
+
# Pack the positional values of the settings controls into a settings dict.
|
| 177 |
+
|
| 178 |
+
def collect_settings(*args):
    """Map positional UI values onto their setting names.

    Values are paired with the names below in order; pairing stops at the
    shorter of the two sequences, so extra values are ignored and missing
    trailing values leave their keys out of the result.
    """
    setting_names = (
        "role",
        "patient_name",
        "caregiver_name",
        "tone",
        "language",
        "tts_lang",
        "temperature",
        "disease_stage",
        "behaviour_tag",
        "emotion_tag",
        "topic_tag",
        "active_theme",
        "tts_on",
        "debug_mode",
    )
    return {name: value for name, value in zip(setting_names, args)}
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
# Split raw text into entries and tag each one with NLU-detected metadata.
|
| 187 |
+
def parse_and_tag_entries(text_content: str, source: str, settings: Optional[dict] = None) -> List[Document]:
    """Split ``text_content`` into entries and turn each into a tagged Document.

    Entries are separated by a line consisting of "---", "--", "-", "-*-" or
    "-.-". For each entry:

    * If the first line contains "title:", the text after its first colon is
      the title and the remaining lines are the body. Otherwise the title is
      "Untitled Text Entry" and the whole entry is the body (the first line is
      kept, not discarded).
    * If the body starts with "content:" (case-insensitive), that label is
      stripped; otherwise the body is used unchanged.

    ``detect_tags_from_query`` is run on the content and any detected
    behaviors, emotion, topics and contexts are stored lower-cased in the
    document metadata next to "source" and "title".

    Args:
        text_content: Raw text holding one or more entries.
        source: Label stored in each document's "source" metadata.
        settings: Settings dict forwarded to ``detect_tags_from_query``.

    Returns:
        One Document per non-empty entry, with page content
        "Title: <title>\\n\\nContent: <content>".
    """
    docs_to_add = []
    entries = re.split(r'\n(?:---|--|-|-\*-|-\.-)\n', text_content)

    for raw_entry in entries:
        entry = raw_entry.strip()
        if not entry:
            continue

        lines = entry.split('\n')
        first_line = lines[0]
        _, colon, after_colon = first_line.partition(':')
        has_title = bool(colon) and "title:" in first_line.lower()

        if has_title:
            title = after_colon.strip()
            body = "\n".join(lines[1:]).strip()
        else:
            # No title line: the first line is part of the content.
            title = "Untitled Text Entry"
            body = entry

        if body.lower().startswith("content:"):
            content = body[len("content:"):].strip()
        else:
            # A titled entry without any body falls back to the entry itself.
            content = body or entry

        if not content:
            continue

        full_content = f"Title: {title}\n\nContent: {content}"

        detected_tags = detect_tags_from_query(
            content,
            nlu_vectorstore=nlu_vectorstore,
            behavior_options=CONFIG["behavior_tags"],
            emotion_options=CONFIG["emotion_tags"],
            topic_options=CONFIG["topic_tags"],
            context_options=CONFIG["context_tags"],
            settings=settings,
        )

        metadata = {"source": source, "title": title}

        detected_behaviors = detected_tags.get("detected_behaviors")
        if detected_behaviors:
            metadata["behaviors"] = [b.lower() for b in detected_behaviors]

        detected_emotion = detected_tags.get("detected_emotion")
        if detected_emotion and detected_emotion != "None":
            metadata["emotion"] = detected_emotion.lower()

        # Topics and contexts arrive under plural keys with list values.
        detected_topics = detected_tags.get("detected_topics")
        if detected_topics:
            metadata["topic_tags"] = [t.lower() for t in detected_topics]

        detected_contexts = detected_tags.get("detected_contexts")
        if detected_contexts:
            metadata["context_tags"] = [c.lower() for c in detected_contexts]

        docs_to_add.append(Document(page_content=full_content, metadata=metadata))

    return docs_to_add
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def handle_add_knowledge(title, text_input, file_input, image_input, yt_url, settings):
    """Add one piece of user-supplied knowledge to the personal vector store.

    Exactly one input is used, checked in this order: typed text, an uploaded
    file (".txt" is read as text, anything else is transcribed), an image
    (described), or a YouTube URL (audio downloaded and transcribed). The
    resulting text is parsed and tagged, added to the personal store, and the
    store is saved.

    Returns:
        A status message for the UI.
    """
    global personal_vectorstore
    docs_to_add = []
    source, content = "Unknown", ""

    if text_input and text_input.strip():
        source, content = "Text Input", f"Title: {title or 'Untitled'}\n\nContent: {text_input}"
    elif file_input:
        source = os.path.basename(file_input.name)
        if file_input.name.lower().endswith('.txt'):
            with open(file_input.name, 'r', encoding='utf-8') as f:
                content = f.read()
        else:
            transcribed = transcribe_audio(file_input.name)
            content = f"Title: {title or 'Audio/Video Note'}\n\nContent: {transcribed}"
    elif image_input:
        source, description = "Image Input", describe_image(image_input)
        content = f"Title: {title or 'Image Note'}\n\nContent: {description}"
    elif yt_url and ("youtube.com" in yt_url or "youtu.be" in yt_url):
        temp_path = None
        try:
            yt = YouTube(yt_url)
            # Reserve a temp file name and close the handle before downloading
            # into it, so nothing else holds the file open during the write.
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
                temp_path = temp_audio_file.name
            yt.streams.get_audio_only().download(filename=temp_path)
            transcribed = transcribe_audio(temp_path)
            source, content = f"YouTube: {yt.title}", f"Title: {title or yt.title}\n\nContent: {transcribed}"
        except Exception as e:
            return f"Error processing YouTube link: {e}"
        finally:
            # Remove the temp file on success and on failure alike.
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)
    else:
        return "Please provide content to add."

    if content:
        docs_to_add = parse_and_tag_entries(content, source, settings=settings)
    if not docs_to_add:
        return "No processable content found to add."

    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore(docs_to_add, PERSONAL_INDEX_PATH, is_personal=True)
    else:
        personal_vectorstore.add_documents(docs_to_add)
    personal_vectorstore.save_local(PERSONAL_INDEX_PATH)
    return f"Successfully added {len(docs_to_add)} new memory/memories."
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# Main chat callback: handles typed or spoken input and returns the updated conversation.
|
| 277 |
+
|
| 278 |
+
def chat_fn(user_text, audio_file, settings, chat_history):
    """Answer one user turn and return the updated chat.

    The question is the typed text, or the transcription of ``audio_file``
    when no text was typed. Tags come from the manual selections in
    ``settings`` when any is set; otherwise, for caregiving-scenario queries,
    they are auto-detected. The answer is produced by the RAG chain over the
    active theme's store and the personal store.

    Args:
        user_text: Text typed by the user (may be empty or None).
        audio_file: Path to a recorded audio file, or None.
        settings: Settings dict as built by ``collect_settings``.
        chat_history: List of ``{"role": ..., "content": ...}`` messages.

    Returns:
        ``(textbox_value, audio_output, chat_history)``: the textbox is always
        cleared; the audio output is None on early exit, otherwise a
        ``gr.update`` carrying the synthesized speech (hidden when there is
        none).
    """
    global personal_vectorstore
    settings = settings or {}
    if chat_history is None:
        chat_history = []

    question = (user_text or "").strip()
    if audio_file and not question:
        try:
            question = transcribe_audio(audio_file, lang=CONFIG["languages"].get(settings.get("tts_lang", "English"), "en"))
        except Exception as e:
            err_msg = f"Audio Error: {e}" if settings.get("debug_mode") else "Sorry, I couldn't understand the audio."
            chat_history.append({"role": "assistant", "content": err_msg})
            return "", None, chat_history

    if not question:
        return "", None, chat_history

    # History sent to the model: drop the "*(...)*" info messages this
    # function appends for the UI, and anything whose content is not text.
    api_chat_history = [
        msg for msg in chat_history
        if isinstance(msg.get("content"), str)
        and msg["content"]
        and not msg["content"].strip().startswith("*(")
    ]

    # The displayed history gets the new question; api_chat_history does not.
    chat_history.append({"role": "user", "content": question})

    query_type = route_query_type(question)
    final_tags = {"scenario_tag": None, "emotion_tag": None, "topic_tag": None, "context_tags": []}
    manual_behavior = settings.get("behaviour_tag", "None")
    manual_emotion = settings.get("emotion_tag", "None")
    manual_topic = settings.get("topic_tag", "None")

    auto_detected_context = ""
    if not all(m == "None" for m in [manual_behavior, manual_emotion, manual_topic]):
        # Manual selections take precedence over auto-detection.
        final_tags["scenario_tag"] = manual_behavior if manual_behavior != "None" else None
        final_tags["emotion_tag"] = manual_emotion if manual_emotion != "None" else None
        final_tags["topic_tag"] = manual_topic if manual_topic != "None" else None
    elif "caregiving_scenario" in query_type:
        detected_tags = detect_tags_from_query(
            question,
            nlu_vectorstore=nlu_vectorstore,
            behavior_options=CONFIG["behavior_tags"],
            emotion_options=CONFIG["emotion_tags"],
            topic_options=CONFIG["topic_tags"],
            context_options=CONFIG["context_tags"],
            settings=settings,
        )
        behaviors = detected_tags.get("detected_behaviors")
        final_tags["scenario_tag"] = behaviors[0] if behaviors else None
        final_tags["emotion_tag"] = detected_tags.get("detected_emotion")

        # parse_and_tag_entries reads topics from the plural "detected_topics"
        # list; use its first item here and keep the singular key as fallback.
        # NOTE(review): confirm against detect_tags_from_query's return value.
        topics = detected_tags.get("detected_topics")
        if isinstance(topics, str):
            final_tags["topic_tag"] = topics
        elif topics:
            final_tags["topic_tag"] = topics[0]
        else:
            final_tags["topic_tag"] = detected_tags.get("detected_topic")

        final_tags["context_tags"] = detected_tags.get("detected_contexts", [])

        # Label each value with the part of its key before the first
        # underscore: scenario / emotion / topic / context.
        detected_parts = [
            f"{key.split('_')[0]}=`{value}`"
            for key, value in final_tags.items()
            if value and value != "None" and value != []
        ]
        if detected_parts:
            auto_detected_context = f"*(Auto-detected context: {', '.join(detected_parts)})*"

    vs_general = ensure_index(settings.get("active_theme", "All"))
    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)

    rag_settings = {
        key: settings.get(key)
        for key in ["role", "temperature", "language", "patient_name", "caregiver_name", "tone", "disease_stage"]
    }
    chain = make_rag_chain(vs_general, personal_vectorstore, **rag_settings)

    response = answer_query(chain, question, query_type=query_type, chat_history=api_chat_history, **final_tags)
    answer = response.get("answer", "[No answer found]")

    # The answer goes first, followed by the supplementary info messages.
    chat_history.append({"role": "assistant", "content": answer})
    if auto_detected_context:
        chat_history.append({"role": "assistant", "content": auto_detected_context})
    if response.get("sources"):
        chat_history.append({"role": "assistant", "content": f"*(Sources used: {', '.join(response['sources'])})*"})

    audio_out = None
    if settings.get("tts_on") and answer:
        audio_out = synthesize_tts(answer, lang=CONFIG["languages"].get(settings.get("tts_lang"), "en"))

    return "", gr.update(value=audio_out, visible=bool(audio_out)), chat_history
|
| 360 |
+
# --- END FIX 3 ---
|
| 361 |
+
|
| 362 |
+
# chat_fn builds chat_history as a list of message dictionaries, e.g.
# [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}],
# not as a list of tuples such as [(True, "..."), (False, "...")].
# save_chat_to_memory therefore parses the list-of-dictionaries format.
|
| 368 |
+
def save_chat_to_memory(chat_history):
    """Store the current conversation as one document in the personal store.

    Only dict messages with non-empty content are kept, and the "*(...)*"
    info messages are left out. Each kept message becomes a
    "<Role>: <content>" line.

    Returns:
        A status message for the UI.
    """
    global personal_vectorstore

    if not chat_history:
        return "Nothing to save."

    formatted_chat = []
    for msg in chat_history:
        if not isinstance(msg, dict):
            continue
        text = msg.get('content')
        if not text:
            continue
        if msg.get('content', '').strip().startswith("*("):
            continue
        speaker = msg.get('role', 'assistant').capitalize()
        formatted_chat.append(f"{speaker}: {msg.get('content', '').strip()}")

    if not formatted_chat:
        return "No conversation to save."

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    title = f"Conversation from {timestamp}"
    transcript = "\n".join(formatted_chat)
    full_content = f"Title: {title}\n\nContent:\n" + transcript
    doc = Document(page_content=full_content, metadata={"source": "Saved Chat", "title": title})

    if personal_vectorstore is not None:
        personal_vectorstore.add_documents([doc])
    else:
        personal_vectorstore = build_or_load_vectorstore([doc], PERSONAL_INDEX_PATH, is_personal=True)

    personal_vectorstore.save_local(PERSONAL_INDEX_PATH)
    return f"Conversation from {timestamp} saved."
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
def list_personal_memories():
    """Build the UI updates for the memory table and the delete selector.

    Returns:
        Two ``gr.update`` objects: table rows of ``[title, source, content]``
        and the selector choices (each memory's page content). With no stored
        memories, a single placeholder row and an empty, cleared selector.
    """
    store = personal_vectorstore
    no_memories = (
        store is None
        or not hasattr(store.docstore, '_dict')
        or not store.docstore._dict
    )
    if no_memories:
        return gr.update(value=[["No memories", "", ""]]), gr.update(choices=[], value=None)

    docs = list(store.docstore._dict.values())
    rows = []
    choices = []
    for d in docs:
        rows.append([d.metadata.get('title', '...'), d.metadata.get('source', '...'), d.page_content])
        choices.append(d.page_content)
    return gr.update(value=rows), gr.update(choices=choices)
|
| 405 |
+
def delete_personal_memory(memory_to_delete):
    """Delete every stored memory whose page content equals ``memory_to_delete``.

    The personal store is rebuilt from the remaining documents and saved. When
    nothing remains, the index directory is removed and an empty store is
    created instead.

    Returns:
        A status message for the UI.
    """
    global personal_vectorstore

    if personal_vectorstore is None or not memory_to_delete:
        return "No memory selected."

    all_docs = list(personal_vectorstore.docstore._dict.values())
    docs_to_keep = []
    for d in all_docs:
        if d.page_content != memory_to_delete:
            docs_to_keep.append(d)

    # Same length means no document matched the selection.
    if len(docs_to_keep) == len(all_docs):
        return "Error: Could not find memory."

    if docs_to_keep:
        new_vs = FAISS.from_documents(docs_to_keep, _default_embeddings())
        new_vs.save_local(PERSONAL_INDEX_PATH)
        personal_vectorstore = new_vs
    else:
        if os.path.isdir(PERSONAL_INDEX_PATH):
            shutil.rmtree(PERSONAL_INDEX_PATH)
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)

    return "Successfully deleted memory."
|
| 419 |
+
|
| 420 |
+
# --- Evaluation functions: these were moved into evaluate.py ---
#   evaluate_nlu_tags(expected: Dict[str, Any], actual: Dict[str, Any], tag_key: str, expected_key_override: str = None) -> Dict[str, float]
#   _parse_judge_json(raw_str: str) -> dict | None
#   run_comprehensive_evaluation()
|
| 424 |
+
|
| 425 |
+
def upload_knowledge(files, theme):
    """Copy the uploaded ``files`` into the theme's upload dir.

    The theme's cached vector store is dropped so the next ``ensure_index``
    call rebuilds it.

    Args:
        files: Uploaded file objects exposing a ``.name`` path; None or an
            empty list means nothing was selected.
        theme: Theme to add the files to.

    Returns:
        A status message for the UI.
    """
    files = files or []
    for f in files:
        copy_into_theme(theme, f.name)
    # ensure_index caches under the canonical theme name, so invalidate the
    # same key.
    vectorstores.pop(canonical_theme(theme), None)
    return f"Uploaded {len(files)} file(s)."
|
| 429 |
+
def save_file_selection(theme, enabled):
    """Persist which of the theme's files are enabled.

    Every file in the manifest is flagged enabled exactly when its name is in
    ``enabled``. The theme's cached vector store is dropped so the next
    ``ensure_index`` call rebuilds it.

    Args:
        theme: Theme whose manifest is updated.
        enabled: Collection of enabled file names; None means none enabled.

    Returns:
        A status message for the UI.
    """
    selected = set(enabled or [])
    man = load_manifest(theme)
    # Tolerate a manifest that lacks a usable "files" mapping.
    if not isinstance(man.get('files'), dict):
        man['files'] = {}
    for fname in man['files']:
        man['files'][fname] = fname in selected
    save_manifest(theme, man)
    # ensure_index caches under the canonical theme name, so invalidate the
    # same key.
    vectorstores.pop(canonical_theme(theme), None)
    return f"Settings saved for theme '{theme}'."
|
| 435 |
+
def refresh_file_list_ui(theme):
    """Return the file-selector update and a status line for ``theme``.

    The selector offers every file of the theme and pre-selects the enabled ones.
    """
    files = list_theme_files(theme)
    all_names = []
    enabled_names = []
    for file_name, is_enabled in files:
        all_names.append(file_name)
        if is_enabled:
            enabled_names.append(file_name)
    selector_update = gr.update(choices=all_names, value=enabled_names)
    return selector_update, f"Found {len(files)} file(s)."
|
| 438 |
+
def auto_setup_on_load(theme):
    """Prepare the initial state when the UI loads.

    Seeds the theme with the sample files when its upload dir is empty, builds
    the default settings dict, and refreshes the file list.

    Returns:
        ``(settings, files_ui, status)``.
    """
    if not os.listdir(theme_upload_dir(theme)):
        seed_files_into_theme(theme)
    # One value per key expected by collect_settings, in the same order.
    # The disease stage must be present: leaving it out shifts every later
    # value onto the wrong key (e.g. active_theme would receive True).
    settings = collect_settings(
        "patient",                    # role
        "",                           # patient_name
        "",                           # caregiver_name
        "warm",                       # tone
        "English",                    # language
        "English",                    # tts_lang
        0.7,                          # temperature
        CONFIG["disease_stages"][0],  # disease_stage
        "None",                       # behaviour_tag
        "None",                       # emotion_tag
        "None",                       # topic_tag
        "All",                        # active_theme
        True,                         # tts_on
        False,                        # debug_mode
    )
    files_ui, status = refresh_file_list_ui(theme)
    return settings, files_ui, status
|
| 443 |
+
def test_save_file():
    """Write a timestamped test file into the personal data dir.

    Diagnostic helper for checking that storage is writable.

    Returns:
        A status message for the UI describing success or the error.
    """
    try:
        path = PERSONAL_DATA_BASE / "persistence_test.txt"
        path.write_text(f"File saved at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        # Status markers restored from mis-decoded bytes that had split the
        # string literal across two lines.
        return f"✅ Success! Wrote test file to: {path}"
    except Exception as e:
        return f"❌ Error! Failed to write file: {e}"
|
| 449 |
+
def check_test_file():
    """Report whether the test file written by ``test_save_file`` exists.

    Returns:
        A status message for the UI; on success it includes the file contents.
    """
    path = PERSONAL_DATA_BASE / "persistence_test.txt"
    # Status markers restored from mis-decoded bytes that had split the
    # string literal across two lines.
    if path.exists():
        return f"✅ Success! Found test file. Contents: '{path.read_text()}'"
    return f"❌ Failure. Test file not found at: {path}"
|
| 453 |
+
|
| 454 |
+
# --- UI Definition ---
|
| 455 |
+
# Stylesheet passed to gr.Blocks: base font size, minimum chat height, and
# compact audio widgets with the waveform hidden.
CSS = """
.gradio-container { font-size: 14px; }
#chatbot { min-height: 400px; }
#audio_in audio, #audio_out audio { max-height: 40px; }
#audio_in .waveform, #audio_out .waveform { display: none !important; }
#audio_in, #audio_out { min-height: 0px !important; }
"""
|
| 462 |
+
# Gradio application: four tabs (Chat, Personalize, Settings, Testing) followed
# by the event wiring that connects the widgets to the handler functions.
# NOTE(review): the nesting of the layout containers below should be confirmed
# against the rendered UI.
with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
    # Holds the settings produced by collect_settings; rewritten whenever any
    # settings widget changes and read by the chat / add-knowledge handlers.
    settings_state = gr.State({})

    # ----- Chat tab: text + voice input, conversation view, memory actions -----
    with gr.Tab("Chat"):
        with gr.Row():
            user_text = gr.Textbox(show_label=False, placeholder="Type your message here...", scale=7)
            submit_btn = gr.Button("Send", variant="primary", scale=1)
        with gr.Row():
            audio_in = gr.Audio(sources=["microphone"], type="filepath", label="Voice Input", elem_id="audio_in")
            audio_out = gr.Audio(label="Response Audio", autoplay=True, visible=True, elem_id="audio_out")

        chatbot = gr.Chatbot(elem_id="chatbot", label="Conversation", type="messages")
        chat_status = gr.Markdown()
        with gr.Row():
            clear_btn = gr.Button("Clear")
            save_btn = gr.Button("Save to Memory")

    # ----- Personalize tab: add to and manage the personal knowledge base -----
    with gr.Tab("Personalize"):
        with gr.Accordion("Add to Personal Knowledge Base", open=True):
            personal_title = gr.Textbox(label="Title")
            personal_text = gr.Textbox(lines=5, label="Text Content")
            with gr.Row():
                personal_file = gr.File(label="Upload Audio/Video/Text File")
                personal_image = gr.Image(type="filepath", label="Upload Image")
            personal_yt_url = gr.Textbox(label="Or, provide a YouTube URL")
            personal_add_btn = gr.Button("Add Knowledge", variant="primary")
            personal_status = gr.Markdown()
        gr.Markdown("### **Manage Personal Knowledge**")
        with gr.Accordion("View/Hide Details", open=False):
            personal_memory_display = gr.DataFrame(headers=["Title", "Source", "Content"], label="Saved Memories", row_count=(5, "dynamic"))
            personal_refresh_btn = gr.Button("Refresh Memories")
            personal_delete_selector = gr.Dropdown(label="Select memory to delete", scale=3, interactive=True)
            personal_delete_btn = gr.Button("Delete Selected", variant="stop", scale=1)
            personal_delete_status = gr.Markdown()

    # ----- Settings tab: persona, filters, language/voice, knowledge files -----
    with gr.Tab("Settings"):
        with gr.Group():
            gr.Markdown("## Conversation & Persona Settings")
            with gr.Row():
                role = gr.Radio(CONFIG["roles"], value="patient", label="Your Role")
                patient_name = gr.Textbox(label="Patient's Name")
                caregiver_name = gr.Textbox(label="Caregiver's Name")
            with gr.Row():
                temperature = gr.Slider(0.0, 1.2, value=0.7, step=0.1, label="Creativity")
                tone = gr.Dropdown(CONFIG["tones"], value="warm", label="Response Tone")
            with gr.Row():
                # Disease stage assumed for the conversation; the initial
                # selection is the "Default: Mild Stage" entry.
                disease_stage = gr.Dropdown(CONFIG["disease_stages"], value="Default: Mild Stage", label="Assumed Disease Stage")
                # Manual tag filters; "None" is the initial selection for each.
                behaviour_tag = gr.Dropdown(CONFIG["behavior_tags"], value="None", label="Behaviour Filter (Manual)")
                emotion_tag = gr.Dropdown(CONFIG["emotion_tags"], value="None", label="Emotion Filter (Manual)")
                topic_tag = gr.Dropdown(CONFIG["topic_tags"], value="None", label="Topic Tag Filter (Manual)")
            with gr.Accordion("Language, Voice & Debugging", open=False):
                language = gr.Dropdown(list(CONFIG["languages"].keys()), value="English", label="Response Language")
                tts_lang = gr.Dropdown(list(CONFIG["languages"].keys()), value="English", label="Voice Language")
                tts_on = gr.Checkbox(True, label="Enable Voice Response")
                debug_mode = gr.Checkbox(False, label="Show Debug Info")
        gr.Markdown("--- \n ## General Knowledge Base Management")
        with gr.Row():
            with gr.Column(scale=1):
                files_in = gr.File(file_count="multiple", file_types=[".jsonl", ".txt"], label="Upload Knowledge Files")
                upload_btn = gr.Button("Upload to Theme")
                seed_btn = gr.Button("Import Sample Data")
                mgmt_status = gr.Markdown()
            with gr.Column(scale=2):
                active_theme = gr.Radio(CONFIG["themes"], value="All", label="Active Knowledge Theme")
                # Choices start empty; they are filled by refresh_file_list_ui.
                files_box = gr.CheckboxGroup(choices=[], label="Enable Files for Selected Theme")
                with gr.Row():
                    save_files_btn = gr.Button("Save Selection", variant="primary")
                    refresh_btn = gr.Button("Refresh List")
        with gr.Accordion("Persistence Test", open=False):
            # NOTE(review): the labels jump from step 1 to step 3; step 2 is
            # not represented by a widget here - confirm the intended wording.
            test_save_btn = gr.Button("1. Run Persistence Test (Save File)")
            check_save_btn = gr.Button("3. Check for Test File")
            test_status = gr.Markdown()

    # ----- Testing tab: run the full evaluation and show per-fixture results -----
    with gr.Tab("Testing"):
        gr.Markdown("## Comprehensive Performance Evaluation")
        gr.Markdown("Click the button below to run a full evaluation on all test fixtures. This will test NLU (Routing & Tagging) and generate RAG responses for manual review.")

        run_comprehensive_btn = gr.Button("Run Comprehensive Evaluation", variant="primary")

        batch_summary_md = gr.Markdown("### Evaluation Summary: Not yet run.")

        comprehensive_results_df = gr.DataFrame(
            label="Detailed Evaluation Results",
            elem_id="comprehensive_results_df",
            headers=[
                "Test ID","Title","Route Correct?","Expected Route","Actual Route",
                "Behavior F1","Emotion F1","Topic F1","Context F1",
                "Generated Answer","Sources","Source Count","Latency (ms)", "Faithfulness"
            ],
            interactive=False
        )

    # --- Event Wiring ---
    # Order matters: these components are passed positionally to
    # collect_settings in exactly this order.
    all_settings = [role, patient_name, caregiver_name, tone, language, tts_lang, temperature,
                    disease_stage, behaviour_tag, emotion_tag, topic_tag, active_theme, tts_on, debug_mode]

    # Any change to a settings widget recomputes the whole settings state.
    for c in all_settings: c.change(fn=collect_settings, inputs=all_settings, outputs=settings_state)
    submit_btn.click(fn=chat_fn, inputs=[user_text, audio_in, settings_state, chatbot], outputs=[user_text, audio_out, chatbot])
    save_btn.click(fn=save_chat_to_memory, inputs=[chatbot], outputs=[chat_status])
    # NOTE(review): user_text appears twice in outputs (receiving None, then "").
    clear_btn.click(lambda: (None, None, [], None, "", ""), outputs=[user_text, audio_out, chatbot, audio_in, user_text, chat_status])
    # After adding knowledge, a follow-up step resets the five input widgets.
    personal_add_btn.click(fn=handle_add_knowledge, inputs=[personal_title, personal_text, personal_file, personal_image, personal_yt_url, settings_state], outputs=[personal_status]).then(lambda: (None, None, None, None, None), outputs=[personal_title, personal_text, personal_file, personal_image, personal_yt_url])
    personal_refresh_btn.click(fn=list_personal_memories, inputs=None, outputs=[personal_memory_display, personal_delete_selector])
    # Deleting a memory is followed by a refresh of the table and the selector.
    personal_delete_btn.click(fn=delete_personal_memory, inputs=[personal_delete_selector], outputs=[personal_delete_status]).then(fn=list_personal_memories, inputs=None, outputs=[personal_memory_display, personal_delete_selector])
    # Upload / seed actions are each followed by a refresh of the file list.
    upload_btn.click(upload_knowledge, inputs=[files_in, active_theme], outputs=[mgmt_status]).then(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    save_files_btn.click(save_file_selection, inputs=[active_theme, files_box], outputs=[mgmt_status])
    seed_btn.click(seed_files_into_theme, inputs=[active_theme]).then(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    refresh_btn.click(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    active_theme.change(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])

    # The lambda defers the lookups of ensure_index("All") and the two global
    # vector stores until the button is actually clicked.
    run_comprehensive_btn.click(
        fn=lambda: run_comprehensive_evaluation(
            vs_general=ensure_index("All"),
            vs_personal=personal_vectorstore,
            nlu_vectorstore=nlu_vectorstore,
            config=CONFIG
        ),
        # NOTE(review): comprehensive_results_df is listed twice; confirm which
        # component the third return value of run_comprehensive_evaluation is
        # meant to update.
        outputs=[batch_summary_md, comprehensive_results_df, comprehensive_results_df]
    )

    # On page load: initialise settings/file list, then load the test fixtures.
    demo.load(auto_setup_on_load, inputs=[active_theme], outputs=[settings_state, files_box, mgmt_status])
    demo.load(load_test_fixtures)
    test_save_btn.click(fn=test_save_file, inputs=None, outputs=[test_status])
    check_save_btn.click(fn=check_test_file, inputs=None, outputs=[test_status])
|
| 591 |
+
|
| 592 |
+
# --- Startup Logic ---
|
| 593 |
+
def pre_load_indexes():
    """Load every index once at startup, logging progress to stdout.

    Sets the module-level ``nlu_vectorstore`` and ``personal_vectorstore`` and
    calls ``ensure_index`` for each theme in ``CONFIG["themes"]``. Failures
    while loading a theme or the personal index are printed and skipped; a
    failure while loading the NLU index is not caught here.
    """
    global personal_vectorstore, nlu_vectorstore

    print("Pre-loading all indexes at startup...")

    # NLU examples index (no error handling: an exception propagates).
    print(" - Loading NLU examples index...")
    nlu_vectorstore = bootstrap_nlu_vectorstore(
        "nlu_training_examples.jsonl", NLU_EXAMPLES_INDEX_PATH
    )
    print(" ...NLU index loaded.")

    # General knowledge indexes, one per theme; errors are reported per theme.
    for theme_name in CONFIG["themes"]:
        print(f" - Loading general index for theme: '{theme_name}'")
        try:
            ensure_index(theme_name)
        except Exception as exc:
            print(f" ...Error loading theme '{theme_name}': {exc}")
        else:
            print(f" ...'{theme_name}' theme loaded.")

    # Personal knowledge index.
    print(" - Loading personal knowledge index...")
    try:
        personal_vectorstore = build_or_load_vectorstore(
            [], PERSONAL_INDEX_PATH, is_personal=True
        )
    except Exception as exc:
        print(f" ...Error loading personal knowledge: {exc}")
    else:
        print(" ...Personal knowledge loaded.")

    print("All indexes loaded. Application is ready.")
|
| 613 |
+
|
| 614 |
+
if __name__ == "__main__":
    # Script entry point: seed the "All" theme, load every index, then start
    # the queued Gradio app in debug mode.
    seed_files_into_theme("All")
    pre_load_indexes()
    queued_app = demo.queue()
    queued_app.launch(debug=True)
|
conversation_test_fixtures_v8.jsonl
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"test_id": "F1", "work": "The Father", "title": "Misplaced Watch Accusation", "test_role": "caregiver", "turns": [{"role": "user", "text": "My father is convinced his carer stole his watch, but he just can't remember where he put it. He's very upset."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["false_accusations", "object_misplacement", "agitation"], "detected_emotion": "anger", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["relationship_family", "setting_home_or_community"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It's distressing when a loved one makes accusations, especially when it stems from memory loss. A calm and reassuring approach is best. Try not to argue about the facts. Instead, you could say something like, 'I see how upsetting this is for you. That watch is very important. Let's look for it together.' Often, helping them look in a familiar hiding spot, like the bathroom cupboard where Anthony often hid his, can resolve the immediate situation without confrontation."}}
|
| 2 |
+
{"test_id": "F2", "work": "The Father", "title": "Disorientation in Own Flat", "test_role": "caregiver", "turns": [{"role": "user", "text": "He seems so lost and is asking who I am. He doesn't recognize me in his own home."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["confusion", "misidentification"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["relationship_family", "setting_home_or_community"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It is incredibly painful when a parent doesn't recognize you. This is a common symptom of dementia called misidentification. It's important to stay calm and not take it personally. Gently reintroduce yourself, for example: 'I'm Anne, your daughter. I can see this is confusing right now.' Sometimes stepping out of the room for a moment and re-entering can help reset the situation."}}
|
| 3 |
+
{"test_id": "F3", "work": "The Father", "title": "Wants His Mother", "test_role": "caregiver", "turns": [{"role": "user", "text": "He's crying and saying he wants his mummy to come and fetch him. What do I do?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["depression/sadness", "exit_seeking"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_care_home"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "This is a deeply emotional and common experience, especially in later stages of dementia. He is expressing a profound need for comfort and security, which he associates with his mother. Instead of correcting him, try to connect with the underlying emotion. You could say, 'You miss your mummy very much, don't you? Tell me about her.' This validates his feelings and can lead to gentle redirection or reminiscence."}}
|
| 4 |
+
{"test_id": "S1", "work": "Still Alice", "title": "Forgetting Words During Lecture", "test_role": "patient", "turns": [{"role": "user", "text": "I was giving a talk and suddenly couldn't find the word I was looking for. It was so embarrassing."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["aphasia"], "detected_emotion": "sadness", "detected_topics": ["personal_story"], "detected_contexts": ["disease_stage_mild"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "That sounds incredibly frustrating and stressful. It's a phenomenon called aphasia, and it's a common early symptom. Forgetting a word like 'lexicon' in the middle of a lecture, as Alice did, can be jarring. You handled it with grace by using humor. Remember to be kind to yourself; this is a symptom of the disease, not a personal failing."}}
|
| 5 |
+
{"test_id": "S2", "work": "Still Alice", "title": "Disorientation While Running", "test_role": "patient", "turns": [{"role": "user", "text": "I was out for a run on a familiar path and suddenly had no idea where I was. Everything seemed disconnected and I was terrified."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["confusion", "wandering"], "detected_emotion": "fear", "detected_topics": ["personal_story"], "detected_contexts": ["disease_stage_moderate"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "That sounds terrifying. Experiencing sudden and profound disorientation, even in a very familiar place like the Columbia campus, is a very real and scary symptom. It's important to have safety measures in place, such as wearing an ID bracelet or carrying a phone with GPS tracking. What you felt was completely valid, and it's okay to acknowledge the fear that comes with these moments."}}
|
| 6 |
+
{"test_id": "S3", "work": "Still Alice", "title": "Repetitive Questioning with Family", "test_role": "caregiver", "turns": [{"role": "user", "text": "She keeps asking me what time my sister's play is, even after I've put it in her phone calendar."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["repetitive_questioning"], "detected_emotion": "calm", "detected_topics": ["caregiving_advice"], "detected_contexts": ["relationship_family"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "This can be very trying. Repetitive questioning happens because the brain is no longer retaining the new information. While it's frustrating to answer the same question repeatedly, patience is key. Try to answer in a calm, reassuring tone each time. Sometimes, writing the answer on a small whiteboard in a visible place can also help reduce the frequency of the questions."}}
|
| 7 |
+
{"test_id": "A1", "work": "Away from Her", "title": "Object Misplacement", "test_role": "caregiver", "turns": [{"role": "user", "text": "She put the frying pan in the freezer and didn't seem to notice. I had to move it when she left the room."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["object_misplacement"], "detected_emotion": "calm", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "This is a classic example of object misplacement due to cognitive changes. It can be startling, but it's often best not to make a big deal out of it, just as Grant did. He waited until Fiona left the room and then quietly moved the pan. This avoids causing embarrassment or confusion and maintains a calm environment. Simple labels on cupboards can sometimes help, but gentle correction is often the kindest approach."}}
|
| 8 |
+
{"test_id": "A2", "work": "Away from Her", "title": "Not Recognizing Spouse", "test_role": "caregiver", "turns": [{"role": "user", "text": "I went to visit my wife after a month and she treated me like a polite stranger. It was devastating."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["misidentification"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["relationship_family", "setting_care_home"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "That is a truly devastating experience, and your feelings are completely valid. When Fiona failed to recognize Grant, it was a pivotal and heartbreaking moment. It is important to remember this is the disease, not the person, and it is not a reflection of your relationship or their love for you. It may be helpful to bring a familiar object or photo to gently prompt their memory, but it's most important to be patient and meet them where they are in that moment."}}
|
| 9 |
+
{"test_id": "A3", "work": "Away from Her", "title": "New Attachment in Care Home", "test_role": "caregiver", "turns": [{"role": "user", "text": "My wife has become very attached to another man in the nursing home and doesn't have time for me anymore."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["misidentification"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice"], "detected_contexts": ["relationship_family", "setting_care_home"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "This is a surprisingly common and emotionally complex situation. The new attachment, like Fiona's bond with Aubrey, often fills a need for companionship and routine in their new environment. The nurse Kristy explained it as a 'phase.' It's important not to view it as a personal rejection. Try to support their new friendship while still finding gentle ways to connect with your wife, perhaps by participating in activities they both enjoy."}}
|
| 10 |
+
{"test_id": "AI1", "work": "Alive Inside", "title": "Withdrawn and Unresponsive", "test_role": "caregiver", "turns": [{"role": "user", "text": "Henry just sits with his head down all day and barely responds to anything."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["withdrawal", "apathy"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_care_home"]}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_answer": "It's very difficult to see someone so withdrawn. This state is common in advanced dementia. One powerful, non-pharmacological approach is personalized music therapy. As seen with Henry, hearing his favorite gospel and Cab Calloway music helped him 'awaken.' He lifted his head, sang, and began to reminisce. Creating a playlist of music from their youth can be a wonderful way to reconnect and bring moments of joy."}}
|
| 11 |
+
{"test_id": "AI2", "work": "Alive Inside", "title": "Agitation and Medication Resistance", "test_role": "caregiver", "turns": [{"role": "user", "text": "He gets very agitated and angry about his loss of freedom, and he fights taking his medication."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["agitation"], "detected_emotion": "anger", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_care_home"]}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_answer": "Agitation is a significant challenge. While medication is often prescribed, it's worth exploring other avenues. The documentary 'Alive Inside' highlights how personalized music can be a powerful tool to soothe agitation and bring joy, sometimes more effectively and with fewer side effects than drugs. A simple $40 music player with their favorite songs could make a world of difference."}}
|
| 12 |
+
{"test_id": "AI3", "work": "Alive Inside", "title": "Effectiveness of Music Therapy", "test_role": "caregiver", "turns": [{"role": "user", "text": "Does music therapy actually work? What do doctors say?"}], "ground_truth": {"expected_route": "general_knowledge_question", "expected_tags": {}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_answer": "Yes, music therapy is a widely recognized and effective non-pharmacological treatment. Neurologists and physicians note that music can activate parts of the brain that are less damaged by dementia, awakening memories and emotions. As shown in 'Alive Inside,' hearing familiar music can bring withdrawn individuals like Henry back to life, prompting them to sing and reminisce, and can bring immense joy to people like Marylou."}}
|
| 13 |
+
{"test_id": "F4", "work": "The Father", "title": "Where am I again? (gentle re-orientation)", "test_role": "patient", "turns": [{"role": "user", "text": "I don’t recognise this place. Who moved my things?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["confusion", "object_misplacement"], "detected_emotion": "confusion", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It can be very disorienting when familiar surroundings suddenly feel strange. It seems you're noticing some changes in the flat. You're safe here with your daughter. Let's walk through the room together, maybe we can find your things."}}
|
| 14 |
+
{"test_id": "F5", "work": "The Father", "title": "They’re sending me away (care-home anxiety)", "test_role": "patient", "turns": [{"role": "user", "text": "They’re trying to put me in a home."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["exit_seeking"], "detected_emotion": "anxiety", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_care_home"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It sounds like you're worried about having to leave your home. It's completely understandable to feel that way. Your daughter wants to make sure you're safe and well cared for. Let's talk about what makes you feel comfortable here at home."}}
|
| 15 |
+
{"test_id": "F6", "work": "The Father", "title": "You’ve stolen my watch! (misplaced object)", "test_role": "patient", "turns": [{"role": "user", "text": "Someone’s nicked my watch!"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["false_accusations", "object_misplacement"], "detected_emotion": "anger", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It's so frustrating when something important like your watch goes missing. I understand why you're upset. I know how much you value it. Why don't we look for it together? Sometimes things end up in the oddest places."}}
|
| 16 |
+
{"test_id": "S4", "work": "Still Alice", "title": "I lost the word (cueing & confidence)", "test_role": "patient", "turns": [{"role": "user", "text": "I can’t find the word for… the thing you peel… yellow…"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["aphasia"], "detected_emotion": "confusion", "detected_topics": ["caregiving_advice"], "detected_contexts": ["disease_stage_moderate"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It's okay, take your time. It sounds like you're thinking of a fruit. Is it long and curved? Are you thinking of a banana? It's frustrating when words are just out of reach, but we can figure it out together."}}
|
| 17 |
+
{"test_id": "S5", "work": "Still Alice", "title": "What does the diagnosis mean? (facts without gloom)", "test_role": "caregiver", "turns": [{"role": "user", "text": "We’ve just had the early-onset diagnosis. What now?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": [], "detected_emotion": "anxiety", "detected_topics": ["medical_fact", "caregiving_advice"], "detected_contexts": ["setting_clinic_or_hospital", "relationship_family", "disease_stage_mild"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "Receiving a diagnosis of early-onset Alzheimer's is overwhelming. It's okay to feel scared and uncertain. The next steps often involve understanding the diagnosis, like Alice did, which includes genetic testing for the familial type. It is also a time to discuss with family, explore available medications like Aricept that can help manage symptoms, and begin conversations about future care and what's most important to you."}}
|
| 18 |
+
{"test_id": "S6", "work": "Still Alice", "title": "I want to keep teaching (planning & boundaries)", "test_role": "patient", "turns": [{"role": "user", "text": "I’d like to keep lecturing—if I can."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["goal_breakdown"], "detected_emotion": "calm", "detected_topics": ["caregiving_advice"], "detected_contexts": []}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It's wonderful that you want to continue doing what you love. Let's focus on that. We can break it down into manageable steps. Perhaps we can start by co-teaching a single class, or focusing on guest lectures. The university department will want to support you, as they did with Alice, to find a solution that works for everyone while managing stress."}}
|
| 19 |
+
{"test_id": "A4", "work": "Away from Her", "title": "She didn’t know me (visit distress)", "test_role": "caregiver", "turns": [{"role": "user", "text": "She looked through me—as if I were vapour."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["misidentification"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["relationship_family", "setting_care_home"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "That must have been incredibly painful. When a loved one doesn't recognize you, it can feel like a profound loss. Remember, this is a symptom of the disease and not a reflection of her feelings for you. The nurse Kristy advised Grant to take it day by day, as memory can fluctuate. Be patient with her and with yourself."}}
|
| 20 |
+
{"test_id": "A5", "work": "Away from Her", "title": "Jealousy & triangulation (gentle reframing)", "test_role": "caregiver", "turns": [{"role": "user", "text": "She’s taken with another man at the home."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["misidentification"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["relationship_family", "setting_care_home"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It's understandable to feel hurt and confused by this. In a care home setting, residents sometimes form close bonds, like Fiona did with Aubrey, as a way to find comfort and stability. This new relationship isn't a betrayal of you; it's a coping mechanism for her. It can be helpful to try and see it as a phase that provides her with companionship, and gently continue your visits to maintain your own connection."}}
|
| 21 |
+
{"test_id": "A6", "work": "Away from Her", "title": "Staff coordination (clear asks)", "test_role": "caregiver", "turns": [{"role": "user", "text": "What should I ask the nurse tomorrow?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["caregiver_communication_template"], "detected_emotion": "calm", "detected_topics": ["caregiving_advice"], "detected_contexts": ["relationship_staff_or_caregiver", "setting_care_home"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "When speaking with the nursing staff, it's helpful to be specific. You could ask questions like: 'How has her mood been over the last few days?' 'Has she been eating well?' 'Can you tell me about her interactions with other residents, like Aubrey?' Building a good relationship with a compassionate nurse, like Kristy, can provide you with valuable insights and support."}}
|
| 22 |
+
{"test_id": "M1", "work": "Alive Inside", "title": "Switch on the music (activation)", "test_role": "caregiver", "turns": [{"role": "user", "text": "He’s withdrawn today."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["withdrawal"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice", "treatment_option:music_therapy"], "detected_contexts": ["setting_care_home"]}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_answer": "When someone is withdrawn, music can be a powerful way to re-engage them. Try playing some of his favorite music from his youth. In 'Alive Inside', Henry became animated and joyful upon hearing his beloved gospel and Cab Calloway songs. It's a gentle way to connect without the pressure of conversation."}}
|
| 23 |
+
{"test_id": "M2", "work": "Alive Inside", "title": "Build the playlist (curation by memory hooks)", "test_role": "caregiver", "turns": [{"role": "user", "text": "How do I choose the right songs?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": [], "detected_emotion": "calm", "detected_topics": ["caregiving_advice", "treatment_option:music_therapy"], "detected_contexts": []}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_answer": "The most effective songs are often those with strong personal connections. Think about music from their teenage years and early twenties. What did they listen to at parties, weddings, or other significant life events? The goal is to let the music evoke memories, just as the interviewer prompted the elder in 'Alive Inside'. Ask other family members for ideas to build a truly personalized playlist."}}
|
| 24 |
+
{"test_id": "M3", "work": "Alive Inside", "title": "Aftercare & safety (practical guardrails)", "test_role": "caregiver", "turns": [{"role": "user", "text": "Headphones keep going missing."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["object_misplacement"], "detected_emotion": "calm", "detected_topics": ["caregiving_advice", "treatment_option:home_safety"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": [], "ground_truth_answer": "That's a common issue. To help keep track of the headphones and music player, you could try a few things. Label them clearly with his name. Consider getting brightly colored headphones that are easier to spot. You could also attach them to a lanyard or clip them to his clothing. Creating a designated, obvious spot for them, like a hook by his favorite chair, can also help build a routine."}}
|
| 25 |
+
{"test_id": "P1", "work": "The Father", "title": "Patient Anxiety About Being Left Alone", "test_role": "patient", "turns": [{"role": "user", "text": "Please don't leave me by myself. I get scared when I'm alone in the house."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["anxiety"], "detected_emotion": "fear", "detected_topics": ["personal_story"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It sounds like you're feeling scared right now, and that's completely understandable. I hear you. You are not alone; I am here with you. Let's sit together for a while. What would make you feel a little more comfortable?"}}
|
| 26 |
+
{"test_id": "P2", "work": "Still Alice", "title": "Patient Frustration with Simple Tasks (Apraxia)", "test_role": "patient", "turns": [{"role": "user", "text": "I can't remember how to make coffee anymore. The machine is so confusing."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["confusion"], "detected_emotion": "anger", "detected_topics": ["personal_story"], "detected_contexts": ["disease_stage_moderate"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It is so frustrating when a familiar task suddenly becomes complicated. This is a symptom called apraxia. It's not your fault at all. How about we tackle it together? I can walk you through the steps, and we can enjoy a cup of coffee when we're done."}}
|
| 27 |
+
{"test_id": "P3", "work": "The Father", "title": "Patient Expressing Sundowning Symptoms", "test_role": "patient", "turns": [{"role": "user", "text": "I feel so restless and upset right now. I need to go somewhere but I don't know where."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["sundowning_restlessness", "agitation", "exit_seeking"], "detected_emotion": "anxiety", "detected_topics": ["personal_story"], "detected_contexts": []}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It sounds like you're feeling very unsettled, and that can be a difficult feeling. You're safe right here. Sometimes a simple activity can help. Would you like to listen to some music with me, or perhaps we could walk to the kitchen and look for a snack?"}}
|
| 28 |
+
{"test_id": "P4", "work": "Away from Her", "title": "Patient Misidentifying a Family Member", "test_role": "patient", "turns": [{"role": "user", "text": "It's so wonderful to see you, mother. I've missed you so much."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["misidentification"], "detected_emotion": "warmth", "detected_topics": ["personal_story"], "detected_contexts": ["relationship_family"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "It's wonderful to see you too. You're thinking of your mother, what a lovely thought. Tell me a favorite memory you have of her. I'm your daughter, Fiona, and I love you very much."}}
|
| 29 |
+
{"test_id": "P5", "work": "Still Alice", "title": "Patient Experiencing a Hallucination", "test_role": "patient", "turns": [{"role": "user", "text": "There's a dog in the garden, but it won't stop barking at me. Can you make it go away?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["hallucinations_delusions"], "detected_emotion": "fear", "detected_topics": ["personal_story"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": [], "ground_truth_answer": "That sounds very frightening. I don't see a dog right now, but I believe that you do. You are safe here with me. Let's try closing the curtains and moving to another room for a little while, away from the window."}}
|
| 30 |
+
{"test_id": "P6", "work": "The Father", "title": "Patient Repeating a Question Due to Anxiety", "test_role": "patient", "turns": [{"role": "user", "text": "Are you sure my daughter is visiting today? I really don't want to miss her."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["repetitive_questioning", "anxiety"], "detected_emotion": "anxiety", "detected_topics": ["personal_story"], "detected_contexts": ["relationship_family"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_answer": "Yes, I am sure. Anne will be here to visit today. It sounds like you are very excited to see her. She is very much looking forward to seeing you too. She should be here around lunchtime. Shall we look at some photos of her while we wait?"}}
|
| 31 |
+
{"test_id": "P7", "work": "Alive Inside", "title": "Patient Expressing a Positive Memory (Reminiscence)", "test_role": "patient", "turns": [{"role": "user", "text": "I was just remembering the music at our wedding. Everyone was dancing."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["reminiscence_prompting", "affection"], "detected_emotion": "joy", "detected_topics": ["personal_story"], "detected_contexts": []}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_answer": "That sounds like a beautiful memory. Music has such a wonderful way of bringing back happy times. Do you remember any of the songs that were played? I would love to hear more about that day."}}
|
| 32 |
+
{"test_id": "GC1", "title": "Patient Says Thank You", "test_role": "patient", "turns": [{"role": "user", "text": "Thank you, that was helpful."}], "ground_truth": {"expected_route": "general_conversation", "expected_tags": {}, "expected_sources": [], "ground_truth_answer": "You're very welcome. I'm here to help whenever you need me. Is there anything else I can assist you with today?"}}
|
| 33 |
+
{"test_id": "GC2", "title": "Caregiver Greets the Agent", "test_role": "caregiver", "turns": [{"role": "user", "text": "Hello, how are you?"}], "ground_truth": {"expected_route": "general_conversation", "expected_tags": {}, "expected_sources": [], "ground_truth_answer": "Hello! I'm doing well, thank you for asking. How can I help you today?"}}
|
| 34 |
+
{"test_id": "GK1", "title": "Patient Asks General Knowledge Question", "test_role": "patient", "turns": [{"role": "user", "text": "Who was the president of the United States back in 1970?"}], "ground_truth": {"expected_route": "general_knowledge_question", "expected_tags": {}, "expected_sources": ["General Knowledge"], "ground_truth_answer": "In 1970, the President of the United States was Richard Nixon."}}
|
| 35 |
+
{"test_id": "GK2", "title": "Patient Asks General Knowledge - Geography", "test_role": "patient", "turns": [{"role": "user", "text": "What is the capital of Canada?"}], "ground_truth": {"expected_route": "general_knowledge_question", "expected_tags": {}, "expected_sources": ["General Knowledge"], "ground_truth_answer": "The capital of Canada is Ottawa."}}
|
| 36 |
+
{"test_id": "GK3", "title": "Patient Asks General Knowledge - Art", "test_role": "patient", "turns": [{"role": "user", "text": "Who painted the Mona Lisa?"}], "ground_truth": {"expected_route": "general_knowledge_question", "expected_tags": {}, "expected_sources": ["General Knowledge"], "ground_truth_answer": "The Mona Lisa was painted by the Italian artist Leonardo da Vinci."}}
|
| 37 |
+
{"test_id": "GK4", "title": "Patient Asks General Knowledge - History", "test_role": "patient", "turns": [{"role": "user", "text": "What year did World War II end?"}], "ground_truth": {"expected_route": "general_knowledge_question", "expected_tags": {}, "expected_sources": ["General Knowledge"], "ground_truth_answer": "World War II ended in 1945."}}
|
| 38 |
+
{"test_id": "SUM1", "work": "Complaints of a Dutiful Daughter", "title": "Caregiver Asks for a Summary", "test_role": "caregiver", "turns": [{"role": "user", "text": "Can you give me a summary of the main events from the last three days?"}], "ground_truth": {"expected_route": "summarization", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "Certainly. Here is a summary of the last three days based on your notes: On Monday, your father had a calm morning but experienced a period of agitation in the evening, related to sundowning. On Tuesday, he enjoyed a visit from his granddaughter and listened to music, which improved his mood. Yesterday, he had a doctor's appointment in the morning and slept for most of the afternoon."}}
|
| 39 |
+
{"test_id": "SUM2", "work": "Complaints of a Dutiful Daughter", "title": "Caregiver Asks for Summary of a Symptom", "test_role": "caregiver", "turns": [{"role": "user", "text": "Summarize all the journal entries related to my husband's word-finding difficulties."}], "ground_truth": {"expected_route": "summarization", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "Here is a summary of entries related to word-finding: The issue was first noted three months ago during a lecture (aphasia). Two months ago, he struggled to name common kitchen items. Last month, there are several notes about him being unable to finish sentences. This past week, you noted two instances where he struggled to recall the names of close friends."}}
|
| 40 |
+
{"test_id": "SUM3", "work": "Complaints of a Dutiful Daughter", "title": "Caregiver Asks for Summary for New Aide", "test_role": "caregiver", "turns": [{"role": "user", "text": "Create a short, one-paragraph summary of my wife's key interests and daily routine for the new weekend aide."}], "ground_truth": {"expected_route": "summarization", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "Of course. Fiona is a kind woman who enjoys quiet activities. Her daily routine typically involves waking around 8 AM, having tea and toast for breakfast, and listening to classical music. She enjoys short walks in the garden after lunch, weather permitting. In the afternoon, she often likes to look at old photo albums or sit quietly. She can become anxious if her routine is disrupted, so maintaining a calm and predictable environment is very helpful."}}
|
| 41 |
+
{"test_id": "SUM4", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks for Summary of Previous Day", "test_role": "patient", "turns": [{"role": "user", "text": "Can you remind me what I did yesterday? Just the main points, please."}], "ground_truth": {"expected_route": "summarization", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "Of course. Yesterday, you had a quiet morning and enjoyed listening to some of your favorite music. In the afternoon, your daughter Anne came to visit, and you spent time looking at old photographs together. For dinner, you had chicken soup."}}
|
| 42 |
+
{"test_id": "FQ1", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks for Daughter's Name", "test_role": "patient", "turns": [{"role": "user", "text": "I'm trying to remember... what is my daughter's name again?"}], "ground_truth": {"expected_route": "factual_question", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "Thinking about your daughter, the journal mentions her name is Debbie. She sounds very important to you."}}
|
| 43 |
+
{"test_id": "FQ2_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks for Partner's Name", "test_role": "patient", "turns": [{"role": "user", "text": "Who was my long-term partner I lived with in New York?"}], "ground_truth": {"expected_route": "factual_question", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "According to the journal, you lived with Danish for almost 50 years in New York."}}
|
| 44 |
+
{"test_id": "FQ3_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks for Daughter's Birthplace", "test_role": "patient", "turns": [{"role": "user", "text": "Where was my daughter Debbie born?"}], "ground_truth": {"expected_route": "factual_question", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "The journal mentions that your daughter, Debbie, was born on 169th Street in Flushing."}}
|
| 45 |
+
{"test_id": "FQ4_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks About Their Age", "test_role": "patient", "turns": [{"role": "user", "text": "How old am I, according to my last conversation with Debbie?"}], "ground_truth": {"expected_route": "factual_question", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "In a conversation with Debbie, you came to realize that you are 84 years old."}}
|
| 46 |
+
{"test_id": "FQ5_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks About Partner's Passing", "test_role": "patient", "turns": [{"role": "user", "text": "I know my husband passed away. When did the journal say it happened?"}], "ground_truth": {"expected_route": "factual_question", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "The journal notes that Danish died about five years ago."}}
|
| 47 |
+
{"test_id": "MH1_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks to Connect Husband to a Location", "test_role": "patient", "turns": [{"role": "user", "text": "Did my husband Danish live with us in Flushing where Debbie was born?"}], "ground_truth": {"expected_route": "multi_hop", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "The journal notes that your daughter Debbie was born on 169th Street in Flushing, and that you all lived there. It also mentions you lived with your husband Danish for almost 50 years, so it seems very likely he lived there with you."}}
|
| 48 |
+
{"test_id": "MH2_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks About Timeline of Relationship", "test_role": "patient", "turns": [{"role": "user", "text": "Considering Danish passed away 5 years ago, how long had we been living together before that?"}], "ground_truth": {"expected_route": "multi_hop", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "That's a thoughtful question. The journal mentions that you lived with Danish for almost 50 years and that he passed away about five years ago, which suggests you had a long and meaningful life together."}}
|
| 49 |
+
{"test_id": "MH3_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks to Combine People and Places", "test_role": "patient", "turns": [{"role": "user", "text": "I know I lived with Danish in New York, but who was the other person I knew there?"}], "ground_truth": {"expected_route": "multi_hop", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "Looking at your journal, it mentions you lived in New York for a long time with Danish. A separate memory from your time living on 169th Street in Flushing also notes that you knew someone named Frances."}}
|
| 50 |
+
{"test_id": "MH4_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks to Contrast Two Eras", "test_role": "patient", "turns": [{"role": "user", "text": "I know I started out in San Francisco and then moved to New York. What does the journal say about my life in San Francisco?"}], "ground_truth": {"expected_route": "multi_hop", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_answer": "The journal mentions that you started out in San Francisco before you went to New York for a long time, but it doesn't contain any other specific details about your life there."}}
|
| 51 |
+
|
conversation_test_fixtures_v9.jsonl
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"test_id": "F1", "work": "The Father", "title": "Misplaced Watch Accusation", "test_role": "caregiver", "turns": [{"role": "user", "text": "My father is convinced his carer stole his watch, but he just can't remember where he put it. He's very upset."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["false_accusations", "object_misplacement", "agitation"], "detected_emotion": "anger", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["relationship_family", "setting_home_or_community"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Validate his feeling of distress, avoid arguing, and offer to look for the watch together in a familiar spot.", "ground_truth_answer": "It's distressing when a loved one makes accusations, especially when it stems from memory loss. A calm and reassuring approach is best. Try not to argue about the facts. Instead, you could say something like, 'I see how upsetting this is for you. That watch is very important. Let's look for it together.' Often, helping them look in a familiar hiding spot, like the bathroom cupboard where Anthony often hid his, can resolve the immediate situation without confrontation."}}
|
| 2 |
+
{"test_id": "F2", "work": "The Father", "title": "Disorientation in Own Flat", "test_role": "caregiver", "turns": [{"role": "user", "text": "He seems so lost and is asking who I am. He doesn't recognize me in his own home."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["confusion", "misidentification"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["relationship_family", "setting_home_or_community"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Acknowledge the pain of not being recognized, stay calm, and gently reintroduce yourself without taking it personally.", "ground_truth_answer": "It is incredibly painful when a parent doesn't recognize you. This is a common symptom of dementia called misidentification. It's important to stay calm and not take it personally. Gently reintroduce yourself, for example: 'I'm Anne, your daughter. I can see this is confusing right now.' Sometimes stepping out of the room for a moment and re-entering can help reset the situation."}}
|
| 3 |
+
{"test_id": "F3", "work": "The Father", "title": "Wants His Mother", "test_role": "caregiver", "turns": [{"role": "user", "text": "He's crying and saying he wants his mummy to come and fetch him. What do I do?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["depression/sadness", "exit_seeking"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_care_home"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Validate his need for comfort; don't correct him. Connect with the underlying emotion and redirect to reminiscence.", "ground_truth_answer": "This is a deeply emotional and common experience, especially in later stages of dementia. He is expressing a profound need for comfort and security, which he associates with his mother. Instead of correcting him, try to connect with the underlying emotion. You could say, 'You miss your mummy very much, don't you? Tell me about her.' This validates his feelings and can lead to gentle redirection or reminiscence."}}
|
| 4 |
+
{"test_id": "S1", "work": "Still Alice", "title": "Forgetting Words During Lecture", "test_role": "patient", "turns": [{"role": "user", "text": "I was giving a talk and suddenly couldn't find the word I was looking for. It was so embarrassing."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["aphasia"], "detected_emotion": "sadness", "detected_topics": ["personal_story"], "detected_contexts": ["disease_stage_mild"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Acknowledge the frustration of aphasia (word-finding difficulty), normalize it as a symptom, and encourage self-compassion.", "ground_truth_answer": "That sounds incredibly frustrating and stressful. It's a phenomenon called aphasia, and it's a common early symptom. Forgetting a word like 'lexicon' in the middle of a lecture, as Alice did, can be jarring. You handled it with grace by using humor. Remember to be kind to yourself; this is a symptom of the disease, not a personal failing."}}
|
| 5 |
+
{"test_id": "S2", "work": "Still Alice", "title": "Disorientation While Running", "test_role": "patient", "turns": [{"role": "user", "text": "I was out for a run on a familiar path and suddenly had no idea where I was. Everything seemed disconnected and I was terrified."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["confusion", "wandering"], "detected_emotion": "fear", "detected_topics": ["personal_story"], "detected_contexts": ["disease_stage_moderate"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Validate the fear of disorientation, acknowledge it as a real symptom, and suggest practical safety measures like carrying ID or GPS.", "ground_truth_answer": "That sounds terrifying. Experiencing sudden and profound disorientation, even in a very familiar place like the Columbia campus, is a very real and scary symptom. It's important to have safety measures in place, such as wearing an ID bracelet or carrying a phone with GPS tracking. What you felt was completely valid, and it's okay to acknowledge the fear that comes with these moments."}}
|
| 6 |
+
{"test_id": "S3", "work": "Still Alice", "title": "Repetitive Questioning with Family", "test_role": "caregiver", "turns": [{"role": "user", "text": "She keeps asking me what time my sister's play is, even after I've put it in her phone calendar."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["repetitive_questioning"], "detected_emotion": "calm", "detected_topics": ["caregiving_advice"], "detected_contexts": ["relationship_family"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Explain that repetitive questioning is due to memory retention issues. Advise patience, answering calmly, and using visual aids like a whiteboard.", "ground_truth_answer": "This can be very trying. Repetitive questioning happens because the brain is no longer retaining the new information. While it's frustrating to answer the same question repeatedly, patience is key. Try to answer in a calm, reassuring tone each time. Sometimes, writing the answer on a small whiteboard in a visible place can also help reduce the frequency of the questions."}}
|
| 7 |
+
{"test_id": "A1", "work": "Away from Her", "title": "Object Misplacement", "test_role": "caregiver", "turns": [{"role": "user", "text": "She put the frying pan in the freezer and didn't seem to notice. I had to move it when she left the room."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["object_misplacement"], "detected_emotion": "calm", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Normalize object misplacement as a symptom. Advise a non-confrontational approach, like quietly moving the object later to avoid embarrassment.", "ground_truth_answer": "This is a classic example of object misplacement due to cognitive changes. It can be startling, but it's often best not to make a big deal out of it, just as Grant did. He waited until Fiona left the room and then quietly moved the pan. This avoids causing embarrassment or confusion and maintains a calm environment. Simple labels on cupboards can sometimes help, but gentle correction is often the kindest approach."}}
|
| 8 |
+
{"test_id": "A2", "work": "Away from Her", "title": "Not Recognizing Spouse", "test_role": "caregiver", "turns": [{"role": "user", "text": "I went to visit my wife after a month and she treated me like a polite stranger. It was devastating."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["misidentification"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["relationship_family", "setting_care_home"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Validate the caregiver's pain. Explain that misidentification is the disease, not a reflection of their relationship. Advise patience.", "ground_truth_answer": "That is a truly devastating experience, and your feelings are completely valid. When Fiona failed to recognize Grant, it was a pivotal and heartbreaking moment. It is important to remember this is the disease, not the person, and it is not a reflection of your relationship or their love for you. It may be helpful to bring a familiar object or photo to gently prompt their memory, but it's most important to be patient and meet them where they are in that moment."}}
|
| 9 |
+
{"test_id": "A3", "work": "Away from Her", "title": "New Attachment in Care Home", "test_role": "caregiver", "turns": [{"role": "user", "text": "My wife has become very attached to another man in the nursing home and doesn't have time for me anymore."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["misidentification"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice"], "detected_contexts": ["relationship_family", "setting_care_home"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Normalize new attachments in care homes as a coping mechanism for routine, not a personal rejection. Advise supporting the friendship.", "ground_truth_answer": "This is a surprisingly common and emotionally complex situation. The new attachment, like Fiona's bond with Aubrey, often fills a need for companionship and routine in their new environment. The nurse Kristy explained it as a 'phase.' It's important not to view it as a personal rejection. Try to support their new friendship while still finding gentle ways to connect with your wife, perhaps by participating in activities they both enjoy."}}
|
| 10 |
+
{"test_id": "AI1", "work": "Alive Inside", "title": "Withdrawn and Unresponsive", "test_role": "caregiver", "turns": [{"role": "user", "text": "Henry just sits with his head down all day and barely responds to anything."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["withdrawal", "apathy"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_care_home"]}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_keypoints": "Validate the difficulty of seeing a loved one so withdrawn. Suggest personalized music therapy as a powerful non-pharmacological approach.", "ground_truth_answer": "It's very difficult to see someone so withdrawn. This state is common in advanced dementia. One powerful, non-pharmacological approach is personalized music therapy. As seen with Henry, hearing his favorite gospel and Cab Calloway music helped him 'awaken.' He lifted his head, sang, and began to reminisce. Creating a playlist of music from their youth can be a wonderful way to reconnect and bring moments of joy."}}
|
| 11 |
+
{"test_id": "AI2", "work": "Alive Inside", "title": "Agitation and Medication Resistance", "test_role": "caregiver", "turns": [{"role": "user", "text": "He gets very agitated and angry about his loss of freedom, and he fights taking his medication."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["agitation"], "detected_emotion": "anger", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_care_home"]}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_keypoints": "Acknowledge that agitation is a challenge. Suggest personalized music as a non-pharmacological alternative to soothe and bring joy.", "ground_truth_answer": "Agitation is a significant challenge. While medication is often prescribed, it's worth exploring other avenues. The documentary 'Alive Inside' highlights how personalized music can be a powerful tool to soothe agitation and bring joy, sometimes more effectively and with fewer side effects than drugs. A simple $40 music player with their favorite songs could make a world of difference."}}
|
| 12 |
+
{"test_id": "AI3", "work": "Alive Inside", "title": "Effectiveness of Music Therapy", "test_role": "caregiver", "turns": [{"role": "user", "text": "Does music therapy actually work? What do doctors say?"}], "ground_truth": {"expected_route": "general_knowledge_question", "expected_tags": {}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_keypoints": "Yes, music therapy is a recognized effective treatment that activates less damaged parts of the brain, awakening memories and emotions.", "ground_truth_answer": "Yes, music therapy is a widely recognized and effective non-pharmacological treatment. Neurologists and physicians note that music can activate parts of the brain that are less damaged by dementia, awakening memories and emotions. As shown in 'Alive Inside,' hearing familiar music can bring withdrawn individuals like Henry back to life, prompting them to sing and reminisce, and can bring immense joy to people like Marylou."}}
|
| 13 |
+
{"test_id": "F4", "work": "The Father", "title": "Where am I again? (gentle re-orientation)", "test_role": "patient", "turns": [{"role": "user", "text": "I don’t recognise this place. Who moved my things?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["confusion", "object_misplacement"], "detected_emotion": "confusion", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Validate the feeling of disorientation, provide reassurance of safety, and offer a gentle, collaborative action to re-orient.", "ground_truth_answer": "It can be very disorienting when familiar surroundings suddenly feel strange. It seems you're noticing some changes in the flat. You're safe here with your daughter. Let's walk through the room together, maybe we can find your things."}}
|
| 14 |
+
{"test_id": "F5", "work": "The Father", "title": "They’re sending me away (care-home anxiety)", "test_role": "patient", "turns": [{"role": "user", "text": "They’re trying to put me in a home."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["exit_seeking"], "detected_emotion": "anxiety", "detected_topics": ["caregiving_advice"], "detected_contexts": ["setting_care_home"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Acknowledge the worry about leaving home and validate the feeling. Reassure them of their family's desire for their safety.", "ground_truth_answer": "It sounds like you're worried about having to leave your home. It's completely understandable to feel that way. Your daughter wants to make sure you're safe and well cared for. Let's talk about what makes you feel comfortable here at home."}}
|
| 15 |
+
{"test_id": "F6", "work": "The Father", "title": "You’ve stolen my watch! (misplaced object)", "test_role": "patient", "turns": [{"role": "user", "text": "Someone’s nicked my watch!"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["false_accusations", "object_misplacement"], "detected_emotion": "anger", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Acknowledge the frustration of a missing item, validate their feeling of being upset, and offer to look for it together.", "ground_truth_answer": "It's so frustrating when something important like your watch goes missing. I understand why you're upset. I know how much you value it. Why don't we look for it together? Sometimes things end up in the oddest places."}}
|
| 16 |
+
{"test_id": "S4", "work": "Still Alice", "title": "I lost the word (cueing & confidence)", "test_role": "patient", "turns": [{"role": "user", "text": "I can’t find the word for… the thing you peel… yellow…"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["aphasia"], "detected_emotion": "confusion", "detected_topics": ["caregiving_advice"], "detected_contexts": ["disease_stage_moderate"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Be patient, offer gentle cues to help them find the word (banana), validate the frustration, and frame it collaboratively.", "ground_truth_answer": "It's okay, take your time. It sounds like you're thinking of a fruit. Is it long and curved? Are you thinking of a banana? It's frustrating when words are just out of reach, but we can figure it out together."}}
|
| 17 |
+
{"test_id": "S5", "work": "Still Alice", "title": "What does the diagnosis mean? (facts without gloom)", "test_role": "caregiver", "turns": [{"role": "user", "text": "We’ve just had the early-onset diagnosis. What now?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": [], "detected_emotion": "anxiety", "detected_topics": ["medical_fact", "caregiving_advice"], "detected_contexts": ["setting_clinic_or_hospital", "relationship_family", "disease_stage_mild"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Acknowledge the feeling of being overwhelmed. Next steps are understanding the diagnosis, discussing with family, and exploring medications and future care.", "ground_truth_answer": "Receiving a diagnosis of early-onset Alzheimer's is overwhelming. It's okay to feel scared and uncertain. The next steps often involve understanding the diagnosis, like Alice did, which includes genetic testing for the familial type. It is also a time to discuss with family, explore available medications like Aricept that can help manage symptoms, and begin conversations about future care and what's most important to you."}}
|
| 18 |
+
{"test_id": "S6", "work": "Still Alice", "title": "I want to keep teaching (planning & boundaries)", "test_role": "patient", "turns": [{"role": "user", "text": "I’d like to keep lecturing—if I can."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["goal_breakdown"], "detected_emotion": "calm", "detected_topics": ["caregiving_advice"], "detected_contexts": []}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Affirm the desire to continue meaningful work. Suggest breaking it down into manageable steps like co-teaching or guest lectures.", "ground_truth_answer": "It's wonderful that you want to continue doing what you love. Let's focus on that. We can break it down into manageable steps. Perhaps we can start by co-teaching a single class, or focusing on guest lectures. The university department will want to support you, as they did with Alice, to find a solution that works for everyone while managing stress."}}
|
| 19 |
+
{"test_id": "A4", "work": "Away from Her", "title": "She didn’t know me (visit distress)", "test_role": "caregiver", "turns": [{"role": "user", "text": "She looked through me—as if I were vapour."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["misidentification"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["relationship_family", "setting_care_home"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Validate the caregiver's pain. Explain misidentification is a symptom of the disease, not a reflection of feelings, and advise patience.", "ground_truth_answer": "That must have been incredibly painful. When a loved one doesn't recognize you, it can feel like a profound loss. Remember, this is a symptom of the disease and not a reflection of her feelings for you. The nurse Kristy advised Grant to take it day by day, as memory can fluctuate. Be patient with her and with yourself."}}
|
| 20 |
+
{"test_id": "A5", "work": "Away from Her", "title": "Jealousy & triangulation (gentle reframing)", "test_role": "caregiver", "turns": [{"role": "user", "text": "She’s taken with another man at the home."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["misidentification"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice", "personal_story"], "detected_contexts": ["relationship_family", "setting_care_home"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Validate the hurt. Reframe the new bond as a coping mechanism for comfort and routine, not a betrayal.", "ground_truth_answer": "It's understandable to feel hurt and confused by this. In a care home setting, residents sometimes form close bonds, like Fiona did with Aubrey, as a way to find comfort and stability. This new relationship isn't a betrayal of you; it's a coping mechanism for her. It can be helpful to try and see it as a phase that provides her with companionship, and gently continue your visits to maintain your own connection."}}
|
| 21 |
+
{"test_id": "A6", "work": "Away from Her", "title": "Staff coordination (clear asks)", "test_role": "caregiver", "turns": [{"role": "user", "text": "What should I ask the nurse tomorrow?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["caregiver_communication_template"], "detected_emotion": "calm", "detected_topics": ["caregiving_advice"], "detected_contexts": ["relationship_staff_or_caregiver", "setting_care_home"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Advise asking the nurse specific questions about mood, eating, and social interactions to get a clear update on well-being.", "ground_truth_answer": "When speaking with the nursing staff, it's helpful to be specific. You could ask questions like: 'How has her mood been over the last few days?' 'Has she been eating well?' 'Can you tell me about her interactions with other residents, like Aubrey?' Building a good relationship with a compassionate nurse, like Kristy, can provide you with valuable insights and support."}}
|
| 22 |
+
{"test_id": "M1", "work": "Alive Inside", "title": "Switch on the music (activation)", "test_role": "caregiver", "turns": [{"role": "user", "text": "He’s withdrawn today."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["withdrawal"], "detected_emotion": "sadness", "detected_topics": ["caregiving_advice", "treatment_option:music_therapy"], "detected_contexts": ["setting_care_home"]}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_keypoints": "Suggest using personalized music as a powerful way to re-engage someone who is withdrawn, citing Henry from 'Alive Inside'.", "ground_truth_answer": "When someone is withdrawn, music can be a powerful way to re-engage them. Try playing some of his favorite music from his youth. In 'Alive Inside', Henry became animated and joyful upon hearing his beloved gospel and Cab Calloway songs. It's a gentle way to connect without the pressure of conversation."}}
|
| 23 |
+
{"test_id": "M2", "work": "Alive Inside", "title": "Build the playlist (curation by memory hooks)", "test_role": "caregiver", "turns": [{"role": "user", "text": "How do I choose the right songs?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": [], "detected_emotion": "calm", "detected_topics": ["caregiving_advice", "treatment_option:music_therapy"], "detected_contexts": []}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_keypoints": "Advise choosing music with strong personal connections from their youth, such as songs from significant life events, to evoke memories.", "ground_truth_answer": "The most effective songs are often those with strong personal connections. Think about music from their teenage years and early twenties. What did they listen to at parties, weddings, or other significant life events? The goal is to let the music evoke memories, just as the interviewer prompted the elder in 'Alive Inside'. Ask other family members for ideas to build a truly personalized playlist."}}
|
| 24 |
+
{"test_id": "M3", "work": "Alive Inside", "title": "Aftercare & safety (practical guardrails)", "test_role": "caregiver", "turns": [{"role": "user", "text": "Headphones keep going missing."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["object_misplacement"], "detected_emotion": "calm", "detected_topics": ["caregiving_advice", "treatment_option:home_safety"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": [], "ground_truth_keypoints": "Suggest practical tips to prevent losing headphones: labeling, using bright colors, attaching a lanyard, and creating a designated storage spot.", "ground_truth_answer": "That's a common issue. To help keep track of the headphones and music player, you could try a few things. Label them clearly with his name. Consider getting brightly colored headphones that are easier to spot. You could also attach them to a lanyard or clip them to his clothing. Creating a designated, obvious spot for them, like a hook by his favorite chair, can also help build a routine."}}
|
| 25 |
+
{"test_id": "P1", "work": "The Father", "title": "Patient Anxiety About Being Left Alone", "test_role": "patient", "turns": [{"role": "user", "text": "Please don't leave me by myself. I get scared when I'm alone in the house."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["anxiety"], "detected_emotion": "fear", "detected_topics": ["personal_story"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Validate their fear of being alone, provide immediate reassurance that you are present, and offer comfort and companionship.", "ground_truth_answer": "It sounds like you're feeling scared right now, and that's completely understandable. I hear you. You are not alone; I am here with you. Let's sit together for a while. What would make you feel a little more comfortable?"}}
|
| 26 |
+
{"test_id": "P2", "work": "Still Alice", "title": "Patient Frustration with Simple Tasks (Apraxia)", "test_role": "patient", "turns": [{"role": "user", "text": "I can't remember how to make coffee anymore. The machine is so confusing."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["confusion"], "detected_emotion": "anger", "detected_topics": ["personal_story"], "detected_contexts": ["disease_stage_moderate"]}, "expected_sources": ["still_alice_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Validate the frustration, name the symptom (apraxia), reassure it's not their fault, and offer to do the task together.", "ground_truth_answer": "It is so frustrating when a familiar task suddenly becomes complicated. This is a symptom called apraxia. It's not your fault at all. How about we tackle it together? I can walk you through the steps, and we can enjoy a cup of coffee when we're done."}}
|
| 27 |
+
{"test_id": "P3", "work": "The Father", "title": "Patient Expressing Sundowning Symptoms", "test_role": "patient", "turns": [{"role": "user", "text": "I feel so restless and upset right now. I need to go somewhere but I don't know where."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["sundowning_restlessness", "agitation", "exit_seeking"], "detected_emotion": "anxiety", "detected_topics": ["personal_story"], "detected_contexts": []}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Acknowledge the feeling of restlessness, provide reassurance of safety, and offer a simple, comforting activity like music or a snack.", "ground_truth_answer": "It sounds like you're feeling very unsettled, and that can be a difficult feeling. You're safe right here. Sometimes a simple activity can help. Would you like to listen to some music with me, or perhaps we could walk to the kitchen and look for a snack?"}}
|
| 28 |
+
{"test_id": "P4", "work": "Away from Her", "title": "Patient Misidentifying a Family Member", "test_role": "patient", "turns": [{"role": "user", "text": "It's so wonderful to see you, mother. I've missed you so much."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["misidentification"], "detected_emotion": "warmth", "detected_topics": ["personal_story"], "detected_contexts": ["relationship_family"]}, "expected_sources": ["away_from_her_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Join them in their reality by validating their warm feeling, redirect to positive reminiscence, then gently reorient them.", "ground_truth_answer": "It's wonderful to see you too. You're thinking of your mother, what a lovely thought. Tell me a favorite memory you have of her. I'm your daughter, Fiona, and I love you very much."}}
|
| 29 |
+
{"test_id": "P5", "work": "Still Alice", "title": "Patient Experiencing a Hallucination", "test_role": "patient", "turns": [{"role": "user", "text": "There's a dog in the garden, but it won't stop barking at me. Can you make it go away?"}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["hallucinations_delusions"], "detected_emotion": "fear", "detected_topics": ["personal_story"], "detected_contexts": ["setting_home_or_community"]}, "expected_sources": [], "ground_truth_keypoints": "Validate their fear without arguing about reality. Reassure them of safety and gently redirect by changing the environment.", "ground_truth_answer": "That sounds very frightening. I don't see a dog right now, but I believe that you do. You are safe here with me. Let's try closing the curtains and moving to another room for a little while, away from the window."}}
|
| 30 |
+
{"test_id": "P6", "work": "The Father", "title": "Patient Repeating a Question Due to Anxiety", "test_role": "patient", "turns": [{"role": "user", "text": "Are you sure my daughter is visiting today? I really don't want to miss her."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["repetitive_questioning", "anxiety"], "detected_emotion": "anxiety", "detected_topics": ["personal_story"], "detected_contexts": ["relationship_family"]}, "expected_sources": ["the_father_segments_enriched_harmonized_plus.jsonl"], "ground_truth_keypoints": "Answer the question calmly and directly. Validate the underlying emotion (excitement to see her) and offer a pleasant distraction.", "ground_truth_answer": "Yes, I am sure. Anne will be here to visit today. It sounds like you are very excited to see her. She is very much looking forward to seeing you too. She should be here around lunchtime. Shall we look at some photos of her while we wait?"}}
|
| 31 |
+
{"test_id": "P7", "work": "Alive Inside", "title": "Patient Expressing a Positive Memory (Reminiscence)", "test_role": "patient", "turns": [{"role": "user", "text": "I was just remembering the music at our wedding. Everyone was dancing."}], "ground_truth": {"expected_route": "caregiving_scenario", "expected_tags": {"detected_behaviors": ["reminiscence_prompting", "affection"], "detected_emotion": "joy", "detected_topics": ["personal_story"], "detected_contexts": []}, "expected_sources": ["alive_inside_enriched_harmonized.jsonl"], "ground_truth_keypoints": "Acknowledge the happy memory and encourage further reminiscence by asking a gentle, open-ended question about the event.", "ground_truth_answer": "That sounds like a beautiful memory. Music has such a wonderful way of bringing back happy times. Do you remember any of the songs that were played? I would love to hear more about that day."}}
|
| 32 |
+
{"test_id": "GC1", "title": "Patient Says Thank You", "test_role": "patient", "turns": [{"role": "user", "text": "Thank you, that was helpful."}], "ground_truth": {"expected_route": "general_conversation", "expected_tags": {}, "expected_sources": [], "ground_truth_keypoints": "Respond politely and offer further assistance.", "ground_truth_answer": "You're very welcome. I'm here to help whenever you need me. Is there anything else I can assist you with today?"}}
|
| 33 |
+
{"test_id": "GC2", "title": "Caregiver Greets the Agent", "test_role": "caregiver", "turns": [{"role": "user", "text": "Hello, how are you?"}], "ground_truth": {"expected_route": "general_conversation", "expected_tags": {}, "expected_sources": [], "ground_truth_keypoints": "Respond politely and pivot to offering help.", "ground_truth_answer": "Hello! I'm doing well, thank you for asking. How can I help you today?"}}
|
| 34 |
+
{"test_id": "GK1", "title": "Patient Asks General Knowledge Question", "test_role": "patient", "turns": [{"role": "user", "text": "Who was the president of the United States back in 1970?"}], "ground_truth": {"expected_route": "general_knowledge_question", "expected_tags": {}, "expected_sources": ["General Knowledge"], "ground_truth_keypoints": "State the factual answer: Richard Nixon was president in 1970.", "ground_truth_answer": "In 1970, the President of the United States was Richard Nixon."}}
|
| 35 |
+
{"test_id": "GK2", "title": "Patient Asks General Knowledge - Geography", "test_role": "patient", "turns": [{"role": "user", "text": "What is the capital of Canada?"}], "ground_truth": {"expected_route": "general_knowledge_question", "expected_tags": {}, "expected_sources": ["General Knowledge"], "ground_truth_keypoints": "State the factual answer: The capital of Canada is Ottawa.", "ground_truth_answer": "The capital of Canada is Ottawa."}}
|
| 36 |
+
{"test_id": "GK3", "title": "Patient Asks General Knowledge - Art", "test_role": "patient", "turns": [{"role": "user", "text": "Who painted the Mona Lisa?"}], "ground_truth": {"expected_route": "general_knowledge_question", "expected_tags": {}, "expected_sources": ["General Knowledge"], "ground_truth_keypoints": "State the factual answer: Leonardo da Vinci painted the Mona Lisa.", "ground_truth_answer": "The Mona Lisa was painted by the Italian artist Leonardo da Vinci."}}
|
| 37 |
+
{"test_id": "GK4", "title": "Patient Asks General Knowledge - History", "test_role": "patient", "turns": [{"role": "user", "text": "What year did World War II end?"}], "ground_truth": {"expected_route": "general_knowledge_question", "expected_tags": {}, "expected_sources": ["General Knowledge"], "ground_truth_keypoints": "State the factual answer: World War II ended in 1945.", "ground_truth_answer": "World War II ended in 1945."}}
|
| 38 |
+
{"test_id": "SUM1", "work": "Complaints of a Dutiful Daughter", "title": "Caregiver Asks for a Summary", "test_role": "caregiver", "turns": [{"role": "user", "text": "Can you give me a summary of the main events from the last three days?"}], "ground_truth": {"expected_route": "summarization", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "Summarize the key events from the past three days: Monday (agitation), Tuesday (visit/music), Wednesday (doctor/sleep).", "ground_truth_answer": "Certainly. Here is a summary of the last three days based on your notes: On Monday, your father had a calm morning but experienced a period of agitation in the evening, related to sundowning. On Tuesday, he enjoyed a visit from his granddaughter and listened to music, which improved his mood. Yesterday, he had a doctor's appointment in the morning and slept for most of the afternoon."}}
|
| 39 |
+
{"test_id": "SUM2", "work": "Complaints of a Dutiful Daughter", "title": "Caregiver Asks for Summary of a Symptom", "test_role": "caregiver", "turns": [{"role": "user", "text": "Summarize all the journal entries related to my husband's word-finding difficulties."}], "ground_truth": {"expected_route": "summarization", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "Summarize the progression of word-finding issues over the past three months as noted in the journal.", "ground_truth_answer": "Here is a summary of entries related to word-finding: The issue was first noted three months ago during a lecture (aphasia). Two months ago, he struggled to name common kitchen items. Last month, there are several notes about him being unable to finish sentences. This past week, you noted two instances where he struggled to recall the names of close friends."}}
|
| 40 |
+
{"test_id": "SUM3", "work": "Complaints of a Dutiful Daughter", "title": "Caregiver Asks for Summary for New Aide", "test_role": "caregiver", "turns": [{"role": "user", "text": "Create a short, one-paragraph summary of my wife's key interests and daily routine for the new weekend aide."}], "ground_truth": {"expected_route": "summarization", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "Summarize Fiona's routine: wakes at 8, enjoys music, walks after lunch, looks at photos, and needs a calm environment.", "ground_truth_answer": "Of course. Fiona is a kind woman who enjoys quiet activities. Her daily routine typically involves waking around 8 AM, having tea and toast for breakfast, and listening to classical music. She enjoys short walks in the garden after lunch, weather permitting. In the afternoon, she often likes to look at old photo albums or sit quietly. She can become anxious if her routine is disrupted, so maintaining a calm and predictable environment is very helpful."}}
|
| 41 |
+
{"test_id": "SUM4", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks for Summary of Previous Day", "test_role": "patient", "turns": [{"role": "user", "text": "Can you remind me what I did yesterday? Just the main points, please."}], "ground_truth": {"expected_route": "summarization", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "Remind the patient of yesterday's key activities: a quiet morning with music, a visit from Anne, and chicken soup for dinner.", "ground_truth_answer": "Of course. Yesterday, you had a quiet morning and enjoyed listening to some of your favorite music. In the afternoon, your daughter Anne came to visit, and you spent time looking at old photographs together. For dinner, you had chicken soup."}}
|
| 42 |
+
{"test_id": "FQ1", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks for Daughter's Name", "test_role": "patient", "turns": [{"role": "user", "text": "I'm trying to remember... what is my daughter's name again?"}], "ground_truth": {"expected_route": "factual_question", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "Gently state the daughter's name is Debbie and add an empathetic, relationship-affirming statement.", "ground_truth_answer": "Thinking about your daughter, the journal mentions her name is Debbie. She sounds very important to you."}}
|
| 43 |
+
{"test_id": "FQ2_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks for Partner's Name", "test_role": "patient", "turns": [{"role": "user", "text": "Who was my long-term partner I lived with in New York?"}], "ground_truth": {"expected_route": "factual_question", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "State that their long-term partner in New York was named Danish, with whom they lived for almost 50 years.", "ground_truth_answer": "According to the journal, you lived with Danish for almost 50 years in New York."}}
|
| 44 |
+
{"test_id": "FQ3_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks for Daughter's Birthplace", "test_role": "patient", "turns": [{"role": "user", "text": "Where was my daughter Debbie born?"}], "ground_truth": {"expected_route": "factual_question", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "State the factual answer: Your daughter Debbie was born on 169th Street in Flushing.", "ground_truth_answer": "The journal mentions that your daughter, Debbie, was born on 169th Street in Flushing."}}
|
| 45 |
+
{"test_id": "FQ4_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks About Their Age", "test_role": "patient", "turns": [{"role": "user", "text": "How old am I, according to my last conversation with Debbie?"}], "ground_truth": {"expected_route": "factual_question", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "State the factual answer from the journal: In a conversation with Debbie, you realized you are 84 years old.", "ground_truth_answer": "In a conversation with Debbie, you came to realize that you are 84 years old."}}
|
| 46 |
+
{"test_id": "FQ5_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks About Partner's Passing", "test_role": "patient", "turns": [{"role": "user", "text": "I know my husband passed away. When did the journal say it happened?"}], "ground_truth": {"expected_route": "factual_question", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "State the factual answer from the journal: Danish passed away about five years ago.", "ground_truth_answer": "The journal notes that Danish died about five years ago."}}
|
| 47 |
+
{"test_id": "MH1_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks to Connect Husband to a Location", "test_role": "patient", "turns": [{"role": "user", "text": "Did my husband Danish live with us in Flushing where Debbie was born?"}], "ground_truth": {"expected_route": "multi_hop", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "Synthesize the facts: The journal says you all lived in Flushing and you lived with Danish for 50 years.", "ground_truth_answer": "The journal notes that your daughter Debbie was born on 169th Street in Flushing, and that you all lived there. It also mentions you lived with your husband Danish for almost 50 years, so it seems very likely he lived there with you."}}
|
| 48 |
+
{"test_id": "MH2_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks About Timeline of Relationship", "test_role": "patient", "turns": [{"role": "user", "text": "Considering Danish passed away 5 years ago, how long had we been living together before that?"}], "ground_truth": {"expected_route": "multi_hop", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "Synthesize the facts: The journal says you lived with Danish for almost 50 years and he passed away 5 years ago.", "ground_truth_answer": "That's a thoughtful question. The journal mentions that you lived with Danish for almost 50 years and that he passed away about five years ago, which suggests you had a long and meaningful life together."}}
|
| 49 |
+
{"test_id": "MH3_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks to Combine People and Places", "test_role": "patient", "turns": [{"role": "user", "text": "I know I lived with Danish in New York, but who was the other person I knew there?"}], "ground_truth": {"expected_route": "multi_hop", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "Synthesize the facts: The journal mentions you lived with Danish in New York and also knew someone named Frances in Flushing.", "ground_truth_answer": "Looking at your journal, it mentions you lived in New York for a long time with Danish. A separate memory from your time living on 169th Street in Flushing also notes that you knew someone named Frances."}}
|
| 50 |
+
{"test_id": "MH4_new", "work": "Complaints of a Dutiful Daughter", "title": "Patient Asks to Contrast Two Eras", "test_role": "patient", "turns": [{"role": "user", "text": "I know I started out in San Francisco and then moved to New York. What does the journal say about my life in San Francisco?"}], "ground_truth": {"expected_route": "multi_hop", "expected_tags": {}, "expected_sources": ["1 Complaints of a Dutiful Daughter.txt"], "ground_truth_keypoints": "State what the journal contains: it mentions you started in San Francisco but provides no further details about your life there.", "ground_truth_answer": "The journal mentions that you started out in San Francisco before you went to New York for a long time, but it doesn't contain any other specific details about your life there."}}
|
evaluate.py
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# evaluate.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
import time
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from typing import List, Dict, Any
|
| 8 |
+
|
| 9 |
+
# --- Imports from the main application ---
|
| 10 |
+
try:
    from alz_companion.agent import (
        make_rag_chain, route_query_type, detect_tags_from_query,
        answer_query, call_llm
    )
    from alz_companion.prompts import FAITHFULNESS_JUDGE_PROMPT
    from langchain_community.vectorstores import FAISS
except ImportError:
    # The application package (or langchain) is unavailable: define inert
    # stand-ins with the same names so this module still imports and the
    # evaluation entry points can run end to end with placeholder output.

    class FAISS:
        pass

    def make_rag_chain(*args, **kwargs):
        """Return a stub chain that echoes the query and reports no sources."""
        def _echo_chain(q, **k):
            return {"answer": f"(Eval Fallback) You asked: {q}", "sources": []}

        return _echo_chain

    def route_query_type(q):
        """Route every query to the generic conversation type."""
        return "general_conversation"

    def detect_tags_from_query(*args, **kwargs):
        """Detect no tags."""
        return {}

    def answer_query(chain, q, **kwargs):
        """Invoke the chain directly with the query and keyword arguments."""
        return chain(q, **kwargs)

    def call_llm(*args, **kwargs):
        """Return an empty JSON object as the model reply."""
        return "{}"

    FAITHFULNESS_JUDGE_PROMPT = ""
    print("WARNING: Could not import from alz_companion. Evaluation functions will use fallbacks.")
|
| 26 |
+
|
| 27 |
+
# --- LLM-as-a-Judge Prompt for Answer Correctness ---
# Template for str.format(): the placeholders are {ground_truth_answer} and
# {generated_answer}. The doubled braces around the JSON example are escapes
# that render as single literal braces once the template is formatted.
# The reply is parsed for a "correctness_score" key by the evaluation loop.
ANSWER_CORRECTNESS_JUDGE_PROMPT = """You are an expert evaluator. Your task is to assess the factual correctness of a generated answer against a ground truth answer.

- GROUND_TRUTH_ANSWER: This is the gold-standard, correct answer.
- GENERATED_ANSWER: This is the answer produced by the AI model.

Evaluate if the GENERATED_ANSWER is factually aligned with the GROUND_TRUTH_ANSWER. Ignore minor differences in phrasing, tone, or structure. The key is factual accuracy.

Respond with a single JSON object containing a float score from 0.0 to 1.0.
- 1.0: The generated answer is factually correct and aligns perfectly with the ground truth.
- 0.5: The generated answer is partially correct but misses key information or contains minor inaccuracies.
- 0.0: The generated answer is factually incorrect or contradicts the ground truth.

--- DATA TO EVALUATE ---
GROUND_TRUTH_ANSWER:
{ground_truth_answer}

GENERATED_ANSWER:
{generated_answer}
---

Return a single JSON object with your score:
{{
"correctness_score": <float>
}}
"""
|
| 53 |
+
|
| 54 |
+
# Module-level cache of evaluation fixtures; (re)populated by load_test_fixtures().
test_fixtures = []


def load_test_fixtures():
    """Load evaluation fixtures from a JSONL file into ``test_fixtures``.

    The file is taken from the ``TEST_FIXTURES_PATH`` environment variable
    when it is set (and non-blank); otherwise the first existing file among
    the built-in candidate names is used. The global list is always reset
    first, so it is empty when no file is found.

    Each non-blank line must be a JSON object. Blank lines are ignored
    silently; lines that are not valid JSON, or that decode to something
    other than an object, are skipped with a printed warning.
    """
    global test_fixtures
    test_fixtures = []

    env_path = os.environ.get("TEST_FIXTURES_PATH", "").strip()
    if env_path:
        candidates = [env_path]
    else:
        candidates = ["conversation_test_fixtures_v8.jsonl", "conversation_test_fixtures_v5.jsonl"]

    # isfile (not exists) so that a directory with a matching name is not
    # selected and then fails inside open().
    path = next((p for p in candidates if p and os.path.isfile(p)), None)
    if not path:
        print("Warning: No test fixtures file found for evaluation.")
        return

    # Use the corrected v8 file if available
    if "conversation_test_fixtures_v8.jsonl" in path:
        print(f"Using corrected test fixtures: {path}")

    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # Blank separator lines are normal in JSONL and are not errors.
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                print(f"Skipping malformed JSON line in {path}")
                continue
            # Downstream code calls .get() on every fixture, so only JSON
            # objects are usable.
            if not isinstance(record, dict):
                print(f"Skipping non-object JSON line in {path}")
                continue
            test_fixtures.append(record)

    print(f"Loaded {len(test_fixtures)} fixtures for evaluation from {path}")
|
| 78 |
+
|
| 79 |
+
def evaluate_nlu_tags(expected: Dict[str, Any], actual: Dict[str, Any], tag_key: str, expected_key_override: str = None) -> Dict[str, float]:
|
| 80 |
+
lookup_key = expected_key_override or tag_key
|
| 81 |
+
expected_raw = expected.get(lookup_key, [])
|
| 82 |
+
expected_set = set(expected_raw if isinstance(expected_raw, list) else [expected_raw]) if expected_raw and expected_raw != "None" else set()
|
| 83 |
+
actual_raw = actual.get(tag_key, [])
|
| 84 |
+
actual_set = set(actual_raw if isinstance(actual_raw, list) else [actual_raw]) if actual_raw and actual_raw != "None" else set()
|
| 85 |
+
if not expected_set and not actual_set:
|
| 86 |
+
return {"precision": 1.0, "recall": 1.0, "f1_score": 1.0}
|
| 87 |
+
true_positives = len(expected_set.intersection(actual_set))
|
| 88 |
+
precision = true_positives / len(actual_set) if actual_set else 0.0
|
| 89 |
+
recall = true_positives / len(expected_set) if expected_set else 0.0
|
| 90 |
+
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
|
| 91 |
+
return {"precision": precision, "recall": recall, "f1_score": f1_score}
|
| 92 |
+
|
| 93 |
+
def _parse_judge_json(raw_str: str) -> dict | None:
|
| 94 |
+
try:
|
| 95 |
+
start_brace = raw_str.find('{')
|
| 96 |
+
end_brace = raw_str.rfind('}')
|
| 97 |
+
if start_brace != -1 and end_brace > start_brace:
|
| 98 |
+
json_str = raw_str[start_brace : end_brace + 1]
|
| 99 |
+
return json.loads(json_str)
|
| 100 |
+
return None
|
| 101 |
+
except (json.JSONDecodeError, AttributeError):
|
| 102 |
+
return None
|
| 103 |
+
|
| 104 |
+
def _source_precision_recall(expected: set, actual: set) -> tuple[float, float]:
    """Return (precision, recall) of retrieved sources against expected ones."""
    # Nothing expected and nothing retrieved counts as a perfect result.
    if not expected and not actual:
        return 1.0, 1.0
    hits = len(expected & actual)
    precision = hits / len(actual) if actual else 0.0
    recall = hits / len(expected) if expected else 0.0
    return precision, recall


def _print_source_report(expected: set, actual: set) -> None:
    """Print a per-test comparison of expected and retrieved source names."""
    print("\n" + "-"*20 + " SOURCE EVALUATION " + "-"*20)
    print(f" - Expected: {sorted(expected)}")
    print(f" - Actual: {sorted(actual)}")

    false_positives = actual - expected
    false_negatives = expected - actual

    if not false_positives and not false_negatives:
        print(" - Result: ✅ Perfect Match!")
    else:
        if false_positives:
            print(f" - 🔻 False Positives (hurts precision): {sorted(false_positives)}")
        if false_negatives:
            print(f" - 🔻 False Negatives (hurts recall): {sorted(false_negatives)}")
    print("-"*59 + "\n")


def run_comprehensive_evaluation(
    vs_general: FAISS,
    vs_personal: FAISS,
    nlu_vectorstore: FAISS,
    config: Dict[str, Any]
):
    """Run every loaded fixture through routing, tagging and RAG, and score it.

    For each fixture in the module-level ``test_fixtures`` list the last user
    turn is used as the query. The function records routing correctness,
    tag F1 per category, source precision/recall, latency, and two
    LLM-judged scores (answer correctness and faithfulness). Per-test rows
    are written to ``evaluation_results.csv`` when at least one fixture was
    evaluated.

    Args:
        vs_general: Vector store passed to ``make_rag_chain`` as the first
            positional argument.
        vs_personal: Vector store passed to ``make_rag_chain`` as the second
            positional argument; its document count is printed for debugging
            when it exposes ``docstore._dict``.
        nlu_vectorstore: Vector store passed to ``detect_tags_from_query``.
        config: Must contain ``behavior_tags``, ``emotion_tags``,
            ``topic_tags`` and ``context_tags`` (the option lists forwarded
            to tag detection).

    Returns:
        A ``(summary_text, table_rows, headers)`` tuple: a Markdown summary,
        the result rows as lists, and the matching column headers. When no
        fixtures are loaded the rows and headers are empty lists.
    """
    if not test_fixtures:
        return "No test fixtures loaded. Please ensure conversation_test_fixtures_v8.jsonl exists.", [], []

    def _norm(label: str) -> str:
        # Any label mentioning "factual" collapses to one canonical route name.
        label = (label or "").strip().lower()
        return "factual_question" if "factual" in label else label

    print("Starting comprehensive evaluation...")
    results: List[Dict[str, Any]] = []

    total_fixtures = len(test_fixtures)
    print(f"\n🚀 STARTING EVALUATION on {total_fixtures} test cases...")

    # Debug aid only: must never abort the run, so probe attributes defensively
    # (a store without `docstore` previously raised AttributeError here).
    print("--- DEBUG: Checking personal vector store before evaluation ---")
    docstore = getattr(vs_personal, "docstore", None) if vs_personal else None
    doc_dict = getattr(docstore, "_dict", None)
    if doc_dict is not None:
        print(f"Personal vector store contains {len(doc_dict)} documents.")
    else:
        print("Personal vector store appears to be empty or invalid.")

    for i, fx in enumerate(test_fixtures):
        test_id = fx.get("test_id", "N/A")
        print(f"--- Processing Test Case {i+1}/{total_fixtures}: ID = {test_id} ---")

        turns = fx.get("turns") or []
        api_chat_history = [{"role": t.get("role"), "content": t.get("text")} for t in turns]
        # The query is the most recent user turn; turns without a role count as user.
        query = next(
            (t["content"] for t in reversed(api_chat_history) if (t.get("role") or "user").lower() == "user"),
            "",
        )
        if not query:
            continue

        ground_truth = fx.get("ground_truth", {})
        expected_route = _norm(ground_truth.get("expected_route", "caregiving_scenario"))
        expected_tags = ground_truth.get("expected_tags", {})

        actual_route = _norm(route_query_type(query))
        route_correct = (actual_route == expected_route)

        # Tags are only detected (and forwarded to the chain) on the caregiving route.
        actual_tags: Dict[str, Any] = {}
        final_tags: Dict[str, Any] = {}
        if "caregiving_scenario" in actual_route:
            actual_tags = detect_tags_from_query(
                query, nlu_vectorstore=nlu_vectorstore,
                behavior_options=config["behavior_tags"], emotion_options=config["emotion_tags"],
                topic_options=config["topic_tags"], context_options=config["context_tags"],
            )
            final_tags = {
                "scenario_tag": (actual_tags.get("detected_behaviors") or [None])[0],
                "emotion_tag": actual_tags.get("detected_emotion"),
                "topic_tag": (actual_tags.get("detected_topics") or [None])[0],
                "context_tags": actual_tags.get("detected_contexts", [])
            }

        behavior_metrics = evaluate_nlu_tags(expected_tags, actual_tags, "detected_behaviors")
        emotion_metrics = evaluate_nlu_tags(expected_tags, actual_tags, "detected_emotion")
        topic_metrics = evaluate_nlu_tags(expected_tags, actual_tags, "detected_topics")
        context_metrics = evaluate_nlu_tags(expected_tags, actual_tags, "detected_contexts")

        current_test_role = fx.get("test_role", "patient")
        rag_chain = make_rag_chain(vs_general, vs_personal, role=current_test_role)

        t0 = time.time()
        response = answer_query(rag_chain, query, query_type=actual_route, chat_history=api_chat_history, **final_tags)
        latency_ms = round((time.time() - t0) * 1000.0, 1)
        answer_text = response.get("answer", "ERROR")
        if answer_text is None:
            # An explicit null answer is treated like a missing one so the
            # substring checks below cannot raise TypeError.
            answer_text = "ERROR"

        expected_sources_set = set(map(str, ground_truth.get("expected_sources", [])))
        raw_sources = response.get("sources", [])
        actual_sources_set = set(map(str, raw_sources if isinstance(raw_sources, (list, tuple)) else [raw_sources]))

        _print_source_report(expected_sources_set, actual_sources_set)
        context_precision, context_recall = _source_precision_recall(expected_sources_set, actual_sources_set)

        # --- LLM judge: answer correctness against the fixture's reference answer ---
        answer_correctness_score = None
        ground_truth_answer = ground_truth.get("ground_truth_answer")
        if ground_truth_answer and "ERROR" not in answer_text:
            try:
                judge_msg = ANSWER_CORRECTNESS_JUDGE_PROMPT.format(ground_truth_answer=ground_truth_answer, generated_answer=answer_text)
                raw_correctness = call_llm([{"role": "user", "content": judge_msg}], temperature=0.0)
                correctness_data = _parse_judge_json(raw_correctness)
                if correctness_data and "correctness_score" in correctness_data:
                    answer_correctness_score = float(correctness_data["correctness_score"])
            except Exception as e:
                print(f"ERROR during answer correctness judging: {e}")

        # --- LLM judge: faithfulness of the answer to the retrieved documents ---
        faithfulness = None
        source_docs = response.get("source_documents", [])
        if source_docs and "ERROR" not in answer_text:
            # Prompt construction lives inside the try (as for the correctness
            # judge) so a judging problem is logged instead of ending the run.
            try:
                context_blob = "\n---\n".join([doc.page_content for doc in source_docs])
                if context_blob.strip():
                    judge_msg = FAITHFULNESS_JUDGE_PROMPT.format(query=query, answer=answer_text, sources=context_blob)
                    raw = call_llm([{"role": "user", "content": judge_msg}], temperature=0.0)
                    data = _parse_judge_json(raw)
                    if data:
                        supported = data.get("supported", 0)
                        denom = supported + data.get("contradicted", 0) + data.get("not_enough_info", 0)
                        if denom > 0:
                            faithfulness = round(supported / denom, 3)
                        elif data.get("ignored", 0) > 0:
                            faithfulness = 1.0
            except Exception as e:
                print(f"ERROR during faithfulness judging: {e}")

        sources_pretty = ", ".join(sorted(actual_sources_set))
        results.append({
            "test_id": fx.get("test_id", "N/A"), "title": fx.get("title", "N/A"),
            "route_correct": "✅" if route_correct else "❌", "expected_route": expected_route, "actual_route": actual_route,
            "behavior_f1": f"{behavior_metrics['f1_score']:.2f}", "emotion_f1": f"{emotion_metrics['f1_score']:.2f}",
            "topic_f1": f"{topic_metrics['f1_score']:.2f}", "context_f1": f"{context_metrics['f1_score']:.2f}",
            "generated_answer": answer_text, "sources": sources_pretty, "source_count": len(actual_sources_set),
            "latency_ms": latency_ms, "faithfulness": faithfulness,
            "context_precision": context_precision, "context_recall": context_recall,
            "answer_correctness": answer_correctness_score,
        })

    df = pd.DataFrame(results)
    output_path = "evaluation_results.csv"
    if df.empty:
        return "No valid test fixtures found to evaluate.", [], []

    cols = [
        "test_id", "title", "route_correct", "expected_route", "actual_route",
        "context_precision", "context_recall", "faithfulness", "answer_correctness",
        "behavior_f1", "emotion_f1", "topic_f1", "context_f1",
        "source_count", "latency_ms", "sources", "generated_answer"
    ]
    df = df[[c for c in cols if c in df.columns]]
    df.to_csv(output_path, index=False, encoding="utf-8")
    print(f"Evaluation results saved to {output_path}")

    def _mean(column: str) -> float:
        # F1 columns are stored as formatted strings and judge scores may be
        # None, so coerce to numbers (NaN on failure) before averaging.
        return pd.to_numeric(df[column], errors="coerce").mean()

    def _pct_or_na(value: float) -> str:
        return "N/A" if pd.isna(value) else f"{(value * 100):.1f}%"

    pct = df["route_correct"].value_counts(normalize=True).get("✅", 0) * 100
    rag_with_sources_pct = (df["source_count"] > 0).mean() * 100 if "source_count" in df else 0

    summary_text = "\n".join([
        "",
        "## Evaluation Summary",
        f"- **Routing Accuracy**: {pct:.2f}%",
        f"- **Behaviour F1 (avg)**: {(_mean('behavior_f1') * 100):.2f}%",
        f"- **Emotion F1 (avg)**: {(_mean('emotion_f1') * 100):.2f}%",
        f"- **Topic F1 (avg)**: {(_mean('topic_f1') * 100):.2f}%",
        f"- **Context F1 (avg)**: {(_mean('context_f1') * 100):.2f}%",
        f"- **RAG: Context Precision**: {_pct_or_na(_mean('context_precision'))}",
        f"- **RAG: Context Recall**: {_pct_or_na(_mean('context_recall'))}",
        f"- **RAG: Faithfulness (LLM-judge)**: {_pct_or_na(_mean('faithfulness'))}",
        f"- **RAG: Answer Correctness (LLM-judge)**: {_pct_or_na(_mean('answer_correctness'))}",
        f"- **RAG Answers w/ Sources**: {rag_with_sources_pct:.1f}%",
        f"- **RAG: Avg Latency (ms)**: {_mean('latency_ms'):.1f}",
        "",
    ])

    df_display = df.rename(columns={
        "context_precision": "Ctx. Precision", "context_recall": "Ctx. Recall",
        "answer_correctness": "Answer Correct.", "faithfulness": "Faithfulness",
        "behavior_f1": "Behav. F1", "emotion_f1": "Emo. F1", "topic_f1": "Topic F1", "context_f1": "Ctx. F1"
    })
    table_rows = df_display.values.tolist()
    headers = df_display.columns.tolist()

    return summary_text, table_rows, headers
|
nlu_training_examples.jsonl
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"primary_goal": "emotional_support", "query": "It's getting dark and I feel all mixed up. I need to go home now.", "classification": {"detected_behaviors": ["sundowning_restlessness", "agitation", "exit_seeking"], "detected_emotion": "anxiety", "detected_topic": "personal_story", "detected_contexts": []}}
|
| 2 |
+
{"primary_goal": "emotional_support", "query": "My wife didn't recognize me today during my visit.", "classification": {"detected_behaviors": ["misidentification"], "detected_emotion": "sadness", "detected_topic": "personal_story", "detected_contexts": ["relationship_spouse", "setting_care_home"]}}
|
| 3 |
+
{"primary_goal": "emotional_support", "query": "Getting dressed in the morning has become so difficult. I can't seem to work the buttons on my own shirt.", "classification": {"detected_behaviors": ["confusion"], "detected_emotion": "anger", "detected_topic": "personal_story", "detected_contexts": ["disease_stage_moderate"]}}
|
| 4 |
+
{"primary_goal": "emotional_support", "query": "He just stared blankly when I sat with him at the nursing home. It's like I'm a complete stranger to him now.", "classification": {"detected_behaviors": ["misidentification"], "detected_emotion": "sadness", "detected_topic": "caregiving_advice", "detected_contexts": ["relationship_family", "setting_care_home"]}}
|
| 5 |
+
{"primary_goal": "practical_planning", "query": "He just stays in his room at the facility and shows no interest in the activities they offer.", "classification": {"detected_behaviors": ["withdrawal", "apathy"], "detected_emotion": "sadness", "detected_topic": "caregiving_advice", "detected_contexts": ["setting_care_home"]}}
|
| 6 |
+
{"primary_goal": "emotional_support", "query": "I was driving home on my usual route from the city, and suddenly none of the streets looked familiar. I had to pull over because I was so scared and confused.", "classification": {"detected_behaviors": ["confusion", "wandering"], "detected_emotion": "fear", "detected_topic": "personal_story", "detected_contexts": ["disease_stage_moderate"]}}
|
| 7 |
+
{"primary_goal": "emotional_support", "query": "We were at dinner with friends and I couldn't remember the word for... you know, the thing you use to open a bottle of wine. I felt so foolish.", "classification": {"detected_behaviors": ["aphasia"], "detected_emotion": "sadness", "detected_topic": "personal_story", "detected_contexts": ["disease_stage_mild"]}}
|
| 8 |
+
{"primary_goal": "practical_planning", "query": "She's standing by the main door crying, asking me over and over when her husband is coming to take her home.", "classification": {"detected_behaviors": ["depression_sadness", "exit_seeking", "repetitive_questioning"], "detected_emotion": "sadness", "detected_topic": "caregiving_advice", "detected_contexts": ["setting_care_home"]}}
|
| 9 |
+
{"primary_goal": "emotional_support", "query": "She looked right through me today, like I wasn't even there.", "classification": {"detected_behaviors": ["validation", "reminiscence_prompting", "misidentification"], "detected_emotion": "sadness", "detected_topic": "treatment_option:reassurance", "detected_contexts": ["relationship_spouse"]}}
|
| 10 |
+
{"primary_goal": "emotional_support", "query": "I just feel so lost and alone in this.", "classification": {"detected_behaviors": ["validate_feelings_of_sadness", "reassurance"], "detected_emotion": "sadness", "detected_topic": "caregiving_advice", "detected_contexts": ["relationship_staff_or_caregiver"]}}
|
| 11 |
+
{"primary_goal": "practical_planning", "query": "He’s withdrawn today. What should I do?", "classification": {"detected_behaviors": ["withdrawal", "personalised_music_activation"], "detected_emotion": "calm", "detected_topic": "treatment_option:music_therapy", "detected_contexts": ["setting_home_or_community"]}}
|
| 12 |
+
{"primary_goal": "practical_planning", "query": "How can I make the house safer for him? He keeps wandering at night.", "classification": {"detected_behaviors": ["wandering", "safe_wandering_space"], "detected_emotion": "anxiety", "detected_topic": "treatment_option:home_safety", "detected_contexts": ["setting_home_or_community"]}}
|
| 13 |
+
{"primary_goal": "practical_planning", "query": "What's the best way to handle repetitive questions?", "classification": {"detected_behaviors": ["repetitive_questioning", "validation", "gentle_redirection"], "detected_emotion": "calm", "detected_topic": "caregiving_advice", "detected_contexts": []}}
|
| 14 |
+
{"primary_goal": "emotional_support", "query": "He got so angry and accused me of stealing. It was awful.", "classification": {"detected_behaviors": ["false_accusations", "de-escalation", "reassurance"], "detected_emotion": "anger", "detected_topic": "treatment_option:reassurance", "detected_contexts": ["relationship_family"]}}
|
| 15 |
+
{"primary_goal": "practical_planning", "query": "The doctor confirmed it's early-onset Alzheimer's. I don't know what the next steps are.", "classification": {"detected_behaviors": [], "detected_emotion": "anxiety", "detected_topic": "medical_fact", "detected_contexts": ["setting_clinic_or_hospital", "relationship_family", "disease_stage_mild"]}}
|
| 16 |
+
{"primary_goal": "practical_planning", "query": "I need to talk to the staff at the care home about my husband's new medication.", "classification": {"detected_behaviors": ["caregiver_communication_template"], "detected_emotion": "calm", "detected_topic": "caregiving_advice", "detected_contexts": ["relationship_staff_or_caregiver", "setting_care_home"]}}
|
| 17 |
+
{"primary_goal": "emotional_support", "query": "This doesn't look like my house. Where am I?", "classification": {"detected_behaviors": ["confusion"], "detected_emotion": "confusion", "detected_topic": "caregiving_advice", "detected_contexts": ["setting_home_or_community"]}}
|
| 18 |
+
{"primary_goal": "practical_planning", "query": "No matter how many times I tell her, my mother keeps asking when we're leaving for the appointment.", "classification": {"detected_behaviors": ["repetitive_questioning"], "detected_emotion": "calm", "detected_topic": "caregiving_advice", "detected_contexts": ["relationship_family"]}}
|
| 19 |
+
{"primary_goal": "emotional_support", "query": "He is very agitated and thinks the cleaning lady has been stealing his mail.", "classification": {"detected_behaviors": ["false_accusations", "object_misplacement", "agitation"], "detected_emotion": "anger", "detected_topic": "caregiving_advice", "detected_contexts": ["setting_home_or_community"]}}
|
| 20 |
+
{"primary_goal": "practical_planning", "query": "What are the seven stages of Alzheimer's disease?", "classification": {"detected_behaviors": [], "detected_emotion": "calm", "detected_topic": "medical_fact", "detected_contexts": []}}
|
| 21 |
+
{"primary_goal": "emotional_support", "query": "I felt so foolish today when I forgot my own granddaughter's name.", "classification": {"detected_behaviors": ["misidentification"], "detected_emotion": "sadness", "detected_topic": "personal_story", "detected_contexts": ["relationship_family", "disease_stage_mild"]}}
|
| 22 |
+
{"primary_goal": "practical_planning", "query": "She won't come out of her room at the care home all day.", "classification": {"detected_behaviors": ["withdrawal"], "detected_emotion": "sadness", "detected_topic": "caregiving_advice", "detected_contexts": ["setting_care_home"]}}
|
| 23 |
+
|