KeenWoo commited on
Commit
fd190b4
·
verified ·
1 Parent(s): 53c8496

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +730 -0
app.py ADDED
@@ -0,0 +1,730 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import shutil
4
+ import gradio as gr
5
+ import tempfile
6
+ from datetime import datetime
7
+ from typing import List, Dict, Any, Optional
8
+ from pytube import YouTube
9
+ from pathlib import Path
10
+ import re
11
+ from moviepy.editor import VideoFileClip
12
+
13
+ # --- Agent Imports & Safe Fallbacks ---
14
# Prefer the real agent stack. If any part of it cannot be imported, install
# in-memory stand-ins so the UI can still be exercised end to end.
try:
    from alz_companion.agent import (
        bootstrap_vectorstore, make_rag_chain, answer_query, synthesize_tts,
        transcribe_audio, detect_tags_from_query, describe_image, build_or_load_vectorstore,
        _default_embeddings
    )
    from alz_companion.prompts import BEHAVIOUR_TAGS, EMOTION_STYLES
    from langchain.schema import Document
    from langchain_community.vectorstores import FAISS
    AGENT_OK = True
except Exception as e:
    AGENT_OK = False

    class Document:
        """Stand-in document carrying the two attributes this file reads."""

        def __init__(self, page_content, metadata=None):
            self.page_content = page_content
            self.metadata = metadata if metadata is not None else {}

    class _DemoDocstore:
        """Holds documents in the ``_dict`` mapping that the memory callbacks inspect."""

        def __init__(self):
            self._dict = {}

    class FAISS:
        """In-memory stand-in for the vector store used in demo mode.

        It implements only the calls made in this file: ``from_documents``,
        ``add_documents``, ``save_local``, ``as_retriever`` and the
        ``docstore._dict`` mapping. Nothing is embedded or written to disk.
        """

        def __init__(self):
            self.docstore = _DemoDocstore()
            self._next_id = 0

        @classmethod
        def from_documents(cls, documents, embedding=None, **kwargs):
            store = cls()
            store.add_documents(documents)
            return store

        def add_documents(self, documents, **kwargs):
            ids = []
            for doc in documents or []:
                doc_id = str(self._next_id)
                self._next_id += 1
                self.docstore._dict[doc_id] = doc
                ids.append(doc_id)
            return ids

        def save_local(self, folder_path, **kwargs):
            # Demo mode keeps everything in memory; there is nothing to persist.
            return None

        def as_retriever(self, **kwargs):
            # No retrieval in demo mode; the fallback tag detector ignores the retriever.
            return None

    # Fallback functions. The vector-store factories return the stub above so
    # that callbacks calling add_documents/save_local keep working.
    def bootstrap_vectorstore(sample_paths=None, index_path="data/"): return FAISS()
    def build_or_load_vectorstore(docs, index_path, is_personal=False): return FAISS.from_documents(docs or [])
    def make_rag_chain(vs_general, vs_personal, **kwargs): return lambda q, **k: {"answer": f"(Demo) You asked: {q}", "sources": []}
    def answer_query(chain, q, **kwargs): return chain(q, **kwargs)
    def synthesize_tts(text: str, lang: str = "en"): return None
    def transcribe_audio(filepath: str, lang: str = "en"): return "This is a transcribed message."
    def detect_tags_from_query(query: str, behavior_options: list, emotion_options: list, topic_options: list, context_options: list, **kwargs): return {"detected_behaviors": [], "detected_emotion": "None", "detected_topic": "None", "detected_contexts": []}
    def describe_image(image_path: str): return "This is a description of an image."
    def _default_embeddings(): return None

    BEHAVIOUR_TAGS = {"None": []}
    EMOTION_STYLES = {"None": {}}
    print(f"WARNING: Could not import from alz_companion ({e}). Running in UI-only demo mode.")
46
+
47
+ # --- Centralized Configuration ---
48
# Static option lists that drive the UI widgets and the tag detector.
# "None" is always the first tag choice and is filtered out of the imported
# mappings so it never appears twice (the fallback mappings already contain it).
CONFIG = {
    "themes": ["All", "The Father", "Still Alice", "Away from Her", "Alive Inside", "General Caregiving"],
    "roles": ["patient", "caregiver"],
    "behavior_tags": ["None"] + [tag for tag in BEHAVIOUR_TAGS.keys() if tag != "None"],
    "emotion_tags": ["None"] + [tag for tag in EMOTION_STYLES.keys() if tag != "None"],
    "topic_tags": [
        "None", "caregiving_advice", "medical_fact", "personal_story", "research_update",
        "treatment_option:home_safety", "treatment_option:long_term_care", "treatment_option:music_therapy",
        "treatment_option:reassurance", "treatment_option:routine_structuring", "treatment_option:validation_therapy"
    ],
    "context_tags": [
        "None", "disease_stage_mild", "disease_stage_moderate", "disease_stage_advanced",
        "disease_stage_unspecified", "interaction_mode_one_to_one", "interaction_mode_small_group",
        "interaction_mode_group_activity", "relationship_family", "relationship_spouse",
        "relationship_staff_or_caregiver", "relationship_unspecified", "setting_home_or_community",
        "setting_care_home", "setting_clinic_or_hospital"
    ],
    "languages": {"English": "en", "Chinese": "zh", "Malay": "ms", "French": "fr", "Spanish": "es"},
    "tones": ["warm", "neutral", "formal", "playful"]
}
68
+
69
+ # --- File Management & Vector Store Logic ---
70
# Everything the app persists lives under INDEX_BASE ("data" unless the
# INDEX_BASE environment variable says otherwise).
INDEX_BASE = os.getenv('INDEX_BASE', 'data')
UPLOADS_BASE = os.path.join(INDEX_BASE, "uploads")
PERSONAL_INDEX_PATH = os.path.join(INDEX_BASE, "personal_faiss_index")

# Create the upload root and the parent of the personal index at import time.
os.makedirs(UPLOADS_BASE, exist_ok=True)
os.makedirs(os.path.dirname(PERSONAL_INDEX_PATH), exist_ok=True)

# One index directory per theme, named after the theme with spaces removed and lower-cased.
THEME_PATHS = {
    theme_name: os.path.join(INDEX_BASE, "faiss_index_" + theme_name.replace(' ', '').lower())
    for theme_name in CONFIG["themes"]
}

# Global variables
vectorstores = {}            # canonical theme name -> loaded vector store
personal_vectorstore = None  # created on first use by the callbacks
test_fixtures = []           # filled by load_test_fixtures()
example_retriever = None     # filled by setup_example_retriever()
82
+
83
def canonical_theme(tk: str) -> str:
    """Return ``tk`` if it is one of the configured themes, otherwise "All"."""
    known_themes = CONFIG["themes"]
    if tk in known_themes:
        return tk
    return "All"
84
def theme_upload_dir(theme: str) -> str:
    """Return the upload directory for ``theme``, creating it if needed.

    The directory name is ``theme_`` followed by the canonical theme name
    with spaces removed and lower-cased.
    """
    slug = canonical_theme(theme).replace(' ', '').lower()
    directory = os.path.join(UPLOADS_BASE, f"theme_{slug}")
    os.makedirs(directory, exist_ok=True)
    return directory
88
def load_manifest(theme: str) -> Dict[str, Any]:
    """Load the theme's ``manifest.json``.

    Returns:
        The parsed manifest, always a dict whose "files" entry is a dict.
        A missing, unreadable or malformed manifest yields ``{"files": {}}``;
        unreadable and malformed cases are reported on stdout rather than
        being swallowed silently.
    """
    manifest_path = os.path.join(theme_upload_dir(theme), "manifest.json")
    try:
        with open(manifest_path, "r", encoding="utf-8") as f:
            manifest = json.load(f)
    except FileNotFoundError:
        # A theme that has never been saved simply has no manifest yet.
        return {"files": {}}
    except (OSError, ValueError) as exc:
        # ValueError covers both invalid JSON and undecodable bytes.
        print(f"WARNING: Could not read manifest {manifest_path} ({exc}). Using an empty manifest.")
        return {"files": {}}

    # Callers index manifest["files"] directly, so guarantee that shape here.
    if not isinstance(manifest, dict):
        print(f"WARNING: Manifest {manifest_path} is not a JSON object. Using an empty manifest.")
        return {"files": {}}
    if not isinstance(manifest.get("files"), dict):
        manifest["files"] = {}
    return manifest
95
def save_manifest(theme: str, man: Dict[str, Any]):
    """Write ``man`` as indented JSON to the theme's ``manifest.json``."""
    manifest_path = os.path.join(theme_upload_dir(theme), "manifest.json")
    with open(manifest_path, "w", encoding="utf-8") as handle:
        json.dump(man, handle, indent=2)
97
def list_theme_files(theme: str) -> List[tuple[str, bool]]:
    """List the theme's knowledge files with their enabled flag.

    Manifest entries whose file still exists come first, in manifest order.
    Files found on disk but not yet in the manifest are appended as disabled.
    The reconciled listing is written back to the manifest.

    ``manifest.json`` lives in the same directory, so it is excluded from
    both passes; otherwise the manifest would list itself as a knowledge file.

    Returns:
        A list of ``(file name, enabled)`` tuples.
    """
    manifest_name = "manifest.json"
    man = load_manifest(theme)
    base = theme_upload_dir(theme)

    found = [
        (name, bool(enabled))
        for name, enabled in man.get("files", {}).items()
        if name != manifest_name and os.path.exists(os.path.join(base, name))
    ]
    known = {name for name, _ in found}
    for name in sorted(os.listdir(base)):
        if name == manifest_name or name in known:
            continue
        if os.path.isfile(os.path.join(base, name)):
            found.append((name, False))

    man["files"] = dict(found)
    save_manifest(theme, man)
    return found
107
def copy_into_theme(theme: str, src_path: str) -> str:
    """Copy ``src_path`` into the theme's upload directory and return the new path."""
    file_name = os.path.basename(src_path)
    destination = os.path.join(theme_upload_dir(theme), file_name)
    shutil.copy2(src_path, destination)
    return destination
112
def seed_files_into_theme(theme: str):
    """Copy the bundled sample files into the theme and register them.

    Seed files that are missing from disk are skipped. A seed file is copied
    and recorded in the manifest only when the theme directory does not
    already contain a file of the same name. The manifest is saved only if
    at least one file was added.
    """
    seed_files = (
        ("sample_data/caregiving_tips.txt", True),
        ("sample_data/the_father_segments_enriched_harmonized_plus.jsonl", True),
        ("sample_data/still_alice_enriched_harmonized_plus.jsonl", True),
        ("sample_data/away_from_her_enriched_harmonized_plus.jsonl", True),
        ("sample_data/alive_inside_enriched_harmonized.jsonl", True),
    )
    manifest = load_manifest(theme)
    dirty = False
    for seed_path, enabled in seed_files:
        if not os.path.exists(seed_path):
            continue
        seed_name = os.path.basename(seed_path)
        already_present = os.path.exists(os.path.join(theme_upload_dir(theme), seed_name))
        if already_present:
            continue
        copy_into_theme(theme, seed_path)
        manifest["files"][seed_name] = bool(enabled)
        dirty = True
    if dirty:
        save_manifest(theme, manifest)
129
+
130
def ensure_index(theme='All'):
    """Return the vector store for ``theme``, building and caching it on first use.

    The store is bootstrapped from the theme's enabled files and cached in
    ``vectorstores`` under the canonical theme name.
    """
    theme = canonical_theme(theme)
    try:
        return vectorstores[theme]
    except KeyError:
        pass

    upload_dir = theme_upload_dir(theme)
    enabled_files = []
    for file_name, is_enabled in list_theme_files(theme):
        if is_enabled:
            enabled_files.append(os.path.join(upload_dir, file_name))

    store = bootstrap_vectorstore(sample_paths=enabled_files, index_path=THEME_PATHS.get(theme))
    vectorstores[theme] = store
    return store
138
+
139
+ # --- Gradio Callbacks ---
140
def collect_settings(*args):
    """Pair positional widget values with their setting names.

    Values are matched to names in order; surplus values, or names without a
    value, are dropped.
    """
    field_names = (
        "role", "patient_name", "caregiver_name", "tone", "language", "tts_lang",
        "temperature", "behaviour_tag", "emotion_tag", "topic_tag", "active_theme",
        "tts_on", "debug_mode",
    )
    settings = {}
    for name, value in zip(field_names, args):
        settings[name] = value
    return settings
143
+
144
def parse_and_tag_entries(text_content: str, source: str) -> List[Document]:
    """Split text into entries, tag each one, and wrap it in a ``Document``.

    Entries are separated by a line containing only ``---``, ``--``, ``-``,
    ``-*-``, ``-.-`` or an em dash (the separator the input placeholder
    advertises). An entry whose first line contains ``title:`` takes its
    title from that line. The remainder is the content, with a leading
    ``Content:`` label removed if present.

    Args:
        text_content: Raw text holding one or more entries.
        source: Label stored in each document's ``source`` metadata.

    Returns:
        One ``Document`` per non-empty entry. ``page_content`` is
        ``"Title: ...\\n\\nContent: ..."`` and the metadata carries the
        detected tags, lower-cased.
    """
    separator_pattern = r'\n(?:---|--|-|—|-\*-|-\.-)\n'
    entries = re.split(separator_pattern, text_content)
    docs_to_add = []

    # The option lists do not change between entries, so read them once.
    behavior_options = CONFIG.get("behavior_tags", [])
    emotion_options = CONFIG.get("emotion_tags", [])
    topic_options = CONFIG.get("topic_tags", [])
    context_options = CONFIG.get("context_tags", [])

    for entry in entries:
        entry = entry.strip()
        if not entry:
            continue
        title = "Untitled Text Entry"
        content = entry

        lines = entry.split('\n')
        if "title:" in lines[0].lower():
            # A blank title (e.g. the title box was left empty) becomes "Untitled".
            title = lines[0].split(':', 1)[1].strip() or "Untitled"
            content_part = "\n".join(lines[1:]).strip()
            # Strip the label only when it leads the content; splitting on the
            # first colon anywhere would cut off text such as "Dad said: ...".
            if content_part.lower().startswith("content:"):
                content = content_part[len("content:"):].strip()
            else:
                content = content_part

        full_content = f"Title: {title}\n\nContent: {content}"
        print(f" - Parsing entry: '{title}'")

        detected_tags = detect_tags_from_query(
            content,
            behavior_options=behavior_options, emotion_options=emotion_options,
            topic_options=topic_options, context_options=context_options,
            example_retriever=example_retriever
        )

        metadata = {"source": source, "title": title}
        detected_behaviors = detected_tags.get("detected_behaviors", [])
        if detected_behaviors:
            metadata["behaviors"] = [b.lower() for b in detected_behaviors]
        # Guard against a missing emotion as well as the "None" sentinel.
        detected_emotion = detected_tags.get("detected_emotion")
        if detected_emotion and detected_emotion != "None":
            metadata["emotion"] = detected_emotion.lower()
        detected_topics = detected_tags.get("detected_topic")
        if detected_topics and detected_topics != "None":
            metadata["topic_tags"] = [detected_topics.lower()]
        detected_contexts = detected_tags.get("detected_contexts", [])
        if detected_contexts:
            metadata["context_tags"] = [c.lower() for c in detected_contexts]

        docs_to_add.append(Document(page_content=full_content, metadata=metadata))
    return docs_to_add
193
+
194
def handle_add_knowledge(title, text_input, file_input, image_input, yt_url):
    """Add one piece of user-supplied knowledge to the personal vector store.

    The first non-empty input wins, in this order: typed text, uploaded file
    (``.txt`` is parsed as text; anything else is transcribed, with ``.mov``
    converted to audio first), image (described), YouTube URL (audio
    downloaded and transcribed).

    Temporary audio files are always removed, including when conversion,
    download or transcription fails.

    Returns:
        A status message for the UI.
    """
    global personal_vectorstore
    clean_title = (title or "").strip()
    docs_to_add = []

    if text_input and text_input.strip():
        docs_to_add = parse_and_tag_entries(f"Title: {clean_title}\n\nContent: {text_input}", "Text Input")
    elif file_input:
        content_source = os.path.basename(file_input)
        path_for_transcription = file_input
        temp_audio_file_path = None
        try:
            if file_input.lower().endswith('.mov'):
                print(f"Detected .mov file. Converting {file_input} to audio...")
                try:
                    fd, temp_audio_file_path = tempfile.mkstemp(suffix=".mp3")
                    os.close(fd)
                    with VideoFileClip(file_input) as video_clip:
                        audio_clip = video_clip.audio
                        if audio_clip is None:
                            return "Error converting .mov file: the video has no audio track."
                        audio_clip.write_audiofile(temp_audio_file_path, logger=None)
                    path_for_transcription = temp_audio_file_path
                    print("Conversion successful.")
                except Exception as e:
                    return f"Error converting .mov file: {e}"

            if file_input.lower().endswith('.txt'):
                with open(file_input, 'r', encoding='utf-8') as f:
                    file_content = f.read()
                docs_to_add = parse_and_tag_entries(file_content, content_source)
            else:
                final_title = clean_title if clean_title else "Audio/Video Note"
                content_text = transcribe_audio(path_for_transcription)
                full_content = f"Title: {final_title}\n\nContent: {content_text}"
                docs_to_add = parse_and_tag_entries(full_content, content_source)
        finally:
            # Runs on early returns and on exceptions, so the temp file never lingers.
            if temp_audio_file_path and os.path.exists(temp_audio_file_path):
                os.remove(temp_audio_file_path)
    elif image_input:
        final_title = clean_title if clean_title else "Image Note"
        content_text = describe_image(image_input)
        full_content = f"Title: {final_title}\n\nContent: {content_text}"
        docs_to_add = parse_and_tag_entries(full_content, "Image Input")
    elif yt_url and ("youtube.com" in yt_url or "youtu.be" in yt_url):
        temp_audio_path = None
        try:
            yt = YouTube(yt_url)
            video_title = yt.title
            final_title = clean_title if clean_title else video_title
            audio_stream = yt.streams.get_audio_only()
            # Reserve a path and close the handle before the download writes to it.
            fd, temp_audio_path = tempfile.mkstemp(suffix=".mp4")
            os.close(fd)
            audio_stream.download(filename=temp_audio_path)
            content_text = transcribe_audio(temp_audio_path)
            content_source = f"YouTube: {video_title}"
            full_content = f"Title: {final_title}\n\nContent: {content_text}"
            docs_to_add = parse_and_tag_entries(full_content, content_source)
        except Exception as e:
            return f"Error processing YouTube link: {e}"
        finally:
            if temp_audio_path and os.path.exists(temp_audio_path):
                os.remove(temp_audio_path)
    else:
        return "Please provide a title and content, or another input source."

    if not docs_to_add:
        return "No processable content found to add."

    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore(docs_to_add, PERSONAL_INDEX_PATH, is_personal=True)
    else:
        personal_vectorstore.add_documents(docs_to_add)

    personal_vectorstore.save_local(PERSONAL_INDEX_PATH)
    return f"Successfully added {len(docs_to_add)} new memory/memories."
263
+
264
def save_chat_to_memory(chat_history):
    """Store the current conversation as one document in the personal vector store.

    Debug annotations (auto-detected context and sources-used messages) are
    left out of the saved transcript. The document is titled with the current
    local timestamp.

    Returns:
        A status message for the UI.
    """
    global personal_vectorstore
    if not chat_history:
        return "Nothing to save."

    annotation_prefixes = ("*(Auto-detected context:", "*(Sources used:")
    transcript_lines = []
    for turn in chat_history:
        speaker = "User" if turn["role"] == "user" else "Assistant"
        text = turn["content"].strip()
        if not text.startswith(annotation_prefixes):
            transcript_lines.append(f"{speaker}: {text}")

    conversation_text = "\n".join(transcript_lines)
    if not conversation_text:
        return "No conversation content to save."

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    title = f"Conversation from {timestamp}"
    memory_doc = Document(
        page_content=f"Title: {title}\n\nContent:\n{conversation_text}",
        metadata={"source": "Saved Chat", "title": title},
    )

    if personal_vectorstore is not None:
        personal_vectorstore.add_documents([memory_doc])
    else:
        personal_vectorstore = build_or_load_vectorstore([memory_doc], PERSONAL_INDEX_PATH, is_personal=True)
    personal_vectorstore.save_local(PERSONAL_INDEX_PATH)

    print("Saved conversation to long-term memory.")
    return f"Conversation from {timestamp} saved successfully!"
286
+
287
def list_personal_memories():
    """Build UI updates listing every document in the personal vector store.

    Returns:
        Two ``gr.update`` objects: table rows of (title, source, content) and
        dropdown choices made of each document's full content. Placeholder
        values are returned when there is nothing stored.
    """
    global personal_vectorstore
    stored = None
    if personal_vectorstore is not None:
        stored = getattr(personal_vectorstore.docstore, '_dict', None)
    if not stored:
        return gr.update(value=[["No memories to display", "", ""]]), gr.update(choices=["No memories to select"], value=None)

    docs = list(personal_vectorstore.docstore._dict.values())
    rows = []
    choices = []
    for doc in docs:
        rows.append([doc.metadata.get('title', 'Untitled'), doc.metadata.get('source', 'Unknown'), doc.page_content])
    for doc in docs:
        choices.append(doc.page_content)
    return gr.update(value=rows), gr.update(choices=choices)
295
+
296
def delete_personal_memory(memory_to_delete):
    """Remove every stored document whose content equals ``memory_to_delete``.

    The index is rebuilt from the remaining documents. When nothing remains,
    the on-disk index directory is removed and an empty store is created.

    Returns:
        A status message for the UI.
    """
    global personal_vectorstore
    if personal_vectorstore is None or not memory_to_delete:
        return "Knowledge base is empty or no memory selected."

    all_docs = list(personal_vectorstore.docstore._dict.values())
    survivors = []
    for doc in all_docs:
        if doc.page_content != memory_to_delete:
            survivors.append(doc)

    # Same count means no document matched the selection.
    if len(survivors) == len(all_docs):
        return "Error: Could not find the selected memory to delete."

    print(f"Deleting memory. {len(survivors)} memories remaining.")
    if survivors:
        rebuilt = FAISS.from_documents(survivors, _default_embeddings())
        rebuilt.save_local(PERSONAL_INDEX_PATH)
        personal_vectorstore = rebuilt
    else:
        if os.path.isdir(PERSONAL_INDEX_PATH):
            shutil.rmtree(PERSONAL_INDEX_PATH)
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
    return "Successfully deleted memory. The list will now refresh."
314
+
315
def chat_fn(user_text, audio_file, settings, chat_history):
    """Answer one chat turn and return the updated UI state.

    The question comes from ``user_text``, or from transcribing
    ``audio_file`` when no text was typed. Tags come from the manual settings
    if any is set, otherwise from auto-detection. The answer is produced by a
    RAG chain over the active theme's index and the personal index.

    The history handed to the chain is the conversation before this turn,
    with debug annotations removed. The current question is passed
    separately. Previously the history was empty whenever no debug message
    followed the user turn.

    Args:
        user_text: Typed message, may be empty or None.
        audio_file: Path to a recorded message, used only when no text is given.
        settings: Settings dict as built by ``collect_settings``; None is treated as empty.
        chat_history: List of ``{"role", "content"}`` messages; None is treated as empty.

    Returns:
        A tuple of (cleared textbox value, audio update or None, chat history).
    """
    global personal_vectorstore, example_retriever
    settings = settings or {}
    if chat_history is None:
        chat_history = []

    question = (user_text or "").strip()
    if audio_file and not question:
        try:
            voice_lang_name = settings.get("tts_lang", "English")
            voice_lang_code = CONFIG["languages"].get(voice_lang_name, "en")
            question = transcribe_audio(audio_file, lang=voice_lang_code)
        except Exception as e:
            err_msg = f"Audio Error: {e}" if settings.get("debug_mode") else "Sorry, I couldn't understand the audio."
            chat_history.append({"role": "assistant", "content": err_msg})
            return "", None, chat_history
    if not question:
        return "", None, chat_history

    # Snapshot the prior turns before this turn's messages are appended.
    # Debug annotations are display-only and are kept out of the model's context.
    annotation_prefixes = ("*(Auto-detected context:", "*(Sources used:")
    prior_history = [
        msg for msg in chat_history
        if not (isinstance(msg.get("content"), str) and msg["content"].startswith(annotation_prefixes))
    ]

    chat_history.append({"role": "user", "content": question})

    # A cleared dropdown may yield None; treat it like the "None" choice.
    manual_behavior_tag = settings.get("behaviour_tag") or "None"
    manual_emotion_tag = settings.get("emotion_tag") or "None"
    manual_topic_tag = settings.get("topic_tag") or "None"

    if manual_behavior_tag != "None" or manual_emotion_tag != "None" or manual_topic_tag != "None":
        print("Manual tags detected, skipping auto-detection.")
        scenario_tag = [manual_behavior_tag] if manual_behavior_tag != "None" else []
        emotion_tag = manual_emotion_tag
        topic_tag = manual_topic_tag
    else:
        print("No manual tags set, running auto-detection...")
        detected_tags = detect_tags_from_query(
            question,
            behavior_options=CONFIG.get("behavior_tags", []), emotion_options=CONFIG.get("emotion_tags", []),
            topic_options=CONFIG.get("topic_tags", []), context_options=CONFIG.get("context_tags", []),
            example_retriever=example_retriever
        )
        scenario_tag = detected_tags.get("detected_behaviors", [])
        emotion_tag = detected_tags.get("detected_emotion", "None")
        topic_tag = detected_tags.get("detected_topic", "None")

    detected_parts = []
    if scenario_tag: detected_parts.append(f"Behaviors=`{', '.join(scenario_tag)}`")
    if emotion_tag and emotion_tag != "None": detected_parts.append(f"Emotion=`{emotion_tag}`")
    if topic_tag and topic_tag != "None": detected_parts.append(f"Topic=`{topic_tag}`")
    if detected_parts and settings.get("debug_mode"):
        detected_msg = f"*(Auto-detected context: {', '.join(detected_parts)})*"
        chat_history.append({"role": "assistant", "content": detected_msg})

    active_theme = settings.get("active_theme", "All")
    vs_general = ensure_index(active_theme)
    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)

    rag_chain_settings = {
        "role": settings.get("role"), "temperature": settings.get("temperature"), "language": settings.get("language"),
        "patient_name": settings.get("patient_name"), "caregiver_name": settings.get("caregiver_name"), "tone": settings.get("tone"),
    }
    chain = make_rag_chain(vs_general, personal_vectorstore, **rag_chain_settings)

    # The chain expects None, not "None" or an empty list, for an absent tag.
    final_scenario_tag = scenario_tag if scenario_tag else None
    final_emotion_tag = emotion_tag if emotion_tag != "None" else None
    final_topic_tag = topic_tag if topic_tag != "None" else None

    response = answer_query(chain, question, chat_history=prior_history, scenario_tag=final_scenario_tag, emotion_tag=final_emotion_tag, topic_tag=final_topic_tag)

    answer = response.get("answer", "[No answer found]")
    chat_history.append({"role": "assistant", "content": answer})

    if settings.get("debug_mode") and response.get("sources"):
        sources = response.get("sources", [])
        valid_sources = [s for s in sources if s and s not in ["unknown", "placeholder", "General Knowledge"]]
        if valid_sources:
            source_msg = f"*(Sources used: {', '.join(valid_sources)})*"
            chat_history.append({"role": "assistant", "content": source_msg})

    audio_out = None
    if settings.get("tts_on") and answer:
        tts_lang_code = CONFIG["languages"].get(settings.get("tts_lang"), "en")
        audio_out = synthesize_tts(answer, lang=tts_lang_code)

    # Hide the audio player when there is nothing to play.
    return "", gr.update(value=audio_out, visible=bool(audio_out)), chat_history
414
+
415
def upload_knowledge(files, current_theme):
    """Copy uploaded files into the theme and invalidate its cached index.

    Each item may be a plain path string or an object exposing the path as
    ``.name``. A file that fails to copy is reported on stdout and skipped.

    The cache is keyed by canonical theme name, so the same canonical name is
    used to drop the stale entry.

    Returns:
        A status message for the UI.
    """
    if not files:
        return "No files were selected to upload."
    added = 0
    for f in files:
        src_path = f if isinstance(f, str) else getattr(f, "name", None)
        try:
            copy_into_theme(current_theme, src_path)
            added += 1
        except Exception as e:
            print(f"Error uploading file {src_path}: {e}")
    if added > 0:
        vectorstores.pop(canonical_theme(current_theme), None)
    return f"Uploaded {added} file(s). Refreshing file list..."
424
def save_file_selection(current_theme, enabled_files):
    """Persist which of the theme's files are enabled and invalidate its index.

    Args:
        current_theme: Theme whose manifest is updated.
        enabled_files: Names of the files to enable; None means none selected.

    Returns:
        A status message for the UI.
    """
    enabled = set(enabled_files or [])
    man = load_manifest(current_theme)
    files = man.setdefault("files", {})
    for fname in files:
        files[fname] = fname in enabled
    save_manifest(current_theme, man)
    # The cache is keyed by canonical theme name, so drop the entry under that key.
    vectorstores.pop(canonical_theme(current_theme), None)
    return f"Settings saved. Index for theme '{current_theme}' will rebuild on the next query."
430
def refresh_file_list_ui(current_theme):
    """Return a checkbox update and a summary message for the theme's files.

    Returns:
        A ``gr.update`` offering every file as a choice with the enabled ones
        selected, and a message with the total and enabled counts.
    """
    listing = list_theme_files(current_theme)
    all_names = []
    enabled_names = []
    for file_name, is_enabled in listing:
        all_names.append(file_name)
        if is_enabled:
            enabled_names.append(file_name)
    summary = f"Found {len(listing)} file(s). {len(enabled_names)} enabled."
    return gr.update(choices=all_names, value=enabled_names), summary
435
def auto_setup_on_load(current_theme):
    """Seed an empty theme with sample data and return the initial UI state.

    Returns:
        A tuple of (default settings dict, file-list update, status message).
    """
    is_first_run = not os.listdir(theme_upload_dir(current_theme))
    if is_first_run:
        print("First-time setup: Auto-seeding sample data...")
        seed_files_into_theme(current_theme)

    default_values = ("caregiver", "", "", "warm", "English", "English", 0.7, "None", "None", "None", "All", True, False)
    all_settings = collect_settings(*default_values)
    files_ui, status_msg = refresh_file_list_ui(current_theme)
    return all_settings, files_ui, status_msg
443
+
444
def setup_example_retriever():
    """Build ``example_retriever`` from the conversation test fixtures.

    The fixtures file is looked up relative to the working directory first
    and then next to this script, the location ``load_test_fixtures`` uses.
    Each JSONL line becomes a document whose content is the text of the
    fixture's first turn. Blank lines are skipped. Lines that are not valid
    JSON or lack a first turn are reported and skipped, so one bad line no
    longer aborts startup.
    """
    global example_retriever
    print("Setting up example retriever...")
    fixtures_path = "conversation_test_fixtures.jsonl"
    if not os.path.exists(fixtures_path):
        try:
            script_dir = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            # __file__ is undefined in some interactive contexts.
            script_dir = None
        alternative = os.path.join(script_dir, "conversation_test_fixtures.jsonl") if script_dir else None
        if alternative and os.path.exists(alternative):
            fixtures_path = alternative
        else:
            print(f"WARNING: Test fixtures file not found at {fixtures_path}")
            return

    example_docs = []
    with open(fixtures_path, "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                data = json.loads(line)
                first_turn_text = data["turns"][0]["text"]
            except (ValueError, KeyError, IndexError, TypeError) as exc:
                print(f"WARNING: Skipping fixture on line {line_no}: {exc!r}")
                continue
            example_docs.append(Document(page_content=first_turn_text, metadata={"full_fixture": data}))

    if example_docs:
        example_vs = FAISS.from_documents(example_docs, _default_embeddings())
        example_retriever = example_vs.as_retriever(search_kwargs={"k": 3})
        print(f"Example retriever created with {len(example_docs)} examples.")
461
+
462
def load_test_fixtures():
    """Load the NLU test fixtures and return a dropdown update with their titles.

    Fixtures are read from ``conversation_test_fixtures.jsonl`` next to this
    script. Blank lines are skipped. Lines that are not valid JSON objects
    with a "title" are reported and skipped, so one bad line no longer
    empties the dropdown. ``test_fixtures`` is replaced only after the file
    has been read completely.

    Returns:
        ``gr.update(choices=...)`` with the fixture titles, or with an empty
        list when the file is missing or cannot be read.
    """
    global test_fixtures
    test_fixtures = []
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        fixtures_path = os.path.join(script_dir, "conversation_test_fixtures.jsonl")
        if not os.path.exists(fixtures_path):
            print("WARNING: Test fixtures file not found.")
            return gr.update(choices=[])

        loaded = []
        with open(fixtures_path, "r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                if not line.strip():
                    continue
                try:
                    fixture = json.loads(line)
                except ValueError as exc:
                    print(f"WARNING: Skipping malformed fixture on line {line_no}: {exc}")
                    continue
                if not isinstance(fixture, dict) or "title" not in fixture:
                    print(f"WARNING: Skipping fixture without a title on line {line_no}.")
                    continue
                loaded.append(fixture)

        test_fixtures = loaded
        return gr.update(choices=[fixture["title"] for fixture in loaded])
    except Exception as e:
        # Best-effort boundary: the UI should still load without the test tab's data.
        print(f"UNEXPECTED ERROR during file loading: {e}")
        return gr.update(choices=[])
479
+
480
def run_nlu_test(test_title: str):
    """Run tag detection on one fixture and grade it against its expectations.

    A category passes when the expected and detected tag sets share at least
    one element. Categories with no expected tags are not graded.

    Returns:
        A tuple of (markdown status line, comparison rows), or (message, None)
        when no usable test case is selected.
    """
    print("\n--- RUNNING NLU TEST (Flexible Grading) ---")
    if not (test_title and test_fixtures):
        return "Please select a test case.", None

    selected_fixture = None
    for candidate in test_fixtures:
        if candidate["title"] == test_title:
            selected_fixture = candidate
            break
    if not selected_fixture:
        return f"Error: Could not find test case titled '{test_title}'.", None

    user_query = selected_fixture["turns"][0]["text"]
    expected_results = selected_fixture["expected"]
    print(f"Test Case: '{test_title}'")
    print(f"User Query: '{user_query}'")

    raw = detect_tags_from_query(
        user_query,
        behavior_options=CONFIG.get("behavior_tags", []),
        emotion_options=CONFIG.get("emotion_tags", []),
        topic_options=CONFIG.get("topic_tags", []),
        context_options=CONFIG.get("context_tags", []),
        example_retriever=example_retriever,
    )
    print(f"\nRAW NLU RESULTS from detect_tags_from_query:\n{raw}\n")

    # Single-valued results are wrapped in lists so every category compares as a set.
    actual_results = {
        "emotion": [raw.get("detected_emotion")],
        "behaviors": raw.get("detected_behaviors", []),
        "topic_tags": [raw.get("detected_topic")],
        "context_tags": raw.get("detected_contexts", []),
    }

    passed = 0
    graded = 0
    rows = []
    print("--- COMPARING RESULTS ---")
    for category in sorted(set(expected_results.keys()) | set(actual_results.keys())):
        expected_set = set(expected_results.get(category, []))
        actual_set = {tag for tag in actual_results.get(category, []) if tag and tag != "None"}
        if not expected_set:
            continue
        graded += 1
        is_pass = len(expected_set & actual_set) > 0
        print(
            f"Category: '{category}'",
            f" - Expected Set: {expected_set}",
            f" - Actual Set : {actual_set}",
            " - Logic : len(intersection) > 0",
            f" - Result : {is_pass}",
            sep="\n",
        )
        if is_pass:
            passed += 1
        actual_text = ", ".join(sorted(actual_set)) if actual_set else "None"
        verdict = "✅ Pass" if is_pass else "❌ Fail"
        rows.append([test_title, category, ", ".join(sorted(expected_set)), actual_text, verdict])

    status = f"## Test Result: {passed} / {graded} Categories Passed"
    print(f"Final Status: {passed}/{graded} passed.")
    print("--- TEST COMPLETE ---\n")
    return status, rows
529
+
530
def run_all_nlu_tests():
    """Run tag detection on every loaded fixture and summarise the results.

    Grading matches the single-test runner: a category passes when the
    expected and detected tag sets overlap, and categories with no expected
    tags are skipped.

    Returns:
        A tuple of (markdown summary, comparison rows), or (message, None)
        when no fixtures are loaded.
    """
    if not test_fixtures:
        return "Test fixtures are not loaded. Please check your JSONL file.", None

    print("\n--- RUNNING ALL NLU TESTS ---")
    rows = []
    passed = 0
    graded = 0
    for fixture in test_fixtures:
        user_query = fixture["turns"][0]["text"]
        expected_results = fixture["expected"]
        test_title = fixture["title"]
        print(f"\n-- Testing Case: {test_title} --")

        raw = detect_tags_from_query(
            user_query,
            behavior_options=CONFIG.get("behavior_tags", []),
            emotion_options=CONFIG.get("emotion_tags", []),
            topic_options=CONFIG.get("topic_tags", []),
            context_options=CONFIG.get("context_tags", []),
            example_retriever=example_retriever,
        )
        actual_results = {
            "emotion": [raw.get("detected_emotion")],
            "behaviors": raw.get("detected_behaviors", []),
            "topic_tags": [raw.get("detected_topic")],
            "context_tags": raw.get("detected_contexts", []),
        }

        for category in sorted(set(expected_results.keys())):
            expected_set = set(expected_results.get(category, []))
            actual_set = {tag for tag in actual_results.get(category, []) if tag and tag != "None"}
            if not expected_set:
                continue
            graded += 1
            is_pass = len(expected_set & actual_set) > 0
            if is_pass:
                passed += 1
            actual_text = ", ".join(sorted(actual_set)) if actual_set else "None"
            verdict = "✅ Pass" if is_pass else "❌ Fail"
            rows.append([test_title, category, ", ".join(sorted(expected_set)), actual_text, verdict])

    # Avoid dividing by zero when no fixture had any expected tags.
    pass_rate = (passed / graded * 100) if graded > 0 else 0
    summary_status = f"## Batch Test Complete\n- **Overall Score:** {passed} / {graded} Categories Passed\n- **Pass Rate:** {pass_rate:.2f}%"
    print(summary_status)
    print("--- BATCH TEST COMPLETE ---")
    return summary_status, rows
575
+
576
def run_all_startup_tasks(current_theme):
    """Run every startup step in order and return the values the UI needs.

    Returns:
        A tuple of (settings, file-list update, status message, test-case
        dropdown update).
    """
    print("--- Running all startup tasks ---")
    initial_state = auto_setup_on_load(current_theme)
    setup_example_retriever()
    test_case_choices = load_test_fixtures()
    return (*initial_state, test_case_choices)
583
+
584
+
585
+ # --- UI Definition ---
586
# Page-level CSS: base font size, a minimum height for the chat history, and
# compact audio players (targeted through the elem_id values set below).
CSS = ".gradio-container { font-size: 14px; } #chatbot { min-height: 250px; } #audio_out audio { max-height: 40px; } #audio_in audio { max-height: 40px; padding: 0; }"

# NOTE(review): the container nesting below (which widgets sit inside which
# Row/Group/Accordion) follows the conventional layout for these widgets —
# confirm against the rendered page.
with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
    # Settings dict shared with the chat handler. It is written by
    # collect_settings on every settings change and by the startup task.
    settings_state = gr.State({})

    with gr.Tab("Chat"):
        user_text = gr.Textbox(show_label=False, placeholder="Type your message here...")
        audio_in = gr.Audio(sources=["microphone"], type="filepath", label="Voice Input", elem_id="audio_in")
        with gr.Row():
            submit_btn = gr.Button("Send", variant="primary")
            save_btn = gr.Button("Save to Memory")
            clear_btn = gr.Button("Clear")
        chat_status = gr.Markdown()
        audio_out = gr.Audio(label="Response Audio", autoplay=True, visible=True, elem_id="audio_out")
        chatbot = gr.Chatbot(elem_id="chatbot", label="Conversation", type="messages")

    with gr.Tab("Personalize"):
        with gr.Accordion("Add to Personal Knowledge Base", open=True):
            gr.Markdown("Add personal notes, memories, or descriptions. A descriptive title helps the AI find memories more accurately.")
            personal_title = gr.Textbox(label="Title / Entry Name", placeholder="e.g., 'Dad's favorite songs'")
            personal_text = gr.Textbox(lines=5, label="Text Content (or use file upload)", placeholder="Type or paste text here. Use '—' on a new line to separate multiple entries.")
            personal_file = gr.File(label="Upload Audio/Video/Text File")
            personal_image = gr.Image(type="filepath", label="Upload Image")
            personal_yt_url = gr.Textbox(label="Or, provide a YouTube URL", placeholder="Paste a YouTube link here...")
            with gr.Row():
                personal_add_btn = gr.Button("Add Knowledge to Memory", variant="primary")
            personal_status = gr.Markdown()
        with gr.Accordion("Manage Personal Knowledge", open=False):
            personal_memory_display = gr.DataFrame(headers=["Title", "Source", "Content"], label="Saved Personal Memories", interactive=False, row_count=(5, "dynamic"))
            with gr.Row():
                personal_refresh_btn = gr.Button("Refresh Memories")
            with gr.Row():
                personal_delete_selector = gr.Dropdown(label="Select a memory to delete (by its full content)", scale=3, interactive=True)
                personal_delete_btn = gr.Button("Delete Selected Memory", variant="stop", scale=1)
            personal_delete_status = gr.Markdown()

    with gr.Tab("Testing"):
        gr.Markdown("## NLU Context Detection Tests")
        gr.Markdown("Select a single test case to run, or run all tests at once for a full report.")
        with gr.Row():
            test_case_dropdown = gr.Dropdown(label="Select Single Test Case", scale=2)
            run_single_test_btn = gr.Button("Run Single Test", variant="secondary", scale=1)
            run_all_tests_btn = gr.Button("Run All Tests", variant="primary", scale=1)
        test_status_md = gr.Markdown("### Please select and run a test.")
        test_results_df = gr.DataFrame(
            label="Test Results Comparison",
            headers=["Test Case", "Category", "Expected", "Actual", "Result"],
            interactive=False, row_count=(20, "dynamic")
        )

    with gr.Tab("Settings"):
        with gr.Group():
            gr.Markdown("## Conversation & Persona Settings")
            with gr.Row():
                role = gr.Radio(CONFIG["roles"], value="caregiver", label="Your Role")
                temperature = gr.Slider(0.0, 1.2, value=0.7, step=0.1, label="Creativity")
                tone = gr.Dropdown(CONFIG["tones"], value="warm", label="Response Tone")
            with gr.Row():
                patient_name = gr.Textbox(label="Patient's Name", placeholder="e.g., 'Dad' or 'John'")
                caregiver_name = gr.Textbox(label="Caregiver's Name", placeholder="e.g., 'me' or 'Jane'")
                behaviour_tag = gr.Dropdown(CONFIG["behavior_tags"], value="None", label="Behaviour Filter (Manual Override)")
                emotion_tag = gr.Dropdown(CONFIG["emotion_tags"], value="None", label="Emotion Filter (Manual Override)")
                topic_tag = gr.Dropdown(CONFIG["topic_tags"], value="None", label="Topic Tag Filter (Manual Override)")
            with gr.Accordion("Language, Voice & Debugging", open=False):
                language = gr.Dropdown(list(CONFIG["languages"].keys()), value="English", label="Response Language")
                tts_lang = gr.Dropdown(list(CONFIG["languages"].keys()), value="English", label="Voice Language")
                tts_on = gr.Checkbox(True, label="Enable Voice Response (TTS)")
                debug_mode = gr.Checkbox(False, label="Show Debug Info")
        gr.Markdown("--- \n ## General Knowledge Base Management")
        active_theme = gr.Radio(CONFIG["themes"], value="All", label="Active Knowledge Theme")
        with gr.Row():
            with gr.Column(scale=1):
                files_in = gr.File(file_count="multiple", file_types=[".jsonl", ".txt"], label="Upload Knowledge Files")
                upload_btn = gr.Button("Upload to Theme", variant="secondary")
                seed_btn = gr.Button("Import Sample Data", variant="secondary")
            with gr.Column(scale=2):
                mgmt_status = gr.Markdown()
                files_box = gr.CheckboxGroup(choices=[], label="Enable Files for the Selected Theme")
                with gr.Row():
                    save_files_btn = gr.Button("Save Selection", variant="primary")
                    refresh_btn = gr.Button("Refresh List")

    # --- Event Wiring ---
    # The order of this list is the positional argument order passed to
    # collect_settings, so it must stay in sync with that function's signature.
    all_settings_components = [role, patient_name, caregiver_name, tone, language, tts_lang, temperature, behaviour_tag, emotion_tag, topic_tag, active_theme, tts_on, debug_mode]
    # Any change to any settings widget re-collects the full settings dict.
    for component in all_settings_components:
        component.change(fn=collect_settings, inputs=all_settings_components, outputs=settings_state)

    submit_btn.click(fn=chat_fn, inputs=[user_text, audio_in, settings_state, chatbot], outputs=[user_text, audio_out, chatbot])
    save_btn.click(fn=save_chat_to_memory, inputs=[chatbot], outputs=[chat_status])
    # Reset the chat tab. Every output component is listed exactly once so each
    # returned value maps to a distinct component (the text box is reset to "").
    clear_btn.click(
        lambda: ("", None, [], None, ""),
        outputs=[user_text, audio_out, chatbot, audio_in, chat_status]
    )

    # Add a personal memory, then blank all five input widgets.
    personal_add_btn.click(
        fn=handle_add_knowledge,
        inputs=[personal_title, personal_text, personal_file, personal_image, personal_yt_url],
        outputs=[personal_status]
    ).then(
        lambda: (None, None, None, None, None),
        outputs=[personal_title, personal_text, personal_file, personal_image, personal_yt_url]
    )

    personal_refresh_btn.click(fn=list_personal_memories, inputs=None, outputs=[personal_memory_display, personal_delete_selector])
    # After a delete, re-list so the table and the delete dropdown are refreshed.
    personal_delete_btn.click(fn=delete_personal_memory, inputs=[personal_delete_selector], outputs=[personal_delete_status]).then(fn=list_personal_memories, inputs=None, outputs=[personal_memory_display, personal_delete_selector])

    # General knowledge base management: uploading and seeding are each
    # followed by a refresh of the file list for the active theme.
    upload_btn.click(upload_knowledge, inputs=[files_in, active_theme], outputs=[mgmt_status]).then(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    save_files_btn.click(save_file_selection, inputs=[active_theme, files_box], outputs=[mgmt_status])
    seed_btn.click(seed_files_into_theme, inputs=[active_theme]).then(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    refresh_btn.click(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    active_theme.change(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])

    # Wiring for the Testing Tab
    run_single_test_btn.click(
        fn=run_nlu_test,
        inputs=[test_case_dropdown],
        outputs=[test_status_md, test_results_df]
    )
    run_all_tests_btn.click(
        fn=run_all_nlu_tests,
        inputs=None,
        outputs=[test_status_md, test_results_df]
    )

    # Single, unified startup event: fills the settings state, the file list,
    # the management status and the test-case dropdown on page load.
    demo.load(
        fn=run_all_startup_tasks,
        inputs=[active_theme],
        outputs=[settings_state, files_box, mgmt_status, test_case_dropdown]
    )
708
+
709
+ # --- Startup Logic ---
710
def pre_load_indexes():
    """Load every theme index and the personal index, logging progress to stdout.

    Each load is attempted independently: an exception from one index is
    printed and recorded, and the remaining indexes are still attempted.
    The closing message names any indexes that failed instead of claiming
    success unconditionally.

    Side effects:
        Rebinds the module-level ``personal_vectorstore`` when the personal
        index loads without raising.
    """
    global personal_vectorstore
    failed = []  # human-readable labels of indexes whose load raised

    print("Pre-loading all knowledge base indexes at startup...")
    for theme in CONFIG["themes"]:
        print(f" - Loading general index for theme: '{theme}'")
        try:
            ensure_index(theme)
        except Exception as e:
            # Best-effort startup: report and continue with the next theme.
            print(f" ...Error loading theme '{theme}': {e}")
            failed.append(f"theme '{theme}'")
        else:
            print(f" ...'{theme}' theme loaded successfully.")

    print(" - Loading personal knowledge index...")
    try:
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
    except Exception as e:
        print(f" ...Error loading personal knowledge: {e}")
        failed.append("personal knowledge")
    else:
        print(" ...Personal knowledge loaded successfully.")

    if failed:
        print(f"Index pre-loading finished with errors in: {', '.join(failed)}. Application is starting anyway.")
    else:
        print("All indexes loaded. Application is ready.")
727
+
728
if __name__ == "__main__":
    # Script entry point: load the indexes first, then enable the request
    # queue and start the Gradio server in debug mode.
    pre_load_indexes()
    queued_demo = demo.queue()
    queued_demo.launch(debug=True)