KeenWoo committed on
Commit
c66abb4
·
verified ·
1 Parent(s): 8960ee9

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +786 -0
app.py ADDED
@@ -0,0 +1,786 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import shutil
4
+ import gradio as gr
5
+ import tempfile
6
+ from datetime import datetime
7
+ from typing import List, Dict, Any, Optional
8
+ from pytube import YouTube
9
+ from pathlib import Path # <-- Add this import at the top of your file with the other imports
10
+ import re
11
+
12
+
13
+ # --- Agent Imports & Safe Fallbacks ---
14
try:
    from alz_companion.agent import (
        bootstrap_vectorstore, make_rag_chain, answer_query, synthesize_tts,
        transcribe_audio, detect_tags_from_query, describe_image, build_or_load_vectorstore,
        _default_embeddings
    )
    from alz_companion.prompts import BEHAVIOUR_TAGS, EMOTION_STYLES
    from langchain.schema import Document
    from langchain_community.vectorstores import FAISS
    AGENT_OK = True
except Exception as e:
    # The agent package (or one of its dependencies) is unavailable: define
    # stand-ins with the same names so the UI can still be exercised.
    AGENT_OK = False

    class Document:
        """Minimal stand-in exposing the two attributes the app reads."""

        def __init__(self, page_content, metadata):
            self.page_content = page_content
            self.metadata = metadata

    class FAISS:
        """In-memory stand-in offering the vector-store methods the app calls."""

        def __init__(self, docs=None):
            # The app inspects ``docstore._dict`` directly, so mirror that shape.
            self.docstore = type('obj', (object,), {'_dict': {}})()
            self.add_documents(docs or [])

        def add_documents(self, docs):
            """Store ``docs`` under sequential string keys."""
            for doc in docs:
                self.docstore._dict[str(len(self.docstore._dict))] = doc

        def save_local(self, index_path):
            """No-op: nothing is persisted in demo mode."""
            return None

        @classmethod
        def from_documents(cls, docs, embedding=None):
            """Build a store holding ``docs``; the embedding is ignored."""
            return cls(docs)

    def bootstrap_vectorstore(sample_paths=None, index_path="data/"):
        return object()

    def build_or_load_vectorstore(docs, index_path, is_personal=False):
        # Return a usable stub so add/list/delete callbacks do not crash.
        return FAISS(docs)

    def make_rag_chain(vs_general, vs_personal, **kwargs):
        return lambda q, **k: {"answer": f"(Demo) You asked: {q}", "sources": []}

    def answer_query(chain, q, **kwargs):
        return chain(q, **kwargs)

    def synthesize_tts(text: str, lang: str = "en"):
        return None

    def transcribe_audio(filepath: str, lang: str = "en"):
        return "This is a transcribed message."

    def detect_tags_from_query(query: str, behavior_options: list = None, emotion_options: list = None,
                               topic_options: list = None, context_options: list = None):
        # Accept every keyword the callers pass and return both the singular
        # and the list-valued keys that the callers read.
        return {
            "detected_behavior": "None",
            "detected_behaviors": [],
            "detected_emotion": "None",
            "detected_topic": "None",
            "detected_contexts": [],
        }

    def describe_image(image_path: str):
        return "This is a description of an image."

    def _default_embeddings():
        return None

    BEHAVIOUR_TAGS = {"None": []}
    EMOTION_STYLES = {"None": {}}
    print(f"WARNING: Could not import from alz_companion ({e}). Running in UI-only demo mode.")
46
+
47
+ # --- Centralized Configuration ---
48
# Central registry of the option lists the UI and the NLU calls read from.
CONFIG = {
    "themes": ["All", "The Father", "Still Alice", "Away from Her", "Alive Inside", "General Caregiving"],
    "roles": ["patient", "caregiver"],
    # "None" is always offered first; skip it when the tag table already
    # contains it (the demo fallback does) so it is not listed twice.
    "behavior_tags": ["None"] + [tag for tag in BEHAVIOUR_TAGS if tag != "None"],
    "emotion_tags": ["None"] + [tag for tag in EMOTION_STYLES if tag != "None"],
    "topic_tags": [
        "None",
        "caregiving_advice",
        "medical_fact",
        "personal_story",
        "research_update",
        "treatment_option:home_safety",
        "treatment_option:long_term_care",
        "treatment_option:music_therapy",
        "treatment_option:reassurance",
        "treatment_option:routine_structuring",
        "treatment_option:validation_therapy",
    ],
    "context_tags": [
        "None", "disease_stage_mild",
        "disease_stage_moderate", "disease_stage_advanced",
        "disease_stage_unspecified", "interaction_mode_one_to_one",
        "interaction_mode_small_group", "interaction_mode_group_activity",
        "relationship_family", "relationship_spouse",
        "relationship_staff_or_caregiver", "relationship_unspecified",
        "setting_home_or_community", "setting_care_home",
        "setting_clinic_or_hospital",
    ],
    # Display name -> language code passed to transcription and TTS.
    "languages": {"English": "en", "Chinese": "zh", "Malay": "ms", "French": "fr", "Spanish": "es"},
    "tones": ["warm", "neutral", "formal", "playful"],
}
83
+
84
+
85
+ # --- File Management & Vector Store Logic ---
86
# Root directory for theme indexes and uploads; override with the INDEX_BASE
# environment variable.
INDEX_BASE = os.getenv('INDEX_BASE', 'data')
UPLOADS_BASE = os.path.join(INDEX_BASE, "uploads")

# The personal index is kept under the user's home directory rather than under
# INDEX_BASE. An earlier note in this file records that
# os.path.join(INDEX_BASE, "personal_faiss_index") did not work, and suggests
# a different folder name (e.g. "AlzPersonalData") when running another space.
PERSONAL_INDEX_PATH = os.path.join(str(Path.home()), "AlzCompanionData", "personal_faiss_index")
os.makedirs(UPLOADS_BASE, exist_ok=True)
# Only the parent directory is created here; the index itself is written by
# the vector store's save_local().
os.makedirs(os.path.dirname(PERSONAL_INDEX_PATH), exist_ok=True)

# Theme name -> index path, e.g. "The Father" -> <INDEX_BASE>/faiss_index_thefather.
THEME_PATHS = {t: os.path.join(INDEX_BASE, f"faiss_index_{t.replace(' ', '').lower()}") for t in CONFIG["themes"]}
# Cache of general vector stores keyed by theme name (filled by ensure_index).
vectorstores = {}
# Personal vector store; built on first use by the callbacks below.
personal_vectorstore = None
# NLU test cases; filled by load_test_fixtures().
test_fixtures = []
109
+
110
def canonical_theme(tk: str) -> str:
    """Return ``tk`` if it is a configured theme name, otherwise ``"All"``."""
    if tk in CONFIG["themes"]:
        return tk
    return "All"
111
def theme_upload_dir(theme: str) -> str:
    """Return the upload directory for ``theme``, creating it if needed.

    The folder name is ``theme_<name>`` with the canonical theme name
    lower-cased and stripped of spaces.
    """
    slug = canonical_theme(theme).replace(' ', '').lower()
    directory = os.path.join(UPLOADS_BASE, f"theme_{slug}")
    os.makedirs(directory, exist_ok=True)
    return directory
115
def load_manifest(theme: str) -> Dict[str, Any]:
    """Load the ``manifest.json`` of a theme's upload directory.

    Returns:
        The parsed manifest, always containing a ``"files"`` dict. A missing,
        unreadable or malformed manifest yields ``{"files": {}}``.
    """
    manifest_path = os.path.join(theme_upload_dir(theme), "manifest.json")
    try:
        with open(manifest_path, "r", encoding="utf-8") as f:
            manifest = json.load(f)
    except FileNotFoundError:
        # No manifest yet is the normal first-run state.
        return {"files": {}}
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"WARNING: Could not read manifest '{manifest_path}' ({exc}). Using an empty manifest.")
        return {"files": {}}

    if not isinstance(manifest, dict):
        return {"files": {}}
    # Callers index manifest["files"] directly, so guarantee the key and type.
    if not isinstance(manifest.get("files"), dict):
        manifest["files"] = {}
    return manifest
122
def save_manifest(theme: str, man: Dict[str, Any]):
    """Write ``man`` as indented JSON to the theme's ``manifest.json``."""
    manifest_path = os.path.join(theme_upload_dir(theme), "manifest.json")
    with open(manifest_path, "w", encoding="utf-8") as handle:
        json.dump(man, handle, indent=2)
124
def list_theme_files(theme: str) -> List[tuple[str, bool]]:
    """List the files of a theme with their enabled flag and sync the manifest.

    Manifest entries whose file still exists keep their flag; files found on
    disk but missing from the manifest are added as disabled. The manifest
    file itself is bookkeeping and is never reported as a knowledge file.

    Returns:
        ``(filename, enabled)`` pairs: manifest entries first, then newly
        discovered files in sorted order.
    """
    manifest_name = "manifest.json"
    man = load_manifest(theme)
    base = theme_upload_dir(theme)

    found = [
        (name, bool(enabled))
        for name, enabled in man.get("files", {}).items()
        if name != manifest_name and os.path.exists(os.path.join(base, name))
    ]
    known = {name for name, _ in found}

    for name in sorted(os.listdir(base)):
        if name == manifest_name or name in known:
            continue
        if os.path.isfile(os.path.join(base, name)):
            found.append((name, False))

    # Persist the reconciled view so stale entries disappear from the manifest.
    man["files"] = dict(found)
    save_manifest(theme, man)
    return found
134
def copy_into_theme(theme: str, src_path: str) -> str:
    """Copy ``src_path`` into the theme's upload directory and return the new path."""
    target = os.path.join(theme_upload_dir(theme), os.path.basename(src_path))
    shutil.copy2(src_path, target)
    return target
139
def seed_files_into_theme(theme: str):
    """Copy the bundled sample files into a theme's upload directory.

    Sample files that do not exist, or that are already present in the theme
    directory, are skipped. The manifest is saved only if something was copied.
    """
    SEED_FILES = [
        ("sample_data/caregiving_tips.txt", True),
        ("sample_data/the_father_segments_enriched_harmonized_plus.jsonl", True),
        ("sample_data/still_alice_enriched_harmonized_plus.jsonl", True),
        ("sample_data/away_from_her_enriched_harmonized_plus.jsonl", True),
        ("sample_data/alive_inside_enriched_harmonized.jsonl", True),
    ]
    man = load_manifest(theme)
    # A manifest read from disk may lack the "files" key; create it on demand.
    files = man.setdefault("files", {})
    upload_dir = theme_upload_dir(theme)
    changed = False

    for path, enable in SEED_FILES:
        if not os.path.exists(path):
            continue
        fname = os.path.basename(path)
        if os.path.exists(os.path.join(upload_dir, fname)):
            continue
        copy_into_theme(theme, path)
        files[fname] = bool(enable)
        changed = True

    if changed:
        save_manifest(theme, man)
156
+
157
def ensure_index(theme='All'):
    """Return the vector store for ``theme``, building and caching it on first use.

    Only files flagged as enabled for the theme are passed to
    ``bootstrap_vectorstore``.
    """
    theme = canonical_theme(theme)
    try:
        return vectorstores[theme]
    except KeyError:
        pass  # not cached yet; build it below

    upload_dir = theme_upload_dir(theme)
    sample_paths = []
    for name, enabled in list_theme_files(theme):
        if enabled:
            sample_paths.append(os.path.join(upload_dir, name))

    store = bootstrap_vectorstore(sample_paths=sample_paths, index_path=THEME_PATHS.get(theme))
    vectorstores[theme] = store
    return store
165
+
166
+ # --- Gradio Callbacks ---
167
def collect_settings(*args):
    """Pair positional UI values with their setting names.

    Values are matched to names in order; surplus values or names are dropped.
    """
    setting_names = (
        "role", "patient_name", "caregiver_name", "tone", "language",
        "tts_lang", "temperature", "behaviour_tag", "emotion_tag",
        "topic_tag", "active_theme", "tts_on", "debug_mode",
    )
    return {name: value for name, value in zip(setting_names, args)}
170
+
171
+ # Split free-form text into entries and tag each entry with NLU-detected metadata.
172
+
173
def parse_and_tag_entries(text_content: str, source: str) -> List[Document]:
    """Split ``text_content`` into entries and wrap each one in a tagged Document.

    Entries are separated by a line holding only ``---``, ``--``, ``-``,
    ``-*-``, ``-.-`` or an em dash. An entry may start with a ``Title:`` line,
    optionally followed by a ``Content:`` label; otherwise the whole entry is
    the content and a default title is used.

    Args:
        text_content: Raw text holding one or more entries.
        source: Value stored in each document's ``"source"`` metadata.

    Returns:
        One Document per non-empty entry. Its metadata holds ``source`` and
        ``title`` plus whichever of ``behaviors``, ``emotion``, ``topic_tags``
        and ``context_tags`` were detected.
    """
    # The em dash (U+2014) is accepted because the Personalize tab's
    # placeholder appears to tell users to separate entries with it.
    separator_pattern = r'\n(?:---|--|-|-\*-|-\.-|\u2014)\n'
    entries = re.split(separator_pattern, text_content)

    # The option lists are the same for every entry; look them up once.
    behavior_options = CONFIG.get("behavior_tags", [])
    emotion_options = CONFIG.get("emotion_tags", [])
    topic_options = CONFIG.get("topic_tags", [])
    context_options = CONFIG.get("context_tags", [])

    docs_to_add = []

    for entry in entries:
        stripped = entry.strip()
        if not stripped:
            continue

        title = "Untitled Text Entry"
        content = stripped

        lines = stripped.split('\n')
        if lines and "title:" in lines[0].lower():
            title_line = lines[0].split(':', 1)
            title = title_line[1].strip() if len(title_line) > 1 else "Untitled"

            body = "\n".join(lines[1:]).strip()
            # Strip the label only when the body starts with it. Splitting on
            # the first colon anywhere would discard text preceding it.
            if body.lower().startswith("content:"):
                content = body[len("content:"):].strip()
            else:
                content = body

        full_content = f"Title: {title}\n\nContent: {content}"

        print(f"  - Parsing entry: '{title}'")
        detected_tags = detect_tags_from_query(
            content,
            behavior_options=behavior_options,
            emotion_options=emotion_options,
            topic_options=topic_options,
            context_options=context_options,
        )

        metadata = {"source": source, "title": title}

        # Behaviors and contexts are read as lists; emotion and topic as single strings.
        detected_behaviors = detected_tags.get("detected_behaviors", [])
        if detected_behaviors:
            metadata["behaviors"] = [b.lower() for b in detected_behaviors]

        # A missing emotion comes back as None, which must not reach .lower().
        detected_emotion = detected_tags.get("detected_emotion")
        if detected_emotion and detected_emotion != "None":
            metadata["emotion"] = detected_emotion.lower()

        detected_topic = detected_tags.get("detected_topic")
        if detected_topic and detected_topic != "None":
            metadata["topic_tags"] = [detected_topic.lower()]

        detected_contexts = detected_tags.get("detected_contexts", [])
        if detected_contexts:
            metadata["context_tags"] = [c.lower() for c in detected_contexts]

        docs_to_add.append(Document(page_content=full_content, metadata=metadata))

    return docs_to_add
234
+
235
+
236
+
237
def handle_add_knowledge(title, text_input, file_input, image_input, yt_url):
    """Add one piece of user-supplied knowledge to the personal vector store.

    Exactly one input is used, in this priority order: typed text, uploaded
    file (``.txt`` is read as text, anything else is transcribed), image
    (described) and finally a YouTube URL (audio downloaded and transcribed).

    Args:
        title: Optional entry title; each branch falls back to a default.
        text_input: Text typed by the user.
        file_input: Path of an uploaded text/audio/video file.
        image_input: Path of an uploaded image.
        yt_url: YouTube link.

    Returns:
        A status message for the UI.
    """
    global personal_vectorstore
    docs_to_add = []
    clean_title = title.strip() if title and title.strip() else ""

    if text_input and text_input.strip():
        # Fall back to a default like the other branches instead of
        # rendering "Title: None" or an empty title.
        final_title = clean_title or "Untitled Text Entry"
        docs_to_add = parse_and_tag_entries(f"Title: {final_title}\n\nContent: {text_input}", "Text Input")
    elif file_input:
        content_source = os.path.basename(file_input)
        if file_input.lower().endswith('.txt'):
            with open(file_input, 'r', encoding='utf-8') as f:
                file_content = f.read()
            docs_to_add = parse_and_tag_entries(file_content, content_source)
        else:  # audio / video
            final_title = clean_title or "Audio/Video Note"
            content_text = transcribe_audio(file_input)
            full_content = f"Title: {final_title}\n\nContent: {content_text}"
            docs_to_add = parse_and_tag_entries(full_content, content_source)
    elif image_input:
        final_title = clean_title or "Image Note"
        content_text = describe_image(image_input)
        full_content = f"Title: {final_title}\n\nContent: {content_text}"
        docs_to_add = parse_and_tag_entries(full_content, "Image Input")
    elif yt_url and ("youtube.com" in yt_url or "youtu.be" in yt_url):
        try:
            yt = YouTube(yt_url)
            video_title = yt.title
            final_title = clean_title or video_title
            audio_stream = yt.streams.get_audio_only()
            # Reserve a temp path and close the handle before writing to it.
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
                temp_audio_path = temp_audio_file.name
            try:
                audio_stream.download(filename=temp_audio_path)
                content_text = transcribe_audio(temp_audio_path)
            finally:
                # Remove the download even when it or the transcription fails.
                if os.path.exists(temp_audio_path):
                    os.remove(temp_audio_path)
            content_source = f"YouTube: {video_title}"
            full_content = f"Title: {final_title}\n\nContent: {content_text}"
            docs_to_add = parse_and_tag_entries(full_content, content_source)
        except Exception as e:
            return f"Error processing YouTube link: {e}"
    else:
        return "Please provide a title and content, or another input source."

    if not docs_to_add:
        return "No processable content found to add."

    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore(docs_to_add, PERSONAL_INDEX_PATH, is_personal=True)
    else:
        personal_vectorstore.add_documents(docs_to_add)

    personal_vectorstore.save_local(PERSONAL_INDEX_PATH)
    return f"Successfully added {len(docs_to_add)} new memory/memories."
290
+
291
def save_chat_to_memory(chat_history):
    """Store the current conversation as a single personal memory.

    Messages starting with the auto-detected-context marker are left out.
    Returns a status message for the UI.
    """
    global personal_vectorstore
    if not chat_history:
        return "Nothing to save."

    transcript_lines = []
    for message in chat_history:
        speaker = "User" if message["role"] == "user" else "Assistant"
        text = message["content"].strip()
        if not text.startswith("*(Auto-detected context:"):
            transcript_lines.append(f"{speaker}: {text}")

    conversation_text = "\n".join(transcript_lines)
    if not conversation_text:
        return "No conversation content to save."

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    title = f"Conversation from {timestamp}"
    memory = Document(
        page_content=f"Title: {title}\n\nContent:\n{conversation_text}",
        metadata={"source": "Saved Chat", "title": title},
    )

    if personal_vectorstore is not None:
        personal_vectorstore.add_documents([memory])
    else:
        personal_vectorstore = build_or_load_vectorstore([memory], PERSONAL_INDEX_PATH, is_personal=True)
    personal_vectorstore.save_local(PERSONAL_INDEX_PATH)

    print("Saved conversation to long-term memory.")
    return f"Conversation from {timestamp} saved successfully!"
313
+
314
def list_personal_memories():
    """Build the UI updates for the memory table and the delete dropdown.

    Returns:
        ``(dataframe_update, dropdown_update)``. With no stored memories both
        carry placeholder values.
    """
    global personal_vectorstore
    store = personal_vectorstore
    has_memories = (
        store is not None
        and hasattr(store.docstore, '_dict')
        and bool(store.docstore._dict)
    )
    if not has_memories:
        return (
            gr.update(value=[["No memories to display", "", ""]]),
            gr.update(choices=["No memories to select"], value=None),
        )

    rows = []
    contents = []
    for doc in list(store.docstore._dict.values()):
        rows.append([
            doc.metadata.get('title', 'Untitled'),
            doc.metadata.get('source', 'Unknown'),
            doc.page_content,
        ])
        contents.append(doc.page_content)
    return gr.update(value=rows), gr.update(choices=contents)
322
+
323
def delete_personal_memory(memory_to_delete):
    """Remove every stored memory whose full content equals ``memory_to_delete``.

    The personal index is rebuilt from the remaining documents; when none
    remain, the index directory is removed and an empty store is created.
    Returns a status message for the UI.
    """
    global personal_vectorstore
    if personal_vectorstore is None or not memory_to_delete:
        return "Knowledge base is empty or no memory selected."

    stored = list(personal_vectorstore.docstore._dict.values())
    survivors = [doc for doc in stored if doc.page_content != memory_to_delete]
    if len(survivors) == len(stored):
        return "Error: Could not find the selected memory to delete."

    print(f"Deleting memory. {len(survivors)} memories remaining.")
    if survivors:
        rebuilt = FAISS.from_documents(survivors, _default_embeddings())
        rebuilt.save_local(PERSONAL_INDEX_PATH)
        personal_vectorstore = rebuilt
    else:
        if os.path.isdir(PERSONAL_INDEX_PATH):
            shutil.rmtree(PERSONAL_INDEX_PATH)
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
    return "Successfully deleted memory. The list will now refresh."
341
+
342
+
343
+ # Main chat handler: uses the manual tag filters when any is set, otherwise auto-detects behavior, emotion and topic tags.
344
def chat_fn(user_text, audio_file, settings, chat_history):
    """Handle one chat turn: resolve tags, query the RAG chain, optionally speak.

    Args:
        user_text: Text typed by the user; takes precedence over audio.
        audio_file: Path of a recorded message, transcribed when no text is given.
        settings: Settings dict as produced by ``collect_settings``.
        chat_history: List of ``{"role", "content"}`` messages; extended in place.

    Returns:
        ``(textbox_value, audio_output, chat_history)``. The textbox value is
        always ``""``; the audio output is ``None`` on early exit and a Gradio
        update otherwise.
    """
    global personal_vectorstore
    settings = settings or {}
    if chat_history is None:
        chat_history = []

    question = (user_text or "").strip()
    if audio_file and not question:
        try:
            voice_lang_name = settings.get("tts_lang", "English")
            voice_lang_code = CONFIG["languages"].get(voice_lang_name, "en")
            question = transcribe_audio(audio_file, lang=voice_lang_code)
        except Exception as e:
            err_msg = f"Audio Error: {e}" if settings.get("debug_mode") else "Sorry, I couldn't understand the audio."
            chat_history.append({"role": "assistant", "content": err_msg})
            return "", None, chat_history
    if not question:
        return "", None, chat_history

    # Snapshot the conversation before this turn. Slicing off the last
    # message afterwards is wrong once the auto-detected note is appended,
    # because the current question would then be sent as history too.
    prior_history = list(chat_history)
    chat_history.append({"role": "user", "content": question})

    # Manual filters from the UI win; auto-detection runs only if all are "None".
    scenario_tag = settings.get("behaviour_tag", "None")
    emotion_tag = settings.get("emotion_tag", "None")
    topic_tag = settings.get("topic_tag", "None")

    if scenario_tag == "None" and emotion_tag == "None" and topic_tag == "None":
        print("No manual tags set, running auto-detection...")
        detected_tags = detect_tags_from_query(
            question,
            behavior_options=CONFIG.get("behavior_tags", []),
            emotion_options=CONFIG.get("emotion_tags", []),
            topic_options=CONFIG.get("topic_tags", []),
        )

        # Other callers in this file read the list-valued "detected_behaviors";
        # use its first entry and keep the singular key as a fallback.
        detected_behaviors = detected_tags.get("detected_behaviors") or []
        if isinstance(detected_behaviors, str):
            detected_behaviors = [detected_behaviors]
        if detected_behaviors:
            scenario_tag = detected_behaviors[0]
        else:
            scenario_tag = detected_tags.get("detected_behavior", "None")
        emotion_tag = detected_tags.get("detected_emotion", "None")
        topic_tag = detected_tags.get("detected_topic", "None")

        detected_parts = []
        if scenario_tag and scenario_tag != "None":
            detected_parts.append(f"Behavior=`{scenario_tag}`")
        if emotion_tag and emotion_tag != "None":
            detected_parts.append(f"Emotion=`{emotion_tag}`")
        if topic_tag and topic_tag != "None":
            detected_parts.append(f"Topic=`{topic_tag}`")

        # Shown regardless of debug mode for now.
        if detected_parts:
            detected_msg = f"*(Auto-detected context: {', '.join(detected_parts)})*"
            chat_history.append({"role": "assistant", "content": detected_msg})
    else:
        print("Manual tags detected, skipping auto-detection.")

    active_theme = settings.get("active_theme", "All")
    vs_general = ensure_index(active_theme)
    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)

    rag_chain_settings = {
        "role": settings.get("role"),
        "temperature": settings.get("temperature"),
        "language": settings.get("language"),
        "patient_name": settings.get("patient_name"),
        "caregiver_name": settings.get("caregiver_name"),
        "tone": settings.get("tone"),
    }
    chain = make_rag_chain(vs_general, personal_vectorstore, **rag_chain_settings)

    # The "None" sentinel from the UI/NLU is passed on as a real None.
    final_scenario_tag = scenario_tag if scenario_tag != "None" else None
    final_emotion_tag = emotion_tag if emotion_tag != "None" else None
    final_topic_tag = topic_tag if topic_tag != "None" else None

    response = answer_query(
        chain,
        question,
        chat_history=prior_history,
        scenario_tag=final_scenario_tag,
        emotion_tag=final_emotion_tag,
        topic_tag=final_topic_tag,
    )

    answer = response.get("answer", "[No answer found]")
    chat_history.append({"role": "assistant", "content": answer})

    # Sources are shown regardless of debug mode for now.
    sources = response.get("sources") or []
    valid_sources = [s for s in sources if s and s not in ["unknown", "placeholder"]]
    if valid_sources:
        source_msg = f"*(Sources used: {', '.join(valid_sources)})*"
        chat_history.append({"role": "assistant", "content": source_msg})

    audio_out = None
    if settings.get("tts_on") and answer:
        tts_lang_code = CONFIG["languages"].get(settings.get("tts_lang"), "en")
        audio_out = synthesize_tts(answer, lang=tts_lang_code)

    return "", gr.update(value=audio_out, visible=bool(audio_out)), chat_history
450
+
451
+
452
def upload_knowledge(files, current_theme):
    """Copy uploaded files into the theme folder and drop its cached index.

    Files that fail to copy are reported on stdout and skipped.
    Returns a status message for the UI.
    """
    if not files:
        return "No files were selected to upload."

    added = 0
    for uploaded in files:
        try:
            copy_into_theme(current_theme, uploaded.name)
        except Exception as e:
            print(f"Error uploading file {uploaded.name}: {e}")
        else:
            added += 1

    # Drop the cached index so it is rebuilt with the new files.
    if added > 0 and current_theme in vectorstores:
        del vectorstores[current_theme]
    return f"Uploaded {added} file(s). Refreshing file list..."
461
def save_file_selection(current_theme, enabled_files):
    """Persist which files of a theme are enabled and drop its cached index."""
    man = load_manifest(current_theme)
    man['files'] = {fname: fname in enabled_files for fname in man['files']}
    save_manifest(current_theme, man)
    # The index is rebuilt lazily by the next query.
    vectorstores.pop(current_theme, None)
    return f"Settings saved. Index for theme '{current_theme}' will rebuild on the next query."
467
def refresh_file_list_ui(current_theme):
    """Return the update for the file checklist and a summary message."""
    entries = list_theme_files(current_theme)
    all_names = []
    enabled_names = []
    for name, is_enabled in entries:
        all_names.append(name)
        if is_enabled:
            enabled_names.append(name)
    summary = f"Found {len(entries)} file(s). {len(enabled_names)} enabled."
    return gr.update(choices=all_names, value=enabled_names), summary
472
def auto_setup_on_load(current_theme):
    """Seed sample data on first run and return the initial UI state.

    Returns:
        ``(settings, file_list_update, status_message)``.
    """
    theme_dir = theme_upload_dir(current_theme)
    if not os.listdir(theme_dir):
        print("First-time setup: Auto-seeding sample data...")
        seed_files_into_theme(current_theme)

    # One value per key of collect_settings, in the same order. The topic_tag
    # entry was missing, which shifted "All"/True/False onto topic_tag,
    # active_theme and tts_on and left debug_mode unset.
    all_settings = collect_settings(
        "patient",   # role
        "",          # patient_name
        "",          # caregiver_name
        "warm",      # tone
        "English",   # language
        "English",   # tts_lang
        0.7,         # temperature
        "None",      # behaviour_tag
        "None",      # emotion_tag
        "None",      # topic_tag
        "All",       # active_theme
        True,        # tts_on
        False,       # debug_mode
    )
    files_ui, status_msg = refresh_file_list_ui(current_theme)
    return all_settings, files_ui, status_msg
480
+
481
+
482
+ # --- NLU test fixture callbacks ---
483
def load_test_fixtures():
    """Load the test cases and return a Gradio update that fills the dropdown.

    Reads ``conversation_test_fixtures.jsonl`` next to this script, one JSON
    object per line, into the global ``test_fixtures``. On any failure the
    global is left empty and the dropdown gets no choices.
    """
    global test_fixtures
    test_fixtures = []  # reset on each load attempt

    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        fixtures_path = os.path.join(script_dir, "conversation_test_fixtures.jsonl")

        if not os.path.exists(fixtures_path):
            print("WARNING: Test fixtures file not found.")
            return gr.update(choices=[])

        loaded = []
        with open(fixtures_path, "r", encoding="utf-8") as f:
            for line in f:
                # Blank lines are not valid JSON; skip them rather than
                # failing the whole load.
                if line.strip():
                    loaded.append(json.loads(line))

        test_titles = [fixture["title"] for fixture in loaded]

        # Publish only after everything parsed, so a failure never leaves a
        # half-filled list behind an empty dropdown.
        test_fixtures = loaded
        return gr.update(choices=test_titles)

    except Exception as e:
        print(f"UNEXPECTED ERROR during file loading: {e}")
        return gr.update(choices=[])
512
+
513
+
514
+ # run_nlu_test reads the list-valued NLU results (detected_behaviors / detected_contexts).
515
+
516
def run_nlu_test(test_title: str):
    """Run one NLU test case and compare detected tags with the expectation.

    The first turn of the fixture is classified. A category passes when the
    expected and detected tag sets share at least one tag; categories without
    an expectation are not counted.

    Args:
        test_title: Title of a fixture loaded by ``load_test_fixtures``.

    Returns:
        ``(status_markdown, rows)`` where each row is
        ``[category, expected, actual, result]``. ``rows`` is ``None`` when no
        test could be run.
    """
    print("\n--- RUNNING NLU TEST (Definitive Version) ---")
    if not test_title or not test_fixtures:
        return "Please select a test case.", None

    selected_fixture = next((f for f in test_fixtures if f["title"] == test_title), None)
    if not selected_fixture:
        return f"Error: Could not find test case titled '{test_title}'.", None

    user_query = selected_fixture["turns"][0]["text"]
    expected_results = selected_fixture["expected"]
    print(f"Test Case: '{test_title}'")
    print(f"User Query: '{user_query}'")

    actual_results_raw = detect_tags_from_query(
        user_query,
        behavior_options=CONFIG.get("behavior_tags", []),
        emotion_options=CONFIG.get("emotion_tags", []),
        topic_options=CONFIG.get("topic_tags", []),
        context_options=CONFIG.get("context_tags", []),
    )

    print(f"\nRAW NLU RESULTS from detect_tags_from_query:\n{actual_results_raw}\n")

    # Emotion and topic are single values; wrap them so every category is a list.
    actual_results = {
        "emotion": [actual_results_raw.get("detected_emotion")],
        "behaviors": actual_results_raw.get("detected_behaviors", []),
        "topic_tags": [actual_results_raw.get("detected_topic")],
        "context_tags": actual_results_raw.get("detected_contexts", []),
    }

    def _as_list(value):
        # A bare string must count as one tag, not as a sequence of characters.
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    pass_count = 0
    total_count = 0
    comparison_data = []

    all_keys = set(expected_results.keys()) | set(actual_results.keys())

    print("--- COMPARING RESULTS ---")
    for key in sorted(all_keys):
        expected_set = set(_as_list(expected_results.get(key)))
        actual_set = {a for a in _as_list(actual_results.get(key)) if a and a != "None"}

        # Only categories that carry an expectation are scored.
        if not expected_set:
            continue

        total_count += 1

        # Flexible rule: any overlap between expected and detected tags passes.
        is_pass = bool(expected_set & actual_set)

        print(f"Category: '{key}'")
        print(f"  - Expected Set: {expected_set}")
        print(f"  - Actual Set  : {actual_set}")
        print("  - Logic       : expected_set.intersection(actual_set) is non-empty")
        print(f"  - Result      : {is_pass}")

        if is_pass:
            pass_count += 1

        comparison_data.append([
            key,
            ", ".join(sorted(expected_set)),
            ", ".join(sorted(actual_set)) if actual_set else "None",
            "✅ Pass" if is_pass else "❌ Fail",
        ])

    status = f"## Test Result: {pass_count} / {total_count} Categories Passed"
    print(f"Final Status: {pass_count}/{total_count} passed.")
    print("--- TEST COMPLETE ---\n")
    return status, comparison_data
599
+
600
+
601
+
602
+ # --- Debug callbacks: check that the personal-index storage directory persists ---
603
def test_save_file():
    """Write a timestamped test file next to the personal index.

    Returns a status message; any failure is reported in the message
    instead of being raised.
    """
    try:
        # The test file lives in the directory that holds the personal index.
        storage_dir = os.path.dirname(PERSONAL_INDEX_PATH)
        test_file_path = os.path.join(storage_dir, "persistence_test.txt")

        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        content = f"File saved successfully at: {current_time}"

        with open(test_file_path, "w", encoding="utf-8") as f:
            f.write(content)

        return f"✅ Success! Wrote test file to: {test_file_path}"
    except Exception as e:
        return f"❌ Error! Failed to write file. Reason: {e}"
620
+
621
def check_test_file():
    """Report whether the file written by ``test_save_file`` still exists.

    Returns a status message with the file's contents when found; any
    failure is reported in the message instead of being raised.
    """
    try:
        storage_dir = os.path.dirname(PERSONAL_INDEX_PATH)
        test_file_path = os.path.join(storage_dir, "persistence_test.txt")

        if not os.path.exists(test_file_path):
            return f"❌ Failure. Test file not found at: {test_file_path}"

        with open(test_file_path, "r", encoding="utf-8") as f:
            content = f.read()
        return f"✅ Success! Found test file. Contents: '{content}'"
    except Exception as e:
        return f"❌ Error! Failed to check for file. Reason: {e}"
635
+
636
+
637
+ # --- UI Definition ---
638
+ CSS = ".gradio-container { font-size: 14px; } #chatbot { min-height: 250px; } #audio_out audio { max-height: 40px; } #audio_in audio { max-height: 40px; padding: 0; }"
639
+
640
# NOTE(review): this block was recovered from a copy whose indentation had been
# stripped. The nesting of components inside Rows/Groups/Accordions below is a
# best-effort reconstruction — confirm the layout against the running app.
with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
    # Holds the value produced by collect_settings(); chat_fn receives it as
    # an input instead of reading every settings widget directly.
    settings_state = gr.State({})

    # ------------------------------------------------------------------ Chat
    with gr.Tab("Chat"):
        user_text = gr.Textbox(show_label=False, placeholder="Type your message here...")
        audio_in = gr.Audio(sources=["microphone"], type="filepath", label="Voice Input", elem_id="audio_in")
        with gr.Row():
            submit_btn = gr.Button("Send", variant="primary")
            save_btn = gr.Button("Save to Memory")
            clear_btn = gr.Button("Clear")
        chat_status = gr.Markdown()
        audio_out = gr.Audio(label="Response Audio", autoplay=True, visible=True, elem_id="audio_out")
        chatbot = gr.Chatbot(elem_id="chatbot", label="Conversation", type="messages")

    # ----------------------------------------------------------- Personalize
    with gr.Tab("Personalize"):
        with gr.Accordion("Add to Personal Knowledge Base", open=True):
            gr.Markdown("Add personal notes, memories, or descriptions. A descriptive title helps the AI find memories more accurately.")
            personal_title = gr.Textbox(label="Title / Entry Name", placeholder="e.g., 'Dad's favorite songs'")
            # The separator shown in the placeholder was mis-encoded; it is an em dash.
            personal_text = gr.Textbox(lines=5, label="Text Content (or use file upload)", placeholder="Type or paste text here. Use '—' on a new line to separate multiple entries.")
            personal_file = gr.File(label="Upload Audio/Video/Text File")
            personal_image = gr.Image(type="filepath", label="Upload Image")
            personal_yt_url = gr.Textbox(label="Or, provide a YouTube URL", placeholder="Paste a YouTube link here...")
            with gr.Row():
                personal_add_btn = gr.Button("Add Knowledge to Memory", variant="primary")
            personal_status = gr.Markdown()
        with gr.Accordion("Manage Personal Knowledge", open=False):
            personal_memory_display = gr.DataFrame(headers=["Title", "Source", "Content"], label="Saved Personal Memories", interactive=False, row_count=(5, "dynamic"))
            with gr.Row():
                personal_refresh_btn = gr.Button("Refresh Memories")
            with gr.Row():
                personal_delete_selector = gr.Dropdown(label="Select a memory to delete (by its full content)", scale=3, interactive=True)
                personal_delete_btn = gr.Button("Delete Selected Memory", variant="stop", scale=1)
            personal_delete_status = gr.Markdown()

    # --------------------------------------------------------------- Testing
    with gr.Tab("Testing"):
        gr.Markdown("## NLU Context Detection Tests")
        gr.Markdown("Select a test case from `conversation_test_fixtures.jsonl` to run it through the NLU classifier and see the results.")
        with gr.Row():
            test_case_dropdown = gr.Dropdown(label="Select Test Case", scale=3)
            run_test_btn = gr.Button("Load & Run Test", variant="primary", scale=1)
        test_status_md = gr.Markdown("### Please select and run a test case.")
        test_results_df = gr.DataFrame(
            label="Test Results Comparison",
            headers=["Category", "Expected", "Actual", "Result"],
            interactive=False,
        )

    # -------------------------------------------------------------- Settings
    with gr.Tab("Settings"):
        with gr.Group():
            gr.Markdown("## Conversation & Persona Settings")
            with gr.Row():
                role = gr.Radio(CONFIG["roles"], value="caregiver", label="Your Role")
                temperature = gr.Slider(0.0, 1.2, value=0.7, step=0.1, label="Creativity")
                tone = gr.Dropdown(CONFIG["tones"], value="warm", label="Response Tone")
            # NOTE(review): unclear whether the three tag filters originally sat
            # inside this Row or below it — confirm.
            with gr.Row():
                patient_name = gr.Textbox(label="Patient's Name", placeholder="e.g., 'Dad' or 'John'")
                caregiver_name = gr.Textbox(label="Caregiver's Name", placeholder="e.g., 'me' or 'Jane'")
            behaviour_tag = gr.Dropdown(CONFIG["behavior_tags"], value="None", label="Behaviour Filter (Manual Override)")
            emotion_tag = gr.Dropdown(CONFIG["emotion_tags"], value="None", label="Emotion Filter (Manual Override)")
            topic_tag = gr.Dropdown(CONFIG["topic_tags"], value="None", label="Topic Tag Filter (Manual Override)")
        with gr.Accordion("Language, Voice & Debugging", open=False):
            language = gr.Dropdown(list(CONFIG["languages"].keys()), value="English", label="Response Language")
            tts_lang = gr.Dropdown(list(CONFIG["languages"].keys()), value="English", label="Voice Language")
            tts_on = gr.Checkbox(True, label="Enable Voice Response (TTS)")
            debug_mode = gr.Checkbox(False, label="Show Debug Info")
        gr.Markdown("--- \n ## General Knowledge Base Management")
        active_theme = gr.Radio(CONFIG["themes"], value="All", label="Active Knowledge Theme")
        with gr.Row():
            with gr.Column(scale=1):
                files_in = gr.File(file_count="multiple", file_types=[".jsonl", ".txt"], label="Upload Knowledge Files")
                upload_btn = gr.Button("Upload to Theme", variant="secondary")
                seed_btn = gr.Button("Import Sample Data", variant="secondary")
            with gr.Column(scale=2):
                mgmt_status = gr.Markdown()
                files_box = gr.CheckboxGroup(choices=[], label="Enable Files for the Selected Theme")
                with gr.Row():
                    save_files_btn = gr.Button("Save Selection", variant="primary")
                    refresh_btn = gr.Button("Refresh List")
        with gr.Accordion("Persistence Test", open=False):
            gr.Markdown("Use this tool to verify that the Hugging Face persistent storage is working correctly. \n1. Click 'Run Test'. \n2. Manually restart the Space. \n3. Click 'Check for File'.")
            with gr.Row():
                test_save_btn = gr.Button("1. Run Persistence Test (Save File)")
                check_save_btn = gr.Button("3. Check for Test File")
            test_status = gr.Markdown()

    # --- Event Wiring ---
    # Order matters: collect_settings receives these values positionally.
    all_settings_components = [role, patient_name, caregiver_name, tone, language, tts_lang, temperature, behaviour_tag, emotion_tag, topic_tag, active_theme, tts_on, debug_mode]

    # Any change to any settings widget recomputes the whole settings snapshot.
    for component in all_settings_components:
        component.change(fn=collect_settings, inputs=all_settings_components, outputs=settings_state)

    # Chat tab.
    submit_btn.click(fn=chat_fn, inputs=[user_text, audio_in, settings_state, chatbot], outputs=[user_text, audio_out, chatbot])
    save_btn.click(fn=save_chat_to_memory, inputs=[chatbot], outputs=[chat_status])
    # Each component appears once in `outputs`; the earlier version listed
    # user_text twice (set to None, then to ""), so only the final "" is kept.
    clear_btn.click(
        lambda: ("", None, [], None, ""),
        outputs=[user_text, audio_out, chatbot, audio_in, chat_status],
    )

    # Personalize tab: add an entry, then blank the five input widgets.
    personal_add_btn.click(
        fn=handle_add_knowledge,
        inputs=[personal_title, personal_text, personal_file, personal_image, personal_yt_url],
        outputs=[personal_status],
    ).then(
        lambda: (None, None, None, None, None),
        outputs=[personal_title, personal_text, personal_file, personal_image, personal_yt_url],
    )

    personal_refresh_btn.click(fn=list_personal_memories, inputs=None, outputs=[personal_memory_display, personal_delete_selector])
    # After a delete, re-list so the table and the selector reflect the change.
    personal_delete_btn.click(
        fn=delete_personal_memory,
        inputs=[personal_delete_selector],
        outputs=[personal_delete_status],
    ).then(
        fn=list_personal_memories,
        inputs=None,
        outputs=[personal_memory_display, personal_delete_selector],
    )

    # Settings tab: general knowledge base management.
    upload_btn.click(upload_knowledge, inputs=[files_in, active_theme], outputs=[mgmt_status]).then(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    save_files_btn.click(save_file_selection, inputs=[active_theme, files_box], outputs=[mgmt_status])
    seed_btn.click(seed_files_into_theme, inputs=[active_theme]).then(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    refresh_btn.click(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    active_theme.change(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
    demo.load(auto_setup_on_load, inputs=[active_theme], outputs=[settings_state, files_box, mgmt_status])
    test_save_btn.click(fn=test_save_file, inputs=None, outputs=[test_status])
    check_save_btn.click(fn=check_test_file, inputs=None, outputs=[test_status])

    # --- Testing tab wiring ---
    demo.load(load_test_fixtures, outputs=[test_case_dropdown])
    run_test_btn.click(
        fn=run_nlu_test,
        inputs=[test_case_dropdown],
        outputs=[test_status_md, test_results_df],
    )
764
+
765
+ # --- Startup Logic ---
766
def pre_load_indexes():
    """Load every knowledge-base index once, before the app is launched.

    Calls ``ensure_index`` for each entry of ``CONFIG["themes"]``, then
    assigns the result of ``build_or_load_vectorstore`` for
    ``PERSONAL_INDEX_PATH`` to the module-level ``personal_vectorstore``.
    A failure in any single step is printed and skipped so the remaining
    steps still run; nothing is raised to the caller.
    """
    global personal_vectorstore

    print("Pre-loading all knowledge base indexes at startup...")

    # General knowledge: one index per configured theme.
    for theme_name in CONFIG["themes"]:
        print(f" - Loading general index for theme: '{theme_name}'")
        try:
            ensure_index(theme_name)
            print(f" ...'{theme_name}' theme loaded successfully.")
        except Exception as load_error:
            print(f" ...Error loading theme '{theme_name}': {load_error}")

    # Personal knowledge: a single store shared through the module global.
    print(" - Loading personal knowledge index...")
    try:
        personal_vectorstore = build_or_load_vectorstore(
            [],
            PERSONAL_INDEX_PATH,
            is_personal=True,
        )
        print(" ...Personal knowledge loaded successfully.")
    except Exception as load_error:
        print(f" ...Error loading personal knowledge: {load_error}")

    print("All indexes loaded. Application is ready.")
783
+
784
if __name__ == "__main__":
    # Script entry point: load the indexes first, then enable the request
    # queue and start the Gradio server in debug mode.
    pre_load_indexes()
    queued_app = demo.queue()
    queued_app.launch(debug=True)