KeenWoo committed on
Commit
53c8496
·
verified ·
1 Parent(s): 4e0c1a7

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -845
app.py DELETED
@@ -1,845 +0,0 @@
1
- import os
2
- import json
3
- import shutil
4
- import gradio as gr
5
- import tempfile
6
- from datetime import datetime
7
- from typing import List, Dict, Any, Optional
8
- from pytube import YouTube
9
- from pathlib import Path # <-- Add this import at the top of your file with the other imports
10
- import re
11
-
12
-
13
# --- Agent Imports & Safe Fallbacks ---
# Try to load the real agent implementation; if anything is missing (e.g. a
# UI-only demo without langchain / alz_companion installed), fall back to
# lightweight stubs so the Gradio app can still launch.
try:
    from alz_companion.agent import (
        bootstrap_vectorstore, make_rag_chain, answer_query, synthesize_tts,
        transcribe_audio, detect_tags_from_query, describe_image, build_or_load_vectorstore,
        _default_embeddings
    )
    from alz_companion.prompts import BEHAVIOUR_TAGS, EMOTION_STYLES
    from langchain.schema import Document
    from langchain_community.vectorstores import FAISS
    AGENT_OK = True
except Exception as e:
    AGENT_OK = False

    # Fallback stubs that mirror the real call signatures closely enough
    # for every UI callback to run without crashing.
    def bootstrap_vectorstore(sample_paths=None, index_path="data/"): return object()
    def build_or_load_vectorstore(docs, index_path, is_personal=False): return object()
    def make_rag_chain(vs_general, vs_personal, **kwargs): return lambda q, **k: {"answer": f"(Demo) You asked: {q}", "sources": []}
    def answer_query(chain, q, **kwargs): return chain(q, **kwargs)
    def synthesize_tts(text: str, lang: str = "en"): return None
    def transcribe_audio(filepath: str, lang: str = "en"): return "This is a transcribed message."

    # BUGFIX: the app calls this with topic_options, context_options and
    # example_retriever keyword arguments, and reads list-valued keys
    # ("detected_behaviors", "detected_contexts"). The old stub accepted only
    # two option lists and returned singular keys, so demo mode raised a
    # TypeError on the call and produced wrong keys afterwards. Accept the
    # full signature and return the same shape the real NLU returns.
    def detect_tags_from_query(query: str, behavior_options=None, emotion_options=None,
                               topic_options=None, context_options=None,
                               example_retriever=None, **kwargs):
        return {"detected_behaviors": [], "detected_emotion": "None",
                "detected_topic": "None", "detected_contexts": []}

    def describe_image(image_path: str): return "This is a description of an image."
    def _default_embeddings(): return None

    class Document:
        # Minimal stand-in for langchain.schema.Document.
        def __init__(self, page_content, metadata):
            self.page_content = page_content
            self.metadata = metadata

    class FAISS:
        # Minimal stand-in exposing the .docstore._dict attribute the UI inspects.
        def __init__(self):
            self.docstore = type('obj', (object,), {'_dict': {}})()

    BEHAVIOUR_TAGS = {"None": []}
    EMOTION_STYLES = {"None": {}}
    print(f"WARNING: Could not import from alz_companion ({e}). Running in UI-only demo mode.")
46
-
47
# --- Centralized Configuration ---
# Single source of truth for every dropdown / tag list used by the UI.
CONFIG = {
    "themes": ["All", "The Father", "Still Alice", "Away from Her", "Alive Inside", "General Caregiving"],
    "roles": ["patient", "caregiver"],
    "behavior_tags": ["None"] + list(BEHAVIOUR_TAGS.keys()),
    "emotion_tags": ["None"] + list(EMOTION_STYLES.keys()),
    # Topic tags, including the treatment_option:* sub-tags.
    "topic_tags": [
        "None",
        "caregiving_advice",
        "medical_fact",
        "personal_story",
        "research_update",
        "treatment_option:home_safety",
        "treatment_option:long_term_care",
        "treatment_option:music_therapy",
        "treatment_option:reassurance",
        "treatment_option:routine_structuring",
        "treatment_option:validation_therapy",
    ],
    # Context tags covering disease stage, interaction mode, relationship and setting.
    "context_tags": [
        "None", "disease_stage_mild",
        "disease_stage_moderate", "disease_stage_advanced",
        "disease_stage_unspecified", "interaction_mode_one_to_one",
        "interaction_mode_small_group", "interaction_mode_group_activity",
        "relationship_family", "relationship_spouse",
        "relationship_staff_or_caregiver", "relationship_unspecified",
        "setting_home_or_community", "setting_care_home",
        "setting_clinic_or_hospital",
    ],
    "languages": {"English": "en", "Chinese": "zh", "Malay": "ms", "French": "fr", "Spanish": "es"},
    "tones": ["warm", "neutral", "formal", "playful"],
}
83
-
84
-
85
# --- File Management & Vector Store Logic ---
INDEX_BASE = os.getenv('INDEX_BASE', 'data')
UPLOADS_BASE = os.path.join(INDEX_BASE, "uploads")

# The personal FAISS index lives under the user's home directory so it can
# sit on persistent storage (works both locally and on Hugging Face Spaces).
PERSONAL_INDEX_PATH = os.path.join(str(Path.home()), "AlzCompanionData", "personal_faiss_index")

os.makedirs(UPLOADS_BASE, exist_ok=True)
os.makedirs(os.path.dirname(PERSONAL_INDEX_PATH), exist_ok=True)

# One on-disk index location per theme, e.g. data/faiss_index_thefather.
THEME_PATHS = {t: os.path.join(INDEX_BASE, f"faiss_index_{t.replace(' ', '').lower()}") for t in CONFIG["themes"]}

# Module-level state shared across Gradio callbacks.
vectorstores = {}            # theme name -> cached general vector store
personal_vectorstore = None  # lazily-built personal FAISS index
test_fixtures = []           # parsed NLU test cases
example_retriever = None     # retriever over test fixtures for dynamic examples
107
-
108
-
109
def canonical_theme(tk: str) -> str:
    """Return *tk* if it is a known theme, otherwise fall back to "All"."""
    return tk if tk in CONFIG["themes"] else "All"
110
def theme_upload_dir(theme: str) -> str:
    """Return (creating it if needed) the upload directory for *theme*."""
    slug = canonical_theme(theme).replace(' ', '').lower()
    path = os.path.join(UPLOADS_BASE, f"theme_{slug}")
    os.makedirs(path, exist_ok=True)
    return path
114
def load_manifest(theme: str) -> Dict[str, Any]:
    """Load the per-theme manifest; return an empty one if missing or corrupt."""
    manifest_path = os.path.join(theme_upload_dir(theme), "manifest.json")
    if os.path.exists(manifest_path):
        try:
            with open(manifest_path, "r", encoding="utf-8") as fh:
                return json.load(fh)
        except Exception:
            # Corrupt/unreadable manifest: fall through to a fresh, empty one.
            pass
    return {"files": {}}
121
def save_manifest(theme: str, man: Dict[str, Any]):
    """Write the per-theme manifest to disk as pretty-printed JSON."""
    manifest_path = os.path.join(theme_upload_dir(theme), "manifest.json")
    with open(manifest_path, "w", encoding="utf-8") as fh:
        json.dump(man, fh, indent=2)
123
def list_theme_files(theme: str) -> List[tuple[str, bool]]:
    """Return [(filename, enabled)] for *theme*, syncing the manifest with disk.

    Manifest entries whose file no longer exists are dropped; files found on
    disk but missing from the manifest are appended as disabled. The repaired
    manifest is written back before returning.
    """
    man = load_manifest(theme)
    base = theme_upload_dir(theme)
    found = [(name, bool(enabled)) for name, enabled in man.get("files", {}).items()
             if os.path.exists(os.path.join(base, name))]
    known = {name for name, _ in found}
    for name in sorted(os.listdir(base)):
        if name not in known and os.path.isfile(os.path.join(base, name)):
            found.append((name, False))
    man["files"] = dict(found)
    save_manifest(theme, man)
    return found
133
def copy_into_theme(theme: str, src_path: str) -> str:
    """Copy *src_path* into the theme's upload directory; return the new path."""
    dest = os.path.join(theme_upload_dir(theme), os.path.basename(src_path))
    shutil.copy2(src_path, dest)
    return dest
138
def seed_files_into_theme(theme: str):
    """Copy the bundled sample documents into *theme* on first run.

    Each seeded file is marked enabled in the manifest. Sources missing from
    sample_data/ or already present in the theme directory are skipped.
    """
    seed_files = [
        ("sample_data/caregiving_tips.txt", True),
        ("sample_data/the_father_segments_enriched_harmonized_plus.jsonl", True),
        ("sample_data/still_alice_enriched_harmonized_plus.jsonl", True),
        ("sample_data/away_from_her_enriched_harmonized_plus.jsonl", True),
        ("sample_data/alive_inside_enriched_harmonized.jsonl", True),
    ]
    man = load_manifest(theme)
    changed = False
    for path, enable in seed_files:
        if not os.path.exists(path):
            continue
        fname = os.path.basename(path)
        if not os.path.exists(os.path.join(theme_upload_dir(theme), fname)):
            copy_into_theme(theme, path)
            man["files"][fname] = bool(enable)
            changed = True
    if changed:
        save_manifest(theme, man)
155
-
156
def ensure_index(theme='All'):
    """Return the cached general vector store for *theme*, building it on demand."""
    theme = canonical_theme(theme)
    if theme not in vectorstores:
        upload_dir = theme_upload_dir(theme)
        enabled_files = [os.path.join(upload_dir, name)
                         for name, enabled in list_theme_files(theme) if enabled]
        vectorstores[theme] = bootstrap_vectorstore(
            sample_paths=enabled_files, index_path=THEME_PATHS.get(theme))
    return vectorstores[theme]
164
-
165
-
166
- # --- Gradio Callbacks ---
167
-
168
def collect_settings(*args):
    """Zip the positional settings values into a dict keyed by setting name.

    The positional order must match the order of the Gradio inputs wired to
    this callback. zip() drops extra trailing values, and missing trailing
    values simply leave those keys absent from the returned dict.
    """
    keys = (
        "role", "patient_name", "caregiver_name", "tone", "language", "tts_lang",
        "temperature", "behaviour_tag", "emotion_tag", "topic_tag", "active_theme",
        "tts_on", "debug_mode",
        # Custom prompt templates configured in the settings tab.
        "custom_system_template", "custom_factual_template",
        "custom_gen_knowledge_template", "custom_gen_conversation_template",
    )
    return dict(zip(keys, args))
176
-
177
-
178
# In app.py, replace the existing parse_and_tag_entries function with this one.

def parse_and_tag_entries(text_content: str, source: str) -> List[Document]:
    """Split *text_content* into entries, auto-tag each one, and return Documents.

    Entries are separated by a dash-like line ("---", "--", "-", "-*-" or
    "-.-") on its own line. An entry whose first line starts with "Title:"
    has the title extracted, with the remainder (optionally after a
    "Content:" label) used as the body; otherwise the full entry text
    becomes the content under a default title. Each entry is run through
    detect_tags_from_query and the detected behavior / emotion / topic /
    context tags are stored in the Document metadata.
    """
    # Dash-style separators between multiple entries pasted in one text box.
    separator_pattern = r'\n(?:---|--|-|-\*-|-\.-)\n'
    entries = re.split(separator_pattern, text_content)

    docs_to_add = []

    for entry in entries:
        if not entry.strip():
            continue

        title = "Untitled Text Entry"
        content = entry.strip()

        lines = entry.strip().split('\n')
        if lines and "title:" in lines[0].lower():
            title_line = lines[0].split(':', 1)
            title = title_line[1].strip() if len(title_line) > 1 else "Untitled"

            content_part = "\n".join(lines[1:])
            if "content:" in content_part.lower():
                # NOTE(review): this splits at the FIRST colon in the
                # remainder, which is not necessarily the "Content:" label if
                # an earlier line contains a colon — confirm inputs never do.
                content = content_part.split(':', 1)[1].strip()
            else:
                content = content_part.strip()

        # Normalized text that is actually embedded/stored.
        full_content = f"Title: {title}\n\nContent: {content}"

        print(f" - Parsing entry: '{title}'")
        behavior_options = CONFIG.get("behavior_tags", [])
        emotion_options = CONFIG.get("emotion_tags", [])
        topic_options = CONFIG.get("topic_tags", [])
        context_options = CONFIG.get("context_tags", [])

        # Auto-detect tags for this entry via the NLU helper.
        detected_tags = detect_tags_from_query(
            content,
            behavior_options=behavior_options,
            emotion_options=emotion_options,
            topic_options=topic_options,
            context_options=context_options
        )

        metadata = {"source": source, "title": title}
        # The NLU returns lists for behaviors/contexts and single strings for
        # emotion/topic; everything is lower-cased before being stored.
        detected_behaviors = detected_tags.get("detected_behaviors", [])
        if detected_behaviors:
            metadata["behaviors"] = [b.lower() for b in detected_behaviors]

        if detected_tags.get("detected_emotion") != "None":
            metadata["emotion"] = detected_tags.get("detected_emotion").lower()

        detected_topics = detected_tags.get("detected_topic")  # Topic is a single string
        if detected_topics and detected_topics != "None":
            metadata["topic_tags"] = [detected_topics.lower()]

        detected_contexts = detected_tags.get("detected_contexts", [])
        if detected_contexts:
            metadata["context_tags"] = [c.lower() for c in detected_contexts]

        docs_to_add.append(Document(page_content=full_content, metadata=metadata))

    return docs_to_add
241
-
242
-
243
-
244
def handle_add_knowledge(title, text_input, file_input, image_input, yt_url):
    """Add new personal knowledge from whichever input the user supplied.

    Input priority: typed text > uploaded file (.txt read directly, anything
    else transcribed as audio/video) > image (captioned) > YouTube URL
    (audio downloaded and transcribed). The resulting Documents are appended
    to — or used to create — the personal vector store, which is then saved
    to disk. Returns a status string for the UI.
    """
    global personal_vectorstore
    docs_to_add = []

    # Prioritized input handling: first non-empty source wins.
    if text_input and text_input.strip():
        # Handle manual text input first
        docs_to_add = parse_and_tag_entries(f"Title: {title}\n\nContent: {text_input}", "Text Input")
    elif file_input:
        content_source = os.path.basename(file_input)
        if file_input.lower().endswith('.txt'):
            with open(file_input, 'r', encoding='utf-8') as f:
                file_content = f.read()
            docs_to_add = parse_and_tag_entries(file_content, content_source)
        else:  # Handle audio/video: anything non-.txt is transcribed.
            final_title = title.strip() if title and title.strip() else "Audio/Video Note"
            content_text = transcribe_audio(file_input)
            full_content = f"Title: {final_title}\n\nContent: {content_text}"
            docs_to_add = parse_and_tag_entries(full_content, content_source)
    elif image_input:
        final_title = title.strip() if title and title.strip() else "Image Note"
        content_text = describe_image(image_input)
        full_content = f"Title: {final_title}\n\nContent: {content_text}"
        docs_to_add = parse_and_tag_entries(full_content, "Image Input")
    elif yt_url and ("youtube.com" in yt_url or "youtu.be" in yt_url):
        try:
            yt = YouTube(yt_url)
            video_title = yt.title
            # The video title is the fallback when the user gave no title.
            final_title = title.strip() if title and title.strip() else video_title
            audio_stream = yt.streams.get_audio_only()
            # Download the audio-only stream to a temp file, transcribe it,
            # then delete the temp file.
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
                audio_stream.download(filename=temp_audio_file.name)
                temp_audio_path = temp_audio_file.name
            content_text = transcribe_audio(temp_audio_path)
            content_source = f"YouTube: {video_title}"
            os.remove(temp_audio_path)
            full_content = f"Title: {final_title}\n\nContent: {content_text}"
            docs_to_add = parse_and_tag_entries(full_content, content_source)
        except Exception as e:
            return f"Error processing YouTube link: {e}"
    else:
        return "Please provide a title and content, or another input source."

    if not docs_to_add:
        return "No processable content found to add."

    # Create the personal store on first use; otherwise append to it.
    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore(docs_to_add, PERSONAL_INDEX_PATH, is_personal=True)
    else:
        personal_vectorstore.add_documents(docs_to_add)

    personal_vectorstore.save_local(PERSONAL_INDEX_PATH)
    return f"Successfully added {len(docs_to_add)} new memory/memories."
297
-
298
def save_chat_to_memory(chat_history):
    """Persist the current conversation as one entry in the personal store."""
    global personal_vectorstore
    if not chat_history:
        return "Nothing to save."

    transcript_lines = []
    for message in chat_history:
        content = message["content"].strip()
        # Skip the auto-detected-context helper notes injected in debug mode.
        if content.startswith("*(Auto-detected context:"):
            continue
        speaker = "User" if message["role"] == "user" else "Assistant"
        transcript_lines.append(f"{speaker}: {content}")

    conversation_text = "\n".join(transcript_lines)
    if not conversation_text:
        return "No conversation content to save."

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    title = f"Conversation from {timestamp}"
    full_content = f"Title: {title}\n\nContent:\n{conversation_text}"
    doc_to_add = Document(page_content=full_content, metadata={"source": "Saved Chat", "title": title})

    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore([doc_to_add], PERSONAL_INDEX_PATH, is_personal=True)
    else:
        personal_vectorstore.add_documents([doc_to_add])
    personal_vectorstore.save_local(PERSONAL_INDEX_PATH)

    print(f"Saved conversation to long-term memory.")
    return f"Conversation from {timestamp} saved successfully!"
320
-
321
def list_personal_memories():
    """Refresh the memories table and the delete-selector dropdown."""
    global personal_vectorstore
    store_empty = (
        personal_vectorstore is None
        or not hasattr(personal_vectorstore.docstore, '_dict')
        or not personal_vectorstore.docstore._dict
    )
    if store_empty:
        return (gr.update(value=[["No memories to display", "", ""]]),
                gr.update(choices=["No memories to select"], value=None))

    docs = list(personal_vectorstore.docstore._dict.values())
    table_rows = [
        [doc.metadata.get('title', 'Untitled'), doc.metadata.get('source', 'Unknown'), doc.page_content]
        for doc in docs
    ]
    return (gr.update(value=table_rows),
            gr.update(choices=[doc.page_content for doc in docs]))
329
-
330
def delete_personal_memory(memory_to_delete):
    """Remove one memory (matched by full content) and rebuild the index."""
    global personal_vectorstore
    if personal_vectorstore is None or not memory_to_delete:
        return "Knowledge base is empty or no memory selected."

    all_docs = list(personal_vectorstore.docstore._dict.values())
    docs_to_keep = [doc for doc in all_docs if doc.page_content != memory_to_delete]
    if len(docs_to_keep) == len(all_docs):
        return "Error: Could not find the selected memory to delete."

    print(f"Deleting memory. {len(docs_to_keep)} memories remaining.")
    if docs_to_keep:
        # Rebuild a fresh index from the surviving documents.
        new_vs = FAISS.from_documents(docs_to_keep, _default_embeddings())
        new_vs.save_local(PERSONAL_INDEX_PATH)
        personal_vectorstore = new_vs
    else:
        # Last memory deleted: wipe the on-disk index and start empty.
        if os.path.isdir(PERSONAL_INDEX_PATH):
            shutil.rmtree(PERSONAL_INDEX_PATH)
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
    return "Successfully deleted memory. The list will now refresh."
348
-
349
-
350
# Update chat_fn to gather the custom prompts from the settings and pass them
# to the agent, since make_rag_chain now expects the four custom templates.
def chat_fn(user_text, audio_file, settings, chat_history):
    """Main chat callback.

    Resolves the question (typed text beats audio), determines behavior /
    emotion / topic tags (manual settings override auto-detection), builds a
    RAG chain over the general + personal vector stores, and appends the
    answer (plus optional debug/source notes and TTS audio) to the history.

    Returns (cleared_textbox_value, audio_component_update, chat_history).
    """
    global personal_vectorstore, example_retriever

    question = (user_text or "").strip()
    if audio_file and not question:
        try:
            voice_lang_name = settings.get("tts_lang", "English")
            voice_lang_code = CONFIG["languages"].get(voice_lang_name, "en")
            question = transcribe_audio(audio_file, lang=voice_lang_code)
        except Exception as e:
            err_msg = f"Audio Error: {e}" if settings.get("debug_mode") else "Sorry, I couldn't understand the audio."
            chat_history.append({"role": "assistant", "content": err_msg})
            return "", None, chat_history
    if not question:
        return "", None, chat_history

    chat_history.append({"role": "user", "content": question})

    manual_behavior_tag = settings.get("behaviour_tag", "None")
    manual_emotion_tag = settings.get("emotion_tag", "None")
    manual_topic_tag = settings.get("topic_tag", "None")

    scenario_tag = "None"
    emotion_tag = "None"
    topic_tag = "None"

    # Manual tags take priority; auto-detection runs only when none are set.
    if manual_behavior_tag != "None" or manual_emotion_tag != "None" or manual_topic_tag != "None":
        print("Manual tags detected, skipping auto-detection.")
        scenario_tag = manual_behavior_tag
        emotion_tag = manual_emotion_tag
        topic_tag = manual_topic_tag
    else:
        print("No manual tags set, running auto-detection...")
        detected_tags = detect_tags_from_query(
            question,
            behavior_options=CONFIG.get("behavior_tags", []),
            emotion_options=CONFIG.get("emotion_tags", []),
            topic_options=CONFIG.get("topic_tags", []),
            context_options=CONFIG.get("context_tags", []),
            example_retriever=example_retriever
        )

        # BUGFIX: the NLU may return detected_behaviors as an *empty* list;
        # `.get("detected_behaviors", [None])[0]` raised IndexError in that
        # case because the default only applies when the key is missing.
        detected_behaviors = detected_tags.get("detected_behaviors") or [None]
        scenario_tag = detected_behaviors[0] or "None"
        emotion_tag = detected_tags.get("detected_emotion", "None")
        topic_tag = detected_tags.get("detected_topic", "None")

    # Optionally surface the resolved tags as a debug note in the chat.
    detected_parts = []
    if scenario_tag and scenario_tag != "None": detected_parts.append(f"Behavior=`{scenario_tag}`")
    if emotion_tag and emotion_tag != "None": detected_parts.append(f"Emotion=`{emotion_tag}`")
    if topic_tag and topic_tag != "None": detected_parts.append(f"Topic=`{topic_tag}`")
    if detected_parts and settings.get("debug_mode"):
        detected_msg = f"*(Auto-detected context: {', '.join(detected_parts)})*"
        chat_history.append({"role": "assistant", "content": detected_msg})

    active_theme = settings.get("active_theme", "All")
    vs_general = ensure_index(active_theme)
    if personal_vectorstore is None:
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)

    rag_chain_settings = {
        "role": settings.get("role"), "temperature": settings.get("temperature"), "language": settings.get("language"),
        "patient_name": settings.get("patient_name"), "caregiver_name": settings.get("caregiver_name"), "tone": settings.get("tone"),
    }

    # Pass the custom prompt templates from the settings through to the agent.
    chain = make_rag_chain(
        vs_general,
        personal_vectorstore,
        **rag_chain_settings,
        system_template=settings.get("custom_system_template"),
        factual_template=settings.get("custom_factual_template"),
        general_knowledge_template=settings.get("custom_gen_knowledge_template"),
        general_conversation_template=settings.get("custom_gen_conversation_template")
    )

    final_scenario_tag = scenario_tag if scenario_tag != "None" else None
    final_emotion_tag = emotion_tag if emotion_tag != "None" else None
    final_topic_tag = topic_tag if topic_tag != "None" else None

    # Strip the just-appended user turn (and the debug note, if present) so
    # the chain only sees *prior* conversation history.
    simple_history = chat_history[:-2] if chat_history[-1]["content"].startswith("*(Auto-detected") else chat_history[:-1]
    response = answer_query(chain, question, chat_history=simple_history, scenario_tag=final_scenario_tag, emotion_tag=final_emotion_tag, topic_tag=final_topic_tag)

    answer = response.get("answer", "[No answer found]")
    chat_history.append({"role": "assistant", "content": answer})

    if settings.get("debug_mode") and response.get("sources"):
        valid_sources = [s for s in response.get("sources", []) if s and s not in ["unknown", "placeholder"]]
        if valid_sources:
            source_msg = f"*(Sources used: {', '.join(valid_sources)})*"
            chat_history.append({"role": "assistant", "content": source_msg})

    audio_out = None
    if settings.get("tts_on") and answer:
        tts_lang_code = CONFIG["languages"].get(settings.get("tts_lang"), "en")
        audio_out = synthesize_tts(answer, lang=tts_lang_code)

    # Use the module-level gradio import instead of a redundant local import;
    # hide the audio player when there is nothing to play.
    return "", gr.update(value=audio_out, visible=bool(audio_out)), chat_history
461
-
462
-
463
def upload_knowledge(files, current_theme):
    """Copy uploaded files into the theme folder and invalidate its index."""
    if not files:
        return "No files were selected to upload."
    added = 0
    for uploaded in files:
        try:
            copy_into_theme(current_theme, uploaded.name)
            added += 1
        except Exception as e:
            print(f"Error uploading file {uploaded.name}: {e}")
    if added > 0 and current_theme in vectorstores:
        # Drop the cached index so the next query rebuilds with new files.
        del vectorstores[current_theme]
    return f"Uploaded {added} file(s). Refreshing file list..."
472
def save_file_selection(current_theme, enabled_files):
    """Persist which files are enabled for a theme and invalidate its index."""
    man = load_manifest(current_theme)
    enabled = set(enabled_files)
    for fname in man['files']:
        man['files'][fname] = fname in enabled
    save_manifest(current_theme, man)
    if current_theme in vectorstores:
        del vectorstores[current_theme]
    return f"Settings saved. Index for theme '{current_theme}' will rebuild on the next query."
478
def refresh_file_list_ui(current_theme):
    """Rebuild the file-selection checkbox group for the current theme."""
    files = list_theme_files(current_theme)
    all_names = [name for name, _ in files]
    enabled = [name for name, is_on in files if is_on]
    msg = f"Found {len(files)} file(s). {len(enabled)} enabled."
    return gr.update(choices=all_names, value=enabled), msg
483
def auto_setup_on_load(current_theme):
    """First-load setup: seed sample data if needed and build default settings.

    Returns (settings_dict, file_checkbox_update, status_message).
    """
    theme_dir = theme_upload_dir(current_theme)
    if not os.listdir(theme_dir):
        print("First-time setup: Auto-seeding sample data...")
        seed_files_into_theme(current_theme)

    # BUGFIX: the defaults were misaligned with collect_settings' key order
    # (12 values for 17 keys): "All" landed on topic_tag, True on
    # active_theme and False on tts_on, while debug_mode and the custom
    # prompt templates were never set. Pass one value per key, in order.
    all_settings = collect_settings(
        "patient",   # role
        "",          # patient_name
        "",          # caregiver_name
        "warm",      # tone
        "English",   # language
        "English",   # tts_lang
        0.7,         # temperature
        "None",      # behaviour_tag
        "None",      # emotion_tag
        "None",      # topic_tag
        "All",       # active_theme
        True,        # tts_on
        False,       # debug_mode
        None, None, None, None,  # custom prompt templates (agent defaults)
    )
    files_ui, status_msg = refresh_file_list_ui(current_theme)
    return all_settings, files_ui, status_msg
491
-
492
def setup_example_retriever():
    """Loads test fixtures into a vector store for dynamic example retrieval."""
    global example_retriever
    print("Setting up example retriever...")

    fixtures_path = "conversation_test_fixtures.jsonl"
    if not os.path.exists(fixtures_path):
        print("WARNING: conversation_test_fixtures.jsonl not found. Dynamic examples will be disabled.")
        return

    # One Document per fixture: the first user turn is the searchable text,
    # while the full fixture rides along in the metadata for later retrieval.
    example_docs = []
    with open(fixtures_path, "r", encoding="utf-8") as f:
        for line in f:
            data = json.loads(line)
            example_docs.append(
                Document(page_content=data["turns"][0]["text"], metadata={"full_fixture": data})
            )

    if example_docs:
        # In-memory vector store over the example queries, top-3 retrieval.
        example_vs = FAISS.from_documents(example_docs, _default_embeddings())
        example_retriever = example_vs.as_retriever(search_kwargs={"k": 3})
        print(f"Example retriever created with {len(example_docs)} examples.")
515
-
516
def run_all_startup_tasks(current_theme):
    """Run every application startup task and return the combined UI updates.

    Returns (settings_dict, file_checkbox_update, status_message,
    test_case_dropdown_update).
    """
    print("--- Running all startup tasks ---")
    # UI state (default settings, file list) for the selected theme.
    settings, files_ui, status_msg = auto_setup_on_load(current_theme)

    # Few-shot example retriever used by the NLU.
    setup_example_retriever()

    # BUGFIX: load_test_fixtures() already returns a gr.update(choices=...)
    # object; wrapping it in another gr.update(choices=...) handed the
    # dropdown an update-dict instead of a list of titles, leaving it empty.
    test_cases_update = load_test_fixtures()

    return settings, files_ui, status_msg, test_cases_update
530
-
531
-
532
######### NLU test-case loading
def load_test_fixtures():
    """Loads the test cases and returns a Gradio update object to populate the dropdown."""
    global test_fixtures
    test_fixtures = []  # reset on every load attempt

    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        fixtures_path = os.path.join(script_dir, "conversation_test_fixtures.jsonl")

        if not os.path.exists(fixtures_path):
            print("WARNING: Test fixtures file not found.")
            return gr.update(choices=[])

        with open(fixtures_path, "r", encoding="utf-8") as f:
            for line in f:
                test_fixtures.append(json.loads(line))

        # Populate the dropdown with the fixture titles.
        return gr.update(choices=[fixture["title"] for fixture in test_fixtures])

    except Exception as e:
        print(f"UNEXPECTED ERROR during file loading: {e}")
        return gr.update(choices=[])
563
-
564
-
565
# NLU test harness: compares detect_tags_from_query output against fixtures.
def run_nlu_test(test_title: str):
    """Runs a selected NLU test case with correct pass/fail logic and detailed debugging.

    Returns (markdown status string, comparison table rows) where each row is
    [category, expected tags, actual tags, pass/fail marker].
    """
    print("\n--- RUNNING NLU TEST (Definitive Version) ---")
    if not test_title or not test_fixtures:
        return "Please select a test case.", None

    selected_fixture = next((f for f in test_fixtures if f["title"] == test_title), None)
    if not selected_fixture:
        return f"Error: Could not find test case titled '{test_title}'.", None

    user_query = selected_fixture["turns"][0]["text"]
    expected_results = selected_fixture["expected"]
    print(f"Test Case: '{test_title}'")
    print(f"User Query: '{user_query}'")

    actual_results_raw = detect_tags_from_query(
        user_query,
        behavior_options=CONFIG.get("behavior_tags", []),
        emotion_options=CONFIG.get("emotion_tags", []),
        topic_options=CONFIG.get("topic_tags", []),
        context_options=CONFIG.get("context_tags", []),
        example_retriever=example_retriever
    )

    print(f"\nRAW NLU RESULTS from detect_tags_from_query:\n{actual_results_raw}\n")

    # Normalise the NLU output so every category is a list of tags.
    actual_results = {
        "emotion": [actual_results_raw.get("detected_emotion")],
        "behaviors": actual_results_raw.get("detected_behaviors", []),
        "topic_tags": [actual_results_raw.get("detected_topic")],
        "context_tags": actual_results_raw.get("detected_contexts", [])
    }

    pass_count = 0
    total_count = 0
    comparison_data = []

    # Compare every category present in either expected or actual results.
    all_keys = set(expected_results.keys()) | set(actual_results.keys())

    print("--- COMPARING RESULTS ---")
    for key in sorted(list(all_keys)):
        expected_set = set(expected_results.get(key, []))
        actual_set = set(a for a in actual_results.get(key, []) if a and a != "None")

        # Only score categories for which the fixture states an expectation.
        if not expected_set:
            continue

        total_count += 1

        # Flexible pass/fail: a category passes if the expected and actual
        # tag sets overlap at all (a strict subset check proved too brittle).
        is_pass = len(expected_set.intersection(actual_set)) > 0

        print(f"Category: '{key}'")
        print(f" - Expected Set: {expected_set}")
        print(f" - Actual Set : {actual_set}")
        # BUGFIX: the debug trace previously claimed the subset rule
        # ("expected_set.issubset(actual_set)") while the code actually used
        # the overlap rule — the trace now reports the logic that runs.
        print(f" - Logic : len(expected_set.intersection(actual_set)) > 0")
        print(f" - Result : {is_pass}")

        if is_pass:
            pass_count += 1

        comparison_data.append([
            key,
            ", ".join(sorted(list(expected_set))),
            ", ".join(sorted(list(actual_set))) if actual_set else "None",
            "✅ Pass" if is_pass else "❌ Fail"
        ])

    status = f"## Test Result: {pass_count} / {total_count} Categories Passed"
    print(f"Final Status: {pass_count}/{total_count} passed.")
    print("--- TEST COMPLETE ---\n")
    return status, comparison_data
651
-
652
-
653
-
654
# Debug helpers for verifying persistent-storage behaviour.
def test_save_file():
    """A simple function to test if we can write a file to the persistent storage."""
    try:
        storage_dir = os.path.dirname(PERSONAL_INDEX_PATH)
        test_file_path = os.path.join(storage_dir, "persistence_test.txt")

        # Stamp the file with the current time so a later check can prove
        # that it survived a restart.
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        content = f"File saved successfully at: {current_time}"
        with open(test_file_path, "w", encoding="utf-8") as fh:
            fh.write(content)

        return f"✅ Success! Wrote test file to: {test_file_path}"
    except Exception as e:
        return f"❌ Error! Failed to write file. Reason: {e}"
672
-
673
def check_test_file():
    """A simple function to check if the test file from a previous session exists."""
    try:
        storage_dir = os.path.dirname(PERSONAL_INDEX_PATH)
        test_file_path = os.path.join(storage_dir, "persistence_test.txt")

        if not os.path.exists(test_file_path):
            return f"❌ Failure. Test file not found at: {test_file_path}"
        with open(test_file_path, "r", encoding="utf-8") as fh:
            content = fh.read()
        return f"✅ Success! Found test file. Contents: '{content}'"
    except Exception as e:
        return f"❌ Error! Failed to check for file. Reason: {e}"
687
-
688
-
689
# --- UI Definition ---
# Layout tweaks: base font size, minimum chat-pane height, slim audio players.
CSS = ".gradio-container { font-size: 14px; } #chatbot { min-height: 250px; } #audio_out audio { max-height: 40px; } #audio_in audio { max-height: 40px; padding: 0; }"
691
-
692
- with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
693
- settings_state = gr.State({})
694
-
695
- with gr.Tab("Chat"):
696
- user_text = gr.Textbox(show_label=False, placeholder="Type your message here...")
697
- audio_in = gr.Audio(sources=["microphone"], type="filepath", label="Voice Input", elem_id="audio_in")
698
- with gr.Row():
699
- submit_btn = gr.Button("Send", variant="primary")
700
- save_btn = gr.Button("Save to Memory")
701
- clear_btn = gr.Button("Clear")
702
- chat_status = gr.Markdown()
703
- audio_out = gr.Audio(label="Response Audio", autoplay=True, visible=True, elem_id="audio_out")
704
- chatbot = gr.Chatbot(elem_id="chatbot", label="Conversation", type="messages")
705
-
706
- with gr.Tab("Personalize"):
707
- with gr.Accordion("Add to Personal Knowledge Base", open=True):
708
- gr.Markdown("Add personal notes, memories, or descriptions. A descriptive title helps the AI find memories more accurately.")
709
- personal_title = gr.Textbox(label="Title / Entry Name", placeholder="e.g., 'Dad's favorite songs'")
710
- personal_text = gr.Textbox(lines=5, label="Text Content (or use file upload)", placeholder="Type or paste text here. Use '—' on a new line to separate multiple entries.")
711
- personal_file = gr.File(label="Upload Audio/Video/Text File")
712
- personal_image = gr.Image(type="filepath", label="Upload Image")
713
- personal_yt_url = gr.Textbox(label="Or, provide a YouTube URL", placeholder="Paste a YouTube link here...")
714
- with gr.Row():
715
- personal_add_btn = gr.Button("Add Knowledge to Memory", variant="primary")
716
- personal_status = gr.Markdown()
717
- with gr.Accordion("Manage Personal Knowledge", open=False):
718
- personal_memory_display = gr.DataFrame(headers=["Title", "Source", "Content"], label="Saved Personal Memories", interactive=False, row_count=(5, "dynamic"))
719
- with gr.Row():
720
- personal_refresh_btn = gr.Button("Refresh Memories")
721
- with gr.Row():
722
- personal_delete_selector = gr.Dropdown(label="Select a memory to delete (by its full content)", scale=3, interactive=True)
723
- personal_delete_btn = gr.Button("Delete Selected Memory", variant="stop", scale=1)
724
- personal_delete_status = gr.Markdown()
725
-
726
- with gr.Tab("Testing"):
727
- gr.Markdown("## NLU Context Detection Tests")
728
- gr.Markdown("Select a test case from `conversation_test_fixtures.jsonl` to run it through the NLU classifier and see the results.")
729
- with gr.Row():
730
- test_case_dropdown = gr.Dropdown(label="Select Test Case", scale=3)
731
- run_test_btn = gr.Button("Load & Run Test", variant="primary", scale=1)
732
- test_status_md = gr.Markdown("### Please select and run a test case.")
733
- test_results_df = gr.DataFrame(
734
- label="Test Results Comparison",
735
- headers=["Category", "Expected", "Actual", "Result"],
736
- interactive=False
737
- )
738
-
739
- with gr.Tab("Settings"):
740
- with gr.Group():
741
- gr.Markdown("## Conversation & Persona Settings")
742
- with gr.Row():
743
- role = gr.Radio(CONFIG["roles"], value="caregiver", label="Your Role")
744
- temperature = gr.Slider(0.0, 1.2, value=0.7, step=0.1, label="Creativity")
745
- tone = gr.Dropdown(CONFIG["tones"], value="warm", label="Response Tone")
746
- with gr.Row():
747
- patient_name = gr.Textbox(label="Patient's Name", placeholder="e.g., 'Dad' or 'John'")
748
- caregiver_name = gr.Textbox(label="Caregiver's Name", placeholder="e.g., 'me' or 'Jane'")
749
- behaviour_tag = gr.Dropdown(CONFIG["behavior_tags"], value="None", label="Behaviour Filter (Manual Override)")
750
- emotion_tag = gr.Dropdown(CONFIG["emotion_tags"], value="None", label="Emotion Filter (Manual Override)")
751
- topic_tag = gr.Dropdown(CONFIG["topic_tags"], value="None", label="Topic Tag Filter (Manual Override)")
752
- with gr.Accordion("Language, Voice & Debugging", open=False):
753
- language = gr.Dropdown(list(CONFIG["languages"].keys()), value="English", label="Response Language")
754
- tts_lang = gr.Dropdown(list(CONFIG["languages"].keys()), value="English", label="Voice Language")
755
- tts_on = gr.Checkbox(True, label="Enable Voice Response (TTS)")
756
- debug_mode = gr.Checkbox(False, label="Show Debug Info")
757
- gr.Markdown("--- \n ## General Knowledge Base Management")
758
- active_theme = gr.Radio(CONFIG["themes"], value="All", label="Active Knowledge Theme")
759
- with gr.Row():
760
- with gr.Column(scale=1):
761
- files_in = gr.File(file_count="multiple", file_types=[".jsonl", ".txt"], label="Upload Knowledge Files")
762
- upload_btn = gr.Button("Upload to Theme", variant="secondary")
763
- seed_btn = gr.Button("Import Sample Data", variant="secondary")
764
- with gr.Column(scale=2):
765
- mgmt_status = gr.Markdown()
766
- files_box = gr.CheckboxGroup(choices=[], label="Enable Files for the Selected Theme")
767
- with gr.Row():
768
- save_files_btn = gr.Button("Save Selection", variant="primary")
769
- refresh_btn = gr.Button("Refresh List")
770
- with gr.Accordion("Persistence Test", open=False):
771
- gr.Markdown("Use this tool to verify that the Hugging Face persistent storage is working correctly. \n1. Click 'Run Test'. \n2. Manually restart the Space. \n3. Click 'Check for File'.")
772
- with gr.Row():
773
- test_save_btn = gr.Button("1. Run Persistence Test (Save File)")
774
- check_save_btn = gr.Button("3. Check for Test File")
775
- test_status = gr.Markdown()
776
-
777
-
778
- # --- Event Wiring ---
779
-
780
- all_settings_components = [
781
- role, patient_name, caregiver_name, tone, language, tts_lang,
782
- temperature, behaviour_tag, emotion_tag, topic_tag, active_theme, tts_on, debug_mode,
783
- # --- ADD THE MISSING TEXTBOX COMPONENTS ---
784
- custom_system_template, custom_factual_template, custom_gen_knowledge_template, custom_gen_conversation_template
785
- ]
786
-
787
- for component in all_settings_components:
788
- component.change(fn=collect_settings, inputs=all_settings_components, outputs=settings_state)
789
-
790
- submit_btn.click(fn=chat_fn, inputs=[user_text, audio_in, settings_state, chatbot], outputs=[user_text, audio_out, chatbot])
791
- save_btn.click(fn=save_chat_to_memory, inputs=[chatbot], outputs=[chat_status])
792
- clear_btn.click(lambda: (None, None, [], None, "", ""), outputs=[user_text, audio_out, chatbot, audio_in, user_text, chat_status])
793
-
794
- personal_add_btn.click(
795
- fn=handle_add_knowledge,
796
- inputs=[personal_title, personal_text, personal_file, personal_image, personal_yt_url],
797
- outputs=[personal_status]
798
- ).then(
799
- lambda: (None, None, None, None, None),
800
- outputs=[personal_title, personal_text, personal_file, personal_image, personal_yt_url]
801
- )
802
-
803
- personal_refresh_btn.click(fn=list_personal_memories, inputs=None, outputs=[personal_memory_display, personal_delete_selector])
804
- personal_delete_btn.click(fn=delete_personal_memory, inputs=[personal_delete_selector], outputs=[personal_delete_status]).then(fn=list_personal_memories, inputs=None, outputs=[personal_memory_display, personal_delete_selector])
805
-
806
- upload_btn.click(upload_knowledge, inputs=[files_in, active_theme], outputs=[mgmt_status]).then(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
807
- save_files_btn.click(save_file_selection, inputs=[active_theme, files_box], outputs=[mgmt_status])
808
- seed_btn.click(seed_files_into_theme, inputs=[active_theme]).then(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
809
- refresh_btn.click(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
810
- active_theme.change(refresh_file_list_ui, inputs=[active_theme], outputs=[files_box, mgmt_status])
811
- demo.load(auto_setup_on_load, inputs=[active_theme], outputs=[settings_state, files_box, mgmt_status])
812
- test_save_btn.click(fn=test_save_file, inputs=None, outputs=[test_status])
813
- check_save_btn.click(fn=check_test_file, inputs=None, outputs=[test_status])
814
-
815
-
816
- # In app.py, in the Event Wiring section, replace BOTH demo.load calls with this:
817
- demo.load(
818
- fn=run_all_startup_tasks,
819
- inputs=[active_theme],
820
- outputs=[settings_state, files_box, mgmt_status, test_case_dropdown]
821
- )
822
-
823
-
824
# --- Startup Logic ---
def pre_load_indexes():
    """Warm every knowledge-base index before the UI starts serving.

    Loads the general index for each configured theme, then the personal
    knowledge index (stored in the module-level ``personal_vectorstore``),
    so the first user request does not pay index-loading latency. Failures
    are logged per index and never abort startup.
    """
    global personal_vectorstore
    print("Pre-loading all knowledge base indexes at startup...")
    for theme_name in CONFIG["themes"]:
        print(f" - Loading general index for theme: '{theme_name}'")
        try:
            ensure_index(theme_name)
        except Exception as exc:
            print(f" ...Error loading theme '{theme_name}': {exc}")
        else:
            print(f" ...'{theme_name}' theme loaded successfully.")
    print(" - Loading personal knowledge index...")
    try:
        personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
    except Exception as exc:
        print(f" ...Error loading personal knowledge: {exc}")
    else:
        print(" ...Personal knowledge loaded successfully.")
    print("All indexes loaded. Application is ready.")
842
-
843
# Script entry point: warm all indexes first, then start the Gradio server.
if __name__ == "__main__":
    pre_load_indexes()
    # queue() enables request queuing so long-running chats don't block
    # other users; debug=True surfaces tracebacks in the console.
    demo.queue().launch(debug=True)