KeenWoo committed on
Commit
11de8fc
·
verified ·
1 Parent(s): 8fa6c27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -9
app.py CHANGED
@@ -10,6 +10,7 @@ from pathlib import Path # <-- Add this import at the top of your file with the
10
  import re
11
 
12
 
 
13
  # --- Agent Imports & Safe Fallbacks ---
14
  try:
15
  from alz_companion.agent import (
@@ -83,26 +84,67 @@ CONFIG = {
83
 
84
  # --- File Management & Vector Store Logic ---
85
 
86
- # --- FIX: Anchor all paths to the script's location for reliability on HF Spaces ---
87
- SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
88
- INDEX_BASE = os.path.join(SCRIPT_DIR, 'data')
89
- PERSONAL_DATA_BASE = os.path.join(SCRIPT_DIR, 'personal_data')
90
- # --- END FIX ---
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  UPLOADS_BASE = os.path.join(INDEX_BASE, "uploads")
93
  PERSONAL_INDEX_PATH = os.path.join(PERSONAL_DATA_BASE, "personal_faiss_index")
94
- THEME_PATHS = {t: os.path.join(INDEX_BASE, f"faiss_index_{t.replace(' ', '').lower()}") for t in CONFIG["themes"]}
 
 
 
95
 
96
- # Ensure all necessary directories are created on startup
97
  os.makedirs(UPLOADS_BASE, exist_ok=True)
98
  os.makedirs(os.path.dirname(PERSONAL_INDEX_PATH), exist_ok=True)
 
 
99
 
100
-
101
- THEME_PATHS = {t: os.path.join(INDEX_BASE, f"faiss_index_{t.replace(' ', '').lower()}") for t in CONFIG["themes"]}
102
  vectorstores = {}
103
  personal_vectorstore = None
104
  test_fixtures = [] # <-- ADD THIS LINE
105
 
 
 
 
 
 
 
 
 
 
106
  def canonical_theme(tk: str) -> str: return tk if tk in CONFIG["themes"] else "All"
107
  def theme_upload_dir(theme: str) -> str:
108
  p = os.path.join(UPLOADS_BASE, f"theme_{canonical_theme(theme).replace(' ', '').lower()}")
@@ -265,6 +307,19 @@ def handle_add_knowledge(title, text_input, file_input, image_input, yt_url, set
265
  yt = YouTube(yt_url)
266
  video_title = yt.title
267
  final_title = title.strip() if title and title.strip() else video_title
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  audio_stream = yt.streams.get_audio_only()
269
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
270
  audio_stream.download(filename=temp_audio_file.name)
@@ -272,6 +327,7 @@ def handle_add_knowledge(title, text_input, file_input, image_input, yt_url, set
272
  content_text = transcribe_audio(temp_audio_path)
273
  content_source = f"YouTube: {video_title}"
274
  os.remove(temp_audio_path)
 
275
  full_content = f"Title: {final_title}\n\nContent: {content_text}"
276
  docs_to_add = parse_and_tag_entries(full_content, content_source, settings=settings)
277
  except Exception as e:
 
10
  import re
11
 
12
 
13
+
14
  # --- Agent Imports & Safe Fallbacks ---
15
  try:
16
  from alz_companion.agent import (
 
84
 
85
  # --- File Management & Vector Store Logic ---
86
 
 
 
 
 
 
87
 
88
+ # --- Persistent storage root --- CG5
89
def _storage_root() -> Path:
    """Pick a writable directory for runtime artefacts.

    Candidates are tried in order and the first one that can be created
    and written to is returned:

    1. ``$SPACE_STORAGE``       -- custom mount; only considered when the
       variable is set to a non-blank value.
    2. ``/data``                -- Hugging Face Spaces persistent volume.
    3. ``~/.cache/alz_companion`` -- portable fallback.

    If none of them is usable, a directory under the system temp dir is
    used instead. That location is not persistent, but it avoids crashing
    at import time.

    Returns:
        The chosen directory as a ``Path`` (created if it did not exist).
    """
    candidates = []

    # Path("") normalises to Path("."), and Path objects are always truthy,
    # so an unset variable has to be filtered on the raw string. Otherwise
    # the current working directory would silently win over /data.
    custom = os.getenv("SPACE_STORAGE", "").strip()
    if custom:
        candidates.append(Path(custom))

    candidates.append(Path("/data"))

    try:
        candidates.append(Path.home() / ".cache" / "alz_companion")
    except RuntimeError:
        # The home directory cannot be resolved in this environment;
        # skip this candidate and rely on the others.
        pass

    for candidate in candidates:
        probe = candidate / ".write_test"
        try:
            candidate.mkdir(parents=True, exist_ok=True)
            # A successful mkdir does not prove writability (the directory
            # may already exist on a read-only mount), so write and remove
            # a small probe file.
            probe.write_text("ok")
            probe.unlink(missing_ok=True)
        except (OSError, ValueError):
            continue
        return candidate

    # Last resort: temp (not persistent, but avoids crashing)
    fallback = Path(tempfile.gettempdir()) / "alz_companion"
    fallback.mkdir(parents=True, exist_ok=True)
    return fallback
117
+
118
+ STORAGE_ROOT = _storage_root()
119
+
120
+ # --- File Management & Vector Store Logic (persistent) --- CG5
121
+ INDEX_BASE = str(STORAGE_ROOT / "index")
122
+ PERSONAL_DATA_BASE = str(STORAGE_ROOT / "personal")
123
  UPLOADS_BASE = os.path.join(INDEX_BASE, "uploads")
124
  PERSONAL_INDEX_PATH = os.path.join(PERSONAL_DATA_BASE, "personal_faiss_index")
125
+ THEME_PATHS = {
126
+ t: os.path.join(INDEX_BASE, f"faiss_index_{t.replace(' ', '').lower()}")
127
+ for t in CONFIG["themes"]
128
+ }
129
 
 
130
  os.makedirs(UPLOADS_BASE, exist_ok=True)
131
  os.makedirs(os.path.dirname(PERSONAL_INDEX_PATH), exist_ok=True)
132
+ for p in THEME_PATHS.values():
133
+ os.makedirs(p, exist_ok=True)
134
 
 
 
135
  vectorstores = {}
136
  personal_vectorstore = None
137
  test_fixtures = [] # <-- ADD THIS LINE
138
 
139
+ # --- Load existing personal index if present --- CG5
140
+ try:
141
+ personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
142
+ except Exception:
143
+ # Stay graceful if the index is missing or corrupt; the user can rebuild it by adding memories.
144
+ personal_vectorstore = None
145
+
146
+
147
+
148
  def canonical_theme(tk: str) -> str: return tk if tk in CONFIG["themes"] else "All"
149
  def theme_upload_dir(theme: str) -> str:
150
  p = os.path.join(UPLOADS_BASE, f"theme_{canonical_theme(theme).replace(' ', '').lower()}")
 
307
  yt = YouTube(yt_url)
308
  video_title = yt.title
309
  final_title = title.strip() if title and title.strip() else video_title
310
+
311
+ # --- suggested as optional by CG5
312
+ # media_dir = STORAGE_ROOT / "media"
313
+ # media_dir.mkdir(parents=True, exist_ok=True)
314
+ # temp_audio_path = str(media_dir / f"yt_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4")
315
+
316
+ # audio_stream = yt.streams.get_audio_only()
317
+ # audio_stream.download(filename=temp_audio_path)
318
+
319
+ # content_text = transcribe_audio(temp_audio_path)
320
+ # content_source = f"YouTube: {video_title} ({temp_audio_path})"
321
+ # If you truly don't want to keep the files, you can still remove them later via a UI control.
322
+
323
  audio_stream = yt.streams.get_audio_only()
324
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
325
  audio_stream.download(filename=temp_audio_file.name)
 
327
  content_text = transcribe_audio(temp_audio_path)
328
  content_source = f"YouTube: {video_title}"
329
  os.remove(temp_audio_path)
330
+
331
  full_content = f"Title: {final_title}\n\nContent: {content_text}"
332
  docs_to_add = parse_and_tag_entries(full_content, content_source, settings=settings)
333
  except Exception as e: