Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,7 @@ from pathlib import Path # <-- Add this import at the top of your file with the
|
|
| 10 |
import re
|
| 11 |
|
| 12 |
|
|
|
|
| 13 |
# --- Agent Imports & Safe Fallbacks ---
|
| 14 |
try:
|
| 15 |
from alz_companion.agent import (
|
|
@@ -83,26 +84,67 @@ CONFIG = {
|
|
| 83 |
|
| 84 |
# --- File Management & Vector Store Logic ---
|
| 85 |
|
| 86 |
-
# --- FIX: Anchor all paths to the script's location for reliability on HF Spaces ---
|
| 87 |
-
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 88 |
-
INDEX_BASE = os.path.join(SCRIPT_DIR, 'data')
|
| 89 |
-
PERSONAL_DATA_BASE = os.path.join(SCRIPT_DIR, 'personal_data')
|
| 90 |
-
# --- END FIX ---
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
UPLOADS_BASE = os.path.join(INDEX_BASE, "uploads")
|
| 93 |
PERSONAL_INDEX_PATH = os.path.join(PERSONAL_DATA_BASE, "personal_faiss_index")
|
| 94 |
-
THEME_PATHS = {
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
# Ensure all necessary directories are created on startup
|
| 97 |
os.makedirs(UPLOADS_BASE, exist_ok=True)
|
| 98 |
os.makedirs(os.path.dirname(PERSONAL_INDEX_PATH), exist_ok=True)
|
|
|
|
|
|
|
| 99 |
|
| 100 |
-
|
| 101 |
-
THEME_PATHS = {t: os.path.join(INDEX_BASE, f"faiss_index_{t.replace(' ', '').lower()}") for t in CONFIG["themes"]}
|
| 102 |
vectorstores = {}
|
| 103 |
personal_vectorstore = None
|
| 104 |
test_fixtures = [] # <-- ADD THIS LINE
|
| 105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
def canonical_theme(tk: str) -> str:
    """Return ``tk`` if it is listed in ``CONFIG["themes"]``, else ``"All"``."""
    if tk not in CONFIG["themes"]:
        return "All"
    return tk
|
| 107 |
def theme_upload_dir(theme: str) -> str:
|
| 108 |
p = os.path.join(UPLOADS_BASE, f"theme_{canonical_theme(theme).replace(' ', '').lower()}")
|
|
@@ -265,6 +307,19 @@ def handle_add_knowledge(title, text_input, file_input, image_input, yt_url, set
|
|
| 265 |
yt = YouTube(yt_url)
|
| 266 |
video_title = yt.title
|
| 267 |
final_title = title.strip() if title and title.strip() else video_title
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
audio_stream = yt.streams.get_audio_only()
|
| 269 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
|
| 270 |
audio_stream.download(filename=temp_audio_file.name)
|
|
@@ -272,6 +327,7 @@ def handle_add_knowledge(title, text_input, file_input, image_input, yt_url, set
|
|
| 272 |
content_text = transcribe_audio(temp_audio_path)
|
| 273 |
content_source = f"YouTube: {video_title}"
|
| 274 |
os.remove(temp_audio_path)
|
|
|
|
| 275 |
full_content = f"Title: {final_title}\n\nContent: {content_text}"
|
| 276 |
docs_to_add = parse_and_tag_entries(full_content, content_source, settings=settings)
|
| 277 |
except Exception as e:
|
|
|
|
| 10 |
import re
|
| 11 |
|
| 12 |
|
| 13 |
+
|
| 14 |
# --- Agent Imports & Safe Fallbacks ---
|
| 15 |
try:
|
| 16 |
from alz_companion.agent import (
|
|
|
|
| 84 |
|
| 85 |
# --- File Management & Vector Store Logic ---
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
+
# --- Persistent storage root --- CG5
|
| 89 |
+
def _storage_root() -> Path:
    """Return the first writable directory usable for runtime artefacts.

    Candidates are tried in this order:

    1) ``$SPACE_STORAGE``         -> custom mount, only when set to a non-blank value
    2) ``/data``                  -> Hugging Face Spaces persistent volume
    3) ``~/.cache/alz_companion`` -> portable fallback

    A candidate is accepted once it can be created (if missing) and a small
    probe file can be written to and removed from it. If no candidate
    qualifies, a directory under the system temp dir is returned instead
    (not persistent, but avoids crashing).

    Returns:
        The chosen directory; it exists when the function returns.

    Raises:
        OSError: only if even the temp-dir fallback cannot be created.
    """
    candidates = []

    # Filter on the raw string: Path("") normalises to Path(".") and Path
    # objects are always truthy, so testing the Path itself would let the
    # current working directory win whenever the variable is unset.
    custom = os.getenv("SPACE_STORAGE", "").strip()
    if custom:
        candidates.append(Path(custom))

    candidates.append(Path("/data"))

    try:
        candidates.append(Path.home() / ".cache" / "alz_companion")
    except RuntimeError:
        # Path.home() raises when the home directory cannot be resolved;
        # skip this candidate rather than failing at import time.
        pass

    for p in candidates:
        try:
            p.mkdir(parents=True, exist_ok=True)
            # Creating the directory is not enough: confirm we can write in it.
            probe = p / ".write_test"
            with open(probe, "w") as f:
                f.write("ok")
            probe.unlink(missing_ok=True)
            return p
        except (OSError, ValueError):
            # Not creatable / not writable / malformed path: try the next one.
            continue

    # Last resort: temp (not persistent, but avoids crashing)
    tmp = Path(tempfile.gettempdir()) / "alz_companion"
    tmp.mkdir(parents=True, exist_ok=True)
    return tmp
|
| 117 |
+
|
| 118 |
+
# Resolve the storage root once at import time; every path below hangs off it.
STORAGE_ROOT = _storage_root()

# --- Persistent locations for indexes, uploads and personal data --- CG5
INDEX_BASE = str(STORAGE_ROOT.joinpath("index"))
PERSONAL_DATA_BASE = str(STORAGE_ROOT.joinpath("personal"))
UPLOADS_BASE = os.path.join(INDEX_BASE, "uploads")
PERSONAL_INDEX_PATH = os.path.join(PERSONAL_DATA_BASE, "personal_faiss_index")

# One index directory per configured theme; the directory name is the theme
# with spaces removed and lower-cased.
THEME_PATHS = {
    theme: os.path.join(INDEX_BASE, "faiss_index_" + theme.replace(" ", "").lower())
    for theme in CONFIG["themes"]
}

# Create the directory tree up front, in the same order as before:
# uploads, the personal-index parent, then every theme index directory.
# NOTE(review): the theme index directories are created empty here; confirm
# that build_or_load_vectorstore does not treat an existing-but-empty
# directory as a loadable index.
for p in (UPLOADS_BASE, os.path.dirname(PERSONAL_INDEX_PATH), *THEME_PATHS.values()):
    os.makedirs(p, exist_ok=True)
|
| 134 |
|
|
|
|
|
|
|
| 135 |
# Module-level runtime state; all three start empty at import time.
vectorstores = {}
personal_vectorstore = None
test_fixtures = []

# --- Load existing personal index if present --- CG5
try:
    personal_vectorstore = build_or_load_vectorstore([], PERSONAL_INDEX_PATH, is_personal=True)
except Exception as e:
    # Stay graceful if the index is missing/corrupt (the user can rebuild it by
    # adding memories), but report the failure: a silent fallback would also
    # hide programming errors such as the loader not being defined yet.
    print(f"[startup] Could not load personal index at {PERSONAL_INDEX_PATH}: {e!r}")
    personal_vectorstore = None
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
|
| 148 |
def canonical_theme(tk: str) -> str:
    """Return ``tk`` if it is listed in ``CONFIG["themes"]``, else ``"All"``."""
    if tk not in CONFIG["themes"]:
        return "All"
    return tk
|
| 149 |
def theme_upload_dir(theme: str) -> str:
|
| 150 |
p = os.path.join(UPLOADS_BASE, f"theme_{canonical_theme(theme).replace(' ', '').lower()}")
|
|
|
|
| 307 |
yt = YouTube(yt_url)
|
| 308 |
video_title = yt.title
|
| 309 |
final_title = title.strip() if title and title.strip() else video_title
|
| 310 |
+
|
| 311 |
+
# --- suggested as optional by CG5
|
| 312 |
+
# media_dir = STORAGE_ROOT / "media"
|
| 313 |
+
# media_dir.mkdir(parents=True, exist_ok=True)
|
| 314 |
+
# temp_audio_path = str(media_dir / f"yt_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4")
|
| 315 |
+
|
| 316 |
+
# audio_stream = yt.streams.get_audio_only()
|
| 317 |
+
# audio_stream.download(filename=temp_audio_path)
|
| 318 |
+
|
| 319 |
+
# content_text = transcribe_audio(temp_audio_path)
|
| 320 |
+
# content_source = f"YouTube: {video_title} ({temp_audio_path})"
|
| 321 |
+
# If you truly don't want to keep files, you may still remove it later via a UI control.
|
| 322 |
+
|
| 323 |
audio_stream = yt.streams.get_audio_only()
|
| 324 |
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
|
| 325 |
audio_stream.download(filename=temp_audio_file.name)
|
|
|
|
| 327 |
content_text = transcribe_audio(temp_audio_path)
|
| 328 |
content_source = f"YouTube: {video_title}"
|
| 329 |
os.remove(temp_audio_path)
|
| 330 |
+
|
| 331 |
full_content = f"Title: {final_title}\n\nContent: {content_text}"
|
| 332 |
docs_to_add = parse_and_tag_entries(full_content, content_source, settings=settings)
|
| 333 |
except Exception as e:
|