Spaces:
Running
Running
github-actions[bot] committed on
Commit ·
5641301
1
Parent(s): 4a88c1a
🚀 Auto-deploy backend from GitHub (f2f5144)
Browse files
- scripts/download_vectorstore_from_firebase.py +3 -0
- startup.sh +12 -0
scripts/download_vectorstore_from_firebase.py
CHANGED
|
@@ -52,6 +52,9 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
|
|
| 52 |
local_path.parent.mkdir(parents=True, exist_ok=True)
|
| 53 |
|
| 54 |
try:
|
|
|
|
|
|
|
|
|
|
| 55 |
blob.download_to_filename(str(local_path))
|
| 56 |
logger.info("Downloaded: %s (%d bytes)", blob.name, blob.size or 0)
|
| 57 |
downloaded += 1
|
|
|
|
| 52 |
local_path.parent.mkdir(parents=True, exist_ok=True)
|
| 53 |
|
| 54 |
try:
|
| 55 |
+
if local_path.exists() and blob.size is not None and local_path.stat().st_size == blob.size:
|
| 56 |
+
logger.info("Skipped (already up-to-date): %s", blob.name)
|
| 57 |
+
continue
|
| 58 |
blob.download_to_filename(str(local_path))
|
| 59 |
logger.info("Downloaded: %s (%d bytes)", blob.name, blob.size or 0)
|
| 60 |
downloaded += 1
|
startup.sh
CHANGED
|
@@ -15,6 +15,12 @@ export CURRICULUM_VECTORSTORE_DIR="${VECTORSTORE_DIR}"
|
|
| 15 |
|
| 16 |
mkdir -p "${CURRICULUM_DIR}" "${VECTORSTORE_DIR}"
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
_ingest_script="/app/scripts/ingest_curriculum.py"
|
| 19 |
if [ -f "${_ingest_script}" ]; then
|
| 20 |
if [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ] || find "${CURRICULUM_DIR}" -type f -name '*.pdf' -print -quit >/dev/null 2>&1; then
|
|
@@ -31,6 +37,12 @@ _vectorstore_download_script="/app/scripts/download_vectorstore_from_firebase.py
|
|
| 31 |
if [ -f "${_vectorstore_download_script}" ]; then
|
| 32 |
echo "INFO: Downloading vectorstore from Firebase Storage..."
|
| 33 |
python "${_vectorstore_download_script}" || echo "WARNING: Vectorstore download failed, continuing anyway"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
else
|
| 35 |
echo "INFO: Vectorstore download script not found at ${_vectorstore_download_script}; skipping"
|
| 36 |
fi
|
|
|
|
| 15 |
|
| 16 |
mkdir -p "${CURRICULUM_DIR}" "${VECTORSTORE_DIR}"
|
| 17 |
|
| 18 |
+
_vectorstore_cache_dir="${VECTORSTORE_DIR}/.chroma"
|
| 19 |
+
if [ ! -d "${_vectorstore_cache_dir}" ]; then
|
| 20 |
+
mkdir -p "${_vectorstore_cache_dir}"
|
| 21 |
+
echo "INFO: Initialized ChromaDB cache dir at ${_vectorstore_cache_dir}"
|
| 22 |
+
fi
|
| 23 |
+
|
| 24 |
_ingest_script="/app/scripts/ingest_curriculum.py"
|
| 25 |
if [ -f "${_ingest_script}" ]; then
|
| 26 |
if [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ] || find "${CURRICULUM_DIR}" -type f -name '*.pdf' -print -quit >/dev/null 2>&1; then
|
|
|
|
| 37 |
if [ -f "${_vectorstore_download_script}" ]; then
|
| 38 |
echo "INFO: Downloading vectorstore from Firebase Storage..."
|
| 39 |
python "${_vectorstore_download_script}" || echo "WARNING: Vectorstore download failed, continuing anyway"
|
| 40 |
+
_vectorstore_summary_file="${VECTORSTORE_DIR}/ingest_summary.json"
|
| 41 |
+
if [ -f "${_vectorstore_summary_file}" ]; then
|
| 42 |
+
echo "INFO: Vectorstore summary found at ${_vectorstore_summary_file}"
|
| 43 |
+
else
|
| 44 |
+
echo "WARNING: Vectorstore summary not found at ${_vectorstore_summary_file}"
|
| 45 |
+
fi
|
| 46 |
else
|
| 47 |
echo "INFO: Vectorstore download script not found at ${_vectorstore_download_script}; skipping"
|
| 48 |
fi
|