github-actions[bot] committed on
Commit
9594951
·
1 Parent(s): b5cb5bb

🚀 Auto-deploy backend from GitHub (ce99ac1)

Browse files
scripts/download_vectorstore_from_firebase.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Download vectorstore directory from Firebase Storage at container startup.
3
+ Run: python -m backend.scripts.download_vectorstore_from_firebase
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import os
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ logger = logging.getLogger("mathpulse.download_vectorstore")
14
+
15
+ sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
16
+
17
+ from backend.rag.firebase_storage_loader import _init_firebase_storage
18
+
19
+ REMOTE_PREFIX = "vectorstore/"
20
+ LOCAL_DEST_DIR = Path("/app/datasets/vectorstore")
21
+
22
+
23
def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX) -> bool:
    """Download every object under ``prefix`` from Firebase Storage into ``dest_dir``.

    The part of each object name that follows ``prefix`` is used as the path
    below ``dest_dir``, so the remote layout is reproduced locally.

    Args:
        dest_dir: Local directory to download into; created if missing.
        prefix: Object-name prefix to list in the bucket.

    Returns:
        ``True`` when at least one object was listed under ``prefix`` and no
        download failed. ``False`` when the bucket is unavailable, nothing is
        listed under ``prefix``, or any individual download raised.
    """
    _, bucket = _init_firebase_storage()
    if bucket is None:
        logger.warning("Firebase Storage not available, vectorstore download skipped")
        return False

    dest_dir.mkdir(parents=True, exist_ok=True)

    blobs = list(bucket.list_blobs(prefix=prefix))
    if not blobs:
        logger.warning("No blobs found under prefix: %s", prefix)
        return False

    downloaded = 0
    errors = 0

    for blob in blobs:
        relative_path = blob.name[len(prefix):].lstrip("/")
        # Skip the prefix object itself and any object whose name ends in "/"
        # (e.g. "folder" placeholder objects): there is no file to write, and
        # downloading to a directory path would be miscounted as an error.
        if not relative_path or relative_path.endswith("/"):
            continue

        local_path = dest_dir / relative_path

        try:
            # Creating the parent directory is part of the per-file attempt so
            # that one bad path is counted as an error instead of aborting
            # the remaining downloads.
            local_path.parent.mkdir(parents=True, exist_ok=True)
            blob.download_to_filename(str(local_path))
        except Exception as e:
            logger.error("Failed to download %s: %s", blob.name, e)
            errors += 1
        else:
            logger.info("Downloaded: %s (%d bytes)", blob.name, blob.size or 0)
            downloaded += 1

    logger.info("Download complete: %d files downloaded, %d errors", downloaded, errors)
    return errors == 0
58
+
59
+
60
if __name__ == "__main__":
    # Script entry point: configure plain "LEVEL: message" logging, then run
    # the download with the module defaults. The boolean result is not
    # inspected here, so the process exit status does not reflect it.
    logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)
    download_vectorstore(dest_dir=LOCAL_DEST_DIR, prefix=REMOTE_PREFIX)
scripts/upload_vectorstore_to_firebase.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Upload vectorstore directory to Firebase Storage.
3
+ Run: python -m backend.scripts.upload_vectorstore_to_firebase
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import os
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ logger = logging.getLogger("mathpulse.upload_vectorstore")
14
+
15
+ sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
16
+
17
+ from backend.rag.firebase_storage_loader import _init_firebase_storage
18
+
19
+ VECTORSTORE_SOURCE_DIR = Path(__file__).resolve().parents[3] / "datasets" / "vectorstore"
20
+ REMOTE_PREFIX = "vectorstore/"
21
+
22
+
23
def upload_directory(local_dir: Path, bucket, prefix: str):
    """Recursively upload every file under ``local_dir`` to a storage prefix.

    Each file is uploaded to ``<prefix>/<path relative to local_dir>`` using
    POSIX separators. A failure on one file is logged and counted; the
    remaining files are still attempted.

    Args:
        local_dir: Local directory whose files are uploaded.
        bucket: Object providing ``blob(name)``; the returned blob must
            provide ``upload_from_filename(path)``.
        prefix: Remote name prefix. A missing trailing "/" is added so the
            prefix and the relative path are never glued together; an empty
            prefix uploads to the bucket root.

    Returns:
        A ``(uploaded, skipped)`` tuple, where ``skipped`` is the number of
        files whose upload raised.
    """
    # Without this, prefix "vectorstore" would produce object names such as
    # "vectorstoreindex.faiss" instead of "vectorstore/index.faiss".
    if prefix and not prefix.endswith("/"):
        prefix += "/"

    uploaded = 0
    skipped = 0

    for root, dirs, files in os.walk(local_dir):
        # Sorting in place makes the traversal (and the log output) deterministic.
        dirs.sort()
        for filename in sorted(files):
            local_path = Path(root) / filename
            relative_path = local_path.relative_to(local_dir)
            remote_path = f"{prefix}{relative_path.as_posix()}"

            try:
                blob = bucket.blob(remote_path)
                blob.upload_from_filename(str(local_path))
                logger.info("Uploaded: %s (%d bytes)", remote_path, local_path.stat().st_size)
                uploaded += 1
            except Exception as e:
                logger.error("Failed to upload %s: %s", remote_path, e)
                skipped += 1

    return uploaded, skipped
44
+
45
+
46
if __name__ == "__main__":
    # Command-line entry point: upload a local vectorstore directory to the
    # Firebase Storage bucket and report the outcome via the exit status.
    import argparse

    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(description="Upload vectorstore to Firebase Storage")
    parser.add_argument("--source", type=str, default=str(VECTORSTORE_SOURCE_DIR),
                        help="Local vectorstore directory")
    parser.add_argument("--prefix", type=str, default=REMOTE_PREFIX,
                        help="Remote path prefix in Firebase Storage")
    args = parser.parse_args()

    source_dir = Path(args.source)
    # is_dir() rather than exists(): a regular file would pass exists() but
    # the directory walk would then find nothing and "succeed" with 0 uploads.
    if not source_dir.is_dir():
        logger.error("Source directory does not exist or is not a directory: %s", source_dir)
        sys.exit(1)

    _, bucket = _init_firebase_storage()
    if bucket is None:
        logger.error("Firebase Storage not available")
        sys.exit(1)

    logger.info("Uploading vectorstore from %s to gs://%s/%s",
                source_dir, bucket.name, args.prefix)
    uploaded, skipped = upload_directory(source_dir, bucket, args.prefix)
    logger.info("Upload complete: %d uploaded, %d skipped", uploaded, skipped)

    # "skipped" counts files whose upload raised; a partial upload must not
    # look like success to whatever invoked this script.
    if skipped:
        sys.exit(1)