File size: 2,481 Bytes
92bfe31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""
Upload vectorstore directory to Firebase Storage.
Run: python -m backend.scripts.upload_vectorstore_to_firebase
"""

from __future__ import annotations

import logging
import os
import sys
from pathlib import Path

# Module-level logger. No handler or level is configured here; the script
# entry point at the bottom of the file calls logging.basicConfig().
logger = logging.getLogger("mathpulse.upload_vectorstore")

# Put the directory at parents[2] of this file's resolved path on sys.path so
# the absolute `backend.*` import below resolves when the file is executed
# directly. This must run before that import, which is why the import is not
# grouped with the others at the top of the file.
# (presumably parents[2] is the repository root that contains `backend/` — confirm)
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

from backend.rag.firebase_storage_loader import _init_firebase_storage

# Default local directory to upload; used as the default for --source.
# NOTE(review): this uses parents[3], one level ABOVE the directory added to
# sys.path (parents[2]). Confirm that "datasets/vectorstore" really lives
# outside that directory and that this is not an off-by-one.
VECTORSTORE_SOURCE_DIR = Path(__file__).resolve().parents[3] / "datasets" / "vectorstore"
# Default remote object-name prefix; used as the default for --prefix.
REMOTE_PREFIX = "vectorstore/"


def upload_directory(local_dir: Path, bucket, prefix: str) -> tuple[int, int]:
    """Recursively upload a local directory to a Firebase Storage prefix.

    Every file found below ``local_dir`` is uploaded to the object name
    ``<prefix><path relative to local_dir>``, using POSIX separators. Upload
    errors are logged and counted per file, so one failing file does not
    abort the remaining uploads.

    Args:
        local_dir: Root directory to walk. ``os.walk`` ignores errors by
            default, so a missing directory yields ``(0, 0)``.
        bucket: Storage bucket object. Only ``bucket.blob(name)`` and the
            returned blob's ``upload_from_filename(path)`` are used.
        prefix: Remote object-name prefix. A non-empty prefix without a
            trailing ``"/"`` gets one appended, so ``"vectorstore"`` and
            ``"vectorstore/"`` are equivalent. An empty prefix uploads the
            relative paths unchanged.

    Returns:
        A ``(uploaded, skipped)`` tuple, where ``skipped`` is the number of
        files whose upload raised an exception.
    """
    # Without the separator, "vectorstore" + "index.faiss" would silently
    # become the single object name "vectorstoreindex.faiss".
    if prefix and not prefix.endswith("/"):
        prefix += "/"

    uploaded = 0
    skipped = 0

    for root, dirs, files in os.walk(local_dir):
        # Sorting in place makes os.walk descend in a stable order, so the
        # upload sequence and log output do not depend on the filesystem.
        dirs.sort()
        for filename in sorted(files):
            local_path = Path(root) / filename
            relative_path = local_path.relative_to(local_dir)
            remote_path = f"{prefix}{relative_path.as_posix()}"

            try:
                # Read the size before uploading so a stat() problem can never
                # be reported as a failure of an upload that already succeeded.
                size = local_path.stat().st_size
                bucket.blob(remote_path).upload_from_filename(str(local_path))
            except Exception as e:
                # Deliberately broad: this is a best-effort batch upload and
                # the caller receives the failure count.
                logger.error("Failed to upload %s: %s", remote_path, e)
                skipped += 1
            else:
                logger.info("Uploaded: %s (%d bytes)", remote_path, size)
                uploaded += 1

    return uploaded, skipped


if __name__ == "__main__":
    # Script entry point: parse CLI options, validate the source directory,
    # obtain the Firebase Storage bucket, upload, and report the result through
    # the process exit code (0 = everything uploaded, 1 = any error).
    import argparse

    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    parser = argparse.ArgumentParser(description="Upload vectorstore to Firebase Storage")
    parser.add_argument("--source", type=str, default=str(VECTORSTORE_SOURCE_DIR),
                        help="Local vectorstore directory")
    parser.add_argument("--prefix", type=str, default=REMOTE_PREFIX,
                        help="Remote path prefix in Firebase Storage")
    args = parser.parse_args()

    source_dir = Path(args.source)
    # is_dir() rather than exists(): a regular file passed as --source would
    # make the directory walk yield nothing and the run would "succeed" with
    # zero uploads.
    if not source_dir.is_dir():
        logger.error("Source directory does not exist or is not a directory: %s", source_dir)
        sys.exit(1)

    # Only the second element of the returned pair (the bucket) is needed here.
    _, bucket = _init_firebase_storage()
    if bucket is None:
        logger.error("Firebase Storage not available")
        sys.exit(1)

    logger.info("Uploading vectorstore from %s to gs://%s/%s",
                source_dir, bucket.name, args.prefix)
    uploaded, skipped = upload_directory(source_dir, bucket, args.prefix)
    logger.info("Upload complete: %d uploaded, %d skipped", uploaded, skipped)

    # upload_directory counts failed uploads as "skipped"; surface them through
    # the exit status so shells and CI jobs do not treat a partial upload as
    # a success.
    if skipped:
        sys.exit(1)