File size: 952 Bytes
aac542c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python3
"""Standalone script to list remote parquet files and verify connectivity.

Usage:
    python scripts/sync_data.py
"""

import sys
from pathlib import Path

# Allow running as a script from project root
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from app.config import settings
from app.core.logging_config import setup_logging
from app.core.database import db_manager


def main() -> None:
    """List the remote parquet files and print a short metadata summary.

    Configures logging, connects through ``db_manager``, builds its metadata
    index for the HuggingFace repo named by ``settings.HF_REPO_ID``, and
    prints the number of indexed files and symbols (plus the first ten
    symbols when any exist).

    The connection is closed in a ``finally`` clause, so a failure while
    indexing or reporting still releases it; the original exception then
    propagates to the caller unchanged.
    """
    setup_logging()

    print("Connecting to DuckDB (remote mode) …")
    db_manager.connect()

    # Only work done *after* a successful connect is guarded: if connect()
    # itself raises there is nothing to close.
    try:
        print(f"Listing files from HuggingFace repo: {settings.HF_REPO_ID}")
        db_manager.build_metadata_index()

        print("\n=== Remote Metadata ===")
        print(f"  Total files : {len(db_manager.metadata)}")
        print(f"  Symbols     : {len(db_manager.symbols)}")
        if db_manager.symbols:
            # Show only a short sample so the output stays readable.
            print(f"  First 10    : {db_manager.symbols[:10]}")
    finally:
        db_manager.close()


# Run the connectivity check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()