# Source: hue-portal-backend/backend/scripts/build_faiss_index.py
# Uploaded by: Davidtran99
# Commit: faebf07 ("Deploy backend to Hugging Face Space")
# Repository-viewer residue: "raw", "history blame", "2.66 kB"
"""
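Build FAISS indexes for the core models.

Use --model to build the index for a single model (default: all of them)
and --index-type to choose the FAISS index type (default: IVF).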
"""
import argparse
import os
import sys
from pathlib import Path
# Filesystem layout, derived from this file's location: parents[2] is the
# directory two levels above the one that contains this script.
ROOT_DIR = Path(__file__).resolve().parents[2]
BACKEND_DIR = ROOT_DIR.joinpath("backend")
HUE_PORTAL_DIR = BACKEND_DIR.joinpath("hue_portal")

# Make the project importable before Django is configured.
#   * BACKEND_DIR lets Django resolve the ``hue_portal`` package (and thus
#     ``hue_portal.hue_portal.settings``).
#   * ROOT_DIR covers any other imports rooted at the top-level directory.
# HUE_PORTAL_DIR is deliberately NOT added: putting the package directory
# itself on sys.path breaks Django's imports.
# Entries are prepended in this order, so when both are inserted ROOT_DIR
# ends up ahead of BACKEND_DIR.
for _import_root in (str(BACKEND_DIR), str(ROOT_DIR)):
    if _import_root not in sys.path:
        sys.path.insert(0, _import_root)

# Respect a settings module supplied by the environment; otherwise fall back
# to the project's own settings.
if "DJANGO_SETTINGS_MODULE" not in os.environ:
    os.environ["DJANGO_SETTINGS_MODULE"] = "hue_portal.hue_portal.settings"

# Django has to be configured before the model imports that follow.
import django  # noqa: E402

django.setup()
from hue_portal.core.models import (
Procedure,
Fine,
Office,
Advisory,
LegalSection,
)
from hue_portal.core.faiss_index import build_faiss_index_for_model
def main(argv=None):
    """Parse the command line and build the requested FAISS index(es).

    Args:
        argv: Optional list of command-line arguments, without the program
            name. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which matches calling ``main()`` with no
            arguments.

    Returns:
        0 when every requested index was built, 1 when at least one build
        raised an exception. A failing model is reported and the remaining
        models are still processed.
    """
    # Single source of truth: CLI choice -> (model class, display name).
    # Insertion order is the processing order used for ``--model all``.
    model_registry = {
        "procedure": (Procedure, "Procedure"),
        "fine": (Fine, "Fine"),
        "office": (Office, "Office"),
        "advisory": (Advisory, "Advisory"),
        "legal": (LegalSection, "LegalSection"),
    }

    parser = argparse.ArgumentParser(description="Build FAISS indexes for models")
    parser.add_argument(
        "--model",
        # Derived from the registry so the accepted values cannot drift
        # away from the models that can actually be built.
        choices=[*model_registry, "all"],
        default="all",
        help="Which model to process",
    )
    parser.add_argument(
        "--index-type",
        choices=["Flat", "IVF", "HNSW"],
        default="IVF",
        help="Type of FAISS index",
    )
    args = parser.parse_args(argv)

    banner = "=" * 60
    print(banner)
    print("FAISS Index Builder")
    print(banner)

    if args.model == "all":
        models_to_process = list(model_registry.values())
    else:
        # argparse has already validated the value against ``choices``,
        # so a plain lookup is sufficient here.
        models_to_process = [model_registry[args.model]]

    failed = []
    for model_class, model_name in models_to_process:
        try:
            build_faiss_index_for_model(model_class, model_name, index_type=args.index_type)
        except Exception as exc:
            # Best effort: one failing model must not prevent the others
            # from being indexed, but the failure is remembered so the
            # final status is accurate.
            print(f"❌ Error building index for {model_name}: {exc}")
            failed.append(model_name)

    print("\n" + banner)
    if failed:
        print(f"Index building finished with errors: {', '.join(failed)}")
    else:
        print("Index building complete")
    print(banner)

    return 1 if failed else 0
# Script entry point: main() is invoked only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()