jw-search / scripts /bootstrap_lower_third_persons.py
jw-tools's picture
deploy: latest main (lazy-ML cold start, durable launcher, web-image search, scene search) + full-app data refresh
7ea1851 verified
#!/usr/bin/env python3
"""Run the conservative lower-third person bootstrap pass."""
from __future__ import annotations
import argparse
import json
import os
import sys
# The repository root is two directory levels above this file; its "backend"
# folder is put at the front of sys.path before the imports that follow
# (presumably those modules live under backend/ -- confirm against the repo).
_THIS_FILE = os.path.abspath(__file__)
REPO_ROOT = os.path.dirname(os.path.dirname(_THIS_FILE))
BACKEND_ROOT = os.path.join(REPO_ROOT, "backend")
sys.path.insert(0, BACKEND_ROOT)
from face_search import get_face_search_instance
from lower_third_person_bootstrap import DEFAULT_MIN_SCORE, bootstrap_lower_third_people
from media_metadata import get_all_media
from runtime_paths import get_image_db_path
def parse_args() -> argparse.Namespace:
    """Define the script's command-line options and parse the process arguments.

    Returns:
        The parsed namespace with ``apply``, ``language``, ``limit``,
        ``min_score``, ``ocr`` and ``resolve_conflicts`` attributes.
    """
    cli = argparse.ArgumentParser(description=__doc__)

    # Options are registered in table order so --help lists them in this order.
    option_specs = (
        ("--apply", {"action": "store_true", "help": "Create/link people instead of dry-running"}),
        ("--language", {"default": "E", "help": "JW language code"}),
        ("--limit", {"type": int, "default": 100, "help": "Maximum candidates to inspect"}),
        (
            "--min-score",
            {"type": float, "default": DEFAULT_MIN_SCORE, "help": "Lower-third label score floor"},
        ),
        ("--ocr", {"action": "store_true", "help": "Use RapidOCR when the title does not name a person"}),
        (
            "--resolve-conflicts",
            {
                "action": "store_true",
                "help": "Allow high-confidence lower-third evidence to replace an existing face assignment",
            },
        ),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli.parse_args()
def main() -> int:
    """Run the bootstrap pass with the parsed CLI options and print its summary.

    The pass is a dry run unless ``--apply`` was given. The summary returned by
    ``bootstrap_lower_third_people`` is printed to stdout as indented JSON.

    Returns:
        Always ``0``.
    """
    options = parse_args()

    # Values are computed in the same order the keyword arguments are listed.
    bootstrap_kwargs = {
        "image_db_path": get_image_db_path(),
        "face_search": get_face_search_instance(),
        "media_items": get_all_media(options.language),
        "min_score": options.min_score,
        "limit": options.limit,
        "dry_run": not options.apply,
        "use_ocr": options.ocr,
        "resolve_conflicts": options.resolve_conflicts,
    }
    result = bootstrap_lower_third_people(**bootstrap_kwargs)

    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    report = json.dumps(result.to_dict(), indent=2, ensure_ascii=False)
    print(report)
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())