""" Quick verification for reprocess_tokenized_from_clean.py. """ from __future__ import annotations import sys from pathlib import Path # Ensure imports work from project root. PROJECT_ROOT = Path(__file__).resolve().parents[1] if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) from scripts.reprocess_tokenized_from_clean import main as reprocess_main # noqa: E402 if __name__ == "__main__": try: sys.argv = [ "verify_reprocess_tokenized_from_clean.py", "--config", "configs/component3_reprocess_from_clean.yaml", "--max_records", "500", ] reprocess_main() print("") print("Reprocess verification passed.") except Exception as exc: print("Reprocess verification failed.") print(f"What went wrong: {exc}") print("Fix suggestion: verify input clean file and tokenizer path.") raise SystemExit(1)