{ "generated_at": "2026-01-28T17:19:43.408336Z", "out_jsonl": "data/normalized_v1_2/chunks_articles.jsonl", "out_manifest": "data/normalized_v1_2/manifest_articles.json", "total_chunks_written": 677, "sources_total": 29, "sources_skipped": [], "notes": [ "Towards Data Science links may return 403 and are skipped to keep the pipeline reproducible.", "arXiv PDFs are ingested via pdfminer; title/author/date may be enriched later." ] }