File size: 459 Bytes
f95b0cc
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
{
  "generated_at": "2026-01-28T17:19:43.408336Z",
  "out_jsonl": "data/normalized_v1_2/chunks_articles.jsonl",
  "out_manifest": "data/normalized_v1_2/manifest_articles.json",
  "total_chunks_written": 677,
  "sources_total": 29,
  "sources_skipped": [],
  "notes": [
    "Towards Data Science links may return 403 and are skipped to keep the pipeline reproducible.",
    "arXiv PDFs are ingested via pdfminer; title/author/date may be enriched later."
  ]
}