Spaces:
Sleeping
Sleeping
File size: 853 Bytes
4e7e4c0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | import json
from pathlib import Path
from typing import List , Dict
from src.ingestion.pdf_to_markdown import MarkdownLoader
from pathlib import Path
def load_documents(path: Path) -> List[Dict]:
    """Load every PDF found at ``path`` into one flat list of documents.

    ``path`` may point at a single PDF file or at a directory. A directory
    is searched recursively and its PDFs are loaded in sorted path order,
    so the order of the returned documents is deterministic.

    Args:
        path: A PDF file, or a directory to search recursively for PDFs.

    Returns:
        The items produced by ``MarkdownLoader.load_pdf`` for each PDF,
        concatenated in load order (annotated in this module as dicts).

    Raises:
        ValueError: If ``path`` is a file whose suffix is not ``.pdf``, a
            directory that contains no PDF files, or neither an existing
            file nor an existing directory.
    """
    # Resolve and validate the list of PDFs first so that bad input fails
    # fast, before a loader is constructed.
    if path.is_file():
        if path.suffix.lower() != ".pdf":
            raise ValueError(f"Unsupported file type: {path.suffix}")
        pdf_files = [path]
    elif path.is_dir():
        # Match the suffix case-insensitively, exactly like the single-file
        # branch above (a plain "*.pdf" glob would skip "REPORT.PDF" on
        # case-sensitive file systems), and ignore directories that merely
        # happen to be named "*.pdf".
        pdf_files = sorted(
            candidate
            for candidate in path.rglob("*")
            if candidate.suffix.lower() == ".pdf" and candidate.is_file()
        )
        if not pdf_files:
            raise ValueError("No PDF files found in the directory")
    else:
        raise ValueError(f"Invalid path: {path}")

    loader = MarkdownLoader()
    documents: List[Dict] = []
    for pdf_path in pdf_files:
        documents.extend(loader.load_pdf(pdf_path))
    return documents
|