| import fitz | |
| import pymupdf | |
def extract_text_from_pdf(pdf_path: str, max_pages: int = 150) -> list[str]:
    """Return the extracted text of the leading pages of a PDF, one string per page.

    Args:
        pdf_path: Path of the document handed to ``fitz.open``.
        max_pages: Upper bound on the number of pages read, counted from the
            first page. A value of zero or less yields an empty list.

    Returns:
        The result of ``page.get_text("text")`` for each page read, in
        document order.
    """
    pages_text = []
    # The context manager closes the document on normal exit and on error;
    # without it the underlying file handle stays open after every call.
    with fitz.open(pdf_path) as doc:
        for i, page in enumerate(doc):
            if i >= max_pages:
                break
            pages_text.append(page.get_text("text"))
    return pages_text