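# NOTE(review): the next six lines look like file-viewer page text captured
# together with the source, not Python; kept verbatim as comments so the
# module still parses. Confirm and remove if they are not wanted.
# adarshninja's picture
# Upload 20 files
# 0fda845 verified
# raw
# history blame contribute delete
# 334 Bytes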
import fitz
import pymupdf
def extract_text_from_pdf(pdf_path: str, max_pages: int = 150) -> list:
    """Return the plain text of the leading pages of a PDF, one entry per page.

    Args:
        pdf_path: Location of the document; passed unchanged to ``fitz.open``.
        max_pages: Upper bound on the number of pages read. Zero or a
            negative value yields an empty list.

    Returns:
        A list in document order holding the result of
        ``page.get_text("text")`` for each page read.

    Any exception raised by ``fitz.open`` or ``get_text`` propagates to the
    caller; the document is closed first if it was opened successfully.
    """
    doc = fitz.open(pdf_path)
    try:
        pages_text = []
        for i, page in enumerate(doc):
            if i >= max_pages:
                break
            pages_text.append(page.get_text("text"))
        return pages_text
    finally:
        # Always release the underlying file, including when extraction fails
        # part-way through; the original never closed the document.
        doc.close()