Spaces:

VladB46
/

QuestionAnswering

Running

QuestionAnswering / extract_text.py

Vlad Bastina

default changes

22c5be7 11 months ago

547 Bytes

	from pypdf import PdfReader

	def extract_text_pypdf(pdf_path) -> str:
	    """Return the plain-mode text of every page in a PDF as one string.

	    Args:
	        pdf_path: Location of the PDF; handed unchanged to ``PdfReader``.

	    Returns:
	        The extracted text of each page, in page order, with every page
	        (including the last) followed by a blank-line separator
	        (``"\\n\\n"``). A document with no pages yields ``""``.
	    """
	    reader = PdfReader(pdf_path)

	    # Build the result with a single join instead of repeated ``+=`` so the
	    # cost stays linear in the amount of text, even for very large PDFs.
	    return "".join(
	        page.extract_text(extraction_mode='plain') + "\n\n"
	        for page in reader.pages
	    )

	if __name__ == "__main__":
	    import sys  # local import: only the script entry point needs it

	    # Example usage. Optional command-line arguments override the built-in
	    # example paths:
	    #     python extract_text.py [input.pdf] [output.txt]
	    # With no arguments the original hard-coded paths are used.
	    cli_args = sys.argv[1:]
	    pdf_path = cli_args[0] if len(cli_args) > 0 else "CFR-2019-title21-vol2.pdf"
	    output_path = cli_args[1] if len(cli_args) > 1 else "output.txt"

	    pdf_text = extract_text_pypdf(pdf_path)

	    # Save to a text file, written as UTF-8.
	    with open(output_path, "w", encoding="utf-8") as f:
	        f.write(pdf_text)

	    print(pdf_text)  # Print extracted text