File size: 547 Bytes
22c5be7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from pypdf import PdfReader

def extract_text_pypdf(pdf_path):
    """Extract the text of every page of a PDF using pypdf.

    Args:
        pdf_path: Location of the PDF; passed straight to ``PdfReader``.

    Returns:
        A single string holding each page's text (extracted in ``'plain'``
        mode) in page order, with two newlines appended after every page,
        including the last one. An empty string if the reader yields no
        pages.
    """
    reader = PdfReader(pdf_path)

    # Join the per-page pieces once instead of growing a string with ``+=``
    # on every page; repeated concatenation can copy quadratically on
    # documents with many pages.
    return "".join(
        page.extract_text(extraction_mode='plain') + "\n\n"
        for page in reader.pages
    )

if __name__ == "__main__":
    # Demo run: extract one PDF, persist the text, then echo it.
    source_pdf = "CFR-2019-title21-vol2.pdf"  # Replace with your file path
    extracted = extract_text_pypdf(source_pdf)

    # Write the extracted text to disk as UTF-8.
    with open("output.txt", mode="w", encoding="utf-8") as out_file:
        out_file.write(extracted)

    # Also show the extracted text on standard output.
    print(extracted)