File size: 888 Bytes
import PyPDF2
import sys
def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF file.

    The text of every page is preceded by a ``--- Page N ---`` marker line
    (N is the 1-based page number) so page boundaries stay visible in the
    combined output.

    Args:
        pdf_path: Path of the PDF file to read; it is opened in binary mode.

    Returns:
        One string containing, in page order, the marker followed by the
        extracted text of each page. An empty string if there are no pages.
    """
    with open(pdf_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # Collect the pieces and join once at the end rather than growing a
        # string with += on every page.
        parts = []
        # Pages are read while the file is still open.
        for page_num, page in enumerate(pdf_reader.pages, start=1):
            parts.append(f"\n--- Page {page_num} ---\n")
            parts.append(page.extract_text())
    return "".join(parts)
if __name__ == "__main__":
    # Defaults used when no command-line arguments are supplied.
    default_pdf = r"c:\Users\Varshith Dharmaj\Downloads\major\MVM2-COMPLETE-MVP-MULTIMODAL.pdf"
    default_output = r"c:\Users\Varshith Dharmaj\Downloads\major\pdf_content.txt"

    # Optional overrides: first argument is the input PDF, second argument is
    # the output text file. Running with no arguments uses the defaults above.
    cli_args = sys.argv[1:]
    pdf_file = cli_args[0] if len(cli_args) >= 1 else default_pdf
    output_file = cli_args[1] if len(cli_args) >= 2 else default_output

    extracted_text = extract_text_from_pdf(pdf_file)

    # Write to file with UTF-8 encoding
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(extracted_text)
    print(f"PDF content extracted to: {output_file}")
|