| import PyPDF2 | |
| import sys | |
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page in a PDF file.

    Each page's text is preceded by a ``--- Page N ---`` banner line
    (page numbers start at 1) so page boundaries stay visible in the
    combined output.

    Args:
        pdf_path: Path of the PDF file to open (opened in binary mode).

    Returns:
        One string holding, for each page in document order, the banner
        followed by that page's extracted text. An empty string is
        returned when the document has no pages.
    """
    with open(pdf_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # Collect fragments and join once instead of growing a string
        # with += on every page.
        parts = []
        # Pages are consumed inside the `with` so the handle given to
        # PdfReader is still open while text is extracted.
        for page_num, page in enumerate(pdf_reader.pages, start=1):
            parts.append(f"\n--- Page {page_num} ---\n")
            # Fall back to "" so a page that yields a falsy result
            # (no extractable text) cannot break the final join.
            parts.append(page.extract_text() or "")
    return "".join(parts)
if __name__ == "__main__":
    # Default locations, used when no command-line arguments are given.
    default_pdf_file = r"c:\Users\Varshith Dharmaj\Downloads\major\MVM2-COMPLETE-MVP-MULTIMODAL.pdf"
    default_output_file = r"c:\Users\Varshith Dharmaj\Downloads\major\pdf_content.txt"

    # Optional overrides: argv[1] = input PDF path, argv[2] = output text path.
    cli_args = sys.argv[1:]
    pdf_file = cli_args[0] if len(cli_args) >= 1 else default_pdf_file
    output_file = cli_args[1] if len(cli_args) >= 2 else default_output_file

    extracted_text = extract_text_from_pdf(pdf_file)

    # Write to file with UTF-8 encoding
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(extracted_text)
    print(f"PDF content extracted to: {output_file}")