File size: 782 Bytes
845d5aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import numpy as np
import pandas as pd
from os.path import join as pjoin
from pdb import set_trace
import requests
import math
from pathlib import Path
from scipy.spatial.distance import cdist

import asyncio

import PyPDF2  # or use pdfplumber, pymupdf
from io import BytesIO


def extract_text_from_pdf(pdf_path):
    """Extract the text content of a PDF file.

    The text of each page is followed by a newline, in page order.

    Args:
        pdf_path: Location of the PDF file; passed directly to ``open``.

    Returns:
        The concatenated text of all pages, or ``None`` if the file could
        not be opened or read (the error is printed to stdout).
    """
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # A page that yields no text result is treated as empty so that
            # one text-less page does not discard the whole document. The
            # pieces are joined once instead of growing a string per page.
            return "".join(
                (page.extract_text() or "") + "\n"
                for page in pdf_reader.pages
            )
    except Exception as e:
        # Best-effort boundary: callers receive None rather than an exception.
        print(f"Error extracting PDF text: {e}")
        return None

def main():
    """Script entry point; currently performs no work."""
    return None

# Invoke main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()