AI4PE-pre-alpha / process_pdf.py
Adr740's picture
Upload 13 files
02d5bc7 verified
raw
history blame contribute delete
910 Bytes
from utils import move_files
from pdfparser_hq import process_pdf_hq
from pdfparser_performance import process_pdf_performance
def process_pdf(input, path_pdf_input, data_dumpster, pdf_processing_mode):
    """Stage a company's uploaded PDFs, parse them, and collect the text output.

    Steps, in order:
      1. Resolve the parser for ``pdf_processing_mode`` (fails fast, before
         any file is touched).
      2. Move "pdf" files from ``path_pdf_input`` to
         ``./<data_dumpster>/<company>/pdf`` via ``move_files``.
      3. Run the selected parser on that folder.
      4. Move "txt" files from ``<pdf folder>/extracted_pdf`` up to
         ``./<data_dumpster>/<company>`` via ``move_files``.

    Args:
        input (str): Company name. Spaces are replaced with "-" to build the
            company folder name. (The name shadows the ``input`` builtin; it
            is kept so existing keyword callers keep working.)
        path_pdf_input: path where uploaded pdfs are located; passed to
            ``move_files`` unchanged.
        data_dumpster: path where company data is stored; interpolated into
            a path relative to the current working directory ("./...").
        pdf_processing_mode (str): processing mode, "quality" or
            "performance".

    Raises:
        KeyError: if ``pdf_processing_mode`` is not a supported mode. Raised
            before any files are moved.
    """
    pdf_parser_mapper = {
        "quality": process_pdf_hq,
        "performance": process_pdf_performance,
    }

    # Validate the mode first so an unsupported value cannot leave the
    # uploaded PDFs moved but unprocessed.
    try:
        pdf_processor = pdf_parser_mapper[pdf_processing_mode]
    except KeyError:
        raise KeyError(
            f"Unknown pdf_processing_mode {pdf_processing_mode!r}; "
            f"expected one of {sorted(pdf_parser_mapper)}"
        ) from None

    # Build the company folder once; both destinations derive from it.
    company_dir = f"./{data_dumpster}/{input.replace(' ', '-')}"
    dest = f"{company_dir}/pdf"

    move_files(path_pdf_input, dest, "pdf")
    pdf_processor(dest)
    # NOTE(review): presumably the parsers write their output into
    # "<dest>/extracted_pdf" -- confirm against pdfparser_hq /
    # pdfparser_performance.
    move_files(f"{dest}/extracted_pdf", company_dir, filetype="txt")