| | import PyPDF2 |
| | from jinja2 import FileSystemLoader, Environment |
| |
|
| |
|
def parse_pdf(pdf_file):
    """Extract the text and page count of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` or ``os.PathLike``) or an
            already-open binary file-like object containing the PDF.

    Returns:
        The ``(text, num_pages)`` tuple produced by ``_parse``.
    """
    import os  # local import: only needed for the path check below

    # The previous condition was `pdf_file is isinstance(pdf_file, str)`,
    # which compares the argument's identity with a bool and is therefore
    # False for every path string, so this branch could never run.
    if isinstance(pdf_file, (str, os.PathLike)):
        # Open the path ourselves so the handle is closed deterministically
        # once parsing has finished.
        with open(pdf_file, "rb") as file:
            return _parse(file)
    return _parse(pdf_file)
| |
|
| |
|
def _parse(file):
    """Read every page of a PDF and return its text with the page count.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts as its source; this
            function hands it over unchanged.

    Returns:
        A ``(text, num_pages)`` tuple where ``text`` is the extracted text of
        all pages, in order, joined with newlines, and ``num_pages`` is the
        number of pages in the document.
    """
    document = PyPDF2.PdfReader(file)
    page_count = len(document.pages)
    # Extract page by page, then join once at the end.
    per_page_text = [page.extract_text() for page in document.pages]
    return '\n'.join(per_page_text), page_count
| |
|
| |
|
def build_html_resume(data, *, template_dir='src/templates',
                      template_name='resume.html'):
    """Render resume data to an HTML string using a Jinja2 template.

    Args:
        data: Template context, passed positionally to ``Template.render``
            (presumably a dict of resume fields; verify against the caller).
        template_dir: Directory the template is loaded from. The default is
            a relative path, so it is resolved against the current working
            directory; pass an absolute path to be independent of it.
        template_name: Name of the template file inside ``template_dir``.

    Returns:
        The rendered HTML as a string.
    """
    # NOTE(review): the Environment is created without `autoescape`, so
    # Jinja2's default (no escaping) applies and values from `data` are
    # inserted into the HTML verbatim. If `data` can carry untrusted text,
    # consider `autoescape=select_autoescape(['html'])` after confirming the
    # templates do not rely on raw HTML in their variables.
    env = Environment(loader=FileSystemLoader(template_dir))
    template = env.get_template(template_name)
    return template.render(data)
| |
|
| |
|
def export_html(html_resume, output_path):
    """Write rendered HTML to a file.

    Args:
        html_resume: The HTML text to write.
        output_path: Destination file path. The file is opened in ``'w'``
            mode, so any existing content is replaced.

    The file is written with UTF-8 encoding. Returns ``None``.
    """
    with open(output_path, mode='w', encoding='utf8') as out_file:
        out_file.write(html_resume)
| |
|