Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
| import re | |
| import os | |
| from PyPDF2 import PdfReader | |
def read_pdf_text(pdf_file, max_pages=None):
    """Extract and concatenate the text of a PDF's pages.

    Args:
        pdf_file: The PDF source, passed unchanged to ``PdfReader``.
        max_pages: Optional cap on how many leading pages are read.
            ``None`` (the default) reads every page, which matches the
            behaviour before this parameter existed. Must be ``None`` or
            a non-negative integer (``itertools.islice`` raises
            ``ValueError`` otherwise).

    Returns:
        The extracted text of the selected pages, joined with no separator.
    """
    from itertools import islice

    pdf_reader = PdfReader(pdf_file)
    # islice(..., None) yields every page, so the default is "no limit".
    selected_pages = islice(pdf_reader.pages, max_pages)
    # A page with no extractable text contributes an empty string instead
    # of breaking the concatenation.
    return "".join(page.extract_text() or "" for page in selected_pages)
def parse_linkedin_pdf(pdf_text):
    """Split extracted profile text into sections keyed by their first line.

    The text is cut at every newline that is immediately followed by one of
    the known headers (Experience, Contact, Education, Top Skills,
    Languages, Honors-Awards). Within each chunk the first line becomes the
    key and the remaining lines become the value. Any text before the first
    header is stored under its own first line.

    Args:
        pdf_text: The full text to split.

    Returns:
        A dict mapping each chunk's first line to the rest of that chunk.
        If the same first line occurs more than once, the texts are joined
        with a newline rather than the later one replacing the earlier one.
        Chunks that are completely empty are omitted, so empty input
        returns ``{}``.
    """
    sections = re.split(
        r'\n(?=\b(?:Experience|Contact|Education|Top Skills|Languages|Honors-Awards)\b)',
        pdf_text,
    )
    parsed_data = {}
    for section in sections:
        # partition() gives the first line and everything after the first
        # newline, same as split('\n') followed by re-joining lines[1:].
        section_name, _, section_text = section.partition('\n')
        if not section_name and not section_text:
            # Empty input or a leading newline before the first header.
            continue
        if section_name in parsed_data:
            # Keep the earlier occurrence's text instead of overwriting it.
            section_text = '\n'.join(
                part for part in (parsed_data[section_name], section_text) if part
            )
        parsed_data[section_name] = section_text
    return parsed_data