from pdfminer.high_level import extract_text
import spacy

# Loaded once at import time so every parse_cv call reuses the same pipeline.
nlp = spacy.load("en_core_web_sm")

# Checked in this order within each sentence; the first keyword that matches
# is the one reported.
_EDUCATION_KEYWORDS = ("Bachelor", "Master", "PhD", "High School", "Diploma")


def parse_cv(file) -> tuple[str, str]:
    """Extract the text of a CV and the first education keyword found in it.

    The document is scanned sentence by sentence (as segmented by the spaCy
    pipeline). The search stops at the first sentence containing any of the
    education keywords, so the result reflects the earliest mention in the
    document rather than the latest one.

    Matching is a case-insensitive substring test, so a keyword also matches
    inside longer words (e.g. "Master" matches "mastered").

    Args:
        file: The CV to read; passed unchanged to pdfminer's ``extract_text``.

    Returns:
        A ``(text, education_level)`` tuple, where ``text`` is the full
        extracted text and ``education_level`` is the matched keyword as
        spelled in the keyword list, or ``"Not Found"`` if no sentence
        contains any keyword.
    """
    text = extract_text(file)
    doc = nlp(text)

    for sent in doc.sents:
        # Lowercase once per sentence instead of once per keyword.
        sentence = sent.text.lower()
        for keyword in _EDUCATION_KEYWORDS:
            if keyword.lower() in sentence:
                # Return immediately: a plain `break` would only leave the
                # keyword loop and let later sentences overwrite the result.
                return text, keyword

    return text, "Not Found"