Danial7 committed on
Commit
8251764
·
verified ·
1 Parent(s): 778549c

Create utils/parser.py

Browse files
Files changed (1) hide show
  1. utils/parser.py +16 -0
utils/parser.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pdfminer.high_level import extract_text
2
+ import spacy
3
+
4
+ nlp = spacy.load("en_core_web_sm")
5
+
6
def parse_cv(file) -> tuple[str, str]:
    """Extract the text of a CV and detect the first education level it mentions.

    The text is obtained with ``extract_text`` and split into sentences by the
    module-level spaCy pipeline ``nlp``. Sentences are scanned in document
    order; each one is checked for the education keywords below using a
    case-insensitive substring match, in the order the keywords are listed.

    Args:
        file: Passed unchanged to ``extract_text``; must be whatever that
            function accepts as its input.

    Returns:
        A ``(text, education_level)`` tuple: the full extracted text and the
        keyword found in the first matching sentence, or ``"Not Found"`` when
        no sentence contains any keyword.
    """
    education_keywords = ("Bachelor", "Master", "PhD", "High School", "Diploma")

    text = extract_text(file)
    doc = nlp(text)

    for sent in doc.sents:
        # Lower-case the sentence once rather than once per keyword.
        sentence = sent.text.lower()
        for keyword in education_keywords:
            if keyword.lower() in sentence:
                # Return immediately: a bare `break` here would only leave the
                # keyword loop, so later sentences could overwrite the result.
                return text, keyword

    return text, "Not Found"