Spaces:
Sleeping
Sleeping
Create utils/parser.py
Browse files- utils/parser.py +16 -0
utils/parser.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pdfminer.high_level import extract_text
|
| 2 |
+
import spacy
|
| 3 |
+
|
| 4 |
+
# Load the spaCy pipeline once at import time; parse_cv reuses it on every call.
nlp = spacy.load("en_core_web_sm")
|
| 5 |
+
|
| 6 |
+
def parse_cv(file) -> tuple[str, str]:
    """Extract the text of a PDF CV and detect the education level it mentions.

    The extracted text is split into sentences by the module-level spaCy
    pipeline, and each sentence is searched case-insensitively for a fixed
    list of education keywords.

    Args:
        file: The PDF to read; passed unchanged to pdfminer's
            ``extract_text``.

    Returns:
        A ``(text, education_level)`` tuple. ``text`` is the full extracted
        text. ``education_level`` is the keyword found in the first sentence
        that contains one (keywords are tried in the order listed below), or
        ``"Not Found"`` when no sentence contains any keyword.
    """
    education_keywords = ["Bachelor", "Master", "PhD", "High School", "Diploma"]
    text = extract_text(file)
    doc = nlp(text)
    for sent in doc.sents:
        # Lower-case the sentence once instead of once per keyword.
        sentence = sent.text.lower()
        for keyword in education_keywords:
            # NOTE: plain substring match, so "Master" also hits "mastered".
            if keyword.lower() in sentence:
                # Stop at the first hit. A bare `break` here would only leave
                # the keyword loop and let later sentences overwrite the result.
                return text, keyword
    return text, "Not Found"
|