Spaces:

Danial7
/

CV_Analyzer_Final

Sleeping

CV_Analyzer_Final / utils /parser.py

Update utils/parser.py

c48432d verified 9 months ago

1.6 kB

	import re

	import fitz # PyMuPDF for PDF text extraction
	import spacy

	# Load spaCy's "en_core_web_sm" pipeline once, at import time, and keep it
	# in a module-level name so it is not reloaded on every call.
	nlp = spacy.load("en_core_web_sm")

	# Lowercase keyword -> normalised education level label.
	# Insertion order is significant: lookups walk the table from top to bottom
	# and stop at the first keyword found, so higher qualifications come first.
	EDUCATION_LEVELS = {
	    # Doctoral
	    "phd": "PhD",
	    "doctorate": "PhD",
	    # Postgraduate
	    "masters": "Masters",
	    "master": "Masters",
	    # Undergraduate
	    "bachelor": "Bachelors",
	    "bsc": "Bachelors",
	    "ba": "Bachelors",
	    # Below degree level
	    "diploma": "Diploma",
	    "high school": "High School",
	    "secondary school": "High School",
	}

	def extract_text_from_pdf(file_path):
	    """Return the text of every page of a PDF, concatenated in page order.

	    Args:
	        file_path: Path of the PDF file, passed straight to ``fitz.open``.

	    Returns:
	        A single string made of each page's ``get_text()`` output; an empty
	        string when the document has no pages.

	    Raises:
	        Whatever ``fitz.open`` raises when the file is missing or unreadable.
	    """
	    # The context manager closes the document (and its file handle) even if
	    # text extraction fails part-way through.
	    with fitz.open(file_path) as doc:
	        # join() avoids rebuilding the growing string once per page.
	        return "".join(page.get_text() for page in doc)

	def parse_cv(file_path):
	    """Return the plain text of the CV stored at ``file_path``.

	    Args:
	        file_path: Path of the PDF file to read.

	    Returns:
	        The text extracted from the PDF by ``extract_text_from_pdf``.
	    """
	    # NOTE: no spaCy pass is run here. Nothing consumed its result, the
	    # pipeline is slow on long documents, and it raises on text longer than
	    # the pipeline's max_length. Call nlp(text) here only once the parsed
	    # Doc is actually used by additional parsing logic.
	    return extract_text_from_pdf(file_path)

	def extract_education_level(text, *, levels=None):
	    """Return the first education level whose keyword occurs in ``text``.

	    Keywords are matched case-insensitively as whole words or phrases, so
	    "ba" no longer fires on "database" and "master" no longer fires on
	    "mastered". A trailing "s" or "'s" is accepted ("bachelors",
	    "master's"), and the words of a multi-word keyword may be separated by
	    any whitespace, including a line break.

	    Args:
	        text: CV text to scan. ``None`` or an empty string yields
	            ``"Not Found"``.
	        levels: Optional mapping of lowercase keyword -> level label, checked
	            in iteration order. Defaults to ``EDUCATION_LEVELS``.

	    Returns:
	        The label of the first matching keyword, or ``"Not Found"``.
	    """
	    if not text:
	        return "Not Found"
	    if levels is None:
	        levels = EDUCATION_LEVELS

	    text_lower = text.lower()
	    for key, level in levels.items():
	        # Extracted PDF text often wraps inside a phrase, so allow any run
	        # of whitespace between the words of a keyword.
	        phrase = r"\s+".join(re.escape(word) for word in key.split())
	        # Lookarounds act as word boundaries; the optional suffix keeps
	        # plural and possessive forms matching as they did before.
	        pattern = r"(?<!\w)" + phrase + r"(?:['’]?s)?(?!\w)"
	        if re.search(pattern, text_lower):
	            return level
	    return "Not Found"

	def identify_cv_type(text):
	    """Classify CV text as "Technical", "Non-Technical" or "Unknown".

	    Each keyword list scores one point per keyword that occurs at least once
	    in the text. Keywords are matched case-insensitively as whole words or
	    phrases, so "it" no longer fires on "with" and "hr" no longer fires on
	    "three". The words of a multi-word keyword may be separated by any
	    whitespace, including a line break.

	    Note: because the text is lower-cased, the keyword "it" still matches
	    the pronoun "it"; and whole-word matching means "mysql" or "developers"
	    do not count for "sql" or "developer".

	    Args:
	        text: CV text to classify. ``None`` or an empty string yields
	            ``"Unknown"``.

	    Returns:
	        "Technical" when more technical keywords match, "Non-Technical" when
	        more non-technical keywords match, "Unknown" on a tie.
	    """
	    technical_keywords = [
	        "python", "java", "c++", "sql", "software", "engineering",
	        "developer", "data science", "machine learning", "it", "technology",
	    ]
	    non_technical_keywords = [
	        "management", "sales", "marketing", "human resources", "hr",
	        "customer service", "finance", "accounting", "education", "teaching",
	    ]

	    if not text:
	        return "Unknown"
	    text_lower = text.lower()

	    def count_matches(keywords):
	        """Count the keywords that occur in the text as whole words."""
	        hits = 0
	        for keyword in keywords:
	            phrase = r"\s+".join(re.escape(part) for part in keyword.split())
	            # Lookarounds instead of \b: \b cannot end a keyword such as
	            # "c++" whose last character is not a word character.
	            if re.search(r"(?<!\w)" + phrase + r"(?!\w)", text_lower):
	                hits += 1
	        return hits

	    tech_matches = count_matches(technical_keywords)
	    non_tech_matches = count_matches(non_technical_keywords)

	    if tech_matches > non_tech_matches:
	        return "Technical"
	    if non_tech_matches > tech_matches:
	        return "Non-Technical"
	    return "Unknown"