Spaces:

DreamStream-1
/

HR-Resume-Analyzer

Runtime error

HR-Resume-Analyzer / text_processing.py

Create text_processing.py

d052038 verified about 1 year ago

1.23 kB

	import re
	from datetime import datetime

	def preprocess_text(text):
	    """Normalize raw text for downstream matching.

	    The text is lowercased, every character that is not a word character,
	    whitespace or a hyphen is replaced by a space, and runs of whitespace
	    are collapsed to a single space. Leading and trailing whitespace is
	    removed from the result.
	    """
	    lowered = text.lower()
	    # Hyphens survive so that ranges such as "2019 - 2020" stay intact.
	    without_punctuation = re.sub(r'[^\w\s-]', ' ', lowered)
	    single_spaced = re.sub(r'\s+', ' ', without_punctuation)
	    return single_spaced.strip()

	def extract_dates(text):
	    """Extract date ranges of the form "<start> - <end>" from text.

	    Three shapes are recognised, case-insensitively, each requiring a
	    single whitespace character on either side of the hyphen:

	    * ``YYYY/MM - YYYY/MM``
	    * ``<word> YYYY - <word> YYYY`` (e.g. ``Jan 2020 - Mar 2021``)
	    * ``YYYY - YYYY``

	    In every shape the end may instead be the word ``present`` or
	    ``current``.

	    Returns a list of ``(start, end)`` string tuples exactly as they appear
	    in ``text``. Results are grouped by pattern (in the order listed above),
	    not by position in the text. Returns an empty list when nothing matches.
	    """
	    # Alternation must be a bare "|"; an escaped "\|" would only match a
	    # literal pipe character and the patterns could never match real text.
	    date_patterns = [
	        r'(\d{4}/\d{2})\s-\s(\d{4}/\d{2}|present|current)',
	        r'(\w+\s+\d{4})\s-\s(\w+\s+\d{4}|present|current)',
	        r'(\d{4})\s-\s(\d{4}|present|current)',
	    ]

	    dates = []
	    claimed_spans = []
	    for pattern in date_patterns:
	        for match in re.finditer(pattern, text, re.IGNORECASE):
	            start, end = match.span()
	            # The bare-year pattern also matches the tail of a month-year
	            # range ("2020 - present" inside "jan 2020 - present"); skip
	            # anything overlapping a range an earlier pattern already took.
	            if any(start < taken_end and taken_start < end
	                   for taken_start, taken_end in claimed_spans):
	                continue
	            claimed_spans.append((start, end))
	            dates.append((match.group(1), match.group(2)))
	    return dates

	def parse_date(date_str):
	    """Parse a date string in one of several formats into a ``datetime``.

	    Accepted formats, tried in order: ``YYYY/MM``, full month name plus
	    year (``March 2020``), abbreviated month name plus year (``Mar 2020``)
	    and a bare year (``2020``). Surrounding whitespace is ignored.

	    Returns:
	        ``datetime.now()`` when ``date_str`` is empty/``None`` or is the
	        word ``present`` or ``current`` (any case); the parsed ``datetime``
	        when a format matches; ``None`` when no format matches or when a
	        non-empty value that is not a string is passed.
	    """
	    if not date_str:
	        return datetime.now()
	    if not isinstance(date_str, str):
	        # Unparseable input yields None rather than an AttributeError.
	        return None

	    cleaned = date_str.strip()
	    if cleaned.lower() in ('present', 'current'):
	        return datetime.now()

	    for fmt in ('%Y/%m', '%B %Y', '%b %Y', '%Y'):
	        try:
	            return datetime.strptime(cleaned, fmt)
	        except ValueError:
	            # This format did not fit; fall through to the next one.
	            continue
	    return None