Spaces:
Runtime error
Runtime error
File size: 1,229 Bytes
d052038 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import re
from datetime import datetime
def preprocess_text(text):
    """Normalize text for downstream matching.

    The text is lowercased, every character that is not a word character,
    whitespace, or a hyphen is replaced by a space, runs of whitespace are
    collapsed to a single space, and leading/trailing whitespace is removed.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    substitutions = (
        # Hyphens are deliberately preserved so date ranges stay intact.
        (r'[^\w\s-]', ' '),
        # Collapse any whitespace run (including the spaces added above).
        (r'\s+', ' '),
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
def extract_dates(text):
    """Extract (start, end) date-range pairs from free text.

    Three range shapes are recognised, tried in this priority order:

    1. ``YYYY/MM - YYYY/MM``
    2. ``<month> YYYY - <month> YYYY`` where ``<month>`` is an English month
       name or its usual abbreviation (``jan``, ``sept``, ``december``, ...)
    3. ``YYYY - YYYY``

    In every shape the end of the range may instead be the word ``present``
    or ``current``. Matching is case-insensitive.

    A match is discarded when it overlaps text already claimed by a
    higher-priority shape, so ``"jan 2020 - present"`` yields one range
    rather than an additional ``("2020", "present")`` duplicate.

    Args:
        text: The string to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of ``(start, end)`` string tuples exactly as they appear in
        ``text``. Results are grouped by shape priority and, within one
        shape, ordered by position in the text.
    """
    if not text:
        return []

    # Restricting the month slot to real month names stops arbitrary words
    # ("worked 2018 - present") from being captured as part of a date.
    month = (
        r'(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?'
        r'|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?'
        r'|dec(?:ember)?)'
    )
    month_year = month + r'\s+\d{4}'
    date_patterns = [
        r'(\d{4}/\d{2})\s*-\s*(\d{4}/\d{2}|present|current)',
        r'\b(' + month_year + r')\s*-\s*(' + month_year + r'|present|current)',
        r'(\d{4})\s*-\s*(\d{4}|present|current)',
    ]

    dates = []
    claimed_spans = []  # (start, end) offsets of matches already accepted
    for pattern in date_patterns:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            start, end = match.span()
            overlaps = any(
                start < taken_end and taken_start < end
                for taken_start, taken_end in claimed_spans
            )
            if overlaps:
                # A more specific shape already covers this stretch of text.
                continue
            claimed_spans.append((start, end))
            dates.append((match.group(1), match.group(2)))
    return dates
def parse_date(date_str):
    """Parse a date string in one of the supported formats.

    Supported formats, tried in order: ``YYYY/MM``, ``<full month> YYYY``,
    ``<abbreviated month> YYYY`` and ``YYYY``. Surrounding whitespace is
    ignored, and the abbreviation ``sept`` is accepted as September.

    Args:
        date_str: The string to parse. A falsy value (``None``, ``""``) or
            the words ``present`` / ``current`` (any case) mean "now".

    Returns:
        A ``datetime`` for a recognised date (missing month/day default to
        1), the current naive local ``datetime`` for "now" inputs, or
        ``None`` when the string matches none of the formats.
    """
    if not date_str:
        return datetime.now()

    cleaned = date_str.strip()
    if cleaned.lower() in ('present', 'current'):
        return datetime.now()

    # strptime's %b only knows "Sep" and %B only "September"; normalise the
    # common "Sept" spelling so it is not silently rejected.
    cleaned = re.sub(r'^sept\b', 'sep', cleaned, flags=re.IGNORECASE)

    for fmt in ('%Y/%m', '%B %Y', '%b %Y', '%Y'):
        try:
            return datetime.strptime(cleaned, fmt)
        except ValueError:
            # Not this format; fall through to the next candidate.
            continue
    return None