Spaces:
Runtime error
Runtime error
| import re | |
| from datetime import datetime | |
def preprocess_text(text):
    """Normalize raw text into a lower-case, single-spaced string.

    Steps, in order:
      1. lower-case the input;
      2. replace every character that is not a word character, whitespace,
         or a hyphen with a space (so ``/``, ``,``, ``:`` etc. become
         spaces, while hyphens survive for ranges like ``2019 - 2021``);
      3. collapse each run of whitespace to one space and trim both ends.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    # The hyphen is deliberately left out of the "punctuation" class.
    depunctuated = re.sub(r'[^\w\s-]', ' ', text.lower())
    return re.sub(r'\s+', ' ', depunctuated).strip()
def extract_dates(text):
    """Extract ``(start, end)`` date-range pairs from free text.

    Three range shapes are recognised, tried in this priority order
    (matching is case-insensitive):

      1. ``YYYY/MM - YYYY/MM``      e.g. ``2019/01 - 2020/06``
      2. ``Month YYYY - Month YYYY`` e.g. ``Jan 2019 - March 2020``
         (English month names, full or abbreviated)
      3. ``YYYY - YYYY``            e.g. ``2018 - 2020``

    In every shape the end may instead be the word ``present`` or
    ``current``.

    A stretch of text is reported at most once: a match that overlaps a
    span already claimed by a higher-priority pattern is skipped.  Without
    this, ``"january 2020 - present"`` would be returned both as
    ``("january 2020", "present")`` and as ``("2020", "present")``.

    Args:
        text: The text to scan.  ``None`` or an empty string yields ``[]``.

    Returns:
        A list of ``(start, end)`` string tuples exactly as they appear in
        *text*, grouped by pattern priority (not by position in the text).
    """
    if not text:
        return []

    # Restricting the textual form to real month names keeps an arbitrary
    # preceding word ("worked 2018 - present") from being captured as part
    # of the start date.
    month = (
        r'(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|'
        r'july?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|'
        r'nov(?:ember)?|dec(?:ember)?)'
    )
    open_end = r'present|current'

    # \b anchors stop matches from starting or ending inside a longer
    # token, e.g. the "2345-6789" buried in "12345-67890".
    date_patterns = [
        r'\b(\d{4}/\d{2})\s*-\s*(\d{4}/\d{2}|' + open_end + r')\b',
        r'\b(' + month + r'\s+\d{4})\s*-\s*('
        + month + r'\s+\d{4}|' + open_end + r')\b',
        r'\b(\d{4})\s*-\s*(\d{4}|' + open_end + r')\b',
    ]

    dates = []
    claimed_spans = []
    for pattern in date_patterns:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            start, end = match.span()
            overlaps = any(
                start < taken_end and taken_start < end
                for taken_start, taken_end in claimed_spans
            )
            if overlaps:
                continue
            claimed_spans.append((start, end))
            dates.append((match.group(1), match.group(2)))
    return dates
def parse_date(date_str):
    """Parse a date string in one of several supported formats.

    Supported formats, tried in order: ``YYYY/MM``, ``Month YYYY`` (full
    month name), ``Mon YYYY`` (abbreviated month name) and ``YYYY``.
    Leading and trailing whitespace is ignored.

    Args:
        date_str: The text to parse.  A falsy value (``None``, ``""``) or
            the words ``present`` / ``current`` (any case) mean "now".

    Returns:
        A naive ``datetime``: the current local time for an open-ended
        date, otherwise the parsed date (missing month/day default to 1).
        ``None`` if the string matches none of the supported formats.
    """
    if not date_str:
        return datetime.now()

    # Stray whitespace would otherwise defeat both the keyword check and
    # strptime (" 2020 " and " Present " used to come back as None).
    cleaned = date_str.strip()
    if cleaned.lower() in ('present', 'current'):
        return datetime.now()

    for fmt in ('%Y/%m', '%B %Y', '%b %Y', '%Y'):
        try:
            return datetime.strptime(cleaned, fmt)
        except (ValueError, TypeError):
            # Not this format; fall through to the next candidate.
            continue
    return None