Spaces:
Build error
Build error
| import re | |
| from typing import List, Tuple | |
| from config import * | |
class YearParser:
    """Extract year filters (single years and year ranges) from a text query."""

    # Years accepted as valid; any other 20xx year the user mentions is
    # reported through the "invalid year" flag instead of being returned.
    VALID_YEARS = [2022, 2023, 2024]

    # A standalone four-digit year of the form 20xx.
    _SINGLE_YEAR_PATTERN = r'\b(20\d{2})\b'

    # Supported spellings of a year range (matched case-insensitively).
    _RANGE_PATTERNS = [
        r'\b(20\d{2})\s*-\s*(20\d{2})\b',  # 2022-2024
        r'\b(20\d{2})\s+sampai\s+(20\d{2})\b',  # 2022 sampai 2024
        r'\b(20\d{2})\s+hingga\s+(20\d{2})\b',  # 2022 hingga 2024
        r'\b(20\d{2})\s+s\.?d\.?\s+(20\d{2})\b',  # 2022 s.d 2024
    ]

    @staticmethod
    def extract_years(query: str) -> Tuple[List[int], str, bool, bool]:
        """Pull year mentions out of *query*.

        Year ranges are expanded inclusively and may be written in either
        order (``2024-2022`` is treated like ``2022-2024``). Standalone
        years that remain after the ranges are removed are parsed as well.

        Args:
            query: Raw user query text.

        Returns:
            A tuple ``(years, cleaned_query, user_mentioned_year,
            user_mentioned_invalid_year)`` where

            * ``years`` is the sorted, de-duplicated list of mentioned years
              that are in ``VALID_YEARS``. When the query mentions no year
              at all, it is a copy of ``VALID_YEARS``. When the query only
              mentions invalid years, it is empty.
            * ``cleaned_query`` is the query with the year expressions
              removed, whitespace collapsed, and leading/trailing commas,
              hyphens and spaces stripped.
            * ``user_mentioned_year`` is True if any 20xx year or range was
              found.
            * ``user_mentioned_invalid_year`` is True if any mentioned year
              (including years inside a range) is not in ``VALID_YEARS``.
        """
        years = []
        cleaned_query = query
        user_mentioned_year = False
        user_mentioned_invalid_year = False

        for pattern in YearParser._RANGE_PATTERNS:
            matches = re.findall(pattern, query, re.IGNORECASE)
            if not matches:
                continue
            user_mentioned_year = True
            for start_year, end_year in matches:
                # Accept reversed ranges; otherwise range() would be empty
                # and the mention would be silently lost.
                start, end = sorted((int(start_year), int(end_year)))
                for year in range(start, end + 1):
                    if year in YearParser.VALID_YEARS:
                        years.append(year)
                    else:
                        user_mentioned_invalid_year = True
            cleaned_query = re.sub(pattern, '', cleaned_query, flags=re.IGNORECASE)

        # Scan what is left after range removal, so range endpoints are not
        # counted twice and years written next to a range are not ignored.
        single_years = re.findall(YearParser._SINGLE_YEAR_PATTERN, cleaned_query)
        if single_years:
            user_mentioned_year = True
            for year_text in single_years:
                year = int(year_text)
                if year in YearParser.VALID_YEARS:
                    years.append(year)
                else:
                    user_mentioned_invalid_year = True
            cleaned_query = re.sub(YearParser._SINGLE_YEAR_PATTERN, '', cleaned_query)

        # Do not fall back to all valid years when the user did mention a
        # year but every mentioned year turned out to be invalid.
        if not years and not user_mentioned_year:
            years = YearParser.VALID_YEARS.copy()

        cleaned_query = re.sub(r'\s+', ' ', cleaned_query).strip()
        cleaned_query = re.sub(r'^[,\-\s]+|[,\-\s]+$', '', cleaned_query)

        return sorted(set(years)), cleaned_query, user_mentioned_year, user_mentioned_invalid_year