import re
from typing import List, Tuple

from config import *


class YearParser:
    """Extract year filters (single years and year ranges) from a free-text query."""

    # Years that may be returned by extract_years; read at call time.
    VALID_YEARS = [2022, 2023, 2024]

    # A standalone four-digit year in the 2000-2099 span.
    _SINGLE_YEAR_RE = re.compile(r'\b(20\d{2})\b')

    # Supported range spellings, matched case-insensitively.
    _RANGE_RES = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'\b(20\d{2})\s*-\s*(20\d{2})\b',         # 2022-2024
            r'\b(20\d{2})\s+sampai\s+(20\d{2})\b',    # 2022 sampai 2024
            r'\b(20\d{2})\s+hingga\s+(20\d{2})\b',    # 2022 hingga 2024
            r'\b(20\d{2})\s+s\.?d\.?\s+(20\d{2})\b',  # 2022 s.d 2024
        )
    )

    @staticmethod
    def extract_years(query: str) -> Tuple[List[int], str, bool, bool]:
        """Pull year mentions out of *query* and return the query without them.

        Year ranges are expanded inclusively; a reversed range such as
        ``2024-2022`` is treated the same as ``2022-2024``. Standalone years
        are collected from whatever text remains after the ranges have been
        removed, so a query may mix both forms.

        Args:
            query: Free-text query that may mention years.

        Returns:
            A 4-tuple ``(years, cleaned_query, user_mentioned_year,
            user_mentioned_invalid_year)``:

            * ``years`` - sorted, de-duplicated mentioned years that are in
              ``VALID_YEARS``. When the query mentions no year at all, every
              year in ``VALID_YEARS`` is returned instead.
            * ``cleaned_query`` - the query with year expressions removed,
              whitespace collapsed, and leading/trailing commas, hyphens and
              spaces stripped.
            * ``user_mentioned_year`` - True when any year or range was found.
            * ``user_mentioned_invalid_year`` - True when at least one
              mentioned year (including years inside a range) is not in
              ``VALID_YEARS``.
        """
        remaining = query
        mentioned: List[int] = []

        # Ranges first, so their endpoints are not re-counted as single years.
        for range_re in YearParser._RANGE_RES:
            for first, second in range_re.findall(remaining):
                # Normalise reversed ranges instead of yielding an empty span.
                low, high = sorted((int(first), int(second)))
                mentioned.extend(range(low, high + 1))
            remaining = range_re.sub('', remaining)

        # Standalone years left over once the ranges are gone.
        single_year_re = YearParser._SINGLE_YEAR_RE
        mentioned.extend(int(token) for token in single_year_re.findall(remaining))
        remaining = single_year_re.sub('', remaining)

        valid = set(YearParser.VALID_YEARS)
        user_mentioned_year = bool(mentioned)
        user_mentioned_invalid_year = any(year not in valid for year in mentioned)

        if user_mentioned_year:
            # Do not fall back to all valid years when the user mentioned a
            # year but none of the mentioned years are valid.
            years = sorted(valid.intersection(mentioned))
        else:
            years = sorted(valid)

        cleaned_query = re.sub(r'\s+', ' ', remaining).strip()
        cleaned_query = re.sub(r'^[,\-\s]+|[,\-\s]+$', '', cleaned_query)

        return years, cleaned_query, user_mentioned_year, user_mentioned_invalid_year