File size: 2,316 Bytes
1b060e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

import re
from typing import List, Tuple
from config import *


class YearParser:
    """Pull year filters (single years and year ranges) out of a text query."""

    # Years that may be selected; any other 20xx year is reported as invalid.
    VALID_YEARS = [2022, 2023, 2024]

    # A standalone four-digit year in the 2000-2099 span.
    _SINGLE_YEAR_PATTERN = r'\b(20\d{2})\b'

    # Supported range spellings, matched case-insensitively.
    _RANGE_PATTERNS = (
        r'\b(20\d{2})\s*-\s*(20\d{2})\b',  # 2022-2024
        r'\b(20\d{2})\s+sampai\s+(20\d{2})\b',  # 2022 sampai 2024
        r'\b(20\d{2})\s+hingga\s+(20\d{2})\b',  # 2022 hingga 2024
        r'\b(20\d{2})\s+s\.?d\.?\s+(20\d{2})\b',  # 2022 s.d 2024
    )

    @staticmethod
    def extract_years(query: str) -> Tuple[List[int], str, bool, bool]:
        """Extract the years mentioned in *query* and strip them from the text.

        Year ranges are expanded inclusively; the bounds may be given in
        either order ("2024-2022" is treated like "2022-2024"). Standalone
        years that are not part of a range are collected as well, even when
        the query also contains a range.

        Args:
            query: Free-text query that may mention years.

        Returns:
            A tuple ``(years, cleaned_query, user_mentioned_year,
            user_mentioned_invalid_year)`` where

            * ``years`` is the sorted, de-duplicated list of mentioned years
              that are in ``VALID_YEARS``. When the query mentions no year at
              all it is every valid year; when it mentions only invalid years
              it is empty.
            * ``cleaned_query`` is the query with year expressions removed,
              whitespace collapsed and leading/trailing commas, hyphens and
              spaces stripped.
            * ``user_mentioned_year`` is True when any year was found.
            * ``user_mentioned_invalid_year`` is True when any mentioned year
              (including any year covered by a range) is not in
              ``VALID_YEARS``.
        """
        valid_years = set(YearParser.VALID_YEARS)
        years = set()
        cleaned_query = query
        user_mentioned_year = False
        user_mentioned_invalid_year = False

        # Consume ranges first so their endpoints are not counted again as
        # standalone years below.
        for pattern in YearParser._RANGE_PATTERNS:
            matches = re.findall(pattern, cleaned_query, re.IGNORECASE)
            if not matches:
                continue
            user_mentioned_year = True
            for first_text, second_text in matches:
                # Normalise the bounds so a reversed range is not silently empty.
                start, end = sorted((int(first_text), int(second_text)))
                for year in range(start, end + 1):
                    if year in valid_years:
                        years.add(year)
                    else:
                        user_mentioned_invalid_year = True
            cleaned_query = re.sub(pattern, '', cleaned_query, flags=re.IGNORECASE)

        # Whatever years remain after removing ranges are standalone mentions.
        leftover_years = re.findall(YearParser._SINGLE_YEAR_PATTERN, cleaned_query)
        if leftover_years:
            user_mentioned_year = True
            for year_text in leftover_years:
                year = int(year_text)
                if year in valid_years:
                    years.add(year)
                else:
                    user_mentioned_invalid_year = True
            cleaned_query = re.sub(YearParser._SINGLE_YEAR_PATTERN, '', cleaned_query)

        # Fall back to every valid year only when no year was mentioned at
        # all; if the user named years and all were invalid, keep it empty.
        if not years and not user_mentioned_year:
            years = valid_years

        cleaned_query = re.sub(r'\s+', ' ', cleaned_query).strip()
        cleaned_query = re.sub(r'^[,\-\s]+|[,\-\s]+$', '', cleaned_query)

        return sorted(years), cleaned_query, user_mentioned_year, user_mentioned_invalid_year