"""Extract a ``(start, end)`` date range from a free-text query.

``extract_date`` tries a chain of extractors (``f0`` .. ``f4``) in order and
returns the first complete range as a pair of ``YYYY-MM-DD`` strings.  Every
extractor returns ``(None, None)`` when it does not apply, so the chain can
fall through to the next one.
"""
import datetime
import functools
import re
from typing import Optional, Tuple

import dateparser
import stanza
from date_parser.parser import DateParser
from dateutil.relativedelta import relativedelta

# (start, end) as formatted strings, or (None, None) when nothing was found.
DateRange = Tuple[Optional[str], Optional[str]]

ISO_FORMAT = '%Y-%m-%d'

# Location of the downloaded stanza models.  This path is machine-specific;
# it is read the first time f1() builds its pipeline.
STANZA_MODEL_DIR = r"C:\Users\Ankit Sharma\stanza_resources"

# f0 treats this literal as "open start" and maps it to datetime.date.min.
OPEN_START_SENTINEL = '9999-12-31'

# Fixed lower bound used by f4 for "till <date>" queries.
TILL_START_DATE = datetime.date(2014, 1, 2)

# Window used by extract_date when no extractor matches.
DEFAULT_LOOKBACK_DAYS = 30

_NUMERIC_DATE_RE = re.compile(
    r'\b\d{1,2}[-/]\d{1,2}[-/]\d{4}\b|'
    r'\b\d{4}[-/]\d{1,2}[-/]\d{1,2}\b'
)
_YEAR_RE = re.compile(r'\b\d{4}\b')
_RELATIVE_RE = re.compile(
    r'\b(last|previous)\s*(\d*)\s*(year|month|week)s?\b',
    flags=re.IGNORECASE,
)
# Whole words only, so "still" / "distill" do not trigger f4; "until" is kept
# because the previous substring test also accepted it.
_TILL_RE = re.compile(r'\b(?:until|till)\b', flags=re.IGNORECASE)
# (keyword that must be present, regex used to split the entity text)
_RANGE_SPLITTERS = (
    (' to ', re.compile(r'\bto\b', flags=re.IGNORECASE)),
    (' and ', re.compile(r'\band\b', flags=re.IGNORECASE)),
)

dp = DateParser()


def _parse_day(text):
    """Return ``text`` parsed by dateparser as a ``date``, or None if unparsable."""
    parsed = dateparser.parse(text)
    return None if parsed is None else parsed.date()


@functools.lru_cache(maxsize=1)
def _get_pipeline():
    """Build the stanza tokenize+NER pipeline once and reuse it across calls."""
    # stanza.download('en') must have been run beforehand: REUSE_RESOURCES
    # makes the pipeline use the models already present in STANZA_MODEL_DIR.
    return stanza.Pipeline(
        'en',
        processors='tokenize,ner',
        model_dir=STANZA_MODEL_DIR,
        download_method=stanza.DownloadMethod.REUSE_RESOURCES,
    )


def f0(query) -> DateRange:
    """Extract a range from exactly two numeric dates in ``query``.

    Recognised shapes are ``D-M-YYYY`` / ``D/M/YYYY`` and ``YYYY-M-D`` /
    ``YYYY/M/D`` (day/month order is decided by dateparser).  A first date of
    ``9999-12-31`` means "open start" and becomes ``datetime.date.min``.

    Returns:
        ``(start, end)`` as ``YYYY-MM-DD`` strings, or ``(None, None)`` when
        the query does not contain exactly two such dates or one of them
        cannot be parsed.
    """
    dates = _NUMERIC_DATE_RE.findall(query)
    if len(dates) != 2:
        return None, None

    first, second = dates
    if first == OPEN_START_SENTINEL:
        start = datetime.date.min
    else:
        start = _parse_day(first)
    end = _parse_day(second)
    if start is None or end is None:
        # e.g. "31/31/2020": matches the regex but is not a real date.
        return None, None

    # isoformat() always zero-pads the year; strftime('%Y') does not on every
    # platform for years below 1000 (relevant for date.min).
    return start.isoformat(), end.isoformat()


def f1(query) -> DateRange:
    """Extract a range from DATE entities of the form "X to Y" / "X and Y".

    The query is run through stanza NER; each DATE entity containing
    ``" to "`` or ``" and "`` is split on that word, and when the split
    yields exactly two parts both are parsed with the module-level ``dp``.

    Returns:
        The earliest and latest parsed dates as ``YYYY-MM-DD`` strings, or
        ``(None, None)`` when no entity produced a parsable pair.
    """
    doc = _get_pipeline()(query)

    found = []
    for sent in doc.sentences:
        for ent in sent.ents:
            if ent.type != 'DATE':
                continue
            lowered = ent.text.lower()
            for keyword, splitter in _RANGE_SPLITTERS:
                if keyword not in lowered:
                    continue
                parts = splitter.split(ent.text)
                if len(parts) != 2:
                    continue
                parsed = [dp.parse_date(part.strip()) for part in parts]
                # NOTE(review): assumes parse_date signals failure by
                # returning None -- confirm against the date_parser package.
                if any(day is None for day in parsed):
                    continue
                found.extend(day.strftime(ISO_FORMAT) for day in parsed)

    if not found:
        return None, None
    # ISO strings sort chronologically, so plain min/max is enough.
    return min(found), max(found)


def f2(query, date_format="%Y-%m-%d") -> DateRange:
    """Extract a whole-year range from one or two 4-digit numbers in ``query``.

    One year ``Y`` gives ``Y-01-01 .. Y-12-31``.  Two years give January 1st
    of the smaller one through December 31st of the larger one, regardless of
    the order in which they appear.

    Args:
        query: Text to search.
        date_format: ``strftime`` format for the returned strings.

    Returns:
        ``(start, end)`` formatted with ``date_format``, or ``(None, None)``
        when the query holds zero or more than two 4-digit numbers, or a
        number below ``datetime.MINYEAR`` (i.e. ``0000``).
    """
    years = [int(text) for text in _YEAR_RE.findall(query)]
    if len(years) not in (1, 2):
        return None, None

    first_year, last_year = min(years), max(years)
    if first_year < datetime.MINYEAR:
        return None, None

    start = datetime.datetime(first_year, 1, 1)
    end = datetime.datetime(last_year, 12, 31)
    return start.strftime(date_format), end.strftime(date_format)


def f3(query, date_format="%Y-%m-%d") -> DateRange:
    """Extract a range from "last/previous [N] year(s)/month(s)/week(s)".

    Only the first such expression is used; a missing ``N`` means 1.  The
    range runs from ``now - N units`` to ``now``.

    Args:
        query: Text to search.
        date_format: ``strftime`` format for the returned strings.

    Returns:
        ``(start, end)`` formatted with ``date_format``, or ``(None, None)``
        when no expression is found or ``N`` is too large to represent.
    """
    match = _RELATIVE_RE.search(query)
    if match is None:
        return None, None

    count_text = match.group(2)
    count = int(count_text) if count_text else 1
    # The regex captures the singular unit; relativedelta wants the plural
    # keyword (years= / months= / weeks=).
    unit_keyword = match.group(3).lower() + 's'

    now = datetime.datetime.now()
    try:
        start = now - relativedelta(**{unit_keyword: count})
    except (OverflowError, ValueError):
        # e.g. "last 5000 years" would land before year 1.
        return None, None
    return start.strftime(date_format), now.strftime(date_format)


def f4(query) -> DateRange:
    """Extract a range from "till <date>" / "until <date>".

    Everything after the first whole-word ``till``/``until`` is handed to
    dateparser as the end date; the start date is fixed at
    ``TILL_START_DATE``.

    Returns:
        ``(start, end)`` as ``YYYY-MM-DD`` strings, or ``(None, None)`` when
        the keyword is absent or the text after it is not a parsable date.
    """
    match = _TILL_RE.search(query)
    if match is None:
        return None, None

    end = dateparser.parse(query[match.end():].strip())
    if end is None:
        return None, None
    return TILL_START_DATE.strftime(ISO_FORMAT), end.strftime(ISO_FORMAT)


def extract_date(text) -> Tuple[str, str]:
    """Return the date range mentioned in ``text`` as ``YYYY-MM-DD`` strings.

    Extractors are tried in the order f0, f1, f2, f3, f4; the first one that
    yields both a start and an end wins, and its result is printed together
    with the extractor's name.  If none applies, the range defaults to the
    last ``DEFAULT_LOOKBACK_DAYS`` (30) days ending now.
    """
    for extractor in (f0, f1, f2, f3, f4):
        start_date, end_date = extractor(text)
        if start_date is not None and end_date is not None:
            print(start_date, end_date, extractor.__name__)
            return start_date, end_date

    # Default case: DEFAULT_LOOKBACK_DAYS back to the current date.  A single
    # now() keeps both ends consistent across a midnight rollover.
    now = datetime.datetime.now()
    start = now - relativedelta(days=DEFAULT_LOOKBACK_DAYS)
    return start.strftime(ISO_FORMAT), now.strftime(ISO_FORMAT)