Spaces:
Sleeping
Sleeping
| import stanza | |
| import re | |
| import dateparser | |
| import datetime | |
| from date_parser.parser import DateParser | |
| from dateutil.relativedelta import relativedelta | |
# Module-level DateParser instance; f1 calls dp.parse_date() on each half of
# a date range it has split on "to" / "and".
dp = DateParser()
def f0(query):
    """Extract an explicit numeric start/end date pair from *query*.

    Looks for tokens shaped like ``D-M-YYYY`` / ``D/M/YYYY`` or
    ``YYYY-M-D`` / ``YYYY/M/D``. The first token is taken as the start date
    and the second as the end date.

    Args:
        query: Free-text query that may contain two numeric dates.

    Returns:
        A ``(start, end)`` tuple of ``%Y-%m-%d`` strings when exactly two
        date tokens are found and both can be parsed; otherwise
        ``(None, None)``.
    """
    date_pattern = (r'\b\d{1,2}[-/]\d{1,2}[-/]\d{4}\b|'
                    r'\b\d{4}[-/]\d{1,2}[-/]\d{1,2}\b')
    tokens = re.findall(date_pattern, query)
    if len(tokens) != 2:
        return None, None

    start_text, end_text = tokens
    # A start of '9999-12-31' is treated as a sentinel and replaced by the
    # earliest representable date. (The matching substitution for the end
    # date mapped '9999-12-31' onto itself, so it needs no special case.)
    if start_text == '9999-12-31':
        start_text = str(datetime.date.min)

    start = dateparser.parse(start_text)
    end = dateparser.parse(end_text)
    # dateparser.parse returns None for text it cannot interpret (for
    # example "99/99/2020"); report "no match" instead of raising
    # AttributeError on None.
    if start is None or end is None:
        return None, None

    return start.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')
# Lazily-built stanza pipeline shared by every f1() call.
_NER_PIPELINE = None


def _get_ner_pipeline():
    """Return the stanza tokenize+NER pipeline, constructing it on first use."""
    global _NER_PIPELINE
    if _NER_PIPELINE is None:
        # NOTE(review): model_dir is a hard-coded, machine-specific Windows
        # path; consider making it configurable.
        _NER_PIPELINE = stanza.Pipeline('en', processors='tokenize,ner', model_dir=r"C:\Users\Ankit Sharma\stanza_resources", download_method=stanza.DownloadMethod.REUSE_RESOURCES)
    return _NER_PIPELINE


def f1(query):
    """Extract a date range from DATE entities such as "X to Y" or "X and Y".

    Runs stanza NER over *query*; every DATE entity whose text contains
    " to " or " and " is split on that keyword and, when the split yields
    exactly two parts, each part is parsed with ``dp.parse_date``.

    Args:
        query: Free-text query.

    Returns:
        ``(start, end)`` where ``start`` is the earliest and ``end`` the
        latest of all parsed dates, as ``%Y-%m-%d`` strings, or
        ``(None, None)`` when no range was found.
    """
    doc = _get_ner_pipeline()(query)

    # (substring that triggers the split, regex used to split)
    separators = (
        (' to ', r'\bto\b'),
        (' and ', r'\band\b'),
    )

    date_ranges = []
    for sent in doc.sentences:
        for ent in sent.ents:
            if ent.type != 'DATE':
                continue
            date_text = ent.text
            for marker, split_pattern in separators:
                if marker not in date_text.lower():
                    continue
                parts = re.split(split_pattern, date_text, flags=re.IGNORECASE)
                if len(parts) != 2:
                    continue
                parsed = [dp.parse_date(part.strip()) for part in parts]
                # NOTE(review): assumes parse_date signals failure by
                # returning None - confirm against the date_parser package.
                if any(value is None for value in parsed):
                    continue
                # Only formatted dates are stored; raw text fragments would
                # break the string comparison below.
                date_ranges.append([value.strftime('%Y-%m-%d') for value in parsed])

    if not date_ranges:
        return None, None

    # ISO-formatted strings sort chronologically, so plain min/max over all
    # collected dates gives the overall earliest and latest date.
    all_dates = [date for pair in date_ranges for date in pair]
    return min(all_dates), max(all_dates)
def f2(query, date_format="%Y-%m-%d"):
    """Turn one or two bare four-digit years in *query* into a date range.

    One year ``Y`` yields ``Y-01-01 .. Y-12-31``. Two years yield the span
    from 1 January of the earlier year to 31 December of the later year,
    regardless of the order in which they appear in the text.

    Args:
        query: Free-text query.
        date_format: ``strftime`` format applied to both returned dates.

    Returns:
        ``(start, end)`` formatted with *date_format*, or ``(None, None)``
        when the query does not contain exactly one or two four-digit
        numbers, or when one of them is not a valid calendar year.
    """
    years = [int(token) for token in re.findall(r'\b\d{4}\b', query)]
    if len(years) not in (1, 2):
        return None, None

    first_year, last_year = min(years), max(years)
    # "0000" matches the pattern but datetime rejects year 0 with ValueError.
    if first_year < datetime.MINYEAR:
        return None, None

    start_date = datetime.datetime(first_year, 1, 1)
    end_date = datetime.datetime(last_year, 12, 31)
    return start_date.strftime(date_format), end_date.strftime(date_format)
def f3(query, date_format="%Y-%m-%d"):
    """Resolve "last/previous [N] year|month|week(s)" to a range ending now.

    Only the first such expression in *query* is used. When no number is
    given, N defaults to 1.

    Args:
        query: Free-text query.
        date_format: ``strftime`` format applied to both returned dates.

    Returns:
        ``(start, end)`` formatted with *date_format*, where ``end`` is the
        current local time and ``start`` lies N units before it; or
        ``(None, None)`` when there is no relative expression or N is too
        large to represent as a date.
    """
    match = re.search(r'\b(last|previous)\s*(\d*)\s*(year|month|week)s?\b', query, flags=re.IGNORECASE)
    if match is None:
        return None, None

    # The capture group excludes the optional plural "s", so the unit is
    # always one of the singular forms.
    _, count_text, unit = match.groups()
    count = int(count_text) if count_text else 1
    unit = unit.lower()

    now = datetime.datetime.now()
    try:
        if unit == 'year':
            start_date = now - relativedelta(years=count)
        elif unit == 'month':
            start_date = now - relativedelta(months=count)
        elif unit == 'week':
            # A week is a fixed-length span, so the standard library's
            # timedelta is sufficient; months and years need relativedelta's
            # calendar-aware arithmetic.
            start_date = now - datetime.timedelta(weeks=count)
        else:
            return None, None
    except (OverflowError, ValueError):
        # The requested span reaches before the earliest representable date.
        return None, None

    return start_date.strftime(date_format), now.strftime(date_format)
def f4(query, default_start="2014-01-02"):
    """Resolve "... till <date>" to a range from a fixed start to <date>.

    The keyword must appear as a standalone word (case-insensitive), so
    words that merely contain it, such as "still" or "distillery", are
    ignored. Everything after the first occurrence is handed to
    ``dateparser`` as the end date.

    Args:
        query: Free-text query.
        default_start: Date text used as the start of the range.

    Returns:
        ``(start, end)`` as ``%Y-%m-%d`` strings, or ``(None, None)`` when
        the keyword is absent, nothing follows it, or a date cannot be
        parsed.
    """
    # Match on the original string so the slice offset is always valid for
    # it (lowercasing can change the length of some Unicode text).
    keyword = re.search(r'(?<![A-Za-z])till(?![A-Za-z])', query, flags=re.IGNORECASE)
    if keyword is None:
        return None, None

    end_text = query[keyword.end():].strip()
    if not end_text:
        return None, None

    start_date = dateparser.parse(default_start)
    end_date = dateparser.parse(end_text)
    # dateparser.parse returns None for text it cannot interpret.
    if start_date is None or end_date is None:
        return None, None

    return start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")
def extract_date(text, default_days=30):
    """Return a ``(start, end)`` date range for *text*.

    Tries the extractors f0, f1, f2, f3 and f4 in that order and returns the
    first result in which both dates are set, printing the result together
    with the name of the extractor that produced it. If none succeeds, the
    range covers the last *default_days* days up to now.

    Args:
        text: Free-text query.
        default_days: Length in days of the fallback window.

    Returns:
        ``(start, end)`` as returned by the winning extractor, or the
        fallback window as ``%Y-%m-%d`` strings.
    """
    extractors = (
        ("f0", f0),
        ("f1", f1),
        ("f2", f2),
        ("f3", f3),
        ("f4", f4),
    )
    for label, extractor in extractors:
        start_date, end_date = extractor(text)
        if start_date is not None and end_date is not None:
            print(start_date, end_date, label)
            return start_date, end_date

    # Fallback: default_days (30 unless overridden) back from the current
    # date. Read the clock once so both ends derive from the same instant.
    now = datetime.datetime.now()
    window_start = now - relativedelta(days=default_days)
    return window_start.strftime('%Y-%m-%d'), now.strftime('%Y-%m-%d')