# Viewer residue from the page this file was copied from: "Spaces: Sleeping", file size 6,681 bytes, id 829f2ca.
import stanza
import re
import dateparser
import datetime
from date_parser.parser import DateParser
from dateutil.relativedelta import relativedelta
# Module-level DateParser instance; f1 calls dp.parse_date() on each half of a
# date range it has split on "to" / "and".
dp = DateParser()
def f0(query):
    """Extract an explicit start/end pair of numeric dates from *query*.

    A date token is either two 1-2 digit fields followed by a 4-digit year,
    or a 4-digit year followed by two 1-2 digit fields, separated by ``-`` or
    ``/``. The query must contain exactly two such tokens; the first is used
    as the start and the second as the end.

    Args:
        query: Free-text query string.

    Returns:
        ``(start, end)`` as ``YYYY-MM-DD`` strings, or ``(None, None)`` when
        the query does not hold exactly two date tokens or when either token
        cannot be parsed into a date.
    """
    date_pattern = (r'\b\d{1,2}[-/]\d{1,2}[-/]\d{4}\b|'
                    r'\b\d{4}[-/]\d{1,2}[-/]\d{1,2}\b')
    tokens = re.findall(date_pattern, query)
    if len(tokens) != 2:
        return None, None

    start_text, end_text = tokens
    # '9999-12-31' in the start position acts as an "open start" sentinel and
    # is swapped for the earliest representable date. The same literal in the
    # end position already equals the latest representable date, so it needs
    # no substitution.
    if start_text == '9999-12-31':
        start_text = str(datetime.date.min)

    start_dt = dateparser.parse(start_text)
    end_dt = dateparser.parse(end_text)
    # The regex accepts shapes that are not real dates (e.g. 99/99/2020);
    # dateparser answers None for those, so report "no match" rather than
    # crashing on None.date().
    if start_dt is None or end_dt is None:
        return None, None

    # isoformat() always zero-pads the year, unlike strftime('%Y') which is
    # platform-dependent for years below 1000 (relevant for the sentinel).
    return start_dt.date().isoformat(), end_dt.date().isoformat()
# Lazily-built stanza pipeline shared by every f1() call.
_STANZA_PIPELINE = None


def _get_stanza_pipeline():
    """Return the tokenize+NER stanza pipeline, building it on first use."""
    global _STANZA_PIPELINE
    if _STANZA_PIPELINE is None:
        # stanza.download('en') must have been run beforehand: the pipeline
        # is told to reuse already-downloaded resources from model_dir.
        _STANZA_PIPELINE = stanza.Pipeline(
            'en',
            processors='tokenize,ner',
            model_dir=r"C:\Users\Ankit Sharma\stanza_resources",
            download_method=stanza.DownloadMethod.REUSE_RESOURCES,
        )
    return _STANZA_PIPELINE


def _parse_range_parts(parts):
    """Parse each text fragment with ``dp``; return formatted dates or None."""
    formatted = []
    for part in parts:
        parsed = dp.parse_date(part.strip())
        # NOTE(review): assumes parse_date signals failure by returning None;
        # confirm against the DateParser implementation.
        if parsed is None:
            return None
        formatted.append(parsed.strftime('%Y-%m-%d'))
    return formatted


def f1(query):
    """Extract a date range from DATE entities that stanza finds in *query*.

    Each DATE entity whose text contains `` to `` or `` and `` is split on
    that keyword; when the split yields exactly two fragments, both are
    parsed with the module-level ``dp`` parser.

    Args:
        query: Free-text query string.

    Returns:
        ``(earliest, latest)`` over every parsed date as ``%Y-%m-%d``
        strings, or ``(None, None)`` when no range could be extracted.
    """
    doc = _get_stanza_pipeline()(query)

    # (substring that must be present, regex used for the split)
    separators = ((' to ', r'\bto\b'), (' and ', r'\band\b'))

    found_dates = []
    for sent in doc.sentences:
        for ent in sent.ents:
            if ent.type != 'DATE':
                continue
            date_text = ent.text
            lowered = date_text.lower()
            for marker, split_pattern in separators:
                if marker not in lowered:
                    continue
                parts = re.split(split_pattern, date_text, flags=re.IGNORECASE)
                if len(parts) != 2:
                    continue
                # Only parsed, formatted dates are collected; the raw text
                # fragments must never be mixed into the comparison below.
                parsed = _parse_range_parts(parts)
                if parsed is not None:
                    found_dates.extend(parsed)

    if not found_dates:
        return None, None
    # '%Y-%m-%d' strings order chronologically when compared as text
    # (for 4-digit years), so min/max give the overall extremes.
    return min(found_dates), max(found_dates)
def f2(query, date_format="%Y-%m-%d"):
    """Turn one or two bare 4-digit years in *query* into a date range.

    One year gives January 1st to December 31st of that year. Two years give
    January 1st of the earlier year to December 31st of the later one,
    whichever order they appear in.

    Args:
        query: Free-text query string.
        date_format: ``strftime`` format used for both returned strings.

    Returns:
        ``(start, end)`` formatted with *date_format*, or ``(None, None)``
        when the query does not contain exactly one or two valid years.
    """
    years = [int(token) for token in re.findall(r'\b\d{4}\b', query)]
    if len(years) not in (1, 2):
        return None, None
    # '0000' matches the pattern but is below datetime's minimum year and
    # would make the datetime constructor raise.
    if min(years) < datetime.MINYEAR:
        return None, None

    start_date = datetime.datetime(min(years), 1, 1)
    end_date = datetime.datetime(max(years), 12, 31)
    return start_date.strftime(date_format), end_date.strftime(date_format)
def f3(query, date_format="%Y-%m-%d"):
    """Resolve a "last/previous [N] year|month|week(s)" phrase to a range.

    The range ends at the current local time and starts N units earlier;
    N defaults to 1 when the phrase carries no number. Only the first such
    phrase in the query is used.

    Args:
        query: Free-text query string.
        date_format: ``strftime`` format used for both returned strings.

    Returns:
        ``(start, end)`` formatted with *date_format*, or ``(None, None)``
        when no relative phrase is found or the resulting start date is
        outside the range ``datetime`` can represent.
    """
    match = re.search(
        r'\b(last|previous)\s*(\d*)\s*(year|month|week)s?\b',
        query,
        flags=re.IGNORECASE,
    )
    if match is None:
        return None, None

    _, count_text, unit = match.groups()
    count = int(count_text) if count_text else 1

    # The pattern captures the singular unit; relativedelta wants the plural
    # keyword for a relative offset.
    delta_keyword = {'year': 'years', 'month': 'months', 'week': 'weeks'}.get(unit.lower())
    if delta_keyword is None:
        return None, None

    end_date = datetime.datetime.now()
    try:
        start_date = end_date - relativedelta(**{delta_keyword: count})
    except (OverflowError, ValueError):
        # e.g. "last 99999 years" lands before the minimum supported year.
        return None, None

    return start_date.strftime(date_format), end_date.strftime(date_format)
def f4(query, default_start="2014-01-02"):
    """Resolve a "till <date>" phrase to a range with a fixed start date.

    Everything after the first standalone word "till" (case-insensitive) is
    handed to ``dateparser`` as the end date.

    Args:
        query: Free-text query string.
        default_start: Start date returned with every match; it is returned
            verbatim, so it must already be a ``%Y-%m-%d`` string.

    Returns:
        ``(default_start, end)`` with *end* as a ``%Y-%m-%d`` string, or
        ``(None, None)`` when the query has no standalone "till", nothing
        follows it, or the remainder cannot be parsed as a date.
    """
    # Word boundaries keep "still", "tiller", ... from triggering this rule,
    # and matching on the original string avoids slicing it with an index
    # computed on a lower-cased copy.
    match = re.search(r'\btill\b', query, flags=re.IGNORECASE)
    if match is None:
        return None, None

    end_text = query[match.end():].strip()
    if not end_text:
        return None, None

    end_date = dateparser.parse(end_text)
    # dateparser returns None for text it cannot interpret.
    if end_date is None:
        return None, None

    return default_start, end_date.strftime("%Y-%m-%d")
def extract_date(text):
    """Derive a ``(start, end)`` date range from free text.

    The extractors are tried in order and the first one that yields both a
    start and an end wins:

    * ``f0`` - two explicit numeric dates
    * ``f1`` - date ranges found through named-entity recognition
    * ``f2`` - one or two bare 4-digit years
    * ``f3`` - "last/previous N years|months|weeks"
    * ``f4`` - "till <date>"

    The winning range is also printed together with the extractor's name.

    Args:
        text: Free-text query string.

    Returns:
        ``(start, end)`` as returned by the winning extractor, or, when none
        matches, the last 30 days up to now as ``%Y-%m-%d`` strings.
    """
    for extractor in (f0, f1, f2, f3, f4):
        start_date, end_date = extractor(text)
        if start_date is not None and end_date is not None:
            print(start_date, end_date, extractor.__name__)
            return start_date, end_date

    # Default case: 30 days back to the current date. A single timestamp is
    # used so both ends are derived from the same instant.
    now = datetime.datetime.now()
    window_start = now - relativedelta(days=30)
    return window_start.strftime('%Y-%m-%d'), now.strftime('%Y-%m-%d')