Spaces: Runtime error
updates to date logic
Browse files
app.py
CHANGED
|
@@ -27,13 +27,18 @@ def extract_features(tokens, labels):
|
|
| 27 |
current_date = ""
|
| 28 |
|
| 29 |
print(f"Debug -- Starting entity extraction")
|
|
|
|
|
|
|
|
|
|
| 30 |
date_pattern = r"\d{1,2}/\d{1,2}/\d{2,4}" # Matches full date formats like MM/DD/YYYY or DD/MM/YYYY
|
| 31 |
-
partial_date_pattern = r"\d{1,2}$|[/-]$" # Matches partial date components like "12"
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
# Loop through tokens and labels
|
| 35 |
for token, label in zip(tokens, labels):
|
| 36 |
print(f"Debug -- Potentially creating date,, token: {token} label: {label}")
|
|
|
|
| 37 |
if label == 'LABEL_1':
|
| 38 |
# Check for partial date fragments (like '12' or '/')
|
| 39 |
if re.match(partial_date_pattern, token):
|
|
@@ -41,7 +46,7 @@ def extract_features(tokens, labels):
|
|
| 41 |
current_date += token # Append token to the current entity
|
| 42 |
|
| 43 |
# If the accumulated entity matches a full date
|
| 44 |
-
|
| 45 |
merged_entities.append((current_date, 'date'))
|
| 46 |
print(f"Debug -- Complete date added: {token}")
|
| 47 |
current_date = "" # Reset for next entity
|
|
@@ -62,6 +67,7 @@ def extract_features(tokens, labels):
|
|
| 62 |
print(f"Debug -- Finalizing leftover date added: {current_date}")
|
| 63 |
current_date = "" # Reset
|
| 64 |
|
|
|
|
| 65 |
merged_entities.append((token, label))
|
| 66 |
|
| 67 |
if current_date:
|
|
|
|
| 27 |
current_date = ""
|
| 28 |
|
| 29 |
print(f"Debug -- Starting entity extraction")
|
| 30 |
+
#date_pattern = r"\d{1,2}/\d{1,2}/\d{2,4}" # Matches full date formats like MM/DD/YYYY or DD/MM/YYYY
|
| 31 |
+
#partial_date_pattern = r"\d{1,2}$|[/-]$" # Matches partial date components like "12" or "/" at the end
|
| 32 |
+
|
| 33 |
date_pattern = r"\d{1,2}/\d{1,2}/\d{2,4}" # Matches full date formats like MM/DD/YYYY or DD/MM/YYYY
|
| 34 |
+
partial_date_pattern = r"^\d{1,2}/?$|^[/-]$" # Matches partial date components like "12", "/", "02/", etc.
|
| 35 |
+
|
| 36 |
|
| 37 |
|
| 38 |
# Loop through tokens and labels
|
| 39 |
for token, label in zip(tokens, labels):
|
| 40 |
print(f"Debug -- Potentially creating date,, token: {token} label: {label}")
|
| 41 |
+
|
| 42 |
if label == 'LABEL_1':
|
| 43 |
# Check for partial date fragments (like '12' or '/')
|
| 44 |
if re.match(partial_date_pattern, token):
|
|
|
|
| 46 |
current_date += token # Append token to the current entity
|
| 47 |
|
| 48 |
# If the accumulated entity matches a full date
|
| 49 |
+
elif re.match(date_pattern, current_date):
|
| 50 |
merged_entities.append((current_date, 'date'))
|
| 51 |
print(f"Debug -- Complete date added: {token}")
|
| 52 |
current_date = "" # Reset for next entity
|
|
|
|
| 67 |
print(f"Debug -- Finalizing leftover date added: {current_date}")
|
| 68 |
current_date = "" # Reset
|
| 69 |
|
| 70 |
+
# Append LABEL_0 token
|
| 71 |
merged_entities.append((token, label))
|
| 72 |
|
| 73 |
if current_date:
|