rosemariafontana committed on
Commit
b6d1b68
·
verified ·
1 Parent(s): e07066e

updates to date logic

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -27,13 +27,18 @@ def extract_features(tokens, labels):
27
  current_date = ""
28
 
29
  print(f"Debug -- Starting entity extraction")
 
 
 
30
  date_pattern = r"\d{1,2}/\d{1,2}/\d{2,4}" # Matches full date formats like MM/DD/YYYY or DD/MM/YYYY
31
- partial_date_pattern = r"\d{1,2}$|[/-]$" # Matches partial date components like "12" or "/" at the end
 
32
 
33
 
34
  # Loop through tokens and labels
35
  for token, label in zip(tokens, labels):
36
  print(f"Debug -- Potentially creating date,, token: {token} label: {label}")
 
37
  if label == 'LABEL_1':
38
  # Check for partial date fragments (like '12' or '/')
39
  if re.match(partial_date_pattern, token):
@@ -41,7 +46,7 @@ def extract_features(tokens, labels):
41
  current_date += token # Append token to the current entity
42
 
43
  # If the accumulated entity matches a full date
44
- if re.match(date_pattern, current_date):
45
  merged_entities.append((current_date, 'date'))
46
  print(f"Debug -- Complete date added: {token}")
47
  current_date = "" # Reset for next entity
@@ -62,6 +67,7 @@ def extract_features(tokens, labels):
62
  print(f"Debug -- Finalizing leftover date added: {current_date}")
63
  current_date = "" # Reset
64
 
 
65
  merged_entities.append((token, label))
66
 
67
  if current_date:
 
27
  current_date = ""
28
 
29
  print(f"Debug -- Starting entity extraction")
30
+ #date_pattern = r"\d{1,2}/\d{1,2}/\d{2,4}" # Matches full date formats like MM/DD/YYYY or DD/MM/YYYY
31
+ #partial_date_pattern = r"\d{1,2}$|[/-]$" # Matches partial date components like "12" or "/" at the end
32
+
33
  date_pattern = r"\d{1,2}/\d{1,2}/\d{2,4}" # Matches full date formats like MM/DD/YYYY or DD/MM/YYYY
34
+ partial_date_pattern = r"^\d{1,2}/?$|^[/-]$" # Matches partial date components like "12", "/", "02/", etc.
35
+
36
 
37
 
38
  # Loop through tokens and labels
39
  for token, label in zip(tokens, labels):
40
  print(f"Debug -- Potentially creating date,, token: {token} label: {label}")
41
+
42
  if label == 'LABEL_1':
43
  # Check for partial date fragments (like '12' or '/')
44
  if re.match(partial_date_pattern, token):
 
46
  current_date += token # Append token to the current entity
47
 
48
  # If the accumulated entity matches a full date
49
+ elif re.match(date_pattern, current_date):
50
  merged_entities.append((current_date, 'date'))
51
  print(f"Debug -- Complete date added: {token}")
52
  current_date = "" # Reset for next entity
 
67
  print(f"Debug -- Finalizing leftover date added: {current_date}")
68
  current_date = "" # Reset
69
 
70
+ # Append LABEL_0 token
71
  merged_entities.append((token, label))
72
 
73
  if current_date: