rosemariafontana committed on
Commit
9c9bc57
·
verified ·
1 Parent(s): 568ee12

redo loop for date parsing

Browse files
Files changed (1) hide show
  1. app.py +23 -11
app.py CHANGED
@@ -40,30 +40,42 @@ def extract_features(tokens, labels):
40
  for token, label in zip(tokens, labels):
41
  print(f"Debug -- Processing token: {token}")
42
 
43
- # Check complete date pattern
44
- if re.match(date_pattern, current_date):
 
 
 
 
 
45
  merged_entities.append((current_date, 'date'))
46
- print(f"Debug -- Completed date added: {current_date}")
47
- current_date = ""
48
- # Check if the token matches a partial date pattern
49
  elif re.match(partial_date_pattern, token):
50
- print(f"Debug -- Potentially building date: Token Start {token} After Token")
51
- current_date += token
52
  else:
 
53
  if current_date:
54
- merged_entities.append((current_date, 'date'))
55
  print(f"Debug -- Date finalized: {current_date}")
56
- current_date = ""
 
57
 
 
58
  print(f"Debug -- Appending non-date Token: {token}")
59
- merged_entities.append((token, 'other'))
60
 
61
- if current_date:
 
62
  print(f"Debug -- Dangling leftover date added: {current_date}")
63
  merged_entities.append((current_date, 'date'))
64
 
65
  return merged_entities
66
 
 
 
 
67
  # NOTE: labels aren't being applied properly ... This is the LABEL approach
68
  #
69
  # Loop through tokens and labels
 
40
  for token, label in zip(tokens, labels):
41
  print(f"Debug -- Processing token: {token}")
42
 
43
+ # If we already have some part of a date and the next token could still be part of it, continue accumulating
44
+ if current_date and re.match(partial_date_pattern, token):
45
+ current_date += token
46
+ print(f"Debug -- Potential partial date: {current_date}")
47
+ # If the accumulated entity matches a complete date after appending this token
48
+ elif re.match(date_pattern, current_date + token):
49
+ current_date += token
50
  merged_entities.append((current_date, 'date'))
51
+ print(f"Debug -- Complete date added: {current_date}")
52
+ current_date = "" # Reset for next entity
53
+ # If the token could start a new date (e.g., '14' could be a day or hour)
54
  elif re.match(partial_date_pattern, token):
55
+ current_date = token
56
+ print(f"Debug -- Potentially starting a new date: {token}")
57
  else:
58
+ # If no patterns are detected and there is any accumulated data
59
  if current_date:
60
+ # Finalize accumulated partial date
61
  print(f"Debug -- Date finalized: {current_date}")
62
+ merged_entities.append((current_date, 'date'))
63
+ current_date = "" # Reset for next entity
64
 
65
+ # Append token as non-date
66
  print(f"Debug -- Appending non-date Token: {token}")
67
+ merged_entities.append((token, 'non-date'))
68
 
69
+ # If there's any leftover accumulated date data, add it to merged_entities
70
+ if current_date:
71
  print(f"Debug -- Dangling leftover date added: {current_date}")
72
  merged_entities.append((current_date, 'date'))
73
 
74
  return merged_entities
75
 
76
+
77
+
78
+
79
  # NOTE: labels aren't being applied properly ... This is the LABEL approach
80
  #
81
  # Loop through tokens and labels