File size: 819 Bytes
2a64ad4 ed31594 2a64ad4 ed31594 2a64ad4 ed31594 2a64ad4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | import re
def normalize_text(text):
    """Normalize text for comparison: lowercase and strip punctuation.

    The text is lowercased, every character that is neither a word
    character (letter, digit, underscore) nor whitespace is removed, and
    finally leading/trailing whitespace is trimmed.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string. Underscores are kept because the regex
        treats them as word characters; internal whitespace is left as is.
    """
    without_punct = re.sub(r'[^\w\s]', '', text.lower())
    # Trim *after* removing punctuation: trimming first leaves stray
    # whitespace behind for inputs such as "hello !" or "- item".
    return without_punct.strip()
def simple_sent_split(text):
    """Split text into sentences with a simple regex.

    A sentence boundary is a run of '.', '!' or '?' that is either followed
    by whitespace or sits at the end of the text. The terminating
    punctuation is dropped from the returned sentences.

    Args:
        text: The string to split.

    Returns:
        A list of sentences with surrounding whitespace removed; pieces
        that are empty after trimming are omitted.
    """
    pieces = []
    for chunk in re.split(r'[.!?]+\s+|[.!?]+$', text):
        trimmed = chunk.strip()
        # The split yields an empty trailing piece when the text ends
        # with punctuation; skip it along with whitespace-only pieces.
        if trimmed:
            pieces.append(trimmed)
    return pieces
def extract_hard_commitments(text, nlp=None):
    """Extract commitment sentences using modal keyword detection.

    The text is split into sentences with ``simple_sent_split`` and every
    sentence containing at least one of the hard-modal keywords as a whole
    word (case-insensitive) is collected.

    Args:
        text: The string to scan for commitments.
        nlp: Accepted for interface compatibility; not used by this
            function.

    Returns:
        A set of the matching sentences, stripped of surrounding
        whitespace. Duplicate sentences collapse into one entry.
    """
    hard_modals = {'must', 'shall', 'will', 'have', 'need', 'required', 'ought', 'cannot', 'should'}
    # Match keywords as whole words only. Plain substring containment
    # wrongly flags sentences containing e.g. "willow", "shallow",
    # "behave" or "mustard". Inflected forms not listed in the keyword
    # set (such as "needs") are intentionally not matched.
    modal_pattern = re.compile(
        r'\b(?:' + '|'.join(sorted(hard_modals)) + r')\b',
        re.IGNORECASE,
    )
    return {
        sent.strip()
        for sent in simple_sent_split(text)
        if modal_pattern.search(sent)
    }
|