import re

# Keywords that mark a sentence as a "hard" commitment. "needs"/"needed" are
# listed explicitly because matching is whole-word (see _HARD_MODAL_RE), so
# inflections are no longer picked up implicitly via the "need" substring.
_HARD_MODALS = (
    'must', 'shall', 'will', 'have', 'need', 'needs', 'needed',
    'required', 'ought', 'cannot', 'should',
)

# Whole-word match with an optional negated contraction ("mustn't",
# "shouldn't", with a straight or curly apostrophe). The word boundaries stop
# keywords from matching inside unrelated words such as "willow" or "behave".
_HARD_MODAL_RE = re.compile(
    r"\b(?:" + "|".join(_HARD_MODALS) + r")(?:n['\u2019]t)?\b"
)


def normalize_text(text):
    """Normalize text for comparison: lowercase, strip punctuation.

    The text is lowercased, leading/trailing whitespace is removed, and every
    character that is neither a word character nor whitespace is dropped.
    Underscores are kept because the regex class ``\\w`` includes them.
    """
    return re.sub(r'[^\w\s]', '', text.lower().strip())


def simple_sent_split(text):
    """Split text into sentences using a simple regex.

    A sentence ends at a run of ``.``, ``!`` or ``?`` that is followed by
    whitespace or by the end of the text. The terminators are discarded and
    empty fragments are dropped.

    Returns:
        A list of stripped, non-empty sentence strings in original order.
    """
    sentences = re.split(r'[.!?]+\s+|[.!?]+$', text)
    return [s.strip() for s in sentences if s.strip()]


def extract_hard_commitments(text, nlp=None):
    """Extract commitments using expanded modal keyword detection.

    The text is split with ``simple_sent_split`` and a sentence is kept when
    it contains one of the hard-modal keywords as a whole word
    (case-insensitive), optionally followed by an ``n't`` contraction.

    Args:
        text: The text to scan.
        nlp: Unused; kept so existing callers that pass it keep working.

    Returns:
        A set of the stripped sentences that contain a hard-modal keyword.
    """
    return {
        sent.strip()
        for sent in simple_sent_split(text)
        if _HARD_MODAL_RE.search(sent.lower())
    }