| import re | |
def normalize_text(text):
    """Normalize text for comparison: lowercase, strip punctuation.

    Every character that is neither a word character (letters, digits,
    underscore) nor whitespace is removed from the lowercased text.

    Args:
        text: The string to normalize.

    Returns:
        The lowercased string without punctuation and without leading or
        trailing whitespace. Interior whitespace is left untouched.
    """
    # Trim only after the punctuation is gone: trimming first would leave
    # whitespace that sat next to leading/trailing punctuation
    # (e.g. "done !" would come back as "done ").
    return re.sub(r'[^\w\s]', '', text.lower()).strip()
def simple_sent_split(text):
    """Split text into sentences with a plain regular expression.

    A boundary is a run of '.', '!' or '?' followed by whitespace, or such a
    run at the end of the text. The terminating punctuation is not kept.

    Args:
        text: The string to split.

    Returns:
        A list of the non-empty sentence fragments, each with surrounding
        whitespace removed, in their original order.
    """
    fragments = re.split(r'[.!?]+\s+|[.!?]+$', text)
    result = []
    for fragment in fragments:
        trimmed = fragment.strip()
        # Splitting on a trailing terminator yields an empty tail; drop it.
        if trimmed:
            result.append(trimmed)
    return result
def extract_hard_commitments(text, nlp=None):
    """Extract commitments using expanded modal keyword detection.

    The text is split into sentences with ``simple_sent_split`` and every
    sentence containing one of the hard-modal keywords as a whole word is
    kept.

    Args:
        text: The string to scan.
        nlp: Unused by this implementation; kept so existing callers that
            pass it keep working.

    Returns:
        A set of the matching sentences (whitespace-trimmed, original
        casing). Being a set, it carries no ordering and repeated sentences
        collapse into one entry.
    """
    # Match keywords as whole words. A plain substring test would also flag
    # unrelated words such as "willow", "mustard", "shallow", "behave" or
    # "needle". The inflections "needs"/"needed" and "n't" contractions are
    # listed explicitly so sentences with those forms are still detected.
    # NOTE(review): "have" also matches possessive uses ("I have a dog"),
    # exactly as the keyword list always did -- confirm this is intended.
    hard_modal_re = re.compile(
        r"\b(?:must|shall|will|have|needs?|needed|required|ought|cannot|should)"
        r"(?:n['\u2019]t)?\b"
    )
    # simple_sent_split already returns trimmed, non-empty sentences.
    return {
        sent
        for sent in simple_sent_split(text)
        if hard_modal_re.search(sent.lower())
    }